minimap2 0.2.21 → 0.2.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +60 -76
- data/ext/Rakefile +53 -0
- data/ext/cmappy/cmappy.c +129 -0
- data/ext/cmappy/cmappy.h +44 -0
- data/ext/minimap2/FAQ.md +46 -0
- data/ext/minimap2/LICENSE.txt +24 -0
- data/ext/minimap2/MANIFEST.in +10 -0
- data/ext/minimap2/Makefile +132 -0
- data/ext/minimap2/Makefile.simde +97 -0
- data/ext/minimap2/NEWS.md +821 -0
- data/ext/minimap2/README.md +403 -0
- data/ext/minimap2/align.c +1020 -0
- data/ext/minimap2/bseq.c +169 -0
- data/ext/minimap2/bseq.h +64 -0
- data/ext/minimap2/code_of_conduct.md +30 -0
- data/ext/minimap2/cookbook.md +243 -0
- data/ext/minimap2/esterr.c +64 -0
- data/ext/minimap2/example.c +63 -0
- data/ext/minimap2/format.c +559 -0
- data/ext/minimap2/hit.c +466 -0
- data/ext/minimap2/index.c +775 -0
- data/ext/minimap2/kalloc.c +205 -0
- data/ext/minimap2/kalloc.h +76 -0
- data/ext/minimap2/kdq.h +132 -0
- data/ext/minimap2/ketopt.h +120 -0
- data/ext/minimap2/khash.h +615 -0
- data/ext/minimap2/krmq.h +474 -0
- data/ext/minimap2/kseq.h +256 -0
- data/ext/minimap2/ksort.h +153 -0
- data/ext/minimap2/ksw2.h +184 -0
- data/ext/minimap2/ksw2_dispatch.c +96 -0
- data/ext/minimap2/ksw2_extd2_sse.c +402 -0
- data/ext/minimap2/ksw2_exts2_sse.c +416 -0
- data/ext/minimap2/ksw2_extz2_sse.c +313 -0
- data/ext/minimap2/ksw2_ll_sse.c +152 -0
- data/ext/minimap2/kthread.c +159 -0
- data/ext/minimap2/kthread.h +15 -0
- data/ext/minimap2/kvec.h +105 -0
- data/ext/minimap2/lchain.c +369 -0
- data/ext/minimap2/main.c +459 -0
- data/ext/minimap2/map.c +714 -0
- data/ext/minimap2/minimap.h +410 -0
- data/ext/minimap2/minimap2.1 +725 -0
- data/ext/minimap2/misc/README.md +179 -0
- data/ext/minimap2/misc/mmphase.js +335 -0
- data/ext/minimap2/misc/paftools.js +3149 -0
- data/ext/minimap2/misc.c +162 -0
- data/ext/minimap2/mmpriv.h +132 -0
- data/ext/minimap2/options.c +234 -0
- data/ext/minimap2/pe.c +177 -0
- data/ext/minimap2/python/README.rst +196 -0
- data/ext/minimap2/python/cmappy.h +152 -0
- data/ext/minimap2/python/cmappy.pxd +153 -0
- data/ext/minimap2/python/mappy.pyx +273 -0
- data/ext/minimap2/python/minimap2.py +39 -0
- data/ext/minimap2/sdust.c +213 -0
- data/ext/minimap2/sdust.h +25 -0
- data/ext/minimap2/seed.c +131 -0
- data/ext/minimap2/setup.py +55 -0
- data/ext/minimap2/sketch.c +143 -0
- data/ext/minimap2/splitidx.c +84 -0
- data/ext/minimap2/sse2neon/emmintrin.h +1689 -0
- data/ext/minimap2/test/MT-human.fa +278 -0
- data/ext/minimap2/test/MT-orang.fa +276 -0
- data/ext/minimap2/test/q-inv.fa +4 -0
- data/ext/minimap2/test/q2.fa +2 -0
- data/ext/minimap2/test/t-inv.fa +127 -0
- data/ext/minimap2/test/t2.fa +2 -0
- data/ext/minimap2/tex/Makefile +21 -0
- data/ext/minimap2/tex/bioinfo.cls +930 -0
- data/ext/minimap2/tex/blasr-mc.eval +17 -0
- data/ext/minimap2/tex/bowtie2-s3.sam.eval +28 -0
- data/ext/minimap2/tex/bwa-s3.sam.eval +52 -0
- data/ext/minimap2/tex/bwa.eval +55 -0
- data/ext/minimap2/tex/eval2roc.pl +33 -0
- data/ext/minimap2/tex/graphmap.eval +4 -0
- data/ext/minimap2/tex/hs38-simu.sh +10 -0
- data/ext/minimap2/tex/minialign.eval +49 -0
- data/ext/minimap2/tex/minimap2.bib +460 -0
- data/ext/minimap2/tex/minimap2.tex +724 -0
- data/ext/minimap2/tex/mm2-s3.sam.eval +62 -0
- data/ext/minimap2/tex/mm2-update.tex +240 -0
- data/ext/minimap2/tex/mm2.approx.eval +12 -0
- data/ext/minimap2/tex/mm2.eval +13 -0
- data/ext/minimap2/tex/natbib.bst +1288 -0
- data/ext/minimap2/tex/natbib.sty +803 -0
- data/ext/minimap2/tex/ngmlr.eval +38 -0
- data/ext/minimap2/tex/roc.gp +60 -0
- data/ext/minimap2/tex/snap-s3.sam.eval +62 -0
- data/ext/minimap2.patch +19 -0
- data/lib/minimap2/aligner.rb +4 -4
- data/lib/minimap2/alignment.rb +11 -11
- data/lib/minimap2/ffi/constants.rb +75 -56
- data/lib/minimap2/ffi/functions.rb +5 -0
- data/lib/minimap2/ffi.rb +4 -5
- data/lib/minimap2/version.rb +2 -2
- data/lib/minimap2.rb +80 -28
- metadata +97 -65
- data/lib/minimap2/ffi_helper.rb +0 -53
- data/vendor/libminimap2.so +0 -0
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
Q 60 23616 0 0.000000000
|
|
2
|
+
Q 45 3520 1 0.000036851
|
|
3
|
+
Q 41 1840 1 0.000069023
|
|
4
|
+
Q 37 328 2 0.000136500
|
|
5
|
+
Q 36 276 1 0.000169033
|
|
6
|
+
Q 35 480 1 0.000199601
|
|
7
|
+
Q 33 375 2 0.000262855
|
|
8
|
+
Q 31 178 2 0.000326659
|
|
9
|
+
Q 30 153 5 0.000487551
|
|
10
|
+
Q 29 200 1 0.000516696
|
|
11
|
+
Q 27 100 3 0.000611601
|
|
12
|
+
Q 26 93 3 0.000706056
|
|
13
|
+
Q 25 75 2 0.000768393
|
|
14
|
+
Q 24 82 1 0.000798314
|
|
15
|
+
Q 23 80 6 0.000987387
|
|
16
|
+
Q 22 71 6 0.001175835
|
|
17
|
+
Q 21 76 7 0.001394921
|
|
18
|
+
Q 20 63 9 0.001676897
|
|
19
|
+
Q 19 55 4 0.001800322
|
|
20
|
+
Q 18 62 8 0.002048987
|
|
21
|
+
Q 17 55 7 0.002265718
|
|
22
|
+
Q 16 60 10 0.002575539
|
|
23
|
+
Q 15 82 9 0.002850877
|
|
24
|
+
Q 14 67 7 0.003063745
|
|
25
|
+
Q 13 62 11 0.003401042
|
|
26
|
+
Q 12 64 13 0.003799084
|
|
27
|
+
Q 11 56 5 0.003947900
|
|
28
|
+
Q 10 58 17 0.004468303
|
|
29
|
+
Q 9 70 22 0.005139796
|
|
30
|
+
Q 8 23 9 0.005414604
|
|
31
|
+
Q 7 41 17 0.005933068
|
|
32
|
+
Q 6 42 18 0.006480881
|
|
33
|
+
Q 5 33 9 0.006751757
|
|
34
|
+
Q 4 29 9 0.007022948
|
|
35
|
+
Q 3 27 15 0.007478764
|
|
36
|
+
Q 2 23 10 0.007781024
|
|
37
|
+
Q 1 9 2 0.007840364
|
|
38
|
+
Q 0 13 8 0.008083105
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
set t po eps enh co so "Helvetica,26"
|
|
2
|
+
|
|
3
|
+
set style line 1 lt 1 pt 1 lc rgb "#e41a1c" lw 2;
|
|
4
|
+
set style line 2 lt 1 pt 2 lc rgb "#377eb8" lw 2;
|
|
5
|
+
set style line 3 lt 1 pt 3 lc rgb "#4daf4a" lw 2;
|
|
6
|
+
set style line 4 lt 1 pt 4 lc rgb "#984ea3" lw 2;
|
|
7
|
+
set style line 5 lt 1 pt 6 lc rgb "#ff7f00" lw 2;
|
|
8
|
+
set style line 6 lt 1 pt 8 lc rgb "#f781bf" lw 2;
|
|
9
|
+
|
|
10
|
+
set out "roc-color.eps"
|
|
11
|
+
|
|
12
|
+
set pointsize 2.0
|
|
13
|
+
set size 1.59,1.04
|
|
14
|
+
set multiplot layout 1,2
|
|
15
|
+
|
|
16
|
+
set label "(a)" at graph -0.245,1.06 font "Helvetica-bold,40"
|
|
17
|
+
set xlab "Error rate of mapped PacBio reads"
|
|
18
|
+
set ylab "Fraction of mapped reads" off +1.8
|
|
19
|
+
set ytics 0.02
|
|
20
|
+
set yran [0.9:1]
|
|
21
|
+
|
|
22
|
+
set size 0.8,1
|
|
23
|
+
set log x
|
|
24
|
+
set format x "10^{%L}"
|
|
25
|
+
set key bot right
|
|
26
|
+
plot "<./eval2roc.pl blasr-mc.eval" u 2:3 t "blasr-mc" w lp ls 4, \
|
|
27
|
+
"<./eval2roc.pl bwa.eval" u 2:3 t "bwa-mem" w lp ls 2, \
|
|
28
|
+
"<./eval2roc.pl graphmap.eval" u 2:3 t "graphmap" w lp ls 3, \
|
|
29
|
+
"<./eval2roc.pl minialign.eval" u 2:3 t "minialign" w lp ls 1, \
|
|
30
|
+
"<./eval2roc.pl mm2.eval" u 2:3 t "minimap2" w lp ls 6, \
|
|
31
|
+
"<./eval2roc.pl ngmlr.eval" u 2:3 t "ngm-lr" w lp ls 5
|
|
32
|
+
unset label
|
|
33
|
+
|
|
34
|
+
set origin 0.8,0
|
|
35
|
+
set size 0.79,1
|
|
36
|
+
set label "(b)" at graph -0.245,1.06 font "Helvetica-bold,40"
|
|
37
|
+
set xlab "Error rate of mapped short reads"
|
|
38
|
+
|
|
39
|
+
set key top left
|
|
40
|
+
plot "<./eval2roc.pl -n2e7 bowtie2-s3.sam.eval" u 2:3 t "bowtie2" w lp ls 5, \
|
|
41
|
+
"<./eval2roc.pl -n2e7 bwa-s3.sam.eval" u 2:3 t "bwa-mem" w lp ls 2, \
|
|
42
|
+
"<./eval2roc.pl -n2e7 mm2-s3.sam.eval" u 2:3 t "minimap2" w lp ls 6, \
|
|
43
|
+
"<./eval2roc.pl -n2e7 snap-s3.sam.eval" u 2:3 t "snap" w lp ls 3
|
|
44
|
+
|
|
45
|
+
#unset log
|
|
46
|
+
#unset format
|
|
47
|
+
#unset key
|
|
48
|
+
#set log y
|
|
49
|
+
#set ylab "Accumulative mapping error rate" off +0
|
|
50
|
+
#set xlab "Mapping quality"
|
|
51
|
+
#set yran [1e-5:0.1]
|
|
52
|
+
#set ytics 1e-5,0.1
|
|
53
|
+
#set format y "10^{%L}"
|
|
54
|
+
#set xran [60:0] reverse
|
|
55
|
+
#plot "<./eval2roc.pl blasr-mc.eval" u 1:2 w lp ls 4, \
|
|
56
|
+
# "<./eval2roc.pl bwa.eval" u 1:2 t "bwa-mem" w lp ls 2, \
|
|
57
|
+
# "<./eval2roc.pl graphmap.eval" u 1:2 t "graphmap" w lp ls 3, \
|
|
58
|
+
# "<./eval2roc.pl minialign.eval" u 1:2 t "minialign" w lp ls 1, \
|
|
59
|
+
# "<./eval2roc.pl mm2.eval" u 1:2 t "minimap2" w lp ls 6, \
|
|
60
|
+
# "<./eval2roc.pl ngmlr.eval" u 1:2 t "ngm-lr" w lp ls 5
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
Q 60 18993268 10320 0.000543350 18993268
|
|
2
|
+
Q 59 33156 216 0.000553756 19026424
|
|
3
|
+
Q 58 29982 295 0.000568365 19056406
|
|
4
|
+
Q 57 9412 278 0.000582666 19065818
|
|
5
|
+
Q 56 11012 228 0.000594281 19076830
|
|
6
|
+
Q 55 9968 235 0.000606283 19086798
|
|
7
|
+
Q 54 8602 292 0.000621301 19095400
|
|
8
|
+
Q 53 6094 259 0.000634662 19101494
|
|
9
|
+
Q 52 5026 257 0.000647946 19106520
|
|
10
|
+
Q 51 4278 224 0.000659522 19110798
|
|
11
|
+
Q 50 3682 178 0.000668708 19114480
|
|
12
|
+
Q 49 2750 156 0.000676772 19117230
|
|
13
|
+
Q 48 2314 112 0.000682548 19119544
|
|
14
|
+
Q 47 2056 96 0.000687495 19121600
|
|
15
|
+
Q 46 1658 62 0.000690677 19123258
|
|
16
|
+
Q 45 1492 74 0.000694493 19124750
|
|
17
|
+
Q 44 1150 56 0.000697379 19125900
|
|
18
|
+
Q 43 1062 48 0.000699850 19126962
|
|
19
|
+
Q 42 976 60 0.000702951 19127938
|
|
20
|
+
Q 41 884 36 0.000704800 19128822
|
|
21
|
+
Q 40 708 52 0.000707493 19129530
|
|
22
|
+
Q 39 870 26 0.000708819 19130400
|
|
23
|
+
Q 38 598 26 0.000710156 19130998
|
|
24
|
+
Q 37 542 34 0.000711913 19131540
|
|
25
|
+
Q 36 846 50 0.000714495 19132386
|
|
26
|
+
Q 35 590 50 0.000717087 19132976
|
|
27
|
+
Q 34 550 42 0.000719261 19133526
|
|
28
|
+
Q 33 2174 66 0.000722628 19135700
|
|
29
|
+
Q 32 876 86 0.000727089 19136576
|
|
30
|
+
Q 31 638 104 0.000732500 19137214
|
|
31
|
+
Q 30 1718 196 0.000742675 19138932
|
|
32
|
+
Q 29 91022 968 0.000789497 19229954
|
|
33
|
+
Q 28 12864 781 0.000829556 19242818
|
|
34
|
+
Q 27 5806 427 0.000851489 19248624
|
|
35
|
+
Q 26 25274 728 0.000888144 19273898
|
|
36
|
+
Q 25 7418 680 0.000923070 19281316
|
|
37
|
+
Q 24 11800 701 0.000958839 19293116
|
|
38
|
+
Q 23 57328 3933 0.001159250 19350444
|
|
39
|
+
Q 22 7662 846 0.001202494 19358106
|
|
40
|
+
Q 21 5924 617 0.001233989 19364030
|
|
41
|
+
Q 20 4623 574 0.001263330 19368653
|
|
42
|
+
Q 19 4988 942 0.001311627 19373641
|
|
43
|
+
Q 18 3968 793 0.001352282 19377609
|
|
44
|
+
Q 17 3630 681 0.001387166 19381239
|
|
45
|
+
Q 16 2921 513 0.001413422 19384160
|
|
46
|
+
Q 15 2716 424 0.001435095 19386876
|
|
47
|
+
Q 14 2366 365 0.001453744 19389242
|
|
48
|
+
Q 13 2169 412 0.001474828 19391411
|
|
49
|
+
Q 12 2077 360 0.001493233 19393488
|
|
50
|
+
Q 11 2016 441 0.001515815 19395504
|
|
51
|
+
Q 10 2292 738 0.001553682 19397796
|
|
52
|
+
Q 9 4165 1832 0.001647772 19401961
|
|
53
|
+
Q 8 3963 1862 0.001743385 19405924
|
|
54
|
+
Q 7 3927 1793 0.001835408 19409851
|
|
55
|
+
Q 6 3572 1639 0.001919497 19413423
|
|
56
|
+
Q 5 3270 1533 0.001998126 19416693
|
|
57
|
+
Q 4 3046 1610 0.002080718 19419739
|
|
58
|
+
Q 3 251447 125550 0.008436553 19671186
|
|
59
|
+
Q 2 24390 13537 0.009113417 19695576
|
|
60
|
+
Q 1 124406 86780 0.013434624 19819982
|
|
61
|
+
Q 0 171254 153874 0.021016609 19991236
|
|
62
|
+
U 8764
|
data/ext/minimap2.patch
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
--- Makefile.org 2021-05-27 15:45:11.993128205 +0900
|
|
2
|
+
+++ Makefile 2021-05-27 15:46:02.320569154 +0900
|
|
3
|
+
@@ -1,9 +1,9 @@
|
|
4
|
+
-CFLAGS= -g -Wall -O2 -Wc++-compat #-Wextra
|
|
5
|
+
+CFLAGS= -g -Wall -O2 -Wc++-compat -fPIC #-Wextra
|
|
6
|
+
CPPFLAGS= -DHAVE_KALLOC
|
|
7
|
+
INCLUDES=
|
|
8
|
+
OBJS= kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o options.o index.o \
|
|
9
|
+
lchain.o align.o hit.o seed.o map.o format.o pe.o esterr.o splitidx.o \
|
|
10
|
+
- ksw2_ll_sse.o
|
|
11
|
+
+ ksw2_ll_sse.o cmappy.o
|
|
12
|
+
PROG= minimap2
|
|
13
|
+
PROG_EXTRA= sdust minimap2-lite
|
|
14
|
+
LIBS= -lm -lz -lpthread
|
|
15
|
+
@@ -130,3 +130,4 @@ sdust.o: kalloc.h kdq.h kvec.h sdust.h
|
|
16
|
+
seed.o: mmpriv.h minimap.h bseq.h kseq.h kalloc.h ksort.h
|
|
17
|
+
sketch.o: kvec.h kalloc.h mmpriv.h minimap.h bseq.h kseq.h
|
|
18
|
+
splitidx.o: mmpriv.h minimap.h bseq.h kseq.h
|
|
19
|
+
+cmappy.o: cmappy.h
|
data/lib/minimap2/aligner.rb
CHANGED
|
@@ -90,7 +90,7 @@ module Minimap2
|
|
|
90
90
|
end
|
|
91
91
|
|
|
92
92
|
if fn_idx_in
|
|
93
|
-
warn
|
|
93
|
+
warn "Since fn_idx_in is specified, the seq argument will be ignored." if seq
|
|
94
94
|
reader = FFI.mm_idx_reader_open(fn_idx_in, idx_opt, fn_idx_out)
|
|
95
95
|
|
|
96
96
|
# The Ruby version raises an error here
|
|
@@ -169,13 +169,13 @@ module Minimap2
|
|
|
169
169
|
c = hit[:cigar32].read_array_of_uint32(hit[:n_cigar32])
|
|
170
170
|
cigar = c.map { |x| [x >> 4, x & 0xf] } # 32-bit CIGAR encoding -> Ruby array
|
|
171
171
|
|
|
172
|
-
_cs =
|
|
172
|
+
_cs = ""
|
|
173
173
|
if cs
|
|
174
174
|
l_cs_str = FFI.mm_gen_cs(km, cs_str, m_cs_str, @index, regs[i], seq, 1)
|
|
175
175
|
_cs = cs_str.read_pointer.read_string(l_cs_str)
|
|
176
176
|
end
|
|
177
177
|
|
|
178
|
-
_md =
|
|
178
|
+
_md = ""
|
|
179
179
|
if md
|
|
180
180
|
l_cs_str = FFI.mm_gen_md(km, cs_str, m_cs_str, @index, regs[i], seq)
|
|
181
181
|
_md = cs_str.read_pointer.read_string(l_cs_str)
|
|
@@ -204,7 +204,7 @@ module Minimap2
|
|
|
204
204
|
lp = ::FFI::MemoryPointer.new(:int)
|
|
205
205
|
s = FFI.mappy_fetch_seq(index, name, start, stop, lp)
|
|
206
206
|
l = lp.read_int
|
|
207
|
-
return nil if l
|
|
207
|
+
return nil if l == 0
|
|
208
208
|
|
|
209
209
|
s.read_string(l)
|
|
210
210
|
end
|
data/lib/minimap2/alignment.rb
CHANGED
|
@@ -89,20 +89,20 @@ module Minimap2
|
|
|
89
89
|
# Convert to the PAF format without the QueryName and QueryLength columns.
|
|
90
90
|
|
|
91
91
|
def to_s
|
|
92
|
-
strand = if @strand
|
|
93
|
-
|
|
94
|
-
elsif @strand
|
|
95
|
-
|
|
92
|
+
strand = if @strand > 0
|
|
93
|
+
"+"
|
|
94
|
+
elsif @strand < 0
|
|
95
|
+
"-"
|
|
96
96
|
else
|
|
97
|
-
|
|
97
|
+
"?"
|
|
98
98
|
end
|
|
99
|
-
tp = @primary != 0 ?
|
|
100
|
-
ts = if @trans_strand
|
|
101
|
-
|
|
102
|
-
elsif @trans_strand
|
|
103
|
-
|
|
99
|
+
tp = @primary != 0 ? "tp:A:P" : "tp:A:S"
|
|
100
|
+
ts = if @trans_strand > 0
|
|
101
|
+
"ts:A:+"
|
|
102
|
+
elsif @trans_strand < 0
|
|
103
|
+
"ts:A:-"
|
|
104
104
|
else
|
|
105
|
-
|
|
105
|
+
"ts:A:."
|
|
106
106
|
end
|
|
107
107
|
a = [@q_st, @q_en, strand, @ctg, @ctg_len, @r_st, @r_en,
|
|
108
108
|
@mlen, @blen, @mapq, tp, ts, "cg:Z:#{@cigar_str}"]
|
|
@@ -3,48 +3,61 @@
|
|
|
3
3
|
module Minimap2
|
|
4
4
|
module FFI
|
|
5
5
|
# flags
|
|
6
|
-
NO_DIAG
|
|
7
|
-
NO_DUAL
|
|
8
|
-
CIGAR
|
|
9
|
-
OUT_SAM
|
|
10
|
-
NO_QUAL
|
|
11
|
-
OUT_CG
|
|
12
|
-
OUT_CS
|
|
13
|
-
SPLICE
|
|
14
|
-
SPLICE_FOR
|
|
15
|
-
SPLICE_REV
|
|
16
|
-
NO_LJOIN
|
|
17
|
-
OUT_CS_LONG
|
|
18
|
-
SR
|
|
19
|
-
FRAG_MODE
|
|
20
|
-
NO_PRINT_2ND
|
|
21
|
-
TWO_IO_THREADS
|
|
22
|
-
LONG_CIGAR
|
|
23
|
-
INDEPEND_SEG
|
|
24
|
-
SPLICE_FLANK
|
|
25
|
-
SOFTCLIP
|
|
26
|
-
FOR_ONLY
|
|
27
|
-
REV_ONLY
|
|
28
|
-
HEAP_SORT
|
|
29
|
-
ALL_CHAINS
|
|
30
|
-
OUT_MD
|
|
31
|
-
COPY_COMMENT
|
|
32
|
-
EQX
|
|
33
|
-
PAF_NO_HIT
|
|
34
|
-
NO_END_FLT
|
|
35
|
-
HARD_MLEVEL
|
|
36
|
-
SAM_HIT_ONLY
|
|
37
|
-
RMQ
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
6
|
+
NO_DIAG = 0x001 # no exact diagonal hit
|
|
7
|
+
NO_DUAL = 0x002 # skip pairs where query name is lexicographically larger than target name
|
|
8
|
+
CIGAR = 0x004
|
|
9
|
+
OUT_SAM = 0x008
|
|
10
|
+
NO_QUAL = 0x010
|
|
11
|
+
OUT_CG = 0x020
|
|
12
|
+
OUT_CS = 0x040
|
|
13
|
+
SPLICE = 0x080 # splice mode
|
|
14
|
+
SPLICE_FOR = 0x100 # match GT-AG
|
|
15
|
+
SPLICE_REV = 0x200 # match CT-AC, the reverse complement of GT-AG
|
|
16
|
+
NO_LJOIN = 0x400
|
|
17
|
+
OUT_CS_LONG = 0x800
|
|
18
|
+
SR = 0x1000
|
|
19
|
+
FRAG_MODE = 0x2000
|
|
20
|
+
NO_PRINT_2ND = 0x4000
|
|
21
|
+
TWO_IO_THREADS = 0x8000 # Translator's Note. MM_F_2_IO_THREADS. Constants starting with numbers cannot be defined.
|
|
22
|
+
LONG_CIGAR = 0x10000
|
|
23
|
+
INDEPEND_SEG = 0x20000
|
|
24
|
+
SPLICE_FLANK = 0x40000
|
|
25
|
+
SOFTCLIP = 0x80000
|
|
26
|
+
FOR_ONLY = 0x100000
|
|
27
|
+
REV_ONLY = 0x200000
|
|
28
|
+
HEAP_SORT = 0x400000
|
|
29
|
+
ALL_CHAINS = 0x800000
|
|
30
|
+
OUT_MD = 0x1000000
|
|
31
|
+
COPY_COMMENT = 0x2000000
|
|
32
|
+
EQX = 0x4000000 # use =/X instead of M
|
|
33
|
+
PAF_NO_HIT = 0x8000000 # output unmapped reads to PAF
|
|
34
|
+
NO_END_FLT = 0x10000000
|
|
35
|
+
HARD_MLEVEL = 0x20000000
|
|
36
|
+
SAM_HIT_ONLY = 0x40000000
|
|
37
|
+
RMQ = 0x80000000 # LL
|
|
38
|
+
QSTRAND = 0x100000000 # LL
|
|
39
|
+
NO_INV = 0x200000000 # LL
|
|
40
|
+
NO_HASH_NAME = 0x400000000 # LL
|
|
41
|
+
|
|
42
|
+
HPC = 0x1
|
|
43
|
+
NO_SEQ = 0x2
|
|
44
|
+
NO_NAME = 0x4
|
|
45
|
+
|
|
46
|
+
IDX_MAGIC = "MMI\2"
|
|
47
|
+
|
|
48
|
+
MAX_SEG = 255
|
|
49
|
+
|
|
50
|
+
CIGAR_MATCH = 0
|
|
51
|
+
CIGAR_INS = 1
|
|
52
|
+
CIGAR_DEL = 2
|
|
53
|
+
CIGAR_N_SKIP = 3
|
|
54
|
+
CIGAR_SOFTCLIP = 4
|
|
55
|
+
CIGAR_HARDCLIP = 5
|
|
56
|
+
CIGAR_PADDING = 6
|
|
57
|
+
CIGAR_EQ_MATCH = 7
|
|
58
|
+
CIGAR_X_MISMATCH = 8
|
|
59
|
+
|
|
60
|
+
CIGAR_STR = "MIDNSHP=XB"
|
|
48
61
|
|
|
49
62
|
# emulate 128-bit integers
|
|
50
63
|
class MM128 < ::FFI::Struct
|
|
@@ -89,6 +102,7 @@ module Minimap2
|
|
|
89
102
|
:min_cnt, :int, # min number of minimizers on each chain
|
|
90
103
|
:min_chain_score, :int, # min chaining score
|
|
91
104
|
:chain_gap_scale, :float,
|
|
105
|
+
:chain_skip_scale, :float,
|
|
92
106
|
:rmq_size_cap, :int,
|
|
93
107
|
:rmq_inner_dist, :int,
|
|
94
108
|
:rmq_rescue_size, :int,
|
|
@@ -115,14 +129,18 @@ module Minimap2
|
|
|
115
129
|
:anchor_ext_len, :int,
|
|
116
130
|
:anchor_ext_shift, :int,
|
|
117
131
|
:max_clip_ratio, :float, # drop an alignment if BOTH ends are clipped above this ratio
|
|
132
|
+
:rank_min_len, :int,
|
|
133
|
+
:rank_frac, :float,
|
|
118
134
|
:pe_ori, :int,
|
|
119
135
|
:pe_bonus, :int,
|
|
120
136
|
:mid_occ_frac, :float, # only used by mm_mapopt_update(); see below
|
|
137
|
+
:q_occ_frac, :float,
|
|
121
138
|
:min_mid_occ, :int32_t,
|
|
122
139
|
:mid_occ, :int32_t, # ignore seeds with occurrences above this threshold
|
|
123
140
|
:max_occ, :int32_t,
|
|
124
141
|
:mini_batch_size, :int64_t, # size of a batch of query bases to process in parallel
|
|
125
142
|
:max_sw_mat, :int64_t,
|
|
143
|
+
:cap_kalloc, :int64_t,
|
|
126
144
|
:split_prefix, :string
|
|
127
145
|
end
|
|
128
146
|
|
|
@@ -173,7 +191,7 @@ module Minimap2
|
|
|
173
191
|
:n_ambi_trans_strand, :uint32,
|
|
174
192
|
:n_cigar, :uint32
|
|
175
193
|
|
|
176
|
-
|
|
194
|
+
bit_field :n_ambi_trans_strand,
|
|
177
195
|
:n_ambi, 30, # number of ambiguous bases
|
|
178
196
|
:trans_strand, 2 # transcript strand: 0 for unknown, 1 for +, 2 for -
|
|
179
197
|
|
|
@@ -205,19 +223,20 @@ module Minimap2
|
|
|
205
223
|
:div, :float,
|
|
206
224
|
:p, Extra.ptr
|
|
207
225
|
|
|
208
|
-
|
|
209
|
-
:mapq,
|
|
210
|
-
:split,
|
|
211
|
-
:rev,
|
|
212
|
-
:inv,
|
|
213
|
-
:sam_pri,
|
|
214
|
-
:proper_frag,
|
|
215
|
-
:pe_thru,
|
|
216
|
-
:seg_split,
|
|
217
|
-
:seg_id,
|
|
218
|
-
:split_inv,
|
|
219
|
-
:is_alt,
|
|
220
|
-
:
|
|
226
|
+
bit_field :fields,
|
|
227
|
+
:mapq, 8,
|
|
228
|
+
:split, 2,
|
|
229
|
+
:rev, 1,
|
|
230
|
+
:inv, 1,
|
|
231
|
+
:sam_pri, 1,
|
|
232
|
+
:proper_frag, 1,
|
|
233
|
+
:pe_thru, 1,
|
|
234
|
+
:seg_split, 1,
|
|
235
|
+
:seg_id, 8,
|
|
236
|
+
:split_inv, 1,
|
|
237
|
+
:is_alt, 1,
|
|
238
|
+
:strand_retained, 1,
|
|
239
|
+
:dummy, 5
|
|
221
240
|
end
|
|
222
241
|
|
|
223
242
|
# memory buffer for thread-local storage during mapping
|
data/lib/minimap2/ffi.rb
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
# bit fields
|
|
4
|
-
|
|
5
|
-
|
|
4
|
+
require "ffi/bit_struct"
|
|
6
5
|
module Minimap2
|
|
7
6
|
# Native APIs
|
|
8
7
|
module FFI
|
|
@@ -22,6 +21,6 @@ module Minimap2
|
|
|
22
21
|
end
|
|
23
22
|
end
|
|
24
23
|
|
|
25
|
-
require_relative
|
|
26
|
-
require_relative
|
|
27
|
-
require_relative
|
|
24
|
+
require_relative "ffi/constants"
|
|
25
|
+
require_relative "ffi/functions"
|
|
26
|
+
require_relative "ffi/mappy"
|
data/lib/minimap2/version.rb
CHANGED
data/lib/minimap2.rb
CHANGED
|
@@ -1,15 +1,12 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
# dependencies
|
|
4
|
-
require
|
|
5
|
-
|
|
6
|
-
# bit fields
|
|
7
|
-
require_relative 'minimap2/ffi_helper'
|
|
4
|
+
require "ffi"
|
|
8
5
|
|
|
9
6
|
# modules
|
|
10
|
-
require_relative
|
|
11
|
-
require_relative
|
|
12
|
-
require_relative
|
|
7
|
+
require_relative "minimap2/aligner"
|
|
8
|
+
require_relative "minimap2/alignment"
|
|
9
|
+
require_relative "minimap2/version"
|
|
13
10
|
|
|
14
11
|
# Minimap2 mapper for long read sequences
|
|
15
12
|
# https://github.com/lh3/minimap2
|
|
@@ -22,39 +19,82 @@ module Minimap2
|
|
|
22
19
|
attr_accessor :ffi_lib
|
|
23
20
|
end
|
|
24
21
|
|
|
25
|
-
lib_name = ::FFI.map_library_name(
|
|
26
|
-
self.ffi_lib = if ENV[
|
|
27
|
-
File.expand_path(lib_name, ENV[
|
|
22
|
+
lib_name = ::FFI.map_library_name("minimap2")
|
|
23
|
+
self.ffi_lib = if ENV["MINIMAPDIR"]
|
|
24
|
+
File.expand_path(lib_name, ENV["MINIMAPDIR"])
|
|
28
25
|
else
|
|
29
26
|
File.expand_path("../vendor/#{lib_name}", __dir__)
|
|
30
27
|
end
|
|
31
28
|
|
|
32
29
|
# friendlier error message
|
|
33
|
-
autoload :FFI,
|
|
30
|
+
autoload :FFI, "minimap2/ffi"
|
|
34
31
|
|
|
35
32
|
# methods from mappy
|
|
36
33
|
class << self
|
|
34
|
+
# Execute minimap2 command with given options.
|
|
35
|
+
# @overload execute(arg0,arg1,...)
|
|
36
|
+
# @param [String] arg minimap2 command option.
|
|
37
|
+
# @example Get minimap2 version
|
|
38
|
+
# Minimap2.execute('--version')
|
|
39
|
+
|
|
40
|
+
def Minimap2.execute(*rb_argv)
|
|
41
|
+
str_ptrs = []
|
|
42
|
+
# First argument is the program name.
|
|
43
|
+
str_ptrs << ::FFI::MemoryPointer.from_string("minimap2")
|
|
44
|
+
rb_argv.each do |arg|
|
|
45
|
+
arg.to_s.split(/\s+/).each do |s|
|
|
46
|
+
str_ptrs << ::FFI::MemoryPointer.from_string(s)
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
str_ptrs << nil
|
|
50
|
+
|
|
51
|
+
# Load all the pointers into a native memory block
|
|
52
|
+
argv = ::FFI::MemoryPointer.new(:pointer, str_ptrs.length)
|
|
53
|
+
str_ptrs.each_with_index do |p, i|
|
|
54
|
+
argv[i].put_pointer(0, p)
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
FFI.main(str_ptrs.length - 1, argv)
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
# Get verbosity level.
|
|
61
|
+
# @return [Integer] verbosity level.
|
|
62
|
+
|
|
63
|
+
def verbose
|
|
64
|
+
FFI.mm_verbose_level(-1)
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
# Set verbosity level.
|
|
68
|
+
# @param [Integer] level verbosity level
|
|
69
|
+
# @return [Integer] verbosity level.
|
|
70
|
+
|
|
71
|
+
def verbose=(level)
|
|
72
|
+
FFI.mm_verbose_level(level)
|
|
73
|
+
end
|
|
74
|
+
|
|
37
75
|
# Read fasta/fastq file.
|
|
38
76
|
# @param [String] file_path
|
|
39
|
-
# @param [Boolean]
|
|
77
|
+
# @param [Boolean] comment If true, the comment will be read.
|
|
40
78
|
# @yield [name, seq, qual, comment]
|
|
41
|
-
#
|
|
79
|
+
# @return [Enumerator] enum Returns an Enumerator if no block is given.
|
|
80
|
+
# Note: You can use BioRuby instead of this method.
|
|
42
81
|
|
|
43
|
-
def fastx_read(file_path,
|
|
82
|
+
def fastx_read(file_path, comment: false, &block)
|
|
44
83
|
path = File.expand_path(file_path)
|
|
84
|
+
|
|
85
|
+
# Raise the error in Ruby because ks.null? is false even if the file does not exist.
|
|
86
|
+
raise ArgumentError, "File not found: #{path}" unless File.exist?(path)
|
|
87
|
+
|
|
45
88
|
ks = FFI.mm_fastx_open(path)
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
else
|
|
54
|
-
yield [name, seq, qual]
|
|
89
|
+
|
|
90
|
+
if block_given?
|
|
91
|
+
fastx_each(ks, comment, &block)
|
|
92
|
+
else
|
|
93
|
+
Enumerator.new do |y|
|
|
94
|
+
# rewind does not work
|
|
95
|
+
fastx_each(ks, comment) { |r| y << r }
|
|
55
96
|
end
|
|
56
97
|
end
|
|
57
|
-
FFI.mm_fastx_close(ks)
|
|
58
98
|
end
|
|
59
99
|
|
|
60
100
|
# Reverse complement sequence.
|
|
@@ -68,11 +108,23 @@ module Minimap2
|
|
|
68
108
|
FFI.mappy_revcomp(l, bseq)
|
|
69
109
|
end
|
|
70
110
|
|
|
71
|
-
|
|
72
|
-
# @param [Integer] level
|
|
111
|
+
private
|
|
73
112
|
|
|
74
|
-
def
|
|
75
|
-
FFI.
|
|
113
|
+
def fastx_each(ks, comment)
|
|
114
|
+
yield fastx_next(ks, comment) while FFI.kseq_read(ks) >= 0
|
|
115
|
+
FFI.mm_fastx_close(ks)
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
def fastx_next(ks, read_comment)
|
|
119
|
+
qual = ks[:qual][:s] if (ks[:qual][:l]) > 0
|
|
120
|
+
name = ks[:name][:s]
|
|
121
|
+
seq = ks[:seq][:s]
|
|
122
|
+
if read_comment
|
|
123
|
+
comment = ks[:comment][:s] if (ks[:comment][:l]) > 0
|
|
124
|
+
[name, seq, qual, comment]
|
|
125
|
+
else
|
|
126
|
+
[name, seq, qual]
|
|
127
|
+
end
|
|
76
128
|
end
|
|
77
129
|
end
|
|
78
130
|
end
|