minimap2 0.0.4 → 0.2.23.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +113 -98
- data/ext/Rakefile +41 -0
- data/ext/cmappy/cmappy.c +129 -0
- data/ext/cmappy/cmappy.h +44 -0
- data/ext/minimap2/FAQ.md +46 -0
- data/ext/minimap2/LICENSE.txt +24 -0
- data/ext/minimap2/MANIFEST.in +10 -0
- data/ext/minimap2/Makefile +132 -0
- data/ext/minimap2/Makefile.simde +97 -0
- data/ext/minimap2/NEWS.md +807 -0
- data/ext/minimap2/README.md +403 -0
- data/ext/minimap2/align.c +1020 -0
- data/ext/minimap2/bseq.c +169 -0
- data/ext/minimap2/bseq.h +64 -0
- data/ext/minimap2/code_of_conduct.md +30 -0
- data/ext/minimap2/cookbook.md +243 -0
- data/ext/minimap2/esterr.c +64 -0
- data/ext/minimap2/example.c +63 -0
- data/ext/minimap2/format.c +559 -0
- data/ext/minimap2/hit.c +466 -0
- data/ext/minimap2/index.c +775 -0
- data/ext/minimap2/kalloc.c +205 -0
- data/ext/minimap2/kalloc.h +76 -0
- data/ext/minimap2/kdq.h +132 -0
- data/ext/minimap2/ketopt.h +120 -0
- data/ext/minimap2/khash.h +615 -0
- data/ext/minimap2/krmq.h +474 -0
- data/ext/minimap2/kseq.h +256 -0
- data/ext/minimap2/ksort.h +153 -0
- data/ext/minimap2/ksw2.h +184 -0
- data/ext/minimap2/ksw2_dispatch.c +96 -0
- data/ext/minimap2/ksw2_extd2_sse.c +402 -0
- data/ext/minimap2/ksw2_exts2_sse.c +416 -0
- data/ext/minimap2/ksw2_extz2_sse.c +313 -0
- data/ext/minimap2/ksw2_ll_sse.c +152 -0
- data/ext/minimap2/kthread.c +159 -0
- data/ext/minimap2/kthread.h +15 -0
- data/ext/minimap2/kvec.h +105 -0
- data/ext/minimap2/lchain.c +344 -0
- data/ext/minimap2/main.c +455 -0
- data/ext/minimap2/map.c +714 -0
- data/ext/minimap2/minimap.h +409 -0
- data/ext/minimap2/minimap2.1 +722 -0
- data/ext/minimap2/misc/README.md +179 -0
- data/ext/minimap2/misc/mmphase.js +335 -0
- data/ext/minimap2/misc/paftools.js +3149 -0
- data/ext/minimap2/misc.c +162 -0
- data/ext/minimap2/mmpriv.h +131 -0
- data/ext/minimap2/options.c +233 -0
- data/ext/minimap2/pe.c +177 -0
- data/ext/minimap2/python/README.rst +196 -0
- data/ext/minimap2/python/cmappy.h +152 -0
- data/ext/minimap2/python/cmappy.pxd +153 -0
- data/ext/minimap2/python/mappy.pyx +273 -0
- data/ext/minimap2/python/minimap2.py +39 -0
- data/ext/minimap2/sdust.c +213 -0
- data/ext/minimap2/sdust.h +25 -0
- data/ext/minimap2/seed.c +131 -0
- data/ext/minimap2/setup.py +55 -0
- data/ext/minimap2/sketch.c +143 -0
- data/ext/minimap2/splitidx.c +84 -0
- data/ext/minimap2/sse2neon/emmintrin.h +1689 -0
- data/ext/minimap2/test/MT-human.fa +278 -0
- data/ext/minimap2/test/MT-orang.fa +276 -0
- data/ext/minimap2/test/q-inv.fa +4 -0
- data/ext/minimap2/test/q2.fa +2 -0
- data/ext/minimap2/test/t-inv.fa +127 -0
- data/ext/minimap2/test/t2.fa +2 -0
- data/ext/minimap2/tex/Makefile +21 -0
- data/ext/minimap2/tex/bioinfo.cls +930 -0
- data/ext/minimap2/tex/blasr-mc.eval +17 -0
- data/ext/minimap2/tex/bowtie2-s3.sam.eval +28 -0
- data/ext/minimap2/tex/bwa-s3.sam.eval +52 -0
- data/ext/minimap2/tex/bwa.eval +55 -0
- data/ext/minimap2/tex/eval2roc.pl +33 -0
- data/ext/minimap2/tex/graphmap.eval +4 -0
- data/ext/minimap2/tex/hs38-simu.sh +10 -0
- data/ext/minimap2/tex/minialign.eval +49 -0
- data/ext/minimap2/tex/minimap2.bib +460 -0
- data/ext/minimap2/tex/minimap2.tex +724 -0
- data/ext/minimap2/tex/mm2-s3.sam.eval +62 -0
- data/ext/minimap2/tex/mm2-update.tex +240 -0
- data/ext/minimap2/tex/mm2.approx.eval +12 -0
- data/ext/minimap2/tex/mm2.eval +13 -0
- data/ext/minimap2/tex/natbib.bst +1288 -0
- data/ext/minimap2/tex/natbib.sty +803 -0
- data/ext/minimap2/tex/ngmlr.eval +38 -0
- data/ext/minimap2/tex/roc.gp +60 -0
- data/ext/minimap2/tex/snap-s3.sam.eval +62 -0
- data/ext/minimap2.patch +19 -0
- data/ext/vendor/libminimap2.so +0 -0
- data/lib/minimap2/aligner.rb +16 -5
- data/lib/minimap2/alignment.rb +6 -2
- data/lib/minimap2/ffi/constants.rb +74 -53
- data/lib/minimap2/ffi/functions.rb +5 -0
- data/lib/minimap2/ffi.rb +1 -2
- data/lib/minimap2/version.rb +2 -1
- data/lib/minimap2.rb +67 -22
- metadata +98 -64
- data/lib/minimap2/ffi_helper.rb +0 -53
@@ -0,0 +1,38 @@
|
|
1
|
+
Q 60 23616 0 0.000000000
|
2
|
+
Q 45 3520 1 0.000036851
|
3
|
+
Q 41 1840 1 0.000069023
|
4
|
+
Q 37 328 2 0.000136500
|
5
|
+
Q 36 276 1 0.000169033
|
6
|
+
Q 35 480 1 0.000199601
|
7
|
+
Q 33 375 2 0.000262855
|
8
|
+
Q 31 178 2 0.000326659
|
9
|
+
Q 30 153 5 0.000487551
|
10
|
+
Q 29 200 1 0.000516696
|
11
|
+
Q 27 100 3 0.000611601
|
12
|
+
Q 26 93 3 0.000706056
|
13
|
+
Q 25 75 2 0.000768393
|
14
|
+
Q 24 82 1 0.000798314
|
15
|
+
Q 23 80 6 0.000987387
|
16
|
+
Q 22 71 6 0.001175835
|
17
|
+
Q 21 76 7 0.001394921
|
18
|
+
Q 20 63 9 0.001676897
|
19
|
+
Q 19 55 4 0.001800322
|
20
|
+
Q 18 62 8 0.002048987
|
21
|
+
Q 17 55 7 0.002265718
|
22
|
+
Q 16 60 10 0.002575539
|
23
|
+
Q 15 82 9 0.002850877
|
24
|
+
Q 14 67 7 0.003063745
|
25
|
+
Q 13 62 11 0.003401042
|
26
|
+
Q 12 64 13 0.003799084
|
27
|
+
Q 11 56 5 0.003947900
|
28
|
+
Q 10 58 17 0.004468303
|
29
|
+
Q 9 70 22 0.005139796
|
30
|
+
Q 8 23 9 0.005414604
|
31
|
+
Q 7 41 17 0.005933068
|
32
|
+
Q 6 42 18 0.006480881
|
33
|
+
Q 5 33 9 0.006751757
|
34
|
+
Q 4 29 9 0.007022948
|
35
|
+
Q 3 27 15 0.007478764
|
36
|
+
Q 2 23 10 0.007781024
|
37
|
+
Q 1 9 2 0.007840364
|
38
|
+
Q 0 13 8 0.008083105
|
@@ -0,0 +1,60 @@
|
|
1
|
+
set t po eps enh co so "Helvetica,26"
|
2
|
+
|
3
|
+
set style line 1 lt 1 pt 1 lc rgb "#e41a1c" lw 2;
|
4
|
+
set style line 2 lt 1 pt 2 lc rgb "#377eb8" lw 2;
|
5
|
+
set style line 3 lt 1 pt 3 lc rgb "#4daf4a" lw 2;
|
6
|
+
set style line 4 lt 1 pt 4 lc rgb "#984ea3" lw 2;
|
7
|
+
set style line 5 lt 1 pt 6 lc rgb "#ff7f00" lw 2;
|
8
|
+
set style line 6 lt 1 pt 8 lc rgb "#f781bf" lw 2;
|
9
|
+
|
10
|
+
set out "roc-color.eps"
|
11
|
+
|
12
|
+
set pointsize 2.0
|
13
|
+
set size 1.59,1.04
|
14
|
+
set multiplot layout 1,2
|
15
|
+
|
16
|
+
set label "(a)" at graph -0.245,1.06 font "Helvetica-bold,40"
|
17
|
+
set xlab "Error rate of mapped PacBio reads"
|
18
|
+
set ylab "Fraction of mapped reads" off +1.8
|
19
|
+
set ytics 0.02
|
20
|
+
set yran [0.9:1]
|
21
|
+
|
22
|
+
set size 0.8,1
|
23
|
+
set log x
|
24
|
+
set format x "10^{%L}"
|
25
|
+
set key bot right
|
26
|
+
plot "<./eval2roc.pl blasr-mc.eval" u 2:3 t "blasr-mc" w lp ls 4, \
|
27
|
+
"<./eval2roc.pl bwa.eval" u 2:3 t "bwa-mem" w lp ls 2, \
|
28
|
+
"<./eval2roc.pl graphmap.eval" u 2:3 t "graphmap" w lp ls 3, \
|
29
|
+
"<./eval2roc.pl minialign.eval" u 2:3 t "minialign" w lp ls 1, \
|
30
|
+
"<./eval2roc.pl mm2.eval" u 2:3 t "minimap2" w lp ls 6, \
|
31
|
+
"<./eval2roc.pl ngmlr.eval" u 2:3 t "ngm-lr" w lp ls 5
|
32
|
+
unset label
|
33
|
+
|
34
|
+
set origin 0.8,0
|
35
|
+
set size 0.79,1
|
36
|
+
set label "(b)" at graph -0.245,1.06 font "Helvetica-bold,40"
|
37
|
+
set xlab "Error rate of mapped short reads"
|
38
|
+
|
39
|
+
set key top left
|
40
|
+
plot "<./eval2roc.pl -n2e7 bowtie2-s3.sam.eval" u 2:3 t "bowtie2" w lp ls 5, \
|
41
|
+
"<./eval2roc.pl -n2e7 bwa-s3.sam.eval" u 2:3 t "bwa-mem" w lp ls 2, \
|
42
|
+
"<./eval2roc.pl -n2e7 mm2-s3.sam.eval" u 2:3 t "minimap2" w lp ls 6, \
|
43
|
+
"<./eval2roc.pl -n2e7 snap-s3.sam.eval" u 2:3 t "snap" w lp ls 3
|
44
|
+
|
45
|
+
#unset log
|
46
|
+
#unset format
|
47
|
+
#unset key
|
48
|
+
#set log y
|
49
|
+
#set ylab "Accumulative mapping error rate" off +0
|
50
|
+
#set xlab "Mapping quality"
|
51
|
+
#set yran [1e-5:0.1]
|
52
|
+
#set ytics 1e-5,0.1
|
53
|
+
#set format y "10^{%L}"
|
54
|
+
#set xran [60:0] reverse
|
55
|
+
#plot "<./eval2roc.pl blasr-mc.eval" u 1:2 w lp ls 4, \
|
56
|
+
# "<./eval2roc.pl bwa.eval" u 1:2 t "bwa-mem" w lp ls 2, \
|
57
|
+
# "<./eval2roc.pl graphmap.eval" u 1:2 t "graphmap" w lp ls 3, \
|
58
|
+
# "<./eval2roc.pl minialign.eval" u 1:2 t "minialign" w lp ls 1, \
|
59
|
+
# "<./eval2roc.pl mm2.eval" u 1:2 t "minimap2" w lp ls 6, \
|
60
|
+
# "<./eval2roc.pl ngmlr.eval" u 1:2 t "ngm-lr" w lp ls 5
|
@@ -0,0 +1,62 @@
|
|
1
|
+
Q 60 18993268 10320 0.000543350 18993268
|
2
|
+
Q 59 33156 216 0.000553756 19026424
|
3
|
+
Q 58 29982 295 0.000568365 19056406
|
4
|
+
Q 57 9412 278 0.000582666 19065818
|
5
|
+
Q 56 11012 228 0.000594281 19076830
|
6
|
+
Q 55 9968 235 0.000606283 19086798
|
7
|
+
Q 54 8602 292 0.000621301 19095400
|
8
|
+
Q 53 6094 259 0.000634662 19101494
|
9
|
+
Q 52 5026 257 0.000647946 19106520
|
10
|
+
Q 51 4278 224 0.000659522 19110798
|
11
|
+
Q 50 3682 178 0.000668708 19114480
|
12
|
+
Q 49 2750 156 0.000676772 19117230
|
13
|
+
Q 48 2314 112 0.000682548 19119544
|
14
|
+
Q 47 2056 96 0.000687495 19121600
|
15
|
+
Q 46 1658 62 0.000690677 19123258
|
16
|
+
Q 45 1492 74 0.000694493 19124750
|
17
|
+
Q 44 1150 56 0.000697379 19125900
|
18
|
+
Q 43 1062 48 0.000699850 19126962
|
19
|
+
Q 42 976 60 0.000702951 19127938
|
20
|
+
Q 41 884 36 0.000704800 19128822
|
21
|
+
Q 40 708 52 0.000707493 19129530
|
22
|
+
Q 39 870 26 0.000708819 19130400
|
23
|
+
Q 38 598 26 0.000710156 19130998
|
24
|
+
Q 37 542 34 0.000711913 19131540
|
25
|
+
Q 36 846 50 0.000714495 19132386
|
26
|
+
Q 35 590 50 0.000717087 19132976
|
27
|
+
Q 34 550 42 0.000719261 19133526
|
28
|
+
Q 33 2174 66 0.000722628 19135700
|
29
|
+
Q 32 876 86 0.000727089 19136576
|
30
|
+
Q 31 638 104 0.000732500 19137214
|
31
|
+
Q 30 1718 196 0.000742675 19138932
|
32
|
+
Q 29 91022 968 0.000789497 19229954
|
33
|
+
Q 28 12864 781 0.000829556 19242818
|
34
|
+
Q 27 5806 427 0.000851489 19248624
|
35
|
+
Q 26 25274 728 0.000888144 19273898
|
36
|
+
Q 25 7418 680 0.000923070 19281316
|
37
|
+
Q 24 11800 701 0.000958839 19293116
|
38
|
+
Q 23 57328 3933 0.001159250 19350444
|
39
|
+
Q 22 7662 846 0.001202494 19358106
|
40
|
+
Q 21 5924 617 0.001233989 19364030
|
41
|
+
Q 20 4623 574 0.001263330 19368653
|
42
|
+
Q 19 4988 942 0.001311627 19373641
|
43
|
+
Q 18 3968 793 0.001352282 19377609
|
44
|
+
Q 17 3630 681 0.001387166 19381239
|
45
|
+
Q 16 2921 513 0.001413422 19384160
|
46
|
+
Q 15 2716 424 0.001435095 19386876
|
47
|
+
Q 14 2366 365 0.001453744 19389242
|
48
|
+
Q 13 2169 412 0.001474828 19391411
|
49
|
+
Q 12 2077 360 0.001493233 19393488
|
50
|
+
Q 11 2016 441 0.001515815 19395504
|
51
|
+
Q 10 2292 738 0.001553682 19397796
|
52
|
+
Q 9 4165 1832 0.001647772 19401961
|
53
|
+
Q 8 3963 1862 0.001743385 19405924
|
54
|
+
Q 7 3927 1793 0.001835408 19409851
|
55
|
+
Q 6 3572 1639 0.001919497 19413423
|
56
|
+
Q 5 3270 1533 0.001998126 19416693
|
57
|
+
Q 4 3046 1610 0.002080718 19419739
|
58
|
+
Q 3 251447 125550 0.008436553 19671186
|
59
|
+
Q 2 24390 13537 0.009113417 19695576
|
60
|
+
Q 1 124406 86780 0.013434624 19819982
|
61
|
+
Q 0 171254 153874 0.021016609 19991236
|
62
|
+
U 8764
|
data/ext/minimap2.patch
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
--- Makefile.org 2021-05-27 15:45:11.993128205 +0900
|
2
|
+
+++ Makefile 2021-05-27 15:46:02.320569154 +0900
|
3
|
+
@@ -1,9 +1,9 @@
|
4
|
+
-CFLAGS= -g -Wall -O2 -Wc++-compat #-Wextra
|
5
|
+
+CFLAGS= -g -Wall -O2 -Wc++-compat -fPIC #-Wextra
|
6
|
+
CPPFLAGS= -DHAVE_KALLOC
|
7
|
+
INCLUDES=
|
8
|
+
OBJS= kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o options.o index.o \
|
9
|
+
lchain.o align.o hit.o seed.o map.o format.o pe.o esterr.o splitidx.o \
|
10
|
+
- ksw2_ll_sse.o
|
11
|
+
+ ksw2_ll_sse.o cmappy.o
|
12
|
+
PROG= minimap2
|
13
|
+
PROG_EXTRA= sdust minimap2-lite
|
14
|
+
LIBS= -lm -lz -lpthread
|
15
|
+
@@ -130,3 +130,4 @@ sdust.o: kalloc.h kdq.h kvec.h sdust.h
|
16
|
+
seed.o: mmpriv.h minimap.h bseq.h kseq.h kalloc.h ksort.h
|
17
|
+
sketch.o: kvec.h kalloc.h mmpriv.h minimap.h bseq.h kseq.h
|
18
|
+
splitidx.o: mmpriv.h minimap.h bseq.h kseq.h
|
19
|
+
+cmappy.o: cmappy.h
|
Binary file
|
data/lib/minimap2/aligner.rb
CHANGED
@@ -4,11 +4,21 @@ module Minimap2
|
|
4
4
|
class Aligner
|
5
5
|
attr_reader :idx_opt, :map_opt, :index
|
6
6
|
|
7
|
-
# Create a new aligner
|
7
|
+
# Create a new aligner.
|
8
8
|
#
|
9
9
|
# @param fn_idx_in [String] index or sequence file name.
|
10
10
|
# @param seq [String] a single sequence to index.
|
11
11
|
# @param preset [String] minimap2 preset.
|
12
|
+
# * map-pb : PacBio CLR genomic reads
|
13
|
+
# * map-ont : Oxford Nanopore genomic reads
|
14
|
+
# * map-hifi : PacBio HiFi/CCS genomic reads (v2.19 or later)
|
15
|
+
# * asm20 : PacBio HiFi/CCS genomic reads (v2.18 or earlier)
|
16
|
+
# * sr : short genomic paired-end reads
|
17
|
+
# * splice : spliced long reads (strand unknown)
|
18
|
+
# * splice:hq : Final PacBio Iso-seq or traditional cDNA
|
19
|
+
# * asm5 : intra-species asm-to-asm alignment
|
20
|
+
# * ava-pb : PacBio read overlap
|
21
|
+
# * ava-ont : Nanopore read overlap
|
12
22
|
# @param k [Integer] k-mer length, no larger than 28.
|
13
23
|
# @param w [Integer] minimizer window size, no larger than 255.
|
14
24
|
# @param min_cnt [Integer] minimum number of minimizers on a chain.
|
@@ -101,6 +111,7 @@ module Minimap2
|
|
101
111
|
end
|
102
112
|
|
103
113
|
# Explicitly releases the memory of the index object.
|
114
|
+
|
104
115
|
def free_index
|
105
116
|
FFI.mm_idx_destroy(index) unless index.null?
|
106
117
|
end
|
@@ -184,10 +195,10 @@ module Minimap2
|
|
184
195
|
alignments
|
185
196
|
end
|
186
197
|
|
187
|
-
#
|
188
|
-
# @
|
189
|
-
# @
|
190
|
-
# @
|
198
|
+
# Retrieve a subsequence from the index.
|
199
|
+
# @param name
|
200
|
+
# @param start
|
201
|
+
# @param stop
|
191
202
|
|
192
203
|
def seq(name, start = 0, stop = 0x7fffffff)
|
193
204
|
lp = ::FFI::MemoryPointer.new(:int)
|
data/lib/minimap2/alignment.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Minimap2
|
4
|
-
# Alignment result
|
4
|
+
# Alignment result.
|
5
5
|
#
|
6
6
|
# @!attribute ctg
|
7
7
|
# @return [String] name of the reference sequence the query is mapped to.
|
@@ -73,17 +73,21 @@ module Minimap2
|
|
73
73
|
@cs = cs
|
74
74
|
@md = md
|
75
75
|
|
76
|
-
@cigar_str = cigar.map { |x| x[0].to_s +
|
76
|
+
@cigar_str = cigar.map { |x| x[0].to_s + FFI::CIGAR_STR[x[1]] }.join
|
77
77
|
end
|
78
78
|
|
79
79
|
def primary?
|
80
80
|
@primary == 1
|
81
81
|
end
|
82
82
|
|
83
|
+
# Convert Alignment to hash.
|
84
|
+
|
83
85
|
def to_h
|
84
86
|
self.class.keys.map { |k| [k, __send__(k)] }.to_h
|
85
87
|
end
|
86
88
|
|
89
|
+
# Convert to the PAF format without the QueryName and QueryLength columns.
|
90
|
+
|
87
91
|
def to_s
|
88
92
|
strand = if @strand.positive?
|
89
93
|
'+'
|
@@ -3,45 +3,60 @@
|
|
3
3
|
module Minimap2
|
4
4
|
module FFI
|
5
5
|
# flags
|
6
|
-
NO_DIAG
|
7
|
-
NO_DUAL
|
8
|
-
CIGAR
|
9
|
-
OUT_SAM
|
10
|
-
NO_QUAL
|
11
|
-
OUT_CG
|
12
|
-
OUT_CS
|
13
|
-
SPLICE
|
14
|
-
SPLICE_FOR
|
15
|
-
SPLICE_REV
|
16
|
-
NO_LJOIN
|
17
|
-
OUT_CS_LONG
|
18
|
-
SR
|
19
|
-
FRAG_MODE
|
20
|
-
NO_PRINT_2ND
|
21
|
-
TWO_IO_THREADS
|
22
|
-
LONG_CIGAR
|
23
|
-
INDEPEND_SEG
|
24
|
-
SPLICE_FLANK
|
25
|
-
SOFTCLIP
|
26
|
-
FOR_ONLY
|
27
|
-
REV_ONLY
|
28
|
-
HEAP_SORT
|
29
|
-
ALL_CHAINS
|
30
|
-
OUT_MD
|
31
|
-
COPY_COMMENT
|
32
|
-
EQX
|
33
|
-
PAF_NO_HIT
|
34
|
-
NO_END_FLT
|
35
|
-
HARD_MLEVEL
|
36
|
-
SAM_HIT_ONLY
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
6
|
+
NO_DIAG = 0x001 # no exact diagonal hit
|
7
|
+
NO_DUAL = 0x002 # skip pairs where query name is lexicographically larger than target name
|
8
|
+
CIGAR = 0x004
|
9
|
+
OUT_SAM = 0x008
|
10
|
+
NO_QUAL = 0x010
|
11
|
+
OUT_CG = 0x020
|
12
|
+
OUT_CS = 0x040
|
13
|
+
SPLICE = 0x080 # splice mode
|
14
|
+
SPLICE_FOR = 0x100 # match GT-AG
|
15
|
+
SPLICE_REV = 0x200 # match CT-AC, the reverse complement of GT-AG
|
16
|
+
NO_LJOIN = 0x400
|
17
|
+
OUT_CS_LONG = 0x800
|
18
|
+
SR = 0x1000
|
19
|
+
FRAG_MODE = 0x2000
|
20
|
+
NO_PRINT_2ND = 0x4000
|
21
|
+
TWO_IO_THREADS = 0x8000 # Translator's Note. MM_F_2_IO_THREADS. Constants starting with numbers cannot be defined.
|
22
|
+
LONG_CIGAR = 0x10000
|
23
|
+
INDEPEND_SEG = 0x20000
|
24
|
+
SPLICE_FLANK = 0x40000
|
25
|
+
SOFTCLIP = 0x80000
|
26
|
+
FOR_ONLY = 0x100000
|
27
|
+
REV_ONLY = 0x200000
|
28
|
+
HEAP_SORT = 0x400000
|
29
|
+
ALL_CHAINS = 0x800000
|
30
|
+
OUT_MD = 0x1000000
|
31
|
+
COPY_COMMENT = 0x2000000
|
32
|
+
EQX = 0x4000000 # use =/X instead of M
|
33
|
+
PAF_NO_HIT = 0x8000000 # output unmapped reads to PAF
|
34
|
+
NO_END_FLT = 0x10000000
|
35
|
+
HARD_MLEVEL = 0x20000000
|
36
|
+
SAM_HIT_ONLY = 0x40000000
|
37
|
+
RMQ = 0x80000000 # LL
|
38
|
+
QSTRAND = 0x100000000 # LL
|
39
|
+
NO_INV = 0x200000000
|
40
|
+
|
41
|
+
HPC = 0x1
|
42
|
+
NO_SEQ = 0x2
|
43
|
+
NO_NAME = 0x4
|
44
|
+
|
45
|
+
IDX_MAGIC = "MMI\2"
|
46
|
+
|
47
|
+
MAX_SEG = 255
|
48
|
+
|
49
|
+
CIGAR_MATCH = 0
|
50
|
+
CIGAR_INS = 1
|
51
|
+
CIGAR_DEL = 2
|
52
|
+
CIGAR_N_SKIP = 3
|
53
|
+
CIGAR_SOFTCLIP = 4
|
54
|
+
CIGAR_HARDCLIP = 5
|
55
|
+
CIGAR_PADDING = 6
|
56
|
+
CIGAR_EQ_MATCH = 7
|
57
|
+
CIGAR_X_MISMATCH = 8
|
58
|
+
|
59
|
+
CIGAR_STR = 'MIDNSHP=XB'
|
45
60
|
|
46
61
|
# emulate 128-bit integers
|
47
62
|
class MM128 < ::FFI::Struct
|
@@ -86,6 +101,7 @@ module Minimap2
|
|
86
101
|
:min_cnt, :int, # min number of minimizers on each chain
|
87
102
|
:min_chain_score, :int, # min chaining score
|
88
103
|
:chain_gap_scale, :float,
|
104
|
+
:chain_skip_scale, :float,
|
89
105
|
:rmq_size_cap, :int,
|
90
106
|
:rmq_inner_dist, :int,
|
91
107
|
:rmq_rescue_size, :int,
|
@@ -112,14 +128,18 @@ module Minimap2
|
|
112
128
|
:anchor_ext_len, :int,
|
113
129
|
:anchor_ext_shift, :int,
|
114
130
|
:max_clip_ratio, :float, # drop an alignment if BOTH ends are clipped above this ratio
|
131
|
+
:rank_min_len, :int,
|
132
|
+
:rank_frac, :float,
|
115
133
|
:pe_ori, :int,
|
116
134
|
:pe_bonus, :int,
|
117
135
|
:mid_occ_frac, :float, # only used by mm_mapopt_update(); see below
|
136
|
+
:q_occ_frac, :float,
|
118
137
|
:min_mid_occ, :int32_t,
|
119
138
|
:mid_occ, :int32_t, # ignore seeds with occurrences above this threshold
|
120
139
|
:max_occ, :int32_t,
|
121
140
|
:mini_batch_size, :int64_t, # size of a batch of query bases to process in parallel
|
122
141
|
:max_sw_mat, :int64_t,
|
142
|
+
:cap_kalloc, :int64_t,
|
123
143
|
:split_prefix, :string
|
124
144
|
end
|
125
145
|
|
@@ -170,7 +190,7 @@ module Minimap2
|
|
170
190
|
:n_ambi_trans_strand, :uint32,
|
171
191
|
:n_cigar, :uint32
|
172
192
|
|
173
|
-
|
193
|
+
bit_field :n_ambi_trans_strand,
|
174
194
|
:n_ambi, 30, # number of ambiguous bases
|
175
195
|
:trans_strand, 2 # transcript strand: 0 for unknown, 1 for +, 2 for -
|
176
196
|
|
@@ -202,19 +222,20 @@ module Minimap2
|
|
202
222
|
:div, :float,
|
203
223
|
:p, Extra.ptr
|
204
224
|
|
205
|
-
|
206
|
-
:mapq,
|
207
|
-
:split,
|
208
|
-
:rev,
|
209
|
-
:inv,
|
210
|
-
:sam_pri,
|
211
|
-
:proper_frag,
|
212
|
-
:pe_thru,
|
213
|
-
:seg_split,
|
214
|
-
:seg_id,
|
215
|
-
:split_inv,
|
216
|
-
:is_alt,
|
217
|
-
:
|
225
|
+
bit_field :fields,
|
226
|
+
:mapq, 8,
|
227
|
+
:split, 2,
|
228
|
+
:rev, 1,
|
229
|
+
:inv, 1,
|
230
|
+
:sam_pri, 1,
|
231
|
+
:proper_frag, 1,
|
232
|
+
:pe_thru, 1,
|
233
|
+
:seg_split, 1,
|
234
|
+
:seg_id, 8,
|
235
|
+
:split_inv, 1,
|
236
|
+
:is_alt, 1,
|
237
|
+
:strand_retained, 1,
|
238
|
+
:dummy, 5
|
218
239
|
end
|
219
240
|
|
220
241
|
# memory buffer for thread-local storage during mapping
|
data/lib/minimap2/ffi.rb
CHANGED
data/lib/minimap2/version.rb
CHANGED
data/lib/minimap2.rb
CHANGED
@@ -3,9 +3,6 @@
|
|
3
3
|
# dependencies
|
4
4
|
require 'ffi'
|
5
5
|
|
6
|
-
# bit fields
|
7
|
-
require_relative 'minimap2/ffi_helper'
|
8
|
-
|
9
6
|
# modules
|
10
7
|
require_relative 'minimap2/aligner'
|
11
8
|
require_relative 'minimap2/alignment'
|
@@ -34,30 +31,66 @@ module Minimap2
|
|
34
31
|
|
35
32
|
# methods from mappy
|
36
33
|
class << self
|
37
|
-
|
34
|
+
|
35
|
+
# Execute the minimap2 command with the given options.
|
36
|
+
# @overload execute(arg0,arg1,...)
|
37
|
+
# @param [String] arg minimap2 command option.
|
38
|
+
# @example Get minimap2 version
|
39
|
+
# Minimap2.execute('--version')
|
40
|
+
|
41
|
+
def Minimap2.execute(*rb_argv)
|
42
|
+
str_ptrs = []
|
43
|
+
# First argument is the program name.
|
44
|
+
str_ptrs << ::FFI::MemoryPointer.from_string('minimap2')
|
45
|
+
rb_argv.each do |arg|
|
46
|
+
arg.to_s.split(/\s+/).each do |s|
|
47
|
+
str_ptrs << ::FFI::MemoryPointer.from_string(s)
|
48
|
+
end
|
49
|
+
end
|
50
|
+
strptrs << nil
|
51
|
+
|
52
|
+
# Load all the pointers into a native memory block
|
53
|
+
argv = ::FFI::MemoryPointer.new(:pointer, strptrs.length)
|
54
|
+
strptrs.each_with_index do |p, i|
|
55
|
+
argv[i].put_pointer(0, p)
|
56
|
+
end
|
57
|
+
|
58
|
+
FFI.main(strptrs.length - 1, argv)
|
59
|
+
end
|
60
|
+
|
61
|
+
# Set verbosity level.
|
62
|
+
# @param [Integer] level
|
63
|
+
|
64
|
+
def verbose(level = -1)
|
65
|
+
FFI.mm_verbose_level(level)
|
66
|
+
end
|
67
|
+
|
68
|
+
# Read fasta/fastq file.
|
38
69
|
# @param [String] file_path
|
39
|
-
# @param [Boolean]
|
70
|
+
# @param [Boolean] comment If true, the comment will be read.
|
40
71
|
# @yield [name, seq, qual, comment]
|
41
|
-
#
|
72
|
+
# @return [Enumerator] enum Returns an Enumerator if no block is given.
|
73
|
+
# Note: You can use BioRuby instead of this method.
|
42
74
|
|
43
|
-
def fastx_read(file_path,
|
75
|
+
def fastx_read(file_path, comment: false, &block)
|
44
76
|
path = File.expand_path(file_path)
|
77
|
+
|
78
|
+
# Raise the error in Ruby because ks.null? is false even if the file does not exist.
|
79
|
+
raise ArgumentError, "File not found: #{path}" unless File.exist?(path)
|
80
|
+
|
45
81
|
ks = FFI.mm_fastx_open(path)
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
else
|
54
|
-
yield [name, seq, qual]
|
82
|
+
|
83
|
+
if block_given?
|
84
|
+
fastx_each(ks, comment, &block)
|
85
|
+
else
|
86
|
+
Enumerator.new do |y|
|
87
|
+
# rewind does not work
|
88
|
+
fastx_each(ks, comment) { |r| y << r }
|
55
89
|
end
|
56
90
|
end
|
57
|
-
FFI.mm_fastx_close(ks)
|
58
91
|
end
|
59
92
|
|
60
|
-
#
|
93
|
+
# Reverse complement sequence.
|
61
94
|
# @param [String] seq
|
62
95
|
# @return [String] seq
|
63
96
|
|
@@ -68,11 +101,23 @@ module Minimap2
|
|
68
101
|
FFI.mappy_revcomp(l, bseq)
|
69
102
|
end
|
70
103
|
|
71
|
-
|
72
|
-
# @param [Integer] level
|
104
|
+
private
|
73
105
|
|
74
|
-
def
|
75
|
-
FFI.
|
106
|
+
def fastx_each(ks, comment)
|
107
|
+
yield fastx_next(ks, comment) while FFI.kseq_read(ks) >= 0
|
108
|
+
FFI.mm_fastx_close(ks)
|
109
|
+
end
|
110
|
+
|
111
|
+
def fastx_next(ks, read_comment)
|
112
|
+
qual = ks[:qual][:s] if (ks[:qual][:l]).positive?
|
113
|
+
name = ks[:name][:s]
|
114
|
+
seq = ks[:seq][:s]
|
115
|
+
if read_comment
|
116
|
+
comment = ks[:comment][:s] if (ks[:comment][:l]).positive?
|
117
|
+
[name, seq, qual, comment]
|
118
|
+
else
|
119
|
+
[name, seq, qual]
|
120
|
+
end
|
76
121
|
end
|
77
122
|
end
|
78
123
|
end
|