minimap2 0.2.24.6 → 0.2.25.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -3
- data/ext/minimap2/Makefile +6 -2
- data/ext/minimap2/NEWS.md +38 -0
- data/ext/minimap2/README.md +9 -3
- data/ext/minimap2/align.c +5 -3
- data/ext/minimap2/cookbook.md +2 -2
- data/ext/minimap2/format.c +7 -4
- data/ext/minimap2/kalloc.c +20 -1
- data/ext/minimap2/kalloc.h +13 -2
- data/ext/minimap2/ksw2.h +1 -0
- data/ext/minimap2/ksw2_extd2_sse.c +1 -1
- data/ext/minimap2/ksw2_exts2_sse.c +79 -40
- data/ext/minimap2/ksw2_extz2_sse.c +1 -1
- data/ext/minimap2/lchain.c +15 -16
- data/ext/minimap2/main.c +13 -6
- data/ext/minimap2/map.c +0 -5
- data/ext/minimap2/minimap.h +40 -31
- data/ext/minimap2/minimap2.1 +19 -5
- data/ext/minimap2/misc/paftools.js +545 -24
- data/ext/minimap2/options.c +1 -1
- data/ext/minimap2/pyproject.toml +2 -0
- data/ext/minimap2/python/mappy.pyx +3 -1
- data/ext/minimap2/seed.c +1 -1
- data/ext/minimap2/setup.py +32 -22
- data/ext/minimap2.patch +3 -3
- data/lib/minimap2/aligner.rb +4 -0
- data/lib/minimap2/ffi/constants.rb +90 -88
- data/lib/minimap2/version.rb +2 -2
- metadata +4 -3
data/ext/minimap2/minimap.h
CHANGED
@@ -5,41 +5,45 @@
|
|
5
5
|
#include <stdio.h>
|
6
6
|
#include <sys/types.h>
|
7
7
|
|
8
|
-
#define
|
9
|
-
|
10
|
-
#define
|
11
|
-
#define
|
12
|
-
#define
|
13
|
-
#define
|
14
|
-
#define
|
15
|
-
#define
|
16
|
-
#define
|
17
|
-
#define
|
18
|
-
#define
|
19
|
-
#define
|
20
|
-
#define
|
21
|
-
#define
|
22
|
-
#define
|
23
|
-
#define
|
24
|
-
#define
|
25
|
-
#define
|
26
|
-
#define
|
27
|
-
#define
|
28
|
-
#define
|
29
|
-
#define
|
30
|
-
#define
|
31
|
-
#define
|
32
|
-
#define
|
33
|
-
#define
|
34
|
-
#define
|
35
|
-
#define
|
36
|
-
#define
|
37
|
-
#define
|
38
|
-
#define
|
8
|
+
#define MM_VERSION "2.25-r1173"
|
9
|
+
|
10
|
+
#define MM_F_NO_DIAG (0x001LL) // no exact diagonal hit
|
11
|
+
#define MM_F_NO_DUAL (0x002LL) // skip pairs where query name is lexicographically larger than target name
|
12
|
+
#define MM_F_CIGAR (0x004LL)
|
13
|
+
#define MM_F_OUT_SAM (0x008LL)
|
14
|
+
#define MM_F_NO_QUAL (0x010LL)
|
15
|
+
#define MM_F_OUT_CG (0x020LL)
|
16
|
+
#define MM_F_OUT_CS (0x040LL)
|
17
|
+
#define MM_F_SPLICE (0x080LL) // splice mode
|
18
|
+
#define MM_F_SPLICE_FOR (0x100LL) // match GT-AG
|
19
|
+
#define MM_F_SPLICE_REV (0x200LL) // match CT-AC, the reverse complement of GT-AG
|
20
|
+
#define MM_F_NO_LJOIN (0x400LL)
|
21
|
+
#define MM_F_OUT_CS_LONG (0x800LL)
|
22
|
+
#define MM_F_SR (0x1000LL)
|
23
|
+
#define MM_F_FRAG_MODE (0x2000LL)
|
24
|
+
#define MM_F_NO_PRINT_2ND (0x4000LL)
|
25
|
+
#define MM_F_2_IO_THREADS (0x8000LL)
|
26
|
+
#define MM_F_LONG_CIGAR (0x10000LL)
|
27
|
+
#define MM_F_INDEPEND_SEG (0x20000LL)
|
28
|
+
#define MM_F_SPLICE_FLANK (0x40000LL)
|
29
|
+
#define MM_F_SOFTCLIP (0x80000LL)
|
30
|
+
#define MM_F_FOR_ONLY (0x100000LL)
|
31
|
+
#define MM_F_REV_ONLY (0x200000LL)
|
32
|
+
#define MM_F_HEAP_SORT (0x400000LL)
|
33
|
+
#define MM_F_ALL_CHAINS (0x800000LL)
|
34
|
+
#define MM_F_OUT_MD (0x1000000LL)
|
35
|
+
#define MM_F_COPY_COMMENT (0x2000000LL)
|
36
|
+
#define MM_F_EQX (0x4000000LL) // use =/X instead of M
|
37
|
+
#define MM_F_PAF_NO_HIT (0x8000000LL) // output unmapped reads to PAF
|
38
|
+
#define MM_F_NO_END_FLT (0x10000000LL)
|
39
|
+
#define MM_F_HARD_MLEVEL (0x20000000LL)
|
40
|
+
#define MM_F_SAM_HIT_ONLY (0x40000000LL)
|
39
41
|
#define MM_F_RMQ (0x80000000LL)
|
40
42
|
#define MM_F_QSTRAND (0x100000000LL)
|
41
43
|
#define MM_F_NO_INV (0x200000000LL)
|
42
44
|
#define MM_F_NO_HASH_NAME (0x400000000LL)
|
45
|
+
#define MM_F_SPLICE_OLD (0x800000000LL)
|
46
|
+
#define MM_F_SECONDARY_SEQ (0x1000000000LL) // output SEQ field for secondary alignments using hard clipping
|
43
47
|
|
44
48
|
#define MM_I_HPC 0x1
|
45
49
|
#define MM_I_NO_SEQ 0x2
|
@@ -189,6 +193,11 @@ typedef struct {
|
|
189
193
|
} mm_idx_reader_t;
|
190
194
|
|
191
195
|
// memory buffer for thread-local storage during mapping
|
196
|
+
struct mm_tbuf_s {
|
197
|
+
void *km;
|
198
|
+
int rep_len, frag_gap;
|
199
|
+
};
|
200
|
+
|
192
201
|
typedef struct mm_tbuf_s mm_tbuf_t;
|
193
202
|
|
194
203
|
// global variables
|
data/ext/minimap2/minimap2.1
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
.TH minimap2 1 "
|
1
|
+
.TH minimap2 1 "25 April 2023" "minimap2-2.25 (r1173)" "Bioinformatics tools"
|
2
2
|
.SH NAME
|
3
3
|
.PP
|
4
4
|
minimap2 - mapping and alignment between collections of DNA sequences
|
@@ -79,6 +79,19 @@ Minimizer k-mer length [15]
|
|
79
79
|
.BI -w \ INT
|
80
80
|
Minimizer window size [10]. A minimizer is the smallest k-mer
|
81
81
|
in a window of w consecutive k-mers.
|
82
|
+
.TP
|
83
|
+
.BI -j \ INT
|
84
|
+
Syncmer submer size [10]. Option
|
85
|
+
.B -j
|
86
|
+
and
|
87
|
+
.B -w
|
88
|
+
will override each other: if
|
89
|
+
.B -w
|
90
|
+
is applied after
|
91
|
+
.BR -j ,
|
92
|
+
.B -j
|
93
|
+
will have no effect, and vice versa.
|
94
|
+
|
82
95
|
.TP
|
83
96
|
.B -H
|
84
97
|
Use homopolymer-compressed (HPC) minimizers. An HPC sequence is constructed by
|
@@ -88,16 +101,17 @@ on the HPC sequence.
|
|
88
101
|
.BI -I \ NUM
|
89
102
|
Load at most
|
90
103
|
.I NUM
|
91
|
-
target bases into RAM for indexing [
|
104
|
+
target bases into RAM for indexing [8G]. If there are more than
|
92
105
|
.I NUM
|
93
106
|
bases in
|
94
107
|
.IR target.fa ,
|
95
108
|
minimap2 needs to read
|
96
109
|
.I query.fa
|
97
|
-
multiple times to map it against each batch of target sequences.
|
110
|
+
multiple times to map it against each batch of target sequences. This would create a multi-part index.
|
98
111
|
.I NUM
|
99
112
|
may end with k/K/m/M/g/G. NB: mapping quality is incorrect given a
|
100
|
-
multi-part index.
|
113
|
+
multi-part index. See also option
|
114
|
+
.BR --split-prefix .
|
101
115
|
.TP
|
102
116
|
.B --idx-no-seq
|
103
117
|
Don't store target sequences in the index. It saves disk space and memory but
|
@@ -587,7 +601,7 @@ Up to 20% sequence divergence.
|
|
587
601
|
.B splice
|
588
602
|
Long-read spliced alignment
|
589
603
|
.RB ( -k15
|
590
|
-
.B -w5 --splice -g2k -G200k -A1 -B2 -O2,32 -E1,0 -
|
604
|
+
.B -w5 --splice -g2k -G200k -A1 -B2 -O2,32 -E1,0 -C9 -z200 -ub --junc-bonus=9 --cap-sw-mem=0
|
591
605
|
.BR --splice-flank=yes ).
|
592
606
|
In the splice mode, 1) long deletions are taken as introns and represented as
|
593
607
|
the
|