minimap2 0.2.25.0 → 0.2.25.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -3
- data/ext/Rakefile +2 -2
- data/ext/minimap2/Makefile +6 -2
- data/ext/minimap2/NEWS.md +38 -0
- data/ext/minimap2/README.md +9 -3
- data/ext/minimap2/align.c +5 -3
- data/ext/minimap2/cookbook.md +2 -2
- data/ext/minimap2/format.c +7 -4
- data/ext/minimap2/kalloc.c +20 -1
- data/ext/minimap2/kalloc.h +13 -2
- data/ext/minimap2/ksw2.h +1 -0
- data/ext/minimap2/ksw2_extd2_sse.c +1 -1
- data/ext/minimap2/ksw2_exts2_sse.c +79 -40
- data/ext/minimap2/ksw2_extz2_sse.c +1 -1
- data/ext/minimap2/lchain.c +15 -16
- data/ext/minimap2/main.c +13 -6
- data/ext/minimap2/map.c +0 -5
- data/ext/minimap2/minimap.h +40 -31
- data/ext/minimap2/minimap2.1 +19 -5
- data/ext/minimap2/misc/paftools.js +545 -24
- data/ext/minimap2/options.c +1 -1
- data/ext/minimap2/pyproject.toml +2 -0
- data/ext/minimap2/python/mappy.pyx +3 -1
- data/ext/minimap2/seed.c +1 -1
- data/ext/minimap2/setup.py +32 -22
- data/lib/minimap2/version.rb +1 -1
- metadata +4 -3
data/ext/minimap2/main.c
CHANGED
@@ -7,8 +7,6 @@
|
|
7
7
|
#include "mmpriv.h"
|
8
8
|
#include "ketopt.h"
|
9
9
|
|
10
|
-
#define MM_VERSION "2.24-r1122"
|
11
|
-
|
12
10
|
#ifdef __linux__
|
13
11
|
#include <sys/resource.h>
|
14
12
|
#include <sys/time.h>
|
@@ -78,6 +76,7 @@ static ko_longopt_t long_options[] = {
|
|
78
76
|
{ "chain-skip-scale",ko_required_argument,351 },
|
79
77
|
{ "print-chains", ko_no_argument, 352 },
|
80
78
|
{ "no-hash-name", ko_no_argument, 353 },
|
79
|
+
{ "secondary-seq", ko_no_argument, 354 },
|
81
80
|
{ "help", ko_no_argument, 'h' },
|
82
81
|
{ "max-intron-len", ko_required_argument, 'G' },
|
83
82
|
{ "version", ko_no_argument, 'V' },
|
@@ -121,7 +120,7 @@ static inline void yes_or_no(mm_mapopt_t *opt, int64_t flag, int long_idx, const
|
|
121
120
|
|
122
121
|
int main(int argc, char *argv[])
|
123
122
|
{
|
124
|
-
const char *opt_str = "2aSDw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:LC:yYPo:e:U:";
|
123
|
+
const char *opt_str = "2aSDw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:LC:yYPo:e:U:J:";
|
125
124
|
ketopt_t o = KETOPT_INIT;
|
126
125
|
mm_mapopt_t opt;
|
127
126
|
mm_idxopt_t ipt;
|
@@ -187,7 +186,12 @@ int main(int argc, char *argv[])
|
|
187
186
|
else if (c == 'R') rg = o.arg;
|
188
187
|
else if (c == 'h') fp_help = stdout;
|
189
188
|
else if (c == '2') opt.flag |= MM_F_2_IO_THREADS;
|
190
|
-
else if (c == 'o') {
|
189
|
+
else if (c == 'J') {
|
190
|
+
int t;
|
191
|
+
t = atoi(o.arg);
|
192
|
+
if (t == 0) opt.flag |= MM_F_SPLICE_OLD;
|
193
|
+
else if (t == 1) opt.flag &= ~MM_F_SPLICE_OLD;
|
194
|
+
} else if (c == 'o') {
|
191
195
|
if (strcmp(o.arg, "-") != 0) {
|
192
196
|
if (freopen(o.arg, "wb", stdout) == NULL) {
|
193
197
|
fprintf(stderr, "[ERROR]\033[1;31m failed to write the output to file '%s'\033[0m: %s\n", o.arg, strerror(errno));
|
@@ -237,6 +241,7 @@ int main(int argc, char *argv[])
|
|
237
241
|
else if (c == 350) opt.q_occ_frac = atof(o.arg); // --q-occ-frac
|
238
242
|
else if (c == 352) mm_dbg_flag |= MM_DBG_PRINT_CHAIN; // --print-chains
|
239
243
|
else if (c == 353) opt.flag |= MM_F_NO_HASH_NAME; // --no-hash-name
|
244
|
+
else if (c == 354) opt.flag |= MM_F_SECONDARY_SEQ; // --secondary-seq
|
240
245
|
else if (c == 330) {
|
241
246
|
fprintf(stderr, "[WARNING] \033[1;31m --lj-min-ratio has been deprecated.\033[0m\n");
|
242
247
|
} else if (c == 314) { // --frag
|
@@ -261,7 +266,8 @@ int main(int argc, char *argv[])
|
|
261
266
|
} else if (c == 326) { // --dual
|
262
267
|
yes_or_no(&opt, MM_F_NO_DUAL, o.longidx, o.arg, 0);
|
263
268
|
} else if (c == 347) { // --rmq
|
264
|
-
yes_or_no(&opt, MM_F_RMQ, o.longidx, o.arg, 1);
|
269
|
+
if (o.arg) yes_or_no(&opt, MM_F_RMQ, o.longidx, o.arg, 1);
|
270
|
+
else opt.flag |= MM_F_RMQ;
|
265
271
|
} else if (c == 'S') {
|
266
272
|
opt.flag |= MM_F_OUT_CS | MM_F_CIGAR | MM_F_OUT_CS_LONG;
|
267
273
|
if (mm_verbose >= 2)
|
@@ -322,7 +328,7 @@ int main(int argc, char *argv[])
|
|
322
328
|
fprintf(fp_help, " -H use homopolymer-compressed k-mer (preferrable for PacBio)\n");
|
323
329
|
fprintf(fp_help, " -k INT k-mer size (no larger than 28) [%d]\n", ipt.k);
|
324
330
|
fprintf(fp_help, " -w INT minimizer window size [%d]\n", ipt.w);
|
325
|
-
fprintf(fp_help, " -I NUM split index for every ~NUM input bases [
|
331
|
+
fprintf(fp_help, " -I NUM split index for every ~NUM input bases [8G]\n");
|
326
332
|
fprintf(fp_help, " -d FILE dump index to FILE []\n");
|
327
333
|
fprintf(fp_help, " Mapping:\n");
|
328
334
|
fprintf(fp_help, " -f FLOAT filter out top FLOAT fraction of repetitive minimizers [%g]\n", opt.mid_occ_frac);
|
@@ -344,6 +350,7 @@ int main(int argc, char *argv[])
|
|
344
350
|
fprintf(fp_help, " -z INT[,INT] Z-drop score and inversion Z-drop score [%d,%d]\n", opt.zdrop, opt.zdrop_inv);
|
345
351
|
fprintf(fp_help, " -s INT minimal peak DP alignment score [%d]\n", opt.min_dp_max);
|
346
352
|
fprintf(fp_help, " -u CHAR how to find GT-AG. f:transcript strand, b:both strands, n:don't match GT-AG [n]\n");
|
353
|
+
fprintf(fp_help, " -J INT splice mode. 0: original minimap2 model; 1: miniprot model [1]\n");
|
347
354
|
fprintf(fp_help, " Input/Output:\n");
|
348
355
|
fprintf(fp_help, " -a output in the SAM format (PAF by default)\n");
|
349
356
|
fprintf(fp_help, " -o FILE output alignments to FILE [stdout]\n");
|
data/ext/minimap2/map.c
CHANGED
data/ext/minimap2/minimap.h
CHANGED
@@ -5,41 +5,45 @@
|
|
5
5
|
#include <stdio.h>
|
6
6
|
#include <sys/types.h>
|
7
7
|
|
8
|
-
#define
|
9
|
-
|
10
|
-
#define
|
11
|
-
#define
|
12
|
-
#define
|
13
|
-
#define
|
14
|
-
#define
|
15
|
-
#define
|
16
|
-
#define
|
17
|
-
#define
|
18
|
-
#define
|
19
|
-
#define
|
20
|
-
#define
|
21
|
-
#define
|
22
|
-
#define
|
23
|
-
#define
|
24
|
-
#define
|
25
|
-
#define
|
26
|
-
#define
|
27
|
-
#define
|
28
|
-
#define
|
29
|
-
#define
|
30
|
-
#define
|
31
|
-
#define
|
32
|
-
#define
|
33
|
-
#define
|
34
|
-
#define
|
35
|
-
#define
|
36
|
-
#define
|
37
|
-
#define
|
38
|
-
#define
|
8
|
+
#define MM_VERSION "2.25-r1173"
|
9
|
+
|
10
|
+
#define MM_F_NO_DIAG (0x001LL) // no exact diagonal hit
|
11
|
+
#define MM_F_NO_DUAL (0x002LL) // skip pairs where query name is lexicographically larger than target name
|
12
|
+
#define MM_F_CIGAR (0x004LL)
|
13
|
+
#define MM_F_OUT_SAM (0x008LL)
|
14
|
+
#define MM_F_NO_QUAL (0x010LL)
|
15
|
+
#define MM_F_OUT_CG (0x020LL)
|
16
|
+
#define MM_F_OUT_CS (0x040LL)
|
17
|
+
#define MM_F_SPLICE (0x080LL) // splice mode
|
18
|
+
#define MM_F_SPLICE_FOR (0x100LL) // match GT-AG
|
19
|
+
#define MM_F_SPLICE_REV (0x200LL) // match CT-AC, the reverse complement of GT-AG
|
20
|
+
#define MM_F_NO_LJOIN (0x400LL)
|
21
|
+
#define MM_F_OUT_CS_LONG (0x800LL)
|
22
|
+
#define MM_F_SR (0x1000LL)
|
23
|
+
#define MM_F_FRAG_MODE (0x2000LL)
|
24
|
+
#define MM_F_NO_PRINT_2ND (0x4000LL)
|
25
|
+
#define MM_F_2_IO_THREADS (0x8000LL)
|
26
|
+
#define MM_F_LONG_CIGAR (0x10000LL)
|
27
|
+
#define MM_F_INDEPEND_SEG (0x20000LL)
|
28
|
+
#define MM_F_SPLICE_FLANK (0x40000LL)
|
29
|
+
#define MM_F_SOFTCLIP (0x80000LL)
|
30
|
+
#define MM_F_FOR_ONLY (0x100000LL)
|
31
|
+
#define MM_F_REV_ONLY (0x200000LL)
|
32
|
+
#define MM_F_HEAP_SORT (0x400000LL)
|
33
|
+
#define MM_F_ALL_CHAINS (0x800000LL)
|
34
|
+
#define MM_F_OUT_MD (0x1000000LL)
|
35
|
+
#define MM_F_COPY_COMMENT (0x2000000LL)
|
36
|
+
#define MM_F_EQX (0x4000000LL) // use =/X instead of M
|
37
|
+
#define MM_F_PAF_NO_HIT (0x8000000LL) // output unmapped reads to PAF
|
38
|
+
#define MM_F_NO_END_FLT (0x10000000LL)
|
39
|
+
#define MM_F_HARD_MLEVEL (0x20000000LL)
|
40
|
+
#define MM_F_SAM_HIT_ONLY (0x40000000LL)
|
39
41
|
#define MM_F_RMQ (0x80000000LL)
|
40
42
|
#define MM_F_QSTRAND (0x100000000LL)
|
41
43
|
#define MM_F_NO_INV (0x200000000LL)
|
42
44
|
#define MM_F_NO_HASH_NAME (0x400000000LL)
|
45
|
+
#define MM_F_SPLICE_OLD (0x800000000LL)
|
46
|
+
#define MM_F_SECONDARY_SEQ (0x1000000000LL) //output SEQ field for secondary alignments using hard clipping
|
43
47
|
|
44
48
|
#define MM_I_HPC 0x1
|
45
49
|
#define MM_I_NO_SEQ 0x2
|
@@ -189,6 +193,11 @@ typedef struct {
|
|
189
193
|
} mm_idx_reader_t;
|
190
194
|
|
191
195
|
// memory buffer for thread-local storage during mapping
|
196
|
+
struct mm_tbuf_s {
|
197
|
+
void *km;
|
198
|
+
int rep_len, frag_gap;
|
199
|
+
};
|
200
|
+
|
192
201
|
typedef struct mm_tbuf_s mm_tbuf_t;
|
193
202
|
|
194
203
|
// global variables
|
data/ext/minimap2/minimap2.1
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
.TH minimap2 1 "
|
1
|
+
.TH minimap2 1 "25 April 2023" "minimap2-2.25 (r1173)" "Bioinformatics tools"
|
2
2
|
.SH NAME
|
3
3
|
.PP
|
4
4
|
minimap2 - mapping and alignment between collections of DNA sequences
|
@@ -79,6 +79,19 @@ Minimizer k-mer length [15]
|
|
79
79
|
.BI -w \ INT
|
80
80
|
Minimizer window size [10]. A minimizer is the smallest k-mer
|
81
81
|
in a window of w consecutive k-mers.
|
82
|
+
.TP
|
83
|
+
.BI -j \ INT
|
84
|
+
Syncmer submer size [10]. Option
|
85
|
+
.B -j
|
86
|
+
and
|
87
|
+
.B -w
|
88
|
+
will override each other: if
|
89
|
+
.B -w
|
90
|
+
is applied after
|
91
|
+
.BR -j ,
|
92
|
+
.B -j
|
93
|
+
will have no effect, and vice versa.
|
94
|
+
|
82
95
|
.TP
|
83
96
|
.B -H
|
84
97
|
Use homopolymer-compressed (HPC) minimizers. An HPC sequence is constructed by
|
@@ -88,16 +101,17 @@ on the HPC sequence.
|
|
88
101
|
.BI -I \ NUM
|
89
102
|
Load at most
|
90
103
|
.I NUM
|
91
|
-
target bases into RAM for indexing [
|
104
|
+
target bases into RAM for indexing [8G]. If there are more than
|
92
105
|
.I NUM
|
93
106
|
bases in
|
94
107
|
.IR target.fa ,
|
95
108
|
minimap2 needs to read
|
96
109
|
.I query.fa
|
97
|
-
multiple times to map it against each batch of target sequences.
|
110
|
+
multiple times to map it against each batch of target sequences. This would create a multi-part index.
|
98
111
|
.I NUM
|
99
112
|
may end with k/K/m/M/g/G. NB: mapping quality is incorrect given a
|
100
|
-
multi-part index.
|
113
|
+
multi-part index. See also option
|
114
|
+
.BR --split-prefix .
|
101
115
|
.TP
|
102
116
|
.B --idx-no-seq
|
103
117
|
Don't store target sequences in the index. It saves disk space and memory but
|
@@ -587,7 +601,7 @@ Up to 20% sequence divergence.
|
|
587
601
|
.B splice
|
588
602
|
Long-read spliced alignment
|
589
603
|
.RB ( -k15
|
590
|
-
.B -w5 --splice -g2k -G200k -A1 -B2 -O2,32 -E1,0 -
|
604
|
+
.B -w5 --splice -g2k -G200k -A1 -B2 -O2,32 -E1,0 -C9 -z200 -ub --junc-bonus=9 --cap-sw-mem=0
|
591
605
|
.BR --splice-flank=yes ).
|
592
606
|
In the splice mode, 1) long deletions are taken as introns and represented as
|
593
607
|
the
|