minimap2 0.2.25.0 → 0.2.25.2

Sign up to get free protection for your applications and to get access to all the features.
data/ext/minimap2/main.c CHANGED
@@ -7,8 +7,6 @@
7
7
  #include "mmpriv.h"
8
8
  #include "ketopt.h"
9
9
 
10
- #define MM_VERSION "2.24-r1122"
11
-
12
10
  #ifdef __linux__
13
11
  #include <sys/resource.h>
14
12
  #include <sys/time.h>
@@ -78,6 +76,7 @@ static ko_longopt_t long_options[] = {
78
76
  { "chain-skip-scale",ko_required_argument,351 },
79
77
  { "print-chains", ko_no_argument, 352 },
80
78
  { "no-hash-name", ko_no_argument, 353 },
79
+ { "secondary-seq", ko_no_argument, 354 },
81
80
  { "help", ko_no_argument, 'h' },
82
81
  { "max-intron-len", ko_required_argument, 'G' },
83
82
  { "version", ko_no_argument, 'V' },
@@ -121,7 +120,7 @@ static inline void yes_or_no(mm_mapopt_t *opt, int64_t flag, int long_idx, const
121
120
 
122
121
  int main(int argc, char *argv[])
123
122
  {
124
- const char *opt_str = "2aSDw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:LC:yYPo:e:U:";
123
+ const char *opt_str = "2aSDw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:LC:yYPo:e:U:J:";
125
124
  ketopt_t o = KETOPT_INIT;
126
125
  mm_mapopt_t opt;
127
126
  mm_idxopt_t ipt;
@@ -187,7 +186,12 @@ int main(int argc, char *argv[])
187
186
  else if (c == 'R') rg = o.arg;
188
187
  else if (c == 'h') fp_help = stdout;
189
188
  else if (c == '2') opt.flag |= MM_F_2_IO_THREADS;
190
- else if (c == 'o') {
189
+ else if (c == 'J') {
190
+ int t;
191
+ t = atoi(o.arg);
192
+ if (t == 0) opt.flag |= MM_F_SPLICE_OLD;
193
+ else if (t == 1) opt.flag &= ~MM_F_SPLICE_OLD;
194
+ } else if (c == 'o') {
191
195
  if (strcmp(o.arg, "-") != 0) {
192
196
  if (freopen(o.arg, "wb", stdout) == NULL) {
193
197
  fprintf(stderr, "[ERROR]\033[1;31m failed to write the output to file '%s'\033[0m: %s\n", o.arg, strerror(errno));
@@ -237,6 +241,7 @@ int main(int argc, char *argv[])
237
241
  else if (c == 350) opt.q_occ_frac = atof(o.arg); // --q-occ-frac
238
242
  else if (c == 352) mm_dbg_flag |= MM_DBG_PRINT_CHAIN; // --print-chains
239
243
  else if (c == 353) opt.flag |= MM_F_NO_HASH_NAME; // --no-hash-name
244
+ else if (c == 354) opt.flag |= MM_F_SECONDARY_SEQ; // --secondary-seq
240
245
  else if (c == 330) {
241
246
  fprintf(stderr, "[WARNING] \033[1;31m --lj-min-ratio has been deprecated.\033[0m\n");
242
247
  } else if (c == 314) { // --frag
@@ -261,7 +266,8 @@ int main(int argc, char *argv[])
261
266
  } else if (c == 326) { // --dual
262
267
  yes_or_no(&opt, MM_F_NO_DUAL, o.longidx, o.arg, 0);
263
268
  } else if (c == 347) { // --rmq
264
- yes_or_no(&opt, MM_F_RMQ, o.longidx, o.arg, 1);
269
+ if (o.arg) yes_or_no(&opt, MM_F_RMQ, o.longidx, o.arg, 1);
270
+ else opt.flag |= MM_F_RMQ;
265
271
  } else if (c == 'S') {
266
272
  opt.flag |= MM_F_OUT_CS | MM_F_CIGAR | MM_F_OUT_CS_LONG;
267
273
  if (mm_verbose >= 2)
@@ -322,7 +328,7 @@ int main(int argc, char *argv[])
322
328
  fprintf(fp_help, " -H use homopolymer-compressed k-mer (preferrable for PacBio)\n");
323
329
  fprintf(fp_help, " -k INT k-mer size (no larger than 28) [%d]\n", ipt.k);
324
330
  fprintf(fp_help, " -w INT minimizer window size [%d]\n", ipt.w);
325
- fprintf(fp_help, " -I NUM split index for every ~NUM input bases [4G]\n");
331
+ fprintf(fp_help, " -I NUM split index for every ~NUM input bases [8G]\n");
326
332
  fprintf(fp_help, " -d FILE dump index to FILE []\n");
327
333
  fprintf(fp_help, " Mapping:\n");
328
334
  fprintf(fp_help, " -f FLOAT filter out top FLOAT fraction of repetitive minimizers [%g]\n", opt.mid_occ_frac);
@@ -344,6 +350,7 @@ int main(int argc, char *argv[])
344
350
  fprintf(fp_help, " -z INT[,INT] Z-drop score and inversion Z-drop score [%d,%d]\n", opt.zdrop, opt.zdrop_inv);
345
351
  fprintf(fp_help, " -s INT minimal peak DP alignment score [%d]\n", opt.min_dp_max);
346
352
  fprintf(fp_help, " -u CHAR how to find GT-AG. f:transcript strand, b:both strands, n:don't match GT-AG [n]\n");
353
+ fprintf(fp_help, " -J INT splice mode. 0: original minimap2 model; 1: miniprot model [1]\n");
347
354
  fprintf(fp_help, " Input/Output:\n");
348
355
  fprintf(fp_help, " -a output in the SAM format (PAF by default)\n");
349
356
  fprintf(fp_help, " -o FILE output alignments to FILE [stdout]\n");
data/ext/minimap2/map.c CHANGED
@@ -10,11 +10,6 @@
10
10
  #include "bseq.h"
11
11
  #include "khash.h"
12
12
 
13
- struct mm_tbuf_s {
14
- void *km;
15
- int rep_len, frag_gap;
16
- };
17
-
18
13
  mm_tbuf_t *mm_tbuf_init(void)
19
14
  {
20
15
  mm_tbuf_t *b;
@@ -5,41 +5,45 @@
5
5
  #include <stdio.h>
6
6
  #include <sys/types.h>
7
7
 
8
- #define MM_F_NO_DIAG 0x001 // no exact diagonal hit
9
- #define MM_F_NO_DUAL 0x002 // skip pairs where query name is lexicographically larger than target name
10
- #define MM_F_CIGAR 0x004
11
- #define MM_F_OUT_SAM 0x008
12
- #define MM_F_NO_QUAL 0x010
13
- #define MM_F_OUT_CG 0x020
14
- #define MM_F_OUT_CS 0x040
15
- #define MM_F_SPLICE 0x080 // splice mode
16
- #define MM_F_SPLICE_FOR 0x100 // match GT-AG
17
- #define MM_F_SPLICE_REV 0x200 // match CT-AC, the reverse complement of GT-AG
18
- #define MM_F_NO_LJOIN 0x400
19
- #define MM_F_OUT_CS_LONG 0x800
20
- #define MM_F_SR 0x1000
21
- #define MM_F_FRAG_MODE 0x2000
22
- #define MM_F_NO_PRINT_2ND 0x4000
23
- #define MM_F_2_IO_THREADS 0x8000
24
- #define MM_F_LONG_CIGAR 0x10000
25
- #define MM_F_INDEPEND_SEG 0x20000
26
- #define MM_F_SPLICE_FLANK 0x40000
27
- #define MM_F_SOFTCLIP 0x80000
28
- #define MM_F_FOR_ONLY 0x100000
29
- #define MM_F_REV_ONLY 0x200000
30
- #define MM_F_HEAP_SORT 0x400000
31
- #define MM_F_ALL_CHAINS 0x800000
32
- #define MM_F_OUT_MD 0x1000000
33
- #define MM_F_COPY_COMMENT 0x2000000
34
- #define MM_F_EQX 0x4000000 // use =/X instead of M
35
- #define MM_F_PAF_NO_HIT 0x8000000 // output unmapped reads to PAF
36
- #define MM_F_NO_END_FLT 0x10000000
37
- #define MM_F_HARD_MLEVEL 0x20000000
38
- #define MM_F_SAM_HIT_ONLY 0x40000000
8
+ #define MM_VERSION "2.25-r1173"
9
+
10
+ #define MM_F_NO_DIAG (0x001LL) // no exact diagonal hit
11
+ #define MM_F_NO_DUAL (0x002LL) // skip pairs where query name is lexicographically larger than target name
12
+ #define MM_F_CIGAR (0x004LL)
13
+ #define MM_F_OUT_SAM (0x008LL)
14
+ #define MM_F_NO_QUAL (0x010LL)
15
+ #define MM_F_OUT_CG (0x020LL)
16
+ #define MM_F_OUT_CS (0x040LL)
17
+ #define MM_F_SPLICE (0x080LL) // splice mode
18
+ #define MM_F_SPLICE_FOR (0x100LL) // match GT-AG
19
+ #define MM_F_SPLICE_REV (0x200LL) // match CT-AC, the reverse complement of GT-AG
20
+ #define MM_F_NO_LJOIN (0x400LL)
21
+ #define MM_F_OUT_CS_LONG (0x800LL)
22
+ #define MM_F_SR (0x1000LL)
23
+ #define MM_F_FRAG_MODE (0x2000LL)
24
+ #define MM_F_NO_PRINT_2ND (0x4000LL)
25
+ #define MM_F_2_IO_THREADS (0x8000LL)
26
+ #define MM_F_LONG_CIGAR (0x10000LL)
27
+ #define MM_F_INDEPEND_SEG (0x20000LL)
28
+ #define MM_F_SPLICE_FLANK (0x40000LL)
29
+ #define MM_F_SOFTCLIP (0x80000LL)
30
+ #define MM_F_FOR_ONLY (0x100000LL)
31
+ #define MM_F_REV_ONLY (0x200000LL)
32
+ #define MM_F_HEAP_SORT (0x400000LL)
33
+ #define MM_F_ALL_CHAINS (0x800000LL)
34
+ #define MM_F_OUT_MD (0x1000000LL)
35
+ #define MM_F_COPY_COMMENT (0x2000000LL)
36
+ #define MM_F_EQX (0x4000000LL) // use =/X instead of M
37
+ #define MM_F_PAF_NO_HIT (0x8000000LL) // output unmapped reads to PAF
38
+ #define MM_F_NO_END_FLT (0x10000000LL)
39
+ #define MM_F_HARD_MLEVEL (0x20000000LL)
40
+ #define MM_F_SAM_HIT_ONLY (0x40000000LL)
39
41
  #define MM_F_RMQ (0x80000000LL)
40
42
  #define MM_F_QSTRAND (0x100000000LL)
41
43
  #define MM_F_NO_INV (0x200000000LL)
42
44
  #define MM_F_NO_HASH_NAME (0x400000000LL)
45
+ #define MM_F_SPLICE_OLD (0x800000000LL)
46
+ #define MM_F_SECONDARY_SEQ (0x1000000000LL) // output SEQ field for secondary alignments using hard clipping
43
47
 
44
48
  #define MM_I_HPC 0x1
45
49
  #define MM_I_NO_SEQ 0x2
@@ -189,6 +193,11 @@ typedef struct {
189
193
  } mm_idx_reader_t;
190
194
 
191
195
  // memory buffer for thread-local storage during mapping
196
+ struct mm_tbuf_s {
197
+ void *km;
198
+ int rep_len, frag_gap;
199
+ };
200
+
192
201
  typedef struct mm_tbuf_s mm_tbuf_t;
193
202
 
194
203
  // global variables
@@ -1,4 +1,4 @@
1
- .TH minimap2 1 "18 December 2021" "minimap2-2.24 (r1122)" "Bioinformatics tools"
1
+ .TH minimap2 1 "25 April 2023" "minimap2-2.25 (r1173)" "Bioinformatics tools"
2
2
  .SH NAME
3
3
  .PP
4
4
  minimap2 - mapping and alignment between collections of DNA sequences
@@ -79,6 +79,19 @@ Minimizer k-mer length [15]
79
79
  .BI -w \ INT
80
80
  Minimizer window size [10]. A minimizer is the smallest k-mer
81
81
  in a window of w consecutive k-mers.
82
+ .TP
83
+ .BI -j \ INT
84
+ Syncmer submer size [10]. Option
85
+ .B -j
86
+ and
87
+ .B -w
88
+ will override each other: if
89
+ .B -w
90
+ is applied after
91
+ .BR -j ,
92
+ .B -j
93
+ will have no effect, and vice versa.
94
+
82
95
  .TP
83
96
  .B -H
84
97
  Use homopolymer-compressed (HPC) minimizers. An HPC sequence is constructed by
@@ -88,16 +101,17 @@ on the HPC sequence.
88
101
  .BI -I \ NUM
89
102
  Load at most
90
103
  .I NUM
91
- target bases into RAM for indexing [4G]. If there are more than
104
+ target bases into RAM for indexing [8G]. If there are more than
92
105
  .I NUM
93
106
  bases in
94
107
  .IR target.fa ,
95
108
  minimap2 needs to read
96
109
  .I query.fa
97
- multiple times to map it against each batch of target sequences.
110
+ multiple times to map it against each batch of target sequences. This would create a multi-part index.
98
111
  .I NUM
99
112
  may end with k/K/m/M/g/G. NB: mapping quality is incorrect given a
100
- multi-part index.
113
+ multi-part index. See also option
114
+ .BR --split-prefix .
101
115
  .TP
102
116
  .B --idx-no-seq
103
117
  Don't store target sequences in the index. It saves disk space and memory but
@@ -587,7 +601,7 @@ Up to 20% sequence divergence.
587
601
  .B splice
588
602
  Long-read spliced alignment
589
603
  .RB ( -k15
590
- .B -w5 --splice -g2k -G200k -A1 -B2 -O2,32 -E1,0 -b0 -C9 -z200 -ub --junc-bonus=9 --cap-sw-mem=0
604
+ .B -w5 --splice -g2k -G200k -A1 -B2 -O2,32 -E1,0 -C9 -z200 -ub --junc-bonus=9 --cap-sw-mem=0
591
605
  .BR --splice-flank=yes ).
592
606
  In the splice mode, 1) long deletions are taken as introns and represented as
593
607
  the