minimap2 0.2.24.6 → 0.2.25.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -5,41 +5,45 @@
5
5
  #include <stdio.h>
6
6
  #include <sys/types.h>
7
7
 
8
- #define MM_F_NO_DIAG 0x001 // no exact diagonal hit
9
- #define MM_F_NO_DUAL 0x002 // skip pairs where query name is lexicographically larger than target name
10
- #define MM_F_CIGAR 0x004
11
- #define MM_F_OUT_SAM 0x008
12
- #define MM_F_NO_QUAL 0x010
13
- #define MM_F_OUT_CG 0x020
14
- #define MM_F_OUT_CS 0x040
15
- #define MM_F_SPLICE 0x080 // splice mode
16
- #define MM_F_SPLICE_FOR 0x100 // match GT-AG
17
- #define MM_F_SPLICE_REV 0x200 // match CT-AC, the reverse complement of GT-AG
18
- #define MM_F_NO_LJOIN 0x400
19
- #define MM_F_OUT_CS_LONG 0x800
20
- #define MM_F_SR 0x1000
21
- #define MM_F_FRAG_MODE 0x2000
22
- #define MM_F_NO_PRINT_2ND 0x4000
23
- #define MM_F_2_IO_THREADS 0x8000
24
- #define MM_F_LONG_CIGAR 0x10000
25
- #define MM_F_INDEPEND_SEG 0x20000
26
- #define MM_F_SPLICE_FLANK 0x40000
27
- #define MM_F_SOFTCLIP 0x80000
28
- #define MM_F_FOR_ONLY 0x100000
29
- #define MM_F_REV_ONLY 0x200000
30
- #define MM_F_HEAP_SORT 0x400000
31
- #define MM_F_ALL_CHAINS 0x800000
32
- #define MM_F_OUT_MD 0x1000000
33
- #define MM_F_COPY_COMMENT 0x2000000
34
- #define MM_F_EQX 0x4000000 // use =/X instead of M
35
- #define MM_F_PAF_NO_HIT 0x8000000 // output unmapped reads to PAF
36
- #define MM_F_NO_END_FLT 0x10000000
37
- #define MM_F_HARD_MLEVEL 0x20000000
38
- #define MM_F_SAM_HIT_ONLY 0x40000000
8
+ #define MM_VERSION "2.25-r1173"
9
+
10
+ #define MM_F_NO_DIAG (0x001LL) // no exact diagonal hit
11
+ #define MM_F_NO_DUAL (0x002LL) // skip pairs where query name is lexicographically larger than target name
12
+ #define MM_F_CIGAR (0x004LL)
13
+ #define MM_F_OUT_SAM (0x008LL)
14
+ #define MM_F_NO_QUAL (0x010LL)
15
+ #define MM_F_OUT_CG (0x020LL)
16
+ #define MM_F_OUT_CS (0x040LL)
17
+ #define MM_F_SPLICE (0x080LL) // splice mode
18
+ #define MM_F_SPLICE_FOR (0x100LL) // match GT-AG
19
+ #define MM_F_SPLICE_REV (0x200LL) // match CT-AC, the reverse complement of GT-AG
20
+ #define MM_F_NO_LJOIN (0x400LL)
21
+ #define MM_F_OUT_CS_LONG (0x800LL)
22
+ #define MM_F_SR (0x1000LL)
23
+ #define MM_F_FRAG_MODE (0x2000LL)
24
+ #define MM_F_NO_PRINT_2ND (0x4000LL)
25
+ #define MM_F_2_IO_THREADS (0x8000LL)
26
+ #define MM_F_LONG_CIGAR (0x10000LL)
27
+ #define MM_F_INDEPEND_SEG (0x20000LL)
28
+ #define MM_F_SPLICE_FLANK (0x40000LL)
29
+ #define MM_F_SOFTCLIP (0x80000LL)
30
+ #define MM_F_FOR_ONLY (0x100000LL)
31
+ #define MM_F_REV_ONLY (0x200000LL)
32
+ #define MM_F_HEAP_SORT (0x400000LL)
33
+ #define MM_F_ALL_CHAINS (0x800000LL)
34
+ #define MM_F_OUT_MD (0x1000000LL)
35
+ #define MM_F_COPY_COMMENT (0x2000000LL)
36
+ #define MM_F_EQX (0x4000000LL) // use =/X instead of M
37
+ #define MM_F_PAF_NO_HIT (0x8000000LL) // output unmapped reads to PAF
38
+ #define MM_F_NO_END_FLT (0x10000000LL)
39
+ #define MM_F_HARD_MLEVEL (0x20000000LL)
40
+ #define MM_F_SAM_HIT_ONLY (0x40000000LL)
39
41
  #define MM_F_RMQ (0x80000000LL)
40
42
  #define MM_F_QSTRAND (0x100000000LL)
41
43
  #define MM_F_NO_INV (0x200000000LL)
42
44
  #define MM_F_NO_HASH_NAME (0x400000000LL)
45
+ #define MM_F_SPLICE_OLD (0x800000000LL)
46
+ #define MM_F_SECONDARY_SEQ (0x1000000000LL) // output SEQ field for secondary alignments using hard clipping
43
47
 
44
48
  #define MM_I_HPC 0x1
45
49
  #define MM_I_NO_SEQ 0x2
@@ -189,6 +193,11 @@ typedef struct {
189
193
  } mm_idx_reader_t;
190
194
 
191
195
  // memory buffer for thread-local storage during mapping
196
+ struct mm_tbuf_s {
197
+ void *km;
198
+ int rep_len, frag_gap;
199
+ };
200
+
192
201
  typedef struct mm_tbuf_s mm_tbuf_t;
193
202
 
194
203
  // global variables
@@ -1,4 +1,4 @@
1
- .TH minimap2 1 "18 December 2021" "minimap2-2.24 (r1122)" "Bioinformatics tools"
1
+ .TH minimap2 1 "25 April 2023" "minimap2-2.25 (r1173)" "Bioinformatics tools"
2
2
  .SH NAME
3
3
  .PP
4
4
  minimap2 - mapping and alignment between collections of DNA sequences
@@ -79,6 +79,19 @@ Minimizer k-mer length [15]
79
79
  .BI -w \ INT
80
80
  Minimizer window size [10]. A minimizer is the smallest k-mer
81
81
  in a window of w consecutive k-mers.
82
+ .TP
83
+ .BI -j \ INT
84
+ Syncmer submer size [10]. Option
85
+ .B -j
86
+ and
87
+ .B -w
88
+ will override each other: if
89
+ .B -w
90
+ is applied after
91
+ .BR -j ,
92
+ .B -j
93
+ will have no effect, and vice versa.
94
+
82
95
  .TP
83
96
  .B -H
84
97
  Use homopolymer-compressed (HPC) minimizers. An HPC sequence is constructed by
@@ -88,16 +101,17 @@ on the HPC sequence.
88
101
  .BI -I \ NUM
89
102
  Load at most
90
103
  .I NUM
91
- target bases into RAM for indexing [4G]. If there are more than
104
+ target bases into RAM for indexing [8G]. If there are more than
92
105
  .I NUM
93
106
  bases in
94
107
  .IR target.fa ,
95
108
  minimap2 needs to read
96
109
  .I query.fa
97
- multiple times to map it against each batch of target sequences.
110
+ multiple times to map it against each batch of target sequences. This would create a multi-part index.
98
111
  .I NUM
99
112
  may end with k/K/m/M/g/G. NB: mapping quality is incorrect given a
100
- multi-part index.
113
+ multi-part index. See also option
114
+ .BR --split-prefix .
101
115
  .TP
102
116
  .B --idx-no-seq
103
117
  Don't store target sequences in the index. It saves disk space and memory but
@@ -587,7 +601,7 @@ Up to 20% sequence divergence.
587
601
  .B splice
588
602
  Long-read spliced alignment
589
603
  .RB ( -k15
590
- .B -w5 --splice -g2k -G200k -A1 -B2 -O2,32 -E1,0 -b0 -C9 -z200 -ub --junc-bonus=9 --cap-sw-mem=0
604
+ .B -w5 --splice -g2k -G200k -A1 -B2 -O2,32 -E1,0 -C9 -z200 -ub --junc-bonus=9 --cap-sw-mem=0
591
605
  .BR --splice-flank=yes ).
592
606
  In the splice mode, 1) long deletions are taken as introns and represented as
593
607
  the