minimap2 0.2.24.6 → 0.2.25.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,41 +5,45 @@
5
5
  #include <stdio.h>
6
6
  #include <sys/types.h>
7
7
 
8
- #define MM_F_NO_DIAG 0x001 // no exact diagonal hit
9
- #define MM_F_NO_DUAL 0x002 // skip pairs where query name is lexicographically larger than target name
10
- #define MM_F_CIGAR 0x004
11
- #define MM_F_OUT_SAM 0x008
12
- #define MM_F_NO_QUAL 0x010
13
- #define MM_F_OUT_CG 0x020
14
- #define MM_F_OUT_CS 0x040
15
- #define MM_F_SPLICE 0x080 // splice mode
16
- #define MM_F_SPLICE_FOR 0x100 // match GT-AG
17
- #define MM_F_SPLICE_REV 0x200 // match CT-AC, the reverse complement of GT-AG
18
- #define MM_F_NO_LJOIN 0x400
19
- #define MM_F_OUT_CS_LONG 0x800
20
- #define MM_F_SR 0x1000
21
- #define MM_F_FRAG_MODE 0x2000
22
- #define MM_F_NO_PRINT_2ND 0x4000
23
- #define MM_F_2_IO_THREADS 0x8000
24
- #define MM_F_LONG_CIGAR 0x10000
25
- #define MM_F_INDEPEND_SEG 0x20000
26
- #define MM_F_SPLICE_FLANK 0x40000
27
- #define MM_F_SOFTCLIP 0x80000
28
- #define MM_F_FOR_ONLY 0x100000
29
- #define MM_F_REV_ONLY 0x200000
30
- #define MM_F_HEAP_SORT 0x400000
31
- #define MM_F_ALL_CHAINS 0x800000
32
- #define MM_F_OUT_MD 0x1000000
33
- #define MM_F_COPY_COMMENT 0x2000000
34
- #define MM_F_EQX 0x4000000 // use =/X instead of M
35
- #define MM_F_PAF_NO_HIT 0x8000000 // output unmapped reads to PAF
36
- #define MM_F_NO_END_FLT 0x10000000
37
- #define MM_F_HARD_MLEVEL 0x20000000
38
- #define MM_F_SAM_HIT_ONLY 0x40000000
8
+ #define MM_VERSION "2.25-r1173"
9
+
10
+ #define MM_F_NO_DIAG (0x001LL) // no exact diagonal hit
11
+ #define MM_F_NO_DUAL (0x002LL) // skip pairs where query name is lexicographically larger than target name
12
+ #define MM_F_CIGAR (0x004LL)
13
+ #define MM_F_OUT_SAM (0x008LL)
14
+ #define MM_F_NO_QUAL (0x010LL)
15
+ #define MM_F_OUT_CG (0x020LL)
16
+ #define MM_F_OUT_CS (0x040LL)
17
+ #define MM_F_SPLICE (0x080LL) // splice mode
18
+ #define MM_F_SPLICE_FOR (0x100LL) // match GT-AG
19
+ #define MM_F_SPLICE_REV (0x200LL) // match CT-AC, the reverse complement of GT-AG
20
+ #define MM_F_NO_LJOIN (0x400LL)
21
+ #define MM_F_OUT_CS_LONG (0x800LL)
22
+ #define MM_F_SR (0x1000LL)
23
+ #define MM_F_FRAG_MODE (0x2000LL)
24
+ #define MM_F_NO_PRINT_2ND (0x4000LL)
25
+ #define MM_F_2_IO_THREADS (0x8000LL)
26
+ #define MM_F_LONG_CIGAR (0x10000LL)
27
+ #define MM_F_INDEPEND_SEG (0x20000LL)
28
+ #define MM_F_SPLICE_FLANK (0x40000LL)
29
+ #define MM_F_SOFTCLIP (0x80000LL)
30
+ #define MM_F_FOR_ONLY (0x100000LL)
31
+ #define MM_F_REV_ONLY (0x200000LL)
32
+ #define MM_F_HEAP_SORT (0x400000LL)
33
+ #define MM_F_ALL_CHAINS (0x800000LL)
34
+ #define MM_F_OUT_MD (0x1000000LL)
35
+ #define MM_F_COPY_COMMENT (0x2000000LL)
36
+ #define MM_F_EQX (0x4000000LL) // use =/X instead of M
37
+ #define MM_F_PAF_NO_HIT (0x8000000LL) // output unmapped reads to PAF
38
+ #define MM_F_NO_END_FLT (0x10000000LL)
39
+ #define MM_F_HARD_MLEVEL (0x20000000LL)
40
+ #define MM_F_SAM_HIT_ONLY (0x40000000LL)
39
41
  #define MM_F_RMQ (0x80000000LL)
40
42
  #define MM_F_QSTRAND (0x100000000LL)
41
43
  #define MM_F_NO_INV (0x200000000LL)
42
44
  #define MM_F_NO_HASH_NAME (0x400000000LL)
45
+ #define MM_F_SPLICE_OLD (0x800000000LL)
46
+ #define MM_F_SECONDARY_SEQ (0x1000000000LL) // output SEQ field for secondary alignments using hard clipping
43
47
 
44
48
  #define MM_I_HPC 0x1
45
49
  #define MM_I_NO_SEQ 0x2
@@ -189,6 +193,11 @@ typedef struct {
189
193
  } mm_idx_reader_t;
190
194
 
191
195
  // memory buffer for thread-local storage during mapping
196
+ struct mm_tbuf_s {
197
+ void *km;
198
+ int rep_len, frag_gap;
199
+ };
200
+
192
201
  typedef struct mm_tbuf_s mm_tbuf_t;
193
202
 
194
203
  // global variables
@@ -1,4 +1,4 @@
1
- .TH minimap2 1 "18 December 2021" "minimap2-2.24 (r1122)" "Bioinformatics tools"
1
+ .TH minimap2 1 "25 April 2023" "minimap2-2.25 (r1173)" "Bioinformatics tools"
2
2
  .SH NAME
3
3
  .PP
4
4
  minimap2 - mapping and alignment between collections of DNA sequences
@@ -79,6 +79,19 @@ Minimizer k-mer length [15]
79
79
  .BI -w \ INT
80
80
  Minimizer window size [10]. A minimizer is the smallest k-mer
81
81
  in a window of w consecutive k-mers.
82
+ .TP
83
+ .BI -j \ INT
84
+ Syncmer submer size [10]. Option
85
+ .B -j
86
+ and
87
+ .B -w
88
+ will override each other: if
89
+ .B -w
90
+ is applied after
91
+ .BR -j ,
92
+ .B -j
93
+ will have no effect, and vice versa.
94
+
82
95
  .TP
83
96
  .B -H
84
97
  Use homopolymer-compressed (HPC) minimizers. An HPC sequence is constructed by
@@ -88,16 +101,17 @@ on the HPC sequence.
88
101
  .BI -I \ NUM
89
102
  Load at most
90
103
  .I NUM
91
- target bases into RAM for indexing [4G]. If there are more than
104
+ target bases into RAM for indexing [8G]. If there are more than
92
105
  .I NUM
93
106
  bases in
94
107
  .IR target.fa ,
95
108
  minimap2 needs to read
96
109
  .I query.fa
97
- multiple times to map it against each batch of target sequences.
110
+ multiple times to map it against each batch of target sequences. This would create a multi-part index.
98
111
  .I NUM
99
112
  may end with k/K/m/M/g/G. NB: mapping quality is incorrect given a
100
- multi-part index.
113
+ multi-part index. See also option
114
+ .BR --split-prefix .
101
115
  .TP
102
116
  .B --idx-no-seq
103
117
  Don't store target sequences in the index. It saves disk space and memory but
@@ -587,7 +601,7 @@ Up to 20% sequence divergence.
587
601
  .B splice
588
602
  Long-read spliced alignment
589
603
  .RB ( -k15
590
- .B -w5 --splice -g2k -G200k -A1 -B2 -O2,32 -E1,0 -b0 -C9 -z200 -ub --junc-bonus=9 --cap-sw-mem=0
604
+ .B -w5 --splice -g2k -G200k -A1 -B2 -O2,32 -E1,0 -C9 -z200 -ub --junc-bonus=9 --cap-sw-mem=0
591
605
  .BR --splice-flank=yes ).
592
606
  In the splice mode, 1) long deletions are taken as introns and represented as
593
607
  the