minimap2 0.2.27.0 → 0.2.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/ext/cmappy/cmappy.c +3 -3
- data/ext/cmappy/cmappy.h +1 -1
- data/ext/minimap2/FAQ.md +1 -1
- data/ext/minimap2/Makefile +4 -3
- data/ext/minimap2/NEWS.md +68 -0
- data/ext/minimap2/README.md +30 -14
- data/ext/minimap2/align.c +136 -52
- data/ext/minimap2/cookbook.md +2 -2
- data/ext/minimap2/format.c +59 -5
- data/ext/minimap2/hit.c +14 -6
- data/ext/minimap2/index.c +304 -13
- data/ext/minimap2/jump.c +201 -0
- data/ext/minimap2/kalloc.h +8 -0
- data/ext/minimap2/ksw2.h +5 -2
- data/ext/minimap2/ksw2_dispatch.c +5 -5
- data/ext/minimap2/ksw2_exts2_sse.c +17 -6
- data/ext/minimap2/lchain.c +5 -5
- data/ext/minimap2/main.c +64 -12
- data/ext/minimap2/map.c +35 -8
- data/ext/minimap2/minimap.h +14 -3
- data/ext/minimap2/minimap2.1 +98 -46
- data/ext/minimap2/misc/README.md +2 -1
- data/ext/minimap2/misc/pafcluster.js +241 -0
- data/ext/minimap2/misc/paftools.js +17 -6
- data/ext/minimap2/mmpriv.h +25 -4
- data/ext/minimap2/options.c +36 -3
- data/ext/minimap2/python/cmappy.h +3 -3
- data/ext/minimap2/python/cmappy.pxd +5 -2
- data/ext/minimap2/python/mappy.pyx +20 -7
- data/ext/minimap2/python/minimap2.py +5 -3
- data/ext/minimap2/seed.c +2 -1
- data/ext/minimap2/setup.py +2 -2
- data/ext/minimap2.patch +2 -2
- data/lib/minimap2/aligner.rb +19 -12
- data/lib/minimap2/alignment.rb +1 -0
- data/lib/minimap2/ffi/constants.rb +10 -2
- data/lib/minimap2/ffi/functions.rb +145 -6
- data/lib/minimap2/ffi/mappy.rb +1 -1
- data/lib/minimap2/version.rb +1 -1
- data/lib/minimap2.rb +2 -2
- metadata +8 -7
- data/ext/minimap2/misc/mmphase.js +0 -335
data/ext/minimap2/jump.c
ADDED
@@ -0,0 +1,201 @@
|
|
1
|
+
#include <stdio.h>
|
2
|
+
#include "mmpriv.h"
|
3
|
+
#include "kalloc.h"
|
4
|
+
|
5
|
+
#define MM_MIN_EXON_LEN 20
|
6
|
+
|
7
|
+
static int32_t mm_jump_check(void *km, const mm_idx_t *mi, int32_t qlen, const uint8_t *qseq0, const mm_reg1_t *r, int32_t ext, int32_t is_left) // TODO: check close N
|
8
|
+
{
|
9
|
+
int32_t clip, clen, e = !r->rev ^ !is_left; // 0 for left of the alignment; 1 for right
|
10
|
+
uint32_t cigar;
|
11
|
+
if (!r->p || r->p->n_cigar <= 0) return -1; // only working with CIGAR
|
12
|
+
clip = e == 0? r->qs : qlen - r->qe;
|
13
|
+
cigar = r->p->cigar[is_left? 0 : r->p->n_cigar - 1];
|
14
|
+
clen = (cigar&0xf) == MM_CIGAR_MATCH? cigar>>4 : 0;
|
15
|
+
if (clen <= ext) return -1;
|
16
|
+
if (is_left) {
|
17
|
+
if (clip >= r->rs) return -1; // no space to jump
|
18
|
+
} else {
|
19
|
+
if (clip >= mi->seq[r->rid].len - r->re) return -1; // no space to jump
|
20
|
+
}
|
21
|
+
return 0;
|
22
|
+
}
|
23
|
+
|
24
|
+
static uint8_t *mm_jump_get_qseq_seq(void *km, int32_t qlen, const uint8_t *qseq0, const mm_reg1_t *r, int32_t is_left, int32_t ql0, uint8_t *qseq)
|
25
|
+
{
|
26
|
+
extern unsigned char seq_nt4_table[256];
|
27
|
+
int32_t i, k = 0;
|
28
|
+
if (!r->rev) {
|
29
|
+
if (is_left)
|
30
|
+
for (i = 0; i < ql0; ++i)
|
31
|
+
qseq[k++] = seq_nt4_table[(uint8_t)qseq0[i]];
|
32
|
+
else
|
33
|
+
for (i = qlen - ql0; i < qlen; ++i)
|
34
|
+
qseq[k++] = seq_nt4_table[(uint8_t)qseq0[i]];
|
35
|
+
} else {
|
36
|
+
if (is_left)
|
37
|
+
for (i = qlen - 1; i >= qlen - ql0; --i) {
|
38
|
+
uint8_t c = seq_nt4_table[(uint8_t)qseq0[i]];
|
39
|
+
qseq[k++] = c >= 4? c : 3 - c;
|
40
|
+
}
|
41
|
+
else
|
42
|
+
for (i = ql0 - 1; i >= 0; --i) {
|
43
|
+
uint8_t c = seq_nt4_table[(uint8_t)qseq0[i]];
|
44
|
+
qseq[k++] = c >= 4? c : 3 - c;
|
45
|
+
}
|
46
|
+
}
|
47
|
+
return qseq;
|
48
|
+
}
|
49
|
+
|
50
|
+
/*
 * Use junctions stored in the index to extend or trim the reference-left end
 * of alignment `r`.
 *
 * Every junction returned by mm_idx_jump_get() around r->rs is tested: the
 * clipped query bases plus `ext` aligned bases are compared against the
 * reference sequence stitched across the junction. A junction is compatible
 * when the bases on the far side of the junction match exactly (mm1 == 0) and
 * the bases on the near side have at most one mismatch (mm2 <= 1). Junctions
 * flagged MM_JUNC_ANNO take priority over the others.
 *
 * - Exactly one compatible junction and at least opt->jump_min_match bases
 *   beyond it: a new match + N_SKIP pair is prepended to the CIGAR and the
 *   coordinates/scores of `r` are updated.
 * - Otherwise, if a compatible junction lies inside the alignment, the
 *   alignment is trimmed back to that junction.
 */
static void mm_jump_split_left(void *km, const mm_idx_t *mi, const mm_mapopt_t *opt, int32_t qlen, const uint8_t *qseq0, mm_reg1_t *r, int32_t ts_strand)
{
	const int32_t ext = 1 + (opt->b + opt->a - 1) / opt->a + 1;
	const int32_t clip = !r->rev? r->qs : qlen - r->qe; // unaligned query bases at this end
	const int32_t extt = clip < ext? clip : ext;
	const int32_t span = clip + ext; // number of bases compared per junction
	const mm_idx_jjump1_t *a;
	uint8_t *tseq = 0, *qseq = 0;
	int32_t best_anno = -1, cnt_anno = 0, mis_anno = 0;
	int32_t best_misc = -1, cnt_misc = 0, mis_misc = 0;
	int32_t k, n, l, i0, m, mm0;

	if (mm_jump_check(km, mi, qlen, qseq0, r, ext + MM_MIN_EXON_LEN, 1) < 0)
		return;
	a = mm_idx_jump_get(mi, r->rid, r->rs - extt, r->rs + ext, &n);
	if (n == 0)
		return;

	for (k = 0; k < n; ++k) { // traverse possible jumps
		const mm_idx_jjump1_t *jk = &a[k];
		int32_t tlen, tl1, j, mm1, mm2;
		assert(jk->off >= r->rs - extt && jk->off <= r->rs + ext);
		if (ts_strand * jk->strand < 0) continue; // wrong strand
		if (jk->off2 >= jk->off) continue; // wrong direction
		if (jk->off - jk->off2 < 6) continue; // intron too small
		if (jk->off2 < span) continue; // not long enough
		if (tseq == 0) { // lazily allocate on the first usable junction
			tseq = Kcalloc(km, uint8_t, span * 2); // tseq and qseq are allocated together
			qseq = tseq + span;
			mm_jump_get_qseq_seq(km, qlen, qseq0, r, 1, span, qseq);
		}
		tl1 = clip + (jk->off - r->rs); // bases that fall on the far side of the junction
		// near side: reference from the junction up to r->rs + ext
		tlen = mm_idx_getseq2(mi, 0, r->rid, jk->off, r->rs + ext, &tseq[tl1]);
		assert(tlen == r->rs + ext - jk->off);
		// far side: tl1 reference bases ending at the other junction coordinate
		tlen = mm_idx_getseq2(mi, 0, r->rid, jk->off2 - tl1, jk->off2, tseq);
		assert(tlen == tl1);
		mm1 = 0;
		for (j = 0; j < tl1; ++j) { // mismatches (or ambiguous bases) beyond the junction
			if (qseq[j] != tseq[j] || qseq[j] > 3 || tseq[j] > 3)
				++mm1;
		}
		mm2 = 0;
		for (; j < span; ++j) { // mismatches on the already-aligned side
			if (qseq[j] != tseq[j] || qseq[j] > 3 || tseq[j] > 3)
				++mm2;
		}
		if (mm1 == 0 && mm2 <= 1) {
			if (jk->flag & MM_JUNC_ANNO) {
				best_anno = k; // always overwritten: ends up at the rightmost compatible index
				mis_anno = mm1 + mm2;
				++cnt_anno;
			} else {
				best_misc = k;
				mis_misc = mm1 + mm2;
				++cnt_misc;
			}
		}
	}
	if (cnt_anno > 0) {
		m = cnt_anno; i0 = best_anno; mm0 = mis_anno;
	} else {
		m = cnt_misc; i0 = best_misc; mm0 = mis_misc;
	}
	kfree(km, tseq);

	l = m > 0? a[i0].off - r->rs : 0; // may be negative
	if (m == 1 && clip + l >= opt->jump_min_match) { // add one more exon
		mm_enlarge_cigar(r, 2);
		memmove(r->p->cigar + 2, r->p->cigar, r->p->n_cigar * 4); // make room for two leading operations
		r->p->cigar[0] = (clip + l) << 4 | MM_CIGAR_MATCH;
		r->p->cigar[1] = (a[i0].off - a[i0].off2) << 4 | MM_CIGAR_N_SKIP;
		r->p->cigar[2] = ((r->p->cigar[2]>>4) - l) << 4 | MM_CIGAR_MATCH;
		r->p->n_cigar += 2;
		r->rs = a[i0].off2 - (clip + l);
		if (!r->rev) {
			r->qs = 0;
		} else {
			r->qe = qlen;
		}
		r->blen += clip;
		r->mlen += clip - mm0;
		r->p->dp_max0 += (clip - mm0) * opt->a - mm0 * opt->b;
		r->p->dp_max += (clip - mm0) * opt->a - mm0 * opt->b;
		if (!r->is_spliced) {
			r->is_spliced = 1;
			r->p->dp_max += (opt->a + opt->b) + ((opt->a + opt->b) >> 1);
		}
	} else if (m > 0 && a[i0].off > r->rs) { // trim by l; l is always positive
		r->p->cigar[0] -= l << 4 | MM_CIGAR_MATCH;
		r->rs += l;
		if (!r->rev) {
			r->qs += l;
		} else {
			r->qe -= l;
		}
	}
}
|
121
|
+
|
122
|
+
/*
 * Use junctions stored in the index to extend or trim the reference-right end
 * of alignment `r`. Mirror image of the left-end routine.
 *
 * Every junction returned by mm_idx_jump_get() around r->re is tested: `ext`
 * aligned bases plus the clipped query bases are compared against the
 * reference sequence stitched across the junction. A junction is compatible
 * when the bases beyond the junction match exactly (mm1 == 0) and the bases
 * before it have at most one mismatch (mm2 <= 1). Junctions flagged
 * MM_JUNC_ANNO take priority over the others; within a class the first
 * compatible junction is remembered.
 *
 * - Exactly one compatible junction and at least opt->jump_min_match bases
 *   beyond it: an N_SKIP + match pair is appended to the CIGAR and the
 *   coordinates/scores of `r` are updated.
 * - Otherwise, if a compatible junction lies inside the alignment, the
 *   alignment is trimmed back to that junction.
 */
static void mm_jump_split_right(void *km, const mm_idx_t *mi, const mm_mapopt_t *opt, int32_t qlen, const uint8_t *qseq0, mm_reg1_t *r, int32_t ts_strand)
{
	const int32_t ext = 1 + (opt->b + opt->a - 1) / opt->a + 1;
	const int32_t clip = !r->rev? qlen - r->qe : r->qs; // unaligned query bases at this end
	const int32_t extt = clip < ext? clip : ext;
	const int32_t span = clip + ext; // number of bases compared per junction
	const mm_idx_jjump1_t *a;
	uint8_t *tseq = 0, *qseq = 0;
	int32_t best_anno = -1, cnt_anno = 0, mis_anno = 0;
	int32_t best_misc = -1, cnt_misc = 0, mis_misc = 0;
	int32_t k, n, l, i0, m, mm0;

	if (mm_jump_check(km, mi, qlen, qseq0, r, ext + MM_MIN_EXON_LEN, 0) < 0)
		return;
	a = mm_idx_jump_get(mi, r->rid, r->re - ext, r->re + extt, &n);
	if (n == 0)
		return;

	for (k = 0; k < n; ++k) { // traverse possible jumps
		const mm_idx_jjump1_t *jk = &a[k];
		int32_t tlen, tl1, j, mm1, mm2;
		assert(jk->off >= r->re - ext && jk->off <= r->re + extt);
		if (ts_strand * jk->strand < 0) continue; // wrong strand
		if (jk->off2 <= jk->off) continue; // wrong direction
		if (jk->off2 - jk->off < 6) continue; // intron too small
		if (jk->off2 + clip + ext > mi->seq[r->rid].len) continue; // not long enough
		if (tseq == 0) { // lazily allocate on the first usable junction
			tseq = Kcalloc(km, uint8_t, span * 2); // tseq and qseq are allocated together
			qseq = tseq + span;
			mm_jump_get_qseq_seq(km, qlen, qseq0, r, 0, span, qseq);
		}
		tl1 = clip + (r->re - jk->off); // bases that fall beyond the junction
		// near side: reference from r->re - ext up to the junction
		tlen = mm_idx_getseq2(mi, 0, r->rid, r->re - ext, jk->off, tseq);
		assert(tlen == jk->off - (r->re - ext));
		// far side: tl1 reference bases starting at the other junction coordinate
		tlen = mm_idx_getseq2(mi, 0, r->rid, jk->off2, jk->off2 + tl1, &tseq[span - tl1]);
		assert(tlen == tl1);
		mm2 = 0;
		for (j = 0; j < span - tl1; ++j) { // mismatches (or ambiguous bases) on the already-aligned side
			if (qseq[j] != tseq[j] || qseq[j] > 3 || tseq[j] > 3)
				++mm2;
		}
		mm1 = 0;
		for (; j < span; ++j) { // mismatches beyond the junction
			if (qseq[j] != tseq[j] || qseq[j] > 3 || tseq[j] > 3)
				++mm1;
		}
		if (mm1 == 0 && mm2 <= 1) {
			if (jk->flag & MM_JUNC_ANNO) {
				if (best_anno < 0) { // keep the first compatible index only
					best_anno = k;
					mis_anno = mm1 + mm2;
				}
				++cnt_anno;
			} else {
				if (best_misc < 0) {
					best_misc = k;
					mis_misc = mm1 + mm2;
				}
				++cnt_misc;
			}
		}
	}
	if (cnt_anno > 0) {
		m = cnt_anno; i0 = best_anno; mm0 = mis_anno;
	} else {
		m = cnt_misc; i0 = best_misc; mm0 = mis_misc;
	}
	kfree(km, tseq);

	l = m > 0? r->re - a[i0].off : 0; // may be negative
	if (m == 1 && clip + l >= opt->jump_min_match) { // add one more exon
		mm_enlarge_cigar(r, 2);
		r->p->cigar[r->p->n_cigar - 1] = ((r->p->cigar[r->p->n_cigar - 1]>>4) - l) << 4 | MM_CIGAR_MATCH;
		r->p->cigar[r->p->n_cigar] = (a[i0].off2 - a[i0].off) << 4 | MM_CIGAR_N_SKIP;
		r->p->cigar[r->p->n_cigar + 1] = (clip + l) << 4 | MM_CIGAR_MATCH;
		r->p->n_cigar += 2;
		r->re = a[i0].off2 + (clip + l);
		if (!r->rev) {
			r->qe = qlen;
		} else {
			r->qs = 0;
		}
		r->blen += clip;
		r->mlen += clip - mm0;
		r->p->dp_max0 += (clip - mm0) * opt->a - mm0 * opt->b;
		r->p->dp_max += (clip - mm0) * opt->a - mm0 * opt->b;
		if (!r->is_spliced) {
			r->is_spliced = 1;
			r->p->dp_max += (opt->a + opt->b) + ((opt->a + opt->b) >> 1);
		}
	} else if (m > 0 && r->re > a[i0].off) { // trim by l; l is always positive
		r->p->cigar[r->p->n_cigar - 1] -= l << 4 | MM_CIGAR_MATCH;
		r->re -= l;
		if (!r->rev) {
			r->qe -= l;
		} else {
			r->qs += l;
		}
	}
}
|
195
|
+
|
196
|
+
/*
 * Apply junction-based extension/trimming to both ends of alignment `r`:
 * the reference-left end first, then the reference-right end.
 *
 * Asserts that MM_F_EQX is not set in opt->flag; the helpers only write
 * MM_CIGAR_MATCH and MM_CIGAR_N_SKIP operations.
 */
void mm_jump_split(void *km, const mm_idx_t *mi, const mm_mapopt_t *opt, int32_t qlen, const uint8_t *qseq, mm_reg1_t *r, int32_t ts_strand)
{
	assert(!(opt->flag & MM_F_EQX));
	mm_jump_split_left(km, mi, opt, qlen, qseq, r, ts_strand);
	mm_jump_split_right(km, mi, opt, qlen, qseq, r, ts_strand);
}
|
data/ext/minimap2/kalloc.h
CHANGED
@@ -31,6 +31,14 @@ void km_stat_print(const void *km);
|
|
31
31
|
#define Kcalloc(km, type, cnt) ((type*)kcalloc((km), (cnt), sizeof(type)))
|
32
32
|
#define Krealloc(km, type, ptr, cnt) ((type*)krealloc((km), (ptr), (cnt) * sizeof(type)))
|
33
33
|
|
34
|
+
/*
 * Kgrow: make sure array `ptr` (capacity tracked in `cap`) can be indexed at `idx`.
 * When idx >= cap, the capacity becomes (idx + 1) plus half of that plus 16 and
 * the array is resized with Krealloc(). `idx`, `cap` and `ptr` are evaluated more
 * than once, so pass side-effect-free lvalues/expressions.
 */
#define Kgrow(km, type, ptr, idx, cap) do { \
		if ((idx) >= (cap)) { \
			(cap) = (idx) + 1; \
			(cap) += ((cap)>>1) + 16; \
			(ptr) = Krealloc(km, type, ptr, (cap)); \
		} \
	} while (0)
|
41
|
+
|
34
42
|
#define Kexpand(km, type, a, m) do { \
|
35
43
|
(m) = (m) >= 4? (m) + ((m)>>1) : 16; \
|
36
44
|
(a) = Krealloc(km, type, (a), (m)); \
|
data/ext/minimap2/ksw2.h
CHANGED
@@ -15,7 +15,8 @@
|
|
15
15
|
#define KSW_EZ_SPLICE_FOR 0x100
|
16
16
|
#define KSW_EZ_SPLICE_REV 0x200
|
17
17
|
#define KSW_EZ_SPLICE_FLANK 0x400
|
18
|
-
#define KSW_EZ_SPLICE_CMPLX 0x800
|
18
|
+
#define KSW_EZ_SPLICE_CMPLX 0x800 // use the miniprot splice model
|
19
|
+
#define KSW_EZ_SPLICE_SCORE 0x1000 // use splice score
|
19
20
|
|
20
21
|
// The subset of CIGAR operators used by ksw code.
|
21
22
|
// Use MM_CIGAR_* from minimap.h if you need the full list.
|
@@ -24,6 +25,8 @@
|
|
24
25
|
#define KSW_CIGAR_DEL 2
|
25
26
|
#define KSW_CIGAR_N_SKIP 3
|
26
27
|
|
28
|
+
#define KSW_SPSC_OFFSET 64
|
29
|
+
|
27
30
|
#ifdef __cplusplus
|
28
31
|
extern "C" {
|
29
32
|
#endif
|
@@ -69,7 +72,7 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|
69
72
|
int8_t gapo, int8_t gape, int8_t gapo2, int8_t gape2, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez);
|
70
73
|
|
71
74
|
void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
|
72
|
-
int8_t gapo, int8_t gape, int8_t gapo2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez);
|
75
|
+
int8_t gapo, int8_t gape, int8_t gapo2, int8_t noncan, int zdrop, int end_bonus, int8_t junc_bonus, int8_t junc_pen, int flag, const uint8_t *junc, ksw_extz_t *ez);
|
73
76
|
|
74
77
|
void ksw_extf2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t mch, int8_t mis, int8_t e, int w, int xdrop, ksw_extz_t *ez);
|
75
78
|
|
@@ -80,17 +80,17 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|
80
80
|
}
|
81
81
|
|
82
82
|
void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
|
83
|
-
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez)
|
83
|
+
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int end_bonus, int8_t junc_bonus, int8_t junc_pen, int flag, const uint8_t *junc, ksw_extz_t *ez)
|
84
84
|
{
|
85
85
|
extern void ksw_exts2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
|
86
|
-
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez);
|
86
|
+
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int end_bonus, int8_t junc_bonus, int8_t junc_pen, int flag, const uint8_t *junc, ksw_extz_t *ez);
|
87
87
|
extern void ksw_exts2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
|
88
|
-
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez);
|
88
|
+
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int end_bonus, int8_t junc_bonus, int8_t junc_pen, int flag, const uint8_t *junc, ksw_extz_t *ez);
|
89
89
|
if (ksw_simd < 0) ksw_simd = x86_simd();
|
90
90
|
if (ksw_simd & SIMD_SSE4_1)
|
91
|
-
ksw_exts2_sse41(km, qlen, query, tlen, target, m, mat, q, e, q2, noncan, zdrop, junc_bonus, flag, junc, ez);
|
91
|
+
ksw_exts2_sse41(km, qlen, query, tlen, target, m, mat, q, e, q2, noncan, zdrop, end_bonus, junc_bonus, junc_pen, flag, junc, ez);
|
92
92
|
else if (ksw_simd & SIMD_SSE2)
|
93
|
-
ksw_exts2_sse2(km, qlen, query, tlen, target, m, mat, q, e, q2, noncan, zdrop, junc_bonus, flag, junc, ez);
|
93
|
+
ksw_exts2_sse2(km, qlen, query, tlen, target, m, mat, q, e, q2, noncan, zdrop, end_bonus, junc_bonus, junc_pen, flag, junc, ez);
|
94
94
|
else abort();
|
95
95
|
}
|
96
96
|
#endif
|
@@ -24,14 +24,14 @@
|
|
24
24
|
#ifdef KSW_CPU_DISPATCH
|
25
25
|
#ifdef __SSE4_1__
|
26
26
|
void ksw_exts2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
|
27
|
-
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez)
|
27
|
+
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int end_bonus, int8_t junc_bonus, int8_t junc_pen, int flag, const uint8_t *junc, ksw_extz_t *ez)
|
28
28
|
#else
|
29
29
|
void ksw_exts2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
|
30
|
-
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez)
|
30
|
+
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int end_bonus, int8_t junc_bonus, int8_t junc_pen, int flag, const uint8_t *junc, ksw_extz_t *ez)
|
31
31
|
#endif
|
32
32
|
#else
|
33
33
|
void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
|
34
|
-
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez)
|
34
|
+
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int end_bonus, int8_t junc_bonus, int8_t junc_pen, int flag, const uint8_t *junc, ksw_extz_t *ez)
|
35
35
|
#endif // ~KSW_CPU_DISPATCH
|
36
36
|
{
|
37
37
|
#define __dp_code_block1 \
|
@@ -191,7 +191,14 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|
191
191
|
}
|
192
192
|
}
|
193
193
|
|
194
|
-
if (junc) {
|
194
|
+
if (junc && (flag & KSW_EZ_SPLICE_SCORE)) { // junc[] keeps the donor score
|
195
|
+
uint8_t donor_val = !!(flag & KSW_EZ_SPLICE_FOR) == !(flag & KSW_EZ_REV_CIGAR)? 0 : 1;
|
196
|
+
for (t = 0; t < tlen - 1; ++t)
|
197
|
+
((int8_t*)donor)[t] += junc[t+1] == 0xff || (junc[t+1]&1) != donor_val? -junc_pen : (int8_t)(junc[t+1]>>1) - (int8_t)KSW_SPSC_OFFSET;
|
198
|
+
for (t = 0; t < tlen - 1; ++t)
|
199
|
+
((int8_t*)acceptor)[t] += junc[t+1] == 0xff || (junc[t+1]&1) != !donor_val? -junc_pen : (int8_t)(junc[t+1]>>1) - (int8_t)KSW_SPSC_OFFSET;
|
200
|
+
//for (t = 0; t < tlen - 1; ++t) if (junc[t+1] != 0xff) fprintf(stderr, "Y2\t%d\t%d\t%c\t%d\n", ((int8_t*)donor)[t], ((int8_t*)acceptor)[t], "DA"[junc[t+1]&1], (int8_t)(junc[t+1]>>1) - (int8_t)KSW_SPSC_OFFSET);
|
201
|
+
} else if (junc) { // junc[] keeps the splice sites
|
195
202
|
if (!(flag & KSW_EZ_REV_CIGAR)) {
|
196
203
|
for (t = 0; t < tlen - 1; ++t)
|
197
204
|
if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t+1]&1)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t+1]&8)))
|
@@ -445,10 +452,14 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|
445
452
|
if (!approx_max) kfree(km, H);
|
446
453
|
if (with_cigar) { // backtrack
|
447
454
|
int rev_cigar = !!(flag & KSW_EZ_REV_CIGAR);
|
448
|
-
if (!ez->zdropped && !(flag&KSW_EZ_EXTZ_ONLY))
|
455
|
+
if (!ez->zdropped && !(flag&KSW_EZ_EXTZ_ONLY)) {
|
449
456
|
ksw_backtrack(km, 1, rev_cigar, long_thres, (uint8_t*)p, off, off_end, n_col_*16, tlen-1, qlen-1, &ez->m_cigar, &ez->n_cigar, &ez->cigar);
|
450
|
-
else if (ez->
|
457
|
+
} else if (!ez->zdropped && (flag&KSW_EZ_EXTZ_ONLY) && ez->mqe + end_bonus > (int)ez->max) {
|
458
|
+
ez->reach_end = 1;
|
459
|
+
ksw_backtrack(km, 1, rev_cigar, long_thres, (uint8_t*)p, off, off_end, n_col_*16, ez->mqe_t, qlen-1, &ez->m_cigar, &ez->n_cigar, &ez->cigar);
|
460
|
+
} else if (ez->max_t >= 0 && ez->max_q >= 0) {
|
451
461
|
ksw_backtrack(km, 1, rev_cigar, long_thres, (uint8_t*)p, off, off_end, n_col_*16, ez->max_t, ez->max_q, &ez->m_cigar, &ez->n_cigar, &ez->cigar);
|
462
|
+
}
|
452
463
|
kfree(km, mem2); kfree(km, off);
|
453
464
|
}
|
454
465
|
}
|
data/ext/minimap2/lchain.c
CHANGED
@@ -149,7 +149,7 @@ mm128_t *mg_lchain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int
|
|
149
149
|
int is_cdna, int n_seg, int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km)
|
150
150
|
{ // TODO: make sure this works when n has more than 32 bits
|
151
151
|
int32_t *f, *t, *v, n_u, n_v, mmax_f = 0, max_drop = bw;
|
152
|
-
int64_t *p, i, j, max_ii, st = 0
|
152
|
+
int64_t *p, i, j, max_ii, st = 0;
|
153
153
|
uint64_t *u;
|
154
154
|
|
155
155
|
if (_u) *_u = 0, *n_u_ = 0;
|
@@ -174,7 +174,6 @@ mm128_t *mg_lchain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int
|
|
174
174
|
for (j = i - 1; j >= st; --j) {
|
175
175
|
int32_t sc;
|
176
176
|
sc = comput_sc(&a[i], &a[j], max_dist_x, max_dist_y, bw, chn_pen_gap, chn_pen_skip, is_cdna, n_seg);
|
177
|
-
++n_iter;
|
178
177
|
if (sc == INT32_MIN) continue;
|
179
178
|
sc += f[j];
|
180
179
|
if (sc > max_f) {
|
@@ -204,6 +203,7 @@ mm128_t *mg_lchain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int
|
|
204
203
|
if (max_ii < 0 || (a[i].x - a[max_ii].x <= (int64_t)max_dist_x && f[max_ii] < f[i]))
|
205
204
|
max_ii = i;
|
206
205
|
if (mmax_f < max_f) mmax_f = max_f;
|
206
|
+
//fprintf(stderr, "X1\t%ld\t%ld:%d\t%ld\t%ld:%d\t%ld\t%ld\n", (long)i, (long)(a[i].x>>32), (int32_t)a[i].x, (long)max_j, max_j<0?-1L:(long)(a[max_j].x>>32), max_j<0?-1:(int32_t)a[max_j].x, (long)max_f, (long)v[i]);
|
207
207
|
}
|
208
208
|
|
209
209
|
u = mg_chain_backtrack(km, n, f, p, v, t, min_cnt, min_sc, max_drop, &n_u, &n_v);
|
@@ -263,7 +263,8 @@ mm128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_ski
|
|
263
263
|
return 0;
|
264
264
|
}
|
265
265
|
if (max_dist < bw) max_dist = bw;
|
266
|
-
if (max_dist_inner
|
266
|
+
if (max_dist_inner < 0) max_dist_inner = 0;
|
267
|
+
if (max_dist_inner > max_dist) max_dist_inner = max_dist;
|
267
268
|
p = Kmalloc(km, int64_t, n);
|
268
269
|
f = Kmalloc(km, int32_t, n);
|
269
270
|
t = Kcalloc(km, int32_t, n);
|
@@ -325,12 +326,11 @@ mm128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_ski
|
|
325
326
|
krmq_interval(lc_elem, root_inner, &s, &lo, &hi);
|
326
327
|
if (lo) {
|
327
328
|
const lc_elem_t *q;
|
328
|
-
int32_t width
|
329
|
+
int32_t width;
|
329
330
|
krmq_itr_t(lc_elem) itr;
|
330
331
|
krmq_itr_find(lc_elem, root_inner, lo, &itr);
|
331
332
|
while ((q = krmq_at(&itr)) != 0) {
|
332
333
|
if (q->y < (int32_t)a[i].y - max_dist_inner) break;
|
333
|
-
++n_rmq_iter;
|
334
334
|
j = q->i;
|
335
335
|
sc = f[j] + comput_sc_simple(&a[i], &a[j], chn_pen_gap, chn_pen_skip, 0, &width);
|
336
336
|
if (width <= bw) {
|
data/ext/minimap2/main.c
CHANGED
@@ -35,12 +35,12 @@ static ko_longopt_t long_options[] = {
|
|
35
35
|
{ "splice", ko_no_argument, 310 },
|
36
36
|
{ "cost-non-gt-ag", ko_required_argument, 'C' },
|
37
37
|
{ "no-long-join", ko_no_argument, 312 },
|
38
|
-
{ "sr",
|
38
|
+
{ "sr", ko_optional_argument, 313 },
|
39
39
|
{ "frag", ko_required_argument, 314 },
|
40
40
|
{ "secondary", ko_required_argument, 315 },
|
41
41
|
{ "cs", ko_optional_argument, 316 },
|
42
42
|
{ "end-bonus", ko_required_argument, 317 },
|
43
|
-
{ "no-pairing", ko_no_argument, 318 },
|
43
|
+
{ "no-pairing", ko_no_argument, 318 }, // deprecated but reserved for backward compatibility
|
44
44
|
{ "splice-flank", ko_required_argument, 319 },
|
45
45
|
{ "idx-no-seq", ko_no_argument, 320 },
|
46
46
|
{ "end-seed-pen", ko_required_argument, 321 },
|
@@ -78,6 +78,14 @@ static ko_longopt_t long_options[] = {
|
|
78
78
|
{ "no-hash-name", ko_no_argument, 353 },
|
79
79
|
{ "secondary-seq", ko_no_argument, 354 },
|
80
80
|
{ "ds", ko_no_argument, 355 },
|
81
|
+
{ "rmq-inner", ko_required_argument, 356 },
|
82
|
+
{ "spsc", ko_required_argument, 357 },
|
83
|
+
{ "junc-pen", ko_required_argument, 358 },
|
84
|
+
{ "pairing", ko_required_argument, 359 },
|
85
|
+
{ "jump-min-match", ko_required_argument, 360 },
|
86
|
+
{ "write-junc", ko_no_argument, 361 },
|
87
|
+
{ "pass1", ko_required_argument, 362 },
|
88
|
+
{ "dbg-seed-occ", ko_no_argument, 501 },
|
81
89
|
{ "help", ko_no_argument, 'h' },
|
82
90
|
{ "max-intron-len", ko_required_argument, 'G' },
|
83
91
|
{ "version", ko_no_argument, 'V' },
|
@@ -121,12 +129,12 @@ static inline void yes_or_no(mm_mapopt_t *opt, int64_t flag, int long_idx, const
|
|
121
129
|
|
122
130
|
int main(int argc, char *argv[])
|
123
131
|
{
|
124
|
-
const char *opt_str = "2aSDw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:b:O:E:m:N:Qu:R:hF:LC:yYPo:e:U:J:";
|
132
|
+
const char *opt_str = "2aSDw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:b:O:E:m:N:Qu:R:hF:LC:yYPo:e:U:J:j:";
|
125
133
|
ketopt_t o = KETOPT_INIT;
|
126
134
|
mm_mapopt_t opt;
|
127
135
|
mm_idxopt_t ipt;
|
128
136
|
int i, c, n_threads = 3, n_parts, old_best_n = -1;
|
129
|
-
char *fnw = 0, *rg = 0, *
|
137
|
+
char *fnw = 0, *rg = 0, *fn_bed_junc = 0, *fn_bed_jump = 0, *fn_bed_pass1 = 0, *fn_spsc = 0, *s, *alt_list = 0;
|
130
138
|
FILE *fp_help = stderr;
|
131
139
|
mm_idx_reader_t *idx_rdr;
|
132
140
|
mm_idx_t *mi;
|
@@ -188,6 +196,7 @@ int main(int argc, char *argv[])
|
|
188
196
|
else if (c == 'R') rg = o.arg;
|
189
197
|
else if (c == 'h') fp_help = stdout;
|
190
198
|
else if (c == '2') opt.flag |= MM_F_2_IO_THREADS;
|
199
|
+
else if (c == 'j') fn_bed_jump = o.arg;
|
191
200
|
else if (c == 'J') {
|
192
201
|
int t;
|
193
202
|
t = atoi(o.arg);
|
@@ -212,9 +221,8 @@ int main(int argc, char *argv[])
|
|
212
221
|
else if (c == 309) mm_dbg_flag |= MM_DBG_PRINT_QNAME | MM_DBG_PRINT_ALN_SEQ, n_threads = 1; // --print-aln-seq
|
213
222
|
else if (c == 310) opt.flag |= MM_F_SPLICE; // --splice
|
214
223
|
else if (c == 312) opt.flag |= MM_F_NO_LJOIN; // --no-long-join
|
215
|
-
else if (c == 313) opt.flag |= MM_F_SR; // --sr
|
216
224
|
else if (c == 317) opt.end_bonus = atoi(o.arg); // --end-bonus
|
217
|
-
else if (c == 318) opt.flag |= MM_F_INDEPEND_SEG; // --no-pairing
|
225
|
+
else if (c == 318) opt.flag |= MM_F_INDEPEND_SEG; // --no-pairing (deprecated)
|
218
226
|
else if (c == 320) ipt.flag |= MM_I_NO_SEQ; // --idx-no-seq
|
219
227
|
else if (c == 321) opt.anchor_ext_shift = atoi(o.arg); // --end-seed-pen
|
220
228
|
else if (c == 322) opt.flag |= MM_F_FOR_ONLY; // --for-only
|
@@ -230,8 +238,9 @@ int main(int argc, char *argv[])
|
|
230
238
|
else if (c == 336) opt.flag |= MM_F_HARD_MLEVEL; // --hard-mask-level
|
231
239
|
else if (c == 337) opt.max_sw_mat = mm_parse_num(o.arg); // --cap-sw-mat
|
232
240
|
else if (c == 338) opt.max_qlen = mm_parse_num(o.arg); // --max-qlen
|
233
|
-
else if (c == 340)
|
241
|
+
else if (c == 340) fn_bed_junc = o.arg; // --junc-bed
|
234
242
|
else if (c == 341) opt.junc_bonus = atoi(o.arg); // --junc-bonus
|
243
|
+
else if (c == 358) opt.junc_pen = atoi(o.arg); // --junc-pen
|
235
244
|
else if (c == 342) opt.flag |= MM_F_SAM_HIT_ONLY; // --sam-hit-only
|
236
245
|
else if (c == 343) opt.chain_gap_scale = atof(o.arg); // --chain-gap-scale
|
237
246
|
else if (c == 351) opt.chain_skip_scale = atof(o.arg); // --chain-skip-scale
|
@@ -245,8 +254,25 @@ int main(int argc, char *argv[])
|
|
245
254
|
else if (c == 353) opt.flag |= MM_F_NO_HASH_NAME; // --no-hash-name
|
246
255
|
else if (c == 354) opt.flag |= MM_F_SECONDARY_SEQ; // --secondary-seq
|
247
256
|
else if (c == 355) opt.flag |= MM_F_OUT_DS; // --ds
|
257
|
+
else if (c == 356) opt.rmq_inner_dist = mm_parse_num(o.arg); // --rmq-inner
|
258
|
+
else if (c == 357) fn_spsc = o.arg; // --spsc
|
259
|
+
else if (c == 360) opt.jump_min_match = mm_parse_num(o.arg); // --jump-min-match
|
260
|
+
else if (c == 361) opt.flag |= MM_F_OUT_JUNC | MM_F_CIGAR; // --write-junc
|
261
|
+
else if (c == 362) fn_bed_pass1 = o.arg; // --jump-pass1
|
262
|
+
else if (c == 501) mm_dbg_flag |= MM_DBG_SEED_FREQ; // --dbg-seed-occ
|
248
263
|
else if (c == 330) {
|
249
264
|
fprintf(stderr, "[WARNING] \033[1;31m --lj-min-ratio has been deprecated.\033[0m\n");
|
265
|
+
} else if (c == 313) { // --sr
|
266
|
+
if (o.arg == 0 || strcmp(o.arg, "dna") == 0) {
|
267
|
+
opt.flag |= MM_F_SR;
|
268
|
+
} else if (strcmp(o.arg, "rna") == 0) {
|
269
|
+
opt.flag |= MM_F_SR_RNA;
|
270
|
+
} else if (strcmp(o.arg, "no") == 0) {
|
271
|
+
opt.flag &= ~(uint64_t)(MM_F_SR|MM_F_SR_RNA);
|
272
|
+
} else if (mm_verbose >= 2) {
|
273
|
+
opt.flag |= MM_F_SR;
|
274
|
+
fprintf(stderr, "[WARNING]\033[1;31m --sr only takes 'dna' or 'rna'. Invalid values are assumed to be 'dna'.\033[0m\n");
|
275
|
+
}
|
250
276
|
} else if (c == 314) { // --frag
|
251
277
|
yes_or_no(&opt, MM_F_FRAG_MODE, o.longidx, o.arg, 1);
|
252
278
|
} else if (c == 315) { // --secondary
|
@@ -271,6 +297,14 @@ int main(int argc, char *argv[])
|
|
271
297
|
} else if (c == 347) { // --rmq
|
272
298
|
if (o.arg) yes_or_no(&opt, MM_F_RMQ, o.longidx, o.arg, 1);
|
273
299
|
else opt.flag |= MM_F_RMQ;
|
300
|
+
} else if (c == 359) { // --pairing
|
301
|
+
if (strcmp(o.arg, "no") == 0) opt.flag |= MM_F_INDEPEND_SEG;
|
302
|
+
else if (strcmp(o.arg, "weak") == 0) opt.flag |= MM_F_WEAK_PAIRING, opt.flag &= ~(uint64_t)MM_F_INDEPEND_SEG;
|
303
|
+
else {
|
304
|
+
if (strcmp(o.arg, "strong") != 0 && mm_verbose >= 2)
|
305
|
+
fprintf(stderr, "[WARNING]\033[1;31m unrecognized argument for --pairing; assuming 'strong'.\033[0m\n");
|
306
|
+
opt.flag &= ~(uint64_t)(MM_F_INDEPEND_SEG|MM_F_WEAK_PAIRING);
|
307
|
+
}
|
274
308
|
} else if (c == 'S') {
|
275
309
|
opt.flag |= MM_F_OUT_CS | MM_F_CIGAR | MM_F_OUT_CS_LONG;
|
276
310
|
if (mm_verbose >= 2)
|
@@ -311,10 +345,6 @@ int main(int argc, char *argv[])
|
|
311
345
|
if (*s == ',') opt.e2 = strtol(s + 1, &s, 10);
|
312
346
|
}
|
313
347
|
}
|
314
|
-
if ((opt.flag & MM_F_SPLICE) && (opt.flag & MM_F_FRAG_MODE)) {
|
315
|
-
fprintf(stderr, "[ERROR]\033[1;31m --splice and --frag should not be specified at the same time.\033[0m\n");
|
316
|
-
return 1;
|
317
|
-
}
|
318
348
|
if (!fnw && !(opt.flag&MM_F_CIGAR))
|
319
349
|
ipt.flag |= MM_I_NO_SEQ;
|
320
350
|
if (mm_check_opt(&ipt, &opt) < 0)
|
@@ -354,6 +384,7 @@ int main(int argc, char *argv[])
|
|
354
384
|
fprintf(fp_help, " -s INT minimal peak DP alignment score [%d]\n", opt.min_dp_max);
|
355
385
|
fprintf(fp_help, " -u CHAR how to find GT-AG. f:transcript strand, b:both strands, n:don't match GT-AG [n]\n");
|
356
386
|
fprintf(fp_help, " -J INT splice mode. 0: original minimap2 model; 1: miniprot model [1]\n");
|
387
|
+
fprintf(fp_help, " -j FILE junctions in BED12 to extend *short* RNA-seq alignment []\n");
|
357
388
|
fprintf(fp_help, " Input/Output:\n");
|
358
389
|
fprintf(fp_help, " -a output in the SAM format (PAF by default)\n");
|
359
390
|
fprintf(fp_help, " -o FILE output alignments to FILE [stdout]\n");
|
@@ -365,6 +396,7 @@ int main(int argc, char *argv[])
|
|
365
396
|
fprintf(fp_help, " --MD output the MD tag\n");
|
366
397
|
fprintf(fp_help, " --eqx write =/X CIGAR operators\n");
|
367
398
|
fprintf(fp_help, " -Y use soft clipping for supplementary alignments\n");
|
399
|
+
fprintf(fp_help, " -y copy FASTA/Q comments to output SAM\n");
|
368
400
|
fprintf(fp_help, " -t INT number of threads [%d]\n", n_threads);
|
369
401
|
fprintf(fp_help, " -K NUM minibatch size for mapping [500M]\n");
|
370
402
|
// fprintf(fp_help, " -v INT verbose level [%d]\n", mm_verbose);
|
@@ -373,6 +405,7 @@ int main(int argc, char *argv[])
|
|
373
405
|
fprintf(fp_help, " -x STR preset (always applied before other options; see minimap2.1 for details) []\n");
|
374
406
|
fprintf(fp_help, " - lr:hq - accurate long reads (error rate <1%%) against a reference genome\n");
|
375
407
|
fprintf(fp_help, " - splice/splice:hq - spliced alignment for long reads/accurate long reads\n");
|
408
|
+
fprintf(fp_help, " - splice:sr - spliced alignment for short RNA-seq reads\n");
|
376
409
|
fprintf(fp_help, " - asm5/asm10/asm20 - asm-to-ref mapping, for ~0.1/1/5%% sequence divergence\n");
|
377
410
|
fprintf(fp_help, " - sr - short reads against a reference\n");
|
378
411
|
fprintf(fp_help, " - map-pb/map-hifi/map-ont/map-iclr - CLR/HiFi/Nanopore/ICLR vs reference mapping\n");
|
@@ -427,7 +460,26 @@ int main(int argc, char *argv[])
|
|
427
460
|
__func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0), mi->n_seq);
|
428
461
|
if (argc != o.ind + 1) mm_mapopt_update(&opt, mi);
|
429
462
|
if (mm_verbose >= 3) mm_idx_stat(mi);
|
430
|
-
if (
|
463
|
+
if (fn_bed_junc) {
|
464
|
+
mm_idx_bed_read(mi, fn_bed_junc, 1);
|
465
|
+
if (mi->I == 0 && mm_verbose >= 2)
|
466
|
+
fprintf(stderr, "[WARNING] failed to load the junction BED file\n");
|
467
|
+
}
|
468
|
+
if (fn_bed_jump) {
|
469
|
+
mm_idx_jjump_read(mi, fn_bed_jump, MM_JUNC_ANNO, -1);
|
470
|
+
if (mi->J == 0 && mm_verbose >= 2)
|
471
|
+
fprintf(stderr, "[WARNING] failed to load the jump BED file\n");
|
472
|
+
}
|
473
|
+
if (fn_bed_pass1) {
|
474
|
+
mm_idx_jjump_read(mi, fn_bed_pass1, MM_JUNC_MISC, 5);
|
475
|
+
if (mi->J == 0 && mm_verbose >= 2)
|
476
|
+
fprintf(stderr, "[WARNING] failed to load the pass-1 jump BED file\n");
|
477
|
+
}
|
478
|
+
if (fn_spsc) {
|
479
|
+
mm_idx_spsc_read(mi, fn_spsc, mm_max_spsc_bonus(&opt));
|
480
|
+
if (mi->spsc == 0 && mm_verbose >= 2)
|
481
|
+
fprintf(stderr, "[WARNING] failed to load the splice score file\n");
|
482
|
+
}
|
431
483
|
if (alt_list) mm_idx_alt_read(mi, alt_list);
|
432
484
|
if (argc - (o.ind + 1) == 0) {
|
433
485
|
mm_idx_destroy(mi);
|
data/ext/minimap2/map.c
CHANGED
@@ -224,10 +224,10 @@ static mm_reg1_t *align_regs(const mm_mapopt_t *opt, const mm_idx_t *mi, void *k
|
|
224
224
|
return regs;
|
225
225
|
}
|
226
226
|
|
227
|
-
void
|
227
|
+
void mm_map_frag_core(const mm_idx_t *mi, int n_segs, const int *qlens, const char **seqs, int *n_regs, mm_reg1_t **regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *qname)
|
228
228
|
{
|
229
229
|
int i, j, rep_len, qlen_sum, n_regs0, n_mini_pos;
|
230
|
-
int max_chain_gap_qry, max_chain_gap_ref, is_splice = !!(opt->flag & MM_F_SPLICE), is_sr = !!(opt->flag & MM_F_SR);
|
230
|
+
int max_chain_gap_qry, max_chain_gap_ref, is_splice = !!(opt->flag & MM_F_SPLICE), is_sr = !!(opt->flag & MM_F_SR), is_sr_rna = !!(opt->flag & MM_F_SR_RNA);
|
231
231
|
uint32_t hash;
|
232
232
|
int64_t n_a;
|
233
233
|
uint64_t *u, *mini_pos;
|
@@ -338,7 +338,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
|
|
338
338
|
if (n_segs == 1) { // uni-segment
|
339
339
|
regs0 = align_regs(opt, mi, b->km, qlens[0], seqs[0], &n_regs0, regs0, a);
|
340
340
|
regs0 = (mm_reg1_t*)realloc(regs0, sizeof(*regs0) * n_regs0);
|
341
|
-
|
341
|
+
mm_set_mapq2(b->km, n_regs0, regs0, opt->min_chain_score, opt->a, rep_len, is_sr || is_sr_rna, is_splice);
|
342
342
|
n_regs[0] = n_regs0, regs[0] = regs0;
|
343
343
|
} else { // multi-segment
|
344
344
|
mm_seg_t *seg;
|
@@ -347,7 +347,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
|
|
347
347
|
for (i = 0; i < n_segs; ++i) {
|
348
348
|
mm_set_parent(b->km, opt->mask_level, opt->mask_len, n_regs[i], regs[i], opt->a * 2 + opt->b, opt->flag&MM_F_HARD_MLEVEL, opt->alt_drop); // update mm_reg1_t::parent
|
349
349
|
regs[i] = align_regs(opt, mi, b->km, qlens[i], seqs[i], &n_regs[i], regs[i], seg[i].a);
|
350
|
-
|
350
|
+
mm_set_mapq2(b->km, n_regs[i], regs[i], opt->min_chain_score, opt->a, rep_len, is_sr || is_sr_rna, is_splice);
|
351
351
|
}
|
352
352
|
mm_seg_free(b->km, n_segs, seg);
|
353
353
|
if (n_segs == 2 && opt->pe_ori >= 0 && (opt->flag&MM_F_CIGAR))
|
@@ -359,6 +359,10 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
|
|
359
359
|
kfree(b->km, u);
|
360
360
|
kfree(b->km, mini_pos);
|
361
361
|
|
362
|
+
if (mi->J && n_segs == 1 && is_splice)
|
363
|
+
for (i = 0; i < n_regs0; ++i)
|
364
|
+
mm_jump_split(b->km, mi, opt, qlens[0], (const uint8_t*)seqs[0], &regs0[i], 0);
|
365
|
+
|
362
366
|
if (b->km) {
|
363
367
|
km_stat(b->km, &kmst);
|
364
368
|
if (mm_dbg_flag & MM_DBG_PRINT_QNAME)
|
@@ -373,6 +377,18 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
|
|
373
377
|
}
|
374
378
|
}
|
375
379
|
|
380
|
+
void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **seqs, int *n_regs, mm_reg1_t **regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *qname)
|
381
|
+
{
|
382
|
+
if ((opt->flag & MM_F_WEAK_PAIRING) && n_segs == 2 && opt->pe_ori >= 0 && (opt->flag&MM_F_CIGAR)) {
|
383
|
+
int i;
|
384
|
+
for (i = 0; i < n_segs; ++i)
|
385
|
+
mm_map_frag_core(mi, 1, &qlens[i], &seqs[i], &n_regs[i], &regs[i], b, opt, qname);
|
386
|
+
mm_pair(b->km, opt->max_gap_ref, opt->pe_bonus, opt->a * 2 + opt->b, opt->a, qlens, n_regs, regs);
|
387
|
+
} else {
|
388
|
+
mm_map_frag_core(mi, n_segs, qlens, seqs, n_regs, regs, b, opt, qname);
|
389
|
+
}
|
390
|
+
}
|
391
|
+
|
376
392
|
mm_reg1_t *mm_map(const mm_idx_t *mi, int qlen, const char *seq, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *qname)
|
377
393
|
{
|
378
394
|
mm_reg1_t *regs;
|
@@ -447,6 +463,10 @@ static void worker_for(void *_data, long i, int tid) // kt_for() callback
|
|
447
463
|
r->qs = qlens[j] - r->qe;
|
448
464
|
r->qe = qlens[j] - t;
|
449
465
|
r->rev = !r->rev;
|
466
|
+
if (r->p) {
|
467
|
+
if (r->p->trans_strand == 1) r->p->trans_strand = 2;
|
468
|
+
else if (r->p->trans_strand == 2) r->p->trans_strand = 1;
|
469
|
+
}
|
450
470
|
}
|
451
471
|
}
|
452
472
|
if (mm_dbg_flag & MM_DBG_PRINT_QNAME)
|
@@ -509,7 +529,7 @@ static void merge_hits(step_t *s)
|
|
509
529
|
mm_select_sub(km, opt->pri_ratio, s->p->mi->k*2, opt->best_n, 0, opt->max_gap * 0.8, &s->n_reg[k], s->reg[k]);
|
510
530
|
mm_set_sam_pri(s->n_reg[k], s->reg[k]);
|
511
531
|
}
|
512
|
-
|
532
|
+
mm_set_mapq2(km, s->n_reg[k], s->reg[k], opt->min_chain_score, opt->a, rep_len, !!(opt->flag & (MM_F_SR|MM_F_SR_RNA)), !!(opt->flag & MM_F_SPLICE));
|
513
533
|
}
|
514
534
|
if (s->n_seg[f] == 2 && opt->pe_ori >= 0 && (opt->flag&MM_F_CIGAR))
|
515
535
|
mm_pair(km, frag_gap_part[0], opt->pe_bonus, opt->a * 2 + opt->b, opt->a, qlens, &s->n_reg[k0], &s->reg[k0]);
|
@@ -578,23 +598,30 @@ static void *worker_pipeline(void *shared, int step, void *in)
|
|
578
598
|
mm_err_fwrite(r->p, r->p->capacity, 4, p->fp_split);
|
579
599
|
}
|
580
600
|
}
|
601
|
+
} else if (p->opt->flag & MM_F_OUT_JUNC) { // extra logic for --write-junc
|
602
|
+
for (j = 0; j < s->n_reg[i]; ++j) {
|
603
|
+
const mm_reg1_t *r = &s->reg[i][j];
|
604
|
+
if (r->id != r->parent || r->mapq < 10) continue;
|
605
|
+
mm_write_junc(&p->str, mi, t, r);
|
606
|
+
if (p->str.l > 0) mm_err_puts(p->str.s);
|
607
|
+
}
|
581
608
|
} else if (s->n_reg[i] > 0) { // the query has at least one hit
|
582
609
|
for (j = 0; j < s->n_reg[i]; ++j) {
|
583
|
-
mm_reg1_t *r = &s->reg[i][j];
|
610
|
+
const mm_reg1_t *r = &s->reg[i][j];
|
584
611
|
assert(!r->sam_pri || r->id == r->parent);
|
585
612
|
if ((p->opt->flag & MM_F_NO_PRINT_2ND) && r->id != r->parent)
|
586
613
|
continue;
|
587
614
|
if (p->opt->flag & MM_F_OUT_SAM)
|
588
615
|
mm_write_sam3(&p->str, mi, t, i - seg_st, j, s->n_seg[k], &s->n_reg[seg_st], (const mm_reg1_t*const*)&s->reg[seg_st], km, p->opt->flag, s->rep_len[i]);
|
589
616
|
else
|
590
|
-
|
617
|
+
mm_write_paf4(&p->str, mi, t, r, km, p->opt->flag, s->rep_len[i], s->n_seg[k], i - seg_st);
|
591
618
|
mm_err_puts(p->str.s);
|
592
619
|
}
|
593
620
|
} else if ((p->opt->flag & MM_F_PAF_NO_HIT) || ((p->opt->flag & MM_F_OUT_SAM) && !(p->opt->flag & MM_F_SAM_HIT_ONLY))) { // output an empty hit, if requested
|
594
621
|
if (p->opt->flag & MM_F_OUT_SAM)
|
595
622
|
mm_write_sam3(&p->str, mi, t, i - seg_st, -1, s->n_seg[k], &s->n_reg[seg_st], (const mm_reg1_t*const*)&s->reg[seg_st], km, p->opt->flag, s->rep_len[i]);
|
596
623
|
else
|
597
|
-
|
624
|
+
mm_write_paf4(&p->str, mi, t, 0, 0, p->opt->flag, s->rep_len[i], s->n_seg[k], i - seg_st);
|
598
625
|
mm_err_puts(p->str.s);
|
599
626
|
}
|
600
627
|
}
|