minimap2 0.2.28.0 → 0.2.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/ext/cmappy/cmappy.c +3 -3
- data/ext/cmappy/cmappy.h +1 -1
- data/ext/minimap2/FAQ.md +1 -1
- data/ext/minimap2/Makefile +4 -3
- data/ext/minimap2/NEWS.md +39 -0
- data/ext/minimap2/README.md +30 -14
- data/ext/minimap2/align.c +134 -50
- data/ext/minimap2/cookbook.md +2 -2
- data/ext/minimap2/format.c +57 -3
- data/ext/minimap2/hit.c +14 -6
- data/ext/minimap2/index.c +304 -13
- data/ext/minimap2/jump.c +201 -0
- data/ext/minimap2/kalloc.h +8 -0
- data/ext/minimap2/ksw2.h +5 -2
- data/ext/minimap2/ksw2_dispatch.c +5 -5
- data/ext/minimap2/ksw2_exts2_sse.c +17 -6
- data/ext/minimap2/main.c +60 -12
- data/ext/minimap2/map.c +35 -8
- data/ext/minimap2/minimap.h +14 -3
- data/ext/minimap2/minimap2.1 +92 -45
- data/ext/minimap2/misc/README.md +2 -1
- data/ext/minimap2/misc/pafcluster.js +241 -0
- data/ext/minimap2/misc/paftools.js +8 -3
- data/ext/minimap2/mmpriv.h +24 -2
- data/ext/minimap2/options.c +27 -2
- data/ext/minimap2/python/cmappy.h +3 -3
- data/ext/minimap2/python/cmappy.pxd +4 -2
- data/ext/minimap2/python/mappy.pyx +19 -7
- data/ext/minimap2/setup.py +2 -2
- data/ext/minimap2.patch +2 -2
- data/lib/minimap2/aligner.rb +19 -12
- data/lib/minimap2/ffi/constants.rb +9 -1
- data/lib/minimap2/ffi/functions.rb +145 -6
- data/lib/minimap2/ffi/mappy.rb +1 -1
- data/lib/minimap2/version.rb +1 -1
- data/lib/minimap2.rb +2 -2
- metadata +5 -4
- data/ext/minimap2/misc/mmphase.js +0 -335
data/ext/minimap2/jump.c
ADDED
@@ -0,0 +1,201 @@
|
|
1
|
+
#include <stdio.h>
|
2
|
+
#include "mmpriv.h"
|
3
|
+
#include "kalloc.h"
|
4
|
+
|
5
|
+
#define MM_MIN_EXON_LEN 20
|
6
|
+
|
7
|
+
static int32_t mm_jump_check(void *km, const mm_idx_t *mi, int32_t qlen, const uint8_t *qseq0, const mm_reg1_t *r, int32_t ext, int32_t is_left) // Pre-check for one end of r: returns 0 if a jump may be tried at that end, -1 otherwise. km and qseq0 are not read in this function. TODO: check close N
|
8
|
+
{
|
9
|
+
int32_t clip, clen, e = !r->rev ^ !is_left; // 0 for left of the alignment; 1 for right. NOTE(review): with rev == 0 and is_left != 0 this expression evaluates to 1, so clip below becomes qlen - r->qe, whereas mm_jump_split_left() uses r->qs for the same case -- confirm which is intended
|
10
|
+
uint32_t cigar;
|
11
|
+
if (!r->p || r->p->n_cigar <= 0) return -1; // only working with CIGAR
|
12
|
+
clip = e == 0? r->qs : qlen - r->qe; // number of query bases outside [qs,qe) on the side selected by e
|
13
|
+
cigar = r->p->cigar[is_left? 0 : r->p->n_cigar - 1]; // first CIGAR op when is_left, last op otherwise
|
14
|
+
clen = (cigar&0xf) == MM_CIGAR_MATCH? cigar>>4 : 0; // length of that op if it is a match op, else 0
|
15
|
+
if (clen <= ext) return -1; // the end op must be a match longer than ext
|
16
|
+
if (is_left) {
|
17
|
+
if (clip >= r->rs) return -1; // no space to jump
|
18
|
+
} else {
|
19
|
+
if (clip >= mi->seq[r->rid].len - r->re) return -1; // no space to jump
|
20
|
+
}
|
21
|
+
return 0;
|
22
|
+
}
|
23
|
+
|
24
|
+
static uint8_t *mm_jump_get_qseq_seq(void *km, int32_t qlen, const uint8_t *qseq0, const mm_reg1_t *r, int32_t is_left, int32_t ql0, uint8_t *qseq) // Writes ql0 bases from one end of qseq0 into qseq[0..ql0), translated through seq_nt4_table, and returns qseq. The caller provides the qseq buffer; km is not read in this function.
|
25
|
+
{
|
26
|
+
extern unsigned char seq_nt4_table[256];
|
27
|
+
int32_t i, k = 0; // k is the write position in qseq
|
28
|
+
if (!r->rev) { // r->rev == 0: copy in query order, no complement
|
29
|
+
if (is_left)
|
30
|
+
for (i = 0; i < ql0; ++i) // first ql0 bases of the query
|
31
|
+
qseq[k++] = seq_nt4_table[(uint8_t)qseq0[i]];
|
32
|
+
else
|
33
|
+
for (i = qlen - ql0; i < qlen; ++i) // last ql0 bases of the query
|
34
|
+
qseq[k++] = seq_nt4_table[(uint8_t)qseq0[i]];
|
35
|
+
} else { // r->rev != 0: walk the query backwards and complement each base
|
36
|
+
if (is_left)
|
37
|
+
for (i = qlen - 1; i >= qlen - ql0; --i) { // last ql0 bases, in reverse order
|
38
|
+
uint8_t c = seq_nt4_table[(uint8_t)qseq0[i]];
|
39
|
+
qseq[k++] = c >= 4? c : 3 - c; // codes 0..3 are mapped to 3 - c; codes >= 4 are kept unchanged
|
40
|
+
}
|
41
|
+
else
|
42
|
+
for (i = ql0 - 1; i >= 0; --i) { // first ql0 bases, in reverse order
|
43
|
+
uint8_t c = seq_nt4_table[(uint8_t)qseq0[i]];
|
44
|
+
qseq[k++] = c >= 4? c : 3 - c; // codes 0..3 are mapped to 3 - c; codes >= 4 are kept unchanged
|
45
|
+
}
|
46
|
+
}
|
47
|
+
return qseq;
|
48
|
+
}
|
49
|
+
|
50
|
+
static void mm_jump_split_left(void *km, const mm_idx_t *mi, const mm_mapopt_t *opt, int32_t qlen, const uint8_t *qseq0, mm_reg1_t *r, int32_t ts_strand) // Adjusts the start (r->rs side) of r in place using candidate jumps from mm_idx_jump_get(): either prepends a match + N_SKIP pair to the CIGAR or trims the first match op. Returns without changes if mm_jump_check() rejects the end or no candidate is accepted.
|
51
|
+
{
|
52
|
+
uint8_t *tseq = 0, *qseq = 0; // allocated lazily inside the loop; kfree() is called on tseq after the loop even if it is still 0
|
53
|
+
int32_t i, n, l, i0, m, mm0;
|
54
|
+
int32_t i0_anno = -1, n_anno = 0, mm0_anno = 0, i0_misc = -1, n_misc = 0, mm0_misc = 0; // accepted candidates, tracked separately for jumps with and without MM_JUNC_ANNO
|
55
|
+
int32_t ext = 1 + (opt->b + opt->a - 1) / opt->a + 1; // 2 + ceil(b/a), assuming a and b are positive
|
56
|
+
int32_t clip = !r->rev? r->qs : qlen - r->qe; // query bases not covered by r on the side handled here
|
57
|
+
int32_t extt = clip < ext? clip : ext; // min(clip, ext)
|
58
|
+
const mm_idx_jjump1_t *a;
|
59
|
+
|
60
|
+
if (mm_jump_check(km, mi, qlen, qseq0, r, ext + MM_MIN_EXON_LEN, 1) < 0) return; // the first match op must be longer than ext + MM_MIN_EXON_LEN
|
61
|
+
a = mm_idx_jump_get(mi, r->rid, r->rs - extt, r->rs + ext, &n); // n candidates; the assert below expects off within [rs - extt, rs + ext]
|
62
|
+
if (n == 0) return;
|
63
|
+
|
64
|
+
for (i = 0; i < n; ++i) { // traverse possible jumps
|
65
|
+
const mm_idx_jjump1_t *ai = &a[i];
|
66
|
+
int32_t tlen, tl1, j, mm1, mm2;
|
67
|
+
assert(ai->off >= r->rs - extt && ai->off <= r->rs + ext);
|
68
|
+
if (ts_strand * ai->strand < 0) continue; // wrong strand
|
69
|
+
if (ai->off2 >= ai->off) continue; // wrong direction
|
70
|
+
if (ai->off - ai->off2 < 6) continue; // intron too small
|
71
|
+
if (ai->off2 < clip + ext) continue; // not long enough
|
72
|
+
if (tseq == 0) {
|
73
|
+
tseq = Kcalloc(km, uint8_t, (clip + ext) * 2); // tseq and qseq are allocated together
|
74
|
+
qseq = tseq + clip + ext; // second half of the same buffer
|
75
|
+
mm_jump_get_qseq_seq(km, qlen, qseq0, r, 1, clip + ext, qseq); // query side is fetched once and reused for every candidate
|
76
|
+
}
|
77
|
+
tl1 = clip + (ai->off - r->rs); // number of bases compared on the off2 side of the jump
|
78
|
+
tlen = mm_idx_getseq2(mi, 0, r->rid, ai->off, r->rs + ext, &tseq[tl1]); // tseq[tl1..clip+ext) <- reference [off, rs + ext)
|
79
|
+
assert(tlen == r->rs + ext - ai->off);
|
80
|
+
tlen = mm_idx_getseq2(mi, 0, r->rid, ai->off2 - tl1, ai->off2, tseq); // tseq[0..tl1) <- reference [off2 - tl1, off2)
|
81
|
+
assert(tlen == tl1);
|
82
|
+
for (j = 0, mm1 = 0; j < tl1; ++j) // mm1: mismatches, or codes > 3, in the off2-side part
|
83
|
+
if (qseq[j] != tseq[j] || qseq[j] > 3 || tseq[j] > 3)
|
84
|
+
++mm1;
|
85
|
+
for (mm2 = 0; j < clip + ext; ++j) // mm2: same count for the remaining part; j continues from tl1
|
86
|
+
if (qseq[j] != tseq[j] || qseq[j] > 3 || tseq[j] > 3)
|
87
|
+
++mm2;
|
88
|
+
if (mm1 == 0 && mm2 <= 1) { // accept: exact on the off2 side, at most one mismatch in the rest
|
89
|
+
if (ai->flag & MM_JUNC_ANNO)
|
90
|
+
i0_anno = i, mm0_anno = mm1 + mm2, ++n_anno; // i0 points to the rightmost i
|
91
|
+
else
|
92
|
+
i0_misc = i, mm0_misc = mm1 + mm2, ++n_misc; // likewise the last accepted non-ANNO candidate is kept
|
93
|
+
}
|
94
|
+
}
|
95
|
+
if (n_anno > 0) m = n_anno, i0 = i0_anno, mm0 = mm0_anno; // MM_JUNC_ANNO candidates take precedence
|
96
|
+
else m = n_misc, i0 = i0_misc, mm0 = mm0_misc;
|
97
|
+
kfree(km, tseq);
|
98
|
+
|
99
|
+
l = m > 0? a[i0].off - r->rs : 0; // may be negative
|
100
|
+
if (m == 1 && clip + l >= opt->jump_min_match) { // add one more exon
|
101
|
+
mm_enlarge_cigar(r, 2); // presumably makes room for two more CIGAR ops -- defined outside this file
|
102
|
+
memmove(r->p->cigar + 2, r->p->cigar, r->p->n_cigar * 4); // shift the existing ops up by two slots (n_cigar * 4 bytes)
|
103
|
+
r->p->cigar[0] = (clip + l) << 4 | MM_CIGAR_MATCH; // new leading match of clip + l
|
104
|
+
r->p->cigar[1] = (a[i0].off - a[i0].off2) << 4 | MM_CIGAR_N_SKIP; // skip of off - off2
|
105
|
+
r->p->cigar[2] = ((r->p->cigar[2]>>4) - l) << 4 | MM_CIGAR_MATCH; // former first op, length reduced by l
|
106
|
+
r->p->n_cigar += 2;
|
107
|
+
r->rs = a[i0].off2 - (clip + l);
|
108
|
+
if (!r->rev) r->qs = 0; // the query end on this side is now fully covered
|
109
|
+
else r->qe = qlen;
|
110
|
+
r->blen += clip, r->mlen += clip - mm0;
|
111
|
+
r->p->dp_max0 += (clip - mm0) * opt->a - mm0 * opt->b;
|
112
|
+
r->p->dp_max += (clip - mm0) * opt->a - mm0 * opt->b;
|
113
|
+
if (!r->is_spliced) r->is_spliced = 1, r->p->dp_max += (opt->a + opt->b) + ((opt->a + opt->b) >> 1); // only when r was not yet flagged as spliced: set the flag and add 1.5 * (a + b), rounded down, to dp_max
|
114
|
+
} else if (m > 0 && a[i0].off > r->rs) { // trim by l; l is always positive
|
115
|
+
r->p->cigar[0] -= l << 4 | MM_CIGAR_MATCH; // parsed as (l << 4) | MM_CIGAR_MATCH
|
116
|
+
r->rs += l;
|
117
|
+
if (!r->rev) r->qs += l;
|
118
|
+
else r->qe -= l;
|
119
|
+
}
|
120
|
+
}
|
121
|
+
|
122
|
+
static void mm_jump_split_right(void *km, const mm_idx_t *mi, const mm_mapopt_t *opt, int32_t qlen, const uint8_t *qseq0, mm_reg1_t *r, int32_t ts_strand) // Adjusts the end (r->re side) of r in place using candidate jumps from mm_idx_jump_get(): either appends an N_SKIP + match pair to the CIGAR or trims the last match op. Returns without changes if mm_jump_check() rejects the end or no candidate is accepted.
|
123
|
+
{
|
124
|
+
uint8_t *tseq = 0, *qseq = 0; // allocated lazily inside the loop; kfree() is called on tseq after the loop even if it is still 0
|
125
|
+
int32_t i, n, l, i0, m, mm0;
|
126
|
+
int32_t i0_anno = -1, n_anno = 0, mm0_anno = 0, i0_misc = -1, n_misc = 0, mm0_misc = 0; // accepted candidates, tracked separately for jumps with and without MM_JUNC_ANNO
|
127
|
+
int32_t ext = 1 + (opt->b + opt->a - 1) / opt->a + 1; // 2 + ceil(b/a), assuming a and b are positive
|
128
|
+
int32_t clip = !r->rev? qlen - r->qe : r->qs; // query bases not covered by r on the side handled here
|
129
|
+
int32_t extt = clip < ext? clip : ext; // min(clip, ext)
|
130
|
+
const mm_idx_jjump1_t *a;
|
131
|
+
|
132
|
+
if (mm_jump_check(km, mi, qlen, qseq0, r, ext + MM_MIN_EXON_LEN, 0) < 0) return; // the last match op must be longer than ext + MM_MIN_EXON_LEN
|
133
|
+
a = mm_idx_jump_get(mi, r->rid, r->re - ext, r->re + extt, &n); // n candidates; the assert below expects off within [re - ext, re + extt]
|
134
|
+
if (n == 0) return;
|
135
|
+
|
136
|
+
for (i = 0; i < n; ++i) { // traverse possible jumps
|
137
|
+
const mm_idx_jjump1_t *ai = &a[i];
|
138
|
+
int32_t tlen, tl1, j, mm1, mm2;
|
139
|
+
assert(ai->off >= r->re - ext && ai->off <= r->re + extt);
|
140
|
+
if (ts_strand * ai->strand < 0) continue; // wrong strand
|
141
|
+
if (ai->off2 <= ai->off) continue; // wrong direction
|
142
|
+
if (ai->off2 - ai->off < 6) continue; // intron too small
|
143
|
+
if (ai->off2 + clip + ext > mi->seq[r->rid].len) continue; // not long enough
|
144
|
+
if (tseq == 0) {
|
145
|
+
tseq = Kcalloc(km, uint8_t, (clip + ext) * 2); // tseq and qseq are allocated together
|
146
|
+
qseq = tseq + clip + ext; // second half of the same buffer
|
147
|
+
mm_jump_get_qseq_seq(km, qlen, qseq0, r, 0, clip + ext, qseq); // query side is fetched once and reused for every candidate
|
148
|
+
}
|
149
|
+
tl1 = clip + (r->re - ai->off); // number of bases compared on the off2 side of the jump
|
150
|
+
tlen = mm_idx_getseq2(mi, 0, r->rid, r->re - ext, ai->off, tseq); // tseq[0..clip+ext-tl1) <- reference [re - ext, off)
|
151
|
+
assert(tlen == ai->off - (r->re - ext));
|
152
|
+
tlen = mm_idx_getseq2(mi, 0, r->rid, ai->off2, ai->off2 + tl1, &tseq[clip + ext - tl1]); // tseq[clip+ext-tl1..clip+ext) <- reference [off2, off2 + tl1)
|
153
|
+
assert(tlen == tl1);
|
154
|
+
for (j = 0, mm2 = 0; j < clip + ext - tl1; ++j) // mm2: mismatches, or codes > 3, in the part before off
|
155
|
+
if (qseq[j] != tseq[j] || qseq[j] > 3 || tseq[j] > 3)
|
156
|
+
++mm2;
|
157
|
+
for (mm1 = 0; j < clip + ext; ++j) // mm1: same count for the off2-side part; j continues from the previous loop
|
158
|
+
if (qseq[j] != tseq[j] || qseq[j] > 3 || tseq[j] > 3)
|
159
|
+
++mm1;
|
160
|
+
if (mm1 == 0 && mm2 <= 1) { // accept: exact on the off2 side, at most one mismatch in the rest
|
161
|
+
if (ai->flag & MM_JUNC_ANNO) {
|
162
|
+
if (i0_anno < 0) i0_anno = i, mm0_anno = mm1 + mm2; // keep the first accepted candidate (smallest i)
|
163
|
+
++n_anno;
|
164
|
+
} else {
|
165
|
+
if (i0_misc < 0) i0_misc = i, mm0_misc = mm1 + mm2; // keep the first accepted candidate (smallest i)
|
166
|
+
++n_misc;
|
167
|
+
}
|
168
|
+
}
|
169
|
+
}
|
170
|
+
if (n_anno > 0) m = n_anno, i0 = i0_anno, mm0 = mm0_anno; // MM_JUNC_ANNO candidates take precedence
|
171
|
+
else m = n_misc, i0 = i0_misc, mm0 = mm0_misc;
|
172
|
+
kfree(km, tseq);
|
173
|
+
|
174
|
+
l = m > 0? r->re - a[i0].off : 0; // may be negative
|
175
|
+
if (m == 1 && clip + l >= opt->jump_min_match) { // add one more exon
|
176
|
+
mm_enlarge_cigar(r, 2); // presumably makes room for two more CIGAR ops -- defined outside this file
|
177
|
+
r->p->cigar[r->p->n_cigar - 1] = ((r->p->cigar[r->p->n_cigar - 1]>>4) - l) << 4 | MM_CIGAR_MATCH; // current last op, length reduced by l
|
178
|
+
r->p->cigar[r->p->n_cigar] = (a[i0].off2 - a[i0].off) << 4 | MM_CIGAR_N_SKIP; // skip of off2 - off
|
179
|
+
r->p->cigar[r->p->n_cigar + 1] = (clip + l) << 4 | MM_CIGAR_MATCH; // new trailing match of clip + l
|
180
|
+
r->p->n_cigar += 2;
|
181
|
+
r->re = a[i0].off2 + (clip + l);
|
182
|
+
if (!r->rev) r->qe = qlen; // the query end on this side is now fully covered
|
183
|
+
else r->qs = 0;
|
184
|
+
r->blen += clip, r->mlen += clip - mm0;
|
185
|
+
r->p->dp_max0 += (clip - mm0) * opt->a - mm0 * opt->b;
|
186
|
+
r->p->dp_max += (clip - mm0) * opt->a - mm0 * opt->b;
|
187
|
+
if (!r->is_spliced) r->is_spliced = 1, r->p->dp_max += (opt->a + opt->b) + ((opt->a + opt->b) >> 1); // only when r was not yet flagged as spliced: set the flag and add 1.5 * (a + b), rounded down, to dp_max
|
188
|
+
} else if (m > 0 && r->re > a[i0].off) { // trim by l; l is always positive
|
189
|
+
r->p->cigar[r->p->n_cigar - 1] -= l << 4 | MM_CIGAR_MATCH; // parsed as (l << 4) | MM_CIGAR_MATCH
|
190
|
+
r->re -= l;
|
191
|
+
if (!r->rev) r->qe -= l;
|
192
|
+
else r->qs += l;
|
193
|
+
}
|
194
|
+
}
|
195
|
+
|
196
|
+
void mm_jump_split(void *km, const mm_idx_t *mi, const mm_mapopt_t *opt, int32_t qlen, const uint8_t *qseq, mm_reg1_t *r, int32_t ts_strand) // Non-static entry point of this file: runs the left-end and then the right-end jump adjustment on r, in place.
|
197
|
+
{
|
198
|
+
assert((opt->flag & MM_F_EQX) == 0); // must not be called with MM_F_EQX set in opt->flag
|
199
|
+
mm_jump_split_left(km, mi, opt, qlen, qseq, r, ts_strand);
|
200
|
+
mm_jump_split_right(km, mi, opt, qlen, qseq, r, ts_strand); // runs on r as already modified by the left pass
|
201
|
+
}
|
data/ext/minimap2/kalloc.h
CHANGED
@@ -31,6 +31,14 @@ void km_stat_print(const void *km);
|
|
31
31
|
#define Kcalloc(km, type, cnt) ((type*)kcalloc((km), (cnt), sizeof(type)))
|
32
32
|
#define Krealloc(km, type, ptr, cnt) ((type*)krealloc((km), (ptr), (cnt) * sizeof(type)))
|
33
33
|
|
34
|
+
#define Kgrow(km, type, ptr, __i, __m) do { /* ensure index __i is within capacity __m of array ptr; __i and __m are evaluated more than once */ \
|
35
|
+
if ((__i) >= (__m)) { /* only reallocates when the index is out of range */ \
|
36
|
+
(__m) = (__i) + 1; /* minimum capacity that holds index __i */ \
|
37
|
+
(__m) += ((__m)>>1) + 16; /* grow by a further 50% plus 16 elements */ \
|
38
|
+
(ptr) = Krealloc(km, type, ptr, (__m)); /* ptr and __m are both updated in the caller's scope */ \
|
39
|
+
} \
|
40
|
+
} while (0)
|
41
|
+
|
34
42
|
#define Kexpand(km, type, a, m) do { \
|
35
43
|
(m) = (m) >= 4? (m) + ((m)>>1) : 16; \
|
36
44
|
(a) = Krealloc(km, type, (a), (m)); \
|
data/ext/minimap2/ksw2.h
CHANGED
@@ -15,7 +15,8 @@
|
|
15
15
|
#define KSW_EZ_SPLICE_FOR 0x100
|
16
16
|
#define KSW_EZ_SPLICE_REV 0x200
|
17
17
|
#define KSW_EZ_SPLICE_FLANK 0x400
|
18
|
-
#define KSW_EZ_SPLICE_CMPLX 0x800
|
18
|
+
#define KSW_EZ_SPLICE_CMPLX 0x800 // use the miniprot splice model
|
19
|
+
#define KSW_EZ_SPLICE_SCORE 0x1000 // use splice score
|
19
20
|
|
20
21
|
// The subset of CIGAR operators used by ksw code.
|
21
22
|
// Use MM_CIGAR_* from minimap.h if you need the full list.
|
@@ -24,6 +25,8 @@
|
|
24
25
|
#define KSW_CIGAR_DEL 2
|
25
26
|
#define KSW_CIGAR_N_SKIP 3
|
26
27
|
|
28
|
+
#define KSW_SPSC_OFFSET 64
|
29
|
+
|
27
30
|
#ifdef __cplusplus
|
28
31
|
extern "C" {
|
29
32
|
#endif
|
@@ -69,7 +72,7 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|
69
72
|
int8_t gapo, int8_t gape, int8_t gapo2, int8_t gape2, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez);
|
70
73
|
|
71
74
|
void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
|
72
|
-
int8_t gapo, int8_t gape, int8_t gapo2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez);
|
75
|
+
int8_t gapo, int8_t gape, int8_t gapo2, int8_t noncan, int zdrop, int end_bonus, int8_t junc_bonus, int8_t junc_pen, int flag, const uint8_t *junc, ksw_extz_t *ez);
|
73
76
|
|
74
77
|
void ksw_extf2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t mch, int8_t mis, int8_t e, int w, int xdrop, ksw_extz_t *ez);
|
75
78
|
|
@@ -80,17 +80,17 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|
80
80
|
}
|
81
81
|
|
82
82
|
void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
|
83
|
-
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez)
|
83
|
+
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int end_bonus, int8_t junc_bonus, int8_t junc_pen, int flag, const uint8_t *junc, ksw_extz_t *ez)
|
84
84
|
{
|
85
85
|
extern void ksw_exts2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
|
86
|
-
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez);
|
86
|
+
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int end_bonus, int8_t junc_bonus, int8_t junc_pen, int flag, const uint8_t *junc, ksw_extz_t *ez);
|
87
87
|
extern void ksw_exts2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
|
88
|
-
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez);
|
88
|
+
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int end_bonus, int8_t junc_bonus, int8_t junc_pen, int flag, const uint8_t *junc, ksw_extz_t *ez);
|
89
89
|
if (ksw_simd < 0) ksw_simd = x86_simd();
|
90
90
|
if (ksw_simd & SIMD_SSE4_1)
|
91
|
-
ksw_exts2_sse41(km, qlen, query, tlen, target, m, mat, q, e, q2, noncan, zdrop, junc_bonus, flag, junc, ez);
|
91
|
+
ksw_exts2_sse41(km, qlen, query, tlen, target, m, mat, q, e, q2, noncan, zdrop, end_bonus, junc_bonus, junc_pen, flag, junc, ez);
|
92
92
|
else if (ksw_simd & SIMD_SSE2)
|
93
|
-
ksw_exts2_sse2(km, qlen, query, tlen, target, m, mat, q, e, q2, noncan, zdrop, junc_bonus, flag, junc, ez);
|
93
|
+
ksw_exts2_sse2(km, qlen, query, tlen, target, m, mat, q, e, q2, noncan, zdrop, end_bonus, junc_bonus, junc_pen, flag, junc, ez);
|
94
94
|
else abort();
|
95
95
|
}
|
96
96
|
#endif
|
@@ -24,14 +24,14 @@
|
|
24
24
|
#ifdef KSW_CPU_DISPATCH
|
25
25
|
#ifdef __SSE4_1__
|
26
26
|
void ksw_exts2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
|
27
|
-
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez)
|
27
|
+
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int end_bonus, int8_t junc_bonus, int8_t junc_pen, int flag, const uint8_t *junc, ksw_extz_t *ez)
|
28
28
|
#else
|
29
29
|
void ksw_exts2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
|
30
|
-
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez)
|
30
|
+
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int end_bonus, int8_t junc_bonus, int8_t junc_pen, int flag, const uint8_t *junc, ksw_extz_t *ez)
|
31
31
|
#endif
|
32
32
|
#else
|
33
33
|
void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
|
34
|
-
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez)
|
34
|
+
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int end_bonus, int8_t junc_bonus, int8_t junc_pen, int flag, const uint8_t *junc, ksw_extz_t *ez)
|
35
35
|
#endif // ~KSW_CPU_DISPATCH
|
36
36
|
{
|
37
37
|
#define __dp_code_block1 \
|
@@ -191,7 +191,14 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|
191
191
|
}
|
192
192
|
}
|
193
193
|
|
194
|
-
if (junc) {
|
194
|
+
if (junc && (flag & KSW_EZ_SPLICE_SCORE)) { // junc[] keeps the donor score
|
195
|
+
uint8_t donor_val = !!(flag & KSW_EZ_SPLICE_FOR) == !(flag & KSW_EZ_REV_CIGAR)? 0 : 1;
|
196
|
+
for (t = 0; t < tlen - 1; ++t)
|
197
|
+
((int8_t*)donor)[t] += junc[t+1] == 0xff || (junc[t+1]&1) != donor_val? -junc_pen : (int8_t)(junc[t+1]>>1) - (int8_t)KSW_SPSC_OFFSET;
|
198
|
+
for (t = 0; t < tlen - 1; ++t)
|
199
|
+
((int8_t*)acceptor)[t] += junc[t+1] == 0xff || (junc[t+1]&1) != !donor_val? -junc_pen : (int8_t)(junc[t+1]>>1) - (int8_t)KSW_SPSC_OFFSET;
|
200
|
+
//for (t = 0; t < tlen - 1; ++t) if (junc[t+1] != 0xff) fprintf(stderr, "Y2\t%d\t%d\t%c\t%d\n", ((int8_t*)donor)[t], ((int8_t*)acceptor)[t], "DA"[junc[t+1]&1], (int8_t)(junc[t+1]>>1) - (int8_t)KSW_SPSC_OFFSET);
|
201
|
+
} else if (junc) { // junc[] keeps the splice sites
|
195
202
|
if (!(flag & KSW_EZ_REV_CIGAR)) {
|
196
203
|
for (t = 0; t < tlen - 1; ++t)
|
197
204
|
if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t+1]&1)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t+1]&8)))
|
@@ -445,10 +452,14 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|
445
452
|
if (!approx_max) kfree(km, H);
|
446
453
|
if (with_cigar) { // backtrack
|
447
454
|
int rev_cigar = !!(flag & KSW_EZ_REV_CIGAR);
|
448
|
-
if (!ez->zdropped && !(flag&KSW_EZ_EXTZ_ONLY))
|
455
|
+
if (!ez->zdropped && !(flag&KSW_EZ_EXTZ_ONLY)) {
|
449
456
|
ksw_backtrack(km, 1, rev_cigar, long_thres, (uint8_t*)p, off, off_end, n_col_*16, tlen-1, qlen-1, &ez->m_cigar, &ez->n_cigar, &ez->cigar);
|
450
|
-
else if (ez->
|
457
|
+
} else if (!ez->zdropped && (flag&KSW_EZ_EXTZ_ONLY) && ez->mqe + end_bonus > (int)ez->max) {
|
458
|
+
ez->reach_end = 1;
|
459
|
+
ksw_backtrack(km, 1, rev_cigar, long_thres, (uint8_t*)p, off, off_end, n_col_*16, ez->mqe_t, qlen-1, &ez->m_cigar, &ez->n_cigar, &ez->cigar);
|
460
|
+
} else if (ez->max_t >= 0 && ez->max_q >= 0) {
|
451
461
|
ksw_backtrack(km, 1, rev_cigar, long_thres, (uint8_t*)p, off, off_end, n_col_*16, ez->max_t, ez->max_q, &ez->m_cigar, &ez->n_cigar, &ez->cigar);
|
462
|
+
}
|
452
463
|
kfree(km, mem2); kfree(km, off);
|
453
464
|
}
|
454
465
|
}
|
data/ext/minimap2/main.c
CHANGED
@@ -35,12 +35,12 @@ static ko_longopt_t long_options[] = {
|
|
35
35
|
{ "splice", ko_no_argument, 310 },
|
36
36
|
{ "cost-non-gt-ag", ko_required_argument, 'C' },
|
37
37
|
{ "no-long-join", ko_no_argument, 312 },
|
38
|
-
{ "sr",
|
38
|
+
{ "sr", ko_optional_argument, 313 },
|
39
39
|
{ "frag", ko_required_argument, 314 },
|
40
40
|
{ "secondary", ko_required_argument, 315 },
|
41
41
|
{ "cs", ko_optional_argument, 316 },
|
42
42
|
{ "end-bonus", ko_required_argument, 317 },
|
43
|
-
{ "no-pairing", ko_no_argument, 318 },
|
43
|
+
{ "no-pairing", ko_no_argument, 318 }, // deprecated but reserved for backward compatibility
|
44
44
|
{ "splice-flank", ko_required_argument, 319 },
|
45
45
|
{ "idx-no-seq", ko_no_argument, 320 },
|
46
46
|
{ "end-seed-pen", ko_required_argument, 321 },
|
@@ -79,6 +79,12 @@ static ko_longopt_t long_options[] = {
|
|
79
79
|
{ "secondary-seq", ko_no_argument, 354 },
|
80
80
|
{ "ds", ko_no_argument, 355 },
|
81
81
|
{ "rmq-inner", ko_required_argument, 356 },
|
82
|
+
{ "spsc", ko_required_argument, 357 },
|
83
|
+
{ "junc-pen", ko_required_argument, 358 },
|
84
|
+
{ "pairing", ko_required_argument, 359 },
|
85
|
+
{ "jump-min-match", ko_required_argument, 360 },
|
86
|
+
{ "write-junc", ko_no_argument, 361 },
|
87
|
+
{ "pass1", ko_required_argument, 362 },
|
82
88
|
{ "dbg-seed-occ", ko_no_argument, 501 },
|
83
89
|
{ "help", ko_no_argument, 'h' },
|
84
90
|
{ "max-intron-len", ko_required_argument, 'G' },
|
@@ -123,12 +129,12 @@ static inline void yes_or_no(mm_mapopt_t *opt, int64_t flag, int long_idx, const
|
|
123
129
|
|
124
130
|
int main(int argc, char *argv[])
|
125
131
|
{
|
126
|
-
const char *opt_str = "2aSDw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:b:O:E:m:N:Qu:R:hF:LC:yYPo:e:U:J:";
|
132
|
+
const char *opt_str = "2aSDw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:b:O:E:m:N:Qu:R:hF:LC:yYPo:e:U:J:j:";
|
127
133
|
ketopt_t o = KETOPT_INIT;
|
128
134
|
mm_mapopt_t opt;
|
129
135
|
mm_idxopt_t ipt;
|
130
136
|
int i, c, n_threads = 3, n_parts, old_best_n = -1;
|
131
|
-
char *fnw = 0, *rg = 0, *
|
137
|
+
char *fnw = 0, *rg = 0, *fn_bed_junc = 0, *fn_bed_jump = 0, *fn_bed_pass1 = 0, *fn_spsc = 0, *s, *alt_list = 0;
|
132
138
|
FILE *fp_help = stderr;
|
133
139
|
mm_idx_reader_t *idx_rdr;
|
134
140
|
mm_idx_t *mi;
|
@@ -190,6 +196,7 @@ int main(int argc, char *argv[])
|
|
190
196
|
else if (c == 'R') rg = o.arg;
|
191
197
|
else if (c == 'h') fp_help = stdout;
|
192
198
|
else if (c == '2') opt.flag |= MM_F_2_IO_THREADS;
|
199
|
+
else if (c == 'j') fn_bed_jump = o.arg;
|
193
200
|
else if (c == 'J') {
|
194
201
|
int t;
|
195
202
|
t = atoi(o.arg);
|
@@ -214,9 +221,8 @@ int main(int argc, char *argv[])
|
|
214
221
|
else if (c == 309) mm_dbg_flag |= MM_DBG_PRINT_QNAME | MM_DBG_PRINT_ALN_SEQ, n_threads = 1; // --print-aln-seq
|
215
222
|
else if (c == 310) opt.flag |= MM_F_SPLICE; // --splice
|
216
223
|
else if (c == 312) opt.flag |= MM_F_NO_LJOIN; // --no-long-join
|
217
|
-
else if (c == 313) opt.flag |= MM_F_SR; // --sr
|
218
224
|
else if (c == 317) opt.end_bonus = atoi(o.arg); // --end-bonus
|
219
|
-
else if (c == 318) opt.flag |= MM_F_INDEPEND_SEG; // --no-pairing
|
225
|
+
else if (c == 318) opt.flag |= MM_F_INDEPEND_SEG; // --no-pairing (deprecated)
|
220
226
|
else if (c == 320) ipt.flag |= MM_I_NO_SEQ; // --idx-no-seq
|
221
227
|
else if (c == 321) opt.anchor_ext_shift = atoi(o.arg); // --end-seed-pen
|
222
228
|
else if (c == 322) opt.flag |= MM_F_FOR_ONLY; // --for-only
|
@@ -232,8 +238,9 @@ int main(int argc, char *argv[])
|
|
232
238
|
else if (c == 336) opt.flag |= MM_F_HARD_MLEVEL; // --hard-mask-level
|
233
239
|
else if (c == 337) opt.max_sw_mat = mm_parse_num(o.arg); // --cap-sw-mat
|
234
240
|
else if (c == 338) opt.max_qlen = mm_parse_num(o.arg); // --max-qlen
|
235
|
-
else if (c == 340)
|
241
|
+
else if (c == 340) fn_bed_junc = o.arg; // --junc-bed
|
236
242
|
else if (c == 341) opt.junc_bonus = atoi(o.arg); // --junc-bonus
|
243
|
+
else if (c == 358) opt.junc_pen = atoi(o.arg); // --junc-pen
|
237
244
|
else if (c == 342) opt.flag |= MM_F_SAM_HIT_ONLY; // --sam-hit-only
|
238
245
|
else if (c == 343) opt.chain_gap_scale = atof(o.arg); // --chain-gap-scale
|
239
246
|
else if (c == 351) opt.chain_skip_scale = atof(o.arg); // --chain-skip-scale
|
@@ -248,9 +255,24 @@ int main(int argc, char *argv[])
|
|
248
255
|
else if (c == 354) opt.flag |= MM_F_SECONDARY_SEQ; // --secondary-seq
|
249
256
|
else if (c == 355) opt.flag |= MM_F_OUT_DS; // --ds
|
250
257
|
else if (c == 356) opt.rmq_inner_dist = mm_parse_num(o.arg); // --rmq-inner
|
258
|
+
else if (c == 357) fn_spsc = o.arg; // --spsc
|
259
|
+
else if (c == 360) opt.jump_min_match = mm_parse_num(o.arg); // --jump-min-match
|
260
|
+
else if (c == 361) opt.flag |= MM_F_OUT_JUNC | MM_F_CIGAR; // --write-junc
|
261
|
+
else if (c == 362) fn_bed_pass1 = o.arg; // --pass1
|
251
262
|
else if (c == 501) mm_dbg_flag |= MM_DBG_SEED_FREQ; // --dbg-seed-occ
|
252
263
|
else if (c == 330) {
|
253
264
|
fprintf(stderr, "[WARNING] \033[1;31m --lj-min-ratio has been deprecated.\033[0m\n");
|
265
|
+
} else if (c == 313) { // --sr
|
266
|
+
if (o.arg == 0 || strcmp(o.arg, "dna") == 0) {
|
267
|
+
opt.flag |= MM_F_SR;
|
268
|
+
} else if (strcmp(o.arg, "rna") == 0) {
|
269
|
+
opt.flag |= MM_F_SR_RNA;
|
270
|
+
} else if (strcmp(o.arg, "no") == 0) {
|
271
|
+
opt.flag &= ~(uint64_t)(MM_F_SR|MM_F_SR_RNA);
|
272
|
+
} else if (mm_verbose >= 2) {
|
273
|
+
opt.flag |= MM_F_SR;
|
274
|
+
fprintf(stderr, "[WARNING]\033[1;31m --sr only takes 'dna' or 'rna'. Invalid values are assumed to be 'dna'.\033[0m\n");
|
275
|
+
}
|
254
276
|
} else if (c == 314) { // --frag
|
255
277
|
yes_or_no(&opt, MM_F_FRAG_MODE, o.longidx, o.arg, 1);
|
256
278
|
} else if (c == 315) { // --secondary
|
@@ -275,6 +297,14 @@ int main(int argc, char *argv[])
|
|
275
297
|
} else if (c == 347) { // --rmq
|
276
298
|
if (o.arg) yes_or_no(&opt, MM_F_RMQ, o.longidx, o.arg, 1);
|
277
299
|
else opt.flag |= MM_F_RMQ;
|
300
|
+
} else if (c == 359) { // --pairing
|
301
|
+
if (strcmp(o.arg, "no") == 0) opt.flag |= MM_F_INDEPEND_SEG;
|
302
|
+
else if (strcmp(o.arg, "weak") == 0) opt.flag |= MM_F_WEAK_PAIRING, opt.flag &= ~(uint64_t)MM_F_INDEPEND_SEG;
|
303
|
+
else {
|
304
|
+
if (strcmp(o.arg, "strong") != 0 && mm_verbose >= 2)
|
305
|
+
fprintf(stderr, "[WARNING]\033[1;31m unrecognized argument for --pairing; assuming 'strong'.\033[0m\n");
|
306
|
+
opt.flag &= ~(uint64_t)(MM_F_INDEPEND_SEG|MM_F_WEAK_PAIRING);
|
307
|
+
}
|
278
308
|
} else if (c == 'S') {
|
279
309
|
opt.flag |= MM_F_OUT_CS | MM_F_CIGAR | MM_F_OUT_CS_LONG;
|
280
310
|
if (mm_verbose >= 2)
|
@@ -315,10 +345,6 @@ int main(int argc, char *argv[])
|
|
315
345
|
if (*s == ',') opt.e2 = strtol(s + 1, &s, 10);
|
316
346
|
}
|
317
347
|
}
|
318
|
-
if ((opt.flag & MM_F_SPLICE) && (opt.flag & MM_F_FRAG_MODE)) {
|
319
|
-
fprintf(stderr, "[ERROR]\033[1;31m --splice and --frag should not be specified at the same time.\033[0m\n");
|
320
|
-
return 1;
|
321
|
-
}
|
322
348
|
if (!fnw && !(opt.flag&MM_F_CIGAR))
|
323
349
|
ipt.flag |= MM_I_NO_SEQ;
|
324
350
|
if (mm_check_opt(&ipt, &opt) < 0)
|
@@ -358,6 +384,7 @@ int main(int argc, char *argv[])
|
|
358
384
|
fprintf(fp_help, " -s INT minimal peak DP alignment score [%d]\n", opt.min_dp_max);
|
359
385
|
fprintf(fp_help, " -u CHAR how to find GT-AG. f:transcript strand, b:both strands, n:don't match GT-AG [n]\n");
|
360
386
|
fprintf(fp_help, " -J INT splice mode. 0: original minimap2 model; 1: miniprot model [1]\n");
|
387
|
+
fprintf(fp_help, " -j FILE junctions in BED12 to extend *short* RNA-seq alignment []\n");
|
361
388
|
fprintf(fp_help, " Input/Output:\n");
|
362
389
|
fprintf(fp_help, " -a output in the SAM format (PAF by default)\n");
|
363
390
|
fprintf(fp_help, " -o FILE output alignments to FILE [stdout]\n");
|
@@ -369,6 +396,7 @@ int main(int argc, char *argv[])
|
|
369
396
|
fprintf(fp_help, " --MD output the MD tag\n");
|
370
397
|
fprintf(fp_help, " --eqx write =/X CIGAR operators\n");
|
371
398
|
fprintf(fp_help, " -Y use soft clipping for supplementary alignments\n");
|
399
|
+
fprintf(fp_help, " -y copy FASTA/Q comments to output SAM\n");
|
372
400
|
fprintf(fp_help, " -t INT number of threads [%d]\n", n_threads);
|
373
401
|
fprintf(fp_help, " -K NUM minibatch size for mapping [500M]\n");
|
374
402
|
// fprintf(fp_help, " -v INT verbose level [%d]\n", mm_verbose);
|
@@ -377,6 +405,7 @@ int main(int argc, char *argv[])
|
|
377
405
|
fprintf(fp_help, " -x STR preset (always applied before other options; see minimap2.1 for details) []\n");
|
378
406
|
fprintf(fp_help, " - lr:hq - accurate long reads (error rate <1%%) against a reference genome\n");
|
379
407
|
fprintf(fp_help, " - splice/splice:hq - spliced alignment for long reads/accurate long reads\n");
|
408
|
+
fprintf(fp_help, " - splice:sr - spliced alignment for short RNA-seq reads\n");
|
380
409
|
fprintf(fp_help, " - asm5/asm10/asm20 - asm-to-ref mapping, for ~0.1/1/5%% sequence divergence\n");
|
381
410
|
fprintf(fp_help, " - sr - short reads against a reference\n");
|
382
411
|
fprintf(fp_help, " - map-pb/map-hifi/map-ont/map-iclr - CLR/HiFi/Nanopore/ICLR vs reference mapping\n");
|
@@ -431,7 +460,26 @@ int main(int argc, char *argv[])
|
|
431
460
|
__func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0), mi->n_seq);
|
432
461
|
if (argc != o.ind + 1) mm_mapopt_update(&opt, mi);
|
433
462
|
if (mm_verbose >= 3) mm_idx_stat(mi);
|
434
|
-
if (
|
463
|
+
if (fn_bed_junc) {
|
464
|
+
mm_idx_bed_read(mi, fn_bed_junc, 1);
|
465
|
+
if (mi->I == 0 && mm_verbose >= 2)
|
466
|
+
fprintf(stderr, "[WARNING] failed to load the junction BED file\n");
|
467
|
+
}
|
468
|
+
if (fn_bed_jump) {
|
469
|
+
mm_idx_jjump_read(mi, fn_bed_jump, MM_JUNC_ANNO, -1);
|
470
|
+
if (mi->J == 0 && mm_verbose >= 2)
|
471
|
+
fprintf(stderr, "[WARNING] failed to load the jump BED file\n");
|
472
|
+
}
|
473
|
+
if (fn_bed_pass1) {
|
474
|
+
mm_idx_jjump_read(mi, fn_bed_pass1, MM_JUNC_MISC, 5);
|
475
|
+
if (mi->J == 0 && mm_verbose >= 2)
|
476
|
+
fprintf(stderr, "[WARNING] failed to load the pass-1 jump BED file\n");
|
477
|
+
}
|
478
|
+
if (fn_spsc) {
|
479
|
+
mm_idx_spsc_read(mi, fn_spsc, mm_max_spsc_bonus(&opt));
|
480
|
+
if (mi->spsc == 0 && mm_verbose >= 2)
|
481
|
+
fprintf(stderr, "[WARNING] failed to load the splice score file\n");
|
482
|
+
}
|
435
483
|
if (alt_list) mm_idx_alt_read(mi, alt_list);
|
436
484
|
if (argc - (o.ind + 1) == 0) {
|
437
485
|
mm_idx_destroy(mi);
|
data/ext/minimap2/map.c
CHANGED
@@ -224,10 +224,10 @@ static mm_reg1_t *align_regs(const mm_mapopt_t *opt, const mm_idx_t *mi, void *k
|
|
224
224
|
return regs;
|
225
225
|
}
|
226
226
|
|
227
|
-
void
|
227
|
+
void mm_map_frag_core(const mm_idx_t *mi, int n_segs, const int *qlens, const char **seqs, int *n_regs, mm_reg1_t **regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *qname)
|
228
228
|
{
|
229
229
|
int i, j, rep_len, qlen_sum, n_regs0, n_mini_pos;
|
230
|
-
int max_chain_gap_qry, max_chain_gap_ref, is_splice = !!(opt->flag & MM_F_SPLICE), is_sr = !!(opt->flag & MM_F_SR);
|
230
|
+
int max_chain_gap_qry, max_chain_gap_ref, is_splice = !!(opt->flag & MM_F_SPLICE), is_sr = !!(opt->flag & MM_F_SR), is_sr_rna = !!(opt->flag & MM_F_SR_RNA);
|
231
231
|
uint32_t hash;
|
232
232
|
int64_t n_a;
|
233
233
|
uint64_t *u, *mini_pos;
|
@@ -338,7 +338,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
|
|
338
338
|
if (n_segs == 1) { // uni-segment
|
339
339
|
regs0 = align_regs(opt, mi, b->km, qlens[0], seqs[0], &n_regs0, regs0, a);
|
340
340
|
regs0 = (mm_reg1_t*)realloc(regs0, sizeof(*regs0) * n_regs0);
|
341
|
-
|
341
|
+
mm_set_mapq2(b->km, n_regs0, regs0, opt->min_chain_score, opt->a, rep_len, is_sr || is_sr_rna, is_splice);
|
342
342
|
n_regs[0] = n_regs0, regs[0] = regs0;
|
343
343
|
} else { // multi-segment
|
344
344
|
mm_seg_t *seg;
|
@@ -347,7 +347,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
|
|
347
347
|
for (i = 0; i < n_segs; ++i) {
|
348
348
|
mm_set_parent(b->km, opt->mask_level, opt->mask_len, n_regs[i], regs[i], opt->a * 2 + opt->b, opt->flag&MM_F_HARD_MLEVEL, opt->alt_drop); // update mm_reg1_t::parent
|
349
349
|
regs[i] = align_regs(opt, mi, b->km, qlens[i], seqs[i], &n_regs[i], regs[i], seg[i].a);
|
350
|
-
|
350
|
+
mm_set_mapq2(b->km, n_regs[i], regs[i], opt->min_chain_score, opt->a, rep_len, is_sr || is_sr_rna, is_splice);
|
351
351
|
}
|
352
352
|
mm_seg_free(b->km, n_segs, seg);
|
353
353
|
if (n_segs == 2 && opt->pe_ori >= 0 && (opt->flag&MM_F_CIGAR))
|
@@ -359,6 +359,10 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
|
|
359
359
|
kfree(b->km, u);
|
360
360
|
kfree(b->km, mini_pos);
|
361
361
|
|
362
|
+
if (mi->J && n_segs == 1 && is_splice)
|
363
|
+
for (i = 0; i < n_regs0; ++i)
|
364
|
+
mm_jump_split(b->km, mi, opt, qlens[0], (const uint8_t*)seqs[0], &regs0[i], 0);
|
365
|
+
|
362
366
|
if (b->km) {
|
363
367
|
km_stat(b->km, &kmst);
|
364
368
|
if (mm_dbg_flag & MM_DBG_PRINT_QNAME)
|
@@ -373,6 +377,18 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
|
|
373
377
|
}
|
374
378
|
}
|
375
379
|
|
380
|
+
void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **seqs, int *n_regs, mm_reg1_t **regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *qname)
|
381
|
+
{
|
382
|
+
if ((opt->flag & MM_F_WEAK_PAIRING) && n_segs == 2 && opt->pe_ori >= 0 && (opt->flag&MM_F_CIGAR)) {
|
383
|
+
int i;
|
384
|
+
for (i = 0; i < n_segs; ++i)
|
385
|
+
mm_map_frag_core(mi, 1, &qlens[i], &seqs[i], &n_regs[i], &regs[i], b, opt, qname);
|
386
|
+
mm_pair(b->km, opt->max_gap_ref, opt->pe_bonus, opt->a * 2 + opt->b, opt->a, qlens, n_regs, regs);
|
387
|
+
} else {
|
388
|
+
mm_map_frag_core(mi, n_segs, qlens, seqs, n_regs, regs, b, opt, qname);
|
389
|
+
}
|
390
|
+
}
|
391
|
+
|
376
392
|
mm_reg1_t *mm_map(const mm_idx_t *mi, int qlen, const char *seq, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *qname)
|
377
393
|
{
|
378
394
|
mm_reg1_t *regs;
|
@@ -447,6 +463,10 @@ static void worker_for(void *_data, long i, int tid) // kt_for() callback
|
|
447
463
|
r->qs = qlens[j] - r->qe;
|
448
464
|
r->qe = qlens[j] - t;
|
449
465
|
r->rev = !r->rev;
|
466
|
+
if (r->p) {
|
467
|
+
if (r->p->trans_strand == 1) r->p->trans_strand = 2;
|
468
|
+
else if (r->p->trans_strand == 2) r->p->trans_strand = 1;
|
469
|
+
}
|
450
470
|
}
|
451
471
|
}
|
452
472
|
if (mm_dbg_flag & MM_DBG_PRINT_QNAME)
|
@@ -509,7 +529,7 @@ static void merge_hits(step_t *s)
|
|
509
529
|
mm_select_sub(km, opt->pri_ratio, s->p->mi->k*2, opt->best_n, 0, opt->max_gap * 0.8, &s->n_reg[k], s->reg[k]);
|
510
530
|
mm_set_sam_pri(s->n_reg[k], s->reg[k]);
|
511
531
|
}
|
512
|
-
|
532
|
+
mm_set_mapq2(km, s->n_reg[k], s->reg[k], opt->min_chain_score, opt->a, rep_len, !!(opt->flag & (MM_F_SR|MM_F_SR_RNA)), !!(opt->flag & MM_F_SPLICE));
|
513
533
|
}
|
514
534
|
if (s->n_seg[f] == 2 && opt->pe_ori >= 0 && (opt->flag&MM_F_CIGAR))
|
515
535
|
mm_pair(km, frag_gap_part[0], opt->pe_bonus, opt->a * 2 + opt->b, opt->a, qlens, &s->n_reg[k0], &s->reg[k0]);
|
@@ -578,23 +598,30 @@ static void *worker_pipeline(void *shared, int step, void *in)
|
|
578
598
|
mm_err_fwrite(r->p, r->p->capacity, 4, p->fp_split);
|
579
599
|
}
|
580
600
|
}
|
601
|
+
} else if (p->opt->flag & MM_F_OUT_JUNC) { // extra logic for --write-junc
|
602
|
+
for (j = 0; j < s->n_reg[i]; ++j) {
|
603
|
+
const mm_reg1_t *r = &s->reg[i][j];
|
604
|
+
if (r->id != r->parent || r->mapq < 10) continue;
|
605
|
+
mm_write_junc(&p->str, mi, t, r);
|
606
|
+
if (p->str.l > 0) mm_err_puts(p->str.s);
|
607
|
+
}
|
581
608
|
} else if (s->n_reg[i] > 0) { // the query has at least one hit
|
582
609
|
for (j = 0; j < s->n_reg[i]; ++j) {
|
583
|
-
mm_reg1_t *r = &s->reg[i][j];
|
610
|
+
const mm_reg1_t *r = &s->reg[i][j];
|
584
611
|
assert(!r->sam_pri || r->id == r->parent);
|
585
612
|
if ((p->opt->flag & MM_F_NO_PRINT_2ND) && r->id != r->parent)
|
586
613
|
continue;
|
587
614
|
if (p->opt->flag & MM_F_OUT_SAM)
|
588
615
|
mm_write_sam3(&p->str, mi, t, i - seg_st, j, s->n_seg[k], &s->n_reg[seg_st], (const mm_reg1_t*const*)&s->reg[seg_st], km, p->opt->flag, s->rep_len[i]);
|
589
616
|
else
|
590
|
-
|
617
|
+
mm_write_paf4(&p->str, mi, t, r, km, p->opt->flag, s->rep_len[i], s->n_seg[k], i - seg_st);
|
591
618
|
mm_err_puts(p->str.s);
|
592
619
|
}
|
593
620
|
} else if ((p->opt->flag & MM_F_PAF_NO_HIT) || ((p->opt->flag & MM_F_OUT_SAM) && !(p->opt->flag & MM_F_SAM_HIT_ONLY))) { // output an empty hit, if requested
|
594
621
|
if (p->opt->flag & MM_F_OUT_SAM)
|
595
622
|
mm_write_sam3(&p->str, mi, t, i - seg_st, -1, s->n_seg[k], &s->n_reg[seg_st], (const mm_reg1_t*const*)&s->reg[seg_st], km, p->opt->flag, s->rep_len[i]);
|
596
623
|
else
|
597
|
-
|
624
|
+
mm_write_paf4(&p->str, mi, t, 0, 0, p->opt->flag, s->rep_len[i], s->n_seg[k], i - seg_st);
|
598
625
|
mm_err_puts(p->str.s);
|
599
626
|
}
|
600
627
|
}
|