minimap2 0.2.27.0 → 0.2.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -0
  3. data/ext/cmappy/cmappy.c +3 -3
  4. data/ext/cmappy/cmappy.h +1 -1
  5. data/ext/minimap2/FAQ.md +1 -1
  6. data/ext/minimap2/Makefile +4 -3
  7. data/ext/minimap2/NEWS.md +68 -0
  8. data/ext/minimap2/README.md +30 -14
  9. data/ext/minimap2/align.c +136 -52
  10. data/ext/minimap2/cookbook.md +2 -2
  11. data/ext/minimap2/format.c +59 -5
  12. data/ext/minimap2/hit.c +14 -6
  13. data/ext/minimap2/index.c +304 -13
  14. data/ext/minimap2/jump.c +201 -0
  15. data/ext/minimap2/kalloc.h +8 -0
  16. data/ext/minimap2/ksw2.h +5 -2
  17. data/ext/minimap2/ksw2_dispatch.c +5 -5
  18. data/ext/minimap2/ksw2_exts2_sse.c +17 -6
  19. data/ext/minimap2/lchain.c +5 -5
  20. data/ext/minimap2/main.c +64 -12
  21. data/ext/minimap2/map.c +35 -8
  22. data/ext/minimap2/minimap.h +14 -3
  23. data/ext/minimap2/minimap2.1 +98 -46
  24. data/ext/minimap2/misc/README.md +2 -1
  25. data/ext/minimap2/misc/pafcluster.js +241 -0
  26. data/ext/minimap2/misc/paftools.js +17 -6
  27. data/ext/minimap2/mmpriv.h +25 -4
  28. data/ext/minimap2/options.c +36 -3
  29. data/ext/minimap2/python/cmappy.h +3 -3
  30. data/ext/minimap2/python/cmappy.pxd +5 -2
  31. data/ext/minimap2/python/mappy.pyx +20 -7
  32. data/ext/minimap2/python/minimap2.py +5 -3
  33. data/ext/minimap2/seed.c +2 -1
  34. data/ext/minimap2/setup.py +2 -2
  35. data/ext/minimap2.patch +2 -2
  36. data/lib/minimap2/aligner.rb +19 -12
  37. data/lib/minimap2/alignment.rb +1 -0
  38. data/lib/minimap2/ffi/constants.rb +10 -2
  39. data/lib/minimap2/ffi/functions.rb +145 -6
  40. data/lib/minimap2/ffi/mappy.rb +1 -1
  41. data/lib/minimap2/version.rb +1 -1
  42. data/lib/minimap2.rb +2 -2
  43. metadata +8 -7
  44. data/ext/minimap2/misc/mmphase.js +0 -335
@@ -0,0 +1,201 @@
1
+ #include <stdio.h>
2
+ #include "mmpriv.h"
3
+ #include "kalloc.h"
4
+
5
+ #define MM_MIN_EXON_LEN 20
6
+
7
+ static int32_t mm_jump_check(void *km, const mm_idx_t *mi, int32_t qlen, const uint8_t *qseq0, const mm_reg1_t *r, int32_t ext, int32_t is_left) // TODO: check close N
8
+ { // returns 0 if the end of r selected by is_left may be extended across a jump, -1 otherwise; km and qseq0 are not used here
9
+ int32_t clip, clen, e = !!r->rev ^ !is_left; // 0: the clip on this side is r->qs; 1: it is qlen - r->qe (same choice as clip in mm_jump_split_left/right)
10
+ uint32_t cigar;
11
+ if (!r->p || r->p->n_cigar <= 0) return -1; // only working with CIGAR
12
+ clip = e == 0? r->qs : qlen - r->qe; // query bases outside the alignment on the side being checked
13
+ cigar = r->p->cigar[is_left? 0 : r->p->n_cigar - 1]; // the CIGAR operation at that end
14
+ clen = (cigar&0xf) == MM_CIGAR_MATCH? cigar>>4 : 0; // its length if it is a match, else 0
15
+ if (clen <= ext) return -1; // terminal match must be longer than ext
16
+ if (is_left) {
17
+ if (clip >= r->rs) return -1; // no space to jump
18
+ } else {
19
+ if (clip >= mi->seq[r->rid].len - r->re) return -1; // no space to jump
20
+ }
21
+ return 0;
22
+ }
23
+
24
+ static uint8_t *mm_jump_get_qseq_seq(void *km, int32_t qlen, const uint8_t *qseq0, const mm_reg1_t *r, int32_t is_left, int32_t ql0, uint8_t *qseq) // write ql0 encoded query bases from one end of qseq0 into qseq and return qseq; km is not used here
25
+ {
26
+ extern unsigned char seq_nt4_table[256]; // lookup table defined elsewhere; maps a base character to its code
27
+ int32_t i, k = 0;
28
+ if (!r->rev) {
29
+ if (is_left)
30
+ for (i = 0; i < ql0; ++i) // forward hit, left end: first ql0 query bases in order
31
+ qseq[k++] = seq_nt4_table[(uint8_t)qseq0[i]];
32
+ else
33
+ for (i = qlen - ql0; i < qlen; ++i) // forward hit, right end: last ql0 query bases in order
34
+ qseq[k++] = seq_nt4_table[(uint8_t)qseq0[i]];
35
+ } else {
36
+ if (is_left)
37
+ for (i = qlen - 1; i >= qlen - ql0; --i) { // reverse hit, left end: last ql0 query bases, read backwards
38
+ uint8_t c = seq_nt4_table[(uint8_t)qseq0[i]];
39
+ qseq[k++] = c >= 4? c : 3 - c; // codes 0..3 become 3 - c, codes >= 4 are kept; presumably the complement under a 2-bit A/C/G/T encoding - confirm against seq_nt4_table
40
+ }
41
+ else
42
+ for (i = ql0 - 1; i >= 0; --i) { // reverse hit, right end: first ql0 query bases, read backwards
43
+ uint8_t c = seq_nt4_table[(uint8_t)qseq0[i]];
44
+ qseq[k++] = c >= 4? c : 3 - c; // same mapping as in the branch above
45
+ }
46
+ }
47
+ return qseq;
48
+ }
49
+
50
+ static void mm_jump_split_left(void *km, const mm_idx_t *mi, const mm_mapopt_t *opt, int32_t qlen, const uint8_t *qseq0, mm_reg1_t *r, int32_t ts_strand) // use jumps near r->rs to either prepend a match+skip pair to r's CIGAR or trim its first match
51
+ {
52
+ uint8_t *tseq = 0, *qseq = 0;
53
+ int32_t i, n, l, i0, m, mm0;
54
+ int32_t i0_anno = -1, n_anno = 0, mm0_anno = 0, i0_misc = -1, n_misc = 0, mm0_misc = 0; // per class (MM_JUNC_ANNO flagged vs other): last accepted index, accepted count, its mismatch count
55
+ int32_t ext = 1 + (opt->b + opt->a - 1) / opt->a + 1; // 2 + ceil(opt->b / opt->a): half-width of the search window around r->rs
56
+ int32_t clip = !r->rev? r->qs : qlen - r->qe; // query bases outside the alignment on this side
57
+ int32_t extt = clip < ext? clip : ext; // min(clip, ext)
58
+ const mm_idx_jjump1_t *a;
59
+
60
+ if (mm_jump_check(km, mi, qlen, qseq0, r, ext + MM_MIN_EXON_LEN, 1) < 0) return; // first CIGAR op must be a match longer than ext + MM_MIN_EXON_LEN
61
+ a = mm_idx_jump_get(mi, r->rid, r->rs - extt, r->rs + ext, &n); // jumps with off in [r->rs - extt, r->rs + ext], per the assert below
62
+ if (n == 0) return;
63
+
64
+ for (i = 0; i < n; ++i) { // traverse possible jumps
65
+ const mm_idx_jjump1_t *ai = &a[i];
66
+ int32_t tlen, tl1, j, mm1, mm2;
67
+ assert(ai->off >= r->rs - extt && ai->off <= r->rs + ext);
68
+ if (ts_strand * ai->strand < 0) continue; // wrong strand
69
+ if (ai->off2 >= ai->off) continue; // wrong direction
70
+ if (ai->off - ai->off2 < 6) continue; // intron too small
71
+ if (ai->off2 < clip + ext) continue; // not long enough
72
+ if (tseq == 0) { // allocate and fill the query buffer only once a candidate survives the filters
73
+ tseq = Kcalloc(km, uint8_t, (clip + ext) * 2); // tseq and qseq are allocated together
74
+ qseq = tseq + clip + ext;
75
+ mm_jump_get_qseq_seq(km, qlen, qseq0, r, 1, clip + ext, qseq);
76
+ }
77
+ tl1 = clip + (ai->off - r->rs); // number of bases to take from the far side of the jump, ending at ai->off2
78
+ tlen = mm_idx_getseq2(mi, 0, r->rid, ai->off, r->rs + ext, &tseq[tl1]); // near side: reference [ai->off, r->rs + ext) fills the tail of tseq
79
+ assert(tlen == r->rs + ext - ai->off);
80
+ tlen = mm_idx_getseq2(mi, 0, r->rid, ai->off2 - tl1, ai->off2, tseq); // far side: reference [ai->off2 - tl1, ai->off2) fills the head of tseq
81
+ assert(tlen == tl1);
82
+ for (j = 0, mm1 = 0; j < tl1; ++j) // mm1: differing or code > 3 positions on the far side
83
+ if (qseq[j] != tseq[j] || qseq[j] > 3 || tseq[j] > 3)
84
+ ++mm1;
85
+ for (mm2 = 0; j < clip + ext; ++j) // mm2: the same count on the near side
86
+ if (qseq[j] != tseq[j] || qseq[j] > 3 || tseq[j] > 3)
87
+ ++mm2;
88
+ if (mm1 == 0 && mm2 <= 1) { // accept: far side exact, at most one mismatch on the near side
89
+ if (ai->flag & MM_JUNC_ANNO)
90
+ i0_anno = i, mm0_anno = mm1 + mm2, ++n_anno; // i0 points to the rightmost i
91
+ else
92
+ i0_misc = i, mm0_misc = mm1 + mm2, ++n_misc;
93
+ }
94
+ }
95
+ if (n_anno > 0) m = n_anno, i0 = i0_anno, mm0 = mm0_anno; // MM_JUNC_ANNO candidates take precedence over the others
96
+ else m = n_misc, i0 = i0_misc, mm0 = mm0_misc;
97
+ kfree(km, tseq); // also releases qseq, which points into the same allocation
98
+
99
+ l = m > 0? a[i0].off - r->rs : 0; // may be negative
100
+ if (m == 1 && clip + l >= opt->jump_min_match) { // add one more exon
101
+ mm_enlarge_cigar(r, 2);
102
+ memmove(r->p->cigar + 2, r->p->cigar, r->p->n_cigar * 4); // shift existing ops right by two; the 4 assumes 4-byte CIGAR entries - confirm
103
+ r->p->cigar[0] = (clip + l) << 4 | MM_CIGAR_MATCH; // new leading match of clip + l bases
104
+ r->p->cigar[1] = (a[i0].off - a[i0].off2) << 4 | MM_CIGAR_N_SKIP; // skip spanning the jump
105
+ r->p->cigar[2] = ((r->p->cigar[2]>>4) - l) << 4 | MM_CIGAR_MATCH; // former first match, adjusted by l
106
+ r->p->n_cigar += 2;
107
+ r->rs = a[i0].off2 - (clip + l);
108
+ if (!r->rev) r->qs = 0; // the query is now covered to its end on this side
109
+ else r->qe = qlen;
110
+ r->blen += clip, r->mlen += clip - mm0;
111
+ r->p->dp_max0 += (clip - mm0) * opt->a - mm0 * opt->b;
112
+ r->p->dp_max += (clip - mm0) * opt->a - mm0 * opt->b;
113
+ if (!r->is_spliced) r->is_spliced = 1, r->p->dp_max += (opt->a + opt->b) + ((opt->a + opt->b) >> 1); // extra dp_max added only when r was not already marked spliced
114
+ } else if (m > 0 && a[i0].off > r->rs) { // trim by l; l is always positive
115
+ r->p->cigar[0] -= l << 4 | MM_CIGAR_MATCH; // parsed as -= ((l << 4) | MM_CIGAR_MATCH); NOTE(review): only the length field changes if MM_CIGAR_MATCH is 0 - confirm
116
+ r->rs += l;
117
+ if (!r->rev) r->qs += l;
118
+ else r->qe -= l;
119
+ }
120
+ }
121
+
122
+ static void mm_jump_split_right(void *km, const mm_idx_t *mi, const mm_mapopt_t *opt, int32_t qlen, const uint8_t *qseq0, mm_reg1_t *r, int32_t ts_strand) // use jumps near r->re to either append a skip+match pair to r's CIGAR or trim its last match
123
+ {
124
+ uint8_t *tseq = 0, *qseq = 0;
125
+ int32_t i, n, l, i0, m, mm0;
126
+ int32_t i0_anno = -1, n_anno = 0, mm0_anno = 0, i0_misc = -1, n_misc = 0, mm0_misc = 0; // per class (MM_JUNC_ANNO flagged vs other): first accepted index, accepted count, its mismatch count
127
+ int32_t ext = 1 + (opt->b + opt->a - 1) / opt->a + 1; // 2 + ceil(opt->b / opt->a): half-width of the search window around r->re
128
+ int32_t clip = !r->rev? qlen - r->qe : r->qs; // query bases outside the alignment on this side
129
+ int32_t extt = clip < ext? clip : ext; // min(clip, ext)
130
+ const mm_idx_jjump1_t *a;
131
+
132
+ if (mm_jump_check(km, mi, qlen, qseq0, r, ext + MM_MIN_EXON_LEN, 0) < 0) return; // last CIGAR op must be a match longer than ext + MM_MIN_EXON_LEN
133
+ a = mm_idx_jump_get(mi, r->rid, r->re - ext, r->re + extt, &n); // jumps with off in [r->re - ext, r->re + extt], per the assert below
134
+ if (n == 0) return;
135
+
136
+ for (i = 0; i < n; ++i) { // traverse possible jumps
137
+ const mm_idx_jjump1_t *ai = &a[i];
138
+ int32_t tlen, tl1, j, mm1, mm2;
139
+ assert(ai->off >= r->re - ext && ai->off <= r->re + extt);
140
+ if (ts_strand * ai->strand < 0) continue; // wrong strand
141
+ if (ai->off2 <= ai->off) continue; // wrong direction
142
+ if (ai->off2 - ai->off < 6) continue; // intron too small
143
+ if (ai->off2 + clip + ext > mi->seq[r->rid].len) continue; // not long enough
144
+ if (tseq == 0) { // allocate and fill the query buffer only once a candidate survives the filters
145
+ tseq = Kcalloc(km, uint8_t, (clip + ext) * 2); // tseq and qseq are allocated together
146
+ qseq = tseq + clip + ext;
147
+ mm_jump_get_qseq_seq(km, qlen, qseq0, r, 0, clip + ext, qseq);
148
+ }
149
+ tl1 = clip + (r->re - ai->off); // number of bases to take from the far side of the jump, starting at ai->off2
150
+ tlen = mm_idx_getseq2(mi, 0, r->rid, r->re - ext, ai->off, tseq); // near side: reference [r->re - ext, ai->off) fills the head of tseq
151
+ assert(tlen == ai->off - (r->re - ext));
152
+ tlen = mm_idx_getseq2(mi, 0, r->rid, ai->off2, ai->off2 + tl1, &tseq[clip + ext - tl1]); // far side: reference [ai->off2, ai->off2 + tl1) fills the tail of tseq
153
+ assert(tlen == tl1);
154
+ for (j = 0, mm2 = 0; j < clip + ext - tl1; ++j) // mm2: differing or code > 3 positions on the near side
155
+ if (qseq[j] != tseq[j] || qseq[j] > 3 || tseq[j] > 3)
156
+ ++mm2;
157
+ for (mm1 = 0; j < clip + ext; ++j) // mm1: the same count on the far side
158
+ if (qseq[j] != tseq[j] || qseq[j] > 3 || tseq[j] > 3)
159
+ ++mm1;
160
+ if (mm1 == 0 && mm2 <= 1) { // accept: far side exact, at most one mismatch on the near side
161
+ if (ai->flag & MM_JUNC_ANNO) {
162
+ if (i0_anno < 0) i0_anno = i, mm0_anno = mm1 + mm2; // remember only the first accepted candidate of this class
163
+ ++n_anno;
164
+ } else {
165
+ if (i0_misc < 0) i0_misc = i, mm0_misc = mm1 + mm2; // remember only the first accepted candidate of this class
166
+ ++n_misc;
167
+ }
168
+ }
169
+ }
170
+ if (n_anno > 0) m = n_anno, i0 = i0_anno, mm0 = mm0_anno; // MM_JUNC_ANNO candidates take precedence over the others
171
+ else m = n_misc, i0 = i0_misc, mm0 = mm0_misc;
172
+ kfree(km, tseq); // also releases qseq, which points into the same allocation
173
+
174
+ l = m > 0? r->re - a[i0].off : 0; // may be negative
175
+ if (m == 1 && clip + l >= opt->jump_min_match) { // add one more exon
176
+ mm_enlarge_cigar(r, 2);
177
+ r->p->cigar[r->p->n_cigar - 1] = ((r->p->cigar[r->p->n_cigar - 1]>>4) - l) << 4 | MM_CIGAR_MATCH; // former last match, adjusted by l
178
+ r->p->cigar[r->p->n_cigar] = (a[i0].off2 - a[i0].off) << 4 | MM_CIGAR_N_SKIP; // skip spanning the jump
179
+ r->p->cigar[r->p->n_cigar + 1] = (clip + l) << 4 | MM_CIGAR_MATCH; // new trailing match of clip + l bases
180
+ r->p->n_cigar += 2;
181
+ r->re = a[i0].off2 + (clip + l);
182
+ if (!r->rev) r->qe = qlen; // the query is now covered to its end on this side
183
+ else r->qs = 0;
184
+ r->blen += clip, r->mlen += clip - mm0;
185
+ r->p->dp_max0 += (clip - mm0) * opt->a - mm0 * opt->b;
186
+ r->p->dp_max += (clip - mm0) * opt->a - mm0 * opt->b;
187
+ if (!r->is_spliced) r->is_spliced = 1, r->p->dp_max += (opt->a + opt->b) + ((opt->a + opt->b) >> 1); // extra dp_max added only when r was not already marked spliced
188
+ } else if (m > 0 && r->re > a[i0].off) { // trim by l; l is always positive
189
+ r->p->cigar[r->p->n_cigar - 1] -= l << 4 | MM_CIGAR_MATCH; // parsed as -= ((l << 4) | MM_CIGAR_MATCH); NOTE(review): only the length field changes if MM_CIGAR_MATCH is 0 - confirm
190
+ r->re -= l;
191
+ if (!r->rev) r->qe -= l;
192
+ else r->qs += l;
193
+ }
194
+ }
195
+
196
+ void mm_jump_split(void *km, const mm_idx_t *mi, const mm_mapopt_t *opt, int32_t qlen, const uint8_t *qseq, mm_reg1_t *r, int32_t ts_strand) // entry point: apply the left-end and then the right-end jump adjustment to r
197
+ {
198
+ assert((opt->flag & MM_F_EQX) == 0); // the helpers only read and write MM_CIGAR_MATCH / MM_CIGAR_N_SKIP operations, so the MM_F_EQX flag must be off
199
+ mm_jump_split_left(km, mi, opt, qlen, qseq, r, ts_strand);
200
+ mm_jump_split_right(km, mi, opt, qlen, qseq, r, ts_strand);
201
+ }
@@ -31,6 +31,14 @@ void km_stat_print(const void *km);
31
31
  #define Kcalloc(km, type, cnt) ((type*)kcalloc((km), (cnt), sizeof(type)))
32
32
  #define Krealloc(km, type, ptr, cnt) ((type*)krealloc((km), (ptr), (cnt) * sizeof(type)))
33
33
 
34
+ #define Kgrow(km, type, ptr, __i, __m) do { /* make index __i fit in array ptr of capacity __m, reallocating via Krealloc if needed; __i and __m are evaluated more than once */ \
35
+ if ((__i) >= (__m)) { \
36
+ (__m) = (__i) + 1; /* smallest capacity that holds index __i */ \
37
+ (__m) += ((__m)>>1) + 16; /* plus half of that and 16 more as headroom */ \
38
+ (ptr) = Krealloc(km, type, ptr, (__m)); \
39
+ } \
40
+ } while (0)
41
+
34
42
  #define Kexpand(km, type, a, m) do { \
35
43
  (m) = (m) >= 4? (m) + ((m)>>1) : 16; \
36
44
  (a) = Krealloc(km, type, (a), (m)); \
data/ext/minimap2/ksw2.h CHANGED
@@ -15,7 +15,8 @@
15
15
  #define KSW_EZ_SPLICE_FOR 0x100
16
16
  #define KSW_EZ_SPLICE_REV 0x200
17
17
  #define KSW_EZ_SPLICE_FLANK 0x400
18
- #define KSW_EZ_SPLICE_CMPLX 0x800
18
+ #define KSW_EZ_SPLICE_CMPLX 0x800 // use the miniprot splice model
19
+ #define KSW_EZ_SPLICE_SCORE 0x1000 // use splice score
19
20
 
20
21
  // The subset of CIGAR operators used by ksw code.
21
22
  // Use MM_CIGAR_* from minimap.h if you need the full list.
@@ -24,6 +25,8 @@
24
25
  #define KSW_CIGAR_DEL 2
25
26
  #define KSW_CIGAR_N_SKIP 3
26
27
 
28
+ #define KSW_SPSC_OFFSET 64
29
+
27
30
  #ifdef __cplusplus
28
31
  extern "C" {
29
32
  #endif
@@ -69,7 +72,7 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
69
72
  int8_t gapo, int8_t gape, int8_t gapo2, int8_t gape2, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez);
70
73
 
71
74
  void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
72
- int8_t gapo, int8_t gape, int8_t gapo2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez);
75
+ int8_t gapo, int8_t gape, int8_t gapo2, int8_t noncan, int zdrop, int end_bonus, int8_t junc_bonus, int8_t junc_pen, int flag, const uint8_t *junc, ksw_extz_t *ez);
73
76
 
74
77
  void ksw_extf2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t mch, int8_t mis, int8_t e, int w, int xdrop, ksw_extz_t *ez);
75
78
 
@@ -80,17 +80,17 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
80
80
  }
81
81
 
82
82
  void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
83
- int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez)
83
+ int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int end_bonus, int8_t junc_bonus, int8_t junc_pen, int flag, const uint8_t *junc, ksw_extz_t *ez)
84
84
  {
85
85
  extern void ksw_exts2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
86
- int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez);
86
+ int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int end_bonus, int8_t junc_bonus, int8_t junc_pen, int flag, const uint8_t *junc, ksw_extz_t *ez);
87
87
  extern void ksw_exts2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
88
- int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez);
88
+ int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int end_bonus, int8_t junc_bonus, int8_t junc_pen, int flag, const uint8_t *junc, ksw_extz_t *ez);
89
89
  if (ksw_simd < 0) ksw_simd = x86_simd();
90
90
  if (ksw_simd & SIMD_SSE4_1)
91
- ksw_exts2_sse41(km, qlen, query, tlen, target, m, mat, q, e, q2, noncan, zdrop, junc_bonus, flag, junc, ez);
91
+ ksw_exts2_sse41(km, qlen, query, tlen, target, m, mat, q, e, q2, noncan, zdrop, end_bonus, junc_bonus, junc_pen, flag, junc, ez);
92
92
  else if (ksw_simd & SIMD_SSE2)
93
- ksw_exts2_sse2(km, qlen, query, tlen, target, m, mat, q, e, q2, noncan, zdrop, junc_bonus, flag, junc, ez);
93
+ ksw_exts2_sse2(km, qlen, query, tlen, target, m, mat, q, e, q2, noncan, zdrop, end_bonus, junc_bonus, junc_pen, flag, junc, ez);
94
94
  else abort();
95
95
  }
96
96
  #endif
@@ -24,14 +24,14 @@
24
24
  #ifdef KSW_CPU_DISPATCH
25
25
  #ifdef __SSE4_1__
26
26
  void ksw_exts2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
27
- int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez)
27
+ int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int end_bonus, int8_t junc_bonus, int8_t junc_pen, int flag, const uint8_t *junc, ksw_extz_t *ez)
28
28
  #else
29
29
  void ksw_exts2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
30
- int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez)
30
+ int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int end_bonus, int8_t junc_bonus, int8_t junc_pen, int flag, const uint8_t *junc, ksw_extz_t *ez)
31
31
  #endif
32
32
  #else
33
33
  void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
34
- int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez)
34
+ int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int end_bonus, int8_t junc_bonus, int8_t junc_pen, int flag, const uint8_t *junc, ksw_extz_t *ez)
35
35
  #endif // ~KSW_CPU_DISPATCH
36
36
  {
37
37
  #define __dp_code_block1 \
@@ -191,7 +191,14 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
191
191
  }
192
192
  }
193
193
 
194
- if (junc) {
194
+ if (junc && (flag & KSW_EZ_SPLICE_SCORE)) { // junc[] keeps the donor score
195
+ uint8_t donor_val = !!(flag & KSW_EZ_SPLICE_FOR) == !(flag & KSW_EZ_REV_CIGAR)? 0 : 1;
196
+ for (t = 0; t < tlen - 1; ++t)
197
+ ((int8_t*)donor)[t] += junc[t+1] == 0xff || (junc[t+1]&1) != donor_val? -junc_pen : (int8_t)(junc[t+1]>>1) - (int8_t)KSW_SPSC_OFFSET;
198
+ for (t = 0; t < tlen - 1; ++t)
199
+ ((int8_t*)acceptor)[t] += junc[t+1] == 0xff || (junc[t+1]&1) != !donor_val? -junc_pen : (int8_t)(junc[t+1]>>1) - (int8_t)KSW_SPSC_OFFSET;
200
+ //for (t = 0; t < tlen - 1; ++t) if (junc[t+1] != 0xff) fprintf(stderr, "Y2\t%d\t%d\t%c\t%d\n", ((int8_t*)donor)[t], ((int8_t*)acceptor)[t], "DA"[junc[t+1]&1], (int8_t)(junc[t+1]>>1) - (int8_t)KSW_SPSC_OFFSET);
201
+ } else if (junc) { // junc[] keeps the splice sites
195
202
  if (!(flag & KSW_EZ_REV_CIGAR)) {
196
203
  for (t = 0; t < tlen - 1; ++t)
197
204
  if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t+1]&1)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t+1]&8)))
@@ -445,10 +452,14 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
445
452
  if (!approx_max) kfree(km, H);
446
453
  if (with_cigar) { // backtrack
447
454
  int rev_cigar = !!(flag & KSW_EZ_REV_CIGAR);
448
- if (!ez->zdropped && !(flag&KSW_EZ_EXTZ_ONLY))
455
+ if (!ez->zdropped && !(flag&KSW_EZ_EXTZ_ONLY)) {
449
456
  ksw_backtrack(km, 1, rev_cigar, long_thres, (uint8_t*)p, off, off_end, n_col_*16, tlen-1, qlen-1, &ez->m_cigar, &ez->n_cigar, &ez->cigar);
450
- else if (ez->max_t >= 0 && ez->max_q >= 0)
457
+ } else if (!ez->zdropped && (flag&KSW_EZ_EXTZ_ONLY) && ez->mqe + end_bonus > (int)ez->max) {
458
+ ez->reach_end = 1;
459
+ ksw_backtrack(km, 1, rev_cigar, long_thres, (uint8_t*)p, off, off_end, n_col_*16, ez->mqe_t, qlen-1, &ez->m_cigar, &ez->n_cigar, &ez->cigar);
460
+ } else if (ez->max_t >= 0 && ez->max_q >= 0) {
451
461
  ksw_backtrack(km, 1, rev_cigar, long_thres, (uint8_t*)p, off, off_end, n_col_*16, ez->max_t, ez->max_q, &ez->m_cigar, &ez->n_cigar, &ez->cigar);
462
+ }
452
463
  kfree(km, mem2); kfree(km, off);
453
464
  }
454
465
  }
@@ -149,7 +149,7 @@ mm128_t *mg_lchain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int
149
149
  int is_cdna, int n_seg, int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km)
150
150
  { // TODO: make sure this works when n has more than 32 bits
151
151
  int32_t *f, *t, *v, n_u, n_v, mmax_f = 0, max_drop = bw;
152
- int64_t *p, i, j, max_ii, st = 0, n_iter = 0;
152
+ int64_t *p, i, j, max_ii, st = 0;
153
153
  uint64_t *u;
154
154
 
155
155
  if (_u) *_u = 0, *n_u_ = 0;
@@ -174,7 +174,6 @@ mm128_t *mg_lchain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int
174
174
  for (j = i - 1; j >= st; --j) {
175
175
  int32_t sc;
176
176
  sc = comput_sc(&a[i], &a[j], max_dist_x, max_dist_y, bw, chn_pen_gap, chn_pen_skip, is_cdna, n_seg);
177
- ++n_iter;
178
177
  if (sc == INT32_MIN) continue;
179
178
  sc += f[j];
180
179
  if (sc > max_f) {
@@ -204,6 +203,7 @@ mm128_t *mg_lchain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int
204
203
  if (max_ii < 0 || (a[i].x - a[max_ii].x <= (int64_t)max_dist_x && f[max_ii] < f[i]))
205
204
  max_ii = i;
206
205
  if (mmax_f < max_f) mmax_f = max_f;
206
+ //fprintf(stderr, "X1\t%ld\t%ld:%d\t%ld\t%ld:%d\t%ld\t%ld\n", (long)i, (long)(a[i].x>>32), (int32_t)a[i].x, (long)max_j, max_j<0?-1L:(long)(a[max_j].x>>32), max_j<0?-1:(int32_t)a[max_j].x, (long)max_f, (long)v[i]);
207
207
  }
208
208
 
209
209
  u = mg_chain_backtrack(km, n, f, p, v, t, min_cnt, min_sc, max_drop, &n_u, &n_v);
@@ -263,7 +263,8 @@ mm128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_ski
263
263
  return 0;
264
264
  }
265
265
  if (max_dist < bw) max_dist = bw;
266
- if (max_dist_inner <= 0 || max_dist_inner >= max_dist) max_dist_inner = 0;
266
+ if (max_dist_inner < 0) max_dist_inner = 0;
267
+ if (max_dist_inner > max_dist) max_dist_inner = max_dist;
267
268
  p = Kmalloc(km, int64_t, n);
268
269
  f = Kmalloc(km, int32_t, n);
269
270
  t = Kcalloc(km, int32_t, n);
@@ -325,12 +326,11 @@ mm128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_ski
325
326
  krmq_interval(lc_elem, root_inner, &s, &lo, &hi);
326
327
  if (lo) {
327
328
  const lc_elem_t *q;
328
- int32_t width, n_rmq_iter = 0;
329
+ int32_t width;
329
330
  krmq_itr_t(lc_elem) itr;
330
331
  krmq_itr_find(lc_elem, root_inner, lo, &itr);
331
332
  while ((q = krmq_at(&itr)) != 0) {
332
333
  if (q->y < (int32_t)a[i].y - max_dist_inner) break;
333
- ++n_rmq_iter;
334
334
  j = q->i;
335
335
  sc = f[j] + comput_sc_simple(&a[i], &a[j], chn_pen_gap, chn_pen_skip, 0, &width);
336
336
  if (width <= bw) {
data/ext/minimap2/main.c CHANGED
@@ -35,12 +35,12 @@ static ko_longopt_t long_options[] = {
35
35
  { "splice", ko_no_argument, 310 },
36
36
  { "cost-non-gt-ag", ko_required_argument, 'C' },
37
37
  { "no-long-join", ko_no_argument, 312 },
38
- { "sr", ko_no_argument, 313 },
38
+ { "sr", ko_optional_argument, 313 },
39
39
  { "frag", ko_required_argument, 314 },
40
40
  { "secondary", ko_required_argument, 315 },
41
41
  { "cs", ko_optional_argument, 316 },
42
42
  { "end-bonus", ko_required_argument, 317 },
43
- { "no-pairing", ko_no_argument, 318 },
43
+ { "no-pairing", ko_no_argument, 318 }, // deprecated but reserved for backward compatibility
44
44
  { "splice-flank", ko_required_argument, 319 },
45
45
  { "idx-no-seq", ko_no_argument, 320 },
46
46
  { "end-seed-pen", ko_required_argument, 321 },
@@ -78,6 +78,14 @@ static ko_longopt_t long_options[] = {
78
78
  { "no-hash-name", ko_no_argument, 353 },
79
79
  { "secondary-seq", ko_no_argument, 354 },
80
80
  { "ds", ko_no_argument, 355 },
81
+ { "rmq-inner", ko_required_argument, 356 },
82
+ { "spsc", ko_required_argument, 357 },
83
+ { "junc-pen", ko_required_argument, 358 },
84
+ { "pairing", ko_required_argument, 359 },
85
+ { "jump-min-match", ko_required_argument, 360 },
86
+ { "write-junc", ko_no_argument, 361 },
87
+ { "pass1", ko_required_argument, 362 },
88
+ { "dbg-seed-occ", ko_no_argument, 501 },
81
89
  { "help", ko_no_argument, 'h' },
82
90
  { "max-intron-len", ko_required_argument, 'G' },
83
91
  { "version", ko_no_argument, 'V' },
@@ -121,12 +129,12 @@ static inline void yes_or_no(mm_mapopt_t *opt, int64_t flag, int long_idx, const
121
129
 
122
130
  int main(int argc, char *argv[])
123
131
  {
124
- const char *opt_str = "2aSDw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:b:O:E:m:N:Qu:R:hF:LC:yYPo:e:U:J:";
132
+ const char *opt_str = "2aSDw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:b:O:E:m:N:Qu:R:hF:LC:yYPo:e:U:J:j:";
125
133
  ketopt_t o = KETOPT_INIT;
126
134
  mm_mapopt_t opt;
127
135
  mm_idxopt_t ipt;
128
136
  int i, c, n_threads = 3, n_parts, old_best_n = -1;
129
- char *fnw = 0, *rg = 0, *junc_bed = 0, *s, *alt_list = 0;
137
+ char *fnw = 0, *rg = 0, *fn_bed_junc = 0, *fn_bed_jump = 0, *fn_bed_pass1 = 0, *fn_spsc = 0, *s, *alt_list = 0;
130
138
  FILE *fp_help = stderr;
131
139
  mm_idx_reader_t *idx_rdr;
132
140
  mm_idx_t *mi;
@@ -188,6 +196,7 @@ int main(int argc, char *argv[])
188
196
  else if (c == 'R') rg = o.arg;
189
197
  else if (c == 'h') fp_help = stdout;
190
198
  else if (c == '2') opt.flag |= MM_F_2_IO_THREADS;
199
+ else if (c == 'j') fn_bed_jump = o.arg;
191
200
  else if (c == 'J') {
192
201
  int t;
193
202
  t = atoi(o.arg);
@@ -212,9 +221,8 @@ int main(int argc, char *argv[])
212
221
  else if (c == 309) mm_dbg_flag |= MM_DBG_PRINT_QNAME | MM_DBG_PRINT_ALN_SEQ, n_threads = 1; // --print-aln-seq
213
222
  else if (c == 310) opt.flag |= MM_F_SPLICE; // --splice
214
223
  else if (c == 312) opt.flag |= MM_F_NO_LJOIN; // --no-long-join
215
- else if (c == 313) opt.flag |= MM_F_SR; // --sr
216
224
  else if (c == 317) opt.end_bonus = atoi(o.arg); // --end-bonus
217
- else if (c == 318) opt.flag |= MM_F_INDEPEND_SEG; // --no-pairing
225
+ else if (c == 318) opt.flag |= MM_F_INDEPEND_SEG; // --no-pairing (deprecated)
218
226
  else if (c == 320) ipt.flag |= MM_I_NO_SEQ; // --idx-no-seq
219
227
  else if (c == 321) opt.anchor_ext_shift = atoi(o.arg); // --end-seed-pen
220
228
  else if (c == 322) opt.flag |= MM_F_FOR_ONLY; // --for-only
@@ -230,8 +238,9 @@ int main(int argc, char *argv[])
230
238
  else if (c == 336) opt.flag |= MM_F_HARD_MLEVEL; // --hard-mask-level
231
239
  else if (c == 337) opt.max_sw_mat = mm_parse_num(o.arg); // --cap-sw-mat
232
240
  else if (c == 338) opt.max_qlen = mm_parse_num(o.arg); // --max-qlen
233
- else if (c == 340) junc_bed = o.arg; // --junc-bed
241
+ else if (c == 340) fn_bed_junc = o.arg; // --junc-bed
234
242
  else if (c == 341) opt.junc_bonus = atoi(o.arg); // --junc-bonus
243
+ else if (c == 358) opt.junc_pen = atoi(o.arg); // --junc-pen
235
244
  else if (c == 342) opt.flag |= MM_F_SAM_HIT_ONLY; // --sam-hit-only
236
245
  else if (c == 343) opt.chain_gap_scale = atof(o.arg); // --chain-gap-scale
237
246
  else if (c == 351) opt.chain_skip_scale = atof(o.arg); // --chain-skip-scale
@@ -245,8 +254,25 @@ int main(int argc, char *argv[])
245
254
  else if (c == 353) opt.flag |= MM_F_NO_HASH_NAME; // --no-hash-name
246
255
  else if (c == 354) opt.flag |= MM_F_SECONDARY_SEQ; // --secondary-seq
247
256
  else if (c == 355) opt.flag |= MM_F_OUT_DS; // --ds
257
+ else if (c == 356) opt.rmq_inner_dist = mm_parse_num(o.arg); // --rmq-inner
258
+ else if (c == 357) fn_spsc = o.arg; // --spsc
259
+ else if (c == 360) opt.jump_min_match = mm_parse_num(o.arg); // --jump-min-match
260
+ else if (c == 361) opt.flag |= MM_F_OUT_JUNC | MM_F_CIGAR; // --write-junc
261
+ else if (c == 362) fn_bed_pass1 = o.arg; // --jump-pass1
262
+ else if (c == 501) mm_dbg_flag |= MM_DBG_SEED_FREQ; // --dbg-seed-occ
248
263
  else if (c == 330) {
249
264
  fprintf(stderr, "[WARNING] \033[1;31m --lj-min-ratio has been deprecated.\033[0m\n");
265
+ } else if (c == 313) { // --sr
266
+ if (o.arg == 0 || strcmp(o.arg, "dna") == 0) {
267
+ opt.flag |= MM_F_SR;
268
+ } else if (strcmp(o.arg, "rna") == 0) {
269
+ opt.flag |= MM_F_SR_RNA;
270
+ } else if (strcmp(o.arg, "no") == 0) {
271
+ opt.flag &= ~(uint64_t)(MM_F_SR|MM_F_SR_RNA);
272
+ } else if (mm_verbose >= 2) {
273
+ opt.flag |= MM_F_SR;
274
+ fprintf(stderr, "[WARNING]\033[1;31m --sr only takes 'dna' or 'rna'. Invalid values are assumed to be 'dna'.\033[0m\n");
275
+ }
250
276
  } else if (c == 314) { // --frag
251
277
  yes_or_no(&opt, MM_F_FRAG_MODE, o.longidx, o.arg, 1);
252
278
  } else if (c == 315) { // --secondary
@@ -271,6 +297,14 @@ int main(int argc, char *argv[])
271
297
  } else if (c == 347) { // --rmq
272
298
  if (o.arg) yes_or_no(&opt, MM_F_RMQ, o.longidx, o.arg, 1);
273
299
  else opt.flag |= MM_F_RMQ;
300
+ } else if (c == 359) { // --pairing
301
+ if (strcmp(o.arg, "no") == 0) opt.flag |= MM_F_INDEPEND_SEG;
302
+ else if (strcmp(o.arg, "weak") == 0) opt.flag |= MM_F_WEAK_PAIRING, opt.flag &= ~(uint64_t)MM_F_INDEPEND_SEG;
303
+ else {
304
+ if (strcmp(o.arg, "strong") != 0 && mm_verbose >= 2)
305
+ fprintf(stderr, "[WARNING]\033[1;31m unrecognized argument for --pairing; assuming 'strong'.\033[0m\n");
306
+ opt.flag &= ~(uint64_t)(MM_F_INDEPEND_SEG|MM_F_WEAK_PAIRING);
307
+ }
274
308
  } else if (c == 'S') {
275
309
  opt.flag |= MM_F_OUT_CS | MM_F_CIGAR | MM_F_OUT_CS_LONG;
276
310
  if (mm_verbose >= 2)
@@ -311,10 +345,6 @@ int main(int argc, char *argv[])
311
345
  if (*s == ',') opt.e2 = strtol(s + 1, &s, 10);
312
346
  }
313
347
  }
314
- if ((opt.flag & MM_F_SPLICE) && (opt.flag & MM_F_FRAG_MODE)) {
315
- fprintf(stderr, "[ERROR]\033[1;31m --splice and --frag should not be specified at the same time.\033[0m\n");
316
- return 1;
317
- }
318
348
  if (!fnw && !(opt.flag&MM_F_CIGAR))
319
349
  ipt.flag |= MM_I_NO_SEQ;
320
350
  if (mm_check_opt(&ipt, &opt) < 0)
@@ -354,6 +384,7 @@ int main(int argc, char *argv[])
354
384
  fprintf(fp_help, " -s INT minimal peak DP alignment score [%d]\n", opt.min_dp_max);
355
385
  fprintf(fp_help, " -u CHAR how to find GT-AG. f:transcript strand, b:both strands, n:don't match GT-AG [n]\n");
356
386
  fprintf(fp_help, " -J INT splice mode. 0: original minimap2 model; 1: miniprot model [1]\n");
387
+ fprintf(fp_help, " -j FILE junctions in BED12 to extend *short* RNA-seq alignment []\n");
357
388
  fprintf(fp_help, " Input/Output:\n");
358
389
  fprintf(fp_help, " -a output in the SAM format (PAF by default)\n");
359
390
  fprintf(fp_help, " -o FILE output alignments to FILE [stdout]\n");
@@ -365,6 +396,7 @@ int main(int argc, char *argv[])
365
396
  fprintf(fp_help, " --MD output the MD tag\n");
366
397
  fprintf(fp_help, " --eqx write =/X CIGAR operators\n");
367
398
  fprintf(fp_help, " -Y use soft clipping for supplementary alignments\n");
399
+ fprintf(fp_help, " -y copy FASTA/Q comments to output SAM\n");
368
400
  fprintf(fp_help, " -t INT number of threads [%d]\n", n_threads);
369
401
  fprintf(fp_help, " -K NUM minibatch size for mapping [500M]\n");
370
402
  // fprintf(fp_help, " -v INT verbose level [%d]\n", mm_verbose);
@@ -373,6 +405,7 @@ int main(int argc, char *argv[])
373
405
  fprintf(fp_help, " -x STR preset (always applied before other options; see minimap2.1 for details) []\n");
374
406
  fprintf(fp_help, " - lr:hq - accurate long reads (error rate <1%%) against a reference genome\n");
375
407
  fprintf(fp_help, " - splice/splice:hq - spliced alignment for long reads/accurate long reads\n");
408
+ fprintf(fp_help, " - splice:sr - spliced alignment for short RNA-seq reads\n");
376
409
  fprintf(fp_help, " - asm5/asm10/asm20 - asm-to-ref mapping, for ~0.1/1/5%% sequence divergence\n");
377
410
  fprintf(fp_help, " - sr - short reads against a reference\n");
378
411
  fprintf(fp_help, " - map-pb/map-hifi/map-ont/map-iclr - CLR/HiFi/Nanopore/ICLR vs reference mapping\n");
@@ -427,7 +460,26 @@ int main(int argc, char *argv[])
427
460
  __func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0), mi->n_seq);
428
461
  if (argc != o.ind + 1) mm_mapopt_update(&opt, mi);
429
462
  if (mm_verbose >= 3) mm_idx_stat(mi);
430
- if (junc_bed) mm_idx_bed_read(mi, junc_bed, 1);
463
+ if (fn_bed_junc) {
464
+ mm_idx_bed_read(mi, fn_bed_junc, 1);
465
+ if (mi->I == 0 && mm_verbose >= 2)
466
+ fprintf(stderr, "[WARNING] failed to load the junction BED file\n");
467
+ }
468
+ if (fn_bed_jump) {
469
+ mm_idx_jjump_read(mi, fn_bed_jump, MM_JUNC_ANNO, -1);
470
+ if (mi->J == 0 && mm_verbose >= 2)
471
+ fprintf(stderr, "[WARNING] failed to load the jump BED file\n");
472
+ }
473
+ if (fn_bed_pass1) {
474
+ mm_idx_jjump_read(mi, fn_bed_pass1, MM_JUNC_MISC, 5);
475
+ if (mi->J == 0 && mm_verbose >= 2)
476
+ fprintf(stderr, "[WARNING] failed to load the pass-1 jump BED file\n");
477
+ }
478
+ if (fn_spsc) {
479
+ mm_idx_spsc_read(mi, fn_spsc, mm_max_spsc_bonus(&opt));
480
+ if (mi->spsc == 0 && mm_verbose >= 2)
481
+ fprintf(stderr, "[WARNING] failed to load the splice score file\n");
482
+ }
431
483
  if (alt_list) mm_idx_alt_read(mi, alt_list);
432
484
  if (argc - (o.ind + 1) == 0) {
433
485
  mm_idx_destroy(mi);
data/ext/minimap2/map.c CHANGED
@@ -224,10 +224,10 @@ static mm_reg1_t *align_regs(const mm_mapopt_t *opt, const mm_idx_t *mi, void *k
224
224
  return regs;
225
225
  }
226
226
 
227
- void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **seqs, int *n_regs, mm_reg1_t **regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *qname)
227
+ void mm_map_frag_core(const mm_idx_t *mi, int n_segs, const int *qlens, const char **seqs, int *n_regs, mm_reg1_t **regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *qname)
228
228
  {
229
229
  int i, j, rep_len, qlen_sum, n_regs0, n_mini_pos;
230
- int max_chain_gap_qry, max_chain_gap_ref, is_splice = !!(opt->flag & MM_F_SPLICE), is_sr = !!(opt->flag & MM_F_SR);
230
+ int max_chain_gap_qry, max_chain_gap_ref, is_splice = !!(opt->flag & MM_F_SPLICE), is_sr = !!(opt->flag & MM_F_SR), is_sr_rna = !!(opt->flag & MM_F_SR_RNA);
231
231
  uint32_t hash;
232
232
  int64_t n_a;
233
233
  uint64_t *u, *mini_pos;
@@ -338,7 +338,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
338
338
  if (n_segs == 1) { // uni-segment
339
339
  regs0 = align_regs(opt, mi, b->km, qlens[0], seqs[0], &n_regs0, regs0, a);
340
340
  regs0 = (mm_reg1_t*)realloc(regs0, sizeof(*regs0) * n_regs0);
341
- mm_set_mapq(b->km, n_regs0, regs0, opt->min_chain_score, opt->a, rep_len, is_sr);
341
+ mm_set_mapq2(b->km, n_regs0, regs0, opt->min_chain_score, opt->a, rep_len, is_sr || is_sr_rna, is_splice);
342
342
  n_regs[0] = n_regs0, regs[0] = regs0;
343
343
  } else { // multi-segment
344
344
  mm_seg_t *seg;
@@ -347,7 +347,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
347
347
  for (i = 0; i < n_segs; ++i) {
348
348
  mm_set_parent(b->km, opt->mask_level, opt->mask_len, n_regs[i], regs[i], opt->a * 2 + opt->b, opt->flag&MM_F_HARD_MLEVEL, opt->alt_drop); // update mm_reg1_t::parent
349
349
  regs[i] = align_regs(opt, mi, b->km, qlens[i], seqs[i], &n_regs[i], regs[i], seg[i].a);
350
- mm_set_mapq(b->km, n_regs[i], regs[i], opt->min_chain_score, opt->a, rep_len, is_sr);
350
+ mm_set_mapq2(b->km, n_regs[i], regs[i], opt->min_chain_score, opt->a, rep_len, is_sr || is_sr_rna, is_splice);
351
351
  }
352
352
  mm_seg_free(b->km, n_segs, seg);
353
353
  if (n_segs == 2 && opt->pe_ori >= 0 && (opt->flag&MM_F_CIGAR))
@@ -359,6 +359,10 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
359
359
  kfree(b->km, u);
360
360
  kfree(b->km, mini_pos);
361
361
 
362
+ if (mi->J && n_segs == 1 && is_splice)
363
+ for (i = 0; i < n_regs0; ++i)
364
+ mm_jump_split(b->km, mi, opt, qlens[0], (const uint8_t*)seqs[0], &regs0[i], 0);
365
+
362
366
  if (b->km) {
363
367
  km_stat(b->km, &kmst);
364
368
  if (mm_dbg_flag & MM_DBG_PRINT_QNAME)
@@ -373,6 +377,18 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
373
377
  }
374
378
  }
375
379
 
380
+ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **seqs, int *n_regs, mm_reg1_t **regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *qname)
381
+ {
382
+ if ((opt->flag & MM_F_WEAK_PAIRING) && n_segs == 2 && opt->pe_ori >= 0 && (opt->flag&MM_F_CIGAR)) {
383
+ int i;
384
+ for (i = 0; i < n_segs; ++i)
385
+ mm_map_frag_core(mi, 1, &qlens[i], &seqs[i], &n_regs[i], &regs[i], b, opt, qname);
386
+ mm_pair(b->km, opt->max_gap_ref, opt->pe_bonus, opt->a * 2 + opt->b, opt->a, qlens, n_regs, regs);
387
+ } else {
388
+ mm_map_frag_core(mi, n_segs, qlens, seqs, n_regs, regs, b, opt, qname);
389
+ }
390
+ }
391
+
376
392
  mm_reg1_t *mm_map(const mm_idx_t *mi, int qlen, const char *seq, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *qname)
377
393
  {
378
394
  mm_reg1_t *regs;
@@ -447,6 +463,10 @@ static void worker_for(void *_data, long i, int tid) // kt_for() callback
447
463
  r->qs = qlens[j] - r->qe;
448
464
  r->qe = qlens[j] - t;
449
465
  r->rev = !r->rev;
466
+ if (r->p) {
467
+ if (r->p->trans_strand == 1) r->p->trans_strand = 2;
468
+ else if (r->p->trans_strand == 2) r->p->trans_strand = 1;
469
+ }
450
470
  }
451
471
  }
452
472
  if (mm_dbg_flag & MM_DBG_PRINT_QNAME)
@@ -509,7 +529,7 @@ static void merge_hits(step_t *s)
509
529
  mm_select_sub(km, opt->pri_ratio, s->p->mi->k*2, opt->best_n, 0, opt->max_gap * 0.8, &s->n_reg[k], s->reg[k]);
510
530
  mm_set_sam_pri(s->n_reg[k], s->reg[k]);
511
531
  }
512
- mm_set_mapq(km, s->n_reg[k], s->reg[k], opt->min_chain_score, opt->a, rep_len, !!(opt->flag & MM_F_SR));
532
+ mm_set_mapq2(km, s->n_reg[k], s->reg[k], opt->min_chain_score, opt->a, rep_len, !!(opt->flag & (MM_F_SR|MM_F_SR_RNA)), !!(opt->flag & MM_F_SPLICE));
513
533
  }
514
534
  if (s->n_seg[f] == 2 && opt->pe_ori >= 0 && (opt->flag&MM_F_CIGAR))
515
535
  mm_pair(km, frag_gap_part[0], opt->pe_bonus, opt->a * 2 + opt->b, opt->a, qlens, &s->n_reg[k0], &s->reg[k0]);
@@ -578,23 +598,30 @@ static void *worker_pipeline(void *shared, int step, void *in)
578
598
  mm_err_fwrite(r->p, r->p->capacity, 4, p->fp_split);
579
599
  }
580
600
  }
601
+ } else if (p->opt->flag & MM_F_OUT_JUNC) { // extra logic for --write-junc
602
+ for (j = 0; j < s->n_reg[i]; ++j) {
603
+ const mm_reg1_t *r = &s->reg[i][j];
604
+ if (r->id != r->parent || r->mapq < 10) continue;
605
+ mm_write_junc(&p->str, mi, t, r);
606
+ if (p->str.l > 0) mm_err_puts(p->str.s);
607
+ }
581
608
  } else if (s->n_reg[i] > 0) { // the query has at least one hit
582
609
  for (j = 0; j < s->n_reg[i]; ++j) {
583
- mm_reg1_t *r = &s->reg[i][j];
610
+ const mm_reg1_t *r = &s->reg[i][j];
584
611
  assert(!r->sam_pri || r->id == r->parent);
585
612
  if ((p->opt->flag & MM_F_NO_PRINT_2ND) && r->id != r->parent)
586
613
  continue;
587
614
  if (p->opt->flag & MM_F_OUT_SAM)
588
615
  mm_write_sam3(&p->str, mi, t, i - seg_st, j, s->n_seg[k], &s->n_reg[seg_st], (const mm_reg1_t*const*)&s->reg[seg_st], km, p->opt->flag, s->rep_len[i]);
589
616
  else
590
- mm_write_paf3(&p->str, mi, t, r, km, p->opt->flag, s->rep_len[i]);
617
+ mm_write_paf4(&p->str, mi, t, r, km, p->opt->flag, s->rep_len[i], s->n_seg[k], i - seg_st);
591
618
  mm_err_puts(p->str.s);
592
619
  }
593
620
  } else if ((p->opt->flag & MM_F_PAF_NO_HIT) || ((p->opt->flag & MM_F_OUT_SAM) && !(p->opt->flag & MM_F_SAM_HIT_ONLY))) { // output an empty hit, if requested
594
621
  if (p->opt->flag & MM_F_OUT_SAM)
595
622
  mm_write_sam3(&p->str, mi, t, i - seg_st, -1, s->n_seg[k], &s->n_reg[seg_st], (const mm_reg1_t*const*)&s->reg[seg_st], km, p->opt->flag, s->rep_len[i]);
596
623
  else
597
- mm_write_paf3(&p->str, mi, t, 0, 0, p->opt->flag, s->rep_len[i]);
624
+ mm_write_paf4(&p->str, mi, t, 0, 0, p->opt->flag, s->rep_len[i], s->n_seg[k], i - seg_st);
598
625
  mm_err_puts(p->str.s);
599
626
  }
600
627
  }