minimap2 0.2.28.0 → 0.2.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/ext/cmappy/cmappy.c +3 -3
- data/ext/cmappy/cmappy.h +1 -1
- data/ext/minimap2/FAQ.md +1 -1
- data/ext/minimap2/Makefile +4 -3
- data/ext/minimap2/NEWS.md +39 -0
- data/ext/minimap2/README.md +30 -14
- data/ext/minimap2/align.c +134 -50
- data/ext/minimap2/cookbook.md +2 -2
- data/ext/minimap2/format.c +57 -3
- data/ext/minimap2/hit.c +14 -6
- data/ext/minimap2/index.c +304 -13
- data/ext/minimap2/jump.c +201 -0
- data/ext/minimap2/kalloc.h +8 -0
- data/ext/minimap2/ksw2.h +5 -2
- data/ext/minimap2/ksw2_dispatch.c +5 -5
- data/ext/minimap2/ksw2_exts2_sse.c +17 -6
- data/ext/minimap2/main.c +60 -12
- data/ext/minimap2/map.c +35 -8
- data/ext/minimap2/minimap.h +14 -3
- data/ext/minimap2/minimap2.1 +92 -45
- data/ext/minimap2/misc/README.md +2 -1
- data/ext/minimap2/misc/pafcluster.js +241 -0
- data/ext/minimap2/misc/paftools.js +8 -3
- data/ext/minimap2/mmpriv.h +24 -2
- data/ext/minimap2/options.c +27 -2
- data/ext/minimap2/python/cmappy.h +3 -3
- data/ext/minimap2/python/cmappy.pxd +4 -2
- data/ext/minimap2/python/mappy.pyx +19 -7
- data/ext/minimap2/setup.py +2 -2
- data/ext/minimap2.patch +2 -2
- data/lib/minimap2/aligner.rb +19 -12
- data/lib/minimap2/ffi/constants.rb +9 -1
- data/lib/minimap2/ffi/functions.rb +145 -6
- data/lib/minimap2/ffi/mappy.rb +1 -1
- data/lib/minimap2/version.rb +1 -1
- data/lib/minimap2.rb +2 -2
- metadata +5 -4
- data/ext/minimap2/misc/mmphase.js +0 -335
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 15977381155ba5ee7d3352f9669db9dcf074e2073ac54961bdff2f6c9af77fb1
|
4
|
+
data.tar.gz: 67abab72034a636e796cc316e9891c81e049dec9ce8a4cd5b68f975384ba152d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b6df8d975c534b9c71715919867abc905e1bdf5712d3c1ba63cd6d15ea657367678153b448187186b128ee65abfc190f7415dc5924bb009cc61e4debdb2801fa
|
7
|
+
data.tar.gz: 87c2bf817379daf71b410e39ed3e5e5599329f0aa44f1db59449acb42e8033d58ec46fc74e94136e60595d101eaef94b4ba7561d7f4b9fe82d7a5cc6c62d77fa
|
data/README.md
CHANGED
data/ext/cmappy/cmappy.c
CHANGED
@@ -50,13 +50,13 @@ void mm_reset_timer(void)
|
|
50
50
|
mm_realtime0 = realtime();
|
51
51
|
}
|
52
52
|
|
53
|
-
mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt)
|
53
|
+
mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char* seqname, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt)
|
54
54
|
{
|
55
55
|
mm_reg1_t *r;
|
56
56
|
|
57
57
|
// Py_BEGIN_ALLOW_THREADS
|
58
58
|
if (seq2 == 0) {
|
59
|
-
r = mm_map(mi, strlen(seq1), seq1, n_regs, b, opt,
|
59
|
+
r = mm_map(mi, strlen(seq1), seq1, n_regs, b, opt, seqname);
|
60
60
|
} else {
|
61
61
|
int _n_regs[2];
|
62
62
|
mm_reg1_t *regs[2];
|
@@ -73,7 +73,7 @@ mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const char *seq2, in
|
|
73
73
|
seq[1][i] = seq_comp_table[t];
|
74
74
|
}
|
75
75
|
if (len[1]&1) seq[1][len[1]>>1] = seq_comp_table[(uint8_t)seq[1][len[1]>>1]];
|
76
|
-
mm_map_frag(mi, 2, len, (const char**)seq, _n_regs, regs, b, opt,
|
76
|
+
mm_map_frag(mi, 2, len, (const char**)seq, _n_regs, regs, b, opt, seqname);
|
77
77
|
for (i = 0; i < _n_regs[1]; ++i)
|
78
78
|
regs[1][i].rev = !regs[1][i].rev;
|
79
79
|
*n_regs = _n_regs[0] + _n_regs[1];
|
data/ext/cmappy/cmappy.h
CHANGED
@@ -33,7 +33,7 @@ int mm_verbose_level(int v);
|
|
33
33
|
void mm_reset_timer(void);
|
34
34
|
|
35
35
|
extern unsigned char seq_comp_table[256];
|
36
|
-
mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt);
|
36
|
+
mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char* seqname, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt);
|
37
37
|
|
38
38
|
char *mappy_revcomp(int len, const uint8_t *seq);
|
39
39
|
|
data/ext/minimap2/FAQ.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Without `-a`, `-c` or `--cs`, minimap2 only finds *approximate* mapping
|
4
4
|
locations without detailed base alignment. In particular, the start and end
|
5
|
-
positions of the alignment are
|
5
|
+
positions of the alignment are imprecise. With one of those options, minimap2
|
6
6
|
will perform base alignment, which is generally more accurate but is much
|
7
7
|
slower.
|
8
8
|
|
data/ext/minimap2/Makefile
CHANGED
@@ -2,7 +2,7 @@ CFLAGS= -g -Wall -O2 -Wc++-compat #-Wextra
|
|
2
2
|
CPPFLAGS= -DHAVE_KALLOC
|
3
3
|
INCLUDES=
|
4
4
|
OBJS= kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o options.o index.o \
|
5
|
-
lchain.o align.o hit.o seed.o map.o format.o pe.o esterr.o splitidx.o \
|
5
|
+
lchain.o align.o hit.o seed.o jump.o map.o format.o pe.o esterr.o splitidx.o \
|
6
6
|
ksw2_ll_sse.o
|
7
7
|
PROG= minimap2
|
8
8
|
PROG_EXTRA= sdust minimap2-lite
|
@@ -115,8 +115,9 @@ esterr.o: mmpriv.h minimap.h bseq.h kseq.h
|
|
115
115
|
example.o: minimap.h kseq.h
|
116
116
|
format.o: kalloc.h mmpriv.h minimap.h bseq.h kseq.h
|
117
117
|
hit.o: mmpriv.h minimap.h bseq.h kseq.h kalloc.h khash.h
|
118
|
-
index.o: kthread.h bseq.h minimap.h mmpriv.h kseq.h
|
119
|
-
index.o: ksort.h
|
118
|
+
index.o: kthread.h bseq.h minimap.h mmpriv.h kseq.h ksw2.h kalloc.h kvec.h
|
119
|
+
index.o: khash.h ksort.h
|
120
|
+
jump.o: mmpriv.h minimap.h bseq.h kseq.h
|
120
121
|
kalloc.o: kalloc.h
|
121
122
|
ksw2_extd2_sse.o: ksw2.h kalloc.h
|
122
123
|
ksw2_exts2_sse.o: ksw2.h kalloc.h
|
data/ext/minimap2/NEWS.md
CHANGED
@@ -1,3 +1,42 @@
|
|
1
|
+
Release 2.29-r1283 (18 April 2025)
|
2
|
+
----------------------------------
|
3
|
+
|
4
|
+
Notable changes to minimap2:
|
5
|
+
|
6
|
+
* New feature: added the `splice:sr` preset for short RNA-seq read alignment.
|
7
|
+
Users may use `-j` to specify known gene annotation to improve spliced
|
8
|
+
alignment close to the ends of short reads. Also added `--write-junc` and
|
9
|
+
`--pass1` for 2-pass short-read RNA-seq alignment.
|
10
|
+
|
11
|
+
* Experimental feature: read splice scores from a file specified by `--spsc`
|
12
|
+
and consider the scores during base alignment. The feature makes it possible
|
13
|
+
to apply advanced splice models and to improve spliced alignment.
|
14
|
+
|
15
|
+
* Change: adjusted the mapping quality calculation for spliced alignment.
|
16
|
+
|
17
|
+
* Bugfixes: a) missing overlap alignment when base alignment is requested
|
18
|
+
(#969); b) incorrect summary information for long genomes (#1192); c)
|
19
|
+
missing parameter check for `--score-N` (#1226).
|
20
|
+
|
21
|
+
* Improvement: a) warn about absent junction files (#1229); b) report an error
|
22
|
+
if a wrong preset prefixed with "splice" is specified (#589).
|
23
|
+
|
24
|
+
Notable changes to mappy:
|
25
|
+
|
26
|
+
* Improvement: allow passing read name (#1260)
|
27
|
+
|
28
|
+
* Improvement: exposed score for ambiguous bases (#1240)
|
29
|
+
|
30
|
+
Minimap2 now supports short/long genomic/RNA-seq read alignment along with
|
31
|
+
contig alignment and all-vs-all read overlapping. It produces identical genomic
|
32
|
+
long-read or contig alignment to v2.27. Short genomic read alignment and the
|
33
|
+
mapping quality of long RNA-seq read alignment may slightly differ in very rare
|
34
|
+
cases.
|
35
|
+
|
36
|
+
(2.29: 18 April 2025, r1283)
|
37
|
+
|
38
|
+
|
39
|
+
|
1
40
|
Release 2.28-r1209 (27 March 2024)
|
2
41
|
----------------------------------
|
3
42
|
|
data/ext/minimap2/README.md
CHANGED
@@ -14,13 +14,15 @@ cd minimap2 && make
|
|
14
14
|
# use presets (no test data)
|
15
15
|
./minimap2 -ax map-pb ref.fa pacbio.fq.gz > aln.sam # PacBio CLR genomic reads
|
16
16
|
./minimap2 -ax map-ont ref.fa ont.fq.gz > aln.sam # Oxford Nanopore genomic reads
|
17
|
-
./minimap2 -ax map-hifi ref.fa pacbio-ccs.fq.gz > aln.sam # PacBio HiFi/CCS genomic reads (v2.19
|
18
|
-
./minimap2 -ax lr:hq ref.fa ont-Q20.fq.gz > aln.sam # Nanopore Q20 genomic reads (v2.27
|
17
|
+
./minimap2 -ax map-hifi ref.fa pacbio-ccs.fq.gz > aln.sam # PacBio HiFi/CCS genomic reads (v2.19+)
|
18
|
+
./minimap2 -ax lr:hq ref.fa ont-Q20.fq.gz > aln.sam # Nanopore Q20 genomic reads (v2.27+)
|
19
19
|
./minimap2 -ax sr ref.fa read1.fa read2.fa > aln.sam # short genomic paired-end reads
|
20
20
|
./minimap2 -ax splice ref.fa rna-reads.fa > aln.sam # spliced long reads (strand unknown)
|
21
|
-
./minimap2 -ax splice -uf -k14 ref.fa reads.fa > aln.sam # noisy Nanopore
|
22
|
-
./minimap2 -ax splice:hq -uf ref.fa query.fa > aln.sam #
|
23
|
-
./minimap2 -ax splice --junc-bed
|
21
|
+
./minimap2 -ax splice -uf -k14 ref.fa reads.fa > aln.sam # noisy Nanopore direct RNA-seq
|
22
|
+
./minimap2 -ax splice:hq -uf ref.fa query.fa > aln.sam # PacBio Kinnex/Iso-seq (RNA-seq)
|
23
|
+
./minimap2 -ax splice --junc-bed=anno.bed12 ref.fa query.fa > aln.sam # use annotated junctions
|
24
|
+
./minimap2 -ax splice:sr ref.fa r1.fq r2.fq > aln.sam # short-read RNA-seq (v2.29+)
|
25
|
+
./minimap2 -ax splice:sr -j anno.bed12 ref.fa r1.fq r2.fq > aln.sam
|
24
26
|
./minimap2 -cx asm5 asm1.fa asm2.fa > aln.paf # intra-species asm-to-asm alignment
|
25
27
|
./minimap2 -x ava-pb reads.fa reads.fa > overlaps.paf # PacBio read overlap
|
26
28
|
./minimap2 -x ava-ont reads.fa reads.fa > overlaps.paf # Nanopore read overlap
|
@@ -38,7 +40,8 @@ man ./minimap2.1
|
|
38
40
|
- [Map long noisy genomic reads](#map-long-genomic)
|
39
41
|
- [Map long mRNA/cDNA reads](#map-long-splice)
|
40
42
|
- [Find overlaps between long reads](#long-overlap)
|
41
|
-
- [Map short
|
43
|
+
- [Map short genomic reads](#short-genomic)
|
44
|
+
- [Map short RNA-seq reads](#short-rna-seq)
|
42
45
|
- [Full genome/assembly alignment](#full-genome)
|
43
46
|
- [Advanced features](#advanced)
|
44
47
|
- [Working with >65535 CIGAR operations](#long-cigar)
|
@@ -74,8 +77,8 @@ Detailed evaluations are available from the [minimap2 paper][doi] or the
|
|
74
77
|
Minimap2 is optimized for x86-64 CPUs. You can acquire precompiled binaries from
|
75
78
|
the [release page][release] with:
|
76
79
|
```sh
|
77
|
-
curl -L https://github.com/lh3/minimap2/releases/download/v2.
|
78
|
-
./minimap2-2.
|
80
|
+
curl -L https://github.com/lh3/minimap2/releases/download/v2.29/minimap2-2.29_x64-linux.tar.bz2 | tar -jxvf -
|
81
|
+
./minimap2-2.29_x64-linux/minimap2
|
79
82
|
```
|
80
83
|
If you want to compile from the source, you need to have a C compiler, GNU make
|
81
84
|
and zlib development files installed. Then type `make` in the source code
|
@@ -171,9 +174,8 @@ or the last exons.
|
|
171
174
|
|
172
175
|
Minimap2 rates an alignment by the score of the max-scoring sub-segment,
|
173
176
|
*excluding* introns, and marks the best alignment as primary in SAM. When a
|
174
|
-
spliced gene also has unspliced pseudogenes, minimap2
|
175
|
-
|
176
|
-
alignment as the primary. By default, minimap2 outputs up to five secondary
|
177
|
+
spliced gene also has unspliced pseudogenes, minimap2 slightly prefers
|
178
|
+
the spliced alignment. By default, minimap2 outputs up to five secondary
|
177
179
|
alignments (i.e. likely pseudogenes in the context of RNA-seq mapping). This
|
178
180
|
can be tuned with option **-N**.
|
179
181
|
|
@@ -204,6 +206,10 @@ bonus score (tuned by `--junc-bonus`) if an aligned junction matches a junction
|
|
204
206
|
in the annotation. Option `--junc-bed` also takes 5-column BED, including the
|
205
207
|
strand field. In this case, each line indicates an oriented junction.
|
206
208
|
|
209
|
+
**Note:** `--junc-bed` is intended for long noisy RNA-seq reads only.
|
210
|
+
Applying the option to short RNA-seq reads would increase run time with little
|
211
|
+
improvement to junction accuracy.
|
212
|
+
|
207
213
|
#### <a name="long-overlap"></a>Find overlaps between long reads
|
208
214
|
|
209
215
|
```sh
|
@@ -216,7 +222,7 @@ the overlapping mode because it is slow and may produce false positive
|
|
216
222
|
overlaps. However, if performance is not a concern, you may try to add `-a` or
|
217
223
|
`-c` anyway.
|
218
224
|
|
219
|
-
#### <a name="short-genomic"></a>Map short
|
225
|
+
#### <a name="short-genomic"></a>Map short genomic reads
|
220
226
|
|
221
227
|
```sh
|
222
228
|
minimap2 -ax sr ref.fa reads-se.fq > aln.sam # single-end alignment
|
@@ -229,8 +235,18 @@ be paired if they are adjacent in the input stream and have the same name (with
|
|
229
235
|
the `/[0-9]` suffix trimmed if present). Single- and paired-end reads can be
|
230
236
|
mixed.
|
231
237
|
|
232
|
-
|
233
|
-
|
238
|
+
#### <a name="short-rna-seq"></a>Map short RNA-seq reads
|
239
|
+
|
240
|
+
```sh
|
241
|
+
minimap2 -ax splice:sr ref.fa reads-se.fq.gz > aln.sam # single-end
|
242
|
+
minimap2 -ax splice:sr ref.fa r1.fq.gz r2.fq.gz > aln.sam # paired-end
|
243
|
+
minimap2 -ax splice:sr -j anno.bed ref.fa r1.fq r2.fq > aln.sam # use annotation
|
244
|
+
# 2-pass alignment
|
245
|
+
minimap2 -x splice:sr -j anno.bed --write-junc ref.fa r1.fq r2.fq > junc.bed
|
246
|
+
minimap2 -ax splice:sr -j anno.bed --pass1=junc.bed ref.fa r1.fq r2.fq > aln.sam
|
247
|
+
```
|
248
|
+
The new preset `splice:sr` was added in v2.29. It functions similarly to `sr`
|
249
|
+
except that it performs spliced alignment.
|
234
250
|
|
235
251
|
#### <a name="full-genome"></a>Full genome/assembly alignment
|
236
252
|
|
data/ext/minimap2/align.c
CHANGED
@@ -6,6 +6,8 @@
|
|
6
6
|
#include "mmpriv.h"
|
7
7
|
#include "ksw2.h"
|
8
8
|
|
9
|
+
#define MM_MAX_QLEN_FLANK 100
|
10
|
+
|
9
11
|
static void ksw_gen_simple_mat(int m, int8_t *mat, int8_t a, int8_t b, int8_t sc_ambi)
|
10
12
|
{
|
11
13
|
int i, j;
|
@@ -258,7 +260,7 @@ static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *ts
|
|
258
260
|
if (p == 0) return;
|
259
261
|
mm_fix_cigar(r, qseq, tseq, &qshift, &tshift);
|
260
262
|
qseq += qshift, tseq += tshift; // qseq and tseq may be shifted due to the removal of leading I/D
|
261
|
-
r->blen = r->mlen = 0;
|
263
|
+
r->blen = r->mlen = 0, r->is_spliced = 0;
|
262
264
|
for (k = 0; k < p->n_cigar; ++k) {
|
263
265
|
uint32_t op = p->cigar[k]&0xf, len = p->cigar[k]>>4;
|
264
266
|
if (op == MM_CIGAR_MATCH) {
|
@@ -292,7 +294,7 @@ static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *ts
|
|
292
294
|
if (s < 0) s = 0;
|
293
295
|
toff += len;
|
294
296
|
} else if (op == MM_CIGAR_N_SKIP) {
|
295
|
-
toff += len;
|
297
|
+
r->is_spliced = 1, toff += len;
|
296
298
|
}
|
297
299
|
}
|
298
300
|
p->dp_max = p->dp_max0 = (int32_t)(max + .499);
|
@@ -300,9 +302,8 @@ static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *ts
|
|
300
302
|
if (is_eqx) mm_update_cigar_eqx(r, qseq, tseq); // NB: it has to be called here as changes to qseq and tseq are not returned
|
301
303
|
}
|
302
304
|
|
303
|
-
|
305
|
+
void mm_enlarge_cigar(mm_reg1_t *r, uint32_t n_cigar) // TODO: this calls the libc realloc()
|
304
306
|
{
|
305
|
-
mm_extra_t *p;
|
306
307
|
if (n_cigar == 0) return;
|
307
308
|
if (r->p == 0) {
|
308
309
|
uint32_t capacity = n_cigar + sizeof(mm_extra_t)/4;
|
@@ -314,6 +315,13 @@ static void mm_append_cigar(mm_reg1_t *r, uint32_t n_cigar, uint32_t *cigar) //
|
|
314
315
|
kroundup32(r->p->capacity);
|
315
316
|
r->p = (mm_extra_t*)realloc(r->p, r->p->capacity * 4);
|
316
317
|
}
|
318
|
+
}
|
319
|
+
|
320
|
+
static void mm_append_cigar(mm_reg1_t *r, uint32_t n_cigar, const uint32_t *cigar)
|
321
|
+
{
|
322
|
+
mm_extra_t *p;
|
323
|
+
if (n_cigar == 0) return;
|
324
|
+
mm_enlarge_cigar(r, n_cigar);
|
317
325
|
p = r->p;
|
318
326
|
if (p->n_cigar > 0 && (p->cigar[p->n_cigar-1]&0xf) == (cigar[0]&0xf)) { // same CIGAR op at the boundary
|
319
327
|
p->cigar[p->n_cigar-1] += cigar[0]>>4<<4;
|
@@ -325,29 +333,31 @@ static void mm_append_cigar(mm_reg1_t *r, uint32_t n_cigar, uint32_t *cigar) //
|
|
325
333
|
}
|
326
334
|
}
|
327
335
|
|
328
|
-
static void mm_align_pair(void *km, const mm_mapopt_t *opt, int qlen, const uint8_t *qseq, int tlen, const uint8_t *tseq, const uint8_t *junc,
|
336
|
+
static void mm_align_pair(void *km, const mm_mapopt_t *opt, int qlen, const uint8_t *qseq, int tlen, const uint8_t *tseq, const uint8_t *junc,
|
337
|
+
const int8_t *mat, int w, int end_bonus, int zdrop, int ksw_flag, ksw_extz_t *ez)
|
329
338
|
{
|
330
339
|
if (mm_dbg_flag & MM_DBG_PRINT_ALN_SEQ) {
|
331
340
|
int i;
|
332
|
-
fprintf(stderr, "===> q=(%d,%d), e=(%d,%d), bw=%d,
|
341
|
+
fprintf(stderr, "===> q=(%d,%d), e=(%d,%d), bw=%d, ksw_flag=%d, zdrop=%d, end_bonus=%d <===\n", opt->q, opt->q2, opt->e, opt->e2, w, ksw_flag, opt->zdrop, end_bonus);
|
333
342
|
for (i = 0; i < tlen; ++i) fputc("ACGTN"[tseq[i]], stderr);
|
334
343
|
fputc('\n', stderr);
|
335
344
|
for (i = 0; i < qlen; ++i) fputc("ACGTN"[qseq[i]], stderr);
|
336
345
|
fputc('\n', stderr);
|
337
346
|
}
|
338
347
|
if (opt->transition != 0 && opt->b != opt->transition)
|
339
|
-
|
340
|
-
if (opt->max_sw_mat > 0 && (int64_t)tlen * qlen > opt->max_sw_mat) {
|
348
|
+
ksw_flag |= KSW_EZ_GENERIC_SC;
|
349
|
+
if (opt->max_sw_mat > 0 && (int64_t)tlen * qlen > opt->max_sw_mat) { // too much memory; skip alignment
|
341
350
|
ksw_reset_extz(ez);
|
342
351
|
ez->zdropped = 1;
|
343
|
-
} else if (opt->flag & MM_F_SPLICE) {
|
344
|
-
|
345
|
-
if (!(opt->flag & MM_F_SPLICE_OLD))
|
346
|
-
ksw_exts2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, opt->q2, opt->noncan, zdrop, opt->junc_bonus,
|
347
|
-
} else if (opt->q == opt->q2 && opt->e == opt->e2)
|
348
|
-
ksw_extz2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, w, zdrop, end_bonus,
|
349
|
-
else
|
350
|
-
ksw_extd2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, opt->q2, opt->e2, w, zdrop, end_bonus,
|
352
|
+
} else if (opt->flag & MM_F_SPLICE) { // spliced alignment
|
353
|
+
assert((ksw_flag & KSW_EZ_SPLICE_FOR) == 0 || (ksw_flag & KSW_EZ_SPLICE_REV) == 0);
|
354
|
+
if (!(opt->flag & MM_F_SPLICE_OLD)) ksw_flag |= KSW_EZ_SPLICE_CMPLX;
|
355
|
+
ksw_exts2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, opt->q2, opt->noncan, zdrop, end_bonus, opt->junc_bonus, opt->junc_pen, ksw_flag, junc, ez);
|
356
|
+
} else if (opt->q == opt->q2 && opt->e == opt->e2) { // affine gap
|
357
|
+
ksw_extz2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, w, zdrop, end_bonus, ksw_flag, ez);
|
358
|
+
} else { // dual affine gap
|
359
|
+
ksw_extd2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, opt->q2, opt->e2, w, zdrop, end_bonus, ksw_flag, ez);
|
360
|
+
}
|
351
361
|
if (mm_dbg_flag & MM_DBG_PRINT_ALN_SEQ) {
|
352
362
|
int i;
|
353
363
|
fprintf(stderr, "score=%d, cigar=", ez->score);
|
@@ -357,6 +367,45 @@ static void mm_align_pair(void *km, const mm_mapopt_t *opt, int qlen, const uint
|
|
357
367
|
}
|
358
368
|
}
|
359
369
|
|
370
|
+
static int mm_align_sr_rna(void *km, const mm_mapopt_t *opt, int qlen, const uint8_t *qseq, int tlen, const uint8_t *tseq, const uint8_t *junc, uint8_t *tseq2, uint8_t *junc2,
|
371
|
+
const int8_t *mat, int w, int end_bonus, int zdrop, int ksw_flag, ksw_extz_t *ez)
|
372
|
+
{
|
373
|
+
int32_t ilen = opt->q2 * 2, tlen2 = qlen * 2 + ilen;
|
374
|
+
int32_t i, ll = 0, lr = 0, nn = 0, n_ins = 0;
|
375
|
+
if (!(opt->flag & MM_F_SPLICE)) return 0; // only for spliced alignment
|
376
|
+
if (qlen > MM_MAX_QLEN_FLANK || qlen * 2 + ilen > tlen) return 0; // the query sequence can't be too long and the target sequence must be long enough
|
377
|
+
for (i = 0; i < qlen; ++i) // exact match length from the left
|
378
|
+
if (qseq[i] == tseq[i] && qseq[i] < 4)
|
379
|
+
++ll;
|
380
|
+
for (i = 0; i < qlen; ++i) // exact match length from the right
|
381
|
+
if (qseq[qlen - 1 - i] == tseq[tlen - 1 - i] && qseq[qlen - 1 - i] < 4)
|
382
|
+
++lr;
|
383
|
+
if (qlen - (ll + lr) > 9) return 0; // qlen may be smaller than ll+lr
|
384
|
+
memcpy(tseq2, tseq, qlen);
|
385
|
+
memset(&tseq2[qlen], 4, ilen);
|
386
|
+
memcpy(&tseq2[qlen + ilen], &tseq[tlen - qlen], qlen);
|
387
|
+
if (junc) {
|
388
|
+
memcpy(junc2, junc, qlen);
|
389
|
+
memset(&junc2[qlen], 0, ilen);
|
390
|
+
memcpy(&junc2[qlen + ilen], &junc[tlen - qlen], qlen);
|
391
|
+
}
|
392
|
+
if (!(opt->flag & MM_F_SPLICE_OLD)) ksw_flag |= KSW_EZ_SPLICE_CMPLX;
|
393
|
+
ksw_exts2_sse(km, qlen, qseq, tlen2, tseq2, 5, mat, opt->q, opt->e, opt->q2, opt->noncan, zdrop, end_bonus, opt->junc_bonus, opt->junc_pen, ksw_flag, junc2, ez);
|
394
|
+
if (ez->zdropped) return 0;
|
395
|
+
if ((ez->cigar[0]&0xf) != KSW_CIGAR_MATCH || (ez->cigar[ez->n_cigar-1]&0xf) != KSW_CIGAR_MATCH) return 0;
|
396
|
+
for (i = 0; i < ez->n_cigar; ++i) { // count the number of introns in the alignment
|
397
|
+
if ((ez->cigar[i]&0xf) == KSW_CIGAR_N_SKIP)
|
398
|
+
++nn;
|
399
|
+
else if ((ez->cigar[i]&0xf) == KSW_CIGAR_INS)
|
400
|
+
++n_ins;
|
401
|
+
}
|
402
|
+
if (nn != 1 || n_ins > 0) return 0; // the heuristic only works when there is exactly one intron
|
403
|
+
for (i = 0; i < ez->n_cigar; ++i)
|
404
|
+
if ((ez->cigar[i]&0xf) == KSW_CIGAR_N_SKIP)
|
405
|
+
ez->cigar[i] += (tlen - tlen2) << 4;
|
406
|
+
return 1;
|
407
|
+
}
|
408
|
+
|
360
409
|
static inline int mm_get_hplen_back(const mm_idx_t *mi, uint32_t rid, uint32_t x)
|
361
410
|
{
|
362
411
|
int64_t i, off0 = mi->seq[rid].offset, off = off0 + x;
|
@@ -586,12 +635,19 @@ static void mm_fix_bad_ends_splice(void *km, const mm_mapopt_t *opt, const mm_id
|
|
586
635
|
}
|
587
636
|
}
|
588
637
|
|
638
|
+
static inline void mm_get_junc(const mm_idx_t *mi, int32_t ctg, int32_t st, int32_t en, int32_t rev, uint8_t *junc)
|
639
|
+
{
|
640
|
+
if (mi->spsc) mm_idx_spsc_get(mi, ctg, st, en, rev, junc);
|
641
|
+
else if (mi->I) mm_idx_bed_junc(mi, ctg, st, en, junc);
|
642
|
+
else memset(junc, 0, en - st);
|
643
|
+
}
|
644
|
+
|
589
645
|
static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, uint8_t *qseq0[2], mm_reg1_t *r, mm_reg1_t *r2, int n_a, mm128_t *a, ksw_extz_t *ez, int splice_flag)
|
590
646
|
{
|
591
|
-
int is_sr = !!(opt->flag & MM_F_SR), is_splice = !!(opt->flag & MM_F_SPLICE);
|
647
|
+
int is_sr = !!(opt->flag & MM_F_SR), is_splice = !!(opt->flag & MM_F_SPLICE), is_sr_rna = (!!(opt->flag & MM_F_SR_RNA) && is_splice);
|
592
648
|
int32_t rid = a[r->as].x<<1>>33, rev = a[r->as].x>>63, as1, cnt1;
|
593
|
-
uint8_t *tseq, *qseq, *junc;
|
594
|
-
int32_t i, l, bw, bw_long, dropped = 0,
|
649
|
+
uint8_t *tseq, *qseq, *junc, *tseq2 = 0, *junc2 = 0;
|
650
|
+
int32_t i, l, bw, bw_long, dropped = 0, ksw_flag = 0, rs0, re0, qs0, qe0;
|
595
651
|
int32_t rs, re, qs, qe;
|
596
652
|
int32_t rs1, qs1, re1, qe1;
|
597
653
|
int8_t mat[25];
|
@@ -626,9 +682,10 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|
626
682
|
assert(cnt1 > 0);
|
627
683
|
|
628
684
|
if (is_splice) {
|
629
|
-
if (splice_flag & MM_F_SPLICE_FOR)
|
630
|
-
if (splice_flag & MM_F_SPLICE_REV)
|
631
|
-
if (opt->flag & MM_F_SPLICE_FLANK)
|
685
|
+
if (splice_flag & MM_F_SPLICE_FOR) ksw_flag |= rev? KSW_EZ_SPLICE_REV : KSW_EZ_SPLICE_FOR;
|
686
|
+
if (splice_flag & MM_F_SPLICE_REV) ksw_flag |= rev? KSW_EZ_SPLICE_FOR : KSW_EZ_SPLICE_REV;
|
687
|
+
if (opt->flag & MM_F_SPLICE_FLANK) ksw_flag |= KSW_EZ_SPLICE_FLANK;
|
688
|
+
if (mi->spsc) ksw_flag |= KSW_EZ_SPLICE_SCORE;
|
632
689
|
}
|
633
690
|
|
634
691
|
/* Look for the start and end of regions to perform DP. This sounds easy
|
@@ -713,6 +770,12 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|
713
770
|
tseq = (uint8_t*)kmalloc(km, re0 - rs0);
|
714
771
|
junc = (uint8_t*)kmalloc(km, re0 - rs0);
|
715
772
|
|
773
|
+
if (is_sr_rna) {
|
774
|
+
int32_t max_tlen2 = MM_MAX_QLEN_FLANK * 2 + opt->q2 * 2;
|
775
|
+
tseq2 = Kmalloc(km, uint8_t, max_tlen2 * 2);
|
776
|
+
junc2 = tseq2 + max_tlen2;
|
777
|
+
}
|
778
|
+
|
716
779
|
if (qs > 0 && rs > 0) { // left extension; probably the condition can be changed to "qs > qs0 && rs > rs0"
|
717
780
|
if (opt->flag & MM_F_QSTRAND) {
|
718
781
|
qseq = &qseq0[0][qs0];
|
@@ -721,11 +784,11 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|
721
784
|
qseq = &qseq0[rev][qs0];
|
722
785
|
mm_idx_getseq(mi, rid, rs0, rs, tseq);
|
723
786
|
}
|
724
|
-
|
787
|
+
mm_get_junc(mi, rid, rs0, rs, !!(ksw_flag&KSW_EZ_SPLICE_REV), junc);
|
725
788
|
mm_seq_rev(qs - qs0, qseq);
|
726
789
|
mm_seq_rev(rs - rs0, tseq);
|
727
790
|
mm_seq_rev(rs - rs0, junc);
|
728
|
-
mm_align_pair(km, opt, qs - qs0, qseq, rs - rs0, tseq, junc, mat, bw, opt->end_bonus, r->split_inv? opt->zdrop_inv : opt->zdrop,
|
791
|
+
mm_align_pair(km, opt, qs - qs0, qseq, rs - rs0, tseq, junc, mat, bw, opt->end_bonus, r->split_inv? opt->zdrop_inv : opt->zdrop, ksw_flag|KSW_EZ_EXTZ_ONLY|KSW_EZ_RIGHT|KSW_EZ_REV_CIGAR, ez);
|
729
792
|
if (ez->n_cigar > 0) {
|
730
793
|
mm_append_cigar(r, ez->n_cigar, ez->cigar);
|
731
794
|
r->p->dp_score += ez->max;
|
@@ -737,14 +800,14 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|
737
800
|
re1 = rs, qe1 = qs;
|
738
801
|
assert(qs1 >= 0 && rs1 >= 0);
|
739
802
|
|
740
|
-
for (i = is_sr? cnt1 - 1 : 1; i < cnt1; ++i) { // gap filling
|
803
|
+
for (i = is_sr? cnt1 - 1 : 1; i < cnt1; ++i) { // gap filling; for short genomic reads, fill from the first seed to the last
|
741
804
|
if ((a[as1+i].y & (MM_SEED_IGNORE|MM_SEED_TANDEM)) && i != cnt1 - 1) continue;
|
742
805
|
if (is_sr && !(mi->flag & MM_I_HPC)) {
|
743
806
|
re = (int32_t)a[as1 + i].x + 1;
|
744
807
|
qe = (int32_t)a[as1 + i].y + 1;
|
745
808
|
} else mm_adjust_minier(mi, qseq0, &a[as1 + i], &re, &qe);
|
746
809
|
re1 = re, qe1 = qe;
|
747
|
-
if (i == cnt1 - 1 || (a[as1+i].y&MM_SEED_LONG_JOIN) || (qe - qs >= opt->min_ksw_len && re - rs >= opt->min_ksw_len)) {
|
810
|
+
if (i == cnt1 - 1 || (a[as1+i].y&MM_SEED_LONG_JOIN) || (qe - qs >= opt->min_ksw_len && re - rs >= opt->min_ksw_len)) { // gap filling
|
748
811
|
int j, bw1 = bw_long, zdrop_code;
|
749
812
|
if (a[as1+i].y & MM_SEED_LONG_JOIN)
|
750
813
|
bw1 = qe - qs > re - rs? qe - qs : re - rs;
|
@@ -756,21 +819,29 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|
756
819
|
qseq = &qseq0[rev][qs];
|
757
820
|
mm_idx_getseq(mi, rid, rs, re, tseq);
|
758
821
|
}
|
759
|
-
|
760
|
-
if (is_sr) { // perform ungapped alignment
|
822
|
+
mm_get_junc(mi, rid, rs, re, !!(ksw_flag&KSW_EZ_SPLICE_REV), junc);
|
823
|
+
if (is_sr || (is_sr_rna && qe - qs == re - rs)) { // perform ungapped alignment
|
824
|
+
int32_t max_gapped_score = (qe - qs - 2) * opt->a - 2 * (opt->q + opt->e);
|
761
825
|
assert(qe - qs == re - rs);
|
762
826
|
ksw_reset_extz(ez);
|
763
827
|
for (j = 0, ez->score = 0; j < qe - qs; ++j) {
|
764
|
-
if (qseq[j] >= 4 || tseq[j] >= 4) ez->score += opt->
|
828
|
+
if (qseq[j] >= 4 || tseq[j] >= 4) ez->score += opt->sc_ambi > 0? -opt->sc_ambi : opt->sc_ambi;
|
765
829
|
else ez->score += qseq[j] == tseq[j]? opt->a : -opt->b;
|
766
830
|
}
|
767
|
-
|
831
|
+
if (ez->score > max_gapped_score)
|
832
|
+
ez->cigar = ksw_push_cigar(km, &ez->n_cigar, &ez->m_cigar, ez->cigar, MM_CIGAR_MATCH, qe - qs);
|
833
|
+
else
|
834
|
+
mm_align_pair(km, opt, qe - qs, qseq, re - rs, tseq, junc, mat, bw1, -1, opt->zdrop, ksw_flag|KSW_EZ_APPROX_MAX, ez);
|
768
835
|
} else { // perform normal gapped alignment
|
769
|
-
|
836
|
+
int32_t skip_full = 0;
|
837
|
+
if (is_sr_rna)
|
838
|
+
skip_full = mm_align_sr_rna(km, opt, qe - qs, qseq, re - rs, tseq, junc, tseq2, junc2, mat, bw1, -1, opt->zdrop, ksw_flag|KSW_EZ_APPROX_MAX, ez);
|
839
|
+
if (!skip_full)
|
840
|
+
mm_align_pair(km, opt, qe - qs, qseq, re - rs, tseq, junc, mat, bw1, -1, opt->zdrop, ksw_flag|KSW_EZ_APPROX_MAX, ez); // first pass: with approximate Z-drop
|
770
841
|
}
|
771
842
|
// test Z-drop and inversion Z-drop
|
772
843
|
if ((zdrop_code = mm_test_zdrop(km, opt, qseq, tseq, ez->n_cigar, ez->cigar, mat)) != 0)
|
773
|
-
mm_align_pair(km, opt, qe - qs, qseq, re - rs, tseq, junc, mat, bw1, -1, zdrop_code == 2? opt->zdrop_inv : opt->zdrop,
|
844
|
+
mm_align_pair(km, opt, qe - qs, qseq, re - rs, tseq, junc, mat, bw1, -1, zdrop_code == 2? opt->zdrop_inv : opt->zdrop, ksw_flag, ez); // second pass: lift approximate
|
774
845
|
// update CIGAR
|
775
846
|
if (ez->n_cigar > 0)
|
776
847
|
mm_append_cigar(r, ez->n_cigar, ez->cigar);
|
@@ -808,8 +879,8 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|
808
879
|
qseq = &qseq0[rev][qe];
|
809
880
|
mm_idx_getseq(mi, rid, re, re0, tseq);
|
810
881
|
}
|
811
|
-
|
812
|
-
mm_align_pair(km, opt, qe0 - qe, qseq, re0 - re, tseq, junc, mat, bw, opt->end_bonus, opt->zdrop,
|
882
|
+
mm_get_junc(mi, rid, re, re0, !!(ksw_flag&KSW_EZ_SPLICE_REV), junc);
|
883
|
+
mm_align_pair(km, opt, qe0 - qe, qseq, re0 - re, tseq, junc, mat, bw, opt->end_bonus, opt->zdrop, ksw_flag|KSW_EZ_EXTZ_ONLY, ez);
|
813
884
|
if (ez->n_cigar > 0) {
|
814
885
|
mm_append_cigar(r, ez->n_cigar, ez->cigar);
|
815
886
|
r->p->dp_score += ez->max;
|
@@ -832,11 +903,12 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|
832
903
|
mm_idx_getseq(mi, rid, rs1, re1, tseq);
|
833
904
|
qseq = &qseq0[r->rev][qs1];
|
834
905
|
}
|
835
|
-
mm_update_extra(r, qseq, tseq, mat, opt->q, opt->e, opt->flag & MM_F_EQX, !(
|
906
|
+
mm_update_extra(r, qseq, tseq, mat, opt->q, opt->e, opt->flag & MM_F_EQX, !(is_sr || is_sr_rna));
|
836
907
|
if (rev && r->p->trans_strand)
|
837
908
|
r->p->trans_strand ^= 3; // flip to the read strand
|
838
909
|
}
|
839
910
|
|
911
|
+
if (tseq2) kfree(km, tseq2);
|
840
912
|
kfree(km, tseq);
|
841
913
|
kfree(km, junc);
|
842
914
|
}
|
@@ -891,7 +963,7 @@ static int mm_align1_inv(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, i
|
|
891
963
|
}
|
892
964
|
r_inv->rs = r1->re + t_off;
|
893
965
|
r_inv->re = r_inv->rs + ez->max_t + 1;
|
894
|
-
mm_update_extra(r_inv, &qseq[q_off], &tseq[t_off], mat, opt->q, opt->e, opt->flag & MM_F_EQX, !(opt->flag & MM_F_SR));
|
966
|
+
mm_update_extra(r_inv, &qseq[q_off], &tseq[t_off], mat, opt->q, opt->e, opt->flag & MM_F_EQX, !(opt->flag & (MM_F_SR|MM_F_SR_RNA)));
|
895
967
|
ret = 1;
|
896
968
|
end_align1_inv:
|
897
969
|
kfree(km, tseq);
|
@@ -992,24 +1064,36 @@ mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *m
|
|
992
1064
|
n_a = mm_squeeze_a(km, n_regs, regs, a);
|
993
1065
|
memset(&ez, 0, sizeof(ksw_extz_t));
|
994
1066
|
for (i = 0; i < n_regs; ++i) {
|
995
|
-
mm_reg1_t r2;
|
1067
|
+
mm_reg1_t r2; // only used for inversion
|
996
1068
|
if ((opt->flag&MM_F_SPLICE) && (opt->flag&MM_F_SPLICE_FOR) && (opt->flag&MM_F_SPLICE_REV)) { // then do two rounds of alignments for both strands
|
997
|
-
mm_reg1_t s[2], s2[2];
|
998
|
-
int which, trans_strand;
|
1069
|
+
mm_reg1_t s[2], s2[2], *r;
|
999
1070
|
s[0] = s[1] = regs[i];
|
1000
|
-
mm_align1(km, opt, mi, qlen, qseq0, &s[0], &s2[0], n_a, a, &ez, MM_F_SPLICE_FOR);
|
1001
|
-
|
1002
|
-
if (s[0].p->dp_score > s[1].p->dp_score) which = 0, trans_strand = 1;
|
1003
|
-
else if (s[0].p->dp_score < s[1].p->dp_score) which = 1, trans_strand = 2;
|
1004
|
-
else trans_strand = 3, which = (qlen + s[0].p->dp_score) & 1; // randomly choose a strand, effectively
|
1005
|
-
if (which == 0) {
|
1071
|
+
mm_align1(km, opt, mi, qlen, qseq0, &s[0], &s2[0], n_a, a, &ez, MM_F_SPLICE_FOR); // assume the transcript is on the + strand of the genome
|
1072
|
+
if ((opt->flag&MM_F_SR_RNA) && regs[i].qe - regs[i].qs == regs[i].re - regs[i].rs && s[0].qe - s[0].qs == s[0].re - s[0].rs && s[0].qs == 0 && s[0].qe == qlen) {
|
1006
1073
|
regs[i] = s[0], r2 = s2[0];
|
1007
|
-
|
1074
|
+
regs[i].p->trans_strand = 0;
|
1008
1075
|
} else {
|
1009
|
-
|
1010
|
-
|
1076
|
+
int which, trans_strand;
|
1077
|
+
mm_align1(km, opt, mi, qlen, qseq0, &s[1], &s2[1], n_a, a, &ez, MM_F_SPLICE_REV); // assume the transcript on the - strand
|
1078
|
+
if (s[0].p->dp_score > s[1].p->dp_score) which = 0, trans_strand = 1;
|
1079
|
+
else if (s[0].p->dp_score < s[1].p->dp_score) which = 1, trans_strand = 2;
|
1080
|
+
else trans_strand = 3, which = (qlen + s[0].p->dp_score) & 1; // randomly choose a strand, effectively
|
1081
|
+
if (which == 0) {
|
1082
|
+
regs[i] = s[0], r2 = s2[0];
|
1083
|
+
free(s[1].p);
|
1084
|
+
} else {
|
1085
|
+
regs[i] = s[1], r2 = s2[1];
|
1086
|
+
free(s[0].p);
|
1087
|
+
}
|
1088
|
+
r = &regs[i];
|
1089
|
+
r->p->trans_strand = trans_strand;
|
1090
|
+
if (r->is_spliced) {
|
1091
|
+
if (trans_strand == 1 || trans_strand == 2) // this is an *approximate* way to tell if there are splice signals.
|
1092
|
+
r->p->dp_max += (opt->a + opt->b) + ((opt->a + opt->b) >> 1);
|
1093
|
+
else if (trans_strand == 3)
|
1094
|
+
r->p->dp_max -= opt->a + opt->b;
|
1095
|
+
}
|
1011
1096
|
}
|
1012
|
-
regs[i].p->trans_strand = trans_strand;
|
1013
1097
|
} else { // one round of alignment
|
1014
1098
|
mm_align1(km, opt, mi, qlen, qseq0, &regs[i], &r2, n_a, a, &ez, opt->flag);
|
1015
1099
|
if (opt->flag&MM_F_SPLICE)
|
@@ -1027,7 +1111,7 @@ mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *m
|
|
1027
1111
|
kfree(km, qseq0[0]);
|
1028
1112
|
kfree(km, ez.cigar);
|
1029
1113
|
mm_filter_regs(opt, qlen, n_regs_, regs);
|
1030
|
-
if (!(opt->flag&MM_F_SR) && !opt->split_prefix && qlen >= opt->rank_min_len) {
|
1114
|
+
if (!(opt->flag&(MM_F_SR|MM_F_SR_RNA|MM_F_ALL_CHAINS)) && !opt->split_prefix && qlen >= opt->rank_min_len) {
|
1031
1115
|
mm_update_dp_max(qlen, *n_regs_, regs, opt->rank_frac, opt->a, opt->b);
|
1032
1116
|
mm_filter_regs(opt, qlen, n_regs_, regs);
|
1033
1117
|
}
|
data/ext/minimap2/cookbook.md
CHANGED
@@ -31,8 +31,8 @@ To acquire the data used in this cookbook and to install minimap2 and paftools,
|
|
31
31
|
please follow the command lines below:
|
32
32
|
```sh
|
33
33
|
# install minimap2 executables
|
34
|
-
curl -L https://github.com/lh3/minimap2/releases/download/v2.
|
35
|
-
cp minimap2-2.
|
34
|
+
curl -L https://github.com/lh3/minimap2/releases/download/v2.29/minimap2-2.29_x64-linux.tar.bz2 | tar jxf -
|
35
|
+
cp minimap2-2.29_x64-linux/{minimap2,k8,paftools.js} . # copy executables
|
36
36
|
export PATH="$PATH:"`pwd` # put the current directory on PATH
|
37
37
|
# download example datasets
|
38
38
|
curl -L https://github.com/lh3/minimap2/releases/download/v2.10/cookbook-data.tgz | tar zxf -
|