minimap2 0.2.27.0 → 0.2.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -0
  3. data/ext/cmappy/cmappy.c +3 -3
  4. data/ext/cmappy/cmappy.h +1 -1
  5. data/ext/minimap2/FAQ.md +1 -1
  6. data/ext/minimap2/Makefile +4 -3
  7. data/ext/minimap2/NEWS.md +68 -0
  8. data/ext/minimap2/README.md +30 -14
  9. data/ext/minimap2/align.c +136 -52
  10. data/ext/minimap2/cookbook.md +2 -2
  11. data/ext/minimap2/format.c +59 -5
  12. data/ext/minimap2/hit.c +14 -6
  13. data/ext/minimap2/index.c +304 -13
  14. data/ext/minimap2/jump.c +201 -0
  15. data/ext/minimap2/kalloc.h +8 -0
  16. data/ext/minimap2/ksw2.h +5 -2
  17. data/ext/minimap2/ksw2_dispatch.c +5 -5
  18. data/ext/minimap2/ksw2_exts2_sse.c +17 -6
  19. data/ext/minimap2/lchain.c +5 -5
  20. data/ext/minimap2/main.c +64 -12
  21. data/ext/minimap2/map.c +35 -8
  22. data/ext/minimap2/minimap.h +14 -3
  23. data/ext/minimap2/minimap2.1 +98 -46
  24. data/ext/minimap2/misc/README.md +2 -1
  25. data/ext/minimap2/misc/pafcluster.js +241 -0
  26. data/ext/minimap2/misc/paftools.js +17 -6
  27. data/ext/minimap2/mmpriv.h +25 -4
  28. data/ext/minimap2/options.c +36 -3
  29. data/ext/minimap2/python/cmappy.h +3 -3
  30. data/ext/minimap2/python/cmappy.pxd +5 -2
  31. data/ext/minimap2/python/mappy.pyx +20 -7
  32. data/ext/minimap2/python/minimap2.py +5 -3
  33. data/ext/minimap2/seed.c +2 -1
  34. data/ext/minimap2/setup.py +2 -2
  35. data/ext/minimap2.patch +2 -2
  36. data/lib/minimap2/aligner.rb +19 -12
  37. data/lib/minimap2/alignment.rb +1 -0
  38. data/lib/minimap2/ffi/constants.rb +10 -2
  39. data/lib/minimap2/ffi/functions.rb +145 -6
  40. data/lib/minimap2/ffi/mappy.rb +1 -1
  41. data/lib/minimap2/version.rb +1 -1
  42. data/lib/minimap2.rb +2 -2
  43. metadata +8 -7
  44. data/ext/minimap2/misc/mmphase.js +0 -335
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f618028eabf476209264541d9037f68961548eb02dad4a22338bacdfe383fce7
4
- data.tar.gz: f97eb69e9b1e78357cd738ba2a63ce36034e0fbd7c253c5a89a14b23ade19b01
3
+ metadata.gz: 15977381155ba5ee7d3352f9669db9dcf074e2073ac54961bdff2f6c9af77fb1
4
+ data.tar.gz: 67abab72034a636e796cc316e9891c81e049dec9ce8a4cd5b68f975384ba152d
5
5
  SHA512:
6
- metadata.gz: 1ab742822a921c06f31671b0a555a0220cc9b2d0dc2e9d6ef7b72ff90fbc33de4d2f9285819b5064a28e3789065290e21f7e4dedb162c9927aefd8c860ceea35
7
- data.tar.gz: 8d0b005004a1ac625a61d8a68073b31c08b4d156308ebfa01e18508e2ed520d948fa5d2a2e4804978a0846abc90bdc142183ae2d7d2c466cb2b00f87afff4d71
6
+ metadata.gz: b6df8d975c534b9c71715919867abc905e1bdf5712d3c1ba63cd6d15ea657367678153b448187186b128ee65abfc190f7415dc5924bb009cc61e4debdb2801fa
7
+ data.tar.gz: 87c2bf817379daf71b410e39ed3e5e5599329f0aa44f1db59449acb42e8033d58ec46fc74e94136e60595d101eaef94b4ba7561d7f4b9fe82d7a5cc6c62d77fa
data/README.md CHANGED
@@ -65,6 +65,7 @@ pp hits
65
65
  * Minimap2 module
66
66
  - fastx_read Read fasta/fastq file.
67
67
  - revcomp Reverse complement sequence.
68
+ - execute Calls the main function of Minimap2 with arguments. `Minimap2.execute("--version")`
68
69
 
69
70
  * Aligner class
70
71
  * attributes
data/ext/cmappy/cmappy.c CHANGED
@@ -50,13 +50,13 @@ void mm_reset_timer(void)
50
50
  mm_realtime0 = realtime();
51
51
  }
52
52
 
53
- mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt)
53
+ mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char* seqname, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt)
54
54
  {
55
55
  mm_reg1_t *r;
56
56
 
57
57
  // Py_BEGIN_ALLOW_THREADS
58
58
  if (seq2 == 0) {
59
- r = mm_map(mi, strlen(seq1), seq1, n_regs, b, opt, NULL);
59
+ r = mm_map(mi, strlen(seq1), seq1, n_regs, b, opt, seqname);
60
60
  } else {
61
61
  int _n_regs[2];
62
62
  mm_reg1_t *regs[2];
@@ -73,7 +73,7 @@ mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const char *seq2, in
73
73
  seq[1][i] = seq_comp_table[t];
74
74
  }
75
75
  if (len[1]&1) seq[1][len[1]>>1] = seq_comp_table[(uint8_t)seq[1][len[1]>>1]];
76
- mm_map_frag(mi, 2, len, (const char**)seq, _n_regs, regs, b, opt, NULL);
76
+ mm_map_frag(mi, 2, len, (const char**)seq, _n_regs, regs, b, opt, seqname);
77
77
  for (i = 0; i < _n_regs[1]; ++i)
78
78
  regs[1][i].rev = !regs[1][i].rev;
79
79
  *n_regs = _n_regs[0] + _n_regs[1];
data/ext/cmappy/cmappy.h CHANGED
@@ -33,7 +33,7 @@ int mm_verbose_level(int v);
33
33
  void mm_reset_timer(void);
34
34
 
35
35
  extern unsigned char seq_comp_table[256];
36
- mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt);
36
+ mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char* seqname, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt);
37
37
 
38
38
  char *mappy_revcomp(int len, const uint8_t *seq);
39
39
 
data/ext/minimap2/FAQ.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Without `-a`, `-c` or `--cs`, minimap2 only finds *approximate* mapping
4
4
  locations without detailed base alignment. In particular, the start and end
5
- positions of the alignment are impricise. With one of those options, minimap2
5
+ positions of the alignment are imprecise. With one of those options, minimap2
6
6
  will perform base alignment, which is generally more accurate but is much
7
7
  slower.
8
8
 
data/ext/minimap2/Makefile CHANGED
@@ -2,7 +2,7 @@ CFLAGS= -g -Wall -O2 -Wc++-compat #-Wextra
2
2
  CPPFLAGS= -DHAVE_KALLOC
3
3
  INCLUDES=
4
4
  OBJS= kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o options.o index.o \
5
- lchain.o align.o hit.o seed.o map.o format.o pe.o esterr.o splitidx.o \
5
+ lchain.o align.o hit.o seed.o jump.o map.o format.o pe.o esterr.o splitidx.o \
6
6
  ksw2_ll_sse.o
7
7
  PROG= minimap2
8
8
  PROG_EXTRA= sdust minimap2-lite
@@ -115,8 +115,9 @@ esterr.o: mmpriv.h minimap.h bseq.h kseq.h
115
115
  example.o: minimap.h kseq.h
116
116
  format.o: kalloc.h mmpriv.h minimap.h bseq.h kseq.h
117
117
  hit.o: mmpriv.h minimap.h bseq.h kseq.h kalloc.h khash.h
118
- index.o: kthread.h bseq.h minimap.h mmpriv.h kseq.h kvec.h kalloc.h khash.h
119
- index.o: ksort.h
118
+ index.o: kthread.h bseq.h minimap.h mmpriv.h kseq.h ksw2.h kalloc.h kvec.h
119
+ index.o: khash.h ksort.h
120
+ jump.o: mmpriv.h minimap.h bseq.h kseq.h
120
121
  kalloc.o: kalloc.h
121
122
  ksw2_extd2_sse.o: ksw2.h kalloc.h
122
123
  ksw2_exts2_sse.o: ksw2.h kalloc.h
data/ext/minimap2/NEWS.md CHANGED
@@ -1,3 +1,71 @@
1
+ Release 2.29-r1283 (18 April 2025)
2
+ ----------------------------------
3
+
4
+ Notable changes to minimap2:
5
+
6
+ * New feature: added the `splice:sr` preset for short RNA-seq read alignment.
7
+ Users may use `-j` to specify known gene annotation to improve spliced
8
+ alignment close to the ends of short reads. Also added `--write-junc` and
9
+ `--pass1` for 2-pass short-read RNA-seq alignment.
10
+
11
+ * Experimental feature: read splice scores from a file specified by `--spsc`
12
+ and consider the scores during base alignment. The feature makes it possible
13
+ to apply advanced splice models and to improve spliced alignment.
14
+
15
+ * Change: adjusted the mapping quality calculation for spliced alignment.
16
+
17
+ * Bugfixes: a) missing overlap alignment when base alignment is requested
18
+ (#969); b) incorrect summary information for long genomes (#1192); c)
19
+ missing parameter check for `--score-N` (#1226).
20
+
21
+ * Improvement: a) warn about absent junction files (#1229); b) report an error
22
+ if a wrong preset prefixed with "splice" is specified (#589).
23
+
24
+ Notable changes to mappy:
25
+
26
+ * Improvement: allow passing read name (#1260)
27
+
28
+ * Improvement: exposed score for ambiguous bases (#1240)
29
+
30
+ Minimap2 now supports short/long genomic/RNA-seq read alignment along with
31
+ contig alignment and all-vs-all read overlapping. It produces identical genomic
32
+ long-read or contig alignment to v2.27. Short genomic read alignment and the
33
+ mapping quality of long RNA-seq read alignment may slightly differ in very rare
34
+ cases.
35
+
36
+ (2.29: 18 April 2025, r1283)
37
+
38
+
39
+
40
+ Release 2.28-r1209 (27 March 2024)
41
+ ----------------------------------
42
+
43
+ Notable changes to minimap2:
44
+
45
+ * Bugfix: `--MD` was not working properly due to the addition of `--ds` in the
46
+ last release (#1181 and #1182).
47
+
48
+ * New feature: added an experimental preset `lq:hqae` for aligning accurate
49
+ long reads back to their assembly. It has been observed that `map-hifi` and
50
+ `lr:hq` may produce many wrong alignments around centromeres when accurate
51
+ long reads (PacBio HiFi or Nanopore duplex/Q20+) are mapped to a diploid
52
+ assembly constructed from them. This new preset produces much more accurate
53
+ alignment. It is still experimental and may be subjective to changes in
54
+ future.
55
+
56
+ * Change: reduced the default `--cap-kalloc` to 500m to lower the peak
57
+ memory consumption (#855).
58
+
59
+ Notable changes to mappy:
60
+
61
+ * Bugfix: mappy option struct was out of sync with minimap2 (#1177).
62
+
63
+ Minimap2 should output identical alignments to v2.27.
64
+
65
+ (2.28: 27 March 2024, r1209)
66
+
67
+
68
+
1
69
  Release 2.27-r1193 (12 March 2024)
2
70
  ----------------------------------
3
71
 
data/ext/minimap2/README.md CHANGED
@@ -14,13 +14,15 @@ cd minimap2 && make
14
14
  # use presets (no test data)
15
15
  ./minimap2 -ax map-pb ref.fa pacbio.fq.gz > aln.sam # PacBio CLR genomic reads
16
16
  ./minimap2 -ax map-ont ref.fa ont.fq.gz > aln.sam # Oxford Nanopore genomic reads
17
- ./minimap2 -ax map-hifi ref.fa pacbio-ccs.fq.gz > aln.sam # PacBio HiFi/CCS genomic reads (v2.19 or later)
18
- ./minimap2 -ax lr:hq ref.fa ont-Q20.fq.gz > aln.sam # Nanopore Q20 genomic reads (v2.27 or later)
17
+ ./minimap2 -ax map-hifi ref.fa pacbio-ccs.fq.gz > aln.sam # PacBio HiFi/CCS genomic reads (v2.19+)
18
+ ./minimap2 -ax lr:hq ref.fa ont-Q20.fq.gz > aln.sam # Nanopore Q20 genomic reads (v2.27+)
19
19
  ./minimap2 -ax sr ref.fa read1.fa read2.fa > aln.sam # short genomic paired-end reads
20
20
  ./minimap2 -ax splice ref.fa rna-reads.fa > aln.sam # spliced long reads (strand unknown)
21
- ./minimap2 -ax splice -uf -k14 ref.fa reads.fa > aln.sam # noisy Nanopore Direct RNA-seq
22
- ./minimap2 -ax splice:hq -uf ref.fa query.fa > aln.sam # Final PacBio Iso-seq or traditional cDNA
23
- ./minimap2 -ax splice --junc-bed anno.bed12 ref.fa query.fa > aln.sam # prioritize on annotated junctions
21
+ ./minimap2 -ax splice -uf -k14 ref.fa reads.fa > aln.sam # noisy Nanopore direct RNA-seq
22
+ ./minimap2 -ax splice:hq -uf ref.fa query.fa > aln.sam # PacBio Kinnex/Iso-seq (RNA-seq)
23
+ ./minimap2 -ax splice --junc-bed=anno.bed12 ref.fa query.fa > aln.sam # use annotated junctions
24
+ ./minimap2 -ax splice:sr ref.fa r1.fq r2.fq > aln.sam # short-read RNA-seq (v2.29+)
25
+ ./minimap2 -ax splice:sr -j anno.bed12 ref.fa r1.fq r2.fq > aln.sam
24
26
  ./minimap2 -cx asm5 asm1.fa asm2.fa > aln.paf # intra-species asm-to-asm alignment
25
27
  ./minimap2 -x ava-pb reads.fa reads.fa > overlaps.paf # PacBio read overlap
26
28
  ./minimap2 -x ava-ont reads.fa reads.fa > overlaps.paf # Nanopore read overlap
@@ -38,7 +40,8 @@ man ./minimap2.1
38
40
  - [Map long noisy genomic reads](#map-long-genomic)
39
41
  - [Map long mRNA/cDNA reads](#map-long-splice)
40
42
  - [Find overlaps between long reads](#long-overlap)
41
- - [Map short accurate genomic reads](#short-genomic)
43
+ - [Map short genomic reads](#short-genomic)
44
+ - [Map short RNA-seq reads](#short-rna-seq)
42
45
  - [Full genome/assembly alignment](#full-genome)
43
46
  - [Advanced features](#advanced)
44
47
  - [Working with >65535 CIGAR operations](#long-cigar)
@@ -74,8 +77,8 @@ Detailed evaluations are available from the [minimap2 paper][doi] or the
74
77
  Minimap2 is optimized for x86-64 CPUs. You can acquire precompiled binaries from
75
78
  the [release page][release] with:
76
79
  ```sh
77
- curl -L https://github.com/lh3/minimap2/releases/download/v2.27/minimap2-2.27_x64-linux.tar.bz2 | tar -jxvf -
78
- ./minimap2-2.27_x64-linux/minimap2
80
+ curl -L https://github.com/lh3/minimap2/releases/download/v2.29/minimap2-2.29_x64-linux.tar.bz2 | tar -jxvf -
81
+ ./minimap2-2.29_x64-linux/minimap2
79
82
  ```
80
83
  If you want to compile from the source, you need to have a C compiler, GNU make
81
84
  and zlib development files installed. Then type `make` in the source code
@@ -171,9 +174,8 @@ or the last exons.
171
174
 
172
175
  Minimap2 rates an alignment by the score of the max-scoring sub-segment,
173
176
  *excluding* introns, and marks the best alignment as primary in SAM. When a
174
- spliced gene also has unspliced pseudogenes, minimap2 does not intentionally
175
- prefer spliced alignment, though in practice it more often marks the spliced
176
- alignment as the primary. By default, minimap2 outputs up to five secondary
177
+ spliced gene also has unspliced pseudogenes, minimap2 slightly prefers
178
+ the spliced alignment. By default, minimap2 outputs up to five secondary
177
179
  alignments (i.e. likely pseudogenes in the context of RNA-seq mapping). This
178
180
  can be tuned with option **-N**.
179
181
 
@@ -204,6 +206,10 @@ bonus score (tuned by `--junc-bonus`) if an aligned junction matches a junction
204
206
  in the annotation. Option `--junc-bed` also takes 5-column BED, including the
205
207
  strand field. In this case, each line indicates an oriented junction.
206
208
 
209
+ **Note:** `--junc-bed` is intended for long noisy RNA-seq reads only.
210
+ Applying the option to short RNA-seq reads would increase run time with little
211
+ improvement to junction accuracy.
212
+
207
213
  #### <a name="long-overlap"></a>Find overlaps between long reads
208
214
 
209
215
  ```sh
@@ -216,7 +222,7 @@ the overlapping mode because it is slow and may produce false positive
216
222
  overlaps. However, if performance is not a concern, you may try to add `-a` or
217
223
  `-c` anyway.
218
224
 
219
- #### <a name="short-genomic"></a>Map short accurate genomic reads
225
+ #### <a name="short-genomic"></a>Map short genomic reads
220
226
 
221
227
  ```sh
222
228
  minimap2 -ax sr ref.fa reads-se.fq > aln.sam # single-end alignment
@@ -229,8 +235,18 @@ be paired if they are adjacent in the input stream and have the same name (with
229
235
  the `/[0-9]` suffix trimmed if present). Single- and paired-end reads can be
230
236
  mixed.
231
237
 
232
- Minimap2 does not work well with short spliced reads. There are many capable
233
- RNA-seq mappers for short reads.
238
+ #### <a name="short-rna-seq"></a>Map short RNA-seq reads
239
+
240
+ ```sh
241
+ minimap2 -ax splice:sr ref.fa reads-se.fq.gz > aln.sam # single-end
242
+ minimap2 -ax splice:sr ref.fa r1.fq.gz r2.fq.gz > aln.sam # paired-end
243
+ minimap2 -ax splice:sr -j anno.bed ref.fa r1.fq r2.fq > aln.sam # use annotation
244
+ # 2-pass alignment
245
+ minimap2 -x splice:sr -j anno.bed --write-junc ref.fa r1.fq r2.fq > junc.bed
246
+ minimap2 -ax splice:sr -j anno.bed --pass1=junc.bed ref.fa r1.fq r2.fq > aln.sam
247
+ ```
248
+ The new preset `splice:sr` was added in v2.29. It functions similarly to `sr`
249
+ except that it performs spliced alignment.
234
250
 
235
251
  #### <a name="full-genome"></a>Full genome/assembly alignment
236
252
 
data/ext/minimap2/align.c CHANGED
@@ -6,6 +6,8 @@
6
6
  #include "mmpriv.h"
7
7
  #include "ksw2.h"
8
8
 
9
+ #define MM_MAX_QLEN_FLANK 100
10
+
9
11
  static void ksw_gen_simple_mat(int m, int8_t *mat, int8_t a, int8_t b, int8_t sc_ambi)
10
12
  {
11
13
  int i, j;
@@ -258,7 +260,7 @@ static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *ts
258
260
  if (p == 0) return;
259
261
  mm_fix_cigar(r, qseq, tseq, &qshift, &tshift);
260
262
  qseq += qshift, tseq += tshift; // qseq and tseq may be shifted due to the removal of leading I/D
261
- r->blen = r->mlen = 0;
263
+ r->blen = r->mlen = 0, r->is_spliced = 0;
262
264
  for (k = 0; k < p->n_cigar; ++k) {
263
265
  uint32_t op = p->cigar[k]&0xf, len = p->cigar[k]>>4;
264
266
  if (op == MM_CIGAR_MATCH) {
@@ -292,7 +294,7 @@ static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *ts
292
294
  if (s < 0) s = 0;
293
295
  toff += len;
294
296
  } else if (op == MM_CIGAR_N_SKIP) {
295
- toff += len;
297
+ r->is_spliced = 1, toff += len;
296
298
  }
297
299
  }
298
300
  p->dp_max = p->dp_max0 = (int32_t)(max + .499);
@@ -300,9 +302,8 @@ static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *ts
300
302
  if (is_eqx) mm_update_cigar_eqx(r, qseq, tseq); // NB: it has to be called here as changes to qseq and tseq are not returned
301
303
  }
302
304
 
303
- static void mm_append_cigar(mm_reg1_t *r, uint32_t n_cigar, uint32_t *cigar) // TODO: this calls the libc realloc()
305
+ void mm_enlarge_cigar(mm_reg1_t *r, uint32_t n_cigar) // TODO: this calls the libc realloc()
304
306
  {
305
- mm_extra_t *p;
306
307
  if (n_cigar == 0) return;
307
308
  if (r->p == 0) {
308
309
  uint32_t capacity = n_cigar + sizeof(mm_extra_t)/4;
@@ -314,6 +315,13 @@ static void mm_append_cigar(mm_reg1_t *r, uint32_t n_cigar, uint32_t *cigar) //
314
315
  kroundup32(r->p->capacity);
315
316
  r->p = (mm_extra_t*)realloc(r->p, r->p->capacity * 4);
316
317
  }
318
+ }
319
+
320
+ static void mm_append_cigar(mm_reg1_t *r, uint32_t n_cigar, const uint32_t *cigar)
321
+ {
322
+ mm_extra_t *p;
323
+ if (n_cigar == 0) return;
324
+ mm_enlarge_cigar(r, n_cigar);
317
325
  p = r->p;
318
326
  if (p->n_cigar > 0 && (p->cigar[p->n_cigar-1]&0xf) == (cigar[0]&0xf)) { // same CIGAR op at the boundary
319
327
  p->cigar[p->n_cigar-1] += cigar[0]>>4<<4;
@@ -325,29 +333,31 @@ static void mm_append_cigar(mm_reg1_t *r, uint32_t n_cigar, uint32_t *cigar) //
325
333
  }
326
334
  }
327
335
 
328
- static void mm_align_pair(void *km, const mm_mapopt_t *opt, int qlen, const uint8_t *qseq, int tlen, const uint8_t *tseq, const uint8_t *junc, const int8_t *mat, int w, int end_bonus, int zdrop, int flag, ksw_extz_t *ez)
336
+ static void mm_align_pair(void *km, const mm_mapopt_t *opt, int qlen, const uint8_t *qseq, int tlen, const uint8_t *tseq, const uint8_t *junc,
337
+ const int8_t *mat, int w, int end_bonus, int zdrop, int ksw_flag, ksw_extz_t *ez)
329
338
  {
330
339
  if (mm_dbg_flag & MM_DBG_PRINT_ALN_SEQ) {
331
340
  int i;
332
- fprintf(stderr, "===> q=(%d,%d), e=(%d,%d), bw=%d, flag=%d, zdrop=%d <===\n", opt->q, opt->q2, opt->e, opt->e2, w, flag, opt->zdrop);
341
+ fprintf(stderr, "===> q=(%d,%d), e=(%d,%d), bw=%d, ksw_flag=%d, zdrop=%d, end_bonus=%d <===\n", opt->q, opt->q2, opt->e, opt->e2, w, ksw_flag, opt->zdrop, end_bonus);
333
342
  for (i = 0; i < tlen; ++i) fputc("ACGTN"[tseq[i]], stderr);
334
343
  fputc('\n', stderr);
335
344
  for (i = 0; i < qlen; ++i) fputc("ACGTN"[qseq[i]], stderr);
336
345
  fputc('\n', stderr);
337
346
  }
338
347
  if (opt->transition != 0 && opt->b != opt->transition)
339
- flag |= KSW_EZ_GENERIC_SC;
340
- if (opt->max_sw_mat > 0 && (int64_t)tlen * qlen > opt->max_sw_mat) {
348
+ ksw_flag |= KSW_EZ_GENERIC_SC;
349
+ if (opt->max_sw_mat > 0 && (int64_t)tlen * qlen > opt->max_sw_mat) { // too much memory; skip alignment
341
350
  ksw_reset_extz(ez);
342
351
  ez->zdropped = 1;
343
- } else if (opt->flag & MM_F_SPLICE) {
344
- int flag_tmp = flag;
345
- if (!(opt->flag & MM_F_SPLICE_OLD)) flag_tmp |= KSW_EZ_SPLICE_CMPLX;
346
- ksw_exts2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, opt->q2, opt->noncan, zdrop, opt->junc_bonus, flag_tmp, junc, ez);
347
- } else if (opt->q == opt->q2 && opt->e == opt->e2)
348
- ksw_extz2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, w, zdrop, end_bonus, flag, ez);
349
- else
350
- ksw_extd2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, opt->q2, opt->e2, w, zdrop, end_bonus, flag, ez);
352
+ } else if (opt->flag & MM_F_SPLICE) { // spliced alignment
353
+ assert((ksw_flag & KSW_EZ_SPLICE_FOR) == 0 || (ksw_flag & KSW_EZ_SPLICE_REV) == 0);
354
+ if (!(opt->flag & MM_F_SPLICE_OLD)) ksw_flag |= KSW_EZ_SPLICE_CMPLX;
355
+ ksw_exts2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, opt->q2, opt->noncan, zdrop, end_bonus, opt->junc_bonus, opt->junc_pen, ksw_flag, junc, ez);
356
+ } else if (opt->q == opt->q2 && opt->e == opt->e2) { // affine gap
357
+ ksw_extz2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, w, zdrop, end_bonus, ksw_flag, ez);
358
+ } else { // dual affine gap
359
+ ksw_extd2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, opt->q2, opt->e2, w, zdrop, end_bonus, ksw_flag, ez);
360
+ }
351
361
  if (mm_dbg_flag & MM_DBG_PRINT_ALN_SEQ) {
352
362
  int i;
353
363
  fprintf(stderr, "score=%d, cigar=", ez->score);
@@ -357,6 +367,45 @@ static void mm_align_pair(void *km, const mm_mapopt_t *opt, int qlen, const uint
357
367
  }
358
368
  }
359
369
 
370
+ static int mm_align_sr_rna(void *km, const mm_mapopt_t *opt, int qlen, const uint8_t *qseq, int tlen, const uint8_t *tseq, const uint8_t *junc, uint8_t *tseq2, uint8_t *junc2,
371
+ const int8_t *mat, int w, int end_bonus, int zdrop, int ksw_flag, ksw_extz_t *ez)
372
+ {
373
+ int32_t ilen = opt->q2 * 2, tlen2 = qlen * 2 + ilen;
374
+ int32_t i, ll = 0, lr = 0, nn = 0, n_ins = 0;
375
+ if (!(opt->flag & MM_F_SPLICE)) return 0; // only for spliced alignment
376
+ if (qlen > MM_MAX_QLEN_FLANK || qlen * 2 + ilen > tlen) return 0; // the query sequence can't be too long and the target sequence must be long enough
377
+ for (i = 0; i < qlen; ++i) // exact match length from the left
378
+ if (qseq[i] == tseq[i] && qseq[i] < 4)
379
+ ++ll;
380
+ for (i = 0; i < qlen; ++i) // exact match length from the right
381
+ if (qseq[qlen - 1 - i] == tseq[tlen - 1 - i] && qseq[qlen - 1 - i] < 4)
382
+ ++lr;
383
+ if (qlen - (ll + lr) > 9) return 0; // qlen may be smaller than ll+lr
384
+ memcpy(tseq2, tseq, qlen);
385
+ memset(&tseq2[qlen], 4, ilen);
386
+ memcpy(&tseq2[qlen + ilen], &tseq[tlen - qlen], qlen);
387
+ if (junc) {
388
+ memcpy(junc2, junc, qlen);
389
+ memset(&junc2[qlen], 0, ilen);
390
+ memcpy(&junc2[qlen + ilen], &junc[tlen - qlen], qlen);
391
+ }
392
+ if (!(opt->flag & MM_F_SPLICE_OLD)) ksw_flag |= KSW_EZ_SPLICE_CMPLX;
393
+ ksw_exts2_sse(km, qlen, qseq, tlen2, tseq2, 5, mat, opt->q, opt->e, opt->q2, opt->noncan, zdrop, end_bonus, opt->junc_bonus, opt->junc_pen, ksw_flag, junc2, ez);
394
+ if (ez->zdropped) return 0;
395
+ if ((ez->cigar[0]&0xf) != KSW_CIGAR_MATCH || (ez->cigar[ez->n_cigar-1]&0xf) != KSW_CIGAR_MATCH) return 0;
396
+ for (i = 0; i < ez->n_cigar; ++i) { // count the number of introns in the alignment
397
+ if ((ez->cigar[i]&0xf) == KSW_CIGAR_N_SKIP)
398
+ ++nn;
399
+ else if ((ez->cigar[i]&0xf) == KSW_CIGAR_INS)
400
+ ++n_ins;
401
+ }
402
+ if (nn != 1 || n_ins > 0) return 0; // the heuristic only works when there is exactly one intron
403
+ for (i = 0; i < ez->n_cigar; ++i)
404
+ if ((ez->cigar[i]&0xf) == KSW_CIGAR_N_SKIP)
405
+ ez->cigar[i] += (tlen - tlen2) << 4;
406
+ return 1;
407
+ }
408
+
360
409
  static inline int mm_get_hplen_back(const mm_idx_t *mi, uint32_t rid, uint32_t x)
361
410
  {
362
411
  int64_t i, off0 = mi->seq[rid].offset, off = off0 + x;
@@ -586,12 +635,19 @@ static void mm_fix_bad_ends_splice(void *km, const mm_mapopt_t *opt, const mm_id
586
635
  }
587
636
  }
588
637
 
638
+ static inline void mm_get_junc(const mm_idx_t *mi, int32_t ctg, int32_t st, int32_t en, int32_t rev, uint8_t *junc)
639
+ {
640
+ if (mi->spsc) mm_idx_spsc_get(mi, ctg, st, en, rev, junc);
641
+ else if (mi->I) mm_idx_bed_junc(mi, ctg, st, en, junc);
642
+ else memset(junc, 0, en - st);
643
+ }
644
+
589
645
  static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, uint8_t *qseq0[2], mm_reg1_t *r, mm_reg1_t *r2, int n_a, mm128_t *a, ksw_extz_t *ez, int splice_flag)
590
646
  {
591
- int is_sr = !!(opt->flag & MM_F_SR), is_splice = !!(opt->flag & MM_F_SPLICE);
647
+ int is_sr = !!(opt->flag & MM_F_SR), is_splice = !!(opt->flag & MM_F_SPLICE), is_sr_rna = (!!(opt->flag & MM_F_SR_RNA) && is_splice);
592
648
  int32_t rid = a[r->as].x<<1>>33, rev = a[r->as].x>>63, as1, cnt1;
593
- uint8_t *tseq, *qseq, *junc;
594
- int32_t i, l, bw, bw_long, dropped = 0, extra_flag = 0, rs0, re0, qs0, qe0;
649
+ uint8_t *tseq, *qseq, *junc, *tseq2 = 0, *junc2 = 0;
650
+ int32_t i, l, bw, bw_long, dropped = 0, ksw_flag = 0, rs0, re0, qs0, qe0;
595
651
  int32_t rs, re, qs, qe;
596
652
  int32_t rs1, qs1, re1, qe1;
597
653
  int8_t mat[25];
@@ -626,9 +682,10 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
626
682
  assert(cnt1 > 0);
627
683
 
628
684
  if (is_splice) {
629
- if (splice_flag & MM_F_SPLICE_FOR) extra_flag |= rev? KSW_EZ_SPLICE_REV : KSW_EZ_SPLICE_FOR;
630
- if (splice_flag & MM_F_SPLICE_REV) extra_flag |= rev? KSW_EZ_SPLICE_FOR : KSW_EZ_SPLICE_REV;
631
- if (opt->flag & MM_F_SPLICE_FLANK) extra_flag |= KSW_EZ_SPLICE_FLANK;
685
+ if (splice_flag & MM_F_SPLICE_FOR) ksw_flag |= rev? KSW_EZ_SPLICE_REV : KSW_EZ_SPLICE_FOR;
686
+ if (splice_flag & MM_F_SPLICE_REV) ksw_flag |= rev? KSW_EZ_SPLICE_FOR : KSW_EZ_SPLICE_REV;
687
+ if (opt->flag & MM_F_SPLICE_FLANK) ksw_flag |= KSW_EZ_SPLICE_FLANK;
688
+ if (mi->spsc) ksw_flag |= KSW_EZ_SPLICE_SCORE;
632
689
  }
633
690
 
634
691
  /* Look for the start and end of regions to perform DP. This sounds easy
@@ -713,6 +770,12 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
713
770
  tseq = (uint8_t*)kmalloc(km, re0 - rs0);
714
771
  junc = (uint8_t*)kmalloc(km, re0 - rs0);
715
772
 
773
+ if (is_sr_rna) {
774
+ int32_t max_tlen2 = MM_MAX_QLEN_FLANK * 2 + opt->q2 * 2;
775
+ tseq2 = Kmalloc(km, uint8_t, max_tlen2 * 2);
776
+ junc2 = tseq2 + max_tlen2;
777
+ }
778
+
716
779
  if (qs > 0 && rs > 0) { // left extension; probably the condition can be changed to "qs > qs0 && rs > rs0"
717
780
  if (opt->flag & MM_F_QSTRAND) {
718
781
  qseq = &qseq0[0][qs0];
@@ -721,11 +784,11 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
721
784
  qseq = &qseq0[rev][qs0];
722
785
  mm_idx_getseq(mi, rid, rs0, rs, tseq);
723
786
  }
724
- mm_idx_bed_junc(mi, rid, rs0, rs, junc);
787
+ mm_get_junc(mi, rid, rs0, rs, !!(ksw_flag&KSW_EZ_SPLICE_REV), junc);
725
788
  mm_seq_rev(qs - qs0, qseq);
726
789
  mm_seq_rev(rs - rs0, tseq);
727
790
  mm_seq_rev(rs - rs0, junc);
728
- mm_align_pair(km, opt, qs - qs0, qseq, rs - rs0, tseq, junc, mat, bw, opt->end_bonus, r->split_inv? opt->zdrop_inv : opt->zdrop, extra_flag|KSW_EZ_EXTZ_ONLY|KSW_EZ_RIGHT|KSW_EZ_REV_CIGAR, ez);
791
+ mm_align_pair(km, opt, qs - qs0, qseq, rs - rs0, tseq, junc, mat, bw, opt->end_bonus, r->split_inv? opt->zdrop_inv : opt->zdrop, ksw_flag|KSW_EZ_EXTZ_ONLY|KSW_EZ_RIGHT|KSW_EZ_REV_CIGAR, ez);
729
792
  if (ez->n_cigar > 0) {
730
793
  mm_append_cigar(r, ez->n_cigar, ez->cigar);
731
794
  r->p->dp_score += ez->max;
@@ -737,14 +800,14 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
737
800
  re1 = rs, qe1 = qs;
738
801
  assert(qs1 >= 0 && rs1 >= 0);
739
802
 
740
- for (i = is_sr? cnt1 - 1 : 1; i < cnt1; ++i) { // gap filling
803
+ for (i = is_sr? cnt1 - 1 : 1; i < cnt1; ++i) { // gap filling; for short genomic reads, fill from the first seed to the last
741
804
  if ((a[as1+i].y & (MM_SEED_IGNORE|MM_SEED_TANDEM)) && i != cnt1 - 1) continue;
742
805
  if (is_sr && !(mi->flag & MM_I_HPC)) {
743
806
  re = (int32_t)a[as1 + i].x + 1;
744
807
  qe = (int32_t)a[as1 + i].y + 1;
745
808
  } else mm_adjust_minier(mi, qseq0, &a[as1 + i], &re, &qe);
746
809
  re1 = re, qe1 = qe;
747
- if (i == cnt1 - 1 || (a[as1+i].y&MM_SEED_LONG_JOIN) || (qe - qs >= opt->min_ksw_len && re - rs >= opt->min_ksw_len)) {
810
+ if (i == cnt1 - 1 || (a[as1+i].y&MM_SEED_LONG_JOIN) || (qe - qs >= opt->min_ksw_len && re - rs >= opt->min_ksw_len)) { // gap filling
748
811
  int j, bw1 = bw_long, zdrop_code;
749
812
  if (a[as1+i].y & MM_SEED_LONG_JOIN)
750
813
  bw1 = qe - qs > re - rs? qe - qs : re - rs;
@@ -756,21 +819,29 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
756
819
  qseq = &qseq0[rev][qs];
757
820
  mm_idx_getseq(mi, rid, rs, re, tseq);
758
821
  }
759
- mm_idx_bed_junc(mi, rid, rs, re, junc);
760
- if (is_sr) { // perform ungapped alignment
822
+ mm_get_junc(mi, rid, rs, re, !!(ksw_flag&KSW_EZ_SPLICE_REV), junc);
823
+ if (is_sr || (is_sr_rna && qe - qs == re - rs)) { // perform ungapped alignment
824
+ int32_t max_gapped_score = (qe - qs - 2) * opt->a - 2 * (opt->q + opt->e);
761
825
  assert(qe - qs == re - rs);
762
826
  ksw_reset_extz(ez);
763
827
  for (j = 0, ez->score = 0; j < qe - qs; ++j) {
764
- if (qseq[j] >= 4 || tseq[j] >= 4) ez->score += opt->e2;
828
+ if (qseq[j] >= 4 || tseq[j] >= 4) ez->score += opt->sc_ambi > 0? -opt->sc_ambi : opt->sc_ambi;
765
829
  else ez->score += qseq[j] == tseq[j]? opt->a : -opt->b;
766
830
  }
767
- ez->cigar = ksw_push_cigar(km, &ez->n_cigar, &ez->m_cigar, ez->cigar, MM_CIGAR_MATCH, qe - qs);
831
+ if (ez->score > max_gapped_score)
832
+ ez->cigar = ksw_push_cigar(km, &ez->n_cigar, &ez->m_cigar, ez->cigar, MM_CIGAR_MATCH, qe - qs);
833
+ else
834
+ mm_align_pair(km, opt, qe - qs, qseq, re - rs, tseq, junc, mat, bw1, -1, opt->zdrop, ksw_flag|KSW_EZ_APPROX_MAX, ez);
768
835
  } else { // perform normal gapped alignment
769
- mm_align_pair(km, opt, qe - qs, qseq, re - rs, tseq, junc, mat, bw1, -1, opt->zdrop, extra_flag|KSW_EZ_APPROX_MAX, ez); // first pass: with approximate Z-drop
836
+ int32_t skip_full = 0;
837
+ if (is_sr_rna)
838
+ skip_full = mm_align_sr_rna(km, opt, qe - qs, qseq, re - rs, tseq, junc, tseq2, junc2, mat, bw1, -1, opt->zdrop, ksw_flag|KSW_EZ_APPROX_MAX, ez);
839
+ if (!skip_full)
840
+ mm_align_pair(km, opt, qe - qs, qseq, re - rs, tseq, junc, mat, bw1, -1, opt->zdrop, ksw_flag|KSW_EZ_APPROX_MAX, ez); // first pass: with approximate Z-drop
770
841
  }
771
842
  // test Z-drop and inversion Z-drop
772
843
  if ((zdrop_code = mm_test_zdrop(km, opt, qseq, tseq, ez->n_cigar, ez->cigar, mat)) != 0)
773
- mm_align_pair(km, opt, qe - qs, qseq, re - rs, tseq, junc, mat, bw1, -1, zdrop_code == 2? opt->zdrop_inv : opt->zdrop, extra_flag, ez); // second pass: lift approximate
844
+ mm_align_pair(km, opt, qe - qs, qseq, re - rs, tseq, junc, mat, bw1, -1, zdrop_code == 2? opt->zdrop_inv : opt->zdrop, ksw_flag, ez); // second pass: lift approximate
774
845
  // update CIGAR
775
846
  if (ez->n_cigar > 0)
776
847
  mm_append_cigar(r, ez->n_cigar, ez->cigar);
@@ -808,8 +879,8 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
808
879
  qseq = &qseq0[rev][qe];
809
880
  mm_idx_getseq(mi, rid, re, re0, tseq);
810
881
  }
811
- mm_idx_bed_junc(mi, rid, re, re0, junc);
812
- mm_align_pair(km, opt, qe0 - qe, qseq, re0 - re, tseq, junc, mat, bw, opt->end_bonus, opt->zdrop, extra_flag|KSW_EZ_EXTZ_ONLY, ez);
882
+ mm_get_junc(mi, rid, re, re0, !!(ksw_flag&KSW_EZ_SPLICE_REV), junc);
883
+ mm_align_pair(km, opt, qe0 - qe, qseq, re0 - re, tseq, junc, mat, bw, opt->end_bonus, opt->zdrop, ksw_flag|KSW_EZ_EXTZ_ONLY, ez);
813
884
  if (ez->n_cigar > 0) {
814
885
  mm_append_cigar(r, ez->n_cigar, ez->cigar);
815
886
  r->p->dp_score += ez->max;
@@ -832,11 +903,12 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
832
903
  mm_idx_getseq(mi, rid, rs1, re1, tseq);
833
904
  qseq = &qseq0[r->rev][qs1];
834
905
  }
835
- mm_update_extra(r, qseq, tseq, mat, opt->q, opt->e, opt->flag & MM_F_EQX, !(opt->flag & MM_F_SR));
906
+ mm_update_extra(r, qseq, tseq, mat, opt->q, opt->e, opt->flag & MM_F_EQX, !(is_sr || is_sr_rna));
836
907
  if (rev && r->p->trans_strand)
837
908
  r->p->trans_strand ^= 3; // flip to the read strand
838
909
  }
839
910
 
911
+ if (tseq2) kfree(km, tseq2);
840
912
  kfree(km, tseq);
841
913
  kfree(km, junc);
842
914
  }
@@ -891,7 +963,7 @@ static int mm_align1_inv(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, i
891
963
  }
892
964
  r_inv->rs = r1->re + t_off;
893
965
  r_inv->re = r_inv->rs + ez->max_t + 1;
894
- mm_update_extra(r_inv, &qseq[q_off], &tseq[t_off], mat, opt->q, opt->e, opt->flag & MM_F_EQX, !(opt->flag & MM_F_SR));
966
+ mm_update_extra(r_inv, &qseq[q_off], &tseq[t_off], mat, opt->q, opt->e, opt->flag & MM_F_EQX, !(opt->flag & (MM_F_SR|MM_F_SR_RNA)));
895
967
  ret = 1;
896
968
  end_align1_inv:
897
969
  kfree(km, tseq);
@@ -933,14 +1005,14 @@ double mm_event_identity(const mm_reg1_t *r)
933
1005
  static int32_t mm_recal_max_dp(const mm_reg1_t *r, double b2, int32_t match_sc)
934
1006
  {
935
1007
  uint32_t i;
936
- int32_t n_gap = 0, n_gapo = 0, n_mis;
1008
+ int32_t n_gap = 0, n_mis;
937
1009
  double gap_cost = 0.0;
938
1010
  if (r->p == 0) return -1;
939
1011
  for (i = 0; i < r->p->n_cigar; ++i) {
940
1012
  int32_t op = r->p->cigar[i] & 0xf, len = r->p->cigar[i] >> 4;
941
1013
  if (op == MM_CIGAR_INS || op == MM_CIGAR_DEL) {
942
1014
  gap_cost += b2 + (double)mg_log2(1.0 + len);
943
- ++n_gapo, n_gap += len;
1015
+ n_gap += len;
944
1016
  }
945
1017
  }
946
1018
  n_mis = r->blen + r->p->n_ambi - r->mlen - n_gap;
@@ -992,24 +1064,36 @@ mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *m
992
1064
  n_a = mm_squeeze_a(km, n_regs, regs, a);
993
1065
  memset(&ez, 0, sizeof(ksw_extz_t));
994
1066
  for (i = 0; i < n_regs; ++i) {
995
- mm_reg1_t r2;
1067
+ mm_reg1_t r2; // only used for inversion
996
1068
  if ((opt->flag&MM_F_SPLICE) && (opt->flag&MM_F_SPLICE_FOR) && (opt->flag&MM_F_SPLICE_REV)) { // then do two rounds of alignments for both strands
997
- mm_reg1_t s[2], s2[2];
998
- int which, trans_strand;
1069
+ mm_reg1_t s[2], s2[2], *r;
999
1070
  s[0] = s[1] = regs[i];
1000
- mm_align1(km, opt, mi, qlen, qseq0, &s[0], &s2[0], n_a, a, &ez, MM_F_SPLICE_FOR);
1001
- mm_align1(km, opt, mi, qlen, qseq0, &s[1], &s2[1], n_a, a, &ez, MM_F_SPLICE_REV);
1002
- if (s[0].p->dp_score > s[1].p->dp_score) which = 0, trans_strand = 1;
1003
- else if (s[0].p->dp_score < s[1].p->dp_score) which = 1, trans_strand = 2;
1004
- else trans_strand = 3, which = (qlen + s[0].p->dp_score) & 1; // randomly choose a strand, effectively
1005
- if (which == 0) {
1071
+ mm_align1(km, opt, mi, qlen, qseq0, &s[0], &s2[0], n_a, a, &ez, MM_F_SPLICE_FOR); // assume the transcript is on the + strand of the genome
1072
+ if ((opt->flag&MM_F_SR_RNA) && regs[i].qe - regs[i].qs == regs[i].re - regs[i].rs && s[0].qe - s[0].qs == s[0].re - s[0].rs && s[0].qs == 0 && s[0].qe == qlen) {
1006
1073
  regs[i] = s[0], r2 = s2[0];
1007
- free(s[1].p);
1074
+ regs[i].p->trans_strand = 0;
1008
1075
  } else {
1009
- regs[i] = s[1], r2 = s2[1];
1010
- free(s[0].p);
1076
+ int which, trans_strand;
1077
+ mm_align1(km, opt, mi, qlen, qseq0, &s[1], &s2[1], n_a, a, &ez, MM_F_SPLICE_REV); // assume the transcript on the - strand
1078
+ if (s[0].p->dp_score > s[1].p->dp_score) which = 0, trans_strand = 1;
1079
+ else if (s[0].p->dp_score < s[1].p->dp_score) which = 1, trans_strand = 2;
1080
+ else trans_strand = 3, which = (qlen + s[0].p->dp_score) & 1; // randomly choose a strand, effectively
1081
+ if (which == 0) {
1082
+ regs[i] = s[0], r2 = s2[0];
1083
+ free(s[1].p);
1084
+ } else {
1085
+ regs[i] = s[1], r2 = s2[1];
1086
+ free(s[0].p);
1087
+ }
1088
+ r = &regs[i];
1089
+ r->p->trans_strand = trans_strand;
1090
+ if (r->is_spliced) {
1091
+ if (trans_strand == 1 || trans_strand == 2) // this is an *approximate* way to tell if there are splice signals.
1092
+ r->p->dp_max += (opt->a + opt->b) + ((opt->a + opt->b) >> 1);
1093
+ else if (trans_strand == 3)
1094
+ r->p->dp_max -= opt->a + opt->b;
1095
+ }
1011
1096
  }
1012
- regs[i].p->trans_strand = trans_strand;
1013
1097
  } else { // one round of alignment
1014
1098
  mm_align1(km, opt, mi, qlen, qseq0, &regs[i], &r2, n_a, a, &ez, opt->flag);
1015
1099
  if (opt->flag&MM_F_SPLICE)
@@ -1027,7 +1111,7 @@ mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *m
1027
1111
  kfree(km, qseq0[0]);
1028
1112
  kfree(km, ez.cigar);
1029
1113
  mm_filter_regs(opt, qlen, n_regs_, regs);
1030
- if (!(opt->flag&MM_F_SR) && !opt->split_prefix && qlen >= opt->rank_min_len) {
1114
+ if (!(opt->flag&(MM_F_SR|MM_F_SR_RNA|MM_F_ALL_CHAINS)) && !opt->split_prefix && qlen >= opt->rank_min_len) {
1031
1115
  mm_update_dp_max(qlen, *n_regs_, regs, opt->rank_frac, opt->a, opt->b);
1032
1116
  mm_filter_regs(opt, qlen, n_regs_, regs);
1033
1117
  }
@@ -31,8 +31,8 @@ To acquire the data used in this cookbook and to install minimap2 and paftools,
31
31
  please follow the command lines below:
32
32
  ```sh
33
33
  # install minimap2 executables
34
- curl -L https://github.com/lh3/minimap2/releases/download/v2.27/minimap2-2.27_x64-linux.tar.bz2 | tar jxf -
35
- cp minimap2-2.27_x64-linux/{minimap2,k8,paftools.js} . # copy executables
34
+ curl -L https://github.com/lh3/minimap2/releases/download/v2.29/minimap2-2.29_x64-linux.tar.bz2 | tar jxf -
35
+ cp minimap2-2.29_x64-linux/{minimap2,k8,paftools.js} . # copy executables
36
36
  export PATH="$PATH:"`pwd` # put the current directory on PATH
37
37
  # download example datasets
38
38
  curl -L https://github.com/lh3/minimap2/releases/download/v2.10/cookbook-data.tgz | tar zxf -