minimap2 0.2.24.6 → 0.2.25.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5a1518fb8048d902865ee6771fedc9cb9b42693c984ec6616c2d1407597a9f26
4
- data.tar.gz: 83501abbd0cedb037edb96c6a630b7c93adff87597950ef03359be41dad0710a
3
+ metadata.gz: f58943e39da8f734af4ee4d16b5d335825ee7cf94cc45d9cf88a41d7adfe6afe
4
+ data.tar.gz: 2cb372b02bcb2cc763fb3a9fcd82219c653e61ac7f28a039e0a4d03b054aab35
5
5
  SHA512:
6
- metadata.gz: d62d29fd4f5f8254f7fdaa9943cb1141a1fb2b832c33b30568fe659c6b4299a4827f596806c0d38368af53c63a48f1e1dcb2fe9562be7534fa3b1697a8b6b3de
7
- data.tar.gz: 86e8179090acf840d615b3172fd1aa92e39c3b106c7629d079dfc473bb1a81180a922c13a19e60e41b54c6fda37534c6e61d9107f756bb87410ffa3fd87b93ce
6
+ metadata.gz: 52c827db192ac69bf99cfa1e8ff701a4d87965b7f443a82cdd10d109eebf31a6ac2ca966cc5b4a9d3bdcf1fdb4180f51a0f1c78f1bc6c5e937419ad3a78fa5d3
7
+ data.tar.gz: 95fd63410ffc2aa088877c9545566aa32326899658ff5fb9ec92ac9cc1c8d89c28c6a849e50bd50d5fce4bc33c6af8b3f40a65c0270f7a6ccec181375c549fb1
data/README.md CHANGED
@@ -175,9 +175,8 @@ ruby-minimap2 is a library under development and there are many points to be imp
175
175
 
176
176
  Please feel free to report [bugs](https://github.com/kojix2/ruby-minimap2/issues) and submit [pull requests](https://github.com/kojix2/ruby-minimap2/pulls)!
177
177
 
178
- Do you need commit rights to my repository?
179
- Do you want to get admin rights and take over the project?
180
- If so, please feel free to contact me @kojix2.
178
+ Many OSS projects become abandoned because only the founder has commit rights to the original repository.
179
+ If you need commit rights to the ruby-minimap2 repository or want to get admin rights and take over the project, please feel free to contact me @kojix2.
181
180
 
182
181
  ## License
183
182
 
@@ -8,6 +8,10 @@ PROG= minimap2
8
8
  PROG_EXTRA= sdust minimap2-lite
9
9
  LIBS= -lm -lz -lpthread
10
10
 
11
+ ifneq ($(aarch64),)
12
+ arm_neon=1
13
+ endif
14
+
11
15
  ifeq ($(arm_neon),) # if arm_neon is not defined
12
16
  ifeq ($(sse2only),) # if sse2only is not defined
13
17
  OBJS+=ksw2_extz2_sse41.o ksw2_extd2_sse41.o ksw2_exts2_sse41.o ksw2_extz2_sse2.o ksw2_extd2_sse2.o ksw2_exts2_sse2.o ksw2_dispatch.o
@@ -26,12 +30,12 @@ endif
26
30
 
27
31
  ifneq ($(asan),)
28
32
  CFLAGS+=-fsanitize=address
29
- LIBS+=-fsanitize=address
33
+ LIBS+=-fsanitize=address -ldl
30
34
  endif
31
35
 
32
36
  ifneq ($(tsan),)
33
37
  CFLAGS+=-fsanitize=thread
34
- LIBS+=-fsanitize=thread
38
+ LIBS+=-fsanitize=thread -ldl
35
39
  endif
36
40
 
37
41
  .PHONY:all extra clean depend
data/ext/minimap2/NEWS.md CHANGED
@@ -1,3 +1,41 @@
1
+ Release 2.25-r1173 (25 April 2023)
2
+ ----------------------------------
3
+
4
+ Notable changes:
5
+
6
+ * Improvement: use the miniprot splice model for RNA-seq alignment by default.
7
+ This model considers non-GT-AG splice sites and leads to slightly higher
8
+ (<0.1%) accuracy and sensitivity on real human data.
9
+
10
+ * Change: increased the default `-I` to `8G` such that minimap2 would create a
11
+ uni-part index for a pair of mammalian genomes. This change may increase the
12
+ memory for all-vs-all read overlap alignment given large datasets.
13
+
14
+ * New feature: output the sequences in secondary alignments with option
15
+ `--secondary-seq` (#687).
16
+
17
+ * Bugfix: --rmq was not parsed correctly (#1010)
18
+
19
+ * Bugfix: possibly incorrect coordinate when applying end bonus to the target
20
+ sequence (#1025). This is a ksw2 bug. It does not affect minimap2 as
21
+ minimap2 is not using the affected feature.
22
+
23
+ * Improvement: incorporated several changes for better compatibility with
24
+ Windows (#1051) and for minimap2 integration at Oxford Nanopore Technologies
25
+ (#1048 and #1033).
26
+
27
+ * Improvement: output the HD-line in SAM output (#1019).
28
+
29
+ * Improvement: check minimap2 index file in mappy to prevent segmentation
30
+ fault for certain indices (#1008).
31
+
32
+ For genomic sequences, minimap2 should give identical output to v2.24.
33
+ Long-read RNA-seq alignment may occasionally differ from previous versions.
34
+
35
+ (2.25: 25 April 2023, r1173)
36
+
37
+
38
+
1
39
  Release 2.24-r1122 (26 December 2021)
2
40
  -------------------------------------
3
41
 
@@ -74,8 +74,8 @@ Detailed evaluations are available from the [minimap2 paper][doi] or the
74
74
  Minimap2 is optimized for x86-64 CPUs. You can acquire precompiled binaries from
75
75
  the [release page][release] with:
76
76
  ```sh
77
- curl -L https://github.com/lh3/minimap2/releases/download/v2.24/minimap2-2.24_x64-linux.tar.bz2 | tar -jxvf -
78
- ./minimap2-2.24_x64-linux/minimap2
77
+ curl -L https://github.com/lh3/minimap2/releases/download/v2.25/minimap2-2.25_x64-linux.tar.bz2 | tar -jxvf -
78
+ ./minimap2-2.25_x64-linux/minimap2
79
79
  ```
80
80
  If you want to compile from the source, you need to have a C compiler, GNU make
81
81
  and zlib development files installed. Then type `make` in the source code
@@ -350,6 +350,11 @@ If you use minimap2 in your work, please cite:
350
350
  > Li, H. (2018). Minimap2: pairwise alignment for nucleotide sequences.
351
351
  > *Bioinformatics*, **34**:3094-3100. [doi:10.1093/bioinformatics/bty191][doi]
352
352
 
353
+ and/or:
354
+
355
+ > Li, H. (2021). New strategies to improve minimap2 alignment accuracy.
356
+ > *Bioinformatics*, **37**:4572-4574. [doi:10.1093/bioinformatics/btab705][doi2]
357
+
353
358
  ## <a name="dguide"></a>Developers' Guide
354
359
 
355
360
  Minimap2 is not only a command line tool, but also a programming library.
@@ -399,5 +404,6 @@ mappy` or [from BioConda][mappyconda] via `conda install -c bioconda mappy`.
399
404
  [manpage]: https://lh3.github.io/minimap2/minimap2.html
400
405
  [manpage-cs]: https://lh3.github.io/minimap2/minimap2.html#10
401
406
  [doi]: https://doi.org/10.1093/bioinformatics/bty191
402
- [smide]: https://github.com/nemequ/simde
407
+ [doi2]: https://doi.org/10.1093/bioinformatics/btab705
408
+ [simde]: https://github.com/nemequ/simde
403
409
  [unimap]: https://github.com/lh3/unimap
data/ext/minimap2/align.c CHANGED
@@ -326,9 +326,11 @@ static void mm_align_pair(void *km, const mm_mapopt_t *opt, int qlen, const uint
326
326
  if (opt->max_sw_mat > 0 && (int64_t)tlen * qlen > opt->max_sw_mat) {
327
327
  ksw_reset_extz(ez);
328
328
  ez->zdropped = 1;
329
- } else if (opt->flag & MM_F_SPLICE)
330
- ksw_exts2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, opt->q2, opt->noncan, zdrop, opt->junc_bonus, flag, junc, ez);
331
- else if (opt->q == opt->q2 && opt->e == opt->e2)
329
+ } else if (opt->flag & MM_F_SPLICE) {
330
+ int flag_tmp = flag;
331
+ if (!(opt->flag & MM_F_SPLICE_OLD)) flag_tmp |= KSW_EZ_SPLICE_CMPLX;
332
+ ksw_exts2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, opt->q2, opt->noncan, zdrop, opt->junc_bonus, flag_tmp, junc, ez);
333
+ } else if (opt->q == opt->q2 && opt->e == opt->e2)
332
334
  ksw_extz2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, w, zdrop, end_bonus, flag, ez);
333
335
  else
334
336
  ksw_extd2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, opt->q2, opt->e2, w, zdrop, end_bonus, flag, ez);
@@ -31,8 +31,8 @@ To acquire the data used in this cookbook and to install minimap2 and paftools,
31
31
  please follow the command lines below:
32
32
  ```sh
33
33
  # install minimap2 executables
34
- curl -L https://github.com/lh3/minimap2/releases/download/v2.24/minimap2-2.24_x64-linux.tar.bz2 | tar jxf -
35
- cp minimap2-2.24_x64-linux/{minimap2,k8,paftools.js} . # copy executables
34
+ curl -L https://github.com/lh3/minimap2/releases/download/v2.25/minimap2-2.25_x64-linux.tar.bz2 | tar jxf -
35
+ cp minimap2-2.25_x64-linux/{minimap2,k8,paftools.js} . # copy executables
36
36
  export PATH="$PATH:"`pwd` # put the current directory on PATH
37
37
  # download example datasets
38
38
  curl -L https://github.com/lh3/minimap2/releases/download/v2.10/cookbook-data.tgz | tar zxf -
@@ -119,6 +119,7 @@ int mm_write_sam_hdr(const mm_idx_t *idx, const char *rg, const char *ver, int a
119
119
  {
120
120
  kstring_t str = {0,0,0};
121
121
  int ret = 0;
122
+ mm_sprintf_lite(&str, "@HD\tVN:1.6\tSO:unsorted\tGO:query\n");
122
123
  if (idx) {
123
124
  uint32_t i;
124
125
  for (i = 0; i < idx->n_seq; ++i)
@@ -369,14 +370,16 @@ static void write_sam_cigar(kstring_t *s, int sam_flag, int in_tag, int qlen, co
369
370
  clip_len[0] = r->rev? qlen - r->qe : r->qs;
370
371
  clip_len[1] = r->rev? r->qs : qlen - r->qe;
371
372
  if (in_tag) {
372
- int clip_char = (sam_flag&0x800) && !(opt_flag&MM_F_SOFTCLIP)? 5 : 4;
373
+ int clip_char = (((sam_flag&0x800) || ((sam_flag&0x100) && (opt_flag&MM_F_SECONDARY_SEQ))) &&
374
+ !(opt_flag&MM_F_SOFTCLIP)) ? 5 : 4;
373
375
  mm_sprintf_lite(s, "\tCG:B:I");
374
376
  if (clip_len[0]) mm_sprintf_lite(s, ",%u", clip_len[0]<<4|clip_char);
375
377
  for (k = 0; k < r->p->n_cigar; ++k)
376
378
  mm_sprintf_lite(s, ",%u", r->p->cigar[k]);
377
379
  if (clip_len[1]) mm_sprintf_lite(s, ",%u", clip_len[1]<<4|clip_char);
378
380
  } else {
379
- int clip_char = (sam_flag&0x800) && !(opt_flag&MM_F_SOFTCLIP)? 'H' : 'S';
381
+ int clip_char = (((sam_flag&0x800) || ((sam_flag&0x100) && (opt_flag&MM_F_SECONDARY_SEQ))) &&
382
+ !(opt_flag&MM_F_SOFTCLIP)) ? 'H' : 'S';
380
383
  assert(clip_len[0] < qlen && clip_len[1] < qlen);
381
384
  if (clip_len[0]) mm_sprintf_lite(s, "%d%c", clip_len[0], clip_char);
382
385
  for (k = 0; k < r->p->n_cigar; ++k)
@@ -451,7 +454,7 @@ void mm_write_sam3(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int se
451
454
  if (cigar_in_tag) {
452
455
  int slen;
453
456
  if ((flag & 0x900) == 0 || (opt_flag & MM_F_SOFTCLIP)) slen = t->l_seq;
454
- else if (flag & 0x100) slen = 0;
457
+ else if ((flag & 0x100) && !(opt_flag & MM_F_SECONDARY_SEQ)) slen = 0;
455
458
  else slen = r->qe - r->qs;
456
459
  mm_sprintf_lite(s, "%dS%dN", slen, r->re - r->rs);
457
460
  } else write_sam_cigar(s, flag, 0, t->l_seq, r, opt_flag);
@@ -492,7 +495,7 @@ void mm_write_sam3(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int se
492
495
  mm_sprintf_lite(s, "\t");
493
496
  if (t->qual) sam_write_sq(s, t->qual, t->l_seq, r->rev, 0);
494
497
  else mm_sprintf_lite(s, "*");
495
- } else if (flag & 0x100) {
498
+ } else if ((flag & 0x100) && !(opt_flag & MM_F_SECONDARY_SEQ)){
496
499
  mm_sprintf_lite(s, "*\t*");
497
500
  } else {
498
501
  sam_write_sq(s, t->seq + r->qs, r->qe - r->qs, r->rev, r->rev);
@@ -40,7 +40,8 @@ void *km_init2(void *km_par, size_t min_core_size)
40
40
  kmem_t *km;
41
41
  km = (kmem_t*)kcalloc(km_par, 1, sizeof(kmem_t));
42
42
  km->par = km_par;
43
- km->min_core_size = min_core_size > 0? min_core_size : 0x80000;
43
+ if (km_par) km->min_core_size = min_core_size > 0? min_core_size : ((kmem_t*)km_par)->min_core_size - 2;
44
+ else km->min_core_size = min_core_size > 0? min_core_size : 0x80000;
44
45
  return (void*)km;
45
46
  }
46
47
 
@@ -183,6 +184,16 @@ void *krealloc(void *_km, void *ap, size_t n_bytes) // TODO: this can be made mo
183
184
  return q;
184
185
  }
185
186
 
187
+ void *krelocate(void *km, void *ap, size_t n_bytes)
188
+ {
189
+ void *p;
190
+ if (km == 0 || ap == 0) return ap;
191
+ p = kmalloc(km, n_bytes);
192
+ memcpy(p, ap, n_bytes);
193
+ kfree(km, ap);
194
+ return p;
195
+ }
196
+
186
197
  void km_stat(const void *_km, km_stat_t *s)
187
198
  {
188
199
  kmem_t *km = (kmem_t*)_km;
@@ -203,3 +214,11 @@ void km_stat(const void *_km, km_stat_t *s)
203
214
  s->largest = s->largest > size? s->largest : size;
204
215
  }
205
216
  }
217
+
218
+ void km_stat_print(const void *km)
219
+ {
220
+ km_stat_t st;
221
+ km_stat(km, &st);
222
+ fprintf(stderr, "[km_stat] cap=%ld, avail=%ld, largest=%ld, n_core=%ld, n_block=%ld\n",
223
+ st.capacity, st.available, st.largest, st.n_blocks, st.n_cores);
224
+ }
@@ -13,6 +13,7 @@ typedef struct {
13
13
 
14
14
  void *kmalloc(void *km, size_t size);
15
15
  void *krealloc(void *km, void *ptr, size_t size);
16
+ void *krelocate(void *km, void *ap, size_t n_bytes);
16
17
  void *kcalloc(void *km, size_t count, size_t size);
17
18
  void kfree(void *km, void *ptr);
18
19
 
@@ -20,11 +21,21 @@ void *km_init(void);
20
21
  void *km_init2(void *km_par, size_t min_core_size);
21
22
  void km_destroy(void *km);
22
23
  void km_stat(const void *_km, km_stat_t *s);
24
+ void km_stat_print(const void *km);
23
25
 
24
26
  #ifdef __cplusplus
25
27
  }
26
28
  #endif
27
29
 
30
+ #define Kmalloc(km, type, cnt) ((type*)kmalloc((km), (cnt) * sizeof(type)))
31
+ #define Kcalloc(km, type, cnt) ((type*)kcalloc((km), (cnt), sizeof(type)))
32
+ #define Krealloc(km, type, ptr, cnt) ((type*)krealloc((km), (ptr), (cnt) * sizeof(type)))
33
+
34
+ #define Kexpand(km, type, a, m) do { \
35
+ (m) = (m) >= 4? (m) + ((m)>>1) : 16; \
36
+ (a) = Krealloc(km, type, (a), (m)); \
37
+ } while (0)
38
+
28
39
  #define KMALLOC(km, ptr, len) ((ptr) = (__typeof__(ptr))kmalloc((km), (len) * sizeof(*(ptr))))
29
40
  #define KCALLOC(km, ptr, len) ((ptr) = (__typeof__(ptr))kcalloc((km), (len), sizeof(*(ptr))))
30
41
  #define KREALLOC(km, ptr, len) ((ptr) = (__typeof__(ptr))krealloc((km), (ptr), (len) * sizeof(*(ptr))))
@@ -50,7 +61,7 @@ void km_stat(const void *_km, km_stat_t *s);
50
61
  } kmp_##name##_t; \
51
62
  SCOPE kmp_##name##_t *kmp_init_##name(void *km) { \
52
63
  kmp_##name##_t *mp; \
53
- KCALLOC(km, mp, 1); \
64
+ mp = Kcalloc(km, kmp_##name##_t, 1); \
54
65
  mp->km = km; \
55
66
  return mp; \
56
67
  } \
@@ -66,7 +77,7 @@ void km_stat(const void *_km, km_stat_t *s);
66
77
  } \
67
78
  SCOPE void kmp_free_##name(kmp_##name##_t *mp, kmptype_t *p) { \
68
79
  --mp->cnt; \
69
- if (mp->n == mp->max) KEXPAND(mp->km, mp->buf, mp->max); \
80
+ if (mp->n == mp->max) Kexpand(mp->km, kmptype_t*, mp->buf, mp->max); \
70
81
  mp->buf[mp->n++] = p; \
71
82
  }
72
83
 
data/ext/minimap2/ksw2.h CHANGED
@@ -15,6 +15,7 @@
15
15
  #define KSW_EZ_SPLICE_FOR 0x100
16
16
  #define KSW_EZ_SPLICE_REV 0x200
17
17
  #define KSW_EZ_SPLICE_FLANK 0x400
18
+ #define KSW_EZ_SPLICE_CMPLX 0x800
18
19
 
19
20
  // The subset of CIGAR operators used by ksw code.
20
21
  // Use MM_CIGAR_* from minimap.h if you need the full list.
@@ -358,7 +358,7 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
358
358
  } else H[0] = v8[0] - qe, max_H = H[0], max_t = 0; // special casing r==0
359
359
  // update ez
360
360
  if (en0 == tlen - 1 && H[en0] > ez->mte)
361
- ez->mte = H[en0], ez->mte_q = r - en;
361
+ ez->mte = H[en0], ez->mte_q = r - en0;
362
362
  if (r - st0 == qlen - 1 && H[st0] > ez->mqe)
363
363
  ez->mqe = H[st0], ez->mqe_t = st0;
364
364
  if (ksw_apply_zdrop(ez, 1, max_H, r, max_t, zdrop, e2)) break;
@@ -71,6 +71,7 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
71
71
 
72
72
  ksw_reset_extz(ez);
73
73
  if (m <= 1 || qlen <= 0 || tlen <= 0 || q2 <= q + e) return;
74
+ assert((flag & KSW_EZ_SPLICE_FOR) == 0 || (flag & KSW_EZ_SPLICE_REV) == 0); // can't be both set
74
75
 
75
76
  zero_ = _mm_set1_epi8(0);
76
77
  q_ = _mm_set1_epi8(q);
@@ -118,55 +119,93 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
118
119
 
119
120
  // set the donor and acceptor arrays. TODO: this assumes 0/1/2/3 encoding!
120
121
  if (flag & (KSW_EZ_SPLICE_FOR|KSW_EZ_SPLICE_REV)) {
121
- int semi_cost = flag&KSW_EZ_SPLICE_FLANK? -noncan/2 : 0; // GTr or yAG is worth 0.5 bit; see PMID:18688272
122
- memset(donor, -noncan, tlen_ * 16);
123
- memset(acceptor, -noncan, tlen_ * 16);
122
+ const int sp0[4] = { 8, 15, 21, 30 };
123
+ int sp[4];
124
+ if (flag & KSW_EZ_SPLICE_CMPLX) {
125
+ for (t = 0; t < 4; ++t)
126
+ sp[t] = (int)((double)sp0[t] / 3. + .499);
127
+ } else {
128
+ sp[0] = flag&KSW_EZ_SPLICE_FLANK? noncan / 2 : 0;
129
+ sp[1] = sp[2] = sp[3] = noncan;
130
+ }
131
+ memset(donor, -sp[3], tlen_ * 16);
132
+ memset(acceptor, -sp[3], tlen_ * 16);
124
133
  if (!(flag & KSW_EZ_REV_CIGAR)) {
125
134
  for (t = 0; t < tlen - 4; ++t) {
126
- int can_type = 0; // type of canonical site: 0=none, 1=GT/AG only, 2=GTr/yAG
127
- if ((flag & KSW_EZ_SPLICE_FOR) && target[t+1] == 2 && target[t+2] == 3) can_type = 1; // GTr...
128
- if ((flag & KSW_EZ_SPLICE_REV) && target[t+1] == 1 && target[t+2] == 3) can_type = 1; // CTr...
129
- if (can_type && (target[t+3] == 0 || target[t+3] == 2)) can_type = 2;
130
- if (can_type) ((int8_t*)donor)[t] = can_type == 2? 0 : semi_cost;
135
+ int z = 3;
136
+ if (flag & KSW_EZ_SPLICE_FOR) {
137
+ if (target[t+1] == 2 && target[t+2] == 3) // |GT.
138
+ z = target[t+3] == 0 || target[t+3] == 2? -1 : 0; // |GTr or not
139
+ else if (target[t+1] == 2 && target[t+2] == 1) z = 1; // |GC.
140
+ else if (target[t+1] == 0 && target[t+2] == 3) z = 2; // |AT.
141
+ } else if (flag & KSW_EZ_SPLICE_REV) {
142
+ if (target[t+1] == 1 && target[t+2] == 3) // |CT. (revcomp of .AG|)
143
+ z = target[t+3] == 0 || target[t+3] == 2? -1 : 0;
144
+ else if (target[t+1] == 2 && target[t+2] == 3) z = 2; // |GT. (revcomp of .AC|)
145
+ }
146
+ ((int8_t*)donor)[t] = z < 0? 0 : -sp[z];
131
147
  }
132
- if (junc)
133
- for (t = 0; t < tlen - 1; ++t)
134
- if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t+1]&1)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t+1]&8)))
135
- ((int8_t*)donor)[t] += junc_bonus;
136
148
  for (t = 2; t < tlen; ++t) {
137
- int can_type = 0;
138
- if ((flag & KSW_EZ_SPLICE_FOR) && target[t-1] == 0 && target[t] == 2) can_type = 1; // ...yAG
139
- if ((flag & KSW_EZ_SPLICE_REV) && target[t-1] == 0 && target[t] == 1) can_type = 1; // ...yAC
140
- if (can_type && (target[t-2] == 1 || target[t-2] == 3)) can_type = 2;
141
- if (can_type) ((int8_t*)acceptor)[t] = can_type == 2? 0 : semi_cost;
149
+ int z = 3;
150
+ if (flag & KSW_EZ_SPLICE_FOR) {
151
+ if (target[t-1] == 0 && target[t] == 2) // .AG|
152
+ z = target[t-2] == 1 || target[t-2] == 3? -1 : 0; // yAG| or not
153
+ else if (target[t-1] == 0 && target[t] == 1) z = 2; // .AC|
154
+ } else if (flag & KSW_EZ_SPLICE_REV) {
155
+ if (target[t-1] == 0 && target[t] == 1) // .AC| (revcomp of |GT.)
156
+ z = target[t-2] == 1 || target[t-2] == 3? -1 : 0; // yAC| or not
157
+ else if (target[t-1] == 2 && target[t] == 1) z = 1; // .GC| (revcomp of |GC.)
158
+ else if (target[t-1] == 0 && target[t] == 3) z = 2; // .AT| (revcomp of |AT.)
159
+ }
160
+ ((int8_t*)acceptor)[t] = z < 0? 0 : -sp[z];
142
161
  }
143
- if (junc)
144
- for (t = 0; t < tlen; ++t)
145
- if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t]&2)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t]&4)))
146
- ((int8_t*)acceptor)[t] += junc_bonus;
147
162
  } else {
148
163
  for (t = 0; t < tlen - 4; ++t) {
149
- int can_type = 0; // type of canonical site: 0=none, 1=GT/AG only, 2=GTr/yAG
150
- if ((flag & KSW_EZ_SPLICE_FOR) && target[t+1] == 2 && target[t+2] == 0) can_type = 1; // GAy...
151
- if ((flag & KSW_EZ_SPLICE_REV) && target[t+1] == 1 && target[t+2] == 0) can_type = 1; // CAy...
152
- if (can_type && (target[t+3] == 1 || target[t+3] == 3)) can_type = 2;
153
- if (can_type) ((int8_t*)donor)[t] = can_type == 2? 0 : semi_cost;
164
+ int z = 3;
165
+ if (flag & KSW_EZ_SPLICE_FOR) {
166
+ if (target[t+1] == 2 && target[t+2] == 0) // |GA. (rev of .AG|)
167
+ z = target[t+3] == 1 || target[t+3] == 3? -1 : 0;
168
+ else if (target[t+1] == 1 && target[t+2] == 0) z = 2; // |CA. (rev of .AC|)
169
+ } else if (flag & KSW_EZ_SPLICE_REV) {
170
+ if (target[t+1] == 1 && target[t+2] == 0) // |CA. (comp of |GT.)
171
+ z = target[t+3] == 1 || target[t+3] == 3? -1 : 0;
172
+ else if (target[t+1] == 1 && target[t+2] == 2) z = 1; // |CG. (comp of |GC.)
173
+ else if (target[t+1] == 3 && target[t+2] == 0) z = 2; // |TA. (comp of |AT.)
174
+ }
175
+ ((int8_t*)donor)[t] = z < 0? 0 : -sp[z];
154
176
  }
155
- if (junc)
156
- for (t = 0; t < tlen - 1; ++t)
157
- if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t+1]&2)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t+1]&4)))
158
- ((int8_t*)donor)[t] += junc_bonus;
159
177
  for (t = 2; t < tlen; ++t) {
160
- int can_type = 0;
161
- if ((flag & KSW_EZ_SPLICE_FOR) && target[t-1] == 3 && target[t] == 2) can_type = 1; // ...rTG
162
- if ((flag & KSW_EZ_SPLICE_REV) && target[t-1] == 3 && target[t] == 1) can_type = 1; // ...rTC
163
- if (can_type && (target[t-2] == 0 || target[t-2] == 2)) can_type = 2;
164
- if (can_type) ((int8_t*)acceptor)[t] = can_type == 2? 0 : semi_cost;
178
+ int z = 3;
179
+ if (flag & KSW_EZ_SPLICE_FOR) {
180
+ if (target[t-1] == 3 && target[t] == 2) // .TG| (rev of |GT.)
181
+ z = target[t-2] == 0 || target[t-2] == 2? -1 : 0;
182
+ else if (target[t-1] == 1 && target[t] == 2) z = 1; // .CG| (rev of |GC.)
183
+ else if (target[t-1] == 3 && target[t] == 0) z = 2; // .TA| (rev of |AT.)
184
+ } else if (flag & KSW_EZ_SPLICE_REV) {
185
+ if (target[t-1] == 3 && target[t] == 1) // .TC| (comp of .AG|)
186
+ z = target[t-2] == 0 || target[t-2] == 2? -1 : 0;
187
+ else if (target[t-1] == 3 && target[t] == 2) z = 2; // .TG| (comp of .AC|)
188
+ }
189
+ ((int8_t*)acceptor)[t] = z < 0? 0 : -sp[z];
165
190
  }
166
- if (junc)
167
- for (t = 0; t < tlen; ++t)
168
- if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t]&1)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t]&8)))
169
- ((int8_t*)acceptor)[t] += junc_bonus;
191
+ }
192
+ }
193
+
194
+ if (junc) {
195
+ if (!(flag & KSW_EZ_REV_CIGAR)) {
196
+ for (t = 0; t < tlen - 1; ++t)
197
+ if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t+1]&1)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t+1]&8)))
198
+ ((int8_t*)donor)[t] += junc_bonus;
199
+ for (t = 0; t < tlen; ++t)
200
+ if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t]&2)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t]&4)))
201
+ ((int8_t*)acceptor)[t] += junc_bonus;
202
+ } else {
203
+ for (t = 0; t < tlen - 1; ++t)
204
+ if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t+1]&2)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t+1]&4)))
205
+ ((int8_t*)donor)[t] += junc_bonus;
206
+ for (t = 0; t < tlen; ++t)
207
+ if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t]&1)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t]&8)))
208
+ ((int8_t*)acceptor)[t] += junc_bonus;
170
209
  }
171
210
  }
172
211
 
@@ -376,7 +415,7 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
376
415
  } else H[0] = v8[0] - qe, max_H = H[0], max_t = 0; // special casing r==0
377
416
  // update ez
378
417
  if (en0 == tlen - 1 && H[en0] > ez->mte)
379
- ez->mte = H[en0], ez->mte_q = r - en;
418
+ ez->mte = H[en0], ez->mte_q = r - en0;
380
419
  if (r - st0 == qlen - 1 && H[st0] > ez->mqe)
381
420
  ez->mqe = H[st0], ez->mqe_t = st0;
382
421
  if (ksw_apply_zdrop(ez, 1, max_H, r, max_t, zdrop, 0)) break;
@@ -269,7 +269,7 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
269
269
  } else H[0] = v8[0] - qe - qe, max_H = H[0], max_t = 0; // special casing r==0
270
270
  // update ez
271
271
  if (en0 == tlen - 1 && H[en0] > ez->mte)
272
- ez->mte = H[en0], ez->mte_q = r - en;
272
+ ez->mte = H[en0], ez->mte_q = r - en0;
273
273
  if (r - st0 == qlen - 1 && H[st0] > ez->mqe)
274
274
  ez->mqe = H[st0], ez->mqe_t = st0;
275
275
  if (ksw_apply_zdrop(ez, 1, max_H, r, max_t, zdrop, e)) break;
@@ -35,7 +35,7 @@ uint64_t *mg_chain_backtrack(void *km, int64_t n, const int32_t *f, const int64_
35
35
  for (i = 0, n_z = 0; i < n; ++i) // precompute n_z
36
36
  if (f[i] >= min_sc) ++n_z;
37
37
  if (n_z == 0) return 0;
38
- KMALLOC(km, z, n_z);
38
+ z = Kmalloc(km, mm128_t, n_z);
39
39
  for (i = 0, k = 0; i < n; ++i) // populate z[]
40
40
  if (f[i] >= min_sc) z[k].x = f[i], z[k++].y = i;
41
41
  radix_sort_128x(z, z + n_z);
@@ -54,7 +54,7 @@ uint64_t *mg_chain_backtrack(void *km, int64_t n, const int32_t *f, const int64_
54
54
  else n_v = n_v0;
55
55
  }
56
56
  }
57
- KMALLOC(km, u, n_u);
57
+ u = Kmalloc(km, uint64_t, n_u);
58
58
  memset(t, 0, n * 4);
59
59
  for (k = n_z - 1, n_v = n_u = 0; k >= 0; --k) { // populate u[]
60
60
  if (t[z[k].y] == 0) {
@@ -82,7 +82,7 @@ static mm128_t *compact_a(void *km, int32_t n_u, uint64_t *u, int32_t n_v, int32
82
82
  int64_t i, j, k;
83
83
 
84
84
  // write the result to b[]
85
- KMALLOC(km, b, n_v);
85
+ b = Kmalloc(km, mm128_t, n_v);
86
86
  for (i = 0, k = 0; i < n_u; ++i) {
87
87
  int32_t k0 = k, ni = (int32_t)u[i];
88
88
  for (j = 0; j < ni; ++j)
@@ -91,13 +91,13 @@ static mm128_t *compact_a(void *km, int32_t n_u, uint64_t *u, int32_t n_v, int32
91
91
  kfree(km, v);
92
92
 
93
93
  // sort u[] and a[] by the target position, such that adjacent chains may be joined
94
- KMALLOC(km, w, n_u);
94
+ w = Kmalloc(km, mm128_t, n_u);
95
95
  for (i = k = 0; i < n_u; ++i) {
96
96
  w[i].x = b[k].x, w[i].y = (uint64_t)k<<32|i;
97
97
  k += (int32_t)u[i];
98
98
  }
99
99
  radix_sort_128x(w, w + n_u);
100
- KMALLOC(km, u2, n_u);
100
+ u2 = Kmalloc(km, uint64_t, n_u);
101
101
  for (i = k = 0; i < n_u; ++i) {
102
102
  int32_t j = (int32_t)w[i].y, n = (int32_t)u[j];
103
103
  u2[i] = u[j];
@@ -138,7 +138,7 @@ static inline int32_t comput_sc(const mm128_t *ai, const mm128_t *aj, int32_t ma
138
138
  }
139
139
 
140
140
  /* Input:
141
- * a[].x: tid<<33 | rev<<32 | tpos
141
+ * a[].x: rev<<63 | tid<<32 | tpos
142
142
  * a[].y: flags<<40 | q_span<<32 | q_pos
143
143
  * Output:
144
144
  * n_u: #chains
@@ -160,10 +160,10 @@ mm128_t *mg_lchain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int
160
160
  if (max_dist_x < bw) max_dist_x = bw;
161
161
  if (max_dist_y < bw && !is_cdna) max_dist_y = bw;
162
162
  if (is_cdna) max_drop = INT32_MAX;
163
- KMALLOC(km, p, n);
164
- KMALLOC(km, f, n);
165
- KMALLOC(km, v, n);
166
- KCALLOC(km, t, n);
163
+ p = Kmalloc(km, int64_t, n);
164
+ f = Kmalloc(km, int32_t, n);
165
+ v = Kmalloc(km, int32_t, n);
166
+ t = Kcalloc(km, int32_t, n);
167
167
 
168
168
  // fill the score and backtrack arrays
169
169
  for (i = 0, max_ii = -1; i < n; ++i) {
@@ -251,7 +251,7 @@ mm128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_ski
251
251
  int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km)
252
252
  {
253
253
  int32_t *f,*t, *v, n_u, n_v, mmax_f = 0, max_rmq_size = 0, max_drop = bw;
254
- int64_t *p, i, i0, st = 0, st_inner = 0, n_iter = 0;
254
+ int64_t *p, i, i0, st = 0, st_inner = 0;
255
255
  uint64_t *u;
256
256
  lc_elem_t *root = 0, *root_inner = 0;
257
257
  void *mem_mp = 0;
@@ -264,10 +264,10 @@ mm128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_ski
264
264
  }
265
265
  if (max_dist < bw) max_dist = bw;
266
266
  if (max_dist_inner <= 0 || max_dist_inner >= max_dist) max_dist_inner = 0;
267
- KMALLOC(km, p, n);
268
- KMALLOC(km, f, n);
269
- KCALLOC(km, t, n);
270
- KMALLOC(km, v, n);
267
+ p = Kmalloc(km, int64_t, n);
268
+ f = Kmalloc(km, int32_t, n);
269
+ t = Kcalloc(km, int32_t, n);
270
+ v = Kmalloc(km, int32_t, n);
271
271
  mem_mp = km_init2(km, 0x10000);
272
272
  mp = kmp_init_rmq(mem_mp);
273
273
 
@@ -345,7 +345,6 @@ mm128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_ski
345
345
  }
346
346
  if (!krmq_itr_prev(lc_elem, &itr)) break;
347
347
  }
348
- n_iter += n_rmq_iter;
349
348
  }
350
349
  }
351
350
  }
data/ext/minimap2/main.c CHANGED
@@ -7,8 +7,6 @@
7
7
  #include "mmpriv.h"
8
8
  #include "ketopt.h"
9
9
 
10
- #define MM_VERSION "2.24-r1122"
11
-
12
10
  #ifdef __linux__
13
11
  #include <sys/resource.h>
14
12
  #include <sys/time.h>
@@ -78,6 +76,7 @@ static ko_longopt_t long_options[] = {
78
76
  { "chain-skip-scale",ko_required_argument,351 },
79
77
  { "print-chains", ko_no_argument, 352 },
80
78
  { "no-hash-name", ko_no_argument, 353 },
79
+ { "secondary-seq", ko_no_argument, 354 },
81
80
  { "help", ko_no_argument, 'h' },
82
81
  { "max-intron-len", ko_required_argument, 'G' },
83
82
  { "version", ko_no_argument, 'V' },
@@ -121,7 +120,7 @@ static inline void yes_or_no(mm_mapopt_t *opt, int64_t flag, int long_idx, const
121
120
 
122
121
  int main(int argc, char *argv[])
123
122
  {
124
- const char *opt_str = "2aSDw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:LC:yYPo:e:U:";
123
+ const char *opt_str = "2aSDw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:LC:yYPo:e:U:J:";
125
124
  ketopt_t o = KETOPT_INIT;
126
125
  mm_mapopt_t opt;
127
126
  mm_idxopt_t ipt;
@@ -187,7 +186,12 @@ int main(int argc, char *argv[])
187
186
  else if (c == 'R') rg = o.arg;
188
187
  else if (c == 'h') fp_help = stdout;
189
188
  else if (c == '2') opt.flag |= MM_F_2_IO_THREADS;
190
- else if (c == 'o') {
189
+ else if (c == 'J') {
190
+ int t;
191
+ t = atoi(o.arg);
192
+ if (t == 0) opt.flag |= MM_F_SPLICE_OLD;
193
+ else if (t == 1) opt.flag &= ~MM_F_SPLICE_OLD;
194
+ } else if (c == 'o') {
191
195
  if (strcmp(o.arg, "-") != 0) {
192
196
  if (freopen(o.arg, "wb", stdout) == NULL) {
193
197
  fprintf(stderr, "[ERROR]\033[1;31m failed to write the output to file '%s'\033[0m: %s\n", o.arg, strerror(errno));
@@ -237,6 +241,7 @@ int main(int argc, char *argv[])
237
241
  else if (c == 350) opt.q_occ_frac = atof(o.arg); // --q-occ-frac
238
242
  else if (c == 352) mm_dbg_flag |= MM_DBG_PRINT_CHAIN; // --print-chains
239
243
  else if (c == 353) opt.flag |= MM_F_NO_HASH_NAME; // --no-hash-name
244
+ else if (c == 354) opt.flag |= MM_F_SECONDARY_SEQ; // --secondary-seq
240
245
  else if (c == 330) {
241
246
  fprintf(stderr, "[WARNING] \033[1;31m --lj-min-ratio has been deprecated.\033[0m\n");
242
247
  } else if (c == 314) { // --frag
@@ -261,7 +266,8 @@ int main(int argc, char *argv[])
261
266
  } else if (c == 326) { // --dual
262
267
  yes_or_no(&opt, MM_F_NO_DUAL, o.longidx, o.arg, 0);
263
268
  } else if (c == 347) { // --rmq
264
- yes_or_no(&opt, MM_F_RMQ, o.longidx, o.arg, 1);
269
+ if (o.arg) yes_or_no(&opt, MM_F_RMQ, o.longidx, o.arg, 1);
270
+ else opt.flag |= MM_F_RMQ;
265
271
  } else if (c == 'S') {
266
272
  opt.flag |= MM_F_OUT_CS | MM_F_CIGAR | MM_F_OUT_CS_LONG;
267
273
  if (mm_verbose >= 2)
@@ -322,7 +328,7 @@ int main(int argc, char *argv[])
322
328
  fprintf(fp_help, " -H use homopolymer-compressed k-mer (preferrable for PacBio)\n");
323
329
  fprintf(fp_help, " -k INT k-mer size (no larger than 28) [%d]\n", ipt.k);
324
330
  fprintf(fp_help, " -w INT minimizer window size [%d]\n", ipt.w);
325
- fprintf(fp_help, " -I NUM split index for every ~NUM input bases [4G]\n");
331
+ fprintf(fp_help, " -I NUM split index for every ~NUM input bases [8G]\n");
326
332
  fprintf(fp_help, " -d FILE dump index to FILE []\n");
327
333
  fprintf(fp_help, " Mapping:\n");
328
334
  fprintf(fp_help, " -f FLOAT filter out top FLOAT fraction of repetitive minimizers [%g]\n", opt.mid_occ_frac);
@@ -344,6 +350,7 @@ int main(int argc, char *argv[])
344
350
  fprintf(fp_help, " -z INT[,INT] Z-drop score and inversion Z-drop score [%d,%d]\n", opt.zdrop, opt.zdrop_inv);
345
351
  fprintf(fp_help, " -s INT minimal peak DP alignment score [%d]\n", opt.min_dp_max);
346
352
  fprintf(fp_help, " -u CHAR how to find GT-AG. f:transcript strand, b:both strands, n:don't match GT-AG [n]\n");
353
+ fprintf(fp_help, " -J INT splice mode. 0: original minimap2 model; 1: miniprot model [1]\n");
347
354
  fprintf(fp_help, " Input/Output:\n");
348
355
  fprintf(fp_help, " -a output in the SAM format (PAF by default)\n");
349
356
  fprintf(fp_help, " -o FILE output alignments to FILE [stdout]\n");
data/ext/minimap2/map.c CHANGED
@@ -10,11 +10,6 @@
10
10
  #include "bseq.h"
11
11
  #include "khash.h"
12
12
 
13
- struct mm_tbuf_s {
14
- void *km;
15
- int rep_len, frag_gap;
16
- };
17
-
18
13
  mm_tbuf_t *mm_tbuf_init(void)
19
14
  {
20
15
  mm_tbuf_t *b;