minimap2 0.2.24.6 → 0.2.25.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -3
- data/ext/minimap2/Makefile +6 -2
- data/ext/minimap2/NEWS.md +38 -0
- data/ext/minimap2/README.md +9 -3
- data/ext/minimap2/align.c +5 -3
- data/ext/minimap2/cookbook.md +2 -2
- data/ext/minimap2/format.c +7 -4
- data/ext/minimap2/kalloc.c +20 -1
- data/ext/minimap2/kalloc.h +13 -2
- data/ext/minimap2/ksw2.h +1 -0
- data/ext/minimap2/ksw2_extd2_sse.c +1 -1
- data/ext/minimap2/ksw2_exts2_sse.c +79 -40
- data/ext/minimap2/ksw2_extz2_sse.c +1 -1
- data/ext/minimap2/lchain.c +15 -16
- data/ext/minimap2/main.c +13 -6
- data/ext/minimap2/map.c +0 -5
- data/ext/minimap2/minimap.h +40 -31
- data/ext/minimap2/minimap2.1 +19 -5
- data/ext/minimap2/misc/paftools.js +545 -24
- data/ext/minimap2/options.c +1 -1
- data/ext/minimap2/pyproject.toml +2 -0
- data/ext/minimap2/python/mappy.pyx +3 -1
- data/ext/minimap2/seed.c +1 -1
- data/ext/minimap2/setup.py +32 -22
- data/ext/minimap2.patch +3 -3
- data/lib/minimap2/aligner.rb +4 -0
- data/lib/minimap2/ffi/constants.rb +90 -88
- data/lib/minimap2/version.rb +2 -2
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f58943e39da8f734af4ee4d16b5d335825ee7cf94cc45d9cf88a41d7adfe6afe
|
4
|
+
data.tar.gz: 2cb372b02bcb2cc763fb3a9fcd82219c653e61ac7f28a039e0a4d03b054aab35
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 52c827db192ac69bf99cfa1e8ff701a4d87965b7f443a82cdd10d109eebf31a6ac2ca966cc5b4a9d3bdcf1fdb4180f51a0f1c78f1bc6c5e937419ad3a78fa5d3
|
7
|
+
data.tar.gz: 95fd63410ffc2aa088877c9545566aa32326899658ff5fb9ec92ac9cc1c8d89c28c6a849e50bd50d5fce4bc33c6af8b3f40a65c0270f7a6ccec181375c549fb1
|
data/README.md
CHANGED
@@ -175,9 +175,8 @@ ruby-minimap2 is a library under development and there are many points to be imp
|
|
175
175
|
|
176
176
|
Please feel free to report [bugs](https://github.com/kojix2/ruby-minimap2/issues) and [pull requests](https://github.com/kojix2/ruby-minimap2/pulls)!
|
177
177
|
|
178
|
-
|
179
|
-
|
180
|
-
If so, please feel free to contact me @kojix2.
|
178
|
+
Many OSS projects become abandoned because only the founder has commit rights to the original repository.
|
179
|
+
If you need commit rights to the ruby-minimap2 repository, or want admin rights so that you can take over the project, please feel free to contact me @kojix2.
|
181
180
|
|
182
181
|
## License
|
183
182
|
|
data/ext/minimap2/Makefile
CHANGED
@@ -8,6 +8,10 @@ PROG= minimap2
|
|
8
8
|
PROG_EXTRA= sdust minimap2-lite
|
9
9
|
LIBS= -lm -lz -lpthread
|
10
10
|
|
11
|
+
ifneq ($(aarch64),)
|
12
|
+
arm_neon=1
|
13
|
+
endif
|
14
|
+
|
11
15
|
ifeq ($(arm_neon),) # if arm_neon is not defined
|
12
16
|
ifeq ($(sse2only),) # if sse2only is not defined
|
13
17
|
OBJS+=ksw2_extz2_sse41.o ksw2_extd2_sse41.o ksw2_exts2_sse41.o ksw2_extz2_sse2.o ksw2_extd2_sse2.o ksw2_exts2_sse2.o ksw2_dispatch.o
|
@@ -26,12 +30,12 @@ endif
|
|
26
30
|
|
27
31
|
ifneq ($(asan),)
|
28
32
|
CFLAGS+=-fsanitize=address
|
29
|
-
LIBS+=-fsanitize=address
|
33
|
+
LIBS+=-fsanitize=address -ldl
|
30
34
|
endif
|
31
35
|
|
32
36
|
ifneq ($(tsan),)
|
33
37
|
CFLAGS+=-fsanitize=thread
|
34
|
-
LIBS+=-fsanitize=thread
|
38
|
+
LIBS+=-fsanitize=thread -ldl
|
35
39
|
endif
|
36
40
|
|
37
41
|
.PHONY:all extra clean depend
|
data/ext/minimap2/NEWS.md
CHANGED
@@ -1,3 +1,41 @@
|
|
1
|
+
Release 2.25-r1173 (25 April 2023)
|
2
|
+
----------------------------------
|
3
|
+
|
4
|
+
Notable changes:
|
5
|
+
|
6
|
+
* Improvement: use the miniprot splice model for RNA-seq alignment by default.
|
7
|
+
This model considers non-GT-AG splice sites and leads to slightly higher
|
8
|
+
(<0.1%) accuracy and sensitivity on real human data.
|
9
|
+
|
10
|
+
* Change: increased the default `-I` to `8G` such that minimap2 would create a
|
11
|
+
uni-part index for a pair of mammalian genomes. This change may increase the
|
12
|
+
memory for all-vs-all read overlap alignment given large datasets.
|
13
|
+
|
14
|
+
* New feature: output the sequences in secondary alignments with option
|
15
|
+
`--secondary-seq` (#687).
|
16
|
+
|
17
|
+
* Bugfix: --rmq was not parsed correctly (#1010)
|
18
|
+
|
19
|
+
* Bugfix: possibly incorrect coordinate when applying end bonus to the target
|
20
|
+
sequence (#1025). This is a ksw2 bug. It does not affect minimap2 as
|
21
|
+
minimap2 is not using the affected feature.
|
22
|
+
|
23
|
+
* Improvement: incorporated several changes for better compatibility with
|
24
|
+
Windows (#1051) and for minimap2 integration at Oxford Nanopore Technologies
|
25
|
+
(#1048 and #1033).
|
26
|
+
|
27
|
+
* Improvement: output the HD-line in SAM output (#1019).
|
28
|
+
|
29
|
+
* Improvement: check minimap2 index file in mappy to prevent segmentation
|
30
|
+
fault for certain indices (#1008).
|
31
|
+
|
32
|
+
For genomic sequences, minimap2 should give identical output to v2.24.
|
33
|
+
Long-read RNA-seq alignment may occasionally differ from previous versions.
|
34
|
+
|
35
|
+
(2.25: 25 April 2023, r1173)
|
36
|
+
|
37
|
+
|
38
|
+
|
1
39
|
Release 2.24-r1122 (26 December 2021)
|
2
40
|
-------------------------------------
|
3
41
|
|
data/ext/minimap2/README.md
CHANGED
@@ -74,8 +74,8 @@ Detailed evaluations are available from the [minimap2 paper][doi] or the
|
|
74
74
|
Minimap2 is optimized for x86-64 CPUs. You can acquire precompiled binaries from
|
75
75
|
the [release page][release] with:
|
76
76
|
```sh
|
77
|
-
curl -L https://github.com/lh3/minimap2/releases/download/v2.24/minimap2-2.24_x64-linux.tar.bz2 | tar -jxvf -
|
78
|
-
./minimap2-2.24_x64-linux/minimap2
|
77
|
+
curl -L https://github.com/lh3/minimap2/releases/download/v2.25/minimap2-2.25_x64-linux.tar.bz2 | tar -jxvf -
|
78
|
+
./minimap2-2.25_x64-linux/minimap2
|
79
79
|
```
|
80
80
|
If you want to compile from the source, you need to have a C compiler, GNU make
|
81
81
|
and zlib development files installed. Then type `make` in the source code
|
@@ -350,6 +350,11 @@ If you use minimap2 in your work, please cite:
|
|
350
350
|
> Li, H. (2018). Minimap2: pairwise alignment for nucleotide sequences.
|
351
351
|
> *Bioinformatics*, **34**:3094-3100. [doi:10.1093/bioinformatics/bty191][doi]
|
352
352
|
|
353
|
+
and/or:
|
354
|
+
|
355
|
+
> Li, H. (2021). New strategies to improve minimap2 alignment accuracy.
|
356
|
+
> *Bioinformatics*, **37**:4572-4574. [doi:10.1093/bioinformatics/btab705][doi2]
|
357
|
+
|
353
358
|
## <a name="dguide"></a>Developers' Guide
|
354
359
|
|
355
360
|
Minimap2 is not only a command line tool, but also a programming library.
|
@@ -399,5 +404,6 @@ mappy` or [from BioConda][mappyconda] via `conda install -c bioconda mappy`.
|
|
399
404
|
[manpage]: https://lh3.github.io/minimap2/minimap2.html
|
400
405
|
[manpage-cs]: https://lh3.github.io/minimap2/minimap2.html#10
|
401
406
|
[doi]: https://doi.org/10.1093/bioinformatics/bty191
|
402
|
-
[simde]: https://github.com/nemequ/simde
|
407
|
+
[doi2]: https://doi.org/10.1093/bioinformatics/btab705
|
408
|
+
[simde]: https://github.com/nemequ/simde
|
403
409
|
[unimap]: https://github.com/lh3/unimap
|
data/ext/minimap2/align.c
CHANGED
@@ -326,9 +326,11 @@ static void mm_align_pair(void *km, const mm_mapopt_t *opt, int qlen, const uint
|
|
326
326
|
if (opt->max_sw_mat > 0 && (int64_t)tlen * qlen > opt->max_sw_mat) {
|
327
327
|
ksw_reset_extz(ez);
|
328
328
|
ez->zdropped = 1;
|
329
|
-
} else if (opt->flag & MM_F_SPLICE)
|
330
|
-
|
331
|
-
|
329
|
+
} else if (opt->flag & MM_F_SPLICE) {
|
330
|
+
int flag_tmp = flag;
|
331
|
+
if (!(opt->flag & MM_F_SPLICE_OLD)) flag_tmp |= KSW_EZ_SPLICE_CMPLX;
|
332
|
+
ksw_exts2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, opt->q2, opt->noncan, zdrop, opt->junc_bonus, flag_tmp, junc, ez);
|
333
|
+
} else if (opt->q == opt->q2 && opt->e == opt->e2)
|
332
334
|
ksw_extz2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, w, zdrop, end_bonus, flag, ez);
|
333
335
|
else
|
334
336
|
ksw_extd2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, opt->q2, opt->e2, w, zdrop, end_bonus, flag, ez);
|
data/ext/minimap2/cookbook.md
CHANGED
@@ -31,8 +31,8 @@ To acquire the data used in this cookbook and to install minimap2 and paftools,
|
|
31
31
|
please follow the command lines below:
|
32
32
|
```sh
|
33
33
|
# install minimap2 executables
|
34
|
-
curl -L https://github.com/lh3/minimap2/releases/download/v2.24/minimap2-2.24_x64-linux.tar.bz2 | tar jxf -
|
35
|
-
cp minimap2-2.24_x64-linux/{minimap2,k8,paftools.js} .  # copy executables
|
34
|
+
curl -L https://github.com/lh3/minimap2/releases/download/v2.25/minimap2-2.25_x64-linux.tar.bz2 | tar jxf -
|
35
|
+
cp minimap2-2.25_x64-linux/{minimap2,k8,paftools.js} . # copy executables
|
36
36
|
export PATH="$PATH:"`pwd` # put the current directory on PATH
|
37
37
|
# download example datasets
|
38
38
|
curl -L https://github.com/lh3/minimap2/releases/download/v2.10/cookbook-data.tgz | tar zxf -
|
data/ext/minimap2/format.c
CHANGED
@@ -119,6 +119,7 @@ int mm_write_sam_hdr(const mm_idx_t *idx, const char *rg, const char *ver, int a
|
|
119
119
|
{
|
120
120
|
kstring_t str = {0,0,0};
|
121
121
|
int ret = 0;
|
122
|
+
mm_sprintf_lite(&str, "@HD\tVN:1.6\tSO:unsorted\tGO:query\n");
|
122
123
|
if (idx) {
|
123
124
|
uint32_t i;
|
124
125
|
for (i = 0; i < idx->n_seq; ++i)
|
@@ -369,14 +370,16 @@ static void write_sam_cigar(kstring_t *s, int sam_flag, int in_tag, int qlen, co
|
|
369
370
|
clip_len[0] = r->rev? qlen - r->qe : r->qs;
|
370
371
|
clip_len[1] = r->rev? r->qs : qlen - r->qe;
|
371
372
|
if (in_tag) {
|
372
|
-
int clip_char = (sam_flag&0x800) &&
|
373
|
+
int clip_char = (((sam_flag&0x800) || ((sam_flag&0x100) && (opt_flag&MM_F_SECONDARY_SEQ))) &&
|
374
|
+
!(opt_flag&MM_F_SOFTCLIP)) ? 5 : 4;
|
373
375
|
mm_sprintf_lite(s, "\tCG:B:I");
|
374
376
|
if (clip_len[0]) mm_sprintf_lite(s, ",%u", clip_len[0]<<4|clip_char);
|
375
377
|
for (k = 0; k < r->p->n_cigar; ++k)
|
376
378
|
mm_sprintf_lite(s, ",%u", r->p->cigar[k]);
|
377
379
|
if (clip_len[1]) mm_sprintf_lite(s, ",%u", clip_len[1]<<4|clip_char);
|
378
380
|
} else {
|
379
|
-
int clip_char = (sam_flag&0x800) &&
|
381
|
+
int clip_char = (((sam_flag&0x800) || ((sam_flag&0x100) && (opt_flag&MM_F_SECONDARY_SEQ))) &&
|
382
|
+
!(opt_flag&MM_F_SOFTCLIP)) ? 'H' : 'S';
|
380
383
|
assert(clip_len[0] < qlen && clip_len[1] < qlen);
|
381
384
|
if (clip_len[0]) mm_sprintf_lite(s, "%d%c", clip_len[0], clip_char);
|
382
385
|
for (k = 0; k < r->p->n_cigar; ++k)
|
@@ -451,7 +454,7 @@ void mm_write_sam3(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int se
|
|
451
454
|
if (cigar_in_tag) {
|
452
455
|
int slen;
|
453
456
|
if ((flag & 0x900) == 0 || (opt_flag & MM_F_SOFTCLIP)) slen = t->l_seq;
|
454
|
-
else if (flag & 0x100) slen = 0;
|
457
|
+
else if ((flag & 0x100) && !(opt_flag & MM_F_SECONDARY_SEQ)) slen = 0;
|
455
458
|
else slen = r->qe - r->qs;
|
456
459
|
mm_sprintf_lite(s, "%dS%dN", slen, r->re - r->rs);
|
457
460
|
} else write_sam_cigar(s, flag, 0, t->l_seq, r, opt_flag);
|
@@ -492,7 +495,7 @@ void mm_write_sam3(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int se
|
|
492
495
|
mm_sprintf_lite(s, "\t");
|
493
496
|
if (t->qual) sam_write_sq(s, t->qual, t->l_seq, r->rev, 0);
|
494
497
|
else mm_sprintf_lite(s, "*");
|
495
|
-
} else if (flag & 0x100) {
|
498
|
+
} else if ((flag & 0x100) && !(opt_flag & MM_F_SECONDARY_SEQ)){
|
496
499
|
mm_sprintf_lite(s, "*\t*");
|
497
500
|
} else {
|
498
501
|
sam_write_sq(s, t->seq + r->qs, r->qe - r->qs, r->rev, r->rev);
|
data/ext/minimap2/kalloc.c
CHANGED
@@ -40,7 +40,8 @@ void *km_init2(void *km_par, size_t min_core_size)
|
|
40
40
|
kmem_t *km;
|
41
41
|
km = (kmem_t*)kcalloc(km_par, 1, sizeof(kmem_t));
|
42
42
|
km->par = km_par;
|
43
|
-
km->min_core_size = min_core_size > 0? min_core_size :
|
43
|
+
if (km_par) km->min_core_size = min_core_size > 0? min_core_size : ((kmem_t*)km_par)->min_core_size - 2;
|
44
|
+
else km->min_core_size = min_core_size > 0? min_core_size : 0x80000;
|
44
45
|
return (void*)km;
|
45
46
|
}
|
46
47
|
|
@@ -183,6 +184,16 @@ void *krealloc(void *_km, void *ap, size_t n_bytes) // TODO: this can be made mo
|
|
183
184
|
return q;
|
184
185
|
}
|
185
186
|
|
187
|
+
void *krelocate(void *km, void *ap, size_t n_bytes)
|
188
|
+
{
|
189
|
+
void *p;
|
190
|
+
if (km == 0 || ap == 0) return ap;
|
191
|
+
p = kmalloc(km, n_bytes);
|
192
|
+
memcpy(p, ap, n_bytes);
|
193
|
+
kfree(km, ap);
|
194
|
+
return p;
|
195
|
+
}
|
196
|
+
|
186
197
|
void km_stat(const void *_km, km_stat_t *s)
|
187
198
|
{
|
188
199
|
kmem_t *km = (kmem_t*)_km;
|
@@ -203,3 +214,11 @@ void km_stat(const void *_km, km_stat_t *s)
|
|
203
214
|
s->largest = s->largest > size? s->largest : size;
|
204
215
|
}
|
205
216
|
}
|
217
|
+
|
218
|
+
void km_stat_print(const void *km)
|
219
|
+
{
|
220
|
+
km_stat_t st;
|
221
|
+
km_stat(km, &st);
|
222
|
+
fprintf(stderr, "[km_stat] cap=%ld, avail=%ld, largest=%ld, n_core=%ld, n_block=%ld\n",
|
223
|
+
st.capacity, st.available, st.largest, st.n_blocks, st.n_cores);
|
224
|
+
}
|
data/ext/minimap2/kalloc.h
CHANGED
@@ -13,6 +13,7 @@ typedef struct {
|
|
13
13
|
|
14
14
|
void *kmalloc(void *km, size_t size);
|
15
15
|
void *krealloc(void *km, void *ptr, size_t size);
|
16
|
+
void *krelocate(void *km, void *ap, size_t n_bytes);
|
16
17
|
void *kcalloc(void *km, size_t count, size_t size);
|
17
18
|
void kfree(void *km, void *ptr);
|
18
19
|
|
@@ -20,11 +21,21 @@ void *km_init(void);
|
|
20
21
|
void *km_init2(void *km_par, size_t min_core_size);
|
21
22
|
void km_destroy(void *km);
|
22
23
|
void km_stat(const void *_km, km_stat_t *s);
|
24
|
+
void km_stat_print(const void *km);
|
23
25
|
|
24
26
|
#ifdef __cplusplus
|
25
27
|
}
|
26
28
|
#endif
|
27
29
|
|
30
|
+
#define Kmalloc(km, type, cnt) ((type*)kmalloc((km), (cnt) * sizeof(type)))
|
31
|
+
#define Kcalloc(km, type, cnt) ((type*)kcalloc((km), (cnt), sizeof(type)))
|
32
|
+
#define Krealloc(km, type, ptr, cnt) ((type*)krealloc((km), (ptr), (cnt) * sizeof(type)))
|
33
|
+
|
34
|
+
#define Kexpand(km, type, a, m) do { \
|
35
|
+
(m) = (m) >= 4? (m) + ((m)>>1) : 16; \
|
36
|
+
(a) = Krealloc(km, type, (a), (m)); \
|
37
|
+
} while (0)
|
38
|
+
|
28
39
|
#define KMALLOC(km, ptr, len) ((ptr) = (__typeof__(ptr))kmalloc((km), (len) * sizeof(*(ptr))))
|
29
40
|
#define KCALLOC(km, ptr, len) ((ptr) = (__typeof__(ptr))kcalloc((km), (len), sizeof(*(ptr))))
|
30
41
|
#define KREALLOC(km, ptr, len) ((ptr) = (__typeof__(ptr))krealloc((km), (ptr), (len) * sizeof(*(ptr))))
|
@@ -50,7 +61,7 @@ void km_stat(const void *_km, km_stat_t *s);
|
|
50
61
|
} kmp_##name##_t; \
|
51
62
|
SCOPE kmp_##name##_t *kmp_init_##name(void *km) { \
|
52
63
|
kmp_##name##_t *mp; \
|
53
|
-
|
64
|
+
mp = Kcalloc(km, kmp_##name##_t, 1); \
|
54
65
|
mp->km = km; \
|
55
66
|
return mp; \
|
56
67
|
} \
|
@@ -66,7 +77,7 @@ void km_stat(const void *_km, km_stat_t *s);
|
|
66
77
|
} \
|
67
78
|
SCOPE void kmp_free_##name(kmp_##name##_t *mp, kmptype_t *p) { \
|
68
79
|
--mp->cnt; \
|
69
|
-
if (mp->n == mp->max)
|
80
|
+
if (mp->n == mp->max) Kexpand(mp->km, kmptype_t*, mp->buf, mp->max); \
|
70
81
|
mp->buf[mp->n++] = p; \
|
71
82
|
}
|
72
83
|
|
data/ext/minimap2/ksw2.h
CHANGED
@@ -15,6 +15,7 @@
|
|
15
15
|
#define KSW_EZ_SPLICE_FOR 0x100
|
16
16
|
#define KSW_EZ_SPLICE_REV 0x200
|
17
17
|
#define KSW_EZ_SPLICE_FLANK 0x400
|
18
|
+
#define KSW_EZ_SPLICE_CMPLX 0x800
|
18
19
|
|
19
20
|
// The subset of CIGAR operators used by ksw code.
|
20
21
|
// Use MM_CIGAR_* from minimap.h if you need the full list.
|
data/ext/minimap2/ksw2_extd2_sse.c
CHANGED
@@ -358,7 +358,7 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|
358
358
|
} else H[0] = v8[0] - qe, max_H = H[0], max_t = 0; // special casing r==0
|
359
359
|
// update ez
|
360
360
|
if (en0 == tlen - 1 && H[en0] > ez->mte)
|
361
|
-
ez->mte = H[en0], ez->mte_q = r -
|
361
|
+
ez->mte = H[en0], ez->mte_q = r - en0;
|
362
362
|
if (r - st0 == qlen - 1 && H[st0] > ez->mqe)
|
363
363
|
ez->mqe = H[st0], ez->mqe_t = st0;
|
364
364
|
if (ksw_apply_zdrop(ez, 1, max_H, r, max_t, zdrop, e2)) break;
|
data/ext/minimap2/ksw2_exts2_sse.c
CHANGED
@@ -71,6 +71,7 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|
71
71
|
|
72
72
|
ksw_reset_extz(ez);
|
73
73
|
if (m <= 1 || qlen <= 0 || tlen <= 0 || q2 <= q + e) return;
|
74
|
+
assert((flag & KSW_EZ_SPLICE_FOR) == 0 || (flag & KSW_EZ_SPLICE_REV) == 0); // can't be both set
|
74
75
|
|
75
76
|
zero_ = _mm_set1_epi8(0);
|
76
77
|
q_ = _mm_set1_epi8(q);
|
@@ -118,55 +119,93 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|
118
119
|
|
119
120
|
// set the donor and acceptor arrays. TODO: this assumes 0/1/2/3 encoding!
|
120
121
|
if (flag & (KSW_EZ_SPLICE_FOR|KSW_EZ_SPLICE_REV)) {
|
121
|
-
int
|
122
|
-
|
123
|
-
|
122
|
+
const int sp0[4] = { 8, 15, 21, 30 };
|
123
|
+
int sp[4];
|
124
|
+
if (flag & KSW_EZ_SPLICE_CMPLX) {
|
125
|
+
for (t = 0; t < 4; ++t)
|
126
|
+
sp[t] = (int)((double)sp0[t] / 3. + .499);
|
127
|
+
} else {
|
128
|
+
sp[0] = flag&KSW_EZ_SPLICE_FLANK? noncan / 2 : 0;
|
129
|
+
sp[1] = sp[2] = sp[3] = noncan;
|
130
|
+
}
|
131
|
+
memset(donor, -sp[3], tlen_ * 16);
|
132
|
+
memset(acceptor, -sp[3], tlen_ * 16);
|
124
133
|
if (!(flag & KSW_EZ_REV_CIGAR)) {
|
125
134
|
for (t = 0; t < tlen - 4; ++t) {
|
126
|
-
int
|
127
|
-
if (
|
128
|
-
|
129
|
-
|
130
|
-
|
135
|
+
int z = 3;
|
136
|
+
if (flag & KSW_EZ_SPLICE_FOR) {
|
137
|
+
if (target[t+1] == 2 && target[t+2] == 3) // |GT.
|
138
|
+
z = target[t+3] == 0 || target[t+3] == 2? -1 : 0; // |GTr or not
|
139
|
+
else if (target[t+1] == 2 && target[t+2] == 1) z = 1; // |GC.
|
140
|
+
else if (target[t+1] == 0 && target[t+2] == 3) z = 2; // |AT.
|
141
|
+
} else if (flag & KSW_EZ_SPLICE_REV) {
|
142
|
+
if (target[t+1] == 1 && target[t+2] == 3) // |CT. (revcomp of .AG|)
|
143
|
+
z = target[t+3] == 0 || target[t+3] == 2? -1 : 0;
|
144
|
+
else if (target[t+1] == 2 && target[t+2] == 3) z = 2; // |GT. (revcomp of .AC|)
|
145
|
+
}
|
146
|
+
((int8_t*)donor)[t] = z < 0? 0 : -sp[z];
|
131
147
|
}
|
132
|
-
if (junc)
|
133
|
-
for (t = 0; t < tlen - 1; ++t)
|
134
|
-
if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t+1]&1)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t+1]&8)))
|
135
|
-
((int8_t*)donor)[t] += junc_bonus;
|
136
148
|
for (t = 2; t < tlen; ++t) {
|
137
|
-
int
|
138
|
-
if (
|
139
|
-
|
140
|
-
|
141
|
-
|
149
|
+
int z = 3;
|
150
|
+
if (flag & KSW_EZ_SPLICE_FOR) {
|
151
|
+
if (target[t-1] == 0 && target[t] == 2) // .AG|
|
152
|
+
z = target[t-2] == 1 || target[t-2] == 3? -1 : 0; // yAG| or not
|
153
|
+
else if (target[t-1] == 0 && target[t] == 1) z = 2; // .AC|
|
154
|
+
} else if (flag & KSW_EZ_SPLICE_REV) {
|
155
|
+
if (target[t-1] == 0 && target[t] == 1) // .AC| (revcomp of |GT.)
|
156
|
+
z = target[t-2] == 1 || target[t-2] == 3? -1 : 0; // yAC| or not
|
157
|
+
else if (target[t-1] == 2 && target[t] == 1) z = 1; // .GC| (revcomp of |GC.)
|
158
|
+
else if (target[t-1] == 0 && target[t] == 3) z = 2; // .AT| (revcomp of |AT.)
|
159
|
+
}
|
160
|
+
((int8_t*)acceptor)[t] = z < 0? 0 : -sp[z];
|
142
161
|
}
|
143
|
-
if (junc)
|
144
|
-
for (t = 0; t < tlen; ++t)
|
145
|
-
if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t]&2)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t]&4)))
|
146
|
-
((int8_t*)acceptor)[t] += junc_bonus;
|
147
162
|
} else {
|
148
163
|
for (t = 0; t < tlen - 4; ++t) {
|
149
|
-
int
|
150
|
-
if (
|
151
|
-
|
152
|
-
|
153
|
-
|
164
|
+
int z = 3;
|
165
|
+
if (flag & KSW_EZ_SPLICE_FOR) {
|
166
|
+
if (target[t+1] == 2 && target[t+2] == 0) // |GA. (rev of .AG|)
|
167
|
+
z = target[t+3] == 1 || target[t+3] == 3? -1 : 0;
|
168
|
+
else if (target[t+1] == 1 && target[t+2] == 0) z = 2; // |CA. (rev of .AC|)
|
169
|
+
} else if (flag & KSW_EZ_SPLICE_REV) {
|
170
|
+
if (target[t+1] == 1 && target[t+2] == 0) // |CA. (comp of |GT.)
|
171
|
+
z = target[t+3] == 1 || target[t+3] == 3? -1 : 0;
|
172
|
+
else if (target[t+1] == 1 && target[t+2] == 2) z = 1; // |CG. (comp of |GC.)
|
173
|
+
else if (target[t+1] == 3 && target[t+2] == 0) z = 2; // |TA. (comp of |AT.)
|
174
|
+
}
|
175
|
+
((int8_t*)donor)[t] = z < 0? 0 : -sp[z];
|
154
176
|
}
|
155
|
-
if (junc)
|
156
|
-
for (t = 0; t < tlen - 1; ++t)
|
157
|
-
if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t+1]&2)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t+1]&4)))
|
158
|
-
((int8_t*)donor)[t] += junc_bonus;
|
159
177
|
for (t = 2; t < tlen; ++t) {
|
160
|
-
int
|
161
|
-
if (
|
162
|
-
|
163
|
-
|
164
|
-
|
178
|
+
int z = 3;
|
179
|
+
if (flag & KSW_EZ_SPLICE_FOR) {
|
180
|
+
if (target[t-1] == 3 && target[t] == 2) // .TG| (rev of |GT.)
|
181
|
+
z = target[t-2] == 0 || target[t-2] == 2? -1 : 0;
|
182
|
+
else if (target[t-1] == 1 && target[t] == 2) z = 1; // .CG| (rev of |GC.)
|
183
|
+
else if (target[t-1] == 3 && target[t] == 0) z = 2; // .TA| (rev of |AT.)
|
184
|
+
} else if (flag & KSW_EZ_SPLICE_REV) {
|
185
|
+
if (target[t-1] == 3 && target[t] == 1) // .TC| (comp of .AG|)
|
186
|
+
z = target[t-2] == 0 || target[t-2] == 2? -1 : 0;
|
187
|
+
else if (target[t-1] == 3 && target[t] == 2) z = 2; // .TG| (comp of .AC|)
|
188
|
+
}
|
189
|
+
((int8_t*)acceptor)[t] = z < 0? 0 : -sp[z];
|
165
190
|
}
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
191
|
+
}
|
192
|
+
}
|
193
|
+
|
194
|
+
if (junc) {
|
195
|
+
if (!(flag & KSW_EZ_REV_CIGAR)) {
|
196
|
+
for (t = 0; t < tlen - 1; ++t)
|
197
|
+
if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t+1]&1)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t+1]&8)))
|
198
|
+
((int8_t*)donor)[t] += junc_bonus;
|
199
|
+
for (t = 0; t < tlen; ++t)
|
200
|
+
if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t]&2)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t]&4)))
|
201
|
+
((int8_t*)acceptor)[t] += junc_bonus;
|
202
|
+
} else {
|
203
|
+
for (t = 0; t < tlen - 1; ++t)
|
204
|
+
if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t+1]&2)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t+1]&4)))
|
205
|
+
((int8_t*)donor)[t] += junc_bonus;
|
206
|
+
for (t = 0; t < tlen; ++t)
|
207
|
+
if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t]&1)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t]&8)))
|
208
|
+
((int8_t*)acceptor)[t] += junc_bonus;
|
170
209
|
}
|
171
210
|
}
|
172
211
|
|
@@ -376,7 +415,7 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|
376
415
|
} else H[0] = v8[0] - qe, max_H = H[0], max_t = 0; // special casing r==0
|
377
416
|
// update ez
|
378
417
|
if (en0 == tlen - 1 && H[en0] > ez->mte)
|
379
|
-
ez->mte = H[en0], ez->mte_q = r -
|
418
|
+
ez->mte = H[en0], ez->mte_q = r - en0;
|
380
419
|
if (r - st0 == qlen - 1 && H[st0] > ez->mqe)
|
381
420
|
ez->mqe = H[st0], ez->mqe_t = st0;
|
382
421
|
if (ksw_apply_zdrop(ez, 1, max_H, r, max_t, zdrop, 0)) break;
|
data/ext/minimap2/ksw2_extz2_sse.c
CHANGED
@@ -269,7 +269,7 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|
269
269
|
} else H[0] = v8[0] - qe - qe, max_H = H[0], max_t = 0; // special casing r==0
|
270
270
|
// update ez
|
271
271
|
if (en0 == tlen - 1 && H[en0] > ez->mte)
|
272
|
-
ez->mte = H[en0], ez->mte_q = r -
|
272
|
+
ez->mte = H[en0], ez->mte_q = r - en0;
|
273
273
|
if (r - st0 == qlen - 1 && H[st0] > ez->mqe)
|
274
274
|
ez->mqe = H[st0], ez->mqe_t = st0;
|
275
275
|
if (ksw_apply_zdrop(ez, 1, max_H, r, max_t, zdrop, e)) break;
|
data/ext/minimap2/lchain.c
CHANGED
@@ -35,7 +35,7 @@ uint64_t *mg_chain_backtrack(void *km, int64_t n, const int32_t *f, const int64_
|
|
35
35
|
for (i = 0, n_z = 0; i < n; ++i) // precompute n_z
|
36
36
|
if (f[i] >= min_sc) ++n_z;
|
37
37
|
if (n_z == 0) return 0;
|
38
|
-
|
38
|
+
z = Kmalloc(km, mm128_t, n_z);
|
39
39
|
for (i = 0, k = 0; i < n; ++i) // populate z[]
|
40
40
|
if (f[i] >= min_sc) z[k].x = f[i], z[k++].y = i;
|
41
41
|
radix_sort_128x(z, z + n_z);
|
@@ -54,7 +54,7 @@ uint64_t *mg_chain_backtrack(void *km, int64_t n, const int32_t *f, const int64_
|
|
54
54
|
else n_v = n_v0;
|
55
55
|
}
|
56
56
|
}
|
57
|
-
|
57
|
+
u = Kmalloc(km, uint64_t, n_u);
|
58
58
|
memset(t, 0, n * 4);
|
59
59
|
for (k = n_z - 1, n_v = n_u = 0; k >= 0; --k) { // populate u[]
|
60
60
|
if (t[z[k].y] == 0) {
|
@@ -82,7 +82,7 @@ static mm128_t *compact_a(void *km, int32_t n_u, uint64_t *u, int32_t n_v, int32
|
|
82
82
|
int64_t i, j, k;
|
83
83
|
|
84
84
|
// write the result to b[]
|
85
|
-
|
85
|
+
b = Kmalloc(km, mm128_t, n_v);
|
86
86
|
for (i = 0, k = 0; i < n_u; ++i) {
|
87
87
|
int32_t k0 = k, ni = (int32_t)u[i];
|
88
88
|
for (j = 0; j < ni; ++j)
|
@@ -91,13 +91,13 @@ static mm128_t *compact_a(void *km, int32_t n_u, uint64_t *u, int32_t n_v, int32
|
|
91
91
|
kfree(km, v);
|
92
92
|
|
93
93
|
// sort u[] and a[] by the target position, such that adjacent chains may be joined
|
94
|
-
|
94
|
+
w = Kmalloc(km, mm128_t, n_u);
|
95
95
|
for (i = k = 0; i < n_u; ++i) {
|
96
96
|
w[i].x = b[k].x, w[i].y = (uint64_t)k<<32|i;
|
97
97
|
k += (int32_t)u[i];
|
98
98
|
}
|
99
99
|
radix_sort_128x(w, w + n_u);
|
100
|
-
|
100
|
+
u2 = Kmalloc(km, uint64_t, n_u);
|
101
101
|
for (i = k = 0; i < n_u; ++i) {
|
102
102
|
int32_t j = (int32_t)w[i].y, n = (int32_t)u[j];
|
103
103
|
u2[i] = u[j];
|
@@ -138,7 +138,7 @@ static inline int32_t comput_sc(const mm128_t *ai, const mm128_t *aj, int32_t ma
|
|
138
138
|
}
|
139
139
|
|
140
140
|
/* Input:
|
141
|
-
* a[].x:
|
141
|
+
* a[].x: rev<<63 | tid<<32 | tpos
|
142
142
|
* a[].y: flags<<40 | q_span<<32 | q_pos
|
143
143
|
* Output:
|
144
144
|
* n_u: #chains
|
@@ -160,10 +160,10 @@ mm128_t *mg_lchain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int
|
|
160
160
|
if (max_dist_x < bw) max_dist_x = bw;
|
161
161
|
if (max_dist_y < bw && !is_cdna) max_dist_y = bw;
|
162
162
|
if (is_cdna) max_drop = INT32_MAX;
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
163
|
+
p = Kmalloc(km, int64_t, n);
|
164
|
+
f = Kmalloc(km, int32_t, n);
|
165
|
+
v = Kmalloc(km, int32_t, n);
|
166
|
+
t = Kcalloc(km, int32_t, n);
|
167
167
|
|
168
168
|
// fill the score and backtrack arrays
|
169
169
|
for (i = 0, max_ii = -1; i < n; ++i) {
|
@@ -251,7 +251,7 @@ mm128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_ski
|
|
251
251
|
int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km)
|
252
252
|
{
|
253
253
|
int32_t *f,*t, *v, n_u, n_v, mmax_f = 0, max_rmq_size = 0, max_drop = bw;
|
254
|
-
int64_t *p, i, i0, st = 0, st_inner = 0
|
254
|
+
int64_t *p, i, i0, st = 0, st_inner = 0;
|
255
255
|
uint64_t *u;
|
256
256
|
lc_elem_t *root = 0, *root_inner = 0;
|
257
257
|
void *mem_mp = 0;
|
@@ -264,10 +264,10 @@ mm128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_ski
|
|
264
264
|
}
|
265
265
|
if (max_dist < bw) max_dist = bw;
|
266
266
|
if (max_dist_inner <= 0 || max_dist_inner >= max_dist) max_dist_inner = 0;
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
267
|
+
p = Kmalloc(km, int64_t, n);
|
268
|
+
f = Kmalloc(km, int32_t, n);
|
269
|
+
t = Kcalloc(km, int32_t, n);
|
270
|
+
v = Kmalloc(km, int32_t, n);
|
271
271
|
mem_mp = km_init2(km, 0x10000);
|
272
272
|
mp = kmp_init_rmq(mem_mp);
|
273
273
|
|
@@ -345,7 +345,6 @@ mm128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_ski
|
|
345
345
|
}
|
346
346
|
if (!krmq_itr_prev(lc_elem, &itr)) break;
|
347
347
|
}
|
348
|
-
n_iter += n_rmq_iter;
|
349
348
|
}
|
350
349
|
}
|
351
350
|
}
|
data/ext/minimap2/main.c
CHANGED
@@ -7,8 +7,6 @@
|
|
7
7
|
#include "mmpriv.h"
|
8
8
|
#include "ketopt.h"
|
9
9
|
|
10
|
-
#define MM_VERSION "2.24-r1122"
|
11
|
-
|
12
10
|
#ifdef __linux__
|
13
11
|
#include <sys/resource.h>
|
14
12
|
#include <sys/time.h>
|
@@ -78,6 +76,7 @@ static ko_longopt_t long_options[] = {
|
|
78
76
|
{ "chain-skip-scale",ko_required_argument,351 },
|
79
77
|
{ "print-chains", ko_no_argument, 352 },
|
80
78
|
{ "no-hash-name", ko_no_argument, 353 },
|
79
|
+
{ "secondary-seq", ko_no_argument, 354 },
|
81
80
|
{ "help", ko_no_argument, 'h' },
|
82
81
|
{ "max-intron-len", ko_required_argument, 'G' },
|
83
82
|
{ "version", ko_no_argument, 'V' },
|
@@ -121,7 +120,7 @@ static inline void yes_or_no(mm_mapopt_t *opt, int64_t flag, int long_idx, const
|
|
121
120
|
|
122
121
|
int main(int argc, char *argv[])
|
123
122
|
{
|
124
|
-
const char *opt_str = "2aSDw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:LC:yYPo:e:U:";
|
123
|
+
const char *opt_str = "2aSDw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:LC:yYPo:e:U:J:";
|
125
124
|
ketopt_t o = KETOPT_INIT;
|
126
125
|
mm_mapopt_t opt;
|
127
126
|
mm_idxopt_t ipt;
|
@@ -187,7 +186,12 @@ int main(int argc, char *argv[])
|
|
187
186
|
else if (c == 'R') rg = o.arg;
|
188
187
|
else if (c == 'h') fp_help = stdout;
|
189
188
|
else if (c == '2') opt.flag |= MM_F_2_IO_THREADS;
|
190
|
-
else if (c == '
|
189
|
+
else if (c == 'J') {
|
190
|
+
int t;
|
191
|
+
t = atoi(o.arg);
|
192
|
+
if (t == 0) opt.flag |= MM_F_SPLICE_OLD;
|
193
|
+
else if (t == 1) opt.flag &= ~MM_F_SPLICE_OLD;
|
194
|
+
} else if (c == 'o') {
|
191
195
|
if (strcmp(o.arg, "-") != 0) {
|
192
196
|
if (freopen(o.arg, "wb", stdout) == NULL) {
|
193
197
|
fprintf(stderr, "[ERROR]\033[1;31m failed to write the output to file '%s'\033[0m: %s\n", o.arg, strerror(errno));
|
@@ -237,6 +241,7 @@ int main(int argc, char *argv[])
|
|
237
241
|
else if (c == 350) opt.q_occ_frac = atof(o.arg); // --q-occ-frac
|
238
242
|
else if (c == 352) mm_dbg_flag |= MM_DBG_PRINT_CHAIN; // --print-chains
|
239
243
|
else if (c == 353) opt.flag |= MM_F_NO_HASH_NAME; // --no-hash-name
|
244
|
+
else if (c == 354) opt.flag |= MM_F_SECONDARY_SEQ; // --secondary-seq
|
240
245
|
else if (c == 330) {
|
241
246
|
fprintf(stderr, "[WARNING] \033[1;31m --lj-min-ratio has been deprecated.\033[0m\n");
|
242
247
|
} else if (c == 314) { // --frag
|
@@ -261,7 +266,8 @@ int main(int argc, char *argv[])
|
|
261
266
|
} else if (c == 326) { // --dual
|
262
267
|
yes_or_no(&opt, MM_F_NO_DUAL, o.longidx, o.arg, 0);
|
263
268
|
} else if (c == 347) { // --rmq
|
264
|
-
yes_or_no(&opt, MM_F_RMQ, o.longidx, o.arg, 1);
|
269
|
+
if (o.arg) yes_or_no(&opt, MM_F_RMQ, o.longidx, o.arg, 1);
|
270
|
+
else opt.flag |= MM_F_RMQ;
|
265
271
|
} else if (c == 'S') {
|
266
272
|
opt.flag |= MM_F_OUT_CS | MM_F_CIGAR | MM_F_OUT_CS_LONG;
|
267
273
|
if (mm_verbose >= 2)
|
@@ -322,7 +328,7 @@ int main(int argc, char *argv[])
|
|
322
328
|
fprintf(fp_help, " -H use homopolymer-compressed k-mer (preferrable for PacBio)\n");
|
323
329
|
fprintf(fp_help, " -k INT k-mer size (no larger than 28) [%d]\n", ipt.k);
|
324
330
|
fprintf(fp_help, " -w INT minimizer window size [%d]\n", ipt.w);
|
325
|
-
fprintf(fp_help, " -I NUM split index for every ~NUM input bases [
|
331
|
+
fprintf(fp_help, " -I NUM split index for every ~NUM input bases [8G]\n");
|
326
332
|
fprintf(fp_help, " -d FILE dump index to FILE []\n");
|
327
333
|
fprintf(fp_help, " Mapping:\n");
|
328
334
|
fprintf(fp_help, " -f FLOAT filter out top FLOAT fraction of repetitive minimizers [%g]\n", opt.mid_occ_frac);
|
@@ -344,6 +350,7 @@ int main(int argc, char *argv[])
|
|
344
350
|
fprintf(fp_help, " -z INT[,INT] Z-drop score and inversion Z-drop score [%d,%d]\n", opt.zdrop, opt.zdrop_inv);
|
345
351
|
fprintf(fp_help, " -s INT minimal peak DP alignment score [%d]\n", opt.min_dp_max);
|
346
352
|
fprintf(fp_help, " -u CHAR how to find GT-AG. f:transcript strand, b:both strands, n:don't match GT-AG [n]\n");
|
353
|
+
fprintf(fp_help, " -J INT splice mode. 0: original minimap2 model; 1: miniprot model [1]\n");
|
347
354
|
fprintf(fp_help, " Input/Output:\n");
|
348
355
|
fprintf(fp_help, " -a output in the SAM format (PAF by default)\n");
|
349
356
|
fprintf(fp_help, " -o FILE output alignments to FILE [stdout]\n");
|