minimap2 0.2.24.6 → 0.2.25.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -3
- data/ext/minimap2/Makefile +6 -2
- data/ext/minimap2/NEWS.md +38 -0
- data/ext/minimap2/README.md +9 -3
- data/ext/minimap2/align.c +5 -3
- data/ext/minimap2/cookbook.md +2 -2
- data/ext/minimap2/format.c +7 -4
- data/ext/minimap2/kalloc.c +20 -1
- data/ext/minimap2/kalloc.h +13 -2
- data/ext/minimap2/ksw2.h +1 -0
- data/ext/minimap2/ksw2_extd2_sse.c +1 -1
- data/ext/minimap2/ksw2_exts2_sse.c +79 -40
- data/ext/minimap2/ksw2_extz2_sse.c +1 -1
- data/ext/minimap2/lchain.c +15 -16
- data/ext/minimap2/main.c +13 -6
- data/ext/minimap2/map.c +0 -5
- data/ext/minimap2/minimap.h +40 -31
- data/ext/minimap2/minimap2.1 +19 -5
- data/ext/minimap2/misc/paftools.js +545 -24
- data/ext/minimap2/options.c +1 -1
- data/ext/minimap2/pyproject.toml +2 -0
- data/ext/minimap2/python/mappy.pyx +3 -1
- data/ext/minimap2/seed.c +1 -1
- data/ext/minimap2/setup.py +32 -22
- data/ext/minimap2.patch +3 -3
- data/lib/minimap2/aligner.rb +4 -0
- data/lib/minimap2/ffi/constants.rb +90 -88
- data/lib/minimap2/version.rb +2 -2
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f58943e39da8f734af4ee4d16b5d335825ee7cf94cc45d9cf88a41d7adfe6afe
|
4
|
+
data.tar.gz: 2cb372b02bcb2cc763fb3a9fcd82219c653e61ac7f28a039e0a4d03b054aab35
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 52c827db192ac69bf99cfa1e8ff701a4d87965b7f443a82cdd10d109eebf31a6ac2ca966cc5b4a9d3bdcf1fdb4180f51a0f1c78f1bc6c5e937419ad3a78fa5d3
|
7
|
+
data.tar.gz: 95fd63410ffc2aa088877c9545566aa32326899658ff5fb9ec92ac9cc1c8d89c28c6a849e50bd50d5fce4bc33c6af8b3f40a65c0270f7a6ccec181375c549fb1
|
data/README.md
CHANGED
@@ -175,9 +175,8 @@ ruby-minimap2 is a library under development and there are many points to be imp
|
|
175
175
|
|
176
176
|
Please feel free to report [bugs](https://github.com/kojix2/ruby-minimap2/issues) and [pull requests](https://github.com/kojix2/ruby-minimap2/pulls)!
|
177
177
|
|
178
|
-
|
179
|
-
|
180
|
-
If so, please feel free to contact me @kojix2.
|
178
|
+
Many OSS projects become abandoned because only the founder has commit rights to the original repository.
|
179
|
+
If you need commit rights to ruby-minimap2 repository or want to get admin rights and take over the project, please feel free to contact me @kojix2.
|
181
180
|
|
182
181
|
## License
|
183
182
|
|
data/ext/minimap2/Makefile
CHANGED
@@ -8,6 +8,10 @@ PROG= minimap2
|
|
8
8
|
PROG_EXTRA= sdust minimap2-lite
|
9
9
|
LIBS= -lm -lz -lpthread
|
10
10
|
|
11
|
+
ifneq ($(aarch64),)
|
12
|
+
arm_neon=1
|
13
|
+
endif
|
14
|
+
|
11
15
|
ifeq ($(arm_neon),) # if arm_neon is not defined
|
12
16
|
ifeq ($(sse2only),) # if sse2only is not defined
|
13
17
|
OBJS+=ksw2_extz2_sse41.o ksw2_extd2_sse41.o ksw2_exts2_sse41.o ksw2_extz2_sse2.o ksw2_extd2_sse2.o ksw2_exts2_sse2.o ksw2_dispatch.o
|
@@ -26,12 +30,12 @@ endif
|
|
26
30
|
|
27
31
|
ifneq ($(asan),)
|
28
32
|
CFLAGS+=-fsanitize=address
|
29
|
-
LIBS+=-fsanitize=address
|
33
|
+
LIBS+=-fsanitize=address -ldl
|
30
34
|
endif
|
31
35
|
|
32
36
|
ifneq ($(tsan),)
|
33
37
|
CFLAGS+=-fsanitize=thread
|
34
|
-
LIBS+=-fsanitize=thread
|
38
|
+
LIBS+=-fsanitize=thread -ldl
|
35
39
|
endif
|
36
40
|
|
37
41
|
.PHONY:all extra clean depend
|
data/ext/minimap2/NEWS.md
CHANGED
@@ -1,3 +1,41 @@
|
|
1
|
+
Release 2.25-r1173 (25 April 2023)
|
2
|
+
----------------------------------
|
3
|
+
|
4
|
+
Notable changes:
|
5
|
+
|
6
|
+
* Improvement: use the miniprot splice model for RNA-seq alignment by default.
|
7
|
+
This model considers non-GT-AG splice sites and leads to slightly higher
|
8
|
+
(<0.1%) accuracy and sensitivity on real human data.
|
9
|
+
|
10
|
+
* Change: increased the default `-I` to `8G` such that minimap2 would create a
|
11
|
+
uni-part index for a pair of mammalian genomes. This change may increase the
|
12
|
+
memory for all-vs-all read overlap alignment given large datasets.
|
13
|
+
|
14
|
+
* New feature: output the sequences in secondary alignments with option
|
15
|
+
`--secondary-seq` (#687).
|
16
|
+
|
17
|
+
* Bugfix: --rmq was not parsed correctly (#1010)
|
18
|
+
|
19
|
+
* Bugfix: possibly incorrect coordinate when applying end bonus to the target
|
20
|
+
sequence (#1025). This is a ksw2 bug. It does not affect minimap2 as
|
21
|
+
minimap2 is not using the affected feature.
|
22
|
+
|
23
|
+
* Improvement: incorporated several changes for better compatibility with
|
24
|
+
Windows (#1051) and for minimap2 integration at Oxford Nanopore Technologies
|
25
|
+
(#1048 and #1033).
|
26
|
+
|
27
|
+
* Improvement: output the HD-line in SAM output (#1019).
|
28
|
+
|
29
|
+
* Improvement: check minimap2 index file in mappy to prevent segmentation
|
30
|
+
fault for certain indices (#1008).
|
31
|
+
|
32
|
+
For genomic sequences, minimap2 should give identical output to v2.24.
|
33
|
+
Long-read RNA-seq alignment may occasionally differ from previous versions.
|
34
|
+
|
35
|
+
(2.25: 25 April 2023, r1173)
|
36
|
+
|
37
|
+
|
38
|
+
|
1
39
|
Release 2.24-r1122 (26 December 2021)
|
2
40
|
-------------------------------------
|
3
41
|
|
data/ext/minimap2/README.md
CHANGED
@@ -74,8 +74,8 @@ Detailed evaluations are available from the [minimap2 paper][doi] or the
|
|
74
74
|
Minimap2 is optimized for x86-64 CPUs. You can acquire precompiled binaries from
|
75
75
|
the [release page][release] with:
|
76
76
|
```sh
|
77
|
-
curl -L https://github.com/lh3/minimap2/releases/download/v2.24/minimap2-2.24_x64-linux.tar.bz2 | tar -jxvf -
|
78
|
-
./minimap2-2.24_x64-linux/minimap2
|
77
|
+
curl -L https://github.com/lh3/minimap2/releases/download/v2.25/minimap2-2.25_x64-linux.tar.bz2 | tar -jxvf -
|
78
|
+
./minimap2-2.25_x64-linux/minimap2
|
79
79
|
```
|
80
80
|
If you want to compile from the source, you need to have a C compiler, GNU make
|
81
81
|
and zlib development files installed. Then type `make` in the source code
|
@@ -350,6 +350,11 @@ If you use minimap2 in your work, please cite:
|
|
350
350
|
> Li, H. (2018). Minimap2: pairwise alignment for nucleotide sequences.
|
351
351
|
> *Bioinformatics*, **34**:3094-3100. [doi:10.1093/bioinformatics/bty191][doi]
|
352
352
|
|
353
|
+
and/or:
|
354
|
+
|
355
|
+
> Li, H. (2021). New strategies to improve minimap2 alignment accuracy.
|
356
|
+
> *Bioinformatics*, **37**:4572-4574. [doi:10.1093/bioinformatics/btab705][doi2]
|
357
|
+
|
353
358
|
## <a name="dguide"></a>Developers' Guide
|
354
359
|
|
355
360
|
Minimap2 is not only a command line tool, but also a programming library.
|
@@ -399,5 +404,6 @@ mappy` or [from BioConda][mappyconda] via `conda install -c bioconda mappy`.
|
|
399
404
|
[manpage]: https://lh3.github.io/minimap2/minimap2.html
|
400
405
|
[manpage-cs]: https://lh3.github.io/minimap2/minimap2.html#10
|
401
406
|
[doi]: https://doi.org/10.1093/bioinformatics/bty191
|
402
|
-
[
|
407
|
+
[doi2]: https://doi.org/10.1093/bioinformatics/btab705
|
408
|
+
[simde]: https://github.com/nemequ/simde
|
403
409
|
[unimap]: https://github.com/lh3/unimap
|
data/ext/minimap2/align.c
CHANGED
@@ -326,9 +326,11 @@ static void mm_align_pair(void *km, const mm_mapopt_t *opt, int qlen, const uint
|
|
326
326
|
if (opt->max_sw_mat > 0 && (int64_t)tlen * qlen > opt->max_sw_mat) {
|
327
327
|
ksw_reset_extz(ez);
|
328
328
|
ez->zdropped = 1;
|
329
|
-
} else if (opt->flag & MM_F_SPLICE)
|
330
|
-
|
331
|
-
|
329
|
+
} else if (opt->flag & MM_F_SPLICE) {
|
330
|
+
int flag_tmp = flag;
|
331
|
+
if (!(opt->flag & MM_F_SPLICE_OLD)) flag_tmp |= KSW_EZ_SPLICE_CMPLX;
|
332
|
+
ksw_exts2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, opt->q2, opt->noncan, zdrop, opt->junc_bonus, flag_tmp, junc, ez);
|
333
|
+
} else if (opt->q == opt->q2 && opt->e == opt->e2)
|
332
334
|
ksw_extz2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, w, zdrop, end_bonus, flag, ez);
|
333
335
|
else
|
334
336
|
ksw_extd2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, opt->q2, opt->e2, w, zdrop, end_bonus, flag, ez);
|
data/ext/minimap2/cookbook.md
CHANGED
@@ -31,8 +31,8 @@ To acquire the data used in this cookbook and to install minimap2 and paftools,
|
|
31
31
|
please follow the command lines below:
|
32
32
|
```sh
|
33
33
|
# install minimap2 executables
|
34
|
-
curl -L https://github.com/lh3/minimap2/releases/download/v2.24/minimap2-2.24_x64-linux.tar.bz2 | tar jxf -
|
35
|
-
cp minimap2-2.24_x64-linux/{minimap2,k8,paftools.js} . # copy executables
|
34
|
+
curl -L https://github.com/lh3/minimap2/releases/download/v2.25/minimap2-2.25_x64-linux.tar.bz2 | tar jxf -
|
35
|
+
cp minimap2-2.25_x64-linux/{minimap2,k8,paftools.js} . # copy executables
|
36
36
|
export PATH="$PATH:"`pwd` # put the current directory on PATH
|
37
37
|
# download example datasets
|
38
38
|
curl -L https://github.com/lh3/minimap2/releases/download/v2.10/cookbook-data.tgz | tar zxf -
|
data/ext/minimap2/format.c
CHANGED
@@ -119,6 +119,7 @@ int mm_write_sam_hdr(const mm_idx_t *idx, const char *rg, const char *ver, int a
|
|
119
119
|
{
|
120
120
|
kstring_t str = {0,0,0};
|
121
121
|
int ret = 0;
|
122
|
+
mm_sprintf_lite(&str, "@HD\tVN:1.6\tSO:unsorted\tGO:query\n");
|
122
123
|
if (idx) {
|
123
124
|
uint32_t i;
|
124
125
|
for (i = 0; i < idx->n_seq; ++i)
|
@@ -369,14 +370,16 @@ static void write_sam_cigar(kstring_t *s, int sam_flag, int in_tag, int qlen, co
|
|
369
370
|
clip_len[0] = r->rev? qlen - r->qe : r->qs;
|
370
371
|
clip_len[1] = r->rev? r->qs : qlen - r->qe;
|
371
372
|
if (in_tag) {
|
372
|
-
int clip_char = (sam_flag&0x800) && !(opt_flag&MM_F_SOFTCLIP)? 5 : 4;
|
373
|
+
int clip_char = (((sam_flag&0x800) || ((sam_flag&0x100) && (opt_flag&MM_F_SECONDARY_SEQ))) &&
|
374
|
+
!(opt_flag&MM_F_SOFTCLIP)) ? 5 : 4;
|
373
375
|
mm_sprintf_lite(s, "\tCG:B:I");
|
374
376
|
if (clip_len[0]) mm_sprintf_lite(s, ",%u", clip_len[0]<<4|clip_char);
|
375
377
|
for (k = 0; k < r->p->n_cigar; ++k)
|
376
378
|
mm_sprintf_lite(s, ",%u", r->p->cigar[k]);
|
377
379
|
if (clip_len[1]) mm_sprintf_lite(s, ",%u", clip_len[1]<<4|clip_char);
|
378
380
|
} else {
|
379
|
-
int clip_char = (sam_flag&0x800) && !(opt_flag&MM_F_SOFTCLIP)? 'H' : 'S';
|
381
|
+
int clip_char = (((sam_flag&0x800) || ((sam_flag&0x100) && (opt_flag&MM_F_SECONDARY_SEQ))) &&
|
382
|
+
!(opt_flag&MM_F_SOFTCLIP)) ? 'H' : 'S';
|
380
383
|
assert(clip_len[0] < qlen && clip_len[1] < qlen);
|
381
384
|
if (clip_len[0]) mm_sprintf_lite(s, "%d%c", clip_len[0], clip_char);
|
382
385
|
for (k = 0; k < r->p->n_cigar; ++k)
|
@@ -451,7 +454,7 @@ void mm_write_sam3(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int se
|
|
451
454
|
if (cigar_in_tag) {
|
452
455
|
int slen;
|
453
456
|
if ((flag & 0x900) == 0 || (opt_flag & MM_F_SOFTCLIP)) slen = t->l_seq;
|
454
|
-
else if (flag & 0x100) slen = 0;
|
457
|
+
else if ((flag & 0x100) && !(opt_flag & MM_F_SECONDARY_SEQ)) slen = 0;
|
455
458
|
else slen = r->qe - r->qs;
|
456
459
|
mm_sprintf_lite(s, "%dS%dN", slen, r->re - r->rs);
|
457
460
|
} else write_sam_cigar(s, flag, 0, t->l_seq, r, opt_flag);
|
@@ -492,7 +495,7 @@ void mm_write_sam3(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int se
|
|
492
495
|
mm_sprintf_lite(s, "\t");
|
493
496
|
if (t->qual) sam_write_sq(s, t->qual, t->l_seq, r->rev, 0);
|
494
497
|
else mm_sprintf_lite(s, "*");
|
495
|
-
} else if (flag & 0x100) {
|
498
|
+
} else if ((flag & 0x100) && !(opt_flag & MM_F_SECONDARY_SEQ)){
|
496
499
|
mm_sprintf_lite(s, "*\t*");
|
497
500
|
} else {
|
498
501
|
sam_write_sq(s, t->seq + r->qs, r->qe - r->qs, r->rev, r->rev);
|
data/ext/minimap2/kalloc.c
CHANGED
@@ -40,7 +40,8 @@ void *km_init2(void *km_par, size_t min_core_size)
|
|
40
40
|
kmem_t *km;
|
41
41
|
km = (kmem_t*)kcalloc(km_par, 1, sizeof(kmem_t));
|
42
42
|
km->par = km_par;
|
43
|
-
km->min_core_size = min_core_size > 0? min_core_size : 0x80000;
|
43
|
+
if (km_par) km->min_core_size = min_core_size > 0? min_core_size : ((kmem_t*)km_par)->min_core_size - 2;
|
44
|
+
else km->min_core_size = min_core_size > 0? min_core_size : 0x80000;
|
44
45
|
return (void*)km;
|
45
46
|
}
|
46
47
|
|
@@ -183,6 +184,16 @@ void *krealloc(void *_km, void *ap, size_t n_bytes) // TODO: this can be made mo
|
|
183
184
|
return q;
|
184
185
|
}
|
185
186
|
|
187
|
+
void *krelocate(void *km, void *ap, size_t n_bytes)
|
188
|
+
{
|
189
|
+
void *p;
|
190
|
+
if (km == 0 || ap == 0) return ap;
|
191
|
+
p = kmalloc(km, n_bytes);
|
192
|
+
memcpy(p, ap, n_bytes);
|
193
|
+
kfree(km, ap);
|
194
|
+
return p;
|
195
|
+
}
|
196
|
+
|
186
197
|
void km_stat(const void *_km, km_stat_t *s)
|
187
198
|
{
|
188
199
|
kmem_t *km = (kmem_t*)_km;
|
@@ -203,3 +214,11 @@ void km_stat(const void *_km, km_stat_t *s)
|
|
203
214
|
s->largest = s->largest > size? s->largest : size;
|
204
215
|
}
|
205
216
|
}
|
217
|
+
|
218
|
+
void km_stat_print(const void *km)
|
219
|
+
{
|
220
|
+
km_stat_t st;
|
221
|
+
km_stat(km, &st);
|
222
|
+
fprintf(stderr, "[km_stat] cap=%ld, avail=%ld, largest=%ld, n_core=%ld, n_block=%ld\n",
|
223
|
+
st.capacity, st.available, st.largest, st.n_blocks, st.n_cores);
|
224
|
+
}
|
data/ext/minimap2/kalloc.h
CHANGED
@@ -13,6 +13,7 @@ typedef struct {
|
|
13
13
|
|
14
14
|
void *kmalloc(void *km, size_t size);
|
15
15
|
void *krealloc(void *km, void *ptr, size_t size);
|
16
|
+
void *krelocate(void *km, void *ap, size_t n_bytes);
|
16
17
|
void *kcalloc(void *km, size_t count, size_t size);
|
17
18
|
void kfree(void *km, void *ptr);
|
18
19
|
|
@@ -20,11 +21,21 @@ void *km_init(void);
|
|
20
21
|
void *km_init2(void *km_par, size_t min_core_size);
|
21
22
|
void km_destroy(void *km);
|
22
23
|
void km_stat(const void *_km, km_stat_t *s);
|
24
|
+
void km_stat_print(const void *km);
|
23
25
|
|
24
26
|
#ifdef __cplusplus
|
25
27
|
}
|
26
28
|
#endif
|
27
29
|
|
30
|
+
#define Kmalloc(km, type, cnt) ((type*)kmalloc((km), (cnt) * sizeof(type)))
|
31
|
+
#define Kcalloc(km, type, cnt) ((type*)kcalloc((km), (cnt), sizeof(type)))
|
32
|
+
#define Krealloc(km, type, ptr, cnt) ((type*)krealloc((km), (ptr), (cnt) * sizeof(type)))
|
33
|
+
|
34
|
+
#define Kexpand(km, type, a, m) do { \
|
35
|
+
(m) = (m) >= 4? (m) + ((m)>>1) : 16; \
|
36
|
+
(a) = Krealloc(km, type, (a), (m)); \
|
37
|
+
} while (0)
|
38
|
+
|
28
39
|
#define KMALLOC(km, ptr, len) ((ptr) = (__typeof__(ptr))kmalloc((km), (len) * sizeof(*(ptr))))
|
29
40
|
#define KCALLOC(km, ptr, len) ((ptr) = (__typeof__(ptr))kcalloc((km), (len), sizeof(*(ptr))))
|
30
41
|
#define KREALLOC(km, ptr, len) ((ptr) = (__typeof__(ptr))krealloc((km), (ptr), (len) * sizeof(*(ptr))))
|
@@ -50,7 +61,7 @@ void km_stat(const void *_km, km_stat_t *s);
|
|
50
61
|
} kmp_##name##_t; \
|
51
62
|
SCOPE kmp_##name##_t *kmp_init_##name(void *km) { \
|
52
63
|
kmp_##name##_t *mp; \
|
53
|
-
|
64
|
+
mp = Kcalloc(km, kmp_##name##_t, 1); \
|
54
65
|
mp->km = km; \
|
55
66
|
return mp; \
|
56
67
|
} \
|
@@ -66,7 +77,7 @@ void km_stat(const void *_km, km_stat_t *s);
|
|
66
77
|
} \
|
67
78
|
SCOPE void kmp_free_##name(kmp_##name##_t *mp, kmptype_t *p) { \
|
68
79
|
--mp->cnt; \
|
69
|
-
if (mp->n == mp->max)
|
80
|
+
if (mp->n == mp->max) Kexpand(mp->km, kmptype_t*, mp->buf, mp->max); \
|
70
81
|
mp->buf[mp->n++] = p; \
|
71
82
|
}
|
72
83
|
|
data/ext/minimap2/ksw2.h
CHANGED
@@ -15,6 +15,7 @@
|
|
15
15
|
#define KSW_EZ_SPLICE_FOR 0x100
|
16
16
|
#define KSW_EZ_SPLICE_REV 0x200
|
17
17
|
#define KSW_EZ_SPLICE_FLANK 0x400
|
18
|
+
#define KSW_EZ_SPLICE_CMPLX 0x800
|
18
19
|
|
19
20
|
// The subset of CIGAR operators used by ksw code.
|
20
21
|
// Use MM_CIGAR_* from minimap.h if you need the full list.
|
@@ -358,7 +358,7 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|
358
358
|
} else H[0] = v8[0] - qe, max_H = H[0], max_t = 0; // special casing r==0
|
359
359
|
// update ez
|
360
360
|
if (en0 == tlen - 1 && H[en0] > ez->mte)
|
361
|
-
ez->mte = H[en0], ez->mte_q = r -
|
361
|
+
ez->mte = H[en0], ez->mte_q = r - en0;
|
362
362
|
if (r - st0 == qlen - 1 && H[st0] > ez->mqe)
|
363
363
|
ez->mqe = H[st0], ez->mqe_t = st0;
|
364
364
|
if (ksw_apply_zdrop(ez, 1, max_H, r, max_t, zdrop, e2)) break;
|
@@ -71,6 +71,7 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|
71
71
|
|
72
72
|
ksw_reset_extz(ez);
|
73
73
|
if (m <= 1 || qlen <= 0 || tlen <= 0 || q2 <= q + e) return;
|
74
|
+
assert((flag & KSW_EZ_SPLICE_FOR) == 0 || (flag & KSW_EZ_SPLICE_REV) == 0); // can't be both set
|
74
75
|
|
75
76
|
zero_ = _mm_set1_epi8(0);
|
76
77
|
q_ = _mm_set1_epi8(q);
|
@@ -118,55 +119,93 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|
118
119
|
|
119
120
|
// set the donor and acceptor arrays. TODO: this assumes 0/1/2/3 encoding!
|
120
121
|
if (flag & (KSW_EZ_SPLICE_FOR|KSW_EZ_SPLICE_REV)) {
|
121
|
-
int
|
122
|
-
|
123
|
-
|
122
|
+
const int sp0[4] = { 8, 15, 21, 30 };
|
123
|
+
int sp[4];
|
124
|
+
if (flag & KSW_EZ_SPLICE_CMPLX) {
|
125
|
+
for (t = 0; t < 4; ++t)
|
126
|
+
sp[t] = (int)((double)sp0[t] / 3. + .499);
|
127
|
+
} else {
|
128
|
+
sp[0] = flag&KSW_EZ_SPLICE_FLANK? noncan / 2 : 0;
|
129
|
+
sp[1] = sp[2] = sp[3] = noncan;
|
130
|
+
}
|
131
|
+
memset(donor, -sp[3], tlen_ * 16);
|
132
|
+
memset(acceptor, -sp[3], tlen_ * 16);
|
124
133
|
if (!(flag & KSW_EZ_REV_CIGAR)) {
|
125
134
|
for (t = 0; t < tlen - 4; ++t) {
|
126
|
-
int
|
127
|
-
if (
|
128
|
-
|
129
|
-
|
130
|
-
|
135
|
+
int z = 3;
|
136
|
+
if (flag & KSW_EZ_SPLICE_FOR) {
|
137
|
+
if (target[t+1] == 2 && target[t+2] == 3) // |GT.
|
138
|
+
z = target[t+3] == 0 || target[t+3] == 2? -1 : 0; // |GTr or not
|
139
|
+
else if (target[t+1] == 2 && target[t+2] == 1) z = 1; // |GC.
|
140
|
+
else if (target[t+1] == 0 && target[t+2] == 3) z = 2; // |AT.
|
141
|
+
} else if (flag & KSW_EZ_SPLICE_REV) {
|
142
|
+
if (target[t+1] == 1 && target[t+2] == 3) // |CT. (revcomp of .AG|)
|
143
|
+
z = target[t+3] == 0 || target[t+3] == 2? -1 : 0;
|
144
|
+
else if (target[t+1] == 2 && target[t+2] == 3) z = 2; // |GT. (revcomp of .AC|)
|
145
|
+
}
|
146
|
+
((int8_t*)donor)[t] = z < 0? 0 : -sp[z];
|
131
147
|
}
|
132
|
-
if (junc)
|
133
|
-
for (t = 0; t < tlen - 1; ++t)
|
134
|
-
if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t+1]&1)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t+1]&8)))
|
135
|
-
((int8_t*)donor)[t] += junc_bonus;
|
136
148
|
for (t = 2; t < tlen; ++t) {
|
137
|
-
int
|
138
|
-
if (
|
139
|
-
|
140
|
-
|
141
|
-
|
149
|
+
int z = 3;
|
150
|
+
if (flag & KSW_EZ_SPLICE_FOR) {
|
151
|
+
if (target[t-1] == 0 && target[t] == 2) // .AG|
|
152
|
+
z = target[t-2] == 1 || target[t-2] == 3? -1 : 0; // yAG| or not
|
153
|
+
else if (target[t-1] == 0 && target[t] == 1) z = 2; // .AC|
|
154
|
+
} else if (flag & KSW_EZ_SPLICE_REV) {
|
155
|
+
if (target[t-1] == 0 && target[t] == 1) // .AC| (revcomp of |GT.)
|
156
|
+
z = target[t-2] == 1 || target[t-2] == 3? -1 : 0; // yAC| or not
|
157
|
+
else if (target[t-1] == 2 && target[t] == 1) z = 1; // .GC| (revcomp of |GC.)
|
158
|
+
else if (target[t-1] == 0 && target[t] == 3) z = 2; // .AT| (revcomp of |AT.)
|
159
|
+
}
|
160
|
+
((int8_t*)acceptor)[t] = z < 0? 0 : -sp[z];
|
142
161
|
}
|
143
|
-
if (junc)
|
144
|
-
for (t = 0; t < tlen; ++t)
|
145
|
-
if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t]&2)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t]&4)))
|
146
|
-
((int8_t*)acceptor)[t] += junc_bonus;
|
147
162
|
} else {
|
148
163
|
for (t = 0; t < tlen - 4; ++t) {
|
149
|
-
int
|
150
|
-
if (
|
151
|
-
|
152
|
-
|
153
|
-
|
164
|
+
int z = 3;
|
165
|
+
if (flag & KSW_EZ_SPLICE_FOR) {
|
166
|
+
if (target[t+1] == 2 && target[t+2] == 0) // |GA. (rev of .AG|)
|
167
|
+
z = target[t+3] == 1 || target[t+3] == 3? -1 : 0;
|
168
|
+
else if (target[t+1] == 1 && target[t+2] == 0) z = 2; // |CA. (rev of .AC|)
|
169
|
+
} else if (flag & KSW_EZ_SPLICE_REV) {
|
170
|
+
if (target[t+1] == 1 && target[t+2] == 0) // |CA. (comp of |GT.)
|
171
|
+
z = target[t+3] == 1 || target[t+3] == 3? -1 : 0;
|
172
|
+
else if (target[t+1] == 1 && target[t+2] == 2) z = 1; // |CG. (comp of |GC.)
|
173
|
+
else if (target[t+1] == 3 && target[t+2] == 0) z = 2; // |TA. (comp of |AT.)
|
174
|
+
}
|
175
|
+
((int8_t*)donor)[t] = z < 0? 0 : -sp[z];
|
154
176
|
}
|
155
|
-
if (junc)
|
156
|
-
for (t = 0; t < tlen - 1; ++t)
|
157
|
-
if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t+1]&2)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t+1]&4)))
|
158
|
-
((int8_t*)donor)[t] += junc_bonus;
|
159
177
|
for (t = 2; t < tlen; ++t) {
|
160
|
-
int
|
161
|
-
if (
|
162
|
-
|
163
|
-
|
164
|
-
|
178
|
+
int z = 3;
|
179
|
+
if (flag & KSW_EZ_SPLICE_FOR) {
|
180
|
+
if (target[t-1] == 3 && target[t] == 2) // .TG| (rev of |GT.)
|
181
|
+
z = target[t-2] == 0 || target[t-2] == 2? -1 : 0;
|
182
|
+
else if (target[t-1] == 1 && target[t] == 2) z = 1; // .CG| (rev of |GC.)
|
183
|
+
else if (target[t-1] == 3 && target[t] == 0) z = 2; // .TA| (rev of |AT.)
|
184
|
+
} else if (flag & KSW_EZ_SPLICE_REV) {
|
185
|
+
if (target[t-1] == 3 && target[t] == 1) // .TC| (comp of .AG|)
|
186
|
+
z = target[t-2] == 0 || target[t-2] == 2? -1 : 0;
|
187
|
+
else if (target[t-1] == 3 && target[t] == 2) z = 2; // .TG| (comp of .AC|)
|
188
|
+
}
|
189
|
+
((int8_t*)acceptor)[t] = z < 0? 0 : -sp[z];
|
165
190
|
}
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
191
|
+
}
|
192
|
+
}
|
193
|
+
|
194
|
+
if (junc) {
|
195
|
+
if (!(flag & KSW_EZ_REV_CIGAR)) {
|
196
|
+
for (t = 0; t < tlen - 1; ++t)
|
197
|
+
if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t+1]&1)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t+1]&8)))
|
198
|
+
((int8_t*)donor)[t] += junc_bonus;
|
199
|
+
for (t = 0; t < tlen; ++t)
|
200
|
+
if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t]&2)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t]&4)))
|
201
|
+
((int8_t*)acceptor)[t] += junc_bonus;
|
202
|
+
} else {
|
203
|
+
for (t = 0; t < tlen - 1; ++t)
|
204
|
+
if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t+1]&2)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t+1]&4)))
|
205
|
+
((int8_t*)donor)[t] += junc_bonus;
|
206
|
+
for (t = 0; t < tlen; ++t)
|
207
|
+
if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t]&1)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t]&8)))
|
208
|
+
((int8_t*)acceptor)[t] += junc_bonus;
|
170
209
|
}
|
171
210
|
}
|
172
211
|
|
@@ -376,7 +415,7 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|
376
415
|
} else H[0] = v8[0] - qe, max_H = H[0], max_t = 0; // special casing r==0
|
377
416
|
// update ez
|
378
417
|
if (en0 == tlen - 1 && H[en0] > ez->mte)
|
379
|
-
ez->mte = H[en0], ez->mte_q = r -
|
418
|
+
ez->mte = H[en0], ez->mte_q = r - en0;
|
380
419
|
if (r - st0 == qlen - 1 && H[st0] > ez->mqe)
|
381
420
|
ez->mqe = H[st0], ez->mqe_t = st0;
|
382
421
|
if (ksw_apply_zdrop(ez, 1, max_H, r, max_t, zdrop, 0)) break;
|
@@ -269,7 +269,7 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|
269
269
|
} else H[0] = v8[0] - qe - qe, max_H = H[0], max_t = 0; // special casing r==0
|
270
270
|
// update ez
|
271
271
|
if (en0 == tlen - 1 && H[en0] > ez->mte)
|
272
|
-
ez->mte = H[en0], ez->mte_q = r -
|
272
|
+
ez->mte = H[en0], ez->mte_q = r - en0;
|
273
273
|
if (r - st0 == qlen - 1 && H[st0] > ez->mqe)
|
274
274
|
ez->mqe = H[st0], ez->mqe_t = st0;
|
275
275
|
if (ksw_apply_zdrop(ez, 1, max_H, r, max_t, zdrop, e)) break;
|
data/ext/minimap2/lchain.c
CHANGED
@@ -35,7 +35,7 @@ uint64_t *mg_chain_backtrack(void *km, int64_t n, const int32_t *f, const int64_
|
|
35
35
|
for (i = 0, n_z = 0; i < n; ++i) // precompute n_z
|
36
36
|
if (f[i] >= min_sc) ++n_z;
|
37
37
|
if (n_z == 0) return 0;
|
38
|
-
|
38
|
+
z = Kmalloc(km, mm128_t, n_z);
|
39
39
|
for (i = 0, k = 0; i < n; ++i) // populate z[]
|
40
40
|
if (f[i] >= min_sc) z[k].x = f[i], z[k++].y = i;
|
41
41
|
radix_sort_128x(z, z + n_z);
|
@@ -54,7 +54,7 @@ uint64_t *mg_chain_backtrack(void *km, int64_t n, const int32_t *f, const int64_
|
|
54
54
|
else n_v = n_v0;
|
55
55
|
}
|
56
56
|
}
|
57
|
-
|
57
|
+
u = Kmalloc(km, uint64_t, n_u);
|
58
58
|
memset(t, 0, n * 4);
|
59
59
|
for (k = n_z - 1, n_v = n_u = 0; k >= 0; --k) { // populate u[]
|
60
60
|
if (t[z[k].y] == 0) {
|
@@ -82,7 +82,7 @@ static mm128_t *compact_a(void *km, int32_t n_u, uint64_t *u, int32_t n_v, int32
|
|
82
82
|
int64_t i, j, k;
|
83
83
|
|
84
84
|
// write the result to b[]
|
85
|
-
|
85
|
+
b = Kmalloc(km, mm128_t, n_v);
|
86
86
|
for (i = 0, k = 0; i < n_u; ++i) {
|
87
87
|
int32_t k0 = k, ni = (int32_t)u[i];
|
88
88
|
for (j = 0; j < ni; ++j)
|
@@ -91,13 +91,13 @@ static mm128_t *compact_a(void *km, int32_t n_u, uint64_t *u, int32_t n_v, int32
|
|
91
91
|
kfree(km, v);
|
92
92
|
|
93
93
|
// sort u[] and a[] by the target position, such that adjacent chains may be joined
|
94
|
-
|
94
|
+
w = Kmalloc(km, mm128_t, n_u);
|
95
95
|
for (i = k = 0; i < n_u; ++i) {
|
96
96
|
w[i].x = b[k].x, w[i].y = (uint64_t)k<<32|i;
|
97
97
|
k += (int32_t)u[i];
|
98
98
|
}
|
99
99
|
radix_sort_128x(w, w + n_u);
|
100
|
-
|
100
|
+
u2 = Kmalloc(km, uint64_t, n_u);
|
101
101
|
for (i = k = 0; i < n_u; ++i) {
|
102
102
|
int32_t j = (int32_t)w[i].y, n = (int32_t)u[j];
|
103
103
|
u2[i] = u[j];
|
@@ -138,7 +138,7 @@ static inline int32_t comput_sc(const mm128_t *ai, const mm128_t *aj, int32_t ma
|
|
138
138
|
}
|
139
139
|
|
140
140
|
/* Input:
|
141
|
-
* a[].x:
|
141
|
+
* a[].x: rev<<63 | tid<<32 | tpos
|
142
142
|
* a[].y: flags<<40 | q_span<<32 | q_pos
|
143
143
|
* Output:
|
144
144
|
* n_u: #chains
|
@@ -160,10 +160,10 @@ mm128_t *mg_lchain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int
|
|
160
160
|
if (max_dist_x < bw) max_dist_x = bw;
|
161
161
|
if (max_dist_y < bw && !is_cdna) max_dist_y = bw;
|
162
162
|
if (is_cdna) max_drop = INT32_MAX;
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
163
|
+
p = Kmalloc(km, int64_t, n);
|
164
|
+
f = Kmalloc(km, int32_t, n);
|
165
|
+
v = Kmalloc(km, int32_t, n);
|
166
|
+
t = Kcalloc(km, int32_t, n);
|
167
167
|
|
168
168
|
// fill the score and backtrack arrays
|
169
169
|
for (i = 0, max_ii = -1; i < n; ++i) {
|
@@ -251,7 +251,7 @@ mm128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_ski
|
|
251
251
|
int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km)
|
252
252
|
{
|
253
253
|
int32_t *f,*t, *v, n_u, n_v, mmax_f = 0, max_rmq_size = 0, max_drop = bw;
|
254
|
-
int64_t *p, i, i0, st = 0, st_inner = 0, n_iter = 0;
|
254
|
+
int64_t *p, i, i0, st = 0, st_inner = 0;
|
255
255
|
uint64_t *u;
|
256
256
|
lc_elem_t *root = 0, *root_inner = 0;
|
257
257
|
void *mem_mp = 0;
|
@@ -264,10 +264,10 @@ mm128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_ski
|
|
264
264
|
}
|
265
265
|
if (max_dist < bw) max_dist = bw;
|
266
266
|
if (max_dist_inner <= 0 || max_dist_inner >= max_dist) max_dist_inner = 0;
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
267
|
+
p = Kmalloc(km, int64_t, n);
|
268
|
+
f = Kmalloc(km, int32_t, n);
|
269
|
+
t = Kcalloc(km, int32_t, n);
|
270
|
+
v = Kmalloc(km, int32_t, n);
|
271
271
|
mem_mp = km_init2(km, 0x10000);
|
272
272
|
mp = kmp_init_rmq(mem_mp);
|
273
273
|
|
@@ -345,7 +345,6 @@ mm128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_ski
|
|
345
345
|
}
|
346
346
|
if (!krmq_itr_prev(lc_elem, &itr)) break;
|
347
347
|
}
|
348
|
-
n_iter += n_rmq_iter;
|
349
348
|
}
|
350
349
|
}
|
351
350
|
}
|
data/ext/minimap2/main.c
CHANGED
@@ -7,8 +7,6 @@
|
|
7
7
|
#include "mmpriv.h"
|
8
8
|
#include "ketopt.h"
|
9
9
|
|
10
|
-
#define MM_VERSION "2.24-r1122"
|
11
|
-
|
12
10
|
#ifdef __linux__
|
13
11
|
#include <sys/resource.h>
|
14
12
|
#include <sys/time.h>
|
@@ -78,6 +76,7 @@ static ko_longopt_t long_options[] = {
|
|
78
76
|
{ "chain-skip-scale",ko_required_argument,351 },
|
79
77
|
{ "print-chains", ko_no_argument, 352 },
|
80
78
|
{ "no-hash-name", ko_no_argument, 353 },
|
79
|
+
{ "secondary-seq", ko_no_argument, 354 },
|
81
80
|
{ "help", ko_no_argument, 'h' },
|
82
81
|
{ "max-intron-len", ko_required_argument, 'G' },
|
83
82
|
{ "version", ko_no_argument, 'V' },
|
@@ -121,7 +120,7 @@ static inline void yes_or_no(mm_mapopt_t *opt, int64_t flag, int long_idx, const
|
|
121
120
|
|
122
121
|
int main(int argc, char *argv[])
|
123
122
|
{
|
124
|
-
const char *opt_str = "2aSDw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:LC:yYPo:e:U:";
|
123
|
+
const char *opt_str = "2aSDw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:LC:yYPo:e:U:J:";
|
125
124
|
ketopt_t o = KETOPT_INIT;
|
126
125
|
mm_mapopt_t opt;
|
127
126
|
mm_idxopt_t ipt;
|
@@ -187,7 +186,12 @@ int main(int argc, char *argv[])
|
|
187
186
|
else if (c == 'R') rg = o.arg;
|
188
187
|
else if (c == 'h') fp_help = stdout;
|
189
188
|
else if (c == '2') opt.flag |= MM_F_2_IO_THREADS;
|
190
|
-
else if (c == 'o') {
|
189
|
+
else if (c == 'J') {
|
190
|
+
int t;
|
191
|
+
t = atoi(o.arg);
|
192
|
+
if (t == 0) opt.flag |= MM_F_SPLICE_OLD;
|
193
|
+
else if (t == 1) opt.flag &= ~MM_F_SPLICE_OLD;
|
194
|
+
} else if (c == 'o') {
|
191
195
|
if (strcmp(o.arg, "-") != 0) {
|
192
196
|
if (freopen(o.arg, "wb", stdout) == NULL) {
|
193
197
|
fprintf(stderr, "[ERROR]\033[1;31m failed to write the output to file '%s'\033[0m: %s\n", o.arg, strerror(errno));
|
@@ -237,6 +241,7 @@ int main(int argc, char *argv[])
|
|
237
241
|
else if (c == 350) opt.q_occ_frac = atof(o.arg); // --q-occ-frac
|
238
242
|
else if (c == 352) mm_dbg_flag |= MM_DBG_PRINT_CHAIN; // --print-chains
|
239
243
|
else if (c == 353) opt.flag |= MM_F_NO_HASH_NAME; // --no-hash-name
|
244
|
+
else if (c == 354) opt.flag |= MM_F_SECONDARY_SEQ; // --secondary-seq
|
240
245
|
else if (c == 330) {
|
241
246
|
fprintf(stderr, "[WARNING] \033[1;31m --lj-min-ratio has been deprecated.\033[0m\n");
|
242
247
|
} else if (c == 314) { // --frag
|
@@ -261,7 +266,8 @@ int main(int argc, char *argv[])
|
|
261
266
|
} else if (c == 326) { // --dual
|
262
267
|
yes_or_no(&opt, MM_F_NO_DUAL, o.longidx, o.arg, 0);
|
263
268
|
} else if (c == 347) { // --rmq
|
264
|
-
yes_or_no(&opt, MM_F_RMQ, o.longidx, o.arg, 1);
|
269
|
+
if (o.arg) yes_or_no(&opt, MM_F_RMQ, o.longidx, o.arg, 1);
|
270
|
+
else opt.flag |= MM_F_RMQ;
|
265
271
|
} else if (c == 'S') {
|
266
272
|
opt.flag |= MM_F_OUT_CS | MM_F_CIGAR | MM_F_OUT_CS_LONG;
|
267
273
|
if (mm_verbose >= 2)
|
@@ -322,7 +328,7 @@ int main(int argc, char *argv[])
|
|
322
328
|
fprintf(fp_help, " -H use homopolymer-compressed k-mer (preferrable for PacBio)\n");
|
323
329
|
fprintf(fp_help, " -k INT k-mer size (no larger than 28) [%d]\n", ipt.k);
|
324
330
|
fprintf(fp_help, " -w INT minimizer window size [%d]\n", ipt.w);
|
325
|
-
fprintf(fp_help, " -I NUM split index for every ~NUM input bases [
|
331
|
+
fprintf(fp_help, " -I NUM split index for every ~NUM input bases [8G]\n");
|
326
332
|
fprintf(fp_help, " -d FILE dump index to FILE []\n");
|
327
333
|
fprintf(fp_help, " Mapping:\n");
|
328
334
|
fprintf(fp_help, " -f FLOAT filter out top FLOAT fraction of repetitive minimizers [%g]\n", opt.mid_occ_frac);
|
@@ -344,6 +350,7 @@ int main(int argc, char *argv[])
|
|
344
350
|
fprintf(fp_help, " -z INT[,INT] Z-drop score and inversion Z-drop score [%d,%d]\n", opt.zdrop, opt.zdrop_inv);
|
345
351
|
fprintf(fp_help, " -s INT minimal peak DP alignment score [%d]\n", opt.min_dp_max);
|
346
352
|
fprintf(fp_help, " -u CHAR how to find GT-AG. f:transcript strand, b:both strands, n:don't match GT-AG [n]\n");
|
353
|
+
fprintf(fp_help, " -J INT splice mode. 0: original minimap2 model; 1: miniprot model [1]\n");
|
347
354
|
fprintf(fp_help, " Input/Output:\n");
|
348
355
|
fprintf(fp_help, " -a output in the SAM format (PAF by default)\n");
|
349
356
|
fprintf(fp_help, " -o FILE output alignments to FILE [stdout]\n");
|