minimap2 0.2.22.0 → 0.2.24.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +60 -76
- data/ext/Rakefile +55 -0
- data/ext/cmappy/cmappy.c +129 -0
- data/ext/cmappy/cmappy.h +44 -0
- data/ext/minimap2/FAQ.md +46 -0
- data/ext/minimap2/LICENSE.txt +24 -0
- data/ext/minimap2/MANIFEST.in +10 -0
- data/ext/minimap2/Makefile +132 -0
- data/ext/minimap2/Makefile.simde +97 -0
- data/ext/minimap2/NEWS.md +821 -0
- data/ext/minimap2/README.md +403 -0
- data/ext/minimap2/align.c +1020 -0
- data/ext/minimap2/bseq.c +169 -0
- data/ext/minimap2/bseq.h +64 -0
- data/ext/minimap2/code_of_conduct.md +30 -0
- data/ext/minimap2/cookbook.md +243 -0
- data/ext/minimap2/esterr.c +64 -0
- data/ext/minimap2/example.c +63 -0
- data/ext/minimap2/format.c +559 -0
- data/ext/minimap2/hit.c +466 -0
- data/ext/minimap2/index.c +775 -0
- data/ext/minimap2/kalloc.c +205 -0
- data/ext/minimap2/kalloc.h +76 -0
- data/ext/minimap2/kdq.h +132 -0
- data/ext/minimap2/ketopt.h +120 -0
- data/ext/minimap2/khash.h +615 -0
- data/ext/minimap2/krmq.h +474 -0
- data/ext/minimap2/kseq.h +256 -0
- data/ext/minimap2/ksort.h +153 -0
- data/ext/minimap2/ksw2.h +184 -0
- data/ext/minimap2/ksw2_dispatch.c +96 -0
- data/ext/minimap2/ksw2_extd2_sse.c +402 -0
- data/ext/minimap2/ksw2_exts2_sse.c +416 -0
- data/ext/minimap2/ksw2_extz2_sse.c +313 -0
- data/ext/minimap2/ksw2_ll_sse.c +152 -0
- data/ext/minimap2/kthread.c +159 -0
- data/ext/minimap2/kthread.h +15 -0
- data/ext/minimap2/kvec.h +105 -0
- data/ext/minimap2/lchain.c +369 -0
- data/ext/minimap2/main.c +459 -0
- data/ext/minimap2/map.c +714 -0
- data/ext/minimap2/minimap.h +410 -0
- data/ext/minimap2/minimap2.1 +725 -0
- data/ext/minimap2/misc/README.md +179 -0
- data/ext/minimap2/misc/mmphase.js +335 -0
- data/ext/minimap2/misc/paftools.js +3149 -0
- data/ext/minimap2/misc.c +162 -0
- data/ext/minimap2/mmpriv.h +132 -0
- data/ext/minimap2/options.c +234 -0
- data/ext/minimap2/pe.c +177 -0
- data/ext/minimap2/python/README.rst +196 -0
- data/ext/minimap2/python/cmappy.h +152 -0
- data/ext/minimap2/python/cmappy.pxd +153 -0
- data/ext/minimap2/python/mappy.pyx +273 -0
- data/ext/minimap2/python/minimap2.py +39 -0
- data/ext/minimap2/sdust.c +213 -0
- data/ext/minimap2/sdust.h +25 -0
- data/ext/minimap2/seed.c +131 -0
- data/ext/minimap2/setup.py +55 -0
- data/ext/minimap2/sketch.c +143 -0
- data/ext/minimap2/splitidx.c +84 -0
- data/ext/minimap2/sse2neon/emmintrin.h +1689 -0
- data/ext/minimap2/test/MT-human.fa +278 -0
- data/ext/minimap2/test/MT-orang.fa +276 -0
- data/ext/minimap2/test/q-inv.fa +4 -0
- data/ext/minimap2/test/q2.fa +2 -0
- data/ext/minimap2/test/t-inv.fa +127 -0
- data/ext/minimap2/test/t2.fa +2 -0
- data/ext/minimap2/tex/Makefile +21 -0
- data/ext/minimap2/tex/bioinfo.cls +930 -0
- data/ext/minimap2/tex/blasr-mc.eval +17 -0
- data/ext/minimap2/tex/bowtie2-s3.sam.eval +28 -0
- data/ext/minimap2/tex/bwa-s3.sam.eval +52 -0
- data/ext/minimap2/tex/bwa.eval +55 -0
- data/ext/minimap2/tex/eval2roc.pl +33 -0
- data/ext/minimap2/tex/graphmap.eval +4 -0
- data/ext/minimap2/tex/hs38-simu.sh +10 -0
- data/ext/minimap2/tex/minialign.eval +49 -0
- data/ext/minimap2/tex/minimap2.bib +460 -0
- data/ext/minimap2/tex/minimap2.tex +724 -0
- data/ext/minimap2/tex/mm2-s3.sam.eval +62 -0
- data/ext/minimap2/tex/mm2-update.tex +240 -0
- data/ext/minimap2/tex/mm2.approx.eval +12 -0
- data/ext/minimap2/tex/mm2.eval +13 -0
- data/ext/minimap2/tex/natbib.bst +1288 -0
- data/ext/minimap2/tex/natbib.sty +803 -0
- data/ext/minimap2/tex/ngmlr.eval +38 -0
- data/ext/minimap2/tex/roc.gp +60 -0
- data/ext/minimap2/tex/snap-s3.sam.eval +62 -0
- data/ext/minimap2.patch +19 -0
- data/lib/minimap2/aligner.rb +4 -4
- data/lib/minimap2/alignment.rb +11 -11
- data/lib/minimap2/ffi/constants.rb +20 -16
- data/lib/minimap2/ffi/functions.rb +5 -0
- data/lib/minimap2/ffi.rb +4 -5
- data/lib/minimap2/version.rb +2 -2
- data/lib/minimap2.rb +51 -15
- metadata +97 -79
- data/lib/minimap2/ffi_helper.rb +0 -53
- data/vendor/libminimap2.so +0 -0
data/ext/minimap2/kvec.h
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
/* The MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2008, by Attractive Chaos <attractor@live.co.uk>
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
20
|
+
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
21
|
+
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
22
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
23
|
+
SOFTWARE.
|
24
|
+
*/
|
25
|
+
|
26
|
+
/*
|
27
|
+
An example:
|
28
|
+
|
29
|
+
#include "kvec.h"
|
30
|
+
int main() {
|
31
|
+
kvec_t(int) array;
|
32
|
+
kv_init(array);
|
33
|
+
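kv_push(int, 0, array, 10); // append; kv_push() takes the kalloc handle (km) as its 2nd argument (0 presumably selects the default allocator - confirm against kalloc.h)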
|
34
|
+
kv_a(int, array, 20) = 5; // dynamic (NOTE: kv_a() is not defined in this version of kvec.h)
|
35
|
+
kv_A(array, 20) = 4; // static
|
36
|
+
kv_destroy(array);
|
37
|
+
return 0;
|
38
|
+
}
|
39
|
+
*/
|
40
|
+
|
41
|
+
/*
|
42
|
+
2008-09-22 (0.1.0):
|
43
|
+
|
44
|
+
* The initial version.
|
45
|
+
|
46
|
+
*/
|
47
|
+
|
48
|
+
#ifndef AC_KVEC_H
|
49
|
+
#define AC_KVEC_H
|
50
|
+
|
51
|
+
#include <stdlib.h>
|
52
|
+
#include "kalloc.h"
|
53
|
+
|
54
|
+
/* Round x up, in place, to the next power of two by smearing the highest set
 * bit into the lower 32 bits. x must be a modifiable lvalue; it is evaluated
 * several times. Only shifts up to 16 are applied, so values wider than
 * 32 bits are not handled. */
#define kv_roundup32(x) ( \
		--(x), \
		(x) |= (x) >> 1, \
		(x) |= (x) >> 2, \
		(x) |= (x) >> 4, \
		(x) |= (x) >> 8, \
		(x) |= (x) >> 16, \
		++(x))
|
55
|
+
|
56
|
+
/* Anonymous growable-array type: n = number of elements in use,
 * m = allocated capacity (in elements), a = element storage. */
#define kvec_t(type) struct { size_t n, m; type *a; }

/* Reset a vector to the empty, unallocated state. */
#define kv_init(v)    ((v).n = 0, (v).m = 0, (v).a = 0)

/* Release the storage with free().
 * NOTE(review): the growth macros in this header allocate through
 * krealloc(km, ...); confirm that free() is the matching deallocator for
 * every km value callers use. */
#define kv_destroy(v) free((v).a)

/* Unchecked access to element i. */
#define kv_A(v, i)    ((v).a[(i)])

/* Remove and yield the last element; the vector must not be empty. */
#define kv_pop(v)     ((v).a[--(v).n])

/* Number of elements in use. */
#define kv_size(v)    ((v).n)

/* Allocated capacity, in elements. */
#define kv_max(v)     ((v).m)
|
63
|
+
|
64
|
+
/* Ensure vector v can hold at least s elements of `type`.
 * When the current capacity is smaller, the capacity becomes s rounded up by
 * kv_roundup32() and the storage is reallocated through krealloc(km, ...).
 * The vector never shrinks. `s` is evaluated more than once. */
#define kv_resize(type, km, v, s) do { \
		if ((s) > (v).m) { \
			(v).m = (s); \
			kv_roundup32((v).m); \
			(v).a = (type*)krealloc((km), (v).a, (v).m * sizeof(type)); \
		} \
	} while (0)
|
71
|
+
|
72
|
+
/* Copy the first (v0).n elements of v0 into v1.
 * v1 is grown through kv_resize()/krealloc(km, ...) when its capacity is
 * smaller than (v0).n; its previous contents are overwritten and its size
 * becomes (v0).n.
 * memcpy() is used, so <string.h> must be visible at the point of use (this
 * header only includes <stdlib.h> and "kalloc.h" directly).
 * The copy is skipped for an empty source so that memcpy() is never handed
 * the null pointers of two unallocated vectors. */
#define kv_copy(type, km, v1, v0) do { \
		if ((v1).m < (v0).n) kv_resize(type, (km), (v1), (v0).n); \
		(v1).n = (v0).n; \
		if ((v0).n > 0) memcpy((v1).a, (v0).a, sizeof(type) * (v0).n); \
	} while (0)
|
77
|
+
|
78
|
+
/* Append the value x to vector v.
 * When the vector is full (or not yet allocated) the capacity is doubled,
 * starting from 2, and the storage is reallocated through krealloc(km, ...). */
#define kv_push(type, km, v, x) do { \
		if ((v).n == (v).m) { \
			if ((v).m) { \
				(v).m <<= 1; \
			} else { \
				(v).m = 2; \
			} \
			(v).a = (type*)krealloc((km), (v).a, (v).m * sizeof(type)); \
		} \
		(v).a[(v).n++] = (x); \
	} while (0)
|
85
|
+
|
86
|
+
/* Append one slot to vector v without storing a value in it, and write the
 * slot's address to *p so the caller can fill it in.
 * Growth policy is the same as kv_push(): double the capacity, starting
 * from 2, reallocating through krealloc(km, ...). The returned address is
 * only valid until the next reallocation of v. */
#define kv_pushp(type, km, v, p) do { \
		if ((v).n == (v).m) { \
			if ((v).m) { \
				(v).m <<= 1; \
			} else { \
				(v).m = 2; \
			} \
			(v).a = (type*)krealloc((km), (v).a, (v).m * sizeof(type)); \
		} \
		*(p) = &(v).a[(v).n++]; \
	} while (0)
|
93
|
+
|
94
|
+
/* Reverse, in place, the elements of v from index `start` to the end.
 * Nothing happens when the vector has no storage or `start` is at or past
 * the end. `start` is evaluated more than once. */
#define kv_reverse(type, v, start) do { \
		if ((v).m > 0 && (v).n > (start)) { \
			size_t kv_lo_ = (start), kv_hi_ = (v).n; \
			/* kv_lo_ is the first unswapped index, kv_hi_ one past the last */ \
			while (kv_lo_ + 1 < kv_hi_) { \
				type kv_tmp_; \
				--kv_hi_; \
				kv_tmp_ = (v).a[kv_hi_]; \
				(v).a[kv_hi_] = (v).a[kv_lo_]; \
				(v).a[kv_lo_] = kv_tmp_; \
				++kv_lo_; \
			} \
		} \
	} while (0)
|
104
|
+
|
105
|
+
#endif
|
data/ext/minimap2/lchain.c
ADDED
@@ -0,0 +1,369 @@
|
|
1
|
+
#include <stdint.h>
|
2
|
+
#include <string.h>
|
3
|
+
#include <stdio.h>
|
4
|
+
#include <assert.h>
|
5
|
+
#include "mmpriv.h"
|
6
|
+
#include "kalloc.h"
|
7
|
+
#include "krmq.h"
|
8
|
+
|
9
|
+
static int64_t mg_chain_bk_end(int32_t max_drop, const mm128_t *z, const int32_t *f, const int64_t *p, int32_t *t, int64_t k)
|
10
|
+
{
|
11
|
+
int64_t i = z[k].y, end_i = -1, max_i = i;
|
12
|
+
int32_t max_s = 0;
|
13
|
+
if (i < 0 || t[i] != 0) return i;
|
14
|
+
do {
|
15
|
+
int32_t s;
|
16
|
+
t[i] = 2;
|
17
|
+
end_i = i = p[i];
|
18
|
+
s = i < 0? z[k].x : (int32_t)z[k].x - f[i];
|
19
|
+
if (s > max_s) max_s = s, max_i = i;
|
20
|
+
else if (max_s - s > max_drop) break;
|
21
|
+
} while (i >= 0 && t[i] == 0);
|
22
|
+
for (i = z[k].y; i >= 0 && i != end_i; i = p[i]) // reset modified t[]
|
23
|
+
t[i] = 0;
|
24
|
+
return max_i;
|
25
|
+
}
|
26
|
+
|
27
|
+
uint64_t *mg_chain_backtrack(void *km, int64_t n, const int32_t *f, const int64_t *p, int32_t *v, int32_t *t, int32_t min_cnt, int32_t min_sc, int32_t max_drop, int32_t *n_u_, int32_t *n_v_)
|
28
|
+
{
|
29
|
+
mm128_t *z;
|
30
|
+
uint64_t *u;
|
31
|
+
int64_t i, k, n_z, n_v;
|
32
|
+
int32_t n_u;
|
33
|
+
|
34
|
+
*n_u_ = *n_v_ = 0;
|
35
|
+
for (i = 0, n_z = 0; i < n; ++i) // precompute n_z
|
36
|
+
if (f[i] >= min_sc) ++n_z;
|
37
|
+
if (n_z == 0) return 0;
|
38
|
+
KMALLOC(km, z, n_z);
|
39
|
+
for (i = 0, k = 0; i < n; ++i) // populate z[]
|
40
|
+
if (f[i] >= min_sc) z[k].x = f[i], z[k++].y = i;
|
41
|
+
radix_sort_128x(z, z + n_z);
|
42
|
+
|
43
|
+
memset(t, 0, n * 4);
|
44
|
+
for (k = n_z - 1, n_v = n_u = 0; k >= 0; --k) { // precompute n_u
|
45
|
+
if (t[z[k].y] == 0) {
|
46
|
+
int64_t n_v0 = n_v, end_i;
|
47
|
+
int32_t sc;
|
48
|
+
end_i = mg_chain_bk_end(max_drop, z, f, p, t, k);
|
49
|
+
for (i = z[k].y; i != end_i; i = p[i])
|
50
|
+
++n_v, t[i] = 1;
|
51
|
+
sc = i < 0? z[k].x : (int32_t)z[k].x - f[i];
|
52
|
+
if (sc >= min_sc && n_v > n_v0 && n_v - n_v0 >= min_cnt)
|
53
|
+
++n_u;
|
54
|
+
else n_v = n_v0;
|
55
|
+
}
|
56
|
+
}
|
57
|
+
KMALLOC(km, u, n_u);
|
58
|
+
memset(t, 0, n * 4);
|
59
|
+
for (k = n_z - 1, n_v = n_u = 0; k >= 0; --k) { // populate u[]
|
60
|
+
if (t[z[k].y] == 0) {
|
61
|
+
int64_t n_v0 = n_v, end_i;
|
62
|
+
int32_t sc;
|
63
|
+
end_i = mg_chain_bk_end(max_drop, z, f, p, t, k);
|
64
|
+
for (i = z[k].y; i != end_i; i = p[i])
|
65
|
+
v[n_v++] = i, t[i] = 1;
|
66
|
+
sc = i < 0? z[k].x : (int32_t)z[k].x - f[i];
|
67
|
+
if (sc >= min_sc && n_v > n_v0 && n_v - n_v0 >= min_cnt)
|
68
|
+
u[n_u++] = (uint64_t)sc << 32 | (n_v - n_v0);
|
69
|
+
else n_v = n_v0;
|
70
|
+
}
|
71
|
+
}
|
72
|
+
kfree(km, z);
|
73
|
+
assert(n_v < INT32_MAX);
|
74
|
+
*n_u_ = n_u, *n_v_ = n_v;
|
75
|
+
return u;
|
76
|
+
}
|
77
|
+
|
78
|
+
/*
 * Build the final, compact anchor array from the backtrack result.
 *
 *   n_u, u[]  chains as produced by mg_chain_backtrack(); the low 32 bits of
 *             u[i] are the number of anchors in chain i. u[] is reordered in
 *             place to match the returned array.
 *   n_v, v[]  anchor indices into a[], stored end-to-start within each chain.
 *             v[] is freed here.
 *   a[]       the original anchors; used as scratch and freed here.
 *
 * Returns a newly allocated array of n_v anchors in which every chain is
 * stored start-to-end and the chains are ordered by the x field of their
 * first anchor.
 */
static mm128_t *compact_a(void *km, int32_t n_u, uint64_t *u, int32_t n_v, int32_t *v, mm128_t *a)
{
	mm128_t *b, *w;
	uint64_t *u2;
	int64_t i, src, k;

	// write the result to b[], un-reversing each chain
	KMALLOC(km, b, n_v);
	for (i = 0, k = 0; i < n_u; ++i) {
		const int32_t k0 = k, ni = (int32_t)u[i];
		for (src = (int64_t)k0 + ni - 1; src >= k0; --src)
			b[k++] = a[v[src]];
	}
	kfree(km, v);

	// sort u[] and a[] by the target position, such that adjacent chains may be joined
	KMALLOC(km, w, n_u);
	for (i = 0, k = 0; i < n_u; ++i) {
		w[i].x = b[k].x;
		w[i].y = (uint64_t)k<<32|i; // offset into b[] in the high half, chain id in the low half
		k += (int32_t)u[i];
	}
	radix_sort_128x(w, w + n_u);

	KMALLOC(km, u2, n_u);
	for (i = 0, k = 0; i < n_u; ++i) {
		const int32_t chain = (int32_t)w[i].y;
		const int32_t cnt = (int32_t)u[chain];
		u2[i] = u[chain];
		memcpy(&a[k], &b[w[i].y>>32], cnt * sizeof(mm128_t));
		k += cnt;
	}
	memcpy(u, u2, n_u * 8);

	// write _a_ to _b_ and deallocate _a_ because _a_ is oversized, sometimes a lot
	memcpy(b, a, k * sizeof(mm128_t));
	kfree(km, a);
	kfree(km, w);
	kfree(km, u2);
	return b;
}
|
112
|
+
|
113
|
+
static inline int32_t comput_sc(const mm128_t *ai, const mm128_t *aj, int32_t max_dist_x, int32_t max_dist_y, int32_t bw, float chn_pen_gap, float chn_pen_skip, int is_cdna, int n_seg)
|
114
|
+
{
|
115
|
+
int32_t dq = (int32_t)ai->y - (int32_t)aj->y, dr, dd, dg, q_span, sc;
|
116
|
+
int32_t sidi = (ai->y & MM_SEED_SEG_MASK) >> MM_SEED_SEG_SHIFT;
|
117
|
+
int32_t sidj = (aj->y & MM_SEED_SEG_MASK) >> MM_SEED_SEG_SHIFT;
|
118
|
+
if (dq <= 0 || dq > max_dist_x) return INT32_MIN;
|
119
|
+
dr = (int32_t)(ai->x - aj->x);
|
120
|
+
if (sidi == sidj && (dr == 0 || dq > max_dist_y)) return INT32_MIN;
|
121
|
+
dd = dr > dq? dr - dq : dq - dr;
|
122
|
+
if (sidi == sidj && dd > bw) return INT32_MIN;
|
123
|
+
if (n_seg > 1 && !is_cdna && sidi == sidj && dr > max_dist_y) return INT32_MIN;
|
124
|
+
dg = dr < dq? dr : dq;
|
125
|
+
q_span = aj->y>>32&0xff;
|
126
|
+
sc = q_span < dg? q_span : dg;
|
127
|
+
if (dd || dg > q_span) {
|
128
|
+
float lin_pen, log_pen;
|
129
|
+
lin_pen = chn_pen_gap * (float)dd + chn_pen_skip * (float)dg;
|
130
|
+
log_pen = dd >= 1? mg_log2(dd + 1) : 0.0f; // mg_log2() only works for dd>=2
|
131
|
+
if (is_cdna || sidi != sidj) {
|
132
|
+
if (sidi != sidj && dr == 0) ++sc; // possibly due to overlapping paired ends; give a minor bonus
|
133
|
+
else if (dr > dq || sidi != sidj) sc -= (int)(lin_pen < log_pen? lin_pen : log_pen); // deletion or jump between paired ends
|
134
|
+
else sc -= (int)(lin_pen + .5f * log_pen);
|
135
|
+
} else sc -= (int)(lin_pen + .5f * log_pen);
|
136
|
+
}
|
137
|
+
return sc;
|
138
|
+
}
|
139
|
+
|
140
|
+
/* Input:
|
141
|
+
* a[].x: tid<<33 | rev<<32 | tpos
|
142
|
+
* a[].y: flags<<40 | q_span<<32 | q_pos
|
143
|
+
* Output:
|
144
|
+
* n_u: #chains
|
145
|
+
* u[]: score<<32 | #anchors (sum of lower 32 bits of u[] is the returned length of a[])
|
146
|
+
* input a[] is deallocated on return
|
147
|
+
*/
|
148
|
+
/*
 * Chain anchors with the quadratic-time dynamic programming formulation.
 *
 *   max_dist_x, max_dist_y  distance limits handed to comput_sc(); both are
 *                           raised to bw when smaller (max_dist_y only for
 *                           non-cDNA input)
 *   bw            maximum difference between the two gaps of a pair; also the
 *                 backtrack drop threshold unless is_cdna is set
 *   max_skip      give up on an anchor after more than this many
 *                 non-improving predecessors that were already marked
 *   max_iter      look back at most this many anchors
 *   min_cnt, min_sc  minimum anchor count and score of a reported chain
 *   n, a[]        the anchors; a[] is always released (here or by compact_a())
 *   n_u_, _u      outputs: number of chains and the chain array
 *                 (score<<32 | #anchors). Either may be NULL; a chain array
 *                 that cannot be handed to the caller is freed here.
 *   km            kalloc handle
 *
 * Returns the compacted anchor array, or 0 when there is no chain.
 */
mm128_t *mg_lchain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int max_iter, int min_cnt, int min_sc, float chn_pen_gap, float chn_pen_skip,
					  int is_cdna, int n_seg, int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km)
{ // TODO: make sure this works when n has more than 32 bits
	int32_t *f, *t, *v, n_u, n_v, max_drop = bw;
	int64_t *p, i, j, max_ii, st = 0;
	uint64_t *u;
	mm128_t *b;

	if (_u) *_u = 0;
	if (n_u_) *n_u_ = 0;
	if (n <= 0 || a == 0) {
		kfree(km, a);
		return 0;
	}
	if (max_dist_x < bw) max_dist_x = bw;
	if (max_dist_y < bw && !is_cdna) max_dist_y = bw;
	if (is_cdna) max_drop = INT32_MAX;
	KMALLOC(km, p, n);
	KMALLOC(km, f, n);
	KMALLOC(km, v, n);
	KCALLOC(km, t, n);

	// fill the score and backtrack arrays
	for (i = 0, max_ii = -1; i < n; ++i) {
		int64_t max_j = -1, end_j;
		int32_t max_f = a[i].y>>32&0xff, n_skip = 0;
		// advance the window start past anchors on another target/strand or too far away
		while (st < i && (a[i].x>>32 != a[st].x>>32 || a[i].x > a[st].x + max_dist_x)) ++st;
		if (i - st > max_iter) st = i - max_iter;
		for (j = i - 1; j >= st; --j) {
			int32_t sc;
			sc = comput_sc(&a[i], &a[j], max_dist_x, max_dist_y, bw, chn_pen_gap, chn_pen_skip, is_cdna, n_seg);
			if (sc == INT32_MIN) continue;
			sc += f[j];
			if (sc > max_f) {
				max_f = sc, max_j = j;
				if (n_skip > 0) --n_skip;
			} else if (t[j] == (int32_t)i) { // j was marked as the predecessor of an anchor already tried for i
				if (++n_skip > max_skip)
					break;
			}
			if (p[j] >= 0) t[p[j]] = i;
		}
		end_j = j;
		// refresh the best-scoring anchor in the window when the cached one is missing or out of range
		if (max_ii < 0 || a[i].x - a[max_ii].x > (int64_t)max_dist_x) {
			int32_t max = INT32_MIN;
			max_ii = -1;
			for (j = i - 1; j >= st; --j)
				if (max < f[j]) max = f[j], max_ii = j;
		}
		// the loop above may have stopped early; still try the best-scoring anchor it did not reach
		if (max_ii >= 0 && max_ii < end_j) {
			int32_t tmp;
			tmp = comput_sc(&a[i], &a[max_ii], max_dist_x, max_dist_y, bw, chn_pen_gap, chn_pen_skip, is_cdna, n_seg);
			if (tmp != INT32_MIN && max_f < tmp + f[max_ii])
				max_f = tmp + f[max_ii], max_j = max_ii;
		}
		f[i] = max_f, p[i] = max_j;
		v[i] = max_j >= 0 && v[max_j] > max_f? v[max_j] : max_f; // v[] keeps the peak score up to i; f[] is the score ending at i, not always the peak
		if (max_ii < 0 || (a[i].x - a[max_ii].x <= (int64_t)max_dist_x && f[max_ii] < f[i]))
			max_ii = i;
	}

	u = mg_chain_backtrack(km, n, f, p, v, t, min_cnt, min_sc, max_drop, &n_u, &n_v);
	// NB: note that u[] may not be sorted by score here
	if (n_u_) *n_u_ = n_u;
	if (_u) *_u = u;
	kfree(km, p); kfree(km, f); kfree(km, t);
	if (n_u == 0) {
		kfree(km, a); kfree(km, v);
		if (!_u) kfree(km, u); // nobody to hand the (empty) chain array to
		return 0;
	}
	b = compact_a(km, n_u, u, n_v, v, a); // reorders u[] in place; frees v and a
	if (!_u) kfree(km, u);
	return b;
}
|
218
|
+
|
219
|
+
typedef struct lc_elem_s {
|
220
|
+
int32_t y;
|
221
|
+
int64_t i;
|
222
|
+
double pri;
|
223
|
+
KRMQ_HEAD(struct lc_elem_s) head;
|
224
|
+
} lc_elem_t;
|
225
|
+
|
226
|
+
#define lc_elem_cmp(a, b) ((a)->y < (b)->y? -1 : (a)->y > (b)->y? 1 : ((a)->i > (b)->i) - ((a)->i < (b)->i))
|
227
|
+
#define lc_elem_lt2(a, b) ((a)->pri < (b)->pri)
|
228
|
+
KRMQ_INIT(lc_elem, lc_elem_t, head, lc_elem_cmp, lc_elem_lt2)
|
229
|
+
|
230
|
+
KALLOC_POOL_INIT(rmq, lc_elem_t)
|
231
|
+
|
232
|
+
static inline int32_t comput_sc_simple(const mm128_t *ai, const mm128_t *aj, float chn_pen_gap, float chn_pen_skip, int32_t *exact, int32_t *width)
|
233
|
+
{
|
234
|
+
int32_t dq = (int32_t)ai->y - (int32_t)aj->y, dr, dd, dg, q_span, sc;
|
235
|
+
dr = (int32_t)(ai->x - aj->x);
|
236
|
+
*width = dd = dr > dq? dr - dq : dq - dr;
|
237
|
+
dg = dr < dq? dr : dq;
|
238
|
+
q_span = aj->y>>32&0xff;
|
239
|
+
sc = q_span < dg? q_span : dg;
|
240
|
+
if (exact) *exact = (dd == 0 && dg <= q_span);
|
241
|
+
if (dd || dq > q_span) {
|
242
|
+
float lin_pen, log_pen;
|
243
|
+
lin_pen = chn_pen_gap * (float)dd + chn_pen_skip * (float)dg;
|
244
|
+
log_pen = dd >= 1? mg_log2(dd + 1) : 0.0f; // mg_log2() only works for dd>=2
|
245
|
+
sc -= (int)(lin_pen + .5f * log_pen);
|
246
|
+
}
|
247
|
+
return sc;
|
248
|
+
}
|
249
|
+
|
250
|
+
/*
 * Chain anchors using range-min queries over balanced trees.
 *
 *   max_dist        maximum distance between chained anchors (raised to bw
 *                   when smaller)
 *   max_dist_inner  when in (0, max_dist), a second tree limited to this
 *                   distance is kept and scanned when the RMQ hit is not exact
 *   bw              maximum gap difference of an accepted pair; also the
 *                   backtrack drop threshold
 *   max_chn_skip    abort the inner scan after more than this many
 *                   non-improving, already-marked candidates
 *   cap_rmq_size    upper bound on the number of anchors kept in a tree
 *   min_cnt, min_sc minimum anchor count and score of a reported chain
 *   n, a[]          the anchors; a[] is always released (here or by compact_a())
 *   n_u_, _u        outputs: number of chains and the chain array
 *                   (score<<32 | #anchors). Either may be NULL; a chain array
 *                   that cannot be handed to the caller is freed here.
 *   km              kalloc handle
 *
 * Returns the compacted anchor array, or 0 when there is no chain.
 */
mm128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_skip, int cap_rmq_size, int min_cnt, int min_sc, float chn_pen_gap, float chn_pen_skip,
					   int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km)
{
	int32_t *f, *t, *v, n_u, n_v, max_drop = bw;
	int64_t *p, i, i0, st = 0, st_inner = 0;
	uint64_t *u;
	lc_elem_t *root = 0, *root_inner = 0;
	void *mem_mp = 0;
	kmp_rmq_t *mp;
	mm128_t *b;

	if (_u) *_u = 0;
	if (n_u_) *n_u_ = 0;
	if (n <= 0 || a == 0) {
		kfree(km, a);
		return 0;
	}
	if (max_dist < bw) max_dist = bw;
	if (max_dist_inner <= 0 || max_dist_inner >= max_dist) max_dist_inner = 0;
	KMALLOC(km, p, n);
	KMALLOC(km, f, n);
	KCALLOC(km, t, n);
	KMALLOC(km, v, n);
	mem_mp = km_init2(km, 0x10000); // private arena for tree nodes; destroyed in one go below
	mp = kmp_init_rmq(mem_mp);

	// fill the score and backtrack arrays
	for (i = i0 = 0; i < n; ++i) {
		int64_t max_j = -1;
		int32_t q_span = a[i].y>>32&0xff, max_f = q_span;
		lc_elem_t s, *q, *r, lo, hi;
		// add in-range anchors
		// (anchors [i0, i) are inserted only once a[i].x differs from a[i0].x)
		if (i0 < i && a[i0].x != a[i].x) {
			int64_t j;
			for (j = i0; j < i; ++j) {
				q = kmp_alloc_rmq(mp);
				q->y = (int32_t)a[j].y, q->i = j, q->pri = -(f[j] + 0.5 * chn_pen_gap * ((int32_t)a[j].x + (int32_t)a[j].y));
				krmq_insert(lc_elem, &root, q, 0);
				if (max_dist_inner > 0) {
					r = kmp_alloc_rmq(mp);
					*r = *q;
					krmq_insert(lc_elem, &root_inner, r, 0);
				}
			}
			i0 = i;
		}
		// get rid of active chains out of range
		while (st < i && (a[i].x>>32 != a[st].x>>32 || a[i].x > a[st].x + max_dist || krmq_size(head, root) > cap_rmq_size)) {
			s.y = (int32_t)a[st].y, s.i = st;
			if ((q = krmq_find(lc_elem, root, &s, 0)) != 0) {
				q = krmq_erase(lc_elem, &root, q, 0);
				kmp_free_rmq(mp, q);
			}
			++st;
		}
		if (max_dist_inner > 0) { // similar to the block above, but applied to the inner tree
			while (st_inner < i && (a[i].x>>32 != a[st_inner].x>>32 || a[i].x > a[st_inner].x + max_dist_inner || krmq_size(head, root_inner) > cap_rmq_size)) {
				s.y = (int32_t)a[st_inner].y, s.i = st_inner;
				if ((q = krmq_find(lc_elem, root_inner, &s, 0)) != 0) {
					q = krmq_erase(lc_elem, &root_inner, q, 0);
					kmp_free_rmq(mp, q);
				}
				++st_inner;
			}
		}
		// RMQ
		lo.i = INT32_MAX, lo.y = (int32_t)a[i].y - max_dist;
		hi.i = 0, hi.y = (int32_t)a[i].y;
		if ((q = krmq_rmq(lc_elem, root, &lo, &hi)) != 0) {
			int32_t sc, exact, width, n_skip = 0;
			int64_t j = q->i;
			assert(q->y >= lo.y && q->y <= hi.y);
			sc = f[j] + comput_sc_simple(&a[i], &a[j], chn_pen_gap, chn_pen_skip, &exact, &width);
			if (width <= bw && sc > max_f) max_f = sc, max_j = j;
			if (!exact && root_inner && (int32_t)a[i].y > 0) {
				// the RMQ hit is not a perfect extension: walk the inner tree backwards from just below a[i].y
				lc_elem_t *lo_e, *hi_e;
				s.y = (int32_t)a[i].y - 1, s.i = n;
				krmq_interval(lc_elem, root_inner, &s, &lo_e, &hi_e);
				if (lo_e) {
					const lc_elem_t *cand;
					int32_t cand_width;
					krmq_itr_t(lc_elem) itr;
					krmq_itr_find(lc_elem, root_inner, lo_e, &itr);
					while ((cand = krmq_at(&itr)) != 0) {
						if (cand->y < (int32_t)a[i].y - max_dist_inner) break;
						j = cand->i;
						sc = f[j] + comput_sc_simple(&a[i], &a[j], chn_pen_gap, chn_pen_skip, 0, &cand_width);
						if (cand_width <= bw) {
							if (sc > max_f) {
								max_f = sc, max_j = j;
								if (n_skip > 0) --n_skip;
							} else if (t[j] == (int32_t)i) {
								if (++n_skip > max_chn_skip)
									break;
							}
							if (p[j] >= 0) t[p[j]] = i;
						}
						if (!krmq_itr_prev(lc_elem, &itr)) break;
					}
				}
			}
		}
		// set max
		assert(max_j < 0 || (a[max_j].x < a[i].x && (int32_t)a[max_j].y < (int32_t)a[i].y));
		f[i] = max_f, p[i] = max_j;
		v[i] = max_j >= 0 && v[max_j] > max_f? v[max_j] : max_f; // v[] keeps the peak score up to i; f[] is the score ending at i, not always the peak
	}
	km_destroy(mem_mp);

	u = mg_chain_backtrack(km, n, f, p, v, t, min_cnt, min_sc, max_drop, &n_u, &n_v);
	// NB: note that u[] may not be sorted by score here
	if (n_u_) *n_u_ = n_u;
	if (_u) *_u = u;
	kfree(km, p); kfree(km, f); kfree(km, t);
	if (n_u == 0) {
		kfree(km, a); kfree(km, v);
		if (!_u) kfree(km, u); // nobody to hand the (empty) chain array to
		return 0;
	}
	b = compact_a(km, n_u, u, n_v, v, a); // reorders u[] in place; frees v and a
	if (!_u) kfree(km, u);
	return b;
}
|