minimap2 0.2.27.0 → 0.2.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/ext/cmappy/cmappy.c +3 -3
- data/ext/cmappy/cmappy.h +1 -1
- data/ext/minimap2/FAQ.md +1 -1
- data/ext/minimap2/Makefile +4 -3
- data/ext/minimap2/NEWS.md +68 -0
- data/ext/minimap2/README.md +30 -14
- data/ext/minimap2/align.c +136 -52
- data/ext/minimap2/cookbook.md +2 -2
- data/ext/minimap2/format.c +59 -5
- data/ext/minimap2/hit.c +14 -6
- data/ext/minimap2/index.c +304 -13
- data/ext/minimap2/jump.c +201 -0
- data/ext/minimap2/kalloc.h +8 -0
- data/ext/minimap2/ksw2.h +5 -2
- data/ext/minimap2/ksw2_dispatch.c +5 -5
- data/ext/minimap2/ksw2_exts2_sse.c +17 -6
- data/ext/minimap2/lchain.c +5 -5
- data/ext/minimap2/main.c +64 -12
- data/ext/minimap2/map.c +35 -8
- data/ext/minimap2/minimap.h +14 -3
- data/ext/minimap2/minimap2.1 +98 -46
- data/ext/minimap2/misc/README.md +2 -1
- data/ext/minimap2/misc/pafcluster.js +241 -0
- data/ext/minimap2/misc/paftools.js +17 -6
- data/ext/minimap2/mmpriv.h +25 -4
- data/ext/minimap2/options.c +36 -3
- data/ext/minimap2/python/cmappy.h +3 -3
- data/ext/minimap2/python/cmappy.pxd +5 -2
- data/ext/minimap2/python/mappy.pyx +20 -7
- data/ext/minimap2/python/minimap2.py +5 -3
- data/ext/minimap2/seed.c +2 -1
- data/ext/minimap2/setup.py +2 -2
- data/ext/minimap2.patch +2 -2
- data/lib/minimap2/aligner.rb +19 -12
- data/lib/minimap2/alignment.rb +1 -0
- data/lib/minimap2/ffi/constants.rb +10 -2
- data/lib/minimap2/ffi/functions.rb +145 -6
- data/lib/minimap2/ffi/mappy.rb +1 -1
- data/lib/minimap2/version.rb +1 -1
- data/lib/minimap2.rb +2 -2
- metadata +8 -7
- data/ext/minimap2/misc/mmphase.js +0 -335
data/ext/minimap2/mmpriv.h
CHANGED
@@ -14,6 +14,7 @@
|
|
14
14
|
#define MM_DBG_PRINT_SEED 0x4
|
15
15
|
#define MM_DBG_PRINT_ALN_SEQ 0x8
|
16
16
|
#define MM_DBG_PRINT_CHAIN 0x10
|
17
|
+
#define MM_DBG_SEED_FREQ 0x20
|
17
18
|
|
18
19
|
#define MM_SEED_LONG_JOIN (1ULL<<40)
|
19
20
|
#define MM_SEED_IGNORE (1ULL<<41)
|
@@ -23,6 +24,9 @@
|
|
23
24
|
#define MM_SEED_SEG_SHIFT 48
|
24
25
|
#define MM_SEED_SEG_MASK (0xffULL<<(MM_SEED_SEG_SHIFT))
|
25
26
|
|
27
|
+
#define MM_JUNC_ANNO 0x1
|
28
|
+
#define MM_JUNC_MISC 0x2
|
29
|
+
|
26
30
|
#ifndef kroundup32
|
27
31
|
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
|
28
32
|
#endif
|
@@ -32,6 +36,7 @@
|
|
32
36
|
|
33
37
|
#define MALLOC(type, len) ((type*)malloc((len) * sizeof(type)))
|
34
38
|
#define CALLOC(type, len) ((type*)calloc((len), sizeof(type)))
|
39
|
+
#define REALLOC(type, ptr, cnt) ((type*)realloc((ptr), (cnt) * sizeof(type)))
|
35
40
|
|
36
41
|
#ifdef __cplusplus
|
37
42
|
extern "C" {
|
@@ -51,6 +56,12 @@ typedef struct {
|
|
51
56
|
mm128_t *a;
|
52
57
|
} mm_seg_t;
|
53
58
|
|
59
|
+
typedef struct {
|
60
|
+
int32_t off, off2, cnt;
|
61
|
+
int16_t strand;
|
62
|
+
uint16_t flag;
|
63
|
+
} mm_idx_jjump1_t;
|
64
|
+
|
54
65
|
double cputime(void);
|
55
66
|
double realtime(void);
|
56
67
|
long peakrss(void);
|
@@ -68,19 +79,23 @@ double mm_event_identity(const mm_reg1_t *r);
|
|
68
79
|
int mm_write_sam_hdr(const mm_idx_t *mi, const char *rg, const char *ver, int argc, char *argv[]);
|
69
80
|
void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, void *km, int64_t opt_flag);
|
70
81
|
void mm_write_paf3(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, void *km, int64_t opt_flag, int rep_len);
|
82
|
+
void mm_write_paf4(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, void *km, int64_t opt_flag, int rep_len, int n_seg, int seg_idx);
|
71
83
|
void mm_write_sam(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, int n_regs, const mm_reg1_t *regs);
|
72
84
|
void mm_write_sam2(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int seg_idx, int reg_idx, int n_seg, const int *n_regs, const mm_reg1_t *const* regs, void *km, int64_t opt_flag);
|
73
85
|
void mm_write_sam3(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int seg_idx, int reg_idx, int n_seg, const int *n_regss, const mm_reg1_t *const* regss, void *km, int64_t opt_flag, int rep_len);
|
86
|
+
void mm_write_junc(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r);
|
74
87
|
|
88
|
+
// indexing related in index.c
|
75
89
|
void mm_idxopt_init(mm_idxopt_t *opt);
|
76
90
|
const uint64_t *mm_idx_get(const mm_idx_t *mi, uint64_t minier, int *n);
|
77
91
|
int32_t mm_idx_cal_max_occ(const mm_idx_t *mi, float f);
|
78
92
|
int mm_idx_getseq2(const mm_idx_t *mi, int is_rev, uint32_t rid, uint32_t st, uint32_t en, uint8_t *seq);
|
79
|
-
mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, const char *qstr, int *n_regs_, mm_reg1_t *regs, mm128_t *a);
|
80
93
|
mm_reg1_t *mm_gen_regs(void *km, uint32_t hash, int qlen, int n_u, uint64_t *u, mm128_t *a, int is_qstrand);
|
94
|
+
int mm_idx_bed_read(mm_idx_t *mi, const char *fn, int read_junc);
|
95
|
+
int mm_idx_jjump_read(mm_idx_t *mi, const char *fn, int flag, int min_sc);
|
96
|
+
const mm_idx_jjump1_t *mm_idx_jump_get(const mm_idx_t *db, int32_t cid, int32_t st, int32_t en, int32_t *n);
|
81
97
|
|
82
|
-
|
83
|
-
int is_cdna, int n_segs, int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km);
|
98
|
+
// chaining in lchain.c
|
84
99
|
mm128_t *mg_lchain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int max_iter, int min_cnt, int min_sc, float chn_pen_gap, float chn_pen_skip,
|
85
100
|
int is_cdna, int n_segs, int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km);
|
86
101
|
mm128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_skip, int cap_rmq_size, int min_cnt, int min_sc, float chn_pen_gap, float chn_pen_skip,
|
@@ -97,8 +112,12 @@ void mm_select_sub_multi(void *km, float pri_ratio, float pri1, float pri2, int
|
|
97
112
|
int mm_filter_strand_retained(int n_regs, mm_reg1_t *r);
|
98
113
|
void mm_filter_regs(const mm_mapopt_t *opt, int qlen, int *n_regs, mm_reg1_t *regs);
|
99
114
|
void mm_hit_sort(void *km, int *n_regs, mm_reg1_t *r, float alt_diff_frac);
|
100
|
-
void
|
115
|
+
void mm_set_mapq2(void *km, int n_regs, mm_reg1_t *regs, int min_chain_sc, int match_sc, int rep_len, int is_sr, int is_splice);
|
101
116
|
void mm_update_dp_max(int qlen, int n_regs, mm_reg1_t *regs, float frac, int a, int b);
|
117
|
+
void mm_jump_split(void *km, const mm_idx_t *mi, const mm_mapopt_t *opt, int32_t qlen, const uint8_t *qseq, mm_reg1_t *r, int32_t ts_strand);
|
118
|
+
|
119
|
+
mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, const char *qstr, int *n_regs_, mm_reg1_t *regs, mm128_t *a);
|
120
|
+
void mm_enlarge_cigar(mm_reg1_t *r, uint32_t n_cigar);
|
102
121
|
|
103
122
|
void mm_est_err(const mm_idx_t *mi, int qlen, int n_regs, mm_reg1_t *regs, const mm128_t *a, int32_t n, const uint64_t *mini_pos);
|
104
123
|
|
@@ -106,6 +125,8 @@ mm_seg_t *mm_seg_gen(void *km, uint32_t hash, int n_segs, const int *qlens, int
|
|
106
125
|
void mm_seg_free(void *km, int n_segs, mm_seg_t *segs);
|
107
126
|
void mm_pair(void *km, int max_gap_ref, int dp_bonus, int sub_diff, int match_sc, const int *qlens, int *n_regs, mm_reg1_t **regs);
|
108
127
|
|
128
|
+
void mm_jump_split(void *km, const mm_idx_t *mi, const mm_mapopt_t *opt, int32_t qlen, const uint8_t *qseq, mm_reg1_t *r, int32_t ts_strand);
|
129
|
+
|
109
130
|
FILE *mm_split_init(const char *prefix, const mm_idx_t *mi);
|
110
131
|
mm_idx_t *mm_split_merge_prep(const char *prefix, int n_splits, FILE **fp, uint32_t *n_seq_part);
|
111
132
|
int mm_split_merge(int n_segs, const char **fn, const mm_mapopt_t *opt, int n_split_idx);
|
data/ext/minimap2/options.c
CHANGED
@@ -55,13 +55,15 @@ void mm_mapopt_init(mm_mapopt_t *opt)
|
|
55
55
|
opt->max_clip_ratio = 1.0f;
|
56
56
|
opt->mini_batch_size = 500000000;
|
57
57
|
opt->max_sw_mat = 100000000;
|
58
|
-
opt->cap_kalloc =
|
58
|
+
opt->cap_kalloc = 500000000;
|
59
59
|
|
60
60
|
opt->rank_min_len = 500;
|
61
61
|
opt->rank_frac = 0.9f;
|
62
62
|
|
63
63
|
opt->pe_ori = 0; // FF
|
64
64
|
opt->pe_bonus = 33;
|
65
|
+
|
66
|
+
opt->jump_min_match = 3;
|
65
67
|
}
|
66
68
|
|
67
69
|
void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi)
|
@@ -114,6 +116,14 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
|
|
114
116
|
mo->a = 1, mo->b = 4, mo->q = 6, mo->q2 = 26, mo->e = 2, mo->e2 = 1;
|
115
117
|
mo->min_dp_max = 200;
|
116
118
|
}
|
119
|
+
} else if (strcmp(preset, "lr:hqae") == 0) { // high-quality assembly evaluation
|
120
|
+
io->flag = 0, io->k = 25, io->w = 51;
|
121
|
+
mo->flag |= MM_F_RMQ;
|
122
|
+
mo->min_mid_occ = 50, mo->max_mid_occ = 500;
|
123
|
+
mo->rmq_inner_dist = 5000;
|
124
|
+
mo->occ_dist = 200;
|
125
|
+
mo->best_n = 100;
|
126
|
+
mo->chain_gap_scale = 5.0f;
|
117
127
|
} else if (strcmp(preset, "map-iclr-prerender") == 0) {
|
118
128
|
io->flag = 0, io->k = 15;
|
119
129
|
mo->b = 6, mo->transition = 1;
|
@@ -156,7 +166,7 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
|
|
156
166
|
mo->mid_occ = 1000;
|
157
167
|
mo->max_occ = 5000;
|
158
168
|
mo->mini_batch_size = 50000000;
|
159
|
-
} else if (
|
169
|
+
} else if (strcmp(preset, "splice") == 0 || strcmp(preset, "splice:hq") == 0 || strcmp(preset, "splice:sr") == 0 || strcmp(preset, "cdna") == 0) {
|
160
170
|
io->flag = 0, io->k = 15, io->w = 5;
|
161
171
|
mo->flag |= MM_F_SPLICE | MM_F_SPLICE_FOR | MM_F_SPLICE_REV | MM_F_SPLICE_FLANK;
|
162
172
|
mo->max_sw_mat = 0;
|
@@ -164,13 +174,31 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
|
|
164
174
|
mo->a = 1, mo->b = 2, mo->q = 2, mo->e = 1, mo->q2 = 32, mo->e2 = 0;
|
165
175
|
mo->noncan = 9;
|
166
176
|
mo->junc_bonus = 9;
|
177
|
+
mo->junc_pen = 5;
|
167
178
|
mo->zdrop = 200, mo->zdrop_inv = 100; // because mo->a is halved
|
168
|
-
if (strcmp(preset, "splice:hq") == 0)
|
179
|
+
if (strcmp(preset, "splice:hq") == 0) {
|
169
180
|
mo->noncan = 5, mo->b = 4, mo->q = 6, mo->q2 = 24;
|
181
|
+
} else if (strcmp(preset, "splice:sr") == 0) {
|
182
|
+
mo->flag |= MM_F_NO_PRINT_2ND | MM_F_2_IO_THREADS | MM_F_HEAP_SORT | MM_F_FRAG_MODE | MM_F_WEAK_PAIRING | MM_F_SR_RNA;
|
183
|
+
mo->noncan = 5, mo->b = 4, mo->q = 6, mo->q2 = 24;
|
184
|
+
mo->min_chain_score = 25;
|
185
|
+
mo->min_dp_max = 40;
|
186
|
+
mo->min_ksw_len = 20;
|
187
|
+
mo->pe_ori = 0<<1|1; // FR
|
188
|
+
mo->best_n = 10;
|
189
|
+
mo->mini_batch_size = 100000000;
|
190
|
+
}
|
170
191
|
} else return -1;
|
171
192
|
return 0;
|
172
193
|
}
|
173
194
|
|
195
|
+
int mm_max_spsc_bonus(const mm_mapopt_t *mo)
|
196
|
+
{
|
197
|
+
int max_sc = (mo->q2 + 1) / 2 - 1;
|
198
|
+
max_sc = max_sc > mo->q2 - mo->q? max_sc : mo->q2 - mo->q;
|
199
|
+
return max_sc;
|
200
|
+
}
|
201
|
+
|
174
202
|
int mm_check_opt(const mm_idxopt_t *io, const mm_mapopt_t *mo)
|
175
203
|
{
|
176
204
|
if (mo->bw > mo->bw_long) {
|
@@ -225,6 +253,11 @@ int mm_check_opt(const mm_idxopt_t *io, const mm_mapopt_t *mo)
|
|
225
253
|
fprintf(stderr, "[ERROR]\033[1;31m scoring system violating ({-O}+{-E})+({-O2}+{-E2}) <= 127\033[0m\n");
|
226
254
|
return -1;
|
227
255
|
}
|
256
|
+
if (mo->sc_ambi < 0 || mo->sc_ambi >= mo->b) {
|
257
|
+
if (mm_verbose >= 1)
|
258
|
+
fprintf(stderr, "[ERROR]\033[1;31m --score-N should be within [0,{-B})\033[0m\n");
|
259
|
+
return -1;
|
260
|
+
}
|
228
261
|
if (mo->zdrop < mo->zdrop_inv) {
|
229
262
|
if (mm_verbose >= 1)
|
230
263
|
fprintf(stderr, "[ERROR]\033[1;31m Z-drop should not be less than inversion-Z-drop\033[0m\n");
|
@@ -71,13 +71,13 @@ static inline void mm_reset_timer(void)
|
|
71
71
|
}
|
72
72
|
|
73
73
|
extern unsigned char seq_comp_table[256];
|
74
|
-
static inline mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt)
|
74
|
+
static inline mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char* seqname, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt)
|
75
75
|
{
|
76
76
|
mm_reg1_t *r;
|
77
77
|
|
78
78
|
Py_BEGIN_ALLOW_THREADS
|
79
79
|
if (seq2 == 0) {
|
80
|
-
r = mm_map(mi, strlen(seq1), seq1, n_regs, b, opt,
|
80
|
+
r = mm_map(mi, strlen(seq1), seq1, n_regs, b, opt, seqname);
|
81
81
|
} else {
|
82
82
|
int _n_regs[2];
|
83
83
|
mm_reg1_t *regs[2];
|
@@ -94,7 +94,7 @@ static inline mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const
|
|
94
94
|
seq[1][i] = seq_comp_table[t];
|
95
95
|
}
|
96
96
|
if (len[1]&1) seq[1][len[1]>>1] = seq_comp_table[(uint8_t)seq[1][len[1]>>1]];
|
97
|
-
mm_map_frag(mi, 2, len, (const char**)seq, _n_regs, regs, b, opt,
|
97
|
+
mm_map_frag(mi, 2, len, (const char**)seq, _n_regs, regs, b, opt, seqname);
|
98
98
|
for (i = 0; i < _n_regs[1]; ++i)
|
99
99
|
regs[1][i].rev = !regs[1][i].rev;
|
100
100
|
*n_regs = _n_regs[0] + _n_regs[1];
|
@@ -36,9 +36,10 @@ cdef extern from "minimap.h":
|
|
36
36
|
float alt_drop
|
37
37
|
|
38
38
|
int a, b, q, e, q2, e2
|
39
|
+
int transition
|
39
40
|
int sc_ambi
|
40
41
|
int noncan
|
41
|
-
int junc_bonus
|
42
|
+
int junc_bonus, junc_pen
|
42
43
|
int zdrop, zdrop_inv
|
43
44
|
int end_bonus
|
44
45
|
int min_dp_max
|
@@ -51,6 +52,8 @@ cdef extern from "minimap.h":
|
|
51
52
|
|
52
53
|
int pe_ori, pe_bonus
|
53
54
|
|
55
|
+
int jump_min_match;
|
56
|
+
|
54
57
|
float mid_occ_frac
|
55
58
|
float q_occ_frac
|
56
59
|
int32_t min_mid_occ
|
@@ -128,7 +131,7 @@ cdef extern from "cmappy.h":
|
|
128
131
|
|
129
132
|
void mm_reg2hitpy(const mm_idx_t *mi, mm_reg1_t *r, mm_hitpy_t *h)
|
130
133
|
void mm_free_reg1(mm_reg1_t *r)
|
131
|
-
mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt)
|
134
|
+
mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char* seqname, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt)
|
132
135
|
char *mappy_fetch_seq(const mm_idx_t *mi, const char *name, int st, int en, int *l)
|
133
136
|
mm_idx_t *mappy_idx_seq(int w, int k, int is_hpc, int bucket_bits, const char *seq, int l)
|
134
137
|
|
@@ -3,7 +3,7 @@ from libc.stdlib cimport free
|
|
3
3
|
cimport cmappy
|
4
4
|
import sys
|
5
5
|
|
6
|
-
__version__ = '2.
|
6
|
+
__version__ = '2.29'
|
7
7
|
|
8
8
|
cmappy.mm_reset_timer()
|
9
9
|
|
@@ -96,6 +96,7 @@ cdef class Alignment:
|
|
96
96
|
a = [str(self._q_st), str(self._q_en), strand, self._ctg, str(self._ctg_len), str(self._r_st), str(self._r_en),
|
97
97
|
str(self._mlen), str(self._blen), str(self._mapq), tp, ts, "cg:Z:" + self.cigar_str]
|
98
98
|
if self._cs != "": a.append("cs:Z:" + self._cs)
|
99
|
+
if self._MD != "": a.append("MD:Z:" + self._MD)
|
99
100
|
return "\t".join(a)
|
100
101
|
|
101
102
|
cdef class ThreadBuffer:
|
@@ -112,7 +113,7 @@ cdef class Aligner:
|
|
112
113
|
cdef cmappy.mm_idxopt_t idx_opt
|
113
114
|
cdef cmappy.mm_mapopt_t map_opt
|
114
115
|
|
115
|
-
def __cinit__(self, fn_idx_in=None, preset=None, k=None, w=None, min_cnt=None, min_chain_score=None, min_dp_score=None, bw=None, bw_long=None, best_n=None, n_threads=3, fn_idx_out=None, max_frag_len=None, extra_flags=None, seq=None, scoring=None):
|
116
|
+
def __cinit__(self, fn_idx_in=None, preset=None, k=None, w=None, min_cnt=None, min_chain_score=None, min_dp_score=None, bw=None, bw_long=None, best_n=None, n_threads=3, fn_idx_out=None, max_frag_len=None, extra_flags=None, seq=None, scoring=None, sc_ambi=None, max_chain_skip=None):
|
116
117
|
self._idx = NULL
|
117
118
|
cmappy.mm_set_opt(NULL, &self.idx_opt, &self.map_opt) # set the default options
|
118
119
|
if preset is not None:
|
@@ -137,6 +138,8 @@ cdef class Aligner:
|
|
137
138
|
self.map_opt.q2, self.map_opt.e2 = scoring[4], scoring[5]
|
138
139
|
if len(scoring) >= 7:
|
139
140
|
self.map_opt.sc_ambi = scoring[6]
|
141
|
+
if sc_ambi is not None: self.map_opt.sc_ambi = sc_ambi
|
142
|
+
if max_chain_skip is not None: self.map_opt.max_chain_skip = max_chain_skip
|
140
143
|
|
141
144
|
cdef cmappy.mm_idx_reader_t *r;
|
142
145
|
|
@@ -162,7 +165,7 @@ cdef class Aligner:
|
|
162
165
|
def __bool__(self):
|
163
166
|
return (self._idx != NULL)
|
164
167
|
|
165
|
-
def map(self, seq, seq2=None, buf=None, cs=False, MD=False, max_frag_len=None, extra_flags=None):
|
168
|
+
def map(self, seq, seq2=None, name=None, buf=None, cs=False, MD=False, max_frag_len=None, extra_flags=None):
|
166
169
|
cdef cmappy.mm_reg1_t *regs
|
167
170
|
cdef cmappy.mm_hitpy_t h
|
168
171
|
cdef ThreadBuffer b
|
@@ -184,11 +187,20 @@ cdef class Aligner:
|
|
184
187
|
km = cmappy.mm_tbuf_get_km(b._b)
|
185
188
|
|
186
189
|
_seq = seq if isinstance(seq, bytes) else seq.encode()
|
190
|
+
if name is not None:
|
191
|
+
_name = name if isinstance(name, bytes) else name.encode()
|
192
|
+
|
187
193
|
if seq2 is None:
|
188
|
-
|
194
|
+
if name is None:
|
195
|
+
regs = cmappy.mm_map_aux(self._idx, NULL, _seq, NULL, &n_regs, b._b, &map_opt)
|
196
|
+
else:
|
197
|
+
regs = cmappy.mm_map_aux(self._idx, _name, _seq, NULL, &n_regs, b._b, &map_opt)
|
189
198
|
else:
|
190
199
|
_seq2 = seq2 if isinstance(seq2, bytes) else seq2.encode()
|
191
|
-
|
200
|
+
if name is None:
|
201
|
+
regs = cmappy.mm_map_aux(self._idx, NULL, _seq, _seq2, &n_regs, b._b, &map_opt)
|
202
|
+
else:
|
203
|
+
regs = cmappy.mm_map_aux(self._idx, _name, _seq, _seq2, &n_regs, b._b, &map_opt)
|
192
204
|
|
193
205
|
try:
|
194
206
|
i = 0
|
@@ -199,11 +211,12 @@ cdef class Aligner:
|
|
199
211
|
c = h.cigar32[k]
|
200
212
|
cigar.append([c>>4, c&0xf])
|
201
213
|
if cs or MD: # generate the cs and/or the MD tag, if requested
|
214
|
+
_cur_seq = _seq2 if h.seg_id > 0 and seq2 is not None else _seq
|
202
215
|
if cs:
|
203
|
-
l_cs_str = cmappy.mm_gen_cs(km, &cs_str, &m_cs_str, self._idx, ®s[i],
|
216
|
+
l_cs_str = cmappy.mm_gen_cs(km, &cs_str, &m_cs_str, self._idx, ®s[i], _cur_seq, 1)
|
204
217
|
_cs = cs_str[:l_cs_str] if isinstance(cs_str, str) else cs_str[:l_cs_str].decode()
|
205
218
|
if MD:
|
206
|
-
l_cs_str = cmappy.mm_gen_MD(km, &cs_str, &m_cs_str, self._idx, ®s[i],
|
219
|
+
l_cs_str = cmappy.mm_gen_MD(km, &cs_str, &m_cs_str, self._idx, ®s[i], _cur_seq)
|
207
220
|
_MD = cs_str[:l_cs_str] if isinstance(cs_str, str) else cs_str[:l_cs_str].decode()
|
208
221
|
yield Alignment(h.ctg, h.ctg_len, h.ctg_start, h.ctg_end, h.strand, h.qry_start, h.qry_end, h.mapq, cigar, h.is_primary, h.mlen, h.blen, h.NM, h.trans_strand, h.seg_id, _cs, _MD)
|
209
222
|
cmappy.mm_free_reg1(®s[i])
|
@@ -5,7 +5,7 @@ import getopt
|
|
5
5
|
import mappy as mp
|
6
6
|
|
7
7
|
def main(argv):
|
8
|
-
opts, args = getopt.getopt(argv[1:], "x:n:m:k:w:r:
|
8
|
+
opts, args = getopt.getopt(argv[1:], "x:n:m:k:w:r:cM")
|
9
9
|
if len(args) < 2:
|
10
10
|
print("Usage: minimap2.py [options] <ref.fa>|<ref.mmi> <query.fq>")
|
11
11
|
print("Options:")
|
@@ -16,10 +16,11 @@ def main(argv):
|
|
16
16
|
print(" -w INT minimizer window length")
|
17
17
|
print(" -r INT band width")
|
18
18
|
print(" -c output the cs tag")
|
19
|
+
print(" -M output the MD tag")
|
19
20
|
sys.exit(1)
|
20
21
|
|
21
22
|
preset = min_cnt = min_sc = k = w = bw = None
|
22
|
-
out_cs = False
|
23
|
+
out_cs = out_MD = False
|
23
24
|
for opt, arg in opts:
|
24
25
|
if opt == '-x': preset = arg
|
25
26
|
elif opt == '-n': min_cnt = int(arg)
|
@@ -28,11 +29,12 @@ def main(argv):
|
|
28
29
|
elif opt == '-k': k = int(arg)
|
29
30
|
elif opt == '-w': w = int(arg)
|
30
31
|
elif opt == '-c': out_cs = True
|
32
|
+
elif opt == '-M': out_MD = True
|
31
33
|
|
32
34
|
a = mp.Aligner(args[0], preset=preset, min_cnt=min_cnt, min_chain_score=min_sc, k=k, w=w, bw=bw)
|
33
35
|
if not a: raise Exception("ERROR: failed to load/build index file '{}'".format(args[0]))
|
34
36
|
for name, seq, qual in mp.fastx_read(args[1]): # read one sequence
|
35
|
-
for h in a.map(seq, cs=out_cs): # traverse hits
|
37
|
+
for h in a.map(seq, cs=out_cs, MD=out_MD): # traverse hits
|
36
38
|
print('{}\t{}\t{}'.format(name, len(seq), h))
|
37
39
|
|
38
40
|
if __name__ == "__main__":
|
data/ext/minimap2/seed.c
CHANGED
@@ -112,7 +112,8 @@ mm_seed_t *mm_collect_matches(void *km, int *_n_m, int qlen, int max_occ, int ma
|
|
112
112
|
}
|
113
113
|
for (i = 0, n_m = 0, *rep_len = 0, *n_a = 0; i < n_m0; ++i) {
|
114
114
|
mm_seed_t *q = &m[i];
|
115
|
-
|
115
|
+
if (mm_dbg_flag & MM_DBG_SEED_FREQ)
|
116
|
+
fprintf(stderr, "SF\t%d\t%d\t%d\n", q->q_pos>>1, q->n, q->flt);
|
116
117
|
if (q->flt) {
|
117
118
|
int en = (q->q_pos >> 1) + 1, st = en - q->q_span;
|
118
119
|
if (st > rep_en) {
|
data/ext/minimap2/setup.py
CHANGED
@@ -23,7 +23,7 @@ def readme():
|
|
23
23
|
|
24
24
|
setup(
|
25
25
|
name = 'mappy',
|
26
|
-
version = '2.
|
26
|
+
version = '2.29',
|
27
27
|
url = 'https://github.com/lh3/minimap2',
|
28
28
|
description = 'Minimap2 python binding',
|
29
29
|
long_description = readme(),
|
@@ -33,7 +33,7 @@ setup(
|
|
33
33
|
keywords = 'sequence-alignment',
|
34
34
|
scripts = ['python/minimap2.py'],
|
35
35
|
ext_modules = [Extension('mappy',
|
36
|
-
sources = ['python/mappy.pyx', 'align.c', 'bseq.c', 'lchain.c', 'seed.c', 'format.c', 'hit.c', 'index.c', 'pe.c', 'options.c',
|
36
|
+
sources = ['python/mappy.pyx', 'align.c', 'bseq.c', 'lchain.c', 'seed.c', 'format.c', 'hit.c', 'index.c', 'pe.c', 'jump.c', 'options.c',
|
37
37
|
'ksw2_extd2_sse.c', 'ksw2_exts2_sse.c', 'ksw2_extz2_sse.c', 'ksw2_ll_sse.c',
|
38
38
|
'kalloc.c', 'kthread.c', 'map.c', 'misc.c', 'sdust.c', 'sketch.c', 'esterr.c', 'splitidx.c'],
|
39
39
|
depends = ['minimap.h', 'bseq.h', 'kalloc.h', 'kdq.h', 'khash.h', 'kseq.h', 'ksort.h',
|
data/ext/minimap2.patch
CHANGED
@@ -6,13 +6,13 @@
|
|
6
6
|
CPPFLAGS= -DHAVE_KALLOC
|
7
7
|
INCLUDES=
|
8
8
|
OBJS= kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o options.o index.o \
|
9
|
-
lchain.o align.o hit.o seed.o map.o format.o pe.o esterr.o splitidx.o \
|
9
|
+
lchain.o align.o hit.o seed.o jump.o map.o format.o pe.o esterr.o splitidx.o \
|
10
10
|
- ksw2_ll_sse.o
|
11
11
|
+ ksw2_ll_sse.o cmappy.o
|
12
12
|
PROG= minimap2
|
13
13
|
PROG_EXTRA= sdust minimap2-lite
|
14
14
|
LIBS= -lm -lz -lpthread
|
15
|
-
@@ -
|
15
|
+
@@ -135,3 +135,4 @@ sdust.o: kalloc.h kdq.h kvec.h sdust.h
|
16
16
|
seed.o: mmpriv.h minimap.h bseq.h kseq.h kalloc.h ksort.h
|
17
17
|
sketch.o: kvec.h kalloc.h mmpriv.h minimap.h bseq.h kseq.h
|
18
18
|
splitidx.o: mmpriv.h minimap.h bseq.h kseq.h
|
data/lib/minimap2/aligner.rb
CHANGED
@@ -54,9 +54,10 @@ module Minimap2
|
|
54
54
|
fn_idx_out: nil,
|
55
55
|
max_frag_len: nil,
|
56
56
|
extra_flags: nil,
|
57
|
-
scoring: nil
|
57
|
+
scoring: nil,
|
58
|
+
sc_ambi: nil,
|
59
|
+
max_chain_skip: nil
|
58
60
|
)
|
59
|
-
|
60
61
|
@idx_opt = FFI::IdxOpt.new
|
61
62
|
@map_opt = FFI::MapOpt.new
|
62
63
|
|
@@ -91,6 +92,8 @@ module Minimap2
|
|
91
92
|
map_opt[:sc_ambi] = scoring[6] if scoring.size >= 7
|
92
93
|
end
|
93
94
|
end
|
95
|
+
map_opt[:sc_ambi] = sc_ambi if sc_ambi
|
96
|
+
map_opt[:max_chain_skip] = max_chain_skip if max_chain_skip
|
94
97
|
|
95
98
|
if fn_idx_in
|
96
99
|
warn "Since fn_idx_in is specified, the seq argument will be ignored." if seq
|
@@ -134,13 +137,13 @@ module Minimap2
|
|
134
137
|
|
135
138
|
def align(
|
136
139
|
seq, seq2 = nil,
|
140
|
+
name: nil,
|
137
141
|
buf: nil,
|
138
142
|
cs: false,
|
139
143
|
md: false,
|
140
144
|
max_frag_len: nil,
|
141
145
|
extra_flags: nil
|
142
146
|
)
|
143
|
-
|
144
147
|
return if index.null?
|
145
148
|
return if (map_opt[:flag] & 4).zero? && (index[:flag] & 2).zero?
|
146
149
|
|
@@ -151,7 +154,7 @@ module Minimap2
|
|
151
154
|
km = FFI.mm_tbuf_get_km(buf)
|
152
155
|
|
153
156
|
n_regs_ptr = ::FFI::MemoryPointer.new :int
|
154
|
-
regs_ptr = FFI.mm_map_aux(index, seq, seq2, n_regs_ptr, buf, map_opt)
|
157
|
+
regs_ptr = FFI.mm_map_aux(index, name, seq, seq2, n_regs_ptr, buf, map_opt)
|
155
158
|
n_regs = n_regs_ptr.read_int
|
156
159
|
|
157
160
|
regs = Array.new(n_regs) do |i|
|
@@ -174,15 +177,19 @@ module Minimap2
|
|
174
177
|
cigar = c.map { |x| [x >> 4, x & 0xf] } # 32-bit CIGAR encoding -> Ruby array
|
175
178
|
|
176
179
|
_cs = ""
|
177
|
-
if cs
|
178
|
-
l_cs_str = FFI.mm_gen_cs(km, cs_str, m_cs_str, @index, regs[i], seq, 1)
|
179
|
-
_cs = cs_str.read_pointer.read_string(l_cs_str)
|
180
|
-
end
|
181
|
-
|
182
180
|
_md = ""
|
183
|
-
if md
|
184
|
-
|
185
|
-
|
181
|
+
if cs or md
|
182
|
+
cur_seq = hit[:seg_id] > 0 && seq2 ? seq2 : seq
|
183
|
+
|
184
|
+
if cs
|
185
|
+
l_cs_str = FFI.mm_gen_cs(km, cs_str, m_cs_str, @index, regs[i], cur_seq, 1)
|
186
|
+
_cs = cs_str.read_pointer.read_string(l_cs_str)
|
187
|
+
end
|
188
|
+
|
189
|
+
if md
|
190
|
+
l_cs_str = FFI.mm_gen_md(km, cs_str, m_cs_str, @index, regs[i], cur_seq)
|
191
|
+
_md = cs_str.read_pointer.read_string(l_cs_str)
|
192
|
+
end
|
186
193
|
end
|
187
194
|
|
188
195
|
alignments << Alignment.new(hit, cigar, _cs, _md)
|
data/lib/minimap2/alignment.rb
CHANGED
@@ -41,6 +41,9 @@ module Minimap2
|
|
41
41
|
SPLICE_OLD = 0x800000000
|
42
42
|
SECONDARY_SEQ = 0x1000000000 # output SEQ field for secondary alignments using hard clipping
|
43
43
|
OUT_DS = 0x2000000000
|
44
|
+
WEAK_PAIRING = 0x4000000000
|
45
|
+
SR_RNA = 0x8000000000
|
46
|
+
OUT_JUNC = 0x10000000000
|
44
47
|
|
45
48
|
HPC = 0x1
|
46
49
|
NO_SEQ = 0x2
|
@@ -99,6 +102,8 @@ module Minimap2
|
|
99
102
|
:S, :pointer, # 4-bit packed sequence
|
100
103
|
:B, :pointer, # index (hidden)
|
101
104
|
:I, :pointer, # intervals (hidden)
|
105
|
+
:spsc, :pointer, # splice score (hidden)
|
106
|
+
:J, :pointer, # junctions to create jumps (hidden)
|
102
107
|
:km, :pointer,
|
103
108
|
:h, :pointer
|
104
109
|
end
|
@@ -113,7 +118,7 @@ module Minimap2
|
|
113
118
|
:dp_max0, :int32, # DP score before mm_update_dp_max() adjustment
|
114
119
|
:n_ambi_trans_strand, :uint32,
|
115
120
|
:n_cigar, :uint32
|
116
|
-
|
121
|
+
# :cigar, :pointer # variable length array (see cigar method below)
|
117
122
|
|
118
123
|
bit_field :n_ambi_trans_strand,
|
119
124
|
:n_ambi, 30, # number of ambiguous bases
|
@@ -160,7 +165,8 @@ module Minimap2
|
|
160
165
|
:split_inv, 1,
|
161
166
|
:is_alt, 1,
|
162
167
|
:strand_retained, 1,
|
163
|
-
:
|
168
|
+
:is_spliced, 1,
|
169
|
+
:dummy, 4
|
164
170
|
end
|
165
171
|
|
166
172
|
# indexing option
|
@@ -210,6 +216,7 @@ module Minimap2
|
|
210
216
|
:transition, :int, # transition mismatch score (A:G, C:T)
|
211
217
|
:sc_ambi, :int, # score when one or both bases are "N"
|
212
218
|
:noncan, :int, # cost of non-canonical splicing sites
|
219
|
+
:junc_pen, :int,
|
213
220
|
:junc_bonus, :int,
|
214
221
|
:zdrop, :int, # break alignment if alignment score drops too fast along the diagonal
|
215
222
|
:zdrop_inv, :int,
|
@@ -223,6 +230,7 @@ module Minimap2
|
|
223
230
|
:rank_frac, :float,
|
224
231
|
:pe_ori, :int,
|
225
232
|
:pe_bonus, :int,
|
233
|
+
:jump_min_match, :int32,
|
226
234
|
:mid_occ_frac, :float, # only used by mm_mapopt_update(); see below
|
227
235
|
:q_occ_frac, :float,
|
228
236
|
:min_mid_occ, :int32,
|