minimap2 0.2.22.0 → 0.2.24.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +60 -76
- data/ext/Rakefile +55 -0
- data/ext/cmappy/cmappy.c +129 -0
- data/ext/cmappy/cmappy.h +44 -0
- data/ext/minimap2/FAQ.md +46 -0
- data/ext/minimap2/LICENSE.txt +24 -0
- data/ext/minimap2/MANIFEST.in +10 -0
- data/ext/minimap2/Makefile +132 -0
- data/ext/minimap2/Makefile.simde +97 -0
- data/ext/minimap2/NEWS.md +821 -0
- data/ext/minimap2/README.md +403 -0
- data/ext/minimap2/align.c +1020 -0
- data/ext/minimap2/bseq.c +169 -0
- data/ext/minimap2/bseq.h +64 -0
- data/ext/minimap2/code_of_conduct.md +30 -0
- data/ext/minimap2/cookbook.md +243 -0
- data/ext/minimap2/esterr.c +64 -0
- data/ext/minimap2/example.c +63 -0
- data/ext/minimap2/format.c +559 -0
- data/ext/minimap2/hit.c +466 -0
- data/ext/minimap2/index.c +775 -0
- data/ext/minimap2/kalloc.c +205 -0
- data/ext/minimap2/kalloc.h +76 -0
- data/ext/minimap2/kdq.h +132 -0
- data/ext/minimap2/ketopt.h +120 -0
- data/ext/minimap2/khash.h +615 -0
- data/ext/minimap2/krmq.h +474 -0
- data/ext/minimap2/kseq.h +256 -0
- data/ext/minimap2/ksort.h +153 -0
- data/ext/minimap2/ksw2.h +184 -0
- data/ext/minimap2/ksw2_dispatch.c +96 -0
- data/ext/minimap2/ksw2_extd2_sse.c +402 -0
- data/ext/minimap2/ksw2_exts2_sse.c +416 -0
- data/ext/minimap2/ksw2_extz2_sse.c +313 -0
- data/ext/minimap2/ksw2_ll_sse.c +152 -0
- data/ext/minimap2/kthread.c +159 -0
- data/ext/minimap2/kthread.h +15 -0
- data/ext/minimap2/kvec.h +105 -0
- data/ext/minimap2/lchain.c +369 -0
- data/ext/minimap2/main.c +459 -0
- data/ext/minimap2/map.c +714 -0
- data/ext/minimap2/minimap.h +410 -0
- data/ext/minimap2/minimap2.1 +725 -0
- data/ext/minimap2/misc/README.md +179 -0
- data/ext/minimap2/misc/mmphase.js +335 -0
- data/ext/minimap2/misc/paftools.js +3149 -0
- data/ext/minimap2/misc.c +162 -0
- data/ext/minimap2/mmpriv.h +132 -0
- data/ext/minimap2/options.c +234 -0
- data/ext/minimap2/pe.c +177 -0
- data/ext/minimap2/python/README.rst +196 -0
- data/ext/minimap2/python/cmappy.h +152 -0
- data/ext/minimap2/python/cmappy.pxd +153 -0
- data/ext/minimap2/python/mappy.pyx +273 -0
- data/ext/minimap2/python/minimap2.py +39 -0
- data/ext/minimap2/sdust.c +213 -0
- data/ext/minimap2/sdust.h +25 -0
- data/ext/minimap2/seed.c +131 -0
- data/ext/minimap2/setup.py +55 -0
- data/ext/minimap2/sketch.c +143 -0
- data/ext/minimap2/splitidx.c +84 -0
- data/ext/minimap2/sse2neon/emmintrin.h +1689 -0
- data/ext/minimap2/test/MT-human.fa +278 -0
- data/ext/minimap2/test/MT-orang.fa +276 -0
- data/ext/minimap2/test/q-inv.fa +4 -0
- data/ext/minimap2/test/q2.fa +2 -0
- data/ext/minimap2/test/t-inv.fa +127 -0
- data/ext/minimap2/test/t2.fa +2 -0
- data/ext/minimap2/tex/Makefile +21 -0
- data/ext/minimap2/tex/bioinfo.cls +930 -0
- data/ext/minimap2/tex/blasr-mc.eval +17 -0
- data/ext/minimap2/tex/bowtie2-s3.sam.eval +28 -0
- data/ext/minimap2/tex/bwa-s3.sam.eval +52 -0
- data/ext/minimap2/tex/bwa.eval +55 -0
- data/ext/minimap2/tex/eval2roc.pl +33 -0
- data/ext/minimap2/tex/graphmap.eval +4 -0
- data/ext/minimap2/tex/hs38-simu.sh +10 -0
- data/ext/minimap2/tex/minialign.eval +49 -0
- data/ext/minimap2/tex/minimap2.bib +460 -0
- data/ext/minimap2/tex/minimap2.tex +724 -0
- data/ext/minimap2/tex/mm2-s3.sam.eval +62 -0
- data/ext/minimap2/tex/mm2-update.tex +240 -0
- data/ext/minimap2/tex/mm2.approx.eval +12 -0
- data/ext/minimap2/tex/mm2.eval +13 -0
- data/ext/minimap2/tex/natbib.bst +1288 -0
- data/ext/minimap2/tex/natbib.sty +803 -0
- data/ext/minimap2/tex/ngmlr.eval +38 -0
- data/ext/minimap2/tex/roc.gp +60 -0
- data/ext/minimap2/tex/snap-s3.sam.eval +62 -0
- data/ext/minimap2.patch +19 -0
- data/lib/minimap2/aligner.rb +4 -4
- data/lib/minimap2/alignment.rb +11 -11
- data/lib/minimap2/ffi/constants.rb +20 -16
- data/lib/minimap2/ffi/functions.rb +5 -0
- data/lib/minimap2/ffi.rb +4 -5
- data/lib/minimap2/version.rb +2 -2
- data/lib/minimap2.rb +51 -15
- metadata +97 -79
- data/lib/minimap2/ffi_helper.rb +0 -53
- data/vendor/libminimap2.so +0 -0
data/ext/minimap2/main.c
ADDED
@@ -0,0 +1,459 @@
|
|
1
|
+
#include <stdlib.h>
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <string.h>
|
4
|
+
#include <errno.h>
|
5
|
+
#include "bseq.h"
|
6
|
+
#include "minimap.h"
|
7
|
+
#include "mmpriv.h"
|
8
|
+
#include "ketopt.h"
|
9
|
+
|
10
|
+
#define MM_VERSION "2.24-r1122"
|
11
|
+
|
12
|
+
#ifdef __linux__
#include <sys/resource.h>
#include <sys/time.h>
/*
 * Raise the soft address-space limit (RLIMIT_AS) of the calling process to
 * its hard limit. This is a best-effort operation: if the current limits
 * cannot be queried, nothing is changed, and a failure to apply the new
 * limit is ignored.
 */
void liftrlimit(void)
{
	struct rlimit r;
	// On failure the contents of r are indeterminate; never pass them on to setrlimit().
	if (getrlimit(RLIMIT_AS, &r) != 0) return;
	r.rlim_cur = r.rlim_max;
	(void)setrlimit(RLIMIT_AS, &r); // best effort; the program runs with the old limit otherwise
}
#else
/* Only the Linux build adjusts resource limits; elsewhere this is a no-op. */
void liftrlimit(void) {}
#endif
|
25
|
+
|
26
|
+
static ko_longopt_t long_options[] = {
|
27
|
+
{ "bucket-bits", ko_required_argument, 300 },
|
28
|
+
{ "mb-size", ko_required_argument, 'K' },
|
29
|
+
{ "seed", ko_required_argument, 302 },
|
30
|
+
{ "no-kalloc", ko_no_argument, 303 },
|
31
|
+
{ "print-qname", ko_no_argument, 304 },
|
32
|
+
{ "no-self", ko_no_argument, 'D' },
|
33
|
+
{ "print-seeds", ko_no_argument, 306 },
|
34
|
+
{ "max-chain-skip", ko_required_argument, 307 },
|
35
|
+
{ "min-dp-len", ko_required_argument, 308 },
|
36
|
+
{ "print-aln-seq", ko_no_argument, 309 },
|
37
|
+
{ "splice", ko_no_argument, 310 },
|
38
|
+
{ "cost-non-gt-ag", ko_required_argument, 'C' },
|
39
|
+
{ "no-long-join", ko_no_argument, 312 },
|
40
|
+
{ "sr", ko_no_argument, 313 },
|
41
|
+
{ "frag", ko_required_argument, 314 },
|
42
|
+
{ "secondary", ko_required_argument, 315 },
|
43
|
+
{ "cs", ko_optional_argument, 316 },
|
44
|
+
{ "end-bonus", ko_required_argument, 317 },
|
45
|
+
{ "no-pairing", ko_no_argument, 318 },
|
46
|
+
{ "splice-flank", ko_required_argument, 319 },
|
47
|
+
{ "idx-no-seq", ko_no_argument, 320 },
|
48
|
+
{ "end-seed-pen", ko_required_argument, 321 },
|
49
|
+
{ "for-only", ko_no_argument, 322 },
|
50
|
+
{ "rev-only", ko_no_argument, 323 },
|
51
|
+
{ "heap-sort", ko_required_argument, 324 },
|
52
|
+
{ "all-chain", ko_no_argument, 'P' },
|
53
|
+
{ "dual", ko_required_argument, 326 },
|
54
|
+
{ "max-clip-ratio", ko_required_argument, 327 },
|
55
|
+
{ "min-occ-floor", ko_required_argument, 328 },
|
56
|
+
{ "MD", ko_no_argument, 329 },
|
57
|
+
{ "lj-min-ratio", ko_required_argument, 330 },
|
58
|
+
{ "score-N", ko_required_argument, 331 },
|
59
|
+
{ "eqx", ko_no_argument, 332 },
|
60
|
+
{ "paf-no-hit", ko_no_argument, 333 },
|
61
|
+
{ "split-prefix", ko_required_argument, 334 },
|
62
|
+
{ "no-end-flt", ko_no_argument, 335 },
|
63
|
+
{ "hard-mask-level",ko_no_argument, 336 },
|
64
|
+
{ "cap-sw-mem", ko_required_argument, 337 },
|
65
|
+
{ "max-qlen", ko_required_argument, 338 },
|
66
|
+
{ "max-chain-iter", ko_required_argument, 339 },
|
67
|
+
{ "junc-bed", ko_required_argument, 340 },
|
68
|
+
{ "junc-bonus", ko_required_argument, 341 },
|
69
|
+
{ "sam-hit-only", ko_no_argument, 342 },
|
70
|
+
{ "chain-gap-scale",ko_required_argument, 343 },
|
71
|
+
{ "alt", ko_required_argument, 344 },
|
72
|
+
{ "alt-drop", ko_required_argument, 345 },
|
73
|
+
{ "mask-len", ko_required_argument, 346 },
|
74
|
+
{ "rmq", ko_optional_argument, 347 },
|
75
|
+
{ "qstrand", ko_no_argument, 348 },
|
76
|
+
{ "cap-kalloc", ko_required_argument, 349 },
|
77
|
+
{ "q-occ-frac", ko_required_argument, 350 },
|
78
|
+
{ "chain-skip-scale",ko_required_argument,351 },
|
79
|
+
{ "print-chains", ko_no_argument, 352 },
|
80
|
+
{ "no-hash-name", ko_no_argument, 353 },
|
81
|
+
{ "help", ko_no_argument, 'h' },
|
82
|
+
{ "max-intron-len", ko_required_argument, 'G' },
|
83
|
+
{ "version", ko_no_argument, 'V' },
|
84
|
+
{ "min-count", ko_required_argument, 'n' },
|
85
|
+
{ "min-chain-score",ko_required_argument, 'm' },
|
86
|
+
{ "mask-level", ko_required_argument, 'M' },
|
87
|
+
{ "min-dp-score", ko_required_argument, 's' },
|
88
|
+
{ "sam", ko_no_argument, 'a' },
|
89
|
+
{ 0, 0, 0 }
|
90
|
+
};
|
91
|
+
|
92
|
+
/*
 * Parse a decimal number that may carry a one-letter scale suffix.
 *
 * The leading part of str is converted with strtod(); a directly following
 * 'K'/'k', 'M'/'m' or 'G'/'g' multiplies the value by 1e3, 1e6 or 1e9 and is
 * consumed. The scaled value is rounded to an integer with a .499 offset.
 *
 * @param str  NUL-terminated text to parse
 * @param q    if non-NULL, receives a pointer to the first character after
 *             the number and its optional suffix (e.g. the ',' in "500,20k")
 * @return     the rounded value; 0 if str does not start with a number
 */
static inline int64_t mm_parse_num2(const char *str, char **q)
{
	double x;
	char *p;
	x = strtod(str, &p);
	if (*p == 'G' || *p == 'g') x *= 1e9, ++p;
	else if (*p == 'M' || *p == 'm') x *= 1e6, ++p;
	else if (*p == 'K' || *p == 'k') x *= 1e3, ++p;
	if (q) *q = p;
	// The integer cast truncates toward zero, so the rounding offset has to
	// carry the sign of x; adding .499 to a negative value would shrink it.
	return x < 0.0? (int64_t)(x - .499) : (int64_t)(x + .499);
}
|
103
|
+
|
104
|
+
/*
 * Shorthand for mm_parse_num2() when the caller does not need to know where
 * parsing stopped: parses str and returns only the resulting integer.
 */
static inline int64_t mm_parse_num(const char *str)
{
	return mm_parse_num2(str, NULL);
}
|
108
|
+
|
109
|
+
/*
 * Apply a yes/no command-line value to one bit (or bit set) of opt->flag.
 *
 * @param opt         mapping options whose flag field is updated
 * @param flag        bit(s) to set or clear
 * @param long_idx    index into long_options[], used only to name the option
 *                    in the warning message
 * @param arg         option argument: "yes"/"y" or "no"/"n". NULL, which is
 *                    what an option declared with an optional argument
 *                    delivers when given bare, is treated as "yes" instead of
 *                    being passed to strcmp().
 * @param yes_to_set  non-zero: "yes" sets the flag and "no" clears it;
 *                    zero: the flag has negative meaning, so "yes" clears it
 *                    and "no" sets it
 *
 * Any other argument leaves opt->flag untouched and prints a warning.
 */
static inline void yes_or_no(mm_mapopt_t *opt, int64_t flag, int long_idx, const char *arg, int yes_to_set)
{
	int is_yes;
	if (arg == 0 || strcmp(arg, "yes") == 0 || strcmp(arg, "y") == 0) {
		is_yes = 1;
	} else if (strcmp(arg, "no") == 0 || strcmp(arg, "n") == 0) {
		is_yes = 0;
	} else {
		fprintf(stderr, "[WARNING]\033[1;31m option '--%s' only accepts 'yes' or 'no'.\033[0m\n", long_options[long_idx].name);
		return;
	}
	// the flag is set when the answer agrees with the polarity of the option
	if (is_yes == (yes_to_set != 0)) opt->flag |= flag;
	else opt->flag &= ~flag;
}
|
121
|
+
|
122
|
+
int main(int argc, char *argv[])
|
123
|
+
{
|
124
|
+
const char *opt_str = "2aSDw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:LC:yYPo:e:U:";
|
125
|
+
ketopt_t o = KETOPT_INIT;
|
126
|
+
mm_mapopt_t opt;
|
127
|
+
mm_idxopt_t ipt;
|
128
|
+
int i, c, n_threads = 3, n_parts, old_best_n = -1;
|
129
|
+
char *fnw = 0, *rg = 0, *junc_bed = 0, *s, *alt_list = 0;
|
130
|
+
FILE *fp_help = stderr;
|
131
|
+
mm_idx_reader_t *idx_rdr;
|
132
|
+
mm_idx_t *mi;
|
133
|
+
|
134
|
+
mm_verbose = 3;
|
135
|
+
liftrlimit();
|
136
|
+
mm_realtime0 = realtime();
|
137
|
+
mm_set_opt(0, &ipt, &opt);
|
138
|
+
|
139
|
+
while ((c = ketopt(&o, argc, argv, 1, opt_str, long_options)) >= 0) { // test command line options and apply option -x/preset first
|
140
|
+
if (c == 'x') {
|
141
|
+
if (mm_set_opt(o.arg, &ipt, &opt) < 0) {
|
142
|
+
fprintf(stderr, "[ERROR] unknown preset '%s'\n", o.arg);
|
143
|
+
return 1;
|
144
|
+
}
|
145
|
+
} else if (c == ':') {
|
146
|
+
fprintf(stderr, "[ERROR] missing option argument\n");
|
147
|
+
return 1;
|
148
|
+
} else if (c == '?') {
|
149
|
+
fprintf(stderr, "[ERROR] unknown option in \"%s\"\n", argv[o.i - 1]);
|
150
|
+
return 1;
|
151
|
+
}
|
152
|
+
}
|
153
|
+
o = KETOPT_INIT;
|
154
|
+
|
155
|
+
while ((c = ketopt(&o, argc, argv, 1, opt_str, long_options)) >= 0) {
|
156
|
+
if (c == 'w') ipt.w = atoi(o.arg);
|
157
|
+
else if (c == 'k') ipt.k = atoi(o.arg);
|
158
|
+
else if (c == 'H') ipt.flag |= MM_I_HPC;
|
159
|
+
else if (c == 'd') fnw = o.arg; // the above are indexing related options, except -I
|
160
|
+
else if (c == 't') n_threads = atoi(o.arg);
|
161
|
+
else if (c == 'v') mm_verbose = atoi(o.arg);
|
162
|
+
else if (c == 'g') opt.max_gap = (int)mm_parse_num(o.arg);
|
163
|
+
else if (c == 'G') mm_mapopt_max_intron_len(&opt, (int)mm_parse_num(o.arg));
|
164
|
+
else if (c == 'F') opt.max_frag_len = (int)mm_parse_num(o.arg);
|
165
|
+
else if (c == 'N') old_best_n = opt.best_n, opt.best_n = atoi(o.arg);
|
166
|
+
else if (c == 'p') opt.pri_ratio = atof(o.arg);
|
167
|
+
else if (c == 'M') opt.mask_level = atof(o.arg);
|
168
|
+
else if (c == 'c') opt.flag |= MM_F_OUT_CG | MM_F_CIGAR;
|
169
|
+
else if (c == 'D') opt.flag |= MM_F_NO_DIAG;
|
170
|
+
else if (c == 'P') opt.flag |= MM_F_ALL_CHAINS;
|
171
|
+
else if (c == 'X') opt.flag |= MM_F_ALL_CHAINS | MM_F_NO_DIAG | MM_F_NO_DUAL | MM_F_NO_LJOIN; // -D -P --no-long-join --dual=no
|
172
|
+
else if (c == 'a') opt.flag |= MM_F_OUT_SAM | MM_F_CIGAR;
|
173
|
+
else if (c == 'Q') opt.flag |= MM_F_NO_QUAL;
|
174
|
+
else if (c == 'Y') opt.flag |= MM_F_SOFTCLIP;
|
175
|
+
else if (c == 'L') opt.flag |= MM_F_LONG_CIGAR;
|
176
|
+
else if (c == 'y') opt.flag |= MM_F_COPY_COMMENT;
|
177
|
+
else if (c == 'T') opt.sdust_thres = atoi(o.arg);
|
178
|
+
else if (c == 'n') opt.min_cnt = atoi(o.arg);
|
179
|
+
else if (c == 'm') opt.min_chain_score = atoi(o.arg);
|
180
|
+
else if (c == 'A') opt.a = atoi(o.arg);
|
181
|
+
else if (c == 'B') opt.b = atoi(o.arg);
|
182
|
+
else if (c == 's') opt.min_dp_max = atoi(o.arg);
|
183
|
+
else if (c == 'C') opt.noncan = atoi(o.arg);
|
184
|
+
else if (c == 'I') ipt.batch_size = mm_parse_num(o.arg);
|
185
|
+
else if (c == 'K') opt.mini_batch_size = mm_parse_num(o.arg);
|
186
|
+
else if (c == 'e') opt.occ_dist = mm_parse_num(o.arg);
|
187
|
+
else if (c == 'R') rg = o.arg;
|
188
|
+
else if (c == 'h') fp_help = stdout;
|
189
|
+
else if (c == '2') opt.flag |= MM_F_2_IO_THREADS;
|
190
|
+
else if (c == 'o') {
|
191
|
+
if (strcmp(o.arg, "-") != 0) {
|
192
|
+
if (freopen(o.arg, "wb", stdout) == NULL) {
|
193
|
+
fprintf(stderr, "[ERROR]\033[1;31m failed to write the output to file '%s'\033[0m: %s\n", o.arg, strerror(errno));
|
194
|
+
exit(1);
|
195
|
+
}
|
196
|
+
}
|
197
|
+
}
|
198
|
+
else if (c == 300) ipt.bucket_bits = atoi(o.arg); // --bucket-bits
|
199
|
+
else if (c == 302) opt.seed = atoi(o.arg); // --seed
|
200
|
+
else if (c == 303) mm_dbg_flag |= MM_DBG_NO_KALLOC; // --no-kalloc
|
201
|
+
else if (c == 304) mm_dbg_flag |= MM_DBG_PRINT_QNAME; // --print-qname
|
202
|
+
else if (c == 306) mm_dbg_flag |= MM_DBG_PRINT_QNAME | MM_DBG_PRINT_SEED, n_threads = 1; // --print-seed
|
203
|
+
else if (c == 307) opt.max_chain_skip = atoi(o.arg); // --max-chain-skip
|
204
|
+
else if (c == 339) opt.max_chain_iter = atoi(o.arg); // --max-chain-iter
|
205
|
+
else if (c == 308) opt.min_ksw_len = atoi(o.arg); // --min-dp-len
|
206
|
+
else if (c == 309) mm_dbg_flag |= MM_DBG_PRINT_QNAME | MM_DBG_PRINT_ALN_SEQ, n_threads = 1; // --print-aln-seq
|
207
|
+
else if (c == 310) opt.flag |= MM_F_SPLICE; // --splice
|
208
|
+
else if (c == 312) opt.flag |= MM_F_NO_LJOIN; // --no-long-join
|
209
|
+
else if (c == 313) opt.flag |= MM_F_SR; // --sr
|
210
|
+
else if (c == 317) opt.end_bonus = atoi(o.arg); // --end-bonus
|
211
|
+
else if (c == 318) opt.flag |= MM_F_INDEPEND_SEG; // --no-pairing
|
212
|
+
else if (c == 320) ipt.flag |= MM_I_NO_SEQ; // --idx-no-seq
|
213
|
+
else if (c == 321) opt.anchor_ext_shift = atoi(o.arg); // --end-seed-pen
|
214
|
+
else if (c == 322) opt.flag |= MM_F_FOR_ONLY; // --for-only
|
215
|
+
else if (c == 323) opt.flag |= MM_F_REV_ONLY; // --rev-only
|
216
|
+
else if (c == 327) opt.max_clip_ratio = atof(o.arg); // --max-clip-ratio
|
217
|
+
else if (c == 328) opt.min_mid_occ = atoi(o.arg); // --min-occ-floor
|
218
|
+
else if (c == 329) opt.flag |= MM_F_OUT_MD; // --MD
|
219
|
+
else if (c == 331) opt.sc_ambi = atoi(o.arg); // --score-N
|
220
|
+
else if (c == 332) opt.flag |= MM_F_EQX; // --eqx
|
221
|
+
else if (c == 333) opt.flag |= MM_F_PAF_NO_HIT; // --paf-no-hit
|
222
|
+
else if (c == 334) opt.split_prefix = o.arg; // --split-prefix
|
223
|
+
else if (c == 335) opt.flag |= MM_F_NO_END_FLT; // --no-end-flt
|
224
|
+
else if (c == 336) opt.flag |= MM_F_HARD_MLEVEL; // --hard-mask-level
|
225
|
+
else if (c == 337) opt.max_sw_mat = mm_parse_num(o.arg); // --cap-sw-mat
|
226
|
+
else if (c == 338) opt.max_qlen = mm_parse_num(o.arg); // --max-qlen
|
227
|
+
else if (c == 340) junc_bed = o.arg; // --junc-bed
|
228
|
+
else if (c == 341) opt.junc_bonus = atoi(o.arg); // --junc-bonus
|
229
|
+
else if (c == 342) opt.flag |= MM_F_SAM_HIT_ONLY; // --sam-hit-only
|
230
|
+
else if (c == 343) opt.chain_gap_scale = atof(o.arg); // --chain-gap-scale
|
231
|
+
else if (c == 351) opt.chain_skip_scale = atof(o.arg); // --chain-skip-scale
|
232
|
+
else if (c == 344) alt_list = o.arg; // --alt
|
233
|
+
else if (c == 345) opt.alt_drop = atof(o.arg); // --alt-drop
|
234
|
+
else if (c == 346) opt.mask_len = mm_parse_num(o.arg); // --mask-len
|
235
|
+
else if (c == 348) opt.flag |= MM_F_QSTRAND | MM_F_NO_INV; // --qstrand
|
236
|
+
else if (c == 349) opt.cap_kalloc = mm_parse_num(o.arg); // --cap-kalloc
|
237
|
+
else if (c == 350) opt.q_occ_frac = atof(o.arg); // --q-occ-frac
|
238
|
+
else if (c == 352) mm_dbg_flag |= MM_DBG_PRINT_CHAIN; // --print-chains
|
239
|
+
else if (c == 353) opt.flag |= MM_F_NO_HASH_NAME; // --no-hash-name
|
240
|
+
else if (c == 330) {
|
241
|
+
fprintf(stderr, "[WARNING] \033[1;31m --lj-min-ratio has been deprecated.\033[0m\n");
|
242
|
+
} else if (c == 314) { // --frag
|
243
|
+
yes_or_no(&opt, MM_F_FRAG_MODE, o.longidx, o.arg, 1);
|
244
|
+
} else if (c == 315) { // --secondary
|
245
|
+
yes_or_no(&opt, MM_F_NO_PRINT_2ND, o.longidx, o.arg, 0);
|
246
|
+
} else if (c == 316) { // --cs
|
247
|
+
opt.flag |= MM_F_OUT_CS | MM_F_CIGAR;
|
248
|
+
if (o.arg == 0 || strcmp(o.arg, "short") == 0) {
|
249
|
+
opt.flag &= ~MM_F_OUT_CS_LONG;
|
250
|
+
} else if (strcmp(o.arg, "long") == 0) {
|
251
|
+
opt.flag |= MM_F_OUT_CS_LONG;
|
252
|
+
} else if (strcmp(o.arg, "none") == 0) {
|
253
|
+
opt.flag &= ~MM_F_OUT_CS;
|
254
|
+
} else if (mm_verbose >= 2) {
|
255
|
+
fprintf(stderr, "[WARNING]\033[1;31m --cs only takes 'short' or 'long'. Invalid values are assumed to be 'short'.\033[0m\n");
|
256
|
+
}
|
257
|
+
} else if (c == 319) { // --splice-flank
|
258
|
+
yes_or_no(&opt, MM_F_SPLICE_FLANK, o.longidx, o.arg, 1);
|
259
|
+
} else if (c == 324) { // --heap-sort
|
260
|
+
yes_or_no(&opt, MM_F_HEAP_SORT, o.longidx, o.arg, 1);
|
261
|
+
} else if (c == 326) { // --dual
|
262
|
+
yes_or_no(&opt, MM_F_NO_DUAL, o.longidx, o.arg, 0);
|
263
|
+
} else if (c == 347) { // --rmq
|
264
|
+
yes_or_no(&opt, MM_F_RMQ, o.longidx, o.arg, 1);
|
265
|
+
} else if (c == 'S') {
|
266
|
+
opt.flag |= MM_F_OUT_CS | MM_F_CIGAR | MM_F_OUT_CS_LONG;
|
267
|
+
if (mm_verbose >= 2)
|
268
|
+
fprintf(stderr, "[WARNING]\033[1;31m option -S is deprecated and may be removed in future. Please use --cs=long instead.\033[0m\n");
|
269
|
+
} else if (c == 'V') {
|
270
|
+
puts(MM_VERSION);
|
271
|
+
return 0;
|
272
|
+
} else if (c == 'r') {
|
273
|
+
opt.bw = (int)mm_parse_num2(o.arg, &s);
|
274
|
+
if (*s == ',') opt.bw_long = (int)mm_parse_num2(s + 1, &s);
|
275
|
+
} else if (c == 'U') {
|
276
|
+
opt.min_mid_occ = strtol(o.arg, &s, 10);
|
277
|
+
if (*s == ',') opt.max_mid_occ = strtol(s + 1, &s, 10);
|
278
|
+
} else if (c == 'f') {
|
279
|
+
double x;
|
280
|
+
char *p;
|
281
|
+
x = strtod(o.arg, &p);
|
282
|
+
if (x < 1.0) opt.mid_occ_frac = x, opt.mid_occ = 0;
|
283
|
+
else opt.mid_occ = (int)(x + .499);
|
284
|
+
if (*p == ',') opt.max_occ = (int)(strtod(p+1, &p) + .499);
|
285
|
+
} else if (c == 'u') {
|
286
|
+
if (*o.arg == 'b') opt.flag |= MM_F_SPLICE_FOR|MM_F_SPLICE_REV; // both strands
|
287
|
+
else if (*o.arg == 'f') opt.flag |= MM_F_SPLICE_FOR, opt.flag &= ~MM_F_SPLICE_REV; // match GT-AG
|
288
|
+
else if (*o.arg == 'r') opt.flag |= MM_F_SPLICE_REV, opt.flag &= ~MM_F_SPLICE_FOR; // match CT-AC (reverse complement of GT-AG)
|
289
|
+
else if (*o.arg == 'n') opt.flag &= ~(MM_F_SPLICE_FOR|MM_F_SPLICE_REV); // don't try to match the GT-AG signal
|
290
|
+
else {
|
291
|
+
fprintf(stderr, "[ERROR]\033[1;31m unrecognized cDNA direction\033[0m\n");
|
292
|
+
return 1;
|
293
|
+
}
|
294
|
+
} else if (c == 'z') {
|
295
|
+
opt.zdrop = opt.zdrop_inv = strtol(o.arg, &s, 10);
|
296
|
+
if (*s == ',') opt.zdrop_inv = strtol(s + 1, &s, 10);
|
297
|
+
} else if (c == 'O') {
|
298
|
+
opt.q = opt.q2 = strtol(o.arg, &s, 10);
|
299
|
+
if (*s == ',') opt.q2 = strtol(s + 1, &s, 10);
|
300
|
+
} else if (c == 'E') {
|
301
|
+
opt.e = opt.e2 = strtol(o.arg, &s, 10);
|
302
|
+
if (*s == ',') opt.e2 = strtol(s + 1, &s, 10);
|
303
|
+
}
|
304
|
+
}
|
305
|
+
if ((opt.flag & MM_F_SPLICE) && (opt.flag & MM_F_FRAG_MODE)) {
|
306
|
+
fprintf(stderr, "[ERROR]\033[1;31m --splice and --frag should not be specified at the same time.\033[0m\n");
|
307
|
+
return 1;
|
308
|
+
}
|
309
|
+
if (!fnw && !(opt.flag&MM_F_CIGAR))
|
310
|
+
ipt.flag |= MM_I_NO_SEQ;
|
311
|
+
if (mm_check_opt(&ipt, &opt) < 0)
|
312
|
+
return 1;
|
313
|
+
if (opt.best_n == 0) {
|
314
|
+
fprintf(stderr, "[WARNING]\033[1;31m changed '-N 0' to '-N %d --secondary=no'.\033[0m\n", old_best_n);
|
315
|
+
opt.best_n = old_best_n, opt.flag |= MM_F_NO_PRINT_2ND;
|
316
|
+
}
|
317
|
+
|
318
|
+
if (argc == o.ind || fp_help == stdout) {
|
319
|
+
fprintf(fp_help, "Usage: minimap2 [options] <target.fa>|<target.idx> [query.fa] [...]\n");
|
320
|
+
fprintf(fp_help, "Options:\n");
|
321
|
+
fprintf(fp_help, " Indexing:\n");
|
322
|
+
fprintf(fp_help, " -H use homopolymer-compressed k-mer (preferrable for PacBio)\n");
|
323
|
+
fprintf(fp_help, " -k INT k-mer size (no larger than 28) [%d]\n", ipt.k);
|
324
|
+
fprintf(fp_help, " -w INT minimizer window size [%d]\n", ipt.w);
|
325
|
+
fprintf(fp_help, " -I NUM split index for every ~NUM input bases [4G]\n");
|
326
|
+
fprintf(fp_help, " -d FILE dump index to FILE []\n");
|
327
|
+
fprintf(fp_help, " Mapping:\n");
|
328
|
+
fprintf(fp_help, " -f FLOAT filter out top FLOAT fraction of repetitive minimizers [%g]\n", opt.mid_occ_frac);
|
329
|
+
fprintf(fp_help, " -g NUM stop chain enlongation if there are no minimizers in INT-bp [%d]\n", opt.max_gap);
|
330
|
+
fprintf(fp_help, " -G NUM max intron length (effective with -xsplice; changing -r) [200k]\n");
|
331
|
+
fprintf(fp_help, " -F NUM max fragment length (effective with -xsr or in the fragment mode) [800]\n");
|
332
|
+
fprintf(fp_help, " -r NUM[,NUM] chaining/alignment bandwidth and long-join bandwidth [%d,%d]\n", opt.bw, opt.bw_long);
|
333
|
+
fprintf(fp_help, " -n INT minimal number of minimizers on a chain [%d]\n", opt.min_cnt);
|
334
|
+
fprintf(fp_help, " -m INT minimal chaining score (matching bases minus log gap penalty) [%d]\n", opt.min_chain_score);
|
335
|
+
// fprintf(fp_help, " -T INT SDUST threshold; 0 to disable SDUST [%d]\n", opt.sdust_thres); // TODO: this option is never used; might be buggy
|
336
|
+
fprintf(fp_help, " -X skip self and dual mappings (for the all-vs-all mode)\n");
|
337
|
+
fprintf(fp_help, " -p FLOAT min secondary-to-primary score ratio [%g]\n", opt.pri_ratio);
|
338
|
+
fprintf(fp_help, " -N INT retain at most INT secondary alignments [%d]\n", opt.best_n);
|
339
|
+
fprintf(fp_help, " Alignment:\n");
|
340
|
+
fprintf(fp_help, " -A INT matching score [%d]\n", opt.a);
|
341
|
+
fprintf(fp_help, " -B INT mismatch penalty (larger value for lower divergence) [%d]\n", opt.b);
|
342
|
+
fprintf(fp_help, " -O INT[,INT] gap open penalty [%d,%d]\n", opt.q, opt.q2);
|
343
|
+
fprintf(fp_help, " -E INT[,INT] gap extension penalty; a k-long gap costs min{O1+k*E1,O2+k*E2} [%d,%d]\n", opt.e, opt.e2);
|
344
|
+
fprintf(fp_help, " -z INT[,INT] Z-drop score and inversion Z-drop score [%d,%d]\n", opt.zdrop, opt.zdrop_inv);
|
345
|
+
fprintf(fp_help, " -s INT minimal peak DP alignment score [%d]\n", opt.min_dp_max);
|
346
|
+
fprintf(fp_help, " -u CHAR how to find GT-AG. f:transcript strand, b:both strands, n:don't match GT-AG [n]\n");
|
347
|
+
fprintf(fp_help, " Input/Output:\n");
|
348
|
+
fprintf(fp_help, " -a output in the SAM format (PAF by default)\n");
|
349
|
+
fprintf(fp_help, " -o FILE output alignments to FILE [stdout]\n");
|
350
|
+
fprintf(fp_help, " -L write CIGAR with >65535 ops at the CG tag\n");
|
351
|
+
fprintf(fp_help, " -R STR SAM read group line in a format like '@RG\\tID:foo\\tSM:bar' []\n");
|
352
|
+
fprintf(fp_help, " -c output CIGAR in PAF\n");
|
353
|
+
fprintf(fp_help, " --cs[=STR] output the cs tag; STR is 'short' (if absent) or 'long' [none]\n");
|
354
|
+
fprintf(fp_help, " --MD output the MD tag\n");
|
355
|
+
fprintf(fp_help, " --eqx write =/X CIGAR operators\n");
|
356
|
+
fprintf(fp_help, " -Y use soft clipping for supplementary alignments\n");
|
357
|
+
fprintf(fp_help, " -t INT number of threads [%d]\n", n_threads);
|
358
|
+
fprintf(fp_help, " -K NUM minibatch size for mapping [500M]\n");
|
359
|
+
// fprintf(fp_help, " -v INT verbose level [%d]\n", mm_verbose);
|
360
|
+
fprintf(fp_help, " --version show version number\n");
|
361
|
+
fprintf(fp_help, " Preset:\n");
|
362
|
+
fprintf(fp_help, " -x STR preset (always applied before other options; see minimap2.1 for details) []\n");
|
363
|
+
fprintf(fp_help, " - map-pb/map-ont - PacBio CLR/Nanopore vs reference mapping\n");
|
364
|
+
fprintf(fp_help, " - map-hifi - PacBio HiFi reads vs reference mapping\n");
|
365
|
+
fprintf(fp_help, " - ava-pb/ava-ont - PacBio/Nanopore read overlap\n");
|
366
|
+
fprintf(fp_help, " - asm5/asm10/asm20 - asm-to-ref mapping, for ~0.1/1/5%% sequence divergence\n");
|
367
|
+
fprintf(fp_help, " - splice/splice:hq - long-read/Pacbio-CCS spliced alignment\n");
|
368
|
+
fprintf(fp_help, " - sr - genomic short-read mapping\n");
|
369
|
+
fprintf(fp_help, "\nSee `man ./minimap2.1' for detailed description of these and other advanced command-line options.\n");
|
370
|
+
return fp_help == stdout? 0 : 1;
|
371
|
+
}
|
372
|
+
|
373
|
+
if ((opt.flag & MM_F_SR) && argc - o.ind > 3) {
|
374
|
+
fprintf(stderr, "[ERROR] incorrect input: in the sr mode, please specify no more than two query files.\n");
|
375
|
+
return 1;
|
376
|
+
}
|
377
|
+
idx_rdr = mm_idx_reader_open(argv[o.ind], &ipt, fnw);
|
378
|
+
if (idx_rdr == 0) {
|
379
|
+
fprintf(stderr, "[ERROR] failed to open file '%s': %s\n", argv[o.ind], strerror(errno));
|
380
|
+
return 1;
|
381
|
+
}
|
382
|
+
if (!idx_rdr->is_idx && fnw == 0 && argc - o.ind < 2) {
|
383
|
+
fprintf(stderr, "[ERROR] missing input: please specify a query file to map or option -d to keep the index\n");
|
384
|
+
mm_idx_reader_close(idx_rdr);
|
385
|
+
return 1;
|
386
|
+
}
|
387
|
+
if (opt.best_n == 0 && (opt.flag&MM_F_CIGAR) && mm_verbose >= 2)
|
388
|
+
fprintf(stderr, "[WARNING]\033[1;31m `-N 0' reduces alignment accuracy. Please use --secondary=no to suppress secondary alignments.\033[0m\n");
|
389
|
+
while ((mi = mm_idx_reader_read(idx_rdr, n_threads)) != 0) {
|
390
|
+
int ret;
|
391
|
+
if ((opt.flag & MM_F_CIGAR) && (mi->flag & MM_I_NO_SEQ)) {
|
392
|
+
fprintf(stderr, "[ERROR] the prebuilt index doesn't contain sequences.\n");
|
393
|
+
mm_idx_destroy(mi);
|
394
|
+
mm_idx_reader_close(idx_rdr);
|
395
|
+
return 1;
|
396
|
+
}
|
397
|
+
if ((opt.flag & MM_F_OUT_SAM) && idx_rdr->n_parts == 1) {
|
398
|
+
if (mm_idx_reader_eof(idx_rdr)) {
|
399
|
+
if (opt.split_prefix == 0)
|
400
|
+
ret = mm_write_sam_hdr(mi, rg, MM_VERSION, argc, argv);
|
401
|
+
else
|
402
|
+
ret = mm_write_sam_hdr(0, rg, MM_VERSION, argc, argv);
|
403
|
+
} else {
|
404
|
+
ret = mm_write_sam_hdr(0, rg, MM_VERSION, argc, argv);
|
405
|
+
if (opt.split_prefix == 0 && mm_verbose >= 2)
|
406
|
+
fprintf(stderr, "[WARNING]\033[1;31m For a multi-part index, no @SQ lines will be outputted. Please use --split-prefix.\033[0m\n");
|
407
|
+
}
|
408
|
+
if (ret != 0) {
|
409
|
+
mm_idx_destroy(mi);
|
410
|
+
mm_idx_reader_close(idx_rdr);
|
411
|
+
return 1;
|
412
|
+
}
|
413
|
+
}
|
414
|
+
if (mm_verbose >= 3)
|
415
|
+
fprintf(stderr, "[M::%s::%.3f*%.2f] loaded/built the index for %d target sequence(s)\n",
|
416
|
+
__func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0), mi->n_seq);
|
417
|
+
if (argc != o.ind + 1) mm_mapopt_update(&opt, mi);
|
418
|
+
if (mm_verbose >= 3) mm_idx_stat(mi);
|
419
|
+
if (junc_bed) mm_idx_bed_read(mi, junc_bed, 1);
|
420
|
+
if (alt_list) mm_idx_alt_read(mi, alt_list);
|
421
|
+
if (argc - (o.ind + 1) == 0) {
|
422
|
+
mm_idx_destroy(mi);
|
423
|
+
continue; // no query files
|
424
|
+
}
|
425
|
+
ret = 0;
|
426
|
+
if (!(opt.flag & MM_F_FRAG_MODE)) {
|
427
|
+
for (i = o.ind + 1; i < argc; ++i) {
|
428
|
+
ret = mm_map_file(mi, argv[i], &opt, n_threads);
|
429
|
+
if (ret < 0) break;
|
430
|
+
}
|
431
|
+
} else {
|
432
|
+
ret = mm_map_file_frag(mi, argc - (o.ind + 1), (const char**)&argv[o.ind + 1], &opt, n_threads);
|
433
|
+
}
|
434
|
+
mm_idx_destroy(mi);
|
435
|
+
if (ret < 0) {
|
436
|
+
fprintf(stderr, "ERROR: failed to map the query file\n");
|
437
|
+
exit(EXIT_FAILURE);
|
438
|
+
}
|
439
|
+
}
|
440
|
+
n_parts = idx_rdr->n_parts;
|
441
|
+
mm_idx_reader_close(idx_rdr);
|
442
|
+
|
443
|
+
if (opt.split_prefix)
|
444
|
+
mm_split_merge(argc - (o.ind + 1), (const char**)&argv[o.ind + 1], &opt, n_parts);
|
445
|
+
|
446
|
+
if (fflush(stdout) == EOF) {
|
447
|
+
perror("[ERROR] failed to write the results");
|
448
|
+
exit(EXIT_FAILURE);
|
449
|
+
}
|
450
|
+
|
451
|
+
if (mm_verbose >= 3) {
|
452
|
+
fprintf(stderr, "[M::%s] Version: %s\n", __func__, MM_VERSION);
|
453
|
+
fprintf(stderr, "[M::%s] CMD:", __func__);
|
454
|
+
for (i = 0; i < argc; ++i)
|
455
|
+
fprintf(stderr, " %s", argv[i]);
|
456
|
+
fprintf(stderr, "\n[M::%s] Real time: %.3f sec; CPU: %.3f sec; Peak RSS: %.3f GB\n", __func__, realtime() - mm_realtime0, cputime(), peakrss() / 1024.0 / 1024.0 / 1024.0);
|
457
|
+
}
|
458
|
+
return 0;
|
459
|
+
}
|