minimap2 0.2.22.0 → 0.2.24.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +60 -76
  3. data/ext/Rakefile +55 -0
  4. data/ext/cmappy/cmappy.c +129 -0
  5. data/ext/cmappy/cmappy.h +44 -0
  6. data/ext/minimap2/FAQ.md +46 -0
  7. data/ext/minimap2/LICENSE.txt +24 -0
  8. data/ext/minimap2/MANIFEST.in +10 -0
  9. data/ext/minimap2/Makefile +132 -0
  10. data/ext/minimap2/Makefile.simde +97 -0
  11. data/ext/minimap2/NEWS.md +821 -0
  12. data/ext/minimap2/README.md +403 -0
  13. data/ext/minimap2/align.c +1020 -0
  14. data/ext/minimap2/bseq.c +169 -0
  15. data/ext/minimap2/bseq.h +64 -0
  16. data/ext/minimap2/code_of_conduct.md +30 -0
  17. data/ext/minimap2/cookbook.md +243 -0
  18. data/ext/minimap2/esterr.c +64 -0
  19. data/ext/minimap2/example.c +63 -0
  20. data/ext/minimap2/format.c +559 -0
  21. data/ext/minimap2/hit.c +466 -0
  22. data/ext/minimap2/index.c +775 -0
  23. data/ext/minimap2/kalloc.c +205 -0
  24. data/ext/minimap2/kalloc.h +76 -0
  25. data/ext/minimap2/kdq.h +132 -0
  26. data/ext/minimap2/ketopt.h +120 -0
  27. data/ext/minimap2/khash.h +615 -0
  28. data/ext/minimap2/krmq.h +474 -0
  29. data/ext/minimap2/kseq.h +256 -0
  30. data/ext/minimap2/ksort.h +153 -0
  31. data/ext/minimap2/ksw2.h +184 -0
  32. data/ext/minimap2/ksw2_dispatch.c +96 -0
  33. data/ext/minimap2/ksw2_extd2_sse.c +402 -0
  34. data/ext/minimap2/ksw2_exts2_sse.c +416 -0
  35. data/ext/minimap2/ksw2_extz2_sse.c +313 -0
  36. data/ext/minimap2/ksw2_ll_sse.c +152 -0
  37. data/ext/minimap2/kthread.c +159 -0
  38. data/ext/minimap2/kthread.h +15 -0
  39. data/ext/minimap2/kvec.h +105 -0
  40. data/ext/minimap2/lchain.c +369 -0
  41. data/ext/minimap2/main.c +459 -0
  42. data/ext/minimap2/map.c +714 -0
  43. data/ext/minimap2/minimap.h +410 -0
  44. data/ext/minimap2/minimap2.1 +725 -0
  45. data/ext/minimap2/misc/README.md +179 -0
  46. data/ext/minimap2/misc/mmphase.js +335 -0
  47. data/ext/minimap2/misc/paftools.js +3149 -0
  48. data/ext/minimap2/misc.c +162 -0
  49. data/ext/minimap2/mmpriv.h +132 -0
  50. data/ext/minimap2/options.c +234 -0
  51. data/ext/minimap2/pe.c +177 -0
  52. data/ext/minimap2/python/README.rst +196 -0
  53. data/ext/minimap2/python/cmappy.h +152 -0
  54. data/ext/minimap2/python/cmappy.pxd +153 -0
  55. data/ext/minimap2/python/mappy.pyx +273 -0
  56. data/ext/minimap2/python/minimap2.py +39 -0
  57. data/ext/minimap2/sdust.c +213 -0
  58. data/ext/minimap2/sdust.h +25 -0
  59. data/ext/minimap2/seed.c +131 -0
  60. data/ext/minimap2/setup.py +55 -0
  61. data/ext/minimap2/sketch.c +143 -0
  62. data/ext/minimap2/splitidx.c +84 -0
  63. data/ext/minimap2/sse2neon/emmintrin.h +1689 -0
  64. data/ext/minimap2/test/MT-human.fa +278 -0
  65. data/ext/minimap2/test/MT-orang.fa +276 -0
  66. data/ext/minimap2/test/q-inv.fa +4 -0
  67. data/ext/minimap2/test/q2.fa +2 -0
  68. data/ext/minimap2/test/t-inv.fa +127 -0
  69. data/ext/minimap2/test/t2.fa +2 -0
  70. data/ext/minimap2/tex/Makefile +21 -0
  71. data/ext/minimap2/tex/bioinfo.cls +930 -0
  72. data/ext/minimap2/tex/blasr-mc.eval +17 -0
  73. data/ext/minimap2/tex/bowtie2-s3.sam.eval +28 -0
  74. data/ext/minimap2/tex/bwa-s3.sam.eval +52 -0
  75. data/ext/minimap2/tex/bwa.eval +55 -0
  76. data/ext/minimap2/tex/eval2roc.pl +33 -0
  77. data/ext/minimap2/tex/graphmap.eval +4 -0
  78. data/ext/minimap2/tex/hs38-simu.sh +10 -0
  79. data/ext/minimap2/tex/minialign.eval +49 -0
  80. data/ext/minimap2/tex/minimap2.bib +460 -0
  81. data/ext/minimap2/tex/minimap2.tex +724 -0
  82. data/ext/minimap2/tex/mm2-s3.sam.eval +62 -0
  83. data/ext/minimap2/tex/mm2-update.tex +240 -0
  84. data/ext/minimap2/tex/mm2.approx.eval +12 -0
  85. data/ext/minimap2/tex/mm2.eval +13 -0
  86. data/ext/minimap2/tex/natbib.bst +1288 -0
  87. data/ext/minimap2/tex/natbib.sty +803 -0
  88. data/ext/minimap2/tex/ngmlr.eval +38 -0
  89. data/ext/minimap2/tex/roc.gp +60 -0
  90. data/ext/minimap2/tex/snap-s3.sam.eval +62 -0
  91. data/ext/minimap2.patch +19 -0
  92. data/lib/minimap2/aligner.rb +4 -4
  93. data/lib/minimap2/alignment.rb +11 -11
  94. data/lib/minimap2/ffi/constants.rb +20 -16
  95. data/lib/minimap2/ffi/functions.rb +5 -0
  96. data/lib/minimap2/ffi.rb +4 -5
  97. data/lib/minimap2/version.rb +2 -2
  98. data/lib/minimap2.rb +51 -15
  99. metadata +97 -79
  100. data/lib/minimap2/ffi_helper.rb +0 -53
  101. data/vendor/libminimap2.so +0 -0
@@ -0,0 +1,459 @@
1
+ #include <stdlib.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+ #include <errno.h>
5
+ #include "bseq.h"
6
+ #include "minimap.h"
7
+ #include "mmpriv.h"
8
+ #include "ketopt.h"
9
+
10
+ #define MM_VERSION "2.24-r1122"
11
+
12
#ifdef __linux__
#include <sys/resource.h>
#include <sys/time.h>
/*
 * Raise the soft address-space limit (RLIMIT_AS) of the current process to
 * its hard limit. This is best effort: the function reports nothing and
 * leaves the limits untouched if they cannot be queried.
 */
void liftrlimit()
{
	struct rlimit r;
	// On failure getrlimit() does not fill in r; writing it back via
	// setrlimit() would install indeterminate limits, so bail out instead.
	if (getrlimit(RLIMIT_AS, &r) != 0) return;
	r.rlim_cur = r.rlim_max;
	(void)setrlimit(RLIMIT_AS, &r); // failure is tolerated: the old limit simply stays in effect
}
#else
/* No-op on platforms other than Linux. */
void liftrlimit() {}
#endif
25
+
26
+ static ko_longopt_t long_options[] = {
27
+ { "bucket-bits", ko_required_argument, 300 },
28
+ { "mb-size", ko_required_argument, 'K' },
29
+ { "seed", ko_required_argument, 302 },
30
+ { "no-kalloc", ko_no_argument, 303 },
31
+ { "print-qname", ko_no_argument, 304 },
32
+ { "no-self", ko_no_argument, 'D' },
33
+ { "print-seeds", ko_no_argument, 306 },
34
+ { "max-chain-skip", ko_required_argument, 307 },
35
+ { "min-dp-len", ko_required_argument, 308 },
36
+ { "print-aln-seq", ko_no_argument, 309 },
37
+ { "splice", ko_no_argument, 310 },
38
+ { "cost-non-gt-ag", ko_required_argument, 'C' },
39
+ { "no-long-join", ko_no_argument, 312 },
40
+ { "sr", ko_no_argument, 313 },
41
+ { "frag", ko_required_argument, 314 },
42
+ { "secondary", ko_required_argument, 315 },
43
+ { "cs", ko_optional_argument, 316 },
44
+ { "end-bonus", ko_required_argument, 317 },
45
+ { "no-pairing", ko_no_argument, 318 },
46
+ { "splice-flank", ko_required_argument, 319 },
47
+ { "idx-no-seq", ko_no_argument, 320 },
48
+ { "end-seed-pen", ko_required_argument, 321 },
49
+ { "for-only", ko_no_argument, 322 },
50
+ { "rev-only", ko_no_argument, 323 },
51
+ { "heap-sort", ko_required_argument, 324 },
52
+ { "all-chain", ko_no_argument, 'P' },
53
+ { "dual", ko_required_argument, 326 },
54
+ { "max-clip-ratio", ko_required_argument, 327 },
55
+ { "min-occ-floor", ko_required_argument, 328 },
56
+ { "MD", ko_no_argument, 329 },
57
+ { "lj-min-ratio", ko_required_argument, 330 },
58
+ { "score-N", ko_required_argument, 331 },
59
+ { "eqx", ko_no_argument, 332 },
60
+ { "paf-no-hit", ko_no_argument, 333 },
61
+ { "split-prefix", ko_required_argument, 334 },
62
+ { "no-end-flt", ko_no_argument, 335 },
63
+ { "hard-mask-level",ko_no_argument, 336 },
64
+ { "cap-sw-mem", ko_required_argument, 337 },
65
+ { "max-qlen", ko_required_argument, 338 },
66
+ { "max-chain-iter", ko_required_argument, 339 },
67
+ { "junc-bed", ko_required_argument, 340 },
68
+ { "junc-bonus", ko_required_argument, 341 },
69
+ { "sam-hit-only", ko_no_argument, 342 },
70
+ { "chain-gap-scale",ko_required_argument, 343 },
71
+ { "alt", ko_required_argument, 344 },
72
+ { "alt-drop", ko_required_argument, 345 },
73
+ { "mask-len", ko_required_argument, 346 },
74
+ { "rmq", ko_optional_argument, 347 },
75
+ { "qstrand", ko_no_argument, 348 },
76
+ { "cap-kalloc", ko_required_argument, 349 },
77
+ { "q-occ-frac", ko_required_argument, 350 },
78
+ { "chain-skip-scale",ko_required_argument,351 },
79
+ { "print-chains", ko_no_argument, 352 },
80
+ { "no-hash-name", ko_no_argument, 353 },
81
+ { "help", ko_no_argument, 'h' },
82
+ { "max-intron-len", ko_required_argument, 'G' },
83
+ { "version", ko_no_argument, 'V' },
84
+ { "min-count", ko_required_argument, 'n' },
85
+ { "min-chain-score",ko_required_argument, 'm' },
86
+ { "mask-level", ko_required_argument, 'M' },
87
+ { "min-dp-score", ko_required_argument, 's' },
88
+ { "sam", ko_no_argument, 'a' },
89
+ { 0, 0, 0 }
90
+ };
91
+
92
/*
 * Parse a decimal number with an optional one-letter scale suffix.
 *
 * The leading number is read with strtod(). A directly following 'G'/'g',
 * 'M'/'m' or 'K'/'k' multiplies it by 1e9, 1e6 or 1e3 respectively and is
 * consumed. The result is rounded to an integer with a bias of .499 so that
 * tiny floating-point errors introduced by the scaling do not change it.
 *
 * @param str  text to parse
 * @param q    if non-NULL, *q receives the position right after the number
 *             and its suffix (equal to str when nothing could be parsed)
 * @return the rounded value; 0 for unparsable or NaN input; values outside
 *         the int64_t range are clamped to INT64_MIN / INT64_MAX
 */
static inline int64_t mm_parse_num2(const char *str, char **q)
{
	double x, r;
	char *p;
	x = strtod(str, &p);
	if (*p == 'G' || *p == 'g') x *= 1e9, ++p;
	else if (*p == 'M' || *p == 'm') x *= 1e6, ++p;
	else if (*p == 'K' || *p == 'k') x *= 1e3, ++p;
	if (q) *q = p;
	if (x != x) return 0; // NaN: converting it to an integer would be undefined behaviour
	// Bias away from zero on both sides; adding .499 to a negative value and
	// truncating would turn e.g. -2 into -1.
	r = x < 0.0? x - .499 : x + .499;
	// 2^63 is the first magnitude that no longer fits; converting an
	// out-of-range double to int64_t is undefined, so clamp instead.
	if (r >= 9223372036854775808.0) return INT64_MAX;
	if (r <= -9223372036854775808.0) return INT64_MIN;
	return (int64_t)r;
}
103
+
104
/*
 * Convenience form of mm_parse_num2() for callers that do not need to know
 * where parsing stopped: parses str (a number with an optional G/M/K suffix)
 * and returns the rounded integer value.
 */
static inline int64_t mm_parse_num(const char *str)
{
	int64_t value = mm_parse_num2(str, (char **)0);
	return value;
}
108
+
109
+ static inline void yes_or_no(mm_mapopt_t *opt, int64_t flag, int long_idx, const char *arg, int yes_to_set)
110
+ {
111
+ if (yes_to_set) {
112
+ if (strcmp(arg, "yes") == 0 || strcmp(arg, "y") == 0) opt->flag |= flag;
113
+ else if (strcmp(arg, "no") == 0 || strcmp(arg, "n") == 0) opt->flag &= ~flag;
114
+ else fprintf(stderr, "[WARNING]\033[1;31m option '--%s' only accepts 'yes' or 'no'.\033[0m\n", long_options[long_idx].name);
115
+ } else {
116
+ if (strcmp(arg, "yes") == 0 || strcmp(arg, "y") == 0) opt->flag &= ~flag;
117
+ else if (strcmp(arg, "no") == 0 || strcmp(arg, "n") == 0) opt->flag |= flag;
118
+ else fprintf(stderr, "[WARNING]\033[1;31m option '--%s' only accepts 'yes' or 'no'.\033[0m\n", long_options[long_idx].name);
119
+ }
120
+ }
121
+
122
+ int main(int argc, char *argv[])
123
+ {
124
+ const char *opt_str = "2aSDw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:LC:yYPo:e:U:";
125
+ ketopt_t o = KETOPT_INIT;
126
+ mm_mapopt_t opt;
127
+ mm_idxopt_t ipt;
128
+ int i, c, n_threads = 3, n_parts, old_best_n = -1;
129
+ char *fnw = 0, *rg = 0, *junc_bed = 0, *s, *alt_list = 0;
130
+ FILE *fp_help = stderr;
131
+ mm_idx_reader_t *idx_rdr;
132
+ mm_idx_t *mi;
133
+
134
+ mm_verbose = 3;
135
+ liftrlimit();
136
+ mm_realtime0 = realtime();
137
+ mm_set_opt(0, &ipt, &opt);
138
+
139
+ while ((c = ketopt(&o, argc, argv, 1, opt_str, long_options)) >= 0) { // test command line options and apply option -x/preset first
140
+ if (c == 'x') {
141
+ if (mm_set_opt(o.arg, &ipt, &opt) < 0) {
142
+ fprintf(stderr, "[ERROR] unknown preset '%s'\n", o.arg);
143
+ return 1;
144
+ }
145
+ } else if (c == ':') {
146
+ fprintf(stderr, "[ERROR] missing option argument\n");
147
+ return 1;
148
+ } else if (c == '?') {
149
+ fprintf(stderr, "[ERROR] unknown option in \"%s\"\n", argv[o.i - 1]);
150
+ return 1;
151
+ }
152
+ }
153
+ o = KETOPT_INIT;
154
+
155
+ while ((c = ketopt(&o, argc, argv, 1, opt_str, long_options)) >= 0) {
156
+ if (c == 'w') ipt.w = atoi(o.arg);
157
+ else if (c == 'k') ipt.k = atoi(o.arg);
158
+ else if (c == 'H') ipt.flag |= MM_I_HPC;
159
+ else if (c == 'd') fnw = o.arg; // the above are indexing related options, except -I
160
+ else if (c == 't') n_threads = atoi(o.arg);
161
+ else if (c == 'v') mm_verbose = atoi(o.arg);
162
+ else if (c == 'g') opt.max_gap = (int)mm_parse_num(o.arg);
163
+ else if (c == 'G') mm_mapopt_max_intron_len(&opt, (int)mm_parse_num(o.arg));
164
+ else if (c == 'F') opt.max_frag_len = (int)mm_parse_num(o.arg);
165
+ else if (c == 'N') old_best_n = opt.best_n, opt.best_n = atoi(o.arg);
166
+ else if (c == 'p') opt.pri_ratio = atof(o.arg);
167
+ else if (c == 'M') opt.mask_level = atof(o.arg);
168
+ else if (c == 'c') opt.flag |= MM_F_OUT_CG | MM_F_CIGAR;
169
+ else if (c == 'D') opt.flag |= MM_F_NO_DIAG;
170
+ else if (c == 'P') opt.flag |= MM_F_ALL_CHAINS;
171
+ else if (c == 'X') opt.flag |= MM_F_ALL_CHAINS | MM_F_NO_DIAG | MM_F_NO_DUAL | MM_F_NO_LJOIN; // -D -P --no-long-join --dual=no
172
+ else if (c == 'a') opt.flag |= MM_F_OUT_SAM | MM_F_CIGAR;
173
+ else if (c == 'Q') opt.flag |= MM_F_NO_QUAL;
174
+ else if (c == 'Y') opt.flag |= MM_F_SOFTCLIP;
175
+ else if (c == 'L') opt.flag |= MM_F_LONG_CIGAR;
176
+ else if (c == 'y') opt.flag |= MM_F_COPY_COMMENT;
177
+ else if (c == 'T') opt.sdust_thres = atoi(o.arg);
178
+ else if (c == 'n') opt.min_cnt = atoi(o.arg);
179
+ else if (c == 'm') opt.min_chain_score = atoi(o.arg);
180
+ else if (c == 'A') opt.a = atoi(o.arg);
181
+ else if (c == 'B') opt.b = atoi(o.arg);
182
+ else if (c == 's') opt.min_dp_max = atoi(o.arg);
183
+ else if (c == 'C') opt.noncan = atoi(o.arg);
184
+ else if (c == 'I') ipt.batch_size = mm_parse_num(o.arg);
185
+ else if (c == 'K') opt.mini_batch_size = mm_parse_num(o.arg);
186
+ else if (c == 'e') opt.occ_dist = mm_parse_num(o.arg);
187
+ else if (c == 'R') rg = o.arg;
188
+ else if (c == 'h') fp_help = stdout;
189
+ else if (c == '2') opt.flag |= MM_F_2_IO_THREADS;
190
+ else if (c == 'o') {
191
+ if (strcmp(o.arg, "-") != 0) {
192
+ if (freopen(o.arg, "wb", stdout) == NULL) {
193
+ fprintf(stderr, "[ERROR]\033[1;31m failed to write the output to file '%s'\033[0m: %s\n", o.arg, strerror(errno));
194
+ exit(1);
195
+ }
196
+ }
197
+ }
198
+ else if (c == 300) ipt.bucket_bits = atoi(o.arg); // --bucket-bits
199
+ else if (c == 302) opt.seed = atoi(o.arg); // --seed
200
+ else if (c == 303) mm_dbg_flag |= MM_DBG_NO_KALLOC; // --no-kalloc
201
+ else if (c == 304) mm_dbg_flag |= MM_DBG_PRINT_QNAME; // --print-qname
202
+ else if (c == 306) mm_dbg_flag |= MM_DBG_PRINT_QNAME | MM_DBG_PRINT_SEED, n_threads = 1; // --print-seed
203
+ else if (c == 307) opt.max_chain_skip = atoi(o.arg); // --max-chain-skip
204
+ else if (c == 339) opt.max_chain_iter = atoi(o.arg); // --max-chain-iter
205
+ else if (c == 308) opt.min_ksw_len = atoi(o.arg); // --min-dp-len
206
+ else if (c == 309) mm_dbg_flag |= MM_DBG_PRINT_QNAME | MM_DBG_PRINT_ALN_SEQ, n_threads = 1; // --print-aln-seq
207
+ else if (c == 310) opt.flag |= MM_F_SPLICE; // --splice
208
+ else if (c == 312) opt.flag |= MM_F_NO_LJOIN; // --no-long-join
209
+ else if (c == 313) opt.flag |= MM_F_SR; // --sr
210
+ else if (c == 317) opt.end_bonus = atoi(o.arg); // --end-bonus
211
+ else if (c == 318) opt.flag |= MM_F_INDEPEND_SEG; // --no-pairing
212
+ else if (c == 320) ipt.flag |= MM_I_NO_SEQ; // --idx-no-seq
213
+ else if (c == 321) opt.anchor_ext_shift = atoi(o.arg); // --end-seed-pen
214
+ else if (c == 322) opt.flag |= MM_F_FOR_ONLY; // --for-only
215
+ else if (c == 323) opt.flag |= MM_F_REV_ONLY; // --rev-only
216
+ else if (c == 327) opt.max_clip_ratio = atof(o.arg); // --max-clip-ratio
217
+ else if (c == 328) opt.min_mid_occ = atoi(o.arg); // --min-occ-floor
218
+ else if (c == 329) opt.flag |= MM_F_OUT_MD; // --MD
219
+ else if (c == 331) opt.sc_ambi = atoi(o.arg); // --score-N
220
+ else if (c == 332) opt.flag |= MM_F_EQX; // --eqx
221
+ else if (c == 333) opt.flag |= MM_F_PAF_NO_HIT; // --paf-no-hit
222
+ else if (c == 334) opt.split_prefix = o.arg; // --split-prefix
223
+ else if (c == 335) opt.flag |= MM_F_NO_END_FLT; // --no-end-flt
224
+ else if (c == 336) opt.flag |= MM_F_HARD_MLEVEL; // --hard-mask-level
225
+ else if (c == 337) opt.max_sw_mat = mm_parse_num(o.arg); // --cap-sw-mat
226
+ else if (c == 338) opt.max_qlen = mm_parse_num(o.arg); // --max-qlen
227
+ else if (c == 340) junc_bed = o.arg; // --junc-bed
228
+ else if (c == 341) opt.junc_bonus = atoi(o.arg); // --junc-bonus
229
+ else if (c == 342) opt.flag |= MM_F_SAM_HIT_ONLY; // --sam-hit-only
230
+ else if (c == 343) opt.chain_gap_scale = atof(o.arg); // --chain-gap-scale
231
+ else if (c == 351) opt.chain_skip_scale = atof(o.arg); // --chain-skip-scale
232
+ else if (c == 344) alt_list = o.arg; // --alt
233
+ else if (c == 345) opt.alt_drop = atof(o.arg); // --alt-drop
234
+ else if (c == 346) opt.mask_len = mm_parse_num(o.arg); // --mask-len
235
+ else if (c == 348) opt.flag |= MM_F_QSTRAND | MM_F_NO_INV; // --qstrand
236
+ else if (c == 349) opt.cap_kalloc = mm_parse_num(o.arg); // --cap-kalloc
237
+ else if (c == 350) opt.q_occ_frac = atof(o.arg); // --q-occ-frac
238
+ else if (c == 352) mm_dbg_flag |= MM_DBG_PRINT_CHAIN; // --print-chains
239
+ else if (c == 353) opt.flag |= MM_F_NO_HASH_NAME; // --no-hash-name
240
+ else if (c == 330) {
241
+ fprintf(stderr, "[WARNING] \033[1;31m --lj-min-ratio has been deprecated.\033[0m\n");
242
+ } else if (c == 314) { // --frag
243
+ yes_or_no(&opt, MM_F_FRAG_MODE, o.longidx, o.arg, 1);
244
+ } else if (c == 315) { // --secondary
245
+ yes_or_no(&opt, MM_F_NO_PRINT_2ND, o.longidx, o.arg, 0);
246
+ } else if (c == 316) { // --cs
247
+ opt.flag |= MM_F_OUT_CS | MM_F_CIGAR;
248
+ if (o.arg == 0 || strcmp(o.arg, "short") == 0) {
249
+ opt.flag &= ~MM_F_OUT_CS_LONG;
250
+ } else if (strcmp(o.arg, "long") == 0) {
251
+ opt.flag |= MM_F_OUT_CS_LONG;
252
+ } else if (strcmp(o.arg, "none") == 0) {
253
+ opt.flag &= ~MM_F_OUT_CS;
254
+ } else if (mm_verbose >= 2) {
255
+ fprintf(stderr, "[WARNING]\033[1;31m --cs only takes 'short' or 'long'. Invalid values are assumed to be 'short'.\033[0m\n");
256
+ }
257
+ } else if (c == 319) { // --splice-flank
258
+ yes_or_no(&opt, MM_F_SPLICE_FLANK, o.longidx, o.arg, 1);
259
+ } else if (c == 324) { // --heap-sort
260
+ yes_or_no(&opt, MM_F_HEAP_SORT, o.longidx, o.arg, 1);
261
+ } else if (c == 326) { // --dual
262
+ yes_or_no(&opt, MM_F_NO_DUAL, o.longidx, o.arg, 0);
263
+ } else if (c == 347) { // --rmq
264
+ yes_or_no(&opt, MM_F_RMQ, o.longidx, o.arg, 1);
265
+ } else if (c == 'S') {
266
+ opt.flag |= MM_F_OUT_CS | MM_F_CIGAR | MM_F_OUT_CS_LONG;
267
+ if (mm_verbose >= 2)
268
+ fprintf(stderr, "[WARNING]\033[1;31m option -S is deprecated and may be removed in future. Please use --cs=long instead.\033[0m\n");
269
+ } else if (c == 'V') {
270
+ puts(MM_VERSION);
271
+ return 0;
272
+ } else if (c == 'r') {
273
+ opt.bw = (int)mm_parse_num2(o.arg, &s);
274
+ if (*s == ',') opt.bw_long = (int)mm_parse_num2(s + 1, &s);
275
+ } else if (c == 'U') {
276
+ opt.min_mid_occ = strtol(o.arg, &s, 10);
277
+ if (*s == ',') opt.max_mid_occ = strtol(s + 1, &s, 10);
278
+ } else if (c == 'f') {
279
+ double x;
280
+ char *p;
281
+ x = strtod(o.arg, &p);
282
+ if (x < 1.0) opt.mid_occ_frac = x, opt.mid_occ = 0;
283
+ else opt.mid_occ = (int)(x + .499);
284
+ if (*p == ',') opt.max_occ = (int)(strtod(p+1, &p) + .499);
285
+ } else if (c == 'u') {
286
+ if (*o.arg == 'b') opt.flag |= MM_F_SPLICE_FOR|MM_F_SPLICE_REV; // both strands
287
+ else if (*o.arg == 'f') opt.flag |= MM_F_SPLICE_FOR, opt.flag &= ~MM_F_SPLICE_REV; // match GT-AG
288
+ else if (*o.arg == 'r') opt.flag |= MM_F_SPLICE_REV, opt.flag &= ~MM_F_SPLICE_FOR; // match CT-AC (reverse complement of GT-AG)
289
+ else if (*o.arg == 'n') opt.flag &= ~(MM_F_SPLICE_FOR|MM_F_SPLICE_REV); // don't try to match the GT-AG signal
290
+ else {
291
+ fprintf(stderr, "[ERROR]\033[1;31m unrecognized cDNA direction\033[0m\n");
292
+ return 1;
293
+ }
294
+ } else if (c == 'z') {
295
+ opt.zdrop = opt.zdrop_inv = strtol(o.arg, &s, 10);
296
+ if (*s == ',') opt.zdrop_inv = strtol(s + 1, &s, 10);
297
+ } else if (c == 'O') {
298
+ opt.q = opt.q2 = strtol(o.arg, &s, 10);
299
+ if (*s == ',') opt.q2 = strtol(s + 1, &s, 10);
300
+ } else if (c == 'E') {
301
+ opt.e = opt.e2 = strtol(o.arg, &s, 10);
302
+ if (*s == ',') opt.e2 = strtol(s + 1, &s, 10);
303
+ }
304
+ }
305
+ if ((opt.flag & MM_F_SPLICE) && (opt.flag & MM_F_FRAG_MODE)) {
306
+ fprintf(stderr, "[ERROR]\033[1;31m --splice and --frag should not be specified at the same time.\033[0m\n");
307
+ return 1;
308
+ }
309
+ if (!fnw && !(opt.flag&MM_F_CIGAR))
310
+ ipt.flag |= MM_I_NO_SEQ;
311
+ if (mm_check_opt(&ipt, &opt) < 0)
312
+ return 1;
313
+ if (opt.best_n == 0) {
314
+ fprintf(stderr, "[WARNING]\033[1;31m changed '-N 0' to '-N %d --secondary=no'.\033[0m\n", old_best_n);
315
+ opt.best_n = old_best_n, opt.flag |= MM_F_NO_PRINT_2ND;
316
+ }
317
+
318
+ if (argc == o.ind || fp_help == stdout) {
319
+ fprintf(fp_help, "Usage: minimap2 [options] <target.fa>|<target.idx> [query.fa] [...]\n");
320
+ fprintf(fp_help, "Options:\n");
321
+ fprintf(fp_help, " Indexing:\n");
322
+ fprintf(fp_help, " -H use homopolymer-compressed k-mer (preferrable for PacBio)\n");
323
+ fprintf(fp_help, " -k INT k-mer size (no larger than 28) [%d]\n", ipt.k);
324
+ fprintf(fp_help, " -w INT minimizer window size [%d]\n", ipt.w);
325
+ fprintf(fp_help, " -I NUM split index for every ~NUM input bases [4G]\n");
326
+ fprintf(fp_help, " -d FILE dump index to FILE []\n");
327
+ fprintf(fp_help, " Mapping:\n");
328
+ fprintf(fp_help, " -f FLOAT filter out top FLOAT fraction of repetitive minimizers [%g]\n", opt.mid_occ_frac);
329
+ fprintf(fp_help, " -g NUM stop chain enlongation if there are no minimizers in INT-bp [%d]\n", opt.max_gap);
330
+ fprintf(fp_help, " -G NUM max intron length (effective with -xsplice; changing -r) [200k]\n");
331
+ fprintf(fp_help, " -F NUM max fragment length (effective with -xsr or in the fragment mode) [800]\n");
332
+ fprintf(fp_help, " -r NUM[,NUM] chaining/alignment bandwidth and long-join bandwidth [%d,%d]\n", opt.bw, opt.bw_long);
333
+ fprintf(fp_help, " -n INT minimal number of minimizers on a chain [%d]\n", opt.min_cnt);
334
+ fprintf(fp_help, " -m INT minimal chaining score (matching bases minus log gap penalty) [%d]\n", opt.min_chain_score);
335
+ // fprintf(fp_help, " -T INT SDUST threshold; 0 to disable SDUST [%d]\n", opt.sdust_thres); // TODO: this option is never used; might be buggy
336
+ fprintf(fp_help, " -X skip self and dual mappings (for the all-vs-all mode)\n");
337
+ fprintf(fp_help, " -p FLOAT min secondary-to-primary score ratio [%g]\n", opt.pri_ratio);
338
+ fprintf(fp_help, " -N INT retain at most INT secondary alignments [%d]\n", opt.best_n);
339
+ fprintf(fp_help, " Alignment:\n");
340
+ fprintf(fp_help, " -A INT matching score [%d]\n", opt.a);
341
+ fprintf(fp_help, " -B INT mismatch penalty (larger value for lower divergence) [%d]\n", opt.b);
342
+ fprintf(fp_help, " -O INT[,INT] gap open penalty [%d,%d]\n", opt.q, opt.q2);
343
+ fprintf(fp_help, " -E INT[,INT] gap extension penalty; a k-long gap costs min{O1+k*E1,O2+k*E2} [%d,%d]\n", opt.e, opt.e2);
344
+ fprintf(fp_help, " -z INT[,INT] Z-drop score and inversion Z-drop score [%d,%d]\n", opt.zdrop, opt.zdrop_inv);
345
+ fprintf(fp_help, " -s INT minimal peak DP alignment score [%d]\n", opt.min_dp_max);
346
+ fprintf(fp_help, " -u CHAR how to find GT-AG. f:transcript strand, b:both strands, n:don't match GT-AG [n]\n");
347
+ fprintf(fp_help, " Input/Output:\n");
348
+ fprintf(fp_help, " -a output in the SAM format (PAF by default)\n");
349
+ fprintf(fp_help, " -o FILE output alignments to FILE [stdout]\n");
350
+ fprintf(fp_help, " -L write CIGAR with >65535 ops at the CG tag\n");
351
+ fprintf(fp_help, " -R STR SAM read group line in a format like '@RG\\tID:foo\\tSM:bar' []\n");
352
+ fprintf(fp_help, " -c output CIGAR in PAF\n");
353
+ fprintf(fp_help, " --cs[=STR] output the cs tag; STR is 'short' (if absent) or 'long' [none]\n");
354
+ fprintf(fp_help, " --MD output the MD tag\n");
355
+ fprintf(fp_help, " --eqx write =/X CIGAR operators\n");
356
+ fprintf(fp_help, " -Y use soft clipping for supplementary alignments\n");
357
+ fprintf(fp_help, " -t INT number of threads [%d]\n", n_threads);
358
+ fprintf(fp_help, " -K NUM minibatch size for mapping [500M]\n");
359
+ // fprintf(fp_help, " -v INT verbose level [%d]\n", mm_verbose);
360
+ fprintf(fp_help, " --version show version number\n");
361
+ fprintf(fp_help, " Preset:\n");
362
+ fprintf(fp_help, " -x STR preset (always applied before other options; see minimap2.1 for details) []\n");
363
+ fprintf(fp_help, " - map-pb/map-ont - PacBio CLR/Nanopore vs reference mapping\n");
364
+ fprintf(fp_help, " - map-hifi - PacBio HiFi reads vs reference mapping\n");
365
+ fprintf(fp_help, " - ava-pb/ava-ont - PacBio/Nanopore read overlap\n");
366
+ fprintf(fp_help, " - asm5/asm10/asm20 - asm-to-ref mapping, for ~0.1/1/5%% sequence divergence\n");
367
+ fprintf(fp_help, " - splice/splice:hq - long-read/Pacbio-CCS spliced alignment\n");
368
+ fprintf(fp_help, " - sr - genomic short-read mapping\n");
369
+ fprintf(fp_help, "\nSee `man ./minimap2.1' for detailed description of these and other advanced command-line options.\n");
370
+ return fp_help == stdout? 0 : 1;
371
+ }
372
+
373
+ if ((opt.flag & MM_F_SR) && argc - o.ind > 3) {
374
+ fprintf(stderr, "[ERROR] incorrect input: in the sr mode, please specify no more than two query files.\n");
375
+ return 1;
376
+ }
377
+ idx_rdr = mm_idx_reader_open(argv[o.ind], &ipt, fnw);
378
+ if (idx_rdr == 0) {
379
+ fprintf(stderr, "[ERROR] failed to open file '%s': %s\n", argv[o.ind], strerror(errno));
380
+ return 1;
381
+ }
382
+ if (!idx_rdr->is_idx && fnw == 0 && argc - o.ind < 2) {
383
+ fprintf(stderr, "[ERROR] missing input: please specify a query file to map or option -d to keep the index\n");
384
+ mm_idx_reader_close(idx_rdr);
385
+ return 1;
386
+ }
387
+ if (opt.best_n == 0 && (opt.flag&MM_F_CIGAR) && mm_verbose >= 2)
388
+ fprintf(stderr, "[WARNING]\033[1;31m `-N 0' reduces alignment accuracy. Please use --secondary=no to suppress secondary alignments.\033[0m\n");
389
+ while ((mi = mm_idx_reader_read(idx_rdr, n_threads)) != 0) {
390
+ int ret;
391
+ if ((opt.flag & MM_F_CIGAR) && (mi->flag & MM_I_NO_SEQ)) {
392
+ fprintf(stderr, "[ERROR] the prebuilt index doesn't contain sequences.\n");
393
+ mm_idx_destroy(mi);
394
+ mm_idx_reader_close(idx_rdr);
395
+ return 1;
396
+ }
397
+ if ((opt.flag & MM_F_OUT_SAM) && idx_rdr->n_parts == 1) {
398
+ if (mm_idx_reader_eof(idx_rdr)) {
399
+ if (opt.split_prefix == 0)
400
+ ret = mm_write_sam_hdr(mi, rg, MM_VERSION, argc, argv);
401
+ else
402
+ ret = mm_write_sam_hdr(0, rg, MM_VERSION, argc, argv);
403
+ } else {
404
+ ret = mm_write_sam_hdr(0, rg, MM_VERSION, argc, argv);
405
+ if (opt.split_prefix == 0 && mm_verbose >= 2)
406
+ fprintf(stderr, "[WARNING]\033[1;31m For a multi-part index, no @SQ lines will be outputted. Please use --split-prefix.\033[0m\n");
407
+ }
408
+ if (ret != 0) {
409
+ mm_idx_destroy(mi);
410
+ mm_idx_reader_close(idx_rdr);
411
+ return 1;
412
+ }
413
+ }
414
+ if (mm_verbose >= 3)
415
+ fprintf(stderr, "[M::%s::%.3f*%.2f] loaded/built the index for %d target sequence(s)\n",
416
+ __func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0), mi->n_seq);
417
+ if (argc != o.ind + 1) mm_mapopt_update(&opt, mi);
418
+ if (mm_verbose >= 3) mm_idx_stat(mi);
419
+ if (junc_bed) mm_idx_bed_read(mi, junc_bed, 1);
420
+ if (alt_list) mm_idx_alt_read(mi, alt_list);
421
+ if (argc - (o.ind + 1) == 0) {
422
+ mm_idx_destroy(mi);
423
+ continue; // no query files
424
+ }
425
+ ret = 0;
426
+ if (!(opt.flag & MM_F_FRAG_MODE)) {
427
+ for (i = o.ind + 1; i < argc; ++i) {
428
+ ret = mm_map_file(mi, argv[i], &opt, n_threads);
429
+ if (ret < 0) break;
430
+ }
431
+ } else {
432
+ ret = mm_map_file_frag(mi, argc - (o.ind + 1), (const char**)&argv[o.ind + 1], &opt, n_threads);
433
+ }
434
+ mm_idx_destroy(mi);
435
+ if (ret < 0) {
436
+ fprintf(stderr, "ERROR: failed to map the query file\n");
437
+ exit(EXIT_FAILURE);
438
+ }
439
+ }
440
+ n_parts = idx_rdr->n_parts;
441
+ mm_idx_reader_close(idx_rdr);
442
+
443
+ if (opt.split_prefix)
444
+ mm_split_merge(argc - (o.ind + 1), (const char**)&argv[o.ind + 1], &opt, n_parts);
445
+
446
+ if (fflush(stdout) == EOF) {
447
+ perror("[ERROR] failed to write the results");
448
+ exit(EXIT_FAILURE);
449
+ }
450
+
451
+ if (mm_verbose >= 3) {
452
+ fprintf(stderr, "[M::%s] Version: %s\n", __func__, MM_VERSION);
453
+ fprintf(stderr, "[M::%s] CMD:", __func__);
454
+ for (i = 0; i < argc; ++i)
455
+ fprintf(stderr, " %s", argv[i]);
456
+ fprintf(stderr, "\n[M::%s] Real time: %.3f sec; CPU: %.3f sec; Peak RSS: %.3f GB\n", __func__, realtime() - mm_realtime0, cputime(), peakrss() / 1024.0 / 1024.0 / 1024.0);
457
+ }
458
+ return 0;
459
+ }