minimap2 0.2.22.0 → 0.2.24.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (101) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +60 -76
  3. data/ext/Rakefile +55 -0
  4. data/ext/cmappy/cmappy.c +129 -0
  5. data/ext/cmappy/cmappy.h +44 -0
  6. data/ext/minimap2/FAQ.md +46 -0
  7. data/ext/minimap2/LICENSE.txt +24 -0
  8. data/ext/minimap2/MANIFEST.in +10 -0
  9. data/ext/minimap2/Makefile +132 -0
  10. data/ext/minimap2/Makefile.simde +97 -0
  11. data/ext/minimap2/NEWS.md +821 -0
  12. data/ext/minimap2/README.md +403 -0
  13. data/ext/minimap2/align.c +1020 -0
  14. data/ext/minimap2/bseq.c +169 -0
  15. data/ext/minimap2/bseq.h +64 -0
  16. data/ext/minimap2/code_of_conduct.md +30 -0
  17. data/ext/minimap2/cookbook.md +243 -0
  18. data/ext/minimap2/esterr.c +64 -0
  19. data/ext/minimap2/example.c +63 -0
  20. data/ext/minimap2/format.c +559 -0
  21. data/ext/minimap2/hit.c +466 -0
  22. data/ext/minimap2/index.c +775 -0
  23. data/ext/minimap2/kalloc.c +205 -0
  24. data/ext/minimap2/kalloc.h +76 -0
  25. data/ext/minimap2/kdq.h +132 -0
  26. data/ext/minimap2/ketopt.h +120 -0
  27. data/ext/minimap2/khash.h +615 -0
  28. data/ext/minimap2/krmq.h +474 -0
  29. data/ext/minimap2/kseq.h +256 -0
  30. data/ext/minimap2/ksort.h +153 -0
  31. data/ext/minimap2/ksw2.h +184 -0
  32. data/ext/minimap2/ksw2_dispatch.c +96 -0
  33. data/ext/minimap2/ksw2_extd2_sse.c +402 -0
  34. data/ext/minimap2/ksw2_exts2_sse.c +416 -0
  35. data/ext/minimap2/ksw2_extz2_sse.c +313 -0
  36. data/ext/minimap2/ksw2_ll_sse.c +152 -0
  37. data/ext/minimap2/kthread.c +159 -0
  38. data/ext/minimap2/kthread.h +15 -0
  39. data/ext/minimap2/kvec.h +105 -0
  40. data/ext/minimap2/lchain.c +369 -0
  41. data/ext/minimap2/main.c +459 -0
  42. data/ext/minimap2/map.c +714 -0
  43. data/ext/minimap2/minimap.h +410 -0
  44. data/ext/minimap2/minimap2.1 +725 -0
  45. data/ext/minimap2/misc/README.md +179 -0
  46. data/ext/minimap2/misc/mmphase.js +335 -0
  47. data/ext/minimap2/misc/paftools.js +3149 -0
  48. data/ext/minimap2/misc.c +162 -0
  49. data/ext/minimap2/mmpriv.h +132 -0
  50. data/ext/minimap2/options.c +234 -0
  51. data/ext/minimap2/pe.c +177 -0
  52. data/ext/minimap2/python/README.rst +196 -0
  53. data/ext/minimap2/python/cmappy.h +152 -0
  54. data/ext/minimap2/python/cmappy.pxd +153 -0
  55. data/ext/minimap2/python/mappy.pyx +273 -0
  56. data/ext/minimap2/python/minimap2.py +39 -0
  57. data/ext/minimap2/sdust.c +213 -0
  58. data/ext/minimap2/sdust.h +25 -0
  59. data/ext/minimap2/seed.c +131 -0
  60. data/ext/minimap2/setup.py +55 -0
  61. data/ext/minimap2/sketch.c +143 -0
  62. data/ext/minimap2/splitidx.c +84 -0
  63. data/ext/minimap2/sse2neon/emmintrin.h +1689 -0
  64. data/ext/minimap2/test/MT-human.fa +278 -0
  65. data/ext/minimap2/test/MT-orang.fa +276 -0
  66. data/ext/minimap2/test/q-inv.fa +4 -0
  67. data/ext/minimap2/test/q2.fa +2 -0
  68. data/ext/minimap2/test/t-inv.fa +127 -0
  69. data/ext/minimap2/test/t2.fa +2 -0
  70. data/ext/minimap2/tex/Makefile +21 -0
  71. data/ext/minimap2/tex/bioinfo.cls +930 -0
  72. data/ext/minimap2/tex/blasr-mc.eval +17 -0
  73. data/ext/minimap2/tex/bowtie2-s3.sam.eval +28 -0
  74. data/ext/minimap2/tex/bwa-s3.sam.eval +52 -0
  75. data/ext/minimap2/tex/bwa.eval +55 -0
  76. data/ext/minimap2/tex/eval2roc.pl +33 -0
  77. data/ext/minimap2/tex/graphmap.eval +4 -0
  78. data/ext/minimap2/tex/hs38-simu.sh +10 -0
  79. data/ext/minimap2/tex/minialign.eval +49 -0
  80. data/ext/minimap2/tex/minimap2.bib +460 -0
  81. data/ext/minimap2/tex/minimap2.tex +724 -0
  82. data/ext/minimap2/tex/mm2-s3.sam.eval +62 -0
  83. data/ext/minimap2/tex/mm2-update.tex +240 -0
  84. data/ext/minimap2/tex/mm2.approx.eval +12 -0
  85. data/ext/minimap2/tex/mm2.eval +13 -0
  86. data/ext/minimap2/tex/natbib.bst +1288 -0
  87. data/ext/minimap2/tex/natbib.sty +803 -0
  88. data/ext/minimap2/tex/ngmlr.eval +38 -0
  89. data/ext/minimap2/tex/roc.gp +60 -0
  90. data/ext/minimap2/tex/snap-s3.sam.eval +62 -0
  91. data/ext/minimap2.patch +19 -0
  92. data/lib/minimap2/aligner.rb +4 -4
  93. data/lib/minimap2/alignment.rb +11 -11
  94. data/lib/minimap2/ffi/constants.rb +20 -16
  95. data/lib/minimap2/ffi/functions.rb +5 -0
  96. data/lib/minimap2/ffi.rb +4 -5
  97. data/lib/minimap2/version.rb +2 -2
  98. data/lib/minimap2.rb +51 -15
  99. metadata +97 -79
  100. data/lib/minimap2/ffi_helper.rb +0 -53
  101. data/vendor/libminimap2.so +0 -0
@@ -0,0 +1,162 @@
1
+ #include <stdlib.h>
2
+ #include "mmpriv.h"
3
+
4
+ int mm_verbose = 1;
5
+ int mm_dbg_flag = 0;
6
+ double mm_realtime0;
7
+
8
+ #if defined(WIN32) || defined(_WIN32)
9
+ #include <windows.h>
10
+
11
+ struct timezone
12
+ {
13
+ __int32 tz_minuteswest; /* minutes W of Greenwich */
14
+ int tz_dsttime; /* type of dst correction */
15
+ };
16
+
17
+ /*
18
+ * gettimeofday.c
19
+ * Win32 gettimeofday() replacement
20
+ * taken from PostgreSQL, according to
21
+ * https://stackoverflow.com/questions/1676036/what-should-i-use-to-replace-gettimeofday-on-windows
22
+ *
23
+ * src/port/gettimeofday.c
24
+ *
25
+ * Copyright (c) 2003 SRA, Inc.
26
+ * Copyright (c) 2003 SKC, Inc.
27
+ *
28
+ * Permission to use, copy, modify, and distribute this software and
29
+ * its documentation for any purpose, without fee, and without a
30
+ * written agreement is hereby granted, provided that the above
31
+ * copyright notice and this paragraph and the following two
32
+ * paragraphs appear in all copies.
33
+ *
34
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT,
35
+ * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
36
+ * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
37
+ * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED
38
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
39
+ *
40
+ * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
41
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
42
+ * A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS
43
+ * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE,
44
+ * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
45
+ */
46
+
47
+ /* FILETIME of Jan 1 1970 00:00:00. */
48
+ static const unsigned __int64 epoch = ((unsigned __int64) 116444736000000000ULL);
49
+
50
+ /*
51
+ * timezone information is stored outside the kernel so tzp isn't used anymore.
52
+ *
53
+ * Note: this function is not for Win32 high precision timing purpose. See
54
+ * elapsed_time().
55
+ */
56
+ int gettimeofday(struct timeval * tp, struct timezone *tzp)
57
+ {
58
+ FILETIME file_time;
59
+ SYSTEMTIME system_time;
60
+ ULARGE_INTEGER ularge;
61
+
62
+ GetSystemTime(&system_time);
63
+ SystemTimeToFileTime(&system_time, &file_time);
64
+ ularge.LowPart = file_time.dwLowDateTime;
65
+ ularge.HighPart = file_time.dwHighDateTime;
66
+
67
+ tp->tv_sec = (long) ((ularge.QuadPart - epoch) / 10000000L);
68
+ tp->tv_usec = (long) (system_time.wMilliseconds * 1000);
69
+
70
+ return 0;
71
+ }
72
+
73
+ // taken from https://stackoverflow.com/questions/5272470/c-get-cpu-usage-on-linux-and-windows
74
+ double cputime()
75
+ {
76
+ HANDLE hProcess = GetCurrentProcess();
77
+ FILETIME ftCreation, ftExit, ftKernel, ftUser;
78
+ SYSTEMTIME stKernel;
79
+ SYSTEMTIME stUser;
80
+
81
+ GetProcessTimes(hProcess, &ftCreation, &ftExit, &ftKernel, &ftUser);
82
+ FileTimeToSystemTime(&ftKernel, &stKernel);
83
+ FileTimeToSystemTime(&ftUser, &stUser);
84
+
85
+ double kernelModeTime = ((stKernel.wHour * 60.) + stKernel.wMinute * 60.) + stKernel.wSecond * 1. + stKernel.wMilliseconds / 1000.;
86
+ double userModeTime = ((stUser.wHour * 60.) + stUser.wMinute * 60.) + stUser.wSecond * 1. + stUser.wMilliseconds / 1000.;
87
+
88
+ return kernelModeTime + userModeTime;
89
+ }
90
+
91
+ long peakrss(void) { return 0; }
92
+ #else
93
+ #include <sys/resource.h>
94
+ #include <sys/time.h>
95
+
96
/*
 * CPU time used so far by this process, in seconds: the sum of the user and
 * system times reported by getrusage(RUSAGE_SELF).
 */
double cputime(void)
{
	struct rusage usage;
	const struct timeval *user_tv = &usage.ru_utime;
	const struct timeval *sys_tv = &usage.ru_stime;

	getrusage(RUSAGE_SELF, &usage);
	return user_tv->tv_sec + sys_tv->tv_sec + 1e-6 * (user_tv->tv_usec + sys_tv->tv_usec);
}
102
+
103
/*
 * Peak resident set size of this process as reported by getrusage().
 * On Linux the ru_maxrss value is multiplied by 1024; on every other
 * platform it is returned unchanged.
 */
long peakrss(void)
{
	struct rusage usage;
	long scale = 1;

#ifdef __linux__
	scale = 1024;
#endif
	getrusage(RUSAGE_SELF, &usage);
	return usage.ru_maxrss * scale;
}
113
+
114
+ #endif /* WIN32 || _WIN32 */
115
+
116
/*
 * Current wall-clock time from gettimeofday(), as seconds with a
 * microsecond-resolution fractional part.
 */
double realtime(void)
{
	struct timeval now;

	gettimeofday(&now, NULL);
	return now.tv_sec + 1e-6 * now.tv_usec;
}
122
+
123
/*
 * puts() wrapper that treats a failed write as fatal: on EOF it reports the
 * error with perror() and terminates the process with EXIT_FAILURE.
 */
void mm_err_puts(const char *str)
{
	if (puts(str) != EOF)
		return;
	perror("[ERROR] failed to write the results");
	exit(EXIT_FAILURE);
}
132
+
133
/*
 * fwrite() wrapper that treats an incomplete write as fatal.
 *
 * p      - buffer holding the items to write
 * size   - size of one item in bytes
 * nitems - number of items to write
 * fp     - destination stream
 *
 * fwrite() returns the number of items actually written and never EOF, so
 * success is checked by comparing that count with nitems (the previous
 * comparison against EOF could not detect a short or failed write).  A
 * request for zero bytes is a no-op.  On failure the error is reported with
 * perror() and the process exits with EXIT_FAILURE.
 */
void mm_err_fwrite(const void *p, size_t size, size_t nitems, FILE *fp)
{
	if (size == 0 || nitems == 0)
		return; /* nothing to write; fwrite() would return 0 here */
	if (fwrite(p, size, nitems, fp) != nitems) {
		perror("[ERROR] failed to write data");
		exit(EXIT_FAILURE);
	}
}
142
+
143
/*
 * fread() wrapper that treats an incomplete read as fatal.
 *
 * p      - destination buffer, at least size * nitems bytes
 * size   - size of one item in bytes
 * nitems - number of items to read
 * fp     - source stream
 *
 * fread() returns the number of items actually read and never EOF, so
 * success is checked by comparing that count with nitems (the previous
 * comparison against EOF let truncated input pass unnoticed, leaving part of
 * the buffer unfilled).  A request for zero bytes is a no-op.  On failure a
 * message is printed and the process exits with EXIT_FAILURE; perror() is
 * used only for genuine stream errors because errno is not meaningful for a
 * plain end-of-file.
 */
void mm_err_fread(void *p, size_t size, size_t nitems, FILE *fp)
{
	if (size == 0 || nitems == 0)
		return; /* nothing to read; fread() would return 0 here */
	if (fread(p, size, nitems, fp) != nitems) {
		if (ferror(fp))
			perror("[ERROR] failed to read data");
		else
			fprintf(stderr, "[ERROR] failed to read data: unexpected end of file\n");
		exit(EXIT_FAILURE);
	}
}
152
+
153
+ #include "ksort.h"
154
+
155
+ #define sort_key_128x(a) ((a).x)
156
+ KRADIX_SORT_INIT(128x, mm128_t, sort_key_128x, 8)
157
+
158
+ #define sort_key_64(x) (x)
159
+ KRADIX_SORT_INIT(64, uint64_t, sort_key_64, 8)
160
+
161
+ KSORT_INIT_GENERIC(uint32_t)
162
+ KSORT_INIT_GENERIC(uint64_t)
@@ -0,0 +1,132 @@
1
+ #ifndef MMPRIV2_H
2
+ #define MMPRIV2_H
3
+
4
+ #include <assert.h>
5
+ #include "minimap.h"
6
+ #include "bseq.h"
7
+ #include "kseq.h"
8
+
9
+ #define MM_PARENT_UNSET (-1)
10
+ #define MM_PARENT_TMP_PRI (-2)
11
+
12
+ #define MM_DBG_NO_KALLOC 0x1
13
+ #define MM_DBG_PRINT_QNAME 0x2
14
+ #define MM_DBG_PRINT_SEED 0x4
15
+ #define MM_DBG_PRINT_ALN_SEQ 0x8
16
+ #define MM_DBG_PRINT_CHAIN 0x10
17
+
18
+ #define MM_SEED_LONG_JOIN (1ULL<<40)
19
+ #define MM_SEED_IGNORE (1ULL<<41)
20
+ #define MM_SEED_TANDEM (1ULL<<42)
21
+ #define MM_SEED_SELF (1ULL<<43)
22
+
23
+ #define MM_SEED_SEG_SHIFT 48
24
+ #define MM_SEED_SEG_MASK (0xffULL<<(MM_SEED_SEG_SHIFT))
25
+
26
+ #ifndef kroundup32
27
+ #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
28
+ #endif
29
+
30
+ #define mm_seq4_set(s, i, c) ((s)[(i)>>3] |= (uint32_t)(c) << (((i)&7)<<2))
31
+ #define mm_seq4_get(s, i) ((s)[(i)>>3] >> (((i)&7)<<2) & 0xf)
32
+
33
+ #define MALLOC(type, len) ((type*)malloc((len) * sizeof(type)))
34
+ #define CALLOC(type, len) ((type*)calloc((len), sizeof(type)))
35
+
36
+ #ifdef __cplusplus
37
+ extern "C" {
38
+ #endif
39
+
40
+ typedef struct {
41
+ uint32_t n;
42
+ uint32_t q_pos;
43
+ uint32_t q_span:31, flt:1;
44
+ uint32_t seg_id:31, is_tandem:1;
45
+ const uint64_t *cr;
46
+ } mm_seed_t;
47
+
48
+ typedef struct {
49
+ int n_u, n_a;
50
+ uint64_t *u;
51
+ mm128_t *a;
52
+ } mm_seg_t;
53
+
54
+ double cputime(void);
55
+ double realtime(void);
56
+ long peakrss(void);
57
+
58
+ void radix_sort_128x(mm128_t *beg, mm128_t *end);
59
+ void radix_sort_64(uint64_t *beg, uint64_t *end);
60
+ uint32_t ks_ksmall_uint32_t(size_t n, uint32_t arr[], size_t kk);
61
+
62
+ void mm_sketch(void *km, const char *str, int len, int w, int k, uint32_t rid, int is_hpc, mm128_v *p);
63
+
64
+ mm_seed_t *mm_collect_matches(void *km, int *_n_m, int qlen, int max_occ, int max_max_occ, int dist, const mm_idx_t *mi, const mm128_v *mv, int64_t *n_a, int *rep_len, int *n_mini_pos, uint64_t **mini_pos);
65
+ void mm_seed_mz_flt(void *km, mm128_v *mv, int32_t q_occ_max, float q_occ_frac);
66
+
67
+ double mm_event_identity(const mm_reg1_t *r);
68
+ int mm_write_sam_hdr(const mm_idx_t *mi, const char *rg, const char *ver, int argc, char *argv[]);
69
+ void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, void *km, int64_t opt_flag);
70
+ void mm_write_paf3(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, void *km, int64_t opt_flag, int rep_len);
71
+ void mm_write_sam(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, int n_regs, const mm_reg1_t *regs);
72
+ void mm_write_sam2(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int seg_idx, int reg_idx, int n_seg, const int *n_regs, const mm_reg1_t *const* regs, void *km, int64_t opt_flag);
73
+ void mm_write_sam3(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int seg_idx, int reg_idx, int n_seg, const int *n_regss, const mm_reg1_t *const* regss, void *km, int64_t opt_flag, int rep_len);
74
+
75
+ void mm_idxopt_init(mm_idxopt_t *opt);
76
+ const uint64_t *mm_idx_get(const mm_idx_t *mi, uint64_t minier, int *n);
77
+ int32_t mm_idx_cal_max_occ(const mm_idx_t *mi, float f);
78
+ int mm_idx_getseq2(const mm_idx_t *mi, int is_rev, uint32_t rid, uint32_t st, uint32_t en, uint8_t *seq);
79
+ mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, const char *qstr, int *n_regs_, mm_reg1_t *regs, mm128_t *a);
80
+ mm_reg1_t *mm_gen_regs(void *km, uint32_t hash, int qlen, int n_u, uint64_t *u, mm128_t *a, int is_qstrand);
81
+
82
+ mm128_t *mm_chain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int max_iter, int min_cnt, int min_sc, float gap_scale,
83
+ int is_cdna, int n_segs, int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km);
84
+ mm128_t *mg_lchain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int max_iter, int min_cnt, int min_sc, float chn_pen_gap, float chn_pen_skip,
85
+ int is_cdna, int n_segs, int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km);
86
+ mm128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_skip, int cap_rmq_size, int min_cnt, int min_sc, float chn_pen_gap, float chn_pen_skip,
87
+ int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km);
88
+
89
+ void mm_mark_alt(const mm_idx_t *mi, int n, mm_reg1_t *r);
90
+ void mm_split_reg(mm_reg1_t *r, mm_reg1_t *r2, int n, int qlen, mm128_t *a, int is_qstrand);
91
+ void mm_sync_regs(void *km, int n_regs, mm_reg1_t *regs);
92
+ int mm_squeeze_a(void *km, int n_regs, mm_reg1_t *regs, mm128_t *a);
93
+ int mm_set_sam_pri(int n, mm_reg1_t *r);
94
+ void mm_set_parent(void *km, float mask_level, int mask_len, int n, mm_reg1_t *r, int sub_diff, int hard_mask_level, float alt_diff_frac);
95
+ void mm_select_sub(void *km, float pri_ratio, int min_diff, int best_n, int check_strand, int min_strand_sc, int *n_, mm_reg1_t *r);
96
+ void mm_select_sub_multi(void *km, float pri_ratio, float pri1, float pri2, int max_gap_ref, int min_diff, int best_n, int n_segs, const int *qlens, int *n_, mm_reg1_t *r);
97
+ int mm_filter_strand_retained(int n_regs, mm_reg1_t *r);
98
+ void mm_filter_regs(const mm_mapopt_t *opt, int qlen, int *n_regs, mm_reg1_t *regs);
99
+ void mm_hit_sort(void *km, int *n_regs, mm_reg1_t *r, float alt_diff_frac);
100
+ void mm_set_mapq(void *km, int n_regs, mm_reg1_t *regs, int min_chain_sc, int match_sc, int rep_len, int is_sr);
101
+ void mm_update_dp_max(int qlen, int n_regs, mm_reg1_t *regs, float frac, int a, int b);
102
+
103
+ void mm_est_err(const mm_idx_t *mi, int qlen, int n_regs, mm_reg1_t *regs, const mm128_t *a, int32_t n, const uint64_t *mini_pos);
104
+
105
+ mm_seg_t *mm_seg_gen(void *km, uint32_t hash, int n_segs, const int *qlens, int n_regs0, const mm_reg1_t *regs0, int *n_regs, mm_reg1_t **regs, const mm128_t *a);
106
+ void mm_seg_free(void *km, int n_segs, mm_seg_t *segs);
107
+ void mm_pair(void *km, int max_gap_ref, int dp_bonus, int sub_diff, int match_sc, const int *qlens, int *n_regs, mm_reg1_t **regs);
108
+
109
+ FILE *mm_split_init(const char *prefix, const mm_idx_t *mi);
110
+ mm_idx_t *mm_split_merge_prep(const char *prefix, int n_splits, FILE **fp, uint32_t *n_seq_part);
111
+ int mm_split_merge(int n_segs, const char **fn, const mm_mapopt_t *opt, int n_split_idx);
112
+ void mm_split_rm_tmp(const char *prefix, int n_splits);
113
+
114
+ void mm_err_puts(const char *str);
115
+ void mm_err_fwrite(const void *p, size_t size, size_t nitems, FILE *fp);
116
+ void mm_err_fread(void *p, size_t size, size_t nitems, FILE *fp);
117
+
118
/*
 * Fast approximate base-2 logarithm.
 *
 * The biased exponent is extracted from the IEEE-754 bit pattern of x, and
 * the mantissa (rescaled into [1,2)) is fed to a quadratic polynomial.
 * The exponent arithmetic is done in signed int so that inputs below 2
 * (exponent field < 128) yield a negative/small integer part instead of
 * wrapping around as an unsigned value.  Results for x >= 2 are unchanged.
 *
 * NB: x must be a positive, normal, finite float; zero, negative, subnormal,
 * infinite and NaN inputs are not handled.
 */
static inline float mg_log2(float x)
{
	union { float f; uint32_t i; } z = { x };
	float log_2 = (float)((int)((z.i >> 23) & 255) - 128);
	z.i &= ~(255U << 23); /* clear the exponent field ... */
	z.i += 127U << 23;    /* ... and set it to the bias, so z.f is in [1,2) */
	log_2 += (-0.34484843f * z.f + 2.02466578f) * z.f - 0.67487759f;
	return log_2;
}
127
+
128
+ #ifdef __cplusplus
129
+ }
130
+ #endif
131
+
132
+ #endif
@@ -0,0 +1,234 @@
1
+ #include <stdio.h>
2
+ #include <limits.h>
3
+ #include "mmpriv.h"
4
+
5
+ void mm_idxopt_init(mm_idxopt_t *opt)
6
+ {
7
+ memset(opt, 0, sizeof(mm_idxopt_t));
8
+ opt->k = 15, opt->w = 10, opt->flag = 0;
9
+ opt->bucket_bits = 14;
10
+ opt->mini_batch_size = 50000000;
11
+ opt->batch_size = 4000000000ULL;
12
+ }
13
+
14
+ void mm_mapopt_init(mm_mapopt_t *opt)
15
+ {
16
+ memset(opt, 0, sizeof(mm_mapopt_t));
17
+ opt->seed = 11;
18
+ opt->mid_occ_frac = 2e-4f;
19
+ opt->min_mid_occ = 10;
20
+ opt->max_mid_occ = 1000000;
21
+ opt->sdust_thres = 0; // no SDUST masking
22
+ opt->q_occ_frac = 0.01f;
23
+
24
+ opt->min_cnt = 3;
25
+ opt->min_chain_score = 40;
26
+ opt->bw = 500, opt->bw_long = 20000;
27
+ opt->max_gap = 5000;
28
+ opt->max_gap_ref = -1;
29
+ opt->max_chain_skip = 25;
30
+ opt->max_chain_iter = 5000;
31
+ opt->rmq_inner_dist = 1000;
32
+ opt->rmq_size_cap = 100000;
33
+ opt->rmq_rescue_size = 1000;
34
+ opt->rmq_rescue_ratio = 0.1f;
35
+ opt->chain_gap_scale = 0.8f;
36
+ opt->chain_skip_scale = 0.0f;
37
+ opt->max_max_occ = 4095;
38
+ opt->occ_dist = 500;
39
+
40
+ opt->mask_level = 0.5f;
41
+ opt->mask_len = INT_MAX;
42
+ opt->pri_ratio = 0.8f;
43
+ opt->best_n = 5;
44
+
45
+ opt->alt_drop = 0.15f;
46
+
47
+ opt->a = 2, opt->b = 4, opt->q = 4, opt->e = 2, opt->q2 = 24, opt->e2 = 1;
48
+ opt->sc_ambi = 1;
49
+ opt->zdrop = 400, opt->zdrop_inv = 200;
50
+ opt->end_bonus = -1;
51
+ opt->min_dp_max = opt->min_chain_score * opt->a;
52
+ opt->min_ksw_len = 200;
53
+ opt->anchor_ext_len = 20, opt->anchor_ext_shift = 6;
54
+ opt->max_clip_ratio = 1.0f;
55
+ opt->mini_batch_size = 500000000;
56
+ opt->max_sw_mat = 100000000;
57
+ opt->cap_kalloc = 1000000000;
58
+
59
+ opt->rank_min_len = 500;
60
+ opt->rank_frac = 0.9f;
61
+
62
+ opt->pe_ori = 0; // FF
63
+ opt->pe_bonus = 33;
64
+ }
65
+
66
+ void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi)
67
+ {
68
+ if ((opt->flag & MM_F_SPLICE_FOR) || (opt->flag & MM_F_SPLICE_REV))
69
+ opt->flag |= MM_F_SPLICE;
70
+ if (opt->mid_occ <= 0) {
71
+ opt->mid_occ = mm_idx_cal_max_occ(mi, opt->mid_occ_frac);
72
+ if (opt->mid_occ < opt->min_mid_occ)
73
+ opt->mid_occ = opt->min_mid_occ;
74
+ if (opt->max_mid_occ > opt->min_mid_occ && opt->mid_occ > opt->max_mid_occ)
75
+ opt->mid_occ = opt->max_mid_occ;
76
+ }
77
+ if (opt->bw_long < opt->bw) opt->bw_long = opt->bw;
78
+ if (mm_verbose >= 3)
79
+ fprintf(stderr, "[M::%s::%.3f*%.2f] mid_occ = %d\n", __func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0), opt->mid_occ);
80
+ }
81
+
82
+ void mm_mapopt_max_intron_len(mm_mapopt_t *opt, int max_intron_len)
83
+ {
84
+ if ((opt->flag & MM_F_SPLICE) && max_intron_len > 0)
85
+ opt->max_gap_ref = opt->bw = opt->bw_long = max_intron_len;
86
+ }
87
+
88
+ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
89
+ {
90
+ if (preset == 0) {
91
+ mm_idxopt_init(io);
92
+ mm_mapopt_init(mo);
93
+ } else if (strcmp(preset, "map-ont") == 0) { // this is the same as the default
94
+ } else if (strcmp(preset, "ava-ont") == 0) {
95
+ io->flag = 0, io->k = 15, io->w = 5;
96
+ mo->flag |= MM_F_ALL_CHAINS | MM_F_NO_DIAG | MM_F_NO_DUAL | MM_F_NO_LJOIN;
97
+ mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_chain_skip = 25;
98
+ mo->bw = mo->bw_long = 2000;
99
+ mo->occ_dist = 0;
100
+ } else if (strcmp(preset, "map10k") == 0 || strcmp(preset, "map-pb") == 0) {
101
+ io->flag |= MM_I_HPC, io->k = 19;
102
+ } else if (strcmp(preset, "ava-pb") == 0) {
103
+ io->flag |= MM_I_HPC, io->k = 19, io->w = 5;
104
+ mo->flag |= MM_F_ALL_CHAINS | MM_F_NO_DIAG | MM_F_NO_DUAL | MM_F_NO_LJOIN;
105
+ mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_chain_skip = 25;
106
+ mo->bw_long = mo->bw;
107
+ mo->occ_dist = 0;
108
+ } else if (strcmp(preset, "map-hifi") == 0 || strcmp(preset, "map-ccs") == 0) {
109
+ io->flag = 0, io->k = 19, io->w = 19;
110
+ mo->max_gap = 10000;
111
+ mo->a = 1, mo->b = 4, mo->q = 6, mo->q2 = 26, mo->e = 2, mo->e2 = 1;
112
+ mo->occ_dist = 500;
113
+ mo->min_mid_occ = 50, mo->max_mid_occ = 500;
114
+ mo->min_dp_max = 200;
115
+ } else if (strncmp(preset, "asm", 3) == 0) {
116
+ io->flag = 0, io->k = 19, io->w = 19;
117
+ mo->bw = 1000, mo->bw_long = 100000;
118
+ mo->max_gap = 10000;
119
+ mo->flag |= MM_F_RMQ;
120
+ mo->min_mid_occ = 50, mo->max_mid_occ = 500;
121
+ mo->min_dp_max = 200;
122
+ mo->best_n = 50;
123
+ if (strcmp(preset, "asm5") == 0) {
124
+ mo->a = 1, mo->b = 19, mo->q = 39, mo->q2 = 81, mo->e = 3, mo->e2 = 1, mo->zdrop = mo->zdrop_inv = 200;
125
+ } else if (strcmp(preset, "asm10") == 0) {
126
+ mo->a = 1, mo->b = 9, mo->q = 16, mo->q2 = 41, mo->e = 2, mo->e2 = 1, mo->zdrop = mo->zdrop_inv = 200;
127
+ } else if (strcmp(preset, "asm20") == 0) {
128
+ mo->a = 1, mo->b = 4, mo->q = 6, mo->q2 = 26, mo->e = 2, mo->e2 = 1, mo->zdrop = mo->zdrop_inv = 200;
129
+ io->w = 10;
130
+ } else return -1;
131
+ } else if (strcmp(preset, "short") == 0 || strcmp(preset, "sr") == 0) {
132
+ io->flag = 0, io->k = 21, io->w = 11;
133
+ mo->flag |= MM_F_SR | MM_F_FRAG_MODE | MM_F_NO_PRINT_2ND | MM_F_2_IO_THREADS | MM_F_HEAP_SORT;
134
+ mo->pe_ori = 0<<1|1; // FR
135
+ mo->a = 2, mo->b = 8, mo->q = 12, mo->e = 2, mo->q2 = 24, mo->e2 = 1;
136
+ mo->zdrop = mo->zdrop_inv = 100;
137
+ mo->end_bonus = 10;
138
+ mo->max_frag_len = 800;
139
+ mo->max_gap = 100;
140
+ mo->bw = mo->bw_long = 100;
141
+ mo->pri_ratio = 0.5f;
142
+ mo->min_cnt = 2;
143
+ mo->min_chain_score = 25;
144
+ mo->min_dp_max = 40;
145
+ mo->best_n = 20;
146
+ mo->mid_occ = 1000;
147
+ mo->max_occ = 5000;
148
+ mo->mini_batch_size = 50000000;
149
+ } else if (strncmp(preset, "splice", 6) == 0 || strcmp(preset, "cdna") == 0) {
150
+ io->flag = 0, io->k = 15, io->w = 5;
151
+ mo->flag |= MM_F_SPLICE | MM_F_SPLICE_FOR | MM_F_SPLICE_REV | MM_F_SPLICE_FLANK;
152
+ mo->max_sw_mat = 0;
153
+ mo->max_gap = 2000, mo->max_gap_ref = mo->bw = mo->bw_long = 200000;
154
+ mo->a = 1, mo->b = 2, mo->q = 2, mo->e = 1, mo->q2 = 32, mo->e2 = 0;
155
+ mo->noncan = 9;
156
+ mo->junc_bonus = 9;
157
+ mo->zdrop = 200, mo->zdrop_inv = 100; // because mo->a is halved
158
+ if (strcmp(preset, "splice:hq") == 0)
159
+ mo->junc_bonus = 5, mo->b = 4, mo->q = 6, mo->q2 = 24;
160
+ } else return -1;
161
+ return 0;
162
+ }
163
+
164
+ int mm_check_opt(const mm_idxopt_t *io, const mm_mapopt_t *mo)
165
+ {
166
+ if (mo->bw > mo->bw_long) {
167
+ if (mm_verbose >= 1)
168
+ fprintf(stderr, "[ERROR]\033[1;31m with '-rNUM1,NUM2', NUM1 (%d) can't be larger than NUM2 (%d)\033[0m\n", mo->bw, mo->bw_long);
169
+ return -8;
170
+ }
171
+ if ((mo->flag & MM_F_RMQ) && (mo->flag & (MM_F_SR|MM_F_SPLICE))) {
172
+ if (mm_verbose >= 1)
173
+ fprintf(stderr, "[ERROR]\033[1;31m --rmq doesn't work with --sr or --splice\033[0m\n");
174
+ return -7;
175
+ }
176
+ if (mo->split_prefix && (mo->flag & (MM_F_OUT_CS|MM_F_OUT_MD))) {
177
+ if (mm_verbose >= 1)
178
+ fprintf(stderr, "[ERROR]\033[1;31m --cs or --MD doesn't work with --split-prefix\033[0m\n");
179
+ return -6;
180
+ }
181
+ if (io->k <= 0 || io->w <= 0) {
182
+ if (mm_verbose >= 1)
183
+ fprintf(stderr, "[ERROR]\033[1;31m -k and -w must be positive\033[0m\n");
184
+ return -5;
185
+ }
186
+ if (mo->best_n < 0) {
187
+ if (mm_verbose >= 1)
188
+ fprintf(stderr, "[ERROR]\033[1;31m -N must be no less than 0\033[0m\n");
189
+ return -4;
190
+ }
191
+ if (mo->best_n == 0 && mm_verbose >= 2)
192
+ fprintf(stderr, "[WARNING]\033[1;31m '-N 0' reduces mapping accuracy. Please use '--secondary=no' instead.\033[0m\n");
193
+ if (mo->pri_ratio < 0.0f || mo->pri_ratio > 1.0f) {
194
+ if (mm_verbose >= 1)
195
+ fprintf(stderr, "[ERROR]\033[1;31m -p must be within 0 and 1 (including 0 and 1)\033[0m\n");
196
+ return -4;
197
+ }
198
+ if ((mo->flag & MM_F_FOR_ONLY) && (mo->flag & MM_F_REV_ONLY)) {
199
+ if (mm_verbose >= 1)
200
+ fprintf(stderr, "[ERROR]\033[1;31m --for-only and --rev-only can't be applied at the same time\033[0m\n");
201
+ return -3;
202
+ }
203
+ if (mo->e <= 0 || mo->q <= 0) {
204
+ if (mm_verbose >= 1)
205
+ fprintf(stderr, "[ERROR]\033[1;31m -O and -E must be positive\033[0m\n");
206
+ return -1;
207
+ }
208
+ if ((mo->q != mo->q2 || mo->e != mo->e2) && !(mo->e > mo->e2 && mo->q + mo->e < mo->q2 + mo->e2)) {
209
+ if (mm_verbose >= 1)
210
+ fprintf(stderr, "[ERROR]\033[1;31m dual gap penalties violating E1>E2 and O1+E1<O2+E2\033[0m\n");
211
+ return -2;
212
+ }
213
+ if ((mo->q + mo->e) + (mo->q2 + mo->e2) > 127) {
214
+ if (mm_verbose >= 1)
215
+ fprintf(stderr, "[ERROR]\033[1;31m scoring system violating ({-O}+{-E})+({-O2}+{-E2}) <= 127\033[0m\n");
216
+ return -1;
217
+ }
218
+ if (mo->zdrop < mo->zdrop_inv) {
219
+ if (mm_verbose >= 1)
220
+ fprintf(stderr, "[ERROR]\033[1;31m Z-drop should not be less than inversion-Z-drop\033[0m\n");
221
+ return -5;
222
+ }
223
+ if ((mo->flag & MM_F_NO_PRINT_2ND) && (mo->flag & MM_F_ALL_CHAINS)) {
224
+ if (mm_verbose >= 1)
225
+ fprintf(stderr, "[ERROR]\033[1;31m -X/-P and --secondary=no can't be applied at the same time\033[0m\n");
226
+ return -5;
227
+ }
228
+ if ((mo->flag & MM_F_QSTRAND) && ((mo->flag & (MM_F_OUT_SAM|MM_F_SPLICE|MM_F_FRAG_MODE)) || (io->flag & MM_I_HPC))) {
229
+ if (mm_verbose >= 1)
230
+ fprintf(stderr, "[ERROR]\033[1;31m --qstrand doesn't work with -a, -H, --frag or --splice\033[0m\n");
231
+ return -5;
232
+ }
233
+ return 0;
234
+ }