minimap2 0.2.22.0 → 0.2.24.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +60 -76
- data/ext/Rakefile +55 -0
- data/ext/cmappy/cmappy.c +129 -0
- data/ext/cmappy/cmappy.h +44 -0
- data/ext/minimap2/FAQ.md +46 -0
- data/ext/minimap2/LICENSE.txt +24 -0
- data/ext/minimap2/MANIFEST.in +10 -0
- data/ext/minimap2/Makefile +132 -0
- data/ext/minimap2/Makefile.simde +97 -0
- data/ext/minimap2/NEWS.md +821 -0
- data/ext/minimap2/README.md +403 -0
- data/ext/minimap2/align.c +1020 -0
- data/ext/minimap2/bseq.c +169 -0
- data/ext/minimap2/bseq.h +64 -0
- data/ext/minimap2/code_of_conduct.md +30 -0
- data/ext/minimap2/cookbook.md +243 -0
- data/ext/minimap2/esterr.c +64 -0
- data/ext/minimap2/example.c +63 -0
- data/ext/minimap2/format.c +559 -0
- data/ext/minimap2/hit.c +466 -0
- data/ext/minimap2/index.c +775 -0
- data/ext/minimap2/kalloc.c +205 -0
- data/ext/minimap2/kalloc.h +76 -0
- data/ext/minimap2/kdq.h +132 -0
- data/ext/minimap2/ketopt.h +120 -0
- data/ext/minimap2/khash.h +615 -0
- data/ext/minimap2/krmq.h +474 -0
- data/ext/minimap2/kseq.h +256 -0
- data/ext/minimap2/ksort.h +153 -0
- data/ext/minimap2/ksw2.h +184 -0
- data/ext/minimap2/ksw2_dispatch.c +96 -0
- data/ext/minimap2/ksw2_extd2_sse.c +402 -0
- data/ext/minimap2/ksw2_exts2_sse.c +416 -0
- data/ext/minimap2/ksw2_extz2_sse.c +313 -0
- data/ext/minimap2/ksw2_ll_sse.c +152 -0
- data/ext/minimap2/kthread.c +159 -0
- data/ext/minimap2/kthread.h +15 -0
- data/ext/minimap2/kvec.h +105 -0
- data/ext/minimap2/lchain.c +369 -0
- data/ext/minimap2/main.c +459 -0
- data/ext/minimap2/map.c +714 -0
- data/ext/minimap2/minimap.h +410 -0
- data/ext/minimap2/minimap2.1 +725 -0
- data/ext/minimap2/misc/README.md +179 -0
- data/ext/minimap2/misc/mmphase.js +335 -0
- data/ext/minimap2/misc/paftools.js +3149 -0
- data/ext/minimap2/misc.c +162 -0
- data/ext/minimap2/mmpriv.h +132 -0
- data/ext/minimap2/options.c +234 -0
- data/ext/minimap2/pe.c +177 -0
- data/ext/minimap2/python/README.rst +196 -0
- data/ext/minimap2/python/cmappy.h +152 -0
- data/ext/minimap2/python/cmappy.pxd +153 -0
- data/ext/minimap2/python/mappy.pyx +273 -0
- data/ext/minimap2/python/minimap2.py +39 -0
- data/ext/minimap2/sdust.c +213 -0
- data/ext/minimap2/sdust.h +25 -0
- data/ext/minimap2/seed.c +131 -0
- data/ext/minimap2/setup.py +55 -0
- data/ext/minimap2/sketch.c +143 -0
- data/ext/minimap2/splitidx.c +84 -0
- data/ext/minimap2/sse2neon/emmintrin.h +1689 -0
- data/ext/minimap2/test/MT-human.fa +278 -0
- data/ext/minimap2/test/MT-orang.fa +276 -0
- data/ext/minimap2/test/q-inv.fa +4 -0
- data/ext/minimap2/test/q2.fa +2 -0
- data/ext/minimap2/test/t-inv.fa +127 -0
- data/ext/minimap2/test/t2.fa +2 -0
- data/ext/minimap2/tex/Makefile +21 -0
- data/ext/minimap2/tex/bioinfo.cls +930 -0
- data/ext/minimap2/tex/blasr-mc.eval +17 -0
- data/ext/minimap2/tex/bowtie2-s3.sam.eval +28 -0
- data/ext/minimap2/tex/bwa-s3.sam.eval +52 -0
- data/ext/minimap2/tex/bwa.eval +55 -0
- data/ext/minimap2/tex/eval2roc.pl +33 -0
- data/ext/minimap2/tex/graphmap.eval +4 -0
- data/ext/minimap2/tex/hs38-simu.sh +10 -0
- data/ext/minimap2/tex/minialign.eval +49 -0
- data/ext/minimap2/tex/minimap2.bib +460 -0
- data/ext/minimap2/tex/minimap2.tex +724 -0
- data/ext/minimap2/tex/mm2-s3.sam.eval +62 -0
- data/ext/minimap2/tex/mm2-update.tex +240 -0
- data/ext/minimap2/tex/mm2.approx.eval +12 -0
- data/ext/minimap2/tex/mm2.eval +13 -0
- data/ext/minimap2/tex/natbib.bst +1288 -0
- data/ext/minimap2/tex/natbib.sty +803 -0
- data/ext/minimap2/tex/ngmlr.eval +38 -0
- data/ext/minimap2/tex/roc.gp +60 -0
- data/ext/minimap2/tex/snap-s3.sam.eval +62 -0
- data/ext/minimap2.patch +19 -0
- data/lib/minimap2/aligner.rb +4 -4
- data/lib/minimap2/alignment.rb +11 -11
- data/lib/minimap2/ffi/constants.rb +20 -16
- data/lib/minimap2/ffi/functions.rb +5 -0
- data/lib/minimap2/ffi.rb +4 -5
- data/lib/minimap2/version.rb +2 -2
- data/lib/minimap2.rb +51 -15
- metadata +97 -79
- data/lib/minimap2/ffi_helper.rb +0 -53
- data/vendor/libminimap2.so +0 -0
@@ -0,0 +1,410 @@
|
|
1
|
+
#ifndef MINIMAP2_H
|
2
|
+
#define MINIMAP2_H
|
3
|
+
|
4
|
+
#include <stdint.h>
|
5
|
+
#include <stdio.h>
|
6
|
+
#include <sys/types.h>
|
7
|
+
|
8
|
+
#define MM_F_NO_DIAG 0x001 // no exact diagonal hit
|
9
|
+
#define MM_F_NO_DUAL 0x002 // skip pairs where query name is lexicographically larger than target name
|
10
|
+
#define MM_F_CIGAR 0x004
|
11
|
+
#define MM_F_OUT_SAM 0x008
|
12
|
+
#define MM_F_NO_QUAL 0x010
|
13
|
+
#define MM_F_OUT_CG 0x020
|
14
|
+
#define MM_F_OUT_CS 0x040
|
15
|
+
#define MM_F_SPLICE 0x080 // splice mode
|
16
|
+
#define MM_F_SPLICE_FOR 0x100 // match GT-AG
|
17
|
+
#define MM_F_SPLICE_REV 0x200 // match CT-AC, the reverse complement of GT-AG
|
18
|
+
#define MM_F_NO_LJOIN 0x400
|
19
|
+
#define MM_F_OUT_CS_LONG 0x800
|
20
|
+
#define MM_F_SR 0x1000
|
21
|
+
#define MM_F_FRAG_MODE 0x2000
|
22
|
+
#define MM_F_NO_PRINT_2ND 0x4000
|
23
|
+
#define MM_F_2_IO_THREADS 0x8000
|
24
|
+
#define MM_F_LONG_CIGAR 0x10000
|
25
|
+
#define MM_F_INDEPEND_SEG 0x20000
|
26
|
+
#define MM_F_SPLICE_FLANK 0x40000
|
27
|
+
#define MM_F_SOFTCLIP 0x80000
|
28
|
+
#define MM_F_FOR_ONLY 0x100000
|
29
|
+
#define MM_F_REV_ONLY 0x200000
|
30
|
+
#define MM_F_HEAP_SORT 0x400000
|
31
|
+
#define MM_F_ALL_CHAINS 0x800000
|
32
|
+
#define MM_F_OUT_MD 0x1000000
|
33
|
+
#define MM_F_COPY_COMMENT 0x2000000
|
34
|
+
#define MM_F_EQX 0x4000000 // use =/X instead of M
|
35
|
+
#define MM_F_PAF_NO_HIT 0x8000000 // output unmapped reads to PAF
|
36
|
+
#define MM_F_NO_END_FLT 0x10000000
|
37
|
+
#define MM_F_HARD_MLEVEL 0x20000000
|
38
|
+
#define MM_F_SAM_HIT_ONLY 0x40000000
|
39
|
+
#define MM_F_RMQ (0x80000000LL)
|
40
|
+
#define MM_F_QSTRAND (0x100000000LL)
|
41
|
+
#define MM_F_NO_INV (0x200000000LL)
|
42
|
+
#define MM_F_NO_HASH_NAME (0x400000000LL)
|
43
|
+
|
44
|
+
#define MM_I_HPC 0x1
|
45
|
+
#define MM_I_NO_SEQ 0x2
|
46
|
+
#define MM_I_NO_NAME 0x4
|
47
|
+
|
48
|
+
#define MM_IDX_MAGIC "MMI\2"
|
49
|
+
|
50
|
+
#define MM_MAX_SEG 255
|
51
|
+
|
52
|
+
#define MM_CIGAR_MATCH 0
|
53
|
+
#define MM_CIGAR_INS 1
|
54
|
+
#define MM_CIGAR_DEL 2
|
55
|
+
#define MM_CIGAR_N_SKIP 3
|
56
|
+
#define MM_CIGAR_SOFTCLIP 4
|
57
|
+
#define MM_CIGAR_HARDCLIP 5
|
58
|
+
#define MM_CIGAR_PADDING 6
|
59
|
+
#define MM_CIGAR_EQ_MATCH 7
|
60
|
+
#define MM_CIGAR_X_MISMATCH 8
|
61
|
+
|
62
|
+
#define MM_CIGAR_STR "MIDNSHP=XB"
|
63
|
+
|
64
|
+
#ifdef __cplusplus
|
65
|
+
extern "C" {
|
66
|
+
#endif
|
67
|
+
|
68
|
+
// emulate 128-bit integers and arrays
|
69
|
+
typedef struct { uint64_t x, y; } mm128_t;
|
70
|
+
typedef struct { size_t n, m; mm128_t *a; } mm128_v;
|
71
|
+
|
72
|
+
// minimap2 index
|
73
|
+
typedef struct {
|
74
|
+
char *name; // name of the db sequence
|
75
|
+
uint64_t offset; // offset in mm_idx_t::S
|
76
|
+
uint32_t len; // length
|
77
|
+
uint32_t is_alt;
|
78
|
+
} mm_idx_seq_t;
|
79
|
+
|
80
|
+
typedef struct {
|
81
|
+
int32_t b, w, k, flag;
|
82
|
+
uint32_t n_seq; // number of reference sequences
|
83
|
+
int32_t index;
|
84
|
+
int32_t n_alt;
|
85
|
+
mm_idx_seq_t *seq; // sequence name, length and offset
|
86
|
+
uint32_t *S; // 4-bit packed sequence
|
87
|
+
struct mm_idx_bucket_s *B; // index (hidden)
|
88
|
+
struct mm_idx_intv_s *I; // intervals (hidden)
|
89
|
+
void *km, *h;
|
90
|
+
} mm_idx_t;
|
91
|
+
|
92
|
+
// minimap2 alignment
|
93
|
+
typedef struct {
|
94
|
+
uint32_t capacity; // the capacity of cigar[]
|
95
|
+
int32_t dp_score, dp_max, dp_max2; // DP score; score of the max-scoring segment; score of the best alternate mappings
|
96
|
+
uint32_t n_ambi:30, trans_strand:2; // number of ambiguous bases; transcript strand: 0 for unknown, 1 for +, 2 for -
|
97
|
+
uint32_t n_cigar; // number of cigar operations in cigar[]
|
98
|
+
uint32_t cigar[];
|
99
|
+
} mm_extra_t;
|
100
|
+
|
101
|
+
typedef struct {
|
102
|
+
int32_t id; // ID for internal uses (see also parent below)
|
103
|
+
int32_t cnt; // number of minimizers; if on the reverse strand
|
104
|
+
int32_t rid; // reference index; if this is an alignment from inversion rescue
|
105
|
+
int32_t score; // DP alignment score
|
106
|
+
int32_t qs, qe, rs, re; // query start and end; reference start and end
|
107
|
+
int32_t parent, subsc; // parent==id if primary; best alternate mapping score
|
108
|
+
int32_t as; // offset in the a[] array (for internal uses only)
|
109
|
+
int32_t mlen, blen; // seeded exact match length; seeded alignment block length
|
110
|
+
int32_t n_sub; // number of suboptimal mappings
|
111
|
+
int32_t score0; // initial chaining score (before chain merging/spliting)
|
112
|
+
uint32_t mapq:8, split:2, rev:1, inv:1, sam_pri:1, proper_frag:1, pe_thru:1, seg_split:1, seg_id:8, split_inv:1, is_alt:1, strand_retained:1, dummy:5;
|
113
|
+
uint32_t hash;
|
114
|
+
float div;
|
115
|
+
mm_extra_t *p;
|
116
|
+
} mm_reg1_t;
|
117
|
+
|
118
|
+
// indexing and mapping options
|
119
|
+
typedef struct {
|
120
|
+
short k, w, flag, bucket_bits;
|
121
|
+
int64_t mini_batch_size;
|
122
|
+
uint64_t batch_size;
|
123
|
+
} mm_idxopt_t;
|
124
|
+
|
125
|
+
typedef struct {
|
126
|
+
int64_t flag; // see MM_F_* macros
|
127
|
+
int seed;
|
128
|
+
int sdust_thres; // score threshold for SDUST; 0 to disable
|
129
|
+
|
130
|
+
int max_qlen; // max query length
|
131
|
+
|
132
|
+
int bw, bw_long; // bandwidth
|
133
|
+
int max_gap, max_gap_ref; // break a chain if there are no minimizers in a max_gap window
|
134
|
+
int max_frag_len;
|
135
|
+
int max_chain_skip, max_chain_iter;
|
136
|
+
int min_cnt; // min number of minimizers on each chain
|
137
|
+
int min_chain_score; // min chaining score
|
138
|
+
float chain_gap_scale;
|
139
|
+
float chain_skip_scale;
|
140
|
+
int rmq_size_cap, rmq_inner_dist;
|
141
|
+
int rmq_rescue_size;
|
142
|
+
float rmq_rescue_ratio;
|
143
|
+
|
144
|
+
float mask_level;
|
145
|
+
int mask_len;
|
146
|
+
float pri_ratio;
|
147
|
+
int best_n; // top best_n chains are subjected to DP alignment
|
148
|
+
|
149
|
+
float alt_drop;
|
150
|
+
|
151
|
+
int a, b, q, e, q2, e2; // matching score, mismatch, gap-open and gap-ext penalties
|
152
|
+
int sc_ambi; // score when one or both bases are "N"
|
153
|
+
int noncan; // cost of non-canonical splicing sites
|
154
|
+
int junc_bonus;
|
155
|
+
int zdrop, zdrop_inv; // break alignment if alignment score drops too fast along the diagonal
|
156
|
+
int end_bonus;
|
157
|
+
int min_dp_max; // drop an alignment if the score of the max scoring segment is below this threshold
|
158
|
+
int min_ksw_len;
|
159
|
+
int anchor_ext_len, anchor_ext_shift;
|
160
|
+
float max_clip_ratio; // drop an alignment if BOTH ends are clipped above this ratio
|
161
|
+
|
162
|
+
int rank_min_len;
|
163
|
+
float rank_frac;
|
164
|
+
|
165
|
+
int pe_ori, pe_bonus;
|
166
|
+
|
167
|
+
float mid_occ_frac; // only used by mm_mapopt_update(); see below
|
168
|
+
float q_occ_frac;
|
169
|
+
int32_t min_mid_occ, max_mid_occ;
|
170
|
+
int32_t mid_occ; // ignore seeds with occurrences above this threshold
|
171
|
+
int32_t max_occ, max_max_occ, occ_dist;
|
172
|
+
int64_t mini_batch_size; // size of a batch of query bases to process in parallel
|
173
|
+
int64_t max_sw_mat;
|
174
|
+
int64_t cap_kalloc;
|
175
|
+
|
176
|
+
const char *split_prefix;
|
177
|
+
} mm_mapopt_t;
|
178
|
+
|
179
|
+
// index reader
|
180
|
+
typedef struct {
|
181
|
+
int is_idx, n_parts;
|
182
|
+
int64_t idx_size;
|
183
|
+
mm_idxopt_t opt;
|
184
|
+
FILE *fp_out;
|
185
|
+
union {
|
186
|
+
struct mm_bseq_file_s *seq;
|
187
|
+
FILE *idx;
|
188
|
+
} fp;
|
189
|
+
} mm_idx_reader_t;
|
190
|
+
|
191
|
+
// memory buffer for thread-local storage during mapping
|
192
|
+
typedef struct mm_tbuf_s mm_tbuf_t;
|
193
|
+
|
194
|
+
// global variables
|
195
|
+
extern int mm_verbose, mm_dbg_flag; // verbose level: 0 for no info, 1 for error, 2 for warning, 3 for message (default); debugging flag
|
196
|
+
extern double mm_realtime0; // wall-clock timer
|
197
|
+
|
198
|
+
/**
|
199
|
+
* Set default or preset parameters
|
200
|
+
*
|
201
|
+
* @param preset NULL to set all parameters as default; otherwise apply preset to affected parameters
|
202
|
+
* @param io pointer to indexing parameters
|
203
|
+
* @param mo pointer to mapping parameters
|
204
|
+
*
|
205
|
+
* @return 0 if success; -1 if _present_ unknown
|
206
|
+
*/
|
207
|
+
int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo);
|
208
|
+
int mm_check_opt(const mm_idxopt_t *io, const mm_mapopt_t *mo);
|
209
|
+
|
210
|
+
/**
|
211
|
+
* Update mm_mapopt_t::mid_occ via mm_mapopt_t::mid_occ_frac
|
212
|
+
*
|
213
|
+
* If mm_mapopt_t::mid_occ is 0, this function sets it to a number such that no
|
214
|
+
* more than mm_mapopt_t::mid_occ_frac of minimizers in the index have a higher
|
215
|
+
* occurrence.
|
216
|
+
*
|
217
|
+
* @param opt mapping parameters
|
218
|
+
* @param mi minimap2 index
|
219
|
+
*/
|
220
|
+
void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi);
|
221
|
+
|
222
|
+
void mm_mapopt_max_intron_len(mm_mapopt_t *opt, int max_intron_len);
|
223
|
+
|
224
|
+
/**
|
225
|
+
* Initialize an index reader
|
226
|
+
*
|
227
|
+
* @param fn index or fasta/fastq file name (this function tests the file type)
|
228
|
+
* @param opt indexing parameters
|
229
|
+
* @param fn_out if not NULL, write built index to this file
|
230
|
+
*
|
231
|
+
* @return an index reader on success; NULL if fail to open _fn_
|
232
|
+
*/
|
233
|
+
mm_idx_reader_t *mm_idx_reader_open(const char *fn, const mm_idxopt_t *opt, const char *fn_out);
|
234
|
+
|
235
|
+
/**
|
236
|
+
* Read/build an index
|
237
|
+
*
|
238
|
+
* If the input file is an index file, this function reads one part of the
|
239
|
+
* index and returns. If the input file is a sequence file (fasta or fastq),
|
240
|
+
* this function constructs the index for about mm_idxopt_t::batch_size bases.
|
241
|
+
* Importantly, for a huge collection of sequences, this function may only
|
242
|
+
* return an index for part of sequences. It needs to be repeatedly called
|
243
|
+
* to traverse the entire index/sequence file.
|
244
|
+
*
|
245
|
+
* @param r index reader
|
246
|
+
* @param n_threads number of threads for constructing index
|
247
|
+
*
|
248
|
+
* @return an index on success; NULL if reaching the end of the input file
|
249
|
+
*/
|
250
|
+
mm_idx_t *mm_idx_reader_read(mm_idx_reader_t *r, int n_threads);
|
251
|
+
|
252
|
+
/**
|
253
|
+
* Destroy/deallocate an index reader
|
254
|
+
*
|
255
|
+
* @param r index reader
|
256
|
+
*/
|
257
|
+
void mm_idx_reader_close(mm_idx_reader_t *r);
|
258
|
+
|
259
|
+
int mm_idx_reader_eof(const mm_idx_reader_t *r);
|
260
|
+
|
261
|
+
/**
|
262
|
+
* Check whether the file contains a minimap2 index
|
263
|
+
*
|
264
|
+
* @param fn file name
|
265
|
+
*
|
266
|
+
* @return the file size if fn is an index file; 0 if fn is not.
|
267
|
+
*/
|
268
|
+
int64_t mm_idx_is_idx(const char *fn);
|
269
|
+
|
270
|
+
/**
|
271
|
+
* Load a part of an index
|
272
|
+
*
|
273
|
+
* Given a uni-part index, this function loads the entire index into memory.
|
274
|
+
* Given a multi-part index, it loads one part only and places the file pointer
|
275
|
+
* at the end of that part.
|
276
|
+
*
|
277
|
+
* @param fp pointer to FILE object
|
278
|
+
*
|
279
|
+
* @return minimap2 index read from fp
|
280
|
+
*/
|
281
|
+
mm_idx_t *mm_idx_load(FILE *fp);
|
282
|
+
|
283
|
+
/**
|
284
|
+
* Append an index (or one part of a full index) to file
|
285
|
+
*
|
286
|
+
* @param fp pointer to FILE object
|
287
|
+
* @param mi minimap2 index
|
288
|
+
*/
|
289
|
+
void mm_idx_dump(FILE *fp, const mm_idx_t *mi);
|
290
|
+
|
291
|
+
/**
|
292
|
+
* Create an index from strings in memory
|
293
|
+
*
|
294
|
+
* @param w minimizer window size
|
295
|
+
* @param k minimizer k-mer size
|
296
|
+
* @param is_hpc use HPC k-mer if true
|
297
|
+
* @param bucket_bits number of bits for the first level of the hash table
|
298
|
+
* @param n number of sequences
|
299
|
+
* @param seq sequences in A/C/G/T
|
300
|
+
* @param name sequence names; could be NULL
|
301
|
+
*
|
302
|
+
* @return minimap2 index
|
303
|
+
*/
|
304
|
+
mm_idx_t *mm_idx_str(int w, int k, int is_hpc, int bucket_bits, int n, const char **seq, const char **name);
|
305
|
+
|
306
|
+
/**
|
307
|
+
* Print index statistics to stderr
|
308
|
+
*
|
309
|
+
* @param mi minimap2 index
|
310
|
+
*/
|
311
|
+
void mm_idx_stat(const mm_idx_t *idx);
|
312
|
+
|
313
|
+
/**
|
314
|
+
* Destroy/deallocate an index
|
315
|
+
*
|
316
|
+
* @param r minimap2 index
|
317
|
+
*/
|
318
|
+
void mm_idx_destroy(mm_idx_t *mi);
|
319
|
+
|
320
|
+
/**
|
321
|
+
* Initialize a thread-local buffer for mapping
|
322
|
+
*
|
323
|
+
* Each mapping thread requires a buffer specific to the thread (see mm_map()
|
324
|
+
* below). The primary purpose of this buffer is to reduce frequent heap
|
325
|
+
* allocations across threads. A buffer shall not be used by two or more
|
326
|
+
* threads.
|
327
|
+
*
|
328
|
+
* @return pointer to a thread-local buffer
|
329
|
+
*/
|
330
|
+
mm_tbuf_t *mm_tbuf_init(void);
|
331
|
+
|
332
|
+
/**
|
333
|
+
* Destroy/deallocate a thread-local buffer for mapping
|
334
|
+
*
|
335
|
+
* @param b the buffer
|
336
|
+
*/
|
337
|
+
void mm_tbuf_destroy(mm_tbuf_t *b);
|
338
|
+
|
339
|
+
void *mm_tbuf_get_km(mm_tbuf_t *b);
|
340
|
+
|
341
|
+
/**
|
342
|
+
* Align a query sequence against an index
|
343
|
+
*
|
344
|
+
* This function possibly finds multiple alignments of the query sequence.
|
345
|
+
* The returned array and the mm_reg1_t::p field of each element are allocated
|
346
|
+
* with malloc().
|
347
|
+
*
|
348
|
+
* @param mi minimap2 index
|
349
|
+
* @param l_seq length of the query sequence
|
350
|
+
* @param seq the query sequence
|
351
|
+
* @param n_regs number of hits (out)
|
352
|
+
* @param b thread-local buffer; two mm_map() calls shall not use one buffer at the same time!
|
353
|
+
* @param opt mapping parameters
|
354
|
+
* @param name query name, used for all-vs-all overlapping and debugging
|
355
|
+
*
|
356
|
+
* @return an array of hits which need to be deallocated with free() together
|
357
|
+
* with mm_reg1_t::p of each element. The size is written to _n_regs_.
|
358
|
+
*/
|
359
|
+
mm_reg1_t *mm_map(const mm_idx_t *mi, int l_seq, const char *seq, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *name);
|
360
|
+
|
361
|
+
void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **seqs, int *n_regs, mm_reg1_t **regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *qname);
|
362
|
+
|
363
|
+
/**
|
364
|
+
* Align a fasta/fastq file and print alignments to stdout
|
365
|
+
*
|
366
|
+
* @param idx minimap2 index
|
367
|
+
* @param fn fasta/fastq file name
|
368
|
+
* @param opt mapping parameters
|
369
|
+
* @param n_threads number of threads
|
370
|
+
*
|
371
|
+
* @return 0 on success; -1 if _fn_ can't be read
|
372
|
+
*/
|
373
|
+
int mm_map_file(const mm_idx_t *idx, const char *fn, const mm_mapopt_t *opt, int n_threads);
|
374
|
+
|
375
|
+
int mm_map_file_frag(const mm_idx_t *idx, int n_segs, const char **fn, const mm_mapopt_t *opt, int n_threads);
|
376
|
+
|
377
|
+
/**
|
378
|
+
* Generate the cs tag (new in 2.12)
|
379
|
+
*
|
380
|
+
* @param km memory blocks; set to NULL if unsure
|
381
|
+
* @param buf buffer to write the cs/MD tag; typicall NULL on the first call
|
382
|
+
* @param max_len max length of the buffer; typically set to 0 on the first call
|
383
|
+
* @param mi index
|
384
|
+
* @param r alignment
|
385
|
+
* @param seq query sequence
|
386
|
+
* @param no_iden true to use : instead of =
|
387
|
+
*
|
388
|
+
* @return the length of cs
|
389
|
+
*/
|
390
|
+
int mm_gen_cs(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq, int no_iden);
|
391
|
+
int mm_gen_MD(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq);
|
392
|
+
|
393
|
+
// query sequence name and sequence in the minimap2 index
|
394
|
+
int mm_idx_index_name(mm_idx_t *mi);
|
395
|
+
int mm_idx_name2id(const mm_idx_t *mi, const char *name);
|
396
|
+
int mm_idx_getseq(const mm_idx_t *mi, uint32_t rid, uint32_t st, uint32_t en, uint8_t *seq);
|
397
|
+
|
398
|
+
int mm_idx_alt_read(mm_idx_t *mi, const char *fn);
|
399
|
+
int mm_idx_bed_read(mm_idx_t *mi, const char *fn, int read_junc);
|
400
|
+
int mm_idx_bed_junc(const mm_idx_t *mi, int32_t ctg, int32_t st, int32_t en, uint8_t *s);
|
401
|
+
|
402
|
+
// deprecated APIs for backward compatibility
|
403
|
+
void mm_mapopt_init(mm_mapopt_t *opt);
|
404
|
+
mm_idx_t *mm_idx_build(const char *fn, int w, int k, int flag, int n_threads);
|
405
|
+
|
406
|
+
#ifdef __cplusplus
|
407
|
+
}
|
408
|
+
#endif
|
409
|
+
|
410
|
+
#endif // MINIMAP2_H
|