ruby-minigraph 0.0.20.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (89) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +62 -0
  4. data/ext/Rakefile +56 -0
  5. data/ext/cmappy/cmappy.c +7 -0
  6. data/ext/cmappy/cmappy.h +8 -0
  7. data/ext/minigraph/LICENSE.txt +23 -0
  8. data/ext/minigraph/Makefile +66 -0
  9. data/ext/minigraph/NEWS.md +317 -0
  10. data/ext/minigraph/README.md +207 -0
  11. data/ext/minigraph/algo.c +194 -0
  12. data/ext/minigraph/algo.h +33 -0
  13. data/ext/minigraph/asm-call.c +147 -0
  14. data/ext/minigraph/bseq.c +133 -0
  15. data/ext/minigraph/bseq.h +76 -0
  16. data/ext/minigraph/cal_cov.c +139 -0
  17. data/ext/minigraph/doc/example1.png +0 -0
  18. data/ext/minigraph/doc/example2.png +0 -0
  19. data/ext/minigraph/doc/examples.graffle +0 -0
  20. data/ext/minigraph/format.c +241 -0
  21. data/ext/minigraph/galign.c +140 -0
  22. data/ext/minigraph/gchain1.c +532 -0
  23. data/ext/minigraph/gcmisc.c +223 -0
  24. data/ext/minigraph/gfa-aug.c +260 -0
  25. data/ext/minigraph/gfa-base.c +526 -0
  26. data/ext/minigraph/gfa-bbl.c +372 -0
  27. data/ext/minigraph/gfa-ed.c +617 -0
  28. data/ext/minigraph/gfa-io.c +395 -0
  29. data/ext/minigraph/gfa-priv.h +154 -0
  30. data/ext/minigraph/gfa.h +166 -0
  31. data/ext/minigraph/ggen.c +182 -0
  32. data/ext/minigraph/ggen.h +21 -0
  33. data/ext/minigraph/ggsimple.c +570 -0
  34. data/ext/minigraph/gmap.c +211 -0
  35. data/ext/minigraph/index.c +230 -0
  36. data/ext/minigraph/kalloc.c +224 -0
  37. data/ext/minigraph/kalloc.h +82 -0
  38. data/ext/minigraph/kavl.h +414 -0
  39. data/ext/minigraph/kdq.h +134 -0
  40. data/ext/minigraph/ketopt.h +116 -0
  41. data/ext/minigraph/khashl.h +348 -0
  42. data/ext/minigraph/krmq.h +474 -0
  43. data/ext/minigraph/kseq.h +256 -0
  44. data/ext/minigraph/ksort.h +164 -0
  45. data/ext/minigraph/kstring.h +165 -0
  46. data/ext/minigraph/kthread.c +159 -0
  47. data/ext/minigraph/kthread.h +15 -0
  48. data/ext/minigraph/kvec-km.h +105 -0
  49. data/ext/minigraph/kvec.h +110 -0
  50. data/ext/minigraph/lchain.c +441 -0
  51. data/ext/minigraph/main.c +301 -0
  52. data/ext/minigraph/map-algo.c +500 -0
  53. data/ext/minigraph/mgpriv.h +128 -0
  54. data/ext/minigraph/minigraph.1 +359 -0
  55. data/ext/minigraph/minigraph.h +176 -0
  56. data/ext/minigraph/miniwfa.c +834 -0
  57. data/ext/minigraph/miniwfa.h +95 -0
  58. data/ext/minigraph/misc/mgutils.js +1451 -0
  59. data/ext/minigraph/misc.c +12 -0
  60. data/ext/minigraph/options.c +134 -0
  61. data/ext/minigraph/shortk.c +251 -0
  62. data/ext/minigraph/sketch.c +109 -0
  63. data/ext/minigraph/sys.c +147 -0
  64. data/ext/minigraph/sys.h +20 -0
  65. data/ext/minigraph/test/MT-chimp.fa +277 -0
  66. data/ext/minigraph/test/MT-human.fa +239 -0
  67. data/ext/minigraph/test/MT-orangA.fa +276 -0
  68. data/ext/minigraph/test/MT.gfa +19 -0
  69. data/ext/minigraph/tex/Makefile +13 -0
  70. data/ext/minigraph/tex/minigraph.bib +676 -0
  71. data/ext/minigraph/tex/minigraph.tex +986 -0
  72. data/ext/minigraph/tex/plots/CHM13-f1-90.bb.anno.gp +42 -0
  73. data/ext/minigraph/tex/plots/CHM13-f1-90.bb.anno.tbl +13 -0
  74. data/ext/minigraph/tex/plots/CHM13-f1-90.bb.mini-inter-none.win.gp +269 -0
  75. data/ext/minigraph/tex/plots/CHM13-f1-90.bb.mini-inter-none.win.sh +7 -0
  76. data/ext/minigraph/tex/plots/CHM13v1.cen.bed +23 -0
  77. data/ext/minigraph/tex/plots/CHM13v1.size +23 -0
  78. data/ext/minigraph/tex/plots/anno2tbl.js +40 -0
  79. data/ext/minigraph/tex/plots/bedutils.js +367 -0
  80. data/ext/minigraph/tex/plots/chr-plot.js +130 -0
  81. data/ext/minigraph/tex/plots/gen-anno.mak +24 -0
  82. data/ext/minigraph.patch +21 -0
  83. data/lib/minigraph/ffi/constants.rb +230 -0
  84. data/lib/minigraph/ffi/functions.rb +70 -0
  85. data/lib/minigraph/ffi/mappy.rb +8 -0
  86. data/lib/minigraph/ffi.rb +27 -0
  87. data/lib/minigraph/version.rb +5 -0
  88. data/lib/minigraph.rb +72 -0
  89. metadata +159 -0
@@ -0,0 +1,359 @@
1
+ .TH minigraph 1 "20 November 2022" "minigraph-0.20 (r559)" "Bioinformatics tools"
2
+
3
+ .SH NAME
4
+ .PP
5
+ minigraph - sequence-to-graph mapping and incremental sequence graph generation
6
+
7
+ .SH SYNOPSIS
8
+ * Sequence-to-graph mapping:
9
+ .RS 4
10
+ .B minigraph
11
+ .RB [ -x
12
+ .IR preset ]
13
+ .RB [ -c ]
14
+ .RB [ -t
15
+ .IR nThreads ]
16
+ .I graph.gfa
17
+ .I query1.fa
18
+ .RI [ ... ]
19
+ .B >
20
+ .I out.gaf
21
+ .RE
22
+
23
+ * Incremental graph generation:
24
+ .RS 4
25
+ .B minigraph
26
+ .B -x ggs
27
+ .RB [ -c ]
28
+ .RB [ -t
29
+ .IR nThreads ]
30
+ .I initGraph.gfa
31
+ .I sample1Asm.fa
32
+ .RI [ ... ]
33
+ .B >
34
+ .I finalGraph.gfa
35
+
36
+ .SH DESCRIPTION
37
+
38
+ Minigraph is a
39
+ .I proof-of-concept
40
+ sequence-to-graph mapper and graph constructor. It finds approximate locations
41
+ of a query sequence in a sequence graph and incrementally augments an existing
42
+ graph with long query subsequences.
43
+
44
+ .SH OPTIONS
45
+ .SS Indexing options
46
+ .TP 10
47
+ .BI -k \ INT
48
+ Minimizer k-mer length [17]
49
+ .TP
50
+ .BI -w \ INT
51
+ Minimizer window size [11]. A minimizer is the smallest k-mer in a window of w
52
+ consecutive k-mers.
53
+ .SS Mapping options
54
+ .TP 10
55
+ .BI -c
56
+ Perform base alignment; recommended for graph generation
57
+ .TP 10
58
+ .BI -U \ INT1 [, INT2 ]
59
+ Choose the minimizer occurrence threshold within this interval [50,250]
60
+ .TP
61
+ .BI -f \ FLOAT
62
+ Ignore top
63
+ .I FLOAT
64
+ fraction of repetitive minimizers [0.0002]. If this threshold falls within the
65
+ interval set by
66
+ .BR -U ,
67
+ it will be the final threshold; otherwise the lower or the upper bound of
68
+ .B -U
69
+ will be applied.
70
+ .TP
71
+ .BI -j \ FLOAT
72
+ Expected query-graph sequence divergence [0.1]
73
+ .TP
74
+ .BI -g \ NUM
75
+ Stop chain elongation if there are no minimizers within
76
+ .IR NUM -bp
77
+ [10k]. K/k/M/m suffixes are recognized.
78
+ .TP
79
+ .BI -r \ NUM1 [, NUM2 ]
80
+ Bandwidth for the two rounds of chaining [500,20k].
81
+ .I NUM2
82
+ also controls bandwidth for graph chaining.
83
+ .TP
84
+ .BI -n \ INT1 [, INT2 ]
85
+ Drop graph chains consisting of
86
+ .RI < INT1
87
+ minimizers and drop linear chains consisting of
88
+ .RI < INT2
89
+ minimizers [5,3]
90
+ .TP
91
+ .BI -m \ INT1 [, INT2 ]
92
+ Drop graph chains with graph chaining score
93
+ .RI < INT1
94
+ and drop linear chains with linear chaining score
95
+ .RI < INT2
96
+ [50,30]. Linear chaining score equals the approximate number of matching bases
97
+ minus a weak concave gap penalty. Graph chaining score uses a linear gap
98
+ penalty.
99
+ .TP
100
+ .BI -p \ FLOAT
101
+ Minimal secondary-to-primary score ratio to output secondary mappings [0.8].
102
+ Between two chains overlapping over half of the shorter chain (controlled by
103
+ .BR -M ),
104
+ the chain with a lower score is secondary to the chain with a higher score.
105
+ .TP
106
+ .BI -N \ INT
107
+ Output at most
108
+ .I INT
109
+ secondary mappings [5]. This option has no effect when
110
+ .B -P
111
+ is applied.
112
+ .TP
113
+ .B -P
114
+ Retain all chains and don't attempt to set primary chains. Options
115
+ .B -p
116
+ and
117
+ .B -N
118
+ have no effect when this option is in use.
119
+ .TP
120
+ .BI -M \ FLOAT
121
+ Mark as secondary a chain that overlaps with a better chain by
122
+ .I FLOAT
123
+ or more of the shorter chain [0.5]
124
+ .TP
125
+ .BI --max-gap-pre \ NUM
126
+ Similar to
127
+ .B -g
128
+ but used for prefiltering [1000]
129
+ .TP
130
+ .BI --max-lc-iter \ NUM
131
+ Maximum number of iterations for linear chaining [10000]
132
+ .TP
133
+ .BI --max-rmq-size \ NUM
134
+ Maximum size of the RMQ tree [100000]
135
+ .TP
136
+ .BI --max-lc-skip \ INT
137
+ A heuristic that stops linear chaining early [25]
138
+ .TP
139
+ .BI --max-gc-skip \ INT
140
+ Similar to
141
+ .B --max-lc-skip
142
+ but applied to graph chaining [25]
143
+ .TP
144
+ .BI --ref-bonus \ INT
145
+ Bonus for a reference subwalk [0]
146
+ .TP
147
+ .BI --min-cov-blen \ NUM
148
+ Minimum alignment block length to count [1k]
149
+ .TP
150
+ .BI --min-cov-mapq \ INT
151
+ Minimum mapping quality to count [20]
152
+ .SS Graph generation options
153
+ .TP 10
154
+ .BR --ggen =[ simple ]
155
+ Graph generation algorithm. So far only a
156
+ .B simple
157
+ algorithm is implemented [simple]. With this option, all query sequences are
158
+ loaded into memory.
159
+ .TP
160
+ .B --call
161
+ Call the graph path in each bubble and output in a BED-based format:
162
+ .RS
163
+ ctg start end sourceNode sinkNode walk:strand:queryName:qStart:qEnd
164
+ .RE
165
+ .TP
166
+ .BI -q \ INT
167
+ Minimum mapping quality [5]
168
+ .TP
169
+ .BI -l \ NUM
170
+ Minimum chain length to consider [100k]
171
+ .TP
172
+ .BI -d \ NUM
173
+ Minimum chain length for depth calculation [20k]
174
+ .TP
175
+ .BI -L \ INT
176
+ Minimum insertion length [50]
177
+ .TP
178
+ .BI --gg-match-pen \ INT
179
+ Penalty for a pair of matching anchors [5]. Larger value for more fragmented inserts.
180
+ Effective without
181
+ .BR -c .
182
+ .TP
183
+ .BR --ins-qovlp = yes | no
184
+ Forcefully resolve query overlaps [no]. Effective without
185
+ .BR -c .
186
+ .TP
187
+ .BR --inv = yes | no
188
+ Generate graphs with inversions or not [yes]
189
+ .TP
190
+ .B --cov
191
+ Remap and generate segment and link use frequencies. This option triggers GFA
192
+ output. When used with
193
+ .BR --ggen ,
194
+ minigraph writes the frequency of link uses and the average breadth of coverage
195
+ of each segment to the
196
+ .B cf
197
+ tag. When used without
198
+ .BR --ggen ,
199
+ minigraph writes the count of link uses and the average depth of coverage of
200
+ each segment to the
201
+ .B dc
202
+ tag.
203
+ .B
204
+ WARNING:
205
+ THIS OPTION IS DEPRECATED AND MAY BE REMOVED IN FUTURE.
206
+ .SS Input/output options
207
+ .TP 10
208
+ .BI -o \ FILE
209
+ Output alignments to
210
+ .I FILE
211
+ [stdout].
212
+ .TP
213
+ .BI -t \ INT
214
+ Number of threads [4]. Minigraph uses at most three threads when indexing target
215
+ sequences, and uses up to
216
+ .IR INT +1
217
+ threads when mapping (the extra thread is for I/O, which is frequently idle and
218
+ takes little CPU time).
219
+ .TP
220
+ .BI -K \ NUM
221
+ Number of bases loaded into memory to process in a mini-batch [500M].
222
+ K/M/G/k/m/g suffix is accepted. A large
223
+ .I NUM
224
+ helps load balancing in the multi-threading mode, at the cost of increased
225
+ memory. This option has no effect if
226
+ .B --ggen
227
+ is applied.
228
+ .TP
229
+ .B --vc
230
+ In output GAF, show mapping paths in the unstable segment coordinate.
231
+ .TP
232
+ .B -S
233
+ Output linear chains in the format of: `*' segName segLen nMinimizer seqDiv segStart segEnd qStart qEnd
234
+ .TP
235
+ .B --write-mz
236
+ Output linear chains in the format of: `*' segName segLen nMinimizer seqDiv segStart segEnd qStart qEnd
237
+ k-mer segOffsets qOffsets. segOffsets and qOffsets are comma-separated lists
238
+ with each consisting of nMinimizer-1 integers which give the distance from the
239
+ previous minimizer on segments and query, respectively.
240
+ .TP
241
+ .BR --secondary = yes | no
242
+ Whether to output secondary alignments [no]
243
+ .TP
244
+ .BR --show-unmap = yes | no
245
+ Print unmapped query sequences in GAF [no]
246
+ .TP
247
+ .B --version
248
+ Print version number to stdout
249
+ .SS Preset options
250
+ .TP 10
251
+ .BI -x \ STR
252
+ Preset []. This option applies multiple options at the same time. Other options
253
+ on the command line will always override values set by
254
+ .BR -x .
255
+ Available
256
+ .I STR
257
+ are:
258
+ .RS
259
+ .TP 8
260
+ .B lr
261
+ Mapping noisy long reads. This is the same as the default setting.
262
+ .TP
263
+ .B sr
264
+ Mapping short single-end or paired-end reads
265
+ .RB ( -k21
266
+ .B -w10 -U1000,2500 -g100 -r100 -p.5 -n3,2 -m40,25 --heap-sort=yes -K50m --frag --ref-bonus=1
267
+ .BR --min-cov-blen=50 ).
268
+ Paired-end mapping is not supported.
269
+ .TP
270
+ .B asm
271
+ Mapping long contigs or high-quality CCS reads
272
+ .RB ( -k19
273
+ .B -w10 -U10,100 -j.01 -g10k -r1k,150k -n5,5 -m1000,40 -K4g --max-lc-skip=50 --max-gc-skip=50 --min-cov-mapq=5
274
+ .BR --min-cov-blen=100k ).
275
+ .TP
276
+ .B ggs
277
+ Incremental graph generation
278
+ .RB ( -xasm
279
+ .B -N0
280
+ .BR --ggen=simple ).
281
+ .RE
282
+ .SS Miscellaneous options
283
+ .TP 10
284
+ .B --no-kalloc
285
+ Use the libc default allocator instead of the kalloc thread-local allocator.
286
+ This debugging option is mostly used with Valgrind to detect invalid memory
287
+ accesses. Minigraph runs slower with this option, especially in the
288
+ multi-threading mode.
289
+ .SH OUTPUT FORMAT
290
+ .PP
291
+ Minigraph outputs mapping positions in the Graph mApping Format (GAF) by
292
+ default. GAF is a TAB-delimited text format with each line consisting of at
293
+ least 12 fields as are described in the following table:
294
+ .TS
295
+ center box;
296
+ cb | cb | cb
297
+ r | c | l .
298
+ Col Type Description
299
+ _
300
+ 1 string Query sequence name
301
+ 2 int Query sequence length
302
+ 3 int Query start coordinate (0-based; closed)
303
+ 4 int Query end coordinate (0-based; open)
304
+ 5 char `+' if query/path on the same strand; `-' if opposite
305
+ 6 string Path matching /([><][^\\s><]+(:\\d+-\\d+)?)+|([^\\s><]+)/
306
+ 7 int Path sequence length
307
+ 8 int Path start coordinate
308
+ 9 int Path end coordinate
309
+ 10 int Number of matching bases in the mapping
310
+ 11 int Number bases, including gaps, in the mapping
311
+ 12 int Mapping quality (0-255 with 255 for missing)
312
+ .TE
313
+
314
+ .PP
315
+ When alignment is available, column 11 gives the total number of sequence
316
+ matches, mismatches and gaps in the alignment; column 10 divided by column 11
317
+ gives the BLAST-like alignment identity. When alignment is unavailable,
318
+ these two columns are approximate. GAF may optionally have additional fields in
319
+ the SAM-like typed key-value format. Minigraph may output the following tags:
320
+ .TS
321
+ center box;
322
+ cb | cb | cb
323
+ r | c | l .
324
+ Tag Type Description
325
+ _
326
+ tp A Type of aln: P/primary and S/secondary
327
+ cm i Number of minimizers on the chain
328
+ s1 i Chaining score
329
+ s2 i Chaining score of the best secondary chain
330
+ dv f Approximate per-base sequence divergence
331
+ cf f Avg. segment breadth of coverage and link use freq
332
+ dc f Avg. segment depth of coverage and link use counts
333
+ cg Z CIGAR string
334
+ ql B,i Lengths of single-end reads
335
+ .TE
336
+
337
+ .SH LIMITATIONS
338
+ .TP 2
339
+ *
340
+ Minigraph needs to find strong colinear chains first. For a graph consisting of
341
+ many short segments (e.g. one generated from rare SNPs in large populations),
342
+ minigraph will fail to map query sequences.
343
+ .TP
344
+ *
345
+ When connecting colinear chains on graphs, minigraph doesn't always take full
346
+ advantage of base sequences and may miss the optimal alignments.
347
+ .TP
348
+ *
349
+ Minigraph only inserts segments contained in long graph chains. This
350
+ conservative strategy helps to build a relatively accurate graph, but may miss
351
+ more complex events. Other strategies may be explored in the future.
352
+ .TP
353
+ *
354
+ Base alignment has only been evaluated for human. For more diverse genomes,
355
+ the performance may need to be improved.
356
+
357
+ .SH SEE ALSO
358
+ .PP
359
+ minimap2(1), gfatools(1).
@@ -0,0 +1,176 @@
1
+ #ifndef MINIGRAPH_H
2
+ #define MINIGRAPH_H
3
+
4
+ #include <stdint.h>
5
+ #include "gfa.h"
6
+
7
+ #define MG_VERSION "0.20-r559" // library version string; minigraph.1's .TH line carries the same release as "minigraph-0.20 (r559)"
8
+
9
+ #define MG_M_SPLICE 0x10 // MG_M_*: every constant in this run is a distinct single bit (0x10 .. 0x4000000); presumably OR-ed into mg_mapopt_t.flag -- confirm against users
10
+ #define MG_M_SR 0x20
11
+ #define MG_M_FRAG_MODE 0x40
12
+ #define MG_M_FRAG_MERGE 0x80
13
+ #define MG_M_FOR_ONLY 0x100
14
+ #define MG_M_REV_ONLY 0x200
15
+ #define MG_M_HEAP_SORT 0x400
16
+ #define MG_M_VERTEX_COOR 0x800 // NOTE(review): looks related to the --vc output option in minigraph.1 -- confirm
17
+ #define MG_M_ALL_CHAINS 0x1000 // NOTE(review): looks related to -P (retain all chains) -- confirm
18
+ #define MG_M_PRINT_2ND 0x2000 // NOTE(review): looks related to --secondary -- confirm
19
+ #define MG_M_CAL_COV 0x4000 // NOTE(review): looks related to --cov -- confirm
20
+ #define MG_M_RMQ 0x8000
21
+ #define MG_M_COPY_COMMENT 0x10000
22
+ #define MG_M_INDEPEND_SEG 0x20000
23
+ #define MG_M_NO_QUAL 0x40000
24
+ #define MG_M_2_IO_THREADS 0x80000
25
+ #define MG_M_SHOW_UNMAP 0x100000 // NOTE(review): looks related to --show-unmap -- confirm
26
+ #define MG_M_NO_COMP_PATH 0x200000
27
+ #define MG_M_NO_DIAG 0x400000
28
+ #define MG_M_WRITE_LCHAIN 0x800000 // NOTE(review): looks related to -S (output linear chains) -- confirm
29
+ #define MG_M_WRITE_MZ 0x1000000 // NOTE(review): looks related to --write-mz -- confirm
30
+ #define MG_M_SKIP_GCHECK 0x2000000
31
+ #define MG_M_CIGAR 0x4000000 // NOTE(review): looks related to -c (base alignment) -- confirm
32
+
33
+ #define MG_G_NONE 0 // small enumerated values (0, 1), not bit flags; presumably the choices for mg_ggopt_t.algo -- confirm
34
+ #define MG_G_GGSIMPLE 1 // NOTE(review): presumably selected by --ggen=simple in minigraph.1 -- confirm
35
+
36
+ #define MG_G_NO_QOVLP 0x1 // the four MG_G_* constants in this run are distinct single bits (0x1 .. 0x8); presumably OR-ed into mg_ggopt_t.flag -- confirm
37
+ #define MG_G_CAL_COV 0x2
38
+ #define MG_G_NO_INV 0x4 // NOTE(review): looks related to --inv=no -- confirm
39
+ #define MG_G_CALL 0x8 // NOTE(review): looks related to --call -- confirm
40
+
41
+ typedef struct { uint64_t x, y; } mg128_t; // a pair of 64-bit unsigned words (128 bits of payload)
42
+ typedef struct { size_t n, m; mg128_t *a; } mg128_v; // array of mg128_t; n/m are presumably used/allocated element counts (kvec-style) -- confirm
43
+ typedef struct { int32_t n, m; uint32_t *a; } mg32_v; // array of uint32_t; note the counts are int32_t here, not size_t as in mg128_v
44
+ typedef struct { int32_t n, m; uint64_t *a; } mg64_v; // array of uint64_t; counts are int32_t as in mg32_v
45
+
46
+ typedef struct { // indexing options; passed as const mg_idxopt_t* to mg_index(), mg_map_files() and mg_ggen()
47
+ int w, k; // presumably minimizer window size and k-mer length (-w / -k in minigraph.1) -- confirm
48
+ int bucket_bits; // meaning not visible in this header; mg_idx_t.B points to struct mg_idx_bucket_s, so possibly the bucket-count exponent -- confirm
49
+ } mg_idxopt_t;
50
+
51
+ typedef struct { // mapping options. NOTE(review): every option letter named below is a guess from field names vs. minigraph.1; confirm in the option parser
52
+ uint64_t flag; // presumably a bitwise OR of MG_M_* -- confirm
53
+ int64_t mini_batch_size; // presumably -K (bases per mini-batch) -- confirm
54
+ int seed;
55
+ int max_qlen;
56
+ int pe_ori;
57
+ int occ_max1, occ_max1_cap; // presumably the two bounds of -U -- confirm
58
+ float occ_max1_frac; // presumably -f -- confirm
59
+ int bw, bw_long; // presumably -r NUM1,NUM2 (chaining bandwidths) -- confirm
60
+ int rmq_size_cap; // presumably --max-rmq-size -- confirm
61
+ int rmq_rescue_size;
62
+ float rmq_rescue_ratio;
63
+ int max_gap_pre, max_gap, max_gap_ref, max_frag_len; // max_gap_pre/max_gap presumably --max-gap-pre / -g -- confirm
64
+ float div; // presumably -j (expected divergence) -- confirm
65
+ float chn_pen_gap, chn_pen_skip;
66
+ int max_lc_skip, max_lc_iter, max_gc_skip; // presumably --max-lc-skip, --max-lc-iter, --max-gc-skip -- confirm
67
+ int min_lc_cnt, min_lc_score; // "lc" presumably = linear chain; likely INT2 of -n and -m -- confirm
68
+ int min_gc_cnt, min_gc_score; // "gc" presumably = graph chain; likely INT1 of -n and -m -- confirm
69
+ int gdp_max_ed, lc_max_trim, lc_max_occ;
70
+ float mask_level; // presumably -M -- confirm
71
+ int sub_diff;
72
+ int best_n; // presumably -N -- confirm
73
+ float pri_ratio; // presumably -p -- confirm
74
+ int ref_bonus; // presumably --ref-bonus -- confirm
75
+ int64_t cap_kalloc;
76
+ int min_cov_mapq, min_cov_blen; // presumably --min-cov-mapq / --min-cov-blen -- confirm
77
+ } mg_mapopt_t;
78
+
79
+ typedef struct { // graph-generation options. NOTE(review): every option letter named below is a guess from field names vs. minigraph.1; confirm in the option parser
80
+ uint64_t flag; // presumably a bitwise OR of the MG_G_* bit flags (0x1 .. 0x8) -- confirm
81
+ int algo; // presumably MG_G_NONE or MG_G_GGSIMPLE -- confirm
82
+ int min_mapq; // presumably -q -- confirm
83
+ int min_map_len, min_depth_len; // presumably -l / -d -- confirm
84
+ int min_var_len, match_pen; // presumably -L / --gg-match-pen -- confirm
85
+ // parameters specific to ggsimple/ggs
86
+ int ggs_shrink_pen;
87
+ int ggs_min_end_cnt;
88
+ float ggs_min_end_frac;
89
+ // scoring for SW check
90
+ float ggs_max_iden, ggs_min_inv_iden;
91
+ } mg_ggopt_t;
92
+
93
+ typedef struct { // graph index handle: returned by mg_index(), released with mg_idx_destroy()
94
+ const gfa_t *g; // the graph is referenced through a const pointer; whether the index owns it is not visible here
95
+ gfa_edseq_t *es;
96
+ int32_t b, w, k, flag, n_seg; // w/k presumably mirror mg_idxopt_t.w/k and b mirrors bucket_bits -- confirm
97
+ struct mg_idx_bucket_s *B; // index (hidden)
98
+ } mg_idx_t;
99
+
100
+ typedef struct { // presumably one linear chain ("lchain") -- confirm
101
+ int32_t off, cnt:31, inner_pre:1; // cnt and inner_pre are bit-fields (31 + 1 bits); off is a plain int32_t
102
+ uint32_t v;
103
+ int32_t rs, re, qs, qe; // presumably reference/segment start,end and query start,end -- confirm
104
+ int32_t score, dist_pre;
105
+ uint32_t hash_pre;
106
+ } mg_lchain_t;
107
+
108
+ typedef struct { // element type of mg_gchains_t.lc; a smaller record than mg_lchain_t (no coordinates or *_pre fields)
109
+ int32_t off, cnt; // presumably offset and count into an anchor array -- confirm
110
+ uint32_t v;
111
+ int32_t score;
112
+ int32_t ed; // presumably an edit distance -- confirm
113
+ } mg_llchain_t;
114
+
115
+ typedef struct { // alignment record ending in a flexible array member; referenced from mg_gchain_t.p
116
+ int32_t n_cigar, mlen, blen, aplen, ss, ee; // ss: start on the start vertex; ee: end on the end vertex
117
+ uint64_t cigar[]; // flexible array member: storage must be allocated past sizeof(mg_cigar_t); presumably n_cigar entries -- confirm
118
+ } mg_cigar_t;
119
+
120
+ typedef struct { // element type of mg_gchains_t.gc; presumably one graph chain -- confirm
121
+ int32_t id, parent;
122
+ int32_t off, cnt;
123
+ int32_t n_anchor, score;
124
+ int32_t qs, qe; // presumably query start/end -- confirm
125
+ int32_t plen, ps, pe; // presumably path length/start/end (cf. GAF columns 7-9 in minigraph.1) -- confirm
126
+ int32_t blen, mlen; // presumably block length and matching bases (cf. GAF columns 11 and 10) -- confirm
127
+ float div;
128
+ uint32_t hash;
129
+ int32_t subsc, n_sub;
130
+ uint32_t mapq:8, flt:1, dummy:23; // bit-fields totalling 32 bits: 8-bit mapq, 1-bit flt, 23 bits named dummy
131
+ mg_cigar_t *p; // pointer to an mg_cigar_t; when it is set or NULL is not visible in this header
132
+ } mg_gchain_t;
133
+
134
+ typedef struct { // result container returned by mg_map() and filled per segment by mg_map_frag()
135
+ void *km; // presumably the kalloc handle that owns the arrays below -- confirm
136
+ int32_t n_gc, n_lc, n_a, rep_len; // n_gc/n_lc/n_a presumably give the lengths of gc/lc/a -- confirm
137
+ mg_gchain_t *gc;
138
+ mg_llchain_t *lc;
139
+ mg128_t *a; // minimizer positions; see comments above mg_update_anchors() for details
140
+ } mg_gchains_t;
141
+
142
+ typedef struct mg_tbuf_s mg_tbuf_t; // opaque in this header: struct mg_tbuf_s is only forward-declared; use mg_tbuf_init()/mg_tbuf_destroy()
143
+
144
+ extern int mg_verbose, mg_dbg_flag; // declarations only; the definitions live in some .c file not shown here
145
+ extern double mg_realtime0; // presumably the wall-clock start time used for logging -- confirm
146
+
147
+ #ifdef __cplusplus
148
+ extern "C" {
149
+ #endif
150
+
151
+ // options: mg_opt_set() and mg_opt_update() take non-const option pointers and so may modify them; mg_opt_check() takes only const pointers. Return-code meanings are not visible in this header.
152
+ int mg_opt_set(const char *preset, mg_idxopt_t *io, mg_mapopt_t *mo, mg_ggopt_t *go); // preset is presumably a -x name such as "lr", "sr", "asm" or "ggs" -- confirm
153
+ int mg_opt_check(const mg_idxopt_t *io, const mg_mapopt_t *mo, const mg_ggopt_t *go);
154
+ void mg_opt_update(const mg_idx_t *gi, mg_mapopt_t *mo, mg_ggopt_t *go);
155
+
156
+ // index operations
157
+ mg_idx_t *mg_index(gfa_t *g, const mg_idxopt_t *io, int n_threads, mg_mapopt_t *mo); // combine mg_index_core() and mg_opt_update()
158
+ void mg_idx_destroy(mg_idx_t *gi); // presumably releases an index obtained from mg_index() -- confirm
159
+
160
+ // mapping: both mapping calls take a caller-supplied mg_tbuf_t (opaque here) and read the index and options through const pointers
161
+ mg_tbuf_t *mg_tbuf_init(void);
162
+ void mg_tbuf_destroy(mg_tbuf_t *b);
163
+ mg_gchains_t *mg_map(const mg_idx_t *gi, int qlen, const char *seq, mg_tbuf_t *b, const mg_mapopt_t *opt, const char *qname); // ownership of the returned mg_gchains_t is not visible in this header
164
+ void mg_map_frag(const mg_idx_t *gi, int n_segs, const int *qlens, const char **seqs, mg_gchains_t **gcs, mg_tbuf_t *b, const mg_mapopt_t *opt, const char *qname); // qlens/seqs/gcs are presumably parallel arrays of n_segs entries, with results written to gcs -- confirm
165
+
166
+ // high-level mapping APIs
167
+ int mg_map_files(gfa_t *g, int n_fn, const char **fn, const mg_idxopt_t *ipt, const mg_mapopt_t *opt0, int n_threads); // fn is presumably an array of n_fn query file names; return-code meaning not visible here -- confirm
168
+
169
+ // graph generation
170
+ int mg_ggen(gfa_t *g, int32_t n_fn, const char **fn, const mg_idxopt_t *ipt, const mg_mapopt_t *opt0, const mg_ggopt_t *go, int n_threads); // g is non-const, so the graph may be modified in place; fn is presumably n_fn input file names -- confirm
171
+
172
+ #ifdef __cplusplus
173
+ }
174
+ #endif
175
+
176
+ #endif