ruby-minigraph 0.0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +62 -0
  4. data/ext/Rakefile +56 -0
  5. data/ext/cmappy/cmappy.c +7 -0
  6. data/ext/cmappy/cmappy.h +8 -0
  7. data/ext/minigraph/LICENSE.txt +23 -0
  8. data/ext/minigraph/Makefile +66 -0
  9. data/ext/minigraph/NEWS.md +317 -0
  10. data/ext/minigraph/README.md +207 -0
  11. data/ext/minigraph/algo.c +194 -0
  12. data/ext/minigraph/algo.h +33 -0
  13. data/ext/minigraph/asm-call.c +147 -0
  14. data/ext/minigraph/bseq.c +133 -0
  15. data/ext/minigraph/bseq.h +76 -0
  16. data/ext/minigraph/cal_cov.c +139 -0
  17. data/ext/minigraph/doc/example1.png +0 -0
  18. data/ext/minigraph/doc/example2.png +0 -0
  19. data/ext/minigraph/doc/examples.graffle +0 -0
  20. data/ext/minigraph/format.c +241 -0
  21. data/ext/minigraph/galign.c +140 -0
  22. data/ext/minigraph/gchain1.c +532 -0
  23. data/ext/minigraph/gcmisc.c +223 -0
  24. data/ext/minigraph/gfa-aug.c +260 -0
  25. data/ext/minigraph/gfa-base.c +526 -0
  26. data/ext/minigraph/gfa-bbl.c +372 -0
  27. data/ext/minigraph/gfa-ed.c +617 -0
  28. data/ext/minigraph/gfa-io.c +395 -0
  29. data/ext/minigraph/gfa-priv.h +154 -0
  30. data/ext/minigraph/gfa.h +166 -0
  31. data/ext/minigraph/ggen.c +182 -0
  32. data/ext/minigraph/ggen.h +21 -0
  33. data/ext/minigraph/ggsimple.c +570 -0
  34. data/ext/minigraph/gmap.c +211 -0
  35. data/ext/minigraph/index.c +230 -0
  36. data/ext/minigraph/kalloc.c +224 -0
  37. data/ext/minigraph/kalloc.h +82 -0
  38. data/ext/minigraph/kavl.h +414 -0
  39. data/ext/minigraph/kdq.h +134 -0
  40. data/ext/minigraph/ketopt.h +116 -0
  41. data/ext/minigraph/khashl.h +348 -0
  42. data/ext/minigraph/krmq.h +474 -0
  43. data/ext/minigraph/kseq.h +256 -0
  44. data/ext/minigraph/ksort.h +164 -0
  45. data/ext/minigraph/kstring.h +165 -0
  46. data/ext/minigraph/kthread.c +159 -0
  47. data/ext/minigraph/kthread.h +15 -0
  48. data/ext/minigraph/kvec-km.h +105 -0
  49. data/ext/minigraph/kvec.h +110 -0
  50. data/ext/minigraph/lchain.c +441 -0
  51. data/ext/minigraph/main.c +301 -0
  52. data/ext/minigraph/map-algo.c +500 -0
  53. data/ext/minigraph/mgpriv.h +128 -0
  54. data/ext/minigraph/minigraph.1 +359 -0
  55. data/ext/minigraph/minigraph.h +176 -0
  56. data/ext/minigraph/miniwfa.c +834 -0
  57. data/ext/minigraph/miniwfa.h +95 -0
  58. data/ext/minigraph/misc/mgutils.js +1451 -0
  59. data/ext/minigraph/misc.c +12 -0
  60. data/ext/minigraph/options.c +134 -0
  61. data/ext/minigraph/shortk.c +251 -0
  62. data/ext/minigraph/sketch.c +109 -0
  63. data/ext/minigraph/sys.c +147 -0
  64. data/ext/minigraph/sys.h +20 -0
  65. data/ext/minigraph/test/MT-chimp.fa +277 -0
  66. data/ext/minigraph/test/MT-human.fa +239 -0
  67. data/ext/minigraph/test/MT-orangA.fa +276 -0
  68. data/ext/minigraph/test/MT.gfa +19 -0
  69. data/ext/minigraph/tex/Makefile +13 -0
  70. data/ext/minigraph/tex/minigraph.bib +676 -0
  71. data/ext/minigraph/tex/minigraph.tex +986 -0
  72. data/ext/minigraph/tex/plots/CHM13-f1-90.bb.anno.gp +42 -0
  73. data/ext/minigraph/tex/plots/CHM13-f1-90.bb.anno.tbl +13 -0
  74. data/ext/minigraph/tex/plots/CHM13-f1-90.bb.mini-inter-none.win.gp +269 -0
  75. data/ext/minigraph/tex/plots/CHM13-f1-90.bb.mini-inter-none.win.sh +7 -0
  76. data/ext/minigraph/tex/plots/CHM13v1.cen.bed +23 -0
  77. data/ext/minigraph/tex/plots/CHM13v1.size +23 -0
  78. data/ext/minigraph/tex/plots/anno2tbl.js +40 -0
  79. data/ext/minigraph/tex/plots/bedutils.js +367 -0
  80. data/ext/minigraph/tex/plots/chr-plot.js +130 -0
  81. data/ext/minigraph/tex/plots/gen-anno.mak +24 -0
  82. data/ext/minigraph.patch +21 -0
  83. data/lib/minigraph/ffi/constants.rb +230 -0
  84. data/lib/minigraph/ffi/functions.rb +70 -0
  85. data/lib/minigraph/ffi/mappy.rb +8 -0
  86. data/lib/minigraph/ffi.rb +27 -0
  87. data/lib/minigraph/version.rb +5 -0
  88. data/lib/minigraph.rb +72 -0
  89. metadata +159 -0
@@ -0,0 +1,359 @@
1
+ .TH minigraph 1 "20 November 2022" "minigraph-0.20 (r559)" "Bioinformatics tools"
2
+
3
+ .SH NAME
4
+ .PP
5
+ minigraph - sequence-to-graph mapping and incremental sequence graph generation
6
+
7
+ .SH SYNOPSIS
8
+ * Sequence-to-graph mapping:
9
+ .RS 4
10
+ .B minigraph
11
+ .RB [ -x
12
+ .IR preset ]
13
+ .RB [ -c ]
14
+ .RB [ -t
15
+ .IR nThreads ]
16
+ .I graph.gfa
17
+ .I query1.fa
18
+ .RI [ ... ]
19
+ .B >
20
+ .I out.gaf
21
+ .RE
22
+
23
+ * Incremental graph generation:
24
+ .RS 4
25
+ .B minigraph
26
+ .B -x ggs
27
+ .RB [ -c ]
28
+ .RB [ -t
29
+ .IR nThreads ]
30
+ .I initGraph.gfa
31
+ .I sample1Asm.fa
32
+ .RI [ ... ]
33
+ .B >
34
+ .I finalGraph.gfa
35
+
36
+ .SH DESCRIPTION
37
+
38
+ Minigraph is a
39
+ .I proof-of-concept
40
+ sequence-to-graph mapper and graph constructor. It finds approximate locations
41
+ of a query sequence in a sequence graph and incrementally augments an existing
42
+ graph with long query subsequences.
43
+
44
+ .SH OPTIONS
45
+ .SS Indexing options
46
+ .TP 10
47
+ .BI -k \ INT
48
+ Minimizer k-mer length [17]
49
+ .TP
50
+ .BI -w \ INT
51
+ Minimizer window size [11]. A minimizer is the smallest k-mer in a window of w
52
+ consecutive k-mers.
53
+ .SS Mapping options
54
+ .TP 10
55
+ .BI -c
56
+ Perform base alignment; recommended for graph generation
57
+ .TP 10
58
+ .BI -U \ INT1 [, INT2 ]
59
+ Choose the minimizer occurrence threshold within this interval [50,250]
60
+ .TP
61
+ .BI -f \ FLOAT
62
+ Ignore top
63
+ .I FLOAT
64
+ fraction of repetitive minimizers [0.0002]. If this threshold falls within the
65
+ interval set by
66
+ .BR -U ,
67
+ it will be the final threshold; otherwise the lower or the upper bound of
68
+ .B -U
69
+ will be applied.
70
+ .TP
71
+ .BI -j \ FLOAT
72
+ Expected query-graph sequence divergence [0.1]
73
+ .TP
74
+ .BI -g \ NUM
75
+ Stop chain elongation if there are no minimizers within
76
+ .IR NUM -bp
77
+ [10k]. K/k/M/m suffixes are recognized.
78
+ .TP
79
+ .BI -r \ NUM1 [, NUM2 ]
80
+ Bandwidth for the two rounds of chaining [500,20k].
81
+ .I NUM2
82
+ also controls bandwidth for graph chaining.
83
+ .TP
84
+ .BI -n \ INT1 [, INT2 ]
85
+ Drop graph chains consisting of
86
+ .RI < INT1
87
+ minimizers and drop linear chains consisting of
88
+ .RI < INT2
89
+ minimizers [5,3]
90
+ .TP
91
+ .BI -m \ INT1 [, INT2 ]
92
+ Drop graph chains with graph chaining score
93
+ .RI < INT1
94
+ and drop linear chains with linear chaining score
95
+ .RI < INT2
96
+ [50,30]. Linear chaining score equals the approximate number of matching bases
97
+ minus a weak concave gap penalty. Graph chaining score uses a linear gap
98
+ penalty.
99
+ .TP
100
+ .BI -p \ FLOAT
101
+ Minimal secondary-to-primary score ratio to output secondary mappings [0.8].
102
+ Between two chains overlapping over half of the shorter chain (controlled by
103
+ .BR -M ),
104
+ the chain with a lower score is secondary to the chain with a higher score.
105
+ .TP
106
+ .BI -N \ INT
107
+ Output at most
108
+ .I INT
109
+ secondary mappings [5]. This option has no effect when
110
+ .B -P
111
+ is applied.
112
+ .TP
113
+ .B -P
114
+ Retain all chains and don't attempt to set primary chains. Options
115
+ .B -p
116
+ and
117
+ .B -N
118
+ have no effect when this option is in use.
119
+ .TP
120
+ .BI -M \ FLOAT
121
+ Mark as secondary a chain that overlaps with a better chain by
122
+ .I FLOAT
123
+ or more of the shorter chain [0.5]
124
+ .TP
125
+ .BI --max-gap-pre \ NUM
126
+ Similar to
127
+ .B -g
128
+ but used for prefiltering [1000]
129
+ .TP
130
+ .BI --max-lc-iter \ NUM
131
+ Maximum number of iterations for linear chaining [10000]
132
+ .TP
133
+ .BI --max-rmq-size \ NUM
134
+ Maximum size of the RMQ tree [100000]
135
+ .TP
136
+ .BI --max-lc-skip \ INT
137
+ A heuristic that stops linear chaining early [25]
138
+ .TP
139
+ .BI --max-gc-skip \ INT
140
+ Similar to
141
+ .B --max-lc-skip
142
+ but applied to graph chaining [25]
143
+ .TP
144
+ .BI --ref-bonus \ INT
145
+ Bonus for a reference subwalk [0]
146
+ .TP
147
+ .BI --min-cov-blen \ NUM
148
+ Minimum alignment block length to count [1k]
149
+ .TP
150
+ .BI --min-cov-mapq \ INT
151
+ Minimum mapping quality to count [20]
152
+ .SS Graph generation options
153
+ .TP 10
154
+ .BR --ggen =[ simple ]
155
+ Graph generation algorithm. So far only a
156
+ .B simple
157
+ algorithm is implemented [simple]. With this option, all query sequences are
158
+ loaded into memory.
159
+ .TP
160
+ .B --call
161
+ Call the graph path in each bubble and output in a BED-based format:
162
+ .RS
163
+ ctg start end sourceNode sinkNode walk:strand:queryName:qStart:qEnd
164
+ .RE
165
+ .TP
166
+ .BI -q \ INT
167
+ Minimum mapping quality [5]
168
+ .TP
169
+ .BI -l \ NUM
170
+ Minimum chain length to consider [100k]
171
+ .TP
172
+ .BI -d \ NUM
173
+ Minimum chain length for depth calculation [20k]
174
+ .TP
175
+ .BI -L \ INT
176
+ Minimum insertion length [50]
177
+ .TP
178
+ .BI --gg-match-pen \ INT
179
+ Penalty for a pair of matching anchors [5]. Larger value for more fragmented inserts.
180
+ Effective without
181
+ .BR -c .
182
+ .TP
183
+ .BR --ins-qovlp = yes | no
184
+ Forcefully resolve query overlaps [no]. Effective without
185
+ .BR -c .
186
+ .TP
187
+ .BR --inv = yes | no
188
+ Generate graphs with inversions or not [yes]
189
+ .TP
190
+ .B --cov
191
+ Remap and generate segment and link use frequencies. This option triggers GFA
192
+ output. When used with
193
+ .BR --ggen ,
194
+ minigraph writes the frequency of link uses and the average breadth of coverage
195
+ of each segment to the
196
+ .B cf
197
+ tag. When used without
198
+ .BR --ggen ,
199
+ minigraph writes the count of link uses and the average depth of coverage of
200
+ each segment to the
201
+ .B dc
202
+ tag.
203
+ .B
204
+ WARNING:
205
+ THIS OPTION IS DEPRECATED AND MAY BE REMOVED IN FUTURE.
206
+ .SS Input/output options
207
+ .TP 10
208
+ .BI -o \ FILE
209
+ Output alignments to
210
+ .I FILE
211
+ [stdout].
212
+ .TP
213
+ .BI -t \ INT
214
+ Number of threads [4]. Minigraph uses at most three threads when indexing target
215
+ sequences, and uses up to
216
+ .IR INT +1
217
+ threads when mapping (the extra thread is for I/O, which is frequently idle and
218
+ takes little CPU time).
219
+ .TP
220
+ .BI -K \ NUM
221
+ Number of bases loaded into memory to process in a mini-batch [500M].
222
+ K/M/G/k/m/g suffix is accepted. A large
223
+ .I NUM
224
+ helps load balancing in the multi-threading mode, at the cost of increased
225
+ memory. This option has no effect if
226
+ .B --ggen
227
+ is applied.
228
+ .TP
229
+ .B --vc
230
+ In output GAF, show mapping paths in the unstable segment coordinate.
231
+ .TP
232
+ .B -S
233
+ Output linear chains in the format of: `*' segName segLen nMinimizer seqDiv segStart segEnd qStart qEnd
234
+ .TP
235
+ .B --write-mz
236
+ Output linear chains in the format of: `*' segName segLen nMinimizer seqDiv segStart segEnd qStart qEnd
237
+ k-mer segOffsets qOffsets. segOffsets and qOffsets are comma-separated lists
238
+ with each consisting of nMinimizer-1 integers which give the distance from the
239
+ previous minimizer on segments and query, respectively.
240
+ .TP
241
+ .BR --secondary = yes | no
242
+ Whether to output secondary alignments [no]
243
+ .TP
244
+ .BR --show-unmap = yes | no
245
+ Print unmapped query sequences in GAF [no]
246
+ .TP
247
+ .B --version
248
+ Print version number to stdout
249
+ .SS Preset options
250
+ .TP 10
251
+ .BI -x \ STR
252
+ Preset []. This option applies multiple options at the same time. Other options
253
+ on the command line will always override values set by
254
+ .BR -x .
255
+ Available
256
+ .I STR
257
+ are:
258
+ .RS
259
+ .TP 8
260
+ .B lr
261
+ Mapping noisy long reads. This is the same as the default setting.
262
+ .TP
263
+ .B sr
264
+ Mapping short single-end or paired-end reads
265
+ .RB ( -k21
266
+ .B -w10 -U1000,2500 -g100 -r100 -p.5 -n3,2 -m40,25 --heap-sort=yes -K50m --frag --ref-bonus=1
267
+ .BR --min-cov-blen=50 ).
268
+ Paired-end mapping is not supported.
269
+ .TP
270
+ .B asm
271
+ Mapping long contigs or high-quality CCS reads
272
+ .RB ( -k19
273
+ .B -w10 -U10,100 -j.01 -g10k -r1k,150k -n5,5 -m1000,40 -K4g --max-lc-skip=50 --max-gc-skip=50 --min-cov-mapq=5
274
+ .BR --min-cov-blen=100k ).
275
+ .TP
276
+ .B ggs
277
+ Incremental graph generation
278
+ .RB ( -xasm
279
+ .B -N0
280
+ .BR --ggen=simple ).
281
+ .RE
282
+ .SS Miscellaneous options
283
+ .TP 10
284
+ .B --no-kalloc
285
+ Use the libc default allocator instead of the kalloc thread-local allocator.
286
+ This debugging option is mostly used with Valgrind to detect invalid memory
287
+ accesses. Minigraph runs slower with this option, especially in the
288
+ multi-threading mode.
289
+ .SH OUTPUT FORMAT
290
+ .PP
291
+ Minigraph outputs mapping positions in the Graph mApping Format (GAF) by
292
+ default. GAF is a TAB-delimited text format with each line consisting of at
293
+ least 12 fields as are described in the following table:
294
+ .TS
295
+ center box;
296
+ cb | cb | cb
297
+ r | c | l .
298
+ Col Type Description
299
+ _
300
+ 1 string Query sequence name
301
+ 2 int Query sequence length
302
+ 3 int Query start coordinate (0-based; closed)
303
+ 4 int Query end coordinate (0-based; open)
304
+ 5 char `+' if query/path on the same strand; `-' if opposite
305
+ 6 string Path matching /([><][^\\s><]+(:\\d+-\\d+)?)+|([^\\s><]+)/
306
+ 7 int Path sequence length
307
+ 8 int Path start coordinate
308
+ 9 int Path end coordinate
309
+ 10 int Number of matching bases in the mapping
310
+ 11 int Number of bases, including gaps, in the mapping
311
+ 12 int Mapping quality (0-255 with 255 for missing)
312
+ .TE
313
+
314
+ .PP
315
+ When alignment is available, column 11 gives the total number of sequence
316
+ matches, mismatches and gaps in the alignment; column 10 divided by column 11
317
+ gives the BLAST-like alignment identity. When alignment is unavailable,
318
+ these two columns are approximate. GAF may optionally have additional fields in
319
+ the SAM-like typed key-value format. Minigraph may output the following tags:
320
+ .TS
321
+ center box;
322
+ cb | cb | cb
323
+ r | c | l .
324
+ Tag Type Description
325
+ _
326
+ tp A Type of aln: P/primary and S/secondary
327
+ cm i Number of minimizers on the chain
328
+ s1 i Chaining score
329
+ s2 i Chaining score of the best secondary chain
330
+ dv f Approximate per-base sequence divergence
331
+ cf f Avg. segment breadth of coverage and link use freq
332
+ dc f Avg. segment depth of coverage and link use counts
333
+ cg Z CIGAR string
334
+ ql B,i Lengths of single-end reads
335
+ .TE
336
+
337
+ .SH LIMITATIONS
338
+ .TP 2
339
+ *
340
+ Minigraph needs to find strong colinear chains first. For a graph consisting of
341
+ many short segments (e.g. one generated from rare SNPs in large populations),
342
+ minigraph will fail to map query sequences.
343
+ .TP
344
+ *
345
+ When connecting colinear chains on graphs, minigraph doesn't always take full
346
+ advantage of base sequences and may miss the optimal alignments.
347
+ .TP
348
+ *
349
+ Minigraph only inserts segments contained in long graph chains. This
350
+ conservative strategy helps to build a relatively accurate graph, but may miss
351
+ more complex events. Other strategies may be explored in the future.
352
+ .TP
353
+ *
354
+ Base alignment has only been evaluated for human. For more diverse genomes,
355
+ the performance may need to be improved.
356
+
357
+ .SH SEE ALSO
358
+ .PP
359
+ minimap2(1), gfatools(1).
@@ -0,0 +1,176 @@
1
+ #ifndef MINIGRAPH_H
2
+ #define MINIGRAPH_H
3
+
4
+ #include <stdint.h>
5
+ #include "gfa.h"
6
+
7
+ #define MG_VERSION "0.20-r559"
8
+
9
+ #define MG_M_SPLICE 0x10
10
+ #define MG_M_SR 0x20
11
+ #define MG_M_FRAG_MODE 0x40
12
+ #define MG_M_FRAG_MERGE 0x80
13
+ #define MG_M_FOR_ONLY 0x100
14
+ #define MG_M_REV_ONLY 0x200
15
+ #define MG_M_HEAP_SORT 0x400
16
+ #define MG_M_VERTEX_COOR 0x800
17
+ #define MG_M_ALL_CHAINS 0x1000
18
+ #define MG_M_PRINT_2ND 0x2000
19
+ #define MG_M_CAL_COV 0x4000
20
+ #define MG_M_RMQ 0x8000
21
+ #define MG_M_COPY_COMMENT 0x10000
22
+ #define MG_M_INDEPEND_SEG 0x20000
23
+ #define MG_M_NO_QUAL 0x40000
24
+ #define MG_M_2_IO_THREADS 0x80000
25
+ #define MG_M_SHOW_UNMAP 0x100000
26
+ #define MG_M_NO_COMP_PATH 0x200000
27
+ #define MG_M_NO_DIAG 0x400000
28
+ #define MG_M_WRITE_LCHAIN 0x800000
29
+ #define MG_M_WRITE_MZ 0x1000000
30
+ #define MG_M_SKIP_GCHECK 0x2000000
31
+ #define MG_M_CIGAR 0x4000000
32
+
33
+ #define MG_G_NONE 0
34
+ #define MG_G_GGSIMPLE 1
35
+
36
+ #define MG_G_NO_QOVLP 0x1
37
+ #define MG_G_CAL_COV 0x2
38
+ #define MG_G_NO_INV 0x4
39
+ #define MG_G_CALL 0x8
40
+
41
+ typedef struct { uint64_t x, y; } mg128_t; // pair of unsigned 64-bit integers
42
+ typedef struct { size_t n, m; mg128_t *a; } mg128_v; // array of mg128_t; n and m are presumably length and capacity -- TODO confirm
43
+ typedef struct { int32_t n, m; uint32_t *a; } mg32_v; // array of uint32_t with 32-bit signed n and m (presumably length/capacity -- TODO confirm)
44
+ typedef struct { int32_t n, m; uint64_t *a; } mg64_v; // array of uint64_t with 32-bit signed n and m (presumably length/capacity -- TODO confirm)
45
+
46
+ typedef struct { // indexing options; passed as "io"/"ipt" to mg_opt_set(), mg_index(), mg_map_files() and mg_ggen()
47
+ int w, k; // presumably the minimizer window size (-w) and k-mer length (-k) -- TODO confirm against options.c
48
+ int bucket_bits; // NOTE(review): meaning not visible in this header; verify against index.c
49
+ } mg_idxopt_t;
50
+
51
+ typedef struct { // mapping options; passed as "mo"/"opt"/"opt0" to the option, mapping and graph-generation functions declared in this header
52
+ uint64_t flag; // presumably a bitwise OR of the MG_M_* constants defined in this header -- TODO confirm
53
+ int64_t mini_batch_size; // presumably the number of bases per mini-batch (-K) -- TODO confirm
54
+ int seed;
55
+ int max_qlen;
56
+ int pe_ori;
57
+ int occ_max1, occ_max1_cap; // presumably the minimizer occurrence threshold interval (-U) -- TODO confirm
58
+ float occ_max1_frac; // presumably the fraction of repetitive minimizers to ignore (-f) -- TODO confirm
59
+ int bw, bw_long; // presumably the chaining bandwidths of the two rounds (-r NUM1,NUM2) -- TODO confirm
60
+ int rmq_size_cap; // presumably the cap on the RMQ tree size (--max-rmq-size) -- TODO confirm
61
+ int rmq_rescue_size;
62
+ float rmq_rescue_ratio;
63
+ int max_gap_pre, max_gap, max_gap_ref, max_frag_len; // max_gap_pre/max_gap presumably map to --max-gap-pre and -g -- TODO confirm
64
+ float div; // presumably the expected query-graph sequence divergence (-j) -- TODO confirm
65
+ float chn_pen_gap, chn_pen_skip;
66
+ int max_lc_skip, max_lc_iter, max_gc_skip; // presumably --max-lc-skip, --max-lc-iter and --max-gc-skip -- TODO confirm
67
+ int min_lc_cnt, min_lc_score; // presumably the linear-chain minimizer count and score cutoffs (second values of -n and -m) -- TODO confirm
68
+ int min_gc_cnt, min_gc_score; // presumably the graph-chain minimizer count and score cutoffs (first values of -n and -m) -- TODO confirm
69
+ int gdp_max_ed, lc_max_trim, lc_max_occ;
70
+ float mask_level; // presumably the overlap fraction used to mark secondary chains (-M) -- TODO confirm
71
+ int sub_diff;
72
+ int best_n; // presumably the maximum number of secondary mappings to output (-N) -- TODO confirm
73
+ float pri_ratio; // presumably the minimal secondary-to-primary score ratio (-p) -- TODO confirm
74
+ int ref_bonus; // presumably the bonus for a reference subwalk (--ref-bonus) -- TODO confirm
75
+ int64_t cap_kalloc;
76
+ int min_cov_mapq, min_cov_blen; // presumably --min-cov-mapq and --min-cov-blen -- TODO confirm
77
+ } mg_mapopt_t;
78
+
79
+ typedef struct { // graph generation options; passed as "go" to mg_opt_set(), mg_opt_check(), mg_opt_update() and mg_ggen()
80
+ uint64_t flag; // presumably a bitwise OR of the MG_G_NO_QOVLP/MG_G_CAL_COV/MG_G_NO_INV/MG_G_CALL constants -- TODO confirm
81
+ int algo; // presumably MG_G_NONE or MG_G_GGSIMPLE -- TODO confirm
82
+ int min_mapq; // presumably the minimum mapping quality (-q) -- TODO confirm
83
+ int min_map_len, min_depth_len; // presumably the minimum chain lengths set by -l and -d -- TODO confirm
84
+ int min_var_len, match_pen; // presumably the minimum insertion length (-L) and --gg-match-pen -- TODO confirm
85
+ // parameters specific to ggsimple/ggs
86
+ int ggs_shrink_pen;
87
+ int ggs_min_end_cnt;
88
+ float ggs_min_end_frac;
89
+ // scoring for SW check
90
+ float ggs_max_iden, ggs_min_inv_iden;
91
+ } mg_ggopt_t;
92
+
93
+ typedef struct { // graph index handle; returned by mg_index() and released with mg_idx_destroy()
94
+ const gfa_t *g; // read-only pointer to a gfa_t (type declared in gfa.h)
95
+ gfa_edseq_t *es;
96
+ int32_t b, w, k, flag, n_seg; // w and k presumably mirror mg_idxopt_t; b presumably mirrors bucket_bits -- TODO confirm
97
+ struct mg_idx_bucket_s *B; // index (hidden)
98
+ } mg_idx_t;
99
+
100
+ typedef struct { // NOTE(review): presumably one linear chain, going by the type name -- confirm against lchain.c
101
+ int32_t off, cnt:31, inner_pre:1; // cnt and inner_pre are bit-fields sharing one 32-bit unit (31 + 1 bits)
102
+ uint32_t v;
103
+ int32_t rs, re, qs, qe; // presumably start/end on the segment (rs, re) and on the query (qs, qe) -- TODO confirm
104
+ int32_t score, dist_pre;
105
+ uint32_t hash_pre;
106
+ } mg_lchain_t;
107
+
108
+ typedef struct { // element type of the lc array in mg_gchains_t
109
+ int32_t off, cnt; // presumably an offset and a count into an anchor array -- TODO confirm
110
+ uint32_t v;
111
+ int32_t score;
112
+ int32_t ed; // NOTE(review): possibly an edit distance; verify against the code that fills it
113
+ } mg_llchain_t;
114
+
115
+ typedef struct { // fixed header followed by a variable-length array of 64-bit CIGAR entries
116
+ int32_t n_cigar, mlen, blen, aplen, ss, ee; // ss: start on the start vertex; ee: end on the end vertex
117
+ uint64_t cigar[]; // flexible array member; its length is presumably n_cigar -- TODO confirm
118
+ } mg_cigar_t;
119
+
120
+ typedef struct { // element type of the gc array in mg_gchains_t
121
+ int32_t id, parent;
122
+ int32_t off, cnt;
123
+ int32_t n_anchor, score;
124
+ int32_t qs, qe; // presumably query start and end coordinates -- TODO confirm
125
+ int32_t plen, ps, pe; // presumably path length, path start and path end -- TODO confirm
126
+ int32_t blen, mlen; // presumably the block length and number of matching bases reported in the output -- TODO confirm
127
+ float div; // presumably the approximate per-base sequence divergence -- TODO confirm
128
+ uint32_t hash;
129
+ int32_t subsc, n_sub;
130
+ uint32_t mapq:8, flt:1, dummy:23; // three bit-fields packed into 32 bits: 8-bit mapq, 1-bit flt, 23 remaining bits
131
+ mg_cigar_t *p; // pointer to an mg_cigar_t; NOTE(review): whether it can be NULL is not visible here
132
+ } mg_gchain_t;
133
+
134
+ typedef struct { // result type returned by mg_map() and filled through the gcs argument of mg_map_frag()
135
+ void *km; // NOTE(review): presumably a kalloc memory-pool handle; verify against kalloc.h
136
+ int32_t n_gc, n_lc, n_a, rep_len; // n_gc, n_lc and n_a are presumably the lengths of gc, lc and a -- TODO confirm
137
+ mg_gchain_t *gc;
138
+ mg_llchain_t *lc;
139
+ mg128_t *a; // minimizer positions; see comments above mg_update_anchors() for details
140
+ } mg_gchains_t;
141
+
142
+ typedef struct mg_tbuf_s mg_tbuf_t;
143
+
144
+ extern int mg_verbose, mg_dbg_flag;
145
+ extern double mg_realtime0;
146
+
147
+ #ifdef __cplusplus
148
+ extern "C" {
149
+ #endif
150
+
151
+ // options
152
+ int mg_opt_set(const char *preset, mg_idxopt_t *io, mg_mapopt_t *mo, mg_ggopt_t *go);
153
+ int mg_opt_check(const mg_idxopt_t *io, const mg_mapopt_t *mo, const mg_ggopt_t *go);
154
+ void mg_opt_update(const mg_idx_t *gi, mg_mapopt_t *mo, mg_ggopt_t *go);
155
+
156
+ // index operations
157
+ mg_idx_t *mg_index(gfa_t *g, const mg_idxopt_t *io, int n_threads, mg_mapopt_t *mo); // combine mg_index_core() and mg_opt_update()
158
+ void mg_idx_destroy(mg_idx_t *gi);
159
+
160
+ // mapping
161
+ mg_tbuf_t *mg_tbuf_init(void);
162
+ void mg_tbuf_destroy(mg_tbuf_t *b);
163
+ mg_gchains_t *mg_map(const mg_idx_t *gi, int qlen, const char *seq, mg_tbuf_t *b, const mg_mapopt_t *opt, const char *qname);
164
+ void mg_map_frag(const mg_idx_t *gi, int n_segs, const int *qlens, const char **seqs, mg_gchains_t **gcs, mg_tbuf_t *b, const mg_mapopt_t *opt, const char *qname);
165
+
166
+ // high-level mapping APIs
167
+ int mg_map_files(gfa_t *g, int n_fn, const char **fn, const mg_idxopt_t *ipt, const mg_mapopt_t *opt0, int n_threads);
168
+
169
+ // graph generation
170
+ int mg_ggen(gfa_t *g, int32_t n_fn, const char **fn, const mg_idxopt_t *ipt, const mg_mapopt_t *opt0, const mg_ggopt_t *go, int n_threads);
171
+
172
+ #ifdef __cplusplus
173
+ }
174
+ #endif
175
+
176
+ #endif