minimap2 0.0.4 → 0.2.23.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +113 -98
- data/ext/Rakefile +41 -0
- data/ext/cmappy/cmappy.c +129 -0
- data/ext/cmappy/cmappy.h +44 -0
- data/ext/minimap2/FAQ.md +46 -0
- data/ext/minimap2/LICENSE.txt +24 -0
- data/ext/minimap2/MANIFEST.in +10 -0
- data/ext/minimap2/Makefile +132 -0
- data/ext/minimap2/Makefile.simde +97 -0
- data/ext/minimap2/NEWS.md +807 -0
- data/ext/minimap2/README.md +403 -0
- data/ext/minimap2/align.c +1020 -0
- data/ext/minimap2/bseq.c +169 -0
- data/ext/minimap2/bseq.h +64 -0
- data/ext/minimap2/code_of_conduct.md +30 -0
- data/ext/minimap2/cookbook.md +243 -0
- data/ext/minimap2/esterr.c +64 -0
- data/ext/minimap2/example.c +63 -0
- data/ext/minimap2/format.c +559 -0
- data/ext/minimap2/hit.c +466 -0
- data/ext/minimap2/index.c +775 -0
- data/ext/minimap2/kalloc.c +205 -0
- data/ext/minimap2/kalloc.h +76 -0
- data/ext/minimap2/kdq.h +132 -0
- data/ext/minimap2/ketopt.h +120 -0
- data/ext/minimap2/khash.h +615 -0
- data/ext/minimap2/krmq.h +474 -0
- data/ext/minimap2/kseq.h +256 -0
- data/ext/minimap2/ksort.h +153 -0
- data/ext/minimap2/ksw2.h +184 -0
- data/ext/minimap2/ksw2_dispatch.c +96 -0
- data/ext/minimap2/ksw2_extd2_sse.c +402 -0
- data/ext/minimap2/ksw2_exts2_sse.c +416 -0
- data/ext/minimap2/ksw2_extz2_sse.c +313 -0
- data/ext/minimap2/ksw2_ll_sse.c +152 -0
- data/ext/minimap2/kthread.c +159 -0
- data/ext/minimap2/kthread.h +15 -0
- data/ext/minimap2/kvec.h +105 -0
- data/ext/minimap2/lchain.c +344 -0
- data/ext/minimap2/main.c +455 -0
- data/ext/minimap2/map.c +714 -0
- data/ext/minimap2/minimap.h +409 -0
- data/ext/minimap2/minimap2.1 +722 -0
- data/ext/minimap2/misc/README.md +179 -0
- data/ext/minimap2/misc/mmphase.js +335 -0
- data/ext/minimap2/misc/paftools.js +3149 -0
- data/ext/minimap2/misc.c +162 -0
- data/ext/minimap2/mmpriv.h +131 -0
- data/ext/minimap2/options.c +233 -0
- data/ext/minimap2/pe.c +177 -0
- data/ext/minimap2/python/README.rst +196 -0
- data/ext/minimap2/python/cmappy.h +152 -0
- data/ext/minimap2/python/cmappy.pxd +153 -0
- data/ext/minimap2/python/mappy.pyx +273 -0
- data/ext/minimap2/python/minimap2.py +39 -0
- data/ext/minimap2/sdust.c +213 -0
- data/ext/minimap2/sdust.h +25 -0
- data/ext/minimap2/seed.c +131 -0
- data/ext/minimap2/setup.py +55 -0
- data/ext/minimap2/sketch.c +143 -0
- data/ext/minimap2/splitidx.c +84 -0
- data/ext/minimap2/sse2neon/emmintrin.h +1689 -0
- data/ext/minimap2/test/MT-human.fa +278 -0
- data/ext/minimap2/test/MT-orang.fa +276 -0
- data/ext/minimap2/test/q-inv.fa +4 -0
- data/ext/minimap2/test/q2.fa +2 -0
- data/ext/minimap2/test/t-inv.fa +127 -0
- data/ext/minimap2/test/t2.fa +2 -0
- data/ext/minimap2/tex/Makefile +21 -0
- data/ext/minimap2/tex/bioinfo.cls +930 -0
- data/ext/minimap2/tex/blasr-mc.eval +17 -0
- data/ext/minimap2/tex/bowtie2-s3.sam.eval +28 -0
- data/ext/minimap2/tex/bwa-s3.sam.eval +52 -0
- data/ext/minimap2/tex/bwa.eval +55 -0
- data/ext/minimap2/tex/eval2roc.pl +33 -0
- data/ext/minimap2/tex/graphmap.eval +4 -0
- data/ext/minimap2/tex/hs38-simu.sh +10 -0
- data/ext/minimap2/tex/minialign.eval +49 -0
- data/ext/minimap2/tex/minimap2.bib +460 -0
- data/ext/minimap2/tex/minimap2.tex +724 -0
- data/ext/minimap2/tex/mm2-s3.sam.eval +62 -0
- data/ext/minimap2/tex/mm2-update.tex +240 -0
- data/ext/minimap2/tex/mm2.approx.eval +12 -0
- data/ext/minimap2/tex/mm2.eval +13 -0
- data/ext/minimap2/tex/natbib.bst +1288 -0
- data/ext/minimap2/tex/natbib.sty +803 -0
- data/ext/minimap2/tex/ngmlr.eval +38 -0
- data/ext/minimap2/tex/roc.gp +60 -0
- data/ext/minimap2/tex/snap-s3.sam.eval +62 -0
- data/ext/minimap2.patch +19 -0
- data/ext/vendor/libminimap2.so +0 -0
- data/lib/minimap2/aligner.rb +16 -5
- data/lib/minimap2/alignment.rb +6 -2
- data/lib/minimap2/ffi/constants.rb +74 -53
- data/lib/minimap2/ffi/functions.rb +5 -0
- data/lib/minimap2/ffi.rb +1 -2
- data/lib/minimap2/version.rb +2 -1
- data/lib/minimap2.rb +67 -22
- metadata +98 -64
- data/lib/minimap2/ffi_helper.rb +0 -53
@@ -0,0 +1,196 @@
|
|
1
|
+
==============================
|
2
|
+
Mappy: Minimap2 Python Binding
|
3
|
+
==============================
|
4
|
+
|
5
|
+
Mappy provides a convenient interface to `minimap2
|
6
|
+
<https://github.com/lh3/minimap2>`_, a fast and accurate C program to align
|
7
|
+
genomic and transcribed nucleotide sequences.
|
8
|
+
|
9
|
+
Installation
|
10
|
+
------------
|
11
|
+
|
12
|
+
Mappy depends on `zlib <http://zlib.net>`_. It can be installed with `pip
|
13
|
+
<https://en.wikipedia.org/wiki/Pip_(package_manager)>`_:
|
14
|
+
|
15
|
+
.. code:: shell
|
16
|
+
|
17
|
+
pip install --user mappy
|
18
|
+
|
19
|
+
or from the minimap2 github repo (`Cython <http://cython.org>`_ required):
|
20
|
+
|
21
|
+
.. code:: shell
|
22
|
+
|
23
|
+
git clone https://github.com/lh3/minimap2
|
24
|
+
cd minimap2
|
25
|
+
python setup.py install
|
26
|
+
|
27
|
+
Usage
|
28
|
+
-----
|
29
|
+
|
30
|
+
The following Python script demonstrates the key functionality of mappy:
|
31
|
+
|
32
|
+
.. code:: python
|
33
|
+
|
34
|
+
import mappy as mp
|
35
|
+
a = mp.Aligner("test/MT-human.fa") # load or build index
|
36
|
+
if not a: raise Exception("ERROR: failed to load/build index")
|
37
|
+
s = a.seq("MT_human", 100, 200) # retrieve a subsequence from the index
|
38
|
+
print(mp.revcomp(s)) # reverse complement
|
39
|
+
for name, seq, qual in mp.fastx_read("test/MT-orang.fa"): # read a fasta/q sequence
|
40
|
+
for hit in a.map(seq): # traverse alignments
|
41
|
+
print("{}\t{}\t{}\t{}".format(hit.ctg, hit.r_st, hit.r_en, hit.cigar_str))
|
42
|
+
|
43
|
+
APIs
|
44
|
+
----
|
45
|
+
|
46
|
+
Mappy implements two classes and two global functions.
|
47
|
+
|
48
|
+
Class mappy.Aligner
|
49
|
+
~~~~~~~~~~~~~~~~~~~
|
50
|
+
|
51
|
+
.. code:: python
|
52
|
+
|
53
|
+
mappy.Aligner(fn_idx_in=None, preset=None, ...)
|
54
|
+
|
55
|
+
This constructor accepts the following arguments:
|
56
|
+
|
57
|
+
* **fn_idx_in**: index or sequence file name. Minimap2 automatically tests the
|
58
|
+
file type. If a sequence file is provided, minimap2 builds an index. The
|
59
|
+
sequence file can be optionally gzip'd. This option has no effect if **seq**
|
60
|
+
is set.
|
61
|
+
|
62
|
+
* **seq**: a single sequence to index. The sequence name will be set to
|
63
|
+
:code:`N/A`.
|
64
|
+
|
65
|
+
* **preset**: minimap2 preset. Currently, minimap2 supports the following
|
66
|
+
presets: **sr** for single-end short reads; **map-pb** for PacBio
|
67
|
+
read-to-reference mapping; **map-ont** for Oxford Nanopore read mapping;
|
68
|
+
**splice** for long-read spliced alignment; **asm5** for assembly-to-assembly
|
69
|
+
alignment; **asm10** for full genome alignment of closely related species. Note
|
70
|
+
that the Python module does not support all-vs-all read overlapping.
|
71
|
+
|
72
|
+
* **k**: k-mer length, no larger than 28
|
73
|
+
|
74
|
+
* **w**: minimizer window size, no larger than 255
|
75
|
+
|
76
|
+
* **min_cnt**: minimum number of minimizers on a chain
|
77
|
+
|
78
|
+
* **min_chain_score**: minimum chaining score
|
79
|
+
|
80
|
+
* **bw**: chaining and alignment band width
|
81
|
+
|
82
|
+
* **best_n**: max number of alignments to return
|
83
|
+
|
84
|
+
* **n_threads**: number of indexing threads; 3 by default
|
85
|
+
|
86
|
+
* **extra_flags**: additional flags defined in minimap.h
|
87
|
+
|
88
|
+
* **fn_idx_out**: name of file to which the index is written. This parameter
|
89
|
+
has no effect if **seq** is set.
|
90
|
+
|
91
|
+
* **scoring**: scoring system. It is a tuple/list consisting of 4, 6 or 7
|
92
|
+
positive integers. The first 4 elements specify match scoring, mismatch
|
93
|
+
penalty, gap open and gap extension penalty. The 5th and 6th elements, if
|
94
|
+
present, set long-gap open and long-gap extension penalty. The 7th sets a
|
95
|
+
mismatch penalty involving ambiguous bases.
|
96
|
+
|
97
|
+
.. code:: python
|
98
|
+
|
99
|
+
mappy.Aligner.map(seq, seq2=None, cs=False, MD=False)
|
100
|
+
|
101
|
+
This method aligns :code:`seq` against the index. It is a generator, *yielding*
|
102
|
+
a series of :code:`mappy.Alignment` objects. If :code:`seq2` is present, mappy
|
103
|
+
performs paired-end alignment, assuming the two ends are in the FR orientation.
|
104
|
+
Alignments of the two ends can be distinguished by the :code:`read_num` field
|
105
|
+
(see Class mappy.Alignment below). Argument :code:`cs` asks mappy to generate
|
106
|
+
the :code:`cs` tag; :code:`MD` is similar. These two arguments might slightly
|
107
|
+
degrade performance and are not enabled by default.
|
108
|
+
|
109
|
+
.. code:: python
|
110
|
+
|
111
|
+
mappy.Aligner.seq(name, start=0, end=0x7fffffff)
|
112
|
+
|
113
|
+
This method retrieves a (sub)sequence from the index and returns it as a Python
|
114
|
+
string. :code:`None` is returned if :code:`name` is not present in the index or
|
115
|
+
the start/end coordinates are invalid.
|
116
|
+
|
117
|
+
.. code:: python
|
118
|
+
|
119
|
+
mappy.Aligner.seq_names
|
120
|
+
|
121
|
+
This property gives the array of sequence names in the index.
|
122
|
+
|
123
|
+
Class mappy.Alignment
|
124
|
+
~~~~~~~~~~~~~~~~~~~~~
|
125
|
+
|
126
|
+
This class describes an alignment. An object of this class has the following
|
127
|
+
properties:
|
128
|
+
|
129
|
+
* **ctg**: name of the reference sequence the query is mapped to
|
130
|
+
|
131
|
+
* **ctg_len**: total length of the reference sequence
|
132
|
+
|
133
|
+
* **r_st** and **r_en**: start and end positions on the reference
|
134
|
+
|
135
|
+
* **q_st** and **q_en**: start and end positions on the query
|
136
|
+
|
137
|
+
* **strand**: +1 if on the forward strand; -1 if on the reverse strand
|
138
|
+
|
139
|
+
* **mapq**: mapping quality
|
140
|
+
|
141
|
+
* **blen**: length of the alignment, including both alignment matches and gaps
|
142
|
+
but excluding ambiguous bases.
|
143
|
+
|
144
|
+
* **mlen**: length of the matching bases in the alignment, excluding ambiguous
|
145
|
+
base matches.
|
146
|
+
|
147
|
+
* **NM**: number of mismatches, gaps and ambiguous positions in the alignment
|
148
|
+
|
149
|
+
* **trans_strand**: transcript strand. +1 if on the forward strand; -1 if on the
|
150
|
+
reverse strand; 0 if unknown
|
151
|
+
|
152
|
+
* **is_primary**: if the alignment is primary (typically the best and the first
|
153
|
+
to generate)
|
154
|
+
|
155
|
+
* **read_num**: read number that the alignment corresponds to; 1 for the first
|
156
|
+
read and 2 for the second read
|
157
|
+
|
158
|
+
* **cigar_str**: CIGAR string
|
159
|
+
|
160
|
+
* **cigar**: CIGAR returned as an array of shape :code:`(n_cigar,2)`. The two
|
161
|
+
numbers give the length and the operator of each CIGAR operation.
|
162
|
+
|
163
|
+
* **MD**: the :code:`MD` tag as in the SAM format. It is an empty string unless
|
164
|
+
the :code:`MD` argument is applied when calling :code:`mappy.Aligner.map()`.
|
165
|
+
|
166
|
+
* **cs**: the :code:`cs` tag.
|
167
|
+
|
168
|
+
An :code:`Alignment` object can be converted to a string with :code:`str()` in
|
169
|
+
the following format:
|
170
|
+
|
171
|
+
::
|
172
|
+
|
173
|
+
q_st q_en strand ctg ctg_len r_st r_en mlen blen mapq cg:Z:cigar_str
|
174
|
+
|
175
|
+
It is effectively the PAF format without the QueryName and QueryLength columns
|
176
|
+
(the first two columns in PAF).
|
177
|
+
|
178
|
+
Miscellaneous Functions
|
179
|
+
~~~~~~~~~~~~~~~~~~~~~~~
|
180
|
+
|
181
|
+
.. code:: python
|
182
|
+
|
183
|
+
mappy.fastx_read(fn, read_comment=False)
|
184
|
+
|
185
|
+
This generator function opens a FASTA/FASTQ file and *yields* a
|
186
|
+
:code:`(name,seq,qual)` tuple for each sequence entry. The input file may be
|
187
|
+
optionally gzip'd. If :code:`read_comment` is True, this generator yields
|
188
|
+
a :code:`(name,seq,qual,comment)` tuple instead.
|
189
|
+
|
190
|
+
.. code:: python
|
191
|
+
|
192
|
+
mappy.revcomp(seq)
|
193
|
+
|
194
|
+
Return the reverse complement of DNA string :code:`seq`. This function
|
195
|
+
recognizes IUB code and preserves the letter cases. Uracil :code:`U` is
|
196
|
+
complemented to :code:`A`.
|
@@ -0,0 +1,152 @@
|
|
1
|
+
#ifndef CMAPPY_H
|
2
|
+
#define CMAPPY_H
|
3
|
+
|
4
|
+
#include <stdlib.h>
|
5
|
+
#include <string.h>
|
6
|
+
#include <zlib.h>
|
7
|
+
#include "minimap.h"
|
8
|
+
#include "kseq.h"
|
9
|
+
KSEQ_DECLARE(gzFile)
|
10
|
+
|
11
|
+
/*
 * Flat, Cython-friendly summary of a single alignment record.
 * Instances are populated by mm_reg2hitpy(); none of the pointers are owned
 * by this struct (they alias the index and the alignment record).
 */
typedef struct {
	const char *ctg;        /* name of the reference sequence that was hit */
	int32_t ctg_start;      /* start of the hit on the reference */
	int32_t ctg_end;        /* end of the hit on the reference */
	int32_t qry_start;      /* start of the hit on the query */
	int32_t qry_end;        /* end of the hit on the query */
	int32_t blen;           /* alignment block length */
	int32_t mlen;           /* number of matching bases */
	int32_t NM;             /* blen - mlen + number of ambiguous bases */
	int32_t ctg_len;        /* total length of the reference sequence */
	uint8_t mapq;           /* mapping quality */
	uint8_t is_primary;     /* non-zero when the record is its own parent */
	int8_t strand;          /* +1 forward, -1 reverse */
	int8_t trans_strand;    /* +1 forward, -1 reverse, 0 unknown */
	int32_t seg_id;         /* segment (read) the record belongs to */
	int32_t n_cigar32;      /* number of entries in cigar32 */
	uint32_t *cigar32;      /* CIGAR operations, borrowed from the record */
} mm_hitpy_t;
|
22
|
+
|
23
|
+
/*
 * Copy the fields of alignment record `r` that the Python layer exposes into
 * the flat structure `h`.
 *
 * mi: index the record was produced against (supplies contig name/length)
 * r:  alignment record; r->p is dereferenced unconditionally, so the record
 *     must carry its extra (CIGAR) payload
 * h:  output; its pointer members alias `mi` and `r->p`, nothing is copied
 */
static inline void mm_reg2hitpy(const mm_idx_t *mi, mm_reg1_t *r, mm_hitpy_t *h)
{
	/* reference side */
	h->ctg = mi->seq[r->rid].name;
	h->ctg_len = mi->seq[r->rid].len;
	h->ctg_start = r->rs;
	h->ctg_end = r->re;

	/* query side */
	h->qry_start = r->qs;
	h->qry_end = r->qe;

	if (r->rev)
		h->strand = -1;
	else
		h->strand = 1;

	h->mapq = r->mapq;
	h->mlen = r->mlen;
	h->blen = r->blen;
	/* non-matching columns of the alignment plus ambiguous bases */
	h->NM = r->blen - r->mlen + r->p->n_ambi;

	/* stored code 1 -> +1, code 2 -> -1, anything else -> 0 */
	if (r->p->trans_strand == 1)
		h->trans_strand = 1;
	else if (r->p->trans_strand == 2)
		h->trans_strand = -1;
	else
		h->trans_strand = 0;

	h->is_primary = (r->id == r->parent);
	h->seg_id = r->seg_id;

	/* borrow the CIGAR array; it stays owned by r->p */
	h->n_cigar32 = r->p->n_cigar;
	h->cigar32 = r->p->cigar;
}
|
40
|
+
|
41
|
+
/*
 * Release the extra payload attached to an alignment record.
 *
 * Only r->p is freed; the record itself is not. The pointer is cleared
 * afterwards so that a repeated call on the same record is harmless instead
 * of a double free. A NULL `r` is ignored.
 */
static inline void mm_free_reg1(mm_reg1_t *r)
{
	if (r == 0) return;
	free(r->p);
	r->p = 0;
}
|
45
|
+
|
46
|
+
static inline kseq_t *mm_fastx_open(const char *fn)
|
47
|
+
{
|
48
|
+
gzFile fp;
|
49
|
+
fp = fn && strcmp(fn, "-") != 0? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
|
50
|
+
return kseq_init(fp);
|
51
|
+
}
|
52
|
+
|
53
|
+
/*
 * Tear down a reader obtained from mm_fastx_open(): destroy the kseq object,
 * then close the gz stream underneath it.
 */
static inline void mm_fastx_close(kseq_t *ks)
{
	/* The gz handle is read out before kseq_destroy() runs, because `ks`
	 * is not touched again after that call. */
	gzFile handle = ks->f->f;

	kseq_destroy(ks);
	gzclose(handle);
}
|
60
|
+
|
61
|
+
static inline int mm_verbose_level(int v)
|
62
|
+
{
|
63
|
+
if (v >= 0) mm_verbose = v;
|
64
|
+
return mm_verbose;
|
65
|
+
}
|
66
|
+
|
67
|
+
static inline void mm_reset_timer(void)
|
68
|
+
{
|
69
|
+
extern double realtime(void);
|
70
|
+
mm_realtime0 = realtime();
|
71
|
+
}
|
72
|
+
|
73
|
+
extern unsigned char seq_comp_table[256];
|
74
|
+
/*
 * Map one read (seq2 == NULL) or a read pair (seq2 != NULL) against `mi`
 * while the Python GIL is released.
 *
 * mi:     index to map against
 * seq1:   first (or only) read, NUL-terminated
 * seq2:   optional mate; it is reverse-complemented on a private copy before
 *         mapping, and the strand of its hits is flipped back afterwards
 * n_regs: output, number of records in the returned array
 * b:      thread-local buffer
 * opt:    mapping options
 *
 * Returns a malloc'ed array of *n_regs records (hits of seq1 first, then
 * hits of seq2); the caller frees each record's payload and the array.
 * In paired mode NULL with *n_regs == 0 is returned when there are no hits
 * or when memory runs out.
 */
static inline mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt)
{
	mm_reg1_t *r = 0;

	/* Nothing may return or jump out between the two Py_* macros: every
	 * path has to fall through to Py_END_ALLOW_THREADS. */
	Py_BEGIN_ALLOW_THREADS
	if (seq2 == 0) {
		r = mm_map(mi, strlen(seq1), seq1, n_regs, b, opt, NULL);
	} else {
		int _n_regs[2] = { 0, 0 };
		mm_reg1_t *regs[2] = { 0, 0 };
		char *seq[2];
		int i, len[2];

		*n_regs = 0;
		len[0] = strlen(seq1);
		len[1] = strlen(seq2);
		seq[0] = (char*)seq1;
		seq[1] = strdup(seq2); /* private copy, reverse-complemented in place */
		if (seq[1] != 0) {
			int total;

			for (i = 0; i < len[1]>>1; ++i) {
				/* read as unsigned so bytes >= 0x80 cannot index
				 * the table with a negative value */
				uint8_t t = (uint8_t)seq[1][len[1] - i - 1];
				seq[1][len[1] - i - 1] = seq_comp_table[(uint8_t)seq[1][i]];
				seq[1][i] = seq_comp_table[t];
			}
			if (len[1]&1) seq[1][len[1]>>1] = seq_comp_table[(uint8_t)seq[1][len[1]>>1]];

			mm_map_frag(mi, 2, len, (const char**)seq, _n_regs, regs, b, opt, NULL);
			free(seq[1]); /* the copy is only needed during mapping */

			/* report mate hits relative to the sequence as it was passed in */
			for (i = 0; i < _n_regs[1]; ++i)
				regs[1][i].rev = !regs[1][i].rev;

			total = _n_regs[0] + _n_regs[1];
			if (total > 0) {
				mm_reg1_t *merged = (mm_reg1_t*)realloc(regs[0], sizeof(mm_reg1_t) * total);
				if (merged != 0) {
					if (_n_regs[1] > 0)
						memcpy(&merged[_n_regs[0]], regs[1], _n_regs[1] * sizeof(mm_reg1_t));
					*n_regs = total;
					r = merged;
				} else {
					/* out of memory: drop every hit instead of
					 * handing back a half-built array */
					for (i = 0; i < _n_regs[0]; ++i) free(regs[0][i].p);
					for (i = 0; i < _n_regs[1]; ++i) free(regs[1][i].p);
					free(regs[0]);
				}
			} else {
				free(regs[0]);
			}
			/* records were copied by value (or released above) */
			free(regs[1]);
		}
	}
	Py_END_ALLOW_THREADS

	return r;
}
|
110
|
+
|
111
|
+
static inline char *mappy_revcomp(int len, const uint8_t *seq)
|
112
|
+
{
|
113
|
+
int i;
|
114
|
+
char *rev;
|
115
|
+
rev = (char*)malloc(len + 1);
|
116
|
+
for (i = 0; i < len; ++i)
|
117
|
+
rev[len - i - 1] = seq_comp_table[seq[i]];
|
118
|
+
rev[len] = 0;
|
119
|
+
return rev;
|
120
|
+
}
|
121
|
+
|
122
|
+
static char *mappy_fetch_seq(const mm_idx_t *mi, const char *name, int st, int en, int *len)
|
123
|
+
{
|
124
|
+
int i, rid;
|
125
|
+
char *s;
|
126
|
+
*len = 0;
|
127
|
+
rid = mm_idx_name2id(mi, name);
|
128
|
+
if (rid < 0) return 0;
|
129
|
+
if ((uint32_t)st >= mi->seq[rid].len || st >= en) return 0;
|
130
|
+
if (en < 0 || (uint32_t)en > mi->seq[rid].len)
|
131
|
+
en = mi->seq[rid].len;
|
132
|
+
s = (char*)malloc(en - st + 1);
|
133
|
+
*len = mm_idx_getseq(mi, rid, st, en, (uint8_t*)s);
|
134
|
+
for (i = 0; i < *len; ++i)
|
135
|
+
s[i] = "ACGTN"[(uint8_t)s[i]];
|
136
|
+
s[*len] = 0;
|
137
|
+
return s;
|
138
|
+
}
|
139
|
+
|
140
|
+
/*
 * Build an index from a single in-memory sequence.
 *
 * w, k, is_hpc, bucket_bits: indexing parameters, forwarded to mm_idx_str()
 * seq: sequence bytes; need not be NUL-terminated
 * len: number of bytes in `seq`
 *
 * The sequence is copied into a NUL-terminated scratch buffer and indexed
 * under the placeholder name "N/A". Returns the result of mm_idx_str(), or
 * NULL when `len` is negative or the scratch buffer cannot be allocated.
 */
static mm_idx_t *mappy_idx_seq(int w, int k, int is_hpc, int bucket_bits, const char *seq, int len)
{
	const char *fake_name = "N/A";
	char *s;
	mm_idx_t *mi;

	if (len < 0) return 0;
	s = (char*)calloc((size_t)len + 1, 1); /* zero fill supplies the terminator */
	if (s == 0) return 0;
	if (len > 0) memcpy(s, seq, len);
	mi = mm_idx_str(w, k, is_hpc, bucket_bits, 1, (const char**)&s, (const char**)&fake_name);
	free(s);
	return mi;
}
|
151
|
+
|
152
|
+
#endif
|
@@ -0,0 +1,153 @@
|
|
1
|
+
from libc.stdint cimport int8_t, uint8_t, int32_t, int64_t, uint32_t, uint64_t
|
2
|
+
|
3
|
+
cdef extern from "minimap.h":
|
4
|
+
#
|
5
|
+
# Options
|
6
|
+
#
|
7
|
+
ctypedef struct mm_idxopt_t:
|
8
|
+
short k, w, flag, bucket_bits
|
9
|
+
int64_t mini_batch_size
|
10
|
+
uint64_t batch_size
|
11
|
+
|
12
|
+
ctypedef struct mm_mapopt_t:
|
13
|
+
int64_t flag
|
14
|
+
int seed
|
15
|
+
int sdust_thres
|
16
|
+
|
17
|
+
int max_qlen
|
18
|
+
|
19
|
+
int bw, bw_long
|
20
|
+
int max_gap, max_gap_ref
|
21
|
+
int max_frag_len
|
22
|
+
int max_chain_skip, max_chain_iter
|
23
|
+
int min_cnt
|
24
|
+
int min_chain_score
|
25
|
+
float chain_gap_scale
|
26
|
+
float chain_skip_scale
|
27
|
+
int rmq_size_cap, rmq_inner_dist
|
28
|
+
int rmq_rescue_size
|
29
|
+
float rmq_rescue_ratio
|
30
|
+
|
31
|
+
float mask_level
|
32
|
+
int mask_len
|
33
|
+
float pri_ratio
|
34
|
+
int best_n
|
35
|
+
|
36
|
+
float alt_drop
|
37
|
+
|
38
|
+
int a, b, q, e, q2, e2
|
39
|
+
int sc_ambi
|
40
|
+
int noncan
|
41
|
+
int junc_bonus
|
42
|
+
int zdrop, zdrop_inv
|
43
|
+
int end_bonus
|
44
|
+
int min_dp_max
|
45
|
+
int min_ksw_len
|
46
|
+
int anchor_ext_len, anchor_ext_shift
|
47
|
+
float max_clip_ratio
|
48
|
+
|
49
|
+
int rank_min_len
|
50
|
+
float rank_frac
|
51
|
+
|
52
|
+
int pe_ori, pe_bonus
|
53
|
+
|
54
|
+
float mid_occ_frac
|
55
|
+
float q_occ_frac
|
56
|
+
int32_t min_mid_occ
|
57
|
+
int32_t mid_occ
|
58
|
+
int32_t max_occ
|
59
|
+
int64_t mini_batch_size
|
60
|
+
int64_t max_sw_mat
|
61
|
+
int64_t cap_kalloc
|
62
|
+
|
63
|
+
const char *split_prefix
|
64
|
+
|
65
|
+
int mm_set_opt(char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
|
66
|
+
int mm_verbose
|
67
|
+
|
68
|
+
#
|
69
|
+
# Indexing
|
70
|
+
#
|
71
|
+
ctypedef struct mm_idx_seq_t:
|
72
|
+
char *name
|
73
|
+
uint64_t offset
|
74
|
+
uint32_t len
|
75
|
+
|
76
|
+
ctypedef struct mm_idx_bucket_t:
|
77
|
+
pass
|
78
|
+
|
79
|
+
ctypedef struct mm_idx_t:
|
80
|
+
int32_t b, w, k, flag
|
81
|
+
uint32_t n_seq
|
82
|
+
mm_idx_seq_t *seq
|
83
|
+
uint32_t *S
|
84
|
+
mm_idx_bucket_t *B
|
85
|
+
void *km
|
86
|
+
void *h
|
87
|
+
|
88
|
+
ctypedef struct mm_idx_reader_t:
|
89
|
+
pass
|
90
|
+
|
91
|
+
mm_idx_reader_t *mm_idx_reader_open(const char *fn, const mm_idxopt_t *opt, const char *fn_out)
|
92
|
+
mm_idx_t *mm_idx_reader_read(mm_idx_reader_t *r, int n_threads)
|
93
|
+
void mm_idx_reader_close(mm_idx_reader_t *r)
|
94
|
+
void mm_idx_destroy(mm_idx_t *mi)
|
95
|
+
void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi)
|
96
|
+
|
97
|
+
int mm_idx_index_name(mm_idx_t *mi)
|
98
|
+
|
99
|
+
#
|
100
|
+
# Mapping (key struct defined in cmappy.h below)
|
101
|
+
#
|
102
|
+
ctypedef struct mm_reg1_t:
|
103
|
+
pass
|
104
|
+
|
105
|
+
ctypedef struct mm_tbuf_t:
|
106
|
+
pass
|
107
|
+
|
108
|
+
mm_tbuf_t *mm_tbuf_init()
|
109
|
+
void mm_tbuf_destroy(mm_tbuf_t *b)
|
110
|
+
void *mm_tbuf_get_km(mm_tbuf_t *b)
|
111
|
+
int mm_gen_cs(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq, int no_iden)
|
112
|
+
int mm_gen_MD(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq)
|
113
|
+
|
114
|
+
#
|
115
|
+
# Helper header (because it is hard to expose mm_reg1_t with Cython)
|
116
|
+
#
|
117
|
+
cdef extern from "cmappy.h":
|
118
|
+
ctypedef struct mm_hitpy_t:
|
119
|
+
const char *ctg
|
120
|
+
int32_t ctg_start, ctg_end
|
121
|
+
int32_t qry_start, qry_end
|
122
|
+
int32_t blen, mlen, NM, ctg_len
|
123
|
+
uint8_t mapq, is_primary
|
124
|
+
int8_t strand, trans_strand
|
125
|
+
int32_t seg_id
|
126
|
+
int32_t n_cigar32
|
127
|
+
uint32_t *cigar32
|
128
|
+
|
129
|
+
void mm_reg2hitpy(const mm_idx_t *mi, mm_reg1_t *r, mm_hitpy_t *h)
|
130
|
+
void mm_free_reg1(mm_reg1_t *r)
|
131
|
+
mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt)
|
132
|
+
char *mappy_fetch_seq(const mm_idx_t *mi, const char *name, int st, int en, int *l)
|
133
|
+
mm_idx_t *mappy_idx_seq(int w, int k, int is_hpc, int bucket_bits, const char *seq, int l)
|
134
|
+
|
135
|
+
ctypedef struct kstring_t:
|
136
|
+
unsigned l, m
|
137
|
+
char *s
|
138
|
+
|
139
|
+
ctypedef struct kstream_t:
|
140
|
+
pass
|
141
|
+
|
142
|
+
ctypedef struct kseq_t:
|
143
|
+
kstring_t name, comment, seq, qual
|
144
|
+
int last_char
|
145
|
+
kstream_t *f
|
146
|
+
|
147
|
+
kseq_t *mm_fastx_open(const char *fn)
|
148
|
+
void mm_fastx_close(kseq_t *ks)
|
149
|
+
int kseq_read(kseq_t *seq)
|
150
|
+
|
151
|
+
char *mappy_revcomp(int l, const uint8_t *seq)
|
152
|
+
int mm_verbose_level(int v)
|
153
|
+
void mm_reset_timer()
|