minimap2 0.2.22.0 → 0.2.24.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +60 -76
- data/ext/Rakefile +55 -0
- data/ext/cmappy/cmappy.c +129 -0
- data/ext/cmappy/cmappy.h +44 -0
- data/ext/minimap2/FAQ.md +46 -0
- data/ext/minimap2/LICENSE.txt +24 -0
- data/ext/minimap2/MANIFEST.in +10 -0
- data/ext/minimap2/Makefile +132 -0
- data/ext/minimap2/Makefile.simde +97 -0
- data/ext/minimap2/NEWS.md +821 -0
- data/ext/minimap2/README.md +403 -0
- data/ext/minimap2/align.c +1020 -0
- data/ext/minimap2/bseq.c +169 -0
- data/ext/minimap2/bseq.h +64 -0
- data/ext/minimap2/code_of_conduct.md +30 -0
- data/ext/minimap2/cookbook.md +243 -0
- data/ext/minimap2/esterr.c +64 -0
- data/ext/minimap2/example.c +63 -0
- data/ext/minimap2/format.c +559 -0
- data/ext/minimap2/hit.c +466 -0
- data/ext/minimap2/index.c +775 -0
- data/ext/minimap2/kalloc.c +205 -0
- data/ext/minimap2/kalloc.h +76 -0
- data/ext/minimap2/kdq.h +132 -0
- data/ext/minimap2/ketopt.h +120 -0
- data/ext/minimap2/khash.h +615 -0
- data/ext/minimap2/krmq.h +474 -0
- data/ext/minimap2/kseq.h +256 -0
- data/ext/minimap2/ksort.h +153 -0
- data/ext/minimap2/ksw2.h +184 -0
- data/ext/minimap2/ksw2_dispatch.c +96 -0
- data/ext/minimap2/ksw2_extd2_sse.c +402 -0
- data/ext/minimap2/ksw2_exts2_sse.c +416 -0
- data/ext/minimap2/ksw2_extz2_sse.c +313 -0
- data/ext/minimap2/ksw2_ll_sse.c +152 -0
- data/ext/minimap2/kthread.c +159 -0
- data/ext/minimap2/kthread.h +15 -0
- data/ext/minimap2/kvec.h +105 -0
- data/ext/minimap2/lchain.c +369 -0
- data/ext/minimap2/main.c +459 -0
- data/ext/minimap2/map.c +714 -0
- data/ext/minimap2/minimap.h +410 -0
- data/ext/minimap2/minimap2.1 +725 -0
- data/ext/minimap2/misc/README.md +179 -0
- data/ext/minimap2/misc/mmphase.js +335 -0
- data/ext/minimap2/misc/paftools.js +3149 -0
- data/ext/minimap2/misc.c +162 -0
- data/ext/minimap2/mmpriv.h +132 -0
- data/ext/minimap2/options.c +234 -0
- data/ext/minimap2/pe.c +177 -0
- data/ext/minimap2/python/README.rst +196 -0
- data/ext/minimap2/python/cmappy.h +152 -0
- data/ext/minimap2/python/cmappy.pxd +153 -0
- data/ext/minimap2/python/mappy.pyx +273 -0
- data/ext/minimap2/python/minimap2.py +39 -0
- data/ext/minimap2/sdust.c +213 -0
- data/ext/minimap2/sdust.h +25 -0
- data/ext/minimap2/seed.c +131 -0
- data/ext/minimap2/setup.py +55 -0
- data/ext/minimap2/sketch.c +143 -0
- data/ext/minimap2/splitidx.c +84 -0
- data/ext/minimap2/sse2neon/emmintrin.h +1689 -0
- data/ext/minimap2/test/MT-human.fa +278 -0
- data/ext/minimap2/test/MT-orang.fa +276 -0
- data/ext/minimap2/test/q-inv.fa +4 -0
- data/ext/minimap2/test/q2.fa +2 -0
- data/ext/minimap2/test/t-inv.fa +127 -0
- data/ext/minimap2/test/t2.fa +2 -0
- data/ext/minimap2/tex/Makefile +21 -0
- data/ext/minimap2/tex/bioinfo.cls +930 -0
- data/ext/minimap2/tex/blasr-mc.eval +17 -0
- data/ext/minimap2/tex/bowtie2-s3.sam.eval +28 -0
- data/ext/minimap2/tex/bwa-s3.sam.eval +52 -0
- data/ext/minimap2/tex/bwa.eval +55 -0
- data/ext/minimap2/tex/eval2roc.pl +33 -0
- data/ext/minimap2/tex/graphmap.eval +4 -0
- data/ext/minimap2/tex/hs38-simu.sh +10 -0
- data/ext/minimap2/tex/minialign.eval +49 -0
- data/ext/minimap2/tex/minimap2.bib +460 -0
- data/ext/minimap2/tex/minimap2.tex +724 -0
- data/ext/minimap2/tex/mm2-s3.sam.eval +62 -0
- data/ext/minimap2/tex/mm2-update.tex +240 -0
- data/ext/minimap2/tex/mm2.approx.eval +12 -0
- data/ext/minimap2/tex/mm2.eval +13 -0
- data/ext/minimap2/tex/natbib.bst +1288 -0
- data/ext/minimap2/tex/natbib.sty +803 -0
- data/ext/minimap2/tex/ngmlr.eval +38 -0
- data/ext/minimap2/tex/roc.gp +60 -0
- data/ext/minimap2/tex/snap-s3.sam.eval +62 -0
- data/ext/minimap2.patch +19 -0
- data/lib/minimap2/aligner.rb +4 -4
- data/lib/minimap2/alignment.rb +11 -11
- data/lib/minimap2/ffi/constants.rb +20 -16
- data/lib/minimap2/ffi/functions.rb +5 -0
- data/lib/minimap2/ffi.rb +4 -5
- data/lib/minimap2/version.rb +2 -2
- data/lib/minimap2.rb +51 -15
- metadata +97 -79
- data/lib/minimap2/ffi_helper.rb +0 -53
- data/vendor/libminimap2.so +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1bbe7130374c248e008183b82ffa9feb7af9738184d654c8a14c7963b96f5c09
|
4
|
+
data.tar.gz: ddae428827a8e1f8e7c9cc8684f44aa266ec1d73d32e8d6a65e64a459d509292
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8f1caa66a874a45f4df9b0971f55cb0f436351c62f6d2bb4ba07e2d8cfe1485e391e58c952cdcaa5f256a9fa9e26376b865cd919ff53d374ea32666ae486de24
|
7
|
+
data.tar.gz: bef5486385f7a52d37566a866776791eec9eda5bf1d707e253f7845f5f87689c4258076874dcff44852f09b39d51522ab2ecf8ebbbe675b2ce98c2f87b60df76
|
data/README.md
CHANGED
@@ -6,85 +6,61 @@
|
|
6
6
|
[![Docs Latest](https://img.shields.io/badge/docs-stable-blue.svg)](https://rubydoc.info/gems/minimap2)
|
7
7
|
[![DOI](https://zenodo.org/badge/325711305.svg)](https://zenodo.org/badge/latestdoi/325711305)
|
8
8
|
|
9
|
-
|
10
|
-
|
11
9
|
:dna: [minimap2](https://github.com/lh3/minimap2) - the long-read mapper - for [Ruby](https://github.com/ruby/ruby)
|
12
10
|
|
13
11
|
## Installation
|
14
12
|
|
15
|
-
Open your terminal and type the following commands in order. You need to build minimap2 on your own because you need to create a shared library that contains cmappy functions.
|
16
|
-
|
17
|
-
Build
|
18
|
-
|
19
|
-
```sh
|
20
|
-
git clone --recursive https://github.com/kojix2/ruby-minimap2
|
21
|
-
cd ruby-minimap2
|
22
|
-
bundle install
|
23
|
-
bundle exec rake minimap2:build
|
24
|
-
```
|
25
|
-
|
26
|
-
Install
|
27
|
-
|
28
13
|
```
|
29
|
-
|
14
|
+
gem install minimap2
|
30
15
|
```
|
31
16
|
|
32
|
-
|
17
|
+
<details>
|
18
|
+
<summary><b>Compiling from source</b></summary>
|
19
|
+
|
20
|
+
git clone --recursive https://github.com/kojix2/ruby-minimap2
|
21
|
+
cd ruby-minimap2
|
22
|
+
bundle install
|
23
|
+
bundle exec rake minimap2:build
|
24
|
+
bundle exec rake install
|
25
|
+
|
26
|
+
</details>
|
33
27
|
|
34
28
|
## Quick Start
|
35
29
|
|
36
30
|
```ruby
|
37
31
|
require "minimap2"
|
38
|
-
```
|
39
|
-
|
40
|
-
Create aligner
|
41
|
-
|
42
|
-
```ruby
|
43
|
-
aligner = Minimap2::Aligner.new("minimap2/test/MT-human.fa")
|
44
|
-
```
|
45
|
-
|
46
|
-
Retrieve a subsequence from the index
|
47
|
-
|
48
|
-
```ruby
|
49
|
-
seq = aligner.seq("MT_human", 100, 200)
|
50
|
-
```
|
51
|
-
|
52
|
-
Mapping
|
53
|
-
|
54
|
-
```ruby
|
55
|
-
hits = aligner.align(seq)
|
56
|
-
pp hits[0]
|
57
|
-
```
|
58
32
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
33
|
+
aligner = Minimap2::Aligner.new("ext/minimap2/test/MT-human.fa")
|
34
|
+
seq = aligner.seq("MT_human", 100, 200)
|
35
|
+
hits = aligner.align(seq)
|
36
|
+
pp hits
|
37
|
+
```
|
38
|
+
```
|
39
|
+
[#<Minimap2::Alignment:0x000055bbfde2d128
|
40
|
+
@blen=100,
|
41
|
+
@cigar=[[100, 0]],
|
42
|
+
@cigar_str="100M",
|
43
|
+
@cs="",
|
44
|
+
@ctg="MT_human",
|
45
|
+
@ctg_len=16569,
|
46
|
+
@mapq=60,
|
47
|
+
@md="",
|
48
|
+
@mlen=100,
|
49
|
+
@nm=0,
|
50
|
+
@primary=1,
|
51
|
+
@q_en=100,
|
52
|
+
@q_st=0,
|
53
|
+
@r_en=200,
|
54
|
+
@r_st=100,
|
55
|
+
@read_num=1,
|
56
|
+
@strand=1,
|
57
|
+
@trans_strand=0>]
|
58
|
+
```
|
59
|
+
|
60
|
+
</details>
|
81
61
|
|
82
62
|
## APIs Overview
|
83
63
|
|
84
|
-
API is based on [Mappy](https://github.com/lh3/minimap2/tree/master/python), the official Python binding for Minimap2.
|
85
|
-
|
86
|
-
Note: `Aligner#map` has been changed to `aligne`, because `map` means iterator in Ruby.
|
87
|
-
|
88
64
|
```markdown
|
89
65
|
* Minimap2 module
|
90
66
|
- fastx_read Read fasta/fastq file.
|
@@ -129,11 +105,19 @@ Note: `Aligner#map` has been changed to `aligne`, because `map` means iterator i
|
|
129
105
|
* MapOpt class Mapping options.
|
130
106
|
```
|
131
107
|
|
132
|
-
|
108
|
+
* API is based on [Mappy](https://github.com/lh3/minimap2/tree/master/python), the official Python binding for Minimap2.
|
109
|
+
* `Aligner#map` has been changed to `align`, because `map` means iterator in Ruby.
|
110
|
+
* See [RubyDoc](https://rubydoc.info/gems/minimap2/) for details.
|
133
111
|
|
134
|
-
|
135
|
-
|
112
|
+
<details>
|
113
|
+
<summary><b>C Structures and Functions</b></summary>
|
136
114
|
|
115
|
+
### FFI
|
116
|
+
* Ruby-Minimap2 is built on top of [Ruby-FFI](https://github.com/ffi/ffi).
|
117
|
+
* Native C functions can be called from the `Minimap2::FFI` module.
|
118
|
+
* Native C structure members can be accessed.
|
119
|
+
* Bitfields are supported by the [ffi-bitfield](https://github.com/kojix2/ffi-bitfield) gem.
|
120
|
+
|
137
121
|
```ruby
|
138
122
|
aligner.idx_opt.members
|
139
123
|
# => [:k, :w, :flag, :bucket_bits, :mini_batch_size, :batch_size]
|
@@ -145,10 +129,15 @@ aligner.idx_opt[:k] = 14
|
|
145
129
|
aligner.idx_opt[:k]
|
146
130
|
# => 14
|
147
131
|
```
|
132
|
+
|
133
|
+
</details>
|
134
|
+
|
135
|
+
## Contributing
|
148
136
|
|
149
|
-
|
137
|
+
<details>
|
138
|
+
<summary><b>Development</b></summary>
|
150
139
|
|
151
|
-
Fork your repository.
|
140
|
+
Fork your repository.
|
152
141
|
then clone.
|
153
142
|
|
154
143
|
```sh
|
@@ -179,16 +168,11 @@ Run tests.
|
|
179
168
|
bundle exec rake test
|
180
169
|
```
|
181
170
|
|
182
|
-
|
171
|
+
</details>
|
183
172
|
|
184
|
-
ruby-minimap2 is a library under development and there are many points to be improved.
|
173
|
+
ruby-minimap2 is a library under development and there are many points to be improved.
|
185
174
|
|
186
|
-
|
187
|
-
* Fix bugs and [submit pull requests](https://github.com/kojix2/ruby-minimap2/pulls)
|
188
|
-
* Write, clarify, or fix documentation
|
189
|
-
* Suggest or add new features
|
190
|
-
* Create tools based on ruby-minimap2
|
191
|
-
* Update minimap2 in github submodule
|
175
|
+
Please feel free to report [bugs](https://github.com/kojix2/ruby-minimap2/issues) and submit [pull requests](https://github.com/kojix2/ruby-minimap2/pulls)!
|
192
176
|
|
193
177
|
## License
|
194
178
|
|
data/ext/Rakefile
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "rake"
|
4
|
+
require "fileutils"
|
5
|
+
require "ffi"
|
6
|
+
|
7
|
+
minimap2_dir = File.expand_path("minimap2", __dir__)
|
8
|
+
target_dir = "../../vendor"
|
9
|
+
target_fname = FFI.map_library_name("minimap2")
|
10
|
+
target_path = File.join(target_dir, target_fname)
|
11
|
+
|
12
|
+
task default: ["minimap2:build", "minimap2:clean"]
|
13
|
+
|
14
|
+
namespace :minimap2 do
|
15
|
+
desc "Compile Minimap2"
|
16
|
+
task :build do
|
17
|
+
Dir.chdir(minimap2_dir) do
|
18
|
+
# Add -fPIC option to Makefile
|
19
|
+
sh "git apply ../minimap2.patch"
|
20
|
+
sh "cp ../cmappy/cmappy.h ../cmappy/cmappy.c ."
|
21
|
+
sh "make"
|
22
|
+
case RbConfig::CONFIG["host_os"]
|
23
|
+
when /mswin|msys|mingw|cygwin|bccwin|wince|emc/
|
24
|
+
sh "cc *.o -shared -o #{target_fname} -lm -lz -lpthread"
|
25
|
+
when /darwin|mac os/
|
26
|
+
sh "clang -dynamiclib -undefined dynamic_lookup -o #{target_fname} *.o -lm -lz -lpthread"
|
27
|
+
sh "otool -L #{target_fname}"
|
28
|
+
else
|
29
|
+
sh "cc *.o -shared -o #{target_fname} -lm -lz -lpthread"
|
30
|
+
sh "ldd -r #{target_fname}"
|
31
|
+
end
|
32
|
+
sh "rm cmappy.h cmappy.c"
|
33
|
+
sh "git apply -R ../minimap2.patch"
|
34
|
+
FileUtils.mkdir_p(target_dir)
|
35
|
+
warn "mkdir -p #{target_dir}"
|
36
|
+
sh "mv #{target_fname} #{target_path}"
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
desc "Clean"
|
41
|
+
task :clean do
|
42
|
+
Dir.chdir(minimap2_dir) do
|
43
|
+
sh "make clean"
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
task cleanall: [:clean]
|
48
|
+
|
49
|
+
desc "Clean all"
|
50
|
+
task :cleanall do
|
51
|
+
Dir.chdir(minimap2_dir) do
|
52
|
+
sh "rm #{target_path}" if File.exist?(target_path)
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
data/ext/cmappy/cmappy.c
ADDED
@@ -0,0 +1,129 @@
|
|
1
|
+
#include "cmappy.h"
|
2
|
+
|
3
|
+
void mm_reg2hitpy(const mm_idx_t *mi, mm_reg1_t *r, mm_hitpy_t *h)
|
4
|
+
{
|
5
|
+
h->ctg = mi->seq[r->rid].name;
|
6
|
+
h->ctg_len = mi->seq[r->rid].len;
|
7
|
+
h->ctg_start = r->rs, h->ctg_end = r->re;
|
8
|
+
h->qry_start = r->qs, h->qry_end = r->qe;
|
9
|
+
h->strand = r->rev? -1 : 1;
|
10
|
+
h->mapq = r->mapq;
|
11
|
+
h->mlen = r->mlen;
|
12
|
+
h->blen = r->blen;
|
13
|
+
h->NM = r->blen - r->mlen + r->p->n_ambi;
|
14
|
+
h->trans_strand = r->p->trans_strand == 1? 1 : r->p->trans_strand == 2? -1 : 0;
|
15
|
+
h->is_primary = (r->id == r->parent);
|
16
|
+
h->seg_id = r->seg_id;
|
17
|
+
h->n_cigar32 = r->p->n_cigar;
|
18
|
+
h->cigar32 = r->p->cigar;
|
19
|
+
}
|
20
|
+
|
21
|
+
void mm_free_reg1(mm_reg1_t *r)
|
22
|
+
{
|
23
|
+
free(r->p);
|
24
|
+
}
|
25
|
+
|
26
|
+
kseq_t *mm_fastx_open(const char *fn)
|
27
|
+
{
|
28
|
+
gzFile fp;
|
29
|
+
fp = fn && strcmp(fn, "-") != 0? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
|
30
|
+
return kseq_init(fp);
|
31
|
+
}
|
32
|
+
|
33
|
+
void mm_fastx_close(kseq_t *ks)
|
34
|
+
{
|
35
|
+
gzFile fp;
|
36
|
+
fp = ks->f->f;
|
37
|
+
kseq_destroy(ks);
|
38
|
+
gzclose(fp);
|
39
|
+
}
|
40
|
+
|
41
|
+
int mm_verbose_level(int v)
|
42
|
+
{
|
43
|
+
if (v >= 0) mm_verbose = v;
|
44
|
+
return mm_verbose;
|
45
|
+
}
|
46
|
+
|
47
|
+
void mm_reset_timer(void)
|
48
|
+
{
|
49
|
+
extern double realtime(void);
|
50
|
+
mm_realtime0 = realtime();
|
51
|
+
}
|
52
|
+
|
53
|
+
mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt)
|
54
|
+
{
|
55
|
+
mm_reg1_t *r;
|
56
|
+
|
57
|
+
// Py_BEGIN_ALLOW_THREADS
|
58
|
+
if (seq2 == 0) {
|
59
|
+
r = mm_map(mi, strlen(seq1), seq1, n_regs, b, opt, NULL);
|
60
|
+
} else {
|
61
|
+
int _n_regs[2];
|
62
|
+
mm_reg1_t *regs[2];
|
63
|
+
char *seq[2];
|
64
|
+
int i, len[2];
|
65
|
+
|
66
|
+
len[0] = strlen(seq1);
|
67
|
+
len[1] = strlen(seq2);
|
68
|
+
seq[0] = (char*)seq1;
|
69
|
+
seq[1] = strdup(seq2);
|
70
|
+
for (i = 0; i < len[1]>>1; ++i) {
|
71
|
+
int t = seq[1][len[1] - i - 1];
|
72
|
+
seq[1][len[1] - i - 1] = seq_comp_table[(uint8_t)seq[1][i]];
|
73
|
+
seq[1][i] = seq_comp_table[t];
|
74
|
+
}
|
75
|
+
if (len[1]&1) seq[1][len[1]>>1] = seq_comp_table[(uint8_t)seq[1][len[1]>>1]];
|
76
|
+
mm_map_frag(mi, 2, len, (const char**)seq, _n_regs, regs, b, opt, NULL);
|
77
|
+
for (i = 0; i < _n_regs[1]; ++i)
|
78
|
+
regs[1][i].rev = !regs[1][i].rev;
|
79
|
+
*n_regs = _n_regs[0] + _n_regs[1];
|
80
|
+
regs[0] = (mm_reg1_t*)realloc(regs[0], sizeof(mm_reg1_t) * (*n_regs));
|
81
|
+
memcpy(&regs[0][_n_regs[0]], regs[1], _n_regs[1] * sizeof(mm_reg1_t));
|
82
|
+
free(regs[1]);
|
83
|
+
r = regs[0];
|
84
|
+
}
|
85
|
+
// Py_END_ALLOW_THREADS
|
86
|
+
|
87
|
+
return r;
|
88
|
+
}
|
89
|
+
|
90
|
+
char *mappy_revcomp(int len, const uint8_t *seq)
|
91
|
+
{
|
92
|
+
int i;
|
93
|
+
char *rev;
|
94
|
+
rev = (char*)malloc(len + 1);
|
95
|
+
for (i = 0; i < len; ++i)
|
96
|
+
rev[len - i - 1] = seq_comp_table[seq[i]];
|
97
|
+
rev[len] = 0;
|
98
|
+
return rev;
|
99
|
+
}
|
100
|
+
|
101
|
+
char *mappy_fetch_seq(const mm_idx_t *mi, const char *name, int st, int en, int *len)
|
102
|
+
{
|
103
|
+
int i, rid;
|
104
|
+
char *s;
|
105
|
+
*len = 0;
|
106
|
+
rid = mm_idx_name2id(mi, name);
|
107
|
+
if (rid < 0) return 0;
|
108
|
+
if ((uint32_t)st >= mi->seq[rid].len || st >= en) return 0;
|
109
|
+
if (en < 0 || (uint32_t)en > mi->seq[rid].len)
|
110
|
+
en = mi->seq[rid].len;
|
111
|
+
s = (char*)malloc(en - st + 1);
|
112
|
+
*len = mm_idx_getseq(mi, rid, st, en, (uint8_t*)s);
|
113
|
+
for (i = 0; i < *len; ++i)
|
114
|
+
s[i] = "ACGTN"[(uint8_t)s[i]];
|
115
|
+
s[*len] = 0;
|
116
|
+
return s;
|
117
|
+
}
|
118
|
+
|
119
|
+
mm_idx_t *mappy_idx_seq(int w, int k, int is_hpc, int bucket_bits, const char *seq, int len)
|
120
|
+
{
|
121
|
+
const char *fake_name = "N/A";
|
122
|
+
char *s;
|
123
|
+
mm_idx_t *mi;
|
124
|
+
s = (char*)calloc(len + 1, 1);
|
125
|
+
memcpy(s, seq, len);
|
126
|
+
mi = mm_idx_str(w, k, is_hpc, bucket_bits, 1, (const char**)&s, (const char**)&fake_name);
|
127
|
+
free(s);
|
128
|
+
return mi;
|
129
|
+
}
|
data/ext/cmappy/cmappy.h
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
#ifndef CMAPPY_H
|
2
|
+
#define CMAPPY_H
|
3
|
+
|
4
|
+
#include <stdlib.h>
|
5
|
+
#include <string.h>
|
6
|
+
#include <zlib.h>
|
7
|
+
#include "minimap.h"
|
8
|
+
#include "kseq.h"
|
9
|
+
KSEQ_DECLARE(gzFile)
|
10
|
+
|
11
|
+
typedef struct {
|
12
|
+
const char *ctg;
|
13
|
+
int32_t ctg_start, ctg_end;
|
14
|
+
int32_t qry_start, qry_end;
|
15
|
+
int32_t blen, mlen, NM, ctg_len;
|
16
|
+
uint8_t mapq, is_primary;
|
17
|
+
int8_t strand, trans_strand;
|
18
|
+
int32_t seg_id;
|
19
|
+
int32_t n_cigar32;
|
20
|
+
uint32_t *cigar32;
|
21
|
+
} mm_hitpy_t;
|
22
|
+
|
23
|
+
void mm_reg2hitpy(const mm_idx_t *mi, mm_reg1_t *r, mm_hitpy_t *h);
|
24
|
+
|
25
|
+
void mm_free_reg1(mm_reg1_t *r);
|
26
|
+
|
27
|
+
kseq_t *mm_fastx_open(const char *fn);
|
28
|
+
|
29
|
+
void mm_fastx_close(kseq_t *ks);
|
30
|
+
|
31
|
+
int mm_verbose_level(int v);
|
32
|
+
|
33
|
+
void mm_reset_timer(void);
|
34
|
+
|
35
|
+
extern unsigned char seq_comp_table[256];
|
36
|
+
mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt);
|
37
|
+
|
38
|
+
char *mappy_revcomp(int len, const uint8_t *seq);
|
39
|
+
|
40
|
+
char *mappy_fetch_seq(const mm_idx_t *mi, const char *name, int st, int en, int *len);
|
41
|
+
|
42
|
+
mm_idx_t *mappy_idx_seq(int w, int k, int is_hpc, int bucket_bits, const char *seq, int len);
|
43
|
+
|
44
|
+
#endif
|
data/ext/minimap2/FAQ.md
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
#### 1. Alignment different with option `-a` or `-c`?
|
2
|
+
|
3
|
+
Without `-a`, `-c` or `--cs`, minimap2 only finds *approximate* mapping
|
4
|
+
locations without detailed base alignment. In particular, the start and end
|
5
|
+
positions of the alignment are imprecise. With one of those options, minimap2
|
6
|
+
will perform base alignment, which is generally more accurate but is much
|
7
|
+
slower.
|
8
|
+
|
9
|
+
#### 2. How to map Illumina short reads to noisy long reads?
|
10
|
+
|
11
|
+
No good solutions. The better approach is to assemble short reads into contigs
|
12
|
+
and then map noisy reads to contigs.
|
13
|
+
|
14
|
+
#### 3. The output SAM doesn't have a header.
|
15
|
+
|
16
|
+
By default, minimap2 indexes 4 billion reference bases (4Gb) in a batch and maps
|
17
|
+
all reads against each reference batch. Given a reference longer than 4Gb,
|
18
|
+
minimap2 is unable to see all the sequences and thus can't produce a correct
|
19
|
+
SAM header. In this case, minimap2 doesn't output any SAM header. There are two
|
20
|
+
solutions to this issue. First, you may increase option `-I` to, for example,
|
21
|
+
`-I8g` to index more reference bases in a batch. This is preferred if your
|
22
|
+
machine has enough memory. Second, if your machine doesn't have enough memory
|
23
|
+
to hold the reference index, you can use the `--split-prefix` option in a
|
24
|
+
command line like:
|
25
|
+
```sh
|
26
|
+
minimap2 -ax map-ont --split-prefix=tmp ref.fa reads.fq
|
27
|
+
```
|
28
|
+
This second approach uses less memory, but it is slower and requires temporary
|
29
|
+
disk space.
|
30
|
+
|
31
|
+
#### 4. The output SAM is malformatted.
|
32
|
+
|
33
|
+
This typically happens when you use nohup to wrap a minimap2 command line.
|
34
|
+
Nohup is discouraged as it breaks piping. If you have to use nohup, please
|
35
|
+
specify an output file with option `-o`.
|
36
|
+
|
37
|
+
#### 5. How to output one alignment per read?
|
38
|
+
|
39
|
+
You can use `--secondary=no` to suppress secondary alignments (aka multiple
|
40
|
+
mappings), but you can't suppress supplementary alignment (aka split or
|
41
|
+
chimeric alignment) this way. You can use samtools to filter out these
|
42
|
+
alignments:
|
43
|
+
```sh
|
44
|
+
minimap2 -ax map-ont ref.fa reads.fq | samtools view -F0x900
|
45
|
+
```
|
46
|
+
However, this is discouraged as supplementary alignment is informative.
|
@@ -0,0 +1,24 @@
|
|
1
|
+
The MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2018- Dana-Farber Cancer Institute
|
4
|
+
2017-2018 Broad Institute, Inc.
|
5
|
+
|
6
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
7
|
+
a copy of this software and associated documentation files (the
|
8
|
+
"Software"), to deal in the Software without restriction, including
|
9
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
10
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
11
|
+
permit persons to whom the Software is furnished to do so, subject to
|
12
|
+
the following conditions:
|
13
|
+
|
14
|
+
The above copyright notice and this permission notice shall be
|
15
|
+
included in all copies or substantial portions of the Software.
|
16
|
+
|
17
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
18
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
19
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
20
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
21
|
+
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
22
|
+
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
23
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
24
|
+
SOFTWARE.
|
@@ -0,0 +1,132 @@
|
|
1
|
+
CFLAGS= -g -Wall -O2 -Wc++-compat #-Wextra
|
2
|
+
CPPFLAGS= -DHAVE_KALLOC
|
3
|
+
INCLUDES=
|
4
|
+
OBJS= kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o options.o index.o \
|
5
|
+
lchain.o align.o hit.o seed.o map.o format.o pe.o esterr.o splitidx.o \
|
6
|
+
ksw2_ll_sse.o
|
7
|
+
PROG= minimap2
|
8
|
+
PROG_EXTRA= sdust minimap2-lite
|
9
|
+
LIBS= -lm -lz -lpthread
|
10
|
+
|
11
|
+
ifeq ($(arm_neon),) # if arm_neon is not defined
|
12
|
+
ifeq ($(sse2only),) # if sse2only is not defined
|
13
|
+
OBJS+=ksw2_extz2_sse41.o ksw2_extd2_sse41.o ksw2_exts2_sse41.o ksw2_extz2_sse2.o ksw2_extd2_sse2.o ksw2_exts2_sse2.o ksw2_dispatch.o
|
14
|
+
else # if sse2only is defined
|
15
|
+
OBJS+=ksw2_extz2_sse.o ksw2_extd2_sse.o ksw2_exts2_sse.o
|
16
|
+
endif
|
17
|
+
else # if arm_neon is defined
|
18
|
+
OBJS+=ksw2_extz2_neon.o ksw2_extd2_neon.o ksw2_exts2_neon.o
|
19
|
+
INCLUDES+=-Isse2neon
|
20
|
+
ifeq ($(aarch64),) #if aarch64 is not defined
|
21
|
+
CFLAGS+=-D_FILE_OFFSET_BITS=64 -mfpu=neon -fsigned-char
|
22
|
+
else #if aarch64 is defined
|
23
|
+
CFLAGS+=-D_FILE_OFFSET_BITS=64 -fsigned-char
|
24
|
+
endif
|
25
|
+
endif
|
26
|
+
|
27
|
+
ifneq ($(asan),)
|
28
|
+
CFLAGS+=-fsanitize=address
|
29
|
+
LIBS+=-fsanitize=address
|
30
|
+
endif
|
31
|
+
|
32
|
+
ifneq ($(tsan),)
|
33
|
+
CFLAGS+=-fsanitize=thread
|
34
|
+
LIBS+=-fsanitize=thread
|
35
|
+
endif
|
36
|
+
|
37
|
+
.PHONY:all extra clean depend
|
38
|
+
.SUFFIXES:.c .o
|
39
|
+
|
40
|
+
.c.o:
|
41
|
+
$(CC) -c $(CFLAGS) $(CPPFLAGS) $(INCLUDES) $< -o $@
|
42
|
+
|
43
|
+
all:$(PROG)
|
44
|
+
|
45
|
+
extra:all $(PROG_EXTRA)
|
46
|
+
|
47
|
+
minimap2:main.o libminimap2.a
|
48
|
+
$(CC) $(CFLAGS) main.o -o $@ -L. -lminimap2 $(LIBS)
|
49
|
+
|
50
|
+
minimap2-lite:example.o libminimap2.a
|
51
|
+
$(CC) $(CFLAGS) $< -o $@ -L. -lminimap2 $(LIBS)
|
52
|
+
|
53
|
+
libminimap2.a:$(OBJS)
|
54
|
+
$(AR) -csru $@ $(OBJS)
|
55
|
+
|
56
|
+
sdust:sdust.c kalloc.o kalloc.h kdq.h kvec.h kseq.h ketopt.h sdust.h
|
57
|
+
$(CC) -D_SDUST_MAIN $(CFLAGS) $< kalloc.o -o $@ -lz
|
58
|
+
|
59
|
+
# SSE-specific targets on x86/x86_64
|
60
|
+
|
61
|
+
ifeq ($(arm_neon),) # if arm_neon is defined, compile this target with the default setting (i.e. no -msse2)
|
62
|
+
ksw2_ll_sse.o:ksw2_ll_sse.c ksw2.h kalloc.h
|
63
|
+
$(CC) -c $(CFLAGS) -msse2 $(CPPFLAGS) $(INCLUDES) $< -o $@
|
64
|
+
endif
|
65
|
+
|
66
|
+
ksw2_extz2_sse41.o:ksw2_extz2_sse.c ksw2.h kalloc.h
|
67
|
+
$(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
|
68
|
+
|
69
|
+
ksw2_extz2_sse2.o:ksw2_extz2_sse.c ksw2.h kalloc.h
|
70
|
+
$(CC) -c $(CFLAGS) -msse2 -mno-sse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@
|
71
|
+
|
72
|
+
ksw2_extd2_sse41.o:ksw2_extd2_sse.c ksw2.h kalloc.h
|
73
|
+
$(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
|
74
|
+
|
75
|
+
ksw2_extd2_sse2.o:ksw2_extd2_sse.c ksw2.h kalloc.h
|
76
|
+
$(CC) -c $(CFLAGS) -msse2 -mno-sse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@
|
77
|
+
|
78
|
+
ksw2_exts2_sse41.o:ksw2_exts2_sse.c ksw2.h kalloc.h
|
79
|
+
$(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
|
80
|
+
|
81
|
+
ksw2_exts2_sse2.o:ksw2_exts2_sse.c ksw2.h kalloc.h
|
82
|
+
$(CC) -c $(CFLAGS) -msse2 -mno-sse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@
|
83
|
+
|
84
|
+
ksw2_dispatch.o:ksw2_dispatch.c ksw2.h
|
85
|
+
$(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
|
86
|
+
|
87
|
+
# NEON-specific targets on ARM
|
88
|
+
|
89
|
+
ksw2_extz2_neon.o:ksw2_extz2_sse.c ksw2.h kalloc.h
|
90
|
+
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_SSE2_ONLY -D__SSE2__ $(INCLUDES) $< -o $@
|
91
|
+
|
92
|
+
ksw2_extd2_neon.o:ksw2_extd2_sse.c ksw2.h kalloc.h
|
93
|
+
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_SSE2_ONLY -D__SSE2__ $(INCLUDES) $< -o $@
|
94
|
+
|
95
|
+
ksw2_exts2_neon.o:ksw2_exts2_sse.c ksw2.h kalloc.h
|
96
|
+
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_SSE2_ONLY -D__SSE2__ $(INCLUDES) $< -o $@
|
97
|
+
|
98
|
+
# other non-file targets
|
99
|
+
|
100
|
+
clean:
|
101
|
+
rm -fr gmon.out *.o a.out $(PROG) $(PROG_EXTRA) *~ *.a *.dSYM build dist mappy*.so mappy.c python/mappy.c mappy.egg*
|
102
|
+
|
103
|
+
depend:
|
104
|
+
(LC_ALL=C; export LC_ALL; makedepend -Y -- $(CFLAGS) $(CPPFLAGS) -- *.c)
|
105
|
+
|
106
|
+
# DO NOT DELETE
|
107
|
+
|
108
|
+
align.o: minimap.h mmpriv.h bseq.h kseq.h ksw2.h kalloc.h
|
109
|
+
bseq.o: bseq.h kvec.h kalloc.h kseq.h
|
110
|
+
esterr.o: mmpriv.h minimap.h bseq.h kseq.h
|
111
|
+
example.o: minimap.h kseq.h
|
112
|
+
format.o: kalloc.h mmpriv.h minimap.h bseq.h kseq.h
|
113
|
+
hit.o: mmpriv.h minimap.h bseq.h kseq.h kalloc.h khash.h
|
114
|
+
index.o: kthread.h bseq.h minimap.h mmpriv.h kseq.h kvec.h kalloc.h khash.h
|
115
|
+
index.o: ksort.h
|
116
|
+
kalloc.o: kalloc.h
|
117
|
+
ksw2_extd2_sse.o: ksw2.h kalloc.h
|
118
|
+
ksw2_exts2_sse.o: ksw2.h kalloc.h
|
119
|
+
ksw2_extz2_sse.o: ksw2.h kalloc.h
|
120
|
+
ksw2_ll_sse.o: ksw2.h kalloc.h
|
121
|
+
kthread.o: kthread.h
|
122
|
+
lchain.o: mmpriv.h minimap.h bseq.h kseq.h kalloc.h krmq.h
|
123
|
+
main.o: bseq.h minimap.h mmpriv.h kseq.h ketopt.h
|
124
|
+
map.o: kthread.h kvec.h kalloc.h sdust.h mmpriv.h minimap.h bseq.h kseq.h
|
125
|
+
map.o: khash.h ksort.h
|
126
|
+
misc.o: mmpriv.h minimap.h bseq.h kseq.h ksort.h
|
127
|
+
options.o: mmpriv.h minimap.h bseq.h kseq.h
|
128
|
+
pe.o: mmpriv.h minimap.h bseq.h kseq.h kvec.h kalloc.h ksort.h
|
129
|
+
sdust.o: kalloc.h kdq.h kvec.h sdust.h
|
130
|
+
seed.o: mmpriv.h minimap.h bseq.h kseq.h kalloc.h ksort.h
|
131
|
+
sketch.o: kvec.h kalloc.h mmpriv.h minimap.h bseq.h kseq.h
|
132
|
+
splitidx.o: mmpriv.h minimap.h bseq.h kseq.h
|