minimap2 0.2.22.0 → 0.2.24.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +60 -76
- data/ext/Rakefile +55 -0
- data/ext/cmappy/cmappy.c +129 -0
- data/ext/cmappy/cmappy.h +44 -0
- data/ext/minimap2/FAQ.md +46 -0
- data/ext/minimap2/LICENSE.txt +24 -0
- data/ext/minimap2/MANIFEST.in +10 -0
- data/ext/minimap2/Makefile +132 -0
- data/ext/minimap2/Makefile.simde +97 -0
- data/ext/minimap2/NEWS.md +821 -0
- data/ext/minimap2/README.md +403 -0
- data/ext/minimap2/align.c +1020 -0
- data/ext/minimap2/bseq.c +169 -0
- data/ext/minimap2/bseq.h +64 -0
- data/ext/minimap2/code_of_conduct.md +30 -0
- data/ext/minimap2/cookbook.md +243 -0
- data/ext/minimap2/esterr.c +64 -0
- data/ext/minimap2/example.c +63 -0
- data/ext/minimap2/format.c +559 -0
- data/ext/minimap2/hit.c +466 -0
- data/ext/minimap2/index.c +775 -0
- data/ext/minimap2/kalloc.c +205 -0
- data/ext/minimap2/kalloc.h +76 -0
- data/ext/minimap2/kdq.h +132 -0
- data/ext/minimap2/ketopt.h +120 -0
- data/ext/minimap2/khash.h +615 -0
- data/ext/minimap2/krmq.h +474 -0
- data/ext/minimap2/kseq.h +256 -0
- data/ext/minimap2/ksort.h +153 -0
- data/ext/minimap2/ksw2.h +184 -0
- data/ext/minimap2/ksw2_dispatch.c +96 -0
- data/ext/minimap2/ksw2_extd2_sse.c +402 -0
- data/ext/minimap2/ksw2_exts2_sse.c +416 -0
- data/ext/minimap2/ksw2_extz2_sse.c +313 -0
- data/ext/minimap2/ksw2_ll_sse.c +152 -0
- data/ext/minimap2/kthread.c +159 -0
- data/ext/minimap2/kthread.h +15 -0
- data/ext/minimap2/kvec.h +105 -0
- data/ext/minimap2/lchain.c +369 -0
- data/ext/minimap2/main.c +459 -0
- data/ext/minimap2/map.c +714 -0
- data/ext/minimap2/minimap.h +410 -0
- data/ext/minimap2/minimap2.1 +725 -0
- data/ext/minimap2/misc/README.md +179 -0
- data/ext/minimap2/misc/mmphase.js +335 -0
- data/ext/minimap2/misc/paftools.js +3149 -0
- data/ext/minimap2/misc.c +162 -0
- data/ext/minimap2/mmpriv.h +132 -0
- data/ext/minimap2/options.c +234 -0
- data/ext/minimap2/pe.c +177 -0
- data/ext/minimap2/python/README.rst +196 -0
- data/ext/minimap2/python/cmappy.h +152 -0
- data/ext/minimap2/python/cmappy.pxd +153 -0
- data/ext/minimap2/python/mappy.pyx +273 -0
- data/ext/minimap2/python/minimap2.py +39 -0
- data/ext/minimap2/sdust.c +213 -0
- data/ext/minimap2/sdust.h +25 -0
- data/ext/minimap2/seed.c +131 -0
- data/ext/minimap2/setup.py +55 -0
- data/ext/minimap2/sketch.c +143 -0
- data/ext/minimap2/splitidx.c +84 -0
- data/ext/minimap2/sse2neon/emmintrin.h +1689 -0
- data/ext/minimap2/test/MT-human.fa +278 -0
- data/ext/minimap2/test/MT-orang.fa +276 -0
- data/ext/minimap2/test/q-inv.fa +4 -0
- data/ext/minimap2/test/q2.fa +2 -0
- data/ext/minimap2/test/t-inv.fa +127 -0
- data/ext/minimap2/test/t2.fa +2 -0
- data/ext/minimap2/tex/Makefile +21 -0
- data/ext/minimap2/tex/bioinfo.cls +930 -0
- data/ext/minimap2/tex/blasr-mc.eval +17 -0
- data/ext/minimap2/tex/bowtie2-s3.sam.eval +28 -0
- data/ext/minimap2/tex/bwa-s3.sam.eval +52 -0
- data/ext/minimap2/tex/bwa.eval +55 -0
- data/ext/minimap2/tex/eval2roc.pl +33 -0
- data/ext/minimap2/tex/graphmap.eval +4 -0
- data/ext/minimap2/tex/hs38-simu.sh +10 -0
- data/ext/minimap2/tex/minialign.eval +49 -0
- data/ext/minimap2/tex/minimap2.bib +460 -0
- data/ext/minimap2/tex/minimap2.tex +724 -0
- data/ext/minimap2/tex/mm2-s3.sam.eval +62 -0
- data/ext/minimap2/tex/mm2-update.tex +240 -0
- data/ext/minimap2/tex/mm2.approx.eval +12 -0
- data/ext/minimap2/tex/mm2.eval +13 -0
- data/ext/minimap2/tex/natbib.bst +1288 -0
- data/ext/minimap2/tex/natbib.sty +803 -0
- data/ext/minimap2/tex/ngmlr.eval +38 -0
- data/ext/minimap2/tex/roc.gp +60 -0
- data/ext/minimap2/tex/snap-s3.sam.eval +62 -0
- data/ext/minimap2.patch +19 -0
- data/lib/minimap2/aligner.rb +4 -4
- data/lib/minimap2/alignment.rb +11 -11
- data/lib/minimap2/ffi/constants.rb +20 -16
- data/lib/minimap2/ffi/functions.rb +5 -0
- data/lib/minimap2/ffi.rb +4 -5
- data/lib/minimap2/version.rb +2 -2
- data/lib/minimap2.rb +51 -15
- metadata +97 -79
- data/lib/minimap2/ffi_helper.rb +0 -53
- data/vendor/libminimap2.so +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1bbe7130374c248e008183b82ffa9feb7af9738184d654c8a14c7963b96f5c09
|
4
|
+
data.tar.gz: ddae428827a8e1f8e7c9cc8684f44aa266ec1d73d32e8d6a65e64a459d509292
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8f1caa66a874a45f4df9b0971f55cb0f436351c62f6d2bb4ba07e2d8cfe1485e391e58c952cdcaa5f256a9fa9e26376b865cd919ff53d374ea32666ae486de24
|
7
|
+
data.tar.gz: bef5486385f7a52d37566a866776791eec9eda5bf1d707e253f7845f5f87689c4258076874dcff44852f09b39d51522ab2ecf8ebbbe675b2ce98c2f87b60df76
|
data/README.md
CHANGED
@@ -6,85 +6,61 @@
|
|
6
6
|
[Docs Latest](https://rubydoc.info/gems/minimap2)
|
7
7
|
[DOI](https://zenodo.org/badge/latestdoi/325711305)
|
8
8
|
|
9
|
-
|
10
|
-
|
11
9
|
:dna: [minimap2](https://github.com/lh3/minimap2) - the long-read mapper - for [Ruby](https://github.com/ruby/ruby)
|
12
10
|
|
13
11
|
## Installation
|
14
12
|
|
15
|
-
Open your terminal and type the following commands in order. You need to build minimap2 on your own because you need to create a shared library that contains cmappy functions.
|
16
|
-
|
17
|
-
Build
|
18
|
-
|
19
|
-
```sh
|
20
|
-
git clone --recursive https://github.com/kojix2/ruby-minimap2
|
21
|
-
cd ruby-minimap2
|
22
|
-
bundle install
|
23
|
-
bundle exec rake minimap2:build
|
24
|
-
```
|
25
|
-
|
26
|
-
Install
|
27
|
-
|
28
13
|
```
|
29
|
-
|
14
|
+
gem install minimap2
|
30
15
|
```
|
31
16
|
|
32
|
-
|
17
|
+
<details>
|
18
|
+
<summary><b>Compiling from source</b></summary>
|
19
|
+
|
20
|
+
git clone --recursive https://github.com/kojix2/ruby-minimap2
|
21
|
+
cd ruby-minimap2
|
22
|
+
bundle install
|
23
|
+
bundle exec rake minimap2:build
|
24
|
+
bundle exec rake install
|
25
|
+
|
26
|
+
</details>
|
33
27
|
|
34
28
|
## Quick Start
|
35
29
|
|
36
30
|
```ruby
|
37
31
|
require "minimap2"
|
38
|
-
```
|
39
|
-
|
40
|
-
Create aligner
|
41
|
-
|
42
|
-
```ruby
|
43
|
-
aligner = Minimap2::Aligner.new("minimap2/test/MT-human.fa")
|
44
|
-
```
|
45
|
-
|
46
|
-
Retrieve a subsequence from the index
|
47
|
-
|
48
|
-
```ruby
|
49
|
-
seq = aligner.seq("MT_human", 100, 200)
|
50
|
-
```
|
51
|
-
|
52
|
-
Mapping
|
53
|
-
|
54
|
-
```ruby
|
55
|
-
hits = aligner.align(seq)
|
56
|
-
pp hits[0]
|
57
|
-
```
|
58
32
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
33
|
+
aligner = Minimap2::Aligner.new("ext/minimap2/test/MT-human.fa")
|
34
|
+
seq = aligner.seq("MT_human", 100, 200)
|
35
|
+
hits = aligner.align(seq)
|
36
|
+
pp hits
|
37
|
+
```
|
38
|
+
```
|
39
|
+
[#<Minimap2::Alignment:0x000055bbfde2d128
|
40
|
+
@blen=100,
|
41
|
+
@cigar=[[100, 0]],
|
42
|
+
@cigar_str="100M",
|
43
|
+
@cs="",
|
44
|
+
@ctg="MT_human",
|
45
|
+
@ctg_len=16569,
|
46
|
+
@mapq=60,
|
47
|
+
@md="",
|
48
|
+
@mlen=100,
|
49
|
+
@nm=0,
|
50
|
+
@primary=1,
|
51
|
+
@q_en=100,
|
52
|
+
@q_st=0,
|
53
|
+
@r_en=200,
|
54
|
+
@r_st=100,
|
55
|
+
@read_num=1,
|
56
|
+
@strand=1,
|
57
|
+
@trans_strand=0>]
|
58
|
+
```
|
59
|
+
|
60
|
+
</details>
|
81
61
|
|
82
62
|
## APIs Overview
|
83
63
|
|
84
|
-
API is based on [Mappy](https://github.com/lh3/minimap2/tree/master/python), the official Python binding for Minimap2.
|
85
|
-
|
86
|
-
Note: `Aligner#map` has been changed to `aligne`, because `map` means iterator in Ruby.
|
87
|
-
|
88
64
|
```markdown
|
89
65
|
* Minimap2 module
|
90
66
|
- fastx_read Read fasta/fastq file.
|
@@ -129,11 +105,19 @@ Note: `Aligner#map` has been changed to `aligne`, because `map` means iterator i
|
|
129
105
|
* MapOpt class Mapping options.
|
130
106
|
```
|
131
107
|
|
132
|
-
|
108
|
+
* API is based on [Mappy](https://github.com/lh3/minimap2/tree/master/python), the official Python binding for Minimap2.
|
109
|
+
* `Aligner#map` has been changed to `align`, because `map` means iterator in Ruby.
|
110
|
+
* See [RubyDoc](https://rubydoc.info/gems/minimap2/) for details.
|
133
111
|
|
134
|
-
|
135
|
-
|
112
|
+
<details>
|
113
|
+
<summary><b>C Structures and Functions</b></summary>
|
136
114
|
|
115
|
+
### FFI
|
116
|
+
* Ruby-Minimap2 is built on top of [Ruby-FFI](https://github.com/ffi/ffi).
|
117
|
+
* Native C functions can be called from the `Minimap2::FFI` module.
|
118
|
+
* Native C structure members can be accessed.
|
119
|
+
* Bitfields are supported by [ffi-bitfield](https://github.com/kojix2/ffi-bitfield) gems.
|
120
|
+
|
137
121
|
```ruby
|
138
122
|
aligner.idx_opt.members
|
139
123
|
# => [:k, :w, :flag, :bucket_bits, :mini_batch_size, :batch_size]
|
@@ -145,10 +129,15 @@ aligner.idx_opt[:k] = 14
|
|
145
129
|
aligner.idx_opt[:k]
|
146
130
|
# => 14
|
147
131
|
```
|
132
|
+
|
133
|
+
</details>
|
134
|
+
|
135
|
+
## Contributing
|
148
136
|
|
149
|
-
|
137
|
+
<details>
|
138
|
+
<summary><b>Development</b></summary>
|
150
139
|
|
151
|
-
Fork your repository.
|
140
|
+
Fork your repository.
|
152
141
|
then clone.
|
153
142
|
|
154
143
|
```sh
|
@@ -179,16 +168,11 @@ Run tests.
|
|
179
168
|
bundle exec rake test
|
180
169
|
```
|
181
170
|
|
182
|
-
|
171
|
+
</details>
|
183
172
|
|
184
|
-
ruby-minimap2 is a library under development and there are many points to be improved.
|
173
|
+
ruby-minimap2 is a library under development and there are many points to be improved.
|
185
174
|
|
186
|
-
|
187
|
-
* Fix bugs and [submit pull requests](https://github.com/kojix2/ruby-minimap2/pulls)
|
188
|
-
* Write, clarify, or fix documentation
|
189
|
-
* Suggest or add new features
|
190
|
-
* Create tools based on ruby-minimap2
|
191
|
-
* Update minimap2 in github submodule
|
175
|
+
Please feel free to report [bugs](https://github.com/kojix2/ruby-minimap2/issues) and submit [pull requests](https://github.com/kojix2/ruby-minimap2/pulls)!
|
192
176
|
|
193
177
|
## License
|
194
178
|
|
data/ext/Rakefile
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "rake"
|
4
|
+
require "fileutils"
|
5
|
+
require "ffi"
|
6
|
+
|
7
|
+
minimap2_dir = File.expand_path("minimap2", __dir__)
|
8
|
+
target_dir = "../../vendor"
|
9
|
+
target_fname = FFI.map_library_name("minimap2")
|
10
|
+
target_path = File.join(target_dir, target_fname)
|
11
|
+
|
12
|
+
task default: ["minimap2:build", "minimap2:clean"]
|
13
|
+
|
14
|
+
namespace :minimap2 do
|
15
|
+
desc "Compile Minimap2"
|
16
|
+
task :build do
|
17
|
+
Dir.chdir(minimap2_dir) do
|
18
|
+
# Add -fPIC option to Makefile
|
19
|
+
sh "git apply ../minimap2.patch"
|
20
|
+
sh "cp ../cmappy/cmappy.h ../cmappy/cmappy.c ."
|
21
|
+
sh "make"
|
22
|
+
case RbConfig::CONFIG["host_os"]
|
23
|
+
when /mswin|msys|mingw|cygwin|bccwin|wince|emc/
|
24
|
+
sh "cc *.o -shared -o #{target_fname} -lm -lz -lpthread"
|
25
|
+
when /darwin|mac os/
|
26
|
+
sh "clang -dynamiclib -undefined dynamic_lookup -o #{target_fname} *.o -lm -lz -lpthread"
|
27
|
+
sh "otool -L #{target_fname}"
|
28
|
+
else
|
29
|
+
sh "cc *.o -shared -o #{target_fname} -lm -lz -lpthread"
|
30
|
+
sh "ldd -r #{target_fname}"
|
31
|
+
end
|
32
|
+
sh "rm cmappy.h cmappy.c"
|
33
|
+
sh "git apply -R ../minimap2.patch"
|
34
|
+
FileUtils.mkdir_p(target_dir)
|
35
|
+
warn "mkdir -p #{target_dir}"
|
36
|
+
sh "mv #{target_fname} #{target_path}"
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
desc "Clean"
|
41
|
+
task :clean do
|
42
|
+
Dir.chdir(minimap2_dir) do
|
43
|
+
sh "make clean"
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
task cleanall: [:clean]
|
48
|
+
|
49
|
+
desc "Clean all"
|
50
|
+
task :cleanall do
|
51
|
+
Dir.chdir(minimap2_dir) do
|
52
|
+
sh "rm #{target_path}" if File.exist?(target_path)
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
data/ext/cmappy/cmappy.c
ADDED
@@ -0,0 +1,129 @@
|
|
1
|
+
#include "cmappy.h"
|
2
|
+
|
3
|
+
void mm_reg2hitpy(const mm_idx_t *mi, mm_reg1_t *r, mm_hitpy_t *h)
|
4
|
+
{
|
5
|
+
h->ctg = mi->seq[r->rid].name;
|
6
|
+
h->ctg_len = mi->seq[r->rid].len;
|
7
|
+
h->ctg_start = r->rs, h->ctg_end = r->re;
|
8
|
+
h->qry_start = r->qs, h->qry_end = r->qe;
|
9
|
+
h->strand = r->rev? -1 : 1;
|
10
|
+
h->mapq = r->mapq;
|
11
|
+
h->mlen = r->mlen;
|
12
|
+
h->blen = r->blen;
|
13
|
+
h->NM = r->blen - r->mlen + r->p->n_ambi;
|
14
|
+
h->trans_strand = r->p->trans_strand == 1? 1 : r->p->trans_strand == 2? -1 : 0;
|
15
|
+
h->is_primary = (r->id == r->parent);
|
16
|
+
h->seg_id = r->seg_id;
|
17
|
+
h->n_cigar32 = r->p->n_cigar;
|
18
|
+
h->cigar32 = r->p->cigar;
|
19
|
+
}
|
20
|
+
|
21
|
+
void mm_free_reg1(mm_reg1_t *r)
|
22
|
+
{
|
23
|
+
free(r->p);
|
24
|
+
}
|
25
|
+
|
26
|
+
kseq_t *mm_fastx_open(const char *fn)
|
27
|
+
{
|
28
|
+
gzFile fp;
|
29
|
+
fp = fn && strcmp(fn, "-") != 0? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
|
30
|
+
return kseq_init(fp);
|
31
|
+
}
|
32
|
+
|
33
|
+
void mm_fastx_close(kseq_t *ks)
|
34
|
+
{
|
35
|
+
gzFile fp;
|
36
|
+
fp = ks->f->f;
|
37
|
+
kseq_destroy(ks);
|
38
|
+
gzclose(fp);
|
39
|
+
}
|
40
|
+
|
41
|
+
int mm_verbose_level(int v)
|
42
|
+
{
|
43
|
+
if (v >= 0) mm_verbose = v;
|
44
|
+
return mm_verbose;
|
45
|
+
}
|
46
|
+
|
47
|
+
void mm_reset_timer(void)
|
48
|
+
{
|
49
|
+
extern double realtime(void);
|
50
|
+
mm_realtime0 = realtime();
|
51
|
+
}
|
52
|
+
|
53
|
+
mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt)
|
54
|
+
{
|
55
|
+
mm_reg1_t *r;
|
56
|
+
|
57
|
+
// Py_BEGIN_ALLOW_THREADS
|
58
|
+
if (seq2 == 0) {
|
59
|
+
r = mm_map(mi, strlen(seq1), seq1, n_regs, b, opt, NULL);
|
60
|
+
} else {
|
61
|
+
int _n_regs[2];
|
62
|
+
mm_reg1_t *regs[2];
|
63
|
+
char *seq[2];
|
64
|
+
int i, len[2];
|
65
|
+
|
66
|
+
len[0] = strlen(seq1);
|
67
|
+
len[1] = strlen(seq2);
|
68
|
+
seq[0] = (char*)seq1;
|
69
|
+
seq[1] = strdup(seq2);
|
70
|
+
for (i = 0; i < len[1]>>1; ++i) {
|
71
|
+
int t = seq[1][len[1] - i - 1];
|
72
|
+
seq[1][len[1] - i - 1] = seq_comp_table[(uint8_t)seq[1][i]];
|
73
|
+
seq[1][i] = seq_comp_table[t];
|
74
|
+
}
|
75
|
+
if (len[1]&1) seq[1][len[1]>>1] = seq_comp_table[(uint8_t)seq[1][len[1]>>1]];
|
76
|
+
mm_map_frag(mi, 2, len, (const char**)seq, _n_regs, regs, b, opt, NULL);
|
77
|
+
for (i = 0; i < _n_regs[1]; ++i)
|
78
|
+
regs[1][i].rev = !regs[1][i].rev;
|
79
|
+
*n_regs = _n_regs[0] + _n_regs[1];
|
80
|
+
regs[0] = (mm_reg1_t*)realloc(regs[0], sizeof(mm_reg1_t) * (*n_regs));
|
81
|
+
memcpy(&regs[0][_n_regs[0]], regs[1], _n_regs[1] * sizeof(mm_reg1_t));
|
82
|
+
free(regs[1]);
|
83
|
+
r = regs[0];
|
84
|
+
}
|
85
|
+
// Py_END_ALLOW_THREADS
|
86
|
+
|
87
|
+
return r;
|
88
|
+
}
|
89
|
+
|
90
|
+
char *mappy_revcomp(int len, const uint8_t *seq)
|
91
|
+
{
|
92
|
+
int i;
|
93
|
+
char *rev;
|
94
|
+
rev = (char*)malloc(len + 1);
|
95
|
+
for (i = 0; i < len; ++i)
|
96
|
+
rev[len - i - 1] = seq_comp_table[seq[i]];
|
97
|
+
rev[len] = 0;
|
98
|
+
return rev;
|
99
|
+
}
|
100
|
+
|
101
|
+
char *mappy_fetch_seq(const mm_idx_t *mi, const char *name, int st, int en, int *len)
|
102
|
+
{
|
103
|
+
int i, rid;
|
104
|
+
char *s;
|
105
|
+
*len = 0;
|
106
|
+
rid = mm_idx_name2id(mi, name);
|
107
|
+
if (rid < 0) return 0;
|
108
|
+
if ((uint32_t)st >= mi->seq[rid].len || st >= en) return 0;
|
109
|
+
if (en < 0 || (uint32_t)en > mi->seq[rid].len)
|
110
|
+
en = mi->seq[rid].len;
|
111
|
+
s = (char*)malloc(en - st + 1);
|
112
|
+
*len = mm_idx_getseq(mi, rid, st, en, (uint8_t*)s);
|
113
|
+
for (i = 0; i < *len; ++i)
|
114
|
+
s[i] = "ACGTN"[(uint8_t)s[i]];
|
115
|
+
s[*len] = 0;
|
116
|
+
return s;
|
117
|
+
}
|
118
|
+
|
119
|
+
mm_idx_t *mappy_idx_seq(int w, int k, int is_hpc, int bucket_bits, const char *seq, int len)
|
120
|
+
{
|
121
|
+
const char *fake_name = "N/A";
|
122
|
+
char *s;
|
123
|
+
mm_idx_t *mi;
|
124
|
+
s = (char*)calloc(len + 1, 1);
|
125
|
+
memcpy(s, seq, len);
|
126
|
+
mi = mm_idx_str(w, k, is_hpc, bucket_bits, 1, (const char**)&s, (const char**)&fake_name);
|
127
|
+
free(s);
|
128
|
+
return mi;
|
129
|
+
}
|
data/ext/cmappy/cmappy.h
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
#ifndef CMAPPY_H
|
2
|
+
#define CMAPPY_H
|
3
|
+
|
4
|
+
#include <stdlib.h>
|
5
|
+
#include <string.h>
|
6
|
+
#include <zlib.h>
|
7
|
+
#include "minimap.h"
|
8
|
+
#include "kseq.h"
|
9
|
+
KSEQ_DECLARE(gzFile)
|
10
|
+
|
11
|
+
typedef struct {
|
12
|
+
const char *ctg;
|
13
|
+
int32_t ctg_start, ctg_end;
|
14
|
+
int32_t qry_start, qry_end;
|
15
|
+
int32_t blen, mlen, NM, ctg_len;
|
16
|
+
uint8_t mapq, is_primary;
|
17
|
+
int8_t strand, trans_strand;
|
18
|
+
int32_t seg_id;
|
19
|
+
int32_t n_cigar32;
|
20
|
+
uint32_t *cigar32;
|
21
|
+
} mm_hitpy_t;
|
22
|
+
|
23
|
+
void mm_reg2hitpy(const mm_idx_t *mi, mm_reg1_t *r, mm_hitpy_t *h);
|
24
|
+
|
25
|
+
void mm_free_reg1(mm_reg1_t *r);
|
26
|
+
|
27
|
+
kseq_t *mm_fastx_open(const char *fn);
|
28
|
+
|
29
|
+
void mm_fastx_close(kseq_t *ks);
|
30
|
+
|
31
|
+
int mm_verbose_level(int v);
|
32
|
+
|
33
|
+
void mm_reset_timer(void);
|
34
|
+
|
35
|
+
extern unsigned char seq_comp_table[256];
|
36
|
+
mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt);
|
37
|
+
|
38
|
+
char *mappy_revcomp(int len, const uint8_t *seq);
|
39
|
+
|
40
|
+
char *mappy_fetch_seq(const mm_idx_t *mi, const char *name, int st, int en, int *len);
|
41
|
+
|
42
|
+
mm_idx_t *mappy_idx_seq(int w, int k, int is_hpc, int bucket_bits, const char *seq, int len);
|
43
|
+
|
44
|
+
#endif
|
data/ext/minimap2/FAQ.md
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
#### 1. Why is the alignment different with option `-a` or `-c`?
|
2
|
+
|
3
|
+
Without `-a`, `-c` or `--cs`, minimap2 only finds *approximate* mapping
|
4
|
+
locations without detailed base alignment. In particular, the start and end
|
5
|
+
positions of the alignment are imprecise. With one of those options, minimap2
|
6
|
+
will perform base alignment, which is generally more accurate but is much
|
7
|
+
slower.
|
8
|
+
|
9
|
+
#### 2. How to map Illumina short reads to noisy long reads?
|
10
|
+
|
11
|
+
No good solutions. The better approach is to assemble short reads into contigs
|
12
|
+
and then map noisy reads to contigs.
|
13
|
+
|
14
|
+
#### 3. The output SAM doesn't have a header.
|
15
|
+
|
16
|
+
By default, minimap2 indexes 4 billion reference bases (4Gb) in a batch and maps
|
17
|
+
all reads against each reference batch. Given a reference longer than 4Gb,
|
18
|
+
minimap2 is unable to see all the sequences and thus can't produce a correct
|
19
|
+
SAM header. In this case, minimap2 doesn't output any SAM header. There are two
|
20
|
+
solutions to this issue. First, you may increase option `-I` to, for example,
|
21
|
+
`-I8g` to index more reference bases in a batch. This is preferred if your
|
22
|
+
machine has enough memory. Second, if your machine doesn't have enough memory
|
23
|
+
to hold the reference index, you can use the `--split-prefix` option in a
|
24
|
+
command line like:
|
25
|
+
```sh
|
26
|
+
minimap2 -ax map-ont --split-prefix=tmp ref.fa reads.fq
|
27
|
+
```
|
28
|
+
This second approach uses less memory, but it is slower and requires temporary
|
29
|
+
disk space.
|
30
|
+
|
31
|
+
#### 4. The output SAM is malformatted.
|
32
|
+
|
33
|
+
This typically happens when you use nohup to wrap a minimap2 command line.
|
34
|
+
Nohup is discouraged as it breaks piping. If you have to use nohup, please
|
35
|
+
specify an output file with option `-o`.
|
36
|
+
|
37
|
+
#### 5. How to output one alignment per read?
|
38
|
+
|
39
|
+
You can use `--secondary=no` to suppress secondary alignments (aka multiple
|
40
|
+
mappings), but you can't suppress supplementary alignment (aka split or
|
41
|
+
chimeric alignment) this way. You can use samtools to filter out these
|
42
|
+
alignments:
|
43
|
+
```sh
|
44
|
+
minimap2 -ax map-ont ref.fa reads.fq | samtools view -F0x900
|
45
|
+
```
|
46
|
+
However, this is discouraged as supplementary alignment is informative.
|
data/ext/minimap2/LICENSE.txt
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
The MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2018- Dana-Farber Cancer Institute
|
4
|
+
2017-2018 Broad Institute, Inc.
|
5
|
+
|
6
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
7
|
+
a copy of this software and associated documentation files (the
|
8
|
+
"Software"), to deal in the Software without restriction, including
|
9
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
10
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
11
|
+
permit persons to whom the Software is furnished to do so, subject to
|
12
|
+
the following conditions:
|
13
|
+
|
14
|
+
The above copyright notice and this permission notice shall be
|
15
|
+
included in all copies or substantial portions of the Software.
|
16
|
+
|
17
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
18
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
19
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
20
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
21
|
+
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
22
|
+
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
23
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
24
|
+
SOFTWARE.
|
data/ext/minimap2/Makefile
ADDED
@@ -0,0 +1,132 @@
|
|
1
|
+
CFLAGS= -g -Wall -O2 -Wc++-compat #-Wextra
|
2
|
+
CPPFLAGS= -DHAVE_KALLOC
|
3
|
+
INCLUDES=
|
4
|
+
OBJS= kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o options.o index.o \
|
5
|
+
lchain.o align.o hit.o seed.o map.o format.o pe.o esterr.o splitidx.o \
|
6
|
+
ksw2_ll_sse.o
|
7
|
+
PROG= minimap2
|
8
|
+
PROG_EXTRA= sdust minimap2-lite
|
9
|
+
LIBS= -lm -lz -lpthread
|
10
|
+
|
11
|
+
ifeq ($(arm_neon),) # if arm_neon is not defined
|
12
|
+
ifeq ($(sse2only),) # if sse2only is not defined
|
13
|
+
OBJS+=ksw2_extz2_sse41.o ksw2_extd2_sse41.o ksw2_exts2_sse41.o ksw2_extz2_sse2.o ksw2_extd2_sse2.o ksw2_exts2_sse2.o ksw2_dispatch.o
|
14
|
+
else # if sse2only is defined
|
15
|
+
OBJS+=ksw2_extz2_sse.o ksw2_extd2_sse.o ksw2_exts2_sse.o
|
16
|
+
endif
|
17
|
+
else # if arm_neon is defined
|
18
|
+
OBJS+=ksw2_extz2_neon.o ksw2_extd2_neon.o ksw2_exts2_neon.o
|
19
|
+
INCLUDES+=-Isse2neon
|
20
|
+
ifeq ($(aarch64),) #if aarch64 is not defined
|
21
|
+
CFLAGS+=-D_FILE_OFFSET_BITS=64 -mfpu=neon -fsigned-char
|
22
|
+
else #if aarch64 is defined
|
23
|
+
CFLAGS+=-D_FILE_OFFSET_BITS=64 -fsigned-char
|
24
|
+
endif
|
25
|
+
endif
|
26
|
+
|
27
|
+
ifneq ($(asan),)
|
28
|
+
CFLAGS+=-fsanitize=address
|
29
|
+
LIBS+=-fsanitize=address
|
30
|
+
endif
|
31
|
+
|
32
|
+
ifneq ($(tsan),)
|
33
|
+
CFLAGS+=-fsanitize=thread
|
34
|
+
LIBS+=-fsanitize=thread
|
35
|
+
endif
|
36
|
+
|
37
|
+
.PHONY:all extra clean depend
|
38
|
+
.SUFFIXES:.c .o
|
39
|
+
|
40
|
+
.c.o:
|
41
|
+
$(CC) -c $(CFLAGS) $(CPPFLAGS) $(INCLUDES) $< -o $@
|
42
|
+
|
43
|
+
all:$(PROG)
|
44
|
+
|
45
|
+
extra:all $(PROG_EXTRA)
|
46
|
+
|
47
|
+
minimap2:main.o libminimap2.a
|
48
|
+
$(CC) $(CFLAGS) main.o -o $@ -L. -lminimap2 $(LIBS)
|
49
|
+
|
50
|
+
minimap2-lite:example.o libminimap2.a
|
51
|
+
$(CC) $(CFLAGS) $< -o $@ -L. -lminimap2 $(LIBS)
|
52
|
+
|
53
|
+
libminimap2.a:$(OBJS)
|
54
|
+
$(AR) -csru $@ $(OBJS)
|
55
|
+
|
56
|
+
sdust:sdust.c kalloc.o kalloc.h kdq.h kvec.h kseq.h ketopt.h sdust.h
|
57
|
+
$(CC) -D_SDUST_MAIN $(CFLAGS) $< kalloc.o -o $@ -lz
|
58
|
+
|
59
|
+
# SSE-specific targets on x86/x86_64
|
60
|
+
|
61
|
+
ifeq ($(arm_neon),) # if arm_neon is defined, compile this target with the default setting (i.e. no -msse2)
|
62
|
+
ksw2_ll_sse.o:ksw2_ll_sse.c ksw2.h kalloc.h
|
63
|
+
$(CC) -c $(CFLAGS) -msse2 $(CPPFLAGS) $(INCLUDES) $< -o $@
|
64
|
+
endif
|
65
|
+
|
66
|
+
ksw2_extz2_sse41.o:ksw2_extz2_sse.c ksw2.h kalloc.h
|
67
|
+
$(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
|
68
|
+
|
69
|
+
ksw2_extz2_sse2.o:ksw2_extz2_sse.c ksw2.h kalloc.h
|
70
|
+
$(CC) -c $(CFLAGS) -msse2 -mno-sse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@
|
71
|
+
|
72
|
+
ksw2_extd2_sse41.o:ksw2_extd2_sse.c ksw2.h kalloc.h
|
73
|
+
$(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
|
74
|
+
|
75
|
+
ksw2_extd2_sse2.o:ksw2_extd2_sse.c ksw2.h kalloc.h
|
76
|
+
$(CC) -c $(CFLAGS) -msse2 -mno-sse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@
|
77
|
+
|
78
|
+
ksw2_exts2_sse41.o:ksw2_exts2_sse.c ksw2.h kalloc.h
|
79
|
+
$(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
|
80
|
+
|
81
|
+
ksw2_exts2_sse2.o:ksw2_exts2_sse.c ksw2.h kalloc.h
|
82
|
+
$(CC) -c $(CFLAGS) -msse2 -mno-sse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@
|
83
|
+
|
84
|
+
ksw2_dispatch.o:ksw2_dispatch.c ksw2.h
|
85
|
+
$(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
|
86
|
+
|
87
|
+
# NEON-specific targets on ARM
|
88
|
+
|
89
|
+
ksw2_extz2_neon.o:ksw2_extz2_sse.c ksw2.h kalloc.h
|
90
|
+
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_SSE2_ONLY -D__SSE2__ $(INCLUDES) $< -o $@
|
91
|
+
|
92
|
+
ksw2_extd2_neon.o:ksw2_extd2_sse.c ksw2.h kalloc.h
|
93
|
+
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_SSE2_ONLY -D__SSE2__ $(INCLUDES) $< -o $@
|
94
|
+
|
95
|
+
ksw2_exts2_neon.o:ksw2_exts2_sse.c ksw2.h kalloc.h
|
96
|
+
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_SSE2_ONLY -D__SSE2__ $(INCLUDES) $< -o $@
|
97
|
+
|
98
|
+
# other non-file targets
|
99
|
+
|
100
|
+
clean:
|
101
|
+
rm -fr gmon.out *.o a.out $(PROG) $(PROG_EXTRA) *~ *.a *.dSYM build dist mappy*.so mappy.c python/mappy.c mappy.egg*
|
102
|
+
|
103
|
+
depend:
|
104
|
+
(LC_ALL=C; export LC_ALL; makedepend -Y -- $(CFLAGS) $(CPPFLAGS) -- *.c)
|
105
|
+
|
106
|
+
# DO NOT DELETE
|
107
|
+
|
108
|
+
align.o: minimap.h mmpriv.h bseq.h kseq.h ksw2.h kalloc.h
|
109
|
+
bseq.o: bseq.h kvec.h kalloc.h kseq.h
|
110
|
+
esterr.o: mmpriv.h minimap.h bseq.h kseq.h
|
111
|
+
example.o: minimap.h kseq.h
|
112
|
+
format.o: kalloc.h mmpriv.h minimap.h bseq.h kseq.h
|
113
|
+
hit.o: mmpriv.h minimap.h bseq.h kseq.h kalloc.h khash.h
|
114
|
+
index.o: kthread.h bseq.h minimap.h mmpriv.h kseq.h kvec.h kalloc.h khash.h
|
115
|
+
index.o: ksort.h
|
116
|
+
kalloc.o: kalloc.h
|
117
|
+
ksw2_extd2_sse.o: ksw2.h kalloc.h
|
118
|
+
ksw2_exts2_sse.o: ksw2.h kalloc.h
|
119
|
+
ksw2_extz2_sse.o: ksw2.h kalloc.h
|
120
|
+
ksw2_ll_sse.o: ksw2.h kalloc.h
|
121
|
+
kthread.o: kthread.h
|
122
|
+
lchain.o: mmpriv.h minimap.h bseq.h kseq.h kalloc.h krmq.h
|
123
|
+
main.o: bseq.h minimap.h mmpriv.h kseq.h ketopt.h
|
124
|
+
map.o: kthread.h kvec.h kalloc.h sdust.h mmpriv.h minimap.h bseq.h kseq.h
|
125
|
+
map.o: khash.h ksort.h
|
126
|
+
misc.o: mmpriv.h minimap.h bseq.h kseq.h ksort.h
|
127
|
+
options.o: mmpriv.h minimap.h bseq.h kseq.h
|
128
|
+
pe.o: mmpriv.h minimap.h bseq.h kseq.h kvec.h kalloc.h ksort.h
|
129
|
+
sdust.o: kalloc.h kdq.h kvec.h sdust.h
|
130
|
+
seed.o: mmpriv.h minimap.h bseq.h kseq.h kalloc.h ksort.h
|
131
|
+
sketch.o: kvec.h kalloc.h mmpriv.h minimap.h bseq.h kseq.h
|
132
|
+
splitidx.o: mmpriv.h minimap.h bseq.h kseq.h
|