minimap2 0.2.22.0 → 0.2.24.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (101) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +60 -76
  3. data/ext/Rakefile +55 -0
  4. data/ext/cmappy/cmappy.c +129 -0
  5. data/ext/cmappy/cmappy.h +44 -0
  6. data/ext/minimap2/FAQ.md +46 -0
  7. data/ext/minimap2/LICENSE.txt +24 -0
  8. data/ext/minimap2/MANIFEST.in +10 -0
  9. data/ext/minimap2/Makefile +132 -0
  10. data/ext/minimap2/Makefile.simde +97 -0
  11. data/ext/minimap2/NEWS.md +821 -0
  12. data/ext/minimap2/README.md +403 -0
  13. data/ext/minimap2/align.c +1020 -0
  14. data/ext/minimap2/bseq.c +169 -0
  15. data/ext/minimap2/bseq.h +64 -0
  16. data/ext/minimap2/code_of_conduct.md +30 -0
  17. data/ext/minimap2/cookbook.md +243 -0
  18. data/ext/minimap2/esterr.c +64 -0
  19. data/ext/minimap2/example.c +63 -0
  20. data/ext/minimap2/format.c +559 -0
  21. data/ext/minimap2/hit.c +466 -0
  22. data/ext/minimap2/index.c +775 -0
  23. data/ext/minimap2/kalloc.c +205 -0
  24. data/ext/minimap2/kalloc.h +76 -0
  25. data/ext/minimap2/kdq.h +132 -0
  26. data/ext/minimap2/ketopt.h +120 -0
  27. data/ext/minimap2/khash.h +615 -0
  28. data/ext/minimap2/krmq.h +474 -0
  29. data/ext/minimap2/kseq.h +256 -0
  30. data/ext/minimap2/ksort.h +153 -0
  31. data/ext/minimap2/ksw2.h +184 -0
  32. data/ext/minimap2/ksw2_dispatch.c +96 -0
  33. data/ext/minimap2/ksw2_extd2_sse.c +402 -0
  34. data/ext/minimap2/ksw2_exts2_sse.c +416 -0
  35. data/ext/minimap2/ksw2_extz2_sse.c +313 -0
  36. data/ext/minimap2/ksw2_ll_sse.c +152 -0
  37. data/ext/minimap2/kthread.c +159 -0
  38. data/ext/minimap2/kthread.h +15 -0
  39. data/ext/minimap2/kvec.h +105 -0
  40. data/ext/minimap2/lchain.c +369 -0
  41. data/ext/minimap2/main.c +459 -0
  42. data/ext/minimap2/map.c +714 -0
  43. data/ext/minimap2/minimap.h +410 -0
  44. data/ext/minimap2/minimap2.1 +725 -0
  45. data/ext/minimap2/misc/README.md +179 -0
  46. data/ext/minimap2/misc/mmphase.js +335 -0
  47. data/ext/minimap2/misc/paftools.js +3149 -0
  48. data/ext/minimap2/misc.c +162 -0
  49. data/ext/minimap2/mmpriv.h +132 -0
  50. data/ext/minimap2/options.c +234 -0
  51. data/ext/minimap2/pe.c +177 -0
  52. data/ext/minimap2/python/README.rst +196 -0
  53. data/ext/minimap2/python/cmappy.h +152 -0
  54. data/ext/minimap2/python/cmappy.pxd +153 -0
  55. data/ext/minimap2/python/mappy.pyx +273 -0
  56. data/ext/minimap2/python/minimap2.py +39 -0
  57. data/ext/minimap2/sdust.c +213 -0
  58. data/ext/minimap2/sdust.h +25 -0
  59. data/ext/minimap2/seed.c +131 -0
  60. data/ext/minimap2/setup.py +55 -0
  61. data/ext/minimap2/sketch.c +143 -0
  62. data/ext/minimap2/splitidx.c +84 -0
  63. data/ext/minimap2/sse2neon/emmintrin.h +1689 -0
  64. data/ext/minimap2/test/MT-human.fa +278 -0
  65. data/ext/minimap2/test/MT-orang.fa +276 -0
  66. data/ext/minimap2/test/q-inv.fa +4 -0
  67. data/ext/minimap2/test/q2.fa +2 -0
  68. data/ext/minimap2/test/t-inv.fa +127 -0
  69. data/ext/minimap2/test/t2.fa +2 -0
  70. data/ext/minimap2/tex/Makefile +21 -0
  71. data/ext/minimap2/tex/bioinfo.cls +930 -0
  72. data/ext/minimap2/tex/blasr-mc.eval +17 -0
  73. data/ext/minimap2/tex/bowtie2-s3.sam.eval +28 -0
  74. data/ext/minimap2/tex/bwa-s3.sam.eval +52 -0
  75. data/ext/minimap2/tex/bwa.eval +55 -0
  76. data/ext/minimap2/tex/eval2roc.pl +33 -0
  77. data/ext/minimap2/tex/graphmap.eval +4 -0
  78. data/ext/minimap2/tex/hs38-simu.sh +10 -0
  79. data/ext/minimap2/tex/minialign.eval +49 -0
  80. data/ext/minimap2/tex/minimap2.bib +460 -0
  81. data/ext/minimap2/tex/minimap2.tex +724 -0
  82. data/ext/minimap2/tex/mm2-s3.sam.eval +62 -0
  83. data/ext/minimap2/tex/mm2-update.tex +240 -0
  84. data/ext/minimap2/tex/mm2.approx.eval +12 -0
  85. data/ext/minimap2/tex/mm2.eval +13 -0
  86. data/ext/minimap2/tex/natbib.bst +1288 -0
  87. data/ext/minimap2/tex/natbib.sty +803 -0
  88. data/ext/minimap2/tex/ngmlr.eval +38 -0
  89. data/ext/minimap2/tex/roc.gp +60 -0
  90. data/ext/minimap2/tex/snap-s3.sam.eval +62 -0
  91. data/ext/minimap2.patch +19 -0
  92. data/lib/minimap2/aligner.rb +4 -4
  93. data/lib/minimap2/alignment.rb +11 -11
  94. data/lib/minimap2/ffi/constants.rb +20 -16
  95. data/lib/minimap2/ffi/functions.rb +5 -0
  96. data/lib/minimap2/ffi.rb +4 -5
  97. data/lib/minimap2/version.rb +2 -2
  98. data/lib/minimap2.rb +51 -15
  99. metadata +97 -79
  100. data/lib/minimap2/ffi_helper.rb +0 -53
  101. data/vendor/libminimap2.so +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7ceb411a88448c6ed13e6d842450264e91569260c9b19d77e699a93736768522
4
- data.tar.gz: 76c1f3466375b73c54db6cd2574ffe19a817b11994d50e9e8473a209566b14f5
3
+ metadata.gz: 1bbe7130374c248e008183b82ffa9feb7af9738184d654c8a14c7963b96f5c09
4
+ data.tar.gz: ddae428827a8e1f8e7c9cc8684f44aa266ec1d73d32e8d6a65e64a459d509292
5
5
  SHA512:
6
- metadata.gz: 05a313f05984e2afab772da3971249327a9461a92d8b1fbcf53329ea5cd9d421e4ab8a137bd16213b86d6912bf1d43fd7f22c1bb5408a36004a64296fab294d9
7
- data.tar.gz: 7739312455aba2b14192eef634623f351fc4cea721d142b815ce41d651bc3de84e0fe413de6d24e8551ed90a2d71bcbf82a3df5b87685cb1474e6a134ed9fefe
6
+ metadata.gz: 8f1caa66a874a45f4df9b0971f55cb0f436351c62f6d2bb4ba07e2d8cfe1485e391e58c952cdcaa5f256a9fa9e26376b865cd919ff53d374ea32666ae486de24
7
+ data.tar.gz: bef5486385f7a52d37566a866776791eec9eda5bf1d707e253f7845f5f87689c4258076874dcff44852f09b39d51522ab2ecf8ebbbe675b2ce98c2f87b60df76
data/README.md CHANGED
@@ -6,85 +6,61 @@
6
6
  [![Docs Latest](https://img.shields.io/badge/docs-stable-blue.svg)](https://rubydoc.info/gems/minimap2)
7
7
  [![DOI](https://zenodo.org/badge/325711305.svg)](https://zenodo.org/badge/latestdoi/325711305)
8
8
 
9
-
10
-
11
9
  :dna: [minimap2](https://github.com/lh3/minimap2) - the long-read mapper - for [Ruby](https://github.com/ruby/ruby)
12
10
 
13
11
  ## Installation
14
12
 
15
- Open your terminal and type the following commands in order. You need to build minimap2 on your own because you need to create a shared library that contains cmappy functions.
16
-
17
- Build
18
-
19
- ```sh
20
- git clone --recursive https://github.com/kojix2/ruby-minimap2
21
- cd ruby-minimap2
22
- bundle install
23
- bundle exec rake minimap2:build
24
- ```
25
-
26
- Install
27
-
28
13
  ```
29
- bundle exec rake install
14
+ gem install minimap2
30
15
  ```
31
16
 
32
- Ruby-minimap2 is [tested on Ubuntu and macOS](https://github.com/kojix2/ruby-minimap2/actions).
17
+ <details>
18
+ <summary><b>Compiling from source</b></summary>
19
+
20
+ git clone --recursive https://github.com/kojix2/ruby-minimap2
21
+ cd ruby-minimap2
22
+ bundle install
23
+ bundle exec rake minimap2:build
24
+ bundle exec rake install
25
+
26
+ </details>
33
27
 
34
28
  ## Quick Start
35
29
 
36
30
  ```ruby
37
31
  require "minimap2"
38
- ```
39
-
40
- Create aligner
41
-
42
- ```ruby
43
- aligner = Minimap2::Aligner.new("minimap2/test/MT-human.fa")
44
- ```
45
-
46
- Retrieve a subsequence from the index
47
-
48
- ```ruby
49
- seq = aligner.seq("MT_human", 100, 200)
50
- ```
51
-
52
- Mapping
53
-
54
- ```ruby
55
- hits = aligner.align(seq)
56
- pp hits[0]
57
- ```
58
32
 
59
- ```
60
- =>
61
- #<Minimap2::Alignment:0x000055fe18223f50
62
- @blen=100,
63
- @cigar=[[100, 0]],
64
- @cigar_str="100M",
65
- @cs="",
66
- @ctg="MT_human",
67
- @ctg_len=16569,
68
- @mapq=60,
69
- @md="",
70
- @mlen=100,
71
- @nm=0,
72
- @primary=1,
73
- @q_en=100,
74
- @q_st=0,
75
- @r_en=200,
76
- @r_st=100,
77
- @read_num=1,
78
- @strand=1,
79
- @trans_strand=0>
80
- ```
33
+ aligner = Minimap2::Aligner.new("ext/minimap2/test/MT-human.fa")
34
+ seq = aligner.seq("MT_human", 100, 200)
35
+ hits = aligner.align(seq)
36
+ pp hits
37
+ ```
38
+ ```
39
+ [#<Minimap2::Alignment:0x000055bbfde2d128
40
+ @blen=100,
41
+ @cigar=[[100, 0]],
42
+ @cigar_str="100M",
43
+ @cs="",
44
+ @ctg="MT_human",
45
+ @ctg_len=16569,
46
+ @mapq=60,
47
+ @md="",
48
+ @mlen=100,
49
+ @nm=0,
50
+ @primary=1,
51
+ @q_en=100,
52
+ @q_st=0,
53
+ @r_en=200,
54
+ @r_st=100,
55
+ @read_num=1,
56
+ @strand=1,
57
+ @trans_strand=0>]
58
+ ```
59
+
60
+ </details>
81
61
 
82
62
  ## APIs Overview
83
63
 
84
- API is based on [Mappy](https://github.com/lh3/minimap2/tree/master/python), the official Python binding for Minimap2.
85
-
86
- Note: `Aligner#map` has been changed to `aligne`, because `map` means iterator in Ruby.
87
-
88
64
  ```markdown
89
65
  * Minimap2 module
90
66
  - fastx_read Read fasta/fastq file.
@@ -129,11 +105,19 @@ Note: `Aligner#map` has been changed to `aligne`, because `map` means iterator i
129
105
  * MapOpt class Mapping options.
130
106
  ```
131
107
 
132
- This is not all. See the [RubyDoc.info documentation](https://rubydoc.info/gems/minimap2/) for more details.
108
+ * API is based on [Mappy](https://github.com/lh3/minimap2/tree/master/python), the official Python binding for Minimap2.
109
+ * `Aligner#map` has been changed to `align`, because `map` means iterator in Ruby.
110
+ * See [RubyDoc](https://rubydoc.info/gems/minimap2/) for details.
133
111
 
134
- ruby-minimap2 is built on top of [Ruby-FFI](https://github.com/ffi/ffi).
135
- Native functions can be called from the FFI module. FFI also provides the way to access some C structs.
112
+ <details>
113
+ <summary><b>C Structures and Functions</b></summary>
136
114
 
115
+ ### FFI
116
+ * Ruby-Minimap2 is built on top of [Ruby-FFI](https://github.com/ffi/ffi).
117
+ * Native C functions can be called from the `Minimap2::FFI` module.
118
+ * Native C structure members can be accessed.
119
+ * Bitfields are supported by the [ffi-bitfield](https://github.com/kojix2/ffi-bitfield) gem.
120
+
137
121
  ```ruby
138
122
  aligner.idx_opt.members
139
123
  # => [:k, :w, :flag, :bucket_bits, :mini_batch_size, :batch_size]
@@ -145,10 +129,15 @@ aligner.idx_opt[:k] = 14
145
129
  aligner.idx_opt[:k]
146
130
  # => 14
147
131
  ```
132
+
133
+ </details>
134
+
135
+ ## Contributing
148
136
 
149
- ## Development
137
+ <details>
138
+ <summary><b>Development</b></summary>
150
139
 
151
- Fork your repository.
140
+ Fork your repository.
152
141
  then clone.
153
142
 
154
143
  ```sh
@@ -179,16 +168,11 @@ Run tests.
179
168
  bundle exec rake test
180
169
  ```
181
170
 
182
- ## Contributing
171
+ </details>
183
172
 
184
- ruby-minimap2 is a library under development and there are many points to be improved. Please feel free to send us your pull request.
173
+ ruby-minimap2 is a library under development and there are many points to be improved.
185
174
 
186
- * [Report bugs](https://github.com/kojix2/ruby-minimap2/issues)
187
- * Fix bugs and [submit pull requests](https://github.com/kojix2/ruby-minimap2/pulls)
188
- * Write, clarify, or fix documentation
189
- * Suggest or add new features
190
- * Create tools based on ruby-minimap2
191
- * Update minimap2 in github submodule
175
+ Please feel free to report [bugs](https://github.com/kojix2/ruby-minimap2/issues) and submit [pull requests](https://github.com/kojix2/ruby-minimap2/pulls)!
192
176
 
193
177
  ## License
194
178
 
data/ext/Rakefile ADDED
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rake"
4
+ require "fileutils"
5
+ require "ffi"
6
+
7
+ minimap2_dir = File.expand_path("minimap2", __dir__)
8
+ target_dir = "../../vendor"
9
+ target_fname = FFI.map_library_name("minimap2")
10
+ target_path = File.join(target_dir, target_fname)
11
+
12
+ task default: ["minimap2:build", "minimap2:clean"]
13
+
14
+ namespace :minimap2 do
15
+ desc "Compile Minimap2"
16
+ task :build do
17
+ Dir.chdir(minimap2_dir) do
18
+ # Add -fPIC option to Makefile
19
+ sh "git apply ../minimap2.patch"
20
+ sh "cp ../cmappy/cmappy.h ../cmappy/cmappy.c ."
21
+ sh "make"
22
+ case RbConfig::CONFIG["host_os"]
23
+ when /mswin|msys|mingw|cygwin|bccwin|wince|emc/
24
+ sh "cc *.o -shared -o #{target_fname} -lm -lz -lpthread"
25
+ when /darwin|mac os/
26
+ sh "clang -dynamiclib -undefined dynamic_lookup -o #{target_fname} *.o -lm -lz -lpthread"
27
+ sh "otool -L #{target_fname}"
28
+ else
29
+ sh "cc *.o -shared -o #{target_fname} -lm -lz -lpthread"
30
+ sh "ldd -r #{target_fname}"
31
+ end
32
+ sh "rm cmappy.h cmappy.c"
33
+ sh "git apply -R ../minimap2.patch"
34
+ FileUtils.mkdir_p(target_dir)
35
+ warn "mkdir -p #{target_dir}"
36
+ sh "mv #{target_fname} #{target_path}"
37
+ end
38
+ end
39
+
40
+ desc "Clean"
41
+ task :clean do
42
+ Dir.chdir(minimap2_dir) do
43
+ sh "make clean"
44
+ end
45
+ end
46
+
47
+ task cleanall: [:clean]
48
+
49
+ desc "Clean all"
50
+ task :cleanall do
51
+ Dir.chdir(minimap2_dir) do
52
+ sh "rm #{target_path}" if File.exist?(target_path)
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,129 @@
1
+ #include "cmappy.h"
2
+
3
+ void mm_reg2hitpy(const mm_idx_t *mi, mm_reg1_t *r, mm_hitpy_t *h)
4
+ {
5
+ h->ctg = mi->seq[r->rid].name;
6
+ h->ctg_len = mi->seq[r->rid].len;
7
+ h->ctg_start = r->rs, h->ctg_end = r->re;
8
+ h->qry_start = r->qs, h->qry_end = r->qe;
9
+ h->strand = r->rev? -1 : 1;
10
+ h->mapq = r->mapq;
11
+ h->mlen = r->mlen;
12
+ h->blen = r->blen;
13
+ h->NM = r->blen - r->mlen + r->p->n_ambi;
14
+ h->trans_strand = r->p->trans_strand == 1? 1 : r->p->trans_strand == 2? -1 : 0;
15
+ h->is_primary = (r->id == r->parent);
16
+ h->seg_id = r->seg_id;
17
+ h->n_cigar32 = r->p->n_cigar;
18
+ h->cigar32 = r->p->cigar;
19
+ }
20
+
21
+ void mm_free_reg1(mm_reg1_t *r)
22
+ {
23
+ free(r->p);
24
+ }
25
+
26
+ kseq_t *mm_fastx_open(const char *fn)
27
+ {
28
+ gzFile fp;
29
+ fp = fn && strcmp(fn, "-") != 0? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
30
+ return kseq_init(fp);
31
+ }
32
+
33
+ void mm_fastx_close(kseq_t *ks)
34
+ {
35
+ gzFile fp;
36
+ fp = ks->f->f;
37
+ kseq_destroy(ks);
38
+ gzclose(fp);
39
+ }
40
+
41
+ int mm_verbose_level(int v)
42
+ {
43
+ if (v >= 0) mm_verbose = v;
44
+ return mm_verbose;
45
+ }
46
+
47
+ void mm_reset_timer(void)
48
+ {
49
+ extern double realtime(void);
50
+ mm_realtime0 = realtime();
51
+ }
52
+
53
+ mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt)
54
+ {
55
+ mm_reg1_t *r;
56
+
57
+ // Py_BEGIN_ALLOW_THREADS
58
+ if (seq2 == 0) {
59
+ r = mm_map(mi, strlen(seq1), seq1, n_regs, b, opt, NULL);
60
+ } else {
61
+ int _n_regs[2];
62
+ mm_reg1_t *regs[2];
63
+ char *seq[2];
64
+ int i, len[2];
65
+
66
+ len[0] = strlen(seq1);
67
+ len[1] = strlen(seq2);
68
+ seq[0] = (char*)seq1;
69
+ seq[1] = strdup(seq2);
70
+ for (i = 0; i < len[1]>>1; ++i) {
71
+ int t = seq[1][len[1] - i - 1];
72
+ seq[1][len[1] - i - 1] = seq_comp_table[(uint8_t)seq[1][i]];
73
+ seq[1][i] = seq_comp_table[t];
74
+ }
75
+ if (len[1]&1) seq[1][len[1]>>1] = seq_comp_table[(uint8_t)seq[1][len[1]>>1]];
76
+ mm_map_frag(mi, 2, len, (const char**)seq, _n_regs, regs, b, opt, NULL);
77
+ for (i = 0; i < _n_regs[1]; ++i)
78
+ regs[1][i].rev = !regs[1][i].rev;
79
+ *n_regs = _n_regs[0] + _n_regs[1];
80
+ regs[0] = (mm_reg1_t*)realloc(regs[0], sizeof(mm_reg1_t) * (*n_regs));
81
+ memcpy(&regs[0][_n_regs[0]], regs[1], _n_regs[1] * sizeof(mm_reg1_t));
82
+ free(regs[1]);
83
+ r = regs[0];
84
+ }
85
+ // Py_END_ALLOW_THREADS
86
+
87
+ return r;
88
+ }
89
+
90
+ char *mappy_revcomp(int len, const uint8_t *seq)
91
+ {
92
+ int i;
93
+ char *rev;
94
+ rev = (char*)malloc(len + 1);
95
+ for (i = 0; i < len; ++i)
96
+ rev[len - i - 1] = seq_comp_table[seq[i]];
97
+ rev[len] = 0;
98
+ return rev;
99
+ }
100
+
101
+ char *mappy_fetch_seq(const mm_idx_t *mi, const char *name, int st, int en, int *len)
102
+ {
103
+ int i, rid;
104
+ char *s;
105
+ *len = 0;
106
+ rid = mm_idx_name2id(mi, name);
107
+ if (rid < 0) return 0;
108
+ if ((uint32_t)st >= mi->seq[rid].len || st >= en) return 0;
109
+ if (en < 0 || (uint32_t)en > mi->seq[rid].len)
110
+ en = mi->seq[rid].len;
111
+ s = (char*)malloc(en - st + 1);
112
+ *len = mm_idx_getseq(mi, rid, st, en, (uint8_t*)s);
113
+ for (i = 0; i < *len; ++i)
114
+ s[i] = "ACGTN"[(uint8_t)s[i]];
115
+ s[*len] = 0;
116
+ return s;
117
+ }
118
+
119
+ mm_idx_t *mappy_idx_seq(int w, int k, int is_hpc, int bucket_bits, const char *seq, int len)
120
+ {
121
+ const char *fake_name = "N/A";
122
+ char *s;
123
+ mm_idx_t *mi;
124
+ s = (char*)calloc(len + 1, 1);
125
+ memcpy(s, seq, len);
126
+ mi = mm_idx_str(w, k, is_hpc, bucket_bits, 1, (const char**)&s, (const char**)&fake_name);
127
+ free(s);
128
+ return mi;
129
+ }
@@ -0,0 +1,44 @@
1
+ #ifndef CMAPPY_H
2
+ #define CMAPPY_H
3
+
4
+ #include <stdlib.h>
5
+ #include <string.h>
6
+ #include <zlib.h>
7
+ #include "minimap.h"
8
+ #include "kseq.h"
9
+ KSEQ_DECLARE(gzFile)
10
+
11
+ typedef struct {
12
+ const char *ctg;
13
+ int32_t ctg_start, ctg_end;
14
+ int32_t qry_start, qry_end;
15
+ int32_t blen, mlen, NM, ctg_len;
16
+ uint8_t mapq, is_primary;
17
+ int8_t strand, trans_strand;
18
+ int32_t seg_id;
19
+ int32_t n_cigar32;
20
+ uint32_t *cigar32;
21
+ } mm_hitpy_t;
22
+
23
+ void mm_reg2hitpy(const mm_idx_t *mi, mm_reg1_t *r, mm_hitpy_t *h);
24
+
25
+ void mm_free_reg1(mm_reg1_t *r);
26
+
27
+ kseq_t *mm_fastx_open(const char *fn);
28
+
29
+ void mm_fastx_close(kseq_t *ks);
30
+
31
+ int mm_verbose_level(int v);
32
+
33
+ void mm_reset_timer(void);
34
+
35
+ extern unsigned char seq_comp_table[256];
36
+ mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt);
37
+
38
+ char *mappy_revcomp(int len, const uint8_t *seq);
39
+
40
+ char *mappy_fetch_seq(const mm_idx_t *mi, const char *name, int st, int en, int *len);
41
+
42
+ mm_idx_t *mappy_idx_seq(int w, int k, int is_hpc, int bucket_bits, const char *seq, int len);
43
+
44
+ #endif
@@ -0,0 +1,46 @@
1
+ #### 1. Alignment different with option `-a` or `-c`?
2
+
3
+ Without `-a`, `-c` or `--cs`, minimap2 only finds *approximate* mapping
4
+ locations without detailed base alignment. In particular, the start and end
5
+ positions of the alignment are imprecise. With one of those options, minimap2
6
+ will perform base alignment, which is generally more accurate but is much
7
+ slower.
8
+
9
+ #### 2. How to map Illumina short reads to noisy long reads?
10
+
11
+ No good solutions. The better approach is to assemble short reads into contigs
12
+ and then map noisy reads to contigs.
13
+
14
+ #### 3. The output SAM doesn't have a header.
15
+
16
+ By default, minimap2 indexes 4 billion reference bases (4Gb) in a batch and maps
17
+ all reads against each reference batch. Given a reference longer than 4Gb,
18
+ minimap2 is unable to see all the sequences and thus can't produce a correct
19
+ SAM header. In this case, minimap2 doesn't output any SAM header. There are two
20
+ solutions to this issue. First, you may increase option `-I` to, for example,
21
+ `-I8g` to index more reference bases in a batch. This is preferred if your
22
+ machine has enough memory. Second, if your machine doesn't have enough memory
23
+ to hold the reference index, you can use the `--split-prefix` option in a
24
+ command line like:
25
+ ```sh
26
+ minimap2 -ax map-ont --split-prefix=tmp ref.fa reads.fq
27
+ ```
28
+ This second approach uses less memory, but it is slower and requires temporary
29
+ disk space.
30
+
31
+ #### 4. The output SAM is malformatted.
32
+
33
+ This typically happens when you use nohup to wrap a minimap2 command line.
34
+ Nohup is discouraged as it breaks piping. If you have to use nohup, please
35
+ specify an output file with option `-o`.
36
+
37
+ #### 5. How to output one alignment per read?
38
+
39
+ You can use `--secondary=no` to suppress secondary alignments (aka multiple
40
+ mappings), but you can't suppress supplementary alignment (aka split or
41
+ chimeric alignment) this way. You can use samtools to filter out these
42
+ alignments:
43
+ ```sh
44
+ minimap2 -ax map-ont ref.fa reads.fq | samtools view -F0x900
45
+ ```
46
+ However, this is discouraged as supplementary alignment is informative.
@@ -0,0 +1,24 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2018- Dana-Farber Cancer Institute
4
+ 2017-2018 Broad Institute, Inc.
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining
7
+ a copy of this software and associated documentation files (the
8
+ "Software"), to deal in the Software without restriction, including
9
+ without limitation the rights to use, copy, modify, merge, publish,
10
+ distribute, sublicense, and/or sell copies of the Software, and to
11
+ permit persons to whom the Software is furnished to do so, subject to
12
+ the following conditions:
13
+
14
+ The above copyright notice and this permission notice shall be
15
+ included in all copies or substantial portions of the Software.
16
+
17
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
21
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
22
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24
+ SOFTWARE.
@@ -0,0 +1,10 @@
1
+ include *.h
2
+ include Makefile
3
+ include ksw2_dispatch.c
4
+ include main.c
5
+ include README.md
6
+ include sse2neon/emmintrin.h
7
+ include python/cmappy.h
8
+ include python/cmappy.pxd
9
+ include python/mappy.pyx
10
+ include python/README.rst
@@ -0,0 +1,132 @@
1
+ CFLAGS= -g -Wall -O2 -Wc++-compat #-Wextra
2
+ CPPFLAGS= -DHAVE_KALLOC
3
+ INCLUDES=
4
+ OBJS= kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o options.o index.o \
5
+ lchain.o align.o hit.o seed.o map.o format.o pe.o esterr.o splitidx.o \
6
+ ksw2_ll_sse.o
7
+ PROG= minimap2
8
+ PROG_EXTRA= sdust minimap2-lite
9
+ LIBS= -lm -lz -lpthread
10
+
11
+ ifeq ($(arm_neon),) # if arm_neon is not defined
12
+ ifeq ($(sse2only),) # if sse2only is not defined
13
+ OBJS+=ksw2_extz2_sse41.o ksw2_extd2_sse41.o ksw2_exts2_sse41.o ksw2_extz2_sse2.o ksw2_extd2_sse2.o ksw2_exts2_sse2.o ksw2_dispatch.o
14
+ else # if sse2only is defined
15
+ OBJS+=ksw2_extz2_sse.o ksw2_extd2_sse.o ksw2_exts2_sse.o
16
+ endif
17
+ else # if arm_neon is defined
18
+ OBJS+=ksw2_extz2_neon.o ksw2_extd2_neon.o ksw2_exts2_neon.o
19
+ INCLUDES+=-Isse2neon
20
+ ifeq ($(aarch64),) #if aarch64 is not defined
21
+ CFLAGS+=-D_FILE_OFFSET_BITS=64 -mfpu=neon -fsigned-char
22
+ else #if aarch64 is defined
23
+ CFLAGS+=-D_FILE_OFFSET_BITS=64 -fsigned-char
24
+ endif
25
+ endif
26
+
27
+ ifneq ($(asan),)
28
+ CFLAGS+=-fsanitize=address
29
+ LIBS+=-fsanitize=address
30
+ endif
31
+
32
+ ifneq ($(tsan),)
33
+ CFLAGS+=-fsanitize=thread
34
+ LIBS+=-fsanitize=thread
35
+ endif
36
+
37
+ .PHONY:all extra clean depend
38
+ .SUFFIXES:.c .o
39
+
40
+ .c.o:
41
+ $(CC) -c $(CFLAGS) $(CPPFLAGS) $(INCLUDES) $< -o $@
42
+
43
+ all:$(PROG)
44
+
45
+ extra:all $(PROG_EXTRA)
46
+
47
+ minimap2:main.o libminimap2.a
48
+ $(CC) $(CFLAGS) main.o -o $@ -L. -lminimap2 $(LIBS)
49
+
50
+ minimap2-lite:example.o libminimap2.a
51
+ $(CC) $(CFLAGS) $< -o $@ -L. -lminimap2 $(LIBS)
52
+
53
+ libminimap2.a:$(OBJS)
54
+ $(AR) -csru $@ $(OBJS)
55
+
56
+ sdust:sdust.c kalloc.o kalloc.h kdq.h kvec.h kseq.h ketopt.h sdust.h
57
+ $(CC) -D_SDUST_MAIN $(CFLAGS) $< kalloc.o -o $@ -lz
58
+
59
+ # SSE-specific targets on x86/x86_64
60
+
61
+ ifeq ($(arm_neon),) # if arm_neon is defined, compile this target with the default setting (i.e. no -msse2)
62
+ ksw2_ll_sse.o:ksw2_ll_sse.c ksw2.h kalloc.h
63
+ $(CC) -c $(CFLAGS) -msse2 $(CPPFLAGS) $(INCLUDES) $< -o $@
64
+ endif
65
+
66
+ ksw2_extz2_sse41.o:ksw2_extz2_sse.c ksw2.h kalloc.h
67
+ $(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
68
+
69
+ ksw2_extz2_sse2.o:ksw2_extz2_sse.c ksw2.h kalloc.h
70
+ $(CC) -c $(CFLAGS) -msse2 -mno-sse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@
71
+
72
+ ksw2_extd2_sse41.o:ksw2_extd2_sse.c ksw2.h kalloc.h
73
+ $(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
74
+
75
+ ksw2_extd2_sse2.o:ksw2_extd2_sse.c ksw2.h kalloc.h
76
+ $(CC) -c $(CFLAGS) -msse2 -mno-sse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@
77
+
78
+ ksw2_exts2_sse41.o:ksw2_exts2_sse.c ksw2.h kalloc.h
79
+ $(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
80
+
81
+ ksw2_exts2_sse2.o:ksw2_exts2_sse.c ksw2.h kalloc.h
82
+ $(CC) -c $(CFLAGS) -msse2 -mno-sse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@
83
+
84
+ ksw2_dispatch.o:ksw2_dispatch.c ksw2.h
85
+ $(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
86
+
87
+ # NEON-specific targets on ARM
88
+
89
+ ksw2_extz2_neon.o:ksw2_extz2_sse.c ksw2.h kalloc.h
90
+ $(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_SSE2_ONLY -D__SSE2__ $(INCLUDES) $< -o $@
91
+
92
+ ksw2_extd2_neon.o:ksw2_extd2_sse.c ksw2.h kalloc.h
93
+ $(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_SSE2_ONLY -D__SSE2__ $(INCLUDES) $< -o $@
94
+
95
+ ksw2_exts2_neon.o:ksw2_exts2_sse.c ksw2.h kalloc.h
96
+ $(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_SSE2_ONLY -D__SSE2__ $(INCLUDES) $< -o $@
97
+
98
+ # other non-file targets
99
+
100
+ clean:
101
+ rm -fr gmon.out *.o a.out $(PROG) $(PROG_EXTRA) *~ *.a *.dSYM build dist mappy*.so mappy.c python/mappy.c mappy.egg*
102
+
103
+ depend:
104
+ (LC_ALL=C; export LC_ALL; makedepend -Y -- $(CFLAGS) $(CPPFLAGS) -- *.c)
105
+
106
+ # DO NOT DELETE
107
+
108
+ align.o: minimap.h mmpriv.h bseq.h kseq.h ksw2.h kalloc.h
109
+ bseq.o: bseq.h kvec.h kalloc.h kseq.h
110
+ esterr.o: mmpriv.h minimap.h bseq.h kseq.h
111
+ example.o: minimap.h kseq.h
112
+ format.o: kalloc.h mmpriv.h minimap.h bseq.h kseq.h
113
+ hit.o: mmpriv.h minimap.h bseq.h kseq.h kalloc.h khash.h
114
+ index.o: kthread.h bseq.h minimap.h mmpriv.h kseq.h kvec.h kalloc.h khash.h
115
+ index.o: ksort.h
116
+ kalloc.o: kalloc.h
117
+ ksw2_extd2_sse.o: ksw2.h kalloc.h
118
+ ksw2_exts2_sse.o: ksw2.h kalloc.h
119
+ ksw2_extz2_sse.o: ksw2.h kalloc.h
120
+ ksw2_ll_sse.o: ksw2.h kalloc.h
121
+ kthread.o: kthread.h
122
+ lchain.o: mmpriv.h minimap.h bseq.h kseq.h kalloc.h krmq.h
123
+ main.o: bseq.h minimap.h mmpriv.h kseq.h ketopt.h
124
+ map.o: kthread.h kvec.h kalloc.h sdust.h mmpriv.h minimap.h bseq.h kseq.h
125
+ map.o: khash.h ksort.h
126
+ misc.o: mmpriv.h minimap.h bseq.h kseq.h ksort.h
127
+ options.o: mmpriv.h minimap.h bseq.h kseq.h
128
+ pe.o: mmpriv.h minimap.h bseq.h kseq.h kvec.h kalloc.h ksort.h
129
+ sdust.o: kalloc.h kdq.h kvec.h sdust.h
130
+ seed.o: mmpriv.h minimap.h bseq.h kseq.h kalloc.h ksort.h
131
+ sketch.o: kvec.h kalloc.h mmpriv.h minimap.h bseq.h kseq.h
132
+ splitidx.o: mmpriv.h minimap.h bseq.h kseq.h