minimap2 0.2.22.0 → 0.2.24.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +60 -76
  3. data/ext/Rakefile +55 -0
  4. data/ext/cmappy/cmappy.c +129 -0
  5. data/ext/cmappy/cmappy.h +44 -0
  6. data/ext/minimap2/FAQ.md +46 -0
  7. data/ext/minimap2/LICENSE.txt +24 -0
  8. data/ext/minimap2/MANIFEST.in +10 -0
  9. data/ext/minimap2/Makefile +132 -0
  10. data/ext/minimap2/Makefile.simde +97 -0
  11. data/ext/minimap2/NEWS.md +821 -0
  12. data/ext/minimap2/README.md +403 -0
  13. data/ext/minimap2/align.c +1020 -0
  14. data/ext/minimap2/bseq.c +169 -0
  15. data/ext/minimap2/bseq.h +64 -0
  16. data/ext/minimap2/code_of_conduct.md +30 -0
  17. data/ext/minimap2/cookbook.md +243 -0
  18. data/ext/minimap2/esterr.c +64 -0
  19. data/ext/minimap2/example.c +63 -0
  20. data/ext/minimap2/format.c +559 -0
  21. data/ext/minimap2/hit.c +466 -0
  22. data/ext/minimap2/index.c +775 -0
  23. data/ext/minimap2/kalloc.c +205 -0
  24. data/ext/minimap2/kalloc.h +76 -0
  25. data/ext/minimap2/kdq.h +132 -0
  26. data/ext/minimap2/ketopt.h +120 -0
  27. data/ext/minimap2/khash.h +615 -0
  28. data/ext/minimap2/krmq.h +474 -0
  29. data/ext/minimap2/kseq.h +256 -0
  30. data/ext/minimap2/ksort.h +153 -0
  31. data/ext/minimap2/ksw2.h +184 -0
  32. data/ext/minimap2/ksw2_dispatch.c +96 -0
  33. data/ext/minimap2/ksw2_extd2_sse.c +402 -0
  34. data/ext/minimap2/ksw2_exts2_sse.c +416 -0
  35. data/ext/minimap2/ksw2_extz2_sse.c +313 -0
  36. data/ext/minimap2/ksw2_ll_sse.c +152 -0
  37. data/ext/minimap2/kthread.c +159 -0
  38. data/ext/minimap2/kthread.h +15 -0
  39. data/ext/minimap2/kvec.h +105 -0
  40. data/ext/minimap2/lchain.c +369 -0
  41. data/ext/minimap2/main.c +459 -0
  42. data/ext/minimap2/map.c +714 -0
  43. data/ext/minimap2/minimap.h +410 -0
  44. data/ext/minimap2/minimap2.1 +725 -0
  45. data/ext/minimap2/misc/README.md +179 -0
  46. data/ext/minimap2/misc/mmphase.js +335 -0
  47. data/ext/minimap2/misc/paftools.js +3149 -0
  48. data/ext/minimap2/misc.c +162 -0
  49. data/ext/minimap2/mmpriv.h +132 -0
  50. data/ext/minimap2/options.c +234 -0
  51. data/ext/minimap2/pe.c +177 -0
  52. data/ext/minimap2/python/README.rst +196 -0
  53. data/ext/minimap2/python/cmappy.h +152 -0
  54. data/ext/minimap2/python/cmappy.pxd +153 -0
  55. data/ext/minimap2/python/mappy.pyx +273 -0
  56. data/ext/minimap2/python/minimap2.py +39 -0
  57. data/ext/minimap2/sdust.c +213 -0
  58. data/ext/minimap2/sdust.h +25 -0
  59. data/ext/minimap2/seed.c +131 -0
  60. data/ext/minimap2/setup.py +55 -0
  61. data/ext/minimap2/sketch.c +143 -0
  62. data/ext/minimap2/splitidx.c +84 -0
  63. data/ext/minimap2/sse2neon/emmintrin.h +1689 -0
  64. data/ext/minimap2/test/MT-human.fa +278 -0
  65. data/ext/minimap2/test/MT-orang.fa +276 -0
  66. data/ext/minimap2/test/q-inv.fa +4 -0
  67. data/ext/minimap2/test/q2.fa +2 -0
  68. data/ext/minimap2/test/t-inv.fa +127 -0
  69. data/ext/minimap2/test/t2.fa +2 -0
  70. data/ext/minimap2/tex/Makefile +21 -0
  71. data/ext/minimap2/tex/bioinfo.cls +930 -0
  72. data/ext/minimap2/tex/blasr-mc.eval +17 -0
  73. data/ext/minimap2/tex/bowtie2-s3.sam.eval +28 -0
  74. data/ext/minimap2/tex/bwa-s3.sam.eval +52 -0
  75. data/ext/minimap2/tex/bwa.eval +55 -0
  76. data/ext/minimap2/tex/eval2roc.pl +33 -0
  77. data/ext/minimap2/tex/graphmap.eval +4 -0
  78. data/ext/minimap2/tex/hs38-simu.sh +10 -0
  79. data/ext/minimap2/tex/minialign.eval +49 -0
  80. data/ext/minimap2/tex/minimap2.bib +460 -0
  81. data/ext/minimap2/tex/minimap2.tex +724 -0
  82. data/ext/minimap2/tex/mm2-s3.sam.eval +62 -0
  83. data/ext/minimap2/tex/mm2-update.tex +240 -0
  84. data/ext/minimap2/tex/mm2.approx.eval +12 -0
  85. data/ext/minimap2/tex/mm2.eval +13 -0
  86. data/ext/minimap2/tex/natbib.bst +1288 -0
  87. data/ext/minimap2/tex/natbib.sty +803 -0
  88. data/ext/minimap2/tex/ngmlr.eval +38 -0
  89. data/ext/minimap2/tex/roc.gp +60 -0
  90. data/ext/minimap2/tex/snap-s3.sam.eval +62 -0
  91. data/ext/minimap2.patch +19 -0
  92. data/lib/minimap2/aligner.rb +4 -4
  93. data/lib/minimap2/alignment.rb +11 -11
  94. data/lib/minimap2/ffi/constants.rb +20 -16
  95. data/lib/minimap2/ffi/functions.rb +5 -0
  96. data/lib/minimap2/ffi.rb +4 -5
  97. data/lib/minimap2/version.rb +2 -2
  98. data/lib/minimap2.rb +51 -15
  99. metadata +97 -79
  100. data/lib/minimap2/ffi_helper.rb +0 -53
  101. data/vendor/libminimap2.so +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7ceb411a88448c6ed13e6d842450264e91569260c9b19d77e699a93736768522
4
- data.tar.gz: 76c1f3466375b73c54db6cd2574ffe19a817b11994d50e9e8473a209566b14f5
3
+ metadata.gz: 1bbe7130374c248e008183b82ffa9feb7af9738184d654c8a14c7963b96f5c09
4
+ data.tar.gz: ddae428827a8e1f8e7c9cc8684f44aa266ec1d73d32e8d6a65e64a459d509292
5
5
  SHA512:
6
- metadata.gz: 05a313f05984e2afab772da3971249327a9461a92d8b1fbcf53329ea5cd9d421e4ab8a137bd16213b86d6912bf1d43fd7f22c1bb5408a36004a64296fab294d9
7
- data.tar.gz: 7739312455aba2b14192eef634623f351fc4cea721d142b815ce41d651bc3de84e0fe413de6d24e8551ed90a2d71bcbf82a3df5b87685cb1474e6a134ed9fefe
6
+ metadata.gz: 8f1caa66a874a45f4df9b0971f55cb0f436351c62f6d2bb4ba07e2d8cfe1485e391e58c952cdcaa5f256a9fa9e26376b865cd919ff53d374ea32666ae486de24
7
+ data.tar.gz: bef5486385f7a52d37566a866776791eec9eda5bf1d707e253f7845f5f87689c4258076874dcff44852f09b39d51522ab2ecf8ebbbe675b2ce98c2f87b60df76
data/README.md CHANGED
@@ -6,85 +6,61 @@
6
6
  [![Docs Latest](https://img.shields.io/badge/docs-stable-blue.svg)](https://rubydoc.info/gems/minimap2)
7
7
  [![DOI](https://zenodo.org/badge/325711305.svg)](https://zenodo.org/badge/latestdoi/325711305)
8
8
 
9
-
10
-
11
9
  :dna: [minimap2](https://github.com/lh3/minimap2) - the long-read mapper - for [Ruby](https://github.com/ruby/ruby)
12
10
 
13
11
  ## Installation
14
12
 
15
- Open your terminal and type the following commands in order. You need to build minimap2 on your own because you need to create a shared library that contains cmappy functions.
16
-
17
- Build
18
-
19
- ```sh
20
- git clone --recursive https://github.com/kojix2/ruby-minimap2
21
- cd ruby-minimap2
22
- bundle install
23
- bundle exec rake minimap2:build
24
- ```
25
-
26
- Install
27
-
28
13
  ```
29
- bundle exec rake install
14
+ gem install minimap2
30
15
  ```
31
16
 
32
- Ruby-minimap2 is [tested on Ubuntu and macOS](https://github.com/kojix2/ruby-minimap2/actions).
17
+ <details>
18
+ <summary><b>Compiling from source</b></summary>
19
+
20
+ git clone --recursive https://github.com/kojix2/ruby-minimap2
21
+ cd ruby-minimap2
22
+ bundle install
23
+ bundle exec rake minimap2:build
24
+ bundle exec rake install
25
+
26
+ </details>
33
27
 
34
28
  ## Quick Start
35
29
 
36
30
  ```ruby
37
31
  require "minimap2"
38
- ```
39
-
40
- Create aligner
41
-
42
- ```ruby
43
- aligner = Minimap2::Aligner.new("minimap2/test/MT-human.fa")
44
- ```
45
-
46
- Retrieve a subsequence from the index
47
-
48
- ```ruby
49
- seq = aligner.seq("MT_human", 100, 200)
50
- ```
51
-
52
- Mapping
53
-
54
- ```ruby
55
- hits = aligner.align(seq)
56
- pp hits[0]
57
- ```
58
32
 
59
- ```
60
- =>
61
- #<Minimap2::Alignment:0x000055fe18223f50
62
- @blen=100,
63
- @cigar=[[100, 0]],
64
- @cigar_str="100M",
65
- @cs="",
66
- @ctg="MT_human",
67
- @ctg_len=16569,
68
- @mapq=60,
69
- @md="",
70
- @mlen=100,
71
- @nm=0,
72
- @primary=1,
73
- @q_en=100,
74
- @q_st=0,
75
- @r_en=200,
76
- @r_st=100,
77
- @read_num=1,
78
- @strand=1,
79
- @trans_strand=0>
80
- ```
33
+ aligner = Minimap2::Aligner.new("ext/minimap2/test/MT-human.fa")
34
+ seq = aligner.seq("MT_human", 100, 200)
35
+ hits = aligner.align(seq)
36
+ pp hits
37
+ ```
38
+ ```
39
+ [#<Minimap2::Alignment:0x000055bbfde2d128
40
+ @blen=100,
41
+ @cigar=[[100, 0]],
42
+ @cigar_str="100M",
43
+ @cs="",
44
+ @ctg="MT_human",
45
+ @ctg_len=16569,
46
+ @mapq=60,
47
+ @md="",
48
+ @mlen=100,
49
+ @nm=0,
50
+ @primary=1,
51
+ @q_en=100,
52
+ @q_st=0,
53
+ @r_en=200,
54
+ @r_st=100,
55
+ @read_num=1,
56
+ @strand=1,
57
+ @trans_strand=0>]
58
+ ```
59
+
60
+
81
61
 
82
62
  ## APIs Overview
83
63
 
84
- API is based on [Mappy](https://github.com/lh3/minimap2/tree/master/python), the official Python binding for Minimap2.
85
-
86
- Note: `Aligner#map` has been changed to `aligne`, because `map` means iterator in Ruby.
87
-
88
64
  ```markdown
89
65
  * Minimap2 module
90
66
  - fastx_read Read fasta/fastq file.
@@ -129,11 +105,19 @@ Note: `Aligner#map` has been changed to `aligne`, because `map` means iterator i
129
105
  * MapOpt class Mapping options.
130
106
  ```
131
107
 
132
- This is not all. See the [RubyDoc.info documentation](https://rubydoc.info/gems/minimap2/) for more details.
108
+ * API is based on [Mappy](https://github.com/lh3/minimap2/tree/master/python), the official Python binding for Minimap2.
109
+ * `Aligner#map` has been renamed to `align`, because `map` is an iterator method in Ruby.
110
+ * See [RubyDoc](https://rubydoc.info/gems/minimap2/) for details.
133
111
 
134
- ruby-minimap2 is built on top of [Ruby-FFI](https://github.com/ffi/ffi).
135
- Native functions can be called from the FFI module. FFI also provides the way to access some C structs.
112
+ <details>
113
+ <summary><b>C Structures and Functions</b></summary>
136
114
 
115
+ ### FFI
116
+ * Ruby-Minimap2 is built on top of [Ruby-FFI](https://github.com/ffi/ffi).
117
+ * Native C functions can be called from the `Minimap2::FFI` module.
118
+ * Native C structure members can be accessed.
119
+ * Bitfields are supported by the [ffi-bitfield](https://github.com/kojix2/ffi-bitfield) gem.
120
+
137
121
  ```ruby
138
122
  aligner.idx_opt.members
139
123
  # => [:k, :w, :flag, :bucket_bits, :mini_batch_size, :batch_size]
@@ -145,10 +129,15 @@ aligner.idx_opt[:k] = 14
145
129
  aligner.idx_opt[:k]
146
130
  # => 14
147
131
  ```
132
+
133
+ </details>
134
+
135
+ ## Contributing
148
136
 
149
- ## Development
137
+ <details>
138
+ <summary><b>Development</b></summary>
150
139
 
151
- Fork your repository.
140
+ Fork your repository.
152
141
  then clone.
153
142
 
154
143
  ```sh
@@ -179,16 +168,11 @@ Run tests.
179
168
  bundle exec rake test
180
169
  ```
181
170
 
182
- ## Contributing
171
+ </details>
183
172
 
184
- ruby-minimap2 is a library under development and there are many points to be improved. Please feel free to send us your pull request.
173
+ ruby-minimap2 is a library under development and there are many points to be improved.
185
174
 
186
- * [Report bugs](https://github.com/kojix2/ruby-minimap2/issues)
187
- * Fix bugs and [submit pull requests](https://github.com/kojix2/ruby-minimap2/pulls)
188
- * Write, clarify, or fix documentation
189
- * Suggest or add new features
190
- * Create tools based on ruby-minimap2
191
- * Update minimap2 in github submodule
175
+ Please feel free to report [bugs](https://github.com/kojix2/ruby-minimap2/issues) and submit [pull requests](https://github.com/kojix2/ruby-minimap2/pulls)!
192
176
 
193
177
  ## License
194
178
 
data/ext/Rakefile ADDED
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rake"
4
+ require "fileutils"
5
+ require "ffi"
6
+
7
+ minimap2_dir = File.expand_path("minimap2", __dir__)
8
+ target_dir = "../../vendor"
9
+ target_fname = FFI.map_library_name("minimap2")
10
+ target_path = File.join(target_dir, target_fname)
11
+
12
+ task default: ["minimap2:build", "minimap2:clean"]
13
+
14
+ namespace :minimap2 do
15
+ desc "Compile Minimap2"
16
+ task :build do
17
+ Dir.chdir(minimap2_dir) do
18
+ # Add -fPIC option to Makefile
19
+ sh "git apply ../minimap2.patch"
20
+ sh "cp ../cmappy/cmappy.h ../cmappy/cmappy.c ."
21
+ sh "make"
22
+ case RbConfig::CONFIG["host_os"]
23
+ when /mswin|msys|mingw|cygwin|bccwin|wince|emc/
24
+ sh "cc *.o -shared -o #{target_fname} -lm -lz -lpthread"
25
+ when /darwin|mac os/
26
+ sh "clang -dynamiclib -undefined dynamic_lookup -o #{target_fname} *.o -lm -lz -lpthread"
27
+ sh "otool -L #{target_fname}"
28
+ else
29
+ sh "cc *.o -shared -o #{target_fname} -lm -lz -lpthread"
30
+ sh "ldd -r #{target_fname}"
31
+ end
32
+ sh "rm cmappy.h cmappy.c"
33
+ sh "git apply -R ../minimap2.patch"
34
+ FileUtils.mkdir_p(target_dir)
35
+ warn "mkdir -p #{target_dir}"
36
+ sh "mv #{target_fname} #{target_path}"
37
+ end
38
+ end
39
+
40
+ desc "Clean"
41
+ task :clean do
42
+ Dir.chdir(minimap2_dir) do
43
+ sh "make clean"
44
+ end
45
+ end
46
+
47
+ task cleanall: [:clean]
48
+
49
+ desc "Clean all"
50
+ task :cleanall do
51
+ Dir.chdir(minimap2_dir) do
52
+ sh "rm #{target_path}" if File.exist?(target_path)
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,129 @@
1
+ #include "cmappy.h"
2
+
3
+ void mm_reg2hitpy(const mm_idx_t *mi, mm_reg1_t *r, mm_hitpy_t *h)
4
+ {
5
+ h->ctg = mi->seq[r->rid].name;
6
+ h->ctg_len = mi->seq[r->rid].len;
7
+ h->ctg_start = r->rs, h->ctg_end = r->re;
8
+ h->qry_start = r->qs, h->qry_end = r->qe;
9
+ h->strand = r->rev? -1 : 1;
10
+ h->mapq = r->mapq;
11
+ h->mlen = r->mlen;
12
+ h->blen = r->blen;
13
+ h->NM = r->blen - r->mlen + r->p->n_ambi;
14
+ h->trans_strand = r->p->trans_strand == 1? 1 : r->p->trans_strand == 2? -1 : 0;
15
+ h->is_primary = (r->id == r->parent);
16
+ h->seg_id = r->seg_id;
17
+ h->n_cigar32 = r->p->n_cigar;
18
+ h->cigar32 = r->p->cigar;
19
+ }
20
+
21
+ void mm_free_reg1(mm_reg1_t *r)
22
+ {
23
+ free(r->p);
24
+ }
25
+
26
+ kseq_t *mm_fastx_open(const char *fn)
27
+ {
28
+ gzFile fp;
29
+ fp = fn && strcmp(fn, "-") != 0? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
30
+ return kseq_init(fp);
31
+ }
32
+
33
+ void mm_fastx_close(kseq_t *ks)
34
+ {
35
+ gzFile fp;
36
+ fp = ks->f->f;
37
+ kseq_destroy(ks);
38
+ gzclose(fp);
39
+ }
40
+
41
+ int mm_verbose_level(int v)
42
+ {
43
+ if (v >= 0) mm_verbose = v;
44
+ return mm_verbose;
45
+ }
46
+
47
+ void mm_reset_timer(void)
48
+ {
49
+ extern double realtime(void);
50
+ mm_realtime0 = realtime();
51
+ }
52
+
53
+ mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt)
54
+ {
55
+ mm_reg1_t *r;
56
+
57
+ // Py_BEGIN_ALLOW_THREADS
58
+ if (seq2 == 0) {
59
+ r = mm_map(mi, strlen(seq1), seq1, n_regs, b, opt, NULL);
60
+ } else {
61
+ int _n_regs[2];
62
+ mm_reg1_t *regs[2];
63
+ char *seq[2];
64
+ int i, len[2];
65
+
66
+ len[0] = strlen(seq1);
67
+ len[1] = strlen(seq2);
68
+ seq[0] = (char*)seq1;
69
+ seq[1] = strdup(seq2);
70
+ for (i = 0; i < len[1]>>1; ++i) {
71
+ int t = seq[1][len[1] - i - 1];
72
+ seq[1][len[1] - i - 1] = seq_comp_table[(uint8_t)seq[1][i]];
73
+ seq[1][i] = seq_comp_table[t];
74
+ }
75
+ if (len[1]&1) seq[1][len[1]>>1] = seq_comp_table[(uint8_t)seq[1][len[1]>>1]];
76
+ mm_map_frag(mi, 2, len, (const char**)seq, _n_regs, regs, b, opt, NULL);
77
+ for (i = 0; i < _n_regs[1]; ++i)
78
+ regs[1][i].rev = !regs[1][i].rev;
79
+ *n_regs = _n_regs[0] + _n_regs[1];
80
+ regs[0] = (mm_reg1_t*)realloc(regs[0], sizeof(mm_reg1_t) * (*n_regs));
81
+ memcpy(&regs[0][_n_regs[0]], regs[1], _n_regs[1] * sizeof(mm_reg1_t));
82
+ free(regs[1]);
83
+ r = regs[0];
84
+ }
85
+ // Py_END_ALLOW_THREADS
86
+
87
+ return r;
88
+ }
89
+
90
+ char *mappy_revcomp(int len, const uint8_t *seq)
91
+ {
92
+ int i;
93
+ char *rev;
94
+ rev = (char*)malloc(len + 1);
95
+ for (i = 0; i < len; ++i)
96
+ rev[len - i - 1] = seq_comp_table[seq[i]];
97
+ rev[len] = 0;
98
+ return rev;
99
+ }
100
+
101
+ char *mappy_fetch_seq(const mm_idx_t *mi, const char *name, int st, int en, int *len)
102
+ {
103
+ int i, rid;
104
+ char *s;
105
+ *len = 0;
106
+ rid = mm_idx_name2id(mi, name);
107
+ if (rid < 0) return 0;
108
+ if ((uint32_t)st >= mi->seq[rid].len || st >= en) return 0;
109
+ if (en < 0 || (uint32_t)en > mi->seq[rid].len)
110
+ en = mi->seq[rid].len;
111
+ s = (char*)malloc(en - st + 1);
112
+ *len = mm_idx_getseq(mi, rid, st, en, (uint8_t*)s);
113
+ for (i = 0; i < *len; ++i)
114
+ s[i] = "ACGTN"[(uint8_t)s[i]];
115
+ s[*len] = 0;
116
+ return s;
117
+ }
118
+
119
+ mm_idx_t *mappy_idx_seq(int w, int k, int is_hpc, int bucket_bits, const char *seq, int len)
120
+ {
121
+ const char *fake_name = "N/A";
122
+ char *s;
123
+ mm_idx_t *mi;
124
+ s = (char*)calloc(len + 1, 1);
125
+ memcpy(s, seq, len);
126
+ mi = mm_idx_str(w, k, is_hpc, bucket_bits, 1, (const char**)&s, (const char**)&fake_name);
127
+ free(s);
128
+ return mi;
129
+ }
@@ -0,0 +1,44 @@
1
+ #ifndef CMAPPY_H
2
+ #define CMAPPY_H
3
+
4
+ #include <stdlib.h>
5
+ #include <string.h>
6
+ #include <zlib.h>
7
+ #include "minimap.h"
8
+ #include "kseq.h"
9
+ KSEQ_DECLARE(gzFile)
10
+
11
+ typedef struct {
12
+ const char *ctg;
13
+ int32_t ctg_start, ctg_end;
14
+ int32_t qry_start, qry_end;
15
+ int32_t blen, mlen, NM, ctg_len;
16
+ uint8_t mapq, is_primary;
17
+ int8_t strand, trans_strand;
18
+ int32_t seg_id;
19
+ int32_t n_cigar32;
20
+ uint32_t *cigar32;
21
+ } mm_hitpy_t;
22
+
23
+ void mm_reg2hitpy(const mm_idx_t *mi, mm_reg1_t *r, mm_hitpy_t *h);
24
+
25
+ void mm_free_reg1(mm_reg1_t *r);
26
+
27
+ kseq_t *mm_fastx_open(const char *fn);
28
+
29
+ void mm_fastx_close(kseq_t *ks);
30
+
31
+ int mm_verbose_level(int v);
32
+
33
+ void mm_reset_timer(void);
34
+
35
+ extern unsigned char seq_comp_table[256];
36
+ mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt);
37
+
38
+ char *mappy_revcomp(int len, const uint8_t *seq);
39
+
40
+ char *mappy_fetch_seq(const mm_idx_t *mi, const char *name, int st, int en, int *len);
41
+
42
+ mm_idx_t *mappy_idx_seq(int w, int k, int is_hpc, int bucket_bits, const char *seq, int len);
43
+
44
+ #endif
@@ -0,0 +1,46 @@
1
+ #### 1. Why is the alignment different with option `-a` or `-c`?
2
+
3
+ Without `-a`, `-c` or `--cs`, minimap2 only finds *approximate* mapping
4
+ locations without detailed base alignment. In particular, the start and end
5
+ positions of the alignment are imprecise. With one of those options, minimap2
6
+ will perform base alignment, which is generally more accurate but is much
7
+ slower.
8
+
9
+ #### 2. How to map Illumina short reads to noisy long reads?
10
+
11
+ There is no good solution. A better approach is to assemble short reads into contigs
12
+ and then map noisy reads to contigs.
13
+
14
+ #### 3. The output SAM doesn't have a header.
15
+
16
+ By default, minimap2 indexes 4 billion reference bases (4Gb) in a batch and maps
17
+ all reads against each reference batch. Given a reference longer than 4Gb,
18
+ minimap2 is unable to see all the sequences and thus can't produce a correct
19
+ SAM header. In this case, minimap2 doesn't output any SAM header. There are two
20
+ solutions to this issue. First, you may increase option `-I` to, for example,
21
+ `-I8g` to index more reference bases in a batch. This is preferred if your
22
+ machine has enough memory. Second, if your machine doesn't have enough memory
23
+ to hold the reference index, you can use the `--split-prefix` option in a
24
+ command line like:
25
+ ```sh
26
+ minimap2 -ax map-ont --split-prefix=tmp ref.fa reads.fq
27
+ ```
28
+ This second approach uses less memory, but it is slower and requires temporary
29
+ disk space.
30
+
31
+ #### 4. The output SAM is malformatted.
32
+
33
+ This typically happens when you use nohup to wrap a minimap2 command line.
34
+ Nohup is discouraged as it breaks piping. If you have to use nohup, please
35
+ specify an output file with option `-o`.
36
+
37
+ #### 5. How to output one alignment per read?
38
+
39
+ You can use `--secondary=no` to suppress secondary alignments (aka multiple
40
+ mappings), but you can't suppress supplementary alignment (aka split or
41
+ chimeric alignment) this way. You can use samtools to filter out these
42
+ alignments:
43
+ ```sh
44
+ minimap2 -ax map-ont ref.fa reads.fq | samtools view -F0x900
45
+ ```
46
+ However, this is discouraged as supplementary alignment is informative.
@@ -0,0 +1,24 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2018- Dana-Farber Cancer Institute
4
+ 2017-2018 Broad Institute, Inc.
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining
7
+ a copy of this software and associated documentation files (the
8
+ "Software"), to deal in the Software without restriction, including
9
+ without limitation the rights to use, copy, modify, merge, publish,
10
+ distribute, sublicense, and/or sell copies of the Software, and to
11
+ permit persons to whom the Software is furnished to do so, subject to
12
+ the following conditions:
13
+
14
+ The above copyright notice and this permission notice shall be
15
+ included in all copies or substantial portions of the Software.
16
+
17
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
21
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
22
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24
+ SOFTWARE.
@@ -0,0 +1,10 @@
1
+ include *.h
2
+ include Makefile
3
+ include ksw2_dispatch.c
4
+ include main.c
5
+ include README.md
6
+ include sse2neon/emmintrin.h
7
+ include python/cmappy.h
8
+ include python/cmappy.pxd
9
+ include python/mappy.pyx
10
+ include python/README.rst
@@ -0,0 +1,132 @@
1
+ CFLAGS= -g -Wall -O2 -Wc++-compat #-Wextra
2
+ CPPFLAGS= -DHAVE_KALLOC
3
+ INCLUDES=
4
+ OBJS= kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o options.o index.o \
5
+ lchain.o align.o hit.o seed.o map.o format.o pe.o esterr.o splitidx.o \
6
+ ksw2_ll_sse.o
7
+ PROG= minimap2
8
+ PROG_EXTRA= sdust minimap2-lite
9
+ LIBS= -lm -lz -lpthread
10
+
11
+ ifeq ($(arm_neon),) # if arm_neon is not defined
12
+ ifeq ($(sse2only),) # if sse2only is not defined
13
+ OBJS+=ksw2_extz2_sse41.o ksw2_extd2_sse41.o ksw2_exts2_sse41.o ksw2_extz2_sse2.o ksw2_extd2_sse2.o ksw2_exts2_sse2.o ksw2_dispatch.o
14
+ else # if sse2only is defined
15
+ OBJS+=ksw2_extz2_sse.o ksw2_extd2_sse.o ksw2_exts2_sse.o
16
+ endif
17
+ else # if arm_neon is defined
18
+ OBJS+=ksw2_extz2_neon.o ksw2_extd2_neon.o ksw2_exts2_neon.o
19
+ INCLUDES+=-Isse2neon
20
+ ifeq ($(aarch64),) #if aarch64 is not defined
21
+ CFLAGS+=-D_FILE_OFFSET_BITS=64 -mfpu=neon -fsigned-char
22
+ else #if aarch64 is defined
23
+ CFLAGS+=-D_FILE_OFFSET_BITS=64 -fsigned-char
24
+ endif
25
+ endif
26
+
27
+ ifneq ($(asan),)
28
+ CFLAGS+=-fsanitize=address
29
+ LIBS+=-fsanitize=address
30
+ endif
31
+
32
+ ifneq ($(tsan),)
33
+ CFLAGS+=-fsanitize=thread
34
+ LIBS+=-fsanitize=thread
35
+ endif
36
+
37
+ .PHONY:all extra clean depend
38
+ .SUFFIXES:.c .o
39
+
40
+ .c.o:
41
+ $(CC) -c $(CFLAGS) $(CPPFLAGS) $(INCLUDES) $< -o $@
42
+
43
+ all:$(PROG)
44
+
45
+ extra:all $(PROG_EXTRA)
46
+
47
+ minimap2:main.o libminimap2.a
48
+ $(CC) $(CFLAGS) main.o -o $@ -L. -lminimap2 $(LIBS)
49
+
50
+ minimap2-lite:example.o libminimap2.a
51
+ $(CC) $(CFLAGS) $< -o $@ -L. -lminimap2 $(LIBS)
52
+
53
+ libminimap2.a:$(OBJS)
54
+ $(AR) -csru $@ $(OBJS)
55
+
56
+ sdust:sdust.c kalloc.o kalloc.h kdq.h kvec.h kseq.h ketopt.h sdust.h
57
+ $(CC) -D_SDUST_MAIN $(CFLAGS) $< kalloc.o -o $@ -lz
58
+
59
+ # SSE-specific targets on x86/x86_64
60
+
61
+ ifeq ($(arm_neon),) # if arm_neon is not defined, use the rule below (-msse2); if it is defined, this target is compiled with the default setting (i.e. no -msse2)
62
+ ksw2_ll_sse.o:ksw2_ll_sse.c ksw2.h kalloc.h
63
+ $(CC) -c $(CFLAGS) -msse2 $(CPPFLAGS) $(INCLUDES) $< -o $@
64
+ endif
65
+
66
+ ksw2_extz2_sse41.o:ksw2_extz2_sse.c ksw2.h kalloc.h
67
+ $(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
68
+
69
+ ksw2_extz2_sse2.o:ksw2_extz2_sse.c ksw2.h kalloc.h
70
+ $(CC) -c $(CFLAGS) -msse2 -mno-sse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@
71
+
72
+ ksw2_extd2_sse41.o:ksw2_extd2_sse.c ksw2.h kalloc.h
73
+ $(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
74
+
75
+ ksw2_extd2_sse2.o:ksw2_extd2_sse.c ksw2.h kalloc.h
76
+ $(CC) -c $(CFLAGS) -msse2 -mno-sse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@
77
+
78
+ ksw2_exts2_sse41.o:ksw2_exts2_sse.c ksw2.h kalloc.h
79
+ $(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
80
+
81
+ ksw2_exts2_sse2.o:ksw2_exts2_sse.c ksw2.h kalloc.h
82
+ $(CC) -c $(CFLAGS) -msse2 -mno-sse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@
83
+
84
+ ksw2_dispatch.o:ksw2_dispatch.c ksw2.h
85
+ $(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
86
+
87
+ # NEON-specific targets on ARM
88
+
89
+ ksw2_extz2_neon.o:ksw2_extz2_sse.c ksw2.h kalloc.h
90
+ $(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_SSE2_ONLY -D__SSE2__ $(INCLUDES) $< -o $@
91
+
92
+ ksw2_extd2_neon.o:ksw2_extd2_sse.c ksw2.h kalloc.h
93
+ $(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_SSE2_ONLY -D__SSE2__ $(INCLUDES) $< -o $@
94
+
95
+ ksw2_exts2_neon.o:ksw2_exts2_sse.c ksw2.h kalloc.h
96
+ $(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_SSE2_ONLY -D__SSE2__ $(INCLUDES) $< -o $@
97
+
98
+ # other non-file targets
99
+
100
+ clean:
101
+ rm -fr gmon.out *.o a.out $(PROG) $(PROG_EXTRA) *~ *.a *.dSYM build dist mappy*.so mappy.c python/mappy.c mappy.egg*
102
+
103
+ depend:
104
+ (LC_ALL=C; export LC_ALL; makedepend -Y -- $(CFLAGS) $(CPPFLAGS) -- *.c)
105
+
106
+ # DO NOT DELETE
107
+
108
+ align.o: minimap.h mmpriv.h bseq.h kseq.h ksw2.h kalloc.h
109
+ bseq.o: bseq.h kvec.h kalloc.h kseq.h
110
+ esterr.o: mmpriv.h minimap.h bseq.h kseq.h
111
+ example.o: minimap.h kseq.h
112
+ format.o: kalloc.h mmpriv.h minimap.h bseq.h kseq.h
113
+ hit.o: mmpriv.h minimap.h bseq.h kseq.h kalloc.h khash.h
114
+ index.o: kthread.h bseq.h minimap.h mmpriv.h kseq.h kvec.h kalloc.h khash.h
115
+ index.o: ksort.h
116
+ kalloc.o: kalloc.h
117
+ ksw2_extd2_sse.o: ksw2.h kalloc.h
118
+ ksw2_exts2_sse.o: ksw2.h kalloc.h
119
+ ksw2_extz2_sse.o: ksw2.h kalloc.h
120
+ ksw2_ll_sse.o: ksw2.h kalloc.h
121
+ kthread.o: kthread.h
122
+ lchain.o: mmpriv.h minimap.h bseq.h kseq.h kalloc.h krmq.h
123
+ main.o: bseq.h minimap.h mmpriv.h kseq.h ketopt.h
124
+ map.o: kthread.h kvec.h kalloc.h sdust.h mmpriv.h minimap.h bseq.h kseq.h
125
+ map.o: khash.h ksort.h
126
+ misc.o: mmpriv.h minimap.h bseq.h kseq.h ksort.h
127
+ options.o: mmpriv.h minimap.h bseq.h kseq.h
128
+ pe.o: mmpriv.h minimap.h bseq.h kseq.h kvec.h kalloc.h ksort.h
129
+ sdust.o: kalloc.h kdq.h kvec.h sdust.h
130
+ seed.o: mmpriv.h minimap.h bseq.h kseq.h kalloc.h ksort.h
131
+ sketch.o: kvec.h kalloc.h mmpriv.h minimap.h bseq.h kseq.h
132
+ splitidx.o: mmpriv.h minimap.h bseq.h kseq.h