minimap2 0.0.4 → 0.2.21
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +92 -61
- data/lib/minimap2.rb +3 -3
- data/lib/minimap2/aligner.rb +16 -5
- data/lib/minimap2/alignment.rb +6 -2
- data/lib/minimap2/ffi/constants.rb +3 -0
- data/lib/minimap2/version.rb +2 -1
- data/vendor/libminimap2.so +0 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4bd850f529cb82950c16581735bdd74f232e0ef3490e5cb5b6f7045faa1fe696
|
4
|
+
data.tar.gz: 40d00cf14886a35f831b593d541cf9e72f8e5cf07d87be31116c215799449f62
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 669bd6d5a4eb0dc37f12ee4c0f9653bfe76afec70b8d592e291269cb97b90b493b398b8d68ebacb64ba2ce28187a32a32fdb3fb77ef070023ffa27983f479929
|
7
|
+
data.tar.gz: 12c2fd1ace06a7e6a1734cb27f09091851f3fe917714156b27a003a168815dbef83eabc00c56c701bdcd5f982db873346bca375b3e8f05764b7fb797d2d5c898
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# ruby-minimap2
|
2
2
|
|
3
3
|
[![Gem Version](https://img.shields.io/gem/v/minimap2?color=brightgreen)](https://rubygems.org/gems/minimap2)
|
4
4
|
[![CI](https://github.com/kojix2/ruby-minimap2/workflows/CI/badge.svg)](https://github.com/kojix2/ruby-minimap2/actions)
|
@@ -12,7 +12,7 @@
|
|
12
12
|
|
13
13
|
## Installation
|
14
14
|
|
15
|
-
|
15
|
+
Open your terminal and type the following commands in order. You need to build minimap2 on your own because you need to create a shared library that contains cmappy functions.
|
16
16
|
|
17
17
|
Build
|
18
18
|
|
@@ -29,7 +29,7 @@ Install
|
|
29
29
|
bundle exec rake install
|
30
30
|
```
|
31
31
|
|
32
|
-
Ruby-minimap2 is tested on Ubuntu and macOS.
|
32
|
+
Ruby-minimap2 is [tested on Ubuntu and macOS](https://github.com/kojix2/ruby-minimap2/actions).
|
33
33
|
|
34
34
|
## Quick Start
|
35
35
|
|
@@ -37,99 +37,119 @@ Ruby-minimap2 is tested on Ubuntu and macOS.
|
|
37
37
|
require "minimap2"
|
38
38
|
```
|
39
39
|
|
40
|
-
|
40
|
+
Create aligner
|
41
41
|
|
42
42
|
```ruby
|
43
43
|
aligner = Minimap2::Aligner.new("minimap2/test/MT-human.fa")
|
44
44
|
```
|
45
45
|
|
46
|
-
|
46
|
+
Retrieve a subsequence from the index
|
47
47
|
|
48
48
|
```ruby
|
49
49
|
seq = aligner.seq("MT_human", 100, 200)
|
50
50
|
```
|
51
51
|
|
52
|
-
|
52
|
+
Mapping
|
53
53
|
|
54
54
|
```ruby
|
55
55
|
hits = aligner.align(seq)
|
56
|
-
pp hits[0]
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
56
|
+
pp hits[0]
|
57
|
+
```
|
58
|
+
|
59
|
+
```
|
60
|
+
=>
|
61
|
+
#<Minimap2::Alignment:0x000055fe18223f50
|
62
|
+
@blen=100,
|
63
|
+
@cigar=[[100, 0]],
|
64
|
+
@cigar_str="100M",
|
65
|
+
@cs="",
|
66
|
+
@ctg="MT_human",
|
67
|
+
@ctg_len=16569,
|
68
|
+
@mapq=60,
|
69
|
+
@md="",
|
70
|
+
@mlen=100,
|
71
|
+
@nm=0,
|
72
|
+
@primary=1,
|
73
|
+
@q_en=100,
|
74
|
+
@q_st=0,
|
75
|
+
@r_en=200,
|
76
|
+
@r_st=100,
|
77
|
+
@read_num=1,
|
78
|
+
@strand=1,
|
79
|
+
@trans_strand=0>
|
75
80
|
```
|
76
81
|
|
77
82
|
## APIs Overview
|
78
83
|
|
79
|
-
|
84
|
+
API is based on [Mappy](https://github.com/lh3/minimap2/tree/master/python), the official Python binding for Minimap2.
|
85
|
+
|
86
|
+
Note: `Aligner#map` has been changed to `align`, because `map` means iterator in Ruby.
|
80
87
|
|
81
88
|
```markdown
|
82
89
|
* Minimap2 module
|
83
|
-
- fastx_read
|
84
|
-
- revcomp
|
90
|
+
- fastx_read Read fasta/fastq file.
|
91
|
+
- revcomp Reverse complement sequence.
|
85
92
|
|
86
93
|
* Aligner class
|
87
94
|
* attributes
|
88
|
-
- index
|
89
|
-
- idx_opt
|
90
|
-
- map_opt
|
95
|
+
- index Returns the value of attribute index.
|
96
|
+
- idx_opt Returns the value of attribute idx_opt.
|
97
|
+
- map_opt Returns the value of attribute map_opt.
|
91
98
|
* methods
|
92
|
-
- new(path, preset: nil)
|
93
|
-
- align
|
99
|
+
- new(path, preset: nil) Create a new aligner. (presets: sr, map-pb, map-ont, map-hifi, splice, asm5, etc.)
|
100
|
+
- align Maps and returns alignments.
|
101
|
+
- seq Retrieve a subsequence from the index.
|
94
102
|
|
95
103
|
* Alignment class
|
96
104
|
* attributes
|
97
|
-
- ctg
|
98
|
-
- ctg_len
|
99
|
-
- r_st
|
100
|
-
- r_en
|
101
|
-
- strand
|
102
|
-
- trans_strand
|
103
|
-
- blen
|
104
|
-
- mlen
|
105
|
-
- nm
|
106
|
-
- primary
|
107
|
-
- q_st
|
108
|
-
- q_en
|
109
|
-
- mapq
|
110
|
-
- cigar
|
111
|
-
- read_num
|
112
|
-
- cs
|
113
|
-
- md
|
114
|
-
- cigar_str
|
105
|
+
- ctg Returns name of the reference sequence the query is mapped to.
|
106
|
+
- ctg_len Returns total length of the reference sequence.
|
107
|
+
- r_st Returns start positions on the reference.
|
108
|
+
- r_en Returns end positions on the reference.
|
109
|
+
- strand Returns +1 if on the forward strand; -1 if on the reverse strand.
|
110
|
+
- trans_strand Returns transcript strand. +1 if on the forward strand; -1 if on the reverse strand; 0 if unknown.
|
111
|
+
- blen Returns length of the alignment, including both alignment matches and gaps but excluding ambiguous bases.
|
112
|
+
- mlen Returns length of the matching bases in the alignment, excluding ambiguous base matches.
|
113
|
+
- nm Returns number of mismatches, gaps and ambiguous positions in the alignment.
|
114
|
+
- primary Returns if the alignment is primary (typically the best and the first to generate).
|
115
|
+
- q_st Returns start positions on the query.
|
116
|
+
- q_en Returns end positions on the query.
|
117
|
+
- mapq Returns mapping quality.
|
118
|
+
- cigar Returns CIGAR as an array of shape (n_cigar, 2). The two numbers give the length and the operator of each CIGAR operation.
|
119
|
+
- read_num Returns read number that the alignment corresponds to; 1 for the first read and 2 for the second read.
|
120
|
+
- cs Returns the cs tag.
|
121
|
+
- md Returns the MD tag as in the SAM format. It is an empty string unless the md argument is applied when calling Aligner#align.
|
122
|
+
- cigar_str Returns CIGAR string.
|
115
123
|
* methods
|
116
|
-
- to_h
|
117
|
-
- to_s
|
124
|
+
- to_h Convert Alignment to hash.
|
125
|
+
- to_s Convert to the PAF format without the QueryName and QueryLength columns.
|
118
126
|
|
119
|
-
|
120
|
-
* IdxOpt class
|
121
|
-
* MapOpt class
|
127
|
+
## FFI module
|
128
|
+
* IdxOpt class Indexing options.
|
129
|
+
* MapOpt class Mapping options.
|
122
130
|
```
|
123
131
|
|
124
|
-
|
132
|
+
This is not all. See the [RubyDoc.info documentation](https://rubydoc.info/gems/minimap2/) for more details.
|
125
133
|
|
126
|
-
|
134
|
+
ruby-minimap2 is built on top of [Ruby-FFI](https://github.com/ffi/ffi).
|
135
|
+
Native functions can be called from the FFI module. FFI also provides a way to access some C structs.
|
127
136
|
|
128
|
-
ruby
|
137
|
+
```ruby
|
138
|
+
aligner.idx_opt.members
|
139
|
+
# => [:k, :w, :flag, :bucket_bits, :mini_batch_size, :batch_size]
|
140
|
+
aligner.idx_opt.values
|
141
|
+
# => [15, 10, 0, 14, 50000000, 9223372036854775807]
|
142
|
+
aligner.idx_opt[:k]
|
143
|
+
# => 15
|
144
|
+
aligner.idx_opt[:k] = 14
|
145
|
+
aligner.idx_opt[:k]
|
146
|
+
# => 14
|
147
|
+
```
|
129
148
|
|
130
149
|
## Development
|
131
150
|
|
132
|
-
Fork your repository
|
151
|
+
Fork your repository.
|
152
|
+
then clone.
|
133
153
|
|
134
154
|
```sh
|
135
155
|
git clone --recursive https://github.com/kojix2/ruby-minimap2
|
@@ -138,7 +158,7 @@ git clone --recursive https://github.com/kojix2/ruby-minimap2
|
|
138
158
|
# git submodule update -i
|
139
159
|
```
|
140
160
|
|
141
|
-
Build.
|
161
|
+
Build Minimap2 and Mappy.
|
142
162
|
|
143
163
|
```sh
|
144
164
|
cd ruby-minimap2
|
@@ -146,6 +166,13 @@ bundle install # Install dependent packages including Ruby-FFI
|
|
146
166
|
bundle exec rake minimap2:build
|
147
167
|
```
|
148
168
|
|
169
|
+
A shared library will be created in the vendor directory.
|
170
|
+
|
171
|
+
```
|
172
|
+
└── vendor
|
173
|
+
└── libminimap2.so
|
174
|
+
```
|
175
|
+
|
149
176
|
Run tests.
|
150
177
|
|
151
178
|
```
|
@@ -166,3 +193,7 @@ ruby-minimap2 is a library under development and there are many points to be imp
|
|
166
193
|
## License
|
167
194
|
|
168
195
|
[MIT License](https://opensource.org/licenses/MIT).
|
196
|
+
|
197
|
+
## Acknowledgements
|
198
|
+
|
199
|
+
I would like to thank Heng Li for making Minimap2, and all the readers who read the README to the end.
|
data/lib/minimap2.rb
CHANGED
@@ -34,7 +34,7 @@ module Minimap2
|
|
34
34
|
|
35
35
|
# methods from mappy
|
36
36
|
class << self
|
37
|
-
#
|
37
|
+
# Read fasta/fastq file.
|
38
38
|
# @param [String] file_path
|
39
39
|
# @param [Boolean] read_comment If false or nil, the comment will not be read.
|
40
40
|
# @yield [name, seq, qual, comment]
|
@@ -57,7 +57,7 @@ module Minimap2
|
|
57
57
|
FFI.mm_fastx_close(ks)
|
58
58
|
end
|
59
59
|
|
60
|
-
#
|
60
|
+
# Reverse complement sequence.
|
61
61
|
# @param [String] seq
|
62
62
|
# @return [String] seq
|
63
63
|
|
@@ -68,7 +68,7 @@ module Minimap2
|
|
68
68
|
FFI.mappy_revcomp(l, bseq)
|
69
69
|
end
|
70
70
|
|
71
|
-
#
|
71
|
+
# Set verbosity level.
|
72
72
|
# @param [Integer] level
|
73
73
|
|
74
74
|
def verbose(level = -1)
|
data/lib/minimap2/aligner.rb
CHANGED
@@ -4,11 +4,21 @@ module Minimap2
|
|
4
4
|
class Aligner
|
5
5
|
attr_reader :idx_opt, :map_opt, :index
|
6
6
|
|
7
|
-
# Create a new aligner
|
7
|
+
# Create a new aligner.
|
8
8
|
#
|
9
9
|
# @param fn_idx_in [String] index or sequence file name.
|
10
10
|
# @param seq [String] a single sequence to index.
|
11
11
|
# @param preset [String] minimap2 preset.
|
12
|
+
# * map-pb : PacBio CLR genomic reads
|
13
|
+
# * map-ont : Oxford Nanopore genomic reads
|
14
|
+
# * map-hifi : PacBio HiFi/CCS genomic reads (v2.19 or later)
|
15
|
+
# * asm20 : PacBio HiFi/CCS genomic reads (v2.18 or earlier)
|
16
|
+
# * sr : short genomic paired-end reads
|
17
|
+
# * splice : spliced long reads (strand unknown)
|
18
|
+
# * splice:hq : Final PacBio Iso-seq or traditional cDNA
|
19
|
+
# * asm5 : intra-species asm-to-asm alignment
|
20
|
+
# * ava-pb : PacBio read overlap
|
21
|
+
# * ava-ont : Nanopore read overlap
|
12
22
|
# @param k [Integer] k-mer length, no larger than 28.
|
13
23
|
# @param w [Integer] minimizer window size, no larger than 255.
|
14
24
|
# @param min_cnt [Integer] minimum number of minimizers on a chain.
|
@@ -101,6 +111,7 @@ module Minimap2
|
|
101
111
|
end
|
102
112
|
|
103
113
|
# Explicitly releases the memory of the index object.
|
114
|
+
|
104
115
|
def free_index
|
105
116
|
FFI.mm_idx_destroy(index) unless index.null?
|
106
117
|
end
|
@@ -184,10 +195,10 @@ module Minimap2
|
|
184
195
|
alignments
|
185
196
|
end
|
186
197
|
|
187
|
-
#
|
188
|
-
# @
|
189
|
-
# @
|
190
|
-
# @
|
198
|
+
# Retrieve a subsequence from the index.
|
199
|
+
# @param name
|
200
|
+
# @param start
|
201
|
+
# @param stop
|
191
202
|
|
192
203
|
def seq(name, start = 0, stop = 0x7fffffff)
|
193
204
|
lp = ::FFI::MemoryPointer.new(:int)
|
data/lib/minimap2/alignment.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Minimap2
|
4
|
-
# Alignment result
|
4
|
+
# Alignment result.
|
5
5
|
#
|
6
6
|
# @!attribute ctg
|
7
7
|
# @return [String] name of the reference sequence the query is mapped to.
|
@@ -73,17 +73,21 @@ module Minimap2
|
|
73
73
|
@cs = cs
|
74
74
|
@md = md
|
75
75
|
|
76
|
-
@cigar_str = cigar.map { |x| x[0].to_s +
|
76
|
+
@cigar_str = cigar.map { |x| x[0].to_s + FFI::CIGAR_STR[x[1]] }.join
|
77
77
|
end
|
78
78
|
|
79
79
|
def primary?
|
80
80
|
@primary == 1
|
81
81
|
end
|
82
82
|
|
83
|
+
# Convert Alignment to hash.
|
84
|
+
|
83
85
|
def to_h
|
84
86
|
self.class.keys.map { |k| [k, __send__(k)] }.to_h
|
85
87
|
end
|
86
88
|
|
89
|
+
# Convert to the PAF format without the QueryName and QueryLength columns.
|
90
|
+
|
87
91
|
def to_s
|
88
92
|
strand = if @strand.positive?
|
89
93
|
'+'
|
@@ -34,6 +34,7 @@ module Minimap2
|
|
34
34
|
NO_END_FLT = 0x10000000
|
35
35
|
HARD_MLEVEL = 0x20000000
|
36
36
|
SAM_HIT_ONLY = 0x40000000
|
37
|
+
RMQ = 0x80000000 # LL
|
37
38
|
|
38
39
|
HPC = 0x1
|
39
40
|
NO_SEQ = 0x2
|
@@ -43,6 +44,8 @@ module Minimap2
|
|
43
44
|
|
44
45
|
MAX_SEG = 255
|
45
46
|
|
47
|
+
CIGAR_STR = 'MIDNSHP=XB'
|
48
|
+
|
46
49
|
# emulate 128-bit integers
|
47
50
|
class MM128 < ::FFI::Struct
|
48
51
|
layout \
|
data/lib/minimap2/version.rb
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: minimap2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.21
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kojix2
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-07-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|
@@ -112,6 +112,7 @@ files:
|
|
112
112
|
- lib/minimap2/ffi/mappy.rb
|
113
113
|
- lib/minimap2/ffi_helper.rb
|
114
114
|
- lib/minimap2/version.rb
|
115
|
+
- vendor/libminimap2.so
|
115
116
|
homepage: https://github.com/kojix2/ruby-minimap2
|
116
117
|
licenses:
|
117
118
|
- MIT
|