minimap2 0.0.0 → 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +52 -5
- data/lib/minimap2.rb +75 -3
- data/lib/minimap2/aligner.rb +180 -0
- data/lib/minimap2/alignment.rb +65 -0
- data/lib/minimap2/ffi.rb +27 -0
- data/lib/minimap2/ffi/constants.rb +227 -0
- data/lib/minimap2/ffi/functions.rb +76 -0
- data/lib/minimap2/ffi/mappy.rb +99 -0
- data/lib/minimap2/ffi_helper.rb +53 -0
- data/lib/minimap2/version.rb +5 -0
- data/vendor/libminimap2.so +0 -0
- metadata +40 -4
- data/lib/minimap/version.rb +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ad687f9f6addf8de0a913d8b3b230539a42f76ce32f77b24576dda10118e3a07
|
4
|
+
data.tar.gz: 853c9994378f2441ad9dbaff0966a97812a3f7708a5b6516391d150f9c70b558
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 916210eb2b7ab42f1d81cc9873f6bfa5ca2c9b1084ba63c9dc62f6301c54940115fc36e5b2fc63afbec33a60a18fb632bb8ae3e533009a4b47de619d7a026321
|
7
|
+
data.tar.gz: 82435ffd47931e66ab1b99c6d85b2d8413d6cf9fb3152f984857b708e988f09b96a4380ae37d76f3f90c5a10a5300c7b891adc0d161dff4414d6aa2d277b1fc7
|
data/README.md
CHANGED
@@ -1,28 +1,75 @@
|
|
1
1
|
# Minimap2
|
2
2
|
|
3
|
+
[Build status](https://github.com/kojix2/ruby-minimap2/actions)
|
4
|
+
|
3
5
|
:dna: [minimap2](https://github.com/lh3/minimap2) - the long-read mapper - for [Ruby](https://github.com/ruby/ruby)
|
4
6
|
|
5
7
|
## Installation
|
6
8
|
|
9
|
+
You need to install it from source, because minimap2 has to be built and a shared library created.
|
10
|
+
Open your terminal and type the following commands in order.
|
11
|
+
|
7
12
|
```sh
|
8
|
-
|
13
|
+
git clone --recurse-submodules https://github.com/kojix2/ruby-minimap2
|
14
|
+
cd ruby-minimap2
|
15
|
+
bundle install
|
16
|
+
bundle exec rake minimap2:build
|
17
|
+
bundle exec rake install
|
9
18
|
```
|
10
19
|
|
11
|
-
|
20
|
+
You can run tests to see if the installation was successful.
|
12
21
|
|
13
|
-
```sh
|
14
|
-
# TODO
|
15
22
|
```
|
23
|
+
bundle exec rake test
|
24
|
+
```
|
25
|
+
|
26
|
+
## Quick Start
|
27
|
+
|
28
|
+
```ruby
|
29
|
+
require "minimap2"
|
30
|
+
|
31
|
+
# load or build index
|
32
|
+
aligner = Minimap2::Aligner.new("minimap2/test/MT-human.fa")
|
33
|
+
|
34
|
+
# retrieve a subsequence from the index
|
35
|
+
seq = aligner.seq("MT_human", 100, 200)
|
36
|
+
|
37
|
+
# mapping
|
38
|
+
aligner.align(seq) do |h|
|
39
|
+
pp h.to_h
|
40
|
+
end
|
41
|
+
```
|
42
|
+
|
43
|
+
## APIs
|
16
44
|
|
45
|
+
See
|
46
|
+
* [Mappy: Minimap2 Python Binding](https://github.com/lh3/minimap2/tree/master/python)
|
47
|
+
|
48
|
+
```markdown
|
49
|
+
* Minimap2 module
|
50
|
+
* Aligner class
|
51
|
+
* Alignment class
|
52
|
+
```
|
17
53
|
|
18
54
|
## Development
|
19
55
|
|
20
56
|
```sh
|
21
|
-
|
57
|
+
git clone --recurse-submodules https://github.com/kojix2/ruby-minimap2
|
58
|
+
# git clone https://github.com/kojix2/ruby-minimap2
|
59
|
+
# cd ruby-minimap2
|
60
|
+
# git submodule update -i
|
61
|
+
cd ruby-minimap2
|
62
|
+
bundle install
|
63
|
+
bundle exec rake minimap2:build
|
64
|
+
bundle exec rake test
|
22
65
|
```
|
23
66
|
|
24
67
|
## Contributing
|
25
68
|
|
69
|
+
ruby-minimap2 is still under development, and there is plenty of room for improvement.
|
70
|
+
If you improve the source code, please feel free to send us a pull request.
|
71
|
+
Typo corrections are also welcome.
|
72
|
+
|
26
73
|
Bug reports and pull requests are welcome on GitHub at https://github.com/kojix2/ruby-minimap2.
|
27
74
|
|
28
75
|
## License
|
data/lib/minimap2.rb
CHANGED
@@ -1,6 +1,78 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
3
|
+
# dependencies
|
4
|
+
require "ffi"
|
5
|
+
|
6
|
+
# bit fields
|
7
|
+
require_relative "minimap2/ffi_helper"
|
8
|
+
|
9
|
+
# modules
|
10
|
+
require_relative "minimap2/aligner"
|
11
|
+
require_relative "minimap2/alignment"
|
12
|
+
require_relative "minimap2/version"
|
13
|
+
|
14
|
+
# Minimap2 mapper for long read sequences
|
15
|
+
# https://github.com/lh3/minimap2
|
16
|
+
# Li, H. (2018). Minimap2: pairwise alignment for nucleotide sequences. Bioinformatics, 34:3094-3100.
|
17
|
+
# doi:10.1093/bioinformatics/bty191
|
18
|
+
module Minimap2
  class Error < StandardError; end

  class << self
    # Path of the minimap2 shared library that the FFI layer loads.
    attr_accessor :ffi_lib
  end

  # Locate the shared library: the directory named by the MINIMAPDIR
  # environment variable takes precedence, otherwise the copy under the
  # gem's vendor directory is used.
  lib_name = ::FFI.map_library_name("minimap2")
  self.ffi_lib = if ENV["MINIMAPDIR"]
                   File.expand_path(lib_name, ENV["MINIMAPDIR"])
                 else
                   File.expand_path("../vendor/#{lib_name}", __dir__)
                 end

  # friendlier error message
  # (the native bindings are loaded lazily, on first reference to Minimap2::FFI)
  autoload :FFI, "minimap2/ffi"

  # methods from mappy
  class << self
    # Read a fasta/fastq file record by record.
    #
    # @param [String] file_path
    # @param [Boolean] read_comment If false or nil, the comment will not be read.
    # @yield [name, seq, qual, comment] one array per record; +qual+ (and
    #   +comment+) is nil when the record carries none. The comment element
    #   is only present when +read_comment+ is truthy.
    # Note: You can also use a generic library such as BioRuby instead of this method.
    def fastx_read(file_path, read_comment = false)
      path = File.expand_path(file_path)
      ks = FFI.mm_fastx_open(path)
      begin
        while FFI.kseq_read(ks) >= 0
          name = ks[:name][:s]
          seq = ks[:seq][:s]
          # Assign explicitly on every iteration: a `x = ... if cond` inside
          # `while` would keep the previous record's value when cond is false.
          qual = ks[:qual][:l].positive? ? ks[:qual][:s] : nil
          if read_comment
            comment = ks[:comment][:l].positive? ? ks[:comment][:s] : nil
            yield [name, seq, qual, comment]
          else
            yield [name, seq, qual]
          end
        end
      ensure
        # Close the reader even if the caller's block raises or breaks.
        FFI.mm_fastx_close(ks)
      end
    end

    # Reverse complement a sequence.
    #
    # @param [String] seq
    # @return [String] the value returned by the native mappy_revcomp call
    # NOTE(review): mappy_revcomp is bound with a :string return type; if the
    # C side allocates the result it is never freed here -- confirm.
    def revcomp(seq)
      # The native buffer is filled with bytes, so size it in bytes.
      l = seq.bytesize
      bseq = ::FFI::MemoryPointer.new(:char, l)
      bseq.put_bytes(0, seq)
      FFI.mappy_revcomp(l, bseq)
    end

    # Set the verbosity level.
    #
    # @param [Integer] level passed straight to mm_verbose_level
    # @return [Integer] the value returned by mm_verbose_level
    def verbose(level = -1)
      FFI.mm_verbose_level(level)
    end
  end
end
|
@@ -0,0 +1,180 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Minimap2
  # Wraps a minimap2 index together with the index/mapping option structs
  # used to build it and to align queries against it.
  class Aligner
    attr_reader :index_options, :map_options, :index

    # Build an index from +seq+ when given, otherwise load or build it from
    # the file +fn_idx_in+.
    #
    # @param fn_idx_in [String] index or sequence file; only used when +seq+ is nil
    # @param preset [String, nil] preset name handed to mm_set_opt; nil selects the defaults
    # @param k [Integer, nil] overrides index_options[:k]
    # @param w [Integer, nil] overrides index_options[:w]
    # @param min_cnt [Integer, nil] overrides map_options[:min_cnt]
    # @param min_chain_score [Integer, nil] overrides map_options[:min_chain_score]
    # @param min_dp_score [Integer, nil] overrides map_options[:min_dp_max]
    # @param bw [Integer, nil] overrides map_options[:bw]
    # @param best_n [Integer, nil] overrides map_options[:best_n]
    # @param n_threads [Integer] passed to mm_idx_reader_read
    # @param fn_idx_out [String, nil] passed to mm_idx_reader_open
    # @param max_frag_len [Integer, nil] overrides map_options[:max_frag_len]
    # @param extra_flags [Integer, nil] OR-ed into map_options[:flag]
    # @param seq [String, nil] a single sequence to index directly
    # @param scoring [Array<Integer>, nil] [a, b, q, e] and optionally q2, e2, sc_ambi
    # @raise [RuntimeError] when the index reader cannot be opened
    def initialize(
      fn_idx_in, # FIXME
      preset: nil,
      k: nil,
      w: nil,
      min_cnt: nil,
      min_chain_score: nil,
      min_dp_score: nil,
      bw: nil,
      best_n: nil,
      n_threads: 3,
      fn_idx_out: nil,
      max_frag_len: nil,
      extra_flags: nil,
      seq: nil,
      scoring: nil
    )

      @index_options = FFI::IdxOpt.new
      @map_options = FFI::MapOpt.new

      # 0 asks mm_set_opt for the default options
      FFI.mm_set_opt(preset || 0, index_options, map_options)

      # always perform alignment
      map_options[:flag] |= 4
      index_options[:batch_size] = 0x7fffffffffffffff

      # override preset options
      index_options[:k] = k if k
      index_options[:w] = w if w
      map_options[:min_cnt] = min_cnt if min_cnt
      map_options[:min_chain_score] = min_chain_score if min_chain_score
      map_options[:min_dp_max] = min_dp_score if min_dp_score
      map_options[:bw] = bw if bw
      map_options[:best_n] = best_n if best_n
      map_options[:max_frag_len] = max_frag_len if max_frag_len
      map_options[:flag] |= extra_flags if extra_flags
      if scoring && scoring.size >= 4
        map_options[:a] = scoring[0]
        map_options[:b] = scoring[1]
        map_options[:q] = scoring[2]
        map_options[:e] = scoring[3]
        # FFI::Struct fields are only reachable through [] / []=
        map_options[:q2] = map_options[:q]
        map_options[:e2] = map_options[:e]
        if scoring.size >= 6
          map_options[:q2] = scoring[4]
          map_options[:e2] = scoring[5]
          map_options[:sc_ambi] = scoring[6] if scoring.size >= 7
        end
      end

      if seq
        @index = FFI.mappy_idx_seq(
          index_options[:w], index_options[:k], index_options[:flag] & 1,
          index_options[:bucket_bits], seq, seq.bytesize
        )
        FFI.mm_mapopt_update(map_options, index)
        map_options[:mid_occ] = 1000 # don't filter high-occ seeds
      else
        reader = FFI.mm_idx_reader_open(fn_idx_in, index_options, fn_idx_out)

        # The Ruby version raises an error here
        raise "Cannot open : #{fn_idx_in}" if reader.null?

        @index = FFI.mm_idx_reader_read(reader, n_threads)
        FFI.mm_idx_reader_close(reader)
        FFI.mm_mapopt_update(map_options, index)
        FFI.mm_idx_index_name(index)
      end
    end

    # FIXME: naming
    # Release the native index.
    # NOTE(review): @index is left pointing at the freed index, so calling
    # this twice or using the aligner afterwards looks unsafe -- confirm.
    def destroy
      FFI.mm_idx_destroy(index) unless index.null?
    end

    # NOTE: Name change: map -> align
    # In the Ruby language, the name map means iterator.
    # The original name is map, but here I use the method name align.
    #
    # Align +seq+ (and optionally +seq2+) against the index, yielding one
    # Alignment per hit.
    #
    # @param seq [String] query sequence
    # @param seq2 [String, nil] second query sequence passed to mm_map_aux
    # @param buf [FFI::TBuf, nil] thread buffer; a fresh TBuf is used when nil
    # @param cs [Boolean] also generate the cs string for every hit
    # @param md [Boolean] also generate the MD string for every hit
    # @param max_frag_len [Integer, nil] written to map_options[:max_frag_len]
    # @param extra_flags [Integer, nil] OR-ed into map_options[:flag]
    # @yield [Alignment]
    # @return [nil]
    #
    # max_frag_len / extra_flags are written to the shared map_options and
    # therefore stay in effect for later calls.
    # NOTE(review): the array returned by mm_map_aux and the cs/MD buffer do
    # not appear to be freed anywhere in this method -- confirm ownership.
    def align(
      seq, seq2 = nil,
      buf: nil,
      cs: false,
      md: false,
      max_frag_len: nil,
      extra_flags: nil
    )

      return if index.null?

      map_options[:max_frag_len] = max_frag_len if max_frag_len
      map_options[:flag] |= extra_flags if extra_flags

      buf ||= FFI::TBuf.new
      km = FFI.mm_tbuf_get_km(buf)
      n_regs_ptr = ::FFI::MemoryPointer.new :int

      ptr = FFI.mm_map_aux(index, seq, seq2, n_regs_ptr, buf, map_options)
      n_regs = n_regs_ptr.read_int

      regs = Array.new(n_regs) { |i| FFI::Reg1.new(ptr + i * FFI::Reg1.size) }

      hit = FFI::Hit.new
      # one pointer-sized slot that mm_gen_cs / mm_gen_md fill with a char*
      cs_str = ::FFI::MemoryPointer.new(:pointer)
      m_cs_str = ::FFI::MemoryPointer.new :int
      released = 0
      begin
        regs.each do |reg|
          FFI.mm_reg2hitpy(index, reg, hit)

          # convert the 32-bit CIGAR encoding to Ruby array
          packed = hit[:cigar32].read_array_of_uint32(hit[:n_cigar32])
          cigar = packed.map { |x| [x >> 4, x & 0xf] }

          cs_tag = ""
          if cs
            l_cs_str = FFI.mm_gen_cs(km, cs_str, m_cs_str, @index, reg, seq, 1)
            cs_tag = cs_str.read_pointer.read_string(l_cs_str)
          end

          md_tag = ""
          if md
            l_cs_str = FFI.mm_gen_md(km, cs_str, m_cs_str, @index, reg, seq)
            md_tag = cs_str.read_pointer.read_string(l_cs_str)
          end

          yield Alignment.new(hit, cigar, cs_tag, md_tag)

          FFI.mm_free_reg1(reg)
          released += 1
        end
        nil
      ensure
        # If the block raised or broke out, release the hits not yet freed.
        regs.drop(released).each { |reg| FFI.mm_free_reg1(reg) }
      end
    end

    # Fetch a (sub)sequence of the reference +name+ from the index.
    #
    # @param name [String]
    # @param start [Integer]
    # @param stop [Integer]
    # @return [String, nil] nil when the reported length is zero
    # NOTE(review): the buffer returned by mappy_fetch_seq is not freed here -- confirm.
    def seq(name, start = 0, stop = 0x7fffffff)
      lp = ::FFI::MemoryPointer.new(:int)
      s = FFI.mappy_fetch_seq(index, name, start, stop, lp)
      l = lp.read_int
      return nil if l.zero?

      s.read_string(l)
    end

    # @return [Integer] the :k field of the index
    def k
      index[:k]
    end

    # @return [Integer] the :w field of the index
    def w
      index[:w]
    end

    # @return [Integer] the :n_seq field of the index
    def n_seq
      index[:n_seq]
    end

    # @return [Array<String>] the :name of each of the index's n_seq IdxSeq entries
    def seq_names
      ptr = index[:seq].to_ptr
      Array.new(index[:n_seq]) do |i|
        FFI::IdxSeq.new(ptr + i * FFI::IdxSeq.size)[:name]
      end
    end
  end
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Minimap2
  # Read-only value object describing a single alignment hit.
  class Alignment
    # CIGAR operation letters indexed by operation code (SAM order), so that
    # codes 7 (=) and 8 (X) can be rendered as well.
    CIGAR_OPS = "MIDNSHP=XB".freeze

    # @return [Array<Symbol>] names of all readable attributes, in to_h order
    def self.keys
      %i[ctg ctg_len r_st r_en strand trans_strand blen mlen nm primary
         q_st q_en mapq cigar read_num cs md cigar_str]
    end

    # Read only
    attr_reader(*keys)

    # @param h [#[]] hit record answering to the symbol keys read below
    #   (an FFI::Hit in production; anything hash-like works)
    # @param cigar [Array<Array(Integer, Integer)>] [length, op code] pairs
    # @param cs [String, nil] cs string
    # @param md [String, nil] MD string
    def initialize(h, cigar, cs = nil, md = nil)
      @ctg = h[:ctg]
      @ctg_len = h[:ctg_len]
      @r_st = h[:ctg_start]
      @r_en = h[:ctg_end]
      @strand = h[:strand]
      @trans_strand = h[:trans_strand]
      @blen = h[:blen]
      @mlen = h[:mlen]
      @nm = h[:NM]
      @primary = h[:is_primary]
      @q_st = h[:qry_start]
      @q_en = h[:qry_end]
      @mapq = h[:mapq]
      @cigar = cigar
      @read_num = h[:seg_id] + 1
      @cs = cs
      @md = md

      # Unknown op codes render as "?" instead of raising on String + nil.
      @cigar_str = cigar.map { |len, op| "#{len}#{CIGAR_OPS[op] || '?'}" }.join
    end

    # @return [Boolean] true when the hit's is_primary value is 1
    def primary?
      @primary == 1
    end

    # @return [Hash{Symbol => Object}] every attribute listed in Alignment.keys
    def to_h
      self.class.keys.map { |k| [k, __send__(k)] }.to_h
    end

    # Tab-separated summary: query and reference coordinates, lengths,
    # mapping quality, then the tp/ts/cg tags and, when present, cs.
    #
    # @return [String]
    def to_s
      strand = if @strand.positive?
                 "+"
               elsif @strand.negative?
                 "-"
               else
                 "?"
               end
      tp = @primary != 0 ? "tp:A:P" : "tp:A:S"
      ts = if @trans_strand.positive?
             "ts:A:+"
           elsif @trans_strand.negative?
             "ts:A:-"
           else
             "ts:A:."
           end
      a = [@q_st, @q_en, strand, @ctg, @ctg_len, @r_st, @r_en,
           @mlen, @blen, @mapq, tp, ts, "cg:Z:#{@cigar_str}"]
      # An empty string is truthy in Ruby, so test for content explicitly.
      a << "cs:Z:#{@cs}" if @cs && !@cs.empty?
      a.join("\t")
    end
  end
end
|
data/lib/minimap2/ffi.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# bit fields
|
4
|
+
require_relative "ffi_helper"
|
5
|
+
|
6
|
+
module Minimap2
  # Native APIs
  module FFI
    extend ::FFI::Library

    # Load the shared library chosen by Minimap2.ffi_lib; on failure re-raise
    # with the attempted path in front of the underlying message.
    begin
      ffi_lib Minimap2.ffi_lib
    rescue LoadError => load_error
      raise LoadError, "Could not find #{Minimap2.ffi_lib} \n#{load_error}"
    end

    class << self
      # Continue even if some functions are not found:
      # a missing symbol is reported with a warning instead of raising.
      def attach_function(*)
        super
      rescue ::FFI::NotFoundError => not_found
        warn not_found.message
      end
    end
  end
end
|
24
|
+
|
25
|
+
require_relative "ffi/constants"
|
26
|
+
require_relative "ffi/functions"
|
27
|
+
require_relative "ffi/mappy"
|
@@ -0,0 +1,227 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Minimap2
|
4
|
+
module FFI
|
5
|
+
# flags
|
6
|
+
NO_DIAG = 0x001 # no exact diagonal hit
|
7
|
+
NO_DUAL = 0x002 # skip pairs where query name is lexicographically larger than target name
|
8
|
+
CIGAR = 0x004
|
9
|
+
OUT_SAM = 0x008
|
10
|
+
NO_QUAL = 0x010
|
11
|
+
OUT_CG = 0x020
|
12
|
+
OUT_CS = 0x040
|
13
|
+
SPLICE = 0x080 # splice mode
|
14
|
+
SPLICE_FOR = 0x100 # match GT-AG
|
15
|
+
SPLICE_REV = 0x200 # match CT-AC, the reverse complement of GT-AG
|
16
|
+
NO_LJOIN = 0x400
|
17
|
+
OUT_CS_LONG = 0x800
|
18
|
+
SR = 0x1000
|
19
|
+
FRAG_MODE = 0x2000
|
20
|
+
NO_PRINT_2ND = 0x4000
|
21
|
+
TWO_IO_THREADS = 0x8000 # Translator's Note. MM_F_2_IO_THREADS. Constants starting with numbers cannot be defined.
|
22
|
+
LONG_CIGAR = 0x10000
|
23
|
+
INDEPEND_SEG = 0x20000
|
24
|
+
SPLICE_FLANK = 0x40000
|
25
|
+
SOFTCLIP = 0x80000
|
26
|
+
FOR_ONLY = 0x100000
|
27
|
+
REV_ONLY = 0x200000
|
28
|
+
HEAP_SORT = 0x400000
|
29
|
+
ALL_CHAINS = 0x800000
|
30
|
+
OUT_MD = 0x1000000
|
31
|
+
COPY_COMMENT = 0x2000000
|
32
|
+
EQX = 0x4000000 # use =/X instead of M
|
33
|
+
PAF_NO_HIT = 0x8000000 # output unmapped reads to PAF
|
34
|
+
NO_END_FLT = 0x10000000
|
35
|
+
HARD_MLEVEL = 0x20000000
|
36
|
+
SAM_HIT_ONLY = 0x40000000
|
37
|
+
|
38
|
+
HPC = 0x1
|
39
|
+
NO_SEQ = 0x2
|
40
|
+
NO_NAME = 0x4
|
41
|
+
|
42
|
+
IDX_MAGIC = "MMI\2"
|
43
|
+
|
44
|
+
MAX_SEG = 255
|
45
|
+
|
46
|
+
# emulate 128-bit integers
|
47
|
+
class MM128 < ::FFI::Struct
|
48
|
+
layout \
|
49
|
+
:x, :uint64_t,
|
50
|
+
:y, :uint64_t
|
51
|
+
end
|
52
|
+
|
53
|
+
# emulate 128-bit arrays
|
54
|
+
class MM128V < ::FFI::Struct
|
55
|
+
layout \
|
56
|
+
:n, :size_t,
|
57
|
+
:m, :size_t,
|
58
|
+
:a, MM128.ptr
|
59
|
+
end
|
60
|
+
|
61
|
+
# indexing option
|
62
|
+
class IdxOpt < ::FFI::Struct
|
63
|
+
layout \
|
64
|
+
:k, :short,
|
65
|
+
:w, :short,
|
66
|
+
:flag, :short,
|
67
|
+
:bucket_bits, :short,
|
68
|
+
:mini_batch_size, :int64_t,
|
69
|
+
:batch_size, :uint64_t
|
70
|
+
end
|
71
|
+
|
72
|
+
# mapping option
|
73
|
+
class MapOpt < ::FFI::Struct
|
74
|
+
layout \
|
75
|
+
:flag, :int64_t, # see MM_F_* macros
|
76
|
+
:seed, :int,
|
77
|
+
:sdust_thres, :int, # score threshold for SDUST; 0 to disable
|
78
|
+
:max_qlen, :int, # max query length
|
79
|
+
:bw, :int, # bandwidth
|
80
|
+
:max_gap, :int, # break a chain if there are no minimizers in a max_gap window
|
81
|
+
:max_gap_ref, :int,
|
82
|
+
:max_frag_len, :int,
|
83
|
+
:max_chain_skip, :int,
|
84
|
+
:max_chain_iter, :int,
|
85
|
+
:min_cnt, :int, # min number of minimizers on each chain
|
86
|
+
:min_chain_score, :int, # min chaining score
|
87
|
+
:chain_gap_scale, :float,
|
88
|
+
:mask_level, :float,
|
89
|
+
:mask_len, :int,
|
90
|
+
:pri_ratio, :float,
|
91
|
+
:best_n, :int, # top best_n chains are subjected to DP alignment
|
92
|
+
:max_join_long, :int,
|
93
|
+
:max_join_short, :int,
|
94
|
+
:min_join_flank_sc, :int,
|
95
|
+
:min_join_flank_ratio, :float,
|
96
|
+
:alt_drop, :float,
|
97
|
+
:a, :int, # matching score
|
98
|
+
:b, :int, # mismatch
|
99
|
+
:q, :int, # gap-open
|
100
|
+
:e, :int, # gap-ext
|
101
|
+
:q2, :int, # gap-open
|
102
|
+
:e2, :int, # gap-ext
|
103
|
+
:sc_ambi, :int, # score when one or both bases are "N"
|
104
|
+
:noncan, :int, # cost of non-canonical splicing sites
|
105
|
+
:junc_bonus, :int,
|
106
|
+
:zdrop, :int, # break alignment if alignment score drops too fast along the diagonal
|
107
|
+
:zdrop_inv, :int,
|
108
|
+
:end_bonus, :int,
|
109
|
+
:min_dp_max, :int, # drop an alignment if the score of the max scoring segment is below this threshold
|
110
|
+
:min_ksw_len, :int,
|
111
|
+
:anchor_ext_len, :int,
|
112
|
+
:anchor_ext_shift, :int,
|
113
|
+
:max_clip_ratio, :float, # drop an alignment if BOTH ends are clipped above this ratio
|
114
|
+
:pe_ori, :int,
|
115
|
+
:pe_bonus, :int,
|
116
|
+
:mid_occ_frac, :float, # only used by mm_mapopt_update(); see below
|
117
|
+
:min_mid_occ, :int32_t,
|
118
|
+
:mid_occ, :int32_t, # ignore seeds with occurrences above this threshold
|
119
|
+
:max_occ, :int32_t,
|
120
|
+
:mini_batch_size, :int64_t, # size of a batch of query bases to process in parallel
|
121
|
+
:max_sw_mat, :int64_t,
|
122
|
+
:split_prefix, :string
|
123
|
+
end
|
124
|
+
|
125
|
+
# minimap2 index
|
126
|
+
class IdxSeq < ::FFI::Struct
|
127
|
+
layout \
|
128
|
+
:name, :string, # name of the db sequence
|
129
|
+
:offset, :uint64_t, # offset in mm_idx_t::S
|
130
|
+
:len, :uint32_t, # length
|
131
|
+
:is_alt, :uint32_t
|
132
|
+
end
|
133
|
+
|
134
|
+
class Idx < ::FFI::Struct
|
135
|
+
layout \
|
136
|
+
:b, :int32_t,
|
137
|
+
:w, :int32_t,
|
138
|
+
:k, :int32_t,
|
139
|
+
:flag, :int32_t,
|
140
|
+
:n_seq, :uint32_t, # number of reference sequences
|
141
|
+
:index, :int32_t,
|
142
|
+
:n_alt, :int32_t,
|
143
|
+
:seq, IdxSeq.ptr, # sequence name, length and offset
|
144
|
+
:S, :pointer, # 4-bit packed sequence
|
145
|
+
:B, :pointer, # index (hidden)
|
146
|
+
:I, :pointer, # intervals (hidden)
|
147
|
+
:km, :pointer,
|
148
|
+
:h, :pointer
|
149
|
+
end
|
150
|
+
|
151
|
+
# index reader
|
152
|
+
class IdxReader < ::FFI::Struct
|
153
|
+
layout \
|
154
|
+
:is_idx, :int,
|
155
|
+
:n_parts, :int,
|
156
|
+
:idx_size, :int64_t,
|
157
|
+
:opt, IdxOpt,
|
158
|
+
:fp_out, :pointer, # FILE
|
159
|
+
:seq_or_idx, :pointer # FIXME: Union mm_bseq_files or FILE
|
160
|
+
end
|
161
|
+
|
162
|
+
# minimap2 alignment
|
163
|
+
class Extra < ::FFI::BitStruct
|
164
|
+
layout \
|
165
|
+
:capacity, :uint32, # the capacity of cigar[]
|
166
|
+
:dp_score, :int32, # DP score
|
167
|
+
:dp_max, :int32, # score of the max-scoring segment
|
168
|
+
:dp_max2, :int32, # score of the best alternate mappings
|
169
|
+
:n_ambi_trans_strand, :uint32,
|
170
|
+
:n_cigar, :uint32
|
171
|
+
|
172
|
+
bitfields :n_ambi_trans_strand,
|
173
|
+
:n_ambi, 30, # number of ambiguous bases
|
174
|
+
:trans_strand, 2 # transcript strand: 0 for unknown, 1 for +, 2 for -
|
175
|
+
|
176
|
+
# variable length array
|
177
|
+
def cigar
|
178
|
+
pointer.get_array_of_uint32(size, self[:n_cigar])
|
179
|
+
end
|
180
|
+
end
|
181
|
+
|
182
|
+
class Reg1 < ::FFI::BitStruct
|
183
|
+
layout \
|
184
|
+
:id, :int32_t, # ID for internal uses (see also parent below)
|
185
|
+
:cnt, :int32_t, # number of minimizers; if on the reverse strand
|
186
|
+
:rid, :int32_t, # reference index; if this is an alignment from inversion rescue
|
187
|
+
:score, :int32_t, # DP alignment score
|
188
|
+
:qs, :int32_t, # query start
|
189
|
+
:qe, :int32_t, # query end
|
190
|
+
:rs, :int32_t, # reference start
|
191
|
+
:re, :int32_t, # reference end
|
192
|
+
:parent, :int32_t, # parent==id if primary
|
193
|
+
:subsc, :int32_t, # best alternate mapping score
|
194
|
+
:as, :int32_t, # offset in the a[] array (for internal uses only)
|
195
|
+
:mlen, :int32_t, # seeded exact match length
|
196
|
+
:blen, :int32_t, # seeded alignment block length
|
197
|
+
:n_sub, :int32_t, # number of suboptimal mappings
|
198
|
+
:score0, :int32_t, # initial chaining score (before chain merging/spliting)
|
199
|
+
:fields, :uint32_t,
|
200
|
+
:hash, :uint32_t,
|
201
|
+
:div, :float,
|
202
|
+
:p, Extra.ptr
|
203
|
+
|
204
|
+
bitfields :fields,
|
205
|
+
:mapq, 8,
|
206
|
+
:split, 2,
|
207
|
+
:rev, 1,
|
208
|
+
:inv, 1,
|
209
|
+
:sam_pri, 1,
|
210
|
+
:proper_frag, 1,
|
211
|
+
:pe_thru, 1,
|
212
|
+
:seg_split, 1,
|
213
|
+
:seg_id, 8,
|
214
|
+
:split_inv, 1,
|
215
|
+
:is_alt, 1,
|
216
|
+
:dummy, 6
|
217
|
+
end
|
218
|
+
|
219
|
+
# memory buffer for thread-local storage during mapping
|
220
|
+
class TBuf < ::FFI::Struct
|
221
|
+
layout \
|
222
|
+
:km, :pointer,
|
223
|
+
:rep_len, :int,
|
224
|
+
:frag_gap, :int
|
225
|
+
end
|
226
|
+
end
|
227
|
+
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Minimap2
|
4
|
+
module FFI
|
5
|
+
attach_function \
|
6
|
+
:mm_set_opt_raw, :mm_set_opt,
|
7
|
+
[:pointer, IdxOpt.by_ref, MapOpt.by_ref],
|
8
|
+
:int
|
9
|
+
|
10
|
+
private_class_method :mm_set_opt_raw
|
11
|
+
|
12
|
+
def self.mm_set_opt(preset, io, mo)
|
13
|
+
if preset == 0
|
14
|
+
ptr = ::FFI::Pointer.new(:int, 0)
|
15
|
+
else
|
16
|
+
ptr = ::FFI::MemoryPointer.from_string(preset.to_s)
|
17
|
+
end
|
18
|
+
mm_set_opt_raw(ptr, io, mo)
|
19
|
+
end
|
20
|
+
|
21
|
+
attach_function \
|
22
|
+
:mm_idx_reader_open,
|
23
|
+
[:string, IdxOpt.by_ref, :string],
|
24
|
+
IdxReader.by_ref
|
25
|
+
|
26
|
+
attach_function \
|
27
|
+
:mm_idx_reader_read,
|
28
|
+
[IdxReader.by_ref, :int],
|
29
|
+
Idx.by_ref
|
30
|
+
|
31
|
+
attach_function \
|
32
|
+
:mm_idx_reader_close,
|
33
|
+
[IdxReader.by_ref],
|
34
|
+
:void
|
35
|
+
|
36
|
+
attach_function \
|
37
|
+
:mm_idx_destroy,
|
38
|
+
[Idx.by_ref],
|
39
|
+
:void
|
40
|
+
|
41
|
+
attach_function \
|
42
|
+
:mm_mapopt_update,
|
43
|
+
[MapOpt.by_ref, Idx.by_ref],
|
44
|
+
:void
|
45
|
+
|
46
|
+
attach_function \
|
47
|
+
:mm_idx_index_name,
|
48
|
+
[Idx.by_ref],
|
49
|
+
:int
|
50
|
+
|
51
|
+
attach_function \
|
52
|
+
:mm_tbuf_init,
|
53
|
+
[],
|
54
|
+
TBuf.by_ref
|
55
|
+
|
56
|
+
attach_function \
|
57
|
+
:mm_tbuf_destroy,
|
58
|
+
[TBuf.by_ref],
|
59
|
+
:void
|
60
|
+
|
61
|
+
attach_function \
|
62
|
+
:mm_tbuf_get_km,
|
63
|
+
[TBuf.by_ref],
|
64
|
+
:pointer
|
65
|
+
|
66
|
+
attach_function \
|
67
|
+
:mm_gen_cs,
|
68
|
+
[:pointer, :pointer, :pointer, Idx.by_ref, Reg1.by_ref, :string, :int],
|
69
|
+
:int
|
70
|
+
|
71
|
+
attach_function \
|
72
|
+
:mm_gen_md, :mm_gen_MD, # Avoid uppercase letters in method names.
|
73
|
+
[:pointer, :pointer, :pointer, Idx.by_ref, Reg1.by_ref, :string],
|
74
|
+
:int
|
75
|
+
end
|
76
|
+
end
|
@@ -0,0 +1,99 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# https://github.com/lh3/minimap2/blob/master/python/cmappy.h
|
4
|
+
|
5
|
+
module Minimap2
|
6
|
+
module FFI
|
7
|
+
class Hit < ::FFI::Struct
|
8
|
+
layout \
|
9
|
+
:ctg, :string,
|
10
|
+
:ctg_start, :int32_t,
|
11
|
+
:ctg_end, :int32_t,
|
12
|
+
:qry_start, :int32_t,
|
13
|
+
:qry_end, :int32_t,
|
14
|
+
:blen, :int32_t,
|
15
|
+
:mlen, :int32_t,
|
16
|
+
:NM, :int32_t,
|
17
|
+
:ctg_len, :int32_t,
|
18
|
+
:mapq, :uint8_t,
|
19
|
+
:is_primary, :uint8_t,
|
20
|
+
:strand, :int8_t,
|
21
|
+
:trans_strand, :int8_t,
|
22
|
+
:seg_id, :int32_t,
|
23
|
+
:n_cigar32, :int32_t,
|
24
|
+
:cigar32, :pointer
|
25
|
+
end
|
26
|
+
|
27
|
+
class KString < ::FFI::Struct
|
28
|
+
layout \
|
29
|
+
:l, :size_t,
|
30
|
+
:m, :size_t,
|
31
|
+
:s, :string
|
32
|
+
end
|
33
|
+
|
34
|
+
class KSeq < ::FFI::Struct
|
35
|
+
layout \
|
36
|
+
:name, KString,
|
37
|
+
:comment, KString,
|
38
|
+
:seq, KString,
|
39
|
+
:qual, KString,
|
40
|
+
:last_char, :int,
|
41
|
+
:f, :pointer # FIXME: KStream
|
42
|
+
end
|
43
|
+
|
44
|
+
attach_function \
|
45
|
+
:mm_reg2hitpy,
|
46
|
+
[Idx.by_ref, Reg1.by_ref, Hit.by_ref],
|
47
|
+
:void
|
48
|
+
|
49
|
+
attach_function \
|
50
|
+
:mm_free_reg1,
|
51
|
+
[Reg1.by_ref],
|
52
|
+
:void
|
53
|
+
|
54
|
+
attach_function \
|
55
|
+
:mm_fastx_open,
|
56
|
+
[:string],
|
57
|
+
KSeq.by_ref
|
58
|
+
|
59
|
+
attach_function \
|
60
|
+
:mm_fastx_close,
|
61
|
+
[KSeq.by_ref],
|
62
|
+
:void
|
63
|
+
|
64
|
+
attach_function \
|
65
|
+
:mm_verbose_level,
|
66
|
+
[:int],
|
67
|
+
:int
|
68
|
+
|
69
|
+
attach_function \
|
70
|
+
:mm_reset_timer,
|
71
|
+
[:void],
|
72
|
+
:void
|
73
|
+
|
74
|
+
attach_function \
|
75
|
+
:mm_map_aux,
|
76
|
+
[Idx.by_ref, :string, :string, :pointer, TBuf.by_ref, MapOpt.by_ref],
|
77
|
+
:pointer # Reg1
|
78
|
+
|
79
|
+
attach_function \
|
80
|
+
:mappy_revcomp,
|
81
|
+
%i[int pointer],
|
82
|
+
:string
|
83
|
+
|
84
|
+
attach_function \
|
85
|
+
:mappy_fetch_seq,
|
86
|
+
[Idx.by_ref, :string, :int, :int, :pointer],
|
87
|
+
:pointer # Use pointer instead of string to read with a specified length
|
88
|
+
|
89
|
+
attach_function \
|
90
|
+
:mappy_idx_seq,
|
91
|
+
%i[int int int int pointer int],
|
92
|
+
Idx.by_ref
|
93
|
+
|
94
|
+
attach_function \
|
95
|
+
:kseq_read,
|
96
|
+
[KSeq.by_ref],
|
97
|
+
:int
|
98
|
+
end
|
99
|
+
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "ffi"
|
4
|
+
|
5
|
+
module FFI
  # Struct subclass that adds a `bitfields` macro: named bit ranges packed
  # inside an integer member become readable through the usual [] accessor.
  class BitStruct < Struct
    class << self
      # def union_layout(*args)
      #   Class.new(FFI::Union) { layout(*args) }
      # end

      # def struct_layout(*args)
      #   Class.new(FFI::Struct) { layout(*args) }
      # end

      # Prepended to a class on its first `bitfields` call so that [] can
      # resolve bit-field names before falling back to the regular members.
      module BitFieldsModule
        # @param name [Symbol] a bit-field label or a regular member name
        # @return the extracted bit-field value, or the plain member value
        def [](name)
          parent, start, width = self.class.bit_fields_map[name]
          return super(name) unless parent

          # shift the field down to bit 0, then mask to its width
          (super(parent) >> start) & ((1 << width) - 1)
        end
      end
      private_constant :BitFieldsModule

      # @return [Hash{Symbol => Array}] label => [parent member, start bit, width]
      attr_reader :bit_fields_map

      # Declare bit fields stored inside the member +parent+.
      #
      #   bitfields :parent, :label_a, width_a, :label_b, width_b, ...
      #
      # Fields are laid out in declaration order, starting at bit 0. The
      # macro may be called several times on one class (once per parent
      # member); mappings from earlier calls are kept.
      def bitfields(*args)
        # Guard on the variable that is actually assigned below, so the map
        # is created (and the module prepended) only once per class.
        unless instance_variable_defined?(:@bit_fields_map)
          @bit_fields_map = {}
          prepend BitFieldsModule
        end

        parent = args.shift
        offset = 0
        args.each_slice(2) do |label, width|
          @bit_fields_map[label] = [parent, offset, width]
          offset += width
        end
      end
    end
  end
end
|
Binary file
|
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: minimap2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kojix2
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-03-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: ffi
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: bundler
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -66,6 +80,20 @@ dependencies:
|
|
66
80
|
- - ">="
|
67
81
|
- !ruby/object:Gem::Version
|
68
82
|
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: tty-command
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
69
97
|
description: minimap2
|
70
98
|
email:
|
71
99
|
- 2xijok@gmail.com
|
@@ -75,8 +103,16 @@ extra_rdoc_files: []
|
|
75
103
|
files:
|
76
104
|
- LICENSE.txt
|
77
105
|
- README.md
|
78
|
-
- lib/minimap/version.rb
|
79
106
|
- lib/minimap2.rb
|
107
|
+
- lib/minimap2/aligner.rb
|
108
|
+
- lib/minimap2/alignment.rb
|
109
|
+
- lib/minimap2/ffi.rb
|
110
|
+
- lib/minimap2/ffi/constants.rb
|
111
|
+
- lib/minimap2/ffi/functions.rb
|
112
|
+
- lib/minimap2/ffi/mappy.rb
|
113
|
+
- lib/minimap2/ffi_helper.rb
|
114
|
+
- lib/minimap2/version.rb
|
115
|
+
- vendor/libminimap2.so
|
80
116
|
homepage: https://github.com/kojix2/ruby-minimap2
|
81
117
|
licenses:
|
82
118
|
- MIT
|
@@ -96,7 +132,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
96
132
|
- !ruby/object:Gem::Version
|
97
133
|
version: '0'
|
98
134
|
requirements: []
|
99
|
-
rubygems_version: 3.
|
135
|
+
rubygems_version: 3.2.3
|
100
136
|
signing_key:
|
101
137
|
specification_version: 4
|
102
138
|
summary: minimap2
|