minimap2 0.0.0 → 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +52 -5
- data/lib/minimap2.rb +75 -3
- data/lib/minimap2/aligner.rb +180 -0
- data/lib/minimap2/alignment.rb +65 -0
- data/lib/minimap2/ffi.rb +27 -0
- data/lib/minimap2/ffi/constants.rb +227 -0
- data/lib/minimap2/ffi/functions.rb +76 -0
- data/lib/minimap2/ffi/mappy.rb +99 -0
- data/lib/minimap2/ffi_helper.rb +53 -0
- data/lib/minimap2/version.rb +5 -0
- data/vendor/libminimap2.so +0 -0
- metadata +40 -4
- data/lib/minimap/version.rb +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ad687f9f6addf8de0a913d8b3b230539a42f76ce32f77b24576dda10118e3a07
|
4
|
+
data.tar.gz: 853c9994378f2441ad9dbaff0966a97812a3f7708a5b6516391d150f9c70b558
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 916210eb2b7ab42f1d81cc9873f6bfa5ca2c9b1084ba63c9dc62f6301c54940115fc36e5b2fc63afbec33a60a18fb632bb8ae3e533009a4b47de619d7a026321
|
7
|
+
data.tar.gz: 82435ffd47931e66ab1b99c6d85b2d8413d6cf9fb3152f984857b708e988f09b96a4380ae37d76f3f90c5a10a5300c7b891adc0d161dff4414d6aa2d277b1fc7
|
data/README.md
CHANGED
@@ -1,28 +1,75 @@
|
|
1
1
|
# Minimap2
|
2
2
|
|
3
|
+
[![CI](https://github.com/kojix2/ruby-minimap2/workflows/CI/badge.svg)](https://github.com/kojix2/ruby-minimap2/actions)
|
4
|
+
|
3
5
|
:dna: [minimap2](https://github.com/lh3/minimap2) - the long-read mapper - for [Ruby](https://github.com/ruby/ruby)
|
4
6
|
|
5
7
|
## Installation
|
6
8
|
|
9
|
+
You need to install it from the source code, because minimap2 has to be built as a shared library first.
|
10
|
+
Open your terminal and type the following commands in order.
|
11
|
+
|
7
12
|
```sh
|
8
|
-
|
13
|
+
git clone --recurse-submodules https://github.com/kojix2/ruby-minimap2
|
14
|
+
cd ruby-minimap2
|
15
|
+
bundle install
|
16
|
+
bundle exec rake minimap2:build
|
17
|
+
bundle exec rake install
|
9
18
|
```
|
10
19
|
|
11
|
-
|
20
|
+
You can run tests to see if the installation was successful.
|
12
21
|
|
13
|
-
```sh
|
14
|
-
# TODO
|
15
22
|
```
|
23
|
+
bundle exec rake test
|
24
|
+
```
|
25
|
+
|
26
|
+
## Quick Start
|
27
|
+
|
28
|
+
```ruby
|
29
|
+
require "minimap2"
|
30
|
+
|
31
|
+
# load or build index
|
32
|
+
aligner = Minimap2::Aligner.new("minimap2/test/MT-human.fa")
|
33
|
+
|
34
|
+
# retrieve a subsequence from the index
|
35
|
+
seq = aligner.seq("MT_human", 100, 200)
|
36
|
+
|
37
|
+
# mapping
|
38
|
+
aligner.align(seq) do |h|
|
39
|
+
pp h.to_h
|
40
|
+
end
|
41
|
+
```
|
42
|
+
|
43
|
+
## APIs
|
16
44
|
|
45
|
+
See
|
46
|
+
* [Mappy: Minimap2 Python Binding](https://github.com/lh3/minimap2/tree/master/python)
|
47
|
+
|
48
|
+
```markdown
|
49
|
+
* Minimap2 module
|
50
|
+
* Aligner class
|
51
|
+
* Alignment class
|
52
|
+
```
|
17
53
|
|
18
54
|
## Development
|
19
55
|
|
20
56
|
```sh
|
21
|
-
|
57
|
+
git clone --recurse-submodules https://github.com/kojix2/ruby-minimap2
|
58
|
+
# git clone https://github.com/kojix2/ruby-minimap2
|
59
|
+
# cd ruby-minimap2
|
60
|
+
# git submodule update -i
|
61
|
+
cd ruby-minimap2
|
62
|
+
bundle install
|
63
|
+
bundle exec rake minimap2:build
|
64
|
+
bundle exec rake test
|
22
65
|
```
|
23
66
|
|
24
67
|
## Contributing
|
25
68
|
|
69
|
+
ruby-minimap2 is a library under development and there are many points to be improved.
|
70
|
+
If you improve the source code, please feel free to send us your pull request.
|
71
|
+
Typo corrections are also welcome.
|
72
|
+
|
26
73
|
Bug reports and pull requests are welcome on GitHub at https://github.com/kojix2/ruby-minimap2.
|
27
74
|
|
28
75
|
## License
|
data/lib/minimap2.rb
CHANGED
@@ -1,6 +1,78 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
3
|
+
# dependencies
|
4
|
+
require "ffi"
|
5
|
+
|
6
|
+
# bit fields
|
7
|
+
require_relative "minimap2/ffi_helper"
|
8
|
+
|
9
|
+
# modules
|
10
|
+
require_relative "minimap2/aligner"
|
11
|
+
require_relative "minimap2/alignment"
|
12
|
+
require_relative "minimap2/version"
|
13
|
+
|
14
|
+
# Minimap2 mapper for long read sequences
|
15
|
+
# https://github.com/lh3/minimap2
|
16
|
+
# Li, H. (2018). Minimap2: pairwise alignment for nucleotide sequences. Bioinformatics, 34:3094-3100.
|
17
|
+
# doi:10.1093/bioinformatics/bty191
|
18
|
+
module Minimap2
|
4
19
|
class Error < StandardError; end
|
5
|
-
|
20
|
+
|
21
|
+
class << self
|
22
|
+
attr_accessor :ffi_lib
|
23
|
+
end
|
24
|
+
|
25
|
+
lib_name = ::FFI.map_library_name("minimap2")
|
26
|
+
self.ffi_lib = if ENV["MINIMAPDIR"]
|
27
|
+
File.expand_path(lib_name, ENV["MINIMAPDIR"])
|
28
|
+
else
|
29
|
+
File.expand_path("../vendor/#{lib_name}", __dir__)
|
30
|
+
end
|
31
|
+
|
32
|
+
# friendlier error message
|
33
|
+
autoload :FFI, "minimap2/ffi"
|
34
|
+
|
35
|
+
# methods from mappy
|
36
|
+
class << self
|
37
|
+
# Read a FASTA/FASTQ file record by record.
#
# Note: You can also use a generic library such as BioRuby instead of this method.
#
# @param file_path [String] path of the file; expanded with File.expand_path
# @param read_comment [Boolean] If false or nil, the comment will not be read.
# @yield [Array] +[name, seq, qual]+, or +[name, seq, qual, comment]+ when
#   +read_comment+ is truthy; +qual+ / +comment+ are nil when the record has none
# @return [Enumerator, nil] an Enumerator when no block is given
def fastx_read(file_path, read_comment = false)
  return enum_for(:fastx_read, file_path, read_comment) unless block_given?

  path = File.expand_path(file_path)
  ks = FFI.mm_fastx_open(path)
  begin
    while FFI.kseq_read(ks) >= 0
      name = ks[:name][:s]
      seq = ks[:seq][:s]
      # Assign unconditionally: locals survive across `while` iterations, so a
      # conditional assignment would leak the previous record's quality string
      # into a record that has none.
      qual = ks[:qual][:l].positive? ? ks[:qual][:s] : nil
      if read_comment
        comment = ks[:comment][:l].positive? ? ks[:comment][:s] : nil
        yield [name, seq, qual, comment]
      else
        yield [name, seq, qual]
      end
    end
  ensure
    # Close the handle even when the caller's block raises or breaks.
    FFI.mm_fastx_close(ks)
  end
end
|
59
|
+
|
60
|
+
# Reverse complement a sequence.
#
# @param seq [String] sequence
# @return [String] reverse-complemented sequence
def revcomp(seq)
  # Size the buffer in bytes, not characters: put_bytes copies every byte of
  # the string, which is more than seq.size when multi-byte characters occur.
  len = seq.bytesize
  bseq = ::FFI::MemoryPointer.new(:char, len)
  bseq.put_bytes(0, seq)
  # NOTE(review): the returned C string is presumably heap-allocated by
  # mappy_revcomp and never freed here - confirm against cmappy.h.
  FFI.mappy_revcomp(len, bseq)
end
|
70
|
+
|
71
|
+
# set verbosity level
|
72
|
+
# @param [Integer] level
|
73
|
+
|
74
|
+
def verbose(level = -1)
|
75
|
+
FFI.mm_verbose_level(level)
|
76
|
+
end
|
77
|
+
end
|
6
78
|
end
|
@@ -0,0 +1,180 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Minimap2
|
4
|
+
class Aligner
|
5
|
+
attr_reader :index_options, :map_options, :index
|
6
|
+
|
7
|
+
# Build or load an index and prepare the indexing / mapping options.
#
# @param fn_idx_in [String, nil] file handed to mm_idx_reader_open; unused when +seq+ is given
# @param preset [String, nil] preset name applied on top of the default options
# @param k [Integer, nil] overrides index_options[:k]
# @param w [Integer, nil] overrides index_options[:w]
# @param min_cnt [Integer, nil] min number of minimizers on each chain
# @param min_chain_score [Integer, nil] min chaining score
# @param min_dp_score [Integer, nil] stored in map_options[:min_dp_max]
# @param bw [Integer, nil] bandwidth
# @param best_n [Integer, nil] top best_n chains are subjected to DP alignment
# @param n_threads [Integer] passed to mm_idx_reader_read
# @param fn_idx_out [String, nil] passed to mm_idx_reader_open
# @param max_frag_len [Integer, nil] overrides map_options[:max_frag_len]
# @param extra_flags [Integer, nil] OR-ed into map_options[:flag]
# @param seq [String, nil] when given, the index is built from this sequence instead of a file
# @param scoring [Array<Integer>, nil] [a, b, q, e] and optionally [.., q2, e2] and [.., sc_ambi]
# @raise [ArgumentError] if the preset is rejected by mm_set_opt
# @raise [RuntimeError] if +fn_idx_in+ cannot be opened
def initialize(
  fn_idx_in, # FIXME
  preset: nil,
  k: nil,
  w: nil,
  min_cnt: nil,
  min_chain_score: nil,
  min_dp_score: nil,
  bw: nil,
  best_n: nil,
  n_threads: 3,
  fn_idx_out: nil,
  max_frag_len: nil,
  extra_flags: nil,
  seq: nil,
  scoring: nil
)
  @index_options = FFI::IdxOpt.new
  @map_options = FFI::MapOpt.new

  # Always load the defaults first: a preset only touches the fields it
  # cares about, so applying it to freshly allocated structs would leave
  # every other option unset.
  FFI.mm_set_opt(0, index_options, map_options)
  if preset && FFI.mm_set_opt(preset, index_options, map_options).negative?
    raise ArgumentError, "Unknown preset name: #{preset}"
  end

  # always perform alignment (4 is the CIGAR flag)
  map_options[:flag] |= 4
  index_options[:batch_size] = 0x7fffffffffffffff

  # override preset options
  index_options[:k] = k if k
  index_options[:w] = w if w
  map_options[:min_cnt] = min_cnt if min_cnt
  map_options[:min_chain_score] = min_chain_score if min_chain_score
  map_options[:min_dp_max] = min_dp_score if min_dp_score
  map_options[:bw] = bw if bw
  map_options[:best_n] = best_n if best_n
  map_options[:max_frag_len] = max_frag_len if max_frag_len
  map_options[:flag] |= extra_flags if extra_flags
  if scoring && scoring.size >= 4
    map_options[:a] = scoring[0]
    map_options[:b] = scoring[1]
    map_options[:q] = scoring[2]
    map_options[:e] = scoring[3]
    # The second gap model defaults to the first one ...
    map_options[:q2] = map_options[:q]
    map_options[:e2] = map_options[:e]
    if scoring.size >= 6
      # ... unless it is given explicitly.
      map_options[:q2] = scoring[4]
      map_options[:e2] = scoring[5]
      map_options[:sc_ambi] = scoring[6] if scoring.size >= 7
    end
  end

  if seq
    # Struct members are read with [] (FFI::Struct defines no per-field
    # accessor methods); bit 0 of the index flag is the HPC flag.
    @index = FFI.mappy_idx_seq(
      index_options[:w], index_options[:k], index_options[:flag] & 1,
      index_options[:bucket_bits], seq, seq.bytesize
    )
    FFI.mm_mapopt_update(map_options, index)
    map_options[:mid_occ] = 1000 # don't filter high-occ seeds
  else
    reader = FFI.mm_idx_reader_open(fn_idx_in, index_options, fn_idx_out)

    # The Ruby version raises an error here
    raise "Cannot open : #{fn_idx_in}" if reader.null?

    @index = FFI.mm_idx_reader_read(reader, n_threads)
    FFI.mm_idx_reader_close(reader)
    # Do not hand a NULL index to the native helpers; #align and #destroy
    # already treat a NULL index as "nothing loaded".
    unless index.null?
      FFI.mm_mapopt_update(map_options, index)
      FFI.mm_idx_index_name(index)
    end
  end
end
|
82
|
+
|
83
|
+
# FIXME: naming
|
84
|
+
def destroy
|
85
|
+
FFI.mm_idx_destroy(index) unless index.null?
|
86
|
+
end
|
87
|
+
|
88
|
+
# Map a query against the index and yield one Alignment per hit.
#
# NOTE: Name change: map -> align
# In the Ruby language, the name map means iterator.
# The original name is map, but here I use the method name align.
#
# @param seq [String] query sequence
# @param seq2 [String, nil] second sequence, passed through to mm_map_aux
# @param buf [FFI::TBuf, nil] thread buffer; a new one is allocated when nil
# @param cs [Boolean] generate the cs string for every hit
# @param md [Boolean] generate the MD string for every hit
# @param max_frag_len [Integer, nil] written to map_options[:max_frag_len] when given
# @param extra_flags [Integer, nil] OR-ed into map_options[:flag] when given
# @yield [Alignment]
# @return [Enumerator, nil] nil when no index is loaded; an Enumerator when
#   called without a block
def align(
  seq, seq2 = nil,
  buf: nil,
  cs: false,
  md: false,
  max_frag_len: nil,
  extra_flags: nil
)
  return if index.null?

  unless block_given?
    return enum_for(:align, seq, seq2, buf: buf, cs: cs, md: md,
                                       max_frag_len: max_frag_len,
                                       extra_flags: extra_flags)
  end

  # Struct members are accessed with []. These overrides are written into
  # the aligner's own map_options and therefore stay in effect afterwards.
  map_options[:max_frag_len] = max_frag_len if max_frag_len
  map_options[:flag] |= extra_flags if extra_flags

  buf ||= FFI::TBuf.new
  km = FFI.mm_tbuf_get_km(buf)
  n_regs_ptr = ::FFI::MemoryPointer.new :int

  ptr = FFI.mm_map_aux(index, seq, seq2, n_regs_ptr, buf, map_options)
  n_regs = n_regs_ptr.read_int

  # View the returned memory as an array of n_regs Reg1 structs.
  regs = Array.new(n_regs) { |i| FFI::Reg1.new(ptr + i * FFI::Reg1.size) }

  hit = FFI::Hit.new
  # Pointer-sized, zero-initialised slot that mm_gen_cs / mm_gen_md fill in
  # with the address of the generated string.
  cs_str = ::FFI::MemoryPointer.new(:pointer)
  m_cs_str = ::FFI::MemoryPointer.new :int
  i = 0
  begin
    while i < n_regs
      reg = regs[i]
      FFI.mm_reg2hitpy(index, reg, hit)

      # convert the 32-bit CIGAR encoding to Ruby array of [length, op]
      cigar = hit[:cigar32]
              .read_array_of_uint32(hit[:n_cigar32])
              .map { |x| [x >> 4, x & 0xf] }

      cs_text = ""
      if cs
        l_cs_str = FFI.mm_gen_cs(km, cs_str, m_cs_str, @index, reg, seq, 1)
        cs_text = cs_str.read_pointer.read_string(l_cs_str)
      end

      md_text = ""
      if md
        l_cs_str = FFI.mm_gen_md(km, cs_str, m_cs_str, @index, reg, seq)
        md_text = cs_str.read_pointer.read_string(l_cs_str)
      end

      yield Alignment.new(hit, cigar, cs_text, md_text)

      FFI.mm_free_reg1(reg)
      i += 1
    end
  ensure
    # Release whatever was not consumed (the caller's block raised or broke
    # out of the iteration).
    while i < n_regs
      FFI.mm_free_reg1(regs[i])
      i += 1
    end
  end
end
|
151
|
+
|
152
|
+
def seq(name, start = 0, stop = 0x7fffffff)
|
153
|
+
lp = ::FFI::MemoryPointer.new(:int)
|
154
|
+
s = FFI.mappy_fetch_seq(index, name, start, stop, lp)
|
155
|
+
l = lp.read_int
|
156
|
+
return nil if l.zero?
|
157
|
+
|
158
|
+
s.read_string(l)
|
159
|
+
end
|
160
|
+
|
161
|
+
def k
|
162
|
+
index[:k]
|
163
|
+
end
|
164
|
+
|
165
|
+
def w
|
166
|
+
index[:w]
|
167
|
+
end
|
168
|
+
|
169
|
+
def n_seq
|
170
|
+
index[:n_seq]
|
171
|
+
end
|
172
|
+
|
173
|
+
def seq_names
|
174
|
+
ptr = index[:seq].to_ptr
|
175
|
+
Array.new(index[:n_seq]) do |i|
|
176
|
+
FFI::IdxSeq.new(ptr + i * FFI::IdxSeq.size)[:name]
|
177
|
+
end
|
178
|
+
end
|
179
|
+
end
|
180
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Minimap2
  # Read-only value object describing a single hit.
  #
  # It is built from a hit record (anything that answers +[]+ with the keys
  # used in #initialize) plus the decoded CIGAR array.
  class Alignment
    # CIGAR operation letters indexed by the 4-bit operation code.
    # Includes P, =, X and B so that extended (=/X) CIGARs can be rendered.
    CIGAR_OPS = %w[M I D N S H P = X B].freeze

    # @return [Array<Symbol>] names of all readable attributes, in #to_h order
    def self.keys
      %i[ctg ctg_len r_st r_en strand trans_strand blen mlen nm primary
         q_st q_en mapq cigar read_num cs md cigar_str]
    end

    # Read only
    attr_reader(*keys)

    # @param h [#[]] hit record
    # @param cigar [Array<Array(Integer, Integer)>] pairs of [length, operation code]
    # @param cs [String, nil] cs string
    # @param md [String, nil] MD string
    def initialize(h, cigar, cs = nil, md = nil)
      @ctg = h[:ctg]
      @ctg_len = h[:ctg_len]
      @r_st = h[:ctg_start]
      @r_en = h[:ctg_end]
      @strand = h[:strand]
      @trans_strand = h[:trans_strand]
      @blen = h[:blen]
      @mlen = h[:mlen]
      @nm = h[:NM]
      @primary = h[:is_primary]
      @q_st = h[:qry_start]
      @q_en = h[:qry_end]
      @mapq = h[:mapq]
      @cigar = cigar
      @read_num = h[:seg_id] + 1 # seg_id is 0-based, read_num is 1-based
      @cs = cs
      @md = md

      @cigar_str = cigar.map { |len, op| "#{len}#{CIGAR_OPS.fetch(op, "?")}" }.join
    end

    # @return [Boolean] true when the hit is flagged as primary
    def primary?
      @primary == 1
    end

    # @return [Hash{Symbol => Object}] every attribute listed in Alignment.keys
    def to_h
      self.class.keys.map { |k| [k, __send__(k)] }.to_h
    end

    # Tab-separated, PAF-like representation of the hit.
    # The cs / MD tags are appended only when the corresponding string is
    # present and non-empty.
    #
    # @return [String]
    def to_s
      strand = if @strand.positive?
                 "+"
               elsif @strand.negative?
                 "-"
               else
                 "?"
               end
      tp = @primary != 0 ? "tp:A:P" : "tp:A:S"
      ts = if @trans_strand.positive?
             "ts:A:+"
           elsif @trans_strand.negative?
             "ts:A:-"
           else
             "ts:A:."
           end
      a = [@q_st, @q_en, strand, @ctg, @ctg_len, @r_st, @r_en,
           @mlen, @blen, @mapq, tp, ts, "cg:Z:#{@cigar_str}"]
      # An empty string is truthy in Ruby, so test for emptiness explicitly.
      a << "cs:Z:#{@cs}" unless @cs.nil? || @cs.empty?
      a << "MD:Z:#{@md}" unless @md.nil? || @md.empty?
      a.join("\t")
    end
  end
end
|
data/lib/minimap2/ffi.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# bit fields
|
4
|
+
require_relative "ffi_helper"
|
5
|
+
|
6
|
+
module Minimap2
|
7
|
+
# Native APIs
|
8
|
+
module FFI
|
9
|
+
extend ::FFI::Library
|
10
|
+
begin
|
11
|
+
ffi_lib Minimap2.ffi_lib
|
12
|
+
rescue LoadError => e
|
13
|
+
raise LoadError, "Could not find #{Minimap2.ffi_lib} \n#{e}"
|
14
|
+
end
|
15
|
+
|
16
|
+
# Continue even if some functions are not found.
|
17
|
+
def self.attach_function(*)
|
18
|
+
super
|
19
|
+
rescue ::FFI::NotFoundError => e
|
20
|
+
warn e.message
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
require_relative "ffi/constants"
|
26
|
+
require_relative "ffi/functions"
|
27
|
+
require_relative "ffi/mappy"
|
@@ -0,0 +1,227 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Minimap2
|
4
|
+
module FFI
|
5
|
+
# flags
|
6
|
+
NO_DIAG = 0x001 # no exact diagonal hit
|
7
|
+
NO_DUAL = 0x002 # skip pairs where query name is lexicographically larger than target name
|
8
|
+
CIGAR = 0x004
|
9
|
+
OUT_SAM = 0x008
|
10
|
+
NO_QUAL = 0x010
|
11
|
+
OUT_CG = 0x020
|
12
|
+
OUT_CS = 0x040
|
13
|
+
SPLICE = 0x080 # splice mode
|
14
|
+
SPLICE_FOR = 0x100 # match GT-AG
|
15
|
+
SPLICE_REV = 0x200 # match CT-AC, the reverse complement of GT-AG
|
16
|
+
NO_LJOIN = 0x400
|
17
|
+
OUT_CS_LONG = 0x800
|
18
|
+
SR = 0x1000
|
19
|
+
FRAG_MODE = 0x2000
|
20
|
+
NO_PRINT_2ND = 0x4000
|
21
|
+
TWO_IO_THREADS = 0x8000 # Translator's Note. MM_F_2_IO_THREADS. Constants starting with numbers cannot be defined.
|
22
|
+
LONG_CIGAR = 0x10000
|
23
|
+
INDEPEND_SEG = 0x20000
|
24
|
+
SPLICE_FLANK = 0x40000
|
25
|
+
SOFTCLIP = 0x80000
|
26
|
+
FOR_ONLY = 0x100000
|
27
|
+
REV_ONLY = 0x200000
|
28
|
+
HEAP_SORT = 0x400000
|
29
|
+
ALL_CHAINS = 0x800000
|
30
|
+
OUT_MD = 0x1000000
|
31
|
+
COPY_COMMENT = 0x2000000
|
32
|
+
EQX = 0x4000000 # use =/X instead of M
|
33
|
+
PAF_NO_HIT = 0x8000000 # output unmapped reads to PAF
|
34
|
+
NO_END_FLT = 0x10000000
|
35
|
+
HARD_MLEVEL = 0x20000000
|
36
|
+
SAM_HIT_ONLY = 0x40000000
|
37
|
+
|
38
|
+
HPC = 0x1
|
39
|
+
NO_SEQ = 0x2
|
40
|
+
NO_NAME = 0x4
|
41
|
+
|
42
|
+
IDX_MAGIC = "MMI\2"
|
43
|
+
|
44
|
+
MAX_SEG = 255
|
45
|
+
|
46
|
+
# emulate 128-bit integers
|
47
|
+
class MM128 < ::FFI::Struct
|
48
|
+
layout \
|
49
|
+
:x, :uint64_t,
|
50
|
+
:y, :uint64_t
|
51
|
+
end
|
52
|
+
|
53
|
+
# emulate 128-bit arrays
|
54
|
+
class MM128V < ::FFI::Struct
|
55
|
+
layout \
|
56
|
+
:n, :size_t,
|
57
|
+
:m, :size_t,
|
58
|
+
:a, MM128.ptr
|
59
|
+
end
|
60
|
+
|
61
|
+
# indexing option
|
62
|
+
class IdxOpt < ::FFI::Struct
|
63
|
+
layout \
|
64
|
+
:k, :short,
|
65
|
+
:w, :short,
|
66
|
+
:flag, :short,
|
67
|
+
:bucket_bits, :short,
|
68
|
+
:mini_batch_size, :int64_t,
|
69
|
+
:batch_size, :uint64_t
|
70
|
+
end
|
71
|
+
|
72
|
+
# mapping option
|
73
|
+
class MapOpt < ::FFI::Struct
|
74
|
+
layout \
|
75
|
+
:flag, :int64_t, # see MM_F_* macros
|
76
|
+
:seed, :int,
|
77
|
+
:sdust_thres, :int, # score threshold for SDUST; 0 to disable
|
78
|
+
:max_qlen, :int, # max query length
|
79
|
+
:bw, :int, # bandwidth
|
80
|
+
:max_gap, :int, # break a chain if there are no minimizers in a max_gap window
|
81
|
+
:max_gap_ref, :int,
|
82
|
+
:max_frag_len, :int,
|
83
|
+
:max_chain_skip, :int,
|
84
|
+
:max_chain_iter, :int,
|
85
|
+
:min_cnt, :int, # min number of minimizers on each chain
|
86
|
+
:min_chain_score, :int, # min chaining score
|
87
|
+
:chain_gap_scale, :float,
|
88
|
+
:mask_level, :float,
|
89
|
+
:mask_len, :int,
|
90
|
+
:pri_ratio, :float,
|
91
|
+
:best_n, :int, # top best_n chains are subjected to DP alignment
|
92
|
+
:max_join_long, :int,
|
93
|
+
:max_join_short, :int,
|
94
|
+
:min_join_flank_sc, :int,
|
95
|
+
:min_join_flank_ratio, :float,
|
96
|
+
:alt_drop, :float,
|
97
|
+
:a, :int, # matching score
|
98
|
+
:b, :int, # mismatch
|
99
|
+
:q, :int, # gap-open
|
100
|
+
:e, :int, # gap-ext
|
101
|
+
:q2, :int, # gap-open
|
102
|
+
:e2, :int, # gap-ext
|
103
|
+
:sc_ambi, :int, # score when one or both bases are "N"
|
104
|
+
:noncan, :int, # cost of non-canonical splicing sites
|
105
|
+
:junc_bonus, :int,
|
106
|
+
:zdrop, :int, # break alignment if alignment score drops too fast along the diagonal
|
107
|
+
:zdrop_inv, :int,
|
108
|
+
:end_bonus, :int,
|
109
|
+
:min_dp_max, :int, # drop an alignment if the score of the max scoring segment is below this threshold
|
110
|
+
:min_ksw_len, :int,
|
111
|
+
:anchor_ext_len, :int,
|
112
|
+
:anchor_ext_shift, :int,
|
113
|
+
:max_clip_ratio, :float, # drop an alignment if BOTH ends are clipped above this ratio
|
114
|
+
:pe_ori, :int,
|
115
|
+
:pe_bonus, :int,
|
116
|
+
:mid_occ_frac, :float, # only used by mm_mapopt_update(); see below
|
117
|
+
:min_mid_occ, :int32_t,
|
118
|
+
:mid_occ, :int32_t, # ignore seeds with occurrences above this threshold
|
119
|
+
:max_occ, :int32_t,
|
120
|
+
:mini_batch_size, :int64_t, # size of a batch of query bases to process in parallel
|
121
|
+
:max_sw_mat, :int64_t,
|
122
|
+
:split_prefix, :string
|
123
|
+
end
|
124
|
+
|
125
|
+
# minimap2 index
|
126
|
+
class IdxSeq < ::FFI::Struct
|
127
|
+
layout \
|
128
|
+
:name, :string, # name of the db sequence
|
129
|
+
:offset, :uint64_t, # offset in mm_idx_t::S
|
130
|
+
:len, :uint32_t, # length
|
131
|
+
:is_alt, :uint32_t
|
132
|
+
end
|
133
|
+
|
134
|
+
class Idx < ::FFI::Struct
|
135
|
+
layout \
|
136
|
+
:b, :int32_t,
|
137
|
+
:w, :int32_t,
|
138
|
+
:k, :int32_t,
|
139
|
+
:flag, :int32_t,
|
140
|
+
:n_seq, :uint32_t, # number of reference sequences
|
141
|
+
:index, :int32_t,
|
142
|
+
:n_alt, :int32_t,
|
143
|
+
:seq, IdxSeq.ptr, # sequence name, length and offset
|
144
|
+
:S, :pointer, # 4-bit packed sequence
|
145
|
+
:B, :pointer, # index (hidden)
|
146
|
+
:I, :pointer, # intervals (hidden)
|
147
|
+
:km, :pointer,
|
148
|
+
:h, :pointer
|
149
|
+
end
|
150
|
+
|
151
|
+
# index reader
|
152
|
+
class IdxReader < ::FFI::Struct
|
153
|
+
layout \
|
154
|
+
:is_idx, :int,
|
155
|
+
:n_parts, :int,
|
156
|
+
:idx_size, :int64_t,
|
157
|
+
:opt, IdxOpt,
|
158
|
+
:fp_out, :pointer, # FILE
|
159
|
+
:seq_or_idx, :pointer # FIXME: Union mm_bseq_files or FILE
|
160
|
+
end
|
161
|
+
|
162
|
+
# minimap2 alignment
|
163
|
+
class Extra < ::FFI::BitStruct
|
164
|
+
layout \
|
165
|
+
:capacity, :uint32, # the capacity of cigar[]
|
166
|
+
:dp_score, :int32, # DP score
|
167
|
+
:dp_max, :int32, # score of the max-scoring segment
|
168
|
+
:dp_max2, :int32, # score of the best alternate mappings
|
169
|
+
:n_ambi_trans_strand, :uint32,
|
170
|
+
:n_cigar, :uint32
|
171
|
+
|
172
|
+
bitfields :n_ambi_trans_strand,
|
173
|
+
:n_ambi, 30, # number of ambiguous bases
|
174
|
+
:trans_strand, 2 # transcript strand: 0 for unknown, 1 for +, 2 for -
|
175
|
+
|
176
|
+
# variable length array
|
177
|
+
def cigar
|
178
|
+
pointer.get_array_of_uint32(size, self[:n_cigar])
|
179
|
+
end
|
180
|
+
end
|
181
|
+
|
182
|
+
class Reg1 < ::FFI::BitStruct
|
183
|
+
layout \
|
184
|
+
:id, :int32_t, # ID for internal uses (see also parent below)
|
185
|
+
:cnt, :int32_t, # number of minimizers; if on the reverse strand
|
186
|
+
:rid, :int32_t, # reference index; if this is an alignment from inversion rescue
|
187
|
+
:score, :int32_t, # DP alignment score
|
188
|
+
:qs, :int32_t, # query start
|
189
|
+
:qe, :int32_t, # query end
|
190
|
+
:rs, :int32_t, # reference start
|
191
|
+
:re, :int32_t, # reference end
|
192
|
+
:parent, :int32_t, # parent==id if primary
|
193
|
+
:subsc, :int32_t, # best alternate mapping score
|
194
|
+
:as, :int32_t, # offset in the a[] array (for internal uses only)
|
195
|
+
:mlen, :int32_t, # seeded exact match length
|
196
|
+
:blen, :int32_t, # seeded alignment block length
|
197
|
+
:n_sub, :int32_t, # number of suboptimal mappings
|
198
|
+
:score0, :int32_t, # initial chaining score (before chain merging/spliting)
|
199
|
+
:fields, :uint32_t,
|
200
|
+
:hash, :uint32_t,
|
201
|
+
:div, :float,
|
202
|
+
:p, Extra.ptr
|
203
|
+
|
204
|
+
bitfields :fields,
|
205
|
+
:mapq, 8,
|
206
|
+
:split, 2,
|
207
|
+
:rev, 1,
|
208
|
+
:inv, 1,
|
209
|
+
:sam_pri, 1,
|
210
|
+
:proper_frag, 1,
|
211
|
+
:pe_thru, 1,
|
212
|
+
:seg_split, 1,
|
213
|
+
:seg_id, 8,
|
214
|
+
:split_inv, 1,
|
215
|
+
:is_alt, 1,
|
216
|
+
:dummy, 6
|
217
|
+
end
|
218
|
+
|
219
|
+
# memory buffer for thread-local storage during mapping
|
220
|
+
class TBuf < ::FFI::Struct
|
221
|
+
layout \
|
222
|
+
:km, :pointer,
|
223
|
+
:rep_len, :int,
|
224
|
+
:frag_gap, :int
|
225
|
+
end
|
226
|
+
end
|
227
|
+
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Minimap2
|
4
|
+
module FFI
|
5
|
+
attach_function \
|
6
|
+
:mm_set_opt_raw, :mm_set_opt,
|
7
|
+
[:pointer, IdxOpt.by_ref, MapOpt.by_ref],
|
8
|
+
:int
|
9
|
+
|
10
|
+
private_class_method :mm_set_opt_raw
|
11
|
+
|
12
|
+
# Apply a preset to the given option structs.
#
# @param preset [String, Symbol, Integer, nil] preset name; +0+ or +nil+
#   passes a NULL pointer to the native function instead of a name
# @param io [IdxOpt] indexing options to fill in
# @param mo [MapOpt] mapping options to fill in
# @return [Integer] return value of the native mm_set_opt
def self.mm_set_opt(preset, io, mo)
  ptr = if preset.nil? || preset == 0
          # nil is treated like 0; previously it was stringified to "" and
          # handed over as a (non-existent) preset name.
          ::FFI::Pointer::NULL
        else
          ::FFI::MemoryPointer.from_string(preset.to_s)
        end
  mm_set_opt_raw(ptr, io, mo)
end
|
20
|
+
|
21
|
+
attach_function \
|
22
|
+
:mm_idx_reader_open,
|
23
|
+
[:string, IdxOpt.by_ref, :string],
|
24
|
+
IdxReader.by_ref
|
25
|
+
|
26
|
+
attach_function \
|
27
|
+
:mm_idx_reader_read,
|
28
|
+
[IdxReader.by_ref, :int],
|
29
|
+
Idx.by_ref
|
30
|
+
|
31
|
+
attach_function \
|
32
|
+
:mm_idx_reader_close,
|
33
|
+
[IdxReader.by_ref],
|
34
|
+
:void
|
35
|
+
|
36
|
+
attach_function \
|
37
|
+
:mm_idx_destroy,
|
38
|
+
[Idx.by_ref],
|
39
|
+
:void
|
40
|
+
|
41
|
+
attach_function \
|
42
|
+
:mm_mapopt_update,
|
43
|
+
[MapOpt.by_ref, Idx.by_ref],
|
44
|
+
:void
|
45
|
+
|
46
|
+
attach_function \
|
47
|
+
:mm_idx_index_name,
|
48
|
+
[Idx.by_ref],
|
49
|
+
:int
|
50
|
+
|
51
|
+
attach_function \
|
52
|
+
:mm_tbuf_init,
|
53
|
+
[],
|
54
|
+
TBuf.by_ref
|
55
|
+
|
56
|
+
attach_function \
|
57
|
+
:mm_tbuf_destroy,
|
58
|
+
[TBuf.by_ref],
|
59
|
+
:void
|
60
|
+
|
61
|
+
attach_function \
|
62
|
+
:mm_tbuf_get_km,
|
63
|
+
[TBuf.by_ref],
|
64
|
+
:pointer
|
65
|
+
|
66
|
+
attach_function \
|
67
|
+
:mm_gen_cs,
|
68
|
+
[:pointer, :pointer, :pointer, Idx.by_ref, Reg1.by_ref, :string, :int],
|
69
|
+
:int
|
70
|
+
|
71
|
+
attach_function \
|
72
|
+
:mm_gen_md, :mm_gen_MD, # Avoid uppercase letters in method names.
|
73
|
+
[:pointer, :pointer, :pointer, Idx.by_ref, Reg1.by_ref, :string],
|
74
|
+
:int
|
75
|
+
end
|
76
|
+
end
|
@@ -0,0 +1,99 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# https://github.com/lh3/minimap2/blob/master/python/cmappy.h
|
4
|
+
|
5
|
+
module Minimap2
|
6
|
+
module FFI
|
7
|
+
class Hit < ::FFI::Struct
|
8
|
+
layout \
|
9
|
+
:ctg, :string,
|
10
|
+
:ctg_start, :int32_t,
|
11
|
+
:ctg_end, :int32_t,
|
12
|
+
:qry_start, :int32_t,
|
13
|
+
:qry_end, :int32_t,
|
14
|
+
:blen, :int32_t,
|
15
|
+
:mlen, :int32_t,
|
16
|
+
:NM, :int32_t,
|
17
|
+
:ctg_len, :int32_t,
|
18
|
+
:mapq, :uint8_t,
|
19
|
+
:is_primary, :uint8_t,
|
20
|
+
:strand, :int8_t,
|
21
|
+
:trans_strand, :int8_t,
|
22
|
+
:seg_id, :int32_t,
|
23
|
+
:n_cigar32, :int32_t,
|
24
|
+
:cigar32, :pointer
|
25
|
+
end
|
26
|
+
|
27
|
+
class KString < ::FFI::Struct
|
28
|
+
layout \
|
29
|
+
:l, :size_t,
|
30
|
+
:m, :size_t,
|
31
|
+
:s, :string
|
32
|
+
end
|
33
|
+
|
34
|
+
class KSeq < ::FFI::Struct
|
35
|
+
layout \
|
36
|
+
:name, KString,
|
37
|
+
:comment, KString,
|
38
|
+
:seq, KString,
|
39
|
+
:qual, KString,
|
40
|
+
:last_char, :int,
|
41
|
+
:f, :pointer # FIXME: KStream
|
42
|
+
end
|
43
|
+
|
44
|
+
attach_function \
|
45
|
+
:mm_reg2hitpy,
|
46
|
+
[Idx.by_ref, Reg1.by_ref, Hit.by_ref],
|
47
|
+
:void
|
48
|
+
|
49
|
+
attach_function \
|
50
|
+
:mm_free_reg1,
|
51
|
+
[Reg1.by_ref],
|
52
|
+
:void
|
53
|
+
|
54
|
+
attach_function \
|
55
|
+
:mm_fastx_open,
|
56
|
+
[:string],
|
57
|
+
KSeq.by_ref
|
58
|
+
|
59
|
+
attach_function \
|
60
|
+
:mm_fastx_close,
|
61
|
+
[KSeq.by_ref],
|
62
|
+
:void
|
63
|
+
|
64
|
+
attach_function \
|
65
|
+
:mm_verbose_level,
|
66
|
+
[:int],
|
67
|
+
:int
|
68
|
+
|
69
|
+
attach_function \
|
70
|
+
:mm_reset_timer,
|
71
|
+
[:void],
|
72
|
+
:void
|
73
|
+
|
74
|
+
attach_function \
|
75
|
+
:mm_map_aux,
|
76
|
+
[Idx.by_ref, :string, :string, :pointer, TBuf.by_ref, MapOpt.by_ref],
|
77
|
+
:pointer # Reg1
|
78
|
+
|
79
|
+
attach_function \
|
80
|
+
:mappy_revcomp,
|
81
|
+
%i[int pointer],
|
82
|
+
:string
|
83
|
+
|
84
|
+
attach_function \
|
85
|
+
:mappy_fetch_seq,
|
86
|
+
[Idx.by_ref, :string, :int, :int, :pointer],
|
87
|
+
:pointer # Use pointer instead of string to read with a specified length
|
88
|
+
|
89
|
+
attach_function \
|
90
|
+
:mappy_idx_seq,
|
91
|
+
%i[int int int int pointer int],
|
92
|
+
Idx.by_ref
|
93
|
+
|
94
|
+
attach_function \
|
95
|
+
:kseq_read,
|
96
|
+
[KSeq.by_ref],
|
97
|
+
:int
|
98
|
+
end
|
99
|
+
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "ffi"
|
4
|
+
|
5
|
+
module FFI
  # Struct whose integer members can additionally be read as named bit fields.
  #
  # Usage inside a subclass, after +layout+:
  #
  #   bitfields :member, :field_a, width_a, :field_b, width_b, ...
  #
  # Fields are packed starting at bit 0 of +member+ in declaration order.
  class BitStruct < Struct
    class << self
      # def union_layout(*args)
      #   Class.new(FFI::Union) { layout(*args) }
      # end

      # def struct_layout(*args)
      #   Class.new(FFI::Struct) { layout(*args) }
      # end

      # Prepended to the struct class so that #[] resolves bit-field names
      # before falling back to the regular member lookup.
      module BitFieldsModule
        def [](name)
          parent, start, width = self.class.bit_fields_map[name]
          return super(name) unless parent

          # Shift the field down to bit 0 and mask off everything above it.
          (super(parent) >> start) & ((1 << width) - 1)
        end
      end
      private_constant :BitFieldsModule

      # @return [Hash{Symbol => Array}] field name => [parent member, start bit, width]
      attr_reader :bit_fields_map

      # Declare the bit fields stored inside one member.
      #
      # @param args [Array] parent member name followed by alternating
      #   field names and bit widths
      def bitfields(*args)
        # Initialise only once per class: checking the very variable that is
        # assigned here keeps earlier declarations when bitfields is called
        # again for another member.
        unless instance_variable_defined?(:@bit_fields_map)
          @bit_fields_map = {}
          prepend BitFieldsModule
        end

        parent = args.shift
        offset = 0
        args.each_slice(2) do |label, width|
          @bit_fields_map[label] = [parent, offset, width]
          offset += width
        end
      end
    end
  end
end
|
Binary file
|
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: minimap2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.0
|
4
|
+
version: 0.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kojix2
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-03-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: ffi
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: bundler
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -66,6 +80,20 @@ dependencies:
|
|
66
80
|
- - ">="
|
67
81
|
- !ruby/object:Gem::Version
|
68
82
|
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: tty-command
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
69
97
|
description: minimap2
|
70
98
|
email:
|
71
99
|
- 2xijok@gmail.com
|
@@ -75,8 +103,16 @@ extra_rdoc_files: []
|
|
75
103
|
files:
|
76
104
|
- LICENSE.txt
|
77
105
|
- README.md
|
78
|
-
- lib/minimap/version.rb
|
79
106
|
- lib/minimap2.rb
|
107
|
+
- lib/minimap2/aligner.rb
|
108
|
+
- lib/minimap2/alignment.rb
|
109
|
+
- lib/minimap2/ffi.rb
|
110
|
+
- lib/minimap2/ffi/constants.rb
|
111
|
+
- lib/minimap2/ffi/functions.rb
|
112
|
+
- lib/minimap2/ffi/mappy.rb
|
113
|
+
- lib/minimap2/ffi_helper.rb
|
114
|
+
- lib/minimap2/version.rb
|
115
|
+
- vendor/libminimap2.so
|
80
116
|
homepage: https://github.com/kojix2/ruby-minimap2
|
81
117
|
licenses:
|
82
118
|
- MIT
|
@@ -96,7 +132,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
96
132
|
- !ruby/object:Gem::Version
|
97
133
|
version: '0'
|
98
134
|
requirements: []
|
99
|
-
rubygems_version: 3.
|
135
|
+
rubygems_version: 3.2.3
|
100
136
|
signing_key:
|
101
137
|
specification_version: 4
|
102
138
|
summary: minimap2
|