minimap2 0.0.0 → 0.2.21
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +176 -7
- data/lib/minimap2.rb +75 -3
- data/lib/minimap2/aligner.rb +235 -0
- data/lib/minimap2/alignment.rb +113 -0
- data/lib/minimap2/ffi.rb +27 -0
- data/lib/minimap2/ffi/constants.rb +231 -0
- data/lib/minimap2/ffi/functions.rb +76 -0
- data/lib/minimap2/ffi/mappy.rb +99 -0
- data/lib/minimap2/ffi_helper.rb +53 -0
- data/lib/minimap2/version.rb +6 -0
- data/vendor/libminimap2.so +0 -0
- metadata +40 -4
- data/lib/minimap/version.rb +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4bd850f529cb82950c16581735bdd74f232e0ef3490e5cb5b6f7045faa1fe696
|
4
|
+
data.tar.gz: 40d00cf14886a35f831b593d541cf9e72f8e5cf07d87be31116c215799449f62
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 669bd6d5a4eb0dc37f12ee4c0f9653bfe76afec70b8d592e291269cb97b90b493b398b8d68ebacb64ba2ce28187a32a32fdb3fb77ef070023ffa27983f479929
|
7
|
+
data.tar.gz: 12c2fd1ace06a7e6a1734cb27f09091851f3fe917714156b27a003a168815dbef83eabc00c56c701bdcd5f982db873346bca375b3e8f05764b7fb797d2d5c898
|
data/README.md
CHANGED
@@ -1,30 +1,199 @@
|
|
1
|
-
#
|
1
|
+
# ruby-minimap2
|
2
|
+
|
3
|
+
[![Gem Version](https://img.shields.io/gem/v/minimap2?color=brightgreen)](https://rubygems.org/gems/minimap2)
|
4
|
+
[![CI](https://github.com/kojix2/ruby-minimap2/workflows/CI/badge.svg)](https://github.com/kojix2/ruby-minimap2/actions)
|
5
|
+
[![The MIT License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE.txt)
|
6
|
+
[![Docs Latest](https://img.shields.io/badge/docs-stable-blue.svg)](https://rubydoc.info/gems/minimap2)
|
7
|
+
[![DOI](https://zenodo.org/badge/325711305.svg)](https://zenodo.org/badge/latestdoi/325711305)
|
8
|
+
|
9
|
+
|
2
10
|
|
3
11
|
:dna: [minimap2](https://github.com/lh3/minimap2) - the long-read mapper - for [Ruby](https://github.com/ruby/ruby)
|
4
12
|
|
5
13
|
## Installation
|
6
14
|
|
15
|
+
Open your terminal and type the following commands in order. You need to build minimap2 on your own because you need to create a shared library that contains cmappy functions.
|
16
|
+
|
17
|
+
Build
|
18
|
+
|
7
19
|
```sh
|
8
|
-
|
20
|
+
git clone --recursive https://github.com/kojix2/ruby-minimap2
|
21
|
+
cd ruby-minimap2
|
22
|
+
bundle install
|
23
|
+
bundle exec rake minimap2:build
|
9
24
|
```
|
10
25
|
|
11
|
-
|
26
|
+
Install
|
12
27
|
|
13
|
-
```sh
|
14
|
-
# TODO
|
15
28
|
```
|
29
|
+
bundle exec rake install
|
30
|
+
```
|
31
|
+
|
32
|
+
Ruby-minimap2 is [tested on Ubuntu and macOS](https://github.com/kojix2/ruby-minimap2/actions).
|
33
|
+
|
34
|
+
## Quick Start
|
35
|
+
|
36
|
+
```ruby
|
37
|
+
require "minimap2"
|
38
|
+
```
|
39
|
+
|
40
|
+
Create aligner
|
41
|
+
|
42
|
+
```ruby
|
43
|
+
aligner = Minimap2::Aligner.new("minimap2/test/MT-human.fa")
|
44
|
+
```
|
45
|
+
|
46
|
+
Retrieve a subsequence from the index
|
47
|
+
|
48
|
+
```ruby
|
49
|
+
seq = aligner.seq("MT_human", 100, 200)
|
50
|
+
```
|
51
|
+
|
52
|
+
Mapping
|
53
|
+
|
54
|
+
```ruby
|
55
|
+
hits = aligner.align(seq)
|
56
|
+
pp hits[0]
|
57
|
+
```
|
58
|
+
|
59
|
+
```
|
60
|
+
=>
|
61
|
+
#<Minimap2::Alignment:0x000055fe18223f50
|
62
|
+
@blen=100,
|
63
|
+
@cigar=[[100, 0]],
|
64
|
+
@cigar_str="100M",
|
65
|
+
@cs="",
|
66
|
+
@ctg="MT_human",
|
67
|
+
@ctg_len=16569,
|
68
|
+
@mapq=60,
|
69
|
+
@md="",
|
70
|
+
@mlen=100,
|
71
|
+
@nm=0,
|
72
|
+
@primary=1,
|
73
|
+
@q_en=100,
|
74
|
+
@q_st=0,
|
75
|
+
@r_en=200,
|
76
|
+
@r_st=100,
|
77
|
+
@read_num=1,
|
78
|
+
@strand=1,
|
79
|
+
@trans_strand=0>
|
80
|
+
```
|
81
|
+
|
82
|
+
## APIs Overview
|
83
|
+
|
84
|
+
API is based on [Mappy](https://github.com/lh3/minimap2/tree/master/python), the official Python binding for Minimap2.
|
85
|
+
|
86
|
+
Note: `Aligner#map` has been renamed to `align`, because `map` means iterator in Ruby.
|
87
|
+
|
88
|
+
```markdown
|
89
|
+
* Minimap2 module
|
90
|
+
- fastx_read Read fasta/fastq file.
|
91
|
+
- revcomp Reverse complement sequence.
|
92
|
+
|
93
|
+
* Aligner class
|
94
|
+
* attributes
|
95
|
+
- index Returns the value of attribute index.
|
96
|
+
- idx_opt Returns the value of attribute idx_opt.
|
97
|
+
- map_opt Returns the value of attribute map_opt.
|
98
|
+
* methods
|
99
|
+
- new(path, preset: nil) Create a new aligner. (presets: sr, map-pb, map-ont, map-hifi, splice, asm5, etc.)
|
100
|
+
- align Maps and returns alignments.
|
101
|
+
- seq Retrieve a subsequence from the index.
|
102
|
+
|
103
|
+
* Alignment class
|
104
|
+
* attributes
|
105
|
+
- ctg Returns name of the reference sequence the query is mapped to.
|
106
|
+
- ctg_len Returns total length of the reference sequence.
|
107
|
+
- r_st Returns start positions on the reference.
|
108
|
+
- r_en Returns end positions on the reference.
|
109
|
+
- strand Returns +1 if on the forward strand; -1 if on the reverse strand.
|
110
|
+
- trans_strand Returns transcript strand. +1 if on the forward strand; -1 if on the reverse strand; 0 if unknown.
|
111
|
+
- blen Returns length of the alignment, including both alignment matches and gaps but excluding ambiguous bases.
|
112
|
+
- mlen Returns length of the matching bases in the alignment, excluding ambiguous base matches.
|
113
|
+
- nm Returns number of mismatches, gaps and ambiguous positions in the alignment.
|
114
|
+
- primary Returns if the alignment is primary (typically the best and the first to generate).
|
115
|
+
- q_st Returns start positions on the query.
|
116
|
+
- q_en Returns end positions on the query.
|
117
|
+
- mapq Returns mapping quality.
|
118
|
+
- cigar Returns CIGAR returned as an array of shape (n_cigar,2). The two numbers give the length and the operator of each CIGAR operation.
|
119
|
+
- read_num Returns read number that the alignment corresponds to; 1 for the first read and 2 for the second read.
|
120
|
+
- cs Returns the cs tag.
|
121
|
+
- md Returns the MD tag as in the SAM format. It is an empty string unless the md argument is applied when calling Aligner#align.
|
122
|
+
- cigar_str Returns CIGAR string.
|
123
|
+
* methods
|
124
|
+
- to_h Convert Alignment to hash.
|
125
|
+
- to_s Convert to the PAF format without the QueryName and QueryLength columns.
|
126
|
+
|
127
|
+
## FFI module
|
128
|
+
* IdxOpt class Indexing options.
|
129
|
+
* MapOpt class Mapping options.
|
130
|
+
```
|
131
|
+
|
132
|
+
This is not all. See the [RubyDoc.info documentation](https://rubydoc.info/gems/minimap2/) for more details.
|
16
133
|
|
134
|
+
ruby-minimap2 is built on top of [Ruby-FFI](https://github.com/ffi/ffi).
|
135
|
+
Native functions can be called from the FFI module. FFI also provides a way to access some C structs.
|
136
|
+
|
137
|
+
```ruby
|
138
|
+
aligner.idx_opt.members
|
139
|
+
# => [:k, :w, :flag, :bucket_bits, :mini_batch_size, :batch_size]
|
140
|
+
aligner.idx_opt.values
|
141
|
+
# => [15, 10, 0, 14, 50000000, 9223372036854775807]
|
142
|
+
aligner.idx_opt[:k]
|
143
|
+
# => 15
|
144
|
+
aligner.idx_opt[:k] = 14
|
145
|
+
aligner.idx_opt[:k]
|
146
|
+
# => 14
|
147
|
+
```
|
17
148
|
|
18
149
|
## Development
|
19
150
|
|
151
|
+
Fork your repository.
|
152
|
+
Then clone your fork.
|
153
|
+
|
20
154
|
```sh
|
21
|
-
|
155
|
+
git clone --recursive https://github.com/kojix2/ruby-minimap2
|
156
|
+
# git clone https://github.com/kojix2/ruby-minimap2
|
157
|
+
# cd ruby-minimap2
|
158
|
+
# git submodule update -i
|
159
|
+
```
|
160
|
+
|
161
|
+
Build Minimap2 and Mappy.
|
162
|
+
|
163
|
+
```sh
|
164
|
+
cd ruby-minimap2
|
165
|
+
bundle install # Install dependent packages including Ruby-FFI
|
166
|
+
bundle exec rake minimap2:build
|
167
|
+
```
|
168
|
+
|
169
|
+
A shared library will be created in the vendor directory.
|
170
|
+
|
171
|
+
```
|
172
|
+
└── vendor
|
173
|
+
└── libminimap2.so
|
174
|
+
```
|
175
|
+
|
176
|
+
Run tests.
|
177
|
+
|
178
|
+
```
|
179
|
+
bundle exec rake test
|
22
180
|
```
|
23
181
|
|
24
182
|
## Contributing
|
25
183
|
|
26
|
-
|
184
|
+
ruby-minimap2 is a library under development and there are many points to be improved. Please feel free to send us your pull request.
|
185
|
+
|
186
|
+
* [Report bugs](https://github.com/kojix2/ruby-minimap2/issues)
|
187
|
+
* Fix bugs and [submit pull requests](https://github.com/kojix2/ruby-minimap2/pulls)
|
188
|
+
* Write, clarify, or fix documentation
|
189
|
+
* Suggest or add new features
|
190
|
+
* Create tools based on ruby-minimap2
|
191
|
+
* Update minimap2 in github submodule
|
27
192
|
|
28
193
|
## License
|
29
194
|
|
30
195
|
[MIT License](https://opensource.org/licenses/MIT).
|
196
|
+
|
197
|
+
## Acknowledgements
|
198
|
+
|
199
|
+
I would like to thank Heng Li for making Minimap2, and all the readers who read the README to the end.
|
data/lib/minimap2.rb
CHANGED
@@ -1,6 +1,78 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
3
|
+
# dependencies
|
4
|
+
require 'ffi'
|
5
|
+
|
6
|
+
# bit fields
|
7
|
+
require_relative 'minimap2/ffi_helper'
|
8
|
+
|
9
|
+
# modules
|
10
|
+
require_relative 'minimap2/aligner'
|
11
|
+
require_relative 'minimap2/alignment'
|
12
|
+
require_relative 'minimap2/version'
|
13
|
+
|
14
|
+
# Minimap2 mapper for long read sequences
|
15
|
+
# https://github.com/lh3/minimap2
|
16
|
+
# Li, H. (2018). Minimap2: pairwise alignment for nucleotide sequences. Bioinformatics, 34:3094-3100.
|
17
|
+
# doi:10.1093/bioinformatics/bty191
|
18
|
+
module Minimap2
|
4
19
|
class Error < StandardError; end
|
5
|
-
|
20
|
+
|
21
|
+
class << self
|
22
|
+
attr_accessor :ffi_lib
|
23
|
+
end
|
24
|
+
|
25
|
+
lib_name = ::FFI.map_library_name('minimap2')
|
26
|
+
self.ffi_lib = if ENV['MINIMAPDIR']
|
27
|
+
File.expand_path(lib_name, ENV['MINIMAPDIR'])
|
28
|
+
else
|
29
|
+
File.expand_path("../vendor/#{lib_name}", __dir__)
|
30
|
+
end
|
31
|
+
|
32
|
+
# friendlier error message
|
33
|
+
autoload :FFI, 'minimap2/ffi'
|
34
|
+
|
35
|
+
# methods from mappy
|
36
|
+
class << self
|
37
|
+
# Read fasta/fastq file.
|
38
|
+
# @param [String] file_path
|
39
|
+
# @param [Boolean] read_comment If false or nil, the comment will not be read.
|
40
|
+
# @yield [name, seq, qual, comment]
|
41
|
+
# Note: You can also use a generic library such as BioRuby instead of this method.
|
42
|
+
|
43
|
+
def fastx_read(file_path, read_comment = false)
|
44
|
+
path = File.expand_path(file_path)
|
45
|
+
ks = FFI.mm_fastx_open(path)
|
46
|
+
while FFI.kseq_read(ks) >= 0
|
47
|
+
qual = ks[:qual][:s] if (ks[:qual][:l]).positive?
|
48
|
+
name = ks[:name][:s]
|
49
|
+
seq = ks[:seq][:s]
|
50
|
+
if read_comment
|
51
|
+
comment = ks[:comment][:s] if (ks[:comment][:l]).positive?
|
52
|
+
yield [name, seq, qual, comment]
|
53
|
+
else
|
54
|
+
yield [name, seq, qual]
|
55
|
+
end
|
56
|
+
end
|
57
|
+
FFI.mm_fastx_close(ks)
|
58
|
+
end
|
59
|
+
|
60
|
+
# Reverse complement sequence.
|
61
|
+
# @param [String] seq
|
62
|
+
# @return [String] seq
|
63
|
+
|
64
|
+
def revcomp(seq)
|
65
|
+
l = seq.size
|
66
|
+
bseq = ::FFI::MemoryPointer.new(:char, l)
|
67
|
+
bseq.put_bytes(0, seq)
|
68
|
+
FFI.mappy_revcomp(l, bseq)
|
69
|
+
end
|
70
|
+
|
71
|
+
# Set verbosity level.
|
72
|
+
# @param [Integer] level
|
73
|
+
|
74
|
+
def verbose(level = -1)
|
75
|
+
FFI.mm_verbose_level(level)
|
76
|
+
end
|
77
|
+
end
|
6
78
|
end
|
@@ -0,0 +1,235 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Minimap2
|
4
|
+
class Aligner
|
5
|
+
attr_reader :idx_opt, :map_opt, :index
|
6
|
+
|
7
|
+
# Create a new aligner.
|
8
|
+
#
|
9
|
+
# @param fn_idx_in [String] index or sequence file name.
|
10
|
+
# @param seq [String] a single sequence to index.
|
11
|
+
# @param preset [String] minimap2 preset.
|
12
|
+
# * map-pb : PacBio CLR genomic reads
|
13
|
+
# * map-ont : Oxford Nanopore genomic reads
|
14
|
+
# * map-hifi : PacBio HiFi/CCS genomic reads (v2.19 or later)
|
15
|
+
# * asm20 : PacBio HiFi/CCS genomic reads (v2.18 or earlier)
|
16
|
+
# * sr : short genomic paired-end reads
|
17
|
+
# * splice : spliced long reads (strand unknown)
|
18
|
+
# * splice:hq : Final PacBio Iso-seq or traditional cDNA
|
19
|
+
# * asm5 : intra-species asm-to-asm alignment
|
20
|
+
# * ava-pb : PacBio read overlap
|
21
|
+
# * ava-ont : Nanopore read overlap
|
22
|
+
# @param k [Integer] k-mer length, no larger than 28.
|
23
|
+
# @param w [Integer] minimizer window size, no larger than 255.
|
24
|
+
# @param min_cnt [Integer] minimum number of minimizers on a chain.
|
25
|
+
# @param min_chain_score [Integer] minimum chaining score.
|
26
|
+
# @param min_dp_score
|
27
|
+
# @param bw [Integer] chaining and alignment band width.
|
28
|
+
# @param best_n [Integer] max number of alignments to return.
|
29
|
+
# @param n_threads [Integer] number of indexing threads.
|
30
|
+
# @param fn_idx_out [String] name of file to which the index is written.
|
31
|
+
# This parameter has no effect if seq is set.
|
32
|
+
# @param max_frag_len [Integer]
|
33
|
+
# @param extra_flags [Integer] additional flags defined in minimap.h.
|
34
|
+
# @param scoring [Array] scoring system.
|
35
|
+
# It is a tuple/list consisting of 4, 6 or 7 positive integers.
|
36
|
+
# The first 4 elements specify match scoring, mismatch penalty, gap open and gap extension penalty.
|
37
|
+
# The 5th and 6th elements, if present, set long-gap open and long-gap extension penalty.
|
38
|
+
# The 7th sets a mismatch penalty involving ambiguous bases.
|
39
|
+
|
40
|
+
def initialize(
|
41
|
+
fn_idx_in = nil,
|
42
|
+
seq: nil,
|
43
|
+
preset: nil,
|
44
|
+
k: nil,
|
45
|
+
w: nil,
|
46
|
+
min_cnt: nil,
|
47
|
+
min_chain_score: nil,
|
48
|
+
min_dp_score: nil,
|
49
|
+
bw: nil,
|
50
|
+
best_n: nil,
|
51
|
+
n_threads: 3,
|
52
|
+
fn_idx_out: nil,
|
53
|
+
max_frag_len: nil,
|
54
|
+
extra_flags: nil,
|
55
|
+
scoring: nil
|
56
|
+
)
|
57
|
+
|
58
|
+
@idx_opt = FFI::IdxOpt.new
|
59
|
+
@map_opt = FFI::MapOpt.new
|
60
|
+
|
61
|
+
r = FFI.mm_set_opt(preset, idx_opt, map_opt)
|
62
|
+
raise ArgumentError, "Unknown preset name: #{preset}" if r == -1
|
63
|
+
|
64
|
+
# always perform alignment
|
65
|
+
map_opt[:flag] |= 4
|
66
|
+
idx_opt[:batch_size] = 0x7fffffffffffffff
|
67
|
+
|
68
|
+
# override preset options
|
69
|
+
idx_opt[:k] = k if k
|
70
|
+
idx_opt[:w] = w if w
|
71
|
+
map_opt[:min_cnt] = min_cnt if min_cnt
|
72
|
+
map_opt[:min_chain_score] = min_chain_score if min_chain_score
|
73
|
+
map_opt[:min_dp_max] = min_dp_score if min_dp_score
|
74
|
+
map_opt[:bw] = bw if bw
|
75
|
+
map_opt[:best_n] = best_n if best_n
|
76
|
+
map_opt[:max_frag_len] = max_frag_len if max_frag_len
|
77
|
+
map_opt[:flag] |= extra_flags if extra_flags
|
78
|
+
if scoring && scoring.size >= 4
|
79
|
+
map_opt[:a] = scoring[0]
|
80
|
+
map_opt[:b] = scoring[1]
|
81
|
+
map_opt[:q] = scoring[2]
|
82
|
+
map_opt[:e] = scoring[3]
|
83
|
+
map_opt[:q2] = map_opt.q
|
84
|
+
map_opt[:e2] = map_opt.e
|
85
|
+
if scoring.size >= 6
|
86
|
+
map_opt[:q2] = scoring[4]
|
87
|
+
map_opt[:e2] = scoring[5]
|
88
|
+
map_opt[:sc_ambi] = scoring[6] if scoring.size >= 7
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
if fn_idx_in
|
93
|
+
warn 'Since fn_idx_in is specified, the seq argument will be ignored.' if seq
|
94
|
+
reader = FFI.mm_idx_reader_open(fn_idx_in, idx_opt, fn_idx_out)
|
95
|
+
|
96
|
+
# The Ruby version raises an error here
|
97
|
+
raise "Cannot open : #{fn_idx_in}" if reader.null?
|
98
|
+
|
99
|
+
@index = FFI.mm_idx_reader_read(reader, n_threads)
|
100
|
+
FFI.mm_idx_reader_close(reader)
|
101
|
+
FFI.mm_mapopt_update(map_opt, index)
|
102
|
+
FFI.mm_idx_index_name(index)
|
103
|
+
elsif seq
|
104
|
+
@index = FFI.mappy_idx_seq(
|
105
|
+
idx_opt[:w], idx_opt[:k], idx_opt[:flag] & 1,
|
106
|
+
idx_opt[:bucket_bits], seq, seq.size
|
107
|
+
)
|
108
|
+
FFI.mm_mapopt_update(map_opt, index)
|
109
|
+
map_opt[:mid_occ] = 1000 # don't filter high-occ seeds
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
# Explicitly releases the memory of the index object.
|
114
|
+
|
115
|
+
def free_index
|
116
|
+
FFI.mm_idx_destroy(index) unless index.null?
|
117
|
+
end
|
118
|
+
|
119
|
+
# @param seq [String]
|
120
|
+
# @param seq2 [String]
|
121
|
+
# @param buf [FFI::TBuf]
|
122
|
+
# @param cs [true, false]
|
123
|
+
# @param md [true, false]
|
124
|
+
# @param max_frag_len [Integer]
|
125
|
+
# @param extra_flags [Integer]
|
126
|
+
# @note Name change: map -> align
|
127
|
+
# In the Ruby language, the name map means iterator.
|
128
|
+
# The original name is map, but here I use the method name align.
|
129
|
+
# @note The use of Enumerator is being considered. The method names may change again.
|
130
|
+
# @return [Array] alignments
|
131
|
+
|
132
|
+
def align(
|
133
|
+
seq, seq2 = nil,
|
134
|
+
buf: nil,
|
135
|
+
cs: false,
|
136
|
+
md: false,
|
137
|
+
max_frag_len: nil,
|
138
|
+
extra_flags: nil
|
139
|
+
)
|
140
|
+
|
141
|
+
return if index.null?
|
142
|
+
|
143
|
+
map_opt.max_frag_len = max_frag_len if max_frag_len
|
144
|
+
map_opt.flag |= extra_flags if extra_flags
|
145
|
+
|
146
|
+
buf ||= FFI::TBuf.new
|
147
|
+
km = FFI.mm_tbuf_get_km(buf)
|
148
|
+
|
149
|
+
n_regs_ptr = ::FFI::MemoryPointer.new :int
|
150
|
+
regs_ptr = FFI.mm_map_aux(index, seq, seq2, n_regs_ptr, buf, map_opt)
|
151
|
+
n_regs = n_regs_ptr.read_int
|
152
|
+
|
153
|
+
regs = Array.new(n_regs) do |i|
|
154
|
+
FFI::Reg1.new(regs_ptr + i * FFI::Reg1.size)
|
155
|
+
end
|
156
|
+
|
157
|
+
hit = FFI::Hit.new
|
158
|
+
|
159
|
+
cs_str = ::FFI::MemoryPointer.new(::FFI::MemoryPointer.new(:string))
|
160
|
+
m_cs_str = ::FFI::MemoryPointer.new :int
|
161
|
+
|
162
|
+
alignments = []
|
163
|
+
|
164
|
+
i = 0
|
165
|
+
begin
|
166
|
+
while i < n_regs
|
167
|
+
FFI.mm_reg2hitpy(index, regs[i], hit)
|
168
|
+
|
169
|
+
c = hit[:cigar32].read_array_of_uint32(hit[:n_cigar32])
|
170
|
+
cigar = c.map { |x| [x >> 4, x & 0xf] } # 32-bit CIGAR encoding -> Ruby array
|
171
|
+
|
172
|
+
_cs = ''
|
173
|
+
if cs
|
174
|
+
l_cs_str = FFI.mm_gen_cs(km, cs_str, m_cs_str, @index, regs[i], seq, 1)
|
175
|
+
_cs = cs_str.read_pointer.read_string(l_cs_str)
|
176
|
+
end
|
177
|
+
|
178
|
+
_md = ''
|
179
|
+
if md
|
180
|
+
l_cs_str = FFI.mm_gen_md(km, cs_str, m_cs_str, @index, regs[i], seq)
|
181
|
+
_md = cs_str.read_pointer.read_string(l_cs_str)
|
182
|
+
end
|
183
|
+
|
184
|
+
alignments << Alignment.new(hit, cigar, _cs, _md)
|
185
|
+
|
186
|
+
FFI.mm_free_reg1(regs[i])
|
187
|
+
i += 1
|
188
|
+
end
|
189
|
+
ensure
|
190
|
+
while i < n_regs
|
191
|
+
FFI.mm_free_reg1(regs[i])
|
192
|
+
i += 1
|
193
|
+
end
|
194
|
+
end
|
195
|
+
alignments
|
196
|
+
end
|
197
|
+
|
198
|
+
# Retrieve a subsequence from the index.
|
199
|
+
# @param name
|
200
|
+
# @param start
|
201
|
+
# @param stop
|
202
|
+
|
203
|
+
def seq(name, start = 0, stop = 0x7fffffff)
|
204
|
+
lp = ::FFI::MemoryPointer.new(:int)
|
205
|
+
s = FFI.mappy_fetch_seq(index, name, start, stop, lp)
|
206
|
+
l = lp.read_int
|
207
|
+
return nil if l.zero?
|
208
|
+
|
209
|
+
s.read_string(l)
|
210
|
+
end
|
211
|
+
|
212
|
+
# k-mer length, no larger than 28
|
213
|
+
|
214
|
+
def k
|
215
|
+
index[:k]
|
216
|
+
end
|
217
|
+
|
218
|
+
# minimizer window size, no larger than 255
|
219
|
+
|
220
|
+
def w
|
221
|
+
index[:w]
|
222
|
+
end
|
223
|
+
|
224
|
+
def n_seq
|
225
|
+
index[:n_seq]
|
226
|
+
end
|
227
|
+
|
228
|
+
def seq_names
|
229
|
+
ptr = index[:seq].to_ptr
|
230
|
+
Array.new(index[:n_seq]) do |i|
|
231
|
+
FFI::IdxSeq.new(ptr + i * FFI::IdxSeq.size)[:name]
|
232
|
+
end
|
233
|
+
end
|
234
|
+
end
|
235
|
+
end
|
@@ -0,0 +1,113 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Minimap2
|
4
|
+
# Alignment result.
|
5
|
+
#
|
6
|
+
# @!attribute ctg
|
7
|
+
# @return [String] name of the reference sequence the query is mapped to.
|
8
|
+
# @!attribute ctg_len
|
9
|
+
# @return [Integer] total length of the reference sequence.
|
10
|
+
# @!attribute r_st
|
11
|
+
# @return [Integer] start positions on the reference.
|
12
|
+
# @!attribute r_en
|
13
|
+
# @return [Integer] end positions on the reference.
|
14
|
+
# @!attribute strand
|
15
|
+
# @return [Integer] +1 if on the forward strand; -1 if on the reverse strand.
|
16
|
+
# @!attribute trans_strand
|
17
|
+
# @return [Integer] transcript strand.
|
18
|
+
# +1 if on the forward strand; -1 if on the reverse strand; 0 if unknown.
|
19
|
+
# @!attribute blen
|
20
|
+
# @return [Integer] length of the alignment, including both alignment matches and gaps
|
21
|
+
# but excluding ambiguous bases.
|
22
|
+
# @!attribute mlen
|
23
|
+
# @return [Integer] length of the matching bases in the alignment,
|
24
|
+
# excluding ambiguous base matches.
|
25
|
+
# @!attribute nm
|
26
|
+
# @return [Integer] number of mismatches, gaps and ambiguous positions in the alignment.
|
27
|
+
# @!attribute primary
|
28
|
+
# @return [Integer] if the alignment is primary (typically the best and the first to generate)
|
29
|
+
# @!attribute q_st
|
30
|
+
# @return [Integer] start positions on the query.
|
31
|
+
# @!attribute q_en
|
32
|
+
# @return [Integer] end positions on the query.
|
33
|
+
# @!attribute mapq
|
34
|
+
# @return [Integer] mapping quality.
|
35
|
+
# @!attribute cigar
|
36
|
+
# @return [Array] CIGAR returned as an array of shape (n_cigar,2).
|
37
|
+
# The two numbers give the length and the operator of each CIGAR operation.
|
38
|
+
# @!attribute read_num
|
39
|
+
# @return [Integer] read number that the alignment corresponds to;
|
40
|
+
# 1 for the first read and 2 for the second read.
|
41
|
+
# @!attribute cs
|
42
|
+
# @return [String] the cs tag.
|
43
|
+
# @!attribute md
|
44
|
+
# @return [String] the MD tag as in the SAM format.
|
45
|
+
# It is an empty string unless the md argument is applied when calling Aligner#align.
|
46
|
+
# @!attribute cigar_str
|
47
|
+
# @return [String] CIGAR string.
|
48
|
+
|
49
|
+
class Alignment
|
50
|
+
def self.keys
|
51
|
+
%i[ctg ctg_len r_st r_en strand trans_strand blen mlen nm primary
|
52
|
+
q_st q_en mapq cigar read_num cs md cigar_str]
|
53
|
+
end
|
54
|
+
|
55
|
+
attr_reader(*keys)
|
56
|
+
|
57
|
+
def initialize(h, cigar, cs = nil, md = nil)
|
58
|
+
@ctg = h[:ctg]
|
59
|
+
@ctg_len = h[:ctg_len]
|
60
|
+
@r_st = h[:ctg_start]
|
61
|
+
@r_en = h[:ctg_end]
|
62
|
+
@strand = h[:strand]
|
63
|
+
@trans_strand = h[:trans_strand]
|
64
|
+
@blen = h[:blen]
|
65
|
+
@mlen = h[:mlen]
|
66
|
+
@nm = h[:NM]
|
67
|
+
@primary = h[:is_primary]
|
68
|
+
@q_st = h[:qry_start]
|
69
|
+
@q_en = h[:qry_end]
|
70
|
+
@mapq = h[:mapq]
|
71
|
+
@cigar = cigar
|
72
|
+
@read_num = h[:seg_id] + 1
|
73
|
+
@cs = cs
|
74
|
+
@md = md
|
75
|
+
|
76
|
+
@cigar_str = cigar.map { |x| x[0].to_s + FFI::CIGAR_STR[x[1]] }.join
|
77
|
+
end
|
78
|
+
|
79
|
+
def primary?
|
80
|
+
@primary == 1
|
81
|
+
end
|
82
|
+
|
83
|
+
# Convert Alignment to hash.
|
84
|
+
|
85
|
+
def to_h
|
86
|
+
self.class.keys.map { |k| [k, __send__(k)] }.to_h
|
87
|
+
end
|
88
|
+
|
89
|
+
# Convert to the PAF format without the QueryName and QueryLength columns.
|
90
|
+
|
91
|
+
def to_s
|
92
|
+
strand = if @strand.positive?
|
93
|
+
'+'
|
94
|
+
elsif @strand.negative?
|
95
|
+
'-'
|
96
|
+
else
|
97
|
+
'?'
|
98
|
+
end
|
99
|
+
tp = @primary != 0 ? 'tp:A:P' : 'tp:A:S'
|
100
|
+
ts = if @trans_strand.positive?
|
101
|
+
'ts:A:+'
|
102
|
+
elsif @trans_strand.negative?
|
103
|
+
'ts:A:-'
|
104
|
+
else
|
105
|
+
'ts:A:.'
|
106
|
+
end
|
107
|
+
a = [@q_st, @q_en, strand, @ctg, @ctg_len, @r_st, @r_en,
|
108
|
+
@mlen, @blen, @mapq, tp, ts, "cg:Z:#{@cigar_str}"]
|
109
|
+
a << "cs:Z:#{@cs}" if @cs
|
110
|
+
a.join("\t")
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
data/lib/minimap2/ffi.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# bit fields
|
4
|
+
require_relative 'ffi_helper'
|
5
|
+
|
6
|
+
module Minimap2
|
7
|
+
# Native APIs
|
8
|
+
module FFI
|
9
|
+
extend ::FFI::Library
|
10
|
+
begin
|
11
|
+
ffi_lib Minimap2.ffi_lib
|
12
|
+
rescue LoadError => e
|
13
|
+
raise LoadError, "Could not find #{Minimap2.ffi_lib} \n#{e}"
|
14
|
+
end
|
15
|
+
|
16
|
+
# Continue even if some functions are not found.
|
17
|
+
def self.attach_function(*)
|
18
|
+
super
|
19
|
+
rescue ::FFI::NotFoundError => e
|
20
|
+
warn e.message
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
require_relative 'ffi/constants'
|
26
|
+
require_relative 'ffi/functions'
|
27
|
+
require_relative 'ffi/mappy'
|
@@ -0,0 +1,231 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Minimap2
|
4
|
+
module FFI
|
5
|
+
# flags
|
6
|
+
NO_DIAG = 0x001 # no exact diagonal hit
|
7
|
+
NO_DUAL = 0x002 # skip pairs where query name is lexicographically larger than target name
|
8
|
+
CIGAR = 0x004
|
9
|
+
OUT_SAM = 0x008
|
10
|
+
NO_QUAL = 0x010
|
11
|
+
OUT_CG = 0x020
|
12
|
+
OUT_CS = 0x040
|
13
|
+
SPLICE = 0x080 # splice mode
|
14
|
+
SPLICE_FOR = 0x100 # match GT-AG
|
15
|
+
SPLICE_REV = 0x200 # match CT-AC, the reverse complement of GT-AG
|
16
|
+
NO_LJOIN = 0x400
|
17
|
+
OUT_CS_LONG = 0x800
|
18
|
+
SR = 0x1000
|
19
|
+
FRAG_MODE = 0x2000
|
20
|
+
NO_PRINT_2ND = 0x4000
|
21
|
+
TWO_IO_THREADS = 0x8000 # Translator's Note. MM_F_2_IO_THREADS. Constants starting with numbers cannot be defined.
|
22
|
+
LONG_CIGAR = 0x10000
|
23
|
+
INDEPEND_SEG = 0x20000
|
24
|
+
SPLICE_FLANK = 0x40000
|
25
|
+
SOFTCLIP = 0x80000
|
26
|
+
FOR_ONLY = 0x100000
|
27
|
+
REV_ONLY = 0x200000
|
28
|
+
HEAP_SORT = 0x400000
|
29
|
+
ALL_CHAINS = 0x800000
|
30
|
+
OUT_MD = 0x1000000
|
31
|
+
COPY_COMMENT = 0x2000000
|
32
|
+
EQX = 0x4000000 # use =/X instead of M
|
33
|
+
PAF_NO_HIT = 0x8000000 # output unmapped reads to PAF
|
34
|
+
NO_END_FLT = 0x10000000
|
35
|
+
HARD_MLEVEL = 0x20000000
|
36
|
+
SAM_HIT_ONLY = 0x40000000
|
37
|
+
RMQ = 0x80000000 # LL
|
38
|
+
|
39
|
+
HPC = 0x1
|
40
|
+
NO_SEQ = 0x2
|
41
|
+
NO_NAME = 0x4
|
42
|
+
|
43
|
+
IDX_MAGIC = "MMI\2"
|
44
|
+
|
45
|
+
MAX_SEG = 255
|
46
|
+
|
47
|
+
CIGAR_STR = 'MIDNSHP=XB'
|
48
|
+
|
49
|
+
# emulate 128-bit integers
|
50
|
+
class MM128 < ::FFI::Struct
|
51
|
+
layout \
|
52
|
+
:x, :uint64_t,
|
53
|
+
:y, :uint64_t
|
54
|
+
end
|
55
|
+
|
56
|
+
# emulate 128-bit arrays
|
57
|
+
class MM128V < ::FFI::Struct
|
58
|
+
layout \
|
59
|
+
:n, :size_t,
|
60
|
+
:m, :size_t,
|
61
|
+
:a, MM128.ptr
|
62
|
+
end
|
63
|
+
|
64
|
+
# indexing option
|
65
|
+
class IdxOpt < ::FFI::Struct
|
66
|
+
layout \
|
67
|
+
:k, :short,
|
68
|
+
:w, :short,
|
69
|
+
:flag, :short,
|
70
|
+
:bucket_bits, :short,
|
71
|
+
:mini_batch_size, :int64_t,
|
72
|
+
:batch_size, :uint64_t
|
73
|
+
end
|
74
|
+
|
75
|
+
# mapping option
|
76
|
+
class MapOpt < ::FFI::Struct
|
77
|
+
layout \
|
78
|
+
:flag, :int64_t, # see MM_F_* macros
|
79
|
+
:seed, :int,
|
80
|
+
:sdust_thres, :int, # score threshold for SDUST; 0 to disable
|
81
|
+
:max_qlen, :int, # max query length
|
82
|
+
:bw, :int, # bandwidth
|
83
|
+
:bw_long, :int,
|
84
|
+
:max_gap, :int, # break a chain if there are no minimizers in a max_gap window
|
85
|
+
:max_gap_ref, :int,
|
86
|
+
:max_frag_len, :int,
|
87
|
+
:max_chain_skip, :int,
|
88
|
+
:max_chain_iter, :int,
|
89
|
+
:min_cnt, :int, # min number of minimizers on each chain
|
90
|
+
:min_chain_score, :int, # min chaining score
|
91
|
+
:chain_gap_scale, :float,
|
92
|
+
:rmq_size_cap, :int,
|
93
|
+
:rmq_inner_dist, :int,
|
94
|
+
:rmq_rescue_size, :int,
|
95
|
+
:rmq_rescue_ratio, :float,
|
96
|
+
:mask_level, :float,
|
97
|
+
:mask_len, :int,
|
98
|
+
:pri_ratio, :float,
|
99
|
+
:best_n, :int, # top best_n chains are subjected to DP alignment
|
100
|
+
:alt_drop, :float,
|
101
|
+
:a, :int, # matching score
|
102
|
+
:b, :int, # mismatch
|
103
|
+
:q, :int, # gap-open
|
104
|
+
:e, :int, # gap-ext
|
105
|
+
:q2, :int, # gap-open
|
106
|
+
:e2, :int, # gap-ext
|
107
|
+
:sc_ambi, :int, # score when one or both bases are "N"
|
108
|
+
:noncan, :int, # cost of non-canonical splicing sites
|
109
|
+
:junc_bonus, :int,
|
110
|
+
:zdrop, :int, # break alignment if alignment score drops too fast along the diagonal
|
111
|
+
:zdrop_inv, :int,
|
112
|
+
:end_bonus, :int,
|
113
|
+
:min_dp_max, :int, # drop an alignment if the score of the max scoring segment is below this threshold
|
114
|
+
:min_ksw_len, :int,
|
115
|
+
:anchor_ext_len, :int,
|
116
|
+
:anchor_ext_shift, :int,
|
117
|
+
:max_clip_ratio, :float, # drop an alignment if BOTH ends are clipped above this ratio
|
118
|
+
:pe_ori, :int,
|
119
|
+
:pe_bonus, :int,
|
120
|
+
:mid_occ_frac, :float, # only used by mm_mapopt_update(); see below
|
121
|
+
:min_mid_occ, :int32_t,
|
122
|
+
:mid_occ, :int32_t, # ignore seeds with occurrences above this threshold
|
123
|
+
:max_occ, :int32_t,
|
124
|
+
:mini_batch_size, :int64_t, # size of a batch of query bases to process in parallel
|
125
|
+
:max_sw_mat, :int64_t,
|
126
|
+
:split_prefix, :string
|
127
|
+
end
|
128
|
+
|
129
|
+
# minimap2 index
|
130
|
+
class IdxSeq < ::FFI::Struct
|
131
|
+
layout \
|
132
|
+
:name, :string, # name of the db sequence
|
133
|
+
:offset, :uint64_t, # offset in mm_idx_t::S
|
134
|
+
:len, :uint32_t, # length
|
135
|
+
:is_alt, :uint32_t
|
136
|
+
end
|
137
|
+
|
138
|
+
class Idx < ::FFI::Struct
|
139
|
+
layout \
|
140
|
+
:b, :int32_t,
|
141
|
+
:w, :int32_t,
|
142
|
+
:k, :int32_t,
|
143
|
+
:flag, :int32_t,
|
144
|
+
:n_seq, :uint32_t, # number of reference sequences
|
145
|
+
:index, :int32_t,
|
146
|
+
:n_alt, :int32_t,
|
147
|
+
:seq, IdxSeq.ptr, # sequence name, length and offset
|
148
|
+
:S, :pointer, # 4-bit packed sequence
|
149
|
+
:B, :pointer, # index (hidden)
|
150
|
+
:I, :pointer, # intervals (hidden)
|
151
|
+
:km, :pointer,
|
152
|
+
:h, :pointer
|
153
|
+
end
|
154
|
+
|
155
|
+
# index reader
|
156
|
+
class IdxReader < ::FFI::Struct
|
157
|
+
layout \
|
158
|
+
:is_idx, :int,
|
159
|
+
:n_parts, :int,
|
160
|
+
:idx_size, :int64_t,
|
161
|
+
:opt, IdxOpt,
|
162
|
+
:fp_out, :pointer, # FILE
|
163
|
+
:seq_or_idx, :pointer # FIXME: Union mm_bseq_files or FILE
|
164
|
+
end
|
165
|
+
|
166
|
+
# minimap2 alignment
|
167
|
+
class Extra < ::FFI::BitStruct
|
168
|
+
layout \
|
169
|
+
:capacity, :uint32, # the capacity of cigar[]
|
170
|
+
:dp_score, :int32, # DP score
|
171
|
+
:dp_max, :int32, # score of the max-scoring segment
|
172
|
+
:dp_max2, :int32, # score of the best alternate mappings
|
173
|
+
:n_ambi_trans_strand, :uint32,
|
174
|
+
:n_cigar, :uint32
|
175
|
+
|
176
|
+
bitfields :n_ambi_trans_strand,
|
177
|
+
:n_ambi, 30, # number of ambiguous bases
|
178
|
+
:trans_strand, 2 # transcript strand: 0 for unknown, 1 for +, 2 for -
|
179
|
+
|
180
|
+
# variable length array
|
181
|
+
def cigar
|
182
|
+
pointer.get_array_of_uint32(size, self[:n_cigar])
|
183
|
+
end
|
184
|
+
end
|
185
|
+
|
186
|
+
class Reg1 < ::FFI::BitStruct
|
187
|
+
layout \
|
188
|
+
:id, :int32_t, # ID for internal uses (see also parent below)
|
189
|
+
:cnt, :int32_t, # number of minimizers; if on the reverse strand
|
190
|
+
:rid, :int32_t, # reference index; if this is an alignment from inversion rescue
|
191
|
+
:score, :int32_t, # DP alignment score
|
192
|
+
:qs, :int32_t, # query start
|
193
|
+
:qe, :int32_t, # query end
|
194
|
+
:rs, :int32_t, # reference start
|
195
|
+
:re, :int32_t, # reference end
|
196
|
+
:parent, :int32_t, # parent==id if primary
|
197
|
+
:subsc, :int32_t, # best alternate mapping score
|
198
|
+
:as, :int32_t, # offset in the a[] array (for internal uses only)
|
199
|
+
:mlen, :int32_t, # seeded exact match length
|
200
|
+
:blen, :int32_t, # seeded alignment block length
|
201
|
+
:n_sub, :int32_t, # number of suboptimal mappings
|
202
|
+
:score0, :int32_t, # initial chaining score (before chain merging/spliting)
|
203
|
+
:fields, :uint32_t,
|
204
|
+
:hash, :uint32_t,
|
205
|
+
:div, :float,
|
206
|
+
:p, Extra.ptr
|
207
|
+
|
208
|
+
bitfields :fields,
|
209
|
+
:mapq, 8,
|
210
|
+
:split, 2,
|
211
|
+
:rev, 1,
|
212
|
+
:inv, 1,
|
213
|
+
:sam_pri, 1,
|
214
|
+
:proper_frag, 1,
|
215
|
+
:pe_thru, 1,
|
216
|
+
:seg_split, 1,
|
217
|
+
:seg_id, 8,
|
218
|
+
:split_inv, 1,
|
219
|
+
:is_alt, 1,
|
220
|
+
:dummy, 6
|
221
|
+
end
|
222
|
+
|
223
|
+
# memory buffer for thread-local storage during mapping
|
224
|
+
class TBuf < ::FFI::Struct
|
225
|
+
layout \
|
226
|
+
:km, :pointer,
|
227
|
+
:rep_len, :int,
|
228
|
+
:frag_gap, :int
|
229
|
+
end
|
230
|
+
end
|
231
|
+
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Minimap2
  module FFI
    # Raw binding to the C symbol mm_set_opt. It is attached under the name
    # mm_set_opt_raw and made private so callers go through the wrapper
    # below, which accepts a Ruby value (or nil) for the preset.
    attach_function :mm_set_opt_raw, :mm_set_opt, [:pointer, IdxOpt.by_ref, MapOpt.by_ref], :int
    private_class_method :mm_set_opt_raw

    # Calls the native mm_set_opt.
    #
    # @param preset [#to_s, nil] preset name; a falsy value is passed to C
    #   as a pointer with address 0
    # @param io [IdxOpt] index options struct, passed by reference
    # @param mo [MapOpt] mapping options struct, passed by reference
    # @return [Integer] the int returned by the C function
    def self.mm_set_opt(preset, io, mo)
      return mm_set_opt_raw(::FFI::Pointer.new(:int, 0), io, mo) unless preset

      mm_set_opt_raw(::FFI::MemoryPointer.from_string(preset.to_s), io, mo)
    end

    # --- index reader -----------------------------------------------------

    attach_function :mm_idx_reader_open, [:string, IdxOpt.by_ref, :string], IdxReader.by_ref

    attach_function :mm_idx_reader_read, [IdxReader.by_ref, :int], Idx.by_ref

    attach_function :mm_idx_reader_close, [IdxReader.by_ref], :void

    # --- index --------------------------------------------------------------

    attach_function :mm_idx_destroy, [Idx.by_ref], :void

    attach_function :mm_mapopt_update, [MapOpt.by_ref, Idx.by_ref], :void

    attach_function :mm_idx_index_name, [Idx.by_ref], :int

    # --- thread-local buffer ------------------------------------------------

    attach_function :mm_tbuf_init, [], TBuf.by_ref

    attach_function :mm_tbuf_destroy, [TBuf.by_ref], :void

    attach_function :mm_tbuf_get_km, [TBuf.by_ref], :pointer

    # --- cs / MD tag generation ---------------------------------------------

    attach_function(
      :mm_gen_cs,
      [:pointer, :pointer, :pointer, Idx.by_ref, Reg1.by_ref, :string, :int],
      :int
    )

    # The C symbol is mm_gen_MD; it is exposed as mm_gen_md to
    # avoid uppercase letters in method names.
    attach_function(
      :mm_gen_md, :mm_gen_MD,
      [:pointer, :pointer, :pointer, Idx.by_ref, Reg1.by_ref, :string],
      :int
    )
  end
end
|
@@ -0,0 +1,99 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# https://github.com/lh3/minimap2/blob/master/python/cmappy.h
|
4
|
+
|
5
|
+
module Minimap2
  module FFI
    # Flat description of one alignment hit, filled in by mm_reg2hitpy.
    class Hit < ::FFI::Struct
      layout(
        :ctg,          :string,
        :ctg_start,    :int32_t,
        :ctg_end,      :int32_t,
        :qry_start,    :int32_t,
        :qry_end,      :int32_t,
        :blen,         :int32_t,
        :mlen,         :int32_t,
        :NM,           :int32_t,
        :ctg_len,      :int32_t,
        :mapq,         :uint8_t,
        :is_primary,   :uint8_t,
        :strand,       :int8_t,
        :trans_strand, :int8_t,
        :seg_id,       :int32_t,
        :n_cigar32,    :int32_t,
        :cigar32,      :pointer
      )
    end

    # String record with two size_t members (:l, :m) and a C string (:s).
    # Presumably mirrors klib's kstring_t -- confirm against the C header.
    class KString < ::FFI::Struct
      layout(
        :l, :size_t,
        :m, :size_t,
        :s, :string
      )
    end

    # Sequence record made of four embedded KString members plus reader state.
    class KSeq < ::FFI::Struct
      layout(
        :name,      KString,
        :comment,   KString,
        :seq,       KString,
        :qual,      KString,
        :last_char, :int,
        :f,         :pointer # KStream
      )
    end

    # --- hit conversion / cleanup -------------------------------------------

    attach_function :mm_reg2hitpy, [Idx.by_ref, Reg1.by_ref, Hit.by_ref], :void

    attach_function :mm_free_reg1, [Reg1.by_ref], :void

    # --- FASTA/FASTQ reader ---------------------------------------------------

    attach_function :mm_fastx_open, [:string], KSeq.by_ref

    attach_function :mm_fastx_close, [KSeq.by_ref], :void

    attach_function :kseq_read, [KSeq.by_ref], :int

    # --- misc -------------------------------------------------------------------

    attach_function :mm_verbose_level, [:int], :int

    # NOTE(review): the argument list is declared as [:void] rather than [];
    # kept as-is to preserve the existing binding -- confirm this is intended.
    attach_function :mm_reset_timer, [:void], :void

    # --- mapping ------------------------------------------------------------------

    # Declared as returning a plain pointer; per the original annotation the
    # pointee is Reg1.
    attach_function(
      :mm_map_aux,
      [Idx.by_ref, :string, :string, :pointer, TBuf.by_ref, MapOpt.by_ref],
      :pointer # Reg1
    )

    # NOTE(review): declared with a :string return; if the C side allocates
    # the returned buffer, nothing here frees it -- confirm ownership.
    attach_function :mappy_revcomp, [:int, :pointer], :string

    # Use pointer instead of string to read with a specified length.
    attach_function(
      :mappy_fetch_seq,
      [Idx.by_ref, :string, :int, :int, :pointer],
      :pointer
    )

    attach_function :mappy_idx_seq, [:int, :int, :int, :int, :string, :int], Idx.by_ref
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'ffi'
|
4
|
+
|
5
|
+
module FFI
  # A Struct whose integer members can be subdivided into named bit-fields.
  #
  # After declaring the regular members with +layout+, call +bitfields+ with
  # the name of an integer member followed by name/width pairs. Each declared
  # name can then be read with +struct[:name]+ just like a real member.
  # Only reading is supported here; +[]=+ is not overridden.
  class BitStruct < Struct
    class << self
      # def union_layout(*args)
      #   Class.new(FFI::Union) { layout(*args) }
      # end

      # def struct_layout(*args)
      #   Class.new(FFI::Struct) { layout(*args) }
      # end

      # Prepended onto a struct class the first time it calls +bitfields+,
      # so that this +[]+ runs before the inherited one.
      module BitFieldsModule
        # Reads a member or a declared bit-field.
        #
        # @param name [Symbol] member name or bit-field name
        # @return the bit-field value (an Integer extracted from the parent
        #   member) when +name+ is a bit-field; otherwise whatever the
        #   inherited +[]+ returns for +name+
        def [](name)
          bit_fields = self.class.bit_fields_map
          parent, start, width = bit_fields[name]
          if parent
            # Shift the field down to bit 0, then mask off the higher bits.
            (super(parent) >> start) & ((1 << width) - 1)
          else
            super(name)
          end
        end
      end
      private_constant :BitFieldsModule

      # @return [Hash{Symbol => Array}, nil] bit-field name mapped to
      #   [parent member, start bit, width]; nil until +bitfields+ is called
      attr_reader :bit_fields_map

      # Declares bit-fields stored inside the integer member +parent+.
      #
      # May be called several times on the same class (e.g. once per packed
      # member); the declarations accumulate.
      #
      # @param args [Array] the parent member name followed by alternating
      #   bit-field names and widths; start offsets are assigned in order
      #   beginning at bit 0
      def bitfields(*args)
        # Guard on the variable that is actually assigned below. The previous
        # check looked at @bit_fields, which is never set, so every call
        # replaced the map and discarded earlier declarations.
        unless instance_variable_defined?(:@bit_fields_map)
          @bit_fields_map = {}
          prepend BitFieldsModule
        end

        parent = args.shift
        labels = []
        widths = []
        args.each_slice(2) do |l, w|
          labels << l
          widths << w
        end
        # Running sum of widths: starts[i] is the low bit of field i.
        starts = widths.inject([0]) do |result, w|
          result << (result.last + w)
        end
        labels.zip(starts, widths).each do |l, s, w|
          @bit_fields_map[l] = [parent, s, w]
        end
      end
    end
  end
end
|
Binary file
|
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: minimap2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.21
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kojix2
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-07-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: ffi
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: bundler
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -66,6 +80,20 @@ dependencies:
|
|
66
80
|
- - ">="
|
67
81
|
- !ruby/object:Gem::Version
|
68
82
|
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: tty-command
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
69
97
|
description: minimap2
|
70
98
|
email:
|
71
99
|
- 2xijok@gmail.com
|
@@ -75,8 +103,16 @@ extra_rdoc_files: []
|
|
75
103
|
files:
|
76
104
|
- LICENSE.txt
|
77
105
|
- README.md
|
78
|
-
- lib/minimap/version.rb
|
79
106
|
- lib/minimap2.rb
|
107
|
+
- lib/minimap2/aligner.rb
|
108
|
+
- lib/minimap2/alignment.rb
|
109
|
+
- lib/minimap2/ffi.rb
|
110
|
+
- lib/minimap2/ffi/constants.rb
|
111
|
+
- lib/minimap2/ffi/functions.rb
|
112
|
+
- lib/minimap2/ffi/mappy.rb
|
113
|
+
- lib/minimap2/ffi_helper.rb
|
114
|
+
- lib/minimap2/version.rb
|
115
|
+
- vendor/libminimap2.so
|
80
116
|
homepage: https://github.com/kojix2/ruby-minimap2
|
81
117
|
licenses:
|
82
118
|
- MIT
|
@@ -96,7 +132,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
96
132
|
- !ruby/object:Gem::Version
|
97
133
|
version: '0'
|
98
134
|
requirements: []
|
99
|
-
rubygems_version: 3.
|
135
|
+
rubygems_version: 3.2.15
|
100
136
|
signing_key:
|
101
137
|
specification_version: 4
|
102
138
|
summary: minimap2
|