libssw 0.0.1.pre → 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +34 -2
- data/exe/rbssw +3 -0
- data/lib/libssw.rb +141 -68
- data/lib/libssw/BLOSUM50.rb +25 -25
- data/lib/libssw/BLOSUM62.rb +30 -0
- data/lib/libssw/align.rb +60 -0
- data/lib/libssw/ffi.rb +2 -8
- data/lib/libssw/profile.rb +41 -0
- data/lib/libssw/struct_helper.rb +13 -0
- data/lib/libssw/version.rb +1 -1
- metadata +22 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c888d3c126247f4f397fc151402fd482cdc4c105bd00163d07cc056b0e202607
|
4
|
+
data.tar.gz: 8717cde76e5ba26034b05c81871b43730580b09dfb610c9507b0c651e6b816a1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d10a0cc734b8c53bf506c97ade223269ad6a64ed356f81d08eb9368e031b068ed85c217d3e47a737a303604b57009a00c0aab94a6eea89a62382cf79dbbee326
|
7
|
+
data.tar.gz: f18a9eddc8455e13a58058294d80f52ed96b31949e90c25494fd48ac7d3f22df4588e86df389160fca973489db36381bdabb67a2858ceeb811e9a8ddba3249cb
|
data/README.md
CHANGED
@@ -1,6 +1,10 @@
|
|
1
1
|
# ruby-libssw
|
2
2
|
|
3
|
-
[
|
3
|
+
![test](https://github.com/kojix2/ruby-libssw/workflows/CI/badge.svg)
|
4
|
+
|
5
|
+
:checkered_flag: [libssw](https://github.com/mengyao/Complete-Striped-Smith-Waterman-Library) - fast SIMD parallelized implementation of the Smith-Waterman algorithm - for Ruby
|
6
|
+
|
7
|
+
:construction: Under development.
|
4
8
|
|
5
9
|
## Installation
|
6
10
|
|
@@ -8,11 +12,39 @@
|
|
8
12
|
gem install libssw
|
9
13
|
```
|
10
14
|
|
15
|
+
Set the environment variable `LIBSSWDIR` to specify the location of the shared library.
|
16
|
+
For example, on Ubuntu, you can use libssw in the following way.
|
17
|
+
|
18
|
+
```
|
19
|
+
sudo apt install libssw-dev
|
20
|
+
export LIBSSWDIR=/usr/lib/x86_64-linux-gnu/ # libssw.so
|
21
|
+
```
|
22
|
+
|
23
|
+
### Installing from source
|
24
|
+
|
25
|
+
When installing from source code using the following steps, the shared library `libssw.so` will be packed in the Ruby gem. In this case, the environment variable LIBSSWDIR is not required. (Only tested on Ubuntu)
|
26
|
+
|
27
|
+
```sh
|
28
|
+
git clone --recurse-submodules https://github.com/kojix2/ruby-libssw
|
29
|
+
bundle exec rake libssw:compile
|
30
|
+
bundle exec rake install
|
31
|
+
```
|
32
|
+
|
11
33
|
## Usage
|
12
34
|
|
13
35
|
```ruby
|
14
36
|
require 'libssw'
|
15
|
-
|
37
|
+
|
38
|
+
ref = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
|
39
|
+
read = [0, 1, 2, 3, 3]
|
40
|
+
mat = [2, -2, -2, -2, 0,
|
41
|
+
-2, 2, -2, -2, 0,
|
42
|
+
-2, -2, 2, -2, 0,
|
43
|
+
-2, -2, -2, 2, 0,
|
44
|
+
0, 0, 0, 0, 0]
|
45
|
+
profile = LibSSW.ssw_init(read, mat)
|
46
|
+
align = LibSSW.ssw_align(profile, ref, 3, 1, 1, 0, 0, 15)
|
47
|
+
p align.to_h
|
16
48
|
```
|
17
49
|
|
18
50
|
## Development
|
data/exe/rbssw
CHANGED
data/lib/libssw.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative 'libssw/version'
|
4
|
+
require_relative 'libssw/BLOSUM50'
|
5
|
+
require_relative 'libssw/BLOSUM62'
|
4
6
|
|
5
7
|
module LibSSW
|
6
8
|
class Error < StandardError; end
|
@@ -19,102 +21,173 @@ module LibSSW
|
|
19
21
|
end
|
20
22
|
|
21
23
|
self.ffi_lib = if ENV['LIBSSWDIR'] && !ENV['LIBSSWDIR'].empty?
|
22
|
-
File.expand_path(lib_name, ENV['
|
24
|
+
File.expand_path(lib_name, ENV['LIBSSWDIR'])
|
23
25
|
else
|
24
26
|
File.expand_path("../vendor/#{lib_name}", __dir__)
|
25
27
|
end
|
26
28
|
|
27
29
|
require_relative 'libssw/ffi'
|
28
|
-
|
29
|
-
|
30
|
-
def cigar
|
31
|
-
pt = super
|
32
|
-
return [] if cigar_len.zero?
|
33
|
-
|
34
|
-
pt[0, 4 * cigar_len].unpack('L*')
|
35
|
-
end
|
36
|
-
|
37
|
-
def cigar_len
|
38
|
-
cigarLen
|
39
|
-
end
|
40
|
-
|
41
|
-
def to_h
|
42
|
-
h = {}
|
43
|
-
%i[score1
|
44
|
-
score2
|
45
|
-
ref_begin1
|
46
|
-
ref_end1
|
47
|
-
read_begin1
|
48
|
-
read_end1
|
49
|
-
ref_end2
|
50
|
-
cigar
|
51
|
-
cigar_len].each do |k|
|
52
|
-
h[k] = __send__(k)
|
53
|
-
end
|
54
|
-
h
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
class Profile < FFI::Profile
|
59
|
-
def read
|
60
|
-
pt = super
|
61
|
-
return [] if read_len.zero?
|
62
|
-
|
63
|
-
pt[0, read_len].unpack('c*')
|
64
|
-
end
|
65
|
-
|
66
|
-
def mat
|
67
|
-
pt = super
|
68
|
-
pt[0, n * n].unpack('c*')
|
69
|
-
end
|
70
|
-
|
71
|
-
def read_len
|
72
|
-
readLen
|
73
|
-
end
|
74
|
-
|
75
|
-
def to_h
|
76
|
-
h = {}
|
77
|
-
%i[byte
|
78
|
-
word
|
79
|
-
read
|
80
|
-
mat
|
81
|
-
read_len
|
82
|
-
n
|
83
|
-
bias].each do |k|
|
84
|
-
h[k] = __send__(k)
|
85
|
-
end
|
86
|
-
h
|
87
|
-
end
|
88
|
-
end
|
30
|
+
require_relative 'libssw/profile'
|
31
|
+
require_relative 'libssw/align'
|
89
32
|
|
90
33
|
class << self
|
91
|
-
|
34
|
+
# Create the query profile using the query sequence.
#
# @param read [Array<Integer>] query sequence; the query sequence needs to be numbers
# @param mat [Array] substitution matrix; mat needs to be corresponding to the read
#   sequence. Nested arrays are accepted and flattened.
# @param n [Integer, nil] the square root of the number of elements in mat
#   (mat has n*n elements). If you omit this argument, the integer square
#   root of the size of mat will be used.
# @param score_size [Integer]
#   estimated Smith-Waterman score;
#   * if your estimated best alignment score is surely < 255 please set 0;
#   * if your estimated best alignment score >= 255, please set 1;
#   * if you don't know, please set 2
# @return [LibSSW::Profile] wrapper around the profile pointer returned by FFI.ssw_init
# @raise [RuntimeError] if mat does not contain exactly n * n elements
def ssw_init(read, mat, n = nil, score_size: 2)
  read_str = read.pack('c*')
  read_len = read.size
  mat = mat.to_a.flatten
  # Integer.sqrt keeps n an Integer (Math.sqrt would yield a Float such as 5.0,
  # which would then be handed to the C int parameter and stored in the struct).
  n = Integer.sqrt(mat.size) if n.nil?
  raise "Not a square matrix. size: #{mat.size}, n: #{n}" if mat.size != n * n

  mat_str = mat.pack('c*')
  ptr = FFI.ssw_init(read_str, read_len, mat_str, n, score_size)
  profile = LibSSW::Profile.new(ptr)

  # Check Garbage Collection: the values read back from the C struct should
  # match what was passed in; warn (do not raise) when they differ.
  { read: read, read_len: read_len, mat: mat, n: n }.each do |name, expected|
    actual = profile.public_send(name)
    next if actual == expected

    warn "[Error] Struct member: '#{name}'"
    warn "    * expected value: #{expected}"
    warn "    * actual value: #{actual}"
    warn "    This may have been caused by Ruby'S GC."
  end

  # Preventing Garbage Collection --force
  # Re-assign the struct members from the Ruby-side buffers and keep references
  # to those buffers on the pointer object so they stay reachable.
  cstruct = profile.cstruct
  cstruct.read = read_str
  cstruct.mat = mat_str
  cstruct.readLen = read_len
  cstruct.n = n
  ptr.instance_variable_set(:@read_str, read_str)
  ptr.instance_variable_set(:@read_len, read_len)
  ptr.instance_variable_set(:@mat_str, mat_str)
  ptr.instance_variable_set(:@n, n)
  profile
end
|
97
81
|
|
82
|
+
# Release the memory allocated by function ssw_init.
|
83
|
+
# @param profile [Fiddle::Pointer, LibSSW::Profile, LibSSW::FFI::Profile]
|
84
|
+
# pointer to the query profile structure
|
85
|
+
# @note Ruby has garbage collection, so there is not much reason to call
|
86
|
+
# this method.
|
98
87
|
def init_destroy(profile)
|
99
88
|
FFI.init_destroy(profile)
|
100
89
|
end
|
101
90
|
|
102
|
-
|
91
|
+
# Do Striped Smith-Waterman alignment.
|
92
|
+
# @param prof [Fiddle::Pointer, LibSSW::Profile, LibSSW::FFI::Profile]
|
93
|
+
# pointer to the query profile structure
|
94
|
+
# @param ref [Array]
|
95
|
+
# target sequence;
|
96
|
+
# the target sequence needs to be numbers and corresponding to the mat
|
97
|
+
# parameter of function ssw_init
|
98
|
+
# @param weight_gap0 [Integer] the absolute value of gap open penalty
|
99
|
+
# @param weight_gapE [Integer] the absolute value of gap extension penalty
|
100
|
+
# @param flag [Integer]
|
101
|
+
# * bit 5: when set as 1, function ssw_align will return the best
|
102
|
+
# alignment beginning position;
|
103
|
+
# * bit 6: when set as 1, if (ref_end1 - ref_begin1 < filterd &&
|
104
|
+
# read_end1 - read_begin1 < filterd), (whatever bit 5 is set) the
|
105
|
+
# function will return the best alignment beginning position and cigar;
|
106
|
+
# * bit 7: when set as 1, if the best alignment score >= filters,
|
107
|
+
# (whatever bit 5 is set) the function will return the best
|
108
|
+
# alignment beginning position and cigar;
|
109
|
+
# * bit 8: when set as 1, (whatever bit 5, 6 or 7 is set) the
|
110
|
+
# function will always return the best alignment beginning position and
|
111
|
+
# cigar. When flag == 0, only the optimal and sub-optimal scores and the
|
112
|
+
# optimal alignment ending position will be returned.
|
113
|
+
# @param filters [Integer]
|
114
|
+
# scorefilter: when bit 7 of flag is set as 1 and bit 8 is set as 0,
|
115
|
+
# filters will be used (Please check the description of the flag parameter
|
116
|
+
# for detailed usage.)
|
117
|
+
# @param filterd [Integer]
|
118
|
+
# distance filter: when bit 6 of flag is set as 1 and bit 8 is set
|
119
|
+
# as 0, filterd will be used (Please check the description of the flag
|
120
|
+
# parameter for detailed usage.)
|
121
|
+
# @param mask_len [Integer]
|
122
|
+
# The distance between the optimal and suboptimal alignment ending
|
123
|
+
# position >= maskLen. We suggest to use readLen/2, if you don't have
|
124
|
+
# special concerns. Note: maskLen has to be >= 15, otherwise this function
|
125
|
+
# will NOT return the suboptimal alignment information. Detailed
|
126
|
+
# description of maskLen: After locating the optimal alignment ending
|
127
|
+
# position, the suboptimal alignment score can be heuristically found by
|
128
|
+
# checking the second largest score in the array that contains the maximal
|
129
|
+
# score of each column of the SW matrix. In order to avoid picking the
|
130
|
+
# scores that belong to the alignments sharing the partial best alignment,
|
131
|
+
# SSW C library masks the reference loci nearby (mask length = maskLen)
|
132
|
+
# the best alignment ending position and locates the second largest score
|
133
|
+
# from the unmasked elements.
|
134
|
+
def ssw_align(prof, ref, weight_gap0, weight_gapE, flag, filters, filterd, mask_len)
|
135
|
+
ref_str = ref.pack('c*')
|
136
|
+
ref_len = ref.size
|
103
137
|
ptr = FFI.ssw_align(
|
104
|
-
prof,
|
138
|
+
prof, ref_str, ref_len, weight_gap0, weight_gapE, flag, filters, filterd, mask_len
|
105
139
|
)
|
106
|
-
|
140
|
+
# Not sure yet if we should set the instance variable to the pointer as a
|
141
|
+
# garbage collection workaround.
|
142
|
+
# For example: instance_variable_set(:@ref_str, ref_str)
|
143
|
+
LibSSW::Align.new(ptr)
|
107
144
|
end
|
108
145
|
|
146
|
+
# Release the memory allocated by function ssw_align.
|
147
|
+
# @param align [Fiddle::Pointer, LibSSW::Align, LibSSW::FFI::Align]
|
148
|
+
# pointer to the alignment result structure
|
109
149
|
def align_destroy(align)
|
110
150
|
FFI.align_destroy(align)
|
111
151
|
end
|
112
152
|
|
153
|
+
# 1. Calculate the number of mismatches.
# 2. Modify the cigar string:
#    differentiate matches (=), mismatches(X), and softclip(S).
#
# @note This method takes a Fiddle::Pointer as an argument. Please read the
#   source code and understand it well before using this method.
#   (Needs to be improved)
# @param ref_begin1 [Integer]
#   0-based best alignment beginning position on the reference sequence
# @param read_begin1 [Integer]
#   0-based best alignment beginning position on the read sequence
# @param read_end1 [Integer]
#   0-based best alignment ending position on the read sequence
# @param ref [Array]
#   reference sequence
# @param read [Array]
#   read sequence
# @param read_len [Integer] length of the read
# @param cigar [Fiddle::Pointer]
#   best alignment cigar; stored the same as that in BAM format,
#   high 28 bits: length, low 4 bits: M/I/D (0/1/2)
# @param cigar_len [Integer, Array<Integer>] length of the cigar string
# @return [Integer] The number of mismatches. The cigar and cigarLen are modified.
def mark_mismatch(ref_begin1, read_begin1, read_end1, ref, read, read_len, cigar, cigar_len)
  warn 'implementation: fiexme: **cigar' # FIXME
  # cigar_len is packed into an int32 buffer before being passed on. Array()
  # accepts the documented Integer as well as an Array; calling #pack directly
  # on an Integer would raise NoMethodError.
  cigar_len_buf = Array(cigar_len).pack('l*')
  FFI.mark_mismatch(
    ref_begin1, read_begin1, read_end1, ref.pack('c*'), read.pack('c*'), read_len, cigar, cigar_len_buf
  )
end
|
181
|
+
|
182
|
+
# Convert an array of packed cigar integers into a cigar string.
#
# For each element the length is taken from the bits above the low four
# (x >> 4) and the operation from the low four bits (x & 15), looked up in
# "MIDNSHP=X". Operation codes greater than 8 are rendered as 'M'.
#
# @param arr [Array<Integer>] packed cigar values
# @return [String] cigar string such as "5M3I2D" (UTF-8; empty for an empty array)
def array_to_cigar_string(arr)
  # Start from a mutable UTF-8 literal: String.new without arguments would
  # produce an ASCII-8BIT (binary) string.
  arr.each_with_object(+'') do |x, cigar_string|
    length = x >> 4
    op = x & 15
    cigar_string << length.to_s << (op > 8 ? 'M' : 'MIDNSHP=X'[op])
  end
end
|
119
192
|
end
|
120
193
|
end
|
data/lib/libssw/BLOSUM50.rb
CHANGED
@@ -1,29 +1,29 @@
|
|
1
1
|
module LibSSW
|
2
2
|
BLOSUM50 = [
|
3
|
-
|
4
|
-
|
5
|
-
-2, 7, -1, -2, -4, 1, 0, -3, 0, -4, -3, 3, -2, -3, -3, -1, -1, -3, -1, -3, -1, 0, -1, -5,
|
6
|
-
-1, -1, 7, 2, -2, 0, 0, 0, 1, -3, -4, 0, -2, -4, -2, 1, 0, -4, -2, -3, 5, 0, -1, -5,
|
7
|
-
-2, -2, 2, 8, -4, 0, 2, -1, -1, -4, -4, -1, -4, -5, -1, 0, -1, -5, -3, -4, 6, 1, -1, -5,
|
8
|
-
-1, -4, -2, -4, 13, -3, -3, -3, -3, -2, -2, -3, -2, -2, -4, -1, -1, -5, -3, -1, -3, -3, -1, -5,
|
9
|
-
-1, 1, 0, 0, -3, 7, 2, -2, 1, -3, -2, 2, 0, -4, -1, 0, -1, -1, -1, -3, 0, 4, -1, -5,
|
10
|
-
-1, 0, 0, 2, -3, 2, 6, -3, 0, -4, -3, 1, -2, -3, -1, -1, -1, -3, -2, -3, 1, 5, -1, -5,
|
11
|
-
0, -3, 0, -1, -3, -2, -3, 8, -2, -4, -4, -2, -3, -4, -2, 0, -2, -3, -3, -4, -1, -2, -1, -5,
|
12
|
-
-2, 0, 1, -1, -3, 1, 0, -2, 10, -4, -3, 0, -1, -1, -2, -1, -2, -3, 2, -4, 0, 0, -1, -5,
|
13
|
-
-1, -4, -3, -4, -2, -3, -4, -4, -4, 5, 2, -3, 2, 0, -3, -3, -1, -3, -1, 4, -4, -3, -1, -5,
|
14
|
-
-2, -3, -4, -4, -2, -2, -3, -4, -3, 2, 5, -3, 3, 1, -4, -3, -1, -2, -1, 1, -4, -3, -1, -5,
|
15
|
-
-1, 3, 0, -1, -3, 2, 1, -2, 0, -3, -3, 6, -2, -4, -1, 0, -1, -3, -2, -3, 0, 1, -1, -5,
|
16
|
-
-1, -2, -2, -4, -2, 0, -2, -3, -1, 2, 3, -2, 7, 0, -3, -2, -1, -1, 0, 1, -3, -1, -1, -5,
|
17
|
-
-3, -3, -4, -5, -2, -4, -3, -4, -1, 0, 1, -4, 0, 8, -4, -3, -2, 1, 4, -1, -4, -4, -1, -5,
|
18
|
-
-1, -3, -2, -1, -4, -1, -1, -2, -2, -3, -4, -1, -3, -4, 10, -1, -1, -4, -3, -3, -2, -1, -1, -5,
|
19
|
-
|
20
|
-
0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 2, 5, -3, -2, 0, 0, -1, -1, -5,
|
21
|
-
-3, -3, -4, -5, -5, -1, -3, -3, -3, -3, -2, -3, -1, 1, -4, -4, -3, 15, 2, -3, -5, -2, -1, -5,
|
22
|
-
-2, -1, -2, -3, -3, -1, -2, -3, 2, -1, -1, -2, 0, 4, -3, -2, -2, 2, 8, -1, -3, -2, -1, -5,
|
23
|
-
0, -3, -3, -4, -1, -3, -3, -4, -4, 4, 1, -3, 1, -1, -3, -2, 0, -3, -1, 5, -3, -3, -1, -5,
|
24
|
-
-2, -1, 5, 6, -3, 0, 1, -1, 0, -4, -4, 0, -3, -4, -2, 0, 0, -5, -3, -3, 6, 1, -1, -5,
|
25
|
-
-1, 0, 0, 1, -3, 4, 5, -2, 0, -3, -3, 1, -1, -4, -1, 0, -1, -2, -2, -3, 1, 5, -1, -5,
|
26
|
-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -5,
|
27
|
-
-5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, 1
|
3
|
+
# A R N D C Q E G H I L K M F P S T W Y V B Z X *
|
4
|
+
5, -2, -1, -2, -1, -1, -1, 0, -2, -1, -2, -1, -1, -3, -1, 1, 0, -3, -2, 0, -2, -1, -1, -5, # A
|
5
|
+
-2, 7, -1, -2, -4, 1, 0, -3, 0, -4, -3, 3, -2, -3, -3, -1, -1, -3, -1, -3, -1, 0, -1, -5, # R
|
6
|
+
-1, -1, 7, 2, -2, 0, 0, 0, 1, -3, -4, 0, -2, -4, -2, 1, 0, -4, -2, -3, 5, 0, -1, -5, # N
|
7
|
+
-2, -2, 2, 8, -4, 0, 2, -1, -1, -4, -4, -1, -4, -5, -1, 0, -1, -5, -3, -4, 6, 1, -1, -5, # D
|
8
|
+
-1, -4, -2, -4, 13, -3, -3, -3, -3, -2, -2, -3, -2, -2, -4, -1, -1, -5, -3, -1, -3, -3, -1, -5, # C
|
9
|
+
-1, 1, 0, 0, -3, 7, 2, -2, 1, -3, -2, 2, 0, -4, -1, 0, -1, -1, -1, -3, 0, 4, -1, -5, # Q
|
10
|
+
-1, 0, 0, 2, -3, 2, 6, -3, 0, -4, -3, 1, -2, -3, -1, -1, -1, -3, -2, -3, 1, 5, -1, -5, # E
|
11
|
+
0, -3, 0, -1, -3, -2, -3, 8, -2, -4, -4, -2, -3, -4, -2, 0, -2, -3, -3, -4, -1, -2, -1, -5, # G
|
12
|
+
-2, 0, 1, -1, -3, 1, 0, -2, 10, -4, -3, 0, -1, -1, -2, -1, -2, -3, 2, -4, 0, 0, -1, -5, # H
|
13
|
+
-1, -4, -3, -4, -2, -3, -4, -4, -4, 5, 2, -3, 2, 0, -3, -3, -1, -3, -1, 4, -4, -3, -1, -5, # I
|
14
|
+
-2, -3, -4, -4, -2, -2, -3, -4, -3, 2, 5, -3, 3, 1, -4, -3, -1, -2, -1, 1, -4, -3, -1, -5, # L
|
15
|
+
-1, 3, 0, -1, -3, 2, 1, -2, 0, -3, -3, 6, -2, -4, -1, 0, -1, -3, -2, -3, 0, 1, -1, -5, # K
|
16
|
+
-1, -2, -2, -4, -2, 0, -2, -3, -1, 2, 3, -2, 7, 0, -3, -2, -1, -1, 0, 1, -3, -1, -1, -5, # M
|
17
|
+
-3, -3, -4, -5, -2, -4, -3, -4, -1, 0, 1, -4, 0, 8, -4, -3, -2, 1, 4, -1, -4, -4, -1, -5, # F
|
18
|
+
-1, -3, -2, -1, -4, -1, -1, -2, -2, -3, -4, -1, -3, -4, 10, -1, -1, -4, -3, -3, -2, -1, -1, -5, # P
|
19
|
+
1, -1, 1, 0, -1, 0, -1, 0, -1, -3, -3, 0, -2, -3, -1, 5, 2, -4, -2, -2, 0, 0, -1, -5, # S
|
20
|
+
0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 2, 5, -3, -2, 0, 0, -1, -1, -5, # T
|
21
|
+
-3, -3, -4, -5, -5, -1, -3, -3, -3, -3, -2, -3, -1, 1, -4, -4, -3, 15, 2, -3, -5, -2, -1, -5, # W
|
22
|
+
-2, -1, -2, -3, -3, -1, -2, -3, 2, -1, -1, -2, 0, 4, -3, -2, -2, 2, 8, -1, -3, -2, -1, -5, # Y
|
23
|
+
0, -3, -3, -4, -1, -3, -3, -4, -4, 4, 1, -3, 1, -1, -3, -2, 0, -3, -1, 5, -3, -3, -1, -5, # V
|
24
|
+
-2, -1, 5, 6, -3, 0, 1, -1, 0, -4, -4, 0, -3, -4, -2, 0, 0, -5, -3, -3, 6, 1, -1, -5, # B
|
25
|
+
-1, 0, 0, 1, -3, 4, 5, -2, 0, -3, -3, 1, -1, -4, -1, 0, -1, -2, -2, -3, 1, 5, -1, -5, # Z
|
26
|
+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -5, # X
|
27
|
+
-5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, 1 # *
|
28
28
|
]
|
29
29
|
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module LibSSW
|
2
|
+
BLOSUM62 = [
|
3
|
+
# A R N D C Q E G H I L K M F P S T W Y V B Z X *
|
4
|
+
4, -1, -2, -2, 0, -1, -1, 0, -2, -1, -1, -1, -1, -2, -1, 1, 0, -3, -2, 0, -2, -1, 0, -4, # A
|
5
|
+
-1, 5, 0, -2, -3, 1, 0, -2, 0, -3, -2, 2, -1, -3, -2, -1, -1, -3, -2, -3, -1, 0, -1, -4, # R
|
6
|
+
-2, 0, 6, 1, -3, 0, 0, 0, 1, -3, -3, 0, -2, -3, -2, 1, 0, -4, -2, -3, 3, 0, -1, -4, # N
|
7
|
+
-2, -2, 1, 6, -3, 0, 2, -1, -1, -3, -4, -1, -3, -3, -1, 0, -1, -4, -3, -3, 4, 1, -1, -4, # D
|
8
|
+
0, -3, -3, -3, 9, -3, -4, -3, -3, -1, -1, -3, -1, -2, -3, -1, -1, -2, -2, -1, -3, -3, -2, -4, # C
|
9
|
+
-1, 1, 0, 0, -3, 5, 2, -2, 0, -3, -2, 1, 0, -3, -1, 0, -1, -2, -1, -2, 0, 3, -1, -4, # Q
|
10
|
+
-1, 0, 0, 2, -4, 2, 5, -2, 0, -3, -3, 1, -2, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1, -4, # E
|
11
|
+
0, -2, 0, -1, -3, -2, -2, 6, -2, -4, -4, -2, -3, -3, -2, 0, -2, -2, -3, -3, -1, -2, -1, -4, # G
|
12
|
+
-2, 0, 1, -1, -3, 0, 0, -2, 8, -3, -3, -1, -2, -1, -2, -1, -2, -2, 2, -3, 0, 0, -1, -4, # H
|
13
|
+
-1, -3, -3, -3, -1, -3, -3, -4, -3, 4, 2, -3, 1, 0, -3, -2, -1, -3, -1, 3, -3, -3, -1, -4, # I
|
14
|
+
-1, -2, -3, -4, -1, -2, -3, -4, -3, 2, 4, -2, 2, 0, -3, -2, -1, -2, -1, 1, -4, -3, -1, -4, # L
|
15
|
+
-1, 2, 0, -1, -3, 1, 1, -2, -1, -3, -2, 5, -1, -3, -1, 0, -1, -3, -2, -2, 0, 1, -1, -4, # K
|
16
|
+
-1, -1, -2, -3, -1, 0, -2, -3, -2, 1, 2, -1, 5, 0, -2, -1, -1, -1, -1, 1, -3, -1, -1, -4, # M
|
17
|
+
-2, -3, -3, -3, -2, -3, -3, -3, -1, 0, 0, -3, 0, 6, -4, -2, -2, 1, 3, -1, -3, -3, -1, -4, # F
|
18
|
+
-1, -2, -2, -1, -3, -1, -1, -2, -2, -3, -3, -1, -2, -4, 7, -1, -1, -4, -3, -2, -2, -1, -2, -4, # P
|
19
|
+
1, -1, 1, 0, -1, 0, 0, 0, -1, -2, -2, 0, -1, -2, -1, 4, 1, -3, -2, -2, 0, 0, 0, -4, # S
|
20
|
+
0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 1, 5, -2, -2, 0, -1, -1, 0, -4, # T
|
21
|
+
-3, -3, -4, -4, -2, -2, -3, -2, -2, -3, -2, -3, -1, 1, -4, -3, -2, 11, 2, -3, -4, -3, -2, -4, # W
|
22
|
+
-2, -2, -2, -3, -2, -1, -2, -3, 2, -1, -1, -2, -1, 3, -3, -2, -2, 2, 7, -1, -3, -2, -1, -4, # Y
|
23
|
+
0, -3, -3, -3, -1, -2, -2, -3, -3, 3, 1, -2, 1, -1, -2, -2, 0, -3, -1, 4, -3, -2, -1, -4, # V
|
24
|
+
-2, -1, 3, 4, -3, 0, 1, -1, 0, -3, -4, 0, -3, -3, -2, 0, -1, -4, -3, -3, 4, 1, -1, -4, # B
|
25
|
+
-1, 0, 0, 1, -3, 3, 4, -2, 0, -3, -3, 1, -1, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1, -4, # Z
|
26
|
+
0, -1, -1, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, 0, 0, -2, -1, -1, -1, -1, -1, -4, # X
|
27
|
+
-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, 1, # *
|
28
|
+
]
|
29
|
+
end
|
30
|
+
|
data/lib/libssw/align.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'struct_helper'
|
4
|
+
|
5
|
+
module LibSSW
|
6
|
+
# structure of the alignment result
|
7
|
+
# @!attribute score1
|
8
|
+
# @return [Integer] the best alignment score
|
9
|
+
# @!attribute score2
|
10
|
+
# @return [Integer] sub-optimal alignment score
|
11
|
+
# @!attribute ref_begin1
|
12
|
+
# @return [Integer]
|
13
|
+
# 0-based best alignment beginning position on reference;
|
14
|
+
# ref_begin1 = -1 when the best alignment beginning position is not available
|
15
|
+
# @!attribute ref_end1
|
16
|
+
# @return [Integer] 0-based best alignment ending position on reference
|
17
|
+
# @!attribute read_begin1
|
18
|
+
# @return [Integer]
|
19
|
+
# 0-based best alignment beginning position on read;
|
20
|
+
# read_begin1 = -1 when the best alignment beginning position is not available
|
21
|
+
# @!attribute read_end1
|
22
|
+
# @return [Integer] 0-based best alignment ending position on read
|
23
|
+
# @!attribute ref_end2
|
24
|
+
# @return [Integer] 0-based sub-optimal alignment ending position on reference
|
25
|
+
# @!attribute cigar [r]
|
26
|
+
# @return [Array]
|
27
|
+
# best alignment cigar; stored the same as that in BAM format,
|
28
|
+
# high 28 bits: length, low 4 bits: M/I/D (0/1/2);
|
29
|
+
# cigar = 0 when the best alignment path is not available
|
30
|
+
# @!attribute cigar_len
|
31
|
+
# @return [Integer]
|
32
|
+
# length of the cigar string; cigarLen = 0 when the best alignment path is not available
|
33
|
+
class Align < FFI::Align
|
34
|
+
include StructHelper
|
35
|
+
|
36
|
+
def self.keys
|
37
|
+
%i[score1 score2 ref_begin1 ref_end1
|
38
|
+
read_begin1 read_end1 ref_end2 cigar cigar_len cigar_string]
|
39
|
+
end
|
40
|
+
|
41
|
+
# This class is read_only
|
42
|
+
attr_reader(*keys, :ptr, :cstruct)
|
43
|
+
|
44
|
+
def initialize(ptr)
|
45
|
+
@ptr = ptr
|
46
|
+
@cstruct = align = FFI::Align.new(ptr)
|
47
|
+
@score1 = align.score1
|
48
|
+
@score2 = align.score2
|
49
|
+
@ref_begin1 = align.ref_begin1
|
50
|
+
@ref_end1 = align.ref_end1
|
51
|
+
@read_begin1 = align.read_begin1
|
52
|
+
@read_end1 = align.read_end1
|
53
|
+
@ref_end2 = align.ref_end2
|
54
|
+
@cigar_len = align.cigarLen
|
55
|
+
@cigar = cigar_len.positive? ? align.cigar[0, 4 * cigar_len].unpack('L*') : []
|
56
|
+
# Attributes for ruby binding only
|
57
|
+
@cigar_string = LibSSW.array_to_cigar_string(@cigar)
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
data/lib/libssw/ffi.rb
CHANGED
@@ -13,17 +13,11 @@ module LibSSW
|
|
13
13
|
end
|
14
14
|
|
15
15
|
class << self
|
16
|
-
attr_reader :func_map
|
17
|
-
|
18
16
|
def try_extern(signature, *opts)
|
19
17
|
extern(signature, *opts)
|
20
18
|
rescue StandardError => e
|
21
19
|
warn "#{e.class.name}: #{e.message}"
|
22
20
|
end
|
23
|
-
|
24
|
-
def ffi_methods
|
25
|
-
@ffi_methods ||= func_map.each_key.to_a
|
26
|
-
end
|
27
21
|
end
|
28
22
|
|
29
23
|
Align = struct [
|
@@ -39,8 +33,8 @@ module LibSSW
|
|
39
33
|
]
|
40
34
|
|
41
35
|
Profile = struct [
|
42
|
-
'__m128i* byte', # __m128i* profile_byte;
|
43
|
-
'__m128i* word', # __m128i* profile_word;
|
36
|
+
'__m128i* byte', # __m128i* profile_byte; // 0: none
|
37
|
+
'__m128i* word', # __m128i* profile_word; // 0: none
|
44
38
|
'const int8_t* read',
|
45
39
|
'const int8_t* mat',
|
46
40
|
'int32_t readLen',
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'struct_helper'
|
4
|
+
|
5
|
+
module LibSSW
|
6
|
+
# structure of the query profile
|
7
|
+
# @!attribute read
|
8
|
+
# @!attribute mat
|
9
|
+
# @!attribute read_len
|
10
|
+
# @!attribute n
|
11
|
+
# @!attribute bias
|
12
|
+
class Profile < FFI::Profile
|
13
|
+
include StructHelper
|
14
|
+
|
15
|
+
def self.keys
|
16
|
+
%i[read mat read_len n bias]
|
17
|
+
end
|
18
|
+
|
19
|
+
# This class is read_only
|
20
|
+
attr_reader(*keys, :ptr, :cstruct)
|
21
|
+
|
22
|
+
def initialize(ptr)
|
23
|
+
@ptr = ptr
|
24
|
+
@cstruct = profile = LibSSW::FFI::Profile.new(ptr)
|
25
|
+
@read_len = profile.readLen
|
26
|
+
@read = read_len.positive? ? profile.read[0, read_len].unpack('c*') : []
|
27
|
+
@n = profile.n
|
28
|
+
@mat = n.positive? ? profile.mat[0, n * n].unpack('c*') : []
|
29
|
+
@bias = profile.bias
|
30
|
+
end
|
31
|
+
|
32
|
+
def to_ptr
|
33
|
+
# Garbage collection workaround
|
34
|
+
# cstruct.read = p @ptr.instance_variable_get(:@read_str)
|
35
|
+
# cstruct.mat = p @ptr.instance_variable_get(:@mat_str)
|
36
|
+
# cstruct.readLen = p @ptr.instance_variable_get(:@read_len)
|
37
|
+
# cstruct.n = p @ptr.instance_variable_get(:@n)
|
38
|
+
@ptr
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
data/lib/libssw/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: libssw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kojix2
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-01-
|
11
|
+
date: 2021-01-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: fiddle
|
@@ -94,6 +94,20 @@ dependencies:
|
|
94
94
|
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: simplecov
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
97
111
|
description: Ruby bindings for libssw
|
98
112
|
email:
|
99
113
|
- 2xijok@gmail.com
|
@@ -107,7 +121,11 @@ files:
|
|
107
121
|
- exe/rbssw
|
108
122
|
- lib/libssw.rb
|
109
123
|
- lib/libssw/BLOSUM50.rb
|
124
|
+
- lib/libssw/BLOSUM62.rb
|
125
|
+
- lib/libssw/align.rb
|
110
126
|
- lib/libssw/ffi.rb
|
127
|
+
- lib/libssw/profile.rb
|
128
|
+
- lib/libssw/struct_helper.rb
|
111
129
|
- lib/libssw/version.rb
|
112
130
|
homepage: https://github.com/kojix2/ruby-libssw
|
113
131
|
licenses:
|
@@ -124,9 +142,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
124
142
|
version: '2.5'
|
125
143
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
126
144
|
requirements:
|
127
|
-
- - "
|
145
|
+
- - ">="
|
128
146
|
- !ruby/object:Gem::Version
|
129
|
-
version:
|
147
|
+
version: '0'
|
130
148
|
requirements: []
|
131
149
|
rubygems_version: 3.2.3
|
132
150
|
signing_key:
|