libssw 0.0.1.pre → 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +34 -2
- data/exe/rbssw +3 -0
- data/lib/libssw.rb +141 -68
- data/lib/libssw/BLOSUM50.rb +25 -25
- data/lib/libssw/BLOSUM62.rb +30 -0
- data/lib/libssw/align.rb +60 -0
- data/lib/libssw/ffi.rb +2 -8
- data/lib/libssw/profile.rb +41 -0
- data/lib/libssw/struct_helper.rb +13 -0
- data/lib/libssw/version.rb +1 -1
- metadata +22 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c888d3c126247f4f397fc151402fd482cdc4c105bd00163d07cc056b0e202607
|
4
|
+
data.tar.gz: 8717cde76e5ba26034b05c81871b43730580b09dfb610c9507b0c651e6b816a1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d10a0cc734b8c53bf506c97ade223269ad6a64ed356f81d08eb9368e031b068ed85c217d3e47a737a303604b57009a00c0aab94a6eea89a62382cf79dbbee326
|
7
|
+
data.tar.gz: f18a9eddc8455e13a58058294d80f52ed96b31949e90c25494fd48ac7d3f22df4588e86df389160fca973489db36381bdabb67a2858ceeb811e9a8ddba3249cb
|
data/README.md
CHANGED
@@ -1,6 +1,10 @@
|
|
1
1
|
# ruby-libssw
|
2
2
|
|
3
|
-
[
|
3
|
+

|
4
|
+
|
5
|
+
:checkered_flag: [libssw](https://github.com/mengyao/Complete-Striped-Smith-Waterman-Library) - fast SIMD parallelized implementation of the Smith-Waterman algorithm - for Ruby
|
6
|
+
|
7
|
+
:construction: Under development.
|
4
8
|
|
5
9
|
## Installation
|
6
10
|
|
@@ -8,11 +12,39 @@
|
|
8
12
|
gem install libssw
|
9
13
|
```
|
10
14
|
|
15
|
+
Set the environment variable `LIBSSWDIR` to specify the location of the shared library.
|
16
|
+
For example, on Ubuntu, you can use libssw in the following way.
|
17
|
+
|
18
|
+
```
|
19
|
+
sudo apt install libssw-dev
|
20
|
+
export LIBSSWDIR=/usr/lib/x86_64-linux-gnu/ # libssw.so
|
21
|
+
```
|
22
|
+
|
23
|
+
### Installing from source
|
24
|
+
|
25
|
+
When installing from source code using the following steps, the shared library `libssw.so` will be packed in the Ruby gem. In this case, the environment variable LIBSSWDIR is not required. (Only tested on Ubuntu)
|
26
|
+
|
27
|
+
```sh
|
28
|
+
git clone --recurse-submodules https://github.com/kojix2/ruby-libssw
|
29
|
+
bundle exec rake libssw:compile
|
30
|
+
bundle exec rake install
|
31
|
+
```
|
32
|
+
|
11
33
|
## Usage
|
12
34
|
|
13
35
|
```ruby
|
14
36
|
require 'libssw'
|
15
|
-
|
37
|
+
|
38
|
+
ref = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
|
39
|
+
read = [0, 1, 2, 3, 3]
|
40
|
+
mat = [2, -2, -2, -2, 0,
|
41
|
+
-2, 2, -2, -2, 0,
|
42
|
+
-2, -2, 2, -2, 0,
|
43
|
+
-2, -2, -2, 2, 0,
|
44
|
+
0, 0, 0, 0, 0]
|
45
|
+
profile = LibSSW.ssw_init(read, mat)
|
46
|
+
align = LibSSW.ssw_align(profile, ref, 3, 1, 1, 0, 0, 15)
|
47
|
+
p align.to_h
|
16
48
|
```
|
17
49
|
|
18
50
|
## Development
|
data/exe/rbssw
CHANGED
data/lib/libssw.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative 'libssw/version'
|
4
|
+
require_relative 'libssw/BLOSUM50'
|
5
|
+
require_relative 'libssw/BLOSUM62'
|
4
6
|
|
5
7
|
module LibSSW
|
6
8
|
class Error < StandardError; end
|
@@ -19,102 +21,173 @@ module LibSSW
|
|
19
21
|
end
|
20
22
|
|
21
23
|
self.ffi_lib = if ENV['LIBSSWDIR'] && !ENV['LIBSSWDIR'].empty?
|
22
|
-
File.expand_path(lib_name, ENV['
|
24
|
+
File.expand_path(lib_name, ENV['LIBSSWDIR'])
|
23
25
|
else
|
24
26
|
File.expand_path("../vendor/#{lib_name}", __dir__)
|
25
27
|
end
|
26
28
|
|
27
29
|
require_relative 'libssw/ffi'
|
28
|
-
|
29
|
-
|
30
|
-
def cigar
|
31
|
-
pt = super
|
32
|
-
return [] if cigar_len.zero?
|
33
|
-
|
34
|
-
pt[0, 4 * cigar_len].unpack('L*')
|
35
|
-
end
|
36
|
-
|
37
|
-
def cigar_len
|
38
|
-
cigarLen
|
39
|
-
end
|
40
|
-
|
41
|
-
def to_h
|
42
|
-
h = {}
|
43
|
-
%i[score1
|
44
|
-
score2
|
45
|
-
ref_begin1
|
46
|
-
ref_end1
|
47
|
-
read_begin1
|
48
|
-
read_end1
|
49
|
-
ref_end2
|
50
|
-
cigar
|
51
|
-
cigar_len].each do |k|
|
52
|
-
h[k] = __send__(k)
|
53
|
-
end
|
54
|
-
h
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
class Profile < FFI::Profile
|
59
|
-
def read
|
60
|
-
pt = super
|
61
|
-
return [] if read_len.zero?
|
62
|
-
|
63
|
-
pt[0, read_len].unpack('c*')
|
64
|
-
end
|
65
|
-
|
66
|
-
def mat
|
67
|
-
pt = super
|
68
|
-
pt[0, n * n].unpack('c*')
|
69
|
-
end
|
70
|
-
|
71
|
-
def read_len
|
72
|
-
readLen
|
73
|
-
end
|
74
|
-
|
75
|
-
def to_h
|
76
|
-
h = {}
|
77
|
-
%i[byte
|
78
|
-
word
|
79
|
-
read
|
80
|
-
mat
|
81
|
-
read_len
|
82
|
-
n
|
83
|
-
bias].each do |k|
|
84
|
-
h[k] = __send__(k)
|
85
|
-
end
|
86
|
-
h
|
87
|
-
end
|
88
|
-
end
|
30
|
+
require_relative 'libssw/profile'
|
31
|
+
require_relative 'libssw/align'
|
89
32
|
|
90
33
|
class << self
|
91
|
-
|
34
|
+
# Create the query profile using the query sequence.
|
35
|
+
# @param read [Array] query sequence; the query sequence needs to be numbers
|
36
|
+
# @param mat [Array] substitution matrix; mat needs to be corresponding to the read sequence
|
37
|
+
# @param n [Integer] the square root of the number of elements in mat (mat has n*n elements)
|
38
|
+
# If you omit this argument, the square root of the size of mat will be set.
|
39
|
+
# @param score_size [Integer]
|
40
|
+
# estimated Smith-Waterman score;
|
41
|
+
# * if your estimated best alignment score is surely < 255 please set 0;
|
42
|
+
# * if your estimated best alignment score >= 255, please set 1;
|
43
|
+
# * if you don't know, please set 2
|
44
|
+
def ssw_init(read, mat, n = nil, score_size: 2)
|
45
|
+
read_str = read.pack('c*')
|
46
|
+
read_len = read.size
|
47
|
+
mat = mat.to_a.flatten
|
48
|
+
n = Math.sqrt(mat.size) if n.nil?
|
49
|
+
raise "Not a square matrix. size: #{mat.size}, n: #{n}" if mat.size != n * n
|
50
|
+
|
51
|
+
mat_str = mat.flatten.pack('c*')
|
92
52
|
ptr = FFI.ssw_init(
|
93
|
-
|
53
|
+
read_str,
|
54
|
+
read_len,
|
55
|
+
mat_str,
|
56
|
+
n,
|
57
|
+
score_size
|
94
58
|
)
|
95
|
-
|
59
|
+
profile = LibSSW::Profile.new(ptr)
|
60
|
+
# Check Garbage Collection
|
61
|
+
%i[read read_len mat n].zip([read, read_len, mat, n]).each do |name, obj|
|
62
|
+
next unless profile.public_send(name) != obj
|
63
|
+
|
64
|
+
warn "[Error] Struct member: '#{name}'"
|
65
|
+
warn " * expected value: #{obj}"
|
66
|
+
warn " * actual value: #{profile.public_send(name)}"
|
67
|
+
warn " This may have been caused by Ruby'S GC."
|
68
|
+
end
|
69
|
+
# Preventing Garbage Collection --force
|
70
|
+
cstruct = profile.cstruct
|
71
|
+
cstruct.read = read_str
|
72
|
+
cstruct.mat = mat_str
|
73
|
+
cstruct.readLen = read_len
|
74
|
+
cstruct.n = n
|
75
|
+
ptr.instance_variable_set(:@read_str, read_str)
|
76
|
+
ptr.instance_variable_set(:@read_len, read_len)
|
77
|
+
ptr.instance_variable_set(:@mat_str, mat_str)
|
78
|
+
ptr.instance_variable_set(:@n, n)
|
79
|
+
profile
|
96
80
|
end
|
97
81
|
|
82
|
+
# Release the memory allocated by function ssw_init.
|
83
|
+
# @param profile [Fiddle::Pointer, LibSSW::Profile, LibSSW::FFI::Profile]
|
84
|
+
# pointer to the query profile structure
|
85
|
+
# @note Ruby has garbage collection, so there is not much reason to call
|
86
|
+
# this method.
|
98
87
|
def init_destroy(profile)
|
99
88
|
FFI.init_destroy(profile)
|
100
89
|
end
|
101
90
|
|
102
|
-
|
91
|
+
# Do Striped Smith-Waterman alignment.
|
92
|
+
# @param prof [Fiddle::Pointer, LibSSW::Profile, LibSSW::FFI::Profile]
|
93
|
+
# pointer to the query profile structure
|
94
|
+
# @param ref [Array]
|
95
|
+
# target sequence;
|
96
|
+
# the target sequence needs to be numbers and corresponding to the mat
|
97
|
+
# parameter of function ssw_init
|
98
|
+
# @param weight_gap0 [Integer] the absolute value of gap open penalty
|
99
|
+
# @param weight_gapE [Integer] the absolute value of gap extension penalty
|
100
|
+
# @param flag [Integer]
|
101
|
+
# * bit 5: when set to 1, function ssw_align will return the best
|
102
|
+
# alignment beginning position;
|
103
|
+
# * bit 6: when set to 1, if (ref_end1 - ref_begin1 < filterd &&
|
104
|
+
# read_end1 - read_begin1 < filterd), (whatever bit 5 is set to) the
|
105
|
+
# function will return the best alignment beginning position and cigar;
|
106
|
+
# * bit 7: when set to 1, if the best alignment score >= filters,
|
107
|
+
# (whatever bit 5 is set to) the function will return the best
|
108
|
+
# alignment beginning position and cigar;
|
109
|
+
# * bit 8: when set to 1, (whatever bit 5, 6 or 7 is set to) the
|
110
|
+
# function will always return the best alignment beginning position and
|
111
|
+
# cigar. When flag == 0, only the optimal and sub-optimal scores and the
|
112
|
+
# optimal alignment ending position will be returned.
|
113
|
+
# @param filters [Integer]
|
114
|
+
# scorefilter: when bit 7 of flag is set to 1 and bit 8 is set to 0,
|
115
|
+
# filters will be used (Please check the description of the flag parameter
|
116
|
+
# for detailed usage.)
|
117
|
+
# @param filterd [Integer]
|
118
|
+
# distance filter: when bit 6 of flag is set to 1 and bit 8 is set
|
119
|
+
# as 0, filterd will be used (Please check the description of the flag
|
120
|
+
# parameter for detailed usage.)
|
121
|
+
# @param mask_len [Integer]
|
122
|
+
# The distance between the optimal and suboptimal alignment ending
|
123
|
+
# position >= maskLen. We suggest to use readLen/2, if you don't have
|
124
|
+
# special concerns. Note: maskLen has to be >= 15, otherwise this function
|
125
|
+
# will NOT return the suboptimal alignment information. Detailed
|
126
|
+
# description of maskLen: After locating the optimal alignment ending
|
127
|
+
# position, the suboptimal alignment score can be heuristically found by
|
128
|
+
# checking the second largest score in the array that contains the maximal
|
129
|
+
# score of each column of the SW matrix. In order to avoid picking the
|
130
|
+
# scores that belong to the alignments sharing the partial best alignment,
|
131
|
+
# SSW C library masks the reference loci nearby (mask length = maskLen)
|
132
|
+
# the best alignment ending position and locates the second largest score
|
133
|
+
# from the unmasked elements.
|
134
|
+
def ssw_align(prof, ref, weight_gap0, weight_gapE, flag, filters, filterd, mask_len)
|
135
|
+
ref_str = ref.pack('c*')
|
136
|
+
ref_len = ref.size
|
103
137
|
ptr = FFI.ssw_align(
|
104
|
-
prof,
|
138
|
+
prof, ref_str, ref_len, weight_gap0, weight_gapE, flag, filters, filterd, mask_len
|
105
139
|
)
|
106
|
-
|
140
|
+
# Not sure yet if we should set the instance variable to the pointer as a
|
141
|
+
# garbage collection workaround.
|
142
|
+
# For example: instance_variable_set(:@ref_str, ref_str)
|
143
|
+
LibSSW::Align.new(ptr)
|
107
144
|
end
|
108
145
|
|
146
|
+
# Release the memory allocated by function ssw_align.
|
147
|
+
# @param align [Fiddle::Pointer, LibSSW::Align, LibSSW::FFI::Align]
|
148
|
+
# pointer to the alignment result structure
|
109
149
|
def align_destroy(align)
|
110
150
|
FFI.align_destroy(align)
|
111
151
|
end
|
112
152
|
|
153
|
+
# 1. Calculate the number of mismatches.
|
154
|
+
# 2. Modify the cigar string:
|
155
|
+
# differentiate matches (=), mismatches(X), and softclip(S).
|
156
|
+
# @note This method takes a Fiddle::Pointer as an argument. Please read the
|
157
|
+
# source code and understand it well before using this method.
|
158
|
+
# (Needs to be improved)
|
159
|
+
# @param ref_begin1 [Integer]
|
160
|
+
# 0-based best alignment beginning position on the reference sequence
|
161
|
+
# @param read_begin1 [Integer]
|
162
|
+
# 0-based best alignment beginning position on the read sequence
|
163
|
+
# @param read_end1 [Integer]
|
164
|
+
# 0-based best alignment ending position on the read sequence
|
165
|
+
# @param ref [Array]
|
166
|
+
# reference sequence
|
167
|
+
# @param read [Array]
|
168
|
+
# read sequence
|
169
|
+
# @param read_len [Integer] length of the read
|
170
|
+
# @param cigar [Fiddle::Pointer]
|
171
|
+
# best alignment cigar; stored the same as that in BAM format,
|
172
|
+
# high 28 bits: length, low 4 bits: M/I/D (0/1/2)
|
173
|
+
# @param cigar_len [Integer] length of the cigar string
|
174
|
+
# @return [Integer] The number of mismatches. The cigar and cigarLen are modified.
|
113
175
|
def mark_mismatch(ref_begin1, read_begin1, read_end1, ref, read, read_len, cigar, cigar_len)
|
114
176
|
warn 'implementation: fiexme: **cigar' # FIXME
|
115
177
|
FFI.mark_mismatch(
|
116
178
|
ref_begin1, read_begin1, read_end1, ref.pack('c*'), read.pack('c*'), read_len, cigar, cigar_len.pack('l*')
|
117
179
|
)
|
118
180
|
end
|
181
|
+
|
182
|
+
def array_to_cigar_string(arr)
|
183
|
+
cigar_string = String.new
|
184
|
+
arr.each do |x|
|
185
|
+
n = x >> 4
|
186
|
+
m = x & 15
|
187
|
+
c = m > 8 ? 'M' : 'MIDNSHP=X'[m]
|
188
|
+
cigar_string << n.to_s << c
|
189
|
+
end
|
190
|
+
cigar_string
|
191
|
+
end
|
119
192
|
end
|
120
193
|
end
|
data/lib/libssw/BLOSUM50.rb
CHANGED
@@ -1,29 +1,29 @@
|
|
1
1
|
module LibSSW
|
2
2
|
BLOSUM50 = [
|
3
|
-
|
4
|
-
|
5
|
-
-2, 7, -1, -2, -4, 1, 0, -3, 0, -4, -3, 3, -2, -3, -3, -1, -1, -3, -1, -3, -1, 0, -1, -5,
|
6
|
-
-1, -1, 7, 2, -2, 0, 0, 0, 1, -3, -4, 0, -2, -4, -2, 1, 0, -4, -2, -3, 5, 0, -1, -5,
|
7
|
-
-2, -2, 2, 8, -4, 0, 2, -1, -1, -4, -4, -1, -4, -5, -1, 0, -1, -5, -3, -4, 6, 1, -1, -5,
|
8
|
-
-1, -4, -2, -4, 13, -3, -3, -3, -3, -2, -2, -3, -2, -2, -4, -1, -1, -5, -3, -1, -3, -3, -1, -5,
|
9
|
-
-1, 1, 0, 0, -3, 7, 2, -2, 1, -3, -2, 2, 0, -4, -1, 0, -1, -1, -1, -3, 0, 4, -1, -5,
|
10
|
-
-1, 0, 0, 2, -3, 2, 6, -3, 0, -4, -3, 1, -2, -3, -1, -1, -1, -3, -2, -3, 1, 5, -1, -5,
|
11
|
-
0, -3, 0, -1, -3, -2, -3, 8, -2, -4, -4, -2, -3, -4, -2, 0, -2, -3, -3, -4, -1, -2, -1, -5,
|
12
|
-
-2, 0, 1, -1, -3, 1, 0, -2, 10, -4, -3, 0, -1, -1, -2, -1, -2, -3, 2, -4, 0, 0, -1, -5,
|
13
|
-
-1, -4, -3, -4, -2, -3, -4, -4, -4, 5, 2, -3, 2, 0, -3, -3, -1, -3, -1, 4, -4, -3, -1, -5,
|
14
|
-
-2, -3, -4, -4, -2, -2, -3, -4, -3, 2, 5, -3, 3, 1, -4, -3, -1, -2, -1, 1, -4, -3, -1, -5,
|
15
|
-
-1, 3, 0, -1, -3, 2, 1, -2, 0, -3, -3, 6, -2, -4, -1, 0, -1, -3, -2, -3, 0, 1, -1, -5,
|
16
|
-
-1, -2, -2, -4, -2, 0, -2, -3, -1, 2, 3, -2, 7, 0, -3, -2, -1, -1, 0, 1, -3, -1, -1, -5,
|
17
|
-
-3, -3, -4, -5, -2, -4, -3, -4, -1, 0, 1, -4, 0, 8, -4, -3, -2, 1, 4, -1, -4, -4, -1, -5,
|
18
|
-
-1, -3, -2, -1, -4, -1, -1, -2, -2, -3, -4, -1, -3, -4, 10, -1, -1, -4, -3, -3, -2, -1, -1, -5,
|
19
|
-
|
20
|
-
0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 2, 5, -3, -2, 0, 0, -1, -1, -5,
|
21
|
-
-3, -3, -4, -5, -5, -1, -3, -3, -3, -3, -2, -3, -1, 1, -4, -4, -3, 15, 2, -3, -5, -2, -1, -5,
|
22
|
-
-2, -1, -2, -3, -3, -1, -2, -3, 2, -1, -1, -2, 0, 4, -3, -2, -2, 2, 8, -1, -3, -2, -1, -5,
|
23
|
-
0, -3, -3, -4, -1, -3, -3, -4, -4, 4, 1, -3, 1, -1, -3, -2, 0, -3, -1, 5, -3, -3, -1, -5,
|
24
|
-
-2, -1, 5, 6, -3, 0, 1, -1, 0, -4, -4, 0, -3, -4, -2, 0, 0, -5, -3, -3, 6, 1, -1, -5,
|
25
|
-
-1, 0, 0, 1, -3, 4, 5, -2, 0, -3, -3, 1, -1, -4, -1, 0, -1, -2, -2, -3, 1, 5, -1, -5,
|
26
|
-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -5,
|
27
|
-
-5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, 1
|
3
|
+
# A R N D C Q E G H I L K M F P S T W Y V B Z X *
|
4
|
+
5, -2, -1, -2, -1, -1, -1, 0, -2, -1, -2, -1, -1, -3, -1, 1, 0, -3, -2, 0, -2, -1, -1, -5, # A
|
5
|
+
-2, 7, -1, -2, -4, 1, 0, -3, 0, -4, -3, 3, -2, -3, -3, -1, -1, -3, -1, -3, -1, 0, -1, -5, # R
|
6
|
+
-1, -1, 7, 2, -2, 0, 0, 0, 1, -3, -4, 0, -2, -4, -2, 1, 0, -4, -2, -3, 5, 0, -1, -5, # N
|
7
|
+
-2, -2, 2, 8, -4, 0, 2, -1, -1, -4, -4, -1, -4, -5, -1, 0, -1, -5, -3, -4, 6, 1, -1, -5, # D
|
8
|
+
-1, -4, -2, -4, 13, -3, -3, -3, -3, -2, -2, -3, -2, -2, -4, -1, -1, -5, -3, -1, -3, -3, -1, -5, # C
|
9
|
+
-1, 1, 0, 0, -3, 7, 2, -2, 1, -3, -2, 2, 0, -4, -1, 0, -1, -1, -1, -3, 0, 4, -1, -5, # Q
|
10
|
+
-1, 0, 0, 2, -3, 2, 6, -3, 0, -4, -3, 1, -2, -3, -1, -1, -1, -3, -2, -3, 1, 5, -1, -5, # E
|
11
|
+
0, -3, 0, -1, -3, -2, -3, 8, -2, -4, -4, -2, -3, -4, -2, 0, -2, -3, -3, -4, -1, -2, -1, -5, # G
|
12
|
+
-2, 0, 1, -1, -3, 1, 0, -2, 10, -4, -3, 0, -1, -1, -2, -1, -2, -3, 2, -4, 0, 0, -1, -5, # H
|
13
|
+
-1, -4, -3, -4, -2, -3, -4, -4, -4, 5, 2, -3, 2, 0, -3, -3, -1, -3, -1, 4, -4, -3, -1, -5, # I
|
14
|
+
-2, -3, -4, -4, -2, -2, -3, -4, -3, 2, 5, -3, 3, 1, -4, -3, -1, -2, -1, 1, -4, -3, -1, -5, # L
|
15
|
+
-1, 3, 0, -1, -3, 2, 1, -2, 0, -3, -3, 6, -2, -4, -1, 0, -1, -3, -2, -3, 0, 1, -1, -5, # K
|
16
|
+
-1, -2, -2, -4, -2, 0, -2, -3, -1, 2, 3, -2, 7, 0, -3, -2, -1, -1, 0, 1, -3, -1, -1, -5, # M
|
17
|
+
-3, -3, -4, -5, -2, -4, -3, -4, -1, 0, 1, -4, 0, 8, -4, -3, -2, 1, 4, -1, -4, -4, -1, -5, # F
|
18
|
+
-1, -3, -2, -1, -4, -1, -1, -2, -2, -3, -4, -1, -3, -4, 10, -1, -1, -4, -3, -3, -2, -1, -1, -5, # P
|
19
|
+
1, -1, 1, 0, -1, 0, -1, 0, -1, -3, -3, 0, -2, -3, -1, 5, 2, -4, -2, -2, 0, 0, -1, -5, # S
|
20
|
+
0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 2, 5, -3, -2, 0, 0, -1, -1, -5, # T
|
21
|
+
-3, -3, -4, -5, -5, -1, -3, -3, -3, -3, -2, -3, -1, 1, -4, -4, -3, 15, 2, -3, -5, -2, -1, -5, # W
|
22
|
+
-2, -1, -2, -3, -3, -1, -2, -3, 2, -1, -1, -2, 0, 4, -3, -2, -2, 2, 8, -1, -3, -2, -1, -5, # Y
|
23
|
+
0, -3, -3, -4, -1, -3, -3, -4, -4, 4, 1, -3, 1, -1, -3, -2, 0, -3, -1, 5, -3, -3, -1, -5, # V
|
24
|
+
-2, -1, 5, 6, -3, 0, 1, -1, 0, -4, -4, 0, -3, -4, -2, 0, 0, -5, -3, -3, 6, 1, -1, -5, # B
|
25
|
+
-1, 0, 0, 1, -3, 4, 5, -2, 0, -3, -3, 1, -1, -4, -1, 0, -1, -2, -2, -3, 1, 5, -1, -5, # Z
|
26
|
+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -5, # X
|
27
|
+
-5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, 1 # *
|
28
28
|
]
|
29
29
|
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module LibSSW
|
2
|
+
BLOSUM62 = [
|
3
|
+
# A R N D C Q E G H I L K M F P S T W Y V B Z X *
|
4
|
+
4, -1, -2, -2, 0, -1, -1, 0, -2, -1, -1, -1, -1, -2, -1, 1, 0, -3, -2, 0, -2, -1, 0, -4, # A
|
5
|
+
-1, 5, 0, -2, -3, 1, 0, -2, 0, -3, -2, 2, -1, -3, -2, -1, -1, -3, -2, -3, -1, 0, -1, -4, # R
|
6
|
+
-2, 0, 6, 1, -3, 0, 0, 0, 1, -3, -3, 0, -2, -3, -2, 1, 0, -4, -2, -3, 3, 0, -1, -4, # N
|
7
|
+
-2, -2, 1, 6, -3, 0, 2, -1, -1, -3, -4, -1, -3, -3, -1, 0, -1, -4, -3, -3, 4, 1, -1, -4, # D
|
8
|
+
0, -3, -3, -3, 9, -3, -4, -3, -3, -1, -1, -3, -1, -2, -3, -1, -1, -2, -2, -1, -3, -3, -2, -4, # C
|
9
|
+
-1, 1, 0, 0, -3, 5, 2, -2, 0, -3, -2, 1, 0, -3, -1, 0, -1, -2, -1, -2, 0, 3, -1, -4, # Q
|
10
|
+
-1, 0, 0, 2, -4, 2, 5, -2, 0, -3, -3, 1, -2, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1, -4, # E
|
11
|
+
0, -2, 0, -1, -3, -2, -2, 6, -2, -4, -4, -2, -3, -3, -2, 0, -2, -2, -3, -3, -1, -2, -1, -4, # G
|
12
|
+
-2, 0, 1, -1, -3, 0, 0, -2, 8, -3, -3, -1, -2, -1, -2, -1, -2, -2, 2, -3, 0, 0, -1, -4, # H
|
13
|
+
-1, -3, -3, -3, -1, -3, -3, -4, -3, 4, 2, -3, 1, 0, -3, -2, -1, -3, -1, 3, -3, -3, -1, -4, # I
|
14
|
+
-1, -2, -3, -4, -1, -2, -3, -4, -3, 2, 4, -2, 2, 0, -3, -2, -1, -2, -1, 1, -4, -3, -1, -4, # L
|
15
|
+
-1, 2, 0, -1, -3, 1, 1, -2, -1, -3, -2, 5, -1, -3, -1, 0, -1, -3, -2, -2, 0, 1, -1, -4, # K
|
16
|
+
-1, -1, -2, -3, -1, 0, -2, -3, -2, 1, 2, -1, 5, 0, -2, -1, -1, -1, -1, 1, -3, -1, -1, -4, # M
|
17
|
+
-2, -3, -3, -3, -2, -3, -3, -3, -1, 0, 0, -3, 0, 6, -4, -2, -2, 1, 3, -1, -3, -3, -1, -4, # F
|
18
|
+
-1, -2, -2, -1, -3, -1, -1, -2, -2, -3, -3, -1, -2, -4, 7, -1, -1, -4, -3, -2, -2, -1, -2, -4, # P
|
19
|
+
1, -1, 1, 0, -1, 0, 0, 0, -1, -2, -2, 0, -1, -2, -1, 4, 1, -3, -2, -2, 0, 0, 0, -4, # S
|
20
|
+
0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 1, 5, -2, -2, 0, -1, -1, 0, -4, # T
|
21
|
+
-3, -3, -4, -4, -2, -2, -3, -2, -2, -3, -2, -3, -1, 1, -4, -3, -2, 11, 2, -3, -4, -3, -2, -4, # W
|
22
|
+
-2, -2, -2, -3, -2, -1, -2, -3, 2, -1, -1, -2, -1, 3, -3, -2, -2, 2, 7, -1, -3, -2, -1, -4, # Y
|
23
|
+
0, -3, -3, -3, -1, -2, -2, -3, -3, 3, 1, -2, 1, -1, -2, -2, 0, -3, -1, 4, -3, -2, -1, -4, # V
|
24
|
+
-2, -1, 3, 4, -3, 0, 1, -1, 0, -3, -4, 0, -3, -3, -2, 0, -1, -4, -3, -3, 4, 1, -1, -4, # B
|
25
|
+
-1, 0, 0, 1, -3, 3, 4, -2, 0, -3, -3, 1, -1, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1, -4, # Z
|
26
|
+
0, -1, -1, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, 0, 0, -2, -1, -1, -1, -1, -1, -4, # X
|
27
|
+
-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, 1, # *
|
28
|
+
]
|
29
|
+
end
|
30
|
+
|
data/lib/libssw/align.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'struct_helper'
|
4
|
+
|
5
|
+
module LibSSW
|
6
|
+
# structure of the alignment result
|
7
|
+
# @!attribute score1
|
8
|
+
# @return [Integer] the best alignment score
|
9
|
+
# @!attribute score2
|
10
|
+
# @return [Integer] sub-optimal alignment score
|
11
|
+
# @!attribute ref_begin1
|
12
|
+
# @return [Integer]
|
13
|
+
# 0-based best alignment beginning position on reference;
|
14
|
+
# ref_begin1 = -1 when the best alignment beginning position is not available
|
15
|
+
# @!attribute ref_end1
|
16
|
+
# @return [Integer] 0-based best alignment ending position on reference
|
17
|
+
# @!attribute read_begin1
|
18
|
+
# @return [Integer]
|
19
|
+
# 0-based best alignment beginning position on read;
|
20
|
+
# read_begin1 = -1 when the best alignment beginning position is not available
|
21
|
+
# @!attribute read_end1
|
22
|
+
# @return [Integer] 0-based best alignment ending position on read
|
23
|
+
# @!attribute ref_end2
|
24
|
+
# @return [Integer] 0-based sub-optimal alignment ending position on read
|
25
|
+
# @!attribute cigar [r]
|
26
|
+
# @return [Array]
|
27
|
+
# best alignment cigar; stored the same as that in BAM format,
|
28
|
+
# high 28 bits: length, low 4 bits: M/I/D (0/1/2);
|
29
|
+
# cigar = 0 when the best alignment path is not available
|
30
|
+
# @!attribute cigar_len
|
31
|
+
# @return [Integer]
|
32
|
+
# length of the cigar string; cigarLen = 0 when the best alignment path is not available
|
33
|
+
class Align < FFI::Align
|
34
|
+
include StructHelper
|
35
|
+
|
36
|
+
def self.keys
|
37
|
+
%i[score1 score2 ref_begin1 ref_end1
|
38
|
+
read_begin1 read_end1 ref_end2 cigar cigar_len cigar_string]
|
39
|
+
end
|
40
|
+
|
41
|
+
# This class is read_only
|
42
|
+
attr_reader(*keys, :ptr, :cstruct)
|
43
|
+
|
44
|
+
def initialize(ptr)
|
45
|
+
@ptr = ptr
|
46
|
+
@cstruct = align = FFI::Align.new(ptr)
|
47
|
+
@score1 = align.score1
|
48
|
+
@score2 = align.score2
|
49
|
+
@ref_begin1 = align.ref_begin1
|
50
|
+
@ref_end1 = align.ref_end1
|
51
|
+
@read_begin1 = align.read_begin1
|
52
|
+
@read_end1 = align.read_end1
|
53
|
+
@ref_end2 = align.ref_end2
|
54
|
+
@cigar_len = align.cigarLen
|
55
|
+
@cigar = cigar_len.positive? ? align.cigar[0, 4 * cigar_len].unpack('L*') : []
|
56
|
+
# Attributes for ruby binding only
|
57
|
+
@cigar_string = LibSSW.array_to_cigar_string(@cigar)
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
data/lib/libssw/ffi.rb
CHANGED
@@ -13,17 +13,11 @@ module LibSSW
|
|
13
13
|
end
|
14
14
|
|
15
15
|
class << self
|
16
|
-
attr_reader :func_map
|
17
|
-
|
18
16
|
def try_extern(signature, *opts)
|
19
17
|
extern(signature, *opts)
|
20
18
|
rescue StandardError => e
|
21
19
|
warn "#{e.class.name}: #{e.message}"
|
22
20
|
end
|
23
|
-
|
24
|
-
def ffi_methods
|
25
|
-
@ffi_methods ||= func_map.each_key.to_a
|
26
|
-
end
|
27
21
|
end
|
28
22
|
|
29
23
|
Align = struct [
|
@@ -39,8 +33,8 @@ module LibSSW
|
|
39
33
|
]
|
40
34
|
|
41
35
|
Profile = struct [
|
42
|
-
'__m128i* byte', # __m128i* profile_byte;
|
43
|
-
'__m128i* word', # __m128i* profile_word;
|
36
|
+
'__m128i* byte', # __m128i* profile_byte; // 0: none
|
37
|
+
'__m128i* word', # __m128i* profile_word; // 0: none
|
44
38
|
'const int8_t* read',
|
45
39
|
'const int8_t* mat',
|
46
40
|
'int32_t readLen',
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'struct_helper'
|
4
|
+
|
5
|
+
module LibSSW
|
6
|
+
# structure of the query profile
|
7
|
+
# @!attribute read
|
8
|
+
# @!attribute mat
|
9
|
+
# @!attribute read_len
|
10
|
+
# @!attribute n
|
11
|
+
# @!attribute bias
|
12
|
+
class Profile < FFI::Profile
|
13
|
+
include StructHelper
|
14
|
+
|
15
|
+
def self.keys
|
16
|
+
%i[read mat read_len n bias]
|
17
|
+
end
|
18
|
+
|
19
|
+
# This class is read_only
|
20
|
+
attr_reader(*keys, :ptr, :cstruct)
|
21
|
+
|
22
|
+
def initialize(ptr)
|
23
|
+
@ptr = ptr
|
24
|
+
@cstruct = profile = LibSSW::FFI::Profile.new(ptr)
|
25
|
+
@read_len = profile.readLen
|
26
|
+
@read = read_len.positive? ? profile.read[0, read_len].unpack('c*') : []
|
27
|
+
@n = profile.n
|
28
|
+
@mat = n.positive? ? profile.mat[0, n * n].unpack('c*') : []
|
29
|
+
@bias = profile.bias
|
30
|
+
end
|
31
|
+
|
32
|
+
def to_ptr
|
33
|
+
# Garbage collection workaround
|
34
|
+
# cstruct.read = p @ptr.instance_variable_get(:@read_str)
|
35
|
+
# cstruct.mat = p @ptr.instance_variable_get(:@mat_str)
|
36
|
+
# cstruct.readLen = p @ptr.instance_variable_get(:@read_len)
|
37
|
+
# cstruct.n = p @ptr.instance_variable_get(:@n)
|
38
|
+
@ptr
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
data/lib/libssw/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: libssw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kojix2
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-01-
|
11
|
+
date: 2021-01-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: fiddle
|
@@ -94,6 +94,20 @@ dependencies:
|
|
94
94
|
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: simplecov
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
97
111
|
description: Ruby bindings for libssw
|
98
112
|
email:
|
99
113
|
- 2xijok@gmail.com
|
@@ -107,7 +121,11 @@ files:
|
|
107
121
|
- exe/rbssw
|
108
122
|
- lib/libssw.rb
|
109
123
|
- lib/libssw/BLOSUM50.rb
|
124
|
+
- lib/libssw/BLOSUM62.rb
|
125
|
+
- lib/libssw/align.rb
|
110
126
|
- lib/libssw/ffi.rb
|
127
|
+
- lib/libssw/profile.rb
|
128
|
+
- lib/libssw/struct_helper.rb
|
111
129
|
- lib/libssw/version.rb
|
112
130
|
homepage: https://github.com/kojix2/ruby-libssw
|
113
131
|
licenses:
|
@@ -124,9 +142,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
124
142
|
version: '2.5'
|
125
143
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
126
144
|
requirements:
|
127
|
-
- - "
|
145
|
+
- - ">="
|
128
146
|
- !ruby/object:Gem::Version
|
129
|
-
version:
|
147
|
+
version: '0'
|
130
148
|
requirements: []
|
131
149
|
rubygems_version: 3.2.3
|
132
150
|
signing_key:
|