libssw 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +58 -10
- data/exe/rbssw +8 -31
- data/lib/libssw.rb +129 -21
- data/lib/libssw/align.rb +6 -5
- data/lib/libssw/profile.rb +10 -9
- data/lib/libssw/version.rb +1 -1
- metadata +2 -3
- data/lib/libssw/struct_helper.rb +0 -13
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 182b0d30cdf3d9a93b100b05f96d469d612b09bcfcfd2afc3cb63a4c93501d17
|
4
|
+
data.tar.gz: 99ec370125c707acff3a99664706ce8691f2e66e48d22a49af0362894b2c6b72
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8a49bf4924da5d12310b691f9f9335d96cdce0ef4b82ac1a38aeb6293c92d939cd17e4b0e6e99a4c427a0f4074f94db1cca91ca6c26b76a0551e558371b4aee7
|
7
|
+
data.tar.gz: 5af24c2124cd53f8aa54dc905172eb6f9b3d7c57b33fb87d1884ca2ab1a90acf0cb1f2ca73cb88eecda223e3f723836cca2dbae9cb0c1ac665d097993645ee19
|
data/README.md
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
# ruby-libssw
|
2
2
|
|
3
3
|

|
4
|
+
[RubyGems](https://rubygems.org/gems/libssw)
|
5
|
+
[API Documentation](https://rubydoc.info/gems/libssw)
|
4
6
|
|
5
7
|
:checkered_flag: [libssw](https://github.com/mengyao/Complete-Striped-Smith-Waterman-Library) - fast SIMD parallelized implementation of the Smith-Waterman algorithm - for Ruby
|
6
8
|
|
@@ -35,18 +37,64 @@ bundle exec rake install
|
|
35
37
|
```ruby
|
36
38
|
require 'libssw'
|
37
39
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
40
|
+
SSW = LibSSW
|
41
|
+
|
42
|
+
ref_str = "AAAAAAAAACGTTAAAAAAAAAA"
|
43
|
+
ref_int = SSW.dna_to_int_array(ref_str)
|
44
|
+
# [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
|
45
|
+
|
46
|
+
read_str1 = "ACGTT"
|
47
|
+
read_str2 = SSW.dna_complement(read_str1)
|
48
|
+
read_int1 = SSW.dna_to_int_array(read_str1)
|
49
|
+
# [0, 1, 2, 3, 3]
|
50
|
+
read_int2 = SSW.dna_to_int_array(read_str2)
|
51
|
+
# [0, 0, 1, 2, 3]
|
52
|
+
|
53
|
+
mat = SSW.create_scoring_matrix(SSW::DNAElements, 2, -2)
|
54
|
+
# mat = [2, -2, -2, -2, 0,
|
55
|
+
# -2, 2, -2, -2, 0,
|
56
|
+
# -2, -2, 2, -2, 0,
|
57
|
+
# -2, -2, -2, 2, 0,
|
58
|
+
# 0, 0, 0, 0, 0]
|
59
|
+
|
60
|
+
profile1 = LibSSW.ssw_init(read_int1, mat)
|
61
|
+
align1 = LibSSW.ssw_align(profile1, ref_int, 3, 1, 1, 0, 0, 15)
|
62
|
+
pp align1.to_h
|
63
|
+
# {
|
64
|
+
# :score1 => 10,
|
65
|
+
# :score2 => 0,
|
66
|
+
# :ref_begin1 => 8,
|
67
|
+
# :ref_end1 => 12,
|
68
|
+
# :read_begin1 => 0,
|
69
|
+
# :read_end1 => 4,
|
70
|
+
# :ref_end2 => 0,
|
71
|
+
# :cigar => [80],
|
72
|
+
# :cigar_len => 1,
|
73
|
+
# :cigar_string => "5M"
|
74
|
+
# }
|
75
|
+
|
76
|
+
profile2 = LibSSW.ssw_init(read_int2, mat)
|
77
|
+
align2 = LibSSW.ssw_align(profile2, ref_int, 3, 1, 1, 0, 0, 15)
|
78
|
+
pp align2.to_h
|
79
|
+
# {
|
80
|
+
# :score1 => 10,
|
81
|
+
# :score2 => 0,
|
82
|
+
# :ref_begin1 => 7,
|
83
|
+
# :ref_end1 => 11,
|
84
|
+
# :read_begin1 => 0,
|
85
|
+
# :read_end1 => 4,
|
86
|
+
# :ref_end2 => 0,
|
87
|
+
# :cigar => [80],
|
88
|
+
# :cigar_len => 1,
|
89
|
+
# :cigar_string => "5M"
|
90
|
+
# }
|
48
91
|
```
|
49
92
|
|
93
|
+
|
94
|
+
## Documentation
|
95
|
+
|
96
|
+
* [API Documentation](https://rubydoc.info/gems/libssw)
|
97
|
+
|
50
98
|
## Development
|
51
99
|
|
52
100
|
```sh
|
data/exe/rbssw
CHANGED
@@ -99,13 +99,9 @@ lScore = nil
|
|
99
99
|
if opts[:bprotein]
|
100
100
|
# load AA score matrix
|
101
101
|
if !opts[:smatrix]
|
102
|
-
lEle =
|
103
|
-
|
104
|
-
|
105
|
-
dEle2Int[ele] = i
|
106
|
-
dEle2Int[ele.downcase] = i
|
107
|
-
dInt2Ele[i] = ele
|
108
|
-
end
|
102
|
+
lEle = SSW::AAELEMENTS
|
103
|
+
dEle2Int = SSW::AA2INT
|
104
|
+
dInt2Ele = SSW::INT2AA
|
109
105
|
nEleNum = lEle.size
|
110
106
|
lScore = SSW::Blosum50
|
111
107
|
else
|
@@ -113,31 +109,12 @@ if opts[:bprotein]
|
|
113
109
|
end
|
114
110
|
elsif !opts[:smatrix]
|
115
111
|
# init DNA score matrix
|
116
|
-
lEle =
|
117
|
-
dRc =
|
118
|
-
|
119
|
-
|
120
|
-
dEle2Int[ele.downcase] = i
|
121
|
-
dInt2Ele[i] = ele
|
122
|
-
end
|
123
|
-
# dEle2Int = {'A': 0, 'a': 0, 'C': 1, 'G': 2, 'g': 2, 'c': 1, 'N': 4, 'T': 3, 'n': 4, 't': 3}
|
124
|
-
# dInt2Ele = {0: 'A', 1: 'C', 2: 'G', 3: 'T', 4: 'N'}
|
112
|
+
lEle = SSW::DNAElements
|
113
|
+
dRc = SSW::DNARC
|
114
|
+
dEle2Int = SSW::DNA2INT
|
115
|
+
dInt2Ele = SSW::INT2DNA
|
125
116
|
nEleNum = lEle.size # 5
|
126
|
-
lScore =
|
127
|
-
(nEleNum - 1).times do |i|
|
128
|
-
(nEleNum - 1).times do |j|
|
129
|
-
lScore[i * nEleNum + j] = if lEle[i] == lEle[j]
|
130
|
-
opts[:nmatch]
|
131
|
-
else
|
132
|
-
-opts[:nmismatch]
|
133
|
-
end
|
134
|
-
end
|
135
|
-
end
|
136
|
-
# lScore = [ 2, -2, -2, -2, 0,
|
137
|
-
# -2, 2, -2, -2, 0,
|
138
|
-
# -2, -2, 2, -2, 0,
|
139
|
-
# -2, -2, -2, 2, 0,
|
140
|
-
# 0, 0, 0, 0, 0 ]
|
117
|
+
lScore = LibSSW.create_scoring_matrix(lEle, opts[:nmatch], -opts[:nmismatch])
|
141
118
|
end
|
142
119
|
|
143
120
|
warn 'Reverse complement alignment is not available for protein sequences.' if opts[:bbest] && opts[:bprotein]
|
data/lib/libssw.rb
CHANGED
@@ -30,6 +30,64 @@ module LibSSW
|
|
30
30
|
require_relative 'libssw/profile'
|
31
31
|
require_relative 'libssw/align'
|
32
32
|
|
33
|
+
AAELEMENTS = ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G',
|
34
|
+
'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S',
|
35
|
+
'T', 'W', 'Y', 'V', 'B', 'Z', 'X', '*']
|
36
|
+
|
37
|
+
AA2INT = { 'A' => 0, 'a' => 0,
|
38
|
+
'R' => 1, 'r' => 1,
|
39
|
+
'N' => 2, 'n' => 2,
|
40
|
+
'D' => 3, 'd' => 3,
|
41
|
+
'C' => 4, 'c' => 4,
|
42
|
+
'Q' => 5, 'q' => 5,
|
43
|
+
'E' => 6, 'e' => 6,
|
44
|
+
'G' => 7, 'g' => 7,
|
45
|
+
'H' => 8, 'h' => 8,
|
46
|
+
'I' => 9, 'i' => 9,
|
47
|
+
'L' => 10, 'l' => 10,
|
48
|
+
'K' => 11, 'k' => 11,
|
49
|
+
'M' => 12, 'm' => 12,
|
50
|
+
'F' => 13, 'f' => 13,
|
51
|
+
'P' => 14, 'p' => 14,
|
52
|
+
'S' => 15, 's' => 15,
|
53
|
+
'T' => 16, 't' => 16,
|
54
|
+
'W' => 17, 'w' => 17,
|
55
|
+
'Y' => 18, 'y' => 18,
|
56
|
+
'V' => 19, 'v' => 19,
|
57
|
+
'B' => 20, 'b' => 20,
|
58
|
+
'Z' => 21, 'z' => 21,
|
59
|
+
'X' => 22, 'x' => 22,
|
60
|
+
'*' => 23 }
|
61
|
+
|
62
|
+
INT2AA = { 0 => 'A', 1 => 'R', 2 => 'N', 3 => 'D',
|
63
|
+
4 => 'C', 5 => 'Q', 6 => 'E', 7 => 'G',
|
64
|
+
8 => 'H', 9 => 'I', 10 => 'L', 11 => 'K',
|
65
|
+
12 => 'M', 13 => 'F', 14 => 'P', 15 => 'S',
|
66
|
+
16 => 'T', 17 => 'W', 18 => 'Y', 19 => 'V',
|
67
|
+
20 => 'B', 21 => 'Z', 22 => 'X', 23 => '*' }
|
68
|
+
|
69
|
+
DNAElements = %w[A C G T N]
|
70
|
+
|
71
|
+
DNA2INT = { 'A' => 0, 'a' => 0,
|
72
|
+
'C' => 1, 'c' => 1,
|
73
|
+
'G' => 2, 'g' => 2,
|
74
|
+
'T' => 3, 't' => 3,
|
75
|
+
'N' => 4, 'n' => 4 }
|
76
|
+
|
77
|
+
INT2DNA = { 0 => 'A', 1 => 'C', 2 => 'G', 3 => 'T', 4 => 'N' }
|
78
|
+
|
79
|
+
# reverse complement
|
80
|
+
DNARC = { 'A' => 'T',
|
81
|
+
'C' => 'G',
|
82
|
+
'G' => 'C',
|
83
|
+
'T' => 'A',
|
84
|
+
'N' => 'N',
|
85
|
+
'a' => 'T',
|
86
|
+
'c' => 'G',
|
87
|
+
'g' => 'C',
|
88
|
+
't' => 'A',
|
89
|
+
'n' => 'N' }
|
90
|
+
|
33
91
|
class << self
|
34
92
|
# Create the query profile using the query sequence.
|
35
93
|
# @param read [Array] query sequence; the query sequence needs to be numbers
|
@@ -56,27 +114,19 @@ module LibSSW
|
|
56
114
|
n,
|
57
115
|
score_size
|
58
116
|
)
|
59
|
-
|
60
|
-
#
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
cstruct.mat = mat_str
|
73
|
-
cstruct.readLen = read_len
|
74
|
-
cstruct.n = n
|
75
|
-
ptr.instance_variable_set(:@read_str, read_str)
|
76
|
-
ptr.instance_variable_set(:@read_len, read_len)
|
77
|
-
ptr.instance_variable_set(:@mat_str, mat_str)
|
78
|
-
ptr.instance_variable_set(:@n, n)
|
79
|
-
profile
|
117
|
+
# Garbage collection workaround
|
118
|
+
#
|
119
|
+
# * The following code will cause a segmentation violation when manually
|
120
|
+
# releasing memory. The reason is unknown.
|
121
|
+
# * func_map is only available in newer versions of fiddle.
|
122
|
+
# ptr.free = FFI.instance_variable_get(:@func_map)['init_destroy']
|
123
|
+
ptr.instance_variable_set(:@read_str, read_str)
|
124
|
+
ptr.instance_variable_set(:@read_len, read_len)
|
125
|
+
ptr.instance_variable_set(:@mat_str, mat_str)
|
126
|
+
ptr.instance_variable_set(:@n, n)
|
127
|
+
ptr.instance_variable_set(:@score_size, score_size)
|
128
|
+
|
129
|
+
LibSSW::Profile.new(ptr)
|
80
130
|
end
|
81
131
|
|
82
132
|
# Release the memory allocated by function ssw_init.
|
@@ -140,6 +190,8 @@ module LibSSW
|
|
140
190
|
# Not sure yet if we should set the instance variable to the pointer as a
|
141
191
|
# garbage collection workaround.
|
142
192
|
# For example: instance_variable_set(:@ref_str, ref_str)
|
193
|
+
#
|
194
|
+
# ptr.free = FFI.instance_variable_get(:@func_map)['align_destroy']
|
143
195
|
LibSSW::Align.new(ptr)
|
144
196
|
end
|
145
197
|
|
@@ -189,5 +241,61 @@ module LibSSW
|
|
189
241
|
end
|
190
242
|
cigar_string
|
191
243
|
end
|
244
|
+
|
245
|
+
# Create the scoring matrix for the Smith-Waterman algorithm.
|
246
|
+
# @param [Array] elements
|
247
|
+
# @param [Integer] match_score
|
248
|
+
# @param [Integer] mismatch_score
|
249
|
+
def create_scoring_matrix(elements, match_score, mismatch_score)
|
250
|
+
size = elements.size
|
251
|
+
score = Array.new(size * size, 0)
|
252
|
+
(size - 1).times do |i|
|
253
|
+
(size - 1).times do |j|
|
254
|
+
score[i * size + j] = \
|
255
|
+
(elements[i] == elements[j] ? match_score : mismatch_score)
|
256
|
+
end
|
257
|
+
end
|
258
|
+
score
|
259
|
+
end
|
260
|
+
|
261
|
+
# @param [String] seq
|
262
|
+
def dna_to_int_array(seq)
|
263
|
+
raise ArgumentError, 'seq must be a string' unless seq.is_a? String
|
264
|
+
|
265
|
+
seq.each_char.map do |base|
|
266
|
+
DNA2INT[base] || DNA2INT['N']
|
267
|
+
end
|
268
|
+
end
|
269
|
+
|
270
|
+
def dna_complement(seq)
|
271
|
+
seq.each_char.map do |base|
|
272
|
+
DNARC[base]
|
273
|
+
end.join.reverse
|
274
|
+
end
|
275
|
+
|
276
|
+
# @param [Array] arr array of integers
|
277
|
+
def int_array_to_dna(arr)
|
278
|
+
raise ArgumentError, 'arr must be an Array' unless arr.is_a? Array
|
279
|
+
|
280
|
+
arr.map do |i|
|
281
|
+
INT2DNA[i] || 'N'
|
282
|
+
end.join
|
283
|
+
end
|
284
|
+
|
285
|
+
def aaseq_to_int_array(seq)
|
286
|
+
raise ArgumentError, 'seq must be a string' unless seq.is_a? String
|
287
|
+
|
288
|
+
seq.each_char.map do |base|
|
289
|
+
AA2INT[base] || AA2INT['*']
|
290
|
+
end
|
291
|
+
end
|
292
|
+
|
293
|
+
def int_array_to_aaseq(arr)
|
294
|
+
raise ArgumentError, 'arr must be an Array' unless arr.is_a? Array
|
295
|
+
|
296
|
+
arr.map do |i|
|
297
|
+
INT2AA[i] || '*'
|
298
|
+
end.join
|
299
|
+
end
|
192
300
|
end
|
193
301
|
end
|
data/lib/libssw/align.rb
CHANGED
@@ -1,7 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require_relative 'struct_helper'
|
4
|
-
|
5
3
|
module LibSSW
|
6
4
|
# structure of the alignment result
|
7
5
|
# @!attribute score1
|
@@ -31,15 +29,13 @@ module LibSSW
|
|
31
29
|
# @return [Integer]
|
32
30
|
# length of the cigar string; cigarLen = 0 when the best alignment path is not available
|
33
31
|
class Align < FFI::Align
|
34
|
-
include StructHelper
|
35
|
-
|
36
32
|
def self.keys
|
37
33
|
%i[score1 score2 ref_begin1 ref_end1
|
38
34
|
read_begin1 read_end1 ref_end2 cigar cigar_len cigar_string]
|
39
35
|
end
|
40
36
|
|
41
37
|
# This class is read_only
|
42
|
-
attr_reader(*keys
|
38
|
+
attr_reader(*keys)
|
43
39
|
|
44
40
|
def initialize(ptr)
|
45
41
|
@ptr = ptr
|
@@ -55,6 +51,11 @@ module LibSSW
|
|
55
51
|
@cigar = cigar_len.positive? ? align.cigar[0, 4 * cigar_len].unpack('L*') : []
|
56
52
|
# Attributes for ruby binding only
|
57
53
|
@cigar_string = LibSSW.array_to_cigar_string(@cigar)
|
54
|
+
LibSSW.align_destroy(ptr)
|
55
|
+
end
|
56
|
+
|
57
|
+
def to_h
|
58
|
+
self.class.keys.map { |k| [k, __send__(k)] }.to_h
|
58
59
|
end
|
59
60
|
end
|
60
61
|
end
|
data/lib/libssw/profile.rb
CHANGED
@@ -1,7 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require_relative 'struct_helper'
|
4
|
-
|
5
3
|
module LibSSW
|
6
4
|
# structure of the query profile
|
7
5
|
# @!attribute read
|
@@ -10,8 +8,6 @@ module LibSSW
|
|
10
8
|
# @!attribute n
|
11
9
|
# @!attribute bias
|
12
10
|
class Profile < FFI::Profile
|
13
|
-
include StructHelper
|
14
|
-
|
15
11
|
def self.keys
|
16
12
|
%i[read mat read_len n bias]
|
17
13
|
end
|
@@ -31,11 +27,16 @@ module LibSSW
|
|
31
27
|
|
32
28
|
def to_ptr
|
33
29
|
# Garbage collection workaround
|
34
|
-
#
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
30
|
+
# Preventing Garbage Collection --force
|
31
|
+
cstruct.read = ptr.instance_variable_get(:@read_str)
|
32
|
+
cstruct.mat = ptr.instance_variable_get(:@mat_str)
|
33
|
+
cstruct.readLen = ptr.instance_variable_get(:@read_len)
|
34
|
+
cstruct.n = ptr.instance_variable_get(:@n)
|
35
|
+
ptr
|
36
|
+
end
|
37
|
+
|
38
|
+
def to_h
|
39
|
+
self.class.keys.map { |k| [k, __send__(k)] }.to_h
|
39
40
|
end
|
40
41
|
end
|
41
42
|
end
|
data/lib/libssw/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: libssw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kojix2
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-01-
|
11
|
+
date: 2021-01-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: fiddle
|
@@ -125,7 +125,6 @@ files:
|
|
125
125
|
- lib/libssw/align.rb
|
126
126
|
- lib/libssw/ffi.rb
|
127
127
|
- lib/libssw/profile.rb
|
128
|
-
- lib/libssw/struct_helper.rb
|
129
128
|
- lib/libssw/version.rb
|
130
129
|
homepage: https://github.com/kojix2/ruby-libssw
|
131
130
|
licenses:
|