libssw 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +58 -10
- data/exe/rbssw +8 -31
- data/lib/libssw.rb +129 -21
- data/lib/libssw/align.rb +6 -5
- data/lib/libssw/profile.rb +10 -9
- data/lib/libssw/version.rb +1 -1
- metadata +2 -3
- data/lib/libssw/struct_helper.rb +0 -13
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 182b0d30cdf3d9a93b100b05f96d469d612b09bcfcfd2afc3cb63a4c93501d17
|
4
|
+
data.tar.gz: 99ec370125c707acff3a99664706ce8691f2e66e48d22a49af0362894b2c6b72
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8a49bf4924da5d12310b691f9f9335d96cdce0ef4b82ac1a38aeb6293c92d939cd17e4b0e6e99a4c427a0f4074f94db1cca91ca6c26b76a0551e558371b4aee7
|
7
|
+
data.tar.gz: 5af24c2124cd53f8aa54dc905172eb6f9b3d7c57b33fb87d1884ca2ab1a90acf0cb1f2ca73cb88eecda223e3f723836cca2dbae9cb0c1ac665d097993645ee19
|
data/README.md
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
# ruby-libssw
|
2
2
|
|
3
3
|
![test](https://github.com/kojix2/ruby-libssw/workflows/CI/badge.svg)
|
4
|
+
[![Gem Version](https://img.shields.io/gem/v/libssw?color=brightgreen)](https://rubygems.org/gems/libssw)
|
5
|
+
[![Docs Latest](https://img.shields.io/badge/docs-latest-blue.svg)](https://rubydoc.info/gems/libssw)
|
4
6
|
|
5
7
|
:checkered_flag: [libssw](https://github.com/mengyao/Complete-Striped-Smith-Waterman-Library) - fast SIMD parallelized implementation of the Smith-Waterman algorithm - for Ruby
|
6
8
|
|
@@ -35,18 +37,64 @@ bundle exec rake install
|
|
35
37
|
```ruby
|
36
38
|
require 'libssw'
|
37
39
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
40
|
+
SSW = LibSSW
|
41
|
+
|
42
|
+
ref_str = "AAAAAAAAACGTTAAAAAAAAAA"
|
43
|
+
ref_int = SSW.dna_to_int_array(ref_str)
|
44
|
+
# [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
|
45
|
+
|
46
|
+
read_str1 = "ACGTT"
|
47
|
+
read_str2 = SSW.dna_complement(read_str1)
|
48
|
+
read_int1 = SSW.dna_to_int_array(read_str1)
|
49
|
+
# [0, 1, 2, 3, 3]
|
50
|
+
read_int2 = SSW.dna_to_int_array(read_str2)
|
51
|
+
# [0, 0, 1, 2, 3]
|
52
|
+
|
53
|
+
mat = SSW.create_scoring_matrix(SSW::DNAElements, 2, -2)
|
54
|
+
# mat = [2, -2, -2, -2, 0,
|
55
|
+
# -2, 2, -2, -2, 0,
|
56
|
+
# -2, -2, 2, -2, 0,
|
57
|
+
# -2, -2, -2, 2, 0,
|
58
|
+
# 0, 0, 0, 0, 0]
|
59
|
+
|
60
|
+
profile1 = LibSSW.ssw_init(read_int1, mat)
|
61
|
+
align1 = LibSSW.ssw_align(profile1, ref_int, 3, 1, 1, 0, 0, 15)
|
62
|
+
pp align1.to_h
|
63
|
+
# {
|
64
|
+
# :score1 => 10,
|
65
|
+
# :score2 => 0,
|
66
|
+
# :ref_begin1 => 8,
|
67
|
+
# :ref_end1 => 12,
|
68
|
+
# :read_begin1 => 0,
|
69
|
+
# :read_end1 => 4,
|
70
|
+
# :ref_end2 => 0,
|
71
|
+
# :cigar => [80],
|
72
|
+
# :cigar_len => 1,
|
73
|
+
# :cigar_string => "5M"
|
74
|
+
# }
|
75
|
+
|
76
|
+
profile2 = LibSSW.ssw_init(read_int2, mat)
|
77
|
+
align2 = LibSSW.ssw_align(profile2, ref_int, 3, 1, 1, 0, 0, 15)
|
78
|
+
pp align2.to_h
|
79
|
+
# {
|
80
|
+
# :score1 => 10,
|
81
|
+
# :score2 => 0,
|
82
|
+
# :ref_begin1 => 7,
|
83
|
+
# :ref_end1 => 11,
|
84
|
+
# :read_begin1 => 0,
|
85
|
+
# :read_end1 => 4,
|
86
|
+
# :ref_end2 => 0,
|
87
|
+
# :cigar => [80],
|
88
|
+
# :cigar_len => 1,
|
89
|
+
# :cigar_string => "5M"
|
90
|
+
# }
|
48
91
|
```
|
49
92
|
|
93
|
+
|
94
|
+
## Documentation
|
95
|
+
|
96
|
+
* [API Documentation](https://rubydoc.info/gems/libssw)
|
97
|
+
|
50
98
|
## Development
|
51
99
|
|
52
100
|
```sh
|
data/exe/rbssw
CHANGED
@@ -99,13 +99,9 @@ lScore = nil
|
|
99
99
|
if opts[:bprotein]
|
100
100
|
# load AA score matrix
|
101
101
|
if !opts[:smatrix]
|
102
|
-
lEle =
|
103
|
-
|
104
|
-
|
105
|
-
dEle2Int[ele] = i
|
106
|
-
dEle2Int[ele.downcase] = i
|
107
|
-
dInt2Ele[i] = ele
|
108
|
-
end
|
102
|
+
lEle = SSW::AAELEMENTS
|
103
|
+
dEle2Int = SSW::AA2INT
|
104
|
+
dInt2Ele = SSW::INT2AA
|
109
105
|
nEleNum = lEle.size
|
110
106
|
lScore = SSW::Blosum50
|
111
107
|
else
|
@@ -113,31 +109,12 @@ if opts[:bprotein]
|
|
113
109
|
end
|
114
110
|
elsif !opts[:smatrix]
|
115
111
|
# init DNA score matrix
|
116
|
-
lEle =
|
117
|
-
dRc =
|
118
|
-
|
119
|
-
|
120
|
-
dEle2Int[ele.downcase] = i
|
121
|
-
dInt2Ele[i] = ele
|
122
|
-
end
|
123
|
-
# dEle2Int = {'A': 0, 'a': 0, 'C': 1, 'G': 2, 'g': 2, 'c': 1, 'N': 4, 'T': 3, 'n': 4, 't': 3}
|
124
|
-
# dInt2Ele = {0: 'A', 1: 'C', 2: 'G', 3: 'T', 4: 'N'}
|
112
|
+
lEle = SSW::DNAELEMENTS
|
113
|
+
dRc = SSW::DNARC
|
114
|
+
dEle2Int = SSW::DNA2INT
|
115
|
+
dInt2Ele = SSW::INT2DNA
|
125
116
|
nEleNum = lEle.size # 5
|
126
|
-
lScore =
|
127
|
-
(nEleNum - 1).times do |i|
|
128
|
-
(nEleNum - 1).times do |j|
|
129
|
-
lScore[i * nEleNum + j] = if lEle[i] == lEle[j]
|
130
|
-
opts[:nmatch]
|
131
|
-
else
|
132
|
-
-opts[:nmismatch]
|
133
|
-
end
|
134
|
-
end
|
135
|
-
end
|
136
|
-
# lScore = [ 2, -2, -2, -2, 0,
|
137
|
-
# -2, 2, -2, -2, 0,
|
138
|
-
# -2, -2, 2, -2, 0,
|
139
|
-
# -2, -2, -2, 2, 0,
|
140
|
-
# 0, 0, 0, 0, 0 ]
|
117
|
+
lScore = LibSSW.create_scoring_matrix(lEle, opts[:nmatch], -opts[:nmismatch])
|
141
118
|
end
|
142
119
|
|
143
120
|
warn 'Reverse complement alignment is not available for protein sequences.' if opts[:bbest] && opts[:bprotein]
|
data/lib/libssw.rb
CHANGED
@@ -30,6 +30,64 @@ module LibSSW
|
|
30
30
|
require_relative 'libssw/profile'
|
31
31
|
require_relative 'libssw/align'
|
32
32
|
|
33
|
+
AAELEMENTS = ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G',
|
34
|
+
'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S',
|
35
|
+
'T', 'W', 'Y', 'V', 'B', 'Z', 'X', '*']
|
36
|
+
|
37
|
+
AA2INT = { 'A' => 0, 'a' => 0,
|
38
|
+
'R' => 1, 'r' => 1,
|
39
|
+
'N' => 2, 'n' => 2,
|
40
|
+
'D' => 3, 'd' => 3,
|
41
|
+
'C' => 4, 'c' => 4,
|
42
|
+
'Q' => 5, 'q' => 5,
|
43
|
+
'E' => 6, 'e' => 6,
|
44
|
+
'G' => 7, 'g' => 7,
|
45
|
+
'H' => 8, 'h' => 8,
|
46
|
+
'I' => 9, 'i' => 9,
|
47
|
+
'L' => 10, 'l' => 10,
|
48
|
+
'K' => 11, 'k' => 11,
|
49
|
+
'M' => 12, 'm' => 12,
|
50
|
+
'F' => 13, 'f' => 13,
|
51
|
+
'P' => 14, 'p' => 14,
|
52
|
+
'S' => 15, 's' => 15,
|
53
|
+
'T' => 16, 't' => 16,
|
54
|
+
'W' => 17, 'w' => 17,
|
55
|
+
'Y' => 18, 'y' => 18,
|
56
|
+
'V' => 19, 'v' => 19,
|
57
|
+
'B' => 20, 'b' => 20,
|
58
|
+
'Z' => 21, 'z' => 21,
|
59
|
+
'X' => 22, 'x' => 22,
|
60
|
+
'*' => 23 }
|
61
|
+
|
62
|
+
INT2AA = { 0 => 'A', 1 => 'R', 2 => 'N', 3 => 'D',
|
63
|
+
4 => 'C', 5 => 'Q', 6 => 'E', 7 => 'G',
|
64
|
+
8 => 'H', 9 => 'I', 10 => 'L', 11 => 'K',
|
65
|
+
12 => 'M', 13 => 'F', 14 => 'P', 15 => 'S',
|
66
|
+
16 => 'T', 17 => 'W', 18 => 'Y', 19 => 'V',
|
67
|
+
20 => 'B', 21 => 'Z', 22 => 'X', 23 => '*' }
|
68
|
+
|
69
|
+
DNAElements = %w[A C G T N]
|
70
|
+
|
71
|
+
DNA2INT = { 'A' => 0, 'a' => 0,
|
72
|
+
'C' => 1, 'c' => 1,
|
73
|
+
'G' => 2, 'g' => 2,
|
74
|
+
'T' => 3, 't' => 3,
|
75
|
+
'N' => 4, 'n' => 4 }
|
76
|
+
|
77
|
+
INT2DNA = { 0 => 'A', 1 => 'C', 2 => 'G', 3 => 'T', 4 => 'N' }
|
78
|
+
|
79
|
+
# reverse complement
|
80
|
+
DNARC = { 'A' => 'T',
|
81
|
+
'C' => 'G',
|
82
|
+
'G' => 'C',
|
83
|
+
'T' => 'A',
|
84
|
+
'N' => 'N',
|
85
|
+
'a' => 'T',
|
86
|
+
'c' => 'G',
|
87
|
+
'g' => 'C',
|
88
|
+
't' => 'A',
|
89
|
+
'n' => 'N' }
|
90
|
+
|
33
91
|
class << self
|
34
92
|
# Create the query profile using the query sequence.
|
35
93
|
# @param read [Array] query sequence; the query sequence needs to be numbers
|
@@ -56,27 +114,19 @@ module LibSSW
|
|
56
114
|
n,
|
57
115
|
score_size
|
58
116
|
)
|
59
|
-
|
60
|
-
#
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
cstruct.mat = mat_str
|
73
|
-
cstruct.readLen = read_len
|
74
|
-
cstruct.n = n
|
75
|
-
ptr.instance_variable_set(:@read_str, read_str)
|
76
|
-
ptr.instance_variable_set(:@read_len, read_len)
|
77
|
-
ptr.instance_variable_set(:@mat_str, mat_str)
|
78
|
-
ptr.instance_variable_set(:@n, n)
|
79
|
-
profile
|
117
|
+
# Garbage collection workaround
|
118
|
+
#
|
119
|
+
# * The following code will cause a segmentation violation when manually
|
120
|
+
# releasing memory. The reason is unknown.
|
121
|
+
# * func_map is only available in newer versions of fiddle.
|
122
|
+
# ptr.free = FFI.instance_variable_get(:@func_map)['init_destroy']
|
123
|
+
ptr.instance_variable_set(:@read_str, read_str)
|
124
|
+
ptr.instance_variable_set(:@read_len, read_len)
|
125
|
+
ptr.instance_variable_set(:@mat_str, mat_str)
|
126
|
+
ptr.instance_variable_set(:@n, n)
|
127
|
+
ptr.instance_variable_set(:@score_size, score_size)
|
128
|
+
|
129
|
+
LibSSW::Profile.new(ptr)
|
80
130
|
end
|
81
131
|
|
82
132
|
# Release the memory allocated by function ssw_init.
|
@@ -140,6 +190,8 @@ module LibSSW
|
|
140
190
|
# Not sure yet if we should set the instance variable to the pointer as a
|
141
191
|
# garbage collection workaround.
|
142
192
|
# For example: instance_variable_set(:@ref_str, ref_str)
|
193
|
+
#
|
194
|
+
# ptr.free = FFI.instance_variable_get(:@func_map)['align_destroy']
|
143
195
|
LibSSW::Align.new(ptr)
|
144
196
|
end
|
145
197
|
|
@@ -189,5 +241,61 @@ module LibSSW
|
|
189
241
|
end
|
190
242
|
cigar_string
|
191
243
|
end
|
244
|
+
|
245
|
+
# Create scoring matrix of Smith-Waterman algorithm.
|
246
|
+
# @param [Array] elements
|
247
|
+
# @param [Integer] match_score
|
248
|
+
# @param [Integer] mismatch_score
|
249
|
+
def create_scoring_matrix(elements, match_score, mismatch_score)
|
250
|
+
size = elements.size
|
251
|
+
score = Array.new(size * size, 0)
|
252
|
+
(size - 1).times do |i|
|
253
|
+
(size - 1).times do |j|
|
254
|
+
score[i * size + j] = \
|
255
|
+
(elements[i] == elements[j] ? match_score : mismatch_score)
|
256
|
+
end
|
257
|
+
end
|
258
|
+
score
|
259
|
+
end
|
260
|
+
|
261
|
+
# @param [String] seq
|
262
|
+
def dna_to_int_array(seq)
|
263
|
+
raise ArgumentError, 'seq must be a string' unless seq.is_a? String
|
264
|
+
|
265
|
+
seq.each_char.map do |base|
|
266
|
+
DNA2INT[base] || DNA2INT['N']
|
267
|
+
end
|
268
|
+
end
|
269
|
+
|
270
|
+
def dna_complement(seq)
|
271
|
+
seq.each_char.map do |base|
|
272
|
+
DNARC[base]
|
273
|
+
end.join.reverse
|
274
|
+
end
|
275
|
+
|
276
|
+
# @param [Array] arr int array
|
277
|
+
def int_array_to_dna(arr)
|
278
|
+
raise ArgumentError, 'arr must be an Array' unless arr.is_a? Array
|
279
|
+
|
280
|
+
arr.map do |i|
|
281
|
+
INT2DNA[i] || 'N'
|
282
|
+
end.join
|
283
|
+
end
|
284
|
+
|
285
|
+
def aaseq_to_int_array(seq)
|
286
|
+
raise ArgumentError, 'seq must be a string' unless seq.is_a? String
|
287
|
+
|
288
|
+
seq.each_char.map do |base|
|
289
|
+
AA2INT[base] || AA2INT['*']
|
290
|
+
end
|
291
|
+
end
|
292
|
+
|
293
|
+
def int_array_to_aaseq(arr)
|
294
|
+
raise ArgumentError, 'arr must be an Array' unless arr.is_a? Array
|
295
|
+
|
296
|
+
arr.map do |i|
|
297
|
+
INT2AA[i] || '*'
|
298
|
+
end.join
|
299
|
+
end
|
192
300
|
end
|
193
301
|
end
|
data/lib/libssw/align.rb
CHANGED
@@ -1,7 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require_relative 'struct_helper'
|
4
|
-
|
5
3
|
module LibSSW
|
6
4
|
# structure of the alignment result
|
7
5
|
# @!attribute score1
|
@@ -31,15 +29,13 @@ module LibSSW
|
|
31
29
|
# @return [Integer]
|
32
30
|
# length of the cigar string; cigarLen = 0 when the best alignment path is not available
|
33
31
|
class Align < FFI::Align
|
34
|
-
include StructHelper
|
35
|
-
|
36
32
|
def self.keys
|
37
33
|
%i[score1 score2 ref_begin1 ref_end1
|
38
34
|
read_begin1 read_end1 ref_end2 cigar cigar_len cigar_string]
|
39
35
|
end
|
40
36
|
|
41
37
|
# This class is read_only
|
42
|
-
attr_reader(*keys
|
38
|
+
attr_reader(*keys)
|
43
39
|
|
44
40
|
def initialize(ptr)
|
45
41
|
@ptr = ptr
|
@@ -55,6 +51,11 @@ module LibSSW
|
|
55
51
|
@cigar = cigar_len.positive? ? align.cigar[0, 4 * cigar_len].unpack('L*') : []
|
56
52
|
# Attributes for ruby binding only
|
57
53
|
@cigar_string = LibSSW.array_to_cigar_string(@cigar)
|
54
|
+
LibSSW.align_destroy(ptr)
|
55
|
+
end
|
56
|
+
|
57
|
+
def to_h
|
58
|
+
self.class.keys.map { |k| [k, __send__(k)] }.to_h
|
58
59
|
end
|
59
60
|
end
|
60
61
|
end
|
data/lib/libssw/profile.rb
CHANGED
@@ -1,7 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require_relative 'struct_helper'
|
4
|
-
|
5
3
|
module LibSSW
|
6
4
|
# structure of the query profile
|
7
5
|
# @!attribute read
|
@@ -10,8 +8,6 @@ module LibSSW
|
|
10
8
|
# @!attribute n
|
11
9
|
# @!attribute bias
|
12
10
|
class Profile < FFI::Profile
|
13
|
-
include StructHelper
|
14
|
-
|
15
11
|
def self.keys
|
16
12
|
%i[read mat read_len n bias]
|
17
13
|
end
|
@@ -31,11 +27,16 @@ module LibSSW
|
|
31
27
|
|
32
28
|
def to_ptr
|
33
29
|
# Garbage collection workaround
|
34
|
-
#
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
30
|
+
# Preventing Garbage Collection --force
|
31
|
+
cstruct.read = ptr.instance_variable_get(:@read_str)
|
32
|
+
cstruct.mat = ptr.instance_variable_get(:@mat_str)
|
33
|
+
cstruct.readLen = ptr.instance_variable_get(:@read_len)
|
34
|
+
cstruct.n = ptr.instance_variable_get(:@n)
|
35
|
+
ptr
|
36
|
+
end
|
37
|
+
|
38
|
+
def to_h
|
39
|
+
self.class.keys.map { |k| [k, __send__(k)] }.to_h
|
39
40
|
end
|
40
41
|
end
|
41
42
|
end
|
data/lib/libssw/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: libssw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kojix2
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-01-
|
11
|
+
date: 2021-01-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: fiddle
|
@@ -125,7 +125,6 @@ files:
|
|
125
125
|
- lib/libssw/align.rb
|
126
126
|
- lib/libssw/ffi.rb
|
127
127
|
- lib/libssw/profile.rb
|
128
|
-
- lib/libssw/struct_helper.rb
|
129
128
|
- lib/libssw/version.rb
|
130
129
|
homepage: https://github.com/kojix2/ruby-libssw
|
131
130
|
licenses:
|