libssw 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +62 -21
- data/lib/libssw.rb +69 -122
- data/lib/{libssw → ssw}/BLOSUM50.rb +1 -1
- data/lib/{libssw → ssw}/BLOSUM62.rb +1 -1
- data/lib/ssw/aaseq.rb +59 -0
- data/lib/{libssw → ssw}/align.rb +19 -4
- data/lib/ssw/dna.rb +53 -0
- data/lib/{libssw → ssw}/ffi.rb +2 -2
- data/lib/{libssw → ssw}/profile.rb +3 -3
- data/lib/{libssw → ssw}/version.rb +2 -2
- data/vendor/libssw.so +0 -0
- metadata +11 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 010f0923d209dc9f0d3fde047c5cfbbb176e57239ff79e236a1f69f3b634573f
|
4
|
+
data.tar.gz: 9e08dc3fb6b03405eaa97ac6a65743bd61fc49d8fc28de6a32b737243a365236
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9cf4524856aba84e141dd6b6e727e671a01ac907c1d1ad4b33907a16385a499024f235b7f982e66244af07af42e60b1efe0c093281bb433147fb594f887930e7
|
7
|
+
data.tar.gz: 275a528ded7a99cfac5d8d45487e27f290bd7b252cac7ddb141d1a03218b72042e0b6c54c92a7c39ee1ff276fd3cb1575772ae3b9b74316ea9a7a7e2e48ffe9a
|
data/README.md
CHANGED
@@ -3,11 +3,10 @@
|
|
3
3
|

|
4
4
|
[](https://rubygems.org/gems/libssw)
|
5
5
|
[](https://rubydoc.info/gems/libssw)
|
6
|
+
[](https://zenodo.org/badge/latestdoi/328163622)
|
6
7
|
|
7
8
|
:checkered_flag: [libssw](https://github.com/mengyao/Complete-Striped-Smith-Waterman-Library) - fast SIMD parallelized implementation of the Smith-Waterman algorithm - for Ruby
|
8
9
|
|
9
|
-
:construction: Under development.
|
10
|
-
|
11
10
|
## Installation
|
12
11
|
|
13
12
|
```sh
|
@@ -24,41 +23,42 @@ export LIBSSWDIR=/usr/lib/x86_64-linux-gnu/ # libssw.so
|
|
24
23
|
|
25
24
|
### Installing from source
|
26
25
|
|
27
|
-
When installing from source code using the following steps, the shared library `libssw.so` will be packed in the Ruby gem. In this case, the environment variable LIBSSWDIR is not required.
|
26
|
+
When installing from source code using the following steps, the shared library `libssw.so` or `libssw.dylib` will be packed in the Ruby gem. In this case, the environment variable `LIBSSWDIR` is not required.
|
28
27
|
|
29
28
|
```sh
|
30
29
|
git clone --recurse-submodules https://github.com/kojix2/ruby-libssw
|
31
|
-
bundle exec rake libssw:
|
30
|
+
bundle exec rake libssw:build
|
32
31
|
bundle exec rake install
|
33
32
|
```
|
34
33
|
|
34
|
+
ruby-libssw does not support Windows.
|
35
|
+
|
35
36
|
## Usage
|
36
37
|
|
37
38
|
```ruby
|
38
39
|
require 'libssw'
|
39
40
|
|
40
|
-
SSW = LibSSW
|
41
|
-
|
42
41
|
ref_str = "AAAAAAAAACGTTAAAAAAAAAA"
|
43
|
-
ref_int = SSW.
|
42
|
+
ref_int = SSW::DNA.to_int_array(ref_str)
|
44
43
|
# [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
|
45
44
|
|
46
45
|
read_str1 = "ACGTT"
|
47
|
-
read_str2 = SSW.
|
48
|
-
|
46
|
+
read_str2 = SSW::DNA.revcomp(read_str1)
|
47
|
+
# "AACGT"
|
48
|
+
read_int1 = SSW::DNA.to_int_array(read_str1)
|
49
49
|
# [0, 1, 2, 3, 3]
|
50
|
-
read_int2 = SSW.
|
50
|
+
read_int2 = SSW::DNA.to_int_array(read_str2)
|
51
51
|
# [0, 0, 1, 2, 3]
|
52
52
|
|
53
|
-
mat = SSW.create_scoring_matrix(SSW::
|
53
|
+
mat = SSW.create_scoring_matrix(SSW::DNA::Elements, 2, -2)
|
54
54
|
# mat = [2, -2, -2, -2, 0,
|
55
55
|
# -2, 2, -2, -2, 0,
|
56
56
|
# -2, -2, 2, -2, 0,
|
57
57
|
# -2, -2, -2, 2, 0,
|
58
58
|
# 0, 0, 0, 0, 0]
|
59
59
|
|
60
|
-
profile1 =
|
61
|
-
align1 =
|
60
|
+
profile1 = SSW.init(read_int1, mat)
|
61
|
+
align1 = SSW.align(profile1, ref_int, 3, 1, 1, 0, 0)
|
62
62
|
pp align1.to_h
|
63
63
|
# {
|
64
64
|
# :score1 => 10,
|
@@ -73,8 +73,8 @@ pp align1.to_h
|
|
73
73
|
# :cigar_string => "5M"
|
74
74
|
# }
|
75
75
|
|
76
|
-
profile2 =
|
77
|
-
align2
|
76
|
+
profile2 = SSW.init(read_int2, mat)
|
77
|
+
align2 = SSW.align(profile2, ref_int, 3, 1, 1, 0, 0)
|
78
78
|
pp align2.to_h
|
79
79
|
# {
|
80
80
|
# :score1 => 10,
|
@@ -88,24 +88,65 @@ pp align2.to_h
|
|
88
88
|
# :cigar_len => 1,
|
89
89
|
# :cigar_string => "5M"
|
90
90
|
# }
|
91
|
-
```
|
92
91
|
|
92
|
+
puts SSW.build_path(read_str1, ref_str, align1)
|
93
|
+
# 5M
|
94
|
+
# ACGTT
|
95
|
+
# |||||
|
96
|
+
# ACGTT
|
97
|
+
```
|
93
98
|
|
94
|
-
##
|
95
|
-
|
96
|
-
|
99
|
+
## APIs
|
100
|
+
|
101
|
+
See [API Documentation](https://rubydoc.info/gems/libssw).
|
102
|
+
|
103
|
+
```markdown
|
104
|
+
* SSW module
|
105
|
+
* SSW.init
|
106
|
+
* SSW.init_destroy
|
107
|
+
* SSW.align
|
108
|
+
* SSW.align_destroy
|
109
|
+
* SSW.mark_mismatch
|
110
|
+
* SSW.create_scoring_matrix
|
111
|
+
* SSW.build_path
|
112
|
+
|
113
|
+
* Profile class
|
114
|
+
* attributes
|
115
|
+
* read, mat, read_len, n, bias
|
116
|
+
|
117
|
+
* Align class
|
118
|
+
* attributes
|
119
|
+
* score1, score2, ref_begin1, ref_end1, read_begin1, read_end1, ref_end2
|
120
|
+
cigar, cigar_len, cigar_string
|
121
|
+
|
122
|
+
* DNA module
|
123
|
+
* DNA.to_int_array
|
124
|
+
* DNA.from_int_array
|
125
|
+
* DNA.revcomp
|
126
|
+
|
127
|
+
* AASeq module
|
128
|
+
* AASeq.to_int_array
|
129
|
+
* AASeq.from_int_array
|
130
|
+
|
131
|
+
* BLOSUM62
|
132
|
+
* BLOSUM50
|
133
|
+
```
|
97
134
|
|
98
135
|
## Development
|
99
136
|
|
100
137
|
```sh
|
101
138
|
git clone --recurse-submodules https://github.com/kojix2/ruby-libssw
|
102
|
-
bundle exec rake libssw:
|
139
|
+
bundle exec rake libssw:build
|
103
140
|
bundle exec rake test
|
104
141
|
```
|
105
142
|
|
106
143
|
## Contributing
|
107
144
|
|
108
|
-
|
145
|
+
* [Report bugs](https://github.com/kojix2/ruby-libssw/issues)
|
146
|
+
* Fix bugs and [submit pull requests](https://github.com/kojix2/ruby-libssw/pulls)
|
147
|
+
* Write, clarify, or fix documentation
|
148
|
+
* English corrections are welcome
|
149
|
+
* Suggest or add new features
|
109
150
|
|
110
151
|
## License
|
111
152
|
|
data/lib/libssw.rb
CHANGED
@@ -1,10 +1,12 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require_relative '
|
4
|
-
require_relative '
|
5
|
-
require_relative '
|
3
|
+
require_relative 'ssw/version'
|
4
|
+
require_relative 'ssw/BLOSUM50'
|
5
|
+
require_relative 'ssw/BLOSUM62'
|
6
|
+
require_relative 'ssw/dna'
|
7
|
+
require_relative 'ssw/aaseq'
|
6
8
|
|
7
|
-
module
|
9
|
+
module SSW
|
8
10
|
class Error < StandardError; end
|
9
11
|
|
10
12
|
class << self
|
@@ -26,67 +28,9 @@ module LibSSW
|
|
26
28
|
File.expand_path("../vendor/#{lib_name}", __dir__)
|
27
29
|
end
|
28
30
|
|
29
|
-
require_relative '
|
30
|
-
require_relative '
|
31
|
-
require_relative '
|
32
|
-
|
33
|
-
AAELEMENTS = ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G',
|
34
|
-
'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S',
|
35
|
-
'T', 'W', 'Y', 'V', 'B', 'Z', 'X', '*']
|
36
|
-
|
37
|
-
AA2INT = { 'A' => 0, 'a' => 0,
|
38
|
-
'R' => 1, 'r' => 1,
|
39
|
-
'N' => 2, 'n' => 2,
|
40
|
-
'D' => 3, 'd' => 3,
|
41
|
-
'C' => 4, 'c' => 4,
|
42
|
-
'Q' => 5, 'q' => 5,
|
43
|
-
'E' => 6, 'e' => 6,
|
44
|
-
'G' => 7, 'g' => 7,
|
45
|
-
'H' => 8, 'h' => 8,
|
46
|
-
'I' => 9, 'i' => 9,
|
47
|
-
'L' => 10, 'l' => 10,
|
48
|
-
'K' => 11, 'k' => 11,
|
49
|
-
'M' => 12, 'm' => 12,
|
50
|
-
'F' => 13, 'f' => 13,
|
51
|
-
'P' => 14, 'p' => 14,
|
52
|
-
'S' => 15, 's' => 15,
|
53
|
-
'T' => 16, 't' => 16,
|
54
|
-
'W' => 17, 'w' => 17,
|
55
|
-
'Y' => 18, 'y' => 18,
|
56
|
-
'V' => 19, 'v' => 19,
|
57
|
-
'B' => 20, 'b' => 20,
|
58
|
-
'Z' => 21, 'z' => 21,
|
59
|
-
'X' => 22, 'x' => 22,
|
60
|
-
'*' => 23 }
|
61
|
-
|
62
|
-
INT2AA = { 0 => 'A', 1 => 'R', 2 => 'N', 3 => 'D',
|
63
|
-
4 => 'C', 5 => 'Q', 6 => 'E', 7 => 'G',
|
64
|
-
8 => 'H', 9 => 'I', 10 => 'L', 11 => 'K',
|
65
|
-
12 => 'M', 13 => 'F', 14 => 'P', 15 => 'S',
|
66
|
-
16 => 'T', 17 => 'W', 18 => 'Y', 19 => 'V',
|
67
|
-
20 => 'B', 21 => 'Z', 22 => 'X', 23 => '*' }
|
68
|
-
|
69
|
-
DNAElements = %w[A C G T N]
|
70
|
-
|
71
|
-
DNA2INT = { 'A' => 0, 'a' => 0,
|
72
|
-
'C' => 1, 'c' => 1,
|
73
|
-
'G' => 2, 'g' => 2,
|
74
|
-
'T' => 3, 't' => 3,
|
75
|
-
'N' => 4, 'n' => 4 }
|
76
|
-
|
77
|
-
INT2DNA = { 0 => 'A', 1 => 'C', 2 => 'G', 3 => 'T', 4 => 'N' }
|
78
|
-
|
79
|
-
# reverse complement
|
80
|
-
DNARC = { 'A' => 'T',
|
81
|
-
'C' => 'G',
|
82
|
-
'G' => 'C',
|
83
|
-
'T' => 'A',
|
84
|
-
'N' => 'N',
|
85
|
-
'a' => 'T',
|
86
|
-
'c' => 'G',
|
87
|
-
'g' => 'C',
|
88
|
-
't' => 'A',
|
89
|
-
'n' => 'N' }
|
31
|
+
require_relative 'ssw/ffi'
|
32
|
+
require_relative 'ssw/profile'
|
33
|
+
require_relative 'ssw/align'
|
90
34
|
|
91
35
|
class << self
|
92
36
|
# Create the query profile using the query sequence.
|
@@ -99,10 +43,14 @@ module LibSSW
|
|
99
43
|
# * if your estimated best alignment score is surely < 255 please set 0;
|
100
44
|
# * if your estimated best alignment score >= 255, please set 1;
|
101
45
|
# * if you don't know, please set 2
|
102
|
-
def
|
46
|
+
def init(read, mat, n = nil, score_size: 2)
|
47
|
+
read = read.to_a
|
48
|
+
mat = mat.to_a.flatten
|
49
|
+
raise ArgumentError, 'Expect class of read to be Array' unless read.is_a?(Array)
|
50
|
+
raise ArgumentError, 'Expect class of mat to be Array' unless mat.is_a?(Array)
|
51
|
+
|
103
52
|
read_str = read.pack('c*')
|
104
53
|
read_len = read.size
|
105
|
-
mat = mat.to_a.flatten
|
106
54
|
n = Math.sqrt(mat.size) if n.nil?
|
107
55
|
raise "Not a square matrix. size: #{mat.size}, n: #{n}" if mat.size != n * n
|
108
56
|
|
@@ -126,20 +74,24 @@ module LibSSW
|
|
126
74
|
ptr.instance_variable_set(:@n, n)
|
127
75
|
ptr.instance_variable_set(:@score_size, score_size)
|
128
76
|
|
129
|
-
|
77
|
+
SSW::Profile.new(ptr)
|
130
78
|
end
|
131
79
|
|
132
80
|
# Release the memory allocated by function ssw_init.
# @param profile [Fiddle::Pointer, SSW::Profile, SSW::FFI::Profile]
#   pointer to the query profile structure
# @raise [ArgumentError] if profile is not a Fiddle::Pointer, not a Profile
#   and does not respond to +to_ptr+
# @note Ruby has garbage collection, so there is not much reason to call
#   this method.
def init_destroy(profile)
  # Every clause must test the +profile+ argument itself: referring to any
  # other local name here raises NameError as soon as the first clause is
  # false (i.e. whenever something other than a Fiddle::Pointer is passed).
  unless profile.is_a?(Fiddle::Pointer) || profile.is_a?(Profile) || profile.respond_to?(:to_ptr)
    raise ArgumentError, 'Expect class of profile to be Profile or Pointer'
  end

  FFI.init_destroy(profile)
end
|
140
92
|
|
141
93
|
# Do Striped Smith-Waterman alignment.
|
142
|
-
# @param prof [Fiddle::Pointer,
|
94
|
+
# @param prof [Fiddle::Pointer, SSW::Profile, SSW::FFI::Profile]
|
143
95
|
# pointer to the query profile structure
|
144
96
|
# @param ref [Array]
|
145
97
|
# target sequence;
|
@@ -181,9 +133,15 @@ module LibSSW
|
|
181
133
|
# SSW C library masks the reference loci nearby (mask length = maskLen)
|
182
134
|
# the best alignment ending position and locates the second largest score
|
183
135
|
# from the unmasked elements.
|
184
|
-
def
|
136
|
+
def align(prof, ref, weight_gap0, weight_gapE, flag, filters, filterd, mask_len = nil)
|
137
|
+
unless prof.is_a?(Fiddle::Pointer) || prof.is_a?(Profile) || prof.respond_to?(:to_ptr)
|
138
|
+
raise ArgumentError, 'Expect class of filename to be Profile or Pointer'
|
139
|
+
end
|
140
|
+
raise ArgumentError, 'Expect class of ref to be Array' unless ref.is_a?(Array)
|
141
|
+
|
185
142
|
ref_str = ref.pack('c*')
|
186
143
|
ref_len = ref.size
|
144
|
+
mask_len ||= [ref_len / 2, 15].max
|
187
145
|
ptr = FFI.ssw_align(
|
188
146
|
prof, ref_str, ref_len, weight_gap0, weight_gapE, flag, filters, filterd, mask_len
|
189
147
|
)
|
@@ -192,14 +150,21 @@ module LibSSW
|
|
192
150
|
# For example: instance_variable_set(:@ref_str, ref_str)
|
193
151
|
#
|
194
152
|
# ptr.free = FFI.instance_variable_get(:@func_map)['align_destroy']
|
195
|
-
|
153
|
+
SSW::Align.new(ptr)
|
196
154
|
end
|
197
155
|
|
198
156
|
# Release the memory allocated by function ssw_align.
# @param align [Fiddle::Pointer, SSW::Align, SSW::FFI::Align]
#   pointer to the alignment result structure
# @return [nil] when given a Ruby-level Align; a warning is printed and
#   nothing is released
# @raise [ArgumentError] if align is not an Align, not a Fiddle::Pointer and
#   does not respond to +to_ptr+
def align_destroy(align)
  if align.is_a?(Align)
    warn "You don't need to call this method for Ruby's Align class."
    nil
  elsif align.is_a?(Fiddle::Pointer) || align.respond_to?(:to_ptr)
    FFI.align_destroy(align)
  else
    # The message names the actual parameter rather than an unrelated one.
    raise ArgumentError, 'Expect class of align to be Pointer'
  end
end
|
204
169
|
|
205
170
|
# 1. Calculate the number of mismatches.
|
@@ -231,17 +196,6 @@ module LibSSW
|
|
231
196
|
)
|
232
197
|
end
|
233
198
|
|
234
|
-
def array_to_cigar_string(arr)
|
235
|
-
cigar_string = String.new
|
236
|
-
arr.each do |x|
|
237
|
-
n = x >> 4
|
238
|
-
m = x & 15
|
239
|
-
c = m > 8 ? 'M' : 'MIDNSHP=X'[m]
|
240
|
-
cigar_string << n.to_s << c
|
241
|
-
end
|
242
|
-
cigar_string
|
243
|
-
end
|
244
|
-
|
245
199
|
# Create scoring matrix of Smith-Waterman algorithm.
|
246
200
|
# @param [Array] elements
|
247
201
|
# @param [Integer] match_score
|
@@ -258,44 +212,37 @@ module LibSSW
|
|
258
212
|
score
|
259
213
|
end
|
260
214
|
|
261
|
-
#
|
262
|
-
def
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
215
|
+
# Render an alignment as text: the CIGAR string plus three equally long
# lines (aligned query, match markers, aligned reference).
#
# Only the M, I and D operations contribute to the output; any other
# operation code is skipped without advancing either offset.
#
# @param q_seq [String] query (read) sequence
# @param r_seq [String] reference sequence
# @param align [#read_begin1, #ref_begin1, #cigar, #cigar_string]
#   alignment result; each +cigar+ element is an Integer whose low 4 bits
#   select the operation and whose remaining bits give the run length
# @return [Array<String>] [cigar_string, query_line, marker_line, ref_line]
def build_path(q_seq, r_seq, align)
  # Unary plus returns an unfrozen string even under frozen_string_literal,
  # so the lines can be grown in place with << instead of rebuilt by +=.
  query_line = +''
  marker_line = +''
  ref_line = +''
  q_off = align.read_begin1
  r_off = align.ref_begin1
  align.cigar.each do |x|
    len = x >> 4
    op = x & 15
    # Codes above 8 have no letter in the table and are treated as 'M'.
    case op > 8 ? 'M' : 'MIDNSHP=X'[op]
    when 'M'
      query_line << q_seq[q_off...(q_off + len)]
      # '|' marks identical characters, '*' marks a mismatch.
      len.times { |j| marker_line << (q_seq[q_off + j] == r_seq[r_off + j] ? '|' : '*') }
      ref_line << r_seq[r_off...(r_off + len)]
      q_off += len
      r_off += len
    when 'I'
      # Insertion: query characters with blanks on the reference line.
      query_line << q_seq[q_off...(q_off + len)]
      marker_line << ' ' * len
      ref_line << ' ' * len
      q_off += len
    when 'D'
      # Deletion: reference characters with blanks on the query line.
      query_line << ' ' * len
      marker_line << ' ' * len
      ref_line << r_seq[r_off...(r_off + len)]
      r_off += len
    end
  end
  [align.cigar_string, query_line, marker_line, ref_line]
end
|
300
247
|
end
|
301
248
|
end
|
data/lib/ssw/aaseq.rb
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SSW
  # Conversion between amino-acid sequence strings and the integer codes
  # defined by the position of each letter in AAELEMENTS.
  module AASeq
    # Amino-acid letters in code order: the index of a letter is its code.
    AAELEMENTS = ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G',
                  'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S',
                  'T', 'W', 'Y', 'V', 'B', 'Z', 'X', '*'].freeze

    # Same list under the name used by SSW::DNA::Elements.
    Elements = AAELEMENTS

    # Letter (upper or lower case) => integer code. Derived from AAELEMENTS
    # so the tables cannot drift apart.
    AA2INT = AAELEMENTS.each_with_index.each_with_object({}) do |(aa, code), table|
      table[aa] = code
      table[aa.downcase] = code
    end.freeze

    # Integer code => upper-case letter.
    INT2AA = AAELEMENTS.each_with_index.map { |aa, code| [code, aa] }.to_h.freeze

    module_function

    # Encode an amino-acid string as integer codes.
    # Characters missing from AA2INT are encoded as '*' (23).
    # @param seq [String]
    # @return [Array<Integer>]
    # @raise [ArgumentError] if seq is not a String
    def to_int_array(seq)
      raise ArgumentError, 'seq must be a string' unless seq.is_a? String

      seq.each_char.map do |base|
        AA2INT[base] || AA2INT['*']
      end
    end

    # Decode integer codes back into an amino-acid string.
    # Codes missing from INT2AA are rendered as '*'.
    # @param arr [Array<Integer>]
    # @return [String]
    # @raise [ArgumentError] if arr is not an Array
    def from_int_array(arr)
      raise ArgumentError, 'arr must be an Array' unless arr.is_a? Array

      arr.map do |i|
        INT2AA[i] || '*'
      end.join
    end
  end
end
|
data/lib/{libssw → ssw}/align.rb
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
module
|
3
|
+
module SSW
|
4
4
|
# structure of the alignment result
|
5
5
|
# @!attribute score1
|
6
6
|
# @return [Integer] the best alignment score
|
@@ -28,7 +28,9 @@ module LibSSW
|
|
28
28
|
# @!attribute cigar_len
|
29
29
|
# @return [Integer]
|
30
30
|
# length of the cigar string; cigarLen = 0 when the best alignment path is not available
|
31
|
-
|
31
|
+
# @!attribute cigar_string
|
32
|
+
# @return [String] cigar string
|
33
|
+
class Align
|
32
34
|
def self.keys
|
33
35
|
%i[score1 score2 ref_begin1 ref_end1
|
34
36
|
read_begin1 read_end1 ref_end2 cigar cigar_len cigar_string]
|
@@ -50,12 +52,25 @@ module LibSSW
|
|
50
52
|
@cigar_len = align.cigarLen
|
51
53
|
@cigar = cigar_len.positive? ? align.cigar[0, 4 * cigar_len].unpack('L*') : []
|
52
54
|
# Attributes for ruby binding only
|
53
|
-
@cigar_string =
|
54
|
-
|
55
|
+
@cigar_string = array_to_cigar_string(@cigar)
|
56
|
+
SSW.align_destroy(ptr)
|
55
57
|
end
|
56
58
|
|
57
59
|
# Build a Hash from the names returned by self.class.keys, calling the
# method of the same name on this object to obtain each value.
# @return [Hash]
def to_h
|
58
60
|
self.class.keys.map { |k| [k, __send__(k)] }.to_h
|
59
61
|
end
|
62
|
+
|
63
|
+
private
|
64
|
+
|
65
|
+
# Build a CIGAR string from an array of integers.
# For each element, the value shifted right by 4 bits is written as a
# number, followed by the letter selected by the low 4 bits from
# 'MIDNSHP=X' ('M' is used when the low 4 bits exceed 8).
# @param arr [Array<Integer>]
# @return [String]
def array_to_cigar_string(arr)
|
66
|
+
cigar_string = String.new
|
67
|
+
arr.each do |x|
|
68
|
+
n = x >> 4
|
69
|
+
m = x & 15
|
70
|
+
c = m > 8 ? 'M' : 'MIDNSHP=X'[m]
|
71
|
+
cigar_string << n.to_s << c
|
72
|
+
end
|
73
|
+
cigar_string
|
74
|
+
end
|
60
75
|
end
|
61
76
|
end
|
data/lib/ssw/dna.rb
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SSW
  # Conversion between DNA sequence strings and integer codes
  # (A=0, C=1, G=2, T=3, N=4), plus reverse complement.
  module DNA
    Elements = %w[A C G T N].freeze

    # Base letter (upper or lower case) => integer code.
    DNA2INT = { 'A' => 0, 'a' => 0,
                'C' => 1, 'c' => 1,
                'G' => 2, 'g' => 2,
                'T' => 3, 't' => 3,
                'N' => 4, 'n' => 4 }.freeze

    # Integer code => upper-case base letter.
    INT2DNA = { 0 => 'A', 1 => 'C', 2 => 'G', 3 => 'T', 4 => 'N' }.freeze

    # Base letter (upper or lower case) => upper-case complementary base.
    DNARC = { 'A' => 'T',
              'C' => 'G',
              'G' => 'C',
              'T' => 'A',
              'N' => 'N',
              'a' => 'T',
              'c' => 'G',
              'g' => 'C',
              't' => 'A',
              'n' => 'N' }.freeze

    module_function

    # Encode a DNA string as integer codes.
    # Characters missing from DNA2INT are encoded as N (4).
    # @param seq [String]
    # @return [Array<Integer>]
    # @raise [ArgumentError] if seq is not a String
    def to_int_array(seq)
      raise ArgumentError, 'seq must be a string' unless seq.is_a? String

      seq.each_char.map do |base|
        DNA2INT[base] || DNA2INT['N']
      end
    end

    # Decode integer codes back into a DNA string.
    # Codes missing from INT2DNA are rendered as 'N'.
    # @param arr [Array<Integer>]
    # @return [String]
    # @raise [ArgumentError] if arr is not an Array
    def from_int_array(arr)
      raise ArgumentError, 'arr must be an Array' unless arr.is_a? Array

      arr.map do |i|
        INT2DNA[i] || 'N'
      end.join
    end

    # Reverse complement of a DNA string; the result is upper case.
    # Characters missing from DNARC become 'N', matching to_int_array.
    # A bare lookup would yield nil, which join drops, silently shortening
    # the sequence.
    # @param seq [String]
    # @return [String] same length as seq
    def revcomp(seq)
      seq.each_char.map do |base|
        DNARC.fetch(base, 'N')
      end.join.reverse
    end
  end
end
|
data/lib/{libssw → ssw}/ffi.rb
RENAMED
@@ -2,12 +2,12 @@
|
|
2
2
|
|
3
3
|
require 'fiddle/import'
|
4
4
|
|
5
|
-
module
|
5
|
+
module SSW
|
6
6
|
module FFI
|
7
7
|
extend Fiddle::Importer
|
8
8
|
|
9
9
|
begin
|
10
|
-
dlload
|
10
|
+
dlload SSW.ffi_lib
|
11
11
|
rescue LoadError => e
|
12
12
|
raise LoadError, "Could not find libssw shared library. \n#{e}"
|
13
13
|
end
|
@@ -1,13 +1,13 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
module
|
3
|
+
module SSW
|
4
4
|
# structure of the query profile
|
5
5
|
# @!attribute read
|
6
6
|
# @!attribute mat
|
7
7
|
# @!attribute read_len
|
8
8
|
# @!attribute n
|
9
9
|
# @!attribute bias
|
10
|
-
class Profile
|
10
|
+
class Profile
|
11
11
|
def self.keys
|
12
12
|
%i[read mat read_len n bias]
|
13
13
|
end
|
@@ -17,7 +17,7 @@ module LibSSW
|
|
17
17
|
|
18
18
|
def initialize(ptr)
|
19
19
|
@ptr = ptr
|
20
|
-
@cstruct = profile =
|
20
|
+
@cstruct = profile = SSW::FFI::Profile.new(ptr)
|
21
21
|
@read_len = profile.readLen
|
22
22
|
@read = read_len.positive? ? profile.read[0, read_len].unpack('c*') : []
|
23
23
|
@n = profile.n
|
data/vendor/libssw.so
ADDED
Binary file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: libssw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kojix2
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-03-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: fiddle
|
@@ -120,12 +120,15 @@ files:
|
|
120
120
|
- README.md
|
121
121
|
- exe/rbssw
|
122
122
|
- lib/libssw.rb
|
123
|
-
- lib/
|
124
|
-
- lib/
|
125
|
-
- lib/
|
126
|
-
- lib/
|
127
|
-
- lib/
|
128
|
-
- lib/
|
123
|
+
- lib/ssw/BLOSUM50.rb
|
124
|
+
- lib/ssw/BLOSUM62.rb
|
125
|
+
- lib/ssw/aaseq.rb
|
126
|
+
- lib/ssw/align.rb
|
127
|
+
- lib/ssw/dna.rb
|
128
|
+
- lib/ssw/ffi.rb
|
129
|
+
- lib/ssw/profile.rb
|
130
|
+
- lib/ssw/version.rb
|
131
|
+
- vendor/libssw.so
|
129
132
|
homepage: https://github.com/kojix2/ruby-libssw
|
130
133
|
licenses:
|
131
134
|
- MIT
|