libssw 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +62 -21
- data/lib/libssw.rb +69 -122
- data/lib/{libssw → ssw}/BLOSUM50.rb +1 -1
- data/lib/{libssw → ssw}/BLOSUM62.rb +1 -1
- data/lib/ssw/aaseq.rb +59 -0
- data/lib/{libssw → ssw}/align.rb +19 -4
- data/lib/ssw/dna.rb +53 -0
- data/lib/{libssw → ssw}/ffi.rb +2 -2
- data/lib/{libssw → ssw}/profile.rb +3 -3
- data/lib/{libssw → ssw}/version.rb +2 -2
- data/vendor/libssw.so +0 -0
- metadata +11 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 010f0923d209dc9f0d3fde047c5cfbbb176e57239ff79e236a1f69f3b634573f
|
4
|
+
data.tar.gz: 9e08dc3fb6b03405eaa97ac6a65743bd61fc49d8fc28de6a32b737243a365236
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9cf4524856aba84e141dd6b6e727e671a01ac907c1d1ad4b33907a16385a499024f235b7f982e66244af07af42e60b1efe0c093281bb433147fb594f887930e7
|
7
|
+
data.tar.gz: 275a528ded7a99cfac5d8d45487e27f290bd7b252cac7ddb141d1a03218b72042e0b6c54c92a7c39ee1ff276fd3cb1575772ae3b9b74316ea9a7a7e2e48ffe9a
|
data/README.md
CHANGED
@@ -3,11 +3,10 @@
|
|
3
3
|
![test](https://github.com/kojix2/ruby-libssw/workflows/CI/badge.svg)
|
4
4
|
[![Gem Version](https://img.shields.io/gem/v/libssw?color=brightgreen)](https://rubygems.org/gems/libssw)
|
5
5
|
[![Docs Latest](https://img.shields.io/badge/docs-latest-blue.svg)](https://rubydoc.info/gems/libssw)
|
6
|
+
[![DOI](https://zenodo.org/badge/328163622.svg)](https://zenodo.org/badge/latestdoi/328163622)
|
6
7
|
|
7
8
|
:checkered_flag: [libssw](https://github.com/mengyao/Complete-Striped-Smith-Waterman-Library) - fast SIMD parallelized implementation of the Smith-Waterman algorithm - for Ruby
|
8
9
|
|
9
|
-
:construction: Under development.
|
10
|
-
|
11
10
|
## Installation
|
12
11
|
|
13
12
|
```sh
|
@@ -24,41 +23,42 @@ export LIBSSWDIR=/usr/lib/x86_64-linux-gnu/ # libssw.so
|
|
24
23
|
|
25
24
|
### Installing from source
|
26
25
|
|
27
|
-
When installing from source code using the following steps, the shared library `libssw.so` will be packed in the Ruby gem. In this case, the environment variable LIBSSWDIR is not required.
|
26
|
+
When installing from source code using the following steps, the shared library `libssw.so` or `libssw.dylib` will be packed in the Ruby gem. In this case, the environment variable `LIBSSWDIR` is not required.
|
28
27
|
|
29
28
|
```sh
|
30
29
|
git clone --recurse-submodules https://github.com/kojix2/ruby-libssw
|
31
|
-
bundle exec rake libssw:
|
30
|
+
bundle exec rake libssw:build
|
32
31
|
bundle exec rake install
|
33
32
|
```
|
34
33
|
|
34
|
+
ruby-libssw does not support Windows.
|
35
|
+
|
35
36
|
## Usage
|
36
37
|
|
37
38
|
```ruby
|
38
39
|
require 'libssw'
|
39
40
|
|
40
|
-
SSW = LibSSW
|
41
|
-
|
42
41
|
ref_str = "AAAAAAAAACGTTAAAAAAAAAA"
|
43
|
-
ref_int = SSW.
|
42
|
+
ref_int = SSW::DNA.to_int_array(ref_str)
|
44
43
|
# [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
|
45
44
|
|
46
45
|
read_str1 = "ACGTT"
|
47
|
-
read_str2 = SSW.
|
48
|
-
|
46
|
+
read_str2 = SSW::DNA.revcomp(read_str1)
|
47
|
+
# "AACGT"
|
48
|
+
read_int1 = SSW::DNA.to_int_array(read_str1)
|
49
49
|
# [0, 1, 2, 3, 3]
|
50
|
-
read_int2 = SSW.
|
50
|
+
read_int2 = SSW::DNA.to_int_array(read_str2)
|
51
51
|
# [0, 0, 1, 2, 3]
|
52
52
|
|
53
|
-
mat = SSW.create_scoring_matrix(SSW::
|
53
|
+
mat = SSW.create_scoring_matrix(SSW::DNA::Elements, 2, -2)
|
54
54
|
# mat = [2, -2, -2, -2, 0,
|
55
55
|
# -2, 2, -2, -2, 0,
|
56
56
|
# -2, -2, 2, -2, 0,
|
57
57
|
# -2, -2, -2, 2, 0,
|
58
58
|
# 0, 0, 0, 0, 0]
|
59
59
|
|
60
|
-
profile1 =
|
61
|
-
align1 =
|
60
|
+
profile1 = SSW.init(read_int1, mat)
|
61
|
+
align1 = SSW.align(profile1, ref_int, 3, 1, 1, 0, 0)
|
62
62
|
pp align1.to_h
|
63
63
|
# {
|
64
64
|
# :score1 => 10,
|
@@ -73,8 +73,8 @@ pp align1.to_h
|
|
73
73
|
# :cigar_string => "5M"
|
74
74
|
# }
|
75
75
|
|
76
|
-
profile2 =
|
77
|
-
align2
|
76
|
+
profile2 = SSW.init(read_int2, mat)
|
77
|
+
align2 = SSW.align(profile2, ref_int, 3, 1, 1, 0, 0)
|
78
78
|
pp align2.to_h
|
79
79
|
# {
|
80
80
|
# :score1 => 10,
|
@@ -88,24 +88,65 @@ pp align2.to_h
|
|
88
88
|
# :cigar_len => 1,
|
89
89
|
# :cigar_string => "5M"
|
90
90
|
# }
|
91
|
-
```
|
92
91
|
|
92
|
+
puts SSW.build_path(read_str1, ref_str, align1)
|
93
|
+
# 5M
|
94
|
+
# ACGTT
|
95
|
+
# |||||
|
96
|
+
# ACGTT
|
97
|
+
```
|
93
98
|
|
94
|
-
##
|
95
|
-
|
96
|
-
|
99
|
+
## APIs
|
100
|
+
|
101
|
+
See [API Documentation](https://rubydoc.info/gems/libssw).
|
102
|
+
|
103
|
+
```markdown
|
104
|
+
* SSW module
|
105
|
+
* SSW.init
|
106
|
+
* SSW.init_destroy
|
107
|
+
* SSW.align
|
108
|
+
* SSW.align_destroy
|
109
|
+
* SSW.mark_mismatch
|
110
|
+
* SSW.create_scoring_matrix
|
111
|
+
* SSW.build_path
|
112
|
+
|
113
|
+
* Profile class
|
114
|
+
* attributes
|
115
|
+
* read, mat, read_len, n, bias
|
116
|
+
|
117
|
+
* Align class
|
118
|
+
* attributes
|
119
|
+
* score1, score2, ref_begin1, ref_end1, read_begin1, read_end1, ref_end2
|
120
|
+
cigar, cigar_len, cigar_string
|
121
|
+
|
122
|
+
* DNA module
|
123
|
+
* DNA.to_int_array
|
124
|
+
* DNA.from_int_array
|
125
|
+
* DNA.revcomp
|
126
|
+
|
127
|
+
* AASeq module
|
128
|
+
* AASeq.to_int_array
|
129
|
+
* AASeq.from_int_array
|
130
|
+
|
131
|
+
* BLOSUM62
|
132
|
+
* BLOSUM50
|
133
|
+
```
|
97
134
|
|
98
135
|
## Development
|
99
136
|
|
100
137
|
```sh
|
101
138
|
git clone --recurse-submodules https://github.com/kojix2/ruby-libssw
|
102
|
-
bundle exec rake libssw:
|
139
|
+
bundle exec rake libssw:build
|
103
140
|
bundle exec rake test
|
104
141
|
```
|
105
142
|
|
106
143
|
## Contributing
|
107
144
|
|
108
|
-
|
145
|
+
* [Report bugs](https://github.com/kojix2/ruby-libssw/issues)
|
146
|
+
* Fix bugs and [submit pull requests](https://github.com/kojix2/ruby-libssw/pulls)
|
147
|
+
* Write, clarify, or fix documentation
|
148
|
+
* English corrections are welcome
|
149
|
+
* Suggest or add new features
|
109
150
|
|
110
151
|
## License
|
111
152
|
|
data/lib/libssw.rb
CHANGED
@@ -1,10 +1,12 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require_relative '
|
4
|
-
require_relative '
|
5
|
-
require_relative '
|
3
|
+
require_relative 'ssw/version'
|
4
|
+
require_relative 'ssw/BLOSUM50'
|
5
|
+
require_relative 'ssw/BLOSUM62'
|
6
|
+
require_relative 'ssw/dna'
|
7
|
+
require_relative 'ssw/aaseq'
|
6
8
|
|
7
|
-
module
|
9
|
+
module SSW
|
8
10
|
class Error < StandardError; end
|
9
11
|
|
10
12
|
class << self
|
@@ -26,67 +28,9 @@ module LibSSW
|
|
26
28
|
File.expand_path("../vendor/#{lib_name}", __dir__)
|
27
29
|
end
|
28
30
|
|
29
|
-
require_relative '
|
30
|
-
require_relative '
|
31
|
-
require_relative '
|
32
|
-
|
33
|
-
AAELEMENTS = ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G',
|
34
|
-
'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S',
|
35
|
-
'T', 'W', 'Y', 'V', 'B', 'Z', 'X', '*']
|
36
|
-
|
37
|
-
AA2INT = { 'A' => 0, 'a' => 0,
|
38
|
-
'R' => 1, 'r' => 1,
|
39
|
-
'N' => 2, 'n' => 2,
|
40
|
-
'D' => 3, 'd' => 3,
|
41
|
-
'C' => 4, 'c' => 4,
|
42
|
-
'Q' => 5, 'q' => 5,
|
43
|
-
'E' => 6, 'e' => 6,
|
44
|
-
'G' => 7, 'g' => 7,
|
45
|
-
'H' => 8, 'h' => 8,
|
46
|
-
'I' => 9, 'i' => 9,
|
47
|
-
'L' => 10, 'l' => 10,
|
48
|
-
'K' => 11, 'k' => 11,
|
49
|
-
'M' => 12, 'm' => 12,
|
50
|
-
'F' => 13, 'f' => 13,
|
51
|
-
'P' => 14, 'p' => 14,
|
52
|
-
'S' => 15, 's' => 15,
|
53
|
-
'T' => 16, 't' => 16,
|
54
|
-
'W' => 17, 'w' => 17,
|
55
|
-
'Y' => 18, 'y' => 18,
|
56
|
-
'V' => 19, 'v' => 19,
|
57
|
-
'B' => 20, 'b' => 20,
|
58
|
-
'Z' => 21, 'z' => 21,
|
59
|
-
'X' => 22, 'x' => 22,
|
60
|
-
'*' => 23 }
|
61
|
-
|
62
|
-
INT2AA = { 0 => 'A', 1 => 'R', 2 => 'N', 3 => 'D',
|
63
|
-
4 => 'C', 5 => 'Q', 6 => 'E', 7 => 'G',
|
64
|
-
8 => 'H', 9 => 'I', 10 => 'L', 11 => 'K',
|
65
|
-
12 => 'M', 13 => 'F', 14 => 'P', 15 => 'S',
|
66
|
-
16 => 'T', 17 => 'W', 18 => 'Y', 19 => 'V',
|
67
|
-
20 => 'B', 21 => 'Z', 22 => 'X', 23 => '*' }
|
68
|
-
|
69
|
-
DNAElements = %w[A C G T N]
|
70
|
-
|
71
|
-
DNA2INT = { 'A' => 0, 'a' => 0,
|
72
|
-
'C' => 1, 'c' => 1,
|
73
|
-
'G' => 2, 'g' => 2,
|
74
|
-
'T' => 3, 't' => 3,
|
75
|
-
'N' => 4, 'n' => 4 }
|
76
|
-
|
77
|
-
INT2DNA = { 0 => 'A', 1 => 'C', 2 => 'G', 3 => 'T', 4 => 'N' }
|
78
|
-
|
79
|
-
# reverse complement
|
80
|
-
DNARC = { 'A' => 'T',
|
81
|
-
'C' => 'G',
|
82
|
-
'G' => 'C',
|
83
|
-
'T' => 'A',
|
84
|
-
'N' => 'N',
|
85
|
-
'a' => 'T',
|
86
|
-
'c' => 'G',
|
87
|
-
'g' => 'C',
|
88
|
-
't' => 'A',
|
89
|
-
'n' => 'N' }
|
31
|
+
require_relative 'ssw/ffi'
|
32
|
+
require_relative 'ssw/profile'
|
33
|
+
require_relative 'ssw/align'
|
90
34
|
|
91
35
|
class << self
|
92
36
|
# Create the query profile using the query sequence.
|
@@ -99,10 +43,14 @@ module LibSSW
|
|
99
43
|
# * if your estimated best alignment score is surely < 255 please set 0;
|
100
44
|
# * if your estimated best alignment score >= 255, please set 1;
|
101
45
|
# * if you don't know, please set 2
|
102
|
-
def
|
46
|
+
def init(read, mat, n = nil, score_size: 2)
|
47
|
+
read = read.to_a
|
48
|
+
mat = mat.to_a.flatten
|
49
|
+
raise ArgumentError, 'Expect class of read to be Array' unless read.is_a?(Array)
|
50
|
+
raise ArgumentError, 'Expect class of mat to be Array' unless mat.is_a?(Array)
|
51
|
+
|
103
52
|
read_str = read.pack('c*')
|
104
53
|
read_len = read.size
|
105
|
-
mat = mat.to_a.flatten
|
106
54
|
n = Math.sqrt(mat.size) if n.nil?
|
107
55
|
raise "Not a square matrix. size: #{mat.size}, n: #{n}" if mat.size != n * n
|
108
56
|
|
@@ -126,20 +74,24 @@ module LibSSW
|
|
126
74
|
ptr.instance_variable_set(:@n, n)
|
127
75
|
ptr.instance_variable_set(:@score_size, score_size)
|
128
76
|
|
129
|
-
|
77
|
+
SSW::Profile.new(ptr)
|
130
78
|
end
|
131
79
|
|
132
80
|
# Release the memory allocated by function ssw_init.
# @param profile [Fiddle::Pointer, SSW::Profile, SSW::FFI::Profile]
#   pointer to the query profile structure
# @raise [ArgumentError] if profile is not a Fiddle::Pointer, not a Profile,
#   and does not respond to #to_ptr
# @note Ruby has garbage collection, so there is not much reason to call
#   this method.
def init_destroy(profile)
  # Accept raw pointers, wrapped profiles, and anything convertible via #to_ptr.
  unless profile.is_a?(Fiddle::Pointer) || profile.is_a?(Profile) || profile.respond_to?(:to_ptr)
    raise ArgumentError, 'Expect class of profile to be Profile or Pointer'
  end

  FFI.init_destroy(profile)
end
|
140
92
|
|
141
93
|
# Do Striped Smith-Waterman alignment.
|
142
|
-
# @param prof [Fiddle::Pointer,
|
94
|
+
# @param prof [Fiddle::Pointer, SSW::Profile, SSW::FFI::Profile]
|
143
95
|
# pointer to the query profile structure
|
144
96
|
# @param ref [Array]
|
145
97
|
# target sequence;
|
@@ -181,9 +133,15 @@ module LibSSW
|
|
181
133
|
# SSW C library masks the reference loci nearby (mask length = maskLen)
|
182
134
|
# the best alignment ending position and locates the second largest score
|
183
135
|
# from the unmasked elements.
|
184
|
-
def
|
136
|
+
def align(prof, ref, weight_gap0, weight_gapE, flag, filters, filterd, mask_len = nil)
|
137
|
+
unless prof.is_a?(Fiddle::Pointer) || prof.is_a?(Profile) || prof.respond_to?(:to_ptr)
|
138
|
+
raise ArgumentError, 'Expect class of filename to be Profile or Pointer'
|
139
|
+
end
|
140
|
+
raise ArgumentError, 'Expect class of ref to be Array' unless ref.is_a?(Array)
|
141
|
+
|
185
142
|
ref_str = ref.pack('c*')
|
186
143
|
ref_len = ref.size
|
144
|
+
mask_len ||= [ref_len / 2, 15].max
|
187
145
|
ptr = FFI.ssw_align(
|
188
146
|
prof, ref_str, ref_len, weight_gap0, weight_gapE, flag, filters, filterd, mask_len
|
189
147
|
)
|
@@ -192,14 +150,21 @@ module LibSSW
|
|
192
150
|
# For example: instance_variable_set(:@ref_str, ref_str)
|
193
151
|
#
|
194
152
|
# ptr.free = FFI.instance_variable_get(:@func_map)['align_destroy']
|
195
|
-
|
153
|
+
SSW::Align.new(ptr)
|
196
154
|
end
|
197
155
|
|
198
156
|
# Release the memory allocated by function ssw_align.
# @param align [Fiddle::Pointer, SSW::Align, SSW::FFI::Align]
#   pointer to the alignment result structure
# @return [nil] when given an Align instance (a warning is printed and
#   nothing is released); otherwise whatever FFI.align_destroy returns
# @raise [ArgumentError] if align is not an Align, not a Fiddle::Pointer,
#   and does not respond to #to_ptr
def align_destroy(align)
  if align.is_a?(Align)
    # A Ruby-side Align is not passed on to the C destructor.
    warn "You don't need to call this method for Ruby's Align class."
    nil
  elsif align.is_a?(Fiddle::Pointer) || align.respond_to?(:to_ptr)
    FFI.align_destroy(align)
  else
    raise ArgumentError, 'Expect class of align to be Pointer'
  end
end
|
204
169
|
|
205
170
|
# 1. Calculate the number of mismatches.
|
@@ -231,17 +196,6 @@ module LibSSW
|
|
231
196
|
)
|
232
197
|
end
|
233
198
|
|
234
|
-
def array_to_cigar_string(arr)
|
235
|
-
cigar_string = String.new
|
236
|
-
arr.each do |x|
|
237
|
-
n = x >> 4
|
238
|
-
m = x & 15
|
239
|
-
c = m > 8 ? 'M' : 'MIDNSHP=X'[m]
|
240
|
-
cigar_string << n.to_s << c
|
241
|
-
end
|
242
|
-
cigar_string
|
243
|
-
end
|
244
|
-
|
245
199
|
# Create scoring matrix for the Smith-Waterman algorithm.
|
246
200
|
# @param [Array] elements
|
247
201
|
# @param [Integer] match_score
|
@@ -258,44 +212,37 @@ module LibSSW
|
|
258
212
|
score
|
259
213
|
end
|
260
214
|
|
261
|
-
#
|
262
|
-
def
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
215
|
+
# Build a human-readable, column-aligned view of an alignment.
#
# Walks the packed CIGAR array of +align+ (run length in +x >> 4+, operation
# code in +x & 15+) starting at +align.read_begin1+ / +align.ref_begin1+.
# * M, = and X runs copy both sequences and mark each column with '|' when
#   the characters are equal and '*' otherwise.
# * I runs copy the query and pad the marker and reference lines with spaces.
# * D runs copy the reference and pad the query and marker lines with spaces.
# * Any other operation is skipped without advancing either offset.
#
# @param q_seq [String] query (read) sequence
# @param r_seq [String] reference sequence
# @param align [#read_begin1, #ref_begin1, #cigar, #cigar_string]
#   alignment result, e.g. an SSW::Align
# @return [Array<String>] cigar string, query line, marker line, reference line
def build_path(q_seq, r_seq, align)
  # Unary plus yields unfrozen buffers even under frozen_string_literal, so
  # the lines can be appended in place instead of reallocated with +=.
  query_line = +''
  marker_line = +''
  ref_line = +''
  q_off = align.read_begin1
  r_off = align.ref_begin1

  align.cigar.each do |packed|
    len = packed >> 4
    code = packed & 15
    # Codes above 8 have no letter in the table and are treated as a match run.
    op = code > 8 ? 'M' : 'MIDNSHP=X'[code]

    case op
    when 'M', '=', 'X'
      query_line << q_seq[q_off...(q_off + len)]
      marker_line << Array.new(len) { |j| q_seq[q_off + j] == r_seq[r_off + j] ? '|' : '*' }.join
      ref_line << r_seq[r_off...(r_off + len)]
      q_off += len
      r_off += len
    when 'I'
      query_line << q_seq[q_off...(q_off + len)]
      marker_line << ' ' * len
      ref_line << ' ' * len
      q_off += len
    when 'D'
      query_line << ' ' * len
      marker_line << ' ' * len
      ref_line << r_seq[r_off...(r_off + len)]
      r_off += len
    end
  end

  [align.cigar_string, query_line, marker_line, ref_line]
end
|
300
247
|
end
|
301
248
|
end
|
data/lib/ssw/aaseq.rb
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SSW
  # Conversions between amino-acid sequences and their integer encoding.
  # A residue's integer code is its position in AAELEMENTS.
  module AASeq
    AAELEMENTS = ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G',
                  'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S',
                  'T', 'W', 'Y', 'V', 'B', 'Z', 'X', '*'].freeze

    # Residue letter (upper or lower case) => integer code.
    AA2INT = AAELEMENTS.each_with_index.each_with_object({}) do |(letter, code), table|
      table[letter] = code
      table[letter.downcase] = code
    end.freeze

    # Integer code => upper-case residue letter.
    INT2AA = AAELEMENTS.each_with_index.map { |letter, code| [code, letter] }.to_h.freeze

    module_function

    # Encode an amino-acid string as integer codes.
    # Characters missing from AA2INT are encoded as '*'.
    # @param seq [String]
    # @return [Array<Integer>]
    # @raise [ArgumentError] if seq is not a String
    def to_int_array(seq)
      raise ArgumentError, 'seq must be a string' unless seq.is_a? String

      unknown = AA2INT['*']
      seq.each_char.map { |residue| AA2INT.fetch(residue, unknown) }
    end

    # Decode integer codes back into an amino-acid string.
    # Codes missing from INT2AA are rendered as '*'.
    # @param arr [Array<Integer>]
    # @return [String]
    # @raise [ArgumentError] if arr is not an Array
    def from_int_array(arr)
      raise ArgumentError, 'arr must be an Array' unless arr.is_a? Array

      arr.map { |code| INT2AA.fetch(code, '*') }.join
    end
  end
end
|
data/lib/{libssw → ssw}/align.rb
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
module
|
3
|
+
module SSW
|
4
4
|
# structure of the alignment result
|
5
5
|
# @!attribute score1
|
6
6
|
# @return [Integer] the best alignment score
|
@@ -28,7 +28,9 @@ module LibSSW
|
|
28
28
|
# @!attribute cigar_len
|
29
29
|
# @return [Integer]
|
30
30
|
# length of the cigar string; cigarLen = 0 when the best alignment path is not available
|
31
|
-
|
31
|
+
# @!attribute cigar_string
|
32
|
+
# @return [String] cigar string
|
33
|
+
class Align
|
32
34
|
def self.keys
|
33
35
|
%i[score1 score2 ref_begin1 ref_end1
|
34
36
|
read_begin1 read_end1 ref_end2 cigar cigar_len cigar_string]
|
@@ -50,12 +52,25 @@ module LibSSW
|
|
50
52
|
@cigar_len = align.cigarLen
|
51
53
|
@cigar = cigar_len.positive? ? align.cigar[0, 4 * cigar_len].unpack('L*') : []
|
52
54
|
# Attributes for ruby binding only
|
53
|
-
@cigar_string =
|
54
|
-
|
55
|
+
@cigar_string = array_to_cigar_string(@cigar)
|
56
|
+
SSW.align_destroy(ptr)
|
55
57
|
end
|
56
58
|
|
57
59
|
def to_h
|
58
60
|
self.class.keys.map { |k| [k, __send__(k)] }.to_h
|
59
61
|
end
|
62
|
+
|
63
|
+
private
|
64
|
+
|
65
|
+
# Render packed CIGAR operations as text, e.g. [80] => "5M".
# For each element the run length is taken from x >> 4 and the operation
# code from x & 15; codes above 8 are rendered as 'M'.
# @param arr [Array<Integer>] packed CIGAR operations
# @return [String]
def array_to_cigar_string(arr)
  arr.each_with_object(String.new) do |packed, text|
    op_code = packed & 15
    op_char = op_code > 8 ? 'M' : 'MIDNSHP=X'[op_code]
    text << (packed >> 4).to_s << op_char
  end
end
|
60
75
|
end
|
61
76
|
end
|
data/lib/ssw/dna.rb
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SSW
  # Conversions between DNA sequences and their integer encoding
  # (A=0, C=1, G=2, T=3, N=4), plus reverse complement.
  module DNA
    Elements = %w[A C G T N].freeze

    # Base letter (upper or lower case) => integer code.
    DNA2INT = { 'A' => 0, 'a' => 0,
                'C' => 1, 'c' => 1,
                'G' => 2, 'g' => 2,
                'T' => 3, 't' => 3,
                'N' => 4, 'n' => 4 }.freeze

    # Integer code => upper-case base letter.
    INT2DNA = { 0 => 'A', 1 => 'C', 2 => 'G', 3 => 'T', 4 => 'N' }.freeze

    # Base letter (upper or lower case) => upper-case complement.
    DNARC = { 'A' => 'T',
              'C' => 'G',
              'G' => 'C',
              'T' => 'A',
              'N' => 'N',
              'a' => 'T',
              'c' => 'G',
              'g' => 'C',
              't' => 'A',
              'n' => 'N' }.freeze

    module_function

    # Encode a DNA string as integer codes.
    # Characters missing from DNA2INT are encoded as 'N'.
    # @param seq [String]
    # @return [Array<Integer>]
    # @raise [ArgumentError] if seq is not a String
    def to_int_array(seq)
      raise ArgumentError, 'seq must be a string' unless seq.is_a? String

      seq.each_char.map do |base|
        DNA2INT[base] || DNA2INT['N']
      end
    end

    # Decode integer codes back into a DNA string.
    # Codes missing from INT2DNA are rendered as 'N'.
    # @param arr [Array<Integer>]
    # @return [String]
    # @raise [ArgumentError] if arr is not an Array
    def from_int_array(arr)
      raise ArgumentError, 'arr must be an Array' unless arr.is_a? Array

      arr.map do |i|
        INT2DNA[i] || 'N'
      end.join
    end

    # Reverse complement of a DNA string; the result is upper case.
    # Characters missing from DNARC become 'N', so the output always has the
    # same length as the input (a nil entry would vanish in the join).
    # @param seq [String]
    # @return [String]
    def revcomp(seq)
      seq.each_char.map do |base|
        DNARC.fetch(base, 'N')
      end.join.reverse
    end
  end
end
|
data/lib/{libssw → ssw}/ffi.rb
RENAMED
@@ -2,12 +2,12 @@
|
|
2
2
|
|
3
3
|
require 'fiddle/import'
|
4
4
|
|
5
|
-
module
|
5
|
+
module SSW
|
6
6
|
module FFI
|
7
7
|
extend Fiddle::Importer
|
8
8
|
|
9
9
|
begin
|
10
|
-
dlload
|
10
|
+
dlload SSW.ffi_lib
|
11
11
|
rescue LoadError => e
|
12
12
|
raise LoadError, "Could not find libssw shared library. \n#{e}"
|
13
13
|
end
|
@@ -1,13 +1,13 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
module
|
3
|
+
module SSW
|
4
4
|
# structure of the query profile
|
5
5
|
# @!attribute read
|
6
6
|
# @!attribute mat
|
7
7
|
# @!attribute read_len
|
8
8
|
# @!attribute n
|
9
9
|
# @!attribute bias
|
10
|
-
class Profile
|
10
|
+
class Profile
|
11
11
|
def self.keys
|
12
12
|
%i[read mat read_len n bias]
|
13
13
|
end
|
@@ -17,7 +17,7 @@ module LibSSW
|
|
17
17
|
|
18
18
|
def initialize(ptr)
|
19
19
|
@ptr = ptr
|
20
|
-
@cstruct = profile =
|
20
|
+
@cstruct = profile = SSW::FFI::Profile.new(ptr)
|
21
21
|
@read_len = profile.readLen
|
22
22
|
@read = read_len.positive? ? profile.read[0, read_len].unpack('c*') : []
|
23
23
|
@n = profile.n
|
data/vendor/libssw.so
ADDED
Binary file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: libssw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kojix2
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-03-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: fiddle
|
@@ -120,12 +120,15 @@ files:
|
|
120
120
|
- README.md
|
121
121
|
- exe/rbssw
|
122
122
|
- lib/libssw.rb
|
123
|
-
- lib/
|
124
|
-
- lib/
|
125
|
-
- lib/
|
126
|
-
- lib/
|
127
|
-
- lib/
|
128
|
-
- lib/
|
123
|
+
- lib/ssw/BLOSUM50.rb
|
124
|
+
- lib/ssw/BLOSUM62.rb
|
125
|
+
- lib/ssw/aaseq.rb
|
126
|
+
- lib/ssw/align.rb
|
127
|
+
- lib/ssw/dna.rb
|
128
|
+
- lib/ssw/ffi.rb
|
129
|
+
- lib/ssw/profile.rb
|
130
|
+
- lib/ssw/version.rb
|
131
|
+
- vendor/libssw.so
|
129
132
|
homepage: https://github.com/kojix2/ruby-libssw
|
130
133
|
licenses:
|
131
134
|
- MIT
|