libssw 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c888d3c126247f4f397fc151402fd482cdc4c105bd00163d07cc056b0e202607
4
- data.tar.gz: 8717cde76e5ba26034b05c81871b43730580b09dfb610c9507b0c651e6b816a1
3
+ metadata.gz: 182b0d30cdf3d9a93b100b05f96d469d612b09bcfcfd2afc3cb63a4c93501d17
4
+ data.tar.gz: 99ec370125c707acff3a99664706ce8691f2e66e48d22a49af0362894b2c6b72
5
5
  SHA512:
6
- metadata.gz: d10a0cc734b8c53bf506c97ade223269ad6a64ed356f81d08eb9368e031b068ed85c217d3e47a737a303604b57009a00c0aab94a6eea89a62382cf79dbbee326
7
- data.tar.gz: f18a9eddc8455e13a58058294d80f52ed96b31949e90c25494fd48ac7d3f22df4588e86df389160fca973489db36381bdabb67a2858ceeb811e9a8ddba3249cb
6
+ metadata.gz: 8a49bf4924da5d12310b691f9f9335d96cdce0ef4b82ac1a38aeb6293c92d939cd17e4b0e6e99a4c427a0f4074f94db1cca91ca6c26b76a0551e558371b4aee7
7
+ data.tar.gz: 5af24c2124cd53f8aa54dc905172eb6f9b3d7c57b33fb87d1884ca2ab1a90acf0cb1f2ca73cb88eecda223e3f723836cca2dbae9cb0c1ac665d097993645ee19
data/README.md CHANGED
@@ -1,6 +1,8 @@
1
1
  # ruby-libssw
2
2
 
3
3
  ![test](https://github.com/kojix2/ruby-libssw/workflows/CI/badge.svg)
4
+ [![Gem Version](https://img.shields.io/gem/v/libssw?color=brightgreen)](https://rubygems.org/gems/libssw)
5
+ [![Docs Latest](https://img.shields.io/badge/docs-latest-blue.svg)](https://rubydoc.info/gems/libssw)
4
6
 
5
7
  :checkered_flag: [libssw](https://github.com/mengyao/Complete-Striped-Smith-Waterman-Library) - fast SIMD parallelized implementation of the Smith-Waterman algorithm - for Ruby
6
8
 
@@ -35,18 +37,64 @@ bundle exec rake install
35
37
  ```ruby
36
38
  require 'libssw'
37
39
 
38
- ref = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
39
- read = [0, 1, 2, 3, 3]
40
- mat = [2, -2, -2, -2, 0,
41
- -2, 2, -2, -2, 0,
42
- -2, -2, 2, -2, 0,
43
- -2, -2, -2, 2, 0,
44
- 0, 0, 0, 0, 0]
45
- profile = LibSSW.ssw_init(read, mat)
46
- align = LibSSW.ssw_align(profile, ref, 3, 1, 1, 0, 0, 15)
47
- p align.to_h
40
+ SSW = LibSSW
41
+
42
+ ref_str = "AAAAAAAAACGTTAAAAAAAAAA"
43
+ ref_int = SSW.dna_to_int_array(ref_str)
44
+ # [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
45
+
46
+ read_str1 = "ACGTT"
47
+ read_str2 = SSW.dna_complement(read_str1)
48
+ read_int1 = SSW.dna_to_int_array(read_str1)
49
+ # [0, 1, 2, 3, 3]
50
+ read_int2 = SSW.dna_to_int_array(read_str2)
51
+ # [0, 0, 1, 2, 3]
52
+
53
+ mat = SSW.create_scoring_matrix(SSW::DNAElements, 2, -2)
54
+ # mat = [2, -2, -2, -2, 0,
55
+ # -2, 2, -2, -2, 0,
56
+ # -2, -2, 2, -2, 0,
57
+ # -2, -2, -2, 2, 0,
58
+ # 0, 0, 0, 0, 0]
59
+
60
+ profile1 = LibSSW.ssw_init(read_int1, mat)
61
+ align1 = LibSSW.ssw_align(profile1, ref_int, 3, 1, 1, 0, 0, 15)
62
+ pp align1.to_h
63
+ # {
64
+ # :score1 => 10,
65
+ # :score2 => 0,
66
+ # :ref_begin1 => 8,
67
+ # :ref_end1 => 12,
68
+ # :read_begin1 => 0,
69
+ # :read_end1 => 4,
70
+ # :ref_end2 => 0,
71
+ # :cigar => [80],
72
+ # :cigar_len => 1,
73
+ # :cigar_string => "5M"
74
+ # }
75
+
76
+ profile2 = LibSSW.ssw_init(read_int2, mat)
77
+ align2 = LibSSW.ssw_align(profile2, ref_int, 3, 1, 1, 0, 0, 15)
78
+ pp align2.to_h
79
+ # {
80
+ # :score1 => 10,
81
+ # :score2 => 0,
82
+ # :ref_begin1 => 7,
83
+ # :ref_end1 => 11,
84
+ # :read_begin1 => 0,
85
+ # :read_end1 => 4,
86
+ # :ref_end2 => 0,
87
+ # :cigar => [80],
88
+ # :cigar_len => 1,
89
+ # :cigar_string => "5M"
90
+ # }
48
91
  ```
49
92
 
93
+
94
+ ## Documentation
95
+
96
+ * [API Documentation](https://rubydoc.info/gems/libssw)
97
+
50
98
  ## Development
51
99
 
52
100
  ```sh
data/exe/rbssw CHANGED
@@ -99,13 +99,9 @@ lScore = nil
99
99
  if opts[:bprotein]
100
100
  # load AA score matrix
101
101
  if !opts[:smatrix]
102
- lEle = ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K',
103
- 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V', 'B', 'Z', 'X', '*']
104
- lEle.each_with_index do |ele, i|
105
- dEle2Int[ele] = i
106
- dEle2Int[ele.downcase] = i
107
- dInt2Ele[i] = ele
108
- end
102
+ lEle = SSW::AAELEMENTS
103
+ dEle2Int = SSW::AA2INT
104
+ dInt2Ele = SSW::INT2AA
109
105
  nEleNum = lEle.size
110
106
  lScore = SSW::Blosum50
111
107
  else
@@ -113,31 +109,12 @@ if opts[:bprotein]
113
109
  end
114
110
  elsif !opts[:smatrix]
115
111
  # init DNA score matrix
116
- lEle = %w[A C G T N]
117
- dRc = { 'A': 'C', 'C': 'G', 'G': 'C', 'T': 'A', 'a': 'C', 'c': 'G', 'g': 'C', 't': 'A' }
118
- lEle.each_with_index do |ele, i|
119
- dEle2Int[ele] = i
120
- dEle2Int[ele.downcase] = i
121
- dInt2Ele[i] = ele
122
- end
123
- # dEle2Int = {'A': 0, 'a': 0, 'C': 1, 'G': 2, 'g': 2, 'c': 1, 'N': 4, 'T': 3, 'n': 4, 't': 3}
124
- # dInt2Ele = {0: 'A', 1: 'C', 2: 'G', 3: 'T', 4: 'N'}
112
+ lEle = SSW::DNAELEMENTS
113
+ dRc = SSW::DNARC
114
+ dEle2Int = SSW::DNA2INT
115
+ dInt2Ele = SSW::INT2DNA
125
116
  nEleNum = lEle.size # 5
126
- lScore = Array.new(nEleNum**2, 0)
127
- (nEleNum - 1).times do |i|
128
- (nEleNum - 1).times do |j|
129
- lScore[i * nEleNum + j] = if lEle[i] == lEle[j]
130
- opts[:nmatch]
131
- else
132
- -opts[:nmismatch]
133
- end
134
- end
135
- end
136
- # lScore = [ 2, -2, -2, -2, 0,
137
- # -2, 2, -2, -2, 0,
138
- # -2, -2, 2, -2, 0,
139
- # -2, -2, -2, 2, 0,
140
- # 0, 0, 0, 0, 0 ]
117
+ lScore = LibSSW.create_scoring_matrix(lEle, opts[:nmatch], -opts[:nmismatch])
141
118
  end
142
119
 
143
120
  warn 'Reverse complement alignment is not available for protein sequences.' if opts[:bbest] && opts[:bprotein]
@@ -30,6 +30,64 @@ module LibSSW
30
30
  require_relative 'libssw/profile'
31
31
  require_relative 'libssw/align'
32
32
 
33
+ AAELEMENTS = ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G',
34
+ 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S',
35
+ 'T', 'W', 'Y', 'V', 'B', 'Z', 'X', '*']
36
+
37
+ AA2INT = { 'A' => 0, 'a' => 0,
38
+ 'R' => 1, 'r' => 1,
39
+ 'N' => 2, 'n' => 2,
40
+ 'D' => 3, 'd' => 3,
41
+ 'C' => 4, 'c' => 4,
42
+ 'Q' => 5, 'q' => 5,
43
+ 'E' => 6, 'e' => 6,
44
+ 'G' => 7, 'g' => 7,
45
+ 'H' => 8, 'h' => 8,
46
+ 'I' => 9, 'i' => 9,
47
+ 'L' => 10, 'l' => 10,
48
+ 'K' => 11, 'k' => 11,
49
+ 'M' => 12, 'm' => 12,
50
+ 'F' => 13, 'f' => 13,
51
+ 'P' => 14, 'p' => 14,
52
+ 'S' => 15, 's' => 15,
53
+ 'T' => 16, 't' => 16,
54
+ 'W' => 17, 'w' => 17,
55
+ 'Y' => 18, 'y' => 18,
56
+ 'V' => 19, 'v' => 19,
57
+ 'B' => 20, 'b' => 20,
58
+ 'Z' => 21, 'z' => 21,
59
+ 'X' => 22, 'x' => 22,
60
+ '*' => 23 }
61
+
62
+ INT2AA = { 0 => 'A', 1 => 'R', 2 => 'N', 3 => 'D',
63
+ 4 => 'C', 5 => 'Q', 6 => 'E', 7 => 'G',
64
+ 8 => 'H', 9 => 'I', 10 => 'L', 11 => 'K',
65
+ 12 => 'M', 13 => 'F', 14 => 'P', 15 => 'S',
66
+ 16 => 'T', 17 => 'W', 18 => 'Y', 19 => 'V',
67
+ 20 => 'B', 21 => 'Z', 22 => 'X', 23 => '*' }
68
+
69
+ DNAElements = %w[A C G T N]
70
+
71
+ DNA2INT = { 'A' => 0, 'a' => 0,
72
+ 'C' => 1, 'c' => 1,
73
+ 'G' => 2, 'g' => 2,
74
+ 'T' => 3, 't' => 3,
75
+ 'N' => 4, 'n' => 4 }
76
+
77
+ INT2DNA = { 0 => 'A', 1 => 'C', 2 => 'G', 3 => 'T', 4 => 'N' }
78
+
79
+ # reverse complement
80
+ DNARC = { 'A' => 'T',
81
+ 'C' => 'G',
82
+ 'G' => 'C',
83
+ 'T' => 'A',
84
+ 'N' => 'N',
85
+ 'a' => 'T',
86
+ 'c' => 'G',
87
+ 'g' => 'C',
88
+ 't' => 'A',
89
+ 'n' => 'N' }
90
+
33
91
  class << self
34
92
  # Create the query profile using the query sequence.
35
93
  # @param read [Array] query sequence; the query sequence needs to be numbers
@@ -56,27 +114,19 @@ module LibSSW
56
114
  n,
57
115
  score_size
58
116
  )
59
- profile = LibSSW::Profile.new(ptr)
60
- # Check Garbage Collection
61
- %i[read read_len mat n].zip([read, read_len, mat, n]).each do |name, obj|
62
- next unless profile.public_send(name) != obj
63
-
64
- warn "[Error] Struct member: '#{name}'"
65
- warn " * expected value: #{obj}"
66
- warn " * actual value: #{profile.public_send(name)}"
67
- warn " This may have been caused by Ruby'S GC."
68
- end
69
- # Preventing Garbage Collection --force
70
- cstruct = profile.cstruct
71
- cstruct.read = read_str
72
- cstruct.mat = mat_str
73
- cstruct.readLen = read_len
74
- cstruct.n = n
75
- ptr.instance_variable_set(:@read_str, read_str)
76
- ptr.instance_variable_set(:@read_len, read_len)
77
- ptr.instance_variable_set(:@mat_str, mat_str)
78
- ptr.instance_variable_set(:@n, n)
79
- profile
117
+ # Garbage collection workaround
118
+ #
119
+ # * The following code will cause a segmentation violation when manually
120
+ # releasing memory. The reason is unknown.
121
+ # * func_map is only available in newer versions of fiddle.
122
+ # ptr.free = FFI.instance_variable_get(:@func_map)['init_destroy']
123
+ ptr.instance_variable_set(:@read_str, read_str)
124
+ ptr.instance_variable_set(:@read_len, read_len)
125
+ ptr.instance_variable_set(:@mat_str, mat_str)
126
+ ptr.instance_variable_set(:@n, n)
127
+ ptr.instance_variable_set(:@score_size, score_size)
128
+
129
+ LibSSW::Profile.new(ptr)
80
130
  end
81
131
 
82
132
  # Release the memory allocated by function ssw_init.
@@ -140,6 +190,8 @@ module LibSSW
140
190
  # Not sure yet if we should set the instance variable to the pointer as a
141
191
  # garbage collection workaround.
142
192
  # For example: instance_variable_set(:@ref_str, ref_str)
193
+ #
194
+ # ptr.free = FFI.instance_variable_get(:@func_map)['align_destroy']
143
195
  LibSSW::Align.new(ptr)
144
196
  end
145
197
 
@@ -189,5 +241,61 @@ module LibSSW
189
241
  end
190
242
  cigar_string
191
243
  end
244
+
245
+ # Create scoring matrix of Smith-Waterman algorithm.
246
+ # @param [Array] elements
247
+ # @param [Integer] match_score
248
+ # @param [Integer] mismatch_score
249
+ def create_scoring_matrix(elements, match_score, mismatch_score)
250
+ size = elements.size
251
+ score = Array.new(size * size, 0)
252
+ (size - 1).times do |i|
253
+ (size - 1).times do |j|
254
+ score[i * size + j] = \
255
+ (elements[i] == elements[j] ? match_score : mismatch_score)
256
+ end
257
+ end
258
+ score
259
+ end
260
+
261
+ # @param [String] seq
262
+ def dna_to_int_array(seq)
263
+ raise ArgumentError, 'seq must be a string' unless seq.is_a? String
264
+
265
+ seq.each_char.map do |base|
266
+ DNA2INT[base] || DNA2INT['N']
267
+ end
268
+ end
269
+
270
+ def dna_complement(seq)
271
+ seq.each_char.map do |base|
272
+ DNARC[base]
273
+ end.join.reverse
274
+ end
275
+
276
+ # @param [Array] arr array of integers
277
+ def int_array_to_dna(arr)
278
+ raise ArgumentError, 'arr must be an Array' unless arr.is_a? Array
279
+
280
+ arr.map do |i|
281
+ INT2DNA[i] || 'N'
282
+ end.join
283
+ end
284
+
285
+ def aaseq_to_int_array(seq)
286
+ raise ArgumentError, 'seq must be a string' unless seq.is_a? String
287
+
288
+ seq.each_char.map do |base|
289
+ AA2INT[base] || AA2INT['*']
290
+ end
291
+ end
292
+
293
+ def int_array_to_aaseq(arr)
294
+ raise ArgumentError, 'arr must be an Array' unless arr.is_a? Array
295
+
296
+ arr.map do |i|
297
+ INT2AA[i] || '*'
298
+ end.join
299
+ end
192
300
  end
193
301
  end
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative 'struct_helper'
4
-
5
3
  module LibSSW
6
4
  # structure of the alignment result
7
5
  # @!attribute score1
@@ -31,15 +29,13 @@ module LibSSW
31
29
  # @return [Integer]
32
30
  # length of the cigar string; cigarLen = 0 when the best alignment path is not available
33
31
  class Align < FFI::Align
34
- include StructHelper
35
-
36
32
  def self.keys
37
33
  %i[score1 score2 ref_begin1 ref_end1
38
34
  read_begin1 read_end1 ref_end2 cigar cigar_len cigar_string]
39
35
  end
40
36
 
41
37
  # This class is read_only
42
- attr_reader(*keys, :ptr, :cstruct)
38
+ attr_reader(*keys)
43
39
 
44
40
  def initialize(ptr)
45
41
  @ptr = ptr
@@ -55,6 +51,11 @@ module LibSSW
55
51
  @cigar = cigar_len.positive? ? align.cigar[0, 4 * cigar_len].unpack('L*') : []
56
52
  # Attributes for ruby binding only
57
53
  @cigar_string = LibSSW.array_to_cigar_string(@cigar)
54
+ LibSSW.align_destroy(ptr)
55
+ end
56
+
57
+ def to_h
58
+ self.class.keys.map { |k| [k, __send__(k)] }.to_h
58
59
  end
59
60
  end
60
61
  end
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative 'struct_helper'
4
-
5
3
  module LibSSW
6
4
  # structure of the query profile
7
5
  # @!attribute read
@@ -10,8 +8,6 @@ module LibSSW
10
8
  # @!attribute n
11
9
  # @!attribute bias
12
10
  class Profile < FFI::Profile
13
- include StructHelper
14
-
15
11
  def self.keys
16
12
  %i[read mat read_len n bias]
17
13
  end
@@ -31,11 +27,16 @@ module LibSSW
31
27
 
32
28
  def to_ptr
33
29
  # Garbage collection workaround
34
- # cstruct.read = p @ptr.instance_variable_get(:@read_str)
35
- # cstruct.mat = p @ptr.instance_variable_get(:@mat_str)
36
- # cstruct.readLen = p @ptr.instance_variable_get(:@read_len)
37
- # cstruct.n = p @ptr.instance_variable_get(:@n)
38
- @ptr
30
+ # Preventing Garbage Collection --force
31
+ cstruct.read = ptr.instance_variable_get(:@read_str)
32
+ cstruct.mat = ptr.instance_variable_get(:@mat_str)
33
+ cstruct.readLen = ptr.instance_variable_get(:@read_len)
34
+ cstruct.n = ptr.instance_variable_get(:@n)
35
+ ptr
36
+ end
37
+
38
+ def to_h
39
+ self.class.keys.map { |k| [k, __send__(k)] }.to_h
39
40
  end
40
41
  end
41
42
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module LibSSW
4
- VERSION = '0.0.1'
4
+ VERSION = '0.0.2'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: libssw
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - kojix2
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-01-13 00:00:00.000000000 Z
11
+ date: 2021-01-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fiddle
@@ -125,7 +125,6 @@ files:
125
125
  - lib/libssw/align.rb
126
126
  - lib/libssw/ffi.rb
127
127
  - lib/libssw/profile.rb
128
- - lib/libssw/struct_helper.rb
129
128
  - lib/libssw/version.rb
130
129
  homepage: https://github.com/kojix2/ruby-libssw
131
130
  licenses:
@@ -1,13 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module LibSSW
4
- module StructHelper
5
- def to_h
6
- self.class.keys.map { |k| [k, __send__(k)] }.to_h
7
- end
8
-
9
- def to_ptr
10
- @ptr
11
- end
12
- end
13
- end