libssw 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c888d3c126247f4f397fc151402fd482cdc4c105bd00163d07cc056b0e202607
4
- data.tar.gz: 8717cde76e5ba26034b05c81871b43730580b09dfb610c9507b0c651e6b816a1
3
+ metadata.gz: 182b0d30cdf3d9a93b100b05f96d469d612b09bcfcfd2afc3cb63a4c93501d17
4
+ data.tar.gz: 99ec370125c707acff3a99664706ce8691f2e66e48d22a49af0362894b2c6b72
5
5
  SHA512:
6
- metadata.gz: d10a0cc734b8c53bf506c97ade223269ad6a64ed356f81d08eb9368e031b068ed85c217d3e47a737a303604b57009a00c0aab94a6eea89a62382cf79dbbee326
7
- data.tar.gz: f18a9eddc8455e13a58058294d80f52ed96b31949e90c25494fd48ac7d3f22df4588e86df389160fca973489db36381bdabb67a2858ceeb811e9a8ddba3249cb
6
+ metadata.gz: 8a49bf4924da5d12310b691f9f9335d96cdce0ef4b82ac1a38aeb6293c92d939cd17e4b0e6e99a4c427a0f4074f94db1cca91ca6c26b76a0551e558371b4aee7
7
+ data.tar.gz: 5af24c2124cd53f8aa54dc905172eb6f9b3d7c57b33fb87d1884ca2ab1a90acf0cb1f2ca73cb88eecda223e3f723836cca2dbae9cb0c1ac665d097993645ee19
data/README.md CHANGED
@@ -1,6 +1,8 @@
1
1
  # ruby-libssw
2
2
 
3
3
  ![test](https://github.com/kojix2/ruby-libssw/workflows/CI/badge.svg)
4
+ [![Gem Version](https://img.shields.io/gem/v/libssw?color=brightgreen)](https://rubygems.org/gems/libssw)
5
+ [![Docs Latest](https://img.shields.io/badge/docs-latest-blue.svg)](https://rubydoc.info/gems/libssw)
4
6
 
5
7
  :checkered_flag: [libssw](https://github.com/mengyao/Complete-Striped-Smith-Waterman-Library) - fast SIMD parallelized implementation of the Smith-Waterman algorithm - for Ruby
6
8
 
@@ -35,18 +37,64 @@ bundle exec rake install
35
37
  ```ruby
36
38
  require 'libssw'
37
39
 
38
- ref = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
39
- read = [0, 1, 2, 3, 3]
40
- mat = [2, -2, -2, -2, 0,
41
- -2, 2, -2, -2, 0,
42
- -2, -2, 2, -2, 0,
43
- -2, -2, -2, 2, 0,
44
- 0, 0, 0, 0, 0]
45
- profile = LibSSW.ssw_init(read, mat)
46
- align = LibSSW.ssw_align(profile, ref, 3, 1, 1, 0, 0, 15)
47
- p align.to_h
40
+ SSW = LibSSW
41
+
42
+ ref_str = "AAAAAAAAACGTTAAAAAAAAAA"
43
+ ref_int = SSW.dna_to_int_array(ref_str)
44
+ # [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
45
+
46
+ read_str1 = "ACGTT"
47
+ read_str2 = SSW.dna_complement(read_str1)
48
+ read_int1 = SSW.dna_to_int_array(read_str1)
49
+ # [0, 1, 2, 3, 3]
50
+ read_int2 = SSW.dna_to_int_array(read_str2)
51
+ # [0, 0, 1, 2, 3]
52
+
53
+ mat = SSW.create_scoring_matrix(SSW::DNAElements, 2, -2)
54
+ # mat = [2, -2, -2, -2, 0,
55
+ # -2, 2, -2, -2, 0,
56
+ # -2, -2, 2, -2, 0,
57
+ # -2, -2, -2, 2, 0,
58
+ # 0, 0, 0, 0, 0]
59
+
60
+ profile1 = LibSSW.ssw_init(read_int1, mat)
61
+ align1 = LibSSW.ssw_align(profile1, ref_int, 3, 1, 1, 0, 0, 15)
62
+ pp align1.to_h
63
+ # {
64
+ # :score1 => 10,
65
+ # :score2 => 0,
66
+ # :ref_begin1 => 8,
67
+ # :ref_end1 => 12,
68
+ # :read_begin1 => 0,
69
+ # :read_end1 => 4,
70
+ # :ref_end2 => 0,
71
+ # :cigar => [80],
72
+ # :cigar_len => 1,
73
+ # :cigar_string => "5M"
74
+ # }
75
+
76
+ profile2 = LibSSW.ssw_init(read_int2, mat)
77
+ align2 = LibSSW.ssw_align(profile2, ref_int, 3, 1, 1, 0, 0, 15)
78
+ pp align2.to_h
79
+ # {
80
+ # :score1 => 10,
81
+ # :score2 => 0,
82
+ # :ref_begin1 => 7,
83
+ # :ref_end1 => 11,
84
+ # :read_begin1 => 0,
85
+ # :read_end1 => 4,
86
+ # :ref_end2 => 0,
87
+ # :cigar => [80],
88
+ # :cigar_len => 1,
89
+ # :cigar_string => "5M"
90
+ # }
48
91
  ```
49
92
 
93
+
94
+ ## Documentation
95
+
96
+ * [API Documentation](https://rubydoc.info/gems/libssw)
97
+
50
98
  ## Development
51
99
 
52
100
  ```sh
data/exe/rbssw CHANGED
@@ -99,13 +99,9 @@ lScore = nil
99
99
  if opts[:bprotein]
100
100
  # load AA score matrix
101
101
  if !opts[:smatrix]
102
- lEle = ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K',
103
- 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V', 'B', 'Z', 'X', '*']
104
- lEle.each_with_index do |ele, i|
105
- dEle2Int[ele] = i
106
- dEle2Int[ele.downcase] = i
107
- dInt2Ele[i] = ele
108
- end
102
+ lEle = SSW::AAELEMENTS
103
+ dEle2Int = SSW::AA2INT
104
+ dInt2Ele = SSW::INT2AA
109
105
  nEleNum = lEle.size
110
106
  lScore = SSW::Blosum50
111
107
  else
@@ -113,31 +109,12 @@ if opts[:bprotein]
113
109
  end
114
110
  elsif !opts[:smatrix]
115
111
  # init DNA score matrix
116
- lEle = %w[A C G T N]
117
- dRc = { 'A': 'C', 'C': 'G', 'G': 'C', 'T': 'A', 'a': 'C', 'c': 'G', 'g': 'C', 't': 'A' }
118
- lEle.each_with_index do |ele, i|
119
- dEle2Int[ele] = i
120
- dEle2Int[ele.downcase] = i
121
- dInt2Ele[i] = ele
122
- end
123
- # dEle2Int = {'A': 0, 'a': 0, 'C': 1, 'G': 2, 'g': 2, 'c': 1, 'N': 4, 'T': 3, 'n': 4, 't': 3}
124
- # dInt2Ele = {0: 'A', 1: 'C', 2: 'G', 3: 'T', 4: 'N'}
112
+ lEle = SSW::DNAElements
113
+ dRc = SSW::DNARC
114
+ dEle2Int = SSW::DNA2INT
115
+ dInt2Ele = SSW::INT2DNA
125
116
  nEleNum = lEle.size # 5
126
- lScore = Array.new(nEleNum**2, 0)
127
- (nEleNum - 1).times do |i|
128
- (nEleNum - 1).times do |j|
129
- lScore[i * nEleNum + j] = if lEle[i] == lEle[j]
130
- opts[:nmatch]
131
- else
132
- -opts[:nmismatch]
133
- end
134
- end
135
- end
136
- # lScore = [ 2, -2, -2, -2, 0,
137
- # -2, 2, -2, -2, 0,
138
- # -2, -2, 2, -2, 0,
139
- # -2, -2, -2, 2, 0,
140
- # 0, 0, 0, 0, 0 ]
117
+ lScore = LibSSW.create_scoring_matrix(lEle, opts[:nmatch], -opts[:nmismatch])
141
118
  end
142
119
 
143
120
  warn 'Reverse complement alignment is not available for protein sequences.' if opts[:bbest] && opts[:bprotein]
@@ -30,6 +30,64 @@ module LibSSW
30
30
  require_relative 'libssw/profile'
31
31
  require_relative 'libssw/align'
32
32
 
33
+ AAELEMENTS = ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G',
34
+ 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S',
35
+ 'T', 'W', 'Y', 'V', 'B', 'Z', 'X', '*']
36
+
37
+ AA2INT = { 'A' => 0, 'a' => 0,
38
+ 'R' => 1, 'r' => 1,
39
+ 'N' => 2, 'n' => 2,
40
+ 'D' => 3, 'd' => 3,
41
+ 'C' => 4, 'c' => 4,
42
+ 'Q' => 5, 'q' => 5,
43
+ 'E' => 6, 'e' => 6,
44
+ 'G' => 7, 'g' => 7,
45
+ 'H' => 8, 'h' => 8,
46
+ 'I' => 9, 'i' => 9,
47
+ 'L' => 10, 'l' => 10,
48
+ 'K' => 11, 'k' => 11,
49
+ 'M' => 12, 'm' => 12,
50
+ 'F' => 13, 'f' => 13,
51
+ 'P' => 14, 'p' => 14,
52
+ 'S' => 15, 's' => 15,
53
+ 'T' => 16, 't' => 16,
54
+ 'W' => 17, 'w' => 17,
55
+ 'Y' => 18, 'y' => 18,
56
+ 'V' => 19, 'v' => 19,
57
+ 'B' => 20, 'b' => 20,
58
+ 'Z' => 21, 'z' => 21,
59
+ 'X' => 22, 'x' => 22,
60
+ '*' => 23 }
61
+
62
+ INT2AA = { 0 => 'A', 1 => 'R', 2 => 'N', 3 => 'D',
63
+ 4 => 'C', 5 => 'Q', 6 => 'E', 7 => 'G',
64
+ 8 => 'H', 9 => 'I', 10 => 'L', 11 => 'K',
65
+ 12 => 'M', 13 => 'F', 14 => 'P', 15 => 'S',
66
+ 16 => 'T', 17 => 'W', 18 => 'Y', 19 => 'V',
67
+ 20 => 'B', 21 => 'Z', 22 => 'X', 23 => '*' }
68
+
69
+ DNAElements = %w[A C G T N]
70
+
71
+ DNA2INT = { 'A' => 0, 'a' => 0,
72
+ 'C' => 1, 'c' => 1,
73
+ 'G' => 2, 'g' => 2,
74
+ 'T' => 3, 't' => 3,
75
+ 'N' => 4, 'n' => 4 }
76
+
77
+ INT2DNA = { 0 => 'A', 1 => 'C', 2 => 'G', 3 => 'T', 4 => 'N' }
78
+
79
+ # reverse complement
80
+ DNARC = { 'A' => 'T',
81
+ 'C' => 'G',
82
+ 'G' => 'C',
83
+ 'T' => 'A',
84
+ 'N' => 'N',
85
+ 'a' => 'T',
86
+ 'c' => 'G',
87
+ 'g' => 'C',
88
+ 't' => 'A',
89
+ 'n' => 'N' }
90
+
33
91
  class << self
34
92
  # Create the query profile using the query sequence.
35
93
  # @param read [Array] query sequence; the query sequence needs to be numbers
@@ -56,27 +114,19 @@ module LibSSW
56
114
  n,
57
115
  score_size
58
116
  )
59
- profile = LibSSW::Profile.new(ptr)
60
- # Check Garbage Collection
61
- %i[read read_len mat n].zip([read, read_len, mat, n]).each do |name, obj|
62
- next unless profile.public_send(name) != obj
63
-
64
- warn "[Error] Struct member: '#{name}'"
65
- warn " * expected value: #{obj}"
66
- warn " * actual value: #{profile.public_send(name)}"
67
- warn " This may have been caused by Ruby'S GC."
68
- end
69
- # Preventing Garbage Collection --force
70
- cstruct = profile.cstruct
71
- cstruct.read = read_str
72
- cstruct.mat = mat_str
73
- cstruct.readLen = read_len
74
- cstruct.n = n
75
- ptr.instance_variable_set(:@read_str, read_str)
76
- ptr.instance_variable_set(:@read_len, read_len)
77
- ptr.instance_variable_set(:@mat_str, mat_str)
78
- ptr.instance_variable_set(:@n, n)
79
- profile
117
+ # Garbage collection workaround
118
+ #
119
+ # * The following code will cause a segmentation violation when manually
120
+ # releasing memory. The reason is unknown.
121
+ # * func_map is only available in newer versions of fiddle.
122
+ # ptr.free = FFI.instance_variable_get(:@func_map)['init_destroy']
123
+ ptr.instance_variable_set(:@read_str, read_str)
124
+ ptr.instance_variable_set(:@read_len, read_len)
125
+ ptr.instance_variable_set(:@mat_str, mat_str)
126
+ ptr.instance_variable_set(:@n, n)
127
+ ptr.instance_variable_set(:@score_size, score_size)
128
+
129
+ LibSSW::Profile.new(ptr)
80
130
  end
81
131
 
82
132
  # Release the memory allocated by function ssw_init.
@@ -140,6 +190,8 @@ module LibSSW
140
190
  # Not sure yet if we should set the instance variable to the pointer as a
141
191
  # garbage collection workaround.
142
192
  # For example: instance_variable_set(:@ref_str, ref_str)
193
+ #
194
+ # ptr.free = FFI.instance_variable_get(:@func_map)['align_destroy']
143
195
  LibSSW::Align.new(ptr)
144
196
  end
145
197
 
@@ -189,5 +241,61 @@ module LibSSW
189
241
  end
190
242
  cigar_string
191
243
  end
244
+
245
+ # Create scoring matrix of the Smith-Waterman algorithm.
246
+ # @param [Array] elements
247
+ # @param [Integer] match_score
248
+ # @param [Integer] mismatch_score
249
+ def create_scoring_matrix(elements, match_score, mismatch_score)
250
+ size = elements.size
251
+ score = Array.new(size * size, 0)
252
+ (size - 1).times do |i|
253
+ (size - 1).times do |j|
254
+ score[i * size + j] = \
255
+ (elements[i] == elements[j] ? match_score : mismatch_score)
256
+ end
257
+ end
258
+ score
259
+ end
260
+
261
+ # @param [String] seq
262
+ def dna_to_int_array(seq)
263
+ raise ArgumentError, 'seq must be a string' unless seq.is_a? String
264
+
265
+ seq.each_char.map do |base|
266
+ DNA2INT[base] || DNA2INT['N']
267
+ end
268
+ end
269
+
270
+ def dna_complement(seq)
271
+ seq.each_char.map do |base|
272
+ DNARC[base]
273
+ end.join.reverse
274
+ end
275
+
276
+ # @param [Array] arr int array
277
+ def int_array_to_dna(arr)
278
+ raise ArgumentError, 'arr must be an Array' unless arr.is_a? Array
279
+
280
+ arr.map do |i|
281
+ INT2DNA[i] || 'N'
282
+ end.join
283
+ end
284
+
285
+ def aaseq_to_int_array(seq)
286
+ raise ArgumentError, 'seq must be a string' unless seq.is_a? String
287
+
288
+ seq.each_char.map do |base|
289
+ AA2INT[base] || AA2INT['*']
290
+ end
291
+ end
292
+
293
+ def int_array_to_aaseq(arr)
294
+ raise ArgumentError, 'arr must be an Array' unless arr.is_a? Array
295
+
296
+ arr.map do |i|
297
+ INT2AA[i] || '*'
298
+ end.join
299
+ end
192
300
  end
193
301
  end
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative 'struct_helper'
4
-
5
3
  module LibSSW
6
4
  # structure of the alignment result
7
5
  # @!attribute score1
@@ -31,15 +29,13 @@ module LibSSW
31
29
  # @return [Integer]
32
30
  # length of the cigar string; cigarLen = 0 when the best alignment path is not available
33
31
  class Align < FFI::Align
34
- include StructHelper
35
-
36
32
  def self.keys
37
33
  %i[score1 score2 ref_begin1 ref_end1
38
34
  read_begin1 read_end1 ref_end2 cigar cigar_len cigar_string]
39
35
  end
40
36
 
41
37
  # This class is read_only
42
- attr_reader(*keys, :ptr, :cstruct)
38
+ attr_reader(*keys)
43
39
 
44
40
  def initialize(ptr)
45
41
  @ptr = ptr
@@ -55,6 +51,11 @@ module LibSSW
55
51
  @cigar = cigar_len.positive? ? align.cigar[0, 4 * cigar_len].unpack('L*') : []
56
52
  # Attributes for ruby binding only
57
53
  @cigar_string = LibSSW.array_to_cigar_string(@cigar)
54
+ LibSSW.align_destroy(ptr)
55
+ end
56
+
57
+ def to_h
58
+ self.class.keys.map { |k| [k, __send__(k)] }.to_h
58
59
  end
59
60
  end
60
61
  end
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative 'struct_helper'
4
-
5
3
  module LibSSW
6
4
  # structure of the query profile
7
5
  # @!attribute read
@@ -10,8 +8,6 @@ module LibSSW
10
8
  # @!attribute n
11
9
  # @!attribute bias
12
10
  class Profile < FFI::Profile
13
- include StructHelper
14
-
15
11
  def self.keys
16
12
  %i[read mat read_len n bias]
17
13
  end
@@ -31,11 +27,16 @@ module LibSSW
31
27
 
32
28
  def to_ptr
33
29
  # Garbage collection workaround
34
- # cstruct.read = p @ptr.instance_variable_get(:@read_str)
35
- # cstruct.mat = p @ptr.instance_variable_get(:@mat_str)
36
- # cstruct.readLen = p @ptr.instance_variable_get(:@read_len)
37
- # cstruct.n = p @ptr.instance_variable_get(:@n)
38
- @ptr
30
+ # Preventing Garbage Collection --force
31
+ cstruct.read = ptr.instance_variable_get(:@read_str)
32
+ cstruct.mat = ptr.instance_variable_get(:@mat_str)
33
+ cstruct.readLen = ptr.instance_variable_get(:@read_len)
34
+ cstruct.n = ptr.instance_variable_get(:@n)
35
+ ptr
36
+ end
37
+
38
+ def to_h
39
+ self.class.keys.map { |k| [k, __send__(k)] }.to_h
39
40
  end
40
41
  end
41
42
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module LibSSW
4
- VERSION = '0.0.1'
4
+ VERSION = '0.0.2'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: libssw
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - kojix2
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-01-13 00:00:00.000000000 Z
11
+ date: 2021-01-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fiddle
@@ -125,7 +125,6 @@ files:
125
125
  - lib/libssw/align.rb
126
126
  - lib/libssw/ffi.rb
127
127
  - lib/libssw/profile.rb
128
- - lib/libssw/struct_helper.rb
129
128
  - lib/libssw/version.rb
130
129
  homepage: https://github.com/kojix2/ruby-libssw
131
130
  licenses:
@@ -1,13 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module LibSSW
4
- module StructHelper
5
- def to_h
6
- self.class.keys.map { |k| [k, __send__(k)] }.to_h
7
- end
8
-
9
- def to_ptr
10
- @ptr
11
- end
12
- end
13
- end