libssw 0.0.2 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 182b0d30cdf3d9a93b100b05f96d469d612b09bcfcfd2afc3cb63a4c93501d17
4
- data.tar.gz: 99ec370125c707acff3a99664706ce8691f2e66e48d22a49af0362894b2c6b72
3
+ metadata.gz: 0b67671ac9e959ee7e147bfff872719e6c787a020fb15a9b63e9128aef51d9d3
4
+ data.tar.gz: 0baec30769ef3e0f9248346dded445d9a4fa1eaafd9b2071b74e9e5deeb7e4be
5
5
  SHA512:
6
- metadata.gz: 8a49bf4924da5d12310b691f9f9335d96cdce0ef4b82ac1a38aeb6293c92d939cd17e4b0e6e99a4c427a0f4074f94db1cca91ca6c26b76a0551e558371b4aee7
7
- data.tar.gz: 5af24c2124cd53f8aa54dc905172eb6f9b3d7c57b33fb87d1884ca2ab1a90acf0cb1f2ca73cb88eecda223e3f723836cca2dbae9cb0c1ac665d097993645ee19
6
+ metadata.gz: 6fb9b0ad32647d27418b666545f3fe34ffab9e902ddbae0575733695d6101fa26b261dcc5d9066e9b8c9a85bac3e1c2cce6be79b58c63aab7a0896b691561a7e
7
+ data.tar.gz: 9c3862016d4490e0fce60296acf8fbe129c5f166363c0cdd978862e066819ec8b2a850dc345716750503e15521480e026d2aac0278f0e8db9394505cc98db3ab
data/README.md CHANGED
@@ -3,11 +3,10 @@
3
3
  ![test](https://github.com/kojix2/ruby-libssw/workflows/CI/badge.svg)
4
4
  [![Gem Version](https://img.shields.io/gem/v/libssw?color=brightgreen)](https://rubygems.org/gems/libssw)
5
5
  [![Docs Latest](https://img.shields.io/badge/docs-latest-blue.svg)](https://rubydoc.info/gems/libssw)
6
+ [![DOI](https://zenodo.org/badge/328163622.svg)](https://zenodo.org/badge/latestdoi/328163622)
6
7
 
7
8
  :checkered_flag: [libssw](https://github.com/mengyao/Complete-Striped-Smith-Waterman-Library) - fast SIMD parallelized implementation of the Smith-Waterman algorithm - for Ruby
8
9
 
9
- :construction: Under development.
10
-
11
10
  ## Installation
12
11
 
13
12
  ```sh
@@ -24,41 +23,42 @@ export LIBSSWDIR=/usr/lib/x86_64-linux-gnu/ # libssw.so
24
23
 
25
24
  ### Installing from source
26
25
 
27
- When installing from source code using the following steps, the shared library `libssw.so` will be packed in the Ruby gem. In this case, the environment variable LIBSSWDIR is not required. (Only tested on Ubuntu)
26
+ When installing from source code using the following steps, the shared library `libssw.so` or `libssw.dylib` will be packed in the Ruby gem. In this case, the environment variable `LIBSSWDIR` is not required.
28
27
 
29
28
  ```sh
30
- git clone --recurse-submodules https://github.com/kojix2/ruby-libssw
31
- bundle exec rake libssw:compile
29
+ git clone --recursive https://github.com/kojix2/ruby-libssw
30
+ bundle exec rake libssw:build
32
31
  bundle exec rake install
33
32
  ```
34
33
 
34
+ ruby-libssw does not support Windows.
35
+
35
36
  ## Usage
36
37
 
37
38
  ```ruby
38
39
  require 'libssw'
39
40
 
40
- SSW = LibSSW
41
-
42
41
  ref_str = "AAAAAAAAACGTTAAAAAAAAAA"
43
- ref_int = SSW.dna_to_int_array(ref_str)
42
+ ref_int = SSW::DNA.to_int_array(ref_str)
44
43
  # [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
45
44
 
46
45
  read_str1 = "ACGTT"
47
- read_str2 = SSW.dna_complement(read_str1)
48
- read_int1 = SSW.dna_to_int_array(read_str1)
46
+ read_str2 = SSW::DNA.revcomp(read_str1)
47
+ # "AACGT"
48
+ read_int1 = SSW::DNA.to_int_array(read_str1)
49
49
  # [0, 1, 2, 3, 3]
50
- read_int2 = SSW.dna_to_int_array(read_str2)
50
+ read_int2 = SSW::DNA.to_int_array(read_str2)
51
51
  # [0, 0, 1, 2, 3]
52
52
 
53
- mat = SSW.create_scoring_matrix(SSW::DNAElements, 2, -2)
53
+ mat = SSW.create_scoring_matrix(SSW::DNA::Elements, 2, -2)
54
54
  # mat = [2, -2, -2, -2, 0,
55
55
  # -2, 2, -2, -2, 0,
56
56
  # -2, -2, 2, -2, 0,
57
57
  # -2, -2, -2, 2, 0,
58
58
  # 0, 0, 0, 0, 0]
59
59
 
60
- profile1 = LibSSW.ssw_init(read_int1, mat)
61
- align1 = LibSSW.ssw_align(profile1, ref_int, 3, 1, 1, 0, 0, 15)
60
+ profile1 = SSW.init(read_int1, mat)
61
+ align1 = SSW.align(profile1, ref_int, 3, 1, 1, 0, 0)
62
62
  pp align1.to_h
63
63
  # {
64
64
  # :score1 => 10,
@@ -73,8 +73,8 @@ pp align1.to_h
73
73
  # :cigar_string => "5M"
74
74
  # }
75
75
 
76
- profile2 = LibSSW.ssw_init(read_int2, mat)
77
- align2 = LibSSW.ssw_align(profile2, ref_int, 3, 1, 1, 0, 0, 15)
76
+ profile2 = SSW.init(read_int2, mat)
77
+ align2 = SSW.align(profile2, ref_int, 3, 1, 1, 0, 0)
78
78
  pp align2.to_h
79
79
  # {
80
80
  # :score1 => 10,
@@ -88,25 +88,75 @@ pp align2.to_h
88
88
  # :cigar_len => 1,
89
89
  # :cigar_string => "5M"
90
90
  # }
91
+
92
+ puts SSW.build_path(read_str1, ref_str, align1)
93
+ # 5M
94
+ # ACGTT
95
+ # |||||
96
+ # ACGTT
91
97
  ```
92
98
 
99
+ ## APIs
100
+
101
+ See [API Documentation](https://rubydoc.info/gems/libssw).
102
+
103
+ ```markdown
104
+ - SSW module
105
+
106
+ - SSW.init
107
+ - SSW.init_destroy
108
+ - SSW.align
109
+ - SSW.align_destroy
110
+ - SSW.mark_mismatch
111
+ - SSW.create_scoring_matrix
112
+ - SSW.build_path
113
+
114
+ - Profile class
93
115
 
94
- ## Documentation
116
+ - attributes
117
+ - read, mat, read_len, n, bias
95
118
 
96
- * [API Documentation](https://rubydoc.info/gems/libssw)
119
+ - Align class
120
+
121
+ - attributes
122
+ - score1, score2, ref_begin1, ref_end1, read_begin1, read_end1, ref_end2
123
+ cigar, cigar_len, cigar_string
124
+
125
+ - DNA module
126
+
127
+ - DNA.to_int_array
128
+ - DNA.from_int_array
129
+ - DNA.revcomp
130
+
131
+ - AASeq module
132
+
133
+ - AASeq.to_int_array
134
+ - AASeq.from_int_array
135
+
136
+ - BLOSUM62
137
+ - BLOSUM50
138
+ ```
97
139
 
98
140
  ## Development
99
141
 
100
142
  ```sh
101
- git clone --recurse-submodules https://github.com/kojix2/ruby-libssw
102
- bundle exec rake libssw:compile
143
+ git clone --recursive https://github.com/kojix2/ruby-libssw
144
+ bundle exec rake libssw:build
103
145
  bundle exec rake test
104
146
  ```
105
147
 
148
+ Do you need commit rights to my repository?
149
+ Do you want to get admin rights and take over the project?
150
+ If so, please feel free to contact me @kojix2.
151
+
106
152
  ## Contributing
107
153
 
108
- Bug reports and pull requests are welcome on GitHub at https://github.com/kojix2/ruby-libssw.
154
+ - [Report bugs](https://github.com/kojix2/ruby-libssw/issues)
155
+ - Fix bugs and [submit pull requests](https://github.com/kojix2/ruby-libssw/pulls)
156
+ - Write, clarify, or fix documentation
157
+ - English corrections are welcome
158
+ - Suggest or add new features
109
159
 
110
160
  ## License
111
161
 
112
- * [MIT License](https://opensource.org/licenses/MIT).
162
+ - [MIT License](https://opensource.org/licenses/MIT).
data/lib/libssw.rb CHANGED
@@ -1,24 +1,19 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative 'libssw/version'
4
- require_relative 'libssw/BLOSUM50'
5
- require_relative 'libssw/BLOSUM62'
3
+ require_relative 'ssw/version'
4
+ require_relative 'ssw/BLOSUM50'
5
+ require_relative 'ssw/BLOSUM62'
6
+ require_relative 'ssw/dna'
7
+ require_relative 'ssw/aaseq'
6
8
 
7
- module LibSSW
9
+ module SSW
8
10
  class Error < StandardError; end
9
11
 
10
12
  class << self
11
13
  attr_accessor :ffi_lib
12
14
  end
13
15
 
14
- lib_name = case RbConfig::CONFIG['host_os']
15
- when /mswin|msys|mingw|cygwin|bccwin|wince|emc/
16
- 'libssw.dll' # unconfirmed
17
- when /darwin|mac os/
18
- 'libssw.dylib' # unconfirmed
19
- else
20
- 'libssw.so'
21
- end
16
+ lib_name = "libssw.#{RbConfig::CONFIG['SOEXT']}" # Ruby 2.5 or later
22
17
 
23
18
  self.ffi_lib = if ENV['LIBSSWDIR'] && !ENV['LIBSSWDIR'].empty?
24
19
  File.expand_path(lib_name, ENV['LIBSSWDIR'])
@@ -26,67 +21,14 @@ module LibSSW
26
21
  File.expand_path("../vendor/#{lib_name}", __dir__)
27
22
  end
28
23
 
29
- require_relative 'libssw/ffi'
30
- require_relative 'libssw/profile'
31
- require_relative 'libssw/align'
24
+ # NOTE: Why not use pkg-config?
25
+ # APT package is available.
26
+ # However, it does not include a .pc file.
27
+ # Thus pkg-config will not find the shared library.
32
28
 
33
- AAELEMENTS = ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G',
34
- 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S',
35
- 'T', 'W', 'Y', 'V', 'B', 'Z', 'X', '*']
36
-
37
- AA2INT = { 'A' => 0, 'a' => 0,
38
- 'R' => 1, 'r' => 1,
39
- 'N' => 2, 'n' => 2,
40
- 'D' => 3, 'd' => 3,
41
- 'C' => 4, 'c' => 4,
42
- 'Q' => 5, 'q' => 5,
43
- 'E' => 6, 'e' => 6,
44
- 'G' => 7, 'g' => 7,
45
- 'H' => 8, 'h' => 8,
46
- 'I' => 9, 'i' => 9,
47
- 'L' => 10, 'l' => 10,
48
- 'K' => 11, 'k' => 11,
49
- 'M' => 12, 'm' => 12,
50
- 'F' => 13, 'f' => 13,
51
- 'P' => 14, 'p' => 14,
52
- 'S' => 15, 's' => 15,
53
- 'T' => 16, 't' => 16,
54
- 'W' => 17, 'w' => 17,
55
- 'Y' => 18, 'y' => 18,
56
- 'V' => 19, 'v' => 19,
57
- 'B' => 20, 'b' => 20,
58
- 'Z' => 21, 'z' => 21,
59
- 'X' => 22, 'x' => 22,
60
- '*' => 23 }
61
-
62
- INT2AA = { 0 => 'A', 1 => 'R', 2 => 'N', 3 => 'D',
63
- 4 => 'C', 5 => 'Q', 6 => 'E', 7 => 'G',
64
- 8 => 'H', 9 => 'I', 10 => 'L', 11 => 'K',
65
- 12 => 'M', 13 => 'F', 14 => 'P', 15 => 'S',
66
- 16 => 'T', 17 => 'W', 18 => 'Y', 19 => 'V',
67
- 20 => 'B', 21 => 'Z', 22 => 'X', 23 => '*' }
68
-
69
- DNAElements = %w[A C G T N]
70
-
71
- DNA2INT = { 'A' => 0, 'a' => 0,
72
- 'C' => 1, 'c' => 1,
73
- 'G' => 2, 'g' => 2,
74
- 'T' => 3, 't' => 3,
75
- 'N' => 4, 'n' => 4 }
76
-
77
- INT2DNA = { 0 => 'A', 1 => 'C', 2 => 'G', 3 => 'T', 4 => 'N' }
78
-
79
- # reverse complement
80
- DNARC = { 'A' => 'T',
81
- 'C' => 'G',
82
- 'G' => 'C',
83
- 'T' => 'A',
84
- 'N' => 'N',
85
- 'a' => 'T',
86
- 'c' => 'G',
87
- 'g' => 'C',
88
- 't' => 'A',
89
- 'n' => 'N' }
29
+ require_relative 'ssw/libssw'
30
+ require_relative 'ssw/profile'
31
+ require_relative 'ssw/align'
90
32
 
91
33
  class << self
92
34
  # Create the query profile using the query sequence.
@@ -99,47 +41,59 @@ module LibSSW
99
41
  # * if your estimated best alignment score is surely < 255 please set 0;
100
42
  # * if your estimated best alignment score >= 255, please set 1;
101
43
  # * if you don't know, please set 2
102
- def ssw_init(read, mat, n = nil, score_size: 2)
44
+ def init(read, mat, n = nil, score_size: 2)
45
+ read = read.to_a
46
+ mat = mat.to_a.flatten
47
+ raise ArgumentError, 'Expect class of read to be Array' unless read.is_a?(Array)
48
+ raise ArgumentError, 'Expect class of mat to be Array' unless mat.is_a?(Array)
49
+
103
50
  read_str = read.pack('c*')
104
51
  read_len = read.size
105
- mat = mat.to_a.flatten
106
52
  n = Math.sqrt(mat.size) if n.nil?
107
53
  raise "Not a square matrix. size: #{mat.size}, n: #{n}" if mat.size != n * n
108
54
 
109
55
  mat_str = mat.flatten.pack('c*')
110
- ptr = FFI.ssw_init(
56
+ ptr = LibSSW.ssw_init(
111
57
  read_str,
112
58
  read_len,
113
59
  mat_str,
114
60
  n,
115
61
  score_size
116
62
  )
117
- # Garbage collection workaround
63
+ # Garbage collection workaround:
64
+ # The C library stores pointers to read and mat without copying the data.
65
+ # We must keep the Ruby strings (read_str, mat_str) alive for the lifetime
66
+ # of the profile structure to prevent segmentation faults.
118
67
  #
119
- # * The following code will cause a segmentation violation when manually
120
- # releasing memory. The reason is unknown.
121
- # * func_map is only available in newer versions of fiddle.
122
- # ptr.free = FFI.instance_variable_get(:@func_map)['init_destroy']
68
+ # We cannot use Fiddle's automatic memory management (ptr.free) here because:
69
+ # - Calling init_destroy from Ruby's GC causes segmentation violations
70
+ # - The user should explicitly call SSW.init_destroy when done, or let
71
+ # Ruby's GC clean up the profile structure itself (though the contained
72
+ # profile_byte/profile_word will leak unless init_destroy is called)
123
73
  ptr.instance_variable_set(:@read_str, read_str)
124
- ptr.instance_variable_set(:@read_len, read_len)
125
74
  ptr.instance_variable_set(:@mat_str, mat_str)
75
+ ptr.instance_variable_set(:@read_len, read_len)
126
76
  ptr.instance_variable_set(:@n, n)
127
77
  ptr.instance_variable_set(:@score_size, score_size)
128
78
 
129
- LibSSW::Profile.new(ptr)
79
+ SSW::Profile.new(ptr)
130
80
  end
131
81
 
132
82
  # Release the memory allocated by function ssw_init.
133
- # @param p [Fiddle::Pointer, LibSSW::Profile, LibSSW::FFI::Profile]
83
+ # @param profile [Fiddle::Pointer, SSW::Profile, SSW::LibSSW::Profile]
134
84
  # pointer to the query profile structure
135
85
  # @note Ruby has garbage collection, so there is not much reason to call
136
86
  # this method.
137
87
  def init_destroy(profile)
138
- FFI.init_destroy(profile)
88
+ unless profile.is_a?(Fiddle::Pointer) || profile.is_a?(Profile) || profile.respond_to?(:to_ptr)
89
+ raise ArgumentError, 'Expect class of profile to be Profile or Pointer'
90
+ end
91
+
92
+ LibSSW.init_destroy(profile)
139
93
  end
140
94
 
141
95
  # Do Striped Smith-Waterman alignment.
142
- # @param prof [Fiddle::Pointer, LibSSW::Profile, LibSSW::FFI::Profile]
96
+ # @param prof [Fiddle::Pointer, SSW::Profile, SSW::LibSSW::Profile]
143
97
  # pointer to the query profile structure
144
98
  # @param ref [Array]
145
99
  # target sequence;
@@ -148,25 +102,25 @@ module LibSSW
148
102
  # @param weight_gap0 [Integer] the absolute value of gap open penalty
149
103
  # @param weight_gapE [Integer] the absolute value of gap extension penalty
150
104
  # @param flag [Integer]
151
- # * bit 5: when setted as 1, function ssw_align will return the best
105
+ # * bit 5: when set as 1, function ssw_align will return the best
152
106
  # alignment beginning position;
153
- # * bit 6: when setted as 1, if (ref_end1 - ref_begin1 < filterd &&
154
- # read_end1 - read_begin1 < filterd), (whatever bit 5 is setted) the
107
+ # * bit 6: when set as 1, if (ref_end1 - ref_begin1 < filterd &&
108
+ # read_end1 - read_begin1 < filterd), (whatever bit 5 is set) the
155
109
  # function will return the best alignment beginning position and cigar;
156
- # * bit 7: when setted as 1, if the best alignment score >= filters,
157
- # (whatever bit 5 is setted) the function will return the best
110
+ # * bit 7: when set as 1, if the best alignment score >= filters,
111
+ # (whatever bit 5 is set) the function will return the best
158
112
  # alignment beginning position and cigar;
159
- # * bit 8: when setted as 1, (whatever bit 5, 6 or 7 is setted) the
113
+ # * bit 8: when set as 1, (whatever bit 5, 6 or 7 is set) the
160
114
  # function will always return the best alignment beginning position and
161
115
  # cigar. When flag == 0, only the optimal and sub-optimal scores and the
162
116
  # optimal alignment ending position will be returned.
163
117
  # @param filters [Integer]
164
- # scorefilter: when bit 7 of flag is setted as 1 and bit 8 is setted as 0,
165
- # filters will be used (Please check the decription of the flag parameter
118
+ # scorefilter: when bit 7 of flag is set as 1 and bit 8 is set as 0,
119
+ # filters will be used (Please check the description of the flag parameter
166
120
  # for detailed usage.)
167
121
  # @param filterd [Integer]
168
- # distance filter: when bit 6 of flag is setted as 1 and bit 8 is setted
169
- # as 0, filterd will be used (Please check the decription of the flag
122
+ # distance filter: when bit 6 of flag is set as 1 and bit 8 is set
123
+ # as 0, filterd will be used (Please check the description of the flag
170
124
  # parameter for detailed usage.)
171
125
  # @param mask_len [Integer]
172
126
  # The distance between the optimal and suboptimal alignment ending
@@ -181,25 +135,40 @@ module LibSSW
181
135
  # SSW C library masks the reference loci nearby (mask length = maskLen)
182
136
  # the best alignment ending position and locates the second largest score
183
137
  # from the unmasked elements.
184
- def ssw_align(prof, ref, weight_gap0, weight_gapE, flag, filters, filterd, mask_len)
138
+ # @return [Align]
139
+ def align(prof, ref, weight_gap0, weight_gapE, flag, filters, filterd, mask_len = nil)
140
+ unless prof.is_a?(Fiddle::Pointer) || prof.is_a?(Profile) || prof.respond_to?(:to_ptr)
141
+ raise ArgumentError, 'Expect class of filename to be Profile or Pointer'
142
+ end
143
+ raise ArgumentError, 'Expect class of ref to be Array' unless ref.is_a?(Array)
144
+
185
145
  ref_str = ref.pack('c*')
186
146
  ref_len = ref.size
187
- ptr = FFI.ssw_align(
147
+ mask_len ||= [ref_len / 2, 15].max
148
+ ptr = LibSSW.ssw_align(
188
149
  prof, ref_str, ref_len, weight_gap0, weight_gapE, flag, filters, filterd, mask_len
189
150
  )
190
- # Not sure yet if we should set the instance variable to the pointer as a
191
- # garbage collection workaround.
192
- # For example: instance_variable_set(:@ref_str, ref_str)
193
- #
194
- # ptr.free = FFI.instance_variable_get(:@func_map)['align_destroy']
195
- LibSSW::Align.new(ptr)
151
+ # Garbage collection workaround:
152
+ # Keep ref_str alive while the C code might still need it.
153
+ # However, since Align.new immediately reads all values and calls align_destroy,
154
+ # the C memory is freed immediately, so ref_str only needs to live until then.
155
+ # We store it on ptr just to be safe during the Align.new call.
156
+ ptr.instance_variable_set(:@ref_str, ref_str)
157
+ SSW::Align.new(ptr)
196
158
  end
197
159
 
198
160
  # Release the memory allocated by function ssw_align.
199
- # @param a [Fiddle::Pointer, LibSSW::Align, LibSSW::FFI::Align]
161
+ # @param align [Fiddle::Pointer, SSW::Align, SSW::LibSSW::Align]
200
162
  # pointer to the alignment result structure
201
163
  def align_destroy(align)
202
- FFI.align_destroy(align)
164
+ if align.is_a?(Align)
165
+ warn "You don't need to call this method for Ruby's Align class."
166
+ nil
167
+ elsif align.is_a?(Fiddle::Pointer) || align.respond_to?(:to_ptr)
168
+ LibSSW.align_destroy(align)
169
+ else
170
+ raise ArgumentError, 'Expect class of align to be Pointer'
171
+ end
203
172
  end
204
173
 
205
174
  # 1. Calculate the number of mismatches.
@@ -226,22 +195,11 @@ module LibSSW
226
195
  # @return [Integer] The number of mismatches. The cigar and cigarLen are modified.
227
196
  def mark_mismatch(ref_begin1, read_begin1, read_end1, ref, read, read_len, cigar, cigar_len)
228
197
  warn 'implementation: fiexme: **cigar' # FIXME
229
- FFI.mark_mismatch(
198
+ LibSSW.mark_mismatch(
230
199
  ref_begin1, read_begin1, read_end1, ref.pack('c*'), read.pack('c*'), read_len, cigar, cigar_len.pack('l*')
231
200
  )
232
201
  end
233
202
 
234
- def array_to_cigar_string(arr)
235
- cigar_string = String.new
236
- arr.each do |x|
237
- n = x >> 4
238
- m = x & 15
239
- c = m > 8 ? 'M' : 'MIDNSHP=X'[m]
240
- cigar_string << n.to_s << c
241
- end
242
- cigar_string
243
- end
244
-
245
203
  # Create scoring matrix of Smith-Waterman algorithm.
246
204
  # @param [Array] elements
247
205
  # @param [Integer] match_score
@@ -258,44 +216,41 @@ module LibSSW
258
216
  score
259
217
  end
260
218
 
261
- # @param [String] seq
262
- def dna_to_int_array(seq)
263
- raise ArgumentError, 'seq must be a string' unless seq.is_a? String
264
-
265
- seq.each_char.map do |base|
266
- DNA2INT[base] || DNA2INT['N']
267
- end
268
- end
269
-
270
- def dna_complement(seq)
271
- seq.each_char.map do |base|
272
- DNARC[base]
273
- end.join.reverse
274
- end
275
-
276
- # @param [Array] int array
277
- def int_array_to_dna(arr)
278
- raise ArgumentError, 'arr must be an Array' unless arr.is_a? Array
279
-
280
- arr.map do |i|
281
- INT2DNA[i] || 'N'
282
- end.join
283
- end
284
-
285
- def aaseq_to_int_array(seq)
286
- raise ArgumentError, 'seq must be a string' unless seq.is_a? String
287
-
288
- seq.each_char.map do |base|
289
- AA2INT[base] || AA2INT['*']
219
+ # TODO: fix variable names
220
+ # @param q_seq [String] query sequence
221
+ # @param r_seq [String] reference sequence
222
+ # @param align [Align] alignment result
223
+ # @return [Array]
224
+ def build_path(q_seq, r_seq, align)
225
+ sQ = ''
226
+ sA = ''
227
+ sR = ''
228
+ q_off = align.read_begin1
229
+ r_off = align.ref_begin1
230
+ align.cigar.each do |x|
231
+ n = x >> 4
232
+ m = x & 15
233
+ c = m > 8 ? 'M' : 'MIDNSHP=X'[m]
234
+ case c
235
+ when 'M'
236
+ sQ += q_seq[q_off...(q_off + n)]
237
+ sA += Array.new(n) { |j| q_seq[q_off + j] == r_seq[r_off + j] ? '|' : '*' }.join
238
+ sR += r_seq[r_off...(r_off + n)]
239
+ q_off += n
240
+ r_off += n
241
+ when 'I'
242
+ sQ += q_seq[q_off...(q_off + n)]
243
+ sA += ' ' * n
244
+ sR += ' ' * n
245
+ q_off += n
246
+ when 'D'
247
+ sQ += ' ' * n
248
+ sA += ' ' * n
249
+ sR += r_seq[r_off...(r_off + n)]
250
+ r_off += n
251
+ end
290
252
  end
291
- end
292
-
293
- def int_array_to_aaseq(arr)
294
- raise ArgumentError, 'arr must be an Array' unless arr.is_a? Array
295
-
296
- arr.map do |i|
297
- INT2AA[i] || '*'
298
- end.join
253
+ [align.cigar_string, sQ, sA, sR]
299
254
  end
300
255
  end
301
256
  end
@@ -1,4 +1,4 @@
1
- module LibSSW
1
+ module SSW
2
2
  BLOSUM50 = [
3
3
  # A R N D C Q E G H I L K M F P S T W Y V B Z X *
4
4
  5, -2, -1, -2, -1, -1, -1, 0, -2, -1, -2, -1, -1, -3, -1, 1, 0, -3, -2, 0, -2, -1, -1, -5, # A
@@ -1,4 +1,4 @@
1
- module LibSSW
1
+ module SSW
2
2
  BLOSUM62 = [
3
3
  # A R N D C Q E G H I L K M F P S T W Y V B Z X *
4
4
  4, -1, -2, -2, 0, -1, -1, 0, -2, -1, -1, -1, -1, -2, -1, 1, 0, -3, -2, 0, -2, -1, 0, -4, # A
data/lib/ssw/aaseq.rb ADDED
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SSW
4
+ module AASeq
5
+ AAELEMENTS = ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G',
6
+ 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S',
7
+ 'T', 'W', 'Y', 'V', 'B', 'Z', 'X', '*'].freeze
8
+
9
+ AA2INT = { 'A' => 0, 'a' => 0,
10
+ 'R' => 1, 'r' => 1,
11
+ 'N' => 2, 'n' => 2,
12
+ 'D' => 3, 'd' => 3,
13
+ 'C' => 4, 'c' => 4,
14
+ 'Q' => 5, 'q' => 5,
15
+ 'E' => 6, 'e' => 6,
16
+ 'G' => 7, 'g' => 7,
17
+ 'H' => 8, 'h' => 8,
18
+ 'I' => 9, 'i' => 9,
19
+ 'L' => 10, 'l' => 10,
20
+ 'K' => 11, 'k' => 11,
21
+ 'M' => 12, 'm' => 12,
22
+ 'F' => 13, 'f' => 13,
23
+ 'P' => 14, 'p' => 14,
24
+ 'S' => 15, 's' => 15,
25
+ 'T' => 16, 't' => 16,
26
+ 'W' => 17, 'w' => 17,
27
+ 'Y' => 18, 'y' => 18,
28
+ 'V' => 19, 'v' => 19,
29
+ 'B' => 20, 'b' => 20,
30
+ 'Z' => 21, 'z' => 21,
31
+ 'X' => 22, 'x' => 22,
32
+ '*' => 23 }.freeze
33
+
34
+ INT2AA = { 0 => 'A', 1 => 'R', 2 => 'N', 3 => 'D',
35
+ 4 => 'C', 5 => 'Q', 6 => 'E', 7 => 'G',
36
+ 8 => 'H', 9 => 'I', 10 => 'L', 11 => 'K',
37
+ 12 => 'M', 13 => 'F', 14 => 'P', 15 => 'S',
38
+ 16 => 'T', 17 => 'W', 18 => 'Y', 19 => 'V',
39
+ 20 => 'B', 21 => 'Z', 22 => 'X', 23 => '*' }.freeze
40
+
41
+ module_function
42
+
43
+ # Transform amino acid sequence into numerical sequence.
44
+ # @param seq [String] amin acid sequence
45
+ # @return [Array] int array
46
+ # @example
47
+ # SSW::AASeq.to_int_array("ARND") #=> [0, 1, 2, 3]
48
+
49
+ def to_int_array(seq)
50
+ raise ArgumentError, 'seq must be a string' unless seq.is_a? String
51
+
52
+ seq.each_char.map do |base|
53
+ AA2INT[base] || AA2INT['*']
54
+ end
55
+ end
56
+
57
+ # Transform numerical sequence into amino acid sequence.
58
+ # @param arr [Array] int array
59
+ # @return [String] amino acid sequence
60
+ # @example
61
+ # SSW::AASeq.from_int_array([0, 1, 2, 3]) #=> "ARND"
62
+
63
+ def from_int_array(arr)
64
+ raise ArgumentError, 'arr must be an Array' unless arr.is_a? Array
65
+
66
+ arr.map do |i|
67
+ INT2AA[i] || '*'
68
+ end.join
69
+ end
70
+ end
71
+ end
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module LibSSW
3
+ module SSW
4
4
  # structure of the alignment result
5
5
  # @!attribute score1
6
6
  # @return [Integer] the best alignment score
@@ -28,7 +28,9 @@ module LibSSW
28
28
  # @!attribute cigar_len
29
29
  # @return [Integer]
30
30
  # length of the cigar string; cigarLen = 0 when the best alignment path is not available
31
- class Align < FFI::Align
31
+ # @!attribute cigar_string
32
+ # @return [String] cigar string
33
+ class Align
32
34
  def self.keys
33
35
  %i[score1 score2 ref_begin1 ref_end1
34
36
  read_begin1 read_end1 ref_end2 cigar cigar_len cigar_string]
@@ -39,7 +41,7 @@ module LibSSW
39
41
 
40
42
  def initialize(ptr)
41
43
  @ptr = ptr
42
- @cstruct = align = FFI::Align.new(ptr)
44
+ @cstruct = align = LibSSW::Align.new(ptr)
43
45
  @score1 = align.score1
44
46
  @score2 = align.score2
45
47
  @ref_begin1 = align.ref_begin1
@@ -50,12 +52,25 @@ module LibSSW
50
52
  @cigar_len = align.cigarLen
51
53
  @cigar = cigar_len.positive? ? align.cigar[0, 4 * cigar_len].unpack('L*') : []
52
54
  # Attributes for ruby binding only
53
- @cigar_string = LibSSW.array_to_cigar_string(@cigar)
54
- LibSSW.align_destroy(ptr)
55
+ @cigar_string = array_to_cigar_string(@cigar)
56
+ SSW.align_destroy(ptr)
55
57
  end
56
58
 
57
59
  def to_h
58
60
  self.class.keys.map { |k| [k, __send__(k)] }.to_h
59
61
  end
62
+
63
+ private
64
+
65
+ def array_to_cigar_string(arr)
66
+ cigar_string = String.new
67
+ arr.each do |x|
68
+ n = x >> 4
69
+ m = x & 15
70
+ c = m > 8 ? 'M' : 'MIDNSHP=X'[m]
71
+ cigar_string << n.to_s << c
72
+ end
73
+ cigar_string
74
+ end
60
75
  end
61
76
  end
data/lib/ssw/dna.rb ADDED
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SSW
4
+ module DNA
5
+ Elements = %w[A C G T N].freeze
6
+
7
+ DNA2INT = { 'A' => 0, 'a' => 0,
8
+ 'C' => 1, 'c' => 1,
9
+ 'G' => 2, 'g' => 2,
10
+ 'T' => 3, 't' => 3,
11
+ 'N' => 4, 'n' => 4 }.freeze
12
+
13
+ INT2DNA = { 0 => 'A', 1 => 'C', 2 => 'G', 3 => 'T', 4 => 'N' }.freeze
14
+
15
+ # reverse complement
16
+ DNARC = { 'A' => 'T',
17
+ 'C' => 'G',
18
+ 'G' => 'C',
19
+ 'T' => 'A',
20
+ 'N' => 'N',
21
+ 'a' => 'T',
22
+ 'c' => 'G',
23
+ 'g' => 'C',
24
+ 't' => 'A',
25
+ 'n' => 'N' }.freeze
26
+
27
+ module_function
28
+
29
+ # Transform DNA sequence into numerical sequence.
30
+ # @param seq [String] dna sequence
31
+ # @return [Array] int array
32
+ # @example
33
+ # SSW::DNA.to_int_array("TCGA") #=> [3, 1, 2, 0]
34
+
35
+ def to_int_array(seq)
36
+ raise ArgumentError, 'seq must be a string' unless seq.is_a? String
37
+
38
+ seq.each_char.map do |base|
39
+ DNA2INT[base] || DNA2INT['N']
40
+ end
41
+ end
42
+
43
+ # Transform numerical sequence into DNA sequence.
44
+ # @param arr [Array] int array
45
+ # @return [String] dna sequence
46
+ # @example
47
+ # SSW::DNA.from_int_array([3, 1, 2, 0]) #=> "TCGA"
48
+
49
+ def from_int_array(arr)
50
+ raise ArgumentError, 'arr must be an Array' unless arr.is_a? Array
51
+
52
+ arr.map do |i|
53
+ INT2DNA[i] || 'N'
54
+ end.join
55
+ end
56
+
57
+ # reverse complement
58
+ # @param seq [String] sequence
59
+ # @return [String] reverse complement
60
+ # @example
61
+ # SSW::DNA.revcomp("TCGAT") #=> "ATCGA"
62
+
63
+ def revcomp(seq)
64
+ seq.each_char.map do |base|
65
+ DNARC[base]
66
+ end.join.reverse
67
+ end
68
+ end
69
+ end
@@ -2,12 +2,12 @@
2
2
 
3
3
  require 'fiddle/import'
4
4
 
5
- module LibSSW
6
- module FFI
5
+ module SSW
6
+ module LibSSW
7
7
  extend Fiddle::Importer
8
8
 
9
9
  begin
10
- dlload LibSSW.ffi_lib
10
+ dlload SSW.ffi_lib
11
11
  rescue LoadError => e
12
12
  raise LoadError, "Could not find libssw shared library. \n#{e}"
13
13
  end
@@ -1,13 +1,13 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module LibSSW
3
+ module SSW
4
4
  # structure of the query profile
5
5
  # @!attribute read
6
6
  # @!attribute mat
7
7
  # @!attribute read_len
8
8
  # @!attribute n
9
9
  # @!attribute bias
10
- class Profile < FFI::Profile
10
+ class Profile
11
11
  def self.keys
12
12
  %i[read mat read_len n bias]
13
13
  end
@@ -17,7 +17,7 @@ module LibSSW
17
17
 
18
18
  def initialize(ptr)
19
19
  @ptr = ptr
20
- @cstruct = profile = LibSSW::FFI::Profile.new(ptr)
20
+ @cstruct = profile = SSW::LibSSW::Profile.new(ptr)
21
21
  @read_len = profile.readLen
22
22
  @read = read_len.positive? ? profile.read[0, read_len].unpack('c*') : []
23
23
  @n = profile.n
@@ -26,13 +26,11 @@ module LibSSW
26
26
  end
27
27
 
28
28
  def to_ptr
29
- # Garbage collection warkaround
30
- # Preventing Garbage Collection --force
31
- cstruct.read = ptr.instance_variable_get(:@read_str)
32
- cstruct.mat = ptr.instance_variable_get(:@mat_str)
33
- cstruct.readLen = ptr.instance_variable_get(:@read_len)
34
- cstruct.n = ptr.instance_variable_get(:@n)
35
- ptr
29
+ # The pointer already contains the correct C structure.
30
+ # The instance variables on @ptr (@read_str, @mat_str, etc.) are kept
31
+ # alive to prevent garbage collection of the memory that C is referencing.
32
+ # We don't need to modify the C structure here.
33
+ @ptr
36
34
  end
37
35
 
38
36
  def to_h
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module LibSSW
4
- VERSION = '0.0.2'
3
+ module SSW
4
+ VERSION = '0.0.5'
5
5
  end
metadata CHANGED
@@ -1,107 +1,22 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: libssw
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - kojix2
8
- autorequire:
9
- bindir: exe
8
+ bindir: bin
10
9
  cert_chain: []
11
- date: 2021-01-22 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: fiddle
15
14
  requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ">="
18
- - !ruby/object:Gem::Version
19
- version: 1.0.7
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - ">="
25
- - !ruby/object:Gem::Version
26
- version: 1.0.7
27
- - !ruby/object:Gem::Dependency
28
- name: bio
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - ">="
32
- - !ruby/object:Gem::Version
33
- version: '0'
34
- type: :development
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - ">="
39
- - !ruby/object:Gem::Version
40
- version: '0'
41
- - !ruby/object:Gem::Dependency
42
- name: bundler
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - ">="
46
- - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :development
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - ">="
53
- - !ruby/object:Gem::Version
54
- version: '0'
55
- - !ruby/object:Gem::Dependency
56
- name: minitest
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - ">="
60
- - !ruby/object:Gem::Version
61
- version: '0'
62
- type: :development
63
- prerelease: false
64
- version_requirements: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - ">="
67
- - !ruby/object:Gem::Version
68
- version: '0'
69
- - !ruby/object:Gem::Dependency
70
- name: rake
71
- requirement: !ruby/object:Gem::Requirement
72
- requirements:
73
- - - ">="
74
- - !ruby/object:Gem::Version
75
- version: '0'
76
- type: :development
77
- prerelease: false
78
- version_requirements: !ruby/object:Gem::Requirement
79
- requirements:
80
- - - ">="
81
- - !ruby/object:Gem::Version
82
- version: '0'
83
- - !ruby/object:Gem::Dependency
84
- name: rubocop
85
- requirement: !ruby/object:Gem::Requirement
86
- requirements:
87
- - - ">="
88
- - !ruby/object:Gem::Version
89
- version: '0'
90
- type: :development
91
- prerelease: false
92
- version_requirements: !ruby/object:Gem::Requirement
93
15
  requirements:
94
16
  - - ">="
95
17
  - !ruby/object:Gem::Version
96
18
  version: '0'
97
- - !ruby/object:Gem::Dependency
98
- name: simplecov
99
- requirement: !ruby/object:Gem::Requirement
100
- requirements:
101
- - - ">="
102
- - !ruby/object:Gem::Version
103
- version: '0'
104
- type: :development
19
+ type: :runtime
105
20
  prerelease: false
106
21
  version_requirements: !ruby/object:Gem::Requirement
107
22
  requirements:
@@ -111,26 +26,25 @@ dependencies:
111
26
  description: Ruby bindings for libssw
112
27
  email:
113
28
  - 2xijok@gmail.com
114
- executables:
115
- - rbssw
29
+ executables: []
116
30
  extensions: []
117
31
  extra_rdoc_files: []
118
32
  files:
119
33
  - LICENSE.txt
120
34
  - README.md
121
- - exe/rbssw
122
35
  - lib/libssw.rb
123
- - lib/libssw/BLOSUM50.rb
124
- - lib/libssw/BLOSUM62.rb
125
- - lib/libssw/align.rb
126
- - lib/libssw/ffi.rb
127
- - lib/libssw/profile.rb
128
- - lib/libssw/version.rb
36
+ - lib/ssw/BLOSUM50.rb
37
+ - lib/ssw/BLOSUM62.rb
38
+ - lib/ssw/aaseq.rb
39
+ - lib/ssw/align.rb
40
+ - lib/ssw/dna.rb
41
+ - lib/ssw/libssw.rb
42
+ - lib/ssw/profile.rb
43
+ - lib/ssw/version.rb
129
44
  homepage: https://github.com/kojix2/ruby-libssw
130
45
  licenses:
131
46
  - MIT
132
47
  metadata: {}
133
- post_install_message:
134
48
  rdoc_options: []
135
49
  require_paths:
136
50
  - lib
@@ -145,8 +59,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
145
59
  - !ruby/object:Gem::Version
146
60
  version: '0'
147
61
  requirements: []
148
- rubygems_version: 3.2.3
149
- signing_key:
62
+ rubygems_version: 3.6.9
150
63
  specification_version: 4
151
64
  summary: Ruby bindings for libssw
152
65
  test_files: []
data/exe/rbssw DELETED
@@ -1,193 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- warn 'This script is under development.'
5
- warn "It doesn't work properly yet!"
6
-
7
- require 'bio'
8
- require 'libssw'
9
- SSW = LibSSW
10
- require 'optparse'
11
-
12
- opts = {
13
- lib_path: nil,
14
- nmatch: 2,
15
- nmismatch: 2,
16
- nopen: 3,
17
- next: 1,
18
- bprotein: false,
19
- smatrix: nil,
20
- bpath: false,
21
- nthr: nil,
22
- bbest: false,
23
- bsam: nil, # typo?
24
- bheader: nil
25
- }
26
-
27
- parser = OptionParser.new do |opt|
28
- opt.version = LibSSW::VERSION
29
- opt.summary_width = 20
30
- opt.banner = 'Usage: rbssw [options] <target file> <query file>'
31
- opt.on('-l', '--sLibPath PATH', String,
32
- 'path of libssw.so') do |v|
33
- opts[:sLibPath] = v
34
- end
35
- opt.on('-m', '--nMatch VAL', Integer,
36
- 'a positive integer as the score for a match',
37
- "in genome sequence alignment. [#{opts[:nmatch]}]") do |v|
38
- opts[:nmatch] = v
39
- end
40
- opt.on('-x', '--nMismatch VAL', Integer,
41
- 'a positive integer as the score for a mismatch',
42
- "in genome sequence alignment. [#{opts[:nmismatch]}]") do |v|
43
- opts[:nmismatch] = v
44
- end
45
- opt.on('-o', '--nOpen VAL', Integer,
46
- 'a positive integer as the penalty for the gap opening',
47
- "in genome sequence alignment. [#{opts[:nopen]}]") do |v|
48
- opts[:nopen] = v
49
- end
50
- opt.on('-e', '--nExt VAL', Integer,
51
- 'a positive integer as the penalty for the gap extension',
52
- "in genome sequence alignment. [#{opts[:next]}]") do |v|
53
- opts[:next] = v
54
- end
55
- opt.on('-p', '--bProtien', TrueClass,
56
- 'Do protein sequence alignment.',
57
- "Without this option, do genome sequence alignment. [#{opts[:bprotein]}]") do |v|
58
- opts[:bprotein] = v
59
- end
60
- opt.on('-a', '--sMatrix VAL', String,
61
- "a file for either Blosum or Pam weight matrix. [#{opts[:smatrix]}]") do |v|
62
- opts[:smatrix] = v
63
- end
64
- opt.on('-c', '--bPath', TrueClass,
65
- "Return the alignment path. [#{opts[:bpath]}]") do |v|
66
- opts[:bpath] = v
67
- end
68
- opt.on('-f', '--nThr VAL', Integer,
69
- 'a positive integer.',
70
- 'Only output the alignments with the Smith-Waterman score >= N.') do |v|
71
- opts[:nthr] = v
72
- end
73
- opt.on('-r', '--bBest', TrueClass,
74
- 'The best alignment will be picked, between the original read',
75
- "alignment and the reverse complement read alignment. [#{opts[:bbest]}]") do |v|
76
- opts[:bbest] = v
77
- end
78
- opt.on('-s', '--bSam', TrueClass,
79
- 'Output in SAM format. [no header]') do |v| # TYPO?
80
- opts[:bsam] = v
81
- end
82
- opt.on('-header', '--bHeader', TrueClass,
83
- 'If -s is used, include header in SAM output.') do |v|
84
- opts[:bheader] = v
85
- end
86
- end
87
-
88
- parser.order!(ARGV)
89
-
90
- opts[:target] = ARGV[0]
91
- opts[:query] = ARGV[1]
92
-
93
- lEle = []
94
- dRc = {}
95
- dEle2Int = {}
96
- dInt2Ele = {}
97
- lScore = nil
98
-
99
- if opts[:bprotein]
100
- # load AA score matrix
101
- if !opts[:smatrix]
102
- lEle = SSW::AAELEMENTS
103
- dEle2Int = SSW::AA2INT
104
- dInt2Ele = SSW::INT2AA
105
- nEleNum = lEle.size
106
- lScore = SSW::Blosum50
107
- else
108
- lEle, dEle2Int, dInt2Ele, lScore = SSW.read_matrix(opts[:smatrix])
109
- end
110
- elsif !opts[:smatrix]
111
- # init DNA score matrix
112
- lEle = SSW::DNAELEMENTS
113
- dRc = SSW::DNARC
114
- dEle2Int = SSW::DNA2INT
115
- dInt2Ele = SSW::INT2DNA
116
- nEleNum = lEle.size # 5
117
- lScore = LibSSW.create_scoring_matrix(lEle, opts[:nmatch], -opts[:nmismatch])
118
- end
119
-
120
- warn 'Reverse complement alignment is not available for protein sequences.' if opts[:bbest] && opts[:bprotein]
121
-
122
- # set flag
123
- nFlag = opts[:bpath] ? 2 : 0
124
-
125
- # print sam head
126
- if opts[:bsam] && opts[:bheader] && opts[:bpath]
127
- puts '@HD\tVN:1.4\tSO:queryname'
128
- Bio::Flatfile.open(opts[:target]) do |f|
129
- f.each do |entry|
130
- id = entry.entry_id
131
- len = entry.nalen
132
- puts "@SQ\tSN:#{id}\tLN:#{len}"
133
- end
134
- end
135
- elsif opts[:bsam] && !args[:bpath]
136
- warn 'SAM format output is only available together with option -c.\n'
137
- opts[:bsam] = false
138
- end
139
-
140
- def _to_int(seq, lEle, dEle2Int)
141
- seq.each_char.map do |ele|
142
- if dEle2Int.has_key?(ele)
143
- dEle2Int[ele]
144
- else
145
- dEle2Int[lEle[-1]]
146
- end
147
- end
148
- end
149
-
150
- # iterate query sequenc
151
- Bio::FlatFile.open(opts[:query]) do |query_file|
152
- query_file.each do |qentry|
153
- sQId = qentry.entry_id
154
- sQSeq = qentry.sequence_string
155
- sQQual = qentry.quality_string
156
- # build query profile
157
- qNum = _to_int(sQSeq, lEle, dEle2Int)
158
- qProfile = SSW.ssw_init(qNum, sQSeq.size, lScore, lEle.size, 2)
159
- # build rc query profile
160
- if opts[:bbest] && !opts[:bprotein]
161
- sQRcSeq = sQSeq.reverse.each_char.map { |x| dRc[x] }.join
162
- qRcNum = _to_int(sQRcSeq, lEle, dEle2Int)
163
- qRcProfile = SSW.ssw_init(qRcNum, sQSeq.size, mat, lEle.size, 2)
164
- end
165
- # set mask le
166
- if sQSeq.size > 30
167
- nMaskLen = sQSeq.size / 2
168
- else
169
- nMasklen = 15
170
- end
171
-
172
- # iter target sequence
173
- Bio::FlatFile.open(opts[:target]) do |target_file|
174
- target_file.each do |tentry|
175
- sRId = tentry.entry_id
176
- sRSeq = tentry.seq.to_s
177
- rNum = _to_int(sRSeq, lEle, dEle2Int)
178
- res = SSW.ssw_align(
179
- qProfile, rNum, sRSeq.size, opts[:nopen], opts[:next], nFlag, 0, 0, nMaskLen
180
- )
181
- p res.to_h
182
- resRc = nil
183
- if opts[:bbest] && !opts[:bprotein]
184
- resRc = SSW.align_one(
185
- qRcProfile, rNum, sRSeq.size, opts[:nopen], opts[:next], nFlag, 0, 0, nMaskLen
186
- )
187
- end
188
- # build cigar and trace back path
189
- strand = 0
190
- end
191
- end
192
- end
193
- end