libssw 0.0.3 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +47 -38
- data/lib/libssw.rb +49 -41
- data/lib/ssw/aaseq.rb +12 -0
- data/lib/ssw/align.rb +1 -1
- data/lib/ssw/dna.rb +18 -2
- data/lib/ssw/{ffi.rb → libssw.rb} +1 -1
- data/lib/ssw/profile.rb +6 -8
- data/lib/ssw/version.rb +1 -1
- metadata +7 -97
- data/exe/rbssw +0 -193
- data/vendor/libssw.so +0 -0
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 0b67671ac9e959ee7e147bfff872719e6c787a020fb15a9b63e9128aef51d9d3
|
|
4
|
+
data.tar.gz: 0baec30769ef3e0f9248346dded445d9a4fa1eaafd9b2071b74e9e5deeb7e4be
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6fb9b0ad32647d27418b666545f3fe34ffab9e902ddbae0575733695d6101fa26b261dcc5d9066e9b8c9a85bac3e1c2cce6be79b58c63aab7a0896b691561a7e
|
|
7
|
+
data.tar.gz: 9c3862016d4490e0fce60296acf8fbe129c5f166363c0cdd978862e066819ec8b2a850dc345716750503e15521480e026d2aac0278f0e8db9394505cc98db3ab
|
data/README.md
CHANGED
|
@@ -23,10 +23,10 @@ export LIBSSWDIR=/usr/lib/x86_64-linux-gnu/ # libssw.so
|
|
|
23
23
|
|
|
24
24
|
### Installing from source
|
|
25
25
|
|
|
26
|
-
When installing from source code using the following steps, the shared library `libssw.so` or `libssw.dylib` will be packed in the Ruby gem. In this case, the environment variable `LIBSSWDIR` is not required.
|
|
26
|
+
When installing from source code using the following steps, the shared library `libssw.so` or `libssw.dylib` will be packed in the Ruby gem. In this case, the environment variable `LIBSSWDIR` is not required.
|
|
27
27
|
|
|
28
28
|
```sh
|
|
29
|
-
git clone --
|
|
29
|
+
git clone --recursive https://github.com/kojix2/ruby-libssw
|
|
30
30
|
bundle exec rake libssw:build
|
|
31
31
|
bundle exec rake install
|
|
32
32
|
```
|
|
@@ -39,7 +39,7 @@ ruby-libssw does not support Windows.
|
|
|
39
39
|
require 'libssw'
|
|
40
40
|
|
|
41
41
|
ref_str = "AAAAAAAAACGTTAAAAAAAAAA"
|
|
42
|
-
ref_int = SSW::DNA.to_int_array(ref_str)
|
|
42
|
+
ref_int = SSW::DNA.to_int_array(ref_str)
|
|
43
43
|
# [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
|
|
44
44
|
|
|
45
45
|
read_str1 = "ACGTT"
|
|
@@ -101,53 +101,62 @@ puts SSW.build_path(read_str1, ref_str, align1)
|
|
|
101
101
|
See [API Documentation](https://rubydoc.info/gems/libssw).
|
|
102
102
|
|
|
103
103
|
```markdown
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
104
|
+
- SSW module
|
|
105
|
+
|
|
106
|
+
- SSW.init
|
|
107
|
+
- SSW.init_destroy
|
|
108
|
+
- SSW.align
|
|
109
|
+
- SSW.align_destroy
|
|
110
|
+
- SSW.mark_mismatch
|
|
111
|
+
- SSW.create_scoring_matrix
|
|
112
|
+
- SSW.build_path
|
|
113
|
+
|
|
114
|
+
- Profile class
|
|
115
|
+
|
|
116
|
+
- attributes
|
|
117
|
+
- read, mat, read_len, n, bias
|
|
118
|
+
|
|
119
|
+
- Align class
|
|
120
|
+
|
|
121
|
+
- attributes
|
|
122
|
+
- score1, score2, ref_begin1, ref_end1, read_begin1, read_end1, ref_end2
|
|
120
123
|
cigar, cigar_len, cigar_string
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
124
|
+
|
|
125
|
+
- DNA module
|
|
126
|
+
|
|
127
|
+
- DNA.to_int_array
|
|
128
|
+
- DNA.from_int_array
|
|
129
|
+
- revcomp
|
|
130
|
+
|
|
131
|
+
- AASeq module
|
|
132
|
+
|
|
133
|
+
- AASeq.to_int_array
|
|
134
|
+
- AASeq.from_int_array
|
|
135
|
+
|
|
136
|
+
- BLOSUM62
|
|
137
|
+
- BLOSUM50
|
|
133
138
|
```
|
|
134
139
|
|
|
135
140
|
## Development
|
|
136
141
|
|
|
137
142
|
```sh
|
|
138
|
-
git clone --
|
|
143
|
+
git clone --recursive https://github.com/kojix2/ruby-libssw
|
|
139
144
|
bundle exec rake libssw:build
|
|
140
145
|
bundle exec rake test
|
|
141
146
|
```
|
|
142
147
|
|
|
148
|
+
Do you need commit rights to my repository?
|
|
149
|
+
Do you want to get admin rights and take over the project?
|
|
150
|
+
If so, please feel free to contact me @kojix2.
|
|
151
|
+
|
|
143
152
|
## Contributing
|
|
144
153
|
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
154
|
+
- [Report bugs](https://github.com/kojix2/ruby-libssw/issues)
|
|
155
|
+
- Fix bugs and [submit pull requests](https://github.com/kojix2/ruby-libssw/pulls)
|
|
156
|
+
- Write, clarify, or fix documentation
|
|
157
|
+
- English corrections are welcome
|
|
158
|
+
- Suggest or add new features
|
|
150
159
|
|
|
151
160
|
## License
|
|
152
161
|
|
|
153
|
-
|
|
162
|
+
- [MIT License](https://opensource.org/licenses/MIT).
|
data/lib/libssw.rb
CHANGED
|
@@ -13,14 +13,7 @@ module SSW
|
|
|
13
13
|
attr_accessor :ffi_lib
|
|
14
14
|
end
|
|
15
15
|
|
|
16
|
-
lib_name =
|
|
17
|
-
when /mswin|msys|mingw|cygwin|bccwin|wince|emc/
|
|
18
|
-
'libssw.dll' # unconfirmed
|
|
19
|
-
when /darwin|mac os/
|
|
20
|
-
'libssw.dylib' # unconfirmed
|
|
21
|
-
else
|
|
22
|
-
'libssw.so'
|
|
23
|
-
end
|
|
16
|
+
lib_name = "libssw.#{RbConfig::CONFIG['SOEXT']}" # Ruby 2.5 or later
|
|
24
17
|
|
|
25
18
|
self.ffi_lib = if ENV['LIBSSWDIR'] && !ENV['LIBSSWDIR'].empty?
|
|
26
19
|
File.expand_path(lib_name, ENV['LIBSSWDIR'])
|
|
@@ -28,7 +21,12 @@ module SSW
|
|
|
28
21
|
File.expand_path("../vendor/#{lib_name}", __dir__)
|
|
29
22
|
end
|
|
30
23
|
|
|
31
|
-
|
|
24
|
+
# NOTE: Why not use pkg-config?
|
|
25
|
+
# APT package is available.
|
|
26
|
+
# However, it does not include a .pc file.
|
|
27
|
+
# Thus pkg-config will not find the shared library.
|
|
28
|
+
|
|
29
|
+
require_relative 'ssw/libssw'
|
|
32
30
|
require_relative 'ssw/profile'
|
|
33
31
|
require_relative 'ssw/align'
|
|
34
32
|
|
|
@@ -55,22 +53,26 @@ module SSW
|
|
|
55
53
|
raise "Not a square matrix. size: #{mat.size}, n: #{n}" if mat.size != n * n
|
|
56
54
|
|
|
57
55
|
mat_str = mat.flatten.pack('c*')
|
|
58
|
-
ptr =
|
|
56
|
+
ptr = LibSSW.ssw_init(
|
|
59
57
|
read_str,
|
|
60
58
|
read_len,
|
|
61
59
|
mat_str,
|
|
62
60
|
n,
|
|
63
61
|
score_size
|
|
64
62
|
)
|
|
65
|
-
# Garbage collection workaround
|
|
63
|
+
# Garbage collection workaround:
|
|
64
|
+
# The C library stores pointers to read and mat without copying the data.
|
|
65
|
+
# We must keep the Ruby strings (read_str, mat_str) alive for the lifetime
|
|
66
|
+
# of the profile structure to prevent segmentation faults.
|
|
66
67
|
#
|
|
67
|
-
#
|
|
68
|
-
#
|
|
69
|
-
#
|
|
70
|
-
#
|
|
68
|
+
# We cannot use Fiddle's automatic memory management (ptr.free) here because:
|
|
69
|
+
# - Calling init_destroy from Ruby's GC causes segmentation violations
|
|
70
|
+
# - The user should explicitly call SSW.init_destroy when done, or let
|
|
71
|
+
# Ruby's GC clean up the profile structure itself (though the contained
|
|
72
|
+
# profile_byte/profile_word will leak unless init_destroy is called)
|
|
71
73
|
ptr.instance_variable_set(:@read_str, read_str)
|
|
72
|
-
ptr.instance_variable_set(:@read_len, read_len)
|
|
73
74
|
ptr.instance_variable_set(:@mat_str, mat_str)
|
|
75
|
+
ptr.instance_variable_set(:@read_len, read_len)
|
|
74
76
|
ptr.instance_variable_set(:@n, n)
|
|
75
77
|
ptr.instance_variable_set(:@score_size, score_size)
|
|
76
78
|
|
|
@@ -78,20 +80,20 @@ module SSW
|
|
|
78
80
|
end
|
|
79
81
|
|
|
80
82
|
# Release the memory allocated by function ssw_init.
|
|
81
|
-
# @param
|
|
83
|
+
# @param profile [Fiddle::Pointer, SSW::Profile, SSW::LibSSW::Profile]
|
|
82
84
|
# pointer to the query profile structure
|
|
83
85
|
# @note Ruby has garbage collection, so there is not much reason to call
|
|
84
86
|
# this method.
|
|
85
87
|
def init_destroy(profile)
|
|
86
|
-
unless profile.is_a?(Fiddle::Pointer) ||
|
|
87
|
-
raise ArgumentError, 'Expect class of
|
|
88
|
+
unless profile.is_a?(Fiddle::Pointer) || profile.is_a?(Profile) || profile.respond_to?(:to_ptr)
|
|
89
|
+
raise ArgumentError, 'Expect class of profile to be Profile or Pointer'
|
|
88
90
|
end
|
|
89
91
|
|
|
90
|
-
|
|
92
|
+
LibSSW.init_destroy(profile)
|
|
91
93
|
end
|
|
92
94
|
|
|
93
95
|
# Do Striped Smith-Waterman alignment.
|
|
94
|
-
# @param prof [Fiddle::Pointer, SSW::Profile, SSW::
|
|
96
|
+
# @param prof [Fiddle::Pointer, SSW::Profile, SSW::LibSSW::Profile]
|
|
95
97
|
# pointer to the query profile structure
|
|
96
98
|
# @param ref [Array]
|
|
97
99
|
# target sequence;
|
|
@@ -100,25 +102,25 @@ module SSW
|
|
|
100
102
|
# @param weight_gap0 [Integer] the absolute value of gap open penalty
|
|
101
103
|
# @param weight_gapE [Integer] the absolute value of gap extension penalty
|
|
102
104
|
# @param flag [Integer]
|
|
103
|
-
# * bit 5: when
|
|
105
|
+
# * bit 5: when set as 1, function ssw_align will return the best
|
|
104
106
|
# alignment beginning position;
|
|
105
|
-
# * bit 6: when
|
|
106
|
-
# read_end1 - read_begin1 < filterd), (whatever bit 5 is
|
|
107
|
+
# * bit 6: when set as 1, if (ref_end1 - ref_begin1 < filterd &&
|
|
108
|
+
# read_end1 - read_begin1 < filterd), (whatever bit 5 is set) the
|
|
107
109
|
# function will return the best alignment beginning position and cigar;
|
|
108
|
-
# * bit 7: when
|
|
109
|
-
# (whatever bit 5 is
|
|
110
|
+
# * bit 7: when set as 1, if the best alignment score >= filters,
|
|
111
|
+
# (whatever bit 5 is set) the function will return the best
|
|
110
112
|
# alignment beginning position and cigar;
|
|
111
|
-
# * bit 8: when
|
|
113
|
+
# * bit 8: when set as 1, (whatever bit 5, 6 or 7 is set) the
|
|
112
114
|
# function will always return the best alignment beginning position and
|
|
113
115
|
# cigar. When flag == 0, only the optimal and sub-optimal scores and the
|
|
114
116
|
# optimal alignment ending position will be returned.
|
|
115
117
|
# @param filters [Integer]
|
|
116
|
-
# scorefilter: when bit 7 of flag is
|
|
117
|
-
# filters will be used (Please check the
|
|
118
|
+
# scorefilter: when bit 7 of flag is set as 1 and bit 8 is set as 0,
|
|
119
|
+
# filters will be used (Please check the description of the flag parameter
|
|
118
120
|
# for detailed usage.)
|
|
119
121
|
# @param filterd [Integer]
|
|
120
|
-
# distance filter: when bit 6 of flag is
|
|
121
|
-
# as 0, filterd will be used (Please check the
|
|
122
|
+
# distance filter: when bit 6 of flag is set as 1 and bit 8 is set
|
|
123
|
+
# as 0, filterd will be used (Please check the description of the flag
|
|
122
124
|
# parameter for detailed usage.)
|
|
123
125
|
# @param mask_len [Integer]
|
|
124
126
|
# The distance between the optimal and suboptimal alignment ending
|
|
@@ -133,6 +135,7 @@ module SSW
|
|
|
133
135
|
# SSW C library masks the reference loci nearby (mask length = maskLen)
|
|
134
136
|
# the best alignment ending position and locates the second largest score
|
|
135
137
|
# from the unmasked elements.
|
|
138
|
+
# @return [Align]
|
|
136
139
|
def align(prof, ref, weight_gap0, weight_gapE, flag, filters, filterd, mask_len = nil)
|
|
137
140
|
unless prof.is_a?(Fiddle::Pointer) || prof.is_a?(Profile) || prof.respond_to?(:to_ptr)
|
|
138
141
|
raise ArgumentError, 'Expect class of filename to be Profile or Pointer'
|
|
@@ -142,28 +145,29 @@ module SSW
|
|
|
142
145
|
ref_str = ref.pack('c*')
|
|
143
146
|
ref_len = ref.size
|
|
144
147
|
mask_len ||= [ref_len / 2, 15].max
|
|
145
|
-
ptr =
|
|
148
|
+
ptr = LibSSW.ssw_align(
|
|
146
149
|
prof, ref_str, ref_len, weight_gap0, weight_gapE, flag, filters, filterd, mask_len
|
|
147
150
|
)
|
|
148
|
-
#
|
|
149
|
-
#
|
|
150
|
-
#
|
|
151
|
-
#
|
|
152
|
-
# ptr
|
|
151
|
+
# Garbage collection workaround:
|
|
152
|
+
# Keep ref_str alive while the C code might still need it.
|
|
153
|
+
# However, since Align.new immediately reads all values and calls align_destroy,
|
|
154
|
+
# the C memory is freed immediately, so ref_str only needs to live until then.
|
|
155
|
+
# We store it on ptr just to be safe during the Align.new call.
|
|
156
|
+
ptr.instance_variable_set(:@ref_str, ref_str)
|
|
153
157
|
SSW::Align.new(ptr)
|
|
154
158
|
end
|
|
155
159
|
|
|
156
160
|
# Release the memory allocated by function ssw_align.
|
|
157
|
-
# @param
|
|
161
|
+
# @param align [Fiddle::Pointer, SSW::Align, SSW::LibSSW::Align]
|
|
158
162
|
# pointer to the alignment result structure
|
|
159
163
|
def align_destroy(align)
|
|
160
164
|
if align.is_a?(Align)
|
|
161
165
|
warn "You don't need to call this method for Ruby's Align class."
|
|
162
166
|
nil
|
|
163
167
|
elsif align.is_a?(Fiddle::Pointer) || align.respond_to?(:to_ptr)
|
|
164
|
-
|
|
168
|
+
LibSSW.align_destroy(align)
|
|
165
169
|
else
|
|
166
|
-
raise ArgumentError, 'Expect class of
|
|
170
|
+
raise ArgumentError, 'Expect class of align to be Pointer'
|
|
167
171
|
end
|
|
168
172
|
end
|
|
169
173
|
|
|
@@ -191,7 +195,7 @@ module SSW
|
|
|
191
195
|
# @return [Integer] The number of mismatches. The cigar and cigarLen are modified.
|
|
192
196
|
def mark_mismatch(ref_begin1, read_begin1, read_end1, ref, read, read_len, cigar, cigar_len)
|
|
193
197
|
warn 'implementation: fiexme: **cigar' # FIXME
|
|
194
|
-
|
|
198
|
+
LibSSW.mark_mismatch(
|
|
195
199
|
ref_begin1, read_begin1, read_end1, ref.pack('c*'), read.pack('c*'), read_len, cigar, cigar_len.pack('l*')
|
|
196
200
|
)
|
|
197
201
|
end
|
|
@@ -213,6 +217,10 @@ module SSW
|
|
|
213
217
|
end
|
|
214
218
|
|
|
215
219
|
# TODO: fix variable names
|
|
220
|
+
# @param q_seq [String] query sequence
|
|
221
|
+
# @param r_seq [String] reference sequence
|
|
222
|
+
# @param align [Align] alignment result
|
|
223
|
+
# @return [Array]
|
|
216
224
|
def build_path(q_seq, r_seq, align)
|
|
217
225
|
sQ = ''
|
|
218
226
|
sA = ''
|
data/lib/ssw/aaseq.rb
CHANGED
|
@@ -40,6 +40,12 @@ module SSW
|
|
|
40
40
|
|
|
41
41
|
module_function
|
|
42
42
|
|
|
43
|
+
# Transform amino acid sequence into numerical sequence.
|
|
44
|
+
# @param seq [String] amino acid sequence
|
|
45
|
+
# @return [Array] int array
|
|
46
|
+
# @example
|
|
47
|
+
# SSW::AASeq.to_int_array("ARND") #=> [0, 1, 2, 3]
|
|
48
|
+
|
|
43
49
|
def to_int_array(seq)
|
|
44
50
|
raise ArgumentError, 'seq must be a string' unless seq.is_a? String
|
|
45
51
|
|
|
@@ -48,6 +54,12 @@ module SSW
|
|
|
48
54
|
end
|
|
49
55
|
end
|
|
50
56
|
|
|
57
|
+
# Transform numerical sequence into amino acid sequence.
|
|
58
|
+
# @param arr [Array] int array
|
|
59
|
+
# @return [String] amino acid sequence
|
|
60
|
+
# @example
|
|
61
|
+
# SSW::AASeq.from_int_array([0, 1, 2, 3]) #=> "ARND"
|
|
62
|
+
|
|
51
63
|
def from_int_array(arr)
|
|
52
64
|
raise ArgumentError, 'arr must be an Array' unless arr.is_a? Array
|
|
53
65
|
|
data/lib/ssw/align.rb
CHANGED
data/lib/ssw/dna.rb
CHANGED
|
@@ -26,7 +26,12 @@ module SSW
|
|
|
26
26
|
|
|
27
27
|
module_function
|
|
28
28
|
|
|
29
|
-
#
|
|
29
|
+
# Transform DNA sequence into numerical sequence.
|
|
30
|
+
# @param seq [String] dna sequence
|
|
31
|
+
# @return [Array] int array
|
|
32
|
+
# @example
|
|
33
|
+
# SSW::DNA.to_int_array("TCGA") #=> [3, 1, 2, 0]
|
|
34
|
+
|
|
30
35
|
def to_int_array(seq)
|
|
31
36
|
raise ArgumentError, 'seq must be a string' unless seq.is_a? String
|
|
32
37
|
|
|
@@ -35,7 +40,12 @@ module SSW
|
|
|
35
40
|
end
|
|
36
41
|
end
|
|
37
42
|
|
|
38
|
-
#
|
|
43
|
+
# Transform numerical sequence into DNA sequence.
|
|
44
|
+
# @param arr [Array] int array
|
|
45
|
+
# @return [String] dna sequence
|
|
46
|
+
# @example
|
|
47
|
+
# SSW::DNA.from_int_array([3, 1, 2, 0]) #=> "TCGA"
|
|
48
|
+
|
|
39
49
|
def from_int_array(arr)
|
|
40
50
|
raise ArgumentError, 'arr must be an Array' unless arr.is_a? Array
|
|
41
51
|
|
|
@@ -44,6 +54,12 @@ module SSW
|
|
|
44
54
|
end.join
|
|
45
55
|
end
|
|
46
56
|
|
|
57
|
+
# reverse complement
|
|
58
|
+
# @param seq [String] sequence
|
|
59
|
+
# @return [String] reverse complement
|
|
60
|
+
# @example
|
|
61
|
+
# SSW::DNA.revcomp("TCGAT") #=> "ATCGA"
|
|
62
|
+
|
|
47
63
|
def revcomp(seq)
|
|
48
64
|
seq.each_char.map do |base|
|
|
49
65
|
DNARC[base]
|
data/lib/ssw/profile.rb
CHANGED
|
@@ -17,7 +17,7 @@ module SSW
|
|
|
17
17
|
|
|
18
18
|
def initialize(ptr)
|
|
19
19
|
@ptr = ptr
|
|
20
|
-
@cstruct = profile = SSW::
|
|
20
|
+
@cstruct = profile = SSW::LibSSW::Profile.new(ptr)
|
|
21
21
|
@read_len = profile.readLen
|
|
22
22
|
@read = read_len.positive? ? profile.read[0, read_len].unpack('c*') : []
|
|
23
23
|
@n = profile.n
|
|
@@ -26,13 +26,11 @@ module SSW
|
|
|
26
26
|
end
|
|
27
27
|
|
|
28
28
|
def to_ptr
|
|
29
|
-
#
|
|
30
|
-
#
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
cstruct.n = ptr.instance_variable_get(:@n)
|
|
35
|
-
ptr
|
|
29
|
+
# The pointer already contains the correct C structure.
|
|
30
|
+
# The instance variables on @ptr (@read_str, @mat_str, etc.) are kept
|
|
31
|
+
# alive to prevent garbage collection of the memory that C is referencing.
|
|
32
|
+
# We don't need to modify the C structure here.
|
|
33
|
+
@ptr
|
|
36
34
|
end
|
|
37
35
|
|
|
38
36
|
def to_h
|
data/lib/ssw/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,107 +1,22 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: libssw
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.
|
|
4
|
+
version: 0.0.5
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- kojix2
|
|
8
|
-
|
|
9
|
-
bindir: exe
|
|
8
|
+
bindir: bin
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
11
|
dependencies:
|
|
13
12
|
- !ruby/object:Gem::Dependency
|
|
14
13
|
name: fiddle
|
|
15
14
|
requirement: !ruby/object:Gem::Requirement
|
|
16
|
-
requirements:
|
|
17
|
-
- - ">="
|
|
18
|
-
- !ruby/object:Gem::Version
|
|
19
|
-
version: 1.0.7
|
|
20
|
-
type: :runtime
|
|
21
|
-
prerelease: false
|
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
-
requirements:
|
|
24
|
-
- - ">="
|
|
25
|
-
- !ruby/object:Gem::Version
|
|
26
|
-
version: 1.0.7
|
|
27
|
-
- !ruby/object:Gem::Dependency
|
|
28
|
-
name: bio
|
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
|
30
|
-
requirements:
|
|
31
|
-
- - ">="
|
|
32
|
-
- !ruby/object:Gem::Version
|
|
33
|
-
version: '0'
|
|
34
|
-
type: :development
|
|
35
|
-
prerelease: false
|
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
-
requirements:
|
|
38
|
-
- - ">="
|
|
39
|
-
- !ruby/object:Gem::Version
|
|
40
|
-
version: '0'
|
|
41
|
-
- !ruby/object:Gem::Dependency
|
|
42
|
-
name: bundler
|
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
|
44
|
-
requirements:
|
|
45
|
-
- - ">="
|
|
46
|
-
- !ruby/object:Gem::Version
|
|
47
|
-
version: '0'
|
|
48
|
-
type: :development
|
|
49
|
-
prerelease: false
|
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
-
requirements:
|
|
52
|
-
- - ">="
|
|
53
|
-
- !ruby/object:Gem::Version
|
|
54
|
-
version: '0'
|
|
55
|
-
- !ruby/object:Gem::Dependency
|
|
56
|
-
name: minitest
|
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
|
58
|
-
requirements:
|
|
59
|
-
- - ">="
|
|
60
|
-
- !ruby/object:Gem::Version
|
|
61
|
-
version: '0'
|
|
62
|
-
type: :development
|
|
63
|
-
prerelease: false
|
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
65
|
-
requirements:
|
|
66
|
-
- - ">="
|
|
67
|
-
- !ruby/object:Gem::Version
|
|
68
|
-
version: '0'
|
|
69
|
-
- !ruby/object:Gem::Dependency
|
|
70
|
-
name: rake
|
|
71
|
-
requirement: !ruby/object:Gem::Requirement
|
|
72
|
-
requirements:
|
|
73
|
-
- - ">="
|
|
74
|
-
- !ruby/object:Gem::Version
|
|
75
|
-
version: '0'
|
|
76
|
-
type: :development
|
|
77
|
-
prerelease: false
|
|
78
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
79
|
-
requirements:
|
|
80
|
-
- - ">="
|
|
81
|
-
- !ruby/object:Gem::Version
|
|
82
|
-
version: '0'
|
|
83
|
-
- !ruby/object:Gem::Dependency
|
|
84
|
-
name: rubocop
|
|
85
|
-
requirement: !ruby/object:Gem::Requirement
|
|
86
|
-
requirements:
|
|
87
|
-
- - ">="
|
|
88
|
-
- !ruby/object:Gem::Version
|
|
89
|
-
version: '0'
|
|
90
|
-
type: :development
|
|
91
|
-
prerelease: false
|
|
92
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
93
15
|
requirements:
|
|
94
16
|
- - ">="
|
|
95
17
|
- !ruby/object:Gem::Version
|
|
96
18
|
version: '0'
|
|
97
|
-
|
|
98
|
-
name: simplecov
|
|
99
|
-
requirement: !ruby/object:Gem::Requirement
|
|
100
|
-
requirements:
|
|
101
|
-
- - ">="
|
|
102
|
-
- !ruby/object:Gem::Version
|
|
103
|
-
version: '0'
|
|
104
|
-
type: :development
|
|
19
|
+
type: :runtime
|
|
105
20
|
prerelease: false
|
|
106
21
|
version_requirements: !ruby/object:Gem::Requirement
|
|
107
22
|
requirements:
|
|
@@ -111,29 +26,25 @@ dependencies:
|
|
|
111
26
|
description: Ruby bindings for libssw
|
|
112
27
|
email:
|
|
113
28
|
- 2xijok@gmail.com
|
|
114
|
-
executables:
|
|
115
|
-
- rbssw
|
|
29
|
+
executables: []
|
|
116
30
|
extensions: []
|
|
117
31
|
extra_rdoc_files: []
|
|
118
32
|
files:
|
|
119
33
|
- LICENSE.txt
|
|
120
34
|
- README.md
|
|
121
|
-
- exe/rbssw
|
|
122
35
|
- lib/libssw.rb
|
|
123
36
|
- lib/ssw/BLOSUM50.rb
|
|
124
37
|
- lib/ssw/BLOSUM62.rb
|
|
125
38
|
- lib/ssw/aaseq.rb
|
|
126
39
|
- lib/ssw/align.rb
|
|
127
40
|
- lib/ssw/dna.rb
|
|
128
|
-
- lib/ssw/
|
|
41
|
+
- lib/ssw/libssw.rb
|
|
129
42
|
- lib/ssw/profile.rb
|
|
130
43
|
- lib/ssw/version.rb
|
|
131
|
-
- vendor/libssw.so
|
|
132
44
|
homepage: https://github.com/kojix2/ruby-libssw
|
|
133
45
|
licenses:
|
|
134
46
|
- MIT
|
|
135
47
|
metadata: {}
|
|
136
|
-
post_install_message:
|
|
137
48
|
rdoc_options: []
|
|
138
49
|
require_paths:
|
|
139
50
|
- lib
|
|
@@ -148,8 +59,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
148
59
|
- !ruby/object:Gem::Version
|
|
149
60
|
version: '0'
|
|
150
61
|
requirements: []
|
|
151
|
-
rubygems_version: 3.
|
|
152
|
-
signing_key:
|
|
62
|
+
rubygems_version: 3.6.9
|
|
153
63
|
specification_version: 4
|
|
154
64
|
summary: Ruby bindings for libssw
|
|
155
65
|
test_files: []
|
data/exe/rbssw
DELETED
|
@@ -1,193 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
# frozen_string_literal: true
|
|
3
|
-
|
|
4
|
-
warn 'This script is under development.'
|
|
5
|
-
warn "It doesn't work properly yet!"
|
|
6
|
-
|
|
7
|
-
require 'bio'
|
|
8
|
-
require 'libssw'
|
|
9
|
-
SSW = LibSSW
|
|
10
|
-
require 'optparse'
|
|
11
|
-
|
|
12
|
-
opts = {
|
|
13
|
-
lib_path: nil,
|
|
14
|
-
nmatch: 2,
|
|
15
|
-
nmismatch: 2,
|
|
16
|
-
nopen: 3,
|
|
17
|
-
next: 1,
|
|
18
|
-
bprotein: false,
|
|
19
|
-
smatrix: nil,
|
|
20
|
-
bpath: false,
|
|
21
|
-
nthr: nil,
|
|
22
|
-
bbest: false,
|
|
23
|
-
bsam: nil, # typo?
|
|
24
|
-
bheader: nil
|
|
25
|
-
}
|
|
26
|
-
|
|
27
|
-
parser = OptionParser.new do |opt|
|
|
28
|
-
opt.version = LibSSW::VERSION
|
|
29
|
-
opt.summary_width = 20
|
|
30
|
-
opt.banner = 'Usage: rbssw [options] <target file> <query file>'
|
|
31
|
-
opt.on('-l', '--sLibPath PATH', String,
|
|
32
|
-
'path of libssw.so') do |v|
|
|
33
|
-
opts[:sLibPath] = v
|
|
34
|
-
end
|
|
35
|
-
opt.on('-m', '--nMatch VAL', Integer,
|
|
36
|
-
'a positive integer as the score for a match',
|
|
37
|
-
"in genome sequence alignment. [#{opts[:nmatch]}]") do |v|
|
|
38
|
-
opts[:nmatch] = v
|
|
39
|
-
end
|
|
40
|
-
opt.on('-x', '--nMismatch VAL', Integer,
|
|
41
|
-
'a positive integer as the score for a mismatch',
|
|
42
|
-
"in genome sequence alignment. [#{opts[:nmismatch]}]") do |v|
|
|
43
|
-
opts[:nmismatch] = v
|
|
44
|
-
end
|
|
45
|
-
opt.on('-o', '--nOpen VAL', Integer,
|
|
46
|
-
'a positive integer as the penalty for the gap opening',
|
|
47
|
-
"in genome sequence alignment. [#{opts[:nopen]}]") do |v|
|
|
48
|
-
opts[:nopen] = v
|
|
49
|
-
end
|
|
50
|
-
opt.on('-e', '--nExt VAL', Integer,
|
|
51
|
-
'a positive integer as the penalty for the gap extension',
|
|
52
|
-
"in genome sequence alignment. [#{opts[:next]}]") do |v|
|
|
53
|
-
opts[:next] = v
|
|
54
|
-
end
|
|
55
|
-
opt.on('-p', '--bProtien', TrueClass,
|
|
56
|
-
'Do protein sequence alignment.',
|
|
57
|
-
"Without this option, do genome sequence alignment. [#{opts[:bprotein]}]") do |v|
|
|
58
|
-
opts[:bprotein] = v
|
|
59
|
-
end
|
|
60
|
-
opt.on('-a', '--sMatrix VAL', String,
|
|
61
|
-
"a file for either Blosum or Pam weight matrix. [#{opts[:smatrix]}]") do |v|
|
|
62
|
-
opts[:smatrix] = v
|
|
63
|
-
end
|
|
64
|
-
opt.on('-c', '--bPath', TrueClass,
|
|
65
|
-
"Return the alignment path. [#{opts[:bpath]}]") do |v|
|
|
66
|
-
opts[:bpath] = v
|
|
67
|
-
end
|
|
68
|
-
opt.on('-f', '--nThr VAL', Integer,
|
|
69
|
-
'a positive integer.',
|
|
70
|
-
'Only output the alignments with the Smith-Waterman score >= N.') do |v|
|
|
71
|
-
opts[:nthr] = v
|
|
72
|
-
end
|
|
73
|
-
opt.on('-r', '--bBest', TrueClass,
|
|
74
|
-
'The best alignment will be picked, between the original read',
|
|
75
|
-
"alignment and the reverse complement read alignment. [#{opts[:bbest]}]") do |v|
|
|
76
|
-
opts[:bbest] = v
|
|
77
|
-
end
|
|
78
|
-
opt.on('-s', '--bSam', TrueClass,
|
|
79
|
-
'Output in SAM format. [no header]') do |v| # TYPO?
|
|
80
|
-
opts[:bsam] = v
|
|
81
|
-
end
|
|
82
|
-
opt.on('-header', '--bHeader', TrueClass,
|
|
83
|
-
'If -s is used, include header in SAM output.') do |v|
|
|
84
|
-
opts[:bheader] = v
|
|
85
|
-
end
|
|
86
|
-
end
|
|
87
|
-
|
|
88
|
-
parser.order!(ARGV)
|
|
89
|
-
|
|
90
|
-
opts[:target] = ARGV[0]
|
|
91
|
-
opts[:query] = ARGV[1]
|
|
92
|
-
|
|
93
|
-
lEle = []
|
|
94
|
-
dRc = {}
|
|
95
|
-
dEle2Int = {}
|
|
96
|
-
dInt2Ele = {}
|
|
97
|
-
lScore = nil
|
|
98
|
-
|
|
99
|
-
if opts[:bprotein]
|
|
100
|
-
# load AA score matrix
|
|
101
|
-
if !opts[:smatrix]
|
|
102
|
-
lEle = SSW::AAELEMENTS
|
|
103
|
-
dEle2Int = SSW::AA2INT
|
|
104
|
-
dInt2Ele = SSW::INT2AA
|
|
105
|
-
nEleNum = lEle.size
|
|
106
|
-
lScore = SSW::Blosum50
|
|
107
|
-
else
|
|
108
|
-
lEle, dEle2Int, dInt2Ele, lScore = SSW.read_matrix(opts[:smatrix])
|
|
109
|
-
end
|
|
110
|
-
elsif !opts[:smatrix]
|
|
111
|
-
# init DNA score matrix
|
|
112
|
-
lEle = SSW::DNAELEMENTS
|
|
113
|
-
dRc = SSW::DNARC
|
|
114
|
-
dEle2Int = SSW::DNA2INT
|
|
115
|
-
dInt2Ele = SSW::INT2DNA
|
|
116
|
-
nEleNum = lEle.size # 5
|
|
117
|
-
lScore = LibSSW.create_scoring_matrix(lEle, opts[:nmatch], -opts[:nmismatch])
|
|
118
|
-
end
|
|
119
|
-
|
|
120
|
-
warn 'Reverse complement alignment is not available for protein sequences.' if opts[:bbest] && opts[:bprotein]
|
|
121
|
-
|
|
122
|
-
# set flag
|
|
123
|
-
nFlag = opts[:bpath] ? 2 : 0
|
|
124
|
-
|
|
125
|
-
# print sam head
|
|
126
|
-
if opts[:bsam] && opts[:bheader] && opts[:bpath]
|
|
127
|
-
puts '@HD\tVN:1.4\tSO:queryname'
|
|
128
|
-
Bio::Flatfile.open(opts[:target]) do |f|
|
|
129
|
-
f.each do |entry|
|
|
130
|
-
id = entry.entry_id
|
|
131
|
-
len = entry.nalen
|
|
132
|
-
puts "@SQ\tSN:#{id}\tLN:#{len}"
|
|
133
|
-
end
|
|
134
|
-
end
|
|
135
|
-
elsif opts[:bsam] && !args[:bpath]
|
|
136
|
-
warn 'SAM format output is only available together with option -c.\n'
|
|
137
|
-
opts[:bsam] = false
|
|
138
|
-
end
|
|
139
|
-
|
|
140
|
-
def _to_int(seq, lEle, dEle2Int)
|
|
141
|
-
seq.each_char.map do |ele|
|
|
142
|
-
if dEle2Int.has_key?(ele)
|
|
143
|
-
dEle2Int[ele]
|
|
144
|
-
else
|
|
145
|
-
dEle2Int[lEle[-1]]
|
|
146
|
-
end
|
|
147
|
-
end
|
|
148
|
-
end
|
|
149
|
-
|
|
150
|
-
# iterate query sequence
|
|
151
|
-
Bio::FlatFile.open(opts[:query]) do |query_file|
|
|
152
|
-
query_file.each do |qentry|
|
|
153
|
-
sQId = qentry.entry_id
|
|
154
|
-
sQSeq = qentry.sequence_string
|
|
155
|
-
sQQual = qentry.quality_string
|
|
156
|
-
# build query profile
|
|
157
|
-
qNum = _to_int(sQSeq, lEle, dEle2Int)
|
|
158
|
-
qProfile = SSW.ssw_init(qNum, sQSeq.size, lScore, lEle.size, 2)
|
|
159
|
-
# build rc query profile
|
|
160
|
-
if opts[:bbest] && !opts[:bprotein]
|
|
161
|
-
sQRcSeq = sQSeq.reverse.each_char.map { |x| dRc[x] }.join
|
|
162
|
-
qRcNum = _to_int(sQRcSeq, lEle, dEle2Int)
|
|
163
|
-
qRcProfile = SSW.ssw_init(qRcNum, sQSeq.size, mat, lEle.size, 2)
|
|
164
|
-
end
|
|
165
|
-
# set mask length
|
|
166
|
-
if sQSeq.size > 30
|
|
167
|
-
nMaskLen = sQSeq.size / 2
|
|
168
|
-
else
|
|
169
|
-
nMasklen = 15
|
|
170
|
-
end
|
|
171
|
-
|
|
172
|
-
# iter target sequence
|
|
173
|
-
Bio::FlatFile.open(opts[:target]) do |target_file|
|
|
174
|
-
target_file.each do |tentry|
|
|
175
|
-
sRId = tentry.entry_id
|
|
176
|
-
sRSeq = tentry.seq.to_s
|
|
177
|
-
rNum = _to_int(sRSeq, lEle, dEle2Int)
|
|
178
|
-
res = SSW.ssw_align(
|
|
179
|
-
qProfile, rNum, sRSeq.size, opts[:nopen], opts[:next], nFlag, 0, 0, nMaskLen
|
|
180
|
-
)
|
|
181
|
-
p res.to_h
|
|
182
|
-
resRc = nil
|
|
183
|
-
if opts[:bbest] && !opts[:bprotein]
|
|
184
|
-
resRc = SSW.align_one(
|
|
185
|
-
qRcProfile, rNum, sRSeq.size, opts[:nopen], opts[:next], nFlag, 0, 0, nMaskLen
|
|
186
|
-
)
|
|
187
|
-
end
|
|
188
|
-
# build cigar and trace back path
|
|
189
|
-
strand = 0
|
|
190
|
-
end
|
|
191
|
-
end
|
|
192
|
-
end
|
|
193
|
-
end
|
data/vendor/libssw.so
DELETED
|
Binary file
|