libssw 0.0.2 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +72 -22
- data/lib/libssw.rb +109 -154
- data/lib/{libssw → ssw}/BLOSUM50.rb +1 -1
- data/lib/{libssw → ssw}/BLOSUM62.rb +1 -1
- data/lib/ssw/aaseq.rb +71 -0
- data/lib/{libssw → ssw}/align.rb +20 -5
- data/lib/ssw/dna.rb +69 -0
- data/lib/{libssw/ffi.rb → ssw/libssw.rb} +3 -3
- data/lib/{libssw → ssw}/profile.rb +8 -10
- data/lib/{libssw → ssw}/version.rb +2 -2
- metadata +14 -101
- data/exe/rbssw +0 -193
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 0b67671ac9e959ee7e147bfff872719e6c787a020fb15a9b63e9128aef51d9d3
|
|
4
|
+
data.tar.gz: 0baec30769ef3e0f9248346dded445d9a4fa1eaafd9b2071b74e9e5deeb7e4be
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6fb9b0ad32647d27418b666545f3fe34ffab9e902ddbae0575733695d6101fa26b261dcc5d9066e9b8c9a85bac3e1c2cce6be79b58c63aab7a0896b691561a7e
|
|
7
|
+
data.tar.gz: 9c3862016d4490e0fce60296acf8fbe129c5f166363c0cdd978862e066819ec8b2a850dc345716750503e15521480e026d2aac0278f0e8db9394505cc98db3ab
|
data/README.md
CHANGED
|
@@ -3,11 +3,10 @@
|
|
|
3
3
|

|
|
4
4
|
[](https://rubygems.org/gems/libssw)
|
|
5
5
|
[](https://rubydoc.info/gems/libssw)
|
|
6
|
+
[](https://zenodo.org/badge/latestdoi/328163622)
|
|
6
7
|
|
|
7
8
|
:checkered_flag: [libssw](https://github.com/mengyao/Complete-Striped-Smith-Waterman-Library) - fast SIMD parallelized implementation of the Smith-Waterman algorithm - for Ruby
|
|
8
9
|
|
|
9
|
-
:construction: Under development.
|
|
10
|
-
|
|
11
10
|
## Installation
|
|
12
11
|
|
|
13
12
|
```sh
|
|
@@ -24,41 +23,42 @@ export LIBSSWDIR=/usr/lib/x86_64-linux-gnu/ # libssw.so
|
|
|
24
23
|
|
|
25
24
|
### Installing from source
|
|
26
25
|
|
|
27
|
-
When installing from source code using the following steps, the shared library `libssw.so` will be packed in the Ruby gem. In this case, the environment variable LIBSSWDIR is not required.
|
|
26
|
+
When installing from source code using the following steps, the shared library `libssw.so` or `libssw.dylib` will be packed in the Ruby gem. In this case, the environment variable `LIBSSWDIR` is not required.
|
|
28
27
|
|
|
29
28
|
```sh
|
|
30
|
-
git clone --
|
|
31
|
-
bundle exec rake libssw:
|
|
29
|
+
git clone --recursive https://github.com/kojix2/ruby-libssw
|
|
30
|
+
bundle exec rake libssw:build
|
|
32
31
|
bundle exec rake install
|
|
33
32
|
```
|
|
34
33
|
|
|
34
|
+
ruby-libssw does not support Windows.
|
|
35
|
+
|
|
35
36
|
## Usage
|
|
36
37
|
|
|
37
38
|
```ruby
|
|
38
39
|
require 'libssw'
|
|
39
40
|
|
|
40
|
-
SSW = LibSSW
|
|
41
|
-
|
|
42
41
|
ref_str = "AAAAAAAAACGTTAAAAAAAAAA"
|
|
43
|
-
ref_int = SSW.
|
|
42
|
+
ref_int = SSW::DNA.to_int_array(ref_str)
|
|
44
43
|
# [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
|
|
45
44
|
|
|
46
45
|
read_str1 = "ACGTT"
|
|
47
|
-
read_str2 = SSW.
|
|
48
|
-
|
|
46
|
+
read_str2 = SSW::DNA.revcomp(read_str1)
|
|
47
|
+
# "AACGT"
|
|
48
|
+
read_int1 = SSW::DNA.to_int_array(read_str1)
|
|
49
49
|
# [0, 1, 2, 3, 3]
|
|
50
|
-
read_int2 = SSW.
|
|
50
|
+
read_int2 = SSW::DNA.to_int_array(read_str2)
|
|
51
51
|
# [0, 0, 1, 2, 3]
|
|
52
52
|
|
|
53
|
-
mat = SSW.create_scoring_matrix(SSW::
|
|
53
|
+
mat = SSW.create_scoring_matrix(SSW::DNA::Elements, 2, -2)
|
|
54
54
|
# mat = [2, -2, -2, -2, 0,
|
|
55
55
|
# -2, 2, -2, -2, 0,
|
|
56
56
|
# -2, -2, 2, -2, 0,
|
|
57
57
|
# -2, -2, -2, 2, 0,
|
|
58
58
|
# 0, 0, 0, 0, 0]
|
|
59
59
|
|
|
60
|
-
profile1 =
|
|
61
|
-
align1 =
|
|
60
|
+
profile1 = SSW.init(read_int1, mat)
|
|
61
|
+
align1 = SSW.align(profile1, ref_int, 3, 1, 1, 0, 0)
|
|
62
62
|
pp align1.to_h
|
|
63
63
|
# {
|
|
64
64
|
# :score1 => 10,
|
|
@@ -73,8 +73,8 @@ pp align1.to_h
|
|
|
73
73
|
# :cigar_string => "5M"
|
|
74
74
|
# }
|
|
75
75
|
|
|
76
|
-
profile2 =
|
|
77
|
-
align2
|
|
76
|
+
profile2 = SSW.init(read_int2, mat)
|
|
77
|
+
align2 = SSW.align(profile2, ref_int, 3, 1, 1, 0, 0)
|
|
78
78
|
pp align2.to_h
|
|
79
79
|
# {
|
|
80
80
|
# :score1 => 10,
|
|
@@ -88,25 +88,75 @@ pp align2.to_h
|
|
|
88
88
|
# :cigar_len => 1,
|
|
89
89
|
# :cigar_string => "5M"
|
|
90
90
|
# }
|
|
91
|
+
|
|
92
|
+
puts SSW.build_path(read_str1, ref_str, align1)
|
|
93
|
+
# 5M
|
|
94
|
+
# ACGTT
|
|
95
|
+
# |||||
|
|
96
|
+
# ACGTT
|
|
91
97
|
```
|
|
92
98
|
|
|
99
|
+
## APIs
|
|
100
|
+
|
|
101
|
+
See [API Documentation](https://rubydoc.info/gems/libssw).
|
|
102
|
+
|
|
103
|
+
```markdown
|
|
104
|
+
- SSW module
|
|
105
|
+
|
|
106
|
+
- SSW.init
|
|
107
|
+
- SSW.init_destroy
|
|
108
|
+
- SSW.align
|
|
109
|
+
- SSW.align_destroy
|
|
110
|
+
- SSW.mark_mismatch
|
|
111
|
+
- SSW.create_scoring_matrix
|
|
112
|
+
- SSW.build_path
|
|
113
|
+
|
|
114
|
+
- Profile class
|
|
93
115
|
|
|
94
|
-
|
|
116
|
+
- attributes
|
|
117
|
+
- read, mat, read_len, n, bias
|
|
95
118
|
|
|
96
|
-
|
|
119
|
+
- Align class
|
|
120
|
+
|
|
121
|
+
- attributes
|
|
122
|
+
- score1, score2, ref_begin1, ref_end1, read_begin1, read_end1, ref_end2
|
|
123
|
+
cigar, cigar_len, cigar_string
|
|
124
|
+
|
|
125
|
+
- DNA module
|
|
126
|
+
|
|
127
|
+
- DNA.to_int_array
|
|
128
|
+
- DNA.from_int_array
|
|
129
|
+
- DNA.revcomp
|
|
130
|
+
|
|
131
|
+
- AASeq module
|
|
132
|
+
|
|
133
|
+
- AASeq.to_int_array
|
|
134
|
+
- AASeq.from_int_array
|
|
135
|
+
|
|
136
|
+
- BLOSUM62
|
|
137
|
+
- BLOSUM50
|
|
138
|
+
```
|
|
97
139
|
|
|
98
140
|
## Development
|
|
99
141
|
|
|
100
142
|
```sh
|
|
101
|
-
git clone --
|
|
102
|
-
bundle exec rake libssw:
|
|
143
|
+
git clone --recursive https://github.com/kojix2/ruby-libssw
|
|
144
|
+
bundle exec rake libssw:build
|
|
103
145
|
bundle exec rake test
|
|
104
146
|
```
|
|
105
147
|
|
|
148
|
+
Do you need commit rights to my repository?
|
|
149
|
+
Do you want to get admin rights and take over the project?
|
|
150
|
+
If so, please feel free to contact me @kojix2.
|
|
151
|
+
|
|
106
152
|
## Contributing
|
|
107
153
|
|
|
108
|
-
|
|
154
|
+
- [Report bugs](https://github.com/kojix2/ruby-libssw/issues)
|
|
155
|
+
- Fix bugs and [submit pull requests](https://github.com/kojix2/ruby-libssw/pulls)
|
|
156
|
+
- Write, clarify, or fix documentation
|
|
157
|
+
- English corrections are welcome
|
|
158
|
+
- Suggest or add new features
|
|
109
159
|
|
|
110
160
|
## License
|
|
111
161
|
|
|
112
|
-
|
|
162
|
+
- [MIT License](https://opensource.org/licenses/MIT).
|
data/lib/libssw.rb
CHANGED
|
@@ -1,24 +1,19 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative '
|
|
4
|
-
require_relative '
|
|
5
|
-
require_relative '
|
|
3
|
+
require_relative 'ssw/version'
|
|
4
|
+
require_relative 'ssw/BLOSUM50'
|
|
5
|
+
require_relative 'ssw/BLOSUM62'
|
|
6
|
+
require_relative 'ssw/dna'
|
|
7
|
+
require_relative 'ssw/aaseq'
|
|
6
8
|
|
|
7
|
-
module
|
|
9
|
+
module SSW
|
|
8
10
|
class Error < StandardError; end
|
|
9
11
|
|
|
10
12
|
class << self
|
|
11
13
|
attr_accessor :ffi_lib
|
|
12
14
|
end
|
|
13
15
|
|
|
14
|
-
lib_name =
|
|
15
|
-
when /mswin|msys|mingw|cygwin|bccwin|wince|emc/
|
|
16
|
-
'libssw.dll' # unconfirmed
|
|
17
|
-
when /darwin|mac os/
|
|
18
|
-
'libssw.dylib' # unconfirmed
|
|
19
|
-
else
|
|
20
|
-
'libssw.so'
|
|
21
|
-
end
|
|
16
|
+
lib_name = "libssw.#{RbConfig::CONFIG['SOEXT']}" # Ruby 2.5 or later
|
|
22
17
|
|
|
23
18
|
self.ffi_lib = if ENV['LIBSSWDIR'] && !ENV['LIBSSWDIR'].empty?
|
|
24
19
|
File.expand_path(lib_name, ENV['LIBSSWDIR'])
|
|
@@ -26,67 +21,14 @@ module LibSSW
|
|
|
26
21
|
File.expand_path("../vendor/#{lib_name}", __dir__)
|
|
27
22
|
end
|
|
28
23
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
24
|
+
# NOTE: Why not use pkg-config?
|
|
25
|
+
# APT package is available.
|
|
26
|
+
# However, it does not include a .pc file.
|
|
27
|
+
# Thus pkg-config will not find the shared library.
|
|
32
28
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
AA2INT = { 'A' => 0, 'a' => 0,
|
|
38
|
-
'R' => 1, 'r' => 1,
|
|
39
|
-
'N' => 2, 'n' => 2,
|
|
40
|
-
'D' => 3, 'd' => 3,
|
|
41
|
-
'C' => 4, 'c' => 4,
|
|
42
|
-
'Q' => 5, 'q' => 5,
|
|
43
|
-
'E' => 6, 'e' => 6,
|
|
44
|
-
'G' => 7, 'g' => 7,
|
|
45
|
-
'H' => 8, 'h' => 8,
|
|
46
|
-
'I' => 9, 'i' => 9,
|
|
47
|
-
'L' => 10, 'l' => 10,
|
|
48
|
-
'K' => 11, 'k' => 11,
|
|
49
|
-
'M' => 12, 'm' => 12,
|
|
50
|
-
'F' => 13, 'f' => 13,
|
|
51
|
-
'P' => 14, 'p' => 14,
|
|
52
|
-
'S' => 15, 's' => 15,
|
|
53
|
-
'T' => 16, 't' => 16,
|
|
54
|
-
'W' => 17, 'w' => 17,
|
|
55
|
-
'Y' => 18, 'y' => 18,
|
|
56
|
-
'V' => 19, 'v' => 19,
|
|
57
|
-
'B' => 20, 'b' => 20,
|
|
58
|
-
'Z' => 21, 'z' => 21,
|
|
59
|
-
'X' => 22, 'x' => 22,
|
|
60
|
-
'*' => 23 }
|
|
61
|
-
|
|
62
|
-
INT2AA = { 0 => 'A', 1 => 'R', 2 => 'N', 3 => 'D',
|
|
63
|
-
4 => 'C', 5 => 'Q', 6 => 'E', 7 => 'G',
|
|
64
|
-
8 => 'H', 9 => 'I', 10 => 'L', 11 => 'K',
|
|
65
|
-
12 => 'M', 13 => 'F', 14 => 'P', 15 => 'S',
|
|
66
|
-
16 => 'T', 17 => 'W', 18 => 'Y', 19 => 'V',
|
|
67
|
-
20 => 'B', 21 => 'Z', 22 => 'X', 23 => '*' }
|
|
68
|
-
|
|
69
|
-
DNAElements = %w[A C G T N]
|
|
70
|
-
|
|
71
|
-
DNA2INT = { 'A' => 0, 'a' => 0,
|
|
72
|
-
'C' => 1, 'c' => 1,
|
|
73
|
-
'G' => 2, 'g' => 2,
|
|
74
|
-
'T' => 3, 't' => 3,
|
|
75
|
-
'N' => 4, 'n' => 4 }
|
|
76
|
-
|
|
77
|
-
INT2DNA = { 0 => 'A', 1 => 'C', 2 => 'G', 3 => 'T', 4 => 'N' }
|
|
78
|
-
|
|
79
|
-
# reverse complement
|
|
80
|
-
DNARC = { 'A' => 'T',
|
|
81
|
-
'C' => 'G',
|
|
82
|
-
'G' => 'C',
|
|
83
|
-
'T' => 'A',
|
|
84
|
-
'N' => 'N',
|
|
85
|
-
'a' => 'T',
|
|
86
|
-
'c' => 'G',
|
|
87
|
-
'g' => 'C',
|
|
88
|
-
't' => 'A',
|
|
89
|
-
'n' => 'N' }
|
|
29
|
+
require_relative 'ssw/libssw'
|
|
30
|
+
require_relative 'ssw/profile'
|
|
31
|
+
require_relative 'ssw/align'
|
|
90
32
|
|
|
91
33
|
class << self
|
|
92
34
|
# Create the query profile using the query sequence.
|
|
@@ -99,47 +41,59 @@ module LibSSW
|
|
|
99
41
|
# * if your estimated best alignment score is surely < 255 please set 0;
|
|
100
42
|
# * if your estimated best alignment score >= 255, please set 1;
|
|
101
43
|
# * if you don't know, please set 2
|
|
102
|
-
def
|
|
44
|
+
def init(read, mat, n = nil, score_size: 2)
|
|
45
|
+
read = read.to_a
|
|
46
|
+
mat = mat.to_a.flatten
|
|
47
|
+
raise ArgumentError, 'Expect class of read to be Array' unless read.is_a?(Array)
|
|
48
|
+
raise ArgumentError, 'Expect class of mat to be Array' unless mat.is_a?(Array)
|
|
49
|
+
|
|
103
50
|
read_str = read.pack('c*')
|
|
104
51
|
read_len = read.size
|
|
105
|
-
mat = mat.to_a.flatten
|
|
106
52
|
n = Math.sqrt(mat.size) if n.nil?
|
|
107
53
|
raise "Not a square matrix. size: #{mat.size}, n: #{n}" if mat.size != n * n
|
|
108
54
|
|
|
109
55
|
mat_str = mat.flatten.pack('c*')
|
|
110
|
-
ptr =
|
|
56
|
+
ptr = LibSSW.ssw_init(
|
|
111
57
|
read_str,
|
|
112
58
|
read_len,
|
|
113
59
|
mat_str,
|
|
114
60
|
n,
|
|
115
61
|
score_size
|
|
116
62
|
)
|
|
117
|
-
# Garbage collection workaround
|
|
63
|
+
# Garbage collection workaround:
|
|
64
|
+
# The C library stores pointers to read and mat without copying the data.
|
|
65
|
+
# We must keep the Ruby strings (read_str, mat_str) alive for the lifetime
|
|
66
|
+
# of the profile structure to prevent segmentation faults.
|
|
118
67
|
#
|
|
119
|
-
#
|
|
120
|
-
#
|
|
121
|
-
#
|
|
122
|
-
#
|
|
68
|
+
# We cannot use Fiddle's automatic memory management (ptr.free) here because:
|
|
69
|
+
# - Calling init_destroy from Ruby's GC causes segmentation violations
|
|
70
|
+
# - The user should explicitly call SSW.init_destroy when done, or let
|
|
71
|
+
# Ruby's GC clean up the profile structure itself (though the contained
|
|
72
|
+
# profile_byte/profile_word will leak unless init_destroy is called)
|
|
123
73
|
ptr.instance_variable_set(:@read_str, read_str)
|
|
124
|
-
ptr.instance_variable_set(:@read_len, read_len)
|
|
125
74
|
ptr.instance_variable_set(:@mat_str, mat_str)
|
|
75
|
+
ptr.instance_variable_set(:@read_len, read_len)
|
|
126
76
|
ptr.instance_variable_set(:@n, n)
|
|
127
77
|
ptr.instance_variable_set(:@score_size, score_size)
|
|
128
78
|
|
|
129
|
-
|
|
79
|
+
SSW::Profile.new(ptr)
|
|
130
80
|
end
|
|
131
81
|
|
|
132
82
|
# Release the memory allocated by function ssw_init.
|
|
133
|
-
# @param
|
|
83
|
+
# @param profile [Fiddle::Pointer, SSW::Profile, SSW::LibSSW::Profile]
|
|
134
84
|
# pointer to the query profile structure
|
|
135
85
|
# @note Ruby has garbage collection, so there is not much reason to call
|
|
136
86
|
# this method.
|
|
137
87
|
def init_destroy(profile)
|
|
138
|
-
|
|
88
|
+
unless profile.is_a?(Fiddle::Pointer) || profile.is_a?(Profile) || profile.respond_to?(:to_ptr)
|
|
89
|
+
raise ArgumentError, 'Expect class of profile to be Profile or Pointer'
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
LibSSW.init_destroy(profile)
|
|
139
93
|
end
|
|
140
94
|
|
|
141
95
|
# Do Striped Smith-Waterman alignment.
|
|
142
|
-
# @param prof [Fiddle::Pointer,
|
|
96
|
+
# @param prof [Fiddle::Pointer, SSW::Profile, SSW::LibSSW::Profile]
|
|
143
97
|
# pointer to the query profile structure
|
|
144
98
|
# @param ref [Array]
|
|
145
99
|
# target sequence;
|
|
@@ -148,25 +102,25 @@ module LibSSW
|
|
|
148
102
|
# @param weight_gap0 [Integer] the absolute value of gap open penalty
|
|
149
103
|
# @param weight_gapE [Integer] the absolute value of gap extension penalty
|
|
150
104
|
# @param flag [Integer]
|
|
151
|
-
# * bit 5: when
|
|
105
|
+
# * bit 5: when set as 1, function ssw_align will return the best
|
|
152
106
|
# alignment beginning position;
|
|
153
|
-
# * bit 6: when
|
|
154
|
-
# read_end1 - read_begin1 < filterd), (whatever bit 5 is
|
|
107
|
+
# * bit 6: when set as 1, if (ref_end1 - ref_begin1 < filterd &&
|
|
108
|
+
# read_end1 - read_begin1 < filterd), (whatever bit 5 is set) the
|
|
155
109
|
# function will return the best alignment beginning position and cigar;
|
|
156
|
-
# * bit 7: when
|
|
157
|
-
# (whatever bit 5 is
|
|
110
|
+
# * bit 7: when set as 1, if the best alignment score >= filters,
|
|
111
|
+
# (whatever bit 5 is set) the function will return the best
|
|
158
112
|
# alignment beginning position and cigar;
|
|
159
|
-
# * bit 8: when
|
|
113
|
+
# * bit 8: when set as 1, (whatever bit 5, 6 or 7 is set) the
|
|
160
114
|
# function will always return the best alignment beginning position and
|
|
161
115
|
# cigar. When flag == 0, only the optimal and sub-optimal scores and the
|
|
162
116
|
# optimal alignment ending position will be returned.
|
|
163
117
|
# @param filters [Integer]
|
|
164
|
-
# scorefilter: when bit 7 of flag is
|
|
165
|
-
# filters will be used (Please check the
|
|
118
|
+
# scorefilter: when bit 7 of flag is set as 1 and bit 8 is set as 0,
|
|
119
|
+
# filters will be used (Please check the description of the flag parameter
|
|
166
120
|
# for detailed usage.)
|
|
167
121
|
# @param filterd [Integer]
|
|
168
|
-
# distance filter: when bit 6 of flag is
|
|
169
|
-
# as 0, filterd will be used (Please check the
|
|
122
|
+
# distance filter: when bit 6 of flag is set as 1 and bit 8 is set
|
|
123
|
+
# as 0, filterd will be used (Please check the description of the flag
|
|
170
124
|
# parameter for detailed usage.)
|
|
171
125
|
# @param mask_len [Integer]
|
|
172
126
|
# The distance between the optimal and suboptimal alignment ending
|
|
@@ -181,25 +135,40 @@ module LibSSW
|
|
|
181
135
|
# SSW C library masks the reference loci nearby (mask length = maskLen)
|
|
182
136
|
# the best alignment ending position and locates the second largest score
|
|
183
137
|
# from the unmasked elements.
|
|
184
|
-
|
|
138
|
+
# @return [Align]
|
|
139
|
+
def align(prof, ref, weight_gap0, weight_gapE, flag, filters, filterd, mask_len = nil)
|
|
140
|
+
unless prof.is_a?(Fiddle::Pointer) || prof.is_a?(Profile) || prof.respond_to?(:to_ptr)
|
|
141
|
+
raise ArgumentError, 'Expect class of filename to be Profile or Pointer'
|
|
142
|
+
end
|
|
143
|
+
raise ArgumentError, 'Expect class of ref to be Array' unless ref.is_a?(Array)
|
|
144
|
+
|
|
185
145
|
ref_str = ref.pack('c*')
|
|
186
146
|
ref_len = ref.size
|
|
187
|
-
|
|
147
|
+
mask_len ||= [ref_len / 2, 15].max
|
|
148
|
+
ptr = LibSSW.ssw_align(
|
|
188
149
|
prof, ref_str, ref_len, weight_gap0, weight_gapE, flag, filters, filterd, mask_len
|
|
189
150
|
)
|
|
190
|
-
#
|
|
191
|
-
#
|
|
192
|
-
#
|
|
193
|
-
#
|
|
194
|
-
# ptr
|
|
195
|
-
|
|
151
|
+
# Garbage collection workaround:
|
|
152
|
+
# Keep ref_str alive while the C code might still need it.
|
|
153
|
+
# However, since Align.new immediately reads all values and calls align_destroy,
|
|
154
|
+
# the C memory is freed immediately, so ref_str only needs to live until then.
|
|
155
|
+
# We store it on ptr just to be safe during the Align.new call.
|
|
156
|
+
ptr.instance_variable_set(:@ref_str, ref_str)
|
|
157
|
+
SSW::Align.new(ptr)
|
|
196
158
|
end
|
|
197
159
|
|
|
198
160
|
# Release the memory allocated by function ssw_align.
|
|
199
|
-
# @param
|
|
161
|
+
# @param align [Fiddle::Pointer, SSW::Align, SSW::LibSSW::Align]
|
|
200
162
|
# pointer to the alignment result structure
|
|
201
163
|
def align_destroy(align)
|
|
202
|
-
|
|
164
|
+
if align.is_a?(Align)
|
|
165
|
+
warn "You don't need to call this method for Ruby's Align class."
|
|
166
|
+
nil
|
|
167
|
+
elsif align.is_a?(Fiddle::Pointer) || align.respond_to?(:to_ptr)
|
|
168
|
+
LibSSW.align_destroy(align)
|
|
169
|
+
else
|
|
170
|
+
raise ArgumentError, 'Expect class of align to be Pointer'
|
|
171
|
+
end
|
|
203
172
|
end
|
|
204
173
|
|
|
205
174
|
# 1. Calculate the number of mismatches.
|
|
@@ -226,22 +195,11 @@ module LibSSW
|
|
|
226
195
|
# @return [Integer] The number of mismatches. The cigar and cigarLen are modified.
|
|
227
196
|
def mark_mismatch(ref_begin1, read_begin1, read_end1, ref, read, read_len, cigar, cigar_len)
|
|
228
197
|
warn 'implementation: fiexme: **cigar' # FIXME
|
|
229
|
-
|
|
198
|
+
LibSSW.mark_mismatch(
|
|
230
199
|
ref_begin1, read_begin1, read_end1, ref.pack('c*'), read.pack('c*'), read_len, cigar, cigar_len.pack('l*')
|
|
231
200
|
)
|
|
232
201
|
end
|
|
233
202
|
|
|
234
|
-
def array_to_cigar_string(arr)
|
|
235
|
-
cigar_string = String.new
|
|
236
|
-
arr.each do |x|
|
|
237
|
-
n = x >> 4
|
|
238
|
-
m = x & 15
|
|
239
|
-
c = m > 8 ? 'M' : 'MIDNSHP=X'[m]
|
|
240
|
-
cigar_string << n.to_s << c
|
|
241
|
-
end
|
|
242
|
-
cigar_string
|
|
243
|
-
end
|
|
244
|
-
|
|
245
203
|
# Create scoring matrix of Smith-Waterman algorithm.
|
|
246
204
|
# @param [Array] elements
|
|
247
205
|
# @param [Integer] match_score
|
|
@@ -258,44 +216,41 @@ module LibSSW
|
|
|
258
216
|
score
|
|
259
217
|
end
|
|
260
218
|
|
|
261
|
-
#
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
219
|
+
# TODO: fix variable names
|
|
220
|
+
# @param q_seq [String] query sequence
|
|
221
|
+
# @param r_seq [String] reference sequence
|
|
222
|
+
# @param align [Align] alignment result
|
|
223
|
+
# @return [Array]
|
|
224
|
+
def build_path(q_seq, r_seq, align)
|
|
225
|
+
sQ = ''
|
|
226
|
+
sA = ''
|
|
227
|
+
sR = ''
|
|
228
|
+
q_off = align.read_begin1
|
|
229
|
+
r_off = align.ref_begin1
|
|
230
|
+
align.cigar.each do |x|
|
|
231
|
+
n = x >> 4
|
|
232
|
+
m = x & 15
|
|
233
|
+
c = m > 8 ? 'M' : 'MIDNSHP=X'[m]
|
|
234
|
+
case c
|
|
235
|
+
when 'M'
|
|
236
|
+
sQ += q_seq[q_off...(q_off + n)]
|
|
237
|
+
sA += Array.new(n) { |j| q_seq[q_off + j] == r_seq[r_off + j] ? '|' : '*' }.join
|
|
238
|
+
sR += r_seq[r_off...(r_off + n)]
|
|
239
|
+
q_off += n
|
|
240
|
+
r_off += n
|
|
241
|
+
when 'I'
|
|
242
|
+
sQ += q_seq[q_off...(q_off + n)]
|
|
243
|
+
sA += ' ' * n
|
|
244
|
+
sR += ' ' * n
|
|
245
|
+
q_off += n
|
|
246
|
+
when 'D'
|
|
247
|
+
sQ += ' ' * n
|
|
248
|
+
sA += ' ' * n
|
|
249
|
+
sR += r_seq[r_off...(r_off + n)]
|
|
250
|
+
r_off += n
|
|
251
|
+
end
|
|
290
252
|
end
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
def int_array_to_aaseq(arr)
|
|
294
|
-
raise ArgumentError, 'arr must be an Array' unless arr.is_a? Array
|
|
295
|
-
|
|
296
|
-
arr.map do |i|
|
|
297
|
-
INT2AA[i] || '*'
|
|
298
|
-
end.join
|
|
253
|
+
[align.cigar_string, sQ, sA, sR]
|
|
299
254
|
end
|
|
300
255
|
end
|
|
301
256
|
end
|
data/lib/ssw/aaseq.rb
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SSW
|
|
4
|
+
module AASeq
|
|
5
|
+
AAELEMENTS = ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G',
|
|
6
|
+
'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S',
|
|
7
|
+
'T', 'W', 'Y', 'V', 'B', 'Z', 'X', '*'].freeze
|
|
8
|
+
|
|
9
|
+
AA2INT = { 'A' => 0, 'a' => 0,
|
|
10
|
+
'R' => 1, 'r' => 1,
|
|
11
|
+
'N' => 2, 'n' => 2,
|
|
12
|
+
'D' => 3, 'd' => 3,
|
|
13
|
+
'C' => 4, 'c' => 4,
|
|
14
|
+
'Q' => 5, 'q' => 5,
|
|
15
|
+
'E' => 6, 'e' => 6,
|
|
16
|
+
'G' => 7, 'g' => 7,
|
|
17
|
+
'H' => 8, 'h' => 8,
|
|
18
|
+
'I' => 9, 'i' => 9,
|
|
19
|
+
'L' => 10, 'l' => 10,
|
|
20
|
+
'K' => 11, 'k' => 11,
|
|
21
|
+
'M' => 12, 'm' => 12,
|
|
22
|
+
'F' => 13, 'f' => 13,
|
|
23
|
+
'P' => 14, 'p' => 14,
|
|
24
|
+
'S' => 15, 's' => 15,
|
|
25
|
+
'T' => 16, 't' => 16,
|
|
26
|
+
'W' => 17, 'w' => 17,
|
|
27
|
+
'Y' => 18, 'y' => 18,
|
|
28
|
+
'V' => 19, 'v' => 19,
|
|
29
|
+
'B' => 20, 'b' => 20,
|
|
30
|
+
'Z' => 21, 'z' => 21,
|
|
31
|
+
'X' => 22, 'x' => 22,
|
|
32
|
+
'*' => 23 }.freeze
|
|
33
|
+
|
|
34
|
+
INT2AA = { 0 => 'A', 1 => 'R', 2 => 'N', 3 => 'D',
|
|
35
|
+
4 => 'C', 5 => 'Q', 6 => 'E', 7 => 'G',
|
|
36
|
+
8 => 'H', 9 => 'I', 10 => 'L', 11 => 'K',
|
|
37
|
+
12 => 'M', 13 => 'F', 14 => 'P', 15 => 'S',
|
|
38
|
+
16 => 'T', 17 => 'W', 18 => 'Y', 19 => 'V',
|
|
39
|
+
20 => 'B', 21 => 'Z', 22 => 'X', 23 => '*' }.freeze
|
|
40
|
+
|
|
41
|
+
module_function
|
|
42
|
+
|
|
43
|
+
# Transform amino acid sequence into numerical sequence.
|
|
44
|
+
# @param seq [String] amino acid sequence
|
|
45
|
+
# @return [Array] int array
|
|
46
|
+
# @example
|
|
47
|
+
# SSW::AASeq.to_int_array("ARND") #=> [0, 1, 2, 3]
|
|
48
|
+
|
|
49
|
+
def to_int_array(seq)
|
|
50
|
+
raise ArgumentError, 'seq must be a string' unless seq.is_a? String
|
|
51
|
+
|
|
52
|
+
seq.each_char.map do |base|
|
|
53
|
+
AA2INT[base] || AA2INT['*']
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
# Transform numerical sequence into amino acid sequence.
|
|
58
|
+
# @param arr [Array] int array
|
|
59
|
+
# @return [String] amino acid sequence
|
|
60
|
+
# @example
|
|
61
|
+
# SSW::AASeq.from_int_array([0, 1, 2, 3]) #=> "ARND"
|
|
62
|
+
|
|
63
|
+
def from_int_array(arr)
|
|
64
|
+
raise ArgumentError, 'arr must be an Array' unless arr.is_a? Array
|
|
65
|
+
|
|
66
|
+
arr.map do |i|
|
|
67
|
+
INT2AA[i] || '*'
|
|
68
|
+
end.join
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
end
|
data/lib/{libssw → ssw}/align.rb
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
module
|
|
3
|
+
module SSW
|
|
4
4
|
# structure of the alignment result
|
|
5
5
|
# @!attribute score1
|
|
6
6
|
# @return [Integer] the best alignment score
|
|
@@ -28,7 +28,9 @@ module LibSSW
|
|
|
28
28
|
# @!attribute cigar_len
|
|
29
29
|
# @return [Integer]
|
|
30
30
|
# length of the cigar string; cigarLen = 0 when the best alignment path is not available
|
|
31
|
-
|
|
31
|
+
# @!attribute cigar_string
|
|
32
|
+
# @return [String] cigar string
|
|
33
|
+
class Align
|
|
32
34
|
def self.keys
|
|
33
35
|
%i[score1 score2 ref_begin1 ref_end1
|
|
34
36
|
read_begin1 read_end1 ref_end2 cigar cigar_len cigar_string]
|
|
@@ -39,7 +41,7 @@ module LibSSW
|
|
|
39
41
|
|
|
40
42
|
def initialize(ptr)
|
|
41
43
|
@ptr = ptr
|
|
42
|
-
@cstruct = align =
|
|
44
|
+
@cstruct = align = LibSSW::Align.new(ptr)
|
|
43
45
|
@score1 = align.score1
|
|
44
46
|
@score2 = align.score2
|
|
45
47
|
@ref_begin1 = align.ref_begin1
|
|
@@ -50,12 +52,25 @@ module LibSSW
|
|
|
50
52
|
@cigar_len = align.cigarLen
|
|
51
53
|
@cigar = cigar_len.positive? ? align.cigar[0, 4 * cigar_len].unpack('L*') : []
|
|
52
54
|
# Attributes for ruby binding only
|
|
53
|
-
@cigar_string =
|
|
54
|
-
|
|
55
|
+
@cigar_string = array_to_cigar_string(@cigar)
|
|
56
|
+
SSW.align_destroy(ptr)
|
|
55
57
|
end
|
|
56
58
|
|
|
57
59
|
def to_h
|
|
58
60
|
self.class.keys.map { |k| [k, __send__(k)] }.to_h
|
|
59
61
|
end
|
|
62
|
+
|
|
63
|
+
private
|
|
64
|
+
|
|
65
|
+
def array_to_cigar_string(arr)
|
|
66
|
+
cigar_string = String.new
|
|
67
|
+
arr.each do |x|
|
|
68
|
+
n = x >> 4
|
|
69
|
+
m = x & 15
|
|
70
|
+
c = m > 8 ? 'M' : 'MIDNSHP=X'[m]
|
|
71
|
+
cigar_string << n.to_s << c
|
|
72
|
+
end
|
|
73
|
+
cigar_string
|
|
74
|
+
end
|
|
60
75
|
end
|
|
61
76
|
end
|
data/lib/ssw/dna.rb
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SSW
|
|
4
|
+
module DNA
|
|
5
|
+
Elements = %w[A C G T N].freeze
|
|
6
|
+
|
|
7
|
+
DNA2INT = { 'A' => 0, 'a' => 0,
|
|
8
|
+
'C' => 1, 'c' => 1,
|
|
9
|
+
'G' => 2, 'g' => 2,
|
|
10
|
+
'T' => 3, 't' => 3,
|
|
11
|
+
'N' => 4, 'n' => 4 }.freeze
|
|
12
|
+
|
|
13
|
+
INT2DNA = { 0 => 'A', 1 => 'C', 2 => 'G', 3 => 'T', 4 => 'N' }.freeze
|
|
14
|
+
|
|
15
|
+
# reverse complement
|
|
16
|
+
DNARC = { 'A' => 'T',
|
|
17
|
+
'C' => 'G',
|
|
18
|
+
'G' => 'C',
|
|
19
|
+
'T' => 'A',
|
|
20
|
+
'N' => 'N',
|
|
21
|
+
'a' => 'T',
|
|
22
|
+
'c' => 'G',
|
|
23
|
+
'g' => 'C',
|
|
24
|
+
't' => 'A',
|
|
25
|
+
'n' => 'N' }.freeze
|
|
26
|
+
|
|
27
|
+
module_function
|
|
28
|
+
|
|
29
|
+
# Transform DNA sequence into numerical sequence.
|
|
30
|
+
# @param seq [String] dna sequence
|
|
31
|
+
# @return [Array] int array
|
|
32
|
+
# @example
|
|
33
|
+
# SSW::DNA.to_int_array("TCGA") #=> [3, 1, 2, 0]
|
|
34
|
+
|
|
35
|
+
def to_int_array(seq)
|
|
36
|
+
raise ArgumentError, 'seq must be a string' unless seq.is_a? String
|
|
37
|
+
|
|
38
|
+
seq.each_char.map do |base|
|
|
39
|
+
DNA2INT[base] || DNA2INT['N']
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
# Transform numerical sequence into DNA sequence.
|
|
44
|
+
# @param arr [Array] int array
|
|
45
|
+
# @return [String] dna sequence
|
|
46
|
+
# @example
|
|
47
|
+
# SSW::DNA.from_int_array([3, 1, 2, 0]) #=> "TCGA"
|
|
48
|
+
|
|
49
|
+
def from_int_array(arr)
|
|
50
|
+
raise ArgumentError, 'arr must be an Array' unless arr.is_a? Array
|
|
51
|
+
|
|
52
|
+
arr.map do |i|
|
|
53
|
+
INT2DNA[i] || 'N'
|
|
54
|
+
end.join
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
# reverse complement
|
|
58
|
+
# @param seq [String] sequence
|
|
59
|
+
# @return [String] reverse complement
|
|
60
|
+
# @example
|
|
61
|
+
# SSW::DNA.revcomp("TCGAT") #=> "ATCGA"
|
|
62
|
+
|
|
63
|
+
def revcomp(seq)
|
|
64
|
+
seq.each_char.map do |base|
|
|
65
|
+
DNARC[base]
|
|
66
|
+
end.join.reverse
|
|
67
|
+
end
|
|
68
|
+
end
|
|
69
|
+
end
|
|
@@ -2,12 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
require 'fiddle/import'
|
|
4
4
|
|
|
5
|
-
module
|
|
6
|
-
module
|
|
5
|
+
module SSW
|
|
6
|
+
module LibSSW
|
|
7
7
|
extend Fiddle::Importer
|
|
8
8
|
|
|
9
9
|
begin
|
|
10
|
-
dlload
|
|
10
|
+
dlload SSW.ffi_lib
|
|
11
11
|
rescue LoadError => e
|
|
12
12
|
raise LoadError, "Could not find libssw shared library. \n#{e}"
|
|
13
13
|
end
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
module
|
|
3
|
+
module SSW
|
|
4
4
|
# structure of the query profile
|
|
5
5
|
# @!attribute read
|
|
6
6
|
# @!attribute mat
|
|
7
7
|
# @!attribute read_len
|
|
8
8
|
# @!attribute n
|
|
9
9
|
# @!attribute bias
|
|
10
|
-
class Profile
|
|
10
|
+
class Profile
|
|
11
11
|
def self.keys
|
|
12
12
|
%i[read mat read_len n bias]
|
|
13
13
|
end
|
|
@@ -17,7 +17,7 @@ module LibSSW
|
|
|
17
17
|
|
|
18
18
|
def initialize(ptr)
|
|
19
19
|
@ptr = ptr
|
|
20
|
-
@cstruct = profile = LibSSW::
|
|
20
|
+
@cstruct = profile = SSW::LibSSW::Profile.new(ptr)
|
|
21
21
|
@read_len = profile.readLen
|
|
22
22
|
@read = read_len.positive? ? profile.read[0, read_len].unpack('c*') : []
|
|
23
23
|
@n = profile.n
|
|
@@ -26,13 +26,11 @@ module LibSSW
|
|
|
26
26
|
end
|
|
27
27
|
|
|
28
28
|
def to_ptr
|
|
29
|
-
#
|
|
30
|
-
#
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
cstruct.n = ptr.instance_variable_get(:@n)
|
|
35
|
-
ptr
|
|
29
|
+
# The pointer already contains the correct C structure.
|
|
30
|
+
# The instance variables on @ptr (@read_str, @mat_str, etc.) are kept
|
|
31
|
+
# alive to prevent garbage collection of the memory that C is referencing.
|
|
32
|
+
# We don't need to modify the C structure here.
|
|
33
|
+
@ptr
|
|
36
34
|
end
|
|
37
35
|
|
|
38
36
|
def to_h
|
metadata
CHANGED
|
@@ -1,107 +1,22 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: libssw
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.
|
|
4
|
+
version: 0.0.5
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- kojix2
|
|
8
|
-
|
|
9
|
-
bindir: exe
|
|
8
|
+
bindir: bin
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
11
|
dependencies:
|
|
13
12
|
- !ruby/object:Gem::Dependency
|
|
14
13
|
name: fiddle
|
|
15
14
|
requirement: !ruby/object:Gem::Requirement
|
|
16
|
-
requirements:
|
|
17
|
-
- - ">="
|
|
18
|
-
- !ruby/object:Gem::Version
|
|
19
|
-
version: 1.0.7
|
|
20
|
-
type: :runtime
|
|
21
|
-
prerelease: false
|
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
-
requirements:
|
|
24
|
-
- - ">="
|
|
25
|
-
- !ruby/object:Gem::Version
|
|
26
|
-
version: 1.0.7
|
|
27
|
-
- !ruby/object:Gem::Dependency
|
|
28
|
-
name: bio
|
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
|
30
|
-
requirements:
|
|
31
|
-
- - ">="
|
|
32
|
-
- !ruby/object:Gem::Version
|
|
33
|
-
version: '0'
|
|
34
|
-
type: :development
|
|
35
|
-
prerelease: false
|
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
-
requirements:
|
|
38
|
-
- - ">="
|
|
39
|
-
- !ruby/object:Gem::Version
|
|
40
|
-
version: '0'
|
|
41
|
-
- !ruby/object:Gem::Dependency
|
|
42
|
-
name: bundler
|
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
|
44
|
-
requirements:
|
|
45
|
-
- - ">="
|
|
46
|
-
- !ruby/object:Gem::Version
|
|
47
|
-
version: '0'
|
|
48
|
-
type: :development
|
|
49
|
-
prerelease: false
|
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
-
requirements:
|
|
52
|
-
- - ">="
|
|
53
|
-
- !ruby/object:Gem::Version
|
|
54
|
-
version: '0'
|
|
55
|
-
- !ruby/object:Gem::Dependency
|
|
56
|
-
name: minitest
|
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
|
58
|
-
requirements:
|
|
59
|
-
- - ">="
|
|
60
|
-
- !ruby/object:Gem::Version
|
|
61
|
-
version: '0'
|
|
62
|
-
type: :development
|
|
63
|
-
prerelease: false
|
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
65
|
-
requirements:
|
|
66
|
-
- - ">="
|
|
67
|
-
- !ruby/object:Gem::Version
|
|
68
|
-
version: '0'
|
|
69
|
-
- !ruby/object:Gem::Dependency
|
|
70
|
-
name: rake
|
|
71
|
-
requirement: !ruby/object:Gem::Requirement
|
|
72
|
-
requirements:
|
|
73
|
-
- - ">="
|
|
74
|
-
- !ruby/object:Gem::Version
|
|
75
|
-
version: '0'
|
|
76
|
-
type: :development
|
|
77
|
-
prerelease: false
|
|
78
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
79
|
-
requirements:
|
|
80
|
-
- - ">="
|
|
81
|
-
- !ruby/object:Gem::Version
|
|
82
|
-
version: '0'
|
|
83
|
-
- !ruby/object:Gem::Dependency
|
|
84
|
-
name: rubocop
|
|
85
|
-
requirement: !ruby/object:Gem::Requirement
|
|
86
|
-
requirements:
|
|
87
|
-
- - ">="
|
|
88
|
-
- !ruby/object:Gem::Version
|
|
89
|
-
version: '0'
|
|
90
|
-
type: :development
|
|
91
|
-
prerelease: false
|
|
92
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
93
15
|
requirements:
|
|
94
16
|
- - ">="
|
|
95
17
|
- !ruby/object:Gem::Version
|
|
96
18
|
version: '0'
|
|
97
|
-
|
|
98
|
-
name: simplecov
|
|
99
|
-
requirement: !ruby/object:Gem::Requirement
|
|
100
|
-
requirements:
|
|
101
|
-
- - ">="
|
|
102
|
-
- !ruby/object:Gem::Version
|
|
103
|
-
version: '0'
|
|
104
|
-
type: :development
|
|
19
|
+
type: :runtime
|
|
105
20
|
prerelease: false
|
|
106
21
|
version_requirements: !ruby/object:Gem::Requirement
|
|
107
22
|
requirements:
|
|
@@ -111,26 +26,25 @@ dependencies:
|
|
|
111
26
|
description: Ruby bindings for libssw
|
|
112
27
|
email:
|
|
113
28
|
- 2xijok@gmail.com
|
|
114
|
-
executables:
|
|
115
|
-
- rbssw
|
|
29
|
+
executables: []
|
|
116
30
|
extensions: []
|
|
117
31
|
extra_rdoc_files: []
|
|
118
32
|
files:
|
|
119
33
|
- LICENSE.txt
|
|
120
34
|
- README.md
|
|
121
|
-
- exe/rbssw
|
|
122
35
|
- lib/libssw.rb
|
|
123
|
-
- lib/
|
|
124
|
-
- lib/
|
|
125
|
-
- lib/
|
|
126
|
-
- lib/
|
|
127
|
-
- lib/
|
|
128
|
-
- lib/libssw
|
|
36
|
+
- lib/ssw/BLOSUM50.rb
|
|
37
|
+
- lib/ssw/BLOSUM62.rb
|
|
38
|
+
- lib/ssw/aaseq.rb
|
|
39
|
+
- lib/ssw/align.rb
|
|
40
|
+
- lib/ssw/dna.rb
|
|
41
|
+
- lib/ssw/libssw.rb
|
|
42
|
+
- lib/ssw/profile.rb
|
|
43
|
+
- lib/ssw/version.rb
|
|
129
44
|
homepage: https://github.com/kojix2/ruby-libssw
|
|
130
45
|
licenses:
|
|
131
46
|
- MIT
|
|
132
47
|
metadata: {}
|
|
133
|
-
post_install_message:
|
|
134
48
|
rdoc_options: []
|
|
135
49
|
require_paths:
|
|
136
50
|
- lib
|
|
@@ -145,8 +59,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
145
59
|
- !ruby/object:Gem::Version
|
|
146
60
|
version: '0'
|
|
147
61
|
requirements: []
|
|
148
|
-
rubygems_version: 3.
|
|
149
|
-
signing_key:
|
|
62
|
+
rubygems_version: 3.6.9
|
|
150
63
|
specification_version: 4
|
|
151
64
|
summary: Ruby bindings for libssw
|
|
152
65
|
test_files: []
|
data/exe/rbssw
DELETED
|
@@ -1,193 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
# frozen_string_literal: true
|
|
3
|
-
|
|
4
|
-
warn 'This script is under development.'
|
|
5
|
-
warn "It doesn't work properly yet!"
|
|
6
|
-
|
|
7
|
-
require 'bio'
|
|
8
|
-
require 'libssw'
|
|
9
|
-
SSW = LibSSW
|
|
10
|
-
require 'optparse'
|
|
11
|
-
|
|
12
|
-
opts = {
|
|
13
|
-
lib_path: nil,
|
|
14
|
-
nmatch: 2,
|
|
15
|
-
nmismatch: 2,
|
|
16
|
-
nopen: 3,
|
|
17
|
-
next: 1,
|
|
18
|
-
bprotein: false,
|
|
19
|
-
smatrix: nil,
|
|
20
|
-
bpath: false,
|
|
21
|
-
nthr: nil,
|
|
22
|
-
bbest: false,
|
|
23
|
-
bsam: nil, # typo?
|
|
24
|
-
bheader: nil
|
|
25
|
-
}
|
|
26
|
-
|
|
27
|
-
parser = OptionParser.new do |opt|
|
|
28
|
-
opt.version = LibSSW::VERSION
|
|
29
|
-
opt.summary_width = 20
|
|
30
|
-
opt.banner = 'Usage: rbssw [options] <target file> <query file>'
|
|
31
|
-
opt.on('-l', '--sLibPath PATH', String,
|
|
32
|
-
'path of libssw.so') do |v|
|
|
33
|
-
opts[:sLibPath] = v
|
|
34
|
-
end
|
|
35
|
-
opt.on('-m', '--nMatch VAL', Integer,
|
|
36
|
-
'a positive integer as the score for a match',
|
|
37
|
-
"in genome sequence alignment. [#{opts[:nmatch]}]") do |v|
|
|
38
|
-
opts[:nmatch] = v
|
|
39
|
-
end
|
|
40
|
-
opt.on('-x', '--nMismatch VAL', Integer,
|
|
41
|
-
'a positive integer as the score for a mismatch',
|
|
42
|
-
"in genome sequence alignment. [#{opts[:nmismatch]}]") do |v|
|
|
43
|
-
opts[:nmismatch] = v
|
|
44
|
-
end
|
|
45
|
-
opt.on('-o', '--nOpen VAL', Integer,
|
|
46
|
-
'a positive integer as the penalty for the gap opening',
|
|
47
|
-
"in genome sequence alignment. [#{opts[:nopen]}]") do |v|
|
|
48
|
-
opts[:nopen] = v
|
|
49
|
-
end
|
|
50
|
-
opt.on('-e', '--nExt VAL', Integer,
|
|
51
|
-
'a positive integer as the penalty for the gap extension',
|
|
52
|
-
"in genome sequence alignment. [#{opts[:next]}]") do |v|
|
|
53
|
-
opts[:next] = v
|
|
54
|
-
end
|
|
55
|
-
opt.on('-p', '--bProtien', TrueClass,
|
|
56
|
-
'Do protein sequence alignment.',
|
|
57
|
-
"Without this option, do genome sequence alignment. [#{opts[:bprotein]}]") do |v|
|
|
58
|
-
opts[:bprotein] = v
|
|
59
|
-
end
|
|
60
|
-
opt.on('-a', '--sMatrix VAL', String,
|
|
61
|
-
"a file for either Blosum or Pam weight matrix. [#{opts[:smatrix]}]") do |v|
|
|
62
|
-
opts[:smatrix] = v
|
|
63
|
-
end
|
|
64
|
-
opt.on('-c', '--bPath', TrueClass,
|
|
65
|
-
"Return the alignment path. [#{opts[:bpath]}]") do |v|
|
|
66
|
-
opts[:bpath] = v
|
|
67
|
-
end
|
|
68
|
-
opt.on('-f', '--nThr VAL', Integer,
|
|
69
|
-
'a positive integer.',
|
|
70
|
-
'Only output the alignments with the Smith-Waterman score >= N.') do |v|
|
|
71
|
-
opts[:nthr] = v
|
|
72
|
-
end
|
|
73
|
-
opt.on('-r', '--bBest', TrueClass,
|
|
74
|
-
'The best alignment will be picked, between the original read',
|
|
75
|
-
"alignment and the reverse complement read alignment. [#{opts[:bbest]}]") do |v|
|
|
76
|
-
opts[:bbest] = v
|
|
77
|
-
end
|
|
78
|
-
opt.on('-s', '--bSam', TrueClass,
|
|
79
|
-
'Output in SAM format. [no header]') do |v| # TYPO?
|
|
80
|
-
opts[:bsam] = v
|
|
81
|
-
end
|
|
82
|
-
opt.on('-header', '--bHeader', TrueClass,
|
|
83
|
-
'If -s is used, include header in SAM output.') do |v|
|
|
84
|
-
opts[:bheader] = v
|
|
85
|
-
end
|
|
86
|
-
end
|
|
87
|
-
|
|
88
|
-
parser.order!(ARGV)
|
|
89
|
-
|
|
90
|
-
opts[:target] = ARGV[0]
|
|
91
|
-
opts[:query] = ARGV[1]
|
|
92
|
-
|
|
93
|
-
lEle = []
|
|
94
|
-
dRc = {}
|
|
95
|
-
dEle2Int = {}
|
|
96
|
-
dInt2Ele = {}
|
|
97
|
-
lScore = nil
|
|
98
|
-
|
|
99
|
-
if opts[:bprotein]
|
|
100
|
-
# load AA score matrix
|
|
101
|
-
if !opts[:smatrix]
|
|
102
|
-
lEle = SSW::AAELEMENTS
|
|
103
|
-
dEle2Int = SSW::AA2INT
|
|
104
|
-
dInt2Ele = SSW::INT2AA
|
|
105
|
-
nEleNum = lEle.size
|
|
106
|
-
lScore = SSW::Blosum50
|
|
107
|
-
else
|
|
108
|
-
lEle, dEle2Int, dInt2Ele, lScore = SSW.read_matrix(opts[:smatrix])
|
|
109
|
-
end
|
|
110
|
-
elsif !opts[:smatrix]
|
|
111
|
-
# init DNA score matrix
|
|
112
|
-
lEle = SSW::DNAELEMENTS
|
|
113
|
-
dRc = SSW::DNARC
|
|
114
|
-
dEle2Int = SSW::DNA2INT
|
|
115
|
-
dInt2Ele = SSW::INT2DNA
|
|
116
|
-
nEleNum = lEle.size # 5
|
|
117
|
-
lScore = LibSSW.create_scoring_matrix(lEle, opts[:nmatch], -opts[:nmismatch])
|
|
118
|
-
end
|
|
119
|
-
|
|
120
|
-
warn 'Reverse complement alignment is not available for protein sequences.' if opts[:bbest] && opts[:bprotein]
|
|
121
|
-
|
|
122
|
-
# set flag
|
|
123
|
-
nFlag = opts[:bpath] ? 2 : 0
|
|
124
|
-
|
|
125
|
-
# print sam head
|
|
126
|
-
if opts[:bsam] && opts[:bheader] && opts[:bpath]
|
|
127
|
-
puts '@HD\tVN:1.4\tSO:queryname'
|
|
128
|
-
Bio::Flatfile.open(opts[:target]) do |f|
|
|
129
|
-
f.each do |entry|
|
|
130
|
-
id = entry.entry_id
|
|
131
|
-
len = entry.nalen
|
|
132
|
-
puts "@SQ\tSN:#{id}\tLN:#{len}"
|
|
133
|
-
end
|
|
134
|
-
end
|
|
135
|
-
elsif opts[:bsam] && !args[:bpath]
|
|
136
|
-
warn 'SAM format output is only available together with option -c.\n'
|
|
137
|
-
opts[:bsam] = false
|
|
138
|
-
end
|
|
139
|
-
|
|
140
|
-
def _to_int(seq, lEle, dEle2Int)
|
|
141
|
-
seq.each_char.map do |ele|
|
|
142
|
-
if dEle2Int.has_key?(ele)
|
|
143
|
-
dEle2Int[ele]
|
|
144
|
-
else
|
|
145
|
-
dEle2Int[lEle[-1]]
|
|
146
|
-
end
|
|
147
|
-
end
|
|
148
|
-
end
|
|
149
|
-
|
|
150
|
-
# iterate query sequenc
|
|
151
|
-
Bio::FlatFile.open(opts[:query]) do |query_file|
|
|
152
|
-
query_file.each do |qentry|
|
|
153
|
-
sQId = qentry.entry_id
|
|
154
|
-
sQSeq = qentry.sequence_string
|
|
155
|
-
sQQual = qentry.quality_string
|
|
156
|
-
# build query profile
|
|
157
|
-
qNum = _to_int(sQSeq, lEle, dEle2Int)
|
|
158
|
-
qProfile = SSW.ssw_init(qNum, sQSeq.size, lScore, lEle.size, 2)
|
|
159
|
-
# build rc query profile
|
|
160
|
-
if opts[:bbest] && !opts[:bprotein]
|
|
161
|
-
sQRcSeq = sQSeq.reverse.each_char.map { |x| dRc[x] }.join
|
|
162
|
-
qRcNum = _to_int(sQRcSeq, lEle, dEle2Int)
|
|
163
|
-
qRcProfile = SSW.ssw_init(qRcNum, sQSeq.size, mat, lEle.size, 2)
|
|
164
|
-
end
|
|
165
|
-
# set mask le
|
|
166
|
-
if sQSeq.size > 30
|
|
167
|
-
nMaskLen = sQSeq.size / 2
|
|
168
|
-
else
|
|
169
|
-
nMasklen = 15
|
|
170
|
-
end
|
|
171
|
-
|
|
172
|
-
# iter target sequence
|
|
173
|
-
Bio::FlatFile.open(opts[:target]) do |target_file|
|
|
174
|
-
target_file.each do |tentry|
|
|
175
|
-
sRId = tentry.entry_id
|
|
176
|
-
sRSeq = tentry.seq.to_s
|
|
177
|
-
rNum = _to_int(sRSeq, lEle, dEle2Int)
|
|
178
|
-
res = SSW.ssw_align(
|
|
179
|
-
qProfile, rNum, sRSeq.size, opts[:nopen], opts[:next], nFlag, 0, 0, nMaskLen
|
|
180
|
-
)
|
|
181
|
-
p res.to_h
|
|
182
|
-
resRc = nil
|
|
183
|
-
if opts[:bbest] && !opts[:bprotein]
|
|
184
|
-
resRc = SSW.align_one(
|
|
185
|
-
qRcProfile, rNum, sRSeq.size, opts[:nopen], opts[:next], nFlag, 0, 0, nMaskLen
|
|
186
|
-
)
|
|
187
|
-
end
|
|
188
|
-
# build cigar and trace back path
|
|
189
|
-
strand = 0
|
|
190
|
-
end
|
|
191
|
-
end
|
|
192
|
-
end
|
|
193
|
-
end
|