libssw 0.0.0.pre → 0.0.1.pre
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/exe/rbssw +209 -0
- data/lib/libssw.rb +92 -10
- data/lib/libssw/ffi.rb +9 -4
- data/lib/libssw/version.rb +1 -1
- metadata +15 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a45ea25e49633d46da6d363c9ed1d23c9fd3ed1a07b6128d4d61e4b320a7ff44
|
4
|
+
data.tar.gz: 599aee0f039c17fe6b3b50e4e89302e2a642b9a6ec886b0dd6ebc13e8972387b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b03f65581c285cedb488946ae84fd029147b54a6b022b9aff5bf39f7554881d8eb2e8a60064f06f012abe7fee21b2aff1801ed1017d7c3ef995557a7a066b687
|
7
|
+
data.tar.gz: e651c5fc0c9fb918a479c5070a00631e308562191d7adb0784e7042d07874e5026245c4c76acb48cf99900739e6c0da2617be2100398dbd1134b011a565a0014
|
data/exe/rbssw
CHANGED
@@ -1,4 +1,213 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# frozen_string_literal: true
|
3
3
|
|
4
|
+
require 'bio'
|
4
5
|
require 'libssw'
|
6
|
+
SSW = LibSSW
|
7
|
+
require 'optparse'
|
8
|
+
|
9
|
+
# Default value for every command line option. The parser below overwrites
# an entry whenever the matching switch is given.
opts = {
  lib_path: nil,
  nmatch: 2,
  nmismatch: 2,
  nopen: 3,
  next: 1,
  bprotein: false,
  smatrix: nil,
  bpath: false,
  nthr: nil,
  bbest: false,
  bsam: nil,
  bheader: nil
}

# Command line definition. Each handler stores the parsed value in +opts+.
parser = OptionParser.new do |opt|
  opt.version = LibSSW::VERSION
  opt.summary_width = 20
  opt.banner = 'Usage: rbssw [options] <target file> <query file>'
  opt.on('-l', '--sLibPath PATH', String,
         'path of libssw.so') do |v|
    # Stored under the key declared in the defaults above.
    opts[:lib_path] = v
  end
  opt.on('-m', '--nMatch VAL', Integer,
         'a positive integer as the score for a match',
         "in genome sequence alignment. [#{opts[:nmatch]}]") do |v|
    opts[:nmatch] = v
  end
  opt.on('-x', '--nMismatch VAL', Integer,
         'a positive integer as the score for a mismatch',
         "in genome sequence alignment. [#{opts[:nmismatch]}]") do |v|
    opts[:nmismatch] = v
  end
  opt.on('-o', '--nOpen VAL', Integer,
         'a positive integer as the penalty for the gap opening',
         "in genome sequence alignment. [#{opts[:nopen]}]") do |v|
    opts[:nopen] = v
  end
  opt.on('-e', '--nExt VAL', Integer,
         'a positive integer as the penalty for the gap extension',
         "in genome sequence alignment. [#{opts[:next]}]") do |v|
    opts[:next] = v
  end
  opt.on('-p', '--bProtien', TrueClass,
         'Do protein sequence alignment.',
         "Without this option, do genome sequence alignment. [#{opts[:bprotein]}]") do |v|
    opts[:bprotein] = v
  end
  opt.on('-a', '--sMatrix VAL', String,
         "a file for either Blosum or Pam weight matrix. [#{opts[:smatrix]}]") do |v|
    opts[:smatrix] = v
  end
  opt.on('-c', '--bPath', TrueClass,
         "Return the alignment path. [#{opts[:bpath]}]") do |v|
    opts[:bpath] = v
  end
  opt.on('-f', '--nThr VAL', Integer,
         'a positive integer.',
         'Only output the alignments with the Smith-Waterman score >= N.') do |v|
    opts[:nthr] = v
  end
  opt.on('-r', '--bBest', TrueClass,
         'The best alignment will be picked, between the original read',
         "alignment and the reverse complement read alignment. [#{opts[:bbest]}]") do |v|
    opts[:bbest] = v
  end
  opt.on('-s', '--bSam', TrueClass,
         'Output in SAM format. [no header]') do |v|
    opts[:bsam] = v
  end
  # A short switch is exactly one character. OptionParser reads '-header' as
  # '-h' followed by a mandatory argument, so only the long form is registered
  # and '-h' keeps its default meaning (help).
  opt.on('--bHeader', TrueClass,
         'If -s is used, include header in SAM output.') do |v|
    opts[:bheader] = v
  end
end

# order! removes the recognised switches from ARGV and stops at the first
# non-option argument.
parser.order!(ARGV)

# Both input files are mandatory; show the usage text instead of failing
# later when a nil path is opened.
abort parser.help if ARGV.size < 2

opts[:target] = ARGV[0]
opts[:query] = ARGV[1]
|
89
|
+
|
90
|
+
# Alphabet (lEle), symbol <-> index tables and the flattened score matrix
# (lScore) that the alignment loop further down reads.
lEle = []
dEle2Int = {}
dInt2Ele = {}
lScore = nil

# Complement table for building reverse-complement DNA reads. Keys are
# one-character Strings because the lookup is done per character of the read;
# lower-case bases map to the upper-case complement. Any symbol that is not
# listed complements to 'N', so the read keeps its length.
dRc = {
  'A' => 'T', 'C' => 'G', 'G' => 'C', 'T' => 'A',
  'a' => 'T', 'c' => 'G', 'g' => 'C', 't' => 'A'
}
dRc.default = 'N'

if opts[:smatrix]
  # A user supplied matrix file defines alphabet and scores, for protein as
  # well as for genome alignment.
  # NOTE(review): relies on SSW.read_matrix being provided by the library.
  lEle, dEle2Int, dInt2Ele, lScore = SSW.read_matrix(opts[:smatrix])
elsif opts[:bprotein]
  # built-in amino acid alphabet with the Blosum50 matrix
  lEle = %w[A R N D C Q E G H I L K M F P S T W Y V B Z X *]
  lEle.each_with_index do |ele, i|
    dEle2Int[ele] = i
    dEle2Int[ele.downcase] = i
    dInt2Ele[i] = ele
  end
  lScore = SSW::Blosum50
else
  # init DNA score matrix
  lEle = %w[A C G T N]
  lEle.each_with_index do |ele, i|
    dEle2Int[ele] = i
    dEle2Int[ele.downcase] = i
    dInt2Ele[i] = ele
  end
  # dEle2Int = {'A' => 0, 'a' => 0, 'C' => 1, 'c' => 1, 'G' => 2, 'g' => 2,
  #             'T' => 3, 't' => 3, 'N' => 4, 'n' => 4}
  # dInt2Ele = {0 => 'A', 1 => 'C', 2 => 'G', 3 => 'T', 4 => 'N'}
  nEleNum = lEle.size # 5
  lScore = Array.new(nEleNum**2, 0)
  # Only the first nEleNum - 1 rows and columns are filled in; the last row
  # and column (the 'N' symbol) keep their initial score of 0.
  (nEleNum - 1).times do |i|
    (nEleNum - 1).times do |j|
      lScore[i * nEleNum + j] = lEle[i] == lEle[j] ? opts[:nmatch] : -opts[:nmismatch]
    end
  end
  # With the default scores:
  # lScore = [ 2, -2, -2, -2, 0,
  #           -2,  2, -2, -2, 0,
  #           -2, -2,  2, -2, 0,
  #           -2, -2, -2,  2, 0,
  #            0,  0,  0,  0, 0 ]
end
|
139
|
+
|
140
|
+
warn 'Reverse complement alignment is not available for protein sequences.' if opts[:bbest] && opts[:bprotein]

# Flag handed to the aligner: 2 when the alignment path was requested (-c),
# otherwise 0.
nFlag = opts[:bpath] ? 2 : 0

# Print the SAM header: one @HD line, then one @SQ line per target entry.
if opts[:bsam] && opts[:bheader] && opts[:bpath]
  # Double quotes are required so that \t is emitted as a tab character.
  puts "@HD\tVN:1.4\tSO:queryname"
  Bio::FlatFile.open(opts[:target]) do |f|
    f.each do |entry|
      puts "@SQ\tSN:#{entry.entry_id}\tLN:#{entry.nalen}"
    end
  end
elsif opts[:bsam] && !opts[:bpath]
  # SAM output is switched off when -c is missing; warn already appends the
  # trailing newline.
  warn 'SAM format output is only available together with option -c.'
  opts[:bsam] = false
end
|
159
|
+
|
160
|
+
# Translates a sequence string into an array of alphabet indices.
#
# seq      - String whose characters are looked up one by one.
# lEle     - Array of alphabet symbols; its last element is the fallback symbol.
# dEle2Int - Hash mapping a one-character String to its Integer index.
#
# Returns an Array with one entry per character of +seq+. A character that is
# not a key of +dEle2Int+ gets the index stored for the last element of +lEle+.
def _to_int(seq, lEle, dEle2Int)
  # The fallback never changes while iterating, so look it up only once.
  fallback = dEle2Int[lEle.last]
  seq.each_char.map { |ele| dEle2Int.fetch(ele, fallback) }
end
|
169
|
+
|
170
|
+
# Iterate over the query sequences and align each one against every entry of
# the target file.
Bio::FlatFile.open(opts[:query]) do |query_file|
  query_file.each do |qentry|
    sQId = qentry.entry_id # not used further in this loop yet
    sQSeq = qentry.sequence_string
    sQQual = qentry.quality_string # not used further in this loop yet

    # build query profile
    qNum = _to_int(sQSeq, lEle, dEle2Int)
    qProfile = SSW.ssw_init(qNum, sQSeq.size, lScore, lEle.size, 2)

    # build the reverse-complement query profile (genome alignment only)
    qRcProfile = nil
    if opts[:bbest] && !opts[:bprotein]
      sQRcSeq = sQSeq.reverse.each_char.map { |x| dRc[x] }.join
      qRcNum = _to_int(sQRcSeq, lEle, dEle2Int)
      # uses the same score matrix as the forward profile
      qRcProfile = SSW.ssw_init(qRcNum, sQSeq.size, lScore, lEle.size, 2)
    end

    # mask length: half the read length for reads longer than 30, else 15
    nMaskLen = sQSeq.size > 30 ? sQSeq.size / 2 : 15

    # iterate over the target sequences
    Bio::FlatFile.open(opts[:target]) do |target_file|
      target_file.each do |tentry|
        sRId = tentry.entry_id # not used further in this loop yet
        sRSeq = tentry.seq.to_s
        rNum = _to_int(sRSeq, lEle, dEle2Int)
        res = SSW.ssw_align(
          qProfile, rNum, sRSeq.size, opts[:nopen], opts[:next], nFlag, 0, 0, nMaskLen
        )
        p res.to_h
        resRc = nil
        if opts[:bbest] && !opts[:bprotein]
          # the reverse-complement profile goes through the same entry point
          resRc = SSW.ssw_align(
            qRcProfile, rNum, sRSeq.size, opts[:nopen], opts[:next], nFlag, 0, 0, nMaskLen
          )
        end
        # build cigar and trace back path
        # TODO: resRc and strand are prepared here but not evaluated yet.
        strand = 0
      end
    end
  end
end
|
data/lib/libssw.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'forwardable'
|
4
3
|
require_relative 'libssw/version'
|
5
4
|
|
6
5
|
module LibSSW
|
@@ -25,14 +24,97 @@ module LibSSW
|
|
25
24
|
File.expand_path("../vendor/#{lib_name}", __dir__)
|
26
25
|
end
|
27
26
|
|
28
|
-
|
27
|
+
require_relative 'libssw/ffi'
|
29
28
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
29
|
+
# Alignment record built on top of FFI::Align. Adds snake_case readers and a
# Hash export.
class Align < FFI::Align
  # Returns the cigar entries as an Array of Integers, or an empty Array when
  # cigar_len is zero. The data behind the struct's cigar member is read as
  # 4 bytes per entry and unpacked with the 'L*' directive.
  def cigar
    raw = super
    if cigar_len.zero?
      []
    else
      raw[0, 4 * cigar_len].unpack('L*')
    end
  end

  # snake_case reader for the struct member cigarLen.
  def cigar_len
    cigarLen
  end

  # Returns a Hash keyed by field name (Symbol) holding the value of each
  # listed reader.
  def to_h
    fields = %i[score1 score2 ref_begin1 ref_end1 read_begin1 read_end1 ref_end2 cigar cigar_len]
    fields.each_with_object({}) do |field, result|
      result[field] = __send__(field)
    end
  end
end
|
57
|
+
|
58
|
+
# Query profile record built on top of FFI::Profile. Adds array readers for
# the read and the score matrix, plus a Hash export.
class Profile < FFI::Profile
  # Returns the read as an Array of signed 8-bit Integers, or an empty Array
  # when read_len is zero.
  def read
    raw = super
    if read_len.zero?
      []
    else
      raw[0, read_len].unpack('c*')
    end
  end

  # Returns the n * n entries of the score matrix as a flat Array of signed
  # 8-bit Integers.
  def mat
    raw = super
    raw[0, n * n].unpack('c*')
  end

  # snake_case reader for the struct member readLen.
  def read_len
    readLen
  end

  # Returns a Hash keyed by field name (Symbol) holding the value of each
  # listed reader.
  def to_h
    fields = %i[byte word read mat read_len n bias]
    fields.each_with_object({}) do |field, result|
      result[field] = __send__(field)
    end
  end
end
|
89
|
+
|
90
|
+
# Module-level wrappers around the FFI bindings. Ruby arrays are packed into
# binary strings before they are handed to the native functions.
class << self
  # Builds a query profile.
  #
  # read       - Array of Integers, packed as signed 8-bit values.
  # read_len   - Integer length passed through unchanged.
  # mat        - (possibly nested) Array of Integers; flattened and packed as
  #              signed 8-bit values.
  # n          - Integer passed through unchanged.
  # score_size - Integer passed through unchanged.
  #
  # Returns a LibSSW::Profile wrapping the pointer returned by FFI.ssw_init.
  def ssw_init(read, read_len, mat, n, score_size)
    ptr = FFI.ssw_init(
      read.pack('c*'), read_len, mat.flatten.pack('c*'), n, score_size
    )
    # Referenced through LibSSW: the short alias SSW is not defined by the
    # library itself, only by scripts that choose to create it.
    LibSSW::Profile.new(ptr)
  end

  # Forwards +profile+ to FFI.init_destroy.
  def init_destroy(profile)
    FFI.init_destroy(profile)
  end

  # Runs an alignment of the profile +prof+ against +ref+.
  #
  # ref - Array of Integers, packed as signed 8-bit values.
  # All remaining arguments are passed to FFI.ssw_align unchanged.
  #
  # Returns a LibSSW::Align wrapping the pointer returned by FFI.ssw_align.
  def ssw_align(prof, ref, ref_len, weight_gap0, weight_gapE, flag, filters, filterd, mask_len)
    ptr = FFI.ssw_align(
      prof, ref.pack('c*'), ref_len, weight_gap0, weight_gapE, flag, filters, filterd, mask_len
    )
    LibSSW::Align.new(ptr)
  end

  # Forwards +align+ to FFI.align_destroy.
  def align_destroy(align)
    FFI.align_destroy(align)
  end

  # Forwards to FFI.mark_mismatch. +ref+ and +read+ are packed as signed 8-bit
  # values, +cigar_len+ is packed with 'l*' and +cigar+ is passed as given.
  # FIXME: the handling of the cigar argument is unfinished, hence the
  # warning printed on every call.
  def mark_mismatch(ref_begin1, read_begin1, read_end1, ref, read, read_len, cigar, cigar_len)
    warn 'implementation: fiexme: **cigar' # FIXME
    FFI.mark_mismatch(
      ref_begin1, read_begin1, read_end1, ref.pack('c*'), read.pack('c*'), read_len, cigar, cigar_len.pack('l*')
    )
  end
end
|
38
120
|
end
|
data/lib/libssw/ffi.rb
CHANGED
@@ -31,7 +31,7 @@ module LibSSW
|
|
31
31
|
'uint16_t score2',
|
32
32
|
'int32_t ref_begin1',
|
33
33
|
'int32_t ref_end1',
|
34
|
-
'int32_t
|
34
|
+
'int32_t read_begin1',
|
35
35
|
'int32_t read_end1',
|
36
36
|
'int32_t ref_end2',
|
37
37
|
'uint32_t* cigar',
|
@@ -39,8 +39,8 @@ module LibSSW
|
|
39
39
|
]
|
40
40
|
|
41
41
|
Profile = struct [
|
42
|
-
'
|
43
|
-
'
|
42
|
+
'__m128i* byte', # __m128i* profile_byte; // 0: none
|
43
|
+
'__m128i* word', # __m128i* profile_word; // 0: none
|
44
44
|
'const int8_t* read',
|
45
45
|
'const int8_t* mat',
|
46
46
|
'int32_t readLen',
|
@@ -49,7 +49,12 @@ module LibSSW
|
|
49
49
|
]
|
50
50
|
|
51
51
|
# s_profile* ssw_init (const int8_t* read, const int32_t readLen, const int8_t* mat, const int32_t n, const int8_t score_size)
|
52
|
-
try_extern 's_profile* ssw_init (
|
52
|
+
try_extern 's_profile* ssw_init (' \
|
53
|
+
'const int8_t* read,' \
|
54
|
+
'int32_t readLen,' \
|
55
|
+
'const int8_t* mat,' \
|
56
|
+
'int32_t n,' \
|
57
|
+
'int8_t score_size)'
|
53
58
|
|
54
59
|
try_extern 'void init_destroy (s_profile* p)'
|
55
60
|
|
data/lib/libssw/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: libssw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.1.pre
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kojix2
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 1.0.7
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bio
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: bundler
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|