libssw 0.0.0.pre → 0.0.1.pre

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f52642bad07253a1fa4ad73032c49d805a53080812bbb663c0fa825c6b0241f0
4
- data.tar.gz: af7af5c499367ad64f4914bd1c4efce747966170d181960fa20b2c419745666b
3
+ metadata.gz: a45ea25e49633d46da6d363c9ed1d23c9fd3ed1a07b6128d4d61e4b320a7ff44
4
+ data.tar.gz: 599aee0f039c17fe6b3b50e4e89302e2a642b9a6ec886b0dd6ebc13e8972387b
5
5
  SHA512:
6
- metadata.gz: 3858b66d605d0011ce6e9c6606befbd7520613a1435297276a707cf22431ee38e043c7402ac336a08851da79ffd097bf28d72cd8debd995e66e1c1466c3a48b5
7
- data.tar.gz: 7aed161e8a3a8d8c9b754749af60408dbdcc05a8076efe3d116d76fdccac03dbee967cf95a6c7f042cdf7b9cdb99b24e9f059e995c351079a8a156316198f3e7
6
+ metadata.gz: b03f65581c285cedb488946ae84fd029147b54a6b022b9aff5bf39f7554881d8eb2e8a60064f06f012abe7fee21b2aff1801ed1017d7c3ef995557a7a066b687
7
+ data.tar.gz: e651c5fc0c9fb918a479c5070a00631e308562191d7adb0784e7042d07874e5026245c4c76acb48cf99900739e6c0da2617be2100398dbd1134b011a565a0014
data/exe/rbssw CHANGED
@@ -1,4 +1,213 @@
1
1
  #!/usr/bin/env ruby
2
2
  # frozen_string_literal: true
3
3
 
4
+ require 'bio'
4
5
  require 'libssw'
6
+ SSW = LibSSW
7
+ require 'optparse'
8
+
9
# Default value of every command-line setting. The numeric defaults are
# interpolated into the help text built below.
opts = {
  lib_path: nil,
  nmatch: 2,
  nmismatch: 2,
  nopen: 3,
  next: 1,
  bprotein: false,
  smatrix: nil,
  bpath: false,
  nthr: nil,
  bbest: false,
  bsam: nil, # nil rather than false; the script only tests it for truthiness
  bheader: nil
}

# Command-line definition. Every handler stores the parsed value in +opts+
# under the key declared in the defaults above.
parser = OptionParser.new do |opt|
  opt.version = LibSSW::VERSION
  opt.summary_width = 20
  opt.banner = 'Usage: rbssw [options] <target file> <query file>'
  opt.on('-l', '--sLibPath PATH', String,
         'path of libssw.so') do |v|
    # Use the declared :lib_path key; writing to :sLibPath left the default
    # entry untouched.
    opts[:lib_path] = v
  end
  opt.on('-m', '--nMatch VAL', Integer,
         'a positive integer as the score for a match',
         "in genome sequence alignment. [#{opts[:nmatch]}]") do |v|
    opts[:nmatch] = v
  end
  opt.on('-x', '--nMismatch VAL', Integer,
         'a positive integer as the score for a mismatch',
         "in genome sequence alignment. [#{opts[:nmismatch]}]") do |v|
    opts[:nmismatch] = v
  end
  opt.on('-o', '--nOpen VAL', Integer,
         'a positive integer as the penalty for the gap opening',
         "in genome sequence alignment. [#{opts[:nopen]}]") do |v|
    opts[:nopen] = v
  end
  opt.on('-e', '--nExt VAL', Integer,
         'a positive integer as the penalty for the gap extension',
         "in genome sequence alignment. [#{opts[:next]}]") do |v|
    opts[:next] = v
  end
  # '--bProtien' is the historical (misspelled) name; it is kept as an alias
  # so existing command lines keep working.
  opt.on('-p', '--bProtein', '--bProtien', TrueClass,
         'Do protein sequence alignment.',
         "Without this option, do genome sequence alignment. [#{opts[:bprotein]}]") do |v|
    opts[:bprotein] = v
  end
  opt.on('-a', '--sMatrix VAL', String,
         "a file for either Blosum or Pam weight matrix. [#{opts[:smatrix]}]") do |v|
    opts[:smatrix] = v
  end
  opt.on('-c', '--bPath', TrueClass,
         "Return the alignment path. [#{opts[:bpath]}]") do |v|
    opts[:bpath] = v
  end
  opt.on('-f', '--nThr VAL', Integer,
         'a positive integer.',
         'Only output the alignments with the Smith-Waterman score >= N.') do |v|
    opts[:nthr] = v
  end
  opt.on('-r', '--bBest', TrueClass,
         'The best alignment will be picked, between the original read',
         "alignment and the reverse complement read alignment. [#{opts[:bbest]}]") do |v|
    opts[:bbest] = v
  end
  opt.on('-s', '--bSam', TrueClass,
         'Output in SAM format. [no header]') do |v|
    opts[:bsam] = v
  end
  # A short switch is a single character: OptionParser reads '-header' as the
  # switch -h followed by an argument placeholder, which turns the flag into
  # one that demands a value. Note that -h therefore no longer prints the
  # help; --help still does.
  opt.on('-h', '--bHeader', TrueClass,
         'If -s is used, include header in SAM output.') do |v|
    opts[:bheader] = v
  end
end

# order! consumes the leading options and leaves the positional arguments.
parser.order!(ARGV)

# Both input files are mandatory; show the usage instead of failing later
# when a nil path is opened.
abort parser.help if ARGV.size < 2

opts[:target] = ARGV[0]
opts[:query] = ARGV[1]
89
+
90
# Alphabet, encoding tables and substitution matrix used by the rest of the
# script.
lEle = []     # alphabet symbols
dRc = {}      # DNA complement table; stays empty for protein input
dEle2Int = {} # residue character (upper and lower case) => integer code
dInt2Ele = {} # integer code => residue character
lScore = nil  # substitution matrix

if opts[:smatrix]
  # A user-supplied weight matrix replaces the built-in one for protein AND
  # DNA input (the DNA case used to fall through and leave lScore nil).
  # NOTE(review): SSW.read_matrix is not part of the LibSSW module shown in
  # this change set - confirm it exists before relying on -a.
  lEle, dEle2Int, dInt2Ele, lScore = SSW.read_matrix(opts[:smatrix])
else
  lEle = if opts[:bprotein]
           %w[A R N D C Q E G H I L K M F P S T W Y V B Z X *]
         else
           %w[A C G T N]
         end
  lEle.each_with_index do |ele, i|
    dEle2Int[ele] = i
    dEle2Int[ele.downcase] = i
    dInt2Ele[i] = ele
  end

  if opts[:bprotein]
    # NOTE(review): SSW::Blosum50 is defined outside the code shown here.
    lScore = SSW::Blosum50
  else
    # Build the flat, row-major DNA matrix. The last row and column (N) keep
    # the initial score 0, e.g. with the default scores:
    #   [ 2, -2, -2, -2, 0,
    #    -2,  2, -2, -2, 0,
    #    -2, -2,  2, -2, 0,
    #    -2, -2, -2,  2, 0,
    #     0,  0,  0,  0, 0 ]
    nEleNum = lEle.size # 5
    lScore = Array.new(nEleNum**2, 0)
    (nEleNum - 1).times do |i|
      (nEleNum - 1).times do |j|
        lScore[i * nEleNum + j] = i == j ? opts[:nmatch] : -opts[:nmismatch]
      end
    end
  end
end

unless opts[:bprotein]
  # Watson-Crick complements (A<->T, C<->G), keyed by String so that lookups
  # with single-character strings succeed; the `'A': 'C'` literal form creates
  # Symbol keys. Any other character complements to 'N' instead of nil.
  dRc = Hash.new('N').update(
    'A' => 'T', 'C' => 'G', 'G' => 'C', 'T' => 'A',
    'a' => 'T', 'c' => 'G', 'g' => 'C', 't' => 'A'
  )
end
139
+
140
warn 'Reverse complement alignment is not available for protein sequences.' if opts[:bbest] && opts[:bprotein]

# Flag handed to ssw_align: 2 when the alignment path (-c) is requested,
# 0 otherwise.
nFlag = opts[:bpath] ? 2 : 0

# SAM output needs the alignment path, so -s without -c is switched off with
# a warning; otherwise the header is printed when -s and the header option
# are both given.
if opts[:bsam] && !opts[:bpath]
  # warn appends the newline itself; a "\n" inside single quotes would be
  # printed literally.
  warn 'SAM format output is only available together with option -c.'
  opts[:bsam] = false
elsif opts[:bsam] && opts[:bheader]
  # Double quotes so that \t is emitted as a real tab character.
  puts "@HD\tVN:1.4\tSO:queryname"
  # One @SQ line per target sequence.
  Bio::FlatFile.open(opts[:target]) do |f|
    f.each do |entry|
      puts "@SQ\tSN:#{entry.entry_id}\tLN:#{entry.nalen}"
    end
  end
end
159
+
160
# Encode a sequence as integer codes.
#
# seq      - String whose characters are looked up one by one.
# lEle     - Array of alphabet symbols; its last element names the fallback.
# dEle2Int - Hash mapping a character to its integer code.
#
# Returns an Array with one entry per character of +seq+. A character that is
# not a key of +dEle2Int+ receives the code stored for the last element of
# +lEle+.
def _to_int(seq, lEle, dEle2Int)
  seq.each_char.map do |residue|
    dEle2Int.fetch(residue) { dEle2Int[lEle[-1]] }
  end
end
169
+
170
# Iterate over the query sequences and align each one against every target
# sequence.
Bio::FlatFile.open(opts[:query]) do |query_file|
  query_file.each do |qentry|
    sQId = qentry.entry_id
    sQSeq = qentry.sequence_string
    sQQual = qentry.quality_string
    # build query profile
    qNum = _to_int(sQSeq, lEle, dEle2Int)
    qProfile = SSW.ssw_init(qNum, sQSeq.size, lScore, lEle.size, 2)
    # build the reverse-complement query profile (DNA only)
    qRcProfile = nil
    if opts[:bbest] && !opts[:bprotein]
      # tr maps A<->T and C<->G (lower case to upper case) and leaves every
      # other character in place, so the result always has the same length
      # as sQSeq - a nil table lookup would silently drop characters.
      sQRcSeq = sQSeq.reverse.tr('ACGTacgt', 'TGCATGCA')
      qRcNum = _to_int(sQRcSeq, lEle, dEle2Int)
      # Use the same score matrix as the forward profile.
      qRcProfile = SSW.ssw_init(qRcNum, sQSeq.size, lScore, lEle.size, 2)
    end
    # set mask length: half the read for reads longer than 30, otherwise 15
    nMaskLen = sQSeq.size > 30 ? sQSeq.size / 2 : 15

    # iterate target sequences
    Bio::FlatFile.open(opts[:target]) do |target_file|
      target_file.each do |tentry|
        sRId = tentry.entry_id
        sRSeq = tentry.seq.to_s
        rNum = _to_int(sRSeq, lEle, dEle2Int)
        res = SSW.ssw_align(
          qProfile, rNum, sRSeq.size, opts[:nopen], opts[:next], nFlag, 0, 0, nMaskLen
        )
        p res.to_h
        resRc = nil
        if qRcProfile
          # ssw_align is the alignment entry point the LibSSW module exposes.
          resRc = SSW.ssw_align(
            qRcProfile, rNum, sRSeq.size, opts[:nopen], opts[:next], nFlag, 0, 0, nMaskLen
          )
        end
        # TODO: build cigar and trace back path
        strand = 0
      end
    end
  end
end
@@ -1,6 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'forwardable'
4
3
  require_relative 'libssw/version'
5
4
 
6
5
  module LibSSW
@@ -25,14 +24,97 @@ module LibSSW
25
24
  File.expand_path("../vendor/#{lib_name}", __dir__)
26
25
  end
27
26
 
28
- autoload :FFI, 'libssw/ffi'
27
+ require_relative 'libssw/ffi'
29
28
 
30
- extend Forwardable
31
- Align = FFI::Align
32
- Profile = FFI::Profile
33
- def_delegators :FFI,
34
- :ssw_init,
35
- :init_destroy,
36
- :align_destroy,
37
- :mark_mismatch
29
# Alignment result: FFI::Align extended with decoded accessors and a Hash
# conversion.
class Align < FFI::Align
  # Returns the cigar buffer decoded as native-endian 32-bit unsigned
  # integers, or an empty Array when cigar_len is zero.
  def cigar
    buffer = super
    return [] if cigar_len.zero?

    buffer[0, cigar_len * 4].unpack('L*')
  end

  # snake_case reader for the struct's cigarLen member.
  def cigar_len
    cigarLen
  end

  # Returns a Hash holding the value of each listed reader, keyed by Symbol.
  def to_h
    fields = %i[score1 score2
                ref_begin1 ref_end1
                read_begin1 read_end1
                ref_end2
                cigar cigar_len]
    fields.each_with_object({}) do |field, result|
      result[field] = __send__(field)
    end
  end
end
57
+
58
# Query profile: FFI::Profile extended with decoded accessors and a Hash
# conversion.
class Profile < FFI::Profile
  # Returns the read buffer decoded as signed 8-bit integers, or an empty
  # Array when read_len is zero.
  def read
    buffer = super
    return [] if read_len.zero?

    buffer[0, read_len].unpack('c*')
  end

  # Returns the first n * n bytes of the matrix buffer decoded as signed
  # 8-bit integers.
  def mat
    buffer = super
    buffer[0, n * n].unpack('c*')
  end

  # snake_case reader for the struct's readLen member.
  def read_len
    readLen
  end

  # Returns a Hash holding the value of each listed reader, keyed by Symbol.
  def to_h
    fields = %i[byte word read mat read_len n bias]
    fields.each_with_object({}) do |field, result|
      result[field] = __send__(field)
    end
  end
end
89
+
90
# Module-level wrappers around the FFI bindings. Ruby Integer arrays are
# packed into binary strings before they are handed to the C functions.
class << self
  # Creates a query profile.
  #
  # read       - Array of Integer residue codes (packed as signed 8-bit).
  # read_len   - Integer passed as the read length.
  # mat        - Array (flat or nested) of Integer scores (packed as signed
  #              8-bit after flattening).
  # n          - Integer passed as the matrix dimension.
  # score_size - Integer passed through unchanged.
  #
  # Returns a Profile built from the value FFI.ssw_init returns.
  def ssw_init(read, read_len, mat, n, score_size)
    ptr = FFI.ssw_init(
      read.pack('c*'), read_len, mat.flatten.pack('c*'), n, score_size
    )
    # Resolve the wrapper class through this module rather than through an
    # SSW alias, which in the code shown is only defined by exe/rbssw.
    Profile.new(ptr)
  end

  # Passes +profile+ to FFI.init_destroy.
  def init_destroy(profile)
    FFI.init_destroy(profile)
  end

  # Runs an alignment.
  #
  # prof - profile handed to FFI.ssw_align as is.
  # ref  - Array of Integer residue codes (packed as signed 8-bit).
  # The remaining arguments are passed through unchanged.
  #
  # Returns an Align built from the value FFI.ssw_align returns.
  def ssw_align(prof, ref, ref_len, weight_gap0, weight_gapE, flag, filters, filterd, mask_len)
    ptr = FFI.ssw_align(
      prof, ref.pack('c*'), ref_len, weight_gap0, weight_gapE, flag, filters, filterd, mask_len
    )
    # See ssw_init for why the class is not looked up through SSW.
    Align.new(ptr)
  end

  # Passes +align+ to FFI.align_destroy.
  def align_destroy(align)
    FFI.align_destroy(align)
  end

  # Incomplete wrapper around FFI.mark_mismatch; +cigar+ is still forwarded
  # without conversion (see the warning it prints).
  #
  # ref, read - Arrays of Integer residue codes (packed as signed 8-bit).
  # cigar_len - Integer or Array of Integers; packed as signed 32-bit.
  def mark_mismatch(ref_begin1, read_begin1, read_end1, ref, read, read_len, cigar, cigar_len)
    warn 'implementation: fiexme: **cigar' # FIXME
    FFI.mark_mismatch(
      ref_begin1, read_begin1, read_end1, ref.pack('c*'), read.pack('c*'), read_len, cigar,
      # Array() lets a plain Integer length through; Integer has no #pack.
      Array(cigar_len).pack('l*')
    )
  end
end
38
120
  end
@@ -31,7 +31,7 @@ module LibSSW
31
31
  'uint16_t score2',
32
32
  'int32_t ref_begin1',
33
33
  'int32_t ref_end1',
34
- 'int32_t read_begin1',
34
+ 'int32_t read_begin1',
35
35
  'int32_t read_end1',
36
36
  'int32_t ref_end2',
37
37
  'uint32_t* cigar',
@@ -39,8 +39,8 @@ module LibSSW
39
39
  ]
40
40
 
41
41
  Profile = struct [
42
- 'int32_t* byte', # __m128i* profile_byte; // 0: none
43
- 'int32_t* word', # __m128i* profile_word; // 0: none
42
+ '__m128i* byte', # __m128i* profile_byte; // 0: none
43
+ '__m128i* word', # __m128i* profile_word; // 0: none
44
44
  'const int8_t* read',
45
45
  'const int8_t* mat',
46
46
  'int32_t readLen',
@@ -49,7 +49,12 @@ module LibSSW
49
49
  ]
50
50
 
51
51
  # s_profile* ssw_init (const int8_t* read, const int32_t readLen, const int8_t* mat, const int32_t n, const int8_t score_size)
52
- try_extern 's_profile* ssw_init (const int8_t* read, int32_t readLen, const int8_t* mat, int32_t n, int8_t score_size)'
52
+ try_extern 's_profile* ssw_init (' \
53
+ 'const int8_t* read,' \
54
+ 'int32_t readLen,' \
55
+ 'const int8_t* mat,' \
56
+ 'int32_t n,' \
57
+ 'int8_t score_size)'
53
58
 
54
59
  try_extern 'void init_destroy (s_profile* p)'
55
60
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module LibSSW
4
- VERSION = '0.0.0.pre'
4
+ VERSION = '0.0.1.pre'
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: libssw
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.0.pre
4
+ version: 0.0.1.pre
5
5
  platform: ruby
6
6
  authors:
7
7
  - kojix2
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: 1.0.7
27
+ - !ruby/object:Gem::Dependency
28
+ name: bio
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: bundler
29
43
  requirement: !ruby/object:Gem::Requirement