libssw 0.0.0.pre → 0.0.1.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f52642bad07253a1fa4ad73032c49d805a53080812bbb663c0fa825c6b0241f0
4
- data.tar.gz: af7af5c499367ad64f4914bd1c4efce747966170d181960fa20b2c419745666b
3
+ metadata.gz: a45ea25e49633d46da6d363c9ed1d23c9fd3ed1a07b6128d4d61e4b320a7ff44
4
+ data.tar.gz: 599aee0f039c17fe6b3b50e4e89302e2a642b9a6ec886b0dd6ebc13e8972387b
5
5
  SHA512:
6
- metadata.gz: 3858b66d605d0011ce6e9c6606befbd7520613a1435297276a707cf22431ee38e043c7402ac336a08851da79ffd097bf28d72cd8debd995e66e1c1466c3a48b5
7
- data.tar.gz: 7aed161e8a3a8d8c9b754749af60408dbdcc05a8076efe3d116d76fdccac03dbee967cf95a6c7f042cdf7b9cdb99b24e9f059e995c351079a8a156316198f3e7
6
+ metadata.gz: b03f65581c285cedb488946ae84fd029147b54a6b022b9aff5bf39f7554881d8eb2e8a60064f06f012abe7fee21b2aff1801ed1017d7c3ef995557a7a066b687
7
+ data.tar.gz: e651c5fc0c9fb918a479c5070a00631e308562191d7adb0784e7042d07874e5026245c4c76acb48cf99900739e6c0da2617be2100398dbd1134b011a565a0014
data/exe/rbssw CHANGED
@@ -1,4 +1,213 @@
1
1
  #!/usr/bin/env ruby
2
2
  # frozen_string_literal: true
3
3
 
4
+ require 'bio'
4
5
  require 'libssw'
6
+ SSW = LibSSW
7
+ require 'optparse'
8
+
9
# Default value for every command-line setting. The switches that default to
# nil (bsam, bheader) are simply falsy until the user turns them on.
opts = {
  lib_path: nil,
  nmatch: 2,
  nmismatch: 2,
  nopen: 3,
  next: 1,
  bprotein: false,
  smatrix: nil,
  bpath: false,
  nthr: nil,
  bbest: false,
  bsam: nil,
  bheader: nil
}

# Command-line definition. Each handler stores the parsed value in `opts`
# under the key declared in the defaults above.
parser = OptionParser.new do |opt|
  opt.version = LibSSW::VERSION
  opt.summary_width = 20
  opt.banner = 'Usage: rbssw [options] <target file> <query file>'
  opt.on('-l', '--sLibPath PATH', String,
         'path of libssw.so') do |v|
    # Previously written to :sLibPath, which left opts[:lib_path] always nil.
    opts[:lib_path] = v
  end
  opt.on('-m', '--nMatch VAL', Integer,
         'a positive integer as the score for a match',
         "in genome sequence alignment. [#{opts[:nmatch]}]") do |v|
    opts[:nmatch] = v
  end
  opt.on('-x', '--nMismatch VAL', Integer,
         'a positive integer as the score for a mismatch',
         "in genome sequence alignment. [#{opts[:nmismatch]}]") do |v|
    opts[:nmismatch] = v
  end
  opt.on('-o', '--nOpen VAL', Integer,
         'a positive integer as the penalty for the gap opening',
         "in genome sequence alignment. [#{opts[:nopen]}]") do |v|
    opts[:nopen] = v
  end
  opt.on('-e', '--nExt VAL', Integer,
         'a positive integer as the penalty for the gap extension',
         "in genome sequence alignment. [#{opts[:next]}]") do |v|
    opts[:next] = v
  end
  # '--bProtien' is the historical (misspelled) name; it stays as an alias so
  # existing invocations keep working.
  opt.on('-p', '--bProtein', '--bProtien', TrueClass,
         'Do protein sequence alignment.',
         "Without this option, do genome sequence alignment. [#{opts[:bprotein]}]") do |v|
    opts[:bprotein] = v
  end
  opt.on('-a', '--sMatrix VAL', String,
         "a file for either Blosum or Pam weight matrix. [#{opts[:smatrix]}]") do |v|
    opts[:smatrix] = v
  end
  opt.on('-c', '--bPath', TrueClass,
         "Return the alignment path. [#{opts[:bpath]}]") do |v|
    opts[:bpath] = v
  end
  opt.on('-f', '--nThr VAL', Integer,
         'a positive integer.',
         'Only output the alignments with the Smith-Waterman score >= N.') do |v|
    opts[:nthr] = v
  end
  opt.on('-r', '--bBest', TrueClass,
         'The best alignment will be picked, between the original read',
         "alignment and the reverse complement read alignment. [#{opts[:bbest]}]") do |v|
    opts[:bbest] = v
  end
  opt.on('-s', '--bSam', TrueClass,
         'Output in SAM format. [no header]') do |v|
    opts[:bsam] = v
  end
  # OptionParser short switches are a single character, so '-header' was read
  # as '-h' followed by an argument placeholder and took over '-h'. Use
  # proper long switches instead.
  opt.on('--header', '--bHeader', TrueClass,
         'If -s is used, include header in SAM output.') do |v|
    opts[:bheader] = v
  end
end

# order! stops at the first non-option word, so options must precede the files.
parser.order!(ARGV)

opts[:target] = ARGV[0]
opts[:query] = ARGV[1]

# Both positional arguments are required; print the usage text instead of
# failing later inside the sequence file reader.
abort parser.help unless opts[:target] && opts[:query]
89
+
90
# Alphabet and scoring tables used by the alignment loop.
lEle = []      # alphabet symbols
dRc = {}       # base => complementary base (DNA only)
dEle2Int = {}  # symbol, upper or lower case => integer code
dInt2Ele = {}  # integer code => symbol
lScore = nil   # scoring matrix handed to SSW.ssw_init

if opts[:smatrix]
  # A user-supplied matrix applies to protein and DNA alike. Previously it was
  # silently ignored for DNA, leaving lScore nil and the alphabet empty.
  # NOTE(review): SSW.read_matrix is not defined in the library code visible
  # here -- confirm it exists before relying on -a.
  lEle, dEle2Int, dInt2Ele, lScore = SSW.read_matrix(opts[:smatrix])
else
  lEle = if opts[:bprotein]
           %w[A R N D C Q E G H I L K M F P S T W Y V B Z X *]
         else
           %w[A C G T N]
         end
  lEle.each_with_index do |ele, i|
    dEle2Int[ele] = i
    dEle2Int[ele.downcase] = i
    dInt2Ele[i] = ele
  end

  if opts[:bprotein]
    # NOTE(review): SSW::Blosum50 is not defined in the library code visible
    # here -- confirm.
    lScore = SSW::Blosum50
  else
    # Build the flat DNA matrix: +nmatch on the diagonal, -nmismatch elsewhere,
    # and 0 for every cell in the last row/column (the final symbol, N).
    # With the defaults this is:
    #   [ 2, -2, -2, -2, 0,
    #    -2,  2, -2, -2, 0,
    #    -2, -2,  2, -2, 0,
    #    -2, -2, -2,  2, 0,
    #     0,  0,  0,  0, 0 ]
    nEleNum = lEle.size # 5
    lScore = Array.new(nEleNum**2, 0)
    (nEleNum - 1).times do |i|
      (nEleNum - 1).times do |j|
        lScore[i * nEleNum + j] = lEle[i] == lEle[j] ? opts[:nmatch] : -opts[:nmismatch]
      end
    end
  end
end

unless opts[:bprotein]
  # String keys are required because lookups use one-character strings; the
  # old `'A': 'C'` literal created Symbol keys and also paired the wrong bases.
  dRc = {
    'A' => 'T', 'C' => 'G', 'G' => 'C', 'T' => 'A', 'N' => 'N',
    'a' => 'T', 'c' => 'G', 'g' => 'C', 't' => 'A', 'n' => 'N'
  }
end
139
+
140
warn 'Reverse complement alignment is not available for protein sequences.' if opts[:bbest] && opts[:bprotein]

# Flag passed to SSW.ssw_align: 2 when the alignment path was requested, else 0.
nFlag = opts[:bpath] ? 2 : 0

if opts[:bsam] && !opts[:bpath]
  # SAM output needs the alignment path; fall back to the plain output.
  # (This branch used to read the undefined local `args`.)
  warn 'SAM format output is only available together with option -c.'
  opts[:bsam] = false
elsif opts[:bsam] && opts[:bheader]
  # Print the SAM header. Double quotes are required so that \t becomes a real
  # tab character rather than a literal backslash-t.
  puts "@HD\tVN:1.4\tSO:queryname"
  Bio::FlatFile.open(opts[:target]) do |f|
    f.each do |entry|
      id = entry.entry_id
      len = entry.nalen
      puts "@SQ\tSN:#{id}\tLN:#{len}"
    end
  end
end
159
+
160
# Convert a sequence string into an array of integer codes.
#
# @param seq [String] the sequence to encode, one symbol per character
# @param lEle [Array<String>] the alphabet; its last element is the stand-in
#   for any character that has no entry in +dEle2Int+
# @param dEle2Int [Hash{String => Integer}] symbol-to-code table
# @return [Array] one code per character of +seq+
def _to_int(seq, lEle, dEle2Int)
  # The fallback does not depend on the character, so look it up once.
  fallback = dEle2Int[lEle[-1]]
  seq.each_char.map { |ele| dEle2Int.fetch(ele, fallback) }
end
169
+
170
# Iterate over the query sequences and align each one against every target.
Bio::FlatFile.open(opts[:query]) do |query_file|
  query_file.each do |qentry|
    # The id and quality string are read but not used yet; the output stage
    # that would need them is still unfinished.
    sQId = qentry.entry_id
    sQSeq = qentry.sequence_string
    sQQual = qentry.quality_string
    # build query profile
    qNum = _to_int(sQSeq, lEle, dEle2Int)
    qProfile = SSW.ssw_init(qNum, sQSeq.size, lScore, lEle.size, 2)
    # build reverse-complement query profile
    if opts[:bbest] && !opts[:bprotein]
      # Bases without a complement entry map to the last alphabet symbol so
      # the encoded read keeps the length that is passed to ssw_init.
      sQRcSeq = sQSeq.reverse.each_char.map { |x| dRc.fetch(x) { lEle[-1] } }.join
      qRcNum = _to_int(sQRcSeq, lEle, dEle2Int)
      # Uses the same score matrix as the forward profile (this call used to
      # reference the undefined local `mat`).
      qRcProfile = SSW.ssw_init(qRcNum, sQRcSeq.size, lScore, lEle.size, 2)
    end
    # Mask length: half the read length for reads longer than 30, else 15.
    # (A misspelled assignment used to leave this nil for short reads.)
    nMaskLen = sQSeq.size > 30 ? sQSeq.size / 2 : 15

    # Iterate over the target sequences.
    Bio::FlatFile.open(opts[:target]) do |target_file|
      target_file.each do |tentry|
        sRId = tentry.entry_id
        sRSeq = tentry.seq.to_s
        rNum = _to_int(sRSeq, lEle, dEle2Int)
        res = SSW.ssw_align(
          qProfile, rNum, sRSeq.size, opts[:nopen], opts[:next], nFlag, 0, 0, nMaskLen
        )
        p res.to_h
        resRc = nil
        if opts[:bbest] && !opts[:bprotein]
          # SSW.align_one does not exist in the visible module; ssw_align is
          # the alignment entry point.
          resRc = SSW.ssw_align(
            qRcProfile, rNum, sRSeq.size, opts[:nopen], opts[:next], nFlag, 0, 0, nMaskLen
          )
        end
        # TODO: build cigar and trace back path; choose between res and resRc.
        strand = 0
      end
    end
  end
end
@@ -1,6 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'forwardable'
4
3
  require_relative 'libssw/version'
5
4
 
6
5
  module LibSSW
@@ -25,14 +24,97 @@ module LibSSW
25
24
  File.expand_path("../vendor/#{lib_name}", __dir__)
26
25
  end
27
26
 
28
- autoload :FFI, 'libssw/ffi'
27
+ require_relative 'libssw/ffi'
29
28
 
30
- extend Forwardable
31
- Align = FFI::Align
32
- Profile = FFI::Profile
33
- def_delegators :FFI,
34
- :ssw_init,
35
- :init_destroy,
36
- :align_destroy,
37
- :mark_mismatch
29
# Ruby-side view of an alignment result struct. It adds decoded accessors and
# a Hash conversion on top of the raw FFI::Align fields.
class Align < FFI::Align
  # Names of the values collected by #to_h, in output order.
  # @return [Hash{Symbol => Object}] each listed reader's value keyed by name
  def to_h
    %i[
      score1 score2 ref_begin1 ref_end1 read_begin1 read_end1 ref_end2
      cigar cigar_len
    ].each_with_object({}) do |field, result|
      result[field] = __send__(field)
    end
  end

  # Snake-case reader for the struct's cigarLen field.
  def cigar_len
    cigarLen
  end

  # Decode the cigar pointer of the underlying struct.
  #
  # @return [Array<Integer>] cigar_len native unsigned 32-bit integers
  #   (4 bytes each), or an empty array when cigar_len is zero
  def cigar
    pointer = super
    count = cigar_len
    count.zero? ? [] : pointer[0, 4 * count].unpack('L*')
  end
end
57
+
58
# Ruby-side view of a query profile struct. It adds decoded accessors and a
# Hash conversion on top of the raw FFI::Profile fields.
class Profile < FFI::Profile
  # @return [Hash{Symbol => Object}] each listed reader's value keyed by name
  def to_h
    %i[byte word read mat read_len n bias].each_with_object({}) do |field, result|
      result[field] = __send__(field)
    end
  end

  # Snake-case reader for the struct's readLen field.
  def read_len
    readLen
  end

  # Decode the read pointer of the underlying struct.
  #
  # @return [Array<Integer>] read_len signed 8-bit values, or an empty array
  #   when read_len is zero
  def read
    pointer = super
    length = read_len
    length.zero? ? [] : pointer[0, length].unpack('c*')
  end

  # Decode the mat pointer of the underlying struct.
  #
  # @return [Array<Integer>] n * n signed 8-bit values
  def mat
    pointer = super
    pointer[0, n * n].unpack('c*')
  end
end
89
+
90
class << self
  # Create a query profile through FFI.ssw_init.
  #
  # @param read [Array<Integer>] encoded read, packed as signed 8-bit values
  # @param read_len [Integer] length passed through to the C function
  # @param mat [Array] score matrix; flattened and packed as signed 8-bit values
  # @param n [Integer] passed through unchanged
  # @param score_size [Integer] passed through unchanged
  # @return [LibSSW::Profile] wrapper around the returned pointer
  def ssw_init(read, read_len, mat, n, score_size)
    ptr = FFI.ssw_init(
      read.pack('c*'), read_len, mat.flatten.pack('c*'), n, score_size
    )
    # Use the real namespace: the SSW alias is only defined by the rbssw
    # executable, so SSW::Profile raised NameError for library users.
    LibSSW::Profile.new(ptr)
  end

  # Hand a profile to FFI.init_destroy.
  def init_destroy(profile)
    FFI.init_destroy(profile)
  end

  # Run an alignment through FFI.ssw_align.
  #
  # @param prof [LibSSW::Profile] profile from ssw_init, passed through as is
  # @param ref [Array<Integer>] encoded reference, packed as signed 8-bit values
  # @return [LibSSW::Align] wrapper around the returned pointer; all other
  #   arguments are forwarded unchanged
  def ssw_align(prof, ref, ref_len, weight_gap0, weight_gapE, flag, filters, filterd, mask_len)
    ptr = FFI.ssw_align(
      prof, ref.pack('c*'), ref_len, weight_gap0, weight_gapE, flag, filters, filterd, mask_len
    )
    LibSSW::Align.new(ptr)
  end

  # Hand an alignment result to FFI.align_destroy.
  def align_destroy(align)
    FFI.align_destroy(align)
  end

  # Forward to FFI.mark_mismatch. Still flagged as unfinished by its author.
  #
  # +cigar_len+ may be an Integer or an Array of Integers; either way it is
  # packed as native signed 32-bit values (calling #pack directly on an
  # Integer raised NoMethodError).
  # NOTE(review): +cigar+ is forwarded untouched and nothing the C function
  # may write back is read afterwards -- confirm the intended calling
  # convention against the C header.
  def mark_mismatch(ref_begin1, read_begin1, read_end1, ref, read, read_len, cigar, cigar_len)
    warn 'implementation: fiexme: **cigar' # FIXME
    FFI.mark_mismatch(
      ref_begin1, read_begin1, read_end1, ref.pack('c*'), read.pack('c*'), read_len, cigar,
      Array(cigar_len).pack('l*')
    )
  end
end
38
120
  end
@@ -31,7 +31,7 @@ module LibSSW
31
31
  'uint16_t score2',
32
32
  'int32_t ref_begin1',
33
33
  'int32_t ref_end1',
34
- 'int32_t read_begin1',
34
+ 'int32_t read_begin1',
35
35
  'int32_t read_end1',
36
36
  'int32_t ref_end2',
37
37
  'uint32_t* cigar',
@@ -39,8 +39,8 @@ module LibSSW
39
39
  ]
40
40
 
41
41
  Profile = struct [
42
- 'int32_t* byte', # __m128i* profile_byte; // 0: none
43
- 'int32_t* word', # __m128i* profile_word; // 0: none
42
+ '__m128i* byte', # __m128i* profile_byte; // 0: none
43
+ '__m128i* word', # __m128i* profile_word; // 0: none
44
44
  'const int8_t* read',
45
45
  'const int8_t* mat',
46
46
  'int32_t readLen',
@@ -49,7 +49,12 @@ module LibSSW
49
49
  ]
50
50
 
51
51
  # s_profile* ssw_init (const int8_t* read, const int32_t readLen, const int8_t* mat, const int32_t n, const int8_t score_size)
52
- try_extern 's_profile* ssw_init (const int8_t* read, int32_t readLen, const int8_t* mat, int32_t n, int8_t score_size)'
52
+ try_extern 's_profile* ssw_init (' \
53
+ 'const int8_t* read,' \
54
+ 'int32_t readLen,' \
55
+ 'const int8_t* mat,' \
56
+ 'int32_t n,' \
57
+ 'int8_t score_size)'
53
58
 
54
59
  try_extern 'void init_destroy (s_profile* p)'
55
60
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module LibSSW
4
- VERSION = '0.0.0.pre'
4
+ VERSION = '0.0.1.pre'
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: libssw
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.0.pre
4
+ version: 0.0.1.pre
5
5
  platform: ruby
6
6
  authors:
7
7
  - kojix2
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: 1.0.7
27
+ - !ruby/object:Gem::Dependency
28
+ name: bio
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: bundler
29
43
  requirement: !ruby/object:Gem::Requirement