bio-rocker 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/bin/ROCker +18 -9
- data/lib/rocker.rb +2 -2
- data/lib/rocker/blasthit.rb +2 -1
- data/lib/rocker/rocdata.rb +17 -12
- data/lib/rocker/step/compile.rb +2 -1
- data/lib/rocker/step/filter.rb +47 -23
- metadata +3 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 0ecb36031d65a56ae6a7e4144d7102f171d30bebcadfdf5a132622b3a743b901
|
4
|
+
data.tar.gz: 5f8251402d40ddbba6097014e1f93a1bce0245e4cc2b3fac4a3d849c5a009c18
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5eac28f2473d7b8357d127741c54aab97403872fe9ef3a2185268b049bb4eb32cc3ea0216f56d6d955de66ea7172962a81a76b90a3b229fbad631365c3eb0a40
|
7
|
+
data.tar.gz: 8c23e1ad05edc88aa364b4f54ca594897cf971a98b3f98332a165d0a268b65985bba84eb020e509b3a5ae414099bcd79878d025ee2cb164f6c25d8e225ace685
|
data/bin/ROCker
CHANGED
@@ -139,14 +139,14 @@ opts = OptionParser.new do |opt|
|
|
139
139
|
*ROCker.default(:searchcmd).keys.map do |k|
|
140
140
|
["By default if --search #{k}:", "'" +
|
141
141
|
ROCker.default(:searchcmd)[k] + "'"]
|
142
|
-
end.
|
142
|
+
end.flatten ){ |v| o[:searchcmd]=v }
|
143
143
|
opt.on("--makedb-cmd STR",
|
144
144
|
"Command calling database formater for similarity search, where",
|
145
145
|
"%1$s: binaries, %2$s: dbtype, %3$s: input, %4$s: database.",
|
146
146
|
*ROCker.default(:makedbcmd).keys.map do |k|
|
147
147
|
["By default if --search #{k}:", "'" +
|
148
148
|
ROCker.default(:makedbcmd)[k] + "'"]
|
149
|
-
end.
|
149
|
+
end.flatten ){ |v| o[:makedbcmd]=v }
|
150
150
|
opt.on("--simulator-cmd STR",
|
151
151
|
"Command calling simulator, where %1$s: binary, %2$s: input,",
|
152
152
|
"%3$s: seq. depth (X), %4$d: read len., %5$s: output file, " +
|
@@ -154,7 +154,7 @@ opts = OptionParser.new do |opt|
|
|
154
154
|
*ROCker.default(:simulatorcmd).keys.map do |k|
|
155
155
|
["By default if --simulator #{k}:", "'" +
|
156
156
|
ROCker.default(:simulatorcmd)[k] + "'"]
|
157
|
-
end.
|
157
|
+
end.flatten ){ |v| o[:simulatorcmd]=v }
|
158
158
|
opt.on("--aligner-cmd STR",
|
159
159
|
"Command calling aligner, where %1$s: binary, %2$s: input,",
|
160
160
|
"%3$s: output, %4$d: threads.",
|
@@ -164,6 +164,9 @@ opts = OptionParser.new do |opt|
|
|
164
164
|
end.reduce([],:+) ){ |v| o[:alignercmd]=v }
|
165
165
|
when "compile"
|
166
166
|
opt.separator "+ COMPILATION ARGUMENTS"
|
167
|
+
opt.on("-l", "--readlen INTEGER",
|
168
|
+
"Average read length used in the compile step (Mandatory)."
|
169
|
+
){ |v| o[:readlen] = v.to_i }
|
167
170
|
opt.on("-a", "--alignment PATH",
|
168
171
|
"Protein alignment of the reference sequences. Required."
|
169
172
|
){ |v| o[:aln]=v }
|
@@ -233,6 +236,9 @@ opts = OptionParser.new do |opt|
|
|
233
236
|
opt.on("-x", "--query-blast PATH",
|
234
237
|
"Unfiltered tabular BLAST. By default, this file is not " +
|
235
238
|
"kept."){ |v| o[:qblast]=v }
|
239
|
+
opt.on('-L', '--length-correction PATH',
|
240
|
+
'Path to the query reads, used to apply read-length correction to ' +
|
241
|
+
'bit scores.'){ |v| o[:lencorr] = v }
|
236
242
|
|
237
243
|
opt.separator ""
|
238
244
|
opt.separator "+ EXTERNAL SOFTWARE OPTIONS"
|
@@ -248,16 +254,16 @@ opts = OptionParser.new do |opt|
|
|
248
254
|
"Command calling similarity search, where %1$s: binaries, %2$s: " +
|
249
255
|
"program, %3$s: input, %4$s: database, %5$s: output, %6$d: threads.",
|
250
256
|
*ROCker.default(:searchcmd).keys.map do |k|
|
251
|
-
"By default if --search #{k}:
|
252
|
-
|
253
|
-
end ){ |v| o[:searchcmd]=v }
|
257
|
+
["By default if --search #{k}:",
|
258
|
+
"'" + ROCker.default(:searchcmd)[k] + "'"]
|
259
|
+
end.flatten ){ |v| o[:searchcmd]=v }
|
254
260
|
opt.on("--makedb-cmd STR",
|
255
261
|
"Command calling database format for similarity search, where %1$s: " +
|
256
262
|
"binaries, %2$s: dbtype, %3$s: input, %4$s: database.",
|
257
263
|
*ROCker.default(:makedbcmd).keys.map do |k|
|
258
|
-
"By default if --search #{k}:
|
259
|
-
|
260
|
-
end ){ |v| o[:makedbcmd]=v }
|
264
|
+
["By default if --search #{k}:",
|
265
|
+
"'" + ROCker.default(:makedbcmd)[k] + "'" ]
|
266
|
+
end.flatten ){ |v| o[:makedbcmd]=v }
|
261
267
|
when "filter"
|
262
268
|
opt.separator "+ FILTERING ARGUMENTS"
|
263
269
|
opt.on("-k", "--rocker PATH",
|
@@ -268,6 +274,9 @@ opts = OptionParser.new do |opt|
|
|
268
274
|
"dataset. Required."){ |v| o[:qblast]=v }
|
269
275
|
opt.on("-o", "--out-blast PATH",
|
270
276
|
"Filtered tabular BLAST to be created. Required."){ |v| o[:oblast]=v }
|
277
|
+
opt.on('-L', '--length-correction PATH',
|
278
|
+
'Path to the query reads, used to apply read-length correction to ' +
|
279
|
+
'bit scores.'){ |v| o[:lencorr] = v }
|
271
280
|
when "plot"
|
272
281
|
opt.separator "+ PLOTTING ARGUMENTS"
|
273
282
|
opt.on("-k", "--rocker PATH",
|
data/lib/rocker.rb
CHANGED
@@ -9,13 +9,13 @@ require "rocker/rocdata"
|
|
9
9
|
|
10
10
|
class ROCker
|
11
11
|
#================================[ Class ]
|
12
|
-
@@VERSION = "1.2.0"
|
12
|
+
@@VERSION = "1.3.0"
|
13
13
|
@@CITATION = [
|
14
14
|
"Orellana, Rodriguez-R & Konstantinidis, 2016. DOI:10.1093/nar/gkw900.",
|
15
15
|
"ROCker: accurate detection and quantification of target genes in",
|
16
16
|
"short-read metagenomic data sets by modeling sliding-window bitscores.",
|
17
17
|
"Nucleic Acids Research 45(3):e14."]
|
18
|
-
@@DATE = "
|
18
|
+
@@DATE = "2019-06-02"
|
19
19
|
@@DEFAULTS = {
|
20
20
|
# General
|
21
21
|
q: false, r: "R", nucl: false, debug: false, thr: 2, search: :blast,
|
data/lib/rocker/blasthit.rb
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
#
|
7
7
|
|
8
8
|
class BlastHit
|
9
|
-
attr_reader :sbj, :sfrom, :sto, :bits, :istrue, :isfalse, :midpoint
|
9
|
+
attr_reader :qry, :sbj, :sfrom, :sto, :bits, :istrue, :isfalse, :midpoint
|
10
10
|
# Initialize from BLAST using new(ln,aln),
|
11
11
|
# initialize from TABLE using new(ln)
|
12
12
|
def initialize(ln, aln=nil)
|
@@ -20,6 +20,7 @@ class BlastHit
|
|
20
20
|
@istrue = l[4]=='-1'
|
21
21
|
@midpoint = l[5].to_i
|
22
22
|
else
|
23
|
+
@qry = l[0]
|
23
24
|
s = aln.seq(l[1])
|
24
25
|
return nil if s.nil?
|
25
26
|
@sbj = s.id
|
data/lib/rocker/rocdata.rb
CHANGED
@@ -11,24 +11,30 @@ require 'rocker/alignment'
|
|
11
11
|
require 'tmpdir'
|
12
12
|
|
13
13
|
class ROCData
|
14
|
-
attr_reader :aln, :windows, :r, :refined
|
14
|
+
attr_reader :aln, :windows, :r, :refined, :signatures
|
15
15
|
# Use ROCData.new(table,aln,window) to re-compute from table, use
|
16
16
|
# ROCData.new(data) to load
|
17
|
-
def initialize(val, aln=nil, window=nil)
|
17
|
+
def initialize(val, aln = nil, window = nil)
|
18
18
|
@r = RInterface.new
|
19
19
|
@nucl = false
|
20
20
|
@refined = false
|
21
21
|
if not aln.nil?
|
22
22
|
@aln = aln
|
23
|
+
@signatures = { v: "ROCker #{ROCker.VERSION}", d: Time.now.to_s }
|
23
24
|
self.rrun "library('pROC');"
|
24
25
|
self.rrun "x <- read.table('#{val}', sep='\\t', h=F);"
|
25
26
|
self.init_windows! window
|
26
27
|
else
|
27
28
|
f = File.open(val, "r")
|
28
29
|
@windows = []
|
30
|
+
@signatures = {}
|
29
31
|
while ln = f.gets
|
30
|
-
|
31
|
-
|
32
|
+
break unless /^#:/.match(ln).nil?
|
33
|
+
if ln =~ /^#(\S+) (.*)/
|
34
|
+
@signatures[$1.to_sym] = $2
|
35
|
+
else
|
36
|
+
@windows << ROCWindow.new(self, ln)
|
37
|
+
end
|
32
38
|
end
|
33
39
|
f.close
|
34
40
|
@aln = Alignment.new
|
@@ -126,16 +132,15 @@ class ROCData
|
|
126
132
|
end
|
127
133
|
end
|
128
134
|
def rrun(cmd, type=nil) self.r.run cmd, type end
|
129
|
-
def save(file)
|
130
|
-
|
131
|
-
|
132
|
-
f.close
|
135
|
+
def save(file, sign = {})
|
136
|
+
@signatures.merge! sign
|
137
|
+
File.open(file, 'w') { |fh| fh.print self.to_s }
|
133
138
|
end
|
134
139
|
def to_s
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
140
|
+
o = signatures.map{ |k,v| "##{k} #{v}\n" }.join
|
141
|
+
self.windows.each{ |w| o += w.to_s }
|
142
|
+
o += self.aln.to_s
|
143
|
+
return o
|
139
144
|
end
|
140
145
|
end
|
141
146
|
|
data/lib/rocker/step/compile.rb
CHANGED
@@ -13,6 +13,7 @@ class ROCker
|
|
13
13
|
def compile!
|
14
14
|
raise "-a/--alignment is mandatory." if @o[:aln].nil?
|
15
15
|
raise "-a/--alignment must exist." unless File.exist? @o[:aln]
|
16
|
+
raise "-l/--readlen is mandatory." if @o[:readlen].nil?
|
16
17
|
if @o[:table].nil?
|
17
18
|
raise "-T/--table is mandatory unless -b is provided." if
|
18
19
|
@o[:blast].nil? or not File.exist? @o[:blast]
|
@@ -52,7 +53,7 @@ class ROCker
|
|
52
53
|
data.refine! @o[:table]
|
53
54
|
end
|
54
55
|
puts " * saving ROCker file: #{@o[:rocker]}." unless @o[:q]
|
55
|
-
data.save
|
56
|
+
data.save(@o[:rocker], l: @o[:readlen])
|
56
57
|
end # compile!
|
57
58
|
end # ROCker
|
58
59
|
|
data/lib/rocker/step/filter.rb
CHANGED
@@ -6,31 +6,55 @@
|
|
6
6
|
#
|
7
7
|
|
8
8
|
class ROCker
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
9
|
+
#================================[ Class ]
|
10
|
+
#@@DEFAULTS.merge!({ })
|
11
|
+
|
12
|
+
#================================[ Filter ]
|
13
|
+
def filter!(data=nil)
|
14
|
+
raise "-k/--rocker is mandatory." if @o[:rocker].nil?
|
15
|
+
raise "-x/--query-blast is mandatory." if @o[:qblast].nil?
|
16
|
+
raise "-o/--out-blast is mandatory." if @o[:oblast].nil?
|
17
|
+
|
18
|
+
# Read ROCker file
|
19
|
+
if data.nil?
|
20
|
+
puts "Loading ROCker file: #{@o[:rocker]}." unless @o[:q]
|
21
|
+
data = ROCData.new @o[:rocker]
|
22
|
+
end
|
23
|
+
corr = {}
|
24
|
+
readlen = 0
|
25
|
+
unless @o[:lencorr].nil?
|
26
|
+
raise "Unsigned length in model, please re-compile model to use -L" if
|
27
|
+
data.signatures[:l].nil?
|
28
|
+
readlen = data.signatures[:l].to_i
|
29
|
+
File.open(@o[:lencorr], 'r') do |fh|
|
30
|
+
k = nil
|
31
|
+
fh.each_line do |ln|
|
32
|
+
if ln =~ /^>(\S+)/
|
33
|
+
k = $1
|
34
|
+
corr[k] = 0
|
35
|
+
else
|
36
|
+
corr[k] += ln.chomp.size
|
37
|
+
end
|
38
|
+
end
|
22
39
|
end
|
40
|
+
end
|
23
41
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
42
|
+
# Filter similarity search
|
43
|
+
puts "Filtering similarity search: #{@o[:qblast]}." unless @o[:q]
|
44
|
+
oh = File.open(@o[:oblast], 'w')
|
45
|
+
File.open(@o[:qblast], 'r') do |ih|
|
46
|
+
ih.each_line do |ln|
|
47
|
+
bh = BlastHit.new(ln, data.aln)
|
48
|
+
bs = bh.bits
|
49
|
+
unless @o[:lencorr].nil?
|
50
|
+
corrlen = [corr[bh.qry].to_i, 0.6 * readlen].max
|
51
|
+
bs = bs * readlen / corrlen if corrlen < readlen
|
52
|
+
end
|
53
|
+
oh.print ln if
|
54
|
+
not(bh.sfrom.nil?) and bs >= data.win_at_col(bh.midpoint).thr
|
31
55
|
end
|
32
|
-
|
33
|
-
|
34
|
-
|
56
|
+
end
|
57
|
+
oh.close
|
58
|
+
end # filter!
|
35
59
|
end # ROCker
|
36
60
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-rocker
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.0
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis (Coto) Orellana
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2019-06-02 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rest-client
|
@@ -81,8 +81,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
83
|
requirements: []
|
84
|
-
|
85
|
-
rubygems_version: 2.6.13
|
84
|
+
rubygems_version: 3.0.3
|
86
85
|
signing_key:
|
87
86
|
specification_version: 4
|
88
87
|
summary: ROCker
|