bio-rocker 1.0.0 → 1.1.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/ROCker +276 -96
- data/lib/rocker.rb +25 -14
- data/lib/rocker/blasthit.rb +8 -5
- data/lib/rocker/genome-set.rb +70 -0
- data/lib/rocker/protein-set.rb +90 -0
- data/lib/rocker/rocdata.rb +26 -9
- data/lib/rocker/rocwindow.rb +20 -18
- data/lib/rocker/step/build.rb +233 -200
- data/lib/rocker/step/compile.rb +11 -6
- data/lib/rocker/step/filter.rb +11 -7
- data/lib/rocker/step/plot.rb +80 -26
- data/lib/rocker/step/search.rb +27 -4
- metadata +16 -14
data/lib/rocker/step/compile.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
3
3
|
# @author Luis (Coto) Orellana
|
4
4
|
# @license artistic license 2.0
|
5
|
-
# @update
|
5
|
+
# @update Sep-07-2015
|
6
6
|
#
|
7
7
|
|
8
8
|
class ROCker
|
@@ -14,17 +14,21 @@ class ROCker
|
|
14
14
|
raise "-a/--alignment is mandatory." if @o[:aln].nil?
|
15
15
|
raise "-a/--alignment must exist." unless File.exist? @o[:aln]
|
16
16
|
if @o[:table].nil?
|
17
|
-
raise "-
|
17
|
+
raise "-T/--table is mandatory unless -b is provided." if
|
18
|
+
@o[:blast].nil? or not File.exist? @o[:blast]
|
18
19
|
@o[:table] = "#{@o[:blast]}.table"
|
19
20
|
else
|
20
21
|
@o[:reuse] = true
|
21
22
|
end
|
22
|
-
raise "-b/--blast is mandatory unless -t exists." if
|
23
|
+
raise "-b/--blast is mandatory unless -t exists." if
|
24
|
+
@o[:blast].nil? and not File.exist? @o[:table]
|
23
25
|
raise "-k/--rocker is mandatory." if @o[:rocker].nil?
|
24
26
|
|
25
27
|
puts "Testing environment." unless @o[:q]
|
26
|
-
bash
|
27
|
-
|
28
|
+
bash("echo '' | #{@o[:r]} --vanilla",
|
29
|
+
"-r/--path-to-r must be executable. Is R installed?")
|
30
|
+
bash("echo \"library('pROC')\" | #{@o[:r]} --vanilla",
|
31
|
+
"Please install the 'pROC' library for R first.")
|
28
32
|
|
29
33
|
puts "Reading files." unless @o[:q]
|
30
34
|
puts " * loading alignment: #{@o[:aln]}." unless @o[:q]
|
@@ -44,7 +48,8 @@ class ROCker
|
|
44
48
|
data.nucl = @o[:nucl]
|
45
49
|
if @o[:refine]
|
46
50
|
puts " * refining windows." unless @o[:q]
|
47
|
-
warn "Insufficient hits to refine results." unless
|
51
|
+
warn "Insufficient hits to refine results." unless
|
52
|
+
data.refine! @o[:table]
|
48
53
|
end
|
49
54
|
puts " * saving ROCker file: #{@o[:rocker]}." unless @o[:q]
|
50
55
|
data.save @o[:rocker]
|
data/lib/rocker/step/filter.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
3
3
|
# @author Luis (Coto) Orellana
|
4
4
|
# @license artistic license 2.0
|
5
|
-
# @update Jun-
|
5
|
+
# @update Jun-08-2015
|
6
6
|
#
|
7
7
|
|
8
8
|
class ROCker
|
@@ -10,17 +10,21 @@ class ROCker
|
|
10
10
|
#@@DEFAULTS.merge!({ })
|
11
11
|
|
12
12
|
#================================[ Filter ]
|
13
|
-
def filter!
|
13
|
+
def filter!(data=nil)
|
14
14
|
raise "-k/--rocker is mandatory." if @o[:rocker].nil?
|
15
15
|
raise "-x/--query-blast is mandatory." if @o[:qblast].nil?
|
16
16
|
raise "-o/--out-blast is mandatory." if @o[:oblast].nil?
|
17
17
|
|
18
|
-
|
19
|
-
data
|
18
|
+
# Read ROCker file
|
19
|
+
if data.nil?
|
20
|
+
puts "Loading ROCker file: #{@o[:rocker]}." unless @o[:q]
|
21
|
+
data = ROCData.new @o[:rocker]
|
22
|
+
end
|
20
23
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
+
# Filter similarity search
|
25
|
+
puts "Filtering similarity search: #{@o[:qblast]}." unless @o[:q]
|
26
|
+
ih = File.open(@o[:qblast], "r")
|
27
|
+
oh = File.open(@o[:oblast], "w")
|
24
28
|
while ln = ih.gets
|
25
29
|
bh = BlastHit.new(ln, data.aln)
|
26
30
|
oh.print ln if not(bh.sfrom.nil?) and bh.bits >= data.win_at_col(bh.midpoint).thr
|
data/lib/rocker/step/plot.rb
CHANGED
@@ -2,25 +2,32 @@
|
|
2
2
|
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
3
3
|
# @author Luis (Coto) Orellana
|
4
4
|
# @license artistic license 2.0
|
5
|
-
# @update
|
5
|
+
# @update Sep-09-2015
|
6
6
|
#
|
7
7
|
|
8
8
|
class ROCker
|
9
9
|
#================================[ Class ]
|
10
|
-
@@DEFAULTS.merge!({
|
10
|
+
@@DEFAULTS.merge!({
|
11
|
+
color:false, gformat:"pdf", width:9, height:9, impact:false,
|
12
|
+
transparency:true, sbj:[], tag_negatives:false
|
13
|
+
})
|
11
14
|
|
12
15
|
#================================[ Search ]
|
13
16
|
def plot!
|
14
17
|
raise "-k/--rocker is mandatory." if o[:rocker].nil?
|
15
18
|
if @o[:table].nil?
|
16
|
-
raise "-t/--table is mandatory unless -b is provided." if
|
19
|
+
raise "-t/--table is mandatory unless -b is provided." if
|
20
|
+
@o[:blast].nil?
|
17
21
|
@o[:table] = "#{@o[:blast]}.table"
|
18
22
|
end
|
19
|
-
raise "-b/--blast is mandatory unless -t exists." if
|
23
|
+
raise "-b/--blast is mandatory unless -t exists." if
|
24
|
+
@o[:blast].nil? and not File.exist? @o[:table]
|
20
25
|
|
21
26
|
puts "Testing environment." unless @o[:q]
|
22
|
-
bash "echo '' | #{@o[:r]} --vanilla", "-r/--path-to-r must be
|
27
|
+
bash "echo '' | #{@o[:r]} --vanilla", "-r/--path-to-r must be " +
|
28
|
+
"executable. Is R installed?"
|
23
29
|
|
30
|
+
# Source files
|
24
31
|
puts "Reading files." unless @o[:q]
|
25
32
|
puts " * loding ROCker file: #{@o[:rocker]}." unless @o[:q]
|
26
33
|
data = ROCData.new @o[:rocker]
|
@@ -31,40 +38,71 @@ class ROCker
|
|
31
38
|
blast2table(@o[:blast], @o[:table], data.aln, @o[:minscore])
|
32
39
|
end
|
33
40
|
|
41
|
+
# Matches (middle panel)
|
34
42
|
puts "Plotting matches." unless @o[:q]
|
35
|
-
extra = @o[:gformat]==
|
43
|
+
extra = @o[:gformat]=="pdf" ? "" : ", units='in', res=300"
|
36
44
|
@o[:gout] ||= "#{@o[:rocker]}.#{@o[:gformat]}"
|
37
|
-
data.rrun "#{@o[:gformat]}('#{@o[:gout]}', #{@o[:width]},
|
45
|
+
data.rrun "#{@o[:gformat]}('#{@o[:gout]}', #{@o[:width]}, " +
|
46
|
+
"#{@o[:height]}#{extra});"
|
38
47
|
data.rrun "layout(c(2,1,3), heights=c(2-1/#{data.aln.size},3,1));"
|
39
48
|
some_thr = data.load_table! @o[:table], @o[:sbj], @o[:minscore]
|
40
49
|
data.rrun "par(mar=c(0,4,0,0.5)+.1);"
|
41
|
-
data.rrun "plot(1, t='n', xlim=c(0.5,#{data.aln.cols}+0.5),
|
50
|
+
data.rrun "plot(1, t='n', xlim=c(0.5,#{data.aln.cols}+0.5), " +
|
51
|
+
"ylim=range(x$V4)+c(-0.04,0.04)*diff(range(x$V4)), xlab='', " +
|
52
|
+
"ylab='Bit score', xaxs='i', xaxt='n');"
|
42
53
|
data.rrun "noise <- runif(ncol(x),-.2,.2)"
|
43
|
-
data.rrun "
|
44
|
-
|
45
|
-
|
54
|
+
data.rrun "hit.col <- ifelse(x$V5==1, " +
|
55
|
+
"rgb(0,0,.5,#{@o[:transparency] ? ".2" : "1"}), " +
|
56
|
+
"rgb(.5,0,0,#{@o[:transparency] ? ".2" : "1"}))"
|
57
|
+
data.rrun "hit.col[ x$V5==-1 ] <- " +
|
58
|
+
"rgb(0.722,0.722,0,#{@o[:transparency] ? ".2" : "1"})" if
|
59
|
+
@o[:tag_negatives]
|
60
|
+
data.rrun "arrows(x0=x$V2, x1=x$V3, y0=x$V4+noise, lty=1, col=hit.col, " +
|
61
|
+
"length=0);"
|
62
|
+
data.rrun "points(x$V6, x$V4+noise, col=hit.col, pch=19, cex=1/4);"
|
63
|
+
|
64
|
+
# Windows (middle panel)
|
46
65
|
puts "Plotting windows." unless @o[:q]
|
47
66
|
if some_thr
|
48
67
|
data.rrun "arrows(x0=w$V1, x1=w$V2, y0=w$V5, lwd=2, length=0)"
|
49
|
-
data.rrun "arrows(x0=w$V2[-nrow(w)], x1=w$V1[-1],
|
68
|
+
data.rrun "arrows(x0=w$V2[-nrow(w)], x1=w$V1[-1], " +
|
69
|
+
"y0=w$V5[-nrow(w)], y1=w$V5[-1], lwd=2, length=0)"
|
50
70
|
end
|
51
|
-
data.rrun "legend('bottomright',legend=c('Match span',
|
52
|
-
"
|
71
|
+
data.rrun "legend('bottomright', legend=c('Match span'," +
|
72
|
+
"'Match mid-point','Reference (+)'," +
|
73
|
+
"#{"'Reference (-)'," if @o[:tag_negatives]}'Non-reference'), " +
|
74
|
+
"lwd=c(1,NA,1,1,1), pch=c(NA,19,19,19,19), ncol=5, bty='n', " +
|
75
|
+
"col=c('black','black','darkblue'," +
|
76
|
+
"#{"rgb(.722,.722,0)," if @o[:tag_negatives]}'darkred'))"
|
53
77
|
|
78
|
+
# Alignment (top panel)
|
54
79
|
puts "Plotting alignment." unless @o[:q]
|
55
80
|
data.rrun "par(mar=c(0,4,0.5,0.5)+0.1);"
|
56
|
-
data.rrun "plot(1, t='n', xlim=c(0,#{data.aln.cols}),
|
81
|
+
data.rrun "plot(1, t='n', xlim=c(0,#{data.aln.cols}), " +
|
82
|
+
"ylim=c(1,#{data.aln.seqs.size}), xlab='', ylab='Alignment', " +
|
83
|
+
"xaxs='i', xaxt='n', yaxs='i', yaxt='n', bty='n');"
|
57
84
|
i = 0
|
58
85
|
data.rrun "clr <- rainbow(26, v=1/2, s=3/4);" if @o[:color]
|
59
86
|
data.aln.seqs.values.each do |s|
|
60
|
-
color = s.aln.split(//).map
|
61
|
-
|
87
|
+
color = (s.aln.split(//).map do |c|
|
88
|
+
c=="-" ? "'grey80'" :
|
89
|
+
(@o[:sbj].include?(s.id) ? "'red'" :
|
90
|
+
(@o[:color] ? "clr[#{c.ord-64}]" :
|
91
|
+
"'black'"))
|
92
|
+
end.join(","))
|
93
|
+
data.rrun "rect((1:#{data.aln.cols-1})-0.5, " +
|
94
|
+
"rep(#{i}, #{data.aln.cols-1}), (1:#{data.aln.cols-1})+0.5, " +
|
95
|
+
"rep(#{i+1}, #{data.aln.cols-1}), col=c(#{color}), border=NA);"
|
62
96
|
i += 1
|
63
97
|
end
|
64
98
|
|
99
|
+
# Statistics (bottom panel)
|
65
100
|
puts "Plotting statistics." unless @o[:q]
|
66
101
|
data.rrun "par(mar=c(5,4,0,0.5)+.1);"
|
67
|
-
data.rrun "plot(1, t='n', xlim=c(0,#{data.aln.cols}),
|
102
|
+
data.rrun "plot(1, t='n', xlim=c(0,#{data.aln.cols}), " +
|
103
|
+
"ylim=c(#{@o[:ylim].nil? ? (@o[:impact] ? "-2,.1" : "50,100") :
|
104
|
+
@o[:ylim]}), xlab='Alignment position (amino acids)', " +
|
105
|
+
"ylab='Precision',xaxs='i');"
|
68
106
|
if some_thr
|
69
107
|
sn = data.rrun "100*sum(w$tp)/(sum(w$tp)+sum(w$fn))", :float
|
70
108
|
sp = data.rrun "100*sum(w$tn)/(sum(w$fp)+sum(w$tn))", :float
|
@@ -76,17 +114,33 @@ class ROCker
|
|
76
114
|
end
|
77
115
|
data.rrun "pos <- (w$V1+w$V2)/2"
|
78
116
|
if @o[:impact]
|
79
|
-
data.rrun "lines(pos[!is.na(w$specificity)],
|
80
|
-
|
81
|
-
|
117
|
+
data.rrun "lines(pos[!is.na(w$specificity)], " +
|
118
|
+
"(w$specificity[!is.na(w$specificity)]-#{sp})*" +
|
119
|
+
"w$tp[!is.na(w$specificity)]/sum(w$tp), " +
|
120
|
+
"col='darkred', lwd=2, t='o', cex=1/3, pch=19);"
|
121
|
+
data.rrun "lines(pos[!is.na(w$sensitivity)], " +
|
122
|
+
"(w$sensitivity[!is.na(w$sensitivity)]-#{sn})*" +
|
123
|
+
"w$tn[!is.na(w$sensitivity)]/sum(w$tn), " +
|
124
|
+
"col='darkgreen', lwd=2, t='o', cex=1/3, pch=19);"
|
125
|
+
data.rrun "lines(pos[!is.na(w$accuracy)], " +
|
126
|
+
"(w$accuracy[!is.na(w$accuracy)]-#{ac})*" +
|
127
|
+
"(w$tp+w$tn)[!is.na(w$accuracy)]/sum(c(w$tp, w$tn)), " +
|
128
|
+
"col='darkblue', lwd=2, t='o', cex=1/3, pch=19);"
|
82
129
|
else
|
83
|
-
data.rrun "lines(pos[!is.na(w$specificity)],
|
84
|
-
|
85
|
-
|
130
|
+
data.rrun "lines(pos[!is.na(w$specificity)], " +
|
131
|
+
"w$specificity[!is.na(w$specificity)], col='darkred', " +
|
132
|
+
"lwd=2, t='o', cex=1/3, pch=19);"
|
133
|
+
data.rrun "lines(pos[!is.na(w$sensitivity)], " +
|
134
|
+
"w$sensitivity[!is.na(w$sensitivity)], col='darkgreen', " +
|
135
|
+
"lwd=2, t='o', cex=1/3, pch=19);"
|
136
|
+
data.rrun "lines(pos[!is.na(w$accuracy)], " +
|
137
|
+
"w$accuracy[!is.na(w$accuracy)], col='darkblue', lwd=2, " +
|
138
|
+
"t='o', cex=1/3, pch=19);"
|
86
139
|
end
|
87
|
-
#data.rrun "lines(pos[!is.na(w$precision)], w$precision[!is.na(w$precision)], col='purple', lwd=2, t='o', cex=1/3, pch=19);"
|
88
140
|
end
|
89
|
-
data.rrun "legend('bottomright',
|
141
|
+
data.rrun "legend('bottomright', " +
|
142
|
+
"legend=c('Specificity','Sensitivity','Accuracy'), lwd=2, " +
|
143
|
+
"col=c('darkred','darkgreen','darkblue'), ncol=3, bty='n')"
|
90
144
|
data.rrun "dev.off();"
|
91
145
|
end # plot!
|
92
146
|
end # ROCker
|
data/lib/rocker/step/search.rb
CHANGED
@@ -2,9 +2,11 @@
|
|
2
2
|
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
3
3
|
# @author Luis (Coto) Orellana
|
4
4
|
# @license artistic license 2.0
|
5
|
-
# @update Jun-
|
5
|
+
# @update Jun-08-2015
|
6
6
|
#
|
7
7
|
|
8
|
+
require 'tmpdir'
|
9
|
+
|
8
10
|
class ROCker
|
9
11
|
#================================[ Class ]
|
10
12
|
#@@DEFAULTS.merge!({ })
|
@@ -12,9 +14,30 @@ class ROCker
|
|
12
14
|
#================================[ Search ]
|
13
15
|
def search!
|
14
16
|
raise "-k/--rocker is mandatory." if @o[:rocker].nil?
|
15
|
-
raise "
|
16
|
-
|
17
|
-
#
|
17
|
+
raise "-i/--query is mandatory." if @o[:query].nil?
|
18
|
+
|
19
|
+
# Check requirements
|
20
|
+
puts "Testing environment." unless @o[:q]
|
21
|
+
@o[:searchcmd] = @o[:searchcmd][@o[:search]] if @o[:searchcmd].is_a? Hash
|
22
|
+
@o[:makedbcmd] = @o[:makedbcmd][@o[:search]] if @o[:makedbcmd].is_a? Hash
|
23
|
+
self.bash "#{@o[:searchbins]}makeblastdb -version", "--search-bins must contain executables. Is BLAST+ installed?" if @o[:search]==:blast
|
24
|
+
self.bash "#{@o[:searchbins]}diamond --help", "--search-bins must contain executables. Is DIAMOND installed?" if @o[:search]==:diamond
|
25
|
+
|
26
|
+
# Run similarity search
|
27
|
+
Dir.mktmpdir do |dir|
|
28
|
+
@o[:qblast] ||= "#{dir}/blast"
|
29
|
+
puts "Loading ROCker file: #{@o[:rocker]}." unless @o[:q]
|
30
|
+
data = ROCData.new @o[:rocker]
|
31
|
+
puts "Running similarity search." unless @o[:q]
|
32
|
+
puts " * preparing database." unless @o[:q]
|
33
|
+
ofh = File.new("#{dir}/ref.fasta", "w")
|
34
|
+
ofh.print data.aln.to_seq_s
|
35
|
+
ofh.close
|
36
|
+
bash sprintf(@o[:makedbcmd], @o[:searchbins], 'prot', "#{dir}/ref.fasta", "#{dir}/ref")
|
37
|
+
puts " * running similarity search." unless @o[:q]
|
38
|
+
bash sprintf(@o[:searchcmd], @o[:searchbins], 'blastx', @o[:query], "#{dir}/ref", @o[:qblast], @o[:thr])
|
39
|
+
self.filter! data
|
40
|
+
end
|
18
41
|
end # search!
|
19
42
|
end # ROCker
|
20
43
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-rocker
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.1.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis (Coto) Orellana
|
@@ -9,34 +9,34 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-
|
12
|
+
date: 2015-07-02 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rest-client
|
16
16
|
requirement: !ruby/object:Gem::Requirement
|
17
17
|
requirements:
|
18
|
-
- -
|
18
|
+
- - ">="
|
19
19
|
- !ruby/object:Gem::Version
|
20
20
|
version: 1.7.3
|
21
21
|
type: :runtime
|
22
22
|
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
|
-
- -
|
25
|
+
- - ">="
|
26
26
|
- !ruby/object:Gem::Version
|
27
27
|
version: 1.7.3
|
28
28
|
- !ruby/object:Gem::Dependency
|
29
29
|
name: json
|
30
30
|
requirement: !ruby/object:Gem::Requirement
|
31
31
|
requirements:
|
32
|
-
- -
|
32
|
+
- - ">="
|
33
33
|
- !ruby/object:Gem::Version
|
34
34
|
version: 1.8.1
|
35
35
|
type: :runtime
|
36
36
|
prerelease: false
|
37
37
|
version_requirements: !ruby/object:Gem::Requirement
|
38
38
|
requirements:
|
39
|
-
- -
|
39
|
+
- - ">="
|
40
40
|
- !ruby/object:Gem::Version
|
41
41
|
version: 1.8.1
|
42
42
|
description: Detecting and quantifying functional genes in short-read metagenomic
|
@@ -47,19 +47,21 @@ executables:
|
|
47
47
|
extensions: []
|
48
48
|
extra_rdoc_files: []
|
49
49
|
files:
|
50
|
+
- bin/ROCker
|
50
51
|
- lib/rocker.rb
|
51
|
-
- lib/rocker/sequence.rb
|
52
52
|
- lib/rocker/alignment.rb
|
53
53
|
- lib/rocker/blasthit.rb
|
54
|
-
- lib/rocker/
|
55
|
-
- lib/rocker/
|
54
|
+
- lib/rocker/genome-set.rb
|
55
|
+
- lib/rocker/protein-set.rb
|
56
56
|
- lib/rocker/rinterface.rb
|
57
|
+
- lib/rocker/rocdata.rb
|
58
|
+
- lib/rocker/rocwindow.rb
|
59
|
+
- lib/rocker/sequence.rb
|
57
60
|
- lib/rocker/step/build.rb
|
58
61
|
- lib/rocker/step/compile.rb
|
59
|
-
- lib/rocker/step/search.rb
|
60
62
|
- lib/rocker/step/filter.rb
|
61
63
|
- lib/rocker/step/plot.rb
|
62
|
-
-
|
64
|
+
- lib/rocker/step/search.rb
|
63
65
|
homepage: http://enve-omics.ce.gatech.edu/rocker
|
64
66
|
licenses:
|
65
67
|
- artistic 2.0
|
@@ -70,17 +72,17 @@ require_paths:
|
|
70
72
|
- lib
|
71
73
|
required_ruby_version: !ruby/object:Gem::Requirement
|
72
74
|
requirements:
|
73
|
-
- -
|
75
|
+
- - ">="
|
74
76
|
- !ruby/object:Gem::Version
|
75
77
|
version: '2.0'
|
76
78
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
77
79
|
requirements:
|
78
|
-
- -
|
80
|
+
- - ">="
|
79
81
|
- !ruby/object:Gem::Version
|
80
82
|
version: '0'
|
81
83
|
requirements: []
|
82
84
|
rubyforge_project:
|
83
|
-
rubygems_version: 2.
|
85
|
+
rubygems_version: 2.4.5.1
|
84
86
|
signing_key:
|
85
87
|
specification_version: 4
|
86
88
|
summary: ROCker
|