genevalidator 1.6.12 → 2.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +30 -1
- data/.ruby-version +1 -0
- data/.travis.yml +13 -12
- data/Gemfile +4 -1
- data/Gemfile.lock +135 -0
- data/README.md +104 -122
- data/Rakefile +377 -5
- data/aux/gv_results.slim +155 -0
- data/aux/html_files/css/gv.compiled.min.css +8 -0
- data/aux/{files → html_files}/css/src/bootstrap.min.css +0 -0
- data/aux/{files → html_files}/css/src/font-awesome.min.css +0 -0
- data/aux/{files → html_files}/css/src/style.css +0 -0
- data/aux/{files → html_files}/fonts/FontAwesome.otf +0 -0
- data/aux/{files → html_files}/fonts/fontawesome-webfont.eot +0 -0
- data/aux/{files → html_files}/fonts/fontawesome-webfont.svg +0 -0
- data/aux/{files → html_files}/fonts/fontawesome-webfont.ttf +0 -0
- data/aux/{files → html_files}/fonts/fontawesome-webfont.woff +0 -0
- data/aux/{files → html_files}/img/gene.png +0 -0
- data/aux/html_files/js/gv.compiled.min.js +1 -0
- data/aux/{files → html_files}/js/src/bootstrap.min.js +0 -0
- data/aux/{files → html_files}/js/src/d3.v3.min.js +0 -0
- data/aux/{files → html_files}/js/src/jquery-2.1.1.min.js +0 -0
- data/aux/{files → html_files}/js/src/jquery.tablesorter.min.js +0 -0
- data/aux/{files → html_files}/js/src/plots.js +1 -1
- data/aux/{files → html_files}/js/src/script.js +0 -0
- data/aux/{files → html_files}/json/.gitkeep +0 -0
- data/bin/genevalidator +393 -56
- data/exemplar_data/README.md +60 -0
- data/{data/mrna_data.fasta → exemplar_data/mrna_data.fa} +1 -1
- data/{data/protein_data.fasta → exemplar_data/protein_data.fa} +0 -0
- data/genevalidator.gemspec +35 -20
- data/install.sh +92 -0
- data/lib/genevalidator.rb +171 -56
- data/lib/genevalidator/arg_validation.rb +26 -55
- data/lib/genevalidator/blast.rb +44 -99
- data/lib/genevalidator/clusterization.rb +18 -22
- data/lib/genevalidator/exceptions.rb +17 -17
- data/lib/genevalidator/ext/array.rb +21 -4
- data/lib/genevalidator/get_raw_sequences.rb +32 -31
- data/lib/genevalidator/hsp.rb +31 -2
- data/lib/genevalidator/json_to_gv_results.rb +38 -122
- data/lib/genevalidator/output.rb +158 -172
- data/lib/genevalidator/output_files.rb +134 -0
- data/lib/genevalidator/pool.rb +2 -5
- data/lib/genevalidator/query.rb +1 -1
- data/lib/genevalidator/tabular_parser.rb +8 -29
- data/lib/genevalidator/validation.rb +48 -90
- data/lib/genevalidator/validation_alignment.rb +64 -75
- data/lib/genevalidator/validation_blast_reading_frame.rb +13 -9
- data/lib/genevalidator/validation_duplication.rb +85 -84
- data/lib/genevalidator/validation_gene_merge.rb +46 -35
- data/lib/genevalidator/validation_length_cluster.rb +18 -15
- data/lib/genevalidator/validation_length_rank.rb +19 -15
- data/lib/genevalidator/validation_maker_qi.rb +13 -12
- data/lib/genevalidator/validation_open_reading_frame.rb +16 -13
- data/lib/genevalidator/validation_report.rb +1 -1
- data/lib/genevalidator/validation_test.rb +1 -1
- data/lib/genevalidator/version.rb +1 -1
- data/test/overall.rb +1 -1
- data/test/test_all_validations.rb +36 -24
- data/test/test_blast.rb +39 -24
- data/test/test_clusterization_2d.rb +4 -4
- data/test/test_helper.rb +2 -2
- data/test/test_query.rb +16 -20
- data/test/test_validation_open_reading_frame.rb +122 -122
- data/test/test_validations.rb +12 -10
- metadata +94 -79
- data/aux/files/css/genevalidator.compiled.min.css +0 -16
- data/aux/files/js/genevalidator.compiled.min.js +0 -28
- data/aux/json_footer.erb +0 -8
- data/aux/json_header.erb +0 -19
- data/aux/json_query.erb +0 -15
- data/aux/template_footer.erb +0 -8
- data/aux/template_header.erb +0 -19
- data/aux/template_query.erb +0 -14
- data/data/README.md +0 -57
- data/data/mrna_data.fasta.blast_tabular +0 -3567
- data/data/mrna_data.fasta.blast_tabular.raw_seq +0 -53998
- data/data/mrna_data.fasta.blast_tabular.raw_seq.idx +0 -5440
- data/data/mrna_data.fasta.blast_xml +0 -39800
- data/data/mrna_data.fasta.blast_xml.raw_seq +0 -2554
- data/data/mrna_data.fasta.blast_xml.raw_seq.idx +0 -3127
- data/data/mrna_data.fasta.json +0 -1
- data/data/protein_data.fasta.blast_tabular +0 -3278
- data/data/protein_data.fasta.blast_tabular.raw_seq +0 -61295
- data/data/protein_data.fasta.blast_tabular.raw_seq.idx +0 -4438
- data/data/protein_data.fasta.blast_xml +0 -26228
- data/data/protein_data.fasta.blast_xml.raw_seq +0 -9803
- data/data/protein_data.fasta.blast_xml.raw_seq.idx +0 -1777
- data/data/protein_data.fasta.json +0 -1
data/lib/genevalidator/output.rb
CHANGED
|
@@ -1,15 +1,10 @@
|
|
|
1
|
-
require 'erb'
|
|
2
|
-
require 'fileutils'
|
|
3
1
|
require 'forwardable'
|
|
4
2
|
require 'json'
|
|
5
3
|
|
|
6
|
-
require 'genevalidator/version'
|
|
7
|
-
|
|
8
4
|
module GeneValidator
|
|
9
5
|
class Output
|
|
10
6
|
extend Forwardable
|
|
11
|
-
def_delegators GeneValidator, :opt, :config, :
|
|
12
|
-
:mutex_json
|
|
7
|
+
def_delegators GeneValidator, :opt, :config, :dirs, :mutex
|
|
13
8
|
attr_accessor :prediction_def
|
|
14
9
|
attr_accessor :nr_hits
|
|
15
10
|
|
|
@@ -27,9 +22,12 @@ module GeneValidator
|
|
|
27
22
|
# Params:
|
|
28
23
|
# +current_idx+: index of the current query
|
|
29
24
|
def initialize(current_idx, no_of_hits, definition)
|
|
30
|
-
@opt
|
|
31
|
-
@
|
|
25
|
+
@opt = opt
|
|
26
|
+
@dirs = dirs
|
|
27
|
+
@config = config
|
|
32
28
|
@config[:run_no] += 1
|
|
29
|
+
output_dir = @dirs[:output_dir]
|
|
30
|
+
@output_filename = File.join(output_dir, "#{@dirs[:filename]}_results")
|
|
33
31
|
|
|
34
32
|
@prediction_def = definition
|
|
35
33
|
@nr_hits = no_of_hits
|
|
@@ -37,62 +35,38 @@ module GeneValidator
|
|
|
37
35
|
end
|
|
38
36
|
|
|
39
37
|
def print_output_console
|
|
38
|
+
return unless @opt[:output_formats].include? 'stdout'
|
|
39
|
+
c_fmt = "%3s\t%5s\t%20s\t%7s\t"
|
|
40
40
|
mutex.synchronize do
|
|
41
|
-
print_console_header
|
|
42
|
-
short_def = @prediction_def.
|
|
43
|
-
print format(
|
|
44
|
-
@nr_hits)
|
|
41
|
+
print_console_header(c_fmt)
|
|
42
|
+
short_def = @prediction_def.split(' ')[0]
|
|
43
|
+
print format(c_fmt, @idx, @overall_score, short_def, @nr_hits)
|
|
45
44
|
puts validations.map(&:print).join("\t").gsub(' ', ' ')
|
|
46
45
|
end
|
|
47
46
|
end
|
|
48
47
|
|
|
49
|
-
def
|
|
50
|
-
@
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
mutex_html.synchronize do
|
|
58
|
-
output_html = output_filename
|
|
59
|
-
query_erb = File.join(@config[:aux], 'template_query.erb')
|
|
60
|
-
template_file = File.open(query_erb, 'r').read
|
|
61
|
-
erb = ERB.new(template_file, 0, '>')
|
|
62
|
-
File.open(output_html, 'a') { |f| f.write(erb.result(binding)) }
|
|
63
|
-
end
|
|
64
|
-
end
|
|
65
|
-
|
|
66
|
-
def output_filename
|
|
67
|
-
idx = (@config[:run_no].to_f / @config[:output_max]).ceil
|
|
68
|
-
output_html = File.join(@config[:html_path], "results#{idx}.html")
|
|
69
|
-
write_html_header(output_html)
|
|
70
|
-
output_html
|
|
71
|
-
end
|
|
72
|
-
|
|
73
|
-
def write_html_header(output_html)
|
|
74
|
-
head_erb = File.join(@config[:aux], 'template_header.erb')
|
|
75
|
-
set_up_html(head_erb, output_html) unless File.exist?(output_html)
|
|
48
|
+
def generate_json
|
|
49
|
+
fname = File.join(@dirs[:json_dir], "#{@dirs[:filename]}_#{@idx}.json")
|
|
50
|
+
row_data = { idx: @idx, overall_score: @overall_score,
|
|
51
|
+
definition: @prediction_def, no_hits: @nr_hits }
|
|
52
|
+
row = create_validation_hash(row_data)
|
|
53
|
+
arr_idx = @idx - 1
|
|
54
|
+
@config[:json_output][arr_idx] = row
|
|
55
|
+
File.open(fname, 'w') { |f| f.write(row.to_json) }
|
|
76
56
|
end
|
|
77
57
|
|
|
78
|
-
|
|
79
|
-
return if File.exist?(output_file)
|
|
80
|
-
template_contents = File.open(erb_file, 'r').read
|
|
81
|
-
erb = ERB.new(template_contents, 0, '>')
|
|
82
|
-
File.open(output_file, 'w+') { |f| f.write(erb.result(binding)) }
|
|
83
|
-
end
|
|
58
|
+
private
|
|
84
59
|
|
|
85
|
-
def
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
end
|
|
60
|
+
def print_console_header(c_fmt)
|
|
61
|
+
return if @config[:console_header_printed]
|
|
62
|
+
@config[:console_header_printed] = true
|
|
63
|
+
warn '==> Validating input sequences'
|
|
64
|
+
warn '' # blank line
|
|
65
|
+
print format(c_fmt, 'No', 'Score', 'Identifier', 'No_Hits')
|
|
66
|
+
puts validations.map(&:short_header).join("\t")
|
|
93
67
|
end
|
|
94
68
|
|
|
95
|
-
def
|
|
69
|
+
def create_validation_hash(row)
|
|
96
70
|
row[:validations] = {}
|
|
97
71
|
@validations.each do |item|
|
|
98
72
|
val = add_basic_validation_info(item)
|
|
@@ -106,7 +80,8 @@ module GeneValidator
|
|
|
106
80
|
|
|
107
81
|
def add_basic_validation_info(item)
|
|
108
82
|
{ header: item.header, description: item.description, status: item.color,
|
|
109
|
-
print: item.print.gsub(' ', ' ')
|
|
83
|
+
print: item.print.gsub(' ', ' '), run_time: item.run_time,
|
|
84
|
+
validation: item.validation }
|
|
110
85
|
end
|
|
111
86
|
|
|
112
87
|
def add_explanation_data(item)
|
|
@@ -124,138 +99,149 @@ module GeneValidator
|
|
|
124
99
|
graphs
|
|
125
100
|
end
|
|
126
101
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
File.open(json_file, 'w') { |f| f.write(array.to_json) }
|
|
135
|
-
end
|
|
136
|
-
|
|
137
|
-
##
|
|
138
|
-
# Method that closes the gas in the html file and writes the overall
|
|
139
|
-
# evaluation
|
|
140
|
-
# Param:
|
|
141
|
-
# +all_query_outputs+: array with +ValidationTest+ objects
|
|
142
|
-
# +html_path+: path of the html folder
|
|
143
|
-
# +filemane+: name of the fasta input file
|
|
144
|
-
def self.print_footer(overview, config)
|
|
145
|
-
set_overall_evaluation(overview, config)
|
|
146
|
-
|
|
147
|
-
footer_erb = File.join(config[:aux], 'template_footer.erb')
|
|
148
|
-
|
|
149
|
-
no_of_results_files = (config[:run_no].to_f / config[:output_max]).ceil
|
|
150
|
-
template_file = File.open(footer_erb, 'r').read
|
|
151
|
-
erb = ERB.new(template_file, 0, '>')
|
|
152
|
-
|
|
153
|
-
output_files = []
|
|
154
|
-
(1..no_of_results_files).each { |i| output_files << "results#{i}.html" }
|
|
155
|
-
|
|
156
|
-
(1..no_of_results_files).each do |i|
|
|
157
|
-
results_html = File.join(config[:html_path], "results#{i}.html")
|
|
158
|
-
File.open(results_html, 'a+') { |f| f.write(erb.result(binding)) }
|
|
102
|
+
class <<self
|
|
103
|
+
def print_console_footer(overall_evaluation, opt)
|
|
104
|
+
return unless (opt[:output_formats].include? 'stdout') ||
|
|
105
|
+
opt[:hide_summary]
|
|
106
|
+
warn ''
|
|
107
|
+
warn "==> #{overall_evaluation.join("\n")}"
|
|
108
|
+
warn ''
|
|
159
109
|
end
|
|
160
110
|
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
end
|
|
174
|
-
|
|
175
|
-
def self.turn_off_sorting(html_path)
|
|
176
|
-
script_file = File.join(html_path,
|
|
177
|
-
'files/js/genevalidator.compiled.min.js')
|
|
178
|
-
content = File.read(script_file).gsub(',initTableSorter(),', ',')
|
|
179
|
-
File.open("#{script_file}.tmp", 'w') { |f| f.puts content }
|
|
180
|
-
FileUtils.mv("#{script_file}.tmp", script_file)
|
|
181
|
-
end
|
|
182
|
-
|
|
183
|
-
def self.print_summary_to_console(overall_evaluation, summary)
|
|
184
|
-
# print to console
|
|
185
|
-
eval = ''
|
|
186
|
-
overall_evaluation.each { |e| eval << "#{e}\n" }
|
|
187
|
-
$stderr.puts eval if summary
|
|
188
|
-
$stderr.puts ''
|
|
189
|
-
eval
|
|
190
|
-
end
|
|
111
|
+
def generate_overview(json_data, min_blast_hits)
|
|
112
|
+
scores_from_json = json_data.map { |e| e[:overall_score] }
|
|
113
|
+
quartiles = scores_from_json.all_quartiles
|
|
114
|
+
nee = calculate_no_quries_with_no_evidence(json_data)
|
|
115
|
+
no_mafft = count_mafft_errors(json_data)
|
|
116
|
+
no_internet = count_internet_errors(json_data)
|
|
117
|
+
map_errors = map_errors(json_data)
|
|
118
|
+
run_time = calculate_run_time(json_data)
|
|
119
|
+
min_hits = json_data.count { |e| e[:no_hits] < min_blast_hits }
|
|
120
|
+
overview_hash(scores_from_json, quartiles, nee, no_mafft, no_internet,
|
|
121
|
+
map_errors, run_time, min_hits)
|
|
122
|
+
end
|
|
191
123
|
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
124
|
+
def overview_hash(scores_from_json, quartiles, nee, no_mafft, no_internet,
|
|
125
|
+
map_errors, run_time, insufficient_BLAST_hits)
|
|
126
|
+
{
|
|
127
|
+
scores: scores_from_json,
|
|
128
|
+
no_queries: scores_from_json.length,
|
|
129
|
+
good_scores: scores_from_json.count { |s| s >= 75 },
|
|
130
|
+
bad_scores: scores_from_json.count { |s| s < 75 },
|
|
131
|
+
nee: nee, no_mafft: no_mafft, no_internet: no_internet,
|
|
132
|
+
map_errors: map_errors, run_time: run_time,
|
|
133
|
+
first_quartile_of_scores: quartiles[0],
|
|
134
|
+
second_quartile_of_scores: quartiles[1],
|
|
135
|
+
third_quartile_of_scores: quartiles[2],
|
|
136
|
+
insufficient_BLAST_hits: insufficient_BLAST_hits
|
|
137
|
+
}
|
|
138
|
+
end
|
|
203
139
|
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
140
|
+
# calculate number of queries that had warnings for all validations.
|
|
141
|
+
def calculate_no_quries_with_no_evidence(json_data)
|
|
142
|
+
all_warnings = 0
|
|
143
|
+
json_data.each do |row|
|
|
144
|
+
status = row[:validations].map { |_, h| h[:status] }
|
|
145
|
+
if status.count { |r| r == 'warning' } == status.length
|
|
146
|
+
all_warnings += 1
|
|
147
|
+
end
|
|
148
|
+
end
|
|
149
|
+
all_warnings
|
|
150
|
+
end
|
|
214
151
|
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
152
|
+
def count_mafft_errors(json_data)
|
|
153
|
+
json_data.count do |row|
|
|
154
|
+
num = row[:validations].count { |_, h| h[:print] == 'Mafft error' }
|
|
155
|
+
num.zero? ? false : true
|
|
156
|
+
end
|
|
157
|
+
end
|
|
218
158
|
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
159
|
+
def count_internet_errors(json_data)
|
|
160
|
+
json_data.count do |row|
|
|
161
|
+
num = row[:validations].count { |_, h| h[:print] == 'Internet error' }
|
|
162
|
+
num.zero? ? false : true
|
|
163
|
+
end
|
|
164
|
+
end
|
|
222
165
|
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
166
|
+
def map_errors(json_data)
|
|
167
|
+
errors = Hash.new(0)
|
|
168
|
+
json_data.each do |row|
|
|
169
|
+
e = row[:validations].map { |s, h| s if h[:validation] == 'error' }
|
|
170
|
+
e.compact.each { |err| errors[err] += 1 }
|
|
171
|
+
end
|
|
172
|
+
errors
|
|
173
|
+
end
|
|
228
174
|
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
175
|
+
def calculate_run_time(json_data)
|
|
176
|
+
run_time = Hash.new(Pair1.new(0, 0))
|
|
177
|
+
json_data.map do |row|
|
|
178
|
+
row[:validations].each do |short_header, v|
|
|
179
|
+
next if v[:run_time].nil? || v[:run_time].zero?
|
|
180
|
+
next if v[:validation] == 'unapplicable' || v[:validation] == 'error'
|
|
181
|
+
p = Pair1.new(run_time[short_header.to_s].x + v[:run_time],
|
|
182
|
+
run_time[short_header.to_s].y + 1)
|
|
183
|
+
run_time[short_header.to_s] = p
|
|
184
|
+
end
|
|
185
|
+
end
|
|
186
|
+
run_time
|
|
232
187
|
end
|
|
233
|
-
eval
|
|
234
|
-
end
|
|
235
188
|
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
189
|
+
##
|
|
190
|
+
# Calculates an overall evaluation of the output
|
|
191
|
+
# Params:
|
|
192
|
+
# +all_query_outputs+: Array of +ValidationTest+ objects
|
|
193
|
+
# Output
|
|
194
|
+
# Array of Strigs with the reports
|
|
195
|
+
def generate_evaluation_text(overview)
|
|
196
|
+
eval = general_overview(overview)
|
|
197
|
+
error_eval = errors_overview(overview)
|
|
198
|
+
time_eval = time_overview(overview)
|
|
199
|
+
|
|
200
|
+
[eval, error_eval, time_eval].reject(&:empty?)
|
|
241
201
|
end
|
|
242
|
-
|
|
243
|
-
|
|
202
|
+
|
|
203
|
+
private
|
|
204
|
+
|
|
205
|
+
def general_overview(o)
|
|
206
|
+
good_pred = o[:good_scores] == 1 ? 'One' : "#{o[:good_scores]} are"
|
|
207
|
+
bad_pred = o[:bad_scores] == 1 ? 'One' : "#{o[:bad_scores]} are"
|
|
208
|
+
|
|
209
|
+
plural = 'prediction was' if o[:insufficient_BLAST_hits] == 1
|
|
210
|
+
plural = 'predictions were' if o[:insufficient_BLAST_hits] >= 2
|
|
211
|
+
b = "#{o[:insufficient_BLAST_hits]} #{plural} not evaluated due to an" \
|
|
212
|
+
' insufficient number of BLAST hits.'
|
|
213
|
+
blast_hits = o[:insufficient_BLAST_hits].zero? ? '' : b
|
|
214
|
+
|
|
215
|
+
['Overall Query Score Evaluation:',
|
|
216
|
+
"#{o[:no_queries]} predictions were validated, from which there were:",
|
|
217
|
+
"#{good_pred} good prediction(s),",
|
|
218
|
+
"#{bad_pred} possibly weak prediction(s).", blast_hits,
|
|
219
|
+
"The median overall score was #{o[:second_quartile_of_scores]} with" \
|
|
220
|
+
" an upper quartile of #{o[:third_quartile_of_scores]}" \
|
|
221
|
+
" and a lower quartile of #{o[:first_quartile_of_scores]}."]
|
|
244
222
|
end
|
|
245
|
-
|
|
246
|
-
|
|
223
|
+
|
|
224
|
+
# errors per validation
|
|
225
|
+
def errors_overview(o)
|
|
226
|
+
error_eval = o[:map_errors].map do |k, v|
|
|
227
|
+
"We couldn't run #{k} Validation for #{v} queries"
|
|
228
|
+
end
|
|
229
|
+
if o[:no_mafft] >= (o[:no_queries] - o[:nee])
|
|
230
|
+
error_eval << "We couldn't run MAFFT multiple alignment"
|
|
231
|
+
end
|
|
232
|
+
if o[:no_internet] >= (o[:no_queries] - o[:nee])
|
|
233
|
+
error_eval << "\nWe couldn't make use of your internet connection"
|
|
234
|
+
end
|
|
235
|
+
error_eval
|
|
247
236
|
end
|
|
248
|
-
error_eval
|
|
249
|
-
end
|
|
250
237
|
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
238
|
+
def time_overview(o)
|
|
239
|
+
o[:run_time].map do |key, value|
|
|
240
|
+
mean_time = value.x / value.y.to_f
|
|
241
|
+
"Average running time for #{key} Validation: #{mean_time.round(3)}s" \
|
|
242
|
+
' per validation'
|
|
243
|
+
end
|
|
257
244
|
end
|
|
258
|
-
time_eval
|
|
259
245
|
end
|
|
260
246
|
end
|
|
261
247
|
end
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
require 'csv'
|
|
2
|
+
require 'slim'
|
|
3
|
+
require 'fileutils'
|
|
4
|
+
require 'forwardable'
|
|
5
|
+
require 'json'
|
|
6
|
+
|
|
7
|
+
require 'genevalidator/version'
|
|
8
|
+
|
|
9
|
+
module GeneValidator
|
|
10
|
+
# A Class for creating output files
|
|
11
|
+
class OutputFiles
|
|
12
|
+
extend Forwardable
|
|
13
|
+
def_delegators GeneValidator, :opt, :config, :dirs, :overview
|
|
14
|
+
|
|
15
|
+
def initialize()
|
|
16
|
+
@config = config
|
|
17
|
+
@opt = opt
|
|
18
|
+
@dirs = dirs
|
|
19
|
+
@overview = overview
|
|
20
|
+
@json_data = @config[:json_output]
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def write_html(overall_eval)
|
|
24
|
+
return unless @opt[:output_formats].include? 'html'
|
|
25
|
+
@all_html_fnames = all_html_filenames
|
|
26
|
+
@json_data.each_slice(@config[:output_max]).with_index do |data, i|
|
|
27
|
+
@json_data_section = data
|
|
28
|
+
template_file = File.join(@dirs[:aux_dir], 'gv_results.slim')
|
|
29
|
+
template_contents = File.open(template_file, 'r').read
|
|
30
|
+
html_output = Slim::Template.new { template_contents }.render(self)
|
|
31
|
+
File.open(@all_html_fnames[i], 'w') { |f| f.write(html_output) }
|
|
32
|
+
end
|
|
33
|
+
create_overview_json_file(overall_eval)
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
def write_json
|
|
37
|
+
return unless @opt[:output_formats].include? 'json'
|
|
38
|
+
File.open(@dirs[:json_file], 'w') { |f| f.write(@json_data.to_json) }
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
def write_csv
|
|
42
|
+
return unless @opt[:output_formats].include? 'csv'
|
|
43
|
+
File.open(@dirs[:csv_file], 'a') do |file|
|
|
44
|
+
file.puts csv_header.join(',')
|
|
45
|
+
@json_data.each do |data|
|
|
46
|
+
short_def = data[:definition].split(' ')[0]
|
|
47
|
+
line = [data[:idx], data[:overall_score], short_def, data[:nr_hits]]
|
|
48
|
+
line += data[:validations].values.map { |e| e[:print] }
|
|
49
|
+
.each { |e| e.gsub!(' ', ' ') }
|
|
50
|
+
line.map { |e| e.gsub!(',', ' -') if e.is_a? String }
|
|
51
|
+
file.puts line.join(',')
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
def write_summary
|
|
57
|
+
return unless @opt[:output_formats].include? 'summary'
|
|
58
|
+
data = generate_summary_data
|
|
59
|
+
File.open(@dirs[:summary_file], 'w') do |f|
|
|
60
|
+
f.write data.map(&:to_csv).join
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
def print_best_fasta
|
|
65
|
+
return unless @opt[:select_single_best]
|
|
66
|
+
top_data = @json_data.max_by { |e| [e[:overall_score], e[:no_hits]] }
|
|
67
|
+
query = GeneValidator.extract_input_fasta_sequence(top_data[:idx])
|
|
68
|
+
File.open(@dirs[:fasta_file], 'w') { |f| f.write(query) }
|
|
69
|
+
puts query
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
private
|
|
73
|
+
|
|
74
|
+
def all_html_filenames
|
|
75
|
+
result_parts = (@json_data.length / @config[:output_max]).ceil
|
|
76
|
+
(0..result_parts).map do |idx|
|
|
77
|
+
multiple_files_needed = @json_data.length < @config[:output_max]
|
|
78
|
+
part = multiple_files_needed ? '' : "_#{idx + 1}"
|
|
79
|
+
fname = File.join(@dirs[:output_dir], "#{@dirs[:filename]}_results")
|
|
80
|
+
fname + part + '.html'
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
# By default, on page load, the results are automatically sorted by the
|
|
85
|
+
# index. However since the whole idea is that users would sort by JSON,
|
|
86
|
+
# this is not wanted here.
|
|
87
|
+
def turn_off_automated_sorting
|
|
88
|
+
js_file = File.join(@dirs[:output_dir], 'html_files/js/gv.compiled.min.js')
|
|
89
|
+
original_content = File.read(js_file)
|
|
90
|
+
# removes the automatic sort on page load
|
|
91
|
+
updated_content = original_content.gsub(',sortList:[[0,0]]', '')
|
|
92
|
+
File.open("#{script_file}.tmp", 'w') { |f| f.puts updated_content }
|
|
93
|
+
FileUtils.mv("#{script_file}.tmp", script_file)
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
def create_overview_json_file(overall_eval)
|
|
97
|
+
evaluation = overall_eval.flatten.join('<br>').gsub("'", %q(\\\'))
|
|
98
|
+
less = overall_eval[0].join('<br>')
|
|
99
|
+
hash = overview_html_hash(evaluation, less)
|
|
100
|
+
json = File.join(@dirs[:json_dir], 'overview.json')
|
|
101
|
+
File.open(json, 'w') { |f| f.write hash.to_json }
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
# make the historgram with the resulted scores
|
|
105
|
+
def overview_html_hash(evaluation, less)
|
|
106
|
+
data = [@overview[:scores].group_by { |a| a }.map do |k, vs|
|
|
107
|
+
{ 'key': k, 'value': vs.length, 'main': false }
|
|
108
|
+
end]
|
|
109
|
+
{ data: data, type: :simplebars, aux1: 10, aux2: '',
|
|
110
|
+
title: 'Overall GeneValidator Score Evaluation', footer: '',
|
|
111
|
+
xtitle: 'Validation Score', ytitle: 'Number of Queries',
|
|
112
|
+
less: less, evaluation: evaluation }
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
def csv_header
|
|
116
|
+
header = %w[AnalysisNumber GVScore Identifier NumberOfHits]
|
|
117
|
+
header += @json_data[0][:validations].keys
|
|
118
|
+
header
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
def generate_summary_data
|
|
122
|
+
[
|
|
123
|
+
['num_predictions', @overview[:no_queries]],
|
|
124
|
+
['num_good_predictions', @overview[:good_scores]],
|
|
125
|
+
['num_bad_predictions', @overview[:bad_scores]],
|
|
126
|
+
['num_predictions_with_insufficient_blast_hits',
|
|
127
|
+
@overview[:insufficient_BLAST_hits]],
|
|
128
|
+
['first_quartile_of_scores', @overview[:first_quartile_of_scores]],
|
|
129
|
+
['second_quartile_of_scores', @overview[:second_quartile_of_scores]],
|
|
130
|
+
['third_quartile_of_scores', @overview[:third_quartile_of_scores]]
|
|
131
|
+
]
|
|
132
|
+
end
|
|
133
|
+
end
|
|
134
|
+
end
|