genevalidator 1.6.12 → 2.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +30 -1
- data/.ruby-version +1 -0
- data/.travis.yml +13 -12
- data/Gemfile +4 -1
- data/Gemfile.lock +135 -0
- data/README.md +104 -122
- data/Rakefile +377 -5
- data/aux/gv_results.slim +155 -0
- data/aux/html_files/css/gv.compiled.min.css +8 -0
- data/aux/{files → html_files}/css/src/bootstrap.min.css +0 -0
- data/aux/{files → html_files}/css/src/font-awesome.min.css +0 -0
- data/aux/{files → html_files}/css/src/style.css +0 -0
- data/aux/{files → html_files}/fonts/FontAwesome.otf +0 -0
- data/aux/{files → html_files}/fonts/fontawesome-webfont.eot +0 -0
- data/aux/{files → html_files}/fonts/fontawesome-webfont.svg +0 -0
- data/aux/{files → html_files}/fonts/fontawesome-webfont.ttf +0 -0
- data/aux/{files → html_files}/fonts/fontawesome-webfont.woff +0 -0
- data/aux/{files → html_files}/img/gene.png +0 -0
- data/aux/html_files/js/gv.compiled.min.js +1 -0
- data/aux/{files → html_files}/js/src/bootstrap.min.js +0 -0
- data/aux/{files → html_files}/js/src/d3.v3.min.js +0 -0
- data/aux/{files → html_files}/js/src/jquery-2.1.1.min.js +0 -0
- data/aux/{files → html_files}/js/src/jquery.tablesorter.min.js +0 -0
- data/aux/{files → html_files}/js/src/plots.js +1 -1
- data/aux/{files → html_files}/js/src/script.js +0 -0
- data/aux/{files → html_files}/json/.gitkeep +0 -0
- data/bin/genevalidator +393 -56
- data/exemplar_data/README.md +60 -0
- data/{data/mrna_data.fasta → exemplar_data/mrna_data.fa} +1 -1
- data/{data/protein_data.fasta → exemplar_data/protein_data.fa} +0 -0
- data/genevalidator.gemspec +35 -20
- data/install.sh +92 -0
- data/lib/genevalidator.rb +171 -56
- data/lib/genevalidator/arg_validation.rb +26 -55
- data/lib/genevalidator/blast.rb +44 -99
- data/lib/genevalidator/clusterization.rb +18 -22
- data/lib/genevalidator/exceptions.rb +17 -17
- data/lib/genevalidator/ext/array.rb +21 -4
- data/lib/genevalidator/get_raw_sequences.rb +32 -31
- data/lib/genevalidator/hsp.rb +31 -2
- data/lib/genevalidator/json_to_gv_results.rb +38 -122
- data/lib/genevalidator/output.rb +158 -172
- data/lib/genevalidator/output_files.rb +134 -0
- data/lib/genevalidator/pool.rb +2 -5
- data/lib/genevalidator/query.rb +1 -1
- data/lib/genevalidator/tabular_parser.rb +8 -29
- data/lib/genevalidator/validation.rb +48 -90
- data/lib/genevalidator/validation_alignment.rb +64 -75
- data/lib/genevalidator/validation_blast_reading_frame.rb +13 -9
- data/lib/genevalidator/validation_duplication.rb +85 -84
- data/lib/genevalidator/validation_gene_merge.rb +46 -35
- data/lib/genevalidator/validation_length_cluster.rb +18 -15
- data/lib/genevalidator/validation_length_rank.rb +19 -15
- data/lib/genevalidator/validation_maker_qi.rb +13 -12
- data/lib/genevalidator/validation_open_reading_frame.rb +16 -13
- data/lib/genevalidator/validation_report.rb +1 -1
- data/lib/genevalidator/validation_test.rb +1 -1
- data/lib/genevalidator/version.rb +1 -1
- data/test/overall.rb +1 -1
- data/test/test_all_validations.rb +36 -24
- data/test/test_blast.rb +39 -24
- data/test/test_clusterization_2d.rb +4 -4
- data/test/test_helper.rb +2 -2
- data/test/test_query.rb +16 -20
- data/test/test_validation_open_reading_frame.rb +122 -122
- data/test/test_validations.rb +12 -10
- metadata +94 -79
- data/aux/files/css/genevalidator.compiled.min.css +0 -16
- data/aux/files/js/genevalidator.compiled.min.js +0 -28
- data/aux/json_footer.erb +0 -8
- data/aux/json_header.erb +0 -19
- data/aux/json_query.erb +0 -15
- data/aux/template_footer.erb +0 -8
- data/aux/template_header.erb +0 -19
- data/aux/template_query.erb +0 -14
- data/data/README.md +0 -57
- data/data/mrna_data.fasta.blast_tabular +0 -3567
- data/data/mrna_data.fasta.blast_tabular.raw_seq +0 -53998
- data/data/mrna_data.fasta.blast_tabular.raw_seq.idx +0 -5440
- data/data/mrna_data.fasta.blast_xml +0 -39800
- data/data/mrna_data.fasta.blast_xml.raw_seq +0 -2554
- data/data/mrna_data.fasta.blast_xml.raw_seq.idx +0 -3127
- data/data/mrna_data.fasta.json +0 -1
- data/data/protein_data.fasta.blast_tabular +0 -3278
- data/data/protein_data.fasta.blast_tabular.raw_seq +0 -61295
- data/data/protein_data.fasta.blast_tabular.raw_seq.idx +0 -4438
- data/data/protein_data.fasta.blast_xml +0 -26228
- data/data/protein_data.fasta.blast_xml.raw_seq +0 -9803
- data/data/protein_data.fasta.blast_xml.raw_seq.idx +0 -1777
- data/data/protein_data.fasta.json +0 -1
data/lib/genevalidator/output.rb
CHANGED
@@ -1,15 +1,10 @@
|
|
1
|
-
require 'erb'
|
2
|
-
require 'fileutils'
|
3
1
|
require 'forwardable'
|
4
2
|
require 'json'
|
5
3
|
|
6
|
-
require 'genevalidator/version'
|
7
|
-
|
8
4
|
module GeneValidator
|
9
5
|
class Output
|
10
6
|
extend Forwardable
|
11
|
-
def_delegators GeneValidator, :opt, :config, :
|
12
|
-
:mutex_json
|
7
|
+
def_delegators GeneValidator, :opt, :config, :dirs, :mutex
|
13
8
|
attr_accessor :prediction_def
|
14
9
|
attr_accessor :nr_hits
|
15
10
|
|
@@ -27,9 +22,12 @@ module GeneValidator
|
|
27
22
|
# Params:
|
28
23
|
# +current_idx+: index of the current query
|
29
24
|
def initialize(current_idx, no_of_hits, definition)
|
30
|
-
@opt
|
31
|
-
@
|
25
|
+
@opt = opt
|
26
|
+
@dirs = dirs
|
27
|
+
@config = config
|
32
28
|
@config[:run_no] += 1
|
29
|
+
output_dir = @dirs[:output_dir]
|
30
|
+
@output_filename = File.join(output_dir, "#{@dirs[:filename]}_results")
|
33
31
|
|
34
32
|
@prediction_def = definition
|
35
33
|
@nr_hits = no_of_hits
|
@@ -37,62 +35,38 @@ module GeneValidator
|
|
37
35
|
end
|
38
36
|
|
39
37
|
def print_output_console
|
38
|
+
return unless @opt[:output_formats].include? 'stdout'
|
39
|
+
c_fmt = "%3s\t%5s\t%20s\t%7s\t"
|
40
40
|
mutex.synchronize do
|
41
|
-
print_console_header
|
42
|
-
short_def = @prediction_def.
|
43
|
-
print format(
|
44
|
-
@nr_hits)
|
41
|
+
print_console_header(c_fmt)
|
42
|
+
short_def = @prediction_def.split(' ')[0]
|
43
|
+
print format(c_fmt, @idx, @overall_score, short_def, @nr_hits)
|
45
44
|
puts validations.map(&:print).join("\t").gsub(' ', ' ')
|
46
45
|
end
|
47
46
|
end
|
48
47
|
|
49
|
-
def
|
50
|
-
@
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
mutex_html.synchronize do
|
58
|
-
output_html = output_filename
|
59
|
-
query_erb = File.join(@config[:aux], 'template_query.erb')
|
60
|
-
template_file = File.open(query_erb, 'r').read
|
61
|
-
erb = ERB.new(template_file, 0, '>')
|
62
|
-
File.open(output_html, 'a') { |f| f.write(erb.result(binding)) }
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
def output_filename
|
67
|
-
idx = (@config[:run_no].to_f / @config[:output_max]).ceil
|
68
|
-
output_html = File.join(@config[:html_path], "results#{idx}.html")
|
69
|
-
write_html_header(output_html)
|
70
|
-
output_html
|
71
|
-
end
|
72
|
-
|
73
|
-
def write_html_header(output_html)
|
74
|
-
head_erb = File.join(@config[:aux], 'template_header.erb')
|
75
|
-
set_up_html(head_erb, output_html) unless File.exist?(output_html)
|
48
|
+
##
# Builds the JSON row for the current query, records it in the shared
# results array and writes it to its own file inside the JSON directory.
def generate_json
  json_path = File.join(@dirs[:json_dir], "#{@dirs[:filename]}_#{@idx}.json")
  summary = { idx: @idx, overall_score: @overall_score,
              definition: @prediction_def, no_hits: @nr_hits }
  row = create_validation_hash(summary)
  # the row is stored one slot below @idx (idx appears to be 1-based)
  @config[:json_output][@idx - 1] = row
  File.write(json_path, row.to_json)
end
|
77
57
|
|
78
|
-
|
79
|
-
return if File.exist?(output_file)
|
80
|
-
template_contents = File.open(erb_file, 'r').read
|
81
|
-
erb = ERB.new(template_contents, 0, '>')
|
82
|
-
File.open(output_file, 'w+') { |f| f.write(erb.result(binding)) }
|
83
|
-
end
|
58
|
+
private
|
84
59
|
|
85
|
-
def
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
end
|
60
|
+
##
# Prints the console table header once per run.
# Params:
# +c_fmt+: format string used for the four leading columns
def print_console_header(c_fmt)
  # the flag lives in the shared config so that only the first call prints
  return if @config[:console_header_printed]
  @config[:console_header_printed] = true

  # progress banner followed by a blank line, both on stderr
  warn '==> Validating input sequences', ''

  leading_columns = format(c_fmt, 'No', 'Score', 'Identifier', 'No_Hits')
  print leading_columns
  puts validations.map(&:short_header).join("\t")
end
|
94
68
|
|
95
|
-
def
|
69
|
+
def create_validation_hash(row)
|
96
70
|
row[:validations] = {}
|
97
71
|
@validations.each do |item|
|
98
72
|
val = add_basic_validation_info(item)
|
@@ -106,7 +80,8 @@ module GeneValidator
|
|
106
80
|
|
107
81
|
def add_basic_validation_info(item)
|
108
82
|
{ header: item.header, description: item.description, status: item.color,
|
109
|
-
print: item.print.gsub(' ', ' ')
|
83
|
+
print: item.print.gsub(' ', ' '), run_time: item.run_time,
|
84
|
+
validation: item.validation }
|
110
85
|
end
|
111
86
|
|
112
87
|
def add_explanation_data(item)
|
@@ -124,138 +99,149 @@ module GeneValidator
|
|
124
99
|
graphs
|
125
100
|
end
|
126
101
|
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
File.open(json_file, 'w') { |f| f.write(array.to_json) }
|
135
|
-
end
|
136
|
-
|
137
|
-
##
|
138
|
-
# Method that closes the gas in the html file and writes the overall
|
139
|
-
# evaluation
|
140
|
-
# Param:
|
141
|
-
# +all_query_outputs+: array with +ValidationTest+ objects
|
142
|
-
# +html_path+: path of the html folder
|
143
|
-
# +filemane+: name of the fasta input file
|
144
|
-
def self.print_footer(overview, config)
|
145
|
-
set_overall_evaluation(overview, config)
|
146
|
-
|
147
|
-
footer_erb = File.join(config[:aux], 'template_footer.erb')
|
148
|
-
|
149
|
-
no_of_results_files = (config[:run_no].to_f / config[:output_max]).ceil
|
150
|
-
template_file = File.open(footer_erb, 'r').read
|
151
|
-
erb = ERB.new(template_file, 0, '>')
|
152
|
-
|
153
|
-
output_files = []
|
154
|
-
(1..no_of_results_files).each { |i| output_files << "results#{i}.html" }
|
155
|
-
|
156
|
-
(1..no_of_results_files).each do |i|
|
157
|
-
results_html = File.join(config[:html_path], "results#{i}.html")
|
158
|
-
File.open(results_html, 'a+') { |f| f.write(erb.result(binding)) }
|
102
|
+
class <<self
|
103
|
+
##
# Prints the overall evaluation to stderr.
# Params:
# +overall_evaluation+: (nested) Array of Strings with the report lines
# +opt+: options Hash; reads +:output_formats+ and +:hide_summary+
#
# The summary is only shown when 'stdout' is one of the output formats and
# the user has not asked to hide it. The previous guard
# (`return unless stdout || hide_summary`) printed the summary whenever
# +:hide_summary+ was set, which is the opposite of what the flag says.
def print_console_footer(overall_evaluation, opt)
  return unless opt[:output_formats].include?('stdout')
  return if opt[:hide_summary]

  warn ''
  warn "==> #{overall_evaluation.join("\n")}"
  warn ''
end
|
160
110
|
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
end
|
174
|
-
|
175
|
-
def self.turn_off_sorting(html_path)
|
176
|
-
script_file = File.join(html_path,
|
177
|
-
'files/js/genevalidator.compiled.min.js')
|
178
|
-
content = File.read(script_file).gsub(',initTableSorter(),', ',')
|
179
|
-
File.open("#{script_file}.tmp", 'w') { |f| f.puts content }
|
180
|
-
FileUtils.mv("#{script_file}.tmp", script_file)
|
181
|
-
end
|
182
|
-
|
183
|
-
def self.print_summary_to_console(overall_evaluation, summary)
|
184
|
-
# print to console
|
185
|
-
eval = ''
|
186
|
-
overall_evaluation.each { |e| eval << "#{e}\n" }
|
187
|
-
$stderr.puts eval if summary
|
188
|
-
$stderr.puts ''
|
189
|
-
eval
|
190
|
-
end
|
111
|
+
##
# Collects the run-wide statistics for all query rows into one Hash.
# Params:
# +json_data+: Array of row Hashes (reads +:overall_score+, +:no_hits+ and,
#              through the helpers, +:validations+)
# +min_blast_hits+: rows with fewer hits than this are counted as having
#                   insufficient BLAST hits
# Output:
# the Hash built by +overview_hash+
def generate_overview(json_data, min_blast_hits)
  scores = json_data.map { |row| row[:overall_score] }
  overview_hash(
    scores,
    scores.all_quartiles, # NOTE(review): not a core Array method - presumably the project's Array extension
    calculate_no_quries_with_no_evidence(json_data),
    count_mafft_errors(json_data),
    count_internet_errors(json_data),
    map_errors(json_data),
    calculate_run_time(json_data),
    json_data.count { |row| row[:no_hits] < min_blast_hits }
  )
end
|
191
123
|
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
124
|
+
##
# Packs the already computed statistics into the overview Hash.
# Scores of 75 and above are counted as good, everything below as bad.
# +quartiles+ is indexed 0..2 for the first, second and third quartile.
def overview_hash(scores_from_json, quartiles, nee, no_mafft, no_internet,
                  map_errors, run_time, insufficient_BLAST_hits)
  good_cutoff = 75
  summary = { scores: scores_from_json,
              no_queries: scores_from_json.length }
  summary[:good_scores] = scores_from_json.count { |score| score >= good_cutoff }
  summary[:bad_scores]  = scores_from_json.count { |score| score < good_cutoff }
  summary[:nee]         = nee
  summary[:no_mafft]    = no_mafft
  summary[:no_internet] = no_internet
  summary[:map_errors]  = map_errors
  summary[:run_time]    = run_time
  summary[:first_quartile_of_scores]  = quartiles[0]
  summary[:second_quartile_of_scores] = quartiles[1]
  summary[:third_quartile_of_scores]  = quartiles[2]
  summary[:insufficient_BLAST_hits]   = insufficient_BLAST_hits
  summary
end
|
203
139
|
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
140
|
+
# calculate number of queries that had warnings for all validations.
|
141
|
+
def calculate_no_quries_with_no_evidence(json_data)
  # a row counts when every one of its validations has the 'warning' status
  # (a row without any validations therefore counts as well)
  json_data.count do |row|
    row[:validations].all? { |_, info| info[:status] == 'warning' }
  end
end
|
214
151
|
|
215
|
-
|
216
|
-
|
217
|
-
|
152
|
+
##
# Counts the rows in which at least one validation printed 'Mafft error'.
def count_mafft_errors(json_data)
  json_data.count do |row|
    row[:validations].any? { |_, info| info[:print] == 'Mafft error' }
  end
end
|
218
158
|
|
219
|
-
|
220
|
-
|
221
|
-
|
159
|
+
##
# Counts the rows in which at least one validation printed 'Internet error'.
def count_internet_errors(json_data)
  json_data.count do |row|
    row[:validations].any? { |_, info| info[:print] == 'Internet error' }
  end
end
|
222
165
|
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
166
|
+
##
# Tallies, per validation key, how many rows have that validation marked
# as 'error'.
# Output:
# Hash (default value 0) mapping validation key => number of rows
def map_errors(json_data)
  json_data.each_with_object(Hash.new(0)) do |row, errors|
    row[:validations].each do |name, info|
      next unless info[:validation] == 'error'
      errors[name] += 1 unless name.nil?
    end
  end
end
|
228
174
|
|
229
|
-
|
230
|
-
|
231
|
-
|
175
|
+
##
# Accumulates, per validation, the total run time and the number of runs
# that contributed to it.
# Output:
# Hash mapping the validation key (as a String) to a +Pair1+ whose +x+ is
# the summed run time and whose +y+ is the number of runs counted
def calculate_run_time(json_data)
  # the default pair is only ever read, never modified, so sharing it is safe
  run_time = Hash.new(Pair1.new(0, 0))
  json_data.each do |row|
    row[:validations].each do |short_header, v|
      elapsed = v[:run_time]
      next if elapsed.nil? || elapsed.zero?
      # validations that did not really run must not skew the average
      next if %w[unapplicable error].include?(v[:validation])

      key = short_header.to_s
      so_far = run_time[key]
      run_time[key] = Pair1.new(so_far.x + elapsed, so_far.y + 1)
    end
  end
  run_time
end
|
233
|
-
eval
|
234
|
-
end
|
235
188
|
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
189
|
+
##
|
190
|
+
# Calculates an overall evaluation of the output
|
191
|
+
# Params:
|
192
|
+
# +overview+: Hash of overall statistics (read via keys such as +:good_scores+)
|
193
|
+
# Output
|
194
|
+
# Array of Strings with the reports
|
195
|
+
def generate_evaluation_text(overview)
  # one section per report; sections without any line are left out
  sections = [general_overview(overview),
              errors_overview(overview),
              time_overview(overview)]
  sections.reject(&:empty?)
end
|
242
|
-
|
243
|
-
|
202
|
+
|
203
|
+
private
|
204
|
+
|
205
|
+
##
# Builds the general part of the report (counts and score quartiles).
# Params:
# +o+: overview Hash
# Output:
# Array of Strings; the entry about insufficient BLAST hits is an empty
# String when no query was affected
def general_overview(o)
  good_pred = o[:good_scores] == 1 ? 'One' : "#{o[:good_scores]} are"
  bad_pred  = o[:bad_scores] == 1 ? 'One' : "#{o[:bad_scores]} are"

  skipped = o[:insufficient_BLAST_hits]
  # stays nil (and so renders as nothing) for any other count
  wording = if skipped == 1
              'prediction was'
            elsif skipped >= 2
              'predictions were'
            end
  blast_hits = if skipped.zero?
                 ''
               else
                 "#{skipped} #{wording} not evaluated due to an" \
                 ' insufficient number of BLAST hits.'
               end

  quartile_line = "The median overall score was #{o[:second_quartile_of_scores]} with" \
                  " an upper quartile of #{o[:third_quartile_of_scores]}" \
                  " and a lower quartile of #{o[:first_quartile_of_scores]}."

  ['Overall Query Score Evaluation:',
   "#{o[:no_queries]} predictions were validated, from which there were:",
   "#{good_pred} good prediction(s),",
   "#{bad_pred} possibly weak prediction(s).",
   blast_hits,
   quartile_line]
end
|
245
|
-
|
246
|
-
|
223
|
+
|
224
|
+
# errors per validation
|
225
|
+
##
# Builds the error part of the report.
# Params:
# +o+: overview Hash; reads +:map_errors+, +:no_mafft+, +:no_internet+,
#      +:no_queries+ and +:nee+
# Output:
# Array of Strings (empty when there is nothing to report)
def errors_overview(o)
  error_eval = o[:map_errors].map do |k, v|
    "We couldn't run #{k} Validation for #{v} queries"
  end

  # number of queries that had at least some evidence to work with
  evaluable = o[:no_queries] - o[:nee]

  # A failure is only reported when it actually happened at least once.
  # Without that check a run with no evaluable queries (evaluable == 0)
  # satisfied `0 >= 0` and claimed MAFFT / internet failures that never
  # occurred.
  if o[:no_mafft] > 0 && o[:no_mafft] >= evaluable
    error_eval << "We couldn't run MAFFT multiple alignment"
  end
  if o[:no_internet] > 0 && o[:no_internet] >= evaluable
    error_eval << "\nWe couldn't make use of your internet connection"
  end
  error_eval
end
|
248
|
-
error_eval
|
249
|
-
end
|
250
237
|
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
238
|
+
##
# Builds one report line per validation with its mean running time.
# Params:
# +o+: overview Hash; +o[:run_time]+ maps a validation key to an object
#      whose +x+ is the total time and whose +y+ is the number of runs
def time_overview(o)
  o[:run_time].map do |key, totals|
    runs = totals.y.to_f
    average = (totals.x / runs).round(3)
    "Average running time for #{key} Validation: #{average}s" \
    ' per validation'
  end
end
|
258
|
-
time_eval
|
259
245
|
end
|
260
246
|
end
|
261
247
|
end
|
@@ -0,0 +1,134 @@
|
|
1
|
+
require 'csv'
|
2
|
+
require 'slim'
|
3
|
+
require 'fileutils'
|
4
|
+
require 'forwardable'
|
5
|
+
require 'json'
|
6
|
+
|
7
|
+
require 'genevalidator/version'
|
8
|
+
|
9
|
+
module GeneValidator
|
10
|
+
# A Class for creating output files
|
11
|
+
class OutputFiles
|
12
|
+
extend Forwardable
|
13
|
+
def_delegators GeneValidator, :opt, :config, :dirs, :overview
|
14
|
+
|
15
|
+
##
# Takes a snapshot of the shared GeneValidator state (options, config,
# directories and overview) and keeps a handle on the collected JSON rows.
def initialize
  @opt, @config, @dirs, @overview = opt, config, dirs, overview
  @json_data = @config[:json_output]
end
|
22
|
+
|
23
|
+
##
# Renders the HTML report, one file per slice of +:output_max+ rows, and
# then writes the overview JSON. Does nothing unless 'html' is one of the
# requested output formats.
# Params:
# +overall_eval+: evaluation text, passed on to +create_overview_json_file+
def write_html(overall_eval)
  return unless @opt[:output_formats].include? 'html'
  @all_html_fnames = all_html_filenames
  template_file = File.join(@dirs[:aux_dir], 'gv_results.slim')
  template_contents = nil
  @json_data.each_slice(@config[:output_max]).with_index do |data, i|
    # presumably read by the template through the render scope (self)
    @json_data_section = data
    # The template is the same for every slice: read it once, on first use,
    # with File.read so that the file handle is closed again.
    template_contents ||= File.read(template_file)
    html_output = Slim::Template.new { template_contents }.render(self)
    File.write(@all_html_fnames[i], html_output)
  end
  create_overview_json_file(overall_eval)
end
|
35
|
+
|
36
|
+
##
# Dumps all collected rows as one JSON document, provided that 'json' is
# one of the requested output formats.
def write_json
  wanted = @opt[:output_formats].include?('json')
  return unless wanted

  File.write(@dirs[:json_file], @json_data.to_json)
end
|
40
|
+
|
41
|
+
##
# Appends a header line and one line per query to the CSV file, provided
# that 'csv' is one of the requested output formats.
#
# Each line holds the index, the overall score, the first word of the
# definition, the number of hits and the printed result of every
# validation. Commas inside a cell are replaced by ' -' so that they do
# not break the column layout.
def write_csv
  return unless @opt[:output_formats].include? 'csv'
  File.open(@dirs[:csv_file], 'a') do |file|
    file.puts csv_header.join(',')
    @json_data.each do |data|
      short_def = data[:definition].split(' ')[0]
      # Rows store the hit count under :no_hits (the other readers in this
      # class use that key); :nr_hits is never set, so the NumberOfHits
      # column used to be always empty.
      line = [data[:idx], data[:overall_score], short_def, data[:no_hits]]
      # NOTE(review): the search string shows up as a plain space here, which
      # makes this replacement a no-op; it may originally have been an HTML
      # entity or a non-breaking space - confirm against the repository.
      # Non-destructive gsub: the strings belong to the shared @json_data
      # and must not be altered for the other writers.
      line += data[:validations].values.map { |e| e[:print].gsub(' ', ' ') }
      cells = line.map { |e| e.is_a?(String) ? e.gsub(',', ' -') : e }
      file.puts cells.join(',')
    end
  end
end
|
55
|
+
|
56
|
+
##
# Writes the summary statistics as CSV lines (one "name,value" pair per
# line), provided that 'summary' is one of the requested output formats.
def write_summary
  return unless @opt[:output_formats].include? 'summary'
  csv_text = generate_summary_data.map(&:to_csv).join
  File.write(@dirs[:summary_file], csv_text)
end
|
63
|
+
|
64
|
+
##
# When the +:select_single_best+ option is set, picks the row with the
# highest overall score (ties are decided by the number of hits), writes
# the matching input sequence to the fasta file and prints it to stdout.
def print_best_fasta
  return unless @opt[:select_single_best]
  best_row = @json_data.max_by do |row|
    row.values_at(:overall_score, :no_hits)
  end
  query = GeneValidator.extract_input_fasta_sequence(best_row[:idx])
  File.write(@dirs[:fasta_file], query)
  puts query
end
|
71
|
+
|
72
|
+
private
|
73
|
+
|
74
|
+
##
# Names of the HTML result files, one per slice of +:output_max+ rows.
#
# When there are fewer rows than +:output_max+ a single, unnumbered file
# is used; otherwise the files are numbered from 1. The number of names
# now equals the number of slices: the earlier integer division followed
# by an inclusive range produced one surplus name whenever the row count
# was an exact multiple of +:output_max+.
def all_html_filenames
  total     = @json_data.length
  page_size = @config[:output_max]
  base      = File.join(@dirs[:output_dir], "#{@dirs[:filename]}_results")
  return ["#{base}.html"] if total < page_size

  pages = (total.to_f / page_size).ceil
  (1..pages).map { |page| "#{base}_#{page}.html" }
end
|
83
|
+
|
84
|
+
# By default, on page load, the results are automatically sorted by the
|
85
|
+
# index. However since the whole idea is that users would sort by JSON,
|
86
|
+
# this is not wanted here.
|
87
|
+
def turn_off_automated_sorting
  js_file = File.join(@dirs[:output_dir], 'html_files/js/gv.compiled.min.js')
  original_content = File.read(js_file)
  # removes the automatic sort on page load
  updated_content = original_content.gsub(',sortList:[[0,0]]', '')
  # Write to a temporary file first and then move it over the script.
  # (The method used to refer to an undefined `script_file` here and
  # raised a NameError.)
  tmp_file = "#{js_file}.tmp"
  File.open(tmp_file, 'w') { |f| f.puts updated_content }
  FileUtils.mv(tmp_file, js_file)
end
|
95
|
+
|
96
|
+
##
# Writes overview.json into the JSON directory.
# Params:
# +overall_eval+: Array of report sections, each an Array of Strings
#
# The full text joins every line of every section with '<br>' and has its
# single quotes backslash-escaped; the short text is the first section
# only, without escaping.
def create_overview_json_file(overall_eval)
  all_lines = overall_eval.flatten.join('<br>')
  evaluation = all_lines.gsub("'", %q(\\\'))
  less = overall_eval[0].join('<br>')
  target = File.join(@dirs[:json_dir], 'overview.json')
  File.write(target, overview_html_hash(evaluation, less).to_json)
end
|
103
|
+
|
104
|
+
# make the histogram with the resulting scores
|
105
|
+
def overview_html_hash(evaluation, less)
  # one bar per distinct score, in order of first appearance
  bars = @overview[:scores].group_by { |score| score }.map do |score, same|
    { key: score, value: same.length, main: false }
  end
  # the bars are wrapped in an outer array: a single series
  {
    data: [bars],
    type: :simplebars,
    aux1: 10,
    aux2: '',
    title: 'Overall GeneValidator Score Evaluation',
    footer: '',
    xtitle: 'Validation Score',
    ytitle: 'Number of Queries',
    less: less,
    evaluation: evaluation
  }
end
|
114
|
+
|
115
|
+
##
# Column titles for the CSV output: four fixed columns followed by the
# validation keys of the first row.
def csv_header
  fixed_columns = %w[AnalysisNumber GVScore Identifier NumberOfHits]
  validation_columns = @json_data[0][:validations].keys
  [*fixed_columns, *validation_columns]
end
|
120
|
+
|
121
|
+
##
# Name/value pairs for the summary file, taken from the overview Hash.
# Output:
# Array of two-element Arrays: [label, value]
def generate_summary_data
  label_to_key = {
    'num_predictions' => :no_queries,
    'num_good_predictions' => :good_scores,
    'num_bad_predictions' => :bad_scores,
    'num_predictions_with_insufficient_blast_hits' => :insufficient_BLAST_hits,
    'first_quartile_of_scores' => :first_quartile_of_scores,
    'second_quartile_of_scores' => :second_quartile_of_scores,
    'third_quartile_of_scores' => :third_quartile_of_scores
  }
  label_to_key.map { |label, key| [label, @overview[key]] }
end
|
133
|
+
end
|
134
|
+
end
|