genevalidator 1.6.12 → 2.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (91) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +30 -1
  3. data/.ruby-version +1 -0
  4. data/.travis.yml +13 -12
  5. data/Gemfile +4 -1
  6. data/Gemfile.lock +135 -0
  7. data/README.md +104 -122
  8. data/Rakefile +377 -5
  9. data/aux/gv_results.slim +155 -0
  10. data/aux/html_files/css/gv.compiled.min.css +8 -0
  11. data/aux/{files → html_files}/css/src/bootstrap.min.css +0 -0
  12. data/aux/{files → html_files}/css/src/font-awesome.min.css +0 -0
  13. data/aux/{files → html_files}/css/src/style.css +0 -0
  14. data/aux/{files → html_files}/fonts/FontAwesome.otf +0 -0
  15. data/aux/{files → html_files}/fonts/fontawesome-webfont.eot +0 -0
  16. data/aux/{files → html_files}/fonts/fontawesome-webfont.svg +0 -0
  17. data/aux/{files → html_files}/fonts/fontawesome-webfont.ttf +0 -0
  18. data/aux/{files → html_files}/fonts/fontawesome-webfont.woff +0 -0
  19. data/aux/{files → html_files}/img/gene.png +0 -0
  20. data/aux/html_files/js/gv.compiled.min.js +1 -0
  21. data/aux/{files → html_files}/js/src/bootstrap.min.js +0 -0
  22. data/aux/{files → html_files}/js/src/d3.v3.min.js +0 -0
  23. data/aux/{files → html_files}/js/src/jquery-2.1.1.min.js +0 -0
  24. data/aux/{files → html_files}/js/src/jquery.tablesorter.min.js +0 -0
  25. data/aux/{files → html_files}/js/src/plots.js +1 -1
  26. data/aux/{files → html_files}/js/src/script.js +0 -0
  27. data/aux/{files → html_files}/json/.gitkeep +0 -0
  28. data/bin/genevalidator +393 -56
  29. data/exemplar_data/README.md +60 -0
  30. data/{data/mrna_data.fasta → exemplar_data/mrna_data.fa} +1 -1
  31. data/{data/protein_data.fasta → exemplar_data/protein_data.fa} +0 -0
  32. data/genevalidator.gemspec +35 -20
  33. data/install.sh +92 -0
  34. data/lib/genevalidator.rb +171 -56
  35. data/lib/genevalidator/arg_validation.rb +26 -55
  36. data/lib/genevalidator/blast.rb +44 -99
  37. data/lib/genevalidator/clusterization.rb +18 -22
  38. data/lib/genevalidator/exceptions.rb +17 -17
  39. data/lib/genevalidator/ext/array.rb +21 -4
  40. data/lib/genevalidator/get_raw_sequences.rb +32 -31
  41. data/lib/genevalidator/hsp.rb +31 -2
  42. data/lib/genevalidator/json_to_gv_results.rb +38 -122
  43. data/lib/genevalidator/output.rb +158 -172
  44. data/lib/genevalidator/output_files.rb +134 -0
  45. data/lib/genevalidator/pool.rb +2 -5
  46. data/lib/genevalidator/query.rb +1 -1
  47. data/lib/genevalidator/tabular_parser.rb +8 -29
  48. data/lib/genevalidator/validation.rb +48 -90
  49. data/lib/genevalidator/validation_alignment.rb +64 -75
  50. data/lib/genevalidator/validation_blast_reading_frame.rb +13 -9
  51. data/lib/genevalidator/validation_duplication.rb +85 -84
  52. data/lib/genevalidator/validation_gene_merge.rb +46 -35
  53. data/lib/genevalidator/validation_length_cluster.rb +18 -15
  54. data/lib/genevalidator/validation_length_rank.rb +19 -15
  55. data/lib/genevalidator/validation_maker_qi.rb +13 -12
  56. data/lib/genevalidator/validation_open_reading_frame.rb +16 -13
  57. data/lib/genevalidator/validation_report.rb +1 -1
  58. data/lib/genevalidator/validation_test.rb +1 -1
  59. data/lib/genevalidator/version.rb +1 -1
  60. data/test/overall.rb +1 -1
  61. data/test/test_all_validations.rb +36 -24
  62. data/test/test_blast.rb +39 -24
  63. data/test/test_clusterization_2d.rb +4 -4
  64. data/test/test_helper.rb +2 -2
  65. data/test/test_query.rb +16 -20
  66. data/test/test_validation_open_reading_frame.rb +122 -122
  67. data/test/test_validations.rb +12 -10
  68. metadata +94 -79
  69. data/aux/files/css/genevalidator.compiled.min.css +0 -16
  70. data/aux/files/js/genevalidator.compiled.min.js +0 -28
  71. data/aux/json_footer.erb +0 -8
  72. data/aux/json_header.erb +0 -19
  73. data/aux/json_query.erb +0 -15
  74. data/aux/template_footer.erb +0 -8
  75. data/aux/template_header.erb +0 -19
  76. data/aux/template_query.erb +0 -14
  77. data/data/README.md +0 -57
  78. data/data/mrna_data.fasta.blast_tabular +0 -3567
  79. data/data/mrna_data.fasta.blast_tabular.raw_seq +0 -53998
  80. data/data/mrna_data.fasta.blast_tabular.raw_seq.idx +0 -5440
  81. data/data/mrna_data.fasta.blast_xml +0 -39800
  82. data/data/mrna_data.fasta.blast_xml.raw_seq +0 -2554
  83. data/data/mrna_data.fasta.blast_xml.raw_seq.idx +0 -3127
  84. data/data/mrna_data.fasta.json +0 -1
  85. data/data/protein_data.fasta.blast_tabular +0 -3278
  86. data/data/protein_data.fasta.blast_tabular.raw_seq +0 -61295
  87. data/data/protein_data.fasta.blast_tabular.raw_seq.idx +0 -4438
  88. data/data/protein_data.fasta.blast_xml +0 -26228
  89. data/data/protein_data.fasta.blast_xml.raw_seq +0 -9803
  90. data/data/protein_data.fasta.blast_xml.raw_seq.idx +0 -1777
  91. data/data/protein_data.fasta.json +0 -1
@@ -1,15 +1,10 @@
1
- require 'erb'
2
- require 'fileutils'
3
1
  require 'forwardable'
4
2
  require 'json'
5
3
 
6
- require 'genevalidator/version'
7
-
8
4
  module GeneValidator
9
5
  class Output
10
6
  extend Forwardable
11
- def_delegators GeneValidator, :opt, :config, :mutex, :mutex_html,
12
- :mutex_json
7
+ def_delegators GeneValidator, :opt, :config, :dirs, :mutex
13
8
  attr_accessor :prediction_def
14
9
  attr_accessor :nr_hits
15
10
 
@@ -27,9 +22,12 @@ module GeneValidator
27
22
  # Params:
28
23
  # +current_idx+: index of the current query
29
24
  def initialize(current_idx, no_of_hits, definition)
30
- @opt = opt
31
- @config = config
25
+ @opt = opt
26
+ @dirs = dirs
27
+ @config = config
32
28
  @config[:run_no] += 1
29
+ output_dir = @dirs[:output_dir]
30
+ @output_filename = File.join(output_dir, "#{@dirs[:filename]}_results")
33
31
 
34
32
  @prediction_def = definition
35
33
  @nr_hits = no_of_hits
@@ -37,62 +35,38 @@ module GeneValidator
37
35
  end
38
36
 
39
37
  def print_output_console
38
+ return unless @opt[:output_formats].include? 'stdout'
39
+ c_fmt = "%3s\t%5s\t%20s\t%7s\t"
40
40
  mutex.synchronize do
41
- print_console_header unless @config[:console_header_printed]
42
- short_def = @prediction_def.scan(/([^ ]+)/)[0][0]
43
- print format("%3s\t%5s\t%20s\t%7s\t", @idx, @overall_score, short_def,
44
- @nr_hits)
41
+ print_console_header(c_fmt)
42
+ short_def = @prediction_def.split(' ')[0]
43
+ print format(c_fmt, @idx, @overall_score, short_def, @nr_hits)
45
44
  puts validations.map(&:print).join("\t").gsub(' ', ' ')
46
45
  end
47
46
  end
48
47
 
49
- def print_console_header
50
- @config[:console_header_printed] = true
51
- print format("%3s\t%5s\t%20s\t%7s\t", 'No', 'Score', 'Identifier',
52
- 'No_Hits')
53
- puts validations.map(&:short_header).join("\t")
54
- end
55
-
56
- def generate_html
57
- mutex_html.synchronize do
58
- output_html = output_filename
59
- query_erb = File.join(@config[:aux], 'template_query.erb')
60
- template_file = File.open(query_erb, 'r').read
61
- erb = ERB.new(template_file, 0, '>')
62
- File.open(output_html, 'a') { |f| f.write(erb.result(binding)) }
63
- end
64
- end
65
-
66
- def output_filename
67
- idx = (@config[:run_no].to_f / @config[:output_max]).ceil
68
- output_html = File.join(@config[:html_path], "results#{idx}.html")
69
- write_html_header(output_html)
70
- output_html
71
- end
72
-
73
- def write_html_header(output_html)
74
- head_erb = File.join(@config[:aux], 'template_header.erb')
75
- set_up_html(head_erb, output_html) unless File.exist?(output_html)
48
+ def generate_json
49
+ fname = File.join(@dirs[:json_dir], "#{@dirs[:filename]}_#{@idx}.json")
50
+ row_data = { idx: @idx, overall_score: @overall_score,
51
+ definition: @prediction_def, no_hits: @nr_hits }
52
+ row = create_validation_hash(row_data)
53
+ arr_idx = @idx - 1
54
+ @config[:json_output][arr_idx] = row
55
+ File.open(fname, 'w') { |f| f.write(row.to_json) }
76
56
  end
77
57
 
78
- def set_up_html(erb_file, output_file)
79
- return if File.exist?(output_file)
80
- template_contents = File.open(erb_file, 'r').read
81
- erb = ERB.new(template_contents, 0, '>')
82
- File.open(output_file, 'w+') { |f| f.write(erb.result(binding)) }
83
- end
58
+ private
84
59
 
85
- def generate_json
86
- mutex_json.synchronize do
87
- row = { idx: @idx, overall_score: @overall_score,
88
- definition: @prediction_def, no_hits: @nr_hits }
89
- row = create_validation_hashes(row)
90
- write_row_json(row)
91
- @config[:json_output] << row
92
- end
60
+ def print_console_header(c_fmt)
61
+ return if @config[:console_header_printed]
62
+ @config[:console_header_printed] = true
63
+ warn '==> Validating input sequences'
64
+ warn '' # blank line
65
+ print format(c_fmt, 'No', 'Score', 'Identifier', 'No_Hits')
66
+ puts validations.map(&:short_header).join("\t")
93
67
  end
94
68
 
95
- def create_validation_hashes(row)
69
+ def create_validation_hash(row)
96
70
  row[:validations] = {}
97
71
  @validations.each do |item|
98
72
  val = add_basic_validation_info(item)
@@ -106,7 +80,8 @@ module GeneValidator
106
80
 
107
81
  def add_basic_validation_info(item)
108
82
  { header: item.header, description: item.description, status: item.color,
109
- print: item.print.gsub('&nbsp;', ' ') }
83
+ print: item.print.gsub('&nbsp;', ' '), run_time: item.run_time,
84
+ validation: item.validation }
110
85
  end
111
86
 
112
87
  def add_explanation_data(item)
@@ -124,138 +99,149 @@ module GeneValidator
124
99
  graphs
125
100
  end
126
101
 
127
- def write_row_json(row)
128
- row_json = File.join(@config[:plot_dir],
129
- "#{@config[:filename]}_#{@idx}.json")
130
- File.open(row_json, 'w') { |f| f.write(row.to_json) }
131
- end
132
-
133
- def self.write_json_file(array, json_file)
134
- File.open(json_file, 'w') { |f| f.write(array.to_json) }
135
- end
136
-
137
- ##
138
- # Method that closes the gas in the html file and writes the overall
139
- # evaluation
140
- # Param:
141
- # +all_query_outputs+: array with +ValidationTest+ objects
142
- # +html_path+: path of the html folder
143
- # +filemane+: name of the fasta input file
144
- def self.print_footer(overview, config)
145
- set_overall_evaluation(overview, config)
146
-
147
- footer_erb = File.join(config[:aux], 'template_footer.erb')
148
-
149
- no_of_results_files = (config[:run_no].to_f / config[:output_max]).ceil
150
- template_file = File.open(footer_erb, 'r').read
151
- erb = ERB.new(template_file, 0, '>')
152
-
153
- output_files = []
154
- (1..no_of_results_files).each { |i| output_files << "results#{i}.html" }
155
-
156
- (1..no_of_results_files).each do |i|
157
- results_html = File.join(config[:html_path], "results#{i}.html")
158
- File.open(results_html, 'a+') { |f| f.write(erb.result(binding)) }
102
+ class <<self
103
+ def print_console_footer(overall_evaluation, opt)
104
+ return unless (opt[:output_formats].include? 'stdout') ||
105
+ opt[:hide_summary]
106
+ warn ''
107
+ warn "==> #{overall_evaluation.join("\n")}"
108
+ warn ''
159
109
  end
160
110
 
161
- turn_off_sorting(config[:html_path]) if no_of_results_files > 1
162
- end
163
-
164
- def self.set_overall_evaluation(overview, config)
165
- overall_evaluation = overview(overview)
166
- less = overall_evaluation[0].gsub("\n", '<br>').gsub("'", %q(\\\'))
167
-
168
- eval = print_summary_to_console(overall_evaluation, config[:summary])
169
- evaluation = eval.gsub("\n", '<br>').gsub("'", %q(\\\'))
170
-
171
- create_overview_json(overview[:scores], config[:plot_dir], less,
172
- evaluation)
173
- end
174
-
175
- def self.turn_off_sorting(html_path)
176
- script_file = File.join(html_path,
177
- 'files/js/genevalidator.compiled.min.js')
178
- content = File.read(script_file).gsub(',initTableSorter(),', ',')
179
- File.open("#{script_file}.tmp", 'w') { |f| f.puts content }
180
- FileUtils.mv("#{script_file}.tmp", script_file)
181
- end
182
-
183
- def self.print_summary_to_console(overall_evaluation, summary)
184
- # print to console
185
- eval = ''
186
- overall_evaluation.each { |e| eval << "#{e}\n" }
187
- $stderr.puts eval if summary
188
- $stderr.puts ''
189
- eval
190
- end
111
+ def generate_overview(json_data, min_blast_hits)
112
+ scores_from_json = json_data.map { |e| e[:overall_score] }
113
+ quartiles = scores_from_json.all_quartiles
114
+ nee = calculate_no_quries_with_no_evidence(json_data)
115
+ no_mafft = count_mafft_errors(json_data)
116
+ no_internet = count_internet_errors(json_data)
117
+ map_errors = map_errors(json_data)
118
+ run_time = calculate_run_time(json_data)
119
+ min_hits = json_data.count { |e| e[:no_hits] < min_blast_hits }
120
+ overview_hash(scores_from_json, quartiles, nee, no_mafft, no_internet,
121
+ map_errors, run_time, min_hits)
122
+ end
191
123
 
192
- # make the historgram with the resulted scores
193
- def self.create_overview_json(scores, plot_dir, less, evaluation)
194
- plot_file = File.join(plot_dir, 'overview.json')
195
- data = [scores.group_by { |a| a }.map { |k, vs| { 'key' => k, 'value' => vs.length, 'main' => false } }]
196
- hash = { data: data, type: :simplebars,
197
- title: 'Overall GeneValidator Score Evaluation',
198
- footer: '', xtitle: 'Validation Score',
199
- ytitle: 'Number of Queries', aux1: 10, aux2: '', less: less,
200
- evaluation: evaluation }
201
- File.open(plot_file, 'w') { |f| f.write hash.to_json }
202
- end
124
+ def overview_hash(scores_from_json, quartiles, nee, no_mafft, no_internet,
125
+ map_errors, run_time, insufficient_BLAST_hits)
126
+ {
127
+ scores: scores_from_json,
128
+ no_queries: scores_from_json.length,
129
+ good_scores: scores_from_json.count { |s| s >= 75 },
130
+ bad_scores: scores_from_json.count { |s| s < 75 },
131
+ nee: nee, no_mafft: no_mafft, no_internet: no_internet,
132
+ map_errors: map_errors, run_time: run_time,
133
+ first_quartile_of_scores: quartiles[0],
134
+ second_quartile_of_scores: quartiles[1],
135
+ third_quartile_of_scores: quartiles[2],
136
+ insufficient_BLAST_hits: insufficient_BLAST_hits
137
+ }
138
+ end
203
139
 
204
- ##
205
- # Calculates an overall evaluation of the output
206
- # Params:
207
- # +all_query_outputs+: Array of +ValidationTest+ objects
208
- # Output
209
- # Array of Strigs with the reports
210
- def self.overview(o)
211
- eval = general_overview(o)
212
- error_eval = errors_overview(o)
213
- time_eval = time_overview(o)
140
+ # calculate number of queries that had warnings for all validations.
141
+ def calculate_no_quries_with_no_evidence(json_data)
142
+ all_warnings = 0
143
+ json_data.each do |row|
144
+ status = row[:validations].map { |_, h| h[:status] }
145
+ if status.count { |r| r == 'warning' } == status.length
146
+ all_warnings += 1
147
+ end
148
+ end
149
+ all_warnings
150
+ end
214
151
 
215
- overall_evaluation = [eval, error_eval, time_eval]
216
- overall_evaluation.select { |e| e != '' }
217
- end
152
+ def count_mafft_errors(json_data)
153
+ json_data.count do |row|
154
+ num = row[:validations].count { |_, h| h[:print] == 'Mafft error' }
155
+ num.zero? ? false : true
156
+ end
157
+ end
218
158
 
219
- def self.general_overview(o)
220
- good_pred = (o[:good_scores] == 1) ? 'One' : "#{o[:good_scores]} are"
221
- bad_pred = (o[:bad_scores] == 1) ? 'One' : "#{o[:bad_scores]} are"
159
+ def count_internet_errors(json_data)
160
+ json_data.count do |row|
161
+ num = row[:validations].count { |_, h| h[:print] == 'Internet error' }
162
+ num.zero? ? false : true
163
+ end
164
+ end
222
165
 
223
- eval = "Overall Query Score Evaluation:\n" \
224
- "#{o[:no_queries]} predictions were validated, from which there" \
225
- " were:\n" \
226
- "#{good_pred} good prediction(s),\n" \
227
- "#{bad_pred} possibly weak prediction(s).\n"
166
+ def map_errors(json_data)
167
+ errors = Hash.new(0)
168
+ json_data.each do |row|
169
+ e = row[:validations].map { |s, h| s if h[:validation] == 'error' }
170
+ e.compact.each { |err| errors[err] += 1 }
171
+ end
172
+ errors
173
+ end
228
174
 
229
- if o[:nee] != 0 # nee = no evidence
230
- eval << "#{o[:nee]} could not be evaluated due to the lack of" \
231
- ' evidence.'
175
+ def calculate_run_time(json_data)
176
+ run_time = Hash.new(Pair1.new(0, 0))
177
+ json_data.map do |row|
178
+ row[:validations].each do |short_header, v|
179
+ next if v[:run_time].nil? || v[:run_time].zero?
180
+ next if v[:validation] == 'unapplicable' || v[:validation] == 'error'
181
+ p = Pair1.new(run_time[short_header.to_s].x + v[:run_time],
182
+ run_time[short_header.to_s].y + 1)
183
+ run_time[short_header.to_s] = p
184
+ end
185
+ end
186
+ run_time
232
187
  end
233
- eval
234
- end
235
188
 
236
- # errors per validation
237
- def self.errors_overview(o)
238
- error_eval = ''
239
- o[:map_errors].each do |k, v|
240
- error_eval << "\nWe couldn't run #{k} Validation for #{v} queries"
189
+ ##
190
+ # Calculates an overall evaluation of the output
191
+ # Params:
192
+ # +all_query_outputs+: Array of +ValidationTest+ objects
193
+ # Output
194
+ # Array of Strings with the reports
195
+ def generate_evaluation_text(overview)
196
+ eval = general_overview(overview)
197
+ error_eval = errors_overview(overview)
198
+ time_eval = time_overview(overview)
199
+
200
+ [eval, error_eval, time_eval].reject(&:empty?)
241
201
  end
242
- if o[:no_mafft] >= (o[:no_queries] - o[:nee])
243
- error_eval << "\nWe couldn't run MAFFT multiple alignment"
202
+
203
+ private
204
+
205
+ def general_overview(o)
206
+ good_pred = o[:good_scores] == 1 ? 'One' : "#{o[:good_scores]} are"
207
+ bad_pred = o[:bad_scores] == 1 ? 'One' : "#{o[:bad_scores]} are"
208
+
209
+ plural = 'prediction was' if o[:insufficient_BLAST_hits] == 1
210
+ plural = 'predictions were' if o[:insufficient_BLAST_hits] >= 2
211
+ b = "#{o[:insufficient_BLAST_hits]} #{plural} not evaluated due to an" \
212
+ ' insufficient number of BLAST hits.'
213
+ blast_hits = o[:insufficient_BLAST_hits].zero? ? '' : b
214
+
215
+ ['Overall Query Score Evaluation:',
216
+ "#{o[:no_queries]} predictions were validated, from which there were:",
217
+ "#{good_pred} good prediction(s),",
218
+ "#{bad_pred} possibly weak prediction(s).", blast_hits,
219
+ "The median overall score was #{o[:second_quartile_of_scores]} with" \
220
+ " an upper quartile of #{o[:third_quartile_of_scores]}" \
221
+ " and a lower quartile of #{o[:first_quartile_of_scores]}."]
244
222
  end
245
- if o[:no_internet] >= (o[:no_queries] - o[:nee])
246
- error_eval << "\nWe couldn't make use of your internet connection"
223
+
224
+ # errors per validation
225
+ def errors_overview(o)
226
+ error_eval = o[:map_errors].map do |k, v|
227
+ "We couldn't run #{k} Validation for #{v} queries"
228
+ end
229
+ if o[:no_mafft] >= (o[:no_queries] - o[:nee])
230
+ error_eval << "We couldn't run MAFFT multiple alignment"
231
+ end
232
+ if o[:no_internet] >= (o[:no_queries] - o[:nee])
233
+ error_eval << "\nWe couldn't make use of your internet connection"
234
+ end
235
+ error_eval
247
236
  end
248
- error_eval
249
- end
250
237
 
251
- def self.time_overview(o)
252
- time_eval = ''
253
- o[:run_time].each do |key, value|
254
- average_time = value.x / (value.y).to_f
255
- time_eval << "\nAverage running time for #{key} Validation:" \
256
- " #{average_time.round(3)}s per validation"
238
+ def time_overview(o)
239
+ o[:run_time].map do |key, value|
240
+ mean_time = value.x / value.y.to_f
241
+ "Average running time for #{key} Validation: #{mean_time.round(3)}s" \
242
+ ' per validation'
243
+ end
257
244
  end
258
- time_eval
259
245
  end
260
246
  end
261
247
  end
@@ -0,0 +1,134 @@
1
+ require 'csv'
2
+ require 'slim'
3
+ require 'fileutils'
4
+ require 'forwardable'
5
+ require 'json'
6
+
7
+ require 'genevalidator/version'
8
+
9
module GeneValidator
  # Writes the final result files (HTML, JSON, CSV, summary and best-hit
  # FASTA) from the per-query rows stored in +config[:json_output]+.
  #
  # Every writer is a no-op unless its format has been requested through
  # +opt[:output_formats]+ (or +opt[:select_single_best]+ for the FASTA).
  class OutputFiles
    extend Forwardable
    def_delegators GeneValidator, :opt, :config, :dirs, :overview

    # Snapshots the run state exposed by the GeneValidator module.
    def initialize
      @config    = config
      @opt       = opt
      @dirs      = dirs
      @overview  = overview
      @json_data = @config[:json_output]
    end

    # Renders the Slim template once per slice of +config[:output_max]+ rows
    # and then writes the overview JSON.
    # Params:
    # +overall_eval+: Array of Arrays of Strings (one Array per report section)
    def write_html(overall_eval)
      return unless @opt[:output_formats].include? 'html'
      @all_html_fnames = all_html_filenames
      template_file = File.join(@dirs[:aux_dir], 'gv_results.slim')
      # Read the template a single time; File.read also closes the handle.
      template_contents = File.read(template_file)
      @json_data.each_slice(@config[:output_max]).with_index do |data, i|
        # The template is rendered against self, so the current slice is
        # exposed to it through an instance variable.
        @json_data_section = data
        html_output = Slim::Template.new { template_contents }.render(self)
        File.open(@all_html_fnames[i], 'w') { |f| f.write(html_output) }
      end
      create_overview_json_file(overall_eval)
    end

    # Dumps all rows into +dirs[:json_file]+.
    def write_json
      return unless @opt[:output_formats].include? 'json'
      File.open(@dirs[:json_file], 'w') { |f| f.write(@json_data.to_json) }
    end

    # Appends a header line and one comma separated line per row to
    # +dirs[:csv_file]+.
    def write_csv
      return unless @opt[:output_formats].include? 'csv'
      File.open(@dirs[:csv_file], 'a') do |file|
        file.puts csv_header.join(',')
        @json_data.each { |row| file.puts csv_line(row) }
      end
    end

    # Writes the overview figures as "name,value" lines to
    # +dirs[:summary_file]+.
    def write_summary
      return unless @opt[:output_formats].include? 'summary'
      data = generate_summary_data
      File.open(@dirs[:summary_file], 'w') do |f|
        f.write data.map(&:to_csv).join
      end
    end

    # Writes (and prints to stdout) the input sequence of the row with the
    # highest overall score; ties are broken by the number of hits.
    def print_best_fasta
      return unless @opt[:select_single_best]
      top_data = @json_data.max_by { |e| [e[:overall_score], e[:no_hits]] }
      return if top_data.nil? # nothing was validated
      query = GeneValidator.extract_input_fasta_sequence(top_data[:idx])
      File.open(@dirs[:fasta_file], 'w') { |f| f.write(query) }
      puts query
    end

    private

    # Returns one HTML file name per slice written by +write_html+.
    # A single page is called "<filename>_results.html"; several pages get a
    # 1-based "_<n>" suffix.
    def all_html_filenames
      # Float division: Integer#/ truncates, which made the former .ceil a
      # no-op and produced a surplus file name for exact multiples.
      parts = (@json_data.length.to_f / @config[:output_max]).ceil
      parts = 1 if parts < 1
      base = File.join(@dirs[:output_dir], "#{@dirs[:filename]}_results")
      return ["#{base}.html"] if parts == 1
      (1..parts).map { |n| "#{base}_#{n}.html" }
    end

    # Builds one CSV line for +row+ without modifying the row itself.
    # '&nbsp;' becomes a space and commas inside a cell become ' -', because
    # the cells are joined with plain commas (no quoting).
    def csv_line(row)
      short_def = row[:definition].split(' ')[0]
      cells = [row[:idx], row[:overall_score], short_def, row[:no_hits]]
      cells += row[:validations].values.map do |v|
        v[:print].to_s.gsub('&nbsp;', ' ')
      end
      cells.map { |c| c.is_a?(String) ? c.gsub(',', ' -') : c }.join(',')
    end

    # Removes the ',sortList:[[0,0]]' option from the compiled JavaScript in
    # the output directory so that the results table is no longer sorted
    # automatically on page load.
    def turn_off_automated_sorting
      js_file = File.join(@dirs[:output_dir], 'html_files/js/gv.compiled.min.js')
      original_content = File.read(js_file)
      updated_content = original_content.gsub(',sortList:[[0,0]]', '')
      # Write to a temporary file first, then move it over the original.
      File.open("#{js_file}.tmp", 'w') { |f| f.puts updated_content }
      FileUtils.mv("#{js_file}.tmp", js_file)
    end

    # Writes 'overview.json' into +dirs[:json_dir]+.
    def create_overview_json_file(overall_eval)
      evaluation = overall_eval.flatten.join('<br>').gsub("'", %q(\\\'))
      less = overall_eval[0].join('<br>')
      hash = overview_html_hash(evaluation, less)
      json = File.join(@dirs[:json_dir], 'overview.json')
      File.open(json, 'w') { |f| f.write hash.to_json }
    end

    # Builds the data for the histogram of the resulting scores.
    def overview_html_hash(evaluation, less)
      bars = @overview[:scores].group_by { |a| a }.map do |k, vs|
        { key: k, value: vs.length, main: false }
      end
      { data: [bars], type: :simplebars, aux1: 10, aux2: '',
        title: 'Overall GeneValidator Score Evaluation', footer: '',
        xtitle: 'Validation Score', ytitle: 'Number of Queries',
        less: less, evaluation: evaluation }
    end

    # Fixed columns followed by the validation keys of the first row (if any).
    def csv_header
      header = %w[AnalysisNumber GVScore Identifier NumberOfHits]
      first_row = @json_data.first
      header += first_row[:validations].keys if first_row
      header
    end

    def generate_summary_data
      [
        ['num_predictions', @overview[:no_queries]],
        ['num_good_predictions', @overview[:good_scores]],
        ['num_bad_predictions', @overview[:bad_scores]],
        ['num_predictions_with_insufficient_blast_hits',
         @overview[:insufficient_BLAST_hits]],
        ['first_quartile_of_scores', @overview[:first_quartile_of_scores]],
        ['second_quartile_of_scores', @overview[:second_quartile_of_scores]],
        ['third_quartile_of_scores', @overview[:third_quartile_of_scores]]
      ]
    end
  end
end