genevalidator 1.6.12 → 2.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +30 -1
  3. data/.ruby-version +1 -0
  4. data/.travis.yml +13 -12
  5. data/Gemfile +4 -1
  6. data/Gemfile.lock +135 -0
  7. data/README.md +104 -122
  8. data/Rakefile +377 -5
  9. data/aux/gv_results.slim +155 -0
  10. data/aux/html_files/css/gv.compiled.min.css +8 -0
  11. data/aux/{files → html_files}/css/src/bootstrap.min.css +0 -0
  12. data/aux/{files → html_files}/css/src/font-awesome.min.css +0 -0
  13. data/aux/{files → html_files}/css/src/style.css +0 -0
  14. data/aux/{files → html_files}/fonts/FontAwesome.otf +0 -0
  15. data/aux/{files → html_files}/fonts/fontawesome-webfont.eot +0 -0
  16. data/aux/{files → html_files}/fonts/fontawesome-webfont.svg +0 -0
  17. data/aux/{files → html_files}/fonts/fontawesome-webfont.ttf +0 -0
  18. data/aux/{files → html_files}/fonts/fontawesome-webfont.woff +0 -0
  19. data/aux/{files → html_files}/img/gene.png +0 -0
  20. data/aux/html_files/js/gv.compiled.min.js +1 -0
  21. data/aux/{files → html_files}/js/src/bootstrap.min.js +0 -0
  22. data/aux/{files → html_files}/js/src/d3.v3.min.js +0 -0
  23. data/aux/{files → html_files}/js/src/jquery-2.1.1.min.js +0 -0
  24. data/aux/{files → html_files}/js/src/jquery.tablesorter.min.js +0 -0
  25. data/aux/{files → html_files}/js/src/plots.js +1 -1
  26. data/aux/{files → html_files}/js/src/script.js +0 -0
  27. data/aux/{files → html_files}/json/.gitkeep +0 -0
  28. data/bin/genevalidator +393 -56
  29. data/exemplar_data/README.md +60 -0
  30. data/{data/mrna_data.fasta → exemplar_data/mrna_data.fa} +1 -1
  31. data/{data/protein_data.fasta → exemplar_data/protein_data.fa} +0 -0
  32. data/genevalidator.gemspec +35 -20
  33. data/install.sh +92 -0
  34. data/lib/genevalidator.rb +171 -56
  35. data/lib/genevalidator/arg_validation.rb +26 -55
  36. data/lib/genevalidator/blast.rb +44 -99
  37. data/lib/genevalidator/clusterization.rb +18 -22
  38. data/lib/genevalidator/exceptions.rb +17 -17
  39. data/lib/genevalidator/ext/array.rb +21 -4
  40. data/lib/genevalidator/get_raw_sequences.rb +32 -31
  41. data/lib/genevalidator/hsp.rb +31 -2
  42. data/lib/genevalidator/json_to_gv_results.rb +38 -122
  43. data/lib/genevalidator/output.rb +158 -172
  44. data/lib/genevalidator/output_files.rb +134 -0
  45. data/lib/genevalidator/pool.rb +2 -5
  46. data/lib/genevalidator/query.rb +1 -1
  47. data/lib/genevalidator/tabular_parser.rb +8 -29
  48. data/lib/genevalidator/validation.rb +48 -90
  49. data/lib/genevalidator/validation_alignment.rb +64 -75
  50. data/lib/genevalidator/validation_blast_reading_frame.rb +13 -9
  51. data/lib/genevalidator/validation_duplication.rb +85 -84
  52. data/lib/genevalidator/validation_gene_merge.rb +46 -35
  53. data/lib/genevalidator/validation_length_cluster.rb +18 -15
  54. data/lib/genevalidator/validation_length_rank.rb +19 -15
  55. data/lib/genevalidator/validation_maker_qi.rb +13 -12
  56. data/lib/genevalidator/validation_open_reading_frame.rb +16 -13
  57. data/lib/genevalidator/validation_report.rb +1 -1
  58. data/lib/genevalidator/validation_test.rb +1 -1
  59. data/lib/genevalidator/version.rb +1 -1
  60. data/test/overall.rb +1 -1
  61. data/test/test_all_validations.rb +36 -24
  62. data/test/test_blast.rb +39 -24
  63. data/test/test_clusterization_2d.rb +4 -4
  64. data/test/test_helper.rb +2 -2
  65. data/test/test_query.rb +16 -20
  66. data/test/test_validation_open_reading_frame.rb +122 -122
  67. data/test/test_validations.rb +12 -10
  68. metadata +94 -79
  69. data/aux/files/css/genevalidator.compiled.min.css +0 -16
  70. data/aux/files/js/genevalidator.compiled.min.js +0 -28
  71. data/aux/json_footer.erb +0 -8
  72. data/aux/json_header.erb +0 -19
  73. data/aux/json_query.erb +0 -15
  74. data/aux/template_footer.erb +0 -8
  75. data/aux/template_header.erb +0 -19
  76. data/aux/template_query.erb +0 -14
  77. data/data/README.md +0 -57
  78. data/data/mrna_data.fasta.blast_tabular +0 -3567
  79. data/data/mrna_data.fasta.blast_tabular.raw_seq +0 -53998
  80. data/data/mrna_data.fasta.blast_tabular.raw_seq.idx +0 -5440
  81. data/data/mrna_data.fasta.blast_xml +0 -39800
  82. data/data/mrna_data.fasta.blast_xml.raw_seq +0 -2554
  83. data/data/mrna_data.fasta.blast_xml.raw_seq.idx +0 -3127
  84. data/data/mrna_data.fasta.json +0 -1
  85. data/data/protein_data.fasta.blast_tabular +0 -3278
  86. data/data/protein_data.fasta.blast_tabular.raw_seq +0 -61295
  87. data/data/protein_data.fasta.blast_tabular.raw_seq.idx +0 -4438
  88. data/data/protein_data.fasta.blast_xml +0 -26228
  89. data/data/protein_data.fasta.blast_xml.raw_seq +0 -9803
  90. data/data/protein_data.fasta.blast_xml.raw_seq.idx +0 -1777
  91. data/data/protein_data.fasta.json +0 -1
@@ -1,15 +1,10 @@
1
- require 'erb'
2
- require 'fileutils'
3
1
  require 'forwardable'
4
2
  require 'json'
5
3
 
6
- require 'genevalidator/version'
7
-
8
4
  module GeneValidator
9
5
  class Output
10
6
  extend Forwardable
11
- def_delegators GeneValidator, :opt, :config, :mutex, :mutex_html,
12
- :mutex_json
7
+ def_delegators GeneValidator, :opt, :config, :dirs, :mutex
13
8
  attr_accessor :prediction_def
14
9
  attr_accessor :nr_hits
15
10
 
@@ -27,9 +22,12 @@ module GeneValidator
27
22
  # Params:
28
23
  # +current_idx+: index of the current query
29
24
  def initialize(current_idx, no_of_hits, definition)
30
- @opt = opt
31
- @config = config
25
+ @opt = opt
26
+ @dirs = dirs
27
+ @config = config
32
28
  @config[:run_no] += 1
29
+ output_dir = @dirs[:output_dir]
30
+ @output_filename = File.join(output_dir, "#{@dirs[:filename]}_results")
33
31
 
34
32
  @prediction_def = definition
35
33
  @nr_hits = no_of_hits
@@ -37,62 +35,38 @@ module GeneValidator
37
35
  end
38
36
 
39
37
  def print_output_console
38
+ return unless @opt[:output_formats].include? 'stdout'
39
+ c_fmt = "%3s\t%5s\t%20s\t%7s\t"
40
40
  mutex.synchronize do
41
- print_console_header unless @config[:console_header_printed]
42
- short_def = @prediction_def.scan(/([^ ]+)/)[0][0]
43
- print format("%3s\t%5s\t%20s\t%7s\t", @idx, @overall_score, short_def,
44
- @nr_hits)
41
+ print_console_header(c_fmt)
42
+ short_def = @prediction_def.split(' ')[0]
43
+ print format(c_fmt, @idx, @overall_score, short_def, @nr_hits)
45
44
  puts validations.map(&:print).join("\t").gsub(' ', ' ')
46
45
  end
47
46
  end
48
47
 
49
- def print_console_header
50
- @config[:console_header_printed] = true
51
- print format("%3s\t%5s\t%20s\t%7s\t", 'No', 'Score', 'Identifier',
52
- 'No_Hits')
53
- puts validations.map(&:short_header).join("\t")
54
- end
55
-
56
- def generate_html
57
- mutex_html.synchronize do
58
- output_html = output_filename
59
- query_erb = File.join(@config[:aux], 'template_query.erb')
60
- template_file = File.open(query_erb, 'r').read
61
- erb = ERB.new(template_file, 0, '>')
62
- File.open(output_html, 'a') { |f| f.write(erb.result(binding)) }
63
- end
64
- end
65
-
66
- def output_filename
67
- idx = (@config[:run_no].to_f / @config[:output_max]).ceil
68
- output_html = File.join(@config[:html_path], "results#{idx}.html")
69
- write_html_header(output_html)
70
- output_html
71
- end
72
-
73
- def write_html_header(output_html)
74
- head_erb = File.join(@config[:aux], 'template_header.erb')
75
- set_up_html(head_erb, output_html) unless File.exist?(output_html)
48
+ def generate_json
49
+ fname = File.join(@dirs[:json_dir], "#{@dirs[:filename]}_#{@idx}.json")
50
+ row_data = { idx: @idx, overall_score: @overall_score,
51
+ definition: @prediction_def, no_hits: @nr_hits }
52
+ row = create_validation_hash(row_data)
53
+ arr_idx = @idx - 1
54
+ @config[:json_output][arr_idx] = row
55
+ File.open(fname, 'w') { |f| f.write(row.to_json) }
76
56
  end
77
57
 
78
- def set_up_html(erb_file, output_file)
79
- return if File.exist?(output_file)
80
- template_contents = File.open(erb_file, 'r').read
81
- erb = ERB.new(template_contents, 0, '>')
82
- File.open(output_file, 'w+') { |f| f.write(erb.result(binding)) }
83
- end
58
+ private
84
59
 
85
- def generate_json
86
- mutex_json.synchronize do
87
- row = { idx: @idx, overall_score: @overall_score,
88
- definition: @prediction_def, no_hits: @nr_hits }
89
- row = create_validation_hashes(row)
90
- write_row_json(row)
91
- @config[:json_output] << row
92
- end
60
+ def print_console_header(c_fmt)
61
+ return if @config[:console_header_printed]
62
+ @config[:console_header_printed] = true
63
+ warn '==> Validating input sequences'
64
+ warn '' # blank line
65
+ print format(c_fmt, 'No', 'Score', 'Identifier', 'No_Hits')
66
+ puts validations.map(&:short_header).join("\t")
93
67
  end
94
68
 
95
- def create_validation_hashes(row)
69
+ def create_validation_hash(row)
96
70
  row[:validations] = {}
97
71
  @validations.each do |item|
98
72
  val = add_basic_validation_info(item)
@@ -106,7 +80,8 @@ module GeneValidator
106
80
 
107
81
  def add_basic_validation_info(item)
108
82
  { header: item.header, description: item.description, status: item.color,
109
- print: item.print.gsub('&nbsp;', ' ') }
83
+ print: item.print.gsub('&nbsp;', ' '), run_time: item.run_time,
84
+ validation: item.validation }
110
85
  end
111
86
 
112
87
  def add_explanation_data(item)
@@ -124,138 +99,149 @@ module GeneValidator
124
99
  graphs
125
100
  end
126
101
 
127
- def write_row_json(row)
128
- row_json = File.join(@config[:plot_dir],
129
- "#{@config[:filename]}_#{@idx}.json")
130
- File.open(row_json, 'w') { |f| f.write(row.to_json) }
131
- end
132
-
133
- def self.write_json_file(array, json_file)
134
- File.open(json_file, 'w') { |f| f.write(array.to_json) }
135
- end
136
-
137
- ##
138
- # Method that closes the gas in the html file and writes the overall
139
- # evaluation
140
- # Param:
141
- # +all_query_outputs+: array with +ValidationTest+ objects
142
- # +html_path+: path of the html folder
143
- # +filemane+: name of the fasta input file
144
- def self.print_footer(overview, config)
145
- set_overall_evaluation(overview, config)
146
-
147
- footer_erb = File.join(config[:aux], 'template_footer.erb')
148
-
149
- no_of_results_files = (config[:run_no].to_f / config[:output_max]).ceil
150
- template_file = File.open(footer_erb, 'r').read
151
- erb = ERB.new(template_file, 0, '>')
152
-
153
- output_files = []
154
- (1..no_of_results_files).each { |i| output_files << "results#{i}.html" }
155
-
156
- (1..no_of_results_files).each do |i|
157
- results_html = File.join(config[:html_path], "results#{i}.html")
158
- File.open(results_html, 'a+') { |f| f.write(erb.result(binding)) }
102
+ class <<self
103
+ def print_console_footer(overall_evaluation, opt)
104
+ return unless (opt[:output_formats].include? 'stdout') ||
105
+ opt[:hide_summary]
106
+ warn ''
107
+ warn "==> #{overall_evaluation.join("\n")}"
108
+ warn ''
159
109
  end
160
110
 
161
- turn_off_sorting(config[:html_path]) if no_of_results_files > 1
162
- end
163
-
164
- def self.set_overall_evaluation(overview, config)
165
- overall_evaluation = overview(overview)
166
- less = overall_evaluation[0].gsub("\n", '<br>').gsub("'", %q(\\\'))
167
-
168
- eval = print_summary_to_console(overall_evaluation, config[:summary])
169
- evaluation = eval.gsub("\n", '<br>').gsub("'", %q(\\\'))
170
-
171
- create_overview_json(overview[:scores], config[:plot_dir], less,
172
- evaluation)
173
- end
174
-
175
- def self.turn_off_sorting(html_path)
176
- script_file = File.join(html_path,
177
- 'files/js/genevalidator.compiled.min.js')
178
- content = File.read(script_file).gsub(',initTableSorter(),', ',')
179
- File.open("#{script_file}.tmp", 'w') { |f| f.puts content }
180
- FileUtils.mv("#{script_file}.tmp", script_file)
181
- end
182
-
183
- def self.print_summary_to_console(overall_evaluation, summary)
184
- # print to console
185
- eval = ''
186
- overall_evaluation.each { |e| eval << "#{e}\n" }
187
- $stderr.puts eval if summary
188
- $stderr.puts ''
189
- eval
190
- end
111
+ def generate_overview(json_data, min_blast_hits)
112
+ scores_from_json = json_data.map { |e| e[:overall_score] }
113
+ quartiles = scores_from_json.all_quartiles
114
+ nee = calculate_no_quries_with_no_evidence(json_data)
115
+ no_mafft = count_mafft_errors(json_data)
116
+ no_internet = count_internet_errors(json_data)
117
+ map_errors = map_errors(json_data)
118
+ run_time = calculate_run_time(json_data)
119
+ min_hits = json_data.count { |e| e[:no_hits] < min_blast_hits }
120
+ overview_hash(scores_from_json, quartiles, nee, no_mafft, no_internet,
121
+ map_errors, run_time, min_hits)
122
+ end
191
123
 
192
- # make the historgram with the resulted scores
193
- def self.create_overview_json(scores, plot_dir, less, evaluation)
194
- plot_file = File.join(plot_dir, 'overview.json')
195
- data = [scores.group_by { |a| a }.map { |k, vs| { 'key' => k, 'value' => vs.length, 'main' => false } }]
196
- hash = { data: data, type: :simplebars,
197
- title: 'Overall GeneValidator Score Evaluation',
198
- footer: '', xtitle: 'Validation Score',
199
- ytitle: 'Number of Queries', aux1: 10, aux2: '', less: less,
200
- evaluation: evaluation }
201
- File.open(plot_file, 'w') { |f| f.write hash.to_json }
202
- end
124
+ def overview_hash(scores_from_json, quartiles, nee, no_mafft, no_internet,
125
+ map_errors, run_time, insufficient_BLAST_hits)
126
+ {
127
+ scores: scores_from_json,
128
+ no_queries: scores_from_json.length,
129
+ good_scores: scores_from_json.count { |s| s >= 75 },
130
+ bad_scores: scores_from_json.count { |s| s < 75 },
131
+ nee: nee, no_mafft: no_mafft, no_internet: no_internet,
132
+ map_errors: map_errors, run_time: run_time,
133
+ first_quartile_of_scores: quartiles[0],
134
+ second_quartile_of_scores: quartiles[1],
135
+ third_quartile_of_scores: quartiles[2],
136
+ insufficient_BLAST_hits: insufficient_BLAST_hits
137
+ }
138
+ end
203
139
 
204
- ##
205
- # Calculates an overall evaluation of the output
206
- # Params:
207
- # +all_query_outputs+: Array of +ValidationTest+ objects
208
- # Output
209
- # Array of Strigs with the reports
210
- def self.overview(o)
211
- eval = general_overview(o)
212
- error_eval = errors_overview(o)
213
- time_eval = time_overview(o)
140
+ # calculate number of queries that had warnings for all validations.
141
+ def calculate_no_quries_with_no_evidence(json_data)
142
+ all_warnings = 0
143
+ json_data.each do |row|
144
+ status = row[:validations].map { |_, h| h[:status] }
145
+ if status.count { |r| r == 'warning' } == status.length
146
+ all_warnings += 1
147
+ end
148
+ end
149
+ all_warnings
150
+ end
214
151
 
215
- overall_evaluation = [eval, error_eval, time_eval]
216
- overall_evaluation.select { |e| e != '' }
217
- end
152
+ def count_mafft_errors(json_data)
153
+ json_data.count do |row|
154
+ num = row[:validations].count { |_, h| h[:print] == 'Mafft error' }
155
+ num.zero? ? false : true
156
+ end
157
+ end
218
158
 
219
- def self.general_overview(o)
220
- good_pred = (o[:good_scores] == 1) ? 'One' : "#{o[:good_scores]} are"
221
- bad_pred = (o[:bad_scores] == 1) ? 'One' : "#{o[:bad_scores]} are"
159
+ def count_internet_errors(json_data)
160
+ json_data.count do |row|
161
+ num = row[:validations].count { |_, h| h[:print] == 'Internet error' }
162
+ num.zero? ? false : true
163
+ end
164
+ end
222
165
 
223
- eval = "Overall Query Score Evaluation:\n" \
224
- "#{o[:no_queries]} predictions were validated, from which there" \
225
- " were:\n" \
226
- "#{good_pred} good prediction(s),\n" \
227
- "#{bad_pred} possibly weak prediction(s).\n"
166
+ def map_errors(json_data)
167
+ errors = Hash.new(0)
168
+ json_data.each do |row|
169
+ e = row[:validations].map { |s, h| s if h[:validation] == 'error' }
170
+ e.compact.each { |err| errors[err] += 1 }
171
+ end
172
+ errors
173
+ end
228
174
 
229
- if o[:nee] != 0 # nee = no evidence
230
- eval << "#{o[:nee]} could not be evaluated due to the lack of" \
231
- ' evidence.'
175
+ def calculate_run_time(json_data)
176
+ run_time = Hash.new(Pair1.new(0, 0))
177
+ json_data.map do |row|
178
+ row[:validations].each do |short_header, v|
179
+ next if v[:run_time].nil? || v[:run_time].zero?
180
+ next if v[:validation] == 'unapplicable' || v[:validation] == 'error'
181
+ p = Pair1.new(run_time[short_header.to_s].x + v[:run_time],
182
+ run_time[short_header.to_s].y + 1)
183
+ run_time[short_header.to_s] = p
184
+ end
185
+ end
186
+ run_time
232
187
  end
233
- eval
234
- end
235
188
 
236
- # errors per validation
237
- def self.errors_overview(o)
238
- error_eval = ''
239
- o[:map_errors].each do |k, v|
240
- error_eval << "\nWe couldn't run #{k} Validation for #{v} queries"
189
+ ##
190
+ # Calculates an overall evaluation of the output
191
+ # Params:
192
+ # +all_query_outputs+: Array of +ValidationTest+ objects
193
+ # Output
194
+ # Array of Strings with the reports
195
+ def generate_evaluation_text(overview)
196
+ eval = general_overview(overview)
197
+ error_eval = errors_overview(overview)
198
+ time_eval = time_overview(overview)
199
+
200
+ [eval, error_eval, time_eval].reject(&:empty?)
241
201
  end
242
- if o[:no_mafft] >= (o[:no_queries] - o[:nee])
243
- error_eval << "\nWe couldn't run MAFFT multiple alignment"
202
+
203
+ private
204
+
205
+ def general_overview(o)
206
+ good_pred = o[:good_scores] == 1 ? 'One' : "#{o[:good_scores]} are"
207
+ bad_pred = o[:bad_scores] == 1 ? 'One' : "#{o[:bad_scores]} are"
208
+
209
+ plural = 'prediction was' if o[:insufficient_BLAST_hits] == 1
210
+ plural = 'predictions were' if o[:insufficient_BLAST_hits] >= 2
211
+ b = "#{o[:insufficient_BLAST_hits]} #{plural} not evaluated due to an" \
212
+ ' insufficient number of BLAST hits.'
213
+ blast_hits = o[:insufficient_BLAST_hits].zero? ? '' : b
214
+
215
+ ['Overall Query Score Evaluation:',
216
+ "#{o[:no_queries]} predictions were validated, from which there were:",
217
+ "#{good_pred} good prediction(s),",
218
+ "#{bad_pred} possibly weak prediction(s).", blast_hits,
219
+ "The median overall score was #{o[:second_quartile_of_scores]} with" \
220
+ " an upper quartile of #{o[:third_quartile_of_scores]}" \
221
+ " and a lower quartile of #{o[:first_quartile_of_scores]}."]
244
222
  end
245
- if o[:no_internet] >= (o[:no_queries] - o[:nee])
246
- error_eval << "\nWe couldn't make use of your internet connection"
223
+
224
+ # errors per validation
225
+ def errors_overview(o)
226
+ error_eval = o[:map_errors].map do |k, v|
227
+ "We couldn't run #{k} Validation for #{v} queries"
228
+ end
229
+ if o[:no_mafft] >= (o[:no_queries] - o[:nee])
230
+ error_eval << "We couldn't run MAFFT multiple alignment"
231
+ end
232
+ if o[:no_internet] >= (o[:no_queries] - o[:nee])
233
+ error_eval << "\nWe couldn't make use of your internet connection"
234
+ end
235
+ error_eval
247
236
  end
248
- error_eval
249
- end
250
237
 
251
- def self.time_overview(o)
252
- time_eval = ''
253
- o[:run_time].each do |key, value|
254
- average_time = value.x / (value.y).to_f
255
- time_eval << "\nAverage running time for #{key} Validation:" \
256
- " #{average_time.round(3)}s per validation"
238
+ def time_overview(o)
239
+ o[:run_time].map do |key, value|
240
+ mean_time = value.x / value.y.to_f
241
+ "Average running time for #{key} Validation: #{mean_time.round(3)}s" \
242
+ ' per validation'
243
+ end
257
244
  end
258
- time_eval
259
245
  end
260
246
  end
261
247
  end
@@ -0,0 +1,134 @@
1
+ require 'csv'
2
+ require 'slim'
3
+ require 'fileutils'
4
+ require 'forwardable'
5
+ require 'json'
6
+
7
+ require 'genevalidator/version'
8
+
9
+ module GeneValidator
10
+ # A Class for creating output files
11
+ class OutputFiles
12
+ extend Forwardable
13
+ def_delegators GeneValidator, :opt, :config, :dirs, :overview
14
+
15
+ def initialize()
16
+ @config = config
17
+ @opt = opt
18
+ @dirs = dirs
19
+ @overview = overview
20
+ @json_data = @config[:json_output]
21
+ end
22
+
23
+ def write_html(overall_eval)
24
+ return unless @opt[:output_formats].include? 'html'
25
+ @all_html_fnames = all_html_filenames
26
+ @json_data.each_slice(@config[:output_max]).with_index do |data, i|
27
+ @json_data_section = data
28
+ template_file = File.join(@dirs[:aux_dir], 'gv_results.slim')
29
+ template_contents = File.open(template_file, 'r').read
30
+ html_output = Slim::Template.new { template_contents }.render(self)
31
+ File.open(@all_html_fnames[i], 'w') { |f| f.write(html_output) }
32
+ end
33
+ create_overview_json_file(overall_eval)
34
+ end
35
+
36
+ def write_json
37
+ return unless @opt[:output_formats].include? 'json'
38
+ File.open(@dirs[:json_file], 'w') { |f| f.write(@json_data.to_json) }
39
+ end
40
+
41
+ def write_csv
42
+ return unless @opt[:output_formats].include? 'csv'
43
+ File.open(@dirs[:csv_file], 'a') do |file|
44
+ file.puts csv_header.join(',')
45
+ @json_data.each do |data|
46
+ short_def = data[:definition].split(' ')[0]
47
+ line = [data[:idx], data[:overall_score], short_def, data[:nr_hits]]
48
+ line += data[:validations].values.map { |e| e[:print] }
49
+ .each { |e| e.gsub!('&nbsp;', ' ') }
50
+ line.map { |e| e.gsub!(',', ' -') if e.is_a? String }
51
+ file.puts line.join(',')
52
+ end
53
+ end
54
+ end
55
+
56
+ def write_summary
57
+ return unless @opt[:output_formats].include? 'summary'
58
+ data = generate_summary_data
59
+ File.open(@dirs[:summary_file], 'w') do |f|
60
+ f.write data.map(&:to_csv).join
61
+ end
62
+ end
63
+
64
+ def print_best_fasta
65
+ return unless @opt[:select_single_best]
66
+ top_data = @json_data.max_by { |e| [e[:overall_score], e[:no_hits]] }
67
+ query = GeneValidator.extract_input_fasta_sequence(top_data[:idx])
68
+ File.open(@dirs[:fasta_file], 'w') { |f| f.write(query) }
69
+ puts query
70
+ end
71
+
72
+ private
73
+
74
+ def all_html_filenames
75
+ result_parts = (@json_data.length / @config[:output_max]).ceil
76
+ (0..result_parts).map do |idx|
77
+ multiple_files_needed = @json_data.length < @config[:output_max]
78
+ part = multiple_files_needed ? '' : "_#{idx + 1}"
79
+ fname = File.join(@dirs[:output_dir], "#{@dirs[:filename]}_results")
80
+ fname + part + '.html'
81
+ end
82
+ end
83
+
84
+ # By default, on page load, the results are automatically sorted by the
85
+ # index. However since the whole idea is that users would sort by JSON,
86
+ # this is not wanted here.
87
+ def turn_off_automated_sorting
88
+ js_file = File.join(@dirs[:output_dir], 'html_files/js/gv.compiled.min.js')
89
+ original_content = File.read(js_file)
90
+ # removes the automatic sort on page load
91
+ updated_content = original_content.gsub(',sortList:[[0,0]]', '')
92
+ File.open("#{script_file}.tmp", 'w') { |f| f.puts updated_content }
93
+ FileUtils.mv("#{script_file}.tmp", script_file)
94
+ end
95
+
96
+ def create_overview_json_file(overall_eval)
97
+ evaluation = overall_eval.flatten.join('<br>').gsub("'", %q(\\\'))
98
+ less = overall_eval[0].join('<br>')
99
+ hash = overview_html_hash(evaluation, less)
100
+ json = File.join(@dirs[:json_dir], 'overview.json')
101
+ File.open(json, 'w') { |f| f.write hash.to_json }
102
+ end
103
+
104
+ # make the histogram with the resulting scores
105
+ def overview_html_hash(evaluation, less)
106
+ data = [@overview[:scores].group_by { |a| a }.map do |k, vs|
107
+ { 'key': k, 'value': vs.length, 'main': false }
108
+ end]
109
+ { data: data, type: :simplebars, aux1: 10, aux2: '',
110
+ title: 'Overall GeneValidator Score Evaluation', footer: '',
111
+ xtitle: 'Validation Score', ytitle: 'Number of Queries',
112
+ less: less, evaluation: evaluation }
113
+ end
114
+
115
+ def csv_header
116
+ header = %w[AnalysisNumber GVScore Identifier NumberOfHits]
117
+ header += @json_data[0][:validations].keys
118
+ header
119
+ end
120
+
121
+ def generate_summary_data
122
+ [
123
+ ['num_predictions', @overview[:no_queries]],
124
+ ['num_good_predictions', @overview[:good_scores]],
125
+ ['num_bad_predictions', @overview[:bad_scores]],
126
+ ['num_predictions_with_insufficient_blast_hits',
127
+ @overview[:insufficient_BLAST_hits]],
128
+ ['first_quartile_of_scores', @overview[:first_quartile_of_scores]],
129
+ ['second_quartile_of_scores', @overview[:second_quartile_of_scores]],
130
+ ['third_quartile_of_scores', @overview[:third_quartile_of_scores]]
131
+ ]
132
+ end
133
+ end
134
+ end