genevalidator 1.6.2 → 1.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +147 -76
- data/Rakefile +1 -1
- data/aux/files/css/genevalidator.compiled.min.css +16 -0
- data/aux/files/css/{bootstrap.min.css → src/bootstrap.min.css} +0 -0
- data/aux/files/css/{font-awesome.min.css → src/font-awesome.min.css} +0 -0
- data/aux/files/css/{style.css → src/style.css} +0 -0
- data/aux/files/js/genevalidator.compiled.min.js +28 -0
- data/aux/files/js/{bootstrap.min.js → src/bootstrap.min.js} +0 -0
- data/aux/files/js/{d3.v3.min.js → src/d3.v3.min.js} +0 -0
- data/aux/files/js/{jquery-2.1.1.min.js → src/jquery-2.1.1.min.js} +0 -0
- data/aux/files/js/{jquery.tablesorter.min.js → src/jquery.tablesorter.min.js} +0 -0
- data/aux/files/js/src/plots.js +814 -0
- data/aux/files/js/src/script.js +43 -0
- data/aux/json_header.erb +6 -6
- data/aux/json_query.erb +2 -1
- data/aux/template_footer.erb +0 -11
- data/aux/template_header.erb +4 -4
- data/aux/template_query.erb +1 -1
- data/bin/genevalidator +8 -6
- data/genevalidator.gemspec +1 -1
- data/lib/genevalidator.rb +7 -5
- data/lib/genevalidator/arg_validation.rb +12 -9
- data/lib/genevalidator/blast.rb +18 -11
- data/lib/genevalidator/clusterization.rb +35 -31
- data/lib/genevalidator/exceptions.rb +0 -1
- data/lib/genevalidator/get_raw_sequences.rb +115 -69
- data/lib/genevalidator/hsp.rb +8 -8
- data/lib/genevalidator/json_to_gv_results.rb +4 -4
- data/lib/genevalidator/output.rb +40 -41
- data/lib/genevalidator/pool.rb +5 -4
- data/lib/genevalidator/query.rb +37 -0
- data/lib/genevalidator/tabular_parser.rb +3 -4
- data/lib/genevalidator/validation.rb +16 -11
- data/lib/genevalidator/validation_alignment.rb +17 -23
- data/lib/genevalidator/validation_blast_reading_frame.rb +3 -3
- data/lib/genevalidator/validation_duplication.rb +8 -18
- data/lib/genevalidator/validation_gene_merge.rb +11 -9
- data/lib/genevalidator/validation_length_cluster.rb +8 -11
- data/lib/genevalidator/validation_length_rank.rb +5 -4
- data/lib/genevalidator/validation_open_reading_frame.rb +5 -5
- data/lib/genevalidator/version.rb +1 -1
- data/test/test_all_validations.rb +2 -1
- data/test/test_blast.rb +4 -3
- data/test/test_extended_array_methods.rb +2 -1
- data/test/{test_sequences.rb → test_query.rb} +5 -23
- data/test/test_validation_open_reading_frame.rb +7 -7
- data/test/test_validations.rb +8 -6
- metadata +16 -16
- data/aux/app_template_footer.erb +0 -1
- data/aux/app_template_header.erb +0 -12
- data/aux/files/js/plots.js +0 -828
- data/aux/files/js/script.js +0 -71
- data/lib/genevalidator/sequences.rb +0 -101
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
require 'bio-blastxmlparser'
|
|
2
2
|
require 'forwardable'
|
|
3
3
|
require 'net/http'
|
|
4
|
+
require 'tempfile'
|
|
4
5
|
require 'uri'
|
|
5
6
|
require 'yaml'
|
|
6
7
|
|
|
7
8
|
require 'genevalidator/exceptions'
|
|
8
|
-
require 'genevalidator/
|
|
9
|
+
require 'genevalidator/query'
|
|
9
10
|
|
|
10
11
|
module GeneValidator
|
|
11
12
|
# Gets the raw sequences for each hit in a BLAST output file
|
|
@@ -14,34 +15,32 @@ module GeneValidator
|
|
|
14
15
|
extend Forwardable
|
|
15
16
|
def_delegators GeneValidator, :opt, :config
|
|
16
17
|
|
|
17
|
-
|
|
18
|
-
# Obtains raw_sequences from BLAST output file...
|
|
19
|
-
def run
|
|
20
|
-
@opt = opt
|
|
21
|
-
@config = config
|
|
22
|
-
|
|
18
|
+
def init
|
|
23
19
|
$stderr.puts 'Extracting sequences within the BLAST output file from' \
|
|
24
20
|
' the BLAST database'
|
|
25
21
|
|
|
26
|
-
if
|
|
27
|
-
|
|
28
|
-
else
|
|
29
|
-
@blast_file = @opt[:blast_tabular_file]
|
|
30
|
-
end
|
|
22
|
+
@blast_file = opt[:blast_xml_file] if opt[:blast_xml_file]
|
|
23
|
+
@blast_file = opt[:blast_tabular_file] if opt[:blast_tabular_file]
|
|
31
24
|
|
|
32
|
-
|
|
33
|
-
index_file
|
|
25
|
+
opt[:raw_sequences] = @blast_file + '.raw_seq'
|
|
26
|
+
@index_file = @blast_file + '.index'
|
|
27
|
+
end
|
|
34
28
|
|
|
29
|
+
##
|
|
30
|
+
# Obtains raw_sequences from BLAST output file...
|
|
31
|
+
def run
|
|
32
|
+
init
|
|
35
33
|
if opt[:db] =~ /remote/
|
|
36
|
-
write_a_raw_seq_file(
|
|
34
|
+
write_a_raw_seq_file(opt[:raw_sequences], 'remote')
|
|
37
35
|
else
|
|
38
|
-
write_an_index_file(index_file, 'local')
|
|
39
|
-
|
|
36
|
+
write_an_index_file(@index_file, 'local')
|
|
37
|
+
FetchRawSequences.extract_from_local_db(true, nil, @index_file)
|
|
40
38
|
end
|
|
41
|
-
index_raw_seq_file(
|
|
39
|
+
index_raw_seq_file(opt[:raw_sequences])
|
|
42
40
|
end
|
|
43
41
|
|
|
44
42
|
##
|
|
43
|
+
#
|
|
45
44
|
# Index the raw sequences file...
|
|
46
45
|
def index_raw_seq_file(raw_seq_file = opt[:raw_sequences])
|
|
47
46
|
# leave only the identifiers in the fasta description
|
|
@@ -74,8 +73,15 @@ module GeneValidator
|
|
|
74
73
|
|
|
75
74
|
def write_an_index_file(output_file, db_type)
|
|
76
75
|
file = File.open(output_file, 'w+')
|
|
77
|
-
iterate_xml(file, db_type) if
|
|
78
|
-
iterate_tabular(file, db_type) if
|
|
76
|
+
iterate_xml(file, db_type) if opt[:blast_xml_file]
|
|
77
|
+
iterate_tabular(file, db_type) if opt[:blast_tabular_file]
|
|
78
|
+
rescue
|
|
79
|
+
$stderr.puts '*** Error: There was an error in analysing the BLAST'
|
|
80
|
+
$stderr.puts ' output file. Please ensure that BLAST output file'
|
|
81
|
+
$stderr.puts ' is in the correct format and then try again. If you'
|
|
82
|
+
$stderr.puts ' are using a remote database, please ensure that you'
|
|
83
|
+
$stderr.puts ' have internet access.'
|
|
84
|
+
exit 1
|
|
79
85
|
ensure
|
|
80
86
|
file.close unless file.nil?
|
|
81
87
|
end
|
|
@@ -83,85 +89,125 @@ module GeneValidator
|
|
|
83
89
|
alias_method :write_a_raw_seq_file, :write_an_index_file
|
|
84
90
|
|
|
85
91
|
def iterate_xml(file, db_type)
|
|
86
|
-
n = Bio::BlastXMLParser::XmlIterator.new(
|
|
92
|
+
n = Bio::BlastXMLParser::XmlIterator.new(opt[:blast_xml_file]).to_enum
|
|
87
93
|
n.each do |iter|
|
|
88
94
|
iter.each do |hit|
|
|
89
95
|
if db_type == 'remote' || hit.hit_id.nil?
|
|
90
|
-
file.puts
|
|
96
|
+
file.puts FetchRawSequences.extract_from_remote_db(hit.accession)
|
|
91
97
|
else
|
|
92
|
-
file.puts hit.
|
|
98
|
+
file.puts hit.accession
|
|
93
99
|
end
|
|
94
100
|
end
|
|
95
101
|
end
|
|
96
|
-
rescue
|
|
97
|
-
$stderr.puts '*** Error: There was an error in analysing the BLAST XML file.'
|
|
98
|
-
$stderr.puts ' Please ensure that BLAST XML file is in the correct format'
|
|
99
|
-
$stderr.puts ' and then try again. If you are using a remote database,'
|
|
100
|
-
$stderr.puts ' please ensure that you have internet access.'
|
|
101
|
-
exit 1
|
|
102
102
|
end
|
|
103
103
|
|
|
104
104
|
def iterate_tabular(file, db_type)
|
|
105
|
-
table_headers =
|
|
106
|
-
tab_file = File.read(
|
|
105
|
+
table_headers = opt[:blast_tabular_options].split(/[ ,]/)
|
|
106
|
+
tab_file = File.read(opt[:blast_tabular_file])
|
|
107
107
|
rows = CSV.parse(tab_file, col_sep: "\t",
|
|
108
108
|
skip_lines: /^#/,
|
|
109
109
|
headers: table_headers)
|
|
110
|
-
assert_table_has_correct_no_of_collumns(rows, table_headers)
|
|
111
110
|
|
|
112
111
|
rows.each do |row|
|
|
113
112
|
if db_type == 'remote' || row['sseqid'].nil?
|
|
114
|
-
file.puts
|
|
113
|
+
file.puts FetchRawSequences.extract_from_remote_db(row['sacc'])
|
|
115
114
|
else
|
|
116
115
|
file.puts row['sseqid']
|
|
117
116
|
end
|
|
118
117
|
end
|
|
118
|
+
end
|
|
119
|
+
end
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
class FetchRawSequences
|
|
123
|
+
class << self
|
|
124
|
+
extend Forwardable
|
|
125
|
+
def_delegators GeneValidator, :opt, :config
|
|
126
|
+
|
|
127
|
+
def run(identifier, accession)
|
|
128
|
+
# first try to extract from previously created raw_sequences HASH
|
|
129
|
+
raw_seq = extract_from_index(identifier) if opt[:raw_sequences]
|
|
130
|
+
# then try to just extract that sequence based on accession.
|
|
131
|
+
if opt[:db] !~ /remote/ && (raw_seq.nil? || raw_seq =~ /Error/)
|
|
132
|
+
raw_seq = extract_from_local_db(false, accession)
|
|
133
|
+
end
|
|
134
|
+
# then try to extract from remote database
|
|
135
|
+
if opt[:db] =~ /remote/ && (raw_seq.nil? || raw_seq =~ /Error/)
|
|
136
|
+
raw_seq = extract_from_remote_db(accession)
|
|
137
|
+
end
|
|
138
|
+
# return nil if the raw_sequence still produces an error.
|
|
139
|
+
(raw_seq =~ /Error/) ? nil : raw_seq
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
##
|
|
143
|
+
# Gets raw sequence by fasta identifier from a fasta index file
|
|
144
|
+
# Params:
|
|
145
|
+
# +identifier+: String
|
|
146
|
+
# Output:
|
|
147
|
+
# String with the nucleotide sequence corresponding to the identifier
|
|
148
|
+
def extract_from_index(identifier)
|
|
149
|
+
idx = config[:raw_seq_file_load][identifier]
|
|
150
|
+
query = IO.binread(opt[:raw_sequences], idx[1] - idx[0], idx[0])
|
|
151
|
+
parse_query = query.scan(/>([^\n]*)\n([A-Za-z\n]*)/)[0]
|
|
152
|
+
parse_query[1].gsub("\n", '')
|
|
119
153
|
rescue
|
|
120
|
-
|
|
121
|
-
$stderr.puts ' file. Please ensure that BLAST tabular file is in the correct'
|
|
122
|
-
$stderr.puts ' format and then try again. If you are using a remote'
|
|
123
|
-
$stderr.puts ' database, please ensure that you have internet access.'
|
|
124
|
-
exit 1
|
|
154
|
+
'Error' # return error so it can then try alternative fetching method.
|
|
125
155
|
end
|
|
126
156
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
157
|
+
##
|
|
158
|
+
# Gets raw sequence by accession number from a givem database
|
|
159
|
+
# Params:
|
|
160
|
+
# +accno+: accession number as String
|
|
161
|
+
# +db+: database as String
|
|
162
|
+
# Output:
|
|
163
|
+
# String with the nucleotide sequence corresponding to the accession
|
|
164
|
+
def extract_from_local_db(batch, accno = nil, idx_file = nil)
|
|
165
|
+
cmd = (batch) ? batch_raw_seq_cmd(idx_file) : single_raw_seq_cmd(accno)
|
|
166
|
+
efile = Tempfile.new('blast_out')
|
|
167
|
+
`#{cmd} &>#{efile.path}`
|
|
168
|
+
raw_seqs = efile.read
|
|
169
|
+
failed_raw_sequences(raw_seqs) if batch && raw_seqs =~ /Error/
|
|
170
|
+
raw_seqs # when obtaining a single raw_seq, this contains the sequence
|
|
171
|
+
ensure
|
|
172
|
+
efile.close
|
|
173
|
+
efile.unlink
|
|
131
174
|
end
|
|
132
175
|
|
|
133
|
-
def
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
result = Net::HTTP.get(URI.parse(uri))
|
|
137
|
-
query = result.scan(%r{<\bQueryKey\b>([\w\W\d]+)</\bQueryKey\b>})[0][0]
|
|
138
|
-
web_env = result.scan(%r{<\bWebEnv\b>([\w\W\d]+)</\bWebEnv\b>})[0][0]
|
|
139
|
-
|
|
140
|
-
uri = 'http://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?' \
|
|
141
|
-
'rettype=fasta&retmode=text&retstart=0&retmax=1&' \
|
|
142
|
-
"db=protein&query_key=#{query}&WebEnv=#{web_env}"
|
|
143
|
-
result = Net::HTTP.get(URI.parse(uri))
|
|
144
|
-
raw_seqs = result[0..result.length - 2]
|
|
145
|
-
unless raw_seqs.downcase.index(/error/).nil?
|
|
146
|
-
$stderr.puts '*** Error: There was an error in obtaining the raw sequence' \
|
|
147
|
-
' of a BLAST hit. Please ensure that you have internet access.'
|
|
148
|
-
exit 1
|
|
149
|
-
end
|
|
150
|
-
raw_seqs
|
|
176
|
+
def batch_raw_seq_cmd(index_file)
|
|
177
|
+
"blastdbcmd -entry_batch '#{index_file}' -db '#{opt[:db]}'" \
|
|
178
|
+
" -outfmt '%f' -out '#{opt[:raw_sequences]}'"
|
|
151
179
|
end
|
|
152
180
|
|
|
153
|
-
def
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
181
|
+
def single_raw_seq_cmd(accession)
|
|
182
|
+
"blastdbcmd -entry '#{accession}' -db '#{opt[:db]}' -outfmt '%s'"
|
|
183
|
+
end
|
|
184
|
+
|
|
185
|
+
def failed_raw_sequences(blast_output)
|
|
186
|
+
blast_output.each_line do |line|
|
|
187
|
+
acc = line.match(/Error: (\w+): OID not found/)[1]
|
|
188
|
+
$stderr.puts "\nCould not find sequence '#{acc.chomp}' within the" \
|
|
189
|
+
' BLAST database.'
|
|
190
|
+
$stderr.puts "Attempting to obtain sequence '#{acc.chomp}' from" \
|
|
191
|
+
' remote BLAST databases.'
|
|
192
|
+
File.open(opt[:raw_sequences], 'a+') do |f|
|
|
193
|
+
f.puts extract_from_remote_db(acc)
|
|
161
194
|
end
|
|
162
|
-
break # break after checking the first column
|
|
163
195
|
end
|
|
164
196
|
end
|
|
197
|
+
|
|
198
|
+
def extract_from_remote_db(accession, db_seq_type = 'protein')
|
|
199
|
+
uri = 'http://www.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?' \
|
|
200
|
+
"db=#{db_seq_type}&retmax=1&usehistory=y&term=#{accession}/"
|
|
201
|
+
result = Net::HTTP.get(URI.parse(uri))
|
|
202
|
+
query = result.match(%r{<\bQueryKey\b>([\w\W\d]+)</\bQueryKey\b>})[1]
|
|
203
|
+
web_env = result.match(%r{<\bWebEnv\b>([\w\W\d]+)</\bWebEnv\b>})[1]
|
|
204
|
+
|
|
205
|
+
uri = 'http://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?' \
|
|
206
|
+
'rettype=fasta&retmode=text&retstart=0&retmax=1&' \
|
|
207
|
+
"db=#{db_seq_type}&query_key=#{query}&WebEnv=#{web_env}"
|
|
208
|
+
result = Net::HTTP.get(URI.parse(uri))
|
|
209
|
+
result[0..result.length - 2]
|
|
210
|
+
end
|
|
165
211
|
end
|
|
166
212
|
end
|
|
167
213
|
end
|
data/lib/genevalidator/hsp.rb
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
require 'genevalidator/blast'
|
|
2
2
|
require 'genevalidator/exceptions'
|
|
3
|
+
|
|
3
4
|
module GeneValidator
|
|
4
5
|
# A class that initialises the BLAST tabular attributes
|
|
5
6
|
class Hsp
|
|
@@ -45,14 +46,13 @@ module GeneValidator
|
|
|
45
46
|
@pidentity = hash['pident'].to_f if hash['pident']
|
|
46
47
|
@identity = hash['nident'].to_f if hash['nident']
|
|
47
48
|
@hsp_evalue = hash['evalue'].to_f if hash['evalue']
|
|
48
|
-
if hash['
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
end
|
|
49
|
+
assert_seq_type(@query_alignment) if hash['sseq']
|
|
50
|
+
assert_seq_type(@hit_alignment) if hash['sseq']
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
def assert_seq_type(query)
|
|
54
|
+
seq_type = BlastUtils.guess_sequence_type(query)
|
|
55
|
+
fail SequenceTypeError if seq_type != :protein
|
|
56
56
|
end
|
|
57
57
|
end
|
|
58
58
|
end
|
|
@@ -93,16 +93,16 @@ module GeneValidator
|
|
|
93
93
|
end
|
|
94
94
|
end
|
|
95
95
|
|
|
96
|
-
# Since the whole idea is that users would sort by
|
|
96
|
+
# Since the whole idea is that users would sort by JSON when there are
|
|
97
|
+
# too many rows in the HTML table.
|
|
97
98
|
def turn_off_automated_sorting
|
|
98
99
|
script_file = File.join(@config[:html_path], 'files/js/script.js')
|
|
99
|
-
|
|
100
|
-
File.open(temp_file, 'w') do |out_file|
|
|
100
|
+
File.open("#{script_file}.tmp", 'w') do |out_file|
|
|
101
101
|
out_file.puts File.readlines(script_file)[0..23].join
|
|
102
102
|
out_file.puts '}'
|
|
103
103
|
out_file.puts File.readlines(script_file)[26..-1].join
|
|
104
104
|
end
|
|
105
|
-
FileUtils.mv(
|
|
105
|
+
FileUtils.mv("#{script_file}.tmp", script_file)
|
|
106
106
|
end
|
|
107
107
|
end
|
|
108
108
|
end
|
data/lib/genevalidator/output.rb
CHANGED
|
@@ -34,8 +34,6 @@ module GeneValidator
|
|
|
34
34
|
@prediction_def = definition
|
|
35
35
|
@nr_hits = no_of_hits
|
|
36
36
|
@idx = current_idx
|
|
37
|
-
|
|
38
|
-
@app_html = File.join(@config[:html_path], 'files/table.html')
|
|
39
37
|
end
|
|
40
38
|
|
|
41
39
|
def print_output_console
|
|
@@ -50,33 +48,31 @@ module GeneValidator
|
|
|
50
48
|
|
|
51
49
|
def print_console_header
|
|
52
50
|
@config[:console_header_printed] = true
|
|
53
|
-
print format("%3s\t%5s\t%20s\t%7s\t", 'No', 'Score', 'Identifier',
|
|
51
|
+
print format("%3s\t%5s\t%20s\t%7s\t", 'No', 'Score', 'Identifier',
|
|
52
|
+
'No_Hits')
|
|
54
53
|
puts validations.map(&:short_header).join("\t")
|
|
55
54
|
end
|
|
56
55
|
|
|
57
56
|
def generate_html
|
|
58
57
|
mutex_html.synchronize do
|
|
59
|
-
output_html
|
|
58
|
+
output_html = output_filename
|
|
60
59
|
query_erb = File.join(@config[:aux], 'template_query.erb')
|
|
61
60
|
template_file = File.open(query_erb, 'r').read
|
|
62
61
|
erb = ERB.new(template_file, 0, '>')
|
|
63
62
|
File.open(output_html, 'a') { |f| f.write(erb.result(binding)) }
|
|
64
|
-
File.open(@app_html, 'a') { |f| f.write(erb.result(binding)) }
|
|
65
63
|
end
|
|
66
64
|
end
|
|
67
65
|
|
|
68
66
|
def output_filename
|
|
69
|
-
|
|
70
|
-
output_html = File.join(@config[:html_path], "results#{
|
|
67
|
+
idx = (@config[:run_no].to_f / @config[:output_max]).ceil
|
|
68
|
+
output_html = File.join(@config[:html_path], "results#{idx}.html")
|
|
71
69
|
write_html_header(output_html)
|
|
72
70
|
output_html
|
|
73
71
|
end
|
|
74
72
|
|
|
75
73
|
def write_html_header(output_html)
|
|
76
74
|
head_erb = File.join(@config[:aux], 'template_header.erb')
|
|
77
|
-
head_table_erb = File.join(@config[:aux], 'app_template_header.erb')
|
|
78
75
|
set_up_html(head_erb, output_html) unless File.exist?(output_html)
|
|
79
|
-
set_up_html(head_table_erb, @app_html) unless File.exist?(@app_html)
|
|
80
76
|
end
|
|
81
77
|
|
|
82
78
|
def set_up_html(erb_file, output_file)
|
|
@@ -99,25 +95,31 @@ module GeneValidator
|
|
|
99
95
|
def create_validation_hashes(row)
|
|
100
96
|
row[:validations] = {}
|
|
101
97
|
@validations.each do |item|
|
|
102
|
-
val
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
explain = { approach: item.approach, explanation: item.explanation,
|
|
106
|
-
conclusion: item.conclusion }
|
|
107
|
-
val.merge!(explain)
|
|
108
|
-
end
|
|
98
|
+
val = add_basic_validation_info(item)
|
|
99
|
+
explain = add_explanation_data(item) if item.color != 'warning'
|
|
100
|
+
val.merge!(explain) if explain
|
|
109
101
|
val[:graphs] = create_graphs_hash(item) unless item.plot_files.nil?
|
|
110
102
|
row[:validations][item.short_header] = val
|
|
111
103
|
end
|
|
112
104
|
row
|
|
113
105
|
end
|
|
114
106
|
|
|
107
|
+
def add_basic_validation_info(item)
|
|
108
|
+
{ header: item.header, description: item.description, status: item.color,
|
|
109
|
+
print: item.print.gsub(' ', ' ') }
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
def add_explanation_data(item)
|
|
113
|
+
{ approach: item.approach, explanation: item.explanation,
|
|
114
|
+
conclusion: item.conclusion }
|
|
115
|
+
end
|
|
116
|
+
|
|
115
117
|
def create_graphs_hash(item)
|
|
116
118
|
graphs = []
|
|
117
119
|
item.plot_files.each do |g|
|
|
118
120
|
graphs << { data: g.data, type: g.type, title: g.title,
|
|
119
|
-
footer: g.footer, xtitle: g.xtitle,
|
|
120
|
-
|
|
121
|
+
footer: g.footer, xtitle: g.xtitle, ytitle: g.ytitle,
|
|
122
|
+
aux1: g.aux1, aux2: g.aux2 }
|
|
121
123
|
end
|
|
122
124
|
graphs
|
|
123
125
|
end
|
|
@@ -140,16 +142,9 @@ module GeneValidator
|
|
|
140
142
|
# +html_path+: path of the html folder
|
|
141
143
|
# +filemane+: name of the fasta input file
|
|
142
144
|
def self.print_footer(overview, config)
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
create_plot_json(overview[:scores], config[:plot_dir])
|
|
145
|
+
set_overall_evaluation(overview, config)
|
|
146
146
|
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
eval = print_summary_to_console(overall_evaluation, config[:summary])
|
|
150
|
-
evaluation = eval.gsub("\n", '<br>').gsub("'", %q(\\\'))
|
|
151
|
-
|
|
152
|
-
footer_erb = File.join(config[:aux], 'template_footer.erb')
|
|
147
|
+
footer_erb = File.join(config[:aux], 'template_footer.erb')
|
|
153
148
|
|
|
154
149
|
no_of_results_files = (config[:run_no].to_f / config[:output_max]).ceil
|
|
155
150
|
template_file = File.open(footer_erb, 'r').read
|
|
@@ -164,40 +159,44 @@ module GeneValidator
|
|
|
164
159
|
end
|
|
165
160
|
|
|
166
161
|
turn_off_sorting(config[:html_path]) if no_of_results_files > 1
|
|
162
|
+
end
|
|
163
|
+
|
|
164
|
+
def self.set_overall_evaluation(overview, config)
|
|
165
|
+
overall_evaluation = overview(overview)
|
|
166
|
+
less = overall_evaluation[0].gsub("\n", '<br>').gsub("'", %q(\\\'))
|
|
167
|
+
|
|
168
|
+
eval = print_summary_to_console(overall_evaluation, config[:summary])
|
|
169
|
+
evaluation = eval.gsub("\n", '<br>').gsub("'", %q(\\\'))
|
|
167
170
|
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
table_html = File.join(config[:html_path], 'files/table.html')
|
|
171
|
-
table_footer_template = File.open(app_footer_erb, 'r').read
|
|
172
|
-
table_erb = ERB.new(table_footer_template, 0, '>')
|
|
173
|
-
File.open(table_html, 'a+') { |f| f.write(table_erb.result(binding)) }
|
|
171
|
+
create_overview_json(overview[:scores], config[:plot_dir], less,
|
|
172
|
+
evaluation)
|
|
174
173
|
end
|
|
175
174
|
|
|
176
175
|
def self.turn_off_sorting(html_path)
|
|
177
176
|
script_file = File.join(html_path, 'files/js/script.js')
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
out_file.puts File.readlines(script_file)[30..-1].join
|
|
177
|
+
File.open("#{script_file}.tmp", 'w') do |f|
|
|
178
|
+
f.puts File.readlines(script_file)[30..-1].join
|
|
181
179
|
end
|
|
182
|
-
FileUtils.mv(
|
|
180
|
+
FileUtils.mv("#{script_file}.tmp", script_file)
|
|
183
181
|
end
|
|
184
182
|
|
|
185
183
|
def self.print_summary_to_console(overall_evaluation, summary)
|
|
186
184
|
# print to console
|
|
187
185
|
eval = ''
|
|
188
|
-
overall_evaluation.each { |e| eval << "
|
|
186
|
+
overall_evaluation.each { |e| eval << "#{e}\n" }
|
|
189
187
|
$stderr.puts eval if summary
|
|
190
188
|
$stderr.puts ''
|
|
191
189
|
eval
|
|
192
190
|
end
|
|
193
191
|
|
|
194
192
|
# make the historgram with the resulted scores
|
|
195
|
-
def self.
|
|
193
|
+
def self.create_overview_json(scores, plot_dir, less, evaluation)
|
|
196
194
|
plot_file = File.join(plot_dir, 'overview.json')
|
|
197
195
|
data = [scores.group_by { |a| a }.map { |k, vs| { 'key' => k, 'value' => vs.length, 'main' => false } }]
|
|
198
196
|
hash = { data: data, type: :simplebars, title: 'Overall Evaluation',
|
|
199
197
|
footer: '', xtitle: 'Validation Score',
|
|
200
|
-
ytitle: 'Number of Queries', aux1: 10, aux2: ''
|
|
198
|
+
ytitle: 'Number of Queries', aux1: 10, aux2: '', less: less,
|
|
199
|
+
evaluation: evaluation }
|
|
201
200
|
File.open(plot_file, 'w') { |f| f.write hash.to_json }
|
|
202
201
|
end
|
|
203
202
|
|
|
@@ -233,8 +232,8 @@ module GeneValidator
|
|
|
233
232
|
eval
|
|
234
233
|
end
|
|
235
234
|
|
|
235
|
+
# errors per validation
|
|
236
236
|
def self.errors_overview(o)
|
|
237
|
-
# errors per validation
|
|
238
237
|
error_eval = ''
|
|
239
238
|
o[:map_errors].each do |k, v|
|
|
240
239
|
error_eval << "\nWe couldn't run #{k} Validation for #{v} queries"
|