genevalidator 1.6.12 → 2.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (91) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +30 -1
  3. data/.ruby-version +1 -0
  4. data/.travis.yml +13 -12
  5. data/Gemfile +4 -1
  6. data/Gemfile.lock +135 -0
  7. data/README.md +104 -122
  8. data/Rakefile +377 -5
  9. data/aux/gv_results.slim +155 -0
  10. data/aux/html_files/css/gv.compiled.min.css +8 -0
  11. data/aux/{files → html_files}/css/src/bootstrap.min.css +0 -0
  12. data/aux/{files → html_files}/css/src/font-awesome.min.css +0 -0
  13. data/aux/{files → html_files}/css/src/style.css +0 -0
  14. data/aux/{files → html_files}/fonts/FontAwesome.otf +0 -0
  15. data/aux/{files → html_files}/fonts/fontawesome-webfont.eot +0 -0
  16. data/aux/{files → html_files}/fonts/fontawesome-webfont.svg +0 -0
  17. data/aux/{files → html_files}/fonts/fontawesome-webfont.ttf +0 -0
  18. data/aux/{files → html_files}/fonts/fontawesome-webfont.woff +0 -0
  19. data/aux/{files → html_files}/img/gene.png +0 -0
  20. data/aux/html_files/js/gv.compiled.min.js +1 -0
  21. data/aux/{files → html_files}/js/src/bootstrap.min.js +0 -0
  22. data/aux/{files → html_files}/js/src/d3.v3.min.js +0 -0
  23. data/aux/{files → html_files}/js/src/jquery-2.1.1.min.js +0 -0
  24. data/aux/{files → html_files}/js/src/jquery.tablesorter.min.js +0 -0
  25. data/aux/{files → html_files}/js/src/plots.js +1 -1
  26. data/aux/{files → html_files}/js/src/script.js +0 -0
  27. data/aux/{files → html_files}/json/.gitkeep +0 -0
  28. data/bin/genevalidator +393 -56
  29. data/exemplar_data/README.md +60 -0
  30. data/{data/mrna_data.fasta → exemplar_data/mrna_data.fa} +1 -1
  31. data/{data/protein_data.fasta → exemplar_data/protein_data.fa} +0 -0
  32. data/genevalidator.gemspec +35 -20
  33. data/install.sh +92 -0
  34. data/lib/genevalidator.rb +171 -56
  35. data/lib/genevalidator/arg_validation.rb +26 -55
  36. data/lib/genevalidator/blast.rb +44 -99
  37. data/lib/genevalidator/clusterization.rb +18 -22
  38. data/lib/genevalidator/exceptions.rb +17 -17
  39. data/lib/genevalidator/ext/array.rb +21 -4
  40. data/lib/genevalidator/get_raw_sequences.rb +32 -31
  41. data/lib/genevalidator/hsp.rb +31 -2
  42. data/lib/genevalidator/json_to_gv_results.rb +38 -122
  43. data/lib/genevalidator/output.rb +158 -172
  44. data/lib/genevalidator/output_files.rb +134 -0
  45. data/lib/genevalidator/pool.rb +2 -5
  46. data/lib/genevalidator/query.rb +1 -1
  47. data/lib/genevalidator/tabular_parser.rb +8 -29
  48. data/lib/genevalidator/validation.rb +48 -90
  49. data/lib/genevalidator/validation_alignment.rb +64 -75
  50. data/lib/genevalidator/validation_blast_reading_frame.rb +13 -9
  51. data/lib/genevalidator/validation_duplication.rb +85 -84
  52. data/lib/genevalidator/validation_gene_merge.rb +46 -35
  53. data/lib/genevalidator/validation_length_cluster.rb +18 -15
  54. data/lib/genevalidator/validation_length_rank.rb +19 -15
  55. data/lib/genevalidator/validation_maker_qi.rb +13 -12
  56. data/lib/genevalidator/validation_open_reading_frame.rb +16 -13
  57. data/lib/genevalidator/validation_report.rb +1 -1
  58. data/lib/genevalidator/validation_test.rb +1 -1
  59. data/lib/genevalidator/version.rb +1 -1
  60. data/test/overall.rb +1 -1
  61. data/test/test_all_validations.rb +36 -24
  62. data/test/test_blast.rb +39 -24
  63. data/test/test_clusterization_2d.rb +4 -4
  64. data/test/test_helper.rb +2 -2
  65. data/test/test_query.rb +16 -20
  66. data/test/test_validation_open_reading_frame.rb +122 -122
  67. data/test/test_validations.rb +12 -10
  68. metadata +94 -79
  69. data/aux/files/css/genevalidator.compiled.min.css +0 -16
  70. data/aux/files/js/genevalidator.compiled.min.js +0 -28
  71. data/aux/json_footer.erb +0 -8
  72. data/aux/json_header.erb +0 -19
  73. data/aux/json_query.erb +0 -15
  74. data/aux/template_footer.erb +0 -8
  75. data/aux/template_header.erb +0 -19
  76. data/aux/template_query.erb +0 -14
  77. data/data/README.md +0 -57
  78. data/data/mrna_data.fasta.blast_tabular +0 -3567
  79. data/data/mrna_data.fasta.blast_tabular.raw_seq +0 -53998
  80. data/data/mrna_data.fasta.blast_tabular.raw_seq.idx +0 -5440
  81. data/data/mrna_data.fasta.blast_xml +0 -39800
  82. data/data/mrna_data.fasta.blast_xml.raw_seq +0 -2554
  83. data/data/mrna_data.fasta.blast_xml.raw_seq.idx +0 -3127
  84. data/data/mrna_data.fasta.json +0 -1
  85. data/data/protein_data.fasta.blast_tabular +0 -3278
  86. data/data/protein_data.fasta.blast_tabular.raw_seq +0 -61295
  87. data/data/protein_data.fasta.blast_tabular.raw_seq.idx +0 -4438
  88. data/data/protein_data.fasta.blast_xml +0 -26228
  89. data/data/protein_data.fasta.blast_xml.raw_seq +0 -9803
  90. data/data/protein_data.fasta.blast_xml.raw_seq.idx +0 -1777
  91. data/data/protein_data.fasta.json +0 -1
@@ -1,11 +1,11 @@
1
1
  module GeneValidator
2
2
  # Exception raised when BLAST path is not added to the CLASSPATH
3
- class ClasspathError < Exception
3
+ class ClasspathError < RuntimeError
4
4
  end
5
5
 
6
6
  # Exception raised when the command line type argument
7
7
  # does not correspond to the type of the sequences in the fasta file
8
- class SequenceTypeError < Exception
8
+ class SequenceTypeError < RuntimeError
9
9
  def to_s
10
10
  "\nSequence Type error: Possible cause include that the blast output" \
11
11
  " was not obtained against a protein database.\n"
@@ -13,15 +13,15 @@ module GeneValidator
13
13
  end
14
14
 
15
15
  # Exception raised when an unexisting file is accessed
16
- class FileNotFoundException < Exception
16
+ class FileNotFoundException < RuntimeError
17
17
  end
18
18
 
19
19
  # Exception raised when blast does not find any hit
20
- class QueryError < Exception
20
+ class QueryError < RuntimeError
21
21
  end
22
22
 
23
23
  # Exception raised when a validation class is not instance of ValidationTest
24
- class ValidationClassError < Exception
24
+ class ValidationClassError < RuntimeError
25
25
  def to_s
26
26
  "\nClass Type error: Possible cause include that one of the validations" \
27
27
  " is not a sub-class of ValidationTest\n"
@@ -30,7 +30,7 @@ module GeneValidator
30
30
 
31
31
  # Exception raised when a validation report class is not instance of
32
32
  # ValidationReport
33
- class ReportClassError < Exception
33
+ class ReportClassError < RuntimeError
34
34
  def to_s
35
35
  "\nClass Type error: Possible causes include that the type of one of" \
36
36
  ' the validation reports is not a subclass of the ValidationReport' \
@@ -40,20 +40,20 @@ module GeneValidator
40
40
 
41
41
  # Exception raised when there are not enough blast hits to make a statistical
42
42
  # validation
43
- class NotEnoughHitsError < Exception
43
+ class NotEnoughHitsError < RuntimeError
44
44
  end
45
45
 
46
46
  # Exception raised when a function depending on the internet connection raises
47
47
  # Exception
48
- class NoInternetError < Exception
48
+ class NoInternetError < RuntimeError
49
49
  end
50
50
 
51
51
  # Exception raised when the alignment initialization raises exception
52
- class NoMafftInstallationError < Exception
52
+ class NoMafftInstallationError < RuntimeError
53
53
  end
54
54
 
55
55
  # Exception raised when the -v argument didn't filter any validation test
56
- class NoValidationError < Exception
56
+ class NoValidationError < RuntimeError
57
57
  def to_s
58
58
  "\nValidation error: Possible cause inlcude that the -v arguments" \
59
59
  " supplied is not valid\n"
@@ -61,7 +61,7 @@ module GeneValidator
61
61
  end
62
62
 
63
63
  # Exception raised when there are alias duplications
64
- class AliasDuplicationError < Exception
64
+ class AliasDuplicationError < RuntimeError
65
65
  def to_s
66
66
  "\nAlias Duplication error: Possible cause: At least two validations" \
67
67
  " have the same CLI alias\n"
@@ -69,27 +69,27 @@ module GeneValidator
69
69
  end
70
70
 
71
71
  # Exception raised when the BLAST is not set up with the '-parse_seqids' arg.
72
- class BLASTDBError < Exception
72
+ class BLASTDBError < RuntimeError
73
73
  end
74
74
 
75
75
  # Error raised by QI Validation when the query does not have QI tag
76
- class NotEnoughEvidence < Exception
76
+ class NotEnoughEvidence < RuntimeError
77
77
  end
78
78
 
79
79
  # Exception raised when there are alias duplications
80
- class NoPIdentError < Exception
80
+ class NoPIdentError < RuntimeError
81
81
  end
82
82
 
83
83
  # Exception raised when the tabular format does not correspond to the tabular
84
84
  # argument
85
- class InconsistentTabularFormat < Exception
85
+ class InconsistentTabularFormat < RuntimeError
86
86
  end
87
87
 
88
88
  # Exception raised when there are more than one reading frame among the hits
89
89
  # of one prediction
90
- class ReadingFrameError < Exception
90
+ class ReadingFrameError < RuntimeError
91
91
  end
92
92
 
93
- class OtherError < Exception
93
+ class OtherError < RuntimeError
94
94
  end
95
95
  end
@@ -9,15 +9,15 @@ module GeneValidator
9
9
  sum / length.to_f
10
10
  end
11
11
 
12
- def median
13
- sorted = sort
12
+ def median(already_sorted = false)
13
+ sorted = already_sorted ? self : sort
14
14
  len = sorted.length
15
15
  (sorted[(len - 1) / 2] + sorted[len / 2]) / 2.0
16
16
  end
17
17
 
18
18
  def mode
19
- freq = inject(Hash.new(0)) { |h, v| h[v] += 1; h }
20
- sort_by { |v| freq[v] }.last
19
+ freq = each_with_object(Hash.new(0)) { |v, h| h[v] += 1; }
20
+ max_by { |v| freq[v] }
21
21
  end
22
22
 
23
23
  def sample_variance
@@ -29,6 +29,23 @@ module GeneValidator
29
29
  def standard_deviation
30
30
  Math.sqrt(sample_variance)
31
31
  end
32
+
33
+ def all_quartiles
34
+ sorted = sort
35
+ len = sorted.length
36
+ split = sorted.median_split
37
+ [
38
+ split[0].median(true),
39
+ sorted.median(true),
40
+ split[1].median(true)
41
+ ]
42
+ end
43
+
44
+ def median_split
45
+ len = length
46
+ center = len % 2
47
+ [self[0..len / 2 - 1], self[len / 2 + center..-1]]
48
+ end
32
49
  end
33
50
  end
34
51
 
@@ -13,24 +13,25 @@ module GeneValidator
13
13
  class RawSequences
14
14
  class <<self
15
15
  extend Forwardable
16
- def_delegators GeneValidator, :opt, :config
16
+ def_delegators GeneValidator, :opt, :config, :dirs
17
17
 
18
18
  def init
19
- $stderr.puts 'Extracting sequences within the BLAST output file from' \
20
- ' the BLAST database'
19
+ warn '==> Extracting fasta sequences for each BLAST HSP from the' \
20
+ ' BLAST database'
21
21
 
22
22
  @blast_file = opt[:blast_xml_file] if opt[:blast_xml_file]
23
23
  @blast_file = opt[:blast_tabular_file] if opt[:blast_tabular_file]
24
24
 
25
- opt[:raw_sequences] = @blast_file + '.raw_seq'
26
- @index_file = @blast_file + '.index'
25
+ fname = File.basename(@blast_file)
26
+ opt[:raw_sequences] = File.join(dirs[:tmp_dir], "#{fname}.raw_seq")
27
+ @index_file = File.join(dirs[:tmp_dir], "#{fname}.index")
27
28
  end
28
29
 
29
30
  ##
30
31
  # Obtains raw_sequences from BLAST output file...
31
32
  def run
32
33
  init
33
- if opt[:db] =~ /remote/
34
+ if opt[:db].match?(/remote/)
34
35
  write_a_raw_seq_file(opt[:raw_sequences], 'remote')
35
36
  else
36
37
  write_an_index_file(@index_file, 'local')
@@ -55,12 +56,13 @@ module GeneValidator
55
56
  index_hash = {}
56
57
  keys.each_with_index do |k, i|
57
58
  start = values[i]
58
- endf = (i == values.length - 1) ? content.length - 1 : values[i + 1]
59
+ endf = i == values.length - 1 ? content.length - 1 : values[i + 1]
59
60
  index_hash[k] = [start, endf]
60
61
  end
61
62
 
62
63
  # create FASTA index
63
- config[:raw_seq_file_index] = "#{raw_seq_file}.idx"
64
+ fname = File.basename(raw_seq_file)
65
+ config[:raw_seq_file_index] = File.join(dirs[:tmp_dir], "#{fname}.idx")
64
66
  config[:raw_seq_file_load] = index_hash
65
67
 
66
68
  File.open(config[:raw_seq_file_index], 'w') do |f|
@@ -76,29 +78,29 @@ module GeneValidator
76
78
  iterate_xml(file, db_type) if opt[:blast_xml_file]
77
79
  iterate_tabular(file, db_type) if opt[:blast_tabular_file]
78
80
  rescue BLASTDBError
79
- $stderr.puts "*** BLAST Database Error: Genevalidator requires BLAST" \
81
+ warn '*** BLAST Database Error: Genevalidator requires BLAST' \
80
82
  " databases to be created with the '-parse_seqids argument."
81
- $stderr.puts " See https://github.com/wurmlab/genevalidator" \
82
- "#setting-up-a-blast-database for more information"
83
+ warn ' See https://github.com/wurmlab/genevalidator' \
84
+ '#setting-up-a-blast-database for more information'
83
85
  exit 1
84
- rescue
85
- $stderr.puts '*** Error: There was an error in analysing the BLAST'
86
- $stderr.puts ' output file. Please ensure that BLAST output file'
87
- $stderr.puts ' is in the correct format and then try again. If you'
88
- $stderr.puts ' are using a remote database, please ensure that you'
89
- $stderr.puts ' have internet access.'
86
+ rescue StandardError
87
+ warn '*** Error: There was an error in analysing the BLAST'
88
+ warn ' output file. Please ensure that BLAST output file'
89
+ warn ' is in the correct format and then try again. If you'
90
+ warn ' are using a remote database, please ensure that you'
91
+ warn ' have internet access.'
90
92
  exit 1
91
93
  ensure
92
94
  file.close unless file.nil?
93
95
  end
94
96
 
95
- alias_method :write_a_raw_seq_file, :write_an_index_file
97
+ alias write_a_raw_seq_file write_an_index_file
96
98
 
97
99
  def iterate_xml(file, db_type)
98
100
  n = Bio::BlastXMLParser::XmlIterator.new(opt[:blast_xml_file]).to_enum
99
101
  n.each do |iter|
100
102
  iter.each do |hit|
101
- fail BLASTDBError if hit.hit_id =~ /\|BL_ORD_ID\|/
103
+ raise BLASTDBError if hit.hit_id =~ /\|BL_ORD_ID\|/
102
104
  if db_type == 'remote' || hit.hit_id.nil?
103
105
  file.puts FetchRawSequences.extract_from_remote_db(hit.accession)
104
106
  else
@@ -116,7 +118,7 @@ module GeneValidator
116
118
  headers: table_headers)
117
119
 
118
120
  rows.each do |row|
119
- fail BLASTDBError if row['sseqid'] =~ /\|BL_ORD_ID\|/
121
+ raise BLASTDBError if row['sseqid'] =~ /\|BL_ORD_ID\|/i
120
122
  if db_type == 'remote' || row['sseqid'].nil?
121
123
  file.puts FetchRawSequences.extract_from_remote_db(row['sacc'])
122
124
  else
@@ -136,15 +138,15 @@ module GeneValidator
136
138
  # first try to extract from previously created raw_sequences HASH
137
139
  raw_seq = extract_from_index(identifier) if opt[:raw_sequences]
138
140
  # then try to just extract that sequence based on accession.
139
- if opt[:db] !~ /remote/ && (raw_seq.nil? || raw_seq =~ /Error/)
141
+ if opt[:db] !~ /remote/ && (raw_seq.nil? || raw_seq =~ /Error/i)
140
142
  raw_seq = extract_from_local_db(false, accession)
141
143
  end
142
144
  # then try to extract from remote database
143
- if opt[:db] =~ /remote/ && (raw_seq.nil? || raw_seq =~ /Error/)
145
+ if opt[:db] =~ /remote/ && (raw_seq.nil? || raw_seq =~ /Error/i)
144
146
  raw_seq = extract_from_remote_db(accession)
145
147
  end
146
148
  # return nil if the raw_sequence still produces an error.
147
- (raw_seq =~ /Error/) ? nil : raw_seq
149
+ raw_seq =~ /Error/i ? nil : raw_seq
148
150
  end
149
151
 
150
152
  ##
@@ -157,8 +159,8 @@ module GeneValidator
157
159
  idx = config[:raw_seq_file_load][identifier]
158
160
  query = IO.binread(opt[:raw_sequences], idx[1] - idx[0], idx[0])
159
161
  parse_query = query.scan(/>([^\n]*)\n([A-Za-z\n]*)/)[0]
160
- parse_query[1].gsub("\n", '')
161
- rescue
162
+ parse_query[1].delete("\n")
163
+ rescue StandardError
162
164
  'Error' # return error so it can then try alternative fetching method.
163
165
  end
164
166
 
@@ -170,7 +172,7 @@ module GeneValidator
170
172
  # Output:
171
173
  # String with the nucleotide sequence corresponding to the accession
172
174
  def extract_from_local_db(batch, accno = nil, idx_file = nil)
173
- cmd = (batch) ? batch_raw_seq_cmd(idx_file) : single_raw_seq_cmd(accno)
175
+ cmd = batch ? batch_raw_seq_cmd(idx_file) : single_raw_seq_cmd(accno)
174
176
  efile = Tempfile.new('blast_out')
175
177
  `#{cmd} &>#{efile.path}`
176
178
  raw_seqs = efile.read
@@ -193,9 +195,9 @@ module GeneValidator
193
195
  def failed_raw_sequences(blast_output)
194
196
  blast_output.each_line do |line|
195
197
  acc = line.match(/Error: (\w+): OID not found/)[1]
196
- $stderr.puts "\nCould not find sequence '#{acc.chomp}' within the" \
198
+ warn "\nCould not find sequence '#{acc.chomp}' within the" \
197
199
  ' BLAST database.'
198
- $stderr.puts "Attempting to obtain sequence '#{acc.chomp}' from" \
200
+ warn "Attempting to obtain sequence '#{acc.chomp}' from" \
199
201
  ' remote BLAST databases.'
200
202
  File.open(opt[:raw_sequences], 'a+') do |f|
201
203
  f.puts extract_from_remote_db(acc)
@@ -204,13 +206,12 @@ module GeneValidator
204
206
  end
205
207
 
206
208
  def extract_from_remote_db(accession, db_seq_type = 'protein')
207
- uri = 'http://www.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?' \
209
+ uri = 'https://www.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?' \
208
210
  "db=#{db_seq_type}&retmax=1&usehistory=y&term=#{accession}/"
209
211
  result = Net::HTTP.get(URI.parse(uri))
210
212
  query = result.match(%r{<\bQueryKey\b>([\w\W\d]+)</\bQueryKey\b>})[1]
211
213
  web_env = result.match(%r{<\bWebEnv\b>([\w\W\d]+)</\bWebEnv\b>})[1]
212
-
213
- uri = 'http://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?' \
214
+ uri = 'https://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?' \
214
215
  'rettype=fasta&retmode=text&retstart=0&retmax=1&' \
215
216
  "db=#{db_seq_type}&query_key=#{query}&WebEnv=#{web_env}"
216
217
  result = Net::HTTP.get(URI.parse(uri))
@@ -1,9 +1,14 @@
1
+ require 'forwardable'
2
+
1
3
  require 'genevalidator/blast'
2
4
  require 'genevalidator/exceptions'
3
5
 
4
6
  module GeneValidator
5
7
  # A class that initialises the BLAST tabular attributes
6
8
  class Hsp
9
+ extend Forwardable
10
+ def_delegators GeneValidator, :config
11
+
7
12
  attr_accessor :hit_from # ref. from the unaligned hit sequence
8
13
  attr_accessor :hit_to
9
14
  attr_accessor :match_query_from # ref. from the unaligned query sequence
@@ -23,9 +28,30 @@ module GeneValidator
23
28
  attr_accessor :gaps
24
29
  attr_accessor :align_len
25
30
 
26
- def initialize
31
+ def initialize(input = {})
27
32
  @query_alignment = nil
28
33
  @hit_alignment = nil
34
+ init_xml_attributes(input[:xml_input]) if input[:xml_input]
35
+ init_tabular_attribute(input[:tabular_input]) if input[:tabular_input]
36
+ end
37
+
38
+ def init_xml_attributes(hsp)
39
+ @match_query_from = hsp.query_from.to_i
40
+ @match_query_to = hsp.query_to.to_i
41
+ @query_reading_frame = hsp.query_frame.to_i
42
+ @hit_from = hsp.hit_from.to_i
43
+ @hit_to = hsp.hit_to.to_i
44
+ @query_alignment = hsp.qseq.to_s
45
+ @hit_alignment = hsp.hseq.to_s
46
+ @align_len = hsp.align_len.to_i
47
+ @pidentity = (100 * hsp.identity / hsp.align_len.to_f).round(2)
48
+ @identity = hsp.identity.to_i
49
+ @hsp_evalue = format('%.0e', hsp.evalue)
50
+ assert_seq_type(@hit_alignment) if @hit_alignment
51
+ assert_seq_type(@query_alignment) if @query_alignment
52
+ return unless config[:type] == :nucleotide
53
+ @match_query_from = (@match_query_from / 3) + 1
54
+ @match_query_to = (@match_query_to / 3) + 1
29
55
  end
30
56
 
31
57
  ##
@@ -52,7 +78,10 @@ module GeneValidator
52
78
 
53
79
  def assert_seq_type(query)
54
80
  seq_type = BlastUtils.guess_sequence_type(query)
55
- fail SequenceTypeError if seq_type != :protein
81
+ raise SequenceTypeError if seq_type != :protein
82
+ rescue SequenceTypeError => e
83
+ warn e
84
+ exit 1
56
85
  end
57
86
  end
58
87
  end
@@ -1,10 +1,7 @@
1
- require 'erb'
2
- require 'fileutils'
3
1
  require 'forwardable'
4
2
  require 'json'
5
3
 
6
4
  require 'genevalidator'
7
- require 'genevalidator/output'
8
5
  require 'genevalidator/version'
9
6
 
10
7
  module GeneValidator
@@ -12,139 +9,58 @@ module GeneValidator
12
9
  class JsonToGVResults
13
10
  class << self
14
11
  extend Forwardable
15
- def_delegators GeneValidator, :opt
12
+ def_delegators GeneValidator, :opt, :config, :dirs
16
13
 
17
- def init
18
- @opt = opt
19
- @config = {
20
- html_path: "#{@opt[:json_file]}.html",
21
- plot_dir: "#{@opt[:json_file]}.html/files/json",
22
- aux: File.expand_path(File.join(File.dirname(__FILE__), '../../aux')),
23
- filename: File.basename(@opt[:json_file]),
24
- output_max: 2500,
25
- run_no: 0
26
- }
27
- @json_array = load_json_file
14
+ def init(opt)
15
+ GeneValidator.opt = opt
16
+ GeneValidator.config = { output_max: 2500, run_no: 0,
17
+ json_output: load_json_file }
18
+ GeneValidator.dirs = GeneValidator.setup_dirnames(opt[:json_file])
28
19
  end
29
20
 
30
21
  def run
31
- init
32
- GeneValidator.create_output_folder(@config[:html_path], @config[:aux])
33
- @json_array.each do |row|
34
- @config[:run_no] += 1
35
- create_json_file(row)
36
- output_html = output_filename
37
- generate_html_header(output_html) unless File.exist?(output_html)
38
- generate_html_query(output_html, row)
22
+ warn '==> Parsing input JSON results'
23
+ print_console_header(config[:json_output][0])
24
+ config[:json_output].each do |row|
25
+ print_output_console(row)
26
+ create_row_json_plot_files(row)
39
27
  end
40
- html_footer
41
- calculate_overall_score
28
+ GeneValidator.produce_output
42
29
  end
43
30
 
44
- def load_json_file
45
- json_contents = File.read(File.expand_path(@opt[:json_file]))
46
- JSON.load(json_contents)
31
+ def print_console_header(first_row)
32
+ return unless opt[:output_formats].include? 'stdout'
33
+ return if config[:console_header_printed]
34
+ config[:console_header_printed] = true
35
+ warn '' # blank line
36
+ c_fmt = "%3s\t%5s\t%20s\t%7s\t"
37
+ print format(c_fmt, 'No', 'Score', 'Identifier', 'No_Hits')
38
+ puts first_row[:validations].keys.join("\t")
47
39
  end
48
40
 
49
- def create_json_file(row)
50
- @json_file = File.join(@config[:plot_dir],
51
- "#{@config[:filename]}_#{row['idx']}.json")
52
- File.open(@json_file, 'w') { |f| f.write(row.to_json) }
53
- end
54
-
55
- def output_filename
56
- i = (@config[:run_no].to_f / @config[:output_max]).ceil
57
- File.join(@config[:html_path], "results#{i}.html")
58
- end
59
-
60
- def generate_html_header(output_html)
61
- return if File.exist?(output_html)
62
- json_header_template = File.join(@config[:aux], 'json_header.erb')
63
- template_contents = File.open(json_header_template, 'r').read
64
- erb = ERB.new(template_contents, 0, '>')
65
- File.open(output_html, 'w+') { |f| f.write(erb.result(binding)) }
66
- end
67
-
68
- def generate_html_query(output_html, row)
69
- @row = row
70
- json_query_template = File.join(@config[:aux], 'json_query.erb')
71
- template_contents = File.open(json_query_template, 'r').read
72
- erb = ERB.new(template_contents, 0, '>')
73
- File.open(output_html, 'a') { |f| f.write(erb.result(binding)) }
74
- end
75
-
76
- # Add footer to all output files
77
- def html_footer
78
- no_of_output_files = (@config[:run_no].to_f / @config[:output_max]).ceil
79
-
80
- output_files = []
81
- (1..no_of_output_files).each { |i| output_files << "results#{i}.html" }
82
-
83
- write_html_footer(no_of_output_files, output_files)
84
- end
85
-
86
- def write_html_footer(no_of_output_files, output_files)
87
- turn_off_automated_sorting
88
- json_footer_template = File.join(@config[:aux], 'json_footer.erb')
89
- template_contents = File.open(json_footer_template, 'r').read
90
- erb = ERB.new(template_contents, 0, '>')
91
- (1..no_of_output_files).each do |i|
92
- results_html = File.join(@config[:html_path], "results#{i}.html")
93
- File.open(results_html, 'a+') { |f| f.write(erb.result(binding)) }
41
+ def print_output_console(row)
42
+ return unless opt[:output_formats].include? 'stdout'
43
+ c_fmt = "%3s\t%5s\t%20s\t%7s\t"
44
+ short_def = row[:definition].split(' ')[0]
45
+ print format(c_fmt, row[:idx], row[:overall_score], short_def,
46
+ row[:no_hits])
47
+ puts row[:validations].values.map { |e| e[:print] }.join("\t")
48
+ .gsub('&nbsp;', ' ')
94
49
  end
95
- end
96
-
97
- # By default, on page load, the results are automatically sorted by the
98
- # index. However since the whole idea is that users would sort by JSON,
99
- # this is not wanted here.
100
- def turn_off_automated_sorting
101
- script_file = File.join(@config[:html_path],
102
- 'files/js/genevalidator.compiled.min.js')
103
- original_content = File.read(script_file)
104
- # removes the automatic sort on page load
105
- updated_content = original_content.gsub(',sortList:[[0,0]]', '')
106
- File.open("#{script_file}.tmp", 'w') { |f| f.puts updated_content }
107
- FileUtils.mv("#{script_file}.tmp", script_file)
108
- end
109
50
 
110
- def calculate_overall_score
111
- scores = []
112
- @json_array.each { |row| scores << row['overall_score'] }
113
- plot_dir = File.join(@config[:html_path], 'files/json')
114
- less = generate_evaluation(scores)
115
- Output.create_overview_json(scores, plot_dir, less, less)
116
- end
117
-
118
- def generate_evaluation(scores)
119
- no_of_queries = scores.length
120
- good_scores = scores.count { |s| s >= 75 }
121
- bad_scores = scores.count { |s| s < 75 }
122
- nee = calculate_no_quries_with_no_evidence # nee = no evidence
51
+ private
123
52
 
124
- good_pred = (good_scores == 1) ? 'One' : "#{good_scores} are"
125
- bad_pred = (bad_scores == 1) ? 'One' : "#{bad_scores} are"
126
- eval = 'Overall Query Score Evaluation:<br>' \
127
- "#{no_of_queries} predictions were validated, from which there" \
128
- ' were:<br>' \
129
- "#{good_pred} good prediction(s),<br>" \
130
- "#{bad_pred} possibly weak prediction(s).<br>"
131
- return eval if nee == 0
132
- eval << "#{nee} could not be evaluated due to the lack of" \
133
- ' evidence.<br>'
134
- eval
135
- end
53
+ def load_json_file
54
+ json_contents = File.read(File.expand_path(opt[:json_file]))
55
+ JSON.parse(json_contents, symbolize_names: true)
56
+ end
136
57
 
137
- # calculate number of queries that had warnings for all validations.
138
- def calculate_no_quries_with_no_evidence
139
- all_warnings = 0
140
- @json_array.each do |row|
141
- status = row['validations'].map { |_, h| h['status'] }
142
- if status.count { |r| r == 'warning' } == status.length
143
- all_warnings += 1
144
- end
58
+ def create_row_json_plot_files(row)
59
+ config[:run_no] += 1
60
+ fname = "#{dirs[:filename]}_#{row[:idx]}.json"
61
+ json_file = File.join(dirs[:json_dir], fname)
62
+ File.open(json_file, 'w') { |f| f.write(row.to_json) }
145
63
  end
146
- all_warnings
147
- end
148
64
  end
149
65
  end
150
66
  end