genevalidator 1.6.12 → 2.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +30 -1
  3. data/.ruby-version +1 -0
  4. data/.travis.yml +13 -12
  5. data/Gemfile +4 -1
  6. data/Gemfile.lock +135 -0
  7. data/README.md +104 -122
  8. data/Rakefile +377 -5
  9. data/aux/gv_results.slim +155 -0
  10. data/aux/html_files/css/gv.compiled.min.css +8 -0
  11. data/aux/{files → html_files}/css/src/bootstrap.min.css +0 -0
  12. data/aux/{files → html_files}/css/src/font-awesome.min.css +0 -0
  13. data/aux/{files → html_files}/css/src/style.css +0 -0
  14. data/aux/{files → html_files}/fonts/FontAwesome.otf +0 -0
  15. data/aux/{files → html_files}/fonts/fontawesome-webfont.eot +0 -0
  16. data/aux/{files → html_files}/fonts/fontawesome-webfont.svg +0 -0
  17. data/aux/{files → html_files}/fonts/fontawesome-webfont.ttf +0 -0
  18. data/aux/{files → html_files}/fonts/fontawesome-webfont.woff +0 -0
  19. data/aux/{files → html_files}/img/gene.png +0 -0
  20. data/aux/html_files/js/gv.compiled.min.js +1 -0
  21. data/aux/{files → html_files}/js/src/bootstrap.min.js +0 -0
  22. data/aux/{files → html_files}/js/src/d3.v3.min.js +0 -0
  23. data/aux/{files → html_files}/js/src/jquery-2.1.1.min.js +0 -0
  24. data/aux/{files → html_files}/js/src/jquery.tablesorter.min.js +0 -0
  25. data/aux/{files → html_files}/js/src/plots.js +1 -1
  26. data/aux/{files → html_files}/js/src/script.js +0 -0
  27. data/aux/{files → html_files}/json/.gitkeep +0 -0
  28. data/bin/genevalidator +393 -56
  29. data/exemplar_data/README.md +60 -0
  30. data/{data/mrna_data.fasta → exemplar_data/mrna_data.fa} +1 -1
  31. data/{data/protein_data.fasta → exemplar_data/protein_data.fa} +0 -0
  32. data/genevalidator.gemspec +35 -20
  33. data/install.sh +92 -0
  34. data/lib/genevalidator.rb +171 -56
  35. data/lib/genevalidator/arg_validation.rb +26 -55
  36. data/lib/genevalidator/blast.rb +44 -99
  37. data/lib/genevalidator/clusterization.rb +18 -22
  38. data/lib/genevalidator/exceptions.rb +17 -17
  39. data/lib/genevalidator/ext/array.rb +21 -4
  40. data/lib/genevalidator/get_raw_sequences.rb +32 -31
  41. data/lib/genevalidator/hsp.rb +31 -2
  42. data/lib/genevalidator/json_to_gv_results.rb +38 -122
  43. data/lib/genevalidator/output.rb +158 -172
  44. data/lib/genevalidator/output_files.rb +134 -0
  45. data/lib/genevalidator/pool.rb +2 -5
  46. data/lib/genevalidator/query.rb +1 -1
  47. data/lib/genevalidator/tabular_parser.rb +8 -29
  48. data/lib/genevalidator/validation.rb +48 -90
  49. data/lib/genevalidator/validation_alignment.rb +64 -75
  50. data/lib/genevalidator/validation_blast_reading_frame.rb +13 -9
  51. data/lib/genevalidator/validation_duplication.rb +85 -84
  52. data/lib/genevalidator/validation_gene_merge.rb +46 -35
  53. data/lib/genevalidator/validation_length_cluster.rb +18 -15
  54. data/lib/genevalidator/validation_length_rank.rb +19 -15
  55. data/lib/genevalidator/validation_maker_qi.rb +13 -12
  56. data/lib/genevalidator/validation_open_reading_frame.rb +16 -13
  57. data/lib/genevalidator/validation_report.rb +1 -1
  58. data/lib/genevalidator/validation_test.rb +1 -1
  59. data/lib/genevalidator/version.rb +1 -1
  60. data/test/overall.rb +1 -1
  61. data/test/test_all_validations.rb +36 -24
  62. data/test/test_blast.rb +39 -24
  63. data/test/test_clusterization_2d.rb +4 -4
  64. data/test/test_helper.rb +2 -2
  65. data/test/test_query.rb +16 -20
  66. data/test/test_validation_open_reading_frame.rb +122 -122
  67. data/test/test_validations.rb +12 -10
  68. metadata +94 -79
  69. data/aux/files/css/genevalidator.compiled.min.css +0 -16
  70. data/aux/files/js/genevalidator.compiled.min.js +0 -28
  71. data/aux/json_footer.erb +0 -8
  72. data/aux/json_header.erb +0 -19
  73. data/aux/json_query.erb +0 -15
  74. data/aux/template_footer.erb +0 -8
  75. data/aux/template_header.erb +0 -19
  76. data/aux/template_query.erb +0 -14
  77. data/data/README.md +0 -57
  78. data/data/mrna_data.fasta.blast_tabular +0 -3567
  79. data/data/mrna_data.fasta.blast_tabular.raw_seq +0 -53998
  80. data/data/mrna_data.fasta.blast_tabular.raw_seq.idx +0 -5440
  81. data/data/mrna_data.fasta.blast_xml +0 -39800
  82. data/data/mrna_data.fasta.blast_xml.raw_seq +0 -2554
  83. data/data/mrna_data.fasta.blast_xml.raw_seq.idx +0 -3127
  84. data/data/mrna_data.fasta.json +0 -1
  85. data/data/protein_data.fasta.blast_tabular +0 -3278
  86. data/data/protein_data.fasta.blast_tabular.raw_seq +0 -61295
  87. data/data/protein_data.fasta.blast_tabular.raw_seq.idx +0 -4438
  88. data/data/protein_data.fasta.blast_xml +0 -26228
  89. data/data/protein_data.fasta.blast_xml.raw_seq +0 -9803
  90. data/data/protein_data.fasta.blast_xml.raw_seq.idx +0 -1777
  91. data/data/protein_data.fasta.json +0 -1
@@ -1,11 +1,11 @@
1
1
  module GeneValidator
2
2
  # Exception raised when BLAST path is not added to the CLASSPATH
3
- class ClasspathError < Exception
3
+ class ClasspathError < RuntimeError
4
4
  end
5
5
 
6
6
  # Exception raised when the command line type argument
7
7
  # does not correspond to the type of the sequences in the fasta file
8
- class SequenceTypeError < Exception
8
+ class SequenceTypeError < RuntimeError
9
9
  def to_s
10
10
  "\nSequence Type error: Possible cause include that the blast output" \
11
11
  " was not obtained against a protein database.\n"
@@ -13,15 +13,15 @@ module GeneValidator
13
13
  end
14
14
 
15
15
  # Exception raised when a nonexistent file is accessed
16
- class FileNotFoundException < Exception
16
+ class FileNotFoundException < RuntimeError
17
17
  end
18
18
 
19
19
  # Exception raised when blast does not find any hit
20
- class QueryError < Exception
20
+ class QueryError < RuntimeError
21
21
  end
22
22
 
23
23
  # Exception raised when a validation class is not instance of ValidationTest
24
- class ValidationClassError < Exception
24
+ class ValidationClassError < RuntimeError
25
25
  def to_s
26
26
  "\nClass Type error: Possible cause include that one of the validations" \
27
27
  " is not a sub-class of ValidationTest\n"
@@ -30,7 +30,7 @@ module GeneValidator
30
30
 
31
31
  # Exception raised when a validation report class is not instance of
32
32
  # ValidationReport
33
- class ReportClassError < Exception
33
+ class ReportClassError < RuntimeError
34
34
  def to_s
35
35
  "\nClass Type error: Possible causes include that the type of one of" \
36
36
  ' the validation reports is not a subclass of the ValidationReport' \
@@ -40,20 +40,20 @@ module GeneValidator
40
40
 
41
41
  # Exception raised when there are not enough blast hits to make a statistical
42
42
  # validation
43
- class NotEnoughHitsError < Exception
43
+ class NotEnoughHitsError < RuntimeError
44
44
  end
45
45
 
46
46
  # Exception raised when a function depending on the internet connection raises
47
47
  # Exception
48
- class NoInternetError < Exception
48
+ class NoInternetError < RuntimeError
49
49
  end
50
50
 
51
51
  # Exception raised when the alignment initialization raises exception
52
- class NoMafftInstallationError < Exception
52
+ class NoMafftInstallationError < RuntimeError
53
53
  end
54
54
 
55
55
  # Exception raised when the -v argument didn't filter any validation test
56
- class NoValidationError < Exception
56
+ class NoValidationError < RuntimeError
57
57
  def to_s
58
58
  "\nValidation error: Possible cause inlcude that the -v arguments" \
59
59
  " supplied is not valid\n"
@@ -61,7 +61,7 @@ module GeneValidator
61
61
  end
62
62
 
63
63
  # Exception raised when there are alias duplications
64
- class AliasDuplicationError < Exception
64
+ class AliasDuplicationError < RuntimeError
65
65
  def to_s
66
66
  "\nAlias Duplication error: Possible cause: At least two validations" \
67
67
  " have the same CLI alias\n"
@@ -69,27 +69,27 @@ module GeneValidator
69
69
  end
70
70
 
71
71
  # Exception raised when the BLAST database is not set up with the '-parse_seqids' arg.
72
- class BLASTDBError < Exception
72
+ class BLASTDBError < RuntimeError
73
73
  end
74
74
 
75
75
  # Error raised by QI Validation when the query does not have QI tag
76
- class NotEnoughEvidence < Exception
76
+ class NotEnoughEvidence < RuntimeError
77
77
  end
78
78
 
79
79
  # Exception raised when there are alias duplications
80
- class NoPIdentError < Exception
80
+ class NoPIdentError < RuntimeError
81
81
  end
82
82
 
83
83
  # Exception raised when the tabular format does not correspond to the tabular
84
84
  # argument
85
- class InconsistentTabularFormat < Exception
85
+ class InconsistentTabularFormat < RuntimeError
86
86
  end
87
87
 
88
88
  # Exception raised when there are more than one reading frame among the hits
89
89
  # of one prediction
90
- class ReadingFrameError < Exception
90
+ class ReadingFrameError < RuntimeError
91
91
  end
92
92
 
93
- class OtherError < Exception
93
+ class OtherError < RuntimeError
94
94
  end
95
95
  end
@@ -9,15 +9,15 @@ module GeneValidator
9
9
  sum / length.to_f
10
10
  end
11
11
 
12
- def median
13
- sorted = sort
12
+ def median(already_sorted = false)
13
+ sorted = already_sorted ? self : sort
14
14
  len = sorted.length
15
15
  (sorted[(len - 1) / 2] + sorted[len / 2]) / 2.0
16
16
  end
17
17
 
18
18
  def mode
19
- freq = inject(Hash.new(0)) { |h, v| h[v] += 1; h }
20
- sort_by { |v| freq[v] }.last
19
+ freq = each_with_object(Hash.new(0)) { |v, h| h[v] += 1; }
20
+ max_by { |v| freq[v] }
21
21
  end
22
22
 
23
23
  def sample_variance
@@ -29,6 +29,23 @@ module GeneValidator
29
29
  def standard_deviation
30
30
  Math.sqrt(sample_variance)
31
31
  end
32
+
33
+ def all_quartiles
34
+ sorted = sort
35
+ len = sorted.length
36
+ split = sorted.median_split
37
+ [
38
+ split[0].median(true),
39
+ sorted.median(true),
40
+ split[1].median(true)
41
+ ]
42
+ end
43
+
44
+ def median_split
45
+ len = length
46
+ center = len % 2
47
+ [self[0..len / 2 - 1], self[len / 2 + center..-1]]
48
+ end
32
49
  end
33
50
  end
34
51
 
@@ -13,24 +13,25 @@ module GeneValidator
13
13
  class RawSequences
14
14
  class <<self
15
15
  extend Forwardable
16
- def_delegators GeneValidator, :opt, :config
16
+ def_delegators GeneValidator, :opt, :config, :dirs
17
17
 
18
18
  def init
19
- $stderr.puts 'Extracting sequences within the BLAST output file from' \
20
- ' the BLAST database'
19
+ warn '==> Extracting fasta sequences for each BLAST HSP from the' \
20
+ ' BLAST database'
21
21
 
22
22
  @blast_file = opt[:blast_xml_file] if opt[:blast_xml_file]
23
23
  @blast_file = opt[:blast_tabular_file] if opt[:blast_tabular_file]
24
24
 
25
- opt[:raw_sequences] = @blast_file + '.raw_seq'
26
- @index_file = @blast_file + '.index'
25
+ fname = File.basename(@blast_file)
26
+ opt[:raw_sequences] = File.join(dirs[:tmp_dir], "#{fname}.raw_seq")
27
+ @index_file = File.join(dirs[:tmp_dir], "#{fname}.index")
27
28
  end
28
29
 
29
30
  ##
30
31
  # Obtains raw_sequences from BLAST output file...
31
32
  def run
32
33
  init
33
- if opt[:db] =~ /remote/
34
+ if opt[:db].match?(/remote/)
34
35
  write_a_raw_seq_file(opt[:raw_sequences], 'remote')
35
36
  else
36
37
  write_an_index_file(@index_file, 'local')
@@ -55,12 +56,13 @@ module GeneValidator
55
56
  index_hash = {}
56
57
  keys.each_with_index do |k, i|
57
58
  start = values[i]
58
- endf = (i == values.length - 1) ? content.length - 1 : values[i + 1]
59
+ endf = i == values.length - 1 ? content.length - 1 : values[i + 1]
59
60
  index_hash[k] = [start, endf]
60
61
  end
61
62
 
62
63
  # create FASTA index
63
- config[:raw_seq_file_index] = "#{raw_seq_file}.idx"
64
+ fname = File.basename(raw_seq_file)
65
+ config[:raw_seq_file_index] = File.join(dirs[:tmp_dir], "#{fname}.idx")
64
66
  config[:raw_seq_file_load] = index_hash
65
67
 
66
68
  File.open(config[:raw_seq_file_index], 'w') do |f|
@@ -76,29 +78,29 @@ module GeneValidator
76
78
  iterate_xml(file, db_type) if opt[:blast_xml_file]
77
79
  iterate_tabular(file, db_type) if opt[:blast_tabular_file]
78
80
  rescue BLASTDBError
79
- $stderr.puts "*** BLAST Database Error: Genevalidator requires BLAST" \
81
+ warn '*** BLAST Database Error: Genevalidator requires BLAST' \
80
82
  " databases to be created with the '-parse_seqids argument."
81
- $stderr.puts " See https://github.com/wurmlab/genevalidator" \
82
- "#setting-up-a-blast-database for more information"
83
+ warn ' See https://github.com/wurmlab/genevalidator' \
84
+ '#setting-up-a-blast-database for more information'
83
85
  exit 1
84
- rescue
85
- $stderr.puts '*** Error: There was an error in analysing the BLAST'
86
- $stderr.puts ' output file. Please ensure that BLAST output file'
87
- $stderr.puts ' is in the correct format and then try again. If you'
88
- $stderr.puts ' are using a remote database, please ensure that you'
89
- $stderr.puts ' have internet access.'
86
+ rescue StandardError
87
+ warn '*** Error: There was an error in analysing the BLAST'
88
+ warn ' output file. Please ensure that BLAST output file'
89
+ warn ' is in the correct format and then try again. If you'
90
+ warn ' are using a remote database, please ensure that you'
91
+ warn ' have internet access.'
90
92
  exit 1
91
93
  ensure
92
94
  file.close unless file.nil?
93
95
  end
94
96
 
95
- alias_method :write_a_raw_seq_file, :write_an_index_file
97
+ alias write_a_raw_seq_file write_an_index_file
96
98
 
97
99
  def iterate_xml(file, db_type)
98
100
  n = Bio::BlastXMLParser::XmlIterator.new(opt[:blast_xml_file]).to_enum
99
101
  n.each do |iter|
100
102
  iter.each do |hit|
101
- fail BLASTDBError if hit.hit_id =~ /\|BL_ORD_ID\|/
103
+ raise BLASTDBError if hit.hit_id =~ /\|BL_ORD_ID\|/
102
104
  if db_type == 'remote' || hit.hit_id.nil?
103
105
  file.puts FetchRawSequences.extract_from_remote_db(hit.accession)
104
106
  else
@@ -116,7 +118,7 @@ module GeneValidator
116
118
  headers: table_headers)
117
119
 
118
120
  rows.each do |row|
119
- fail BLASTDBError if row['sseqid'] =~ /\|BL_ORD_ID\|/
121
+ raise BLASTDBError if row['sseqid'] =~ /\|BL_ORD_ID\|/i
120
122
  if db_type == 'remote' || row['sseqid'].nil?
121
123
  file.puts FetchRawSequences.extract_from_remote_db(row['sacc'])
122
124
  else
@@ -136,15 +138,15 @@ module GeneValidator
136
138
  # first try to extract from previously created raw_sequences HASH
137
139
  raw_seq = extract_from_index(identifier) if opt[:raw_sequences]
138
140
  # then try to just extract that sequence based on accession.
139
- if opt[:db] !~ /remote/ && (raw_seq.nil? || raw_seq =~ /Error/)
141
+ if opt[:db] !~ /remote/ && (raw_seq.nil? || raw_seq =~ /Error/i)
140
142
  raw_seq = extract_from_local_db(false, accession)
141
143
  end
142
144
  # then try to extract from remote database
143
- if opt[:db] =~ /remote/ && (raw_seq.nil? || raw_seq =~ /Error/)
145
+ if opt[:db] =~ /remote/ && (raw_seq.nil? || raw_seq =~ /Error/i)
144
146
  raw_seq = extract_from_remote_db(accession)
145
147
  end
146
148
  # return nil if the raw_sequence still produces an error.
147
- (raw_seq =~ /Error/) ? nil : raw_seq
149
+ raw_seq =~ /Error/i ? nil : raw_seq
148
150
  end
149
151
 
150
152
  ##
@@ -157,8 +159,8 @@ module GeneValidator
157
159
  idx = config[:raw_seq_file_load][identifier]
158
160
  query = IO.binread(opt[:raw_sequences], idx[1] - idx[0], idx[0])
159
161
  parse_query = query.scan(/>([^\n]*)\n([A-Za-z\n]*)/)[0]
160
- parse_query[1].gsub("\n", '')
161
- rescue
162
+ parse_query[1].delete("\n")
163
+ rescue StandardError
162
164
  'Error' # return error so it can then try alternative fetching method.
163
165
  end
164
166
 
@@ -170,7 +172,7 @@ module GeneValidator
170
172
  # Output:
171
173
  # String with the nucleotide sequence corresponding to the accession
172
174
  def extract_from_local_db(batch, accno = nil, idx_file = nil)
173
- cmd = (batch) ? batch_raw_seq_cmd(idx_file) : single_raw_seq_cmd(accno)
175
+ cmd = batch ? batch_raw_seq_cmd(idx_file) : single_raw_seq_cmd(accno)
174
176
  efile = Tempfile.new('blast_out')
175
177
  `#{cmd} &>#{efile.path}`
176
178
  raw_seqs = efile.read
@@ -193,9 +195,9 @@ module GeneValidator
193
195
  def failed_raw_sequences(blast_output)
194
196
  blast_output.each_line do |line|
195
197
  acc = line.match(/Error: (\w+): OID not found/)[1]
196
- $stderr.puts "\nCould not find sequence '#{acc.chomp}' within the" \
198
+ warn "\nCould not find sequence '#{acc.chomp}' within the" \
197
199
  ' BLAST database.'
198
- $stderr.puts "Attempting to obtain sequence '#{acc.chomp}' from" \
200
+ warn "Attempting to obtain sequence '#{acc.chomp}' from" \
199
201
  ' remote BLAST databases.'
200
202
  File.open(opt[:raw_sequences], 'a+') do |f|
201
203
  f.puts extract_from_remote_db(acc)
@@ -204,13 +206,12 @@ module GeneValidator
204
206
  end
205
207
 
206
208
  def extract_from_remote_db(accession, db_seq_type = 'protein')
207
- uri = 'http://www.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?' \
209
+ uri = 'https://www.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?' \
208
210
  "db=#{db_seq_type}&retmax=1&usehistory=y&term=#{accession}/"
209
211
  result = Net::HTTP.get(URI.parse(uri))
210
212
  query = result.match(%r{<\bQueryKey\b>([\w\W\d]+)</\bQueryKey\b>})[1]
211
213
  web_env = result.match(%r{<\bWebEnv\b>([\w\W\d]+)</\bWebEnv\b>})[1]
212
-
213
- uri = 'http://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?' \
214
+ uri = 'https://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?' \
214
215
  'rettype=fasta&retmode=text&retstart=0&retmax=1&' \
215
216
  "db=#{db_seq_type}&query_key=#{query}&WebEnv=#{web_env}"
216
217
  result = Net::HTTP.get(URI.parse(uri))
@@ -1,9 +1,14 @@
1
+ require 'forwardable'
2
+
1
3
  require 'genevalidator/blast'
2
4
  require 'genevalidator/exceptions'
3
5
 
4
6
  module GeneValidator
5
7
  # A class that initialises the BLAST tabular attributes
6
8
  class Hsp
9
+ extend Forwardable
10
+ def_delegators GeneValidator, :config
11
+
7
12
  attr_accessor :hit_from # ref. from the unaligned hit sequence
8
13
  attr_accessor :hit_to
9
14
  attr_accessor :match_query_from # ref. from the unaligned query sequence
@@ -23,9 +28,30 @@ module GeneValidator
23
28
  attr_accessor :gaps
24
29
  attr_accessor :align_len
25
30
 
26
- def initialize
31
+ def initialize(input = {})
27
32
  @query_alignment = nil
28
33
  @hit_alignment = nil
34
+ init_xml_attributes(input[:xml_input]) if input[:xml_input]
35
+ init_tabular_attribute(input[:tabular_input]) if input[:tabular_input]
36
+ end
37
+
38
+ def init_xml_attributes(hsp)
39
+ @match_query_from = hsp.query_from.to_i
40
+ @match_query_to = hsp.query_to.to_i
41
+ @query_reading_frame = hsp.query_frame.to_i
42
+ @hit_from = hsp.hit_from.to_i
43
+ @hit_to = hsp.hit_to.to_i
44
+ @query_alignment = hsp.qseq.to_s
45
+ @hit_alignment = hsp.hseq.to_s
46
+ @align_len = hsp.align_len.to_i
47
+ @pidentity = (100 * hsp.identity / hsp.align_len.to_f).round(2)
48
+ @identity = hsp.identity.to_i
49
+ @hsp_evalue = format('%.0e', hsp.evalue)
50
+ assert_seq_type(@hit_alignment) if @hit_alignment
51
+ assert_seq_type(@query_alignment) if @query_alignment
52
+ return unless config[:type] == :nucleotide
53
+ @match_query_from = (@match_query_from / 3) + 1
54
+ @match_query_to = (@match_query_to / 3) + 1
29
55
  end
30
56
 
31
57
  ##
@@ -52,7 +78,10 @@ module GeneValidator
52
78
 
53
79
  def assert_seq_type(query)
54
80
  seq_type = BlastUtils.guess_sequence_type(query)
55
- fail SequenceTypeError if seq_type != :protein
81
+ raise SequenceTypeError if seq_type != :protein
82
+ rescue SequenceTypeError => e
83
+ warn e
84
+ exit 1
56
85
  end
57
86
  end
58
87
  end
@@ -1,10 +1,7 @@
1
- require 'erb'
2
- require 'fileutils'
3
1
  require 'forwardable'
4
2
  require 'json'
5
3
 
6
4
  require 'genevalidator'
7
- require 'genevalidator/output'
8
5
  require 'genevalidator/version'
9
6
 
10
7
  module GeneValidator
@@ -12,139 +9,58 @@ module GeneValidator
12
9
  class JsonToGVResults
13
10
  class << self
14
11
  extend Forwardable
15
- def_delegators GeneValidator, :opt
12
+ def_delegators GeneValidator, :opt, :config, :dirs
16
13
 
17
- def init
18
- @opt = opt
19
- @config = {
20
- html_path: "#{@opt[:json_file]}.html",
21
- plot_dir: "#{@opt[:json_file]}.html/files/json",
22
- aux: File.expand_path(File.join(File.dirname(__FILE__), '../../aux')),
23
- filename: File.basename(@opt[:json_file]),
24
- output_max: 2500,
25
- run_no: 0
26
- }
27
- @json_array = load_json_file
14
+ def init(opt)
15
+ GeneValidator.opt = opt
16
+ GeneValidator.config = { output_max: 2500, run_no: 0,
17
+ json_output: load_json_file }
18
+ GeneValidator.dirs = GeneValidator.setup_dirnames(opt[:json_file])
28
19
  end
29
20
 
30
21
  def run
31
- init
32
- GeneValidator.create_output_folder(@config[:html_path], @config[:aux])
33
- @json_array.each do |row|
34
- @config[:run_no] += 1
35
- create_json_file(row)
36
- output_html = output_filename
37
- generate_html_header(output_html) unless File.exist?(output_html)
38
- generate_html_query(output_html, row)
22
+ warn '==> Parsing input JSON results'
23
+ print_console_header(config[:json_output][0])
24
+ config[:json_output].each do |row|
25
+ print_output_console(row)
26
+ create_row_json_plot_files(row)
39
27
  end
40
- html_footer
41
- calculate_overall_score
28
+ GeneValidator.produce_output
42
29
  end
43
30
 
44
- def load_json_file
45
- json_contents = File.read(File.expand_path(@opt[:json_file]))
46
- JSON.load(json_contents)
31
+ def print_console_header(first_row)
32
+ return unless opt[:output_formats].include? 'stdout'
33
+ return if config[:console_header_printed]
34
+ config[:console_header_printed] = true
35
+ warn '' # blank line
36
+ c_fmt = "%3s\t%5s\t%20s\t%7s\t"
37
+ print format(c_fmt, 'No', 'Score', 'Identifier', 'No_Hits')
38
+ puts first_row[:validations].keys.join("\t")
47
39
  end
48
40
 
49
- def create_json_file(row)
50
- @json_file = File.join(@config[:plot_dir],
51
- "#{@config[:filename]}_#{row['idx']}.json")
52
- File.open(@json_file, 'w') { |f| f.write(row.to_json) }
53
- end
54
-
55
- def output_filename
56
- i = (@config[:run_no].to_f / @config[:output_max]).ceil
57
- File.join(@config[:html_path], "results#{i}.html")
58
- end
59
-
60
- def generate_html_header(output_html)
61
- return if File.exist?(output_html)
62
- json_header_template = File.join(@config[:aux], 'json_header.erb')
63
- template_contents = File.open(json_header_template, 'r').read
64
- erb = ERB.new(template_contents, 0, '>')
65
- File.open(output_html, 'w+') { |f| f.write(erb.result(binding)) }
66
- end
67
-
68
- def generate_html_query(output_html, row)
69
- @row = row
70
- json_query_template = File.join(@config[:aux], 'json_query.erb')
71
- template_contents = File.open(json_query_template, 'r').read
72
- erb = ERB.new(template_contents, 0, '>')
73
- File.open(output_html, 'a') { |f| f.write(erb.result(binding)) }
74
- end
75
-
76
- # Add footer to all output files
77
- def html_footer
78
- no_of_output_files = (@config[:run_no].to_f / @config[:output_max]).ceil
79
-
80
- output_files = []
81
- (1..no_of_output_files).each { |i| output_files << "results#{i}.html" }
82
-
83
- write_html_footer(no_of_output_files, output_files)
84
- end
85
-
86
- def write_html_footer(no_of_output_files, output_files)
87
- turn_off_automated_sorting
88
- json_footer_template = File.join(@config[:aux], 'json_footer.erb')
89
- template_contents = File.open(json_footer_template, 'r').read
90
- erb = ERB.new(template_contents, 0, '>')
91
- (1..no_of_output_files).each do |i|
92
- results_html = File.join(@config[:html_path], "results#{i}.html")
93
- File.open(results_html, 'a+') { |f| f.write(erb.result(binding)) }
41
+ def print_output_console(row)
42
+ return unless opt[:output_formats].include? 'stdout'
43
+ c_fmt = "%3s\t%5s\t%20s\t%7s\t"
44
+ short_def = row[:definition].split(' ')[0]
45
+ print format(c_fmt, row[:idx], row[:overall_score], short_def,
46
+ row[:no_hits])
47
+ puts row[:validations].values.map { |e| e[:print] }.join("\t")
48
+ .gsub('&nbsp;', ' ')
94
49
  end
95
- end
96
-
97
- # By default, on page load, the results are automatically sorted by the
98
- # index. However since the whole idea is that users would sort by JSON,
99
- # this is not wanted here.
100
- def turn_off_automated_sorting
101
- script_file = File.join(@config[:html_path],
102
- 'files/js/genevalidator.compiled.min.js')
103
- original_content = File.read(script_file)
104
- # removes the automatic sort on page load
105
- updated_content = original_content.gsub(',sortList:[[0,0]]', '')
106
- File.open("#{script_file}.tmp", 'w') { |f| f.puts updated_content }
107
- FileUtils.mv("#{script_file}.tmp", script_file)
108
- end
109
50
 
110
- def calculate_overall_score
111
- scores = []
112
- @json_array.each { |row| scores << row['overall_score'] }
113
- plot_dir = File.join(@config[:html_path], 'files/json')
114
- less = generate_evaluation(scores)
115
- Output.create_overview_json(scores, plot_dir, less, less)
116
- end
117
-
118
- def generate_evaluation(scores)
119
- no_of_queries = scores.length
120
- good_scores = scores.count { |s| s >= 75 }
121
- bad_scores = scores.count { |s| s < 75 }
122
- nee = calculate_no_quries_with_no_evidence # nee = no evidence
51
+ private
123
52
 
124
- good_pred = (good_scores == 1) ? 'One' : "#{good_scores} are"
125
- bad_pred = (bad_scores == 1) ? 'One' : "#{bad_scores} are"
126
- eval = 'Overall Query Score Evaluation:<br>' \
127
- "#{no_of_queries} predictions were validated, from which there" \
128
- ' were:<br>' \
129
- "#{good_pred} good prediction(s),<br>" \
130
- "#{bad_pred} possibly weak prediction(s).<br>"
131
- return eval if nee == 0
132
- eval << "#{nee} could not be evaluated due to the lack of" \
133
- ' evidence.<br>'
134
- eval
135
- end
53
+ def load_json_file
54
+ json_contents = File.read(File.expand_path(opt[:json_file]))
55
+ JSON.parse(json_contents, symbolize_names: true)
56
+ end
136
57
 
137
- # calculate number of queries that had warnings for all validations.
138
- def calculate_no_quries_with_no_evidence
139
- all_warnings = 0
140
- @json_array.each do |row|
141
- status = row['validations'].map { |_, h| h['status'] }
142
- if status.count { |r| r == 'warning' } == status.length
143
- all_warnings += 1
144
- end
58
+ def create_row_json_plot_files(row)
59
+ config[:run_no] += 1
60
+ fname = "#{dirs[:filename]}_#{row[:idx]}.json"
61
+ json_file = File.join(dirs[:json_dir], fname)
62
+ File.open(json_file, 'w') { |f| f.write(row.to_json) }
145
63
  end
146
- all_warnings
147
- end
148
64
  end
149
65
  end
150
66
  end