RubyGems - genevalidator - Versions diffs - 1.6.1 → 1.6.2 - Mend

genevalidator 1.6.1 → 1.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (131)

checksums.yaml +4 -4
data/.gitignore +3 -1
data/.travis.yml +2 -0
data/README.md +78 -30
data/Rakefile +11 -8
data/aux/app_template_footer.erb +1 -6
data/aux/app_template_header.erb +12 -32
data/aux/files/css/style.css +2 -8
data/aux/files/js/plots.js +564 -576
data/aux/files/js/script.js +10 -0
data/aux/json_footer.erb +8 -0
data/aux/json_header.erb +19 -0
data/aux/json_query.erb +14 -0
data/aux/template_footer.erb +9 -58
data/aux/template_header.erb +18 -58
data/aux/template_query.erb +8 -36
data/bin/genevalidator +45 -32
data/genevalidator.gemspec +11 -7
data/lib/genevalidator.rb +75 -455
data/lib/genevalidator/arg_validation.rb +78 -107
data/lib/genevalidator/blast.rb +57 -60
data/lib/genevalidator/clusterization.rb +15 -15
data/lib/genevalidator/exceptions.rb +32 -5
data/lib/genevalidator/get_raw_sequences.rb +70 -33
data/lib/genevalidator/hsp.rb +1 -4
data/lib/genevalidator/json_to_gv_results.rb +109 -0
data/lib/genevalidator/output.rb +177 -185
data/lib/genevalidator/pool.rb +2 -1
data/lib/genevalidator/sequences.rb +3 -3
data/lib/genevalidator/tabular_parser.rb +24 -18
data/lib/genevalidator/validation.rb +279 -0
data/lib/genevalidator/validation_alignment.rb +31 -47
data/lib/genevalidator/validation_blast_reading_frame.rb +19 -18
data/lib/genevalidator/validation_duplication.rb +23 -19
data/lib/genevalidator/validation_gene_merge.rb +30 -65
data/lib/genevalidator/validation_length_cluster.rb +14 -53
data/lib/genevalidator/validation_length_rank.rb +10 -11
data/lib/genevalidator/validation_open_reading_frame.rb +18 -19
data/lib/genevalidator/validation_report.rb +2 -5
data/lib/genevalidator/validation_test.rb +8 -4
data/lib/genevalidator/version.rb +1 -1
data/test/test_all_validations.rb +51 -66
data/test/test_blast.rb +68 -51
data/test/test_clusterization.rb +1 -1
data/test/test_clusterization_2d.rb +19 -13
data/test/test_extended_array_methods.rb +1 -1
data/test/test_files/all_validations_mrna/mrna.blast_tab6 +1806 -0
data/test/test_files/all_validations_mrna/mrna.blast_tab7 +1865 -0
data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml → mrna.blast_xml} +18642 -1
data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml.index → mrna.blast_xml.index} +300 -0
data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta → mrna.fa} +0 -0
data/test/test_files/all_validations_mrna/mrna.raw_seq +3970 -0
data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml.raw_seq.idx → mrna.raw_seq.idx} +901 -1
data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_tab → prot.blast_tab6} +416 -0
data/test/test_files/all_validations_prot/prot.blast_tab7 +2400 -0
data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml → prot.blast_xml} +18299 -6723
data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.index → prot.blast_xml.index} +408 -0
data/test/test_files/all_validations_prot/{all_validations_prot.fasta → prot.fa} +0 -0
data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.raw_seq → prot.raw_seq} +2735 -0
data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.raw_seq.idx → prot.raw_seq.idx} +3032 -1808
data/test/test_sequences.rb +46 -41
data/test/test_validation_open_reading_frame.rb +318 -202
data/test/test_validations.rb +48 -32
metadata +76 -102
data/doc/AliasDuplicationError.html +0 -134
data/doc/AlignmentValidation.html +0 -1687
data/doc/AlignmentValidationOutput.html +0 -659
data/doc/Blast.html +0 -1905
data/doc/BlastRFValidationOutput.html +0 -545
data/doc/BlastReadingFrameValidation.html +0 -370
data/doc/BlastUtils.html +0 -875
data/doc/ClasspathError.html +0 -134
data/doc/Cluster.html +0 -1316
data/doc/DuplciationValidationOutput.html +0 -564
data/doc/DuplicationValidation.html +0 -920
data/doc/DuplicationValidationOutput.html +0 -564
data/doc/FileNotFoundException.html +0 -134
data/doc/GeneMergeValidation.html +0 -935
data/doc/GeneMergeValidationOutput.html +0 -652
data/doc/HierarchicalClusterization.html +0 -994
data/doc/Hsp.html +0 -1485
data/doc/InconsistentTabularFormat.html +0 -135
data/doc/LengthClusterValidation.html +0 -982
data/doc/LengthClusterValidationOutput.html +0 -515
data/doc/LengthRankValidation.html +0 -496
data/doc/LengthRankValidationOutput.html +0 -517
data/doc/NoInternetError.html +0 -135
data/doc/NoMafftInstallationError.html +0 -134
data/doc/NoPIdentError.html +0 -134
data/doc/NoValidationError.html +0 -134
data/doc/NotEnoughHitsError.html +0 -135
data/doc/ORFValidationOutput.html +0 -593
data/doc/OpenReadingFrameValidation.html +0 -1107
data/doc/OtherError.html +0 -123
data/doc/Output.html +0 -1540
data/doc/Pair.html +0 -309
data/doc/PairCluster.html +0 -767
data/doc/Plot.html +0 -837
data/doc/QueryError.html +0 -134
data/doc/ReportClassError.html +0 -135
data/doc/Sequence.html +0 -1299
data/doc/SequenceTypeError.html +0 -135
data/doc/TabularEntry.html +0 -837
data/doc/TabularParser.html +0 -1104
data/doc/Validation.html +0 -2147
data/doc/ValidationClassError.html +0 -134
data/doc/ValidationOutput.html +0 -460
data/doc/ValidationReport.html +0 -940
data/doc/ValidationTest.html +0 -939
data/doc/_index.html +0 -449
data/doc/class_list.html +0 -54
data/doc/css/common.css +0 -1
data/doc/css/full_list.css +0 -57
data/doc/css/style.css +0 -338
data/doc/file.README.html +0 -151
data/doc/file_list.html +0 -56
data/doc/frames.html +0 -26
data/doc/index.html +0 -151
data/doc/js/app.js +0 -214
data/doc/js/full_list.js +0 -178
data/doc/js/jquery.js +0 -4
data/doc/method_list.html +0 -1505
data/doc/top-level-namespace.html +0 -112
data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab +0 -967
data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.index +0 -967
data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.raw_seq +0 -4929
data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.raw_seq.idx +0 -1006
data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_xml.raw_seq +0 -2075
data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.index +0 -1864
data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.raw_seq +0 -42411
data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.raw_seq.idx +0 -3751

data/lib/genevalidator.rb CHANGED

@@ -1,159 +1,94 @@
+require 'fileutils'
+require 'bio-blastxmlparser'
 require 'genevalidator/arg_validation'
-require 'genevalidator/get_raw_sequences'
-require 'genevalidator/tabular_parser'
 require 'genevalidator/blast'
-require 'genevalidator/output'
 require 'genevalidator/exceptions'
-require 'genevalidator/validation_length_cluster'
-require 'genevalidator/validation_length_rank'
-require 'genevalidator/validation_blast_reading_frame'
-require 'genevalidator/validation_gene_merge'
-require 'genevalidator/validation_duplication'
-require 'genevalidator/validation_open_reading_frame'
-require 'genevalidator/validation_alignment'
-require 'genevalidator/pool'
-require 'bio-blastxmlparser'
-require 'open-uri'
-require 'uri'
-require 'io/console'
-require 'yaml'
-require 'thread'
+require 'genevalidator/get_raw_sequences'
+require 'genevalidator/output'
+require 'genevalidator/tabular_parser'
+require 'genevalidator/validation'
 # Top level module / namespace.
 module GeneValidator
-  Pair1 = Struct.new(:x, :y)
-  # Main Class that initalises and then runs validations.
-  class Validation
-    attr_reader :opt
-    attr_reader :type
-    attr_reader :input_fasta_file
-    attr_reader :html_path
-    attr_reader :yaml_path
-    attr_reader :filename
+  class << self
+    attr_accessor :opt, :config, :overview
     attr_reader :raw_seq_file_index
     attr_reader :raw_seq_file_load
-    attr_accessor :idx  # current number of the querry processed
-    attr_reader :start_idx
     # array of indexes for the start offsets of each query in the fasta file
-    attr_reader :query_offset_lst
-    attr_reader :overall_evaluation
-    # global variables
-    attr_reader :no_queries
-    attr_reader :scores
-    attr_reader :good_predictions
-    attr_reader :bad_predictions
-    attr_reader :nee
-    attr_reader :no_mafft
-    attr_reader :no_internet
-    attr_reader :map_errors
-    attr_reader :map_running_times
-    attr_reader :threads
-    attr_reader :mutex
-    attr_reader :mutex_yaml
-    attr_reader :mutex_html
-    attr_reader :mutex_array
-    ##
-    # Initilizes the object
-    # Params:
-    # +opt+: A hash - Default Values: {validations: ['all'],
-    # blast_tabular_file: nil, blast_tabular_options: nil, blast_xml_file: nil,
-    # db: 'remote', raw_sequences: nil, num_threads: 1 fast: false}
-    # +start_idx+: number of the sequence from the file to start with
-    # +overall_evaluation+: boolean variable for printing overall evaluation
-    def initialize(opt, start_idx = 1, overall_evaluation = true)
-      # Validate opts
-      @opt = GVArgValidation.validate_args(opt)
-      puts "\nDepending on your input and your computational resources, this"\
-           ' may take a while. Please wait...'
-      @idx                    = 0
-      @start_idx              = start_idx
-      @overall_evaluation     = overall_evaluation
-      # start a worker thread
-      @threads                = [] # used for parallelizing the validations.
-      @mutex                  = Mutex.new
-      @mutex_yaml             = Mutex.new
-      @mutex_html             = Mutex.new
-      @mutex_array            = Mutex.new
-      # global variables
-      @no_queries             = 0
-      @scores                 = []
-      @good_predictions       = 0
-      @bad_predictions        = 0
-      @nee                    = 0
-      @no_mafft               = 0
-      @no_internet            = 0
-      @map_errors             = Hash.new(0)
-      @map_running_times      = Hash.new(Pair1.new(0, 0))
-      @type                   = determine_sequence_type
-      @query_offset_lst       = index_the_input
-      # build the path of html folder output
-      dir                     = File.dirname(@opt[:input_fasta_file])
-      @filename               = File.basename(@opt[:input_fasta_file])
-      @yaml_path              = dir
-      @html_path              = "#{opt[:input_fasta_file]}.html"
-      @plot_dir               = "#{@html_path}/files/json"
-      # create 'html' directory
-      Dir.mkdir(@html_path)
-      # copy auxiliar folders to the html folder
-      aux = File.join(File.dirname(File.expand_path(__FILE__)), '../aux/files')
-      FileUtils.cp_r(aux, @html_path)
+    attr_reader :query_idx
+    attr_accessor :mutex, :mutex_html, :mutex_json, :mutex_array
+    def init(opt, start_idx = 1, summary = true)
+      $stderr.puts 'Analysing input arguments'
+      @opt = opt
+      GVArgValidation.validate_args # validates @opt
+      @config = {
+        idx: 0,
+        start_idx: start_idx,
+        summary: summary,
+        type: BlastUtils.guess_sequence_type_from_input_file,
+        filename: File.basename(@opt[:input_fasta_file]),
+        html_path: "#{@opt[:input_fasta_file]}.html",
+        json_file: File.join(File.dirname(@opt[:input_fasta_file]),
+                             "#{File.basename(@opt[:input_fasta_file])}.json"),
+        plot_dir: "#{@opt[:input_fasta_file]}.html/files/json",
+        aux: File.expand_path(File.join(File.dirname(__FILE__), '../aux')),
+        json_output: [],
+        run_no: 0,
+        output_max: 2500 # max no. of queries in the output file
+      }
+      @overview = {
+        no_queries: 0,
+        scores: [],
+        good_scores: 0,
+        bad_scores: 0,
+        nee: 0,
+        no_mafft: 0,
+        no_internet: 0,
+        map_errors: Hash.new(0),
+        run_time: Hash.new(Pair1.new(0, 0))
+      }
+      @mutex       = Mutex.new
+      @mutex_array = Mutex.new
+      @mutex_html  = Mutex.new
+      @mutex_json  = Mutex.new
+      create_output_folder
+      index_the_input
+      RawSequences.index_raw_seq_file if @opt[:raw_sequences]
     end
     ##
     # Parse the blast output and run validations
     def run
-      # Run BLAST on all sequences
-      run_blast_on_the_input_file if @opt[:fast]
+      # Run BLAST on all sequences (generates @opt[:blast_xml_file])
+      #   if no BLAST OUTPUT file provided...
       unless @opt[:blast_xml_file] || @opt[:blast_tabular_file]
-        # run BLAST on each sequence individually & then run validations
-        run_blast_on_each_sequence
-      else
-        # Extract raw sequences of hits
-        extract_raw_sequences_of_blast_hits unless @opt[:raw_sequences]
-        create_an_index_file_of_raw_seq_file(@opt[:raw_sequences])
-        # Run Validations
-        iterator = parse_blast_output_file
-        run_validations(iterator)
+        BlastUtils.run_blast_on_input_file
       end
-      return unless @overall_evaluation
-      Output.print_footer(@no_queries, @scores, @good_predictions,
-                          @bad_predictions, @nee, @no_mafft, @no_internet,
-                          @map_errors, @map_running_times, @html_path,
-                          @filename)
-    end
-    def determine_sequence_type
-      BlastUtils.guess_sequence_type_from_file(@opt[:input_fasta_file])
+      # Obtain fasta file of all BLAST hits
+      RawSequences.run unless @opt[:raw_sequences]
+      # Run Validations
+      iterator = parse_blast_output_file
+      (Validations.new).run_validations(iterator)
+      Output.write_json_file(@config[:json_output], @config[:json_file])
+      Output.print_footer(@overview, @config)
     end
     ##
-    # Runs BLAST on the input file - only run when the opt[:fast] is true
-    def run_blast_on_the_input_file
-      return if @opt[:blast_xml_file] || @opt[:blast_tabular_file]
-      puts 'Running BLAST'
-      @opt[:blast_xml_file] = @opt[:input_fasta_file] + '.blast_xml'
-      BlastUtils.run_blast_on_file(@opt)
-    end
-    ##
-    # Extracts raw sequences of all blast hits
-    def extract_raw_sequences_of_blast_hits
-      puts 'Extracting sequences within the BLAST output file from the BLAST' \
-           ' database'
-      @opt[:raw_sequences] = GetRawSequences.run(@opt)
+    # Creates the output folder and copies the auxiliar folders to this folder
+    def create_output_folder(output_dir = @config[:html_path],
+                             aux_dir = @config[:aux])
+      Dir.mkdir(output_dir)
+      aux_files = File.join(aux_dir, 'files/')
+      FileUtils.cp_r(aux_files, output_dir)
     end
     ##
@@ -162,39 +97,8 @@ module GeneValidator
     # start and end positions of each query.
     def index_the_input
       fasta_content = IO.binread(@opt[:input_fasta_file])
-      offset_array  = fasta_content.enum_for(:scan, /(>[^>]+)/).map { Regexp.last_match.begin(0) }
-      offset_array.push(fasta_content.length)
-      fasta_content = nil
-      offset_array
-    end
-    ##
-    # Index the raw sequences file...
-    def create_an_index_file_of_raw_seq_file(raw_sequence_file)
-      # leave only the identifiers in the fasta description
-      content = File.open(raw_sequence_file, 'rb').read.gsub(/ .*/, '')
-      File.open(raw_sequence_file, 'w+') { |f| f.write(content) }
-      # index the fasta file
-      keys   = content.scan(/>(.*)\n/).flatten
-      values = content.enum_for(:scan, /(>[^>]+)/).map { Regexp.last_match.begin(0) }
-      # make an index hash
-      index_hash = {}
-      keys.each_with_index do |k, i|
-        start = values[i]
-        endf  = (i == values.length - 1) ? content.length - 1 : values[i + 1]
-        index_hash[k] = [start, endf]
-      end
-      # create FASTA index
-      @raw_seq_file_index = "#{raw_sequence_file}.idx"
-      @raw_seq_file_load  = index_hash
-      File.open(@raw_seq_file_index, 'w') do |f|
-        YAML.dump(index_hash, f)
-      end
-      content = nil
+      @query_idx = fasta_content.enum_for(:scan, /(>[^>]+)/).map { Regexp.last_match.begin(0) }
+      @query_idx.push(fasta_content.length)
     end
     ##
@@ -206,293 +110,9 @@ module GeneValidator
       if @opt[:blast_xml_file]
         Bio::BlastXMLParser::XmlIterator.new(@opt[:blast_xml_file]).to_enum
       else
-        TabularParser.new(@opt[:blast_tabular_file],
-                          @opt[:blast_tabular_options], @type)
+        TabularParser.new
       end
       ## TODO: Add a Rescue statement - e.g. if unable to create the Object...
     end
-    ##
-    #
-    def run_blast_on_each_sequence
-      # file seek for each query
-      @query_offset_lst[0..@query_offset_lst.length - 2].each_with_index do |_pos, i|
-        if (i + 1) >= @start_idx
-          start_offset = @query_offset_lst[i + 1] - @query_offset_lst[i]
-          end_offset   = @query_offset_lst[i]
-          query = IO.binread(@opt[:input_fasta_file], start_offset, end_offset)
-          # call blast with the default parameters
-          blast_type = (type == :protein) ? 'blastp' : 'blastx'
-          blast_xml_output = BlastUtils.run_blast(blast_type, query, @opt[:db],
-                                                  @opt[:num_threads])
-          iterator = Bio::BlastXMLParser::NokogiriBlastXml.new(blast_xml_output).to_enum
-          run_validations(iterator)
-        else
-          @idx += 1
-        end
-      end
-    end
-    ##
-    #
-    def run_validations(iterator)
-      p = Pool.new(@opt[:num_threads]) if @opt[:num_threads] > 1
-      while @idx + 1 < @query_offset_lst.length
-        prediction = get_info_on_each_query_sequence
-        @idx += 1
-        hits = parse_next_iteration(iterator, prediction)
-        if hits.nil?
-          @idx -= 1
-          break
-        end
-        current_idx = @idx
-        # the first validation should be treated separately
-        if current_idx == @start_idx || @opt[:num_threads] == 1
-          validate(prediction, hits, current_idx)
-        else
-          p.schedule(prediction, hits, current_idx) do |prediction, hits, current_idx|
-            validate(prediction, hits, current_idx)
-          end
-        end
-      end
-    ensure
-      p.shutdown if @opt[:num_threads] > 1
-    end
-    def parse_next_iteration(iterator, prediction)
-      iterator.next if @idx < @start_idx
-      if @opt[:blast_xml_file]
-        BlastUtils.parse_next(iterator, @type)
-      elsif @opt[:blast_tabular_file]
-        iterator.parse_next(prediction.identifier)
-      end
-    end
-    ##
-    # get info about the query
-    def get_info_on_each_query_sequence
-      prediction   = Sequence.new
-      start_offset = @query_offset_lst[idx + 1] - @query_offset_lst[idx]
-      end_offset   = @query_offset_lst[idx]
-      query        = IO.binread(@opt[:input_fasta_file], start_offset, end_offset)
-      parse_query  = query.scan(/>([^\n]*)\n([A-Za-z\n]*)/)[0]
-      prediction.definition     = parse_query[0].gsub("\n", '')
-      prediction.identifier     = prediction.definition.gsub(/ .*/, '')
-      prediction.type           = @type
-      prediction.raw_sequence   = parse_query[1].gsub("\n", '')
-      prediction.length_protein = prediction.raw_sequence.length
-      prediction.length_protein /= 3 if @type == :nucleotide
-      prediction
-    end
-    ##
-    # Validate one query and create validation report
-    # Params:
-    # +prediction+: Sequence object
-    # +hits+: Array of +Sequence+ objects
-    # +idx+: the index number of the query
-    def validate(prediction, hits, current_idx)
-      query_output = do_validations(prediction, hits, current_idx)
-      query_output.generate_html
-      query_output.print_output_file_yaml
-      query_output.print_output_console
-      validations = query_output.validations
-      no_mafft = 0
-      no_internet = 0
-      errors = []
-      validations.each do |v|
-        unless v.errors.nil?
-          no_mafft += v.errors.select { |e| e == NoMafftInstallationError }.length
-          no_internet += v.errors.select { |e| e == NoInternetError }.length
-        end
-        errors.push(v.short_header) if v.validation == :error
-      end
-      no_evidence = validations.count { |v| v.result == :unapplicable || v.result == :warning } == validations.length
-      nee = (no_evidence) ? 1 : 0
-      good_predictions = (query_output.overall_score >= 75) ? 1 : 0
-      bad_predictions  = (query_output.overall_score >= 75) ? 0 : 1
-      @mutex_array.synchronize do
-        @no_queries += 1
-        @scores.push(query_output.overall_score)
-        @good_predictions += good_predictions
-        @bad_predictions += bad_predictions
-        @nee += nee
-        @no_mafft += no_mafft
-        @no_internet += no_internet
-        errors.each { |err| @map_errors[err] += 1 }
-        validations.each do |v|
-          next if v.running_time == 0 || v.running_time.nil?
-          next if v.validation == :unapplicable || v.validation == :error
-          p = Pair1.new(@map_running_times[v.short_header].x + v.running_time, @map_running_times[v.short_header].y + 1)
-          @map_running_times[v.short_header] = p
-        end
-      end
-      query_output
-    end
-    ##
-    # Removes identical hits
-    # Params:
-    # +prediction+: Sequence object
-    # +hits+: Array of +Sequence+ objects
-    # Output:
-    # new array of hit +Sequence+ objects
-    def remove_identical_hits(prediction, hits)
-      # remove the identical hits
-      # identical hit means 100%coverage and >99% identity
-      identical_hits = []
-      hits.each do |hit|
-        # check if all hsps have identity more than 99%
-        low_identity = hit.hsp_list.select { |hsp| hsp.pidentity.nil? || hsp.pidentity < 99 }
-        # check the coverage
-        coverage = Array.new(prediction.length_protein, 0)
-        hit.hsp_list.each do |hsp|
-          len = hsp.match_query_to - hsp.match_query_from + 1
-          coverage[hsp.match_query_from - 1..hsp.match_query_to - 1] = Array.new(len, 1)
-        end
-        if low_identity.length == 0 && coverage.uniq.length == 1
-          identical_hits.push(hit)
-        end
-      end
-      identical_hits.each { |hit| hits.delete(hit) }
-      hits
-    end
-    ##
-    # Runs all the validations and prints the outputs given the current
-    # prediction query and the corresponding hits
-    # Params:
-    # +prediction+: Sequence object
-    # +hits+: Array of +Sequence+ objects
-    # +idx+: the index number of the query
-    # Output:
-    # +Output+ object
-    def do_validations(prediction, hits, current_idx)
-      begin
-        hits = remove_identical_hits(prediction, hits)
-        rescue Exception => error # NoPIdentError
-      end
-      query_output                = Output.new(@mutex, @mutex_yaml, @mutex_html,
-                                               @filename, @html_path,
-                                               @yaml_path, current_idx, @start_idx)
-      query_output.prediction_len = prediction.length_protein
-      query_output.prediction_def = prediction.definition
-      query_output.nr_hits        = hits.length
-      plot_path                   = File.join(@plot_dir, "#{@filename}_#{current_idx}")
-      validations = []
-      validations.push LengthClusterValidation.new(@type, prediction, hits,
-                                                   plot_path)
-      validations.push LengthRankValidation.new(@type, prediction, hits)
-      validations.push GeneMergeValidation.new(@type, prediction, hits,
-                                               plot_path)
-      validations.push DuplicationValidation.new(@type, prediction, hits,
-                                                 @opt[:raw_sequences],
-                                                 @raw_seq_file_index,
-                                                 @raw_seq_file_load, @opt[:db],
-                                                 @opt[:num_threads])
-      validations.push BlastReadingFrameValidation.new(@type, prediction, hits)
-      validations.push OpenReadingFrameValidation.new(@type, prediction, hits,
-                                                      plot_path)
-      validations.push AlignmentValidation.new(@type, prediction, hits,
-                                               plot_path, @opt[:raw_sequences],
-                                               @raw_seq_file_index,
-                                               @raw_seq_file_load,
-                                               @opt[:db], @opt[:num_threads])
-      validations = validations.select { |v| @opt[:validations].include? v.cli_name.downcase }
-      # check the class type of the elements in the list
-      validations.each do |v|
-        fail ValidationClassError unless v.is_a? ValidationTest
-      end
-      # check alias duplication
-      aliases = validations.map(&:cli_name)
-      fail AliasDuplicationError unless aliases.length == aliases.uniq.length
-      validations.each do |v|
-        v.run
-        fail ReportClassError unless v.validation_report.is_a? ValidationReport
-      end
-      query_output.validations = validations.map(&:validation_report)
-      fail NoValidationError if query_output.validations.length == 0
-      # compute validation score
-      compute_scores(query_output)
-      query_output
-    rescue ValidationClassError => error
-      error_line = error.backtrace[0].scan(%r{/([^/]+:\d+):.*})[0][0]
-      $stderr.print "Class Type error at #{error_line}." \
-                    ' Possible cause: type of one of the validations is not' \
-                    " ValidationTest\n"
-      exit 1
-    rescue NoValidationError => error
-      error_line = error.backtrace[0].scan(%r{/([^/]+:\d+):.*})[0][0]
-      $stderr.print "Validation error at #{error_line}." \
-                    " Possible cause: your -v arguments are not valid aliases\n"
-      exit 1
-    rescue ReportClassError => error
-      error_line = error.backtrace[0].scan(%r{/([^/]+:\d+):.*})[0][0]
-      $stderr.print "Class Type error at #{error_line}."\
-                    ' Possible cause: type of one of the validation reports' \
-                    " returned by the 'run' method is not ValidationReport\n"
-      exit 1
-    rescue AliasDuplicationError => error
-      error_line = error.backtrace[0].scan(%r{/([^/]+:\d+):.*})[0][0]
-      $stderr.print "Alias Duplication error at #{error_line}."\
-                    ' Possible cause: At least two validations have the same' \
-                    " CLI alias\n"
-      exit 1
-    end
-    def compute_scores(query_output)
-      validations = query_output.validations
-      successes = validations.map { |v| v.result == v.expected }.count(true)
-      fails = validations.map { |v| v.validation != :unapplicable &&
-                                    v.validation != :error &&
-                                    v.result != v.expected }.count(true)
-      lcv = validations.select { |v| v.class == LengthClusterValidationOutput }
-      lrv = validations.select { |v| v.class == LengthRankValidationOutput }
-      if lcv.length == 1 && lrv.length == 1
-        score_lcv = (lcv[0].result == lcv[0].expected)
-        score_lrv = (lrv[0].result == lrv[0].expected)
-        # if both are true this should be counted as a single success
-        if score_lcv == true && score_lrv == true
-          successes -= 1
-        elsif score_lcv == false && score_lrv == false
-          # if both are false this will be a fail
-          fails -= 1
-        else
-          successes -= 0.5
-          fails -= 0.5
-        end
-      end
-      query_output.successes     = successes
-      query_output.fails         = fails
-      total_query                = successes.to_i + fails
-      query_output.overall_score = (successes * 100 / (total_query)).round(0)
-    end
   end
 end