genevalidator 1.6.1 → 1.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -1
  3. data/.travis.yml +2 -0
  4. data/README.md +78 -30
  5. data/Rakefile +11 -8
  6. data/aux/app_template_footer.erb +1 -6
  7. data/aux/app_template_header.erb +12 -32
  8. data/aux/files/css/style.css +2 -8
  9. data/aux/files/js/plots.js +564 -576
  10. data/aux/files/js/script.js +10 -0
  11. data/aux/json_footer.erb +8 -0
  12. data/aux/json_header.erb +19 -0
  13. data/aux/json_query.erb +14 -0
  14. data/aux/template_footer.erb +9 -58
  15. data/aux/template_header.erb +18 -58
  16. data/aux/template_query.erb +8 -36
  17. data/bin/genevalidator +45 -32
  18. data/genevalidator.gemspec +11 -7
  19. data/lib/genevalidator.rb +75 -455
  20. data/lib/genevalidator/arg_validation.rb +78 -107
  21. data/lib/genevalidator/blast.rb +57 -60
  22. data/lib/genevalidator/clusterization.rb +15 -15
  23. data/lib/genevalidator/exceptions.rb +32 -5
  24. data/lib/genevalidator/get_raw_sequences.rb +70 -33
  25. data/lib/genevalidator/hsp.rb +1 -4
  26. data/lib/genevalidator/json_to_gv_results.rb +109 -0
  27. data/lib/genevalidator/output.rb +177 -185
  28. data/lib/genevalidator/pool.rb +2 -1
  29. data/lib/genevalidator/sequences.rb +3 -3
  30. data/lib/genevalidator/tabular_parser.rb +24 -18
  31. data/lib/genevalidator/validation.rb +279 -0
  32. data/lib/genevalidator/validation_alignment.rb +31 -47
  33. data/lib/genevalidator/validation_blast_reading_frame.rb +19 -18
  34. data/lib/genevalidator/validation_duplication.rb +23 -19
  35. data/lib/genevalidator/validation_gene_merge.rb +30 -65
  36. data/lib/genevalidator/validation_length_cluster.rb +14 -53
  37. data/lib/genevalidator/validation_length_rank.rb +10 -11
  38. data/lib/genevalidator/validation_open_reading_frame.rb +18 -19
  39. data/lib/genevalidator/validation_report.rb +2 -5
  40. data/lib/genevalidator/validation_test.rb +8 -4
  41. data/lib/genevalidator/version.rb +1 -1
  42. data/test/test_all_validations.rb +51 -66
  43. data/test/test_blast.rb +68 -51
  44. data/test/test_clusterization.rb +1 -1
  45. data/test/test_clusterization_2d.rb +19 -13
  46. data/test/test_extended_array_methods.rb +1 -1
  47. data/test/test_files/all_validations_mrna/mrna.blast_tab6 +1806 -0
  48. data/test/test_files/all_validations_mrna/mrna.blast_tab7 +1865 -0
  49. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml → mrna.blast_xml} +18642 -1
  50. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml.index → mrna.blast_xml.index} +300 -0
  51. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta → mrna.fa} +0 -0
  52. data/test/test_files/all_validations_mrna/mrna.raw_seq +3970 -0
  53. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml.raw_seq.idx → mrna.raw_seq.idx} +901 -1
  54. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_tab → prot.blast_tab6} +416 -0
  55. data/test/test_files/all_validations_prot/prot.blast_tab7 +2400 -0
  56. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml → prot.blast_xml} +18299 -6723
  57. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.index → prot.blast_xml.index} +408 -0
  58. data/test/test_files/all_validations_prot/{all_validations_prot.fasta → prot.fa} +0 -0
  59. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.raw_seq → prot.raw_seq} +2735 -0
  60. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.raw_seq.idx → prot.raw_seq.idx} +3032 -1808
  61. data/test/test_sequences.rb +46 -41
  62. data/test/test_validation_open_reading_frame.rb +318 -202
  63. data/test/test_validations.rb +48 -32
  64. metadata +76 -102
  65. data/doc/AliasDuplicationError.html +0 -134
  66. data/doc/AlignmentValidation.html +0 -1687
  67. data/doc/AlignmentValidationOutput.html +0 -659
  68. data/doc/Blast.html +0 -1905
  69. data/doc/BlastRFValidationOutput.html +0 -545
  70. data/doc/BlastReadingFrameValidation.html +0 -370
  71. data/doc/BlastUtils.html +0 -875
  72. data/doc/ClasspathError.html +0 -134
  73. data/doc/Cluster.html +0 -1316
  74. data/doc/DuplciationValidationOutput.html +0 -564
  75. data/doc/DuplicationValidation.html +0 -920
  76. data/doc/DuplicationValidationOutput.html +0 -564
  77. data/doc/FileNotFoundException.html +0 -134
  78. data/doc/GeneMergeValidation.html +0 -935
  79. data/doc/GeneMergeValidationOutput.html +0 -652
  80. data/doc/HierarchicalClusterization.html +0 -994
  81. data/doc/Hsp.html +0 -1485
  82. data/doc/InconsistentTabularFormat.html +0 -135
  83. data/doc/LengthClusterValidation.html +0 -982
  84. data/doc/LengthClusterValidationOutput.html +0 -515
  85. data/doc/LengthRankValidation.html +0 -496
  86. data/doc/LengthRankValidationOutput.html +0 -517
  87. data/doc/NoInternetError.html +0 -135
  88. data/doc/NoMafftInstallationError.html +0 -134
  89. data/doc/NoPIdentError.html +0 -134
  90. data/doc/NoValidationError.html +0 -134
  91. data/doc/NotEnoughHitsError.html +0 -135
  92. data/doc/ORFValidationOutput.html +0 -593
  93. data/doc/OpenReadingFrameValidation.html +0 -1107
  94. data/doc/OtherError.html +0 -123
  95. data/doc/Output.html +0 -1540
  96. data/doc/Pair.html +0 -309
  97. data/doc/PairCluster.html +0 -767
  98. data/doc/Plot.html +0 -837
  99. data/doc/QueryError.html +0 -134
  100. data/doc/ReportClassError.html +0 -135
  101. data/doc/Sequence.html +0 -1299
  102. data/doc/SequenceTypeError.html +0 -135
  103. data/doc/TabularEntry.html +0 -837
  104. data/doc/TabularParser.html +0 -1104
  105. data/doc/Validation.html +0 -2147
  106. data/doc/ValidationClassError.html +0 -134
  107. data/doc/ValidationOutput.html +0 -460
  108. data/doc/ValidationReport.html +0 -940
  109. data/doc/ValidationTest.html +0 -939
  110. data/doc/_index.html +0 -449
  111. data/doc/class_list.html +0 -54
  112. data/doc/css/common.css +0 -1
  113. data/doc/css/full_list.css +0 -57
  114. data/doc/css/style.css +0 -338
  115. data/doc/file.README.html +0 -151
  116. data/doc/file_list.html +0 -56
  117. data/doc/frames.html +0 -26
  118. data/doc/index.html +0 -151
  119. data/doc/js/app.js +0 -214
  120. data/doc/js/full_list.js +0 -178
  121. data/doc/js/jquery.js +0 -4
  122. data/doc/method_list.html +0 -1505
  123. data/doc/top-level-namespace.html +0 -112
  124. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab +0 -967
  125. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.index +0 -967
  126. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.raw_seq +0 -4929
  127. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.raw_seq.idx +0 -1006
  128. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_xml.raw_seq +0 -2075
  129. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.index +0 -1864
  130. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.raw_seq +0 -42411
  131. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.raw_seq.idx +0 -3751
@@ -1,7 +1,10 @@
1
- require 'genevalidator/validation_report'
2
- require 'genevalidator/validation_test'
1
+ require 'forwardable'
2
+
3
3
  require 'genevalidator/exceptions'
4
4
  require 'genevalidator/ext/array'
5
+ require 'genevalidator/validation_report'
6
+ require 'genevalidator/validation_test'
7
+
5
8
  module GeneValidator
6
9
  ##
7
10
  # Class that stores the validation output information
@@ -81,11 +84,9 @@ module GeneValidator
81
84
  ##
82
85
  # Initializes the object
83
86
  # Params:
84
- # +hits+: a vector of +Sequence+ objects (representing blast hits)
85
87
  # +prediction+: a +Sequence+ object representing the blast query
86
- # +threshold+: threshold below which the prediction length rank is
87
- # considered to be inadequate
88
- def initialize(type, prediction, hits)
88
+ # +hits+: a vector of +Sequence+ objects (representing blast hits)
89
+ def initialize(prediction, hits)
89
90
  super
90
91
  @short_header = 'LengthRank'
91
92
  @header = 'Length Rank'
@@ -143,19 +144,17 @@ module GeneValidator
143
144
  largest_hit,
144
145
  extreme_hits,
145
146
  percentage)
146
- @validation_report.running_time = Time.now - start
147
+ @validation_report.run_time = Time.now - start
147
148
  @validation_report
148
149
 
149
150
  rescue NotEnoughHitsError
150
151
  @validation_report = ValidationReport.new('Not enough evidence', :warning,
151
152
  @short_header, @header,
152
- @description, @approach,
153
- @explanation, @conclusion)
153
+ @description)
154
154
  rescue Exception
155
155
  @validation_report = ValidationReport.new('Unexpected error', :error,
156
156
  @short_header, @header,
157
- @description, @approach,
158
- @explanation, @conclusion)
157
+ @description)
159
158
  @validation_report.errors.push 'Unexpected Error'
160
159
  end
161
160
  end
@@ -1,5 +1,10 @@
1
- require 'genevalidator/validation_report'
2
1
  require 'bio'
2
+ require 'forwardable'
3
+
4
+ require 'genevalidator/exceptions'
5
+ require 'genevalidator/validation_report'
6
+ require 'genevalidator/validation_test'
7
+
3
8
  module GeneValidator
4
9
  ##
5
10
  # Class that stores the validation output information
@@ -57,24 +62,23 @@ module GeneValidator
57
62
  # This class contains the methods necessary for checking whether there is
58
63
  # a main Open Reading Frame in the predicted sequence
59
64
  class OpenReadingFrameValidation < ValidationTest
60
- attr_reader :filename
65
+ extend Forwardable
66
+ def_delegators GeneValidator, :config
61
67
 
62
68
  ##
63
69
  # Initializes the object
64
70
  # Params:
65
- # +type+: type of the predicted sequence (:nucleotide or :protein)
66
71
  # +prediction+: a +Sequence+ object representing the blast query
67
72
  # +hits+: a vector of +Sequence+ objects (representing blast hits)
68
- # +plot_filename+: name of the input file, used when making plot files
69
- def initialize(type, prediction, hits, filename)
73
+ def initialize(prediction, hits)
70
74
  super
71
- @short_header = 'ORF'
75
+ @short_header = 'MainORF'
72
76
  @header = 'Main ORF'
73
77
  @description = 'Check whether there is a single main Open Reading' \
74
78
  ' Frame in the predicted gene. Applicable only for' \
75
79
  ' nucleotide queries.'
76
80
  @cli_name = 'orf'
77
- @filename = filename
81
+ @type = config[:type]
78
82
  end
79
83
 
80
84
  ##
@@ -101,15 +105,14 @@ module GeneValidator
101
105
  @validation_report = ORFValidationOutput.new(@short_header, @header,
102
106
  @description, orfs,
103
107
  coverage, longest_orf_frame)
104
- @validation_report.running_time = Time.now - start
108
+ @validation_report.run_time = Time.now - start
105
109
 
106
110
  @validation_report.plot_files.push(plot1)
107
111
  @validation_report
108
112
  rescue Exception
109
113
  @validation_report = ValidationReport.new('Unexpected error', :error,
110
114
  @short_header, @header,
111
- @description, @approach,
112
- @explanation, @conclusion)
115
+ @description)
113
116
  @validation_report.errors.push 'Unexpected Error'
114
117
  end
115
118
 
@@ -157,29 +160,25 @@ module GeneValidator
157
160
  # +orfs+: +Hash+ containing the open reading frame
158
161
  # +output+: location where the plot will be saved in jped file format
159
162
  # +prediction+: Sequence objects
160
- def plot_orfs(orfs, translated_length, output = "#{@filename}_orfs.json")
163
+ def plot_orfs(orfs, translated_length, output = "#{@plot_path}_orfs.json")
161
164
  fail QueryError unless orfs.is_a? Hash
162
165
 
163
- results = []
166
+ data = []
164
167
 
165
168
  # Create hashes for the Background
166
169
  (-3..3).each do |frame|
167
170
  next if frame == 0
168
- results << { 'y' => frame, 'start' => 1, 'stop' => translated_length,
171
+ data << { 'y' => frame, 'start' => 1, 'stop' => translated_length,
169
172
  'color' => 'gray' }
170
173
  end
171
174
 
172
175
  # Create the hashes for the ORFs...
173
176
  orfs.each do |_key, h|
174
- results << { 'y' => h[:frame], 'start' => h[:orf_start],
177
+ data << { 'y' => h[:frame], 'start' => h[:orf_start],
175
178
  'stop' => h[:orf_end], 'color' => 'red' }
176
179
  end
177
180
 
178
- f = File.open(output, 'w')
179
- f.write((results).to_json)
180
- f.close
181
-
182
- Plot.new(output.scan(%r{([^/]+)$})[0][0],
181
+ Plot.new(data,
183
182
  :lines,
184
183
  'Open Reading Frames in all 6 Frames',
185
184
  'Open Reading Frame (Minimimum Length: 30 amino acids),red',
@@ -1,6 +1,6 @@
1
1
  # Top level module / namespace.
2
2
  module GeneValidator
3
- Plot = Struct.new(:filename, :type, :title, :footer, :xtitle, :ytitle, :aux1,
3
+ Plot = Struct.new(:data, :type, :title, :footer, :xtitle, :ytitle, :aux1,
4
4
  :aux2)
5
5
 
6
6
  ##
@@ -8,7 +8,6 @@ module GeneValidator
8
8
  # all validation reports
9
9
  class ValidationReport
10
10
  attr_reader :message
11
- attr_reader :bg_color
12
11
  attr_reader :plot_files
13
12
  attr_reader :result
14
13
  attr_reader :expected
@@ -17,7 +16,7 @@ module GeneValidator
17
16
  attr_accessor :short_header
18
17
  attr_accessor :header
19
18
  attr_accessor :description
20
- attr_accessor :running_time
19
+ attr_accessor :run_time
21
20
  attr_accessor :approach
22
21
  attr_accessor :explanation
23
22
  attr_accessor :conclusion
@@ -31,7 +30,6 @@ module GeneValidator
31
30
  # +short_header+: String
32
31
  # +header+: String
33
32
  # +description+: String
34
- # +bg_color+: background color of the table cell for the html output (nil
35
33
  # by default)
36
34
  def initialize(message = 'Not enough evidence', validation_result = :no,
37
35
  short_header = '', header = '', description = '',
@@ -60,7 +58,6 @@ module GeneValidator
60
58
  ##
61
59
  # May return "success" or "error"
62
60
  def color
63
- bg_color unless bg_color.nil?
64
61
  if validation == @expected
65
62
  'success'
66
63
  elsif validation == :error || validation == :unapplicable
@@ -1,7 +1,11 @@
1
+ require 'forwardable'
2
+
1
3
  module GeneValidator
2
4
  # This is an abstract class extended
3
5
  # by all validation classes
4
6
  class ValidationTest
7
+ extend Forwardable
8
+ def_delegators GeneValidator, :config
5
9
  attr_accessor :type
6
10
  attr_accessor :prediction
7
11
  attr_accessor :hits
@@ -10,7 +14,7 @@ module GeneValidator
10
14
  attr_accessor :cli_name
11
15
  attr_accessor :description
12
16
  attr_accessor :validation_report
13
- attr_accessor :running_time
17
+ attr_accessor :run_time
14
18
 
15
19
  ##
16
20
  # Initializes the object
@@ -19,13 +23,13 @@ module GeneValidator
19
23
  # +prediction+: a +Sequence+ object representing the blast query
20
24
  # +hits+: a vector of +Sequence+ objects (representing blast hits)
21
25
  # +argv+: additional arguments if needed
22
- def initialize(type, prediction, hits = nil, *_argv)
23
- @type = type
26
+ def initialize(prediction, hits = nil, *_argv)
27
+ @type = config[:type]
24
28
  @prediction = prediction
25
29
  @hits = hits
26
30
  @short_header = 'NewVal'
27
31
  @header = 'New Validation'
28
- @running_time = 0
32
+ @run_time = 0
29
33
  @cli_name = 'all'
30
34
  @description = 'No description available.'
31
35
  @validation_report = ValidationReport.new('Not enough evidence')
@@ -1,3 +1,3 @@
1
1
  module GeneValidator
2
- VERSION = '1.6.1'
2
+ VERSION = '1.6.2'
3
3
  end
@@ -3,93 +3,79 @@ require 'minitest/autorun'
3
3
  require 'yaml'
4
4
  require 'fileutils'
5
5
  require 'genevalidator'
6
- require 'genevalidator/blast'
7
- require 'genevalidator/validation_length_cluster'
8
- require 'genevalidator/validation_length_rank'
9
- require 'genevalidator/validation_blast_reading_frame'
10
- require 'genevalidator/validation_gene_merge'
11
- require 'genevalidator/validation_duplication'
12
- require 'genevalidator/validation_open_reading_frame'
13
- require 'genevalidator/validation_alignment'
14
6
 
15
7
  module GeneValidator
8
+ # Test if GV produces the same output with XML and tabular input
16
9
  class ValidateOutput < Minitest::Test
17
-
18
- prot_input_fasta_file = "test/test_files/all_validations_prot/all_validations_prot.fasta"
19
- prot_blast_xml_file = "#{prot_input_fasta_file}.blast_xml"
20
- prot_blast_xml_raw_seq = "#{prot_input_fasta_file}.blast_xml.raw_seq"
21
-
22
- prot_blast_tab_file = "#{prot_input_fasta_file}.blast_tab"
23
- prot_blast_tab_raw_seq = "#{prot_input_fasta_file}.blast_tab.raw_seq"
24
-
25
- mrna_input_fasta_file = "test/test_files/all_validations_mrna/all_validations_mrna.fasta"
26
- mrna_blast_xml_file = "#{mrna_input_fasta_file}.blast_xml"
27
- mrna_blast_xml_raw_seq = "#{mrna_input_fasta_file}.blast_xml.raw_seq"
28
-
29
- mrna_blast_tab_file = "#{mrna_input_fasta_file}.blast_tab"
30
- mrna_blast_tab_raw_seq = "#{mrna_input_fasta_file}.blast_tab.raw_seq"
31
-
32
- tab_options = "qseqid sseqid sacc slen qstart qend sstart send length qframe pident evalue"
33
-
34
- database = 'swissprot -remote'
35
- threads = '1'
36
-
37
- # Unwanted Output Files
38
- prot_xml_out = "#{prot_blast_xml_file}.out"
39
- prot_tab_out = "#{mrna_blast_tab_file}.out"
40
- mrna_xml_out = "#{mrna_blast_xml_file}.out"
41
- mrna_tab_out = "#{mrna_blast_tab_file}.out"
42
- prot_output_dir = "#{prot_input_fasta_file}.html"
43
- mrna_output_dir = "#{mrna_input_fasta_file}.html"
44
- prot_yaml = "#{prot_input_fasta_file}.yaml"
45
- mrna_yaml = "#{mrna_input_fasta_file}.yaml"
10
+ prot_dir = 'test/test_files/all_validations_prot'
11
+ prot_input = File.join(prot_dir, 'prot.fa')
12
+ prot_xml = File.join(prot_dir, 'prot.blast_xml')
13
+ prot_tab = File.join(prot_dir, 'prot.blast_tab6')
14
+ prot_raw = File.join(prot_dir, 'prot.raw_seq')
15
+
16
+ mrna_dir = 'test/test_files/all_validations_mrna'
17
+ mrna_input = File.join(mrna_dir, 'mrna.fa')
18
+ mrna_xml = File.join(mrna_dir, 'mrna.blast_xml')
19
+ mrna_tab = File.join(mrna_dir, 'mrna.blast_tab6')
20
+ mrna_raw = File.join(mrna_dir, 'mrna.raw_seq')
21
+
22
+ tab_options = 'qseqid sseqid sacc slen qstart qend sstart send length' \
23
+ ' qframe pident nident evalue qseq sseq'
24
+ database = 'swissprot -remote'
25
+ threads = '1'
26
+
27
+ # Unwanted Output Files
28
+ prot_xml_out = "#{prot_xml}.out"
29
+ prot_tab_out = "#{prot_tab}.out"
30
+ prot_output_dir = "#{prot_input}.html"
31
+ mrna_xml_out = "#{mrna_xml}.out"
32
+ mrna_tab_out = "#{mrna_tab}.out"
33
+ mrna_output_dir = "#{mrna_input}.html"
46
34
 
47
35
  describe 'Protein dataset' do
48
36
  it 'xml and tabular inputs give the same output' do
49
-
50
37
  original_stdout = $stdout.clone
51
38
  $stdout.reopen(prot_xml_out, 'w')
52
39
 
53
- FileUtils.rm_rf(prot_output_dir) rescue Error
54
-
40
+ FileUtils.rm_rf(prot_output_dir) rescue Errno::ENOENT
55
41
  opts = {
56
- validations: %w(lenc lenr frame merge dup orf),
42
+ validations: %w(lenc lenr frame merge dup orf align),
57
43
  db: database,
58
44
  num_threads: threads,
59
45
  fast: false,
60
- input_fasta_file: prot_input_fasta_file,
61
- blast_xml_file: prot_blast_xml_file,
62
- raw_sequences: prot_blast_xml_raw_seq,
46
+ input_fasta_file: prot_input,
47
+ blast_xml_file: prot_xml,
48
+ raw_sequences: prot_raw,
63
49
  test: true
64
50
  }
65
51
 
66
- (GeneValidator::Validation.new(opts, 1, false)).run
52
+ GeneValidator.init(opts, 1, false)
53
+ GeneValidator.run
67
54
  $stdout.reopen original_stdout
68
55
  $stdout.reopen(prot_tab_out, 'w')
69
56
 
70
- FileUtils.rm_rf(prot_output_dir) rescue Error
57
+ FileUtils.rm_rf(prot_output_dir) rescue Errno::ENOENT
71
58
 
72
59
  opts1 = {
73
- validations: %w(lenc lenr frame merge dup orf),
60
+ validations: %w(lenc lenr frame merge dup orf align),
74
61
  db: database,
75
62
  num_threads: threads,
76
63
  fast: false,
77
- input_fasta_file: prot_input_fasta_file,
78
- blast_tabular_file: prot_blast_tab_file,
64
+ input_fasta_file: prot_input,
65
+ blast_tabular_file: prot_tab,
79
66
  blast_tabular_options: tab_options,
80
- raw_sequences: prot_blast_tab_raw_seq,
67
+ raw_sequences: prot_raw,
81
68
  test: true
82
69
  }
83
70
 
84
- (GeneValidator::Validation.new(opts1, 1, false)).run
71
+ GeneValidator.init(opts1, 1, false)
72
+ GeneValidator.run
85
73
  $stdout.reopen original_stdout
86
74
 
87
75
  diff = FileUtils.compare_file(prot_xml_out, prot_tab_out)
88
76
 
89
77
  File.delete(prot_xml_out)
90
78
  File.delete(prot_tab_out)
91
- File.delete(prot_yaml)
92
-
93
79
  FileUtils.rm_rf(prot_output_dir)
94
80
 
95
81
  assert_equal(true, diff)
@@ -98,50 +84,49 @@ module GeneValidator
98
84
 
99
85
  describe 'mRNA dataset' do
100
86
  it 'xml and tabular inputs give the same output' do
101
-
102
87
  original_stdout = $stdout.clone
103
88
  $stdout.reopen(mrna_xml_out, 'w')
104
89
 
105
- FileUtils.rm_rf(mrna_output_dir) rescue Error
90
+ FileUtils.rm_rf(mrna_output_dir) rescue Errno::ENOENT
106
91
 
107
92
  opts = {
108
93
  validations: %w(lenc lenr frame merge dup orf align),
109
94
  db: database,
110
95
  num_threads: threads,
111
96
  fast: false,
112
- input_fasta_file: mrna_input_fasta_file,
113
- blast_xml_file: mrna_blast_xml_file ,
114
- raw_sequences: mrna_blast_xml_raw_seq,
97
+ input_fasta_file: mrna_input,
98
+ blast_xml_file: mrna_xml,
99
+ raw_sequences: mrna_raw,
115
100
  test: true
116
101
  }
117
102
 
118
- (GeneValidator::Validation.new(opts, 1, false)).run
103
+ GeneValidator.init(opts, 1, false)
104
+ GeneValidator.run
119
105
  $stdout.reopen original_stdout
120
106
  $stdout.reopen(mrna_tab_out, 'w')
121
107
 
122
- FileUtils.rm_rf(mrna_output_dir) rescue Error
108
+ FileUtils.rm_rf(mrna_output_dir) rescue Errno::ENOENT
123
109
 
124
110
  opts1 = {
125
111
  validations: %w(lenc lenr frame merge dup orf align),
126
112
  db: database,
127
113
  num_threads: threads,
128
114
  fast: false,
129
- input_fasta_file: mrna_input_fasta_file,
130
- blast_tabular_file: mrna_blast_tab_file,
115
+ input_fasta_file: mrna_input,
116
+ blast_tabular_file: mrna_tab,
131
117
  blast_tabular_options: tab_options,
132
- raw_sequences: mrna_blast_tab_raw_seq,
118
+ raw_sequences: mrna_raw,
133
119
  test: true
134
120
  }
135
121
 
136
- (GeneValidator::Validation.new(opts1, 1, false)).run
122
+ GeneValidator.init(opts1, 1, false)
123
+ GeneValidator.run
137
124
  $stdout.reopen original_stdout
138
125
 
139
126
  diff = FileUtils.compare_file(mrna_xml_out, mrna_tab_out)
140
127
 
141
128
  File.delete(mrna_xml_out)
142
129
  File.delete(mrna_tab_out)
143
- File.delete(mrna_yaml)
144
-
145
130
  FileUtils.rm_rf(mrna_output_dir)
146
131
 
147
132
  assert_equal(true, diff)
@@ -4,8 +4,10 @@ require 'fileutils'
4
4
  require 'genevalidator'
5
5
  require 'genevalidator/blast'
6
6
  require 'genevalidator/tabular_parser'
7
+ require 'genevalidator/validation'
7
8
 
8
9
  module GeneValidator
10
+ # Test the BlastUtil Class
9
11
  class TestBlastClass < Minitest::Test
10
12
  dir = 'test/test_files'
11
13
  filename_mrna = "#{dir}/file_mrna.txt"
@@ -20,9 +22,7 @@ module GeneValidator
20
22
  ncbi_mrna_xml20 = "#{dir}/ncbi_mrna.xml.20"
21
23
 
22
24
  describe 'Test Blast Class' do
23
-
24
25
  it 'should detect nucleotide seq type' do
25
-
26
26
  file_mrna = File.open(filename_mrna, 'w+')
27
27
  query_mrna = 'ATGGCTAAATTACAGAGGAAGAGAAGCAAGGCTCTTGGGTCATCTCTAGAGATGT' \
28
28
  'CCCAGATAATGGATGCAGGAACAAACAAAATTAAAAGAAGAATAAGAGATTTAGA' \
@@ -37,7 +37,7 @@ module GeneValidator
37
37
  file_mrna.puts(query_mrna)
38
38
  file_mrna.close
39
39
 
40
- FileUtils.rm_rf("#{filename_mrna}.html") rescue Error
40
+ FileUtils.rm_rf("#{filename_mrna}.html") rescue Errno::ENOENT
41
41
 
42
42
  default_opt = {
43
43
  input_fasta_file: filename_mrna,
@@ -47,11 +47,10 @@ module GeneValidator
47
47
  test: true
48
48
  }
49
49
 
50
- val = GeneValidator::Validation.new(default_opt)
51
-
50
+ GeneValidator.init(default_opt)
52
51
  File.delete(filename_mrna)
53
52
  FileUtils.rm_rf("#{filename_mrna}.html")
54
- assert_equal(:nucleotide, val.type)
53
+ assert_equal(:nucleotide, GeneValidator.config[:type])
55
54
  end
56
55
 
57
56
  it 'should detect protein type' do
@@ -72,7 +71,7 @@ module GeneValidator
72
71
  file_prot.puts(query_prot)
73
72
  file_prot.close
74
73
 
75
- FileUtils.rm_rf("#{filename_prot}.html") rescue Error
74
+ FileUtils.rm_rf("#{filename_prot}.html") rescue Errno::ENOENT
76
75
 
77
76
  default_opt = {
78
77
  input_fasta_file: filename_prot,
@@ -82,12 +81,11 @@ module GeneValidator
82
81
  test: true
83
82
  }
84
83
 
85
- val = GeneValidator::Validation.new(default_opt)
84
+ GeneValidator.init(default_opt)
86
85
 
87
86
  File.delete(filename_prot)
88
87
  FileUtils.rm_rf("#{filename_prot}.html")
89
- assert_equal(:protein, val.type)
90
-
88
+ assert_equal(:protein, GeneValidator.config[:type])
91
89
  end
92
90
 
93
91
  it 'should raise error when input types are mixed in the fasta' do
@@ -96,18 +94,18 @@ module GeneValidator
96
94
  original_stderr = $stderr
97
95
  $stderr.reopen('/dev/null', 'w')
98
96
 
99
- FileUtils.rm_rf("#{filename_prot}.html") rescue Error
97
+ FileUtils.rm_rf("#{filename_prot}.html") rescue Errno::ENOENT
100
98
 
101
99
  default_opt = {
102
100
  input_fasta_file: mixed_fasta,
103
101
  validations: ['all'],
104
102
  db: 'swissprot -remote',
105
103
  num_threads: 1,
106
- test: true
104
+ test: true
107
105
  }
108
106
 
109
- GeneValidator::Validation.new(default_opt)
110
- rescue SystemExit => e
107
+ GeneValidator.init(default_opt)
108
+ rescue SystemExit
111
109
  mixed = true
112
110
  end
113
111
  $stderr = original_stderr
@@ -126,10 +124,12 @@ module GeneValidator
126
124
  end
127
125
 
128
126
  it 'should parse tabular -6 input with default tabular format' do
129
-
130
- output = File.open(ncbi_mrna_tab20, 'rb').read
131
- tabular_headers = 'qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore'
132
- iterator_tab = TabularParser.new(ncbi_mrna_tab20, tabular_headers, :protein)
127
+ tabular_headers = 'qseqid sseqid pident length mismatch gapopen' \
128
+ ' qstart qend sstart send evalue bitscore'
129
+ GeneValidator.opt = { blast_tabular_file: ncbi_mrna_tab20,
130
+ blast_tabular_options: tabular_headers }
131
+ GeneValidator.config = { type: :protein }
132
+ iterator_tab = TabularParser.new
133
133
  hits = iterator_tab.parse_next
134
134
 
135
135
  assert_equal(20, hits.length)
@@ -145,9 +145,12 @@ module GeneValidator
145
145
  end
146
146
 
147
147
  it 'should parse tabular -6 input with tabular format as argument' do
148
- output = File.open(output_tab6, 'rb').read
149
- tabular_headers = 'qseqid sseqid sacc slen qstart qend sstart send pident length qframe evalue'
150
- iterator_tab = TabularParser.new(output_tab6, tabular_headers, :protein)
148
+ tabular_headers = 'qseqid sseqid sacc slen qstart qend sstart' \
149
+ ' send pident length qframe evalue'
150
+ GeneValidator.opt = { blast_tabular_file: output_tab6,
151
+ blast_tabular_options: tabular_headers }
152
+ GeneValidator.config = { type: :protein }
153
+ iterator_tab = TabularParser.new
151
154
  hits = iterator_tab.parse_next
152
155
  assert_equal(4, hits.length)
153
156
  assert_equal(199, hits[0].length_protein)
@@ -157,9 +160,12 @@ module GeneValidator
157
160
  end
158
161
 
159
162
  it 'should parse tabular -6 input with mixed columns' do
160
- output = File.open(output_tab_mixed, 'rb').read
161
- tabular_headers = 'qend sstart send pident length qframe evalue qseqid sseqid sacc slen qstart'
162
- iterator_tab = TabularParser.new(output_tab_mixed, tabular_headers, :protein)
163
+ tabular_headers = 'qend sstart send pident length qframe evalue' \
164
+ ' qseqid sseqid sacc slen qstart'
165
+ GeneValidator.opt = { blast_tabular_file: output_tab_mixed,
166
+ blast_tabular_options: tabular_headers }
167
+ GeneValidator.config = { type: :protein }
168
+ iterator_tab = TabularParser.new
163
169
  hits = iterator_tab.parse_next
164
170
  assert_equal(4, hits.length)
165
171
  assert_equal(199, hits[0].length_protein)
@@ -169,9 +175,12 @@ module GeneValidator
169
175
  end
170
176
 
171
177
  it 'should parse tabular -7 input' do
172
- output = File.open(output_tab7, 'rb').read
173
- tabular_headers = 'qseqid sseqid sacc slen qstart qend sstart send length qframe evalue'
174
- iterator_tab = TabularParser.new(output_tab7, tabular_headers, :protein)
178
+ tabular_headers = 'qseqid sseqid sacc slen qstart qend sstart send' \
179
+ ' length qframe evalue'
180
+ GeneValidator.opt = { blast_tabular_file: output_tab7,
181
+ blast_tabular_options: tabular_headers }
182
+ GeneValidator.config = { type: :protein }
183
+ iterator_tab = TabularParser.new
175
184
  hits = iterator_tab.parse_next
176
185
  assert_equal(4, hits.length)
177
186
  assert_equal(199, hits[0].length_protein)
@@ -180,10 +189,8 @@ module GeneValidator
180
189
  assert_equal(100, hits[0].hsp_list[2].hit_to)
181
190
  end
182
191
 
183
- it 'should remove identical matches among protein sequences' do
184
- output = File.open(output_tab6, 'rb').read
185
-
186
- FileUtils.rm_rf("#{filename_fasta}.html") rescue Error
192
+ it 'should remove identical matches (protein sequences)' do
193
+ FileUtils.rm_rf("#{filename_fasta}.html") rescue Errno::ENOENT
187
194
 
188
195
  default_opt = {
189
196
  input_fasta_file: filename_fasta,
@@ -193,19 +200,26 @@ module GeneValidator
193
200
  test: true
194
201
  }
195
202
 
196
- b = GeneValidator::Validation.new(default_opt) # just use a valida filename to create the object
203
+ GeneValidator.init(default_opt)
204
+
197
205
  prediction = Sequence.new
198
206
  prediction.length_protein = 1808
199
- tabular_headers = 'qseqid sseqid sacc slen qstart qend sstart send pident length qframe evalue'
200
- iterator_tab = TabularParser.new(output_tab6, tabular_headers, :protein)
207
+ tabular_headers = 'qseqid sseqid sacc slen qstart qend sstart' \
208
+ ' send pident length qframe evalue'
209
+ GeneValidator.opt = { blast_tabular_file: output_tab6,
210
+ blast_tabular_options: tabular_headers }
211
+ GeneValidator.config = { type: :protein }
212
+ iterator_tab = TabularParser.new
201
213
  iterator_tab.parse_next
202
214
  hits = iterator_tab.parse_next
203
215
 
204
- # before removal
205
216
  assert_equal(2, hits.length)
206
217
  assert_equal(100, hits[0].hsp_list[0].pidentity)
207
218
  assert_in_delta(99.23, hits[0].hsp_list[1].pidentity, 0.01)
208
219
  assert_in_delta(90, hits[1].hsp_list[0].pidentity, 0.01)
220
+
221
+ # Remove identical hits
222
+ b = GeneValidator::Validate.new
209
223
  hits = b.remove_identical_hits(prediction, hits)
210
224
 
211
225
  # after removal of identical hits
@@ -214,10 +228,8 @@ module GeneValidator
214
228
  FileUtils.rm_rf("#{filename_fasta}.html")
215
229
  end
216
230
 
217
- it 'should remove identical matches among nucleotide sequences with tabular input' do
218
- output = File.open(ncbi_mrna_tab20, 'rb').read
219
-
220
- FileUtils.rm_rf("#{filename_fasta}.html") rescue Error
231
+ it 'should remove identical matches (nucleotide seqs) - tabular input' do
232
+ FileUtils.rm_rf("#{filename_fasta}.html") rescue Errno::ENOENT
221
233
 
222
234
  default_opt = {
223
235
  input_fasta_file: filename_fasta,
@@ -226,17 +238,22 @@ module GeneValidator
226
238
  num_threads: 1,
227
239
  test: true
228
240
  }
229
- tabular_headers = 'qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore'
230
241
 
231
- b = GeneValidator::Validation.new(default_opt) # just use a valida filename to create the object
242
+ GeneValidator.init(default_opt)
232
243
 
233
244
  prediction = Sequence.new
234
245
  prediction.length_protein = 219 / 3
235
- iterator_tab = TabularParser.new(ncbi_mrna_tab20, tabular_headers, :nucleotide)
246
+ tabular_headers = 'qseqid sseqid pident length mismatch gapopen' \
247
+ ' qstart qend sstart send evalue bitscore'
248
+ GeneValidator.opt = { blast_tabular_file: ncbi_mrna_tab20,
249
+ blast_tabular_options: tabular_headers }
250
+ GeneValidator.config = { type: :nucleotide }
251
+ iterator_tab = TabularParser.new
236
252
  hits = iterator_tab.parse_next
237
253
 
238
254
  assert_equal(20, hits.length)
239
-
255
+ # remove identical hits
256
+ b = GeneValidator::Validate.new
240
257
  hits = b.remove_identical_hits(prediction, hits)
241
258
 
242
259
  assert_equal(13, hits.length)
@@ -244,10 +261,8 @@ module GeneValidator
244
261
  FileUtils.rm_rf("#{filename_fasta}.html")
245
262
  end
246
263
 
247
- it 'should remove identical matches among nucleotide sequences with xml input' do
248
- output = File.open(ncbi_mrna_xml20, 'rb').read
249
-
250
- FileUtils.rm_rf("#{filename_fasta}.html") rescue Error
264
+ it 'should remove identical matches (nucleotide seqs) - xml input' do
265
+ FileUtils.rm_rf("#{filename_fasta}.html") rescue Errno::ENOENT
251
266
 
252
267
  # just use a valid opts hash to create the object
253
268
  default_opt = {
@@ -258,16 +273,17 @@ module GeneValidator
258
273
  test: true
259
274
  }
260
275
 
261
- b = GeneValidator::Validation.new(default_opt)
276
+ GeneValidator.init(default_opt)
262
277
 
263
278
  prediction = Sequence.new
264
279
  prediction.length_protein = 219 / 3
265
-
280
+ output = File.open(ncbi_mrna_xml20, 'rb').read
266
281
  iterator = Bio::BlastXMLParser::NokogiriBlastXml.new(output).to_enum
267
282
  hits = BlastUtils.parse_next(iterator, :protein)
268
283
 
269
284
  assert_equal(20, hits.length)
270
285
 
286
+ b = GeneValidator::Validate.new
271
287
  hits = b.remove_identical_hits(prediction, hits)
272
288
 
273
289
  assert_equal(13, hits.length)
@@ -286,8 +302,9 @@ module GeneValidator
286
302
  test: true
287
303
  }
288
304
 
289
- (GeneValidator::Validation.new(default_opt)).run
290
- rescue SystemExit => error
305
+ GeneValidator.init(default_opt)
306
+
307
+ rescue SystemExit
291
308
  error = true
292
309
  end
293
310
  assert_equal(true, error)