genevalidator 1.6.1 → 1.6.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (131) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -1
  3. data/.travis.yml +2 -0
  4. data/README.md +78 -30
  5. data/Rakefile +11 -8
  6. data/aux/app_template_footer.erb +1 -6
  7. data/aux/app_template_header.erb +12 -32
  8. data/aux/files/css/style.css +2 -8
  9. data/aux/files/js/plots.js +564 -576
  10. data/aux/files/js/script.js +10 -0
  11. data/aux/json_footer.erb +8 -0
  12. data/aux/json_header.erb +19 -0
  13. data/aux/json_query.erb +14 -0
  14. data/aux/template_footer.erb +9 -58
  15. data/aux/template_header.erb +18 -58
  16. data/aux/template_query.erb +8 -36
  17. data/bin/genevalidator +45 -32
  18. data/genevalidator.gemspec +11 -7
  19. data/lib/genevalidator.rb +75 -455
  20. data/lib/genevalidator/arg_validation.rb +78 -107
  21. data/lib/genevalidator/blast.rb +57 -60
  22. data/lib/genevalidator/clusterization.rb +15 -15
  23. data/lib/genevalidator/exceptions.rb +32 -5
  24. data/lib/genevalidator/get_raw_sequences.rb +70 -33
  25. data/lib/genevalidator/hsp.rb +1 -4
  26. data/lib/genevalidator/json_to_gv_results.rb +109 -0
  27. data/lib/genevalidator/output.rb +177 -185
  28. data/lib/genevalidator/pool.rb +2 -1
  29. data/lib/genevalidator/sequences.rb +3 -3
  30. data/lib/genevalidator/tabular_parser.rb +24 -18
  31. data/lib/genevalidator/validation.rb +279 -0
  32. data/lib/genevalidator/validation_alignment.rb +31 -47
  33. data/lib/genevalidator/validation_blast_reading_frame.rb +19 -18
  34. data/lib/genevalidator/validation_duplication.rb +23 -19
  35. data/lib/genevalidator/validation_gene_merge.rb +30 -65
  36. data/lib/genevalidator/validation_length_cluster.rb +14 -53
  37. data/lib/genevalidator/validation_length_rank.rb +10 -11
  38. data/lib/genevalidator/validation_open_reading_frame.rb +18 -19
  39. data/lib/genevalidator/validation_report.rb +2 -5
  40. data/lib/genevalidator/validation_test.rb +8 -4
  41. data/lib/genevalidator/version.rb +1 -1
  42. data/test/test_all_validations.rb +51 -66
  43. data/test/test_blast.rb +68 -51
  44. data/test/test_clusterization.rb +1 -1
  45. data/test/test_clusterization_2d.rb +19 -13
  46. data/test/test_extended_array_methods.rb +1 -1
  47. data/test/test_files/all_validations_mrna/mrna.blast_tab6 +1806 -0
  48. data/test/test_files/all_validations_mrna/mrna.blast_tab7 +1865 -0
  49. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml → mrna.blast_xml} +18642 -1
  50. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml.index → mrna.blast_xml.index} +300 -0
  51. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta → mrna.fa} +0 -0
  52. data/test/test_files/all_validations_mrna/mrna.raw_seq +3970 -0
  53. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml.raw_seq.idx → mrna.raw_seq.idx} +901 -1
  54. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_tab → prot.blast_tab6} +416 -0
  55. data/test/test_files/all_validations_prot/prot.blast_tab7 +2400 -0
  56. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml → prot.blast_xml} +18299 -6723
  57. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.index → prot.blast_xml.index} +408 -0
  58. data/test/test_files/all_validations_prot/{all_validations_prot.fasta → prot.fa} +0 -0
  59. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.raw_seq → prot.raw_seq} +2735 -0
  60. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.raw_seq.idx → prot.raw_seq.idx} +3032 -1808
  61. data/test/test_sequences.rb +46 -41
  62. data/test/test_validation_open_reading_frame.rb +318 -202
  63. data/test/test_validations.rb +48 -32
  64. metadata +76 -102
  65. data/doc/AliasDuplicationError.html +0 -134
  66. data/doc/AlignmentValidation.html +0 -1687
  67. data/doc/AlignmentValidationOutput.html +0 -659
  68. data/doc/Blast.html +0 -1905
  69. data/doc/BlastRFValidationOutput.html +0 -545
  70. data/doc/BlastReadingFrameValidation.html +0 -370
  71. data/doc/BlastUtils.html +0 -875
  72. data/doc/ClasspathError.html +0 -134
  73. data/doc/Cluster.html +0 -1316
  74. data/doc/DuplciationValidationOutput.html +0 -564
  75. data/doc/DuplicationValidation.html +0 -920
  76. data/doc/DuplicationValidationOutput.html +0 -564
  77. data/doc/FileNotFoundException.html +0 -134
  78. data/doc/GeneMergeValidation.html +0 -935
  79. data/doc/GeneMergeValidationOutput.html +0 -652
  80. data/doc/HierarchicalClusterization.html +0 -994
  81. data/doc/Hsp.html +0 -1485
  82. data/doc/InconsistentTabularFormat.html +0 -135
  83. data/doc/LengthClusterValidation.html +0 -982
  84. data/doc/LengthClusterValidationOutput.html +0 -515
  85. data/doc/LengthRankValidation.html +0 -496
  86. data/doc/LengthRankValidationOutput.html +0 -517
  87. data/doc/NoInternetError.html +0 -135
  88. data/doc/NoMafftInstallationError.html +0 -134
  89. data/doc/NoPIdentError.html +0 -134
  90. data/doc/NoValidationError.html +0 -134
  91. data/doc/NotEnoughHitsError.html +0 -135
  92. data/doc/ORFValidationOutput.html +0 -593
  93. data/doc/OpenReadingFrameValidation.html +0 -1107
  94. data/doc/OtherError.html +0 -123
  95. data/doc/Output.html +0 -1540
  96. data/doc/Pair.html +0 -309
  97. data/doc/PairCluster.html +0 -767
  98. data/doc/Plot.html +0 -837
  99. data/doc/QueryError.html +0 -134
  100. data/doc/ReportClassError.html +0 -135
  101. data/doc/Sequence.html +0 -1299
  102. data/doc/SequenceTypeError.html +0 -135
  103. data/doc/TabularEntry.html +0 -837
  104. data/doc/TabularParser.html +0 -1104
  105. data/doc/Validation.html +0 -2147
  106. data/doc/ValidationClassError.html +0 -134
  107. data/doc/ValidationOutput.html +0 -460
  108. data/doc/ValidationReport.html +0 -940
  109. data/doc/ValidationTest.html +0 -939
  110. data/doc/_index.html +0 -449
  111. data/doc/class_list.html +0 -54
  112. data/doc/css/common.css +0 -1
  113. data/doc/css/full_list.css +0 -57
  114. data/doc/css/style.css +0 -338
  115. data/doc/file.README.html +0 -151
  116. data/doc/file_list.html +0 -56
  117. data/doc/frames.html +0 -26
  118. data/doc/index.html +0 -151
  119. data/doc/js/app.js +0 -214
  120. data/doc/js/full_list.js +0 -178
  121. data/doc/js/jquery.js +0 -4
  122. data/doc/method_list.html +0 -1505
  123. data/doc/top-level-namespace.html +0 -112
  124. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab +0 -967
  125. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.index +0 -967
  126. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.raw_seq +0 -4929
  127. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.raw_seq.idx +0 -1006
  128. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_xml.raw_seq +0 -2075
  129. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.index +0 -1864
  130. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.raw_seq +0 -42411
  131. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.raw_seq.idx +0 -3751
@@ -1,7 +1,10 @@
1
- require 'genevalidator/validation_report'
2
- require 'genevalidator/validation_test'
1
+ require 'forwardable'
2
+
3
3
  require 'genevalidator/exceptions'
4
4
  require 'genevalidator/ext/array'
5
+ require 'genevalidator/validation_report'
6
+ require 'genevalidator/validation_test'
7
+
5
8
  module GeneValidator
6
9
  ##
7
10
  # Class that stores the validation output information
@@ -81,11 +84,9 @@ module GeneValidator
81
84
  ##
82
85
  # Initializes the object
83
86
  # Params:
84
- # +hits+: a vector of +Sequence+ objects (representing blast hits)
85
87
  # +prediction+: a +Sequence+ object representing the blast query
86
- # +threshold+: threshold below which the prediction length rank is
87
- # considered to be inadequate
88
- def initialize(type, prediction, hits)
88
+ # +hits+: a vector of +Sequence+ objects (representing blast hits)
89
+ def initialize(prediction, hits)
89
90
  super
90
91
  @short_header = 'LengthRank'
91
92
  @header = 'Length Rank'
@@ -143,19 +144,17 @@ module GeneValidator
143
144
  largest_hit,
144
145
  extreme_hits,
145
146
  percentage)
146
- @validation_report.running_time = Time.now - start
147
+ @validation_report.run_time = Time.now - start
147
148
  @validation_report
148
149
 
149
150
  rescue NotEnoughHitsError
150
151
  @validation_report = ValidationReport.new('Not enough evidence', :warning,
151
152
  @short_header, @header,
152
- @description, @approach,
153
- @explanation, @conclusion)
153
+ @description)
154
154
  rescue Exception
155
155
  @validation_report = ValidationReport.new('Unexpected error', :error,
156
156
  @short_header, @header,
157
- @description, @approach,
158
- @explanation, @conclusion)
157
+ @description)
159
158
  @validation_report.errors.push 'Unexpected Error'
160
159
  end
161
160
  end
@@ -1,5 +1,10 @@
1
- require 'genevalidator/validation_report'
2
1
  require 'bio'
2
+ require 'forwardable'
3
+
4
+ require 'genevalidator/exceptions'
5
+ require 'genevalidator/validation_report'
6
+ require 'genevalidator/validation_test'
7
+
3
8
  module GeneValidator
4
9
  ##
5
10
  # Class that stores the validation output information
@@ -57,24 +62,23 @@ module GeneValidator
57
62
  # This class contains the methods necessary for checking whether there is
58
63
  # a main Open Reading Frame in the predicted sequence
59
64
  class OpenReadingFrameValidation < ValidationTest
60
- attr_reader :filename
65
+ extend Forwardable
66
+ def_delegators GeneValidator, :config
61
67
 
62
68
  ##
63
69
  # Initializes the object
64
70
  # Params:
65
- # +type+: type of the predicted sequence (:nucleotide or :protein)
66
71
  # +prediction+: a +Sequence+ object representing the blast query
67
72
  # +hits+: a vector of +Sequence+ objects (representing blast hits)
68
- # +plot_filename+: name of the input file, used when making plot files
69
- def initialize(type, prediction, hits, filename)
73
+ def initialize(prediction, hits)
70
74
  super
71
- @short_header = 'ORF'
75
+ @short_header = 'MainORF'
72
76
  @header = 'Main ORF'
73
77
  @description = 'Check whether there is a single main Open Reading' \
74
78
  ' Frame in the predicted gene. Applicable only for' \
75
79
  ' nucleotide queries.'
76
80
  @cli_name = 'orf'
77
- @filename = filename
81
+ @type = config[:type]
78
82
  end
79
83
 
80
84
  ##
@@ -101,15 +105,14 @@ module GeneValidator
101
105
  @validation_report = ORFValidationOutput.new(@short_header, @header,
102
106
  @description, orfs,
103
107
  coverage, longest_orf_frame)
104
- @validation_report.running_time = Time.now - start
108
+ @validation_report.run_time = Time.now - start
105
109
 
106
110
  @validation_report.plot_files.push(plot1)
107
111
  @validation_report
108
112
  rescue Exception
109
113
  @validation_report = ValidationReport.new('Unexpected error', :error,
110
114
  @short_header, @header,
111
- @description, @approach,
112
- @explanation, @conclusion)
115
+ @description)
113
116
  @validation_report.errors.push 'Unexpected Error'
114
117
  end
115
118
 
@@ -157,29 +160,25 @@ module GeneValidator
157
160
  # +orfs+: +Hash+ containing the open reading frame
158
161
  # +output+: location where the plot will be saved in jped file format
159
162
  # +prediction+: Sequence objects
160
- def plot_orfs(orfs, translated_length, output = "#{@filename}_orfs.json")
163
+ def plot_orfs(orfs, translated_length, output = "#{@plot_path}_orfs.json")
161
164
  fail QueryError unless orfs.is_a? Hash
162
165
 
163
- results = []
166
+ data = []
164
167
 
165
168
  # Create hashes for the Background
166
169
  (-3..3).each do |frame|
167
170
  next if frame == 0
168
- results << { 'y' => frame, 'start' => 1, 'stop' => translated_length,
171
+ data << { 'y' => frame, 'start' => 1, 'stop' => translated_length,
169
172
  'color' => 'gray' }
170
173
  end
171
174
 
172
175
  # Create the hashes for the ORFs...
173
176
  orfs.each do |_key, h|
174
- results << { 'y' => h[:frame], 'start' => h[:orf_start],
177
+ data << { 'y' => h[:frame], 'start' => h[:orf_start],
175
178
  'stop' => h[:orf_end], 'color' => 'red' }
176
179
  end
177
180
 
178
- f = File.open(output, 'w')
179
- f.write((results).to_json)
180
- f.close
181
-
182
- Plot.new(output.scan(%r{([^/]+)$})[0][0],
181
+ Plot.new(data,
183
182
  :lines,
184
183
  'Open Reading Frames in all 6 Frames',
185
184
  'Open Reading Frame (Minimimum Length: 30 amino acids),red',
@@ -1,6 +1,6 @@
1
1
  # Top level module / namespace.
2
2
  module GeneValidator
3
- Plot = Struct.new(:filename, :type, :title, :footer, :xtitle, :ytitle, :aux1,
3
+ Plot = Struct.new(:data, :type, :title, :footer, :xtitle, :ytitle, :aux1,
4
4
  :aux2)
5
5
 
6
6
  ##
@@ -8,7 +8,6 @@ module GeneValidator
8
8
  # all validation reports
9
9
  class ValidationReport
10
10
  attr_reader :message
11
- attr_reader :bg_color
12
11
  attr_reader :plot_files
13
12
  attr_reader :result
14
13
  attr_reader :expected
@@ -17,7 +16,7 @@ module GeneValidator
17
16
  attr_accessor :short_header
18
17
  attr_accessor :header
19
18
  attr_accessor :description
20
- attr_accessor :running_time
19
+ attr_accessor :run_time
21
20
  attr_accessor :approach
22
21
  attr_accessor :explanation
23
22
  attr_accessor :conclusion
@@ -31,7 +30,6 @@ module GeneValidator
31
30
  # +short_header+: String
32
31
  # +header+: String
33
32
  # +description+: String
34
- # +bg_color+: background color of the table cell for the html output (nil
35
33
  # by default)
36
34
  def initialize(message = 'Not enough evidence', validation_result = :no,
37
35
  short_header = '', header = '', description = '',
@@ -60,7 +58,6 @@ module GeneValidator
60
58
  ##
61
59
  # May return "success" or "error"
62
60
  def color
63
- bg_color unless bg_color.nil?
64
61
  if validation == @expected
65
62
  'success'
66
63
  elsif validation == :error || validation == :unapplicable
@@ -1,7 +1,11 @@
1
+ require 'forwardable'
2
+
1
3
  module GeneValidator
2
4
  # This is an abstract class extended
3
5
  # by all validation classes
4
6
  class ValidationTest
7
+ extend Forwardable
8
+ def_delegators GeneValidator, :config
5
9
  attr_accessor :type
6
10
  attr_accessor :prediction
7
11
  attr_accessor :hits
@@ -10,7 +14,7 @@ module GeneValidator
10
14
  attr_accessor :cli_name
11
15
  attr_accessor :description
12
16
  attr_accessor :validation_report
13
- attr_accessor :running_time
17
+ attr_accessor :run_time
14
18
 
15
19
  ##
16
20
  # Initializes the object
@@ -19,13 +23,13 @@ module GeneValidator
19
23
  # +prediction+: a +Sequence+ object representing the blast query
20
24
  # +hits+: a vector of +Sequence+ objects (representing blast hits)
21
25
  # +argv+: additional arguments if needed
22
- def initialize(type, prediction, hits = nil, *_argv)
23
- @type = type
26
+ def initialize(prediction, hits = nil, *_argv)
27
+ @type = config[:type]
24
28
  @prediction = prediction
25
29
  @hits = hits
26
30
  @short_header = 'NewVal'
27
31
  @header = 'New Validation'
28
- @running_time = 0
32
+ @run_time = 0
29
33
  @cli_name = 'all'
30
34
  @description = 'No description available.'
31
35
  @validation_report = ValidationReport.new('Not enough evidence')
@@ -1,3 +1,3 @@
1
1
  module GeneValidator
2
- VERSION = '1.6.1'
2
+ VERSION = '1.6.2'
3
3
  end
@@ -3,93 +3,79 @@ require 'minitest/autorun'
3
3
  require 'yaml'
4
4
  require 'fileutils'
5
5
  require 'genevalidator'
6
- require 'genevalidator/blast'
7
- require 'genevalidator/validation_length_cluster'
8
- require 'genevalidator/validation_length_rank'
9
- require 'genevalidator/validation_blast_reading_frame'
10
- require 'genevalidator/validation_gene_merge'
11
- require 'genevalidator/validation_duplication'
12
- require 'genevalidator/validation_open_reading_frame'
13
- require 'genevalidator/validation_alignment'
14
6
 
15
7
  module GeneValidator
8
+ # Test if GV produces the same output with XML and tabular input
16
9
  class ValidateOutput < Minitest::Test
17
-
18
- prot_input_fasta_file = "test/test_files/all_validations_prot/all_validations_prot.fasta"
19
- prot_blast_xml_file = "#{prot_input_fasta_file}.blast_xml"
20
- prot_blast_xml_raw_seq = "#{prot_input_fasta_file}.blast_xml.raw_seq"
21
-
22
- prot_blast_tab_file = "#{prot_input_fasta_file}.blast_tab"
23
- prot_blast_tab_raw_seq = "#{prot_input_fasta_file}.blast_tab.raw_seq"
24
-
25
- mrna_input_fasta_file = "test/test_files/all_validations_mrna/all_validations_mrna.fasta"
26
- mrna_blast_xml_file = "#{mrna_input_fasta_file}.blast_xml"
27
- mrna_blast_xml_raw_seq = "#{mrna_input_fasta_file}.blast_xml.raw_seq"
28
-
29
- mrna_blast_tab_file = "#{mrna_input_fasta_file}.blast_tab"
30
- mrna_blast_tab_raw_seq = "#{mrna_input_fasta_file}.blast_tab.raw_seq"
31
-
32
- tab_options = "qseqid sseqid sacc slen qstart qend sstart send length qframe pident evalue"
33
-
34
- database = 'swissprot -remote'
35
- threads = '1'
36
-
37
- # Unwanted Output Files
38
- prot_xml_out = "#{prot_blast_xml_file}.out"
39
- prot_tab_out = "#{mrna_blast_tab_file}.out"
40
- mrna_xml_out = "#{mrna_blast_xml_file}.out"
41
- mrna_tab_out = "#{mrna_blast_tab_file}.out"
42
- prot_output_dir = "#{prot_input_fasta_file}.html"
43
- mrna_output_dir = "#{mrna_input_fasta_file}.html"
44
- prot_yaml = "#{prot_input_fasta_file}.yaml"
45
- mrna_yaml = "#{mrna_input_fasta_file}.yaml"
10
+ prot_dir = 'test/test_files/all_validations_prot'
11
+ prot_input = File.join(prot_dir, 'prot.fa')
12
+ prot_xml = File.join(prot_dir, 'prot.blast_xml')
13
+ prot_tab = File.join(prot_dir, 'prot.blast_tab6')
14
+ prot_raw = File.join(prot_dir, 'prot.raw_seq')
15
+
16
+ mrna_dir = 'test/test_files/all_validations_mrna'
17
+ mrna_input = File.join(mrna_dir, 'mrna.fa')
18
+ mrna_xml = File.join(mrna_dir, 'mrna.blast_xml')
19
+ mrna_tab = File.join(mrna_dir, 'mrna.blast_tab6')
20
+ mrna_raw = File.join(mrna_dir, 'mrna.raw_seq')
21
+
22
+ tab_options = 'qseqid sseqid sacc slen qstart qend sstart send length' \
23
+ ' qframe pident nident evalue qseq sseq'
24
+ database = 'swissprot -remote'
25
+ threads = '1'
26
+
27
+ # Unwanted Output Files
28
+ prot_xml_out = "#{prot_xml}.out"
29
+ prot_tab_out = "#{prot_tab}.out"
30
+ prot_output_dir = "#{prot_input}.html"
31
+ mrna_xml_out = "#{mrna_xml}.out"
32
+ mrna_tab_out = "#{mrna_tab}.out"
33
+ mrna_output_dir = "#{mrna_input}.html"
46
34
 
47
35
  describe 'Protein dataset' do
48
36
  it 'xml and tabular inputs give the same output' do
49
-
50
37
  original_stdout = $stdout.clone
51
38
  $stdout.reopen(prot_xml_out, 'w')
52
39
 
53
- FileUtils.rm_rf(prot_output_dir) rescue Error
54
-
40
+ FileUtils.rm_rf(prot_output_dir) rescue Errno::ENOENT
55
41
  opts = {
56
- validations: %w(lenc lenr frame merge dup orf),
42
+ validations: %w(lenc lenr frame merge dup orf align),
57
43
  db: database,
58
44
  num_threads: threads,
59
45
  fast: false,
60
- input_fasta_file: prot_input_fasta_file,
61
- blast_xml_file: prot_blast_xml_file,
62
- raw_sequences: prot_blast_xml_raw_seq,
46
+ input_fasta_file: prot_input,
47
+ blast_xml_file: prot_xml,
48
+ raw_sequences: prot_raw,
63
49
  test: true
64
50
  }
65
51
 
66
- (GeneValidator::Validation.new(opts, 1, false)).run
52
+ GeneValidator.init(opts, 1, false)
53
+ GeneValidator.run
67
54
  $stdout.reopen original_stdout
68
55
  $stdout.reopen(prot_tab_out, 'w')
69
56
 
70
- FileUtils.rm_rf(prot_output_dir) rescue Error
57
+ FileUtils.rm_rf(prot_output_dir) rescue Errno::ENOENT
71
58
 
72
59
  opts1 = {
73
- validations: %w(lenc lenr frame merge dup orf),
60
+ validations: %w(lenc lenr frame merge dup orf align),
74
61
  db: database,
75
62
  num_threads: threads,
76
63
  fast: false,
77
- input_fasta_file: prot_input_fasta_file,
78
- blast_tabular_file: prot_blast_tab_file,
64
+ input_fasta_file: prot_input,
65
+ blast_tabular_file: prot_tab,
79
66
  blast_tabular_options: tab_options,
80
- raw_sequences: prot_blast_tab_raw_seq,
67
+ raw_sequences: prot_raw,
81
68
  test: true
82
69
  }
83
70
 
84
- (GeneValidator::Validation.new(opts1, 1, false)).run
71
+ GeneValidator.init(opts1, 1, false)
72
+ GeneValidator.run
85
73
  $stdout.reopen original_stdout
86
74
 
87
75
  diff = FileUtils.compare_file(prot_xml_out, prot_tab_out)
88
76
 
89
77
  File.delete(prot_xml_out)
90
78
  File.delete(prot_tab_out)
91
- File.delete(prot_yaml)
92
-
93
79
  FileUtils.rm_rf(prot_output_dir)
94
80
 
95
81
  assert_equal(true, diff)
@@ -98,50 +84,49 @@ module GeneValidator
98
84
 
99
85
  describe 'mRNA dataset' do
100
86
  it 'xml and tabular inputs give the same output' do
101
-
102
87
  original_stdout = $stdout.clone
103
88
  $stdout.reopen(mrna_xml_out, 'w')
104
89
 
105
- FileUtils.rm_rf(mrna_output_dir) rescue Error
90
+ FileUtils.rm_rf(mrna_output_dir) rescue Errno::ENOENT
106
91
 
107
92
  opts = {
108
93
  validations: %w(lenc lenr frame merge dup orf align),
109
94
  db: database,
110
95
  num_threads: threads,
111
96
  fast: false,
112
- input_fasta_file: mrna_input_fasta_file,
113
- blast_xml_file: mrna_blast_xml_file ,
114
- raw_sequences: mrna_blast_xml_raw_seq,
97
+ input_fasta_file: mrna_input,
98
+ blast_xml_file: mrna_xml,
99
+ raw_sequences: mrna_raw,
115
100
  test: true
116
101
  }
117
102
 
118
- (GeneValidator::Validation.new(opts, 1, false)).run
103
+ GeneValidator.init(opts, 1, false)
104
+ GeneValidator.run
119
105
  $stdout.reopen original_stdout
120
106
  $stdout.reopen(mrna_tab_out, 'w')
121
107
 
122
- FileUtils.rm_rf(mrna_output_dir) rescue Error
108
+ FileUtils.rm_rf(mrna_output_dir) rescue Errno::ENOENT
123
109
 
124
110
  opts1 = {
125
111
  validations: %w(lenc lenr frame merge dup orf align),
126
112
  db: database,
127
113
  num_threads: threads,
128
114
  fast: false,
129
- input_fasta_file: mrna_input_fasta_file,
130
- blast_tabular_file: mrna_blast_tab_file,
115
+ input_fasta_file: mrna_input,
116
+ blast_tabular_file: mrna_tab,
131
117
  blast_tabular_options: tab_options,
132
- raw_sequences: mrna_blast_tab_raw_seq,
118
+ raw_sequences: mrna_raw,
133
119
  test: true
134
120
  }
135
121
 
136
- (GeneValidator::Validation.new(opts1, 1, false)).run
122
+ GeneValidator.init(opts1, 1, false)
123
+ GeneValidator.run
137
124
  $stdout.reopen original_stdout
138
125
 
139
126
  diff = FileUtils.compare_file(mrna_xml_out, mrna_tab_out)
140
127
 
141
128
  File.delete(mrna_xml_out)
142
129
  File.delete(mrna_tab_out)
143
- File.delete(mrna_yaml)
144
-
145
130
  FileUtils.rm_rf(mrna_output_dir)
146
131
 
147
132
  assert_equal(true, diff)
@@ -4,8 +4,10 @@ require 'fileutils'
4
4
  require 'genevalidator'
5
5
  require 'genevalidator/blast'
6
6
  require 'genevalidator/tabular_parser'
7
+ require 'genevalidator/validation'
7
8
 
8
9
  module GeneValidator
10
+ # Test the BlastUtils class
9
11
  class TestBlastClass < Minitest::Test
10
12
  dir = 'test/test_files'
11
13
  filename_mrna = "#{dir}/file_mrna.txt"
@@ -20,9 +22,7 @@ module GeneValidator
20
22
  ncbi_mrna_xml20 = "#{dir}/ncbi_mrna.xml.20"
21
23
 
22
24
  describe 'Test Blast Class' do
23
-
24
25
  it 'should detect nucleotide seq type' do
25
-
26
26
  file_mrna = File.open(filename_mrna, 'w+')
27
27
  query_mrna = 'ATGGCTAAATTACAGAGGAAGAGAAGCAAGGCTCTTGGGTCATCTCTAGAGATGT' \
28
28
  'CCCAGATAATGGATGCAGGAACAAACAAAATTAAAAGAAGAATAAGAGATTTAGA' \
@@ -37,7 +37,7 @@ module GeneValidator
37
37
  file_mrna.puts(query_mrna)
38
38
  file_mrna.close
39
39
 
40
- FileUtils.rm_rf("#{filename_mrna}.html") rescue Error
40
+ FileUtils.rm_rf("#{filename_mrna}.html") rescue Errno::ENOENT
41
41
 
42
42
  default_opt = {
43
43
  input_fasta_file: filename_mrna,
@@ -47,11 +47,10 @@ module GeneValidator
47
47
  test: true
48
48
  }
49
49
 
50
- val = GeneValidator::Validation.new(default_opt)
51
-
50
+ GeneValidator.init(default_opt)
52
51
  File.delete(filename_mrna)
53
52
  FileUtils.rm_rf("#{filename_mrna}.html")
54
- assert_equal(:nucleotide, val.type)
53
+ assert_equal(:nucleotide, GeneValidator.config[:type])
55
54
  end
56
55
 
57
56
  it 'should detect protein type' do
@@ -72,7 +71,7 @@ module GeneValidator
72
71
  file_prot.puts(query_prot)
73
72
  file_prot.close
74
73
 
75
- FileUtils.rm_rf("#{filename_prot}.html") rescue Error
74
+ FileUtils.rm_rf("#{filename_prot}.html") rescue Errno::ENOENT
76
75
 
77
76
  default_opt = {
78
77
  input_fasta_file: filename_prot,
@@ -82,12 +81,11 @@ module GeneValidator
82
81
  test: true
83
82
  }
84
83
 
85
- val = GeneValidator::Validation.new(default_opt)
84
+ GeneValidator.init(default_opt)
86
85
 
87
86
  File.delete(filename_prot)
88
87
  FileUtils.rm_rf("#{filename_prot}.html")
89
- assert_equal(:protein, val.type)
90
-
88
+ assert_equal(:protein, GeneValidator.config[:type])
91
89
  end
92
90
 
93
91
  it 'should raise error when input types are mixed in the fasta' do
@@ -96,18 +94,18 @@ module GeneValidator
96
94
  original_stderr = $stderr
97
95
  $stderr.reopen('/dev/null', 'w')
98
96
 
99
- FileUtils.rm_rf("#{filename_prot}.html") rescue Error
97
+ FileUtils.rm_rf("#{filename_prot}.html") rescue Errno::ENOENT
100
98
 
101
99
  default_opt = {
102
100
  input_fasta_file: mixed_fasta,
103
101
  validations: ['all'],
104
102
  db: 'swissprot -remote',
105
103
  num_threads: 1,
106
- test: true
104
+ test: true
107
105
  }
108
106
 
109
- GeneValidator::Validation.new(default_opt)
110
- rescue SystemExit => e
107
+ GeneValidator.init(default_opt)
108
+ rescue SystemExit
111
109
  mixed = true
112
110
  end
113
111
  $stderr = original_stderr
@@ -126,10 +124,12 @@ module GeneValidator
126
124
  end
127
125
 
128
126
  it 'should parse tabular -6 input with default tabular format' do
129
-
130
- output = File.open(ncbi_mrna_tab20, 'rb').read
131
- tabular_headers = 'qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore'
132
- iterator_tab = TabularParser.new(ncbi_mrna_tab20, tabular_headers, :protein)
127
+ tabular_headers = 'qseqid sseqid pident length mismatch gapopen' \
128
+ ' qstart qend sstart send evalue bitscore'
129
+ GeneValidator.opt = { blast_tabular_file: ncbi_mrna_tab20,
130
+ blast_tabular_options: tabular_headers }
131
+ GeneValidator.config = { type: :protein }
132
+ iterator_tab = TabularParser.new
133
133
  hits = iterator_tab.parse_next
134
134
 
135
135
  assert_equal(20, hits.length)
@@ -145,9 +145,12 @@ module GeneValidator
145
145
  end
146
146
 
147
147
  it 'should parse tabular -6 input with tabular format as argument' do
148
- output = File.open(output_tab6, 'rb').read
149
- tabular_headers = 'qseqid sseqid sacc slen qstart qend sstart send pident length qframe evalue'
150
- iterator_tab = TabularParser.new(output_tab6, tabular_headers, :protein)
148
+ tabular_headers = 'qseqid sseqid sacc slen qstart qend sstart' \
149
+ ' send pident length qframe evalue'
150
+ GeneValidator.opt = { blast_tabular_file: output_tab6,
151
+ blast_tabular_options: tabular_headers }
152
+ GeneValidator.config = { type: :protein }
153
+ iterator_tab = TabularParser.new
151
154
  hits = iterator_tab.parse_next
152
155
  assert_equal(4, hits.length)
153
156
  assert_equal(199, hits[0].length_protein)
@@ -157,9 +160,12 @@ module GeneValidator
157
160
  end
158
161
 
159
162
  it 'should parse tabular -6 input with mixed columns' do
160
- output = File.open(output_tab_mixed, 'rb').read
161
- tabular_headers = 'qend sstart send pident length qframe evalue qseqid sseqid sacc slen qstart'
162
- iterator_tab = TabularParser.new(output_tab_mixed, tabular_headers, :protein)
163
+ tabular_headers = 'qend sstart send pident length qframe evalue' \
164
+ ' qseqid sseqid sacc slen qstart'
165
+ GeneValidator.opt = { blast_tabular_file: output_tab_mixed,
166
+ blast_tabular_options: tabular_headers }
167
+ GeneValidator.config = { type: :protein }
168
+ iterator_tab = TabularParser.new
163
169
  hits = iterator_tab.parse_next
164
170
  assert_equal(4, hits.length)
165
171
  assert_equal(199, hits[0].length_protein)
@@ -169,9 +175,12 @@ module GeneValidator
169
175
  end
170
176
 
171
177
  it 'should parse tabular -7 input' do
172
- output = File.open(output_tab7, 'rb').read
173
- tabular_headers = 'qseqid sseqid sacc slen qstart qend sstart send length qframe evalue'
174
- iterator_tab = TabularParser.new(output_tab7, tabular_headers, :protein)
178
+ tabular_headers = 'qseqid sseqid sacc slen qstart qend sstart send' \
179
+ ' length qframe evalue'
180
+ GeneValidator.opt = { blast_tabular_file: output_tab7,
181
+ blast_tabular_options: tabular_headers }
182
+ GeneValidator.config = { type: :protein }
183
+ iterator_tab = TabularParser.new
175
184
  hits = iterator_tab.parse_next
176
185
  assert_equal(4, hits.length)
177
186
  assert_equal(199, hits[0].length_protein)
@@ -180,10 +189,8 @@ module GeneValidator
180
189
  assert_equal(100, hits[0].hsp_list[2].hit_to)
181
190
  end
182
191
 
183
- it 'should remove identical matches among protein sequences' do
184
- output = File.open(output_tab6, 'rb').read
185
-
186
- FileUtils.rm_rf("#{filename_fasta}.html") rescue Error
192
+ it 'should remove identical matches (protein sequences)' do
193
+ FileUtils.rm_rf("#{filename_fasta}.html") rescue Errno::ENOENT
187
194
 
188
195
  default_opt = {
189
196
  input_fasta_file: filename_fasta,
@@ -193,19 +200,26 @@ module GeneValidator
193
200
  test: true
194
201
  }
195
202
 
196
- b = GeneValidator::Validation.new(default_opt) # just use a valida filename to create the object
203
+ GeneValidator.init(default_opt)
204
+
197
205
  prediction = Sequence.new
198
206
  prediction.length_protein = 1808
199
- tabular_headers = 'qseqid sseqid sacc slen qstart qend sstart send pident length qframe evalue'
200
- iterator_tab = TabularParser.new(output_tab6, tabular_headers, :protein)
207
+ tabular_headers = 'qseqid sseqid sacc slen qstart qend sstart' \
208
+ ' send pident length qframe evalue'
209
+ GeneValidator.opt = { blast_tabular_file: output_tab6,
210
+ blast_tabular_options: tabular_headers }
211
+ GeneValidator.config = { type: :protein }
212
+ iterator_tab = TabularParser.new
201
213
  iterator_tab.parse_next
202
214
  hits = iterator_tab.parse_next
203
215
 
204
- # before removal
205
216
  assert_equal(2, hits.length)
206
217
  assert_equal(100, hits[0].hsp_list[0].pidentity)
207
218
  assert_in_delta(99.23, hits[0].hsp_list[1].pidentity, 0.01)
208
219
  assert_in_delta(90, hits[1].hsp_list[0].pidentity, 0.01)
220
+
221
+ # Remove identical hits
222
+ b = GeneValidator::Validate.new
209
223
  hits = b.remove_identical_hits(prediction, hits)
210
224
 
211
225
  # after removal of identical hits
@@ -214,10 +228,8 @@ module GeneValidator
214
228
  FileUtils.rm_rf("#{filename_fasta}.html")
215
229
  end
216
230
 
217
- it 'should remove identical matches among nucleotide sequences with tabular input' do
218
- output = File.open(ncbi_mrna_tab20, 'rb').read
219
-
220
- FileUtils.rm_rf("#{filename_fasta}.html") rescue Error
231
+ it 'should remove identical matches (nucleotide seqs) - tabular input' do
232
+ FileUtils.rm_rf("#{filename_fasta}.html") rescue Errno::ENOENT
221
233
 
222
234
  default_opt = {
223
235
  input_fasta_file: filename_fasta,
@@ -226,17 +238,22 @@ module GeneValidator
226
238
  num_threads: 1,
227
239
  test: true
228
240
  }
229
- tabular_headers = 'qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore'
230
241
 
231
- b = GeneValidator::Validation.new(default_opt) # just use a valida filename to create the object
242
+ GeneValidator.init(default_opt)
232
243
 
233
244
  prediction = Sequence.new
234
245
  prediction.length_protein = 219 / 3
235
- iterator_tab = TabularParser.new(ncbi_mrna_tab20, tabular_headers, :nucleotide)
246
+ tabular_headers = 'qseqid sseqid pident length mismatch gapopen' \
247
+ ' qstart qend sstart send evalue bitscore'
248
+ GeneValidator.opt = { blast_tabular_file: ncbi_mrna_tab20,
249
+ blast_tabular_options: tabular_headers }
250
+ GeneValidator.config = { type: :nucleotide }
251
+ iterator_tab = TabularParser.new
236
252
  hits = iterator_tab.parse_next
237
253
 
238
254
  assert_equal(20, hits.length)
239
-
255
+ # remove identical hits
256
+ b = GeneValidator::Validate.new
240
257
  hits = b.remove_identical_hits(prediction, hits)
241
258
 
242
259
  assert_equal(13, hits.length)
@@ -244,10 +261,8 @@ module GeneValidator
244
261
  FileUtils.rm_rf("#{filename_fasta}.html")
245
262
  end
246
263
 
247
- it 'should remove identical matches among nucleotide sequences with xml input' do
248
- output = File.open(ncbi_mrna_xml20, 'rb').read
249
-
250
- FileUtils.rm_rf("#{filename_fasta}.html") rescue Error
264
+ it 'should remove identical matches (nucleotide seqs) - xml input' do
265
+ FileUtils.rm_rf("#{filename_fasta}.html") rescue Errno::ENOENT
251
266
 
252
267
  # just use a valid opts hash to create the object
253
268
  default_opt = {
@@ -258,16 +273,17 @@ module GeneValidator
258
273
  test: true
259
274
  }
260
275
 
261
- b = GeneValidator::Validation.new(default_opt)
276
+ GeneValidator.init(default_opt)
262
277
 
263
278
  prediction = Sequence.new
264
279
  prediction.length_protein = 219 / 3
265
-
280
+ output = File.open(ncbi_mrna_xml20, 'rb').read
266
281
  iterator = Bio::BlastXMLParser::NokogiriBlastXml.new(output).to_enum
267
282
  hits = BlastUtils.parse_next(iterator, :protein)
268
283
 
269
284
  assert_equal(20, hits.length)
270
285
 
286
+ b = GeneValidator::Validate.new
271
287
  hits = b.remove_identical_hits(prediction, hits)
272
288
 
273
289
  assert_equal(13, hits.length)
@@ -286,8 +302,9 @@ module GeneValidator
286
302
  test: true
287
303
  }
288
304
 
289
- (GeneValidator::Validation.new(default_opt)).run
290
- rescue SystemExit => error
305
+ GeneValidator.init(default_opt)
306
+
307
+ rescue SystemExit
291
308
  error = true
292
309
  end
293
310
  assert_equal(true, error)