genevalidator 1.6.1 → 1.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +3 -1
- data/.travis.yml +2 -0
- data/README.md +78 -30
- data/Rakefile +11 -8
- data/aux/app_template_footer.erb +1 -6
- data/aux/app_template_header.erb +12 -32
- data/aux/files/css/style.css +2 -8
- data/aux/files/js/plots.js +564 -576
- data/aux/files/js/script.js +10 -0
- data/aux/json_footer.erb +8 -0
- data/aux/json_header.erb +19 -0
- data/aux/json_query.erb +14 -0
- data/aux/template_footer.erb +9 -58
- data/aux/template_header.erb +18 -58
- data/aux/template_query.erb +8 -36
- data/bin/genevalidator +45 -32
- data/genevalidator.gemspec +11 -7
- data/lib/genevalidator.rb +75 -455
- data/lib/genevalidator/arg_validation.rb +78 -107
- data/lib/genevalidator/blast.rb +57 -60
- data/lib/genevalidator/clusterization.rb +15 -15
- data/lib/genevalidator/exceptions.rb +32 -5
- data/lib/genevalidator/get_raw_sequences.rb +70 -33
- data/lib/genevalidator/hsp.rb +1 -4
- data/lib/genevalidator/json_to_gv_results.rb +109 -0
- data/lib/genevalidator/output.rb +177 -185
- data/lib/genevalidator/pool.rb +2 -1
- data/lib/genevalidator/sequences.rb +3 -3
- data/lib/genevalidator/tabular_parser.rb +24 -18
- data/lib/genevalidator/validation.rb +279 -0
- data/lib/genevalidator/validation_alignment.rb +31 -47
- data/lib/genevalidator/validation_blast_reading_frame.rb +19 -18
- data/lib/genevalidator/validation_duplication.rb +23 -19
- data/lib/genevalidator/validation_gene_merge.rb +30 -65
- data/lib/genevalidator/validation_length_cluster.rb +14 -53
- data/lib/genevalidator/validation_length_rank.rb +10 -11
- data/lib/genevalidator/validation_open_reading_frame.rb +18 -19
- data/lib/genevalidator/validation_report.rb +2 -5
- data/lib/genevalidator/validation_test.rb +8 -4
- data/lib/genevalidator/version.rb +1 -1
- data/test/test_all_validations.rb +51 -66
- data/test/test_blast.rb +68 -51
- data/test/test_clusterization.rb +1 -1
- data/test/test_clusterization_2d.rb +19 -13
- data/test/test_extended_array_methods.rb +1 -1
- data/test/test_files/all_validations_mrna/mrna.blast_tab6 +1806 -0
- data/test/test_files/all_validations_mrna/mrna.blast_tab7 +1865 -0
- data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml → mrna.blast_xml} +18642 -1
- data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml.index → mrna.blast_xml.index} +300 -0
- data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta → mrna.fa} +0 -0
- data/test/test_files/all_validations_mrna/mrna.raw_seq +3970 -0
- data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml.raw_seq.idx → mrna.raw_seq.idx} +901 -1
- data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_tab → prot.blast_tab6} +416 -0
- data/test/test_files/all_validations_prot/prot.blast_tab7 +2400 -0
- data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml → prot.blast_xml} +18299 -6723
- data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.index → prot.blast_xml.index} +408 -0
- data/test/test_files/all_validations_prot/{all_validations_prot.fasta → prot.fa} +0 -0
- data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.raw_seq → prot.raw_seq} +2735 -0
- data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.raw_seq.idx → prot.raw_seq.idx} +3032 -1808
- data/test/test_sequences.rb +46 -41
- data/test/test_validation_open_reading_frame.rb +318 -202
- data/test/test_validations.rb +48 -32
- metadata +76 -102
- data/doc/AliasDuplicationError.html +0 -134
- data/doc/AlignmentValidation.html +0 -1687
- data/doc/AlignmentValidationOutput.html +0 -659
- data/doc/Blast.html +0 -1905
- data/doc/BlastRFValidationOutput.html +0 -545
- data/doc/BlastReadingFrameValidation.html +0 -370
- data/doc/BlastUtils.html +0 -875
- data/doc/ClasspathError.html +0 -134
- data/doc/Cluster.html +0 -1316
- data/doc/DuplciationValidationOutput.html +0 -564
- data/doc/DuplicationValidation.html +0 -920
- data/doc/DuplicationValidationOutput.html +0 -564
- data/doc/FileNotFoundException.html +0 -134
- data/doc/GeneMergeValidation.html +0 -935
- data/doc/GeneMergeValidationOutput.html +0 -652
- data/doc/HierarchicalClusterization.html +0 -994
- data/doc/Hsp.html +0 -1485
- data/doc/InconsistentTabularFormat.html +0 -135
- data/doc/LengthClusterValidation.html +0 -982
- data/doc/LengthClusterValidationOutput.html +0 -515
- data/doc/LengthRankValidation.html +0 -496
- data/doc/LengthRankValidationOutput.html +0 -517
- data/doc/NoInternetError.html +0 -135
- data/doc/NoMafftInstallationError.html +0 -134
- data/doc/NoPIdentError.html +0 -134
- data/doc/NoValidationError.html +0 -134
- data/doc/NotEnoughHitsError.html +0 -135
- data/doc/ORFValidationOutput.html +0 -593
- data/doc/OpenReadingFrameValidation.html +0 -1107
- data/doc/OtherError.html +0 -123
- data/doc/Output.html +0 -1540
- data/doc/Pair.html +0 -309
- data/doc/PairCluster.html +0 -767
- data/doc/Plot.html +0 -837
- data/doc/QueryError.html +0 -134
- data/doc/ReportClassError.html +0 -135
- data/doc/Sequence.html +0 -1299
- data/doc/SequenceTypeError.html +0 -135
- data/doc/TabularEntry.html +0 -837
- data/doc/TabularParser.html +0 -1104
- data/doc/Validation.html +0 -2147
- data/doc/ValidationClassError.html +0 -134
- data/doc/ValidationOutput.html +0 -460
- data/doc/ValidationReport.html +0 -940
- data/doc/ValidationTest.html +0 -939
- data/doc/_index.html +0 -449
- data/doc/class_list.html +0 -54
- data/doc/css/common.css +0 -1
- data/doc/css/full_list.css +0 -57
- data/doc/css/style.css +0 -338
- data/doc/file.README.html +0 -151
- data/doc/file_list.html +0 -56
- data/doc/frames.html +0 -26
- data/doc/index.html +0 -151
- data/doc/js/app.js +0 -214
- data/doc/js/full_list.js +0 -178
- data/doc/js/jquery.js +0 -4
- data/doc/method_list.html +0 -1505
- data/doc/top-level-namespace.html +0 -112
- data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab +0 -967
- data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.index +0 -967
- data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.raw_seq +0 -4929
- data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.raw_seq.idx +0 -1006
- data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_xml.raw_seq +0 -2075
- data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.index +0 -1864
- data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.raw_seq +0 -42411
- data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.raw_seq.idx +0 -3751
@@ -1,7 +1,10 @@
|
|
1
|
-
require '
|
2
|
-
|
1
|
+
require 'forwardable'
|
2
|
+
|
3
3
|
require 'genevalidator/exceptions'
|
4
4
|
require 'genevalidator/ext/array'
|
5
|
+
require 'genevalidator/validation_report'
|
6
|
+
require 'genevalidator/validation_test'
|
7
|
+
|
5
8
|
module GeneValidator
|
6
9
|
##
|
7
10
|
# Class that stores the validation output information
|
@@ -81,11 +84,9 @@ module GeneValidator
|
|
81
84
|
##
|
82
85
|
# Initializes the object
|
83
86
|
# Params:
|
84
|
-
# +hits+: a vector of +Sequence+ objects (representing blast hits)
|
85
87
|
# +prediction+: a +Sequence+ object representing the blast query
|
86
|
-
# +
|
87
|
-
|
88
|
-
def initialize(type, prediction, hits)
|
88
|
+
# +hits+: a vector of +Sequence+ objects (representing blast hits)
|
89
|
+
def initialize(prediction, hits)
|
89
90
|
super
|
90
91
|
@short_header = 'LengthRank'
|
91
92
|
@header = 'Length Rank'
|
@@ -143,19 +144,17 @@ module GeneValidator
|
|
143
144
|
largest_hit,
|
144
145
|
extreme_hits,
|
145
146
|
percentage)
|
146
|
-
@validation_report.
|
147
|
+
@validation_report.run_time = Time.now - start
|
147
148
|
@validation_report
|
148
149
|
|
149
150
|
rescue NotEnoughHitsError
|
150
151
|
@validation_report = ValidationReport.new('Not enough evidence', :warning,
|
151
152
|
@short_header, @header,
|
152
|
-
@description
|
153
|
-
@explanation, @conclusion)
|
153
|
+
@description)
|
154
154
|
rescue Exception
|
155
155
|
@validation_report = ValidationReport.new('Unexpected error', :error,
|
156
156
|
@short_header, @header,
|
157
|
-
@description
|
158
|
-
@explanation, @conclusion)
|
157
|
+
@description)
|
159
158
|
@validation_report.errors.push 'Unexpected Error'
|
160
159
|
end
|
161
160
|
end
|
@@ -1,5 +1,10 @@
|
|
1
|
-
require 'genevalidator/validation_report'
|
2
1
|
require 'bio'
|
2
|
+
require 'forwardable'
|
3
|
+
|
4
|
+
require 'genevalidator/exceptions'
|
5
|
+
require 'genevalidator/validation_report'
|
6
|
+
require 'genevalidator/validation_test'
|
7
|
+
|
3
8
|
module GeneValidator
|
4
9
|
##
|
5
10
|
# Class that stores the validation output information
|
@@ -57,24 +62,23 @@ module GeneValidator
|
|
57
62
|
# This class contains the methods necessary for checking whether there is
|
58
63
|
# a main Open Reading Frame in the predicted sequence
|
59
64
|
class OpenReadingFrameValidation < ValidationTest
|
60
|
-
|
65
|
+
extend Forwardable
|
66
|
+
def_delegators GeneValidator, :config
|
61
67
|
|
62
68
|
##
|
63
69
|
# Initilizes the object
|
64
70
|
# Params:
|
65
|
-
# +type+: type of the predicted sequence (:nucleotide or :protein)
|
66
71
|
# +prediction+: a +Sequence+ object representing the blast query
|
67
72
|
# +hits+: a vector of +Sequence+ objects (representing blast hits)
|
68
|
-
|
69
|
-
def initialize(type, prediction, hits, filename)
|
73
|
+
def initialize(prediction, hits)
|
70
74
|
super
|
71
|
-
@short_header = '
|
75
|
+
@short_header = 'MainORF'
|
72
76
|
@header = 'Main ORF'
|
73
77
|
@description = 'Check whether there is a single main Open Reading' \
|
74
78
|
' Frame in the predicted gene. Applicable only for' \
|
75
79
|
' nucleotide queries.'
|
76
80
|
@cli_name = 'orf'
|
77
|
-
@
|
81
|
+
@type = config[:type]
|
78
82
|
end
|
79
83
|
|
80
84
|
##
|
@@ -101,15 +105,14 @@ module GeneValidator
|
|
101
105
|
@validation_report = ORFValidationOutput.new(@short_header, @header,
|
102
106
|
@description, orfs,
|
103
107
|
coverage, longest_orf_frame)
|
104
|
-
@validation_report.
|
108
|
+
@validation_report.run_time = Time.now - start
|
105
109
|
|
106
110
|
@validation_report.plot_files.push(plot1)
|
107
111
|
@validation_report
|
108
112
|
rescue Exception
|
109
113
|
@validation_report = ValidationReport.new('Unexpected error', :error,
|
110
114
|
@short_header, @header,
|
111
|
-
@description
|
112
|
-
@explanation, @conclusion)
|
115
|
+
@description)
|
113
116
|
@validation_report.errors.push 'Unexpected Error'
|
114
117
|
end
|
115
118
|
|
@@ -157,29 +160,25 @@ module GeneValidator
|
|
157
160
|
# +orfs+: +Hash+ containing the open reading frame
|
158
161
|
# +output+: location where the plot will be saved in jped file format
|
159
162
|
# +prediction+: Sequence objects
|
160
|
-
def plot_orfs(orfs, translated_length, output = "#{@
|
163
|
+
def plot_orfs(orfs, translated_length, output = "#{@plot_path}_orfs.json")
|
161
164
|
fail QueryError unless orfs.is_a? Hash
|
162
165
|
|
163
|
-
|
166
|
+
data = []
|
164
167
|
|
165
168
|
# Create hashes for the Background
|
166
169
|
(-3..3).each do |frame|
|
167
170
|
next if frame == 0
|
168
|
-
|
171
|
+
data << { 'y' => frame, 'start' => 1, 'stop' => translated_length,
|
169
172
|
'color' => 'gray' }
|
170
173
|
end
|
171
174
|
|
172
175
|
# Create the hashes for the ORFs...
|
173
176
|
orfs.each do |_key, h|
|
174
|
-
|
177
|
+
data << { 'y' => h[:frame], 'start' => h[:orf_start],
|
175
178
|
'stop' => h[:orf_end], 'color' => 'red' }
|
176
179
|
end
|
177
180
|
|
178
|
-
|
179
|
-
f.write((results).to_json)
|
180
|
-
f.close
|
181
|
-
|
182
|
-
Plot.new(output.scan(%r{([^/]+)$})[0][0],
|
181
|
+
Plot.new(data,
|
183
182
|
:lines,
|
184
183
|
'Open Reading Frames in all 6 Frames',
|
185
184
|
'Open Reading Frame (Minimimum Length: 30 amino acids),red',
|
@@ -1,6 +1,6 @@
|
|
1
1
|
# Top level module / namespace.
|
2
2
|
module GeneValidator
|
3
|
-
Plot = Struct.new(:
|
3
|
+
Plot = Struct.new(:data, :type, :title, :footer, :xtitle, :ytitle, :aux1,
|
4
4
|
:aux2)
|
5
5
|
|
6
6
|
##
|
@@ -8,7 +8,6 @@ module GeneValidator
|
|
8
8
|
# all validation reports
|
9
9
|
class ValidationReport
|
10
10
|
attr_reader :message
|
11
|
-
attr_reader :bg_color
|
12
11
|
attr_reader :plot_files
|
13
12
|
attr_reader :result
|
14
13
|
attr_reader :expected
|
@@ -17,7 +16,7 @@ module GeneValidator
|
|
17
16
|
attr_accessor :short_header
|
18
17
|
attr_accessor :header
|
19
18
|
attr_accessor :description
|
20
|
-
attr_accessor :
|
19
|
+
attr_accessor :run_time
|
21
20
|
attr_accessor :approach
|
22
21
|
attr_accessor :explanation
|
23
22
|
attr_accessor :conclusion
|
@@ -31,7 +30,6 @@ module GeneValidator
|
|
31
30
|
# +short_header+: String
|
32
31
|
# +header+: String
|
33
32
|
# +description+: String
|
34
|
-
# +bg_color+: background color of the table cell for the html output (nil
|
35
33
|
# by default)
|
36
34
|
def initialize(message = 'Not enough evidence', validation_result = :no,
|
37
35
|
short_header = '', header = '', description = '',
|
@@ -60,7 +58,6 @@ module GeneValidator
|
|
60
58
|
##
|
61
59
|
# May return "success" or "error"
|
62
60
|
def color
|
63
|
-
bg_color unless bg_color.nil?
|
64
61
|
if validation == @expected
|
65
62
|
'success'
|
66
63
|
elsif validation == :error || validation == :unapplicable
|
@@ -1,7 +1,11 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
|
1
3
|
module GeneValidator
|
2
4
|
# This is an abstract class extended
|
3
5
|
# by all validation classes
|
4
6
|
class ValidationTest
|
7
|
+
extend Forwardable
|
8
|
+
def_delegators GeneValidator, :config
|
5
9
|
attr_accessor :type
|
6
10
|
attr_accessor :prediction
|
7
11
|
attr_accessor :hits
|
@@ -10,7 +14,7 @@ module GeneValidator
|
|
10
14
|
attr_accessor :cli_name
|
11
15
|
attr_accessor :description
|
12
16
|
attr_accessor :validation_report
|
13
|
-
attr_accessor :
|
17
|
+
attr_accessor :run_time
|
14
18
|
|
15
19
|
##
|
16
20
|
# Initilizes the object
|
@@ -19,13 +23,13 @@ module GeneValidator
|
|
19
23
|
# +prediction+: a +Sequence+ object representing the blast query
|
20
24
|
# +hits+: a vector of +Sequence+ objects (representing blast hits)
|
21
25
|
# +argv+: aditional arguments if needed
|
22
|
-
def initialize(
|
23
|
-
@type = type
|
26
|
+
def initialize(prediction, hits = nil, *_argv)
|
27
|
+
@type = config[:type]
|
24
28
|
@prediction = prediction
|
25
29
|
@hits = hits
|
26
30
|
@short_header = 'NewVal'
|
27
31
|
@header = 'New Validation'
|
28
|
-
@
|
32
|
+
@run_time = 0
|
29
33
|
@cli_name = 'all'
|
30
34
|
@description = 'No description available.'
|
31
35
|
@validation_report = ValidationReport.new('Not enough evidence')
|
@@ -3,93 +3,79 @@ require 'minitest/autorun'
|
|
3
3
|
require 'yaml'
|
4
4
|
require 'fileutils'
|
5
5
|
require 'genevalidator'
|
6
|
-
require 'genevalidator/blast'
|
7
|
-
require 'genevalidator/validation_length_cluster'
|
8
|
-
require 'genevalidator/validation_length_rank'
|
9
|
-
require 'genevalidator/validation_blast_reading_frame'
|
10
|
-
require 'genevalidator/validation_gene_merge'
|
11
|
-
require 'genevalidator/validation_duplication'
|
12
|
-
require 'genevalidator/validation_open_reading_frame'
|
13
|
-
require 'genevalidator/validation_alignment'
|
14
6
|
|
15
7
|
module GeneValidator
|
8
|
+
# Test if GV produces the same output with XML and tabular input
|
16
9
|
class ValidateOutput < Minitest::Test
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
#
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
mrna_tab_out = "#{mrna_blast_tab_file}.out"
|
42
|
-
prot_output_dir = "#{prot_input_fasta_file}.html"
|
43
|
-
mrna_output_dir = "#{mrna_input_fasta_file}.html"
|
44
|
-
prot_yaml = "#{prot_input_fasta_file}.yaml"
|
45
|
-
mrna_yaml = "#{mrna_input_fasta_file}.yaml"
|
10
|
+
prot_dir = 'test/test_files/all_validations_prot'
|
11
|
+
prot_input = File.join(prot_dir, 'prot.fa')
|
12
|
+
prot_xml = File.join(prot_dir, 'prot.blast_xml')
|
13
|
+
prot_tab = File.join(prot_dir, 'prot.blast_tab6')
|
14
|
+
prot_raw = File.join(prot_dir, 'prot.raw_seq')
|
15
|
+
|
16
|
+
mrna_dir = 'test/test_files/all_validations_mrna'
|
17
|
+
mrna_input = File.join(mrna_dir, 'mrna.fa')
|
18
|
+
mrna_xml = File.join(mrna_dir, 'mrna.blast_xml')
|
19
|
+
mrna_tab = File.join(mrna_dir, 'mrna.blast_tab6')
|
20
|
+
mrna_raw = File.join(mrna_dir, 'mrna.raw_seq')
|
21
|
+
|
22
|
+
tab_options = 'qseqid sseqid sacc slen qstart qend sstart send length' \
|
23
|
+
' qframe pident nident evalue qseq sseq'
|
24
|
+
database = 'swissprot -remote'
|
25
|
+
threads = '1'
|
26
|
+
|
27
|
+
# Unwanted Output Files
|
28
|
+
prot_xml_out = "#{prot_xml}.out"
|
29
|
+
prot_tab_out = "#{prot_tab}.out"
|
30
|
+
prot_output_dir = "#{prot_input}.html"
|
31
|
+
mrna_xml_out = "#{mrna_xml}.out"
|
32
|
+
mrna_tab_out = "#{mrna_tab}.out"
|
33
|
+
mrna_output_dir = "#{mrna_input}.html"
|
46
34
|
|
47
35
|
describe 'Protein dataset' do
|
48
36
|
it 'xml and tabular inputs give the same output' do
|
49
|
-
|
50
37
|
original_stdout = $stdout.clone
|
51
38
|
$stdout.reopen(prot_xml_out, 'w')
|
52
39
|
|
53
|
-
FileUtils.rm_rf(prot_output_dir) rescue
|
54
|
-
|
40
|
+
FileUtils.rm_rf(prot_output_dir) rescue Errno::ENOENT
|
55
41
|
opts = {
|
56
|
-
validations: %w(lenc lenr frame merge dup orf),
|
42
|
+
validations: %w(lenc lenr frame merge dup orf align),
|
57
43
|
db: database,
|
58
44
|
num_threads: threads,
|
59
45
|
fast: false,
|
60
|
-
input_fasta_file:
|
61
|
-
blast_xml_file:
|
62
|
-
raw_sequences:
|
46
|
+
input_fasta_file: prot_input,
|
47
|
+
blast_xml_file: prot_xml,
|
48
|
+
raw_sequences: prot_raw,
|
63
49
|
test: true
|
64
50
|
}
|
65
51
|
|
66
|
-
|
52
|
+
GeneValidator.init(opts, 1, false)
|
53
|
+
GeneValidator.run
|
67
54
|
$stdout.reopen original_stdout
|
68
55
|
$stdout.reopen(prot_tab_out, 'w')
|
69
56
|
|
70
|
-
FileUtils.rm_rf(prot_output_dir) rescue
|
57
|
+
FileUtils.rm_rf(prot_output_dir) rescue Errno::ENOENT
|
71
58
|
|
72
59
|
opts1 = {
|
73
|
-
validations: %w(lenc lenr frame merge dup orf),
|
60
|
+
validations: %w(lenc lenr frame merge dup orf align),
|
74
61
|
db: database,
|
75
62
|
num_threads: threads,
|
76
63
|
fast: false,
|
77
|
-
input_fasta_file:
|
78
|
-
blast_tabular_file:
|
64
|
+
input_fasta_file: prot_input,
|
65
|
+
blast_tabular_file: prot_tab,
|
79
66
|
blast_tabular_options: tab_options,
|
80
|
-
raw_sequences:
|
67
|
+
raw_sequences: prot_raw,
|
81
68
|
test: true
|
82
69
|
}
|
83
70
|
|
84
|
-
|
71
|
+
GeneValidator.init(opts1, 1, false)
|
72
|
+
GeneValidator.run
|
85
73
|
$stdout.reopen original_stdout
|
86
74
|
|
87
75
|
diff = FileUtils.compare_file(prot_xml_out, prot_tab_out)
|
88
76
|
|
89
77
|
File.delete(prot_xml_out)
|
90
78
|
File.delete(prot_tab_out)
|
91
|
-
File.delete(prot_yaml)
|
92
|
-
|
93
79
|
FileUtils.rm_rf(prot_output_dir)
|
94
80
|
|
95
81
|
assert_equal(true, diff)
|
@@ -98,50 +84,49 @@ module GeneValidator
|
|
98
84
|
|
99
85
|
describe 'mRNA dataset' do
|
100
86
|
it 'xml and tabular inputs give the same output' do
|
101
|
-
|
102
87
|
original_stdout = $stdout.clone
|
103
88
|
$stdout.reopen(mrna_xml_out, 'w')
|
104
89
|
|
105
|
-
FileUtils.rm_rf(mrna_output_dir) rescue
|
90
|
+
FileUtils.rm_rf(mrna_output_dir) rescue Errno::ENOENT
|
106
91
|
|
107
92
|
opts = {
|
108
93
|
validations: %w(lenc lenr frame merge dup orf align),
|
109
94
|
db: database,
|
110
95
|
num_threads: threads,
|
111
96
|
fast: false,
|
112
|
-
input_fasta_file:
|
113
|
-
blast_xml_file:
|
114
|
-
raw_sequences:
|
97
|
+
input_fasta_file: mrna_input,
|
98
|
+
blast_xml_file: mrna_xml,
|
99
|
+
raw_sequences: mrna_raw,
|
115
100
|
test: true
|
116
101
|
}
|
117
102
|
|
118
|
-
|
103
|
+
GeneValidator.init(opts, 1, false)
|
104
|
+
GeneValidator.run
|
119
105
|
$stdout.reopen original_stdout
|
120
106
|
$stdout.reopen(mrna_tab_out, 'w')
|
121
107
|
|
122
|
-
FileUtils.rm_rf(mrna_output_dir) rescue
|
108
|
+
FileUtils.rm_rf(mrna_output_dir) rescue Errno::ENOENT
|
123
109
|
|
124
110
|
opts1 = {
|
125
111
|
validations: %w(lenc lenr frame merge dup orf align),
|
126
112
|
db: database,
|
127
113
|
num_threads: threads,
|
128
114
|
fast: false,
|
129
|
-
input_fasta_file:
|
130
|
-
blast_tabular_file:
|
115
|
+
input_fasta_file: mrna_input,
|
116
|
+
blast_tabular_file: mrna_tab,
|
131
117
|
blast_tabular_options: tab_options,
|
132
|
-
raw_sequences:
|
118
|
+
raw_sequences: mrna_raw,
|
133
119
|
test: true
|
134
120
|
}
|
135
121
|
|
136
|
-
|
122
|
+
GeneValidator.init(opts1, 1, false)
|
123
|
+
GeneValidator.run
|
137
124
|
$stdout.reopen original_stdout
|
138
125
|
|
139
126
|
diff = FileUtils.compare_file(mrna_xml_out, mrna_tab_out)
|
140
127
|
|
141
128
|
File.delete(mrna_xml_out)
|
142
129
|
File.delete(mrna_tab_out)
|
143
|
-
File.delete(mrna_yaml)
|
144
|
-
|
145
130
|
FileUtils.rm_rf(mrna_output_dir)
|
146
131
|
|
147
132
|
assert_equal(true, diff)
|
data/test/test_blast.rb
CHANGED
@@ -4,8 +4,10 @@ require 'fileutils'
|
|
4
4
|
require 'genevalidator'
|
5
5
|
require 'genevalidator/blast'
|
6
6
|
require 'genevalidator/tabular_parser'
|
7
|
+
require 'genevalidator/validation'
|
7
8
|
|
8
9
|
module GeneValidator
|
10
|
+
# Test the BlastUtil Class
|
9
11
|
class TestBlastClass < Minitest::Test
|
10
12
|
dir = 'test/test_files'
|
11
13
|
filename_mrna = "#{dir}/file_mrna.txt"
|
@@ -20,9 +22,7 @@ module GeneValidator
|
|
20
22
|
ncbi_mrna_xml20 = "#{dir}/ncbi_mrna.xml.20"
|
21
23
|
|
22
24
|
describe 'Test Blast Class' do
|
23
|
-
|
24
25
|
it 'should detect nucleotide seq type' do
|
25
|
-
|
26
26
|
file_mrna = File.open(filename_mrna, 'w+')
|
27
27
|
query_mrna = 'ATGGCTAAATTACAGAGGAAGAGAAGCAAGGCTCTTGGGTCATCTCTAGAGATGT' \
|
28
28
|
'CCCAGATAATGGATGCAGGAACAAACAAAATTAAAAGAAGAATAAGAGATTTAGA' \
|
@@ -37,7 +37,7 @@ module GeneValidator
|
|
37
37
|
file_mrna.puts(query_mrna)
|
38
38
|
file_mrna.close
|
39
39
|
|
40
|
-
FileUtils.rm_rf("#{filename_mrna}.html") rescue
|
40
|
+
FileUtils.rm_rf("#{filename_mrna}.html") rescue Errno::ENOENT
|
41
41
|
|
42
42
|
default_opt = {
|
43
43
|
input_fasta_file: filename_mrna,
|
@@ -47,11 +47,10 @@ module GeneValidator
|
|
47
47
|
test: true
|
48
48
|
}
|
49
49
|
|
50
|
-
|
51
|
-
|
50
|
+
GeneValidator.init(default_opt)
|
52
51
|
File.delete(filename_mrna)
|
53
52
|
FileUtils.rm_rf("#{filename_mrna}.html")
|
54
|
-
assert_equal(:nucleotide,
|
53
|
+
assert_equal(:nucleotide, GeneValidator.config[:type])
|
55
54
|
end
|
56
55
|
|
57
56
|
it 'should detect protein type' do
|
@@ -72,7 +71,7 @@ module GeneValidator
|
|
72
71
|
file_prot.puts(query_prot)
|
73
72
|
file_prot.close
|
74
73
|
|
75
|
-
FileUtils.rm_rf("#{filename_prot}.html") rescue
|
74
|
+
FileUtils.rm_rf("#{filename_prot}.html") rescue Errno::ENOENT
|
76
75
|
|
77
76
|
default_opt = {
|
78
77
|
input_fasta_file: filename_prot,
|
@@ -82,12 +81,11 @@ module GeneValidator
|
|
82
81
|
test: true
|
83
82
|
}
|
84
83
|
|
85
|
-
|
84
|
+
GeneValidator.init(default_opt)
|
86
85
|
|
87
86
|
File.delete(filename_prot)
|
88
87
|
FileUtils.rm_rf("#{filename_prot}.html")
|
89
|
-
assert_equal(:protein,
|
90
|
-
|
88
|
+
assert_equal(:protein, GeneValidator.config[:type])
|
91
89
|
end
|
92
90
|
|
93
91
|
it 'should raise error when input types are mixed in the fasta' do
|
@@ -96,18 +94,18 @@ module GeneValidator
|
|
96
94
|
original_stderr = $stderr
|
97
95
|
$stderr.reopen('/dev/null', 'w')
|
98
96
|
|
99
|
-
FileUtils.rm_rf("#{filename_prot}.html") rescue
|
97
|
+
FileUtils.rm_rf("#{filename_prot}.html") rescue Errno::ENOENT
|
100
98
|
|
101
99
|
default_opt = {
|
102
100
|
input_fasta_file: mixed_fasta,
|
103
101
|
validations: ['all'],
|
104
102
|
db: 'swissprot -remote',
|
105
103
|
num_threads: 1,
|
106
|
-
|
104
|
+
test: true
|
107
105
|
}
|
108
106
|
|
109
|
-
GeneValidator
|
110
|
-
rescue SystemExit
|
107
|
+
GeneValidator.init(default_opt)
|
108
|
+
rescue SystemExit
|
111
109
|
mixed = true
|
112
110
|
end
|
113
111
|
$stderr = original_stderr
|
@@ -126,10 +124,12 @@ module GeneValidator
|
|
126
124
|
end
|
127
125
|
|
128
126
|
it 'should parse tabular -6 input with default tabular format' do
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
127
|
+
tabular_headers = 'qseqid sseqid pident length mismatch gapopen' \
|
128
|
+
' qstart qend sstart send evalue bitscore'
|
129
|
+
GeneValidator.opt = { blast_tabular_file: ncbi_mrna_tab20,
|
130
|
+
blast_tabular_options: tabular_headers }
|
131
|
+
GeneValidator.config = { type: :protein }
|
132
|
+
iterator_tab = TabularParser.new
|
133
133
|
hits = iterator_tab.parse_next
|
134
134
|
|
135
135
|
assert_equal(20, hits.length)
|
@@ -145,9 +145,12 @@ module GeneValidator
|
|
145
145
|
end
|
146
146
|
|
147
147
|
it 'should parse tabular -6 input with tabular format as argument' do
|
148
|
-
|
149
|
-
|
150
|
-
|
148
|
+
tabular_headers = 'qseqid sseqid sacc slen qstart qend sstart' \
|
149
|
+
' send pident length qframe evalue'
|
150
|
+
GeneValidator.opt = { blast_tabular_file: output_tab6,
|
151
|
+
blast_tabular_options: tabular_headers }
|
152
|
+
GeneValidator.config = { type: :protein }
|
153
|
+
iterator_tab = TabularParser.new
|
151
154
|
hits = iterator_tab.parse_next
|
152
155
|
assert_equal(4, hits.length)
|
153
156
|
assert_equal(199, hits[0].length_protein)
|
@@ -157,9 +160,12 @@ module GeneValidator
|
|
157
160
|
end
|
158
161
|
|
159
162
|
it 'should parse tabular -6 input with mixed columns' do
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
+
tabular_headers = 'qend sstart send pident length qframe evalue' \
|
164
|
+
' qseqid sseqid sacc slen qstart'
|
165
|
+
GeneValidator.opt = { blast_tabular_file: output_tab_mixed,
|
166
|
+
blast_tabular_options: tabular_headers }
|
167
|
+
GeneValidator.config = { type: :protein }
|
168
|
+
iterator_tab = TabularParser.new
|
163
169
|
hits = iterator_tab.parse_next
|
164
170
|
assert_equal(4, hits.length)
|
165
171
|
assert_equal(199, hits[0].length_protein)
|
@@ -169,9 +175,12 @@ module GeneValidator
|
|
169
175
|
end
|
170
176
|
|
171
177
|
it 'should parse tabular -7 input' do
|
172
|
-
|
173
|
-
|
174
|
-
|
178
|
+
tabular_headers = 'qseqid sseqid sacc slen qstart qend sstart send' \
|
179
|
+
' length qframe evalue'
|
180
|
+
GeneValidator.opt = { blast_tabular_file: output_tab7,
|
181
|
+
blast_tabular_options: tabular_headers }
|
182
|
+
GeneValidator.config = { type: :protein }
|
183
|
+
iterator_tab = TabularParser.new
|
175
184
|
hits = iterator_tab.parse_next
|
176
185
|
assert_equal(4, hits.length)
|
177
186
|
assert_equal(199, hits[0].length_protein)
|
@@ -180,10 +189,8 @@ module GeneValidator
|
|
180
189
|
assert_equal(100, hits[0].hsp_list[2].hit_to)
|
181
190
|
end
|
182
191
|
|
183
|
-
it 'should remove identical matches
|
184
|
-
|
185
|
-
|
186
|
-
FileUtils.rm_rf("#{filename_fasta}.html") rescue Error
|
192
|
+
it 'should remove identical matches (protein sequences)' do
|
193
|
+
FileUtils.rm_rf("#{filename_fasta}.html") rescue Errno::ENOENT
|
187
194
|
|
188
195
|
default_opt = {
|
189
196
|
input_fasta_file: filename_fasta,
|
@@ -193,19 +200,26 @@ module GeneValidator
|
|
193
200
|
test: true
|
194
201
|
}
|
195
202
|
|
196
|
-
|
203
|
+
GeneValidator.init(default_opt)
|
204
|
+
|
197
205
|
prediction = Sequence.new
|
198
206
|
prediction.length_protein = 1808
|
199
|
-
tabular_headers
|
200
|
-
|
207
|
+
tabular_headers = 'qseqid sseqid sacc slen qstart qend sstart' \
|
208
|
+
' send pident length qframe evalue'
|
209
|
+
GeneValidator.opt = { blast_tabular_file: output_tab6,
|
210
|
+
blast_tabular_options: tabular_headers }
|
211
|
+
GeneValidator.config = { type: :protein }
|
212
|
+
iterator_tab = TabularParser.new
|
201
213
|
iterator_tab.parse_next
|
202
214
|
hits = iterator_tab.parse_next
|
203
215
|
|
204
|
-
# before removal
|
205
216
|
assert_equal(2, hits.length)
|
206
217
|
assert_equal(100, hits[0].hsp_list[0].pidentity)
|
207
218
|
assert_in_delta(99.23, hits[0].hsp_list[1].pidentity, 0.01)
|
208
219
|
assert_in_delta(90, hits[1].hsp_list[0].pidentity, 0.01)
|
220
|
+
|
221
|
+
# Remove identical hits
|
222
|
+
b = GeneValidator::Validate.new
|
209
223
|
hits = b.remove_identical_hits(prediction, hits)
|
210
224
|
|
211
225
|
# after removal of identical hits
|
@@ -214,10 +228,8 @@ module GeneValidator
|
|
214
228
|
FileUtils.rm_rf("#{filename_fasta}.html")
|
215
229
|
end
|
216
230
|
|
217
|
-
it 'should remove identical matches
|
218
|
-
|
219
|
-
|
220
|
-
FileUtils.rm_rf("#{filename_fasta}.html") rescue Error
|
231
|
+
it 'should remove identical matches (nucleotide seqs) - tabular input' do
|
232
|
+
FileUtils.rm_rf("#{filename_fasta}.html") rescue Errno::ENOENT
|
221
233
|
|
222
234
|
default_opt = {
|
223
235
|
input_fasta_file: filename_fasta,
|
@@ -226,17 +238,22 @@ module GeneValidator
|
|
226
238
|
num_threads: 1,
|
227
239
|
test: true
|
228
240
|
}
|
229
|
-
tabular_headers = 'qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore'
|
230
241
|
|
231
|
-
|
242
|
+
GeneValidator.init(default_opt)
|
232
243
|
|
233
244
|
prediction = Sequence.new
|
234
245
|
prediction.length_protein = 219 / 3
|
235
|
-
|
246
|
+
tabular_headers = 'qseqid sseqid pident length mismatch gapopen' \
|
247
|
+
' qstart qend sstart send evalue bitscore'
|
248
|
+
GeneValidator.opt = { blast_tabular_file: ncbi_mrna_tab20,
|
249
|
+
blast_tabular_options: tabular_headers }
|
250
|
+
GeneValidator.config = { type: :nucleotide }
|
251
|
+
iterator_tab = TabularParser.new
|
236
252
|
hits = iterator_tab.parse_next
|
237
253
|
|
238
254
|
assert_equal(20, hits.length)
|
239
|
-
|
255
|
+
# remove identical hits
|
256
|
+
b = GeneValidator::Validate.new
|
240
257
|
hits = b.remove_identical_hits(prediction, hits)
|
241
258
|
|
242
259
|
assert_equal(13, hits.length)
|
@@ -244,10 +261,8 @@ module GeneValidator
|
|
244
261
|
FileUtils.rm_rf("#{filename_fasta}.html")
|
245
262
|
end
|
246
263
|
|
247
|
-
it 'should remove identical matches
|
248
|
-
|
249
|
-
|
250
|
-
FileUtils.rm_rf("#{filename_fasta}.html") rescue Error
|
264
|
+
it 'should remove identical matches (nucleotide seqs) - xml input' do
|
265
|
+
FileUtils.rm_rf("#{filename_fasta}.html") rescue Errno::ENOENT
|
251
266
|
|
252
267
|
# just use a valid opts hash to create the object
|
253
268
|
default_opt = {
|
@@ -258,16 +273,17 @@ module GeneValidator
|
|
258
273
|
test: true
|
259
274
|
}
|
260
275
|
|
261
|
-
|
276
|
+
GeneValidator.init(default_opt)
|
262
277
|
|
263
278
|
prediction = Sequence.new
|
264
279
|
prediction.length_protein = 219 / 3
|
265
|
-
|
280
|
+
output = File.open(ncbi_mrna_xml20, 'rb').read
|
266
281
|
iterator = Bio::BlastXMLParser::NokogiriBlastXml.new(output).to_enum
|
267
282
|
hits = BlastUtils.parse_next(iterator, :protein)
|
268
283
|
|
269
284
|
assert_equal(20, hits.length)
|
270
285
|
|
286
|
+
b = GeneValidator::Validate.new
|
271
287
|
hits = b.remove_identical_hits(prediction, hits)
|
272
288
|
|
273
289
|
assert_equal(13, hits.length)
|
@@ -286,8 +302,9 @@ module GeneValidator
|
|
286
302
|
test: true
|
287
303
|
}
|
288
304
|
|
289
|
-
|
290
|
-
|
305
|
+
GeneValidator.init(default_opt)
|
306
|
+
|
307
|
+
rescue SystemExit
|
291
308
|
error = true
|
292
309
|
end
|
293
310
|
assert_equal(true, error)
|