genevalidator 1.6.1 → 1.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -1
  3. data/.travis.yml +2 -0
  4. data/README.md +78 -30
  5. data/Rakefile +11 -8
  6. data/aux/app_template_footer.erb +1 -6
  7. data/aux/app_template_header.erb +12 -32
  8. data/aux/files/css/style.css +2 -8
  9. data/aux/files/js/plots.js +564 -576
  10. data/aux/files/js/script.js +10 -0
  11. data/aux/json_footer.erb +8 -0
  12. data/aux/json_header.erb +19 -0
  13. data/aux/json_query.erb +14 -0
  14. data/aux/template_footer.erb +9 -58
  15. data/aux/template_header.erb +18 -58
  16. data/aux/template_query.erb +8 -36
  17. data/bin/genevalidator +45 -32
  18. data/genevalidator.gemspec +11 -7
  19. data/lib/genevalidator.rb +75 -455
  20. data/lib/genevalidator/arg_validation.rb +78 -107
  21. data/lib/genevalidator/blast.rb +57 -60
  22. data/lib/genevalidator/clusterization.rb +15 -15
  23. data/lib/genevalidator/exceptions.rb +32 -5
  24. data/lib/genevalidator/get_raw_sequences.rb +70 -33
  25. data/lib/genevalidator/hsp.rb +1 -4
  26. data/lib/genevalidator/json_to_gv_results.rb +109 -0
  27. data/lib/genevalidator/output.rb +177 -185
  28. data/lib/genevalidator/pool.rb +2 -1
  29. data/lib/genevalidator/sequences.rb +3 -3
  30. data/lib/genevalidator/tabular_parser.rb +24 -18
  31. data/lib/genevalidator/validation.rb +279 -0
  32. data/lib/genevalidator/validation_alignment.rb +31 -47
  33. data/lib/genevalidator/validation_blast_reading_frame.rb +19 -18
  34. data/lib/genevalidator/validation_duplication.rb +23 -19
  35. data/lib/genevalidator/validation_gene_merge.rb +30 -65
  36. data/lib/genevalidator/validation_length_cluster.rb +14 -53
  37. data/lib/genevalidator/validation_length_rank.rb +10 -11
  38. data/lib/genevalidator/validation_open_reading_frame.rb +18 -19
  39. data/lib/genevalidator/validation_report.rb +2 -5
  40. data/lib/genevalidator/validation_test.rb +8 -4
  41. data/lib/genevalidator/version.rb +1 -1
  42. data/test/test_all_validations.rb +51 -66
  43. data/test/test_blast.rb +68 -51
  44. data/test/test_clusterization.rb +1 -1
  45. data/test/test_clusterization_2d.rb +19 -13
  46. data/test/test_extended_array_methods.rb +1 -1
  47. data/test/test_files/all_validations_mrna/mrna.blast_tab6 +1806 -0
  48. data/test/test_files/all_validations_mrna/mrna.blast_tab7 +1865 -0
  49. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml → mrna.blast_xml} +18642 -1
  50. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml.index → mrna.blast_xml.index} +300 -0
  51. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta → mrna.fa} +0 -0
  52. data/test/test_files/all_validations_mrna/mrna.raw_seq +3970 -0
  53. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml.raw_seq.idx → mrna.raw_seq.idx} +901 -1
  54. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_tab → prot.blast_tab6} +416 -0
  55. data/test/test_files/all_validations_prot/prot.blast_tab7 +2400 -0
  56. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml → prot.blast_xml} +18299 -6723
  57. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.index → prot.blast_xml.index} +408 -0
  58. data/test/test_files/all_validations_prot/{all_validations_prot.fasta → prot.fa} +0 -0
  59. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.raw_seq → prot.raw_seq} +2735 -0
  60. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.raw_seq.idx → prot.raw_seq.idx} +3032 -1808
  61. data/test/test_sequences.rb +46 -41
  62. data/test/test_validation_open_reading_frame.rb +318 -202
  63. data/test/test_validations.rb +48 -32
  64. metadata +76 -102
  65. data/doc/AliasDuplicationError.html +0 -134
  66. data/doc/AlignmentValidation.html +0 -1687
  67. data/doc/AlignmentValidationOutput.html +0 -659
  68. data/doc/Blast.html +0 -1905
  69. data/doc/BlastRFValidationOutput.html +0 -545
  70. data/doc/BlastReadingFrameValidation.html +0 -370
  71. data/doc/BlastUtils.html +0 -875
  72. data/doc/ClasspathError.html +0 -134
  73. data/doc/Cluster.html +0 -1316
  74. data/doc/DuplciationValidationOutput.html +0 -564
  75. data/doc/DuplicationValidation.html +0 -920
  76. data/doc/DuplicationValidationOutput.html +0 -564
  77. data/doc/FileNotFoundException.html +0 -134
  78. data/doc/GeneMergeValidation.html +0 -935
  79. data/doc/GeneMergeValidationOutput.html +0 -652
  80. data/doc/HierarchicalClusterization.html +0 -994
  81. data/doc/Hsp.html +0 -1485
  82. data/doc/InconsistentTabularFormat.html +0 -135
  83. data/doc/LengthClusterValidation.html +0 -982
  84. data/doc/LengthClusterValidationOutput.html +0 -515
  85. data/doc/LengthRankValidation.html +0 -496
  86. data/doc/LengthRankValidationOutput.html +0 -517
  87. data/doc/NoInternetError.html +0 -135
  88. data/doc/NoMafftInstallationError.html +0 -134
  89. data/doc/NoPIdentError.html +0 -134
  90. data/doc/NoValidationError.html +0 -134
  91. data/doc/NotEnoughHitsError.html +0 -135
  92. data/doc/ORFValidationOutput.html +0 -593
  93. data/doc/OpenReadingFrameValidation.html +0 -1107
  94. data/doc/OtherError.html +0 -123
  95. data/doc/Output.html +0 -1540
  96. data/doc/Pair.html +0 -309
  97. data/doc/PairCluster.html +0 -767
  98. data/doc/Plot.html +0 -837
  99. data/doc/QueryError.html +0 -134
  100. data/doc/ReportClassError.html +0 -135
  101. data/doc/Sequence.html +0 -1299
  102. data/doc/SequenceTypeError.html +0 -135
  103. data/doc/TabularEntry.html +0 -837
  104. data/doc/TabularParser.html +0 -1104
  105. data/doc/Validation.html +0 -2147
  106. data/doc/ValidationClassError.html +0 -134
  107. data/doc/ValidationOutput.html +0 -460
  108. data/doc/ValidationReport.html +0 -940
  109. data/doc/ValidationTest.html +0 -939
  110. data/doc/_index.html +0 -449
  111. data/doc/class_list.html +0 -54
  112. data/doc/css/common.css +0 -1
  113. data/doc/css/full_list.css +0 -57
  114. data/doc/css/style.css +0 -338
  115. data/doc/file.README.html +0 -151
  116. data/doc/file_list.html +0 -56
  117. data/doc/frames.html +0 -26
  118. data/doc/index.html +0 -151
  119. data/doc/js/app.js +0 -214
  120. data/doc/js/full_list.js +0 -178
  121. data/doc/js/jquery.js +0 -4
  122. data/doc/method_list.html +0 -1505
  123. data/doc/top-level-namespace.html +0 -112
  124. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab +0 -967
  125. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.index +0 -967
  126. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.raw_seq +0 -4929
  127. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.raw_seq.idx +0 -1006
  128. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_xml.raw_seq +0 -2075
  129. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.index +0 -1864
  130. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.raw_seq +0 -42411
  131. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.raw_seq.idx +0 -3751
@@ -1,5 +1,7 @@
1
1
  require 'net/http'
2
2
  require 'io/console'
3
+ require 'yaml'
4
+
3
5
  module GeneValidator
4
6
  # This is a class for the storing data on each sequence
5
7
  class Sequence
@@ -65,7 +67,7 @@ module GeneValidator
65
67
  end
66
68
  @raw_sequence = seq
67
69
  else
68
- # puts "Tries to connect to the internet for #{accno}"
70
+ $stderr.puts "Getting sequence for '#{accno}' from NCBI - avoid this with '-r'."
69
71
  uri = 'http://www.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?'\
70
72
  "db=#{dbtype}&retmax=1&usehistory=y&term=#{accno}/"
71
73
  result = Net::HTTP.get(URI.parse(uri))
@@ -85,8 +87,6 @@ module GeneValidator
85
87
  @raw_sequence = '' unless @raw_sequence.index(/ERROR/).nil?
86
88
  end
87
89
  @raw_sequence
88
- # rescue Exception => error
89
- # @raw_sequence = ""
90
90
  end
91
91
 
92
92
  ##
@@ -1,5 +1,9 @@
1
- require 'genevalidator/exceptions'
2
1
  require 'csv'
2
+ require 'forwardable'
3
+
4
+ require 'genevalidator/sequences'
5
+ require 'genevalidator/hsp'
6
+ require 'genevalidator/exceptions'
3
7
 
4
8
  #
5
9
  module GeneValidator
@@ -8,6 +12,9 @@ module GeneValidator
8
12
  ##
9
13
  # This class parses the tabular output of BLAST (outfmt 6 & 7)
10
14
  class TabularParser
15
+ extend Forwardable
16
+ def_delegators GeneValidator, :opt, :config
17
+
11
18
  attr_reader :rows
12
19
  attr_reader :tab_results
13
20
  attr_reader :column_names
@@ -15,28 +22,25 @@ module GeneValidator
15
22
 
16
23
  ##
17
24
  # Initializes the object
18
- # +file_content+ : String with the tabular BLAST output
19
- # +format+: format of the tabular output (comma/space delimited string)
20
- # +type+: :nucleotide or :mrna
21
- def initialize(filename, format, type)
25
+ def initialize(tab_file = opt[:blast_tabular_file],
26
+ format = opt[:blast_tabular_options], type = config[:type])
22
27
  @column_names = format.gsub(/[-\d]/, '').split(/[ ,]/)
23
- @tab_results = analayse_tabular_file(filename)
24
- @rows = @tab_results.to_enum
25
28
  @type = type
29
+ @tab_results = analayse_tabular_file(tab_file)
30
+ @rows = @tab_results.to_enum
26
31
  end
27
32
 
28
33
  ##
29
34
  #
30
35
  def analayse_tabular_file(filename)
31
- tab_results = []
32
- file = File.read(filename)
33
- lines = CSV.parse(file, col_sep: "\t",
34
- skip_lines: /^#/,
35
- headers: @column_names)
36
+ results = []
37
+ file = File.read(filename)
38
+ lines = CSV.parse(file, col_sep: "\t", skip_lines: /^#/,
39
+ headers: @column_names)
36
40
  lines.each do |line|
37
- tab_results << line.to_hash
41
+ results << line.to_hash
38
42
  end
39
- tab_results
43
+ results
40
44
  end
41
45
 
42
46
  ##
@@ -58,17 +62,19 @@ module GeneValidator
58
62
  def parse_next(query_id = nil)
59
63
  current_id = @rows.peek['qseqid']
60
64
  return [] if !query_id.nil? && current_id != query_id
61
- hits = @tab_results.partition { |h| h['qseqid'] == current_id }[0]
62
- hit_seq = initialise_classes(hits)
65
+ hit_seq = initialise_classes(current_id)
63
66
  move_to_next_query
64
67
  hit_seq
65
68
  rescue StopIteration
66
69
  return []
67
70
  end
68
71
 
72
+ private
73
+
69
74
  ##
70
75
  #
71
- def initialise_classes(hits)
76
+ def initialise_classes(current_id, tab_results = @tab_results)
77
+ hits = tab_results.partition { |h| h['qseqid'] == current_id }[0]
72
78
  hit_list = []
73
79
  grouped_hits = hits.group_by { |row| row['sseqid'] }
74
80
 
@@ -90,7 +96,7 @@ module GeneValidator
90
96
  hsps = hits.select { |row| row['sseqid'] == current_query_id }
91
97
  hsps.each do |row|
92
98
  hsp = Hsp.new
93
- hsp.init_tabular_attribute(row, type)
99
+ hsp.init_tabular_attribute(row)
94
100
  hit_seq.hsp_list.push(hsp)
95
101
  end
96
102
  end
@@ -0,0 +1,279 @@
1
+ require 'forwardable'
2
+
3
+ require 'genevalidator/blast'
4
+ require 'genevalidator/exceptions'
5
+ require 'genevalidator/output'
6
+ require 'genevalidator/pool'
7
+ require 'genevalidator/sequences'
8
+ require 'genevalidator/validation_length_cluster'
9
+ require 'genevalidator/validation_length_rank'
10
+ require 'genevalidator/validation_blast_reading_frame'
11
+ require 'genevalidator/validation_gene_merge'
12
+ require 'genevalidator/validation_duplication'
13
+ require 'genevalidator/validation_open_reading_frame'
14
+ require 'genevalidator/validation_alignment'
15
+
16
+ # Top level module / namespace.
17
+ module GeneValidator
18
+ Pair1 = Struct.new(:x, :y)
19
+
20
+ # Class that initialises separate Validate.new() instances for each query.
21
+ class Validations
22
+ extend Forwardable
23
+ def_delegators GeneValidator, :opt, :config, :query_idx
24
+ def initialize
25
+ @opt = opt
26
+ @config = config
27
+ @query_idx = query_idx
28
+ end
29
+
30
+ ##
31
+ #
32
+ def run_validations(iterator)
33
+ p = Pool.new(@opt[:num_threads]) if @opt[:num_threads] > 1
34
+
35
+ while @config[:idx] + 1 < @query_idx.length
36
+ prediction = get_info_on_query_sequence
37
+ @config[:idx] += 1
38
+
39
+ blast_hits = parse_next_iteration(iterator, prediction)
40
+
41
+ if blast_hits.nil?
42
+ @config[:idx] -= 1
43
+ break
44
+ end
45
+
46
+ if @opt[:num_threads] == 1
47
+ (Validate.new).validate(prediction, blast_hits, @config[:idx])
48
+ else
49
+ p.schedule(prediction, blast_hits, @config[:idx]) do |pred, hits, idx|
50
+ (Validate.new).validate(pred, hits, idx)
51
+ end
52
+ end
53
+ end
54
+ ensure
55
+ p.shutdown if @opt[:num_threads] > 1
56
+ end
57
+
58
+ ##
59
+ # get info about the query
60
+ def get_info_on_query_sequence(input_file = @opt[:input_fasta_file],
61
+ seq_type = @config[:type])
62
+ start_offset = @query_idx[@config[:idx] + 1] - @query_idx[@config[:idx]]
63
+ end_offset = @query_idx[@config[:idx]]
64
+ query = IO.binread(input_file, start_offset, end_offset)
65
+ parse_query = query.scan(/>([^\n]*)\n([A-Za-z\n]*)/)[0]
66
+
67
+ prediction = Sequence.new
68
+ prediction.definition = parse_query[0].gsub("\n", '')
69
+ prediction.identifier = prediction.definition.gsub(/ .*/, '')
70
+ prediction.type = seq_type
71
+ prediction.raw_sequence = parse_query[1].gsub("\n", '')
72
+ prediction.length_protein = prediction.raw_sequence.length
73
+ prediction.length_protein /= 3 if seq_type == :nucleotide
74
+ prediction
75
+ end
76
+
77
+ def parse_next_iteration(iterator, prediction)
78
+ iterator.next if @config[:idx] < @config[:start_idx]
79
+ if @opt[:blast_xml_file]
80
+ BlastUtils.parse_next(iterator)
81
+ elsif @opt[:blast_tabular_file]
82
+ iterator.parse_next(prediction.identifier)
83
+ end
84
+ end
85
+ end
86
+
87
+ # Class that runs the validations (Instantiated for each query)
88
+ class Validate
89
+ extend Forwardable
90
+ def_delegators GeneValidator, :opt, :config, :mutex_array, :overview
91
+
92
+ ##
93
+ # Initializes the object
94
+ # Params:
95
+ # +opt+: A hash with the following keys: validations:, blast_tabular_file:,
96
+ # blast_tabular_options:, blast_xml_file:, db:, raw_sequences:,
97
+ # num_threads:, fast:
98
+ # +start_idx+: number of the sequence from the file to start with
99
+ # +overall_evaluation+: boolean variable for printing overall evaluation
100
+ def initialize
101
+ @opt = opt
102
+ @config = config
103
+ @mutex_array = mutex_array
104
+ @run_output = nil
105
+
106
+ @overview = overview
107
+ end
108
+
109
+ ##
110
+ # Validate one query and create validation report
111
+ # Params:
112
+ # +prediction+: Sequence object
113
+ # +hits+: Array of +Sequence+ objects
114
+ # +current_idx+: the index number of the query
115
+ def validate(prediction, hits, current_idx)
116
+ hits = remove_identical_hits(prediction, hits)
117
+ vals = create_validation_tests(prediction, hits)
118
+ check_validations(vals)
119
+ vals.each(&:run)
120
+ @run_output = Output.new(current_idx, hits.length, prediction.definition)
121
+ @run_output.validations = vals.map(&:validation_report)
122
+ check_validations_output(vals)
123
+
124
+ compute_scores
125
+ generate_run_output
126
+ end
127
+
128
+ ##
129
+ # Removes identical hits (100% coverage and >99% identity)
130
+ # Params:
131
+ # +prediction+: Sequence object
132
+ # +hits+: Array of +Sequence+ objects
133
+ # Output:
134
+ # new array of hit +Sequence+ objects
135
+ def remove_identical_hits(prediction, hits)
136
+ identical_hits = []
137
+ hits.each do |hit|
138
+ low_identity = hit.hsp_list.select { |hsp| hsp.pidentity < 99 }
139
+ no_data = hit.hsp_list.select { |hsp| hsp.pidentity.nil? }
140
+ low_identity += no_data
141
+ # check the coverage
142
+ coverage = Array.new(prediction.length_protein, 0)
143
+ hit.hsp_list.each do |hsp|
144
+ match_to = hsp.match_query_to
145
+ match_from = hsp.match_query_from
146
+ len = match_to - match_from + 1
147
+ coverage[match_from - 1..match_to - 1] = Array.new(len, 1)
148
+ end
149
+
150
+ if low_identity.length == 0 && coverage.uniq.length == 1
151
+ identical_hits.push(hit)
152
+ end
153
+ end
154
+
155
+ identical_hits.each { |hit| hits.delete(hit) }
156
+ hits
157
+ end
158
+
159
+ def create_validation_tests(prediction, hits)
160
+ val = []
161
+ val.push LengthClusterValidation.new(prediction, hits)
162
+ val.push LengthRankValidation.new(prediction, hits)
163
+ val.push GeneMergeValidation.new(prediction, hits)
164
+ val.push DuplicationValidation.new(prediction, hits)
165
+ if @config[:type] == :nucleotide
166
+ val.push BlastReadingFrameValidation.new(prediction, hits)
167
+ val.push OpenReadingFrameValidation.new(prediction, hits)
168
+ end
169
+ val.push AlignmentValidation.new(prediction, hits)
170
+ val.select { |v| @opt[:validations].include? v.cli_name.downcase }
171
+ end
172
+
173
+ def check_validations(vals)
174
+ # check the class type of the elements in the list
175
+ vals.each { |v| fail ValidationClassError unless v.is_a? ValidationTest }
176
+ # check alias duplication
177
+ aliases = vals.map(&:cli_name)
178
+ fail AliasDuplicationError unless aliases.length == aliases.uniq.length
179
+ rescue ValidationClassError => e
180
+ $stderr.puts e
181
+ exit 1
182
+ rescue AliasDuplicationError => e
183
+ $stderr.puts e
184
+ exit 1
185
+ end
186
+
187
+ def check_validations_output(vals)
188
+ fail NoValidationError if @run_output.validations.length == 0
189
+ vals.each do |v|
190
+ fail ReportClassError unless v.validation_report.is_a? ValidationReport
191
+ end
192
+ rescue NoValidationError => e
193
+ $stderr.puts e
194
+ exit 1
195
+ rescue ReportClassError => e
196
+ $stderr.puts e
197
+ exit 1
198
+ end
199
+
200
+ def compute_scores
201
+ validations = @run_output.validations
202
+ scores = {}
203
+ scores[:successes] = validations.map { |v| v.result == v.expected }.count(true)
204
+ scores[:fails] = validations.map { |v| v.validation != :unapplicable && v.validation != :error && v.result != v.expected }.count(true)
205
+ scores = length_validation_scores(validations, scores)
206
+
207
+ @run_output.successes = scores[:successes]
208
+ @run_output.fails = scores[:fails]
209
+ total_query = scores[:successes].to_i + scores[:fails]
210
+ @run_output.overall_score = (scores[:successes] * 100 / total_query).round
211
+ end
212
+
213
+ # Since there are two length validations, it is necessary to adjust the
214
+ # scores accordingly
215
+ def length_validation_scores(validations, scores)
216
+ lcv = validations.select { |v| v.class == LengthClusterValidationOutput }
217
+ lrv = validations.select { |v| v.class == LengthRankValidationOutput }
218
+ if lcv.length == 1 && lrv.length == 1
219
+ score_lcv = (lcv[0].result == lcv[0].expected)
220
+ score_lrv = (lrv[0].result == lrv[0].expected)
221
+ if score_lcv == true && score_lrv == true
222
+ scores[:successes] -= 1 # if both are true: counted as 1 success
223
+ elsif score_lcv == false && score_lrv == false
224
+ scores[:fails] -= 1 # if both are false: counted as 1 fail
225
+ else
226
+ scores[:successes] -= 0.5
227
+ scores[:fails] -= 0.5
228
+ end
229
+ end
230
+ scores
231
+ end
232
+
233
+ def generate_run_output
234
+ @run_output.generate_html
235
+ @run_output.generate_json
236
+ @run_output.print_output_console
237
+ generate_run_overview
238
+ end
239
+
240
+ def generate_run_overview
241
+ vals = @run_output.validations
242
+ no_mafft = 0
243
+ no_internet = 0
244
+ errors = []
245
+ vals.each do |v|
246
+ unless v.errors.nil?
247
+ no_mafft += v.errors.select { |e| e == NoMafftInstallationError }.length
248
+ no_internet += v.errors.select { |e| e == NoInternetError }.length
249
+ end
250
+ errors.push(v.short_header) if v.validation == :error
251
+ end
252
+
253
+ no_evidence = vals.count { |v| v.result == :unapplicable || v.result == :warning } == vals.length
254
+ nee = (no_evidence) ? 1 : 0
255
+
256
+ good_scores = (@run_output.overall_score >= 75) ? 1 : 0
257
+ bad_scores = (@run_output.overall_score >= 75) ? 0 : 1
258
+
259
+ @mutex_array.synchronize do
260
+ @overview[:no_queries] += 1
261
+ @overview[:scores].push(@run_output.overall_score)
262
+ @overview[:good_scores] += good_scores
263
+ @overview[:bad_scores] += bad_scores
264
+ @overview[:nee] += nee
265
+ @overview[:no_mafft] += no_mafft
266
+ @overview[:no_internet] += no_internet
267
+ errors.each { |err| @overview[:map_errors][err] += 1 }
268
+
269
+ vals.each do |v|
270
+ next if v.run_time == 0 || v.run_time.nil?
271
+ next if v.validation == :unapplicable || v.validation == :error
272
+ p = Pair1.new(@overview[:run_time][v.short_header].x + v.run_time,
273
+ @overview[:run_time][v.short_header].y + 1)
274
+ @overview[:run_time][v.short_header] = p
275
+ end
276
+ end
277
+ end
278
+ end
279
+ end
@@ -1,5 +1,9 @@
1
- require 'genevalidator/validation_report'
1
+ require 'bio'
2
+ require 'forwardable'
3
+
2
4
  require 'genevalidator/exceptions'
5
+ require 'genevalidator/validation_report'
6
+ require 'genevalidator/validation_test'
3
7
 
4
8
  module GeneValidator
5
9
  ##
@@ -81,7 +85,8 @@ module GeneValidator
81
85
  # This class contains the methods necessary for
82
86
  # validations based on multiple alignment
83
87
  class AlignmentValidation < ValidationTest
84
- attr_reader :filename
88
+ extend Forwardable
89
+ def_delegators GeneValidator, :opt, :config
85
90
  attr_reader :multiple_alignment
86
91
  attr_reader :raw_seq_file
87
92
  attr_reader :index_file_name
@@ -90,32 +95,26 @@ module GeneValidator
90
95
  ##
91
96
  # Initializes the object
92
97
  # Params:
93
- # +type+: type of the predicted sequence (:nucleotide or :protein)
94
98
  # +prediction+: a +Sequence+ object representing the blast query
95
99
  # +hits+: a vector of +Sequence+ objects (representing blast hits)
96
- # +filename+: name of the fasta file
97
- # +mafft_path+: path of the MAFFT installation
98
- # +raw_seq_file+: name of the fasta file with raw sequences
99
- # +index_file_name+: name of the fasta index file
100
- # +raw_seq_file_load+: String - loaded content of the index file
101
- def initialize(type, prediction, hits, filename, raw_seq_file,
102
- index_file_name, raw_seq_file_load, db, num_threads)
100
+ # +plot_path+: name of the fasta file
101
+ def initialize(prediction, hits)
103
102
  super
104
- @short_header = 'MA'
105
- @header = 'Missing/Extra sequences'
103
+ @short_header = 'MissingExtraSequences'
104
+ @cli_name = 'align'
105
+ @header = 'Missing/Extra Sequences'
106
106
  @description = 'Finds missing and extra sequences in the' \
107
107
  ' prediction, based on the multiple alignment of' \
108
108
  ' the best hits. Also counts the percentage of' \
109
109
  ' the conserved regions that appear in the' \
110
110
  ' prediction.'
111
- @filename = filename
112
- @raw_seq_file = raw_seq_file
113
- @index_file_name = index_file_name
114
- @raw_seq_file_load = raw_seq_file_load
115
- @db = db
111
+ @raw_seq_file = opt[:raw_sequences]
112
+ @index_file_name = config[:raw_seq_file_index]
113
+ @raw_seq_file_load = config[:raw_seq_file_load]
114
+ @db = opt[:db]
116
115
  @multiple_alignment = []
117
- @cli_name = 'align'
118
- @num_threads = num_threads
116
+ @num_threads = opt[:num_threads]
117
+ @type = config[:type]
119
118
  end
120
119
 
121
120
  ##
@@ -189,39 +188,32 @@ module GeneValidator
189
188
  @description, gaps,
190
189
  extra_seq, consensus)
191
190
  @validation_report.plot_files.push(plot1)
192
- @validation_report.running_time = Time.now - start
191
+ @validation_report.run_time = Time.now - start
193
192
  @validation_report
194
193
 
195
194
  rescue NotEnoughHitsError
196
195
  @validation_report = ValidationReport.new('Not enough evidence',
197
196
  :warning, @short_header,
198
- @header, @description,
199
- @approach, @explanation,
200
- @conclusion)
197
+ @header, @description)
201
198
  rescue NoMafftInstallationError
202
199
  @validation_report = ValidationReport.new('Mafft error', :error,
203
200
  @short_header, @header,
204
- @description, @approach,
205
- @explanation, @conclusion)
201
+ @description)
206
202
  @validation_report.errors.push NoMafftInstallationError
207
203
  rescue NoInternetError
208
204
  @validation_report = ValidationReport.new('Internet error', :error,
209
205
  @short_header, @header,
210
- @description, @approach,
211
- @explanation, @conclusion)
206
+ @description)
212
207
  @validation_report.errors.push NoInternetError
213
208
  rescue ReadingFrameError
214
209
  @validation_report = ValidationReport.new('Multiple reading frames',
215
210
  :error, @short_header,
216
- @header, @description,
217
- @approach, @explanation,
218
- @conclusion)
211
+ @header, @description)
219
212
  @validation_report.errors.push 'Multiple reading frames Error'
220
213
  rescue Exception
221
214
  @validation_report = ValidationReport.new('Unexpected error', :error,
222
215
  @short_header, @header,
223
- @description, @approach,
224
- @explanation, @conclusion)
216
+ @description)
225
217
  @validation_report.errors.push 'Unexpected Error'
226
218
  end
227
219
 
@@ -417,9 +409,9 @@ module GeneValidator
417
409
  # lines for multiple hits alignment, prediction and statistical model
418
410
  # Params:
419
411
  # +freq+: +String+ residue frequency from the statistical model
420
- # +output+: filename of the json file
412
+ # +output+: plot_path of the json file
421
413
  # +ma+: +String+ array with the multiple-aligned hits and prediction
422
- def plot_alignment(freq, output = "#{@filename}_ma.json", ma = @multiple_alignment)
414
+ def plot_alignment(freq, ma = @multiple_alignment)
423
415
  # get indices of consensus in the multiple alignment
424
416
  consensus = get_consensus(@multiple_alignment[0..@multiple_alignment.length - 2])
425
417
  consensus_idxs = consensus.split(//).each_index.select { |j| isalpha(consensus[j]) }
@@ -438,32 +430,24 @@ module GeneValidator
438
430
 
439
431
  len = ma[0].length
440
432
 
441
- f = File.open(output, 'w')
442
- f.write((
443
433
  # plot statistical model
444
- freq.each_with_index.map { |f, j| { 'y' => ma.length, 'start' => j, 'stop' => j + 1, 'color' => 'orange', 'height' => f } } +
434
+ data = freq.each_with_index.map { |h, j| { 'y' => ma.length, 'start' => j, 'stop' => j + 1, 'color' => 'orange', 'height' => h } } +
445
435
  # hits
446
436
  match_alignment_ranges.each_with_index.map { |ranges, j| ranges.map { |range| { 'y' => ma.length - j - 1, 'start' => range.first, 'stop' => range.last, 'color' => 'red', 'height' => -1 } } }.flatten +
447
- ma[0..ma.length - 2].each_with_index.map { |_seq, j|
448
- consensus_ranges.map { |range| { 'y' => j + 1, 'start' => range.first, 'stop' => range.last, 'color' => 'yellow', 'height' => -1 } }
449
- }.flatten +
437
+ ma[0..ma.length - 2].each_with_index.map { |_seq, j| consensus_ranges.map { |range| { 'y' => j + 1, 'start' => range.first, 'stop' => range.last, 'color' => 'yellow', 'height' => -1 } } }.flatten +
450
438
  # plot prediction
451
439
  [{ 'y' => 0, 'start' => 0, 'stop' => len, 'color' => 'gray', 'height' => -1 }] +
452
440
  query_alignment_ranges.map { |range| { 'y' => 0, 'start' => range.first, 'stop' => range.last, 'color' => 'red', 'height' => -1 } }.flatten +
453
441
 
454
442
  # plot consensus
455
- consensus_all_ranges.map { |range| { 'y' => 0, 'start' => range.first, 'stop' => range.last, 'color' => 'yellow', 'height' => -1 } }.flatten).to_json)
456
-
457
- f.close
443
+ consensus_all_ranges.map { |range| { 'y' => 0, 'start' => range.first, 'stop' => range.last, 'color' => 'yellow', 'height' => -1 } }.flatten
458
444
 
459
445
  yAxisValues = 'Prediction'
460
- (1..ma.length - 1).each do |i|
461
- yAxisValues << ", hit&nbsp;#{i}"
462
- end
446
+ (1..ma.length - 1).each { |i| yAxisValues << ", hit #{i}" }
463
447
 
464
448
  yAxisValues << ', Statistical Model'
465
449
 
466
- Plot.new(output.scan(%r{([^/]+)$})[0][0],
450
+ Plot.new(data,
467
451
  :align,
468
452
  'Missing/Extra sequences Validation: Multiple Align. & Statistical model of hits',
469
453
  'Conserved Region, Yellow',