genevalidator 1.6.1 → 1.6.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (131)
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -1
  3. data/.travis.yml +2 -0
  4. data/README.md +78 -30
  5. data/Rakefile +11 -8
  6. data/aux/app_template_footer.erb +1 -6
  7. data/aux/app_template_header.erb +12 -32
  8. data/aux/files/css/style.css +2 -8
  9. data/aux/files/js/plots.js +564 -576
  10. data/aux/files/js/script.js +10 -0
  11. data/aux/json_footer.erb +8 -0
  12. data/aux/json_header.erb +19 -0
  13. data/aux/json_query.erb +14 -0
  14. data/aux/template_footer.erb +9 -58
  15. data/aux/template_header.erb +18 -58
  16. data/aux/template_query.erb +8 -36
  17. data/bin/genevalidator +45 -32
  18. data/genevalidator.gemspec +11 -7
  19. data/lib/genevalidator.rb +75 -455
  20. data/lib/genevalidator/arg_validation.rb +78 -107
  21. data/lib/genevalidator/blast.rb +57 -60
  22. data/lib/genevalidator/clusterization.rb +15 -15
  23. data/lib/genevalidator/exceptions.rb +32 -5
  24. data/lib/genevalidator/get_raw_sequences.rb +70 -33
  25. data/lib/genevalidator/hsp.rb +1 -4
  26. data/lib/genevalidator/json_to_gv_results.rb +109 -0
  27. data/lib/genevalidator/output.rb +177 -185
  28. data/lib/genevalidator/pool.rb +2 -1
  29. data/lib/genevalidator/sequences.rb +3 -3
  30. data/lib/genevalidator/tabular_parser.rb +24 -18
  31. data/lib/genevalidator/validation.rb +279 -0
  32. data/lib/genevalidator/validation_alignment.rb +31 -47
  33. data/lib/genevalidator/validation_blast_reading_frame.rb +19 -18
  34. data/lib/genevalidator/validation_duplication.rb +23 -19
  35. data/lib/genevalidator/validation_gene_merge.rb +30 -65
  36. data/lib/genevalidator/validation_length_cluster.rb +14 -53
  37. data/lib/genevalidator/validation_length_rank.rb +10 -11
  38. data/lib/genevalidator/validation_open_reading_frame.rb +18 -19
  39. data/lib/genevalidator/validation_report.rb +2 -5
  40. data/lib/genevalidator/validation_test.rb +8 -4
  41. data/lib/genevalidator/version.rb +1 -1
  42. data/test/test_all_validations.rb +51 -66
  43. data/test/test_blast.rb +68 -51
  44. data/test/test_clusterization.rb +1 -1
  45. data/test/test_clusterization_2d.rb +19 -13
  46. data/test/test_extended_array_methods.rb +1 -1
  47. data/test/test_files/all_validations_mrna/mrna.blast_tab6 +1806 -0
  48. data/test/test_files/all_validations_mrna/mrna.blast_tab7 +1865 -0
  49. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml → mrna.blast_xml} +18642 -1
  50. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml.index → mrna.blast_xml.index} +300 -0
  51. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta → mrna.fa} +0 -0
  52. data/test/test_files/all_validations_mrna/mrna.raw_seq +3970 -0
  53. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml.raw_seq.idx → mrna.raw_seq.idx} +901 -1
  54. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_tab → prot.blast_tab6} +416 -0
  55. data/test/test_files/all_validations_prot/prot.blast_tab7 +2400 -0
  56. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml → prot.blast_xml} +18299 -6723
  57. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.index → prot.blast_xml.index} +408 -0
  58. data/test/test_files/all_validations_prot/{all_validations_prot.fasta → prot.fa} +0 -0
  59. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.raw_seq → prot.raw_seq} +2735 -0
  60. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.raw_seq.idx → prot.raw_seq.idx} +3032 -1808
  61. data/test/test_sequences.rb +46 -41
  62. data/test/test_validation_open_reading_frame.rb +318 -202
  63. data/test/test_validations.rb +48 -32
  64. metadata +76 -102
  65. data/doc/AliasDuplicationError.html +0 -134
  66. data/doc/AlignmentValidation.html +0 -1687
  67. data/doc/AlignmentValidationOutput.html +0 -659
  68. data/doc/Blast.html +0 -1905
  69. data/doc/BlastRFValidationOutput.html +0 -545
  70. data/doc/BlastReadingFrameValidation.html +0 -370
  71. data/doc/BlastUtils.html +0 -875
  72. data/doc/ClasspathError.html +0 -134
  73. data/doc/Cluster.html +0 -1316
  74. data/doc/DuplciationValidationOutput.html +0 -564
  75. data/doc/DuplicationValidation.html +0 -920
  76. data/doc/DuplicationValidationOutput.html +0 -564
  77. data/doc/FileNotFoundException.html +0 -134
  78. data/doc/GeneMergeValidation.html +0 -935
  79. data/doc/GeneMergeValidationOutput.html +0 -652
  80. data/doc/HierarchicalClusterization.html +0 -994
  81. data/doc/Hsp.html +0 -1485
  82. data/doc/InconsistentTabularFormat.html +0 -135
  83. data/doc/LengthClusterValidation.html +0 -982
  84. data/doc/LengthClusterValidationOutput.html +0 -515
  85. data/doc/LengthRankValidation.html +0 -496
  86. data/doc/LengthRankValidationOutput.html +0 -517
  87. data/doc/NoInternetError.html +0 -135
  88. data/doc/NoMafftInstallationError.html +0 -134
  89. data/doc/NoPIdentError.html +0 -134
  90. data/doc/NoValidationError.html +0 -134
  91. data/doc/NotEnoughHitsError.html +0 -135
  92. data/doc/ORFValidationOutput.html +0 -593
  93. data/doc/OpenReadingFrameValidation.html +0 -1107
  94. data/doc/OtherError.html +0 -123
  95. data/doc/Output.html +0 -1540
  96. data/doc/Pair.html +0 -309
  97. data/doc/PairCluster.html +0 -767
  98. data/doc/Plot.html +0 -837
  99. data/doc/QueryError.html +0 -134
  100. data/doc/ReportClassError.html +0 -135
  101. data/doc/Sequence.html +0 -1299
  102. data/doc/SequenceTypeError.html +0 -135
  103. data/doc/TabularEntry.html +0 -837
  104. data/doc/TabularParser.html +0 -1104
  105. data/doc/Validation.html +0 -2147
  106. data/doc/ValidationClassError.html +0 -134
  107. data/doc/ValidationOutput.html +0 -460
  108. data/doc/ValidationReport.html +0 -940
  109. data/doc/ValidationTest.html +0 -939
  110. data/doc/_index.html +0 -449
  111. data/doc/class_list.html +0 -54
  112. data/doc/css/common.css +0 -1
  113. data/doc/css/full_list.css +0 -57
  114. data/doc/css/style.css +0 -338
  115. data/doc/file.README.html +0 -151
  116. data/doc/file_list.html +0 -56
  117. data/doc/frames.html +0 -26
  118. data/doc/index.html +0 -151
  119. data/doc/js/app.js +0 -214
  120. data/doc/js/full_list.js +0 -178
  121. data/doc/js/jquery.js +0 -4
  122. data/doc/method_list.html +0 -1505
  123. data/doc/top-level-namespace.html +0 -112
  124. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab +0 -967
  125. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.index +0 -967
  126. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.raw_seq +0 -4929
  127. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.raw_seq.idx +0 -1006
  128. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_xml.raw_seq +0 -2075
  129. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.index +0 -1864
  130. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.raw_seq +0 -42411
  131. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.raw_seq.idx +0 -3751
@@ -1,5 +1,7 @@
1
1
  require 'net/http'
2
2
  require 'io/console'
3
+ require 'yaml'
4
+
3
5
  module GeneValidator
4
6
  # This is a class for storing data on each sequence
5
7
  class Sequence
@@ -65,7 +67,7 @@ module GeneValidator
65
67
  end
66
68
  @raw_sequence = seq
67
69
  else
68
- # puts "Tries to connect to the internet for #{accno}"
70
+ $stderr.puts "Getting sequence for '#{accno}' from NCBI - avoid this with '-r'."
69
71
  uri = 'http://www.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?'\
70
72
  "db=#{dbtype}&retmax=1&usehistory=y&term=#{accno}/"
71
73
  result = Net::HTTP.get(URI.parse(uri))
@@ -85,8 +87,6 @@ module GeneValidator
85
87
  @raw_sequence = '' unless @raw_sequence.index(/ERROR/).nil?
86
88
  end
87
89
  @raw_sequence
88
- # rescue Exception => error
89
- # @raw_sequence = ""
90
90
  end
91
91
 
92
92
  ##
@@ -1,5 +1,9 @@
1
- require 'genevalidator/exceptions'
2
1
  require 'csv'
2
+ require 'forwardable'
3
+
4
+ require 'genevalidator/sequences'
5
+ require 'genevalidator/hsp'
6
+ require 'genevalidator/exceptions'
3
7
 
4
8
  #
5
9
  module GeneValidator
@@ -8,6 +12,9 @@ module GeneValidator
8
12
  ##
9
13
  # This class parses the tabular output of BLAST (outfmt 6 & 7)
10
14
  class TabularParser
15
+ extend Forwardable
16
+ def_delegators GeneValidator, :opt, :config
17
+
11
18
  attr_reader :rows
12
19
  attr_reader :tab_results
13
20
  attr_reader :column_names
@@ -15,28 +22,25 @@ module GeneValidator
15
22
 
16
23
  ##
17
24
  # Initializes the object
18
- # +file_content+ : String with the tabular BLAST output
19
- # +format+: format of the tabular output (comma/space delimited string)
20
- # +type+: :nucleotide or :mrna
21
- def initialize(filename, format, type)
25
+ def initialize(tab_file = opt[:blast_tabular_file],
26
+ format = opt[:blast_tabular_options], type = config[:type])
22
27
  @column_names = format.gsub(/[-\d]/, '').split(/[ ,]/)
23
- @tab_results = analayse_tabular_file(filename)
24
- @rows = @tab_results.to_enum
25
28
  @type = type
29
+ @tab_results = analayse_tabular_file(tab_file)
30
+ @rows = @tab_results.to_enum
26
31
  end
27
32
 
28
33
  ##
29
34
  #
30
35
  def analayse_tabular_file(filename)
31
- tab_results = []
32
- file = File.read(filename)
33
- lines = CSV.parse(file, col_sep: "\t",
34
- skip_lines: /^#/,
35
- headers: @column_names)
36
+ results = []
37
+ file = File.read(filename)
38
+ lines = CSV.parse(file, col_sep: "\t", skip_lines: /^#/,
39
+ headers: @column_names)
36
40
  lines.each do |line|
37
- tab_results << line.to_hash
41
+ results << line.to_hash
38
42
  end
39
- tab_results
43
+ results
40
44
  end
41
45
 
42
46
  ##
@@ -58,17 +62,19 @@ module GeneValidator
58
62
  def parse_next(query_id = nil)
59
63
  current_id = @rows.peek['qseqid']
60
64
  return [] if !query_id.nil? && current_id != query_id
61
- hits = @tab_results.partition { |h| h['qseqid'] == current_id }[0]
62
- hit_seq = initialise_classes(hits)
65
+ hit_seq = initialise_classes(current_id)
63
66
  move_to_next_query
64
67
  hit_seq
65
68
  rescue StopIteration
66
69
  return []
67
70
  end
68
71
 
72
+ private
73
+
69
74
  ##
70
75
  #
71
- def initialise_classes(hits)
76
+ def initialise_classes(current_id, tab_results = @tab_results)
77
+ hits = tab_results.partition { |h| h['qseqid'] == current_id }[0]
72
78
  hit_list = []
73
79
  grouped_hits = hits.group_by { |row| row['sseqid'] }
74
80
 
@@ -90,7 +96,7 @@ module GeneValidator
90
96
  hsps = hits.select { |row| row['sseqid'] == current_query_id }
91
97
  hsps.each do |row|
92
98
  hsp = Hsp.new
93
- hsp.init_tabular_attribute(row, type)
99
+ hsp.init_tabular_attribute(row)
94
100
  hit_seq.hsp_list.push(hsp)
95
101
  end
96
102
  end
@@ -0,0 +1,279 @@
1
+ require 'forwardable'
2
+
3
+ require 'genevalidator/blast'
4
+ require 'genevalidator/exceptions'
5
+ require 'genevalidator/output'
6
+ require 'genevalidator/pool'
7
+ require 'genevalidator/sequences'
8
+ require 'genevalidator/validation_length_cluster'
9
+ require 'genevalidator/validation_length_rank'
10
+ require 'genevalidator/validation_blast_reading_frame'
11
+ require 'genevalidator/validation_gene_merge'
12
+ require 'genevalidator/validation_duplication'
13
+ require 'genevalidator/validation_open_reading_frame'
14
+ require 'genevalidator/validation_alignment'
15
+
16
+ # Top level module / namespace.
17
+ module GeneValidator
18
+ Pair1 = Struct.new(:x, :y)
19
+
20
+ # Class that initialises separate Validate.new() instances for each query.
21
+ class Validations
22
+ extend Forwardable
23
+ def_delegators GeneValidator, :opt, :config, :query_idx
24
+ def initialize
25
+ @opt = opt
26
+ @config = config
27
+ @query_idx = query_idx
28
+ end
29
+
30
+ ##
31
+ #
32
+ def run_validations(iterator)
33
+ p = Pool.new(@opt[:num_threads]) if @opt[:num_threads] > 1
34
+
35
+ while @config[:idx] + 1 < @query_idx.length
36
+ prediction = get_info_on_query_sequence
37
+ @config[:idx] += 1
38
+
39
+ blast_hits = parse_next_iteration(iterator, prediction)
40
+
41
+ if blast_hits.nil?
42
+ @config[:idx] -= 1
43
+ break
44
+ end
45
+
46
+ if @opt[:num_threads] == 1
47
+ (Validate.new).validate(prediction, blast_hits, @config[:idx])
48
+ else
49
+ p.schedule(prediction, blast_hits, @config[:idx]) do |pred, hits, idx|
50
+ (Validate.new).validate(pred, hits, idx)
51
+ end
52
+ end
53
+ end
54
+ ensure
55
+ p.shutdown if @opt[:num_threads] > 1
56
+ end
57
+
58
+ ##
59
+ # get info about the query
60
+ def get_info_on_query_sequence(input_file = @opt[:input_fasta_file],
61
+ seq_type = @config[:type])
62
+ start_offset = @query_idx[@config[:idx] + 1] - @query_idx[@config[:idx]]
63
+ end_offset = @query_idx[@config[:idx]]
64
+ query = IO.binread(input_file, start_offset, end_offset)
65
+ parse_query = query.scan(/>([^\n]*)\n([A-Za-z\n]*)/)[0]
66
+
67
+ prediction = Sequence.new
68
+ prediction.definition = parse_query[0].gsub("\n", '')
69
+ prediction.identifier = prediction.definition.gsub(/ .*/, '')
70
+ prediction.type = seq_type
71
+ prediction.raw_sequence = parse_query[1].gsub("\n", '')
72
+ prediction.length_protein = prediction.raw_sequence.length
73
+ prediction.length_protein /= 3 if seq_type == :nucleotide
74
+ prediction
75
+ end
76
+
77
+ def parse_next_iteration(iterator, prediction)
78
+ iterator.next if @config[:idx] < @config[:start_idx]
79
+ if @opt[:blast_xml_file]
80
+ BlastUtils.parse_next(iterator)
81
+ elsif @opt[:blast_tabular_file]
82
+ iterator.parse_next(prediction.identifier)
83
+ end
84
+ end
85
+ end
86
+
87
+ # Class that runs the validations (Instantiated for each query)
88
+ class Validate
89
+ extend Forwardable
90
+ def_delegators GeneValidator, :opt, :config, :mutex_array, :overview
91
+
92
+ ##
93
+ # Initializes the object
94
+ # Params:
95
+ # +opt+: A hash with the following keys: validations:, blast_tabular_file:,
96
+ # blast_tabular_options:, blast_xml_file:, db:, raw_sequences:,
97
+ # num_threads:, fast:
98
+ # +start_idx+: number of the sequence from the file to start with
99
+ # +overall_evaluation+: boolean variable for printing overall evaluation
100
+ def initialize
101
+ @opt = opt
102
+ @config = config
103
+ @mutex_array = mutex_array
104
+ @run_output = nil
105
+
106
+ @overview = overview
107
+ end
108
+
109
+ ##
110
+ # Validate one query and create validation report
111
+ # Params:
112
+ # +prediction+: Sequence object
113
+ # +hits+: Array of +Sequence+ objects
114
+ # +current_idx+: the index number of the query
115
+ def validate(prediction, hits, current_idx)
116
+ hits = remove_identical_hits(prediction, hits)
117
+ vals = create_validation_tests(prediction, hits)
118
+ check_validations(vals)
119
+ vals.each(&:run)
120
+ @run_output = Output.new(current_idx, hits.length, prediction.definition)
121
+ @run_output.validations = vals.map(&:validation_report)
122
+ check_validations_output(vals)
123
+
124
+ compute_scores
125
+ generate_run_output
126
+ end
127
+
128
+ ##
129
+ # Removes identical hits (100% coverage and >99% identity)
130
+ # Params:
131
+ # +prediction+: Sequence object
132
+ # +hits+: Array of +Sequence+ objects
133
+ # Output:
134
+ # new array of hit +Sequence+ objects
135
+ def remove_identical_hits(prediction, hits)
136
+ identical_hits = []
137
+ hits.each do |hit|
138
+ low_identity = hit.hsp_list.select { |hsp| hsp.pidentity < 99 }
139
+ no_data = hit.hsp_list.select { |hsp| hsp.pidentity.nil? }
140
+ low_identity += no_data
141
+ # check the coverage
142
+ coverage = Array.new(prediction.length_protein, 0)
143
+ hit.hsp_list.each do |hsp|
144
+ match_to = hsp.match_query_to
145
+ match_from = hsp.match_query_from
146
+ len = match_to - match_from + 1
147
+ coverage[match_from - 1..match_to - 1] = Array.new(len, 1)
148
+ end
149
+
150
+ if low_identity.length == 0 && coverage.uniq.length == 1
151
+ identical_hits.push(hit)
152
+ end
153
+ end
154
+
155
+ identical_hits.each { |hit| hits.delete(hit) }
156
+ hits
157
+ end
158
+
159
+ def create_validation_tests(prediction, hits)
160
+ val = []
161
+ val.push LengthClusterValidation.new(prediction, hits)
162
+ val.push LengthRankValidation.new(prediction, hits)
163
+ val.push GeneMergeValidation.new(prediction, hits)
164
+ val.push DuplicationValidation.new(prediction, hits)
165
+ if @config[:type] == :nucleotide
166
+ val.push BlastReadingFrameValidation.new(prediction, hits)
167
+ val.push OpenReadingFrameValidation.new(prediction, hits)
168
+ end
169
+ val.push AlignmentValidation.new(prediction, hits)
170
+ val.select { |v| @opt[:validations].include? v.cli_name.downcase }
171
+ end
172
+
173
+ def check_validations(vals)
174
+ # check the class type of the elements in the list
175
+ vals.each { |v| fail ValidationClassError unless v.is_a? ValidationTest }
176
+ # check alias duplication
177
+ aliases = vals.map(&:cli_name)
178
+ fail AliasDuplicationError unless aliases.length == aliases.uniq.length
179
+ rescue ValidationClassError => e
180
+ $stderr.puts e
181
+ exit 1
182
+ rescue AliasDuplicationError => e
183
+ $stderr.puts e
184
+ exit 1
185
+ end
186
+
187
+ def check_validations_output(vals)
188
+ fail NoValidationError if @run_output.validations.length == 0
189
+ vals.each do |v|
190
+ fail ReportClassError unless v.validation_report.is_a? ValidationReport
191
+ end
192
+ rescue NoValidationError => e
193
+ $stderr.puts e
194
+ exit 1
195
+ rescue ReportClassError => e
196
+ $stderr.puts e
197
+ exit 1
198
+ end
199
+
200
+ def compute_scores
201
+ validations = @run_output.validations
202
+ scores = {}
203
+ scores[:successes] = validations.map { |v| v.result == v.expected }.count(true)
204
+ scores[:fails] = validations.map { |v| v.validation != :unapplicable && v.validation != :error && v.result != v.expected }.count(true)
205
+ scores = length_validation_scores(validations, scores)
206
+
207
+ @run_output.successes = scores[:successes]
208
+ @run_output.fails = scores[:fails]
209
+ total_query = scores[:successes].to_i + scores[:fails]
210
+ @run_output.overall_score = (scores[:successes] * 100 / total_query).round
211
+ end
212
+
213
+ # Since there are two length validations, it is necessary to adjust the
214
+ # scores accordingly
215
+ def length_validation_scores(validations, scores)
216
+ lcv = validations.select { |v| v.class == LengthClusterValidationOutput }
217
+ lrv = validations.select { |v| v.class == LengthRankValidationOutput }
218
+ if lcv.length == 1 && lrv.length == 1
219
+ score_lcv = (lcv[0].result == lcv[0].expected)
220
+ score_lrv = (lrv[0].result == lrv[0].expected)
221
+ if score_lcv == true && score_lrv == true
222
+ scores[:successes] -= 1 # if both are true: counted as 1 success
223
+ elsif score_lcv == false && score_lrv == false
224
+ scores[:fails] -= 1 # if both are false: counted as 1 fail
225
+ else
226
+ scores[:successes] -= 0.5
227
+ scores[:fails] -= 0.5
228
+ end
229
+ end
230
+ scores
231
+ end
232
+
233
+ def generate_run_output
234
+ @run_output.generate_html
235
+ @run_output.generate_json
236
+ @run_output.print_output_console
237
+ generate_run_overview
238
+ end
239
+
240
+ def generate_run_overview
241
+ vals = @run_output.validations
242
+ no_mafft = 0
243
+ no_internet = 0
244
+ errors = []
245
+ vals.each do |v|
246
+ unless v.errors.nil?
247
+ no_mafft += v.errors.select { |e| e == NoMafftInstallationError }.length
248
+ no_internet += v.errors.select { |e| e == NoInternetError }.length
249
+ end
250
+ errors.push(v.short_header) if v.validation == :error
251
+ end
252
+
253
+ no_evidence = vals.count { |v| v.result == :unapplicable || v.result == :warning } == vals.length
254
+ nee = (no_evidence) ? 1 : 0
255
+
256
+ good_scores = (@run_output.overall_score >= 75) ? 1 : 0
257
+ bad_scores = (@run_output.overall_score >= 75) ? 0 : 1
258
+
259
+ @mutex_array.synchronize do
260
+ @overview[:no_queries] += 1
261
+ @overview[:scores].push(@run_output.overall_score)
262
+ @overview[:good_scores] += good_scores
263
+ @overview[:bad_scores] += bad_scores
264
+ @overview[:nee] += nee
265
+ @overview[:no_mafft] += no_mafft
266
+ @overview[:no_internet] += no_internet
267
+ errors.each { |err| @overview[:map_errors][err] += 1 }
268
+
269
+ vals.each do |v|
270
+ next if v.run_time == 0 || v.run_time.nil?
271
+ next if v.validation == :unapplicable || v.validation == :error
272
+ p = Pair1.new(@overview[:run_time][v.short_header].x + v.run_time,
273
+ @overview[:run_time][v.short_header].y + 1)
274
+ @overview[:run_time][v.short_header] = p
275
+ end
276
+ end
277
+ end
278
+ end
279
+ end
@@ -1,5 +1,9 @@
1
- require 'genevalidator/validation_report'
1
+ require 'bio'
2
+ require 'forwardable'
3
+
2
4
  require 'genevalidator/exceptions'
5
+ require 'genevalidator/validation_report'
6
+ require 'genevalidator/validation_test'
3
7
 
4
8
  module GeneValidator
5
9
  ##
@@ -81,7 +85,8 @@ module GeneValidator
81
85
  # This class contains the methods necessary for
82
86
  # validations based on multiple alignment
83
87
  class AlignmentValidation < ValidationTest
84
- attr_reader :filename
88
+ extend Forwardable
89
+ def_delegators GeneValidator, :opt, :config
85
90
  attr_reader :multiple_alignment
86
91
  attr_reader :raw_seq_file
87
92
  attr_reader :index_file_name
@@ -90,32 +95,26 @@ module GeneValidator
90
95
  ##
91
96
  # Initializes the object
92
97
  # Params:
93
- # +type+: type of the predicted sequence (:nucleotide or :protein)
94
98
  # +prediction+: a +Sequence+ object representing the blast query
95
99
  # +hits+: a vector of +Sequence+ objects (representing blast hits)
96
- # +filename+: name of the fasta file
97
- # +mafft_path+: path of the MAFFT installation
98
- # +raw_seq_file+: name of the fasta file with raw sequences
99
- # +index_file_name+: name of the fasta index file
100
- # +raw_seq_file_load+: String - loaded content of the index file
101
- def initialize(type, prediction, hits, filename, raw_seq_file,
102
- index_file_name, raw_seq_file_load, db, num_threads)
100
+ # +plot_path+: name of the fasta file
101
+ def initialize(prediction, hits)
103
102
  super
104
- @short_header = 'MA'
105
- @header = 'Missing/Extra sequences'
103
+ @short_header = 'MissingExtraSequences'
104
+ @cli_name = 'align'
105
+ @header = 'Missing/Extra Sequences'
106
106
  @description = 'Finds missing and extra sequences in the' \
107
107
  ' prediction, based on the multiple alignment of' \
108
108
  ' the best hits. Also counts the percentage of' \
109
109
  ' the conserved regions that appear in the' \
110
110
  ' prediction.'
111
- @filename = filename
112
- @raw_seq_file = raw_seq_file
113
- @index_file_name = index_file_name
114
- @raw_seq_file_load = raw_seq_file_load
115
- @db = db
111
+ @raw_seq_file = opt[:raw_sequences]
112
+ @index_file_name = config[:raw_seq_file_index]
113
+ @raw_seq_file_load = config[:raw_seq_file_load]
114
+ @db = opt[:db]
116
115
  @multiple_alignment = []
117
- @cli_name = 'align'
118
- @num_threads = num_threads
116
+ @num_threads = opt[:num_threads]
117
+ @type = config[:type]
119
118
  end
120
119
 
121
120
  ##
@@ -189,39 +188,32 @@ module GeneValidator
189
188
  @description, gaps,
190
189
  extra_seq, consensus)
191
190
  @validation_report.plot_files.push(plot1)
192
- @validation_report.running_time = Time.now - start
191
+ @validation_report.run_time = Time.now - start
193
192
  @validation_report
194
193
 
195
194
  rescue NotEnoughHitsError
196
195
  @validation_report = ValidationReport.new('Not enough evidence',
197
196
  :warning, @short_header,
198
- @header, @description,
199
- @approach, @explanation,
200
- @conclusion)
197
+ @header, @description)
201
198
  rescue NoMafftInstallationError
202
199
  @validation_report = ValidationReport.new('Mafft error', :error,
203
200
  @short_header, @header,
204
- @description, @approach,
205
- @explanation, @conclusion)
201
+ @description)
206
202
  @validation_report.errors.push NoMafftInstallationError
207
203
  rescue NoInternetError
208
204
  @validation_report = ValidationReport.new('Internet error', :error,
209
205
  @short_header, @header,
210
- @description, @approach,
211
- @explanation, @conclusion)
206
+ @description)
212
207
  @validation_report.errors.push NoInternetError
213
208
  rescue ReadingFrameError
214
209
  @validation_report = ValidationReport.new('Multiple reading frames',
215
210
  :error, @short_header,
216
- @header, @description,
217
- @approach, @explanation,
218
- @conclusion)
211
+ @header, @description)
219
212
  @validation_report.errors.push 'Multiple reading frames Error'
220
213
  rescue Exception
221
214
  @validation_report = ValidationReport.new('Unexpected error', :error,
222
215
  @short_header, @header,
223
- @description, @approach,
224
- @explanation, @conclusion)
216
+ @description)
225
217
  @validation_report.errors.push 'Unexpected Error'
226
218
  end
227
219
 
@@ -417,9 +409,9 @@ module GeneValidator
417
409
  # lines for multiple hits alignment, prediction and statistical model
418
410
  # Params:
419
411
  # +freq+: +String+ residue frequency from the statistical model
420
- # +output+: filename of the json file
412
+ # +output+: plot_path of the json file
421
413
  # +ma+: +String+ array with the multiple aligned hits and prediction
422
- def plot_alignment(freq, output = "#{@filename}_ma.json", ma = @multiple_alignment)
414
+ def plot_alignment(freq, ma = @multiple_alignment)
423
415
  # get indices of consensus in the multiple alignment
424
416
  consensus = get_consensus(@multiple_alignment[0..@multiple_alignment.length - 2])
425
417
  consensus_idxs = consensus.split(//).each_index.select { |j| isalpha(consensus[j]) }
@@ -438,32 +430,24 @@ module GeneValidator
438
430
 
439
431
  len = ma[0].length
440
432
 
441
- f = File.open(output, 'w')
442
- f.write((
443
433
  # plot statistical model
444
- freq.each_with_index.map { |f, j| { 'y' => ma.length, 'start' => j, 'stop' => j + 1, 'color' => 'orange', 'height' => f } } +
434
+ data = freq.each_with_index.map { |h, j| { 'y' => ma.length, 'start' => j, 'stop' => j + 1, 'color' => 'orange', 'height' => h } } +
445
435
  # hits
446
436
  match_alignment_ranges.each_with_index.map { |ranges, j| ranges.map { |range| { 'y' => ma.length - j - 1, 'start' => range.first, 'stop' => range.last, 'color' => 'red', 'height' => -1 } } }.flatten +
447
- ma[0..ma.length - 2].each_with_index.map { |_seq, j|
448
- consensus_ranges.map { |range| { 'y' => j + 1, 'start' => range.first, 'stop' => range.last, 'color' => 'yellow', 'height' => -1 } }
449
- }.flatten +
437
+ ma[0..ma.length - 2].each_with_index.map { |_seq, j| consensus_ranges.map { |range| { 'y' => j + 1, 'start' => range.first, 'stop' => range.last, 'color' => 'yellow', 'height' => -1 } } }.flatten +
450
438
  # plot prediction
451
439
  [{ 'y' => 0, 'start' => 0, 'stop' => len, 'color' => 'gray', 'height' => -1 }] +
452
440
  query_alignment_ranges.map { |range| { 'y' => 0, 'start' => range.first, 'stop' => range.last, 'color' => 'red', 'height' => -1 } }.flatten +
453
441
 
454
442
  # plot consensus
455
- consensus_all_ranges.map { |range| { 'y' => 0, 'start' => range.first, 'stop' => range.last, 'color' => 'yellow', 'height' => -1 } }.flatten).to_json)
456
-
457
- f.close
443
+ consensus_all_ranges.map { |range| { 'y' => 0, 'start' => range.first, 'stop' => range.last, 'color' => 'yellow', 'height' => -1 } }.flatten
458
444
 
459
445
  yAxisValues = 'Prediction'
460
- (1..ma.length - 1).each do |i|
461
- yAxisValues << ", hit&nbsp;#{i}"
462
- end
446
+ (1..ma.length - 1).each { |i| yAxisValues << ", hit #{i}" }
463
447
 
464
448
  yAxisValues << ', Statistical Model'
465
449
 
466
- Plot.new(output.scan(%r{([^/]+)$})[0][0],
450
+ Plot.new(data,
467
451
  :align,
468
452
  'Missing/Extra sequences Validation: Multiple Align. & Statistical model of hits',
469
453
  'Conserved Region, Yellow',