genevalidator 1.6.1 → 1.6.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (131) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -1
  3. data/.travis.yml +2 -0
  4. data/README.md +78 -30
  5. data/Rakefile +11 -8
  6. data/aux/app_template_footer.erb +1 -6
  7. data/aux/app_template_header.erb +12 -32
  8. data/aux/files/css/style.css +2 -8
  9. data/aux/files/js/plots.js +564 -576
  10. data/aux/files/js/script.js +10 -0
  11. data/aux/json_footer.erb +8 -0
  12. data/aux/json_header.erb +19 -0
  13. data/aux/json_query.erb +14 -0
  14. data/aux/template_footer.erb +9 -58
  15. data/aux/template_header.erb +18 -58
  16. data/aux/template_query.erb +8 -36
  17. data/bin/genevalidator +45 -32
  18. data/genevalidator.gemspec +11 -7
  19. data/lib/genevalidator.rb +75 -455
  20. data/lib/genevalidator/arg_validation.rb +78 -107
  21. data/lib/genevalidator/blast.rb +57 -60
  22. data/lib/genevalidator/clusterization.rb +15 -15
  23. data/lib/genevalidator/exceptions.rb +32 -5
  24. data/lib/genevalidator/get_raw_sequences.rb +70 -33
  25. data/lib/genevalidator/hsp.rb +1 -4
  26. data/lib/genevalidator/json_to_gv_results.rb +109 -0
  27. data/lib/genevalidator/output.rb +177 -185
  28. data/lib/genevalidator/pool.rb +2 -1
  29. data/lib/genevalidator/sequences.rb +3 -3
  30. data/lib/genevalidator/tabular_parser.rb +24 -18
  31. data/lib/genevalidator/validation.rb +279 -0
  32. data/lib/genevalidator/validation_alignment.rb +31 -47
  33. data/lib/genevalidator/validation_blast_reading_frame.rb +19 -18
  34. data/lib/genevalidator/validation_duplication.rb +23 -19
  35. data/lib/genevalidator/validation_gene_merge.rb +30 -65
  36. data/lib/genevalidator/validation_length_cluster.rb +14 -53
  37. data/lib/genevalidator/validation_length_rank.rb +10 -11
  38. data/lib/genevalidator/validation_open_reading_frame.rb +18 -19
  39. data/lib/genevalidator/validation_report.rb +2 -5
  40. data/lib/genevalidator/validation_test.rb +8 -4
  41. data/lib/genevalidator/version.rb +1 -1
  42. data/test/test_all_validations.rb +51 -66
  43. data/test/test_blast.rb +68 -51
  44. data/test/test_clusterization.rb +1 -1
  45. data/test/test_clusterization_2d.rb +19 -13
  46. data/test/test_extended_array_methods.rb +1 -1
  47. data/test/test_files/all_validations_mrna/mrna.blast_tab6 +1806 -0
  48. data/test/test_files/all_validations_mrna/mrna.blast_tab7 +1865 -0
  49. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml → mrna.blast_xml} +18642 -1
  50. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml.index → mrna.blast_xml.index} +300 -0
  51. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta → mrna.fa} +0 -0
  52. data/test/test_files/all_validations_mrna/mrna.raw_seq +3970 -0
  53. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml.raw_seq.idx → mrna.raw_seq.idx} +901 -1
  54. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_tab → prot.blast_tab6} +416 -0
  55. data/test/test_files/all_validations_prot/prot.blast_tab7 +2400 -0
  56. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml → prot.blast_xml} +18299 -6723
  57. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.index → prot.blast_xml.index} +408 -0
  58. data/test/test_files/all_validations_prot/{all_validations_prot.fasta → prot.fa} +0 -0
  59. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.raw_seq → prot.raw_seq} +2735 -0
  60. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.raw_seq.idx → prot.raw_seq.idx} +3032 -1808
  61. data/test/test_sequences.rb +46 -41
  62. data/test/test_validation_open_reading_frame.rb +318 -202
  63. data/test/test_validations.rb +48 -32
  64. metadata +76 -102
  65. data/doc/AliasDuplicationError.html +0 -134
  66. data/doc/AlignmentValidation.html +0 -1687
  67. data/doc/AlignmentValidationOutput.html +0 -659
  68. data/doc/Blast.html +0 -1905
  69. data/doc/BlastRFValidationOutput.html +0 -545
  70. data/doc/BlastReadingFrameValidation.html +0 -370
  71. data/doc/BlastUtils.html +0 -875
  72. data/doc/ClasspathError.html +0 -134
  73. data/doc/Cluster.html +0 -1316
  74. data/doc/DuplciationValidationOutput.html +0 -564
  75. data/doc/DuplicationValidation.html +0 -920
  76. data/doc/DuplicationValidationOutput.html +0 -564
  77. data/doc/FileNotFoundException.html +0 -134
  78. data/doc/GeneMergeValidation.html +0 -935
  79. data/doc/GeneMergeValidationOutput.html +0 -652
  80. data/doc/HierarchicalClusterization.html +0 -994
  81. data/doc/Hsp.html +0 -1485
  82. data/doc/InconsistentTabularFormat.html +0 -135
  83. data/doc/LengthClusterValidation.html +0 -982
  84. data/doc/LengthClusterValidationOutput.html +0 -515
  85. data/doc/LengthRankValidation.html +0 -496
  86. data/doc/LengthRankValidationOutput.html +0 -517
  87. data/doc/NoInternetError.html +0 -135
  88. data/doc/NoMafftInstallationError.html +0 -134
  89. data/doc/NoPIdentError.html +0 -134
  90. data/doc/NoValidationError.html +0 -134
  91. data/doc/NotEnoughHitsError.html +0 -135
  92. data/doc/ORFValidationOutput.html +0 -593
  93. data/doc/OpenReadingFrameValidation.html +0 -1107
  94. data/doc/OtherError.html +0 -123
  95. data/doc/Output.html +0 -1540
  96. data/doc/Pair.html +0 -309
  97. data/doc/PairCluster.html +0 -767
  98. data/doc/Plot.html +0 -837
  99. data/doc/QueryError.html +0 -134
  100. data/doc/ReportClassError.html +0 -135
  101. data/doc/Sequence.html +0 -1299
  102. data/doc/SequenceTypeError.html +0 -135
  103. data/doc/TabularEntry.html +0 -837
  104. data/doc/TabularParser.html +0 -1104
  105. data/doc/Validation.html +0 -2147
  106. data/doc/ValidationClassError.html +0 -134
  107. data/doc/ValidationOutput.html +0 -460
  108. data/doc/ValidationReport.html +0 -940
  109. data/doc/ValidationTest.html +0 -939
  110. data/doc/_index.html +0 -449
  111. data/doc/class_list.html +0 -54
  112. data/doc/css/common.css +0 -1
  113. data/doc/css/full_list.css +0 -57
  114. data/doc/css/style.css +0 -338
  115. data/doc/file.README.html +0 -151
  116. data/doc/file_list.html +0 -56
  117. data/doc/frames.html +0 -26
  118. data/doc/index.html +0 -151
  119. data/doc/js/app.js +0 -214
  120. data/doc/js/full_list.js +0 -178
  121. data/doc/js/jquery.js +0 -4
  122. data/doc/method_list.html +0 -1505
  123. data/doc/top-level-namespace.html +0 -112
  124. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab +0 -967
  125. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.index +0 -967
  126. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.raw_seq +0 -4929
  127. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.raw_seq.idx +0 -1006
  128. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_xml.raw_seq +0 -2075
  129. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.index +0 -1864
  130. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.raw_seq +0 -42411
  131. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.raw_seq.idx +0 -3751
@@ -1,24 +1,29 @@
1
+ require 'forwardable'
2
+
3
+ require 'genevalidator/exceptions'
1
4
  require 'genevalidator/validation_report'
5
+ require 'genevalidator/validation_test'
6
+
2
7
  module GeneValidator
3
8
  ##
4
9
  # Class that stores the validation output information
5
10
  class BlastRFValidationOutput < ValidationReport
6
- attr_reader :frames_histo
11
+ attr_reader :frames
7
12
  attr_reader :msg
8
13
  attr_reader :total_hsp
9
14
  attr_reader :result
10
15
 
11
- def initialize(short_header, header, description, frames_histo,
16
+ def initialize(short_header, header, description, frames,
12
17
  expected = :yes)
13
18
  @short_header, @header, @description = short_header, header, description
14
- @frames_histo = frames_histo
19
+ @frames = frames
15
20
  @expected = expected
16
21
  @result = validation
17
22
 
18
23
  @msg = ''
19
24
  @exp_msg = ''
20
25
  @total_hsp = 0
21
- @frames_histo.each do |x, y|
26
+ @frames.each do |x, y|
22
27
  @msg << "#{y}&nbsp;HSPs&nbsp;align&nbsp;in&nbsp;frame&nbsp;#{x}; "
23
28
  @exp_msg << "#{y} HSPs align in frame #{x}; "
24
29
  @total_hsp += y.to_i
@@ -36,7 +41,7 @@ module GeneValidator
36
41
  t = "BLAST identified #{@total_hsp} High-scoring Segment Pairs" \
37
42
  ' (HSPs)'
38
43
  if @result == :yes # i.e. if there is only one ORF...
39
- frame = @frames_histo.keys[0].to_s
44
+ frame = @frames.keys[0].to_s
40
45
  t1 = "; all of these align in frame #{frame}."
41
46
  else
42
47
  t1 = ": #{@exp_msg.gsub(/; $/, '')}."
@@ -61,7 +66,7 @@ module GeneValidator
61
66
  # check if there are different reading frames
62
67
  count_p = 0
63
68
  count_n = 0
64
- frames_histo.each do |x, _y|
69
+ frames.each do |x, _y|
65
70
  count_p += 1 if x > 0
66
71
  count_n += 1 if x < 0
67
72
  end
@@ -75,7 +80,7 @@ module GeneValidator
75
80
  class BlastReadingFrameValidation < ValidationTest
76
81
  def initialize(type, prediction, hits = nil)
77
82
  super
78
- @short_header = 'Frame'
83
+ @short_header = 'ReadingFrame'
79
84
  @header = 'Reading Frame'
80
85
  @description = 'Check whether there is a single reading frame among' \
81
86
  ' BLAST hits. Otherwise there might be a reading frame' \
@@ -101,29 +106,25 @@ module GeneValidator
101
106
  start = Time.now
102
107
 
103
108
  rfs = lst.map { |x| x.hsp_list.map(&:query_reading_frame) }.flatten
104
- frames_histo = Hash[rfs.group_by { |x| x }.map { |k, vs| [k, vs.length] }]
109
+ frames = Hash[rfs.group_by { |x| x }.map { |k, vs| [k, vs.length] }]
105
110
 
106
111
  # get the main reading frame
107
- main_rf = frames_histo.map { |_k, v| v }.max
108
- @prediction.nucleotide_rf = frames_histo.select { |_k, v| v == main_rf }.first.first
112
+ main_rf = frames.map { |_k, v| v }.max
113
+ @prediction.nucleotide_rf = frames.select { |_k, v| v == main_rf }.first.first
109
114
 
110
115
  @validation_report = BlastRFValidationOutput.new(@short_header, @header,
111
- @description,
112
- frames_histo)
113
- @validation_report.running_time = Time.now - start
116
+ @description, frames)
117
+ @validation_report.run_time = Time.now - start
114
118
  @validation_report
115
119
 
116
120
  rescue NotEnoughHitsError
117
121
  @validation_report = ValidationReport.new('Not enough evidence',
118
122
  :warning, @short_header,
119
- @header, @description,
120
- @approach, @explanation,
121
- @conclusion)
123
+ @header, @description)
122
124
  rescue Exception
123
125
  @validation_report = ValidationReport.new('Unexpected error', :error,
124
126
  @short_header, @header,
125
- @description, @approach,
126
- @explanation, @conclusion)
127
+ @description)
127
128
  @validation_report.errors.push 'Unexpected Error'
128
129
  end
129
130
  end
@@ -1,6 +1,12 @@
1
- require 'genevalidator/validation_report'
1
+ require 'bio'
2
+ require 'forwardable'
3
+ require 'statsample'
4
+
2
5
  require 'genevalidator/exceptions'
3
6
  require 'genevalidator/ext/array'
7
+ require 'genevalidator/validation_report'
8
+ require 'genevalidator/validation_test'
9
+
4
10
  module GeneValidator
5
11
  ##
6
12
  # Class that stores the validation output information
@@ -71,12 +77,14 @@ module GeneValidator
71
77
  # This class contains the methods necessary for
72
78
  # finding duplicated subsequences in the predicted gene
73
79
  class DuplicationValidation < ValidationTest
80
+ extend Forwardable
81
+ def_delegators GeneValidator, :opt, :config
82
+
74
83
  attr_reader :raw_seq_file
75
84
  attr_reader :index_file_name
76
85
  attr_reader :raw_seq_file_load
77
86
 
78
- def initialize(type, prediction, hits, raw_seq_file, index_file_name,
79
- raw_seq_file_load, db, num_threads)
87
+ def initialize(prediction, hits)
80
88
  super
81
89
  @short_header = 'Duplication'
82
90
  @header = 'Duplication'
@@ -84,11 +92,12 @@ module GeneValidator
84
92
  ' in the predicted gene by counting the hsp' \
85
93
  ' residue coverage of the prediction, for each hit.'
86
94
  @cli_name = 'dup'
87
- @raw_seq_file = raw_seq_file
88
- @index_file_name = index_file_name
89
- @raw_seq_file_load = raw_seq_file_load
90
- @db = db
91
- @num_threads = num_threads
95
+ @raw_seq_file = opt[:raw_sequences]
96
+ @index_file_name = config[:raw_seq_file_index]
97
+ @raw_seq_file_load = config[:raw_seq_file_load]
98
+ @db = opt[:db]
99
+ @num_threads = opt[:num_threads]
100
+ @type = config[:type]
92
101
  end
93
102
 
94
103
  def in_range?(ranges, idx)
@@ -214,7 +223,7 @@ module GeneValidator
214
223
  @header,
215
224
  @description, 1,
216
225
  averages)
217
- @validation_report.running_time = Time.now - start
226
+ @validation_report.run_time = Time.now - start
218
227
  return @validation_report
219
228
  end
220
229
 
@@ -224,31 +233,27 @@ module GeneValidator
224
233
  @header,
225
234
  @description, pval,
226
235
  averages)
227
- @running_time = Time.now - start
236
+ @run_time = Time.now - start
228
237
  @validation_report
229
238
 
230
239
  rescue NotEnoughHitsError
231
240
  @validation_report = ValidationReport.new('Not enough evidence', :warning,
232
241
  @short_header, @header,
233
- @description, @explanation,
234
- @conclusion)
242
+ @description)
235
243
  rescue NoMafftInstallationError
236
244
  @validation_report = ValidationReport.new('Mafft error', :error,
237
245
  @short_header, @header,
238
- @description, @explanation,
239
- @conclusion)
246
+ @description)
240
247
  @validation_report.errors.push NoMafftInstallationError
241
248
  rescue NoInternetError
242
249
  @validation_report = ValidationReport.new('Internet error', :error,
243
250
  @short_header, @header,
244
- @description, @explanation,
245
- @conclusion)
251
+ @description)
246
252
  @validation_report.errors.push NoInternetError
247
253
  rescue Exception
248
254
  @validation_report = ValidationReport.new('Unexpected error', :error,
249
255
  @short_header, @header,
250
- @description, @explanation,
251
- @conclusion)
256
+ @description)
252
257
  @validation_report.errors.push 'Unexpected Error'
253
258
  end
254
259
 
@@ -256,7 +261,6 @@ module GeneValidator
256
261
  # wilcox test implementation from statsample ruby gem
257
262
  # many thanks to Claudio for helping us with the implementation!
258
263
  def wilcox_test(averages)
259
- require 'statsample'
260
264
  wilcox = Statsample::Test.wilcoxon_signed_rank(averages.to_scale,
261
265
  Array.new(averages.length,
262
266
  1).to_scale)
@@ -1,6 +1,11 @@
1
- require 'json'
2
- require 'genevalidator/validation_report'
1
+ require 'forwardable'
2
+ require 'statsample'
3
+
4
+ require 'genevalidator/exceptions'
3
5
  require 'genevalidator/ext/array'
6
+ require 'genevalidator/validation_report'
7
+ require 'genevalidator/validation_test'
8
+
4
9
  module GeneValidator
5
10
  ##
6
11
  # Class that stores the validation output information
@@ -83,26 +88,23 @@ module GeneValidator
83
88
  # checking whether there is evidence that the
84
89
  # prediction is a merge of multiple genes
85
90
  class GeneMergeValidation < ValidationTest
86
- attr_reader :hits
87
91
  attr_reader :prediction
88
- attr_reader :filename
92
+ attr_reader :hits
89
93
 
90
94
  ##
91
95
  # Initializes the object
92
96
  # Params:
93
- # +type+: type of the predicted sequence (:nucleotide or :protein)
94
97
  # +prediction+: a +Sequence+ object representing the blast query
95
98
  # +hits+: a vector of +Sequence+ objects (representing blast hits)
96
- # +filename+: name of the input file, used when generatig the plot files
99
+ # +plot_path+: name of the input file, used when generating the plot files
97
100
  # +boundary+: the offset of the hit from which we start analysing the hit
98
- def initialize(type, prediction, hits, filename, boundary = 10)
101
+ def initialize(prediction, hits, boundary = 10)
99
102
  super
100
- @short_header = 'Gene_Merge'
103
+ @short_header = 'GeneMerge'
101
104
  @header = 'Gene Merge'
102
105
  @description = 'Check whether BLAST hits make evidence about a merge' \
103
106
  ' of two genes that match the predicted gene.'
104
107
  @cli_name = 'merge'
105
- @filename = filename
106
108
  @boundary = boundary
107
109
  end
108
110
 
@@ -158,14 +160,18 @@ module GeneValidator
158
160
  @validation_report.plot_files.push(plot1)
159
161
  plot2 = plot_matched_regions
160
162
  @validation_report.plot_files.push(plot2)
161
- @validation_report.running_time = Time.now - start
163
+ @validation_report.run_time = Time.now - start
162
164
  @validation_report
163
165
 
164
166
  rescue NotEnoughHitsError
165
167
  @validation_report = ValidationReport.new('Not enough evidence', :warning,
166
168
  @short_header, @header,
167
- @description, @approach,
168
- @explanation, @conclusion)
169
+ @description)
170
+ rescue Exception
171
+ @validation_report = ValidationReport.new('Unexpected error', :error,
172
+ @short_header, @header,
173
+ @description)
174
+ @validation_report.errors.push 'Unexpected Error'
169
175
  end
170
176
 
171
177
  ##
@@ -175,29 +181,25 @@ module GeneValidator
175
181
  # +output+: location where the plot will be saved in jpeg file format
176
182
  # +hits+: array of Sequence objects
177
183
  # +prediction+: Sequence objects
178
- def plot_matched_regions(output = "#{filename}_match.json", hits = @hits)
179
-
180
- colors = ['orange', 'blue'] ##{colors[i%2]
181
- f = File.open(output, 'w')
184
+ def plot_matched_regions(hits = @hits)
182
185
  no_lines = hits.length
183
186
 
184
187
  hits_less = hits[0..[no_lines, hits.length - 1].min]
185
188
 
186
- f.write((hits_less.each_with_index.map { |hit, i|
189
+ data = hits_less.each_with_index.map { |hit, i|
187
190
  { 'y' => i,
188
191
  'start' => hit.hsp_list.map(&:match_query_from).min,
189
192
  'stop' => hit.hsp_list.map(&:match_query_to).max,
190
- 'color'=>'black',
191
- 'dotted'=>'true'}}.flatten +
193
+ 'color' =>'black',
194
+ 'dotted' =>'true'}}.flatten +
192
195
  hits_less.each_with_index.map { |hit, i|
193
196
  hit.hsp_list.map { |hsp|
194
197
  { 'y' => i,
195
198
  'start' => hsp.match_query_from,
196
199
  'stop' => hsp.match_query_to,
197
- 'color' => 'orange'} } }.flatten).to_json)
198
- f.close
200
+ 'color' => 'orange'} } }.flatten
199
201
 
200
- Plot.new(output.scan(%r{([^/]+)$})[0][0],
202
+ Plot.new(data,
201
203
  :lines,
202
204
  'Gene Merge Validation: Query coord covered by blast hit (1 line/hit)',
203
205
  '',
@@ -214,60 +216,24 @@ module GeneValidator
214
216
  # +y_intercept+: the equation of the line is y = slope*x + y_intercept
215
217
  # +output+: location where the plot will be saved in jpeg file format
216
218
  # +hits+: array of Sequence objects
217
- def plot_2d_start_from(slope = nil, y_intercept = nil,
218
- output = "#{filename}_match_2d.json", hits = @hits)
219
+ def plot_2d_start_from(slope = nil, y_intercept = nil, hits = @hits)
219
220
  pairs = hits.map do |hit|
220
221
  Pair.new(hit.hsp_list.map(&:match_query_from).min,
221
222
  hit.hsp_list.map(&:match_query_to).max)
222
223
  end
223
224
 
224
- xx = pairs.map(&:x)
225
- yy = pairs.map(&:y)
226
-
227
- freq_x = xx.inject(Hash.new(0)) { |h, v| h[v] += 1; h }
228
- filename_x = "#{filename}_merge_x.json"
229
- f = File.open(filename_x, 'w')
230
- f.write([freq_x.collect { |k,v|
231
- { 'key' => k, 'value' => v, 'main' => (1==2) }
232
- }].to_json)
233
- f.close
234
- plot3 = Plot.new(filename_x.scan(%r{([^/]+)$})[0][0],
235
- :simplebars,
236
- '[Gene Merge] X projection',
237
- '',
238
- 'x projection',
239
- 'number of sequences')
240
- # @validation_report.plot_files.push(plot3)
241
-
242
- freq_y = yy.inject(Hash.new(0)) { |h,v| h[v] += 1; h }
243
- filename_y = "#{filename}_merge_y.json"
244
- f = File.open(filename_y, 'w')
245
- f.write([freq_y.collect { |k, v|
246
- { 'key' => k, 'value' => v, 'main' => (1 == 2) }
247
- }].to_json)
248
- f.close
249
- plot4 = Plot.new(filename_y.scan(%r{([^/]+)$})[0][0],
250
- :simplebars,
251
- '[Gene Merge] Y projection',
252
- '',
253
- 'y projection',
254
- 'number of sequences')
255
- # @validation_report.plot_files.push(plot4)
256
-
257
- f = File.open(output, 'w')
258
- f.write(hits.map { |hit| {'x' => hit.hsp_list.map(&:match_query_from).min,
225
+ data = hits.map { |hit| { 'x' => hit.hsp_list.map(&:match_query_from).min,
259
226
  'y' => hit.hsp_list.map(&:match_query_to).max,
260
- 'color' => 'red'}}.to_json)
261
- f.close
227
+ 'color' => 'red'}}
262
228
 
263
- Plot.new(output.scan(%r{([^/]+)$})[0][0],
229
+ Plot.new(data,
264
230
  :scatter,
265
231
  'Gene Merge Validation: Start/end of matching hit coord. on query (1 point/hit)',
266
232
  '',
267
233
  'Start Offset (most left hsp)',
268
234
  'End Offset (most right hsp)',
269
- y_intercept,
270
- slope)
235
+ y_intercept.to_s,
236
+ slope.to_s)
271
237
  end
272
238
 
273
239
  ##
@@ -314,7 +280,6 @@ module GeneValidator
314
280
  # Output:
315
281
  # The equation of the regression line: [y slope]
316
282
  def slope_statsample(xx, yy)
317
- require 'statsample'
318
283
  sr = Statsample::Regression.simple(xx.to_scale, yy.to_scale)
319
284
  [sr.a, sr.b]
320
285
  end
@@ -1,8 +1,10 @@
1
- require 'json'
1
+ require 'forwardable'
2
+
2
3
  require 'genevalidator/clusterization'
4
+ require 'genevalidator/exceptions'
3
5
  require 'genevalidator/validation_report'
4
6
  require 'genevalidator/validation_test'
5
- require 'genevalidator/exceptions'
7
+
6
8
  module GeneValidator
7
9
  ##
8
10
  # Class that stores the validation output information
@@ -65,7 +67,6 @@ module GeneValidator
65
67
  # This class contains the methods necessary for
66
68
  # length validation by hit length clusterization
67
69
  class LengthClusterValidation < ValidationTest
68
- attr_reader :filename
69
70
  attr_reader :clusters
70
71
  attr_reader :max_density_cluster
71
72
 
@@ -76,9 +77,8 @@ module GeneValidator
76
77
  # +prediction+: a +Sequence+ object representing the blast query
77
78
  # +hits+: a vector of +Sequence+ objects (representing blast hits)
78
79
  # +dilename+: +String+ with the name of the fasta file
79
- def initialize(type, prediction, hits, filename)
80
+ def initialize(prediction, hits)
80
81
  super
81
- @filename = filename
82
82
  @short_header = 'LengthCluster'
83
83
  @header = 'Length Cluster'
84
84
  @description = 'Check whether the prediction length fits most of the' \
@@ -117,20 +117,18 @@ module GeneValidator
117
117
  plot1 = plot_histo_clusters
118
118
  @validation_report.plot_files.push(plot1)
119
119
 
120
- @validation_report.running_time = Time.now - start
120
+ @validation_report.run_time = Time.now - start
121
121
 
122
122
  @validation_report
123
123
 
124
124
  rescue NotEnoughHitsError
125
125
  @validation_report = ValidationReport.new('Not enough evidence', :warning,
126
126
  @short_header, @header,
127
- @description, @approach,
128
- @explanation, @conclusion)
127
+ @description)
129
128
  rescue Exception
130
129
  @validation_report = ValidationReport.new('Unexpected error', :error,
131
130
  @short_header, @header,
132
- @description, @approach,
133
- @explanation, @conclusion)
131
+ @description)
134
132
  @validation_report.errors.push 'Unexpected Error'
135
133
  end
136
134
 
@@ -175,25 +173,24 @@ module GeneValidator
175
173
  ##
176
174
  # Generates a json file containing data used for plotting the histogram
177
175
  # of the length distribution given a list of Cluster objects
178
- # +output+: filename where to save the graph
176
+ # +output+: plot_path where to save the graph
179
177
  # +clusters+: array of +Cluster+ objects
180
178
  # +max_density_cluster+: index of the most dense cluster
181
179
  # +prediction+: +Sequence+ object
182
180
  # Output:
183
181
  # +Plot+ object
184
- def plot_histo_clusters(output = "#{@filename}_len_clusters.json",
182
+ def plot_histo_clusters(output = "#{@plot_path}_len_clusters.json",
185
183
  clusters = @clusters,
186
184
  max_density_cluster = @max_density_cluster,
187
185
  prediction = @prediction)
188
186
 
189
- f = File.open(output, 'w')
190
- f.write(clusters.each_with_index.map { |cluster, i|
187
+ data = clusters.each_with_index.map { |cluster, i|
191
188
  cluster.lengths.collect { |k, v|
192
189
  { 'key' => k, 'value' => v, 'main' => (i == max_density_cluster) }
193
190
  }
194
- }.to_json)
195
- f.close
196
- Plot.new(output.scan(%r{([^/]+)$})[0][0],
191
+ }
192
+
193
+ Plot.new(data,
197
194
  :bars,
198
195
  'Length Cluster Validation: Distribution of BLAST hit lengths',
199
196
  'Query Sequence, black;Most Dense Cluster,red;Other Hits, blue',
@@ -201,41 +198,5 @@ module GeneValidator
201
198
  'Number of Sequences',
202
199
  prediction.length_protein)
203
200
  end
204
-
205
- ##
206
- # Generates a json file cotaining data used for plotting
207
- # lines corresponding to the start and end hit offsets
208
- # Params:
209
- # +output+: filename where to save the graph
210
- # +hits+: array of Sequence objects
211
- # Output:
212
- # +Plot+ object
213
- def plot_len_clusters(output = "#{@filename}_len.json", _hits = @hits)
214
- f = File.open(output, 'w')
215
- lst = @hits.sort { |a, b| a.length_protein <=> b.length_protein }
216
-
217
- no_lines = 100
218
-
219
- lst_less = lst[0..[no_lines, lst.length - 1].min]
220
-
221
- f.write((lst_less.each_with_index.map { |hit, i|
222
- { 'y' => i, 'start' => 0, 'stop' => hit.length_protein,
223
- 'color' => 'gray' }
224
- } + lst_less.each_with_index.map { |hit, i|
225
- hit.hsp_list.map { |hsp|
226
- { 'y' => i, 'start' => hsp.hit_from, 'stop' => hsp.hit_to,
227
- 'color' => 'red' }
228
- }
229
- }.flatten).to_json)
230
-
231
- f.close
232
- Plot.new(output.scan(%r{([^/]+)$})[0][0],
233
- :lines,
234
- '[Length Cluster] Matched regions in hits',
235
- 'hit, gray;high-scoring segment pairs (hsp), red',
236
- 'offset in the hit',
237
- 'number of the hit',
238
- lst_less.length)
239
- end
240
201
  end
241
202
  end