genevalidator 1.6.1 → 1.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -1
  3. data/.travis.yml +2 -0
  4. data/README.md +78 -30
  5. data/Rakefile +11 -8
  6. data/aux/app_template_footer.erb +1 -6
  7. data/aux/app_template_header.erb +12 -32
  8. data/aux/files/css/style.css +2 -8
  9. data/aux/files/js/plots.js +564 -576
  10. data/aux/files/js/script.js +10 -0
  11. data/aux/json_footer.erb +8 -0
  12. data/aux/json_header.erb +19 -0
  13. data/aux/json_query.erb +14 -0
  14. data/aux/template_footer.erb +9 -58
  15. data/aux/template_header.erb +18 -58
  16. data/aux/template_query.erb +8 -36
  17. data/bin/genevalidator +45 -32
  18. data/genevalidator.gemspec +11 -7
  19. data/lib/genevalidator.rb +75 -455
  20. data/lib/genevalidator/arg_validation.rb +78 -107
  21. data/lib/genevalidator/blast.rb +57 -60
  22. data/lib/genevalidator/clusterization.rb +15 -15
  23. data/lib/genevalidator/exceptions.rb +32 -5
  24. data/lib/genevalidator/get_raw_sequences.rb +70 -33
  25. data/lib/genevalidator/hsp.rb +1 -4
  26. data/lib/genevalidator/json_to_gv_results.rb +109 -0
  27. data/lib/genevalidator/output.rb +177 -185
  28. data/lib/genevalidator/pool.rb +2 -1
  29. data/lib/genevalidator/sequences.rb +3 -3
  30. data/lib/genevalidator/tabular_parser.rb +24 -18
  31. data/lib/genevalidator/validation.rb +279 -0
  32. data/lib/genevalidator/validation_alignment.rb +31 -47
  33. data/lib/genevalidator/validation_blast_reading_frame.rb +19 -18
  34. data/lib/genevalidator/validation_duplication.rb +23 -19
  35. data/lib/genevalidator/validation_gene_merge.rb +30 -65
  36. data/lib/genevalidator/validation_length_cluster.rb +14 -53
  37. data/lib/genevalidator/validation_length_rank.rb +10 -11
  38. data/lib/genevalidator/validation_open_reading_frame.rb +18 -19
  39. data/lib/genevalidator/validation_report.rb +2 -5
  40. data/lib/genevalidator/validation_test.rb +8 -4
  41. data/lib/genevalidator/version.rb +1 -1
  42. data/test/test_all_validations.rb +51 -66
  43. data/test/test_blast.rb +68 -51
  44. data/test/test_clusterization.rb +1 -1
  45. data/test/test_clusterization_2d.rb +19 -13
  46. data/test/test_extended_array_methods.rb +1 -1
  47. data/test/test_files/all_validations_mrna/mrna.blast_tab6 +1806 -0
  48. data/test/test_files/all_validations_mrna/mrna.blast_tab7 +1865 -0
  49. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml → mrna.blast_xml} +18642 -1
  50. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml.index → mrna.blast_xml.index} +300 -0
  51. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta → mrna.fa} +0 -0
  52. data/test/test_files/all_validations_mrna/mrna.raw_seq +3970 -0
  53. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml.raw_seq.idx → mrna.raw_seq.idx} +901 -1
  54. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_tab → prot.blast_tab6} +416 -0
  55. data/test/test_files/all_validations_prot/prot.blast_tab7 +2400 -0
  56. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml → prot.blast_xml} +18299 -6723
  57. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.index → prot.blast_xml.index} +408 -0
  58. data/test/test_files/all_validations_prot/{all_validations_prot.fasta → prot.fa} +0 -0
  59. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.raw_seq → prot.raw_seq} +2735 -0
  60. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.raw_seq.idx → prot.raw_seq.idx} +3032 -1808
  61. data/test/test_sequences.rb +46 -41
  62. data/test/test_validation_open_reading_frame.rb +318 -202
  63. data/test/test_validations.rb +48 -32
  64. metadata +76 -102
  65. data/doc/AliasDuplicationError.html +0 -134
  66. data/doc/AlignmentValidation.html +0 -1687
  67. data/doc/AlignmentValidationOutput.html +0 -659
  68. data/doc/Blast.html +0 -1905
  69. data/doc/BlastRFValidationOutput.html +0 -545
  70. data/doc/BlastReadingFrameValidation.html +0 -370
  71. data/doc/BlastUtils.html +0 -875
  72. data/doc/ClasspathError.html +0 -134
  73. data/doc/Cluster.html +0 -1316
  74. data/doc/DuplciationValidationOutput.html +0 -564
  75. data/doc/DuplicationValidation.html +0 -920
  76. data/doc/DuplicationValidationOutput.html +0 -564
  77. data/doc/FileNotFoundException.html +0 -134
  78. data/doc/GeneMergeValidation.html +0 -935
  79. data/doc/GeneMergeValidationOutput.html +0 -652
  80. data/doc/HierarchicalClusterization.html +0 -994
  81. data/doc/Hsp.html +0 -1485
  82. data/doc/InconsistentTabularFormat.html +0 -135
  83. data/doc/LengthClusterValidation.html +0 -982
  84. data/doc/LengthClusterValidationOutput.html +0 -515
  85. data/doc/LengthRankValidation.html +0 -496
  86. data/doc/LengthRankValidationOutput.html +0 -517
  87. data/doc/NoInternetError.html +0 -135
  88. data/doc/NoMafftInstallationError.html +0 -134
  89. data/doc/NoPIdentError.html +0 -134
  90. data/doc/NoValidationError.html +0 -134
  91. data/doc/NotEnoughHitsError.html +0 -135
  92. data/doc/ORFValidationOutput.html +0 -593
  93. data/doc/OpenReadingFrameValidation.html +0 -1107
  94. data/doc/OtherError.html +0 -123
  95. data/doc/Output.html +0 -1540
  96. data/doc/Pair.html +0 -309
  97. data/doc/PairCluster.html +0 -767
  98. data/doc/Plot.html +0 -837
  99. data/doc/QueryError.html +0 -134
  100. data/doc/ReportClassError.html +0 -135
  101. data/doc/Sequence.html +0 -1299
  102. data/doc/SequenceTypeError.html +0 -135
  103. data/doc/TabularEntry.html +0 -837
  104. data/doc/TabularParser.html +0 -1104
  105. data/doc/Validation.html +0 -2147
  106. data/doc/ValidationClassError.html +0 -134
  107. data/doc/ValidationOutput.html +0 -460
  108. data/doc/ValidationReport.html +0 -940
  109. data/doc/ValidationTest.html +0 -939
  110. data/doc/_index.html +0 -449
  111. data/doc/class_list.html +0 -54
  112. data/doc/css/common.css +0 -1
  113. data/doc/css/full_list.css +0 -57
  114. data/doc/css/style.css +0 -338
  115. data/doc/file.README.html +0 -151
  116. data/doc/file_list.html +0 -56
  117. data/doc/frames.html +0 -26
  118. data/doc/index.html +0 -151
  119. data/doc/js/app.js +0 -214
  120. data/doc/js/full_list.js +0 -178
  121. data/doc/js/jquery.js +0 -4
  122. data/doc/method_list.html +0 -1505
  123. data/doc/top-level-namespace.html +0 -112
  124. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab +0 -967
  125. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.index +0 -967
  126. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.raw_seq +0 -4929
  127. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.raw_seq.idx +0 -1006
  128. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_xml.raw_seq +0 -2075
  129. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.index +0 -1864
  130. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.raw_seq +0 -42411
  131. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.raw_seq.idx +0 -3751
@@ -1,24 +1,29 @@
1
+ require 'forwardable'
2
+
3
+ require 'genevalidator/exceptions'
1
4
  require 'genevalidator/validation_report'
5
+ require 'genevalidator/validation_test'
6
+
2
7
  module GeneValidator
3
8
  ##
4
9
  # Class that stores the validation output information
5
10
  class BlastRFValidationOutput < ValidationReport
6
- attr_reader :frames_histo
11
+ attr_reader :frames
7
12
  attr_reader :msg
8
13
  attr_reader :total_hsp
9
14
  attr_reader :result
10
15
 
11
- def initialize(short_header, header, description, frames_histo,
16
+ def initialize(short_header, header, description, frames,
12
17
  expected = :yes)
13
18
  @short_header, @header, @description = short_header, header, description
14
- @frames_histo = frames_histo
19
+ @frames = frames
15
20
  @expected = expected
16
21
  @result = validation
17
22
 
18
23
  @msg = ''
19
24
  @exp_msg = ''
20
25
  @total_hsp = 0
21
- @frames_histo.each do |x, y|
26
+ @frames.each do |x, y|
22
27
  @msg << "#{y}&nbsp;HSPs&nbsp;align&nbsp;in&nbsp;frame&nbsp;#{x}; "
23
28
  @exp_msg << "#{y} HSPs align in frame #{x}; "
24
29
  @total_hsp += y.to_i
@@ -36,7 +41,7 @@ module GeneValidator
36
41
  t = "BLAST identified #{@total_hsp} High-scoring Segment Pairs" \
37
42
  ' (HSPs)'
38
43
  if @result == :yes # i.e. if there is only one ORF...
39
- frame = @frames_histo.keys[0].to_s
44
+ frame = @frames.keys[0].to_s
40
45
  t1 = "; all of these align in frame #{frame}."
41
46
  else
42
47
  t1 = ": #{@exp_msg.gsub(/; $/, '')}."
@@ -61,7 +66,7 @@ module GeneValidator
61
66
  # chack if there are different reading frames
62
67
  count_p = 0
63
68
  count_n = 0
64
- frames_histo.each do |x, _y|
69
+ frames.each do |x, _y|
65
70
  count_p += 1 if x > 0
66
71
  count_n += 1 if x < 0
67
72
  end
@@ -75,7 +80,7 @@ module GeneValidator
75
80
  class BlastReadingFrameValidation < ValidationTest
76
81
  def initialize(type, prediction, hits = nil)
77
82
  super
78
- @short_header = 'Frame'
83
+ @short_header = 'ReadingFrame'
79
84
  @header = 'Reading Frame'
80
85
  @description = 'Check whether there is a single reading frame among' \
81
86
  ' BLAST hits. Otherwise there might be a reading frame' \
@@ -101,29 +106,25 @@ module GeneValidator
101
106
  start = Time.now
102
107
 
103
108
  rfs = lst.map { |x| x.hsp_list.map(&:query_reading_frame) }.flatten
104
- frames_histo = Hash[rfs.group_by { |x| x }.map { |k, vs| [k, vs.length] }]
109
+ frames = Hash[rfs.group_by { |x| x }.map { |k, vs| [k, vs.length] }]
105
110
 
106
111
  # get the main reading frame
107
- main_rf = frames_histo.map { |_k, v| v }.max
108
- @prediction.nucleotide_rf = frames_histo.select { |_k, v| v == main_rf }.first.first
112
+ main_rf = frames.map { |_k, v| v }.max
113
+ @prediction.nucleotide_rf = frames.select { |_k, v| v == main_rf }.first.first
109
114
 
110
115
  @validation_report = BlastRFValidationOutput.new(@short_header, @header,
111
- @description,
112
- frames_histo)
113
- @validation_report.running_time = Time.now - start
116
+ @description, frames)
117
+ @validation_report.run_time = Time.now - start
114
118
  @validation_report
115
119
 
116
120
  rescue NotEnoughHitsError
117
121
  @validation_report = ValidationReport.new('Not enough evidence',
118
122
  :warning, @short_header,
119
- @header, @description,
120
- @approach, @explanation,
121
- @conclusion)
123
+ @header, @description)
122
124
  rescue Exception
123
125
  @validation_report = ValidationReport.new('Unexpected error', :error,
124
126
  @short_header, @header,
125
- @description, @approach,
126
- @explanation, @conclusion)
127
+ @description)
127
128
  @validation_report.errors.push 'Unexpected Error'
128
129
  end
129
130
  end
@@ -1,6 +1,12 @@
1
- require 'genevalidator/validation_report'
1
+ require 'bio'
2
+ require 'forwardable'
3
+ require 'statsample'
4
+
2
5
  require 'genevalidator/exceptions'
3
6
  require 'genevalidator/ext/array'
7
+ require 'genevalidator/validation_report'
8
+ require 'genevalidator/validation_test'
9
+
4
10
  module GeneValidator
5
11
  ##
6
12
  # Class that stores the validation output information
@@ -71,12 +77,14 @@ module GeneValidator
71
77
  # This class contains the methods necessary for
72
78
  # finding duplicated subsequences in the predicted gene
73
79
  class DuplicationValidation < ValidationTest
80
+ extend Forwardable
81
+ def_delegators GeneValidator, :opt, :config
82
+
74
83
  attr_reader :raw_seq_file
75
84
  attr_reader :index_file_name
76
85
  attr_reader :raw_seq_file_load
77
86
 
78
- def initialize(type, prediction, hits, raw_seq_file, index_file_name,
79
- raw_seq_file_load, db, num_threads)
87
+ def initialize(prediction, hits)
80
88
  super
81
89
  @short_header = 'Duplication'
82
90
  @header = 'Duplication'
@@ -84,11 +92,12 @@ module GeneValidator
84
92
  ' in the predicted gene by counting the hsp' \
85
93
  ' residue coverage of the prediction, for each hit.'
86
94
  @cli_name = 'dup'
87
- @raw_seq_file = raw_seq_file
88
- @index_file_name = index_file_name
89
- @raw_seq_file_load = raw_seq_file_load
90
- @db = db
91
- @num_threads = num_threads
95
+ @raw_seq_file = opt[:raw_sequences]
96
+ @index_file_name = config[:raw_seq_file_index]
97
+ @raw_seq_file_load = config[:raw_seq_file_load]
98
+ @db = opt[:db]
99
+ @num_threads = opt[:num_threads]
100
+ @type = config[:type]
92
101
  end
93
102
 
94
103
  def in_range?(ranges, idx)
@@ -214,7 +223,7 @@ module GeneValidator
214
223
  @header,
215
224
  @description, 1,
216
225
  averages)
217
- @validation_report.running_time = Time.now - start
226
+ @validation_report.run_time = Time.now - start
218
227
  return @validation_report
219
228
  end
220
229
 
@@ -224,31 +233,27 @@ module GeneValidator
224
233
  @header,
225
234
  @description, pval,
226
235
  averages)
227
- @running_time = Time.now - start
236
+ @run_time = Time.now - start
228
237
  @validation_report
229
238
 
230
239
  rescue NotEnoughHitsError
231
240
  @validation_report = ValidationReport.new('Not enough evidence', :warning,
232
241
  @short_header, @header,
233
- @description, @explanation,
234
- @conclusion)
242
+ @description)
235
243
  rescue NoMafftInstallationError
236
244
  @validation_report = ValidationReport.new('Mafft error', :error,
237
245
  @short_header, @header,
238
- @description, @explanation,
239
- @conclusion)
246
+ @description)
240
247
  @validation_report.errors.push NoMafftInstallationError
241
248
  rescue NoInternetError
242
249
  @validation_report = ValidationReport.new('Internet error', :error,
243
250
  @short_header, @header,
244
- @description, @explanation,
245
- @conclusion)
251
+ @description)
246
252
  @validation_report.errors.push NoInternetError
247
253
  rescue Exception
248
254
  @validation_report = ValidationReport.new('Unexpected error', :error,
249
255
  @short_header, @header,
250
- @description, @explanation,
251
- @conclusion)
256
+ @description)
252
257
  @validation_report.errors.push 'Unexpected Error'
253
258
  end
254
259
 
@@ -256,7 +261,6 @@ module GeneValidator
256
261
  # wilcox test implementation from statsample ruby gem
257
262
  # many thanks to Claudio for helping us with the implementation!
258
263
  def wilcox_test(averages)
259
- require 'statsample'
260
264
  wilcox = Statsample::Test.wilcoxon_signed_rank(averages.to_scale,
261
265
  Array.new(averages.length,
262
266
  1).to_scale)
@@ -1,6 +1,11 @@
1
- require 'json'
2
- require 'genevalidator/validation_report'
1
+ require 'forwardable'
2
+ require 'statsample'
3
+
4
+ require 'genevalidator/exceptions'
3
5
  require 'genevalidator/ext/array'
6
+ require 'genevalidator/validation_report'
7
+ require 'genevalidator/validation_test'
8
+
4
9
  module GeneValidator
5
10
  ##
6
11
  # Class that stores the validation output information
@@ -83,26 +88,23 @@ module GeneValidator
83
88
  # checking whether there is evidence that the
84
89
  # prediction is a merge of multiple genes
85
90
  class GeneMergeValidation < ValidationTest
86
- attr_reader :hits
87
91
  attr_reader :prediction
88
- attr_reader :filename
92
+ attr_reader :hits
89
93
 
90
94
  ##
91
95
  # Initilizes the object
92
96
  # Params:
93
- # +type+: type of the predicted sequence (:nucleotide or :protein)
94
97
  # +prediction+: a +Sequence+ object representing the blast query
95
98
  # +hits+: a vector of +Sequence+ objects (representing blast hits)
96
- # +filename+: name of the input file, used when generatig the plot files
99
+ # +plot_path+: name of the input file, used when generatig the plot files
97
100
  # +boundary+: the offset of the hit from which we start analysing the hit
98
- def initialize(type, prediction, hits, filename, boundary = 10)
101
+ def initialize(prediction, hits, boundary = 10)
99
102
  super
100
- @short_header = 'Gene_Merge'
103
+ @short_header = 'GeneMerge'
101
104
  @header = 'Gene Merge'
102
105
  @description = 'Check whether BLAST hits make evidence about a merge' \
103
106
  ' of two genes that match the predicted gene.'
104
107
  @cli_name = 'merge'
105
- @filename = filename
106
108
  @boundary = boundary
107
109
  end
108
110
 
@@ -158,14 +160,18 @@ module GeneValidator
158
160
  @validation_report.plot_files.push(plot1)
159
161
  plot2 = plot_matched_regions
160
162
  @validation_report.plot_files.push(plot2)
161
- @validation_report.running_time = Time.now - start
163
+ @validation_report.run_time = Time.now - start
162
164
  @validation_report
163
165
 
164
166
  rescue NotEnoughHitsError
165
167
  @validation_report = ValidationReport.new('Not enough evidence', :warning,
166
168
  @short_header, @header,
167
- @description, @approach,
168
- @explanation, @conclusion)
169
+ @description)
170
+ rescue Exception
171
+ @validation_report = ValidationReport.new('Unexpected error', :error,
172
+ @short_header, @header,
173
+ @description)
174
+ @validation_report.errors.push 'Unexpected Error'
169
175
  end
170
176
 
171
177
  ##
@@ -175,29 +181,25 @@ module GeneValidator
175
181
  # +output+: location where the plot will be saved in jped file format
176
182
  # +hits+: array of Sequence objects
177
183
  # +prediction+: Sequence objects
178
- def plot_matched_regions(output = "#{filename}_match.json", hits = @hits)
179
-
180
- colors = ['orange', 'blue'] ##{colors[i%2]
181
- f = File.open(output, 'w')
184
+ def plot_matched_regions(hits = @hits)
182
185
  no_lines = hits.length
183
186
 
184
187
  hits_less = hits[0..[no_lines, hits.length - 1].min]
185
188
 
186
- f.write((hits_less.each_with_index.map { |hit, i|
189
+ data = hits_less.each_with_index.map { |hit, i|
187
190
  { 'y' => i,
188
191
  'start' => hit.hsp_list.map(&:match_query_from).min,
189
192
  'stop' => hit.hsp_list.map(&:match_query_to).max,
190
- 'color'=>'black',
191
- 'dotted'=>'true'}}.flatten +
193
+ 'color' =>'black',
194
+ 'dotted' =>'true'}}.flatten +
192
195
  hits_less.each_with_index.map { |hit, i|
193
196
  hit.hsp_list.map { |hsp|
194
197
  { 'y' => i,
195
198
  'start' => hsp.match_query_from,
196
199
  'stop' => hsp.match_query_to,
197
- 'color' => 'orange'} } }.flatten).to_json)
198
- f.close
200
+ 'color' => 'orange'} } }.flatten
199
201
 
200
- Plot.new(output.scan(%r{([^/]+)$})[0][0],
202
+ Plot.new(data,
201
203
  :lines,
202
204
  'Gene Merge Validation: Query coord covered by blast hit (1 line/hit)',
203
205
  '',
@@ -214,60 +216,24 @@ module GeneValidator
214
216
  # +y_intercept+: the ecuation of the line is y= slope*x + y_intercept
215
217
  # +output+: location where the plot will be saved in jped file format
216
218
  # +hits+: array of Sequence objects
217
- def plot_2d_start_from(slope = nil, y_intercept = nil,
218
- output = "#{filename}_match_2d.json", hits = @hits)
219
+ def plot_2d_start_from(slope = nil, y_intercept = nil, hits = @hits)
219
220
  pairs = hits.map do |hit|
220
221
  Pair.new(hit.hsp_list.map(&:match_query_from).min,
221
222
  hit.hsp_list.map(&:match_query_to).max)
222
223
  end
223
224
 
224
- xx = pairs.map(&:x)
225
- yy = pairs.map(&:y)
226
-
227
- freq_x = xx.inject(Hash.new(0)) { |h, v| h[v] += 1; h }
228
- filename_x = "#{filename}_merge_x.json"
229
- f = File.open(filename_x, 'w')
230
- f.write([freq_x.collect { |k,v|
231
- { 'key' => k, 'value' => v, 'main' => (1==2) }
232
- }].to_json)
233
- f.close
234
- plot3 = Plot.new(filename_x.scan(%r{([^/]+)$})[0][0],
235
- :simplebars,
236
- '[Gene Merge] X projection',
237
- '',
238
- 'x projection',
239
- 'number of sequences')
240
- # @validation_report.plot_files.push(plot3)
241
-
242
- freq_y = yy.inject(Hash.new(0)) { |h,v| h[v] += 1; h }
243
- filename_y = "#{filename}_merge_y.json"
244
- f = File.open(filename_y, 'w')
245
- f.write([freq_y.collect { |k, v|
246
- { 'key' => k, 'value' => v, 'main' => (1 == 2) }
247
- }].to_json)
248
- f.close
249
- plot4 = Plot.new(filename_y.scan(%r{([^/]+)$})[0][0],
250
- :simplebars,
251
- '[Gene Merge] Y projection',
252
- '',
253
- 'y projection',
254
- 'number of sequences')
255
- # @validation_report.plot_files.push(plot4)
256
-
257
- f = File.open(output, 'w')
258
- f.write(hits.map { |hit| {'x' => hit.hsp_list.map(&:match_query_from).min,
225
+ data = hits.map { |hit| { 'x' => hit.hsp_list.map(&:match_query_from).min,
259
226
  'y' => hit.hsp_list.map(&:match_query_to).max,
260
- 'color' => 'red'}}.to_json)
261
- f.close
227
+ 'color' => 'red'}}
262
228
 
263
- Plot.new(output.scan(%r{([^/]+)$})[0][0],
229
+ Plot.new(data,
264
230
  :scatter,
265
231
  'Gene Merge Validation: Start/end of matching hit coord. on query (1 point/hit)',
266
232
  '',
267
233
  'Start Offset (most left hsp)',
268
234
  'End Offset (most right hsp)',
269
- y_intercept,
270
- slope)
235
+ y_intercept.to_s,
236
+ slope.to_s)
271
237
  end
272
238
 
273
239
  ##
@@ -314,7 +280,6 @@ module GeneValidator
314
280
  # Output:
315
281
  # The ecuation of the regression line: [y slope]
316
282
  def slope_statsample(xx, yy)
317
- require 'statsample'
318
283
  sr = Statsample::Regression.simple(xx.to_scale, yy.to_scale)
319
284
  [sr.a, sr.b]
320
285
  end
@@ -1,8 +1,10 @@
1
- require 'json'
1
+ require 'forwardable'
2
+
2
3
  require 'genevalidator/clusterization'
4
+ require 'genevalidator/exceptions'
3
5
  require 'genevalidator/validation_report'
4
6
  require 'genevalidator/validation_test'
5
- require 'genevalidator/exceptions'
7
+
6
8
  module GeneValidator
7
9
  ##
8
10
  # Class that stores the validation output information
@@ -65,7 +67,6 @@ module GeneValidator
65
67
  # This class contains the methods necessary for
66
68
  # length validation by hit length clusterization
67
69
  class LengthClusterValidation < ValidationTest
68
- attr_reader :filename
69
70
  attr_reader :clusters
70
71
  attr_reader :max_density_cluster
71
72
 
@@ -76,9 +77,8 @@ module GeneValidator
76
77
  # +prediction+: a +Sequence+ object representing the blast query
77
78
  # +hits+: a vector of +Sequence+ objects (representing blast hits)
78
79
  # +dilename+: +String+ with the name of the fasta file
79
- def initialize(type, prediction, hits, filename)
80
+ def initialize(prediction, hits)
80
81
  super
81
- @filename = filename
82
82
  @short_header = 'LengthCluster'
83
83
  @header = 'Length Cluster'
84
84
  @description = 'Check whether the prediction length fits most of the' \
@@ -117,20 +117,18 @@ module GeneValidator
117
117
  plot1 = plot_histo_clusters
118
118
  @validation_report.plot_files.push(plot1)
119
119
 
120
- @validation_report.running_time = Time.now - start
120
+ @validation_report.run_time = Time.now - start
121
121
 
122
122
  @validation_report
123
123
 
124
124
  rescue NotEnoughHitsError
125
125
  @validation_report = ValidationReport.new('Not enough evidence', :warning,
126
126
  @short_header, @header,
127
- @description, @approach,
128
- @explanation, @conclusion)
127
+ @description)
129
128
  rescue Exception
130
129
  @validation_report = ValidationReport.new('Unexpected error', :error,
131
130
  @short_header, @header,
132
- @description, @approach,
133
- @explanation, @conclusion)
131
+ @description)
134
132
  @validation_report.errors.push 'Unexpected Error'
135
133
  end
136
134
 
@@ -175,25 +173,24 @@ module GeneValidator
175
173
  ##
176
174
  # Generates a json file containing data used for plotting the histogram
177
175
  # of the length distribution given a lust of Cluster objects
178
- # +output+: filename where to save the graph
176
+ # +output+: plot_path where to save the graph
179
177
  # +clusters+: array of +Cluster+ objects
180
178
  # +max_density_cluster+: index of the most dense cluster
181
179
  # +prediction+: +Sequence+ object
182
180
  # Output:
183
181
  # +Plot+ object
184
- def plot_histo_clusters(output = "#{@filename}_len_clusters.json",
182
+ def plot_histo_clusters(output = "#{@plot_path}_len_clusters.json",
185
183
  clusters = @clusters,
186
184
  max_density_cluster = @max_density_cluster,
187
185
  prediction = @prediction)
188
186
 
189
- f = File.open(output, 'w')
190
- f.write(clusters.each_with_index.map { |cluster, i|
187
+ data = clusters.each_with_index.map { |cluster, i|
191
188
  cluster.lengths.collect { |k, v|
192
189
  { 'key' => k, 'value' => v, 'main' => (i == max_density_cluster) }
193
190
  }
194
- }.to_json)
195
- f.close
196
- Plot.new(output.scan(%r{([^/]+)$})[0][0],
191
+ }
192
+
193
+ Plot.new(data,
197
194
  :bars,
198
195
  'Length Cluster Validation: Distribution of BLAST hit lengths',
199
196
  'Query Sequence, black;Most Dense Cluster,red;Other Hits, blue',
@@ -201,41 +198,5 @@ module GeneValidator
201
198
  'Number of Sequences',
202
199
  prediction.length_protein)
203
200
  end
204
-
205
- ##
206
- # Generates a json file cotaining data used for plotting
207
- # lines corresponding to the start and end hit offsets
208
- # Params:
209
- # +output+: filename where to save the graph
210
- # +hits+: array of Sequence objects
211
- # Output:
212
- # +Plot+ object
213
- def plot_len_clusters(output = "#{@filename}_len.json", _hits = @hits)
214
- f = File.open(output, 'w')
215
- lst = @hits.sort { |a, b| a.length_protein <=> b.length_protein }
216
-
217
- no_lines = 100
218
-
219
- lst_less = lst[0..[no_lines, lst.length - 1].min]
220
-
221
- f.write((lst_less.each_with_index.map { |hit, i|
222
- { 'y' => i, 'start' => 0, 'stop' => hit.length_protein,
223
- 'color' => 'gray' }
224
- } + lst_less.each_with_index.map { |hit, i|
225
- hit.hsp_list.map { |hsp|
226
- { 'y' => i, 'start' => hsp.hit_from, 'stop' => hsp.hit_to,
227
- 'color' => 'red' }
228
- }
229
- }.flatten).to_json)
230
-
231
- f.close
232
- Plot.new(output.scan(%r{([^/]+)$})[0][0],
233
- :lines,
234
- '[Length Cluster] Matched regions in hits',
235
- 'hit, gray;high-scoring segment pairs (hsp), red',
236
- 'offset in the hit',
237
- 'number of the hit',
238
- lst_less.length)
239
- end
240
201
  end
241
202
  end