genevalidator 1.6.2 → 1.6.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +147 -76
  3. data/Rakefile +1 -1
  4. data/aux/files/css/genevalidator.compiled.min.css +16 -0
  5. data/aux/files/css/{bootstrap.min.css → src/bootstrap.min.css} +0 -0
  6. data/aux/files/css/{font-awesome.min.css → src/font-awesome.min.css} +0 -0
  7. data/aux/files/css/{style.css → src/style.css} +0 -0
  8. data/aux/files/js/genevalidator.compiled.min.js +28 -0
  9. data/aux/files/js/{bootstrap.min.js → src/bootstrap.min.js} +0 -0
  10. data/aux/files/js/{d3.v3.min.js → src/d3.v3.min.js} +0 -0
  11. data/aux/files/js/{jquery-2.1.1.min.js → src/jquery-2.1.1.min.js} +0 -0
  12. data/aux/files/js/{jquery.tablesorter.min.js → src/jquery.tablesorter.min.js} +0 -0
  13. data/aux/files/js/src/plots.js +814 -0
  14. data/aux/files/js/src/script.js +43 -0
  15. data/aux/json_header.erb +6 -6
  16. data/aux/json_query.erb +2 -1
  17. data/aux/template_footer.erb +0 -11
  18. data/aux/template_header.erb +4 -4
  19. data/aux/template_query.erb +1 -1
  20. data/bin/genevalidator +8 -6
  21. data/genevalidator.gemspec +1 -1
  22. data/lib/genevalidator.rb +7 -5
  23. data/lib/genevalidator/arg_validation.rb +12 -9
  24. data/lib/genevalidator/blast.rb +18 -11
  25. data/lib/genevalidator/clusterization.rb +35 -31
  26. data/lib/genevalidator/exceptions.rb +0 -1
  27. data/lib/genevalidator/get_raw_sequences.rb +115 -69
  28. data/lib/genevalidator/hsp.rb +8 -8
  29. data/lib/genevalidator/json_to_gv_results.rb +4 -4
  30. data/lib/genevalidator/output.rb +40 -41
  31. data/lib/genevalidator/pool.rb +5 -4
  32. data/lib/genevalidator/query.rb +37 -0
  33. data/lib/genevalidator/tabular_parser.rb +3 -4
  34. data/lib/genevalidator/validation.rb +16 -11
  35. data/lib/genevalidator/validation_alignment.rb +17 -23
  36. data/lib/genevalidator/validation_blast_reading_frame.rb +3 -3
  37. data/lib/genevalidator/validation_duplication.rb +8 -18
  38. data/lib/genevalidator/validation_gene_merge.rb +11 -9
  39. data/lib/genevalidator/validation_length_cluster.rb +8 -11
  40. data/lib/genevalidator/validation_length_rank.rb +5 -4
  41. data/lib/genevalidator/validation_open_reading_frame.rb +5 -5
  42. data/lib/genevalidator/version.rb +1 -1
  43. data/test/test_all_validations.rb +2 -1
  44. data/test/test_blast.rb +4 -3
  45. data/test/test_extended_array_methods.rb +2 -1
  46. data/test/{test_sequences.rb → test_query.rb} +5 -23
  47. data/test/test_validation_open_reading_frame.rb +7 -7
  48. data/test/test_validations.rb +8 -6
  49. metadata +16 -16
  50. data/aux/app_template_footer.erb +0 -1
  51. data/aux/app_template_header.erb +0 -12
  52. data/aux/files/js/plots.js +0 -828
  53. data/aux/files/js/script.js +0 -71
  54. data/lib/genevalidator/sequences.rb +0 -101
@@ -4,7 +4,7 @@
4
4
  # Copyright © 2012, Kim Burgestrand kim@burgestrand.se
5
5
  #
6
6
  # Permission is hereby granted, free of charge, to any person obtaining a copy
7
- # of this software and associated documentation files (the Software), to deal
7
+ # of this software and associated documentation files (the "Software"), to deal
8
8
  # in the Software without restriction, including without limitation the rights
9
9
  # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
10
  # copies of the Software, and to permit persons to whom the Software is
@@ -13,7 +13,7 @@
13
13
  # The above copyright notice and this permission notice shall be included in
14
14
  # all copies or substantial portions of the Software.
15
15
  #
16
- # THE SOFTWARE IS PROVIDED AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
17
  # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
18
  # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
19
  # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
@@ -23,6 +23,7 @@
23
23
 
24
24
  require 'thread'
25
25
 
26
+ # Class that creates a thread safe pool.
26
27
  class Pool
27
28
  def initialize(size)
28
29
  @size = size
@@ -52,13 +53,13 @@ class Pool
52
53
  end
53
54
  end
54
55
 
55
- if $0 == __FILE__
56
+ if $PROGRAM_NAME == __FILE__
56
57
  p = Pool.new(10)
57
58
 
58
59
  20.times do |i|
59
60
  p.schedule do
60
61
  sleep rand(4) + 2
61
- $stderr.puts "Job #{i} finished by thread #{Thread.current[:id]}"
62
+ $stderr.puts "Job #{i} finished by thread #{Thread.current[:id]}\n"
62
63
  end
63
64
  end
64
65
  at_exit { p.shutdown }
@@ -0,0 +1,37 @@
1
module GeneValidator
  # Stores the data known about a single sequence (its identifiers, length,
  # HSPs and raw/translated sequence).
  class Query
    attr_accessor :type # protein | mRNA
    attr_accessor :definition
    attr_accessor :identifier
    attr_accessor :species
    attr_accessor :accession_no
    attr_accessor :length_protein
    attr_accessor :reading_frame
    attr_accessor :hsp_list # array of Hsp objects

    attr_accessor :raw_sequence
    # Only the writer is generated here: the reader is defined explicitly
    # below, and generating it twice triggers a "method redefined" warning.
    attr_writer :protein_translation # used only for nucleotides
    attr_accessor :nucleotide_rf # used only for nucleotides

    ##
    # Creates an empty query: no HSPs, no raw sequence, no translation and
    # no nucleotide reading frame.
    def initialize
      @hsp_list            = []
      @raw_sequence        = nil
      @protein_translation = nil
      @nucleotide_rf       = nil
    end

    ##
    # Returns the protein sequence of the query.
    # When +type+ is +:protein+ this is the raw sequence itself; for any
    # other type it is the value previously stored via +protein_translation=+
    # (nil when nothing has been stored).
    def protein_translation
      @type == :protein ? raw_sequence : @protein_translation
    end

    ##
    # Initializes the corresponding attributes of the sequence
    # with respect to the column names of the tabular blast output.
    # Params:
    # +hash+: row of the tabular output, keyed by column name. Only the
    # 'sseqid', 'sacc' and 'slen' columns are read; a missing (or nil) column
    # leaves the matching attribute untouched.
    def init_tabular_attribute(hash)
      @identifier     = hash['sseqid'] if hash['sseqid']
      @accession_no   = hash['sacc'] if hash['sacc']
      @length_protein = hash['slen'].to_i if hash['slen']
    end
  end
end
@@ -1,9 +1,9 @@
1
1
  require 'csv'
2
2
  require 'forwardable'
3
3
 
4
- require 'genevalidator/sequences'
5
- require 'genevalidator/hsp'
6
4
  require 'genevalidator/exceptions'
5
+ require 'genevalidator/hsp'
6
+ require 'genevalidator/query'
7
7
 
8
8
  #
9
9
  module GeneValidator
@@ -52,7 +52,6 @@ module GeneValidator
52
52
  @rows.next
53
53
  break unless entry == current_entry
54
54
  end
55
- # rescue StopIteration
56
55
  end
57
56
 
58
57
  alias move_to_next_query next
@@ -79,7 +78,7 @@ module GeneValidator
79
78
  grouped_hits = hits.group_by { |row| row['sseqid'] }
80
79
 
81
80
  grouped_hits.each do |query_id, row|
82
- hit_seq = Sequence.new
81
+ hit_seq = Query.new
83
82
  hit_seq.init_tabular_attribute(row[0])
84
83
 
85
84
  initialise_all_hsps(query_id, hits, hit_seq)
@@ -4,7 +4,7 @@ require 'genevalidator/blast'
4
4
  require 'genevalidator/exceptions'
5
5
  require 'genevalidator/output'
6
6
  require 'genevalidator/pool'
7
- require 'genevalidator/sequences'
7
+ require 'genevalidator/query'
8
8
  require 'genevalidator/validation_length_cluster'
9
9
  require 'genevalidator/validation_length_rank'
10
10
  require 'genevalidator/validation_blast_reading_frame'
@@ -17,7 +17,7 @@ require 'genevalidator/validation_alignment'
17
17
  module GeneValidator
18
18
  Pair1 = Struct.new(:x, :y)
19
19
 
20
- # Class that initalises separate Validate.new() instances for each query.
20
+ # Class that initialises a separate Validate.new() instance for each query.
21
21
  class Validations
22
22
  extend Forwardable
23
23
  def_delegators GeneValidator, :opt, :config, :query_idx
@@ -64,7 +64,7 @@ module GeneValidator
64
64
  query = IO.binread(input_file, start_offset, end_offset)
65
65
  parse_query = query.scan(/>([^\n]*)\n([A-Za-z\n]*)/)[0]
66
66
 
67
- prediction = Sequence.new
67
+ prediction = Query.new
68
68
  prediction.definition = parse_query[0].gsub("\n", '')
69
69
  prediction.identifier = prediction.definition.gsub(/ .*/, '')
70
70
  prediction.type = seq_type
@@ -87,7 +87,8 @@ module GeneValidator
87
87
  # Class that runs the validations (Instantiated for each query)
88
88
  class Validate
89
89
  extend Forwardable
90
- def_delegators GeneValidator, :opt, :config, :mutex_array, :overview
90
+ def_delegators GeneValidator, :opt, :config, :mutex_array, :overview,
91
+ :query_idx
91
92
 
92
93
  ##
93
94
  # Initializes the object
@@ -102,8 +103,8 @@ module GeneValidator
102
103
  @config = config
103
104
  @mutex_array = mutex_array
104
105
  @run_output = nil
105
-
106
106
  @overview = overview
107
+ @query_idx = query_idx
107
108
  end
108
109
 
109
110
  ##
@@ -200,14 +201,18 @@ module GeneValidator
200
201
  def compute_scores
201
202
  validations = @run_output.validations
202
203
  scores = {}
203
- scores[:successes] = validations.map { |v| v.result == v.expected }.count(true)
204
- scores[:fails] = validations.map { |v| v.validation != :unapplicable && v.validation != :error && v.result != v.expected }.count(true)
204
+ scores[:successes] = validations.count { |v| v.result == v.expected }
205
+ scores[:fails] = validations.count { |v| v.validation != :unapplicable && v.validation != :error && v.result != v.expected }
205
206
  scores = length_validation_scores(validations, scores)
206
207
 
207
208
  @run_output.successes = scores[:successes]
208
209
  @run_output.fails = scores[:fails]
209
- total_query = scores[:successes].to_i + scores[:fails]
210
- @run_output.overall_score = (scores[:successes] * 100 / total_query).round
210
+ total_query = scores[:successes].to_i + scores[:fails]
211
+ if total_query == 0
212
+ @run_output.overall_score = 0
213
+ else
214
+ @run_output.overall_score = (scores[:successes] * 90 / total_query).round
215
+ end
211
216
  end
212
217
 
213
218
  # Since there are two length validations, it is necessary to adjust the
@@ -244,8 +249,8 @@ module GeneValidator
244
249
  errors = []
245
250
  vals.each do |v|
246
251
  unless v.errors.nil?
247
- no_mafft += v.errors.select { |e| e == NoMafftInstallationError }.length
248
- no_internet += v.errors.select { |e| e == NoInternetError }.length
252
+ no_mafft += v.errors.count { |e| e == NoMafftInstallationError }
253
+ no_internet += v.errors.count { |e| e == NoInternetError }
249
254
  end
250
255
  errors.push(v.short_header) if v.validation == :error
251
256
  end
@@ -2,6 +2,7 @@ require 'bio'
2
2
  require 'forwardable'
3
3
 
4
4
  require 'genevalidator/exceptions'
5
+ require 'genevalidator/get_raw_sequences'
5
6
  require 'genevalidator/validation_report'
6
7
  require 'genevalidator/validation_test'
7
8
 
@@ -126,8 +127,8 @@ module GeneValidator
126
127
  n = 50 if n > 50
127
128
 
128
129
  fail NotEnoughHitsError unless hits.length >= n
129
- fail Exception unless prediction.is_a?(Sequence) &&
130
- hits[0].is_a?(Sequence)
130
+ fail unless prediction.is_a?(Query) && hits[0].is_a?(Query)
131
+
131
132
  start = Time.new
132
133
  # get the first n hits
133
134
  less_hits = @hits[0..[n - 1, @hits.length].min]
@@ -135,19 +136,10 @@ module GeneValidator
135
136
 
136
137
  # get raw sequences for less_hits
137
138
  less_hits.map do |hit|
138
- # get gene by accession number
139
139
  next unless hit.raw_sequence.nil?
140
-
141
- hit.get_sequence_from_index_file(@raw_seq_file, @index_file_name,
142
- hit.identifier, @raw_seq_file_load)
143
-
144
- if hit.raw_sequence.nil? || hit.raw_sequence.empty?
145
- seq_type = (hit.type == :protein) ? 'protein' : 'nucleotide'
146
- hit.get_sequence_by_accession_no(hit.accession_no, seq_type, @db)
147
- end
148
-
140
+ hit.raw_sequence = FetchRawSequences.run(hit.identifier,
141
+ hit.accession_no)
149
142
  useless_hits.push(hit) if hit.raw_sequence.nil?
150
- useless_hits.push(hit) if hit.raw_sequence.empty?
151
143
  end
152
144
 
153
145
  useless_hits.each { |hit| less_hits.delete(hit) }
@@ -210,7 +202,7 @@ module GeneValidator
210
202
  :error, @short_header,
211
203
  @header, @description)
212
204
  @validation_report.errors.push 'Multiple reading frames Error'
213
- rescue Exception
205
+ rescue
214
206
  @validation_report = ValidationReport.new('Unexpected error', :error,
215
207
  @short_header, @header,
216
208
  @description)
@@ -230,7 +222,7 @@ module GeneValidator
230
222
  # Array of +String+s, corresponding to the multiple aligned sequences
231
223
  # the prediction is the last sequence in the vector
232
224
  def multiple_align_mafft(prediction = @prediction, hits = @hits)
233
- fail Exception unless prediction.is_a?(Sequence) && hits[0].is_a?(Sequence)
225
+ fail unless prediction.is_a?(Query) && hits[0].is_a?(Query)
234
226
 
235
227
  options = ['--maxiterate', '1000', '--localpair', '--anysymbol',
236
228
  '--quiet', '--thread', "#{@num_threads}"]
@@ -247,7 +239,7 @@ module GeneValidator
247
239
  end
248
240
 
249
241
  @multiple_alignment
250
- rescue Exception
242
+ rescue
251
243
  raise NoMafftInstallationError
252
244
  end
253
245
 
@@ -319,7 +311,7 @@ module GeneValidator
319
311
  return 1 if no_conserved_residues == 0
320
312
 
321
313
  # no of conserved residues from the hits that appear in the prediction
322
- no_conserved_pred = consensus.split(//).each_index.select { |j| consensus[j] != '-' && consensus[j] != '?' && consensus[j] == prediction_raw[j] }.length
314
+ no_conserved_pred = consensus.split(//).each_index.count { |j| consensus[j] != '-' && consensus[j] != '?' && consensus[j] == prediction_raw[j] }
323
315
 
324
316
  no_conserved_pred / (no_conserved_residues + 0.0)
325
317
  end
@@ -333,7 +325,8 @@ module GeneValidator
333
325
  # +threshold+: percentage of genes that are considered in statistical model
334
326
  # Output:
335
327
  # +String+ representing the statistical model
336
- # +Array+ with the maximum frequeny of the majoritary residue for each position
328
+ # +Array+ with the maximum frequency of the majority residue for each
329
+ # position
337
330
  def get_sm_pssm(ma = @multiple_alignment, threshold = 0.7)
338
331
  sm = ''
339
332
  freq = []
@@ -442,19 +435,20 @@ module GeneValidator
442
435
  # plot consensus
443
436
  consensus_all_ranges.map { |range| { 'y' => 0, 'start' => range.first, 'stop' => range.last, 'color' => 'yellow', 'height' => -1 } }.flatten
444
437
 
445
- yAxisValues = 'Prediction'
446
- (1..ma.length - 1).each { |i| yAxisValues << ", hit #{i}" }
438
+ y_axis_values = 'Prediction'
439
+ (1..ma.length - 1).each { |i| y_axis_values << ", hit #{i}" }
447
440
 
448
- yAxisValues << ', Statistical Model'
441
+ y_axis_values << ', Statistical Model'
449
442
 
450
443
  Plot.new(data,
451
444
  :align,
452
- 'Missing/Extra sequences Validation: Multiple Align. & Statistical model of hits',
445
+ 'Missing/Extra sequences Validation: Multiple Align. &' \
446
+ ' Statistical model of hits',
453
447
  'Conserved Region, Yellow',
454
448
  'Offset in the Alignment',
455
449
  '',
456
450
  ma.length + 1,
457
- yAxisValues)
451
+ y_axis_values)
458
452
  end
459
453
  end
460
454
  end
@@ -101,7 +101,7 @@ module GeneValidator
101
101
  end
102
102
 
103
103
  fail NotEnoughHitsError unless hits.length >= 5
104
- fail Exception unless prediction.is_a?(Sequence) && hits[0].is_a?(Sequence)
104
+ fail unless prediction.is_a?(Query) && hits[0].is_a?(Query)
105
105
 
106
106
  start = Time.now
107
107
 
@@ -110,7 +110,7 @@ module GeneValidator
110
110
 
111
111
  # get the main reading frame
112
112
  main_rf = frames.map { |_k, v| v }.max
113
- @prediction.nucleotide_rf = frames.select { |_k, v| v == main_rf }.first.first
113
+ @prediction.nucleotide_rf = frames.find { |_k, v| v == main_rf }.first
114
114
 
115
115
  @validation_report = BlastRFValidationOutput.new(@short_header, @header,
116
116
  @description, frames)
@@ -121,7 +121,7 @@ module GeneValidator
121
121
  @validation_report = ValidationReport.new('Not enough evidence',
122
122
  :warning, @short_header,
123
123
  @header, @description)
124
- rescue Exception
124
+ rescue
125
125
  @validation_report = ValidationReport.new('Unexpected error', :error,
126
126
  @short_header, @header,
127
127
  @description)
@@ -4,6 +4,7 @@ require 'statsample'
4
4
 
5
5
  require 'genevalidator/exceptions'
6
6
  require 'genevalidator/ext/array'
7
+ require 'genevalidator/get_raw_sequences'
7
8
  require 'genevalidator/validation_report'
8
9
  require 'genevalidator/validation_test'
9
10
 
@@ -113,35 +114,24 @@ module GeneValidator
113
114
  # +DuplicationValidationOutput+ object
114
115
  def run(n = 10)
115
116
  fail NotEnoughHitsError unless hits.length >= 5
116
- fail Exception unless prediction.is_a?(Sequence) &&
117
- !prediction.raw_sequence.nil? &&
118
- hits[0].is_a?(Sequence)
117
+ fail unless prediction.is_a?(Query) && !prediction.raw_sequence.nil? &&
118
+ hits[0].is_a?(Query)
119
119
 
120
120
  start = Time.new
121
121
  # get the first n hits
122
122
  less_hits = @hits[0..[n - 1, @hits.length].min]
123
123
  useless_hits = []
124
-
125
124
  # get raw sequences for less_hits
126
125
  less_hits.map do |hit|
127
- # get gene by accession number
128
126
  next unless hit.raw_sequence.nil?
129
-
130
- hit.get_sequence_from_index_file(@raw_seq_file, @index_file_name,
131
- hit.identifier, @raw_seq_file_load)
132
-
133
- if hit.raw_sequence.nil? || hit.raw_sequence.empty?
134
- seq_type = (hit.type == :protein) ? 'protein' : 'nucleotide'
135
- hit.get_sequence_by_accession_no(hit.accession_no, seq_type, @db)
136
- end
137
-
127
+ hit.raw_sequence = FetchRawSequences.run(hit.identifier,
128
+ hit.accession_no)
138
129
  useless_hits.push(hit) if hit.raw_sequence.nil?
139
- useless_hits.push(hit) if hit.raw_sequence.empty?
140
130
  end
141
131
 
142
132
  useless_hits.each { |hit| less_hits.delete(hit) }
143
133
 
144
- fail NoInternetError if less_hits.length.nil?
134
+ fail NoInternetError if less_hits.length == 0
145
135
 
146
136
  averages = []
147
137
 
@@ -185,7 +175,7 @@ module GeneValidator
185
175
  raw_align.each { |seq| align.push(seq.to_s) }
186
176
  hit_alignment = align[0]
187
177
  query_alignment = align[1]
188
- rescue Exception
178
+ rescue
189
179
  raise NoMafftInstallationError
190
180
  end
191
181
  end
@@ -250,7 +240,7 @@ module GeneValidator
250
240
  @short_header, @header,
251
241
  @description)
252
242
  @validation_report.errors.push NoInternetError
253
- rescue Exception
243
+ rescue
254
244
  @validation_report = ValidationReport.new('Unexpected error', :error,
255
245
  @short_header, @header,
256
246
  @description)
@@ -114,7 +114,7 @@ module GeneValidator
114
114
  # +GeneMergeValidationOutput+ object
115
115
  def run
116
116
  fail NotEnoughHitsError unless hits.length >= 5
117
- fail Exception unless prediction.is_a?(Sequence) && hits[0].is_a?(Sequence)
117
+ fail unless prediction.is_a?(Query) && hits[0].is_a?(Query)
118
118
 
119
119
  start = Time.now
120
120
 
@@ -135,7 +135,7 @@ module GeneValidator
135
135
  end
136
136
  end
137
137
 
138
- line_slope = slope(xx, yy, (1..hits.length).map{ |x| 1 / (x + 0.0) })
138
+ line_slope = slope(xx, yy, (1..hits.length).map { |x| 1 / (x + 0.0) })
139
139
  ## YW - what is this weighting?
140
140
 
141
141
  unimodality = false
@@ -167,7 +167,7 @@ module GeneValidator
167
167
  @validation_report = ValidationReport.new('Not enough evidence', :warning,
168
168
  @short_header, @header,
169
169
  @description)
170
- rescue Exception
170
+ rescue
171
171
  @validation_report = ValidationReport.new('Unexpected error', :error,
172
172
  @short_header, @header,
173
173
  @description)
@@ -190,18 +190,19 @@ module GeneValidator
190
190
  { 'y' => i,
191
191
  'start' => hit.hsp_list.map(&:match_query_from).min,
192
192
  'stop' => hit.hsp_list.map(&:match_query_to).max,
193
- 'color' =>'black',
194
- 'dotted' =>'true'}}.flatten +
193
+ 'color' => 'black',
194
+ 'dotted' => 'true' } }.flatten +
195
195
  hits_less.each_with_index.map { |hit, i|
196
196
  hit.hsp_list.map { |hsp|
197
197
  { 'y' => i,
198
198
  'start' => hsp.match_query_from,
199
199
  'stop' => hsp.match_query_to,
200
- 'color' => 'orange'} } }.flatten
200
+ 'color' => 'orange' } } }.flatten
201
201
 
202
202
  Plot.new(data,
203
203
  :lines,
204
- 'Gene Merge Validation: Query coord covered by blast hit (1 line/hit)',
204
+ 'Gene Merge Validation: Query coord covered by blast hit' \
205
+ ' (1 line/hit)',
205
206
  '',
206
207
  'Offset in Prediction',
207
208
  'Hit Number',
@@ -224,11 +225,12 @@ module GeneValidator
224
225
 
225
226
  data = hits.map { |hit| { 'x' => hit.hsp_list.map(&:match_query_from).min,
226
227
  'y' => hit.hsp_list.map(&:match_query_to).max,
227
- 'color' => 'red'}}
228
+ 'color' => 'red' } }
228
229
 
229
230
  Plot.new(data,
230
231
  :scatter,
231
- 'Gene Merge Validation: Start/end of matching hit coord. on query (1 point/hit)',
232
+ 'Gene Merge Validation: Start/end of matching hit coord. on' \
233
+ ' query (1 point/hit)',
232
234
  '',
233
235
  'Start Offset (most left hsp)',
234
236
  'End Offset (most right hsp)',