genevalidator 1.6.2 → 1.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +147 -76
- data/Rakefile +1 -1
- data/aux/files/css/genevalidator.compiled.min.css +16 -0
- data/aux/files/css/{bootstrap.min.css → src/bootstrap.min.css} +0 -0
- data/aux/files/css/{font-awesome.min.css → src/font-awesome.min.css} +0 -0
- data/aux/files/css/{style.css → src/style.css} +0 -0
- data/aux/files/js/genevalidator.compiled.min.js +28 -0
- data/aux/files/js/{bootstrap.min.js → src/bootstrap.min.js} +0 -0
- data/aux/files/js/{d3.v3.min.js → src/d3.v3.min.js} +0 -0
- data/aux/files/js/{jquery-2.1.1.min.js → src/jquery-2.1.1.min.js} +0 -0
- data/aux/files/js/{jquery.tablesorter.min.js → src/jquery.tablesorter.min.js} +0 -0
- data/aux/files/js/src/plots.js +814 -0
- data/aux/files/js/src/script.js +43 -0
- data/aux/json_header.erb +6 -6
- data/aux/json_query.erb +2 -1
- data/aux/template_footer.erb +0 -11
- data/aux/template_header.erb +4 -4
- data/aux/template_query.erb +1 -1
- data/bin/genevalidator +8 -6
- data/genevalidator.gemspec +1 -1
- data/lib/genevalidator.rb +7 -5
- data/lib/genevalidator/arg_validation.rb +12 -9
- data/lib/genevalidator/blast.rb +18 -11
- data/lib/genevalidator/clusterization.rb +35 -31
- data/lib/genevalidator/exceptions.rb +0 -1
- data/lib/genevalidator/get_raw_sequences.rb +115 -69
- data/lib/genevalidator/hsp.rb +8 -8
- data/lib/genevalidator/json_to_gv_results.rb +4 -4
- data/lib/genevalidator/output.rb +40 -41
- data/lib/genevalidator/pool.rb +5 -4
- data/lib/genevalidator/query.rb +37 -0
- data/lib/genevalidator/tabular_parser.rb +3 -4
- data/lib/genevalidator/validation.rb +16 -11
- data/lib/genevalidator/validation_alignment.rb +17 -23
- data/lib/genevalidator/validation_blast_reading_frame.rb +3 -3
- data/lib/genevalidator/validation_duplication.rb +8 -18
- data/lib/genevalidator/validation_gene_merge.rb +11 -9
- data/lib/genevalidator/validation_length_cluster.rb +8 -11
- data/lib/genevalidator/validation_length_rank.rb +5 -4
- data/lib/genevalidator/validation_open_reading_frame.rb +5 -5
- data/lib/genevalidator/version.rb +1 -1
- data/test/test_all_validations.rb +2 -1
- data/test/test_blast.rb +4 -3
- data/test/test_extended_array_methods.rb +2 -1
- data/test/{test_sequences.rb → test_query.rb} +5 -23
- data/test/test_validation_open_reading_frame.rb +7 -7
- data/test/test_validations.rb +8 -6
- metadata +16 -16
- data/aux/app_template_footer.erb +0 -1
- data/aux/app_template_header.erb +0 -12
- data/aux/files/js/plots.js +0 -828
- data/aux/files/js/script.js +0 -71
- data/lib/genevalidator/sequences.rb +0 -101
data/lib/genevalidator/pool.rb
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
# Copyright © 2012, Kim Burgestrand kim@burgestrand.se
|
|
5
5
|
#
|
|
6
6
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
7
|
-
# of this software and associated documentation files (the
|
|
7
|
+
# of this software and associated documentation files (the "Software"), to deal
|
|
8
8
|
# in the Software without restriction, including without limitation the rights
|
|
9
9
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10
10
|
# copies of the Software, and to permit persons to whom the Software is
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
# The above copyright notice and this permission notice shall be included in
|
|
14
14
|
# all copies or substantial portions of the Software.
|
|
15
15
|
#
|
|
16
|
-
# THE SOFTWARE IS PROVIDED
|
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17
17
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
18
18
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
19
19
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
@@ -23,6 +23,7 @@
|
|
|
23
23
|
|
|
24
24
|
require 'thread'
|
|
25
25
|
|
|
26
|
+
# Class that creates a thread safe pool.
|
|
26
27
|
class Pool
|
|
27
28
|
def initialize(size)
|
|
28
29
|
@size = size
|
|
@@ -52,13 +53,13 @@ class Pool
|
|
|
52
53
|
end
|
|
53
54
|
end
|
|
54
55
|
|
|
55
|
-
if $
|
|
56
|
+
if $PROGRAM_NAME == __FILE__
|
|
56
57
|
p = Pool.new(10)
|
|
57
58
|
|
|
58
59
|
20.times do |i|
|
|
59
60
|
p.schedule do
|
|
60
61
|
sleep rand(4) + 2
|
|
61
|
-
$stderr.puts "Job #{i} finished by thread #{Thread.current[:id]}"
|
|
62
|
+
$stderr.puts "Job #{i} finished by thread #{Thread.current[:id]}\n"
|
|
62
63
|
end
|
|
63
64
|
end
|
|
64
65
|
at_exit { p.shutdown }
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
module GeneValidator
|
|
2
|
+
# This is a class for storing data on each sequence
|
|
3
|
+
class Query
|
|
4
|
+
attr_accessor :type # protein | mRNA
|
|
5
|
+
attr_accessor :definition
|
|
6
|
+
attr_accessor :identifier
|
|
7
|
+
attr_accessor :species
|
|
8
|
+
attr_accessor :accession_no
|
|
9
|
+
attr_accessor :length_protein
|
|
10
|
+
attr_accessor :reading_frame
|
|
11
|
+
attr_accessor :hsp_list # array of Hsp objects
|
|
12
|
+
|
|
13
|
+
attr_accessor :raw_sequence
|
|
14
|
+
attr_accessor :protein_translation # used only for nucleotides
|
|
15
|
+
attr_accessor :nucleotide_rf # used only for nucleotides
|
|
16
|
+
|
|
17
|
+
def initialize
|
|
18
|
+
@hsp_list = []
|
|
19
|
+
@raw_sequence = nil
|
|
20
|
+
@protein_translation = nil
|
|
21
|
+
@nucleotide_rf = nil
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
def protein_translation
|
|
25
|
+
(@type == :protein) ? raw_sequence : @protein_translation
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
##
|
|
29
|
+
# Initializes the corresponding attribute of the sequence
|
|
30
|
+
# with respect to the column name of the tabular blast output
|
|
31
|
+
def init_tabular_attribute(hash)
|
|
32
|
+
@identifier = hash['sseqid'] if hash['sseqid']
|
|
33
|
+
@accession_no = hash['sacc'] if hash['sacc']
|
|
34
|
+
@length_protein = hash['slen'].to_i if hash['slen']
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
require 'csv'
|
|
2
2
|
require 'forwardable'
|
|
3
3
|
|
|
4
|
-
require 'genevalidator/sequences'
|
|
5
|
-
require 'genevalidator/hsp'
|
|
6
4
|
require 'genevalidator/exceptions'
|
|
5
|
+
require 'genevalidator/hsp'
|
|
6
|
+
require 'genevalidator/query'
|
|
7
7
|
|
|
8
8
|
#
|
|
9
9
|
module GeneValidator
|
|
@@ -52,7 +52,6 @@ module GeneValidator
|
|
|
52
52
|
@rows.next
|
|
53
53
|
break unless entry == current_entry
|
|
54
54
|
end
|
|
55
|
-
# rescue StopIteration
|
|
56
55
|
end
|
|
57
56
|
|
|
58
57
|
alias move_to_next_query next
|
|
@@ -79,7 +78,7 @@ module GeneValidator
|
|
|
79
78
|
grouped_hits = hits.group_by { |row| row['sseqid'] }
|
|
80
79
|
|
|
81
80
|
grouped_hits.each do |query_id, row|
|
|
82
|
-
hit_seq =
|
|
81
|
+
hit_seq = Query.new
|
|
83
82
|
hit_seq.init_tabular_attribute(row[0])
|
|
84
83
|
|
|
85
84
|
initialise_all_hsps(query_id, hits, hit_seq)
|
|
@@ -4,7 +4,7 @@ require 'genevalidator/blast'
|
|
|
4
4
|
require 'genevalidator/exceptions'
|
|
5
5
|
require 'genevalidator/output'
|
|
6
6
|
require 'genevalidator/pool'
|
|
7
|
-
require 'genevalidator/
|
|
7
|
+
require 'genevalidator/query'
|
|
8
8
|
require 'genevalidator/validation_length_cluster'
|
|
9
9
|
require 'genevalidator/validation_length_rank'
|
|
10
10
|
require 'genevalidator/validation_blast_reading_frame'
|
|
@@ -17,7 +17,7 @@ require 'genevalidator/validation_alignment'
|
|
|
17
17
|
module GeneValidator
|
|
18
18
|
Pair1 = Struct.new(:x, :y)
|
|
19
19
|
|
|
20
|
-
# Class that initalises separate Validate.new()
|
|
20
|
+
# Class that initialises a separate Validate.new() instance for each query.
|
|
21
21
|
class Validations
|
|
22
22
|
extend Forwardable
|
|
23
23
|
def_delegators GeneValidator, :opt, :config, :query_idx
|
|
@@ -64,7 +64,7 @@ module GeneValidator
|
|
|
64
64
|
query = IO.binread(input_file, start_offset, end_offset)
|
|
65
65
|
parse_query = query.scan(/>([^\n]*)\n([A-Za-z\n]*)/)[0]
|
|
66
66
|
|
|
67
|
-
prediction =
|
|
67
|
+
prediction = Query.new
|
|
68
68
|
prediction.definition = parse_query[0].gsub("\n", '')
|
|
69
69
|
prediction.identifier = prediction.definition.gsub(/ .*/, '')
|
|
70
70
|
prediction.type = seq_type
|
|
@@ -87,7 +87,8 @@ module GeneValidator
|
|
|
87
87
|
# Class that runs the validations (Instantiated for each query)
|
|
88
88
|
class Validate
|
|
89
89
|
extend Forwardable
|
|
90
|
-
def_delegators GeneValidator, :opt, :config, :mutex_array, :overview
|
|
90
|
+
def_delegators GeneValidator, :opt, :config, :mutex_array, :overview,
|
|
91
|
+
:query_idx
|
|
91
92
|
|
|
92
93
|
##
|
|
93
94
|
# Initializes the object
|
|
@@ -102,8 +103,8 @@ module GeneValidator
|
|
|
102
103
|
@config = config
|
|
103
104
|
@mutex_array = mutex_array
|
|
104
105
|
@run_output = nil
|
|
105
|
-
|
|
106
106
|
@overview = overview
|
|
107
|
+
@query_idx = query_idx
|
|
107
108
|
end
|
|
108
109
|
|
|
109
110
|
##
|
|
@@ -200,14 +201,18 @@ module GeneValidator
|
|
|
200
201
|
def compute_scores
|
|
201
202
|
validations = @run_output.validations
|
|
202
203
|
scores = {}
|
|
203
|
-
scores[:successes] = validations.
|
|
204
|
-
scores[:fails] = validations.
|
|
204
|
+
scores[:successes] = validations.count { |v| v.result == v.expected }
|
|
205
|
+
scores[:fails] = validations.count { |v| v.validation != :unapplicable && v.validation != :error && v.result != v.expected }
|
|
205
206
|
scores = length_validation_scores(validations, scores)
|
|
206
207
|
|
|
207
208
|
@run_output.successes = scores[:successes]
|
|
208
209
|
@run_output.fails = scores[:fails]
|
|
209
|
-
total_query
|
|
210
|
-
|
|
210
|
+
total_query = scores[:successes].to_i + scores[:fails]
|
|
211
|
+
if total_query == 0
|
|
212
|
+
@run_output.overall_score = 0
|
|
213
|
+
else
|
|
214
|
+
@run_output.overall_score = (scores[:successes] * 90 / total_query).round
|
|
215
|
+
end
|
|
211
216
|
end
|
|
212
217
|
|
|
213
218
|
# Since there are two length validations, it is necessary to adjust the
|
|
@@ -244,8 +249,8 @@ module GeneValidator
|
|
|
244
249
|
errors = []
|
|
245
250
|
vals.each do |v|
|
|
246
251
|
unless v.errors.nil?
|
|
247
|
-
no_mafft += v.errors.
|
|
248
|
-
no_internet += v.errors.
|
|
252
|
+
no_mafft += v.errors.count { |e| e == NoMafftInstallationError }
|
|
253
|
+
no_internet += v.errors.count { |e| e == NoInternetError }
|
|
249
254
|
end
|
|
250
255
|
errors.push(v.short_header) if v.validation == :error
|
|
251
256
|
end
|
|
@@ -2,6 +2,7 @@ require 'bio'
|
|
|
2
2
|
require 'forwardable'
|
|
3
3
|
|
|
4
4
|
require 'genevalidator/exceptions'
|
|
5
|
+
require 'genevalidator/get_raw_sequences'
|
|
5
6
|
require 'genevalidator/validation_report'
|
|
6
7
|
require 'genevalidator/validation_test'
|
|
7
8
|
|
|
@@ -126,8 +127,8 @@ module GeneValidator
|
|
|
126
127
|
n = 50 if n > 50
|
|
127
128
|
|
|
128
129
|
fail NotEnoughHitsError unless hits.length >= n
|
|
129
|
-
fail
|
|
130
|
-
|
|
130
|
+
fail unless prediction.is_a?(Query) && hits[0].is_a?(Query)
|
|
131
|
+
|
|
131
132
|
start = Time.new
|
|
132
133
|
# get the first n hits
|
|
133
134
|
less_hits = @hits[0..[n - 1, @hits.length].min]
|
|
@@ -135,19 +136,10 @@ module GeneValidator
|
|
|
135
136
|
|
|
136
137
|
# get raw sequences for less_hits
|
|
137
138
|
less_hits.map do |hit|
|
|
138
|
-
# get gene by accession number
|
|
139
139
|
next unless hit.raw_sequence.nil?
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
hit.identifier, @raw_seq_file_load)
|
|
143
|
-
|
|
144
|
-
if hit.raw_sequence.nil? || hit.raw_sequence.empty?
|
|
145
|
-
seq_type = (hit.type == :protein) ? 'protein' : 'nucleotide'
|
|
146
|
-
hit.get_sequence_by_accession_no(hit.accession_no, seq_type, @db)
|
|
147
|
-
end
|
|
148
|
-
|
|
140
|
+
hit.raw_sequence = FetchRawSequences.run(hit.identifier,
|
|
141
|
+
hit.accession_no)
|
|
149
142
|
useless_hits.push(hit) if hit.raw_sequence.nil?
|
|
150
|
-
useless_hits.push(hit) if hit.raw_sequence.empty?
|
|
151
143
|
end
|
|
152
144
|
|
|
153
145
|
useless_hits.each { |hit| less_hits.delete(hit) }
|
|
@@ -210,7 +202,7 @@ module GeneValidator
|
|
|
210
202
|
:error, @short_header,
|
|
211
203
|
@header, @description)
|
|
212
204
|
@validation_report.errors.push 'Multiple reading frames Error'
|
|
213
|
-
rescue
|
|
205
|
+
rescue
|
|
214
206
|
@validation_report = ValidationReport.new('Unexpected error', :error,
|
|
215
207
|
@short_header, @header,
|
|
216
208
|
@description)
|
|
@@ -230,7 +222,7 @@ module GeneValidator
|
|
|
230
222
|
# Array of +String+s, corresponding to the multiple aligned sequences
|
|
231
223
|
# the prediction is the last sequence in the vector
|
|
232
224
|
def multiple_align_mafft(prediction = @prediction, hits = @hits)
|
|
233
|
-
fail
|
|
225
|
+
fail unless prediction.is_a?(Query) && hits[0].is_a?(Query)
|
|
234
226
|
|
|
235
227
|
options = ['--maxiterate', '1000', '--localpair', '--anysymbol',
|
|
236
228
|
'--quiet', '--thread', "#{@num_threads}"]
|
|
@@ -247,7 +239,7 @@ module GeneValidator
|
|
|
247
239
|
end
|
|
248
240
|
|
|
249
241
|
@multiple_alignment
|
|
250
|
-
rescue
|
|
242
|
+
rescue
|
|
251
243
|
raise NoMafftInstallationError
|
|
252
244
|
end
|
|
253
245
|
|
|
@@ -319,7 +311,7 @@ module GeneValidator
|
|
|
319
311
|
return 1 if no_conserved_residues == 0
|
|
320
312
|
|
|
321
313
|
# number of conserved residues from the hits that appear in the prediction
|
|
322
|
-
no_conserved_pred = consensus.split(//).each_index.
|
|
314
|
+
no_conserved_pred = consensus.split(//).each_index.count { |j| consensus[j] != '-' && consensus[j] != '?' && consensus[j] == prediction_raw[j] }
|
|
323
315
|
|
|
324
316
|
no_conserved_pred / (no_conserved_residues + 0.0)
|
|
325
317
|
end
|
|
@@ -333,7 +325,8 @@ module GeneValidator
|
|
|
333
325
|
# +threshold+: percentage of genes that are considered in statistical model
|
|
334
326
|
# Output:
|
|
335
327
|
# +String+ representing the statistical model
|
|
336
|
-
# +Array+ with the maximum frequeny of the majoritary residue for each
|
|
328
|
+
# +Array+ with the maximum frequency of the majority residue for each
|
|
329
|
+
# position
|
|
337
330
|
def get_sm_pssm(ma = @multiple_alignment, threshold = 0.7)
|
|
338
331
|
sm = ''
|
|
339
332
|
freq = []
|
|
@@ -442,19 +435,20 @@ module GeneValidator
|
|
|
442
435
|
# plot consensus
|
|
443
436
|
consensus_all_ranges.map { |range| { 'y' => 0, 'start' => range.first, 'stop' => range.last, 'color' => 'yellow', 'height' => -1 } }.flatten
|
|
444
437
|
|
|
445
|
-
|
|
446
|
-
(1..ma.length - 1).each { |i|
|
|
438
|
+
y_axis_values = 'Prediction'
|
|
439
|
+
(1..ma.length - 1).each { |i| y_axis_values << ", hit #{i}" }
|
|
447
440
|
|
|
448
|
-
|
|
441
|
+
y_axis_values << ', Statistical Model'
|
|
449
442
|
|
|
450
443
|
Plot.new(data,
|
|
451
444
|
:align,
|
|
452
|
-
'Missing/Extra sequences Validation: Multiple Align. &
|
|
445
|
+
'Missing/Extra sequences Validation: Multiple Align. &' \
|
|
446
|
+
'Statistical model of hits',
|
|
453
447
|
'Conserved Region, Yellow',
|
|
454
448
|
'Offset in the Alignment',
|
|
455
449
|
'',
|
|
456
450
|
ma.length + 1,
|
|
457
|
-
|
|
451
|
+
y_axis_values)
|
|
458
452
|
end
|
|
459
453
|
end
|
|
460
454
|
end
|
|
@@ -101,7 +101,7 @@ module GeneValidator
|
|
|
101
101
|
end
|
|
102
102
|
|
|
103
103
|
fail NotEnoughHitsError unless hits.length >= 5
|
|
104
|
-
fail
|
|
104
|
+
fail unless prediction.is_a?(Query) && hits[0].is_a?(Query)
|
|
105
105
|
|
|
106
106
|
start = Time.now
|
|
107
107
|
|
|
@@ -110,7 +110,7 @@ module GeneValidator
|
|
|
110
110
|
|
|
111
111
|
# get the main reading frame
|
|
112
112
|
main_rf = frames.map { |_k, v| v }.max
|
|
113
|
-
@prediction.nucleotide_rf = frames.
|
|
113
|
+
@prediction.nucleotide_rf = frames.find { |_k, v| v == main_rf }.first
|
|
114
114
|
|
|
115
115
|
@validation_report = BlastRFValidationOutput.new(@short_header, @header,
|
|
116
116
|
@description, frames)
|
|
@@ -121,7 +121,7 @@ module GeneValidator
|
|
|
121
121
|
@validation_report = ValidationReport.new('Not enough evidence',
|
|
122
122
|
:warning, @short_header,
|
|
123
123
|
@header, @description)
|
|
124
|
-
rescue
|
|
124
|
+
rescue
|
|
125
125
|
@validation_report = ValidationReport.new('Unexpected error', :error,
|
|
126
126
|
@short_header, @header,
|
|
127
127
|
@description)
|
|
@@ -4,6 +4,7 @@ require 'statsample'
|
|
|
4
4
|
|
|
5
5
|
require 'genevalidator/exceptions'
|
|
6
6
|
require 'genevalidator/ext/array'
|
|
7
|
+
require 'genevalidator/get_raw_sequences'
|
|
7
8
|
require 'genevalidator/validation_report'
|
|
8
9
|
require 'genevalidator/validation_test'
|
|
9
10
|
|
|
@@ -113,35 +114,24 @@ module GeneValidator
|
|
|
113
114
|
# +DuplicationValidationOutput+ object
|
|
114
115
|
def run(n = 10)
|
|
115
116
|
fail NotEnoughHitsError unless hits.length >= 5
|
|
116
|
-
fail
|
|
117
|
-
|
|
118
|
-
hits[0].is_a?(Sequence)
|
|
117
|
+
fail unless prediction.is_a?(Query) && !prediction.raw_sequence.nil? &&
|
|
118
|
+
hits[0].is_a?(Query)
|
|
119
119
|
|
|
120
120
|
start = Time.new
|
|
121
121
|
# get the first n hits
|
|
122
122
|
less_hits = @hits[0..[n - 1, @hits.length].min]
|
|
123
123
|
useless_hits = []
|
|
124
|
-
|
|
125
124
|
# get raw sequences for less_hits
|
|
126
125
|
less_hits.map do |hit|
|
|
127
|
-
# get gene by accession number
|
|
128
126
|
next unless hit.raw_sequence.nil?
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
hit.identifier, @raw_seq_file_load)
|
|
132
|
-
|
|
133
|
-
if hit.raw_sequence.nil? || hit.raw_sequence.empty?
|
|
134
|
-
seq_type = (hit.type == :protein) ? 'protein' : 'nucleotide'
|
|
135
|
-
hit.get_sequence_by_accession_no(hit.accession_no, seq_type, @db)
|
|
136
|
-
end
|
|
137
|
-
|
|
127
|
+
hit.raw_sequence = FetchRawSequences.run(hit.identifier,
|
|
128
|
+
hit.accession_no)
|
|
138
129
|
useless_hits.push(hit) if hit.raw_sequence.nil?
|
|
139
|
-
useless_hits.push(hit) if hit.raw_sequence.empty?
|
|
140
130
|
end
|
|
141
131
|
|
|
142
132
|
useless_hits.each { |hit| less_hits.delete(hit) }
|
|
143
133
|
|
|
144
|
-
fail NoInternetError if less_hits.length
|
|
134
|
+
fail NoInternetError if less_hits.length == 0
|
|
145
135
|
|
|
146
136
|
averages = []
|
|
147
137
|
|
|
@@ -185,7 +175,7 @@ module GeneValidator
|
|
|
185
175
|
raw_align.each { |seq| align.push(seq.to_s) }
|
|
186
176
|
hit_alignment = align[0]
|
|
187
177
|
query_alignment = align[1]
|
|
188
|
-
rescue
|
|
178
|
+
rescue
|
|
189
179
|
raise NoMafftInstallationError
|
|
190
180
|
end
|
|
191
181
|
end
|
|
@@ -250,7 +240,7 @@ module GeneValidator
|
|
|
250
240
|
@short_header, @header,
|
|
251
241
|
@description)
|
|
252
242
|
@validation_report.errors.push NoInternetError
|
|
253
|
-
rescue
|
|
243
|
+
rescue
|
|
254
244
|
@validation_report = ValidationReport.new('Unexpected error', :error,
|
|
255
245
|
@short_header, @header,
|
|
256
246
|
@description)
|
|
@@ -114,7 +114,7 @@ module GeneValidator
|
|
|
114
114
|
# +GeneMergeValidationOutput+ object
|
|
115
115
|
def run
|
|
116
116
|
fail NotEnoughHitsError unless hits.length >= 5
|
|
117
|
-
fail
|
|
117
|
+
fail unless prediction.is_a?(Query) && hits[0].is_a?(Query)
|
|
118
118
|
|
|
119
119
|
start = Time.now
|
|
120
120
|
|
|
@@ -135,7 +135,7 @@ module GeneValidator
|
|
|
135
135
|
end
|
|
136
136
|
end
|
|
137
137
|
|
|
138
|
-
line_slope = slope(xx, yy, (1..hits.length).map{ |x| 1 / (x + 0.0) })
|
|
138
|
+
line_slope = slope(xx, yy, (1..hits.length).map { |x| 1 / (x + 0.0) })
|
|
139
139
|
## YW - what is this weighting?
|
|
140
140
|
|
|
141
141
|
unimodality = false
|
|
@@ -167,7 +167,7 @@ module GeneValidator
|
|
|
167
167
|
@validation_report = ValidationReport.new('Not enough evidence', :warning,
|
|
168
168
|
@short_header, @header,
|
|
169
169
|
@description)
|
|
170
|
-
rescue
|
|
170
|
+
rescue
|
|
171
171
|
@validation_report = ValidationReport.new('Unexpected error', :error,
|
|
172
172
|
@short_header, @header,
|
|
173
173
|
@description)
|
|
@@ -190,18 +190,19 @@ module GeneValidator
|
|
|
190
190
|
{ 'y' => i,
|
|
191
191
|
'start' => hit.hsp_list.map(&:match_query_from).min,
|
|
192
192
|
'stop' => hit.hsp_list.map(&:match_query_to).max,
|
|
193
|
-
'color' =>'black',
|
|
194
|
-
'dotted' =>'true'}}.flatten +
|
|
193
|
+
'color' => 'black',
|
|
194
|
+
'dotted' => 'true' } }.flatten +
|
|
195
195
|
hits_less.each_with_index.map { |hit, i|
|
|
196
196
|
hit.hsp_list.map { |hsp|
|
|
197
197
|
{ 'y' => i,
|
|
198
198
|
'start' => hsp.match_query_from,
|
|
199
199
|
'stop' => hsp.match_query_to,
|
|
200
|
-
'color' => 'orange'} } }.flatten
|
|
200
|
+
'color' => 'orange' } } }.flatten
|
|
201
201
|
|
|
202
202
|
Plot.new(data,
|
|
203
203
|
:lines,
|
|
204
|
-
'Gene Merge Validation: Query coord covered by blast hit
|
|
204
|
+
'Gene Merge Validation: Query coord covered by blast hit' \
|
|
205
|
+
' (1 line/hit)',
|
|
205
206
|
'',
|
|
206
207
|
'Offset in Prediction',
|
|
207
208
|
'Hit Number',
|
|
@@ -224,11 +225,12 @@ module GeneValidator
|
|
|
224
225
|
|
|
225
226
|
data = hits.map { |hit| { 'x' => hit.hsp_list.map(&:match_query_from).min,
|
|
226
227
|
'y' => hit.hsp_list.map(&:match_query_to).max,
|
|
227
|
-
'color' => 'red'}}
|
|
228
|
+
'color' => 'red' } }
|
|
228
229
|
|
|
229
230
|
Plot.new(data,
|
|
230
231
|
:scatter,
|
|
231
|
-
'Gene Merge Validation: Start/end of matching hit coord. on
|
|
232
|
+
'Gene Merge Validation: Start/end of matching hit coord. on' \
|
|
233
|
+
' query (1 point/hit)',
|
|
232
234
|
'',
|
|
233
235
|
'Start Offset (most left hsp)',
|
|
234
236
|
'End Offset (most right hsp)',
|