genevalidator 1.6.12 → 2.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +30 -1
- data/.ruby-version +1 -0
- data/.travis.yml +13 -12
- data/Gemfile +4 -1
- data/Gemfile.lock +135 -0
- data/README.md +104 -122
- data/Rakefile +377 -5
- data/aux/gv_results.slim +155 -0
- data/aux/html_files/css/gv.compiled.min.css +8 -0
- data/aux/{files → html_files}/css/src/bootstrap.min.css +0 -0
- data/aux/{files → html_files}/css/src/font-awesome.min.css +0 -0
- data/aux/{files → html_files}/css/src/style.css +0 -0
- data/aux/{files → html_files}/fonts/FontAwesome.otf +0 -0
- data/aux/{files → html_files}/fonts/fontawesome-webfont.eot +0 -0
- data/aux/{files → html_files}/fonts/fontawesome-webfont.svg +0 -0
- data/aux/{files → html_files}/fonts/fontawesome-webfont.ttf +0 -0
- data/aux/{files → html_files}/fonts/fontawesome-webfont.woff +0 -0
- data/aux/{files → html_files}/img/gene.png +0 -0
- data/aux/html_files/js/gv.compiled.min.js +1 -0
- data/aux/{files → html_files}/js/src/bootstrap.min.js +0 -0
- data/aux/{files → html_files}/js/src/d3.v3.min.js +0 -0
- data/aux/{files → html_files}/js/src/jquery-2.1.1.min.js +0 -0
- data/aux/{files → html_files}/js/src/jquery.tablesorter.min.js +0 -0
- data/aux/{files → html_files}/js/src/plots.js +1 -1
- data/aux/{files → html_files}/js/src/script.js +0 -0
- data/aux/{files → html_files}/json/.gitkeep +0 -0
- data/bin/genevalidator +393 -56
- data/exemplar_data/README.md +60 -0
- data/{data/mrna_data.fasta → exemplar_data/mrna_data.fa} +1 -1
- data/{data/protein_data.fasta → exemplar_data/protein_data.fa} +0 -0
- data/genevalidator.gemspec +35 -20
- data/install.sh +92 -0
- data/lib/genevalidator.rb +171 -56
- data/lib/genevalidator/arg_validation.rb +26 -55
- data/lib/genevalidator/blast.rb +44 -99
- data/lib/genevalidator/clusterization.rb +18 -22
- data/lib/genevalidator/exceptions.rb +17 -17
- data/lib/genevalidator/ext/array.rb +21 -4
- data/lib/genevalidator/get_raw_sequences.rb +32 -31
- data/lib/genevalidator/hsp.rb +31 -2
- data/lib/genevalidator/json_to_gv_results.rb +38 -122
- data/lib/genevalidator/output.rb +158 -172
- data/lib/genevalidator/output_files.rb +134 -0
- data/lib/genevalidator/pool.rb +2 -5
- data/lib/genevalidator/query.rb +1 -1
- data/lib/genevalidator/tabular_parser.rb +8 -29
- data/lib/genevalidator/validation.rb +48 -90
- data/lib/genevalidator/validation_alignment.rb +64 -75
- data/lib/genevalidator/validation_blast_reading_frame.rb +13 -9
- data/lib/genevalidator/validation_duplication.rb +85 -84
- data/lib/genevalidator/validation_gene_merge.rb +46 -35
- data/lib/genevalidator/validation_length_cluster.rb +18 -15
- data/lib/genevalidator/validation_length_rank.rb +19 -15
- data/lib/genevalidator/validation_maker_qi.rb +13 -12
- data/lib/genevalidator/validation_open_reading_frame.rb +16 -13
- data/lib/genevalidator/validation_report.rb +1 -1
- data/lib/genevalidator/validation_test.rb +1 -1
- data/lib/genevalidator/version.rb +1 -1
- data/test/overall.rb +1 -1
- data/test/test_all_validations.rb +36 -24
- data/test/test_blast.rb +39 -24
- data/test/test_clusterization_2d.rb +4 -4
- data/test/test_helper.rb +2 -2
- data/test/test_query.rb +16 -20
- data/test/test_validation_open_reading_frame.rb +122 -122
- data/test/test_validations.rb +12 -10
- metadata +94 -79
- data/aux/files/css/genevalidator.compiled.min.css +0 -16
- data/aux/files/js/genevalidator.compiled.min.js +0 -28
- data/aux/json_footer.erb +0 -8
- data/aux/json_header.erb +0 -19
- data/aux/json_query.erb +0 -15
- data/aux/template_footer.erb +0 -8
- data/aux/template_header.erb +0 -19
- data/aux/template_query.erb +0 -14
- data/data/README.md +0 -57
- data/data/mrna_data.fasta.blast_tabular +0 -3567
- data/data/mrna_data.fasta.blast_tabular.raw_seq +0 -53998
- data/data/mrna_data.fasta.blast_tabular.raw_seq.idx +0 -5440
- data/data/mrna_data.fasta.blast_xml +0 -39800
- data/data/mrna_data.fasta.blast_xml.raw_seq +0 -2554
- data/data/mrna_data.fasta.blast_xml.raw_seq.idx +0 -3127
- data/data/mrna_data.fasta.json +0 -1
- data/data/protein_data.fasta.blast_tabular +0 -3278
- data/data/protein_data.fasta.blast_tabular.raw_seq +0 -61295
- data/data/protein_data.fasta.blast_tabular.raw_seq.idx +0 -4438
- data/data/protein_data.fasta.blast_xml +0 -26228
- data/data/protein_data.fasta.blast_xml.raw_seq +0 -9803
- data/data/protein_data.fasta.blast_xml.raw_seq.idx +0 -1777
- data/data/protein_data.fasta.json +0 -1
|
@@ -15,7 +15,9 @@ module GeneValidator
|
|
|
15
15
|
|
|
16
16
|
def initialize(short_header, header, description, frames,
|
|
17
17
|
expected = :yes)
|
|
18
|
-
@short_header
|
|
18
|
+
@short_header = short_header
|
|
19
|
+
@header = header
|
|
20
|
+
@description = description
|
|
19
21
|
@frames = frames
|
|
20
22
|
@expected = expected
|
|
21
23
|
@result = validation
|
|
@@ -70,7 +72,7 @@ module GeneValidator
|
|
|
70
72
|
count_p += 1 if x > 0
|
|
71
73
|
count_n += 1 if x < 0
|
|
72
74
|
end
|
|
73
|
-
|
|
75
|
+
count_p > 1 || count_n > 1 ? :no : :yes
|
|
74
76
|
end
|
|
75
77
|
end
|
|
76
78
|
|
|
@@ -78,6 +80,9 @@ module GeneValidator
|
|
|
78
80
|
# This class contains the methods necessary for
|
|
79
81
|
# reading frame validation based on BLAST output
|
|
80
82
|
class BlastReadingFrameValidation < ValidationTest
|
|
83
|
+
extend Forwardable
|
|
84
|
+
def_delegators GeneValidator, :opt
|
|
85
|
+
|
|
81
86
|
def initialize(type, prediction, hits = nil)
|
|
82
87
|
super
|
|
83
88
|
@short_header = 'ReadingFrame'
|
|
@@ -100,8 +105,8 @@ module GeneValidator
|
|
|
100
105
|
return @validation_report
|
|
101
106
|
end
|
|
102
107
|
|
|
103
|
-
|
|
104
|
-
|
|
108
|
+
raise NotEnoughHitsError if hits.length < opt[:min_blast_hits]
|
|
109
|
+
raise unless prediction.is_a?(Query) && hits[0].is_a?(Query)
|
|
105
110
|
|
|
106
111
|
start = Time.now
|
|
107
112
|
|
|
@@ -116,12 +121,11 @@ module GeneValidator
|
|
|
116
121
|
@description, frames)
|
|
117
122
|
@validation_report.run_time = Time.now - start
|
|
118
123
|
@validation_report
|
|
119
|
-
|
|
120
124
|
rescue NotEnoughHitsError
|
|
121
|
-
@validation_report =
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
rescue
|
|
125
|
+
@validation_report = ValidationReport.new('Not enough evidence',
|
|
126
|
+
:warning, @short_header,
|
|
127
|
+
@header, @description)
|
|
128
|
+
rescue StandardError
|
|
125
129
|
@validation_report = ValidationReport.new('Unexpected error', :error,
|
|
126
130
|
@short_header, @header,
|
|
127
131
|
@description)
|
|
@@ -19,7 +19,9 @@ module GeneValidator
|
|
|
19
19
|
|
|
20
20
|
def initialize(short_header, header, description, pvalue, averages,
|
|
21
21
|
threshold = 0.05, expected = :yes)
|
|
22
|
-
@short_header
|
|
22
|
+
@short_header = short_header
|
|
23
|
+
@header = header
|
|
24
|
+
@description = description
|
|
23
25
|
@pvalue = pvalue
|
|
24
26
|
@threshold = threshold
|
|
25
27
|
@result = validation
|
|
@@ -36,7 +38,7 @@ module GeneValidator
|
|
|
36
38
|
|
|
37
39
|
def explain
|
|
38
40
|
"The Wilcoxon test produced a p-value of #{prettify_evalue(@pvalue)}" \
|
|
39
|
-
"#{
|
|
41
|
+
"#{@result == :no ? " (average = #{@average.round(2)})." : '.'}"
|
|
40
42
|
end
|
|
41
43
|
|
|
42
44
|
def conclude
|
|
@@ -50,15 +52,15 @@ module GeneValidator
|
|
|
50
52
|
end
|
|
51
53
|
|
|
52
54
|
def print
|
|
53
|
-
|
|
55
|
+
@pvalue.round(2).to_s
|
|
54
56
|
end
|
|
55
57
|
|
|
56
58
|
def validation
|
|
57
|
-
|
|
59
|
+
@pvalue > @threshold ? :yes : :no
|
|
58
60
|
end
|
|
59
61
|
|
|
60
62
|
def color
|
|
61
|
-
|
|
63
|
+
validation == :yes ? 'success' : 'danger'
|
|
62
64
|
end
|
|
63
65
|
|
|
64
66
|
private
|
|
@@ -97,41 +99,33 @@ module GeneValidator
|
|
|
97
99
|
@index_file_name = config[:raw_seq_file_index]
|
|
98
100
|
@raw_seq_file_load = config[:raw_seq_file_load]
|
|
99
101
|
@db = opt[:db]
|
|
100
|
-
@num_threads = opt[:
|
|
102
|
+
@num_threads = opt[:mafft_threads]
|
|
101
103
|
@type = config[:type]
|
|
102
104
|
end
|
|
103
105
|
|
|
104
|
-
def in_range?(ranges, idx)
|
|
105
|
-
ranges.each do |range|
|
|
106
|
-
return (range.member?(idx)) ? true : false
|
|
107
|
-
end
|
|
108
|
-
false
|
|
109
|
-
end
|
|
110
|
-
|
|
111
106
|
##
|
|
112
107
|
# Check duplication in the first n hits
|
|
113
108
|
# Output:
|
|
114
109
|
# +DuplicationValidationOutput+ object
|
|
115
110
|
def run(n = 10)
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
111
|
+
raise NotEnoughHitsError if hits.length < opt[:min_blast_hits]
|
|
112
|
+
raise unless prediction.is_a?(Query) && !prediction.raw_sequence.nil? &&
|
|
113
|
+
hits[0].is_a?(Query)
|
|
119
114
|
|
|
120
115
|
start = Time.new
|
|
121
116
|
# get the first n hits
|
|
122
|
-
|
|
123
|
-
|
|
117
|
+
n_hits = [n - 1, @hits.length].min
|
|
118
|
+
less_hits = @hits[0..n_hits]
|
|
124
119
|
# get raw sequences for less_hits
|
|
125
|
-
less_hits.
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
120
|
+
less_hits.delete_if do |hit|
|
|
121
|
+
if hit.raw_sequence.nil?
|
|
122
|
+
hit.raw_sequence = FetchRawSequences.run(hit.identifier,
|
|
123
|
+
hit.accession_no)
|
|
124
|
+
end
|
|
125
|
+
hit.raw_sequence.nil? ? true : false
|
|
130
126
|
end
|
|
131
127
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
fail NoInternetError if less_hits.length == 0
|
|
128
|
+
raise NoInternetError if less_hits.length.zero?
|
|
135
129
|
|
|
136
130
|
averages = []
|
|
137
131
|
|
|
@@ -146,62 +140,17 @@ module GeneValidator
|
|
|
146
140
|
hit_alignment = hsp.hit_alignment
|
|
147
141
|
query_alignment = hsp.query_alignment
|
|
148
142
|
else
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
1..hsp.match_query_to - 1]
|
|
153
|
-
|
|
154
|
-
# in case of nucleotide prediction sequence translate into protein
|
|
155
|
-
# use translate with reading frame 1 because
|
|
156
|
-
# to/from coordinates of the hsp already correspond to the
|
|
157
|
-
# reading frame in which the prediction was read to match this hsp
|
|
158
|
-
if @type == :nucleotide
|
|
159
|
-
s = Bio::Sequence::NA.new(query_local)
|
|
160
|
-
query_local = s.translate
|
|
161
|
-
end
|
|
162
|
-
|
|
163
|
-
# local alignment for hit and query
|
|
164
|
-
seqs = [hit_local, query_local]
|
|
165
|
-
|
|
166
|
-
begin
|
|
167
|
-
options = ['--maxiterate', '1000', '--localpair', '--anysymbol',
|
|
168
|
-
'--quiet', '--thread', "#{@num_threads}"]
|
|
169
|
-
mafft = Bio::MAFFT.new('mafft', options)
|
|
170
|
-
|
|
171
|
-
report = mafft.query_align(seqs)
|
|
172
|
-
raw_align = report.alignment
|
|
173
|
-
align = []
|
|
174
|
-
|
|
175
|
-
raw_align.each { |seq| align.push(seq.to_s) }
|
|
176
|
-
hit_alignment = align[0]
|
|
177
|
-
query_alignment = align[1]
|
|
178
|
-
rescue
|
|
179
|
-
raise NoMafftInstallationError
|
|
180
|
-
end
|
|
143
|
+
align = find_local_alignment(hit, prediction, hsp)
|
|
144
|
+
hit_alignment = align[0]
|
|
145
|
+
query_alignment = align[1]
|
|
181
146
|
end
|
|
182
147
|
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
# for each hsp of the curent hit
|
|
186
|
-
# iterate through the alignment and count the matching residues
|
|
187
|
-
[*(0..hit_alignment.length - 1)].each do |i|
|
|
188
|
-
residue_hit = hit_alignment[i]
|
|
189
|
-
residue_query = query_alignment[i]
|
|
190
|
-
next if residue_hit == ' ' || residue_hit == '+' ||
|
|
191
|
-
residue_hit == '-' || residue_hit != residue_query
|
|
192
|
-
# indexing in blast starts from 1
|
|
193
|
-
idx_hit = i + (hsp.hit_from - 1) -
|
|
194
|
-
hit_alignment[0..i].scan(/-/).length
|
|
195
|
-
idx_query = i + (hsp.match_query_from - 1) -
|
|
196
|
-
query_alignment[0..i].scan(/-/).length
|
|
197
|
-
unless in_range?(ranges_prediction, idx_query)
|
|
198
|
-
coverage[idx_hit] += 1
|
|
199
|
-
end
|
|
200
|
-
end
|
|
148
|
+
coverage = check_multiple_coverage(hit_alignment, query_alignment,
|
|
149
|
+
hsp, coverage, ranges_prediction)
|
|
201
150
|
|
|
202
|
-
ranges_prediction
|
|
151
|
+
ranges_prediction << (hsp.match_query_from..hsp.match_query_to)
|
|
203
152
|
end
|
|
204
|
-
overlap = coverage.reject
|
|
153
|
+
overlap = coverage.reject(&:zero?)
|
|
205
154
|
if overlap != []
|
|
206
155
|
averages.push((overlap.inject(:+) / (overlap.length + 0.0)).round(2))
|
|
207
156
|
end
|
|
@@ -225,7 +174,6 @@ module GeneValidator
|
|
|
225
174
|
averages)
|
|
226
175
|
@run_time = Time.now - start
|
|
227
176
|
@validation_report
|
|
228
|
-
|
|
229
177
|
rescue NotEnoughHitsError
|
|
230
178
|
@validation_report = ValidationReport.new('Not enough evidence', :warning,
|
|
231
179
|
@short_header, @header,
|
|
@@ -240,22 +188,75 @@ module GeneValidator
|
|
|
240
188
|
@short_header, @header,
|
|
241
189
|
@description)
|
|
242
190
|
@validation_report.errors.push NoInternetError
|
|
243
|
-
rescue
|
|
191
|
+
rescue StandardError
|
|
244
192
|
@validation_report = ValidationReport.new('Unexpected error', :error,
|
|
245
193
|
@short_header, @header,
|
|
246
194
|
@description)
|
|
247
195
|
@validation_report.errors.push 'Unexpected Error'
|
|
248
196
|
end
|
|
249
197
|
|
|
198
|
+
# Only run if the BLAST output does not contain hit alignmment
|
|
199
|
+
def find_local_alignment(hit, prediction, hsp)
|
|
200
|
+
# indexing in blast starts from 1
|
|
201
|
+
hit_local = hit.raw_sequence[hsp.hit_from - 1..hsp.hit_to - 1]
|
|
202
|
+
query_local = prediction.raw_sequence[hsp.match_query_from -
|
|
203
|
+
1..hsp.match_query_to - 1]
|
|
204
|
+
|
|
205
|
+
# in case of nucleotide prediction sequence translate into protein
|
|
206
|
+
# use translate with reading frame 1 because
|
|
207
|
+
# to/from coordinates of the hsp already correspond to the
|
|
208
|
+
# reading frame in which the prediction was read to match this hsp
|
|
209
|
+
if @type == :nucleotide
|
|
210
|
+
s = Bio::Sequence::NA.new(query_local)
|
|
211
|
+
query_local = s.translate
|
|
212
|
+
end
|
|
213
|
+
|
|
214
|
+
opt = ['--maxiterate', '1000', '--localpair', '--anysymbol', '--quiet',
|
|
215
|
+
'--thread', @num_threads.to_s]
|
|
216
|
+
mafft = Bio::MAFFT.new('mafft', opt)
|
|
217
|
+
|
|
218
|
+
# local alignment for hit and query
|
|
219
|
+
seqs = [hit_local, query_local]
|
|
220
|
+
report = mafft.query_align(seqs)
|
|
221
|
+
report.alignment.map(&:to_s)
|
|
222
|
+
rescue StandardError
|
|
223
|
+
raise NoMafftInstallationError
|
|
224
|
+
end
|
|
225
|
+
|
|
226
|
+
def check_multiple_coverage(hit_alignment, query_alignment, hsp, coverage,
|
|
227
|
+
ranges_prediction)
|
|
228
|
+
# for each hsp of the curent hit
|
|
229
|
+
# iterate through the alignment and count the matching residues
|
|
230
|
+
[*(0..hit_alignment.length - 1)].each do |i|
|
|
231
|
+
residue_hit = hit_alignment[i]
|
|
232
|
+
residue_query = query_alignment[i]
|
|
233
|
+
next if [' ', '+', '-'].include?(residue_hit)
|
|
234
|
+
next if residue_hit != residue_query
|
|
235
|
+
# indexing in blast starts from 1
|
|
236
|
+
idx_hit = i + (hsp.hit_from - 1) -
|
|
237
|
+
hit_alignment[0..i].scan(/-/).length
|
|
238
|
+
idx_query = i + (hsp.match_query_from - 1) -
|
|
239
|
+
query_alignment[0..i].scan(/-/).length
|
|
240
|
+
coverage[idx_hit] += 1 unless in_range?(ranges_prediction, idx_query)
|
|
241
|
+
end
|
|
242
|
+
coverage
|
|
243
|
+
end
|
|
244
|
+
|
|
245
|
+
def in_range?(ranges, idx)
|
|
246
|
+
ranges.each { |range| return true if range.member?(idx) }
|
|
247
|
+
false
|
|
248
|
+
end
|
|
249
|
+
|
|
250
250
|
##
|
|
251
251
|
# wilcox test implementation from statsample ruby gem
|
|
252
252
|
# many thanks to Claudio for helping us with the implementation!
|
|
253
253
|
def wilcox_test(averages)
|
|
254
|
-
wilcox = Statsample::Test.wilcoxon_signed_rank(
|
|
255
|
-
|
|
256
|
-
|
|
254
|
+
wilcox = Statsample::Test.wilcoxon_signed_rank(
|
|
255
|
+
Daru::Vector.new(averages),
|
|
256
|
+
Daru::Vector.new(Array.new(averages.length, 1))
|
|
257
|
+
)
|
|
257
258
|
|
|
258
|
-
|
|
259
|
+
averages.length < 15 ? wilcox.probability_exact : wilcox.probability_z
|
|
259
260
|
end
|
|
260
261
|
end
|
|
261
262
|
end
|
|
@@ -22,7 +22,9 @@ module GeneValidator
|
|
|
22
22
|
|
|
23
23
|
def initialize(short_header, header, description, slope, unimodality,
|
|
24
24
|
expected = :no)
|
|
25
|
-
@short_header
|
|
25
|
+
@short_header = short_header
|
|
26
|
+
@header = header
|
|
27
|
+
@description = description
|
|
26
28
|
@slope = slope.round(1)
|
|
27
29
|
@slope = @slope.abs if @slope == -0.0
|
|
28
30
|
@unimodality = unimodality
|
|
@@ -56,30 +58,30 @@ module GeneValidator
|
|
|
56
58
|
if @unimodality
|
|
57
59
|
'This suggest that the query sequence represents a single gene.'
|
|
58
60
|
else
|
|
59
|
-
diff =
|
|
61
|
+
diff = @result == :yes ? ' within' : ' outside'
|
|
60
62
|
t = "This slope is #{diff} our empirically calculated thresholds" \
|
|
61
63
|
' (0.4 and 1.2).'
|
|
62
|
-
if @result == :yes
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
64
|
+
t << if @result == :yes
|
|
65
|
+
' This suggests the query contains sequence from two or more' \
|
|
66
|
+
' different genes.'
|
|
67
|
+
else
|
|
68
|
+
' There is no evidence that the query contains sequence from' \
|
|
69
|
+
' multiple genes.'
|
|
70
|
+
end
|
|
69
71
|
t
|
|
70
72
|
end
|
|
71
73
|
end
|
|
72
74
|
|
|
73
75
|
def print
|
|
74
|
-
|
|
76
|
+
@slope.nan? ? 'Inf' : @slope.to_s
|
|
75
77
|
end
|
|
76
78
|
|
|
77
79
|
def validation
|
|
78
|
-
|
|
80
|
+
@slope > threshold_down && @slope < threshold_up ? :yes : :no
|
|
79
81
|
end
|
|
80
82
|
|
|
81
83
|
def color
|
|
82
|
-
|
|
84
|
+
validation == :no ? 'success' : 'danger'
|
|
83
85
|
end
|
|
84
86
|
end
|
|
85
87
|
|
|
@@ -88,6 +90,9 @@ module GeneValidator
|
|
|
88
90
|
# checking whether there is evidence that the
|
|
89
91
|
# prediction is a merge of multiple genes
|
|
90
92
|
class GeneMergeValidation < ValidationTest
|
|
93
|
+
extend Forwardable
|
|
94
|
+
def_delegators GeneValidator, :opt
|
|
95
|
+
|
|
91
96
|
attr_reader :prediction
|
|
92
97
|
attr_reader :hits
|
|
93
98
|
|
|
@@ -113,18 +118,20 @@ module GeneValidator
|
|
|
113
118
|
# Output:
|
|
114
119
|
# +GeneMergeValidationOutput+ object
|
|
115
120
|
def run
|
|
116
|
-
|
|
117
|
-
|
|
121
|
+
raise NotEnoughHitsError if hits.length < opt[:min_blast_hits]
|
|
122
|
+
raise unless prediction.is_a?(Query) && hits[0].is_a?(Query)
|
|
118
123
|
|
|
119
124
|
start = Time.now
|
|
120
125
|
|
|
121
|
-
pairs = hits.map
|
|
122
|
-
|
|
126
|
+
pairs = hits.map do |hit|
|
|
127
|
+
Pair.new(hit.hsp_list.map(&:match_query_from).min,
|
|
128
|
+
hit.hsp_list.map(&:match_query_to).max)
|
|
129
|
+
end
|
|
123
130
|
xx_0 = pairs.map(&:x)
|
|
124
131
|
yy_0 = pairs.map(&:y)
|
|
125
132
|
|
|
126
133
|
# minimum start shoud be at 'boundary' residues
|
|
127
|
-
xx = xx_0.map { |x|
|
|
134
|
+
xx = xx_0.map { |x| x < @boundary ? @boundary : x }
|
|
128
135
|
|
|
129
136
|
# maximum end should be at length - 'boundary' residues
|
|
130
137
|
yy = yy_0.map do |y|
|
|
@@ -151,23 +158,22 @@ module GeneValidator
|
|
|
151
158
|
@validation_report = GeneMergeValidationOutput.new(@short_header, @header,
|
|
152
159
|
@description, lm_slope,
|
|
153
160
|
unimodality)
|
|
154
|
-
if unimodality
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
161
|
+
plot1 = if unimodality
|
|
162
|
+
plot_2d_start_from
|
|
163
|
+
else
|
|
164
|
+
plot_2d_start_from(lm_slope, y_intercept)
|
|
165
|
+
end
|
|
159
166
|
|
|
160
167
|
@validation_report.plot_files.push(plot1)
|
|
161
168
|
plot2 = plot_matched_regions
|
|
162
169
|
@validation_report.plot_files.push(plot2)
|
|
163
170
|
@validation_report.run_time = Time.now - start
|
|
164
171
|
@validation_report
|
|
165
|
-
|
|
166
172
|
rescue NotEnoughHitsError
|
|
167
173
|
@validation_report = ValidationReport.new('Not enough evidence', :warning,
|
|
168
174
|
@short_header, @header,
|
|
169
175
|
@description)
|
|
170
|
-
rescue
|
|
176
|
+
rescue StandardError
|
|
171
177
|
@validation_report = ValidationReport.new('Unexpected error', :error,
|
|
172
178
|
@short_header, @header,
|
|
173
179
|
@description)
|
|
@@ -186,18 +192,21 @@ module GeneValidator
|
|
|
186
192
|
|
|
187
193
|
hits_less = hits[0..[no_lines, hits.length - 1].min]
|
|
188
194
|
|
|
189
|
-
data = hits_less.each_with_index.map
|
|
195
|
+
data = hits_less.each_with_index.map do |hit, i|
|
|
190
196
|
{ 'y' => i,
|
|
191
197
|
'start' => hit.hsp_list.map(&:match_query_from).min,
|
|
192
198
|
'stop' => hit.hsp_list.map(&:match_query_to).max,
|
|
193
199
|
'color' => 'black',
|
|
194
|
-
'dotted' => 'true' }
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
200
|
+
'dotted' => 'true' }
|
|
201
|
+
end .flatten +
|
|
202
|
+
hits_less.each_with_index.map do |hit, i|
|
|
203
|
+
hit.hsp_list.map do |hsp|
|
|
204
|
+
{ 'y' => i,
|
|
205
|
+
'start' => hsp.match_query_from,
|
|
206
|
+
'stop' => hsp.match_query_to,
|
|
207
|
+
'color' => 'orange' }
|
|
208
|
+
end
|
|
209
|
+
end .flatten
|
|
201
210
|
|
|
202
211
|
Plot.new(data,
|
|
203
212
|
:lines,
|
|
@@ -223,9 +232,11 @@ module GeneValidator
|
|
|
223
232
|
hit.hsp_list.map(&:match_query_to).max)
|
|
224
233
|
end
|
|
225
234
|
|
|
226
|
-
data = hits.map
|
|
227
|
-
|
|
228
|
-
|
|
235
|
+
data = hits.map do |hit|
|
|
236
|
+
{ 'x' => hit.hsp_list.map(&:match_query_from).min,
|
|
237
|
+
'y' => hit.hsp_list.map(&:match_query_to).max,
|
|
238
|
+
'color' => 'red' }
|
|
239
|
+
end
|
|
229
240
|
|
|
230
241
|
Plot.new(data,
|
|
231
242
|
:scatter,
|