genevalidator 1.6.1 → 1.6.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +3 -1
- data/.travis.yml +2 -0
- data/README.md +78 -30
- data/Rakefile +11 -8
- data/aux/app_template_footer.erb +1 -6
- data/aux/app_template_header.erb +12 -32
- data/aux/files/css/style.css +2 -8
- data/aux/files/js/plots.js +564 -576
- data/aux/files/js/script.js +10 -0
- data/aux/json_footer.erb +8 -0
- data/aux/json_header.erb +19 -0
- data/aux/json_query.erb +14 -0
- data/aux/template_footer.erb +9 -58
- data/aux/template_header.erb +18 -58
- data/aux/template_query.erb +8 -36
- data/bin/genevalidator +45 -32
- data/genevalidator.gemspec +11 -7
- data/lib/genevalidator.rb +75 -455
- data/lib/genevalidator/arg_validation.rb +78 -107
- data/lib/genevalidator/blast.rb +57 -60
- data/lib/genevalidator/clusterization.rb +15 -15
- data/lib/genevalidator/exceptions.rb +32 -5
- data/lib/genevalidator/get_raw_sequences.rb +70 -33
- data/lib/genevalidator/hsp.rb +1 -4
- data/lib/genevalidator/json_to_gv_results.rb +109 -0
- data/lib/genevalidator/output.rb +177 -185
- data/lib/genevalidator/pool.rb +2 -1
- data/lib/genevalidator/sequences.rb +3 -3
- data/lib/genevalidator/tabular_parser.rb +24 -18
- data/lib/genevalidator/validation.rb +279 -0
- data/lib/genevalidator/validation_alignment.rb +31 -47
- data/lib/genevalidator/validation_blast_reading_frame.rb +19 -18
- data/lib/genevalidator/validation_duplication.rb +23 -19
- data/lib/genevalidator/validation_gene_merge.rb +30 -65
- data/lib/genevalidator/validation_length_cluster.rb +14 -53
- data/lib/genevalidator/validation_length_rank.rb +10 -11
- data/lib/genevalidator/validation_open_reading_frame.rb +18 -19
- data/lib/genevalidator/validation_report.rb +2 -5
- data/lib/genevalidator/validation_test.rb +8 -4
- data/lib/genevalidator/version.rb +1 -1
- data/test/test_all_validations.rb +51 -66
- data/test/test_blast.rb +68 -51
- data/test/test_clusterization.rb +1 -1
- data/test/test_clusterization_2d.rb +19 -13
- data/test/test_extended_array_methods.rb +1 -1
- data/test/test_files/all_validations_mrna/mrna.blast_tab6 +1806 -0
- data/test/test_files/all_validations_mrna/mrna.blast_tab7 +1865 -0
- data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml → mrna.blast_xml} +18642 -1
- data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml.index → mrna.blast_xml.index} +300 -0
- data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta → mrna.fa} +0 -0
- data/test/test_files/all_validations_mrna/mrna.raw_seq +3970 -0
- data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml.raw_seq.idx → mrna.raw_seq.idx} +901 -1
- data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_tab → prot.blast_tab6} +416 -0
- data/test/test_files/all_validations_prot/prot.blast_tab7 +2400 -0
- data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml → prot.blast_xml} +18299 -6723
- data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.index → prot.blast_xml.index} +408 -0
- data/test/test_files/all_validations_prot/{all_validations_prot.fasta → prot.fa} +0 -0
- data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.raw_seq → prot.raw_seq} +2735 -0
- data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.raw_seq.idx → prot.raw_seq.idx} +3032 -1808
- data/test/test_sequences.rb +46 -41
- data/test/test_validation_open_reading_frame.rb +318 -202
- data/test/test_validations.rb +48 -32
- metadata +76 -102
- data/doc/AliasDuplicationError.html +0 -134
- data/doc/AlignmentValidation.html +0 -1687
- data/doc/AlignmentValidationOutput.html +0 -659
- data/doc/Blast.html +0 -1905
- data/doc/BlastRFValidationOutput.html +0 -545
- data/doc/BlastReadingFrameValidation.html +0 -370
- data/doc/BlastUtils.html +0 -875
- data/doc/ClasspathError.html +0 -134
- data/doc/Cluster.html +0 -1316
- data/doc/DuplciationValidationOutput.html +0 -564
- data/doc/DuplicationValidation.html +0 -920
- data/doc/DuplicationValidationOutput.html +0 -564
- data/doc/FileNotFoundException.html +0 -134
- data/doc/GeneMergeValidation.html +0 -935
- data/doc/GeneMergeValidationOutput.html +0 -652
- data/doc/HierarchicalClusterization.html +0 -994
- data/doc/Hsp.html +0 -1485
- data/doc/InconsistentTabularFormat.html +0 -135
- data/doc/LengthClusterValidation.html +0 -982
- data/doc/LengthClusterValidationOutput.html +0 -515
- data/doc/LengthRankValidation.html +0 -496
- data/doc/LengthRankValidationOutput.html +0 -517
- data/doc/NoInternetError.html +0 -135
- data/doc/NoMafftInstallationError.html +0 -134
- data/doc/NoPIdentError.html +0 -134
- data/doc/NoValidationError.html +0 -134
- data/doc/NotEnoughHitsError.html +0 -135
- data/doc/ORFValidationOutput.html +0 -593
- data/doc/OpenReadingFrameValidation.html +0 -1107
- data/doc/OtherError.html +0 -123
- data/doc/Output.html +0 -1540
- data/doc/Pair.html +0 -309
- data/doc/PairCluster.html +0 -767
- data/doc/Plot.html +0 -837
- data/doc/QueryError.html +0 -134
- data/doc/ReportClassError.html +0 -135
- data/doc/Sequence.html +0 -1299
- data/doc/SequenceTypeError.html +0 -135
- data/doc/TabularEntry.html +0 -837
- data/doc/TabularParser.html +0 -1104
- data/doc/Validation.html +0 -2147
- data/doc/ValidationClassError.html +0 -134
- data/doc/ValidationOutput.html +0 -460
- data/doc/ValidationReport.html +0 -940
- data/doc/ValidationTest.html +0 -939
- data/doc/_index.html +0 -449
- data/doc/class_list.html +0 -54
- data/doc/css/common.css +0 -1
- data/doc/css/full_list.css +0 -57
- data/doc/css/style.css +0 -338
- data/doc/file.README.html +0 -151
- data/doc/file_list.html +0 -56
- data/doc/frames.html +0 -26
- data/doc/index.html +0 -151
- data/doc/js/app.js +0 -214
- data/doc/js/full_list.js +0 -178
- data/doc/js/jquery.js +0 -4
- data/doc/method_list.html +0 -1505
- data/doc/top-level-namespace.html +0 -112
- data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab +0 -967
- data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.index +0 -967
- data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.raw_seq +0 -4929
- data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.raw_seq.idx +0 -1006
- data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_xml.raw_seq +0 -2075
- data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.index +0 -1864
- data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.raw_seq +0 -42411
- data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.raw_seq.idx +0 -3751
@@ -13,7 +13,7 @@ module GeneValidator
|
|
13
13
|
end
|
14
14
|
|
15
15
|
def print
|
16
|
-
puts "Cluster: #{x} #{y}"
|
16
|
+
$stderr.puts "Cluster: #{x} #{y}"
|
17
17
|
end
|
18
18
|
|
19
19
|
##
|
@@ -66,7 +66,7 @@ module GeneValidator
|
|
66
66
|
|
67
67
|
def print
|
68
68
|
objects.each do |elem|
|
69
|
-
puts "(#{elem[0].x},#{elem[0].y}): #{elem[1]}"
|
69
|
+
$stderr.puts "(#{elem[0].x},#{elem[0].y}): #{elem[1]}"
|
70
70
|
end
|
71
71
|
end
|
72
72
|
|
@@ -270,11 +270,11 @@ module GeneValidator
|
|
270
270
|
##
|
271
271
|
# Prints the current cluster
|
272
272
|
def print
|
273
|
-
puts "Cluster: mean = #{mean}, density = #{density}"
|
273
|
+
$stderr.puts "Cluster: mean = #{mean}, density = #{density}"
|
274
274
|
lengths.sort { |a, b| a <=> b }.each do |elem|
|
275
|
-
puts "#{elem[0]}, #{elem[1]}"
|
275
|
+
$stderr.puts "#{elem[0]}, #{elem[1]}"
|
276
276
|
end
|
277
|
-
puts '--------------------------'
|
277
|
+
$stderr.puts '--------------------------'
|
278
278
|
end
|
279
279
|
|
280
280
|
##
|
@@ -334,7 +334,7 @@ module GeneValidator
|
|
334
334
|
# initially each length belongs to a different cluster
|
335
335
|
histogram.each do |elem|
|
336
336
|
if debug
|
337
|
-
puts "pair (#{elem[0].x} #{elem[0].y}) appears #{elem[1]} times"
|
337
|
+
$stderr.puts "pair (#{elem[0].x} #{elem[0].y}) appears #{elem[1]} times"
|
338
338
|
end
|
339
339
|
hash = { elem[0] => elem[1] }
|
340
340
|
cluster = PairCluster.new(hash)
|
@@ -353,7 +353,7 @@ module GeneValidator
|
|
353
353
|
break if no_clusters != 0 && clusters.length == no_clusters
|
354
354
|
|
355
355
|
iteration = iteration + 1
|
356
|
-
puts "\nIteration #{iteration}" if debug
|
356
|
+
$stderr.puts "\nIteration #{iteration}" if debug
|
357
357
|
|
358
358
|
min_distance = 100_000_000
|
359
359
|
cluster1 = 0
|
@@ -363,7 +363,7 @@ module GeneValidator
|
|
363
363
|
[*(0..(clusters.length - 2))].each do |i|
|
364
364
|
[*((i + 1)..(clusters.length - 1))].each do |j|
|
365
365
|
dist = clusters[i].distance(clusters[j], distance_method)
|
366
|
-
puts "distance between clusters #{i} and #{j} is #{dist}" if debug
|
366
|
+
$stderr.puts "distance between clusters #{i} and #{j} is #{dist}" if debug
|
367
367
|
current_density = clusters[i].density + clusters[j].density
|
368
368
|
if dist < min_distance
|
369
369
|
min_distance = dist
|
@@ -379,14 +379,14 @@ module GeneValidator
|
|
379
379
|
end
|
380
380
|
|
381
381
|
# merge clusters 'cluster1' and 'cluster2'
|
382
|
-
puts "clusters to merge #{cluster1} and #{cluster2}" if debug
|
382
|
+
$stderr.puts "clusters to merge #{cluster1} and #{cluster2}" if debug
|
383
383
|
|
384
384
|
clusters[cluster1].add(clusters[cluster2])
|
385
385
|
clusters.delete_at(cluster2)
|
386
386
|
|
387
387
|
if debug
|
388
388
|
clusters.each_with_index do |elem, i|
|
389
|
-
puts "cluster #{i}"
|
389
|
+
$stderr.puts "cluster #{i}"
|
390
390
|
elem.print
|
391
391
|
end
|
392
392
|
end
|
@@ -433,7 +433,7 @@ module GeneValidator
|
|
433
433
|
# clusters = array of clusters
|
434
434
|
# initially each length belongs to a different cluster
|
435
435
|
histogram.sort { |a, b| a[0] <=> b[0] }.each do |elem|
|
436
|
-
puts "len #{elem[0]} appears #{elem[1]} times" if debug
|
436
|
+
$stderr.puts "len #{elem[0]} appears #{elem[1]} times" if debug
|
437
437
|
hash = { elem[0] => elem[1] }
|
438
438
|
cluster = Cluster.new(hash)
|
439
439
|
clusters.push(cluster)
|
@@ -452,7 +452,7 @@ module GeneValidator
|
|
452
452
|
break if no_clusters != 0 && clusters.length == no_clusters
|
453
453
|
|
454
454
|
iteration = iteration + 1
|
455
|
-
puts "\nIteration #{iteration}" if debug
|
455
|
+
$stderr.puts "\nIteration #{iteration}" if debug
|
456
456
|
|
457
457
|
min_distance = 100_000_000
|
458
458
|
cluster = 0
|
@@ -460,7 +460,7 @@ module GeneValidator
|
|
460
460
|
|
461
461
|
clusters[0..clusters.length - 2].each_with_index do |_item, i|
|
462
462
|
dist = clusters[i].distance(clusters[i + 1], distance_method)
|
463
|
-
puts "distance between clusters #{i} and #{i + 1} is #{dist}" if debug
|
463
|
+
$stderr.puts "distance between clusters #{i} and #{i + 1} is #{dist}" if debug
|
464
464
|
current_density = clusters[i].density + clusters[i + 1].density
|
465
465
|
if dist < min_distance
|
466
466
|
min_distance = dist
|
@@ -479,14 +479,14 @@ module GeneValidator
|
|
479
479
|
end
|
480
480
|
|
481
481
|
# merge clusters 'cluster' and 'cluster'+1
|
482
|
-
puts "clusters to merge #{cluster} and #{cluster + 1}" if debug
|
482
|
+
$stderr.puts "clusters to merge #{cluster} and #{cluster + 1}" if debug
|
483
483
|
|
484
484
|
clusters[cluster].add(clusters[cluster + 1])
|
485
485
|
clusters.delete_at(cluster + 1)
|
486
486
|
|
487
487
|
if debug
|
488
488
|
clusters.each_with_index do |elem, i|
|
489
|
-
puts "cluster #{i}"
|
489
|
+
$stderr.puts "cluster #{i}"
|
490
490
|
elem.print
|
491
491
|
end
|
492
492
|
end
|
@@ -3,9 +3,14 @@ module GeneValidator
|
|
3
3
|
class ClasspathError < Exception
|
4
4
|
end
|
5
5
|
|
6
|
+
|
6
7
|
# Exception raised when the command line type argument
|
7
8
|
# does not corrsepond to the type of the sequences in the fasta file
|
8
9
|
class SequenceTypeError < Exception
|
10
|
+
def to_s
|
11
|
+
"\nSequence Type error: Possible cause include that the blast output" \
|
12
|
+
" was not obtained against a protein database.\n"
|
13
|
+
end
|
9
14
|
end
|
10
15
|
|
11
16
|
# Exception raised when an unexisting file is accessed
|
@@ -18,17 +23,29 @@ module GeneValidator
|
|
18
23
|
|
19
24
|
# Exception raised when a validation class is not instance of ValidationTest
|
20
25
|
class ValidationClassError < Exception
|
26
|
+
def to_s
|
27
|
+
"\nClass Type error: Possible cause include that one of the validations" \
|
28
|
+
" is not a sub-class of ValidationTest\n"
|
29
|
+
end
|
21
30
|
end
|
22
31
|
|
23
|
-
# Exception raised when a validation report class is not instance of
|
32
|
+
# Exception raised when a validation report class is not instance of
|
33
|
+
# ValidationReport
|
24
34
|
class ReportClassError < Exception
|
35
|
+
def to_s
|
36
|
+
"\nClass Type error: Possible causes include that the type of one of" \
|
37
|
+
' the validation reports is not a subclass of the ValidationReport' \
|
38
|
+
" class.\n"
|
39
|
+
end
|
25
40
|
end
|
26
41
|
|
27
|
-
# Exception raised when there are not enough blast hits to make a
|
42
|
+
# Exception raised when there are not enough blast hits to make a statistical
|
43
|
+
# validation
|
28
44
|
class NotEnoughHitsError < Exception
|
29
45
|
end
|
30
46
|
|
31
|
-
# Exception raised when function dependig on the internet connection raise
|
47
|
+
# Exception raised when function dependig on the internet connection raise
|
48
|
+
# Exception
|
32
49
|
class NoInternetError < Exception
|
33
50
|
end
|
34
51
|
|
@@ -38,21 +55,31 @@ module GeneValidator
|
|
38
55
|
|
39
56
|
# Exception raised when the -v argument didn't filter any validatio test
|
40
57
|
class NoValidationError < Exception
|
58
|
+
def to_s
|
59
|
+
"\nValidation error: Possible cause inlcude that the -v arguments" \
|
60
|
+
" supplied is not valid\n"
|
61
|
+
end
|
41
62
|
end
|
42
63
|
|
43
64
|
# Exception raised when the are alias duplications
|
44
65
|
class AliasDuplicationError < Exception
|
66
|
+
def to_s
|
67
|
+
"\nAlias Duplication error: Possible cause: At least two validations" \
|
68
|
+
" have the same CLI alias\n"
|
69
|
+
end
|
45
70
|
end
|
46
71
|
|
47
72
|
# Exception raised when the are alias duplications
|
48
73
|
class NoPIdentError < Exception
|
49
74
|
end
|
50
75
|
|
51
|
-
# Exception raised when the tabular format does not correspond to the tabular
|
76
|
+
# Exception raised when the tabular format does not correspond to the tabular
|
77
|
+
# argument
|
52
78
|
class InconsistentTabularFormat < Exception
|
53
79
|
end
|
54
80
|
|
55
|
-
# Exception raised when there are more than one reading frame among the hits
|
81
|
+
# Exception raised when there are more than one reading frame among the hits
|
82
|
+
# of one prediction
|
56
83
|
class ReadingFrameError < Exception
|
57
84
|
end
|
58
85
|
|
@@ -1,36 +1,73 @@
|
|
1
|
-
require 'genevalidator/sequences'
|
2
|
-
require 'genevalidator/exceptions'
|
3
1
|
require 'bio-blastxmlparser'
|
2
|
+
require 'forwardable'
|
4
3
|
require 'net/http'
|
5
|
-
require 'open-uri'
|
6
4
|
require 'uri'
|
7
|
-
require 'io/console'
|
8
5
|
require 'yaml'
|
6
|
+
|
7
|
+
require 'genevalidator/exceptions'
|
8
|
+
require 'genevalidator/sequences'
|
9
|
+
|
9
10
|
module GeneValidator
|
10
11
|
# Gets the raw sequences for each hit in a BLAST output file
|
11
|
-
|
12
|
+
class RawSequences
|
12
13
|
class <<self
|
14
|
+
extend Forwardable
|
15
|
+
def_delegators GeneValidator, :opt, :config
|
16
|
+
|
13
17
|
##
|
14
18
|
# Obtains raw_sequences from BLAST output file...
|
15
|
-
def run
|
19
|
+
def run
|
16
20
|
@opt = opt
|
21
|
+
@config = config
|
17
22
|
|
18
|
-
|
19
|
-
|
23
|
+
$stderr.puts 'Extracting sequences within the BLAST output file from' \
|
24
|
+
' the BLAST database'
|
25
|
+
|
26
|
+
if @opt[:blast_xml_file]
|
27
|
+
@blast_file = @opt[:blast_xml_file]
|
20
28
|
else
|
21
|
-
@blast_file = opt[:blast_tabular_file]
|
29
|
+
@blast_file = @opt[:blast_tabular_file]
|
22
30
|
end
|
23
31
|
|
24
|
-
|
25
|
-
index_file
|
32
|
+
@opt[:raw_sequences] = @blast_file + '.raw_seq'
|
33
|
+
index_file = @blast_file + '.index'
|
26
34
|
|
27
35
|
if opt[:db] =~ /remote/
|
28
|
-
|
36
|
+
write_a_raw_seq_file(@opt[:raw_sequences], 'remote')
|
29
37
|
else
|
30
38
|
write_an_index_file(index_file, 'local')
|
31
|
-
obtain_raw_seqs_from_local_db(index_file,
|
39
|
+
obtain_raw_seqs_from_local_db(index_file, @opt[:raw_sequences])
|
40
|
+
end
|
41
|
+
index_raw_seq_file(@opt[:raw_sequences])
|
42
|
+
end
|
43
|
+
|
44
|
+
##
|
45
|
+
# Index the raw sequences file...
|
46
|
+
def index_raw_seq_file(raw_seq_file = opt[:raw_sequences])
|
47
|
+
# leave only the identifiers in the fasta description
|
48
|
+
content = File.open(raw_seq_file, 'rb').read.gsub(/ .*/, '')
|
49
|
+
File.open(raw_seq_file, 'w+') { |f| f.write(content) }
|
50
|
+
|
51
|
+
# index the fasta file
|
52
|
+
keys = content.scan(/>(.*)\n/).flatten
|
53
|
+
values = content.enum_for(:scan, /(>[^>]+)/).map { Regexp.last_match.begin(0) }
|
54
|
+
|
55
|
+
# make an index hash
|
56
|
+
index_hash = {}
|
57
|
+
keys.each_with_index do |k, i|
|
58
|
+
start = values[i]
|
59
|
+
endf = (i == values.length - 1) ? content.length - 1 : values[i + 1]
|
60
|
+
index_hash[k] = [start, endf]
|
61
|
+
end
|
62
|
+
|
63
|
+
# create FASTA index
|
64
|
+
config[:raw_seq_file_index] = "#{raw_seq_file}.idx"
|
65
|
+
config[:raw_seq_file_load] = index_hash
|
66
|
+
|
67
|
+
File.open(config[:raw_seq_file_index], 'w') do |f|
|
68
|
+
YAML.dump(index_hash, f)
|
32
69
|
end
|
33
|
-
|
70
|
+
content = nil
|
34
71
|
end
|
35
72
|
|
36
73
|
private
|
@@ -43,13 +80,13 @@ module GeneValidator
|
|
43
80
|
file.close unless file.nil?
|
44
81
|
end
|
45
82
|
|
46
|
-
alias_method :
|
83
|
+
alias_method :write_a_raw_seq_file, :write_an_index_file
|
47
84
|
|
48
85
|
def iterate_xml(file, db_type)
|
49
86
|
n = Bio::BlastXMLParser::XmlIterator.new(@opt[:blast_xml_file]).to_enum
|
50
87
|
n.each do |iter|
|
51
88
|
iter.each do |hit|
|
52
|
-
if db_type == 'remote'
|
89
|
+
if db_type == 'remote' || hit.hit_id.nil?
|
53
90
|
file.puts obtain_raw_seqs_from_remote_db(hit.accession)
|
54
91
|
else
|
55
92
|
file.puts hit.hit_id
|
@@ -57,10 +94,10 @@ module GeneValidator
|
|
57
94
|
end
|
58
95
|
end
|
59
96
|
rescue
|
60
|
-
puts '*** Error: There was an error in analysing the BLAST XML file.'
|
61
|
-
puts ' Please ensure that BLAST XML file is in the correct format'
|
62
|
-
puts ' and then try again. If you are using a remote database,'
|
63
|
-
puts ' please ensure that you have internet access.'
|
97
|
+
$stderr.puts '*** Error: There was an error in analysing the BLAST XML file.'
|
98
|
+
$stderr.puts ' Please ensure that BLAST XML file is in the correct format'
|
99
|
+
$stderr.puts ' and then try again. If you are using a remote database,'
|
100
|
+
$stderr.puts ' please ensure that you have internet access.'
|
64
101
|
exit 1
|
65
102
|
end
|
66
103
|
|
@@ -73,23 +110,23 @@ module GeneValidator
|
|
73
110
|
assert_table_has_correct_no_of_collumns(rows, table_headers)
|
74
111
|
|
75
112
|
rows.each do |row|
|
76
|
-
if db_type == 'remote'
|
113
|
+
if db_type == 'remote' || row['sseqid'].nil?
|
77
114
|
file.puts obtain_raw_seqs_from_remote_db(row['sacc'])
|
78
115
|
else
|
79
116
|
file.puts row['sseqid']
|
80
117
|
end
|
81
118
|
end
|
82
119
|
rescue
|
83
|
-
puts '*** Error: There was an error in analysing the BLAST tabular'
|
84
|
-
puts ' file. Please ensure that BLAST tabular file is in the correct'
|
85
|
-
puts ' format and then try again. If you are using a remote'
|
86
|
-
puts ' database, please ensure that you have internet access.'
|
120
|
+
$stderr.puts '*** Error: There was an error in analysing the BLAST tabular'
|
121
|
+
$stderr.puts ' file. Please ensure that BLAST tabular file is in the correct'
|
122
|
+
$stderr.puts ' format and then try again. If you are using a remote'
|
123
|
+
$stderr.puts ' database, please ensure that you have internet access.'
|
87
124
|
exit 1
|
88
125
|
end
|
89
126
|
|
90
127
|
def obtain_raw_seqs_from_local_db(index_file, raw_seq_file)
|
91
|
-
cmd = "blastdbcmd -entry_batch #{index_file} -db #{@opt[:db]}
|
92
|
-
" '%f' -out #{raw_seq_file}"
|
128
|
+
cmd = "blastdbcmd -entry_batch '#{index_file}' -db '#{@opt[:db]}'" \
|
129
|
+
" -outfmt '%f' -out '#{raw_seq_file}'"
|
93
130
|
`#{cmd}`
|
94
131
|
end
|
95
132
|
|
@@ -106,8 +143,8 @@ module GeneValidator
|
|
106
143
|
result = Net::HTTP.get(URI.parse(uri))
|
107
144
|
raw_seqs = result[0..result.length - 2]
|
108
145
|
unless raw_seqs.downcase.index(/error/).nil?
|
109
|
-
puts '*** Error: There was an error in obtaining the raw sequence' \
|
110
|
-
|
146
|
+
$stderr.puts '*** Error: There was an error in obtaining the raw sequence' \
|
147
|
+
' of a BLAST hit. Please ensure that you have internet access.'
|
111
148
|
exit 1
|
112
149
|
end
|
113
150
|
raw_seqs
|
@@ -116,10 +153,10 @@ module GeneValidator
|
|
116
153
|
def assert_table_has_correct_no_of_collumns(rows, table_headers)
|
117
154
|
rows.each do |row|
|
118
155
|
unless row.length == table_headers.length
|
119
|
-
puts '*** Error: The BLAST tabular file cannot be parsed. This is' \
|
120
|
-
|
121
|
-
|
122
|
-
|
156
|
+
$stderr.puts '*** Error: The BLAST tabular file cannot be parsed. This is' \
|
157
|
+
' could possibly be due to an incorrect BLAST tabular' \
|
158
|
+
' options ("-o", "--blast_tabular_options") being supplied.' \
|
159
|
+
' Please correct this and try again.'
|
123
160
|
exit 1
|
124
161
|
end
|
125
162
|
break # break after checking the first column
|
data/lib/genevalidator/hsp.rb
CHANGED
@@ -33,8 +33,7 @@ module GeneValidator
|
|
33
33
|
# Params:
|
34
34
|
# +column+: String with column name.
|
35
35
|
# +value+: Value of the column
|
36
|
-
|
37
|
-
def init_tabular_attribute(hash, _type = :protein)
|
36
|
+
def init_tabular_attribute(hash)
|
38
37
|
@match_query_from = hash['qstart'].to_i if hash['qstart']
|
39
38
|
@match_query_to = hash['qend'].to_i if hash['qend']
|
40
39
|
@query_reading_frame = hash['qframe'].to_i if hash['qframe']
|
@@ -47,12 +46,10 @@ module GeneValidator
|
|
47
46
|
@identity = hash['nident'].to_f if hash['nident']
|
48
47
|
@hsp_evalue = hash['evalue'].to_f if hash['evalue']
|
49
48
|
if hash['qseq']
|
50
|
-
puts @query_alignment
|
51
49
|
query_seq_type = BlastUtils.guess_sequence_type(@query_alignment)
|
52
50
|
fail SequenceTypeError if query_seq_type != :protein
|
53
51
|
end
|
54
52
|
if hash['sseq']
|
55
|
-
puts @hit_alignment
|
56
53
|
hit_seq_type = BlastUtils.guess_sequence_type(@hit_alignment)
|
57
54
|
fail SequenceTypeError if hit_seq_type != :protein
|
58
55
|
end
|
@@ -0,0 +1,109 @@
|
|
1
|
+
require 'erb'
|
2
|
+
require 'fileutils'
|
3
|
+
require 'forwardable'
|
4
|
+
require 'json'
|
5
|
+
|
6
|
+
require 'genevalidator'
|
7
|
+
require 'genevalidator/output'
|
8
|
+
require 'genevalidator/version'
|
9
|
+
|
10
|
+
module GeneValidator
|
11
|
+
# produce GV results from a JSON previously produced from GV
|
12
|
+
class JsonToGVResults
|
13
|
+
class << self
|
14
|
+
extend Forwardable
|
15
|
+
def_delegators GeneValidator, :opt
|
16
|
+
|
17
|
+
def init
|
18
|
+
@opt = opt
|
19
|
+
@config = {
|
20
|
+
html_path: "#{@opt[:json_file]}.html",
|
21
|
+
plot_dir: "#{@opt[:json_file]}.html/files/json",
|
22
|
+
aux: File.expand_path(File.join(File.dirname(__FILE__), '../../aux')),
|
23
|
+
filename: File.basename(@opt[:json_file]),
|
24
|
+
output_max: 2500,
|
25
|
+
run_no: 0
|
26
|
+
}
|
27
|
+
@json_array = load_json_file
|
28
|
+
end
|
29
|
+
|
30
|
+
def run
|
31
|
+
init
|
32
|
+
GeneValidator.create_output_folder(@config[:html_path], @config[:aux])
|
33
|
+
@json_array.each do |row|
|
34
|
+
@config[:run_no] += 1
|
35
|
+
create_json_file(row)
|
36
|
+
output_html = output_filename
|
37
|
+
generate_html_header(output_html) unless File.exist?(output_html)
|
38
|
+
generate_html_query(output_html, row)
|
39
|
+
end
|
40
|
+
html_footer
|
41
|
+
end
|
42
|
+
|
43
|
+
def load_json_file
|
44
|
+
json_contents = File.read(File.expand_path(@opt[:json_file]))
|
45
|
+
JSON.load(json_contents)
|
46
|
+
end
|
47
|
+
|
48
|
+
def create_json_file(row)
|
49
|
+
@json_file = File.join(@config[:plot_dir],
|
50
|
+
"#{@config[:filename]}_#{row['idx']}.json")
|
51
|
+
File.open(@json_file, 'w') { |f| f.write(row.to_json) }
|
52
|
+
end
|
53
|
+
|
54
|
+
def output_filename
|
55
|
+
i = (@config[:run_no].to_f / @config[:output_max]).ceil
|
56
|
+
File.join(@config[:html_path], "results#{i}.html")
|
57
|
+
end
|
58
|
+
|
59
|
+
def generate_html_header(output_html)
|
60
|
+
return if File.exist?(output_html)
|
61
|
+
json_header_template = File.join(@config[:aux], 'json_header.erb')
|
62
|
+
template_contents = File.open(json_header_template, 'r').read
|
63
|
+
erb = ERB.new(template_contents, 0, '>')
|
64
|
+
File.open(output_html, 'w+') { |f| f.write(erb.result(binding)) }
|
65
|
+
end
|
66
|
+
|
67
|
+
def generate_html_query(output_html, row)
|
68
|
+
@row = row
|
69
|
+
json_query_template = File.join(@config[:aux], 'json_query.erb')
|
70
|
+
template_contents = File.open(json_query_template, 'r').read
|
71
|
+
erb = ERB.new(template_contents, 0, '>')
|
72
|
+
File.open(output_html, 'a') { |f| f.write(erb.result(binding)) }
|
73
|
+
end
|
74
|
+
|
75
|
+
# Add footer to all output files
|
76
|
+
def html_footer
|
77
|
+
no_of_output_files = (@config[:run_no].to_f / @config[:output_max]).ceil
|
78
|
+
|
79
|
+
output_files = []
|
80
|
+
(1..no_of_output_files).each { |i| output_files << "results#{i}.html" }
|
81
|
+
|
82
|
+
write_html_footer(no_of_output_files, output_files)
|
83
|
+
end
|
84
|
+
|
85
|
+
def write_html_footer(no_of_output_files, output_files)
|
86
|
+
turn_off_automated_sorting
|
87
|
+
json_footer_template = File.join(@config[:aux], 'json_footer.erb')
|
88
|
+
template_contents = File.open(json_footer_template, 'r').read
|
89
|
+
erb = ERB.new(template_contents, 0, '>')
|
90
|
+
(1..no_of_output_files).each do |i|
|
91
|
+
results_html = File.join(@config[:html_path], "results#{i}.html")
|
92
|
+
File.open(results_html, 'a+') { |f| f.write(erb.result(binding)) }
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
# Since the whole idea is that users would sort by
|
97
|
+
def turn_off_automated_sorting
|
98
|
+
script_file = File.join(@config[:html_path], 'files/js/script.js')
|
99
|
+
temp_file = File.join(@config[:html_path], 'files/js/script.temp.js')
|
100
|
+
File.open(temp_file, 'w') do |out_file|
|
101
|
+
out_file.puts File.readlines(script_file)[0..23].join
|
102
|
+
out_file.puts '}'
|
103
|
+
out_file.puts File.readlines(script_file)[26..-1].join
|
104
|
+
end
|
105
|
+
FileUtils.mv(temp_file, script_file)
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|