genevalidator 1.6.1 → 1.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +3 -1
- data/.travis.yml +2 -0
- data/README.md +78 -30
- data/Rakefile +11 -8
- data/aux/app_template_footer.erb +1 -6
- data/aux/app_template_header.erb +12 -32
- data/aux/files/css/style.css +2 -8
- data/aux/files/js/plots.js +564 -576
- data/aux/files/js/script.js +10 -0
- data/aux/json_footer.erb +8 -0
- data/aux/json_header.erb +19 -0
- data/aux/json_query.erb +14 -0
- data/aux/template_footer.erb +9 -58
- data/aux/template_header.erb +18 -58
- data/aux/template_query.erb +8 -36
- data/bin/genevalidator +45 -32
- data/genevalidator.gemspec +11 -7
- data/lib/genevalidator.rb +75 -455
- data/lib/genevalidator/arg_validation.rb +78 -107
- data/lib/genevalidator/blast.rb +57 -60
- data/lib/genevalidator/clusterization.rb +15 -15
- data/lib/genevalidator/exceptions.rb +32 -5
- data/lib/genevalidator/get_raw_sequences.rb +70 -33
- data/lib/genevalidator/hsp.rb +1 -4
- data/lib/genevalidator/json_to_gv_results.rb +109 -0
- data/lib/genevalidator/output.rb +177 -185
- data/lib/genevalidator/pool.rb +2 -1
- data/lib/genevalidator/sequences.rb +3 -3
- data/lib/genevalidator/tabular_parser.rb +24 -18
- data/lib/genevalidator/validation.rb +279 -0
- data/lib/genevalidator/validation_alignment.rb +31 -47
- data/lib/genevalidator/validation_blast_reading_frame.rb +19 -18
- data/lib/genevalidator/validation_duplication.rb +23 -19
- data/lib/genevalidator/validation_gene_merge.rb +30 -65
- data/lib/genevalidator/validation_length_cluster.rb +14 -53
- data/lib/genevalidator/validation_length_rank.rb +10 -11
- data/lib/genevalidator/validation_open_reading_frame.rb +18 -19
- data/lib/genevalidator/validation_report.rb +2 -5
- data/lib/genevalidator/validation_test.rb +8 -4
- data/lib/genevalidator/version.rb +1 -1
- data/test/test_all_validations.rb +51 -66
- data/test/test_blast.rb +68 -51
- data/test/test_clusterization.rb +1 -1
- data/test/test_clusterization_2d.rb +19 -13
- data/test/test_extended_array_methods.rb +1 -1
- data/test/test_files/all_validations_mrna/mrna.blast_tab6 +1806 -0
- data/test/test_files/all_validations_mrna/mrna.blast_tab7 +1865 -0
- data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml → mrna.blast_xml} +18642 -1
- data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml.index → mrna.blast_xml.index} +300 -0
- data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta → mrna.fa} +0 -0
- data/test/test_files/all_validations_mrna/mrna.raw_seq +3970 -0
- data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml.raw_seq.idx → mrna.raw_seq.idx} +901 -1
- data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_tab → prot.blast_tab6} +416 -0
- data/test/test_files/all_validations_prot/prot.blast_tab7 +2400 -0
- data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml → prot.blast_xml} +18299 -6723
- data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.index → prot.blast_xml.index} +408 -0
- data/test/test_files/all_validations_prot/{all_validations_prot.fasta → prot.fa} +0 -0
- data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.raw_seq → prot.raw_seq} +2735 -0
- data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.raw_seq.idx → prot.raw_seq.idx} +3032 -1808
- data/test/test_sequences.rb +46 -41
- data/test/test_validation_open_reading_frame.rb +318 -202
- data/test/test_validations.rb +48 -32
- metadata +76 -102
- data/doc/AliasDuplicationError.html +0 -134
- data/doc/AlignmentValidation.html +0 -1687
- data/doc/AlignmentValidationOutput.html +0 -659
- data/doc/Blast.html +0 -1905
- data/doc/BlastRFValidationOutput.html +0 -545
- data/doc/BlastReadingFrameValidation.html +0 -370
- data/doc/BlastUtils.html +0 -875
- data/doc/ClasspathError.html +0 -134
- data/doc/Cluster.html +0 -1316
- data/doc/DuplciationValidationOutput.html +0 -564
- data/doc/DuplicationValidation.html +0 -920
- data/doc/DuplicationValidationOutput.html +0 -564
- data/doc/FileNotFoundException.html +0 -134
- data/doc/GeneMergeValidation.html +0 -935
- data/doc/GeneMergeValidationOutput.html +0 -652
- data/doc/HierarchicalClusterization.html +0 -994
- data/doc/Hsp.html +0 -1485
- data/doc/InconsistentTabularFormat.html +0 -135
- data/doc/LengthClusterValidation.html +0 -982
- data/doc/LengthClusterValidationOutput.html +0 -515
- data/doc/LengthRankValidation.html +0 -496
- data/doc/LengthRankValidationOutput.html +0 -517
- data/doc/NoInternetError.html +0 -135
- data/doc/NoMafftInstallationError.html +0 -134
- data/doc/NoPIdentError.html +0 -134
- data/doc/NoValidationError.html +0 -134
- data/doc/NotEnoughHitsError.html +0 -135
- data/doc/ORFValidationOutput.html +0 -593
- data/doc/OpenReadingFrameValidation.html +0 -1107
- data/doc/OtherError.html +0 -123
- data/doc/Output.html +0 -1540
- data/doc/Pair.html +0 -309
- data/doc/PairCluster.html +0 -767
- data/doc/Plot.html +0 -837
- data/doc/QueryError.html +0 -134
- data/doc/ReportClassError.html +0 -135
- data/doc/Sequence.html +0 -1299
- data/doc/SequenceTypeError.html +0 -135
- data/doc/TabularEntry.html +0 -837
- data/doc/TabularParser.html +0 -1104
- data/doc/Validation.html +0 -2147
- data/doc/ValidationClassError.html +0 -134
- data/doc/ValidationOutput.html +0 -460
- data/doc/ValidationReport.html +0 -940
- data/doc/ValidationTest.html +0 -939
- data/doc/_index.html +0 -449
- data/doc/class_list.html +0 -54
- data/doc/css/common.css +0 -1
- data/doc/css/full_list.css +0 -57
- data/doc/css/style.css +0 -338
- data/doc/file.README.html +0 -151
- data/doc/file_list.html +0 -56
- data/doc/frames.html +0 -26
- data/doc/index.html +0 -151
- data/doc/js/app.js +0 -214
- data/doc/js/full_list.js +0 -178
- data/doc/js/jquery.js +0 -4
- data/doc/method_list.html +0 -1505
- data/doc/top-level-namespace.html +0 -112
- data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab +0 -967
- data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.index +0 -967
- data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.raw_seq +0 -4929
- data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.raw_seq.idx +0 -1006
- data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_xml.raw_seq +0 -2075
- data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.index +0 -1864
- data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.raw_seq +0 -42411
- data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.raw_seq.idx +0 -3751
@@ -13,7 +13,7 @@ module GeneValidator
|
|
13
13
|
end
|
14
14
|
|
15
15
|
def print
|
16
|
-
puts "Cluster: #{x} #{y}"
|
16
|
+
$stderr.puts "Cluster: #{x} #{y}"
|
17
17
|
end
|
18
18
|
|
19
19
|
##
|
@@ -66,7 +66,7 @@ module GeneValidator
|
|
66
66
|
|
67
67
|
def print
|
68
68
|
objects.each do |elem|
|
69
|
-
puts "(#{elem[0].x},#{elem[0].y}): #{elem[1]}"
|
69
|
+
$stderr.puts "(#{elem[0].x},#{elem[0].y}): #{elem[1]}"
|
70
70
|
end
|
71
71
|
end
|
72
72
|
|
@@ -270,11 +270,11 @@ module GeneValidator
|
|
270
270
|
##
|
271
271
|
# Prints the current cluster
|
272
272
|
def print
|
273
|
-
puts "Cluster: mean = #{mean}, density = #{density}"
|
273
|
+
$stderr.puts "Cluster: mean = #{mean}, density = #{density}"
|
274
274
|
lengths.sort { |a, b| a <=> b }.each do |elem|
|
275
|
-
puts "#{elem[0]}, #{elem[1]}"
|
275
|
+
$stderr.puts "#{elem[0]}, #{elem[1]}"
|
276
276
|
end
|
277
|
-
puts '--------------------------'
|
277
|
+
$stderr.puts '--------------------------'
|
278
278
|
end
|
279
279
|
|
280
280
|
##
|
@@ -334,7 +334,7 @@ module GeneValidator
|
|
334
334
|
# initially each length belongs to a different cluster
|
335
335
|
histogram.each do |elem|
|
336
336
|
if debug
|
337
|
-
puts "pair (#{elem[0].x} #{elem[0].y}) appears #{elem[1]} times"
|
337
|
+
$stderr.puts "pair (#{elem[0].x} #{elem[0].y}) appears #{elem[1]} times"
|
338
338
|
end
|
339
339
|
hash = { elem[0] => elem[1] }
|
340
340
|
cluster = PairCluster.new(hash)
|
@@ -353,7 +353,7 @@ module GeneValidator
|
|
353
353
|
break if no_clusters != 0 && clusters.length == no_clusters
|
354
354
|
|
355
355
|
iteration = iteration + 1
|
356
|
-
puts "\nIteration #{iteration}" if debug
|
356
|
+
$stderr.puts "\nIteration #{iteration}" if debug
|
357
357
|
|
358
358
|
min_distance = 100_000_000
|
359
359
|
cluster1 = 0
|
@@ -363,7 +363,7 @@ module GeneValidator
|
|
363
363
|
[*(0..(clusters.length - 2))].each do |i|
|
364
364
|
[*((i + 1)..(clusters.length - 1))].each do |j|
|
365
365
|
dist = clusters[i].distance(clusters[j], distance_method)
|
366
|
-
puts "distance between clusters #{i} and #{j} is #{dist}" if debug
|
366
|
+
$stderr.puts "distance between clusters #{i} and #{j} is #{dist}" if debug
|
367
367
|
current_density = clusters[i].density + clusters[j].density
|
368
368
|
if dist < min_distance
|
369
369
|
min_distance = dist
|
@@ -379,14 +379,14 @@ module GeneValidator
|
|
379
379
|
end
|
380
380
|
|
381
381
|
# merge clusters 'cluster1' and 'cluster2'
|
382
|
-
puts "clusters to merge #{cluster1} and #{cluster2}" if debug
|
382
|
+
$stderr.puts "clusters to merge #{cluster1} and #{cluster2}" if debug
|
383
383
|
|
384
384
|
clusters[cluster1].add(clusters[cluster2])
|
385
385
|
clusters.delete_at(cluster2)
|
386
386
|
|
387
387
|
if debug
|
388
388
|
clusters.each_with_index do |elem, i|
|
389
|
-
puts "cluster #{i}"
|
389
|
+
$stderr.puts "cluster #{i}"
|
390
390
|
elem.print
|
391
391
|
end
|
392
392
|
end
|
@@ -433,7 +433,7 @@ module GeneValidator
|
|
433
433
|
# clusters = array of clusters
|
434
434
|
# initially each length belongs to a different cluster
|
435
435
|
histogram.sort { |a, b| a[0] <=> b[0] }.each do |elem|
|
436
|
-
puts "len #{elem[0]} appears #{elem[1]} times" if debug
|
436
|
+
$stderr.puts "len #{elem[0]} appears #{elem[1]} times" if debug
|
437
437
|
hash = { elem[0] => elem[1] }
|
438
438
|
cluster = Cluster.new(hash)
|
439
439
|
clusters.push(cluster)
|
@@ -452,7 +452,7 @@ module GeneValidator
|
|
452
452
|
break if no_clusters != 0 && clusters.length == no_clusters
|
453
453
|
|
454
454
|
iteration = iteration + 1
|
455
|
-
puts "\nIteration #{iteration}" if debug
|
455
|
+
$stderr.puts "\nIteration #{iteration}" if debug
|
456
456
|
|
457
457
|
min_distance = 100_000_000
|
458
458
|
cluster = 0
|
@@ -460,7 +460,7 @@ module GeneValidator
|
|
460
460
|
|
461
461
|
clusters[0..clusters.length - 2].each_with_index do |_item, i|
|
462
462
|
dist = clusters[i].distance(clusters[i + 1], distance_method)
|
463
|
-
puts "distance between clusters #{i} and #{i + 1} is #{dist}" if debug
|
463
|
+
$stderr.puts "distance between clusters #{i} and #{i + 1} is #{dist}" if debug
|
464
464
|
current_density = clusters[i].density + clusters[i + 1].density
|
465
465
|
if dist < min_distance
|
466
466
|
min_distance = dist
|
@@ -479,14 +479,14 @@ module GeneValidator
|
|
479
479
|
end
|
480
480
|
|
481
481
|
# merge clusters 'cluster' and 'cluster'+1
|
482
|
-
puts "clusters to merge #{cluster} and #{cluster + 1}" if debug
|
482
|
+
$stderr.puts "clusters to merge #{cluster} and #{cluster + 1}" if debug
|
483
483
|
|
484
484
|
clusters[cluster].add(clusters[cluster + 1])
|
485
485
|
clusters.delete_at(cluster + 1)
|
486
486
|
|
487
487
|
if debug
|
488
488
|
clusters.each_with_index do |elem, i|
|
489
|
-
puts "cluster #{i}"
|
489
|
+
$stderr.puts "cluster #{i}"
|
490
490
|
elem.print
|
491
491
|
end
|
492
492
|
end
|
@@ -3,9 +3,14 @@ module GeneValidator
|
|
3
3
|
class ClasspathError < Exception
|
4
4
|
end
|
5
5
|
|
6
|
+
|
6
7
|
# Exception raised when the command line type argument
|
7
8
|
# does not correspond to the type of the sequences in the fasta file
|
8
9
|
class SequenceTypeError < Exception
|
10
|
+
def to_s
|
11
|
+
"\nSequence Type error: Possible cause include that the blast output" \
|
12
|
+
" was not obtained against a protein database.\n"
|
13
|
+
end
|
9
14
|
end
|
10
15
|
|
11
16
|
# Exception raised when a nonexistent file is accessed
|
@@ -18,17 +23,29 @@ module GeneValidator
|
|
18
23
|
|
19
24
|
# Exception raised when a validation class is not instance of ValidationTest
|
20
25
|
class ValidationClassError < Exception
|
26
|
+
def to_s
|
27
|
+
"\nClass Type error: Possible cause include that one of the validations" \
|
28
|
+
" is not a sub-class of ValidationTest\n"
|
29
|
+
end
|
21
30
|
end
|
22
31
|
|
23
|
-
# Exception raised when a validation report class is not instance of
|
32
|
+
# Exception raised when a validation report class is not instance of
|
33
|
+
# ValidationReport
|
24
34
|
class ReportClassError < Exception
|
35
|
+
def to_s
|
36
|
+
"\nClass Type error: Possible causes include that the type of one of" \
|
37
|
+
' the validation reports is not a subclass of the ValidationReport' \
|
38
|
+
" class.\n"
|
39
|
+
end
|
25
40
|
end
|
26
41
|
|
27
|
-
# Exception raised when there are not enough blast hits to make a
|
42
|
+
# Exception raised when there are not enough blast hits to make a statistical
|
43
|
+
# validation
|
28
44
|
class NotEnoughHitsError < Exception
|
29
45
|
end
|
30
46
|
|
31
|
-
# Exception raised when function dependig on the internet connection raise
|
47
|
+
# Exception raised when function depending on the internet connection raise
|
48
|
+
# Exception
|
32
49
|
class NoInternetError < Exception
|
33
50
|
end
|
34
51
|
|
@@ -38,21 +55,31 @@ module GeneValidator
|
|
38
55
|
|
39
56
|
# Exception raised when the -v argument didn't filter any validation test
|
40
57
|
class NoValidationError < Exception
|
58
|
+
def to_s
|
59
|
+
"\nValidation error: Possible cause inlcude that the -v arguments" \
|
60
|
+
" supplied is not valid\n"
|
61
|
+
end
|
41
62
|
end
|
42
63
|
|
43
64
|
# Exception raised when there are alias duplications
|
44
65
|
class AliasDuplicationError < Exception
|
66
|
+
def to_s
|
67
|
+
"\nAlias Duplication error: Possible cause: At least two validations" \
|
68
|
+
" have the same CLI alias\n"
|
69
|
+
end
|
45
70
|
end
|
46
71
|
|
47
72
|
# Exception raised when there are alias duplications
|
48
73
|
class NoPIdentError < Exception
|
49
74
|
end
|
50
75
|
|
51
|
-
# Exception raised when the tabular format does not correspond to the tabular
|
76
|
+
# Exception raised when the tabular format does not correspond to the tabular
|
77
|
+
# argument
|
52
78
|
class InconsistentTabularFormat < Exception
|
53
79
|
end
|
54
80
|
|
55
|
-
# Exception raised when there are more than one reading frame among the hits
|
81
|
+
# Exception raised when there are more than one reading frame among the hits
|
82
|
+
# of one prediction
|
56
83
|
class ReadingFrameError < Exception
|
57
84
|
end
|
58
85
|
|
@@ -1,36 +1,73 @@
|
|
1
|
-
require 'genevalidator/sequences'
|
2
|
-
require 'genevalidator/exceptions'
|
3
1
|
require 'bio-blastxmlparser'
|
2
|
+
require 'forwardable'
|
4
3
|
require 'net/http'
|
5
|
-
require 'open-uri'
|
6
4
|
require 'uri'
|
7
|
-
require 'io/console'
|
8
5
|
require 'yaml'
|
6
|
+
|
7
|
+
require 'genevalidator/exceptions'
|
8
|
+
require 'genevalidator/sequences'
|
9
|
+
|
9
10
|
module GeneValidator
|
10
11
|
# Gets the raw sequences for each hit in a BLAST output file
|
11
|
-
|
12
|
+
class RawSequences
|
12
13
|
class <<self
|
14
|
+
extend Forwardable
|
15
|
+
def_delegators GeneValidator, :opt, :config
|
16
|
+
|
13
17
|
##
|
14
18
|
# Obtains raw_sequences from BLAST output file...
|
15
|
-
def run
|
19
|
+
def run
|
16
20
|
@opt = opt
|
21
|
+
@config = config
|
17
22
|
|
18
|
-
|
19
|
-
|
23
|
+
$stderr.puts 'Extracting sequences within the BLAST output file from' \
|
24
|
+
' the BLAST database'
|
25
|
+
|
26
|
+
if @opt[:blast_xml_file]
|
27
|
+
@blast_file = @opt[:blast_xml_file]
|
20
28
|
else
|
21
|
-
@blast_file = opt[:blast_tabular_file]
|
29
|
+
@blast_file = @opt[:blast_tabular_file]
|
22
30
|
end
|
23
31
|
|
24
|
-
|
25
|
-
index_file
|
32
|
+
@opt[:raw_sequences] = @blast_file + '.raw_seq'
|
33
|
+
index_file = @blast_file + '.index'
|
26
34
|
|
27
35
|
if opt[:db] =~ /remote/
|
28
|
-
|
36
|
+
write_a_raw_seq_file(@opt[:raw_sequences], 'remote')
|
29
37
|
else
|
30
38
|
write_an_index_file(index_file, 'local')
|
31
|
-
obtain_raw_seqs_from_local_db(index_file,
|
39
|
+
obtain_raw_seqs_from_local_db(index_file, @opt[:raw_sequences])
|
40
|
+
end
|
41
|
+
index_raw_seq_file(@opt[:raw_sequences])
|
42
|
+
end
|
43
|
+
|
44
|
+
##
|
45
|
+
# Index the raw sequences file...
|
46
|
+
def index_raw_seq_file(raw_seq_file = opt[:raw_sequences])
|
47
|
+
# leave only the identifiers in the fasta description
|
48
|
+
content = File.open(raw_seq_file, 'rb').read.gsub(/ .*/, '')
|
49
|
+
File.open(raw_seq_file, 'w+') { |f| f.write(content) }
|
50
|
+
|
51
|
+
# index the fasta file
|
52
|
+
keys = content.scan(/>(.*)\n/).flatten
|
53
|
+
values = content.enum_for(:scan, /(>[^>]+)/).map { Regexp.last_match.begin(0) }
|
54
|
+
|
55
|
+
# make an index hash
|
56
|
+
index_hash = {}
|
57
|
+
keys.each_with_index do |k, i|
|
58
|
+
start = values[i]
|
59
|
+
endf = (i == values.length - 1) ? content.length - 1 : values[i + 1]
|
60
|
+
index_hash[k] = [start, endf]
|
61
|
+
end
|
62
|
+
|
63
|
+
# create FASTA index
|
64
|
+
config[:raw_seq_file_index] = "#{raw_seq_file}.idx"
|
65
|
+
config[:raw_seq_file_load] = index_hash
|
66
|
+
|
67
|
+
File.open(config[:raw_seq_file_index], 'w') do |f|
|
68
|
+
YAML.dump(index_hash, f)
|
32
69
|
end
|
33
|
-
|
70
|
+
content = nil
|
34
71
|
end
|
35
72
|
|
36
73
|
private
|
@@ -43,13 +80,13 @@ module GeneValidator
|
|
43
80
|
file.close unless file.nil?
|
44
81
|
end
|
45
82
|
|
46
|
-
alias_method :
|
83
|
+
alias_method :write_a_raw_seq_file, :write_an_index_file
|
47
84
|
|
48
85
|
def iterate_xml(file, db_type)
|
49
86
|
n = Bio::BlastXMLParser::XmlIterator.new(@opt[:blast_xml_file]).to_enum
|
50
87
|
n.each do |iter|
|
51
88
|
iter.each do |hit|
|
52
|
-
if db_type == 'remote'
|
89
|
+
if db_type == 'remote' || hit.hit_id.nil?
|
53
90
|
file.puts obtain_raw_seqs_from_remote_db(hit.accession)
|
54
91
|
else
|
55
92
|
file.puts hit.hit_id
|
@@ -57,10 +94,10 @@ module GeneValidator
|
|
57
94
|
end
|
58
95
|
end
|
59
96
|
rescue
|
60
|
-
puts '*** Error: There was an error in analysing the BLAST XML file.'
|
61
|
-
puts ' Please ensure that BLAST XML file is in the correct format'
|
62
|
-
puts ' and then try again. If you are using a remote database,'
|
63
|
-
puts ' please ensure that you have internet access.'
|
97
|
+
$stderr.puts '*** Error: There was an error in analysing the BLAST XML file.'
|
98
|
+
$stderr.puts ' Please ensure that BLAST XML file is in the correct format'
|
99
|
+
$stderr.puts ' and then try again. If you are using a remote database,'
|
100
|
+
$stderr.puts ' please ensure that you have internet access.'
|
64
101
|
exit 1
|
65
102
|
end
|
66
103
|
|
@@ -73,23 +110,23 @@ module GeneValidator
|
|
73
110
|
assert_table_has_correct_no_of_collumns(rows, table_headers)
|
74
111
|
|
75
112
|
rows.each do |row|
|
76
|
-
if db_type == 'remote'
|
113
|
+
if db_type == 'remote' || row['sseqid'].nil?
|
77
114
|
file.puts obtain_raw_seqs_from_remote_db(row['sacc'])
|
78
115
|
else
|
79
116
|
file.puts row['sseqid']
|
80
117
|
end
|
81
118
|
end
|
82
119
|
rescue
|
83
|
-
puts '*** Error: There was an error in analysing the BLAST tabular'
|
84
|
-
puts ' file. Please ensure that BLAST tabular file is in the correct'
|
85
|
-
puts ' format and then try again. If you are using a remote'
|
86
|
-
puts ' database, please ensure that you have internet access.'
|
120
|
+
$stderr.puts '*** Error: There was an error in analysing the BLAST tabular'
|
121
|
+
$stderr.puts ' file. Please ensure that BLAST tabular file is in the correct'
|
122
|
+
$stderr.puts ' format and then try again. If you are using a remote'
|
123
|
+
$stderr.puts ' database, please ensure that you have internet access.'
|
87
124
|
exit 1
|
88
125
|
end
|
89
126
|
|
90
127
|
def obtain_raw_seqs_from_local_db(index_file, raw_seq_file)
|
91
|
-
cmd = "blastdbcmd -entry_batch #{index_file} -db #{@opt[:db]}
|
92
|
-
" '%f' -out #{raw_seq_file}"
|
128
|
+
cmd = "blastdbcmd -entry_batch '#{index_file}' -db '#{@opt[:db]}'" \
|
129
|
+
" -outfmt '%f' -out '#{raw_seq_file}'"
|
93
130
|
`#{cmd}`
|
94
131
|
end
|
95
132
|
|
@@ -106,8 +143,8 @@ module GeneValidator
|
|
106
143
|
result = Net::HTTP.get(URI.parse(uri))
|
107
144
|
raw_seqs = result[0..result.length - 2]
|
108
145
|
unless raw_seqs.downcase.index(/error/).nil?
|
109
|
-
puts '*** Error: There was an error in obtaining the raw sequence' \
|
110
|
-
|
146
|
+
$stderr.puts '*** Error: There was an error in obtaining the raw sequence' \
|
147
|
+
' of a BLAST hit. Please ensure that you have internet access.'
|
111
148
|
exit 1
|
112
149
|
end
|
113
150
|
raw_seqs
|
@@ -116,10 +153,10 @@ module GeneValidator
|
|
116
153
|
def assert_table_has_correct_no_of_collumns(rows, table_headers)
|
117
154
|
rows.each do |row|
|
118
155
|
unless row.length == table_headers.length
|
119
|
-
puts '*** Error: The BLAST tabular file cannot be parsed. This is' \
|
120
|
-
|
121
|
-
|
122
|
-
|
156
|
+
$stderr.puts '*** Error: The BLAST tabular file cannot be parsed. This is' \
|
157
|
+
' could possibly be due to an incorrect BLAST tabular' \
|
158
|
+
' options ("-o", "--blast_tabular_options") being supplied.' \
|
159
|
+
' Please correct this and try again.'
|
123
160
|
exit 1
|
124
161
|
end
|
125
162
|
break # break after checking the first column
|
data/lib/genevalidator/hsp.rb
CHANGED
@@ -33,8 +33,7 @@ module GeneValidator
|
|
33
33
|
# Params:
|
34
34
|
# +column+: String with column name.
|
35
35
|
# +value+: Value of the column
|
36
|
-
|
37
|
-
def init_tabular_attribute(hash, _type = :protein)
|
36
|
+
def init_tabular_attribute(hash)
|
38
37
|
@match_query_from = hash['qstart'].to_i if hash['qstart']
|
39
38
|
@match_query_to = hash['qend'].to_i if hash['qend']
|
40
39
|
@query_reading_frame = hash['qframe'].to_i if hash['qframe']
|
@@ -47,12 +46,10 @@ module GeneValidator
|
|
47
46
|
@identity = hash['nident'].to_f if hash['nident']
|
48
47
|
@hsp_evalue = hash['evalue'].to_f if hash['evalue']
|
49
48
|
if hash['qseq']
|
50
|
-
puts @query_alignment
|
51
49
|
query_seq_type = BlastUtils.guess_sequence_type(@query_alignment)
|
52
50
|
fail SequenceTypeError if query_seq_type != :protein
|
53
51
|
end
|
54
52
|
if hash['sseq']
|
55
|
-
puts @hit_alignment
|
56
53
|
hit_seq_type = BlastUtils.guess_sequence_type(@hit_alignment)
|
57
54
|
fail SequenceTypeError if hit_seq_type != :protein
|
58
55
|
end
|
@@ -0,0 +1,109 @@
|
|
1
|
+
require 'erb'
|
2
|
+
require 'fileutils'
|
3
|
+
require 'forwardable'
|
4
|
+
require 'json'
|
5
|
+
|
6
|
+
require 'genevalidator'
|
7
|
+
require 'genevalidator/output'
|
8
|
+
require 'genevalidator/version'
|
9
|
+
|
10
|
+
module GeneValidator
|
11
|
+
# produce GV results from a JSON previously produced from GV
|
12
|
+
class JsonToGVResults
|
13
|
+
class << self
|
14
|
+
extend Forwardable
|
15
|
+
def_delegators GeneValidator, :opt
|
16
|
+
|
17
|
+
def init
|
18
|
+
@opt = opt
|
19
|
+
@config = {
|
20
|
+
html_path: "#{@opt[:json_file]}.html",
|
21
|
+
plot_dir: "#{@opt[:json_file]}.html/files/json",
|
22
|
+
aux: File.expand_path(File.join(File.dirname(__FILE__), '../../aux')),
|
23
|
+
filename: File.basename(@opt[:json_file]),
|
24
|
+
output_max: 2500,
|
25
|
+
run_no: 0
|
26
|
+
}
|
27
|
+
@json_array = load_json_file
|
28
|
+
end
|
29
|
+
|
30
|
+
def run
|
31
|
+
init
|
32
|
+
GeneValidator.create_output_folder(@config[:html_path], @config[:aux])
|
33
|
+
@json_array.each do |row|
|
34
|
+
@config[:run_no] += 1
|
35
|
+
create_json_file(row)
|
36
|
+
output_html = output_filename
|
37
|
+
generate_html_header(output_html) unless File.exist?(output_html)
|
38
|
+
generate_html_query(output_html, row)
|
39
|
+
end
|
40
|
+
html_footer
|
41
|
+
end
|
42
|
+
|
43
|
+
def load_json_file
|
44
|
+
json_contents = File.read(File.expand_path(@opt[:json_file]))
|
45
|
+
JSON.load(json_contents)
|
46
|
+
end
|
47
|
+
|
48
|
+
def create_json_file(row)
|
49
|
+
@json_file = File.join(@config[:plot_dir],
|
50
|
+
"#{@config[:filename]}_#{row['idx']}.json")
|
51
|
+
File.open(@json_file, 'w') { |f| f.write(row.to_json) }
|
52
|
+
end
|
53
|
+
|
54
|
+
def output_filename
|
55
|
+
i = (@config[:run_no].to_f / @config[:output_max]).ceil
|
56
|
+
File.join(@config[:html_path], "results#{i}.html")
|
57
|
+
end
|
58
|
+
|
59
|
+
def generate_html_header(output_html)
|
60
|
+
return if File.exist?(output_html)
|
61
|
+
json_header_template = File.join(@config[:aux], 'json_header.erb')
|
62
|
+
template_contents = File.open(json_header_template, 'r').read
|
63
|
+
erb = ERB.new(template_contents, 0, '>')
|
64
|
+
File.open(output_html, 'w+') { |f| f.write(erb.result(binding)) }
|
65
|
+
end
|
66
|
+
|
67
|
+
def generate_html_query(output_html, row)
|
68
|
+
@row = row
|
69
|
+
json_query_template = File.join(@config[:aux], 'json_query.erb')
|
70
|
+
template_contents = File.open(json_query_template, 'r').read
|
71
|
+
erb = ERB.new(template_contents, 0, '>')
|
72
|
+
File.open(output_html, 'a') { |f| f.write(erb.result(binding)) }
|
73
|
+
end
|
74
|
+
|
75
|
+
# Add footer to all output files
|
76
|
+
def html_footer
|
77
|
+
no_of_output_files = (@config[:run_no].to_f / @config[:output_max]).ceil
|
78
|
+
|
79
|
+
output_files = []
|
80
|
+
(1..no_of_output_files).each { |i| output_files << "results#{i}.html" }
|
81
|
+
|
82
|
+
write_html_footer(no_of_output_files, output_files)
|
83
|
+
end
|
84
|
+
|
85
|
+
def write_html_footer(no_of_output_files, output_files)
|
86
|
+
turn_off_automated_sorting
|
87
|
+
json_footer_template = File.join(@config[:aux], 'json_footer.erb')
|
88
|
+
template_contents = File.open(json_footer_template, 'r').read
|
89
|
+
erb = ERB.new(template_contents, 0, '>')
|
90
|
+
(1..no_of_output_files).each do |i|
|
91
|
+
results_html = File.join(@config[:html_path], "results#{i}.html")
|
92
|
+
File.open(results_html, 'a+') { |f| f.write(erb.result(binding)) }
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
# Since the whole idea is that users would sort by
|
97
|
+
def turn_off_automated_sorting
|
98
|
+
script_file = File.join(@config[:html_path], 'files/js/script.js')
|
99
|
+
temp_file = File.join(@config[:html_path], 'files/js/script.temp.js')
|
100
|
+
File.open(temp_file, 'w') do |out_file|
|
101
|
+
out_file.puts File.readlines(script_file)[0..23].join
|
102
|
+
out_file.puts '}'
|
103
|
+
out_file.puts File.readlines(script_file)[26..-1].join
|
104
|
+
end
|
105
|
+
FileUtils.mv(temp_file, script_file)
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|