genevalidator 1.6.1 → 1.6.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (131) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -1
  3. data/.travis.yml +2 -0
  4. data/README.md +78 -30
  5. data/Rakefile +11 -8
  6. data/aux/app_template_footer.erb +1 -6
  7. data/aux/app_template_header.erb +12 -32
  8. data/aux/files/css/style.css +2 -8
  9. data/aux/files/js/plots.js +564 -576
  10. data/aux/files/js/script.js +10 -0
  11. data/aux/json_footer.erb +8 -0
  12. data/aux/json_header.erb +19 -0
  13. data/aux/json_query.erb +14 -0
  14. data/aux/template_footer.erb +9 -58
  15. data/aux/template_header.erb +18 -58
  16. data/aux/template_query.erb +8 -36
  17. data/bin/genevalidator +45 -32
  18. data/genevalidator.gemspec +11 -7
  19. data/lib/genevalidator.rb +75 -455
  20. data/lib/genevalidator/arg_validation.rb +78 -107
  21. data/lib/genevalidator/blast.rb +57 -60
  22. data/lib/genevalidator/clusterization.rb +15 -15
  23. data/lib/genevalidator/exceptions.rb +32 -5
  24. data/lib/genevalidator/get_raw_sequences.rb +70 -33
  25. data/lib/genevalidator/hsp.rb +1 -4
  26. data/lib/genevalidator/json_to_gv_results.rb +109 -0
  27. data/lib/genevalidator/output.rb +177 -185
  28. data/lib/genevalidator/pool.rb +2 -1
  29. data/lib/genevalidator/sequences.rb +3 -3
  30. data/lib/genevalidator/tabular_parser.rb +24 -18
  31. data/lib/genevalidator/validation.rb +279 -0
  32. data/lib/genevalidator/validation_alignment.rb +31 -47
  33. data/lib/genevalidator/validation_blast_reading_frame.rb +19 -18
  34. data/lib/genevalidator/validation_duplication.rb +23 -19
  35. data/lib/genevalidator/validation_gene_merge.rb +30 -65
  36. data/lib/genevalidator/validation_length_cluster.rb +14 -53
  37. data/lib/genevalidator/validation_length_rank.rb +10 -11
  38. data/lib/genevalidator/validation_open_reading_frame.rb +18 -19
  39. data/lib/genevalidator/validation_report.rb +2 -5
  40. data/lib/genevalidator/validation_test.rb +8 -4
  41. data/lib/genevalidator/version.rb +1 -1
  42. data/test/test_all_validations.rb +51 -66
  43. data/test/test_blast.rb +68 -51
  44. data/test/test_clusterization.rb +1 -1
  45. data/test/test_clusterization_2d.rb +19 -13
  46. data/test/test_extended_array_methods.rb +1 -1
  47. data/test/test_files/all_validations_mrna/mrna.blast_tab6 +1806 -0
  48. data/test/test_files/all_validations_mrna/mrna.blast_tab7 +1865 -0
  49. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml → mrna.blast_xml} +18642 -1
  50. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml.index → mrna.blast_xml.index} +300 -0
  51. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta → mrna.fa} +0 -0
  52. data/test/test_files/all_validations_mrna/mrna.raw_seq +3970 -0
  53. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml.raw_seq.idx → mrna.raw_seq.idx} +901 -1
  54. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_tab → prot.blast_tab6} +416 -0
  55. data/test/test_files/all_validations_prot/prot.blast_tab7 +2400 -0
  56. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml → prot.blast_xml} +18299 -6723
  57. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.index → prot.blast_xml.index} +408 -0
  58. data/test/test_files/all_validations_prot/{all_validations_prot.fasta → prot.fa} +0 -0
  59. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.raw_seq → prot.raw_seq} +2735 -0
  60. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.raw_seq.idx → prot.raw_seq.idx} +3032 -1808
  61. data/test/test_sequences.rb +46 -41
  62. data/test/test_validation_open_reading_frame.rb +318 -202
  63. data/test/test_validations.rb +48 -32
  64. metadata +76 -102
  65. data/doc/AliasDuplicationError.html +0 -134
  66. data/doc/AlignmentValidation.html +0 -1687
  67. data/doc/AlignmentValidationOutput.html +0 -659
  68. data/doc/Blast.html +0 -1905
  69. data/doc/BlastRFValidationOutput.html +0 -545
  70. data/doc/BlastReadingFrameValidation.html +0 -370
  71. data/doc/BlastUtils.html +0 -875
  72. data/doc/ClasspathError.html +0 -134
  73. data/doc/Cluster.html +0 -1316
  74. data/doc/DuplciationValidationOutput.html +0 -564
  75. data/doc/DuplicationValidation.html +0 -920
  76. data/doc/DuplicationValidationOutput.html +0 -564
  77. data/doc/FileNotFoundException.html +0 -134
  78. data/doc/GeneMergeValidation.html +0 -935
  79. data/doc/GeneMergeValidationOutput.html +0 -652
  80. data/doc/HierarchicalClusterization.html +0 -994
  81. data/doc/Hsp.html +0 -1485
  82. data/doc/InconsistentTabularFormat.html +0 -135
  83. data/doc/LengthClusterValidation.html +0 -982
  84. data/doc/LengthClusterValidationOutput.html +0 -515
  85. data/doc/LengthRankValidation.html +0 -496
  86. data/doc/LengthRankValidationOutput.html +0 -517
  87. data/doc/NoInternetError.html +0 -135
  88. data/doc/NoMafftInstallationError.html +0 -134
  89. data/doc/NoPIdentError.html +0 -134
  90. data/doc/NoValidationError.html +0 -134
  91. data/doc/NotEnoughHitsError.html +0 -135
  92. data/doc/ORFValidationOutput.html +0 -593
  93. data/doc/OpenReadingFrameValidation.html +0 -1107
  94. data/doc/OtherError.html +0 -123
  95. data/doc/Output.html +0 -1540
  96. data/doc/Pair.html +0 -309
  97. data/doc/PairCluster.html +0 -767
  98. data/doc/Plot.html +0 -837
  99. data/doc/QueryError.html +0 -134
  100. data/doc/ReportClassError.html +0 -135
  101. data/doc/Sequence.html +0 -1299
  102. data/doc/SequenceTypeError.html +0 -135
  103. data/doc/TabularEntry.html +0 -837
  104. data/doc/TabularParser.html +0 -1104
  105. data/doc/Validation.html +0 -2147
  106. data/doc/ValidationClassError.html +0 -134
  107. data/doc/ValidationOutput.html +0 -460
  108. data/doc/ValidationReport.html +0 -940
  109. data/doc/ValidationTest.html +0 -939
  110. data/doc/_index.html +0 -449
  111. data/doc/class_list.html +0 -54
  112. data/doc/css/common.css +0 -1
  113. data/doc/css/full_list.css +0 -57
  114. data/doc/css/style.css +0 -338
  115. data/doc/file.README.html +0 -151
  116. data/doc/file_list.html +0 -56
  117. data/doc/frames.html +0 -26
  118. data/doc/index.html +0 -151
  119. data/doc/js/app.js +0 -214
  120. data/doc/js/full_list.js +0 -178
  121. data/doc/js/jquery.js +0 -4
  122. data/doc/method_list.html +0 -1505
  123. data/doc/top-level-namespace.html +0 -112
  124. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab +0 -967
  125. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.index +0 -967
  126. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.raw_seq +0 -4929
  127. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.raw_seq.idx +0 -1006
  128. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_xml.raw_seq +0 -2075
  129. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.index +0 -1864
  130. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.raw_seq +0 -42411
  131. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.raw_seq.idx +0 -3751
@@ -13,7 +13,7 @@ module GeneValidator
13
13
  end
14
14
 
15
15
  def print
16
- puts "Cluster: #{x} #{y}"
16
+ $stderr.puts "Cluster: #{x} #{y}"
17
17
  end
18
18
 
19
19
  ##
@@ -66,7 +66,7 @@ module GeneValidator
66
66
 
67
67
  def print
68
68
  objects.each do |elem|
69
- puts "(#{elem[0].x},#{elem[0].y}): #{elem[1]}"
69
+ $stderr.puts "(#{elem[0].x},#{elem[0].y}): #{elem[1]}"
70
70
  end
71
71
  end
72
72
 
@@ -270,11 +270,11 @@ module GeneValidator
270
270
  ##
271
271
  # Prints the current cluster
272
272
  def print
273
- puts "Cluster: mean = #{mean}, density = #{density}"
273
+ $stderr.puts "Cluster: mean = #{mean}, density = #{density}"
274
274
  lengths.sort { |a, b| a <=> b }.each do |elem|
275
- puts "#{elem[0]}, #{elem[1]}"
275
+ $stderr.puts "#{elem[0]}, #{elem[1]}"
276
276
  end
277
- puts '--------------------------'
277
+ $stderr.puts '--------------------------'
278
278
  end
279
279
 
280
280
  ##
@@ -334,7 +334,7 @@ module GeneValidator
334
334
  # initially each length belongs to a different cluster
335
335
  histogram.each do |elem|
336
336
  if debug
337
- puts "pair (#{elem[0].x} #{elem[0].y}) appears #{elem[1]} times"
337
+ $stderr.puts "pair (#{elem[0].x} #{elem[0].y}) appears #{elem[1]} times"
338
338
  end
339
339
  hash = { elem[0] => elem[1] }
340
340
  cluster = PairCluster.new(hash)
@@ -353,7 +353,7 @@ module GeneValidator
353
353
  break if no_clusters != 0 && clusters.length == no_clusters
354
354
 
355
355
  iteration = iteration + 1
356
- puts "\nIteration #{iteration}" if debug
356
+ $stderr.puts "\nIteration #{iteration}" if debug
357
357
 
358
358
  min_distance = 100_000_000
359
359
  cluster1 = 0
@@ -363,7 +363,7 @@ module GeneValidator
363
363
  [*(0..(clusters.length - 2))].each do |i|
364
364
  [*((i + 1)..(clusters.length - 1))].each do |j|
365
365
  dist = clusters[i].distance(clusters[j], distance_method)
366
- puts "distance between clusters #{i} and #{j} is #{dist}" if debug
366
+ $stderr.puts "distance between clusters #{i} and #{j} is #{dist}" if debug
367
367
  current_density = clusters[i].density + clusters[j].density
368
368
  if dist < min_distance
369
369
  min_distance = dist
@@ -379,14 +379,14 @@ module GeneValidator
379
379
  end
380
380
 
381
381
  # merge clusters 'cluster1' and 'cluster2'
382
- puts "clusters to merge #{cluster1} and #{cluster2}" if debug
382
+ $stderr.puts "clusters to merge #{cluster1} and #{cluster2}" if debug
383
383
 
384
384
  clusters[cluster1].add(clusters[cluster2])
385
385
  clusters.delete_at(cluster2)
386
386
 
387
387
  if debug
388
388
  clusters.each_with_index do |elem, i|
389
- puts "cluster #{i}"
389
+ $stderr.puts "cluster #{i}"
390
390
  elem.print
391
391
  end
392
392
  end
@@ -433,7 +433,7 @@ module GeneValidator
433
433
  # clusters = array of clusters
434
434
  # initially each length belongs to a different cluster
435
435
  histogram.sort { |a, b| a[0] <=> b[0] }.each do |elem|
436
- puts "len #{elem[0]} appears #{elem[1]} times" if debug
436
+ $stderr.puts "len #{elem[0]} appears #{elem[1]} times" if debug
437
437
  hash = { elem[0] => elem[1] }
438
438
  cluster = Cluster.new(hash)
439
439
  clusters.push(cluster)
@@ -452,7 +452,7 @@ module GeneValidator
452
452
  break if no_clusters != 0 && clusters.length == no_clusters
453
453
 
454
454
  iteration = iteration + 1
455
- puts "\nIteration #{iteration}" if debug
455
+ $stderr.puts "\nIteration #{iteration}" if debug
456
456
 
457
457
  min_distance = 100_000_000
458
458
  cluster = 0
@@ -460,7 +460,7 @@ module GeneValidator
460
460
 
461
461
  clusters[0..clusters.length - 2].each_with_index do |_item, i|
462
462
  dist = clusters[i].distance(clusters[i + 1], distance_method)
463
- puts "distance between clusters #{i} and #{i + 1} is #{dist}" if debug
463
+ $stderr.puts "distance between clusters #{i} and #{i + 1} is #{dist}" if debug
464
464
  current_density = clusters[i].density + clusters[i + 1].density
465
465
  if dist < min_distance
466
466
  min_distance = dist
@@ -479,14 +479,14 @@ module GeneValidator
479
479
  end
480
480
 
481
481
  # merge clusters 'cluster' and 'cluster'+1
482
- puts "clusters to merge #{cluster} and #{cluster + 1}" if debug
482
+ $stderr.puts "clusters to merge #{cluster} and #{cluster + 1}" if debug
483
483
 
484
484
  clusters[cluster].add(clusters[cluster + 1])
485
485
  clusters.delete_at(cluster + 1)
486
486
 
487
487
  if debug
488
488
  clusters.each_with_index do |elem, i|
489
- puts "cluster #{i}"
489
+ $stderr.puts "cluster #{i}"
490
490
  elem.print
491
491
  end
492
492
  end
@@ -3,9 +3,14 @@ module GeneValidator
3
3
  class ClasspathError < Exception
4
4
  end
5
5
 
6
+
6
7
  # Exception raised when the command line type argument
7
8
  # does not correspond to the type of the sequences in the fasta file
8
9
  class SequenceTypeError < Exception
10
+ def to_s
11
+ "\nSequence Type error: Possible cause include that the blast output" \
12
+ " was not obtained against a protein database.\n"
13
+ end
9
14
  end
10
15
 
11
16
  # Exception raised when a nonexistent file is accessed
@@ -18,17 +23,29 @@ module GeneValidator
18
23
 
19
24
  # Exception raised when a validation class is not instance of ValidationTest
20
25
  class ValidationClassError < Exception
26
+ def to_s
27
+ "\nClass Type error: Possible cause include that one of the validations" \
28
+ " is not a sub-class of ValidationTest\n"
29
+ end
21
30
  end
22
31
 
23
- # Exception raised when a validation report class is not instance of ValidationReport
32
+ # Exception raised when a validation report class is not instance of
33
+ # ValidationReport
24
34
  class ReportClassError < Exception
35
+ def to_s
36
+ "\nClass Type error: Possible causes include that the type of one of" \
37
+ ' the validation reports is not a subclass of the ValidationReport' \
38
+ " class.\n"
39
+ end
25
40
  end
26
41
 
27
- # Exception raised when there are not enough blast hits to make a statisticl validation
42
+ # Exception raised when there are not enough blast hits to make a statistical
43
+ # validation
28
44
  class NotEnoughHitsError < Exception
29
45
  end
30
46
 
31
- # Exception raised when function dependig on the internet connection raise Exception
47
+ # Exception raised when a function depending on the internet connection raises
48
+ # Exception
32
49
  class NoInternetError < Exception
33
50
  end
34
51
 
@@ -38,21 +55,31 @@ module GeneValidator
38
55
 
39
56
  # Exception raised when the -v argument didn't filter any validation test
40
57
  class NoValidationError < Exception
58
+ def to_s
59
+ "\nValidation error: Possible cause inlcude that the -v arguments" \
60
+ " supplied is not valid\n"
61
+ end
41
62
  end
42
63
 
43
64
  # Exception raised when there are alias duplications
44
65
  class AliasDuplicationError < Exception
66
+ def to_s
67
+ "\nAlias Duplication error: Possible cause: At least two validations" \
68
+ " have the same CLI alias\n"
69
+ end
45
70
  end
46
71
 
47
72
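  # Exception raised when there are alias duplications (NOTE(review): this comment looks copy-pasted from AliasDuplicationError; confirm what NoPIdentError signals)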
48
73
  class NoPIdentError < Exception
49
74
  end
50
75
 
51
- # Exception raised when the tabular format does not correspond to the tabular argumet
76
+ # Exception raised when the tabular format does not correspond to the tabular
77
+ # argument
52
78
  class InconsistentTabularFormat < Exception
53
79
  end
54
80
 
55
- # Exception raised when there are more than one reading frame among the hits of one prediction
81
+ # Exception raised when there are more than one reading frame among the hits
82
+ # of one prediction
56
83
  class ReadingFrameError < Exception
57
84
  end
58
85
 
@@ -1,36 +1,73 @@
1
- require 'genevalidator/sequences'
2
- require 'genevalidator/exceptions'
3
1
  require 'bio-blastxmlparser'
2
+ require 'forwardable'
4
3
  require 'net/http'
5
- require 'open-uri'
6
4
  require 'uri'
7
- require 'io/console'
8
5
  require 'yaml'
6
+
7
+ require 'genevalidator/exceptions'
8
+ require 'genevalidator/sequences'
9
+
9
10
  module GeneValidator
10
11
  # Gets the raw sequences for each hit in a BLAST output file
11
- module GetRawSequences
12
+ class RawSequences
12
13
  class <<self
14
+ extend Forwardable
15
+ def_delegators GeneValidator, :opt, :config
16
+
13
17
  ##
14
18
  # Obtains raw_sequences from BLAST output file...
15
- def run(opt)
19
+ def run
16
20
  @opt = opt
21
+ @config = config
17
22
 
18
- if opt[:blast_xml_file]
19
- @blast_file = opt[:blast_xml_file]
23
+ $stderr.puts 'Extracting sequences within the BLAST output file from' \
24
+ ' the BLAST database'
25
+
26
+ if @opt[:blast_xml_file]
27
+ @blast_file = @opt[:blast_xml_file]
20
28
  else
21
- @blast_file = opt[:blast_tabular_file]
29
+ @blast_file = @opt[:blast_tabular_file]
22
30
  end
23
31
 
24
- raw_seq_file = @blast_file + '.raw_seq'
25
- index_file = @blast_file + '.index'
32
+ @opt[:raw_sequences] = @blast_file + '.raw_seq'
33
+ index_file = @blast_file + '.index'
26
34
 
27
35
  if opt[:db] =~ /remote/
28
- write_an_raw_seq_file(raw_seq_file, 'remote')
36
+ write_a_raw_seq_file(@opt[:raw_sequences], 'remote')
29
37
  else
30
38
  write_an_index_file(index_file, 'local')
31
- obtain_raw_seqs_from_local_db(index_file, raw_seq_file)
39
+ obtain_raw_seqs_from_local_db(index_file, @opt[:raw_sequences])
40
+ end
41
+ index_raw_seq_file(@opt[:raw_sequences])
42
+ end
43
+
44
+ ##
45
+ # Index the raw sequences file...
46
+ def index_raw_seq_file(raw_seq_file = opt[:raw_sequences])
47
+ # leave only the identifiers in the fasta description
48
+ content = File.open(raw_seq_file, 'rb').read.gsub(/ .*/, '')
49
+ File.open(raw_seq_file, 'w+') { |f| f.write(content) }
50
+
51
+ # index the fasta file
52
+ keys = content.scan(/>(.*)\n/).flatten
53
+ values = content.enum_for(:scan, /(>[^>]+)/).map { Regexp.last_match.begin(0) }
54
+
55
+ # make an index hash
56
+ index_hash = {}
57
+ keys.each_with_index do |k, i|
58
+ start = values[i]
59
+ endf = (i == values.length - 1) ? content.length - 1 : values[i + 1]
60
+ index_hash[k] = [start, endf]
61
+ end
62
+
63
+ # create FASTA index
64
+ config[:raw_seq_file_index] = "#{raw_seq_file}.idx"
65
+ config[:raw_seq_file_load] = index_hash
66
+
67
+ File.open(config[:raw_seq_file_index], 'w') do |f|
68
+ YAML.dump(index_hash, f)
32
69
  end
33
- raw_seq_file
70
+ content = nil
34
71
  end
35
72
 
36
73
  private
@@ -43,13 +80,13 @@ module GeneValidator
43
80
  file.close unless file.nil?
44
81
  end
45
82
 
46
- alias_method :write_an_raw_seq_file, :write_an_index_file
83
+ alias_method :write_a_raw_seq_file, :write_an_index_file
47
84
 
48
85
  def iterate_xml(file, db_type)
49
86
  n = Bio::BlastXMLParser::XmlIterator.new(@opt[:blast_xml_file]).to_enum
50
87
  n.each do |iter|
51
88
  iter.each do |hit|
52
- if db_type == 'remote'
89
+ if db_type == 'remote' || hit.hit_id.nil?
53
90
  file.puts obtain_raw_seqs_from_remote_db(hit.accession)
54
91
  else
55
92
  file.puts hit.hit_id
@@ -57,10 +94,10 @@ module GeneValidator
57
94
  end
58
95
  end
59
96
  rescue
60
- puts '*** Error: There was an error in analysing the BLAST XML file.'
61
- puts ' Please ensure that BLAST XML file is in the correct format'
62
- puts ' and then try again. If you are using a remote database,'
63
- puts ' please ensure that you have internet access.'
97
+ $stderr.puts '*** Error: There was an error in analysing the BLAST XML file.'
98
+ $stderr.puts ' Please ensure that BLAST XML file is in the correct format'
99
+ $stderr.puts ' and then try again. If you are using a remote database,'
100
+ $stderr.puts ' please ensure that you have internet access.'
64
101
  exit 1
65
102
  end
66
103
 
@@ -73,23 +110,23 @@ module GeneValidator
73
110
  assert_table_has_correct_no_of_collumns(rows, table_headers)
74
111
 
75
112
  rows.each do |row|
76
- if db_type == 'remote'
113
+ if db_type == 'remote' || row['sseqid'].nil?
77
114
  file.puts obtain_raw_seqs_from_remote_db(row['sacc'])
78
115
  else
79
116
  file.puts row['sseqid']
80
117
  end
81
118
  end
82
119
  rescue
83
- puts '*** Error: There was an error in analysing the BLAST tabular'
84
- puts ' file. Please ensure that BLAST tabular file is in the correct'
85
- puts ' format and then try again. If you are using a remote'
86
- puts ' database, please ensure that you have internet access.'
120
+ $stderr.puts '*** Error: There was an error in analysing the BLAST tabular'
121
+ $stderr.puts ' file. Please ensure that BLAST tabular file is in the correct'
122
+ $stderr.puts ' format and then try again. If you are using a remote'
123
+ $stderr.puts ' database, please ensure that you have internet access.'
87
124
  exit 1
88
125
  end
89
126
 
90
127
  def obtain_raw_seqs_from_local_db(index_file, raw_seq_file)
91
- cmd = "blastdbcmd -entry_batch #{index_file} -db #{@opt[:db]} -outfmt" \
92
- " '%f' -out #{raw_seq_file}"
128
+ cmd = "blastdbcmd -entry_batch '#{index_file}' -db '#{@opt[:db]}'" \
129
+ " -outfmt '%f' -out '#{raw_seq_file}'"
93
130
  `#{cmd}`
94
131
  end
95
132
 
@@ -106,8 +143,8 @@ module GeneValidator
106
143
  result = Net::HTTP.get(URI.parse(uri))
107
144
  raw_seqs = result[0..result.length - 2]
108
145
  unless raw_seqs.downcase.index(/error/).nil?
109
- puts '*** Error: There was an error in obtaining the raw sequence' \
110
- ' of a BLAST hit. Please ensure that you have internet access.'
146
+ $stderr.puts '*** Error: There was an error in obtaining the raw sequence' \
147
+ ' of a BLAST hit. Please ensure that you have internet access.'
111
148
  exit 1
112
149
  end
113
150
  raw_seqs
@@ -116,10 +153,10 @@ module GeneValidator
116
153
  def assert_table_has_correct_no_of_collumns(rows, table_headers)
117
154
  rows.each do |row|
118
155
  unless row.length == table_headers.length
119
- puts '*** Error: The BLAST tabular file cannot be parsed. This is' \
120
- ' could possibly be due to an incorrect BLAST tabular' \
121
- ' options ("-o", "--blast_tabular_options") being supplied.' \
122
- ' Please correct this and try again.'
156
+ $stderr.puts '*** Error: The BLAST tabular file cannot be parsed. This is' \
157
+ ' could possibly be due to an incorrect BLAST tabular' \
158
+ ' options ("-o", "--blast_tabular_options") being supplied.' \
159
+ ' Please correct this and try again.'
123
160
  exit 1
124
161
  end
125
162
  break # break after checking the first column
@@ -33,8 +33,7 @@ module GeneValidator
33
33
  # Params:
34
34
  # +column+: String with column name.
35
35
  # +value+: Value of the column
36
- # +type+: type of the sequences: :nucleotide or :protein
37
- def init_tabular_attribute(hash, _type = :protein)
36
+ def init_tabular_attribute(hash)
38
37
  @match_query_from = hash['qstart'].to_i if hash['qstart']
39
38
  @match_query_to = hash['qend'].to_i if hash['qend']
40
39
  @query_reading_frame = hash['qframe'].to_i if hash['qframe']
@@ -47,12 +46,10 @@ module GeneValidator
47
46
  @identity = hash['nident'].to_f if hash['nident']
48
47
  @hsp_evalue = hash['evalue'].to_f if hash['evalue']
49
48
  if hash['qseq']
50
- puts @query_alignment
51
49
  query_seq_type = BlastUtils.guess_sequence_type(@query_alignment)
52
50
  fail SequenceTypeError if query_seq_type != :protein
53
51
  end
54
52
  if hash['sseq']
55
- puts @hit_alignment
56
53
  hit_seq_type = BlastUtils.guess_sequence_type(@hit_alignment)
57
54
  fail SequenceTypeError if hit_seq_type != :protein
58
55
  end
@@ -0,0 +1,109 @@
1
+ require 'erb'
2
+ require 'fileutils'
3
+ require 'forwardable'
4
+ require 'json'
5
+
6
+ require 'genevalidator'
7
+ require 'genevalidator/output'
8
+ require 'genevalidator/version'
9
+
10
+ module GeneValidator
11
+ # produce GV results from a JSON previously produced from GV
12
+ class JsonToGVResults
13
+ class << self
14
+ extend Forwardable
15
+ def_delegators GeneValidator, :opt
16
+
17
+ def init
18
+ @opt = opt
19
+ @config = {
20
+ html_path: "#{@opt[:json_file]}.html",
21
+ plot_dir: "#{@opt[:json_file]}.html/files/json",
22
+ aux: File.expand_path(File.join(File.dirname(__FILE__), '../../aux')),
23
+ filename: File.basename(@opt[:json_file]),
24
+ output_max: 2500,
25
+ run_no: 0
26
+ }
27
+ @json_array = load_json_file
28
+ end
29
+
30
+ def run
31
+ init
32
+ GeneValidator.create_output_folder(@config[:html_path], @config[:aux])
33
+ @json_array.each do |row|
34
+ @config[:run_no] += 1
35
+ create_json_file(row)
36
+ output_html = output_filename
37
+ generate_html_header(output_html) unless File.exist?(output_html)
38
+ generate_html_query(output_html, row)
39
+ end
40
+ html_footer
41
+ end
42
+
43
+ def load_json_file
44
+ json_contents = File.read(File.expand_path(@opt[:json_file]))
45
+ JSON.load(json_contents)
46
+ end
47
+
48
+ def create_json_file(row)
49
+ @json_file = File.join(@config[:plot_dir],
50
+ "#{@config[:filename]}_#{row['idx']}.json")
51
+ File.open(@json_file, 'w') { |f| f.write(row.to_json) }
52
+ end
53
+
54
+ def output_filename
55
+ i = (@config[:run_no].to_f / @config[:output_max]).ceil
56
+ File.join(@config[:html_path], "results#{i}.html")
57
+ end
58
+
59
+ def generate_html_header(output_html)
60
+ return if File.exist?(output_html)
61
+ json_header_template = File.join(@config[:aux], 'json_header.erb')
62
+ template_contents = File.open(json_header_template, 'r').read
63
+ erb = ERB.new(template_contents, 0, '>')
64
+ File.open(output_html, 'w+') { |f| f.write(erb.result(binding)) }
65
+ end
66
+
67
+ def generate_html_query(output_html, row)
68
+ @row = row
69
+ json_query_template = File.join(@config[:aux], 'json_query.erb')
70
+ template_contents = File.open(json_query_template, 'r').read
71
+ erb = ERB.new(template_contents, 0, '>')
72
+ File.open(output_html, 'a') { |f| f.write(erb.result(binding)) }
73
+ end
74
+
75
+ # Add footer to all output files
76
+ def html_footer
77
+ no_of_output_files = (@config[:run_no].to_f / @config[:output_max]).ceil
78
+
79
+ output_files = []
80
+ (1..no_of_output_files).each { |i| output_files << "results#{i}.html" }
81
+
82
+ write_html_footer(no_of_output_files, output_files)
83
+ end
84
+
85
+ def write_html_footer(no_of_output_files, output_files)
86
+ turn_off_automated_sorting
87
+ json_footer_template = File.join(@config[:aux], 'json_footer.erb')
88
+ template_contents = File.open(json_footer_template, 'r').read
89
+ erb = ERB.new(template_contents, 0, '>')
90
+ (1..no_of_output_files).each do |i|
91
+ results_html = File.join(@config[:html_path], "results#{i}.html")
92
+ File.open(results_html, 'a+') { |f| f.write(erb.result(binding)) }
93
+ end
94
+ end
95
+
96
+ # Turn off automated sorting (presumably so that users sort the results themselves) by replacing lines 25-26 of files/js/script.js with a closing '}'
97
+ def turn_off_automated_sorting
98
+ script_file = File.join(@config[:html_path], 'files/js/script.js')
99
+ temp_file = File.join(@config[:html_path], 'files/js/script.temp.js')
100
+ File.open(temp_file, 'w') do |out_file|
101
+ out_file.puts File.readlines(script_file)[0..23].join
102
+ out_file.puts '}'
103
+ out_file.puts File.readlines(script_file)[26..-1].join
104
+ end
105
+ FileUtils.mv(temp_file, script_file)
106
+ end
107
+ end
108
+ end
109
+ end