genevalidator 1.6.1 → 1.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -1
  3. data/.travis.yml +2 -0
  4. data/README.md +78 -30
  5. data/Rakefile +11 -8
  6. data/aux/app_template_footer.erb +1 -6
  7. data/aux/app_template_header.erb +12 -32
  8. data/aux/files/css/style.css +2 -8
  9. data/aux/files/js/plots.js +564 -576
  10. data/aux/files/js/script.js +10 -0
  11. data/aux/json_footer.erb +8 -0
  12. data/aux/json_header.erb +19 -0
  13. data/aux/json_query.erb +14 -0
  14. data/aux/template_footer.erb +9 -58
  15. data/aux/template_header.erb +18 -58
  16. data/aux/template_query.erb +8 -36
  17. data/bin/genevalidator +45 -32
  18. data/genevalidator.gemspec +11 -7
  19. data/lib/genevalidator.rb +75 -455
  20. data/lib/genevalidator/arg_validation.rb +78 -107
  21. data/lib/genevalidator/blast.rb +57 -60
  22. data/lib/genevalidator/clusterization.rb +15 -15
  23. data/lib/genevalidator/exceptions.rb +32 -5
  24. data/lib/genevalidator/get_raw_sequences.rb +70 -33
  25. data/lib/genevalidator/hsp.rb +1 -4
  26. data/lib/genevalidator/json_to_gv_results.rb +109 -0
  27. data/lib/genevalidator/output.rb +177 -185
  28. data/lib/genevalidator/pool.rb +2 -1
  29. data/lib/genevalidator/sequences.rb +3 -3
  30. data/lib/genevalidator/tabular_parser.rb +24 -18
  31. data/lib/genevalidator/validation.rb +279 -0
  32. data/lib/genevalidator/validation_alignment.rb +31 -47
  33. data/lib/genevalidator/validation_blast_reading_frame.rb +19 -18
  34. data/lib/genevalidator/validation_duplication.rb +23 -19
  35. data/lib/genevalidator/validation_gene_merge.rb +30 -65
  36. data/lib/genevalidator/validation_length_cluster.rb +14 -53
  37. data/lib/genevalidator/validation_length_rank.rb +10 -11
  38. data/lib/genevalidator/validation_open_reading_frame.rb +18 -19
  39. data/lib/genevalidator/validation_report.rb +2 -5
  40. data/lib/genevalidator/validation_test.rb +8 -4
  41. data/lib/genevalidator/version.rb +1 -1
  42. data/test/test_all_validations.rb +51 -66
  43. data/test/test_blast.rb +68 -51
  44. data/test/test_clusterization.rb +1 -1
  45. data/test/test_clusterization_2d.rb +19 -13
  46. data/test/test_extended_array_methods.rb +1 -1
  47. data/test/test_files/all_validations_mrna/mrna.blast_tab6 +1806 -0
  48. data/test/test_files/all_validations_mrna/mrna.blast_tab7 +1865 -0
  49. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml → mrna.blast_xml} +18642 -1
  50. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml.index → mrna.blast_xml.index} +300 -0
  51. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta → mrna.fa} +0 -0
  52. data/test/test_files/all_validations_mrna/mrna.raw_seq +3970 -0
  53. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml.raw_seq.idx → mrna.raw_seq.idx} +901 -1
  54. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_tab → prot.blast_tab6} +416 -0
  55. data/test/test_files/all_validations_prot/prot.blast_tab7 +2400 -0
  56. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml → prot.blast_xml} +18299 -6723
  57. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.index → prot.blast_xml.index} +408 -0
  58. data/test/test_files/all_validations_prot/{all_validations_prot.fasta → prot.fa} +0 -0
  59. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.raw_seq → prot.raw_seq} +2735 -0
  60. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.raw_seq.idx → prot.raw_seq.idx} +3032 -1808
  61. data/test/test_sequences.rb +46 -41
  62. data/test/test_validation_open_reading_frame.rb +318 -202
  63. data/test/test_validations.rb +48 -32
  64. metadata +76 -102
  65. data/doc/AliasDuplicationError.html +0 -134
  66. data/doc/AlignmentValidation.html +0 -1687
  67. data/doc/AlignmentValidationOutput.html +0 -659
  68. data/doc/Blast.html +0 -1905
  69. data/doc/BlastRFValidationOutput.html +0 -545
  70. data/doc/BlastReadingFrameValidation.html +0 -370
  71. data/doc/BlastUtils.html +0 -875
  72. data/doc/ClasspathError.html +0 -134
  73. data/doc/Cluster.html +0 -1316
  74. data/doc/DuplciationValidationOutput.html +0 -564
  75. data/doc/DuplicationValidation.html +0 -920
  76. data/doc/DuplicationValidationOutput.html +0 -564
  77. data/doc/FileNotFoundException.html +0 -134
  78. data/doc/GeneMergeValidation.html +0 -935
  79. data/doc/GeneMergeValidationOutput.html +0 -652
  80. data/doc/HierarchicalClusterization.html +0 -994
  81. data/doc/Hsp.html +0 -1485
  82. data/doc/InconsistentTabularFormat.html +0 -135
  83. data/doc/LengthClusterValidation.html +0 -982
  84. data/doc/LengthClusterValidationOutput.html +0 -515
  85. data/doc/LengthRankValidation.html +0 -496
  86. data/doc/LengthRankValidationOutput.html +0 -517
  87. data/doc/NoInternetError.html +0 -135
  88. data/doc/NoMafftInstallationError.html +0 -134
  89. data/doc/NoPIdentError.html +0 -134
  90. data/doc/NoValidationError.html +0 -134
  91. data/doc/NotEnoughHitsError.html +0 -135
  92. data/doc/ORFValidationOutput.html +0 -593
  93. data/doc/OpenReadingFrameValidation.html +0 -1107
  94. data/doc/OtherError.html +0 -123
  95. data/doc/Output.html +0 -1540
  96. data/doc/Pair.html +0 -309
  97. data/doc/PairCluster.html +0 -767
  98. data/doc/Plot.html +0 -837
  99. data/doc/QueryError.html +0 -134
  100. data/doc/ReportClassError.html +0 -135
  101. data/doc/Sequence.html +0 -1299
  102. data/doc/SequenceTypeError.html +0 -135
  103. data/doc/TabularEntry.html +0 -837
  104. data/doc/TabularParser.html +0 -1104
  105. data/doc/Validation.html +0 -2147
  106. data/doc/ValidationClassError.html +0 -134
  107. data/doc/ValidationOutput.html +0 -460
  108. data/doc/ValidationReport.html +0 -940
  109. data/doc/ValidationTest.html +0 -939
  110. data/doc/_index.html +0 -449
  111. data/doc/class_list.html +0 -54
  112. data/doc/css/common.css +0 -1
  113. data/doc/css/full_list.css +0 -57
  114. data/doc/css/style.css +0 -338
  115. data/doc/file.README.html +0 -151
  116. data/doc/file_list.html +0 -56
  117. data/doc/frames.html +0 -26
  118. data/doc/index.html +0 -151
  119. data/doc/js/app.js +0 -214
  120. data/doc/js/full_list.js +0 -178
  121. data/doc/js/jquery.js +0 -4
  122. data/doc/method_list.html +0 -1505
  123. data/doc/top-level-namespace.html +0 -112
  124. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab +0 -967
  125. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.index +0 -967
  126. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.raw_seq +0 -4929
  127. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.raw_seq.idx +0 -1006
  128. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_xml.raw_seq +0 -2075
  129. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.index +0 -1864
  130. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.raw_seq +0 -42411
  131. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.raw_seq.idx +0 -3751
@@ -13,7 +13,7 @@ module GeneValidator
13
13
  end
14
14
 
15
15
  def print
16
- puts "Cluster: #{x} #{y}"
16
+ $stderr.puts "Cluster: #{x} #{y}"
17
17
  end
18
18
 
19
19
  ##
@@ -66,7 +66,7 @@ module GeneValidator
66
66
 
67
67
  def print
68
68
  objects.each do |elem|
69
- puts "(#{elem[0].x},#{elem[0].y}): #{elem[1]}"
69
+ $stderr.puts "(#{elem[0].x},#{elem[0].y}): #{elem[1]}"
70
70
  end
71
71
  end
72
72
 
@@ -270,11 +270,11 @@ module GeneValidator
270
270
  ##
271
271
  # Prints the current cluster
272
272
  def print
273
- puts "Cluster: mean = #{mean}, density = #{density}"
273
+ $stderr.puts "Cluster: mean = #{mean}, density = #{density}"
274
274
  lengths.sort { |a, b| a <=> b }.each do |elem|
275
- puts "#{elem[0]}, #{elem[1]}"
275
+ $stderr.puts "#{elem[0]}, #{elem[1]}"
276
276
  end
277
- puts '--------------------------'
277
+ $stderr.puts '--------------------------'
278
278
  end
279
279
 
280
280
  ##
@@ -334,7 +334,7 @@ module GeneValidator
334
334
  # initially each length belongs to a different cluster
335
335
  histogram.each do |elem|
336
336
  if debug
337
- puts "pair (#{elem[0].x} #{elem[0].y}) appears #{elem[1]} times"
337
+ $stderr.puts "pair (#{elem[0].x} #{elem[0].y}) appears #{elem[1]} times"
338
338
  end
339
339
  hash = { elem[0] => elem[1] }
340
340
  cluster = PairCluster.new(hash)
@@ -353,7 +353,7 @@ module GeneValidator
353
353
  break if no_clusters != 0 && clusters.length == no_clusters
354
354
 
355
355
  iteration = iteration + 1
356
- puts "\nIteration #{iteration}" if debug
356
+ $stderr.puts "\nIteration #{iteration}" if debug
357
357
 
358
358
  min_distance = 100_000_000
359
359
  cluster1 = 0
@@ -363,7 +363,7 @@ module GeneValidator
363
363
  [*(0..(clusters.length - 2))].each do |i|
364
364
  [*((i + 1)..(clusters.length - 1))].each do |j|
365
365
  dist = clusters[i].distance(clusters[j], distance_method)
366
- puts "distance between clusters #{i} and #{j} is #{dist}" if debug
366
+ $stderr.puts "distance between clusters #{i} and #{j} is #{dist}" if debug
367
367
  current_density = clusters[i].density + clusters[j].density
368
368
  if dist < min_distance
369
369
  min_distance = dist
@@ -379,14 +379,14 @@ module GeneValidator
379
379
  end
380
380
 
381
381
  # merge clusters 'cluster1' and 'cluster2'
382
- puts "clusters to merge #{cluster1} and #{cluster2}" if debug
382
+ $stderr.puts "clusters to merge #{cluster1} and #{cluster2}" if debug
383
383
 
384
384
  clusters[cluster1].add(clusters[cluster2])
385
385
  clusters.delete_at(cluster2)
386
386
 
387
387
  if debug
388
388
  clusters.each_with_index do |elem, i|
389
- puts "cluster #{i}"
389
+ $stderr.puts "cluster #{i}"
390
390
  elem.print
391
391
  end
392
392
  end
@@ -433,7 +433,7 @@ module GeneValidator
433
433
  # clusters = array of clusters
434
434
  # initially each length belongs to a different cluster
435
435
  histogram.sort { |a, b| a[0] <=> b[0] }.each do |elem|
436
- puts "len #{elem[0]} appears #{elem[1]} times" if debug
436
+ $stderr.puts "len #{elem[0]} appears #{elem[1]} times" if debug
437
437
  hash = { elem[0] => elem[1] }
438
438
  cluster = Cluster.new(hash)
439
439
  clusters.push(cluster)
@@ -452,7 +452,7 @@ module GeneValidator
452
452
  break if no_clusters != 0 && clusters.length == no_clusters
453
453
 
454
454
  iteration = iteration + 1
455
- puts "\nIteration #{iteration}" if debug
455
+ $stderr.puts "\nIteration #{iteration}" if debug
456
456
 
457
457
  min_distance = 100_000_000
458
458
  cluster = 0
@@ -460,7 +460,7 @@ module GeneValidator
460
460
 
461
461
  clusters[0..clusters.length - 2].each_with_index do |_item, i|
462
462
  dist = clusters[i].distance(clusters[i + 1], distance_method)
463
- puts "distance between clusters #{i} and #{i + 1} is #{dist}" if debug
463
+ $stderr.puts "distance between clusters #{i} and #{i + 1} is #{dist}" if debug
464
464
  current_density = clusters[i].density + clusters[i + 1].density
465
465
  if dist < min_distance
466
466
  min_distance = dist
@@ -479,14 +479,14 @@ module GeneValidator
479
479
  end
480
480
 
481
481
  # merge clusters 'cluster' and 'cluster'+1
482
- puts "clusters to merge #{cluster} and #{cluster + 1}" if debug
482
+ $stderr.puts "clusters to merge #{cluster} and #{cluster + 1}" if debug
483
483
 
484
484
  clusters[cluster].add(clusters[cluster + 1])
485
485
  clusters.delete_at(cluster + 1)
486
486
 
487
487
  if debug
488
488
  clusters.each_with_index do |elem, i|
489
- puts "cluster #{i}"
489
+ $stderr.puts "cluster #{i}"
490
490
  elem.print
491
491
  end
492
492
  end
@@ -3,9 +3,14 @@ module GeneValidator
3
3
  class ClasspathError < Exception
4
4
  end
5
5
 
6
+
6
7
  # Exception raised when the command line type argument
7
8
  # does not correspond to the type of the sequences in the fasta file
8
9
  class SequenceTypeError < Exception
10
+ def to_s
11
+ "\nSequence Type error: Possible cause include that the blast output" \
12
+ " was not obtained against a protein database.\n"
13
+ end
9
14
  end
10
15
 
11
16
  # Exception raised when a nonexistent file is accessed
@@ -18,17 +23,29 @@ module GeneValidator
18
23
 
19
24
  # Exception raised when a validation class is not instance of ValidationTest
20
25
  class ValidationClassError < Exception
26
+ def to_s
27
+ "\nClass Type error: Possible cause include that one of the validations" \
28
+ " is not a sub-class of ValidationTest\n"
29
+ end
21
30
  end
22
31
 
23
- # Exception raised when a validation report class is not instance of ValidationReport
32
+ # Exception raised when a validation report class is not instance of
33
+ # ValidationReport
24
34
  class ReportClassError < Exception
35
+ def to_s
36
+ "\nClass Type error: Possible causes include that the type of one of" \
37
+ ' the validation reports is not a subclass of the ValidationReport' \
38
+ " class.\n"
39
+ end
25
40
  end
26
41
 
27
- # Exception raised when there are not enough blast hits to make a statisticl validation
42
+ # Exception raised when there are not enough blast hits to make a statistical
43
+ # validation
28
44
  class NotEnoughHitsError < Exception
29
45
  end
30
46
 
31
- # Exception raised when functions depending on the internet connection raise Exception
47
+ # Exception raised when functions depending on the internet connection raise
48
+ # Exception
32
49
  class NoInternetError < Exception
33
50
  end
34
51
 
@@ -38,21 +55,31 @@ module GeneValidator
38
55
 
39
56
  # Exception raised when the -v argument didn't filter any validation test
40
57
  class NoValidationError < Exception
58
+ def to_s
59
+ "\nValidation error: Possible cause inlcude that the -v arguments" \
60
+ " supplied is not valid\n"
61
+ end
41
62
  end
42
63
 
43
64
  # Exception raised when there are alias duplications
44
65
  class AliasDuplicationError < Exception
66
+ def to_s
67
+ "\nAlias Duplication error: Possible cause: At least two validations" \
68
+ " have the same CLI alias\n"
69
+ end
45
70
  end
46
71
 
47
72
  # Exception raised when there are alias duplications
48
73
  class NoPIdentError < Exception
49
74
  end
50
75
 
51
- # Exception raised when the tabular format does not correspond to the tabular argumet
76
+ # Exception raised when the tabular format does not correspond to the tabular
77
+ # argument
52
78
  class InconsistentTabularFormat < Exception
53
79
  end
54
80
 
55
- # Exception raised when there are more than one reading frame among the hits of one prediction
81
+ # Exception raised when there are more than one reading frame among the hits
82
+ # of one prediction
56
83
  class ReadingFrameError < Exception
57
84
  end
58
85
 
@@ -1,36 +1,73 @@
1
- require 'genevalidator/sequences'
2
- require 'genevalidator/exceptions'
3
1
  require 'bio-blastxmlparser'
2
+ require 'forwardable'
4
3
  require 'net/http'
5
- require 'open-uri'
6
4
  require 'uri'
7
- require 'io/console'
8
5
  require 'yaml'
6
+
7
+ require 'genevalidator/exceptions'
8
+ require 'genevalidator/sequences'
9
+
9
10
  module GeneValidator
10
11
  # Gets the raw sequences for each hit in a BLAST output file
11
- module GetRawSequences
12
+ class RawSequences
12
13
  class <<self
14
+ extend Forwardable
15
+ def_delegators GeneValidator, :opt, :config
16
+
13
17
  ##
14
18
  # Obtains raw_sequences from BLAST output file...
15
- def run(opt)
19
+ def run
16
20
  @opt = opt
21
+ @config = config
17
22
 
18
- if opt[:blast_xml_file]
19
- @blast_file = opt[:blast_xml_file]
23
+ $stderr.puts 'Extracting sequences within the BLAST output file from' \
24
+ ' the BLAST database'
25
+
26
+ if @opt[:blast_xml_file]
27
+ @blast_file = @opt[:blast_xml_file]
20
28
  else
21
- @blast_file = opt[:blast_tabular_file]
29
+ @blast_file = @opt[:blast_tabular_file]
22
30
  end
23
31
 
24
- raw_seq_file = @blast_file + '.raw_seq'
25
- index_file = @blast_file + '.index'
32
+ @opt[:raw_sequences] = @blast_file + '.raw_seq'
33
+ index_file = @blast_file + '.index'
26
34
 
27
35
  if opt[:db] =~ /remote/
28
- write_an_raw_seq_file(raw_seq_file, 'remote')
36
+ write_a_raw_seq_file(@opt[:raw_sequences], 'remote')
29
37
  else
30
38
  write_an_index_file(index_file, 'local')
31
- obtain_raw_seqs_from_local_db(index_file, raw_seq_file)
39
+ obtain_raw_seqs_from_local_db(index_file, @opt[:raw_sequences])
40
+ end
41
+ index_raw_seq_file(@opt[:raw_sequences])
42
+ end
43
+
44
+ ##
45
+ # Index the raw sequences file...
46
+ def index_raw_seq_file(raw_seq_file = opt[:raw_sequences])
47
+ # leave only the identifiers in the fasta description
48
+ content = File.open(raw_seq_file, 'rb').read.gsub(/ .*/, '')
49
+ File.open(raw_seq_file, 'w+') { |f| f.write(content) }
50
+
51
+ # index the fasta file
52
+ keys = content.scan(/>(.*)\n/).flatten
53
+ values = content.enum_for(:scan, /(>[^>]+)/).map { Regexp.last_match.begin(0) }
54
+
55
+ # make an index hash
56
+ index_hash = {}
57
+ keys.each_with_index do |k, i|
58
+ start = values[i]
59
+ endf = (i == values.length - 1) ? content.length - 1 : values[i + 1]
60
+ index_hash[k] = [start, endf]
61
+ end
62
+
63
+ # create FASTA index
64
+ config[:raw_seq_file_index] = "#{raw_seq_file}.idx"
65
+ config[:raw_seq_file_load] = index_hash
66
+
67
+ File.open(config[:raw_seq_file_index], 'w') do |f|
68
+ YAML.dump(index_hash, f)
32
69
  end
33
- raw_seq_file
70
+ content = nil
34
71
  end
35
72
 
36
73
  private
@@ -43,13 +80,13 @@ module GeneValidator
43
80
  file.close unless file.nil?
44
81
  end
45
82
 
46
- alias_method :write_an_raw_seq_file, :write_an_index_file
83
+ alias_method :write_a_raw_seq_file, :write_an_index_file
47
84
 
48
85
  def iterate_xml(file, db_type)
49
86
  n = Bio::BlastXMLParser::XmlIterator.new(@opt[:blast_xml_file]).to_enum
50
87
  n.each do |iter|
51
88
  iter.each do |hit|
52
- if db_type == 'remote'
89
+ if db_type == 'remote' || hit.hit_id.nil?
53
90
  file.puts obtain_raw_seqs_from_remote_db(hit.accession)
54
91
  else
55
92
  file.puts hit.hit_id
@@ -57,10 +94,10 @@ module GeneValidator
57
94
  end
58
95
  end
59
96
  rescue
60
- puts '*** Error: There was an error in analysing the BLAST XML file.'
61
- puts ' Please ensure that BLAST XML file is in the correct format'
62
- puts ' and then try again. If you are using a remote database,'
63
- puts ' please ensure that you have internet access.'
97
+ $stderr.puts '*** Error: There was an error in analysing the BLAST XML file.'
98
+ $stderr.puts ' Please ensure that BLAST XML file is in the correct format'
99
+ $stderr.puts ' and then try again. If you are using a remote database,'
100
+ $stderr.puts ' please ensure that you have internet access.'
64
101
  exit 1
65
102
  end
66
103
 
@@ -73,23 +110,23 @@ module GeneValidator
73
110
  assert_table_has_correct_no_of_collumns(rows, table_headers)
74
111
 
75
112
  rows.each do |row|
76
- if db_type == 'remote'
113
+ if db_type == 'remote' || row['sseqid'].nil?
77
114
  file.puts obtain_raw_seqs_from_remote_db(row['sacc'])
78
115
  else
79
116
  file.puts row['sseqid']
80
117
  end
81
118
  end
82
119
  rescue
83
- puts '*** Error: There was an error in analysing the BLAST tabular'
84
- puts ' file. Please ensure that BLAST tabular file is in the correct'
85
- puts ' format and then try again. If you are using a remote'
86
- puts ' database, please ensure that you have internet access.'
120
+ $stderr.puts '*** Error: There was an error in analysing the BLAST tabular'
121
+ $stderr.puts ' file. Please ensure that BLAST tabular file is in the correct'
122
+ $stderr.puts ' format and then try again. If you are using a remote'
123
+ $stderr.puts ' database, please ensure that you have internet access.'
87
124
  exit 1
88
125
  end
89
126
 
90
127
  def obtain_raw_seqs_from_local_db(index_file, raw_seq_file)
91
- cmd = "blastdbcmd -entry_batch #{index_file} -db #{@opt[:db]} -outfmt" \
92
- " '%f' -out #{raw_seq_file}"
128
+ cmd = "blastdbcmd -entry_batch '#{index_file}' -db '#{@opt[:db]}'" \
129
+ " -outfmt '%f' -out '#{raw_seq_file}'"
93
130
  `#{cmd}`
94
131
  end
95
132
 
@@ -106,8 +143,8 @@ module GeneValidator
106
143
  result = Net::HTTP.get(URI.parse(uri))
107
144
  raw_seqs = result[0..result.length - 2]
108
145
  unless raw_seqs.downcase.index(/error/).nil?
109
- puts '*** Error: There was an error in obtaining the raw sequence' \
110
- ' of a BLAST hit. Please ensure that you have internet access.'
146
+ $stderr.puts '*** Error: There was an error in obtaining the raw sequence' \
147
+ ' of a BLAST hit. Please ensure that you have internet access.'
111
148
  exit 1
112
149
  end
113
150
  raw_seqs
@@ -116,10 +153,10 @@ module GeneValidator
116
153
  def assert_table_has_correct_no_of_collumns(rows, table_headers)
117
154
  rows.each do |row|
118
155
  unless row.length == table_headers.length
119
- puts '*** Error: The BLAST tabular file cannot be parsed. This is' \
120
- ' could possibly be due to an incorrect BLAST tabular' \
121
- ' options ("-o", "--blast_tabular_options") being supplied.' \
122
- ' Please correct this and try again.'
156
+ $stderr.puts '*** Error: The BLAST tabular file cannot be parsed. This is' \
157
+ ' could possibly be due to an incorrect BLAST tabular' \
158
+ ' options ("-o", "--blast_tabular_options") being supplied.' \
159
+ ' Please correct this and try again.'
123
160
  exit 1
124
161
  end
125
162
  break # break after checking the first column
@@ -33,8 +33,7 @@ module GeneValidator
33
33
  # Params:
34
34
  # +column+: String with column name.
35
35
  # +value+: Value of the column
36
- # +type+: type of the sequences: :nucleotide or :protein
37
- def init_tabular_attribute(hash, _type = :protein)
36
+ def init_tabular_attribute(hash)
38
37
  @match_query_from = hash['qstart'].to_i if hash['qstart']
39
38
  @match_query_to = hash['qend'].to_i if hash['qend']
40
39
  @query_reading_frame = hash['qframe'].to_i if hash['qframe']
@@ -47,12 +46,10 @@ module GeneValidator
47
46
  @identity = hash['nident'].to_f if hash['nident']
48
47
  @hsp_evalue = hash['evalue'].to_f if hash['evalue']
49
48
  if hash['qseq']
50
- puts @query_alignment
51
49
  query_seq_type = BlastUtils.guess_sequence_type(@query_alignment)
52
50
  fail SequenceTypeError if query_seq_type != :protein
53
51
  end
54
52
  if hash['sseq']
55
- puts @hit_alignment
56
53
  hit_seq_type = BlastUtils.guess_sequence_type(@hit_alignment)
57
54
  fail SequenceTypeError if hit_seq_type != :protein
58
55
  end
@@ -0,0 +1,109 @@
1
+ require 'erb'
2
+ require 'fileutils'
3
+ require 'forwardable'
4
+ require 'json'
5
+
6
+ require 'genevalidator'
7
+ require 'genevalidator/output'
8
+ require 'genevalidator/version'
9
+
10
+ module GeneValidator
11
+ # produce GV results from a JSON previously produced from GV
12
+ class JsonToGVResults
13
+ class << self
14
+ extend Forwardable
15
+ def_delegators GeneValidator, :opt
16
+
17
+ def init
18
+ @opt = opt
19
+ @config = {
20
+ html_path: "#{@opt[:json_file]}.html",
21
+ plot_dir: "#{@opt[:json_file]}.html/files/json",
22
+ aux: File.expand_path(File.join(File.dirname(__FILE__), '../../aux')),
23
+ filename: File.basename(@opt[:json_file]),
24
+ output_max: 2500,
25
+ run_no: 0
26
+ }
27
+ @json_array = load_json_file
28
+ end
29
+
30
+ def run
31
+ init
32
+ GeneValidator.create_output_folder(@config[:html_path], @config[:aux])
33
+ @json_array.each do |row|
34
+ @config[:run_no] += 1
35
+ create_json_file(row)
36
+ output_html = output_filename
37
+ generate_html_header(output_html) unless File.exist?(output_html)
38
+ generate_html_query(output_html, row)
39
+ end
40
+ html_footer
41
+ end
42
+
43
+ def load_json_file
44
+ json_contents = File.read(File.expand_path(@opt[:json_file]))
45
+ JSON.load(json_contents)
46
+ end
47
+
48
+ def create_json_file(row)
49
+ @json_file = File.join(@config[:plot_dir],
50
+ "#{@config[:filename]}_#{row['idx']}.json")
51
+ File.open(@json_file, 'w') { |f| f.write(row.to_json) }
52
+ end
53
+
54
+ def output_filename
55
+ i = (@config[:run_no].to_f / @config[:output_max]).ceil
56
+ File.join(@config[:html_path], "results#{i}.html")
57
+ end
58
+
59
+ def generate_html_header(output_html)
60
+ return if File.exist?(output_html)
61
+ json_header_template = File.join(@config[:aux], 'json_header.erb')
62
+ template_contents = File.open(json_header_template, 'r').read
63
+ erb = ERB.new(template_contents, 0, '>')
64
+ File.open(output_html, 'w+') { |f| f.write(erb.result(binding)) }
65
+ end
66
+
67
+ def generate_html_query(output_html, row)
68
+ @row = row
69
+ json_query_template = File.join(@config[:aux], 'json_query.erb')
70
+ template_contents = File.open(json_query_template, 'r').read
71
+ erb = ERB.new(template_contents, 0, '>')
72
+ File.open(output_html, 'a') { |f| f.write(erb.result(binding)) }
73
+ end
74
+
75
+ # Add footer to all output files
76
+ def html_footer
77
+ no_of_output_files = (@config[:run_no].to_f / @config[:output_max]).ceil
78
+
79
+ output_files = []
80
+ (1..no_of_output_files).each { |i| output_files << "results#{i}.html" }
81
+
82
+ write_html_footer(no_of_output_files, output_files)
83
+ end
84
+
85
+ def write_html_footer(no_of_output_files, output_files)
86
+ turn_off_automated_sorting
87
+ json_footer_template = File.join(@config[:aux], 'json_footer.erb')
88
+ template_contents = File.open(json_footer_template, 'r').read
89
+ erb = ERB.new(template_contents, 0, '>')
90
+ (1..no_of_output_files).each do |i|
91
+ results_html = File.join(@config[:html_path], "results#{i}.html")
92
+ File.open(results_html, 'a+') { |f| f.write(erb.result(binding)) }
93
+ end
94
+ end
95
+
96
+ # Since the whole idea is that users would sort by
97
+ def turn_off_automated_sorting
98
+ script_file = File.join(@config[:html_path], 'files/js/script.js')
99
+ temp_file = File.join(@config[:html_path], 'files/js/script.temp.js')
100
+ File.open(temp_file, 'w') do |out_file|
101
+ out_file.puts File.readlines(script_file)[0..23].join
102
+ out_file.puts '}'
103
+ out_file.puts File.readlines(script_file)[26..-1].join
104
+ end
105
+ FileUtils.mv(temp_file, script_file)
106
+ end
107
+ end
108
+ end
109
+ end