genevalidator 1.6.1 → 1.6.2

This diff shows the changes between two publicly released versions of this package, exactly as they appear in the public registry they were released to. It is provided for informational purposes only.
Files changed (131)
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -1
  3. data/.travis.yml +2 -0
  4. data/README.md +78 -30
  5. data/Rakefile +11 -8
  6. data/aux/app_template_footer.erb +1 -6
  7. data/aux/app_template_header.erb +12 -32
  8. data/aux/files/css/style.css +2 -8
  9. data/aux/files/js/plots.js +564 -576
  10. data/aux/files/js/script.js +10 -0
  11. data/aux/json_footer.erb +8 -0
  12. data/aux/json_header.erb +19 -0
  13. data/aux/json_query.erb +14 -0
  14. data/aux/template_footer.erb +9 -58
  15. data/aux/template_header.erb +18 -58
  16. data/aux/template_query.erb +8 -36
  17. data/bin/genevalidator +45 -32
  18. data/genevalidator.gemspec +11 -7
  19. data/lib/genevalidator.rb +75 -455
  20. data/lib/genevalidator/arg_validation.rb +78 -107
  21. data/lib/genevalidator/blast.rb +57 -60
  22. data/lib/genevalidator/clusterization.rb +15 -15
  23. data/lib/genevalidator/exceptions.rb +32 -5
  24. data/lib/genevalidator/get_raw_sequences.rb +70 -33
  25. data/lib/genevalidator/hsp.rb +1 -4
  26. data/lib/genevalidator/json_to_gv_results.rb +109 -0
  27. data/lib/genevalidator/output.rb +177 -185
  28. data/lib/genevalidator/pool.rb +2 -1
  29. data/lib/genevalidator/sequences.rb +3 -3
  30. data/lib/genevalidator/tabular_parser.rb +24 -18
  31. data/lib/genevalidator/validation.rb +279 -0
  32. data/lib/genevalidator/validation_alignment.rb +31 -47
  33. data/lib/genevalidator/validation_blast_reading_frame.rb +19 -18
  34. data/lib/genevalidator/validation_duplication.rb +23 -19
  35. data/lib/genevalidator/validation_gene_merge.rb +30 -65
  36. data/lib/genevalidator/validation_length_cluster.rb +14 -53
  37. data/lib/genevalidator/validation_length_rank.rb +10 -11
  38. data/lib/genevalidator/validation_open_reading_frame.rb +18 -19
  39. data/lib/genevalidator/validation_report.rb +2 -5
  40. data/lib/genevalidator/validation_test.rb +8 -4
  41. data/lib/genevalidator/version.rb +1 -1
  42. data/test/test_all_validations.rb +51 -66
  43. data/test/test_blast.rb +68 -51
  44. data/test/test_clusterization.rb +1 -1
  45. data/test/test_clusterization_2d.rb +19 -13
  46. data/test/test_extended_array_methods.rb +1 -1
  47. data/test/test_files/all_validations_mrna/mrna.blast_tab6 +1806 -0
  48. data/test/test_files/all_validations_mrna/mrna.blast_tab7 +1865 -0
  49. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml → mrna.blast_xml} +18642 -1
  50. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml.index → mrna.blast_xml.index} +300 -0
  51. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta → mrna.fa} +0 -0
  52. data/test/test_files/all_validations_mrna/mrna.raw_seq +3970 -0
  53. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml.raw_seq.idx → mrna.raw_seq.idx} +901 -1
  54. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_tab → prot.blast_tab6} +416 -0
  55. data/test/test_files/all_validations_prot/prot.blast_tab7 +2400 -0
  56. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml → prot.blast_xml} +18299 -6723
  57. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.index → prot.blast_xml.index} +408 -0
  58. data/test/test_files/all_validations_prot/{all_validations_prot.fasta → prot.fa} +0 -0
  59. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.raw_seq → prot.raw_seq} +2735 -0
  60. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.raw_seq.idx → prot.raw_seq.idx} +3032 -1808
  61. data/test/test_sequences.rb +46 -41
  62. data/test/test_validation_open_reading_frame.rb +318 -202
  63. data/test/test_validations.rb +48 -32
  64. metadata +76 -102
  65. data/doc/AliasDuplicationError.html +0 -134
  66. data/doc/AlignmentValidation.html +0 -1687
  67. data/doc/AlignmentValidationOutput.html +0 -659
  68. data/doc/Blast.html +0 -1905
  69. data/doc/BlastRFValidationOutput.html +0 -545
  70. data/doc/BlastReadingFrameValidation.html +0 -370
  71. data/doc/BlastUtils.html +0 -875
  72. data/doc/ClasspathError.html +0 -134
  73. data/doc/Cluster.html +0 -1316
  74. data/doc/DuplciationValidationOutput.html +0 -564
  75. data/doc/DuplicationValidation.html +0 -920
  76. data/doc/DuplicationValidationOutput.html +0 -564
  77. data/doc/FileNotFoundException.html +0 -134
  78. data/doc/GeneMergeValidation.html +0 -935
  79. data/doc/GeneMergeValidationOutput.html +0 -652
  80. data/doc/HierarchicalClusterization.html +0 -994
  81. data/doc/Hsp.html +0 -1485
  82. data/doc/InconsistentTabularFormat.html +0 -135
  83. data/doc/LengthClusterValidation.html +0 -982
  84. data/doc/LengthClusterValidationOutput.html +0 -515
  85. data/doc/LengthRankValidation.html +0 -496
  86. data/doc/LengthRankValidationOutput.html +0 -517
  87. data/doc/NoInternetError.html +0 -135
  88. data/doc/NoMafftInstallationError.html +0 -134
  89. data/doc/NoPIdentError.html +0 -134
  90. data/doc/NoValidationError.html +0 -134
  91. data/doc/NotEnoughHitsError.html +0 -135
  92. data/doc/ORFValidationOutput.html +0 -593
  93. data/doc/OpenReadingFrameValidation.html +0 -1107
  94. data/doc/OtherError.html +0 -123
  95. data/doc/Output.html +0 -1540
  96. data/doc/Pair.html +0 -309
  97. data/doc/PairCluster.html +0 -767
  98. data/doc/Plot.html +0 -837
  99. data/doc/QueryError.html +0 -134
  100. data/doc/ReportClassError.html +0 -135
  101. data/doc/Sequence.html +0 -1299
  102. data/doc/SequenceTypeError.html +0 -135
  103. data/doc/TabularEntry.html +0 -837
  104. data/doc/TabularParser.html +0 -1104
  105. data/doc/Validation.html +0 -2147
  106. data/doc/ValidationClassError.html +0 -134
  107. data/doc/ValidationOutput.html +0 -460
  108. data/doc/ValidationReport.html +0 -940
  109. data/doc/ValidationTest.html +0 -939
  110. data/doc/_index.html +0 -449
  111. data/doc/class_list.html +0 -54
  112. data/doc/css/common.css +0 -1
  113. data/doc/css/full_list.css +0 -57
  114. data/doc/css/style.css +0 -338
  115. data/doc/file.README.html +0 -151
  116. data/doc/file_list.html +0 -56
  117. data/doc/frames.html +0 -26
  118. data/doc/index.html +0 -151
  119. data/doc/js/app.js +0 -214
  120. data/doc/js/full_list.js +0 -178
  121. data/doc/js/jquery.js +0 -4
  122. data/doc/method_list.html +0 -1505
  123. data/doc/top-level-namespace.html +0 -112
  124. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab +0 -967
  125. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.index +0 -967
  126. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.raw_seq +0 -4929
  127. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.raw_seq.idx +0 -1006
  128. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_xml.raw_seq +0 -2075
  129. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.index +0 -1864
  130. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.raw_seq +0 -42411
  131. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.raw_seq.idx +0 -3751
@@ -1,26 +1,39 @@
1
+ require 'forwardable'
2
+
3
+ require 'genevalidator/blast'
4
+
1
5
  # A module to validate the command line Arguments
2
6
  ## CREDIT: some of these methods have been adapted from SequenceServer
3
7
  module GeneValidator
4
8
  # TODO: If a tabular file is provided, ensure that a tabular file has the
5
9
  # right number of columns
6
10
  # TODO: assert_if_ruby_version_is_supported
7
- # A module to validate the arguments passed to the Validation Class
8
- module GVArgValidation
11
+ # A class to validate the arguments passed to the Validation Class
12
+ class GVArgValidation
9
13
  class << self
10
- def validate_args(opt)
14
+ extend Forwardable
15
+ def_delegators GeneValidator, :opt
16
+
17
+ def validate_args
11
18
  @opt = opt
12
19
  assert_output_dir_does_not_exist
13
20
  assert_file_present('input file', opt[:input_fasta_file])
14
21
  assert_input_file_probably_fasta
15
- assert_input_contains_single_type_sequence
22
+ assert_input_sequence
16
23
  assert_BLAST_output_files
17
24
 
18
25
  assert_validations_arg
19
26
  check_num_threads
20
27
 
28
+ export_bin_dirs unless @opt[:bin].nil?
29
+
21
30
  Blast.validate(opt) unless @opt[:test]
22
- Mafft.assert_mafft_installation(opt)
23
- @opt
31
+ assert_mafft_installation
32
+ end
33
+
34
+ # Return `true` if the given command exists and is executable.
35
+ def command?(command)
36
+ system("which #{command} > /dev/null 2>&1")
24
37
  end
25
38
 
26
39
  private
@@ -37,10 +50,10 @@ module GeneValidator
37
50
  def check_num_threads
38
51
  @opt[:num_threads] = Integer(@opt[:num_threads])
39
52
  unless @opt[:num_threads] > 0
40
- puts 'Number of threads can not be lower than 0'
53
+ $stderr.puts 'Number of threads can not be lower than 0'
41
54
  end
42
55
  return unless @opt[:num_threads] > 256
43
- puts "Number of threads set at #{@opt[:num_threads]} is unusually high."
56
+ $stderr.puts "Number of threads set at #{@opt[:num_threads]} is unusually high."
44
57
  end
45
58
 
46
59
  def assert_BLAST_output_files
@@ -56,18 +69,18 @@ module GeneValidator
56
69
  def assert_output_dir_does_not_exist
57
70
  output_dir = "#{@opt[:input_fasta_file]}.html"
58
71
  return unless File.exist?(output_dir)
59
- puts "The output directory already exists for this fasta file.\n"
60
- puts "Please remove the following directory: #{output_dir}\n"
61
- puts "You can run the following command to remove the folder.\n"
62
- puts "\n $ rm -r #{output_dir} \n"
72
+ $stderr.puts "The output directory already exists for this fasta file.\n"
73
+ $stderr.puts "Please remove the following directory: #{output_dir}\n"
74
+ $stderr.puts "You can run the following command to remove the folder.\n"
75
+ $stderr.puts "\n $ rm -r #{output_dir} \n"
63
76
  exit 1
64
77
  end
65
78
 
66
79
  def assert_tabular_options_exists
67
80
  return if @opt[:blast_tabular_options]
68
- puts '*** Error: BLAST tabular options (-o) have not been set.'
69
- puts ' Please set the "-o" option with the custom format'
70
- puts ' used in the BLAST -outfmt argument'
81
+ $stderr.puts '*** Error: BLAST tabular options (-o) have not been set.'
82
+ $stderr.puts ' Please set the "-o" option with the custom format'
83
+ $stderr.puts ' used in the BLAST -outfmt argument'
71
84
  exit 1
72
85
  end
73
86
 
@@ -79,20 +92,45 @@ module GeneValidator
79
92
 
80
93
  def assert_file_present(desc, file, exit_code = 1)
81
94
  return if file && File.exist?(File.expand_path(file))
82
- puts "*** Error: Couldn't find the #{desc}: #{file}."
95
+ $stderr.puts "*** Error: Couldn't find the #{desc}: #{file}."
83
96
  exit exit_code
84
97
  end
85
98
 
86
99
  alias_method :assert_dir_present, :assert_file_present
87
100
 
88
- def assert_input_contains_single_type_sequence
101
+ def assert_input_sequence
89
102
  fasta_content = IO.binread(@opt[:input_fasta_file])
90
103
  type = BlastUtils.type_of_sequences(fasta_content)
91
104
  return if type == :nucleotide || type == :protein
92
- puts '*** Error: The input files does not contain just protein or'
93
- puts ' nucleotide data. Please correct this and try again.'
105
+ $stderr.puts '*** Error: The input files does not contain just protein or'
106
+ $stderr.puts ' nucleotide data. Please correct this and try again.'
94
107
  exit 1
95
108
  end
109
+
110
+ def export_bin_dirs
111
+ @opt[:bin].each do |bin|
112
+ if File.directory?(bin)
113
+ add_to_path(bin)
114
+ else
115
+ $stderr.puts '*** The following bin directory does not exist:'
116
+ $stderr.puts " #{bin}"
117
+ end
118
+ end
119
+ end
120
+
121
+ ## Checks if dir is in $PATH and if not, it adds the dir to the $PATH.
122
+ def add_to_path(bin_dir)
123
+ return if ENV['PATH'].split(':').include?(bin_dir)
124
+ ENV['PATH'] = "#{bin_dir}:#{ENV['PATH']}"
125
+ end
126
+
127
+ def assert_mafft_installation
128
+ return if command?('mafft')
129
+ $stderr.puts '*** Could not find Mafft binaries.'
130
+ $stderr.puts ' Ignoring error and continuing - Please note that' \
131
+ ' some validations may be skipped.'
132
+ $stderr.puts # a blank line
133
+ end
96
134
  end
97
135
 
98
136
  # Validates BLAST Installation (And BLAST databases)
@@ -106,40 +144,32 @@ module GeneValidator
106
144
  EXIT_NO_BLAST_DATABASE = 4
107
145
 
108
146
  def validate(opt)
109
- @opt = opt
110
147
  assert_blast_installation
111
- assert_blast_database_provided
112
- assert_local_blast_database_exists if @opt[:db] !~ /remote/
148
+ warn_if_remote_database(opt[:db])
149
+ assert_local_blast_database_exists(opt[:db]) if opt[:db] !~ /remote/
113
150
  end
114
151
 
115
152
  def assert_blast_installation
116
153
  # Validate BLAST installation
117
- if @opt[:blast_bin].nil?
118
- assert_blast_installed
119
- assert_blast_compatible
120
- else
121
- export_bin_dir
122
- end
154
+ assert_blast_installed
155
+ assert_blast_compatible
123
156
  end
124
157
 
125
- def assert_blast_database_provided
126
- return unless @opt[:db].nil?
127
- puts '*** Error: A BLAST database is required. Please pass a local or'
128
- puts ' remote BLAST database to GeneValidator as follows:'
129
- puts # a blank line
130
- puts " $ genevalidator -d '~/blastdb/SwissProt' Input_File"
131
- puts # a blank line
132
- puts ' Or use a remote database:'
133
- puts # a blank line
134
- puts " $ genevalidator -d 'swissprot -remote' Input_File"
135
- exit 1
158
+ def warn_if_remote_database(db)
159
+ return if db !~ /remote/
160
+ $stderr.puts a blank line
161
+ $stderr.puts 'Warning: BLAST will be carried out on remote servers.'
162
+ $stderr.puts 'This may take quite a bit of time.'
163
+ $stderr.puts 'You may want to install a local BLAST database for' \
164
+ ' faster analyses.'
165
+ $stderr.puts a blank line
136
166
  end
137
167
 
138
- def assert_local_blast_database_exists
139
- return if system("blastdbcmd -db #{@opt[:db]} -info > /dev/null 2>&1")
140
- puts '*** No BLAST database found at the provided path.'
141
- puts ' Please ensure that the provided path is correct and then' \
142
- ' try again.'
168
+ def assert_local_blast_database_exists(db)
169
+ return if system("blastdbcmd -db #{db} -info > /dev/null 2>&1")
170
+ $stderr.puts '*** No BLAST database found at the provided path.'
171
+ $stderr.puts ' Please ensure that the provided path is correct' \
172
+ ' and then try again.'
143
173
  exit EXIT_NO_BLAST_DATABASE
144
174
  end
145
175
 
@@ -147,77 +177,18 @@ module GeneValidator
147
177
 
148
178
  def assert_blast_installed
149
179
  return if GVArgValidation.command?('blastdbcmd')
150
- puts '*** Could not find BLAST+ binaries.'
180
+ $stderr.puts '*** Could not find BLAST+ binaries.'
151
181
  exit EXIT_BLAST_NOT_INSTALLED
152
182
  end
153
183
 
154
184
  def assert_blast_compatible
155
185
  version = `blastdbcmd -version`.split[1]
156
186
  return if version >= MINIMUM_BLAST_VERSION
157
- puts "*** Your BLAST+ version #{version} is outdated."
158
- puts ' GeneValidator needs NCBI BLAST+ version' \
159
- " #{MINIMUM_BLAST_VERSION} or higher."
187
+ $stderr.puts "*** Your BLAST+ version #{version} is outdated."
188
+ $stderr.puts ' GeneValidator needs NCBI BLAST+ version' \
189
+ " #{MINIMUM_BLAST_VERSION} or higher."
160
190
  exit EXIT_BLAST_NOT_COMPATIBLE
161
191
  end
162
-
163
- def export_bin_dir
164
- if File.directory?(@opt[:blast_bin])
165
- GVArgValidation.add_to_path(@opt[:blast_bin])
166
- else
167
- puts '*** The provided BLAST bin directory does not exist.'
168
- puts ' Please ensure that the provided BLAST bin directory is' \
169
- ' correct and try again.'
170
- exit EXIT_BLAST_NOT_INSTALLED
171
- end
172
- end
173
- end
174
- end
175
-
176
- # Validates Mafft installation
177
- class Mafft
178
- class << self
179
- def assert_mafft_installation(opt)
180
- @opt = opt
181
- if @opt[:mafft_bin].nil?
182
- assert_mafft_installed
183
- else
184
- export_bin_dir
185
- end
186
- end
187
-
188
- private
189
-
190
- def assert_mafft_installed
191
- return if GVArgValidation.command?('mafft')
192
- puts '*** Could not find Mafft binaries.'
193
- puts ' Ignoring error and continuing - Please note that some' \
194
- ' validations may be skipped.'
195
- puts # a blank line
196
- end
197
-
198
- def export_bin_dir
199
- if File.directory?(@opt[:mafft_bin])
200
- GVArgValidation.add_to_path(@opt[:mafft_bin])
201
- else
202
- puts '*** The provided Mafft bin directory does not exist.'
203
- puts ' Ignoring error and continuing - Please note that some' \
204
- ' validations may be skipped.'
205
- puts # a blank line
206
- end
207
- end
208
- end
209
- end
210
-
211
- class << self
212
- ## Checks if dir is in $PATH and if not, it adds the dir to the $PATH.
213
- def add_to_path(bin_dir)
214
- return if ENV['PATH'].split(':').include?(bin_dir)
215
- ENV['PATH'] = "#{bin_dir}:#{ENV['PATH']}"
216
- end
217
-
218
- # Return `true` if the given command exists and is executable.
219
- def command?(command)
220
- system("which #{command} > /dev/null 2>&1")
221
192
  end
222
193
  end
223
194
  end
@@ -1,19 +1,19 @@
1
- require 'genevalidator/sequences'
1
+ require 'bio'
2
+ require 'bio-blastxmlparser'
3
+ require 'forwardable'
4
+
5
+ require 'genevalidator/exceptions'
2
6
  require 'genevalidator/hsp'
7
+ require 'genevalidator/sequences'
3
8
  require 'genevalidator/output'
4
- require 'genevalidator/exceptions'
5
- require 'bio-blastxmlparser'
6
- require 'net/http'
7
- require 'open-uri'
8
- require 'uri'
9
- require 'io/console'
10
- require 'yaml'
11
- require 'bio'
12
9
 
13
10
  module GeneValidator
14
11
  # Contains methods that run BLAST and methods that analyse sequences
15
12
  class BlastUtils
16
13
  class << self
14
+ extend Forwardable
15
+ def_delegators GeneValidator, :opt, :config
16
+
17
17
  EVALUE = 1e-5
18
18
 
19
19
  ##
@@ -25,7 +25,10 @@ module GeneValidator
25
25
  # +num_threads+: The number of threads to run BLAST with.
26
26
  # Output:
27
27
  # String with the blast xml output
28
- def run_blast(blast_type, query, db, num_threads)
28
+ def run_blast(query, db = opt[:db], seq_type = config[:type],
29
+ num_threads = opt[:num_threads])
30
+
31
+ blast_type = (seq_type == :protein) ? 'blastp' : 'blastx'
29
32
  # -num_threads is not supported on remote databases
30
33
  threads = (db !~ /remote/) ? "-num_threads #{num_threads}" : ''
31
34
 
@@ -46,20 +49,26 @@ module GeneValidator
46
49
  # +nr_hits+: max number of hits
47
50
  # Output:
48
51
  # XML file
49
- def run_blast_on_file(opt)
50
- seq_type = guess_sequence_type_from_file(opt[:input_fasta_file])
52
+ def run_blast_on_input_file(input_file = opt[:input_fasta_file],
53
+ db = opt[:db], seq_type = config[:type],
54
+ num_threads = opt[:num_threads])
55
+ return if opt[:blast_xml_file] || opt[:blast_tabular_file]
56
+
57
+ $stderr.puts 'Running BLAST'
58
+ opt[:blast_xml_file] = input_file + '.blast_xml'
59
+
51
60
  blast_type = (seq_type == :protein) ? 'blastp' : 'blastx'
52
61
  # -num_threads is not supported on remote databases
53
- threads = (opt[:db] !~ /remote/) ? "-num_threads #{opt[:num_threads]}" : ''
62
+ threads = (opt[:db] !~ /remote/) ? "-num_threads #{num_threads}" : ''
54
63
 
55
- blastcmd = "#{blast_type} -query '#{opt[:input_fasta_file]}'" \
56
- " -out '#{opt[:blast_xml_file]}' -db #{opt[:db]} " \
64
+ blastcmd = "#{blast_type} -query '#{input_file}'" \
65
+ " -out '#{opt[:blast_xml_file]}' -db #{db} " \
57
66
  " -evalue #{EVALUE} -outfmt 5 #{threads}"
58
67
 
59
68
  `#{blastcmd}`
60
69
  return unless File.zero?(opt[:blast_xml_file])
61
- puts 'Blast failed to run on the input file. Please ensure that the'
62
- puts 'BLAST database exists and try again'
70
+ $stderr.puts 'Blast failed to run on the input file. Please ensure that the'
71
+ $stderr.puts 'BLAST database exists and try again'
63
72
  exit 1
64
73
  end
65
74
 
@@ -70,9 +79,7 @@ module GeneValidator
70
79
  # +type+: the type of the sequence: :nucleotide or :protein
71
80
  # Outputs:
72
81
  # Array of +Sequence+ objects corresponding to the list of hits
73
- def parse_next(iterator, type)
74
- fail TypeError unless iterator.is_a? Enumerator
75
-
82
+ def parse_next(iterator, type = config[:type])
76
83
  hits = []
77
84
  iter = iterator.next
78
85
 
@@ -85,7 +92,6 @@ module GeneValidator
85
92
  seq.type = :protein
86
93
  seq.identifier = hit.hit_id
87
94
  seq.definition = hit.hit_def
88
- # puts seq.identifier
89
95
  seq.accession_no = hit.accession
90
96
 
91
97
  # get all high-scoring segment pairs (hsp)
@@ -93,7 +99,7 @@ module GeneValidator
93
99
 
94
100
  hit.hsps.each do |hsp|
95
101
  current_hsp = Hsp.new
96
- current_hsp.hsp_evalue = '%.0e' % hsp.evalue
102
+ current_hsp.hsp_evalue = format('%.0e', hsp.evalue)
97
103
 
98
104
  current_hsp.hit_from = hsp.hit_from.to_i
99
105
  current_hsp.hit_to = hsp.hit_to.to_i
@@ -102,20 +108,20 @@ module GeneValidator
102
108
 
103
109
  if type == :nucleotide
104
110
  current_hsp.match_query_from /= 3
105
- current_hsp.match_query_to /= 3
111
+ current_hsp.match_query_to /= 3
106
112
  current_hsp.match_query_from += 1
107
- current_hsp.match_query_to += 1
113
+ current_hsp.match_query_to += 1
108
114
  end
109
115
 
110
116
  current_hsp.query_reading_frame = hsp.query_frame.to_i
111
117
 
112
118
  current_hsp.hit_alignment = hsp.hseq.to_s
113
- if BlastUtils.guess_sequence_type(current_hsp.hit_alignment) != :protein
119
+ if guess_sequence_type(current_hsp.hit_alignment) != :protein
114
120
  fail SequenceTypeError
115
121
  end
116
122
 
117
123
  current_hsp.query_alignment = hsp.qseq.to_s
118
- if BlastUtils.guess_sequence_type(current_hsp.query_alignment) != :protein
124
+ if guess_sequence_type(current_hsp.query_alignment) != :protein
119
125
  fail SequenceTypeError
120
126
  end
121
127
  current_hsp.align_len = hsp.align_len.to_i
@@ -129,23 +135,35 @@ module GeneValidator
129
135
  hits.push(seq)
130
136
  end
131
137
 
132
- return hits
133
-
134
- rescue TypeError => error
135
- line = error.backtrace[0].scan(/\/([^\/]+:\d+):.*/)[0][0]
136
- $stderr.print "Type error at #{line}. Possible cause: you didn't call" \
137
- " parse method first!\n"
138
- exit 1
139
- rescue SequenceTypeError => error
140
- line = error.backtrace[0].scan(/\/([^\/]+:\d+):.*/)[0][0]
141
- $stderr.print "Sequence Type error at #{line}. Possible cause: the" \
142
- 'blast output was not obtained against a protein' \
143
- " database.\n"
138
+ hits
139
+ rescue SequenceTypeError => e
140
+ $stderr.puts e
144
141
  exit 1
145
142
  rescue StopIteration
146
143
  nil
147
144
  end
148
145
 
146
+ ##
147
+ # Method copied from sequenceserver/sequencehelpers.rb
148
+ # Splits input at putative fasta definition lines (like ">adsfadsf");
149
+ # then guesses sequence type for each sequence.
150
+ # If not enough sequence to determine, returns nil.
151
+ # If 2 kinds of sequence are mixed together, returns nil (nothing is raised)
152
+ # Otherwise, returns :nucleotide or :protein
153
+ # Params:
154
+ # +fasta_format_string+: String to validate
155
+ # Output:
156
+ # nil, :nucleotide or :protein
157
+ def type_of_sequences(fasta_format_string)
158
+ # the first sequence does not need to have a fasta definition line
159
+ sequences = fasta_format_string.split(/^>.*$/).delete_if(&:empty?)
160
+ # get all sequence types
161
+ sequence_types = sequences.collect { |seq| guess_sequence_type(seq) }.uniq.compact
162
+
163
+ return nil if sequence_types.empty?
164
+ return sequence_types.first if sequence_types.length == 1
165
+ end
166
+
149
167
  ##
150
168
  # Strips all non-letter characters. guesstimates sequence based on that.
151
169
  # If less than 10 useable characters... returns nil
@@ -165,7 +183,7 @@ module GeneValidator
165
183
 
166
184
  ##
167
185
  #
168
- def guess_sequence_type_from_file(file)
186
+ def guess_sequence_type_from_input_file(file = opt[:input_fasta_file])
169
187
  lines = File.foreach(file).first(10)
170
188
  seqs = ''
171
189
  lines.each do |l|
@@ -173,27 +191,6 @@ module GeneValidator
173
191
  end
174
192
  guess_sequence_type(seqs)
175
193
  end
176
-
177
- ##
178
- # Method copied from sequenceserver/sequencehelpers.rb
179
- # Splits input at putative fasta definition lines (like ">adsfadsf");
180
- # then guesses sequence type for each sequence.
181
- # If not enough sequence to determine, returns nil.
182
- # If 2 kinds of sequence mixed together, raises ArgumentError
183
- # Otherwise, returns :nucleotide or :protein
184
- # Params:
185
- # +sequence_string+: String to validate
186
- # Output:
187
- # nil, :nucleotide or :protein
188
- def type_of_sequences(fasta_format_string)
189
- # the first sequence does not need to have a fasta definition line
190
- sequences = fasta_format_string.split(/^>.*$/).delete_if(&:empty?)
191
- # get all sequence types
192
- sequence_types = sequences.collect { |seq| BlastUtils.guess_sequence_type(seq) }.uniq.compact
193
-
194
- return nil if sequence_types.empty?
195
- return sequence_types.first if sequence_types.length == 1
196
- end
197
194
  end
198
195
  end
199
196
  end