genevalidator 1.6.1 → 1.6.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (131) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -1
  3. data/.travis.yml +2 -0
  4. data/README.md +78 -30
  5. data/Rakefile +11 -8
  6. data/aux/app_template_footer.erb +1 -6
  7. data/aux/app_template_header.erb +12 -32
  8. data/aux/files/css/style.css +2 -8
  9. data/aux/files/js/plots.js +564 -576
  10. data/aux/files/js/script.js +10 -0
  11. data/aux/json_footer.erb +8 -0
  12. data/aux/json_header.erb +19 -0
  13. data/aux/json_query.erb +14 -0
  14. data/aux/template_footer.erb +9 -58
  15. data/aux/template_header.erb +18 -58
  16. data/aux/template_query.erb +8 -36
  17. data/bin/genevalidator +45 -32
  18. data/genevalidator.gemspec +11 -7
  19. data/lib/genevalidator.rb +75 -455
  20. data/lib/genevalidator/arg_validation.rb +78 -107
  21. data/lib/genevalidator/blast.rb +57 -60
  22. data/lib/genevalidator/clusterization.rb +15 -15
  23. data/lib/genevalidator/exceptions.rb +32 -5
  24. data/lib/genevalidator/get_raw_sequences.rb +70 -33
  25. data/lib/genevalidator/hsp.rb +1 -4
  26. data/lib/genevalidator/json_to_gv_results.rb +109 -0
  27. data/lib/genevalidator/output.rb +177 -185
  28. data/lib/genevalidator/pool.rb +2 -1
  29. data/lib/genevalidator/sequences.rb +3 -3
  30. data/lib/genevalidator/tabular_parser.rb +24 -18
  31. data/lib/genevalidator/validation.rb +279 -0
  32. data/lib/genevalidator/validation_alignment.rb +31 -47
  33. data/lib/genevalidator/validation_blast_reading_frame.rb +19 -18
  34. data/lib/genevalidator/validation_duplication.rb +23 -19
  35. data/lib/genevalidator/validation_gene_merge.rb +30 -65
  36. data/lib/genevalidator/validation_length_cluster.rb +14 -53
  37. data/lib/genevalidator/validation_length_rank.rb +10 -11
  38. data/lib/genevalidator/validation_open_reading_frame.rb +18 -19
  39. data/lib/genevalidator/validation_report.rb +2 -5
  40. data/lib/genevalidator/validation_test.rb +8 -4
  41. data/lib/genevalidator/version.rb +1 -1
  42. data/test/test_all_validations.rb +51 -66
  43. data/test/test_blast.rb +68 -51
  44. data/test/test_clusterization.rb +1 -1
  45. data/test/test_clusterization_2d.rb +19 -13
  46. data/test/test_extended_array_methods.rb +1 -1
  47. data/test/test_files/all_validations_mrna/mrna.blast_tab6 +1806 -0
  48. data/test/test_files/all_validations_mrna/mrna.blast_tab7 +1865 -0
  49. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml → mrna.blast_xml} +18642 -1
  50. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml.index → mrna.blast_xml.index} +300 -0
  51. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta → mrna.fa} +0 -0
  52. data/test/test_files/all_validations_mrna/mrna.raw_seq +3970 -0
  53. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml.raw_seq.idx → mrna.raw_seq.idx} +901 -1
  54. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_tab → prot.blast_tab6} +416 -0
  55. data/test/test_files/all_validations_prot/prot.blast_tab7 +2400 -0
  56. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml → prot.blast_xml} +18299 -6723
  57. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.index → prot.blast_xml.index} +408 -0
  58. data/test/test_files/all_validations_prot/{all_validations_prot.fasta → prot.fa} +0 -0
  59. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.raw_seq → prot.raw_seq} +2735 -0
  60. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.raw_seq.idx → prot.raw_seq.idx} +3032 -1808
  61. data/test/test_sequences.rb +46 -41
  62. data/test/test_validation_open_reading_frame.rb +318 -202
  63. data/test/test_validations.rb +48 -32
  64. metadata +76 -102
  65. data/doc/AliasDuplicationError.html +0 -134
  66. data/doc/AlignmentValidation.html +0 -1687
  67. data/doc/AlignmentValidationOutput.html +0 -659
  68. data/doc/Blast.html +0 -1905
  69. data/doc/BlastRFValidationOutput.html +0 -545
  70. data/doc/BlastReadingFrameValidation.html +0 -370
  71. data/doc/BlastUtils.html +0 -875
  72. data/doc/ClasspathError.html +0 -134
  73. data/doc/Cluster.html +0 -1316
  74. data/doc/DuplciationValidationOutput.html +0 -564
  75. data/doc/DuplicationValidation.html +0 -920
  76. data/doc/DuplicationValidationOutput.html +0 -564
  77. data/doc/FileNotFoundException.html +0 -134
  78. data/doc/GeneMergeValidation.html +0 -935
  79. data/doc/GeneMergeValidationOutput.html +0 -652
  80. data/doc/HierarchicalClusterization.html +0 -994
  81. data/doc/Hsp.html +0 -1485
  82. data/doc/InconsistentTabularFormat.html +0 -135
  83. data/doc/LengthClusterValidation.html +0 -982
  84. data/doc/LengthClusterValidationOutput.html +0 -515
  85. data/doc/LengthRankValidation.html +0 -496
  86. data/doc/LengthRankValidationOutput.html +0 -517
  87. data/doc/NoInternetError.html +0 -135
  88. data/doc/NoMafftInstallationError.html +0 -134
  89. data/doc/NoPIdentError.html +0 -134
  90. data/doc/NoValidationError.html +0 -134
  91. data/doc/NotEnoughHitsError.html +0 -135
  92. data/doc/ORFValidationOutput.html +0 -593
  93. data/doc/OpenReadingFrameValidation.html +0 -1107
  94. data/doc/OtherError.html +0 -123
  95. data/doc/Output.html +0 -1540
  96. data/doc/Pair.html +0 -309
  97. data/doc/PairCluster.html +0 -767
  98. data/doc/Plot.html +0 -837
  99. data/doc/QueryError.html +0 -134
  100. data/doc/ReportClassError.html +0 -135
  101. data/doc/Sequence.html +0 -1299
  102. data/doc/SequenceTypeError.html +0 -135
  103. data/doc/TabularEntry.html +0 -837
  104. data/doc/TabularParser.html +0 -1104
  105. data/doc/Validation.html +0 -2147
  106. data/doc/ValidationClassError.html +0 -134
  107. data/doc/ValidationOutput.html +0 -460
  108. data/doc/ValidationReport.html +0 -940
  109. data/doc/ValidationTest.html +0 -939
  110. data/doc/_index.html +0 -449
  111. data/doc/class_list.html +0 -54
  112. data/doc/css/common.css +0 -1
  113. data/doc/css/full_list.css +0 -57
  114. data/doc/css/style.css +0 -338
  115. data/doc/file.README.html +0 -151
  116. data/doc/file_list.html +0 -56
  117. data/doc/frames.html +0 -26
  118. data/doc/index.html +0 -151
  119. data/doc/js/app.js +0 -214
  120. data/doc/js/full_list.js +0 -178
  121. data/doc/js/jquery.js +0 -4
  122. data/doc/method_list.html +0 -1505
  123. data/doc/top-level-namespace.html +0 -112
  124. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab +0 -967
  125. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.index +0 -967
  126. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.raw_seq +0 -4929
  127. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.raw_seq.idx +0 -1006
  128. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_xml.raw_seq +0 -2075
  129. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.index +0 -1864
  130. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.raw_seq +0 -42411
  131. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.raw_seq.idx +0 -3751
@@ -1,26 +1,39 @@
1
+ require 'forwardable'
2
+
3
+ require 'genevalidator/blast'
4
+
1
5
  # A module to validate the command line Arguments
2
6
  ## CREDIT: some of these methods have been adapted from SequenceServer
3
7
  module GeneValidator
4
8
  # TODO: If a tabular file is provided, ensure that a tabular file has the
5
9
  # right number of columns
6
10
  # TODO: assert_if_ruby_version_is_supported
7
- # A module to validate the arguments passed to the Validation Class
8
- module GVArgValidation
11
+ # A class to validate the arguments passed to the Validation Class
12
+ class GVArgValidation
9
13
  class << self
10
- def validate_args(opt)
14
+ extend Forwardable
15
+ def_delegators GeneValidator, :opt
16
+
17
+ def validate_args
11
18
  @opt = opt
12
19
  assert_output_dir_does_not_exist
13
20
  assert_file_present('input file', opt[:input_fasta_file])
14
21
  assert_input_file_probably_fasta
15
- assert_input_contains_single_type_sequence
22
+ assert_input_sequence
16
23
  assert_BLAST_output_files
17
24
 
18
25
  assert_validations_arg
19
26
  check_num_threads
20
27
 
28
+ export_bin_dirs unless @opt[:bin].nil?
29
+
21
30
  Blast.validate(opt) unless @opt[:test]
22
- Mafft.assert_mafft_installation(opt)
23
- @opt
31
+ assert_mafft_installation
32
+ end
33
+
34
+ # Return `true` if the given command exists and is executable.
35
+ def command?(command)
36
+ system("which #{command} > /dev/null 2>&1")
24
37
  end
25
38
 
26
39
  private
@@ -37,10 +50,10 @@ module GeneValidator
37
50
  def check_num_threads
38
51
  @opt[:num_threads] = Integer(@opt[:num_threads])
39
52
  unless @opt[:num_threads] > 0
40
- puts 'Number of threads can not be lower than 0'
53
+ $stderr.puts 'Number of threads can not be lower than 0'
41
54
  end
42
55
  return unless @opt[:num_threads] > 256
43
- puts "Number of threads set at #{@opt[:num_threads]} is unusually high."
56
+ $stderr.puts "Number of threads set at #{@opt[:num_threads]} is unusually high."
44
57
  end
45
58
 
46
59
  def assert_BLAST_output_files
@@ -56,18 +69,18 @@ module GeneValidator
56
69
  def assert_output_dir_does_not_exist
57
70
  output_dir = "#{@opt[:input_fasta_file]}.html"
58
71
  return unless File.exist?(output_dir)
59
- puts "The output directory already exists for this fasta file.\n"
60
- puts "Please remove the following directory: #{output_dir}\n"
61
- puts "You can run the following command to remove the folder.\n"
62
- puts "\n $ rm -r #{output_dir} \n"
72
+ $stderr.puts "The output directory already exists for this fasta file.\n"
73
+ $stderr.puts "Please remove the following directory: #{output_dir}\n"
74
+ $stderr.puts "You can run the following command to remove the folder.\n"
75
+ $stderr.puts "\n $ rm -r #{output_dir} \n"
63
76
  exit 1
64
77
  end
65
78
 
66
79
  def assert_tabular_options_exists
67
80
  return if @opt[:blast_tabular_options]
68
- puts '*** Error: BLAST tabular options (-o) have not been set.'
69
- puts ' Please set the "-o" option with the custom format'
70
- puts ' used in the BLAST -outfmt argument'
81
+ $stderr.puts '*** Error: BLAST tabular options (-o) have not been set.'
82
+ $stderr.puts ' Please set the "-o" option with the custom format'
83
+ $stderr.puts ' used in the BLAST -outfmt argument'
71
84
  exit 1
72
85
  end
73
86
 
@@ -79,20 +92,45 @@ module GeneValidator
79
92
 
80
93
  def assert_file_present(desc, file, exit_code = 1)
81
94
  return if file && File.exist?(File.expand_path(file))
82
- puts "*** Error: Couldn't find the #{desc}: #{file}."
95
+ $stderr.puts "*** Error: Couldn't find the #{desc}: #{file}."
83
96
  exit exit_code
84
97
  end
85
98
 
86
99
  alias_method :assert_dir_present, :assert_file_present
87
100
 
88
- def assert_input_contains_single_type_sequence
101
+ def assert_input_sequence
89
102
  fasta_content = IO.binread(@opt[:input_fasta_file])
90
103
  type = BlastUtils.type_of_sequences(fasta_content)
91
104
  return if type == :nucleotide || type == :protein
92
- puts '*** Error: The input files does not contain just protein or'
93
- puts ' nucleotide data. Please correct this and try again.'
105
+ $stderr.puts '*** Error: The input files does not contain just protein or'
106
+ $stderr.puts ' nucleotide data. Please correct this and try again.'
94
107
  exit 1
95
108
  end
109
+
110
+ def export_bin_dirs
111
+ @opt[:bin].each do |bin|
112
+ if File.directory?(bin)
113
+ add_to_path(bin)
114
+ else
115
+ $stderr.puts '*** The following bin directory does not exist:'
116
+ $stderr.puts " #{bin}"
117
+ end
118
+ end
119
+ end
120
+
121
+ ## Checks if dir is in $PATH and if not, it adds the dir to the $PATH.
122
+ def add_to_path(bin_dir)
123
+ return if ENV['PATH'].split(':').include?(bin_dir)
124
+ ENV['PATH'] = "#{bin_dir}:#{ENV['PATH']}"
125
+ end
126
+
127
+ def assert_mafft_installation
128
+ return if command?('mafft')
129
+ $stderr.puts '*** Could not find Mafft binaries.'
130
+ $stderr.puts ' Ignoring error and continuing - Please note that' \
131
+ ' some validations may be skipped.'
132
+ $stderr.puts # a blank line
133
+ end
96
134
  end
97
135
 
98
136
  # Validates BLAST Installation (And BLAST databases)
@@ -106,40 +144,32 @@ module GeneValidator
106
144
  EXIT_NO_BLAST_DATABASE = 4
107
145
 
108
146
  def validate(opt)
109
- @opt = opt
110
147
  assert_blast_installation
111
- assert_blast_database_provided
112
- assert_local_blast_database_exists if @opt[:db] !~ /remote/
148
+ warn_if_remote_database(opt[:db])
149
+ assert_local_blast_database_exists(opt[:db]) if opt[:db] !~ /remote/
113
150
  end
114
151
 
115
152
  def assert_blast_installation
116
153
  # Validate BLAST installation
117
- if @opt[:blast_bin].nil?
118
- assert_blast_installed
119
- assert_blast_compatible
120
- else
121
- export_bin_dir
122
- end
154
+ assert_blast_installed
155
+ assert_blast_compatible
123
156
  end
124
157
 
125
- def assert_blast_database_provided
126
- return unless @opt[:db].nil?
127
- puts '*** Error: A BLAST database is required. Please pass a local or'
128
- puts ' remote BLAST database to GeneValidator as follows:'
129
- puts # a blank line
130
- puts " $ genevalidator -d '~/blastdb/SwissProt' Input_File"
131
- puts # a blank line
132
- puts ' Or use a remote database:'
133
- puts # a blank line
134
- puts " $ genevalidator -d 'swissprot -remote' Input_File"
135
- exit 1
158
+ def warn_if_remote_database(db)
159
+ return if db !~ /remote/
160
+ $stderr.puts # a blank line
161
+ $stderr.puts 'Warning: BLAST will be carried out on remote servers.'
162
+ $stderr.puts 'This may take quite a bit of time.'
163
+ $stderr.puts 'You may want to install a local BLAST database for' \
164
+ ' faster analyses.'
165
+ $stderr.puts # a blank line
136
166
  end
137
167
 
138
- def assert_local_blast_database_exists
139
- return if system("blastdbcmd -db #{@opt[:db]} -info > /dev/null 2>&1")
140
- puts '*** No BLAST database found at the provided path.'
141
- puts ' Please ensure that the provided path is correct and then' \
142
- ' try again.'
168
+ def assert_local_blast_database_exists(db)
169
+ return if system("blastdbcmd -db #{db} -info > /dev/null 2>&1")
170
+ $stderr.puts '*** No BLAST database found at the provided path.'
171
+ $stderr.puts ' Please ensure that the provided path is correct' \
172
+ ' and then try again.'
143
173
  exit EXIT_NO_BLAST_DATABASE
144
174
  end
145
175
 
@@ -147,77 +177,18 @@ module GeneValidator
147
177
 
148
178
  def assert_blast_installed
149
179
  return if GVArgValidation.command?('blastdbcmd')
150
- puts '*** Could not find BLAST+ binaries.'
180
+ $stderr.puts '*** Could not find BLAST+ binaries.'
151
181
  exit EXIT_BLAST_NOT_INSTALLED
152
182
  end
153
183
 
154
184
  def assert_blast_compatible
155
185
  version = `blastdbcmd -version`.split[1]
156
186
  return if version >= MINIMUM_BLAST_VERSION
157
- puts "*** Your BLAST+ version #{version} is outdated."
158
- puts ' GeneValidator needs NCBI BLAST+ version' \
159
- " #{MINIMUM_BLAST_VERSION} or higher."
187
+ $stderr.puts "*** Your BLAST+ version #{version} is outdated."
188
+ $stderr.puts ' GeneValidator needs NCBI BLAST+ version' \
189
+ " #{MINIMUM_BLAST_VERSION} or higher."
160
190
  exit EXIT_BLAST_NOT_COMPATIBLE
161
191
  end
162
-
163
- def export_bin_dir
164
- if File.directory?(@opt[:blast_bin])
165
- GVArgValidation.add_to_path(@opt[:blast_bin])
166
- else
167
- puts '*** The provided BLAST bin directory does not exist.'
168
- puts ' Please ensure that the provided BLAST bin directory is' \
169
- ' correct and try again.'
170
- exit EXIT_BLAST_NOT_INSTALLED
171
- end
172
- end
173
- end
174
- end
175
-
176
- # Validates Mafft installation
177
- class Mafft
178
- class << self
179
- def assert_mafft_installation(opt)
180
- @opt = opt
181
- if @opt[:mafft_bin].nil?
182
- assert_mafft_installed
183
- else
184
- export_bin_dir
185
- end
186
- end
187
-
188
- private
189
-
190
- def assert_mafft_installed
191
- return if GVArgValidation.command?('mafft')
192
- puts '*** Could not find Mafft binaries.'
193
- puts ' Ignoring error and continuing - Please note that some' \
194
- ' validations may be skipped.'
195
- puts # a blank line
196
- end
197
-
198
- def export_bin_dir
199
- if File.directory?(@opt[:mafft_bin])
200
- GVArgValidation.add_to_path(@opt[:mafft_bin])
201
- else
202
- puts '*** The provided Mafft bin directory does not exist.'
203
- puts ' Ignoring error and continuing - Please note that some' \
204
- ' validations may be skipped.'
205
- puts # a blank line
206
- end
207
- end
208
- end
209
- end
210
-
211
- class << self
212
- ## Checks if dir is in $PATH and if not, it adds the dir to the $PATH.
213
- def add_to_path(bin_dir)
214
- return if ENV['PATH'].split(':').include?(bin_dir)
215
- ENV['PATH'] = "#{bin_dir}:#{ENV['PATH']}"
216
- end
217
-
218
- # Return `true` if the given command exists and is executable.
219
- def command?(command)
220
- system("which #{command} > /dev/null 2>&1")
221
192
  end
222
193
  end
223
194
  end
@@ -1,19 +1,19 @@
1
- require 'genevalidator/sequences'
1
+ require 'bio'
2
+ require 'bio-blastxmlparser'
3
+ require 'forwardable'
4
+
5
+ require 'genevalidator/exceptions'
2
6
  require 'genevalidator/hsp'
7
+ require 'genevalidator/sequences'
3
8
  require 'genevalidator/output'
4
- require 'genevalidator/exceptions'
5
- require 'bio-blastxmlparser'
6
- require 'net/http'
7
- require 'open-uri'
8
- require 'uri'
9
- require 'io/console'
10
- require 'yaml'
11
- require 'bio'
12
9
 
13
10
  module GeneValidator
14
11
  # Contains methods that run BLAST and methods that analyse sequences
15
12
  class BlastUtils
16
13
  class << self
14
+ extend Forwardable
15
+ def_delegators GeneValidator, :opt, :config
16
+
17
17
  EVALUE = 1e-5
18
18
 
19
19
  ##
@@ -25,7 +25,10 @@ module GeneValidator
25
25
  # +num_threads+: The number of threads to run BLAST with.
26
26
  # Output:
27
27
  # String with the blast xml output
28
- def run_blast(blast_type, query, db, num_threads)
28
+ def run_blast(query, db = opt[:db], seq_type = config[:type],
29
+ num_threads = opt[:num_threads])
30
+
31
+ blast_type = (seq_type == :protein) ? 'blastp' : 'blastx'
29
32
  # -num_threads is not supported on remote databases
30
33
  threads = (db !~ /remote/) ? "-num_threads #{num_threads}" : ''
31
34
 
@@ -46,20 +49,26 @@ module GeneValidator
46
49
  # +nr_hits+: max number of hits
47
50
  # Output:
48
51
  # XML file
49
- def run_blast_on_file(opt)
50
- seq_type = guess_sequence_type_from_file(opt[:input_fasta_file])
52
+ def run_blast_on_input_file(input_file = opt[:input_fasta_file],
53
+ db = opt[:db], seq_type = config[:type],
54
+ num_threads = opt[:num_threads])
55
+ return if opt[:blast_xml_file] || opt[:blast_tabular_file]
56
+
57
+ $stderr.puts 'Running BLAST'
58
+ opt[:blast_xml_file] = input_file + '.blast_xml'
59
+
51
60
  blast_type = (seq_type == :protein) ? 'blastp' : 'blastx'
52
61
  # -num_threads is not supported on remote databases
53
- threads = (opt[:db] !~ /remote/) ? "-num_threads #{opt[:num_threads]}" : ''
62
+ threads = (opt[:db] !~ /remote/) ? "-num_threads #{num_threads}" : ''
54
63
 
55
- blastcmd = "#{blast_type} -query '#{opt[:input_fasta_file]}'" \
56
- " -out '#{opt[:blast_xml_file]}' -db #{opt[:db]} " \
64
+ blastcmd = "#{blast_type} -query '#{input_file}'" \
65
+ " -out '#{opt[:blast_xml_file]}' -db #{db} " \
57
66
  " -evalue #{EVALUE} -outfmt 5 #{threads}"
58
67
 
59
68
  `#{blastcmd}`
60
69
  return unless File.zero?(opt[:blast_xml_file])
61
- puts 'Blast failed to run on the input file. Please ensure that the'
62
- puts 'BLAST database exists and try again'
70
+ $stderr.puts 'Blast failed to run on the input file. Please ensure that the'
71
+ $stderr.puts 'BLAST database exists and try again'
63
72
  exit 1
64
73
  end
65
74
 
@@ -70,9 +79,7 @@ module GeneValidator
70
79
  # +type+: the type of the sequence: :nucleotide or :protein
71
80
  # Outputs:
72
81
  # Array of +Sequence+ objects corresponding to the list of hits
73
- def parse_next(iterator, type)
74
- fail TypeError unless iterator.is_a? Enumerator
75
-
82
+ def parse_next(iterator, type = config[:type])
76
83
  hits = []
77
84
  iter = iterator.next
78
85
 
@@ -85,7 +92,6 @@ module GeneValidator
85
92
  seq.type = :protein
86
93
  seq.identifier = hit.hit_id
87
94
  seq.definition = hit.hit_def
88
- # puts seq.identifier
89
95
  seq.accession_no = hit.accession
90
96
 
91
97
  # get all high-scoring segment pairs (hsp)
@@ -93,7 +99,7 @@ module GeneValidator
93
99
 
94
100
  hit.hsps.each do |hsp|
95
101
  current_hsp = Hsp.new
96
- current_hsp.hsp_evalue = '%.0e' % hsp.evalue
102
+ current_hsp.hsp_evalue = format('%.0e', hsp.evalue)
97
103
 
98
104
  current_hsp.hit_from = hsp.hit_from.to_i
99
105
  current_hsp.hit_to = hsp.hit_to.to_i
@@ -102,20 +108,20 @@ module GeneValidator
102
108
 
103
109
  if type == :nucleotide
104
110
  current_hsp.match_query_from /= 3
105
- current_hsp.match_query_to /= 3
111
+ current_hsp.match_query_to /= 3
106
112
  current_hsp.match_query_from += 1
107
- current_hsp.match_query_to += 1
113
+ current_hsp.match_query_to += 1
108
114
  end
109
115
 
110
116
  current_hsp.query_reading_frame = hsp.query_frame.to_i
111
117
 
112
118
  current_hsp.hit_alignment = hsp.hseq.to_s
113
- if BlastUtils.guess_sequence_type(current_hsp.hit_alignment) != :protein
119
+ if guess_sequence_type(current_hsp.hit_alignment) != :protein
114
120
  fail SequenceTypeError
115
121
  end
116
122
 
117
123
  current_hsp.query_alignment = hsp.qseq.to_s
118
- if BlastUtils.guess_sequence_type(current_hsp.query_alignment) != :protein
124
+ if guess_sequence_type(current_hsp.query_alignment) != :protein
119
125
  fail SequenceTypeError
120
126
  end
121
127
  current_hsp.align_len = hsp.align_len.to_i
@@ -129,23 +135,35 @@ module GeneValidator
129
135
  hits.push(seq)
130
136
  end
131
137
 
132
- return hits
133
-
134
- rescue TypeError => error
135
- line = error.backtrace[0].scan(/\/([^\/]+:\d+):.*/)[0][0]
136
- $stderr.print "Type error at #{line}. Possible cause: you didn't call" \
137
- " parse method first!\n"
138
- exit 1
139
- rescue SequenceTypeError => error
140
- line = error.backtrace[0].scan(/\/([^\/]+:\d+):.*/)[0][0]
141
- $stderr.print "Sequence Type error at #{line}. Possible cause: the" \
142
- 'blast output was not obtained against a protein' \
143
- " database.\n"
138
+ hits
139
+ rescue SequenceTypeError => e
140
+ $stderr.puts e
144
141
  exit 1
145
142
  rescue StopIteration
146
143
  nil
147
144
  end
148
145
 
146
+ ##
147
+ # Method copied from sequenceserver/sequencehelpers.rb
148
+ # Splits input at putative fasta definition lines (like ">adsfadsf");
149
+ # then guesses sequence type for each sequence.
150
+ # If not enough sequence to determine, returns nil.
151
+ # If 2 kinds of sequence mixed together, raises ArgumentError
152
+ # Otherwise, returns :nucleotide or :protein
153
+ # Params:
154
+ # +fasta_format_string+: String to validate
155
+ # Output:
156
+ # nil, :nucleotide or :protein
157
+ def type_of_sequences(fasta_format_string)
158
+ # the first sequence does not need to have a fasta definition line
159
+ sequences = fasta_format_string.split(/^>.*$/).delete_if(&:empty?)
160
+ # get all sequence types
161
+ sequence_types = sequences.collect { |seq| guess_sequence_type(seq) }.uniq.compact
162
+
163
+ return nil if sequence_types.empty?
164
+ return sequence_types.first if sequence_types.length == 1
165
+ end
166
+
149
167
  ##
150
168
  # Strips all non-letter characters. guestimates sequence based on that.
151
169
  # If less than 10 useable characters... returns nil
@@ -165,7 +183,7 @@ module GeneValidator
165
183
 
166
184
  ##
167
185
  #
168
- def guess_sequence_type_from_file(file)
186
+ def guess_sequence_type_from_input_file(file = opt[:input_fasta_file])
169
187
  lines = File.foreach(file).first(10)
170
188
  seqs = ''
171
189
  lines.each do |l|
@@ -173,27 +191,6 @@ module GeneValidator
173
191
  end
174
192
  guess_sequence_type(seqs)
175
193
  end
176
-
177
- ##
178
- # Method copied from sequenceserver/sequencehelpers.rb
179
- # Splits input at putative fasta definition lines (like ">adsfadsf");
180
- # then guesses sequence type for each sequence.
181
- # If not enough sequence to determine, returns nil.
182
- # If 2 kinds of sequence mixed together, raises ArgumentError
183
- # Otherwise, returns :nucleotide or :protein
184
- # Params:
185
- # +sequence_string+: String to validate
186
- # Output:
187
- # nil, :nucleotide or :protein
188
- def type_of_sequences(fasta_format_string)
189
- # the first sequence does not need to have a fasta definition line
190
- sequences = fasta_format_string.split(/^>.*$/).delete_if(&:empty?)
191
- # get all sequence types
192
- sequence_types = sequences.collect { |seq| BlastUtils.guess_sequence_type(seq) }.uniq.compact
193
-
194
- return nil if sequence_types.empty?
195
- return sequence_types.first if sequence_types.length == 1
196
- end
197
194
  end
198
195
  end
199
196
  end