sequenceserver 2.1.0 → 3.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of sequenceserver might be problematic. Click here for more details.

Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/COPYRIGHT.txt +1 -1
  3. data/bin/sequenceserver +10 -3
  4. data/lib/sequenceserver/blast/error.rb +53 -0
  5. data/lib/sequenceserver/blast/formatter.rb +13 -4
  6. data/lib/sequenceserver/blast/job.rb +2 -43
  7. data/lib/sequenceserver/blast/report.rb +33 -3
  8. data/lib/sequenceserver/config.rb +4 -1
  9. data/lib/sequenceserver/job.rb +21 -11
  10. data/lib/sequenceserver/makeblastdb-modified-with-cache.rb +345 -0
  11. data/lib/sequenceserver/makeblastdb.rb +97 -75
  12. data/lib/sequenceserver/pool.rb +1 -1
  13. data/lib/sequenceserver/report.rb +1 -5
  14. data/lib/sequenceserver/routes.rb +52 -5
  15. data/lib/sequenceserver/server.rb +1 -1
  16. data/lib/sequenceserver/sys.rb +1 -1
  17. data/lib/sequenceserver/version.rb +1 -1
  18. data/lib/sequenceserver.rb +11 -2
  19. data/public/404.html +27 -0
  20. data/public/config.js +0 -6
  21. data/public/css/grapher.css +1 -1
  22. data/public/css/sequenceserver.css +22 -11
  23. data/public/css/sequenceserver.min.css +2 -2
  24. data/public/js/circos.js +7 -3
  25. data/public/js/dnd.js +3 -3
  26. data/public/js/fastq_to_fasta.js +35 -0
  27. data/public/js/form.js +30 -11
  28. data/public/js/grapher.js +123 -113
  29. data/public/js/hit.js +8 -2
  30. data/public/js/hits_overview.js +4 -1
  31. data/public/js/jquery_world.js +0 -1
  32. data/public/js/kablammo.js +4 -0
  33. data/public/js/length_distribution.js +5 -1
  34. data/public/js/null_plugins/download_links.js +7 -0
  35. data/public/js/null_plugins/hit_buttons.js +11 -0
  36. data/public/js/null_plugins/report_plugins.js +18 -0
  37. data/public/js/query.js +26 -6
  38. data/public/js/report.js +92 -22
  39. data/public/js/search.js +0 -8
  40. data/public/js/sidebar.js +11 -1
  41. data/public/js/tests/advanced_parameters.spec.js +36 -0
  42. data/public/js/tests/mock_data/sequences.js +49 -0
  43. data/public/js/tests/report.spec.js +62 -6
  44. data/public/js/tests/search_query.spec.js +45 -19
  45. data/public/js/visualisation_helpers.js +1 -1
  46. data/public/sequenceserver-report.min.js +76 -42
  47. data/public/sequenceserver-search.min.js +34 -33
  48. data/views/layout.erb +9 -12
  49. metadata +34 -23
@@ -0,0 +1,345 @@
1
require 'find'
require 'forwardable'
require 'zlib'
3
+
4
+ module SequenceServer
5
+ # Smart `makeblastdb` wrapper: recursively scans database directory determining
6
+ # which files need to be formatted or re-formatted.
7
+ #
8
+ # Example usage:
9
+ #
10
+ # makeblastdb = MAKEBLASTDB.new(database_dir)
11
+ # makeblastdb.scan && makeblastdb.run
12
+ #
13
+ class MAKEBLASTDB
14
+ extend Forwardable
15
+
16
+ def_delegators SequenceServer, :config, :sys, :logger
17
+
18
+ def initialize(database_dir)
19
+ @database_dir = database_dir
20
+ end
21
+
22
+ attr_reader :database_dir
23
+ attr_reader :formatted_fastas
24
+ attr_reader :fastas_to_format
25
+ attr_reader :fastas_to_reformat
26
+
27
# Inspects the database directory and records, in instance variables, which
# FASTA files are already formatted, which still need formatting and which
# need to be re-formatted.
#
# The formatted list is built first because both of the other lists are
# derived from it.
#
# Returns `true` when at least one file needs (re-)formatting, `false`
# otherwise.
def scan
  @formatted_fastas = []
  determine_formatted_fastas

  @fastas_to_format = []
  determine_unformatted_fastas

  @fastas_to_reformat = []
  determine_fastas_to_reformat

  nothing_to_do = @fastas_to_format.empty? && @fastas_to_reformat.empty?
  !nothing_to_do
end
47
+
48
# Returns true if at least one database in database directory is formatted.
#
# Returns false (instead of raising) when `#scan` has not been run yet, in
# line with `#format` and `#reformat`, which are also safe to call early.
def any_formatted?
  !Array(@formatted_fastas).empty?
end
52
+
53
# Returns true if there is at least one unformatted FASTA in the databases
# directory.
#
# Returns false (instead of raising) when `#scan` has not been run yet, in
# line with `#format` and `#reformat`, which are also safe to call early.
def any_unformatted?
  !Array(@fastas_to_format).empty?
end
58
+
59
# Returns true if the databases directory contains one or more incompatible
# databases.
#
# Note that it is okay to only use V4 databases or only V5 databases.
# Incompatibility arises when they are mixed. Alias databases are ignored
# for this check.
#
# Returns false (instead of raising) when `#scan` has not been run yet.
def any_incompatible?
  versioned = Array(@formatted_fastas).reject(&:alias?)
  !(versioned.all?(&:v4?) || versioned.all?(&:v5?))
end
69
+
70
# Formats every FASTA recorded in `@fastas_to_format`, then re-formats every
# database recorded in `@fastas_to_reformat`. Both lists are populated by
# `#scan`; when it has not been run, neither step does anything.
#
# Returns whatever `#reformat` returns.
def run
  format
  reformat
end
77
+
78
# Creates a BLAST database for each unformatted FASTA found by `#scan`.
#
# Returns the entries of `@fastas_to_format` for which
# `make_blast_database` returned a truthy value, or nil when `#scan` has
# not been run (so calling this early is harmless).
def format
  pending = @fastas_to_format
  return unless pending

  pending.select do |entry|
    path, title, type = entry
    make_blast_database('format', path, title, type)
  end
end
88
+
89
# Re-creates each BLAST database that `#scan` flagged for reformatting.
#
# Returns the entries of `@fastas_to_reformat` for which
# `make_blast_database` returned a truthy value, or nil when `#scan` has
# not been run (so calling this early is harmless).
def reformat
  pending = @fastas_to_reformat
  return unless pending

  pending.select do |entry|
    path, title, type, non_parse_seqids = entry
    make_blast_database('reformat', path, title, type, non_parse_seqids)
  end
end
99
+
100
+ private
101
+
102
# Determines which FASTA files in the database directory are already
# formatted. Adds to @formatted_fastas.
#
# Each line of `blastdbcmd` output is split on tabs: the first field is the
# database path, the remaining fields (plus the categories derived from the
# path) are passed on to `Database.new`. Volumes of multipart databases are
# skipped, and so are blank lines, which would otherwise yield a nil path.
def determine_formatted_fastas
  blastdbcmd.each_line do |line|
    path, *rest = line.chomp.split("\t")
    next if path.nil? || path.empty?
    next if multipart_database_name?(path)

    rest << get_categories(path)
    @formatted_fastas << Database.new(path, *rest)
  end
end
112
+
113
# Picks out the already formatted databases that must be re-formatted:
# those in V4 format and those flagged as `non_parse_seqids?`.
# Adds `[path, title, type, non_parse_seqids?]` entries to
# @fastas_to_reformat.
def determine_fastas_to_reformat
  @formatted_fastas.each do |db|
    next unless db.v4? || db.non_parse_seqids?

    @fastas_to_reformat << [db.path, db.title, db.type, db.non_parse_seqids?]
  end
end
122
+
123
# Walks the database directory recursively and collects every file that
# looks like FASTA but is not yet listed in @formatted_fastas. Adds
# `[path, title, sequence_type]` entries to @fastas_to_format.
def determine_unformatted_fastas
  # The trailing slash matters: without it Find.find does not descend as
  # expected when database_dir is a symlink inside a docker container.
  search_root = database_dir + '/'

  Find.find(search_root) do |candidate|
    skip = File.directory?(candidate) ||
           !probably_fasta?(candidate) ||
           @formatted_fastas.any? { |formatted| formatted[0] == candidate }
    next if skip

    title = make_db_title(candidate)
    type = guess_sequence_type_in_fasta(candidate)
    @fastas_to_format << [candidate, title, type]
  end
end
139
+
140
# Runs `blastdbcmd` to determine formatted FASTA files in the database
# directory. Returns the output of `blastdbcmd`. This method is called
# by `determine_formatted_fastas`.
#
# The output is cached next to the database directory: `<dir>.checksum`
# holds a checksum of the directory listing (path, mtime and size of every
# entry) and `<dir>.index` holds the `blastdbcmd` output. When the stored
# checksum matches the current one the cached index is returned and
# `blastdbcmd` is not run.
#
# Raises BLAST_DATABASE_ERROR if `blastdbcmd` fails or reports a database
# error. Failure to read or write the cache files is logged, not raised.
def blastdbcmd
  db_dir = config[:database_dir]
  checksum_path = db_dir.chomp('/') + '.checksum'
  index_path = db_dir.chomp('/') + '.index'
  current_db_checksum = database_dir_checksum(db_dir)

  cached = read_cached_index(checksum_path, index_path, current_db_checksum)
  return cached if cached

  # Database directory has changed, or there is no usable index: run
  # blastdbcmd to list formatted FASTA files.
  logger.info 'Scanning for BLAST databases & creating index'
  # Fields are tab separated because determine_formatted_fastas splits each
  # line on "\t".
  outfmt = %w[%f %t %p %n %l %d %v].join("\t")
  cmd = "blastdbcmd -recursive -list #{db_dir}" \
        " -list_outfmt \"#{outfmt}\""
  out, err = sys(cmd, path: config[:bin])
  raise BLAST_DATABASE_ERROR.new(cmd, err) if err =~ /BLAST Database error/

  write_cache_file(checksum_path, current_db_checksum.to_s, 'database checksum')
  write_cache_file(index_path, out, 'database index')

  out
rescue CommandFailed => e
  raise BLAST_DATABASE_ERROR.new(cmd, e.stderr)
end

# Computes a CRC32 over the path, mtime and size of everything below `dir`.
# Entries are sorted so the value does not depend on directory read order;
# entries that cannot be stat'ed (e.g. broken symlinks) contribute their
# path only instead of aborting the scan.
def database_dir_checksum(dir)
  listing = Dir.glob(File.join(dir, '/**/*')).sort.map do |path|
    begin
      "#{path}_#{File.mtime(path)}_#{File.size(path)}"
    rescue SystemCallError
      "#{path}_unreadable"
    end
  end
  Zlib.crc32(listing.to_s)
end

# Returns the cached blastdbcmd output if both cache files exist and the
# stored checksum equals `current_checksum`; nil otherwise (including when
# the cache files cannot be read, which is logged).
def read_cached_index(checksum_path, index_path, current_checksum)
  return unless File.exist?(checksum_path) && File.exist?(index_path)
  return unless File.read(checksum_path).to_i == current_checksum

  logger.info "Using existing database index: #{index_path}"
  File.read(index_path)
rescue SystemCallError, IOError
  logger.error "Could not read: #{checksum_path} or #{index_path}"
  nil
end

# Writes `content` to `path`. A failure is logged and otherwise ignored:
# the cache is an optimisation, not a requirement.
def write_cache_file(path, content, description)
  File.write(path, content)
rescue SystemCallError, IOError
  logger.error "Could not write #{description} to file: #{path}"
end
180
+
181
# Interactively creates a BLAST database for one FASTA file.
#
# Asks the user whether to proceed and which title to use; extracts the
# FASTA from the existing database if the file itself is missing; then
# picks a taxonomy option (taxid map if available, otherwise a taxid asked
# from the user) and runs makeblastdb.
#
# Returns nil if the user declined, otherwise the result of
# `_make_blast_database`.
def make_blast_database(action, file, title, type, non_parse_seqids = false)
  if make_blast_database?(action, file, type)
    confirmed_title = confirm_database_title(title)
    extract_fasta(file) if !File.exist?(file)
    taxonomy_option = taxid_map(file, non_parse_seqids)
    taxonomy_option ||= taxid
    _make_blast_database(file, type, confirmed_title, taxonomy_option)
  end
end
189
+
190
# Show file path and guessed sequence type to the user and obtain a y/n
# response.
#
# Returns false only if the response starts with 'n' or 'N' (e.g. "n",
# "No"); anything else, including an empty response or end of input,
# counts as consent. A response that merely contains the letter "n"
# somewhere is not treated as a refusal.
def make_blast_database?(action, file, type)
  puts
  puts
  puts "FASTA file to #{action}: #{file}"
  puts "FASTA type: #{type}"
  print 'Proceed? [y/n] (Default: y): '
  response = STDIN.gets.to_s.strip
  response !~ /\An/i
end
203
+
204
# Prompts the user for a database title, offering `default` (the
# auto-determined title) as the fallback.
#
# Returns the text typed by the user, stripped of surrounding whitespace,
# or `default` when nothing was typed.
def confirm_database_title(default)
  print "Enter a database title or will use '#{default}': "
  typed = STDIN.gets.to_s.strip
  return typed unless typed.empty?

  default || typed
end
213
+
214
# Check if a '.taxid_map.txt' file exists next to the given database (same
# path with the extension replaced). If not, try getting it using
# blastdbcmd.
#
# Returns the `-taxid_map <file>` option for makeblastdb, or nil when
# `non_parse_seqids` is set or no non-empty taxid map is available (so the
# caller can fall back to asking for a taxid).
def taxid_map(db, non_parse_seqids)
  return if non_parse_seqids

  # Escape the extension: it is data, not a pattern.
  map_file = db.sub(/#{Regexp.escape(File.extname(db))}\z/, '.taxid_map.txt')
  extract_taxid_map(db, map_file) unless File.exist?(map_file)
  # File.size? is nil for a missing file as well as for an empty one;
  # extraction errors are silenced, so the file may not have been created.
  "-taxid_map #{map_file}" if File.size?(map_file)
end
222
+
223
# Get taxid from the user. Returns the `-taxid <n>` option for makeblastdb
# built from the user input, or from 0 if nothing was entered (including
# end of input).
#
# Using 0 as taxid is equivalent to not setting taxid for the database
# that will be created.
#
# Asks again for as long as the input is not an integer.
def taxid
  default = 0
  print 'Enter taxid (optional): '
  # `gets` returns nil at end of input; treat that like an empty answer.
  user_response = STDIN.gets.to_s.strip
  "-taxid #{user_response.empty? ? default : Integer(user_response)}"
rescue ArgumentError # raised by Integer() for non-numeric input
  puts 'taxid should be a number'
  retry
end
236
+
237
# Builds and runs the `makeblastdb` command for `file`, echoing the tool's
# stdout and stderr to the terminal.
#
# `type` is truncated to its first four characters for `-dbtype`;
# `taxonomy` is appended verbatim as the taxonomy option.
#
# Returns true on success. If the command fails, prints a diagnostic and
# terminates the process immediately.
def _make_blast_database(file, type, title, taxonomy)
  cmd = [
    'makeblastdb -parse_seqids -hash_index',
    "-in '#{file}'",
    "-dbtype #{type.to_s[0, 4]}",
    "-title '#{title}'",
    taxonomy.to_s
  ].join(' ')
  stdout_text, stderr_text = sys(cmd, path: config[:bin])
  puts stdout_text.strip
  puts stderr_text.strip
  true
rescue CommandFailed => e
  puts <<~MSG
    Could not create BLAST database for: #{file}
    Tried: #{cmd}
    stdout: #{e.stdout}
    stderr: #{e.stderr}
  MSG
  exit!
end
254
+
255
# Dumps all sequences of BLAST database `db` via `blastdbcmd`, directing
# the command's stdout to the path `db` itself.
#
# Used while reformatting a BLAST database whose original FASTA file is no
# longer present. If the command fails, prints a diagnostic and terminates
# the process immediately.
def extract_fasta(db)
  puts
  puts 'Extracting sequences ...'
  cmd = "blastdbcmd -entry all -db #{db}"
  options = { stdout: db, path: config[:bin] }
  sys(cmd, options)
rescue CommandFailed => e
  puts <<~MSG
    Could not extract sequences from: #{db}
    Tried: #{cmd}
    stdout: #{e.stdout}
    stderr: #{e.stderr}
  MSG
  exit!
end
273
+
274
# Asks `blastdbcmd` for the "sequence id, taxid" pair of every entry in
# `db`, directing the command's stdout to `taxmap_file`.
#
# Best effort: a failing command is deliberately ignored (nil is returned)
# because the caller can fall back to asking the user for a taxid.
def extract_taxid_map(db, taxmap_file)
  cmd = "blastdbcmd -entry all -db #{db} -outfmt '%i %T'"
  options = { stdout: taxmap_file, path: config[:bin] }
  sys(cmd, options)
rescue CommandFailed
  nil
end
280
+
281
# Tells whether `db_name` looks like one volume of a multipart database,
# i.e. a path whose last component ends in a dot followed by two or three
# digits.
#
# e.g.
# /home/ben/pd.ben/sequenceserver/db/nr.00 => yes
# /home/ben/pd.ben/sequenceserver/db/nr => no
# /home/ben/pd.ben/sequenceserver/db/img3.5.finished.faa.01 => yes
# /home/ben/pd.ben/sequenceserver/db/nr00 => no
# /mnt/blast-db/refseq_genomic.100 => yes
def multipart_database_name?(db_name)
  volume_suffix = %r{.+/\S+\.\d{2,3}$}
  db_name.match(volume_suffix) ? true : false
end
293
+
294
# Derives the categories of a database from its location: the names of the
# directories between the configured database directory and the file.
#
# e.g. with database_dir '/db':
# /db/ants/proteins/a.fa => ['ants', 'proteins']
# /db/a.fa => []
#
# Only the leading database_dir is removed from the path, so a
# sub-directory that happens to repeat its text is kept as a category.
def get_categories(path)
  root = config[:database_dir]
  relative = path.start_with?(root) ? path[root.length..-1] : path
  # reject(&:empty?) drops the '' produced by a leading '/'; [0..-2] drops
  # the file name itself.
  relative.split('/').reject(&:empty?)[0..-2]
end
300
+
301
# Cheap FASTA check: the file name must end in one of the recognised
# sequence-file suffixes (case-insensitive) and the first character of the
# file must be '>'. Files with other names are not opened at all.
def probably_fasta?(file)
  fasta_suffix = /((cds)|(fasta)|(fna)|(pep)|(cdna)|(fa)|(prot)|(fas)|(genome)|(nuc)|(dna)|(nt))$/i
  file.match(fasta_suffix) ? File.read(file, 1) == '>' : false
end
306
+
307
# Suggests improved titles when generating database names from files
# for improved appearance and readability in web interface.
# For example:
# Cobs1.4.proteins.fasta -> Cobs 1.4 proteins
# S_invicta.xx.2.5.small.nucl.fa -> S invicta xx 2.5 small nucl
#
# Only the trailing extension is dropped; the same text occurring earlier
# in the file name is kept.
def make_db_title(path)
  File.basename(path, File.extname(path)) # drop directory and .fasta-like extension
      .tr('"', "'")
      .tr('_', ' ')
      # replace '.' with ' ' when no numbers are on either side
      .gsub(/(\D)\.(?=\D)/, '\1 ')
      # preserve version numbers, setting them off with spaces
      .gsub(/\W*(\d+([.-]\d+)+)\W*/, ' \1 ')
end
325
+
326
# Guesses whether a FASTA file holds protein or nucleotide sequences from
# a sample taken at the start of the file.
#
# Returns the type reported by `Sequence.guess_type` when every sampled
# sequence with a determinable type agrees on it; `false` when the samples
# disagree or no type could be determined.
def guess_sequence_type_in_fasta(file)
  detected = sample_sequences(file)
             .map { |sequence| Sequence.guess_type(sequence) }
             .uniq
             .compact
  detected.length == 1 && detected.first
end
334
+
335
# Read first 1,048,576 bytes of the file, split the read text on
# fasta def line pattern and return.
#
# If the given file is FASTA, returns Array of as many different
# sequences in the portion of the file read. Returns the portion
# of the file read wrapped in an Array otherwise. Returns an empty
# Array for an empty file.
def sample_sequences(file)
  # File.read with a length returns nil at EOF, i.e. for an empty file.
  File.read(file, 1_048_576).to_s.split(/^>.+$/).delete_if(&:empty?)
end
344
+ end
345
+ end
@@ -8,7 +8,8 @@ module SequenceServer
8
8
  # Example usage:
9
9
  #
10
10
  # makeblastdb = MAKEBLASTDB.new(database_dir)
11
- # makeblastdb.scan && makeblastdb.run
11
+ # makeblastdb.run # formats and re-formats databases in database_dir
12
+ # makeblastdb.formatted_fastas # lists formatted databases
12
13
  #
13
14
  class MAKEBLASTDB
14
15
  extend Forwardable
@@ -20,56 +21,21 @@ module SequenceServer
20
21
  end
21
22
 
22
23
  attr_reader :database_dir
23
- attr_reader :formatted_fastas
24
- attr_reader :fastas_to_format
25
- attr_reader :fastas_to_reformat
26
24
 
27
- # Scans the database directory to determine which FASTA files require
28
- # formatting or re-formatting.
29
- #
30
- # Returns `true` if there are files to (re-)format, `false` otherwise.
31
- def scan
32
- # We need to know the list of formatted FASTAs as reported by blastdbcmd
33
- # first. This is required to determine both unformatted FASTAs and those
34
- # that require reformatting.
35
- @formatted_fastas = []
36
- determine_formatted_fastas
37
-
38
- # Now determine FASTA files that are unformatted or require reformatting.
39
- @fastas_to_format = []
40
- determine_unformatted_fastas
41
- @fastas_to_reformat = []
42
- determine_fastas_to_reformat
43
-
44
- # Return true if there are files to be (re-)formatted or false otherwise.
45
- !@fastas_to_format.empty? || !@fastas_to_reformat.empty?
46
- end
47
-
48
- # Returns true if at least one database in database directory is formatted.
49
25
  def any_formatted?
50
- !@formatted_fastas.empty?
26
+ formatted_fastas.any?
51
27
  end
52
28
 
53
- # Returns true if there is at least one unformatted FASTA in the databases
54
- # directory.
55
- def any_unformatted?
56
- !@fastas_to_format.empty?
29
+ def any_to_format_or_reformat?
30
+ any_to_format? || any_to_reformat?
57
31
  end
58
32
 
59
- # Returns true if the databases directory contains one or more incompatible
60
- # databases.
61
- #
62
- # Note that it is okay to only use V4 databases or only V5 databases.
63
- # Incompatibility arises when they are mixed.
64
- def any_incompatible?
65
- return false if @formatted_fastas.all? { |ff| ff.v4? || ff.alias? }
66
- return false if @formatted_fastas.all? { |ff| ff.v5? || ff.alias? }
67
- true
33
+ def no_fastas?
34
+ probably_fastas.empty?
68
35
  end
69
36
 
70
- # Runs makeblastdb on each file in `@fastas_to_format` and
71
- # `@fastas_to_reformat`. Will do nothing unless `#scan`
72
- # has been run before.
37
+ # Runs makeblastdb on each file in `fastas_to_format` and
38
+ # `fastas_to_reformat`.
73
39
  def run
74
40
  format
75
41
  reformat
@@ -80,8 +46,9 @@ module SequenceServer
80
46
  def format
81
47
  # Make the intent clear as well as ensure the program won't crash if we
82
48
  # accidentally call format before calling scan.
83
- return unless @fastas_to_format
84
- @fastas_to_format.select do |path, title, type|
49
+ return unless any_to_format?
50
+
51
+ fastas_to_format.select do |path, title, type|
85
52
  make_blast_database('format', path, title, type)
86
53
  end
87
54
  end
@@ -91,50 +58,85 @@ module SequenceServer
91
58
  def reformat
92
59
  # Make the intent clear as well as ensure the program won't crash if
93
60
  # we accidentally call reformat before calling scan.
94
- return unless @fastas_to_reformat
95
- @fastas_to_reformat.select do |path, title, type, non_parse_seqids|
61
+ return unless any_to_reformat?
62
+
63
+ fastas_to_reformat.select do |path, title, type, non_parse_seqids|
96
64
  make_blast_database('reformat', path, title, type, non_parse_seqids)
97
65
  end
98
66
  end
99
67
 
100
- private
101
-
102
68
  # Determines which FASTA files in the database directory are already
103
- # formatted. Adds to @formatted_fastas.
104
- def determine_formatted_fastas
69
+ # formatted.
70
+ def formatted_fastas
71
+ return @formatted_fastas if defined?(@formatted_fastas)
72
+
73
+ @formatted_fastas = []
74
+
105
75
  blastdbcmd.each_line do |line|
106
76
  path, *rest = line.chomp.split("\t")
107
77
  next if multipart_database_name?(path)
78
+
108
79
  rest << get_categories(path)
109
80
  @formatted_fastas << Database.new(path, *rest)
110
81
  end
82
+
83
+ @formatted_fastas
84
+ end
85
+
86
+ def any_to_format?
87
+ fastas_to_format.any?
88
+ end
89
+
90
+ private
91
+
92
+ def any_to_reformat?
93
+ fastas_to_reformat.any?
111
94
  end
112
95
 
113
96
  # Determines which FASTA files in the database directory require
114
- # reformatting. Adds to @fastas_to_format.
115
- def determine_fastas_to_reformat
116
- @formatted_fastas.each do |ff|
117
- if ff.v4? || ff.non_parse_seqids?
118
- @fastas_to_reformat << [ff.path, ff.title, ff.type, ff.non_parse_seqids?]
119
- end
97
+ # reformatting.
98
+ def fastas_to_reformat
99
+ return @fastas_to_reformat if defined?(@fastas_to_reformat)
100
+
101
+ @fastas_to_reformat = []
102
+ formatted_fastas.each do |ff|
103
+ @fastas_to_reformat << [ff.path, ff.title, ff.type, ff.non_parse_seqids?] if ff.v4? || ff.non_parse_seqids?
120
104
  end
105
+
106
+ @fastas_to_reformat
121
107
  end
122
108
 
123
109
  # Determines which FASTA files in the database directory are
124
- # unformatted. Adds to @fastas_to_format.
125
- def determine_unformatted_fastas
126
- # Add a trailing slash to database_dir - Find.find doesn't work as
127
- # expected without the trailing slash if database_dir is a symlink
128
- # inside a docker container.
110
+ # unformatted.
111
+ def fastas_to_format
112
+ return @fastas_to_format if defined?(@fastas_to_format)
113
+
114
+ formatted_fasta_paths = formatted_fastas.map { |f| f[0] }
115
+ fasta_paths_to_format = probably_fastas - formatted_fasta_paths
116
+
117
+ @fastas_to_format = fasta_paths_to_format.map do |path|
118
+ [
119
+ path,
120
+ make_db_title(path),
121
+ guess_sequence_type_in_fasta(path)
122
+ ]
123
+ end
124
+
125
+ @fastas_to_format
126
+ end
127
+
128
+ def probably_fastas
129
+ return @probably_fastas if defined?(@probably_fastas)
130
+
131
+ @probably_fastas = []
132
+
129
133
  Find.find(database_dir + '/') do |path|
130
134
  next if File.directory?(path)
131
- next unless probably_fasta?(path)
132
- next if @formatted_fastas.any? { |f| f[0] == path }
133
135
 
134
- @fastas_to_format << [path,
135
- make_db_title(path),
136
- guess_sequence_type_in_fasta(path)]
136
+ @probably_fastas << path if probably_fasta?(path)
137
137
  end
138
+
139
+ @probably_fastas
138
140
  end
139
141
 
140
142
  # Runs `blastdbcmd` to determine formatted FASTA files in the database
@@ -146,14 +148,16 @@ module SequenceServer
146
148
  out, err = sys(cmd, path: config[:bin])
147
149
  errpat = /BLAST Database error/
148
150
  fail BLAST_DATABASE_ERROR.new(cmd, err) if err.match(errpat)
149
- return out
151
+
152
+ out
150
153
  rescue CommandFailed => e
151
- fail BLAST_DATABASE_ERROR.new(cmd, e.stderr)
154
+ raise BLAST_DATABASE_ERROR.new(cmd, e.stderr)
152
155
  end
153
156
 
154
157
  # Create BLAST database, given FASTA file and sequence type in FASTA file.
155
158
  def make_blast_database(action, file, title, type, non_parse_seqids = false)
156
159
  return unless make_blast_database?(action, file, type)
160
+
157
161
  title = confirm_database_title(title)
158
162
  extract_fasta(file) unless File.exist?(file)
159
163
  taxonomy = taxid_map(file, non_parse_seqids) || taxid
@@ -188,9 +192,10 @@ module SequenceServer
188
192
  # using blastdbcmd.
189
193
  def taxid_map(db, non_parse_seqids)
190
194
  return if non_parse_seqids
195
+
191
196
  taxid_map = db.sub(/#{File.extname(db)}$/, '.taxid_map.txt')
192
- extract_taxid_map(db, taxid_map) if !File.exist?(taxid_map)
193
- "-taxid_map #{taxid_map}" if !File.zero?(taxid_map)
197
+ extract_taxid_map(db, taxid_map) unless File.exist?(taxid_map)
198
+ "-taxid_map #{taxid_map}" unless File.zero?(taxid_map)
194
199
  end
195
200
 
196
201
  # Get taxid from the user. Returns user input or 0.
@@ -211,10 +216,24 @@ module SequenceServer
211
216
  cmd = "makeblastdb -parse_seqids -hash_index -in '#{file}'" \
212
217
  " -dbtype #{type.to_s.slice(0, 4)} -title '#{title}'" \
213
218
  " #{taxonomy}"
214
- out, err = sys(cmd, path: config[:bin])
219
+
220
+ output = if File.directory?(file)
221
+ File.join(file, 'makeblastdb')
222
+ else
223
+ "#{file}.makeblastdb"
224
+ end
225
+
226
+ out, err = sys(
227
+ cmd,
228
+ path: config[:bin],
229
+ stderr: [output, 'stderr'].join,
230
+ stdout: [output, 'stdout'].join
231
+ )
232
+
215
233
  puts out.strip
216
234
  puts err.strip
217
- return true
235
+
236
+ true
218
237
  rescue CommandFailed => e
219
238
  puts <<~MSG
220
239
  Could not create BLAST database for: #{file}
@@ -261,7 +280,7 @@ module SequenceServer
261
280
  # /home/ben/pd.ben/sequenceserver/db/nr00 => no
262
281
  # /mnt/blast-db/refseq_genomic.100 => yes
263
282
  def multipart_database_name?(db_name)
264
- !(db_name.match(%r{.+/\S+\.\d{2,3}$}).nil?)
283
+ !db_name.match(%r{.+/\S+\.\d{2,3}$}).nil?
265
284
  end
266
285
 
267
286
  def get_categories(path)
@@ -273,7 +292,10 @@ module SequenceServer
273
292
 
274
293
  # Returns true if first character of the file is '>'.
275
294
  def probably_fasta?(file)
276
- return false unless file.match(/((cds)|(fasta)|(fna)|(pep)|(cdna)|(fa)|(prot)|(fas)|(genome)|(nuc)|(dna)|(nt))$/i)
295
+ unless file.match(/((cdna)|(cds)|(dna)|(fa)|(faa)|(fas)|(fasta)|(fna)|(genome)|(nt)|(nuc)|(pep)|(prot))$/i)
296
+ return false
297
+ end
298
+
277
299
  File.read(file, 1) == '>'
278
300
  end
279
301
 
@@ -42,7 +42,7 @@ class Pool
42
42
 
43
43
  def shutdown
44
44
  @size.times do
45
- schedule { throw :exit }
45
+ queue { throw :exit }
46
46
  end
47
47
  @pool.map(&:join)
48
48
  end
@@ -7,11 +7,8 @@ module SequenceServer
7
7
  # own report subclass.
8
8
  class Report
9
9
  class << self
10
- # Generates report for the given job. Returns generated report object.
11
- #
12
- # TODO: Dynamic dispatch.
13
10
  def generate(job)
14
- BLAST::Report.new(job)
11
+ BLAST::Report.new(job).generate
15
12
  end
16
13
  end
17
14
 
@@ -23,7 +20,6 @@ module SequenceServer
23
20
  def initialize(job)
24
21
  @job = job
25
22
  yield if block_given?
26
- generate
27
23
  end
28
24
 
29
25
  attr_reader :job