sequenceserver 3.0.1 → 3.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/bin/sequenceserver +2 -2
  3. data/lib/sequenceserver/api_errors.rb +1 -1
  4. data/lib/sequenceserver/blast/job.rb +20 -3
  5. data/lib/sequenceserver/blast/report.rb +74 -86
  6. data/lib/sequenceserver/blast/tasks.rb +38 -0
  7. data/lib/sequenceserver/config.rb +54 -20
  8. data/lib/sequenceserver/makeblastdb.rb +16 -2
  9. data/lib/sequenceserver/report.rb +0 -6
  10. data/lib/sequenceserver/routes.rb +32 -21
  11. data/lib/sequenceserver/version.rb +1 -1
  12. data/lib/sequenceserver.rb +1 -1
  13. data/public/css/app.css +121 -0
  14. data/public/css/app.min.css +1 -0
  15. data/public/css/sequenceserver.css +0 -148
  16. data/public/css/sequenceserver.min.css +3 -3
  17. data/public/js/circos.js +2 -2
  18. data/public/js/collapse_preferences.js +37 -0
  19. data/public/js/databases.js +65 -37
  20. data/public/js/databases_tree.js +2 -1
  21. data/public/js/dnd.js +37 -50
  22. data/public/js/form.js +78 -50
  23. data/public/js/grapher.js +23 -37
  24. data/public/js/hits_overview.js +2 -2
  25. data/public/js/kablammo.js +2 -2
  26. data/public/js/length_distribution.js +3 -3
  27. data/public/js/null_plugins/grapher/histogram.js +25 -0
  28. data/public/js/null_plugins/options.js +3 -0
  29. data/public/js/null_plugins/query_stats.js +11 -0
  30. data/public/js/null_plugins/report_plugins.js +6 -1
  31. data/public/js/null_plugins/search_header_plugin.js +4 -0
  32. data/public/js/options.js +161 -56
  33. data/public/js/query.js +85 -59
  34. data/public/js/report.js +1 -1
  35. data/public/js/search.js +2 -0
  36. data/public/js/search_button.js +67 -56
  37. data/public/js/sidebar.js +1 -1
  38. data/public/js/tests/database.spec.js +5 -5
  39. data/public/js/tests/{advanced_parameters.spec.js → form.spec.js} +35 -1
  40. data/public/js/tests/mock_data/databases.json +5 -5
  41. data/public/js/tests/mocks/circos.js +6 -0
  42. data/public/js/tests/report.spec.js +4 -3
  43. data/public/js/tests/search_query.spec.js +5 -6
  44. data/public/sequenceserver-report.min.js +45 -23
  45. data/public/sequenceserver-search.min.js +57 -13
  46. data/public/sequenceserver_logo.webp +0 -0
  47. data/views/blastn_options.erb +66 -66
  48. data/views/blastp_options.erb +59 -59
  49. data/views/blastx_options.erb +68 -68
  50. data/views/layout.erb +60 -3
  51. data/views/search.erb +33 -38
  52. data/views/search_layout.erb +152 -0
  53. data/views/tblastn_options.erb +57 -57
  54. data/views/tblastx_options.erb +64 -64
  55. metadata +31 -22
  56. data/lib/sequenceserver/makeblastdb-modified-with-cache.rb +0 -345
  57. data/public/SequenceServer_logo.png +0 -0
@@ -1,345 +0,0 @@
1
- require 'find'
2
- require 'forwardable'
3
-
4
- module SequenceServer
5
- # Smart `makeblastdb` wrapper: recursively scans database directory determining
6
- # which files need to be formatted or re-formatted.
7
- #
8
- # Example usage:
9
- #
10
- # makeblastdb = MAKEBLASTDB.new(database_dir)
11
- # makeblastdb.scan && makeblastdb.run
12
- #
13
- class MAKEBLASTDB
14
- extend Forwardable
15
-
16
- def_delegators SequenceServer, :config, :sys, :logger
17
-
18
- def initialize(database_dir)
19
- @database_dir = database_dir
20
- end
21
-
22
- attr_reader :database_dir
23
- attr_reader :formatted_fastas
24
- attr_reader :fastas_to_format
25
- attr_reader :fastas_to_reformat
26
-
27
- # Scans the database directory to determine which FASTA files require
28
- # formatting or re-formatting.
29
- #
30
- # Returns `true` if there are files to (re-)format, `false` otherwise.
31
- def scan
32
- # We need to know the list of formatted FASTAs as reported by blastdbcmd
33
- # first. This is required to determine both unformatted FASTAs and those
34
- # that require reformatting.
35
- @formatted_fastas = []
36
- determine_formatted_fastas
37
-
38
- # Now determine FASTA files that are unformatted or require reformatting.
39
- @fastas_to_format = []
40
- determine_unformatted_fastas
41
- @fastas_to_reformat = []
42
- determine_fastas_to_reformat
43
-
44
- # Return true if there are files to be (re-)formatted or false otherwise.
45
- !@fastas_to_format.empty? || !@fastas_to_reformat.empty?
46
- end
47
-
48
- # Returns true if at least one database in database directory is formatted.
49
- def any_formatted?
50
- !@formatted_fastas.empty?
51
- end
52
-
53
- # Returns true if there is at least one unformatted FASTA in the databases
54
- # directory.
55
- def any_unformatted?
56
- !@fastas_to_format.empty?
57
- end
58
-
59
- # Returns true if the databases directory contains one or more incompatible
60
- # databases.
61
- #
62
- # Note that it is okay to only use V4 databases or only V5 databases.
63
- # Incompatibility arises when they are mixed.
64
- def any_incompatible?
65
- return false if @formatted_fastas.all? { |ff| ff.v4? || ff.alias? }
66
- return false if @formatted_fastas.all? { |ff| ff.v5? || ff.alias? }
67
- true
68
- end
69
-
70
- # Runs makeblastdb on each file in `@fastas_to_format` and
71
- # `@fastas_to_reformat`. Will do nothing unless `#scan`
72
- # has been run before.
73
- def run
74
- format
75
- reformat
76
- end
77
-
78
- # Format any unformatted FASTA files in database directory. Returns Array
79
- # of files that were formatted.
80
- def format
81
- # Make the intent clear as well as ensure the program won't crash if we
82
- # accidentally call format before calling scan.
83
- return unless @fastas_to_format
84
- @fastas_to_format.select do |path, title, type|
85
- make_blast_database('format', path, title, type)
86
- end
87
- end
88
-
89
- # Re-format databases that require reformatting. Returns Array of files
90
- # that were reformatted.
91
- def reformat
92
- # Make the intent clear as well as ensure the program won't crash if
93
- # we accidentally call reformat before calling scan.
94
- return unless @fastas_to_reformat
95
- @fastas_to_reformat.select do |path, title, type, non_parse_seqids|
96
- make_blast_database('reformat', path, title, type, non_parse_seqids)
97
- end
98
- end
99
-
100
- private
101
-
102
- # Determines which FASTA files in the database directory are already
103
- # formatted. Adds to @formatted_fastas.
104
- def determine_formatted_fastas
105
- blastdbcmd.each_line do |line|
106
- path, *rest = line.chomp.split("\t")
107
- next if multipart_database_name?(path)
108
- rest << get_categories(path)
109
- @formatted_fastas << Database.new(path, *rest)
110
- end
111
- end
112
-
113
- # Determines which FASTA files in the database directory require
114
- # reformatting. Adds to @fastas_to_reformat.
115
- def determine_fastas_to_reformat
116
- @formatted_fastas.each do |ff|
117
- if ff.v4? || ff.non_parse_seqids?
118
- @fastas_to_reformat << [ff.path, ff.title, ff.type, ff.non_parse_seqids?]
119
- end
120
- end
121
- end
122
-
123
- # Determines which FASTA files in the database directory are
124
- # unformatted. Adds to @fastas_to_format.
125
- def determine_unformatted_fastas
126
- # Add a trailing slash to database_dir - Find.find doesn't work as
127
- # expected without the trailing slash if database_dir is a symlink
128
- # inside a docker container.
129
- Find.find(database_dir + '/') do |path|
130
- next if File.directory?(path)
131
- next unless probably_fasta?(path)
132
- next if @formatted_fastas.any? { |f| f[0] == path }
133
-
134
- @fastas_to_format << [path,
135
- make_db_title(path),
136
- guess_sequence_type_in_fasta(path)]
137
- end
138
- end
139
-
140
- # Runs `blastdbcmd` to determine formatted FASTA files in the database
141
- # directory. Returns the output of `blastdbcmd`. This method is called
142
- # by `determine_formatted_fastas`.
143
- def blastdbcmd
144
- # calculate checksum of database directory
145
- current_db_checksum = Zlib::crc32(Dir.glob(File.join(config[:database_dir], '/**/*')).map {
146
- |path| path.to_s + "_" + File.mtime(path).to_s + "_" + File.size(path).to_s
147
- }.to_s)
148
-
149
- checksum_path = config[:database_dir].chomp('/') + '.checksum'
150
- index_path = config[:database_dir].chomp('/') + '.index'
151
-
152
- if File.exists?(checksum_path)
153
- if current_db_checksum == File.read(checksum_path).to_i # db directory hasn't changed
154
- if File.exists?(index_path) # lets use existing index
155
- logger.info "Using existing database index: #{index_path}"
156
- return File.read(index_path)
157
- end
158
- end
159
- end rescue logger.error "Could not read: #{checksum_path} or #{index_path}"
160
-
161
- # database directory has changed, or index file doesn't exist
162
- # thus we run blastdbcmd to get formatted FASTA files
163
- logger.info "Scanning for BLAST databases & creating index"
164
- cmd = "blastdbcmd -recursive -list #{config[:database_dir]}" \
165
- ' -list_outfmt "%f %t %p %n %l %d %v"'
166
- out, err = sys(cmd, path: config[:bin])
167
- errpat = /BLAST Database error/
168
- fail BLAST_DATABASE_ERROR.new(cmd, err) if err.match(errpat)
169
-
170
- # write checksum and index to file
171
- File.open(checksum_path, 'w') { |f| f.write(current_db_checksum) } rescue
172
- logger.error "Could not write database checksum to file" + checksum_path
173
- File.open(index_path, 'w') { |f| f.write(out) } rescue
174
- logger.error "Could not write database index to file" + index_path
175
-
176
- return out
177
- rescue CommandFailed => e
178
- fail BLAST_DATABASE_ERROR.new(cmd, e.stderr)
179
- end
180
-
181
- # Create BLAST database, given FASTA file and sequence type in FASTA file.
182
- def make_blast_database(action, file, title, type, non_parse_seqids = false)
183
- return unless make_blast_database?(action, file, type)
184
- title = confirm_database_title(title)
185
- extract_fasta(file) unless File.exist?(file)
186
- taxonomy = taxid_map(file, non_parse_seqids) || taxid
187
- _make_blast_database(file, type, title, taxonomy)
188
- end
189
-
190
- # Show file path and guessed sequence type to the user and obtain a y/n
191
- # response.
192
- #
193
- # Returns true unless the user's response contains 'n' or 'N'.
194
- def make_blast_database?(action, file, type)
195
- puts
196
- puts
197
- puts "FASTA file to #{action}: #{file}"
198
- puts "FASTA type: #{type}"
199
- print 'Proceed? [y/n] (Default: y): '
200
- response = STDIN.gets.to_s.strip
201
- !response.match(/n/i)
202
- end
203
-
204
- # Show the database title that we are going to use to the user for
205
- # confirmation.
206
- #
207
- # Returns user input if any. Auto-determined title otherwise.
208
- def confirm_database_title(default)
209
- print "Enter a database title or will use '#{default}': "
210
- from_user = STDIN.gets.to_s.strip
211
- from_user.empty? && default || from_user
212
- end
213
-
214
- # Check if a '.taxid_map.txt' file exists. If not, try getting it
215
- # using blastdbcmd.
216
- def taxid_map(db, non_parse_seqids)
217
- return if non_parse_seqids
218
- taxid_map = db.sub(/#{File.extname(db)}$/, '.taxid_map.txt')
219
- extract_taxid_map(db, taxid_map) if !File.exist?(taxid_map)
220
- "-taxid_map #{taxid_map}" if !File.zero?(taxid_map)
221
- end
222
-
223
- # Get taxid from the user. Returns user input or 0.
224
- #
225
- # Using 0 as taxid is equivalent to not setting taxid for the database
226
- # that will be created.
227
- def taxid
228
- default = 0
229
- print 'Enter taxid (optional): '
230
- user_response = STDIN.gets.strip
231
- "-taxid #{user_response.empty? && default || Integer(user_response)}"
232
- rescue ArgumentError # presumably from call to Integer()
233
- puts 'taxid should be a number'
234
- retry
235
- end
236
-
237
- def _make_blast_database(file, type, title, taxonomy)
238
- cmd = "makeblastdb -parse_seqids -hash_index -in '#{file}'" \
239
- " -dbtype #{type.to_s.slice(0, 4)} -title '#{title}'" \
240
- " #{taxonomy}"
241
- out, err = sys(cmd, path: config[:bin])
242
- puts out.strip
243
- puts err.strip
244
- return true
245
- rescue CommandFailed => e
246
- puts <<~MSG
247
- Could not create BLAST database for: #{file}
248
- Tried: #{cmd}
249
- stdout: #{e.stdout}
250
- stderr: #{e.stderr}
251
- MSG
252
- exit!
253
- end
254
-
255
- # Extract FASTA file from BLAST database.
256
- #
257
- # Invoked while reformatting a BLAST database if the corresponding
258
- # FASTA file does not exist.
259
- def extract_fasta(db)
260
- puts
261
- puts 'Extracting sequences ...'
262
- cmd = "blastdbcmd -entry all -db #{db}"
263
- sys(cmd, stdout: db, path: config[:bin])
264
- rescue CommandFailed => e
265
- puts <<~MSG
266
- Could not extract sequences from: #{db}
267
- Tried: #{cmd}
268
- stdout: #{e.stdout}
269
- stderr: #{e.stderr}
270
- MSG
271
- exit!
272
- end
273
-
274
- def extract_taxid_map(db, taxmap_file)
275
- cmd = "blastdbcmd -entry all -db #{db} -outfmt '%i %T'"
276
- sys(cmd, stdout: taxmap_file, path: config[:bin])
277
- rescue CommandFailed => e
278
- # silence error
279
- end
280
-
281
- # Returns true if the database name appears to be a multi-part database
282
- # name.
283
- #
284
- # e.g.
285
- # /home/ben/pd.ben/sequenceserver/db/nr.00 => yes
286
- # /home/ben/pd.ben/sequenceserver/db/nr => no
287
- # /home/ben/pd.ben/sequenceserver/db/img3.5.finished.faa.01 => yes
288
- # /home/ben/pd.ben/sequenceserver/db/nr00 => no
289
- # /mnt/blast-db/refseq_genomic.100 => yes
290
- def multipart_database_name?(db_name)
291
- !(db_name.match(%r{.+/\S+\.\d{2,3}$}).nil?)
292
- end
293
-
294
- def get_categories(path)
295
- path
296
- .gsub(config[:database_dir], '') # remove database_dir from path
297
- .split('/')
298
- .reject(&:empty?)[0..-2] # the first entry might be '' if database_dir does not end with /
299
- end
300
-
301
- # Returns true if the file name ends with a FASTA-like suffix and the first character of the file is '>'.
302
- def probably_fasta?(file)
303
- return false unless file.match(/((cds)|(fasta)|(fna)|(pep)|(cdna)|(fa)|(prot)|(fas)|(genome)|(nuc)|(dna)|(nt))$/i)
304
- File.read(file, 1) == '>'
305
- end
306
-
307
- # Suggests improved titles when generating database names from files
308
- # for improved appearance and readability in web interface.
309
- # For example:
310
- # Cobs1.4.proteins.fasta -> Cobs 1.4 proteins
311
- # S_invicta.xx.2.5.small.nucl.fa -> S invicta xx 2.5 small nucl
312
- def make_db_title(path)
313
- db_name = File.basename(path)
314
- db_name.tr!('"', "'")
315
- # removes .fasta like extension names
316
- db_name.gsub!(File.extname(db_name), '')
317
- # replaces _ with ' ',
318
- db_name.gsub!(/(_)/, ' ')
319
- # replaces '.' with ' ' when no numbers are on either side,
320
- db_name.gsub!(/(\D)\.(?=\D)/, '\1 ')
321
- # preserves version numbers
322
- db_name.gsub!(/\W*(\d+([.-]\d+)+)\W*/, ' \1 ')
323
- db_name
324
- end
325
-
326
- # Guess whether FASTA file contains protein or nucleotide sequences by
327
- # sampling a few characters of the file.
328
- def guess_sequence_type_in_fasta(file)
329
- sequences = sample_sequences(file)
330
- sequence_types = sequences.map { |seq| Sequence.guess_type(seq) }
331
- sequence_types = sequence_types.uniq.compact
332
- (sequence_types.length == 1) && sequence_types.first
333
- end
334
-
335
- # Read first 1,048,576 characters of the file, split the read text on
336
- # fasta def line pattern and return.
337
- #
338
- # If the given file is FASTA, returns Array of as many different
339
- # sequences in the portion of the file read. Returns the portion
340
- # of the file read wrapped in an Array otherwise.
341
- def sample_sequences(file)
342
- File.read(file, 1_048_576).split(/^>.+$/).delete_if(&:empty?)
343
- end
344
- end
345
- end
Binary file