sequenceserver 3.0.1 → 3.1.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of sequenceserver might be problematic. Click here for more details.

Files changed (62) hide show
  1. checksums.yaml +4 -4
  2. data/bin/sequenceserver +2 -2
  3. data/lib/sequenceserver/api_errors.rb +32 -2
  4. data/lib/sequenceserver/blast/job.rb +20 -3
  5. data/lib/sequenceserver/blast/report.rb +74 -86
  6. data/lib/sequenceserver/blast/tasks.rb +38 -0
  7. data/lib/sequenceserver/config.rb +54 -20
  8. data/lib/sequenceserver/makeblastdb.rb +16 -2
  9. data/lib/sequenceserver/report.rb +0 -6
  10. data/lib/sequenceserver/routes.rb +66 -25
  11. data/lib/sequenceserver/sequence.rb +35 -7
  12. data/lib/sequenceserver/server.rb +1 -1
  13. data/lib/sequenceserver/version.rb +1 -1
  14. data/lib/sequenceserver.rb +1 -1
  15. data/public/404.html +1 -1
  16. data/public/css/app.css +121 -0
  17. data/public/css/app.min.css +1 -0
  18. data/public/css/sequenceserver.css +0 -148
  19. data/public/css/sequenceserver.min.css +3 -3
  20. data/public/js/circos.js +2 -2
  21. data/public/js/collapse_preferences.js +37 -0
  22. data/public/js/databases.js +65 -37
  23. data/public/js/databases_tree.js +2 -1
  24. data/public/js/dnd.js +37 -50
  25. data/public/js/download_fasta.js +1 -0
  26. data/public/js/form.js +79 -50
  27. data/public/js/grapher.js +23 -37
  28. data/public/js/hits_overview.js +2 -2
  29. data/public/js/kablammo.js +2 -2
  30. data/public/js/length_distribution.js +3 -3
  31. data/public/js/null_plugins/grapher/histogram.js +25 -0
  32. data/public/js/null_plugins/options.js +3 -0
  33. data/public/js/null_plugins/query_stats.js +11 -0
  34. data/public/js/null_plugins/report_plugins.js +6 -1
  35. data/public/js/null_plugins/search_header_plugin.js +4 -0
  36. data/public/js/options.js +161 -56
  37. data/public/js/query.js +85 -59
  38. data/public/js/report.js +1 -1
  39. data/public/js/search.js +2 -0
  40. data/public/js/search_button.js +67 -56
  41. data/public/js/sidebar.js +10 -1
  42. data/public/js/tests/database.spec.js +5 -5
  43. data/public/js/tests/form.spec.js +98 -0
  44. data/public/js/tests/mock_data/databases.json +5 -5
  45. data/public/js/tests/mocks/circos.js +6 -0
  46. data/public/js/tests/report.spec.js +4 -3
  47. data/public/js/tests/search_query.spec.js +16 -6
  48. data/public/sequenceserver-report.min.js +46 -24
  49. data/public/sequenceserver-search.min.js +57 -13
  50. data/public/sequenceserver_logo.webp +0 -0
  51. data/views/blastn_options.erb +66 -66
  52. data/views/blastp_options.erb +59 -59
  53. data/views/blastx_options.erb +68 -68
  54. data/views/layout.erb +61 -3
  55. data/views/search.erb +33 -38
  56. data/views/search_layout.erb +153 -0
  57. data/views/tblastn_options.erb +57 -57
  58. data/views/tblastx_options.erb +64 -64
  59. metadata +51 -22
  60. data/lib/sequenceserver/makeblastdb-modified-with-cache.rb +0 -345
  61. data/public/SequenceServer_logo.png +0 -0
  62. data/public/js/tests/advanced_parameters.spec.js +0 -36
@@ -1,345 +0,0 @@
1
- require 'find'
2
- require 'forwardable'
3
-
4
- module SequenceServer
5
- # Smart `makeblastdb` wrapper: recursively scans database directory determining
6
- # which files need to be formatted or re-formatted.
7
- #
8
- # Example usage:
9
- #
10
- # makeblastdb = MAKEBLASTDB.new(database_dir)
11
- # makeblastdb.scan && makeblastdb.run
12
- #
13
- class MAKEBLASTDB
14
- extend Forwardable
15
-
16
- def_delegators SequenceServer, :config, :sys, :logger
17
-
18
- def initialize(database_dir)
19
- @database_dir = database_dir
20
- end
21
-
22
- attr_reader :database_dir
23
- attr_reader :formatted_fastas
24
- attr_reader :fastas_to_format
25
- attr_reader :fastas_to_reformat
26
-
27
- # Scans the database directory to determine which FASTA files require
28
- # formatting or re-formatting.
29
- #
30
- # Returns `true` if there are files to (re-)format, `false` otherwise.
31
- def scan
32
- # We need to know the list of formatted FASTAs as reported by blastdbcmd
33
- # first. This is required to determine both unformatted FASTAs and those
34
- # that require reformatting.
35
- @formatted_fastas = []
36
- determine_formatted_fastas
37
-
38
- # Now determine FASTA files that are unformatted or require reformatting.
39
- @fastas_to_format = []
40
- determine_unformatted_fastas
41
- @fastas_to_reformat = []
42
- determine_fastas_to_reformat
43
-
44
- # Return true if there are files to be (re-)formatted or false otherwise.
45
- !@fastas_to_format.empty? || !@fastas_to_reformat.empty?
46
- end
47
-
48
- # Returns true if at least one database in database directory is formatted.
49
- def any_formatted?
50
- !@formatted_fastas.empty?
51
- end
52
-
53
- # Returns true if there is at least one unformatted FASTA in the databases
54
- # directory.
55
- def any_unformatted?
56
- !@fastas_to_format.empty?
57
- end
58
-
59
- # Returns true if the databases directory contains one or more incompatible
60
- # databases.
61
- #
62
- # Note that it is okay to only use V4 databases or only V5 databases.
63
- # Incompatibility arises when they are mixed.
64
- def any_incompatible?
65
- return false if @formatted_fastas.all? { |ff| ff.v4? || ff.alias? }
66
- return false if @formatted_fastas.all? { |ff| ff.v5? || ff.alias? }
67
- true
68
- end
69
-
70
- # Runs makeblastdb on each file in `@fastas_to_format` and
71
- # `@fastas_to_reformat`. Will do nothing unless `#scan`
72
- # has been run before.
73
- def run
74
- format
75
- reformat
76
- end
77
-
78
- # Format any unformatted FASTA files in database directory. Returns Array
79
- # of files that were formatted.
80
- def format
81
- # Make the intent clear as well as ensure the program won't crash if we
82
- # accidentally call format before calling scan.
83
- return unless @fastas_to_format
84
- @fastas_to_format.select do |path, title, type|
85
- make_blast_database('format', path, title, type)
86
- end
87
- end
88
-
89
- # Re-format databases that require reformatting. Returns Array of files
90
- # that were reformatted.
91
- def reformat
92
- # Make the intent clear as well as ensure the program won't crash if
93
- # we accidentally call reformat before calling scan.
94
- return unless @fastas_to_reformat
95
- @fastas_to_reformat.select do |path, title, type, non_parse_seqids|
96
- make_blast_database('reformat', path, title, type, non_parse_seqids)
97
- end
98
- end
99
-
100
- private
101
-
102
- # Determines which FASTA files in the database directory are already
103
- # formatted. Adds to @formatted_fastas.
104
- def determine_formatted_fastas
105
- blastdbcmd.each_line do |line|
106
- path, *rest = line.chomp.split("\t")
107
- next if multipart_database_name?(path)
108
- rest << get_categories(path)
109
- @formatted_fastas << Database.new(path, *rest)
110
- end
111
- end
112
-
113
- # Determines which FASTA files in the database directory require
114
- # reformatting. Adds to @fastas_to_reformat.
115
- def determine_fastas_to_reformat
116
- @formatted_fastas.each do |ff|
117
- if ff.v4? || ff.non_parse_seqids?
118
- @fastas_to_reformat << [ff.path, ff.title, ff.type, ff.non_parse_seqids?]
119
- end
120
- end
121
- end
122
-
123
- # Determines which FASTA files in the database directory are
124
- # unformatted. Adds to @fastas_to_format.
125
- def determine_unformatted_fastas
126
- # Add a trailing slash to database_dir - Find.find doesn't work as
127
- # expected without the trailing slash if database_dir is a symlink
128
- # inside a docker container.
129
- Find.find(database_dir + '/') do |path|
130
- next if File.directory?(path)
131
- next unless probably_fasta?(path)
132
- next if @formatted_fastas.any? { |f| f[0] == path }
133
-
134
- @fastas_to_format << [path,
135
- make_db_title(path),
136
- guess_sequence_type_in_fasta(path)]
137
- end
138
- end
139
-
140
- # Runs `blastdbcmd` to determine formatted FASTA files in the database
141
- # directory. Returns the output of `blastdbcmd`. This method is called
142
- # by `determine_formatted_fastas`.
143
- def blastdbcmd
144
- # calculate checksum of database directory
145
- current_db_checksum = Zlib::crc32(Dir.glob(File.join(config[:database_dir], '/**/*')).map {
146
- |path| path.to_s + "_" + File.mtime(path).to_s + "_" + File.size(path).to_s
147
- }.to_s)
148
-
149
- checksum_path = config[:database_dir].chomp('/') + '.checksum'
150
- index_path = config[:database_dir].chomp('/') + '.index'
151
-
152
- if File.exists?(checksum_path)
153
- if current_db_checksum == File.read(checksum_path).to_i # db directory hasn't changed
154
- if File.exists?(index_path) # lets use existing index
155
- logger.info "Using existing database index: #{index_path}"
156
- return File.read(index_path)
157
- end
158
- end
159
- end rescue logger.error "Could not read: #{checksum_path} or #{index_path}"
160
-
161
- # database directory has changed, or index file doesn't exist
162
- # thus we run blastdbcmd to get formatted FASTA files
163
- logger.info "Scanning for BLAST databases & creating index"
164
- cmd = "blastdbcmd -recursive -list #{config[:database_dir]}" \
165
- ' -list_outfmt "%f %t %p %n %l %d %v"'
166
- out, err = sys(cmd, path: config[:bin])
167
- errpat = /BLAST Database error/
168
- fail BLAST_DATABASE_ERROR.new(cmd, err) if err.match(errpat)
169
-
170
- # write checksum and index to file
171
- File.open(checksum_path, 'w') { |f| f.write(current_db_checksum) } rescue
172
- logger.error "Could not write database checksum to file" + checksum_path
173
- File.open(index_path, 'w') { |f| f.write(out) } rescue
174
- logger.error "Could not write database index to file" + index_path
175
-
176
- return out
177
- rescue CommandFailed => e
178
- fail BLAST_DATABASE_ERROR.new(cmd, e.stderr)
179
- end
180
-
181
- # Create BLAST database, given FASTA file and sequence type in FASTA file.
182
- def make_blast_database(action, file, title, type, non_parse_seqids = false)
183
- return unless make_blast_database?(action, file, type)
184
- title = confirm_database_title(title)
185
- extract_fasta(file) unless File.exist?(file)
186
- taxonomy = taxid_map(file, non_parse_seqids) || taxid
187
- _make_blast_database(file, type, title, taxonomy)
188
- end
189
-
190
- # Show file path and guessed sequence type to the user and obtain a y/n
191
- # response.
192
- #
193
- # Returns true unless the user's response contains 'n' or 'N'.
194
- def make_blast_database?(action, file, type)
195
- puts
196
- puts
197
- puts "FASTA file to #{action}: #{file}"
198
- puts "FASTA type: #{type}"
199
- print 'Proceed? [y/n] (Default: y): '
200
- response = STDIN.gets.to_s.strip
201
- !response.match(/n/i)
202
- end
203
-
204
- # Show the database title that we are going to use to the user for
205
- # confirmation.
206
- #
207
- # Returns user input if any. Auto-determined title otherwise.
208
- def confirm_database_title(default)
209
- print "Enter a database title or will use '#{default}': "
210
- from_user = STDIN.gets.to_s.strip
211
- from_user.empty? && default || from_user
212
- end
213
-
214
- # Check if a '.taxid_map.txt' file exists. If not, try getting it
215
- # using blastdbcmd.
216
- def taxid_map(db, non_parse_seqids)
217
- return if non_parse_seqids
218
- taxid_map = db.sub(/#{File.extname(db)}$/, '.taxid_map.txt')
219
- extract_taxid_map(db, taxid_map) if !File.exist?(taxid_map)
220
- "-taxid_map #{taxid_map}" if !File.zero?(taxid_map)
221
- end
222
-
223
- # Get taxid from the user. Returns user input or 0.
224
- #
225
- # Using 0 as taxid is equivalent to not setting taxid for the database
226
- # that will be created.
227
- def taxid
228
- default = 0
229
- print 'Enter taxid (optional): '
230
- user_response = STDIN.gets.strip
231
- "-taxid #{user_response.empty? && default || Integer(user_response)}"
232
- rescue ArgumentError # presumably from call to Integer()
233
- puts 'taxid should be a number'
234
- retry
235
- end
236
-
237
- def _make_blast_database(file, type, title, taxonomy)
238
- cmd = "makeblastdb -parse_seqids -hash_index -in '#{file}'" \
239
- " -dbtype #{type.to_s.slice(0, 4)} -title '#{title}'" \
240
- " #{taxonomy}"
241
- out, err = sys(cmd, path: config[:bin])
242
- puts out.strip
243
- puts err.strip
244
- return true
245
- rescue CommandFailed => e
246
- puts <<~MSG
247
- Could not create BLAST database for: #{file}
248
- Tried: #{cmd}
249
- stdout: #{e.stdout}
250
- stderr: #{e.stderr}
251
- MSG
252
- exit!
253
- end
254
-
255
- # Extract FASTA file from BLAST database.
256
- #
257
- # Invoked while reformatting a BLAST database if the corresponding
258
- # FASTA file does not exist.
259
- def extract_fasta(db)
260
- puts
261
- puts 'Extracting sequences ...'
262
- cmd = "blastdbcmd -entry all -db #{db}"
263
- sys(cmd, stdout: db, path: config[:bin])
264
- rescue CommandFailed => e
265
- puts <<~MSG
266
- Could not extract sequences from: #{db}
267
- Tried: #{cmd}
268
- stdout: #{e.stdout}
269
- stderr: #{e.stderr}
270
- MSG
271
- exit!
272
- end
273
-
274
- def extract_taxid_map(db, taxmap_file)
275
- cmd = "blastdbcmd -entry all -db #{db} -outfmt '%i %T'"
276
- sys(cmd, stdout: taxmap_file, path: config[:bin])
277
- rescue CommandFailed => e
278
- # silence error
279
- end
280
-
281
- # Returns true if the database name appears to be a multi-part database
282
- # name.
283
- #
284
- # e.g.
285
- # /home/ben/pd.ben/sequenceserver/db/nr.00 => yes
286
- # /home/ben/pd.ben/sequenceserver/db/nr => no
287
- # /home/ben/pd.ben/sequenceserver/db/img3.5.finished.faa.01 => yes
288
- # /home/ben/pd.ben/sequenceserver/db/nr00 => no
289
- # /mnt/blast-db/refseq_genomic.100 => yes
290
- def multipart_database_name?(db_name)
291
- !(db_name.match(%r{.+/\S+\.\d{2,3}$}).nil?)
292
- end
293
-
294
- def get_categories(path)
295
- path
296
- .gsub(config[:database_dir], '') # remove database_dir from path
297
- .split('/')
298
- .reject(&:empty?)[0..-2] # the first entry might be '' if database_dir does not end with /
299
- end
300
-
301
- # Returns true if the file name ends with a FASTA-like suffix (e.g. fasta, fa, fna) and the first character of the file is '>'.
302
- def probably_fasta?(file)
303
- return false unless file.match(/((cds)|(fasta)|(fna)|(pep)|(cdna)|(fa)|(prot)|(fas)|(genome)|(nuc)|(dna)|(nt))$/i)
304
- File.read(file, 1) == '>'
305
- end
306
-
307
- # Suggests improved titles when generating database names from files
308
- # for improved appearance and readability in web interface.
309
- # For example:
310
- # Cobs1.4.proteins.fasta -> Cobs 1.4 proteins
311
- # S_invicta.xx.2.5.small.nucl.fa -> S invicta xx 2.5 small nucl
312
- def make_db_title(path)
313
- db_name = File.basename(path)
314
- db_name.tr!('"', "'")
315
- # removes .fasta like extension names
316
- db_name.gsub!(File.extname(db_name), '')
317
- # replaces _ with ' ',
318
- db_name.gsub!(/(_)/, ' ')
319
- # replaces '.' with ' ' when no numbers are on either side,
320
- db_name.gsub!(/(\D)\.(?=\D)/, '\1 ')
321
- # preserves version numbers
322
- db_name.gsub!(/\W*(\d+([.-]\d+)+)\W*/, ' \1 ')
323
- db_name
324
- end
325
-
326
- # Guess whether FASTA file contains protein or nucleotide sequences by
327
- # sampling a few characters of the file.
328
- def guess_sequence_type_in_fasta(file)
329
- sequences = sample_sequences(file)
330
- sequence_types = sequences.map { |seq| Sequence.guess_type(seq) }
331
- sequence_types = sequence_types.uniq.compact
332
- (sequence_types.length == 1) && sequence_types.first
333
- end
334
-
335
- # Read first 1,048,576 characters of the file, split the read text on
336
- # fasta def line pattern and return.
337
- #
338
- # If the given file is FASTA, returns Array of as many different
339
- # sequences in the portion of the file read. Returns the portion
340
- # of the file read wrapped in an Array otherwise.
341
- def sample_sequences(file)
342
- File.read(file, 1_048_576).split(/^>.+$/).delete_if(&:empty?)
343
- end
344
- end
345
- end
Binary file
@@ -1,36 +0,0 @@
1
- /* eslint-disable no-unused-vars */
2
- /* eslint-disable no-undef */
3
- import { render, screen, fireEvent } from '@testing-library/react';
4
- import { Form } from '../form';
5
- import { AMINO_ACID_SEQUENCE } from './mock_data/sequences';
6
- import data from './mock_data/databases.json';
7
- import userEvent from '@testing-library/user-event';
8
- import '@testing-library/jest-dom/extend-expect';
9
- import '@testing-library/react/dont-cleanup-after-each';
10
-
11
- export const setMockJSONResult = (result) => {
12
- global.$.getJSON = (_, cb) => cb(result);
13
- };
14
- describe('ADVANCED PARAMETERS', () => {
15
- const getInputElement = () => screen.getByRole('textbox', { name: '' });
16
- test('should not render the link to advanced parameters modal if blast algorithm is unknown', () => {
17
- setMockJSONResult(data);
18
- const {container } =render(<Form onSequenceTypeChanged={() => { }
19
- } />);
20
- const modalButton = container.querySelector('[data-target="#help"]');
21
- expect(modalButton).toBeNull();
22
- });
23
- test('should render the link to advanced parameters modal if blast algorithm is known', () => {
24
- setMockJSONResult(data);
25
- const {container } =render(<Form onSequenceTypeChanged={() => { }
26
- } />);
27
-
28
- const inputEl = getInputElement();
29
- // populate search and select dbs to determine blast algorithm
30
- fireEvent.change(inputEl, { target: { value: AMINO_ACID_SEQUENCE } });
31
- const proteinSelectAllBtn = screen.getByRole('heading', { name: /protein databases/i }).parentElement.querySelector('button');
32
- fireEvent.click(proteinSelectAllBtn);
33
- const modalButton = container.querySelector('[data-target="#help"]');
34
- expect(modalButton).not.toBeNull();
35
- });
36
- });