sequenceserver 3.0.1 → 3.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/sequenceserver +2 -2
- data/lib/sequenceserver/api_errors.rb +56 -2
- data/lib/sequenceserver/blast/job.rb +20 -3
- data/lib/sequenceserver/blast/report.rb +74 -86
- data/lib/sequenceserver/blast/tasks.rb +38 -0
- data/lib/sequenceserver/blast.rb +6 -0
- data/lib/sequenceserver/config.rb +54 -20
- data/lib/sequenceserver/database.rb +13 -0
- data/lib/sequenceserver/makeblastdb.rb +16 -2
- data/lib/sequenceserver/report.rb +0 -6
- data/lib/sequenceserver/routes.rb +66 -25
- data/lib/sequenceserver/sequence.rb +34 -7
- data/lib/sequenceserver/server.rb +1 -1
- data/lib/sequenceserver/version.rb +1 -1
- data/lib/sequenceserver.rb +1 -1
- data/public/404.html +1 -1
- data/public/css/app.css +121 -0
- data/public/css/app.min.css +1 -0
- data/public/css/sequenceserver.css +0 -148
- data/public/css/sequenceserver.min.css +3 -3
- data/public/js/circos.js +2 -2
- data/public/js/collapse_preferences.js +37 -0
- data/public/js/databases.js +65 -37
- data/public/js/databases_tree.js +2 -1
- data/public/js/dnd.js +37 -50
- data/public/js/download_fasta.js +1 -0
- data/public/js/form.js +79 -50
- data/public/js/grapher.js +23 -37
- data/public/js/hits_overview.js +2 -2
- data/public/js/kablammo.js +2 -2
- data/public/js/length_distribution.js +3 -3
- data/public/js/null_plugins/grapher/histogram.js +25 -0
- data/public/js/null_plugins/options.js +3 -0
- data/public/js/null_plugins/query_stats.js +11 -0
- data/public/js/null_plugins/report_plugins.js +6 -1
- data/public/js/null_plugins/search_header_plugin.js +4 -0
- data/public/js/options.js +161 -56
- data/public/js/query.js +85 -59
- data/public/js/report.js +1 -1
- data/public/js/search.js +2 -0
- data/public/js/search_button.js +67 -56
- data/public/js/sidebar.js +10 -1
- data/public/js/tests/database.spec.js +5 -5
- data/public/js/tests/form.spec.js +98 -0
- data/public/js/tests/mock_data/databases.json +5 -5
- data/public/js/tests/mocks/circos.js +6 -0
- data/public/js/tests/report.spec.js +4 -3
- data/public/js/tests/search_query.spec.js +16 -6
- data/public/sequenceserver-report.min.js +46 -24
- data/public/sequenceserver-search.min.js +57 -13
- data/public/sequenceserver_logo.webp +0 -0
- data/views/blastn_options.erb +66 -66
- data/views/blastp_options.erb +59 -59
- data/views/blastx_options.erb +68 -68
- data/views/layout.erb +61 -3
- data/views/search.erb +33 -38
- data/views/search_layout.erb +153 -0
- data/views/tblastn_options.erb +57 -57
- data/views/tblastx_options.erb +64 -64
- metadata +51 -22
- data/lib/sequenceserver/makeblastdb-modified-with-cache.rb +0 -345
- data/public/SequenceServer_logo.png +0 -0
- data/public/js/tests/advanced_parameters.spec.js +0 -36
@@ -1,345 +0,0 @@
|
|
1
|
-
require 'find'
|
2
|
-
require 'forwardable'
|
3
|
-
|
4
|
-
module SequenceServer
  # Smart `makeblastdb` wrapper: recursively scans the database directory to
  # determine which files need to be formatted or re-formatted.
  #
  # Example usage:
  #
  #   makeblastdb = MAKEBLASTDB.new(database_dir)
  #   makeblastdb.scan && makeblastdb.run
  #
  # NOTE(review): this file uses Zlib, Database, Sequence, CommandFailed and
  # BLAST_DATABASE_ERROR without requiring/defining them - presumably they are
  # loaded elsewhere in the project; confirm.
  class MAKEBLASTDB
    extend Forwardable

    def_delegators SequenceServer, :config, :sys, :logger

    # Fields requested from `blastdbcmd -list`. They are joined with a tab
    # because #determine_formatted_fastas splits every output line on "\t".
    LIST_OUTFMT = %w[%f %t %p %n %l %d %v].join("\t").freeze

    # File name endings (case-insensitive) that mark a file as a FASTA
    # candidate. Note that no leading dot is required.
    FASTA_NAME_PATTERN =
      /((cds)|(fasta)|(fna)|(pep)|(cdna)|(fa)|(prot)|(fas)|(genome)|(nuc)|(dna)|(nt))$/i

    # Matches volume names of multi-part databases, e.g. `/db/nr.00`.
    MULTIPART_NAME_PATTERN = %r{.+/\S+\.\d{2,3}$}

    def initialize(database_dir)
      @database_dir = database_dir
    end

    attr_reader :database_dir
    attr_reader :formatted_fastas
    attr_reader :fastas_to_format
    attr_reader :fastas_to_reformat

    # Scans the database directory to determine which FASTA files require
    # formatting or re-formatting.
    #
    # Returns `true` if there are files to (re-)format, `false` otherwise.
    def scan
      # The list of formatted FASTAs (as reported by blastdbcmd) is needed
      # first: both of the following steps are derived from it.
      @formatted_fastas = []
      determine_formatted_fastas

      @fastas_to_format = []
      determine_unformatted_fastas

      @fastas_to_reformat = []
      determine_fastas_to_reformat

      !@fastas_to_format.empty? || !@fastas_to_reformat.empty?
    end

    # Returns true if at least one database in database directory is
    # formatted. Requires `#scan` to have been run.
    def any_formatted?
      !@formatted_fastas.empty?
    end

    # Returns true if there is at least one unformatted FASTA in the databases
    # directory. Requires `#scan` to have been run.
    def any_unformatted?
      !@fastas_to_format.empty?
    end

    # Returns true if the databases directory contains one or more
    # incompatible databases.
    #
    # Note that it is okay to only use V4 databases or only V5 databases
    # (alias databases are accepted alongside either). Incompatibility arises
    # when V4 and V5 are mixed.
    def any_incompatible?
      return false if @formatted_fastas.all? { |ff| ff.v4? || ff.alias? }
      return false if @formatted_fastas.all? { |ff| ff.v5? || ff.alias? }

      true
    end

    # Runs makeblastdb on each file in `@fastas_to_format` and
    # `@fastas_to_reformat`. Will do nothing unless `#scan` has been run
    # before. Returns the result of `#reformat`.
    def run
      format
      reformat
    end

    # Format any unformatted FASTA files in database directory. Returns Array
    # of entries that were formatted, or nil if `#scan` has not been run.
    def format
      # Guard so that calling format before scan is a no-op, not a crash.
      return unless @fastas_to_format

      @fastas_to_format.select do |path, title, type|
        make_blast_database('format', path, title, type)
      end
    end

    # Re-format databases that require reformatting. Returns Array of entries
    # that were reformatted, or nil if `#scan` has not been run.
    def reformat
      # Guard so that calling reformat before scan is a no-op, not a crash.
      return unless @fastas_to_reformat

      @fastas_to_reformat.select do |path, title, type, non_parse_seqids|
        make_blast_database('reformat', path, title, type, non_parse_seqids)
      end
    end

    private

    # Determines which FASTA files in the database directory are already
    # formatted. Adds to @formatted_fastas.
    def determine_formatted_fastas
      blastdbcmd.each_line do |line|
        path, *rest = line.chomp.split("\t")
        # A blank line yields no path at all; skip it instead of crashing.
        next if path.nil? || path.empty?
        next if multipart_database_name?(path)

        rest << get_categories(path)
        @formatted_fastas << Database.new(path, *rest)
      end
    end

    # Determines which formatted databases require reformatting (V4 databases
    # and those created without -parse_seqids). Adds to @fastas_to_reformat.
    def determine_fastas_to_reformat
      @formatted_fastas.each do |ff|
        next unless ff.v4? || ff.non_parse_seqids?

        @fastas_to_reformat << [ff.path, ff.title, ff.type, ff.non_parse_seqids?]
      end
    end

    # Determines which FASTA files in the database directory are
    # unformatted. Adds to @fastas_to_format.
    def determine_unformatted_fastas
      # Add a trailing slash to database_dir - Find.find doesn't work as
      # expected without the trailing slash if database_dir is a symlink
      # inside a docker container.
      Find.find(database_dir + '/') do |path|
        next if File.directory?(path)
        next unless probably_fasta?(path)
        # f[0] is the first field of a Database - presumably its path;
        # verify against the Database definition.
        next if @formatted_fastas.any? { |f| f[0] == path }

        @fastas_to_format << [path,
                              make_db_title(path),
                              guess_sequence_type_in_fasta(path)]
      end
    end

    # Returns the `blastdbcmd -list` output describing formatted databases in
    # the database directory. This method is called by
    # `determine_formatted_fastas`.
    #
    # The output is cached in `<database_dir>.index`, next to a checksum of
    # the directory listing in `<database_dir>.checksum`. blastdbcmd is only
    # run when the checksum differs or the cache cannot be read.
    #
    # Raises BLAST_DATABASE_ERROR if blastdbcmd fails or reports a database
    # error on stderr.
    def blastdbcmd
      checksum = database_dir_checksum
      cached = read_cached_index(checksum)
      return cached if cached

      # Database directory has changed, or no usable index exists.
      logger.info "Scanning for BLAST databases & creating index"
      cmd = "blastdbcmd -recursive -list #{config[:database_dir]}" \
            " -list_outfmt \"#{LIST_OUTFMT}\""
      out, err = sys(cmd, path: config[:bin])
      raise BLAST_DATABASE_ERROR.new(cmd, err) if err =~ /BLAST Database error/

      write_index_cache(checksum, out)
      out
    rescue CommandFailed => e
      raise BLAST_DATABASE_ERROR.new(cmd, e.stderr)
    end

    # CRC32 over path, mtime and size of every entry below the database
    # directory; changes whenever a file is added, removed or modified.
    def database_dir_checksum
      entries = Dir.glob(File.join(config[:database_dir], '/**/*')).map do |path|
        path.to_s + '_' + File.mtime(path).to_s + '_' + File.size(path).to_s
      end
      Zlib.crc32(entries.to_s)
    end

    # Returns [checksum_path, index_path] for the on-disk index cache.
    def index_cache_paths
      base = config[:database_dir].chomp('/')
      [base + '.checksum', base + '.index']
    end

    # Returns the cached index if both cache files exist and the stored
    # checksum equals `checksum`; nil otherwise. Reading is best-effort: any
    # failure is logged and treated as a cache miss.
    def read_cached_index(checksum)
      checksum_path, index_path = index_cache_paths
      return unless File.exist?(checksum_path) && File.exist?(index_path)
      return unless File.read(checksum_path).to_i == checksum

      logger.info "Using existing database index: #{index_path}"
      File.read(index_path)
    rescue StandardError
      logger.error "Could not read: #{checksum_path} or #{index_path}"
      nil
    end

    # Stores checksum and index on disk for the next run.
    def write_index_cache(checksum, index)
      checksum_path, index_path = index_cache_paths
      write_cache_file(checksum_path, checksum, 'checksum')
      write_cache_file(index_path, index, 'index')
    end

    # Writes one cache file. Best-effort: a failure is logged, never raised,
    # because the cache is only an optimisation.
    def write_cache_file(path, content, label)
      File.open(path, 'w') { |f| f.write(content) }
    rescue StandardError
      logger.error "Could not write database #{label} to file: #{path}"
    end

    # Create BLAST database, given FASTA file and sequence type in FASTA file.
    #
    # Returns nil if the user declines, true once makeblastdb has run.
    def make_blast_database(action, file, title, type, non_parse_seqids = false)
      return unless make_blast_database?(action, file, type)

      title = confirm_database_title(title)
      # When reformatting, the FASTA may no longer exist next to the database.
      extract_fasta(file) unless File.exist?(file)
      taxonomy = taxid_map(file, non_parse_seqids) || taxid
      _make_blast_database(file, type, title, taxonomy)
    end

    # Show file path and guessed sequence type to the user and obtain a y/n
    # response.
    #
    # Returns false if the answer starts with 'n' or 'N', true otherwise
    # (including an empty answer or end of input).
    def make_blast_database?(action, file, type)
      puts
      puts
      puts "FASTA file to #{action}: #{file}"
      puts "FASTA type: #{type}"
      print 'Proceed? [y/n] (Default: y): '
      response = STDIN.gets.to_s.strip
      # Anchored so that answers merely containing an "n" are not a refusal.
      (response =~ /\An/i).nil?
    end

    # Show the database title that we are going to use to the user for
    # confirmation.
    #
    # Returns user input if any. Auto-determined title otherwise.
    def confirm_database_title(default)
      print "Enter a database title or will use '#{default}': "
      from_user = STDIN.gets.to_s.strip
      from_user.empty? && default || from_user
    end

    # Returns a `-taxid_map <file>` option for makeblastdb, or nil if none is
    # usable (database lacks parsed seqids, or the map is missing or empty).
    #
    # The map is `<db minus extension>.taxid_map.txt`; if it does not exist
    # we try to produce it with blastdbcmd first.
    def taxid_map(db, non_parse_seqids)
      return if non_parse_seqids

      extension = Regexp.escape(File.extname(db))
      taxid_map = db.sub(/#{extension}\z/, '.taxid_map.txt')
      extract_taxid_map(db, taxid_map) unless File.exist?(taxid_map)
      # File.size? is nil for a missing file as well as for an empty one.
      "-taxid_map #{taxid_map}" if File.size?(taxid_map)
    end

    # Get taxid from the user. Returns a `-taxid N` option, where N is the
    # user input or 0. Re-prompts if the input is not an integer.
    #
    # Using 0 as taxid is equivalent to not setting taxid for the database
    # that will be created.
    def taxid
      default = 0
      print 'Enter taxid (optional): '
      # to_s: gets returns nil at end of input, which then means "default".
      user_response = STDIN.gets.to_s.strip
      "-taxid #{user_response.empty? ? default : Integer(user_response)}"
    rescue ArgumentError # raised by Integer() for non-numeric input
      puts 'taxid should be a number'
      retry
    end

    # Runs makeblastdb for `file`. Returns true on success; prints a
    # diagnostic and terminates the process if the command fails.
    #
    # NOTE(review): `file` and `title` are wrapped in single quotes without
    # escaping, so a value containing a single quote would break the command.
    def _make_blast_database(file, type, title, taxonomy)
      cmd = "makeblastdb -parse_seqids -hash_index -in '#{file}'" \
            " -dbtype #{type.to_s.slice(0, 4)} -title '#{title}'" \
            " #{taxonomy}"
      out, err = sys(cmd, path: config[:bin])
      puts out.strip
      puts err.strip
      true
    rescue CommandFailed => e
      puts <<~MSG
        Could not create BLAST database for: #{file}
        Tried: #{cmd}
        stdout: #{e.stdout}
        stderr: #{e.stderr}
      MSG
      exit!
    end

    # Extract FASTA file from BLAST database.
    #
    # Invoked while reformatting a BLAST database if the corresponding
    # FASTA file does not exist. Terminates the process on failure.
    def extract_fasta(db)
      puts
      puts 'Extracting sequences ...'
      cmd = "blastdbcmd -entry all -db #{db}"
      sys(cmd, stdout: db, path: config[:bin])
    rescue CommandFailed => e
      puts <<~MSG
        Could not extract sequences from: #{db}
        Tried: #{cmd}
        stdout: #{e.stdout}
        stderr: #{e.stderr}
      MSG
      exit!
    end

    # Asks blastdbcmd for the id/taxid pairs of `db`, directing the output to
    # `taxmap_file`. A failing command is deliberately ignored: #taxid_map
    # then finds no usable map and the caller falls back to asking for a
    # taxid.
    def extract_taxid_map(db, taxmap_file)
      cmd = "blastdbcmd -entry all -db #{db} -outfmt '%i %T'"
      sys(cmd, stdout: taxmap_file, path: config[:bin])
    rescue CommandFailed
      nil
    end

    # Returns true if the database name appears to be a multi-part database
    # name.
    #
    # e.g.
    # /home/ben/pd.ben/sequenceserver/db/nr.00 => yes
    # /home/ben/pd.ben/sequenceserver/db/nr => no
    # /home/ben/pd.ben/sequenceserver/db/img3.5.finished.faa.01 => yes
    # /home/ben/pd.ben/sequenceserver/db/nr00 => no
    # /mnt/blast-db/refseq_genomic.100 => yes
    def multipart_database_name?(db_name)
      !(db_name =~ MULTIPART_NAME_PATTERN).nil?
    end

    # Returns the directories between database_dir and the database file,
    # outermost first.
    def get_categories(path)
      path
        .sub(config[:database_dir], '') # drop database_dir once, not every occurrence
        .split('/')
        .reject(&:empty?)[0..-2] # the first entry might be '' if database_dir does not end with /
    end

    # Returns true if the file name ends like a FASTA file (see
    # FASTA_NAME_PATTERN) and the first character of the file is '>'.
    def probably_fasta?(file)
      return false unless file =~ FASTA_NAME_PATTERN

      File.read(file, 1) == '>'
    end

    # Suggests improved titles when generating database names from files
    # for improved appearance and readability in web interface.
    # For example:
    # Cobs1.4.proteins.fasta -> Cobs 1.4 proteins
    # S_invicta.xx.2.5.small.nucl.fa -> S invicta xx 2.5 small nucl
    def make_db_title(path)
      File.basename(path, '.*')                    # name without the trailing extension only
          .tr('"', "'")
          .tr('_', ' ')
          .gsub(/(\D)\.(?=\D)/, '\1 ')             # '.' -> ' ' when no digit is on either side
          .gsub(/\W*(\d+([.-]\d+)+)\W*/, ' \1 ')   # set version numbers apart, keeping their dots
    end

    # Guess whether FASTA file contains protein or nucleotide sequences by
    # sampling the beginning of the file.
    #
    # Returns the type if all sampled sequences agree on one, false otherwise.
    def guess_sequence_type_in_fasta(file)
      sequences = sample_sequences(file)
      sequence_types = sequences.map { |seq| Sequence.guess_type(seq) }
      sequence_types = sequence_types.uniq.compact
      (sequence_types.length == 1) && sequence_types.first
    end

    # Read first 1,048,576 characters of the file, split the read text on
    # fasta def line pattern and return.
    #
    # If the given file is FASTA, returns Array of as many different
    # sequences in the portion of the file read. Returns the portion
    # of the file read wrapped in an Array otherwise. An empty file gives an
    # empty Array.
    def sample_sequences(file)
      # to_s: File.read with a length returns nil for an empty file.
      File.read(file, 1_048_576).to_s.split(/^>.+$/).delete_if(&:empty?)
    end
  end
end
|
Binary file
|
@@ -1,36 +0,0 @@
|
|
1
|
-
/* eslint-disable no-unused-vars */
|
2
|
-
/* eslint-disable no-undef */
|
3
|
-
import { render, screen, fireEvent } from '@testing-library/react';
|
4
|
-
import { Form } from '../form';
|
5
|
-
import { AMINO_ACID_SEQUENCE } from './mock_data/sequences';
|
6
|
-
import data from './mock_data/databases.json';
|
7
|
-
import userEvent from '@testing-library/user-event';
|
8
|
-
import '@testing-library/jest-dom/extend-expect';
|
9
|
-
import '@testing-library/react/dont-cleanup-after-each';
|
10
|
-
|
11
|
-
export const setMockJSONResult = (result) => {
|
12
|
-
global.$.getJSON = (_, cb) => cb(result);
|
13
|
-
};
|
14
|
-
describe('ADVANCED PARAMETERS', () => {
|
15
|
-
const getInputElement = () => screen.getByRole('textbox', { name: '' });
|
16
|
-
test('should not render the link to advanced parameters modal if blast algorithm is unknown', () => {
|
17
|
-
setMockJSONResult(data);
|
18
|
-
const {container } =render(<Form onSequenceTypeChanged={() => { }
|
19
|
-
} />);
|
20
|
-
const modalButton = container.querySelector('[data-target="#help"]');
|
21
|
-
expect(modalButton).toBeNull();
|
22
|
-
});
|
23
|
-
test('should render the link to advanced parameters modal if blast algorithm is known', () => {
|
24
|
-
setMockJSONResult(data);
|
25
|
-
const {container } =render(<Form onSequenceTypeChanged={() => { }
|
26
|
-
} />);
|
27
|
-
|
28
|
-
const inputEl = getInputElement();
|
29
|
-
// populate search and select dbs to determine blast algorithm
|
30
|
-
fireEvent.change(inputEl, { target: { value: AMINO_ACID_SEQUENCE } });
|
31
|
-
const proteinSelectAllBtn = screen.getByRole('heading', { name: /protein databases/i }).parentElement.querySelector('button');
|
32
|
-
fireEvent.click(proteinSelectAllBtn);
|
33
|
-
const modalButton = container.querySelector('[data-target="#help"]');
|
34
|
-
expect(modalButton).not.toBeNull();
|
35
|
-
});
|
36
|
-
});
|