sequenceserver 2.0.0.rc2 → 2.0.0.rc7
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of sequenceserver might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/.dockerignore +1 -0
- data/Dockerfile +14 -12
- data/bin/sequenceserver +10 -27
- data/lib/sequenceserver.rb +42 -21
- data/lib/sequenceserver/blast/job.rb +11 -1
- data/lib/sequenceserver/database.rb +0 -126
- data/lib/sequenceserver/makeblastdb.rb +243 -0
- data/lib/sequenceserver/routes.rb +5 -0
- data/lib/sequenceserver/sequence.rb +1 -1
- data/lib/sequenceserver/version.rb +1 -1
- data/public/css/sequenceserver.css +2 -1
- data/public/css/sequenceserver.min.css +1 -1
- data/public/js/error_modal.js +27 -29
- data/public/js/hit.js +14 -5
- data/public/js/query.js +31 -15
- data/public/js/report.js +13 -5
- data/public/js/search.js +4 -6
- data/public/js/sequence_modal.js +10 -5
- data/public/js/sidebar.js +2 -2
- data/public/sequenceserver-report.min.js +16 -16
- data/public/sequenceserver-search.min.js +1 -1
- data/spec/blast_versions/blast_2.2.30/import_spec_capybara_local_2.2.30.rb +10 -10
- data/spec/blast_versions/blast_2.2.31/import_spec_capybara_local_2.2.31.rb +10 -10
- data/spec/blast_versions/blast_2.3.0/import_spec_capybara_local_2.3.0.rb +10 -10
- data/spec/blast_versions/blast_2.4.0/import_spec_capybara_local_2.4.0.rb +10 -10
- data/spec/blast_versions/blast_2.5.0/import_spec_capybara_local_2.5.0.rb +10 -10
- data/spec/blast_versions/blast_2.6.0/import_spec_capybara_local_2.6.0.rb +10 -10
- data/spec/blast_versions/blast_2.7.1/import_spec_capybara_local_2.7.1.rb +10 -10
- data/spec/blast_versions/blast_2.8.1/import_spec_capybara_local_2.8.1.rb +10 -10
- data/spec/blast_versions/blast_2.9.0/import_spec_capybara_local_2.9.0.rb +10 -10
- data/spec/blast_versions/diamond_0.9.24/import_spec_capybara_local_0.9.24.rb +4 -4
- data/spec/capybara_spec.rb +11 -0
- data/spec/database/funky_ids/{funky_ids.fa.nhd → v4/funky_ids.fa.nhd} +0 -0
- data/spec/database/funky_ids/{funky_ids.fa.nhi → v4/funky_ids.fa.nhi} +0 -0
- data/spec/database/funky_ids/{funky_ids.fa.nhr → v4/funky_ids.fa.nhr} +0 -0
- data/spec/database/funky_ids/v4/funky_ids.fa.nin +0 -0
- data/spec/database/funky_ids/{funky_ids.fa.nog → v4/funky_ids.fa.nog} +0 -0
- data/spec/database/funky_ids/{funky_ids.fa.nsd → v4/funky_ids.fa.nsd} +0 -0
- data/spec/database/funky_ids/{funky_ids.fa.nsi → v4/funky_ids.fa.nsi} +0 -0
- data/spec/database/funky_ids/{funky_ids.fa.nsq → v4/funky_ids.fa.nsq} +0 -0
- data/spec/database/funky_ids/v5/funky_ids.fa.ndb +0 -0
- data/spec/database/funky_ids/v5/funky_ids.fa.nhd +8 -0
- data/spec/database/funky_ids/v5/funky_ids.fa.nhi +0 -0
- data/spec/database/funky_ids/v5/funky_ids.fa.nhr +0 -0
- data/spec/database/funky_ids/v5/funky_ids.fa.nin +0 -0
- data/spec/database/funky_ids/v5/funky_ids.fa.nog +0 -0
- data/spec/database/funky_ids/v5/funky_ids.fa.nos +0 -0
- data/spec/database/funky_ids/v5/funky_ids.fa.not +0 -0
- data/spec/database/funky_ids/v5/funky_ids.fa.nsq +0 -0
- data/spec/database/funky_ids/v5/funky_ids.fa.ntf +0 -0
- data/spec/database/funky_ids/v5/funky_ids.fa.nto +0 -0
- data/spec/database/sample/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.ndb +0 -0
- data/spec/database/sample/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nhr +0 -0
- data/spec/database/sample/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nin +0 -0
- data/spec/database/sample/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nos +0 -0
- data/spec/database/sample/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.not +0 -0
- data/spec/database/sample/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.ntf +0 -0
- data/spec/database/sample/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nto +0 -0
- data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.pdb +0 -0
- data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.phr +0 -0
- data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.pin +0 -0
- data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.pos +0 -0
- data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.pot +0 -0
- data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.ptf +0 -0
- data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.pto +0 -0
- data/spec/database/sample/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.pdb +0 -0
- data/spec/database/sample/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.phr +0 -0
- data/spec/database/sample/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.pin +0 -0
- data/spec/database/sample/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.pos +0 -0
- data/spec/database/sample/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.pot +0 -0
- data/spec/database/sample/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.ptf +0 -0
- data/spec/database/sample/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.pto +0 -0
- data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.ndb +0 -0
- data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nhr +0 -0
- data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nin +0 -0
- data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nos +0 -0
- data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.not +0 -0
- data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nsq +0 -0
- data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.ntf +0 -0
- data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nto +0 -0
- data/spec/database/v4/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nhd +8 -0
- data/spec/database/v4/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nhi +0 -0
- data/spec/database/v4/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nhr +0 -0
- data/spec/database/v4/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nin +0 -0
- data/spec/database/v4/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nog +0 -0
- data/spec/database/{sample → v4}/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nsd +0 -0
- data/spec/database/{sample → v4}/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nsi +0 -0
- data/spec/database/v4/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nsq +0 -0
- data/spec/database/v4/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.txt +8 -0
- data/spec/database/v4/links.rb +23 -0
- data/spec/database/v4/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta +6449 -0
- data/spec/database/v4/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.phd +1189 -0
- data/spec/database/v4/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.phi +0 -0
- data/spec/database/v4/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.phr +0 -0
- data/spec/database/v4/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.pin +0 -0
- data/spec/database/v4/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.pog +0 -0
- data/spec/database/{sample → v4}/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.psd +0 -0
- data/spec/database/{sample → v4}/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.psi +0 -0
- data/spec/database/v4/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.psq +0 -0
- data/spec/database/v4/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.phd +9140 -0
- data/spec/database/v4/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.phi +0 -0
- data/spec/database/v4/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.phr +0 -0
- data/spec/database/v4/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.pin +0 -0
- data/spec/database/v4/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.pog +0 -0
- data/spec/database/{sample → v4}/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.psd +0 -0
- data/spec/database/{sample → v4}/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.psi +0 -0
- data/spec/database/v4/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.psq +0 -0
- data/spec/database/v4/proteins/uniprot/URL +1 -0
- data/spec/database/v4/si_uniprot_idmap.yml +14180 -0
- data/spec/database/v4/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta +5486 -0
- data/spec/database/v4/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nhd +473 -0
- data/spec/database/v4/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nhi +0 -0
- data/spec/database/v4/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nhr +0 -0
- data/spec/database/v4/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nin +0 -0
- data/spec/database/v4/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nog +0 -0
- data/spec/database/{sample → v4}/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nsd +0 -0
- data/spec/database/{sample → v4}/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nsi +0 -0
- data/spec/database/v4/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nsq +0 -0
- data/spec/database_spec.rb +0 -76
- data/spec/makeblastdb_spec.rb +121 -0
- data/spec/sequence_spec.rb +2 -3
- data/views/layout.erb +4 -0
- metadata +86 -21
- data/spec/database/funky_ids/funky_ids.fa.nin +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2f7be0b03d87a6322d843166c7cdcf1aea0236fdd82a932b9eeda936a43f4a53
|
4
|
+
data.tar.gz: eb13c9ab355e3424e979606273606c6334ee203b649126d9d2860cdbb7d76ee4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9183f80e0a6909e9e88b4f0416eeaac5ef50e4d75777764f82278b9377bbd5720f34c82cca9aff446eeb735ab79ef69872b8b6e8feabca161b8e81b52e995e4f
|
7
|
+
data.tar.gz: 99375aca038464b785883f12922656d5733d6cf114c37f62b83bc1c222f371b0b491ced709e678b2c07e52d8bae479ee8cdabdbaf06f1f91add2074f1e8652e5
|
data/.dockerignore
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
_site
|
data/Dockerfile
CHANGED
@@ -1,23 +1,25 @@
|
|
1
|
-
FROM debian:
|
1
|
+
FROM debian:buster-slim
|
2
2
|
|
3
3
|
LABEL Description="Intuitive local web frontend for the BLAST bioinformatics tool"
|
4
4
|
LABEL MailingList="https://groups.google.com/forum/#!forum/sequenceserver"
|
5
5
|
LABEL Website="http://www.sequenceserver.com"
|
6
|
-
LABEL Version="1.1.0 beta"
|
7
6
|
|
8
|
-
RUN apt-get update
|
9
|
-
build-essential \
|
10
|
-
|
11
|
-
curl wget \
|
12
|
-
gnupg \
|
13
|
-
git \
|
14
|
-
zlib1g-dev
|
7
|
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
8
|
+
ruby ruby-dev build-essential curl gnupg git wget \
|
9
|
+
zlib1g-dev && rm -rf /var/lib/apt/lists/*
|
15
10
|
|
16
11
|
VOLUME ["/db"]
|
17
12
|
EXPOSE 4567
|
18
13
|
|
19
14
|
COPY . /sequenceserver
|
20
15
|
WORKDIR /sequenceserver
|
21
|
-
|
22
|
-
|
23
|
-
|
16
|
+
# Install bundler, then use bundler to install SequenceServer's dependencies,
|
17
|
+
# and then use SequenceServer to install BLAST. In the last step, -s is used
|
18
|
+
# so that SequenceServer will exit after writing configuration file instead
|
19
|
+
# of starting up, while -d is used to suppress questions about database dir.
|
20
|
+
RUN gem install bundler && \
|
21
|
+
bundle install --without=development && \
|
22
|
+
yes '' | bundle exec bin/sequenceserver -s -d spec/database/sample
|
23
|
+
RUN touch ~/.sequenceserver/asked_to_join
|
24
|
+
|
25
|
+
CMD ["bundle", "exec", "bin/sequenceserver", "-d", "/db"]
|
data/bin/sequenceserver
CHANGED
@@ -68,10 +68,6 @@ begin
|
|
68
68
|
# of threads to use in config file.
|
69
69
|
$ sequenceserver -s -n 16
|
70
70
|
|
71
|
-
# See if you have FASTA files in database dir that haven't
|
72
|
-
# been converted into BLAST database.
|
73
|
-
$ sequenceserver -u
|
74
|
-
|
75
71
|
# Search for FASTA files in database dir that haven't been
|
76
72
|
# converted into BLAST database yet, and convert them.
|
77
73
|
$ sequenceserver -m
|
@@ -135,9 +131,6 @@ begin
|
|
135
131
|
on 'l', 'list_databases',
|
136
132
|
'List BLAST databases'
|
137
133
|
|
138
|
-
on 'u', 'list-unformatted-fastas',
|
139
|
-
'List unformatted FASTA files'
|
140
|
-
|
141
134
|
on 'i', 'interactive',
|
142
135
|
'Run SequenceServer in interactive mode'
|
143
136
|
|
@@ -285,8 +278,7 @@ begin
|
|
285
278
|
fetch_option(:database_dir).value = response
|
286
279
|
redo
|
287
280
|
rescue SequenceServer::NO_BLAST_DATABASE_FOUND => e
|
288
|
-
unless list_databases? ||
|
289
|
-
make_blast_databases?
|
281
|
+
unless list_databases? || make_blast_databases?
|
290
282
|
|
291
283
|
# Print error raised.
|
292
284
|
puts
|
@@ -305,13 +297,13 @@ begin
|
|
305
297
|
unless response =~ /^[n]$/i
|
306
298
|
puts
|
307
299
|
puts 'Searching ...'
|
308
|
-
if SequenceServer
|
309
|
-
|
310
|
-
exit!
|
311
|
-
else
|
312
|
-
formatted = SequenceServer::Database.make_blast_databases
|
300
|
+
if SequenceServer.makeblastdb.scan
|
301
|
+
formatted = SequenceServer.makeblastdb.run
|
313
302
|
exit! if formatted.empty? && !set?
|
314
303
|
redo unless set?
|
304
|
+
else
|
305
|
+
puts "Couldn't find any FASTA files."
|
306
|
+
exit!
|
315
307
|
end
|
316
308
|
else
|
317
309
|
exit! unless set?
|
@@ -361,22 +353,13 @@ begin
|
|
361
353
|
exit
|
362
354
|
end
|
363
355
|
|
364
|
-
if
|
365
|
-
|
366
|
-
|
356
|
+
if make_blast_databases?
|
357
|
+
if SequenceServer.makeblastdb.scan
|
358
|
+
SequenceServer.makeblastdb.run
|
359
|
+
else
|
367
360
|
puts "All FASTA files in #{SequenceServer.config[:database_dir]} " \
|
368
361
|
'are formatted.'
|
369
|
-
exit
|
370
362
|
end
|
371
|
-
end
|
372
|
-
|
373
|
-
if list_unformatted_fastas?
|
374
|
-
puts unformatted_fastas
|
375
|
-
exit
|
376
|
-
end
|
377
|
-
|
378
|
-
if make_blast_databases?
|
379
|
-
SequenceServer::Database.make_blast_databases
|
380
363
|
exit
|
381
364
|
end
|
382
365
|
|
data/lib/sequenceserver.rb
CHANGED
@@ -4,8 +4,12 @@ require 'resolv'
|
|
4
4
|
|
5
5
|
# Top level module / namespace.
|
6
6
|
module SequenceServer
|
7
|
-
#
|
7
|
+
# The default version of BLAST that will be downloaded and configured for use.
|
8
8
|
BLAST_VERSION = '2.10.0+'.freeze
|
9
|
+
# The minimum version of BLAST that SequenceServer is happy to run with. This
|
10
|
+
# is for compatibility with older database formats. Users will download BLAST
|
11
|
+
# themselves.
|
12
|
+
MIN_BLAST_VERSION = '2.9.0+'.freeze
|
9
13
|
|
10
14
|
# Default location of configuration file.
|
11
15
|
DEFAULT_CONFIG_FILE = '~/.sequenceserver.conf'.freeze
|
@@ -20,6 +24,7 @@ module SequenceServer
|
|
20
24
|
require 'sequenceserver/config'
|
21
25
|
require 'sequenceserver/server'
|
22
26
|
require 'sequenceserver/routes'
|
27
|
+
require 'sequenceserver/makeblastdb'
|
23
28
|
require 'sequenceserver/job_remover'
|
24
29
|
require 'sequenceserver/exceptions'
|
25
30
|
require 'sequenceserver/sys'
|
@@ -53,6 +58,11 @@ module SequenceServer
|
|
53
58
|
end
|
54
59
|
end
|
55
60
|
|
61
|
+
# MAKEBLASTDB service object.
|
62
|
+
def makeblastdb
  # Reuse the service object created by an earlier call, if there is one.
  return @makeblastdb if @makeblastdb

  # First call: build the wrapper for the configured database directory and
  # remember it for subsequent calls.
  @makeblastdb = MAKEBLASTDB.new(config[:database_dir])
end
|
65
|
+
|
56
66
|
# SequenceServer initialisation routine.
|
57
67
|
def init(config = {})
|
58
68
|
# Use default config file if caller didn't specify one.
|
@@ -106,7 +116,6 @@ module SequenceServer
|
|
106
116
|
|
107
117
|
# Run SequenceServer using WEBrick.
|
108
118
|
def run
|
109
|
-
check_host
|
110
119
|
Server.run(self)
|
111
120
|
rescue Errno::EADDRINUSE
|
112
121
|
puts "** Could not bind to port #{config[:port]}."
|
@@ -126,10 +135,17 @@ module SequenceServer
|
|
126
135
|
def on_start
|
127
136
|
puts '** SequenceServer is ready.'
|
128
137
|
puts " Go to #{server_url} in your browser and start BLASTing!"
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
138
|
+
if ip_address
|
139
|
+
puts ' To share your setup, try one of the following addresses. These'
|
140
|
+
puts ' may only work within your home, office, or university network.'
|
141
|
+
puts " - http://#{ip_address}:#{config[:port]}"
|
142
|
+
puts " - http://#{hostname}:#{config[:port]}" if hostname
|
143
|
+
puts ' To share your setup with anyone in the world, ask your IT team'
|
144
|
+
puts ' for a public IP address.'
|
145
|
+
puts ' To disable sharing, set :host: key in config file to 127.0.0.1'
|
146
|
+
puts ' and restart server.'
|
147
|
+
end
|
148
|
+
puts ' To terminate server, press CTRL+C'
|
133
149
|
open_in_browser(server_url)
|
134
150
|
end
|
135
151
|
|
@@ -204,16 +220,6 @@ module SequenceServer
|
|
204
220
|
raise NUM_THREADS_INCORRECT
|
205
221
|
end
|
206
222
|
|
207
|
-
# Check and warn user if host is 0.0.0.0 (default).
|
208
|
-
def check_host
|
209
|
-
# rubocop:disable Style/GuardClause
|
210
|
-
if config[:host] == '0.0.0.0'
|
211
|
-
logger.warn 'Will listen on all interfaces (0.0.0.0).' \
|
212
|
-
' Consider using 127.0.0.1 (--host option).'
|
213
|
-
end
|
214
|
-
# rubocop:enable Style/GuardClause
|
215
|
-
end
|
216
|
-
|
217
223
|
def load_extension
|
218
224
|
return unless config[:require]
|
219
225
|
|
@@ -234,7 +240,7 @@ module SequenceServer
|
|
234
240
|
end
|
235
241
|
version = out.split[1]
|
236
242
|
fail BLAST_NOT_INSTALLED_OR_NOT_EXECUTABLE if version.empty?
|
237
|
-
fail BLAST_NOT_COMPATIBLE, version unless version
|
243
|
+
fail BLAST_NOT_COMPATIBLE, version unless is_compatible(version, MIN_BLAST_VERSION)
|
238
244
|
end
|
239
245
|
|
240
246
|
def server_url
|
@@ -243,13 +249,14 @@ module SequenceServer
|
|
243
249
|
"http://#{host}:#{config[:port]}"
|
244
250
|
end
|
245
251
|
|
246
|
-
# Returns a local ip adress
|
252
|
+
# Returns a local ip address.
|
247
253
|
def ip_address
|
248
|
-
Socket.ip_address_list.find { |ai| ai.ipv4? && !ai.ipv4_loopback? }
|
254
|
+
addrinfo = Socket.ip_address_list.find { |ai| ai.ipv4? && !ai.ipv4_loopback? }
|
255
|
+
addrinfo.ip_address if addrinfo
|
249
256
|
end
|
250
257
|
|
251
|
-
# Returns machine's hostname based on the local ip
|
252
|
-
#
|
258
|
+
# Returns machine's hostname based on the local ip. If hostname cannot be
|
259
|
+
# determined returns nil.
|
253
260
|
def hostname
  # Reverse-resolve the local ip address to a name.
  Resolv.getname(ip_address)
rescue StandardError
  # Any failure while determining the ip address or resolving it means we
  # simply have no hostname to offer.
  nil
end
|
@@ -285,5 +292,19 @@ module SequenceServer
|
|
285
292
|
def command?(command)
  # Ask `which` whether the given command can be found on PATH.
  #
  # The command name is passed as a separate argument rather than being
  # interpolated into a shell string, so names containing spaces or shell
  # metacharacters (e.g. "foo; true") cannot alter what gets executed.
  # Output of `which` is discarded; only its exit status matters.
  #
  # Returns true if the command was found, and a falsy value (false or nil)
  # otherwise.
  system('which', command.to_s, out: File::NULL, err: File::NULL)
end
|
295
|
+
|
296
|
+
# Returns true if the given version is equal to or higher than the minimum expected
|
297
|
+
# version string.
|
298
|
+
def is_compatible(given, expected)
  given_parts = parse_version(given)
  minimum_parts = parse_version(expected)

  # Arrays compare element by element: the spaceship operator (<=>) yields
  # -1, 0 or 1 when the left side is lower than, equal to, or higher than
  # the right side. The given version is acceptable when it is equal or
  # higher.
  ordering = given_parts <=> minimum_parts
  ordering >= 0
end
|
304
|
+
|
305
|
+
# Turn version string into an array of its component numbers.
|
306
|
+
def parse_version(version_string)
  # Break the string on dots, then read the leading integer of each piece
  # (so a trailing marker such as the '+' in '0+' is ignored).
  components = version_string.split('.')
  components.map { |component| component.to_i }
end
|
288
309
|
end
|
289
310
|
end
|
@@ -65,6 +65,16 @@ module SequenceServer
|
|
65
65
|
error = IO.foreach(stderr).grep(ERROR_LINE).join
|
66
66
|
error = File.read(stderr) if error.empty?
|
67
67
|
fail InputError, error
|
68
|
+
when 2
|
69
|
+
fail InputError, <<~MSG
|
70
|
+
BLAST signalled a problem with the databases that you searched.
|
71
|
+
|
72
|
+
Most likely one or more of your databases were created using an
|
73
|
+
older version of BLAST. Please consider recreating the databases
|
74
|
+
using BLAST #{BLAST_VERSION}.
|
75
|
+
|
76
|
+
As a temporary solution, you can try searching one database at a time.
|
77
|
+
MSG
|
68
78
|
when 4
|
69
79
|
# Out of memory. User can retry with a shorter search, so raising
|
70
80
|
# InputError here instead of SystemError.
|
@@ -79,7 +89,7 @@ module SequenceServer
|
|
79
89
|
# the job. This is a SystemError.
|
80
90
|
fail SystemError, 'Ran out of disk space.'
|
81
91
|
else
|
82
|
-
# I am not sure what the exit codes
|
92
|
+
# I am not sure what exit code 3 means and we should not
|
83
93
|
# encounter exit code 5. The only other error that I know can happen
|
84
94
|
# but is not yet handled is when BLAST+ binaries break such as after
|
85
95
|
# macOS updates. So raise SystemError, include the exit status in the
|
@@ -1,4 +1,3 @@
|
|
1
|
-
require 'find'
|
2
1
|
require 'open3'
|
3
2
|
require 'digest/md5'
|
4
3
|
require 'forwardable'
|
@@ -209,89 +208,6 @@ module SequenceServer
|
|
209
208
|
end
|
210
209
|
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
211
210
|
|
212
|
-
# Recursively scan `database_dir` for un-formatted FASTA and format them
|
213
|
-
# for use with BLAST+.
|
214
|
-
def make_blast_databases
|
215
|
-
unformatted_fastas.select do |file, sequence_type|
|
216
|
-
make_blast_database(file, sequence_type)
|
217
|
-
end
|
218
|
-
end
|
219
|
-
|
220
|
-
# Returns an Array of FASTA files that may require formatting, and the
|
221
|
-
# type of sequence contained in each FASTA.
|
222
|
-
#
|
223
|
-
# > unformatted_fastas
|
224
|
-
# => [['/foo/bar.fasta', :nulceotide], ...]
|
225
|
-
def unformatted_fastas
|
226
|
-
list = []
|
227
|
-
database_dir = config[:database_dir]
|
228
|
-
Find.find database_dir do |file|
|
229
|
-
next if File.directory? file
|
230
|
-
next if Database.include? file
|
231
|
-
next unless probably_fasta? file
|
232
|
-
sequence_type = guess_sequence_type_in_fasta file
|
233
|
-
if %i[protein nucleotide].include?(sequence_type)
|
234
|
-
list << [file, sequence_type]
|
235
|
-
end
|
236
|
-
end
|
237
|
-
list
|
238
|
-
end
|
239
|
-
|
240
|
-
# Create BLAST database, given FASTA file and sequence type in FASTA file.
|
241
|
-
def make_blast_database(file, type)
|
242
|
-
return unless make_blast_database? file, type
|
243
|
-
title = get_database_title(file)
|
244
|
-
taxid = fetch_tax_id
|
245
|
-
_make_blast_database(file, type, title, taxid)
|
246
|
-
end
|
247
|
-
|
248
|
-
def _make_blast_database(file, type, title, taxid, quiet = false)
|
249
|
-
cmd = 'makeblastdb -parse_seqids -hash_index ' \
|
250
|
-
"-in #{file} -dbtype #{type.to_s.slice(0, 4)} -title '#{title}'" \
|
251
|
-
" -taxid #{taxid}"
|
252
|
-
out, err = sys(cmd, path: config[:bin])
|
253
|
-
puts out, err unless quiet
|
254
|
-
end
|
255
|
-
|
256
|
-
# Show file path and guessed sequence type to the user and obtain a y/n
|
257
|
-
# response.
|
258
|
-
#
|
259
|
-
# Returns true if the user entered anything but 'n' or 'N'.
|
260
|
-
def make_blast_database?(file, type)
|
261
|
-
puts
|
262
|
-
puts
|
263
|
-
puts "FASTA file: #{file}"
|
264
|
-
puts "FASTA type: #{type}"
|
265
|
-
print 'Proceed? [y/n] (Default: y): '
|
266
|
-
response = STDIN.gets.to_s.strip
|
267
|
-
!response.match(/n/i)
|
268
|
-
end
|
269
|
-
|
270
|
-
# Generate a title for the given database and show it to the user for
|
271
|
-
# confirmation.
|
272
|
-
#
|
273
|
-
# Returns user input if any. Auto-generated title otherwise.
|
274
|
-
def get_database_title(path)
|
275
|
-
default = make_db_title(File.basename(path))
|
276
|
-
print "Enter a database title or will use '#{default}': "
|
277
|
-
from_user = STDIN.gets.to_s.strip
|
278
|
-
from_user.empty? && default || from_user
|
279
|
-
end
|
280
|
-
|
281
|
-
# Get taxid from the user. Returns user input or 0.
|
282
|
-
#
|
283
|
-
# Using 0 as taxid is equivalent to not setting taxid for the database
|
284
|
-
# that will be created.
|
285
|
-
def fetch_tax_id
|
286
|
-
default = 0
|
287
|
-
print 'Enter taxid (optional): '
|
288
|
-
user_response = STDIN.gets.strip
|
289
|
-
user_response.empty? && default || Integer(user_response)
|
290
|
-
rescue
|
291
|
-
puts 'taxid should be a number'
|
292
|
-
retry
|
293
|
-
end
|
294
|
-
|
295
211
|
# Returns true if the database name appears to be a multi-part database
|
296
212
|
# name.
|
297
213
|
#
|
@@ -304,48 +220,6 @@ module SequenceServer
|
|
304
220
|
def multipart_database_name?(db_name)
|
305
221
|
!(db_name.match(%r{.+/\S+\.\d{2,3}$}).nil?)
|
306
222
|
end
|
307
|
-
|
308
|
-
# Returns true if first character of the file is '>'.
|
309
|
-
def probably_fasta?(file)
|
310
|
-
File.read(file, 1) == '>'
|
311
|
-
end
|
312
|
-
|
313
|
-
# Suggests improved titles when generating database names from files
|
314
|
-
# for improved apperance and readability in web interface.
|
315
|
-
# For example:
|
316
|
-
# Cobs1.4.proteins.fasta -> Cobs 1.4 proteins
|
317
|
-
# S_invicta.xx.2.5.small.nucl.fa -> S invicta xx 2.5 small nucl
|
318
|
-
def make_db_title(db_name)
|
319
|
-
db_name.tr!('"', "'")
|
320
|
-
# removes .fasta like extension names
|
321
|
-
db_name.gsub!(File.extname(db_name), '')
|
322
|
-
# replaces _ with ' ',
|
323
|
-
db_name.gsub!(/(_)/, ' ')
|
324
|
-
# replaces '.' with ' ' when no numbers are on either side,
|
325
|
-
db_name.gsub!(/(\D)\.(?=\D)/, '\1 ')
|
326
|
-
# preserves version numbers
|
327
|
-
db_name.gsub!(/\W*(\d+([.-]\d+)+)\W*/, ' \1 ')
|
328
|
-
db_name
|
329
|
-
end
|
330
|
-
|
331
|
-
# Guess whether FASTA file contains protein or nucleotide sequences by
|
332
|
-
# sampling a few few characters of the file.
|
333
|
-
def guess_sequence_type_in_fasta(file)
|
334
|
-
sequences = sample_sequences(file)
|
335
|
-
sequence_types = sequences.map { |seq| Sequence.guess_type(seq) }
|
336
|
-
sequence_types = sequence_types.uniq.compact
|
337
|
-
(sequence_types.length == 1) && sequence_types.first
|
338
|
-
end
|
339
|
-
|
340
|
-
# Read first 1,048,576 characters of the file, split the read text on
|
341
|
-
# fasta def line pattern and return.
|
342
|
-
#
|
343
|
-
# If the given file is FASTA, returns Array of as many different
|
344
|
-
# sequences in the portion of the file read. Returns the portion
|
345
|
-
# of the file read wrapped in an Array otherwise.
|
346
|
-
def sample_sequences(file)
|
347
|
-
File.read(file, 1_048_576).split(/^>.+$/).delete_if(&:empty?)
|
348
|
-
end
|
349
223
|
end
|
350
224
|
end
|
351
225
|
end
|
@@ -0,0 +1,243 @@
|
|
1
|
+
require 'find'
|
2
|
+
require 'forwardable'
|
3
|
+
|
4
|
+
require 'shellwords'

module SequenceServer
  # Smart `makeblastdb` wrapper: recursively scans database directory
  # determining which files need to be formatted or re-formatted.
  #
  # Example usage:
  #
  #   makeblastdb = MAKEBLASTDB.new(database_dir)
  #   makeblastdb.scan && makeblastdb.run
  #
  class MAKEBLASTDB
    # We want V5 databases created using -parse_seqids for proper function of
    # SequenceServer. This means each database should be comprised of at least
    # 9 files with the following extensions. Databases created by us will have
    # two additional files with the extensions nhd and nhi, or phd and phi,
    # due to the use of -hash_index option. Finally, multipart databases will
    # have one additional file with the extension nal or pal.
    REQUIRED_EXTENSIONS = {
      'nucleotide' => %w[ndb nhr nin nog nos not nsq ntf nto].freeze,
      'protein' => %w[pdb phr pin pog pos pot psq ptf pto].freeze
    }.freeze

    extend Forwardable

    # `config` and `sys` are forwarded to the SequenceServer module.
    def_delegators SequenceServer, :config, :sys

    # database_dir - directory that will be scanned for FASTA files and
    #                BLAST databases.
    def initialize(database_dir)
      @database_dir = database_dir
    end

    attr_reader :database_dir

    # Scans the database directory to determine which FASTA files require
    # formatting or re-formatting.
    #
    # Returns `true` if there are files to (re-)format, `false` otherwise.
    def scan
      # We need to know the list of formatted FASTAs as reported by blastdbcmd
      # first. This is required to determine both unformatted FASTAs and those
      # that require reformatting.
      @formatted_fastas = []
      determine_formatted_fastas

      # Now determine FASTA files that are unformatted or require reformatting.
      @fastas_to_format = []
      determine_unformatted_fastas
      determine_fastas_to_reformat

      # Return true if there are files to be (re-)formatted or false otherwise.
      !@fastas_to_format.empty?
    end

    # Runs makeblastdb on each file in `@fastas_to_format`. Does nothing
    # unless `#scan` has been run before.
    #
    # Returns the Array of [path, title, type] entries that were iterated over;
    # an empty Array if `#scan` has not been run or found nothing to do.
    def run
      # Array(nil) is [], so calling #run before #scan is a harmless no-op
      # instead of a NoMethodError on nil.
      Array(@fastas_to_format).each do |path, title, type|
        make_blast_database(path, title, type)
      end
    end

    private

    # Determines which FASTA files in the database directory are already
    # formatted. Adds to @formatted_fastas.
    def determine_formatted_fastas
      blastdbcmd.to_s.each_line do |line|
        # Fields are tab separated (see #blastdbcmd) so that database titles
        # containing spaces are kept in one piece.
        path, title, type = line.chomp.split("\t")
        # Skip blank or malformed lines.
        next if path.nil? || type.nil?
        next if multipart_database_name?(path)

        @formatted_fastas << [path, title, type.strip.downcase]
      end
    end

    # Determines which FASTA files in the database directory require
    # reformatting. Adds to @fastas_to_format.
    def determine_fastas_to_reformat
      @formatted_fastas.each do |path, title, type|
        required_extensions = REQUIRED_EXTENSIONS[type]
        # Unknown database type: we have nothing to compare against.
        next unless required_extensions

        exts = Dir["#{path}.*"].map { |p| p.split('.').last }
        # Nothing to do if every required file is already present.
        next if (required_extensions - exts).empty?

        @fastas_to_format << [path, title, type]
      end
    end

    # Determines which FASTA files in the database directory are
    # unformatted. Adds to @fastas_to_format.
    def determine_unformatted_fastas
      Find.find(database_dir) do |path|
        next if File.directory?(path)
        next unless probably_fasta?(path)
        next if @formatted_fastas.any? { |f| f[0] == path }

        # NOTE(review): guess_sequence_type_in_fasta returns false when the
        # type cannot be determined unambiguously; such entries are still
        # queued here - confirm that this is intended.
        @fastas_to_format << [path,
                              make_db_title(File.basename(path)),
                              guess_sequence_type_in_fasta(path)]
      end
    end

    # Runs `blastdbcmd` to determine formatted FASTA files in the database
    # directory. Returns the output of `blastdbcmd`. This method is called
    # by `determine_formatted_fastas`.
    def blastdbcmd
      # The directory is shell-escaped so that paths containing spaces or
      # other special characters are passed through intact. Output fields
      # (file, title, type) are separated by tabs.
      cmd = "blastdbcmd -recursive -list #{Shellwords.escape(database_dir.to_s)}" \
            " -list_outfmt \"%f\t%t\t%p\""
      out, = sys(cmd, path: config[:bin])
      out
    end

    # Create BLAST database, given FASTA file and sequence type in FASTA file.
    # Asks the user for confirmation, a title and a taxid first; does nothing
    # if the user declines.
    def make_blast_database(file, title, type)
      return unless make_blast_database? file, type

      title = confirm_database_title(title)
      taxid = fetch_tax_id
      _make_blast_database(file, type, title, taxid)
    end

    # Runs `makeblastdb` for the given file. If the FASTA file itself does not
    # exist (we are reformatting an existing database), the sequences are
    # first extracted from the database. Prints diagnostics and exits if the
    # command fails.
    def _make_blast_database(file, type, title, taxid)
      extract_fasta(file) unless File.exist?(file)
      # File path and title are shell-escaped: both may contain spaces or
      # quotes.
      cmd = 'makeblastdb -parse_seqids -hash_index' \
            " -in #{Shellwords.escape(file.to_s)}" \
            " -dbtype #{type.to_s.slice(0, 4)}" \
            " -title #{Shellwords.escape(title.to_s)}" \
            " -taxid #{taxid}"
      out, err = sys(cmd, path: config[:bin])
      puts out.strip
      puts err.strip
    rescue CommandFailed => e
      puts <<~MSG
        Could not create BLAST database for: #{file}
        Tried: #{cmd}
        stdout: #{e.stdout}
        stderr: #{e.stderr}
      MSG
      exit!
    end

    # Show file path and guessed sequence type to the user and obtain a y/n
    # response.
    #
    # Returns false if the user answered 'n' or 'no' (in any letter case),
    # true for any other response, including an empty one.
    def make_blast_database?(file, type)
      puts
      puts
      puts "FASTA file to format/reformat: #{file}"
      puts "FASTA type: #{type}"
      print 'Proceed? [y/n] (Default: y): '
      response = STDIN.gets.to_s.strip
      # Match the whole answer so that responses which merely contain the
      # letter n (e.g. 'fine') are not mistaken for a refusal.
      response !~ /\An(o)?\z/i
    end

    # Show the database title that we are going to use to the user for
    # confirmation.
    #
    # Returns user input if any. Auto-determined title otherwise.
    def confirm_database_title(default)
      print "Enter a database title or will use '#{default}': "
      from_user = STDIN.gets.to_s.strip
      from_user.empty? && default || from_user
    end

    # Get taxid from the user. Returns user input or 0.
    #
    # Using 0 as taxid is equivalent to not setting taxid for the database
    # that will be created.
    def fetch_tax_id
      default = 0
      print 'Enter taxid (optional): '
      # `gets` returns nil at end of input; treat that like an empty answer
      # instead of retrying forever.
      user_response = STDIN.gets.to_s.strip
      user_response.empty? ? default : Integer(user_response)
    rescue ArgumentError
      # Integer() raises ArgumentError for non-numeric input: ask again.
      puts 'taxid should be a number'
      retry
    end

    # Extract FASTA file from BLAST database.
    #
    # Invoked while reformatting a BLAST database if the corresponding
    # FASTA file does not exist. Prints diagnostics and exits if the command
    # fails.
    def extract_fasta(db)
      puts
      puts 'Extracting sequences ...'
      cmd = "blastdbcmd -entry all -db #{Shellwords.escape(db.to_s)}"
      sys(cmd, stdout: db, path: config[:bin])
    rescue CommandFailed => e
      puts <<~MSG
        Could not extract sequences from: #{db}
        Tried: #{cmd}
        stdout: #{e.stdout}
        stderr: #{e.stderr}
      MSG
      exit!
    end

    # Returns true if the database name appears to be a multi-part database
    # name.
    def multipart_database_name?(db_name)
      Database.multipart_database_name? db_name
    end

    # Returns true if first character of the file is '>'.
    def probably_fasta?(file)
      File.read(file, 1) == '>'
    end

    # Suggests improved titles when generating database names from files
    # for improved appearance and readability in web interface.
    # For example:
    # Cobs1.4.proteins.fasta -> Cobs 1.4 proteins
    # S_invicta.xx.2.5.small.nucl.fa -> S invicta xx 2.5 small nucl
    #
    # The given string is not modified; a new string is returned.
    def make_db_title(db_name)
      title = db_name.tr('"', "'")
      # removes .fasta like extension names (only the trailing extension, not
      # every occurrence of the same text within the name)
      ext = File.extname(title)
      title = title[0...-ext.length] unless ext.empty?
      # replaces _ with ' ',
      title = title.tr('_', ' ')
      # replaces '.' with ' ' when no numbers are on either side,
      title = title.gsub(/(\D)\.(?=\D)/, '\1 ')
      # preserves version numbers
      title.gsub(/\W*(\d+([.-]\d+)+)\W*/, ' \1 ')
    end

    # Guess whether FASTA file contains protein or nucleotide sequences by
    # sampling the beginning of the file.
    #
    # Returns the guessed type if all sampled sequences agree on one, false
    # otherwise.
    def guess_sequence_type_in_fasta(file)
      sequences = sample_sequences(file)
      sequence_types = sequences.map { |seq| Sequence.guess_type(seq) }
      sequence_types = sequence_types.uniq.compact
      (sequence_types.length == 1) && sequence_types.first
    end

    # Read first 1,048,576 characters of the file, split the read text on
    # fasta def line pattern and return.
    #
    # If the given file is FASTA, returns Array of as many different
    # sequences in the portion of the file read. Returns the portion
    # of the file read wrapped in an Array otherwise.
    def sample_sequences(file)
      # File.read with a length returns nil for an empty file, hence to_s.
      File.read(file, 1_048_576).to_s.split(/^>.+$/).reject(&:empty?)
    end
  end
end
|