sequenceserver 2.0.0.rc2 → 2.0.0.rc7

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of sequenceserver might be problematic. Click here for more details.

Files changed (125) hide show
  1. checksums.yaml +4 -4
  2. data/.dockerignore +1 -0
  3. data/Dockerfile +14 -12
  4. data/bin/sequenceserver +10 -27
  5. data/lib/sequenceserver.rb +42 -21
  6. data/lib/sequenceserver/blast/job.rb +11 -1
  7. data/lib/sequenceserver/database.rb +0 -126
  8. data/lib/sequenceserver/makeblastdb.rb +243 -0
  9. data/lib/sequenceserver/routes.rb +5 -0
  10. data/lib/sequenceserver/sequence.rb +1 -1
  11. data/lib/sequenceserver/version.rb +1 -1
  12. data/public/css/sequenceserver.css +2 -1
  13. data/public/css/sequenceserver.min.css +1 -1
  14. data/public/js/error_modal.js +27 -29
  15. data/public/js/hit.js +14 -5
  16. data/public/js/query.js +31 -15
  17. data/public/js/report.js +13 -5
  18. data/public/js/search.js +4 -6
  19. data/public/js/sequence_modal.js +10 -5
  20. data/public/js/sidebar.js +2 -2
  21. data/public/sequenceserver-report.min.js +16 -16
  22. data/public/sequenceserver-search.min.js +1 -1
  23. data/spec/blast_versions/blast_2.2.30/import_spec_capybara_local_2.2.30.rb +10 -10
  24. data/spec/blast_versions/blast_2.2.31/import_spec_capybara_local_2.2.31.rb +10 -10
  25. data/spec/blast_versions/blast_2.3.0/import_spec_capybara_local_2.3.0.rb +10 -10
  26. data/spec/blast_versions/blast_2.4.0/import_spec_capybara_local_2.4.0.rb +10 -10
  27. data/spec/blast_versions/blast_2.5.0/import_spec_capybara_local_2.5.0.rb +10 -10
  28. data/spec/blast_versions/blast_2.6.0/import_spec_capybara_local_2.6.0.rb +10 -10
  29. data/spec/blast_versions/blast_2.7.1/import_spec_capybara_local_2.7.1.rb +10 -10
  30. data/spec/blast_versions/blast_2.8.1/import_spec_capybara_local_2.8.1.rb +10 -10
  31. data/spec/blast_versions/blast_2.9.0/import_spec_capybara_local_2.9.0.rb +10 -10
  32. data/spec/blast_versions/diamond_0.9.24/import_spec_capybara_local_0.9.24.rb +4 -4
  33. data/spec/capybara_spec.rb +11 -0
  34. data/spec/database/funky_ids/{funky_ids.fa.nhd → v4/funky_ids.fa.nhd} +0 -0
  35. data/spec/database/funky_ids/{funky_ids.fa.nhi → v4/funky_ids.fa.nhi} +0 -0
  36. data/spec/database/funky_ids/{funky_ids.fa.nhr → v4/funky_ids.fa.nhr} +0 -0
  37. data/spec/database/funky_ids/v4/funky_ids.fa.nin +0 -0
  38. data/spec/database/funky_ids/{funky_ids.fa.nog → v4/funky_ids.fa.nog} +0 -0
  39. data/spec/database/funky_ids/{funky_ids.fa.nsd → v4/funky_ids.fa.nsd} +0 -0
  40. data/spec/database/funky_ids/{funky_ids.fa.nsi → v4/funky_ids.fa.nsi} +0 -0
  41. data/spec/database/funky_ids/{funky_ids.fa.nsq → v4/funky_ids.fa.nsq} +0 -0
  42. data/spec/database/funky_ids/v5/funky_ids.fa.ndb +0 -0
  43. data/spec/database/funky_ids/v5/funky_ids.fa.nhd +8 -0
  44. data/spec/database/funky_ids/v5/funky_ids.fa.nhi +0 -0
  45. data/spec/database/funky_ids/v5/funky_ids.fa.nhr +0 -0
  46. data/spec/database/funky_ids/v5/funky_ids.fa.nin +0 -0
  47. data/spec/database/funky_ids/v5/funky_ids.fa.nog +0 -0
  48. data/spec/database/funky_ids/v5/funky_ids.fa.nos +0 -0
  49. data/spec/database/funky_ids/v5/funky_ids.fa.not +0 -0
  50. data/spec/database/funky_ids/v5/funky_ids.fa.nsq +0 -0
  51. data/spec/database/funky_ids/v5/funky_ids.fa.ntf +0 -0
  52. data/spec/database/funky_ids/v5/funky_ids.fa.nto +0 -0
  53. data/spec/database/sample/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.ndb +0 -0
  54. data/spec/database/sample/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nhr +0 -0
  55. data/spec/database/sample/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nin +0 -0
  56. data/spec/database/sample/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nos +0 -0
  57. data/spec/database/sample/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.not +0 -0
  58. data/spec/database/sample/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.ntf +0 -0
  59. data/spec/database/sample/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nto +0 -0
  60. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.pdb +0 -0
  61. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.phr +0 -0
  62. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.pin +0 -0
  63. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.pos +0 -0
  64. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.pot +0 -0
  65. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.ptf +0 -0
  66. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.pto +0 -0
  67. data/spec/database/sample/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.pdb +0 -0
  68. data/spec/database/sample/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.phr +0 -0
  69. data/spec/database/sample/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.pin +0 -0
  70. data/spec/database/sample/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.pos +0 -0
  71. data/spec/database/sample/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.pot +0 -0
  72. data/spec/database/sample/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.ptf +0 -0
  73. data/spec/database/sample/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.pto +0 -0
  74. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.ndb +0 -0
  75. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nhr +0 -0
  76. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nin +0 -0
  77. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nos +0 -0
  78. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.not +0 -0
  79. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nsq +0 -0
  80. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.ntf +0 -0
  81. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nto +0 -0
  82. data/spec/database/v4/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nhd +8 -0
  83. data/spec/database/v4/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nhi +0 -0
  84. data/spec/database/v4/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nhr +0 -0
  85. data/spec/database/v4/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nin +0 -0
  86. data/spec/database/v4/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nog +0 -0
  87. data/spec/database/{sample → v4}/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nsd +0 -0
  88. data/spec/database/{sample → v4}/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nsi +0 -0
  89. data/spec/database/v4/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nsq +0 -0
  90. data/spec/database/v4/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.txt +8 -0
  91. data/spec/database/v4/links.rb +23 -0
  92. data/spec/database/v4/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta +6449 -0
  93. data/spec/database/v4/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.phd +1189 -0
  94. data/spec/database/v4/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.phi +0 -0
  95. data/spec/database/v4/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.phr +0 -0
  96. data/spec/database/v4/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.pin +0 -0
  97. data/spec/database/v4/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.pog +0 -0
  98. data/spec/database/{sample → v4}/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.psd +0 -0
  99. data/spec/database/{sample → v4}/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.psi +0 -0
  100. data/spec/database/v4/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.psq +0 -0
  101. data/spec/database/v4/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.phd +9140 -0
  102. data/spec/database/v4/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.phi +0 -0
  103. data/spec/database/v4/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.phr +0 -0
  104. data/spec/database/v4/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.pin +0 -0
  105. data/spec/database/v4/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.pog +0 -0
  106. data/spec/database/{sample → v4}/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.psd +0 -0
  107. data/spec/database/{sample → v4}/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.psi +0 -0
  108. data/spec/database/v4/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.psq +0 -0
  109. data/spec/database/v4/proteins/uniprot/URL +1 -0
  110. data/spec/database/v4/si_uniprot_idmap.yml +14180 -0
  111. data/spec/database/v4/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta +5486 -0
  112. data/spec/database/v4/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nhd +473 -0
  113. data/spec/database/v4/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nhi +0 -0
  114. data/spec/database/v4/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nhr +0 -0
  115. data/spec/database/v4/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nin +0 -0
  116. data/spec/database/v4/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nog +0 -0
  117. data/spec/database/{sample → v4}/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nsd +0 -0
  118. data/spec/database/{sample → v4}/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nsi +0 -0
  119. data/spec/database/v4/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nsq +0 -0
  120. data/spec/database_spec.rb +0 -76
  121. data/spec/makeblastdb_spec.rb +121 -0
  122. data/spec/sequence_spec.rb +2 -3
  123. data/views/layout.erb +4 -0
  124. metadata +86 -21
  125. data/spec/database/funky_ids/funky_ids.fa.nin +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 164573dfee9002d6a1e164899e5d6f8a22696d7a13e3f6d48dd96a134673d8b3
4
- data.tar.gz: 06fd8269f49b72a04d41d05903e7d7273e647d4e451047722d285840fecd207f
3
+ metadata.gz: 2f7be0b03d87a6322d843166c7cdcf1aea0236fdd82a932b9eeda936a43f4a53
4
+ data.tar.gz: eb13c9ab355e3424e979606273606c6334ee203b649126d9d2860cdbb7d76ee4
5
5
  SHA512:
6
- metadata.gz: a3d66c251f7f5e3a96a45878001245a49c811e02bebeac271a350ec9b8e944e8066cabf55c8851bf4e4ce1d6e9cd86e3b76cce53bde7f6230afd3db4592a9175
7
- data.tar.gz: 14cdb61bbfbf1fb316df829aaf2c602c0753db51174b5166bb8c4de646ac43a2f5a0023593e063cd12c05e3dcf4163df1f48551e190df12607206afa0593ae5a
6
+ metadata.gz: 9183f80e0a6909e9e88b4f0416eeaac5ef50e4d75777764f82278b9377bbd5720f34c82cca9aff446eeb735ab79ef69872b8b6e8feabca161b8e81b52e995e4f
7
+ data.tar.gz: 99375aca038464b785883f12922656d5733d6cf114c37f62b83bc1c222f371b0b491ced709e678b2c07e52d8bae479ee8cdabdbaf06f1f91add2074f1e8652e5
@@ -0,0 +1 @@
1
+ _site
data/Dockerfile CHANGED
@@ -1,23 +1,25 @@
1
- FROM debian:stretch-backports
1
+ FROM debian:buster-slim
2
2
 
3
3
  LABEL Description="Intuitive local web frontend for the BLAST bioinformatics tool"
4
4
  LABEL MailingList="https://groups.google.com/forum/#!forum/sequenceserver"
5
5
  LABEL Website="http://www.sequenceserver.com"
6
- LABEL Version="1.1.0 beta"
7
6
 
8
- RUN apt-get update && apt-get install -y --no-install-recommends \
9
- build-essential \
10
- ruby ruby-dev \
11
- curl wget \
12
- gnupg \
13
- git \
14
- zlib1g-dev
7
+ RUN apt-get update && apt-get install -y --no-install-recommends \
8
+ ruby ruby-dev build-essential curl gnupg git wget \
9
+ zlib1g-dev && rm -rf /var/lib/apt/lists/*
15
10
 
16
11
  VOLUME ["/db"]
17
12
  EXPOSE 4567
18
13
 
19
14
  COPY . /sequenceserver
20
15
  WORKDIR /sequenceserver
21
- RUN gem install bundler && bundle install --without=development
22
- RUN yes '' | bundle exec bin/sequenceserver -s
23
- ENTRYPOINT ["bundle", "exec", "bin/sequenceserver", "-d", "/db"]
16
+ # Install bundler, then use bundler to install SequenceServer's dependencies,
17
+ # and then use SequenceServer to install BLAST. In the last step, -s is used
18
+ # so that SequenceServer will exit after writing configuration file instead
19
+ # of starting up, while -d is used to suppress questions about database dir.
20
+ RUN gem install bundler && \
21
+ bundle install --without=development && \
22
+ yes '' | bundle exec bin/sequenceserver -s -d spec/database/sample
23
+ RUN touch ~/.sequenceserver/asked_to_join
24
+
25
+ CMD ["bundle", "exec", "bin/sequenceserver", "-d", "/db"]
@@ -68,10 +68,6 @@ begin
68
68
  # of threads to use in config file.
69
69
  $ sequenceserver -s -n 16
70
70
 
71
- # See if you have FASTA files in database dir that haven't
72
- # been converted into BLAST database.
73
- $ sequenceserver -u
74
-
75
71
  # Search for FASTA files in database dir that haven't been
76
72
  # converted into BLAST database yet, and convert them.
77
73
  $ sequenceserver -m
@@ -135,9 +131,6 @@ begin
135
131
  on 'l', 'list_databases',
136
132
  'List BLAST databases'
137
133
 
138
- on 'u', 'list-unformatted-fastas',
139
- 'List unformatted FASTA files'
140
-
141
134
  on 'i', 'interactive',
142
135
  'Run SequenceServer in interactive mode'
143
136
 
@@ -285,8 +278,7 @@ begin
285
278
  fetch_option(:database_dir).value = response
286
279
  redo
287
280
  rescue SequenceServer::NO_BLAST_DATABASE_FOUND => e
288
- unless list_databases? || list_unformatted_fastas? ||
289
- make_blast_databases?
281
+ unless list_databases? || make_blast_databases?
290
282
 
291
283
  # Print error raised.
292
284
  puts
@@ -305,13 +297,13 @@ begin
305
297
  unless response =~ /^[n]$/i
306
298
  puts
307
299
  puts 'Searching ...'
308
- if SequenceServer::Database.unformatted_fastas.empty?
309
- puts "Couldn't find any FASTA files."
310
- exit!
311
- else
312
- formatted = SequenceServer::Database.make_blast_databases
300
+ if SequenceServer.makeblastdb.scan
301
+ formatted = SequenceServer.makeblastdb.run
313
302
  exit! if formatted.empty? && !set?
314
303
  redo unless set?
304
+ else
305
+ puts "Couldn't find any FASTA files."
306
+ exit!
315
307
  end
316
308
  else
317
309
  exit! unless set?
@@ -361,22 +353,13 @@ begin
361
353
  exit
362
354
  end
363
355
 
364
- if list_unformatted_fastas? || make_blast_databases?
365
- unformatted_fastas = SequenceServer::Database.unformatted_fastas
366
- if unformatted_fastas.empty?
356
+ if make_blast_databases?
357
+ if SequenceServer.makeblastdb.scan
358
+ SequenceServer.makeblastdb.run
359
+ else
367
360
  puts "All FASTA files in #{SequenceServer.config[:database_dir]} " \
368
361
  'are formatted.'
369
- exit
370
362
  end
371
- end
372
-
373
- if list_unformatted_fastas?
374
- puts unformatted_fastas
375
- exit
376
- end
377
-
378
- if make_blast_databases?
379
- SequenceServer::Database.make_blast_databases
380
363
  exit
381
364
  end
382
365
 
@@ -4,8 +4,12 @@ require 'resolv'
4
4
 
5
5
  # Top level module / namespace.
6
6
  module SequenceServer
7
- # Use a fixed minimum version of BLAST+
7
+ # The default version of BLAST that will be downloaded and configured for use.
8
8
  BLAST_VERSION = '2.10.0+'.freeze
9
+ # The minimum version of BLAST that SequenceServer is happy to run with. This
10
+ # is for compatibility with older database formats. Users will download BLAST
11
+ # themselves.
12
+ MIN_BLAST_VERSION = '2.9.0+'.freeze
9
13
 
10
14
  # Default location of configuration file.
11
15
  DEFAULT_CONFIG_FILE = '~/.sequenceserver.conf'.freeze
@@ -20,6 +24,7 @@ module SequenceServer
20
24
  require 'sequenceserver/config'
21
25
  require 'sequenceserver/server'
22
26
  require 'sequenceserver/routes'
27
+ require 'sequenceserver/makeblastdb'
23
28
  require 'sequenceserver/job_remover'
24
29
  require 'sequenceserver/exceptions'
25
30
  require 'sequenceserver/sys'
@@ -53,6 +58,11 @@ module SequenceServer
53
58
  end
54
59
  end
55
60
 
61
+ # MAKEBLASTDB service object.
62
+ def makeblastdb
63
+ @makeblastdb ||= MAKEBLASTDB.new(config[:database_dir])
64
+ end
65
+
56
66
  # SequenceServer initialisation routine.
57
67
  def init(config = {})
58
68
  # Use default config file if caller didn't specify one.
@@ -106,7 +116,6 @@ module SequenceServer
106
116
 
107
117
  # Run SequenceServer using WEBrick.
108
118
  def run
109
- check_host
110
119
  Server.run(self)
111
120
  rescue Errno::EADDRINUSE
112
121
  puts "** Could not bind to port #{config[:port]}."
@@ -126,10 +135,17 @@ module SequenceServer
126
135
  def on_start
127
136
  puts '** SequenceServer is ready.'
128
137
  puts " Go to #{server_url} in your browser and start BLASTing!"
129
- puts ' To share your setup, please try one of the following: '
130
- puts " - http://#{ip_address}:#{config[:port]}"
131
- puts " - http://#{hostname}:#{config[:port]}" if hostname
132
- puts ' Press CTRL+C to quit.'
138
+ if ip_address
139
+ puts ' To share your setup, try one of the following addresses. These'
140
+ puts ' may only work within your home, office, or university network.'
141
+ puts " - http://#{ip_address}:#{config[:port]}"
142
+ puts " - http://#{hostname}:#{config[:port]}" if hostname
143
+ puts ' To share your setup with anyone in the world, ask your IT team'
144
+ puts ' for a public IP address.'
145
+ puts ' To disable sharing, set :host: key in config file to 127.0.0.1'
146
+ puts ' and restart server.'
147
+ end
148
+ puts ' To terminate server, press CTRL+C'
133
149
  open_in_browser(server_url)
134
150
  end
135
151
 
@@ -204,16 +220,6 @@ module SequenceServer
204
220
  raise NUM_THREADS_INCORRECT
205
221
  end
206
222
 
207
- # Check and warn user if host is 0.0.0.0 (default).
208
- def check_host
209
- # rubocop:disable Style/GuardClause
210
- if config[:host] == '0.0.0.0'
211
- logger.warn 'Will listen on all interfaces (0.0.0.0).' \
212
- ' Consider using 127.0.0.1 (--host option).'
213
- end
214
- # rubocop:enable Style/GuardClause
215
- end
216
-
217
223
  def load_extension
218
224
  return unless config[:require]
219
225
 
@@ -234,7 +240,7 @@ module SequenceServer
234
240
  end
235
241
  version = out.split[1]
236
242
  fail BLAST_NOT_INSTALLED_OR_NOT_EXECUTABLE if version.empty?
237
- fail BLAST_NOT_COMPATIBLE, version unless version == BLAST_VERSION
243
+ fail BLAST_NOT_COMPATIBLE, version unless is_compatible(version, MIN_BLAST_VERSION)
238
244
  end
239
245
 
240
246
  def server_url
@@ -243,13 +249,14 @@ module SequenceServer
243
249
  "http://#{host}:#{config[:port]}"
244
250
  end
245
251
 
246
- # Returns a local ip adress
252
+ # Returns a local ip address.
247
253
  def ip_address
248
- Socket.ip_address_list.find { |ai| ai.ipv4? && !ai.ipv4_loopback? }.ip_address
254
+ addrinfo = Socket.ip_address_list.find { |ai| ai.ipv4? && !ai.ipv4_loopback? }
255
+ addrinfo.ip_address if addrinfo
249
256
  end
250
257
 
251
- # Returns machine's hostname based on the local ip;
252
- # If hostname cannot be determined then print nothing
258
+ # Returns machine's hostname based on the local ip. If hostname cannot be
259
+ # determined returns nil.
253
260
  def hostname
254
261
  Resolv.getname(ip_address) rescue nil
255
262
  end
@@ -285,5 +292,19 @@ module SequenceServer
285
292
  def command?(command)
286
293
  system("which #{command} > /dev/null 2>&1")
287
294
  end
295
+
296
+ # Returns true if the given version is the same as or higher than the minimum expected
297
+ # version string.
298
+ def is_compatible(given, expected)
299
+ # The spaceship operator (<=>) below returns -1, 0, 1 depending
300
+ # on whether the left operand is lower, same, or higher than the
301
+ # right operand. We want the left operand to be the same or higher.
302
+ (parse_version(given) <=> parse_version(expected)) >= 0
303
+ end
304
+
305
+ # Turn version string into an array of its component numbers.
306
+ def parse_version(version_string)
307
+ version_string.split('.').map(&:to_i)
308
+ end
288
309
  end
289
310
  end
@@ -65,6 +65,16 @@ module SequenceServer
65
65
  error = IO.foreach(stderr).grep(ERROR_LINE).join
66
66
  error = File.read(stderr) if error.empty?
67
67
  fail InputError, error
68
+ when 2
69
+ fail InputError, <<~MSG
70
+ BLAST signalled a problem with the databases that you searched.
71
+
72
+ Most likely one or more of your databases were created using an
73
+ older version of BLAST. Please consider recreating the databases
74
+ using BLAST #{BLAST_VERSION}.
75
+
76
+ As a temporary solution, you can try searching one database at a time.
77
+ MSG
68
78
  when 4
69
79
  # Out of memory. User can retry with a shorter search, so raising
70
80
  # InputError here instead of SystemError.
@@ -79,7 +89,7 @@ module SequenceServer
79
89
  # the job. This is a SystemError.
80
90
  fail SystemError, 'Ran out of disk space.'
81
91
  else
82
- # I am not sure what the exit codes 2 & 3 means and we should note
92
+ # I am not sure what exit code 3 means and we should not
83
93
  # encounter exit code 5. The only other error that I know can happen
84
94
  # but is not yet handled is when BLAST+ binaries break such as after
85
95
  # macOS updates. So raise SystemError, include the exit status in the
@@ -1,4 +1,3 @@
1
- require 'find'
2
1
  require 'open3'
3
2
  require 'digest/md5'
4
3
  require 'forwardable'
@@ -209,89 +208,6 @@ module SequenceServer
209
208
  end
210
209
  # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
211
210
 
212
- # Recursively scan `database_dir` for un-formatted FASTA and format them
213
- # for use with BLAST+.
214
- def make_blast_databases
215
- unformatted_fastas.select do |file, sequence_type|
216
- make_blast_database(file, sequence_type)
217
- end
218
- end
219
-
220
- # Returns an Array of FASTA files that may require formatting, and the
221
- # type of sequence contained in each FASTA.
222
- #
223
- # > unformatted_fastas
224
- # => [['/foo/bar.fasta', :nulceotide], ...]
225
- def unformatted_fastas
226
- list = []
227
- database_dir = config[:database_dir]
228
- Find.find database_dir do |file|
229
- next if File.directory? file
230
- next if Database.include? file
231
- next unless probably_fasta? file
232
- sequence_type = guess_sequence_type_in_fasta file
233
- if %i[protein nucleotide].include?(sequence_type)
234
- list << [file, sequence_type]
235
- end
236
- end
237
- list
238
- end
239
-
240
- # Create BLAST database, given FASTA file and sequence type in FASTA file.
241
- def make_blast_database(file, type)
242
- return unless make_blast_database? file, type
243
- title = get_database_title(file)
244
- taxid = fetch_tax_id
245
- _make_blast_database(file, type, title, taxid)
246
- end
247
-
248
- def _make_blast_database(file, type, title, taxid, quiet = false)
249
- cmd = 'makeblastdb -parse_seqids -hash_index ' \
250
- "-in #{file} -dbtype #{type.to_s.slice(0, 4)} -title '#{title}'" \
251
- " -taxid #{taxid}"
252
- out, err = sys(cmd, path: config[:bin])
253
- puts out, err unless quiet
254
- end
255
-
256
- # Show file path and guessed sequence type to the user and obtain a y/n
257
- # response.
258
- #
259
- # Returns true if the user entered anything but 'n' or 'N'.
260
- def make_blast_database?(file, type)
261
- puts
262
- puts
263
- puts "FASTA file: #{file}"
264
- puts "FASTA type: #{type}"
265
- print 'Proceed? [y/n] (Default: y): '
266
- response = STDIN.gets.to_s.strip
267
- !response.match(/n/i)
268
- end
269
-
270
- # Generate a title for the given database and show it to the user for
271
- # confirmation.
272
- #
273
- # Returns user input if any. Auto-generated title otherwise.
274
- def get_database_title(path)
275
- default = make_db_title(File.basename(path))
276
- print "Enter a database title or will use '#{default}': "
277
- from_user = STDIN.gets.to_s.strip
278
- from_user.empty? && default || from_user
279
- end
280
-
281
- # Get taxid from the user. Returns user input or 0.
282
- #
283
- # Using 0 as taxid is equivalent to not setting taxid for the database
284
- # that will be created.
285
- def fetch_tax_id
286
- default = 0
287
- print 'Enter taxid (optional): '
288
- user_response = STDIN.gets.strip
289
- user_response.empty? && default || Integer(user_response)
290
- rescue
291
- puts 'taxid should be a number'
292
- retry
293
- end
294
-
295
211
  # Returns true if the database name appears to be a multi-part database
296
212
  # name.
297
213
  #
@@ -304,48 +220,6 @@ module SequenceServer
304
220
  def multipart_database_name?(db_name)
305
221
  !(db_name.match(%r{.+/\S+\.\d{2,3}$}).nil?)
306
222
  end
307
-
308
- # Returns true if first character of the file is '>'.
309
- def probably_fasta?(file)
310
- File.read(file, 1) == '>'
311
- end
312
-
313
- # Suggests improved titles when generating database names from files
314
- # for improved apperance and readability in web interface.
315
- # For example:
316
- # Cobs1.4.proteins.fasta -> Cobs 1.4 proteins
317
- # S_invicta.xx.2.5.small.nucl.fa -> S invicta xx 2.5 small nucl
318
- def make_db_title(db_name)
319
- db_name.tr!('"', "'")
320
- # removes .fasta like extension names
321
- db_name.gsub!(File.extname(db_name), '')
322
- # replaces _ with ' ',
323
- db_name.gsub!(/(_)/, ' ')
324
- # replaces '.' with ' ' when no numbers are on either side,
325
- db_name.gsub!(/(\D)\.(?=\D)/, '\1 ')
326
- # preserves version numbers
327
- db_name.gsub!(/\W*(\d+([.-]\d+)+)\W*/, ' \1 ')
328
- db_name
329
- end
330
-
331
- # Guess whether FASTA file contains protein or nucleotide sequences by
332
- # sampling a few few characters of the file.
333
- def guess_sequence_type_in_fasta(file)
334
- sequences = sample_sequences(file)
335
- sequence_types = sequences.map { |seq| Sequence.guess_type(seq) }
336
- sequence_types = sequence_types.uniq.compact
337
- (sequence_types.length == 1) && sequence_types.first
338
- end
339
-
340
- # Read first 1,048,576 characters of the file, split the read text on
341
- # fasta def line pattern and return.
342
- #
343
- # If the given file is FASTA, returns Array of as many different
344
- # sequences in the portion of the file read. Returns the portion
345
- # of the file read wrapped in an Array otherwise.
346
- def sample_sequences(file)
347
- File.read(file, 1_048_576).split(/^>.+$/).delete_if(&:empty?)
348
- end
349
223
  end
350
224
  end
351
225
  end
@@ -0,0 +1,243 @@
1
+ require 'find'
2
+ require 'forwardable'
3
+
4
+ module SequenceServer
5
+ # Smart `makeblastdb` wrapper: recursively scans database directory determining
6
+ # which files need to be formatted or re-formatted.
7
+ #
8
+ # Example usage:
9
+ #
10
+ # makeblastdb = MAKEBLASTDB.new(database_dir)
11
+ # makeblastdb.scan && makeblastdb.run
12
+ #
13
+ class MAKEBLASTDB
14
+ # We want V5 databases created using -parse_seqids for proper function of
15
+ # SequenceServer. This means each database should be comprised of at least 9
16
+ # files with the following extensions. Databases created by us will have two
17
+ # additional files with the extensions nhd and nhi, or phd and phi, due to
18
+ # the use of -hash_index option. Finally, multipart databases will have one
19
+ # additional file with the extension nal or pal.
20
+ REQUIRED_EXTENSIONS = {
21
+ 'nucleotide' => %w{ndb nhr nin nog nos not nsq ntf nto}.freeze,
22
+ 'protein' => %w{pdb phr pin pog pos pot psq ptf pto}.freeze
23
+ }
24
+
25
+ extend Forwardable
26
+
27
+ def_delegators SequenceServer, :config, :sys
28
+
29
+ def initialize(database_dir)
30
+ @database_dir = database_dir
31
+ end
32
+
33
+ attr_reader :database_dir
34
+
35
+ # Scans the database directory to determine which FASTA files require
36
+ # formatting or re-formatting.
37
+ #
38
+ # Returns `true` if there are files to (re-)format, `false` otherwise.
39
+ def scan
40
+ # We need to know the list of formatted FASTAs as reported by blastdbcmd
41
+ # first. This is required to determine both unformatted FASTAs and those
42
+ # that require reformatting.
43
+ @formatted_fastas = []
44
+ determine_formatted_fastas
45
+
46
+ # Now determine FASTA files that are unformatted or require reformatting.
47
+ @fastas_to_format = []
48
+ determine_unformatted_fastas
49
+ determine_fastas_to_reformat
50
+
51
+ # Return true if there are files to be (re-)formatted or false otherwise.
52
+ !@fastas_to_format.empty?
53
+ end
54
+
55
+ # Runs makeblastdb on each file in `@fastas_to_format`. Will do nothing
56
+ # unless `#scan` has been run before.
57
+ def run
58
+ return unless @fastas_to_format || @fastas_to_format.empty?
59
+ @fastas_to_format.each do |path, title, type|
60
+ make_blast_database(path, title, type)
61
+ end
62
+ end
63
+
64
+ private
65
+
66
+ # Determines which FASTA files in the database directory are already
67
+ # formatted. Adds to @formatted_fastas.
68
+ def determine_formatted_fastas
69
+ blastdbcmd.each_line do |line|
70
+ path, title, type = line.split(' ')
71
+ next if multipart_database_name?(path)
72
+ @formatted_fastas << [path, title, type.strip.downcase]
73
+ end
74
+ end
75
+
76
+ # Determines which FASTA files in the database directory require
77
+ # reformatting. Adds to @fastas_to_format.
78
+ def determine_fastas_to_reformat
79
+ @formatted_fastas.each do |path, title, type|
80
+ required_extensions = REQUIRED_EXTENSIONS[type]
81
+ exts = Dir["#{path}.*"].map { |p| p.split('.').last }.sort
82
+ next if (exts & required_extensions) == required_extensions
83
+
84
+ @fastas_to_format << [path, title, type]
85
+ end
86
+ end
87
+
88
+ # Determines which FASTA files in the database directory are
89
+ # unformatted. Adds to @fastas_to_format.
90
+ def determine_unformatted_fastas
91
+ Find.find(database_dir) do |path|
92
+ next if File.directory?(path)
93
+ next unless probably_fasta?(path)
94
+ next if @formatted_fastas.any? { |f| f[0] == path }
95
+
96
+ @fastas_to_format << [path,
97
+ make_db_title(File.basename(path)),
98
+ guess_sequence_type_in_fasta(path)]
99
+ end
100
+ end
101
+
102
+ # Runs `blastdbcmd` to determine formatted FASTA files in the database
103
+ # directory. Returns the output of `blastdbcmd`. This method is called
104
+ # by `determine_formatted_fastas`.
105
+ def blastdbcmd
106
+ cmd = "blastdbcmd -recursive -list #{database_dir}" \
107
+ ' -list_outfmt "%f %t %p"'
108
+ out, _ = sys(cmd, path: config[:bin])
109
+ out
110
+ end
111
+
112
+ # Create BLAST database, given FASTA file and sequence type in FASTA file.
113
+ def make_blast_database(file, title, type)
114
+ return unless make_blast_database? file, type
115
+ title = confirm_database_title(title)
116
+ taxid = fetch_tax_id
117
+ _make_blast_database(file, type, title, taxid)
118
+ end
119
+
120
+ def _make_blast_database(file, type, title, taxid)
121
+ extract_fasta(file) unless File.exist?(file)
122
+ cmd = "makeblastdb -parse_seqids -hash_index -in #{file} " \
123
+ "-dbtype #{type.to_s.slice(0, 4)} -title '#{title}'" \
124
+ " -taxid #{taxid}"
125
+ out, err = sys(cmd, path: config[:bin])
126
+ puts out.strip
127
+ puts err.strip
128
+ rescue CommandFailed => e
129
+ puts <<~MSG
130
+ Could not create BLAST database for: #{file}
131
+ Tried: #{cmd}
132
+ stdout: #{e.stdout}
133
+ stderr: #{e.stderr}
134
+ MSG
135
+ exit!
136
+ end
137
+
138
+ # Show file path and guessed sequence type to the user and obtain a y/n
139
+ # response.
140
+ #
141
+ # Returns true if the user entered anything but 'n' or 'N'.
142
+ def make_blast_database?(file, type)
143
+ puts
144
+ puts
145
+ puts "FASTA file to format/reformat: #{file}"
146
+ puts "FASTA type: #{type}"
147
+ print 'Proceed? [y/n] (Default: y): '
148
+ response = STDIN.gets.to_s.strip
149
+ !response.match(/n/i)
150
+ end
151
+
152
+ # Show the database title that we are going to use to the user for
153
+ # confirmation.
154
+ #
155
+ # Returns user input if any. Auto-determined title otherwise.
156
+ def confirm_database_title(default)
157
+ print "Enter a database title or will use '#{default}': "
158
+ from_user = STDIN.gets.to_s.strip
159
+ from_user.empty? && default || from_user
160
+ end
161
+
162
+ # Get taxid from the user. Returns user input or 0.
163
+ #
164
+ # Using 0 as taxid is equivalent to not setting taxid for the database
165
+ # that will be created.
166
+ def fetch_tax_id
167
+ default = 0
168
+ print 'Enter taxid (optional): '
169
+ user_response = STDIN.gets.strip
170
+ user_response.empty? && default || Integer(user_response)
171
+ rescue
172
+ puts 'taxid should be a number'
173
+ retry
174
+ end
175
+
176
+ # Extract FASTA file from BLAST database.
177
+ #
178
+ # Invoked while reformatting a BLAST database if the corresponding
179
+ # FASTA file does not exist.
180
+ def extract_fasta(db)
181
+ puts
182
+ puts 'Extracting sequences ...'
183
+ cmd = "blastdbcmd -entry all -db #{db}"
184
+ sys(cmd, stdout: db, path: config[:bin])
185
+ rescue CommandFailed => e
186
+ puts <<~MSG
187
+ Could not extract sequences from: #{db}
188
+ Tried: #{cmd}
189
+ stdout: #{e.stdout}
190
+ stderr: #{e.stderr}
191
+ MSG
192
+ exit!
193
+ end
194
+
195
+ # Returns true if the database name appears to be a multi-part database
196
+ # name.
197
+ def multipart_database_name?(db_name)
198
+ Database.multipart_database_name? db_name
199
+ end
200
+
201
+ # Returns true if first character of the file is '>'.
202
+ def probably_fasta?(file)
203
+ File.read(file, 1) == '>'
204
+ end
205
+
206
+ # Suggests improved titles when generating database names from files
207
+ # for improved appearance and readability in web interface.
208
+ # For example:
209
+ # Cobs1.4.proteins.fasta -> Cobs 1.4 proteins
210
+ # S_invicta.xx.2.5.small.nucl.fa -> S invicta xx 2.5 small nucl
211
+ def make_db_title(db_name)
212
+ db_name.tr!('"', "'")
213
+ # removes .fasta like extension names
214
+ db_name.gsub!(File.extname(db_name), '')
215
+ # replaces _ with ' ',
216
+ db_name.gsub!(/(_)/, ' ')
217
+ # replaces '.' with ' ' when no numbers are on either side,
218
+ db_name.gsub!(/(\D)\.(?=\D)/, '\1 ')
219
+ # preserves version numbers
220
+ db_name.gsub!(/\W*(\d+([.-]\d+)+)\W*/, ' \1 ')
221
+ db_name
222
+ end
223
+
224
+ # Guess whether FASTA file contains protein or nucleotide sequences by
225
+ # sampling a few characters of the file.
226
+ def guess_sequence_type_in_fasta(file)
227
+ sequences = sample_sequences(file)
228
+ sequence_types = sequences.map { |seq| Sequence.guess_type(seq) }
229
+ sequence_types = sequence_types.uniq.compact
230
+ (sequence_types.length == 1) && sequence_types.first
231
+ end
232
+
233
+ # Read first 1,048,576 characters of the file, split the read text on
234
+ # fasta def line pattern and return.
235
+ #
236
+ # If the given file is FASTA, returns Array of as many different
237
+ # sequences in the portion of the file read. Returns the portion
238
+ # of the file read wrapped in an Array otherwise.
239
+ def sample_sequences(file)
240
+ File.read(file, 1_048_576).split(/^>.+$/).delete_if(&:empty?)
241
+ end
242
+ end
243
+ end