sequenceserver 2.0.0.beta4 → 2.0.0.rc5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. checksums.yaml +5 -5
  2. data/.dockerignore +1 -0
  3. data/.travis.yml +7 -4
  4. data/AppImage/sequenceserver.sh +5 -0
  5. data/Dockerfile +14 -12
  6. data/bin/sequenceserver +37 -28
  7. data/lib/sequenceserver.rb +35 -7
  8. data/lib/sequenceserver/blast/job.rb +18 -25
  9. data/lib/sequenceserver/blast/report.rb +68 -34
  10. data/lib/sequenceserver/config.rb +1 -1
  11. data/lib/sequenceserver/database.rb +0 -129
  12. data/lib/sequenceserver/makeblastdb.rb +243 -0
  13. data/lib/sequenceserver/routes.rb +28 -2
  14. data/lib/sequenceserver/version.rb +1 -1
  15. data/public/SequenceServer_logo.png +0 -0
  16. data/public/css/grapher.css +8 -15
  17. data/public/css/sequenceserver.css +119 -55
  18. data/public/css/sequenceserver.min.css +3 -3
  19. data/public/js/circos.js +1 -1
  20. data/public/js/download_fasta.js +17 -0
  21. data/public/js/grapher.js +7 -9
  22. data/public/js/hit.js +217 -0
  23. data/public/js/hits_overview.js +12 -13
  24. data/public/js/hsp.js +104 -84
  25. data/public/js/{sequenceserver.js → jquery_world.js} +1 -18
  26. data/public/js/kablammo.js +337 -334
  27. data/public/js/length_distribution.js +1 -1
  28. data/public/js/query.js +147 -0
  29. data/public/js/report.js +216 -836
  30. data/public/js/search.js +194 -192
  31. data/public/js/sequence_modal.js +167 -0
  32. data/public/js/sidebar.js +210 -0
  33. data/public/js/utils.js +2 -19
  34. data/public/js/visualisation_helpers.js +2 -2
  35. data/public/sequenceserver-report.min.js +19 -19
  36. data/public/sequenceserver-search.min.js +11 -11
  37. data/public/vendor/github/twbs/bootstrap@3.3.5/js/bootstrap.js +2 -2
  38. data/spec/blast_versions/blast_2.2.30/import_spec_capybara_local_2.2.30.rb +15 -15
  39. data/spec/blast_versions/blast_2.2.31/import_spec_capybara_local_2.2.31.rb +15 -15
  40. data/spec/blast_versions/blast_2.3.0/import_spec_capybara_local_2.3.0.rb +15 -15
  41. data/spec/blast_versions/blast_2.4.0/import_spec_capybara_local_2.4.0.rb +15 -15
  42. data/spec/blast_versions/blast_2.5.0/import_spec_capybara_local_2.5.0.rb +15 -15
  43. data/spec/blast_versions/blast_2.6.0/import_spec_capybara_local_2.6.0.rb +15 -15
  44. data/spec/blast_versions/blast_2.7.1/import_spec_capybara_local_2.7.1.rb +15 -15
  45. data/spec/blast_versions/blast_2.8.1/import_spec_capybara_local_2.8.1.rb +15 -15
  46. data/spec/blast_versions/blast_2.9.0/import_spec_capybara_local_2.9.0.rb +15 -15
  47. data/spec/blast_versions/diamond_0.9.24/import_spec_capybara_local_0.9.24.rb +6 -6
  48. data/spec/capybara_spec.rb +14 -3
  49. data/spec/database/sample/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.ndb +0 -0
  50. data/spec/database/sample/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nhr +0 -0
  51. data/spec/database/sample/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nin +0 -0
  52. data/spec/database/sample/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nos +0 -0
  53. data/spec/database/sample/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.not +0 -0
  54. data/spec/database/sample/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.ntf +0 -0
  55. data/spec/database/sample/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nto +0 -0
  56. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.pdb +0 -0
  57. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.phr +0 -0
  58. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.pin +0 -0
  59. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.pos +0 -0
  60. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.pot +0 -0
  61. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.ptf +0 -0
  62. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.pto +0 -0
  63. data/spec/database/sample/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.pdb +0 -0
  64. data/spec/database/sample/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.phr +0 -0
  65. data/spec/database/sample/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.pin +0 -0
  66. data/spec/database/sample/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.pos +0 -0
  67. data/spec/database/sample/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.pot +0 -0
  68. data/spec/database/sample/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.ptf +0 -0
  69. data/spec/database/sample/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.pto +0 -0
  70. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.ndb +0 -0
  71. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nhr +0 -0
  72. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nin +0 -0
  73. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nos +0 -0
  74. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.not +0 -0
  75. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nsq +0 -0
  76. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.ntf +0 -0
  77. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nto +0 -0
  78. data/spec/database/v4/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nhd +8 -0
  79. data/spec/database/v4/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nhi +0 -0
  80. data/spec/database/v4/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nhr +0 -0
  81. data/spec/database/v4/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nin +0 -0
  82. data/spec/database/v4/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nog +0 -0
  83. data/spec/database/{sample → v4}/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nsd +0 -0
  84. data/spec/database/{sample → v4}/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nsi +0 -0
  85. data/spec/database/v4/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nsq +0 -0
  86. data/spec/database/v4/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.txt +8 -0
  87. data/spec/database/v4/links.rb +23 -0
  88. data/spec/database/v4/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta +6449 -0
  89. data/spec/database/v4/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.phd +1189 -0
  90. data/spec/database/v4/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.phi +0 -0
  91. data/spec/database/v4/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.phr +0 -0
  92. data/spec/database/v4/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.pin +0 -0
  93. data/spec/database/v4/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.pog +0 -0
  94. data/spec/database/{sample → v4}/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.psd +0 -0
  95. data/spec/database/{sample → v4}/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.psi +0 -0
  96. data/spec/database/v4/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.psq +0 -0
  97. data/spec/database/v4/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.phd +9140 -0
  98. data/spec/database/v4/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.phi +0 -0
  99. data/spec/database/v4/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.phr +0 -0
  100. data/spec/database/v4/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.pin +0 -0
  101. data/spec/database/v4/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.pog +0 -0
  102. data/spec/database/{sample → v4}/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.psd +0 -0
  103. data/spec/database/{sample → v4}/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.psi +0 -0
  104. data/spec/database/v4/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.psq +0 -0
  105. data/spec/database/v4/proteins/uniprot/URL +1 -0
  106. data/spec/database/v4/si_uniprot_idmap.yml +14180 -0
  107. data/spec/database/v4/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta +5486 -0
  108. data/spec/database/v4/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nhd +473 -0
  109. data/spec/database/v4/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nhi +0 -0
  110. data/spec/database/v4/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nhr +0 -0
  111. data/spec/database/v4/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nin +0 -0
  112. data/spec/database/v4/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nog +0 -0
  113. data/spec/database/{sample → v4}/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nsd +0 -0
  114. data/spec/database/{sample → v4}/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nsi +0 -0
  115. data/spec/database/v4/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nsq +0 -0
  116. data/spec/database_spec.rb +0 -76
  117. data/spec/makeblastdb_spec.rb +121 -0
  118. data/views/layout.erb +5 -1
  119. metadata +75 -15
@@ -81,7 +81,7 @@ module SequenceServer
81
81
  # otherwise.
82
82
  def parse_config_file
83
83
  unless file? config_file
84
- logger.info "Configuration file not found: #{config_file}"
84
+ logger.debug "Configuration file not found: #{config_file}"
85
85
  return {}
86
86
  end
87
87
  logger.info "Reading configuration file: #{config_file}."
@@ -1,4 +1,3 @@
1
- require 'find'
2
1
  require 'open3'
3
2
  require 'digest/md5'
4
3
  require 'forwardable'
@@ -209,89 +208,6 @@ module SequenceServer
209
208
  end
210
209
  # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
211
210
 
212
- # Recursively scan `database_dir` for un-formatted FASTA and format them
213
- # for use with BLAST+.
214
- def make_blast_databases
215
- unformatted_fastas.select do |file, sequence_type|
216
- make_blast_database(file, sequence_type)
217
- end
218
- end
219
-
220
- # Returns an Array of FASTA files that may require formatting, and the
221
- # type of sequence contained in each FASTA.
222
- #
223
- # > unformatted_fastas
224
- # => [['/foo/bar.fasta', :nulceotide], ...]
225
- def unformatted_fastas
226
- list = []
227
- database_dir = config[:database_dir]
228
- Find.find database_dir do |file|
229
- next if File.directory? file
230
- next if Database.include? file
231
- next unless probably_fasta? file
232
- sequence_type = guess_sequence_type_in_fasta file
233
- if %i[protein nucleotide].include?(sequence_type)
234
- list << [file, sequence_type]
235
- end
236
- end
237
- list
238
- end
239
-
240
- # Create BLAST database, given FASTA file and sequence type in FASTA file.
241
- def make_blast_database(file, type)
242
- return unless make_blast_database? file, type
243
- title = get_database_title(file)
244
- taxid = fetch_tax_id
245
- _make_blast_database(file, type, title, taxid)
246
- end
247
-
248
- def _make_blast_database(file, type, title, taxid, quiet = false)
249
- cmd = 'makeblastdb -parse_seqids -hash_index ' \
250
- "-in #{file} -dbtype #{type.to_s.slice(0, 4)} -title '#{title}'" \
251
- " -taxid #{taxid}"
252
- out, err = sys(cmd, path: config[:bin])
253
- puts out, err unless quiet
254
- end
255
-
256
- # Show file path and guessed sequence type to the user and obtain a y/n
257
- # response.
258
- #
259
- # Returns true if the user entered anything but 'n' or 'N'.
260
- def make_blast_database?(file, type)
261
- puts
262
- puts
263
- puts "FASTA file: #{file}"
264
- puts "FASTA type: #{type}"
265
- print 'Proceed? [y/n] (Default: y): '
266
- response = STDIN.gets.to_s.strip
267
- !response.match(/n/i)
268
- end
269
-
270
- # Generate a title for the given database and show it to the user for
271
- # confirmation.
272
- #
273
- # Returns user input if any. Auto-generated title otherwise.
274
- def get_database_title(path)
275
- default = make_db_title(File.basename(path))
276
- print "Enter a database title or will use '#{default}': "
277
- from_user = STDIN.gets.to_s.strip
278
- from_user.empty? && default || from_user
279
- end
280
-
281
- # Get taxid from the user. Returns user input or 0.
282
- #
283
- # Using 0 as taxid is equivalent to not setting taxid for the database
284
- # that will be created.
285
- def fetch_tax_id
286
- default = 0
287
- print 'Enter taxid (optional): '
288
- user_response = STDIN.gets.strip
289
- user_response.empty? && default || Integer(user_response)
290
- rescue
291
- puts 'taxid should be a number'
292
- retry
293
- end
294
-
295
211
  # Returns true if the database name appears to be a multi-part database
296
212
  # name.
297
213
  #
@@ -304,51 +220,6 @@ module SequenceServer
304
220
  def multipart_database_name?(db_name)
305
221
  !(db_name.match(%r{.+/\S+\.\d{2,3}$}).nil?)
306
222
  end
307
-
308
- # Returns true if first character of the file is '>'.
309
- def probably_fasta?(file)
310
- File.read(file, 1) == '>'
311
- end
312
-
313
- # Suggests improved titles when generating database names from files
314
- # for improved apperance and readability in web interface.
315
- # For example:
316
- # Cobs1.4.proteins.fasta -> Cobs 1.4 proteins
317
- # S_invicta.xx.2.5.small.nucl.fa -> S invicta xx 2.5 small nucl
318
- def make_db_title(db_name)
319
- db_name.tr!('"', "'")
320
- # removes .fasta like extension names
321
- db_name.gsub!(File.extname(db_name), '')
322
- # replaces _ with ' ',
323
- db_name.gsub!(/(_)/, ' ')
324
- # replaces '.' with ' ' when no numbers are on either side,
325
- db_name.gsub!(/(\D)\.(?=\D)/, '\1 ')
326
- # preserves version numbers
327
- db_name.gsub!(/\W*(\d+([.-]\d+)+)\W*/, ' \1 ')
328
- db_name
329
- end
330
-
331
- # Guess whether FASTA file contains protein or nucleotide sequences based
332
- # on first 32768 characters.
333
- #
334
- # NOTE: 2^15 == 32786. Approximately 546 lines, assuming 60 characters on
335
- # each line.
336
- def guess_sequence_type_in_fasta(file)
337
- sequences = sample_sequences(file)
338
- sequence_types = sequences.map { |seq| Sequence.guess_type(seq) }
339
- sequence_types = sequence_types.uniq.compact
340
- (sequence_types.length == 1) && sequence_types.first
341
- end
342
-
343
- # Read first 32768 characters of the file. Split on fasta def line
344
- # pattern and return.
345
- #
346
- # If the given file is FASTA, returns Array of as many different
347
- # sequences in the portion of the file read. Returns the portion
348
- # of the file read wrapped in an Array otherwise.
349
- def sample_sequences(file)
350
- File.read(file, 32_768).split(/^>.+$/).delete_if(&:empty?)
351
- end
352
223
  end
353
224
  end
354
225
  end
@@ -0,0 +1,243 @@
1
+ require 'find'
2
+ require 'forwardable'
3
+
4
module SequenceServer
  # Smart `makeblastdb` wrapper: recursively scans database directory determining
  # which files need to be formatted or re-formatted.
  #
  # Example usage:
  #
  #   makeblastdb = MAKEBLASTDB.new(database_dir)
  #   makeblastdb.scan && makeblastdb.run
  #
  class MAKEBLASTDB
    # We want V5 databases created using -parse_seqids for proper function of
    # SequenceServer. This means each database should be comprised of at least 9
    # files with the following extensions. Databases created by us will have two
    # additional files with the extensions nhd and nhi, or phd and phi, due to
    # the use of -hash_index option. Finally, multipart databases will have one
    # additional file with the extension nal or pal.
    REQUIRED_EXTENSIONS = {
      'nucleotide' => %w[ndb nhr nin nog nos not nsq ntf nto].freeze,
      'protein' => %w[pdb phr pin pog pos pot psq ptf pto].freeze
    }.freeze

    extend Forwardable

    def_delegators SequenceServer, :config, :sys

    def initialize(database_dir)
      @database_dir = database_dir
    end

    attr_reader :database_dir

    # Scans the database directory to determine which FASTA files require
    # formatting or re-formatting.
    #
    # Returns `true` if there are files to (re-)format, `false` otherwise.
    def scan
      # We need to know the list of formatted FASTAs as reported by blastdbcmd
      # first. This is required to determine both unformatted FASTAs and those
      # that require reformatting.
      @formatted_fastas = []
      determine_formatted_fastas

      # Now determine FASTA files that are unformatted or require reformatting.
      @fastas_to_format = []
      determine_unformatted_fastas
      determine_fastas_to_reformat

      # Return true if there are files to be (re-)formatted or false otherwise.
      !@fastas_to_format.empty?
    end

    # Runs makeblastdb on each file in `@fastas_to_format`. Does nothing (and
    # returns nil) if `#scan` has not been run yet or found nothing to do.
    def run
      return if @fastas_to_format.nil? || @fastas_to_format.empty?

      @fastas_to_format.each do |path, title, type|
        make_blast_database(path, title, type)
      end
    end

    private

    # Determines which FASTA files in the database directory are already
    # formatted. Adds [path, title, type] triples to @formatted_fastas, with
    # type lower-cased so it can be used as a key of REQUIRED_EXTENSIONS.
    def determine_formatted_fastas
      blastdbcmd.to_s.each_line do |line|
        # Fields are tab-separated (see #blastdbcmd) because database titles
        # routinely contain spaces.
        path, title, type = line.chomp.split("\t")
        next if path.nil? || type.nil? # blank or malformed line
        next if multipart_database_name?(path)

        @formatted_fastas << [path, title, type.strip.downcase]
      end
    end

    # Determines which FASTA files in the database directory require
    # reformatting, i.e., those missing one or more of the files listed in
    # REQUIRED_EXTENSIONS for their type. Adds to @fastas_to_format.
    def determine_fastas_to_reformat
      @formatted_fastas.each do |path, title, type|
        required = REQUIRED_EXTENSIONS[type]
        # Database type we do not know how to validate; leave it alone.
        next unless required

        present = Dir["#{path}.*"].map { |file| file.split('.').last }
        next if (required - present).empty?

        @fastas_to_format << [path, title, type]
      end
    end

    # Determines which FASTA files in the database directory are
    # unformatted. Adds to @fastas_to_format.
    def determine_unformatted_fastas
      Find.find(database_dir) do |path|
        next if File.directory?(path)
        next unless probably_fasta?(path)
        next if @formatted_fastas.any? { |f| f[0] == path }

        # Without a single, recognised sequence type we cannot derive a valid
        # -dbtype for makeblastdb, so such files are not queued.
        type = guess_sequence_type_in_fasta(path)
        next unless type && REQUIRED_EXTENSIONS.key?(type.to_s)

        @fastas_to_format << [path, make_db_title(File.basename(path)), type]
      end
    end

    # Runs `blastdbcmd` to determine formatted FASTA files in the database
    # directory. Returns the output of `blastdbcmd`: one line per database
    # with tab-separated path, title and type. This method is called by
    # `determine_formatted_fastas`.
    def blastdbcmd
      cmd = "blastdbcmd -recursive -list #{database_dir}" \
            " -list_outfmt \"%f\t%t\t%p\""
      out, = sys(cmd, path: config[:bin])
      out
    end

    # Create BLAST database, given FASTA file, suggested title, and sequence
    # type in FASTA file. Asks the user for confirmation, title and taxid
    # first; returns without doing anything if the user declines.
    def make_blast_database(file, title, type)
      return unless make_blast_database? file, type

      title = confirm_database_title(title)
      taxid = fetch_tax_id
      _make_blast_database(file, type, title, taxid)
    end

    # Runs makeblastdb for the given file and prints its output. If the FASTA
    # file itself is missing (reformatting an existing database), it is first
    # extracted from the database. Terminates the process if makeblastdb fails.
    def _make_blast_database(file, type, title, taxid)
      extract_fasta(file) unless File.exist?(file)
      # -dbtype takes 'nucl' or 'prot': the first four letters of the type.
      cmd = "makeblastdb -parse_seqids -hash_index -in #{file} " \
            "-dbtype #{type.to_s.slice(0, 4)} -title '#{title}'" \
            " -taxid #{taxid}"
      out, err = sys(cmd, path: config[:bin])
      puts out.strip
      puts err.strip
    rescue CommandFailed => e
      puts <<~MSG
        Could not create BLAST database for: #{file}
        Tried: #{cmd}
        stdout: #{e.stdout}
        stderr: #{e.stderr}
      MSG
      exit!
    end

    # Show file path and guessed sequence type to the user and obtain a y/n
    # response.
    #
    # Returns false if the response contains 'n' or 'N', true otherwise
    # (including when nothing was entered).
    def make_blast_database?(file, type)
      puts
      puts
      puts "FASTA file to format/reformat: #{file}"
      puts "FASTA type: #{type}"
      print 'Proceed? [y/n] (Default: y): '
      response = STDIN.gets.to_s.strip
      !response.match(/n/i)
    end

    # Show the database title that we are going to use to the user for
    # confirmation.
    #
    # Returns user input if any. Auto-determined title otherwise.
    def confirm_database_title(default)
      print "Enter a database title or will use '#{default}': "
      from_user = STDIN.gets.to_s.strip
      from_user.empty? && default || from_user
    end

    # Get taxid from the user. Returns user input or 0.
    #
    # Using 0 as taxid is equivalent to not setting taxid for the database
    # that will be created. Asks again if the input is not an integer.
    def fetch_tax_id
      default = 0
      print 'Enter taxid (optional): '
      # `gets` returns nil at end of input; `to_s` turns that into "no answer"
      # so that we fall back to the default instead of re-prompting forever.
      user_response = STDIN.gets.to_s.strip
      user_response.empty? ? default : Integer(user_response)
    rescue ArgumentError
      puts 'taxid should be a number'
      retry
    end

    # Extract FASTA file from BLAST database.
    #
    # Invoked while reformatting a BLAST database if the corresponding
    # FASTA file does not exist. Terminates the process if extraction fails.
    def extract_fasta(db)
      puts
      puts 'Extracting sequences ...'
      cmd = "blastdbcmd -entry all -db #{db}"
      sys(cmd, stdout: db, path: config[:bin])
    rescue CommandFailed => e
      puts <<~MSG
        Could not extract sequences from: #{db}
        Tried: #{cmd}
        stdout: #{e.stdout}
        stderr: #{e.stderr}
      MSG
      exit!
    end

    # Returns true if the database name appears to be a multi-part database
    # name.
    def multipart_database_name?(db_name)
      Database.multipart_database_name? db_name
    end

    # Returns true if first character of the file is '>'.
    def probably_fasta?(file)
      File.read(file, 1) == '>'
    end

    # Suggests improved titles when generating database names from files
    # for improved appearance and readability in web interface.
    # For example:
    #   Cobs1.4.proteins.fasta -> Cobs 1.4 proteins
    #   S_invicta.xx.2.5.small.nucl.fa -> S invicta xx 2.5 small nucl
    #
    # Returns a new String; the argument is left untouched.
    def make_db_title(db_name)
      title = db_name.tr('"', "'")
      # removes .fasta like extension names (only the trailing one)
      ext = File.extname(title)
      title = title.chomp(ext) unless ext.empty?
      # replaces _ with ' ',
      title = title.tr('_', ' ')
      # replaces '.' with ' ' when no numbers are on either side,
      title = title.gsub(/(\D)\.(?=\D)/, '\1 ')
      # preserves version numbers
      title.gsub(/\W*(\d+([.-]\d+)+)\W*/, ' \1 ')
    end

    # Guess whether FASTA file contains protein or nucleotide sequences by
    # sampling the first few characters of the file.
    #
    # Returns the guessed type if all sampled sequences agree on exactly one
    # type, false otherwise.
    def guess_sequence_type_in_fasta(file)
      sequences = sample_sequences(file)
      sequence_types = sequences.map { |seq| Sequence.guess_type(seq) }
      sequence_types = sequence_types.uniq.compact
      (sequence_types.length == 1) && sequence_types.first
    end

    # Read first 1,048,576 characters of the file, split the read text on
    # fasta def line pattern and return.
    #
    # If the given file is FASTA, returns Array of as many different
    # sequences in the portion of the file read. Returns the portion
    # of the file read wrapped in an Array otherwise.
    def sample_sequences(file)
      File.read(file, 1_048_576).split(/^>.+$/).delete_if(&:empty?)
    end
  end
end
@@ -69,10 +69,16 @@ module SequenceServer
69
69
  # include available databases and user-defined search options.
70
70
  get '/searchdata.json' do
71
71
  searchdata = {
72
+ query: Database.retrieve(params[:query]),
72
73
  database: Database.all,
73
- options: SequenceServer.config[:options],
74
- query: Database.retrieve(params[:query])
74
+ options: SequenceServer.config[:options]
75
75
  }
76
+
77
+ # If a job_id is specified, update searchdata from job meta data (i.e.,
78
+ # query, pre-selected databases, advanced options used). Query is only
79
+ # updated if params[:query] is not specified.
80
+ update_searchdata_from_job(searchdata) if params[:job_id]
81
+
76
82
  searchdata.to_json
77
83
  end
78
84
 
@@ -180,5 +186,25 @@ module SequenceServer
180
186
 
181
187
  error_data.to_json
182
188
  end
189
+
190
# Updates searchdata in place with the query sequences, selected databases,
# and advanced params used by the job identified by params[:job_id].
#
# Does nothing if the job cannot be found or was created by importing an
# XML file.
def update_searchdata_from_job(searchdata)
  job = Job.fetch(params[:job_id])
  return if job.nil? || job.imported_xml_file

  # Only read job.qfile if we are not going to use Database.retrieve.
  searchdata[:query] = File.read(job.qfile) unless params[:query]

  # Which databases to pre-select.
  searchdata[:preSelectedDbs] = job.databases

  # job.advanced may be nil in case of old jobs. In this case, we do not
  # override searchdata so that default advanced parameters can be applied.
  # Note that, job.advanced will be an empty string if a user deletes the
  # default advanced parameters from the advanced params input field. In
  # this case, we do want the advanced params input field to be empty when
  # the user hits the back button. Thus we do not test for empty string.
  return unless job.advanced

  # searchdata[:options] may be the very hash stored in the server
  # configuration. Build a new hash instead of assigning into it, so that
  # one job's advanced params do not become the defaults for later requests.
  searchdata[:options] =
    searchdata[:options].merge(job.method => [job.advanced])
end
183
209
  end
184
210
  end
@@ -1,4 +1,4 @@
1
1
  # Define version number.
2
2
  module SequenceServer
3
- VERSION = '2.0.0.beta4'.freeze
3
+ VERSION = '2.0.0.rc5'.freeze
4
4
  end