sequenceserver 2.0.0.beta4 → 2.0.0.rc5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (119) hide show
  1. checksums.yaml +5 -5
  2. data/.dockerignore +1 -0
  3. data/.travis.yml +7 -4
  4. data/AppImage/sequenceserver.sh +5 -0
  5. data/Dockerfile +14 -12
  6. data/bin/sequenceserver +37 -28
  7. data/lib/sequenceserver.rb +35 -7
  8. data/lib/sequenceserver/blast/job.rb +18 -25
  9. data/lib/sequenceserver/blast/report.rb +68 -34
  10. data/lib/sequenceserver/config.rb +1 -1
  11. data/lib/sequenceserver/database.rb +0 -129
  12. data/lib/sequenceserver/makeblastdb.rb +243 -0
  13. data/lib/sequenceserver/routes.rb +28 -2
  14. data/lib/sequenceserver/version.rb +1 -1
  15. data/public/SequenceServer_logo.png +0 -0
  16. data/public/css/grapher.css +8 -15
  17. data/public/css/sequenceserver.css +119 -55
  18. data/public/css/sequenceserver.min.css +3 -3
  19. data/public/js/circos.js +1 -1
  20. data/public/js/download_fasta.js +17 -0
  21. data/public/js/grapher.js +7 -9
  22. data/public/js/hit.js +217 -0
  23. data/public/js/hits_overview.js +12 -13
  24. data/public/js/hsp.js +104 -84
  25. data/public/js/{sequenceserver.js → jquery_world.js} +1 -18
  26. data/public/js/kablammo.js +337 -334
  27. data/public/js/length_distribution.js +1 -1
  28. data/public/js/query.js +147 -0
  29. data/public/js/report.js +216 -836
  30. data/public/js/search.js +194 -192
  31. data/public/js/sequence_modal.js +167 -0
  32. data/public/js/sidebar.js +210 -0
  33. data/public/js/utils.js +2 -19
  34. data/public/js/visualisation_helpers.js +2 -2
  35. data/public/sequenceserver-report.min.js +19 -19
  36. data/public/sequenceserver-search.min.js +11 -11
  37. data/public/vendor/github/twbs/bootstrap@3.3.5/js/bootstrap.js +2 -2
  38. data/spec/blast_versions/blast_2.2.30/import_spec_capybara_local_2.2.30.rb +15 -15
  39. data/spec/blast_versions/blast_2.2.31/import_spec_capybara_local_2.2.31.rb +15 -15
  40. data/spec/blast_versions/blast_2.3.0/import_spec_capybara_local_2.3.0.rb +15 -15
  41. data/spec/blast_versions/blast_2.4.0/import_spec_capybara_local_2.4.0.rb +15 -15
  42. data/spec/blast_versions/blast_2.5.0/import_spec_capybara_local_2.5.0.rb +15 -15
  43. data/spec/blast_versions/blast_2.6.0/import_spec_capybara_local_2.6.0.rb +15 -15
  44. data/spec/blast_versions/blast_2.7.1/import_spec_capybara_local_2.7.1.rb +15 -15
  45. data/spec/blast_versions/blast_2.8.1/import_spec_capybara_local_2.8.1.rb +15 -15
  46. data/spec/blast_versions/blast_2.9.0/import_spec_capybara_local_2.9.0.rb +15 -15
  47. data/spec/blast_versions/diamond_0.9.24/import_spec_capybara_local_0.9.24.rb +6 -6
  48. data/spec/capybara_spec.rb +14 -3
  49. data/spec/database/sample/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.ndb +0 -0
  50. data/spec/database/sample/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nhr +0 -0
  51. data/spec/database/sample/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nin +0 -0
  52. data/spec/database/sample/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nos +0 -0
  53. data/spec/database/sample/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.not +0 -0
  54. data/spec/database/sample/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.ntf +0 -0
  55. data/spec/database/sample/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nto +0 -0
  56. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.pdb +0 -0
  57. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.phr +0 -0
  58. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.pin +0 -0
  59. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.pos +0 -0
  60. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.pot +0 -0
  61. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.ptf +0 -0
  62. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.pto +0 -0
  63. data/spec/database/sample/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.pdb +0 -0
  64. data/spec/database/sample/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.phr +0 -0
  65. data/spec/database/sample/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.pin +0 -0
  66. data/spec/database/sample/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.pos +0 -0
  67. data/spec/database/sample/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.pot +0 -0
  68. data/spec/database/sample/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.ptf +0 -0
  69. data/spec/database/sample/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.pto +0 -0
  70. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.ndb +0 -0
  71. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nhr +0 -0
  72. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nin +0 -0
  73. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nos +0 -0
  74. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.not +0 -0
  75. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nsq +0 -0
  76. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.ntf +0 -0
  77. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nto +0 -0
  78. data/spec/database/v4/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nhd +8 -0
  79. data/spec/database/v4/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nhi +0 -0
  80. data/spec/database/v4/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nhr +0 -0
  81. data/spec/database/v4/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nin +0 -0
  82. data/spec/database/v4/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nog +0 -0
  83. data/spec/database/{sample → v4}/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nsd +0 -0
  84. data/spec/database/{sample → v4}/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nsi +0 -0
  85. data/spec/database/v4/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.fasta.nsq +0 -0
  86. data/spec/database/v4/genome/Solenopsis_invicta/Solenopsis_invicta_gnG_subset.txt +8 -0
  87. data/spec/database/v4/links.rb +23 -0
  88. data/spec/database/v4/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta +6449 -0
  89. data/spec/database/v4/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.phd +1189 -0
  90. data/spec/database/v4/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.phi +0 -0
  91. data/spec/database/v4/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.phr +0 -0
  92. data/spec/database/v4/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.pin +0 -0
  93. data/spec/database/v4/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.pog +0 -0
  94. data/spec/database/{sample → v4}/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.psd +0 -0
  95. data/spec/database/{sample → v4}/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.psi +0 -0
  96. data/spec/database/v4/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.psq +0 -0
  97. data/spec/database/v4/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.phd +9140 -0
  98. data/spec/database/v4/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.phi +0 -0
  99. data/spec/database/v4/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.phr +0 -0
  100. data/spec/database/v4/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.pin +0 -0
  101. data/spec/database/v4/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.pog +0 -0
  102. data/spec/database/{sample → v4}/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.psd +0 -0
  103. data/spec/database/{sample → v4}/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.psi +0 -0
  104. data/spec/database/v4/proteins/uniprot/2018-04-Swiss-Prot_insecta.fasta.psq +0 -0
  105. data/spec/database/v4/proteins/uniprot/URL +1 -0
  106. data/spec/database/v4/si_uniprot_idmap.yml +14180 -0
  107. data/spec/database/v4/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta +5486 -0
  108. data/spec/database/v4/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nhd +473 -0
  109. data/spec/database/v4/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nhi +0 -0
  110. data/spec/database/v4/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nhr +0 -0
  111. data/spec/database/v4/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nin +0 -0
  112. data/spec/database/v4/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nog +0 -0
  113. data/spec/database/{sample → v4}/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nsd +0 -0
  114. data/spec/database/{sample → v4}/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nsi +0 -0
  115. data/spec/database/v4/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nsq +0 -0
  116. data/spec/database_spec.rb +0 -76
  117. data/spec/makeblastdb_spec.rb +121 -0
  118. data/views/layout.erb +5 -1
  119. metadata +75 -15
@@ -81,7 +81,7 @@ module SequenceServer
81
81
  # otherwise.
82
82
  def parse_config_file
83
83
  unless file? config_file
84
- logger.info "Configuration file not found: #{config_file}"
84
+ logger.debug "Configuration file not found: #{config_file}"
85
85
  return {}
86
86
  end
87
87
  logger.info "Reading configuration file: #{config_file}."
@@ -1,4 +1,3 @@
1
- require 'find'
2
1
  require 'open3'
3
2
  require 'digest/md5'
4
3
  require 'forwardable'
@@ -209,89 +208,6 @@ module SequenceServer
209
208
  end
210
209
  # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
211
210
 
212
- # Recursively scan `database_dir` for un-formatted FASTA and format them
213
- # for use with BLAST+.
214
- def make_blast_databases
215
- unformatted_fastas.select do |file, sequence_type|
216
- make_blast_database(file, sequence_type)
217
- end
218
- end
219
-
220
- # Returns an Array of FASTA files that may require formatting, and the
221
- # type of sequence contained in each FASTA.
222
- #
223
- # > unformatted_fastas
224
- # => [['/foo/bar.fasta', :nucleotide], ...]
225
- def unformatted_fastas
226
- list = []
227
- database_dir = config[:database_dir]
228
- Find.find database_dir do |file|
229
- next if File.directory? file
230
- next if Database.include? file
231
- next unless probably_fasta? file
232
- sequence_type = guess_sequence_type_in_fasta file
233
- if %i[protein nucleotide].include?(sequence_type)
234
- list << [file, sequence_type]
235
- end
236
- end
237
- list
238
- end
239
-
240
- # Create BLAST database, given FASTA file and sequence type in FASTA file.
241
- def make_blast_database(file, type)
242
- return unless make_blast_database? file, type
243
- title = get_database_title(file)
244
- taxid = fetch_tax_id
245
- _make_blast_database(file, type, title, taxid)
246
- end
247
-
248
- def _make_blast_database(file, type, title, taxid, quiet = false)
249
- cmd = 'makeblastdb -parse_seqids -hash_index ' \
250
- "-in #{file} -dbtype #{type.to_s.slice(0, 4)} -title '#{title}'" \
251
- " -taxid #{taxid}"
252
- out, err = sys(cmd, path: config[:bin])
253
- puts out, err unless quiet
254
- end
255
-
256
- # Show file path and guessed sequence type to the user and obtain a y/n
257
- # response.
258
- #
259
- # Returns true if the user entered anything but 'n' or 'N'.
260
- def make_blast_database?(file, type)
261
- puts
262
- puts
263
- puts "FASTA file: #{file}"
264
- puts "FASTA type: #{type}"
265
- print 'Proceed? [y/n] (Default: y): '
266
- response = STDIN.gets.to_s.strip
267
- !response.match(/n/i)
268
- end
269
-
270
- # Generate a title for the given database and show it to the user for
271
- # confirmation.
272
- #
273
- # Returns user input if any. Auto-generated title otherwise.
274
- def get_database_title(path)
275
- default = make_db_title(File.basename(path))
276
- print "Enter a database title or will use '#{default}': "
277
- from_user = STDIN.gets.to_s.strip
278
- from_user.empty? && default || from_user
279
- end
280
-
281
- # Get taxid from the user. Returns user input or 0.
282
- #
283
- # Using 0 as taxid is equivalent to not setting taxid for the database
284
- # that will be created.
285
- def fetch_tax_id
286
- default = 0
287
- print 'Enter taxid (optional): '
288
- user_response = STDIN.gets.strip
289
- user_response.empty? && default || Integer(user_response)
290
- rescue
291
- puts 'taxid should be a number'
292
- retry
293
- end
294
-
295
211
  # Returns true if the database name appears to be a multi-part database
296
212
  # name.
297
213
  #
@@ -304,51 +220,6 @@ module SequenceServer
304
220
  def multipart_database_name?(db_name)
305
221
  !(db_name.match(%r{.+/\S+\.\d{2,3}$}).nil?)
306
222
  end
307
-
308
- # Returns true if first character of the file is '>'.
309
- def probably_fasta?(file)
310
- File.read(file, 1) == '>'
311
- end
312
-
313
- # Suggests improved titles when generating database names from files
314
- # for improved appearance and readability in web interface.
315
- # For example:
316
- # Cobs1.4.proteins.fasta -> Cobs 1.4 proteins
317
- # S_invicta.xx.2.5.small.nucl.fa -> S invicta xx 2.5 small nucl
318
- def make_db_title(db_name)
319
- db_name.tr!('"', "'")
320
- # removes .fasta like extension names
321
- db_name.gsub!(File.extname(db_name), '')
322
- # replaces _ with ' ',
323
- db_name.gsub!(/(_)/, ' ')
324
- # replaces '.' with ' ' when no numbers are on either side,
325
- db_name.gsub!(/(\D)\.(?=\D)/, '\1 ')
326
- # preserves version numbers
327
- db_name.gsub!(/\W*(\d+([.-]\d+)+)\W*/, ' \1 ')
328
- db_name
329
- end
330
-
331
- # Guess whether FASTA file contains protein or nucleotide sequences based
332
- # on first 32768 characters.
333
- #
334
- # NOTE: 2^15 == 32768. Approximately 546 lines, assuming 60 characters on
335
- # each line.
336
- def guess_sequence_type_in_fasta(file)
337
- sequences = sample_sequences(file)
338
- sequence_types = sequences.map { |seq| Sequence.guess_type(seq) }
339
- sequence_types = sequence_types.uniq.compact
340
- (sequence_types.length == 1) && sequence_types.first
341
- end
342
-
343
- # Read first 32768 characters of the file. Split on fasta def line
344
- # pattern and return.
345
- #
346
- # If the given file is FASTA, returns Array of as many different
347
- # sequences in the portion of the file read. Returns the portion
348
- # of the file read wrapped in an Array otherwise.
349
- def sample_sequences(file)
350
- File.read(file, 32_768).split(/^>.+$/).delete_if(&:empty?)
351
- end
352
223
  end
353
224
  end
354
225
  end
@@ -0,0 +1,243 @@
1
+ require 'find'
2
+ require 'forwardable'
3
+
4
+ module SequenceServer
5
+ # Smart `makeblastdb` wrapper: recursively scans database directory determining
6
+ # which files need to be formatted or re-formatted.
7
+ #
8
+ # Example usage:
9
+ #
10
+ # makeblastdb = MAKEBLASTDB.new(database_dir)
11
+ # makeblastdb.scan && makeblastdb.run
12
+ #
13
+ class MAKEBLASTDB
14
+ # We want V5 databases created using -parse_seqids for proper function of
15
+ # SequenceServer. This means each database should be comprised of at least 9
16
+ # files with the following extensions. Databases created by us will have two
17
+ # additional files with the extensions nhd and nhi, or phd and phi, due to
18
+ # the use of -hash_index option. Finally, multipart databases will have one
19
+ # additional file with the extension nal or pal.
20
+ REQUIRED_EXTENSIONS = {
21
+ 'nucleotide' => %w{ndb nhr nin nog nos not nsq ntf nto}.freeze,
22
+ 'protein' => %w{pdb phr pin pog pos pot psq ptf pto}.freeze
23
+ }
24
+
25
+ extend Forwardable
26
+
27
+ def_delegators SequenceServer, :config, :sys
28
+
29
+ def initialize(database_dir)
30
+ @database_dir = database_dir
31
+ end
32
+
33
+ attr_reader :database_dir
34
+
35
+ # Scans the database directory to determine which FASTA files require
36
+ # formatting or re-formatting.
37
+ #
38
+ # Returns `true` if there are files to (re-)format, `false` otherwise.
39
+ def scan
40
+ # We need to know the list of formatted FASTAs as reported by blastdbcmd
41
+ # first. This is required to determine both unformatted FASTAs and those
42
+ # that require reformatting.
43
+ @formatted_fastas = []
44
+ determine_formatted_fastas
45
+
46
+ # Now determine FASTA files that are unformatted or require reformatting.
47
+ @fastas_to_format = []
48
+ determine_unformatted_fastas
49
+ determine_fastas_to_reformat
50
+
51
+ # Return true if there are files to be (re-)formatted or false otherwise.
52
+ !@fastas_to_format.empty?
53
+ end
54
+
55
+ # Runs makeblastdb on each file in `@fastas_to_format`. Will do nothing
56
+ # unless `#scan` has been run before.
57
+ def run
58
+ return unless @fastas_to_format || @fastas_to_format.empty?
59
+ @fastas_to_format.each do |path, title, type|
60
+ make_blast_database(path, title, type)
61
+ end
62
+ end
63
+
64
+ private
65
+
66
+ # Determines which FASTA files in the database directory are already
67
+ # formatted. Adds to @formatted_fastas.
68
+ def determine_formatted_fastas
69
+ blastdbcmd.each_line do |line|
70
+ path, title, type = line.split(' ')
71
+ next if multipart_database_name?(path)
72
+ @formatted_fastas << [path, title, type.strip.downcase]
73
+ end
74
+ end
75
+
76
+ # Determines which FASTA files in the database directory require
77
+ # reformatting. Adds to @fastas_to_format.
78
+ def determine_fastas_to_reformat
79
+ @formatted_fastas.each do |path, title, type|
80
+ required_extensions = REQUIRED_EXTENSIONS[type]
81
+ exts = Dir["#{path}.*"].map { |p| p.split('.').last }.sort
82
+ next if (exts & required_extensions) == required_extensions
83
+
84
+ @fastas_to_format << [path, title, type]
85
+ end
86
+ end
87
+
88
+ # Determines which FASTA files in the database directory are
89
+ # unformatted. Adds to @fastas_to_format.
90
+ def determine_unformatted_fastas
91
+ Find.find(database_dir) do |path|
92
+ next if File.directory?(path)
93
+ next unless probably_fasta?(path)
94
+ next if @formatted_fastas.any? { |f| f[0] == path }
95
+
96
+ @fastas_to_format << [path,
97
+ make_db_title(File.basename(path)),
98
+ guess_sequence_type_in_fasta(path)]
99
+ end
100
+ end
101
+
102
+ # Runs `blastdbcmd` to determine formatted FASTA files in the database
103
+ # directory. Returns the output of `blastdbcmd`. This method is called
104
+ # by `determine_formatted_fastas`.
105
+ def blastdbcmd
106
+ cmd = "blastdbcmd -recursive -list #{database_dir}" \
107
+ ' -list_outfmt "%f %t %p"'
108
+ out, _ = sys(cmd, path: config[:bin])
109
+ out
110
+ end
111
+
112
+ # Create BLAST database, given FASTA file and sequence type in FASTA file.
113
+ def make_blast_database(file, title, type)
114
+ return unless make_blast_database? file, type
115
+ title = confirm_database_title(title)
116
+ taxid = fetch_tax_id
117
+ _make_blast_database(file, type, title, taxid)
118
+ end
119
+
120
+ def _make_blast_database(file, type, title, taxid)
121
+ extract_fasta(file) unless File.exist?(file)
122
+ cmd = "makeblastdb -parse_seqids -hash_index -in #{file} " \
123
+ "-dbtype #{type.to_s.slice(0, 4)} -title '#{title}'" \
124
+ " -taxid #{taxid}"
125
+ out, err = sys(cmd, path: config[:bin])
126
+ puts out.strip
127
+ puts err.strip
128
+ rescue CommandFailed => e
129
+ puts <<~MSG
130
+ Could not create BLAST database for: #{file}
131
+ Tried: #{cmd}
132
+ stdout: #{e.stdout}
133
+ stderr: #{e.stderr}
134
+ MSG
135
+ exit!
136
+ end
137
+
138
+ # Show file path and guessed sequence type to the user and obtain a y/n
139
+ # response.
140
+ #
141
+ # Returns true unless the user's response contains 'n' or 'N'.
142
+ def make_blast_database?(file, type)
143
+ puts
144
+ puts
145
+ puts "FASTA file to format/reformat: #{file}"
146
+ puts "FASTA type: #{type}"
147
+ print 'Proceed? [y/n] (Default: y): '
148
+ response = STDIN.gets.to_s.strip
149
+ !response.match(/n/i)
150
+ end
151
+
152
+ # Show the database title that we are going to use to the user for
153
+ # confirmation.
154
+ #
155
+ # Returns user input if any. Auto-determined title otherwise.
156
+ def confirm_database_title(default)
157
+ print "Enter a database title or will use '#{default}': "
158
+ from_user = STDIN.gets.to_s.strip
159
+ from_user.empty? && default || from_user
160
+ end
161
+
162
+ # Get taxid from the user. Returns user input or 0.
163
+ #
164
+ # Using 0 as taxid is equivalent to not setting taxid for the database
165
+ # that will be created.
166
+ def fetch_tax_id
167
+ default = 0
168
+ print 'Enter taxid (optional): '
169
+ user_response = STDIN.gets.strip
170
+ user_response.empty? && default || Integer(user_response)
171
+ rescue
172
+ puts 'taxid should be a number'
173
+ retry
174
+ end
175
+
176
+ # Extract FASTA file from BLAST database.
177
+ #
178
+ # Invoked while reformatting a BLAST database if the corresponding
179
+ # FASTA file does not exist.
180
+ def extract_fasta(db)
181
+ puts
182
+ puts 'Extracting sequences ...'
183
+ cmd = "blastdbcmd -entry all -db #{db}"
184
+ sys(cmd, stdout: db, path: config[:bin])
185
+ rescue CommandFailed => e
186
+ puts <<~MSG
187
+ Could not extract sequences from: #{db}
188
+ Tried: #{cmd}
189
+ stdout: #{e.stdout}
190
+ stderr: #{e.stderr}
191
+ MSG
192
+ exit!
193
+ end
194
+
195
+ # Returns true if the database name appears to be a multi-part database
196
+ # name.
197
+ def multipart_database_name?(db_name)
198
+ Database.multipart_database_name? db_name
199
+ end
200
+
201
+ # Returns true if first character of the file is '>'.
202
+ def probably_fasta?(file)
203
+ File.read(file, 1) == '>'
204
+ end
205
+
206
+ # Suggests improved titles when generating database names from files
207
+ # for improved appearance and readability in web interface.
208
+ # For example:
209
+ # Cobs1.4.proteins.fasta -> Cobs 1.4 proteins
210
+ # S_invicta.xx.2.5.small.nucl.fa -> S invicta xx 2.5 small nucl
211
+ def make_db_title(db_name)
212
+ db_name.tr!('"', "'")
213
+ # removes .fasta like extension names
214
+ db_name.gsub!(File.extname(db_name), '')
215
+ # replaces _ with ' ',
216
+ db_name.gsub!(/(_)/, ' ')
217
+ # replaces '.' with ' ' when no numbers are on either side,
218
+ db_name.gsub!(/(\D)\.(?=\D)/, '\1 ')
219
+ # preserves version numbers
220
+ db_name.gsub!(/\W*(\d+([.-]\d+)+)\W*/, ' \1 ')
221
+ db_name
222
+ end
223
+
224
+ # Guess whether FASTA file contains protein or nucleotide sequences by
225
+ # sampling a few characters of the file.
226
+ def guess_sequence_type_in_fasta(file)
227
+ sequences = sample_sequences(file)
228
+ sequence_types = sequences.map { |seq| Sequence.guess_type(seq) }
229
+ sequence_types = sequence_types.uniq.compact
230
+ (sequence_types.length == 1) && sequence_types.first
231
+ end
232
+
233
+ # Read first 1,048,576 characters of the file, split the read text on
234
+ # fasta def line pattern and return.
235
+ #
236
+ # If the given file is FASTA, returns Array of as many different
237
+ # sequences in the portion of the file read. Returns the portion
238
+ # of the file read wrapped in an Array otherwise.
239
+ def sample_sequences(file)
240
+ File.read(file, 1_048_576).split(/^>.+$/).delete_if(&:empty?)
241
+ end
242
+ end
243
+ end
@@ -69,10 +69,16 @@ module SequenceServer
69
69
  # include available databases and user-defined search options.
70
70
  get '/searchdata.json' do
71
71
  searchdata = {
72
+ query: Database.retrieve(params[:query]),
72
73
  database: Database.all,
73
- options: SequenceServer.config[:options],
74
- query: Database.retrieve(params[:query])
74
+ options: SequenceServer.config[:options]
75
75
  }
76
+
77
+ # If a job_id is specified, update searchdata from job meta data (i.e.,
78
+ # query, pre-selected databases, advanced options used). Query is only
79
+ # updated if params[:query] is not specified.
80
+ update_searchdata_from_job(searchdata) if params[:job_id]
81
+
76
82
  searchdata.to_json
77
83
  end
78
84
 
@@ -180,5 +186,25 @@ module SequenceServer
180
186
 
181
187
  error_data.to_json
182
188
  end
189
+
190
+ # Get the query sequences, selected databases, and advanced params used.
191
+ def update_searchdata_from_job(searchdata)
192
+ job = Job.fetch(params[:job_id])
193
+ return if job.imported_xml_file
194
+
195
+ # Only read job.qfile if we are not going to use Database.retrieve.
196
+ searchdata[:query] = File.read(job.qfile) if !params[:query]
197
+
198
+ # Which databases to pre-select.
199
+ searchdata[:preSelectedDbs] = job.databases
200
+
201
+ # job.advanced may be nil in case of old jobs. In this case, we do not
202
+ # override searchdata so that default advanced parameters can be applied.
203
+ # Note that job.advanced will be an empty string if a user deletes the
204
+ # default advanced parameters from the advanced params input field. In
205
+ # this case, we do want the advanced params input field to be empty when
206
+ # the user hits the back button. Thus we do not test for empty string.
207
+ searchdata[:options][job.method] = [job.advanced] if job.advanced
208
+ end
183
209
  end
184
210
  end
@@ -1,4 +1,4 @@
1
1
  # Define version number.
2
2
  module SequenceServer
3
- VERSION = '2.0.0.beta4'.freeze
3
+ VERSION = '2.0.0.rc5'.freeze
4
4
  end