sequenceserver 3.0.1 → 3.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. checksums.yaml +4 -4
  2. data/bin/sequenceserver +2 -2
  3. data/lib/sequenceserver/api_errors.rb +56 -2
  4. data/lib/sequenceserver/blast/job.rb +20 -3
  5. data/lib/sequenceserver/blast/report.rb +74 -86
  6. data/lib/sequenceserver/blast/tasks.rb +38 -0
  7. data/lib/sequenceserver/blast.rb +6 -0
  8. data/lib/sequenceserver/config.rb +54 -20
  9. data/lib/sequenceserver/database.rb +13 -0
  10. data/lib/sequenceserver/makeblastdb.rb +16 -2
  11. data/lib/sequenceserver/report.rb +0 -6
  12. data/lib/sequenceserver/routes.rb +66 -25
  13. data/lib/sequenceserver/sequence.rb +34 -7
  14. data/lib/sequenceserver/server.rb +1 -1
  15. data/lib/sequenceserver/version.rb +1 -1
  16. data/lib/sequenceserver.rb +1 -1
  17. data/public/404.html +1 -1
  18. data/public/css/app.css +121 -0
  19. data/public/css/app.min.css +1 -0
  20. data/public/css/sequenceserver.css +0 -148
  21. data/public/css/sequenceserver.min.css +3 -3
  22. data/public/js/circos.js +2 -2
  23. data/public/js/collapse_preferences.js +37 -0
  24. data/public/js/databases.js +65 -37
  25. data/public/js/databases_tree.js +2 -1
  26. data/public/js/dnd.js +37 -50
  27. data/public/js/download_fasta.js +1 -0
  28. data/public/js/form.js +79 -50
  29. data/public/js/grapher.js +23 -37
  30. data/public/js/hits_overview.js +2 -2
  31. data/public/js/kablammo.js +2 -2
  32. data/public/js/length_distribution.js +3 -3
  33. data/public/js/null_plugins/grapher/histogram.js +25 -0
  34. data/public/js/null_plugins/options.js +3 -0
  35. data/public/js/null_plugins/query_stats.js +11 -0
  36. data/public/js/null_plugins/report_plugins.js +6 -1
  37. data/public/js/null_plugins/search_header_plugin.js +4 -0
  38. data/public/js/options.js +161 -56
  39. data/public/js/query.js +85 -59
  40. data/public/js/report.js +1 -1
  41. data/public/js/search.js +2 -0
  42. data/public/js/search_button.js +67 -56
  43. data/public/js/sidebar.js +10 -1
  44. data/public/js/tests/database.spec.js +5 -5
  45. data/public/js/tests/form.spec.js +98 -0
  46. data/public/js/tests/mock_data/databases.json +5 -5
  47. data/public/js/tests/mocks/circos.js +6 -0
  48. data/public/js/tests/report.spec.js +4 -3
  49. data/public/js/tests/search_query.spec.js +16 -6
  50. data/public/sequenceserver-report.min.js +46 -24
  51. data/public/sequenceserver-search.min.js +57 -13
  52. data/public/sequenceserver_logo.webp +0 -0
  53. data/views/blastn_options.erb +66 -66
  54. data/views/blastp_options.erb +59 -59
  55. data/views/blastx_options.erb +68 -68
  56. data/views/layout.erb +61 -3
  57. data/views/search.erb +33 -38
  58. data/views/search_layout.erb +153 -0
  59. data/views/tblastn_options.erb +57 -57
  60. data/views/tblastx_options.erb +64 -64
  61. metadata +51 -22
  62. data/lib/sequenceserver/makeblastdb-modified-with-cache.rb +0 -345
  63. data/public/SequenceServer_logo.png +0 -0
  64. data/public/js/tests/advanced_parameters.spec.js +0 -36
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 05745f148bb0f76da55ed9093dd77152b218f5d20656294fc879802128c24bdf
4
- data.tar.gz: 76944e8d19192955b7c0f44c59466da4ff7c3db45a03190d62685abb0e5f3ccf
3
+ metadata.gz: 1579fdcd4e8dc694025ccde8534ab3a48d935cfef4830a8de2c446657449aaf8
4
+ data.tar.gz: 25561e738f1511d4ee40730eec04c943148749313d29ccf362098b15a1e922c5
5
5
  SHA512:
6
- metadata.gz: ea823aec0b9328ebeba756547b7d9f523105a6d87230ac18e27e106e86e2e44ec9a21c3a17f01b325bc12682c35b982f3606d73689d708a8611509695a973eba
7
- data.tar.gz: a0702e4ee63201fa75a366d5aa0cd7a6459d3e7e034744977d18139937008d60c68baffdb0ae0888ecc7e021b0072c1c94e2a429a0bdcb995e8738b2033c2604
6
+ metadata.gz: 965a7ab34577a0180d8ac1ac830c84737118e4c3fe2997cc61886424929066a95d3df3869c01ca46ffceb3a9ef3d39b96676e87841bf57c0c4903cd99700fde0
7
+ data.tar.gz: 3a5fa2dce619076a5e0b626981c2bb95ffc544ae8b920942b2979cabe4869120e549f494cbd15a02bf10e265bb303f8988264824839ea934f80797e280b8045a
data/bin/sequenceserver CHANGED
@@ -398,8 +398,8 @@ begin
398
398
  end
399
399
 
400
400
  if import?
401
- xml_file = fetch_option(:import).value
402
- params = {:xml => xml_file}
401
+ xml_file_path = fetch_option(:import).value
402
+ params = {:xml => xml_file_path}
403
403
  job = SequenceServer::BLAST::Job.new(params)
404
404
  puts job.id
405
405
  exit
@@ -1,9 +1,10 @@
1
1
  module SequenceServer
2
- Error = Class.new(StandardError)
2
+ Error = Class.new(Sinatra::Error)
3
3
 
4
+ ValidationError = Class.new(Error)
4
5
  # DatabaseUnreachableError is raised when the serialised Job object is
5
6
  # referring to a database that is not present in the current filesystem.
6
- class DatabaseUnreachableError < Error
7
+ class DatabaseUnreachableError < ValidationError
7
8
  attr_reader :more_info
8
9
 
9
10
  def initialize(more_info)
@@ -11,6 +12,10 @@ module SequenceServer
11
12
  @more_info = more_info
12
13
  end
13
14
 
15
+ def http_status
16
+ 422
17
+ end
18
+
14
19
  def title
15
20
  'Sequence database unreachable'
16
21
  end
@@ -22,6 +27,55 @@ module SequenceServer
22
27
  end
23
28
  end
24
29
 
30
+ # InvalidSequenceIdError is raised when the FASTA sequence ID provided by the
31
+ # frontend appears to be invalid. It is important to validate the sequence ID
32
+ # format for security reasons.
33
+ class InvalidSequenceIdError < ValidationError
34
+ attr_reader :more_info
35
+
36
+ def initialize(more_info)
37
+ super
38
+ @more_info = more_info
39
+ end
40
+
41
+ def http_status
42
+ 422
43
+ end
44
+
45
+ def title
46
+ 'Sequence ID invalid'
47
+ end
48
+
49
+ def message
50
+ "The action you're trying to perform is not possible because \
51
+ one of the FASTA ids seems to be invalid."
52
+ end
53
+ end
54
+
55
+ # InvalidParameterError is a more generic error class that can be
56
+ # raised when the frontend sends a request with an invalid parameter
57
+ class InvalidParameterError < ValidationError
58
+ attr_reader :more_info
59
+
60
+ def initialize(more_info)
61
+ super
62
+ @more_info = more_info
63
+ end
64
+
65
+ def http_status
66
+ 422
67
+ end
68
+
69
+ def title
70
+ 'Invalid parameter'
71
+ end
72
+
73
+ def message
74
+ "The action you're trying to perform is not possible because \
75
+ one of the provided parameters is invalid."
76
+ end
77
+ end
78
+
25
79
  # API errors have an http status, title, message, and additional information
26
80
  # like stacktrace or information from program output.
27
81
  APIError = Class.new(Error)
@@ -21,7 +21,8 @@ module SequenceServer
21
21
  else
22
22
  validate params
23
23
  super do
24
- @method = params[:method]
24
+ @method = params[:method]
25
+ @query = params[:sequence]
25
26
  @qfile = store('query.fa', params[:sequence])
26
27
  @databases = Database[params[:databases]]
27
28
  @advanced = params[:advanced].to_s.strip
@@ -29,6 +30,7 @@ module SequenceServer
29
30
  # The following params are for analytics only
30
31
  @num_threads = config[:num_threads]
31
32
  @query_length = calculate_query_size
33
+ @number_of_query_sequences = calculate_number_of_sequences
32
34
  @databases_ncharacters_total = calculate_databases_ncharacters_total
33
35
  end
34
36
  end
@@ -36,8 +38,15 @@ module SequenceServer
36
38
 
37
39
  # :nodoc:
38
40
  # Attributes used by us - should be considered private.
39
- attr_reader :advanced
40
- attr_reader :databases, :databases_ncharacters_total, :method, :num_threads, :options, :qfile, :query_length
41
+ attr_reader :advanced,
42
+ :databases,
43
+ :databases_ncharacters_total,
44
+ :method,
45
+ :num_threads,
46
+ :options,
47
+ :qfile,
48
+ :query_length,
49
+ :number_of_query_sequences
41
50
 
42
51
  # :nodoc:
43
52
  # Deprecated; see Report#extract_params
@@ -92,6 +101,14 @@ module SequenceServer
92
101
  size
93
102
  end
94
103
 
104
+ def calculate_number_of_sequences
105
+ # Splitting the query on every ">" that starts a new line yields one chunk per sequence.
106
+ sequences = @query.split(/\n\s*>\s*+/)
107
+ # Remove any empty strings from the split result
108
+ sequences.reject!(&:empty?)
109
+ sequences.length
110
+ end
111
+
95
112
  def validate(params)
96
113
  validate_method params[:method]
97
114
  validate_sequences params[:sequence]
@@ -24,20 +24,11 @@ module SequenceServer
24
24
  class Report < Report
25
25
  def initialize(job)
26
26
  super do
27
- @queries = []
28
27
  @querydb = job.databases
29
28
  end
30
29
  end
31
30
 
32
- # Attributes parsed out from BLAST output.
33
- attr_reader :program, :program_version, :stats, :queries
34
-
35
- # Attributes parsed from job metadata and BLAST output.
36
- attr_reader :querydb, :dbtype, :params
37
-
38
31
  def to_json(*_args)
39
- generate
40
-
41
32
  %i[querydb program program_version params stats
42
33
  queries].inject({}) do |h, k|
43
34
  h[k] = send(k)
@@ -53,83 +44,89 @@ module SequenceServer
53
44
  def xml_file_size
54
45
  return File.size(job.imported_xml_file) if job.imported_xml_file
55
46
 
56
- generate
57
-
58
47
  xml_formatter.size
59
48
  end
60
49
 
61
- # Generate report.
62
- def generate
63
- return self if @_generated
64
-
65
- job.raise!
66
- xml_ir = nil
67
- tsv_ir = nil
68
- if job.imported_xml_file
69
- xml_ir = parse_xml File.read(job.imported_xml_file)
70
- tsv_ir = Hash.new do |h1, k1|
71
- h1[k1] = Hash.new do |h2, k2|
72
- h2[k2] = ['', '', []]
73
- end
74
- end
75
- else
76
- xml_ir = parse_xml(xml_formatter.read_file)
77
- tsv_ir = parse_tsv(tsv_formatter.read_file)
50
+ def done?
51
+ return true if job.imported_xml_file
52
+
53
+ File.exist?(xml_formatter.filepath) && File.exist?(tsv_formatter.filepath)
54
+ end
55
+
56
+ def program
57
+ @program ||= xml_ir[0]
58
+ end
59
+
60
+ def program_version
61
+ @program_version ||= xml_ir[1]
62
+ end
63
+
64
+ def querydb
65
+ @querydb ||= xml_ir[3].split.map do |path|
66
+ { title: File.basename(path) }
78
67
  end
79
- extract_program_info xml_ir
80
- extract_db_info xml_ir
81
- extract_params xml_ir
82
- extract_stats xml_ir
83
- extract_queries xml_ir, tsv_ir
68
+ end
69
+
70
+ def dbtype
71
+ @dbtype ||= querydb&.first&.type || dbtype_from_program
72
+ end
84
73
 
85
- @_generated = true
74
+ def params
75
+ @params ||= extract_params
76
+ end
86
77
 
87
- self
78
+ def stats
79
+ @stats ||= extract_stats
88
80
  end
89
81
 
90
- def done?
91
- return true if job.imported_xml_file
82
+ def queries
83
+ @queries ||= xml_ir[8].map do |n|
84
+ query = Query.new(self, n[0], n[2], n[3], [])
85
+ query.hits = query_hits(n[4], tsv_ir[query.id], query)
92
86
 
93
- File.exist?(xml_formatter.filepath) && File.exist?(tsv_formatter.filepath)
87
+ query
88
+ end
94
89
  end
95
90
 
96
91
  private
97
92
 
98
- def xml_formatter
99
- @xml_formatter ||= Formatter.run(job, 'xml')
93
+ def xml_ir
94
+ @xml_ir ||=
95
+ if job.imported_xml_file
96
+ parse_xml File.read(job.imported_xml_file)
97
+ else
98
+ job.raise!
99
+ parse_xml(xml_formatter.read_file)
100
+ end
100
101
  end
101
102
 
102
- def tsv_formatter
103
- @tsv_formatter ||= Formatter.run(job, 'custom_tsv')
103
+ def tsv_ir
104
+ @tsv_ir ||=
105
+ if job.imported_xml_file
106
+ Hash.new do |h1, k1|
107
+ h1[k1] = Hash.new do |h2, k2|
108
+ h2[k2] = ['', '', []]
109
+ end
110
+ end
111
+ else
112
+ job.raise!
113
+ parse_tsv(tsv_formatter.read_file)
114
+ end
104
115
  end
105
116
 
106
- # Make program name and program name + version available via `program`
107
- # and `program_version` attributes.
108
- def extract_program_info(ir)
109
- @program = ir[0]
110
- @program_version = ir[1]
117
+ def xml_formatter
118
+ @xml_formatter ||= Formatter.run(job, 'xml')
111
119
  end
112
120
 
113
- # Get database information (title and type) from job yaml or from XML.
114
- # Sets `querydb` and `dbtype` attributes.
115
- def extract_db_info(ir)
116
- if @querydb.empty?
117
- @querydb = ir[3].split.map do |path|
118
- { title: File.basename(path) }
119
- end
120
- @dbtype = dbtype_from_program
121
- else
122
- @dbtype = @querydb.first.type
123
- end
121
+ def tsv_formatter
122
+ @tsv_formatter ||= Formatter.run(job, 'custom_tsv')
124
123
  end
125
124
 
126
- # Make search params available via `params` attribute.
127
- #
128
125
  # Search params tweak the results. Like evalue cutoff or penalty to open
129
126
  # a gap. BLAST+ doesn't list all input params in the XML output. Only
130
127
  # matrix, evalue, gapopen, gapextend, and filters are available from XML
131
128
  # output.
132
- def extract_params(ir)
129
+ def extract_params
133
130
  # Parse/get params from the job first.
134
131
  job_params = parse_advanced(job.advanced)
135
132
  # Old jobs from beta releases may not have the advanced key but they
@@ -138,7 +135,7 @@ module SequenceServer
138
135
 
139
136
  # Parse params from BLAST XML.
140
137
  @params = Hash[
141
- *ir[7].first.map { |k, v| [k.gsub('Parameters_', ''), v] }.flatten
138
+ *xml_ir[7].first.map { |k, v| [k.gsub('Parameters_', ''), v] }.flatten
142
139
  ]
143
140
  @params['evalue'] = @params.delete('expect')
144
141
 
@@ -146,13 +143,11 @@ module SequenceServer
146
143
  @params = job_params.merge(@params)
147
144
  end
148
145
 
149
- # Make search stats available via `stats` attribute.
150
- #
151
146
  # Search stats are computed metrics. Like total number of sequences or
152
147
  # effective search space.
153
- def extract_stats(ir)
154
- stats = ir[8].first[5][0]
155
- @stats = {
148
+ def extract_stats
149
+ stats = xml_ir[8].first[5][0]
150
+ {
156
151
  nsequences: stats[0],
157
152
  ncharacters: stats[1],
158
153
  hsp_length: stats[2],
@@ -163,20 +158,11 @@ module SequenceServer
163
158
  }
164
159
  end
165
160
 
166
- # Create query objects for the given report from the given ir.
167
- def extract_queries(xml_ir, tsv_ir)
168
- xml_ir[8].each do |n|
169
- query = Query.new(self, n[0], n[2], n[3], [])
170
- extract_hits(n[4], tsv_ir[query.id], query)
171
- queries << query
172
- end
173
- end
174
-
175
161
  # Create Hit objects for the given query from the given ir.
176
- def extract_hits(xml_ir, tsv_ir, query)
177
- return if xml_ir == ["\n"] # => No hits.
162
+ def query_hits(xml_ir, tsv_ir, query)
163
+ return [] if xml_ir == ["\n"] # => No hits.
178
164
 
179
- xml_ir.each do |n|
165
+ xml_ir.map do |n|
180
166
  # If hit comes from a non -parse_seqids database, then id (n[1]) is a
181
167
  # BLAST assigned internal id of the format 'gnl|BL_ORD_ID|serial'. We
182
168
  # assign the id to accession (because we use accession for sequence
@@ -190,19 +176,21 @@ module SequenceServer
190
176
  n[1] = defline.shift
191
177
  n[2] = defline.join(' ')
192
178
  end
179
+
193
180
  hit = Hit.new(query, n[0], n[1], n[3], n[2], n[4],
194
181
  tsv_ir[n[1]][0], tsv_ir[n[1]][1], [])
195
- extract_hsps(n[5], tsv_ir[n[1]][2], hit)
196
- query.hits << hit
182
+
183
+ hit.hsps = hsps(n[5], tsv_ir[n[1]][2], hit)
184
+
185
+ hit
197
186
  end
198
187
  end
199
188
 
200
- # Create HSP objects for the given hit from the given ir.
201
- def extract_hsps(xml_ir, tsv_ir, hit)
202
- xml_ir.each_with_index do |n, i|
189
+ def hsps(xml_ir, tsv_ir, hit)
190
+ xml_ir.map.with_index do |n, i|
203
191
  n.insert(14, tsv_ir[i])
204
- hsp = HSP.new(hit, *n)
205
- hit.hsps << hsp
192
+
193
+ HSP.new(hit, *n)
206
194
  end
207
195
  end
208
196
 
@@ -0,0 +1,38 @@
1
+ module SequenceServer
2
+ module BLAST
3
+ # Shells out to each blast algorithm to get the help text and then parses it to extract the tasks.
4
+ module Tasks
5
+ ALGORITHMS = %w[blastn blastp blastx tblastn tblastx].freeze
6
+
7
+ def self.to_h
8
+ @to_h ||= ALGORITHMS.map do |algorithm|
9
+ help_text = `#{algorithm} -help`
10
+ [algorithm, extract_tasks(help_text)]
11
+ end.to_h
12
+ end
13
+
14
+ def self.extract_tasks(help_text)
15
+ lines = help_text.split("\n")
16
+
17
+ # Find task help paragraph start
18
+ task_line_index = lines.find_index { |line| line =~ /^\W-task/ }
19
+ return [] unless task_line_index.to_i.positive?
20
+
21
+ lines.slice!(0...task_line_index)
22
+
23
+ # Find the end of task help paragraph
24
+ next_option_line_index = lines.find_index { |line| line =~ /^\W-/ && !line.include?('-task') }
25
+ lines.slice!(next_option_line_index..-1)
26
+
27
+ extract_tasks_from_paragraph(lines)
28
+ end
29
+
30
+ def self.extract_tasks_from_paragraph(paragraph_lines)
31
+ as_one_liner = paragraph_lines.map(&:strip).join(' ')
32
+ as_one_liner.split('Permissible values:').last.split('>').first.split(' ').map do |task|
33
+ task.strip.gsub("'", '')
34
+ end.reject(&:empty?)
35
+ end
36
+ end
37
+ end
38
+ end
@@ -1,3 +1,9 @@
1
1
  require_relative 'blast/job'
2
2
  require_relative 'blast/report'
3
3
  require_relative 'blast/constants'
4
+
5
+ module SequenceServer
6
+ module BLAST
7
+ VALID_SEQUENCE_ID = /\A[a-zA-Z0-9\-_.:*#|\[\]]+\z/
8
+ end
9
+ end
@@ -22,7 +22,7 @@ module SequenceServer
22
22
 
23
23
  return unless @upgraded
24
24
 
25
- logger.info 'You are using old configuration syntax. ' \
25
+ logger.warn 'You are using old configuration syntax. ' \
26
26
  'Run `sequenceserver -s` to update your config file syntax.'
27
27
  end
28
28
 
@@ -69,20 +69,7 @@ module SequenceServer
69
69
  @upgrade = true
70
70
  end
71
71
 
72
- # Old config files may have an options hash with array values. We turn the
73
- # array values into a hash. The logic is simple: If the array value is the
74
- # same as default, we give it the key 'default', otherwise we give it the
75
- # key 'custom'
76
- data[:options]&.each do |key, val|
77
- next if val.is_a? Hash
78
-
79
- data[:options][key] = if val == defaults[:options][key][:default]
80
- { default: val }
81
- else
82
- { custom: val }
83
- end
84
- @upgraded = true
85
- end
72
+ data[:options] = convert_deprecated_options(data[:options]) if data[:options]
86
73
 
87
74
  data
88
75
  end
@@ -112,19 +99,34 @@ module SequenceServer
112
99
  databases_widget: 'classic',
113
100
  options: {
114
101
  blastn: {
115
- default: ['-task blastn', '-evalue 1e-5']
102
+ default: {
103
+ description: nil,
104
+ attributes: ['-task blastn', '-evalue 1e-5']
105
+ }
116
106
  },
117
107
  blastp: {
118
- default: ['-evalue 1e-5']
108
+ default: {
109
+ description: nil,
110
+ attributes: ['-evalue 1e-5']
111
+ }
119
112
  },
120
113
  blastx: {
121
- default: ['-evalue 1e-5']
114
+ default: {
115
+ description: nil,
116
+ attributes: ['-evalue 1e-5']
117
+ }
122
118
  },
123
119
  tblastx: {
124
- default: ['-evalue 1e-5']
120
+ default: {
121
+ description: nil,
122
+ attributes: ['-evalue 1e-5']
123
+ }
125
124
  },
126
125
  tblastn: {
127
- default: ['-evalue 1e-5']
126
+ default: {
127
+ description: nil,
128
+ attributes: ['-evalue 1e-5']
129
+ }
128
130
  }
129
131
  },
130
132
  num_threads: 1,
@@ -137,5 +139,37 @@ module SequenceServer
137
139
  optimistic: false # Faster, but does not perform DB compatibility checks
138
140
  }
139
141
  end
142
+
143
+ def convert_deprecated_options(options)
144
+ options.each do |blast_algo, algo_config|
145
+ if algo_config.is_a?(Array)
146
+ # Very old config files may have a single array with CLI args.
147
+ # e.g. { blastn: ['-task blastn', '-evalue 1e-5'] }
148
+ # Convert the array values into a single hash naming it 'default' if
149
+ # the values match SequenceServer defaults.
150
+ options[blast_algo] = if algo_config == defaults.dig(:options, blast_algo, :default, :attributes)
151
+ { default: { attributes: algo_config } }
152
+ else
153
+ { custom: { attributes: algo_config } }
154
+ end
155
+ @upgraded = true
156
+ elsif algo_config.is_a?(Hash)
157
+ # v3.0.1 and older config files contain a flatter structure
158
+ # with an array instead of 'description' and 'attributes' keys.
159
+ # e.g. { blastn: { default: ['-task blastn', '-evalue 1e-5'] } }
160
+ algo_config.each do |config_name, config|
161
+ next unless config.is_a?(Array)
162
+
163
+ options[blast_algo][config_name] = {
164
+ description: nil,
165
+ attributes: config
166
+ }
167
+ @upgraded = true
168
+ end
169
+ end
170
+ end
171
+
172
+ options
173
+ end
140
174
  end
141
175
  end
@@ -37,8 +37,19 @@ module SequenceServer
37
37
  alias path name
38
38
 
39
39
  def retrieve(accession, coords = nil)
40
+ fail(
41
+ InvalidSequenceIdError,
42
+ "Invalid sequence id: #{accession}"
43
+ ) unless accession =~ SequenceServer::BLAST::VALID_SEQUENCE_ID
44
+
40
45
  cmd = "blastdbcmd -db #{name} -entry '#{accession}'"
46
+
41
47
  if coords
48
+ fail(
49
+ InvalidParameterError,
50
+ "Invalid range coordinates: #{coords}"
51
+ ) unless coords =~ /[0-9]+-[0-9]*/
52
+
42
53
  cmd << " -range #{coords}"
43
54
  end
44
55
  out, = sys(cmd, path: config[:bin])
@@ -52,6 +63,8 @@ module SequenceServer
52
63
  # Returns true if the database contains the given sequence id.
53
64
  # Returns false otherwise.
54
65
  def include?(id)
66
+ fail ArgumentError, "Invalid sequence id: #{id}" unless id =~ SequenceServer::BLAST::VALID_SEQUENCE_ID
67
+
55
68
  cmd = "blastdbcmd -entry '#{id}' -db #{name}"
56
69
  sys(cmd, path: config[:bin]) rescue false
57
70
  end
@@ -13,6 +13,7 @@ module SequenceServer
13
13
  #
14
14
  class MAKEBLASTDB
15
15
  extend Forwardable
16
+ GUESS_SAMPLE_SIZE = 1_048_576
16
17
 
17
18
  def_delegators SequenceServer, :config, :sys
18
19
 
@@ -333,8 +334,21 @@ module SequenceServer
333
334
  # If the given file is FASTA, returns Array of as many different
334
335
  # sequences in the portion of the file read. Returns the portion
335
336
  # of the file read wrapped in an Array otherwise.
336
- def sample_sequences(file)
337
- File.read(file, 1_048_576).split(/^>.+$/).delete_if(&:empty?)
337
+ def sample_sequences(file, offset = 0)
338
+ sample = File.read(file, GUESS_SAMPLE_SIZE, offset)
339
+
340
+ return [] if sample.nil?
341
+
342
+ # remove all unknown bases (indicated by 'N') before sampling
343
+ sample.gsub!(/N/, '')
344
+ meaningful_samples = sample.split(/^>.+$/).map { |line| line.gsub(/^\n+$/, '') }.delete_if(&:empty?)
345
+
346
+ if meaningful_samples.empty?
347
+ offset += GUESS_SAMPLE_SIZE
348
+ sample_sequences(file, offset)
349
+ else
350
+ meaningful_samples
351
+ end
338
352
  end
339
353
  end
340
354
  end
@@ -6,12 +6,6 @@ module SequenceServer
6
6
  # Report is a generic superclass. Programs, like BLAST, must implement their
7
7
  # own report subclass.
8
8
  class Report
9
- class << self
10
- def generate(job)
11
- BLAST::Report.new(job).generate
12
- end
13
- end
14
-
15
9
  # Provide access to global `config` & `logger` services to the report
16
10
  # objects.
17
11
  extend Forwardable