sequenceserver 0.7.9 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of sequenceserver might be problematic. Click here for more details.

@@ -93,8 +93,7 @@ module SequenceServer
93
93
  # blast methods (executables) and their corresponding absolute path
94
94
  set :binaries, {}
95
95
 
96
- # list of sorted blast databases grouped by databse type:
97
- # 'protein', or 'nucleotide'
96
+ # list of blast databases indexed by their hash value
98
97
  set :databases, {}
99
98
  end
100
99
 
@@ -186,10 +185,8 @@ module SequenceServer
186
185
  self.databases = scan_blast_db(database, binaries['blastdbcmd']).freeze
187
186
 
188
187
  # Log the discovery of databases.
189
- databases.each do |type, dbs|
190
- dbs.each do |d|
191
- log.info("Found #{type} database: #{d.title} at #{d.name}")
192
- end
188
+ databases.each do |id, database|
189
+ log.info("Found #{database.type} database: #{database.title} at #{database.name}")
193
190
  end
194
191
  rescue IOError => error
195
192
  log.fatal("Fail: #{error}")
@@ -221,48 +218,60 @@ module SequenceServer
221
218
  erb :search
222
219
  end
223
220
 
224
- post '/' do
225
- method = params['method']
226
- db_type_param = params['db']
227
- sequence = params[:sequence]
221
+ before '/' do
222
+ pass if params.empty?
228
223
 
229
- # evaluate empty sequence as nil, otherwise as fasta
230
- sequence = sequence.empty? ? nil : to_fasta(sequence)
224
+ # ensure required params present
225
+ #
226
+ # If a required parameter is missing, SequenceServer returns 'Bad Request
227
+ # (400)' error.
228
+ #
229
+ # See Twitter's [Error Codes & Responses][1] page for reference.
230
+ #
231
+ # [1]: https://dev.twitter.com/docs/error-codes-responses
231
232
 
232
- # Raise ArgumentError if there is no database selected
233
- if db_type_param.nil?
234
- raise ArgumentError, "No BLAST database selected"
233
+ if params[:method].nil? or params[:method].empty?
234
+ halt 400, "No BLAST method provided."
235
235
  end
236
- db_type = db_type_param.first.first
237
236
 
238
- # can not proceed if one of these is missing
239
- raise ArgumentError unless sequence and db_type and method
240
- settings.log.info("requested #{method} against #{db_type.to_s} database")
237
+ if params[:sequence].nil? or params[:sequence].empty?
238
+ halt 400, "No input sequence provided."
239
+ end
241
240
 
242
- # only allowed blast methods should be used
243
- blast_methods = %w|blastn blastp blastx tblastn tblastx|
244
- raise ArgumentError, "wrong method: #{method}" unless blast_methods.include?(method)
241
+ if params[:databases].nil?
242
+ halt 400, "No BLAST database provided."
243
+ end
245
244
 
246
- # check if input_fasta is compatible with the selected blast method
247
- sequence_type = type_of_sequences(sequence)
248
- settings.log.debug('sequence: ' + sequence)
249
- settings.log.debug('input seq type: ' + sequence_type.to_s)
250
- settings.log.debug('blast db type: ' + db_type)
251
- settings.log.debug('blast method: ' + method)
245
+ # ensure params are valid #
252
246
 
253
- unless blast_methods_for(sequence_type).include?(method)
254
- raise ArgumentError, "Cannot #{method} a #{sequence_type} query."
247
+ # only allowed blast methods should be used
248
+ blast_methods = %w|blastn blastp blastx tblastn tblastx|
249
+ unless blast_methods.include?(params[:method])
250
+ halt 400, "Unknown BLAST method: #{params[:method]}."
255
251
  end
256
252
 
257
- # check if selected db is comaptible with the selected blast method
258
- allowed_db_type = db_type_for(method)
259
- unless allowed_db_type.to_s == db_type
260
- raise ArgumentError, "Cannot #{method} against a #{db_type} database.
261
- Need #{allowed_db_type} database."
253
+ # check the advanced options are sensible
254
+ begin #FIXME
255
+ validate_advanced_parameters(params[:advanced])
256
+ rescue ArgumentError => error
257
+ halt 400, "Advanced parameters invalid: #{error}"
262
258
  end
263
259
 
260
+ # log params
261
+ settings.log.debug('method: ' + params[:method])
262
+ settings.log.debug('sequence: ' + params[:sequence])
263
+ settings.log.debug('database: ' + params[:databases].inspect)
264
+ settings.log.debug('advanced: ' + params[:advanced])
265
+ end
266
+
267
+ post '/' do
268
+ method = params['method']
269
+ databases = params[:databases]
270
+ sequence = params[:sequence]
264
271
  advanced_opts = params['advanced']
265
- validate_advanced_parameters(advanced_opts) #check the advanced options are sensible
272
+
273
+ # evaluate empty sequence as nil, otherwise as fasta
274
+ sequence = sequence.empty? ? nil : to_fasta(sequence)
266
275
 
267
276
  # blastn implies blastn, not megablast; but let's not interfere if a user
268
277
  # specifies `task` herself
@@ -270,29 +279,21 @@ module SequenceServer
270
279
  advanced_opts << ' -task blastn '
271
280
  end
272
281
 
273
- method = settings.binaries[ method ]
274
- settings.log.debug('settings.databases: ' + settings.databases.inspect)
275
- databases = params['db'][db_type].map{|index|
276
- settings.databases[db_type][index.to_i].name
282
+ method = settings.binaries[ method ]
283
+ databases = params[:databases].map{|index|
284
+ settings.databases[index].name
277
285
  }
278
286
 
279
- # run blast to blast archive
280
- blast = Blast.blast_string_to_blast_archive(method, databases.join(' '), sequence, advanced_opts)
281
- # log the command that was run
282
- settings.log.info('Ran to blast archive: ' + blast.command) if settings.logging
283
-
284
- # convert blast archive to HTML version
285
- blast.convert_blast_archive_to_html_result(settings.binaries['blast_formatter'])
286
- # log the command that was run
287
- settings.log.info('Ran to get HTML output: ' + blast.command) if settings.logging
287
+ # run blast and log
288
+ blast = Blast.new(method, sequence, databases.join(' '), advanced_opts)
289
+ blast.run!
290
+ settings.log.info('Ran: ' + blast.command)
288
291
 
289
- @blast = format_blast_results(blast.result, databases)
290
-
291
- if request.xhr?
292
- return @blast
292
+ unless blast.success?
293
+ halt *blast.error
293
294
  end
294
295
 
295
- erb :search
296
+ format_blast_results(blast.result, databases)
296
297
  end
297
298
 
298
299
  # get '/get_sequence/?id=sequence_ids&db=retreival_databases'
@@ -372,9 +373,6 @@ HEADER
372
373
  end
373
374
 
374
375
  def format_blast_results(result, databases)
375
- raise ArgumentError, 'Problem: empty result! Maybe your query was invalid?' if !result.class == String
376
- raise ArgumentError, 'Problem: empty result! Maybe your query was invalid?' if result.empty?
377
-
378
376
  formatted_result = ''
379
377
  @all_retrievable_ids = []
380
378
  string_of_used_databases = databases.join(' ')
@@ -1,200 +1,93 @@
1
1
  require 'tempfile'
2
- require 'open3'
3
2
 
4
3
  module SequenceServer
5
- # Simple ncbi-blast wrapper. Check examples below.
4
+ # Simple BLAST+ wrapper.
6
5
  class Blast
7
- # blast method
8
- attr_accessor :method
9
6
 
10
- # database name
11
- attr_accessor :db
12
-
13
- # query sequence string
14
- attr_accessor :qstring
15
-
16
- # query file name
17
- attr_accessor :qfile
18
-
19
- # advanced blast options
20
- attr_accessor :options
7
+ ERROR_LINE = /\(CArgException.*\)\s(.*)/
21
8
 
22
9
  # command string to be executed
23
- attr_reader :command
10
+ attr_reader :command
24
11
 
25
12
  # result of executing command
26
- attr_reader :result
13
+ attr_reader :result
27
14
 
28
- # blast archive file output
29
- attr_reader :blast_archive_tempfile
30
-
31
- # errors if any while executing command
32
- attr_reader :error
15
+ # errors as [status, message]
16
+ attr_reader :error
33
17
 
34
18
  # Initialize a new blast search.
35
19
  # ---
36
- # Arguments(optional):
37
- # * method(String) - blast executable (shell executable, or absolute path)
38
- # * db(String) - database name as returned by 'blastdbcmd -list'
39
- # * query(Hash) - query string/file, and options.
40
- #
41
- # In the query Hash, use:
42
- # * :qfile(String) - to run Blast against a file.
43
- # * :qstrin(String) - to run Blast against a string.
44
- # * :options(String) - to specify multiple blast options.
20
+ # Arguments:
21
+ # * method (String) - blast executable (shell executable, or absolute path)
22
+ # * query (String) - query string
23
+ # * databases (String) - database name as returned by 'blastdbcmd -list'
24
+ # * options (String) - other options
45
25
  #
46
- # Either :qfile, or :qstring should be used. If both are given, by design
47
- # :qstring will be used to run blast.
48
26
  # ---
49
27
  # Examples:
50
28
  #
51
- # b = Blast.new("blastn", "S.cdna.fasta", :qfile => 'query.seq', :options => "-html -num_threads 4")
52
- # b = Blast.new("blastn", "S.cdna.fasta", :qstring => 'ATGTCCGCGAATCGATTGAACGTGCTGGTGACCCTGATGCTCGCCGTCGCGCTTCTTGTG')
29
+ # b = Blast.new("blastn", 'ATGTCCGCGAATCGATTGAACGTGCTGGTGACCCTGATGCTCGCCGTCGCGCTTCTTGTG', "S.cdna.fasta", "-html -num_threads 4")
53
30
  #
54
31
  # b.run! => true
55
32
  # b.result => "blast output"
56
- #
57
- # # change the blast method.
58
- # b.method = 'blastp'
59
- #
60
- # b.run! => false
61
- # b.error => "blast error output"
62
- def initialize(method = nil, db = nil, query = {})
63
- @method = method
64
- @db = db
65
- @qstring = query[:qstring]
66
- @qfile = query[:qfile]
67
- @options = query[:options]
33
+ def initialize(method, query, databases, options = nil)
34
+ @method = method
35
+ @databases = databases
36
+
37
+ # create a tempfile for the given query
38
+ qfile = Tempfile.new('sequenceserver_query')
39
+ qfile.puts(query)
40
+ qfile.close
41
+ @query = qfile.path
42
+
43
+ # Add -html to the list of options so that BLAST outputs an HTML report
44
+ @options = options.to_s
45
+ @options += " -html"
68
46
  end
69
47
 
70
48
  # Run blast every time it is called. Returns the success
71
- # status - true, or false. Blast method, db, and qfile/qstring
72
- # need to be set before calling this method, else blast will fail.
73
- #
74
- # b = Blast.new
75
- # b.run! => false
76
- #
77
- # # set blast method, and db
78
- # b.method = 'blastn'
79
- # b.db = 'S.cdna.fasta'
80
- #
81
- # b.run! => false
82
- # b.errors => "blast error output"
83
- #
84
- # # set qfile
85
- # b.qfile = 'query1.seq'
86
- #
87
- # b.run! => true
88
- # b.reuslt => "blast output"
49
+ # status - true, or false.
89
50
  def run!
90
- # can not run blast if method is not specified
91
- return false unless @method
92
-
93
- # create a tempfile if qstring is given
94
- if @qstring
95
- @tempfile = Tempfile.new('qfile')
96
- @tempfile.puts(qstring)
97
- @tempfile.close
98
- @qfile = @tempfile.path
99
- end
51
+ @result, @error, status = execute(command)
100
52
 
101
- # form command to execute
102
- @command = to_s
53
+ status == 0 and return @success = true
103
54
 
104
- # execute command and capture both stdout, and stderr
105
- Open3.popen3(@command) do |stdin, stdout, stderr|
106
- @result = stdout.readlines # convert to string?
107
- @error = stderr.readlines
55
+ if status == 1
56
+ message = @error.each{|l| l.match(ERROR_LINE) and break Regexp.last_match[1]}
57
+ message = message || @error
58
+ @error = [400, message]
59
+ else
60
+ @error = [500, @error]
108
61
  end
109
62
 
110
- # set and return success status
111
- return @success = @error.empty?
112
-
113
- ensure
114
- # delete tempfile if it was created
115
- @tempfile.unlink if @tempfile
63
+ false
116
64
  end
117
65
 
118
- # Return the blast type used as a String.
119
- #
120
- # b.method = '/home/yeban/opt/blastn'
121
- # b.type => 'blastn'
122
- def type
123
- @type ||= @method[(@method.rindex('/') + 1)..-1]
66
+ # The command that will be executed.
67
+ def command
68
+ @command ||= "#@method -db '#@databases' -query '#@query' #@options"
124
69
  end
125
70
 
126
- # Return success status - true, false, or nil.
127
- # 'nil' implies that blast has not been run yet.
71
+ # Return success status.
128
72
  def success?
129
73
  @success
130
74
  end
131
75
 
132
- # String representation of the blast object - same as
133
- # the command to be executed.
134
- def to_s
135
- s = "#@method "
136
- s << "-db '#@db' " if @db
137
- s << "-query #@qfile " if @qfile
138
- s << @options.to_s if @options
139
- s
140
- end
76
+ private
141
77
 
142
- # Especially helpful in irb - "status : command"
143
- def inspect
144
- return to_s if success?.nil?
145
- (success? ? "success : " : "fail : ") + @command
146
- end
78
+ # Execute a command and return its stdout, stderr, and exit status.
79
+ def execute(command)
80
+ rfile = Tempfile.new('sequenceserver_result')
81
+ efile = Tempfile.new('sequenceserver_error')
82
+ [rfile, efile].each {|file| file.close}
147
83
 
148
- # Run the blast with the options specified by the user, returning a blast archive file, which can be further transformed into other formats
149
- def run_to_blast_archive!
150
- @blast_archive_tempfile = Tempfile.open('seqserve_formatter')
151
-
152
- # Add -outfmt 11 to list of options so that it outputs a blast archive
153
- @options ||= ''
154
- @options += " -outfmt 11 -out #{@blast_archive_tempfile.path}"
84
+ system("#{command} > #{rfile.path} 2> #{efile.path}")
85
+ status = $?.exitstatus
155
86
 
156
- # Run the blast
157
- run!
158
- return @success unless @success
159
- end
160
-
161
- # convert the blast archive to a regular HTML result, stored
162
- # as an instance variable Blast#result
163
- def convert_blast_archive_to_html_result(blast_formatter_path)
164
- @command = "#{blast_formatter_path} -archive #{blast_archive_tempfile.path} -html"
165
-
166
- # execute command and capture both stdout, and stderr
167
- Open3.popen3(@command) do |stdin, stdout, stderr|
168
- @result = stdout.readlines # convert to string?
169
- @error = stderr.readlines
170
- end
171
- end
172
-
173
- class << self
174
- # shortcut method to run blast against a query file
175
- def blast_file(method, db, qfile, options = nil)
176
- b = Blast.new(method, db, :qfile => qfile, :options => options)
177
- b.run!
178
- b
179
- end
180
-
181
- # shortcut method to run blast against a query string
182
- def blast_string(method, db, qstring, options = nil)
183
- b = Blast.new(method, db, :qstring => qstring, :options => options)
184
- b.run!
185
- b
186
- end
187
-
188
- # shortcut method to run blast with a query string and return a
189
- # blast archive, which can then be further processed into other useful
190
- # output forms (e.g. HTML, GFF). If it ran successfully, the blast archive
191
- # is a Tempfile accessible as an instance variable of the returned
192
- # Blast object.
193
- def blast_string_to_blast_archive(method, db, qstring, options = nil)
194
- b = Blast.new(method, db, :qstring => qstring, :options => options)
195
- b.run_to_blast_archive!
196
- b
197
- end
87
+ return File.readlines(rfile.path), File.readlines(efile.path), status
88
+ ensure
89
+ rfile.unlink
90
+ efile.unlink
198
91
  end
199
92
  end
200
93
  end
@@ -1,7 +1,9 @@
1
+ require 'digest/md5'
2
+
1
3
  module SequenceServer
2
- class Database < Struct.new("Database", :name, :title)
4
+ class Database < Struct.new("Database", :name, :title, :type)
3
5
  def to_s
4
- "#{title} #{name}"
6
+ "#{type}: #{title} #{name}"
5
7
  end
6
8
 
7
9
  # It's not very meaningful to compare Database objects, however,
@@ -19,5 +21,9 @@ module SequenceServer
19
21
  self.name <=> other.name
20
22
  end
21
23
  end
24
+
25
+ def hash
26
+ @hash ||= Digest::MD5.hexdigest(self.name)
27
+ end
22
28
  end
23
29
  end
@@ -92,29 +92,21 @@ module SequenceServer
92
92
  db_list.each_line do |line|
93
93
  next if line.empty? # required for BLAST+ 2.2.22
94
94
  type, name, *title = line.split(' ')
95
- type = type.downcase
95
+ type = type.downcase.intern
96
96
  name = name.freeze
97
97
  title = title.join(' ').freeze
98
98
 
99
99
  # skip past all but alias file of a NCBI multi-part BLAST database
100
- if name.match(/\/\w*[.]\d{2,}[.\w]*/)
100
+ if multipart_database_name?(name)
101
101
  log.info(%|Found a multi-part database volume at #{name} - ignoring it.|)
102
102
  next
103
103
  end
104
104
 
105
105
  #LOG.info("Found #{type} database: #{title} at #{name}")
106
- (db[type] ||= []) << Database.new(name, title)
106
+ database = Database.new(name, title, type)
107
+ db[database.hash] = database
107
108
  end
108
109
 
109
-
110
- # the erb would fail as calling nil.each_with_index if a dbtype was undefined.
111
- db['protein'] = [] unless db.keys.include?('protein')
112
- db['nucleotide'] = [] unless db.keys.include?('nucleotide')
113
-
114
- # sort the list of dbs
115
- db['protein'].sort!
116
- db['nucleotide'].sort!
117
-
118
110
  db
119
111
  end
120
112
 
@@ -125,6 +117,16 @@ module SequenceServer
125
117
  def command?(command)
126
118
  system("which #{command} > /dev/null 2>&1")
127
119
  end
120
+
121
+ # Returns true if the database name appears to be a multi-part database name.
122
+ #
123
+ # e.g.
124
+ # /home/ben/pd.ben/sequenceserver/db/nr.00 => yes
125
+ # /home/ben/pd.ben/sequenceserver/db/nr => no
126
+ # /home/ben/pd.ben/sequenceserver/db/img3.5.finished.faa.01 => yes
127
+ def multipart_database_name?(db_name)
128
+ !(db_name.match(/.+\/\S+\d{2}$/).nil?)
129
+ end
128
130
  end
129
131
 
130
132
  def self.included(klass)