sequenceserver 0.7.9 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -93,8 +93,7 @@ module SequenceServer
93
93
  # blast methods (executables) and their corresponding absolute path
94
94
  set :binaries, {}
95
95
 
96
- # list of sorted blast databases grouped by databse type:
97
- # 'protein', or 'nucleotide'
96
+ # list of blast databases indexed by their hash value
98
97
  set :databases, {}
99
98
  end
100
99
 
@@ -186,10 +185,8 @@ module SequenceServer
186
185
  self.databases = scan_blast_db(database, binaries['blastdbcmd']).freeze
187
186
 
188
187
  # Log the discovery of databases.
189
- databases.each do |type, dbs|
190
- dbs.each do |d|
191
- log.info("Found #{type} database: #{d.title} at #{d.name}")
192
- end
188
+ databases.each do |id, database|
189
+ log.info("Found #{database.type} database: #{database.title} at #{database.name}")
193
190
  end
194
191
  rescue IOError => error
195
192
  log.fatal("Fail: #{error}")
@@ -221,48 +218,60 @@ module SequenceServer
221
218
  erb :search
222
219
  end
223
220
 
224
- post '/' do
225
- method = params['method']
226
- db_type_param = params['db']
227
- sequence = params[:sequence]
221
+ before '/' do
222
+ pass if params.empty?
228
223
 
229
- # evaluate empty sequence as nil, otherwise as fasta
230
- sequence = sequence.empty? ? nil : to_fasta(sequence)
224
+ # ensure required params present
225
+ #
226
+ # If a required parameter is missing, SequenceServer returns 'Bad Request
227
+ # (400)' error.
228
+ #
229
+ # See Twitter's [Error Codes & Responses][1] page for reference.
230
+ #
231
+ # [1]: https://dev.twitter.com/docs/error-codes-responses
231
232
 
232
- # Raise ArgumentError if there is no database selected
233
- if db_type_param.nil?
234
- raise ArgumentError, "No BLAST database selected"
233
+ if params[:method].nil? or params[:method].empty?
234
+ halt 400, "No BLAST method provided."
235
235
  end
236
- db_type = db_type_param.first.first
237
236
 
238
- # can not proceed if one of these is missing
239
- raise ArgumentError unless sequence and db_type and method
240
- settings.log.info("requested #{method} against #{db_type.to_s} database")
237
+ if params[:sequence].nil? or params[:sequence].empty?
238
+ halt 400, "No input sequence provided."
239
+ end
241
240
 
242
- # only allowed blast methods should be used
243
- blast_methods = %w|blastn blastp blastx tblastn tblastx|
244
- raise ArgumentError, "wrong method: #{method}" unless blast_methods.include?(method)
241
+ if params[:databases].nil?
242
+ halt 400, "No BLAST database provided."
243
+ end
245
244
 
246
- # check if input_fasta is compatible with the selected blast method
247
- sequence_type = type_of_sequences(sequence)
248
- settings.log.debug('sequence: ' + sequence)
249
- settings.log.debug('input seq type: ' + sequence_type.to_s)
250
- settings.log.debug('blast db type: ' + db_type)
251
- settings.log.debug('blast method: ' + method)
245
+ # ensure params are valid #
252
246
 
253
- unless blast_methods_for(sequence_type).include?(method)
254
- raise ArgumentError, "Cannot #{method} a #{sequence_type} query."
247
+ # only allowed blast methods should be used
248
+ blast_methods = %w|blastn blastp blastx tblastn tblastx|
249
+ unless blast_methods.include?(params[:method])
250
+ halt 400, "Unknown BLAST method: #{params[:method]}."
255
251
  end
256
252
 
257
- # check if selected db is comaptible with the selected blast method
258
- allowed_db_type = db_type_for(method)
259
- unless allowed_db_type.to_s == db_type
260
- raise ArgumentError, "Cannot #{method} against a #{db_type} database.
261
- Need #{allowed_db_type} database."
253
+ # check the advanced options are sensible
254
+ begin #FIXME
255
+ validate_advanced_parameters(params[:advanced])
256
+ rescue ArgumentError => error
257
+ halt 400, "Advanced parameters invalid: #{error}"
262
258
  end
263
259
 
260
+ # log params
261
+ settings.log.debug('method: ' + params[:method])
262
+ settings.log.debug('sequence: ' + params[:sequence])
263
+ settings.log.debug('database: ' + params[:databases].inspect)
264
+ settings.log.debug('advanced: ' + params[:advanced])
265
+ end
266
+
267
+ post '/' do
268
+ method = params['method']
269
+ databases = params[:databases]
270
+ sequence = params[:sequence]
264
271
  advanced_opts = params['advanced']
265
- validate_advanced_parameters(advanced_opts) #check the advanced options are sensible
272
+
273
+ # evaluate empty sequence as nil, otherwise as fasta
274
+ sequence = sequence.empty? ? nil : to_fasta(sequence)
266
275
 
267
276
  # blastn implies blastn, not megablast; but let's not interfere if a user
268
277
  # specifies `task` herself
@@ -270,29 +279,21 @@ module SequenceServer
270
279
  advanced_opts << ' -task blastn '
271
280
  end
272
281
 
273
- method = settings.binaries[ method ]
274
- settings.log.debug('settings.databases: ' + settings.databases.inspect)
275
- databases = params['db'][db_type].map{|index|
276
- settings.databases[db_type][index.to_i].name
282
+ method = settings.binaries[ method ]
283
+ databases = params[:databases].map{|index|
284
+ settings.databases[index].name
277
285
  }
278
286
 
279
- # run blast to blast archive
280
- blast = Blast.blast_string_to_blast_archive(method, databases.join(' '), sequence, advanced_opts)
281
- # log the command that was run
282
- settings.log.info('Ran to blast archive: ' + blast.command) if settings.logging
283
-
284
- # convert blast archive to HTML version
285
- blast.convert_blast_archive_to_html_result(settings.binaries['blast_formatter'])
286
- # log the command that was run
287
- settings.log.info('Ran to get HTML output: ' + blast.command) if settings.logging
287
+ # run blast and log
288
+ blast = Blast.new(method, sequence, databases.join(' '), advanced_opts)
289
+ blast.run!
290
+ settings.log.info('Ran: ' + blast.command)
288
291
 
289
- @blast = format_blast_results(blast.result, databases)
290
-
291
- if request.xhr?
292
- return @blast
292
+ unless blast.success?
293
+ halt *blast.error
293
294
  end
294
295
 
295
- erb :search
296
+ format_blast_results(blast.result, databases)
296
297
  end
297
298
 
298
299
  # get '/get_sequence/?id=sequence_ids&db=retreival_databases'
@@ -372,9 +373,6 @@ HEADER
372
373
  end
373
374
 
374
375
  def format_blast_results(result, databases)
375
- raise ArgumentError, 'Problem: empty result! Maybe your query was invalid?' if !result.class == String
376
- raise ArgumentError, 'Problem: empty result! Maybe your query was invalid?' if result.empty?
377
-
378
376
  formatted_result = ''
379
377
  @all_retrievable_ids = []
380
378
  string_of_used_databases = databases.join(' ')
@@ -1,200 +1,93 @@
1
1
  require 'tempfile'
2
- require 'open3'
3
2
 
4
3
  module SequenceServer
5
- # Simple ncbi-blast wrapper. Check examples below.
4
+ # Simple BLAST+ wrapper.
6
5
  class Blast
7
- # blast method
8
- attr_accessor :method
9
6
 
10
- # database name
11
- attr_accessor :db
12
-
13
- # query sequence string
14
- attr_accessor :qstring
15
-
16
- # query file name
17
- attr_accessor :qfile
18
-
19
- # advanced blast options
20
- attr_accessor :options
7
+ ERROR_LINE = /\(CArgException.*\)\s(.*)/
21
8
 
22
9
  # command string to be executed
23
- attr_reader :command
10
+ attr_reader :command
24
11
 
25
12
  # result of executing command
26
- attr_reader :result
13
+ attr_reader :result
27
14
 
28
- # blast archive file output
29
- attr_reader :blast_archive_tempfile
30
-
31
- # errors if any while executing command
32
- attr_reader :error
15
+ # errors as [status, message]
16
+ attr_reader :error
33
17
 
34
18
  # Initialize a new blast search.
35
19
  # ---
36
- # Arguments(optional):
37
- # * method(String) - blast executable (shell executable, or absolute path)
38
- # * db(String) - database name as returned by 'blastdbcmd -list'
39
- # * query(Hash) - query string/file, and options.
40
- #
41
- # In the query Hash, use:
42
- # * :qfile(String) - to run Blast against a file.
43
- # * :qstrin(String) - to run Blast against a string.
44
- # * :options(String) - to specify multiple blast options.
20
+ # Arguments:
21
+ # * method (String) - blast executable (shell executable, or absolute path)
22
+ # * query (String) - query string
23
+ # * databases (String) - database name as returned by 'blastdbcmd -list'
24
+ # * options (String) - other options
45
25
  #
46
- # Either :qfile, or :qstring should be used. If both are given, by design
47
- # :qstring will be used to run blast.
48
26
  # ---
49
27
  # Examples:
50
28
  #
51
- # b = Blast.new("blastn", "S.cdna.fasta", :qfile => 'query.seq', :options => "-html -num_threads 4")
52
- # b = Blast.new("blastn", "S.cdna.fasta", :qstring => 'ATGTCCGCGAATCGATTGAACGTGCTGGTGACCCTGATGCTCGCCGTCGCGCTTCTTGTG')
29
+ # b = Blast.new("blastn", 'ATGTCCGCGAATCGATTGAACGTGCTGGTGACCCTGATGCTCGCCGTCGCGCTTCTTGTG', "S.cdna.fasta", "-html -num_threads 4")
53
30
  #
54
31
  # b.run! => true
55
32
  # b.result => "blast output"
56
- #
57
- # # change the blast method.
58
- # b.method = 'blastp'
59
- #
60
- # b.run! => false
61
- # b.error => "blast error output"
62
- def initialize(method = nil, db = nil, query = {})
63
- @method = method
64
- @db = db
65
- @qstring = query[:qstring]
66
- @qfile = query[:qfile]
67
- @options = query[:options]
33
+ def initialize(method, query, databases, options = nil)
34
+ @method = method
35
+ @databases = databases
36
+
37
+ # create a tempfile for the given query
38
+ qfile = Tempfile.new('sequenceserver_query')
39
+ qfile.puts(query)
40
+ qfile.close
41
+ @query = qfile.path
42
+
43
+ # Add -html to list of options so that it outputs an HTML report
44
+ @options = options.to_s
45
+ @options += " -html"
68
46
  end
69
47
 
70
48
  # Run blast every time it is called. Returns the success
71
- # status - true, or false. Blast method, db, and qfile/qstring
72
- # need to be set before calling this method, else blast will fail.
73
- #
74
- # b = Blast.new
75
- # b.run! => false
76
- #
77
- # # set blast method, and db
78
- # b.method = 'blastn'
79
- # b.db = 'S.cdna.fasta'
80
- #
81
- # b.run! => false
82
- # b.errors => "blast error output"
83
- #
84
- # # set qfile
85
- # b.qfile = 'query1.seq'
86
- #
87
- # b.run! => true
88
- # b.reuslt => "blast output"
49
+ # status - true, or false.
89
50
  def run!
90
- # can not run blast if method is not specified
91
- return false unless @method
92
-
93
- # create a tempfile if qstring is given
94
- if @qstring
95
- @tempfile = Tempfile.new('qfile')
96
- @tempfile.puts(qstring)
97
- @tempfile.close
98
- @qfile = @tempfile.path
99
- end
51
+ @result, @error, status = execute(command)
100
52
 
101
- # form command to execute
102
- @command = to_s
53
+ status == 0 and return @success = true
103
54
 
104
- # execute command and capture both stdout, and stderr
105
- Open3.popen3(@command) do |stdin, stdout, stderr|
106
- @result = stdout.readlines # convert to string?
107
- @error = stderr.readlines
55
+ if status == 1
56
+ message = @error.each{|l| l.match(ERROR_LINE) and break Regexp.last_match[1]}
57
+ message = message || @error
58
+ @error = [400, message]
59
+ else
60
+ @error = [500, @error]
108
61
  end
109
62
 
110
- # set and return success status
111
- return @success = @error.empty?
112
-
113
- ensure
114
- # delete tempfile if it was created
115
- @tempfile.unlink if @tempfile
63
+ false
116
64
  end
117
65
 
118
- # Return the blast type used as a String.
119
- #
120
- # b.method = '/home/yeban/opt/blastn'
121
- # b.type => 'blastn'
122
- def type
123
- @type ||= @method[(@method.rindex('/') + 1)..-1]
66
+ # The command that will be executed.
67
+ def command
68
+ @command ||= "#@method -db '#@databases' -query '#@query' #@options"
124
69
  end
125
70
 
126
- # Return success status - true, false, or nil.
127
- # 'nil' implies that blast has not been run yet.
71
+ # Return success status.
128
72
  def success?
129
73
  @success
130
74
  end
131
75
 
132
- # String representation of the blast object - same as
133
- # the command to be executed.
134
- def to_s
135
- s = "#@method "
136
- s << "-db '#@db' " if @db
137
- s << "-query #@qfile " if @qfile
138
- s << @options.to_s if @options
139
- s
140
- end
76
+ private
141
77
 
142
- # Especially helpful in irb - "status : command"
143
- def inspect
144
- return to_s if success?.nil?
145
- (success? ? "success : " : "fail : ") + @command
146
- end
78
+ # Execute a command and return its stdout, stderr, and exit status.
79
+ def execute(command)
80
+ rfile = Tempfile.new('sequenceserver_result')
81
+ efile = Tempfile.new('sequenceserver_error')
82
+ [rfile, efile].each {|file| file.close}
147
83
 
148
- # Run the blast with the options specified by the user, returning a blast archive file, which can be further transformed into other formats
149
- def run_to_blast_archive!
150
- @blast_archive_tempfile = Tempfile.open('seqserve_formatter')
151
-
152
- # Add -outfmt 11 to list of options so that it outputs a blast archive
153
- @options ||= ''
154
- @options += " -outfmt 11 -out #{@blast_archive_tempfile.path}"
84
+ system("#{command} > #{rfile.path} 2> #{efile.path}")
85
+ status = $?.exitstatus
155
86
 
156
- # Run the blast
157
- run!
158
- return @success unless @success
159
- end
160
-
161
- # convert the blast archive to a regular HTML result, stored
162
- # as an instance variable Blast#result
163
- def convert_blast_archive_to_html_result(blast_formatter_path)
164
- @command = "#{blast_formatter_path} -archive #{blast_archive_tempfile.path} -html"
165
-
166
- # execute command and capture both stdout, and stderr
167
- Open3.popen3(@command) do |stdin, stdout, stderr|
168
- @result = stdout.readlines # convert to string?
169
- @error = stderr.readlines
170
- end
171
- end
172
-
173
- class << self
174
- # shortcut method to run blast against a query file
175
- def blast_file(method, db, qfile, options = nil)
176
- b = Blast.new(method, db, :qfile => qfile, :options => options)
177
- b.run!
178
- b
179
- end
180
-
181
- # shortcut method to run blast against a query string
182
- def blast_string(method, db, qstring, options = nil)
183
- b = Blast.new(method, db, :qstring => qstring, :options => options)
184
- b.run!
185
- b
186
- end
187
-
188
- # shortcut method to run blast with a query string and return a
189
- # blast archive, which can then be further processed into other useful
190
- # output forms (e.g. HTML, GFF). If it ran successfully, the blast archive
191
- # is a Tempfile accessible as an instance variable of the returned
192
- # Blast object.
193
- def blast_string_to_blast_archive(method, db, qstring, options = nil)
194
- b = Blast.new(method, db, :qstring => qstring, :options => options)
195
- b.run_to_blast_archive!
196
- b
197
- end
87
+ return File.readlines(rfile.path), File.readlines(efile.path), status
88
+ ensure
89
+ rfile.unlink
90
+ efile.unlink
198
91
  end
199
92
  end
200
93
  end
@@ -1,7 +1,9 @@
1
+ require 'digest/md5'
2
+
1
3
  module SequenceServer
2
- class Database < Struct.new("Database", :name, :title)
4
+ class Database < Struct.new("Database", :name, :title, :type)
3
5
  def to_s
4
- "#{title} #{name}"
6
+ "#{type}: #{title} #{name}"
5
7
  end
6
8
 
7
9
  # It's not very meaningful to compare Database objects, however,
@@ -19,5 +21,9 @@ module SequenceServer
19
21
  self.name <=> other.name
20
22
  end
21
23
  end
24
+
25
+ def hash
26
+ @hash ||= Digest::MD5.hexdigest(self.name)
27
+ end
22
28
  end
23
29
  end
@@ -92,29 +92,21 @@ module SequenceServer
92
92
  db_list.each_line do |line|
93
93
  next if line.empty? # required for BLAST+ 2.2.22
94
94
  type, name, *title = line.split(' ')
95
- type = type.downcase
95
+ type = type.downcase.intern
96
96
  name = name.freeze
97
97
  title = title.join(' ').freeze
98
98
 
99
99
  # skip past all but alias file of a NCBI multi-part BLAST database
100
- if name.match(/\/\w*[.]\d{2,}[.\w]*/)
100
+ if multipart_database_name?(name)
101
101
  log.info(%|Found a multi-part database volume at #{name} - ignoring it.|)
102
102
  next
103
103
  end
104
104
 
105
105
  #LOG.info("Found #{type} database: #{title} at #{name}")
106
- (db[type] ||= []) << Database.new(name, title)
106
+ database = Database.new(name, title, type)
107
+ db[database.hash] = database
107
108
  end
108
109
 
109
-
110
- # the erb would fail as calling nil.each_with_index if a dbtype was undefined.
111
- db['protein'] = [] unless db.keys.include?('protein')
112
- db['nucleotide'] = [] unless db.keys.include?('nucleotide')
113
-
114
- # sort the list of dbs
115
- db['protein'].sort!
116
- db['nucleotide'].sort!
117
-
118
110
  db
119
111
  end
120
112
 
@@ -125,6 +117,16 @@ module SequenceServer
125
117
  def command?(command)
126
118
  system("which #{command} > /dev/null 2>&1")
127
119
  end
120
+
121
+ # Returns true if the database name appears to be a multi-part database name.
122
+ #
123
+ # e.g.
124
+ # /home/ben/pd.ben/sequenceserver/db/nr.00 => yes
125
+ # /home/ben/pd.ben/sequenceserver/db/nr => no
126
+ # /home/ben/pd.ben/sequenceserver/db/img3.5.finished.faa.01 => yes
127
+ def multipart_database_name?(db_name)
128
+ !(db_name.match(/.+\/\S+\d{2}$/).nil?)
129
+ end
128
130
  end
129
131
 
130
132
  def self.included(klass)