sequenceserver 0.6.7

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of sequenceserver might be problematic. Click here for more details.

data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over HTTPS rather than plain HTTP so that downloads are
# protected in transit.
source 'https://rubygems.org'

# ptools provides File.binary?, used by the database formatter script.
gem 'ptools'
gem 'sinatra', '>=1.0'
data/LICENSE.txt ADDED
@@ -0,0 +1,64 @@
1
+ SequenceServer (http://sequenceserver.com)
2
+ Copyright (c) 2010-2011 Yannick Wurm, Benjamin J. Woodcroft, Anurag Priyam.
3
+
4
+
5
+ 1. Definitions
6
+
7
+ The software: the SequenceServer software and associated documentation.
8
+ The authors: Yannick Wurm, Anurag Priyam, Benjamin J. Woodcroft.
9
+
10
+
11
+ 2. Fair usage
12
+
13
+ Within not-for-profit and educational organizations the software can
14
+ be freely used by individuals and on internal and public websites.
15
+
16
+ Installation of the software or use of the software by for-profit
17
+ organizations (including but not limited to companies, service providers
18
+ and consultancies) requires purchase of a usage license or other specific
19
+ arrangements with the authors.
20
+
21
+
22
+ 3. Modifications
23
+
24
+ Modifications can be made by not-for-profit and educational organizations.
25
+ However, citation requests and links to sequenceserver.com must remain
26
+ visible and unaltered.
27
+
28
+
29
+ 4. Redistribution of the software
30
+
31
+ Redistribution without modification is permitted unless associated to
32
+ a financial transaction. In particular, the software may not be
33
+ distributed with nor installed via a commercial software package, nor
34
+ be made available by commercial service providers.
35
+
36
+ Redistribution with modification is permitted only if ALL of the
37
+ following conditions are respected:
38
+
39
+ a. no financial transaction is associated (see above).
40
+ b. This license and copyright notices are included and remain
41
+ unchanged; citation requests and links to sequenceserver.com
42
+ remain visible and unaltered.
43
+ c. Modifications are made freely available on github.com (or an
44
+ equivalent medium) and the authors are allowed to freely
45
+ incorporate modifications into the software.
46
+
47
+
48
+ 5. Specific needs
49
+
50
+ The authors reserve the right to make specific licensing, modification
51
+ and distribution arrangements with interested parties.
52
+
53
+
54
+ 6. Disclaimer
55
+
56
+ THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
57
+ IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
58
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
59
+ PURPOSE. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
60
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
61
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
62
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
63
+ SOFTWARE.
64
+
data/README.txt ADDED
@@ -0,0 +1,5 @@
1
+ Thanks for downloading SequenceServer!
2
+
3
+ Documentation available at http://www.sequenceserver.com
4
+
5
+ -- Yannick Wurm, Ben Woodcroft, Anurag Priyam
@@ -0,0 +1,195 @@
1
+ #!/usr/bin/env ruby
2
+ # copyright yannick . wurm at unil . ch
3
+ # Finds files and reads the first character of each. If it is '>', reads up to 500 lines, guesses the sequence type, and asks the user for a title to format the file as a BLAST database.
4
+
5
+ # ensure 'lib/' is in the load path
6
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
7
+
8
+ require 'rubygems'
9
+ require 'ptools' # for File.binary?(file)
10
+ require 'find'
11
+ require 'logger'
12
+ require 'optparse'
13
+ require 'sequenceserver'
14
+ require 'sequenceserver/helpers.rb'
15
+ require 'sequenceserver/sequencehelpers.rb'
16
+
17
# Script-wide logger writing to standard output. It starts at INFO level;
# the --verbose command-line switch lowers it to DEBUG.
LOG = Logger.new(STDOUT).tap { |logger| logger.level = Logger::INFO }
19
+
20
# Interactive helper that walks a directory tree, finds FASTA files that are
# not yet BLAST databases, and (after asking the user) formats them with
# `makeblastdb`.
#
# The BLAST binaries and the default database directory are read from
# SequenceServer::App's configuration.
class DatabaseFormatter
  include SequenceServer
  include Helpers
  include SystemHelpers
  include SequenceHelpers

  # Number of leading lines sampled from a file to guess its sequence type.
  SAMPLE_LINES = 500

  # Directory that is scanned for FASTA files / BLAST databases.
  attr_accessor :db_path

  # db_path - directory to scan; when nil, the 'database' setting of
  #           SequenceServer::App is used instead.
  def initialize(db_path = nil)
    @app = SequenceServer::App
    @app.config = @app.parse_config
    @app.binaries = @app.scan_blast_executables(@app.bin).freeze

    @db_path = db_path || @app.database
  end

  # Scan db_path recursively, ask the user about every unformatted FASTA file
  # found, run the resulting makeblastdb commands, and print a summary.
  #
  # Terminates the process with a non-zero status if db_path is not a
  # directory, and with status 0 if there is nothing to format.
  def format_databases
    unless File.directory?(db_path)
      LOG.fatal("Database directory #{db_path} not found. See './database_formatter --help' for instructions.")
      exit 1
    end

    # files that blastdbcmd already recognises as BLAST databases
    formatted_dbs = %x|#{@app.binaries['blastdbcmd']} -recursive -list #{db_path} -list_outfmt "%f" 2>&1|.split("\n")
    commands = []

    Find.find(db_path) do |file|
      LOG.debug("Assessing file #{file}..")
      if File.directory?(file)
        LOG.debug("Ignoring file #{file} since it is a directory")
        next
      end
      if formatted_dbs.include?(file)
        LOG.debug("Ignoring file #{file} since it is already a blast database")
        next
      end
      if File.binary?(file)
        LOG.debug("Ignoring file #{file} since it is a binary file, not plaintext as FASTA files are")
        next
      end
      unless probably_fasta?(file)
        LOG.debug("Ignoring file #{file} since it was not judged to be a FASTA file.")
        next
      end

      LOG.info("Found #{file}")
      sequence_type = guess_sequence_type(file)
      unless [:protein, :nucleotide].include?(sequence_type)
        # warn exactly once, whether detection raised or returned something else
        LOG.warn("Unable to guess sequence type for #{file}. Skipping")
        next
      end

      command = ask_make_db_command(file, sequence_type)
      commands.push(command) unless command.nil?
    end

    if commands.empty?
      puts "", "#{db_path} does not contain any unformatted database."
      exit
    end

    LOG.info("Will now create DBs")
    commands.each do |command|
      LOG.info("Will run: " + command.to_s)
      system(command)
    end
    LOG.info("Done formatting databases. ")
    db_table(db_path)
  end

  # Log a summary (type, file, title) of the BLAST databases blastdbcmd finds
  # under db_path. stderr is merged into the captured output.
  def db_table(db_path)
    LOG.info("Summary of formatted blast databases:\n")
    output = %x|#{@app.binaries['blastdbcmd']} -recursive -list #{db_path} -list_outfmt "%p %f %t" 2>&1|
    LOG.info(output)
  end

  # Returns true if the first character of the file is '>', false otherwise
  # (including for empty files).
  def probably_fasta?(file)
    return false if File.zero?(file)
    File.open(file, 'r') { |file_stream| file_stream.read(1) == '>' }
  end

  # Ask the user whether `file` should be formatted and under which title.
  #
  # Returns the command that needs to be run to make the database, or nil if
  # the user declined (end of input on STDIN is treated as declining).
  def ask_make_db_command(file, type)
    LOG.info("FASTA file: #{file}")
    LOG.info("Fasta type: " + type.to_s)

    response = ''
    until response =~ /\A[yn]\z/i
      LOG.info("Proceed? [y/n]: ")
      answer = STDIN.gets
      return nil if answer.nil? # EOF: nobody is there to answer
      response = answer.chomp
    end
    return nil unless response =~ /y/i

    LOG.info("Enter a database title (or will use '#{File.basename(file)}'): ")
    # double quotes would terminate the quoted -title argument of the command
    title = STDIN.gets.to_s.chomp.gsub('"', "'")
    title = File.basename(file) if title.empty?

    make_db_command(file, type, title)
  end

  # Build the makeblastdb command line for `file`.
  #
  # type  - :protein or :nucleotide; its first four letters ("prot"/"nucl")
  #         are passed as -dbtype.
  # title - database title; passed inside double quotes.
  #
  # NOTE(review): `file` is interpolated unquoted, so paths containing
  # whitespace or shell metacharacters will not survive the shell.
  def make_db_command(file, type, title)
    LOG.info("Will make #{type} database from #{file} with #{title}")
    command = %|#{@app.binaries['makeblastdb']} -in #{file} -dbtype #{type.to_s.slice(0, 4)} -title "#{title}" -parse_seqids|
    LOG.info("Returning: #{command}")
    command
  end

  private

  # Guess the sequence type of `file` from its first SAMPLE_LINES lines.
  # Returns whatever type_of_sequences returns (expected to be :protein or
  # :nucleotide), or nil if type detection raised an error.
  def guess_sequence_type(file)
    first_lines = File.open(file, 'r') do |file_stream|
      file_stream.first(SAMPLE_LINES).join
    end

    begin
      type_of_sequences(first_lines)
    rescue StandardError => e
      LOG.debug("Sequence type detection failed for #{file}: #{e.message}")
      nil
    end
  end
end
146
+
147
# Command-line interface: build the option parser, consume the switches from
# ARGV, then run the formatter on the (optional) directory argument left over.
option_parser = OptionParser.new

option_parser.banner = <<BANNER
NAME

database_formatter.rb - prepare BLAST databases for SequenceServer

SYNOPSIS

./database_formatter.rb [--verbose] [blast_database_directory]

Example:

$ ./database_formatter.rb ~/db # explicitly specify a database directory
$ ./database_formatter # use the database directory in config.yml

DESCRIPTION

database_formatter recursively scans the given 'blast_database_directory' for
BLAST databases and formats them for use with SequenceServer.

It automagically detects the database type, and ignores non-db files and
pre-formatted databases. The 'parse_seqids' makeblastdb options is used.

'blast_database_directory' can be passed as a command line parameter or
through config.yml by setting the 'database' key (the same option used by
SequenceServer). See example.config.yml. database_formatter will check
config.yml only if the command line parameter is missing.

Failing both, database_formatter will look for 'database' directory relative
to the current working directory i.e ./database.

database_formatter can be used standalone too.

OPTIONS

BANNER

# -h / --help: print the usage text and quit
option_parser.on_tail('-h', '--help', 'Show this message') do
  puts option_parser
  exit
end

# -v / --verbose: switch the script logger to DEBUG
option_parser.on('-v', '--verbose', 'Print lots of output') do
  LOG.level = Logger::DEBUG
end

# parse! removes the recognised switches from ARGV
option_parser.parse!

# the first remaining argument, if any, is the database directory
formatter = DatabaseFormatter.new(ARGV.first)
formatter.format_databases
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/env ruby
2
+
3
# Application root: the parent of the directory that holds this script.
#
# File.expand_path resolves '..' against the script's own directory, so the
# root is still correct when the script is started from inside that directory
# (where File.dirname(File.dirname(__FILE__)) would collapse to '.').
root = File.expand_path('..', File.dirname(__FILE__))

require 'rubygems'
require File.join(root, 'lib', 'sequenceserver')

# display name for tools like `ps`
$PROGRAM_NAME = 'sequenceserver'

SequenceServer::App.run!
data/config.ru ADDED
@@ -0,0 +1,5 @@
1
# Load SequenceServer from the 'lib/' directory located next to this file.
sequenceserver_lib = File.join(File.dirname(__FILE__), 'lib', 'sequenceserver')
require sequenceserver_lib

# Initialise the application first, then pass the app class to `run`.
SequenceServer::App.init
run SequenceServer::App
@@ -0,0 +1,39 @@
1
+ # Path to the blast executables.
2
+ #
3
+ # Sequence Server scans the given directory for blast binaries. Ideally it
4
+ # should point to the `bin` directory of your BLAST+ installation. Not setting this
5
+ # value, or setting it to `nil` will default to searching the system `PATH`.
6
+ #
7
+ # Uncomment the following line, and change to appropriate value to use.
8
+ #
9
+ # bin: ~/ncbi-blast-2.2.25+/bin/
10
+
11
+ # Path to blast database.
12
+ #
13
+ # Sequence Server scans the given directory (including the sub-directories)
14
+ # for blast database. You can not specify more than one top-level directory.
15
+ # Not setting this value, will default to searching `database` directory
16
+ # relative to the current working directory.
17
+ #
18
+ # Uncomment the following line, and change to appropriate value to use.
19
+ #
20
+ # database: ~/blast_databases/
21
+
22
+ # Port to run Sequence Server on.
23
+ #
24
+ # The app will then be accessible at http://your-ip:port. Defaults to 4567.
25
+ # http://localhost:port is also valid for local access.
26
+ #
27
+ # Uncomment the following line, and change to appropriate value to use.
28
+ #
29
+ # port: 4567
30
+
31
+ # Number of threads to use when blasting.
32
+ #
33
+ # This option is passed directly to BLAST+. Setting this option to more
34
+ # than 1 may crash BLAST+ if it was not compiled with threading support.
35
+ # Default is to use the safe value of 1.
36
+ #
37
+ # Uncomment the following line, and change to appropriate value to use.
38
+ #
39
+ # num_threads: 1
@@ -0,0 +1,211 @@
1
+ require 'tempfile'
2
+ require 'open3'
3
+
4
module SequenceServer
  # Simple ncbi-blast wrapper. Check examples below.
  #
  # NOTE: method, db, query file and options are interpolated into a shell
  # command line as given (see #to_s); callers are responsible for making
  # sure these values are safe to pass to a shell.
  class Blast
    # blast method
    attr_accessor :method

    # database name
    attr_accessor :db

    # query sequence string
    attr_accessor :qstring

    # query file name
    attr_accessor :qfile

    # advanced blast options
    attr_accessor :options

    # command string to be executed
    attr_reader :command

    # result of executing command (Array of stdout lines)
    attr_reader :result

    # blast archive file output
    attr_reader :blast_archive_tempfile

    # errors if any while executing command (Array of stderr lines)
    attr_reader :error

    # Initialize a new blast search.
    # ---
    # Arguments(optional):
    # * method(String) - blast executable (shell executable, or absolute path)
    # * db(String)     - database name as returned by 'blastdbcmd -list'
    # * query(Hash)    - query string/file, and options.
    #
    # In the query Hash, use:
    # * :qfile(String)   - to run Blast against a file.
    # * :qstring(String) - to run Blast against a string.
    # * :options(String) - to specify multiple blast options.
    #
    # Either :qfile, or :qstring should be used. If both are given, by design
    # :qstring will be used to run blast.
    # ---
    # Examples:
    #
    #   b = Blast.new("blastn", "S.cdna.fasta", :qfile => 'query.seq', :options => "-html -num_threads 4")
    #   b = Blast.new("blastn", "S.cdna.fasta", :qstring => 'ATGTCCGCGAATCGATTGAACGTGCTGGTGACCCTGATGCTCGCCGTCGCGCTTCTTGTG')
    #
    #   b.run!   => true
    #   b.result => "blast output"
    #
    #   # change the blast method.
    #   b.method = 'blastp'
    #
    #   b.run!   => false
    #   b.error  => "blast error output"
    def initialize(method = nil, db = nil, query = {})
      @method  = method
      @db      = db
      @qstring = query[:qstring]
      @qfile   = query[:qfile]
      @options = query[:options]
    end

    # Run blast everytime it is called. Returns the success
    # status - true, or false. Blast method, db, and qfile/qstring
    # need to be set before calling this method, else blast will fail.
    #
    # A run counts as successful only if the command wrote nothing to stderr.
    #
    #   b = Blast.new
    #   b.run!   => false
    #
    #   # set blast method, and db
    #   b.method = 'blastn'
    #   b.db     = 'S.cdna.fasta'
    #
    #   b.run!   => false
    #   b.error  => "blast error output"
    #
    #   # set qfile
    #   b.qfile  = 'query1.seq'
    #
    #   b.run!   => true
    #   b.result => "blast output"
    def run!
      # can not run blast if method is not specified
      return false unless @method

      # create a tempfile if qstring is given
      if @qstring
        @tempfile = Tempfile.new('qfile')
        @tempfile.puts(@qstring)
        @tempfile.close
        @qfile = @tempfile.path
      end

      # form command to execute
      @command = to_s

      # execute command and capture both stdout, and stderr
      @result, @error = execute(@command)

      # set and return success status
      @success = @error.empty?
    ensure
      # delete tempfile if it was created
      @tempfile.unlink if @tempfile
    end

    # Return the blast type used as a String, or nil if no method is set.
    # Works for bare executable names as well as for paths, and always
    # reflects the current value of #method.
    #
    #   b.method = '/home/yeban/opt/blastn'
    #   b.type => 'blastn'
    def type
      @method && File.basename(@method)
    end

    # Return success status - true, false, or nil.
    # 'nil' implies that blast has not been run yet.
    def success?
      @success
    end

    # String representation of the blast object - same as
    # the command to be executed.
    def to_s
      s = "#@method "
      s << "-db '#@db' " if @db
      s << "-query #@qfile " if @qfile
      s << @options.to_s if @options
      s
    end

    # Especially helpful in irb - "status : command"
    def inspect
      return to_s if success?.nil?
      (success? ? "success : " : "fail : ") + @command
    end

    # Run the blast with the options specified by the user, writing a blast
    # archive file (available as #blast_archive_tempfile) which can be further
    # transformed into other formats.
    #
    # Returns the success status, exactly like #run!.
    def run_to_blast_archive!
      @blast_archive_tempfile = Tempfile.open('seqserve_formatter')

      # Add -outfmt 11 to list of options so that it outputs a blast archive
      @options ||= ''
      @options += " -outfmt 11 -out #{@blast_archive_tempfile.path}"

      # Run the blast
      run!
    end

    # Convert the blast archive to a regular HTML result, stored
    # as an instance variable Blast#result. Anything the formatter writes to
    # stderr is stored in Blast#error, which is also the return value.
    def convert_blast_archive_to_html_result(blast_formatter_path)
      @command = "#{blast_formatter_path} -archive #{blast_archive_tempfile.path} -html"

      # execute command and capture both stdout, and stderr
      @result, @error = execute(@command)
      @error
    end

    class << self
      # shortcut method to run blast against a query file
      def blast_file(method, db, qfile, options = nil)
        b = Blast.new(method, db, :qfile => qfile, :options => options)
        b.run!
        b
      end

      # shortcut method to run blast against a query string
      def blast_string(method, db, qstring, options = nil)
        b = Blast.new(method, db, :qstring => qstring, :options => options)
        b.run!
        b
      end

      # shortcut method to run blast with a query string and return a
      # blast archive, which can then be further processed into other useful
      # output forms (e.g. HTML, GFF). If it ran successfully, the blast archive
      # is a Tempfile accessible as an instance variable of the returned
      # Blast object.
      def blast_string_to_blast_archive(method, db, qstring, options = nil)
        b = Blast.new(method, db, :qstring => qstring, :options => options)
        b.run_to_blast_archive!
        b
      end
    end

    private

    # Run `command` and return [stdout_lines, stderr_lines].
    #
    # stderr is drained in a separate thread while stdout is being read, so a
    # command that writes a lot to one stream cannot block on a full pipe
    # while we are still waiting on the other. stdin is closed up front so a
    # command that tries to read from it sees end-of-file instead of hanging.
    def execute(command)
      Open3.popen3(command) do |stdin, stdout, stderr|
        stdin.close
        stderr_reader = Thread.new { stderr.readlines }
        [stdout.readlines, stderr_reader.value]
      end
    end
  end
end
@@ -0,0 +1,60 @@
1
+ module SequenceServer
2
+ module Customisation
3
+ ## When not commented out, this method is used to take a
4
+ ## sequence ID, and return a hyperlink that
5
+ ## replaces the hit in the BLAST output.
6
+ ##
7
+ ## Return the hyperlink to link to, or nil
8
+ ## to not include a hyperlink.
9
+ ##
10
+ ## When this method
11
+ ## is commented out, the default link is used. The default
12
+ ## is a link to the full sequence of
13
+ ## the hit (if makeblastdb has been run with
14
+ ## -parse_seqids), or no link at all otherwise.
15
+ # def construct_custom_sequence_hyperlink(options)
16
+ # ## Example:
17
+ # ## sequence_id comes in like "psu|MAL13P1.200 | organism=Plasmodium_falciparum_3D7 | product=mitochondrial"
18
+ # ## output: "http://apiloc.bio21.unimelb.edu.au/apiloc/gene/MAL13P1.200"
19
+ # matches = options[:sequence_id].match(/^\s*psu\|(\S+) /)
20
+ # if matches #if the sequence_id conforms to our expectations
21
+ # # All is good. Return the hyperlink.
22
+ # return "http://apiloc.bio21.unimelb.edu.au/apiloc/gene/#{matches[1]}"
23
+ # else
24
+ # # Parsing the sequence_id didn't work. Don't include a hyperlink for this
25
+ # # sequence_id, but log that there has been a problem.
26
+ # settings.log.warn "Unable to parse sequence id `#{options[:sequence_id]}'"
27
+ # # Return nil so no hyperlink is generated.
28
+ # return nil
29
+ # end
30
+ # end
31
+
32
+ ## Much like construct_custom_sequence_hyperlink, except
33
+ ## instead of just a hyperlink being defined, the whole
34
+ ## line as it appears in the blast results is generated.
35
+ ##
36
+ ## This is therefore a more flexible setup than is possible
37
+ ## with construct_custom_sequence_hyperlink, because doing
38
+ ## things such as adding two hyperlinks for the one hit
39
+ ## are possible.
40
+ ##
41
+ ## When this method is commented out, the behaviour is that
42
+ ## the construct_custom_sequence_hyperlink method is used,
43
+ ## or, failing that, the default hyperlink is used.
44
+ # def construct_custom_sequence_hyperlinking_line(options)
45
+ # matches = options[:sequence_id].match(/^\s*psu\|(\S+) /)
46
+ # if matches #if the sequence_id conforms to our expectations
47
+ # # All is good. Return the hyperlink.
48
+ # link1 = "http://apiloc.bio21.unimelb.edu.au/apiloc/gene/#{matches[1]}"
49
+ # link2 = "http://google.com/?q=#{matches[1]}"
50
+ # return "<a href='#{link1}'>ApiLoc page</a>, <a href='#{link2}'>Google search</a>"
51
+ # else
52
+ # # Parsing the sequence_id didn't work. Don't include a hyperlink for this
53
+ # # sequence_id, but log that there has been a problem.
54
+ # settings.log.warn "Unable to parse sequence id `#{options[:sequence_id]}'"
55
+ # # Return nil so no hyperlink is generated.
56
+ # return nil
57
+ # end
58
+ # end
59
+ end
60
+ end
@@ -0,0 +1,23 @@
1
module SequenceServer
  # A database entry with two fields, +name+ and +title+.
  #
  # The Struct is created anonymously so that no global Struct::Database
  # constant is registered as a side effect of loading this file.
  class Database < Struct.new(:name, :title)
    # "title name", separated by a single space.
    def to_s
      "#{title} #{name}"
    end

    # Its not very meaningful to compare Database objects, however,
    # we still add the 'spaceship' operator to be able to sort the
    # databases by 'title', or 'name' for better visual presentation.
    #
    # We use 'title' for comparison, while relying on 'name' as fallback.
    #
    # Trying to sort a list of dbs with 'title' set only for some of them
    # will obviously produce unpredictable sorting order.
    #
    # Returns nil (the conventional "not comparable" answer) when +other+
    # does not respond to both 'title' and 'name'.
    def <=>(other)
      return nil unless other.respond_to?(:title) && other.respond_to?(:name)

      if title && other.title
        title <=> other.title
      else
        name <=> other.name
      end
    end
  end
end