sequenceserver 0.6.7

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Gem dependencies for SequenceServer.
# Fetch gems over HTTPS so downloads cannot be tampered with in transit.
source 'https://rubygems.org'

gem 'ptools'
gem 'sinatra', '>=1.0'
data/LICENSE.txt ADDED
@@ -0,0 +1,64 @@
1
+ SequenceServer (http://sequenceserver.com)
2
+ Copyright (c) 2010-2011 Yannick Wurm, Benjamin J. Woodcroft, Anurag Priyam.
3
+
4
+
5
+ 1. Definitions
6
+
7
+ The software: the SequenceServer software and associated documentation.
8
+ The authors: Yannick Wurm, Anurag Priyam, Benjamin J. Woodcroft.
9
+
10
+
11
+ 2. Fair usage
12
+
13
+ Within not-for-profit and educational organizations the software can
14
+ be freely used by individuals and on internal and public websites.
15
+
16
+ Installation of the software or use of the software by for-profit
17
+ organizations (including but not limited to companies, service providers
18
+ and consultancies) requires purchase of a usage license or other specific
19
+ arrangements with the authors.
20
+
21
+
22
+ 3. Modifications
23
+
24
+ Modifications can be made by not-for-profit and educational organizations.
25
+ However, citation requests and links to sequenceserver.com must remain
26
+ visible and unaltered.
27
+
28
+
29
+ 4. Redistribution of the software
30
+
31
+ Redistribution without modification is permitted unless associated to
32
+ a financial transaction. In particular, the software may not be
33
+ distributed with nor installed via a commercial software package, nor
34
+ be made available by commercial service providers.
35
+
36
+ Redistribution with modification is permitted only if ALL of the
37
+ following conditions are respected:
38
+
39
+ a. no financial transaction is associated (see above).
40
+ b. This license and copyright notices are included and remain
41
+ unchanged; citation requests and links to sequenceserver.com
42
+ remain visible and unaltered.
43
+ c. Modifications are made freely available on github.com (or an
44
+ equivalent medium) and the authors are allowed to freely
45
+ incorporate modifications into the software.
46
+
47
+
48
+ 5. Specific needs
49
+
50
+ The authors reserve the right to make specific licensing, modification
51
+ and distribution arrangements with interested parties.
52
+
53
+
54
+ 6. Disclaimer
55
+
56
+ THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
57
+ IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
58
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
59
+ PURPOSE. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
60
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
61
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
62
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
63
+ SOFTWARE.
64
+
data/README.txt ADDED
@@ -0,0 +1,5 @@
1
+ Thanks for downloading SequenceServer!
2
+
3
+ Documentation available at http://www.sequenceserver.com
4
+
5
+ -- Yannick Wurm, Ben Woodcroft, Anurag Priyam
@@ -0,0 +1,195 @@
1
+ #!/usr/bin/env ruby
2
+ # copyright yannick . wurm at unil . ch
3
+ # Finds files and reads the first character of each. If it is '>', reads up to 500 lines, guesses the sequence type, and asks the user for a title with which to format the file as a BLAST database.
4
+
5
+ # ensure 'lib/' is in the load path
6
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
7
+
8
+ require 'rubygems'
9
+ require 'ptools' # for File.binary?(file)
10
+ require 'find'
11
+ require 'logger'
12
+ require 'optparse'
13
+ require 'sequenceserver'
14
+ require 'sequenceserver/helpers.rb'
15
+ require 'sequenceserver/sequencehelpers.rb'
16
+
17
# Logger shared by the whole script: writes to standard output at INFO level.
LOG = Logger.new(STDOUT).tap { |logger| logger.level = Logger::INFO }
19
+
20
require 'shellwords' # shell-escaping of paths and titles interpolated into commands

# Interactive helper that walks a directory tree, finds FASTA files that are
# not yet BLAST databases, and formats them with makeblastdb after asking the
# user for confirmation and a title.
class DatabaseFormatter
  include SequenceServer
  include Helpers
  include SystemHelpers
  include SequenceHelpers

  # Number of leading lines sampled from a file to guess its sequence type.
  SAMPLE_LINES = 500

  attr_accessor :db_path

  # db_path - directory to scan; when nil, the database directory reported by
  #           SequenceServer::App is used instead.
  def initialize(db_path = nil)
    @app = SequenceServer::App
    @app.config = @app.parse_config
    @app.binaries = @app.scan_blast_executables(@app.bin).freeze

    @db_path = (db_path || @app.database)
  end

  # Scan db_path recursively, ask the user about every unformatted FASTA file,
  # run the resulting makeblastdb commands and print a summary.
  #
  # Exits the process with status 1 if db_path is not a directory, and with
  # status 0 if there is nothing to format.
  def format_databases
    unless File.directory?(db_path)
      LOG.fatal("Database directory #{db_path} not found. See './database_formatter --help' for instructions.")
      exit 1
    end

    formatted_dbs = formatted_databases
    commands = []
    Find.find(db_path) do |file|
      LOG.debug("Assessing file #{file}..")
      if File.directory?(file)
        LOG.debug("Ignoring file #{file} since it is a directory")
        next
      end
      if formatted_dbs.include?(file)
        LOG.debug("Ignoring file #{file} since it is already a blast database")
        next
      end
      if File.binary?(file)
        LOG.debug("Ignoring file #{file} since it is a binary file, not plaintext as FASTA files are")
        next
      end
      unless probably_fasta?(file)
        LOG.debug("Ignoring file #{file} since it was not judged to be a FASTA file.")
        next
      end

      LOG.info("Found #{file}")
      sequence_type = guess_sequence_type(file)
      unless [:protein, :nucleotide].include?(sequence_type)
        # Single warning whether the guess raised or returned something unusable.
        LOG.warn("Unable to guess sequence type for #{file}. Skipping")
        next
      end

      command = ask_make_db_command(file, sequence_type)
      commands.push(command) unless command.nil?
    end

    LOG.info("Will now create DBs")
    if commands.empty?
      puts "", "#{db_path} does not contain any unformatted database."
      exit
    end
    commands.each do |command|
      LOG.info("Will run: " + command.to_s)
      LOG.error("Command failed: #{command}") unless system(command)
    end
    LOG.info("Done formatting databases. ")
    db_table(db_path)
  end

  # Log a table (type, file, title) of the BLAST databases found under db_path.
  def db_table(db_path)
    LOG.info("Summary of formatted blast databases:\n")
    # '2>&1' merges stderr into the captured output.
    output = %x|#{binary('blastdbcmd')} -recursive -list #{Shellwords.escape(db_path)} -list_outfmt "%p %f %t" 2>&1|
    LOG.info(output)
  end

  # Returns true if the first character of the file is '>', false otherwise
  # (including for empty files).
  def probably_fasta?(file)
    return false if File.zero?(file)
    File.open(file, 'r') { |file_stream| file_stream.readline.slice(0, 1) == '>' }
  end

  # Ask the user whether +file+ should be formatted and under which title.
  #
  # Returns the command (String) that needs to be run to make the db, or nil
  # if the user declined or standard input was closed.
  def ask_make_db_command(file, type)
    LOG.info("FASTA file: #{file}")
    LOG.info("Fasta type: " + type.to_s)

    response = ''
    until response.match(/^[yn]$/i)
      LOG.info("Proceed? [y/n]: ")
      answer = STDIN.gets
      return nil if answer.nil? # EOF on stdin: treat as "no" instead of crashing
      response = answer.chomp
    end
    return nil unless response.match(/y/i)

    LOG.info("Enter a database title (or will use '#{File.basename(file)}')")
    # gets returns nil on EOF; fall back to the default title in that case.
    title = STDIN.gets.to_s.chomp.gsub('"', "'")
    title = File.basename(file) if title.empty?

    make_db_command(file, type, title)
  end

  # Build the makeblastdb command line for +file+.
  #
  # type  - :protein or :nucleotide; its first four letters ('prot'/'nucl')
  #         are passed as -dbtype.
  # title - database title.
  #
  # File name and title are shell-escaped, so spaces and shell metacharacters
  # in them are passed through literally.
  def make_db_command(file, type, title)
    LOG.info("Will make #{type.to_s} database from #{file} with #{title}")
    command = [
      binary('makeblastdb'),
      '-in', Shellwords.escape(file),
      '-dbtype', type.to_s.slice(0, 4),
      '-title', Shellwords.escape(title),
      '-parse_seqids'
    ].join(' ')
    LOG.info("Returning: #{command}")
    command
  end

  private

  # Shell-escaped path of the named BLAST+ executable.
  def binary(name)
    Shellwords.escape(@app.binaries[name])
  end

  # Files under db_path that blastdbcmd already reports as BLAST databases.
  def formatted_databases
    %x|#{binary('blastdbcmd')} -recursive -list #{Shellwords.escape(db_path)} -list_outfmt "%f" 2>&1|.split("\n")
  end

  # Guess the sequence type from the first SAMPLE_LINES lines of +file+.
  # Returns whatever type_of_sequences returns, or nil if it raised.
  def guess_sequence_type(file)
    first_lines = File.foreach(file).first(SAMPLE_LINES).join
    begin
      type_of_sequences(first_lines)
    rescue StandardError => e
      LOG.debug("Sequence type detection failed for #{file}: #{e.message}")
      nil
    end
  end
end
146
+
147
# Command-line entry point: parse the options, then format the databases in
# the directory given as first positional argument (DatabaseFormatter falls
# back to the application's database directory when it is absent).
parser = OptionParser.new do |opts|
  opts.banner = <<BANNER
NAME

database_formatter - prepare BLAST databases for SequenceServer

SYNOPSIS

./database_formatter [--verbose] [blast_database_directory]

Example:

$ ./database_formatter ~/db # explicitly specify a database directory
$ ./database_formatter # use the database directory in config.yml

DESCRIPTION

database_formatter recursively scans the given 'blast_database_directory' for
BLAST databases and formats them for use with SequenceServer.

It automagically detects the database type, and ignores non-db files and
pre-formatted databases. The 'parse_seqids' makeblastdb options is used.

'blast_database_directory' can be passed as a command line parameter or
through config.yml by setting the 'database' key (the same option used by
SequenceServer). See example.config.yml. database_formatter will check
config.yml only if the command line parameter is missing.

Failing both, database_formatter will look for 'database' directory relative
to the current working directory i.e ./database.

database_formatter can be used standalone too.

OPTIONS

BANNER

  opts.on_tail('-h', '--help', 'Show this message') do
    puts opts
    exit
  end

  opts.on('-v', '--verbose', 'Print lots of output') do
    LOG.level = Logger::DEBUG
  end
end

begin
  parser.parse!
rescue OptionParser::ParseError => e
  # Unknown or malformed option: show the problem and the usage text instead
  # of a backtrace, and signal failure to the caller.
  warn e.message
  warn parser.help
  exit 1
end

app = DatabaseFormatter.new(ARGV[0])
app.format_databases
@@ -0,0 +1,12 @@
1
#!/usr/bin/env ruby

# Launcher for the SequenceServer web application.

# Application root: the parent of the directory containing this script.
# File.expand_path is used because nesting File.dirname gives '.' rather than
# '..' when the script is invoked as 'sequenceserver' or './sequenceserver'
# from inside its own directory.
root = File.expand_path('..', File.dirname(__FILE__))

require 'rubygems'
require File.join(root, 'lib', 'sequenceserver')

# display name for tools like `ps`
$PROGRAM_NAME = 'sequenceserver'

SequenceServer::App.run!
data/config.ru ADDED
@@ -0,0 +1,5 @@
1
# Rack entry point: load SequenceServer from the 'lib' directory next to this
# file, call App.init, and hand the app class to Rack.
lib_dir = File.join(File.dirname(__FILE__), 'lib')
require File.join(lib_dir, 'sequenceserver')

SequenceServer::App.init
run SequenceServer::App
@@ -0,0 +1,39 @@
1
+ # Path to the blast executables.
2
+ #
3
+ # Sequence Server scans the given directory for blast binaries. Ideally it
4
+ # should point to the `bin` directory of your BLAST+ installation. Not setting this
5
+ # value, or setting it to `nil` will default to searching the system `PATH`.
6
+ #
7
+ # Uncomment the following line, and change to appropriate value to use.
8
+ #
9
+ # bin: ~/ncbi-blast-2.2.25+/bin/
10
+
11
+ # Path to blast database.
12
+ #
13
+ # Sequence Server scans the given directory (including the sub-directories)
14
+ # for BLAST databases. You cannot specify more than one top-level directory.
15
+ # Not setting this value will default to searching the `database` directory
16
+ # relative to the current working directory.
17
+ #
18
+ # Uncomment the following line, and change to appropriate value to use.
19
+ #
20
+ # database: ~/blast_databases/
21
+
22
+ # Port to run Sequence Server on.
23
+ #
24
+ # The app will then be accessible at http://your-ip:port. Defaults to 4567.
25
+ # http://localhost:port is also valid for local access.
26
+ #
27
+ # Uncomment the following line, and change to appropriate value to use.
28
+ #
29
+ # port: 4567
30
+
31
+ # Number of threads to use when blasting.
32
+ #
33
+ # This option is passed directly to BLAST+. Setting this option to more
34
+ # than 1 may crash BLAST+ if it was not compiled with threading support.
35
+ # Default is to use the safe value of 1.
36
+ #
37
+ # Uncomment the following line, and change to appropriate value to use.
38
+ #
39
+ # num_threads: 1
@@ -0,0 +1,211 @@
1
+ require 'tempfile'
2
+ require 'open3'
3
+
4
module SequenceServer
  # Simple ncbi-blast wrapper. Check examples below.
  class Blast
    # blast method (note: this accessor shadows Object#method)
    attr_accessor :method

    # database name
    attr_accessor :db

    # query sequence string
    attr_accessor :qstring

    # query file name
    attr_accessor :qfile

    # advanced blast options
    attr_accessor :options

    # command string to be executed
    attr_reader :command

    # result of executing command (Array of output lines)
    attr_reader :result

    # blast archive file output
    attr_reader :blast_archive_tempfile

    # errors if any while executing command (Array of stderr lines)
    attr_reader :error

    # Initialize a new blast search.
    # ---
    # Arguments(optional):
    # * method(String) - blast executable (shell executable, or absolute path)
    # * db(String)     - database name as returned by 'blastdbcmd -list'
    # * query(Hash)    - query string/file, and options.
    #
    # In the query Hash, use:
    # * :qfile(String)   - to run Blast against a file.
    # * :qstring(String) - to run Blast against a string.
    # * :options(String) - to specify multiple blast options.
    #
    # Either :qfile, or :qstring should be used. If both are given, by design
    # :qstring will be used to run blast.
    # ---
    # Examples:
    #
    #   b = Blast.new("blastn", "S.cdna.fasta", :qfile => 'query.seq', :options => "-html -num_threads 4")
    #   b = Blast.new("blastn", "S.cdna.fasta", :qstring => 'ATGTCCGCGAATCGATTGAACGTGCTGGTGACCCTGATGCTCGCCGTCGCGCTTCTTGTG')
    #
    #   b.run!   => true
    #   b.result => "blast output"
    #
    #   # change the blast method.
    #   b.method = 'blastp'
    #
    #   b.run!   => false
    #   b.error  => "blast error output"
    def initialize(method = nil, db = nil, query = {})
      @method  = method
      @db      = db
      @qstring = query[:qstring]
      @qfile   = query[:qfile]
      @options = query[:options]
    end

    # Run blast everytime it is called. Returns the success
    # status - true, or false. Blast method, db, and qfile/qstring
    # need to be set before calling this method, else blast will fail.
    #
    # Success means the command wrote nothing to standard error.
    #
    #   b = Blast.new
    #   b.run!   => false
    #
    #   # set blast method, and db
    #   b.method = 'blastn'
    #   b.db     = 'S.cdna.fasta'
    #
    #   b.run!   => false
    #   b.error  => "blast error output"
    #
    #   # set qfile
    #   b.qfile  = 'query1.seq'
    #
    #   b.run!   => true
    #   b.result => "blast output"
    def run!
      tempfile = nil

      # can not run blast if method is not specified
      return false unless @method

      # create a tempfile if qstring is given
      if @qstring
        tempfile = Tempfile.new('qfile')
        tempfile.puts(@qstring)
        tempfile.close
        @qfile = tempfile.path
      end

      # form command to execute
      @command = to_s

      # execute command and capture both stdout, and stderr
      @result, @error = capture(@command)

      # set and return success status
      @success = @error.empty?
    ensure
      # delete tempfile if it was created
      tempfile.unlink if tempfile
    end

    # Return the blast type used as a String (nil if no method is set).
    # Works for both bare executable names and paths, and always reflects
    # the current value of #method.
    #
    #   b.method = '/home/yeban/opt/blastn'
    #   b.type => 'blastn'
    #   b.method = 'blastp'
    #   b.type => 'blastp'
    def type
      @method && File.basename(@method)
    end

    # Return success status - true, false, or nil.
    # 'nil' implies that blast has not been run yet.
    def success?
      @success
    end

    # String representation of the blast object - same as
    # the command to be executed.
    def to_s
      s = "#{@method} "
      s += "-db '#{@db}' " if @db
      # quoted like -db so that query paths containing spaces survive the shell
      s += "-query '#{@qfile}' " if @qfile
      s += @options.to_s if @options
      s
    end

    # Especially helpful in irb - "status : command"
    def inspect
      return to_s if success?.nil?
      (success? ? "success : " : "fail : ") + @command
    end

    # Run the blast with the options specified by the user, writing a blast
    # archive file (accessible as #blast_archive_tempfile) which can be further
    # transformed into other formats. Returns the success status of the run.
    #
    # The archive flags are added only for the duration of the run; #options is
    # left as the user set it, so repeated calls do not accumulate flags.
    def run_to_blast_archive!
      user_options = @options

      @blast_archive_tempfile = Tempfile.open('seqserve_formatter')
      # Only the path is needed, so release our own handle on the file.
      @blast_archive_tempfile.close

      # Add -outfmt 11 to list of options so that it outputs a blast archive
      @options = "#{user_options} -outfmt 11 -out #{@blast_archive_tempfile.path}"

      # Run the blast
      run!
    ensure
      @options = user_options
    end

    # convert the blast archive to a regular HTML result, stored
    # as an instance variable Blast#result. Returns the captured stderr lines.
    def convert_blast_archive_to_html_result(blast_formatter_path)
      @command = "#{blast_formatter_path} -archive '#{blast_archive_tempfile.path}' -html"

      # execute command and capture both stdout, and stderr
      @result, @error = capture(@command)
      @error
    end

    class << self
      # shortcut method to run blast against a query file
      def blast_file(method, db, qfile, options = nil)
        b = Blast.new(method, db, :qfile => qfile, :options => options)
        b.run!
        b
      end

      # shortcut method to run blast against a query string
      def blast_string(method, db, qstring, options = nil)
        b = Blast.new(method, db, :qstring => qstring, :options => options)
        b.run!
        b
      end

      # shortcut method to run blast with a query string and return a
      # blast archive, which can then be further processed into other useful
      # output forms (e.g. HTML, GFF). If it ran successfully, the blast archive
      # is a Tempfile accessible as an instance variable of the returned
      # Blast object.
      def blast_string_to_blast_archive(method, db, qstring, options = nil)
        b = Blast.new(method, db, :qstring => qstring, :options => options)
        b.run_to_blast_archive!
        b
      end
    end

    private

    # Run +command+ and return [stdout_lines, stderr_lines]. capture3 drains
    # both pipes concurrently, so a chatty stderr cannot block the child.
    def capture(command)
      stdout, stderr, = Open3.capture3(command)
      [stdout.lines.to_a, stderr.lines.to_a]
    end
  end
end
@@ -0,0 +1,60 @@
1
+ module SequenceServer
2
+ module Customisation
3
+ ## When not commented out, this method takes a sequence ID
4
+ ## and returns a hyperlink that replaces the hit in the
5
+ ## BLAST output.
6
+ ##
7
+ ## Return the URL to link to, or nil
8
+ ## to not include a hyperlink.
9
+ ##
10
+ ## When this method
11
+ ## is commented out, the default link is used. The default
12
+ ## is a link that displays the full sequence of
13
+ ## the hit (if makeblastdb has been run with
14
+ ## -parse_seqids), or no link at all otherwise.
15
+ # def construct_custom_sequence_hyperlink(options)
16
+ # ## Example:
17
+ # ## sequence_id comes in like "psu|MAL13P1.200 | organism=Plasmodium_falciparum_3D7 | product=mitochondrial"
18
+ # ## output: "http://apiloc.bio21.unimelb.edu.au/apiloc/gene/MAL13P1.200"
19
+ # matches = options[:sequence_id].match(/^\s*psu\|(\S+) /)
20
+ # if matches #if the sequence_id conforms to our expectations
21
+ # # All is good. Return the hyperlink.
22
+ # return "http://apiloc.bio21.unimelb.edu.au/apiloc/gene/#{matches[1]}"
23
+ # else
24
+ # # Parsing the sequence_id didn't work. Don't include a hyperlink for this
25
+ # # sequence_id, but log that there has been a problem.
26
+ # settings.log.warn "Unable to parse sequence id `#{options[:sequence_id]}'"
27
+ # # Return nil so no hyperlink is generated.
28
+ # return nil
29
+ # end
30
+ # end
31
+
32
+ ## Much like construct_custom_sequence_hyperlink, except
33
+ ## instead of just a hyperlink being defined, the whole
34
+ ## line as it appears in the blast results is generated.
35
+ ##
36
+ ## This is therefore a more flexible setup than is possible
37
+ ## with construct_custom_sequence_hyperlink, because
38
+ ## things such as adding two hyperlinks for one hit
39
+ ## become possible.
40
+ ##
41
+ ## When this method is commented out, the behaviour is that
42
+ ## the construct_custom_sequence_hyperlink method is used,
43
+ ## or failing that, the default link is used.
44
+ # def construct_custom_sequence_hyperlinking_line(options)
45
+ # matches = options[:sequence_id].match(/^\s*psu\|(\S+) /)
46
+ # if matches #if the sequence_id conforms to our expectations
47
+ # # All is good. Return the hyperlinked line.
48
+ # link1 = "http://apiloc.bio21.unimelb.edu.au/apiloc/gene/#{matches[1]}"
49
+ # link2 = "http://google.com/?q=#{matches[1]}"
50
+ # return "<a href='#{link1}'>ApiLoc page</a>, <a href='#{link2}'>Google search</a>"
51
+ # else
52
+ # # Parsing the sequence_id didn't work. Don't include a hyperlink for this
53
+ # # sequence_id, but log that there has been a problem.
54
+ # settings.log.warn "Unable to parse sequence id `#{options[:sequence_id]}'"
55
+ # # Return nil so no hyperlink is generated.
56
+ # return nil
57
+ # end
58
+ # end
59
+ end
60
+ end
@@ -0,0 +1,23 @@
1
module SequenceServer
  # A BLAST database known to the application: its name and an optional title.
  class Database < Struct.new("Database", :name, :title)
    # Title followed by name, separated by a single space.
    def to_s
      format('%s %s', title, name)
    end

    # Ordering used purely for presentation.
    #
    # Databases are ordered by 'title' when both sides have one, and by
    # 'name' otherwise. Sorting a list in which only some databases have a
    # title therefore gives no meaningful overall order.
    def <=>(other)
      sort_key = (title && other.title) ? :title : :name
      public_send(sort_key) <=> other.public_send(sort_key)
    end
  end
end
+ end