sequenceserver-beta 0.8.7.beta1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +3 -0
- data/LICENSE.Apache.txt +176 -0
- data/LICENSE.txt +69 -0
- data/README.txt +5 -0
- data/bin/sequenceserver +82 -0
- data/config.ru +6 -0
- data/example.config.yml +39 -0
- data/lib/profile_code.rb +217 -0
- data/lib/sequenceserver.rb +527 -0
- data/lib/sequenceserver/blast.rb +92 -0
- data/lib/sequenceserver/customisation.rb +60 -0
- data/lib/sequenceserver/database.rb +29 -0
- data/lib/sequenceserver/database_formatter.rb +190 -0
- data/lib/sequenceserver/helpers.rb +136 -0
- data/lib/sequenceserver/sequencehelpers.rb +93 -0
- data/lib/sequenceserver/sinatralikeloggerformatter.rb +12 -0
- data/lib/sequenceserver/version.rb +9 -0
- data/public/css/beige.css.css +254 -0
- data/public/css/bootstrap.dropdown.css +29 -0
- data/public/css/bootstrap.icons.css +155 -0
- data/public/css/bootstrap.min.css +415 -0
- data/public/css/bootstrap.modal.css +28 -0
- data/public/css/custom.css +232 -0
- data/public/img/glyphicons-halflings-white.png +0 -0
- data/public/img/glyphicons-halflings.png +0 -0
- data/public/js/bootstrap.dropdown.js +92 -0
- data/public/js/bootstrap.modal.js +7 -0
- data/public/js/bootstrap.transition.js +7 -0
- data/public/js/jquery-scrollspy.js +98 -0
- data/public/js/jquery-ui.js +14987 -0
- data/public/js/jquery.activity.js +10 -0
- data/public/js/jquery.enablePlaceholder.min.js +10 -0
- data/public/js/jquery.js +5 -0
- data/public/js/sequenceserver.blast.js +208 -0
- data/public/js/sequenceserver.js +304 -0
- data/public/js/store.min.js +2 -0
- data/sequenceserver.gemspec +49 -0
- data/tests/database/nucleotide/Sinvicta2-2-3.cdna.subset.fasta +5486 -0
- data/tests/database/nucleotide/Sinvicta2-2-3.cdna.subset.fasta.nhr +0 -0
- data/tests/database/nucleotide/Sinvicta2-2-3.cdna.subset.fasta.nin +0 -0
- data/tests/database/nucleotide/Sinvicta2-2-3.cdna.subset.fasta.nsq +0 -0
- data/tests/database/protein/Sinvicta2-2-3.prot.subset.fasta +6449 -0
- data/tests/database/protein/Sinvicta2-2-3.prot.subset.fasta.phr +0 -0
- data/tests/database/protein/Sinvicta2-2-3.prot.subset.fasta.pin +0 -0
- data/tests/database/protein/Sinvicta2-2-3.prot.subset.fasta.psq +0 -0
- data/tests/run +26 -0
- data/tests/test_sequencehelpers.rb +77 -0
- data/tests/test_sequenceserver_blast.rb +60 -0
- data/tests/test_ui.rb +104 -0
- data/tests/test_ui.rb~ +104 -0
- data/tests/ui.specs.todo +10 -0
- data/views/500.erb +22 -0
- data/views/_options.erb +144 -0
- data/views/search.erb +220 -0
- metadata +226 -0
@@ -0,0 +1,92 @@
|
|
1
|
+
require 'tempfile'
|
2
|
+
|
3
|
+
module SequenceServer
|
4
|
+
# Simple BLAST+ wrapper.
|
5
|
+
class Blast
|
6
|
+
|
7
|
+
ERROR_LINE = /\(CArgException.*\)\s(.*)/
|
8
|
+
|
9
|
+
# command string to be executed
|
10
|
+
attr_reader :command
|
11
|
+
|
12
|
+
# result of executing command
|
13
|
+
attr_reader :result
|
14
|
+
|
15
|
+
# errors as [status, message]
|
16
|
+
attr_reader :error
|
17
|
+
|
18
|
+
# Initialize a new blast search.
|
19
|
+
# ---
|
20
|
+
# Arguments:
|
21
|
+
# * method (String) - blast executable (shell executable, or absolute path)
|
22
|
+
# * query (String) - query string
|
23
|
+
# * databases (String) - database name as returned by 'blastdbcmd -list'
|
24
|
+
# * options (String) - other options
|
25
|
+
#
|
26
|
+
# ---
|
27
|
+
# Examples:
|
28
|
+
#
|
29
|
+
# b = Blast.new("blastn", 'ATGTCCGCGAATCGATTGAACGTGCTGGTGACCCTGATGCTCGCCGTCGCGCTTCTTGTG', "S.cdna.fasta", "-html -num_threads 4")
|
30
|
+
#
|
31
|
+
# b.run! => true
|
32
|
+
# b.result => "blast output"
|
33
|
+
def initialize(method, query, databases, options = nil)
|
34
|
+
@method = method
|
35
|
+
@databases = databases
|
36
|
+
|
37
|
+
# create a tempfile for the given query
|
38
|
+
@qfile = Tempfile.new('sequenceserver_query')
|
39
|
+
@qfile.puts(query)
|
40
|
+
@qfile.close
|
41
|
+
|
42
|
+
# Append -html to the list of options so that BLAST produces HTML output
|
43
|
+
@options = options.to_s
|
44
|
+
@options += " -html"
|
45
|
+
end
|
46
|
+
|
47
|
+
# Run blast every time it is called. Returns the success
|
48
|
+
# status - true, or false.
|
49
|
+
def run!
  # Runs the BLAST command and records its outcome.
  #
  # Sets @result to the command's stdout lines. On failure @error becomes
  # [http_status, message]: 400 when BLAST exits with status 1, 500 for any
  # other non-zero status.
  #
  # Returns true on success, false otherwise.
  @result, @error, status = execute(command)

  # Recompute the flag on every run; otherwise a failure that follows an
  # earlier successful run would still report success? == true.
  @success = (status == 0)
  return true if @success

  if status == 1
    # Prefer the text captured by ERROR_LINE from the first stderr line that
    # matches it; fall back to the complete stderr output when none does.
    hit = @error.find { |line| line =~ ERROR_LINE }
    message = hit ? hit[ERROR_LINE, 1] : @error
    @error = [400, message]
  else
    @error = [500, @error]
  end

  false
end
|
64
|
+
|
65
|
+
# The command that will be executed.
|
66
|
+
def command
|
67
|
+
@command ||= "#@method -db '#@databases' -query '#{@qfile.path}' #@options"
|
68
|
+
end
|
69
|
+
|
70
|
+
# Return success status.
|
71
|
+
def success?
|
72
|
+
@success
|
73
|
+
end
|
74
|
+
|
75
|
+
private
|
76
|
+
|
77
|
+
# Execute a command and return its stdout, stderr, and exit status.
|
78
|
+
def execute(command)
  # Runs +command+ through the shell, redirecting its stdout and stderr to
  # temporary files.
  #
  # Returns [stdout_lines, stderr_lines, exit_status].
  rfile = Tempfile.new('sequenceserver_result')
  efile = Tempfile.new('sequenceserver_error')
  # Only the paths are needed; the shell redirection does the writing.
  [rfile, efile].each { |file| file.close }

  system("#{command} > #{rfile.path} 2> #{efile.path}")
  status = $?.exitstatus

  return File.readlines(rfile.path), File.readlines(efile.path), status
ensure
  # Either variable is still nil if creating its Tempfile raised; guard so
  # the cleanup does not replace the original error with a NoMethodError.
  rfile.unlink if rfile
  efile.unlink if efile
end
|
91
|
+
end
|
92
|
+
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module SequenceServer
|
2
|
+
module Customisation
|
3
|
+
## When not commented out, this method is used to take a
|
4
|
+
## sequence ID, and return a hyperlink that
|
5
|
+
## replaces the hit in the BLAST output.
|
6
|
+
##
|
7
|
+
## Return the hyperlink to link to, or nil
|
8
|
+
## to not include a hyperlink.
|
9
|
+
##
|
10
|
+
## When this method
|
11
|
+
## is commented out, the default link is used. The default
|
12
|
+
## is a link to the full sequence of
|
13
|
+
## the hit (if makeblastdb has been run with
|
14
|
+
## -parse_seqids), or no link at all otherwise.
|
15
|
+
# def construct_custom_sequence_hyperlink(options)
|
16
|
+
# ## Example:
|
17
|
+
# ## sequence_id comes in like "psu|MAL13P1.200 | organism=Plasmodium_falciparum_3D7 | product=mitochondrial"
|
18
|
+
# ## output: "http://apiloc.bio21.unimelb.edu.au/apiloc/gene/MAL13P1.200"
|
19
|
+
# matches = options[:sequence_id].match(/^\s*psu\|(\S+) /)
|
20
|
+
# if matches #if the sequence_id conforms to our expectations
|
21
|
+
# # All is good. Return the hyperlink.
|
22
|
+
# return "http://apiloc.bio21.unimelb.edu.au/apiloc/gene/#{matches[1]}"
|
23
|
+
# else
|
24
|
+
# # Parsing the sequence_id didn't work. Don't include a hyperlink for this
|
25
|
+
# # sequence_id, but log that there has been a problem.
|
26
|
+
# settings.log.warn "Unable to parse sequence id `#{options[:sequence_id]}'"
|
27
|
+
# # Return nil so no hyperlink is generated.
|
28
|
+
# return nil
|
29
|
+
# end
|
30
|
+
# end
|
31
|
+
|
32
|
+
## Much like construct_custom_sequence_hyperlink, except
|
33
|
+
## instead of just a hyperlink being defined, the whole
|
34
|
+
## line as it appears in the blast results is generated.
|
35
|
+
##
|
36
|
+
## This is a therefore more flexible setup than is possible
|
37
|
+
## with construct_custom_sequence_hyperlink, because doing
|
38
|
+
## things such as adding two hyperlinks for the one hit
|
39
|
+
## are possible.
|
40
|
+
##
|
41
|
+
## When this method is commented out, the behaviour is that
|
42
|
+
## the construct_custom_sequence_hyperlink method is used,
|
43
|
+
## or failing that the default method of that is used.
|
44
|
+
# def construct_custom_sequence_hyperlinking_line(options)
|
45
|
+
# matches = options[:sequence_id].match(/^\s*psu\|(\S+) /)
|
46
|
+
# if matches #if the sequence_id conforms to our expectations
|
47
|
+
# # All is good. Return the hyperlink.
|
48
|
+
# link1 = "http://apiloc.bio21.unimelb.edu.au/apiloc/gene/#{matches[1]}"
|
49
|
+
# link2 = "http://google.com/?q=#{matches[1]}"
|
50
|
+
# return "<a href='#{link1}'>ApiLoc page</a>, <a href='#{link2}'>Google search</a>"
|
51
|
+
# else
|
52
|
+
# # Parsing the sequence_id didn't work. Don't include a hyperlink for this
|
53
|
+
# # sequence_id, but log that there has been a problem.
|
54
|
+
# settings.log.warn "Unable to parse sequence id `#{options[:sequence_id]}'"
|
55
|
+
# # Return nil so no hyperlink is generated.
|
56
|
+
# return nil
|
57
|
+
# end
|
58
|
+
# end
|
59
|
+
end
|
60
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'digest/md5'
|
2
|
+
|
3
|
+
module SequenceServer
|
4
|
+
class Database < Struct.new("Database", :name, :title, :type)
|
5
|
+
def to_s
|
6
|
+
"#{type}: #{title} #{name}"
|
7
|
+
end
|
8
|
+
|
9
|
+
# It's not very meaningful to compare Database objects, however,
|
10
|
+
# we still add the 'spaceship' operator to be able to sort the
|
11
|
+
# databases by 'title', or 'name' for better visual presentation.
|
12
|
+
#
|
13
|
+
# We use 'title' for comparison, while relying on 'name' as fallback.
|
14
|
+
#
|
15
|
+
# Trying to sort a list of dbs with 'title' set only for some of them
|
16
|
+
# will obviously produce unpredictable sorting order.
|
17
|
+
def <=>(other)
|
18
|
+
if self.title and other.title
|
19
|
+
self.title <=> other.title
|
20
|
+
else
|
21
|
+
self.name <=> other.name
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def hash
|
26
|
+
@hash ||= Digest::MD5.hexdigest(self.name)
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -0,0 +1,190 @@
|
|
1
|
+
# copyright yannick . wurm at unil . ch
|
2
|
+
# Finds files, reads first char. if its '>', read 500 lines. Guess sequence type, ask user for title to format as blast database.
|
3
|
+
|
4
|
+
# TODO: bring it under SequenceServer namespace
|
5
|
+
# TODO: move the file to a 'command/' sub-directory (probably makes more sense if we have several subcommands)
|
6
|
+
# TODO: needs more love (read refactoring) overall
|
7
|
+
|
8
|
+
require 'ptools' # for File.binary?(file)
|
9
|
+
require 'find'
|
10
|
+
require 'logger'
|
11
|
+
require 'optparse'
|
12
|
+
require 'sequenceserver'
|
13
|
+
require 'sequenceserver/helpers.rb'
|
14
|
+
require 'sequenceserver/sequencehelpers.rb'
|
15
|
+
|
16
|
+
LOG = Logger.new(STDOUT)
|
17
|
+
LOG.level = Logger::INFO
|
18
|
+
|
19
|
+
class DatabaseFormatter
|
20
|
+
include SequenceServer
|
21
|
+
include Helpers
|
22
|
+
include SystemHelpers
|
23
|
+
include SequenceHelpers
|
24
|
+
|
25
|
+
attr_accessor :db_path
|
26
|
+
|
27
|
+
def initialize(db_path = nil)
|
28
|
+
@app = SequenceServer::App
|
29
|
+
@app.config = @app.parse_config
|
30
|
+
@app.binaries = @app.scan_blast_executables(@app.bin).freeze
|
31
|
+
|
32
|
+
@db_path = (db_path or @app.database)
|
33
|
+
end
|
34
|
+
|
35
|
+
def format_databases
  # Walks db_path, asks the user about every FASTA file that is not yet a
  # BLAST database, runs the resulting makeblastdb commands and finally logs
  # a summary via db_table. Exits the process when db_path is not a directory
  # or when there is nothing to format.
  unless File.directory?(db_path)
    LOG.fatal("Database directory #{db_path} not found. See './database_formatter --help' for instructions.")
    exit
  end

  # Files that blastdbcmd already lists as databases; these are skipped below.
  formatted_dbs = %x|#{@app.binaries['blastdbcmd']} -recursive -list #{db_path} -list_outfmt "%f" 2>&1|.split("\n")
  commands = []
  Find.find(db_path) do |file|
    LOG.debug("Assessing file #{file}..")
    if File.directory?(file)
      LOG.debug("Ignoring file #{file} since it is a directory")
      next
    end
    if formatted_dbs.include?(file)
      LOG.debug("Ignoring file #{file} since it is already a blast database")
      next
    end
    if File.binary?(file)
      LOG.debug("Ignoring file #{file} since it is a binary file, not plaintext as FASTA files are")
      next
    end

    if probably_fasta?(file)
      LOG.info("Found #{file}")
      ## guess whether protein or nucleotide based on first 500 lines
      first_lines = ''
      File.open(file, 'r') do |file_stream|
        file_stream.each do |line|
          first_lines += line
          break if file_stream.lineno == 500
        end
      end
      begin
        sequence_type = type_of_sequences(first_lines) # returns :protein or :nucleotide
      rescue
        LOG.warn("Unable to guess sequence type for #{file}. Skipping")
        # Move on right away; falling through would log the same warning a
        # second time in the else branch below.
        next
      end
      if [ :protein, :nucleotide ].include?(sequence_type)
        command = ask_make_db_command(file, sequence_type)
        # nil means the user declined to format this file.
        unless command.nil?
          commands.push(command)
        end
      else
        LOG.warn("Unable to guess sequence type for #{file}. Skipping")
      end
    else
      LOG.debug("Ignoring file #{file} since it was not judged to be a FASTA file.")
    end
  end
  LOG.info("Will now create DBs")
  if commands.empty?
    puts "", "#{db_path} does not contain any unformatted database."
    exit
  end
  commands.each do |command|
    LOG.info("Will run: " + command.to_s)
    system(command)
  end
  LOG.info("Done formatting databases. ")
  db_table(db_path)
end
|
97
|
+
|
98
|
+
def db_table(db_path)
  # Logs the blastdbcmd listing (type, file, title) of the databases found
  # under +db_path+.
  LOG.info("Summary of formatted blast databases:\n")
  # 2>&1 folds stderr into the captured output. The previous "&2>1" put the
  # command in the background and created a stray file named "1".
  output = %x|#{@app.binaries['blastdbcmd']} -recursive -list #{db_path} -list_outfmt "%p %f %t" 2>&1|
  LOG.info(output)
end
|
103
|
+
|
104
|
+
def probably_fasta?(file)
  # Returns true when the first character of +file+ is '>' (the start of a
  # FASTA header line), false for zero-length files and everything else.
  #
  # Uses the true/false literals: the TRUE/FALSE constants were removed in
  # Ruby 3.2 and raise NameError there.
  return false if File.zero?(file)

  File.open(file, 'r') do |file_stream|
    file_stream.readline.slice(0, 1) == '>'
  end
end
|
115
|
+
|
116
|
+
|
117
|
+
# returns the command that needs to be run to make the db
|
118
|
+
def ask_make_db_command(file, type)
  # Asks on standard input whether +file+ should be formatted as a database
  # of the given +type+ and, if so, which title to use.
  #
  # Returns the value of make_db_command(file, type, title) when the user
  # answers "y", nil otherwise.
  LOG.info("FASTA file: #{file}")
  LOG.info("Fasta type: " + type.to_s)

  response = ''
  until response.match(/^[yn]$/i) do
    LOG.info("Proceed? [y/n]: ")
    # gets returns nil at end of input; treat that as "n" instead of
    # crashing on nil.chomp.
    response = ($stdin.gets || 'n').chomp
  end

  if response.match(/y/i)
    LOG.info("Enter a database title (or will use '#{File.basename(file)}')")
    title = ($stdin.gets || '').chomp
    # Double quotes are replaced before the title is handed on.
    title.gsub!('"', "'")
    title = File.basename(file) if title.empty?

    return make_db_command(file,type,title)
  end
end
|
137
|
+
|
138
|
+
def make_db_command(file,type, title)
|
139
|
+
LOG.info("Will make #{type.to_s} database from #{file} with #{title}")
|
140
|
+
command = %|#{@app.binaries['makeblastdb']} -in #{file} -dbtype #{ type.to_s.slice(0,4)} -title "#{title}" -parse_seqids|
|
141
|
+
LOG.info("Returning: #{command}")
|
142
|
+
return(command)
|
143
|
+
end
|
144
|
+
end
|
145
|
+
|
146
|
+
OptionParser.new do |opts|
|
147
|
+
opts.banner =<<BANNER
|
148
|
+
|
149
|
+
SUMMARY
|
150
|
+
|
151
|
+
prepare BLAST databases for SequenceServer
|
152
|
+
|
153
|
+
USAGE
|
154
|
+
|
155
|
+
sequenceserver format-databases [--verbose] [blast_database_directory]
|
156
|
+
|
157
|
+
Example:
|
158
|
+
|
159
|
+
$ sequenceserver format-databases ~/db # explicitly specify a database directory
|
160
|
+
$ sequenceserver format-databases # use the database directory in config.yml
|
161
|
+
|
162
|
+
DESCRIPTION
|
163
|
+
|
164
|
+
Recursively scan the given 'blast_database_directory' for BLAST databases and
|
165
|
+
formats them for use with SequenceServer.
|
166
|
+
|
167
|
+
It automagically detects the database type, and ignores non-db files and
|
168
|
+
pre-formatted databases. The 'parse_seqids' makeblastdb options is used.
|
169
|
+
|
170
|
+
'blast_database_directory' can be passed as a command line parameter or
|
171
|
+
through a configuration file by setting the 'database' key (the same option
|
172
|
+
used by SequenceServer). Configuration file will be checked only if the
|
173
|
+
command line parameter is missing.
|
174
|
+
|
175
|
+
OPTIONS
|
176
|
+
|
177
|
+
BANNER
|
178
|
+
|
179
|
+
opts.on_tail('-h', '--help', 'Show this message') do
|
180
|
+
puts opts
|
181
|
+
exit
|
182
|
+
end
|
183
|
+
|
184
|
+
opts.on('-v', '--verbose', 'Print lots of output') do
|
185
|
+
LOG.level = Logger::DEBUG
|
186
|
+
end
|
187
|
+
end.parse!
|
188
|
+
|
189
|
+
app = DatabaseFormatter.new(ARGV[0])
|
190
|
+
app.format_databases
|
@@ -0,0 +1,136 @@
|
|
1
|
+
require 'sequenceserver/database'
|
2
|
+
|
3
|
+
module SequenceServer
|
4
|
+
module Helpers
|
5
|
+
module SystemHelpers
|
6
|
+
# Scan the given directory for blast executables. Passing `nil` scans the
|
7
|
+
# system `PATH`.
|
8
|
+
# ---
|
9
|
+
# Arguments:
|
10
|
+
# * bin(String) - absolute path to the directory containing blast binaries
|
11
|
+
# ---
|
12
|
+
# Returns:
|
13
|
+
# * a hash of blast methods, and their corresponding absolute path
|
14
|
+
# ---
|
15
|
+
# Raises:
|
16
|
+
# * IOError - if the executables can't be found
|
17
|
+
#
|
18
|
+
# > scan_blast_executables('/home/yeban/bin')
|
19
|
+
# => { "blastx"=>"/home/yeban/bin/blastx",
|
20
|
+
# "blastn"=>"/home/yeban/bin/blastn",
|
21
|
+
# ...
|
22
|
+
# }
|
23
|
+
def scan_blast_executables(bin)
  # Maps every required BLAST+ program name to the path it will be run as.
  # With +bin+ given, the path is bin/<program>; without it, the bare
  # program name is used so that lookup goes through PATH.
  #
  # Raises IOError when +bin+ is given but is not a directory, or when any
  # program fails the command? check.
  if bin && !File.directory?(bin)
    raise IOError, "Could not find '#{bin}' defined in config.yml."
  end

  binaries = {}
  %w|blastn blastp blastx tblastn tblastx blastdbcmd makeblastdb blast_formatter|.each do |method|
    # Explicit nil check rather than rescuing the TypeError File.join raises
    # for nil: an inline rescue would hide any other failure as well.
    path = bin ? File.join(bin, method) : method
    if command?(path)
      binaries[method] = path
    else
      blasturl = 'http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=Download'
      # NOTE(review): settings is not defined in this module; presumably
      # supplied by the including application - confirm.
      raise IOError, "Could not find blast binaries." +
        "\n\nYou may need to download BLAST+ from #{blasturl}." +
        " And/or edit #{settings.config_file} to indicate the location of BLAST+ binaries."
    end
  end

  #LOG.info("Config bin dir: #{bin}")
  binaries
end
|
44
|
+
|
45
|
+
# Scan the given directory (including subdirectory) for blast databases.
|
46
|
+
# ---
|
47
|
+
# Arguments:
|
48
|
+
# * db_root(String) - absolute path to the blast databases
|
49
|
+
# ---
|
50
|
+
# Returns:
|
51
|
+
# * a hash of sorted blast databases grouped by database type:
|
52
|
+
# protein, or nucleotide
|
53
|
+
# ---
|
54
|
+
# Raises:
|
55
|
+
# * IOError - if no database can be found
|
56
|
+
#
|
57
|
+
# > scan_blast_db('/home/yeban/blast_db')
|
58
|
+
# => { "protein" => [], "nucleotide" => [] }
|
59
|
+
def scan_blast_db(db_root, blastdbcmd = 'blastdbcmd')
  # Lists the BLAST databases under +db_root+ using +blastdbcmd+.
  #
  # Returns a hash mapping each Database's #hash to the Database itself.
  # Raises IOError when +db_root+ is not a directory, when no database is
  # found and the user declines to format any, or when blastdbcmd reports a
  # "BLAST Database error".
  raise IOError, "Database directory doesn't exist: #{db_root}" unless File.directory?( db_root )

  find_dbs_command = %|#{blastdbcmd} -recursive -list #{db_root} -list_outfmt "%p %f %t" 2>&1|

  begin
    db_list = %x|#{find_dbs_command}|
    if db_list.empty?
      raise IOError, "No formatted blast databases found in '#{ db_root }'."
    end
  rescue => e
    puts '', e.to_s

    print "Do you want to format your blast databases now? [Y/n]: "
    choice = gets.chomp[0,1].downcase

    unless choice == 'n'
      # NOTE(review): settings is not defined in this module; presumably
      # supplied by the including application - confirm.
      database_formatter = File.join(settings.root, 'database_formatter.rb')
      system("#{database_formatter} #{db_root}")
      # Run the listing again now that the formatter has been given a chance.
      retry
    else
      raise # let the caller decide what to do if database discovery fails
    end
  end

  if db_list.match(/BLAST Database error/)
    raise IOError, "Error parsing blast databases.\n" + "Tried: '#{find_dbs_command}'\n"+
      "It crashed with the following error: '#{db_list}'\n" +
      "Try reformatting databases using makeblastdb.\n"
  end

  db = {}

  db_list.each_line do |line|
    # each_line keeps the trailing newline, so a blank line is "\n" rather
    # than ""; strip before testing, otherwise type below is nil.
    next if line.strip.empty? # required for BLAST+ 2.2.22
    type, name, *title = line.split(' ')
    type = type.downcase.intern
    name = name.freeze
    title = title.join(' ').freeze

    # skip past all but alias file of a NCBI multi-part BLAST database
    if multipart_database_name?(name)
      log.info(%|Found a multi-part database volume at #{name} - ignoring it.|)
      next
    end

    #LOG.info("Found #{type} database: #{title} at #{name}")
    database = Database.new(name, title, type)
    db[database.hash] = database
  end

  db
end
|
112
|
+
|
113
|
+
private
|
114
|
+
|
115
|
+
# check if the given command exists and is executable
|
116
|
+
# returns True if all is good.
|
117
|
+
def command?(command)
|
118
|
+
system("which #{command} > /dev/null 2>&1")
|
119
|
+
end
|
120
|
+
|
121
|
+
# Returns true if the database name appears to be a multi-part database name.
|
122
|
+
#
|
123
|
+
# e.g.
|
124
|
+
# /home/ben/pd.ben/sequenceserver/db/nr.00 => yes
|
125
|
+
# /home/ben/pd.ben/sequenceserver/db/nr => no
|
126
|
+
# /home/ben/pd.ben/sequenceserver/db/img3.5.finished.faa.01 => yes
|
127
|
+
def multipart_database_name?(db_name)
  # True when +db_name+ is a path whose last component ends in a dot followed
  # by two or more digits (e.g. ".../nr.00"). The dot is required so that an
  # ordinary name that merely ends in digits, such as ".../ants2012", is not
  # taken for a volume of a multi-part database.
  !(db_name =~ %r{.+/\S+\.\d{2,}$}).nil?
end
|
130
|
+
end
|
131
|
+
|
132
|
+
def self.included(klass)
|
133
|
+
klass.extend SystemHelpers
|
134
|
+
end
|
135
|
+
end
|
136
|
+
end
|