sequenceserver-beta 0.8.7.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +3 -0
- data/LICENSE.Apache.txt +176 -0
- data/LICENSE.txt +69 -0
- data/README.txt +5 -0
- data/bin/sequenceserver +82 -0
- data/config.ru +6 -0
- data/example.config.yml +39 -0
- data/lib/profile_code.rb +217 -0
- data/lib/sequenceserver.rb +527 -0
- data/lib/sequenceserver/blast.rb +92 -0
- data/lib/sequenceserver/customisation.rb +60 -0
- data/lib/sequenceserver/database.rb +29 -0
- data/lib/sequenceserver/database_formatter.rb +190 -0
- data/lib/sequenceserver/helpers.rb +136 -0
- data/lib/sequenceserver/sequencehelpers.rb +93 -0
- data/lib/sequenceserver/sinatralikeloggerformatter.rb +12 -0
- data/lib/sequenceserver/version.rb +9 -0
- data/public/css/beige.css.css +254 -0
- data/public/css/bootstrap.dropdown.css +29 -0
- data/public/css/bootstrap.icons.css +155 -0
- data/public/css/bootstrap.min.css +415 -0
- data/public/css/bootstrap.modal.css +28 -0
- data/public/css/custom.css +232 -0
- data/public/img/glyphicons-halflings-white.png +0 -0
- data/public/img/glyphicons-halflings.png +0 -0
- data/public/js/bootstrap.dropdown.js +92 -0
- data/public/js/bootstrap.modal.js +7 -0
- data/public/js/bootstrap.transition.js +7 -0
- data/public/js/jquery-scrollspy.js +98 -0
- data/public/js/jquery-ui.js +14987 -0
- data/public/js/jquery.activity.js +10 -0
- data/public/js/jquery.enablePlaceholder.min.js +10 -0
- data/public/js/jquery.js +5 -0
- data/public/js/sequenceserver.blast.js +208 -0
- data/public/js/sequenceserver.js +304 -0
- data/public/js/store.min.js +2 -0
- data/sequenceserver.gemspec +49 -0
- data/tests/database/nucleotide/Sinvicta2-2-3.cdna.subset.fasta +5486 -0
- data/tests/database/nucleotide/Sinvicta2-2-3.cdna.subset.fasta.nhr +0 -0
- data/tests/database/nucleotide/Sinvicta2-2-3.cdna.subset.fasta.nin +0 -0
- data/tests/database/nucleotide/Sinvicta2-2-3.cdna.subset.fasta.nsq +0 -0
- data/tests/database/protein/Sinvicta2-2-3.prot.subset.fasta +6449 -0
- data/tests/database/protein/Sinvicta2-2-3.prot.subset.fasta.phr +0 -0
- data/tests/database/protein/Sinvicta2-2-3.prot.subset.fasta.pin +0 -0
- data/tests/database/protein/Sinvicta2-2-3.prot.subset.fasta.psq +0 -0
- data/tests/run +26 -0
- data/tests/test_sequencehelpers.rb +77 -0
- data/tests/test_sequenceserver_blast.rb +60 -0
- data/tests/test_ui.rb +104 -0
- data/tests/test_ui.rb~ +104 -0
- data/tests/ui.specs.todo +10 -0
- data/views/500.erb +22 -0
- data/views/_options.erb +144 -0
- data/views/search.erb +220 -0
- metadata +226 -0
@@ -0,0 +1,92 @@
|
|
1
|
+
require 'tempfile'
|
2
|
+
|
3
|
+
module SequenceServer
  # Simple BLAST+ wrapper.
  #
  # Writes the query to a temporary file, builds a shell command line for the
  # given BLAST+ executable, runs it, and exposes the captured output.
  class Blast

    # Extracts the message that follows a "(CArgException...)" marker in a
    # line of BLAST+ error output.
    ERROR_LINE = /\(CArgException.*\)\s(.*)/

    # result of executing command (stdout, as an Array of lines)
    attr_reader :result

    # errors as [status, message]
    attr_reader :error

    # Initialize a new blast search.
    # ---
    # Arguments:
    # * method (String)    - blast executable (shell executable, or absolute path)
    # * query (String)     - query string
    # * databases (String) - database name as returned by 'blastdbcmd -list'
    # * options (String)   - other options
    #
    # ---
    # Examples:
    #
    #   b = Blast.new("blastn", 'ATGTCCGCGAATCGATTGAACGTGCTGGTGACCCTGATGCTCGCCGTCGCGCTTCTTGTG', "S.cdna.fasta", "-num_threads 4")
    #
    #   b.run!   => true
    #   b.result => "blast output"
    def initialize(method, query, databases, options = nil)
      @method    = method
      @databases = databases

      # create a tempfile for the given query
      @qfile = Tempfile.new('sequenceserver_query')
      @qfile.puts(query)
      @qfile.close

      # Always append -html to the given options (nil options become '').
      #
      # NOTE(review): options are interpolated into a shell command line
      # unescaped; callers must validate them before passing them in.
      @options = "#{options} -html"
    end

    # Run blast every time it is called.  Returns the success status - true,
    # or false.
    #
    # On failure `error` is set to [400, message] for exit status 1 (message
    # is the text captured by ERROR_LINE, or the raw stderr lines if no line
    # matches), and to [500, stderr lines] for any other status.
    def run!
      @result, @error, status = execute(command)

      return @success = true if status == 0

      # Reset, so that a failure following an earlier successful run is not
      # reported as success by `success?`.
      @success = false

      if status == 1
        message = @error.map { |line| line[ERROR_LINE, 1] }.compact.first
        @error  = [400, message || @error]
      else
        @error = [500, @error]
      end

      false
    end

    # The command that will be executed (built once, then memoized).
    def command
      @command ||= "#{@method} -db '#{@databases}' -query '#{@qfile.path}' #{@options}"
    end

    # Return success status of the last run (nil if never run).
    def success?
      @success
    end

    private

    # Execute a command and return its stdout, stderr (both as an Array of
    # lines), and exit status.
    def execute(command)
      rfile = Tempfile.new('sequenceserver_result')
      efile = Tempfile.new('sequenceserver_error')
      [rfile, efile].each { |file| file.close }

      system("#{command} > #{rfile.path} 2> #{efile.path}")
      # exitstatus is nil if the process did not exit normally (e.g. killed
      # by a signal); run! treats that like any other non 0/1 status.
      status = $?.exitstatus

      return File.readlines(rfile.path), File.readlines(efile.path), status
    ensure
      # Either may be nil if Tempfile.new itself raised; don't mask that error.
      [rfile, efile].each { |file| file.unlink if file }
    end
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module SequenceServer
|
2
|
+
module Customisation
|
3
|
+
## When not commented out, this method is used to take a
|
4
|
+
## sequence ID, and return a hyperlink that
|
5
|
+
## replaces the hit in the BLAST output.
|
6
|
+
##
|
7
|
+
## Return the hyperlink to link to, or nil
|
8
|
+
## to not include a hyperlink.
|
9
|
+
##
|
10
|
+
## When this method
|
11
|
+
## is commented out, the default link is used. The default
|
12
|
+
## is a link to the full sequence of
|
13
|
+
## the hit (if makeblastdb has been run with
|
14
|
+
## -parse_seqids), or no link at all otherwise.
|
15
|
+
# def construct_custom_sequence_hyperlink(options)
|
16
|
+
# ## Example:
|
17
|
+
# ## sequence_id comes in like "psu|MAL13P1.200 | organism=Plasmodium_falciparum_3D7 | product=mitochondrial"
|
18
|
+
# ## output: "http://apiloc.bio21.unimelb.edu.au/apiloc/gene/MAL13P1.200"
|
19
|
+
# matches = options[:sequence_id].match(/^\s*psu\|(\S+) /)
|
20
|
+
# if matches #if the sequence_id conforms to our expectations
|
21
|
+
# # All is good. Return the hyperlink.
|
22
|
+
# return "http://apiloc.bio21.unimelb.edu.au/apiloc/gene/#{matches[1]}"
|
23
|
+
# else
|
24
|
+
# # Parsing the sequence_id didn't work. Don't include a hyperlink for this
|
25
|
+
# # sequence_id, but log that there has been a problem.
|
26
|
+
# settings.log.warn "Unable to parse sequence id `#{options[:sequence_id]}'"
|
27
|
+
# # Return nil so no hyperlink is generated.
|
28
|
+
# return nil
|
29
|
+
# end
|
30
|
+
# end
|
31
|
+
|
32
|
+
## Much like construct_custom_sequence_hyperlink, except
|
33
|
+
## instead of just a hyperlink being defined, the whole
|
34
|
+
## line as it appears in the blast results is generated.
|
35
|
+
##
|
36
|
+
## This is therefore a more flexible setup than is possible
|
37
|
+
## with construct_custom_sequence_hyperlink, because doing
|
38
|
+
## things such as adding two hyperlinks for the one hit
|
39
|
+
## is possible.
|
40
|
+
##
|
41
|
+
## When this method is commented out, the behaviour is that
|
42
|
+
## the construct_custom_sequence_hyperlink method is used,
|
43
|
+
## or, failing that, the default link is used.
|
44
|
+
# def construct_custom_sequence_hyperlinking_line(options)
|
45
|
+
# matches = options[:sequence_id].match(/^\s*psu\|(\S+) /)
|
46
|
+
# if matches #if the sequence_id conforms to our expectations
|
47
|
+
# # All is good. Return the hyperlink.
|
48
|
+
# link1 = "http://apiloc.bio21.unimelb.edu.au/apiloc/gene/#{matches[1]}"
|
49
|
+
# link2 = "http://google.com/?q=#{matches[1]}"
|
50
|
+
# return "<a href='#{link1}'>ApiLoc page</a>, <a href='#{link2}'>Google search</a>"
|
51
|
+
# else
|
52
|
+
# # Parsing the sequence_id didn't work. Don't include a hyperlink for this
|
53
|
+
# # sequence_id, but log that there has been a problem.
|
54
|
+
# settings.log.warn "Unable to parse sequence id `#{options[:sequence_id]}'"
|
55
|
+
# # Return nil so no hyperlink is generated.
|
56
|
+
# return nil
|
57
|
+
# end
|
58
|
+
# end
|
59
|
+
end
|
60
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'digest/md5'
|
2
|
+
|
3
|
+
module SequenceServer
  # Describes one BLAST database: its name, title, and type.
  class Database < Struct.new("Database", :name, :title, :type)
    # Human readable one-line description: "<type>: <title> <name>".
    def to_s
      "#{type}: #{title} #{name}"
    end

    # It's not very meaningful to compare Database objects, however,
    # we still add the 'spaceship' operator to be able to sort the
    # databases by 'title', or 'name' for better visual presentation.
    #
    # We use 'title' for comparison, while relying on 'name' as fallback
    # when either title is missing (nil or empty).
    #
    # Trying to sort a list of dbs with 'title' set only for some of them
    # will obviously produce unpredictable sorting order.
    #
    # Returns nil, as is conventional for <=>, if `other` is not a Database.
    def <=>(other)
      return nil unless other.is_a?(Database)

      mine, theirs = title.to_s, other.title.to_s
      if mine.empty? or theirs.empty?
        name <=> other.name
      else
        mine <=> theirs
      end
    end

    # MD5 hex digest (String) of the database name, computed once.
    #
    # NOTE(review): this overrides Object#hash with a String, so Database
    # instances themselves should not be used as Hash keys.
    def hash
      @hash ||= Digest::MD5.hexdigest(name)
    end
  end
end
|
@@ -0,0 +1,190 @@
|
|
1
|
+
# copyright yannick . wurm at unil . ch
|
2
|
+
# Finds files and reads their first character. If it is '>', reads the first 500 lines, guesses the sequence type, and asks the user for a title to format the file as a BLAST database.
|
3
|
+
|
4
|
+
# TODO: bring it under SequenceServer namespace
|
5
|
+
# TODO: move the file to a 'command/' sub-directory (probably makes more sense if we have several subcommands)
|
6
|
+
# TODO: needs more love (read refactoring) overall
|
7
|
+
|
8
|
+
require 'ptools' # for File.binary?(file)
|
9
|
+
require 'find'
|
10
|
+
require 'logger'
|
11
|
+
require 'optparse'
|
12
|
+
require 'sequenceserver'
|
13
|
+
require 'sequenceserver/helpers.rb'
|
14
|
+
require 'sequenceserver/sequencehelpers.rb'
|
15
|
+
|
16
|
+
# Logger used throughout this script.  Writes to STDOUT and starts out at
# INFO level.
LOG = Logger.new(STDOUT).tap { |logger| logger.level = Logger::INFO }
|
18
|
+
|
19
|
+
# Interactive helper that walks a directory, finds FASTA files that are not
# yet BLAST databases, and (after asking the user) formats them with
# makeblastdb.
class DatabaseFormatter
  include SequenceServer
  include Helpers
  include SystemHelpers
  include SequenceHelpers

  # Directory that is scanned for FASTA files.
  attr_accessor :db_path

  # Arguments:
  # * db_path (String) - directory to scan; defaults to the database
  #   directory of SequenceServer::App when nil.
  #
  # Note that this (re)loads the configuration and the list of BLAST
  # binaries on SequenceServer::App itself.
  def initialize(db_path = nil)
    @app = SequenceServer::App
    @app.config   = @app.parse_config
    @app.binaries = @app.scan_blast_executables(@app.bin).freeze

    @db_path = (db_path || @app.database)
  end

  # Scan db_path recursively, ask the user about each unformatted FASTA file
  # found, run the resulting makeblastdb commands, and print a summary.
  #
  # Exits the process if db_path is not a directory (status 1), or if there
  # is nothing to format (status 0).
  def format_databases
    unless File.directory?(db_path)
      LOG.fatal("Database directory #{db_path} not found. See './database_formatter --help' for instructions.")
      exit 1
    end

    formatted_dbs = %x|#{@app.binaries['blastdbcmd']} -recursive -list #{db_path} -list_outfmt "%f" 2>&1|.split("\n")
    commands = []
    Find.find(db_path) do |file|
      LOG.debug("Assessing file #{file}..")
      if File.directory?(file)
        LOG.debug("Ignoring file #{file} since it is a directory")
        next
      end
      if formatted_dbs.include?(file)
        LOG.debug("Ignoring file #{file} since it is already a blast database")
        next
      end
      if File.binary?(file)
        LOG.debug("Ignoring file #{file} since it is a binary file, not plaintext as FASTA files are")
        next
      end
      unless probably_fasta?(file)
        LOG.debug("Ignoring file #{file} since it was not judged to be a FASTA file.")
        next
      end

      LOG.info("Found #{file}")
      sequence_type = guess_sequence_type(file)
      unless [:protein, :nucleotide].include?(sequence_type)
        LOG.warn("Unable to guess sequence type for #{file}. Skipping")
        next
      end

      command = ask_make_db_command(file, sequence_type)
      commands.push(command) unless command.nil?
    end

    LOG.info("Will now create DBs")
    if commands.empty?
      puts "", "#{db_path} does not contain any unformatted database."
      exit
    end
    commands.each do |command|
      LOG.info("Will run: " + command.to_s)
      # system returns false/nil if the command failed or could not be run
      LOG.error("Command failed: #{command}") unless system(command)
    end
    LOG.info("Done formatting databases. ")
    db_table(db_path)
  end

  # Log a summary (type, path, title) of the formatted databases in db_path.
  def db_table(db_path)
    LOG.info("Summary of formatted blast databases:\n")
    output = %x|#{@app.binaries['blastdbcmd']} -recursive -list #{db_path} -list_outfmt "%p %f %t" 2>&1|
    LOG.info(output)
  end

  # Returns true if the file is non-empty and its first character is '>'.
  def probably_fasta?(file)
    return false if File.zero?(file)
    File.open(file, 'r') do |file_stream|
      file_stream.readline.slice(0, 1) == '>'
    end
  end

  # Asks the user whether to format the given file, and for a database title.
  #
  # Returns the command that needs to be run to make the db, or nil if the
  # user declined (or STDIN reached end of file before a y/n answer).
  def ask_make_db_command(file, type)
    LOG.info("FASTA file: #{file}")
    LOG.info("Fasta type: " + type.to_s)

    response = ''
    until response.match(/^[yn]$/i) do
      LOG.info("Proceed? [y/n]: ")
      answer = STDIN.gets
      return nil if answer.nil? # end of input: treat as "no"
      response = answer.chomp
    end
    return nil unless response.match(/y/i)

    LOG.info("Enter a database title (or will use '#{File.basename(file)}'")
    # to_s: gets returns nil at end of input; fall back to the default title.
    # Double quotes are replaced since the title is wrapped in double quotes
    # in the command line.
    title = STDIN.gets.to_s.chomp.gsub('"', "'")
    title = File.basename(file) if title.empty?

    make_db_command(file, type, title)
  end

  # Returns the makeblastdb command line (String) for the given file.
  #
  # The first four characters of the type are passed as -dbtype.
  def make_db_command(file,type, title)
    LOG.info("Will make #{type.to_s} database from #{file} with #{title}")
    command = %|#{@app.binaries['makeblastdb']} -in #{file} -dbtype #{ type.to_s.slice(0,4)} -title "#{title}" -parse_seqids|
    LOG.info("Returning: #{command}")
    command
  end

  private

  # Guess the sequence type from the first 500 lines of the file, using
  # type_of_sequences.  Returns nil if type_of_sequences raises.
  def guess_sequence_type(file)
    first_lines = ''
    File.open(file, 'r') do |file_stream|
      file_stream.each do |line|
        first_lines << line
        break if file_stream.lineno == 500
      end
    end

    begin
      type_of_sequences(first_lines)
    rescue StandardError
      nil
    end
  end
end
|
145
|
+
|
146
|
+
# Command line entry point: parse the options, then format the databases in
# the directory given as first remaining argument (if any).
option_parser = OptionParser.new
option_parser.banner = <<BANNER

SUMMARY

prepare BLAST databases for SequenceServer

USAGE

sequenceserver format-databases [--verbose] [blast_database_directory]

Example:

$ sequenceserver format-databases ~/db # explicitly specify a database directory
$ sequenceserver format-databases # use the database directory in config.yml

DESCRIPTION

Recursively scan the given 'blast_database_directory' for BLAST databases and
formats them for use with SequenceServer.

It automagically detects the database type, and ignores non-db files and
pre-formatted databases. The 'parse_seqids' makeblastdb options is used.

'blast_database_directory' can be passed as a command line parameter or
through a configuration file by setting the 'database' key (the same option
used by SequenceServer). Configuration file will be checked only if the
command line parameter is missing.

OPTIONS

BANNER

# -h / --help: print the usage text and quit
option_parser.on_tail('-h', '--help', 'Show this message') do
  puts option_parser
  exit
end

# -v / --verbose: also show debug messages
option_parser.on('-v', '--verbose', 'Print lots of output') do
  LOG.level = Logger::DEBUG
end

# parse! removes the recognised switches from ARGV
option_parser.parse!

DatabaseFormatter.new(ARGV[0]).format_databases
|
@@ -0,0 +1,136 @@
|
|
1
|
+
require 'sequenceserver/database'
|
2
|
+
|
3
|
+
module SequenceServer
  module Helpers
    module SystemHelpers
      # Scan the given directory for blast executables. Passing `nil` scans the
      # system `PATH`.
      # ---
      # Arguments:
      # * bin(String) - absolute path to the directory containing blast binaries
      # ---
      # Returns:
      # * a hash of blast methods, and their corresponding absolute path
      #   (or the bare executable name, if `bin` is nil)
      # ---
      # Raises:
      # * IOError - if the executables can't be found
      #
      # > scan_blast_executables('/home/yeban/bin')
      # => { "blastx"=>"/home/yeban/bin/blastx",
      #      "blastn"=>"/home/yeban/bin/blastn",
      #      ...
      #    }
      def scan_blast_executables(bin)
        if bin and not File.directory?(bin)
          raise IOError, "Could not find '#{bin}' defined in config.yml."
        end

        binaries = {}
        %w|blastn blastp blastx tblastn tblastx blastdbcmd makeblastdb blast_formatter|.each do |method|
          # without a bin directory the bare name is looked up on PATH
          path = bin ? File.join(bin, method) : method
          if command?(path)
            binaries[method] = path
          else
            blasturl = 'http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=Download'
            raise IOError, "Could not find blast binaries." +
              "\n\nYou may need to download BLAST+ from #{blasturl}." +
              " And/or edit #{settings.config_file} to indicate the location of BLAST+ binaries."
          end
        end

        #LOG.info("Config bin dir: #{bin}")
        binaries
      end

      # Scan the given directory (including subdirectory) for blast databases.
      #
      # If no database is found, the user is asked whether the databases
      # should be formatted now; if so, the database formatter is run and the
      # scan is repeated.
      # ---
      # Arguments:
      # * db_root(String)    - absolute path to the blast databases
      # * blastdbcmd(String) - blastdbcmd executable to use
      # ---
      # Returns:
      # * a hash that maps the id of each database found (Database#hash) to
      #   the corresponding Database object
      # ---
      # Raises:
      # * IOError - if db_root is not a directory, if no database can be found
      #   and the user declines to format them, or if blastdbcmd reports a
      #   database error
      #
      # > scan_blast_db('/home/yeban/blast_db')
      # => { <id> => <Database>, ... }
      def scan_blast_db(db_root, blastdbcmd = 'blastdbcmd')
        raise IOError, "Database directory doesn't exist: #{db_root}" unless File.directory?( db_root )

        find_dbs_command = %|#{blastdbcmd} -recursive -list #{db_root} -list_outfmt "%p %f %t" 2>&1|

        begin
          db_list = %x|#{find_dbs_command}|
          if db_list.empty?
            raise IOError, "No formatted blast databases found in '#{ db_root }'."
          end
        rescue => e
          puts '', e.to_s

          print "Do you want to format your blast databases now? [Y/n]: "
          # Read from STDIN explicitly: a bare `gets` reads from the files
          # named in ARGV, if there are any.
          answer = STDIN.gets
          # End of input counts as "no", so that a non-interactive run can't
          # retry forever.  An empty answer defaults to "yes".
          choice = answer ? answer.chomp[0,1].downcase : 'n'

          # let the caller decide what to do if database discovery fails
          raise if choice == 'n'

          database_formatter = File.join(settings.root, 'database_formatter.rb')
          system("#{database_formatter} #{db_root}")
          retry
        end

        if db_list.match(/BLAST Database error/)
          raise IOError, "Error parsing blast databases.\n" + "Tried: '#{find_dbs_command}'\n"+
            "It crashed with the following error: '#{db_list}'\n" +
            "Try reformatting databases using makeblastdb.\n"
        end

        db = {}

        db_list.each_line do |line|
          # required for BLAST+ 2.2.22; each_line keeps the trailing newline,
          # so strip before testing for a blank line
          next if line.strip.empty?
          type, name, *title = line.split(' ')
          type  = type.downcase.intern
          name  = name.freeze
          title = title.join(' ').freeze

          # skip past all but alias file of a NCBI multi-part BLAST database
          if multipart_database_name?(name)
            # NOTE(review): `log` is not defined in this module; presumably
            # provided by the including class - confirm.
            log.info(%|Found a multi-part database volume at #{name} - ignoring it.|)
            next
          end

          #LOG.info("Found #{type} database: #{title} at #{name}")
          database = Database.new(name, title, type)
          db[database.hash] = database
        end

        db
      end

      private

      # check if the given command exists and is executable
      # returns true if `which` can locate it.
      def command?(command)
        system("which #{command} > /dev/null 2>&1")
      end

      # Returns true if the database name appears to be a multi-part database
      # name, i.e. it ends in a dot followed by a two or three digit volume
      # number.
      #
      # e.g.
      # /home/ben/pd.ben/sequenceserver/db/nr.00 => yes
      # /home/ben/pd.ben/sequenceserver/db/nr => no
      # /home/ben/pd.ben/sequenceserver/db/img3.5.finished.faa.01 => yes
      # /home/ben/pd.ben/sequenceserver/db/genome2012 => no
      def multipart_database_name?(db_name)
        !(db_name.match(/.+\/\S+\.\d{2,3}$/).nil?)
      end
    end

    # Make the system helpers available as class methods of the including
    # class as well.
    def self.included(klass)
      klass.extend SystemHelpers
    end
  end
end
|