sequenceserver-beta 0.8.7.beta1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +3 -0
  3. data/LICENSE.Apache.txt +176 -0
  4. data/LICENSE.txt +69 -0
  5. data/README.txt +5 -0
  6. data/bin/sequenceserver +82 -0
  7. data/config.ru +6 -0
  8. data/example.config.yml +39 -0
  9. data/lib/profile_code.rb +217 -0
  10. data/lib/sequenceserver.rb +527 -0
  11. data/lib/sequenceserver/blast.rb +92 -0
  12. data/lib/sequenceserver/customisation.rb +60 -0
  13. data/lib/sequenceserver/database.rb +29 -0
  14. data/lib/sequenceserver/database_formatter.rb +190 -0
  15. data/lib/sequenceserver/helpers.rb +136 -0
  16. data/lib/sequenceserver/sequencehelpers.rb +93 -0
  17. data/lib/sequenceserver/sinatralikeloggerformatter.rb +12 -0
  18. data/lib/sequenceserver/version.rb +9 -0
  19. data/public/css/beige.css.css +254 -0
  20. data/public/css/bootstrap.dropdown.css +29 -0
  21. data/public/css/bootstrap.icons.css +155 -0
  22. data/public/css/bootstrap.min.css +415 -0
  23. data/public/css/bootstrap.modal.css +28 -0
  24. data/public/css/custom.css +232 -0
  25. data/public/img/glyphicons-halflings-white.png +0 -0
  26. data/public/img/glyphicons-halflings.png +0 -0
  27. data/public/js/bootstrap.dropdown.js +92 -0
  28. data/public/js/bootstrap.modal.js +7 -0
  29. data/public/js/bootstrap.transition.js +7 -0
  30. data/public/js/jquery-scrollspy.js +98 -0
  31. data/public/js/jquery-ui.js +14987 -0
  32. data/public/js/jquery.activity.js +10 -0
  33. data/public/js/jquery.enablePlaceholder.min.js +10 -0
  34. data/public/js/jquery.js +5 -0
  35. data/public/js/sequenceserver.blast.js +208 -0
  36. data/public/js/sequenceserver.js +304 -0
  37. data/public/js/store.min.js +2 -0
  38. data/sequenceserver.gemspec +49 -0
  39. data/tests/database/nucleotide/Sinvicta2-2-3.cdna.subset.fasta +5486 -0
  40. data/tests/database/nucleotide/Sinvicta2-2-3.cdna.subset.fasta.nhr +0 -0
  41. data/tests/database/nucleotide/Sinvicta2-2-3.cdna.subset.fasta.nin +0 -0
  42. data/tests/database/nucleotide/Sinvicta2-2-3.cdna.subset.fasta.nsq +0 -0
  43. data/tests/database/protein/Sinvicta2-2-3.prot.subset.fasta +6449 -0
  44. data/tests/database/protein/Sinvicta2-2-3.prot.subset.fasta.phr +0 -0
  45. data/tests/database/protein/Sinvicta2-2-3.prot.subset.fasta.pin +0 -0
  46. data/tests/database/protein/Sinvicta2-2-3.prot.subset.fasta.psq +0 -0
  47. data/tests/run +26 -0
  48. data/tests/test_sequencehelpers.rb +77 -0
  49. data/tests/test_sequenceserver_blast.rb +60 -0
  50. data/tests/test_ui.rb +104 -0
  51. data/tests/test_ui.rb~ +104 -0
  52. data/tests/ui.specs.todo +10 -0
  53. data/views/500.erb +22 -0
  54. data/views/_options.erb +144 -0
  55. data/views/search.erb +220 -0
  56. metadata +226 -0
@@ -0,0 +1,92 @@
1
+ require 'tempfile'
2
+
3
module SequenceServer
  # Simple BLAST+ wrapper.
  #
  # Writes the query to a temporary file, builds the shell command for the
  # requested BLAST+ program and, on #run!, captures the program's stdout,
  # stderr and exit status.
  class Blast

    # Matches a stderr line that reports a CArgException and captures the
    # text following the closing parenthesis.
    ERROR_LINE = /\(CArgException.*\)\s(.*)/

    # result of executing command (lines written to stdout)
    attr_reader :result

    # After a successful run: the lines written to stderr.
    # After a failed run: errors as [status, message].
    attr_reader :error

    # Initialize a new blast search.
    # ---
    # Arguments:
    # * method (String)    - blast executable (shell executable, or absolute path)
    # * query (String)     - query string
    # * databases (String) - database name as returned by 'blastdbcmd -list'
    # * options (String)   - other options
    #
    # ---
    # Examples:
    #
    #   b = Blast.new("blastn", 'ATGTCCGCGAATCGATTGAACGTGCTGGTGACCCTGATGCTCGCCGTCGCGCTTCTTGTG', "S.cdna.fasta", "-num_threads 4")
    #
    #   b.run!   => true
    #   b.result => "blast output"
    def initialize(method, query, databases, options = nil)
      @method    = method
      @databases = databases

      # The query is handed to BLAST+ as a file. The Tempfile object is kept
      # in an instance variable for as long as this search object lives.
      @qfile = Tempfile.new('sequenceserver_query')
      @qfile.puts(query)
      @qfile.close

      # Always append -html to whatever options the caller supplied.
      @options = "#{options} -html"
    end

    # Run blast every time it is called. Returns the success status - true,
    # or false.
    #
    # On failure, #error is set to [400, message] when the exit status is 1
    # (message is the text captured by ERROR_LINE, or all stderr lines if no
    # line matches) and to [500, stderr lines] for any other status.
    def run!
      @result, @error, status = execute(command)

      # Record the outcome of *this* run so that #success? never reports a
      # stale result from an earlier invocation.
      @success = (status == 0)
      return true if @success

      @error =
        if status == 1
          [400, argument_error_message(@error) || @error]
        else
          [500, @error]
        end

      false
    end

    # The command that will be executed. Built once and then reused.
    def command
      @command ||= "#{@method} -db '#{@databases}' -query '#{@qfile.path}' #{@options}"
    end

    # Return success status of the most recent #run! (nil if never run).
    def success?
      @success
    end

    private

    # Returns the text captured by ERROR_LINE from the first matching line,
    # or nil when no line matches.
    def argument_error_message(lines)
      lines.each do |line|
        match = ERROR_LINE.match(line)
        return match[1] if match
      end
      nil
    end

    # Execute a command and return its stdout, stderr, and exit status.
    #
    # stdout and stderr are redirected to temporary files, read back as
    # arrays of lines, and the files are removed afterwards.
    def execute(command)
      rfile = Tempfile.new('sequenceserver_result')
      efile = Tempfile.new('sequenceserver_error')
      [rfile, efile].each { |file| file.close }

      system("#{command} > #{rfile.path} 2> #{efile.path}")
      status = $?.exitstatus

      return File.readlines(rfile.path), File.readlines(efile.path), status
    ensure
      # Either variable is still nil if creating its Tempfile raised.
      rfile.unlink if rfile
      efile.unlink if efile
    end
  end
end
@@ -0,0 +1,60 @@
1
+ module SequenceServer
2
+ module Customisation
3
+ ## When not commented out, this method is used to take a
4
+ ## sequence ID, and return a hyperlink that
5
+ ## replaces the hit in the BLAST output.
6
+ ##
7
+ ## Return the hyperlink to link to, or nil
8
+ ## to not include a hyperlink.
9
+ ##
10
+ ## When this method
11
+ ## is commented out, the default link is used. The default
12
+ ## is a link to the full sequence of
13
+ ## the hit (if makeblastdb has been run with
14
+ ## -parse_seqids), or no link at all otherwise.
15
+ # def construct_custom_sequence_hyperlink(options)
16
+ # ## Example:
17
+ # ## sequence_id comes in like "psu|MAL13P1.200 | organism=Plasmodium_falciparum_3D7 | product=mitochondrial"
18
+ # ## output: "http://apiloc.bio21.unimelb.edu.au/apiloc/gene/MAL13P1.200"
19
+ # matches = options[:sequence_id].match(/^\s*psu\|(\S+) /)
20
+ # if matches #if the sequence_id conforms to our expectations
21
+ # # All is good. Return the hyperlink.
22
+ # return "http://apiloc.bio21.unimelb.edu.au/apiloc/gene/#{matches[1]}"
23
+ # else
24
+ # # Parsing the sequence_id didn't work. Don't include a hyperlink for this
25
+ # # sequence_id, but log that there has been a problem.
26
+ # settings.log.warn "Unable to parse sequence id `#{options[:sequence_id]}'"
27
+ # # Return nil so no hyperlink is generated.
28
+ # return nil
29
+ # end
30
+ # end
31
+
32
+ ## Much like construct_custom_sequence_hyperlink, except
33
+ ## instead of just a hyperlink being defined, the whole
34
+ ## line as it appears in the blast results is generated.
35
+ ##
36
+ ## This is therefore a more flexible setup than is possible
37
+ ## with construct_custom_sequence_hyperlink, because doing
38
+ ## things such as adding two hyperlinks for the one hit
39
+ ## are possible.
40
+ ##
41
+ ## When this method is commented out, the behaviour is that
42
+ ## the construct_custom_sequence_hyperlink method is used,
43
+ ## or, failing that, the default behaviour is used.
44
+ # def construct_custom_sequence_hyperlinking_line(options)
45
+ # matches = options[:sequence_id].match(/^\s*psu\|(\S+) /)
46
+ # if matches #if the sequence_id conforms to our expectations
47
+ # # All is good. Return the hyperlink.
48
+ # link1 = "http://apiloc.bio21.unimelb.edu.au/apiloc/gene/#{matches[1]}"
49
+ # link2 = "http://google.com/?q=#{matches[1]}"
50
+ # return "<a href='#{link1}'>ApiLoc page</a>, <a href='#{link2}'>Google search</a>"
51
+ # else
52
+ # # Parsing the sequence_id didn't work. Don't include a hyperlink for this
53
+ # # sequence_id, but log that there has been a problem.
54
+ # settings.log.warn "Unable to parse sequence id `#{options[:sequence_id]}'"
55
+ # # Return nil so no hyperlink is generated.
56
+ # return nil
57
+ # end
58
+ # end
59
+ end
60
+ end
@@ -0,0 +1,29 @@
1
+ require 'digest/md5'
2
+
3
module SequenceServer
  # A BLAST database described by its file name (path), its title and its
  # type.
  class Database < Struct.new("Database", :name, :title, :type)
    # Human readable description: "<type>: <title> <name>".
    def to_s
      "#{type}: #{title} #{name}"
    end

    # Its not very meaningful to compare Database objects, however,
    # we still add the 'spaceship' operator to be able to sort the
    # databases by 'title', or 'name' for better visual presentation.
    #
    # We use 'title' for comparison, while relying on 'name' as fallback.
    #
    # Trying to sort a list of dbs with 'title' set only for some of them
    # will obviously produce unpredictable sorting order.
    def <=>(other)
      if title && other.title
        title <=> other.title
      else
        name <=> other.name
      end
    end

    # MD5 hex digest of the database name.
    #
    # NOTE: this intentionally keeps returning a String. That shadows the
    # Integer-returning Object#hash that Ruby's Hash relies on.
    #
    # The digest is computed on every call instead of being memoised: a
    # cached value would go stale after `name=` and could not be stored on a
    # frozen instance.
    def hash
      Digest::MD5.hexdigest(name)
    end
  end
end
@@ -0,0 +1,190 @@
1
+ # copyright yannick . wurm at unil . ch
2
+ # Finds files and reads the first char of each. If it is '>', reads the first 500 lines, guesses the sequence type, and asks the user for a title to format the file as a BLAST database.
3
+
4
+ # TODO: bring it under SequenceServer namespace
5
+ # TODO: move the file to a 'command/' sub-directory (probably makes more sense if we have several subcommands)
6
+ # TODO: needs more love (read refactoring) overall
7
+
8
+ require 'ptools' # for File.binary?(file)
9
+ require 'find'
10
+ require 'logger'
11
+ require 'optparse'
12
+ require 'sequenceserver'
13
+ require 'sequenceserver/helpers.rb'
14
+ require 'sequenceserver/sequencehelpers.rb'
15
+
16
+ LOG = Logger.new(STDOUT)
17
+ LOG.level = Logger::INFO
18
+
19
+ class DatabaseFormatter
20
+ include SequenceServer
21
+ include Helpers
22
+ include SystemHelpers
23
+ include SequenceHelpers
24
+
25
+ attr_accessor :db_path
26
+
27
# Prepares the SequenceServer app (parsed config and located BLAST binaries)
# and records which directory to format.
#
# db_path - directory to scan; when nil (or false) the app's `database`
#           setting is used instead.
def initialize(db_path = nil)
  @app = SequenceServer::App
  @app.config   = @app.parse_config
  # The binaries hash is frozen right after it has been built.
  @app.binaries = @app.scan_blast_executables(@app.bin).freeze

  @db_path = db_path || @app.database
end
34
+
35
# Walks db_path recursively, asks the user about every FASTA file that is
# not already a BLAST database, runs the collected makeblastdb commands and
# finally logs a summary through db_table.
#
# Calls `exit` when db_path is not a directory, or when no file needs
# formatting.
def format_databases
  unless File.directory?(db_path)
    LOG.fatal("Database directory #{db_path} not found. See './database_formatter --help' for instructions.")
    exit
  end

  # Files that blastdbcmd already reports as formatted databases.
  formatted_dbs = %x|#{@app.binaries['blastdbcmd']} -recursive -list #{db_path} -list_outfmt "%f" 2>&1|.split("\n")
  commands = []
  Find.find(db_path) do |file|
    LOG.debug("Assessing file #{file}..")
    if File.directory?(file)
      LOG.debug("Ignoring file #{file} since it is a directory")
      next
    end
    if formatted_dbs.include?(file)
      LOG.debug("Ignoring file #{file} since it is already a blast database")
      next
    end
    if File.binary?(file)
      LOG.debug("Ignoring file #{file} since it is a binary file, not plaintext as FASTA files are")
      next
    end
    unless probably_fasta?(file)
      LOG.debug("Ignoring file #{file} since it was not judged to be a FASTA file.")
      next
    end

    LOG.info("Found #{file}")
    ## guess whether protein or nucleotide based on first 500 lines
    first_lines = File.open(file, 'r') { |file_stream| file_stream.first(500).join }

    # A failure to guess is reported once, below, together with any other
    # unusable answer - not once here and a second time there.
    sequence_type =
      begin
        type_of_sequences(first_lines) # returns :protein or :nucleotide
      rescue
        nil
      end

    unless [:protein, :nucleotide].include?(sequence_type)
      LOG.warn("Unable to guess sequence type for #{file}. Skipping")
      next
    end

    # nil means the user declined to format this file.
    command = ask_make_db_command(file, sequence_type)
    commands.push(command) unless command.nil?
  end

  LOG.info("Will now create DBs")
  if commands.empty?
    puts "", "#{db_path} does not contain any unformatted database."
    exit
  end

  commands.each do |command|
    LOG.info("Will run: " + command.to_s)
    system(command)
  end
  LOG.info("Done formatting databases. ")
  db_table(db_path)
end
97
+
98
# Logs a summary of the BLAST databases that blastdbcmd finds under db_path.
#
# stderr is merged into stdout (2>&1) so that error output from blastdbcmd
# ends up in the log as well.
def db_table(db_path)
  LOG.info("Summary of formatted blast databases:\n")
  output = %x|#{@app.binaries['blastdbcmd']} -recursive -list #{db_path} -list_outfmt "%p %f %t" 2>&1|
  LOG.info(output)
end
103
+
104
# Returns true if the first character of the file's first line is '>',
# false otherwise (including for an empty file).
def probably_fasta?(file)
  # readline would raise EOFError on an empty file.
  return false if File.zero?(file)

  File.open(file, 'r') do |file_stream|
    file_stream.readline.slice(0, 1) == '>'
  end
end
115
+
116
+
117
# Asks the user whether `file` should be formatted and, if so, under which
# title.
#
# Returns the command that needs to be run to make the db, or nil when the
# user answers 'n' or input ends before an answer is given.
def ask_make_db_command(file, type)
  LOG.info("FASTA file: #{file}")
  LOG.info("Fasta type: " + type.to_s)

  response = ''
  until response.match(/^[yn]$/i)
    LOG.info("Proceed? [y/n]: ")
    answer = $stdin.gets
    return nil if answer.nil? # end of input: nothing more can be asked
    response = answer.chomp
  end
  return nil unless response.match(/y/i)

  LOG.info("Enter a database title (or will use '#{File.basename(file)}')")
  # Double quotes are replaced because the title is later wrapped in double
  # quotes on the makeblastdb command line. End of input counts as "no title".
  title = $stdin.gets.to_s.chomp.gsub('"', "'")
  title = File.basename(file) if title.empty?

  make_db_command(file, type, title)
end
137
+
138
# Builds (and logs) the makeblastdb command line for `file`.
#
# file  - path of the FASTA file to format
# type  - sequence type; the first four characters of its string form are
#         passed as -dbtype (e.g. :protein => "prot")
# title - database title, passed in double quotes
#
# Returns the command String; nothing is executed here.
def make_db_command(file, type, title)
  LOG.info("Will make #{type} database from #{file} with #{title}")

  makeblastdb = @app.binaries['makeblastdb']
  dbtype      = type.to_s.slice(0, 4)
  command     = %|#{makeblastdb} -in #{file} -dbtype #{dbtype} -title "#{title}" -parse_seqids|

  LOG.info("Returning: #{command}")
  command
end
144
+ end
145
+
146
+ OptionParser.new do |opts|
147
+ opts.banner =<<BANNER
148
+
149
+ SUMMARY
150
+
151
+ prepare BLAST databases for SequenceServer
152
+
153
+ USAGE
154
+
155
+ sequenceserver format-databases [--verbose] [blast_database_directory]
156
+
157
+ Example:
158
+
159
+ $ sequenceserver format-databases ~/db # explicitly specify a database directory
160
+ $ sequenceserver format-databases # use the database directory in config.yml
161
+
162
+ DESCRIPTION
163
+
164
+ Recursively scan the given 'blast_database_directory' for BLAST databases and
165
+ formats them for use with SequenceServer.
166
+
167
+ It automagically detects the database type, and ignores non-db files and
168
+ pre-formatted databases. The 'parse_seqids' makeblastdb options is used.
169
+
170
+ 'blast_database_directory' can be passed as a command line parameter or
171
+ through a configuration file by setting the 'database' key (the same option
172
+ used by SequenceServer). Configuration file will be checked only if the
173
+ command line parameter is missing.
174
+
175
+ OPTIONS
176
+
177
+ BANNER
178
+
179
+ opts.on_tail('-h', '--help', 'Show this message') do
180
+ puts opts
181
+ exit
182
+ end
183
+
184
+ opts.on('-v', '--verbose', 'Print lots of output') do
185
+ LOG.level = Logger::DEBUG
186
+ end
187
+ end.parse!
188
+
189
+ app = DatabaseFormatter.new(ARGV[0])
190
+ app.format_databases
@@ -0,0 +1,136 @@
1
+ require 'sequenceserver/database'
2
+
3
+ module SequenceServer
4
+ module Helpers
5
+ module SystemHelpers
6
# Scan the given directory for blast executables. Passing `nil` looks the
# executables up by their bare names instead (i.e. on the system `PATH`).
# ---
# Arguments:
# * bin(String) - absolute path to the directory containing blast binaries
# ---
# Returns:
# * a hash of blast methods, and their corresponding path
# ---
# Raises:
# * IOError - if `bin` is given but is not a directory, or if any of the
#             executables can't be found
#
# > scan_blast_executables('/home/yeban/bin')
# => { "blastx"=>"/home/yeban/bin/blastx",
#      "blastn"=>"/home/yeban/bin/blastn",
#      ...
#    }
def scan_blast_executables(bin)
  if bin and not File.directory?(bin)
    raise IOError, "Could not find '#{bin}' defined in config.yml."
  end

  binaries = {}
  %w|blastn blastp blastx tblastn tblastx blastdbcmd makeblastdb blast_formatter|.each do |method|
    # Without a bin directory the bare program name is used.
    path = bin ? File.join(bin, method) : method

    unless command?(path)
      blasturl = 'http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=Download'
      raise IOError, "Could not find blast binaries." +
        "\n\nYou may need to download BLAST+ from #{blasturl}." +
        " And/or edit #{settings.config_file} to indicate the location of BLAST+ binaries."
    end

    binaries[method] = path
  end

  #LOG.info("Config bin dir: #{bin}")
  binaries
end
44
+
45
# Scan the given directory (including subdirectory) for blast databases.
# ---
# Arguments:
# * db_root(String)    - absolute path to the blast databases
# * blastdbcmd(String) - blastdbcmd executable to list the databases with
# ---
# Returns:
# * a hash that maps each database's `hash` to its Database object
# ---
# Raises:
# * IOError - if db_root is not a directory, if no database can be found
#             (and the user declines to format any), or if blastdbcmd
#             reports a "BLAST Database error"
#
# > scan_blast_db('/home/yeban/blast_db')
# => { "<database hash>" => #<Database>, ... }
def scan_blast_db(db_root, blastdbcmd = 'blastdbcmd')
  raise IOError, "Database directory doesn't exist: #{db_root}" unless File.directory?(db_root)

  find_dbs_command = %|#{blastdbcmd} -recursive -list #{db_root} -list_outfmt "%p %f %t" 2>&1|

  begin
    db_list = %x|#{find_dbs_command}|
    if db_list.empty?
      raise IOError, "No formatted blast databases found in '#{ db_root }'."
    end
  rescue => e
    puts '', e.to_s

    print "Do you want to format your blast databases now? [Y/n]: "
    choice = gets.chomp[0,1].downcase

    # let the caller decide what to do if database discovery fails
    raise if choice == 'n'

    # Anything but 'n' (including just pressing return) formats the
    # databases and then looks for them again.
    database_formatter = File.join(settings.root, 'database_formatter.rb')
    system("#{database_formatter} #{db_root}")
    retry
  end

  if db_list.match(/BLAST Database error/)
    raise IOError, "Error parsing blast databases.\n" + "Tried: '#{find_dbs_command}'\n"+
      "It crashed with the following error: '#{db_list}'\n" +
      "Try reformatting databases using makeblastdb.\n"
  end

  db = {}

  db_list.each_line do |line|
    # required for BLAST+ 2.2.22
    # each_line keeps the trailing newline, so a blank line is "\n" rather
    # than "" - strip before testing for emptiness.
    next if line.strip.empty?

    type, name, *title = line.split(' ')
    type  = type.downcase.intern
    name  = name.freeze
    title = title.join(' ').freeze

    # skip past all but alias file of a NCBI multi-part BLAST database
    if multipart_database_name?(name)
      log.info(%|Found a multi-part database volume at #{name} - ignoring it.|)
      next
    end

    #LOG.info("Found #{type} database: #{title} at #{name}")
    database = Database.new(name, title, type)
    db[database.hash] = database
  end

  db
end
112
+
113
private

# check if the given command exists and is executable
# returns true if `which` finds the command.
def command?(command)
  system("which #{command} > /dev/null 2>&1")
end

# Returns true if the database name appears to be a multi-part database
# name, i.e. it ends in a dot followed by a two or three digit volume number.
#
# e.g.
# /home/ben/pd.ben/sequenceserver/db/nr.00 => yes
# /home/ben/pd.ben/sequenceserver/db/nr => no
# /home/ben/pd.ben/sequenceserver/db/img3.5.finished.faa.01 => yes
# /home/ben/pd.ben/sequenceserver/db/genome2012 => no (digits, but no volume suffix)
def multipart_database_name?(db_name)
  !(db_name.match(/.+\/\S+\.\d{2,3}\z/).nil?)
end
130
+ end
131
+
132
# Module hook: whatever includes Helpers is also extended with
# SystemHelpers, so those helpers become callable on the including class
# (or module) itself.
def self.included(klass)
  klass.extend(SystemHelpers)
end
135
+ end
136
+ end