sequenceserver-beta 0.8.7.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +3 -0
  3. data/LICENSE.Apache.txt +176 -0
  4. data/LICENSE.txt +69 -0
  5. data/README.txt +5 -0
  6. data/bin/sequenceserver +82 -0
  7. data/config.ru +6 -0
  8. data/example.config.yml +39 -0
  9. data/lib/profile_code.rb +217 -0
  10. data/lib/sequenceserver.rb +527 -0
  11. data/lib/sequenceserver/blast.rb +92 -0
  12. data/lib/sequenceserver/customisation.rb +60 -0
  13. data/lib/sequenceserver/database.rb +29 -0
  14. data/lib/sequenceserver/database_formatter.rb +190 -0
  15. data/lib/sequenceserver/helpers.rb +136 -0
  16. data/lib/sequenceserver/sequencehelpers.rb +93 -0
  17. data/lib/sequenceserver/sinatralikeloggerformatter.rb +12 -0
  18. data/lib/sequenceserver/version.rb +9 -0
  19. data/public/css/beige.css.css +254 -0
  20. data/public/css/bootstrap.dropdown.css +29 -0
  21. data/public/css/bootstrap.icons.css +155 -0
  22. data/public/css/bootstrap.min.css +415 -0
  23. data/public/css/bootstrap.modal.css +28 -0
  24. data/public/css/custom.css +232 -0
  25. data/public/img/glyphicons-halflings-white.png +0 -0
  26. data/public/img/glyphicons-halflings.png +0 -0
  27. data/public/js/bootstrap.dropdown.js +92 -0
  28. data/public/js/bootstrap.modal.js +7 -0
  29. data/public/js/bootstrap.transition.js +7 -0
  30. data/public/js/jquery-scrollspy.js +98 -0
  31. data/public/js/jquery-ui.js +14987 -0
  32. data/public/js/jquery.activity.js +10 -0
  33. data/public/js/jquery.enablePlaceholder.min.js +10 -0
  34. data/public/js/jquery.js +5 -0
  35. data/public/js/sequenceserver.blast.js +208 -0
  36. data/public/js/sequenceserver.js +304 -0
  37. data/public/js/store.min.js +2 -0
  38. data/sequenceserver.gemspec +49 -0
  39. data/tests/database/nucleotide/Sinvicta2-2-3.cdna.subset.fasta +5486 -0
  40. data/tests/database/nucleotide/Sinvicta2-2-3.cdna.subset.fasta.nhr +0 -0
  41. data/tests/database/nucleotide/Sinvicta2-2-3.cdna.subset.fasta.nin +0 -0
  42. data/tests/database/nucleotide/Sinvicta2-2-3.cdna.subset.fasta.nsq +0 -0
  43. data/tests/database/protein/Sinvicta2-2-3.prot.subset.fasta +6449 -0
  44. data/tests/database/protein/Sinvicta2-2-3.prot.subset.fasta.phr +0 -0
  45. data/tests/database/protein/Sinvicta2-2-3.prot.subset.fasta.pin +0 -0
  46. data/tests/database/protein/Sinvicta2-2-3.prot.subset.fasta.psq +0 -0
  47. data/tests/run +26 -0
  48. data/tests/test_sequencehelpers.rb +77 -0
  49. data/tests/test_sequenceserver_blast.rb +60 -0
  50. data/tests/test_ui.rb +104 -0
  51. data/tests/test_ui.rb~ +104 -0
  52. data/tests/ui.specs.todo +10 -0
  53. data/views/500.erb +22 -0
  54. data/views/_options.erb +144 -0
  55. data/views/search.erb +220 -0
  56. metadata +226 -0
@@ -0,0 +1,92 @@
1
+ require 'tempfile'
2
+
3
module SequenceServer
  # Simple BLAST+ wrapper.
  #
  # Writes the query to a temporary file, shells out to the requested BLAST+
  # executable and captures its stdout, stderr and exit status.
  class Blast

    # Picks the human readable message out of a BLAST+ argument error line
    # of the form "(CArgException...) message" written to stderr.
    ERROR_LINE = /\(CArgException.*\)\s(.*)/

    # result of executing command (stdout, as an Array of lines)
    attr_reader :result

    # errors as [status, message]; after a successful run this holds the raw
    # stderr lines instead
    attr_reader :error

    # Initialize a new blast search.
    # ---
    # Arguments:
    # * method    (String) - blast executable (shell executable, or absolute path)
    # * query     (String) - query string
    # * databases (String) - database name as returned by 'blastdbcmd -list'
    # * options   (String) - other options
    #
    # ---
    # Examples:
    #
    #   b = Blast.new("blastn", 'ATGTCCGCGAATCGATTGAACGTGCTGGTGACCCTGATGCTCGCCGTCGCGCTTCTTGTG', "S.cdna.fasta", "-num_threads 4")
    #
    #   b.run!   => true
    #   b.result => "blast output"
    def initialize(method, query, databases, options = nil)
      @method    = method
      @databases = databases

      # create a tempfile for the given query
      @qfile = Tempfile.new('sequenceserver_query')
      @qfile.puts(query)
      @qfile.close

      # Always append -html to the user supplied options so that BLAST+
      # emits HTML output.
      @options = "#{options} -html"
    end

    # Run blast everytime it is called. Returns the success
    # status - true, or false.
    #
    # On failure `error` is set to [400, message] when BLAST+ exited with
    # status 1, and to [500, stderr lines] for any other non-zero status.
    def run!
      @result, @error, status = execute(command)

      # Record the outcome on every run so that success? never reports a
      # stale (or nil) value after a failed run.
      @success = (status == 0)
      return true if @success

      @error = if status == 1
                 [400, error_message(@error)]
               else
                 [500, @error]
               end

      false
    end

    # The command that will be executed.
    #
    # NOTE(review): the pieces are interpolated into a shell string without
    # escaping; callers must not pass untrusted method/databases/options.
    def command
      @command ||= "#{@method} -db '#{@databases}' -query '#{@qfile.path}' #{@options}"
    end

    # Return success status of the last run (nil if run! was never called).
    def success?
      @success
    end

    private

    # Return the message captured by ERROR_LINE from the first matching line,
    # or all the lines unchanged if none of them matches.
    def error_message(lines)
      lines.each do |line|
        match = ERROR_LINE.match(line)
        return match[1] if match
      end
      lines
    end

    # Execute a command and return its stdout, stderr, and exit status.
    # stdout and stderr are returned as Arrays of lines.
    def execute(command)
      rfile = Tempfile.new('sequenceserver_result')
      efile = Tempfile.new('sequenceserver_error')
      [rfile, efile].each { |file| file.close }

      system("#{command} > #{rfile.path} 2> #{efile.path}")
      status = $?.exitstatus

      return File.readlines(rfile.path), File.readlines(efile.path), status
    ensure
      # Either tempfile may be nil if its creation raised.
      [rfile, efile].compact.each { |file| file.unlink }
    end
  end
end
@@ -0,0 +1,60 @@
1
module SequenceServer
  # Optional hooks for customising how hits are linked in the BLAST output.
  # Both hooks below are commented out; uncomment one to enable it.
  module Customisation
    ## Uncomment this method to turn a sequence ID into a hyperlink that
    ## replaces the hit in the BLAST output.
    ##
    ## Return the hyperlink to link to, or nil to not include a hyperlink.
    ##
    ## While this method is commented out, the default link is used: a link
    ## to the full sequence of the hit (if makeblastdb has been run with
    ## -parse_seqids), or no link at all otherwise.
    # def construct_custom_sequence_hyperlink(options)
    #   ## Example:
    #   ## sequence_id comes in like "psu|MAL13P1.200 | organism=Plasmodium_falciparum_3D7 | product=mitochondrial"
    #   ## output: "http://apiloc.bio21.unimelb.edu.au/apiloc/gene/MAL13P1.200"
    #   matches = options[:sequence_id].match(/^\s*psu\|(\S+) /)
    #   if matches #if the sequence_id conforms to our expectations
    #     # All is good. Return the hyperlink.
    #     return "http://apiloc.bio21.unimelb.edu.au/apiloc/gene/#{matches[1]}"
    #   else
    #     # Parsing the sequence_id didn't work. Don't include a hyperlink for this
    #     # sequence_id, but log that there has been a problem.
    #     settings.log.warn "Unable to parse sequence id `#{options[:sequence_id]}'"
    #     # Return nil so no hyperlink is generated.
    #     return nil
    #   end
    # end

    ## Much like construct_custom_sequence_hyperlink, except that instead of
    ## just a hyperlink, the whole line as it appears in the blast results
    ## is generated.
    ##
    ## This is therefore a more flexible setup than is possible with
    ## construct_custom_sequence_hyperlink, because things such as adding
    ## two hyperlinks for the one hit become possible.
    ##
    ## While this method is commented out, construct_custom_sequence_hyperlink
    ## is used instead, or failing that, the default link.
    # def construct_custom_sequence_hyperlinking_line(options)
    #   matches = options[:sequence_id].match(/^\s*psu\|(\S+) /)
    #   if matches #if the sequence_id conforms to our expectations
    #     # All is good. Return the hyperlink.
    #     link1 = "http://apiloc.bio21.unimelb.edu.au/apiloc/gene/#{matches[1]}"
    #     link2 = "http://google.com/?q=#{matches[1]}"
    #     return "<a href='#{link1}'>ApiLoc page</a>, <a href='#{link2}'>Google search</a>"
    #   else
    #     # Parsing the sequence_id didn't work. Don't include a hyperlink for this
    #     # sequence_id, but log that there has been a problem.
    #     settings.log.warn "Unable to parse sequence id `#{options[:sequence_id]}'"
    #     # Return nil so no hyperlink is generated.
    #     return nil
    #   end
    # end
  end
end
@@ -0,0 +1,29 @@
1
+ require 'digest/md5'
2
+
3
module SequenceServer
  # A BLAST database described by its path (name), title and type.
  class Database < Struct.new("Database", :name, :title, :type)
    # Render as "<type>: <title> <name>".
    def to_s
      format('%s: %s %s', type, title, name)
    end

    # Ordering databases is not very meaningful in itself; the 'spaceship'
    # operator exists only so that a list of databases can be sorted for
    # nicer visual presentation.
    #
    # Titles are compared when both databases have one; otherwise the
    # comparison falls back to names.
    #
    # Sorting a list in which only some databases have a 'title' therefore
    # produces an unpredictable order.
    def <=>(other)
      return name <=> other.name unless title && other.title

      title <=> other.title
    end

    # MD5 hex digest of the database name, computed once and cached.
    def hash
      @hash = Digest::MD5.hexdigest(name) unless @hash
      @hash
    end
  end
end
@@ -0,0 +1,190 @@
1
+ # copyright yannick . wurm at unil . ch
2
+ # Finds files and reads the first character of each. If it is '>', reads up to 500 lines, guesses the sequence type, and asks the user for a title with which to format the file as a BLAST database.
3
+
4
+ # TODO: bring it under SequenceServer namespace
5
+ # TODO: move the file to a 'command/' sub-directory (probably makes more sense if we have several subcommands)
6
+ # TODO: needs more love (read refactoring) overall
7
+
8
+ require 'ptools' # for File.binary?(file)
9
+ require 'find'
10
+ require 'logger'
11
+ require 'optparse'
12
+ require 'sequenceserver'
13
+ require 'sequenceserver/helpers.rb'
14
+ require 'sequenceserver/sequencehelpers.rb'
15
+
16
# Script-wide logger: writes to standard output, INFO level and above.
LOG = Logger.new(STDOUT).tap { |logger| logger.level = Logger::INFO }
18
+
19
# Interactively turns the FASTA files found under a directory into BLAST
# databases, by building and running one makeblastdb command per file.
class DatabaseFormatter
  include SequenceServer
  include Helpers
  include SystemHelpers
  include SequenceHelpers

  # directory that is scanned for FASTA files
  attr_accessor :db_path

  # Arguments:
  # * db_path (String) - directory to scan; when nil, the database
  #   directory reported by SequenceServer::App is used instead.
  def initialize(db_path = nil)
    @app = SequenceServer::App
    @app.config   = @app.parse_config
    @app.binaries = @app.scan_blast_executables(@app.bin).freeze

    @db_path = (db_path || @app.database)
  end

  # Walk db_path, ask the user about every unformatted FASTA file found,
  # run the resulting makeblastdb commands, and log a summary.
  #
  # Exits the process if db_path is not a directory, or if there is nothing
  # to format.
  def format_databases
    unless File.directory?(db_path)
      LOG.fatal("Database directory #{db_path} not found. See './database_formatter --help' for instructions.")
      exit
    end

    # files that blastdbcmd already reports as formatted databases
    formatted_dbs = %x|#{@app.binaries['blastdbcmd']} -recursive -list #{db_path} -list_outfmt "%f" 2>&1|.split("\n")
    commands = []
    Find.find(db_path) do |file|
      LOG.debug("Assessing file #{file}..")
      if File.directory?(file)
        LOG.debug("Ignoring file #{file} since it is a directory")
        next
      end
      if formatted_dbs.include?(file)
        LOG.debug("Ignoring file #{file} since it is already a blast database")
        next
      end
      if File.binary?(file)
        LOG.debug("Ignoring file #{file} since it is a binary file, not plaintext as FASTA files are")
        next
      end
      unless probably_fasta?(file)
        LOG.debug("Ignoring file #{file} since it was not judged to be a FASTA file.")
        next
      end

      LOG.info("Found #{file}")
      ## guess whether protein or nucleotide based on first 500 lines
      first_lines = File.open(file, 'r') { |stream| stream.each_line.first(500).join }
      sequence_type = begin
        type_of_sequences(first_lines) # returns :protein or :nucleotide
      rescue
        # A failed guess is reported once, by the warning below.
        nil
      end

      if [:protein, :nucleotide].include?(sequence_type)
        command = ask_make_db_command(file, sequence_type)
        commands.push(command) unless command.nil?
      else
        LOG.warn("Unable to guess sequence type for #{file}. Skipping")
      end
    end

    LOG.info("Will now create DBs")
    if commands.empty?
      puts "", "#{db_path} does not contain any unformatted database."
      exit
    end
    commands.each do |command|
      LOG.info("Will run: " + command.to_s)
      system(command)
    end
    LOG.info("Done formatting databases. ")
    db_table(db_path)
  end

  # Log the list (type, file, title) of the formatted databases that
  # blastdbcmd finds under db_path.
  def db_table(db_path)
    LOG.info("Summary of formatted blast databases:\n")
    # `2>&1` folds stderr into the captured output.
    output = %x|#{@app.binaries['blastdbcmd']} -recursive -list #{db_path} -list_outfmt "%p %f %t" 2>&1|
    LOG.info(output)
  end

  # Returns true if the file is non-empty and its first character is '>',
  # false otherwise.
  def probably_fasta?(file)
    return false if File.zero?(file)

    File.open(file, 'r') { |stream| stream.readline.slice(0, 1) == '>' }
  end

  # Ask the user whether the given file should be formatted, and under
  # which title.
  #
  # Returns the command that needs to be run to make the db, or nil if the
  # user declined.
  def ask_make_db_command(file, type)
    LOG.info("FASTA file: #{file}")
    LOG.info("Fasta type: " + type.to_s)

    response = ''
    until response.match(/^[yn]$/i) do
      LOG.info("Proceed? [y/n]: ")
      response = STDIN.gets.chomp
    end
    return nil unless response.match(/y/i)

    LOG.info("Enter a database title (or will use '#{File.basename(file)}'")
    title = STDIN.gets.chomp
    # The title is wrapped in double quotes on the command line.
    title.gsub!('"', "'")
    title = File.basename(file) if title.empty?

    make_db_command(file, type, title)
  end

  # Build the makeblastdb command line for the given file. The first four
  # characters of the type are passed as -dbtype.
  def make_db_command(file, type, title)
    LOG.info("Will make #{type.to_s} database from #{file} with #{title}")
    command = %|#{@app.binaries['makeblastdb']} -in #{file} -dbtype #{ type.to_s.slice(0,4)} -title "#{title}" -parse_seqids|
    LOG.info("Returning: #{command}")
    command
  end
end
145
+
146
# Command line handling: --help prints the usage text and exits, --verbose
# switches the logger to DEBUG. Whatever is left in ARGV afterwards is taken
# to be the database directory.
option_parser = OptionParser.new
option_parser.banner = <<BANNER

SUMMARY

prepare BLAST databases for SequenceServer

USAGE

sequenceserver format-databases [--verbose] [blast_database_directory]

Example:

$ sequenceserver format-databases ~/db # explicitly specify a database directory
$ sequenceserver format-databases # use the database directory in config.yml

DESCRIPTION

Recursively scan the given 'blast_database_directory' for BLAST databases and
formats them for use with SequenceServer.

It automagically detects the database type, and ignores non-db files and
pre-formatted databases. The 'parse_seqids' makeblastdb options is used.

'blast_database_directory' can be passed as a command line parameter or
through a configuration file by setting the 'database' key (the same option
used by SequenceServer). Configuration file will be checked only if the
command line parameter is missing.

OPTIONS

BANNER

option_parser.on_tail('-h', '--help', 'Show this message') do
  puts option_parser
  exit
end

option_parser.on('-v', '--verbose', 'Print lots of output') do
  LOG.level = Logger::DEBUG
end

# parse! removes the recognised switches from ARGV
option_parser.parse!

formatter = DatabaseFormatter.new(ARGV[0])
formatter.format_databases
@@ -0,0 +1,136 @@
1
+ require 'sequenceserver/database'
2
+
3
module SequenceServer
  module Helpers
    # Helpers that shell out to locate BLAST+ binaries and databases.
    module SystemHelpers
      # Scan the given directory for blast executables. Passing `nil` scans the
      # system `PATH`.
      # ---
      # Arguments:
      # * bin(String) - absolute path to the directory containing blast binaries
      # ---
      # Returns:
      # * a hash of blast methods, and their corresponding absolute path
      # ---
      # Raises:
      # * IOError - if the executables can't be found
      #
      # > scan_blast_executables('/home/yeban/bin')
      # => { "blastx"=>"/home/yeban/bin/blastx",
      #      "blastn"=>"/home/yeban/bin/blastn",
      #      ...
      #    }
      def scan_blast_executables(bin)
        if bin and not File.directory?(bin)
          raise IOError, "Could not find '#{bin}' defined in config.yml."
        end

        binaries = {}
        %w|blastn blastp blastx tblastn tblastx blastdbcmd makeblastdb blast_formatter|.each do |method|
          # Without a bin directory the bare name is looked up on PATH.
          path = bin ? File.join(bin, method) : method
          if command?(path)
            binaries[method] = path
          else
            blasturl = 'http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=Download'
            raise IOError, "Could not find blast binaries." +
              "\n\nYou may need to download BLAST+ from #{blasturl}." +
              " And/or edit #{settings.config_file} to indicate the location of BLAST+ binaries."
          end
        end

        #LOG.info("Config bin dir: #{bin}")
        binaries
      end

      # Scan the given directory (including subdirectory) for blast databases.
      # ---
      # Arguments:
      # * db_root(String)    - absolute path to the blast databases
      # * blastdbcmd(String) - blastdbcmd executable to use
      # ---
      # Returns:
      # * a hash of the databases found, keyed by Database#hash
      # ---
      # Raises:
      # * IOError - if db_root doesn't exist, if no database can be found (and
      #   the user declines to format any), or if blastdbcmd reports a
      #   database error
      #
      # > scan_blast_db('/home/yeban/blast_db')
      def scan_blast_db(db_root, blastdbcmd = 'blastdbcmd')
        raise IOError, "Database directory doesn't exist: #{db_root}" unless File.directory?( db_root )

        find_dbs_command = %|#{blastdbcmd} -recursive -list #{db_root} -list_outfmt "%p %f %t" 2>&1|

        begin
          db_list = %x|#{find_dbs_command}|
          if db_list.empty?
            raise IOError, "No formatted blast databases found in '#{ db_root }'."
          end
        rescue => e
          puts '', e.to_s

          print "Do you want to format your blast databases now? [Y/n]: "
          # Read from STDIN explicitly: a bare `gets` reads from the files
          # named in ARGV when there are any. End of input counts as "no",
          # so that a non-interactive run cannot retry forever.
          answer = STDIN.gets
          choice = answer ? answer.chomp[0, 1].downcase : 'n'

          # let the caller decide what to do if database discovery fails
          raise if choice == 'n'

          database_formatter = File.join(settings.root, 'database_formatter.rb')
          system("#{database_formatter} #{db_root}")
          retry
        end

        if db_list.match(/BLAST Database error/)
          raise IOError, "Error parsing blast databases.\n" + "Tried: '#{find_dbs_command}'\n"+
            "It crashed with the following error: '#{db_list}'\n" +
            "Try reformatting databases using makeblastdb.\n"
        end

        db = {}

        db_list.each_line do |line|
          # Skip blank lines (required for BLAST+ 2.2.22). each_line keeps the
          # trailing newline, so the line has to be stripped before testing.
          next if line.strip.empty?

          type, name, *title = line.split(' ')
          type  = type.downcase.intern
          name  = name.freeze
          title = title.join(' ').freeze

          # skip past all but alias file of a NCBI multi-part BLAST database
          if multipart_database_name?(name)
            # NOTE(review): `log` is not defined in this module; presumably
            # the including class provides it - confirm.
            log.info(%|Found a multi-part database volume at #{name} - ignoring it.|)
            next
          end

          #LOG.info("Found #{type} database: #{title} at #{name}")
          database = Database.new(name, title, type)
          db[database.hash] = database
        end

        db
      end

      private

      # check if the given command exists and is executable
      # returns True if all is good.
      def command?(command)
        system("which #{command} > /dev/null 2>&1")
      end

      # Returns true if the database name appears to be a multi-part database
      # name, i.e. it ends in a dot followed by a two digit volume number.
      #
      # e.g.
      # /home/ben/pd.ben/sequenceserver/db/nr.00 => yes
      # /home/ben/pd.ben/sequenceserver/db/nr => no
      # /home/ben/pd.ben/sequenceserver/db/img3.5.finished.faa.01 => yes
      # /home/ben/pd.ben/sequenceserver/db/genome2012 => no
      def multipart_database_name?(db_name)
        !(db_name.match(/.+\/\S+\.\d{2}$/).nil?)
      end
    end

    # Including Helpers in a class also makes the SystemHelpers methods
    # available on that class itself.
    def self.included(klass)
      klass.extend SystemHelpers
    end
  end
end