sequenceserver 0.8.3 → 0.8.4

Sign up to get free protection for your applications and to get access to all the features.
data/lib/blast.rb~ ADDED
@@ -0,0 +1,200 @@
1
+ require 'tempfile'
2
+ require 'open3'
3
+
4
module SequenceServer
  # Simple ncbi-blast wrapper. Check examples below.
  class Blast
    # blast method (executable name, or path to the executable)
    #
    # NOTE: this accessor shadows Object#method on Blast instances; it is
    # kept because it is part of the public interface.
    attr_accessor :method

    # database name
    attr_accessor :db

    # query sequence string
    attr_accessor :qstring

    # query file name
    attr_accessor :qfile

    # advanced blast options
    attr_accessor :options

    # command string that was last executed
    attr_reader :command

    # result of executing command (Array of stdout lines)
    attr_reader :result

    # blast archive file output (Tempfile)
    attr_reader :blast_archive_tempfile

    # errors if any while executing command (Array of stderr lines)
    attr_reader :error

    # Initialize a new blast search.
    # ---
    # Arguments(optional):
    # * method(String) - blast executable (shell executable, or absolute path)
    # * db(String)     - database name as returned by 'blastdbcmd -list'
    # * query(Hash)    - query string/file, and options.
    #
    # In the query Hash, use:
    # * :qfile(String)   - to run Blast against a file.
    # * :qstring(String) - to run Blast against a string.
    # * :options(String) - to specify multiple blast options.
    #
    # Either :qfile, or :qstring should be used. If both are given, by design
    # :qstring will be used to run blast.
    # ---
    # Examples:
    #
    #   b = Blast.new("blastn", "S.cdna.fasta", :qfile => 'query.seq', :options => "-html -num_threads 4")
    #   b = Blast.new("blastn", "S.cdna.fasta", :qstring => 'ATGTCCGCGAATCGATTGAACGTGCTGGTGACCCTGATGCTCGCCGTCGCGCTTCTTGTG')
    #
    #   b.run!   => true
    #   b.result => "blast output"
    #
    #   # change the blast method.
    #   b.method = 'blastp'
    #
    #   b.run!   => false
    #   b.error  => "blast error output"
    def initialize(method = nil, db = nil, query = {})
      @method  = method
      @db      = db
      @qstring = query[:qstring]
      @qfile   = query[:qfile]
      @options = query[:options]
    end

    # Run blast every time it is called. Returns the success
    # status - true, or false. Blast method, db, and qfile/qstring
    # need to be set before calling this method, else blast will fail.
    #
    # Success means the command wrote nothing to stderr.
    #
    #   b = Blast.new
    #   b.run!   => false
    #
    #   # set blast method, and db
    #   b.method = 'blastn'
    #   b.db     = 'S.cdna.fasta'
    #
    #   b.run!   => false
    #   b.error  => "blast error output"
    #
    #   # set qfile
    #   b.qfile  = 'query1.seq'
    #
    #   b.run!   => true
    #   b.result => "blast output"
    def run!
      # can not run blast if method is not specified; record the failure so
      # that success? does not report a stale (or nil) status
      return @success = false unless @method

      user_qfile = @qfile
      tempfile   = nil

      # write qstring to a tempfile and query that file instead
      if @qstring
        tempfile = Tempfile.new('qfile')
        tempfile.puts(@qstring)
        tempfile.close
        @qfile = tempfile.path
      end

      # form command to execute
      @command = to_s

      # execute command and capture both stdout, and stderr
      execute(@command)

      # set and return success status
      @success = @error.empty?
    ensure
      # delete the tempfile if it was created, and stop pointing qfile at it
      if tempfile
        tempfile.unlink
        @qfile = user_qfile
      end
    end

    # Return the blast type used as a String (nil if no method is set).
    #
    #   b.method = '/home/yeban/opt/blastn'
    #   b.type   => 'blastn'
    #
    #   b.method = 'blastp'
    #   b.type   => 'blastp'
    def type
      # Not memoized: method can be reassigned between runs.
      File.basename(@method) if @method
    end

    # Return success status - true, false, or nil.
    # 'nil' implies that blast has not been run yet.
    def success?
      @success
    end

    # String representation of the blast object - same as
    # the command to be executed.
    def to_s
      s = "#{@method} "
      s << "-db '#{@db}' "    if @db
      s << "-query #{@qfile} " if @qfile
      s << @options.to_s       if @options
      s
    end

    # Especially helpful in irb - "status : command"
    def inspect
      return to_s if success?.nil?
      # @command is nil when run! bailed out before building a command
      (success? ? "success : " : "fail : ") + (@command || to_s)
    end

    # Run the blast with the options specified by the user, writing a blast
    # archive (-outfmt 11) to a Tempfile available as blast_archive_tempfile.
    # The archive can be further transformed into other formats.
    #
    # Returns the success status - true, or false - like run!. The user's
    # options are left untouched, so the method can be called repeatedly.
    def run_to_blast_archive!
      user_options = @options
      @blast_archive_tempfile = Tempfile.open('seqserve_formatter')

      # Add -outfmt 11 to list of options so that it outputs a blast archive
      @options = "#{user_options} -outfmt 11 -out #{@blast_archive_tempfile.path}"

      # Run the blast
      run!
    ensure
      @options = user_options
    end

    # Convert the blast archive to a regular HTML result, stored
    # as an instance variable Blast#result. Requires run_to_blast_archive!
    # to have been called first.
    #
    # * blast_formatter_path(String) - blast_formatter executable
    def convert_blast_archive_to_html_result(blast_formatter_path)
      @command = "#{blast_formatter_path} -archive #{blast_archive_tempfile.path} -html"

      # execute command and capture both stdout, and stderr
      execute(@command)
    end

    class << self
      # shortcut method to run blast against a query file
      def blast_file(method, db, qfile, options = nil)
        b = Blast.new(method, db, :qfile => qfile, :options => options)
        b.run!
        b
      end

      # shortcut method to run blast against a query string
      def blast_string(method, db, qstring, options = nil)
        b = Blast.new(method, db, :qstring => qstring, :options => options)
        b.run!
        b
      end

      # shortcut method to run blast with a query string and return a
      # blast archive, which can then be further processed into other useful
      # output forms (e.g. HTML, GFF). If it ran successfully, the blast archive
      # is a Tempfile accessible as an instance variable of the returned
      # Blast object.
      def blast_string_to_blast_archive(method, db, qstring, options = nil)
        b = Blast.new(method, db, :qstring => qstring, :options => options)
        b.run_to_blast_archive!
        b
      end
    end

    private

    # Execute +command+, storing its stdout lines in @result and its stderr
    # lines in @error. Returns @error.
    def execute(command)
      Open3.popen3(command) do |stdin, stdout, stderr|
        # nothing is ever written to the child; close stdin so that a
        # program waiting for input sees EOF instead of blocking forever
        stdin.close

        # drain stderr concurrently: reading the two pipes one after the
        # other can deadlock once the unread pipe's buffer fills up
        stderr_reader = Thread.new { stderr.readlines }

        @result = stdout.readlines
        @error  = stderr_reader.value
      end
    end
  end
end
@@ -0,0 +1,190 @@
1
+ # copyright yannick . wurm at unil . ch
2
+ # Finds files and reads the first character of each. If it is '>', reads the first 500 lines, guesses the sequence type, and asks the user for a title to format the file as a BLAST database.
3
+
4
+ # TODO: bring it under SequenceServer namespace
5
+ # TODO: move the file to a 'command/' sub-directory (probably makes more sense if we have several subcommands)
6
+ # TODO: needs more love (read refactoring) overall
7
+
8
+ require 'ptools' # for File.binary?(file)
9
+ require 'find'
10
+ require 'logger'
11
+ require 'optparse'
12
+ require 'sequenceserver'
13
+ require 'sequenceserver/helpers.rb'
14
+ require 'sequenceserver/sequencehelpers.rb'
15
+
16
# Script-wide logger. Writes to STDOUT and reports INFO and above by default.
LOG = Logger.new(STDOUT)
LOG.level = Logger::INFO
18
+
19
# Scans a directory for FASTA files that are not yet BLAST databases and,
# after asking the user for confirmation and a title, formats each of them
# with makeblastdb.
class DatabaseFormatter
  include SequenceServer
  include Helpers
  include SystemHelpers
  include SequenceHelpers

  # directory that is scanned for FASTA files
  attr_accessor :db_path

  # * db_path(String) - directory to scan; defaults to the 'database'
  #   setting of the SequenceServer app.
  def initialize(db_path = nil)
    @app = SequenceServer::App
    @app.config   = @app.parse_config
    @app.binaries = @app.scan_blast_executables(@app.bin).freeze

    @db_path = db_path || @app.database
  end

  # Walk db_path, collect a makeblastdb command for every unformatted FASTA
  # file the user agrees to format, run the commands, and print a summary.
  # Exits the process if db_path is not a directory (status 1), or if there
  # is nothing to format (status 0).
  def format_databases
    unless File.directory?(db_path)
      LOG.fatal("Database directory #{db_path} not found. See './database_formatter --help' for instructions.")
      exit 1
    end

    # NOTE(review): db_path and file names are interpolated into shell
    # commands unquoted; paths containing spaces or shell metacharacters
    # will break these commands.
    formatted_dbs = %x|#{@app.binaries['blastdbcmd']} -recursive -list #{db_path} -list_outfmt "%f" 2>&1|.split("\n")
    commands = []
    Find.find(db_path) do |file|
      LOG.debug("Assessing file #{file}..")
      if File.directory?(file)
        LOG.debug("Ignoring file #{file} since it is a directory")
        next
      end
      if formatted_dbs.include?(file)
        LOG.debug("Ignoring file #{file} since it is already a blast database")
        next
      end
      if File.binary?(file)
        LOG.debug("Ignoring file #{file} since it is a binary file, not plaintext as FASTA files are")
        next
      end
      unless probably_fasta?(file)
        LOG.debug("Ignoring file #{file} since it was not judged to be a FASTA file.")
        next
      end

      LOG.info("Found #{file}")
      sequence_type = guess_sequence_type(file)
      unless [:protein, :nucleotide].include?(sequence_type)
        LOG.warn("Unable to guess sequence type for #{file}. Skipping")
        next
      end

      command = ask_make_db_command(file, sequence_type)
      commands.push(command) unless command.nil?
    end

    if commands.empty?
      puts "", "#{db_path} does not contain any unformatted database."
      exit
    end

    LOG.info("Will now create DBs")
    commands.each do |command|
      LOG.info("Will run: " + command.to_s)
      LOG.error("Command failed: #{command}") unless system(command)
    end
    LOG.info("Done formatting databases. ")
    db_table(db_path)
  end

  # Log a table (type, file, title) of the blast databases under db_path.
  def db_table(db_path)
    LOG.info("Summary of formatted blast databases:\n")
    # 2>&1 folds blastdbcmd's stderr into the captured output
    output = %x|#{@app.binaries['blastdbcmd']} -recursive -list #{db_path} -list_outfmt "%p %f %t" 2>&1|
    LOG.info(output)
  end

  # Return true if the first character of file is '>', false otherwise
  # (including for empty files).
  def probably_fasta?(file)
    return false if File.zero?(file)
    File.open(file, 'r') { |stream| stream.readline.slice(0, 1) == '>' }
  end

  # Ask the user whether file should be formatted, and with which title.
  # Returns the command that needs to be run to make the db, or nil if the
  # user declined (or STDIN reached end of input).
  def ask_make_db_command(file, type)
    LOG.info("FASTA file: #{file}")
    LOG.info("Fasta type: " + type.to_s)

    response = ''
    until response.match(/^[yn]$/i)
      LOG.info("Proceed? [y/n]: ")
      answer = STDIN.gets
      return nil if answer.nil? # end of input: treat as "no"
      response = answer.chomp
    end
    return nil unless response.match(/y/i)

    LOG.info("Enter a database title (or will use '#{File.basename(file)}')")
    # the title is wrapped in double quotes on the command line, so double
    # quotes inside it are replaced by single quotes
    title = STDIN.gets.to_s.chomp.tr('"', "'")
    title = File.basename(file) if title.empty?

    make_db_command(file, type, title)
  end

  # Return the makeblastdb command (String) that formats file as a database
  # of the given type (:protein or :nucleotide) with the given title.
  def make_db_command(file, type, title)
    LOG.info("Will make #{type} database from #{file} with #{title}")
    # makeblastdb expects 'prot' or 'nucl': the first four letters of type
    command = %|#{@app.binaries['makeblastdb']} -in #{file} -dbtype #{type.to_s.slice(0, 4)} -title "#{title}" -parse_seqids|
    LOG.info("Returning: #{command}")
    command
  end

  private

  # Guess the sequence type from the first 500 lines of file. Returns
  # whatever type_of_sequences returns, or nil if it raises.
  def guess_sequence_type(file)
    first_lines = ''
    File.open(file, 'r') do |stream|
      stream.each_line do |line|
        first_lines << line
        break if stream.lineno == 500
      end
    end

    begin
      type_of_sequences(first_lines)
    rescue
      nil
    end
  end
end
145
+
146
# Command line entry point: parse the options, then format the databases in
# the directory given as first argument (DatabaseFormatter falls back to the
# configured database directory when none is given).
#
# NOTE(review): the indentation inside the banner was lost in the extracted
# source; the layout below is a reconstruction - confirm against the
# original file.
parser = OptionParser.new do |opts|
  opts.banner = <<BANNER

SUMMARY

  prepare BLAST databases for SequenceServer

USAGE

  sequenceserver format-databases [--verbose] [blast_database_directory]

  Example:

    $ sequenceserver format-databases ~/db    # explicitly specify a database directory
    $ sequenceserver format-databases         # use the database directory in config.yml

DESCRIPTION

  Recursively scan the given 'blast_database_directory' for BLAST databases and
  formats them for use with SequenceServer.

  It automagically detects the database type, and ignores non-db files and
  pre-formatted databases. The 'parse_seqids' makeblastdb options is used.

  'blast_database_directory' can be passed as a command line parameter or
  through a configuration file by setting the 'database' key (the same option
  used by SequenceServer). Configuration file will be checked only if the
  command line parameter is missing.

OPTIONS

BANNER

  opts.on_tail('-h', '--help', 'Show this message') do
    puts opts
    exit
  end

  opts.on('-v', '--verbose', 'Print lots of output') do
    LOG.level = Logger::DEBUG
  end
end

begin
  # parse! removes the recognised options from ARGV, leaving the directory
  parser.parse!
rescue OptionParser::ParseError => e
  # report a bad option as a one-line error instead of a backtrace
  abort(e.message)
end

app = DatabaseFormatter.new(ARGV[0])
app.format_databases
@@ -0,0 +1,254 @@
1
/* ---- Page frame ---------------------------------------------------- */

body {
  background-color: #2B3E42;
  /*background: #3f4555;*/
  font-family: Tahoma, Arial, sans-serif;
}

.container {
  width: 800px;
  margin: 50px auto;
  background-color: white;
  border: 2px solid white;
  /* vendor-prefixed property first, standard property last */
  -moz-border-radius: 10px;
  border-radius: 10px;
}

h1 {
  margin: 0px;
  padding: 0px;
}

/* Title bar: rounded at the top only */
.banner {
  font-weight: bold;
  background: #ebcd7b;
  font-size: 300%;
  font-family: Tahoma, Arial, sans-serif;
  color: black;
  padding: 2%;
  margin-bottom: 2%;

  -moz-border-radius-bottomleft: 0px;
  -moz-border-radius-bottomright: 0px;
  -moz-border-radius-topleft: 10px;
  -moz-border-radius-topright: 10px;

  border-bottom-left-radius: 0px;
  border-bottom-right-radius: 0px;
  border-top-left-radius: 10px;
  border-top-right-radius: 10px;
}

/* Footer bar: rounded at the bottom only */
.underbar {
  background-color: #ebcd7b;
  font-family: arial;
  font-size: 12px;
  text-align: center;
  color: #333333;
  padding: 1px 1px 1px 1px;
  margin: 2% 0 0 0;

  -moz-border-radius-bottomleft: 10px;
  -moz-border-radius-bottomright: 10px;

  border-bottom-left-radius: 10px;
  border-bottom-right-radius: 10px;
}

/* ---- Query form ---------------------------------------------------- */

.entryfield {
  padding: 0;
  margin: 0;
}

.entryfield textarea {
  padding: 0;
  margin: 0;
  width: 100%;
  height: 168px;
  border-color: black;
}

/* Rounded on the right-hand side only */
.blastmethods {
  float: right;
  background: #222222;
  border: none;
  height: 150px;

  -moz-border-radius-topright: 10px;
  -moz-border-radius-topleft: 0px;
  -moz-border-radius-bottomright: 10px;
  -moz-border-radius-bottomleft: 0px;

  border-top-right-radius: 10px;
  border-top-left-radius: 0px;
  border-bottom-right-radius: 10px;
  border-bottom-left-radius: 0px;

  font-size: 1.5em;
  font-family: Tahoma, Arial, sans-serif;
  color: black;
  margin: 0;
  padding: 0;
}

fieldset {
  margin: 0;
  padding: 0;
}

.horizontal {
  margin: 2%;
  padding-bottom: 15px;
  padding-top: 15px;
  width: 96%;
  clear: both;
}

.rounded {
  -moz-border-radius: 10px;
  border-radius: 10px;
}

.box {
  background: #D5E1DD;
  /* background: -webkit-gradient(linear, left top, left bottom, from(#ccc), to(#000)); */

  border: none;

  padding: 10px;
  margin: 0;
  font-family: Tahoma, Arial, sans-serif;
  color: black;
}

/* ---- Database selection -------------------------------------------- */

.databases {
  width: 46.5%;
}

.nucleotide {
  float: left;
}

.protein {
  float: right;
}

.radiobutton {
  margin: 9px 5px 0px 0px;
  padding: 0;
  vertical-align: top;
}

.dbcheckbox { /* within each "database box" */
  float: left;
  clear: both;
  margin: 5px 10px 0 0;
  padding: 0;
}

.dbdescription {
  float: left;
}

/* ---- Advanced parameters and submit button ------------------------- */

.advanced {
  float: left;
  width: 78%;
}

.greytext {
  color: #A9A9A9;
}

.pointer {
  cursor: pointer;
}

.advanced pre {
  display: none;
}

.advanced input {
  float: right;
  width: 60%;
}

.submit_button {
  float: right;
  width: 17.5%;
}

.submit_button input {
  width: 100%;
  /* plain colour first: fallback where neither gradient syntax is supported */
  background-color: #323292;
  background: -webkit-gradient(linear, left top, right bottom, from(#0066CC), to(#192D53));
  background: -moz-linear-gradient(45deg,#0066CC,#192D53);

  border: 2px solid #192D53;
  -moz-border-radius: 10px;
  border-radius: 10px;
  color: white;
  padding: 2.5px 10px 2.5px 10px;
  cursor: pointer;
  font-size: 1.8em;
  font-family: Tahoma, Arial, sans-serif;
  margin: 0 0 0 0;
}

.submit_button input:active {
  color: #1f2126;
}

.submit_button input:hover {
  background: -webkit-gradient(linear, left top, right bottom, from(#009cff), to(#0261c2));
  background: -moz-linear-gradient(45deg,#009cff,#0261c2);
}

/* ---- Text ---------------------------------------------------------- */

h2, .bigtext {
  /* generic fallback matches the sans-serif faces listed before it */
  font-family: Tahoma, Arial, sans-serif;
  font-weight: bold;
  font-size: 20px;
  color: black;
  margin: 0;
  padding: 0px;
  /* margin: 8px 0 10px 0;*/
}

h3 {
  /* generic fallback matches the sans-serif faces listed before it */
  font-family: Tahoma, Arial, sans-serif;
  font-weight: bold;
  font-size: 16px;
  color: black;
  margin: 0;
  padding: 0px;
  /* margin: 8px 0 10px 0;*/
}

.smalltext {
  font-size: 10px;
}

/* ---- Results ------------------------------------------------------- */

#result {
  clear: both;
  background-color: white;
}

/* The two classes below differ only in background colour */
.result_even_true {
  background-color: #ffe0f9;
  margin: 0 18px 0 18px;
  -moz-border-radius: 10px;
  border-radius: 10px;
  padding-left: 3%;
  padding-top: 3%;
}

.result_even_false {
  background-color: #dfefe6;
  margin: 0 18px 0 18px;
  -moz-border-radius: 10px;
  border-radius: 10px;
  padding-left: 3%;
  padding-top: 3%;
}

.blast_result {
  font-family: "Lucida Console", Lucida, monospace;
  background-color: #A9A9A9;
  -moz-border-radius: 10px;
  border-radius: 10px;
  padding-left: 2%;
}

a {
  color: #b22222;
}