sequenceserver 0.8.3 → 0.8.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sequenceserver might be problematic. Click here for more details.

data/lib/blast.rb~ ADDED
@@ -0,0 +1,200 @@
1
+ require 'tempfile'
2
+ require 'open3'
3
+
4
module SequenceServer
  # Simple ncbi-blast wrapper. Check examples below.
  #
  # The command line is assembled by #to_s and handed to the shell through
  # Open3. A run is considered successful when the executed command wrote
  # nothing to stderr.
  #
  # NOTE(review): +method+, +db+, +qfile+ and +options+ are interpolated into
  # the command string without shell escaping; callers must only pass trusted
  # or already-sanitised values.
  class Blast
    # blast method (executable name, or path to the executable)
    attr_accessor :method

    # database name
    attr_accessor :db

    # query sequence string
    attr_accessor :qstring

    # query file name
    attr_accessor :qfile

    # advanced blast options
    attr_accessor :options

    # command string that was (or will be) executed
    attr_reader :command

    # result of executing command - Array of stdout lines
    attr_reader :result

    # blast archive file output (Tempfile), set by #run_to_blast_archive!
    attr_reader :blast_archive_tempfile

    # errors if any while executing command - Array of stderr lines
    attr_reader :error

    # Initialize a new blast search.
    # ---
    # Arguments(optional):
    # * method(String) - blast executable (shell executable, or absolute path)
    # * db(String)     - database name as returned by 'blastdbcmd -list'
    # * query(Hash)    - query string/file, and options.
    #
    # In the query Hash, use:
    # * :qfile(String)   - to run Blast against a file.
    # * :qstring(String) - to run Blast against a string.
    # * :options(String) - to specify multiple blast options.
    #
    # Either :qfile, or :qstring should be used. If both are given, by design
    # :qstring will be used to run blast.
    # ---
    # Examples:
    #
    #   b = Blast.new("blastn", "S.cdna.fasta", :qfile => 'query.seq', :options => "-html -num_threads 4")
    #   b = Blast.new("blastn", "S.cdna.fasta", :qstring => 'ATGTCCGCGAATCGATTGAACGTGCTGGTGACCCTGATGCTCGCCGTCGCGCTTCTTGTG')
    #
    #   b.run!   => true
    #   b.result => "blast output"
    #
    #   # change the blast method.
    #   b.method = 'blastp'
    #
    #   b.run!   => false
    #   b.error  => "blast error output"
    def initialize(method = nil, db = nil, query = {})
      @method  = method
      @db      = db
      @qstring = query[:qstring]
      @qfile   = query[:qfile]
      @options = query[:options]
    end

    # Run blast everytime it is called. Returns the success
    # status - true, or false. Blast method, db, and qfile/qstring
    # need to be set before calling this method, else blast will fail.
    #
    #   b = Blast.new
    #   b.run!   => false
    #
    #   # set blast method, and db
    #   b.method = 'blastn'
    #   b.db     = 'S.cdna.fasta'
    #
    #   b.run!   => false
    #   b.error  => "blast error output"
    #
    #   # set qfile
    #   b.qfile  = 'query1.seq'
    #
    #   b.run!   => true
    #   b.result => "blast output"
    def run!
      # can not run blast if method is not specified
      return false unless @method

      # write qstring to a tempfile and use that as the query file
      if @qstring
        @tempfile = Tempfile.new('qfile')
        @tempfile.puts(qstring)
        @tempfile.close
        @qfile = @tempfile.path
      end

      # form command to execute
      @command = to_s

      # execute command and capture both stdout, and stderr
      @result, @error = capture_output(@command)

      # set and return success status
      @success = @error.empty?
    ensure
      # delete tempfile if it was created
      @tempfile.unlink if @tempfile
    end

    # Return the blast type used as a String, or nil if no method is set.
    #
    #   b.method = '/home/yeban/opt/blastn'
    #   b.type   => 'blastn'
    #
    # The value is derived from the current method on every call, so it
    # stays correct after #method= and also works for a bare executable
    # name that contains no '/'.
    def type
      @method && File.basename(@method)
    end

    # Return success status - true, false, or nil.
    # 'nil' implies that blast has not been run yet.
    def success?
      @success
    end

    # String representation of the blast object - same as
    # the command to be executed.
    def to_s
      s = "#{@method} "
      s << "-db '#{@db}' " if @db
      s << "-query #{@qfile} " if @qfile
      s << @options.to_s if @options
      s
    end

    # Especially helpful in irb - "status : command"
    def inspect
      return to_s if success?.nil?
      (success? ? "success : " : "fail : ") + @command
    end

    # Run the blast with the options specified by the user, writing a blast
    # archive (-outfmt 11) to a Tempfile available as #blast_archive_tempfile.
    # The archive can be further transformed into other formats.
    #
    # The '-outfmt 11 -out <path>' switches are appended to #options.
    #
    # Returns the success status, exactly like #run!.
    def run_to_blast_archive!
      @blast_archive_tempfile = Tempfile.open('seqserve_formatter')

      # Add -outfmt 11 to list of options so that it outputs a blast archive
      @options ||= ''
      @options += " -outfmt 11 -out #{@blast_archive_tempfile.path}"

      # Run the blast
      run!
    end

    # Convert the blast archive to a regular HTML result, stored
    # as an instance variable Blast#result. Anything the formatter writes to
    # stderr is stored in Blast#error, which is also the return value.
    #
    # * blast_formatter_path(String) - blast_formatter executable
    def convert_blast_archive_to_html_result(blast_formatter_path)
      @command = "#{blast_formatter_path} -archive #{blast_archive_tempfile.path} -html"

      # execute command and capture both stdout, and stderr
      @result, @error = capture_output(@command)
      @error
    end

    class << self
      # shortcut method to run blast against a query file
      def blast_file(method, db, qfile, options = nil)
        b = Blast.new(method, db, :qfile => qfile, :options => options)
        b.run!
        b
      end

      # shortcut method to run blast against a query string
      def blast_string(method, db, qstring, options = nil)
        b = Blast.new(method, db, :qstring => qstring, :options => options)
        b.run!
        b
      end

      # shortcut method to run blast with a query string and return a
      # blast archive, which can then be further processed into other useful
      # output forms (e.g. HTML, GFF). If it ran successfully, the blast archive
      # is a Tempfile accessible as an instance variable of the returned
      # Blast object.
      def blast_string_to_blast_archive(method, db, qstring, options = nil)
        b = Blast.new(method, db, :qstring => qstring, :options => options)
        b.run_to_blast_archive!
        b
      end
    end

    private

    # Execute +command+ and return [stdout_lines, stderr_lines].
    #
    # stdin is closed straight away so a program that falls back to reading
    # its query from stdin sees EOF instead of waiting forever. stderr is
    # drained on a separate thread so that a child producing a lot of output
    # on one stream cannot block while we are still reading the other.
    def capture_output(command)
      Open3.popen3(command) do |stdin, stdout, stderr|
        stdin.close
        stderr_reader = Thread.new { stderr.readlines }
        stdout_lines  = stdout.readlines
        [stdout_lines, stderr_reader.value]
      end
    end
  end
end
@@ -0,0 +1,190 @@
1
+ # copyright yannick . wurm at unil . ch
2
+ # Finds files and reads the first character of each. If it's '>', reads the first 500 lines, guesses the sequence type, and asks the user for a title in order to format the file as a BLAST database.
3
+
4
+ # TODO: bring it under SequenceServer namespace
5
+ # TODO: move the file to a 'command/' sub-directory (probably makes more sense if we have several subcommands)
6
+ # TODO: needs more love (read refactoring) overall
7
+
8
+ require 'ptools' # for File.binary?(file)
9
+ require 'find'
10
+ require 'logger'
11
+ require 'optparse'
12
+ require 'sequenceserver'
13
+ require 'sequenceserver/helpers.rb'
14
+ require 'sequenceserver/sequencehelpers.rb'
15
+
16
# Script-wide logger writing to STDOUT; starts at INFO level.
LOG = Logger.new(STDOUT).tap { |logger| logger.level = Logger::INFO }
18
+
19
# Walks a directory tree, finds plain-text FASTA files that are not yet BLAST
# databases, asks the user to confirm each one, and runs makeblastdb on the
# confirmed files.
#
# NOTE(review): file paths and the database directory are interpolated into
# shell commands without escaping, so paths containing spaces or shell
# metacharacters will not work as written.
class DatabaseFormatter
  include SequenceServer
  include Helpers
  include SystemHelpers
  include SequenceHelpers

  # Directory that is scanned for FASTA files.
  attr_accessor :db_path

  # db_path(String, optional) - directory to scan. When omitted, the
  # 'database' value of the SequenceServer::App configuration is used.
  def initialize(db_path = nil)
    @app = SequenceServer::App
    @app.config   = @app.parse_config
    @app.binaries = @app.scan_blast_executables(@app.bin).freeze

    @db_path = (db_path || @app.database)
  end

  # Scan db_path, ask the user about every unformatted FASTA file found, run
  # the resulting makeblastdb commands and print a summary table.
  #
  # Exits the process with status 1 if db_path is not a directory, and with
  # status 0 if there is nothing to format.
  def format_databases
    unless File.directory?(db_path)
      LOG.fatal("Database directory #{db_path} not found. See './database_formatter --help' for instructions.")
      exit 1
    end

    # files that blastdbcmd already reports as databases
    formatted_dbs = %x|#{@app.binaries['blastdbcmd']} -recursive -list #{db_path} -list_outfmt "%f" 2>&1|.split("\n")
    commands = []
    Find.find(db_path) do |file|
      LOG.debug("Assessing file #{file}..")
      if File.directory?(file)
        LOG.debug("Ignoring file #{file} since it is a directory")
        next
      end
      if formatted_dbs.include?(file)
        LOG.debug("Ignoring file #{file} since it is already a blast database")
        next
      end
      if File.binary?(file)
        LOG.debug("Ignoring file #{file} since it is a binary file, not plaintext as FASTA files are")
        next
      end
      unless probably_fasta?(file)
        LOG.debug("Ignoring file #{file} since it was not judged to be a FASTA file.")
        next
      end

      LOG.info("Found #{file}")
      sequence_type = guess_sequence_type(file)
      unless [:protein, :nucleotide].include?(sequence_type)
        # logged once, whether the guess raised or returned something else
        LOG.warn("Unable to guess sequence type for #{file}. Skipping")
        next
      end

      command = ask_make_db_command(file, sequence_type)
      commands.push(command) unless command.nil?
    end
    LOG.info("Will now create DBs")
    if commands.empty?
      puts "", "#{db_path} does not contain any unformatted database."
      exit
    end
    commands.each do |command|
      LOG.info("Will run: " + command.to_s)
      system(command)
    end
    LOG.info("Done formatting databases. ")
    db_table(db_path)
  end

  # Log the blastdbcmd listing (path, file name and title) of the databases
  # found under db_path.
  def db_table(db_path)
    LOG.info("Summary of formatted blast databases:\n")
    # 2>&1 folds stderr into the captured output
    output = %x|#{@app.binaries['blastdbcmd']} -recursive -list #{db_path} -list_outfmt "%p %f %t" 2>&1|
    LOG.info(output)
  end

  # Returns true if file is non-empty and its first character is '>'.
  def probably_fasta?(file)
    return false if File.zero?(file)
    File.open(file, 'r') { |file_stream| file_stream.readline.slice(0, 1) == '>' }
  end

  # Show the file and guessed type to the user and ask for confirmation and
  # a database title on STDIN.
  #
  # Returns the command that needs to be run to make the db, or nil if the
  # user declined or STDIN reached end of file.
  def ask_make_db_command(file, type)
    LOG.info("FASTA file: #{file}")
    LOG.info("Fasta type: " + type.to_s)

    response = ''
    until response.match(/^[yn]$/i)
      LOG.info("Proceed? [y/n]: ")
      answer = STDIN.gets
      return nil if answer.nil? # end of input: treat as "no" instead of crashing
      response = answer.chomp
    end
    return nil unless response.match(/y/i)

    LOG.info("Enter a database title (or will use '#{File.basename(file)}')")
    # double quotes would terminate the -title "..." argument built below
    title = STDIN.gets.to_s.chomp.gsub('"', "'")
    title = File.basename(file) if title.empty?

    make_db_command(file, type, title)
  end

  # Build the makeblastdb command line for file. The first four letters of
  # type ('prot' / 'nucl' for :protein / :nucleotide) are passed as -dbtype.
  def make_db_command(file, type, title)
    LOG.info("Will make #{type.to_s} database from #{file} with #{title}")
    command = %|#{@app.binaries['makeblastdb']} -in #{file} -dbtype #{type.to_s.slice(0, 4)} -title "#{title}" -parse_seqids|
    LOG.info("Returning: #{command}")
    command
  end

  private

  # Read at most limit lines from the start of file and return them joined
  # into one String.
  def first_lines_of(file, limit = 500)
    File.open(file, 'r') { |file_stream| file_stream.each_line.first(limit).join }
  end

  # Guess the sequence type from the first 500 lines of file.
  # Returns whatever type_of_sequences returns (presumably :protein or
  # :nucleotide - defined outside this file), or nil if it raises.
  def guess_sequence_type(file)
    sample = first_lines_of(file)
    begin
      type_of_sequences(sample)
    rescue
      nil # caller reports the failure
    end
  end
end
145
+
146
# Parse command line switches. --verbose lowers the log level to DEBUG;
# --help prints the banner and exits. An unrecognised or malformed switch
# prints a one-line error and exits with a non-zero status.
begin
  OptionParser.new do |opts|
    opts.banner = <<BANNER

SUMMARY

prepare BLAST databases for SequenceServer

USAGE

sequenceserver format-databases [--verbose] [blast_database_directory]

Example:

$ sequenceserver format-databases ~/db # explicitly specify a database directory
$ sequenceserver format-databases # use the database directory in config.yml

DESCRIPTION

Recursively scan the given 'blast_database_directory' for BLAST databases and
formats them for use with SequenceServer.

It automagically detects the database type, and ignores non-db files and
pre-formatted databases. The 'parse_seqids' makeblastdb options is used.

'blast_database_directory' can be passed as a command line parameter or
through a configuration file by setting the 'database' key (the same option
used by SequenceServer). Configuration file will be checked only if the
command line parameter is missing.

OPTIONS

BANNER

    opts.on_tail('-h', '--help', 'Show this message') do
      puts opts
      exit
    end

    opts.on('-v', '--verbose', 'Print lots of output') do
      LOG.level = Logger::DEBUG
    end
  end.parse!
rescue OptionParser::ParseError => e
  abort("#{e.message}. Use --help for usage.")
end

# parse! removes the switches it consumed, so ARGV[0] is the optional
# database directory.
app = DatabaseFormatter.new(ARGV[0])
app.format_databases
@@ -0,0 +1,254 @@
1
/* Page background colour and default typeface. */
body {
  background-color: #2B3E42;
  /*background: #3f4555;*/
  font-family: Tahoma, Arial, sans-serif;
}

/* Fixed-width, horizontally centred white box with rounded corners. */
.container {
  width: 800px;
  margin: 50px auto;
  background-color: white;
  border: 2px solid white;
  /* vendor-prefixed form first so the standard property wins when both are understood */
  -moz-border-radius: 10px;
  border-radius: 10px;
}

h1 {
  margin: 0px;
  padding: 0px;
}

/* Large bold strip; only its top corners are rounded. */
.banner {
  font-weight: bold;
  background: #ebcd7b;
  font-size: 300%;
  font-family: Tahoma, Arial, sans-serif;
  color: black;
  padding: 2%;
  margin-bottom: 2%;

  -moz-border-radius-bottomleft: 0px;
  -moz-border-radius-bottomright: 0px;
  -moz-border-radius-topleft: 10px;
  -moz-border-radius-topright: 10px;

  border-bottom-left-radius: 0px;
  border-bottom-right-radius: 0px;
  border-top-left-radius: 10px;
  border-top-right-radius: 10px;
}

/* Small centred strip in the banner colour; only its bottom corners are rounded. */
.underbar {
  background-color: #ebcd7b;
  font-family: arial;
  font-size: 12px;
  text-align: center;
  color: #333333;
  padding: 1px 1px 1px 1px;
  margin: 2% 0 0 0;

  /* -moz- forms added to match the other rounded rules in this sheet */
  -moz-border-radius-bottomleft: 10px;
  -moz-border-radius-bottomright: 10px;

  border-bottom-left-radius: 10px;
  border-bottom-right-radius: 10px;
}
53
+
54
+
55
/* Wrapper around the query textarea: no spacing of its own. */
.entryfield {
  margin: 0;
  padding: 0;
}

/* The query textarea fills the wrapper's width. */
.entryfield textarea {
  margin: 0;
  padding: 0;
  width: 100%;
  height: 168px;
  border-color: black;
}

/* Dark, right-floated panel; only its right-hand corners are rounded. */
.blastmethods {
  float: right;
  height: 150px;
  margin: 0;
  padding: 0;
  border: none;
  background: #222222;
  color: black;
  font-family: Tahoma, Arial, sans-serif;
  font-size: 1.5em;

  border-top-left-radius: 0px;
  border-top-right-radius: 10px;
  border-bottom-right-radius: 10px;
  border-bottom-left-radius: 0px;

  -moz-border-radius-topleft: 0px;
  -moz-border-radius-topright: 10px;
  -moz-border-radius-bottomright: 10px;
  -moz-border-radius-bottomleft: 0px;
}

fieldset {
  margin: 0;
  padding: 0;
}

/* Full-width row that clears any floats above it. */
.horizontal {
  clear: both;
  width: 96%;
  margin: 2%;
  padding-top: 15px;
  padding-bottom: 15px;
}

/* Utility class: 10px rounded corners. */
.rounded {
  border-radius: 10px;
  -moz-border-radius: 10px;
}

/* Pale, borderless panel with 10px inner padding. */
.box {
  margin: 0;
  padding: 10px;
  border: none;
  background: #D5E1DD;
  /* background: -webkit-gradient(linear, left top, left bottom, from(#ccc), to(#000)); */
  color: black;
  font-family: Tahoma, Arial, sans-serif;
}
121
+
122
/* Database panels: each is a little under half the row width;
   .nucleotide floats left and .protein floats right. */
.databases {
  width: 46.5%;
}

.nucleotide {
  float: left;
}

.protein {
  float: right;
}

.radiobutton {
  vertical-align: top;
  margin: 9px 5px 0px 0px;
  padding: 0;
}

/* within each "database box" */
.dbcheckbox {
  float: left;
  clear: both;
  margin: 5px 10px 0 0;
  padding: 0;
}

.dbdescription {
  float: left;
}

/* Advanced-options area, floated left. */
.advanced {
  float: left;
  width: 78%;
}

.greytext {
  color: #A9A9A9;
}

.pointer {
  cursor: pointer;
}

/* <pre> elements inside .advanced are not displayed by default. */
.advanced pre {
  display: none;
}

.advanced input {
  float: right;
  width: 60%;
}

/* Submit button wrapper, floated right. */
.submit_button {
  float: right;
  width: 17.5%;
}

/* The button itself: flat colour first, then gradient declarations for
   engines that understand them (later declarations override earlier ones). */
.submit_button input {
  width: 100%;
  margin: 0 0 0 0;
  padding: 2.5px 10px 2.5px 10px;

  background-color: #323292;
  background: -webkit-gradient(linear, left top, right bottom, from(#0066CC), to(#192D53));
  background: -moz-linear-gradient(45deg,#0066CC,#192D53);

  border: 2px solid #192D53;
  border-radius: 10px;
  -moz-border-radius: 10px;

  color: white;
  cursor: pointer;
  font-family: Tahoma, Arial, sans-serif;
  font-size: 1.8em;
}

.submit_button input:active {
  color: #1f2126;
}

/* Lighter gradient while hovered. */
.submit_button input:hover {
  background: -webkit-gradient(linear, left top, right bottom, from(#009cff), to(#0261c2));
  background: -moz-linear-gradient(45deg,#009cff,#0261c2);
}
198
+
199
/* Headings. The generic fallback is sans-serif, matching Tahoma/Arial and
   every other font-family declaration in this stylesheet. */
h2, .bigtext {
  font-family: Tahoma, Arial, sans-serif;
  font-weight: bold;
  font-size: 20px;
  color: black;
  margin: 0;
  padding: 0px;
  /* margin: 8px 0 10px 0;*/
}

h3 {
  font-family: Tahoma, Arial, sans-serif;
  font-weight: bold;
  font-size: 16px;
  color: black;
  margin: 0;
  padding: 0px;
  /* margin: 8px 0 10px 0;*/
}

.smalltext {
  font-size: 10px;
}

/* Results area: starts below any floated elements. */
#result {
  clear: both;
  background-color: white;
}

/* Two result styles that differ only in background colour. */
.result_even_true {
  background-color: #ffe0f9;
  margin: 0 18px 0 18px;
  -moz-border-radius: 10px;
  border-radius: 10px;
  padding-left: 3%;
  padding-top: 3%;
}

.result_even_false {
  background-color: #dfefe6;
  margin: 0 18px 0 18px;
  -moz-border-radius: 10px;
  border-radius: 10px;
  padding-left: 3%;
  padding-top: 3%;
}

/* Monospaced block on a grey background. */
.blast_result {
  font-family: "Lucida Console", Lucida, monospace;
  background-color: #A9A9A9;
  -moz-border-radius: 10px;
  border-radius: 10px;
  padding-left: 2%;
}

a {
  color: #b22222;
}