bio-cnls_screenscraper 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/Gemfile ADDED
@@ -0,0 +1,14 @@
1
source "http://rubygems.org"

# Runtime dependencies.
# lib/bio-cnls_screenscraper.rb does `require 'bio'` as soon as it is loaded,
# so BioRuby is needed by everyone who uses the gem, not only by developers.
gem "bio", ">= 1.4.1"

# Development dependencies: everything needed to run rake, tests, features, etc.
group :development do
  gem "shoulda", ">= 0"
  gem "bundler", "~> 1.0.0"
  gem "jeweler", "~> 1.5.2"
  gem "rcov", ">= 0"
end
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 Ben J. Woodcroft
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,19 @@
1
+ = bio-cnls_screenscraper
2
+
3
+ Programmatic interface to the cNLS nuclear localisation signal prediction software.
4
+
5
+ == Contributing to bio-cnls_screenscraper
6
+
7
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
8
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it
9
+ * Fork the project
10
+ * Start a feature/bugfix branch
11
+ * Commit and push until you are happy with your contribution
12
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
13
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or if changing them is otherwise necessary, that is fine, but please isolate the change in its own commit so I can cherry-pick around it.
14
+
15
+ == Copyright
16
+
17
+ Copyright (c) 2011 Ben J. Woodcroft. See LICENSE.txt for
18
+ further details.
19
+
@@ -0,0 +1,53 @@
1
require 'rubygems'
require 'bundler'

# Activate the bundle before anything else is loaded. When gems are missing,
# report the problem and exit with the status code Bundler provides.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  $stderr.puts bundler_error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end
require 'rake'

# Gem packaging and release tasks.
require 'jeweler'
Jeweler::Tasks.new do |spec|
  # spec is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  spec.name        = "bio-cnls_screenscraper"
  spec.homepage    = "http://github.com/wwood/bioruby-cnls_screenscraper"
  spec.license     = "MIT"
  spec.summary     = %Q{Programmatic interface to the cNLS nuclear localisation signal prediction software}
  spec.description = %Q{Programmatic interface to the cNLS nuclear localisation signal prediction software}
  spec.email       = "donttrustben@gmail.com"
  spec.authors     = ["Ben J. Woodcroft"]
  # Include your dependencies below. Runtime dependencies are required when using your gem,
  # and development dependencies are only needed for development (ie running rake tasks, tests, etc)
  #  spec.add_runtime_dependency 'jabber4r', '> 0.1'
  #  spec.add_development_dependency 'rspec', '> 1.2.3'
end
Jeweler::RubygemsDotOrgTasks.new

# Pattern shared by the unit-test and coverage tasks.
test_file_pattern = 'test/**/test_*.rb'

# `rake test`: run the unit tests with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |t|
  t.libs.push('lib', 'test')
  t.pattern = test_file_pattern
  t.verbose = true
end

# `rake rcov`: run the same tests under rcov.
require 'rcov/rcovtask'
Rcov::RcovTask.new do |t|
  t.libs.push('test')
  t.pattern = test_file_pattern
  t.verbose = true
end

task :default => :test

# `rake rdoc`: build the API documentation, titled with the current version.
require 'rake/rdoctask'
Rake::RDocTask.new do |doc|
  version = ""
  version = File.read('VERSION') if File.exist?('VERSION')

  doc.rdoc_dir = 'rdoc'
  doc.title = "bio-cnls_screenscraper #{version}"
  ['README*', 'lib/**/*.rb'].each { |glob| doc.rdoc_files.include(glob) }
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,293 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # A script that takes a FASTA file, skips the sequences that the server would reject, and automatically submits each remaining sequence to the cNLS server at http://nls-mapper.iab.keio.ac.jp/cgi-bin/NLS_Mapper_form.cgi
4
+ # Sequences are submitted one at a time because, unfortunately, uploading a whole FASTA file seems to fail.
5
+ # cNLS mapper = classical(?) nuclear localisation signal mapper.
6
+ # The web form of the server is available at
7
+ # http://nls-mapper.iab.keio.ac.jp/cgi-bin/NLS_Mapper_form.cgi
8
+
9
+ require 'bio'
10
+
11
+ module Bio
12
+ class CNLS
13
# The outcome of one cNLS prediction: a (possibly empty) list of predicted
# nuclear localisation signals, each either a MonopartiteNLS or a
# BipartiteNLS.
class Result
  # Array of NLS objects. Starts out empty.
  attr_accessor :signals

  def initialize
    @signals = []
  end

  # A single predicted signal: where it starts, its sequence and its score.
  class NLS
    attr_accessor :position, :sequence, :score

    # Sort by score descending.
    #
    # Returns nil (rather than raising NoMethodError) when the two objects
    # cannot be compared, e.g. when one of the scores has not been set, so
    # that Array#sort reports a regular "comparison failed" error.
    def <=>(another)
      return nil unless another.respond_to?(:score)
      another.score <=> @score
    end
  end
  class MonopartiteNLS<NLS; end
  class BipartiteNLS<NLS; end

  # Is this result a positive prediction or negative prediction?
  def predicted?
    !signals.nil? && !signals.empty?
  end

  # Was at least one monopartite signal predicted? When minimum_score is
  # given, only signals scoring at least that much count.
  def monopartite_predicted?(minimum_score=nil)
    signal_predicted?(MonopartiteNLS, minimum_score)
  end

  # Was at least one bipartite signal predicted? When minimum_score is
  # given, only signals scoring at least that much count.
  def bipartite_predicted?(minimum_score=nil)
    signal_predicted?(BipartiteNLS, minimum_score)
  end

  # Highest score among the monopartite signals, or 0.0 when there is none.
  def max_monopartite_score
    max_score_of(MonopartiteNLS)
  end

  # Highest score among the bipartite signals, or 0.0 when there is none.
  def max_bipartite_score
    max_score_of(BipartiteNLS)
  end

  private

  # The signals of the given class. A nil signals list is treated as empty,
  # in line with predicted?.
  def signals_of(nls_class)
    (@signals || []).select { |s| s.kind_of?(nls_class) }
  end

  # True if any signal of nls_class passes the optional score cutoff.
  def signal_predicted?(nls_class, minimum_score)
    signals_of(nls_class).any? do |s|
      minimum_score.nil? || s.score >= minimum_score
    end
  end

  # Largest score among the signals of nls_class, never less than 0.0.
  def max_score_of(nls_class)
    signals_of(nls_class).inject(0.0) do |max, s|
      s.score > max ? s.score : max
    end
  end
end
76
+
77
# A class used to automatically submit sequences to the cNLS webserver and parse the HTML results.
class Screenscraper
  require 'uri'
  require 'net/http'

  # Cutoffs accepted by get_raw_html_result. They are strings, not floats.
  ACCEPTABLE_CUTOFFS = %w(2.0 3.0 4.0 5.0 6.0)

  # Contact the cNLS prediction server and submit the amino acid sequence
  # for prediction. Return a Bio::CNLS::Result object.
  #
  # cut_off must be one of ACCEPTABLE_CUTOFFS. After the request the method
  # sleeps for seconds_pause seconds, so as not to overload the server.
  def self.submit(amino_acid_sequence, cut_off='3.0', seconds_pause=1)
    # contact webserver and sleep
    html = get_raw_html_result(amino_acid_sequence, cut_off, seconds_pause)

    # Return the parsed HTML as a CNLS::Result object
    parse_html_result(html)
  end

  # Submit the sequence to the server and return the body of the response
  # (HTML) as a String, then sleep for seconds_pause seconds.
  #
  # Raises Exception when cut_off is not one of ACCEPTABLE_CUTOFFS or when
  # the server does not answer with HTTP 200.
  # NOTE(review): Exception (not StandardError) is kept because existing
  # callers rescue exactly that class.
  def self.get_raw_html_result(amino_acid_sequence, cut_off='3.0', seconds_pause=1)
    unless ACCEPTABLE_CUTOFFS.include?(cut_off)
      raise Exception, "Specified cutoff `#{cut_off}' for the cNLS screenscraper is invalid. Valid cutoffs are #{ACCEPTABLE_CUTOFFS.join(', ')}. They are strings, not floating point values."
    end

    # retrieve the webpage
    res = Net::HTTP.post_form(URI.parse('http://nls-mapper.iab.keio.ac.jp/cgi-bin/NLS_Mapper_y.cgi'),
      {'cut_off' => cut_off, 'typedseq' => amino_acid_sequence})

    # if there is an error, raise it
    unless res.kind_of?(Net::HTTPOK)
      raise Exception, "Failed to retrieve cNLS, internet connectivity problem? Using cutoff/sequence #{cut_off}/#{amino_acid_sequence}"
    end

    # pause the specified number of seconds
    sleep seconds_pause

    res.body
  end

  # Given HTML corresponding to a result, return a Result object that is
  # more programmatically palatable. Monopartite signals come first in
  # Result#signals, followed by bipartite ones, each in page order.
  #
  # Raises Exception when the server reported that the query was too long,
  # or when either result table cannot be recognised.
  def self.parse_html_result(html)
    # Make sure the sequence isn't too long. The '<' may arrive HTML-escaped
    # (the server's "&gt; 10 aa" error page escapes its comparison sign).
    if html.match(/Query sequence should be (?:<|&lt;) 5000 aa/)
      raise Exception, "Query sequence provided was too long (> 5000 aa)"
    end

    result = Result.new

    result.signals.concat extract_signals(
      html, 'monopartite', Result::MonopartiteNLS,
      "In particular, looking for monopartite signals, but the whole document is likely problematic."
    )
    result.signals.concat extract_signals(
      html, 'bipartite', Result::BipartiteNLS,
      "In particular, looking for bipartite signals, monopartite signals seemed to be parsed OK."
    )

    result
  end

  # Parse the "Predicted <kind> NLS" table out of html and return an Array
  # of nls_class objects (empty when the table reports no hits).
  #
  # The mono- and bipartite tables are identical except for the kind named
  # in the table heading, so one set of patterns serves both. The server
  # appears to emit slightly different markup at times, hence the two
  # "no hits" patterns.
  def self.extract_signals(html, kind, nls_class, failure_detail)
    hits_regex = /Predicted #{kind} NLS<\/th>\s+<\/TR>\s*<TR bgcolor="#d0d0d0">\s*<th>Pos.<\/th>\s*<th>Sequence<\/th>\s*<th>Score<\/th>\s*<\/TR>\s*<TR><td><strong><big><code>(.*?)<\/code><\/big><\/strong><br.{0,2}><strong><big><code.{2,8}><\/big><\/strong><\/td><td><strong><big><code>(.*?)<\/code><\/big><\/strong><br.{0,2}><strong><big><code.{2,8}><\/big><\/strong><\/td><td align="center"><strong><big><code>(.*?)<\/code><\/big><\/strong><br.{0,2}><strong><big><code.{2,8}><\/big><\/strong><\/td><\/TR>/i
    no_hits = /Predicted #{kind} NLS<\/th>\s*<\/tr>\s*<tr bgcolor="#d0d0d0">\s*<th>Pos.<\/th>\s*<th>Sequence<\/th>\s*<th>Score<\/th>\s*<\/tr>\s*<tr><td><strong><big><code><\/code><\/big><\/strong><\/td><td><strong><big><code><\/code><\/big><\/strong><\/td><td align="center"><strong><big><code><\/code><\/big><\/strong><\/td><\/tr>/i
    no_hits2 = /Predicted #{kind} NLS<\/th>\s*<\/TR>\s*<TR bgcolor="#d0d0d0">\s*<th>Pos.<\/th>\s*<th>Sequence<\/th>\s*<th>Score<\/th>\s*<\/TR>\s*<TR><td><strong><big><code \/><\/big><\/strong><\/td><td><strong><big><code \/><\/big><\/strong><\/td><td align="center"><strong><big><code \/><\/big><\/strong><\/td><\/TR>/i

    # Within one table cell, successive values are separated by this markup.
    split_regex = /<\/code><\/big><\/strong><br.{0,2}><strong><big><code>/

    signals = []
    if matches = html.match(hits_regex)
      positions = matches[1].split(split_regex)
      seqs = matches[2].split(split_regex)
      scores = matches[3].split(split_regex)

      positions.each_with_index do |pos, i|
        nls = nls_class.new
        nls.position = pos.to_i
        nls.sequence = seqs[i]
        nls.score = scores[i].to_f
        signals.push nls
      end
    elsif html.match(no_hits) or html.match(no_hits2)
      # do nothing, except for not raising a parsing exception
    else
      raise Exception, "Could not parse HTML output returned from cNLS prediction server. #{failure_detail}\n#{html}"
    end
    signals
  end
  private_class_method :extract_signals
end
174
+ end
175
+ end
176
+
177
+
178
+
179
# Command-line entry point: run predictions for every sequence of a FASTA
# file (named on the command line or piped in on STDIN) and print a
# tab-separated table, or cache / re-parse the server's raw HTML.
if __FILE__ == $0
  require 'optparse'

  # When entering sequences less than this number of amino acids as a query
  # it fails (if less than 10 it tells you, if less than 19 then it silently fails)
  QUERY_LENGTH_MINIMUM = 19
  ACCEPTABLE_AMINO_ACID_CHARACTERS = Bio::AminoAcid::Data::WEIGHT.keys.push('*')

  options = {
    :verbose => true,
    :cache_html => false,
    :use_cache => false,
    :cutoff_score => nil,
    :print_scores => false,
  }
  o = OptionParser.new do |opts|
    # The banner must be a single String; "\t" needs double quotes to be a tab.
    opts.banner = [
      'Usage: bio-cnls_formatter.rb [-qh] [fasta_filename]',
      "\tfasta file can also be piped in on STDIN."
    ].join("\n")
    opts.on('-q','--quiet','Opposite of verbose. Default is not quiet (verbose is on)') do
      options[:verbose] = false
    end
    opts.on('-h','--html','Cache HTML results in the current directory instead of parsing them. Default false.') do
      options[:cache_html] = true
    end
    opts.on('-c','--cached','Parse the cache HTML results (as previously generated using -h/--html) in the current directory. Default false.') do
      options[:use_cache] = true
    end
    opts.on('-s','--score SCORE','Cutoff score to be used when parsing results, between 0 and 10. Used when parsing results, not when querying the server') do |s|
      options[:cutoff_score] = s.to_f
    end
    opts.on('-p','--print-scores','Output scores as well as true/false predictions. Default false.') do |s|
      options[:print_scores] = true
    end
  end
  o.parse!

  # Print the header row of the result table.
  print_result_headers = lambda do
    to_print = [
      'Name',
      'Monopartite signal?',
      'Bipartite signal?'
    ]
    if options[:print_scores]
      to_print.push 'Max monopartite score'
      to_print.push 'Max bipartite score'
    end

    puts to_print.join("\t")
  end

  # Define a procedure for printing parsed results so it is more DRY
  print_parsed_results = lambda do |sequence_name, cnls_result, score|
    to_print = [
      sequence_name,
      cnls_result.monopartite_predicted?(score),
      cnls_result.bipartite_predicted?(score)
    ]
    if options[:print_scores]
      to_print.push cnls_result.max_monopartite_score
      to_print.push cnls_result.max_bipartite_score
    end

    puts to_print.join("\t")
  end

  if options[:use_cache]
    # Parse every regular file in the current directory as cached HTML.
    print_result_headers.call
    Dir.foreach('.') do |file|
      next if File.directory?(file) #skip '.', '..' etc.

      begin
        res = Bio::CNLS::Screenscraper.parse_html_result(File.read(file))
        print_parsed_results.call(
          file, res, options[:cutoff_score]
        )
      rescue SystemExit, SignalException
        # Never swallow Ctrl-C or an explicit exit.
        raise
      rescue Exception => e
        # The parser raises plain Exception, so that is what must be rescued
        # here to keep going with the remaining files.
        $stderr.puts "Failed to parse #{file}: #{e}"
      end
    end
  else
    # Built once: matches every character that is allowed in a query.
    acceptable_characters = /[#{ACCEPTABLE_AMINO_ACID_CHARACTERS.join('')}]/

    # One header row for the whole table (none when only caching HTML).
    print_result_headers.call unless options[:cache_html]

    Bio::FlatFile.foreach(ARGF) do |entry|
      # Sequences are automatically disqualified if they contain characters that are neither amino acids or stop codons
      fails = entry.seq.gsub(acceptable_characters, '')

      if fails.length > 0
        if options[:verbose]
          $stderr.puts "Found unacceptable characters in #{entry.definition}: #{fails}"
        end

      # Sequence length must be greater than the minimum, excluding
      # stop codons
      elsif entry.seq.gsub(/\*/,'').length < QUERY_LENGTH_MINIMUM
        if options[:verbose]
          $stderr.puts "Query sequence too short (less than #{QUERY_LENGTH_MINIMUM} residues excluding stop codons): #{entry.definition}"
        end

      # This sequence passes, run the prediction on it
      elsif options[:cache_html]
        res = Bio::CNLS::Screenscraper.get_raw_html_result(entry.seq)
        # NOTE(review): the definition line is used verbatim as a file name;
        # definitions containing '/' will not work here.
        File.open("#{entry.definition}.html",'w') do |f|
          f.puts res
        end
        $stderr.print '.' if options[:verbose]
      else
        res = Bio::CNLS::Screenscraper.submit(entry.seq)
        # Print the sequence's name, not the whole FASTA entry.
        print_parsed_results.call(entry.definition, res, options[:cutoff_score])
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
2
+ <html xml:lang="en-US" xmlns="http://www.w3.org/1999/xhtml" lang="en-US"><head>
3
+ <meta http-equiv="content-type" content="text/html; charset=ISO-8859-1">
4
+
5
+ <title>NLS Mapper</title>
6
+ <style type="text/css">
7
+ <!--/* <![CDATA[ */
8
+ <!--
9
+ h2 {color: #B22222; face: Verdana;}
10
+ .align2 {line-height: 0%;}
11
+ -->
12
+
13
+
14
+ /* ]]> */-->
15
+ </style>
16
+ </head><body><p align="center"><b><font #990033="" size="5" color="#b22222" face="Helvetica, Tahoma">cNLS Mapper Result</font></b></p><hr size="5">Error in sequence; your sequence contains an illegal amino acid letter(s).<hr></body></html>
@@ -0,0 +1,47 @@
1
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
2
+ <html xml:lang="en-US" xmlns="http://www.w3.org/1999/xhtml" lang="en-US"><head>
3
+ <meta http-equiv="content-type" content="text/html; charset=ISO-8859-1">
4
+
5
+ <title>NLS Mapper</title>
6
+ <style type="text/css">
7
+ <!--/* <![CDATA[ */
8
+ <!--
9
+ h2 {color: #B22222; face: Verdana;}
10
+ .align2 {line-height: 0%;}
11
+ -->
12
+
13
+
14
+ /* ]]> */-->
15
+ </style>
16
+ </head><body><p align="center"><b><font #990033="" size="5" color="#b22222" face="Helvetica, Tahoma">cNLS Mapper Result</font></b></p><hr size="5"><table bordercolorlight="darkgray" bordercolordark="darkslategray" cellpadding="5" cellspacing="1" align="center" bgcolor="whitesmoke" border="4">
17
+ <tbody>
18
+ <tr bgcolor="#d0d0d0">
19
+ <th>Predicted NLSs in query sequence</th>
20
+ </tr>
21
+ <tr><td><strong><code><font size="3"><font color="red">K</font><font color="red">K</font><font color="red">K</font><font color="red">R</font><font color="red">R</font><font color="red">R</font><font color="red">A</font><font color="red">A</font><font color="red">A</font><font color="red">A</font><font color="red">A</font><font color="red">A</font><font color="red">A</font><font color="red">A</font><font color="red">A</font><font color="red">A</font><font color="red">A</font><font color="red">A</font><font color="red">A</font><font color="red">A</font><font color="red">A</font><font color="red">A</font><font color="red">A</font><font color="red">A</font><font color="red">R</font><font color="red">K</font><font color="red">K</font><font color="red">K</font><font color="red">R</font><font color="red">R</font><font color="red">R</font> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 31</font></code></strong></td></tr> </tbody>
22
+ </table>
23
+ <br><table bordercolorlight="darkgray" bordercolordark="darkslategray" cellpadding="5" cellspacing="0" align="center" bgcolor="white" border="3">
24
+ <tbody>
25
+ <tr bgcolor="darkseagreen">
26
+ <th colspan="3">Predicted monopartite NLS</th>
27
+ </tr>
28
+ <tr bgcolor="#d0d0d0">
29
+ <th>Pos.</th>
30
+ <th>Sequence</th>
31
+ <th>Score</th>
32
+ </tr>
33
+ <tr><td><strong><big><code></code></big></strong></td><td><strong><big><code></code></big></strong></td><td align="center"><strong><big><code></code></big></strong></td></tr> </tbody>
34
+ </table>
35
+ <br><table bordercolorlight="darkgray" bordercolordark="darkslategray" cellpadding="5" cellspacing="0" align="center" bgcolor="white" border="3">
36
+ <tbody>
37
+ <tr bgcolor="darkseagreen">
38
+ <th colspan="3">Predicted bipartite NLS</th>
39
+ </tr>
40
+ <tr bgcolor="#d0d0d0">
41
+ <th>Pos.</th>
42
+ <th>Sequence</th>
43
+ <th>Score</th>
44
+ </tr>
45
+ <tr><td><strong><big><code>2</code></big></strong><br><strong><big><code>1</code></big></strong><br><strong><big><code></code></big></strong></td><td><strong><big><code>KKRRRAAAAAAAAAAAAAAAAAARKKKR</code></big></strong><br><strong><big><code>KKKRRRAAAAAAAAAAAAAAAAAARKKKRRR</code></big></strong><br><strong><big><code></code></big></strong></td><td align="center"><strong><big><code>6.2</code></big></strong><br><strong><big><code>5</code></big></strong><br><strong><big><code></code></big></strong></td></tr> </tbody>
46
+ </table>
47
+ <br></body></html>
@@ -0,0 +1,16 @@
1
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
2
+ <html xml:lang="en-US" xmlns="http://www.w3.org/1999/xhtml" lang="en-US"><head>
3
+ <meta http-equiv="content-type" content="text/html; charset=ISO-8859-1">
4
+
5
+ <title>NLS Mapper</title>
6
+ <style type="text/css">
7
+ <!--/* <![CDATA[ */
8
+ <!--
9
+ h2 {color: #B22222; face: Verdana;}
10
+ .align2 {line-height: 0%;}
11
+ -->
12
+
13
+
14
+ /* ]]> */-->
15
+ </style>
16
+ </head><body><p align="center"><b><font #990033="" size="5" color="#b22222" face="Helvetica, Tahoma">cNLS Mapper Result</font></b></p><hr size="5">Query sequence should be &gt; 10 aa.<hr></body></html>
@@ -0,0 +1,50 @@
1
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
2
+ <html xml:lang="en-US" xmlns="http://www.w3.org/1999/xhtml" lang="en-US"><head>
3
+ <meta http-equiv="content-type" content="text/html; charset=ISO-8859-1">
4
+
5
+ <title>NLS Mapper</title>
6
+ <style type="text/css">
7
+ <!--/* <![CDATA[ */
8
+ <!--
9
+ h2 {color: #B22222; face: Verdana;}
10
+ .align2 {line-height: 0%;}
11
+ -->
12
+
13
+
14
+ /* ]]> */-->
15
+ </style>
16
+ </head><body><p align="center"><b><font #990033="" size="5" color="#b22222" face="Helvetica, Tahoma">cNLS Mapper Result</font></b></p><hr size="5"><table bordercolorlight="darkgray" bordercolordark="darkslategray" cellpadding="5" cellspacing="1" align="center" bgcolor="whitesmoke" border="4">
17
+ <tbody>
18
+ <tr bgcolor="#d0d0d0">
19
+ <th>Predicted NLSs in query sequence</th>
20
+ </tr>
21
+ <tr><td><strong><code><font size="3"><font color="red">K</font><font color="red">K</font><font color="red">K</font><font color="red">K</font><font color="red">R</font><font color="red">R</font><font color="red">R</font><font color="red">A</font><font color="red">A</font>AAAAAAAAAAAAAAAAAAAAA
22
+
23
+ &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
24
+ 30</font></code></strong></td></tr> </tbody>
25
+ </table>
26
+ <br><table bordercolorlight="darkgray" bordercolordark="darkslategray" cellpadding="5" cellspacing="0" align="center" bgcolor="white" border="3">
27
+ <tbody>
28
+ <tr bgcolor="darkseagreen">
29
+ <th colspan="3">Predicted monopartite NLS</th>
30
+ </tr>
31
+ <tr bgcolor="#d0d0d0">
32
+ <th>Pos.</th>
33
+ <th>Sequence</th>
34
+ <th>Score</th>
35
+ </tr>
36
+ <tr><td><strong><big><code>1</code></big></strong><br><strong><big><code></code></big></strong></td><td><strong><big><code>KKKKRRRAA</code></big></strong><br><strong><big><code></code></big></strong></td><td align="center"><strong><big><code>10</code></big></strong><br><strong><big><code></code></big></strong></td></tr> </tbody>
37
+ </table>
38
+ <br><table bordercolorlight="darkgray" bordercolordark="darkslategray" cellpadding="5" cellspacing="0" align="center" bgcolor="white" border="3">
39
+ <tbody>
40
+ <tr bgcolor="darkseagreen">
41
+ <th colspan="3">Predicted bipartite NLS</th>
42
+ </tr>
43
+ <tr bgcolor="#d0d0d0">
44
+ <th>Pos.</th>
45
+ <th>Sequence</th>
46
+ <th>Score</th>
47
+ </tr>
48
+ <tr><td><strong><big><code></code></big></strong></td><td><strong><big><code></code></big></strong></td><td align="center"><strong><big><code></code></big></strong></td></tr> </tbody>
49
+ </table>
50
+ <br></body></html>
@@ -0,0 +1,47 @@
1
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
2
+ <html xml:lang="en-US" xmlns="http://www.w3.org/1999/xhtml" lang="en-US"><head>
3
+ <meta http-equiv="content-type" content="text/html; charset=ISO-8859-1">
4
+
5
+ <title>NLS Mapper</title>
6
+ <style type="text/css">
7
+ <!--/* <![CDATA[ */
8
+ <!--
9
+ h2 {color: #B22222; face: Verdana;}
10
+ .align2 {line-height: 0%;}
11
+ -->
12
+
13
+
14
+ /* ]]> */-->
15
+ </style>
16
+ </head><body><p align="center"><b><font #990033="" size="5" color="#b22222" face="Helvetica, Tahoma">cNLS Mapper Result</font></b></p><hr size="5"><table bordercolorlight="darkgray" bordercolordark="darkslategray" cellpadding="5" cellspacing="1" align="center" bgcolor="whitesmoke" border="4">
17
+ <tbody>
18
+ <tr bgcolor="#d0d0d0">
19
+ <th>Predicted NLSs in query sequence</th>
20
+ </tr>
21
+ <tr><td><strong><code><font size="3">AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 40</font></code></strong></td></tr> </tbody>
22
+ </table>
23
+ <br><table bordercolorlight="darkgray" bordercolordark="darkslategray" cellpadding="5" cellspacing="0" align="center" bgcolor="white" border="3">
24
+ <tbody>
25
+ <tr bgcolor="darkseagreen">
26
+ <th colspan="3">Predicted monopartite NLS</th>
27
+ </tr>
28
+ <tr bgcolor="#d0d0d0">
29
+ <th>Pos.</th>
30
+ <th>Sequence</th>
31
+ <th>Score</th>
32
+ </tr>
33
+ <tr><td><strong><big><code></code></big></strong></td><td><strong><big><code></code></big></strong></td><td align="center"><strong><big><code></code></big></strong></td></tr> </tbody>
34
+ </table>
35
+ <br><table bordercolorlight="darkgray" bordercolordark="darkslategray" cellpadding="5" cellspacing="0" align="center" bgcolor="white" border="3">
36
+ <tbody>
37
+ <tr bgcolor="darkseagreen">
38
+ <th colspan="3">Predicted bipartite NLS</th>
39
+ </tr>
40
+ <tr bgcolor="#d0d0d0">
41
+ <th>Pos.</th>
42
+ <th>Sequence</th>
43
+ <th>Score</th>
44
+ </tr>
45
+ <tr><td><strong><big><code></code></big></strong></td><td><strong><big><code></code></big></strong></td><td align="center"><strong><big><code></code></big></strong></td></tr> </tbody>
46
+ </table>
47
+ <br></body></html>
@@ -0,0 +1,18 @@
1
require 'rubygems'
require 'bundler'

# Activate the bundle before loading the test libraries. When gems are
# missing, say so and exit with the status code Bundler provides.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  $stderr.puts bundler_error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end
require 'test/unit'
require 'shoulda'

# Put lib/ and then the test directory itself at the front of the load path.
test_directory = File.dirname(__FILE__)
[File.join(test_directory, '..', 'lib'), test_directory].each do |dir|
  $LOAD_PATH.unshift(dir)
end
require 'bio-cnls_screenscraper'

# Reopened so that shared test helpers have a place to live.
class Test::Unit::TestCase
end
@@ -0,0 +1,40 @@
1
require 'helper'
require 'bio-cnls_screenscraper'

# Tests for the HTML parser (run against saved server responses in
# test/data) and for the score cutoff logic of Bio::CNLS::Result.
class TestBioCnlsScreenscraper < Test::Unit::TestCase
  # Directory holding the saved server responses.
  DATA_DIR = File.join(File.dirname(__FILE__), 'data')

  # Contents of the named fixture. File.read closes the file again, unlike
  # File.open(...).read, which leaves the handle to the garbage collector.
  def fixture(name)
    File.read(File.join(DATA_DIR, name))
  end

  should "correctly parse hit results with no hits" do
    result = Bio::CNLS::Screenscraper.parse_html_result(fixture('nohits.html'))
    assert_equal [], result.signals
  end

  should "correctly parse bipartite-only signals page" do
    result = Bio::CNLS::Screenscraper.parse_html_result(fixture('bipartiteHitOnly.html'))
    assert_equal 2, result.signals.length
    # Signals sort by descending score, so index 1 is the weaker of the two.
    weaker = result.signals.sort[1]
    assert_equal 'KKKRRRAAAAAAAAAAAAAAAAAARKKKRRR', weaker.sequence
    assert_equal 5.0, weaker.score
    assert_equal 1, weaker.position
  end

  should "correctly parse results with monopartite signals only" do
    result = Bio::CNLS::Screenscraper.parse_html_result(fixture('monopartiteHitOnly.html'))
    assert_equal 1, result.signals.length
    best = result.signals.sort[0]
    assert_equal 'KKKKRRRAA', best.sequence
    assert_equal 10.0, best.score
    assert_equal 1, best.position
  end

  should "apply the correct monopartite cutoff" do
    nls = Bio::CNLS::Result::MonopartiteNLS.new
    nls.score = 8.0
    result = Bio::CNLS::Result.new
    result.signals.push nls
    assert_equal true, result.monopartite_predicted?(7.0)
    assert_equal true, result.monopartite_predicted?(8.0)
    assert_equal false, result.monopartite_predicted?(9.0)
  end
end
metadata ADDED
@@ -0,0 +1,157 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bio-cnls_screenscraper
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - Ben J. Woodcroft
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-02-21 00:00:00 +11:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ prerelease: false
23
+ name: shoulda
24
+ type: :development
25
+ version_requirements: &id001 !ruby/object:Gem::Requirement
26
+ none: false
27
+ requirements:
28
+ - - ">="
29
+ - !ruby/object:Gem::Version
30
+ hash: 3
31
+ segments:
32
+ - 0
33
+ version: "0"
34
+ requirement: *id001
35
+ - !ruby/object:Gem::Dependency
36
+ prerelease: false
37
+ name: bundler
38
+ type: :development
39
+ version_requirements: &id002 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ~>
43
+ - !ruby/object:Gem::Version
44
+ hash: 23
45
+ segments:
46
+ - 1
47
+ - 0
48
+ - 0
49
+ version: 1.0.0
50
+ requirement: *id002
51
+ - !ruby/object:Gem::Dependency
52
+ prerelease: false
53
+ name: jeweler
54
+ type: :development
55
+ version_requirements: &id003 !ruby/object:Gem::Requirement
56
+ none: false
57
+ requirements:
58
+ - - ~>
59
+ - !ruby/object:Gem::Version
60
+ hash: 7
61
+ segments:
62
+ - 1
63
+ - 5
64
+ - 2
65
+ version: 1.5.2
66
+ requirement: *id003
67
+ - !ruby/object:Gem::Dependency
68
+ prerelease: false
69
+ name: rcov
70
+ type: :development
71
+ version_requirements: &id004 !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ hash: 3
77
+ segments:
78
+ - 0
79
+ version: "0"
80
+ requirement: *id004
81
+ - !ruby/object:Gem::Dependency
82
+ prerelease: false
83
+ name: bio
84
+ type: :development
85
+ version_requirements: &id005 !ruby/object:Gem::Requirement
86
+ none: false
87
+ requirements:
88
+ - - ">="
89
+ - !ruby/object:Gem::Version
90
+ hash: 5
91
+ segments:
92
+ - 1
93
+ - 4
94
+ - 1
95
+ version: 1.4.1
96
+ requirement: *id005
97
+ description: Programmatic interface to the cNLS nuclear localisation signal prediction software
98
+ email: donttrustben@gmail.com
99
+ executables: []
100
+
101
+ extensions: []
102
+
103
+ extra_rdoc_files:
104
+ - LICENSE.txt
105
+ - README.rdoc
106
+ files:
107
+ - .document
108
+ - Gemfile
109
+ - LICENSE.txt
110
+ - README.rdoc
111
+ - Rakefile
112
+ - VERSION
113
+ - lib/bio-cnls_screenscraper.rb
114
+ - test/data/badCharacters.html
115
+ - test/data/bipartiteHitOnly.html
116
+ - test/data/lessThan10Fail.html
117
+ - test/data/monopartiteHitOnly.html
118
+ - test/data/nohits.html
119
+ - test/helper.rb
120
+ - test/test_bio-cnls_screenscraper.rb
121
+ has_rdoc: true
122
+ homepage: http://github.com/wwood/bioruby-cnls_screenscraper
123
+ licenses:
124
+ - MIT
125
+ post_install_message:
126
+ rdoc_options: []
127
+
128
+ require_paths:
129
+ - lib
130
+ required_ruby_version: !ruby/object:Gem::Requirement
131
+ none: false
132
+ requirements:
133
+ - - ">="
134
+ - !ruby/object:Gem::Version
135
+ hash: 3
136
+ segments:
137
+ - 0
138
+ version: "0"
139
+ required_rubygems_version: !ruby/object:Gem::Requirement
140
+ none: false
141
+ requirements:
142
+ - - ">="
143
+ - !ruby/object:Gem::Version
144
+ hash: 3
145
+ segments:
146
+ - 0
147
+ version: "0"
148
+ requirements: []
149
+
150
+ rubyforge_project:
151
+ rubygems_version: 1.3.7
152
+ signing_key:
153
+ specification_version: 3
154
+ summary: Programmatic interface to the cNLS nuclear localisation signal prediction software
155
+ test_files:
156
+ - test/helper.rb
157
+ - test/test_bio-cnls_screenscraper.rb