bio-cnls_screenscraper 0.1.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.travis.yml +12 -0
- data/Gemfile +4 -4
- data/LICENSE.txt +1 -1
- data/README.rdoc +13 -1
- data/Rakefile +0 -7
- data/VERSION +1 -1
- data/bin/bio-cnls_screenscraper +117 -0
- data/lib/bio-cnls_screenscraper.rb +11 -292
- data/lib/bio/cnls_screenscraper.rb +2 -0
- data/lib/bio/cnls_screenscraper/cnls_screenscraper.rb +293 -0
- data/test/{test_bio-cnls_screenscraper.rb → bio/test_cnls_screenscraper.rb} +1 -2
- metadata +101 -107
data/.travis.yml
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
language: ruby
|
2
|
+
rvm:
|
3
|
+
- 1.9.2
|
4
|
+
- 1.9.3
|
5
|
+
- jruby-19mode # JRuby in 1.9 mode
|
6
|
+
- rbx-19mode
|
7
|
+
# - 1.8.7
|
8
|
+
# - jruby-18mode # JRuby in 1.8 mode
|
9
|
+
# - rbx-18mode
|
10
|
+
|
11
|
+
# uncomment this line if your project needs to run something other than `rake`:
|
12
|
+
# script: bundle exec rspec spec
|
data/Gemfile
CHANGED
@@ -7,8 +7,8 @@ source "http://rubygems.org"
|
|
7
7
|
# Include everything needed to run rake, tests, features, etc.
|
8
8
|
group :development do
|
9
9
|
gem "shoulda", ">= 0"
|
10
|
-
gem "
|
11
|
-
gem "jeweler", "~> 1.
|
12
|
-
gem "
|
13
|
-
gem "bio", ">= 1.4.
|
10
|
+
gem "rdoc", "~> 3.12"
|
11
|
+
gem "jeweler", "~> 1.8.3"
|
12
|
+
gem "bundler", ">= 1.0.21"
|
13
|
+
gem "bio", ">= 1.4.2"
|
14
14
|
end
|
data/LICENSE.txt
CHANGED
data/README.rdoc
CHANGED
@@ -1,6 +1,18 @@
|
|
1
1
|
= bio-cnls_screenscraper
|
2
2
|
|
3
|
-
|
3
|
+
bio-cnls_screenscraper is a programmatic biogem interface to http://nls-mapper.iab.keio.ac.jp/cgi-bin/NLS_Mapper_form.cgi - a server for prediction of importin α-dependent nuclear localization signals.
|
4
|
+
|
5
|
+
First, cache the results for each sequence in your amino acid sequence FASTA file. This contacts the cNLS server once for each sequence, waiting 1 second in between so as not to overload the server. Each result is saved as a separate HTML file, so it is best to run this command in an empty directory.
|
6
|
+
|
7
|
+
mkdir cNLS_cache
|
8
|
+
cd cNLS_cache
|
9
|
+
bio-cnls_screenscraper -h <fasta_file> 2>cNLS_caching.err
|
10
|
+
|
11
|
+
Then parse these HTML files and collate the results into a single tab-separated values file. It is best to write the results file outside the cache directory, because every file in the current directory is parsed as a cached result. The parsing uses the default cutoff of 8.0 for monopartite NLSs, and 7.0 for bipartite NLSs.
|
12
|
+
|
13
|
+
bio-cnls_screenscraper -cp >../cNLS_results.csv
|
14
|
+
|
15
|
+
Some sequences are unacceptable to the cNLS server - sequences that are too short (<19 aa), too long, or contain non-standard amino acids such as 'X'.
|
4
16
|
|
5
17
|
== Contributing to bio-cnls_screenscraper
|
6
18
|
|
data/Rakefile
CHANGED
@@ -33,13 +33,6 @@ Rake::TestTask.new(:test) do |test|
|
|
33
33
|
test.verbose = true
|
34
34
|
end
|
35
35
|
|
36
|
-
require 'rcov/rcovtask'
|
37
|
-
Rcov::RcovTask.new do |test|
|
38
|
-
test.libs << 'test'
|
39
|
-
test.pattern = 'test/**/test_*.rb'
|
40
|
-
test.verbose = true
|
41
|
-
end
|
42
|
-
|
43
36
|
task :default => :test
|
44
37
|
|
45
38
|
require 'rake/rdoctask'
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.3.0
|
@@ -0,0 +1,117 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
require 'bio-cnls_screenscraper'
|
5
|
+
|
6
|
+
# When entering sequences less than this number of amino acids as a query
|
7
|
+
# it fails (if less than 10 it tells you, if less than 19 then it silently fails)
|
8
|
+
QUERY_LENGTH_MINIMUM = 19
|
9
|
+
ACCEPTABLE_AMINO_ACID_CHARACTERS = Bio::AminoAcid::Data::WEIGHT.keys.push('*')
|
10
|
+
|
11
|
+
options = {
|
12
|
+
:verbose => true,
|
13
|
+
:cache_html => false,
|
14
|
+
:use_cache => false,
|
15
|
+
:cutoff_score => nil,
|
16
|
+
:print_scores => false,
|
17
|
+
}
|
18
|
+
o = OptionParser.new do |opts|
|
19
|
+
opts.banner = ['',
|
20
|
+
'Usage: bio-cnls_formatter.rb [-qhcsp] [fasta_filename]',
|
21
|
+
'\tfasta file can also be piped in on STDIN.',''
|
22
|
+
].join("\n")
|
23
|
+
|
24
|
+
opts.on('-q','--quiet','Opposite of verbose. Default is not quiet (verbose is on)') do
|
25
|
+
options[:verbose] = false
|
26
|
+
end
|
27
|
+
opts.on('-h','--html','Cache HTML results in the current directory instead of parsing them. Default false.') do
|
28
|
+
options[:cache_html] = true
|
29
|
+
end
|
30
|
+
opts.on('-c','--cached','Parse the cache HTML results (as previously generated using -h/--html) in the current directory. Default false.') do
|
31
|
+
options[:use_cache] = true
|
32
|
+
end
|
33
|
+
opts.on('-s','--score SCORE','Cutoff score to be used when parsing results, between 0 and 10. Used when parsing results, not when querying the server') do |s|
|
34
|
+
options[:cutoff_score] = s.to_f
|
35
|
+
end
|
36
|
+
opts.on('-p','--print-scores','Output scores as well as true/false predictions. Default false.') do |s|
|
37
|
+
options[:print_scores] = true
|
38
|
+
end
|
39
|
+
end
|
40
|
+
o.parse!
|
41
|
+
|
42
|
+
print_result_headers = lambda do
|
43
|
+
to_print = [
|
44
|
+
'Name',
|
45
|
+
'Monopartite signal?',
|
46
|
+
'Bipartite signal?'
|
47
|
+
]
|
48
|
+
if options[:print_scores]
|
49
|
+
to_print.push 'Max monopartite score'
|
50
|
+
to_print.push 'Max bipartite score'
|
51
|
+
end
|
52
|
+
|
53
|
+
puts to_print.join("\t")
|
54
|
+
end
|
55
|
+
|
56
|
+
# Define a procedure for printing parsed results so it is more DRY
|
57
|
+
print_parsed_results = lambda do |sequence_name, cnls_result, score|
|
58
|
+
to_print = [
|
59
|
+
sequence_name,
|
60
|
+
cnls_result.monopartite_predicted?(score),
|
61
|
+
cnls_result.bipartite_predicted?(score)
|
62
|
+
]
|
63
|
+
if options[:print_scores]
|
64
|
+
to_print.push cnls_result.max_monopartite_score
|
65
|
+
to_print.push cnls_result.max_bipartite_score
|
66
|
+
end
|
67
|
+
|
68
|
+
puts to_print.join("\t")
|
69
|
+
end
|
70
|
+
|
71
|
+
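# If -c/--cached was given, parse the cached HTML files in the current directory; otherwise query the cNLS server for each FASTA entry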
|
72
|
+
if options[:use_cache]
|
73
|
+
print_result_headers.call
|
74
|
+
Dir.foreach('.') do |file|
|
75
|
+
next if File.directory?(file) #skip '.', '..' etc.
|
76
|
+
|
77
|
+
begin
|
78
|
+
res = Bio::CNLS::Screenscraper.parse_html_result(File.read(file))
|
79
|
+
print_parsed_results.call(
|
80
|
+
file, res, options[:cutoff_score]
|
81
|
+
)
|
82
|
+
rescue Exception => e
|
83
|
+
$stderr.puts "Failed to parse #{file}: #{e}"
|
84
|
+
end
|
85
|
+
end
|
86
|
+
else
|
87
|
+
Bio::FlatFile.foreach(ARGF) do |entry|
|
88
|
+
# Sequences are automatically disqualified if they contain characters that are neither amino acids nor stop codons
|
89
|
+
fails = entry.seq.gsub(/[#{ACCEPTABLE_AMINO_ACID_CHARACTERS.join('')}]/,'')
|
90
|
+
if fails.length > 0
|
91
|
+
if options[:verbose]
|
92
|
+
$stderr.puts "Found unacceptable characters in #{entry.definition}: #{fails}"
|
93
|
+
end
|
94
|
+
next
|
95
|
+
|
96
|
+
# Sequence length must be greater than the minimum, excluding
|
97
|
+
# stop codons
|
98
|
+
elsif entry.seq.gsub(/\*/,'').length < QUERY_LENGTH_MINIMUM
|
99
|
+
if options[:verbose]
|
100
|
+
$stderr.puts "Query sequence too short (less than #{QUERY_LENGTH_MINIMUM} residues excluding stop codons): #{entry.definition}"
|
101
|
+
end
|
102
|
+
else
|
103
|
+
# This sequence passes, run the prediction on it
|
104
|
+
if options[:cache_html]
|
105
|
+
res = Bio::CNLS::Screenscraper.get_raw_html_result(entry.seq)
|
106
|
+
File.open("#{entry.definition}.html",'w') do |f|
|
107
|
+
f.puts res
|
108
|
+
end
|
109
|
+
$stderr.print '.' if options[:verbose]
|
110
|
+
else
|
111
|
+
res = Bio::CNLS::Screenscraper.submit(entry.seq)
|
112
|
+
print_result_headers.call
|
113
|
+
print_parsed_results.call(entry.definition, res, options[:cutoff_score])
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
117
|
+
end
|
@@ -1,293 +1,12 @@
|
|
1
|
-
|
1
|
+
# Please require your code below, respecting the naming conventions in the
|
2
|
+
# bioruby directory tree.
|
3
|
+
#
|
4
|
+
# For example, say you have a plugin named bio-plugin, the only uncommented
|
5
|
+
# line in this file would be
|
6
|
+
#
|
7
|
+
# require 'bio/bio-plugin/plugin'
|
8
|
+
#
|
9
|
+
# In this file only require other files. Avoid other source code.
|
10
|
+
|
11
|
+
require 'bio/cnls_screenscraper/cnls_screenscraper.rb'
|
2
12
|
|
3
|
-
# A script to take a FASTA file, remove sequences that will fail, and automatically submit it to the cNLS server at http://nls-mapper.iab.keio.ac.jp/cgi-bin/NLS_Mapper_form.cgi
|
4
|
-
# Unfortunately, the fasta upload seems to fail.
|
5
|
-
# and format it so that it can be uploaded to the cNLS mapper (classical(?) nuclear localisation signal mapper).
|
6
|
-
# The fasta output file can be uploaded to
|
7
|
-
# http://nls-mapper.iab.keio.ac.jp/cgi-bin/NLS_Mapper_form.cgi
|
8
|
-
|
9
|
-
require 'bio'
|
10
|
-
|
11
|
-
module Bio
|
12
|
-
class CNLS
|
13
|
-
class Result
|
14
|
-
attr_accessor :signals
|
15
|
-
|
16
|
-
def initialize
|
17
|
-
@signals = []
|
18
|
-
end
|
19
|
-
|
20
|
-
class NLS
|
21
|
-
attr_accessor :position, :sequence, :score
|
22
|
-
|
23
|
-
# sort by score descending
|
24
|
-
def <=>(another)
|
25
|
-
-(@score<=>another.score)
|
26
|
-
end
|
27
|
-
end
|
28
|
-
class MonopartiteNLS<NLS; end
|
29
|
-
class BipartiteNLS<NLS; end
|
30
|
-
|
31
|
-
# Is this result a positive prediction or negative prediction?
|
32
|
-
def predicted?
|
33
|
-
!signals.nil? and !signals.empty?
|
34
|
-
end
|
35
|
-
|
36
|
-
def monopartite_predicted?(minimum_score=nil)
|
37
|
-
@signals.each do |s|
|
38
|
-
if s.kind_of?(MonopartiteNLS)
|
39
|
-
return true if minimum_score.nil? #if no cutoff, return true
|
40
|
-
return true if s.score >= minimum_score #otherwise apply the cutoff
|
41
|
-
end
|
42
|
-
end
|
43
|
-
return false
|
44
|
-
end
|
45
|
-
|
46
|
-
def bipartite_predicted?(minimum_score=nil)
|
47
|
-
@signals.each do |s|
|
48
|
-
if s.kind_of?(BipartiteNLS)
|
49
|
-
return true if minimum_score.nil? #if no cutoff, return true
|
50
|
-
return true if s.score >= minimum_score #otherwise apply the cutoff
|
51
|
-
end
|
52
|
-
end
|
53
|
-
return false
|
54
|
-
end
|
55
|
-
|
56
|
-
def max_monopartite_score
|
57
|
-
max = 0.0
|
58
|
-
@signals.each do |s|
|
59
|
-
if s.kind_of?(MonopartiteNLS) and s.score > max
|
60
|
-
max = s.score
|
61
|
-
end
|
62
|
-
end
|
63
|
-
return max
|
64
|
-
end
|
65
|
-
|
66
|
-
def max_bipartite_score
|
67
|
-
max = 0.0
|
68
|
-
@signals.each do |s|
|
69
|
-
if s.kind_of?(BipartiteNLS) and s.score > max
|
70
|
-
max = s.score
|
71
|
-
end
|
72
|
-
end
|
73
|
-
return max
|
74
|
-
end
|
75
|
-
end
|
76
|
-
|
77
|
-
# A class used to automatically submit results to the cNLS webserver and parse the HTML results.
|
78
|
-
class Screenscraper
|
79
|
-
require 'uri'
|
80
|
-
require 'net/http'
|
81
|
-
|
82
|
-
ACCEPTABLE_CUTOFFS = %w(2.0 3.0 4.0 5.0 6.0)
|
83
|
-
|
84
|
-
# Contact the cNLS prediction server and submit the amino acid sequence for prediction. Return a Bio::CNLS::Result object. Pause after each round for pause milliseconds, so as not to overload the server.
|
85
|
-
def self.submit(amino_acid_sequence, cut_off='3.0', seconds_pause=1)
|
86
|
-
# contact webserver and sleep
|
87
|
-
html = get_raw_html_result(amino_acid_sequence, cut_off, seconds_pause)
|
88
|
-
|
89
|
-
# Return the parsed HTML as a CNLS::Result object
|
90
|
-
return parse_html_result(html)
|
91
|
-
end
|
92
|
-
|
93
|
-
def self.get_raw_html_result(amino_acid_sequence, cut_off='3.0', seconds_pause=1)
|
94
|
-
unless ACCEPTABLE_CUTOFFS.include?(cut_off)
|
95
|
-
raise Exception, "Specified cutoff `#{cut_off}' for the cNLS screenscraper is invalid. Valid cutoffs are #{ACCEPTABLE_CUTOFFS.join(', ')}. They are strings, not floating point values."
|
96
|
-
end
|
97
|
-
|
98
|
-
# retrieve the webpage
|
99
|
-
res = Net::HTTP.post_form(URI.parse('http://nls-mapper.iab.keio.ac.jp/cgi-bin/NLS_Mapper_y.cgi'),
|
100
|
-
{'cut_off' => cut_off, 'typedseq' => amino_acid_sequence})
|
101
|
-
|
102
|
-
# if there is an error, raise it
|
103
|
-
unless res.kind_of?(Net::HTTPOK)
|
104
|
-
raise Exception, "Failed to retrieve cNLS, internet connectivity problem? Using cutoff/sequence #{cutoff}/#{amino_acid_sequence}"
|
105
|
-
end
|
106
|
-
|
107
|
-
# pause the specified number of seconds
|
108
|
-
sleep seconds_pause
|
109
|
-
|
110
|
-
return res.body
|
111
|
-
end
|
112
|
-
|
113
|
-
# Given HTML corresponding to a result, return a parse object that is more programmatically palatable.
|
114
|
-
def self.parse_html_result(html)
|
115
|
-
result = Result.new
|
116
|
-
|
117
|
-
# The mono and bi-partite regular expressions are equivalent except for the Predicted X NLS bit at the beginning, thanksfully. However, they sometimes appear to be slightly different, which is rather odd.
|
118
|
-
monopartite_regex = /Predicted monopartite NLS<\/th>\s+<\/TR>\s*<TR bgcolor="#d0d0d0">\s*<th>Pos.<\/th>\s*<th>Sequence<\/th>\s*<th>Score<\/th>\s*<\/TR>\s*<TR><td><strong><big><code>(.*?)<\/code><\/big><\/strong><br.{0,2}><strong><big><code.{2,8}><\/big><\/strong><\/td><td><strong><big><code>(.*?)<\/code><\/big><\/strong><br.{0,2}><strong><big><code.{2,8}><\/big><\/strong><\/td><td align="center"><strong><big><code>(.*?)<\/code><\/big><\/strong><br.{0,2}><strong><big><code.{2,8}><\/big><\/strong><\/td><\/TR>/i
|
119
|
-
bipartite_regex = /Predicted bipartite NLS<\/th>\s+<\/TR>\s*<TR bgcolor="#d0d0d0">\s*<th>Pos.<\/th>\s*<th>Sequence<\/th>\s*<th>Score<\/th>\s*<\/TR>\s*<TR><td><strong><big><code>(.*?)<\/code><\/big><\/strong><br.{0,2}><strong><big><code.{2,8}><\/big><\/strong><\/td><td><strong><big><code>(.*?)<\/code><\/big><\/strong><br.{0,2}><strong><big><code.{2,8}><\/big><\/strong><\/td><td align="center"><strong><big><code>(.*?)<\/code><\/big><\/strong><br.{0,2}><strong><big><code.{2,8}><\/big><\/strong><\/td><\/TR>/i
|
120
|
-
|
121
|
-
monopartite_no_hits = /Predicted monopartite NLS<\/th>\s*<\/tr>\s*<tr bgcolor="#d0d0d0">\s*<th>Pos.<\/th>\s*<th>Sequence<\/th>\s*<th>Score<\/th>\s*<\/tr>\s*<tr><td><strong><big><code><\/code><\/big><\/strong><\/td><td><strong><big><code><\/code><\/big><\/strong><\/td><td align="center"><strong><big><code><\/code><\/big><\/strong><\/td><\/tr>/i
|
122
|
-
bipartite_no_hits = /Predicted bipartite NLS<\/th>\s*<\/tr>\s*<tr bgcolor="#d0d0d0">\s*<th>Pos.<\/th>\s*<th>Sequence<\/th>\s*<th>Score<\/th>\s*<\/tr>\s*<tr><td><strong><big><code><\/code><\/big><\/strong><\/td><td><strong><big><code><\/code><\/big><\/strong><\/td><td align="center"><strong><big><code><\/code><\/big><\/strong><\/td><\/tr>/i
|
123
|
-
monopartite_no_hits2 = /Predicted monopartite NLS<\/th>\s*<\/TR>\s*<TR bgcolor="#d0d0d0">\s*<th>Pos.<\/th>\s*<th>Sequence<\/th>\s*<th>Score<\/th>\s*<\/TR>\s*<TR><td><strong><big><code \/><\/big><\/strong><\/td><td><strong><big><code \/><\/big><\/strong><\/td><td align="center"><strong><big><code \/><\/big><\/strong><\/td><\/TR>/i
|
124
|
-
bipartite_no_hits2 = /Predicted bipartite NLS<\/th>\s*<\/TR>\s*<TR bgcolor="#d0d0d0">\s*<th>Pos.<\/th>\s*<th>Sequence<\/th>\s*<th>Score<\/th>\s*<\/TR>\s*<TR><td><strong><big><code \/><\/big><\/strong><\/td><td><strong><big><code \/><\/big><\/strong><\/td><td align="center"><strong><big><code \/><\/big><\/strong><\/td><\/TR>/i
|
125
|
-
|
126
|
-
split_regex = /<\/code><\/big><\/strong><br.{0,2}><strong><big><code>/
|
127
|
-
|
128
|
-
# Make sure the sequence isn't too long
|
129
|
-
if html.match(/Query sequence should be < 5000 aa/)
|
130
|
-
raise Exception, "Query sequence provided was too long (> 5000 aa)"
|
131
|
-
|
132
|
-
# parse out monopartite signals
|
133
|
-
elsif matches = html.match(monopartite_regex)
|
134
|
-
positions = matches[1].split(split_regex)
|
135
|
-
seqs = matches[2].split(split_regex)
|
136
|
-
scores = matches[3].split(split_regex)
|
137
|
-
|
138
|
-
positions.each_with_index do |pos, i|
|
139
|
-
nls = Result::MonopartiteNLS.new
|
140
|
-
nls.position = pos.to_i
|
141
|
-
nls.sequence = seqs[i]
|
142
|
-
nls.score = scores[i].to_f
|
143
|
-
result.signals.push nls
|
144
|
-
end
|
145
|
-
elsif html.match(monopartite_no_hits) or html.match(monopartite_no_hits2)
|
146
|
-
# do nothing, except for not raising a parsing exception
|
147
|
-
else
|
148
|
-
raise Exception, "Could not parse HTML output returned from cNLS prediction server. In particular, looking for monopartite signals, but the whole document is likely problematic.\n#{html}"
|
149
|
-
end
|
150
|
-
|
151
|
-
|
152
|
-
# parse out the bipartite signals
|
153
|
-
if matches = html.match(bipartite_regex)
|
154
|
-
positions = matches[1].split(split_regex)
|
155
|
-
seqs = matches[2].split(split_regex)
|
156
|
-
scores = matches[3].split(split_regex)
|
157
|
-
|
158
|
-
positions.each_with_index do |pos, i|
|
159
|
-
nls = Result::BipartiteNLS.new
|
160
|
-
nls.position = pos.to_i
|
161
|
-
nls.sequence = seqs[i]
|
162
|
-
nls.score = scores[i].to_f
|
163
|
-
result.signals.push nls
|
164
|
-
end
|
165
|
-
elsif html.match(bipartite_no_hits) or html.match(bipartite_no_hits2)
|
166
|
-
# do nothing, except for not raising a parsing exception
|
167
|
-
else
|
168
|
-
raise Exception, "Could not parse HTML output returned from cNLS prediction server. In particular, looking for bipartite signals, monopartite signals seemed to be parsed OK.\n#{html}"
|
169
|
-
end
|
170
|
-
|
171
|
-
return result
|
172
|
-
end
|
173
|
-
end
|
174
|
-
end
|
175
|
-
end
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
if __FILE__ == $0
|
180
|
-
require 'optparse'
|
181
|
-
|
182
|
-
# When entering sequences less than this number of amino acids as a query
|
183
|
-
# it fails (if less than 10 it tells you, if less than 19 then it silently fails)
|
184
|
-
QUERY_LENGTH_MINIMUM = 19
|
185
|
-
ACCEPTABLE_AMINO_ACID_CHARACTERS = Bio::AminoAcid::Data::WEIGHT.keys.push('*')
|
186
|
-
|
187
|
-
options = {
|
188
|
-
:verbose => true,
|
189
|
-
:cache_html => false,
|
190
|
-
:use_cache => false,
|
191
|
-
:cutoff_score => nil,
|
192
|
-
:print_scores => false,
|
193
|
-
}
|
194
|
-
o = OptionParser.new do |opts|
|
195
|
-
opts.banner = [
|
196
|
-
'Usage: bio-cnls_formatter.rb [-qh] [fasta_filename]',
|
197
|
-
'\tfasta file can also be piped in on STDIN.'
|
198
|
-
]
|
199
|
-
opts.on('-q','--quiet','Opposite of verbose. Default is not quiet (verbose is on)') do
|
200
|
-
options[:verbose] = false
|
201
|
-
end
|
202
|
-
opts.on('-h','--html','Cache HTML results in the current directory instead of parsing them. Default false.') do
|
203
|
-
options[:cache_html] = true
|
204
|
-
end
|
205
|
-
opts.on('-c','--cached','Parse the cache HTML results (as previously generated using -h/--html) in the current directory. Default false.') do
|
206
|
-
options[:use_cache] = true
|
207
|
-
end
|
208
|
-
opts.on('-s','--score SCORE','Cutoff score to be used when parsing results, between 0 and 10. Used when parsing results, not when querying the server') do |s|
|
209
|
-
options[:cutoff_score] = s.to_f
|
210
|
-
end
|
211
|
-
opts.on('-p','--print-scores','Output scores as well as true/false predictions. Default false.') do |s|
|
212
|
-
options[:print_scores] = true
|
213
|
-
end
|
214
|
-
end
|
215
|
-
o.parse!
|
216
|
-
|
217
|
-
print_result_headers = lambda do
|
218
|
-
to_print = [
|
219
|
-
'Name',
|
220
|
-
'Monopartite signal?',
|
221
|
-
'Bipartite signal?'
|
222
|
-
]
|
223
|
-
if options[:print_scores]
|
224
|
-
to_print.push 'Max monopartite score'
|
225
|
-
to_print.push 'Max bipartite score'
|
226
|
-
end
|
227
|
-
|
228
|
-
puts to_print.join("\t")
|
229
|
-
end
|
230
|
-
|
231
|
-
# Define a procedure for printing parsed results so it is more DRY
|
232
|
-
print_parsed_results = lambda do |sequence_name, cnls_result, score|
|
233
|
-
to_print = [
|
234
|
-
sequence_name,
|
235
|
-
cnls_result.monopartite_predicted?(score),
|
236
|
-
cnls_result.bipartite_predicted?(score)
|
237
|
-
]
|
238
|
-
if options[:print_scores]
|
239
|
-
to_print.push cnls_result.max_monopartite_score
|
240
|
-
to_print.push cnls_result.max_bipartite_score
|
241
|
-
end
|
242
|
-
|
243
|
-
puts to_print.join("\t")
|
244
|
-
end
|
245
|
-
|
246
|
-
# If
|
247
|
-
if options[:use_cache]
|
248
|
-
print_result_headers.call
|
249
|
-
Dir.foreach('.') do |file|
|
250
|
-
next if File.directory?(file) #skip '.', '..' etc.
|
251
|
-
|
252
|
-
begin
|
253
|
-
res = Bio::CNLS::Screenscraper.parse_html_result(File.read(file))
|
254
|
-
print_parsed_results.call(
|
255
|
-
file, res, options[:cutoff_score]
|
256
|
-
)
|
257
|
-
rescue Exception => e
|
258
|
-
$stderr.puts "Failed to parse #{file}: #{e}"
|
259
|
-
end
|
260
|
-
end
|
261
|
-
else
|
262
|
-
Bio::FlatFile.foreach(ARGF) do |entry|
|
263
|
-
# Sequences are automatically disqualified if they contain characters that are neither amino acids or stop codons
|
264
|
-
fails = entry.seq.gsub(/[#{ACCEPTABLE_AMINO_ACID_CHARACTERS.join('')}]/,'')
|
265
|
-
if fails.length > 0
|
266
|
-
if options[:verbose]
|
267
|
-
$stderr.puts "Found unacceptable characters in #{entry.definition}: #{fails}"
|
268
|
-
end
|
269
|
-
next
|
270
|
-
|
271
|
-
# Sequence length must be greater than the minimum, excluding
|
272
|
-
# stop codons
|
273
|
-
elsif entry.seq.gsub(/\*/,'').length < QUERY_LENGTH_MINIMUM
|
274
|
-
if options[:verbose]
|
275
|
-
$stderr.puts "Query sequence too short (less than #{QUERY_LENGTH_MINIMUM} residues excluding stop codons): #{entry.definition}"
|
276
|
-
end
|
277
|
-
else
|
278
|
-
# This sequence passes, run the prediction on it
|
279
|
-
if options[:cache_html]
|
280
|
-
res = Bio::CNLS::Screenscraper.get_raw_html_result(entry.seq)
|
281
|
-
File.open("#{entry.definition}.html",'w') do |f|
|
282
|
-
f.puts res
|
283
|
-
end
|
284
|
-
$stderr.print '.' if options[:verbose]
|
285
|
-
else
|
286
|
-
res = Bio::CNLS::Screenscraper.submit(entry.seq)
|
287
|
-
print_result_headers.call
|
288
|
-
print_parsed_results.call(entry, res, options[:cutoff_score])
|
289
|
-
end
|
290
|
-
end
|
291
|
-
end
|
292
|
-
end
|
293
|
-
end
|
@@ -0,0 +1,293 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# A script to take a FASTA file, remove sequences that will fail, and automatically submit it to the cNLS server at http://nls-mapper.iab.keio.ac.jp/cgi-bin/NLS_Mapper_form.cgi
|
4
|
+
# Unfortunately, the fasta upload seems to fail.
|
5
|
+
# and format it so that it can be uploaded to the cNLS mapper (classical(?) nuclear localisation signal mapper).
|
6
|
+
# The fasta output file can be uploaded to
|
7
|
+
# http://nls-mapper.iab.keio.ac.jp/cgi-bin/NLS_Mapper_form.cgi
|
8
|
+
|
9
|
+
require 'bio'
|
10
|
+
|
11
|
+
module Bio
|
12
|
+
class CNLS
|
13
|
+
class Result
|
14
|
+
attr_accessor :signals
|
15
|
+
|
16
|
+
def initialize
|
17
|
+
@signals = []
|
18
|
+
end
|
19
|
+
|
20
|
+
class NLS
|
21
|
+
attr_accessor :position, :sequence, :score
|
22
|
+
|
23
|
+
# sort by score descending
|
24
|
+
def <=>(another)
|
25
|
+
-(@score<=>another.score)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
class MonopartiteNLS<NLS; end
|
29
|
+
class BipartiteNLS<NLS; end
|
30
|
+
|
31
|
+
# Is this result a positive prediction or negative prediction?
|
32
|
+
def predicted?
|
33
|
+
!signals.nil? and !signals.empty?
|
34
|
+
end
|
35
|
+
|
36
|
+
def monopartite_predicted?(minimum_score=nil)
|
37
|
+
@signals.each do |s|
|
38
|
+
if s.kind_of?(MonopartiteNLS)
|
39
|
+
return true if minimum_score.nil? #if no cutoff, return true
|
40
|
+
return true if s.score >= minimum_score #otherwise apply the cutoff
|
41
|
+
end
|
42
|
+
end
|
43
|
+
return false
|
44
|
+
end
|
45
|
+
|
46
|
+
def bipartite_predicted?(minimum_score=nil)
|
47
|
+
@signals.each do |s|
|
48
|
+
if s.kind_of?(BipartiteNLS)
|
49
|
+
return true if minimum_score.nil? #if no cutoff, return true
|
50
|
+
return true if s.score >= minimum_score #otherwise apply the cutoff
|
51
|
+
end
|
52
|
+
end
|
53
|
+
return false
|
54
|
+
end
|
55
|
+
|
56
|
+
def max_monopartite_score
|
57
|
+
max = 0.0
|
58
|
+
@signals.each do |s|
|
59
|
+
if s.kind_of?(MonopartiteNLS) and s.score > max
|
60
|
+
max = s.score
|
61
|
+
end
|
62
|
+
end
|
63
|
+
return max
|
64
|
+
end
|
65
|
+
|
66
|
+
def max_bipartite_score
|
67
|
+
max = 0.0
|
68
|
+
@signals.each do |s|
|
69
|
+
if s.kind_of?(BipartiteNLS) and s.score > max
|
70
|
+
max = s.score
|
71
|
+
end
|
72
|
+
end
|
73
|
+
return max
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
# A class used to automatically submit results to the cNLS webserver and parse the HTML results.
|
78
|
+
class Screenscraper
|
79
|
+
require 'uri'
|
80
|
+
require 'net/http'
|
81
|
+
|
82
|
+
ACCEPTABLE_CUTOFFS = %w(2.0 3.0 4.0 5.0 6.0)
|
83
|
+
|
84
|
+
# Contact the cNLS prediction server and submit the amino acid sequence for prediction. Return a Bio::CNLS::Result object. Pause after each round for seconds_pause seconds, so as not to overload the server.
|
85
|
+
def self.submit(amino_acid_sequence, cut_off='3.0', seconds_pause=1)
|
86
|
+
# contact webserver and sleep
|
87
|
+
html = get_raw_html_result(amino_acid_sequence, cut_off, seconds_pause)
|
88
|
+
|
89
|
+
# Return the parsed HTML as a CNLS::Result object
|
90
|
+
return parse_html_result(html)
|
91
|
+
end
|
92
|
+
|
93
|
+
def self.get_raw_html_result(amino_acid_sequence, cut_off='3.0', seconds_pause=1)
|
94
|
+
unless ACCEPTABLE_CUTOFFS.include?(cut_off)
|
95
|
+
raise Exception, "Specified cutoff `#{cut_off}' for the cNLS screenscraper is invalid. Valid cutoffs are #{ACCEPTABLE_CUTOFFS.join(', ')}. They are strings, not floating point values."
|
96
|
+
end
|
97
|
+
|
98
|
+
# retrieve the webpage
|
99
|
+
res = Net::HTTP.post_form(URI.parse('http://nls-mapper.iab.keio.ac.jp/cgi-bin/NLS_Mapper_y.cgi'),
|
100
|
+
{'cut_off' => cut_off, 'typedseq' => amino_acid_sequence})
|
101
|
+
|
102
|
+
# if there is an error, raise it
|
103
|
+
unless res.kind_of?(Net::HTTPOK)
|
104
|
+
raise Exception, "Failed to retrieve cNLS, internet connectivity problem? Using cutoff/sequence #{cutoff}/#{amino_acid_sequence}"
|
105
|
+
end
|
106
|
+
|
107
|
+
# pause the specified number of seconds
|
108
|
+
sleep seconds_pause
|
109
|
+
|
110
|
+
return res.body
|
111
|
+
end
|
112
|
+
|
113
|
+
# Given HTML corresponding to a result page, return a Result object whose
# signals list holds one MonopartiteNLS / BipartiteNLS per predicted signal.
#
# Raises Exception when the page reports that the query was too long, or
# when neither the "hits" nor the "no hits" table layout can be recognised
# for the monopartite or the bipartite section.
def self.parse_html_result(html)
  result = Result.new

  # The mono- and bipartite regular expressions are equivalent except for the
  # "Predicted X NLS" text at the beginning, thankfully. However, the pages
  # sometimes appear to be slightly different, hence two "no hits" variants.
  mono_hits_regex = /Predicted monopartite NLS<\/th>\s+<\/TR>\s*<TR bgcolor="#d0d0d0">\s*<th>Pos.<\/th>\s*<th>Sequence<\/th>\s*<th>Score<\/th>\s*<\/TR>\s*<TR><td><strong><big><code>(.*?)<\/code><\/big><\/strong><br.{0,2}><strong><big><code.{2,8}><\/big><\/strong><\/td><td><strong><big><code>(.*?)<\/code><\/big><\/strong><br.{0,2}><strong><big><code.{2,8}><\/big><\/strong><\/td><td align="center"><strong><big><code>(.*?)<\/code><\/big><\/strong><br.{0,2}><strong><big><code.{2,8}><\/big><\/strong><\/td><\/TR>/i
  bi_hits_regex = /Predicted bipartite NLS<\/th>\s+<\/TR>\s*<TR bgcolor="#d0d0d0">\s*<th>Pos.<\/th>\s*<th>Sequence<\/th>\s*<th>Score<\/th>\s*<\/TR>\s*<TR><td><strong><big><code>(.*?)<\/code><\/big><\/strong><br.{0,2}><strong><big><code.{2,8}><\/big><\/strong><\/td><td><strong><big><code>(.*?)<\/code><\/big><\/strong><br.{0,2}><strong><big><code.{2,8}><\/big><\/strong><\/td><td align="center"><strong><big><code>(.*?)<\/code><\/big><\/strong><br.{0,2}><strong><big><code.{2,8}><\/big><\/strong><\/td><\/TR>/i

  mono_empty_regex = /Predicted monopartite NLS<\/th>\s*<\/tr>\s*<tr bgcolor="#d0d0d0">\s*<th>Pos.<\/th>\s*<th>Sequence<\/th>\s*<th>Score<\/th>\s*<\/tr>\s*<tr><td><strong><big><code><\/code><\/big><\/strong><\/td><td><strong><big><code><\/code><\/big><\/strong><\/td><td align="center"><strong><big><code><\/code><\/big><\/strong><\/td><\/tr>/i
  bi_empty_regex = /Predicted bipartite NLS<\/th>\s*<\/tr>\s*<tr bgcolor="#d0d0d0">\s*<th>Pos.<\/th>\s*<th>Sequence<\/th>\s*<th>Score<\/th>\s*<\/tr>\s*<tr><td><strong><big><code><\/code><\/big><\/strong><\/td><td><strong><big><code><\/code><\/big><\/strong><\/td><td align="center"><strong><big><code><\/code><\/big><\/strong><\/td><\/tr>/i
  mono_empty_regex2 = /Predicted monopartite NLS<\/th>\s*<\/TR>\s*<TR bgcolor="#d0d0d0">\s*<th>Pos.<\/th>\s*<th>Sequence<\/th>\s*<th>Score<\/th>\s*<\/TR>\s*<TR><td><strong><big><code \/><\/big><\/strong><\/td><td><strong><big><code \/><\/big><\/strong><\/td><td align="center"><strong><big><code \/><\/big><\/strong><\/td><\/TR>/i
  bi_empty_regex2 = /Predicted bipartite NLS<\/th>\s*<\/TR>\s*<TR bgcolor="#d0d0d0">\s*<th>Pos.<\/th>\s*<th>Sequence<\/th>\s*<th>Score<\/th>\s*<\/TR>\s*<TR><td><strong><big><code \/><\/big><\/strong><\/td><td><strong><big><code \/><\/big><\/strong><\/td><td align="center"><strong><big><code \/><\/big><\/strong><\/td><\/TR>/i

  # Separator between successive entries inside one captured table cell.
  cell_separator = /<\/code><\/big><\/strong><br.{0,2}><strong><big><code>/

  # Split the three captured cells (positions, sequences, scores) of a hit
  # table and append one signal_class instance per position to the result.
  append_signals = lambda do |hit, signal_class|
    positions, sequences, scores = (1..3).map { |group| hit[group].split(cell_separator) }
    positions.each_with_index do |position, index|
      signal = signal_class.new
      signal.position = position.to_i
      signal.sequence = sequences[index]
      signal.score = scores[index].to_f
      result.signals.push signal
    end
  end

  # Make sure the sequence isn't too long
  if html.match(/Query sequence should be < 5000 aa/)
    raise Exception, "Query sequence provided was too long (> 5000 aa)"
  end

  # parse out monopartite signals
  mono_hit = html.match(mono_hits_regex)
  if mono_hit
    append_signals.call(mono_hit, Result::MonopartiteNLS)
  elsif !(html.match(mono_empty_regex) || html.match(mono_empty_regex2))
    # A recognised "no hits" table is fine; anything else is a parse failure.
    raise Exception, "Could not parse HTML output returned from cNLS prediction server. In particular, looking for monopartite signals, but the whole document is likely problematic.\n#{html}"
  end

  # parse out the bipartite signals
  bi_hit = html.match(bi_hits_regex)
  if bi_hit
    append_signals.call(bi_hit, Result::BipartiteNLS)
  elsif !(html.match(bi_empty_regex) || html.match(bi_empty_regex2))
    # A recognised "no hits" table is fine; anything else is a parse failure.
    raise Exception, "Could not parse HTML output returned from cNLS prediction server. In particular, looking for bipartite signals, monopartite signals seemed to be parsed OK.\n#{html}"
  end

  result
end
|
173
|
+
end
|
174
|
+
end
|
175
|
+
end
|
176
|
+
|
177
|
+
|
178
|
+
|
179
|
+
# Command-line driver, run only when this file is executed directly.
#
# Modes, selected by the options parsed below:
# * default: submit each sequence of the FASTA input (file argument or STDIN)
#   to the server and print one tab-separated result line per sequence.
# * -h/--html: save the raw HTML for each sequence as "<definition>.html" in
#   the current directory instead of parsing it.
# * -c/--cached: parse every file in the current directory as a previously
#   saved HTML result and print one result line per file.
if __FILE__ == $0
  require 'optparse'

  # When entering sequences less than this number of amino acids as a query
  # it fails (if less than 10 it tells you, if less than 19 then it silently fails)
  QUERY_LENGTH_MINIMUM = 19
  ACCEPTABLE_AMINO_ACID_CHARACTERS = Bio::AminoAcid::Data::WEIGHT.keys.push('*')

  options = {
    :verbose => true,
    :cache_html => false,
    :use_cache => false,
    :cutoff_score => nil,
    :print_scores => false,
  }
  o = OptionParser.new do |opts|
    # The banner has to be one String; assigning an Array makes the help
    # output show the Array's inspect form. The second line needs a
    # double-quoted "\t" to get a real tab rather than a literal backslash-t.
    opts.banner = [
      'Usage: bio-cnls_formatter.rb [-qhcsp] [fasta_filename]',
      "\tfasta file can also be piped in on STDIN."
    ].join("\n")
    opts.on('-q','--quiet','Opposite of verbose. Default is not quiet (verbose is on)') do
      options[:verbose] = false
    end
    opts.on('-h','--html','Cache HTML results in the current directory instead of parsing them. Default false.') do
      options[:cache_html] = true
    end
    opts.on('-c','--cached','Parse the cache HTML results (as previously generated using -h/--html) in the current directory. Default false.') do
      options[:use_cache] = true
    end
    opts.on('-s','--score SCORE','Cutoff score to be used when parsing results, between 0 and 10. Used when parsing results, not when querying the server') do |s|
      options[:cutoff_score] = s.to_f
    end
    opts.on('-p','--print-scores','Output scores as well as true/false predictions. Default false.') do
      options[:print_scores] = true
    end
  end
  o.parse!

  # Print the tab-separated column headers matching print_parsed_results.
  print_result_headers = lambda do
    to_print = [
      'Name',
      'Monopartite signal?',
      'Bipartite signal?'
    ]
    if options[:print_scores]
      to_print.push 'Max monopartite score'
      to_print.push 'Max bipartite score'
    end

    puts to_print.join("\t")
  end

  # Define a procedure for printing parsed results so it is more DRY
  print_parsed_results = lambda do |sequence_name, cnls_result, score|
    to_print = [
      sequence_name,
      cnls_result.monopartite_predicted?(score),
      cnls_result.bipartite_predicted?(score)
    ]
    if options[:print_scores]
      to_print.push cnls_result.max_monopartite_score
      to_print.push cnls_result.max_bipartite_score
    end

    puts to_print.join("\t")
  end

  if options[:use_cache]
    # Parse previously cached HTML files found in the current directory.
    print_result_headers.call
    Dir.foreach('.') do |file|
      next if File.directory?(file) #skip '.', '..' etc.

      begin
        res = Bio::CNLS::Screenscraper.parse_html_result(File.read(file))
        print_parsed_results.call(
          file, res, options[:cutoff_score]
        )
      rescue SignalException, SystemExit
        # Do not swallow Ctrl-C or exit requests while looping over files.
        raise
      rescue Exception => e
        # The parser raises bare Exception, so that is what must be rescued
        # to report a bad file and carry on with the next one.
        $stderr.puts "Failed to parse #{file}: #{e}"
      end
    end
  else
    # Headers belong at the top of the table exactly once, and only when
    # result lines are going to be printed (not when just caching HTML).
    print_result_headers.call unless options[:cache_html]

    # Built once: matches every character that is allowed in a query.
    acceptable_characters = /[#{ACCEPTABLE_AMINO_ACID_CHARACTERS.join('')}]/

    Bio::FlatFile.foreach(ARGF) do |entry|
      # Sequences are automatically disqualified if they contain characters that are neither amino acids nor stop codons
      fails = entry.seq.gsub(acceptable_characters,'')
      if fails.length > 0
        if options[:verbose]
          $stderr.puts "Found unacceptable characters in #{entry.definition}: #{fails}"
        end
        next

      # Sequence length must be greater than the minimum, excluding
      # stop codons
      elsif entry.seq.gsub(/\*/,'').length < QUERY_LENGTH_MINIMUM
        if options[:verbose]
          $stderr.puts "Query sequence too short (less than #{QUERY_LENGTH_MINIMUM} residues excluding stop codons): #{entry.definition}"
        end
      else
        # This sequence passes, run the prediction on it
        if options[:cache_html]
          res = Bio::CNLS::Screenscraper.get_raw_html_result(entry.seq)
          File.open("#{entry.definition}.html",'w') do |f|
            f.puts res
          end
          $stderr.print '.' if options[:verbose]
        else
          res = Bio::CNLS::Screenscraper.submit(entry.seq)
          # Use the entry's definition line as the name; passing the entry
          # object itself would print its default to_s form.
          print_parsed_results.call(entry.definition, res, options[:cutoff_score])
        end
      end
    end
  end
end
|
@@ -1,8 +1,7 @@
|
|
1
1
|
require 'helper'
|
2
|
-
require 'bio-cnls_screenscraper'
|
3
2
|
|
4
3
|
class TestBioCnlsScreenscraper < Test::Unit::TestCase
|
5
|
-
@@data_dir = File.join(File.dirname(__FILE__),['data'])
|
4
|
+
@@data_dir = File.join(File.dirname(__FILE__),'..',['data'])
|
6
5
|
|
7
6
|
should "correctly parse hit results with no hits" do
|
8
7
|
html = File.open(File.join(@@data_dir,'nohits.html')).read
|
metadata
CHANGED
@@ -1,157 +1,151 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-cnls_screenscraper
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 1
|
9
|
-
- 0
|
10
|
-
version: 0.1.0
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.3.0
|
5
|
+
prerelease:
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Ben J. Woodcroft
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
dependencies:
|
21
|
-
- !ruby/object:Gem::Dependency
|
22
|
-
prerelease: false
|
12
|
+
date: 2012-05-06 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
23
15
|
name: shoulda
|
24
|
-
|
25
|
-
version_requirements: &id001 !ruby/object:Gem::Requirement
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
26
17
|
none: false
|
27
|
-
requirements:
|
28
|
-
- -
|
29
|
-
- !ruby/object:Gem::Version
|
30
|
-
|
31
|
-
segments:
|
32
|
-
- 0
|
33
|
-
version: "0"
|
34
|
-
requirement: *id001
|
35
|
-
- !ruby/object:Gem::Dependency
|
36
|
-
prerelease: false
|
37
|
-
name: bundler
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
38
22
|
type: :development
|
39
|
-
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
25
|
none: false
|
41
|
-
requirements:
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rdoc
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
42
35
|
- - ~>
|
43
|
-
- !ruby/object:Gem::Version
|
44
|
-
|
45
|
-
|
46
|
-
- 1
|
47
|
-
- 0
|
48
|
-
- 0
|
49
|
-
version: 1.0.0
|
50
|
-
requirement: *id002
|
51
|
-
- !ruby/object:Gem::Dependency
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '3.12'
|
38
|
+
type: :development
|
52
39
|
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ~>
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '3.12'
|
46
|
+
- !ruby/object:Gem::Dependency
|
53
47
|
name: jeweler
|
54
|
-
|
55
|
-
version_requirements: &id003 !ruby/object:Gem::Requirement
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
56
49
|
none: false
|
57
|
-
requirements:
|
50
|
+
requirements:
|
58
51
|
- - ~>
|
59
|
-
- !ruby/object:Gem::Version
|
60
|
-
|
61
|
-
segments:
|
62
|
-
- 1
|
63
|
-
- 5
|
64
|
-
- 2
|
65
|
-
version: 1.5.2
|
66
|
-
requirement: *id003
|
67
|
-
- !ruby/object:Gem::Dependency
|
68
|
-
prerelease: false
|
69
|
-
name: rcov
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: 1.8.3
|
70
54
|
type: :development
|
71
|
-
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 1.8.3
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: bundler
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
72
65
|
none: false
|
73
|
-
requirements:
|
74
|
-
- -
|
75
|
-
- !ruby/object:Gem::Version
|
76
|
-
|
77
|
-
|
78
|
-
- 0
|
79
|
-
version: "0"
|
80
|
-
requirement: *id004
|
81
|
-
- !ruby/object:Gem::Dependency
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 1.0.21
|
70
|
+
type: :development
|
82
71
|
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: 1.0.21
|
78
|
+
- !ruby/object:Gem::Dependency
|
83
79
|
name: bio
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ! '>='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: 1.4.2
|
84
86
|
type: :development
|
85
|
-
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
86
89
|
none: false
|
87
|
-
requirements:
|
88
|
-
- -
|
89
|
-
- !ruby/object:Gem::Version
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
- 4
|
94
|
-
- 1
|
95
|
-
version: 1.4.1
|
96
|
-
requirement: *id005
|
97
|
-
description: Programmatic interface to the cNLS nuclear localisation signal prediction software
|
90
|
+
requirements:
|
91
|
+
- - ! '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: 1.4.2
|
94
|
+
description: Programmatic interface to the cNLS nuclear localisation signal prediction
|
95
|
+
software
|
98
96
|
email: donttrustben@gmail.com
|
99
|
-
executables:
|
100
|
-
|
97
|
+
executables:
|
98
|
+
- bio-cnls_screenscraper
|
101
99
|
extensions: []
|
102
|
-
|
103
|
-
extra_rdoc_files:
|
100
|
+
extra_rdoc_files:
|
104
101
|
- LICENSE.txt
|
105
102
|
- README.rdoc
|
106
|
-
files:
|
103
|
+
files:
|
107
104
|
- .document
|
105
|
+
- .travis.yml
|
108
106
|
- Gemfile
|
109
107
|
- LICENSE.txt
|
110
108
|
- README.rdoc
|
111
109
|
- Rakefile
|
112
110
|
- VERSION
|
111
|
+
- bin/bio-cnls_screenscraper
|
113
112
|
- lib/bio-cnls_screenscraper.rb
|
113
|
+
- lib/bio/cnls_screenscraper.rb
|
114
|
+
- lib/bio/cnls_screenscraper/cnls_screenscraper.rb
|
115
|
+
- test/bio/test_cnls_screenscraper.rb
|
114
116
|
- test/data/badCharacters.html
|
115
117
|
- test/data/bipartiteHitOnly.html
|
116
118
|
- test/data/lessThan10Fail.html
|
117
119
|
- test/data/monopartiteHitOnly.html
|
118
120
|
- test/data/nohits.html
|
119
121
|
- test/helper.rb
|
120
|
-
- test/test_bio-cnls_screenscraper.rb
|
121
|
-
has_rdoc: true
|
122
122
|
homepage: http://github.com/wwood/bioruby-cnls_screenscraper
|
123
|
-
licenses:
|
123
|
+
licenses:
|
124
124
|
- MIT
|
125
125
|
post_install_message:
|
126
126
|
rdoc_options: []
|
127
|
-
|
128
|
-
require_paths:
|
127
|
+
require_paths:
|
129
128
|
- lib
|
130
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
129
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
131
130
|
none: false
|
132
|
-
requirements:
|
133
|
-
- -
|
134
|
-
- !ruby/object:Gem::Version
|
135
|
-
|
136
|
-
segments:
|
131
|
+
requirements:
|
132
|
+
- - ! '>='
|
133
|
+
- !ruby/object:Gem::Version
|
134
|
+
version: '0'
|
135
|
+
segments:
|
137
136
|
- 0
|
138
|
-
|
139
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
137
|
+
hash: 108166141
|
138
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
140
139
|
none: false
|
141
|
-
requirements:
|
142
|
-
- -
|
143
|
-
- !ruby/object:Gem::Version
|
144
|
-
|
145
|
-
segments:
|
146
|
-
- 0
|
147
|
-
version: "0"
|
140
|
+
requirements:
|
141
|
+
- - ! '>='
|
142
|
+
- !ruby/object:Gem::Version
|
143
|
+
version: '0'
|
148
144
|
requirements: []
|
149
|
-
|
150
145
|
rubyforge_project:
|
151
|
-
rubygems_version: 1.
|
146
|
+
rubygems_version: 1.8.21
|
152
147
|
signing_key:
|
153
148
|
specification_version: 3
|
154
|
-
summary: Programmatic interface to the cNLS nuclear localisation signal prediction
|
155
|
-
|
156
|
-
|
157
|
-
- test/test_bio-cnls_screenscraper.rb
|
149
|
+
summary: Programmatic interface to the cNLS nuclear localisation signal prediction
|
150
|
+
software
|
151
|
+
test_files: []
|