bio-signalp 0.2.3 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +4 -4
- data/README.md +58 -0
- data/Rakefile +0 -10
- data/VERSION +1 -1
- data/bin/signalp.rb +198 -78
- data/bio-signalp.gemspec +20 -18
- data/lib/bio-signalp.rb +6 -1
- data/lib/bio/appl/common.rb +92 -0
- data/lib/bio/appl/signalp3.rb +110 -0
- data/lib/bio/appl/signalp4.rb +72 -0
- data/test/test_bio-signalp.rb +107 -5
- metadata +59 -27
- data/README.rdoc +0 -30
- data/lib/bio/appl/signalp.rb +0 -165
data/Gemfile
CHANGED
@@ -3,13 +3,13 @@ source "http://rubygems.org"
|
|
3
3
|
# Example:
|
4
4
|
# gem "activesupport", ">= 2.3.5"
|
5
5
|
gem "bio", ">= 1.4.1"
|
6
|
+
gem "bio-logger"
|
6
7
|
|
7
8
|
# Add dependencies to develop your gem here.
|
8
9
|
# Include everything needed to run rake, tests, features, etc.
|
9
10
|
group :development do
|
10
11
|
gem "shoulda", ">= 0"
|
11
|
-
gem "bundler", "
|
12
|
-
gem "jeweler", "
|
13
|
-
gem "
|
14
|
-
gem "rdoc", "~> 3.12"
|
12
|
+
gem "bundler", ">= 1.0.0"
|
13
|
+
gem "jeweler", ">= 1.6.0"
|
14
|
+
gem "rdoc", ">= 3.12"
|
15
15
|
end
|
data/README.md
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
# bio-signalp
|
2
|
+
|
3
|
+
A wrapper for the signal peptide prediction algorithm SignalP.
|
4
|
+
|
5
|
+
Using ```bio-signalp``` requires SignalP to be locally installed and configured correctly. http://www.cbs.dtu.dk/services/SignalP/ has instructions on how it may be downloaded. This gem works best when the signalp executable is available from the command line (i.e. running 'signalp' by itself works at the command line).
|
6
|
+
|
7
|
+
# Installation
|
8
|
+
|
9
|
+
First you need to set up SignalP itself. ```bio-signalp``` is tested with SignalP versions 3.0 and 4.0.
|
10
|
+
|
11
|
+
1. Download SignalP and unpack the archive
|
12
|
+
2. Modify the signalp script in the unpacked directory. Specific instructions are provided in the script itself.
|
13
|
+
3. Add the unpacked directory to your path (or alternatively, give the path to the signalp executable to the ```calculate``` method)
|
14
|
+
|
15
|
+
Then you need to install this bio-gem
|
16
|
+
|
17
|
+
```sh
|
18
|
+
gem install bio-signalp
|
19
|
+
```
|
20
|
+
|
21
|
+
# Usage
|
22
|
+
|
23
|
+
Usage as a script:
|
24
|
+
```
|
25
|
+
Usage: signalp.rb my.fasta
|
26
|
+
|
27
|
+
my.fasta is the name of the fasta file you want to analyse ($stdin also accepted). Default output is all the sequences with their signal sequences cleaved.
|
28
|
+
|
29
|
+
This default output can be changed by using one (only) of -s, -S, -v, -f, -F.
|
30
|
+
|
31
|
+
-s, --summary print a tab separated table indicating whether each sequence has a predicted signal peptide (if SignalP 3 is used, NN and HMM predictions are both given, respectively) [default: no]
|
32
|
+
-S, --bigger-summary like -s, except also includes where the cleavage site is predicted [default: no]
|
33
|
+
-v, --verbose-summary much like -s except more details of the prediction are printed [default: no]
|
34
|
+
-f, --filter-in filter in: print those sequences that have a signal peptide [default: no]
|
35
|
+
-F, --filter-out filter out: print those sequences that don't have a signal peptide [default: no]
|
36
|
+
-b, --binary-path SIGNALP_PATH path to the signalp binary e.g. /usr/local/bin/signalp-4.0/signalp [default: 'signalp' i.e. whatever is on the PATH]
|
37
|
+
```
|
38
|
+
|
39
|
+
Usage as a programmatic interface
|
40
|
+
```ruby
|
41
|
+
require 'bio-signalp'
|
42
|
+
|
43
|
+
# The Plasmodium falciparum ACP sequence is known to have a signal peptide (one that helps direct it to the apicoplast)
|
44
|
+
acp_sequence = 'MKILLLCIIFLYYVNAFKNTQKDGVSLQILKKKRSNQVNFLNRKNDYNLIKNKNPSSSLKSTFDDIKKIISKQLSVEEDKIQMNSNFTKDLGADSLDLVELIMALEEKFNVTISDQDALKINTVQDAIDYIEKNNKQ'
|
45
|
+
|
46
|
+
# Run SignalP. The version is automatically detected
|
47
|
+
result = Bio::SignalP::Wrapper.new.calculate(acp_sequence) #=> Either a Bio::SignalP::Version3::Result or a Bio::SignalP::Version4::Result object
|
48
|
+
|
49
|
+
result.signal? #=> true. ACP has a predicted signal peptide.
|
50
|
+
result.cleavage_site #=> 17. The Ymax output from SignalP gives the predicted cleavage site.
|
51
|
+
result.cleave(acp_sequence) #=> 'FKNTQKDGVSLQILKKKRSNQVNFLNRKNDYNLIKNKNPSSSLKSTFDDIKKIISKQLSVEEDKIQMNSNFTKDLGADSLDLVELIMALEEKFNVTISDQDALKINTVQDAIDYIEKNNKQ'. The acp_sequence after signal peptide cleavage.
|
52
|
+
```
|
53
|
+
|
54
|
+
# Copyright
|
55
|
+
|
56
|
+
Copyright (c) 2011-2012 Ben J Woodcroft. See LICENSE.txt for
|
57
|
+
further details.
|
58
|
+
|
data/Rakefile
CHANGED
@@ -33,13 +33,3 @@ Rake::TestTask.new(:test) do |test|
|
|
33
33
|
end
|
34
34
|
|
35
35
|
task :default => :test
|
36
|
-
|
37
|
-
require 'rake/rdoctask'
|
38
|
-
Rake::RDocTask.new do |rdoc|
|
39
|
-
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
40
|
-
|
41
|
-
rdoc.rdoc_dir = 'rdoc'
|
42
|
-
rdoc.title = "bio-blah #{version}"
|
43
|
-
rdoc.rdoc_files.include('README*')
|
44
|
-
rdoc.rdoc_files.include('lib/**/*.rb')
|
45
|
-
end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.3.0
|
data/bin/signalp.rb
CHANGED
@@ -9,95 +9,215 @@ require 'optparse'
|
|
9
9
|
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), *%w[.. lib])
|
10
10
|
require 'bio-signalp'
|
11
11
|
|
12
|
-
runner = Bio::SignalP::Wrapper.new
|
13
12
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
13
|
+
|
14
|
+
# Parse command line options into the options hash.
#
# String keys ('s', 'S', 'v', 'f', 'F') select the output mode and are read
# again further down the script; :binary_path is handed to the SignalP wrapper.
SCRIPT_NAME = File.basename(__FILE__)
options = {
  :logger => 'stderr',
  :binary_path => nil,
}
o = OptionParser.new do |opts|
  opts.banner = "
Usage: #{SCRIPT_NAME} my.fasta

my.fasta is the name of the fasta file you want to analyse. Default output is all the sequences with their signal sequences cleaved.

This default output can be changed by using one (only) of -s, -S, -v, -f, -F.\n\n"

  opts.on("-s", "--summary", "print a tab separated table indicating if the sequence had a signal peptide results (if Signalp 3 is used, HMM and NN predictions are both given, respectively [default: no]") do
    options['s'] = true
  end
  opts.on("-S", "--bigger-summary", "like -s, except also includes where the cleavage site is predicted [default: no]") do
    options['S'] = true
  end
  opts.on("-v", "--verbose-summary", "much like -s except more details of the prediction are predicted [default: no]") do
    # The rest of the script tests options['v']; storing the flag under any
    # other key makes --verbose-summary fall through to the default output.
    options['v'] = true
  end
  opts.on("-f", "--filter-in", "filter in: print those sequences that have a signal peptide [default: no]") do
    options['f'] = true
  end
  opts.on("-F", "--filter-out", "filter out: print those sequences that don't have a signal peptide [default: no]") do
    options['F'] = true
  end
  opts.on("-b", "--binary-path SIGNALP_PATH", "path to the signalp binary e.g. /usr/local/bin/signalp-4.0/signalp [default: 'signalp' i.e. whatever is on the PATH]") do |arg|
    options[:binary_path] = arg
  end
end
o.parse!

# At most one positional argument (the fasta file) is accepted; with none,
# input is read through ARGF.
if ARGV.length > 1
  $stderr.puts o
  exit 1
end
|
25
52
|
|
26
|
-
# Print headers if required
|
27
|
-
if options['s']
|
28
|
-
puts [
|
29
|
-
'Name',
|
30
|
-
'NN Prediction',
|
31
|
-
'HMM Prediction'
|
32
|
-
].join("\t")
|
33
|
-
elsif options['S']
|
34
|
-
puts [
|
35
|
-
'Name',
|
36
|
-
'NN Prediction',
|
37
|
-
'HMM Prediction',
|
38
|
-
'Predicted?',
|
39
|
-
'Cleavege site (if predicted)'
|
40
|
-
].join("\t")
|
41
53
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
# :nn_Smax, :nn_Smax_position, :nn_Smax_prediction,
|
46
|
-
# :nn_Smean, :nn_Smean_prediction,
|
47
|
-
# :nn_D, :nn_D_prediction]
|
48
|
-
# @@hmm_results = [
|
49
|
-
# :hmm_result, :hmm_Cmax, :hmm_Cmax_position, :hmm_Cmax_prediction, :hmm_Sprob, :hmm_Sprob_prediction]
|
50
|
-
puts [
|
51
|
-
'Name',
|
52
|
-
'NN Cmax',
|
53
|
-
'NN Cmax position',
|
54
|
-
'NN Cmax prediction',
|
55
|
-
'NN Ymax',
|
56
|
-
'NN Ymax position',
|
57
|
-
'NN Ymax prediction',
|
58
|
-
'NN Smax',
|
59
|
-
'NN Smax position',
|
60
|
-
'NN Smax prediction',
|
61
|
-
'NN Smean',
|
62
|
-
'NN Smean prediction',
|
63
|
-
'NN D',
|
64
|
-
'NN D prediction',
|
65
|
-
'HMM result',
|
66
|
-
'HMM Cmax',
|
67
|
-
'HMM Cmax position',
|
68
|
-
'HMM Cmax prediction',
|
69
|
-
'HMM Sprob',
|
70
|
-
'HMM Sprob prediction',
|
71
|
-
].join("\t")
|
72
|
-
end
|
54
|
+
printed_header = false
|
55
|
+
signalp_version = nil
|
56
|
+
runner = Bio::SignalP::Wrapper.new
|
73
57
|
|
74
58
|
Bio::FlatFile.open(ARGF).each do |seq|
|
75
|
-
result = runner.calculate(seq.seq)
|
59
|
+
result = runner.calculate(seq.seq, :binary_path => options[:binary_path])
|
76
60
|
if result.nil?
|
77
61
|
$stderr.puts "Unexpected empty sequence detected, ignoring: #{seq.definition}"
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
62
|
+
next
|
63
|
+
end
|
64
|
+
|
65
|
+
if !printed_header
|
66
|
+
printed_header = true
|
67
|
+
|
68
|
+
# Different headers are printed for the different versions (if at all)
|
69
|
+
if result.kind_of?(Bio::SignalP::Version3::Result)
|
70
|
+
signalp_version = 3
|
71
|
+
if options['s']
|
72
|
+
puts [
|
73
|
+
'Name',
|
74
|
+
'NN Prediction',
|
75
|
+
'HMM Prediction'
|
76
|
+
].join("\t")
|
77
|
+
elsif options['S']
|
78
|
+
puts [
|
79
|
+
'Name',
|
80
|
+
'NN Prediction',
|
81
|
+
'HMM Prediction',
|
82
|
+
'Predicted?',
|
83
|
+
'Cleavege site (if predicted)'
|
84
|
+
].join("\t")
|
85
|
+
|
86
|
+
elsif options['v']
|
87
|
+
# [:nn_Cmax, :nn_Cmax_position, :nn_Cmax_prediction,
|
88
|
+
# :nn_Ymax, :nn_Ymax_position, :nn_Ymax_prediction,
|
89
|
+
# :nn_Smax, :nn_Smax_position, :nn_Smax_prediction,
|
90
|
+
# :nn_Smean, :nn_Smean_prediction,
|
91
|
+
# :nn_D, :nn_D_prediction]
|
92
|
+
# @@hmm_results = [
|
93
|
+
# :hmm_result, :hmm_Cmax, :hmm_Cmax_position, :hmm_Cmax_prediction, :hmm_Sprob, :hmm_Sprob_prediction]
|
94
|
+
puts [
|
95
|
+
'Name',
|
96
|
+
'NN Cmax',
|
97
|
+
'NN Cmax position',
|
98
|
+
'NN Cmax prediction',
|
99
|
+
'NN Ymax',
|
100
|
+
'NN Ymax position',
|
101
|
+
'NN Ymax prediction',
|
102
|
+
'NN Smax',
|
103
|
+
'NN Smax position',
|
104
|
+
'NN Smax prediction',
|
105
|
+
'NN Smean',
|
106
|
+
'NN Smean prediction',
|
107
|
+
'NN D',
|
108
|
+
'NN D prediction',
|
109
|
+
'HMM result',
|
110
|
+
'HMM Cmax',
|
111
|
+
'HMM Cmax position',
|
112
|
+
'HMM Cmax prediction',
|
113
|
+
'HMM Sprob',
|
114
|
+
'HMM Sprob prediction',
|
115
|
+
].join("\t")
|
116
|
+
end
|
117
|
+
|
118
|
+
elsif result.kind_of?(Bio::SignalP::Version4::Result)
|
119
|
+
signalp_version = 4
|
120
|
+
|
121
|
+
if options['s']
|
122
|
+
puts [
|
123
|
+
'Name',
|
124
|
+
'Predicted?',
|
125
|
+
].join("\t")
|
126
|
+
elsif options['S']
|
127
|
+
puts [
|
128
|
+
'Name',
|
129
|
+
'Predicted?',
|
130
|
+
'Cleavege site (if predicted)'
|
131
|
+
].join("\t")
|
132
|
+
|
133
|
+
elsif options['v']
|
134
|
+
#:Cmax, :Cmax_position,
|
135
|
+
#:Ymax, :Ymax_position,
|
136
|
+
#:Smax, :Smax_position,
|
137
|
+
#:Smean,
|
138
|
+
#:D,
|
139
|
+
#:prediction,
|
140
|
+
#:Dmaxcut,
|
141
|
+
#:networks_used
|
142
|
+
puts [
|
143
|
+
'Name',
|
144
|
+
'Cmax',
|
145
|
+
'Cmax position',
|
146
|
+
'Ymax',
|
147
|
+
'Ymax position',
|
148
|
+
'Smax',
|
149
|
+
'Smax position',
|
150
|
+
'Smean',
|
151
|
+
'D',
|
152
|
+
'prediction',
|
153
|
+
'Dmaxcut',
|
154
|
+
'networks_used'
|
155
|
+
].join("\t")
|
156
|
+
end
|
157
|
+
else
|
158
|
+
raise "Unexpected bio-signalp result object seen: #{result.inspect}"
|
159
|
+
end
|
160
|
+
end
|
161
|
+
|
162
|
+
if options['s']
|
163
|
+
if signalp_version == 3
|
164
|
+
puts [
|
165
|
+
seq.entry_id,
|
166
|
+
result.nn_D_prediction ? 'T' : 'F',
|
167
|
+
result.hmm_Sprob_prediction ? 'T' : 'F'
|
168
|
+
].join("\t")
|
169
|
+
elsif signalp_version == 4
|
170
|
+
puts [
|
171
|
+
seq.entry_id,
|
172
|
+
result.prediction ? 'T' : 'F',
|
173
|
+
].join("\t")
|
174
|
+
else
|
175
|
+
raise "Programming error"
|
176
|
+
end
|
84
177
|
elsif options['S']
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
178
|
+
if signalp_version == 3
|
179
|
+
puts [
|
180
|
+
seq.entry_id,
|
181
|
+
result.nn_D_prediction ? 'T' : 'F',
|
182
|
+
result.hmm_Sprob_prediction ? 'T' : 'F',
|
183
|
+
result.signal? ? 'T' : 'F',
|
184
|
+
result.signal? ? result.cleavage_site : 0,
|
185
|
+
].join("\t")
|
186
|
+
elsif signalp_version == 4
|
187
|
+
puts [
|
188
|
+
seq.entry_id,
|
189
|
+
result.signal? ? 'T' : 'F',
|
190
|
+
result.signal? ? result.cleavage_site : 0,
|
191
|
+
].join("\t")
|
192
|
+
else
|
193
|
+
raise "Programming error"
|
194
|
+
end
|
195
|
+
|
92
196
|
elsif options['v']
|
93
197
|
taputs = [seq.definition]
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
198
|
+
extras = []
|
199
|
+
if signalp_version == 3
|
200
|
+
extras = [:nn_Cmax, :nn_Cmax_position, :nn_Cmax_prediction,
|
201
|
+
:nn_Ymax, :nn_Ymax_position, :nn_Ymax_prediction,
|
202
|
+
:nn_Smax, :nn_Smax_position, :nn_Smax_prediction,
|
203
|
+
:nn_Smean, :nn_Smean_prediction,
|
204
|
+
:nn_D, :nn_D_prediction,
|
205
|
+
:hmm_result, :hmm_Cmax, :hmm_Cmax_position, :hmm_Cmax_prediction,
|
206
|
+
:hmm_Sprob, :hmm_Sprob_prediction]
|
207
|
+
elsif signalp_version == 4
|
208
|
+
extras = [
|
209
|
+
:Cmax, :Cmax_position,
|
210
|
+
:Ymax, :Ymax_position,
|
211
|
+
:Smax, :Smax_position,
|
212
|
+
:Smean,
|
213
|
+
:D,
|
214
|
+
:prediction,
|
215
|
+
:Dmaxcut,
|
216
|
+
:networks_used
|
217
|
+
]
|
218
|
+
end
|
219
|
+
|
220
|
+
extras.each do |meth|
|
101
221
|
taputs.push result.send(meth)
|
102
222
|
end
|
103
223
|
puts taputs.join("\t")
|
data/bio-signalp.gemspec
CHANGED
@@ -5,36 +5,38 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "bio-signalp"
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.3.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Ben J Woodcroft"]
|
12
|
-
s.date = "2012-
|
12
|
+
s.date = "2012-08-18"
|
13
13
|
s.description = "A wrapper for the signal peptide prediction algorith SignalP. Not very well supported, but seems to work for the author, at least."
|
14
14
|
s.email = "donttrustben near gmail.com"
|
15
15
|
s.executables = ["signalp.rb"]
|
16
16
|
s.extra_rdoc_files = [
|
17
17
|
"LICENSE.txt",
|
18
|
-
"README.
|
18
|
+
"README.md"
|
19
19
|
]
|
20
20
|
s.files = [
|
21
21
|
".document",
|
22
22
|
"Gemfile",
|
23
23
|
"LICENSE.txt",
|
24
|
-
"README.
|
24
|
+
"README.md",
|
25
25
|
"Rakefile",
|
26
26
|
"VERSION",
|
27
27
|
"bin/signalp.rb",
|
28
28
|
"bio-signalp.gemspec",
|
29
29
|
"lib/bio-signalp.rb",
|
30
|
-
"lib/bio/appl/
|
30
|
+
"lib/bio/appl/common.rb",
|
31
|
+
"lib/bio/appl/signalp3.rb",
|
32
|
+
"lib/bio/appl/signalp4.rb",
|
31
33
|
"test/helper.rb",
|
32
34
|
"test/test_bio-signalp.rb"
|
33
35
|
]
|
34
36
|
s.homepage = "http://github.com/wwood/bioruby-signalp"
|
35
37
|
s.licenses = ["MIT"]
|
36
38
|
s.require_paths = ["lib"]
|
37
|
-
s.rubygems_version = "1.8.
|
39
|
+
s.rubygems_version = "1.8.24"
|
38
40
|
s.summary = "A wrapper for the signal peptide prediction algorith SignalP"
|
39
41
|
|
40
42
|
if s.respond_to? :specification_version then
|
@@ -42,26 +44,26 @@ Gem::Specification.new do |s|
|
|
42
44
|
|
43
45
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
44
46
|
s.add_runtime_dependency(%q<bio>, [">= 1.4.1"])
|
47
|
+
s.add_runtime_dependency(%q<bio-logger>, [">= 0"])
|
45
48
|
s.add_development_dependency(%q<shoulda>, [">= 0"])
|
46
|
-
s.add_development_dependency(%q<bundler>, ["
|
47
|
-
s.add_development_dependency(%q<jeweler>, ["
|
48
|
-
s.add_development_dependency(%q<
|
49
|
-
s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
|
49
|
+
s.add_development_dependency(%q<bundler>, [">= 1.0.0"])
|
50
|
+
s.add_development_dependency(%q<jeweler>, [">= 1.6.0"])
|
51
|
+
s.add_development_dependency(%q<rdoc>, [">= 3.12"])
|
50
52
|
else
|
51
53
|
s.add_dependency(%q<bio>, [">= 1.4.1"])
|
54
|
+
s.add_dependency(%q<bio-logger>, [">= 0"])
|
52
55
|
s.add_dependency(%q<shoulda>, [">= 0"])
|
53
|
-
s.add_dependency(%q<bundler>, ["
|
54
|
-
s.add_dependency(%q<jeweler>, ["
|
55
|
-
s.add_dependency(%q<
|
56
|
-
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
56
|
+
s.add_dependency(%q<bundler>, [">= 1.0.0"])
|
57
|
+
s.add_dependency(%q<jeweler>, [">= 1.6.0"])
|
58
|
+
s.add_dependency(%q<rdoc>, [">= 3.12"])
|
57
59
|
end
|
58
60
|
else
|
59
61
|
s.add_dependency(%q<bio>, [">= 1.4.1"])
|
62
|
+
s.add_dependency(%q<bio-logger>, [">= 0"])
|
60
63
|
s.add_dependency(%q<shoulda>, [">= 0"])
|
61
|
-
s.add_dependency(%q<bundler>, ["
|
62
|
-
s.add_dependency(%q<jeweler>, ["
|
63
|
-
s.add_dependency(%q<
|
64
|
-
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
64
|
+
s.add_dependency(%q<bundler>, [">= 1.0.0"])
|
65
|
+
s.add_dependency(%q<jeweler>, [">= 1.6.0"])
|
66
|
+
s.add_dependency(%q<rdoc>, [">= 3.12"])
|
65
67
|
end
|
66
68
|
end
|
67
69
|
|
data/lib/bio-signalp.rb
CHANGED
@@ -8,4 +8,9 @@
|
|
8
8
|
# and put your plugin's code there. It is bad practice to write other code
|
9
9
|
# directly into this file, because doing so causes confusion if this biogem
|
10
10
|
# was ever to get merged into the main bioruby tree.
|
11
|
-
require 'bio/appl/
|
11
|
+
require 'bio/appl/common'
|
12
|
+
require 'bio/appl/signalp3'
|
13
|
+
require 'bio/appl/signalp4'
|
14
|
+
|
15
|
+
require 'bio-logger'
|
16
|
+
Bio::Log::LoggerPlus.new('bio-signalp')
|
@@ -0,0 +1,92 @@
|
|
1
|
+
module Bio
|
2
|
+
module SignalP
|
3
|
+
NUM_FIELDS_IN_VERSION3_SHORT_OUTPUT = 21
|
4
|
+
NUM_FIELDS_IN_VERSION4_SHORT_OUTPUT = 12
|
5
|
+
|
6
|
+
# Runs a locally installed SignalP executable on a single sequence and turns
# its 'short' format output into a version-specific Result object.
class Wrapper
  # The bio-logger logger registered under the name 'bio-signalp'.
  def log
    Bio::Log::LoggerPlus['bio-signalp']
  end

  # Given an amino acid sequence, run SignalP on it and return a Result
  # object describing the prediction. The version of SignalP used
  # is auto-detected from the number of fields in the output line
  # (versions 3 and 4 are supported).
  #
  # options:
  # :binary_path: full path to signalp binary e.g. '/usr/local/bin/signalp-4.0/signalp' [default: 'signalp' i.e. signalp is in the PATH]
  #
  # Returns nil if the sequence is nil or empty.
  #
  # Raises RuntimeError when an unknown option key is given, and Exception
  # when SignalP writes anything to stderr or its output cannot be parsed.
  # NOTE: Exception (rather than a StandardError subclass) is kept so that
  # existing callers rescuing it keep working; a bare `rescue` will not catch it.
  def calculate(sequence, options={})
    return nil if sequence.nil? || sequence == ''

    default_options = {
      :binary_path => 'signalp'
    }
    options = default_options.merge options
    raise "Unexpected option parameters passed in #{options.inspect}" unless options.length == default_options.length
    options[:binary_path] ||= default_options[:binary_path] #in case nil is passed here

    # This command needs to work with all versions of SignalP (currently v3 and v4)
    command = "#{options[:binary_path]} -f short -t euk"
    log.debug "Running signalp command: #{command}" if log.debug?
    Open3.popen3(command) do |stdin, stdout, stderr, _wait_thr|
      # Drain stderr in the background: reading stdout to EOF first can
      # deadlock if the child fills the stderr pipe buffer in the meantime.
      stderr_reader = Thread.new { stderr.readlines }

      stdin.puts '>wrapperSeq'
      stdin.puts "#{sequence}"
      stdin.close

      result = stdout.readlines
      error = stderr_reader.value

      unless error.empty?
        # join so the message shows SignalP's text rather than Array#inspect
        raise Exception, "There appears to be a problem while running signalp:\n#{error.join}"
      end

      # Error checking: two header lines followed by one line for our sequence
      num_expected_result_lines = 3
      unless result.length == num_expected_result_lines
        raise Exception, "Unexpected number of lines found in SignalP output (#{result.length}, expected #{num_expected_result_lines}):\n#{result.join}"
      end

      prediction_line = result[2].strip
      splits = prediction_line.split(/[ \t]+/)
      if splits.length == NUM_FIELDS_IN_VERSION3_SHORT_OUTPUT
        # SignalP 3 detected, use that
        log.debug "Detected SignalP version 3 type output, parsing" if log.debug?
        return Bio::SignalP::Version3::Result.create_from_line(prediction_line)
      elsif splits.length == NUM_FIELDS_IN_VERSION4_SHORT_OUTPUT
        log.debug "Detected SignalP version 4 type output, parsing" if log.debug?
        return Bio::SignalP::Version4::Result.create_from_line(prediction_line)
      else
        error_description = "Bad SignalP output line found. Are you using SignalP 3.0 or 4.0? (found #{splits.length} fields in the third line of the output):\n#{result[2]}"
        log.error error_description
        raise Exception, error_description
      end
    end
  end
end
|
66
|
+
|
67
|
+
# A module for methods common to different SignalP version Result classes.
# Including classes must implement signal? and cleavage_site.
module Common
  # Given an amino acid sequence (as a string), remove the predicted signal
  # peptide and return the remainder, i.e. everything from residue number
  # cleavage_site (1-based) to the end. When signal? is false the sequence
  # is returned unchanged.
  def cleave(sequence)
    return sequence unless signal?

    # cleavage_site is 1-based, string indices are 0-based
    sequence[(cleavage_site - 1)..-1]
  end

  # Simple method: 'Y' => true, 'N' => false, else nil
  def to_bool(string)
    case string
    when 'Y' then true
    when 'N' then false
    end
  end
end
|
91
|
+
end
|
92
|
+
end
|
@@ -0,0 +1,110 @@
|
|
1
|
+
# Methods to wrap around the signal peptide prediction program SignalP (version 3.0)
|
2
|
+
require 'open3'
|
3
|
+
|
4
|
+
# Wrapper around a locally installed SignalP program
|
5
|
+
module Bio
|
6
|
+
module SignalP
|
7
|
+
class Version3
  # One parsed line of SignalP 3.0 '-format short' output. Build instances
  # with Result.create_from_line.
  class Result
    include Bio::SignalP::Common

    # Neural network (NN) values, in the order they appear in the short output
    @@nn_results = [
      :nn_Cmax, :nn_Cmax_position, :nn_Cmax_prediction,
      :nn_Ymax, :nn_Ymax_position, :nn_Ymax_prediction,
      :nn_Smax, :nn_Smax_position, :nn_Smax_prediction,
      :nn_Smean, :nn_Smean_prediction,
      :nn_D, :nn_D_prediction
    ]
    # Hidden Markov model (HMM) values, in the order they appear in the short output
    @@hmm_results = [
      :hmm_result,
      :hmm_Cmax, :hmm_Cmax_position, :hmm_Cmax_prediction,
      :hmm_Sprob, :hmm_Sprob_prediction
    ]

    # Every NN and HMM value gets a reader and a writer
    (@@nn_results + @@hmm_results).each { |field| attr_accessor field }

    # Build a Result from one data line of the SignalP 3.0 'short' output, e.g.
    #
    # # name Cmax pos ? Ymax pos ? Smax pos ? Smean ? D ? # name ! Cmax pos ? Sprob ?
    # 526.m04658 0.734 19 Y 0.686 19 Y 0.933 6 Y 0.760 Y 0.723 Y 526.m04658 Q 0.037 19 N 0.004 N
    #
    # Raises Exception if the line does not split into the expected number
    # of whitespace-separated fields.
    def self.create_from_line(line)
      columns = line.split(/[ \t]+/)
      unless columns.length == Bio::SignalP::NUM_FIELDS_IN_VERSION3_SHORT_OUTPUT
        raise Exception, "Bad SignalP Short Line Found (#{columns.length}): '#{line}'"
      end

      parsed = Result.new
      converters = {
        :float => lambda { |text| text.to_f },
        :int   => lambda { |text| text.to_i },
        :bool  => lambda { |text| parsed.to_bool(text) },
        :raw   => lambda { |text| text }
      }

      # How each column is converted, parallel to @@nn_results / @@hmm_results
      nn_kinds = [
        :float, :int, :bool,
        :float, :int, :bool,
        :float, :int, :bool,
        :float, :bool,
        :float, :bool
      ]
      hmm_kinds = [:raw, :float, :int, :bool, :float, :bool]

      # Column 0 is the sequence name, so the NN values start at column 1
      nn_start = 1
      @@nn_results.zip(nn_kinds).each_with_index do |(field, kind), offset|
        parsed.send("#{field}=", converters[kind].call(columns[nn_start + offset]))
      end

      # The sequence name is repeated between the NN and HMM sections; skip it
      hmm_start = nn_start + @@nn_results.length + 1
      @@hmm_results.zip(hmm_kinds).each_with_index do |(field, kind), offset|
        parsed.send("#{field}=", converters[kind].call(columns[hmm_start + offset]))
      end

      parsed
    end

    # Is a signal peptide predicted by either the NN D-score or the HMM
    # Sprob call? By default this object is asked, but any object
    # responding to :nn_D_prediction and :hmm_Sprob_prediction may be given.
    def signal?(clazz=self)
      clazz.send(:nn_D_prediction) || clazz.send(:hmm_Sprob_prediction)
    end

    # The NN D-score prediction
    def classical_signal_sequence?
      @nn_D_prediction
    end

    # The HMM Sprob prediction
    def signal_anchor?
      @hmm_Sprob_prediction
    end

    # All result values as an array: NN first, then HMM, as per the
    # SignalP short format
    def all_results
      (@@nn_results + @@hmm_results).map { |field| send(field) }
    end

    # The column names (symbols) matching the values from all_results
    def self.all_result_names
      @@nn_results + @@hmm_results
    end

    # The number of the residue after the cleavage site, i.e. the first
    # residue of the mature protein. Taken from the Y score position, as
    # it was decided this is the best prediction.
    def cleavage_site
      @nn_Ymax_position
    end
  end
end
|
109
|
+
end
|
110
|
+
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
# Methods to wrap around the signal peptide prediction program SignalP (version 4.0)
|
2
|
+
require 'open3'
|
3
|
+
|
4
|
+
# Wrapper around a locally installed SignalP program
|
5
|
+
module Bio
|
6
|
+
module SignalP
|
7
|
+
class Version4
  # One parsed line of SignalP 4.0 '-format short' output. Build instances
  # with Result.create_from_line.
  class Result
    include Bio::SignalP::Common

    # Values reported per sequence, in the order they appear in the short output
    @@output_fields = [
      :Cmax, :Cmax_position,
      :Ymax, :Ymax_position,
      :Smax, :Smax_position,
      :Smean,
      :D,
      :prediction,
      :Dmaxcut,
      :networks_used
    ]

    # Every output value gets a reader and a writer
    @@output_fields.each { |field| attr_accessor field }

    # Build a Result from one data line of the SignalP 4.0 'short' output, e.g.
    #
    # $ ~/bioinfo/signalp-4.0/signalp /tmp/acp
    # # SignalP-4.0 euk predictions
    # # name Cmax pos Ymax pos Smax pos Smean D ? Dmaxcut Networks-used
    # acp 0.871 17 0.863 17 0.886 1 0.844 0.853 Y 0.450 SignalP-noTM
    #
    # Raises Exception if the line does not split into the expected number
    # of whitespace-separated fields.
    def self.create_from_line(line)
      columns = line.split(/[ \t]+/)
      unless columns.length == Bio::SignalP::NUM_FIELDS_IN_VERSION4_SHORT_OUTPUT
        raise Exception, "Bad SignalP Short Line Found (#{columns.length}): '#{line}'"
      end

      parsed = Result.new
      converters = {
        :float => lambda { |text| text.to_f },
        :int   => lambda { |text| text.to_i },
        :bool  => lambda { |text| parsed.to_bool(text) },
        :raw   => lambda { |text| text }
      }

      # How each column is converted, parallel to @@output_fields
      kinds = [
        :float, :int,
        :float, :int,
        :float, :int,
        :float,
        :float,
        :bool,
        :float,
        :raw
      ]

      # Column 0 is the sequence name, so the values start at column 1
      first_value_column = 1
      @@output_fields.zip(kinds).each_with_index do |(field, kind), offset|
        parsed.send("#{field}=", converters[kind].call(columns[first_value_column + offset]))
      end

      parsed
    end

    # Is a signal peptide predicted? By default this object is asked, but
    # any object responding to :prediction may be given.
    def signal?(clazz=self)
      clazz.send(:prediction)
    end

    # The number of the residue after the cleavage site, i.e. the first
    # residue of the mature protein. Taken from the Y score position, as
    # it was decided this is the best prediction.
    def cleavage_site
      @Ymax_position
    end
  end
end
|
71
|
+
end
|
72
|
+
end
|
data/test/test_bio-signalp.rb
CHANGED
@@ -1,7 +1,20 @@
|
|
1
1
|
require 'helper'
|
2
2
|
require 'open3'
|
3
3
|
|
4
|
+
@@signalp3_path = File.join(ENV['HOME'],'bioinfo','signalp-3.0','signalp')
|
5
|
+
@@signalp4_path = File.join(ENV['HOME'],'bioinfo','signalp-4.0','signalp')
|
6
|
+
@@binaries = [
|
7
|
+
@@signalp3_path,
|
8
|
+
@@signalp4_path,
|
9
|
+
]
|
10
|
+
|
4
11
|
class TestBioSignalp < Test::Unit::TestCase
|
12
|
+
def setup
|
13
|
+
log_name = 'bio-signalp'
|
14
|
+
Bio::Log::CLI.logger('stderr')
|
15
|
+
#Bio::Log::CLI.configure(log_name) # when commented out no debug is printed out
|
16
|
+
end
|
17
|
+
|
5
18
|
should "positive control" do
|
6
19
|
# Known to have a signal peptide
|
7
20
|
acp_sequence = 'MKILLLCIIFLYYVNAFKNTQKDGVSLQILKKKRSNQVNFLNRKNDYNLIKNKNPSSSLKSTFDDIKKIISKQLSVEEDKIQMNSNFTKDLGADSLDLVELIMALEEKFNVTISDQDALKINTVQDAIDYIEKNNKQ'
|
@@ -15,15 +28,54 @@ class TestBioSignalp < Test::Unit::TestCase
|
|
15
28
|
|
16
29
|
assert_equal false, Bio::SignalP::Wrapper.new.calculate(non_signal_sequence).signal?
|
17
30
|
end
|
31
|
+
|
32
|
+
should 'work with different SignalP versions, this test is specific to Ben\'s machines probably' do
|
33
|
+
binaries = {
|
34
|
+
@@binaries[0] => Bio::SignalP::Version3::Result,
|
35
|
+
@@binaries[1] => Bio::SignalP::Version4::Result,
|
36
|
+
}
|
37
|
+
binaries.each do |binary, clazz|
|
38
|
+
acp_sequence = 'MKILLLCIIFLYYVNAFKNTQKDGVSLQILKKKRSNQVNFLNRKNDYNLIKNKNPSSSLKSTFDDIKKIISKQLSVEEDKIQMNSNFTKDLGADSLDLVELIMALEEKFNVTISDQDALKINTVQDAIDYIEKNNKQ'
|
39
|
+
positive_result = Bio::SignalP::Wrapper.new.calculate(acp_sequence, :binary_path => binary)
|
40
|
+
assert_equal true, positive_result.signal?, binary
|
41
|
+
assert_kind_of clazz, positive_result, binary
|
42
|
+
assert_equal 17, positive_result.cleavage_site, binary
|
43
|
+
assert_equal 'FKNTQKDGVSLQILKKKRSNQVNFLNRKNDYNLIKNKNPSSSLKSTFDDIKKIISKQLSVEEDKIQMNSNFTKDLGADSLDLVELIMALEEKFNVTISDQDALKINTVQDAIDYIEKNNKQ',
|
44
|
+
positive_result.cleave(acp_sequence), binary
|
45
|
+
non_signal_sequence = 'KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK'
|
46
|
+
assert_equal false, Bio::SignalP::Wrapper.new.calculate(non_signal_sequence, :binary_path => binary).signal?, binary
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
should 'give different predictions for v3 and v4' do
|
51
|
+
different = <<EOF
|
52
|
+
MMKGMVEAAVKWMKAGLPLRILKLVLYAKVIDGDLQGHSLRRFSEVLKTFTELKERYEMQ
|
53
|
+
LLLPKAVPLEFDVYLSFSEEDKEVAKVIREKLSGAKDGVRIYDSSHQGINKDTVFQEDMY
|
54
|
+
SIMMKSARVVTVLSPNYLRNKACIEQYNIALCCNRRALRDMLAPIYVDSVEMMPTYMGLV
|
55
|
+
QYVDCRPHDPSKIGEACSQLTVSLSVTFHTELRVAEFDPLRYDVFLSYSHRDTEKANRFV
|
56
|
+
EMLQKLAPDLKLFFDVQELKTGKSWQRTLYHSIDGSRCMLALISEPYLKSAVCQEEFALA
|
57
|
+
QAKHCAKGKQHLQLISICLDDLDTIQPEFTHIPMVKGTPDVFDDMVKTVCPAVIQWLNGE
|
58
|
+
RVDQTETIKTLFDDQNITTLSADAEMEKFRQSHFQKEFGTQDSLISSKSPFPPKLSDILP
|
59
|
+
EPGKEVKDKRPKSASDCDLIFSYHSDDEKYVSFIVKILQLNAPSLKVKAVCSDENKLSAF
|
60
|
+
ERAHCIVPVLSPNYLESPECVEEFHIAIWRQRISNPEAGALLPICVHTLPQKPTYFHLVQ
|
61
|
+
SAVSMTDGIWAQLSGQHNFGLPQKIVDFSLAAGKTPLSQVDMLALYMAAYYILKRFAKAR
|
62
|
+
SCKEVEFSMKPALFNIMKLQEQIKQLTQPDYTDQLCKTLLEIHFNDIPDSWREGGHYLRP
|
63
|
+
KEPEDTTPAIMAPVEGASDSQTHNALNERTQDSSGKDKSEVQGEKAQRSASEGVSHDTVQ
|
64
|
+
QMDQDHDLKAAQGQDTAEGQDAAEGQDSEGGQDSEGGWDSDGGRDSVGNKTDAEKITGNH
|
65
|
+
GDEGGDGQVGGSEESKEKTEDEDEDISKRNRAPRSVACVCL
|
66
|
+
EOF
|
67
|
+
assert_equal true, Bio::SignalP::Wrapper.new.calculate(different, :binary_path => @@signalp3_path).signal?
|
68
|
+
assert_equal false, Bio::SignalP::Wrapper.new.calculate(different, :binary_path => @@signalp4_path).signal?
|
69
|
+
end
|
18
70
|
end
|
19
71
|
|
20
72
|
class TestSignalPScript < Test::Unit::TestCase
|
21
|
-
|
73
|
+
# Known to have a signal peptide
|
74
|
+
acp_sequence = 'MKILLLCIIFLYYVNAFKNTQKDGVSLQILKKKRSNQVNFLNRKNDYNLIKNKNPSSSLKSTFDDIKKIISKQLSVEEDKIQMNSNFTKDLGADSLDLVELIMALEEKFNVTISDQDALKINTVQDAIDYIEKNNKQ'
|
22
75
|
|
23
76
|
should "positive control" do
|
24
|
-
|
25
|
-
|
26
|
-
|
77
|
+
command = File.join(File.dirname(__FILE__),'..','bin','signalp.rb')
|
78
|
+
|
27
79
|
Open3.popen3(command) do |stdin, stdout, stderr|
|
28
80
|
stdin.puts '>positive'
|
29
81
|
stdin.puts acp_sequence
|
@@ -37,7 +89,7 @@ class TestSignalPScript < Test::Unit::TestCase
|
|
37
89
|
end
|
38
90
|
|
39
91
|
should "return gracefully when empty sequences are given" do
|
40
|
-
|
92
|
+
command = File.join(File.dirname(__FILE__),'..','bin','signalp.rb')
|
41
93
|
|
42
94
|
Open3.popen3(command) do |stdin, stdout, stderr|
|
43
95
|
stdin.puts '>positive'
|
@@ -54,4 +106,54 @@ class TestSignalPScript < Test::Unit::TestCase
|
|
54
106
|
">positive2\n", "FKNTQKDGVSLQILKKKRSNQVNFLNRKNDYNLIKNKNPSSSLKSTFDDIKKIISKQLSVEEDKIQMNSNFTKDLGADSLDLVELIMALEEKFNVTISDQDALKINTVQDAIDYIEKNNKQ\n"], @result
|
55
107
|
assert_equal ["Unexpected empty sequence detected, ignoring: empty\n"], @error
|
56
108
|
end
|
109
|
+
|
110
|
+
|
111
|
+
|
112
|
+
|
113
|
+
should 'give the right -s output for signalp 3' do
|
114
|
+
command = File.join(File.dirname(__FILE__),'..','bin','signalp.rb')
|
115
|
+
|
116
|
+
# This also tests the -b flag
|
117
|
+
command = "#{command} -b #{@@signalp3_path} -s"
|
118
|
+
Open3.popen3(command) do |stdin, stdout, stderr|
|
119
|
+
stdin.puts '>positive'
|
120
|
+
stdin.puts acp_sequence
|
121
|
+
stdin.close
|
122
|
+
|
123
|
+
@result = stdout.readlines # convert to string?
|
124
|
+
@error = stderr.readlines
|
125
|
+
end
|
126
|
+
|
127
|
+
expected = [
|
128
|
+
"Name\tNN Prediction\tHMM Prediction\n",
|
129
|
+
"positive\tT\tT\n"
|
130
|
+
]
|
131
|
+
assert_equal expected, @result
|
132
|
+
assert_equal [], @error
|
133
|
+
end
|
134
|
+
|
135
|
+
|
136
|
+
|
137
|
+
|
138
|
+
should 'give the right -s output for signalp 4' do
|
139
|
+
command = File.join(File.dirname(__FILE__),'..','bin','signalp.rb')
|
140
|
+
|
141
|
+
command = "#{command} -b #{@@signalp4_path} -s"
|
142
|
+
Open3.popen3(command) do |stdin, stdout, stderr|
|
143
|
+
stdin.puts '>positive'
|
144
|
+
stdin.puts acp_sequence
|
145
|
+
stdin.close
|
146
|
+
|
147
|
+
@result = stdout.readlines # convert to string?
|
148
|
+
@error = stderr.readlines
|
149
|
+
end
|
150
|
+
|
151
|
+
expected = [
|
152
|
+
"Name\tPredicted?\n",
|
153
|
+
"positive\tT\n"
|
154
|
+
]
|
155
|
+
assert_equal [], @error
|
156
|
+
assert_equal expected, @result
|
157
|
+
end
|
158
|
+
|
57
159
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-signalp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-08-18 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bio
|
16
|
-
requirement:
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,31 @@ dependencies:
|
|
21
21
|
version: 1.4.1
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements:
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 1.4.1
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: bio-logger
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
25
46
|
- !ruby/object:Gem::Dependency
|
26
47
|
name: shoulda
|
27
|
-
requirement:
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
28
49
|
none: false
|
29
50
|
requirements:
|
30
51
|
- - ! '>='
|
@@ -32,51 +53,60 @@ dependencies:
|
|
32
53
|
version: '0'
|
33
54
|
type: :development
|
34
55
|
prerelease: false
|
35
|
-
version_requirements:
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
36
62
|
- !ruby/object:Gem::Dependency
|
37
63
|
name: bundler
|
38
|
-
requirement:
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
39
65
|
none: false
|
40
66
|
requirements:
|
41
|
-
- -
|
67
|
+
- - ! '>='
|
42
68
|
- !ruby/object:Gem::Version
|
43
69
|
version: 1.0.0
|
44
70
|
type: :development
|
45
71
|
prerelease: false
|
46
|
-
version_requirements:
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: 1.0.0
|
47
78
|
- !ruby/object:Gem::Dependency
|
48
79
|
name: jeweler
|
49
|
-
requirement:
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
50
81
|
none: false
|
51
82
|
requirements:
|
52
|
-
- -
|
83
|
+
- - ! '>='
|
53
84
|
- !ruby/object:Gem::Version
|
54
85
|
version: 1.6.0
|
55
86
|
type: :development
|
56
87
|
prerelease: false
|
57
|
-
version_requirements:
|
58
|
-
- !ruby/object:Gem::Dependency
|
59
|
-
name: bio
|
60
|
-
requirement: &75317190 !ruby/object:Gem::Requirement
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
61
89
|
none: false
|
62
90
|
requirements:
|
63
91
|
- - ! '>='
|
64
92
|
- !ruby/object:Gem::Version
|
65
|
-
version: 1.
|
66
|
-
type: :development
|
67
|
-
prerelease: false
|
68
|
-
version_requirements: *75317190
|
93
|
+
version: 1.6.0
|
69
94
|
- !ruby/object:Gem::Dependency
|
70
95
|
name: rdoc
|
71
|
-
requirement:
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
72
97
|
none: false
|
73
98
|
requirements:
|
74
|
-
- -
|
99
|
+
- - ! '>='
|
75
100
|
- !ruby/object:Gem::Version
|
76
101
|
version: '3.12'
|
77
102
|
type: :development
|
78
103
|
prerelease: false
|
79
|
-
version_requirements:
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ! '>='
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '3.12'
|
80
110
|
description: A wrapper for the signal peptide prediction algorith SignalP. Not very
|
81
111
|
well supported, but seems to work for the author, at least.
|
82
112
|
email: donttrustben near gmail.com
|
@@ -85,18 +115,20 @@ executables:
|
|
85
115
|
extensions: []
|
86
116
|
extra_rdoc_files:
|
87
117
|
- LICENSE.txt
|
88
|
-
- README.
|
118
|
+
- README.md
|
89
119
|
files:
|
90
120
|
- .document
|
91
121
|
- Gemfile
|
92
122
|
- LICENSE.txt
|
93
|
-
- README.
|
123
|
+
- README.md
|
94
124
|
- Rakefile
|
95
125
|
- VERSION
|
96
126
|
- bin/signalp.rb
|
97
127
|
- bio-signalp.gemspec
|
98
128
|
- lib/bio-signalp.rb
|
99
|
-
- lib/bio/appl/
|
129
|
+
- lib/bio/appl/common.rb
|
130
|
+
- lib/bio/appl/signalp3.rb
|
131
|
+
- lib/bio/appl/signalp4.rb
|
100
132
|
- test/helper.rb
|
101
133
|
- test/test_bio-signalp.rb
|
102
134
|
homepage: http://github.com/wwood/bioruby-signalp
|
@@ -114,7 +146,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
114
146
|
version: '0'
|
115
147
|
segments:
|
116
148
|
- 0
|
117
|
-
hash:
|
149
|
+
hash: -969867473
|
118
150
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
119
151
|
none: false
|
120
152
|
requirements:
|
@@ -123,7 +155,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
123
155
|
version: '0'
|
124
156
|
requirements: []
|
125
157
|
rubyforge_project:
|
126
|
-
rubygems_version: 1.8.
|
158
|
+
rubygems_version: 1.8.24
|
127
159
|
signing_key:
|
128
160
|
specification_version: 3
|
129
161
|
summary: A wrapper for the signal peptide prediction algorith SignalP
|
data/README.rdoc
DELETED
@@ -1,30 +0,0 @@
|
|
1
|
-
= bio-signalp
|
2
|
-
|
3
|
-
A wrapper for the signal peptide prediction algorithm SignalP (version 3.0).
|
4
|
-
|
5
|
-
Using this bio-gem requires SignalP to be locally installed and configured correctly. http://www.cbs.dtu.dk/services/SignalP-3.0/ has instructions on how it may be downloaded. This gem requires that the signalp executable is available from the command line (i.e. running 'signalp' works at the command), so the steps to setup
|
6
|
-
|
7
|
-
# Download SignalP 3.0 and unpack
|
8
|
-
# Modify the signalp script in the unpacked directory
|
9
|
-
# Add the unpacked directory to your path
|
10
|
-
# install this bioruby plugin (gem install bioruby-signalp)
|
11
|
-
|
12
|
-
This bio-gem includes a library for interacting with signalp programmatically, but also (as of version 0.1.1) a script as well.
|
13
|
-
|
14
|
-
NOTE: Only tested with SignalP 3.0 at this point. Hopefully 4.0 will be tested in the future.
|
15
|
-
|
16
|
-
== Contributing to bio-signalp
|
17
|
-
|
18
|
-
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
|
19
|
-
* Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it
|
20
|
-
* Fork the project
|
21
|
-
* Start a feature/bugfix branch
|
22
|
-
* Commit and push until you are happy with your contribution
|
23
|
-
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
24
|
-
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it.
|
25
|
-
|
26
|
-
== Copyright
|
27
|
-
|
28
|
-
Copyright (c) 2011-2012 Ben J Woodcroft. See LICENSE.txt for
|
29
|
-
further details.
|
30
|
-
|
data/lib/bio/appl/signalp.rb
DELETED
@@ -1,165 +0,0 @@
|
|
1
|
-
# Methods to wrap around the signal peptide prediction program SignalP (version 3.0)
|
2
|
-
require 'open3'
|
3
|
-
|
4
|
-
# Wrapper around a locally installed SignalP program
|
5
|
-
module Bio
|
6
|
-
class SignalP
|
7
|
-
NUM_FIELDS_IN_SHORT_OUTPUT = 21
|
8
|
-
|
9
|
-
class Wrapper
|
10
|
-
# Given an amino acid sequence, return a SignalPResult
|
11
|
-
# representing it taken from the file.
|
12
|
-
#
|
13
|
-
# Returns nil if the sequence is empty
|
14
|
-
def calculate(sequence)
|
15
|
-
return nil if sequence.nil? or sequence == ''
|
16
|
-
|
17
|
-
command = 'signalp -trunc 70 -format short -t euk'
|
18
|
-
Open3.popen3(command) do |stdin, stdout, stderr, wait_thr|
|
19
|
-
stdin.puts '>wrapperSeq'
|
20
|
-
stdin.puts "#{sequence}"
|
21
|
-
stdin.close
|
22
|
-
|
23
|
-
result = stdout.readlines
|
24
|
-
error = stderr.readlines
|
25
|
-
|
26
|
-
unless error.empty?
|
27
|
-
raise Exception, "There appears to be a problem while running signalp:\n#{error}"
|
28
|
-
end
|
29
|
-
|
30
|
-
# Error checking
|
31
|
-
num_expected_result_lines = 3
|
32
|
-
unless result.length == num_expected_result_lines
|
33
|
-
raise Exception, "Unexpected number of lines found in SignalP output (#{result.length}, expected #{num_expected_result_lines}):\n#{result}"
|
34
|
-
end
|
35
|
-
|
36
|
-
splits = result[2].strip.split(/[ \t]+/)
|
37
|
-
if splits.length != NUM_FIELDS_IN_SHORT_OUTPUT
|
38
|
-
raise Exception, "Bad SignalP output line found. Are you using SignalP 3.0? (expected #{NUM_FIELDS_IN_SHORT_OUTPUT} fields, found #{splits.length} fields):\n#{result[2]}"
|
39
|
-
end
|
40
|
-
|
41
|
-
return Result.create_from_line(result[2].strip)
|
42
|
-
end
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
# The result of a SignalP program. Create using the output from
|
47
|
-
# -format short output and create_from_line()
|
48
|
-
class Result
|
49
|
-
@@nn_results =
|
50
|
-
[:nn_Cmax, :nn_Cmax_position, :nn_Cmax_prediction,
|
51
|
-
:nn_Ymax, :nn_Ymax_position, :nn_Ymax_prediction,
|
52
|
-
:nn_Smax, :nn_Smax_position, :nn_Smax_prediction,
|
53
|
-
:nn_Smean, :nn_Smean_prediction,
|
54
|
-
:nn_D, :nn_D_prediction]
|
55
|
-
@@hmm_results = [
|
56
|
-
:hmm_result, :hmm_Cmax, :hmm_Cmax_position, :hmm_Cmax_prediction, :hmm_Sprob, :hmm_Sprob_prediction]
|
57
|
-
|
58
|
-
@@nn_results.each do |sym|
|
59
|
-
attr_accessor sym
|
60
|
-
end
|
61
|
-
@@hmm_results.each do |sym|
|
62
|
-
attr_accessor sym
|
63
|
-
end
|
64
|
-
|
65
|
-
# Create a new SignalpResult using a line from the signal p 'short' output format,
|
66
|
-
# version 3.0
|
67
|
-
def self.create_from_line(line)
|
68
|
-
# e.g.
|
69
|
-
# # name Cmax pos ? Ymax pos ? Smax pos ? Smean ? D ? # name ! Cmax pos ? Sprob ?
|
70
|
-
# 526.m04658 0.734 19 Y 0.686 19 Y 0.933 6 Y 0.760 Y 0.723 Y 526.m04658 Q 0.037 19 N 0.004 N
|
71
|
-
matches = line.split(/[ \t]+/)
|
72
|
-
if matches.length != NUM_FIELDS_IN_SHORT_OUTPUT
|
73
|
-
raise Exception, "Bad SignalP Short Line Found (#{matches.length}): '#{line}'"
|
74
|
-
end
|
75
|
-
|
76
|
-
i = 1
|
77
|
-
result = Result.new
|
78
|
-
result.nn_Cmax = matches[i].to_f; i += 1
|
79
|
-
result.nn_Cmax_position = matches[i].to_i; i += 1
|
80
|
-
result.nn_Cmax_prediction = to_bool matches[i]; i += 1
|
81
|
-
result.nn_Ymax = matches[i].to_f; i += 1
|
82
|
-
result.nn_Ymax_position = matches[i].to_i; i += 1
|
83
|
-
result.nn_Ymax_prediction = to_bool matches[i]; i += 1
|
84
|
-
result.nn_Smax = matches[i].to_f; i += 1
|
85
|
-
result.nn_Smax_position = matches[i].to_i; i += 1
|
86
|
-
result.nn_Smax_prediction = to_bool matches[i]; i += 1
|
87
|
-
result.nn_Smean = matches[i].to_f; i += 1
|
88
|
-
result.nn_Smean_prediction = to_bool matches[i]; i += 1
|
89
|
-
result.nn_D = matches[i].to_f; i += 1
|
90
|
-
result.nn_D_prediction = to_bool matches[i]; i += 1
|
91
|
-
|
92
|
-
i+= 1
|
93
|
-
result.hmm_result = matches[i]; i += 1
|
94
|
-
result.hmm_Cmax = matches[i].to_f; i += 1
|
95
|
-
result.hmm_Cmax_position = matches[i].to_i; i += 1
|
96
|
-
result.hmm_Cmax_prediction = to_bool matches[i]; i += 1
|
97
|
-
result.hmm_Sprob = matches[i].to_f; i += 1
|
98
|
-
result.hmm_Sprob_prediction = to_bool matches[i]; i += 1
|
99
|
-
|
100
|
-
return result
|
101
|
-
end
|
102
|
-
|
103
|
-
def self.to_bool(string)
|
104
|
-
if string === 'Y'
|
105
|
-
return true
|
106
|
-
elsif string === 'N'
|
107
|
-
return false
|
108
|
-
else
|
109
|
-
return nil
|
110
|
-
end
|
111
|
-
end
|
112
|
-
|
113
|
-
# Does it have a signal peptide? It can be this class (default),
|
114
|
-
# or another class that responds to :nn_D_prediction and :hmm_Sprob_prediction
|
115
|
-
def signal?(clazz=self)
|
116
|
-
return (clazz.send(:nn_D_prediction) or clazz.send(:hmm_Sprob_prediction))
|
117
|
-
end
|
118
|
-
|
119
|
-
def classical_signal_sequence?
|
120
|
-
return @nn_D_prediction
|
121
|
-
end
|
122
|
-
|
123
|
-
def signal_anchor?
|
124
|
-
return @hmm_Sprob_prediction
|
125
|
-
end
|
126
|
-
|
127
|
-
# Return an array of all the results. NN then HMM, as per SignalP short format
|
128
|
-
def all_results
|
129
|
-
all = []
|
130
|
-
|
131
|
-
@@nn_results.each do |sym|
|
132
|
-
all.push self.send(sym)
|
133
|
-
end
|
134
|
-
|
135
|
-
@@hmm_results.each do |sym|
|
136
|
-
all.push self.send(sym)
|
137
|
-
end
|
138
|
-
|
139
|
-
return all
|
140
|
-
end
|
141
|
-
|
142
|
-
# Return an array of symbols representing the names of the columns
|
143
|
-
def self.all_result_names
|
144
|
-
return [@@nn_results, @@hmm_results].flatten
|
145
|
-
end
|
146
|
-
|
147
|
-
# Return the number of the residue after the cleavage site
|
148
|
-
# ie. the first residue of the mature protein
|
149
|
-
# Taken from the Y score, as it was decided this is the best prediction
|
150
|
-
def cleavage_site
|
151
|
-
@nn_Ymax_position
|
152
|
-
end
|
153
|
-
|
154
|
-
# Given an amino acid sequence (as a string),
|
155
|
-
# chop it off and return the remnants
|
156
|
-
def cleave(sequence)
|
157
|
-
if signal?
|
158
|
-
return sequence[cleavage_site-1..sequence.length-1]
|
159
|
-
else
|
160
|
-
return sequence
|
161
|
-
end
|
162
|
-
end
|
163
|
-
end
|
164
|
-
end
|
165
|
-
end
|