bio-signalp 0.2.3 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile CHANGED
@@ -3,13 +3,13 @@ source "http://rubygems.org"
3
3
  # Example:
4
4
  # gem "activesupport", ">= 2.3.5"
5
5
  gem "bio", ">= 1.4.1"
6
+ gem "bio-logger"
6
7
 
7
8
  # Add dependencies to develop your gem here.
8
9
  # Include everything needed to run rake, tests, features, etc.
9
10
  group :development do
10
11
  gem "shoulda", ">= 0"
11
- gem "bundler", "~> 1.0.0"
12
- gem "jeweler", "~> 1.6.0"
13
- gem "bio", ">= 1.4.1"
14
- gem "rdoc", "~> 3.12"
12
+ gem "bundler", ">= 1.0.0"
13
+ gem "jeweler", ">= 1.6.0"
14
+ gem "rdoc", ">= 3.12"
15
15
  end
@@ -0,0 +1,58 @@
1
+ # bio-signalp
2
+
3
+ A wrapper for the signal peptide prediction algorithm SignalP.
4
+
5
+ Using ```bio-signalp``` requires SignalP to be locally installed and configured correctly. http://www.cbs.dtu.dk/services/SignalP/ has instructions on how it may be downloaded. This gem works best when the signalp executable is available from the command line (i.e. running 'signalp' by itself works at the commandline).
6
+
7
+ # Installation
8
+
9
+ First you need to set up SignalP itself. ```bio-signalp``` is tested with SignalP versions 3.0 and 4.0.
10
+
11
+ 1. Download SignalP and unpack the archive
12
+ 2. Modify the signalp script in the unpacked directory. Specific instructions are provided in the script itself.
13
+ 3. Add the unpacked directory to your path (or alternatively, give the path to the signalp executable to the ```calculate``` method via its ```:binary_path``` option)
14
+
15
+ Then you need to install this bio-gem
16
+
17
+ ```sh
18
+ gem install bio-signalp
19
+ ```
20
+
21
+ # Usage
22
+
23
+ Usage as a script:
24
+ ```
25
+ Usage: signalp.rb my.fasta
26
+
27
+ my.fasta is the name of the fasta file you want to analyse ($stdin also accepted). Default output is all the sequences with their signal sequences cleaved.
28
+
29
+ This default output can be changed by using one (only) of -s, -S, -v, -f, -F.
30
+
31
+ -s, --summary print a tab separated table indicating whether each sequence has a predicted signal peptide (if SignalP 3 is used, the NN and HMM predictions are both given, respectively) [default: no]
32
+ -S, --bigger-summary like -s, except also includes where the cleavage site is predicted [default: no]
33
+ -v, --verbose-summary much like -s except more details of the prediction are printed [default: no]
34
+ -f, --filter-in filter in: print those sequences that have a signal peptide [default: no]
35
+ -F, --filter-out filter out: print those sequences that don't have a signal peptide [default: no]
36
+ -b, --binary-path SIGNALP_PATH path to the signalp binary e.g. /usr/local/bin/signalp-4.0/signalp [default: 'signalp' i.e. whatever is on the PATH]
37
+ ```
38
+
39
+ Usage as a programmatic interface
40
+ ```ruby
41
+ require 'bio-signalp'
42
+
43
+ # The Plasmodium falciparum ACP sequence is known to have a signal peptide (one that helps direct it to the apicoplast)
44
+ acp_sequence = 'MKILLLCIIFLYYVNAFKNTQKDGVSLQILKKKRSNQVNFLNRKNDYNLIKNKNPSSSLKSTFDDIKKIISKQLSVEEDKIQMNSNFTKDLGADSLDLVELIMALEEKFNVTISDQDALKINTVQDAIDYIEKNNKQ'
45
+
46
+ # Run SignalP. The version is automatically detected
47
+ result = Bio::SignalP::Wrapper.new.calculate(acp_sequence) #=> Either a Bio::SignalP::Version3::Result or a Bio::SignalP::Version4::Result object
48
+
49
+ result.signal? #=> true. ACP has a predicted signal peptide.
50
+ result.cleavage_site #=> 17. The Ymax output from SignalP gives the predicted cleavage site.
51
+ result.cleave(acp_sequence) #=> 'FKNTQKDGVSLQILKKKRSNQVNFLNRKNDYNLIKNKNPSSSLKSTFDDIKKIISKQLSVEEDKIQMNSNFTKDLGADSLDLVELIMALEEKFNVTISDQDALKINTVQDAIDYIEKNNKQ'. The acp_sequence after signal peptide cleavage.
52
+ ```
53
+
54
+ # Copyright
55
+
56
+ Copyright (c) 2011-2012 Ben J Woodcroft. See LICENSE.txt for
57
+ further details.
58
+
data/Rakefile CHANGED
@@ -33,13 +33,3 @@ Rake::TestTask.new(:test) do |test|
33
33
  end
34
34
 
35
35
  task :default => :test
36
-
37
- require 'rake/rdoctask'
38
- Rake::RDocTask.new do |rdoc|
39
- version = File.exist?('VERSION') ? File.read('VERSION') : ""
40
-
41
- rdoc.rdoc_dir = 'rdoc'
42
- rdoc.title = "bio-blah #{version}"
43
- rdoc.rdoc_files.include('README*')
44
- rdoc.rdoc_files.include('lib/**/*.rb')
45
- end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.3
1
+ 0.3.0
@@ -9,95 +9,215 @@ require 'optparse'
9
9
  $LOAD_PATH.unshift File.join(File.dirname(__FILE__), *%w[.. lib])
10
10
  require 'bio-signalp'
11
11
 
12
- runner = Bio::SignalP::Wrapper.new
13
12
 
14
- options = ARGV.getopts("sShvfF") #s for summary, no args required
15
- if options['h']
16
- $stderr.puts "Usage: signalp.rb [-svf] <my.fasta>"
17
- $stderr.puts "Where my.fasta is the name of the fasta file you want to analyse. Default output is all the sequences with their signal sequences cleaved."
18
- $stderr.puts "-s: summary: print a tab separated table indicating if the sequence had a signal peptide according to the HMM and NN results, respectively."
19
- $stderr.puts "-S: bigger_summary: like -s, except also includes where the cleavage site is predicted"
20
- $stderr.puts "-v: verbose summary: much like -s except more details of the prediction are predicted."
21
- $stderr.puts "-f: filter in: print those sequences that have a signal peptide"
22
- $stderr.puts "-F: filter out: print those sequences that don't have a signal peptide"
23
- exit
13
+
14
+ # Parse command line options into the options hash
15
+ SCRIPT_NAME = File.basename(__FILE__)
16
+ options = {
17
+ :logger => 'stderr',
18
+ :binary_path => nil,
19
+ }
20
+ o = OptionParser.new do |opts|
21
+ opts.banner = "
22
+ Usage: #{SCRIPT_NAME} my.fasta
23
+
24
+ my.fasta is the name of the fasta file you want to analyse. Default output is all the sequences with their signal sequences cleaved.
25
+
26
+ This default output can be changed by using one (only) of -s, -S, -v, -f, -F.\n\n"
27
+
28
+ opts.on("-s", "--summary", "print a tab separated table indicating if the sequence had a signal peptide results (if Signalp 3 is used, HMM and NN predictions are both given, respectively [default: no]") do |arg|
29
+ options['s'] = true
30
+ end
31
+ opts.on("-S", "--bigger-summary", "like -s, except also includes where the cleavage site is predicted [default: no]") do
32
+ options['S'] = true
33
+ end
34
+ opts.on("-v", "--verbose-summary", "much like -s except more details of the prediction are predicted [default: no]") do
35
+ options['c'] = true
36
+ end
37
+ opts.on("-f", "--filter-in", "filter in: print those sequences that have a signal peptide [default: no]") do
38
+ options['f'] = true
39
+ end
40
+ opts.on("-F", "--filter-out", "filter out: print those sequences that don't have a signal peptide [default: no]") do
41
+ options['F'] = true
42
+ end
43
+ opts.on("-b", "--binary-path SIGNALP_PATH", "path to the signalp binary e.g. /usr/local/bin/signalp-4.0/signalp [default: 'signalp' i.e. whatever is on the PATH]") do |arg|
44
+ options[:binary_path] = arg
45
+ end
46
+ end
47
+ o.parse!
48
+ if ARGV.length > 1
49
+ $stderr.puts o
50
+ exit 1
24
51
  end
25
52
 
26
- # Print headers if required
27
- if options['s']
28
- puts [
29
- 'Name',
30
- 'NN Prediction',
31
- 'HMM Prediction'
32
- ].join("\t")
33
- elsif options['S']
34
- puts [
35
- 'Name',
36
- 'NN Prediction',
37
- 'HMM Prediction',
38
- 'Predicted?',
39
- 'Cleavege site (if predicted)'
40
- ].join("\t")
41
53
 
42
- elsif options['v']
43
- # [:nn_Cmax, :nn_Cmax_position, :nn_Cmax_prediction,
44
- # :nn_Ymax, :nn_Ymax_position, :nn_Ymax_prediction,
45
- # :nn_Smax, :nn_Smax_position, :nn_Smax_prediction,
46
- # :nn_Smean, :nn_Smean_prediction,
47
- # :nn_D, :nn_D_prediction]
48
- # @@hmm_results = [
49
- # :hmm_result, :hmm_Cmax, :hmm_Cmax_position, :hmm_Cmax_prediction, :hmm_Sprob, :hmm_Sprob_prediction]
50
- puts [
51
- 'Name',
52
- 'NN Cmax',
53
- 'NN Cmax position',
54
- 'NN Cmax prediction',
55
- 'NN Ymax',
56
- 'NN Ymax position',
57
- 'NN Ymax prediction',
58
- 'NN Smax',
59
- 'NN Smax position',
60
- 'NN Smax prediction',
61
- 'NN Smean',
62
- 'NN Smean prediction',
63
- 'NN D',
64
- 'NN D prediction',
65
- 'HMM result',
66
- 'HMM Cmax',
67
- 'HMM Cmax position',
68
- 'HMM Cmax prediction',
69
- 'HMM Sprob',
70
- 'HMM Sprob prediction',
71
- ].join("\t")
72
- end
54
+ printed_header = false
55
+ signalp_version = nil
56
+ runner = Bio::SignalP::Wrapper.new
73
57
 
74
58
  Bio::FlatFile.open(ARGF).each do |seq|
75
- result = runner.calculate(seq.seq)
59
+ result = runner.calculate(seq.seq, :binary_path => options[:binary_path])
76
60
  if result.nil?
77
61
  $stderr.puts "Unexpected empty sequence detected, ignoring: #{seq.definition}"
78
- elsif options['s']
79
- puts [
80
- seq.entry_id,
81
- result.nn_D_prediction ? 'T' : 'F',
82
- result.hmm_Sprob_prediction ? 'T' : 'F'
83
- ].join("\t")
62
+ next
63
+ end
64
+
65
+ if !printed_header
66
+ printed_header = true
67
+
68
+ # Different headers are printed for the different versions (if at all)
69
+ if result.kind_of?(Bio::SignalP::Version3::Result)
70
+ signalp_version = 3
71
+ if options['s']
72
+ puts [
73
+ 'Name',
74
+ 'NN Prediction',
75
+ 'HMM Prediction'
76
+ ].join("\t")
77
+ elsif options['S']
78
+ puts [
79
+ 'Name',
80
+ 'NN Prediction',
81
+ 'HMM Prediction',
82
+ 'Predicted?',
83
+ 'Cleavege site (if predicted)'
84
+ ].join("\t")
85
+
86
+ elsif options['v']
87
+ # [:nn_Cmax, :nn_Cmax_position, :nn_Cmax_prediction,
88
+ # :nn_Ymax, :nn_Ymax_position, :nn_Ymax_prediction,
89
+ # :nn_Smax, :nn_Smax_position, :nn_Smax_prediction,
90
+ # :nn_Smean, :nn_Smean_prediction,
91
+ # :nn_D, :nn_D_prediction]
92
+ # @@hmm_results = [
93
+ # :hmm_result, :hmm_Cmax, :hmm_Cmax_position, :hmm_Cmax_prediction, :hmm_Sprob, :hmm_Sprob_prediction]
94
+ puts [
95
+ 'Name',
96
+ 'NN Cmax',
97
+ 'NN Cmax position',
98
+ 'NN Cmax prediction',
99
+ 'NN Ymax',
100
+ 'NN Ymax position',
101
+ 'NN Ymax prediction',
102
+ 'NN Smax',
103
+ 'NN Smax position',
104
+ 'NN Smax prediction',
105
+ 'NN Smean',
106
+ 'NN Smean prediction',
107
+ 'NN D',
108
+ 'NN D prediction',
109
+ 'HMM result',
110
+ 'HMM Cmax',
111
+ 'HMM Cmax position',
112
+ 'HMM Cmax prediction',
113
+ 'HMM Sprob',
114
+ 'HMM Sprob prediction',
115
+ ].join("\t")
116
+ end
117
+
118
+ elsif result.kind_of?(Bio::SignalP::Version4::Result)
119
+ signalp_version = 4
120
+
121
+ if options['s']
122
+ puts [
123
+ 'Name',
124
+ 'Predicted?',
125
+ ].join("\t")
126
+ elsif options['S']
127
+ puts [
128
+ 'Name',
129
+ 'Predicted?',
130
+ 'Cleavege site (if predicted)'
131
+ ].join("\t")
132
+
133
+ elsif options['v']
134
+ #:Cmax, :Cmax_position,
135
+ #:Ymax, :Ymax_position,
136
+ #:Smax, :Smax_position,
137
+ #:Smean,
138
+ #:D,
139
+ #:prediction,
140
+ #:Dmaxcut,
141
+ #:networks_used
142
+ puts [
143
+ 'Name',
144
+ 'Cmax',
145
+ 'Cmax position',
146
+ 'Ymax',
147
+ 'Ymax position',
148
+ 'Smax',
149
+ 'Smax position',
150
+ 'Smean',
151
+ 'D',
152
+ 'prediction',
153
+ 'Dmaxcut',
154
+ 'networks_used'
155
+ ].join("\t")
156
+ end
157
+ else
158
+ raise "Unexpected bio-signalp result object seen: #{result.inspect}"
159
+ end
160
+ end
161
+
162
+ if options['s']
163
+ if signalp_version == 3
164
+ puts [
165
+ seq.entry_id,
166
+ result.nn_D_prediction ? 'T' : 'F',
167
+ result.hmm_Sprob_prediction ? 'T' : 'F'
168
+ ].join("\t")
169
+ elsif signalp_version == 4
170
+ puts [
171
+ seq.entry_id,
172
+ result.prediction ? 'T' : 'F',
173
+ ].join("\t")
174
+ else
175
+ raise "Programming error"
176
+ end
84
177
  elsif options['S']
85
- puts [
86
- seq.entry_id,
87
- result.nn_D_prediction ? 'T' : 'F',
88
- result.hmm_Sprob_prediction ? 'T' : 'F',
89
- result.signal? ? 'T' : 'F',
90
- result.signal? ? result.cleavage_site : 0,
91
- ].join("\t")
178
+ if signalp_version == 3
179
+ puts [
180
+ seq.entry_id,
181
+ result.nn_D_prediction ? 'T' : 'F',
182
+ result.hmm_Sprob_prediction ? 'T' : 'F',
183
+ result.signal? ? 'T' : 'F',
184
+ result.signal? ? result.cleavage_site : 0,
185
+ ].join("\t")
186
+ elsif signalp_version == 4
187
+ puts [
188
+ seq.entry_id,
189
+ result.signal? ? 'T' : 'F',
190
+ result.signal? ? result.cleavage_site : 0,
191
+ ].join("\t")
192
+ else
193
+ raise "Programming error"
194
+ end
195
+
92
196
  elsif options['v']
93
197
  taputs = [seq.definition]
94
- [:nn_Cmax, :nn_Cmax_position, :nn_Cmax_prediction,
95
- :nn_Ymax, :nn_Ymax_position, :nn_Ymax_prediction,
96
- :nn_Smax, :nn_Smax_position, :nn_Smax_prediction,
97
- :nn_Smean, :nn_Smean_prediction,
98
- :nn_D, :nn_D_prediction,
99
- :hmm_result, :hmm_Cmax, :hmm_Cmax_position, :hmm_Cmax_prediction,
100
- :hmm_Sprob, :hmm_Sprob_prediction].each do |meth|
198
+ extras = []
199
+ if signalp_version == 3
200
+ extras = [:nn_Cmax, :nn_Cmax_position, :nn_Cmax_prediction,
201
+ :nn_Ymax, :nn_Ymax_position, :nn_Ymax_prediction,
202
+ :nn_Smax, :nn_Smax_position, :nn_Smax_prediction,
203
+ :nn_Smean, :nn_Smean_prediction,
204
+ :nn_D, :nn_D_prediction,
205
+ :hmm_result, :hmm_Cmax, :hmm_Cmax_position, :hmm_Cmax_prediction,
206
+ :hmm_Sprob, :hmm_Sprob_prediction]
207
+ elsif signalp_version == 4
208
+ extras = [
209
+ :Cmax, :Cmax_position,
210
+ :Ymax, :Ymax_position,
211
+ :Smax, :Smax_position,
212
+ :Smean,
213
+ :D,
214
+ :prediction,
215
+ :Dmaxcut,
216
+ :networks_used
217
+ ]
218
+ end
219
+
220
+ extras.each do |meth|
101
221
  taputs.push result.send(meth)
102
222
  end
103
223
  puts taputs.join("\t")
@@ -5,36 +5,38 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "bio-signalp"
8
- s.version = "0.2.3"
8
+ s.version = "0.3.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Ben J Woodcroft"]
12
- s.date = "2012-04-04"
12
+ s.date = "2012-08-18"
13
13
  s.description = "A wrapper for the signal peptide prediction algorith SignalP. Not very well supported, but seems to work for the author, at least."
14
14
  s.email = "donttrustben near gmail.com"
15
15
  s.executables = ["signalp.rb"]
16
16
  s.extra_rdoc_files = [
17
17
  "LICENSE.txt",
18
- "README.rdoc"
18
+ "README.md"
19
19
  ]
20
20
  s.files = [
21
21
  ".document",
22
22
  "Gemfile",
23
23
  "LICENSE.txt",
24
- "README.rdoc",
24
+ "README.md",
25
25
  "Rakefile",
26
26
  "VERSION",
27
27
  "bin/signalp.rb",
28
28
  "bio-signalp.gemspec",
29
29
  "lib/bio-signalp.rb",
30
- "lib/bio/appl/signalp.rb",
30
+ "lib/bio/appl/common.rb",
31
+ "lib/bio/appl/signalp3.rb",
32
+ "lib/bio/appl/signalp4.rb",
31
33
  "test/helper.rb",
32
34
  "test/test_bio-signalp.rb"
33
35
  ]
34
36
  s.homepage = "http://github.com/wwood/bioruby-signalp"
35
37
  s.licenses = ["MIT"]
36
38
  s.require_paths = ["lib"]
37
- s.rubygems_version = "1.8.17"
39
+ s.rubygems_version = "1.8.24"
38
40
  s.summary = "A wrapper for the signal peptide prediction algorith SignalP"
39
41
 
40
42
  if s.respond_to? :specification_version then
@@ -42,26 +44,26 @@ Gem::Specification.new do |s|
42
44
 
43
45
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
44
46
  s.add_runtime_dependency(%q<bio>, [">= 1.4.1"])
47
+ s.add_runtime_dependency(%q<bio-logger>, [">= 0"])
45
48
  s.add_development_dependency(%q<shoulda>, [">= 0"])
46
- s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
47
- s.add_development_dependency(%q<jeweler>, ["~> 1.6.0"])
48
- s.add_development_dependency(%q<bio>, [">= 1.4.1"])
49
- s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
49
+ s.add_development_dependency(%q<bundler>, [">= 1.0.0"])
50
+ s.add_development_dependency(%q<jeweler>, [">= 1.6.0"])
51
+ s.add_development_dependency(%q<rdoc>, [">= 3.12"])
50
52
  else
51
53
  s.add_dependency(%q<bio>, [">= 1.4.1"])
54
+ s.add_dependency(%q<bio-logger>, [">= 0"])
52
55
  s.add_dependency(%q<shoulda>, [">= 0"])
53
- s.add_dependency(%q<bundler>, ["~> 1.0.0"])
54
- s.add_dependency(%q<jeweler>, ["~> 1.6.0"])
55
- s.add_dependency(%q<bio>, [">= 1.4.1"])
56
- s.add_dependency(%q<rdoc>, ["~> 3.12"])
56
+ s.add_dependency(%q<bundler>, [">= 1.0.0"])
57
+ s.add_dependency(%q<jeweler>, [">= 1.6.0"])
58
+ s.add_dependency(%q<rdoc>, [">= 3.12"])
57
59
  end
58
60
  else
59
61
  s.add_dependency(%q<bio>, [">= 1.4.1"])
62
+ s.add_dependency(%q<bio-logger>, [">= 0"])
60
63
  s.add_dependency(%q<shoulda>, [">= 0"])
61
- s.add_dependency(%q<bundler>, ["~> 1.0.0"])
62
- s.add_dependency(%q<jeweler>, ["~> 1.6.0"])
63
- s.add_dependency(%q<bio>, [">= 1.4.1"])
64
- s.add_dependency(%q<rdoc>, ["~> 3.12"])
64
+ s.add_dependency(%q<bundler>, [">= 1.0.0"])
65
+ s.add_dependency(%q<jeweler>, [">= 1.6.0"])
66
+ s.add_dependency(%q<rdoc>, [">= 3.12"])
65
67
  end
66
68
  end
67
69
 
@@ -8,4 +8,9 @@
8
8
  # and put your plugin's code there. It is bad practice to write other code
9
9
  # directly into this file, because doing so causes confusion if this biogem
10
10
  # was ever to get merged into the main bioruby tree.
11
- require 'bio/appl/signalp'
11
+ require 'bio/appl/common'
12
+ require 'bio/appl/signalp3'
13
+ require 'bio/appl/signalp4'
14
+
15
+ require 'bio-logger'
16
+ Bio::Log::LoggerPlus.new('bio-signalp')
@@ -0,0 +1,92 @@
1
+ module Bio
2
+ module SignalP
3
+ NUM_FIELDS_IN_VERSION3_SHORT_OUTPUT = 21
4
+ NUM_FIELDS_IN_VERSION4_SHORT_OUTPUT = 12
5
+
6
+ class Wrapper
7
+ def log
8
+ log = Bio::Log::LoggerPlus['bio-signalp']
9
+ end
10
+
11
+ # Given an amino acid sequence, run SignalP on it and return a Result
12
+ # object describing the prediction. The version of SignalP used
13
+ # is auto-detected (versions 3 and 4 are supported)
14
+ #
15
+ # options:
16
+ # :binary_path: full path to signalp binary e.g. '/usr/local/bin/signalp-4.0/signalp' [default: 'signalp' i.e. signalp is in the PATH]
17
+ #
18
+ # Returns nil if the sequence is empty
19
+ def calculate(sequence, options={})
20
+ return nil if sequence.nil? or sequence == ''
21
+
22
+ default_options = {
23
+ :binary_path => 'signalp'
24
+ }
25
+ options = default_options.merge options
26
+ raise "Unexpected option parameters passed in #{options.inspect}" unless options.length == default_options.length
27
+ options[:binary_path] ||= default_options[:binary_path] #in case nil is passed here
28
+
29
+ # This command needs to work with all versions of SignalP (currently v3 and v4)
30
+ command = "#{options[:binary_path]} -f short -t euk"
31
+ log.debug "Running signalp command: #{command}" if log.debug?
32
+ Open3.popen3(command) do |stdin, stdout, stderr, wait_thr|
33
+ stdin.puts '>wrapperSeq'
34
+ stdin.puts "#{sequence}"
35
+ stdin.close
36
+
37
+ result = stdout.readlines
38
+ error = stderr.readlines
39
+
40
+ unless error.empty?
41
+ raise Exception, "There appears to be a problem while running signalp:\n#{error}"
42
+ end
43
+
44
+ # Error checking
45
+ num_expected_result_lines = 3
46
+ unless result.length == num_expected_result_lines
47
+ raise Exception, "Unexpected number of lines found in SignalP output (#{result.length}, expected #{num_expected_result_lines}):\n#{result}"
48
+ end
49
+
50
+ splits = result[2].strip.split(/[ \t]+/)
51
+ if splits.length == NUM_FIELDS_IN_VERSION3_SHORT_OUTPUT
52
+ # SignalP 3 detected, use that
53
+ log.debug "Detected SignalP version 3 type output, parsing" if log.debug?
54
+ return Bio::SignalP::Version3::Result.create_from_line(result[2].strip)
55
+ elsif splits.length == NUM_FIELDS_IN_VERSION4_SHORT_OUTPUT
56
+ log.debug "Detected SignalP version 4 type output, parsing" if log.debug?
57
+ return Bio::SignalP::Version4::Result.create_from_line(result[2].strip)
58
+ else
59
+ error_description = "Bad SignalP output line found. Are you using SignalP 3.0 or 4.0? (found #{splits.length} fields in the third line of the output):\n#{result[2]}"
60
+ log.error error_description
61
+ raise Exception, error_description
62
+ end
63
+ end
64
+ end
65
+ end
66
+
67
+ # A module for methods common to different SignalP version Result classes.
68
+ module Common
69
+ # Given an amino acid sequence (as a string),
70
+ # remove the predicted signal peptide and return the rest. Requires that the
71
+ # signal? and cleavage_site methods be implemented by the including class.
72
+ def cleave(sequence)
73
+ if signal?
74
+ return sequence[cleavage_site-1..sequence.length-1]
75
+ else
76
+ return sequence
77
+ end
78
+ end
79
+
80
+ # Simple method: 'Y' => true, 'N' => false, else nil
81
+ def to_bool(string)
82
+ if string === 'Y'
83
+ return true
84
+ elsif string === 'N'
85
+ return false
86
+ else
87
+ return nil
88
+ end
89
+ end
90
+ end
91
+ end
92
+ end
@@ -0,0 +1,110 @@
1
+ # Methods to wrap around the signal peptide prediction program SignalP (version 3.0)
2
+ require 'open3'
3
+
4
+ # Wrapper around a locally installed SignalP program
5
+ module Bio
6
+ module SignalP
7
+ class Version3
8
+ # The result of a SignalP program. Create using the output from
9
+ # -format short output and create_from_line()
10
+ class Result
11
+ include Bio::SignalP::Common
12
+
13
+ @@nn_results =
14
+ [:nn_Cmax, :nn_Cmax_position, :nn_Cmax_prediction,
15
+ :nn_Ymax, :nn_Ymax_position, :nn_Ymax_prediction,
16
+ :nn_Smax, :nn_Smax_position, :nn_Smax_prediction,
17
+ :nn_Smean, :nn_Smean_prediction,
18
+ :nn_D, :nn_D_prediction]
19
+ @@hmm_results = [
20
+ :hmm_result, :hmm_Cmax, :hmm_Cmax_position, :hmm_Cmax_prediction, :hmm_Sprob, :hmm_Sprob_prediction]
21
+
22
+ @@nn_results.each do |sym|
23
+ attr_accessor sym
24
+ end
25
+ @@hmm_results.each do |sym|
26
+ attr_accessor sym
27
+ end
28
+
29
+ # Create a new SignalpResult using a line from the signal p 'short' output format,
30
+ # version 3.0
31
+ def self.create_from_line(line)
32
+ # e.g.
33
+ # # name Cmax pos ? Ymax pos ? Smax pos ? Smean ? D ? # name ! Cmax pos ? Sprob ?
34
+ # 526.m04658 0.734 19 Y 0.686 19 Y 0.933 6 Y 0.760 Y 0.723 Y 526.m04658 Q 0.037 19 N 0.004 N
35
+ matches = line.split(/[ \t]+/)
36
+ if matches.length != Bio::SignalP::NUM_FIELDS_IN_VERSION3_SHORT_OUTPUT
37
+ raise Exception, "Bad SignalP Short Line Found (#{matches.length}): '#{line}'"
38
+ end
39
+
40
+ i = 1
41
+ result = Result.new
42
+ result.nn_Cmax = matches[i].to_f; i += 1
43
+ result.nn_Cmax_position = matches[i].to_i; i += 1
44
+ result.nn_Cmax_prediction = result.to_bool matches[i]; i += 1
45
+ result.nn_Ymax = matches[i].to_f; i += 1
46
+ result.nn_Ymax_position = matches[i].to_i; i += 1
47
+ result.nn_Ymax_prediction = result.to_bool matches[i]; i += 1
48
+ result.nn_Smax = matches[i].to_f; i += 1
49
+ result.nn_Smax_position = matches[i].to_i; i += 1
50
+ result.nn_Smax_prediction = result.to_bool matches[i]; i += 1
51
+ result.nn_Smean = matches[i].to_f; i += 1
52
+ result.nn_Smean_prediction = result.to_bool matches[i]; i += 1
53
+ result.nn_D = matches[i].to_f; i += 1
54
+ result.nn_D_prediction = result.to_bool matches[i]; i += 1
55
+
56
+ i+= 1
57
+ result.hmm_result = matches[i]; i += 1
58
+ result.hmm_Cmax = matches[i].to_f; i += 1
59
+ result.hmm_Cmax_position = matches[i].to_i; i += 1
60
+ result.hmm_Cmax_prediction = result.to_bool matches[i]; i += 1
61
+ result.hmm_Sprob = matches[i].to_f; i += 1
62
+ result.hmm_Sprob_prediction = result.to_bool matches[i]; i += 1
63
+
64
+ return result
65
+ end
66
+
67
+ # Does it have a signal peptide? It can be this class (default),
68
+ # or another class that responds to :nn_D_prediction and :hmm_Sprob_prediction
69
+ def signal?(clazz=self)
70
+ return (clazz.send(:nn_D_prediction) or clazz.send(:hmm_Sprob_prediction))
71
+ end
72
+
73
+ def classical_signal_sequence?
74
+ return @nn_D_prediction
75
+ end
76
+
77
+ def signal_anchor?
78
+ return @hmm_Sprob_prediction
79
+ end
80
+
81
+ # Return an array of all the results. NN then HMM, as per SignalP short format
82
+ def all_results
83
+ all = []
84
+
85
+ @@nn_results.each do |sym|
86
+ all.push self.send(sym)
87
+ end
88
+
89
+ @@hmm_results.each do |sym|
90
+ all.push self.send(sym)
91
+ end
92
+
93
+ return all
94
+ end
95
+
96
+ # Return an array of symbols representing the names of the columns
97
+ def self.all_result_names
98
+ return [@@nn_results, @@hmm_results].flatten
99
+ end
100
+
101
+ # Return the number of the residue after the cleavage site
102
+ # ie. the first residue of the mature protein
103
+ # Taken from the Y score, as it was decided this is the best prediction
104
+ def cleavage_site
105
+ @nn_Ymax_position
106
+ end
107
+ end
108
+ end
109
+ end
110
+ end
@@ -0,0 +1,72 @@
1
+ # Methods to wrap around the signal peptide prediction program SignalP (version 4.0)
2
+ require 'open3'
3
+
4
+ # Wrapper around a locally installed SignalP program
5
+ module Bio
6
+ module SignalP
7
+ class Version4
8
+ # The result of a SignalP program. Create using the output from
9
+ # -format short output and create_from_line()
10
+ class Result
11
+ include Bio::SignalP::Common
12
+
13
+ @@output_fields = [
14
+ :Cmax, :Cmax_position,
15
+ :Ymax, :Ymax_position,
16
+ :Smax, :Smax_position,
17
+ :Smean,
18
+ :D,
19
+ :prediction,
20
+ :Dmaxcut,
21
+ :networks_used,
22
+ ]
23
+
24
+ @@output_fields.each do |sym|
25
+ attr_accessor sym
26
+ end
27
+
28
+ # Create a new Result using a line from the SignalP 'short' output format (version 4.0)
29
+ def self.create_from_line(line)
30
+ # e.g.
31
+ #$ ~/bioinfo/signalp-4.0/signalp /tmp/acp
32
+ ## SignalP-4.0 euk predictions
33
+ ## name Cmax pos Ymax pos Smax pos Smean D ? Dmaxcut Networks-used
34
+ #acp 0.871 17 0.863 17 0.886 1 0.844 0.853 Y 0.450 SignalP-noTM
35
+ matches = line.split(/[ \t]+/)
36
+ if matches.length != Bio::SignalP::NUM_FIELDS_IN_VERSION4_SHORT_OUTPUT
37
+ raise Exception, "Bad SignalP Short Line Found (#{matches.length}): '#{line}'"
38
+ end
39
+
40
+ i = 1
41
+ result = Result.new
42
+ result.Cmax = matches[i].to_f; i += 1
43
+ result.Cmax_position = matches[i].to_i; i += 1
44
+ result.Ymax = matches[i].to_f; i += 1
45
+ result.Ymax_position = matches[i].to_i; i += 1
46
+ result.Smax = matches[i].to_f; i += 1
47
+ result.Smax_position = matches[i].to_i; i += 1
48
+ result.Smean = matches[i].to_f; i += 1
49
+ result.D = matches[i].to_f; i += 1
50
+ result.prediction = result.to_bool matches[i]; i += 1
51
+ result.Dmaxcut = matches[i].to_f; i += 1
52
+ result.networks_used = matches[i]; i += 1
53
+
54
+ return result
55
+ end
56
+
57
+ # Does it have a signal peptide? It can be this class (default),
58
+ # or another object that responds to :prediction
59
+ def signal?(clazz=self)
60
+ return clazz.send(:prediction)
61
+ end
62
+
63
+ # Return the number of the residue after the cleavage site
64
+ # ie. the first residue of the mature protein
65
+ # Taken from the Y score, as it was decided this is the best prediction
66
+ def cleavage_site
67
+ @Ymax_position
68
+ end
69
+ end
70
+ end
71
+ end
72
+ end
@@ -1,7 +1,20 @@
1
1
  require 'helper'
2
2
  require 'open3'
3
3
 
4
+ @@signalp3_path = File.join(ENV['HOME'],'bioinfo','signalp-3.0','signalp')
5
+ @@signalp4_path = File.join(ENV['HOME'],'bioinfo','signalp-4.0','signalp')
6
+ @@binaries = [
7
+ @@signalp3_path,
8
+ @@signalp4_path,
9
+ ]
10
+
4
11
  class TestBioSignalp < Test::Unit::TestCase
12
+ def setup
13
+ log_name = 'bio-signalp'
14
+ Bio::Log::CLI.logger('stderr')
15
+ #Bio::Log::CLI.configure(log_name) # when commented out no debug is printed out
16
+ end
17
+
5
18
  should "positive control" do
6
19
  # Known to have a signal peptide
7
20
  acp_sequence = 'MKILLLCIIFLYYVNAFKNTQKDGVSLQILKKKRSNQVNFLNRKNDYNLIKNKNPSSSLKSTFDDIKKIISKQLSVEEDKIQMNSNFTKDLGADSLDLVELIMALEEKFNVTISDQDALKINTVQDAIDYIEKNNKQ'
@@ -15,15 +28,54 @@ class TestBioSignalp < Test::Unit::TestCase
15
28
 
16
29
  assert_equal false, Bio::SignalP::Wrapper.new.calculate(non_signal_sequence).signal?
17
30
  end
31
+
32
+ should 'work with different SignalP versions, this test is specific to Ben\'s machines probably' do
33
+ binaries = {
34
+ @@binaries[0] => Bio::SignalP::Version3::Result,
35
+ @@binaries[1] => Bio::SignalP::Version4::Result,
36
+ }
37
+ binaries.each do |binary, clazz|
38
+ acp_sequence = 'MKILLLCIIFLYYVNAFKNTQKDGVSLQILKKKRSNQVNFLNRKNDYNLIKNKNPSSSLKSTFDDIKKIISKQLSVEEDKIQMNSNFTKDLGADSLDLVELIMALEEKFNVTISDQDALKINTVQDAIDYIEKNNKQ'
39
+ positive_result = Bio::SignalP::Wrapper.new.calculate(acp_sequence, :binary_path => binary)
40
+ assert_equal true, positive_result.signal?, binary
41
+ assert_kind_of clazz, positive_result, binary
42
+ assert_equal 17, positive_result.cleavage_site, binary
43
+ assert_equal 'FKNTQKDGVSLQILKKKRSNQVNFLNRKNDYNLIKNKNPSSSLKSTFDDIKKIISKQLSVEEDKIQMNSNFTKDLGADSLDLVELIMALEEKFNVTISDQDALKINTVQDAIDYIEKNNKQ',
44
+ positive_result.cleave(acp_sequence), binary
45
+ non_signal_sequence = 'KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK'
46
+ assert_equal false, Bio::SignalP::Wrapper.new.calculate(non_signal_sequence, :binary_path => binary).signal?, binary
47
+ end
48
+ end
49
+
50
+ should 'give different predictions for v3 and v4' do
51
+ different = <<EOF
52
+ MMKGMVEAAVKWMKAGLPLRILKLVLYAKVIDGDLQGHSLRRFSEVLKTFTELKERYEMQ
53
+ LLLPKAVPLEFDVYLSFSEEDKEVAKVIREKLSGAKDGVRIYDSSHQGINKDTVFQEDMY
54
+ SIMMKSARVVTVLSPNYLRNKACIEQYNIALCCNRRALRDMLAPIYVDSVEMMPTYMGLV
55
+ QYVDCRPHDPSKIGEACSQLTVSLSVTFHTELRVAEFDPLRYDVFLSYSHRDTEKANRFV
56
+ EMLQKLAPDLKLFFDVQELKTGKSWQRTLYHSIDGSRCMLALISEPYLKSAVCQEEFALA
57
+ QAKHCAKGKQHLQLISICLDDLDTIQPEFTHIPMVKGTPDVFDDMVKTVCPAVIQWLNGE
58
+ RVDQTETIKTLFDDQNITTLSADAEMEKFRQSHFQKEFGTQDSLISSKSPFPPKLSDILP
59
+ EPGKEVKDKRPKSASDCDLIFSYHSDDEKYVSFIVKILQLNAPSLKVKAVCSDENKLSAF
60
+ ERAHCIVPVLSPNYLESPECVEEFHIAIWRQRISNPEAGALLPICVHTLPQKPTYFHLVQ
61
+ SAVSMTDGIWAQLSGQHNFGLPQKIVDFSLAAGKTPLSQVDMLALYMAAYYILKRFAKAR
62
+ SCKEVEFSMKPALFNIMKLQEQIKQLTQPDYTDQLCKTLLEIHFNDIPDSWREGGHYLRP
63
+ KEPEDTTPAIMAPVEGASDSQTHNALNERTQDSSGKDKSEVQGEKAQRSASEGVSHDTVQ
64
+ QMDQDHDLKAAQGQDTAEGQDAAEGQDSEGGQDSEGGWDSDGGRDSVGNKTDAEKITGNH
65
+ GDEGGDGQVGGSEESKEKTEDEDEDISKRNRAPRSVACVCL
66
+ EOF
67
+ assert_equal true, Bio::SignalP::Wrapper.new.calculate(different, :binary_path => @@signalp3_path).signal?
68
+ assert_equal false, Bio::SignalP::Wrapper.new.calculate(different, :binary_path => @@signalp4_path).signal?
69
+ end
18
70
  end
19
71
 
20
72
  class TestSignalPScript < Test::Unit::TestCase
21
- command = File.join(File.dirname(__FILE__),'..','bin','signalp.rb')
73
+ # Known to have a signal peptide
74
+ acp_sequence = 'MKILLLCIIFLYYVNAFKNTQKDGVSLQILKKKRSNQVNFLNRKNDYNLIKNKNPSSSLKSTFDDIKKIISKQLSVEEDKIQMNSNFTKDLGADSLDLVELIMALEEKFNVTISDQDALKINTVQDAIDYIEKNNKQ'
22
75
 
23
76
  should "positive control" do
24
- # Known to have a signal peptide
25
- acp_sequence = 'MKILLLCIIFLYYVNAFKNTQKDGVSLQILKKKRSNQVNFLNRKNDYNLIKNKNPSSSLKSTFDDIKKIISKQLSVEEDKIQMNSNFTKDLGADSLDLVELIMALEEKFNVTISDQDALKINTVQDAIDYIEKNNKQ'
26
-
77
+ command = File.join(File.dirname(__FILE__),'..','bin','signalp.rb')
78
+
27
79
  Open3.popen3(command) do |stdin, stdout, stderr|
28
80
  stdin.puts '>positive'
29
81
  stdin.puts acp_sequence
@@ -37,7 +89,7 @@ class TestSignalPScript < Test::Unit::TestCase
37
89
  end
38
90
 
39
91
  should "return gracefully when empty sequences are given" do
40
- acp_sequence = 'MKILLLCIIFLYYVNAFKNTQKDGVSLQILKKKRSNQVNFLNRKNDYNLIKNKNPSSSLKSTFDDIKKIISKQLSVEEDKIQMNSNFTKDLGADSLDLVELIMALEEKFNVTISDQDALKINTVQDAIDYIEKNNKQ'
92
+ command = File.join(File.dirname(__FILE__),'..','bin','signalp.rb')
41
93
 
42
94
  Open3.popen3(command) do |stdin, stdout, stderr|
43
95
  stdin.puts '>positive'
@@ -54,4 +106,54 @@ class TestSignalPScript < Test::Unit::TestCase
54
106
  ">positive2\n", "FKNTQKDGVSLQILKKKRSNQVNFLNRKNDYNLIKNKNPSSSLKSTFDDIKKIISKQLSVEEDKIQMNSNFTKDLGADSLDLVELIMALEEKFNVTISDQDALKINTVQDAIDYIEKNNKQ\n"], @result
55
107
  assert_equal ["Unexpected empty sequence detected, ignoring: empty\n"], @error
56
108
  end
109
+
110
+
111
+
112
+
113
+ should 'give the right -s output for signalp 3' do
114
+ command = File.join(File.dirname(__FILE__),'..','bin','signalp.rb')
115
+
116
+ # This also tests the -b flag
117
+ command = "#{command} -b #{@@signalp3_path} -s"
118
+ Open3.popen3(command) do |stdin, stdout, stderr|
119
+ stdin.puts '>positive'
120
+ stdin.puts acp_sequence
121
+ stdin.close
122
+
123
+ @result = stdout.readlines # convert to string?
124
+ @error = stderr.readlines
125
+ end
126
+
127
+ expected = [
128
+ "Name\tNN Prediction\tHMM Prediction\n",
129
+ "positive\tT\tT\n"
130
+ ]
131
+ assert_equal expected, @result
132
+ assert_equal [], @error
133
+ end
134
+
135
+
136
+
137
+
138
+ should 'give the right -s output for signalp 4' do
139
+ command = File.join(File.dirname(__FILE__),'..','bin','signalp.rb')
140
+
141
+ command = "#{command} -b #{@@signalp4_path} -s"
142
+ Open3.popen3(command) do |stdin, stdout, stderr|
143
+ stdin.puts '>positive'
144
+ stdin.puts acp_sequence
145
+ stdin.close
146
+
147
+ @result = stdout.readlines # convert to string?
148
+ @error = stderr.readlines
149
+ end
150
+
151
+ expected = [
152
+ "Name\tPredicted?\n",
153
+ "positive\tT\n"
154
+ ]
155
+ assert_equal [], @error
156
+ assert_equal expected, @result
157
+ end
158
+
57
159
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-signalp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.3.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-04-04 00:00:00.000000000 Z
12
+ date: 2012-08-18 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bio
16
- requirement: &75320500 !ruby/object:Gem::Requirement
16
+ requirement: !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,31 @@ dependencies:
21
21
  version: 1.4.1
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *75320500
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 1.4.1
30
+ - !ruby/object:Gem::Dependency
31
+ name: bio-logger
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
25
46
  - !ruby/object:Gem::Dependency
26
47
  name: shoulda
27
- requirement: &75320020 !ruby/object:Gem::Requirement
48
+ requirement: !ruby/object:Gem::Requirement
28
49
  none: false
29
50
  requirements:
30
51
  - - ! '>='
@@ -32,51 +53,60 @@ dependencies:
32
53
  version: '0'
33
54
  type: :development
34
55
  prerelease: false
35
- version_requirements: *75320020
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
36
62
  - !ruby/object:Gem::Dependency
37
63
  name: bundler
38
- requirement: &75318990 !ruby/object:Gem::Requirement
64
+ requirement: !ruby/object:Gem::Requirement
39
65
  none: false
40
66
  requirements:
41
- - - ~>
67
+ - - ! '>='
42
68
  - !ruby/object:Gem::Version
43
69
  version: 1.0.0
44
70
  type: :development
45
71
  prerelease: false
46
- version_requirements: *75318990
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: 1.0.0
47
78
  - !ruby/object:Gem::Dependency
48
79
  name: jeweler
49
- requirement: &75318160 !ruby/object:Gem::Requirement
80
+ requirement: !ruby/object:Gem::Requirement
50
81
  none: false
51
82
  requirements:
52
- - - ~>
83
+ - - ! '>='
53
84
  - !ruby/object:Gem::Version
54
85
  version: 1.6.0
55
86
  type: :development
56
87
  prerelease: false
57
- version_requirements: *75318160
58
- - !ruby/object:Gem::Dependency
59
- name: bio
60
- requirement: &75317190 !ruby/object:Gem::Requirement
88
+ version_requirements: !ruby/object:Gem::Requirement
61
89
  none: false
62
90
  requirements:
63
91
  - - ! '>='
64
92
  - !ruby/object:Gem::Version
65
- version: 1.4.1
66
- type: :development
67
- prerelease: false
68
- version_requirements: *75317190
93
+ version: 1.6.0
69
94
  - !ruby/object:Gem::Dependency
70
95
  name: rdoc
71
- requirement: &75316460 !ruby/object:Gem::Requirement
96
+ requirement: !ruby/object:Gem::Requirement
72
97
  none: false
73
98
  requirements:
74
- - - ~>
99
+ - - ! '>='
75
100
  - !ruby/object:Gem::Version
76
101
  version: '3.12'
77
102
  type: :development
78
103
  prerelease: false
79
- version_requirements: *75316460
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '3.12'
80
110
  description: A wrapper for the signal peptide prediction algorithm SignalP. Not very
81
111
  well supported, but seems to work for the author, at least.
82
112
  email: donttrustben near gmail.com
@@ -85,18 +115,20 @@ executables:
85
115
  extensions: []
86
116
  extra_rdoc_files:
87
117
  - LICENSE.txt
88
- - README.rdoc
118
+ - README.md
89
119
  files:
90
120
  - .document
91
121
  - Gemfile
92
122
  - LICENSE.txt
93
- - README.rdoc
123
+ - README.md
94
124
  - Rakefile
95
125
  - VERSION
96
126
  - bin/signalp.rb
97
127
  - bio-signalp.gemspec
98
128
  - lib/bio-signalp.rb
99
- - lib/bio/appl/signalp.rb
129
+ - lib/bio/appl/common.rb
130
+ - lib/bio/appl/signalp3.rb
131
+ - lib/bio/appl/signalp4.rb
100
132
  - test/helper.rb
101
133
  - test/test_bio-signalp.rb
102
134
  homepage: http://github.com/wwood/bioruby-signalp
@@ -114,7 +146,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
114
146
  version: '0'
115
147
  segments:
116
148
  - 0
117
- hash: 903911101
149
+ hash: -969867473
118
150
  required_rubygems_version: !ruby/object:Gem::Requirement
119
151
  none: false
120
152
  requirements:
@@ -123,7 +155,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
123
155
  version: '0'
124
156
  requirements: []
125
157
  rubyforge_project:
126
- rubygems_version: 1.8.17
158
+ rubygems_version: 1.8.24
127
159
  signing_key:
128
160
  specification_version: 3
129
161
  summary: A wrapper for the signal peptide prediction algorithm SignalP
@@ -1,30 +0,0 @@
1
- = bio-signalp
2
-
3
- A wrapper for the signal peptide prediction algorithm SignalP (version 3.0).
4
-
5
- Using this bio-gem requires SignalP to be locally installed and configured correctly. http://www.cbs.dtu.dk/services/SignalP-3.0/ has instructions on how it may be downloaded. This gem requires that the signalp executable is available from the command line (i.e. running 'signalp' works at the command line), so the steps to set it up are:
6
-
7
- # Download SignalP 3.0 and unpack
8
- # Modify the signalp script in the unpacked directory
9
- # Add the unpacked directory to your path
10
- # install this bioruby plugin (gem install bioruby-signalp)
11
-
12
- This bio-gem includes a library for interacting with signalp programmatically, but also (as of version 0.1.1) a script as well.
13
-
14
- NOTE: Only tested with SignalP 3.0 at this point. Hopefully 4.0 will be tested in the future.
15
-
16
- == Contributing to bio-signalp
17
-
18
- * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
19
- * Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it
20
- * Fork the project
21
- * Start a feature/bugfix branch
22
- * Commit and push until you are happy with your contribution
23
- * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
24
- * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate it to its own commit so I can cherry-pick around it.
25
-
26
- == Copyright
27
-
28
- Copyright (c) 2011-2012 Ben J Woodcroft. See LICENSE.txt for
29
- further details.
30
-
@@ -1,165 +0,0 @@
1
- # Methods to wrap around the signal peptide prediction program SignalP (version 3.0)
2
- require 'open3'
3
-
4
- # Wrapper around a locally installed SignalP program
5
- module Bio
6
- class SignalP
7
- NUM_FIELDS_IN_SHORT_OUTPUT = 21
8
-
9
- class Wrapper
10
- # Given an amino acid sequence, return a SignalPResult
11
- # representing it taken from the file.
12
- #
13
- # Returns nil if the sequence is empty
14
- def calculate(sequence)
15
- return nil if sequence.nil? or sequence == ''
16
-
17
- command = 'signalp -trunc 70 -format short -t euk'
18
- Open3.popen3(command) do |stdin, stdout, stderr, wait_thr|
19
- stdin.puts '>wrapperSeq'
20
- stdin.puts "#{sequence}"
21
- stdin.close
22
-
23
- result = stdout.readlines
24
- error = stderr.readlines
25
-
26
- unless error.empty?
27
- raise Exception, "There appears to be a problem while running signalp:\n#{error}"
28
- end
29
-
30
- # Error checking
31
- num_expected_result_lines = 3
32
- unless result.length == num_expected_result_lines
33
- raise Exception, "Unexpected number of lines found in SignalP output (#{result.length}, expected #{num_expected_result_lines}):\n#{result}"
34
- end
35
-
36
- splits = result[2].strip.split(/[ \t]+/)
37
- if splits.length != NUM_FIELDS_IN_SHORT_OUTPUT
38
- raise Exception, "Bad SignalP output line found. Are you using SignalP 3.0? (expected #{NUM_FIELDS_IN_SHORT_OUTPUT} fields, found #{splits.length} fields):\n#{result[2]}"
39
- end
40
-
41
- return Result.create_from_line(result[2].strip)
42
- end
43
- end
44
- end
45
-
46
- # The result of a SignalP program. Create using the output from
47
- # -format short output and create_from_line()
48
- class Result
49
- @@nn_results =
50
- [:nn_Cmax, :nn_Cmax_position, :nn_Cmax_prediction,
51
- :nn_Ymax, :nn_Ymax_position, :nn_Ymax_prediction,
52
- :nn_Smax, :nn_Smax_position, :nn_Smax_prediction,
53
- :nn_Smean, :nn_Smean_prediction,
54
- :nn_D, :nn_D_prediction]
55
- @@hmm_results = [
56
- :hmm_result, :hmm_Cmax, :hmm_Cmax_position, :hmm_Cmax_prediction, :hmm_Sprob, :hmm_Sprob_prediction]
57
-
58
- @@nn_results.each do |sym|
59
- attr_accessor sym
60
- end
61
- @@hmm_results.each do |sym|
62
- attr_accessor sym
63
- end
64
-
65
- # Create a new SignalpResult using a line from the signal p 'short' output format,
66
- # version 3.0
67
- def self.create_from_line(line)
68
- # e.g.
69
- # # name Cmax pos ? Ymax pos ? Smax pos ? Smean ? D ? # name ! Cmax pos ? Sprob ?
70
- # 526.m04658 0.734 19 Y 0.686 19 Y 0.933 6 Y 0.760 Y 0.723 Y 526.m04658 Q 0.037 19 N 0.004 N
71
- matches = line.split(/[ \t]+/)
72
- if matches.length != NUM_FIELDS_IN_SHORT_OUTPUT
73
- raise Exception, "Bad SignalP Short Line Found (#{matches.length}): '#{line}'"
74
- end
75
-
76
- i = 1
77
- result = Result.new
78
- result.nn_Cmax = matches[i].to_f; i += 1
79
- result.nn_Cmax_position = matches[i].to_i; i += 1
80
- result.nn_Cmax_prediction = to_bool matches[i]; i += 1
81
- result.nn_Ymax = matches[i].to_f; i += 1
82
- result.nn_Ymax_position = matches[i].to_i; i += 1
83
- result.nn_Ymax_prediction = to_bool matches[i]; i += 1
84
- result.nn_Smax = matches[i].to_f; i += 1
85
- result.nn_Smax_position = matches[i].to_i; i += 1
86
- result.nn_Smax_prediction = to_bool matches[i]; i += 1
87
- result.nn_Smean = matches[i].to_f; i += 1
88
- result.nn_Smean_prediction = to_bool matches[i]; i += 1
89
- result.nn_D = matches[i].to_f; i += 1
90
- result.nn_D_prediction = to_bool matches[i]; i += 1
91
-
92
- i+= 1
93
- result.hmm_result = matches[i]; i += 1
94
- result.hmm_Cmax = matches[i].to_f; i += 1
95
- result.hmm_Cmax_position = matches[i].to_i; i += 1
96
- result.hmm_Cmax_prediction = to_bool matches[i]; i += 1
97
- result.hmm_Sprob = matches[i].to_f; i += 1
98
- result.hmm_Sprob_prediction = to_bool matches[i]; i += 1
99
-
100
- return result
101
- end
102
-
103
- def self.to_bool(string)
104
- if string === 'Y'
105
- return true
106
- elsif string === 'N'
107
- return false
108
- else
109
- return nil
110
- end
111
- end
112
-
113
- # Does it have a signal peptide? It can be this class (default),
114
- # or another class that responds to :nn_D_prediction and :hmm_Sprob_prediction
115
- def signal?(clazz=self)
116
- return (clazz.send(:nn_D_prediction) or clazz.send(:hmm_Sprob_prediction))
117
- end
118
-
119
- def classical_signal_sequence?
120
- return @nn_D_prediction
121
- end
122
-
123
- def signal_anchor?
124
- return @hmm_Sprob_prediction
125
- end
126
-
127
- # Return an array of all the results. NN then HMM, as per SignalP short format
128
- def all_results
129
- all = []
130
-
131
- @@nn_results.each do |sym|
132
- all.push self.send(sym)
133
- end
134
-
135
- @@hmm_results.each do |sym|
136
- all.push self.send(sym)
137
- end
138
-
139
- return all
140
- end
141
-
142
- # Return an array of symbols representing the names of the columns
143
- def self.all_result_names
144
- return [@@nn_results, @@hmm_results].flatten
145
- end
146
-
147
- # Return the number of the residue after the cleavage site
148
- # ie. the first residue of the mature protein
149
- # Taken from the Y score, as it was decided this is the best prediction
150
- def cleavage_site
151
- @nn_Ymax_position
152
- end
153
-
154
- # Given an amino acid sequence (as a string),
155
- # chop it off and return the remnants
156
- def cleave(sequence)
157
- if signal?
158
- return sequence[cleavage_site-1..sequence.length-1]
159
- else
160
- return sequence
161
- end
162
- end
163
- end
164
- end
165
- end