bio-signalp 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +10 -5
- data/VERSION +1 -1
- data/bin/signalp.rb +114 -0
- data/bio-signalp.gemspec +5 -2
- data/lib/bio/appl/signalp.rb +30 -20
- data/test/test_bio-signalp.rb +12 -2
- metadata +8 -7
data/README.rdoc
CHANGED
@@ -1,12 +1,17 @@
|
|
1
1
|
= bio-signalp
|
2
2
|
|
3
|
-
A wrapper for the signal peptide prediction algorithm SignalP.
|
3
|
+
A wrapper for the signal peptide prediction algorithm SignalP (version 3.0).
|
4
4
|
|
5
|
-
Using this bio-gem requires SignalP to be locally installed and configured correctly. http://www.cbs.dtu.dk/services/SignalP/ has instructions on how it may be downloaded. This gem requires that the signalp executable is available from the command line (i.e. running 'signalp' works at the command)
|
5
|
+
Using this bio-gem requires SignalP to be locally installed and configured correctly. http://www.cbs.dtu.dk/services/SignalP-3.0/ has instructions on how it may be downloaded. This gem requires that the signalp executable is available from the command line (i.e. running 'signalp' works at the command line), so the steps to set up are:
|
6
6
|
|
7
|
-
|
7
|
+
# Download SignalP 3.0 and unpack
|
8
|
+
# Modify the signalp script in the unpacked directory
|
9
|
+
# Add the unpacked directory to your path
|
10
|
+
# Install this bioruby plugin (gem install bio-signalp)
|
8
11
|
|
9
|
-
This bio-gem
|
12
|
+
This bio-gem includes a library for interacting with signalp programmatically and, as of version 0.1.1, a command-line script (signalp.rb) as well.
|
13
|
+
|
14
|
+
NOTE: Only tested with SignalP 3.0 at this point. Hopefully 4.0 will be tested in the future.
|
10
15
|
|
11
16
|
== Contributing to bio-signalp
|
12
17
|
|
@@ -20,6 +25,6 @@ This bio-gem is merely a software library, and does not (currently) include any
|
|
20
25
|
|
21
26
|
== Copyright
|
22
27
|
|
23
|
-
Copyright (c) 2011 Ben J Woodcroft. See LICENSE.txt for
|
28
|
+
Copyright (c) 2011-2012 Ben J Woodcroft. See LICENSE.txt for
|
24
29
|
further details.
|
25
30
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.2.0
|
data/bin/signalp.rb
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bio'
|
5
|
+
require 'bio-signalp'
|
6
|
+
|
7
|
+
# if this was not called as a module, run as a script.
|
8
|
+
if $0 == __FILE__
|
9
|
+
require 'bio'
|
10
|
+
require 'optparse'
|
11
|
+
|
12
|
+
runner = Bio::SignalP::Wrapper.new
|
13
|
+
|
14
|
+
options = ARGV.getopts("sShvfF") #s for summary, no args required
|
15
|
+
if options['h']
|
16
|
+
$stderr.puts "Usage: signalp.rb [-svf] <my.fasta>"
|
17
|
+
$stderr.puts "Where my.fasta is the name of the fasta file you want to analyse. Default output is all the sequences with their signal sequences cleaved."
|
18
|
+
$stderr.puts "-s: summary: print a tab separated table indicating if the sequence had a signal peptide according to the HMM and NN results, respectively."
|
19
|
+
$stderr.puts "-S: bigger_summary: like -s, except also includes where the cleavage site is predicted"
|
20
|
+
$stderr.puts "-v: verbose summary: much like -s except more details of the prediction are predicted."
|
21
|
+
$stderr.puts "-f: filter in: print those sequences that have a signal peptide"
|
22
|
+
$stderr.puts "-F: filter out: print those sequences that don't have a signal peptide"
|
23
|
+
exit
|
24
|
+
end
|
25
|
+
|
26
|
+
# Print headers if required
|
27
|
+
if options['s']
|
28
|
+
puts [
|
29
|
+
'Name',
|
30
|
+
'NN Prediction',
|
31
|
+
'HMM Prediction'
|
32
|
+
].join("\t")
|
33
|
+
elsif options['S']
|
34
|
+
puts [
|
35
|
+
'Name',
|
36
|
+
'NN Prediction',
|
37
|
+
'HMM Prediction',
|
38
|
+
'Predicted?',
|
39
|
+
'Cleavege site (if predicted)'
|
40
|
+
].join("\t")
|
41
|
+
|
42
|
+
elsif options['v']
|
43
|
+
# [:nn_Cmax, :nn_Cmax_position, :nn_Cmax_prediction,
|
44
|
+
# :nn_Ymax, :nn_Ymax_position, :nn_Ymax_prediction,
|
45
|
+
# :nn_Smax, :nn_Smax_position, :nn_Smax_prediction,
|
46
|
+
# :nn_Smean, :nn_Smean_prediction,
|
47
|
+
# :nn_D, :nn_D_prediction]
|
48
|
+
# @@hmm_results = [
|
49
|
+
# :hmm_result, :hmm_Cmax, :hmm_Cmax_position, :hmm_Cmax_prediction, :hmm_Sprob, :hmm_Sprob_prediction]
|
50
|
+
puts [
|
51
|
+
'Name',
|
52
|
+
'NN Cmax',
|
53
|
+
'NN Cmax position',
|
54
|
+
'NN Cmax prediction',
|
55
|
+
'NN Ymax',
|
56
|
+
'NN Ymax position',
|
57
|
+
'NN Ymax prediction',
|
58
|
+
'NN Smax',
|
59
|
+
'NN Smax position',
|
60
|
+
'NN Smax prediction',
|
61
|
+
'NN Smean',
|
62
|
+
'NN Smean prediction',
|
63
|
+
'NN D',
|
64
|
+
'NN D prediction',
|
65
|
+
'HMM result',
|
66
|
+
'HMM Cmax',
|
67
|
+
'HMM Cmax position',
|
68
|
+
'HMM Cmax prediction',
|
69
|
+
'HMM Sprob',
|
70
|
+
'HMM Sprob prediction',
|
71
|
+
].join("\t")
|
72
|
+
end
|
73
|
+
|
74
|
+
Bio::FlatFile.open(ARGV[0]).each do |seq|
|
75
|
+
result = runner.calculate(seq.seq)
|
76
|
+
if options['s']
|
77
|
+
puts [
|
78
|
+
seq.entry_id,
|
79
|
+
result.nn_D_prediction ? 'T' : 'F',
|
80
|
+
result.hmm_Sprob_prediction ? 'T' : 'F'
|
81
|
+
].join("\t")
|
82
|
+
elsif options['S']
|
83
|
+
puts [
|
84
|
+
seq.entry_id,
|
85
|
+
result.nn_D_prediction ? 'T' : 'F',
|
86
|
+
result.hmm_Sprob_prediction ? 'T' : 'F',
|
87
|
+
result.signal? ? 'T' : 'F',
|
88
|
+
result.signal? ? result.cleavage_site : 0,
|
89
|
+
].join("\t")
|
90
|
+
elsif options['v']
|
91
|
+
taputs = [seq.definition]
|
92
|
+
[:nn_Cmax, :nn_Cmax_position, :nn_Cmax_prediction,
|
93
|
+
:nn_Ymax, :nn_Ymax_position, :nn_Ymax_prediction,
|
94
|
+
:nn_Smax, :nn_Smax_position, :nn_Smax_prediction,
|
95
|
+
:nn_Smean, :nn_Smean_prediction,
|
96
|
+
:nn_D, :nn_D_prediction,
|
97
|
+
:hmm_result, :hmm_Cmax, :hmm_Cmax_position, :hmm_Cmax_prediction,
|
98
|
+
:hmm_Sprob, :hmm_Sprob_prediction].each do |meth|
|
99
|
+
taputs.push result.send(meth)
|
100
|
+
end
|
101
|
+
puts taputs.join("\t")
|
102
|
+
elsif options['f']
|
103
|
+
if result.signal?
|
104
|
+
puts seq
|
105
|
+
end
|
106
|
+
elsif options['F']
|
107
|
+
if !result.signal?
|
108
|
+
puts seq
|
109
|
+
end
|
110
|
+
else
|
111
|
+
puts ">#{seq.entry_id}\n#{result.cleave(seq.seq)}"
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
data/bio-signalp.gemspec
CHANGED
@@ -5,13 +5,15 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{bio-signalp}
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.2.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Ben J Woodcroft"]
|
12
|
-
s.date = %q{
|
12
|
+
s.date = %q{2012-01-06}
|
13
|
+
s.default_executable = %q{signalp.rb}
|
13
14
|
s.description = %q{A wrapper for the signal peptide prediction algorith SignalP. Not very well supported, but seems to work for the author, at least.}
|
14
15
|
s.email = %q{donttrustben near gmail.com}
|
16
|
+
s.executables = ["signalp.rb"]
|
15
17
|
s.extra_rdoc_files = [
|
16
18
|
"LICENSE.txt",
|
17
19
|
"README.rdoc"
|
@@ -24,6 +26,7 @@ Gem::Specification.new do |s|
|
|
24
26
|
"README.rdoc",
|
25
27
|
"Rakefile",
|
26
28
|
"VERSION",
|
29
|
+
"bin/signalp.rb",
|
27
30
|
"bio-signalp.gemspec",
|
28
31
|
"lib/bio-signalp.rb",
|
29
32
|
"lib/bio/appl/signalp.rb",
|
data/lib/bio/appl/signalp.rb
CHANGED
@@ -1,31 +1,41 @@
|
|
1
1
|
# Methods to wrap around the signal peptide prediction program SignalP (version 3.0)
|
2
|
-
require '
|
3
|
-
require 'rubygems'
|
4
|
-
require 'rio'
|
2
|
+
require 'open3'
|
5
3
|
|
6
4
|
# Wrapper around a locally installed SignalP program
|
7
5
|
module Bio
|
8
6
|
class SignalP
|
9
|
-
|
7
|
+
NUM_FIELDS_IN_SHORT_OUTPUT = 21
|
8
|
+
|
9
|
+
class Wrapper
|
10
10
|
# Given an amino acid sequence, return a SignalPResult
|
11
11
|
# representing SignalP's prediction for that sequence.
|
12
12
|
def calculate(sequence)
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
13
|
+
command = 'signalp -trunc 70 -format short -t euk'
|
14
|
+
Open3.popen3(command) do |stdin, stdout, stderr, wait_thr|
|
15
|
+
stdin.puts '>wrapperSeq'
|
16
|
+
stdin.puts "#{sequence}"
|
17
|
+
stdin.close
|
18
|
+
|
19
|
+
result = stdout.readlines
|
20
|
+
error = stderr.readlines
|
21
|
+
|
22
|
+
unless error.empty?
|
23
|
+
raise Exception, "There appears to be a problem while running signalp:\n#{error}"
|
24
|
+
end
|
18
25
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
26
|
+
# Error checking
|
27
|
+
num_expected_result_lines = 3
|
28
|
+
unless result.length == num_expected_result_lines
|
29
|
+
raise Exception, "Unexpected number of lines found in SignalP output (#{result.length}, expected #{num_expected_result_lines}):\n#{result}"
|
30
|
+
end
|
31
|
+
|
32
|
+
splits = result[2].strip.split(/[ \t]+/)
|
33
|
+
if splits.length != NUM_FIELDS_IN_SHORT_OUTPUT
|
34
|
+
raise Exception, "Bad SignalP output line found. Are you using SignalP 3.0? (expected #{NUM_FIELDS_IN_SHORT_OUTPUT} fields, found #{splits.length} fields):\n#{result[2]}"
|
35
|
+
end
|
36
|
+
|
37
|
+
return Result.create_from_line(result[2].strip)
|
38
|
+
end
|
29
39
|
end
|
30
40
|
end
|
31
41
|
|
@@ -55,7 +65,7 @@ module Bio
|
|
55
65
|
# # name Cmax pos ? Ymax pos ? Smax pos ? Smean ? D ? # name ! Cmax pos ? Sprob ?
|
56
66
|
# 526.m04658 0.734 19 Y 0.686 19 Y 0.933 6 Y 0.760 Y 0.723 Y 526.m04658 Q 0.037 19 N 0.004 N
|
57
67
|
matches = line.split(/[ \t]+/)
|
58
|
-
if matches.length !=
|
68
|
+
if matches.length != NUM_FIELDS_IN_SHORT_OUTPUT
|
59
69
|
raise Exception, "Bad SignalP Short Line Found (#{matches.length}): '#{line}'"
|
60
70
|
end
|
61
71
|
|
data/test/test_bio-signalp.rb
CHANGED
@@ -1,7 +1,17 @@
|
|
1
1
|
require 'helper'
|
2
2
|
|
3
3
|
class TestBioSignalp < Test::Unit::TestCase
|
4
|
-
should "
|
5
|
-
|
4
|
+
should "positive control" do
|
5
|
+
# Known to have a signal peptide
|
6
|
+
acp_sequence = 'MKILLLCIIFLYYVNAFKNTQKDGVSLQILKKKRSNQVNFLNRKNDYNLIKNKNPSSSLKSTFDDIKKIISKQLSVEEDKIQMNSNFTKDLGADSLDLVELIMALEEKFNVTISDQDALKINTVQDAIDYIEKNNKQ'
|
7
|
+
|
8
|
+
assert_equal true, Bio::SignalP::Wrapper.new.calculate(acp_sequence).signal?
|
9
|
+
end
|
10
|
+
|
11
|
+
should "negative control" do
|
12
|
+
# Known not to have a signal peptide
|
13
|
+
non_signal_sequence = 'KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK'
|
14
|
+
|
15
|
+
assert_equal false, Bio::SignalP::Wrapper.new.calculate(non_signal_sequence).signal?
|
6
16
|
end
|
7
17
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-signalp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
8
|
+
- 2
|
9
9
|
- 0
|
10
|
-
version: 0.
|
10
|
+
version: 0.2.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Ben J Woodcroft
|
@@ -15,8 +15,8 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date:
|
19
|
-
default_executable:
|
18
|
+
date: 2012-01-06 00:00:00 +11:00
|
19
|
+
default_executable: signalp.rb
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
22
22
|
type: :development
|
@@ -96,8 +96,8 @@ dependencies:
|
|
96
96
|
prerelease: false
|
97
97
|
description: A wrapper for the signal peptide prediction algorith SignalP. Not very well supported, but seems to work for the author, at least.
|
98
98
|
email: donttrustben near gmail.com
|
99
|
-
executables:
|
100
|
-
|
99
|
+
executables:
|
100
|
+
- signalp.rb
|
101
101
|
extensions: []
|
102
102
|
|
103
103
|
extra_rdoc_files:
|
@@ -111,6 +111,7 @@ files:
|
|
111
111
|
- README.rdoc
|
112
112
|
- Rakefile
|
113
113
|
- VERSION
|
114
|
+
- bin/signalp.rb
|
114
115
|
- bio-signalp.gemspec
|
115
116
|
- lib/bio-signalp.rb
|
116
117
|
- lib/bio/appl/signalp.rb
|