bio-signalp 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,12 +1,17 @@
1
1
  = bio-signalp
2
2
 
3
- A wrapper for the signal peptide prediction algorithm SignalP.
3
+ A wrapper for the signal peptide prediction algorithm SignalP (version 3.0).
4
4
 
5
- Using this bio-gem requires SignalP to be locally installed and configured correctly. http://www.cbs.dtu.dk/services/SignalP/ has instructions on how it may be downloaded. This gem requires that the signalp executable is available from the command line (i.e. running 'signalp' works at the command).
5
+ Using this bio-gem requires SignalP to be locally installed and configured correctly. http://www.cbs.dtu.dk/services/SignalP-3.0/ has instructions on how it may be downloaded. This gem requires that the signalp executable is available from the command line (i.e. running 'signalp' works at the command prompt), so the steps to set up are:
6
6
 
7
- Only compatible with Ruby 1.8 at this stage, because of the dependency on the rio gem. This might be replaced in the future with using BioRuby's Bio::Command class.
7
+ # Download SignalP 3.0 and unpack
8
+ # Modify the signalp script in the unpacked directory
9
+ # Add the unpacked directory to your path
10
+ # Install this bioruby plugin (gem install bio-signalp)
8
11
 
9
- This bio-gem is merely a software library, and does not (currently) include any scripts. An example script is freely available at https://github.com/wwood/bbbin/blob/master/signalp.rb
12
+ This bio-gem includes a library for interacting with signalp programmatically and, as of version 0.1.1, a script as well.
13
+
14
+ NOTE: Only tested with SignalP 3.0 at this point. Hopefully 4.0 will be tested in the future.
10
15
 
11
16
  == Contributing to bio-signalp
12
17
 
@@ -20,6 +25,6 @@ This bio-gem is merely a software library, and does not (currently) include any
20
25
 
21
26
  == Copyright
22
27
 
23
- Copyright (c) 2011 Ben J Woodcroft. See LICENSE.txt for
28
+ Copyright (c) 2011-2012 Ben J Woodcroft. See LICENSE.txt for
24
29
  further details.
25
30
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.2.0
@@ -0,0 +1,114 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rubygems'
4
+ require 'bio'
5
+ require 'bio-signalp'
6
+
7
+ # if this was not called as a module, run as a script.
8
+ if $0 == __FILE__
9
+ require 'bio'
10
+ require 'optparse'
11
+
12
+ runner = Bio::SignalP::Wrapper.new
13
+
14
+ options = ARGV.getopts("sShvfF") #s for summary, no args required
15
+ if options['h']
16
+ $stderr.puts "Usage: signalp.rb [-svf] <my.fasta>"
17
+ $stderr.puts "Where my.fasta is the name of the fasta file you want to analyse. Default output is all the sequences with their signal sequences cleaved."
18
+ $stderr.puts "-s: summary: print a tab separated table indicating if the sequence had a signal peptide according to the HMM and NN results, respectively."
19
+ $stderr.puts "-S: bigger_summary: like -s, except also includes where the cleavage site is predicted"
20
+ $stderr.puts "-v: verbose summary: much like -s except more details of the prediction are predicted."
21
+ $stderr.puts "-f: filter in: print those sequences that have a signal peptide"
22
+ $stderr.puts "-F: filter out: print those sequences that don't have a signal peptide"
23
+ exit
24
+ end
25
+
26
+ # Print headers if required
27
+ if options['s']
28
+ puts [
29
+ 'Name',
30
+ 'NN Prediction',
31
+ 'HMM Prediction'
32
+ ].join("\t")
33
+ elsif options['S']
34
+ puts [
35
+ 'Name',
36
+ 'NN Prediction',
37
+ 'HMM Prediction',
38
+ 'Predicted?',
39
+ 'Cleavege site (if predicted)'
40
+ ].join("\t")
41
+
42
+ elsif options['v']
43
+ # [:nn_Cmax, :nn_Cmax_position, :nn_Cmax_prediction,
44
+ # :nn_Ymax, :nn_Ymax_position, :nn_Ymax_prediction,
45
+ # :nn_Smax, :nn_Smax_position, :nn_Smax_prediction,
46
+ # :nn_Smean, :nn_Smean_prediction,
47
+ # :nn_D, :nn_D_prediction]
48
+ # @@hmm_results = [
49
+ # :hmm_result, :hmm_Cmax, :hmm_Cmax_position, :hmm_Cmax_prediction, :hmm_Sprob, :hmm_Sprob_prediction]
50
+ puts [
51
+ 'Name',
52
+ 'NN Cmax',
53
+ 'NN Cmax position',
54
+ 'NN Cmax prediction',
55
+ 'NN Ymax',
56
+ 'NN Ymax position',
57
+ 'NN Ymax prediction',
58
+ 'NN Smax',
59
+ 'NN Smax position',
60
+ 'NN Smax prediction',
61
+ 'NN Smean',
62
+ 'NN Smean prediction',
63
+ 'NN D',
64
+ 'NN D prediction',
65
+ 'HMM result',
66
+ 'HMM Cmax',
67
+ 'HMM Cmax position',
68
+ 'HMM Cmax prediction',
69
+ 'HMM Sprob',
70
+ 'HMM Sprob prediction',
71
+ ].join("\t")
72
+ end
73
+
74
+ Bio::FlatFile.open(ARGV[0]).each do |seq|
75
+ result = runner.calculate(seq.seq)
76
+ if options['s']
77
+ puts [
78
+ seq.entry_id,
79
+ result.nn_D_prediction ? 'T' : 'F',
80
+ result.hmm_Sprob_prediction ? 'T' : 'F'
81
+ ].join("\t")
82
+ elsif options['S']
83
+ puts [
84
+ seq.entry_id,
85
+ result.nn_D_prediction ? 'T' : 'F',
86
+ result.hmm_Sprob_prediction ? 'T' : 'F',
87
+ result.signal? ? 'T' : 'F',
88
+ result.signal? ? result.cleavage_site : 0,
89
+ ].join("\t")
90
+ elsif options['v']
91
+ taputs = [seq.definition]
92
+ [:nn_Cmax, :nn_Cmax_position, :nn_Cmax_prediction,
93
+ :nn_Ymax, :nn_Ymax_position, :nn_Ymax_prediction,
94
+ :nn_Smax, :nn_Smax_position, :nn_Smax_prediction,
95
+ :nn_Smean, :nn_Smean_prediction,
96
+ :nn_D, :nn_D_prediction,
97
+ :hmm_result, :hmm_Cmax, :hmm_Cmax_position, :hmm_Cmax_prediction,
98
+ :hmm_Sprob, :hmm_Sprob_prediction].each do |meth|
99
+ taputs.push result.send(meth)
100
+ end
101
+ puts taputs.join("\t")
102
+ elsif options['f']
103
+ if result.signal?
104
+ puts seq
105
+ end
106
+ elsif options['F']
107
+ if !result.signal?
108
+ puts seq
109
+ end
110
+ else
111
+ puts ">#{seq.entry_id}\n#{result.cleave(seq.seq)}"
112
+ end
113
+ end
114
+ end
@@ -5,13 +5,15 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{bio-signalp}
8
- s.version = "0.1.0"
8
+ s.version = "0.2.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Ben J Woodcroft"]
12
- s.date = %q{2011-07-25}
12
+ s.date = %q{2012-01-06}
13
+ s.default_executable = %q{signalp.rb}
13
14
  s.description = %q{A wrapper for the signal peptide prediction algorith SignalP. Not very well supported, but seems to work for the author, at least.}
14
15
  s.email = %q{donttrustben near gmail.com}
16
+ s.executables = ["signalp.rb"]
15
17
  s.extra_rdoc_files = [
16
18
  "LICENSE.txt",
17
19
  "README.rdoc"
@@ -24,6 +26,7 @@ Gem::Specification.new do |s|
24
26
  "README.rdoc",
25
27
  "Rakefile",
26
28
  "VERSION",
29
+ "bin/signalp.rb",
27
30
  "bio-signalp.gemspec",
28
31
  "lib/bio-signalp.rb",
29
32
  "lib/bio/appl/signalp.rb",
@@ -1,31 +1,41 @@
1
1
  # Methods to wrap around the signal peptide prediction program SignalP (version 3.0)
2
- require 'tempfile'
3
- require 'rubygems'
4
- require 'rio'
2
+ require 'open3'
5
3
 
6
4
  # Wrapper around a locally installed SignalP program
7
5
  module Bio
8
6
  class SignalP
9
- class Wrapper
7
+ NUM_FIELDS_IN_SHORT_OUTPUT = 21
8
+
9
+ class Wrapper
10
10
  # Given an amino acid sequence, return a SignalPResult
11
11
  # representing it taken from the file.
12
12
  def calculate(sequence)
13
- Tempfile.open('signalpin') { |tempfilein|
14
- # Write a fasta to the tempfile
15
- tempfilein.puts '>wrapperSeq'
16
- tempfilein.puts "#{sequence}"
17
- tempfilein.close #required. Maybe because it doesn't flush otherwise?
13
+ command = 'signalp -trunc 70 -format short -t euk'
14
+ Open3.popen3(command) do |stdin, stdout, stderr, wait_thr|
15
+ stdin.puts '>wrapperSeq'
16
+ stdin.puts "#{sequence}"
17
+ stdin.close
18
+
19
+ result = stdout.readlines
20
+ error = stderr.readlines
21
+
22
+ unless error.empty?
23
+ raise Exception, "There appears to be a problem while running signalp:\n#{error}"
24
+ end
18
25
 
19
- Tempfile.open('signalpout') {|out|
20
- result = system("signalp -trunc 70 -format short -t euk #{tempfilein.path} >#{out.path}")
21
-
22
- if !result
23
- raise Exception, "Running signalp program failed. $? is #{$!.inspect}"
24
- end
25
- line = rio(out.path)[2][0].strip
26
- return Result.create_from_line(line)
27
- }
28
- }
26
+ # Error checking
27
+ num_expected_result_lines = 3
28
+ unless result.length == num_expected_result_lines
29
+ raise Exception, "Unexpected number of lines found in SignalP output (#{result.length}, expected #{num_expected_result_lines}):\n#{result}"
30
+ end
31
+
32
+ splits = result[2].strip.split(/[ \t]+/)
33
+ if splits.length != NUM_FIELDS_IN_SHORT_OUTPUT
34
+ raise Exception, "Bad SignalP output line found. Are you using SignalP 3.0? (expected #{NUM_FIELDS_IN_SHORT_OUTPUT} fields, found #{splits.length} fields):\n#{result[2]}"
35
+ end
36
+
37
+ return Result.create_from_line(result[2].strip)
38
+ end
29
39
  end
30
40
  end
31
41
 
@@ -55,7 +65,7 @@ module Bio
55
65
  # # name Cmax pos ? Ymax pos ? Smax pos ? Smean ? D ? # name ! Cmax pos ? Sprob ?
56
66
  # 526.m04658 0.734 19 Y 0.686 19 Y 0.933 6 Y 0.760 Y 0.723 Y 526.m04658 Q 0.037 19 N 0.004 N
57
67
  matches = line.split(/[ \t]+/)
58
- if matches.length != 21
68
+ if matches.length != NUM_FIELDS_IN_SHORT_OUTPUT
59
69
  raise Exception, "Bad SignalP Short Line Found (#{matches.length}): '#{line}'"
60
70
  end
61
71
 
@@ -1,7 +1,17 @@
1
1
  require 'helper'
2
2
 
3
3
  class TestBioSignalp < Test::Unit::TestCase
4
- should "probably rename this file and start testing for real" do
5
- flunk "hey buddy, you should probably rename this file and start testing for real"
4
+ should "positive control" do
5
+ # Known to have a signal peptide
6
+ acp_sequence = 'MKILLLCIIFLYYVNAFKNTQKDGVSLQILKKKRSNQVNFLNRKNDYNLIKNKNPSSSLKSTFDDIKKIISKQLSVEEDKIQMNSNFTKDLGADSLDLVELIMALEEKFNVTISDQDALKINTVQDAIDYIEKNNKQ'
7
+
8
+ assert_equal true, Bio::SignalP::Wrapper.new.calculate(acp_sequence).signal?
9
+ end
10
+
11
+ should "negative control" do
12
+ # Known not to have a signal peptide
13
+ non_signal_sequence = 'KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK'
14
+
15
+ assert_equal false, Bio::SignalP::Wrapper.new.calculate(non_signal_sequence).signal?
6
16
  end
7
17
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-signalp
3
3
  version: !ruby/object:Gem::Version
4
- hash: 27
4
+ hash: 23
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
- - 1
8
+ - 2
9
9
  - 0
10
- version: 0.1.0
10
+ version: 0.2.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Ben J Woodcroft
@@ -15,8 +15,8 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-07-25 00:00:00 +10:00
19
- default_executable:
18
+ date: 2012-01-06 00:00:00 +11:00
19
+ default_executable: signalp.rb
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
22
22
  type: :development
@@ -96,8 +96,8 @@ dependencies:
96
96
  prerelease: false
97
97
  description: A wrapper for the signal peptide prediction algorith SignalP. Not very well supported, but seems to work for the author, at least.
98
98
  email: donttrustben near gmail.com
99
- executables: []
100
-
99
+ executables:
100
+ - signalp.rb
101
101
  extensions: []
102
102
 
103
103
  extra_rdoc_files:
@@ -111,6 +111,7 @@ files:
111
111
  - README.rdoc
112
112
  - Rakefile
113
113
  - VERSION
114
+ - bin/signalp.rb
114
115
  - bio-signalp.gemspec
115
116
  - lib/bio-signalp.rb
116
117
  - lib/bio/appl/signalp.rb