bio-plasmoap 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/Gemfile ADDED
@@ -0,0 +1,13 @@
1
+ source "http://rubygems.org"
2
+
3
+ gem 'bio-signalp', '>=0.2.0'
4
+
5
+ # Add dependencies to develop your gem here.
6
+ # Include everything needed to run rake, tests, features, etc.
7
+ group :development do
8
+ gem "shoulda", ">= 0"
9
+ gem "bundler", "~> 1.0.0"
10
+ gem "jeweler", "~> 1.6.4"
11
+ gem "rcov", ">= 0"
12
+ gem "bio", ">= 1.4.1"
13
+ end
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012 Ben J Woodcroft
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,19 @@
1
+ = bio-plasmoap
2
+
3
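+ An implementation of the PlasmoAP algorithm (Foth, Ralph et al. 2003), which predicts apicoplast transit peptides in Plasmodium falciparum.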
4
+
5
+ == Contributing to bio-plasmoap
6
+
7
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
8
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it
9
+ * Fork the project
10
+ * Start a feature/bugfix branch
11
+ * Commit and push until you are happy with your contribution
12
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
13
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate it to its own commit so I can cherry-pick around it.
14
+
15
+ == Copyright
16
+
17
+ Copyright (c) 2012 Ben J Woodcroft. See LICENSE.txt for
18
+ further details.
19
+
data/Rakefile ADDED
@@ -0,0 +1,53 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'bundler'
5
+ begin
6
+ Bundler.setup(:default, :development)
7
+ rescue Bundler::BundlerError => e
8
+ $stderr.puts e.message
9
+ $stderr.puts "Run `bundle install` to install missing gems"
10
+ exit e.status_code
11
+ end
12
+ require 'rake'
13
+
14
+ require 'jeweler'
15
+ Jeweler::Tasks.new do |gem|
16
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
17
+ gem.name = "bio-plasmoap"
18
+ gem.homepage = "http://github.com/wwood/bioruby-plasmoap"
19
+ gem.license = "MIT"
20
+ gem.summary = %Q{Implementation of the PlasmoAP algorithm}
21
+ gem.description = %Q{The PlasmoAP algorithm (Foth and Ralph et. al. 2003) predicts apicoplast transit peptides in Plasmodium falciparum. This is an implementation of that algorithm.}
22
+ gem.email = "gmail.com after donttrustben"
23
+ gem.authors = ["Ben J Woodcroft"]
24
+ # dependencies defined in Gemfile
25
+ end
26
+ Jeweler::RubygemsDotOrgTasks.new
27
+
28
+ require 'rake/testtask'
29
+ Rake::TestTask.new(:test) do |test|
30
+ test.libs << 'lib' << 'test'
31
+ test.pattern = 'test/**/test_*.rb'
32
+ test.verbose = true
33
+ end
34
+
35
+ require 'rcov/rcovtask'
36
+ Rcov::RcovTask.new do |test|
37
+ test.libs << 'test'
38
+ test.pattern = 'test/**/test_*.rb'
39
+ test.verbose = true
40
+ test.rcov_opts << '--exclude "gems/*"'
41
+ end
42
+
43
+ task :default => :test
44
+
45
# Documentation task.
#
# `rake/rdoctask` was deprecated in Rake 0.9 and later removed, so requiring
# it unconditionally makes the whole Rakefile fail to load on current Rake.
# Prefer the RDoc-provided task and only fall back to the legacy one when
# `rdoc/task` is not available.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # Strip so a trailing newline in VERSION does not end up in the title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "bio-plasmoap #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
data/bin/plasmoap.rb ADDED
@@ -0,0 +1,26 @@
1
+
2
require 'bio-plasmoap'

# Command line entry point: reads sequences from the files named on the
# command line (or standard input) and prints one tab-separated row per
# sequence with its PlasmoAP prediction.
#
# The guard compares base names rather than full paths. When the gem is
# installed, RubyGems runs this file through a generated wrapper of the same
# name, so `$0` is the wrapper's path and a plain `$0 == __FILE__` check would
# be false, making the installed executable do nothing.
if File.basename($0) == File.basename(__FILE__)
  runner = Bio::PlasmoAP.new

  # Header row; the column order matches the rows printed below.
  puts [
    'Name',
    'PlasmoAP Score',
    'Apicoplast Targeted',
    'Points'
  ].join("\t")

  Bio::FlatFile.auto(ARGF).each do |seq|
    result = runner.calculate_score(seq.seq)
    puts [
      seq.definition,
      result.to_s,
      result.apicoplast_targeted? ? 1 : 0, # yes/no as 1/0
      result.points
    ].join("\t")
  end
end
@@ -0,0 +1 @@
1
+ require 'bio/appl/plasmoap'
@@ -0,0 +1,201 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'bio'
4
+ require 'bio-signalp'
5
+
6
+ module Bio
7
+ # PlasmoAP is a program for predicting apicoplast targeting sequences
8
+ # in Plasmodium falciparum (the most fatal causative agent of malaria).
9
+ # This algorithm was designed based on what was outlined in the
10
+ # PlasmoAP journal article
11
+ #
12
+ # Dissecting apicoplast targeting in the malaria parasite Plasmodium falciparum.
13
+ # Foth BJ, Ralph SA, Tonkin CJ, Struck NS, Fraunholz M, Roos DS, Cowman AF, McFadden GI.
14
+ # Science. 2003 Jan 31;299(5607):705-8.
15
+ # PMID: 12560551
16
class PlasmoAP
  # Residues counted as acidic and basic by the scoring rules.
  ACIDIC_AMINO_ACIDS = %w(D E).freeze
  BASIC_AMINO_ACIDS = %w(H K R).freeze

  # Calculate the PlasmoAP score for a sequence (a string of amino acids).
  #
  # sequence - the amino acids to test; a String or a Bio::Sequence::AA.
  # has_signal_sequence - whether the protein has a signal peptide. The
  #   default nil means it is worked out by SignalP.
  # signalp_cleaved_sequence - the sequence with the signal peptide removed.
  #   Required whenever has_signal_sequence is given; ignored (recomputed by
  #   SignalP) when has_signal_sequence is nil.
  #
  # Returns a PlasmoAPResult holding 0 to 5 points: 2 for rule set 1, 2 for
  # rule set 2 and 1 for the additional rule.
  # Raises ArgumentError if has_signal_sequence is given without
  # signalp_cleaved_sequence.
  def calculate_score(sequence, has_signal_sequence = nil, signalp_cleaved_sequence = nil)
    if has_signal_sequence.nil?
      # to_s means the sequence can be an amino acid string or a proper Bio::Sequence::AA object
      signalp = Bio::SignalP::Wrapper.new.calculate(sequence.to_s)
      has_signal_sequence = signalp.classical_signal_sequence?
      signalp_cleaved_sequence = signalp.cleave(sequence)
    elsif signalp_cleaved_sequence.nil?
      raise ArgumentError, "if the has_signal_sequence parameter is defined, then so must the signalp_cleaved_sequence be as well"
    end

    # Both sets of rules need a signal peptide
    return PlasmoAPResult.new(0) unless has_signal_sequence

    cleaved = Bio::Sequence::AA.new(signalp_cleaved_sequence)

    points = 0
    points += 2 if set1?(cleaved)
    points += 2 if set2?(cleaved)
    points += 1 if additional?(cleaved)
    PlasmoAPResult.new(points)
  end

  private

  # Set 1: a sequence is considered ‘positive’ if i) it starts with a signal
  # peptide, ii) the 15 amino acids following the predicted signal peptide do
  # not contain more than 2 acidic residues, iii) the 80 amino acids following
  # the predicted signal peptide cleavage site contain a stretch of 40 amino
  # acids with a total of at least 9 asparagines and/or lysines, and iv) the
  # asparagine/lysine-enriched region has a ratio of basic to acidic residues
  # of at least 5 to 3.
  #
  # As implemented here, the 80-residue region searched in iii) starts after
  # the 15 residues examined in ii).
  def set1?(cleaved_amino_acids)
    return false if cleaved_amino_acids.length <= 15

    leader = Bio::Sequence::AA.new(cleaved_amino_acids[0..14])
    return false if acidic_count(leader) > 2 # ii)

    region = Bio::Sequence::AA.new(cleaved_amino_acids[15..15 + 80 - 1])
    enriched = enriched_window(region) # iii)
    return false unless enriched

    basic_to_acidic_ratio(enriched) >= 5.0 / 3.0 # iv)
  end

  # Set 2: a sequence is considered ‘positive’ if it i) starts with a signal
  # peptide, ii) if the 22 amino acids following the predicted signal peptide
  # cleavage site exhibit a ratio of basic to acidic residues of at least 10
  # to 7, iii) if the 80 amino acids following the predicted signal peptide
  # cleavage site contain a stretch of 40 amino acids with a total of at least
  # 9 asparagines and/or lysines, and iv) if the asparagine/lysine-enriched
  # region has a ratio of basic to acidic residues of at least 10 to 9.
  #
  # As implemented here, the 80-residue region searched in iii) starts after
  # the 22 residues examined in ii).
  def set2?(cleaved_amino_acids)
    return false if cleaved_amino_acids.length <= 21

    leader = Bio::Sequence::AA.new(cleaved_amino_acids[0..21])
    return false unless basic_to_acidic_ratio(leader) >= 10.0 / 7.0 # ii)

    region = Bio::Sequence::AA.new(cleaved_amino_acids[22..22 + 80 - 1])
    enriched = enriched_window(region) # iii)
    return false unless enriched

    basic_to_acidic_ratio(enriched) >= 10.0 / 9.0 # iv)
  end

  # Additional point: looks at the first charged residue after the signal
  # peptide. Returns true when it is basic, false when it is acidic and nil
  # when the sequence contains no charged residue at all.
  def additional?(cleaved_amino_acids)
    cleaved_amino_acids.each_char do |residue|
      return true if BASIC_AMINO_ACIDS.include?(residue)
      return false if ACIDIC_AMINO_ACIDS.include?(residue)
    end
    nil
  end

  # Return the first 40-residue window of the given Bio::Sequence::AA that
  # holds at least 9 asparagines and/or lysines, or nil when there is none
  # (including when the region is shorter than 40 residues).
  def enriched_window(region)
    region.window_search(40) do |window|
      comp = window.composition
      return window if comp['N'] + comp['K'] >= 9 # only the first one is needed
    end
    nil
  end

  # Ratio of basic to acidic residues as a Float. With no acidic residues this
  # is Infinity (passes any threshold) when there is at least one basic
  # residue, and NaN (fails any threshold) when there is none.
  def basic_to_acidic_ratio(amino_acid_sequence)
    basic_count(amino_acid_sequence).to_f / acidic_count(amino_acid_sequence).to_f
  end

  # Return the number of residues considered acidic in this amino acid
  # sequence in neutral conditions. Amino acids considered acidic are D and E.
  # Input is a Bio::Sequence::AA object.
  def acidic_count(amino_acid_sequence)
    residue_count(amino_acid_sequence, ACIDIC_AMINO_ACIDS)
  end

  # Return the number of residues considered basic in this amino acid
  # sequence in neutral conditions. Amino acids considered basic are H, K and
  # R. Input is a Bio::Sequence::AA object.
  def basic_count(amino_acid_sequence)
    residue_count(amino_acid_sequence, BASIC_AMINO_ACIDS)
  end

  # Sum the occurrences of the given one-letter residues in the sequence.
  def residue_count(amino_acid_sequence, residues)
    comp = amino_acid_sequence.composition
    residues.inject(0) { |total, residue| total + (comp[residue] || 0) }
  end
end # End class PlasmoAP
165
+
166
+
167
+
168
# The outcome of a PlasmoAP prediction: a point total from 0 to 5 together
# with helpers to render and interpret it.
class PlasmoAPResult
  attr_reader :points

  # points - the score, which must lie between 0 and 5 inclusive.
  #
  # Raises ArgumentError (a StandardError, so an ordinary `rescue` catches
  # it) when the score is outside that range or is not a number.
  def initialize(points)
    raise ArgumentError, "Bad PlasmoAP Score points: #{points}" unless (0..5).include?(points)
    @points = points
  end

  # The score as a symbol string: '-' for 0 to 2 points, '0' for 3,
  # '+' for 4 and '++' for 5.
  def to_s
    case @points
    when 0..2 then '-'
    when 3 then '0'
    when 4 then '+'
    when 5 then '++'
    end
  end

  # Two results are equal when they carry the same points. Anything that
  # does not expose `points` is simply not equal, rather than an error.
  def ==(another)
    another.respond_to?(:points) && @points == another.points
  end
  alias_method :eql?, :==

  # Kept consistent with == so equal results behave as the same Hash key.
  def hash
    @points.hash
  end

  # '+' or '++' scores were taken as apicoplast targeted in the paper;
  # does this result pass that test?
  def apicoplast_targeted?
    @points >= 4
  end

  alias_method :predicted?, :apicoplast_targeted?
  alias_method :signal?, :apicoplast_targeted?
end
201
+ end # End module Bio
data/test/helper.rb ADDED
@@ -0,0 +1,18 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+ require 'test/unit'
11
+ require 'shoulda'
12
+
13
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
14
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
15
+ require 'bio-plasmoap'
16
+
17
+ class Test::Unit::TestCase
18
+ end
@@ -0,0 +1,119 @@
1
+ require 'helper'
2
+
3
# Tests for Bio::PlasmoAP against the constructs from the Science paper's
# supplementary material. The constructs use ACP as the base amino acid
# sequence and deviate from it in the transit peptide.
class TestBioPlasmoap < Test::Unit::TestCase
  SIGNAL = 'MKILLLCIIFLYYVNA'
  TRANSIT = 'FKNTQKDGVSLQILKKKRSNQVNF'
  REST = 'LNRKNDYNLIKNKNPSSSLKSTFDDIKKIISKQLSVEEDKIQMNSNFTKDLGADSLDLVELIMALEEKFNVTISDQDALKINTVQDAIDYIEKNNKQ'

  def setup
    # One row per construct: [name, sequence, expected points, expected score].
    # Keeping these together avoids parallel arrays drifting out of step.
    @constructs = [
      ['base',               SIGNAL + TRANSIT + REST,                                   5, '++'],
      ['no signal peptide',  'M' + TRANSIT + REST,                                      0, '-'],
      ['no transit peptide', SIGNAL + REST,                                             1, '-'],
      ['2 A changes',        SIGNAL + mutated_transit(1 => 'A', 5 => 'A') + REST,       4, '+'],
      ['1 E change',         SIGNAL + mutated_transit(5 => 'E') + REST,                 5, '++'],
      ['another 1 E change', SIGNAL + mutated_transit(1 => 'E') + REST,                 4, '+'],
      ['2 E change',         SIGNAL + mutated_transit(1 => 'E', 5 => 'E') + REST,       0, '-'],
      ['2 D change',         SIGNAL + mutated_transit(1 => 'D', 5 => 'D') + REST,       0, '-'],
      ['3 A change',         SIGNAL + mutated_transit(10 => 'A', 12 => 'A', 13 => 'A') + REST, 5, '++']
    ]

    @plasmoap = Bio::PlasmoAP.new
  end

  def test_official_paper_constructs
    @constructs.each_with_index do |(name, construct, points, score), i|
      obj = @plasmoap.calculate_score(construct)
      assert_kind_of Bio::PlasmoAPResult, obj
      assert_equal points, obj.points, "#{name}: #{i}: #{construct}"
      assert_equal score, obj.to_s, "#{name}: #{i}"
    end
  end

  def test_short
    obj = @plasmoap.calculate_score('MMM')
    assert_equal 0, obj.points
  end

  def test_define_signal
    full = SIGNAL + TRANSIT + REST
    # Stating whether there is a signal peptide requires the cleaved sequence too.
    assert_raise ArgumentError do
      @plasmoap.calculate_score(full, false)
    end
    assert_equal 0, @plasmoap.calculate_score(full, false, full).points
    assert_equal 5, @plasmoap.calculate_score(full, true, TRANSIT + REST).points
  end

  private

  # Return a copy of the transit peptide with the residues at the given
  # zero-based positions replaced, e.g. mutated_transit(1 => 'A').
  def mutated_transit(substitutions)
    mutated = TRANSIT.dup
    substitutions.each { |position, residue| mutated[position] = residue }
    mutated
  end
end
metadata ADDED
@@ -0,0 +1,129 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bio-plasmoap
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Ben J Woodcroft
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-01-06 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bio-signalp
16
+ requirement: &69055880 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 0.2.0
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *69055880
25
+ - !ruby/object:Gem::Dependency
26
+ name: shoulda
27
+ requirement: &68875950 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *68875950
36
+ - !ruby/object:Gem::Dependency
37
+ name: bundler
38
+ requirement: &68874120 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ~>
42
+ - !ruby/object:Gem::Version
43
+ version: 1.0.0
44
+ type: :development
45
+ prerelease: false
46
+ version_requirements: *68874120
47
+ - !ruby/object:Gem::Dependency
48
+ name: jeweler
49
+ requirement: &68872090 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: 1.6.4
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: *68872090
58
+ - !ruby/object:Gem::Dependency
59
+ name: rcov
60
+ requirement: &68870680 !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ! '>='
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
66
+ type: :development
67
+ prerelease: false
68
+ version_requirements: *68870680
69
+ - !ruby/object:Gem::Dependency
70
+ name: bio
71
+ requirement: &68869790 !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ! '>='
75
+ - !ruby/object:Gem::Version
76
+ version: 1.4.1
77
+ type: :development
78
+ prerelease: false
79
+ version_requirements: *68869790
80
+ description: The PlasmoAP algorithm (Foth and Ralph et. al. 2003) predicts apicoplast
81
+ transit peptides in Plasmodium falciparum. This is an implementation of that algorithm.
82
+ email: gmail.com after donttrustben
83
+ executables:
84
+ - plasmoap.rb
85
+ extensions: []
86
+ extra_rdoc_files:
87
+ - LICENSE.txt
88
+ - README.rdoc
89
+ files:
90
+ - .document
91
+ - Gemfile
92
+ - LICENSE.txt
93
+ - README.rdoc
94
+ - Rakefile
95
+ - VERSION
96
+ - bin/plasmoap.rb
97
+ - lib/bio-plasmoap.rb
98
+ - lib/bio/appl/plasmoap.rb
99
+ - test/helper.rb
100
+ - test/test_bio-plasmoap.rb
101
+ homepage: http://github.com/wwood/bioruby-plasmoap
102
+ licenses:
103
+ - MIT
104
+ post_install_message:
105
+ rdoc_options: []
106
+ require_paths:
107
+ - lib
108
+ required_ruby_version: !ruby/object:Gem::Requirement
109
+ none: false
110
+ requirements:
111
+ - - ! '>='
112
+ - !ruby/object:Gem::Version
113
+ version: '0'
114
+ segments:
115
+ - 0
116
+ hash: -824018541
117
+ required_rubygems_version: !ruby/object:Gem::Requirement
118
+ none: false
119
+ requirements:
120
+ - - ! '>='
121
+ - !ruby/object:Gem::Version
122
+ version: '0'
123
+ requirements: []
124
+ rubyforge_project:
125
+ rubygems_version: 1.8.6
126
+ signing_key:
127
+ specification_version: 3
128
+ summary: Implementation of the PlasmoAP algorithm
129
+ test_files: []