bio-tm_hmm 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.travis.yml ADDED
@@ -0,0 +1,12 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.2
4
+ - 1.9.3
5
+ - jruby-19mode # JRuby in 1.9 mode
6
+ - rbx-19mode
7
+ # - 1.8.7
8
+ # - jruby-18mode # JRuby in 1.8 mode
9
+ # - rbx-18mode
10
+
11
+ # uncomment this line if your project needs to run something other than `rake`:
12
+ # script: bundle exec rspec spec
data/Gemfile ADDED
@@ -0,0 +1,14 @@
1
source "http://rubygems.org"

# Runtime dependencies: gems that the library and bin/bio-tm_hmm load when
# they are actually used.
#
# BioRuby is required at runtime (bin/bio-tm_hmm does `require 'bio'` and the
# wrapper calls Bio::Command / Bio::FlatFile), so it belongs in the default
# group rather than in :development. Jeweler derives the gemspec dependencies
# from this file, so regenerate the gemspec (`rake gemspec`) after changing it.
gem "bio", ">= 1.4.2"

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "shoulda", ">= 0"
  gem "rdoc", "~> 3.12"
  gem "jeweler", "~> 1.8.3"
  gem "bundler", ">= 1.0.21"
end
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012 Ben J Woodcroft
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,56 @@
1
+ = bio-tm_hmm
2
+
3
+ {<img
4
+ src="https://secure.travis-ci.org/wwood/bioruby-tm_hmm.png"
5
+ />}[http://travis-ci.org/#!/wwood/bioruby-tm_hmm]
6
+
7
+ A BioRuby plugin for running TMHMM automatically on multiple sequences in a FASTA file and manipulating the results.
8
+
9
+ Note: this software is under active development!
10
+
11
+ == Installation
12
+
13
+ gem install bio-tm_hmm
14
+
15
+ == Usage
16
+
17
+ bio-tm_hmm my.fasta
18
+
19
+ Where my.fasta is a FASTA file with one or more protein sequences in it. Output will be a description of the transmembrane domains predicted by TMHMM.
20
+
21
+ Other options include -f, which prints out the FASTA sequences that have some number of transmembrane domains in them and ignores those that don't (the converse is -g)
22
+
23
+ bio-tm_hmm -f 2 <my.fasta
24
+
25
+ == Developers
26
+
27
+ To use the library
28
+
29
+ require 'bio-tm_hmm'
30
+
31
+ The API doc is online. For more code examples see also the test files in
32
+ the source tree.
33
+
34
+ == Project home page
35
+
36
+ For information on the source tree, documentation, issues and how to contribute, see
37
+
38
+ http://github.com/wwood/bioruby-tm_hmm
39
+
40
+ The BioRuby community is on IRC server: irc.freenode.org, channel: #bioruby.
41
+
42
+ == Cite
43
+
44
+ If you use this software, please cite one of
45
+
46
+ * {BioRuby: bioinformatics software for the Ruby programming language}[http://dx.doi.org/10.1093/bioinformatics/btq475]
47
+ * {Biogem: an effective tool-based approach for scaling up open source software development in bioinformatics}[http://dx.doi.org/10.1093/bioinformatics/bts080]
48
+
49
+ == Biogems.info
50
+
51
+ This Biogem is published at http://biogems.info/index.html#bio-tm_hmm
52
+
53
+ == Copyright
54
+
55
+ Copyright (c) 2012 Ben J Woodcroft. See LICENSE.txt for further details.
56
+
data/Rakefile ADDED
@@ -0,0 +1,45 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'bundler'
5
+ begin
6
+ Bundler.setup(:default, :development)
7
+ rescue Bundler::BundlerError => e
8
+ $stderr.puts e.message
9
+ $stderr.puts "Run `bundle install` to install missing gems"
10
+ exit e.status_code
11
+ end
12
+ require 'rake'
13
+
14
+ require 'jeweler'
15
+ Jeweler::Tasks.new do |gem|
16
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
17
+ gem.name = "bio-tm_hmm"
18
+ gem.homepage = "http://github.com/wwood/bioruby-tm_hmm"
19
+ gem.license = "MIT"
20
+ gem.summary = %Q{A bioruby plugin for interaction with the transmembrane predictor TMHMM}
21
+ gem.description = %Q{A bioruby plugin for interaction with the transmembrane predictor TMHMM}
22
+ gem.email = "donttrustben@gmail.com"
23
+ gem.authors = ["Ben J. Woodcroft"]
24
+ # dependencies defined in Gemfile
25
+ end
26
+ Jeweler::RubygemsDotOrgTasks.new
27
+
28
+ require 'rake/testtask'
29
+ Rake::TestTask.new(:test) do |test|
30
+ test.libs << 'lib' << 'test'
31
+ test.pattern = 'test/**/test_*.rb'
32
+ test.verbose = true
33
+ end
34
+
35
+ task :default => :test
36
+
37
+ require 'rdoc/task'
38
+ Rake::RDocTask.new do |rdoc|
39
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
40
+
41
+ rdoc.rdoc_dir = 'rdoc'
42
+ rdoc.title = "bio-tm_hmm #{version}"
43
+ rdoc.rdoc_files.include('README*')
44
+ rdoc.rdoc_files.include('lib/**/*.rb')
45
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
data/bin/bio-tm_hmm ADDED
@@ -0,0 +1,69 @@
1
#!/usr/bin/env ruby

# Author:: Ben J. Woodcroft
# Copyright:: 2011
# License:: The Ruby License
#
# Command-line front end for Bio::TMHMM::TmHmmWrapper.
#
# Reads FASTA records from the files named on the command line (or from
# STDIN), runs each sequence through the wrapper, and then either
# * prints one tab-separated line per predicted transmembrane domain
#   (the default), or
# * echoes the records that have at least N domains (-f), or
# * echoes the records that have fewer than N domains (-g).

require 'rubygems'
require 'bio'
require 'bio-tm_hmm'
require 'optparse'

# Threshold used when -f or -g is given without a number. It has to be 1 for
# the switches to do what their help text says: with a threshold of 0 every
# protein has "0 or more" domains, so -f would print everything and -g would
# print nothing.
DEFAULT_MIN_TMDS = 1

# Each filter stays +false+ until its switch is seen; afterwards it holds the
# Integer threshold. Integers (including 0) are truthy in Ruby, so a plain
# truthiness check is enough to tell "switch given" from "switch absent".
options = {
  :filter_in => false,
  :filter_out => false,
}
o = OptionParser.new do |opts|
  opts.banner = ['',
    'Usage: bio-tm_hmm [-fg] [fasta_filename]','',
    "\tfasta file can also be piped in on STDIN.",
    "\twithout arguments, a description of the transmembranes is printed out for each input sequence",''
  ].join("\n")
  opts.on('-f','--filter-in [MIN_TRANSMEMBRANE_DOMAINS]','Print those sequences that have a transmembrane domain. If MIN_TRANSMEMBRANE_DOMAINS is defined, only those proteins with that many TMDs or more are printed out') do |m|
    # m is nil when the optional MIN_TRANSMEMBRANE_DOMAINS is omitted
    options[:filter_in] = m.nil? ? DEFAULT_MIN_TMDS : m.to_i
  end
  opts.on('-g','--filter-out [MIN_TRANSMEMBRANE_DOMAINS]','Print those sequences that do NOT have a transmembrane domain. If MIN_TRANSMEMBRANE_DOMAINS is defined, only those proteins with that many TMDs or more are filtered out') do |m|
    # m is nil when the optional MIN_TRANSMEMBRANE_DOMAINS is omitted
    options[:filter_out] = m.nil? ? DEFAULT_MIN_TMDS : m.to_i
  end
end
o.parse!

runner = Bio::TMHMM::TmHmmWrapper.new

Bio::FlatFile.auto(ARGF).each do |seq|
  result = runner.calculate(seq.seq)
  name = seq.definition
  tmd_count = result.transmembrane_domains.length

  if options[:filter_in]
    # -f takes precedence when both -f and -g are supplied
    puts seq if tmd_count >= options[:filter_in]
  elsif options[:filter_out]
    puts seq unless tmd_count >= options[:filter_out]
  elsif result.has_domain?
    # Default output - a description of the TMDs for each input aaseq.
    # At least one TMD found. Output each on a separate line
    result.transmembrane_domains.each do |tmd|
      puts [
        name,
        result.transmembrane_type,
        tmd.start,
        tmd.stop,
        tmd.orientation
      ].join("\t")
    end
  else
    puts [
      name,
      'No Transmembrane Domain Found'
    ].join("\t")
  end
end
@@ -0,0 +1,68 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = "bio-tm_hmm"
8
+ s.version = "0.0.1"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Ben J. Woodcroft"]
12
+ s.date = "2012-05-03"
13
+ s.description = "A bioruby plugin for interaction with the transmembrane predictor TMHMM"
14
+ s.email = "donttrustben@gmail.com"
15
+ s.executables = ["bio-tm_hmm"]
16
+ s.extra_rdoc_files = [
17
+ "LICENSE.txt",
18
+ "README.rdoc"
19
+ ]
20
+ s.files = [
21
+ ".document",
22
+ ".travis.yml",
23
+ "Gemfile",
24
+ "LICENSE.txt",
25
+ "README.rdoc",
26
+ "Rakefile",
27
+ "VERSION",
28
+ "bin/bio-tm_hmm",
29
+ "bio-tm_hmm.gemspec",
30
+ "lib/bio-tm_hmm.rb",
31
+ "lib/bio/appl/tmhmm/tmhmm_runner.rb",
32
+ "lib/bio/transmembrane.rb",
33
+ "test/data/falciparum1.fa",
34
+ "test/helper.rb",
35
+ "test/test_tm_hmm_wrapper.rb",
36
+ "test/test_transmembrane.rb"
37
+ ]
38
+ s.homepage = "http://github.com/wwood/bioruby-tm_hmm"
39
+ s.licenses = ["MIT"]
40
+ s.require_paths = ["lib"]
41
+ s.rubygems_version = "1.8.21"
42
+ s.summary = "A bioruby plugin for interaction with the transmembrane predictor TMHMM"
43
+
44
+ if s.respond_to? :specification_version then
45
+ s.specification_version = 3
46
+
47
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
48
+ s.add_development_dependency(%q<shoulda>, [">= 0"])
49
+ s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
50
+ s.add_development_dependency(%q<jeweler>, ["~> 1.8.3"])
51
+ s.add_development_dependency(%q<bundler>, [">= 1.0.21"])
52
+ s.add_development_dependency(%q<bio>, [">= 1.4.2"])
53
+ else
54
+ s.add_dependency(%q<shoulda>, [">= 0"])
55
+ s.add_dependency(%q<rdoc>, ["~> 3.12"])
56
+ s.add_dependency(%q<jeweler>, ["~> 1.8.3"])
57
+ s.add_dependency(%q<bundler>, [">= 1.0.21"])
58
+ s.add_dependency(%q<bio>, [">= 1.4.2"])
59
+ end
60
+ else
61
+ s.add_dependency(%q<shoulda>, [">= 0"])
62
+ s.add_dependency(%q<rdoc>, ["~> 3.12"])
63
+ s.add_dependency(%q<jeweler>, ["~> 1.8.3"])
64
+ s.add_dependency(%q<bundler>, [">= 1.0.21"])
65
+ s.add_dependency(%q<bio>, [">= 1.4.2"])
66
+ end
67
+ end
68
+
@@ -0,0 +1,91 @@
1
+ require 'tempfile'
2
+
3
module Bio
  class TMHMM
    # Runs the locally installed +tmhmm+ executable on a single sequence.
    class TmHmmWrapper
      # Given an amino acid sequence, return a TransmembraneProtein
      # made up of the predicted transmembrane domains.
      #
      # The sequence is written to <tt>tmhmm -short</tt> as one FASTA record
      # named +wrapperSeq+, and the first line the program prints is parsed
      # by TmHmmResult.create_from_short_line.
      #
      # Raises Exception when the program prints nothing at all, and
      # whatever TmHmmResult.create_from_short_line raises when the line
      # cannot be parsed.
      def calculate(sequence)
        # The command is given :chdir => the temporary directory, presumably
        # so that any scratch files tmhmm writes do not litter the caller's
        # working directory.
        Bio::Command.mktmpdir do |d|
          line = nil
          Bio::Command.call_command(['tmhmm','-short'], :chdir => d) do |io|
            io.puts '>wrapperSeq'
            io.puts sequence
            io.close_write
            # IO#gets returns nil at end-of-file, whereas IO#readline raises
            # EOFError - which would bypass the friendlier error below.
            line = io.gets
          end

          if line.nil?
            raise Exception, "Error running locally installed TMHMM program 'tmhmm'. Is it properly installed?"
          end

          return TmHmmResult.create_from_short_line(line)
        end
      end
    end

    # Parser for the one-line-per-protein output of <tt>tmhmm -short</tt>.
    class TmHmmResult
      attr_reader :domains

      # Build an OrientedTransmembraneDomainProtein from one line of
      # <tt>tmhmm -short</tt> output, eg.
      #
      #   PFF0290w len=293 ExpAA=145.77 First60=20.51 PredHel=7 Topology=o39-61i101-120o140-162i169-186o196-218i230-252o262-284i
      #
      # The line must consist of exactly six tab-separated fields, the last
      # of which is the topology. The topology starts with the location
      # (+i+ or +o+) of the N-terminus, followed by zero or more
      # <tt>start-stop</tt> helices, each followed by the location of the
      # stretch after that helix. That trailing letter decides the
      # orientation of the helix (see parse_orientation_from_last_location).
      #
      # Returns a protein without any domains when no helix is predicted.
      # Raises Exception when the line or its topology field is malformed.
      def self.create_from_short_line(line)
        protein = Bio::Transmembrane::OrientedTransmembraneDomainProtein.new

        splits = line.strip.split("\t")
        if splits.length != 6
          raise Exception, "Incorrectly parsed short line from TMHMM: #{line}"
        end

        topology = splits[5]
        prefix = topology.match(/\ATopology=[io]/)
        if prefix.nil?
          raise Exception, "Badly parsed Topology hit: #{topology}"
        end
        helices = prefix.post_match

        # no transmembrane domains predicted
        return protein unless helices =~ /\A\d+-/

        # Validate the whole remainder up front so that a damaged topology
        # is reported rather than silently turned into wrong coordinates.
        unless helices =~ /\A(?:\d+-\d+[io])+\z/
          raise Exception, "Failed to parse the last bit of: #{helices}"
        end

        helices.scan(/(\d+)-(\d+)([io])/) do |start, stop, following_location|
          tmd = Bio::Transmembrane::OrientedTransmembraneDomain.new
          tmd.start = start.to_i
          tmd.stop = stop.to_i
          tmd.orientation = parse_orientation_from_last_location(following_location)
          protein.push tmd
        end

        protein
      end

      # Translate the location letter that follows a helix in the topology
      # string into an orientation constant: a helix that ends on the inside
      # (+i+) runs OUTSIDE_IN, one that ends on the outside (+o+) runs
      # INSIDE_OUT.
      #
      # Raises Exception for any other value.
      def self.parse_orientation_from_last_location(last_location)
        case last_location
        when 'i'
          return Bio::Transmembrane::OrientedTransmembraneDomain::OUTSIDE_IN
        when 'o'
          return Bio::Transmembrane::OrientedTransmembraneDomain::INSIDE_OUT
        else
          raise Exception, "Badly parsed topology hit due to orientation character: #{last_location}"
        end
      end
    end
  end
end
@@ -0,0 +1,222 @@
1
+
2
+ # Monkey-patch Array#pairs into the core Array class
3
class Array
  # Return an array of all pairs of elements from this array (each pair is
  # itself a two-element array).
  #
  # Without an argument the pairs are drawn from within this array: every
  # combination of two different positions, in index order, so
  # <tt>[1,2,3].pairs</tt> is <tt>[[1,2],[1,3],[2,3]]</tt>.
  #
  # If another_array is given (and is neither nil nor false), every element
  # of this array is instead paired with every element of another_array,
  # and no pairs are formed within either array:
  # <tt>[1,2].pairs([3,4])</tt> is <tt>[[1,3],[1,4],[2,3],[2,4]]</tt>.
  # another_array only has to respond to +length+ and to integer +[]+.
  def pairs(another_array = nil)
    return combination(2).to_a unless another_array

    # Read the other collection by index so that anything indexable is
    # accepted, not just a real Array.
    others = (0...another_array.length).map { |index| another_array[index] }
    product(others)
  end
end
30
+
31
+
32
+
33
+ # a simple class to represent a transmembrane domain, and a transmembrane domain containing protein
34
module Bio
  module Transmembrane
    # A protein described by the list of its transmembrane domains (TMDs).
    # Enumerable is mixed in, iterating over the domains.
    class TransmembraneProtein
      attr_accessor :transmembrane_domains, :name
      include Enumerable #so each, each_with_index, etc. work

      def initialize
        # default no domains to empty array not nil
        @transmembrane_domains = []
      end

      # Append a domain to this protein.
      def push(transmembrane_domain)
        @transmembrane_domains.push transmembrane_domain
      end

      # Mean length of the domains, as a Float. With no domains this is
      # 0.0/0.0, ie. NaN.
      def average_length
        total = @transmembrane_domains.inject(0) { |sum, tmd| sum + tmd.length }
        total.to_f / @transmembrane_domains.length.to_f
      end

      # Length of the shortest domain, or nil when there are no domains.
      #
      # The lengths themselves are compared rather than the domain objects,
      # because not every domain class orders itself by length.
      def minimum_length
        @transmembrane_domains.collect { |tmd| tmd.length }.min
      end

      # Length of the longest domain, or nil when there are no domains.
      def maximum_length
        @transmembrane_domains.collect { |tmd| tmd.length }.max
      end

      # True when at least one domain is present.
      def has_domain?
        !@transmembrane_domains.empty?
      end

      # True when more than one domain is present.
      def multiple_transmembrane_domains?
        @transmembrane_domains.length > 1
      end

      # Return every [own_domain, other_domain] pair, taken between this
      # protein and another_transmembrane_protein, whose members share at
      # least one residue position. Empty array when nothing overlaps.
      def overlaps(another_transmembrane_protein)
        domain_pairs_with(another_transmembrane_protein).reject do |mine, theirs|
          mine.intersection(theirs).nil?
        end
      end

      # return the pair of transmembrane domains that overlaps the best (ie for the longest period)
      #
      # The pair is [own_domain, other_domain]. Returns nil when no pair
      # overlaps at all, including when either protein has no domains.
      def best_overlap(another_transmembrane_protein)
        scored = domain_pairs_with(another_transmembrane_protein).collect do |mine, theirs|
          [mine.overlap_length(theirs), [mine, theirs]]
        end
        best = scored.max { |a, b| a[0] <=> b[0] }
        return nil if best.nil? || best[0] == 0
        best[1]
      end

      # Yield each domain in turn.
      def each
        @transmembrane_domains.each{|t| yield t}
      end

      # True when residue_number lies inside any domain (start and stop are
      # both inclusive).
      def residue_number_contained?(residue_number)
        @transmembrane_domains.any? do |tmd|
          tmd.start <= residue_number && tmd.stop >= residue_number
        end
      end

      private

      # Every [own_domain, other_domain] combination between this protein
      # and another one, own domains varying slowest.
      def domain_pairs_with(another_transmembrane_protein)
        @transmembrane_domains.product(another_transmembrane_protein.transmembrane_domains)
      end
    end

    # A TransmembraneProtein whose domains carry an orientation, which lets
    # single-pass proteins be classified as type I or type II.
    class OrientedTransmembraneDomainProtein<TransmembraneProtein
      # Truthy when there is exactly one domain and it runs outside-in.
      def transmembrane_type_1?
        single_domain_oriented?(OrientedTransmembraneDomain::OUTSIDE_IN)
      end

      # Truthy when there is exactly one domain and it runs inside-out.
      def transmembrane_type_2?
        single_domain_oriented?(OrientedTransmembraneDomain::INSIDE_OUT)
      end

      # 'I', 'II', or 'Unknown' for anything that is not a single-domain
      # protein of a recognised orientation.
      def transmembrane_type
        if transmembrane_type_1?
          return 'I'
        elsif transmembrane_type_2?
          return 'II'
        else
          return 'Unknown'
        end
      end

      private

      # Evaluates to nil when the domain list itself has been set to nil.
      def single_domain_oriented?(orientation)
        @transmembrane_domains &&
          @transmembrane_domains.length == 1 &&
          @transmembrane_domains[0].orientation == orientation
      end
    end

    # The position of one transmembrane domain within a protein.
    class TransmembraneDomainDefinition
      attr_accessor :start, :stop

      # A new TMD. The length is stop-start+1, so start and stop are
      # 'inclusive'
      def initialize(start=nil, stop=nil)
        @start = start
        @stop = stop
      end

      # Number of residues covered by the domain.
      def length
        @stop-@start+1
      end

      # Domains of this class are ordered by length.
      def <=>(other)
        length <=> other.length
      end

      # Two domains are equal when they start and stop at the same
      # positions. Anything that does not expose start and stop is unequal.
      def ==(other)
        other.respond_to?(:start) && other.respond_to?(:stop) &&
          start == other.start &&
          stop == other.stop
      end

      # Cut this domain out of protein_sequence_string. start and stop are
      # 1-based, the string is 0-based. nterm_offset is added to the start
      # and cterm_offset to the stop, so a negative nterm_offset or a
      # positive cterm_offset widens the slice. Positions that would fall
      # before the first residue are pulled back to it; running past the
      # end of the string simply stops at its last residue.
      def sequence(protein_sequence_string, nterm_offset=0, cterm_offset=0)
        first_index = [start + nterm_offset - 1, 0].max
        last_index = [stop + cterm_offset - 1, 0].max

        protein_sequence_string[first_index..last_index]
      end

      # Return the number of amino acids that overlap with another
      # transmembrane domain, or 0 if none are found
      def overlap_length(another_transmembrane_domain_defintion)
        shared = intersection(another_transmembrane_domain_defintion)
        shared.nil? ? 0 : shared.last - shared.first + 1
      end

      # Return a range representing the overlap of this transmembrane domain
      # with another, or nil when they have no position in common.
      #
      # Code inspired by http://billsiggelkow.com/2008/8/29/ruby-range-intersection
      def intersection(another_transmembrane_domain_defintion)
        # The overlap of two inclusive ranges runs from the later start to
        # the earlier stop; it is empty when those two cross over.
        overlap_start = [@start, another_transmembrane_domain_defintion.start].max
        overlap_stop = [@stop, another_transmembrane_domain_defintion.stop].min
        overlap_start > overlap_stop ? nil : (overlap_start..overlap_stop)
      end
      alias_method(:overlap, :intersection)
    end

    # A domain definition with an attached confidence value.
    class ConfidencedTransmembraneDomain<TransmembraneDomainDefinition
      attr_accessor :confidence

      # Order by start, then by stop, then by confidence. The result of the
      # first comparison that is not a tie is returned; that is nil when
      # the compared values cannot be ordered.
      def <=>(other)
        # A tie is 0, which is truthy in Ruby, so it must be tested for
        # explicitly.
        by_start = start <=> other.start
        return by_start unless by_start == 0
        by_stop = stop <=> other.stop
        return by_stop unless by_stop == 0
        confidence <=> other.confidence
      end

      # Equal when start, stop and confidence all match. Anything that does
      # not expose all three is unequal.
      def ==(other)
        other.respond_to?(:start) && other.respond_to?(:stop) &&
          other.respond_to?(:confidence) &&
          start == other.start &&
          stop == other.stop &&
          confidence == other.confidence
      end
    end

    # From Predicting transmembrane protein topology with a hidden markov model: application to complete genomes
    #
    # Another signal shown to be associated with transmembrane helices is
    # the abundance of positively charged residues in the part of the
    # sequence on the cytoplasmic side of the membrane, “the positive inside
    # rule” (von Heijne 1986) and (von Heijne 1994).
    #
    # So, inside means cytosolic. So outside_in means type I transmembrane domain protein
    class OrientedTransmembraneDomain<TransmembraneDomainDefinition
      # The orientation can either be inside out (like a type II transmembrane domain protein)
      INSIDE_OUT = 'inside_out'
      # Or outside in, like a type I transmembrane domain protein)
      OUTSIDE_IN = 'outside_in'
      # or the whole protein is TMD, so orientation is unknown
      UNKNOWN = 'unknown'

      attr_accessor :orientation

      # start and stop are converted with to_i when given; all three
      # attributes are left unset (and so read as nil) when omitted.
      def initialize(start=nil, stop=nil, orientation=nil)
        @start = start.to_i unless start.nil?
        @stop = stop.to_i unless stop.nil?
        @orientation = orientation unless orientation.nil?
      end
    end

    # A class to represent a protein with a signal peptide and a transmembrane
    # domain
    class SignalPeptideTransmembraneDomainProtein<OrientedTransmembraneDomainProtein
      attr_accessor :signal_peptide

      # True when a signal peptide has been assigned.
      def signal?
        !@signal_peptide.nil?
      end
    end

    # The position of a signal peptide within a protein.
    class SignalPeptide
      attr_accessor :start, :stop
    end
  end
end
data/lib/bio-tm_hmm.rb ADDED
@@ -0,0 +1,4 @@
1
+ # General transmembrane domain related data structures
2
+ require 'bio/transmembrane'
3
+ # TMHMM extras
4
+ require 'bio/appl/tmhmm/tmhmm_runner'
@@ -0,0 +1,38 @@
1
+ >Plasmodium_falciparum_3D7|MAL8|PF08_0142|Annotation|Plasmodium_falciparum_Sanger_Stanford_TIGR|(protein coding) erythrocyte membrane protein 1 (PfEMP1)
2
+ MAAAGGGGKDKYKNAQDAKHLLDIIGEDIYKIANDAALKRSGSELKGLLSLAKFEKNPPD
3
+ KQTPEDPCDLDYKYHTNVTSNVIEPCNKRSGKRFSEVSGAECANNRIKGNKGSNGDACAP
4
+ FRRLHVCDRNLEQIDPAKITATHNLLVDVCQAAKFEGQSITQDYPKYLATYNDSPSQICT
5
+ MLARSFADIGDIVRGKDLFRGYDDEEKNRRKKLEQKLKVIFGHIYEELKKHKKLKEEAEE
6
+ RYKKDGDNYYKLREDWWALNRQEIWKAITCGHPGGTYFRQTACGGGTTPTPNKCRCATND
7
+ VPTYFDYVPQYLRWFEEWAEDFCRKRKYKLENAIEKCRGKTKGEKYCDLNGFDCTQTASG
8
+ EKKFVKGHNCHNCSVTCIPFGPWIDNQKKEFLKQRNKYQNEISSNSRKKRSTSNNNYKGY
9
+ DEEFYKILKEDYGDVEQFLEKLSREGICQSQPTVGNQKADAANFTKDNPAKTFSHTEYCQ
10
+ ACPWCGVVCKSGNCTKNPEGSCTEQIRKKVYDDSNTTTIPVLTPEKGKTSILQKYKTFCE
11
+ KPEKHNQINNWECHYEKTDISNNCILGKWEKFQKGQEVMVYHPFFWKWVTEMLDDSIKWR
12
+ KELDNCLKNENKQCISKCNGKCDCYKRWVEQKKEKEWTQIKDHFGKQEDMKEQIRGADPG
13
+ IILEGVLDIEDLFENIKDTYGDVKEIDHIKKLLEEETTVDADNQNKTTIDKLLDHEDKDA
14
+ KGCLQKQNECKEQERDGGARSDSQEPTPRSEVKPDSEDLDDDDEDDPDEEKSEEVENPED
15
+ QGEEEGTKQGSGEKKVDGTEAVQETVAEVTPEKKDEVNPCEIVKTLFEKPENLSDACGLK
16
+ YGPGGKERYSQWKCIPTKPNSDNKGEVGSAGRVARSAPSGEKGSICVPPRRRRLYVKDLE
17
+ TLGDSEVTQVQLRDAFIKCAAVETFFLWDRYKKEKEKKKPQEGVLQLLGTVGTPPTDDEE
18
+ DPPEKMLQKGEIPEEFKRQMFYTLADYKDILFGDQEVIKTLKDSGDENIKDISEKIKKTL
19
+ NGDNNQESGSSPSLSGKKTTPKDWWETYGKDIWEGMVCALTYKNSGDKKIEQVKTADDGE
20
+ DLFQKLKTQYEYNTVTLKDENSGTEGAKPFTPKTVSSSSGEKNPPKLSDFVLRPPYFRYL
21
+ EEWGETFCRQRARMLDKIKKDCNVEENDNRPGGGITKQYSGDGESCKDYLPDDPTTLPDL
22
+ VSSCPKSCSSYRKWINKKKDEFVEQQNAYTEQQNKCQSKSDKAKSDNGFYTRLQNLPDAA
23
+ AFLKTLGSCSKNDIPEYKIDFDVNGETFRYEKYCGTCPEFKINCTKVKCTSGDMQNGCKD
24
+ NKINAANFKTMAQSTEINMLVSDNSGNGSQNDLKDCKTSGIFKGFREDVWTCGKVCGYNV
25
+ CKPKNVNGQNGDGNQILLFNALLKRWVEYFLEDYKKIKHKISHCKNSSEGHTCIKNCVEQ
26
+ WISTKRTEWETIRGRFNDQYKSNDSDVYPVRSFLETWIPKIPVANANNDGKKLIKLSKFD
27
+ NFCSCSASAHSPNGKDDAIDCMINRLQDKIDKCKEKHPQPSAENQTTCDESTLVEDVDDY
28
+ EEQNPENKVGKPAICGNVDTTEPVKEEDEEECKAAESPAEPEQAAEEESVPAAETKDTEN
29
+ QPPQAPDVGPPPLAPAPADQPLDPTILQTTIPFGIAIALTSIVFLFLKKKTKSTIDLLRV
30
+ INIPKSDYDIPTKLSPNRYIPYTSGKYRGKRYIYLEGDSGTDSGYTDHYSDITSSSESEY
31
+ EELDINDIYVPRAPKYKTLIEVVLEPSGNNTTASGNNTTASDTQNDIQNDIPSDIPKTPS
32
+ DTPPPITDDEWNQLKDEFISQYLQSEQPKDVPNDYSSGDIPLNTQPNTLYFDKPQEKPFI
33
+ TSIHDRNLYSGEEYNYDMFNSGKNGPYSDKNDLYSGNHDSLSGNRDPTSANHDSYSGNHH
34
+ PYSGIDLINDSISGNQHIDIYDELLKRKENELFGTNNPKRTSTYSVAKLTNSDPIHNQLE
35
+ LFHTWLDRHRDMCEKWENHHERLAKLKEEWENETHSGNTHPSDSNKTLNTDVSIQIDMDH
36
+ EKRMKEFTNMDTILEDLDKPFNEPYYYDMYDDDIYYDVNDHDTSTVDSNAMDVPSKVQIE
37
+ MDVNTKLVKEKYPIGDVWDI
38
+
data/test/helper.rb ADDED
@@ -0,0 +1,17 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+ require 'test/unit'
11
+
12
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
13
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
14
+ require 'bio-tm_hmm'
15
+
16
+ class Test::Unit::TestCase
17
+ end
@@ -0,0 +1,65 @@
1
+ require 'bio'
2
+ require 'helper'
3
+
4
+ class TmHmmWrapperTest < Test::Unit::TestCase
5
+ include Bio::Transmembrane
6
+
7
+ def test_parser
8
+ result = Bio::TMHMM::TmHmmResult.create_from_short_line('PFA0635c len=555 ExpAA=0.00 First60=0.00 PredHel=0 Topology=o')
9
+ assert result
10
+ assert_equal false, result.has_domain?
11
+
12
+ # test a single TMD
13
+ result = Bio::TMHMM::TmHmmResult.create_from_short_line('PFA0685c len=324 ExpAA=20.36 First60=0.00 PredHel=1 Topology=o281-303i')
14
+ assert result
15
+ assert_equal 1, result.transmembrane_domains.length
16
+ assert_equal 281, result.transmembrane_domains[0].start
17
+ assert_equal 303, result.transmembrane_domains[0].stop
18
+ assert_equal Bio::Transmembrane::OrientedTransmembraneDomain::OUTSIDE_IN,
19
+ result.transmembrane_domains[0].orientation
20
+ assert result.transmembrane_type_1?
21
+ assert_equal false, result.transmembrane_type_2?
22
+
23
+ # test 2 TMD
24
+ result = Bio::TMHMM::TmHmmResult.create_from_short_line('PFA0680c len=209 ExpAA=43.03 First60=0.02 PredHel=2 Topology=i137-159o164-183i')
25
+ assert result
26
+ assert_equal 2, result.transmembrane_domains.length
27
+ assert_equal 137, result.transmembrane_domains[0].start
28
+ assert_equal 159, result.transmembrane_domains[0].stop
29
+ assert_equal Bio::Transmembrane::OrientedTransmembraneDomain::INSIDE_OUT,
30
+ result.transmembrane_domains[0].orientation
31
+ assert_equal 164, result.transmembrane_domains[1].start
32
+ assert_equal 183, result.transmembrane_domains[1].stop
33
+ assert_equal Bio::Transmembrane::OrientedTransmembraneDomain::OUTSIDE_IN,
34
+ result.transmembrane_domains[1].orientation
35
+ assert_equal false, result.transmembrane_type_1?
36
+ assert_equal false, result.transmembrane_type_2?
37
+
38
+ # test 3 TMD
39
+ result = Bio::TMHMM::TmHmmResult.create_from_short_line('PFA0705c len=282 ExpAA=90.97 First60=22.20 PredHel=4 Topology=i22-44o185-207i212-234o259-281i')
40
+ assert result
41
+ assert_equal 4, result.transmembrane_domains.length
42
+ assert_equal 22, result.transmembrane_domains[0].start
43
+ assert_equal 44, result.transmembrane_domains[0].stop
44
+ assert_equal Bio::Transmembrane::OrientedTransmembraneDomain::INSIDE_OUT,
45
+ result.transmembrane_domains[0].orientation
46
+ assert_equal 185, result.transmembrane_domains[1].start
47
+ assert_equal 207, result.transmembrane_domains[1].stop
48
+ assert_equal Bio::Transmembrane::OrientedTransmembraneDomain::OUTSIDE_IN,
49
+ result.transmembrane_domains[1].orientation
50
+ assert_equal 259, result.transmembrane_domains[3].start
51
+ assert_equal 281, result.transmembrane_domains[3].stop
52
+ assert_equal Bio::Transmembrane::OrientedTransmembraneDomain::OUTSIDE_IN,
53
+ result.transmembrane_domains[3].orientation
54
+ assert_equal false, result.transmembrane_type_1?
55
+ assert_equal false, result.transmembrane_type_2?
56
+ end
57
+
58
+ def test_wrapper
59
+ prog = Bio::TMHMM::TmHmmWrapper.new
60
+ seq = Bio::FlatFile.auto(File.join(File.dirname(__FILE__),'data','falciparum1.fa')).next_entry
61
+ tmp = prog.calculate(seq.seq)
62
+ assert tmp
63
+ assert_equal false, tmp.has_domain?
64
+ end
65
+ end
@@ -0,0 +1,146 @@
1
+ require 'helper'
2
+
3
+ module Transmembrane
4
+ class TransmembraneTest < Test::Unit::TestCase
5
+ include Bio::Transmembrane
6
+
7
+ def test_confidenced_transmembrane_domain
8
+ one = ConfidencedTransmembraneDomain.new
9
+ two = ConfidencedTransmembraneDomain.new
10
+ assert_equal one, two
11
+ end
12
+
13
+ def test_sequence_offsets
14
+ aaseq = 'AAAAAANG' #8 aa long
15
+ d = TransmembraneDomainDefinition.new
16
+ d.start = 6
17
+ d.stop = 8
18
+ assert_equal 'ANG', d.sequence(aaseq)
19
+
20
+ assert_equal 'AANG', d.sequence('AAAAAANG', -1, 0)
21
+ assert_equal 'AANG', d.sequence('AAAAAANG', -1, 1) #overhang
22
+ assert_equal 'AAN', d.sequence('AAAAAANG', -1, -1) #overhang over the cterm
23
+
24
+ d.start = 1
25
+ d.stop = 5
26
+ assert_equal 'AAAAA', d.sequence('AAAAAANG', -2, 0) #overhang over the nterm
27
+ assert_equal 'AAAAAANG', d.sequence('AAAAAANG', -2, 15) #overhang over the nterm and cterm
28
+ end
29
+ end
30
+
31
+ class TransmembraneProteinTest < Test::Unit::TestCase
32
+ include Bio::Transmembrane
33
+ def test_simple
34
+ protein = TransmembraneProtein.new
35
+ tmd = TransmembraneDomainDefinition.new
36
+ tmd.start = 8
37
+ tmd.stop = 9
38
+ protein.push tmd
39
+
40
+ tmd = TransmembraneDomainDefinition.new
41
+ tmd.start = 8
42
+ tmd.stop = 10
43
+ protein.push tmd
44
+
45
+ assert_equal 2, protein.minimum_length
46
+ assert_equal 2.5, protein.average_length
47
+ end
48
+
49
+ def test_empty
50
+ protein = TransmembraneProtein.new
51
+ assert protein.transmembrane_domains.empty?
52
+ assert_equal false, protein.has_domain?
53
+ end
54
+
55
+ def test_overlaps
56
+ p1 = TransmembraneProtein.new
57
+ p2 = TransmembraneProtein.new
58
+ tmd1 = TransmembraneDomainDefinition.new(5,10)
59
+ tmd2 = TransmembraneDomainDefinition.new(5,6)
60
+ p1.transmembrane_domains = [tmd1]
61
+ p2.transmembrane_domains = [tmd2]
62
+ assert_equal [[tmd1, tmd2]], p1.overlaps(p2)
63
+
64
+ p2.transmembrane_domains = [tmd1,tmd2]
65
+ assert_equal [[tmd1, tmd1],[tmd1, tmd2]], p1.overlaps(p2)
66
+
67
+ tmd3 = TransmembraneDomainDefinition.new(500,600)
68
+ p2.transmembrane_domains = [tmd3]
69
+ assert_equal [], p1.overlaps(p2)
70
+ end
71
+
72
+ def test_best_overlap
73
+ p1 = TransmembraneProtein.new
74
+ p2 = TransmembraneProtein.new
75
+ tmd1 = TransmembraneDomainDefinition.new(5,10)
76
+ tmd2 = TransmembraneDomainDefinition.new(5,6)
77
+ tmd3 = TransmembraneDomainDefinition.new(11,22)
78
+ p1.transmembrane_domains = [tmd1]
79
+ p2.transmembrane_domains = [tmd2]
80
+ assert_equal [tmd1, tmd2], p1.best_overlap(p2)
81
+
82
+ p2.transmembrane_domains = [tmd1,tmd2]
83
+ assert_equal [tmd1, tmd1], p1.best_overlap(p2)
84
+
85
+ p2.transmembrane_domains = [tmd3]
86
+ assert_equal nil, p1.best_overlap(p2)
87
+ end
88
+
89
+ def test_each
90
+ expected = [10,6,22]
91
+ p = TransmembraneProtein.new
92
+ tmd1 = TransmembraneDomainDefinition.new(5,10)
93
+ tmd2 = TransmembraneDomainDefinition.new(5,6)
94
+ tmd3 = TransmembraneDomainDefinition.new(11,22)
95
+ p.transmembrane_domains = [tmd1,tmd2,tmd3]
96
+ p.each_with_index do |tmd, i|
97
+ assert_equal expected[i], tmd.stop
98
+ end
99
+ end
100
+ end
101
+
102
+ class TransmembraneProteinTest < Test::Unit::TestCase
103
+ def test_overlap_tmd
104
+ tmd1 = TransmembraneDomainDefinition.new(3,6)
105
+ tmd2 = TransmembraneDomainDefinition.new(3,6)
106
+ assert_equal((3..6), tmd1.intersection(tmd2))
107
+ assert_equal 4, tmd1.overlap_length(tmd2)
108
+
109
+ tmd2 = TransmembraneDomainDefinition.new(7,8)
110
+ assert_equal 0, tmd1.overlap_length(tmd2)
111
+
112
+ tmd2 = TransmembraneDomainDefinition.new(5,8)
113
+ assert_equal 2, tmd1.overlap_length(tmd2)
114
+
115
+ tmd2 = TransmembraneDomainDefinition.new(1,3)
116
+ assert_equal 1, tmd1.overlap_length(tmd2)
117
+ assert_equal((3..3), tmd1.intersection(tmd2))
118
+ end
119
+
120
+ def test_residue_contained?
121
+ p = TransmembraneProtein.new
122
+
123
+ # test none
124
+ p.transmembrane_domains = []
125
+ assert_equal false, p.residue_number_contained?(5)
126
+
127
+ # test one
128
+ p.transmembrane_domains = [TransmembraneDomainDefinition.new(5,8)]
129
+ assert p.residue_number_contained?(5)
130
+ assert p.residue_number_contained?(6)
131
+ assert p.residue_number_contained?(8)
132
+ assert_equal false, p.residue_number_contained?(4)
133
+ assert_equal false, p.residue_number_contained?(9)
134
+
135
+ # test 3
136
+ p.transmembrane_domains = [
137
+ TransmembraneDomainDefinition.new(1,10),
138
+ TransmembraneDomainDefinition.new(90,100),
139
+ TransmembraneDomainDefinition.new(16,24),
140
+ ]
141
+ assert p.residue_number_contained?(5)
142
+ assert p.residue_number_contained?(95)
143
+ assert_equal false, p.residue_number_contained?(150)
144
+ end
145
+ end
146
+ end
metadata ADDED
@@ -0,0 +1,147 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bio-tm_hmm
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Ben J. Woodcroft
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-05-03 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: shoulda
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rdoc
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: '3.12'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: '3.12'
46
+ - !ruby/object:Gem::Dependency
47
+ name: jeweler
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: 1.8.3
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 1.8.3
62
+ - !ruby/object:Gem::Dependency
63
+ name: bundler
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: 1.0.21
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: 1.0.21
78
+ - !ruby/object:Gem::Dependency
79
+ name: bio
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: 1.4.2
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: 1.4.2
94
+ description: A bioruby plugin for interaction with the transmembrane predictor TMHMM
95
+ email: donttrustben@gmail.com
96
+ executables:
97
+ - bio-tm_hmm
98
+ extensions: []
99
+ extra_rdoc_files:
100
+ - LICENSE.txt
101
+ - README.rdoc
102
+ files:
103
+ - .document
104
+ - .travis.yml
105
+ - Gemfile
106
+ - LICENSE.txt
107
+ - README.rdoc
108
+ - Rakefile
109
+ - VERSION
110
+ - bin/bio-tm_hmm
111
+ - bio-tm_hmm.gemspec
112
+ - lib/bio-tm_hmm.rb
113
+ - lib/bio/appl/tmhmm/tmhmm_runner.rb
114
+ - lib/bio/transmembrane.rb
115
+ - test/data/falciparum1.fa
116
+ - test/helper.rb
117
+ - test/test_tm_hmm_wrapper.rb
118
+ - test/test_transmembrane.rb
119
+ homepage: http://github.com/wwood/bioruby-tm_hmm
120
+ licenses:
121
+ - MIT
122
+ post_install_message:
123
+ rdoc_options: []
124
+ require_paths:
125
+ - lib
126
+ required_ruby_version: !ruby/object:Gem::Requirement
127
+ none: false
128
+ requirements:
129
+ - - ! '>='
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ segments:
133
+ - 0
134
+ hash: 1028203441
135
+ required_rubygems_version: !ruby/object:Gem::Requirement
136
+ none: false
137
+ requirements:
138
+ - - ! '>='
139
+ - !ruby/object:Gem::Version
140
+ version: '0'
141
+ requirements: []
142
+ rubyforge_project:
143
+ rubygems_version: 1.8.21
144
+ signing_key:
145
+ specification_version: 3
146
+ summary: A bioruby plugin for interaction with the transmembrane predictor TMHMM
147
+ test_files: []