bio-tm_hmm 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.travis.yml ADDED
@@ -0,0 +1,12 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.2
4
+ - 1.9.3
5
+ - jruby-19mode # JRuby in 1.9 mode
6
+ - rbx-19mode
7
+ # - 1.8.7
8
+ # - jruby-18mode # JRuby in 1.8 mode
9
+ # - rbx-18mode
10
+
11
+ # uncomment this line if your project needs to run something other than `rake`:
12
+ # script: bundle exec rspec spec
data/Gemfile ADDED
@@ -0,0 +1,14 @@
1
+ source "http://rubygems.org"
2
+ # Add dependencies required to use your gem here.
3
+ # Example:
4
+ # gem "activesupport", ">= 2.3.5"
5
+
6
+ # Add dependencies to develop your gem here.
7
+ # Include everything needed to run rake, tests, features, etc.
8
+ group :development do
9
+ gem "shoulda", ">= 0"
10
+ gem "rdoc", "~> 3.12"
11
+ gem "jeweler", "~> 1.8.3"
12
+ gem "bundler", ">= 1.0.21"
13
+ gem "bio", ">= 1.4.2"
14
+ end
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012 Ben J Woodcroft
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,56 @@
1
+ = bio-tm_hmm
2
+
3
+ {<img
4
+ src="https://secure.travis-ci.org/wwood/bioruby-tm_hmm.png"
5
+ />}[http://travis-ci.org/#!/wwood/bioruby-tm_hmm]
6
+
7
+ A bioruby plugin for running TMHMM automatically on multiple sequences in a FASTA file and manipulation of the results
8
+
9
+ Note: this software is under active development!
10
+
11
+ == Installation
12
+
13
+ gem install bio-tm_hmm
14
+
15
+ == Usage
16
+
17
+ bio-tm_hmm my.fasta
18
+
19
+ Where my.fasta is a FASTA file with one or more protein sequences in it. Output will be a description of the transmembrane domains predicted by TMHMM.
20
+
21
+ Other options include -f for printing out the fasta sequences that have some number of transmembrane domains in them, and ignoring those that don't (the converse is -g)
22
+
23
+ bio-tm_hmm -f 2 <my.fasta
24
+
25
+ == Developers
26
+
27
+ To use the library
28
+
29
+ require 'bio-tm_hmm'
30
+
31
+ The API doc is online. For more code examples see also the test files in
32
+ the source tree.
33
+
34
+ == Project home page
35
+
36
+ For information on the source tree, documentation, issues and how to contribute, see
37
+
38
+ http://github.com/wwood/bioruby-tm_hmm
39
+
40
+ The BioRuby community is on IRC server: irc.freenode.org, channel: #bioruby.
41
+
42
+ == Cite
43
+
44
+ If you use this software, please cite one of
45
+
46
+ * {BioRuby: bioinformatics software for the Ruby programming language}[http://dx.doi.org/10.1093/bioinformatics/btq475]
47
+ * {Biogem: an effective tool-based approach for scaling up open source software development in bioinformatics}[http://dx.doi.org/10.1093/bioinformatics/bts080]
48
+
49
+ == Biogems.info
50
+
51
+ This Biogem is published at http://biogems.info/index.html#bio-tm_hmm
52
+
53
+ == Copyright
54
+
55
+ Copyright (c) 2012 Ben J Woodcroft. See LICENSE.txt for further details.
56
+
data/Rakefile ADDED
@@ -0,0 +1,45 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'bundler'
5
+ begin
6
+ Bundler.setup(:default, :development)
7
+ rescue Bundler::BundlerError => e
8
+ $stderr.puts e.message
9
+ $stderr.puts "Run `bundle install` to install missing gems"
10
+ exit e.status_code
11
+ end
12
+ require 'rake'
13
+
14
+ require 'jeweler'
15
+ Jeweler::Tasks.new do |gem|
16
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
17
+ gem.name = "bio-tm_hmm"
18
+ gem.homepage = "http://github.com/wwood/bioruby-tm_hmm"
19
+ gem.license = "MIT"
20
+ gem.summary = %Q{A bioruby plugin for interaction with the transmembrane predictor TMHMM}
21
+ gem.description = %Q{A bioruby plugin for interaction with the transmembrane predictor TMHMM}
22
+ gem.email = "donttrustben@gmail.com"
23
+ gem.authors = ["Ben J. Woodcroft"]
24
+ # dependencies defined in Gemfile
25
+ end
26
+ Jeweler::RubygemsDotOrgTasks.new
27
+
28
+ require 'rake/testtask'
29
+ Rake::TestTask.new(:test) do |test|
30
+ test.libs << 'lib' << 'test'
31
+ test.pattern = 'test/**/test_*.rb'
32
+ test.verbose = true
33
+ end
34
+
35
+ task :default => :test
36
+
37
+ require 'rdoc/task'
38
+ Rake::RDocTask.new do |rdoc|
39
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
40
+
41
+ rdoc.rdoc_dir = 'rdoc'
42
+ rdoc.title = "bio-tm_hmm #{version}"
43
+ rdoc.rdoc_files.include('README*')
44
+ rdoc.rdoc_files.include('lib/**/*.rb')
45
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
data/bin/bio-tm_hmm ADDED
@@ -0,0 +1,69 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Author:: Ben J. Woodcroft
4
+ # Copyright:: 2011
5
+ # License:: The Ruby License
6
+
7
+ require 'rubygems'
8
+ require 'bio'
9
+ require 'bio-tm_hmm'
10
+ require 'optparse'
11
+
12
+ # If being run directly instead of being require'd,
13
+ # output one transmembrane per line, and
14
+ # indicate that a particular protein has no transmembrane domain
15
+ options = {
16
+ :filter_in => false,
17
+ :filter_out => false,
18
+ }
19
+ o = OptionParser.new do |opts|
20
+ opts.banner = ['',
21
+ 'Usage: tm_hmm_wrapper.rb [-fg] [fasta_filename]','',
22
+ "\tfasta file can also be piped in on STDIN.",
23
+ "\twithout arguments, a description of the transmembranes is printed out for each input sequence",''
24
+ ].join("\n")
25
+ opts.on('-f','--filter-in [MIN_TRANSMEMBRANE_DOMAINS]','Print those sequences that have a transmembrane domain. If MIN_TRANSMEMBRANE_DOMAINS is defined, only those proteins with that many TMDs or more are printed out') do |m|
26
+ options[:filter_in] = m.to_i #gets set to 0 when optional MIN_TRANSMEMBRANE_DOMAINS is omitted
27
+ end
28
+ opts.on('-g','--filter-out [MIN_TRANSMEMBRANE_DOMAINS]','Print those sequences that do NOT have a transmembrane domain. If MIN_TRANSMEMBRANE_DOMAINS is defined, only those proteins with that many TMDs or more are filtered out') do |m|
29
+ options[:filter_out] = m.to_i #gets set to 0 when optional MIN_TRANSMEMBRANE_DOMAINS is omitted
30
+ end
31
+ end
32
+ o.parse!
33
+
34
+ runner = Bio::TMHMM::TmHmmWrapper.new
35
+
36
+ Bio::FlatFile.auto(ARGF).each do |seq|
37
+ result = runner.calculate(seq.seq)
38
+ name = seq.definition
39
+
40
+ # Default output - a description of the TMDs for each input aaseq
41
+ if options[:filter_in] == false and options[:filter_out] == false
42
+ if result.has_domain?
43
+ # At least one TMD found. Output each on a separate line
44
+ result.transmembrane_domains.each do |tmd|
45
+ puts [
46
+ name,
47
+ result.transmembrane_type,
48
+ tmd.start,
49
+ tmd.stop,
50
+ tmd.orientation
51
+ ].join("\t")
52
+ end
53
+ else
54
+ puts [
55
+ name,
56
+ 'No Transmembrane Domain Found'
57
+ ].join("\t")
58
+ end
59
+
60
+ elsif options[:filter_in] != false
61
+ if result.transmembrane_domains.length >= options[:filter_in]
62
+ puts seq
63
+ end
64
+ elsif options[:filter_out] != false
65
+ unless result.transmembrane_domains.length >= options[:filter_out]
66
+ puts seq
67
+ end
68
+ end
69
+ end
@@ -0,0 +1,68 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = "bio-tm_hmm"
8
+ s.version = "0.0.1"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Ben J. Woodcroft"]
12
+ s.date = "2012-05-03"
13
+ s.description = "A bioruby plugin for interaction with the transmembrane predictor TMHMM"
14
+ s.email = "donttrustben@gmail.com"
15
+ s.executables = ["bio-tm_hmm"]
16
+ s.extra_rdoc_files = [
17
+ "LICENSE.txt",
18
+ "README.rdoc"
19
+ ]
20
+ s.files = [
21
+ ".document",
22
+ ".travis.yml",
23
+ "Gemfile",
24
+ "LICENSE.txt",
25
+ "README.rdoc",
26
+ "Rakefile",
27
+ "VERSION",
28
+ "bin/bio-tm_hmm",
29
+ "bio-tm_hmm.gemspec",
30
+ "lib/bio-tm_hmm.rb",
31
+ "lib/bio/appl/tmhmm/tmhmm_runner.rb",
32
+ "lib/bio/transmembrane.rb",
33
+ "test/data/falciparum1.fa",
34
+ "test/helper.rb",
35
+ "test/test_tm_hmm_wrapper.rb",
36
+ "test/test_transmembrane.rb"
37
+ ]
38
+ s.homepage = "http://github.com/wwood/bioruby-tm_hmm"
39
+ s.licenses = ["MIT"]
40
+ s.require_paths = ["lib"]
41
+ s.rubygems_version = "1.8.21"
42
+ s.summary = "A bioruby plugin for interaction with the transmembrane predictor TMHMM"
43
+
44
+ if s.respond_to? :specification_version then
45
+ s.specification_version = 3
46
+
47
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
48
+ s.add_development_dependency(%q<shoulda>, [">= 0"])
49
+ s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
50
+ s.add_development_dependency(%q<jeweler>, ["~> 1.8.3"])
51
+ s.add_development_dependency(%q<bundler>, [">= 1.0.21"])
52
+ s.add_development_dependency(%q<bio>, [">= 1.4.2"])
53
+ else
54
+ s.add_dependency(%q<shoulda>, [">= 0"])
55
+ s.add_dependency(%q<rdoc>, ["~> 3.12"])
56
+ s.add_dependency(%q<jeweler>, ["~> 1.8.3"])
57
+ s.add_dependency(%q<bundler>, [">= 1.0.21"])
58
+ s.add_dependency(%q<bio>, [">= 1.4.2"])
59
+ end
60
+ else
61
+ s.add_dependency(%q<shoulda>, [">= 0"])
62
+ s.add_dependency(%q<rdoc>, ["~> 3.12"])
63
+ s.add_dependency(%q<jeweler>, ["~> 1.8.3"])
64
+ s.add_dependency(%q<bundler>, [">= 1.0.21"])
65
+ s.add_dependency(%q<bio>, [">= 1.4.2"])
66
+ end
67
+ end
68
+
@@ -0,0 +1,91 @@
1
+ require 'tempfile'
2
+
3
+ module Bio
4
+ class TMHMM
5
+ class TmHmmWrapper
6
+ # Given an amino acid sequence, return a TransmembraneProtein
7
+ # made up of the predicted transmembrane domains
8
+       def calculate(sequence) # runs 'tmhmm -short' on the sequence and parses the first output line
9
+         Bio::Command.mktmpdir do |d| # tmhmm is run with this temporary directory as its working directory
10
+           line = nil
11
+           Bio::Command.call_command(['tmhmm','-short'], :chdir => d) do |io|
12
+             io.puts '>wrapperSeq'
13
+             io.puts sequence
14
+             io.close_write
15
+             line = io.gets # gets returns nil at EOF (readline would raise EOFError), so the check below can fire
16
+           end
17
+
18
+           if line.nil?
19
+             raise Exception, "Error running locally installed TMHMM program 'tmhmm'. Is it properly installed?"
20
+           end
21
+
22
+           return TmHmmResult.create_from_short_line(line)
23
+         end
24
+       end
25
+ end
26
+
27
+ class TmHmmResult
28
+ attr_reader :domains
29
+
30
+       # initialise with an output line of 'tmhmm -short'
31
+ # eg.
32
+ #PFF0290w len=293 ExpAA=145.77 First60=20.51 PredHel=7 Topology=o39-61i101-120o140-162i169-186o196-218i230-252o262-284i
33
+ def self.create_from_short_line(line)
34
+ protein = Bio::Transmembrane::OrientedTransmembraneDomainProtein.new
35
+
36
+ splits = line.strip.split("\t")
37
+ if splits.length != 6
38
+ raise Exception, "Incorrectly parsed short line from TMHMM: #{line}"
39
+ end
40
+
41
+ substrate = splits[5]
42
+ if substrate.gsub!(/^Topology\=[io]/,'').nil?
43
+ raise Exception, "Badly parsed Topology hit: #{substrate}"
44
+ end
45
+
46
+ matches = substrate.match('^(\d+?)\-')
47
+ if !matches
48
+ return protein #no transmembrane domains predicted
49
+ end
50
+
51
+ # eat the string from the beginning adding the transmembrane domains
52
+ prev = matches[1]
53
+ substrate.gsub!(/^(\d+?)-/,'')
54
+ # match all the middle bits
55
+ reg = /^(\d+?)([io])(\d+?)\-/
56
+ while matches =substrate.match(reg)
57
+ tmd = Bio::Transmembrane::OrientedTransmembraneDomain.new
58
+ tmd.start = prev.to_i
59
+ tmd.stop = matches[1].to_i
60
+ tmd.orientation = parse_orientation_from_last_location(matches[2])
61
+ protein.push tmd
62
+
63
+ prev = matches[3]
64
+ substrate.gsub!(reg, '')
65
+ end
66
+ #match the last bit
67
+ if !(matches = substrate.match('(\d+?)([io])$'))
68
+ raise Exception, "Failed to parse the last bit of: #{substrate}"
69
+ end
70
+ tmd = Bio::Transmembrane::OrientedTransmembraneDomain.new
71
+ tmd.start = prev.to_i
72
+ tmd.stop = matches[1].to_i
73
+ tmd.orientation = parse_orientation_from_last_location(matches[2])
74
+ protein.push tmd
75
+
76
+ return protein
77
+ end
78
+
79
+ def self.parse_orientation_from_last_location(last_location)
80
+ case last_location
81
+ when 'i'
82
+ return Bio::Transmembrane::OrientedTransmembraneDomain::OUTSIDE_IN
83
+ when 'o'
84
+ return Bio::Transmembrane::OrientedTransmembraneDomain::INSIDE_OUT
85
+ else
86
+ raise Exception, "Badly parsed topology hit due to orientation character: #{substrate}"
87
+ end
88
+ end
89
+ end
90
+ end
91
+ end
@@ -0,0 +1,222 @@
1
+
2
+ # Monkey-patch Array#pairs into the core Array class
3
+ class Array
4
+ # Return an array of all pairs of elements from this array (each is an array).
5
+ # If another_array is not nil, then do pairwise between this array and that (but not within each)
6
+ #
7
+ # NOT thread safe.
8
+ def pairs(another_array = nil)
9
+ pairs = []
10
+
11
+ if another_array #between this array and the next
12
+ (0..length-1).each do |index1|
13
+ (0..another_array.length-1).each do |index2|
14
+ pairs.push [self[index1], another_array[index2]]
15
+ end
16
+ end
17
+ else # within this array only
18
+ (0..length-1).each do |index1|
19
+ index2 = index1+1
20
+ while index2 < length
21
+ pairs.push [self[index1], self[index2]]
22
+ index2 += 1
23
+ end
24
+ end
25
+ end
26
+
27
+ return pairs
28
+ end
29
+ end
30
+
31
+
32
+
33
+ # a simple class to represent a transmembrane domain, and a transmembrane domain containing protein
34
+ module Bio
35
+ module Transmembrane
36
+ class TransmembraneProtein
37
+ attr_accessor :transmembrane_domains, :name
38
+ include Enumerable #so each, each_with_index, etc. work
39
+
40
+ def initialize
41
+ # default no domains to empty array not nil
42
+ @transmembrane_domains = []
43
+ end
44
+
45
+ def push(transmembrane_domain)
46
+ @transmembrane_domains.push transmembrane_domain
47
+ end
48
+
49
+ def average_length
50
+ @transmembrane_domains.inject(0){|sum,cur| sum+cur.length}.to_f/@transmembrane_domains.length.to_f
51
+ end
52
+
53
+ def minimum_length
54
+ @transmembrane_domains.min.length
55
+ end
56
+
57
+ def maximum_length
58
+ @transmembrane_domains.max.length
59
+ end
60
+
61
+ def has_domain?
62
+ !@transmembrane_domains.empty?
63
+ end
64
+
65
+ def multiple_transmembrane_domains?
66
+ @transmembrane_domains.length > 1
67
+ end
68
+
69
+ def overlaps(another_transmembrane_protein)
70
+ @transmembrane_domains.pairs(another_transmembrane_protein.transmembrane_domains).collect {|t1,t2|
71
+ t1.intersection(t2) == () ? nil : [t1,t2]
72
+ }.reject {|a| a.nil?}
73
+ end
74
+
75
+ # return the pair of transmembrane domains that overlaps the best (ie for the longest period)
76
+ def best_overlap(another_transmembrane_protein)
77
+ max = @transmembrane_domains.pairs(another_transmembrane_protein.transmembrane_domains).collect {|t1,t2|
78
+ [t1.overlap_length(t2), [t1,t2]]
79
+ }.max {|a,b| a[0] <=> b[0]}
80
+ max[0] == 0 ? nil : max[1]
81
+ end
82
+
83
+ def each
84
+ @transmembrane_domains.each{|t| yield t}
85
+ end
86
+
87
+ def residue_number_contained?(residue_number)
88
+ contained = false
89
+ @transmembrane_domains.each do |tmd|
90
+ if tmd.start <= residue_number and tmd.stop >= residue_number
91
+ contained = true
92
+ end
93
+ end
94
+ contained
95
+ end
96
+ end
97
+
98
+ class OrientedTransmembraneDomainProtein<TransmembraneProtein
99
+ def transmembrane_type_1?
100
+ @transmembrane_domains and @transmembrane_domains.length == 1 and @transmembrane_domains[0].orientation == OrientedTransmembraneDomain::OUTSIDE_IN
101
+ end
102
+
103
+ def transmembrane_type_2?
104
+ @transmembrane_domains and @transmembrane_domains.length == 1 and @transmembrane_domains[0].orientation == OrientedTransmembraneDomain::INSIDE_OUT
105
+ end
106
+
107
+ def transmembrane_type
108
+ if transmembrane_type_1?
109
+ return 'I'
110
+ elsif transmembrane_type_2?
111
+ return 'II'
112
+ else
113
+ return 'Unknown'
114
+ end
115
+ end
116
+ end
117
+
118
+ class TransmembraneDomainDefinition
119
+ attr_accessor :start, :stop
120
+
121
+ # A new TMD. The length is stop-start+1, so start and stop are
122
+ # 'inclusive'
123
+ def initialize(start=nil, stop=nil)
124
+ @start = start
125
+ @stop = stop
126
+ end
127
+
128
+ def length
129
+ @stop-@start+1
130
+ end
131
+
132
+ def <=>(other)
133
+ length <=> other.length
134
+ end
135
+
136
+ def ==(other)
137
+ start == other.start and
138
+ stop == other.stop
139
+ end
140
+
141
+ def sequence(protein_sequence_string, nterm_offset=0, cterm_offset=0)
142
+ one = start+nterm_offset-1
143
+ one = 0 if one < 0
144
+ two = stop+cterm_offset-1
145
+ two = 0 if two < 0
146
+
147
+ protein_sequence_string[(one)..(two)]
148
+ end
149
+
150
+ # Return the number of amino acids that overlap with another
151
+ # transmembrane domain, or 0 if none are found
152
+ def overlap_length(another_transmembrane_domain_defintion)
153
+ intersection(another_transmembrane_domain_defintion).to_a.length
154
+ end
155
+
156
+ # Return a range representing the overlap of this transmembrane domain
157
+ # with another
158
+ #
159
+ # Code inspired by http://billsiggelkow.com/2008/8/29/ruby-range-intersection
160
+ def intersection(another_transmembrane_domain_defintion)
161
+ res = (@start..@stop).to_a & (another_transmembrane_domain_defintion.start..another_transmembrane_domain_defintion.stop).to_a
162
+ res.empty? ? nil : (res.first..res.last)
163
+ end
164
+ alias_method(:overlap, :intersection)
165
+ end
166
+
167
+ class ConfidencedTransmembraneDomain<TransmembraneDomainDefinition
168
+ attr_accessor :confidence
169
+
170
+ def <=>(other)
171
+ return start<=>other.start if start<=>other.start
172
+ return stop<=>other.start if stop<=>other.stop
173
+ return confidence <=> other.confidence
174
+ end
175
+
176
+ def ==(other)
177
+ start == other.start and
178
+ stop == other.stop and
179
+ confidence == other.confidence
180
+ end
181
+ end
182
+
183
+     # From "Predicting transmembrane protein topology with a hidden Markov model: application to complete genomes"
184
+ #
185
+ # Another signal shown to be associated with transmembrane helices is
186
+ # the abundance of positively charged residues in the part of the
187
+ # sequence on the cytoplasmic side of the membrane, “the positive inside
188
+ # rule” (von Heijne 1986) and (von Heijne 1994).
189
+ #
190
+ # So, inside means cytosolic. So outside_in means type I transmembrane domain protein
191
+ class OrientedTransmembraneDomain<TransmembraneDomainDefinition
192
+ # The orientation can either be inside out (like a type II transmembrane domain protein)
193
+ INSIDE_OUT = 'inside_out'
194
+       # Or outside in (like a type I transmembrane domain protein)
195
+ OUTSIDE_IN = 'outside_in'
196
+ # or the whole protein is TMD, so orientation is unknown
197
+ UNKNOWN = 'unknown'
198
+
199
+ attr_accessor :orientation
200
+
201
+ def initialize(start=nil, stop=nil, orientation=nil)
202
+ @start = start.to_i unless start.nil?
203
+ @stop = stop.to_i unless stop.nil?
204
+ @orientation = orientation unless orientation.nil?
205
+ end
206
+ end
207
+
208
+ # A class to represent a protein with a signal peptide and a transmembrane
209
+ # domain
210
+ class SignalPeptideTransmembraneDomainProtein<OrientedTransmembraneDomainProtein
211
+ attr_accessor :signal_peptide
212
+
213
+ def signal?
214
+ !@signal_peptide.nil?
215
+ end
216
+ end
217
+
218
+ class SignalPeptide
219
+ attr_accessor :start, :stop
220
+ end
221
+ end
222
+ end
data/lib/bio-tm_hmm.rb ADDED
@@ -0,0 +1,4 @@
1
+ # General transmembrane domain related data structures
2
+ require 'bio/transmembrane'
3
+ # TMHMM extras
4
+ require 'bio/appl/tmhmm/tmhmm_runner'
@@ -0,0 +1,38 @@
1
+ >Plasmodium_falciparum_3D7|MAL8|PF08_0142|Annotation|Plasmodium_falciparum_Sanger_Stanford_TIGR|(protein coding) erythrocyte membrane protein 1 (PfEMP1)
2
+ MAAAGGGGKDKYKNAQDAKHLLDIIGEDIYKIANDAALKRSGSELKGLLSLAKFEKNPPD
3
+ KQTPEDPCDLDYKYHTNVTSNVIEPCNKRSGKRFSEVSGAECANNRIKGNKGSNGDACAP
4
+ FRRLHVCDRNLEQIDPAKITATHNLLVDVCQAAKFEGQSITQDYPKYLATYNDSPSQICT
5
+ MLARSFADIGDIVRGKDLFRGYDDEEKNRRKKLEQKLKVIFGHIYEELKKHKKLKEEAEE
6
+ RYKKDGDNYYKLREDWWALNRQEIWKAITCGHPGGTYFRQTACGGGTTPTPNKCRCATND
7
+ VPTYFDYVPQYLRWFEEWAEDFCRKRKYKLENAIEKCRGKTKGEKYCDLNGFDCTQTASG
8
+ EKKFVKGHNCHNCSVTCIPFGPWIDNQKKEFLKQRNKYQNEISSNSRKKRSTSNNNYKGY
9
+ DEEFYKILKEDYGDVEQFLEKLSREGICQSQPTVGNQKADAANFTKDNPAKTFSHTEYCQ
10
+ ACPWCGVVCKSGNCTKNPEGSCTEQIRKKVYDDSNTTTIPVLTPEKGKTSILQKYKTFCE
11
+ KPEKHNQINNWECHYEKTDISNNCILGKWEKFQKGQEVMVYHPFFWKWVTEMLDDSIKWR
12
+ KELDNCLKNENKQCISKCNGKCDCYKRWVEQKKEKEWTQIKDHFGKQEDMKEQIRGADPG
13
+ IILEGVLDIEDLFENIKDTYGDVKEIDHIKKLLEEETTVDADNQNKTTIDKLLDHEDKDA
14
+ KGCLQKQNECKEQERDGGARSDSQEPTPRSEVKPDSEDLDDDDEDDPDEEKSEEVENPED
15
+ QGEEEGTKQGSGEKKVDGTEAVQETVAEVTPEKKDEVNPCEIVKTLFEKPENLSDACGLK
16
+ YGPGGKERYSQWKCIPTKPNSDNKGEVGSAGRVARSAPSGEKGSICVPPRRRRLYVKDLE
17
+ TLGDSEVTQVQLRDAFIKCAAVETFFLWDRYKKEKEKKKPQEGVLQLLGTVGTPPTDDEE
18
+ DPPEKMLQKGEIPEEFKRQMFYTLADYKDILFGDQEVIKTLKDSGDENIKDISEKIKKTL
19
+ NGDNNQESGSSPSLSGKKTTPKDWWETYGKDIWEGMVCALTYKNSGDKKIEQVKTADDGE
20
+ DLFQKLKTQYEYNTVTLKDENSGTEGAKPFTPKTVSSSSGEKNPPKLSDFVLRPPYFRYL
21
+ EEWGETFCRQRARMLDKIKKDCNVEENDNRPGGGITKQYSGDGESCKDYLPDDPTTLPDL
22
+ VSSCPKSCSSYRKWINKKKDEFVEQQNAYTEQQNKCQSKSDKAKSDNGFYTRLQNLPDAA
23
+ AFLKTLGSCSKNDIPEYKIDFDVNGETFRYEKYCGTCPEFKINCTKVKCTSGDMQNGCKD
24
+ NKINAANFKTMAQSTEINMLVSDNSGNGSQNDLKDCKTSGIFKGFREDVWTCGKVCGYNV
25
+ CKPKNVNGQNGDGNQILLFNALLKRWVEYFLEDYKKIKHKISHCKNSSEGHTCIKNCVEQ
26
+ WISTKRTEWETIRGRFNDQYKSNDSDVYPVRSFLETWIPKIPVANANNDGKKLIKLSKFD
27
+ NFCSCSASAHSPNGKDDAIDCMINRLQDKIDKCKEKHPQPSAENQTTCDESTLVEDVDDY
28
+ EEQNPENKVGKPAICGNVDTTEPVKEEDEEECKAAESPAEPEQAAEEESVPAAETKDTEN
29
+ QPPQAPDVGPPPLAPAPADQPLDPTILQTTIPFGIAIALTSIVFLFLKKKTKSTIDLLRV
30
+ INIPKSDYDIPTKLSPNRYIPYTSGKYRGKRYIYLEGDSGTDSGYTDHYSDITSSSESEY
31
+ EELDINDIYVPRAPKYKTLIEVVLEPSGNNTTASGNNTTASDTQNDIQNDIPSDIPKTPS
32
+ DTPPPITDDEWNQLKDEFISQYLQSEQPKDVPNDYSSGDIPLNTQPNTLYFDKPQEKPFI
33
+ TSIHDRNLYSGEEYNYDMFNSGKNGPYSDKNDLYSGNHDSLSGNRDPTSANHDSYSGNHH
34
+ PYSGIDLINDSISGNQHIDIYDELLKRKENELFGTNNPKRTSTYSVAKLTNSDPIHNQLE
35
+ LFHTWLDRHRDMCEKWENHHERLAKLKEEWENETHSGNTHPSDSNKTLNTDVSIQIDMDH
36
+ EKRMKEFTNMDTILEDLDKPFNEPYYYDMYDDDIYYDVNDHDTSTVDSNAMDVPSKVQIE
37
+ MDVNTKLVKEKYPIGDVWDI
38
+
data/test/helper.rb ADDED
@@ -0,0 +1,17 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+ require 'test/unit'
11
+
12
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
13
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
14
+ require 'bio-tm_hmm'
15
+
16
+ class Test::Unit::TestCase
17
+ end
@@ -0,0 +1,65 @@
1
+ require 'bio'
2
+ require 'helper'
3
+
4
+ class TmHmmWrapperTest < Test::Unit::TestCase
5
+ include Bio::Transmembrane
6
+
7
+ def test_parser
8
+ result = Bio::TMHMM::TmHmmResult.create_from_short_line('PFA0635c len=555 ExpAA=0.00 First60=0.00 PredHel=0 Topology=o')
9
+ assert result
10
+ assert_equal false, result.has_domain?
11
+
12
+ # test a single TMD
13
+ result = Bio::TMHMM::TmHmmResult.create_from_short_line('PFA0685c len=324 ExpAA=20.36 First60=0.00 PredHel=1 Topology=o281-303i')
14
+ assert result
15
+ assert_equal 1, result.transmembrane_domains.length
16
+ assert_equal 281, result.transmembrane_domains[0].start
17
+ assert_equal 303, result.transmembrane_domains[0].stop
18
+ assert_equal Bio::Transmembrane::OrientedTransmembraneDomain::OUTSIDE_IN,
19
+ result.transmembrane_domains[0].orientation
20
+ assert result.transmembrane_type_1?
21
+ assert_equal false, result.transmembrane_type_2?
22
+
23
+ # test 2 TMD
24
+ result = Bio::TMHMM::TmHmmResult.create_from_short_line('PFA0680c len=209 ExpAA=43.03 First60=0.02 PredHel=2 Topology=i137-159o164-183i')
25
+ assert result
26
+ assert_equal 2, result.transmembrane_domains.length
27
+ assert_equal 137, result.transmembrane_domains[0].start
28
+ assert_equal 159, result.transmembrane_domains[0].stop
29
+ assert_equal Bio::Transmembrane::OrientedTransmembraneDomain::INSIDE_OUT,
30
+ result.transmembrane_domains[0].orientation
31
+ assert_equal 164, result.transmembrane_domains[1].start
32
+ assert_equal 183, result.transmembrane_domains[1].stop
33
+ assert_equal Bio::Transmembrane::OrientedTransmembraneDomain::OUTSIDE_IN,
34
+ result.transmembrane_domains[1].orientation
35
+ assert_equal false, result.transmembrane_type_1?
36
+ assert_equal false, result.transmembrane_type_2?
37
+
38
+     # test 4 TMD
39
+ result = Bio::TMHMM::TmHmmResult.create_from_short_line('PFA0705c len=282 ExpAA=90.97 First60=22.20 PredHel=4 Topology=i22-44o185-207i212-234o259-281i')
40
+ assert result
41
+ assert_equal 4, result.transmembrane_domains.length
42
+ assert_equal 22, result.transmembrane_domains[0].start
43
+ assert_equal 44, result.transmembrane_domains[0].stop
44
+ assert_equal Bio::Transmembrane::OrientedTransmembraneDomain::INSIDE_OUT,
45
+ result.transmembrane_domains[0].orientation
46
+ assert_equal 185, result.transmembrane_domains[1].start
47
+ assert_equal 207, result.transmembrane_domains[1].stop
48
+ assert_equal Bio::Transmembrane::OrientedTransmembraneDomain::OUTSIDE_IN,
49
+ result.transmembrane_domains[1].orientation
50
+ assert_equal 259, result.transmembrane_domains[3].start
51
+ assert_equal 281, result.transmembrane_domains[3].stop
52
+ assert_equal Bio::Transmembrane::OrientedTransmembraneDomain::OUTSIDE_IN,
53
+ result.transmembrane_domains[3].orientation
54
+ assert_equal false, result.transmembrane_type_1?
55
+ assert_equal false, result.transmembrane_type_2?
56
+ end
57
+
58
+ def test_wrapper
59
+ prog = Bio::TMHMM::TmHmmWrapper.new
60
+ seq = Bio::FlatFile.auto(File.join(File.dirname(__FILE__),'data','falciparum1.fa')).next_entry
61
+ tmp = prog.calculate(seq.seq)
62
+ assert tmp
63
+ assert_equal false, tmp.has_domain?
64
+ end
65
+ end
@@ -0,0 +1,146 @@
1
+ require 'helper'
2
+
3
+ module Transmembrane
4
+ class TransmembraneTest < Test::Unit::TestCase
5
+ include Bio::Transmembrane
6
+
7
+ def test_confidenced_transmembrane_domain
8
+ one = ConfidencedTransmembraneDomain.new
9
+ two = ConfidencedTransmembraneDomain.new
10
+ assert_equal one, two
11
+ end
12
+
13
+ def test_sequence_offsets
14
+ aaseq = 'AAAAAANG' #8 aa long
15
+ d = TransmembraneDomainDefinition.new
16
+ d.start = 6
17
+ d.stop = 8
18
+ assert_equal 'ANG', d.sequence(aaseq)
19
+
20
+ assert_equal 'AANG', d.sequence('AAAAAANG', -1, 0)
21
+ assert_equal 'AANG', d.sequence('AAAAAANG', -1, 1) #overhang
22
+ assert_equal 'AAN', d.sequence('AAAAAANG', -1, -1) #overhang over the cterm
23
+
24
+ d.start = 1
25
+ d.stop = 5
26
+ assert_equal 'AAAAA', d.sequence('AAAAAANG', -2, 0) #overhang over the nterm
27
+ assert_equal 'AAAAAANG', d.sequence('AAAAAANG', -2, 15) #overhang over the nterm and cterm
28
+ end
29
+ end
30
+
31
+ class TransmembraneProteinTest < Test::Unit::TestCase
32
+ include Bio::Transmembrane
33
+ def test_simple
34
+ protein = TransmembraneProtein.new
35
+ tmd = TransmembraneDomainDefinition.new
36
+ tmd.start = 8
37
+ tmd.stop = 9
38
+ protein.push tmd
39
+
40
+ tmd = TransmembraneDomainDefinition.new
41
+ tmd.start = 8
42
+ tmd.stop = 10
43
+ protein.push tmd
44
+
45
+ assert_equal 2, protein.minimum_length
46
+ assert_equal 2.5, protein.average_length
47
+ end
48
+
49
+ def test_empty
50
+ protein = TransmembraneProtein.new
51
+ assert protein.transmembrane_domains.empty?
52
+ assert_equal false, protein.has_domain?
53
+ end
54
+
55
+ def test_overlaps
56
+ p1 = TransmembraneProtein.new
57
+ p2 = TransmembraneProtein.new
58
+ tmd1 = TransmembraneDomainDefinition.new(5,10)
59
+ tmd2 = TransmembraneDomainDefinition.new(5,6)
60
+ p1.transmembrane_domains = [tmd1]
61
+ p2.transmembrane_domains = [tmd2]
62
+ assert_equal [[tmd1, tmd2]], p1.overlaps(p2)
63
+
64
+ p2.transmembrane_domains = [tmd1,tmd2]
65
+ assert_equal [[tmd1, tmd1],[tmd1, tmd2]], p1.overlaps(p2)
66
+
67
+ tmd3 = TransmembraneDomainDefinition.new(500,600)
68
+ p2.transmembrane_domains = [tmd3]
69
+ assert_equal [], p1.overlaps(p2)
70
+ end
71
+
72
+ def test_best_overlap
73
+ p1 = TransmembraneProtein.new
74
+ p2 = TransmembraneProtein.new
75
+ tmd1 = TransmembraneDomainDefinition.new(5,10)
76
+ tmd2 = TransmembraneDomainDefinition.new(5,6)
77
+ tmd3 = TransmembraneDomainDefinition.new(11,22)
78
+ p1.transmembrane_domains = [tmd1]
79
+ p2.transmembrane_domains = [tmd2]
80
+ assert_equal [tmd1, tmd2], p1.best_overlap(p2)
81
+
82
+ p2.transmembrane_domains = [tmd1,tmd2]
83
+ assert_equal [tmd1, tmd1], p1.best_overlap(p2)
84
+
85
+ p2.transmembrane_domains = [tmd3]
86
+ assert_equal nil, p1.best_overlap(p2)
87
+ end
88
+
89
+ def test_each
90
+ expected = [10,6,22]
91
+ p = TransmembraneProtein.new
92
+ tmd1 = TransmembraneDomainDefinition.new(5,10)
93
+ tmd2 = TransmembraneDomainDefinition.new(5,6)
94
+ tmd3 = TransmembraneDomainDefinition.new(11,22)
95
+ p.transmembrane_domains = [tmd1,tmd2,tmd3]
96
+ p.each_with_index do |tmd, i|
97
+ assert_equal expected[i], tmd.stop
98
+ end
99
+ end
100
+ end
101
+
102
+ class TransmembraneProteinTest < Test::Unit::TestCase
103
+ def test_overlap_tmd
104
+ tmd1 = TransmembraneDomainDefinition.new(3,6)
105
+ tmd2 = TransmembraneDomainDefinition.new(3,6)
106
+ assert_equal((3..6), tmd1.intersection(tmd2))
107
+ assert_equal 4, tmd1.overlap_length(tmd2)
108
+
109
+ tmd2 = TransmembraneDomainDefinition.new(7,8)
110
+ assert_equal 0, tmd1.overlap_length(tmd2)
111
+
112
+ tmd2 = TransmembraneDomainDefinition.new(5,8)
113
+ assert_equal 2, tmd1.overlap_length(tmd2)
114
+
115
+ tmd2 = TransmembraneDomainDefinition.new(1,3)
116
+ assert_equal 1, tmd1.overlap_length(tmd2)
117
+ assert_equal((3..3), tmd1.intersection(tmd2))
118
+ end
119
+
120
+ def test_residue_contained?
121
+ p = TransmembraneProtein.new
122
+
123
+ # test none
124
+ p.transmembrane_domains = []
125
+ assert_equal false, p.residue_number_contained?(5)
126
+
127
+ # test one
128
+ p.transmembrane_domains = [TransmembraneDomainDefinition.new(5,8)]
129
+ assert p.residue_number_contained?(5)
130
+ assert p.residue_number_contained?(6)
131
+ assert p.residue_number_contained?(8)
132
+ assert_equal false, p.residue_number_contained?(4)
133
+ assert_equal false, p.residue_number_contained?(9)
134
+
135
+ # test 3
136
+ p.transmembrane_domains = [
137
+ TransmembraneDomainDefinition.new(1,10),
138
+ TransmembraneDomainDefinition.new(90,100),
139
+ TransmembraneDomainDefinition.new(16,24),
140
+ ]
141
+ assert p.residue_number_contained?(5)
142
+ assert p.residue_number_contained?(95)
143
+ assert_equal false, p.residue_number_contained?(150)
144
+ end
145
+ end
146
+ end
metadata ADDED
@@ -0,0 +1,147 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bio-tm_hmm
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Ben J. Woodcroft
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-05-03 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: shoulda
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rdoc
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: '3.12'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: '3.12'
46
+ - !ruby/object:Gem::Dependency
47
+ name: jeweler
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: 1.8.3
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 1.8.3
62
+ - !ruby/object:Gem::Dependency
63
+ name: bundler
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: 1.0.21
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: 1.0.21
78
+ - !ruby/object:Gem::Dependency
79
+ name: bio
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: 1.4.2
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: 1.4.2
94
+ description: A bioruby plugin for interaction with the transmembrane predictor TMHMM
95
+ email: donttrustben@gmail.com
96
+ executables:
97
+ - bio-tm_hmm
98
+ extensions: []
99
+ extra_rdoc_files:
100
+ - LICENSE.txt
101
+ - README.rdoc
102
+ files:
103
+ - .document
104
+ - .travis.yml
105
+ - Gemfile
106
+ - LICENSE.txt
107
+ - README.rdoc
108
+ - Rakefile
109
+ - VERSION
110
+ - bin/bio-tm_hmm
111
+ - bio-tm_hmm.gemspec
112
+ - lib/bio-tm_hmm.rb
113
+ - lib/bio/appl/tmhmm/tmhmm_runner.rb
114
+ - lib/bio/transmembrane.rb
115
+ - test/data/falciparum1.fa
116
+ - test/helper.rb
117
+ - test/test_tm_hmm_wrapper.rb
118
+ - test/test_transmembrane.rb
119
+ homepage: http://github.com/wwood/bioruby-tm_hmm
120
+ licenses:
121
+ - MIT
122
+ post_install_message:
123
+ rdoc_options: []
124
+ require_paths:
125
+ - lib
126
+ required_ruby_version: !ruby/object:Gem::Requirement
127
+ none: false
128
+ requirements:
129
+ - - ! '>='
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ segments:
133
+ - 0
134
+ hash: 1028203441
135
+ required_rubygems_version: !ruby/object:Gem::Requirement
136
+ none: false
137
+ requirements:
138
+ - - ! '>='
139
+ - !ruby/object:Gem::Version
140
+ version: '0'
141
+ requirements: []
142
+ rubyforge_project:
143
+ rubygems_version: 1.8.21
144
+ signing_key:
145
+ specification_version: 3
146
+ summary: A bioruby plugin for interaction with the transmembrane predictor TMHMM
147
+ test_files: []