bio-octopus 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/Gemfile +17 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +19 -0
- data/Rakefile +53 -0
- data/VERSION +1 -0
- data/bin/biooctopus +36 -0
- data/bio-octopus.gemspec +78 -0
- data/lib/bio-octopus.rb +1 -0
- data/lib/bio/appl/octopus.rb +229 -0
- data/test/helper.rb +17 -0
- data/test/test_bio-octopus.rb +138 -0
- metadata +199 -0
data/.document
ADDED
data/Gemfile
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# Gem sources are fetched over TLS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Runtime dependencies: required to use the gem.
# Example:
#   gem "activesupport", ">= 2.3.5"
gem 'bio', ">= 1.4.1"
gem 'fastercsv'
gem 'rio'
gem 'bio-tm_hmm'

# Development dependencies: everything needed to run rake, tests, features, etc.
# 'bio' is not repeated here; the runtime declaration above already covers it,
# and listing the same gem twice only produces a duplicate-entry warning.
group :development do
  gem "bundler", "~> 1.0.0"
  gem "jeweler", "~> 1.5.2"
  gem "rcov", ">= 0"
end
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2011 Ben J Woodcroft
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
= bio-octopus
|
2
|
+
|
3
|
+
A BioRuby plugin for running octopus (available from http://octopus.cbr.su.se/) and parsing its output. Currently the plugin is compatible with Ruby 1.8 only, not Ruby 1.9, because of its dependency on the rio gem.
|
4
|
+
|
5
|
+
== Contributing to bio-octopus
|
6
|
+
|
7
|
+
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
|
8
|
+
* Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it
|
9
|
+
* Fork the project
|
10
|
+
* Start a feature/bugfix branch
|
11
|
+
* Commit and push until you are happy with your contribution
|
12
|
+
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
13
|
+
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate the change in its own commit so I can cherry-pick around it.
|
14
|
+
|
15
|
+
== Copyright
|
16
|
+
|
17
|
+
Copyright (c) 2011 Ben J Woodcroft. See LICENSE.txt for
|
18
|
+
further details.
|
19
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'rubygems'
require 'bundler'

# Activate the gems listed in the Gemfile before anything else is required,
# and explain how to recover when some of them are missing.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'

# Packaging tasks (gemspec, build, release, ...) provided by jeweler.
require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "bio-octopus"
  gem.homepage = "http://github.com/wwood/bioruby-octopus"
  gem.license = "MIT"
  gem.summary = %Q{Running and parsing of the protein transmembrane domain predictor octopus}
  gem.description = %Q{Running and parsing of the protein transmembrane domain predictor octopus}
  gem.email = "gmail.com after donttrustben"
  gem.authors = ["Ben J Woodcroft"]
  # Include your dependencies below. Runtime dependencies are required when using your gem,
  # and development dependencies are only needed for development (ie running rake tasks, tests, etc)
  #  gem.add_runtime_dependency 'jabber4r', '> 0.1'
  #  gem.add_development_dependency 'rspec', '> 1.2.3'
end
Jeweler::RubygemsDotOrgTasks.new

# `rake test` runs every test/**/test_*.rb file.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# `rake rcov` runs the same tests under the rcov coverage tool.
require 'rcov/rcovtask'
Rcov::RcovTask.new do |test|
  test.libs << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

task :default => :test

# 'rake/rdoctask' no longer exists in current Rake releases; its replacement
# lives in the rdoc gem. Prefer the new location and fall back to the old one,
# so that a missing file does not abort every task in this Rakefile.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # strip: the VERSION file ends with a newline that must not leak into the title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "bio-octopus #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.1
|
data/bin/biooctopus
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
#!/usr/bin/env ruby
#
# BioRuby bio-octopus Plugin
# Version 0.0.1
# Author:: Ben J Woodcroft
# Copyright:: 2011
# License:: The Ruby License
#
# Reads sequences from the files named on the command line (or from STDIN when
# none are given), runs each of them through Bio::Spoctopus::Wrapper and
# prints one tab-separated line per predicted transmembrane domain:
#
#   name, transmembrane type, start, stop, orientation
#
# A sequence without any predicted domain produces a single line:
#
#   name, 'No Transmembrane Domain Found'

require 'optparse'
require 'rubygems'

# Allow running straight from a source checkout as well as from the installed gem.
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
require 'bio'
require 'bio-octopus'

USAGE = "This scripts reads a fasta file in, and uses the BLOCTOPUS and SPOCTOPUS scripts, returning a structured result for each transmembrane domain. Requires the scripts to be installed before use. They are available from http://octopus.cbr.su.se/"

# There is deliberately no `if $0 == __FILE__` guard around the code below:
# the executable wrapper generated by RubyGems `load`s this file, so $0 never
# equals __FILE__ there and a guarded script would silently do nothing.

# Wrapper#calculate needs the BLAST database path as its second argument, so
# it has to be supplied on the command line.
blast_database_path = nil
option_parser = OptionParser.new do |opts|
  opts.banner = "Usage: biooctopus -b BLAST_DATABASE [FASTA_FILE ...]\n\n#{USAGE}\n\n"
  opts.on('-b', '--blast-database PATH',
          'BLAST database handed to BLOCTOPUS (required)') do |path|
    blast_database_path = path
  end
end
# parse! removes the options from ARGV, leaving only the input files for ARGF.
option_parser.parse!(ARGV)

if blast_database_path.nil?
  $stderr.puts option_parser
  exit 1
end

runner = Bio::Spoctopus::Wrapper.new

Bio::FlatFile.auto(ARGF).each do |seq|
  result = runner.calculate(seq.seq, blast_database_path)
  name = seq.definition

  if result.has_domain?
    # At least one TMD found. Output each on a separate line
    result.transmembrane_domains.each do |tmd|
      puts [
        name,
        result.transmembrane_type,
        tmd.start,
        tmd.stop,
        tmd.orientation
      ].join("\t")
    end
  else
    puts [
      name,
      'No Transmembrane Domain Found'
    ].join("\t")
  end
end
|
data/bio-octopus.gemspec
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{bio-octopus}
|
8
|
+
s.version = "0.0.1"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Ben J Woodcroft"]
|
12
|
+
s.date = %q{2011-04-10}
|
13
|
+
s.default_executable = %q{biooctopus}
|
14
|
+
s.description = %q{Running and parsing of the protein transmembrane domain predictor octopus}
|
15
|
+
s.email = %q{gmail.com after donttrustben}
|
16
|
+
s.executables = ["biooctopus"]
|
17
|
+
s.extra_rdoc_files = [
|
18
|
+
"LICENSE.txt",
|
19
|
+
"README.rdoc"
|
20
|
+
]
|
21
|
+
s.files = [
|
22
|
+
".document",
|
23
|
+
"Gemfile",
|
24
|
+
"LICENSE.txt",
|
25
|
+
"README.rdoc",
|
26
|
+
"Rakefile",
|
27
|
+
"VERSION",
|
28
|
+
"bin/biooctopus",
|
29
|
+
"bio-octopus.gemspec",
|
30
|
+
"lib/bio-octopus.rb",
|
31
|
+
"lib/bio/appl/octopus.rb",
|
32
|
+
"test/helper.rb",
|
33
|
+
"test/test_bio-octopus.rb"
|
34
|
+
]
|
35
|
+
s.homepage = %q{http://github.com/wwood/bioruby-octopus}
|
36
|
+
s.licenses = ["MIT"]
|
37
|
+
s.require_paths = ["lib"]
|
38
|
+
s.rubygems_version = %q{1.6.2}
|
39
|
+
s.summary = %q{Running and parsing of the protein transmembrane domain predictor octopus}
|
40
|
+
s.test_files = [
|
41
|
+
"test/helper.rb",
|
42
|
+
"test/test_bio-octopus.rb"
|
43
|
+
]
|
44
|
+
|
45
|
+
if s.respond_to? :specification_version then
|
46
|
+
s.specification_version = 3
|
47
|
+
|
48
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
49
|
+
s.add_runtime_dependency(%q<bio>, [">= 1.4.1"])
|
50
|
+
s.add_runtime_dependency(%q<fastercsv>, [">= 0"])
|
51
|
+
s.add_runtime_dependency(%q<rio>, [">= 0"])
|
52
|
+
s.add_runtime_dependency(%q<bio-tm_hmm>, [">= 0"])
|
53
|
+
s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
|
54
|
+
s.add_development_dependency(%q<jeweler>, ["~> 1.5.2"])
|
55
|
+
s.add_development_dependency(%q<rcov>, [">= 0"])
|
56
|
+
s.add_development_dependency(%q<bio>, [">= 1.4.1"])
|
57
|
+
else
|
58
|
+
s.add_dependency(%q<bio>, [">= 1.4.1"])
|
59
|
+
s.add_dependency(%q<fastercsv>, [">= 0"])
|
60
|
+
s.add_dependency(%q<rio>, [">= 0"])
|
61
|
+
s.add_dependency(%q<bio-tm_hmm>, [">= 0"])
|
62
|
+
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
63
|
+
s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
|
64
|
+
s.add_dependency(%q<rcov>, [">= 0"])
|
65
|
+
s.add_dependency(%q<bio>, [">= 1.4.1"])
|
66
|
+
end
|
67
|
+
else
|
68
|
+
s.add_dependency(%q<bio>, [">= 1.4.1"])
|
69
|
+
s.add_dependency(%q<fastercsv>, [">= 0"])
|
70
|
+
s.add_dependency(%q<rio>, [">= 0"])
|
71
|
+
s.add_dependency(%q<bio-tm_hmm>, [">= 0"])
|
72
|
+
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
73
|
+
s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
|
74
|
+
s.add_dependency(%q<rcov>, [">= 0"])
|
75
|
+
s.add_dependency(%q<bio>, [">= 1.4.1"])
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
data/lib/bio-octopus.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'bio/appl/octopus'
|
@@ -0,0 +1,229 @@
|
|
1
|
+
require 'fileutils'
require 'shellwords'
require 'tempfile'

require 'fastercsv'
require 'rio'
require 'bio-tm_hmm'
|
3
|
+
|
4
|
+
module Bio
|
5
|
+
class Spoctopus
|
6
|
+
# Runs the external BLOCTOPUS.sh and SPOCTOPUS.sh scripts on a single
# sequence and parses the topology file they produce.
class Wrapper
  # Name given to the sequence in the temporary input files; the scripts
  # derive their output file names from it.
  TMP_SEQUENCE_NAME = 'wrapperSeq'

  # Predict the topology of one protein sequence.
  #
  # sequence::            the amino acid sequence. Stop codons ('*') are
  #                       ignored; the caller's object is left unmodified.
  # blast_database_path:: BLAST database handed to BLOCTOPUS.sh.
  #
  # Returns whatever Result.create_from_output builds from the generated
  # .top file. Raises RuntimeError when either script exits unsuccessfully.
  def calculate(sequence, blast_database_path)
    # Remove stop codons, as these mess things up for the predictor.
    # Work on a copy so the caller's sequence is not changed behind its back.
    cleaned_sequence = sequence.gsub('*', '')

    rio(:tempdir) do |d| # Do all the work in a temporary directory
      FileUtils.cd(d.to_s) do
        # Create the input files
        # * the names file (in base directory)
        # * the fasta file with the sequence in it (in fasta directory)
        # * output file directory
        File.open('names', 'w') { |names| names.puts TMP_SEQUENCE_NAME }

        Dir.mkdir 'fasta'
        File.open("fasta/#{TMP_SEQUENCE_NAME}.fa", 'w') do |fastafile|
          fastafile.puts ">#{TMP_SEQUENCE_NAME}"
          fastafile.puts "#{cleaned_sequence}"
        end

        Dir.mkdir 'tmd'
        work_dir = Dir.pwd

        # First, run BLOCTOPUS to create the profiles, e.g.
        #
        #   ./BLOCTOPUS.sh /tmp/spoctopus/names /tmp/spoctopus/fa
        #     /tmp/spoctopus/tmd blastall blastpgp
        #     /blastdb/UniProt15/uniprot_sprot.fasta makemat -P
        run_script(
          'BLOCTOPUS.sh',
          "#{work_dir}/names",
          "#{work_dir}/fasta",
          "#{work_dir}/tmd",
          'blastall',
          'blastpgp',
          blast_database_path,
          'makemat',
          '-P'
        )

        # Now run SPOCTOPUS to do the actual prediction of SP and TMD,
        # given the profile, e.g.
        #
        #   ./SPOCTOPUS.sh /tmp/spoctopus/names
        #     /tmp/spoctopus/tmd/PSSM_PRF_FILES/
        #     /tmp/spoctopus/tmd/RAW_PRF_FILES/
        #     /tmp/spoctopus/tmd/
        run_script(
          'SPOCTOPUS.sh',
          "#{work_dir}/names",
          "#{work_dir}/tmd/PSSM_PRF_FILES/",
          "#{work_dir}/tmd/RAW_PRF_FILES/",
          "#{work_dir}/tmd/"
        )

        # File.read closes the file again, unlike File.open(...).read
        return Result.create_from_output(File.read("tmd/#{TMP_SEQUENCE_NAME}.top"))
      end
    end
  end

  private

  # Run one of the octopus shell scripts with the given arguments, discarding
  # its standard output. Every word is shell-escaped, so paths containing
  # spaces, quotes or other shell metacharacters are passed through intact.
  #
  # Raises RuntimeError when the script cannot be run or exits non-zero.
  def run_script(script, *arguments)
    command = ([script] + arguments).map { |word| Shellwords.escape(word.to_s) }.join(' ')

    # The scripts report on STDOUT, which is noise here; STDERR is left alone
    # so that real error output stays visible to the user.
    return if system("#{command} >/dev/null")

    raise "Running #{File.basename(script, '.sh')} program failed. $? was #{$?.inspect}."
  end
end
|
86
|
+
|
87
|
+
# Parser for the topology file written by SPOCTOPUS.
class Result
  # Given the fasta-ish file output from spoctopus, parse it into
  # a SignalPeptideTransmembraneDomainProtein.
  #
  # Example without TMD:
  # >wrapperSeq
  # gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg
  # ggggggggggggggggggggggggggggggggggggg
  #
  # Example with 3 TMD
  # >wrapperSeq
  # iiiiiiiiiiMMMMMMMMMMMMMMMMMMMMMooooooooooooooooooooooooooooo
  # ooooMMMMMMMMMMMMMMMMMMMMMiiiiiMMMMMMMMMMMMMMMMMMMMMo
  #
  # Example with SP and no TMD
  # >wrapperSeq
  # nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnSSSSSSSSSSSSSSSooooooooooooooo
  # ooooooooooooooooooooooooooooooooooooo
  #
  # spoctopus_output:: the complete contents of the output file, as a String.
  #
  # Positions stored on the returned objects are 1-based and inclusive.
  #
  # Raises RuntimeError when the output is empty, lacks the '>' header line,
  # contains unexpected characters or describes more than one signal peptide.
  def self.create_from_output(spoctopus_output)
    # split the fasta into the real parts
    lines = spoctopus_output.split("\n")

    # Error checking. The length is tested first because empty output yields
    # no lines at all, and inspecting lines[0] would then fail on nil.
    if lines.length < 2 || lines[0] !~ /\A>/
      raise "Unexpected SPOCTOPUS output file: #{spoctopus_output.inspect}"
    end

    seq = lines[1..-1].join('')

    # Taken from http://octopus.cbr.su.se/OCTOPUS_DATA/readme
    # and supplemented by experiment, as there doesn't seem to be one available for
    # SPOCTOPUS, only OCTOPUS.
    #
    # Currently dips, hairpins, unannotated and reentrants are ignored.
    unless seq =~ /\A[ioMgnSHRrDd\.T]+\z/
      raise "Unexpected characters in SPOCTOPUS output sequence: #{seq}"
    end

    protein = Bio::Transmembrane::SignalPeptideTransmembraneDomainProtein.new

    # deal with nothing proteins
    return protein if seq =~ /\Ag*\z/

    seq.scan(/S+/) do
      match = Regexp.last_match
      if protein.signal?
        raise "Only 1 Signal Peptide is expected!. SPOCTOPUS output was #{seq}"
      end

      signal = Bio::Transmembrane::SignalPeptide.new
      signal.start = match.begin(0) + 1 # convert the 0-based offset to a 1-based position
      signal.stop = match.end(0)        # exclusive 0-based end == inclusive 1-based end
      protein.signal_peptide = signal
    end

    seq.scan(/M+/) do # for each transmembrane domain
      match = Regexp.last_match
      start = match.begin(0) + 1
      stop = match.end(0)

      domain = Bio::Transmembrane::OrientedTransmembraneDomain.new
      domain.start = start
      domain.stop = stop
      domain.orientation = orientation_of(seq, start, stop)

      protein.transmembrane_domains.push domain
    end

    protein
  end

  # Work out the orientation of the transmembrane domain occupying the
  # 1-based, inclusive positions start..stop of the topology string seq.
  #
  # Normally the residue just before the domain decides: 'i' (inside) means
  # the domain runs inside-out, anything else outside-in. A domain at the very
  # start of the protein has no preceding residue, so the residue just after
  # it is used instead: 'o' (outside) means inside-out. A domain spanning the
  # whole sequence has no neighbour at all and is reported as UNKNOWN.
  def self.orientation_of(seq, start, stop)
    domain_class = Bio::Transmembrane::OrientedTransmembraneDomain

    if start == 1
      return domain_class::UNKNOWN if stop == seq.length # all TMD, so we don't know

      # seq[stop, 1] is the first residue after the domain (0-based index stop)
      seq[stop, 1] == 'o' ? domain_class::INSIDE_OUT : domain_class::OUTSIDE_IN
    else
      # seq[start - 2, 1] is the last residue before the domain
      seq[start - 2, 1] == 'i' ? domain_class::INSIDE_OUT : domain_class::OUTSIDE_IN
    end
  end
  private_class_method :orientation_of
end
|
176
|
+
|
177
|
+
# Parses the tab-separated report printed by the biooctopus script back into
# transmembrane protein objects. One row describes one transmembrane domain:
#
#   protein name, transmembrane type, start, stop, orientation
#
# for example (columns are separated by tabs in the real file)
#
#   pfa|PFD0635c  I        1833  1853  outside_in
#   pfa|PFF1525c  Unknown  2     22    outside_in
#   pfa|PFF1525c  Unknown  160   180   inside_out
#
# and a protein without any domain is reported on a single two-column row:
#
#   pfa|PFB0610c  No Transmembrane Domain Found
#
# Consecutive rows carrying the same protein name belong to one protein.
class WrapperParser
  # The input handed to the constructor. It is given unchanged to
  # FasterCSV.foreach when the report is read.
  attr_accessor :io

  def initialize(io)
    @io = io
  end

  # Return an array of transmembrane proteins, in the order in which they
  # first appear in the report. Empty rows are skipped.
  def transmembrane_proteins
    proteins = []
    pending = nil # protein whose domain rows are still being collected

    FasterCSV.foreach(@io, :col_sep => "\t") do |fields|
      next if fields.empty?

      protein_id, type_or_message, start, stop, orientation = fields

      # A different name means the previous protein is complete.
      if !pending.nil? && pending.name != protein_id
        proteins << pending
        pending = nil
      end

      if type_or_message == 'No Transmembrane Domain Found'
        # Proteins without domains occupy exactly one row and are finished at once.
        domainless = Bio::Transmembrane::OrientedTransmembraneDomainProtein.new
        domainless.name = protein_id
        proteins << domainless
        pending = nil
        next
      end

      pending = Bio::Transmembrane::OrientedTransmembraneDomainProtein.new unless pending
      pending.name = protein_id
      domain = Bio::Transmembrane::OrientedTransmembraneDomain.new(start, stop, orientation)
      pending.transmembrane_domains.push domain
    end

    # The last protein has no following row to flush it.
    proteins << pending unless pending.nil?

    proteins
  end
end
|
228
|
+
end
|
229
|
+
end
|
data/test/helper.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'rubygems'
require 'bundler'

# Activate the default and development gem groups; when that fails, print the
# reason plus a hint and exit with Bundler's own status code.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  $stderr.puts bundler_error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end

require 'test/unit'

# Put the test directory and then the library directory at the front of the
# load path, so the tests exercise the code in this checkout.
test_directory = File.dirname(__FILE__)
$LOAD_PATH.unshift(test_directory)
$LOAD_PATH.unshift(File.join(test_directory, '..', 'lib'))

require 'bio-octopus'

# Re-opened so that helpers shared by all test cases have a home.
class Test::Unit::TestCase
end
|
@@ -0,0 +1,138 @@
|
|
1
|
+
require 'helper'
|
2
|
+
require 'tempfile'
|
3
|
+
require 'bio-tm_hmm'
|
4
|
+
|
5
|
+
class TestBioOctopus < Test::Unit::TestCase
|
6
|
+
def test_no_tmd_result
|
7
|
+
res = Bio::Spoctopus::Result.create_from_output([
|
8
|
+
'>wrapperSeq',
|
9
|
+
'gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg',
|
10
|
+
'ggggggggggggggggggggggggggggggggggggg'
|
11
|
+
].join("\n"))
|
12
|
+
|
13
|
+
assert_kind_of Bio::Transmembrane::SignalPeptideTransmembraneDomainProtein, res
|
14
|
+
assert_equal [], res.transmembrane_domains
|
15
|
+
assert_equal false, res.signal?
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_two_tmd_result
|
19
|
+
res = Bio::Spoctopus::Result.create_from_output([
|
20
|
+
'>wrapperSeq',
|
21
|
+
'iiiiiiiiiiMMMMMMMMMMMMMMMMMMMMMooooooooooooooooooooooooooooo',
|
22
|
+
'ooooMMMMMMMMMMMMMMMMMMMMMiiiiiMMMMMMMMMMMMMMMMMMMMMo'
|
23
|
+
].join("\n"))
|
24
|
+
|
25
|
+
assert_kind_of Bio::Transmembrane::SignalPeptideTransmembraneDomainProtein, res
|
26
|
+
assert_equal 3, res.transmembrane_domains.length
|
27
|
+
assert_equal 11, res.transmembrane_domains[0].start
|
28
|
+
assert_equal 31, res.transmembrane_domains[0].stop
|
29
|
+
assert_equal 112-1, res.transmembrane_domains[2].stop
|
30
|
+
|
31
|
+
# test orientation
|
32
|
+
assert_equal Bio::Transmembrane::OrientedTransmembraneDomain::INSIDE_OUT, res.transmembrane_domains[0].orientation
|
33
|
+
assert_equal Bio::Transmembrane::OrientedTransmembraneDomain::OUTSIDE_IN, res.transmembrane_domains[1].orientation
|
34
|
+
assert_equal Bio::Transmembrane::OrientedTransmembraneDomain::INSIDE_OUT, res.transmembrane_domains[2].orientation
|
35
|
+
end
|
36
|
+
|
37
|
+
def test_all_tmd_result
|
38
|
+
res = Bio::Spoctopus::Result.create_from_output([
|
39
|
+
'>wrapperSeq',
|
40
|
+
'MMMMMMMMMMMMMMMMMMMMM'
|
41
|
+
].join("\n"))
|
42
|
+
|
43
|
+
assert_equal 1, res.transmembrane_domains.length
|
44
|
+
assert_equal 1, res.transmembrane_domains[0].start
|
45
|
+
assert_equal 21, res.transmembrane_domains[0].stop
|
46
|
+
assert_equal Bio::Transmembrane::OrientedTransmembraneDomain::UNKNOWN, res.transmembrane_domains[0].orientation
|
47
|
+
end
|
48
|
+
|
49
|
+
def test_tmd_at_end_result
|
50
|
+
res = Bio::Spoctopus::Result.create_from_output([
|
51
|
+
'>wrapperSeq',
|
52
|
+
'oooMMMMMMMMMMMMMMMMMMMMM'
|
53
|
+
].join("\n"))
|
54
|
+
|
55
|
+
assert_equal 1, res.transmembrane_domains.length
|
56
|
+
assert_equal Bio::Transmembrane::OrientedTransmembraneDomain::OUTSIDE_IN, res.transmembrane_domains[0].orientation
|
57
|
+
end
|
58
|
+
|
59
|
+
def test_signal_peptide
|
60
|
+
res = Bio::Spoctopus::Result.create_from_output([
|
61
|
+
'>wrapperSeq',
|
62
|
+
'nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnSSSSSSSSSSSSSSSoooooooooooooooooooooooooooooooooooooooooooooooooooo'
|
63
|
+
].join("\n"))
|
64
|
+
|
65
|
+
assert res.signal?
|
66
|
+
assert_equal false, res.has_domain?
|
67
|
+
assert_equal 31, res.signal_peptide.start
|
68
|
+
assert_equal 45, res.signal_peptide.stop
|
69
|
+
end
|
70
|
+
|
71
|
+
def test_reentrant
|
72
|
+
res = Bio::Spoctopus::Result.create_from_output([
|
73
|
+
'>wrapperSeq',
|
74
|
+
'iiiirrrrrrriiiiiiiiiiiMMMMMM
|
75
|
+
MMMMMMMMMMMMMMMoooooooooooooooMMMMMMMMMMMMMMMMMMMMMiiiiiiiiiiiiiMMMMMMMMMMMMMMMMMMMMMoMMMMMMMMMMMMMMMMMMMMMiiiiiiiiiiiiiMMMMMMMMMMMM
|
76
|
+
MMMMMMMMMooooooooooooooooooooMMMMMMMMMMMMMMMMMMMMMiiiiiiiiiiiiiiMMMMMMMMMMMMMMMMMMMMMoooooooooooMMMMMMMMMMMMMMMMMMMMMiiiiiiiiiMMMMMM
|
77
|
+
MMMMMMMMMMMMMMMooooMMMMMMMMMMMMMMMMMMMMMiiiiiiiiiiiiiiiiiiiMMMMMMMMMMMMMMMMMMMMMooooooooooooooooMMMMMMMMMMMMMMMMMMMMMiiiiiiiiiiiiiii
|
78
|
+
iiiiiiiiiMMMMMMMMMMMMMMMMMMMMMooooo'
|
79
|
+
].join("\n"))
|
80
|
+
|
81
|
+
assert_equal false, res.signal?
|
82
|
+
assert res.has_domain?
|
83
|
+
end
|
84
|
+
|
85
|
+
def test_wrapper_read
|
86
|
+
Tempfile.open('spock') do |tempfile|
|
87
|
+
tempfile.puts ''
|
88
|
+
tempfile.flush
|
89
|
+
|
90
|
+
pees = Bio::Spoctopus::WrapperParser.new(tempfile.path).transmembrane_proteins
|
91
|
+
assert_equal [], pees
|
92
|
+
end
|
93
|
+
|
94
|
+
Tempfile.open('spock') do |tempfile|
|
95
|
+
tempfile.puts 'pfa|PFD0635c I 1833 1853 outside_in'
|
96
|
+
tempfile.flush
|
97
|
+
|
98
|
+
pees = Bio::Spoctopus::WrapperParser.new(tempfile.path).transmembrane_proteins
|
99
|
+
assert_equal 1, pees.length
|
100
|
+
r = pees[0]
|
101
|
+
assert_equal 'pfa|PFD0635c', r.name
|
102
|
+
assert_equal 1, r.transmembrane_domains.length
|
103
|
+
t = r.transmembrane_domains[0]
|
104
|
+
assert_equal 1833, t.start
|
105
|
+
assert_equal 1853, t.stop
|
106
|
+
assert r.transmembrane_type_1?
|
107
|
+
end
|
108
|
+
|
109
|
+
Tempfile.open('spock') do |tempfile|
|
110
|
+
tempfile.puts 'pfa|PFD0635c I 1833 1853 outside_in
|
111
|
+
pfa|PFD0595c II 2 22 inside_out
|
112
|
+
pfa|PFB0610c No Transmembrane Domain Found
|
113
|
+
pfa|PFF1525c Unknown 2 22 outside_in
|
114
|
+
pfa|PFF1525c Unknown 160 180 inside_out
|
115
|
+
pfa|PFF1525c Unknown 188 208 outside_in'
|
116
|
+
tempfile.flush
|
117
|
+
|
118
|
+
pees = Bio::Spoctopus::WrapperParser.new(tempfile.path).transmembrane_proteins
|
119
|
+
assert_equal 4, pees.length
|
120
|
+
r = pees[0]
|
121
|
+
assert_equal 'pfa|PFD0635c', r.name
|
122
|
+
assert_equal 1, r.transmembrane_domains.length
|
123
|
+
t = r.transmembrane_domains[0]
|
124
|
+
assert_equal 1833, t.start
|
125
|
+
assert_equal 1853, t.stop
|
126
|
+
assert r.transmembrane_type_1?
|
127
|
+
|
128
|
+
r = pees[1]
|
129
|
+
|
130
|
+
assert r.transmembrane_type_2?
|
131
|
+
|
132
|
+
assert_equal 'pfa|PFB0610c', pees[2].name
|
133
|
+
assert_equal false, pees[2].has_domain?
|
134
|
+
|
135
|
+
assert_equal 3, pees[3].transmembrane_domains.length
|
136
|
+
end
|
137
|
+
end
|
138
|
+
end
|
metadata
ADDED
@@ -0,0 +1,199 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bio-octopus
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 29
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 1
|
10
|
+
version: 0.0.1
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Ben J Woodcroft
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-04-10 00:00:00 +10:00
|
19
|
+
default_executable: biooctopus
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
type: :runtime
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
hash: 5
|
29
|
+
segments:
|
30
|
+
- 1
|
31
|
+
- 4
|
32
|
+
- 1
|
33
|
+
version: 1.4.1
|
34
|
+
name: bio
|
35
|
+
version_requirements: *id001
|
36
|
+
prerelease: false
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
type: :runtime
|
39
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
hash: 3
|
45
|
+
segments:
|
46
|
+
- 0
|
47
|
+
version: "0"
|
48
|
+
name: fastercsv
|
49
|
+
version_requirements: *id002
|
50
|
+
prerelease: false
|
51
|
+
- !ruby/object:Gem::Dependency
|
52
|
+
type: :runtime
|
53
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
54
|
+
none: false
|
55
|
+
requirements:
|
56
|
+
- - ">="
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
hash: 3
|
59
|
+
segments:
|
60
|
+
- 0
|
61
|
+
version: "0"
|
62
|
+
name: rio
|
63
|
+
version_requirements: *id003
|
64
|
+
prerelease: false
|
65
|
+
- !ruby/object:Gem::Dependency
|
66
|
+
type: :runtime
|
67
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
68
|
+
none: false
|
69
|
+
requirements:
|
70
|
+
- - ">="
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
hash: 3
|
73
|
+
segments:
|
74
|
+
- 0
|
75
|
+
version: "0"
|
76
|
+
name: bio-tm_hmm
|
77
|
+
version_requirements: *id004
|
78
|
+
prerelease: false
|
79
|
+
- !ruby/object:Gem::Dependency
|
80
|
+
type: :development
|
81
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
82
|
+
none: false
|
83
|
+
requirements:
|
84
|
+
- - ~>
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
hash: 23
|
87
|
+
segments:
|
88
|
+
- 1
|
89
|
+
- 0
|
90
|
+
- 0
|
91
|
+
version: 1.0.0
|
92
|
+
name: bundler
|
93
|
+
version_requirements: *id005
|
94
|
+
prerelease: false
|
95
|
+
- !ruby/object:Gem::Dependency
|
96
|
+
type: :development
|
97
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
98
|
+
none: false
|
99
|
+
requirements:
|
100
|
+
- - ~>
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
hash: 7
|
103
|
+
segments:
|
104
|
+
- 1
|
105
|
+
- 5
|
106
|
+
- 2
|
107
|
+
version: 1.5.2
|
108
|
+
name: jeweler
|
109
|
+
version_requirements: *id006
|
110
|
+
prerelease: false
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
type: :development
|
113
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
114
|
+
none: false
|
115
|
+
requirements:
|
116
|
+
- - ">="
|
117
|
+
- !ruby/object:Gem::Version
|
118
|
+
hash: 3
|
119
|
+
segments:
|
120
|
+
- 0
|
121
|
+
version: "0"
|
122
|
+
name: rcov
|
123
|
+
version_requirements: *id007
|
124
|
+
prerelease: false
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
type: :development
|
127
|
+
requirement: &id008 !ruby/object:Gem::Requirement
|
128
|
+
none: false
|
129
|
+
requirements:
|
130
|
+
- - ">="
|
131
|
+
- !ruby/object:Gem::Version
|
132
|
+
hash: 5
|
133
|
+
segments:
|
134
|
+
- 1
|
135
|
+
- 4
|
136
|
+
- 1
|
137
|
+
version: 1.4.1
|
138
|
+
name: bio
|
139
|
+
version_requirements: *id008
|
140
|
+
prerelease: false
|
141
|
+
description: Running and parsing of the protein transmembrane domain predictor octopus
|
142
|
+
email: gmail.com after donttrustben
|
143
|
+
executables:
|
144
|
+
- biooctopus
|
145
|
+
extensions: []
|
146
|
+
|
147
|
+
extra_rdoc_files:
|
148
|
+
- LICENSE.txt
|
149
|
+
- README.rdoc
|
150
|
+
files:
|
151
|
+
- .document
|
152
|
+
- Gemfile
|
153
|
+
- LICENSE.txt
|
154
|
+
- README.rdoc
|
155
|
+
- Rakefile
|
156
|
+
- VERSION
|
157
|
+
- bin/biooctopus
|
158
|
+
- bio-octopus.gemspec
|
159
|
+
- lib/bio-octopus.rb
|
160
|
+
- lib/bio/appl/octopus.rb
|
161
|
+
- test/helper.rb
|
162
|
+
- test/test_bio-octopus.rb
|
163
|
+
has_rdoc: true
|
164
|
+
homepage: http://github.com/wwood/bioruby-octopus
|
165
|
+
licenses:
|
166
|
+
- MIT
|
167
|
+
post_install_message:
|
168
|
+
rdoc_options: []
|
169
|
+
|
170
|
+
require_paths:
|
171
|
+
- lib
|
172
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
173
|
+
none: false
|
174
|
+
requirements:
|
175
|
+
- - ">="
|
176
|
+
- !ruby/object:Gem::Version
|
177
|
+
hash: 3
|
178
|
+
segments:
|
179
|
+
- 0
|
180
|
+
version: "0"
|
181
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
182
|
+
none: false
|
183
|
+
requirements:
|
184
|
+
- - ">="
|
185
|
+
- !ruby/object:Gem::Version
|
186
|
+
hash: 3
|
187
|
+
segments:
|
188
|
+
- 0
|
189
|
+
version: "0"
|
190
|
+
requirements: []
|
191
|
+
|
192
|
+
rubyforge_project:
|
193
|
+
rubygems_version: 1.6.2
|
194
|
+
signing_key:
|
195
|
+
specification_version: 3
|
196
|
+
summary: Running and parsing of the protein transmembrane domain predictor octopus
|
197
|
+
test_files:
|
198
|
+
- test/helper.rb
|
199
|
+
- test/test_bio-octopus.rb
|