bio-octopus 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +3 -0
- data/VERSION +1 -1
- data/bin/biooctopus +24 -3
- data/bin/biooctopus.rb +57 -0
- data/bio-octopus.gemspec +7 -4
- data/lib/bio/appl/octopus.rb +19 -62
- data/test/data/dummyLegacyDb +49 -0
- data/test/data/dummyLegacyDb.phr +0 -0
- data/test/data/dummyLegacyDb.pin +0 -0
- data/test/data/dummyLegacyDb.psq +0 -0
- data/test/test_bio-octopus.rb +20 -54
- metadata +11 -5
data/Rakefile
CHANGED
|
@@ -23,6 +23,9 @@ Jeweler::Tasks.new do |gem|
|
|
|
23
23
|
# and development dependencies are only needed for development (ie running rake tasks, tests, etc)
|
|
24
24
|
# gem.add_runtime_dependency 'jabber4r', '> 0.1'
|
|
25
25
|
# gem.add_development_dependency 'rspec', '> 1.2.3'
|
|
26
|
+
|
|
27
|
+
# bin/biooctopus.rb exists only so that IDEs recognise the file type of bin/biooctopus; the two files are hard-linked, so only one of them needs to be packaged.
|
|
28
|
+
gem.files.exclude 'bin/biooctopus.rb'
|
|
26
29
|
end
|
|
27
30
|
Jeweler::RubygemsDotOrgTasks.new
|
|
28
31
|
|
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
0.0
|
|
1
|
+
0.1.0
|
data/bin/biooctopus
CHANGED
|
@@ -5,14 +5,35 @@
|
|
|
5
5
|
# Author:: Ben J Woodcroft
|
|
6
6
|
# Copyright:: 2011
|
|
7
7
|
# License:: The Ruby License
|
|
8
|
+
require 'rubygems'
|
|
9
|
+
require 'optparse'
|
|
10
|
+
require 'bio'
|
|
11
|
+
require 'bio-octopus'
|
|
8
12
|
|
|
9
|
-
|
|
13
|
+
# Help text shown after the usage line in the option parser banner.
USAGE = "This script reads a fasta file in, and uses the BLOCTOPUS and SPOCTOPUS scripts, returning a structured result for each transmembrane domain. Requires the scripts and a legacy BLAST formatted database to be installed before use. They are available from http://octopus.cbr.su.se/"
|
|
10
14
|
|
|
11
15
|
if $0 == __FILE__
|
|
16
|
+
o = OptionParser.new do |opts|
|
|
17
|
+
indent = ' '
|
|
18
|
+
opts.banner = [
|
|
19
|
+
"Usage: biooctopus <FASTA_FILE> <BLASTDB>\n",
|
|
20
|
+
USAGE
|
|
21
|
+
].flatten
|
|
22
|
+
end
|
|
23
|
+
o.parse!
|
|
24
|
+
|
|
25
|
+
unless ARGV.length == 2
|
|
26
|
+
puts o.help
|
|
27
|
+
exit
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
fasta_path = ARGV[0]
|
|
31
|
+
blastdb_path = ARGV[1]
|
|
32
|
+
|
|
12
33
|
runner = Bio::Spoctopus::Wrapper.new
|
|
13
34
|
|
|
14
|
-
Bio::FlatFile.auto(
|
|
15
|
-
result = runner.calculate(seq.seq)
|
|
35
|
+
Bio::FlatFile.auto(File.open(fasta_path)).each do |seq|
|
|
36
|
+
result = runner.calculate(seq.seq, blastdb_path)
|
|
16
37
|
name = seq.definition
|
|
17
38
|
|
|
18
39
|
if result.has_domain?
|
data/bin/biooctopus.rb
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
#
|
|
3
|
+
# BioRuby bio-octopus Plugin
|
|
4
|
+
# Version 0.0.1
|
|
5
|
+
# Author:: Ben J Woodcroft
|
|
6
|
+
# Copyright:: 2011
|
|
7
|
+
# License:: The Ruby License
|
|
8
|
+
require 'rubygems'
|
|
9
|
+
require 'optparse'
|
|
10
|
+
require 'bio'
|
|
11
|
+
require 'bio-octopus'
|
|
12
|
+
|
|
13
|
+
# Help text shown after the usage line in the option parser banner.
USAGE = "This script reads a fasta file in, and uses the BLOCTOPUS and SPOCTOPUS scripts, returning a structured result for each transmembrane domain. Requires the scripts and a legacy BLAST formatted database to be installed before use. They are available from http://octopus.cbr.su.se/"
|
|
14
|
+
|
|
15
|
+
if $0 == __FILE__
|
|
16
|
+
o = OptionParser.new do |opts|
|
|
17
|
+
indent = ' '
|
|
18
|
+
opts.banner = [
|
|
19
|
+
"Usage: biooctopus <FASTA_FILE> <BLASTDB>\n",
|
|
20
|
+
USAGE
|
|
21
|
+
].flatten
|
|
22
|
+
end
|
|
23
|
+
o.parse!
|
|
24
|
+
|
|
25
|
+
unless ARGV.length == 2
|
|
26
|
+
puts o.help
|
|
27
|
+
exit
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
fasta_path = ARGV[0]
|
|
31
|
+
blastdb_path = ARGV[1]
|
|
32
|
+
|
|
33
|
+
runner = Bio::Spoctopus::Wrapper.new
|
|
34
|
+
|
|
35
|
+
Bio::FlatFile.auto(File.open(fasta_path)).each do |seq|
|
|
36
|
+
result = runner.calculate(seq.seq, blastdb_path)
|
|
37
|
+
name = seq.definition
|
|
38
|
+
|
|
39
|
+
if result.has_domain?
|
|
40
|
+
# At least one TMD found. Output each on a separate line
|
|
41
|
+
result.transmembrane_domains.each do |tmd|
|
|
42
|
+
puts [
|
|
43
|
+
name,
|
|
44
|
+
result.transmembrane_type,
|
|
45
|
+
tmd.start,
|
|
46
|
+
tmd.stop,
|
|
47
|
+
tmd.orientation
|
|
48
|
+
].join("\t")
|
|
49
|
+
end
|
|
50
|
+
else
|
|
51
|
+
puts [
|
|
52
|
+
name,
|
|
53
|
+
'No Transmembrane Domain Found'
|
|
54
|
+
].join("\t")
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
end
|
data/bio-octopus.gemspec
CHANGED
|
@@ -5,15 +5,14 @@
|
|
|
5
5
|
|
|
6
6
|
Gem::Specification.new do |s|
|
|
7
7
|
s.name = %q{bio-octopus}
|
|
8
|
-
s.version = "0.0
|
|
8
|
+
s.version = "0.1.0"
|
|
9
9
|
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
|
11
11
|
s.authors = ["Ben J Woodcroft"]
|
|
12
|
-
s.date = %q{2011-04-
|
|
13
|
-
s.default_executable = %q{biooctopus}
|
|
12
|
+
s.date = %q{2011-04-17}
|
|
14
13
|
s.description = %q{Running and parsing of the protein transmembrane domain predictor octopus}
|
|
15
14
|
s.email = %q{gmail.com after donttrustben}
|
|
16
|
-
s.executables = ["biooctopus"]
|
|
15
|
+
s.executables = ["biooctopus.rb", "biooctopus"]
|
|
17
16
|
s.extra_rdoc_files = [
|
|
18
17
|
"LICENSE.txt",
|
|
19
18
|
"README.rdoc"
|
|
@@ -29,6 +28,10 @@ Gem::Specification.new do |s|
|
|
|
29
28
|
"bio-octopus.gemspec",
|
|
30
29
|
"lib/bio-octopus.rb",
|
|
31
30
|
"lib/bio/appl/octopus.rb",
|
|
31
|
+
"test/data/dummyLegacyDb",
|
|
32
|
+
"test/data/dummyLegacyDb.phr",
|
|
33
|
+
"test/data/dummyLegacyDb.pin",
|
|
34
|
+
"test/data/dummyLegacyDb.psq",
|
|
32
35
|
"test/helper.rb",
|
|
33
36
|
"test/test_bio-octopus.rb"
|
|
34
37
|
]
|
data/lib/bio/appl/octopus.rb
CHANGED
|
@@ -1,16 +1,24 @@
|
|
|
1
1
|
require 'fastercsv'
|
|
2
2
|
require 'bio-tm_hmm'
|
|
3
|
+
require 'rio'
|
|
3
4
|
|
|
4
5
|
module Bio
|
|
5
6
|
class Spoctopus
|
|
6
7
|
class Wrapper
|
|
7
8
|
TMP_SEQUENCE_NAME = 'wrapperSeq'
|
|
9
|
+
BLOCTOPUS_DEFAULT_PATH='BLOCTOPUS.sh'
|
|
10
|
+
SPOCTOPUS_DEFAULT_PATH='SPOCTOPUS.sh'
|
|
11
|
+
|
|
12
|
+
# The path to the BLOCTOPUS executable, by default BLOCTOPUS_DEFAULT_PATH
|
|
13
|
+
attr_accessor :bloctopus_executable
|
|
14
|
+
|
|
15
|
+
# The path to the SPOCTOPUS executable, by default SPOCTOPUS_DEFAULT_PATH
|
|
16
|
+
attr_accessor :spoctopus_executable
|
|
8
17
|
|
|
9
18
|
def calculate(sequence, blast_database_path)
|
|
10
19
|
# Remove stop codons, as these mess things up for the predictor
|
|
11
20
|
sequence.gsub!('*','')
|
|
12
21
|
|
|
13
|
-
|
|
14
22
|
rio(:tempdir) do |d| # Do all the work in a temporary directory
|
|
15
23
|
FileUtils.cd(d.to_s) do
|
|
16
24
|
|
|
@@ -36,9 +44,10 @@ module Bio
|
|
|
36
44
|
# ben@ben:~/bioinfo/spoctopus$ ./BLOCTOPUS.sh /tmp/spoctopus/names /tmp/spoctopus/fa
|
|
37
45
|
# /tmp/spoctopus/tmd blastall blastpgp`
|
|
38
46
|
# /blastdb/UniProt15/uniprot_sprot.fasta makemat -P
|
|
47
|
+
#
|
|
39
48
|
Tempfile.open('octopuserr') do |err|
|
|
40
49
|
result = system [
|
|
41
|
-
|
|
50
|
+
@bloctopus_executable.nil? ? BLOCTOPUS_DEFAULT_PATH : @bloctopus_executable,
|
|
42
51
|
"#{Dir.pwd}/names",
|
|
43
52
|
"#{Dir.pwd}/fasta",
|
|
44
53
|
"#{Dir.pwd}/tmd",
|
|
@@ -47,12 +56,12 @@ module Bio
|
|
|
47
56
|
"'#{blast_database_path}'",
|
|
48
57
|
'makemat',
|
|
49
58
|
'-P',
|
|
50
|
-
'>/dev/null' # SPOCTOPUS doesn't understand the concept of STDERR
|
|
51
|
-
|
|
59
|
+
'>/dev/null', # SPOCTOPUS doesn't understand the concept of STDERR
|
|
60
|
+
"2>#{err.path}"
|
|
52
61
|
].join(' ')
|
|
53
62
|
|
|
54
63
|
if !result
|
|
55
|
-
raise Exception, "Running BLOCTOPUS program failed. $? was #{$?.inspect}. STDERR
|
|
64
|
+
# File.read closes the captured-STDERR file after reading; the message text is unchanged.
raise Exception, "Running BLOCTOPUS program failed. $? was #{$?.inspect}. Has it been installed properly? STDERR: #{File.read(err.path)}"
|
|
56
65
|
end
|
|
57
66
|
end
|
|
58
67
|
|
|
@@ -64,17 +73,17 @@ module Bio
|
|
|
64
73
|
# /tmp/spoctopus/tmd/
|
|
65
74
|
Tempfile.open('octopuserr') do |err|
|
|
66
75
|
result = system [
|
|
67
|
-
|
|
76
|
+
@spoctopus_executable.nil? ? SPOCTOPUS_DEFAULT_PATH : @soctopus_executable,
|
|
68
77
|
"#{Dir.pwd}/names",
|
|
69
78
|
"#{Dir.pwd}/tmd/PSSM_PRF_FILES/",
|
|
70
79
|
"#{Dir.pwd}/tmd/RAW_PRF_FILES/",
|
|
71
80
|
"#{Dir.pwd}/tmd/",
|
|
72
|
-
'>/dev/null' # SPOCTOPUS doesn't understand the concept of STDERR
|
|
73
|
-
|
|
81
|
+
'>/dev/null', # SPOCTOPUS doesn't understand the concept of STDERR
|
|
82
|
+
"2>#{err.path}"
|
|
74
83
|
].join(' ')
|
|
75
84
|
|
|
76
85
|
if !result
|
|
77
|
-
raise Exception, "Running SPOCTOPUS program failed. $? was #{$?.inspect}. STDERR
|
|
86
|
+
# File.read closes the captured-STDERR file after reading; the message text is unchanged.
raise Exception, "Running SPOCTOPUS program failed. $? was #{$?.inspect}. Has it been installed properly? STDERR: #{File.read(err.path)}"
|
|
78
87
|
end
|
|
79
88
|
end
|
|
80
89
|
|
|
@@ -109,7 +118,7 @@ module Bio
|
|
|
109
118
|
|
|
110
119
|
# Error checking
|
|
111
120
|
unless lines[0].match(/^\>/) and lines.length > 1
|
|
112
|
-
raise Exception, "Unexpected
|
|
121
|
+
raise Exception, "Unexpected OCTOPUS output file: #{spoctopus_output.inspect}. STDERR: #{File.open(err.path).read}"
|
|
113
122
|
end
|
|
114
123
|
|
|
115
124
|
seq = lines[1..(lines.length-1)].join('')
|
|
@@ -173,57 +182,5 @@ module Bio
|
|
|
173
182
|
return tmd
|
|
174
183
|
end
|
|
175
184
|
end
|
|
176
|
-
|
|
177
|
-
# Read the output from this file when it is run as a script and return
|
|
178
|
-
# useful programmatic objects - TransmembraneProteins
|
|
179
|
-
#
|
|
180
|
-
#pfa|PFD0635c I 1833 1853 outside_in
|
|
181
|
-
#pfa|PFD0595c I 2 22 outside_in
|
|
182
|
-
#pfa|PFB0610c No Transmembrane Domain Found
|
|
183
|
-
#pfa|PFF1525c Unknown 2 22 outside_in
|
|
184
|
-
#pfa|PFF1525c Unknown 160 180 inside_out
|
|
185
|
-
#pfa|PFF1525c Unknown 188 208 outside_in
|
|
186
|
-
class WrapperParser
|
|
187
|
-
attr_accessor :io
|
|
188
|
-
|
|
189
|
-
def initialize(io)
|
|
190
|
-
@io = io
|
|
191
|
-
end
|
|
192
|
-
|
|
193
|
-
# Return an array of transmembrane proteins
|
|
194
|
-
def transmembrane_proteins
|
|
195
|
-
transmembrane_proteins = []
|
|
196
|
-
current_transmembrane_protein = nil
|
|
197
|
-
|
|
198
|
-
FasterCSV.foreach(@io, :col_sep => "\t") do |row|
|
|
199
|
-
next if row.length == 0
|
|
200
|
-
current_protein_id = row[0]
|
|
201
|
-
|
|
202
|
-
# if the protein ID changes then return the last protein
|
|
203
|
-
# (if there is one)
|
|
204
|
-
unless current_transmembrane_protein.nil? or
|
|
205
|
-
current_transmembrane_protein.name == current_protein_id
|
|
206
|
-
transmembrane_proteins.push current_transmembrane_protein
|
|
207
|
-
current_transmembrane_protein = nil
|
|
208
|
-
end
|
|
209
|
-
|
|
210
|
-
# deal with no tmd proteins
|
|
211
|
-
if row[1] == 'No Transmembrane Domain Found'
|
|
212
|
-
prot = Bio::Transmembrane::OrientedTransmembraneDomainProtein.new
|
|
213
|
-
prot.name = current_protein_id
|
|
214
|
-
transmembrane_proteins.push prot
|
|
215
|
-
current_transmembrane_protein = nil
|
|
216
|
-
else
|
|
217
|
-
current_transmembrane_protein ||= Bio::Transmembrane::OrientedTransmembraneDomainProtein.new
|
|
218
|
-
current_transmembrane_protein.name = current_protein_id
|
|
219
|
-
current_transmembrane_protein.transmembrane_domains.push Bio::Transmembrane::OrientedTransmembraneDomain.new(row[2],row[3],row[4])
|
|
220
|
-
end
|
|
221
|
-
end
|
|
222
|
-
# push the last one
|
|
223
|
-
transmembrane_proteins.push current_transmembrane_protein unless current_transmembrane_protein.nil?
|
|
224
|
-
|
|
225
|
-
return transmembrane_proteins
|
|
226
|
-
end
|
|
227
|
-
end
|
|
228
185
|
end
|
|
229
186
|
end
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
>PB000746.03.0 chloroquine resistance transporter, putative
|
|
2
|
+
FKIIGNEMKNNVYVYLLSILYLCVCVMNKVFAKRTLNKMGNYSFVTSETHNIICIIVFQLLYFIYRKTSSSSVYKNESQK
|
|
3
|
+
NFGWQFFLISLLDASTVIISMIGLTRTTGNIQSFIMQLIIPVNMYFCFMFLGYRYHLFNYLGAFIILITIAVVETFLSFE
|
|
4
|
+
TQGENSIIFNLIMISALIPLSFSNMTREVVFKKHKINILRLNAMVVLFQFFTSLLVLPVYNIPFLKEIYMPFSEMSTNIN
|
|
5
|
+
NGLRCLFYGENTIVENCGVGMVKMCDNCEGAWKTFITFSFFNICDNLLACYIIDKFSTMTYTIVSCIQGPAITIAYYFKF
|
|
6
|
+
LAGDAVRKPRILDFLTLFGYLFGTIIYRIGNIILEKKQVIKSQNSNDSEAELTSIETSRA
|
|
7
|
+
>cgd6_4890 transporter protein with conserved Zn ribbon C11C7CxxC
|
|
8
|
+
MNGSGRELLLGSESENEQIQIITLIKNGMSALNEGVIKPCKENFFLIVCMMSLLISSVFNSVFFKKMTSAMPNHVWFLTQ
|
|
9
|
+
LTSALYIPLFGLVLLISYFRGELSRDNLEFPMSKFWMMGFFDAFSSILTLLASTHTSGVMQVVLGQMCTPITLVMLSSIC
|
|
10
|
+
KDRFHKLQYIGATVMVMGIFIVKSTLILGIRKSSGAENSNQLIFNILFVIACIPASASSVYKDLSFREYSSLNENYLQFC
|
|
11
|
+
VAATQVIIGFILVPINSLSILGPQKIEMNQIPSLLFDGANCLFLKRNSITESCGGELQRPCDNCNSAQFPVLIYFIANVI
|
|
12
|
+
CNVFSVLVLKHGTASTGFIVSTLRLPVTTLVFFSPRLVGKEATEPKVEDLIGILILILGLILYRLGSVKINQEEEEIQTE
|
|
13
|
+
EEPERDYYELSSMTYSKETIDKIKSNTSSTTCSFNIEYDSSHFHSS
|
|
14
|
+
>PCAS_122020 chloroquine resistance transporter, putative
|
|
15
|
+
MTGMKKGKNKKKNVKNDERYKELDSLISNDSEIGNNSRWGGAKRICKLIGNEMRNNIYVYLLSILYLCVSVMNKVFSKRT
|
|
16
|
+
LNKIGNYSFVTSEVHNMICTIVFQLLYFIYRKTSNPASRNESQKNFGWQFFLISLLDASTVIITMIGLTRTTGNIQSFIM
|
|
17
|
+
QLIIPVNMYFCFIFLGYRYHLFNYLGAFIILITIAAVETVLSYETQSDNSIIFNLIMIFALIPLSFSNMTREVVFKKHKI
|
|
18
|
+
NIIRLNAMVALFQFFTSLLVLPVYNISFLKEIYMPFSEMGTNINDGLRCLFYGQSTIVENCGVGMVKMCDQCEGAWKTFI
|
|
19
|
+
TYSFFNICDNLLVCYIIDKFSTMTYTIVSCIQGPAITIAYYFKFLAGDVVRQPRLLDFLTLFGYLLGTIIYRIGNIILEK
|
|
20
|
+
KKMLKAQNTDGSEAELTSIETSTA
|
|
21
|
+
>TA12005 transporter protein cg10, putative
|
|
22
|
+
MLKEGSSLDLSASSSSGTLRSDNSFGNSPLDRITSLLILIYKSIRACFKWIYSKSFGIICILFVILDVLTTVFFKRFIDH
|
|
23
|
+
TKNYVMFTIQVIIFTFWIIVCCIAILCFLFNREYMKRHFNVRPLVFLGFLDMLSTGLSANGSAHTSGLMLVLLGQISVPL
|
|
24
|
+
TMVSCKLILSKKYHHYQYISSAIILTFAVLKPILNRTDTTDNRFYNNMLYLLASVPDSIASALREKQYTSKFFHVVKYQF
|
|
25
|
+
FGFLFHFFYNILYTLLFTLPFNSVKGYFDSLYKLCVNGYKCIFFGVNTITENCGPTLIPTCDNCLEAFKIYCLYILFSSA
|
|
26
|
+
IRVAYVFIMLDGSVTFTLLLGTVKVPLTSIAFSLRFIAGDSTTSFNLLDVVCFLGIVAGLLLYALGSKKIQEETDLLESP
|
|
27
|
+
LIDDAESEHELLSTGTEKLMRSEICHDLFT
|
|
28
|
+
>PY05061 putative transporter protein CG10
|
|
29
|
+
MTVIKKGKNKKKNLKNDDRYKELDSLITNGSEIGDNSGRSCIKRFFKIIGNEMKNNVYVYFLSILYLCVCVMNKVFAKRT
|
|
30
|
+
LNKMGNYSFVTSETHNIICIVVFQLLYFIYRKTSTSGYKNESQKNFGWQFFLISLLDASTVIISMIGLTRTTGNIQSFIM
|
|
31
|
+
QLIIPVNMYFCFMFLGYRYHLFNYLGAFIILITIAVVETFLSFETQSENSIIFNLIMISALIPLSFSNMTREVVFKKHKI
|
|
32
|
+
NILRLNAMVVLFQFFTSLLVLPVYNIPFLKEIYMPFSEMSTNINNGLRCLFYGQNTVVENCGVGMVKMCDNCEGAWKTFI
|
|
33
|
+
TFSFFNICDNLLACYIIDKFSTMTYTIVSCIQGPAITIAYYFKFLAGDAVRKPRILDFLTLFGYLFGTIIYRIGNIILEK
|
|
34
|
+
KKMVKSQNSNDSEAELTCIETSTA
|
|
35
|
+
>PVX_087980 chloroquine resistance transporter, putative
|
|
36
|
+
MTILKKKKKGSPQITPDERYRELDSHAQNESEIQEDVPISRKIANFLKLAYNEIRENISIYLLIIVYLCVCVMNKLLAKR
|
|
37
|
+
TLKKIGNYSFVTSETHNCICMVVFFALYFMFGRRVMSAKERHRNFGVQFLLISLLDACSVIIAFIGLTRTTGNIQSFVMQ
|
|
38
|
+
LSIPINMFFCFLILRYRYHLFNYVGAFIIVVTIAVVEFMLSFETQEENSIVFNLVLIASLIPLSFSNMTREIVFKKYKIN
|
|
39
|
+
ILRLNAVVSFFQIFTSCLMLPMYTLPFLKQINLPFSEIGTNIKNGFRCLFLGQNTIVENCGLGMSKMCDDCEGAWKTFIA
|
|
40
|
+
YSFFNICDNLITSFIIEKFSTMTYTIVSCIQGPAIAIAYYFKFLAGDAVMQPRMLDFVTLFGYLFGSIIYRIGNIILEKK
|
|
41
|
+
RMMEAGNDDDSEGELTNADSIITH
|
|
42
|
+
>TGME49_113930 hypothetical protein, conserved
|
|
43
|
+
MPNYSPCLTQVTTVVFVPVFFALSLYTDYAGGLPQEMADFPKRNFAVMGFLDSFSGVMAIIGAVHTTGTTQVVLQQSCIV
|
|
44
|
+
FSLLASIVMLRKRFHAAHYLGALVIILGVLVVKLPDLLHPSSDGGGDVFVFNLLYLLSNLPTAVSCVYKEVAFRGVEMGT
|
|
45
|
+
NYLQAWVALFQFLIGFLVLPLNALPVLGPQRVPLAELPASLWNGTRCLFGFNTIVTNCGGAGNMESPCDNCEGAWKYVGM
|
|
46
|
+
YLSFNLLYNMFIIFVVKSGGAALTFLVSTLRLPVTALAFCSRAIMGDRAVPPKATDFYGLLVLILGLVIYRAGGIMKRRA
|
|
47
|
+
QRRAVAAARGHTSSPMMLTPREEEQIGTIFVEEVFAAGELEDGGVTEEDETDDDTSEVEVHPVFSSVVASEPPHVYVHTK
|
|
48
|
+
RHSHSDGGYHKLPACGSSPAAFTPFTQRMPGTGSESCSRRRNRDGDDERSPRSHACSFDEETGFAGGTGTGRHFSSPGTA
|
|
49
|
+
LSPNRVGGYEPPSMHAVQPAVIGKSRANNGCI
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
data/test/test_bio-octopus.rb
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
require 'helper'
|
|
2
2
|
require 'tempfile'
|
|
3
|
-
require 'bio-tm_hmm'
|
|
4
3
|
|
|
5
4
|
class TestBioOctopus < Test::Unit::TestCase
|
|
5
|
+
DATA_DIR = File.join(Dir.pwd,'test','data')
|
|
6
|
+
|
|
6
7
|
def test_no_tmd_result
|
|
7
8
|
res = Bio::Spoctopus::Result.create_from_output([
|
|
8
9
|
'>wrapperSeq',
|
|
@@ -81,58 +82,23 @@ iiiiiiiiiMMMMMMMMMMMMMMMMMMMMMooooo'
|
|
|
81
82
|
assert_equal false, res.signal?
|
|
82
83
|
assert res.has_domain?
|
|
83
84
|
end
|
|
84
|
-
|
|
85
|
-
def
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
assert_equal 1, r.transmembrane_domains.length
|
|
103
|
-
t = r.transmembrane_domains[0]
|
|
104
|
-
assert_equal 1833, t.start
|
|
105
|
-
assert_equal 1853, t.stop
|
|
106
|
-
assert r.transmembrane_type_1?
|
|
107
|
-
end
|
|
108
|
-
|
|
109
|
-
Tempfile.open('spock') do |tempfile|
|
|
110
|
-
tempfile.puts 'pfa|PFD0635c I 1833 1853 outside_in
|
|
111
|
-
pfa|PFD0595c II 2 22 inside_out
|
|
112
|
-
pfa|PFB0610c No Transmembrane Domain Found
|
|
113
|
-
pfa|PFF1525c Unknown 2 22 outside_in
|
|
114
|
-
pfa|PFF1525c Unknown 160 180 inside_out
|
|
115
|
-
pfa|PFF1525c Unknown 188 208 outside_in'
|
|
116
|
-
tempfile.flush
|
|
117
|
-
|
|
118
|
-
pees = Bio::Spoctopus::WrapperParser.new(tempfile.path).transmembrane_proteins
|
|
119
|
-
assert_equal 4, pees.length
|
|
120
|
-
r = pees[0]
|
|
121
|
-
assert_equal 'pfa|PFD0635c', r.name
|
|
122
|
-
assert_equal 1, r.transmembrane_domains.length
|
|
123
|
-
t = r.transmembrane_domains[0]
|
|
124
|
-
assert_equal 1833, t.start
|
|
125
|
-
assert_equal 1853, t.stop
|
|
126
|
-
assert r.transmembrane_type_1?
|
|
127
|
-
|
|
128
|
-
r = pees[1]
|
|
129
|
-
|
|
130
|
-
assert r.transmembrane_type_2?
|
|
131
|
-
|
|
132
|
-
assert_equal 'pfa|PFB0610c', pees[2].name
|
|
133
|
-
assert_equal false, pees[2].has_domain?
|
|
134
|
-
|
|
135
|
-
assert_equal 3, pees[3].transmembrane_domains.length
|
|
136
|
-
end
|
|
85
|
+
|
|
86
|
+
def test_wrapper_by_actually_running_the_underlying_program
|
|
87
|
+
sequence = "MKFASKKNNQKNSSKNDERYRELDNLVQEGNGSRLGGGSCLGKCAHVFKLIFKEIKDNIFIYILSIIYLSVCVMNKIFAK
|
|
88
|
+
RTLNKIGNYSFVTSETHNFICMIMFFIVYSLFGNKKGNSKERHRSFNLQFFAISMLDACSVILAFIGLTRTTGNIQSFVL
|
|
89
|
+
QLSIPINMFFCFLILRYRYHLYNYLGAVIIVVTIALVEMKLSFETQEENSIIFNLVLISALIPVCFSNMTREIVFKKYKI
|
|
90
|
+
DILRLNAMVSFFQLFTSCLILPVYTLPFLKQLHLPYNEIWTNIKNGFACLFLGRNTVVENCGLGMAKLCDDCDGAWKTFA
|
|
91
|
+
LFSFFNICDNLITSYIIDKFSTMTYTIVSCIQGPAIAIAYYFKFLAGDVVREPRLLDFVTLFGYLFGSIIYRVGNIILER
|
|
92
|
+
KKMRNEENEDSEGELTNVDSIITQ".gsub(/\n/,'') #this is PfCRT, MAL7P1.27
|
|
93
|
+
Tempfile.open('pfcrt') do |tempfile|
|
|
94
|
+
tempfile.puts sequence
|
|
95
|
+
tempfile.close
|
|
96
|
+
|
|
97
|
+
blastdb_path = File.join(DATA_DIR,'dummyLegacyDb')
|
|
98
|
+
result = Bio::Spoctopus::Wrapper.new.calculate(sequence, blastdb_path)
|
|
99
|
+
assert_kind_of Bio::Transmembrane::SignalPeptideTransmembraneDomainProtein, result
|
|
100
|
+
assert_equal false, result.signal?
|
|
101
|
+
assert_equal 58, result.transmembrane_domains[0].start
|
|
102
|
+
end
|
|
137
103
|
end
|
|
138
104
|
end
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: bio-octopus
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
hash:
|
|
4
|
+
hash: 27
|
|
5
5
|
prerelease:
|
|
6
6
|
segments:
|
|
7
7
|
- 0
|
|
8
|
-
- 0
|
|
9
8
|
- 1
|
|
10
|
-
|
|
9
|
+
- 0
|
|
10
|
+
version: 0.1.0
|
|
11
11
|
platform: ruby
|
|
12
12
|
authors:
|
|
13
13
|
- Ben J Woodcroft
|
|
@@ -15,8 +15,8 @@ autorequire:
|
|
|
15
15
|
bindir: bin
|
|
16
16
|
cert_chain: []
|
|
17
17
|
|
|
18
|
-
date: 2011-04-
|
|
19
|
-
default_executable:
|
|
18
|
+
date: 2011-04-17 00:00:00 +10:00
|
|
19
|
+
default_executable:
|
|
20
20
|
dependencies:
|
|
21
21
|
- !ruby/object:Gem::Dependency
|
|
22
22
|
type: :runtime
|
|
@@ -141,6 +141,7 @@ dependencies:
|
|
|
141
141
|
description: Running and parsing of the protein transmembrane domain predictor octopus
|
|
142
142
|
email: gmail.com after donttrustben
|
|
143
143
|
executables:
|
|
144
|
+
- biooctopus.rb
|
|
144
145
|
- biooctopus
|
|
145
146
|
extensions: []
|
|
146
147
|
|
|
@@ -158,8 +159,13 @@ files:
|
|
|
158
159
|
- bio-octopus.gemspec
|
|
159
160
|
- lib/bio-octopus.rb
|
|
160
161
|
- lib/bio/appl/octopus.rb
|
|
162
|
+
- test/data/dummyLegacyDb
|
|
163
|
+
- test/data/dummyLegacyDb.phr
|
|
164
|
+
- test/data/dummyLegacyDb.pin
|
|
165
|
+
- test/data/dummyLegacyDb.psq
|
|
161
166
|
- test/helper.rb
|
|
162
167
|
- test/test_bio-octopus.rb
|
|
168
|
+
- bin/biooctopus.rb
|
|
163
169
|
has_rdoc: true
|
|
164
170
|
homepage: http://github.com/wwood/bioruby-octopus
|
|
165
171
|
licenses:
|