bio 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +107 -0
- data/bin/br_biofetch.rb +59 -0
- data/bin/br_bioflat.rb +294 -0
- data/bin/br_biogetseq.rb +57 -0
- data/bin/br_pmfetch.rb +431 -0
- data/doc/BioRuby.rd.ja +225 -0
- data/doc/Changes-0.7.rd +236 -0
- data/doc/Design.rd.ja +341 -0
- data/doc/KEGG_API.rd +1437 -0
- data/doc/KEGG_API.rd.ja +1399 -0
- data/doc/TODO.rd.ja +138 -0
- data/doc/Tutorial.rd +1138 -0
- data/doc/Tutorial.rd.ja +2110 -0
- data/etc/bioinformatics/seqdatabase.ini +210 -0
- data/lib/bio.rb +256 -0
- data/lib/bio/alignment.rb +1906 -0
- data/lib/bio/appl/bl2seq/report.rb +350 -0
- data/lib/bio/appl/blast.rb +269 -0
- data/lib/bio/appl/blast/format0.rb +1402 -0
- data/lib/bio/appl/blast/format8.rb +95 -0
- data/lib/bio/appl/blast/report.rb +652 -0
- data/lib/bio/appl/blast/rexml.rb +151 -0
- data/lib/bio/appl/blast/wublast.rb +553 -0
- data/lib/bio/appl/blast/xmlparser.rb +222 -0
- data/lib/bio/appl/blat/report.rb +392 -0
- data/lib/bio/appl/clustalw.rb +191 -0
- data/lib/bio/appl/clustalw/report.rb +154 -0
- data/lib/bio/appl/emboss.rb +68 -0
- data/lib/bio/appl/fasta.rb +262 -0
- data/lib/bio/appl/fasta/format10.rb +428 -0
- data/lib/bio/appl/fasta/format6.rb +37 -0
- data/lib/bio/appl/genscan/report.rb +570 -0
- data/lib/bio/appl/hmmer.rb +129 -0
- data/lib/bio/appl/hmmer/report.rb +556 -0
- data/lib/bio/appl/mafft.rb +222 -0
- data/lib/bio/appl/mafft/report.rb +119 -0
- data/lib/bio/appl/psort.rb +555 -0
- data/lib/bio/appl/psort/report.rb +473 -0
- data/lib/bio/appl/sim4.rb +134 -0
- data/lib/bio/appl/sim4/report.rb +501 -0
- data/lib/bio/appl/sosui/report.rb +166 -0
- data/lib/bio/appl/spidey/report.rb +604 -0
- data/lib/bio/appl/targetp/report.rb +283 -0
- data/lib/bio/appl/tmhmm/report.rb +238 -0
- data/lib/bio/command.rb +166 -0
- data/lib/bio/data/aa.rb +354 -0
- data/lib/bio/data/codontable.rb +740 -0
- data/lib/bio/data/na.rb +226 -0
- data/lib/bio/db.rb +340 -0
- data/lib/bio/db/aaindex.rb +280 -0
- data/lib/bio/db/embl/common.rb +332 -0
- data/lib/bio/db/embl/embl.rb +446 -0
- data/lib/bio/db/embl/sptr.rb +954 -0
- data/lib/bio/db/embl/swissprot.rb +32 -0
- data/lib/bio/db/embl/trembl.rb +31 -0
- data/lib/bio/db/embl/uniprot.rb +32 -0
- data/lib/bio/db/fantom.rb +604 -0
- data/lib/bio/db/fasta.rb +869 -0
- data/lib/bio/db/genbank/common.rb +299 -0
- data/lib/bio/db/genbank/ddbj.rb +34 -0
- data/lib/bio/db/genbank/genbank.rb +354 -0
- data/lib/bio/db/genbank/genpept.rb +73 -0
- data/lib/bio/db/genbank/refseq.rb +31 -0
- data/lib/bio/db/gff.rb +106 -0
- data/lib/bio/db/go.rb +497 -0
- data/lib/bio/db/kegg/brite.rb +51 -0
- data/lib/bio/db/kegg/cell.rb +88 -0
- data/lib/bio/db/kegg/compound.rb +130 -0
- data/lib/bio/db/kegg/enzyme.rb +125 -0
- data/lib/bio/db/kegg/expression.rb +173 -0
- data/lib/bio/db/kegg/genes.rb +293 -0
- data/lib/bio/db/kegg/genome.rb +362 -0
- data/lib/bio/db/kegg/glycan.rb +213 -0
- data/lib/bio/db/kegg/keggtab.rb +418 -0
- data/lib/bio/db/kegg/kgml.rb +299 -0
- data/lib/bio/db/kegg/ko.rb +178 -0
- data/lib/bio/db/kegg/reaction.rb +97 -0
- data/lib/bio/db/litdb.rb +131 -0
- data/lib/bio/db/medline.rb +317 -0
- data/lib/bio/db/nbrf.rb +199 -0
- data/lib/bio/db/pdb.rb +38 -0
- data/lib/bio/db/pdb/atom.rb +60 -0
- data/lib/bio/db/pdb/chain.rb +117 -0
- data/lib/bio/db/pdb/model.rb +106 -0
- data/lib/bio/db/pdb/pdb.rb +1682 -0
- data/lib/bio/db/pdb/residue.rb +122 -0
- data/lib/bio/db/pdb/utils.rb +234 -0
- data/lib/bio/db/prosite.rb +616 -0
- data/lib/bio/db/rebase.rb +417 -0
- data/lib/bio/db/transfac.rb +387 -0
- data/lib/bio/feature.rb +201 -0
- data/lib/bio/io/brdb.rb +103 -0
- data/lib/bio/io/das.rb +471 -0
- data/lib/bio/io/dbget.rb +212 -0
- data/lib/bio/io/ddbjxml.rb +614 -0
- data/lib/bio/io/fastacmd.rb +123 -0
- data/lib/bio/io/fetch.rb +114 -0
- data/lib/bio/io/flatfile.rb +496 -0
- data/lib/bio/io/flatfile/bdb.rb +266 -0
- data/lib/bio/io/flatfile/index.rb +1308 -0
- data/lib/bio/io/flatfile/indexer.rb +778 -0
- data/lib/bio/io/higet.rb +92 -0
- data/lib/bio/io/keggapi.rb +863 -0
- data/lib/bio/io/pubmed.rb +189 -0
- data/lib/bio/io/registry.rb +308 -0
- data/lib/bio/io/soapwsdl.rb +114 -0
- data/lib/bio/io/sql.rb +428 -0
- data/lib/bio/location.rb +650 -0
- data/lib/bio/pathway.rb +991 -0
- data/lib/bio/reference.rb +308 -0
- data/lib/bio/sequence.rb +593 -0
- data/lib/bio/shell.rb +51 -0
- data/lib/bio/shell/core.rb +512 -0
- data/lib/bio/shell/plugin/codon.rb +228 -0
- data/lib/bio/shell/plugin/entry.rb +85 -0
- data/lib/bio/shell/plugin/flatfile.rb +119 -0
- data/lib/bio/shell/plugin/keggapi.rb +187 -0
- data/lib/bio/shell/plugin/midi.rb +448 -0
- data/lib/bio/shell/plugin/obda.rb +63 -0
- data/lib/bio/shell/plugin/seq.rb +238 -0
- data/lib/bio/shell/session.rb +214 -0
- data/lib/bio/util/color_scheme.rb +214 -0
- data/lib/bio/util/color_scheme/buried.rb +78 -0
- data/lib/bio/util/color_scheme/helix.rb +78 -0
- data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
- data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
- data/lib/bio/util/color_scheme/strand.rb +78 -0
- data/lib/bio/util/color_scheme/taylor.rb +69 -0
- data/lib/bio/util/color_scheme/turn.rb +78 -0
- data/lib/bio/util/color_scheme/zappo.rb +69 -0
- data/lib/bio/util/contingency_table.rb +337 -0
- data/lib/bio/util/sirna.rb +306 -0
- data/lib/bioruby.rb +34 -0
- data/sample/biofetch.rb +475 -0
- data/sample/color_scheme_na.rb +99 -0
- data/sample/dbget +37 -0
- data/sample/fasta2tab.rb +99 -0
- data/sample/fsplit.rb +51 -0
- data/sample/gb2fasta.rb +31 -0
- data/sample/gb2tab.rb +325 -0
- data/sample/gbtab2mysql.rb +161 -0
- data/sample/genes2nuc.rb +33 -0
- data/sample/genes2pep.rb +33 -0
- data/sample/genes2tab.rb +81 -0
- data/sample/genome2rb.rb +29 -0
- data/sample/genome2tab.rb +76 -0
- data/sample/goslim.rb +311 -0
- data/sample/gt2fasta.rb +47 -0
- data/sample/pmfetch.rb +42 -0
- data/sample/pmsearch.rb +42 -0
- data/sample/psortplot_html.rb +222 -0
- data/sample/ssearch2tab.rb +96 -0
- data/sample/tdiary.rb +158 -0
- data/sample/tfastx2tab.rb +100 -0
- data/sample/vs-genes.rb +212 -0
- data/test/data/SOSUI/sample.report +11 -0
- data/test/data/TMHMM/sample.report +21 -0
- data/test/data/blast/eco:b0002.faa +15 -0
- data/test/data/blast/eco:b0002.faa.m0 +128 -0
- data/test/data/blast/eco:b0002.faa.m7 +65 -0
- data/test/data/blast/eco:b0002.faa.m8 +1 -0
- data/test/data/embl/AB090716.embl +65 -0
- data/test/data/genscan/sample.report +63 -0
- data/test/data/prosite/prosite.dat +2233 -0
- data/test/data/refseq/nm_126355.entret +64 -0
- data/test/data/uniprot/p53_human.uniprot +1456 -0
- data/test/runner.rb +10 -0
- data/test/unit/bio/appl/blast/test_report.rb +427 -0
- data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
- data/test/unit/bio/appl/genscan/test_report.rb +195 -0
- data/test/unit/bio/appl/sosui/test_report.rb +94 -0
- data/test/unit/bio/appl/targetp/test_report.rb +159 -0
- data/test/unit/bio/appl/test_blast.rb +159 -0
- data/test/unit/bio/appl/test_fasta.rb +142 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
- data/test/unit/bio/data/test_aa.rb +103 -0
- data/test/unit/bio/data/test_codontable.rb +120 -0
- data/test/unit/bio/data/test_na.rb +89 -0
- data/test/unit/bio/db/embl/test_common.rb +130 -0
- data/test/unit/bio/db/embl/test_embl.rb +227 -0
- data/test/unit/bio/db/embl/test_sptr.rb +268 -0
- data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
- data/test/unit/bio/db/kegg/test_genes.rb +58 -0
- data/test/unit/bio/db/test_fasta.rb +263 -0
- data/test/unit/bio/db/test_gff.rb +140 -0
- data/test/unit/bio/db/test_prosite.rb +1450 -0
- data/test/unit/bio/io/test_ddbjxml.rb +87 -0
- data/test/unit/bio/io/test_soapwsdl.rb +45 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
- data/test/unit/bio/test_alignment.rb +1028 -0
- data/test/unit/bio/test_command.rb +71 -0
- data/test/unit/bio/test_db.rb +109 -0
- data/test/unit/bio/test_feature.rb +128 -0
- data/test/unit/bio/test_location.rb +51 -0
- data/test/unit/bio/test_pathway.rb +485 -0
- data/test/unit/bio/test_sequence.rb +386 -0
- data/test/unit/bio/test_shell.rb +31 -0
- data/test/unit/bio/util/test_color_scheme.rb +45 -0
- data/test/unit/bio/util/test_contingency_table.rb +106 -0
- data/test/unit/bio/util/test_sirna.rb +258 -0
- metadata +295 -0
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/appl/mafft.rb - MAFFT wrapper class
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2003 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
|
|
5
|
+
# License:: LGPL
|
|
6
|
+
#
|
|
7
|
+
#--
|
|
8
|
+
# This library is free software; you can redistribute it and/or
|
|
9
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
10
|
+
# License as published by the Free Software Foundation; either
|
|
11
|
+
# version 2 of the License, or (at your option) any later version.
|
|
12
|
+
#
|
|
13
|
+
# This library is distributed in the hope that it will be useful,
|
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
16
|
+
# Lesser General Public License for more details.
|
|
17
|
+
#
|
|
18
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
19
|
+
# License along with this library; if not, write to the Free Software
|
|
20
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
21
|
+
#++
|
|
22
|
+
#
|
|
23
|
+
# $Id: mafft.rb,v 1.9 2005/12/18 15:58:40 k Exp $
|
|
24
|
+
#
|
|
25
|
+
# Bio::MAFFT is a wrapper class to execute MAFFT.
|
|
26
|
+
# MAFFT is a very fast multiple sequence alignment software.
|
|
27
|
+
#
|
|
28
|
+
# = Important Notes
|
|
29
|
+
#
|
|
30
|
+
# Though Bio::MAFFT class currently supports only MAFFT version 3,
|
|
31
|
+
# you can use MAFFT version 5 because the class is a wrapper class.
|
|
32
|
+
#
|
|
33
|
+
# == References
|
|
34
|
+
#
|
|
35
|
+
# * K. Katoh, K. Misawa, K. Kuma and T. Miyata.
|
|
36
|
+
# MAFFT: a novel method for rapid multiple sequence alignment based
|
|
37
|
+
# on fast Fourier transform. Nucleic Acids Res. 30: 3059-3066, 2002.
|
|
38
|
+
# http://nar.oupjournals.org/cgi/content/abstract/30/14/3059
|
|
39
|
+
# * http://www.biophys.kyoto-u.ac.jp/~katoh/programs/align/mafft/
|
|
40
|
+
#
|
|
41
|
+
|
|
42
|
+
require 'bio/db/fasta'
|
|
43
|
+
require 'bio/io/flatfile'
|
|
44
|
+
|
|
45
|
+
#--
|
|
46
|
+
# We use Open3.popen3, because MAFFT on win32 requires Cygwin.
|
|
47
|
+
#++
|
|
48
|
+
require 'open3'
require 'tempfile'
|
|
49
|
+
|
|
50
|
+
module Bio
|
|
51
|
+
|
|
52
|
+
# Bio::MAFFT is a wrapper class to execute MAFFT.
|
|
53
|
+
# MAFFT is a very fast multiple sequence alignment software.
|
|
54
|
+
#
|
|
55
|
+
# Though Bio::MAFFT class currently supports only MAFFT version 3,
|
|
56
|
+
# you can use MAFFT version 5 because the class is a wrapper class.
|
|
57
|
+
class MAFFT

  autoload :Report, 'bio/appl/mafft/report'

  # Creates a new alignment factory.
  # When +n+ is a number (1,2,3, ...), performs 'fftns n'.
  # When +n+ is :i or 'i', performs 'fftnsi'.
  # When +n+ is nil, performs 'fftns' without arguments.
  def self.fftns(n = nil)
    opt = []
    if n.to_s == 'i' then
      self.new2(nil, 'fftnsi', *opt)
    else
      opt << n.to_s if n
      self.new2(nil, 'fftns', *opt)
    end
  end

  # Creates a new alignment factory.
  # Performs 'fftnsi'.
  def self.fftnsi
    self.new2(nil, 'fftnsi')
  end

  # Creates a new alignment factory.
  # When +n+ is a number (1,2,3, ...), performs 'nwns n'.
  # When +n+ is :i or 'i', performs 'nwnsi'.
  # In both cases, if +ap+ is true, the option '--all-positive' is added
  # in front of the other options.
  def self.nwns(n = nil, ap = nil)
    opt = []
    opt << '--all-positive' if ap
    if n.to_s == 'i' then
      self.new2(nil, 'nwnsi', *opt)
    else
      opt << n.to_s if n
      self.new2(nil, 'nwns', *opt)
    end
  end

  # Creates a new alignment factory.
  # Performs 'nwnsi'.
  # If +all_positive+ is true, add option '--all-positive'.
  def self.nwnsi(all_positive = nil)
    opt = []
    opt << '--all-positive' if all_positive
    self.new2(nil, 'nwnsi', *opt)
  end

  # Creates a new alignment factory.
  # Performs 'nwns --all-positive n' or 'nwnsi --all-positive'.
  # Same as Bio::MAFFT.nwns(n, true).
  def self.nwap(n = nil)
    self.nwns(n, true)
  end

  # Creates a new alignment factory.
  # +dir+ is the directory containing the MAFFT program
  # (nil means +prog+ is used as given).
  # +prog+ is the name of the program.
  # +opt+ is options of the program (collected into an Array).
  def self.new2(dir, prog, *opt)
    if dir then
      prog = File.join(dir, prog)
    end
    self.new(prog, opt)
  end

  # Creates a new alignment factory.
  # +program+ is the name of the program.
  # +option+ is an Array of options of the program.
  def initialize(program, option)
    @program = program
    @option = option
    @command = nil
    @output = nil
    @report = nil
    @log = nil
  end

  # program name
  attr_accessor :program

  # options
  attr_accessor :option

  # Shows last command-line string. Returns nil or an array of String.
  # Note that filenames described in the command-line may already
  # be removed because they are temporary files.
  attr_reader :command

  # last message to STDERR when executing the program.
  attr_reader :log

  # Shows latest raw alignment result.
  # Since a result of MAFFT is simply a multiple-fasta format,
  # it returns an array of Bio::FastaFormat instances
  # instead of raw string.
  attr_reader :output

  # Shows last alignment result (instance of Bio::MAFFT::Report class)
  # performed by the factory.
  attr_reader :report

  # Executes the program.
  # If +seqs+ is not nil, perform alignment for seqs and returns the
  # result of query_align.
  # If +seqs+ is nil, simply executes the program with the stored
  # options and returns what the program wrote to STDERR.
  def query(seqs)
    if seqs then
      query_align(seqs)
    else
      exec_local(@option)
    end
  end

  # Performs alignment for seqs.
  # +seqs+ should be Bio::Alignment or Array of sequences or nil.
  # Anything that is not a Bio::Alignment is wrapped with
  # Bio::Alignment.new(seqs, *arg) before being written out as fasta.
  def query_align(seqs, *arg)
    unless seqs.is_a?(Bio::Alignment)
      seqs = Bio::Alignment.new(seqs, *arg)
    end
    query_string(seqs.to_fasta(70))
  end

  # Performs alignment for +str+.
  # Str should be a string that can be recognized by the program.
  # The string is written to a temporary file whose path is handed to
  # query_by_filename together with +arg+; the result of that call is
  # returned.
  def query_string(str, *arg)
    # Created outside the begin block so that the ensure clause never
    # runs against a nil handle when creation itself fails.
    tf_in = Tempfile.open('align')
    begin
      tf_in.print str
      # Flush and close, but keep the file on disk for the program.
      tf_in.close(false)
      query_by_filename(tf_in.path, *arg)
    ensure
      # Remove the temporary file even when the run raised.
      tf_in.close(true)
    end
  end

  # Performs alignment of sequences in the file named +fn+.
  # +seqtype+ is passed on to Bio::MAFFT::Report.new.
  # Returns the new report, which is also kept in #report.
  def query_by_filename(fn, seqtype = nil)
    opt = @option + [ fn ]
    exec_local(opt)
    @report = Report.new(@output, seqtype)
    @report
  end

  private
  # Executes a program in the local machine.
  # Stores the command line in @command, the parsed STDOUT
  # (an array of Bio::FastaFormat) in @output and the STDERR text in
  # @log. Returns @log.
  def exec_local(opt)
    @command = [ @program, *opt ]
    #STDERR.print "DEBUG: ", @command.join(" "), "\n"
    @output = nil
    @log = nil
    Open3.popen3(*@command) do |din, dout, derr|
      din.close
      derr.sync = true
      # STDERR is read in its own thread while STDOUT is parsed below.
      t = Thread.start do
        @log = derr.read
      end
      ff = Bio::FlatFile.new(Bio::FastaFormat, dout)
      @output = ff.to_a
      t.join
    end
    @log
  end

end #class MAFFT
|
|
221
|
+
end #module Bio
|
|
222
|
+
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/appl/mafft/report.rb - MAFFT report class
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2003 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
|
|
5
|
+
# License:: LGPL
|
|
6
|
+
#
|
|
7
|
+
#--
|
|
8
|
+
# This library is free software; you can redistribute it and/or
|
|
9
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
10
|
+
# License as published by the Free Software Foundation; either
|
|
11
|
+
# version 2 of the License, or (at your option) any later version.
|
|
12
|
+
#
|
|
13
|
+
# This library is distributed in the hope that it will be useful,
|
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
16
|
+
# Lesser General Public License for more details.
|
|
17
|
+
#
|
|
18
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
19
|
+
# License along with this library; if not, write to the Free Software
|
|
20
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
21
|
+
#++
|
|
22
|
+
#
|
|
23
|
+
# $Id: report.rb,v 1.8 2005/12/18 15:58:40 k Exp $
|
|
24
|
+
#
|
|
25
|
+
# MAFFT result parser class.
|
|
26
|
+
# MAFFT is a very fast multiple sequence alignment software.
|
|
27
|
+
#
|
|
28
|
+
# Since a result of MAFFT is simply a multiple-fasta format,
|
|
29
|
+
# the significance of this class is to keep standard form and
|
|
30
|
+
# interface between Bio::ClustalW::Report.
|
|
31
|
+
#
|
|
32
|
+
# == References
|
|
33
|
+
#
|
|
34
|
+
# * K. Katoh, K. Misawa, K. Kuma and T. Miyata.
|
|
35
|
+
# MAFFT: a novel method for rapid multiple sequence alignment based
|
|
36
|
+
# on fast Fourier transform. Nucleic Acids Res. 30: 3059-3066, 2002.
|
|
37
|
+
# http://nar.oupjournals.org/cgi/content/abstract/30/14/3059
|
|
38
|
+
# * http://www.biophys.kyoto-u.ac.jp/~katoh/programs/align/mafft/
|
|
39
|
+
#
|
|
40
|
+
|
|
41
|
+
require 'bio/db/fasta'
|
|
42
|
+
require 'bio/io/flatfile'
|
|
43
|
+
require 'bio/appl/mafft'
|
|
44
|
+
|
|
45
|
+
module Bio
|
|
46
|
+
class MAFFT
|
|
47
|
+
|
|
48
|
+
# MAFFT result parser class.
|
|
49
|
+
# MAFFT is a very fast multiple sequence alignment software.
|
|
50
|
+
#
|
|
51
|
+
# Since a result of MAFFT is simply a multiple-fasta format,
|
|
52
|
+
# the significance of this class is to keep standard form and
|
|
53
|
+
# interface between Bio::ClustalW::Report.
|
|
54
|
+
class Report

  # Creates a new Report object.
  # +ary+ should be an Array of Bio::FastaFormat.
  # +seqclass+ should be one of the following:
  #   Class:  Bio::Sequence::AA, Bio::Sequence::NA, ...
  #   String: 'PROTEIN', 'DNA', ...
  # Anything else (including nil) selects Bio::Sequence.
  def initialize(ary, seqclass = nil)
    @data = ary
    @align = nil
    @seqclass =
      case seqclass
      when /PROTEIN/i then Bio::Sequence::AA
      when /[DR]NA/i then Bio::Sequence::NA
      else
        seqclass.is_a?(Module) ? seqclass : Bio::Sequence
      end
  end

  # sequence data. Returns an array of Bio::FastaFormat.
  attr_reader :data

  # Sequence class (Bio::Sequence::AA, Bio::Sequence::NA, ...)
  attr_reader :seqclass

  # Gets the multiple alignment, building it on first use.
  # Returns an instance of Bio::Alignment class.
  def align
    do_parse() if @align.nil? || @align == false
    @align
  end
  alias alignment align

  # Gets a fasta-format string of the sequences.
  # Returns a string.
  # Same as align.to_fasta.
  # Please refer to Bio::Alignment#to_fasta for arguments.
  def to_fasta(*arg)
    alignment_obj = align
    alignment_obj.to_fasta(*arg)
  end

  # Gets an array of the sequences.
  # Returns an array of Bio::FastaFormat instances.
  def to_a
    @data
  end

  private
  # Builds @align from @data: every entry is converted to a pair of
  # [ sequence of class @seqclass, definition ]. Does nothing when the
  # alignment already exists. Always returns nil.
  def do_parse
    unless @align
      @align = Bio::Alignment.new(@data) { |entry|
        [ @seqclass.new(entry.seq), entry.definition ]
      }
    end
    nil
  end

end #class Report
|
|
117
|
+
end #class MAFFT
|
|
118
|
+
end #module Bio
|
|
119
|
+
|
|
@@ -0,0 +1,555 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/appl/psort.rb - PSORT, protein sorting site prediction systems
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2003 Mitsuteru C. Nakao <n@bioruby.org>
|
|
5
|
+
# License:: LGPL
|
|
6
|
+
#
|
|
7
|
+
#
|
|
8
|
+
# $Id: psort.rb,v 1.8 2005/11/01 05:15:15 nakao Exp $
|
|
9
|
+
#
|
|
10
|
+
# == A client for PSORT WWW Server
|
|
11
|
+
#
|
|
12
|
+
# A client for PSORT WWW Server for predicting protein subcellular
|
|
13
|
+
# localization.
|
|
14
|
+
#
|
|
15
|
+
# PSORT family members,
|
|
16
|
+
# 1. PSORT
|
|
17
|
+
# 2. PSORT II
|
|
18
|
+
# 3. iPSORT
|
|
19
|
+
# 4. PSORT-B http://psort.org
|
|
20
|
+
# 5. WoLF-PSORT
|
|
21
|
+
#
|
|
22
|
+
# See http://psort.ims.u-tokyo.ac.jp.
|
|
23
|
+
#
|
|
24
|
+
# === Example
|
|
25
|
+
#
|
|
26
|
+
#
|
|
27
|
+
#--
|
|
28
|
+
#
|
|
29
|
+
# This library is free software; you can redistribute it and/or
|
|
30
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
31
|
+
# License as published by the Free Software Foundation; either
|
|
32
|
+
# version 2 of the License, or (at your option) any later version.
|
|
33
|
+
#
|
|
34
|
+
# This library is distributed in the hope that it will be useful,
|
|
35
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
36
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
37
|
+
# Lesser General Public License for more details.
|
|
38
|
+
#
|
|
39
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
40
|
+
# License along with this library; if not, write to the Free Software
|
|
41
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
42
|
+
#
|
|
43
|
+
#++
|
|
44
|
+
#
|
|
45
|
+
|
|
46
|
+
require 'bio/sequence'
|
|
47
|
+
require 'bio/db/fasta'
|
|
48
|
+
require 'net/http'
|
|
49
|
+
require 'cgi'
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
module Bio
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class PSORT
|
|
58
|
+
# a Hash for PSORT official hosts:
|
|
59
|
+
# Key value (host)
|
|
60
|
+
# ------- -----------------------
|
|
61
|
+
# IMSUT psort.ims.u-tokyo.ac.jp
|
|
62
|
+
# Okazaki psort.nibb.ac.jp
|
|
63
|
+
# Peking srs.pku.edu.cn:8088
|
|
64
|
+
# Each entry maps 'host' to the server address (optionally 'name:port')
# and 'PSORT1' / 'PSORT2' to the CGI path of the respective program.
WWWServer = {
  'IMSUT'   => {'host'   => 'psort.hgc.jp',  #'psort.ims.u-tokyo.ac.jp',
                'PSORT1' => '/cgi-bin/okumura.pl',
                'PSORT2' => '/cgi-bin/runpsort.pl'},
  'Okazaki' => {'host'   => 'psort.nibb.ac.jp',
                'PSORT1' => '/cgi-bin/okumura.pl',
                'PSORT2' => '/cgi-bin/runpsort.pl'},
  # Host corrected from 'srs.pku.edu.en:8088' (typo in the domain).
  'Peking'  => {'host'   => 'srs.pku.edu.cn:8088',
                'PSORT1' => '/cgi-bin/okumura.pl',
                'PSORT2' => '/cgi-bin/runpsort.pl'}
}
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
# = Generic CGI client class
|
|
78
|
+
# A generic CGI client class for Bio::PSORT::* classes.
|
|
79
|
+
# The class provides an interface for CGI argument processing and output
|
|
80
|
+
# report parsing.
|
|
81
|
+
#
|
|
82
|
+
# == Example
|
|
83
|
+
#
|
|
84
|
+
# class NewClient < CGIDriver
|
|
85
|
+
# def initialize(host, path)
|
|
86
|
+
# super(host, path)
|
|
87
|
+
# end
|
|
88
|
+
# end
|
|
89
|
+
# private
|
|
90
|
+
# def make_args(query)
|
|
91
|
+
# # ...
|
|
92
|
+
# end
|
|
93
|
+
# def parse_report(output)
|
|
94
|
+
# # ...
|
|
95
|
+
# end
|
|
96
|
+
#
|
|
97
|
+
class CGIDriver

  # CGI query argument in Hash ({key => value, ...}).
  attr_accessor :args

  # CGI output raw text (nil until exec has been called).
  attr_reader :report


  # Sets remote ``host'' and cgi ``path''.
  # ``host'' may be given as 'name' or as 'name:port'.
  def initialize(host = '', path = '')
    @host = host
    @path = path
    @args = {}
    @report = nil
  end


  # Executes a CGI ``query'': posts the string built by make_args to
  # the remote CGI, stores the raw response body in #report and returns
  # the value produced by parse_report.
  def exec(query)
    data = make_args(query)
    address, port = host_and_port

    # The trailing comma keeps only the first element should post
    # hand back an Array.
    result, = Net::HTTP.new(address, port).post(@path, data)
    @report = result.body
    parse_report(@report)
  end

  private

  # Splits @host into [address, port]. The port is an Integer when
  # @host has the form 'name:port' and nil otherwise, in which case
  # Net::HTTP falls back to its default port.
  def host_and_port
    m = /\A([^:]+):(\d+)\z/.match(@host.to_s)
    m ? [ m[1], m[2].to_i ] : [ @host, nil ]
  end

  # Bio::CGIDriver#make_args. An API skeleton.
  def make_args(args_hash)
    # The routine should be provided in the inherited class
  end

  # Bio::CGIDriver#parse_report. An API skeleton.
  def parse_report(result_body)
    # The routine should be provided in the inherited class
  end

  # Erases HTML tags
  def erase_html_tags(str)
    return str.gsub(/<\S.*?>/,'')
  end

  # Returns CGI argument text in String (key=value&) from a Hash ({key=>value}).
  # Keys and values are converted with to_s and CGI-escaped.
  def args_join(hash, delim = '&')
    tmp = []
    hash.each do |key, val|
      tmp << CGI.escape(key.to_s) + '=' + CGI.escape(val.to_s)
    end
    return tmp.join(delim)  # not ';' but '&' in psort's cgi
  end

end # class CGIDriver
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
# = Bio::PSORT::PSORT1
|
|
159
|
+
# Bio::PSORT::PSORT1 is a wrapper class for the original PSORT program.
|
|
160
|
+
#
|
|
161
|
+
# == Example
|
|
162
|
+
#
|
|
163
|
+
# serv = Bio::PSORT::PSORT1.imsut
|
|
164
|
+
# serv.title = 'Query_title_splited_by_white space'
|
|
165
|
+
# serv.exec(seq, false) # seq.class => String
|
|
166
|
+
# serv.exec(seq)
|
|
167
|
+
# report = serv.exec(Bio::FastaFormat.new(seq))
|
|
168
|
+
# report_raw = serv.exec(Bio::FastaFormat.new(seq), false)
|
|
169
|
+
#
|
|
170
|
+
# == References
|
|
171
|
+
# 1. Nakai, K. and Kanehisa, M., A knowledge base for predicting protein
|
|
172
|
+
# localization sites in eukaryotic cells, Genomics 14, 897-911 (1992).
|
|
173
|
+
# [PMID:1478671]
|
|
174
|
+
class PSORT1

  autoload :Report, 'bio/appl/psort/report'

  # Returns a PSORT1 object whose CGI driver
  # (Bio::PSORT::PSORT1::Remote) points at the IMSUT server.
  def self.imsut
    server = WWWServer['IMSUT']
    self.new(Remote.new(server['host'], server['PSORT1']))
  end


  # Returns a PSORT1 object whose CGI driver
  # (Bio::PSORT::PSORT1::Remote) points at the NIBB (Okazaki) server.
  def self.okazaki
    server = WWWServer['Okazaki']
    self.new(Remote.new(server['host'], server['PSORT1']))
  end


  # Returns a PSORT1 object whose CGI driver
  # (Bio::PSORT::PSORT1::Remote) points at the Peking server.
  def self.peking
    server = WWWServer['Peking']
    self.new(Remote.new(server['host'], server['PSORT1']))
  end


  # Sets a server CGI Driver (Bio::PSORT::PSORT1::Remote) and the
  # origin of the query sequence. Title starts as 'MYSEQ' and the
  # sequence as an empty string.
  def initialize(driver, origin = 'yeast')
    @serv     = driver
    @origin   = origin  # Gram-positive bacterium, Gram-negative bacterium,
                        # yeast, animal, plant
    @title    = 'MYSEQ'
    @sequence = ''
  end


  # An accessor of the origin argument. Default setting is "yeast".
  # Usable values:
  # 1. Gram-positive bacterium
  # 2. Gram-negative bacterium
  # 3. yeast
  # 4. animal
  # 5. plant
  attr_accessor :origin

  # An accessor of the query sequence argument.
  attr_accessor :sequence

  # An accessor of the title argument. Default setting is 'MYSEQ'.
  # While the title still has its default value, exec replaces it with
  # the entry_id of the Bio::FastaFormat query.
  attr_accessor :title


  # Executes the query (faa) and returns a Bio::PSORT::PSORT1::Report.
  #
  # ``faa'' may be a Bio::FastaFormat or anything accepted by
  # Bio::FastaFormat.new (e.g. a String); the latter is wrapped and
  # the call repeated.
  #
  # With ``parsing = false'' the driver returns the output text
  # without passing it to the report parser.
  def exec(faa, parsing = true)
    unless faa.instance_of?(Bio::FastaFormat)
      return self.exec(Bio::FastaFormat.new(faa), parsing)
    end

    @title = faa.entry_id if @title == 'MYSEQ'
    @sequence = faa.seq
    @serv.args = {'title' => @title, 'origin' => @origin}
    @serv.parsing = parsing
    @serv.exec(sequence)
  end


  # = Bio::PSORT::PSORT1::Remote
  # PSORT1 specific CGIDriver.
  class Remote < CGIDriver

    # Accessor for the target domain. Set to 'yeast' by initialize.
    # Target domains:
    #   1. Gram-positive bacterium
    #   2. Gram-negative bacterium
    #   3. yeast
    #   4. animal
    #   5. plant
    attr_accessor :origin

    # Accessor for the query title. Set to 'MYSEQ' by initialize.
    attr_accessor :title

    # Accessor for the parsing flag. When true (the default),
    # parse_report hands the output to Bio::PSORT::PSORT1::Report.parser.
    attr_accessor :parsing

    # Sets remote ``host'' and cgi ``path''.
    def initialize(host, path)
      @origin  = 'yeast'
      @title   = 'MYSEQ'
      @parsing = true
      super(host, path)
    end

    private

    # Adds the query under the 'sequence' key of the CGI arguments and
    # returns the joined argument string.
    # An API implementation.
    def make_args(query)
      @args.update('sequence' => query)
      args_join(@args)
    end


    # Strips HTML tags from the output and, when parsing is enabled,
    # returns the parsed report; otherwise returns the stripped text.
    # An API implementation.
    def parse_report(str)
      text = erase_html_tags(str)
      @parsing ? Bio::PSORT::PSORT1::Report.parser(text) : text
    end

  end # Class Remote

end # class PSORT1
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
# = Bio::PSORT::PSORT2
|
|
301
|
+
# Bio::PSORT::PSORT2 is a wrapper class for the original PSORT program.
|
|
302
|
+
#
|
|
303
|
+
# == Example
|
|
304
|
+
#
|
|
305
|
+
# serv = Bio::PSORT::PSORT2.imsut
|
|
306
|
+
# serv.title = 'Query_title_splited_by_white space'
|
|
307
|
+
# serv.exec(seq, false) # seq.class => String
|
|
308
|
+
# serv.exec(seq)
|
|
309
|
+
# report = serv.exec(Bio::FastaFormat.new(seq))
|
|
310
|
+
# report_raw = serv.exec(Bio::FastaFormat.new(seq), false)
|
|
311
|
+
#
|
|
312
|
+
# == References
|
|
313
|
+
# 1. Nakai, K. and Horton, P., PSORT: a program for detecting the sorting
|
|
314
|
+
# signals of proteins and predicting their subcellular localization,
|
|
315
|
+
# Trends Biochem. Sci, 24(1) 34-35 (1999).
|
|
316
|
+
# [PMID:10087920]
|
|
317
|
+
class PSORT2
|
|
318
|
+
|
|
319
|
+
autoload :Report, 'bio/appl/psort/report'
|
|
320
|
+
|
|
321
|
+
# Returns a Bio::PSORT::PSORT2 object wrapping a CGI driver (Bio::PSORT::PSORT2::Remote) for the given host and path.
|
|
322
|
+
#
|
|
323
|
+
# PSORT official hosts:
|
|
324
|
+
# key host path
|
|
325
|
+
# ------- ----------------------- -------------------- ---------
|
|
326
|
+
# IMSUT psort.ims.u-tokyo.ac.jp /cgi-bin/runpsort.pl (default)
|
|
327
|
+
# Okazaki psort.nibb.ac.jp /cgi-bin/runpsort.pl
|
|
328
|
+
# Peking srs.pku.edu.cn:8088 /cgi-bin/runpsort.pl
|
|
329
|
+
# Builds a Remote driver for +host+ and +path+ and returns a new
# instance of this class wrapping it.
def self.remote(host, path)
  new(Remote.new(host, path))
end
|
|
332
|
+
|
|
333
|
+
# Returns a Bio::PSORT::PSORT2 object (wrapping a Bio::PSORT::PSORT2::Remote driver)
|
|
334
|
+
# connecting to the IMSUT server.
|
|
335
|
+
# Convenience factory: calls remote with the 'host' and 'PSORT2' entries
# registered under WWWServer['IMSUT'].
def self.imsut
  server = WWWServer['IMSUT']
  remote(server['host'], server['PSORT2'])
end
|
|
339
|
+
|
|
340
|
+
# Returns a Bio::PSORT::PSORT2 object (wrapping a Bio::PSORT::PSORT2::Remote driver)
|
|
341
|
+
# connecting to the NIBB server.
|
|
342
|
+
# Convenience factory: calls remote with the 'host' and 'PSORT2' entries
# registered under WWWServer['Okazaki'].
def self.okazaki
  server = WWWServer['Okazaki']
  remote(server['host'], server['PSORT2'])
end
|
|
346
|
+
|
|
347
|
+
# Returns a Bio::PSORT::PSORT2 object (wrapping a Bio::PSORT::PSORT2::Remote driver)
|
|
348
|
+
# connecting to the Peking server.
|
|
349
|
+
# Convenience factory: calls remote with the 'host' and 'PSORT2' entries
# registered under WWWServer['Peking'].
def self.peking
  server = WWWServer['Peking']
  remote(server['host'], server['PSORT2'])
end
|
|
353
|
+
|
|
354
|
+
# An accessor of the origin argument.
|
|
355
|
+
# Default setting is ``yeast''.
|
|
356
|
+
attr_accessor :origin
|
|
357
|
+
|
|
358
|
+
# An accessor of the title argument. Default setting is an empty string.
|
|
359
|
+
# The value is automatically set if you use a query in
|
|
360
|
+
# Bio::FastaFormat.
|
|
361
|
+
attr_accessor :title
|
|
362
|
+
|
|
363
|
+
# Sets a server CGI Driver (Bio::PSORT::PSORT2::Remote).
|
|
364
|
+
# Sets a server CGI Driver (Bio::PSORT::PSORT2::Remote).
#
# driver:: object stored as @serv and used by exec to run queries
# origin:: value sent as the 'origin' CGI argument (default 'yeast')
#
# The title starts out as an empty string.
def initialize(driver, origin = 'yeast')
  @serv   = driver
  @origin = origin
  @title  = ''
end
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
# Executes PSORT II prediction and returns Report object
|
|
372
|
+
# (Bio::PSORT::PSORT2::Report) if parsing = true.
|
|
373
|
+
# Returns PSORT II report in text if parsing = false.
|
|
374
|
+
# Runs a PSORT II prediction through the configured driver (@serv).
#
# faa::     a Bio::FastaFormat entry, or anything Bio::FastaFormat.new
#           accepts (e.g. FASTA text in a String)
# parsing:: forwarded to the driver's parsing flag (default true)
#
# Returns whatever the driver's exec returns for the query sequence.
def exec(faa, parsing = true)
  # Normalise the input once instead of recursing; is_a? also accepts
  # subclasses of Bio::FastaFormat, which an exact class comparison rejected.
  faa = Bio::FastaFormat.new(faa) unless faa.is_a?(Bio::FastaFormat)

  # The title defaults to '' (never nil), so a nil-only check would never
  # pick up the entry id. Treat nil and empty alike. Once filled in, the
  # title is kept for later calls until the caller changes it.
  @title = faa.entry_id if @title.to_s.empty?

  @sequence = faa.seq
  @serv.args = {'origin' => @origin, 'title' => @title}
  @serv.parsing = parsing
  @serv.exec(@sequence)
end
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
# = Bio::PSORT::PSORT2::Remote
|
|
388
|
+
# PSORT2 specific CGIDriver
|
|
389
|
+
class Remote < CGIDriver
|
|
390
|
+
|
|
391
|
+
# Sets remote ``host'' and cgi ``path''.
|
|
392
|
+
# Sets the remote ``host'' and cgi ``path''.
#
# The origin defaults to 'yeast' (assigned before the superclass
# constructor runs) and parsing is enabled afterwards.
def initialize(host, path)
  @origin = 'yeast'
  super(host, path)
  @parsing = true
end
|
|
397
|
+
|
|
398
|
+
# An accessor of the origin argument.
|
|
399
|
+
# Default setting is ``yeast''.
|
|
400
|
+
attr_accessor :origin
|
|
401
|
+
|
|
402
|
+
# An accessor of the output parsing.
|
|
403
|
+
# Default setting is ``true''.
|
|
404
|
+
attr_accessor :parsing
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
private
|
|
408
|
+
|
|
409
|
+
# Returns parsed CGI argument.
|
|
410
|
+
# An API implementation.
|
|
411
|
+
# Returns the CGI argument string for +query+.
# An API implementation.
#
# The query is stored under the 'sequence' key of @args (mutating it),
# then the whole argument hash is serialised with args_join.
def make_args(query)
  @args['sequence'] = query
  args_join(@args)
end
|
|
415
|
+
|
|
416
|
+
|
|
417
|
+
# Returns parsed output report.
|
|
418
|
+
# An API implementation.
|
|
419
|
+
# Returns the output report for the raw server response +str+.
# An API implementation.
#
# Each "<hr>" that follows a newline (case-insensitive) is replaced by
# Report::BOUNDARY before the remaining HTML tags are erased, so the
# section breaks survive tag removal. When @parsing is true the cleaned
# text and the 'title' CGI argument are handed to
# Bio::PSORT::PSORT2::Report.parser; otherwise the cleaned text is returned.
def parse_report(str)
  text = erase_html_tags(str.gsub(/\n<hr>/i, Report::BOUNDARY))
  return text unless @parsing
  Bio::PSORT::PSORT2::Report.parser(text, args['title'])
end
|
|
425
|
+
|
|
426
|
+
end # class Remote
|
|
427
|
+
|
|
428
|
+
end # class PSORT2
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
# Empty placeholder class; it defines no methods or constants.
class IPSORT
end # class IPSORT
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
# Empty placeholder class; it defines no methods or constants.
class PSORTB
end # class PSORTB
|
|
437
|
+
|
|
438
|
+
# Empty placeholder class; it defines no methods or constants.
class WoLF_PSORT
end # class WoLF_PSORT
|
|
440
|
+
|
|
441
|
+
end # class PSORT
|
|
442
|
+
|
|
443
|
+
end # module Bio
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
|
|
447
|
+
|
|
448
|
+
|
|
449
|
+
# Manual demonstration, executed only when this file is run directly.
# It queries the servers returned by PSORT1.imsut and PSORT2.imsut and
# prints each expression next to its result.
if __FILE__ == $0

  # Best effort: a LoadError is deliberately ignored here.
  begin
    require 'psort/report.rb'
  rescue LoadError
  end

  # FASTA-formatted query used for every request below.
  seq = ">hoge mit
MALEPIDYTT RDEDDLDENE LLMKISNAAG SSRVNDNNDD LTFVENDKII
ARYSIQTSSK QQGKASTPPV EEAEEAAPQL PSRSSAAPPP PPRRATPEKK
DVKDLKSKFE GLAASEKEEE EMENKFAPPP KKSEPTIISP KPFSKPQEPV
FKGYHVQVTA HSREIDAEYL KIVRGSDPDT TWLIISPNAK KEYEPESTGS
KKSFTPSKSP APVSKKEPVK TPSPAPAAKI PKENPWATAE YDYDAAEDNE
NIEFVDDDWW LGELEKDGSK GLFPSNYVSL LPSRNVASGA PVQKEEPEQE
SFHDFLQLFD ETKVQYGLAR RKAKQNSGNA ETKAEAPKPE VPEDEPEGEP
DDWNEPELKE RDFDQAPLKP NQSSYKPIGK IDLQKVIAEE KAKEDPRLVQ
DYKKIGNPLP GMHIEADNEE EPEENDDDWD DDEDEAAQPP ANFAAVANNL
KPTAAGSKID DDKVIKGFRN EKSPAQLWAE VSPPGSDVEK IIIIGWCPDS
APLKTRASFA PSSDIANLKN ESKLKRDSEF NSFLGTTKPP SMTESSLKND
KAEEAEQPKT EIAPSLPSRN SIPAPKQEEA PEQAPEEEIE GN
"

  # ---- PSORT1 ----------------------------------------------------------
  puts "\n Bio::PSORT::PSORT1"

  puts "\n ==> p serv = Bio::PSORT::PSORT1.imsut"
  p serv = Bio::PSORT::PSORT1.imsut

  puts "\n ==> p serv.class "
  p serv.class

  puts "\n ==> p serv.title = 'Query_title_splited_by_white space'"
  p serv.title = 'Query_title_splited_by_white space'

  puts "\n ==> p serv.exec(seq, false) "
  p serv.exec(seq, false)

  puts "\n ==> p serv.exec(seq) "
  p serv.exec(seq)

  puts "\n ==> p report = serv.exec(Bio::FastaFormat.new(seq)) "
  p report = serv.exec(Bio::FastaFormat.new(seq))

  puts "\n ==> p report.class"
  p report.class

  puts "\n ==> p report_raw = serv.exec(Bio::FastaFormat.new(seq), false) "
  p report_raw = serv.exec(Bio::FastaFormat.new(seq), false)

  puts "\n ==> p report_raw.class"
  p report_raw.class

  puts "\n ==> p report.methods"
  p report.methods

  # Call each accessor by name; __send__ avoids evaluating generated code
  # and the local is not called `methods`, which would shadow Object#methods.
  psort1_accessors = %w[entry_id origin title sequence result_info
                        reasoning final_result raw]
  psort1_accessors.each do |name|
    puts "\n ==> p report.#{name}"
    p report.__send__(name)
  end

  # ---- PSORT2 ----------------------------------------------------------
  puts "\n Bio::PSORT::PSORT2"

  puts "\n ==> p serv = Bio::PSORT::PSORT2.imsut"
  p serv = Bio::PSORT::PSORT2.imsut

  puts "\n ==> p serv.class "
  p serv.class

  puts "\n ==> p seq "
  p seq

  puts "\n ==> p serv.title = 'Query_title_splited_by_white space'"
  p serv.title = 'Query_title_splited_by_white space'

  puts "\n ==> p serv.exec(seq) # parsed report"
  p serv.exec(seq)

  puts "\n ==> p report = serv.exec(Bio::FastaFormat.new(seq)) # parsed report"
  p report = serv.exec(Bio::FastaFormat.new(seq))

  puts "\n ==> p serv.exec(seq, false) # report in plain text"
  p serv.exec(seq, false)

  puts "\n ==> p report_raw = serv.exec(Bio::FastaFormat.new(seq), false) # report in plain text"
  p report_raw = serv.exec(Bio::FastaFormat.new(seq), false)

  puts "\n ==> p report.methods"
  p report.methods

  psort2_accessors = %w[entry_id scl definition seq features prob pred k raw]
  psort2_accessors.each do |name|
    puts "\n ==> p report.#{name}"
    p report.__send__(name)
  end
end
|