bio 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,222 @@
1
+ #
2
+ # = bio/appl/mafft.rb - MAFFT wrapper class
3
+ #
4
+ # Copyright:: Copyright (C) 2003 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
5
+ # License:: LGPL
6
+ #
7
+ #--
8
+ # This library is free software; you can redistribute it and/or
9
+ # modify it under the terms of the GNU Lesser General Public
10
+ # License as published by the Free Software Foundation; either
11
+ # version 2 of the License, or (at your option) any later version.
12
+ #
13
+ # This library is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
+ # Lesser General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU Lesser General Public
19
+ # License along with this library; if not, write to the Free Software
20
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
+ #++
22
+ #
23
+ # $Id: mafft.rb,v 1.9 2005/12/18 15:58:40 k Exp $
24
+ #
25
+ # Bio::MAFFT is a wrapper class to execute MAFFT.
26
+ # MAFFT is a very fast multiple sequence alignment software.
27
+ #
28
+ # = Important Notes
29
+ #
30
+ # Though Bio::MAFFT class currently supports only MAFFT version 3,
31
+ # you can use MAFFT version 5 because the class is a wrapper class.
32
+ #
33
+ # == References
34
+ #
35
+ # * K. Katoh, K. Misawa, K. Kuma and T. Miyata.
36
+ # MAFFT: a novel method for rapid multiple sequence alignment based
37
+ # on fast Fourier transform. Nucleic Acids Res. 30: 3059-3066, 2002.
38
+ # http://nar.oupjournals.org/cgi/content/abstract/30/14/3059
39
+ # * http://www.biophys.kyoto-u.ac.jp/~katoh/programs/align/mafft/
40
+ #
41
+
42
+ require 'bio/db/fasta'
43
+ require 'bio/io/flatfile'
44
+
45
+ #--
46
+ # We use Open3.popen3, because MAFFT on win32 requires Cygwin.
47
+ #++
48
+ require 'open3'
49
+
50
# Tempfile is used by Bio::MAFFT#query_string to hand sequences to MAFFT.
require 'tempfile'

module Bio

  # Bio::MAFFT is a wrapper class to execute MAFFT.
  # MAFFT is a very fast multiple sequence alignment software.
  #
  # Though Bio::MAFFT class currently supports only MAFFT version 3,
  # you can use MAFFT version 5 because the class is a wrapper class.
  class MAFFT

    autoload :Report, 'bio/appl/mafft/report'

    # Creates a new alignment factory.
    # When +n+ is a number (1,2,3, ...), performs 'fftns n'.
    # When +n+ is :i or 'i', performs 'fftnsi'.
    # When +n+ is nil (default), performs 'fftns' without arguments.
    def self.fftns(n = nil)
      return new2(nil, 'fftnsi') if n.to_s == 'i'
      opt = []
      opt << n.to_s if n
      new2(nil, 'fftns', *opt)
    end

    # Creates a new alignment factory.
    # Performs 'fftnsi'.
    def self.fftnsi
      new2(nil, 'fftnsi')
    end

    # Creates a new alignment factory.
    # When +n+ is a number (1,2,3, ...), performs 'nwns n'.
    # When +n+ is :i or 'i', performs 'nwnsi'.
    # In both cases, if +ap+ is true, the option '--all-positive' is added
    # (before +n+ on the command line).
    def self.nwns(n = nil, ap = nil)
      opt = []
      opt << '--all-positive' if ap
      return new2(nil, 'nwnsi', *opt) if n.to_s == 'i'
      opt << n.to_s if n
      new2(nil, 'nwns', *opt)
    end

    # Creates a new alignment factory.
    # Performs 'nwnsi'.
    # If +all_positive+ is true, add option '--all-positive'.
    def self.nwnsi(all_positive = nil)
      opt = []
      opt << '--all-positive' if all_positive
      new2(nil, 'nwnsi', *opt)
    end

    # Creates a new alignment factory.
    # Performs 'nwns --all-positive n' or 'nwnsi --all-positive'.
    # Same as Bio::MAFFT.nwns(n, true).
    def self.nwap(n = nil)
      nwns(n, true)
    end

    # Creates a new alignment factory.
    # +dir+ is the directory containing the MAFFT program (nil to use
    # +prog+ as given).
    # +prog+ is the name of the program.
    # +opt+ is options of the program.
    def self.new2(dir, prog, *opt)
      prog = File.join(dir, prog) if dir
      new(prog, opt)
    end

    # Creates a new alignment factory.
    # +program+ is the name of the program.
    # +option+ is an Array of options of the program.
    def initialize(program, option)
      @program = program
      @option  = option
      @command = nil
      @output  = nil
      @report  = nil
      @log     = nil
    end

    # program name
    attr_accessor :program

    # options
    attr_accessor :option

    # Shows last command-line string. Returns nil or an array of String.
    # Note that filenames described in the command-line may already
    # be removed because they are temporary files.
    attr_reader :command

    # last message to STDERR when executing the program.
    attr_reader :log

    # Shows latest raw alignment result.
    # Since a result of MAFFT is simply a multiple-fasta format,
    # it returns an array of Bio::FastaFormat instances
    # instead of raw string.
    attr_reader :output

    # Shows last alignment result (instance of Bio::MAFFT::Report class)
    # performed by the factory.
    attr_reader :report

    # Executes the program.
    # If +seqs+ is not nil, perform alignment for seqs.
    # If +seqs+ is nil, simply executes the program with the stored options
    # and returns what was written to STDERR.
    def query(seqs)
      if seqs
        query_align(seqs)
      else
        exec_local(@option)
      end
    end

    # Performs alignment for seqs.
    # +seqs+ should be Bio::Alignment or Array of sequences.
    # Anything that is not a Bio::Alignment is passed (together with +arg+)
    # to Bio::Alignment.new.
    def query_align(seqs, *arg)
      seqs = Bio::Alignment.new(seqs, *arg) unless seqs.is_a?(Bio::Alignment)
      query_string(seqs.to_fasta(70))
    end

    # Performs alignment for +str+.
    # +str+ should be a string that can be recognized by the program.
    # The string is written to a temporary file which is always removed
    # afterwards, even when running the program raises an exception.
    def query_string(str, *arg)
      # Opened outside the begin/ensure so that a failure to create the
      # temporary file is reported as-is instead of being masked by a
      # NoMethodError on nil in the cleanup code.
      tf_in = Tempfile.open('align')
      begin
        tf_in.print str
        tf_in.close(false)   # flush to disk but keep the file for MAFFT
        query_by_filename(tf_in.path, *arg)
      ensure
        tf_in.close(true)    # close (if still open) and unlink
      end
    end

    # Performs alignment of sequences in the file named +fn+.
    # +seqtype+ is handed to Bio::MAFFT::Report.new.
    # Returns the new report, which is also available via #report.
    def query_by_filename(fn, seqtype = nil)
      opt = @option + [ fn ]
      exec_local(opt)
      @report = Report.new(@output, seqtype)
      @report
    end

    private

    # Executes a program in the local machine.
    # Stores the command line in @command, the parsed STDOUT
    # (Array of Bio::FastaFormat) in @output and STDERR in @log.
    # Returns @log.
    def exec_local(opt)
      @command = [ @program, *opt ]
      #STDERR.print "DEBUG: ", @command.join(" "), "\n"
      @output = nil
      @log = nil
      Open3.popen3(*@command) do |din, dout, derr|
        din.close
        derr.sync = true
        # STDERR is drained in its own thread while STDOUT is being parsed.
        t = Thread.start do
          @log = derr.read
        end
        ff = Bio::FlatFile.new(Bio::FastaFormat, dout)
        @output = ff.to_a
        t.join
      end
      @log
    end

  end #class MAFFT
end #module Bio
222
+
@@ -0,0 +1,119 @@
1
+ #
2
+ # = bio/appl/mafft/report.rb - MAFFT report class
3
+ #
4
+ # Copyright:: Copyright (C) 2003 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
5
+ # License:: LGPL
6
+ #
7
+ #--
8
+ # This library is free software; you can redistribute it and/or
9
+ # modify it under the terms of the GNU Lesser General Public
10
+ # License as published by the Free Software Foundation; either
11
+ # version 2 of the License, or (at your option) any later version.
12
+ #
13
+ # This library is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
+ # Lesser General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU Lesser General Public
19
+ # License along with this library; if not, write to the Free Software
20
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
+ #++
22
+ #
23
+ # $Id: report.rb,v 1.8 2005/12/18 15:58:40 k Exp $
24
+ #
25
+ # MAFFT result parser class.
26
+ # MAFFT is a very fast multiple sequence alignment software.
27
+ #
28
+ # Since a result of MAFFT is simply a multiple-fasta format,
29
+ # the significance of this class is to keep a standard form and
29
+ # interface compatible with Bio::ClustalW::Report.
31
+ #
32
+ # == References
33
+ #
34
+ # * K. Katoh, K. Misawa, K. Kuma and T. Miyata.
35
+ # MAFFT: a novel method for rapid multiple sequence alignment based
36
+ # on fast Fourier transform. Nucleic Acids Res. 30: 3059-3066, 2002.
37
+ # http://nar.oupjournals.org/cgi/content/abstract/30/14/3059
38
+ # * http://www.biophys.kyoto-u.ac.jp/~katoh/programs/align/mafft/
39
+ #
40
+
41
+ require 'bio/db/fasta'
42
+ require 'bio/io/flatfile'
43
+ require 'bio/appl/mafft'
44
+
45
module Bio
  class MAFFT

    # MAFFT result parser class.
    # MAFFT is a very fast multiple sequence alignment software.
    #
    # Since a result of MAFFT is simply a multiple-fasta format,
    # the purpose of this class is to offer the same form and
    # interface as Bio::ClustalW::Report.
    class Report

      # Creates a new Report object.
      # +ary+ should be an Array of Bio::FastaFormat.
      # +seqclass+ should be one of the following:
      # Class:  Bio::Sequence::AA, Bio::Sequence::NA, ...
      # String: 'PROTEIN', 'DNA', ...
      # Anything else (including nil) selects Bio::Sequence.
      def initialize(ary, seqclass = nil)
        @data  = ary
        @align = nil
        @seqclass =
          case seqclass
          when Module      then seqclass
          when /PROTEIN/i  then Bio::Sequence::AA
          when /[DR]NA/i   then Bio::Sequence::NA
          else                  Bio::Sequence
          end
      end

      # sequence data. Returns an array of Bio::FastaFormat.
      attr_reader :data

      # Sequence class (Bio::Sequence::AA, Bio::Sequence::NA, ...)
      attr_reader :seqclass

      # Gets the multiple alignment, building it on first use.
      # Returns an instance of Bio::Alignment class.
      def align
        do_parse if @align.nil? || @align == false
        @align
      end
      alias alignment align

      # Gets a fasta-format string of the sequences.
      # Returns a string.
      # Same as align.to_fasta.
      # Please refer to Bio::Alignment#to_fasta for arguments.
      def to_fasta(*arg)
        alignment_object = align
        alignment_object.to_fasta(*arg)
      end

      # Gets an array of the sequences.
      # Returns an array of Bio::FastaFormat instances.
      def to_a
        data
      end

      private

      # Builds the alignment from @data unless it already exists.
      # Each entry is converted to a [ sequence, definition ] pair, the
      # sequence being an instance of the selected sequence class.
      # Always returns nil.
      def do_parse
        unless @align
          @align = Bio::Alignment.new(@data) do |entry|
            sequence = @seqclass.new(entry.seq)
            [ sequence, entry.definition ]
          end
        end
        nil
      end

    end #class Report
  end #class MAFFT
end #module Bio
119
+
@@ -0,0 +1,555 @@
1
+ #
2
+ # = bio/appl/psort.rb - PSORT, protein sorting site prediction systems
3
+ #
4
+ # Copyright:: Copyright (C) 2003 Mitsuteru C. Nakao <n@bioruby.org>
5
+ # License:: LGPL
6
+ #
7
+ #
8
+ # $Id: psort.rb,v 1.8 2005/11/01 05:15:15 nakao Exp $
9
+ #
10
+ # == A client for PSORT WWW Server
11
+ #
12
+ # A client for PSORT WWW Server for predicting protein subcellular
13
+ # localization.
14
+ #
15
+ # PSORT family members,
16
+ # 1. PSORT
17
+ # 2. PSORT II
18
+ # 3. iPSORT
19
+ # 4. PSORT-B http://psort.org
20
+ # 5. WoLF-PSORT
21
+ #
22
+ # See http://psort.ims.u-tokyo.ac.jp.
23
+ #
24
+ # === Example
25
+ #
26
+ #
27
+ #--
28
+ #
29
+ # This library is free software; you can redistribute it and/or
30
+ # modify it under the terms of the GNU Lesser General Public
31
+ # License as published by the Free Software Foundation; either
32
+ # version 2 of the License, or (at your option) any later version.
33
+ #
34
+ # This library is distributed in the hope that it will be useful,
35
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
36
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
37
+ # Lesser General Public License for more details.
38
+ #
39
+ # You should have received a copy of the GNU Lesser General Public
40
+ # License along with this library; if not, write to the Free Software
41
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
42
+ #
43
+ #++
44
+ #
45
+
46
+ require 'bio/sequence'
47
+ require 'bio/db/fasta'
48
+ require 'net/http'
49
+ require 'cgi'
50
+
51
+
52
+ module Bio
53
+
54
+
55
+
56
+
57
+ class PSORT
58
+ # a Hash for PSORT official hosts:
59
+ # Key value (host)
60
+ # ------- -----------------------
61
+ # IMSUT psort.ims.u-tokyo.ac.jp
62
+ # Okazaki psort.nibb.ac.jp
63
+ # Peking srs.pku.edu.cn:8088
64
+ WWWServer = {
65
+ 'IMSUT' => {'host' => 'psort.hgc.jp', #'psort.ims.u-tokyo.ac.jp',
66
+ 'PSORT1' => '/cgi-bin/okumura.pl',
67
+ 'PSORT2' => '/cgi-bin/runpsort.pl'},
68
+ 'Okazaki' => {'host' => 'psort.nibb.ac.jp',
69
+ 'PSORT1' => '/cgi-bin/okumura.pl',
70
+ 'PSORT2' => '/cgi-bin/runpsort.pl'},
71
+ 'Peking' => {'host' => 'srs.pku.edu.en:8088',
72
+ 'PSORT1' => '/cgi-bin/okumura.pl',
73
+ 'PSORT2' => '/cgi-bin/runpsort.pl'}
74
+ }
75
+
76
+
77
+ # = Generic CGI client class
78
+ # A generic CGI client class for Bio::PSORT::* classes.
79
+ # The class provides an interface for CGI argument processing and output
80
+ # report parsing.
81
+ #
82
+ # == Example
83
+ #
84
+ # class NewClient < CGIDriver
85
+ # def initialize(host, path)
86
+ # super(host, path)
87
+ # end
88
+ # end
89
+ # private
90
+ # def make_args(query)
91
+ # # ...
92
+ # end
93
+ # def parse_report(output)
94
+ # # ...
95
+ # end
96
+ #
97
+ class CGIDriver
98
+
99
+ # CGI query argument in Hash ({key => value, ...}).
100
+ attr_accessor :args
101
+
102
+ # CGI output raw text
103
+ attr_reader :report
104
+
105
+
106
+ # Sets remote ``host'' and cgi ``path''.
107
+ def initialize(host = '', path = '')
108
+ @host = host
109
+ @path = path
110
+ @args = {}
111
+ @report
112
+ end
113
+
114
+
115
+ # Executes a CGI ``query'' and returns aReport
116
+ def exec(query)
117
+ data = make_args(query)
118
+
119
+ begin
120
+ result, = Net::HTTP.new(@host).post(@path, data)
121
+ @report = result.body
122
+ output = parse_report(@report)
123
+ end
124
+
125
+ return output
126
+ end
127
+
128
+ private
129
+
130
+ # Bio::CGIDriver#make_args. An API skelton.
131
+ def make_args(args_hash)
132
+ # The routin should be provided in the inherited class
133
+ end
134
+
135
+ # Bio::CGIDriver#parse_report. An API skelton.
136
+ def parse_report(result_body)
137
+ # The routin should be provided in the inherited class
138
+ end
139
+
140
+ # Erases HTML tags
141
+ def erase_html_tags(str)
142
+ return str.gsub(/<\S.*?>/,'')
143
+ end
144
+
145
+ # Returns CGI argument text in String (key=value&) from a Hash ({key=>value}).
146
+ def args_join(hash, delim = '&')
147
+ tmp = []
148
+ hash.each do |key, val|
149
+ tmp << CGI.escape(key.to_s) + '=' + CGI.escape(val.to_s)
150
+ end
151
+ return tmp.join(delim) # not ';' but '&' in psort's cgi
152
+ end
153
+
154
+ end # class CGIDriver
155
+
156
+
157
+
158
+ # = Bio::PSORT::PSORT1
159
+ # Bio::PSORT::PSORT1 is a wapper class for the original PSORT program.
160
+ #
161
+ # == Example
162
+ #
163
+ # serv = Bio::PSORT::PSORT1.imsut
164
+ # serv.title = 'Query_title_splited_by_white space'
165
+ # serv.exec(seq, false) # seq.class => String
166
+ # serv.exec(seq)
167
+ # report = serv.exec(Bio::FastaFormat.new(seq))
168
+ # report_raw = serv.exec(Bio::FastaFormat.new(seq), false)
169
+ #
170
+ # == References
171
+ # 1. Nakai, K. and Kanehisa, M., A knowledge base for predicting protein
172
+ # localization sites in eukaryotic cells, Genomics 14, 897-911 (1992).
173
+ # [PMID:1478671]
174
+ class PSORT1
175
+
176
+ autoload :Report, 'bio/appl/psort/report'
177
+
178
+ # Returns a PSORT1 CGI Driver object (Bio::PSORT::PSORT1::Remote)
179
+ # connecting to the IMSUT server.
180
+ def self.imsut
181
+ self.new(Remote.new(WWWServer['IMSUT']['host'],
182
+ WWWServer['IMSUT']['PSORT1']))
183
+ end
184
+
185
+
186
+ # Returns a PSORT1 CGI Driver object (Bio::PSORT::PSORT1::Remote)
187
+ # connecting to the NIBB server.
188
+ def self.okazaki
189
+ self.new(Remote.new(WWWServer['Okazaki']['host'],
190
+ WWWServer['Okazaki']['PSORT1']))
191
+ end
192
+
193
+
194
+ # Returns a PSORT1 CGI Driver object (Bio::PSORT::PSORT1::Remote)
195
+ # connecting to the Peking server.
196
+ def self.peking
197
+ self.new(Remote.new(WWWServer['Peking']['host'],
198
+ WWWServer['Peking']['PSORT1']))
199
+ end
200
+
201
+
202
+ # Sets a server CGI Driver (Bio::PSORT::PSORT1::Remote).
203
+ def initialize(driver, origin = 'yeast')
204
+ @serv = driver
205
+ @origin = origin # Gram-positive bacterium, Gram-negative bacterium,
206
+ # yeast, aminal, plant
207
+ @title = 'MYSEQ'
208
+ @sequence = ''
209
+ end
210
+
211
+
212
+ # An accessor of the origin argument. Default setting is "yeast".
213
+ # Usable values:
214
+ # 1. Gram-positive bacterium
215
+ # 2. Gram-negative bacterium
216
+ # 3. yeast
217
+ # 4. animal
218
+ # 5. plant
219
+ attr_accessor :origin
220
+
221
+ # An accessor of the query sequence argument.
222
+ attr_accessor :sequence
223
+
224
+ # An accessor of the title argument. Default setting is 'MYSEQ'.
225
+ # The value is automatically setted if you use a query in
226
+ # Bio::FastaFormat.
227
+ attr_accessor :title
228
+
229
+
230
+ # Executes the query (faa) and returns an Bio::PSORT::PSORT1::Report.
231
+ #
232
+ # The ``faa'' argument is acceptable a sequence both in String and in
233
+ # Bio::FastaFormat.
234
+ #
235
+ # If you set the second argument is ``parsing = false'',
236
+ # returns ourput text without any parsing.
237
+ def exec(faa, parsing = true)
238
+ if faa.class == Bio::FastaFormat
239
+ @title = faa.entry_id if @title == 'MYSEQ'
240
+ @sequence = faa.seq
241
+ @serv.args = {'title' => @title, 'origin' => @origin}
242
+ @serv.parsing = parsing
243
+ return @serv.exec(sequence)
244
+ else
245
+ self.exec(Bio::FastaFormat.new(faa), parsing)
246
+ end
247
+ end
248
+
249
+
250
+ # = Bio::PSORT::PSORT1::Remote
251
+ # PSORT1 specific CGIDriver.
252
+ class Remote < CGIDriver
253
+
254
+ # Accessor for Bio::PSORT::PSORT1::Remote#origin to contein target domain.
255
+ # Taget domains:
256
+ # 1. Gram-positive bacterium
257
+ # 2. Gram-negative bacterium
258
+ # 3. yeast
259
+ # 4. animal
260
+ # 5. plant
261
+ attr_accessor :origin
262
+
263
+ # Accessor for Bio::POSRT::PSORT1#sequence to contein the query sequence.
264
+ attr_accessor :title
265
+
266
+ # Accessor for Bio::PSORT::PSORT1#title to contain the query title.
267
+ attr_accessor :parsing
268
+
269
+ # Sets remote ``host'' and cgi ``path''.
270
+ def initialize(host, path)
271
+ @origin = 'yeast'
272
+ @title = 'MYSEQ'
273
+ @parsing = true
274
+ super(host, path)
275
+ end
276
+
277
+ private
278
+
279
+ # Returns parsed CGI argument.
280
+ # An API implementation.
281
+ def make_args(query)
282
+ @args.update({'sequence' => query})
283
+ return args_join(@args)
284
+ end
285
+
286
+
287
+ # Returns parsed output report.
288
+ # An API implementation.
289
+ def parse_report(str)
290
+ str = erase_html_tags(str)
291
+ str = Bio::PSORT::PSORT1::Report.parser(str) if @parsing
292
+ return str
293
+ end
294
+
295
+ end # Class Remote
296
+
297
+ end # class PSORT1
298
+
299
+
300
+ # = Bio::PSORT::PSORT2
301
+ # Bio::PSORT::PSORT2 is a wapper class for the original PSORT program.
302
+ #
303
+ # == Example
304
+ #
305
+ # serv = Bio::PSORT::PSORT2.imsut
306
+ # serv.title = 'Query_title_splited_by_white space'
307
+ # serv.exec(seq, false) # seq.class => String
308
+ # serv.exec(seq)
309
+ # report = serv.exec(Bio::FastaFormat.new(seq))
310
+ # report_raw = serv.exec(Bio::FastaFormat.new(seq), false)
311
+ #
312
+ # == References
313
+ # 1. Nakai, K. and Horton, P., PSORT: a program for detecting the sorting
314
+ # signals of proteins and predicting their subcellular localization,
315
+ # Trends Biochem. Sci, 24(1) 34-35 (1999).
316
+ # [PMID:10087920]
317
+ class PSORT2
318
+
319
+ autoload :Report, 'bio/appl/psort/report'
320
+
321
+ # Returns a PSORT2 CGI Driver object (Bio::PSORT::PSORT2::Remote).
322
+ #
323
+ # PSORT official hosts:
324
+ # key host path
325
+ # ------- ----------------------- -------------------- ---------
326
+ # IMSUT psort.ims.u-tokyo.ac.jp /cgi-bin/runpsort.pl (default)
327
+ # Okazaki psort.nibb.ac.jp /cgi-bin/runpsort.pl
328
+ # Peking srs.pku.edu.cn:8088 /cgi-bin/runpsort.pl
329
+ def self.remote(host, path)
330
+ self.new(Remote.new(host, path))
331
+ end
332
+
333
+ # Returns a PSORT2 CGI Driver object (Bio::PSORT::PSORT2::Remote)
334
+ # connecting to the IMSUT server.
335
+ def self.imsut
336
+ self.remote(WWWServer['IMSUT']['host'],
337
+ WWWServer['IMSUT']['PSORT2'])
338
+ end
339
+
340
+ # Returns a PSORT2 CGI Driver object (Bio::PSORT::PSORT2::Remote)
341
+ # connecting to the NIBB server.
342
+ def self.okazaki
343
+ self.remote(WWWServer['Okazaki']['host'],
344
+ WWWServer['Okazaki']['PSORT2'])
345
+ end
346
+
347
+ # Returns a PSORT2 CGI Driver object (Bio::PSORT::PSORT2::Remote)
348
+ # connecting to the Peking server.
349
+ def self.peking
350
+ self.remote(WWWServer['Peking']['host'],
351
+ WWWServer['Peking']['PSORT2'])
352
+ end
353
+
354
+ # An accessor of the origin argument.
355
+ # Default setting is ``yeast''.
356
+ attr_accessor :origin
357
+
358
+ # An accessor of the title argument. Default setting is ``QUERY''.
359
+ # The value is automatically setted if you use a query in
360
+ # Bio::FastaFormat.
361
+ attr_accessor :title
362
+
363
+ # Sets a server CGI Driver (Bio::PSORT::PSORT2::Remote).
364
+ def initialize(driver, origin = 'yeast')
365
+ @serv = driver
366
+ @origin = origin
367
+ @title = ''
368
+ end
369
+
370
+
371
+ # Executes PSORT II prediction and returns Report object
372
+ # (Bio::PSORT::PSORT2::Report) if parsing = true.
373
+ # Returns PSORT II report in text if parsing = false.
374
+ def exec(faa, parsing = true)
375
+ if faa.class == Bio::FastaFormat
376
+ @title = faa.entry_id if @title == nil
377
+ @sequence = faa.seq
378
+ @serv.args = {'origin' => @origin, 'title' => @title}
379
+ @serv.parsing = parsing
380
+ return @serv.exec(@sequence)
381
+ else
382
+ self.exec(Bio::FastaFormat.new(faa), parsing)
383
+ end
384
+ end
385
+
386
+
387
# = Bio::PSORT::PSORT2::Remote
# CGIDriver specialised for the PSORT II service.
class Remote < CGIDriver

  # An accessor of the origin argument.
  # Starts out as ``yeast''.
  attr_accessor :origin

  # An accessor of the output parsing switch.
  # Starts out as ``true''.
  attr_accessor :parsing

  # Creates a driver for the cgi ``path'' on the remote ``host''.
  def initialize(host, path)
    @origin = 'yeast'
    super(host, path)
    @parsing = true
  end

  private

  # An API implementation.
  # Adds the query under the 'sequence' key of the current arguments and
  # returns them joined by args_join.
  def make_args(query)
    @args.update('sequence' => query)
    args_join(@args)
  end

  # An API implementation.
  # Replaces each "\n<hr>" with Report::BOUNDARY, strips the HTML tags and,
  # when parsing is enabled, hands the text to the PSORT2 report parser
  # together with the 'title' argument. Otherwise the text is returned.
  def parse_report(str)
    plain = erase_html_tags(str.gsub(/\n<hr>/i, Report::BOUNDARY))
    return plain unless @parsing

    Bio::PSORT::PSORT2::Report.parser(plain, args['title'])
  end

end # class Remote
427
+
428
+ end # class PSORT2
429
+
430
+
431
+ class IPSORT # placeholder: no members are defined in this class body
432
+ end # class IPSORT
433
+
434
+
435
+ class PSORTB # placeholder: no members are defined in this class body
436
+ end # class PSORTB
437
+
438
+ class WoLF_PSORT # placeholder: no members are defined in this class body
439
+ end # class WoLF_PSORT
440
+
441
+ end # class PSORT
442
+
443
+ end # module Bio
444
+
445
+
446
+
447
+
448
+
449
# Demo that runs only when this file is executed directly.
# It calls exec on the PSORT1 and PSORT2 drivers and prints each expression
# next to its result.
# NOTE(review): exec presumably contacts the remote servers, so network
# access is needed -- confirm.
if __FILE__ == $0

  # Best effort: the report parser may already be loaded, so a missing
  # file is deliberately ignored here.
  begin
    require 'psort/report.rb'
  rescue LoadError
  end


  seq = ">hoge mit
MALEPIDYTT RDEDDLDENE LLMKISNAAG SSRVNDNNDD LTFVENDKII
ARYSIQTSSK QQGKASTPPV EEAEEAAPQL PSRSSAAPPP PPRRATPEKK
DVKDLKSKFE GLAASEKEEE EMENKFAPPP KKSEPTIISP KPFSKPQEPV
FKGYHVQVTA HSREIDAEYL KIVRGSDPDT TWLIISPNAK KEYEPESTGS
KKSFTPSKSP APVSKKEPVK TPSPAPAAKI PKENPWATAE YDYDAAEDNE
NIEFVDDDWW LGELEKDGSK GLFPSNYVSL LPSRNVASGA PVQKEEPEQE
SFHDFLQLFD ETKVQYGLAR RKAKQNSGNA ETKAEAPKPE VPEDEPEGEP
DDWNEPELKE RDFDQAPLKP NQSSYKPIGK IDLQKVIAEE KAKEDPRLVQ
DYKKIGNPLP GMHIEADNEE EPEENDDDWD DDEDEAAQPP ANFAAVANNL
KPTAAGSKID DDKVIKGFRN EKSPAQLWAE VSPPGSDVEK IIIIGWCPDS
APLKTRASFA PSSDIANLKN ESKLKRDSEF NSFLGTTKPP SMTESSLKND
KAEEAEQPKT EIAPSLPSRN SIPAPKQEEA PEQAPEEEIE GN
"
  # Short sample entry; it is not used by the statements below.
  Seq1 = ">hgoe
LTFVENDKII NI
"

  # The banner and the echoed expression name PSORT1, matching the class
  # that is actually exercised in this section.
  puts "\n Bio::PSORT::PSORT1"

  puts "\n ==> p serv = Bio::PSORT::PSORT1.imsut"
  p serv = Bio::PSORT::PSORT1.imsut

  puts "\n ==> p serv.class "
  p serv.class

  puts "\n ==> p serv.title = 'Query_title_splited_by_white space'"
  p serv.title = 'Query_title_splited_by_white space'

  puts "\n ==> p serv.exec(seq, false) "
  p serv.exec(seq, false)

  puts "\n ==> p serv.exec(seq) "
  p serv.exec(seq)

  puts "\n ==> p report = serv.exec(Bio::FastaFormat.new(seq)) "
  p report = serv.exec(Bio::FastaFormat.new(seq))

  puts "\n ==> p report.class"
  p report.class


  puts "\n ==> p report_raw = serv.exec(Bio::FastaFormat.new(seq), false) "
  p report_raw = serv.exec(Bio::FastaFormat.new(seq), false)

  puts "\n ==> p report_raw.class"
  p report_raw.class


  puts "\n ==> p report.methods"
  p report.methods

  # Named report_attrs rather than methods so that Object#methods is not
  # shadowed; each reader is called directly instead of through eval.
  report_attrs = ['entry_id', 'origin', 'title', 'sequence', 'result_info',
                  'reasoning', 'final_result', 'raw']
  report_attrs.each do |name|
    puts "\n ==> p report.#{name}"
    p report.__send__(name)
  end



  puts "\n Bio::PSORT::PSORT2"

  puts "\n ==> p serv = Bio::PSORT::PSORT2.imsut"
  p serv = Bio::PSORT::PSORT2.imsut

  puts "\n ==> p serv.class "
  p serv.class

  puts "\n ==> p seq "
  p seq

  puts "\n ==> p serv.title = 'Query_title_splited_by_white space'"
  p serv.title = 'Query_title_splited_by_white space'

  puts "\n ==> p serv.exec(seq) # parsed report"
  p serv.exec(seq)

  puts "\n ==> p report = serv.exec(Bio::FastaFormat.new(seq)) # parsed report"
  p report = serv.exec(Bio::FastaFormat.new(seq))



  puts "\n ==> p serv.exec(seq, false) # report in plain text"
  p serv.exec(seq, false)

  puts "\n ==> p report_raw = serv.exec(Bio::FastaFormat.new(seq), false) # report in plain text"
  p report_raw = serv.exec(Bio::FastaFormat.new(seq), false)


  puts "\n ==> p report.methods"
  p report.methods

  report_attrs = ['entry_id', 'scl', 'definition', 'seq', 'features', 'prob', 'pred', 'k', 'raw']
  report_attrs.each do |name|
    puts "\n ==> p report.#{name}"
    p report.__send__(name)
  end
end