bio 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,222 @@
1
+ #
2
+ # = bio/appl/mafft.rb - MAFFT wrapper class
3
+ #
4
+ # Copyright:: Copyright (C) 2003 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
5
+ # License:: LGPL
6
+ #
7
+ #--
8
+ # This library is free software; you can redistribute it and/or
9
+ # modify it under the terms of the GNU Lesser General Public
10
+ # License as published by the Free Software Foundation; either
11
+ # version 2 of the License, or (at your option) any later version.
12
+ #
13
+ # This library is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
+ # Lesser General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU Lesser General Public
19
+ # License along with this library; if not, write to the Free Software
20
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
+ #++
22
+ #
23
+ # $Id: mafft.rb,v 1.9 2005/12/18 15:58:40 k Exp $
24
+ #
25
+ # Bio::MAFFT is a wrapper class to execute MAFFT.
26
+ # MAFFT is a very fast multiple sequence alignment software.
27
+ #
28
+ # = Important Notes
29
+ #
30
+ # Though Bio::MAFFT class currently supports only MAFFT version 3,
31
+ # you can use MAFFT version 5 because the class is a wrapper class.
32
+ #
33
+ # == References
34
+ #
35
+ # * K. Katoh, K. Misawa, K. Kuma and T. Miyata.
36
+ # MAFFT: a novel method for rapid multiple sequence alignment based
37
+ # on fast Fourier transform. Nucleic Acids Res. 30: 3059-3066, 2002.
38
+ # http://nar.oupjournals.org/cgi/content/abstract/30/14/3059
39
+ # * http://www.biophys.kyoto-u.ac.jp/~katoh/programs/align/mafft/
40
+ #
41
+
42
require 'bio/db/fasta'
require 'bio/io/flatfile'

#--
# We use Open3.popen3, because MAFFT on win32 requires Cygwin.
#++
require 'open3'
require 'tempfile'
49
+
50
+ module Bio
51
+
52
+ # Bio::MAFFT is a wrapper class to execute MAFFT.
53
+ # MAFFT is a very fast multiple sequence alignment software.
54
+ #
55
+ # Though Bio::MAFFT class currently supports only MAFFT version 3,
56
+ # you can use MAFFT version 5 because the class is a wrapper class.
57
class MAFFT

  autoload :Report, 'bio/appl/mafft/report'

  # Creates a new alignment factory.
  # When +n+ is a number (1,2,3, ...), performs 'fftns n'.
  # When +n+ is :i or 'i', performs 'fftnsi'.
  def self.fftns(n = nil)
    opt = []
    if n.to_s == 'i' then
      self.new2(nil, 'fftnsi', *opt)
    else
      opt << n.to_s if n
      self.new2(nil, 'fftns', *opt)
    end
  end

  # Creates a new alignment factory.
  # Performs 'fftnsi'.
  def self.fftnsi
    self.new2(nil, 'fftnsi')
  end

  # Creates a new alignment factory.
  # When +n+ is a number (1,2,3, ...), performs 'nwns n'.
  # When +n+ is :i or 'i', performs 'nwnsi'.
  # In both cases, if +ap+ is true, the option '--all-positive' is added
  # (before +n+ on the command line).
  def self.nwns(n = nil, ap = nil)
    opt = []
    opt << '--all-positive' if ap
    if n.to_s == 'i' then
      self.new2(nil, 'nwnsi', *opt)
    else
      opt << n.to_s if n
      self.new2(nil, 'nwns', *opt)
    end
  end

  # Creates a new alignment factory.
  # Performs 'nwnsi'.
  # If +all_positive+ is true, add option '--all-positive'.
  def self.nwnsi(all_positive = nil)
    opt = []
    opt << '--all-positive' if all_positive
    self.new2(nil, 'nwnsi', *opt)
  end

  # Creates a new alignment factory.
  # Performs 'nwns --all-positive n' or 'nwnsi --all-positive'.
  # Same as Bio::MAFFT.nwns(n, true).
  def self.nwap(n = nil)
    self.nwns(n, true)
  end

  # Creates a new alignment factory.
  # +dir+ is the directory containing the MAFFT program (nil to rely on
  # the program name alone).
  # +prog+ is the name of the program.
  # +opt+ is options of the program (collected into an Array).
  def self.new2(dir, prog, *opt)
    if dir then
      prog = File.join(dir, prog)
    end
    self.new(prog, opt)
  end

  # Creates a new alignment factory.
  # +program+ is the name of the program.
  # +option+ is an Array of options of the program.
  def initialize(program, option)
    @program = program
    @option = option
    @command = nil
    @output = nil
    @report = nil
    @log = nil
  end

  # program name
  attr_accessor :program

  # options
  attr_accessor :option

  # Shows last command-line string. Returns nil or an array of String.
  # Note that filenames described in the command-line may already
  # be removed because they are temporary files.
  attr_reader :command

  # last message to STDERR when executing the program.
  attr_reader :log

  # Shows latest raw alignment result.
  # Since a result of MAFFT is simply a multiple-fasta format,
  # it returns an array of Bio::FastaFormat instances
  # instead of raw string.
  attr_reader :output

  # Shows last alignment result (instance of Bio::MAFFT::Report class)
  # performed by the factory.
  attr_reader :report

  # Executes the program.
  # If +seqs+ is not nil, perform alignment for seqs.
  # If +seqs+ is nil, simply executes the program with the stored options.
  def query(seqs)
    if seqs then
      query_align(seqs)
    else
      exec_local(@option)
    end
  end

  # Performs alignment for seqs.
  # +seqs+ should be Bio::Alignment or Array of sequences or nil.
  # Anything that is not a Bio::Alignment is wrapped with
  # Bio::Alignment.new(seqs, *arg) before being written out as FASTA.
  def query_align(seqs, *arg)
    unless seqs.is_a?(Bio::Alignment)
      seqs = Bio::Alignment.new(seqs, *arg)
    end
    query_string(seqs.to_fasta(70))
  end

  # Performs alignment for +str+.
  # +str+ should be a string that can be recognized by the program.
  # The string is written to a temporary file whose path is handed to
  # query_by_filename; extra arguments are forwarded unchanged.
  # Returns whatever query_by_filename returns.
  def query_string(str, *arg)
    # Create the file outside the begin block so that a failure here
    # does not reach the ensure clause with an unset variable.
    tf_in = Tempfile.open('align')
    begin
      tf_in.print str
      # Flush and close, but keep the file on disk for the program.
      tf_in.close(false)
      query_by_filename(tf_in.path, *arg)
    ensure
      # Always remove the temporary file, even when the run raised.
      tf_in.close(true)
    end
  end

  # Performs alignment of sequences in the file named +fn+.
  # +seqtype+ is passed to Bio::MAFFT::Report.new as the sequence class.
  # Returns the new report, which is also stored in #report.
  def query_by_filename(fn, seqtype = nil)
    opt = @option + [ fn ]
    exec_local(opt)
    @report = Report.new(@output, seqtype)
    @report
  end

  private
  # Executes a program in the local machine.
  # +opt+ is the Array of command-line arguments following the program name.
  # Stores the command line in @command, the parsed FASTA entries read from
  # the program's standard output in @output and its standard error text in
  # @log. Returns @log.
  def exec_local(opt)
    @command = [ @program, *opt ]
    #STDERR.print "DEBUG: ", @command.join(" "), "\n"
    @output = nil
    @log = nil
    Open3.popen3(*@command) do |din, dout, derr|
      din.close
      derr.sync = true
      # Read stderr in a separate thread while stdout is being parsed.
      t = Thread.start do
        @log = derr.read
      end
      ff = Bio::FlatFile.new(Bio::FastaFormat, dout)
      @output = ff.to_a
      t.join
    end
    @log
  end

end #class MAFFT
221
+ end #module Bio
222
+
@@ -0,0 +1,119 @@
1
+ #
2
+ # = bio/appl/mafft/report.rb - MAFFT report class
3
+ #
4
+ # Copyright:: Copyright (C) 2003 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
5
+ # License:: LGPL
6
+ #
7
+ #--
8
+ # This library is free software; you can redistribute it and/or
9
+ # modify it under the terms of the GNU Lesser General Public
10
+ # License as published by the Free Software Foundation; either
11
+ # version 2 of the License, or (at your option) any later version.
12
+ #
13
+ # This library is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
+ # Lesser General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU Lesser General Public
19
+ # License along with this library; if not, write to the Free Software
20
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
+ #++
22
+ #
23
+ # $Id: report.rb,v 1.8 2005/12/18 15:58:40 k Exp $
24
+ #
25
+ # MAFFT result parser class.
26
+ # MAFFT is a very fast multiple sequence alignment software.
27
+ #
28
+ # Since a result of MAFFT is simply a multiple-fasta format,
29
+ # the significance of this class is to keep standard form and
30
+ # interface between Bio::ClustalW::Report.
31
+ #
32
+ # == References
33
+ #
34
+ # * K. Katoh, K. Misawa, K. Kuma and T. Miyata.
35
+ # MAFFT: a novel method for rapid multiple sequence alignment based
36
+ # on fast Fourier transform. Nucleic Acids Res. 30: 3059-3066, 2002.
37
+ # http://nar.oupjournals.org/cgi/content/abstract/30/14/3059
38
+ # * http://www.biophys.kyoto-u.ac.jp/~katoh/programs/align/mafft/
39
+ #
40
+
41
+ require 'bio/db/fasta'
42
+ require 'bio/io/flatfile'
43
+ require 'bio/appl/mafft'
44
+
45
+ module Bio
46
+ class MAFFT
47
+
48
+ # MAFFT result parser class.
49
+ # MAFFT is a very fast multiple sequence alignment software.
50
+ #
51
+ # Since a result of MAFFT is simply a multiple-fasta format,
52
+ # the significance of this class is to keep standard form and
53
+ # interface between Bio::ClustalW::Report.
54
class Report

  # Builds a report around an already parsed MAFFT result.
  #
  # +ary+ is expected to be an Array of Bio::FastaFormat entries.
  # +seqclass+ selects the class used for the aligned sequences and
  # should be one of the following:
  #   String: 'PROTEIN', 'DNA', 'RNA' (matched case-insensitively)
  #   Class:  Bio::Sequence::AA, Bio::Sequence::NA, ...
  # Any other value falls back to Bio::Sequence.
  def initialize(ary, seqclass = nil)
    @data  = ary
    @align = nil
    @seqclass =
      case seqclass
      when /PROTEIN/i then Bio::Sequence::AA
      when /[DR]NA/i  then Bio::Sequence::NA
      when Module     then seqclass
      else                 Bio::Sequence
      end
  end

  # The sequence entries this report was created with.
  attr_reader :data

  # The class used to build sequences (Bio::Sequence::AA,
  # Bio::Sequence::NA, ...).
  attr_reader :seqclass

  # Returns the multiple alignment (built with Bio::Alignment.new).
  # The alignment is constructed on first use and cached afterwards.
  def align
    do_parse if !@align
    @align
  end
  alias alignment align

  # Returns the sequences as a fasta-format string.
  # Delegates to align.to_fasta; see Bio::Alignment#to_fasta for the
  # accepted arguments.
  def to_fasta(*arg)
    alignment = align
    alignment.to_fasta(*arg)
  end

  # Returns the entries handed to the constructor (same object as #data).
  def to_a
    @data
  end

  private

  # Builds and caches the alignment from the stored entries.
  # Each entry contributes a [sequence, definition] pair.
  # Always returns nil.
  def do_parse
    unless @align
      @align = Bio::Alignment.new(@data) do |entry|
        [ @seqclass.new(entry.seq), entry.definition ]
      end
    end
    nil
  end

end #class Report
117
+ end #class MAFFT
118
+ end #module Bio
119
+
@@ -0,0 +1,555 @@
1
+ #
2
+ # = bio/appl/psort.rb - PSORT, protein sorting site prediction systems
3
+ #
4
+ # Copyright:: Copyright (C) 2003 Mitsuteru C. Nakao <n@bioruby.org>
5
+ # License:: LGPL
6
+ #
7
+ #
8
+ # $Id: psort.rb,v 1.8 2005/11/01 05:15:15 nakao Exp $
9
+ #
10
+ # == A client for PSORT WWW Server
11
+ #
12
+ # A client for PSORT WWW Server for predicting protein subcellular
13
+ # localization.
14
+ #
15
+ # PSORT family members,
16
+ # 1. PSORT
17
+ # 2. PSORT II
18
+ # 3. iPSORT
19
+ # 4. PSORT-B http://psort.org
20
+ # 5. WoLF-PSORT
21
+ #
22
+ # See http://psort.ims.u-tokyo.ac.jp.
23
+ #
24
+ # === Example
25
+ #
26
+ #
27
+ #--
28
+ #
29
+ # This library is free software; you can redistribute it and/or
30
+ # modify it under the terms of the GNU Lesser General Public
31
+ # License as published by the Free Software Foundation; either
32
+ # version 2 of the License, or (at your option) any later version.
33
+ #
34
+ # This library is distributed in the hope that it will be useful,
35
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
36
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
37
+ # Lesser General Public License for more details.
38
+ #
39
+ # You should have received a copy of the GNU Lesser General Public
40
+ # License along with this library; if not, write to the Free Software
41
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
42
+ #
43
+ #++
44
+ #
45
+
46
+ require 'bio/sequence'
47
+ require 'bio/db/fasta'
48
+ require 'net/http'
49
+ require 'cgi'
50
+
51
+
52
+ module Bio
53
+
54
+
55
+
56
+
57
+ class PSORT
58
+ # a Hash for PSORT official hosts:
59
+ # Key value (host)
60
+ # ------- -----------------------
61
+ # IMSUT psort.ims.u-tokyo.ac.jp
62
+ # Okazaki psort.nibb.ac.jp
63
+ # Peking srs.pku.edu.cn:8088
64
+ WWWServer = {
65
+ 'IMSUT' => {'host' => 'psort.hgc.jp', #'psort.ims.u-tokyo.ac.jp',
66
+ 'PSORT1' => '/cgi-bin/okumura.pl',
67
+ 'PSORT2' => '/cgi-bin/runpsort.pl'},
68
+ 'Okazaki' => {'host' => 'psort.nibb.ac.jp',
69
+ 'PSORT1' => '/cgi-bin/okumura.pl',
70
+ 'PSORT2' => '/cgi-bin/runpsort.pl'},
71
+ 'Peking' => {'host' => 'srs.pku.edu.en:8088',
72
+ 'PSORT1' => '/cgi-bin/okumura.pl',
73
+ 'PSORT2' => '/cgi-bin/runpsort.pl'}
74
+ }
75
+
76
+
77
+ # = Generic CGI client class
78
+ # A generic CGI client class for Bio::PSORT::* classes.
79
+ # The class provides an interface for CGI argument processing and output
80
+ # report parsing.
81
+ #
82
+ # == Example
83
+ #
84
+ # class NewClient < CGIDriver
85
+ # def initialize(host, path)
86
+ # super(host, path)
87
+ # end
88
+ # end
89
+ # private
90
+ # def make_args(query)
91
+ # # ...
92
+ # end
93
+ # def parse_report(output)
94
+ # # ...
95
+ # end
96
+ #
97
+ class CGIDriver
98
+
99
+ # CGI query argument in Hash ({key => value, ...}).
100
+ attr_accessor :args
101
+
102
+ # CGI output raw text
103
+ attr_reader :report
104
+
105
+
106
+ # Sets remote ``host'' and cgi ``path''.
107
+ def initialize(host = '', path = '')
108
+ @host = host
109
+ @path = path
110
+ @args = {}
111
+ @report
112
+ end
113
+
114
+
115
+ # Executes a CGI ``query'' and returns aReport
116
+ def exec(query)
117
+ data = make_args(query)
118
+
119
+ begin
120
+ result, = Net::HTTP.new(@host).post(@path, data)
121
+ @report = result.body
122
+ output = parse_report(@report)
123
+ end
124
+
125
+ return output
126
+ end
127
+
128
+ private
129
+
130
+ # Bio::CGIDriver#make_args. An API skelton.
131
+ def make_args(args_hash)
132
+ # The routin should be provided in the inherited class
133
+ end
134
+
135
+ # Bio::CGIDriver#parse_report. An API skelton.
136
+ def parse_report(result_body)
137
+ # The routin should be provided in the inherited class
138
+ end
139
+
140
+ # Erases HTML tags
141
+ def erase_html_tags(str)
142
+ return str.gsub(/<\S.*?>/,'')
143
+ end
144
+
145
+ # Returns CGI argument text in String (key=value&) from a Hash ({key=>value}).
146
+ def args_join(hash, delim = '&')
147
+ tmp = []
148
+ hash.each do |key, val|
149
+ tmp << CGI.escape(key.to_s) + '=' + CGI.escape(val.to_s)
150
+ end
151
+ return tmp.join(delim) # not ';' but '&' in psort's cgi
152
+ end
153
+
154
+ end # class CGIDriver
155
+
156
+
157
+
158
+ # = Bio::PSORT::PSORT1
159
+ # Bio::PSORT::PSORT1 is a wapper class for the original PSORT program.
160
+ #
161
+ # == Example
162
+ #
163
+ # serv = Bio::PSORT::PSORT1.imsut
164
+ # serv.title = 'Query_title_splited_by_white space'
165
+ # serv.exec(seq, false) # seq.class => String
166
+ # serv.exec(seq)
167
+ # report = serv.exec(Bio::FastaFormat.new(seq))
168
+ # report_raw = serv.exec(Bio::FastaFormat.new(seq), false)
169
+ #
170
+ # == References
171
+ # 1. Nakai, K. and Kanehisa, M., A knowledge base for predicting protein
172
+ # localization sites in eukaryotic cells, Genomics 14, 897-911 (1992).
173
+ # [PMID:1478671]
174
+ class PSORT1
175
+
176
+ autoload :Report, 'bio/appl/psort/report'
177
+
178
+ # Returns a PSORT1 CGI Driver object (Bio::PSORT::PSORT1::Remote)
179
+ # connecting to the IMSUT server.
180
+ def self.imsut
181
+ self.new(Remote.new(WWWServer['IMSUT']['host'],
182
+ WWWServer['IMSUT']['PSORT1']))
183
+ end
184
+
185
+
186
+ # Returns a PSORT1 CGI Driver object (Bio::PSORT::PSORT1::Remote)
187
+ # connecting to the NIBB server.
188
+ def self.okazaki
189
+ self.new(Remote.new(WWWServer['Okazaki']['host'],
190
+ WWWServer['Okazaki']['PSORT1']))
191
+ end
192
+
193
+
194
+ # Returns a PSORT1 CGI Driver object (Bio::PSORT::PSORT1::Remote)
195
+ # connecting to the Peking server.
196
+ def self.peking
197
+ self.new(Remote.new(WWWServer['Peking']['host'],
198
+ WWWServer['Peking']['PSORT1']))
199
+ end
200
+
201
+
202
+ # Sets a server CGI Driver (Bio::PSORT::PSORT1::Remote).
203
+ def initialize(driver, origin = 'yeast')
204
+ @serv = driver
205
+ @origin = origin # Gram-positive bacterium, Gram-negative bacterium,
206
+ # yeast, aminal, plant
207
+ @title = 'MYSEQ'
208
+ @sequence = ''
209
+ end
210
+
211
+
212
+ # An accessor of the origin argument. Default setting is "yeast".
213
+ # Usable values:
214
+ # 1. Gram-positive bacterium
215
+ # 2. Gram-negative bacterium
216
+ # 3. yeast
217
+ # 4. animal
218
+ # 5. plant
219
+ attr_accessor :origin
220
+
221
+ # An accessor of the query sequence argument.
222
+ attr_accessor :sequence
223
+
224
+ # An accessor of the title argument. Default setting is 'MYSEQ'.
225
+ # The value is automatically setted if you use a query in
226
+ # Bio::FastaFormat.
227
+ attr_accessor :title
228
+
229
+
230
+ # Executes the query (faa) and returns an Bio::PSORT::PSORT1::Report.
231
+ #
232
+ # The ``faa'' argument is acceptable a sequence both in String and in
233
+ # Bio::FastaFormat.
234
+ #
235
+ # If you set the second argument is ``parsing = false'',
236
+ # returns ourput text without any parsing.
237
+ def exec(faa, parsing = true)
238
+ if faa.class == Bio::FastaFormat
239
+ @title = faa.entry_id if @title == 'MYSEQ'
240
+ @sequence = faa.seq
241
+ @serv.args = {'title' => @title, 'origin' => @origin}
242
+ @serv.parsing = parsing
243
+ return @serv.exec(sequence)
244
+ else
245
+ self.exec(Bio::FastaFormat.new(faa), parsing)
246
+ end
247
+ end
248
+
249
+
250
+ # = Bio::PSORT::PSORT1::Remote
251
+ # PSORT1 specific CGIDriver.
252
+ class Remote < CGIDriver
253
+
254
+ # Accessor for Bio::PSORT::PSORT1::Remote#origin to contein target domain.
255
+ # Taget domains:
256
+ # 1. Gram-positive bacterium
257
+ # 2. Gram-negative bacterium
258
+ # 3. yeast
259
+ # 4. animal
260
+ # 5. plant
261
+ attr_accessor :origin
262
+
263
+ # Accessor for Bio::POSRT::PSORT1#sequence to contein the query sequence.
264
+ attr_accessor :title
265
+
266
+ # Accessor for Bio::PSORT::PSORT1#title to contain the query title.
267
+ attr_accessor :parsing
268
+
269
+ # Sets remote ``host'' and cgi ``path''.
270
+ def initialize(host, path)
271
+ @origin = 'yeast'
272
+ @title = 'MYSEQ'
273
+ @parsing = true
274
+ super(host, path)
275
+ end
276
+
277
+ private
278
+
279
+ # Returns parsed CGI argument.
280
+ # An API implementation.
281
+ def make_args(query)
282
+ @args.update({'sequence' => query})
283
+ return args_join(@args)
284
+ end
285
+
286
+
287
+ # Returns parsed output report.
288
+ # An API implementation.
289
+ def parse_report(str)
290
+ str = erase_html_tags(str)
291
+ str = Bio::PSORT::PSORT1::Report.parser(str) if @parsing
292
+ return str
293
+ end
294
+
295
+ end # Class Remote
296
+
297
+ end # class PSORT1
298
+
299
+
300
+ # = Bio::PSORT::PSORT2
301
+ # Bio::PSORT::PSORT2 is a wapper class for the original PSORT program.
302
+ #
303
+ # == Example
304
+ #
305
+ # serv = Bio::PSORT::PSORT2.imsut
306
+ # serv.title = 'Query_title_splited_by_white space'
307
+ # serv.exec(seq, false) # seq.class => String
308
+ # serv.exec(seq)
309
+ # report = serv.exec(Bio::FastaFormat.new(seq))
310
+ # report_raw = serv.exec(Bio::FastaFormat.new(seq), false)
311
+ #
312
+ # == References
313
+ # 1. Nakai, K. and Horton, P., PSORT: a program for detecting the sorting
314
+ # signals of proteins and predicting their subcellular localization,
315
+ # Trends Biochem. Sci, 24(1) 34-35 (1999).
316
+ # [PMID:10087920]
317
+ class PSORT2
318
+
319
+ autoload :Report, 'bio/appl/psort/report'
320
+
321
+ # Returns a PSORT2 CGI Driver object (Bio::PSORT::PSORT2::Remote).
322
+ #
323
+ # PSORT official hosts:
324
+ # key host path
325
+ # ------- ----------------------- -------------------- ---------
326
+ # IMSUT psort.ims.u-tokyo.ac.jp /cgi-bin/runpsort.pl (default)
327
+ # Okazaki psort.nibb.ac.jp /cgi-bin/runpsort.pl
328
+ # Peking srs.pku.edu.cn:8088 /cgi-bin/runpsort.pl
329
+ def self.remote(host, path)
330
+ self.new(Remote.new(host, path))
331
+ end
332
+
333
+ # Returns a PSORT2 CGI Driver object (Bio::PSORT::PSORT2::Remote)
334
+ # connecting to the IMSUT server.
335
+ def self.imsut
336
+ self.remote(WWWServer['IMSUT']['host'],
337
+ WWWServer['IMSUT']['PSORT2'])
338
+ end
339
+
340
+ # Returns a PSORT2 CGI Driver object (Bio::PSORT::PSORT2::Remote)
341
+ # connecting to the NIBB server.
342
+ def self.okazaki
343
+ self.remote(WWWServer['Okazaki']['host'],
344
+ WWWServer['Okazaki']['PSORT2'])
345
+ end
346
+
347
+ # Returns a PSORT2 CGI Driver object (Bio::PSORT::PSORT2::Remote)
348
+ # connecting to the Peking server.
349
+ def self.peking
350
+ self.remote(WWWServer['Peking']['host'],
351
+ WWWServer['Peking']['PSORT2'])
352
+ end
353
+
354
+ # An accessor of the origin argument.
355
+ # Default setting is ``yeast''.
356
+ attr_accessor :origin
357
+
358
+ # An accessor of the title argument. Default setting is ``QUERY''.
359
+ # The value is automatically setted if you use a query in
360
+ # Bio::FastaFormat.
361
+ attr_accessor :title
362
+
363
+ # Sets a server CGI Driver (Bio::PSORT::PSORT2::Remote).
364
+ def initialize(driver, origin = 'yeast')
365
+ @serv = driver
366
+ @origin = origin
367
+ @title = ''
368
+ end
369
+
370
+
371
+ # Executes PSORT II prediction and returns Report object
372
+ # (Bio::PSORT::PSORT2::Report) if parsing = true.
373
+ # Returns PSORT II report in text if parsing = false.
374
+ def exec(faa, parsing = true)
375
+ if faa.class == Bio::FastaFormat
376
+ @title = faa.entry_id if @title == nil
377
+ @sequence = faa.seq
378
+ @serv.args = {'origin' => @origin, 'title' => @title}
379
+ @serv.parsing = parsing
380
+ return @serv.exec(@sequence)
381
+ else
382
+ self.exec(Bio::FastaFormat.new(faa), parsing)
383
+ end
384
+ end
385
+
386
+
387
+ # = Bio::PSORT::PSORT2::Remote
388
+ # PSORT2 specific CGIDriver
389
class Remote < CGIDriver

  # The origin argument; initialize sets it to 'yeast'.
  attr_accessor :origin

  # Whether the output is parsed; initialize sets it to true.
  attr_accessor :parsing

  # Builds a driver for the CGI at +path+ on +host+ (both are passed on
  # to the CGIDriver constructor).
  def initialize(host, path)
    @origin = 'yeast'
    super(host, path)
    @parsing = true
  end


  private

  # An API implementation.
  # Stores +query+ in @args under 'sequence' and returns the result of
  # args_join for the updated @args.
  def make_args(query)
    args_join(@args.update('sequence' => query))
  end


  # An API implementation.
  # Replaces every "\n<hr>" (case-insensitive) with Report::BOUNDARY and
  # runs the text through erase_html_tags. The cleaned text is returned
  # as is unless @parsing is set, in which case it is given to the
  # PSORT2 report parser together with the 'title' argument.
  def parse_report(str)
    cleaned = erase_html_tags(str.gsub(/\n<hr>/i, Report::BOUNDARY))
    return cleaned unless @parsing

    Bio::PSORT::PSORT2::Report.parser(cleaned, self.args['title'])
  end

end # class Remote
427
+
428
+ end # class PSORT2
429
+
430
+
431
# Empty class body here: no methods or constants are defined for IPSORT
# at this point in the file.
class IPSORT
end # class IPSORT
433
+
434
+
435
# Empty class body here: no methods or constants are defined for PSORTB
# at this point in the file.
class PSORTB
end # class PSORTB
437
+
438
# Empty class body here: no methods or constants are defined for
# WoLF_PSORT at this point in the file.
class WoLF_PSORT
end # class WoLF_PSORT
440
+
441
+ end # class PSORT
442
+
443
+ end # module Bio
444
+
445
+
446
+
447
+
448
+
449
# Demo script, executed only when this file is run directly. It sends the
# sample sequence below to the PSORT1 and PSORT2 drivers returned by
# their imsut constructors and prints every result with p.
if __FILE__ == $0

  # A LoadError is deliberately ignored so that the demo still starts when
  # this relative path cannot be resolved.
  begin
    require 'psort/report.rb'
  rescue LoadError
  end


  seq = ">hoge mit
MALEPIDYTT RDEDDLDENE LLMKISNAAG SSRVNDNNDD LTFVENDKII
ARYSIQTSSK QQGKASTPPV EEAEEAAPQL PSRSSAAPPP PPRRATPEKK
DVKDLKSKFE GLAASEKEEE EMENKFAPPP KKSEPTIISP KPFSKPQEPV
FKGYHVQVTA HSREIDAEYL KIVRGSDPDT TWLIISPNAK KEYEPESTGS
KKSFTPSKSP APVSKKEPVK TPSPAPAAKI PKENPWATAE YDYDAAEDNE
NIEFVDDDWW LGELEKDGSK GLFPSNYVSL LPSRNVASGA PVQKEEPEQE
SFHDFLQLFD ETKVQYGLAR RKAKQNSGNA ETKAEAPKPE VPEDEPEGEP
DDWNEPELKE RDFDQAPLKP NQSSYKPIGK IDLQKVIAEE KAKEDPRLVQ
DYKKIGNPLP GMHIEADNEE EPEENDDDWD DDEDEAAQPP ANFAAVANNL
KPTAAGSKID DDKVIKGFRN EKSPAQLWAE VSPPGSDVEK IIIIGWCPDS
APLKTRASFA PSSDIANLKN ESKLKRDSEF NSFLGTTKPP SMTESSLKND
KAEEAEQPKT EIAPSLPSRN SIPAPKQEEA PEQAPEEEIE GN
"
  # Short second sample; not referenced by the rest of this script.
  Seq1 = ">hgoe
LTFVENDKII NI
"

  # ---- PSORT1 ------------------------------------------------------------
  puts "\n Bio::PSORT::PSORT1"

  puts "\n ==> p serv = Bio::PSORT::PSORT1.imsut"
  p serv = Bio::PSORT::PSORT1.imsut

  puts "\n ==> p serv.class "
  p serv.class

  puts "\n ==> p serv.title = 'Query_title_splited_by_white space'"
  p serv.title = 'Query_title_splited_by_white space'

  puts "\n ==> p serv.exec(seq, false) "
  p serv.exec(seq, false)

  puts "\n ==> p serv.exec(seq) "
  p serv.exec(seq)

  puts "\n ==> p report = serv.exec(Bio::FastaFormat.new(seq)) "
  p report = serv.exec(Bio::FastaFormat.new(seq))

  puts "\n ==> p report.class"
  p report.class


  puts "\n ==> p report_raw = serv.exec(Bio::FastaFormat.new(seq), false) "
  p report_raw = serv.exec(Bio::FastaFormat.new(seq), false)

  puts "\n ==> p report_raw.class"
  p report_raw.class


  puts "\n ==> p report.methods"
  p report.methods

  # Call each reader by name with __send__ instead of eval-ing a string
  # of code built from the name.
  psort1_readers = ['entry_id', 'origin', 'title', 'sequence','result_info',
                    'reasoning', 'final_result', 'raw']
  psort1_readers.each do |reader|
    puts "\n ==> p report.#{reader}"
    p report.__send__(reader)
  end



  # ---- PSORT2 ------------------------------------------------------------
  puts "\n Bio::PSORT::PSORT2"

  puts "\n ==> p serv = Bio::PSORT::PSORT2.imsut"
  p serv = Bio::PSORT::PSORT2.imsut

  puts "\n ==> p serv.class "
  p serv.class

  puts "\n ==> p seq "
  p seq

  puts "\n ==> p serv.title = 'Query_title_splited_by_white space'"
  p serv.title = 'Query_title_splited_by_white space'

  puts "\n ==> p serv.exec(seq) # parsed report"
  p serv.exec(seq)

  puts "\n ==> p report = serv.exec(Bio::FastaFormat.new(seq)) # parsed report"
  p report = serv.exec(Bio::FastaFormat.new(seq))



  puts "\n ==> p serv.exec(seq, false) # report in plain text"
  p serv.exec(seq, false)

  puts "\n ==> p report_raw = serv.exec(Bio::FastaFormat.new(seq), false) # report in plain text"
  p report_raw = serv.exec(Bio::FastaFormat.new(seq), false)


  puts "\n ==> p report.methods"
  p report.methods

  psort2_readers = ['entry_id', 'scl', 'definition', 'seq', 'features', 'prob', 'pred', 'k', 'raw']
  psort2_readers.each do |reader|
    puts "\n ==> p report.#{reader}"
    p report.__send__(reader)
  end
end