bio 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,191 @@
1
+ #
2
+ # = bio/appl/clustalw.rb - CLUSTAL W wrapper class
3
+ #
4
+ # Copyright:: Copyright (C) 2003 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
5
+ # License:: LGPL
6
+ #
7
+ #--
8
+ # This library is free software; you can redistribute it and/or
9
+ # modify it under the terms of the GNU Lesser General Public
10
+ # License as published by the Free Software Foundation; either
11
+ # version 2 of the License, or (at your option) any later version.
12
+ #
13
+ # This library is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
+ # Lesser General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU Lesser General Public
19
+ # License along with this library; if not, write to the Free Software
20
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
+ #++
22
+ #
23
+ # $Id: clustalw.rb,v 1.10 2005/12/18 15:58:40 k Exp $
24
+ #
25
+ # Bio::ClustalW is a CLUSTAL W execution wrapper class.
26
+ # Its object is also called an alignment factory.
27
+ # CLUSTAL W is a very popular software for multiple sequence alignment.
28
+ #
29
+ # == References
30
+ #
31
+ # * Thompson,J.D., Higgins,D.G. and Gibson,T.J..
32
+ # CLUSTAL W: improving the sensitivity of progressive multiple sequence
33
+ # alignment through sequence weighting, position-specific gap penalties
34
+ # and weight matrix choice. Nucleic Acids Research, 22:4673-4680, 1994.
35
+ # http://nar.oxfordjournals.org/cgi/content/abstract/22/22/4673
36
+ # * http://www.ebi.ac.uk/clustalw/
37
+ # * ftp://ftp.ebi.ac.uk/pub/software/unix/clustalw/
38
+ #
39
+
40
+
41
+ require 'tempfile'
42
+ require 'open3'
43
+
44
+ require 'bio/sequence'
45
+ require 'bio/alignment'
46
+
47
+ module Bio
48
+
49
# Bio::ClustalW is a CLUSTAL W execution wrapper class.
# Its object is also called an alignment factory.
# CLUSTAL W is a very popular software for multiple sequence alignment.
class ClustalW

  autoload :Report, 'bio/appl/clustalw/report'

  # Creates a new CLUSTAL W execution wrapper object (alignment factory).
  #
  # +program+:: name (or path) of the executable to run.
  # +option+::  Array of extra command-line arguments; they are appended
  #             to the arguments this class generates for every run.
  def initialize(program = 'clustalw', option = [])
    @program    = program
    @option     = option
    @command    = nil
    @output     = nil
    @output_dnd = nil
    @report     = nil
    @log        = nil
    @errorlog   = nil
  end

  # name of the program (usually 'clustalw' in UNIX)
  attr_accessor :program

  # options
  attr_accessor :option

  # Returns last command-line strings executed by this factory.
  # Note that filenames described in the command-line may already
  # be removed because they are temporary files.
  # Returns an array.
  attr_reader :command

  # Returns last messages (standard output) of CLUSTAL W execution.
  attr_reader :log

  # Returns last error messages (to stderr) of CLUSTAL W execution.
  attr_reader :errorlog

  # Returns last raw alignment result (String).
  attr_reader :output

  # Returns last alignment guide tree (contents of the file.dnd).
  attr_reader :output_dnd

  # Returns last alignment result.
  # Returns a Bio::ClustalW::Report object.
  attr_reader :report

  # Executes the program(clustalw).
  # If +seqs+ is not nil, perform alignment for seqs.
  # If +seqs+ is nil, simply executes CLUSTAL W with the factory's options
  # and returns what the program wrote to standard output.
  def query(seqs)
    if seqs then
      query_align(seqs)
    else
      exec_local(@option)
    end
  end

  # Performs alignment for +seqs+.
  # +seqs+ should be Bio::Alignment or Array of sequences.
  # Anything that is not a Bio::Alignment is wrapped with Bio::Alignment.new.
  def query_align(seqs)
    seqs = Bio::Alignment.new(seqs) unless seqs.is_a?(Bio::Alignment)

    # The sequence type is decided by the first sequence whose class is
    # recognised; it stays nil (no -type option) when none is.
    seqtype = nil
    seqs.each do |s|
      if s.is_a?(Bio::Sequence::AA) then
        seqtype = 'PROTEIN'
      elsif s.is_a?(Bio::Sequence::NA) then
        seqtype = 'DNA'
      end
      break if seqtype
    end
    query_string(seqs.to_fasta(70, :avoid_same_name => true), seqtype)
  end

  # Performs alignment for +str+.
  # +str+ should be a string that can be recognized by CLUSTAL W.
  # Extra arguments are passed through to query_by_filename.
  #
  # The string is written to a temporary file, which is always removed
  # afterwards, even when the alignment step raises.
  def query_string(str, *arg)
    tf_in = Tempfile.open('align')
    begin
      tf_in.print str
      # Close (but keep) the file so the content is flushed to disk
      # before the external program reads it.
      tf_in.close(false)
      query_by_filename(tf_in.path, *arg)
    ensure
      tf_in.close(true)
    end
  end

  # Performs alignment of sequences in the file named +path+.
  # +seqtype+, when given, is passed to the program as -type=... and to
  # the Report constructor.
  # Returns a Bio::ClustalW::Report object (also available via #report).
  def query_by_filename(path, seqtype = nil)
    require 'bio/appl/clustalw/report'

    tf_out = nil
    tf_dnd = nil
    begin
      # Temporary files are created only to reserve unique file names
      # for the alignment and the guide tree written by the program.
      tf_out = Tempfile.open('clustalout')
      tf_out.close(false)
      tf_dnd = Tempfile.open('clustaldnd')
      tf_dnd.close(false)

      opt = [ "-align",
        "-infile=#{path}",
        "-outfile=#{tf_out.path}",
        "-newtree=#{tf_dnd.path}",
        "-outorder=input"
      ]
      opt << "-type=#{seqtype}" if seqtype
      opt.concat(@option)
      exec_local(opt)

      tf_out.open
      @output = tf_out.read
      tf_dnd.open
      @output_dnd = tf_dnd.read
    ensure
      # Remove both temporary files whatever happened above.
      tf_out.close(true) if tf_out
      tf_dnd.close(true) if tf_dnd
    end
    @report = Report.new(@output, seqtype)
    @report
  end

  private
  # Executes the program in the local machine with the arguments +opt+.
  # Standard output is stored in @log (and returned), standard error
  # in @errorlog. Both are reset first, so a failed launch never leaves
  # messages of a previous run behind.
  def exec_local(opt)
    @command = [ @program, *opt ]
    #STDERR.print "DEBUG: ", @command.join(" "), "\n"
    @log = nil
    @errorlog = nil

    Open3.popen3(*@command) do |din, dout, derr|
      din.close
      # stderr is drained in its own thread while stdout is read here,
      # so neither pipe can fill up and block the child process.
      err_reader = Thread.start { derr.read }
      @log = dout.read
      @errorlog = err_reader.value
    end
    @log
  end

end #class ClustalW
189
+
190
+ end #module Bio
191
+
@@ -0,0 +1,154 @@
1
+ #
2
+ # = bio/appl/clustalw/report.rb - CLUSTAL W format data (*.aln) class
3
+ #
4
+ # Copyright:: Copyright (C) 2003 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
5
+ # License:: LGPL
6
+ #
7
+ #--
8
+ # This library is free software; you can redistribute it and/or
9
+ # modify it under the terms of the GNU Lesser General Public
10
+ # License as published by the Free Software Foundation; either
11
+ # version 2 of the License, or (at your option) any later version.
12
+ #
13
+ # This library is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
+ # Lesser General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU Lesser General Public
19
+ # License along with this library; if not, write to the Free Software
20
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
+ #++
22
+ #
23
+ # $Id: report.rb,v 1.9 2005/12/18 15:58:40 k Exp $
24
+ #
25
+ # Bio::ClustalW::Report is a CLUSTAL W report (*.aln file) parser.
26
+ # CLUSTAL W is a very popular software for multiple sequence alignment.
27
+ #
28
+ # == References
29
+ #
30
+ # * Thompson,J.D., Higgins,D.G. and Gibson,T.J..
31
+ # CLUSTAL W: improving the sensitivity of progressive multiple sequence
32
+ # alignment through sequence weighting, position-specific gap penalties
33
+ # and weight matrix choice. Nucleic Acids Research, 22:4673-4680, 1994.
34
+ # http://nar.oxfordjournals.org/cgi/content/abstract/22/22/4673
35
+ # * http://www.ebi.ac.uk/clustalw/
36
+ # * ftp://ftp.ebi.ac.uk/pub/software/unix/clustalw/
37
+ #
38
+
39
+ require 'bio/sequence'
40
+ require 'bio/db'
41
+ require 'bio/alignment'
42
+ require 'bio/appl/clustalw'
43
+
44
+ module Bio
45
+ class ClustalW
46
+
47
# CLUSTAL W result data (*.aln file) parser class.
class Report < Bio::DB

  # Delimiter of each entry. Bio::FlatFile uses it.
  # In Bio::ClustalW::Report, it is nil (1 entry 1 file).
  DELIMITER = nil

  # Creates new instance.
  # +str+ should be a CLUSTAL format string.
  # +seqclass+ should be one of the following:
  # * Class: Bio::Sequence::AA, Bio::Sequence::NA, ...
  # * String: 'PROTEIN', 'DNA', ...
  # Anything else (including nil) selects Bio::Sequence.
  # Parsing is deferred until a result is first requested.
  def initialize(str, seqclass = nil)
    @raw = str
    @align = nil
    @match_line = nil
    @header = nil
    case seqclass
    when /PROTEIN/i
      @seqclass = Bio::Sequence::AA
    when /[DR]NA/i
      @seqclass = Bio::Sequence::NA
    else
      if seqclass.is_a?(Module) then
        @seqclass = seqclass
      else
        @seqclass = Bio::Sequence
      end
    end
  end
  # string of whole result
  attr_reader :raw

  # sequence class (one of Bio::Sequence, Bio::Sequence::NA,
  # Bio::Sequence::AA, ...)
  attr_reader :seqclass

  # Shows first line of the result data, for example,
  # 'CLUSTAL W (1.82) multiple sequence alignment'.
  # Returns a string.
  def header
    do_parse unless @header
    @header
  end

  # Shows "match line" of CLUSTAL's alignment result, for example,
  # ':* :* .* * .*::*. ** :* . * . '.
  # Returns a string.
  def match_line
    do_parse unless @match_line
    @match_line
  end

  # Gets a multiple alignment.
  # Returns a Bio::Alignment object.
  def align
    do_parse() unless @align
    @align
  end
  alias alignment align

  # Gets a fasta-format string of the sequences.
  # Arguments are passed through to the alignment's to_fasta.
  # Returns a string.
  def to_fasta(*arg)
    align.to_fasta(*arg)
  end

  # Gets an array of the sequences.
  # Returns an array of Bio::FastaFormat objects.
  def to_a
    align.to_fastaformat_array
  end

  private
  # Parses Clustal W result text and fills @header, @match_line and
  # @align. Does nothing when the text has already been parsed.
  # Always returns nil.
  def do_parse
    return nil if @align
    # Blocks are separated by blank lines; the first one is the header.
    a = @raw.split(/\r?\n\r?\n/)
    @header = a.shift.to_s
    xalign = Bio::Alignment.new
    @match_line = ''
    # Extra blank lines between blocks leave stray leading newlines or
    # completely empty blocks behind; remove them so they cannot be
    # mistaken for sequence lines.
    a.collect! { |x| x.sub(/\A(\r?\n)+/, '').split(/\r?\n/) }
    a.reject! { |x| x.empty? }
    if a.size > 0 then
      a.each { |x|
        x.each { |y| y.sub!(/ +\d+\s*$/, '') }} #for -SEQNOS=on option
      # Width of the name column: everything up to and including the
      # last whitespace of the first sequence line.
      @tagsize = ( a[0][0].rindex(/\s/) or -1 ) + 1
      a.each do |x|
        # The last line of each block is the match line. Slicing past
        # the end of a short line gives nil, hence the final to_s.
        @match_line << x.pop.to_s[@tagsize..-1].to_s
      end
      # Register the sequence names in the order of the first block ...
      a[0].each do |y|
        xalign.store(y[0, @tagsize].sub(/\s+\z/, ''), '')
      end
      # ... then append each block's fragment to the matching entry.
      a.each do |x|
        x.each do |y|
          name = y[0, @tagsize].sub(/\s+\z/, '')
          seq = y[@tagsize..-1].to_s
          xalign[name] << seq
        end
      end
      xalign.collect! { |x| @seqclass.new(x) }
    end
    @align = xalign
    nil
  end

end #class Report
152
+ end #class ClustalW
153
+ end #module Bio
154
+
@@ -0,0 +1,68 @@
1
+ #
2
+ # bio/appl/emboss.rb - EMBOSS wrapper
3
+ #
4
+ # Copyright (C) 2002 KATAYAMA Toshiaki <k@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: emboss.rb,v 1.2 2005/09/08 01:22:08 k Exp $
21
+ #
22
+
23
+ module Bio
24
+
25
# Bio::EMBOSS runs an EMBOSS command line as a child process and
# captures what it writes to standard output.
class EMBOSS

  # +cmd_line+ is the command line to execute (String).
  # ' -stdout' is appended to it.
  def initialize(cmd_line)
    @cmd_line = cmd_line + ' -stdout'
  end

  # Runs the command line and returns its standard output as a String
  # (also kept in #result). The pipe is always closed afterwards.
  # Errors raised while starting the command propagate unchanged.
  def exec
    # Forget the handle of a previous run so that a failed launch
    # cannot close (or report) a stale IO object.
    @io = nil
    @io = IO.popen(@cmd_line, "w+")
    # Nothing is ever written to the child: signal end-of-file on its
    # stdin so a program that reads stdin cannot block forever.
    @io.close_write
    @result = @io.read
  ensure
    @io.close if @io && !@io.closed?
  end
  attr_reader :io, :result

end
43
+
44
+ end
45
+
46
+ =begin
47
+
48
+ = Bio::EMBOSS
49
+
50
+ EMBOSS wrapper.
51
+
52
+ #!/usr/bin/env ruby
53
+ require 'bio'
54
+
55
+ emboss = Bio::EMBOSS.new("getorf -sequence ~/xlrhodop -outseq stdout")
56
+ puts emboss.exec
57
+
58
+ --- Bio::EMBOSS.new(command_line)
59
+
60
+ --- Bio::EMBOSS#exec
61
+ --- Bio::EMBOSS#io
62
+ --- Bio::EMBOSS#result
63
+
64
+ === SEE ALSO
65
+
66
+ * http://www.emboss.org
67
+
68
+ =end
@@ -0,0 +1,262 @@
1
+ #
2
+ # bio/appl/fasta.rb - FASTA wrapper
3
+ #
4
+ # Copyright (C) 2001,2002 KATAYAMA Toshiaki <k@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: fasta.rb,v 1.20 2005/09/26 13:00:04 k Exp $
21
+ #
22
+
23
+ require 'net/http'
24
+ require 'cgi' unless defined?(CGI)
25
+ require 'bio/command'
26
+ require 'shellwords'
27
+
28
+ module Bio
29
+
30
# Bio::Fasta is a factory object for FASTA-family searches. A query is
# dispatched to the exec_<server> method named by #server ('local' or
# 'genomenet' in this file) and the raw output is parsed into a Report.
class Fasta

  autoload :Report, 'bio/appl/fasta/format10'
  #autoload :?????, 'bio/appl/fasta/format6'

  include Bio::Command::Tools

  # Creates a factory.
  #
  # +program+:: program name (e.g. 'fasta34', 'ssearch34', 'fasta')
  # +db+::      database name or file
  # +opt+::     extra options, as an Array of arguments or, for backward
  #             compatibility, a single shell-style String
  # +server+::  suffix of the exec_* method used by #query
  def initialize(program, db, opt = [], server = 'local')
    @format = 10

    @program = program
    @db = db
    @server = server

    @ktup = nil
    @matrix = nil

    @output = ''

    if opt.respond_to?(:to_ary) then
      a = opt.to_ary
    else
      # backward compatibility
      a = Shellwords.shellwords(opt)
    end
    @options = [ '-Q', '-H', '-m', @format.to_s, *a ] # need -a ?
  end
  attr_accessor :program, :db, :options, :server, :ktup, :matrix
  attr_reader :output

  # Returns the options as one command-line string.
  def option
    # backward compatibility
    make_command_line(@options)
  end

  # Replaces all options by the words of the shell-style string +str+.
  def option=(str)
    # backward compatibility
    @options = Shellwords.shellwords(str)
  end

  # Sets the output format number: the argument following '-m' in the
  # options is replaced, or '-m <num>' is appended when '-m' is absent.
  def format=(num)
    @format = num.to_i
    if i = @options.index('-m') then
      @options[i+1, 1] = @format.to_s
    else
      @options << '-m' << @format.to_s
    end
  end
  attr_reader :format

  # Loads the parser file bio/appl/fasta/<parser>.
  def self.parser(parser)
    require "bio/appl/fasta/#{parser}"
  end

  # Creates a factory whose server is 'local'.
  def self.local(program, db, option = '')
    self.new(program, db, option, 'local')
  end

  # Creates a factory for a remote +server+ ('genomenet' by default).
  def self.remote(program, db, option = '', server = 'genomenet')
    self.new(program, db, option, server)
  end

  # Runs the search for +query+ (converted with to_s) through the
  # exec_<server> method and returns what that method returns.
  def query(query)
    return self.send("exec_#{@server}", query.to_s)
  end


  private


  # Loads the parser file for the current format (6 or 10) and builds a
  # Report from +data+.
  def parse_result(data)
    case @format
    when 6
      require 'bio/appl/fasta/format6'
    when 10
      require 'bio/appl/fasta/format10'
    end
    Report.new(data)
  end


  # Runs the program through call_command_local, keeps its output in
  # @output and returns the parsed report.
  def exec_local(query)
    # NOTE(review): '@' presumably makes the program read the query from
    # standard input (the query is handed to call_command_local) - confirm.
    cmd = [ @program, *@options ]
    cmd.concat([ '@', @db ])
    # ktup is optional; a nil must not end up in the argument list.
    cmd.push(@ktup) if @ktup

    @output = call_command_local(cmd, query)
    parse_result(@output)
  end


  # Posts the query to the CGI at fasta.genome.jp, follows the
  # "Show all result" link of the answer, strips the HTML markup from
  # the <pre> section of that page, keeps the text in @output and
  # returns the parsed report.
  # Raises RuntimeError ('cannot understand response') when the answer
  # does not have the expected shape.
  def exec_genomenet(query)
    host = "fasta.genome.jp"
    #path = "/sit-bin/nph-fasta"
    path = "/sit-bin/fasta" #2005.08.12

    form = {
      'style' => 'raw',
      'prog' => @program,
      'dbname' => @db,
      'sequence' => query,
      'other_param' => make_command_line_unix(@options),
      'ktup_value' => @ktup,
      'matrix' => @matrix,
    }

    # Unset (nil/false) fields are left out; every value is URL-escaped
    # so it cannot break the form encoding.
    data = []
    form.each do |k, v|
      data.push("#{k}=#{CGI.escape(v.to_s)}") if v
    end

    http = Net::HTTP.new(host)
    http.open_timeout = 300
    http.read_timeout = 600
    result, = http.post(path, data.join('&'))
    @output = result.body
    # workaround 2005.08.12
    unless /\<A +HREF=\"(http\:\/\/fasta\.genome\.jp(\/tmp\/[^\"]+))\"\>Show all result\<\/A\>/i =~ @output.to_s then
      raise 'cannot understand response'
    end
    result, = http.get($2)
    @output = result.body
    txt = @output.to_s.split(/\<pre\>/)[1]
    raise 'cannot understand response' unless txt
    txt.sub!(/\<\/pre\>.*\z/m, '')
    txt.sub!(/.*^((T?FASTA|SSEARCH) (searches|compares))/m, '\1')
    txt.sub!(/^\<form method\=\"POST\" name\=\"clust_check\"\>.*\n/, '')
    txt.gsub!(/\<input[^\>]+value\=\"[^\"]*\"[^\>]*\>/i, '')
    txt.gsub!(/\<(a|form|select|input|option|img)\s+[^\>]+\>/i, '')
    txt.gsub!(/\<\/(a|form|select|input|option|img)\>/i, '')
    @output = txt.gsub(/\&lt\;/, '<')
    parse_result(@output.dup)
  end

end
176
+
177
+ end
178
+
179
+
180
# Demo run when this file is executed directly: sends the sequence read
# from ARGF to the 'fasta' program on the remote server against the
# 'genes' database and prints the resulting report.
if __FILE__ == $0
  begin
    require 'pp'
    alias p pp
  rescue LoadError
    # pp is optional: a failed require raises LoadError, which a bare
    # rescue would not catch. Fall back to the plain Kernel#p.
  end

  # serv = Bio::Fasta.local('fasta34', 'hoge.nuc')
  # serv = Bio::Fasta.local('fasta34', 'hoge.pep')
  # serv = Bio::Fasta.local('ssearch34', 'hoge.pep')
  serv = Bio::Fasta.remote('fasta', 'genes')
  p serv.query(ARGF.read)
end
193
+
194
+
195
+ =begin
196
+
197
+ = Bio::Fasta
198
+
199
+ --- Bio::Fasta.new(program, db, opt = [], server = 'local')
200
+ --- Bio::Fasta.local(program, db, option = '')
201
+ --- Bio::Fasta.remote(program, db, option = '', server = 'genomenet')
202
+
203
+ Returns a fasta factory object (Bio::Fasta).
204
+
205
+ For developers: you can add a server 'hoge' by adding an
206
+ exec_hoge(query) method.
207
+
208
+ --- Bio::Fasta#query(query)
209
+
210
+ Execute fasta search and returns Report object (Bio::Fasta::Report).
211
+
212
+ --- Bio::Fasta#output
213
+
214
+ Returns a String containing the raw (unmodified) output of the fasta execution.
215
+
216
+ --- Bio::Fasta#program
217
+ --- Bio::Fasta#db
218
+ --- Bio::Fasta#options
219
+ --- Bio::Fasta#server
220
+ --- Bio::Fasta#ktup
221
+
222
+ Accessors for the factory parameters.
223
+
224
+ --- Bio::Fasta#option
225
+ --- Bio::Fasta#option=(str)
226
+
227
+ Get/set options by string.
228
+
229
+ --- Bio::Fasta#format
230
+ --- Bio::Fasta#format=(number)
231
+
232
+ Accessors for the -m option.
233
+
234
+ --- Bio::Fasta.parser(parser)
235
+
236
+ Import Bio::Fasta::Report class by requiring specified parser.
237
+
238
+ This class method will be useful when you already have fasta
239
+ output files and want to use appropriate Report class for parsing.
240
+
241
+
242
+ == Available databases for Fasta.remote(@program, @db, option, 'genomenet')
243
+
244
+ # ----------+-------+---------------------------------------------------
245
+ # @program | query | @db (supported in GenomeNet)
246
+ # ----------+-------+---------------------------------------------------
247
+ # fasta | AA | nr-aa, genes, vgenes.pep, swissprot, swissprot-upd,
248
+ # | | pir, prf, pdbstr
249
+ # +-------+---------------------------------------------------
250
+ # | NA | nr-nt, genbank-nonst, gbnonst-upd, dbest, dbgss,
251
+ # | | htgs, dbsts, embl-nonst, embnonst-upd, epd,
252
+ # | | genes-nt, genome, vgenes.nuc
253
+ # ----------+-------+---------------------------------------------------
254
+ # tfasta | AA | nr-nt, genbank-nonst, gbnonst-upd, dbest, dbgss,
255
+ # | | htgs, dbsts, embl-nonst, embnonst-upd,
256
+ # | | genes-nt, genome, vgenes.nuc
257
+ # ----------+-------+---------------------------------------------------
258
+
259
+ See http://fasta.genome.jp/ideas/ideas.html#fasta for more details.
260
+
261
+ =end
262
+