bio 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,212 @@
1
+ #
2
+ # = bio/io/dbget.rb - GenomeNet/DBGET client module
3
+ #
4
+ # Copyright:: Copyright (C) 2000, 2001
5
+ # Mitsuteru C. Nakao <n@bioruby.org>,
6
+ # Toshiaki Katayama <k@bioruby.org>
7
+ # License:: LGPL
8
+ #
9
+ # $Id: dbget.rb,v 1.11 2005/11/05 08:32:26 k Exp $
10
+ #
11
+ # == DBGET
12
+ #
13
+ # Accessing the GenomeNet/DBGET data retrieval system
14
+ # http://www.genome.jp/dbget/ within the intranet.
15
+ #
16
+ #--
17
+ #
18
+ # This library is free software; you can redistribute it and/or
19
+ # modify it under the terms of the GNU Lesser General Public
20
+ # License as published by the Free Software Foundation; either
21
+ # version 2 of the License, or (at your option) any later version.
22
+ #
23
+ # This library is distributed in the hope that it will be useful,
24
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
25
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26
+ # Lesser General Public License for more details.
27
+ #
28
+ # You should have received a copy of the GNU Lesser General Public
29
+ # License along with this library; if not, write to the Free Software
30
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
31
+ #
32
+ #++
33
+ #
34
+
35
+ require 'socket'
36
+
37
module Bio

  # Client for the GenomeNet/DBGET data retrieval system.
  #
  # Every public method is a class method; each one builds a one-line
  # DBGET query, sends it over a TCP socket and returns the server's
  # reply as a String.
  class DBGET

    # default DBGET server address
    # SERV = "dbgetserv.genome.jp"
    SERV = "dbget.genome.jp"
    # default DBGET port number
    PORT = "3266"

    # Main class method to access DBGET server. Optionally, this method
    # can be called with the alternative DBGET server address and the
    # TCP/IP port number.
    #
    # 'com' should be one of the following DBGET commands:
    #
    # * alink, bfind, bget, binfo, blink, bman, bref, btab, btit
    #
    # These methods are shortcut for the dbget commands. Actually,
    # Bio::DBGET.((|com|))(arg) internally calls Bio::DBGET.dbget(com, arg).
    # Most of these methods accept the argument "-h" for help.
    #
    # 'arg' should be one of the following formats :
    #
    # * [options] db
    #   * specify the database name only for binfo, bman etc.
    # * [options] db:entry
    #   * specify the database name and the entry name to retrieve.
    # * [options] db entry1 entry2 ...
    #   * specify the database name and the list of entries to retrieve.
    #
    # Note that options in the above example can be omitted. If 'arg' is
    # empty (or nil), the help message with a list of options for 'com'
    # will be shown by default. Supported database names will be found at
    # the GenomeNet DBGET web page http://www.genome.jp/dbget/.
    #
    # 'serv' and 'port' select the server. When neither is given and the
    # environment variable DBGET has the form "host:port", that value is
    # used; anything still unset falls back to SERV and PORT.
    #
    # Returns the text sent by the server after its three greeting lines.
    # The socket is closed even when reading or writing raises.
    def DBGET.dbget(com, arg, serv = nil, port = nil)

      unless serv or port             # if both of serv and port are nil
        if ENV["DBGET"] =~ /:/        # and ENV["DBGET"] exists
          serv, port = ENV["DBGET"].split(':')
        end
      end
      serv ||= SERV
      port ||= PORT

      if arg.nil? or arg.empty?
        arg = "-h"                    # DBGET help message
      end

      query = "#{com} #{arg}\n"       # DBGET query string

      sock = TCPSocket.open("#{serv}", "#{port}")
      begin
        sock.write(query)             # submit query
        sock.flush                    # buffer flush

        sock.gets                     # skip "+Helo DBgetServ ..."
        sock.gets                     # skip "#If you see this message, ..."
        sock.gets                     # skip "*Request-IDent"

        result = sock.read            # DBGET result
      ensure
        sock.close                    # never leak the connection on error
      end

      return result
    end

    # Show the version information of the DBGET server.
    def DBGET.version
      dbget("bget", "-V")
    end


    #--
    # bacc("db entry")   - not supported : get accession(s)
    # bent("db entry")   - not supported : get entry name
    # lmarge("db entry") - not supported
    #++

    # alink("db entry") method returns relations
    def DBGET.alink(arg)
      dbget("alink", arg)
    end

    # bfind("db keyword") method searches entries by keyword
    def DBGET.bfind(arg)
      dbget("bfind", arg)
    end

    # bget("db entry") method retrieves entries specified by the entry names
    def DBGET.bget(arg)
      dbget("bget", arg)
    end

    # seq("db entry") method retrieves the first sequence of the entry
    #
    # Shortcut to retrieve the sequence of the entry in FASTA format.
    # This method is equivalent to Bio::DBGET.bget("-f -n 1 #{arg}") and
    # 'arg' should be the "db:entry" or "db entry1 entry2 ..." format.
    def DBGET.seq(arg)
      dbget("bget", "-f -n 1 #{arg}")
    end

    # seq2("db entry") method retrieves the second sequence of the entry if any
    #
    # Shortcut to retrieve the second sequence of the entry in FASTA format.
    # This method is equivalent to Bio::DBGET.bget("-f -n 2 #{arg}").
    # Only useful when treating the KEGG GENES database entries which have
    # both AASEQ and NTSEQ fields. This method is obsolete and it is
    # recommended to use 'naseq' and 'aaseq' instead.
    def DBGET.seq2(arg)
      dbget("bget", "-f -n 2 #{arg}")
    end

    # naseq("db entry") method retrieves the nucleic acid sequence of the
    # entry if any.
    def DBGET.naseq(arg)
      dbget("bget", "-f -n n #{arg}")
    end

    # aaseq("db entry") method retrieves the amino acid sequence of the
    # entry if any.
    def DBGET.aaseq(arg)
      dbget("bget", "-f -n a #{arg}")
    end

    # binfo("db") method retrieves the database information
    def DBGET.binfo(arg)
      dbget("binfo", arg)
    end

    # blink("db entry") method retrieves the link information
    def DBGET.blink(arg)
      dbget("blink", arg)
    end

    # bman ("db entry") method shows the manual page
    def DBGET.bman(arg)
      dbget("bman", arg)
    end

    # bref("db entry") method retrieves the references and authors
    def DBGET.bref(arg)
      dbget("bref", arg)
    end

    # btab ("db entry") method retrieves (and generates) the database alias table
    def DBGET.btab(arg)
      dbget("btab", arg)
    end

    # btit("db entry ..") method retrieves the entry definition
    def DBGET.btit(arg)
      dbget("btit", arg)
    end

  end

end # module Bio
197
+
198
+
199
# Demo run when this file is executed directly: prints the server version
# and the results of a few sample queries, each preceded by a "###" banner.
if $0 == __FILE__
  puts "### DBGET version"
  p Bio::DBGET.version

  [
    ["### DBGET.dbget('bfind', 'sce tyrosin kinase')",
     lambda { Bio::DBGET.dbget('bfind', 'sce tyrosin kinase') }],
    ["### DBGET.bfind('sce tyrosin kinase')",
     lambda { Bio::DBGET.bfind('sce tyrosin kinase') }],
    ["### DBGET.bget('sce:YDL028C')",
     lambda { Bio::DBGET.bget('sce:YDL028C') }],
    ["### DBGET.binfo('dbget')",
     lambda { Bio::DBGET.binfo('dbget') }]
  ].each do |banner, request|
    puts banner
    puts request.call
  end
end
211
+
212
+
@@ -0,0 +1,614 @@
1
+ #
2
+ # = bio/io/ddbjxml.rb - DDBJ SOAP server access class
3
+ #
4
+ # Copyright:: Copyright (C) 2003, 2004
5
+ # KATAYAMA Toshiaki <k@bioruby.org>
6
+ # License:: LGPL
7
+ #
8
+ # $Id: ddbjxml.rb,v 1.9 2005/11/26 09:37:11 nakao Exp $
9
+ #
10
+ #--
11
+ #
12
+ # This library is free software; you can redistribute it and/or
13
+ # modify it under the terms of the GNU Lesser General Public
14
+ # License as published by the Free Software Foundation; either
15
+ # version 2 of the License, or (at your option) any later version.
16
+ #
17
+ # This library is distributed in the hope that it will be useful,
18
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
19
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20
+ # Lesser General Public License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Lesser General Public
23
+ # License along with this library; if not, write to the Free Software
24
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25
+ #
26
+ #++
27
+ #
28
+
29
+ require 'bio/io/soapwsdl'
30
+ require 'bio/db/genbank/ddbj'
31
+
32
+
33
+ module Bio
34
+ class DDBJ
35
+
36
+
37
+ # = Bio::DDBJ::XML
38
+ #
39
+ # Accessing the DDBJ web services at
40
+ #
41
+ # * http://xml.nig.ac.jp/
42
+ # * http://xml.nig.ac.jp/wsdl/index.jsp
43
+ #
44
+ class XML < Bio::SOAPWSDL
45
+
46
+ BASE_URI = "http://xml.nig.ac.jp/wsdl/"
47
+
48
+ # = Blast
49
+ #
50
+ # BLAST Database Search
51
+ #
52
+ # * http://xml.nig.ac.jp/doc/Blast.txt
53
+ #
54
+ # == Examples
55
+ #
56
+ # serv = Bio::DDBJ::XML::Blast.new
57
+ # query = "MSSRIARALALVVTLLHLTRLALSTCPAACHCPLEAPKCAPGVGLVRDGCGCCKVCAKQL"
58
+ #
59
+ # report = serv.searchSimple('blastp', 'SWISS', query)
60
+ # Bio::Blast::Default::Report.new(report).each_hit do |hit|
61
+ # hit.hsps.find_all {|x| x.evalue < 0.1 }.each do |hsp|
62
+ # p [hsp.evalue, hsp.identity, hsp.definition]
63
+ # end
64
+ # end
65
+ #
66
+ # puts serv.searchParam('tblastn', 'ddbjvrl', query, '-m 8')
67
+ #
68
+ # == WSDL Methods
69
+ #
70
+ # * searchSimple(program, database, query)
71
+ # Returns a blast report in the default format.
72
+ # * searchParam(program, database, query, param)
73
+ # Blasts with param and returns a blast report.
74
+ #
75
+ # == References
76
+ #
77
+ # * http://xml.nig.ac.jp/doc/Blast.txt
78
+ #
79
# Client class for the DDBJ "Blast" WSDL service.
class Blast < XML
  # Location of the WSDL document used when none is supplied.
  SERVER_URI = "#{BASE_URI}Blast.wsdl"

  # wsdl:: alternative WSDL location; SERVER_URI is used when omitted.
  # The chosen location is handed to the superclass constructor.
  def initialize(wsdl = nil)
    wsdl ||= SERVER_URI
    super(wsdl)
  end
end
85
+
86
+
87
+ # = ClustalW
88
+ #
89
+ # Multiple sequence alignment using ClustalW.
90
+ #
91
+ # * http://xml.nig.ac.jp/doc/ClustalW.txt
92
+ #
93
+ # == Examples
94
+ #
95
+ # serv = Bio::DDBJ::XML::ClustalW.new
96
+ #
97
+ # query = <<END
98
+ # > RABSTOUT rabbit Guinness receptor
99
+ # LKMHLMGHLKMGLKMGLKGMHLMHLKHMHLMTYTYTTYRRWPLWMWLPDFGHAS
100
+ # ADSCVCAHGFAVCACFAHFDVCFGAVCFHAVCFAHVCFAAAVCFAVCAC
101
+ # > MUSNOSE mouse nose drying factor
102
+ # mhkmmhkgmkhmhgmhmhglhmkmhlkmgkhmgkmkytytytryrwtqtqwtwyt
103
+ # fdgfdsgafdagfdgfsagdfavdfdvgavfsvfgvdfsvdgvagvfdv
104
+ # > HSHEAVEN human Guinness receptor repeat
105
+ # mhkmmhkgmkhmhgmhmhg lhmkmhlkmgkhmgkmk ytytytryrwtqtqwtwyt
106
+ # fdgfdsgafdagfdgfsag dfavdfdvgavfsvfgv dfsvdgvagvfdv
107
+ # mhkmmhkgmkhmhgmhmhg lhmkmhlkmgkhmgkmk ytytytryrwtqtqwtwyt
108
+ # fdgfdsgafdagfdgfsag dfavdfdvgavfsvfgv dfsvdgvagvfdv
109
+ # END
110
+ #
111
+ # puts serv.analyzeSimple(query)
112
+ # puts serv.analyzeParam(query, '-align -matrix=blosum')
113
+ #
114
+ # == WSDL Methods
115
+ #
116
+ # * analyzeSimple(query)
117
+ # * analyzeParam(query, param)
118
+ #
119
+ # == References
120
+ #
121
+ # * http://xml.nig.ac.jp/doc/ClustalW.txt
122
+ #
123
# Client class for the DDBJ "ClustalW" WSDL service.
class ClustalW < XML
  # Location of the WSDL document used when none is supplied.
  SERVER_URI = "#{BASE_URI}ClustalW.wsdl"

  # wsdl:: alternative WSDL location; SERVER_URI is used when omitted.
  # The chosen location is handed to the superclass constructor.
  def initialize(wsdl = nil)
    wsdl ||= SERVER_URI
    super(wsdl)
  end
end
129
+
130
+
131
+ # = DDBJ
132
+ #
133
+ # Retrieves a sequence entry from the DDBJ DNA Data Bank Japan.
134
+ #
135
+ # * http://xml.nig.ac.jp/doc/DDBJ.txt
136
+ #
137
+ # == Examples
138
+ #
139
+ # serv = Bio::DDBJ::XML::DDBJ.new
140
+ # puts serv.getFFEntry('AB000050')
141
+ # puts serv.getXMLEntry('AB000050')
142
+ # puts serv.getFeatureInfo('AB000050', 'cds')
143
+ # puts serv.getAllFeatures('AB000050')
144
+ # puts serv.getRelatedFeatures('AL121903', '59000', '64000')
145
+ # puts serv.getRelatedFeaturesSeq('AL121903', '59000', '64000')
146
+ #
147
+ # == WSDL Methods
148
+ #
149
+ # * getFFEntry(accession)
150
+ # * getXMLEntry(accession)
151
+ # * getFeatureInfo(accession, feature)
152
+ # * getAllFeatures(accession)
153
+ # * getRelatedFeatures(accession, start, stop)
154
+ # * getRelatedFeaturesSeq(accession, start, stop)
155
+ #
156
+ # == References
157
+ #
158
+ # * http://xml.nig.ac.jp/doc/DDBJ.txt
159
+ #
160
# Client class for the DDBJ "DDBJ" WSDL service.
class DDBJ < XML
  # Location of the WSDL document used when none is supplied.
  SERVER_URI = "#{BASE_URI}DDBJ.wsdl"

  # wsdl:: alternative WSDL location; SERVER_URI is used when omitted.
  # The chosen location is handed to the superclass constructor.
  def initialize(wsdl = nil)
    wsdl ||= SERVER_URI
    super(wsdl)
  end
end
166
+
167
+
168
+ # = Fasta
169
+ #
170
+ # Searching database using the Fasta package.
171
+ #
172
+ # * http://xml.nig.ac.jp/doc/Fasta.txt
173
+ #
174
+ # == Examples
175
+ #
176
+ # serv = Bio::DDBJ::XML::Fasta.new
177
+ # query = ">Test\nMSDGAVQPDG GQPAVRNERA TGSGNGSGGG GGGGSGGVGI"
178
+ #
179
+ # puts serv.searchSimple('fasta34', 'PDB', query)
180
+ # query = ">Test\nAGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC"
181
+ # puts serv.searchParam('fastx34_t', 'PDB', query, '-n')
182
+ #
183
+ # == WSDL Methods
184
+ #
185
+ # * searchSimple(program, database, query)
186
+ # * searchParam(program, database, query, param)
187
+ #
188
+ # == References
189
+ #
190
+ # * http://xml.nig.ac.jp/doc/Fasta.txt
191
+ #
192
# Client class for the DDBJ "Fasta" WSDL service.
class Fasta < XML
  # Location of the WSDL document used when none is supplied.
  SERVER_URI = "#{BASE_URI}Fasta.wsdl"

  # wsdl:: alternative WSDL location; SERVER_URI is used when omitted.
  # The chosen location is handed to the superclass constructor.
  def initialize(wsdl = nil)
    wsdl ||= SERVER_URI
    super(wsdl)
  end
end
198
+
199
+
200
+ # = GetEntry
201
+ #
202
+ # Retrieves database entries.
203
+ #
204
+ # * http://xml.nig.ac.jp/doc/GetEntry.txt
205
+ #
206
+ # == Examples
207
+ #
208
+ # serv = Bio::DDBJ::XML::GetEntry.new
209
+ # puts serv.getDDBJEntry('AB000050')
210
+ # puts serv.getPDBEntry('1AAR')
211
+ #
212
+ # == WSDL Methods
213
+ #
214
+ # * getEntry(database, var, param1, param2)
215
+ # * getEntry(database, var)
216
+ # * getDDBJEntry(accession)
217
+ # * getDDBJCONEntry(accession)
218
+ # * getDDBJVerEntry(accession)
219
+ # * getLocus_DDBJEntry(locus)
220
+ # * getGene_DDBJEntry(gene)
221
+ # * getProd_DDBJEntry(products)
222
+ # * getPID_DDBJEntry(pid)
223
+ # * getClone_DDBJEntry(clone)
224
+ # * getXML_DDBJEntry(accession)
225
+ # * getEMBLEntry(accession)
226
+ # * getSWISSEntry(accession)
227
+ # * getPIREntry(accession)
228
+ # * getPRFEntry(accession)
229
+ # * getPDBEntry(accession)
230
+ # * getQVEntry(accession)
231
+ # * getDADEntry(accession)
232
+ # * getPID_DADEntry(pid)
233
+ # * getFASTA_DDBJEntry(accession)
234
+ # * getFASTA_DDBJCONEntry(accession)
235
+ # * getFASTA_DDBJVerEntry(accession)
236
+ # * getFASTA_DDBJSeqEntry(accession, start, end)
237
+ # * getFASTA_DADEntry(accession)
238
+ # * getFASTA_PIREntry(accession)
239
+ # * getFASTA_SWISSEntry(accession)
240
+ # * getFASTA_PDBEntry(accession)
241
+ # * getFASTA_PRFEntry(accession)
242
+ # * getFASTA_CDSEntry(accession)
243
+ #
244
+ # == References
245
+ #
246
+ # * http://xml.nig.ac.jp/doc/GetEntry.txt
247
+ #
248
# Client class for the DDBJ "GetEntry" WSDL service.
class GetEntry < XML
  # Location of the WSDL document used when none is supplied.
  SERVER_URI = "#{BASE_URI}GetEntry.wsdl"

  # wsdl:: alternative WSDL location; SERVER_URI is used when omitted.
  # The chosen location is handed to the superclass constructor.
  def initialize(wsdl = nil)
    wsdl ||= SERVER_URI
    super(wsdl)
  end
end
254
+
255
+
256
+ # = Gib
257
+ #
258
+ # Genome Information broker
259
+ #
260
+ # * http://xml.nig.ac.jp/doc/Gib.txt
261
+ #
262
+ # == Examples
263
+ #
264
+ # serv = Bio::DDBJ::XML::Gib.new
265
+ # puts serv.getOrganismList
266
+ # puts serv.getChIDList
267
+ # puts serv.getOrganismNameFromChid('Sent_CT18:')
268
+ # puts serv.getChIDFromOrganismName('Aquifex aeolicus VF5')
269
+ # puts serv.getAccession('Ecol_K12_MG1655:')
270
+ # puts serv.getPieceNumber('Mgen_G37:')
271
+ # puts serv.getDivision('Mgen_G37:')
272
+ # puts serv.getType('Mgen_G37:')
273
+ # puts serv.getCDS('Aaeo_VF5:ece1')
274
+ # puts serv.getFlatFile('Nost_PCC7120:pCC7120zeta')
275
+ # puts serv.getFastaFile('Nost_PCC7120:pCC7120zeta', 'cdsaa')
276
+ #
277
+ # == WSDL Methods
278
+ #
279
+ # * getOrganismList
280
+ # * getChIDList
281
+ # * getOrganismNameFromChid(chid)
282
+ # * getChIDFromOrganismName(orgName)
283
+ # * getAccession(chid)
284
+ # * getPieceNumber(chid)
285
+ # * getDivision(chid)
286
+ # * getType(chid)
287
+ # * getFlatFile(chid)
288
+ # * getFastaFile(chid, type)
289
+ # * getCDS(chid)
290
+ #
291
+ # == References
292
+ #
293
+ # * http://xml.nig.ac.jp/doc/Gib.txt
294
+ #
295
# Client class for the DDBJ "Gib" WSDL service.
class Gib < XML
  # Location of the WSDL document used when none is supplied.
  SERVER_URI = "#{BASE_URI}Gib.wsdl"

  # wsdl:: alternative WSDL location; SERVER_URI is used when omitted.
  # The chosen location is handed to the superclass constructor.
  def initialize(wsdl = nil)
    wsdl ||= SERVER_URI
    super(wsdl)
  end
end
301
+
302
+
303
+ # = Gtop
304
+ #
305
+ # GTOP: Gene to protein.
306
+ #
307
+ # * http://xml.nig.ac.jp/doc/Gtop.txt
308
+ #
309
+ # == Examples
310
+ #
311
+ # serv = Bio::DDBJ::XML::Gtop.new
312
+ # puts serv.getOrganismList
313
+ # puts serv.getMasterInfo('thrA', 'ecol0')
314
+ #
315
+ # == WSDL Methods
316
+ #
317
+ # * getOrganismList
318
+ # * getMasterInfo(orfID, organism)
319
+ #
320
+ # == References
321
+ #
322
+ # * http://xml.nig.ac.jp/doc/Gtop.txt
323
+ #
324
# Client class for the DDBJ "Gtop" WSDL service.
class Gtop < XML
  # Location of the WSDL document used when none is supplied.
  SERVER_URI = "#{BASE_URI}Gtop.wsdl"

  # wsdl:: alternative WSDL location; SERVER_URI is used when omitted.
  # The chosen location is handed to the superclass constructor.
  def initialize(wsdl = nil)
    wsdl ||= SERVER_URI
    super(wsdl)
  end
end
330
+
331
+
332
+ # = PML
333
+ #
334
+ # Variation database
335
+ #
336
+ # * http://xml.nig.ac.jp/doc/PML.txt
337
+ #
338
+ # == Examples
339
+ #
340
+ # serv = Bio::DDBJ::XML::PML.new
341
+ # puts serv.getVariation('1')
342
+ #
343
+ # == WSDL Methods
344
+ #
345
+ # * searchVariation(field, query, order)
346
+ # * searchVariationSimple(field, query)
347
+ # * searchFrequency(field, query, order)
348
+ # * searchFrequencySimple(field, query)
349
+ # * getVariation(variation_id)
350
+ # * getFrequency(variation_id, population_id)
351
+ #
352
+ # == References
353
+ #
354
+ # * http://xml.nig.ac.jp/doc/PML.txt
355
+ #
356
# Client class for the DDBJ "PML" WSDL service.
class PML < XML
  # Location of the WSDL document used when none is supplied.
  SERVER_URI = "#{BASE_URI}PML.wsdl"

  # wsdl:: alternative WSDL location; SERVER_URI is used when omitted.
  # The chosen location is handed to the superclass constructor.
  def initialize(wsdl = nil)
    wsdl ||= SERVER_URI
    super(wsdl)
  end
end
362
+
363
+
364
+ # = SRS
365
+ #
366
+ # Sequence Retrieval System
367
+ #
368
+ # * http://xml.nig.ac.jp/doc/SRS.txt
369
+ #
370
+ # == Examples
371
+ #
372
+ # serv = Bio::DDBJ::XML::SRS.new
373
+ # puts serv.searchSimple('[pathway-des:sugar]')
374
+ # puts serv.searchParam('[swissprot-des:cohesin]', '-f seq -sf fasta')
375
+ #
376
+ # == WSDL Methods
377
+ #
378
+ # * searchSimple(query)
379
+ # * searchParam(query, param)
380
+ #
381
+ # == References
382
+ #
383
+ # * http://xml.nig.ac.jp/doc/SRS.txt
384
+ #
385
# Client class for the DDBJ "SRS" WSDL service.
class SRS < XML
  # Location of the WSDL document used when none is supplied.
  SERVER_URI = "#{BASE_URI}SRS.wsdl"

  # wsdl:: alternative WSDL location; SERVER_URI is used when omitted.
  # The chosen location is handed to the superclass constructor.
  def initialize(wsdl = nil)
    wsdl ||= SERVER_URI
    super(wsdl)
  end
end
391
+
392
+
393
+ # = TxSearch
394
+ #
395
+ # Searching taxonomy information.
396
+ #
397
+ # * http://xml.nig.ac.jp/doc/TxSearch.txt
398
+ #
399
+ # == Examples
400
+ #
401
+ # serv = Bio::DDBJ::XML::TxSearch.new
402
+ # puts serv.searchSimple('*coli')
403
+ # puts serv.searchSimple('*tardigrada*')
404
+ # puts serv.getTxId('Escherichia coli')
405
+ # puts serv.getTxName('562')
406
+ #
407
+ # query = ["Campylobacter coli", "Escherichia coli"].join("\n")
408
+ # rank = ["family", "genus"].join("\n")
409
+ # puts serv.searchLineage(query, rank, 'Bacteria')
410
+ #
411
+ # == WSDL Methods
412
+ #
413
+ # * searchSimple(tx_Name)
414
+ # * searchParam(tx_Name, tx_Clas, tx_Rank, tx_Rmax, tx_Dcls)
415
+ # * getTxId(tx_Name)
416
+ # * getTxName(tx_Id)
417
+ # * searchLineage(query, ranks, superkingdom)
418
+ #
419
+ # == References
420
+ #
421
+ # * http://xml.nig.ac.jp/doc/TxSearch.txt
422
+ #
423
# Client class for the DDBJ "TxSearch" WSDL service.
class TxSearch < XML
  # Location of the WSDL document used when none is supplied.
  SERVER_URI = "#{BASE_URI}TxSearch.wsdl"

  # wsdl:: alternative WSDL location; SERVER_URI is used when omitted.
  # The chosen location is handed to the superclass constructor.
  def initialize(wsdl = nil)
    wsdl ||= SERVER_URI
    super(wsdl)
  end
end
429
+
430
+ end # XML
431
+
432
+ end # DDBJ
433
+ end # Bio
434
+
435
+
436
+
437
# Demo run when this file is executed directly. For every service it prints
# a ">>>" section banner, then for each sample call a "###" label followed
# by whatever the remote service returned.
if __FILE__ == $0

  # Use pretty-printing for 'p' when the pp library is available.
  begin
    require 'pp'
    alias p pp
  rescue LoadError
  end

  # Prints each label and then the result of its call, strictly in order,
  # so the label is already on screen while the remote call is running.
  run_examples = lambda do |examples|
    examples.each do |label, call|
      puts "### #{label}"
      puts call.call
    end
  end


  puts ">>> Bio::DDBJ::XML::Blast"
  serv = Bio::DDBJ::XML::Blast.new
  # serv.log = STDERR

  blast_query = "MSSRIARALALVVTLLHLTRLALSTCPAACHCPLEAPKCAPGVGLVRDGCGCCKVCAKQL"

  run_examples.call([
    ["searchSimple('blastp', 'SWISS', query)",
     lambda { serv.searchSimple('blastp', 'SWISS', blast_query) }],
    ["searchParam('tblastn', 'ddbjvrl', query, '-m 8')",
     lambda { serv.searchParam('tblastn', 'ddbjvrl', blast_query, '-m 8') }]
  ])


  puts ">>> Bio::DDBJ::XML::ClustalW"
  serv = Bio::DDBJ::XML::ClustalW.new

  clustalw_query = <<END
> RABSTOUT rabbit Guinness receptor
LKMHLMGHLKMGLKMGLKGMHLMHLKHMHLMTYTYTTYRRWPLWMWLPDFGHAS
ADSCVCAHGFAVCACFAHFDVCFGAVCFHAVCFAHVCFAAAVCFAVCAC
> MUSNOSE mouse nose drying factor
mhkmmhkgmkhmhgmhmhglhmkmhlkmgkhmgkmkytytytryrwtqtqwtwyt
fdgfdsgafdagfdgfsagdfavdfdvgavfsvfgvdfsvdgvagvfdv
> HSHEAVEN human Guinness receptor repeat
mhkmmhkgmkhmhgmhmhg lhmkmhlkmgkhmgkmk ytytytryrwtqtqwtwyt
fdgfdsgafdagfdgfsag dfavdfdvgavfsvfgv dfsvdgvagvfdv
mhkmmhkgmkhmhgmhmhg lhmkmhlkmgkhmgkmk ytytytryrwtqtqwtwyt
fdgfdsgafdagfdgfsag dfavdfdvgavfsvfgv dfsvdgvagvfdv
END

  run_examples.call([
    ["analyzeSimple(query)",
     lambda { serv.analyzeSimple(clustalw_query) }],
    ["analyzeParam(query, '-align -matrix=blosum')",
     lambda { serv.analyzeParam(clustalw_query, '-align -matrix=blosum') }]
  ])


  puts ">>> Bio::DDBJ::XML::DDBJ"
  serv = Bio::DDBJ::XML::DDBJ.new

  run_examples.call([
    ["getFFEntry('AB000050')",
     lambda { serv.getFFEntry('AB000050') }],
    ["getXMLEntry('AB000050')",
     lambda { serv.getXMLEntry('AB000050') }],
    ["getFeatureInfo('AB000050', 'cds')",
     lambda { serv.getFeatureInfo('AB000050', 'cds') }],
    ["getAllFeatures('AB000050')",
     lambda { serv.getAllFeatures('AB000050') }],
    ["getRelatedFeatures('AL121903', '59000', '64000')",
     lambda { serv.getRelatedFeatures('AL121903', '59000', '64000') }],
    ["getRelatedFeaturesSeq('AL121903', '59000', '64000')",
     lambda { serv.getRelatedFeaturesSeq('AL121903', '59000', '64000') }]
  ])


  puts ">>> Bio::DDBJ::XML::Fasta"
  serv = Bio::DDBJ::XML::Fasta.new

  # Two distinct queries: each lambda must keep seeing its own sequence.
  fasta_protein = ">Test\nMSDGAVQPDG GQPAVRNERA TGSGNGSGGG GGGGSGGVGI"
  fasta_dna = ">Test\nAGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC"

  run_examples.call([
    ["searchSimple('fasta34', 'PDB', query)",
     lambda { serv.searchSimple('fasta34', 'PDB', fasta_protein) }],
    ["searchParam('fastx34_t', 'PDB', query, '-n')",
     lambda { serv.searchParam('fastx34_t', 'PDB', fasta_dna, '-n') }]
  ])


  puts ">>> Bio::DDBJ::XML::GetEntry"
  serv = Bio::DDBJ::XML::GetEntry.new

  run_examples.call([
    ["getDDBJEntry('AB000050')",
     lambda { serv.getDDBJEntry('AB000050') }],
    ["getPDBEntry('1AAR')",
     lambda { serv.getPDBEntry('1AAR') }]
  ])


  puts ">>> Bio::DDBJ::XML::Gib"
  serv = Bio::DDBJ::XML::Gib.new

  run_examples.call([
    ["getOrganismList",
     lambda { serv.getOrganismList }],
    ["getChIDList",
     lambda { serv.getChIDList }],
    ["getOrganismNameFromChid('Sent_CT18:')",
     lambda { serv.getOrganismNameFromChid('Sent_CT18:') }],
    ["getChIDFromOrganismName('Aquifex aeolicus VF5')",
     lambda { serv.getChIDFromOrganismName('Aquifex aeolicus VF5') }],
    ["getAccession('Ecol_K12_MG1655:')",
     lambda { serv.getAccession('Ecol_K12_MG1655:') }],
    ["getPieceNumber('Mgen_G37:')",
     lambda { serv.getPieceNumber('Mgen_G37:') }],
    ["getDivision('Mgen_G37:')",
     lambda { serv.getDivision('Mgen_G37:') }],
    ["getType('Mgen_G37:')",
     lambda { serv.getType('Mgen_G37:') }],
    ["getCDS('Aaeo_VF5:ece1')",
     lambda { serv.getCDS('Aaeo_VF5:ece1') }],
    ["getFlatFile('Nost_PCC7120:pCC7120zeta')",
     lambda { serv.getFlatFile('Nost_PCC7120:pCC7120zeta') }],
    # Label now shows the 'cdsaa' argument that the call really passes.
    ["getFastaFile('Nost_PCC7120:pCC7120zeta', 'cdsaa')",
     lambda { serv.getFastaFile('Nost_PCC7120:pCC7120zeta', 'cdsaa') }]
  ])


  puts ">>> Bio::DDBJ::XML::Gtop"
  serv = Bio::DDBJ::XML::Gtop.new

  run_examples.call([
    ["getOrganismList",
     lambda { serv.getOrganismList }],
    # Label now shows the arguments, like every other example.
    ["getMasterInfo('thrA', 'ecol0')",
     lambda { serv.getMasterInfo('thrA', 'ecol0') }]
  ])


  # puts ">>> Bio::DDBJ::XML::PML"
  # serv = Bio::DDBJ::XML::PML.new
  #
  # puts "### getVariation('1')"
  # puts serv.getVariation('1')


  puts ">>> Bio::DDBJ::XML::SRS"
  serv = Bio::DDBJ::XML::SRS.new

  run_examples.call([
    ["searchSimple('[pathway-des:sugar]')",
     lambda { serv.searchSimple('[pathway-des:sugar]') }],
    ["searchParam('[swissprot-des:cohesin]', '-f seq -sf fasta')",
     lambda { serv.searchParam('[swissprot-des:cohesin]', '-f seq -sf fasta') }]
  ])


  puts ">>> Bio::DDBJ::XML::TxSearch"
  serv = Bio::DDBJ::XML::TxSearch.new

  lineage_query = "Campylobacter coli\nEscherichia coli"
  lineage_rank = "family\ngenus"

  run_examples.call([
    ["searchSimple('*coli')",
     lambda { serv.searchSimple('*coli') }],
    ["searchSimple('*tardigrada*')",
     lambda { serv.searchSimple('*tardigrada*') }],
    ["getTxId('Escherichia coli')",
     lambda { serv.getTxId('Escherichia coli') }],
    ["getTxName('562')",
     lambda { serv.getTxName('562') }],
    ["searchLineage(query, rank, 'Bacteria')",
     lambda { serv.searchLineage(lineage_query, lineage_rank, 'Bacteria') }]
  ])

end
614
+