bio 1.4.3.0001 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.travis.yml +39 -33
- data/BSDL +22 -0
- data/COPYING +2 -2
- data/COPYING.ja +36 -36
- data/ChangeLog +2404 -1025
- data/KNOWN_ISSUES.rdoc +15 -55
- data/README.rdoc +17 -23
- data/RELEASE_NOTES.rdoc +246 -183
- data/Rakefile +3 -2
- data/bin/br_biofetch.rb +29 -5
- data/bioruby.gemspec +15 -32
- data/bioruby.gemspec.erb +10 -20
- data/doc/ChangeLog-1.4.3 +1478 -0
- data/doc/RELEASE_NOTES-1.4.3.rdoc +204 -0
- data/doc/Tutorial.rd +0 -6
- data/doc/Tutorial.rd.html +7 -12
- data/doc/Tutorial.rd.ja +960 -1064
- data/doc/Tutorial.rd.ja.html +977 -1067
- data/gemfiles/Gemfile.travis-jruby1.8 +2 -1
- data/gemfiles/Gemfile.travis-jruby1.9 +2 -4
- data/gemfiles/Gemfile.travis-rbx +13 -0
- data/gemfiles/Gemfile.travis-ruby1.8 +2 -1
- data/gemfiles/Gemfile.travis-ruby1.9 +2 -4
- data/gemfiles/Gemfile.travis-ruby2.2 +9 -0
- data/lib/bio.rb +10 -43
- data/lib/bio/alignment.rb +8 -14
- data/lib/bio/appl/blast.rb +1 -2
- data/lib/bio/appl/blast/format0.rb +18 -7
- data/lib/bio/appl/blast/remote.rb +0 -9
- data/lib/bio/appl/blast/report.rb +1 -1
- data/lib/bio/appl/clustalw/report.rb +3 -1
- data/lib/bio/appl/genscan/report.rb +1 -2
- data/lib/bio/appl/iprscan/report.rb +1 -2
- data/lib/bio/appl/meme/mast.rb +4 -4
- data/lib/bio/appl/meme/mast/report.rb +1 -1
- data/lib/bio/appl/paml/codeml.rb +2 -2
- data/lib/bio/appl/paml/codeml/report.rb +1 -0
- data/lib/bio/appl/paml/common.rb +1 -1
- data/lib/bio/appl/sosui/report.rb +1 -2
- data/lib/bio/command.rb +62 -2
- data/lib/bio/data/aa.rb +13 -31
- data/lib/bio/data/codontable.rb +1 -2
- data/lib/bio/db/biosql/biosql_to_biosequence.rb +1 -0
- data/lib/bio/db/biosql/sequence.rb +1 -1
- data/lib/bio/db/embl/common.rb +1 -1
- data/lib/bio/db/embl/embl.rb +5 -4
- data/lib/bio/db/embl/format_embl.rb +3 -3
- data/lib/bio/db/embl/sptr.rb +9 -1444
- data/lib/bio/db/embl/swissprot.rb +12 -29
- data/lib/bio/db/embl/trembl.rb +13 -30
- data/lib/bio/db/embl/uniprot.rb +12 -29
- data/lib/bio/db/embl/uniprotkb.rb +1455 -0
- data/lib/bio/db/fasta.rb +17 -0
- data/lib/bio/db/fasta/defline.rb +1 -3
- data/lib/bio/db/fastq.rb +1 -1
- data/lib/bio/db/genbank/ddbj.rb +9 -5
- data/lib/bio/db/genbank/refseq.rb +11 -3
- data/lib/bio/db/gff.rb +3 -4
- data/lib/bio/db/go.rb +5 -6
- data/lib/bio/db/kegg/module.rb +4 -5
- data/lib/bio/db/kegg/pathway.rb +4 -5
- data/lib/bio/db/kegg/reaction.rb +1 -1
- data/lib/bio/db/nexus.rb +3 -2
- data/lib/bio/db/pdb/pdb.rb +2 -2
- data/lib/bio/db/phyloxml/phyloxml_elements.rb +82 -59
- data/lib/bio/db/phyloxml/phyloxml_parser.rb +2 -2
- data/lib/bio/db/phyloxml/phyloxml_writer.rb +1 -2
- data/lib/bio/db/sanger_chromatogram/chromatogram.rb +1 -2
- data/lib/bio/db/transfac.rb +1 -1
- data/lib/bio/io/das.rb +40 -41
- data/lib/bio/io/fastacmd.rb +0 -16
- data/lib/bio/io/fetch.rb +111 -55
- data/lib/bio/io/flatfile/buffer.rb +4 -5
- data/lib/bio/io/hinv.rb +2 -3
- data/lib/bio/io/ncbirest.rb +43 -6
- data/lib/bio/io/pubmed.rb +76 -81
- data/lib/bio/io/togows.rb +33 -10
- data/lib/bio/map.rb +1 -1
- data/lib/bio/pathway.rb +1 -1
- data/lib/bio/sequence/compat.rb +1 -1
- data/lib/bio/sequence/na.rb +63 -12
- data/lib/bio/shell.rb +0 -2
- data/lib/bio/shell/core.rb +5 -6
- data/lib/bio/shell/interface.rb +3 -4
- data/lib/bio/shell/irb.rb +1 -2
- data/lib/bio/shell/plugin/entry.rb +2 -3
- data/lib/bio/shell/plugin/seq.rb +7 -6
- data/lib/bio/shell/setup.rb +1 -2
- data/lib/bio/tree.rb +2 -2
- data/lib/bio/util/contingency_table.rb +0 -2
- data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +2 -2
- data/lib/bio/util/sirna.rb +76 -16
- data/lib/bio/version.rb +8 -9
- data/sample/benchmark_clustalw_report.rb +47 -0
- data/sample/biofetch.rb +248 -151
- data/setup.rb +6 -7
- data/test/data/clustalw/example1-seqnos.aln +58 -0
- data/test/network/bio/appl/blast/test_remote.rb +1 -15
- data/test/network/bio/appl/test_blast.rb +0 -12
- data/test/network/bio/io/test_pubmed.rb +49 -0
- data/test/network/bio/io/test_togows.rb +0 -1
- data/test/network/bio/test_command.rb +65 -2
- data/test/unit/bio/appl/bl2seq/test_report.rb +0 -1
- data/test/unit/bio/appl/blast/test_report.rb +110 -48
- data/test/unit/bio/appl/clustalw/test_report.rb +67 -51
- data/test/unit/bio/appl/sim4/test_report.rb +46 -17
- data/test/unit/bio/appl/test_blast.rb +2 -2
- data/test/unit/bio/db/embl/test_embl.rb +0 -1
- data/test/unit/bio/db/embl/test_embl_rel89.rb +0 -1
- data/test/unit/bio/db/embl/{test_sptr.rb → test_uniprotkb.rb} +111 -115
- data/test/unit/bio/db/embl/{test_uniprot_new_part.rb → test_uniprotkb_new_part.rb} +11 -11
- data/test/unit/bio/db/genbank/test_genbank.rb +10 -4
- data/test/unit/bio/db/pdb/test_pdb.rb +14 -8
- data/test/unit/bio/db/test_fasta.rb +41 -1
- data/test/unit/bio/db/test_fastq.rb +14 -4
- data/test/unit/bio/db/test_gff.rb +2 -2
- data/test/unit/bio/db/test_phyloxml.rb +30 -30
- data/test/unit/bio/db/test_phyloxml_writer.rb +2 -2
- data/test/unit/bio/io/flatfile/test_autodetection.rb +1 -2
- data/test/unit/bio/io/flatfile/test_buffer.rb +7 -1
- data/test/unit/bio/io/flatfile/test_splitter.rb +1 -1
- data/test/unit/bio/io/test_togows.rb +3 -2
- data/test/unit/bio/sequence/test_dblink.rb +1 -1
- data/test/unit/bio/sequence/test_na.rb +3 -1
- data/test/unit/bio/test_alignment.rb +1 -2
- data/test/unit/bio/test_command.rb +5 -4
- data/test/unit/bio/test_db.rb +4 -2
- data/test/unit/bio/test_pathway.rb +25 -10
- data/test/unit/bio/util/test_sirna.rb +22 -22
- metadata +656 -1430
- data/doc/KEGG_API.rd +0 -1843
- data/doc/KEGG_API.rd.ja +0 -1834
- data/extconf.rb +0 -2
- data/lib/bio/appl/blast/ddbj.rb +0 -131
- data/lib/bio/db/kegg/taxonomy.rb +0 -280
- data/lib/bio/io/dbget.rb +0 -194
- data/lib/bio/io/ddbjrest.rb +0 -344
- data/lib/bio/io/ddbjxml.rb +0 -458
- data/lib/bio/io/ebisoap.rb +0 -158
- data/lib/bio/io/ensembl.rb +0 -229
- data/lib/bio/io/higet.rb +0 -73
- data/lib/bio/io/keggapi.rb +0 -363
- data/lib/bio/io/ncbisoap.rb +0 -156
- data/lib/bio/io/soapwsdl.rb +0 -119
- data/lib/bio/shell/plugin/keggapi.rb +0 -181
- data/lib/bio/shell/plugin/soap.rb +0 -87
- data/sample/dbget +0 -37
- data/sample/demo_ddbjxml.rb +0 -212
- data/sample/demo_kegg_taxonomy.rb +0 -92
- data/sample/demo_keggapi.rb +0 -502
- data/sample/psortplot_html.rb +0 -214
- data/test/network/bio/io/test_ddbjrest.rb +0 -47
- data/test/network/bio/io/test_ensembl.rb +0 -230
- data/test/network/bio/io/test_soapwsdl.rb +0 -53
- data/test/unit/bio/io/test_ddbjxml.rb +0 -81
- data/test/unit/bio/io/test_ensembl.rb +0 -111
- data/test/unit/bio/io/test_soapwsdl.rb +0 -33
data/lib/bio/io/keggapi.rb
DELETED
@@ -1,363 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# = bio/io/keggapi.rb - KEGG API access class
|
3
|
-
#
|
4
|
-
# Copyright:: Copyright (C) 2003, 2004 Toshiaki Katayama <k@bioruby.org>
|
5
|
-
# License:: The Ruby License
|
6
|
-
#
|
7
|
-
# $Id:$
|
8
|
-
#
|
9
|
-
|
10
|
-
require 'bio/io/soapwsdl'
|
11
|
-
require 'uri'
|
12
|
-
require 'net/http'
|
13
|
-
require 'bio/command'
|
14
|
-
|
15
|
-
module Bio
|
16
|
-
class KEGG
|
17
|
-
|
18
|
-
# == Description
|
19
|
-
#
|
20
|
-
# KEGG API is a web service to use KEGG system via SOAP/WSDL.
|
21
|
-
#
|
22
|
-
# == References
|
23
|
-
#
|
24
|
-
# For more informations on KEGG API, see the following site and read the
|
25
|
-
# reference manual.
|
26
|
-
#
|
27
|
-
# * http://www.genome.jp/kegg/soap/
|
28
|
-
# * http://www.genome.jp/kegg/soap/doc/keggapi_manual.html
|
29
|
-
#
|
30
|
-
# == List of methods
|
31
|
-
#
|
32
|
-
# As of KEGG API v5.0
|
33
|
-
#
|
34
|
-
# * list_databases
|
35
|
-
# * list_organisms
|
36
|
-
# * list_pathways(org)
|
37
|
-
# * binfo(string)
|
38
|
-
# * bget(string)
|
39
|
-
# * bfind(string)
|
40
|
-
# * btit(string)
|
41
|
-
# * get_linkdb_by_entry(entry_id, db, start, max_results)
|
42
|
-
# * get_best_best_neighbors_by_gene(genes_id, start, max_results)
|
43
|
-
# * get_best_neighbors_by_gene(genes_id, start, max_results)
|
44
|
-
# * get_reverse_best_neighbors_by_gene(genes_id, start, max_results)
|
45
|
-
# * get_paralogs_by_gene(genes_id, start, max_results)
|
46
|
-
# * get_similarity_between_genes(genes_id1, genes_id2)
|
47
|
-
# * get_motifs_by_gene(genes_id, db)
|
48
|
-
# * get_genes_by_motifs(motif_id_list, start, max_results)
|
49
|
-
# * get_ko_by_gene(genes_id)
|
50
|
-
# * get_ko_members(ko_id)
|
51
|
-
# * get_oc_members_by_gene(genes_id, start, max_results)
|
52
|
-
# * get_pc_members_by_gene(genes_id, start, max_results)
|
53
|
-
# * mark_pathway_by_objects(pathway_id, object_id_list)
|
54
|
-
# * color_pathway_by_objects(pathway_id, object_id_list, fg_color_list, bg_color_list)
|
55
|
-
# * get_genes_by_pathway(pathway_id)
|
56
|
-
# * get_enzymes_by_pathway(pathway_id)
|
57
|
-
# * get_compounds_by_pathway(pathway_id)
|
58
|
-
# * get_reactions_by_pathway(pathway_id)
|
59
|
-
# * get_pathways_by_genes(genes_id_list)
|
60
|
-
# * get_pathways_by_enzymes(enzyme_id_list)
|
61
|
-
# * get_pathways_by_compounds(compound_id_list)
|
62
|
-
# * get_pathways_by_reactions(reaction_id_list)
|
63
|
-
# * get_linked_pathways(pathway_id)
|
64
|
-
# * get_genes_by_enzyme(enzyme_id, org)
|
65
|
-
# * get_enzymes_by_gene(genes_id)
|
66
|
-
# * get_enzymes_by_compound(compound_id)
|
67
|
-
# * get_enzymes_by_reaction(reaction_id)
|
68
|
-
# * get_compounds_by_enzyme(enzyme_id)
|
69
|
-
# * get_compounds_by_reaction(reaction_id)
|
70
|
-
# * get_reactions_by_enzyme(enzyme_id)
|
71
|
-
# * get_reactions_by_compound(compound_id)
|
72
|
-
# * get_genes_by_organism(org, start, max_results)
|
73
|
-
# * get_number_of_genes_by_organism(org)
|
74
|
-
#
|
75
|
-
# == KEGG API methods implemented only in BioRuby
|
76
|
-
#
|
77
|
-
# In BioRuby, returned values are added filter method to pick up
|
78
|
-
# values in a complex data type as an array.
|
79
|
-
#
|
80
|
-
# #!/usr/bin/env ruby
|
81
|
-
#
|
82
|
-
# require 'bio'
|
83
|
-
#
|
84
|
-
# serv = Bio::KEGG::API.new
|
85
|
-
# results = serv.get_best_neighbors_by_gene("eco:b0002", "bsu")
|
86
|
-
#
|
87
|
-
# # case 0 : without filter
|
88
|
-
# results.each do |hit|
|
89
|
-
# print hit.genes_id1, "\t", hit.genes_id2, "\t", hit.sw_score, "\n"
|
90
|
-
# end
|
91
|
-
#
|
92
|
-
# # case 1 : select gene names and SW score only
|
93
|
-
# fields = [:genes_id1, :genes_id2, :sw_score]
|
94
|
-
# results.each do |hit|
|
95
|
-
# puts hit.filter(fields).join("\t")
|
96
|
-
# end
|
97
|
-
#
|
98
|
-
# # case 2 : also uses aligned position in each amino acid sequence etc.
|
99
|
-
# fields1 = [:genes_id1, :start_position1, :end_position1, :best_flag_1to2]
|
100
|
-
# fields2 = [:genes_id2, :start_position2, :end_position2, :best_flag_2to1]
|
101
|
-
# results.each do |hit|
|
102
|
-
# print "> score: ", hit.sw_score, ", identity: ", hit.identity, "\n"
|
103
|
-
# print "1:\t", hit.filter(fields1).join("\t"), "\n"
|
104
|
-
# print "2:\t", hit.filter(fields2).join("\t"), "\n"
|
105
|
-
# end
|
106
|
-
#
|
107
|
-
# Using filter method will make it easy to change fields to select and
|
108
|
-
# keep the script clean.
|
109
|
-
#
|
110
|
-
# * Bio::KEGG::API#get_all_neighbors_by_gene(genes_id, org)
|
111
|
-
# * Bio::KEGG::API#get_all_best_best_neighbors_by_gene(genes_id)
|
112
|
-
# * Bio::KEGG::API#get_all_best_neighbors_by_gene(genes_id)
|
113
|
-
# * Bio::KEGG::API#get_all_reverse_best_neighbors_by_gene(genes_id)
|
114
|
-
# * Bio::KEGG::API#get_all_paralogs_by_gene(genes_id)
|
115
|
-
# * Bio::KEGG::API#get_all_genes_by_motifs(motif_id_list)
|
116
|
-
# * Bio::KEGG::API#get_all_oc_members_by_gene(genes_id)
|
117
|
-
# * Bio::KEGG::API#get_all_pc_members_by_gene(genes_id)
|
118
|
-
# * Bio::KEGG::API#get_all_genes_by_organism(org)
|
119
|
-
#
|
120
|
-
# These methods are wrapper for the methods without _all_ in its name
|
121
|
-
# and internally iterate to retrive all the results using start/max_results
|
122
|
-
# value pairs described above. For example,
|
123
|
-
#
|
124
|
-
# #!/usr/bin/env ruby
|
125
|
-
#
|
126
|
-
# require 'soap/wsdlDriver'
|
127
|
-
#
|
128
|
-
# wsdl = "http://soap.genome.jp/KEGG.wsdl"
|
129
|
-
# serv = SOAP::WSDLDriverFactory.new(wsdl).create_driver
|
130
|
-
# serv.generate_explicit_type = true
|
131
|
-
#
|
132
|
-
# start = 1
|
133
|
-
# max_results = 100
|
134
|
-
#
|
135
|
-
# loop do
|
136
|
-
# results = serv.get_best_neighbors_by_gene('eco:b0002', start, max_results)
|
137
|
-
# break unless results # when no more results returned
|
138
|
-
# results.each do |hit|
|
139
|
-
# print hit.genes_id1, "\t", hit.genes_id2, "\t", hit.sw_score, "\n"
|
140
|
-
# end
|
141
|
-
# start += max_results
|
142
|
-
# end
|
143
|
-
#
|
144
|
-
# can be witten as
|
145
|
-
#
|
146
|
-
# #!/usr/bin/env ruby
|
147
|
-
#
|
148
|
-
# require 'bio'
|
149
|
-
#
|
150
|
-
# serv = Bio::KEGG::API.new
|
151
|
-
#
|
152
|
-
# results = serv.get_all_best_neighbors_by_gene('eco:b0002')
|
153
|
-
# results.each do |hit|
|
154
|
-
# print hit.genes_id1, "\t", hit.genes_id2, "\t", hit.sw_score, "\n"
|
155
|
-
# end
|
156
|
-
#
|
157
|
-
#
|
158
|
-
# * Bio::KEGG::API#save_image(url, filename = nil)
|
159
|
-
#
|
160
|
-
# Some methods of the KEGG API will return a URL of the generated image.
|
161
|
-
# This method save an image specified by the URL. The filename can be
|
162
|
-
# specified by its second argument, otherwise basename of the URL will
|
163
|
-
# be used.
|
164
|
-
#
|
165
|
-
# #!/usr/bin/env ruby
|
166
|
-
#
|
167
|
-
# require 'bio'
|
168
|
-
#
|
169
|
-
# serv = Bio::KEGG::API.new("http://soap.genome.jp/v3.0/KEGG.wsdl")
|
170
|
-
#
|
171
|
-
# list = ["eco:b1002", "eco:b2388"]
|
172
|
-
# url = serv.mark_pathway_by_objects("path:eco00010", list)
|
173
|
-
#
|
174
|
-
# # Save with the original filename (eco00010.gif in this case)
|
175
|
-
# serv.save_image(url)
|
176
|
-
#
|
177
|
-
# # or save as "save_image.gif"
|
178
|
-
# serv.save_image(url, "save_image.gif")
|
179
|
-
#
|
180
|
-
# * Bio::KEGG::API#get_entries(entry_id_list)
|
181
|
-
# * Bio::KEGG::API#get_aaseqs(entry_id_list)
|
182
|
-
# * Bio::KEGG::API#get_naseqs(entry_id_list)
|
183
|
-
# * Bio::KEGG::API#get_definitions(entry_id_list)
|
184
|
-
#
|
185
|
-
# These methods are for the shortcut and backward compatibility
|
186
|
-
# (these methods existed in the older version of the KEGG API).
|
187
|
-
#
|
188
|
-
class API < Bio::SOAPWSDL
|
189
|
-
|
190
|
-
SERVER_URI = "http://soap.genome.jp/KEGG.wsdl"
|
191
|
-
|
192
|
-
# Connect to the KEGG API's SOAP server. A WSDL file will be automatically
|
193
|
-
# downloaded and parsed to generate the SOAP client driver. The default URL
|
194
|
-
# for the WSDL is http://soap.genome.jp/KEGG.wsdl but it can be changed by
|
195
|
-
# the argument or by wsdl= method.
|
196
|
-
def initialize(wsdl = nil)
|
197
|
-
@wsdl = wsdl || SERVER_URI
|
198
|
-
@log = nil
|
199
|
-
@start = 1
|
200
|
-
@max_results = 100
|
201
|
-
create_driver
|
202
|
-
end
|
203
|
-
|
204
|
-
# Returns current value for the 'start' count for the methods having
|
205
|
-
# start/max_results argument pairs or changes the default value for
|
206
|
-
# the 'start' count.
|
207
|
-
attr_accessor :start
|
208
|
-
|
209
|
-
# Returns current value for the 'max_results' number for the methods having
|
210
|
-
# start/max_results argument pairs or changes the default value for the
|
211
|
-
# 'max_results' count. If your request timeouts, try smaller value for
|
212
|
-
# the max_results.
|
213
|
-
attr_accessor :max_results
|
214
|
-
|
215
|
-
def method_missing(*arg)
|
216
|
-
begin
|
217
|
-
results = @driver.send(*arg)
|
218
|
-
rescue Timeout::Error
|
219
|
-
retry
|
220
|
-
end
|
221
|
-
results = add_filter(results)
|
222
|
-
return results
|
223
|
-
end
|
224
|
-
|
225
|
-
|
226
|
-
# def get_all_neighbors_by_gene(genes_id, org)
|
227
|
-
# get_all(:get_neighbors_by_gene, genes_id, org)
|
228
|
-
# end
|
229
|
-
|
230
|
-
def get_all_best_best_neighbors_by_gene(genes_id)
|
231
|
-
get_all(:get_best_best_neighbors_by_gene, genes_id)
|
232
|
-
end
|
233
|
-
|
234
|
-
def get_all_best_neighbors_by_gene(genes_id)
|
235
|
-
get_all(:get_best_neighbors_by_gene, genes_id)
|
236
|
-
end
|
237
|
-
|
238
|
-
def get_all_reverse_best_neighbors_by_gene(genes_id)
|
239
|
-
get_all(:get_reverse_best_neighbors_by_gene, genes_id)
|
240
|
-
end
|
241
|
-
|
242
|
-
def get_all_paralogs_by_gene(genes_id)
|
243
|
-
get_all(:get_paralogs_by_gene, genes_id)
|
244
|
-
end
|
245
|
-
|
246
|
-
def get_all_genes_by_motifs(motif_id_list)
|
247
|
-
get_all(:get_genes_by_motifs, motif_id_list)
|
248
|
-
end
|
249
|
-
|
250
|
-
def get_all_oc_members_by_gene(genes_id)
|
251
|
-
get_all(:get_oc_members_by_gene, genes_id)
|
252
|
-
end
|
253
|
-
|
254
|
-
def get_all_pc_members_by_gene(genes_id)
|
255
|
-
get_all(:get_pc_members_by_gene, genes_id)
|
256
|
-
end
|
257
|
-
|
258
|
-
def get_all_genes_by_organism(org)
|
259
|
-
get_all(:get_genes_by_organism, org)
|
260
|
-
end
|
261
|
-
|
262
|
-
def get_all_linkdb_by_entry(entry_id, db)
|
263
|
-
get_all(:get_linkdb_by_entry, entry_id, db)
|
264
|
-
end
|
265
|
-
|
266
|
-
|
267
|
-
def save_image(url, filename = nil)
|
268
|
-
schema, user, host, port, reg, path, = URI.split(url)
|
269
|
-
filename ||= File.basename(path)
|
270
|
-
|
271
|
-
http = Bio::Command.new_http(host, port)
|
272
|
-
response = http.get(path)
|
273
|
-
File.open(filename, "w+") do |f|
|
274
|
-
f.print response.body
|
275
|
-
end
|
276
|
-
return filename
|
277
|
-
end
|
278
|
-
|
279
|
-
|
280
|
-
def get_entries(ary = [])
|
281
|
-
result = ''
|
282
|
-
step = [@max_results, 50].min
|
283
|
-
0.step(ary.length, step) do |i|
|
284
|
-
str = ary[i, step].join(" ")
|
285
|
-
if entry = @driver.send(:bget, str)
|
286
|
-
result << entry.to_s
|
287
|
-
end
|
288
|
-
end
|
289
|
-
return result
|
290
|
-
end
|
291
|
-
|
292
|
-
def get_aaseqs(ary = [])
|
293
|
-
result = ''
|
294
|
-
step = [@max_results, 50].min
|
295
|
-
0.step(ary.length, step) do |i|
|
296
|
-
str = "-f -n a " + ary[i, step].join(" ")
|
297
|
-
if entry = @driver.send(:bget, str)
|
298
|
-
result << entry.to_s
|
299
|
-
end
|
300
|
-
end
|
301
|
-
return result
|
302
|
-
end
|
303
|
-
|
304
|
-
def get_naseqs(ary = [])
|
305
|
-
result = ''
|
306
|
-
step = [@max_results, 50].min
|
307
|
-
0.step(ary.length, step) do |i|
|
308
|
-
str = "-f -n n " + ary[i, step].join(" ")
|
309
|
-
if entry = @driver.send(:bget, str)
|
310
|
-
result << entry.to_s
|
311
|
-
end
|
312
|
-
end
|
313
|
-
return result
|
314
|
-
end
|
315
|
-
|
316
|
-
def get_definitions(ary = [])
|
317
|
-
result = ''
|
318
|
-
step = [@max_results, 50].min
|
319
|
-
0.step(ary.length, step) do |i|
|
320
|
-
str = ary[i, step].join(" ")
|
321
|
-
if entry = @driver.send(:btit, str)
|
322
|
-
result << entry.to_s
|
323
|
-
end
|
324
|
-
end
|
325
|
-
return result
|
326
|
-
end
|
327
|
-
|
328
|
-
|
329
|
-
private
|
330
|
-
|
331
|
-
def add_filter(results)
|
332
|
-
if results.is_a?(Array)
|
333
|
-
results.each do |result|
|
334
|
-
next if result.is_a?(Fixnum)
|
335
|
-
def result.filter(fields)
|
336
|
-
fields.collect { |field| self.send(field) }
|
337
|
-
end
|
338
|
-
end
|
339
|
-
end
|
340
|
-
return results
|
341
|
-
end
|
342
|
-
|
343
|
-
def get_all(method, *args)
|
344
|
-
args << @start
|
345
|
-
args << @max_results
|
346
|
-
|
347
|
-
ary = []
|
348
|
-
loop do
|
349
|
-
results = @driver.send(method, *args)
|
350
|
-
break unless results
|
351
|
-
break if results.empty?
|
352
|
-
results = add_filter(results)
|
353
|
-
ary << results
|
354
|
-
args[-2] += @max_results # next start count
|
355
|
-
end
|
356
|
-
return ary.flatten
|
357
|
-
end
|
358
|
-
|
359
|
-
end # API
|
360
|
-
|
361
|
-
end # KEGG
|
362
|
-
end # Bio
|
363
|
-
|
data/lib/bio/io/ncbisoap.rb
DELETED
@@ -1,156 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# = bio/io/ncbisoap.rb - SOAP interface for NCBI Entrez Utilities
|
3
|
-
#
|
4
|
-
# Copyright:: Copyright (C) 2004, 2006
|
5
|
-
# Toshiaki Katayama <k@bioruby.org>
|
6
|
-
# License:: The Ruby License
|
7
|
-
#
|
8
|
-
# $Id:$
|
9
|
-
#
|
10
|
-
|
11
|
-
require 'bio/io/ncbirest'
|
12
|
-
require 'bio/io/soapwsdl'
|
13
|
-
|
14
|
-
module Bio
|
15
|
-
class NCBI
|
16
|
-
|
17
|
-
# == References
|
18
|
-
#
|
19
|
-
# * http://eutils.ncbi.nlm.nih.gov/entrez/query/static/esoap_help.html
|
20
|
-
#
|
21
|
-
# == Methods
|
22
|
-
#
|
23
|
-
# All methods accept a hash as its argument and most of the keys can be
|
24
|
-
# ommited (values are string).
|
25
|
-
#
|
26
|
-
# Note: Methods which name ends with _MS are designed for use with
|
27
|
-
# Microsoft Visual Studio and SOAP Toolkit 3.0
|
28
|
-
#
|
29
|
-
# * http://www.ncbi.nlm.nih.gov/entrez/query/static/esoap_ms_help.html
|
30
|
-
#
|
31
|
-
# * run_eFetch(_MS)
|
32
|
-
# * "db", "id", "WebEnv", "query_key", "tool", "email", "retstart",
|
33
|
-
# "retmax", "rettype", "strand", "seq_start", "seq_stop", "complexity",
|
34
|
-
# "report"
|
35
|
-
#
|
36
|
-
# * run_eGquery(_MS)
|
37
|
-
# * "term", "tool", "email"
|
38
|
-
#
|
39
|
-
# * run_eInfo(_MS)
|
40
|
-
# * "db", "tool", "email"
|
41
|
-
#
|
42
|
-
# * run_eSpell(_MS)
|
43
|
-
# * "db", "term", "tool", "email"
|
44
|
-
#
|
45
|
-
# * run_eLink(_MS)
|
46
|
-
# * "db", "id", "reldate", "mindate", "maxdate", "datetype", "term"
|
47
|
-
# "dbfrom", "WebEnv", "query_key", "cmd", "tool", "email"
|
48
|
-
#
|
49
|
-
# * run_eSearch(_MS)
|
50
|
-
# * "db", "term", "WebEnv", "QueryKey", "usehistory", "tool", "email",
|
51
|
-
# "field", "reldate", "mindate", "maxdate", "datetype", "RetStart",
|
52
|
-
# "RetMax", "rettype", "sort"
|
53
|
-
#
|
54
|
-
# * run_eSummary(_MS)
|
55
|
-
# * "db", "id", "WebEnv", "query_key", "retstart", "retmax", "tool", "email"
|
56
|
-
#
|
57
|
-
# == Complex data types
|
58
|
-
#
|
59
|
-
# * http://www.ncbi.nlm.nih.gov/entrez/eutils/soap/egquery.xsd
|
60
|
-
# * http://www.ncbi.nlm.nih.gov/entrez/eutils/soap/einfo.xsd
|
61
|
-
# * http://www.ncbi.nlm.nih.gov/entrez/eutils/soap/esearch.xsd
|
62
|
-
# * http://www.ncbi.nlm.nih.gov/entrez/eutils/soap/esummary.xsd
|
63
|
-
# * http://www.ncbi.nlm.nih.gov/entrez/eutils/soap/elink.xsd
|
64
|
-
# * http://www.ncbi.nlm.nih.gov/entrez/eutils/soap/efetch.xsd
|
65
|
-
# * http://www.ncbi.nlm.nih.gov/entrez/eutils/soap/espell.xsd
|
66
|
-
#
|
67
|
-
class SOAP < Bio::SOAPWSDL
|
68
|
-
|
69
|
-
BASE_URI = "http://www.ncbi.nlm.nih.gov/entrez/eutils/soap/"
|
70
|
-
|
71
|
-
# set default to EUtils
|
72
|
-
SERVER_URI = BASE_URI + "eutils.wsdl"
|
73
|
-
|
74
|
-
def initialize(wsdl = nil)
|
75
|
-
super(wsdl || self.class::SERVER_URI)
|
76
|
-
end
|
77
|
-
|
78
|
-
def method_missing(*arg)
|
79
|
-
sleep 3 # make sure to rest for 3 seconds per request
|
80
|
-
@driver.send(*arg)
|
81
|
-
end
|
82
|
-
|
83
|
-
class EUtils < Bio::NCBI::SOAP
|
84
|
-
SERVER_URI = BASE_URI + "eutils.wsdl"
|
85
|
-
end
|
86
|
-
|
87
|
-
class EUtilsLite < Bio::NCBI::SOAP
|
88
|
-
SERVER_URI = BASE_URI + "eutils_lite.wsdl"
|
89
|
-
end
|
90
|
-
|
91
|
-
class EFetch < Bio::NCBI::SOAP
|
92
|
-
SERVER_URI = BASE_URI + "efetch.wsdl"
|
93
|
-
end
|
94
|
-
|
95
|
-
class EFetchLite < Bio::NCBI::SOAP
|
96
|
-
SERVER_URI = BASE_URI + "efetch_lit.wsdl"
|
97
|
-
end
|
98
|
-
|
99
|
-
end # SOAP
|
100
|
-
end # NCBI
|
101
|
-
end # Bio
|
102
|
-
|
103
|
-
|
104
|
-
if __FILE__ == $0
|
105
|
-
|
106
|
-
puts ">>> Bio::NCBI::SOAP::EFetch"
|
107
|
-
efetch = Bio::NCBI::SOAP::EFetch.new
|
108
|
-
|
109
|
-
puts "### run_eFetch in EFetch"
|
110
|
-
hash = {"db" => "protein", "id" => "37776955"}
|
111
|
-
result = efetch.run_eFetch(hash)
|
112
|
-
p result
|
113
|
-
|
114
|
-
puts ">>> Bio::NCBI::SOAP::EUtils"
|
115
|
-
eutils = Bio::NCBI::SOAP::EUtils.new
|
116
|
-
|
117
|
-
puts "### run_eFetch in EUtils"
|
118
|
-
hash = {"db" => "pubmed", "id" => "12345"}
|
119
|
-
result = eutils.run_eFetch(hash)
|
120
|
-
p result
|
121
|
-
|
122
|
-
puts "### run_eGquery - Entrez meta search to count hits in each DB"
|
123
|
-
hash = {"term" => "kinase"}
|
124
|
-
result = eutils.run_eGquery(hash) # working?
|
125
|
-
p result
|
126
|
-
|
127
|
-
puts "### run_eInfo - listing of the databases"
|
128
|
-
hash = {"db" => "protein"}
|
129
|
-
result = eutils.run_eInfo(hash)
|
130
|
-
p result
|
131
|
-
|
132
|
-
puts "### run_eSpell"
|
133
|
-
hash = {"db" => "pubmed", "term" => "kinas"}
|
134
|
-
result = eutils.run_eSpell(hash)
|
135
|
-
p result
|
136
|
-
p result["CorrectedQuery"]
|
137
|
-
|
138
|
-
puts "### run_eLink"
|
139
|
-
hash = {"db" => "protein", "id" => "37776955"}
|
140
|
-
result = eutils.run_eLink(hash) # working?
|
141
|
-
p result
|
142
|
-
|
143
|
-
puts "### run_eSearch"
|
144
|
-
hash = {"db" => "pubmed", "term" => "kinase"}
|
145
|
-
result = eutils.run_eSearch(hash)
|
146
|
-
p result
|
147
|
-
|
148
|
-
puts "### run_eSummary"
|
149
|
-
hash = {"db" => "protein", "id" => "37776955"}
|
150
|
-
result = eutils.run_eSummary(hash)
|
151
|
-
p result
|
152
|
-
|
153
|
-
end
|
154
|
-
|
155
|
-
|
156
|
-
|