bio 1.2.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +3421 -0
- data/KNOWN_ISSUES.rdoc +88 -0
- data/README.rdoc +252 -0
- data/README_DEV.rdoc +285 -0
- data/Rakefile +143 -0
- data/bin/bioruby +0 -0
- data/bin/br_biofetch.rb +0 -0
- data/bin/br_bioflat.rb +12 -1
- data/bin/br_biogetseq.rb +0 -0
- data/bin/br_pmfetch.rb +4 -3
- data/bioruby.gemspec +477 -0
- data/bioruby.gemspec.erb +117 -0
- data/doc/Changes-0.7.rd +7 -0
- data/doc/Changes-1.3.rdoc +239 -0
- data/doc/Tutorial.rd +296 -184
- data/doc/Tutorial.rd.html +1031 -0
- data/doc/Tutorial.rd.ja +111 -45
- data/doc/Tutorial.rd.ja.html +2225 -0
- data/doc/bioruby.css +281 -0
- data/extconf.rb +2 -0
- data/lib/bio.rb +29 -4
- data/lib/bio/appl/blast.rb +306 -121
- data/lib/bio/appl/blast/ddbj.rb +142 -0
- data/lib/bio/appl/blast/format0.rb +35 -25
- data/lib/bio/appl/blast/format8.rb +2 -2
- data/lib/bio/appl/blast/genomenet.rb +263 -0
- data/lib/bio/appl/blast/ncbioptions.rb +220 -0
- data/lib/bio/appl/blast/remote.rb +106 -0
- data/lib/bio/appl/blast/report.rb +260 -9
- data/lib/bio/appl/blast/rexml.rb +12 -5
- data/lib/bio/appl/blast/rpsblast.rb +277 -0
- data/lib/bio/appl/blast/wublast.rb +133 -12
- data/lib/bio/appl/blast/xmlparser.rb +35 -18
- data/lib/bio/appl/blat/report.rb +46 -5
- data/lib/bio/appl/emboss.rb +62 -13
- data/lib/bio/appl/fasta.rb +9 -11
- data/lib/bio/appl/genscan/report.rb +3 -3
- data/lib/bio/appl/hmmer.rb +1 -1
- data/lib/bio/appl/hmmer/report.rb +10 -10
- data/lib/bio/appl/paml/baseml.rb +95 -0
- data/lib/bio/appl/paml/baseml/report.rb +32 -0
- data/lib/bio/appl/paml/codeml.rb +242 -0
- data/lib/bio/appl/paml/codeml/rates.rb +67 -0
- data/lib/bio/appl/paml/codeml/report.rb +67 -0
- data/lib/bio/appl/paml/common.rb +348 -0
- data/lib/bio/appl/paml/common_report.rb +38 -0
- data/lib/bio/appl/paml/yn00.rb +103 -0
- data/lib/bio/appl/paml/yn00/report.rb +32 -0
- data/lib/bio/appl/psort.rb +2 -2
- data/lib/bio/appl/pts1.rb +5 -5
- data/lib/bio/appl/tmhmm/report.rb +10 -1
- data/lib/bio/command.rb +297 -41
- data/lib/bio/compat/features.rb +157 -0
- data/lib/bio/compat/references.rb +128 -0
- data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
- data/lib/bio/db/biosql/sequence.rb +508 -0
- data/lib/bio/db/embl/common.rb +28 -12
- data/lib/bio/db/embl/embl.rb +107 -9
- data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
- data/lib/bio/db/embl/format_embl.rb +190 -0
- data/lib/bio/db/embl/sptr.rb +15 -16
- data/lib/bio/db/fantom.rb +6 -8
- data/lib/bio/db/fasta.rb +10 -507
- data/lib/bio/db/fasta/defline.rb +532 -0
- data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
- data/lib/bio/db/fasta/format_fasta.rb +97 -0
- data/lib/bio/db/genbank/common.rb +25 -8
- data/lib/bio/db/genbank/format_genbank.rb +187 -0
- data/lib/bio/db/genbank/genbank.rb +36 -1
- data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
- data/lib/bio/db/gff.rb +1791 -119
- data/lib/bio/db/kegg/glycan.rb +2 -6
- data/lib/bio/db/lasergene.rb +3 -3
- data/lib/bio/db/medline.rb +4 -1
- data/lib/bio/db/newick.rb +10 -10
- data/lib/bio/db/pdb/chain.rb +6 -2
- data/lib/bio/db/pdb/pdb.rb +12 -3
- data/lib/bio/db/rebase.rb +7 -8
- data/lib/bio/db/soft.rb +3 -3
- data/lib/bio/feature.rb +1 -88
- data/lib/bio/io/biosql/biodatabase.rb +64 -0
- data/lib/bio/io/biosql/bioentry.rb +29 -0
- data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
- data/lib/bio/io/biosql/bioentry_path.rb +12 -0
- data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
- data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
- data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
- data/lib/bio/io/biosql/biosequence.rb +11 -0
- data/lib/bio/io/biosql/comment.rb +7 -0
- data/lib/bio/io/biosql/config/database.yml +20 -0
- data/lib/bio/io/biosql/dbxref.rb +13 -0
- data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
- data/lib/bio/io/biosql/location.rb +32 -0
- data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
- data/lib/bio/io/biosql/ontology.rb +10 -0
- data/lib/bio/io/biosql/reference.rb +9 -0
- data/lib/bio/io/biosql/seqfeature.rb +32 -0
- data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
- data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
- data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
- data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
- data/lib/bio/io/biosql/taxon.rb +12 -0
- data/lib/bio/io/biosql/taxon_name.rb +9 -0
- data/lib/bio/io/biosql/term.rb +27 -0
- data/lib/bio/io/biosql/term_dbxref.rb +11 -0
- data/lib/bio/io/biosql/term_path.rb +12 -0
- data/lib/bio/io/biosql/term_relationship.rb +13 -0
- data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
- data/lib/bio/io/biosql/term_synonym.rb +10 -0
- data/lib/bio/io/das.rb +7 -7
- data/lib/bio/io/ddbjxml.rb +57 -0
- data/lib/bio/io/ensembl.rb +2 -2
- data/lib/bio/io/fetch.rb +28 -14
- data/lib/bio/io/flatfile.rb +17 -853
- data/lib/bio/io/flatfile/autodetection.rb +545 -0
- data/lib/bio/io/flatfile/buffer.rb +237 -0
- data/lib/bio/io/flatfile/index.rb +17 -7
- data/lib/bio/io/flatfile/indexer.rb +30 -12
- data/lib/bio/io/flatfile/splitter.rb +297 -0
- data/lib/bio/io/hinv.rb +442 -0
- data/lib/bio/io/keggapi.rb +2 -2
- data/lib/bio/io/ncbirest.rb +733 -0
- data/lib/bio/io/pubmed.rb +34 -80
- data/lib/bio/io/registry.rb +2 -2
- data/lib/bio/io/sql.rb +178 -357
- data/lib/bio/io/togows.rb +458 -0
- data/lib/bio/location.rb +106 -11
- data/lib/bio/pathway.rb +120 -14
- data/lib/bio/reference.rb +115 -101
- data/lib/bio/sequence.rb +164 -183
- data/lib/bio/sequence/adapter.rb +108 -0
- data/lib/bio/sequence/common.rb +22 -45
- data/lib/bio/sequence/compat.rb +2 -2
- data/lib/bio/sequence/dblink.rb +54 -0
- data/lib/bio/sequence/format.rb +254 -77
- data/lib/bio/sequence/format_raw.rb +23 -0
- data/lib/bio/shell.rb +3 -1
- data/lib/bio/shell/core.rb +2 -2
- data/lib/bio/shell/plugin/entry.rb +33 -4
- data/lib/bio/shell/plugin/ncbirest.rb +64 -0
- data/lib/bio/shell/plugin/togows.rb +40 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
- data/lib/bio/tree.rb +4 -2
- data/lib/bio/util/color_scheme.rb +2 -2
- data/lib/bio/util/contingency_table.rb +2 -2
- data/lib/bio/util/restriction_enzyme.rb +2 -2
- data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
- data/lib/bio/version.rb +25 -0
- data/rdoc.zsh +8 -0
- data/sample/any2fasta.rb +0 -0
- data/sample/biofetch.rb +0 -0
- data/sample/dbget +0 -0
- data/sample/demo_sequence.rb +158 -0
- data/sample/enzymes.rb +0 -0
- data/sample/fasta2tab.rb +0 -0
- data/sample/fastagrep.rb +72 -0
- data/sample/fastasort.rb +54 -0
- data/sample/fsplit.rb +0 -0
- data/sample/gb2fasta.rb +2 -3
- data/sample/gb2tab.rb +0 -0
- data/sample/gbtab2mysql.rb +0 -0
- data/sample/genes2nuc.rb +0 -0
- data/sample/genes2pep.rb +0 -0
- data/sample/genes2tab.rb +0 -0
- data/sample/genome2rb.rb +0 -0
- data/sample/genome2tab.rb +0 -0
- data/sample/goslim.rb +0 -0
- data/sample/gt2fasta.rb +0 -0
- data/sample/na2aa.rb +34 -0
- data/sample/pmfetch.rb +0 -0
- data/sample/pmsearch.rb +0 -0
- data/sample/ssearch2tab.rb +0 -0
- data/sample/tfastx2tab.rb +0 -0
- data/sample/vs-genes.rb +0 -0
- data/setup.rb +1596 -0
- data/test/data/blast/blastp-multi.m7 +188 -0
- data/test/data/command/echoarg2.bat +1 -0
- data/test/data/paml/codeml/control_file.txt +30 -0
- data/test/data/paml/codeml/output.txt +78 -0
- data/test/data/paml/codeml/rates +217 -0
- data/test/data/rpsblast/misc.rpsblast +193 -0
- data/test/data/soft/GDS100_partial.soft +0 -0
- data/test/data/soft/GSE3457_family_partial.soft +0 -0
- data/test/functional/bio/appl/test_pts1.rb +115 -0
- data/test/functional/bio/io/test_ensembl.rb +123 -80
- data/test/functional/bio/io/test_togows.rb +267 -0
- data/test/functional/bio/sequence/test_output_embl.rb +51 -0
- data/test/functional/bio/test_command.rb +301 -0
- data/test/runner.rb +17 -1
- data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
- data/test/unit/bio/appl/blast/test_report.rb +753 -35
- data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
- data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
- data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
- data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
- data/test/unit/bio/appl/test_blast.rb +135 -4
- data/test/unit/bio/appl/test_fasta.rb +2 -2
- data/test/unit/bio/appl/test_pts1.rb +1 -64
- data/test/unit/bio/db/embl/test_common.rb +15 -15
- data/test/unit/bio/db/embl/test_embl.rb +4 -4
- data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
- data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
- data/test/unit/bio/db/embl/test_sptr.rb +38 -1
- data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
- data/test/unit/bio/db/test_gff.rb +1151 -25
- data/test/unit/bio/db/test_medline.rb +127 -0
- data/test/unit/bio/db/test_nexus.rb +5 -1
- data/test/unit/bio/db/test_prosite.rb +4 -4
- data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
- data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
- data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
- data/test/unit/bio/io/test_ddbjxml.rb +8 -3
- data/test/unit/bio/io/test_fastacmd.rb +5 -5
- data/test/unit/bio/io/test_flatfile.rb +357 -106
- data/test/unit/bio/io/test_soapwsdl.rb +2 -2
- data/test/unit/bio/io/test_togows.rb +161 -0
- data/test/unit/bio/sequence/test_common.rb +210 -11
- data/test/unit/bio/sequence/test_compat.rb +3 -3
- data/test/unit/bio/sequence/test_dblink.rb +58 -0
- data/test/unit/bio/sequence/test_na.rb +2 -2
- data/test/unit/bio/test_command.rb +111 -50
- data/test/unit/bio/test_feature.rb +29 -1
- data/test/unit/bio/test_location.rb +566 -6
- data/test/unit/bio/test_pathway.rb +91 -65
- data/test/unit/bio/test_reference.rb +67 -13
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
- data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
- metadata +202 -167
- data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
|
@@ -0,0 +1,458 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/io/togows.rb - REST interface for TogoWS
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2009 Naohisa Goto <ng@bioruby.org>
|
|
5
|
+
# License:: The Ruby License
|
|
6
|
+
#
|
|
7
|
+
# $Id:$
|
|
8
|
+
#
|
|
9
|
+
# Bio::TogoWS is a set of clients for the TogoWS web services
|
|
10
|
+
# (http://togows.dbcls.jp/).
|
|
11
|
+
#
|
|
12
|
+
# * Bio::TogoWS::REST is a REST client for the TogoWS.
|
|
13
|
+
# * Bio::TogoWS::SOAP will be implemented in the future.
|
|
14
|
+
#
|
|
15
|
+
|
|
16
|
+
require 'uri'
|
|
17
|
+
require 'cgi'
|
|
18
|
+
require 'bio/version'
|
|
19
|
+
require 'bio/command'
|
|
20
|
+
|
|
21
|
+
module Bio
|
|
22
|
+
|
|
23
|
+
# Bio::TogoWS is a namespace for the TogoWS web services.
|
|
24
|
+
module TogoWS
|
|
25
|
+
|
|
26
|
+
# Internal Use Only.
|
|
27
|
+
#
|
|
28
|
+
# Bio::TogoWS::AccessWait is a module to implement a
|
|
29
|
+
# private method for access.
|
|
30
|
+
module AccessWait

  # Default minimum interval (in seconds) between two accesses to
  # TogoWS services.
  TOGOWS_ACCESS_WAIT = 1

  # Upper limit (in seconds) of a computed waiting time, used to avoid
  # dead lock. A computed wait larger than this value is replaced by
  # (max/2) + rand(max), which randomizes access; the real maximum
  # waiting time is therefore about (max * 1.5).
  TOGOWS_ACCESS_WAIT_MAX = 60

  # Sleeps, if needed, so that accesses are separated by at least
  # TOGOWS_ACCESS_WAIT seconds.
  #
  # The expected end time of the sleep is recorded in a class variable
  # of this module, so a following caller computes its own wait
  # relative to that (possibly future) time.
  #
  # ---
  # *Returns*:: (Numeric) time slept, in seconds (0 when no sleep was needed)
  def togows_access_wait
    verbose = defined?(@debug) && @debug

    # initializing class variable
    @@togows_last_access ||= nil

    # determines waiting time; the recorded time may lie in the future,
    # in which case the elapsed time is negative and the wait grows.
    wait = 0
    previous = @@togows_last_access
    if previous
      elapsed = Time.now - previous
      wait = TOGOWS_ACCESS_WAIT - elapsed if elapsed < TOGOWS_ACCESS_WAIT
    end

    if wait > TOGOWS_ACCESS_WAIT_MAX
      # Too long: replace with a randomized value around the maximum.
      requested = wait
      wait = TOGOWS_ACCESS_WAIT_MAX / 2 + rand(TOGOWS_ACCESS_WAIT_MAX)
      $stderr.puts "TogoWS: sleeping time #{requested} is too long and set to #{wait} to avoid dead lock." if verbose
      expected_end = Time.now + TOGOWS_ACCESS_WAIT_MAX
    else
      expected_end = Time.now + wait
    end

    # records the expected end time of sleeping, never moving it backwards
    recorded = @@togows_last_access
    @@togows_last_access = expected_end if !recorded || recorded < expected_end

    # nothing to wait for
    return wait unless wait > 0

    $stderr.puts "TogoWS: sleeping #{wait} second" if verbose
    sleep(wait)
    wait
  end

  # Forgets the recorded last access time.
  # Should be used only for debug purpose.
  def reset_togows_access_wait
    @@togows_last_access = nil
  end

  # Both methods are implementation details of the including class.
  private :togows_access_wait, :reset_togows_access_wait

end #module AccessWait
|
|
98
|
+
|
|
99
|
+
# == Description
|
|
100
|
+
#
|
|
101
|
+
# Bio::TogoWS::REST is a REST client for the TogoWS web service.
|
|
102
|
+
#
|
|
103
|
+
# Details of the service are described in the following URI.
|
|
104
|
+
#
|
|
105
|
+
# * http://togows.dbcls.jp/site/en/rest.html
|
|
106
|
+
#
|
|
107
|
+
# == Examples
|
|
108
|
+
#
|
|
109
|
+
# For light users, class methods can be used.
|
|
110
|
+
#
|
|
111
|
+
# print Bio::TogoWS::REST.entry('genbank', 'AF237819')
|
|
112
|
+
# print Bio::TogoWS::REST.search('uniprot', 'lung cancer')
|
|
113
|
+
#
|
|
114
|
+
# For heavy users, an instance of the REST class can be created, and
|
|
115
|
+
# using the instance is more efficient than using class methods.
|
|
116
|
+
#
|
|
117
|
+
# t = Bio::TogoWS::REST.new
|
|
118
|
+
# print t.entry('genbank', 'AF237819')
|
|
119
|
+
# print t.search('uniprot', 'lung cancer')
|
|
120
|
+
#
|
|
121
|
+
# == References
|
|
122
|
+
#
|
|
123
|
+
# * http://togows.dbcls.jp/site/en/rest.html
|
|
124
|
+
#
|
|
125
|
+
class REST
|
|
126
|
+
|
|
127
|
+
include AccessWait
|
|
128
|
+
|
|
129
|
+
# URI of the TogoWS REST service
|
|
130
|
+
BASE_URI = 'http://togows.dbcls.jp/'.freeze
|
|
131
|
+
|
|
132
|
+
# preset default databases used by the retrieve method.
|
|
133
|
+
#
|
|
134
|
+
DEFAULT_RETRIEVAL_DATABASES =
|
|
135
|
+
%w( genbank uniprot embl ddbj dad )
|
|
136
|
+
|
|
137
|
+
# Creates a new object.
|
|
138
|
+
# ---
|
|
139
|
+
# *Arguments*:
|
|
140
|
+
# * (optional) _uri_: String or URI object
|
|
141
|
+
# *Returns*:: new object
|
|
142
|
+
def initialize(uri = BASE_URI)
  # Accept either a URI object or a string that URI.parse understands.
  target = uri.kind_of?(URI) ? uri : URI.parse(uri)

  # Normalize the base path so that it both starts and ends with "/".
  base = target.path
  base = '/' + base unless %r{\A/} =~ base
  base = base + '/' unless %r{/\z} =~ base
  @pathbase = base

  # Connection object and request header reused for every request.
  @http = Bio::Command.new_http(target.host, target.port)
  @header = { 'User-Agent' => "BioRuby/#{Bio::BIORUBY_VERSION_ID}" }

  # Debug output is off by default.
  @debug = false
end
|
|
153
|
+
|
|
154
|
+
# If true, shows debug information to $stderr.
|
|
155
|
+
attr_accessor :debug
|
|
156
|
+
|
|
157
|
+
# Debug purpose only.
|
|
158
|
+
# Returns Net::HTTP object used inside the object.
|
|
159
|
+
# The method will be changed in the future if the implementation
|
|
160
|
+
# of this class is changed.
|
|
161
|
+
def internal_http
  # Returns the object stored in @http as-is (no copy is made).
  @http
end
|
|
164
|
+
|
|
165
|
+
# Intelligent version of the entry method.
|
|
166
|
+
# If two or more databases are specified, sequentially tries
|
|
167
|
+
# them until valid entry is obtained.
|
|
168
|
+
#
|
|
169
|
+
# If database is not specified, preset default databases are used.
|
|
170
|
+
# See DEFAULT_RETRIEVAL_DATABASES for details.
|
|
171
|
+
#
|
|
172
|
+
# When multiple IDs and multiple databases are specified, sequentially
|
|
173
|
+
# tries each ID. Note that results with no hits found or with server
|
|
174
|
+
# errors are regarded as void strings. Also note that data format of
|
|
175
|
+
# the result entries can be different from entries to entries.
|
|
176
|
+
#
|
|
177
|
+
# ---
|
|
178
|
+
# *Arguments*:
|
|
179
|
+
# * (required) _ids_: (String) an entry ID, or
|
|
180
|
+
# (Array containing String) IDs. Note that strings containing "," are regarded as multiple IDs.
|
|
181
|
+
# * (optional) _hash_: (Hash) options below can be passed as a hash.
|
|
182
|
+
# * (optional) <I>:database</I>: (String) database name, or
|
|
183
|
+
# (Array containing String) database names.
|
|
184
|
+
# * (optional) <I>:format</I>: (String) format
|
|
185
|
+
# * (optional) <I>:field</I>: (String) gets only the specified field
|
|
186
|
+
# *Returns*:: String or nil
|
|
187
|
+
def retrieve(ids, hash = {})
  # Normalize ids into an Array of single IDs. Array-like input is joined
  # first so that elements containing "," are also split into separate IDs.
  # respond_to? is used instead of rescuing NoMethodError so that errors
  # raised inside to_ary are not silently hidden.
  ids = ids.respond_to?(:to_ary) ? ids.to_ary.join(',') : ids.to_s
  ids = ids.split(',')

  # Normalize the database list into an Array. Array-like objects are
  # converted with to_ary; anything else is treated as a single database
  # name, and an empty name means "no database".
  dbs = hash[:database] || DEFAULT_RETRIEVAL_DATABASES
  if dbs.respond_to?(:to_ary)
    dbs = dbs.to_ary
  else
    dbs = dbs.to_s.empty? ? [] : [ dbs.to_s ]
  end
  return nil if dbs.empty? || ids.empty?

  format = hash[:format]
  field = hash[:field]

  # With a single database, all IDs are fetched by one call to entry.
  return entry(dbs[0], ids, format, field) if dbs.size == 1

  # Otherwise, for each ID, try the databases in order and keep the first
  # non-blank result. nil or blank results are skipped.
  results = []
  ids.each do |idstr|
    dbs.each do |dbstr|
      r = entry(dbstr, idstr, format, field)
      if r && !r.strip.empty?
        results.push r
        break
      end
    end #dbs.each
  end #ids.each

  results.join('')
end #def retrieve
|
|
221
|
+
|
|
222
|
+
# Retrieves entries corresponding to the specified IDs.
|
|
223
|
+
#
|
|
224
|
+
# Example:
|
|
225
|
+
# t = Bio::TogoWS::REST.new
|
|
226
|
+
# kuma = t.entry('genbank', 'AF237819')
|
|
227
|
+
# # multiple IDs at a time
|
|
228
|
+
# misc = t.entry('genbank', [ 'AF237819', 'AF237820' ])
|
|
229
|
+
# # with format change
|
|
230
|
+
# p53 = t.entry('uniprot', 'P53_HUMAN', 'fasta')
|
|
231
|
+
#
|
|
232
|
+
# ---
|
|
233
|
+
# *Arguments*:
|
|
234
|
+
# * (required) _database_: (String) database name
|
|
235
|
+
# * (required) _ids_: (String) an entry ID, or
|
|
236
|
+
# (Array containing String) IDs. Note that strings containing ","
|
|
237
|
+
# are regarded as multiple IDs.
|
|
238
|
+
# * (optional) _format_: (String) format. nil means the default format
|
|
239
|
+
# (differs depending on the database).
|
|
240
|
+
# * (optional) _field_: (String) gets only the specified field if not nil
|
|
241
|
+
# *Returns*:: String or nil
|
|
242
|
+
def entry(database, ids, format = nil, field = nil)
  # Array-like ids are merged into one comma-separated string; anything
  # else is converted with to_s. respond_to? is used instead of rescuing
  # NoMethodError so that errors raised inside to_ary are not hidden.
  ids = ids.respond_to?(:to_ary) ? ids.to_ary.join(',') : ids.to_s

  # Path elements: "entry", the database, the IDs, and optionally the field.
  arg = [ 'entry', database, ids ]
  arg.push field if field

  # The format is appended as an extension to the last path element.
  arg[-1] = "#{arg[-1]}.#{format}" if format

  # Body of the response on HTTP 200, otherwise nil.
  prepare_return_value(get(*arg))
end
|
|
257
|
+
|
|
258
|
+
# Database search.
|
|
259
|
+
# Format of the search term string follows the Common Query Language.
|
|
260
|
+
# * http://en.wikipedia.org/wiki/Common_Query_Language
|
|
261
|
+
#
|
|
262
|
+
# Example:
|
|
263
|
+
# t = Bio::TogoWS::REST.new
|
|
264
|
+
# print t.search('uniprot', 'lung cancer')
|
|
265
|
+
# # only get the 10th and 11th hit ID
|
|
266
|
+
# print t.search('uniprot', 'lung cancer', 10, 2)
|
|
267
|
+
# # with json format
|
|
268
|
+
# print t.search('uniprot', 'lung cancer', 10, 2, 'json')
|
|
269
|
+
#
|
|
270
|
+
# ---
|
|
271
|
+
# *Arguments*:
|
|
272
|
+
# * (required) _database_: (String) database name
|
|
273
|
+
# * (required) _query_: (String) query string
|
|
274
|
+
# * (optional) _offset_: (Integer) offset in search results.
|
|
275
|
+
# * (optional) _limit_: (Integer) max. number of returned results.
|
|
276
|
+
# If offset is not nil and the limit is nil, it is set to 1.
|
|
277
|
+
# * (optional) _format_: (String) format. nil means the default format.
|
|
278
|
+
# *Returns*:: String or nil
|
|
279
|
+
def search(database, query, offset = nil, limit = nil, format = nil)
  # Path elements: "search", the database and the query string.
  segments = [ 'search', database, query ]

  # "offset,limit" is added only when an offset is given;
  # a missing limit then defaults to 1.
  segments.push "#{offset},#{limit || 1}" if offset

  # The format is appended as an extension to the last path element.
  segments[-1] = "#{segments[-1]}.#{format}" if format

  prepare_return_value(get(*segments))
end
|
|
290
|
+
|
|
291
|
+
# Data format conversion.
|
|
292
|
+
#
|
|
293
|
+
# Example:
|
|
294
|
+
# t = Bio::TogoWS::REST.new
|
|
295
|
+
# blast_string = File.read('test.blastn')
|
|
296
|
+
# t.convert(blast_string, 'blast', 'gff')
|
|
297
|
+
#
|
|
298
|
+
# ---
|
|
299
|
+
# *Arguments*:
|
|
300
|
+
# * (required) _data_: (String) input data
|
|
301
|
+
# * (required) _inputformat_: (String) data source format
|
|
302
|
+
# * (required) _format_: (String) output format
|
|
303
|
+
# *Returns*:: String or nil
|
|
304
|
+
def convert(data, inputformat, format)
  # The conversion is addressed as "convert/<inputformat>.<format>";
  # the input data itself is sent in the POST body.
  conversion = "#{inputformat}.#{format}"
  reply = post_data(data, 'convert', conversion)
  prepare_return_value(reply)
end
|
|
309
|
+
|
|
310
|
+
# Returns list of available databases in the entry service.
|
|
311
|
+
# ---
|
|
312
|
+
# *Returns*:: Array containing String
|
|
313
|
+
def entry_database_list
  # Delegates to database_list with the 'entry' service name.
  database_list('entry')
end
|
|
316
|
+
|
|
317
|
+
# Returns list of available databases in the search service.
|
|
318
|
+
# ---
|
|
319
|
+
# *Returns*:: Array containing String
|
|
320
|
+
def search_database_list
  # Delegates to database_list with the 'search' service name.
  database_list('search')
end
|
|
323
|
+
|
|
324
|
+
#--
|
|
325
|
+
# class methods
|
|
326
|
+
#++
|
|
327
|
+
|
|
328
|
+
# The same as Bio::TogoWS::REST#entry.
|
|
329
|
+
# Class-level shortcuts: each one creates a new object with the default
# settings and forwards all arguments to the instance method of the
# same name.
class << self

  # The same as Bio::TogoWS::REST#entry.
  def entry(*arg)
    new.entry(*arg)
  end

  # The same as Bio::TogoWS::REST#search.
  def search(*arg)
    new.search(*arg)
  end

  # The same as Bio::TogoWS::REST#convert.
  def convert(*arg)
    new.convert(*arg)
  end

  # The same as Bio::TogoWS::REST#retrieve.
  def retrieve(*arg)
    new.retrieve(*arg)
  end

  # The same as Bio::TogoWS::REST#entry_database_list
  def entry_database_list(*arg)
    new.entry_database_list(*arg)
  end

  # The same as Bio::TogoWS::REST#search_database_list
  def search_database_list(*arg)
    new.search_database_list(*arg)
  end

end #class << self
|
|
357
|
+
|
|
358
|
+
private
|
|
359
|
+
|
|
360
|
+
# Access to the TogoWS by using GET method.
|
|
361
|
+
#
|
|
362
|
+
# Example 1:
|
|
363
|
+
# get('entry', 'genbank', 'AF209156')
|
|
364
|
+
# Example 2:
|
|
365
|
+
# get('search', 'uniprot', 'lung cancer')
|
|
366
|
+
#
|
|
367
|
+
# ---
|
|
368
|
+
# *Arguments*:
|
|
369
|
+
# * (optional) _path_: String
|
|
370
|
+
# *Returns*:: Net::HTTPResponse object
|
|
371
|
+
def get(*paths)
  # Build the escaped request path from the given elements.
  request_path = make_path(paths)
  $stderr.puts "TogoWS: HTTP#get(#{request_path.inspect}, #{@header.inspect})" if @debug

  # Wait if needed before accessing the server, then send the request.
  togows_access_wait
  @http.get(request_path, @header)
end
|
|
379
|
+
|
|
380
|
+
# Access to the TogoWS by using GET method.
|
|
381
|
+
# Always adds '/' at the end of the path.
|
|
382
|
+
#
|
|
383
|
+
# Example 1:
|
|
384
|
+
# get_dir('entry')
|
|
385
|
+
#
|
|
386
|
+
# ---
|
|
387
|
+
# *Arguments*:
|
|
388
|
+
# * (optional) _path_: String
|
|
389
|
+
# *Returns*:: Net::HTTPResponse object
|
|
390
|
+
def get_dir(*paths)
  # Build the escaped request path and make sure it ends with "/".
  dir_path = make_path(paths)
  dir_path = dir_path + '/' unless %r{/\z} =~ dir_path
  $stderr.puts "TogoWS: HTTP#get(#{dir_path.inspect}, #{@header.inspect})" if @debug

  # Wait if needed before accessing the server, then send the request.
  togows_access_wait
  @http.get(dir_path, @header)
end
|
|
399
|
+
|
|
400
|
+
# Access to the TogoWS by using POST method.
|
|
401
|
+
# The data is stored to the form key 'data'.
|
|
402
|
+
# Mime type is 'application/x-www-form-urlencoded'.
|
|
403
|
+
# ---
|
|
404
|
+
# *Arguments*:
|
|
405
|
+
# * (required) _data_: String
|
|
406
|
+
# * (optional) _path_: String
|
|
407
|
+
# *Returns*:: Net::HTTPResponse object
|
|
408
|
+
def post_data(data, *paths)
  # Build the escaped request path from the given elements.
  request_path = make_path(paths)
  $stderr.puts "TogoWS: Bio::Command.http_post_form(#{request_path.inspect}, { \"data\" => (#{data.size} bytes) }, #{@header.inspect})" if @debug

  # Wait if needed before accessing the server, then post the data
  # under the form key 'data'.
  togows_access_wait
  form = { 'data' => data }
  Bio::Command.http_post_form(@http, request_path, form, @header)
end
|
|
416
|
+
|
|
417
|
+
# Generates path string from the given paths.
|
|
418
|
+
# ---
|
|
419
|
+
# *Arguments*:
|
|
420
|
+
# * (required) _paths_: Array containing String objects
|
|
421
|
+
# *Returns*:: String
|
|
422
|
+
def make_path(paths)
  # Each element is converted to a string and escaped with CGI.escape,
  # then the elements are joined with "/" and appended to the base path.
  escaped = paths.map { |element| CGI.escape(element.to_s) }
  @pathbase + escaped.join('/')
end
|
|
425
|
+
|
|
426
|
+
# If response.code == "200", returns body as a String.
|
|
427
|
+
# Otherwise, returns nil.
|
|
428
|
+
def prepare_return_value(response)
  $stderr.puts "TogoWS: #{response.inspect}" if @debug

  # Only a response whose code is the string "200" yields its body;
  # every other response yields nil.
  response.code == "200" ? response.body : nil
end
|
|
438
|
+
|
|
439
|
+
# Returns list of available databases
|
|
440
|
+
# ---
|
|
441
|
+
# *Arguments*:
|
|
442
|
+
# * (required) _service_: String
|
|
443
|
+
# *Returns*:: Array containing String
|
|
444
|
+
def database_list(service)
  # Fetch the listing of the service directory.
  listing = prepare_return_value(get_dir(service))

  # nil means the server did not answer with code "200".
  raise 'Unexpected server response' unless listing

  # One database name per line; both LF and CRLF line ends are accepted.
  listing.chomp.split(/\r?\n/)
end
|
|
453
|
+
|
|
454
|
+
end #class REST
|
|
455
|
+
|
|
456
|
+
end #module TogoWS
|
|
457
|
+
|
|
458
|
+
end #module Bio
|