bio 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259) hide show
  1. data/ChangeLog +3421 -0
  2. data/KNOWN_ISSUES.rdoc +88 -0
  3. data/README.rdoc +252 -0
  4. data/README_DEV.rdoc +285 -0
  5. data/Rakefile +143 -0
  6. data/bin/bioruby +0 -0
  7. data/bin/br_biofetch.rb +0 -0
  8. data/bin/br_bioflat.rb +12 -1
  9. data/bin/br_biogetseq.rb +0 -0
  10. data/bin/br_pmfetch.rb +4 -3
  11. data/bioruby.gemspec +477 -0
  12. data/bioruby.gemspec.erb +117 -0
  13. data/doc/Changes-0.7.rd +7 -0
  14. data/doc/Changes-1.3.rdoc +239 -0
  15. data/doc/Tutorial.rd +296 -184
  16. data/doc/Tutorial.rd.html +1031 -0
  17. data/doc/Tutorial.rd.ja +111 -45
  18. data/doc/Tutorial.rd.ja.html +2225 -0
  19. data/doc/bioruby.css +281 -0
  20. data/extconf.rb +2 -0
  21. data/lib/bio.rb +29 -4
  22. data/lib/bio/appl/blast.rb +306 -121
  23. data/lib/bio/appl/blast/ddbj.rb +142 -0
  24. data/lib/bio/appl/blast/format0.rb +35 -25
  25. data/lib/bio/appl/blast/format8.rb +2 -2
  26. data/lib/bio/appl/blast/genomenet.rb +263 -0
  27. data/lib/bio/appl/blast/ncbioptions.rb +220 -0
  28. data/lib/bio/appl/blast/remote.rb +106 -0
  29. data/lib/bio/appl/blast/report.rb +260 -9
  30. data/lib/bio/appl/blast/rexml.rb +12 -5
  31. data/lib/bio/appl/blast/rpsblast.rb +277 -0
  32. data/lib/bio/appl/blast/wublast.rb +133 -12
  33. data/lib/bio/appl/blast/xmlparser.rb +35 -18
  34. data/lib/bio/appl/blat/report.rb +46 -5
  35. data/lib/bio/appl/emboss.rb +62 -13
  36. data/lib/bio/appl/fasta.rb +9 -11
  37. data/lib/bio/appl/genscan/report.rb +3 -3
  38. data/lib/bio/appl/hmmer.rb +1 -1
  39. data/lib/bio/appl/hmmer/report.rb +10 -10
  40. data/lib/bio/appl/paml/baseml.rb +95 -0
  41. data/lib/bio/appl/paml/baseml/report.rb +32 -0
  42. data/lib/bio/appl/paml/codeml.rb +242 -0
  43. data/lib/bio/appl/paml/codeml/rates.rb +67 -0
  44. data/lib/bio/appl/paml/codeml/report.rb +67 -0
  45. data/lib/bio/appl/paml/common.rb +348 -0
  46. data/lib/bio/appl/paml/common_report.rb +38 -0
  47. data/lib/bio/appl/paml/yn00.rb +103 -0
  48. data/lib/bio/appl/paml/yn00/report.rb +32 -0
  49. data/lib/bio/appl/psort.rb +2 -2
  50. data/lib/bio/appl/pts1.rb +5 -5
  51. data/lib/bio/appl/tmhmm/report.rb +10 -1
  52. data/lib/bio/command.rb +297 -41
  53. data/lib/bio/compat/features.rb +157 -0
  54. data/lib/bio/compat/references.rb +128 -0
  55. data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
  56. data/lib/bio/db/biosql/sequence.rb +508 -0
  57. data/lib/bio/db/embl/common.rb +28 -12
  58. data/lib/bio/db/embl/embl.rb +107 -9
  59. data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
  60. data/lib/bio/db/embl/format_embl.rb +190 -0
  61. data/lib/bio/db/embl/sptr.rb +15 -16
  62. data/lib/bio/db/fantom.rb +6 -8
  63. data/lib/bio/db/fasta.rb +10 -507
  64. data/lib/bio/db/fasta/defline.rb +532 -0
  65. data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
  66. data/lib/bio/db/fasta/format_fasta.rb +97 -0
  67. data/lib/bio/db/genbank/common.rb +25 -8
  68. data/lib/bio/db/genbank/format_genbank.rb +187 -0
  69. data/lib/bio/db/genbank/genbank.rb +36 -1
  70. data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
  71. data/lib/bio/db/gff.rb +1791 -119
  72. data/lib/bio/db/kegg/glycan.rb +2 -6
  73. data/lib/bio/db/lasergene.rb +3 -3
  74. data/lib/bio/db/medline.rb +4 -1
  75. data/lib/bio/db/newick.rb +10 -10
  76. data/lib/bio/db/pdb/chain.rb +6 -2
  77. data/lib/bio/db/pdb/pdb.rb +12 -3
  78. data/lib/bio/db/rebase.rb +7 -8
  79. data/lib/bio/db/soft.rb +3 -3
  80. data/lib/bio/feature.rb +1 -88
  81. data/lib/bio/io/biosql/biodatabase.rb +64 -0
  82. data/lib/bio/io/biosql/bioentry.rb +29 -0
  83. data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
  84. data/lib/bio/io/biosql/bioentry_path.rb +12 -0
  85. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
  86. data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
  87. data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
  88. data/lib/bio/io/biosql/biosequence.rb +11 -0
  89. data/lib/bio/io/biosql/comment.rb +7 -0
  90. data/lib/bio/io/biosql/config/database.yml +20 -0
  91. data/lib/bio/io/biosql/dbxref.rb +13 -0
  92. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
  93. data/lib/bio/io/biosql/location.rb +32 -0
  94. data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
  95. data/lib/bio/io/biosql/ontology.rb +10 -0
  96. data/lib/bio/io/biosql/reference.rb +9 -0
  97. data/lib/bio/io/biosql/seqfeature.rb +32 -0
  98. data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
  99. data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
  100. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
  101. data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
  102. data/lib/bio/io/biosql/taxon.rb +12 -0
  103. data/lib/bio/io/biosql/taxon_name.rb +9 -0
  104. data/lib/bio/io/biosql/term.rb +27 -0
  105. data/lib/bio/io/biosql/term_dbxref.rb +11 -0
  106. data/lib/bio/io/biosql/term_path.rb +12 -0
  107. data/lib/bio/io/biosql/term_relationship.rb +13 -0
  108. data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
  109. data/lib/bio/io/biosql/term_synonym.rb +10 -0
  110. data/lib/bio/io/das.rb +7 -7
  111. data/lib/bio/io/ddbjxml.rb +57 -0
  112. data/lib/bio/io/ensembl.rb +2 -2
  113. data/lib/bio/io/fetch.rb +28 -14
  114. data/lib/bio/io/flatfile.rb +17 -853
  115. data/lib/bio/io/flatfile/autodetection.rb +545 -0
  116. data/lib/bio/io/flatfile/buffer.rb +237 -0
  117. data/lib/bio/io/flatfile/index.rb +17 -7
  118. data/lib/bio/io/flatfile/indexer.rb +30 -12
  119. data/lib/bio/io/flatfile/splitter.rb +297 -0
  120. data/lib/bio/io/hinv.rb +442 -0
  121. data/lib/bio/io/keggapi.rb +2 -2
  122. data/lib/bio/io/ncbirest.rb +733 -0
  123. data/lib/bio/io/pubmed.rb +34 -80
  124. data/lib/bio/io/registry.rb +2 -2
  125. data/lib/bio/io/sql.rb +178 -357
  126. data/lib/bio/io/togows.rb +458 -0
  127. data/lib/bio/location.rb +106 -11
  128. data/lib/bio/pathway.rb +120 -14
  129. data/lib/bio/reference.rb +115 -101
  130. data/lib/bio/sequence.rb +164 -183
  131. data/lib/bio/sequence/adapter.rb +108 -0
  132. data/lib/bio/sequence/common.rb +22 -45
  133. data/lib/bio/sequence/compat.rb +2 -2
  134. data/lib/bio/sequence/dblink.rb +54 -0
  135. data/lib/bio/sequence/format.rb +254 -77
  136. data/lib/bio/sequence/format_raw.rb +23 -0
  137. data/lib/bio/shell.rb +3 -1
  138. data/lib/bio/shell/core.rb +2 -2
  139. data/lib/bio/shell/plugin/entry.rb +33 -4
  140. data/lib/bio/shell/plugin/ncbirest.rb +64 -0
  141. data/lib/bio/shell/plugin/togows.rb +40 -0
  142. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
  143. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
  144. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
  145. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
  146. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
  147. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
  148. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
  149. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
  150. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
  151. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
  152. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
  153. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
  154. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
  156. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
  159. data/lib/bio/tree.rb +4 -2
  160. data/lib/bio/util/color_scheme.rb +2 -2
  161. data/lib/bio/util/contingency_table.rb +2 -2
  162. data/lib/bio/util/restriction_enzyme.rb +2 -2
  163. data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
  164. data/lib/bio/version.rb +25 -0
  165. data/rdoc.zsh +8 -0
  166. data/sample/any2fasta.rb +0 -0
  167. data/sample/biofetch.rb +0 -0
  168. data/sample/dbget +0 -0
  169. data/sample/demo_sequence.rb +158 -0
  170. data/sample/enzymes.rb +0 -0
  171. data/sample/fasta2tab.rb +0 -0
  172. data/sample/fastagrep.rb +72 -0
  173. data/sample/fastasort.rb +54 -0
  174. data/sample/fsplit.rb +0 -0
  175. data/sample/gb2fasta.rb +2 -3
  176. data/sample/gb2tab.rb +0 -0
  177. data/sample/gbtab2mysql.rb +0 -0
  178. data/sample/genes2nuc.rb +0 -0
  179. data/sample/genes2pep.rb +0 -0
  180. data/sample/genes2tab.rb +0 -0
  181. data/sample/genome2rb.rb +0 -0
  182. data/sample/genome2tab.rb +0 -0
  183. data/sample/goslim.rb +0 -0
  184. data/sample/gt2fasta.rb +0 -0
  185. data/sample/na2aa.rb +34 -0
  186. data/sample/pmfetch.rb +0 -0
  187. data/sample/pmsearch.rb +0 -0
  188. data/sample/ssearch2tab.rb +0 -0
  189. data/sample/tfastx2tab.rb +0 -0
  190. data/sample/vs-genes.rb +0 -0
  191. data/setup.rb +1596 -0
  192. data/test/data/blast/blastp-multi.m7 +188 -0
  193. data/test/data/command/echoarg2.bat +1 -0
  194. data/test/data/paml/codeml/control_file.txt +30 -0
  195. data/test/data/paml/codeml/output.txt +78 -0
  196. data/test/data/paml/codeml/rates +217 -0
  197. data/test/data/rpsblast/misc.rpsblast +193 -0
  198. data/test/data/soft/GDS100_partial.soft +0 -0
  199. data/test/data/soft/GSE3457_family_partial.soft +0 -0
  200. data/test/functional/bio/appl/test_pts1.rb +115 -0
  201. data/test/functional/bio/io/test_ensembl.rb +123 -80
  202. data/test/functional/bio/io/test_togows.rb +267 -0
  203. data/test/functional/bio/sequence/test_output_embl.rb +51 -0
  204. data/test/functional/bio/test_command.rb +301 -0
  205. data/test/runner.rb +17 -1
  206. data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
  207. data/test/unit/bio/appl/blast/test_report.rb +753 -35
  208. data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
  209. data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
  210. data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
  211. data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
  212. data/test/unit/bio/appl/test_blast.rb +135 -4
  213. data/test/unit/bio/appl/test_fasta.rb +2 -2
  214. data/test/unit/bio/appl/test_pts1.rb +1 -64
  215. data/test/unit/bio/db/embl/test_common.rb +15 -15
  216. data/test/unit/bio/db/embl/test_embl.rb +4 -4
  217. data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
  218. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
  219. data/test/unit/bio/db/embl/test_sptr.rb +38 -1
  220. data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
  221. data/test/unit/bio/db/test_gff.rb +1151 -25
  222. data/test/unit/bio/db/test_medline.rb +127 -0
  223. data/test/unit/bio/db/test_nexus.rb +5 -1
  224. data/test/unit/bio/db/test_prosite.rb +4 -4
  225. data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
  226. data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
  227. data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
  228. data/test/unit/bio/io/test_ddbjxml.rb +8 -3
  229. data/test/unit/bio/io/test_fastacmd.rb +5 -5
  230. data/test/unit/bio/io/test_flatfile.rb +357 -106
  231. data/test/unit/bio/io/test_soapwsdl.rb +2 -2
  232. data/test/unit/bio/io/test_togows.rb +161 -0
  233. data/test/unit/bio/sequence/test_common.rb +210 -11
  234. data/test/unit/bio/sequence/test_compat.rb +3 -3
  235. data/test/unit/bio/sequence/test_dblink.rb +58 -0
  236. data/test/unit/bio/sequence/test_na.rb +2 -2
  237. data/test/unit/bio/test_command.rb +111 -50
  238. data/test/unit/bio/test_feature.rb +29 -1
  239. data/test/unit/bio/test_location.rb +566 -6
  240. data/test/unit/bio/test_pathway.rb +91 -65
  241. data/test/unit/bio/test_reference.rb +67 -13
  242. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
  243. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
  244. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
  245. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
  246. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
  247. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
  248. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
  249. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
  250. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
  251. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
  252. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
  253. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
  254. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
  255. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
  256. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
  257. data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
  258. metadata +202 -167
  259. data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
@@ -0,0 +1,458 @@
1
+ #
2
+ # = bio/io/togows.rb - REST interface for TogoWS
3
+ #
4
+ # Copyright:: Copyright (C) 2009 Naohisa Goto <ng@bioruby.org>
5
+ # License:: The Ruby License
6
+ #
7
+ # $Id:$
8
+ #
9
+ # Bio::TogoWS is a set of clients for the TogoWS web services
10
+ # (http://togows.dbcls.jp/).
11
+ #
12
+ # * Bio::TogoWS::REST is a REST client for the TogoWS.
13
+ # * Bio::TogoWS::SOAP will be implemented in the future.
14
+ #
15
+
16
+ require 'uri'
17
+ require 'cgi'
18
+ require 'bio/version'
19
+ require 'bio/command'
20
+
21
+ module Bio
22
+
23
+ # Bio::TogoWS is a namespace for the TogoWS web services.
24
+ module TogoWS
25
+
26
+ # Internal Use Only.
27
+ #
28
+ # Bio::TogoWS::AccessWait is a module to implement a
29
+ # private method for access.
30
+ module AccessWait
31
+
32
+ # common default access wait for TogoWS services
33
+ TOGOWS_ACCESS_WAIT = 1
34
+
35
+ # Maximum waiting time to avoid dead lock.
36
+ # When exceeding this value, (max/2) + rand(max) is used,
37
+ # to randomize access.
38
+ # This means real maximum waiting time is (max * 1.5).
39
+ TOGOWS_ACCESS_WAIT_MAX = 60
40
+
41
+ # Sleeping if needed.
42
+ # It sleeps about TOGOWS_ACCESS_WAIT * (number of waiting processes).
43
+ #
44
+ # ---
45
+ # *Returns*:: (Numeric) time slept, in seconds
46
+ def togows_access_wait
47
+ w_min = TOGOWS_ACCESS_WAIT
48
+ debug = defined?(@debug) && @debug
49
+
50
+ # initializing class variable
51
+ @@togows_last_access ||= nil
52
+
53
+ # determines waiting time
54
+ wait = 0
55
+ if last = @@togows_last_access then
56
+ elapsed = Time.now - last
57
+ if elapsed < w_min then
58
+ wait = w_min - elapsed
59
+ end
60
+ end
61
+
62
+ # If wait is too long, truncated to TOGOWS_ACCESS_WAIT_MAX.
63
+ if wait > TOGOWS_ACCESS_WAIT_MAX then
64
+ orig_wait = wait
65
+ wait = TOGOWS_ACCESS_WAIT_MAX
66
+ wait = wait / 2 + rand(wait)
67
+ if debug then
68
+ $stderr.puts "TogoWS: sleeping time #{orig_wait} is too long and set to #{wait} to avoid dead lock."
69
+ end
70
+ newlast = Time.now + TOGOWS_ACCESS_WAIT_MAX
71
+ else
72
+ newlast = Time.now + wait
73
+ end
74
+
75
+ # put expected end time of sleeping
76
+ if !@@togows_last_access or @@togows_last_access < newlast then
77
+ @@togows_last_access = newlast
78
+ end
79
+
80
+ # sleeping if needed
81
+ if wait > 0 then
82
+ $stderr.puts "TogoWS: sleeping #{wait} second" if debug
83
+ sleep(wait)
84
+ end
85
+ # returns waited time
86
+ wait
87
+ end
88
+ private :togows_access_wait
89
+
90
+ # (private) resets last access.
91
+ # Should be used only for debug purpose.
92
+ def reset_togows_access_wait
93
+ @@togows_last_access = nil
94
+ end
95
+ private :reset_togows_access_wait
96
+
97
+ end #module AccessWait
98
+
99
+ # == Description
100
+ #
101
+ # Bio::TogoWS::REST is a REST client for the TogoWS web service.
102
+ #
103
+ # Details of the service are described in the following URI.
104
+ #
105
+ # * http://togows.dbcls.jp/site/en/rest.html
106
+ #
107
+ # == Examples
108
+ #
109
+ # For light users, class methods can be used.
110
+ #
111
+ # print Bio::TogoWS::REST.entry('genbank', 'AF237819')
112
+ # print Bio::TogoWS::REST.search('uniprot', 'lung cancer')
113
+ #
114
+ # For heavy users, an instance of the REST class can be created, and
115
+ # using the instance is more efficient than using class methods.
116
+ #
117
+ # t = Bio::TogoWS::REST.new
118
+ # print t.entry('genbank', 'AF237819')
119
+ # print t.search('uniprot', 'lung cancer')
120
+ #
121
+ # == References
122
+ #
123
+ # * http://togows.dbcls.jp/site/en/rest.html
124
+ #
125
+ class REST
126
+
127
+ include AccessWait
128
+
129
+ # URI of the TogoWS REST service
130
+ BASE_URI = 'http://togows.dbcls.jp/'.freeze
131
+
132
+ # preset default databases used by the retrieve method.
133
+ #
134
+ DEFAULT_RETRIEVAL_DATABASES =
135
+ %w( genbank uniprot embl ddbj dad )
136
+
137
+ # Creates a new object.
138
+ # ---
139
+ # *Arguments*:
140
+ # * (optional) _uri_: String or URI object
141
+ # *Returns*:: new object
142
+ def initialize(uri = BASE_URI)
143
+ uri = URI.parse(uri) unless uri.kind_of?(URI)
144
+ @pathbase = uri.path
145
+ @pathbase = '/' + @pathbase unless /\A\// =~ @pathbase
146
+ @pathbase = @pathbase + '/' unless /\/\z/ =~ @pathbase
147
+ @http = Bio::Command.new_http(uri.host, uri.port)
148
+ @header = {
149
+ 'User-Agent' => "BioRuby/#{Bio::BIORUBY_VERSION_ID}"
150
+ }
151
+ @debug = false
152
+ end
153
+
154
+ # If true, shows debug information to $stderr.
155
+ attr_accessor :debug
156
+
157
+ # Debug purpose only.
158
+ # Returns Net::HTTP object used inside the object.
159
+ # The method will be changed in the future if the implementation
160
+ # of this class is changed.
161
+ def internal_http
162
+ @http
163
+ end
164
+
165
+ # Intelligent version of the entry method.
166
+ # If two or more databases are specified, sequentially tries
167
+ # them until valid entry is obtained.
168
+ #
169
+ # If database is not specified, preset default databases are used.
170
+ # See DEFAULT_RETRIEVAL_DATABASES for details.
171
+ #
172
+ # When multiple IDs and multiple databases are specified, sequentially
173
+ # tries each ID. Note that results with no hits found or with server
174
+ # errors are regarded as void strings. Also note that the data format of
175
+ # the result entries can differ from entry to entry.
176
+ #
177
+ # ---
178
+ # *Arguments*:
179
+ # * (required) _ids_: (String) an entry ID, or
180
+ # (Array containing String) IDs. Note that strings containing "," are regarded as multiple IDs.
181
+ # * (optional) _hash_: (Hash) options below can be passed as a hash.
182
+ # * (optional) <I>:database</I>: (String) database name, or
183
+ # (Array containing String) database names.
184
+ # * (optional) <I>:format</I>: (String) format
185
+ # * (optional) <I>:field</I>: (String) gets only the specified field
186
+ # *Returns*:: String or nil
187
+ def retrieve(ids, hash = {})
188
+ begin
189
+ a = ids.to_ary
190
+ rescue NoMethodError
191
+ ids = ids.to_s
192
+ end
193
+ ids = a.join(',') if a
194
+ ids = ids.split(',')
195
+
196
+ dbs = hash[:database] || DEFAULT_RETRIEVAL_DATABASES
197
+ begin
198
+ dbs.to_ary
199
+ rescue NoMethodError
200
+ dbs = dbs.to_s.empty? ? [] : [ dbs.to_s ]
201
+ end
202
+ return nil if dbs.empty? or ids.empty?
203
+
204
+ if dbs.size == 1 then
205
+ return entry(dbs[0], ids, hash[:format], hash[:field])
206
+ end
207
+
208
+ results = []
209
+ ids.each do |idstr|
210
+ dbs.each do |dbstr|
211
+ r = entry(dbstr, idstr, hash[:format], hash[:field])
212
+ if r and !r.strip.empty? then
213
+ results.push r
214
+ break
215
+ end
216
+ end #dbs.each
217
+ end #ids.each
218
+
219
+ results.join('')
220
+ end #def retrieve
221
+
222
+ # Retrieves entries corresponding to the specified IDs.
223
+ #
224
+ # Example:
225
+ # t = Bio::TogoWS::REST.new
226
+ # kuma = t.entry('genbank', 'AF237819')
227
+ # # multiple IDs at a time
228
+ # misc = t.entry('genbank', [ 'AF237819', 'AF237820' ])
229
+ # # with format change
230
+ # p53 = t.entry('uniprot', 'P53_HUMAN', 'fasta')
231
+ #
232
+ # ---
233
+ # *Arguments*:
234
+ # * (required) _database_: (String) database name
235
+ # * (required) _ids_: (String) an entry ID, or
236
+ # (Array containing String) IDs. Note that strings containing ","
237
+ # are regarded as multiple IDs.
238
+ # * (optional) _format_: (String) format. nil means the default format
239
+ # (differs depending on the database).
240
+ # * (optional) _field_: (String) gets only the specified field if not nil
241
+ # *Returns*:: String or nil
242
+ def entry(database, ids, format = nil, field = nil)
243
+ begin
244
+ a = ids.to_ary
245
+ rescue NoMethodError
246
+ ids = ids.to_s
247
+ end
248
+ ids = a.join(',') if a
249
+
250
+ arg = [ 'entry', database, ids ]
251
+ arg.push field if field
252
+ arg[-1] = "#{arg[-1]}.#{format}" if format
253
+ response = get(*arg)
254
+
255
+ prepare_return_value(response)
256
+ end
257
+
258
+ # Database search.
259
+ # Format of the search term string follows the Common Query Language.
260
+ # * http://en.wikipedia.org/wiki/Common_Query_Language
261
+ #
262
+ # Example:
263
+ # t = Bio::TogoWS::REST.new
264
+ # print t.search('uniprot', 'lung cancer')
265
+ # # only get the 10th and 11th hit ID
266
+ # print t.search('uniprot', 'lung cancer', 10, 2)
267
+ # # with json format
268
+ # print t.search('uniprot', 'lung cancer', 10, 2, 'json')
269
+ #
270
+ # ---
271
+ # *Arguments*:
272
+ # * (required) _database_: (String) database name
273
+ # * (required) _query_: (String) query string
274
+ # * (optional) _offset_: (Integer) offset in search results.
275
+ # * (optional) _limit_: (Integer) max. number of returned results.
276
+ # If offset is not nil and the limit is nil, it is set to 1.
277
+ # * (optional) _format_: (String) format. nil means the default format.
278
+ # *Returns*:: String or nil
279
+ def search(database, query, offset = nil, limit = nil, format = nil)
280
+ arg = [ 'search', database, query ]
281
+ if offset then
282
+ limit ||= 1
283
+ arg.push "#{offset},#{limit}"
284
+ end
285
+ arg[-1] = "#{arg[-1]}.#{format}" if format
286
+ response = get(*arg)
287
+
288
+ prepare_return_value(response)
289
+ end
290
+
291
+ # Data format conversion.
292
+ #
293
+ # Example:
294
+ # t = Bio::TogoWS::REST.new
295
+ # blast_string = File.read('test.blastn')
296
+ # t.convert(blast_string, 'blast', 'gff')
297
+ #
298
+ # ---
299
+ # *Arguments*:
300
+ # * (required) _data_: (String) input data
301
+ # * (required) _inputformat_: (String) data source format
302
+ # * (required) _format_: (String) output format
303
+ # *Returns*:: String or nil
304
+ def convert(data, inputformat, format)
305
+ response = post_data(data, 'convert', "#{inputformat}.#{format}")
306
+
307
+ prepare_return_value(response)
308
+ end
309
+
310
+ # Returns list of available databases in the entry service.
311
+ # ---
312
+ # *Returns*:: Array containing String
313
+ def entry_database_list
314
+ database_list('entry')
315
+ end
316
+
317
+ # Returns list of available databases in the search service.
318
+ # ---
319
+ # *Returns*:: Array containing String
320
+ def search_database_list
321
+ database_list('search')
322
+ end
323
+
324
+ #--
325
+ # class methods
326
+ #++
327
+
328
+ # The same as Bio::TogoWS::REST#entry.
329
+ def self.entry(*arg)
330
+ self.new.entry(*arg)
331
+ end
332
+
333
+ # The same as Bio::TogoWS::REST#search.
334
+ def self.search(*arg)
335
+ self.new.search(*arg)
336
+ end
337
+
338
+ # The same as Bio::TogoWS::REST#convert.
339
+ def self.convert(*arg)
340
+ self.new.convert(*arg)
341
+ end
342
+
343
+ # The same as Bio::TogoWS::REST#retrieve.
344
+ def self.retrieve(*arg)
345
+ self.new.retrieve(*arg)
346
+ end
347
+
348
+ # The same as Bio::TogoWS::REST#entry_database_list
349
+ def self.entry_database_list(*arg)
350
+ self.new.entry_database_list(*arg)
351
+ end
352
+
353
+ # The same as Bio::TogoWS::REST#search_database_list
354
+ def self.search_database_list(*arg)
355
+ self.new.search_database_list(*arg)
356
+ end
357
+
358
+ private
359
+
360
+ # Access to the TogoWS by using GET method.
361
+ #
362
+ # Example 1:
363
+ # get('entry', 'genbank', 'AF209156')
364
+ # Example 2:
365
+ # get('search', 'uniprot', 'lung cancer')
366
+ #
367
+ # ---
368
+ # *Arguments*:
369
+ # * (optional) _paths_: one or more path components (String)
370
+ # *Returns*:: Net::HTTPResponse object
371
+ def get(*paths)
372
+ path = make_path(paths)
373
+ if @debug then
374
+ $stderr.puts "TogoWS: HTTP#get(#{path.inspect}, #{@header.inspect})"
375
+ end
376
+ togows_access_wait
377
+ @http.get(path, @header)
378
+ end
379
+
380
+ # Access to the TogoWS by using GET method.
381
+ # Always adds '/' at the end of the path.
382
+ #
383
+ # Example 1:
384
+ # get_dir('entry')
385
+ #
386
+ # ---
387
+ # *Arguments*:
388
+ # * (optional) _paths_: one or more path components (String)
389
+ # *Returns*:: Net::HTTPResponse object
390
+ def get_dir(*paths)
391
+ path = make_path(paths)
392
+ path += '/' unless /\/\z/ =~ path
393
+ if @debug then
394
+ $stderr.puts "TogoWS: HTTP#get(#{path.inspect}, #{@header.inspect})"
395
+ end
396
+ togows_access_wait
397
+ @http.get(path, @header)
398
+ end
399
+
400
+ # Access to the TogoWS by using POST method.
401
+ # The data is stored to the form key 'data'.
402
+ # Mime type is 'application/x-www-form-urlencoded'.
403
+ # ---
404
+ # *Arguments*:
405
+ # * (required) _data_: String
406
+ # * (optional) _paths_: one or more path components (String)
407
+ # *Returns*:: Net::HTTPResponse object
408
+ def post_data(data, *paths)
409
+ path = make_path(paths)
410
+ if @debug then
411
+ $stderr.puts "TogoWS: Bio::Command.http_post_form(#{path.inspect}, { \"data\" => (#{data.size} bytes) }, #{@header.inspect})"
412
+ end
413
+ togows_access_wait
414
+ Bio::Command.http_post_form(@http, path, { 'data' => data }, @header)
415
+ end
416
+
417
+ # Generates path string from the given paths.
418
+ # ---
419
+ # *Arguments*:
420
+ # * (required) _paths_: Array containing String objects
421
+ # *Returns*:: String
422
+ def make_path(paths)
423
+ @pathbase + paths.collect { |x| CGI.escape(x.to_s) }.join('/')
424
+ end
425
+
426
+ # If response.code == "200", returns body as a String.
427
+ # Otherwise, returns nil.
428
+ def prepare_return_value(response)
429
+ if @debug then
430
+ $stderr.puts "TogoWS: #{response.inspect}"
431
+ end
432
+ if response.code == "200" then
433
+ response.body
434
+ else
435
+ nil
436
+ end
437
+ end
438
+
439
+ # Returns list of available databases
440
+ # ---
441
+ # *Arguments*:
442
+ # * (required) _service_: String
443
+ # *Returns*:: Array containing String
444
+ def database_list(service)
445
+ response = get_dir(service)
446
+ str = prepare_return_value(response)
447
+ if str then
448
+ str.chomp.split(/\r?\n/)
449
+ else
450
+ raise 'Unexpected server response'
451
+ end
452
+ end
453
+
454
+ end #class REST
455
+
456
+ end #module TogoWS
457
+
458
+ end #module Bio