bio 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,189 @@
1
+ #
2
+ # bio/io/pubmed.rb - NCBI Entrez/PubMed client module
3
+ #
4
+ # Copyright (C) 2001 KATAYAMA Toshiaki <k@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: pubmed.rb,v 1.12 2005/09/08 01:22:12 k Exp $
21
+ #
22
+
23
+ require 'net/http'
24
+ require 'cgi' unless defined?(CGI)
25
+
26
+ module Bio
27
+
28
class PubMed

  # Retrieves the entry for PubMed ID +id+ through the Entrez query.fcgi
  # endpoint (MEDLINE display format is requested in the URL).
  #
  # Returns the response body with carriage returns turned into newlines,
  # runs of newlines collapsed and <pre> tags removed.  Raises RuntimeError
  # carrying the response body when the body contains "<id> Error".
  def self.query(id)
    host = "www.ncbi.nlm.nih.gov"
    path = "/entrez/query.fcgi?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
    fetch_entry(host, path, id)
  end

  # Same contract as PubMed.query, but the request goes to the
  # pmfetch.fcgi endpoint.
  def self.pmfetch(id)
    host = "www.ncbi.nlm.nih.gov"
    path = "/entrez/utils/pmfetch.fcgi?tool=bioruby&mode=text&report=medline&db=PubMed&id="
    fetch_entry(host, path, id)
  end

  # Searches PubMed for +str+ through query.fcgi and returns an Array with
  # the contents of every <pre>...</pre> section found in the response
  # (carriage returns normalized, runs of newlines collapsed).
  def self.search(str)
    host = "www.ncbi.nlm.nih.gov"
    path = "/entrez/query.fcgi?tool=bioruby&cmd=Search&doptcmdl=MEDLINE&db=PubMed&term="

    body = http_get(host, path + CGI.escape(str))
    body.gsub("\r", "\n").squeeze("\n").scan(/<pre>(.*?)<\/pre>/m).flatten
  end

  # Searches PubMed for +str+ through the E-Utilities esearch endpoint and
  # returns an Array with the text of every <Id> element in the response.
  #
  # +hash+ holds extra query parameters; 'retmax' defaults to 100 when it
  # is missing or nil.  The caller's hash is left untouched, and both
  # parameter names and values are URL-escaped before being sent.
  def self.esearch(str, hash = {})
    # Work on a private copy so the default does not leak into the
    # caller's hash.  Keys are normalized to strings on the way.
    options = {}
    hash.each { |key, value| options[key.to_s] = value }
    options['retmax'] = 100 unless options['retmax']

    opts = options.map do |key, value|
      "#{CGI.escape(key)}=#{CGI.escape(value.to_s)}"
    end

    host = "eutils.ncbi.nlm.nih.gov"
    path = "/entrez/eutils/esearch.fcgi?tool=bioruby&db=pubmed&#{opts.join('&')}&term="

    http_get(host, path + CGI.escape(str)).scan(/<Id>(.*?)<\/Id>/m).flatten
  end

  # Fetches the records for the given PubMed IDs through the E-Utilities
  # efetch endpoint and returns the response split on blank lines.
  #
  # IDs may be passed as separate arguments, as one Array, or as a mix of
  # both.  Returns an empty Array without contacting the server when no
  # ID is left after flattening and dropping nils.
  def self.efetch(*ids)
    id_list = ids.flatten.compact
    return [] if id_list.empty?

    host = "eutils.ncbi.nlm.nih.gov"
    path = "/entrez/eutils/efetch.fcgi?tool=bioruby&db=pubmed&retmode=text&rettype=medline&id="

    http_get(host, path + id_list.join(",")).split(/\n\n+/)
  end

  # Performs a GET request for +path+ on +host+ and returns the body.
  def self.http_get(host, path)
    # The trailing comma keeps only the first element on Ruby versions
    # where Net::HTTP#get returns a [response, body] pair.
    response, = Net::HTTP.new(host).get(path)
    response.body
  end
  private_class_method :http_get

  # Shared implementation of PubMed.query and PubMed.pmfetch: requests
  # +path+ followed by +id+ and cleans up the returned text.
  def self.fetch_entry(host, path, id)
    result = http_get(host, path + id.to_s)
    # The id is escaped so that regexp metacharacters in it are matched
    # literally instead of changing (or breaking) the pattern.
    raise( result ) if result =~ /#{Regexp.escape(id.to_s)}\s+Error/
    result.gsub("\r", "\n").squeeze("\n").gsub(/<\/?pre>/, '')
  end
  private_class_method :fetch_entry

end
106
+
107
+ end
108
+
109
+
110
# Self-test: exercises every Bio::PubMed class method against the live
# NCBI servers when this file is run directly.
if __FILE__ == $0

  puts Bio::PubMed.query("10592173")
  puts "--- ---"
  puts Bio::PubMed.pmfetch("10592173")
  puts "--- ---"
  # The search expression must have balanced parentheses.
  Bio::PubMed.search("(genome AND analysis) OR bioinformatics").each do |x|
    p x
  end
  puts "--- ---"
  Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics").each do |x|
    p x
  end
  puts "--- ---"
  puts Bio::PubMed.efetch("10592173", "14693808")

end
127
+
128
+ =begin
129
+
130
+ = Bio::PubMed
131
+
132
+ These class methods access NCBI/PubMed database via HTTP.
133
+
134
+ --- Bio::PubMed.esearch(str, options)
135
+
136
+ Search keywords in PubMed by E-Utils and returns an array of PubMed IDs.
137
+ Options can be a hash whose keys include 'field', 'reldate',
138
+ 'mindate', 'maxdate', 'datetype', 'retstart', 'retmax', 'retmode',
139
+ and 'rettype' as specified in the following URL:
140
+
141
+ ((<URL:http://eutils.ncbi.nlm.nih.gov/entrez/query/static/esearch_help.html#PubMed>))
142
+
143
+ Default 'retmax' is 100.
144
+
145
+ --- Bio::PubMed.efetch(pmids)
146
+
147
+ Returns an array of MEDLINE records. A list of PubMed IDs can be
148
+ supplied as follows:
149
+
150
+ Bio::PubMed.efetch(123)
151
+ Bio::PubMed.efetch(123,456,789)
152
+ Bio::PubMed.efetch([123,456,789])
153
+
154
+ --- Bio::PubMed.query(pmid)
155
+
156
+ Retrieve PubMed entry by PMID and returns MEDLINE format string (can
157
+ be parsed by the Bio::MEDLINE and can be converted into Bio::Reference
158
+ object).
159
+
160
+ --- Bio::PubMed.pmfetch(pmid)
161
+
162
+ Just another query method (by pmfetch).
163
+
164
+ --- Bio::PubMed.search(str)
165
+
166
+ Search the PubMed database by given keywords and returns the list of
167
+ matched records in MEDLINE format.
168
+
169
+
170
+ = For more information
171
+
172
+ * Overview
173
+ * ((<URL:http://www.ncbi.nlm.nih.gov/entrez/query/static/overview.html>))
174
+ * How to link
175
+ * ((<URL:http://www.ncbi.nlm.nih.gov/entrez/query/static/linking.html>))
176
+ * MEDLINE format
177
+ * ((<URL:http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#MEDLINEDisplayFormat>))
178
+ * Search field descriptions and tags
179
+ * ((<URL:http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#SearchFieldDescriptionsandTags>))
180
+ * Entrez utilities index
181
+ * ((<URL:http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html>))
182
+ * PmFetch CGI help
183
+ * ((<URL:http://www.ncbi.nlm.nih.gov/entrez/utils/pmfetch_help.html>))
184
+ * E-Utilities CGI help
185
+ * ((<URL:http://eutils.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html>))
186
+
187
+ =end
188
+
189
+
@@ -0,0 +1,308 @@
1
+ #
2
+ # = bio/io/registry.rb - OBDA BioRegistry module
3
+ #
4
+ # Copyright:: Copyright (C) 2002, 2003, 2004, 2005
5
+ # Toshiaki Katayama <k@bioruby.org>
6
+ # License:: LGPL
7
+ #
8
+ # $Id: registry.rb,v 1.16 2005/12/18 15:58:42 k Exp $
9
+ #
10
+ # == Description
11
+ #
12
+ # BioRegistry reads the OBDA (Open Bio Database Access) configuration file
13
+ # (seqdatabase.ini) and creates a registry object. OBDA was created during
14
+ # the BioHackathon held in Tucson and South Africa in 2002 as a project
15
+ # independent set of protocols to access biological databases. The spec
16
+ # is refined in the BioHackathon 2003 held in Singapore.
17
+ #
18
+ # By using the OBDA, users can access a database through the get_database method
19
+ # without knowing where and how the database is stored, and each database
20
+ # has the get_by_id method to obtain a sequence entry.
21
+ #
22
+ # Sample configuration file is distributed with BioRuby package which
23
+ # consists of stanza format entries as follows:
24
+ #
25
+ # VERSION=1.00
26
+ #
27
+ # [myembl]
28
+ # protocol=biofetch
29
+ # location=http://www.ebi.ac.uk/cgi-bin/dbfetch
30
+ # dbname=embl
31
+ #
32
+ # [mysp]
33
+ # protocol=biosql
34
+ # location=db.bioruby.org
35
+ # dbname=biosql
36
+ # driver=mysql
37
+ # user=root
38
+ # pass=
39
+ # biodbname=swissprot
40
+ #
41
+ # The first line means that this configuration file is version 1.00.
42
+ #
43
+ # The [myembl] line defines a user defined database name 'myembl' and
44
+ # following block indicates how the database can be accessed.
45
+ # In this example, the 'myembl' database is accessed via the OBDA's
46
+ # BioFetch protocol to the dbfetch server at EBI, where the EMBL
47
+ # database is accessed by the name 'embl' on the server side.
48
+ #
49
+ # The [mysp] line defines another database 'mysp' which accesses the
50
+ # RDB (Relational Database) at the db.bioruby.org via the OBDA's
51
+ # BioSQL protocol. This BioSQL server is running MySQL database as
52
+ # its backend and stores the SwissProt database by the name 'swissprot'
53
+ # and which can be accessed by 'root' user without password.
54
+ # Note that the db.bioruby.org server is a dummy for the explanation.
55
+ #
56
+ # The configuration file is searched for in the following order.
57
+ #
58
+ # 1. Local file name given to the Bio::Registry.new(filename).
59
+ #
60
+ # 2. Remote or local file list given by the environment variable
61
+ # 'OBDA_SEARCH_PATH', which is a '+' separated string of the
62
+ # remote (HTTP) and/or local files.
63
+ #
64
+ # e.g. OBDA_SEARCH_PATH="http://example.org/obda.ini+$HOME/lib/myobda.ini"
65
+ #
66
+ # 3. Local file "$HOME/.bioinformatics/seqdatabase.ini" in the user's
67
+ # home directory.
68
+ #
69
+ # 4. Local file "/etc/bioinformatics/seqdatabase.ini" in the system
70
+ # configuration directory.
71
+ #
72
+ # All these configuration files are loaded. If there are database
73
+ # definitions having the same name, the first one is used.
74
+ #
75
+ # If none of these files can be found, Bio::Registry.new will try
76
+ # to use http://www.open-bio.org/registry/seqdatabase.ini file.
77
+ #
78
+ # == References
79
+ #
80
+ # * http://obda.open-bio.org/
81
+ # * http://cvs.open-bio.org/cgi-bin/viewcvs/viewcvs.cgi/obda-specs/?cvsroot=obf-common
82
+ # * http://www.open-bio.org/registry/seqdatabase.ini
83
+ #
84
+ #--
85
+ # This library is free software; you can redistribute it and/or
86
+ # modify it under the terms of the GNU Lesser General Public
87
+ # License as published by the Free Software Foundation; either
88
+ # version 2 of the License, or (at your option) any later version.
89
+ #
90
+ # This library is distributed in the hope that it will be useful,
91
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
92
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
93
+ # Lesser General Public License for more details.
94
+ #
95
+ # You should have received a copy of the GNU Lesser General Public
96
+ # License along with this library; if not, write to the Free Software
97
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
98
+ #
99
+ #++
100
+ #
101
+
102
+ require 'uri'
103
+ require 'net/http'
104
+
105
+
106
+ module Bio
107
+
108
+ autoload :Fetch, 'bio/io/fetch'
109
+ autoload :SQL, 'bio/io/sql'
110
+ autoload :FlatFile, 'bio/io/flatfile'
111
+ autoload :FlatFileIndex, 'bio/io/flatfile/index'
112
+
113
+ class Registry
114
+
115
+ def initialize(file = nil)
116
+ @spec_version = nil
117
+ @databases = Array.new
118
+ read_local(file) if file
119
+ env_path = ENV['OBDA_SEARCH_PATH']
120
+ if env_path and env_path.size > 0
121
+ read_env(env_path)
122
+ else
123
+ read_local("#{ENV['HOME']}/.bioinformatics/seqdatabase.ini")
124
+ read_local("/etc/bioinformatics/seqdatabase.ini")
125
+ if @databases.empty?
126
+ read_remote("http://www.open-bio.org/registry/seqdatabase.ini")
127
+ end
128
+ end
129
+ end
130
+
131
+ # Version string of the first configulation file
132
+ attr_reader :spec_version
133
+
134
+ # List of databases (Array of Bio::Registry::DB)
135
+ attr_reader :databases
136
+
137
+ # Returns a dababase handle (Bio::SQL, Bio::Fetch etc.) or nil
138
+ # if not found (case insensitive).
139
+ # The handles should have get_by_id method.
140
+ def get_database(dbname)
141
+ @databases.each do |db|
142
+ if db.database == dbname.downcase
143
+ case db.protocol
144
+ when 'biofetch'
145
+ return serv_biofetch(db)
146
+ when 'biosql'
147
+ return serv_biosql(db)
148
+ when 'flat', 'index-flat', 'index-berkeleydb'
149
+ return serv_flat(db)
150
+ when 'bsane-corba', 'biocorba'
151
+ raise NotImplementedError
152
+ when 'xembl'
153
+ raise NotImplementedError
154
+ end
155
+ end
156
+ end
157
+ return nil
158
+ end
159
+ alias db get_database
160
+
161
+ # Returns a Registry::DB object corresponding to the first dbname
162
+ # entry in the registry records (case insensitive).
163
+ def query(dbname)
164
+ @databases.each do |db|
165
+ return db if db.database == dbname.downcase
166
+ end
167
+ end
168
+
169
+ private
170
+
171
+ def read_env(path)
172
+ path.split('+').each do |elem|
173
+ if /:/.match(elem)
174
+ read_remote(elem)
175
+ else
176
+ read_local(elem)
177
+ end
178
+ end
179
+ end
180
+
181
+ def read_local(file)
182
+ if File.readable?(file)
183
+ stanza = File.read(file)
184
+ parse_stanza(stanza)
185
+ end
186
+ end
187
+
188
+ def read_remote(url)
189
+ schema, user, host, port, reg, path, = URI.split(url)
190
+ Net::HTTP.start(host, port) do |http|
191
+ response, = http.get(path)
192
+ parse_stanza(response.body)
193
+ end
194
+ end
195
+
196
+ def parse_stanza(stanza)
197
+ return unless stanza
198
+ if stanza[/.*/] =~ /VERSION\s*=\s*(\S+)/
199
+ @spec_version ||= $1 # for internal use (may differ on each file)
200
+ stanza[/.*/] = '' # remove VERSION line
201
+ end
202
+ stanza.each_line do |line|
203
+ case line
204
+ when /^\[(.*)\]/
205
+ dbname = $1.downcase
206
+ db = Bio::Registry::DB.new($1)
207
+ @databases.push(db)
208
+ when /=/
209
+ tag, value = line.chomp.split(/\s*=\s*/)
210
+ @databases.last[tag] = value
211
+ end
212
+ end
213
+ end
214
+
215
+ def serv_biofetch(db)
216
+ serv = Bio::Fetch.new(db.location)
217
+ serv.database = db.dbname
218
+ return serv
219
+ end
220
+
221
+ def serv_biosql(db)
222
+ location, port = db.location.split(':')
223
+ port = db.port unless port
224
+
225
+ case db.driver
226
+ when /mysql/i
227
+ driver = 'Mysql'
228
+ when /pg|postgres/i
229
+ driver = 'Pg'
230
+ when /oracle/
231
+ when /sybase/
232
+ when /sqlserver/
233
+ when /access/
234
+ when /csv/
235
+ when /informix/
236
+ when /odbc/
237
+ when /rdb/
238
+ end
239
+
240
+ dbi = [ "dbi", driver, db.dbname, location ].compact.join(':')
241
+ dbi += ';port=' + port if port
242
+ serv = Bio::SQL.new(dbi, db.user, db.pass)
243
+
244
+ # We can not manage biodbname (for name space) in BioSQL yet.
245
+ # use db.biodbname here!!
246
+
247
+ return serv
248
+ end
249
+
250
+ def serv_flat(db)
251
+ path = db.location
252
+ path = File.join(path, db.dbname) if db.dbname
253
+ serv = Bio::FlatFileIndex.open(path)
254
+ return serv
255
+ end
256
+
257
+
258
+ class DB
259
+
260
+ def initialize(dbname)
261
+ @database = dbname
262
+ @property = Hash.new
263
+ end
264
+ attr_reader :database
265
+
266
+ def method_missing(meth_id)
267
+ @property[meth_id.id2name]
268
+ end
269
+
270
+ def []=(tag, value)
271
+ @property[tag] = value
272
+ end
273
+
274
+ end
275
+
276
+ end # class Registry
277
+
278
+ end # module Bio
279
+
280
+
281
+
282
+ if __FILE__ == $0
283
+ begin
284
+ require 'pp'
285
+ alias p pp
286
+ rescue
287
+ end
288
+
289
+ # Usually, you don't need to pass ARGV.
290
+ reg = Bio::Registry.new(ARGV[0])
291
+
292
+ p reg
293
+ p reg.query('genbank_biosql')
294
+
295
+ serv = reg.get_database('genbank_biofetch')
296
+ puts serv.get_by_id('AA2CG')
297
+
298
+ serv = reg.get_database('genbank_biosql')
299
+ puts serv.get_by_id('AA2CG')
300
+
301
+ serv = reg.get_database('swissprot_biofetch')
302
+ puts serv.get_by_id('CYC_BOVIN')
303
+
304
+ serv = reg.get_database('swissprot_biosql')
305
+ puts serv.get_by_id('CYC_BOVIN')
306
+ end
307
+
308
+