bio 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +107 -0
- data/bin/br_biofetch.rb +59 -0
- data/bin/br_bioflat.rb +294 -0
- data/bin/br_biogetseq.rb +57 -0
- data/bin/br_pmfetch.rb +431 -0
- data/doc/BioRuby.rd.ja +225 -0
- data/doc/Changes-0.7.rd +236 -0
- data/doc/Design.rd.ja +341 -0
- data/doc/KEGG_API.rd +1437 -0
- data/doc/KEGG_API.rd.ja +1399 -0
- data/doc/TODO.rd.ja +138 -0
- data/doc/Tutorial.rd +1138 -0
- data/doc/Tutorial.rd.ja +2110 -0
- data/etc/bioinformatics/seqdatabase.ini +210 -0
- data/lib/bio.rb +256 -0
- data/lib/bio/alignment.rb +1906 -0
- data/lib/bio/appl/bl2seq/report.rb +350 -0
- data/lib/bio/appl/blast.rb +269 -0
- data/lib/bio/appl/blast/format0.rb +1402 -0
- data/lib/bio/appl/blast/format8.rb +95 -0
- data/lib/bio/appl/blast/report.rb +652 -0
- data/lib/bio/appl/blast/rexml.rb +151 -0
- data/lib/bio/appl/blast/wublast.rb +553 -0
- data/lib/bio/appl/blast/xmlparser.rb +222 -0
- data/lib/bio/appl/blat/report.rb +392 -0
- data/lib/bio/appl/clustalw.rb +191 -0
- data/lib/bio/appl/clustalw/report.rb +154 -0
- data/lib/bio/appl/emboss.rb +68 -0
- data/lib/bio/appl/fasta.rb +262 -0
- data/lib/bio/appl/fasta/format10.rb +428 -0
- data/lib/bio/appl/fasta/format6.rb +37 -0
- data/lib/bio/appl/genscan/report.rb +570 -0
- data/lib/bio/appl/hmmer.rb +129 -0
- data/lib/bio/appl/hmmer/report.rb +556 -0
- data/lib/bio/appl/mafft.rb +222 -0
- data/lib/bio/appl/mafft/report.rb +119 -0
- data/lib/bio/appl/psort.rb +555 -0
- data/lib/bio/appl/psort/report.rb +473 -0
- data/lib/bio/appl/sim4.rb +134 -0
- data/lib/bio/appl/sim4/report.rb +501 -0
- data/lib/bio/appl/sosui/report.rb +166 -0
- data/lib/bio/appl/spidey/report.rb +604 -0
- data/lib/bio/appl/targetp/report.rb +283 -0
- data/lib/bio/appl/tmhmm/report.rb +238 -0
- data/lib/bio/command.rb +166 -0
- data/lib/bio/data/aa.rb +354 -0
- data/lib/bio/data/codontable.rb +740 -0
- data/lib/bio/data/na.rb +226 -0
- data/lib/bio/db.rb +340 -0
- data/lib/bio/db/aaindex.rb +280 -0
- data/lib/bio/db/embl/common.rb +332 -0
- data/lib/bio/db/embl/embl.rb +446 -0
- data/lib/bio/db/embl/sptr.rb +954 -0
- data/lib/bio/db/embl/swissprot.rb +32 -0
- data/lib/bio/db/embl/trembl.rb +31 -0
- data/lib/bio/db/embl/uniprot.rb +32 -0
- data/lib/bio/db/fantom.rb +604 -0
- data/lib/bio/db/fasta.rb +869 -0
- data/lib/bio/db/genbank/common.rb +299 -0
- data/lib/bio/db/genbank/ddbj.rb +34 -0
- data/lib/bio/db/genbank/genbank.rb +354 -0
- data/lib/bio/db/genbank/genpept.rb +73 -0
- data/lib/bio/db/genbank/refseq.rb +31 -0
- data/lib/bio/db/gff.rb +106 -0
- data/lib/bio/db/go.rb +497 -0
- data/lib/bio/db/kegg/brite.rb +51 -0
- data/lib/bio/db/kegg/cell.rb +88 -0
- data/lib/bio/db/kegg/compound.rb +130 -0
- data/lib/bio/db/kegg/enzyme.rb +125 -0
- data/lib/bio/db/kegg/expression.rb +173 -0
- data/lib/bio/db/kegg/genes.rb +293 -0
- data/lib/bio/db/kegg/genome.rb +362 -0
- data/lib/bio/db/kegg/glycan.rb +213 -0
- data/lib/bio/db/kegg/keggtab.rb +418 -0
- data/lib/bio/db/kegg/kgml.rb +299 -0
- data/lib/bio/db/kegg/ko.rb +178 -0
- data/lib/bio/db/kegg/reaction.rb +97 -0
- data/lib/bio/db/litdb.rb +131 -0
- data/lib/bio/db/medline.rb +317 -0
- data/lib/bio/db/nbrf.rb +199 -0
- data/lib/bio/db/pdb.rb +38 -0
- data/lib/bio/db/pdb/atom.rb +60 -0
- data/lib/bio/db/pdb/chain.rb +117 -0
- data/lib/bio/db/pdb/model.rb +106 -0
- data/lib/bio/db/pdb/pdb.rb +1682 -0
- data/lib/bio/db/pdb/residue.rb +122 -0
- data/lib/bio/db/pdb/utils.rb +234 -0
- data/lib/bio/db/prosite.rb +616 -0
- data/lib/bio/db/rebase.rb +417 -0
- data/lib/bio/db/transfac.rb +387 -0
- data/lib/bio/feature.rb +201 -0
- data/lib/bio/io/brdb.rb +103 -0
- data/lib/bio/io/das.rb +471 -0
- data/lib/bio/io/dbget.rb +212 -0
- data/lib/bio/io/ddbjxml.rb +614 -0
- data/lib/bio/io/fastacmd.rb +123 -0
- data/lib/bio/io/fetch.rb +114 -0
- data/lib/bio/io/flatfile.rb +496 -0
- data/lib/bio/io/flatfile/bdb.rb +266 -0
- data/lib/bio/io/flatfile/index.rb +1308 -0
- data/lib/bio/io/flatfile/indexer.rb +778 -0
- data/lib/bio/io/higet.rb +92 -0
- data/lib/bio/io/keggapi.rb +863 -0
- data/lib/bio/io/pubmed.rb +189 -0
- data/lib/bio/io/registry.rb +308 -0
- data/lib/bio/io/soapwsdl.rb +114 -0
- data/lib/bio/io/sql.rb +428 -0
- data/lib/bio/location.rb +650 -0
- data/lib/bio/pathway.rb +991 -0
- data/lib/bio/reference.rb +308 -0
- data/lib/bio/sequence.rb +593 -0
- data/lib/bio/shell.rb +51 -0
- data/lib/bio/shell/core.rb +512 -0
- data/lib/bio/shell/plugin/codon.rb +228 -0
- data/lib/bio/shell/plugin/entry.rb +85 -0
- data/lib/bio/shell/plugin/flatfile.rb +119 -0
- data/lib/bio/shell/plugin/keggapi.rb +187 -0
- data/lib/bio/shell/plugin/midi.rb +448 -0
- data/lib/bio/shell/plugin/obda.rb +63 -0
- data/lib/bio/shell/plugin/seq.rb +238 -0
- data/lib/bio/shell/session.rb +214 -0
- data/lib/bio/util/color_scheme.rb +214 -0
- data/lib/bio/util/color_scheme/buried.rb +78 -0
- data/lib/bio/util/color_scheme/helix.rb +78 -0
- data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
- data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
- data/lib/bio/util/color_scheme/strand.rb +78 -0
- data/lib/bio/util/color_scheme/taylor.rb +69 -0
- data/lib/bio/util/color_scheme/turn.rb +78 -0
- data/lib/bio/util/color_scheme/zappo.rb +69 -0
- data/lib/bio/util/contingency_table.rb +337 -0
- data/lib/bio/util/sirna.rb +306 -0
- data/lib/bioruby.rb +34 -0
- data/sample/biofetch.rb +475 -0
- data/sample/color_scheme_na.rb +99 -0
- data/sample/dbget +37 -0
- data/sample/fasta2tab.rb +99 -0
- data/sample/fsplit.rb +51 -0
- data/sample/gb2fasta.rb +31 -0
- data/sample/gb2tab.rb +325 -0
- data/sample/gbtab2mysql.rb +161 -0
- data/sample/genes2nuc.rb +33 -0
- data/sample/genes2pep.rb +33 -0
- data/sample/genes2tab.rb +81 -0
- data/sample/genome2rb.rb +29 -0
- data/sample/genome2tab.rb +76 -0
- data/sample/goslim.rb +311 -0
- data/sample/gt2fasta.rb +47 -0
- data/sample/pmfetch.rb +42 -0
- data/sample/pmsearch.rb +42 -0
- data/sample/psortplot_html.rb +222 -0
- data/sample/ssearch2tab.rb +96 -0
- data/sample/tdiary.rb +158 -0
- data/sample/tfastx2tab.rb +100 -0
- data/sample/vs-genes.rb +212 -0
- data/test/data/SOSUI/sample.report +11 -0
- data/test/data/TMHMM/sample.report +21 -0
- data/test/data/blast/eco:b0002.faa +15 -0
- data/test/data/blast/eco:b0002.faa.m0 +128 -0
- data/test/data/blast/eco:b0002.faa.m7 +65 -0
- data/test/data/blast/eco:b0002.faa.m8 +1 -0
- data/test/data/embl/AB090716.embl +65 -0
- data/test/data/genscan/sample.report +63 -0
- data/test/data/prosite/prosite.dat +2233 -0
- data/test/data/refseq/nm_126355.entret +64 -0
- data/test/data/uniprot/p53_human.uniprot +1456 -0
- data/test/runner.rb +10 -0
- data/test/unit/bio/appl/blast/test_report.rb +427 -0
- data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
- data/test/unit/bio/appl/genscan/test_report.rb +195 -0
- data/test/unit/bio/appl/sosui/test_report.rb +94 -0
- data/test/unit/bio/appl/targetp/test_report.rb +159 -0
- data/test/unit/bio/appl/test_blast.rb +159 -0
- data/test/unit/bio/appl/test_fasta.rb +142 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
- data/test/unit/bio/data/test_aa.rb +103 -0
- data/test/unit/bio/data/test_codontable.rb +120 -0
- data/test/unit/bio/data/test_na.rb +89 -0
- data/test/unit/bio/db/embl/test_common.rb +130 -0
- data/test/unit/bio/db/embl/test_embl.rb +227 -0
- data/test/unit/bio/db/embl/test_sptr.rb +268 -0
- data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
- data/test/unit/bio/db/kegg/test_genes.rb +58 -0
- data/test/unit/bio/db/test_fasta.rb +263 -0
- data/test/unit/bio/db/test_gff.rb +140 -0
- data/test/unit/bio/db/test_prosite.rb +1450 -0
- data/test/unit/bio/io/test_ddbjxml.rb +87 -0
- data/test/unit/bio/io/test_soapwsdl.rb +45 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
- data/test/unit/bio/test_alignment.rb +1028 -0
- data/test/unit/bio/test_command.rb +71 -0
- data/test/unit/bio/test_db.rb +109 -0
- data/test/unit/bio/test_feature.rb +128 -0
- data/test/unit/bio/test_location.rb +51 -0
- data/test/unit/bio/test_pathway.rb +485 -0
- data/test/unit/bio/test_sequence.rb +386 -0
- data/test/unit/bio/test_shell.rb +31 -0
- data/test/unit/bio/util/test_color_scheme.rb +45 -0
- data/test/unit/bio/util/test_contingency_table.rb +106 -0
- data/test/unit/bio/util/test_sirna.rb +258 -0
- metadata +295 -0
@@ -0,0 +1,189 @@
|
|
1
|
+
#
|
2
|
+
# bio/io/pubmed.rb - NCBI Entrez/PubMed client module
|
3
|
+
#
|
4
|
+
# Copyright (C) 2001 KATAYAMA Toshiaki <k@bioruby.org>
|
5
|
+
#
|
6
|
+
# This library is free software; you can redistribute it and/or
|
7
|
+
# modify it under the terms of the GNU Lesser General Public
|
8
|
+
# License as published by the Free Software Foundation; either
|
9
|
+
# version 2 of the License, or (at your option) any later version.
|
10
|
+
#
|
11
|
+
# This library is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
14
|
+
# Lesser General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
17
|
+
# License along with this library; if not, write to the Free Software
|
18
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
19
|
+
#
|
20
|
+
# $Id: pubmed.rb,v 1.12 2005/09/08 01:22:12 k Exp $
|
21
|
+
#
|
22
|
+
|
23
|
+
require 'net/http'
|
24
|
+
require 'cgi' unless defined?(CGI)
|
25
|
+
|
26
|
+
module Bio
|
27
|
+
|
28
|
+
class PubMed

  # Retrieves a PubMed entry by PMID through the Entrez query.fcgi CGI.
  #
  # id:: PubMed ID (converted with to_s before use).
  #
  # Returns the response body with carriage returns turned into newlines,
  # runs of newlines collapsed and <pre>/</pre> tags removed.
  # Raises a RuntimeError carrying the whole body when the body contains
  # "<id> Error".
  def self.query(id)
    host = "www.ncbi.nlm.nih.gov"
    path = "/entrez/query.fcgi?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
    fetch_medline(host, path, id)
  end

  # Same contract as PubMed.query, but the request goes to the
  # pmfetch.fcgi CGI instead of query.fcgi.
  def self.pmfetch(id)
    host = "www.ncbi.nlm.nih.gov"
    path = "/entrez/utils/pmfetch.fcgi?tool=bioruby&mode=text&report=medline&db=PubMed&id="
    fetch_medline(host, path, id)
  end

  # Searches PubMed through query.fcgi.
  #
  # str:: search term; it is CGI-escaped before being appended to the URL.
  #
  # Returns an Array with the contents of every <pre>...</pre> section
  # found in the (newline-normalized) response body.
  def self.search(str)
    host = "www.ncbi.nlm.nih.gov"
    path = "/entrez/query.fcgi?tool=bioruby&cmd=Search&doptcmdl=MEDLINE&db=PubMed&term="

    result = http_get_body(host, path + CGI.escape(str))
    result = result.gsub("\r", "\n").squeeze("\n")
    result.scan(/<pre>(.*?)<\/pre>/m).flatten
  end

  # Searches PubMed through the E-Utilities esearch.fcgi CGI.
  #
  # str::  search term (CGI-escaped before use).
  # hash:: extra query parameters, sent as key=value pairs.  'retmax'
  #        defaults to 100 when neither 'retmax' nor :retmax is given.
  #        The caller's hash is left untouched.
  #
  # Returns an Array with the text of every <Id>...</Id> element in the
  # response body.
  def self.esearch(str, hash = {})
    # Work on a copy so the default is not written into the caller's hash.
    options = hash.dup
    options['retmax'] = 100 unless options['retmax'] || options[:retmax]

    opts = []
    options.each do |k, v|
      # Escape both sides so values such as dates with '/' or terms with
      # '&' cannot break the query string.
      opts << "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}"
    end

    host = "eutils.ncbi.nlm.nih.gov"
    path = "/entrez/eutils/esearch.fcgi?tool=bioruby&db=pubmed&#{opts.join('&')}&term="

    result = http_get_body(host, path + CGI.escape(str))
    result.scan(/<Id>(.*?)<\/Id>/m).flatten
  end

  # Fetches MEDLINE records through the E-Utilities efetch.fcgi CGI.
  #
  # ids:: PubMed IDs, given either as separate arguments or as Array(s).
  #
  # Returns an Array obtained by splitting the response body on blank
  # lines.  Returns an empty Array without issuing a request when no ID
  # is given (including efetch([])).
  def self.efetch(*ids)
    # Flatten first so that efetch([]) is recognized as "no ids".
    ids = ids.flatten
    return [] if ids.empty?

    host = "eutils.ncbi.nlm.nih.gov"
    path = "/entrez/eutils/efetch.fcgi?tool=bioruby&db=pubmed&retmode=text&rettype=medline&id="

    result = http_get_body(host, path + ids.join(","))
    result.split(/\n\n+/)
  end

  # Issues a GET request for +path+ on +host+ and returns the body of
  # the response.
  def self.http_get_body(host, path)
    http = Net::HTTP.new(host)
    # The destructuring assignment accepts both a bare response object and
    # an Array whose first element is the response (presumably what older
    # net/http versions returned -- kept from the original code).
    response, = http.get(path)
    response.body
  end
  private_class_method :http_get_body

  # Shared implementation of query and pmfetch: fetches path + id, raises
  # when the body reports an error for that id, otherwise returns the
  # cleaned-up body.
  def self.fetch_medline(host, path, id)
    result = http_get_body(host, path + id.to_s)
    # Regexp.escape keeps ids containing regexp metacharacters from being
    # interpreted as a pattern (or raising a RegexpError).
    if result =~ /#{Regexp.escape(id.to_s)}\s+Error/
      raise( result )
    end
    result.gsub("\r", "\n").squeeze("\n").gsub(/<\/?pre>/, '')
  end
  private_class_method :fetch_medline

end
|
106
|
+
|
107
|
+
end
|
108
|
+
|
109
|
+
|
110
|
+
# Demo run when this file is executed directly: exercises every
# Bio::PubMed class method against the live NCBI servers.
if __FILE__ == $0

  puts Bio::PubMed.query("10592173")
  puts "--- ---"
  puts Bio::PubMed.pmfetch("10592173")
  puts "--- ---"
  # The query strings below used to end with a stray, unbalanced ')'.
  Bio::PubMed.search("(genome AND analysis) OR bioinformatics").each do |x|
    p x
  end
  puts "--- ---"
  Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics").each do |x|
    p x
  end
  puts "--- ---"
  puts Bio::PubMed.efetch("10592173", "14693808")

end
|
127
|
+
|
128
|
+
=begin
|
129
|
+
|
130
|
+
= Bio::PubMed
|
131
|
+
|
132
|
+
These class methods access NCBI/PubMed database via HTTP.
|
133
|
+
|
134
|
+
--- Bio::PubMed.esearch(str, options)
|
135
|
+
|
136
|
+
Search keywords in PubMed by E-Utils and returns an array of PubMed IDs.
|
137
|
+
Options can be a hash whose keys include 'field', 'reldate',
|
138
|
+
'mindate', 'maxdate', 'datetype', 'retstart', 'retmax', 'retmode',
|
139
|
+
and 'rettype' as specified in the following URL:
|
140
|
+
|
141
|
+
((<URL:http://eutils.ncbi.nlm.nih.gov/entrez/query/static/esearch_help.html#PubMed>))
|
142
|
+
|
143
|
+
Default 'retmax' is 100.
|
144
|
+
|
145
|
+
--- Bio::PubMed.efetch(pmids)
|
146
|
+
|
147
|
+
Returns an array of MEDLINE records. A list of PubMed IDs can be
|
148
|
+
supplied as follows:
|
149
|
+
|
150
|
+
Bio::PubMed.efetch(123)
|
151
|
+
Bio::PubMed.efetch(123,456,789)
|
152
|
+
Bio::PubMed.efetch([123,456,789])
|
153
|
+
|
154
|
+
--- Bio::PubMed.query(pmid)
|
155
|
+
|
156
|
+
Retrieve PubMed entry by PMID and returns MEDLINE format string (can
|
157
|
+
be parsed by the Bio::MEDLINE and can be converted into Bio::Reference
|
158
|
+
object).
|
159
|
+
|
160
|
+
--- Bio::PubMed.pmfetch(pmid)
|
161
|
+
|
162
|
+
Just another query method (by pmfetch).
|
163
|
+
|
164
|
+
--- Bio::PubMed.search(str)
|
165
|
+
|
166
|
+
Search the PubMed database by given keywords and returns the list of
|
167
|
+
matched records in MEDLINE format.
|
168
|
+
|
169
|
+
|
170
|
+
= For more information
|
171
|
+
|
172
|
+
* Overview
|
173
|
+
* ((<URL:http://www.ncbi.nlm.nih.gov/entrez/query/static/overview.html>))
|
174
|
+
* How to link
|
175
|
+
* ((<URL:http://www.ncbi.nlm.nih.gov/entrez/query/static/linking.html>))
|
176
|
+
* MEDLINE format
|
177
|
+
* ((<URL:http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#MEDLINEDisplayFormat>))
|
178
|
+
* Search field descriptions and tags
|
179
|
+
* ((<URL:http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#SearchFieldDescriptionsandTags>))
|
180
|
+
* Entrez utilities index
|
181
|
+
* ((<URL:http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html>))
|
182
|
+
* PmFetch CGI help
|
183
|
+
* ((<URL:http://www.ncbi.nlm.nih.gov/entrez/utils/pmfetch_help.html>))
|
184
|
+
* E-Utilities CGI help
|
185
|
+
* ((<URL:http://eutils.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html>))
|
186
|
+
|
187
|
+
=end
|
188
|
+
|
189
|
+
|
@@ -0,0 +1,308 @@
|
|
1
|
+
#
|
2
|
+
# = bio/io/registry.rb - OBDA BioRegistry module
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2002, 2003, 2004, 2005
|
5
|
+
# Toshiaki Katayama <k@bioruby.org>
|
6
|
+
# License:: LGPL
|
7
|
+
#
|
8
|
+
# $Id: registry.rb,v 1.16 2005/12/18 15:58:42 k Exp $
|
9
|
+
#
|
10
|
+
# == Description
|
11
|
+
#
|
12
|
+
# BioRegistry reads the OBDA (Open Bio Database Access) configuration file
|
13
|
+
# (seqdatabase.ini) and creates a registry object. OBDA was created during
|
14
|
+
# the BioHackathon held in Tucson and South Africa in 2002 as a project
|
15
|
+
# independent set of protocols to access biological databases. The spec
|
16
|
+
# is refined in the BioHackathon 2003 held in Singapore.
|
17
|
+
#
|
18
|
+
# By using the OBDA, users can access a database with the get_database method
|
19
|
+
# without knowing where and how the database is stored, and each database
|
20
|
+
# has the get_by_id method to obtain a sequence entry.
|
21
|
+
#
|
22
|
+
# Sample configuration file is distributed with BioRuby package which
|
23
|
+
# consists of stanza format entries as follows:
|
24
|
+
#
|
25
|
+
# VERSION=1.00
|
26
|
+
#
|
27
|
+
# [myembl]
|
28
|
+
# protocol=biofetch
|
29
|
+
# location=http://www.ebi.ac.uk/cgi-bin/dbfetch
|
30
|
+
# dbname=embl
|
31
|
+
#
|
32
|
+
# [mysp]
|
33
|
+
# protocol=biosql
|
34
|
+
# location=db.bioruby.org
|
35
|
+
# dbname=biosql
|
36
|
+
# driver=mysql
|
37
|
+
# user=root
|
38
|
+
# pass=
|
39
|
+
# biodbname=swissprot
|
40
|
+
#
|
41
|
+
# The first line means that this configuration file is version 1.00.
|
42
|
+
#
|
43
|
+
# The [myembl] line defines a user defined database name 'myembl' and
|
44
|
+
# following block indicates how the database can be accessed.
|
45
|
+
# In this example, the 'myembl' database is accessed via the OBDA's
|
46
|
+
# BioFetch protocol to the dbfetch server at EBI, where the EMBL
|
47
|
+
# database is accessed by the name 'embl' on the server side.
|
48
|
+
#
|
49
|
+
# The [mysp] line defines another database 'mysp' which accesses the
|
50
|
+
# RDB (Relational Database) at the db.bioruby.org via the OBDA's
|
51
|
+
# BioSQL protocol. This BioSQL server is running MySQL database as
|
52
|
+
# its backend and stores the SwissProt database by the name 'swissprot'
|
53
|
+
# and which can be accessed by 'root' user without password.
|
54
|
+
# Note that the db.bioruby.org server is a dummy for the explanation.
|
55
|
+
#
|
56
|
+
# The configuration file is searched by the following order.
|
57
|
+
#
|
58
|
+
# 1. Local file name given to the Bio::Registry.new(filename).
|
59
|
+
#
|
60
|
+
# 2. Remote or local file list given by the environmental variable
|
61
|
+
# 'OBDA_SEARCH_PATH', which is a '+' separated string of the
|
62
|
+
# remote (HTTP) and/or local files.
|
63
|
+
#
|
64
|
+
# e.g. OBDA_SEARCH_PATH="http://example.org/obda.ini+$HOME/lib/myobda.ini"
|
65
|
+
#
|
66
|
+
# 3. Local file "$HOME/.bioinformatics/seqdatabase.ini" in the user's
|
67
|
+
# home directory.
|
68
|
+
#
|
69
|
+
# 4. Local file "/etc/bioinformatics/seqdatabase.ini" in the system
|
70
|
+
# configuration directory.
|
71
|
+
#
|
72
|
+
# All these configuration files are loaded. If there are database
|
73
|
+
# definitions having the same name, the first one is used.
|
74
|
+
#
|
75
|
+
# If none of these files can be found, Bio::Registry.new will try
|
76
|
+
# to use http://www.open-bio.org/registry/seqdatabase.ini file.
|
77
|
+
#
|
78
|
+
# == References
|
79
|
+
#
|
80
|
+
# * http://obda.open-bio.org/
|
81
|
+
# * http://cvs.open-bio.org/cgi-bin/viewcvs/viewcvs.cgi/obda-specs/?cvsroot=obf-common
|
82
|
+
# * http://www.open-bio.org/registry/seqdatabase.ini
|
83
|
+
#
|
84
|
+
#--
|
85
|
+
# This library is free software; you can redistribute it and/or
|
86
|
+
# modify it under the terms of the GNU Lesser General Public
|
87
|
+
# License as published by the Free Software Foundation; either
|
88
|
+
# version 2 of the License, or (at your option) any later version.
|
89
|
+
#
|
90
|
+
# This library is distributed in the hope that it will be useful,
|
91
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
92
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
93
|
+
# Lesser General Public License for more details.
|
94
|
+
#
|
95
|
+
# You should have received a copy of the GNU Lesser General Public
|
96
|
+
# License along with this library; if not, write to the Free Software
|
97
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
98
|
+
#
|
99
|
+
#++
|
100
|
+
#
|
101
|
+
|
102
|
+
require 'uri'
|
103
|
+
require 'net/http'
|
104
|
+
|
105
|
+
|
106
|
+
module Bio
|
107
|
+
|
108
|
+
autoload :Fetch, 'bio/io/fetch'
|
109
|
+
autoload :SQL, 'bio/io/sql'
|
110
|
+
autoload :FlatFile, 'bio/io/flatfile'
|
111
|
+
autoload :FlatFileIndex, 'bio/io/flatfile/index'
|
112
|
+
|
113
|
+
class Registry

  # Builds the registry.
  #
  # file:: optional local configuration file; read first when given.
  #
  # After that, the files listed in the OBDA_SEARCH_PATH environment
  # variable are read when it is set and non-empty.  Otherwise
  # "$HOME/.bioinformatics/seqdatabase.ini" and
  # "/etc/bioinformatics/seqdatabase.ini" are read, and the remote
  # open-bio.org seqdatabase.ini is fetched only if no database has been
  # defined by then.
  def initialize(file = nil)
    @spec_version = nil
    @databases = Array.new
    read_local(file) if file
    env_path = ENV['OBDA_SEARCH_PATH']
    if env_path and env_path.size > 0
      read_env(env_path)
    else
      read_local("#{ENV['HOME']}/.bioinformatics/seqdatabase.ini")
      read_local("/etc/bioinformatics/seqdatabase.ini")
      if @databases.empty?
        read_remote("http://www.open-bio.org/registry/seqdatabase.ini")
      end
    end
  end

  # Version string of the first configuration file
  attr_reader :spec_version

  # List of databases (Array of Bio::Registry::DB)
  attr_reader :databases

  # Returns a database handle (Bio::SQL, Bio::Fetch etc.) or nil
  # if not found (case insensitive).
  # The handles should have get_by_id method.
  #
  # Entries whose protocol is not recognized are skipped, so a later
  # entry with the same name can still be used.  Raises
  # NotImplementedError for the 'bsane-corba', 'biocorba' and 'xembl'
  # protocols.
  def get_database(dbname)
    key = dbname.to_s.downcase
    @databases.each do |db|
      # Section names are stored as written in the file, so both sides
      # have to be downcased for the comparison to be case insensitive.
      next unless db.database.to_s.downcase == key
      case db.protocol
      when 'biofetch'
        return serv_biofetch(db)
      when 'biosql'
        return serv_biosql(db)
      when 'flat', 'index-flat', 'index-berkeleydb'
        return serv_flat(db)
      when 'bsane-corba', 'biocorba'
        raise NotImplementedError
      when 'xembl'
        raise NotImplementedError
      end
    end
    return nil
  end
  alias db get_database

  # Returns a Registry::DB object corresponding to the first dbname
  # entry in the registry records (case insensitive), or nil when no
  # entry has that name.
  def query(dbname)
    key = dbname.to_s.downcase
    @databases.find { |db| db.database.to_s.downcase == key }
  end

  private

  # Reads every element of a '+' separated search path.  Elements that
  # contain a ':' are treated as remote URLs, all others as local files.
  def read_env(path)
    path.split('+').each do |elem|
      if /:/.match(elem)
        read_remote(elem)
      else
        read_local(elem)
      end
    end
  end

  # Parses +file+ when it is readable; unreadable or missing files are
  # silently ignored.
  def read_local(file)
    if File.readable?(file)
      stanza = File.read(file)
      parse_stanza(stanza)
    end
  end

  # Fetches +url+ over HTTP and parses the body.  Only successful (2xx)
  # responses are parsed, so an error page is never taken for a
  # configuration file.
  def read_remote(url)
    schema, user, host, port, reg, path, = URI.split(url)
    Net::HTTP.start(host, port) do |http|
      response, = http.get(path)
      parse_stanza(response.body) if Net::HTTPSuccess === response
    end
  end

  # Parses the text of one configuration file and appends the databases
  # it defines to @databases.
  #
  # * "[name]" lines start a new database entry.
  # * "tag=value" lines after a section set a property on that entry.
  #   Only the first '=' separates tag and value, so values may contain
  #   '=' themselves; an empty value is stored as nil.
  # * A "VERSION=x" line before the first section sets spec_version,
  #   unless an earlier file has already set it.  Other "tag=value"
  #   lines before the first section are ignored.
  #
  # The +stanza+ string itself is not modified.
  def parse_stanza(stanza)
    return unless stanza
    current = nil   # entry opened by the most recent [section] of this file
    stanza.each_line do |line|
      case line
      when /^\[(.*)\]/
        current = DB.new($1.strip)
        @databases.push(current)
      when /=/
        tag, value = line.strip.split(/\s*=\s*/, 2)
        next if tag.nil? or tag.empty?
        value = nil if value and value.empty?
        if current
          current[tag] = value
        elsif tag == 'VERSION' and value
          # first whitespace-delimited token only, as before
          @spec_version ||= value[/\S+/]
        end
      end
    end
  end

  # Creates a Bio::Fetch handle for the entry's location and selects the
  # entry's dbname on it.
  def serv_biofetch(db)
    serv = Bio::Fetch.new(db.location)
    serv.database = db.dbname
    return serv
  end

  # Creates a Bio::SQL handle.  The DBI data source name is assembled as
  # "dbi:<driver>:<dbname>:<host>" plus ";port=<port>" when a port is
  # known, either from a "host:port" location or from a 'port' property.
  def serv_biosql(db)
    location, port = db.location.split(':')
    port = db.port unless port

    # Only MySQL and PostgreSQL are mapped to a DBI driver name.  For any
    # other driver value, +driver+ stays nil and is dropped from the DSN
    # by compact below.
    case db.driver
    when /mysql/i
      driver = 'Mysql'
    when /pg|postgres/i
      driver = 'Pg'
    end

    dbi = [ "dbi", driver, db.dbname, location ].compact.join(':')
    dbi += ';port=' + port.to_s if port
    serv = Bio::SQL.new(dbi, db.user, db.pass)

    # We can not manage biodbname (for name space) in BioSQL yet.
    # use db.biodbname here!!

    return serv
  end

  # Opens a Bio::FlatFileIndex at the entry's location, joined with its
  # dbname when one is set.
  def serv_flat(db)
    path = db.location
    path = File.join(path, db.dbname) if db.dbname
    serv = Bio::FlatFileIndex.open(path)
    return serv
  end


  # One database entry of the registry: the section name plus the
  # tag/value properties read from the configuration file.  Properties
  # are read through method calls named after the tag (db.protocol,
  # db.location, ...); tags that were never set read as nil.
  class DB

    def initialize(dbname)
      @database = dbname
      @property = Hash.new
    end

    # Section name exactly as it was passed to the constructor.
    attr_reader :database

    # Any argument-less call to an undefined method is a property lookup
    # by method name.  Calls with arguments are not property reads and
    # are passed on to the default handler (NoMethodError).
    def method_missing(meth_id, *args)
      return super unless args.empty?
      @property[meth_id.to_s]
    end

    # Reports the properties that have actually been set as methods the
    # object responds to.
    def respond_to_missing?(meth_id, include_private = false)
      @property.has_key?(meth_id.to_s) || super
    end

    # Sets the property +tag+ to +value+.
    def []=(tag, value)
      @property[tag] = value
    end

  end

end # class Registry
|
277
|
+
|
278
|
+
end # module Bio
|
279
|
+
|
280
|
+
|
281
|
+
|
282
|
+
# Demo run when this file is executed directly: loads the registry and
# fetches sample entries through several configured databases.
if __FILE__ == $0
  begin
    require 'pp'
    alias p pp
  rescue LoadError
    # pp is optional; keep the plain Kernel#p.  (A bare rescue would not
    # catch LoadError, which is not a StandardError.)
  end

  # Usually, you don't need to pass ARGV.
  reg = Bio::Registry.new(ARGV[0])

  p reg
  p reg.query('genbank_biosql')

  [
    [ 'genbank_biofetch',   'AA2CG' ],
    [ 'genbank_biosql',     'AA2CG' ],
    [ 'swissprot_biofetch', 'CYC_BOVIN' ],
    [ 'swissprot_biosql',   'CYC_BOVIN' ],
  ].each do |dbname, entry_id|
    serv = reg.get_database(dbname)
    if serv
      puts serv.get_by_id(entry_id)
    else
      # get_database returns nil for names missing from the registry
      warn "database '#{dbname}' is not defined in the registry"
    end
  end
end
|
307
|
+
|
308
|
+
|