bio 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +107 -0
- data/bin/br_biofetch.rb +59 -0
- data/bin/br_bioflat.rb +294 -0
- data/bin/br_biogetseq.rb +57 -0
- data/bin/br_pmfetch.rb +431 -0
- data/doc/BioRuby.rd.ja +225 -0
- data/doc/Changes-0.7.rd +236 -0
- data/doc/Design.rd.ja +341 -0
- data/doc/KEGG_API.rd +1437 -0
- data/doc/KEGG_API.rd.ja +1399 -0
- data/doc/TODO.rd.ja +138 -0
- data/doc/Tutorial.rd +1138 -0
- data/doc/Tutorial.rd.ja +2110 -0
- data/etc/bioinformatics/seqdatabase.ini +210 -0
- data/lib/bio.rb +256 -0
- data/lib/bio/alignment.rb +1906 -0
- data/lib/bio/appl/bl2seq/report.rb +350 -0
- data/lib/bio/appl/blast.rb +269 -0
- data/lib/bio/appl/blast/format0.rb +1402 -0
- data/lib/bio/appl/blast/format8.rb +95 -0
- data/lib/bio/appl/blast/report.rb +652 -0
- data/lib/bio/appl/blast/rexml.rb +151 -0
- data/lib/bio/appl/blast/wublast.rb +553 -0
- data/lib/bio/appl/blast/xmlparser.rb +222 -0
- data/lib/bio/appl/blat/report.rb +392 -0
- data/lib/bio/appl/clustalw.rb +191 -0
- data/lib/bio/appl/clustalw/report.rb +154 -0
- data/lib/bio/appl/emboss.rb +68 -0
- data/lib/bio/appl/fasta.rb +262 -0
- data/lib/bio/appl/fasta/format10.rb +428 -0
- data/lib/bio/appl/fasta/format6.rb +37 -0
- data/lib/bio/appl/genscan/report.rb +570 -0
- data/lib/bio/appl/hmmer.rb +129 -0
- data/lib/bio/appl/hmmer/report.rb +556 -0
- data/lib/bio/appl/mafft.rb +222 -0
- data/lib/bio/appl/mafft/report.rb +119 -0
- data/lib/bio/appl/psort.rb +555 -0
- data/lib/bio/appl/psort/report.rb +473 -0
- data/lib/bio/appl/sim4.rb +134 -0
- data/lib/bio/appl/sim4/report.rb +501 -0
- data/lib/bio/appl/sosui/report.rb +166 -0
- data/lib/bio/appl/spidey/report.rb +604 -0
- data/lib/bio/appl/targetp/report.rb +283 -0
- data/lib/bio/appl/tmhmm/report.rb +238 -0
- data/lib/bio/command.rb +166 -0
- data/lib/bio/data/aa.rb +354 -0
- data/lib/bio/data/codontable.rb +740 -0
- data/lib/bio/data/na.rb +226 -0
- data/lib/bio/db.rb +340 -0
- data/lib/bio/db/aaindex.rb +280 -0
- data/lib/bio/db/embl/common.rb +332 -0
- data/lib/bio/db/embl/embl.rb +446 -0
- data/lib/bio/db/embl/sptr.rb +954 -0
- data/lib/bio/db/embl/swissprot.rb +32 -0
- data/lib/bio/db/embl/trembl.rb +31 -0
- data/lib/bio/db/embl/uniprot.rb +32 -0
- data/lib/bio/db/fantom.rb +604 -0
- data/lib/bio/db/fasta.rb +869 -0
- data/lib/bio/db/genbank/common.rb +299 -0
- data/lib/bio/db/genbank/ddbj.rb +34 -0
- data/lib/bio/db/genbank/genbank.rb +354 -0
- data/lib/bio/db/genbank/genpept.rb +73 -0
- data/lib/bio/db/genbank/refseq.rb +31 -0
- data/lib/bio/db/gff.rb +106 -0
- data/lib/bio/db/go.rb +497 -0
- data/lib/bio/db/kegg/brite.rb +51 -0
- data/lib/bio/db/kegg/cell.rb +88 -0
- data/lib/bio/db/kegg/compound.rb +130 -0
- data/lib/bio/db/kegg/enzyme.rb +125 -0
- data/lib/bio/db/kegg/expression.rb +173 -0
- data/lib/bio/db/kegg/genes.rb +293 -0
- data/lib/bio/db/kegg/genome.rb +362 -0
- data/lib/bio/db/kegg/glycan.rb +213 -0
- data/lib/bio/db/kegg/keggtab.rb +418 -0
- data/lib/bio/db/kegg/kgml.rb +299 -0
- data/lib/bio/db/kegg/ko.rb +178 -0
- data/lib/bio/db/kegg/reaction.rb +97 -0
- data/lib/bio/db/litdb.rb +131 -0
- data/lib/bio/db/medline.rb +317 -0
- data/lib/bio/db/nbrf.rb +199 -0
- data/lib/bio/db/pdb.rb +38 -0
- data/lib/bio/db/pdb/atom.rb +60 -0
- data/lib/bio/db/pdb/chain.rb +117 -0
- data/lib/bio/db/pdb/model.rb +106 -0
- data/lib/bio/db/pdb/pdb.rb +1682 -0
- data/lib/bio/db/pdb/residue.rb +122 -0
- data/lib/bio/db/pdb/utils.rb +234 -0
- data/lib/bio/db/prosite.rb +616 -0
- data/lib/bio/db/rebase.rb +417 -0
- data/lib/bio/db/transfac.rb +387 -0
- data/lib/bio/feature.rb +201 -0
- data/lib/bio/io/brdb.rb +103 -0
- data/lib/bio/io/das.rb +471 -0
- data/lib/bio/io/dbget.rb +212 -0
- data/lib/bio/io/ddbjxml.rb +614 -0
- data/lib/bio/io/fastacmd.rb +123 -0
- data/lib/bio/io/fetch.rb +114 -0
- data/lib/bio/io/flatfile.rb +496 -0
- data/lib/bio/io/flatfile/bdb.rb +266 -0
- data/lib/bio/io/flatfile/index.rb +1308 -0
- data/lib/bio/io/flatfile/indexer.rb +778 -0
- data/lib/bio/io/higet.rb +92 -0
- data/lib/bio/io/keggapi.rb +863 -0
- data/lib/bio/io/pubmed.rb +189 -0
- data/lib/bio/io/registry.rb +308 -0
- data/lib/bio/io/soapwsdl.rb +114 -0
- data/lib/bio/io/sql.rb +428 -0
- data/lib/bio/location.rb +650 -0
- data/lib/bio/pathway.rb +991 -0
- data/lib/bio/reference.rb +308 -0
- data/lib/bio/sequence.rb +593 -0
- data/lib/bio/shell.rb +51 -0
- data/lib/bio/shell/core.rb +512 -0
- data/lib/bio/shell/plugin/codon.rb +228 -0
- data/lib/bio/shell/plugin/entry.rb +85 -0
- data/lib/bio/shell/plugin/flatfile.rb +119 -0
- data/lib/bio/shell/plugin/keggapi.rb +187 -0
- data/lib/bio/shell/plugin/midi.rb +448 -0
- data/lib/bio/shell/plugin/obda.rb +63 -0
- data/lib/bio/shell/plugin/seq.rb +238 -0
- data/lib/bio/shell/session.rb +214 -0
- data/lib/bio/util/color_scheme.rb +214 -0
- data/lib/bio/util/color_scheme/buried.rb +78 -0
- data/lib/bio/util/color_scheme/helix.rb +78 -0
- data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
- data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
- data/lib/bio/util/color_scheme/strand.rb +78 -0
- data/lib/bio/util/color_scheme/taylor.rb +69 -0
- data/lib/bio/util/color_scheme/turn.rb +78 -0
- data/lib/bio/util/color_scheme/zappo.rb +69 -0
- data/lib/bio/util/contingency_table.rb +337 -0
- data/lib/bio/util/sirna.rb +306 -0
- data/lib/bioruby.rb +34 -0
- data/sample/biofetch.rb +475 -0
- data/sample/color_scheme_na.rb +99 -0
- data/sample/dbget +37 -0
- data/sample/fasta2tab.rb +99 -0
- data/sample/fsplit.rb +51 -0
- data/sample/gb2fasta.rb +31 -0
- data/sample/gb2tab.rb +325 -0
- data/sample/gbtab2mysql.rb +161 -0
- data/sample/genes2nuc.rb +33 -0
- data/sample/genes2pep.rb +33 -0
- data/sample/genes2tab.rb +81 -0
- data/sample/genome2rb.rb +29 -0
- data/sample/genome2tab.rb +76 -0
- data/sample/goslim.rb +311 -0
- data/sample/gt2fasta.rb +47 -0
- data/sample/pmfetch.rb +42 -0
- data/sample/pmsearch.rb +42 -0
- data/sample/psortplot_html.rb +222 -0
- data/sample/ssearch2tab.rb +96 -0
- data/sample/tdiary.rb +158 -0
- data/sample/tfastx2tab.rb +100 -0
- data/sample/vs-genes.rb +212 -0
- data/test/data/SOSUI/sample.report +11 -0
- data/test/data/TMHMM/sample.report +21 -0
- data/test/data/blast/eco:b0002.faa +15 -0
- data/test/data/blast/eco:b0002.faa.m0 +128 -0
- data/test/data/blast/eco:b0002.faa.m7 +65 -0
- data/test/data/blast/eco:b0002.faa.m8 +1 -0
- data/test/data/embl/AB090716.embl +65 -0
- data/test/data/genscan/sample.report +63 -0
- data/test/data/prosite/prosite.dat +2233 -0
- data/test/data/refseq/nm_126355.entret +64 -0
- data/test/data/uniprot/p53_human.uniprot +1456 -0
- data/test/runner.rb +10 -0
- data/test/unit/bio/appl/blast/test_report.rb +427 -0
- data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
- data/test/unit/bio/appl/genscan/test_report.rb +195 -0
- data/test/unit/bio/appl/sosui/test_report.rb +94 -0
- data/test/unit/bio/appl/targetp/test_report.rb +159 -0
- data/test/unit/bio/appl/test_blast.rb +159 -0
- data/test/unit/bio/appl/test_fasta.rb +142 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
- data/test/unit/bio/data/test_aa.rb +103 -0
- data/test/unit/bio/data/test_codontable.rb +120 -0
- data/test/unit/bio/data/test_na.rb +89 -0
- data/test/unit/bio/db/embl/test_common.rb +130 -0
- data/test/unit/bio/db/embl/test_embl.rb +227 -0
- data/test/unit/bio/db/embl/test_sptr.rb +268 -0
- data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
- data/test/unit/bio/db/kegg/test_genes.rb +58 -0
- data/test/unit/bio/db/test_fasta.rb +263 -0
- data/test/unit/bio/db/test_gff.rb +140 -0
- data/test/unit/bio/db/test_prosite.rb +1450 -0
- data/test/unit/bio/io/test_ddbjxml.rb +87 -0
- data/test/unit/bio/io/test_soapwsdl.rb +45 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
- data/test/unit/bio/test_alignment.rb +1028 -0
- data/test/unit/bio/test_command.rb +71 -0
- data/test/unit/bio/test_db.rb +109 -0
- data/test/unit/bio/test_feature.rb +128 -0
- data/test/unit/bio/test_location.rb +51 -0
- data/test/unit/bio/test_pathway.rb +485 -0
- data/test/unit/bio/test_sequence.rb +386 -0
- data/test/unit/bio/test_shell.rb +31 -0
- data/test/unit/bio/util/test_color_scheme.rb +45 -0
- data/test/unit/bio/util/test_contingency_table.rb +106 -0
- data/test/unit/bio/util/test_sirna.rb +258 -0
- metadata +295 -0
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
#
|
|
2
|
+
# bio/io/flatfile/bdb.rb - OBDA flatfile index by Berkeley DB
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2002 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
|
|
5
|
+
#
|
|
6
|
+
# This library is free software; you can redistribute it and/or
|
|
7
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
8
|
+
# License as published by the Free Software Foundation; either
|
|
9
|
+
# version 2 of the License, or (at your option) any later version.
|
|
10
|
+
#
|
|
11
|
+
# This library is distributed in the hope that it will be useful,
|
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14
|
+
# Lesser General Public License for more details.
|
|
15
|
+
#
|
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
17
|
+
# License along with this library; if not, write to the Free Software
|
|
18
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
|
+
#
|
|
20
|
+
# $Id: bdb.rb,v 1.8 2005/09/26 13:00:08 k Exp $
|
|
21
|
+
#
|
|
22
|
+
|
|
23
|
+
begin
|
|
24
|
+
require 'bdb'
|
|
25
|
+
rescue LoadError,NotImplementedError
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
require 'bio/io/flatfile/index'
|
|
29
|
+
require 'bio/io/flatfile/indexer'
|
|
30
|
+
|
|
31
|
+
module Bio
|
|
32
|
+
class FlatFileIndex
|
|
33
|
+
|
|
34
|
+
# Default open flags and file permission used for the Berkeley DB index
# files.  Every helper below is available as a module function
# (e.g. BDBdefault.flag_read).
module BDBdefault
  module_function

  # Permission bits for database files: 0666 with the umask bits removed.
  def permission
    0666 & (0777 ^ File.umask)
  end

  # Flag used to open a database read-only.
  def flag_read
    BDB::RDONLY
  end

  # Flags used to create a database, truncating any existing content.
  def flag_write
    BDB::CREATE | BDB::TRUNCATE
  end

  # Mode used to open an existing database for update.
  def flag_append
    'r+'
  end
end #module BDBdefault
|
|
55
|
+
|
|
56
|
+
# Thin wrapper around a BDB::Btree file stored at <name>/<filename>.
# The database is only opened when #open is called; #[] and #keys
# tolerate a closed handle.
class BDBwrapper
  # name::     directory of the databank
  # filename:: name of the database file inside that directory
  # Additional arguments are accepted but not used.
  def initialize(name, filename, *arg)
    @dbname = name
    @file = nil
    @filename = filename
  end

  # Full path of the database file.
  def filename
    File.join(@dbname, @filename)
  end

  # Opens the Btree unless it is already open.  Always returns true.
  def open(flag = BDBdefault.flag_read,
           permission = BDBdefault.permission)
    return true if @file
    DEBUG.print "BDBwrapper: open #{filename}\n"
    @file = BDB::Btree.open(filename, nil, flag, permission)
    true
  end

  # Closes the Btree if it is open.  Always returns nil.
  def close
    return nil unless @file
    DEBUG.print "BDBwrapper: close #{filename}\n"
    @file.close
    @file = nil
  end

  # Value stored under +arg+, or nil when the database is not open.
  def [](arg)
    @file ? @file[arg] : nil
  end

  # Stores +val+ under +key+; both are converted to strings.
  def []=(key, val)
    @file[key.to_s] = val.to_s
  end

  # Reopens the database with the given open arguments and stores each
  # element of +array+ under the key "<prefix><index>".
  def writeback_array(prefix, array, *arg)
    close
    open(*arg)
    array.each_with_index { |val, key| @file["#{prefix}#{key}"] = val.to_s }
  end

  # All keys of the database, or an empty array when it is not open.
  def keys
    @file ? @file.keys : []
  end
end #class BDBwrapper
|
|
116
|
+
|
|
117
|
+
module BDB_1
|
|
118
|
+
# Mapping file backed by a BDB::Btree.  Each key maps to a single string
# holding one or more values joined by TAB characters.  The database is
# opened lazily on first access.
class BDBMappingFile
  # Same as BDBMappingFile.new.
  def self.open(*arg)
    self.new(*arg)
  end

  # filename::   path of the database file
  # flag::       flags handed to BDB::Btree.open
  # permission:: file permission handed to BDB::Btree.open
  def initialize(filename, flag = BDBdefault.flag_read,
                 permission = BDBdefault.permission)
    @filename = filename
    @flag = flag
    @permission = permission
    @bdb = nil # opened on demand by #open
  end
  attr_reader :filename
  attr_accessor :flag, :permission

  # Opens the database.  Returns true when it was opened by this call,
  # nil when it was already open.
  def open
    return nil if @bdb
    DEBUG.print "BDBMappingFile: open #{@filename}\n"
    @bdb = BDB::Btree.open(@filename, nil, @flag, @permission)
    true
  end

  # Closes the database if it is open.  Always returns nil.
  def close
    if @bdb
      DEBUG.print "BDBMappingFile: close #{@filename}\n"
      @bdb.close
      @bdb = nil
    end
    nil
  end

  # Number of keys stored.  Opens the database first, like the other
  # accessors, instead of failing on a nil handle.
  def records
    open
    @bdb.size
  end
  alias size records

  # methods for writing

  # Appends +val+ (a value or an array of values) to the values already
  # stored under +key+.  Returns the resulting TAB-joined string.
  def add(key, val)
    open
    val = join_values(val)
    s = @bdb[key]
    val = "#{s}\t#{val}" if s
    @bdb[key] = val
    #DEBUG.print "add: key=#{key.inspect}, val=#{val.inspect}\n"
    val
  end

  # Stores +val+ under +key+.  Raises RuntimeError when the key already
  # exists.  Returns the stored TAB-joined string.
  def add_exclusive(key, val)
    open
    val = join_values(val)
    if @bdb[key]
      raise RuntimeError, "keys must be unique, but key #{key.inspect} already exists"
    end
    @bdb[key] = val
    #DEBUG.print "add_exclusive: key=#{key.inspect}, val=#{val.inspect}\n"
    val
  end

  # Stores +val+ under +key+, replacing any existing value (a debug
  # warning is printed in that case).  Returns the stored string.
  def add_overwrite(key, val)
    open
    val = join_values(val)
    if @bdb[key]
      DEBUG.print "Warning: overwrote unique id #{key.inspect}\n"
    end
    @bdb[key] = val
    #DEBUG.print "add_overwrite: key=#{key.inspect}, val=#{val.inspect}\n"
    val
  end

  # Merges +val+ into the values stored under +key+, keeping the list
  # sorted and free of duplicates.  Returns the stored string.
  def add_nr(key, val)
    open
    s = @bdb[key]
    a = s ? s.split("\t") : []
    a.concat(Array(val))
    a.sort!
    a.uniq!
    str = a.join("\t")
    @bdb[key] = str
    #DEBUG.print "add_nr: key=#{key.inspect}, val=#{str.inspect}\n"
    str
  end

  # methods for searching

  # Returns the values stored under +key+ as an array of strings, or an
  # empty array when the key is absent.
  def search(key)
    open
    s = @bdb[key]
    s ? s.split("\t") : []
  end

  private

  # Joins a value or a list of values with TABs.  Kernel#Array is used
  # because String#to_a no longer exists in Ruby 1.9 and later.
  def join_values(val)
    Array(val).join("\t")
  end
end #class BDBMappingFile
|
|
225
|
+
|
|
226
|
+
# Namespace of the primary key, stored in a BDB mapping file.
class PrimaryNameSpace < Template::NameSpace
  # Path of the database file: <dbname>/key_<name>.
  def filename
    File.join(dbname, "key_#{name}")
  end

  # Creates the mapping file object for +filename+.
  def mapping(filename)
    BDBMappingFile.new(filename)
  end

  # Looks up +key+.  A non-empty result is wrapped in a one-element
  # array; an empty result is returned unchanged.
  def search(key)
    found = super(key)
    found.empty? ? found : [ found ]
  end
end #class PrimaryNameSpace
|
|
242
|
+
|
|
243
|
+
# Namespace of a secondary key, stored in a BDB mapping file.
class SecondaryNameSpace < Template::NameSpace
  # Creates the mapping file object for +filename+.
  def mapping(filename)
    BDBMappingFile.new(filename)
  end

  # Path of the database file: <dbname>/id_<name>.
  def filename
    File.join(dbname, "id_#{name}")
  end

  # Looks up +key+ and returns the result of the inherited search.
  # The mapping file is closed afterwards, even when the lookup raises,
  # so a failed search does not leave the database handle open.
  def search(key)
    super(key)
  ensure
    file.close
  end
end #class SecondaryNameSpace
|
|
257
|
+
end #module BDB_1
|
|
258
|
+
|
|
259
|
+
end #class FlatFileIndex
|
|
260
|
+
end #module Bio
|
|
261
|
+
|
|
262
|
+
=begin
|
|
263
|
+
|
|
264
|
+
* Classes/modules in this file are internal use only.
|
|
265
|
+
|
|
266
|
+
=end
|
|
@@ -0,0 +1,1308 @@
|
|
|
1
|
+
#
|
|
2
|
+
# bio/io/flatfile/index.rb - OBDA flatfile index
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2002 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
|
|
5
|
+
#
|
|
6
|
+
# This library is free software; you can redistribute it and/or
|
|
7
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
8
|
+
# License as published by the Free Software Foundation; either
|
|
9
|
+
# version 2 of the License, or (at your option) any later version.
|
|
10
|
+
#
|
|
11
|
+
# This library is distributed in the hope that it will be useful,
|
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14
|
+
# Lesser General Public License for more details.
|
|
15
|
+
#
|
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
17
|
+
# License along with this library; if not, write to the Free Software
|
|
18
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
|
+
#
|
|
20
|
+
# $Id: index.rb,v 1.15 2005/11/28 05:08:26 k Exp $
|
|
21
|
+
#
|
|
22
|
+
|
|
23
|
+
require 'bio/io/flatfile/indexer'
|
|
24
|
+
|
|
25
|
+
module Bio
|
|
26
|
+
class FlatFileIndex
|
|
27
|
+
|
|
28
|
+
autoload :Indexer, 'bio/io/flatfile/indexer'
|
|
29
|
+
autoload :BDBdefault, 'bio/io/flatfile/bdb'
|
|
30
|
+
autoload :BDBwrapper, 'bio/io/flatfile/bdb'
|
|
31
|
+
autoload :BDB_1, 'bio/io/flatfile/bdb'
|
|
32
|
+
|
|
33
|
+
MAGIC_FLAT = 'flat/1'
|
|
34
|
+
MAGIC_BDB = 'BerkeleyDB/1'
|
|
35
|
+
|
|
36
|
+
#########################################################
|
|
37
|
+
# Opens the databank +name+.
# Without a block the new index object is returned.  With a block the
# index is yielded, closed afterwards (an IOError raised by the close is
# ignored) and the value of the block is returned.
def self.open(name)
  return self.new(name) unless block_given?

  index = nil
  begin
    index = self.new(name)
    yield index
  ensure
    begin
      index.close if index
    rescue IOError
    end
  end
end
|
|
55
|
+
|
|
56
|
+
def initialize(name)
|
|
57
|
+
@db = DataBank.open(name)
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
# common interface defined in registry.rb
|
|
61
|
+
def get_by_id(key)
|
|
62
|
+
search(key).to_s
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
# original methods
|
|
66
|
+
def close
|
|
67
|
+
check_closed?
|
|
68
|
+
@db.close
|
|
69
|
+
@db = nil
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
# Returns true when the databank has been closed, false otherwise.
def closed?
  !@db
end
|
|
79
|
+
|
|
80
|
+
# Sets the namespaces used by #search and #include?.
# Each given name is copied; nil (or false) resets the default.
def default_namespaces=(names)
  return @names = nil unless names
  @names = []
  names.each { |ns| @names << ns.dup }
end
|
|
88
|
+
|
|
89
|
+
def default_namespaces
|
|
90
|
+
@names
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
# Searches +key+ in the default namespaces when they are set,
# otherwise in all namespaces.  Raises IOError on a closed databank.
def search(key)
  check_closed?
  return @db.search_all(key) unless @names
  @db.search_namespaces(key, *@names)
end
|
|
101
|
+
|
|
102
|
+
def search_namespaces(key, *names)
|
|
103
|
+
check_closed?
|
|
104
|
+
@db.search_namespaces(key, *names)
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
def search_primary(key)
|
|
108
|
+
check_closed?
|
|
109
|
+
@db.search_primary(key)
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
# Looks up the unique ids matching +key+, using the default namespaces
# when they are set and all namespaces otherwise.
# Returns the ids, or nil when nothing was found.
def include?(key)
  check_closed?
  ids = if @names
          @db.search_namespaces_get_unique_id(key, *@names)
        else
          @db.search_all_get_unique_id(key)
        end
  ids.empty? ? nil : ids
end
|
|
125
|
+
|
|
126
|
+
# Looks up the unique ids matching +key+ in the given namespaces.
# Returns the ids, or nil when nothing was found.
def include_in_namespaces?(key, *names)
  check_closed?
  ids = @db.search_namespaces_get_unique_id(key, *names)
  ids.empty? ? nil : ids
end
|
|
135
|
+
|
|
136
|
+
# Looks up the unique ids matching +key+ in the primary namespace.
# Returns the ids, or nil when nothing was found.
def include_in_primary?(key)
  check_closed?
  ids = @db.search_primary_get_unique_id(key)
  ids.empty? ? nil : ids
end
|
|
145
|
+
|
|
146
|
+
# Names of all namespaces: the primary namespace first, followed by the
# secondary namespaces.
def namespaces
  check_closed?
  secondary_namespaces.unshift(primary_namespace)
end
|
|
152
|
+
|
|
153
|
+
def primary_namespace
|
|
154
|
+
check_closed?
|
|
155
|
+
@db.primary.name
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
def secondary_namespaces
|
|
159
|
+
check_closed?
|
|
160
|
+
@db.secondary.names
|
|
161
|
+
end
|
|
162
|
+
|
|
163
|
+
def check_consistency
|
|
164
|
+
check_closed?
|
|
165
|
+
@db.check_consistency
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
def always_check_consistency=(bool)
|
|
169
|
+
@db.always_check=(bool)
|
|
170
|
+
end
|
|
171
|
+
# Returns the databank's always_check setting.
# The argument is ignored; it is kept (now optional) only so that
# existing callers that pass a value keep working.  Previously the
# parameter was mandatory, so a plain getter call raised ArgumentError.
def always_check_consistency(bool = nil)
  @db.always_check
end
|
|
174
|
+
|
|
175
|
+
# private methods
|
|
176
|
+
def check_closed?
|
|
177
|
+
@db or raise IOError, 'closed databank'
|
|
178
|
+
end
|
|
179
|
+
private :check_closed?
|
|
180
|
+
|
|
181
|
+
#########################################################
|
|
182
|
+
|
|
183
|
+
# Search results: a Hash whose values are the retrieved entries.
# #each and #to_a work on the values only.
class Results < Hash

  # Union of two result sets.  On duplicate keys the value of +a+ wins.
  # Raises RuntimeError unless +a+ is a Results object.
  def +(a)
    raise 'argument must be Results class' unless a.is_a?(self.class)
    res = self.dup
    res.update(a)
    res
  end

  # Intersection of two result sets: the entries of +a+ whose key is
  # also present in the receiver.
  # Raises RuntimeError unless +a+ is a Results object.
  def *(a)
    raise 'argument must be Results class' unless a.is_a?(self.class)
    res = self.class.new
    # Test key presence, not value truthiness, so that a shared key is
    # kept even when the receiver holds nil or false for it.
    a.each_pair { |k, v| res.store(k, v) if key?(k) }
    res
  end

  # All values concatenated into one string.
  def to_s
    self.values.join
  end

  #alias each_orig each
  alias each each_value
  #alias to_a_orig to_a
  alias to_a values

end #class Results
|
|
209
|
+
|
|
210
|
+
#########################################################
|
|
211
|
+
|
|
212
|
+
# Debug message sink.  Messages are printed only when an output has been
# selected with DEBUG.out= or when $DEBUG / $VERBOSE is set.
module DEBUG
  @@out = STDERR
  @@flag = nil

  # Current output object (nil when disabled).
  def self.out
    @@out
  end

  # Selects the output: an IO-like object, true for STDERR, or
  # nil/false to disable debug output.
  def self.out=(io)
    @@flag = io ? true : nil
    @@out = if !io
              nil
            elsif io == true
              STDERR
            else
              io
            end
  end

  # Prints the arguments to the current output when debugging is on.
  def self.print(*arg)
    @@flag = true if $DEBUG || $VERBOSE
    return unless @@out && @@flag
    @@out.print(*arg)
  end
end #module DEBUG
|
|
234
|
+
|
|
235
|
+
#########################################################
|
|
236
|
+
|
|
237
|
+
module Template
  # Abstract base class of a namespace.  Subclasses must redefine
  # #filename and #mapping.
  class NameSpace
    attr_reader :dbname, :name, :file

    # dbname:: name of the databank
    # name::   name of the namespace (stored as a frozen copy)
    def initialize(dbname, name)
      @dbname = dbname
      @name = name.dup.freeze
      @file = mapping(filename)
    end

    # Path of the mapping file.  Must be redefined in the child class.
    def filename
      raise NotImplementedError, "should be redefined in child class"
    end

    # Creates the mapping file object for +filename+.
    # Must be redefined in the child class.
    def mapping(filename)
      raise NotImplementedError, "should be redefined in child class"
    end

    # Opens the mapping file and searches +key+ in it.
    def search(key)
      @file.open
      @file.search(key)
    end

    # Closes the mapping file.
    def close
      @file.close
    end

    # Returns +key+ when the search finds something, nil otherwise.
    def include?(key)
      search(key).empty? ? nil : key
    end
  end #class NameSpace
end #module Template
|
|
277
|
+
|
|
278
|
+
# A flat file registered in the index, identified by its file name and
# the file size recorded when the index was built.
class FileID
  # Builds a FileID from a "<filename>\t<filesize>" string.
  # The size part is optional.
  def self.new_from_string(str)
    a = str.split("\t", 2)
    a[1] = a[1].to_i if a[1]
    self.new(a[0], a[1])
  end

  # filename:: path of the flat file
  # filesize:: recorded size in bytes (nil when unknown)
  def initialize(filename, filesize = nil)
    @filename = filename
    @filesize = filesize
    @io = nil
  end
  attr_reader :filename, :filesize

  # Compares the current size of the file with the recorded size.
  # Returns true or false, or nil when the file does not exist.
  def check
    begin
      fsize = File.size(@filename)
      r = ( fsize == @filesize)
    rescue Errno::ENOENT
      fsize = -1
      r = nil
    end
    DEBUG.print "FileID: File.size(#{@filename.inspect}) = ",
      fsize, (r ? ' == ' : ' != ') , @filesize,
      (r ? '' : ' bad!'), "\n"
    r
  end

  # Replaces the recorded size with the current size of the file.
  def recalc
    @filesize = File.size(@filename)
  end

  # "<filename>\t<filesize>", prefixed with "fileid_<i>\t" when +i+
  # is given.
  def to_s(i = nil)
    if i then
      str = "fileid_#{i}\t"
    else
      str = ''
    end
    str << "#{@filename}\t#{@filesize}"
    str
  end

  # Opens the file for binary reading.  Returns true when it was opened
  # by this call, nil when it was already open.
  def open
    unless @io then
      DEBUG.print "FileID: open #{@filename}\n"
      @io = File.open(@filename, 'rb')
      true
    else
      nil
    end
  end

  # Closes the file.  Returns nil when a file was closed and true when
  # there was nothing to close.
  def close
    if @io then
      DEBUG.print "FileID: close #{@filename}\n"
      @io.close
      @io = nil
      nil
    else
      true
    end
  end

  # Forwards to IO#seek of the opened file.
  def seek(*arg)
    @io.seek(*arg)
  end

  # Forwards to IO#read of the opened file.
  def read(size)
    @io.read(size)
  end

  # Reads +length+ bytes starting at byte offset +pos+.
  # The file is closed afterwards, even when seeking or reading raises,
  # so a failed read does not leak the file handle.
  def get(pos, length)
    open
    begin
      seek(pos, IO::SEEK_SET)
      read(length)
    ensure
      close
    end
  end
end #class FileID
|
|
357
|
+
|
|
358
|
+
# Array of FileID objects.  Missing elements are loaded on demand from
# +hash+, where they are stored as strings under "<prefix><index>".
class FileIDs < Array
  # prefix:: key prefix used in +hash+
  # hash::   hash-like store holding the serialized FileID strings
  def initialize(prefix, hash)
    @hash = hash
    @prefix = prefix
  end

  # Returns the n-th FileID, loading it from the store when it is not
  # cached yet.  Returns nil when it is unknown.
  def [](n)
    cached = super(n)
    return cached if cached
    data = @hash["#{@prefix}#{n}"]
    self[n] = data if data
    super(n)
  end

  # Stores a FileID at index +n+.  Any other non-nil +data+ is parsed
  # with FileID.new_from_string.
  def []=(n, data)
    entry =
      if !data then nil
      elsif data.is_a?(FileID) then data
      else FileID.new_from_string(data)
      end
    super(n, entry)
    self[n]
  end

  # Appends a new FileID for each given file name.
  def add(*arg)
    arg.each { |filename| self << FileID.new(filename) }
  end

  # Loads every FileID found in the store.  Returns the loaded indexes.
  def cache_all
    pattern = /\A#{Regexp.escape(@prefix)}(\d+)/
    indexes = []
    @hash.keys.each do |k|
      indexes << $1.to_i if k =~ pattern
    end
    indexes.each { |i| self[i] }
    indexes
  end

  # Yields each known FileID, skipping empty slots.
  def each
    size.times do |i|
      item = self[i]
      yield(item) if item
    end
    self
  end

  # Yields each known FileID together with its index.
  def each_with_index
    size.times do |i|
      item = self[i]
      yield(item, i) if item
    end
    self
  end

  # Indexes of all known FileIDs.
  def keys
    cache_all
    (0...size).select { |i| self[i] }
  end

  # File names of all known FileIDs.
  def filenames
    cache_all
    names = []
    each { |x| names << x.filename }
    names
  end

  # Checks every file; stops at the first failure and returns its result.
  def check_all
    cache_all
    result = true
    each do |x|
      result = x.check
      break unless result
    end
    result
  end
  alias check check_all

  # Closes every cached file.  Returns nil.
  def close_all
    each { |x| x.close }
    nil
  end
  alias close close_all

  # Recalculates the recorded size of every file.  Returns true.
  def recalc_all
    cache_all
    each { |x| x.recalc }
    true
  end
  alias recalc recalc_all

end #class FileIDs
|
|
473
|
+
|
|
474
|
+
module Flat_1
|
|
475
|
+
# One fixed-width record of a flat mapping file: a key followed by
# TAB-separated values, padded with spaces to +size+ characters.
class Record
  attr_reader :key, :val, :size

  # Formats key and values as a TAB-joined string, left-justified and
  # padded to +size+ characters.
  def self.to_string(size, key, val)
    format("%-*s", size, key + "\t" + val.join("\t"))
  end

  # Builds a Record from a key and its values.
  def self.create(size, key, val)
    self.new(self.to_string(size, key, val))
  end

  # str::  raw record; trailing NULs and spaces of each field are removed
  # size:: record width (defaults to the length of +str+)
  def initialize(str, size = nil)
    fields = str.split("\t").map { |f| f.sub(/[\000 ]+\z/, '') }
    @key = fields.shift.to_s
    @val = fields
    @size = size || str.length
    #DEBUG.print "key=#{@key.inspect},val=#{@val.inspect},size=#{@size}\n"
  end

  # The record in its padded, fixed-width form.
  def to_s
    self.class.to_string(@size, @key, @val)
  end

  # Two records are equal when their string forms are equal.
  def ==(x)
    to_s == x.to_s
  end
end #class Record
|
|
502
|
+
|
|
503
|
+
class FlatMappingFile
|
|
504
|
+
@@recsize_width = 4
|
|
505
|
+
@@recsize_regex = /\A\d{4}\z/
|
|
506
|
+
|
|
507
|
+
def self.open(*arg)
|
|
508
|
+
self.new(*arg)
|
|
509
|
+
end
|
|
510
|
+
|
|
511
|
+
def initialize(filename, mode = 'rb')
|
|
512
|
+
@filename = filename
|
|
513
|
+
@mode = mode
|
|
514
|
+
@file = nil
|
|
515
|
+
#@file = File.open(filename, mode)
|
|
516
|
+
@record_size = nil
|
|
517
|
+
@records = nil
|
|
518
|
+
end
|
|
519
|
+
attr_accessor :mode
|
|
520
|
+
attr_reader :filename
|
|
521
|
+
|
|
522
|
+
# Opens the mapping file with the configured mode.
# Returns true when it was opened by this call, nil when it was
# already open.
def open
  return nil if @file
  DEBUG.print "FlatMappingFile: open #{@filename}\n"
  @file = File.open(@filename, @mode)
  true
end
|
|
531
|
+
|
|
532
|
+
# Closes the mapping file if it is open.  Always returns nil.
def close
  return nil unless @file
  DEBUG.print "FlatMappingFile: close #{@filename}\n"
  @file.close
  @file = nil
end
|
|
540
|
+
|
|
541
|
+
# Width of one record, read from the header at the beginning of the
# file on first use and cached afterwards.
# Raises RuntimeError when the header is not a valid size field.
def record_size
  return @record_size if @record_size
  open
  @file.seek(0, IO::SEEK_SET)
  header = @file.read(@@recsize_width)
  raise 'strange record size' unless header =~ @@recsize_regex
  @record_size = header.to_i
  DEBUG.print "FlatMappingFile: record_size: #{@record_size}\n"
  @record_size
end
|
|
552
|
+
|
|
553
|
+
# Reads the raw string of the i-th record (0-based).
def get_record(i)
  length = record_size
  seek(i)
  #DEBUG.print "get_record(#{i})\n"
  @file.read(length)
end
|
|
560
|
+
|
|
561
|
+
def seek(i)
|
|
562
|
+
rs = record_size
|
|
563
|
+
@file.seek(@@recsize_width + rs * i)
|
|
564
|
+
end
|
|
565
|
+
|
|
566
|
+
# Number of records in the file, derived from the file size on first
# use and cached afterwards.
def records
  return @records if @records
  width = record_size
  @records = (@file.stat.size - @@recsize_width) / width
  DEBUG.print "FlatMappingFile: records: #{@records}\n"
  @records
end
|
|
574
|
+
alias size records
|
|
575
|
+
|
|
576
|
+
# methods for writing file
|
|
577
|
+
# Writes +str+ at the current file position as one fixed-width record:
# shorter strings are padded with spaces, longer ones are truncated.
def write_record(str)
  rs = record_size
  # [0, rs] keeps exactly rs characters; the former [0..rs] kept rs + 1
  # and so shifted every following record when +str+ was too long.
  rec = sprintf("%-*s", rs, str)[0, rs]
  @file.write(rec)
end
|
|
582
|
+
|
|
583
|
+
# Appends +str+ as a new record at the end of the file.
# Returns the new number of records.
def add_record(str)
  # Called for its side effect only: it makes sure @records holds the
  # current count before it is incremented below.
  records
  @file.seek(0, IO::SEEK_END)
  write_record(str)
  @records += 1
end
|
|
590
|
+
|
|
591
|
+
# Stores +str+ as the i-th record (0-based).
# An existing slot is overwritten in place. When i is at or beyond
# the current end, blank records are appended to fill the gap and
# the cached record count becomes i + 1.
def put_record(i, str)
  total = records
  width = record_size
  if i < total
    seek(i)
  else
    @file.seek(0, IO::SEEK_END)
    filler = sprintf("%-*s", width, '')
    @file.write(filler * (i - total))
    @records = i + 1
  end
  write_record(str)
end
|
|
603
|
+
|
|
604
|
+
# (Re)initializes the file as an empty mapping with record width +rs+.
# The file is truncated and a zero-padded header of @@recsize_width
# digits holding +rs+ is written at the start.
# Raises RuntimeError ('record size out of range') unless
# 0 < rs < 10 ** @@recsize_width.
def init(rs)
  in_range = (0 < rs) && (rs < 10 ** @@recsize_width)
  raise 'record size out of range' unless in_range
  open
  @record_size = rs
  header = sprintf("%0*d", @@recsize_width, rs)
  @file.truncate(0)
  @file.seek(0, IO::SEEK_SET)
  @file.write(header)
  @records = 0
end
|
|
616
|
+
|
|
617
|
+
# export/import/edit data
|
|
618
|
+
# Yields every record in file order, each wrapped in a Record.
# Returns self.
def each
  count = records
  seek(0)
  count.times do |idx|
    yield Record.new(get_record(idx))
  end
  self
end
|
|
626
|
+
|
|
627
|
+
# Appends every record to +stream+ (anything responding to <<), one
# per line, using each record's to_s followed by a newline.
# Returns +stream+.
def export_tsv(stream)
  self.each do |rec|
    line = "#{rec}\n"
    stream << line
  end
  stream
end
|
|
633
|
+
|
|
634
|
+
# Rebuilds this mapping from an already sorted TSV file.
#
# filename::     path of the sorted TSV file to import.
# flag_primary:: when true, keys are treated as unique: for a run of
#                lines sharing a key only the last one is stored (a
#                debug warning is printed for each overwritten line).
#                When false, only a line equal to the line directly
#                before it is skipped.
#
# The file is scanned twice: first to find the longest line, which
# becomes the record width passed to init, then to add the records.
# Returns self. The input file is closed even if an error is raised.
def init_with_sorted_tsv_file(filename, flag_primary = false)
  # Block form guarantees the handle is released when init or
  # add_record raises (the explicit close used before was skipped).
  File.open(filename) do |f|
    rec_size = 1
    f.each do |y|
      len = y.chomp.length
      rec_size = len if rec_size < len
    end
    self.init(rec_size)

    prev = nil
    f.rewind
    if flag_primary then
      f.each do |y|
        x = Record.new(y.chomp, rec_size)
        if prev then
          if x.key == prev.key
            DEBUG.print "Warining: overwrote unique id #{x.key.inspect}\n"
          else
            self.add_record(prev.to_s)
          end
        end
        prev = x
      end
      # the last pending record has not been written yet
      self.add_record(prev.to_s) if prev
    else
      f.each do |y|
        x = Record.new(y.chomp, rec_size)
        self.add_record(x.to_s) if x != prev
        prev = x
      end
    end
  end
  self
end
|
|
667
|
+
|
|
668
|
+
# Builds a Proc that sorts with an external sort program.
# The Proc takes (out, in1, *files) and runs
# "sort_program -o out in1 files...", returning the result of system.
def self.external_sort_proc(sort_program = '/usr/bin/sort')
  proc do |out, in1, *files|
    argv = ['-o', out, in1] + files
    system(sort_program, *argv)
  end
end
|
|
673
|
+
|
|
674
|
+
# Builds a Proc that sorts each additional input file into its own
# temporary file with the external sort program, then merges those
# with in1 ("sort_program -m -o out in1 tmp...").
# in1 is passed to the merge step as is (it may already be sorted).
# The temporary files are removed after the merge.
def self.external_merge_sort_proc(sort_program = '/usr/bin/sort')
  proc do |out, in1, *files|
    sorted = files.map do |fn|
      tmp = Tempfile.open('sort')
      tmp.close(false)
      system(sort_program, '-o', tmp.path, fn)
      tmp
    end
    sorted_paths = sorted.map { |tmp| tmp.path }
    system(sort_program, '-m', '-o', out, in1, *sorted_paths)
    sorted.each { |tmp| tmp.close(true) }
  end
end
|
|
692
|
+
|
|
693
|
+
# Builds a Proc that merges inputs with an external sort program.
# All inputs (in1 and files) must already be sorted; the Proc runs
# "sort_program -m -o out in1 files..." and returns the result of system.
def self.external_merge_proc(sort_program = '/usr/bin/sort')
  proc do |out, in1, *files|
    argv = ['-m', '-o', out, in1] + files
    system(sort_program, *argv)
  end
end
|
|
699
|
+
|
|
700
|
+
# Builds a Proc that sorts entirely in memory, without an external
# program. The Proc takes (out, in1, *files): it reads every line of
# in1 and of each extra file, sorts the lines, and writes them to
# +out+. Returns nil.
def self.internal_sort_proc
  Proc.new do |out, in1, *files|
    # File.readlines always treats its argument as a path; the
    # IO.readlines/IO.foreach forms would run a command when the
    # name starts with "|".
    lines = File.readlines(in1)
    files.each do |fn|
      lines.concat(File.readlines(fn))
    end
    lines.sort!
    # Block form closes the output even when a write fails.
    File.open(out, 'w') do |of|
      lines.each { |x| of << x }
    end
    nil
  end
end
|
|
714
|
+
|
|
715
|
+
# Merges TSV files into this mapping file.
#
# flag_primary:: passed on to init_with_sorted_tsv_file.
# mode::         :new discards the current contents; any other value
#                exports the current records first so they take part
#                in the sort.
# sort_proc::    called as (output_path, first_input_path, *files); it
#                must leave the sorted result in output_path.
# files::        paths of the TSV files to import.
#
# The new mapping is built in "<filename>.<pid>.tmp~" and then renamed
# over the real file, which is kept as "<filename>.<pid>.bak~" until
# the rename has succeeded. Returns self.
def import_tsv_files(flag_primary, mode, sort_proc, *files)
  require 'tempfile'

  # current contents (left empty when mode is :new)
  exported = Tempfile.open('flat')
  self.export_tsv(exported) if mode != :new
  exported.close(false)

  # destination of the sort step
  sorted = Tempfile.open('sorted')
  sorted.close(false)

  sort_proc.call(sorted.path, exported.path, *files)

  rebuilt = self.class.new(self.filename + ".#{$$}.tmp~", 'wb+')
  rebuilt.init_with_sorted_tsv_file(sorted.path, flag_primary)
  rebuilt.close
  self.close

  backup = self.filename + ".#{$$}.bak~"
  begin
    File.rename(self.filename, backup)
  rescue Errno::ENOENT
    # there was no previous file to keep
  end
  File.rename(rebuilt.filename, self.filename)
  begin
    File.delete(backup)
  rescue Errno::ENOENT
    # no backup was made
  end

  sorted.close(true)
  exported.close(true)
  self
end
|
|
746
|
+
|
|
747
|
+
|
|
748
|
+
# methods for searching
|
|
749
|
+
# Looks up +key+ in the sorted records by bisection.
# Returns an array with the val of every record whose key equals
# +key+, in file order, or an empty array when nothing matches.
def search(key)
  upper = records
  return [] if upper <= 0
  pos = upper / 2
  DEBUG.print "binary search starts...\n"
  loop do
    rec = Record.new(get_record(pos))
    probed = pos
    if key < rec.key then
      # everything from pos upwards is too large
      upper = pos
      pos = pos / 2
    elsif key > rec.key then
      pos = (pos + upper) / 2
    else
      # hit: gather neighbours with the same key on both sides
      hits = [ rec.val ]
      below = pos - 1
      while below >= 0
        other = Record.new(get_record(below))
        break unless other.key == key
        hits << other.val
        below = below - 1
      end
      hits.reverse!
      above = pos + 1
      while above < upper
        other = Record.new(get_record(above))
        break unless other.key == key
        hits << other.val
        above = above + 1
      end
      DEBUG.print "#{hits.size} hits found!!\n"
      return hits
    end
    # the probe position stopped moving: the key is absent
    break if probed == pos
  end
  DEBUG.print "no hits found\n"
  []
end
|
|
786
|
+
end #class FlatMappingFile
|
|
787
|
+
|
|
788
|
+
# Primary (unique key) namespace stored as a flat mapping file.
class PrimaryNameSpace < Template::NameSpace
  # Returns the mapping object backing this namespace.
  def mapping(filename)
    FlatMappingFile.new(filename)
  end

  # Path of the index file: "<dbname>/key_<name>.key".
  def filename
    basename = "key_#{name}.key"
    File.join(dbname, basename)
  end
end #class PrimaryNameSpace
|
|
796
|
+
|
|
797
|
+
# Secondary namespace stored as a flat mapping file.
class SecondaryNameSpace < Template::NameSpace
  # Returns the mapping object backing this namespace.
  def mapping(filename)
    FlatMappingFile.new(filename)
  end

  # Path of the index file: "<dbname>/id_<name>.index".
  def filename
    basename = "id_#{name}.index"
    File.join(dbname, basename)
  end

  # Runs the inherited search, closes the mapping file, and returns
  # the hits flattened into a single array.
  def search(key)
    hits = super(key)
    file.close
    hits.flatten!
    hits
  end
end #class SecondaryNameSpace
|
|
811
|
+
end #module Flat_1
|
|
812
|
+
|
|
813
|
+
|
|
814
|
+
# A set of namespaces, keyed by namespace name.
class NameSpaces < Hash
  # dbname::  databank name handed to every namespace object.
  # nsclass:: class instantiated as nsclass.new(dbname, name).
  # arg::     namespace names, either a tab-separated String or an
  #           Array-like list.
  def initialize(dbname, nsclass, arg)
    @dbname = dbname
    @nsclass = nsclass
    list = arg.is_a?(String) ? arg.split("\t") : arg
    list.each { |ns_name| self[ns_name] = @nsclass.new(@dbname, ns_name) }
    self
  end

  # Yields each namespace name.
  def each_names
    self.names.each { |ns_name| yield ns_name }
  end

  # Yields each namespace object.
  def each_files
    self.values.each { |ns| yield ns }
  end

  # Names of all namespaces.
  def names
    keys
  end

  # Closes the file of every namespace.
  def close_all
    values.each { |ns| ns.file.close }
  end
  alias close close_all

  # Searches +key+ in every namespace; returns the combined hits,
  # sorted and without duplicates.
  def search(key)
    hits = values.inject([]) { |acc, ns| acc.concat(ns.search(key)) }
    hits.sort!
    hits.uniq!
    hits
  end

  # Searches +key+ only in the named namespaces, in the given order.
  # Hits are returned as found (neither sorted nor de-duplicated).
  # Raises RuntimeError for a name that is not defined here.
  def search_names(key, *names)
    names.inject([]) do |acc, ns_name|
      ns = self[ns_name]
      raise "undefined namespace #{ns_name.inspect}" unless ns
      acc.concat(ns.search(key))
    end
  end

  # Tab-separated list of namespace names.
  def to_s
    names.join("\t")
  end
end #class NameSpaces
|
|
874
|
+
|
|
875
|
+
# A databank directory: the configuration (config.dat), the list of
# indexed flatfiles, and the primary/secondary namespaces. Depending
# on the index type the data live either in flat files or in
# Berkeley DB files.
class DataBank
  # Parses "key<TAB>value" lines from +fileobj+ into a Hash.
  def self.file2hash(fileobj)
    hash = {}
    fileobj.each do |line|
      line.chomp!
      a = line.split("\t", 2)
      hash[a[0]] = a[1]
    end
    hash
  end
  private_class_method :file2hash

  # Path of the configuration file inside databank +dbname+.
  def self.filename(dbname)
    File.join(dbname, 'config.dat')
  end

  # Reads config.dat of databank +name+ and returns a DataBank.
  # +mode+ is the file open mode; +bdbarg+ is passed to bdb_open.
  def self.read(name, mode = 'rb', *bdbarg)
    # block form releases the handle even when parsing raises
    hash = File.open(filename(name), mode) { |f| file2hash(f) }
    db = self.new(name, nil, hash)
    db.bdb_open(*bdbarg)
    db
  end

  # Same as DataBank.read.
  def self.open(*arg)
    self.read(*arg)
  end

  # name::     databank directory name (copied and frozen).
  # idx_type:: index type used when +hash+ has no 'index' entry.
  # hash::     configuration key/value pairs.
  def initialize(name, idx_type = nil, hash = {})
    @dbname = name.dup
    @dbname.freeze
    @bdb = nil

    @always_check = true
    self.index_type = (hash['index'] or idx_type)

    if @bdb then
      @config = BDBwrapper.new(@dbname, 'config')
      @bdb_fileids = BDBwrapper.new(@dbname, 'fileids')
      @nsclass_pri = BDB_1::PrimaryNameSpace
      @nsclass_sec = BDB_1::SecondaryNameSpace
    else
      @config = hash
      @nsclass_pri = Flat_1::PrimaryNameSpace
      @nsclass_sec = Flat_1::SecondaryNameSpace
    end
    true
  end

  attr_reader :dbname, :index_type

  # Selects the index type. MAGIC_BDB switches to Berkeley DB mode
  # (raises RuntimeError when BDB is not defined); MAGIC_FLAT, '',
  # nil and false select flat mode; anything else raises.
  def index_type=(str)
    case str
    when MAGIC_BDB
      @index_type = MAGIC_BDB
      @bdb = true
      unless defined?(BDB)
        raise RuntimeError, "Berkeley DB support not found"
      end
    when MAGIC_FLAT, '', nil, false
      @index_type = MAGIC_FLAT
      @bdb = false
    else
      raise 'unknown or unsupported index type'
    end
  end

  # Renders the contents of config.dat. In Berkeley DB mode only the
  # index line is produced.
  def to_s
    a = ""
    a << "index\t#{@index_type}\n"

    unless @bdb then
      # go through the lazy accessors so members that were never
      # touched are initialized instead of being nil here
      a << "format\t#{format}\n"
      fileids.each_with_index do |x, i|
        a << "#{x.to_s(i)}\n"
      end
      a << "primary_namespace\t#{primary.name}\n"
      a << "secondary_namespaces\t"
      a << secondary.names.join("\t")
      a << "\n"
    end
    a
  end

  # (Re)opens the Berkeley DB files. Returns true in Berkeley DB
  # mode, nil in flat mode (where nothing is done).
  def bdb_open(*bdbarg)
    if @bdb then
      @config.close
      @config.open(*bdbarg)
      @bdb_fileids.close
      @bdb_fileids.open(*bdbarg)
      true
    else
      nil
    end
  end

  # Writes config.dat, creating the databank directory if needed.
  # In Berkeley DB mode the settings are also stored in the DB files.
  # Returns true.
  def write(mode = 'wb', *bdbarg)
    unless FileTest.directory?(@dbname) then
      Dir.mkdir(@dbname)
    end
    # render before opening: a failure in to_s must not leave a
    # truncated config.dat behind
    content = self.to_s
    File.open(self.class.filename(@dbname), mode) { |f| f.write content }

    if @bdb then
      bdb_open(*bdbarg)
      @config['format'] = format
      @config['primary_namespace'] = primary.name
      @config['secondary_namespaces'] = secondary.names.join("\t")
      @bdb_fileids.writeback_array('', fileids, *bdbarg)
    end
    true
  end

  # Closes the namespaces, the flatfiles and, in Berkeley DB mode,
  # the DB files. Returns nil.
  def close
    DEBUG.print "DataBank: close #{@dbname}\n"
    primary.close
    secondary.close
    fileids.close
    if @bdb then
      @config.close
      @bdb_fileids.close
    end
    nil
  end

  ##parameters
  # Primary namespace object, built on first use from the
  # 'primary_namespace' configuration entry.
  def primary
    unless @primary then
      self.primary = @config['primary_namespace']
    end
    @primary
  end

  # Sets the primary namespace by name; nil or an empty name means
  # 'UNIQUE'.
  def primary=(pri_name)
    if !pri_name or pri_name.empty? then
      pri_name = 'UNIQUE'
    end
    @primary = @nsclass_pri.new(@dbname, pri_name)
    @primary
  end

  # Secondary namespaces, built on first use from the
  # 'secondary_namespaces' configuration entry.
  def secondary
    unless @secondary then
      self.secondary = @config['secondary_namespaces']
    end
    @secondary
  end

  # Sets the secondary namespaces (tab-separated String or list of
  # names); nil/false means none.
  def secondary=(sec_names)
    if !sec_names then
      sec_names = []
    end
    @secondary = NameSpaces.new(@dbname, @nsclass_sec, sec_names)
    @secondary
  end

  # Sets the flatfile format name (stored as a String copy).
  def format=(str)
    @format = str.to_s.dup
  end

  # Flatfile format name, taken from the 'format' configuration
  # entry on first use.
  def format
    unless @format then
      self.format = @config['format']
    end
    @format
  end

  # List of indexed flatfiles, created on first use.
  def fileids
    unless @fileids then
      init_fileids
    end
    @fileids
  end

  # (Re)creates the flatfile list from the Berkeley DB file or from
  # the 'fileid_' configuration entries.
  def init_fileids
    if @bdb then
      @fileids = FileIDs.new('', @bdb_fileids)
    else
      @fileids = FileIDs.new('fileid_', @config)
    end
    @fileids
  end

  # high level methods
  # Turns the per-access flatfile check on (truthy) or off.
  def always_check=(bool)
    if bool then
      @always_check = true
    else
      @always_check = false
    end
  end
  attr_reader :always_check

  # Reads +length+ bytes at +pos+ from flatfile number +f+.
  # Raises RuntimeError when checking is enabled and the flatfile's
  # check fails.
  def get_flatfile_data(f, pos, length)
    fi = fileids[f.to_i]
    if @always_check then
      raise "flatfile #{fi.filename.inspect} may be modified" unless fi.check
    end
    fi.get(pos.to_i, length.to_i)
  end

  # Unique ids matching +key+ in any namespace, sorted, no duplicates.
  def search_all_get_unique_id(key)
    s = secondary.search(key)
    p = primary.include?(key)
    s.push p if p
    s.sort!
    s.uniq!
    s
  end

  # Fetches the entries for the given unique ids; returns a Results.
  def search_primary(*arg)
    r = Results.new
    arg.each do |x|
      a = primary.search(x)
      # a is empty or a.size==1 because primary key must be unique
      r.store(x, get_flatfile_data(*a[0])) unless a.empty?
    end
    r
  end

  # Searches +key+ in all namespaces; returns a Results.
  def search_all(key)
    s = search_all_get_unique_id(key)
    search_primary(*s)
  end

  # Unique ids matching +key+ in the primary namespace only.
  def search_primary_get_unique_id(key)
    s = []
    p = primary.include?(key)
    s.push p if p
    s
  end

  # Unique ids matching +key+ in the named namespaces (the primary
  # namespace may be among them), sorted, no duplicates.
  def search_namespaces_get_unique_id(key, *names)
    if names.include?(primary.name) then
      n2 = names.dup
      n2.delete(primary.name)
      p = primary.include?(key)
    else
      n2 = names
      p = nil
    end
    s = secondary.search_names(key, *n2)
    s.push p if p
    s.sort!
    s.uniq!
    s
  end

  # Searches +key+ in the named namespaces; returns a Results.
  def search_namespaces(key, *names)
    s = search_namespaces_get_unique_id(key, *names)
    search_primary(*s)
  end

  # Runs the consistency check on all indexed flatfiles.
  def check_consistency
    fileids.check_all
  end
end #class DataBank
|
|
1134
|
+
|
|
1135
|
+
end #class FlatFileIndex
|
|
1136
|
+
end #module Bio
|
|
1137
|
+
|
|
1138
|
+
######################################################################
|
|
1139
|
+
|
|
1140
|
+
=begin
|
|
1141
|
+
|
|
1142
|
+
= Bio::FlatFileIndex
|
|
1143
|
+
|
|
1144
|
+
--- Bio::FlatFileIndex.new(dbname)
|
|
1145
|
+
--- Bio::FlatFileIndex.open(dbname)
|
|
1146
|
+
|
|
1147
|
+
Opens existing databank. Databank is a directory which contains
|
|
1148
|
+
indexed files and configuration files. The type of the databank
|
|
1149
|
+
(flat or BerkeleyDB) is determined automatically.
|
|
1150
|
+
|
|
1151
|
+
--- Bio::FlatFileIndex#close
|
|
1152
|
+
|
|
1153
|
+
Closes opened databank.
|
|
1154
|
+
|
|
1155
|
+
--- Bio::FlatFileIndex#closed?
|
|
1156
|
+
|
|
1157
|
+
Returns true if already closed. Otherwise, returns false.
|
|
1158
|
+
|
|
1159
|
+
--- Bio::FlatFileIndex#get_by_id(key)
|
|
1160
|
+
|
|
1161
|
+
Common interface defined in registry.rb.
|
|
1162
|
+
Searching databank and returns entry (or entries) as a string.
|
|
1163
|
+
Multiple entries (concatenated into one string) may be returned.
|
|
1164
|
+
Returns an empty string if not found.
|
|
1165
|
+
|
|
1166
|
+
--- Bio::FlatFileIndex#search(key)
|
|
1167
|
+
|
|
1168
|
+
Searching databank and returns a Bio::FlatFileIndex::Results object.
|
|
1169
|
+
|
|
1170
|
+
--- Bio::FlatFileIndex#include?(key)
|
|
1171
|
+
|
|
1172
|
+
Searching databank.
|
|
1173
|
+
If found, returns an array of unique IDs (primary identifiers).
|
|
1174
|
+
If not found, returns nil.
|
|
1175
|
+
|
|
1176
|
+
--- Bio::FlatFileIndex#search_primary(key)
|
|
1177
|
+
|
|
1178
|
+
Searching only primary namespace.
|
|
1179
|
+
Returns a Bio::FlatFileIndex::Results object.
|
|
1180
|
+
|
|
1181
|
+
--- Bio::FlatFileIndex#search_namespaces(key, name1, name2, ...)
|
|
1182
|
+
|
|
1183
|
+
Searching only specific namespaces.
|
|
1184
|
+
Returns a Bio::FlatFileIndex::Results object.
|
|
1185
|
+
|
|
1186
|
+
--- Bio::FlatFileIndex#include_in_primary?(key)
|
|
1187
|
+
|
|
1188
|
+
Same as #include?, but searching only primary namespace.
|
|
1189
|
+
|
|
1190
|
+
--- Bio::FlatFileIndex#include_in_namespaces?(key, name1, name2, ...)
|
|
1191
|
+
|
|
1192
|
+
Same as #include?, but searching only specific namespaces.
|
|
1193
|
+
|
|
1194
|
+
--- Bio::FlatFileIndex#namespaces
|
|
1195
|
+
|
|
1196
|
+
Returns names of namespaces defined in the databank.
|
|
1197
|
+
(example: [ 'LOCUS', 'ACCESSION', 'VERSION' ] )
|
|
1198
|
+
|
|
1199
|
+
--- Bio::FlatFileIndex#primary_namespace
|
|
1200
|
+
|
|
1201
|
+
Returns name of primary namespace.
|
|
1202
|
+
|
|
1203
|
+
--- Bio::FlatFileIndex#secondary_namespaces
|
|
1204
|
+
|
|
1205
|
+
Returns names of secondary namespaces.
|
|
1206
|
+
|
|
1207
|
+
--- Bio::FlatFileIndex#default_namespaces= [ str1, str2, ... ]
|
|
1208
|
+
--- Bio::FlatFileIndex#default_namespaces= nil
|
|
1209
|
+
|
|
1210
|
+
Set default namespaces.
|
|
1211
|
+
nil means all namespaces in the databank.
|
|
1212
|
+
Default namespaces specified in this method only affect
|
|
1213
|
+
#get_by_id, #search, and #include? methods.
|
|
1214
|
+
Default of default namespaces is nil (that is, all namespaces
|
|
1215
|
+
are search destinations by default).
|
|
1216
|
+
|
|
1217
|
+
--- Bio::FlatFileIndex#default_namespaces
|
|
1218
|
+
|
|
1219
|
+
Returns default namespaces.
|
|
1220
|
+
nil means all namespaces.
|
|
1221
|
+
|
|
1222
|
+
--- Bio::FlatFileIndex#check_consistency
|
|
1223
|
+
|
|
1224
|
+
Raise RuntimeError if flatfiles are changed after creating
|
|
1225
|
+
the databank. (This check only compares file sizes as
|
|
1226
|
+
described in the OBDA specification.)
|
|
1227
|
+
|
|
1228
|
+
--- Bio::FlatFileIndex#always_check_consistency=(bool)
|
|
1229
|
+
--- Bio::FlatFileIndex#always_check_consistency
|
|
1230
|
+
|
|
1231
|
+
If true, consistency checks are performed every time
|
|
1232
|
+
accessing flatfiles. If nil/false, no checks are performed.
|
|
1233
|
+
Default of always_check_consistency is true.
|
|
1234
|
+
|
|
1235
|
+
== Bio::FlatFileIndex::Results
|
|
1236
|
+
|
|
1237
|
+
This object is made by Bio::FlatFileIndex methods.
|
|
1238
|
+
Currently, this class inherits Hash, but internal
|
|
1239
|
+
structure of this class may be changed anytime.
|
|
1240
|
+
Using only the methods described below is strongly recommended.
|
|
1241
|
+
|
|
1242
|
+
--- Bio::FlatFileIndex::Results#to_a
|
|
1243
|
+
|
|
1244
|
+
Returns an array of strings.
|
|
1245
|
+
If no search results exist, returns an empty array.
|
|
1246
|
+
|
|
1247
|
+
--- Bio::FlatFileIndex::Results#each
|
|
1248
|
+
|
|
1249
|
+
Iterates over each result(string).
|
|
1250
|
+
Same as to_a.each.
|
|
1251
|
+
|
|
1252
|
+
--- Bio::FlatFileIndex::Results#to_s
|
|
1253
|
+
|
|
1254
|
+
Returns a string (concatenated if multiple results exist).
|
|
1255
|
+
Same as to_a.join('').
|
|
1256
|
+
|
|
1257
|
+
--- Bio::FlatFileIndex::Results#size
|
|
1258
|
+
|
|
1259
|
+
Returns number of results.
|
|
1260
|
+
Same as to_a.size.
|
|
1261
|
+
|
|
1262
|
+
--- Bio::FlatFileIndex::Results#+(res)
|
|
1263
|
+
|
|
1264
|
+
Add search results.
|
|
1265
|
+
"a + b" means "a OR b".
|
|
1266
|
+
* Example
|
|
1267
|
+
# I want to search 'ADH_IRON_1' OR 'ADH_IRON_2'
|
|
1268
|
+
db = Bio::FlatFileIndex.new(location)
|
|
1269
|
+
a1 = db.search('ADH_IRON_1')
|
|
1270
|
+
a2 = db.search('ADH_IRON_2')
|
|
1271
|
+
# a1 and a2 are Bio::FlatFileIndex::Results objects.
|
|
1272
|
+
print a1 + a2
|
|
1273
|
+
|
|
1274
|
+
--- Bio::FlatFileIndex::Results#*(res)
|
|
1275
|
+
|
|
1276
|
+
Returns set intersection of results.
|
|
1277
|
+
"a * b" means "a AND b".
|
|
1278
|
+
* Example
|
|
1279
|
+
# I want to search 'HIS_KIN' AND 'human'
|
|
1280
|
+
db = Bio::FlatFileIndex.new(location)
|
|
1281
|
+
hk = db.search('HIS_KIN')
|
|
1282
|
+
hu = db.search('human')
|
|
1283
|
+
# hk and hu are Bio::FlatFileIndex::Results objects.
|
|
1284
|
+
print hk * hu
|
|
1285
|
+
|
|
1286
|
+
== Bio::FlatFileIndex::DEBUG
|
|
1287
|
+
|
|
1288
|
+
Module for outputting debug messages.
|
|
1289
|
+
Default setting: If $DEBUG or $VERBOSE is true, output debug
|
|
1290
|
+
messages to STDERR; Otherwise, don't output messages.
|
|
1291
|
+
|
|
1292
|
+
--- Bio::FlatFileIndex::DEBUG.out=(io)
|
|
1293
|
+
|
|
1294
|
+
Set debug messages output destination.
|
|
1295
|
+
If true is given, outputs to STDERR.
|
|
1296
|
+
If nil is given, outputs nothing.
|
|
1297
|
+
This method affects ALL of FlatFileIndex related objects/methods.
|
|
1298
|
+
|
|
1299
|
+
== Other classes/modules
|
|
1300
|
+
|
|
1301
|
+
Classes/modules not described in this file are internal use only.
|
|
1302
|
+
|
|
1303
|
+
== SEE ALSO
|
|
1304
|
+
|
|
1305
|
+
* ((<URL:http://obda.open-bio.org/>))
|
|
1306
|
+
* ((<URL:http://cvs.open-bio.org/cgi-bin/viewcvs/viewcvs.cgi/obda-specs/?cvsroot=obf-common>))
|
|
1307
|
+
|
|
1308
|
+
=end
|