bio 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,266 @@
1
+ #
2
+ # bio/io/flatfile/bdb.rb - OBDA flatfile index by Berkeley DB
3
+ #
4
+ # Copyright (C) 2002 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: bdb.rb,v 1.8 2005/09/26 13:00:08 k Exp $
21
+ #
22
+
23
+ begin
24
+ require 'bdb'
25
+ rescue LoadError,NotImplementedError
26
+ end
27
+
28
+ require 'bio/io/flatfile/index'
29
+ require 'bio/io/flatfile/indexer'
30
+
31
+ module Bio
32
+ class FlatFileIndex
33
+
34
# Default open flags and file permission used for the Berkeley DB index
# files. Every method is a module function (e.g. BDBdefault.flag_read).
module BDBdefault
  module_function

  # Mode bits for newly created files: 0666 with the bits set in the
  # process umask cleared.
  def permission
    0666 & ~File.umask
  end

  # Flag for opening a database read-only.
  def flag_read
    BDB::RDONLY
  end

  # Flag for (re)creating a database: BDB::CREATE combined with
  # BDB::TRUNCATE.
  def flag_write
    BDB::CREATE | BDB::TRUNCATE
  end

  # Flag for opening an existing database for update (the string 'r+').
  def flag_append
    'r+'
  end
end #module BDBdefault
55
+
56
# Thin wrapper around one BDB::Btree file stored as <name>/<filename>.
# The database is opened only by #open or #writeback_array; while it is
# closed, #[] returns nil and #keys returns [].
class BDBwrapper
  # name::     directory of the databank
  # filename:: file name inside that directory
  # Any further arguments are accepted but ignored.
  def initialize(name, filename, *arg)
    @dbname, @filename = name, filename
    @file = nil
  end

  # Full path of the database file.
  def filename
    File.join(@dbname, @filename)
  end

  # Opens the Btree unless it is already open. Always returns true.
  def open(flag = BDBdefault.flag_read,
           permission = BDBdefault.permission)
    @file ||= begin
      DEBUG.print "BDBwrapper: open #{filename}\n"
      BDB::Btree.open(filename, nil, flag, permission)
    end
    true
  end

  # Closes the Btree if it is open. Always returns nil.
  def close
    return nil unless @file
    DEBUG.print "BDBwrapper: close #{filename}\n"
    @file.close
    @file = nil
  end

  # Value stored under arg, or nil when the database is not open.
  def [](arg)
    @file ? @file[arg] : nil
  end

  # Stores val under key; both are converted with to_s.
  # The database must already be open.
  def []=(key, val)
    @file[key.to_s] = val.to_s
  end

  # Reopens the database with the given open arguments and stores every
  # element of array under "<prefix><index>".
  def writeback_array(prefix, array, *arg)
    close
    open(*arg)
    array.each_with_index { |val, i| @file["#{prefix}#{i}"] = val.to_s }
  end

  # All keys of the open database, or [] when it is closed.
  def keys
    @file ? @file.keys : []
  end
end #class BDBwrapper
116
+
117
+ module BDB_1
118
# Key => values mapping stored in a Berkeley DB Btree file.
# The values of one key are kept as a single tab-separated string.
# The database handle is opened lazily by the first operation needing it.
class BDBMappingFile
  # Same as new.
  def self.open(*arg)
    self.new(*arg)
  end

  # filename::   path of the Btree file
  # flag::       open flag handed to BDB::Btree.open
  # permission:: file mode handed to BDB::Btree.open
  def initialize(filename, flag = BDBdefault.flag_read,
                 permission = BDBdefault.permission)
    @filename = filename
    @flag = flag
    @permission = permission
    @bdb = nil   # opened on demand, see #open
  end
  attr_reader :filename
  attr_accessor :flag, :permission

  # Opens the database. Returns true when it was opened by this call and
  # nil when it was already open.
  def open
    return nil if @bdb
    DEBUG.print "BDBMappingFile: open #{@filename}\n"
    @bdb = BDB::Btree.open(@filename, nil, @flag, @permission)
    true
  end

  # Closes the database if it is open. Always returns nil.
  def close
    if @bdb then
      DEBUG.print "BDBMappingFile: close #{@filename}\n"
      @bdb.close
      @bdb = nil
    end
    nil
  end

  # Number of keys in the database. Opens the database first so that the
  # call also works on a handle that has not been opened yet.
  def records
    open
    @bdb.size
  end
  alias size records

  # methods for writing

  # Appends val (a single value or a list) to the values already stored
  # under key. Returns the stored tab-separated string.
  def add(key, val)
    open
    val = join_values(val)
    s = @bdb[key]
    val = "#{s}\t#{val}" if s
    @bdb[key] = val
    #DEBUG.print "add: key=#{key.inspect}, val=#{val.inspect}\n"
    val
  end

  # Stores val under key; raises RuntimeError when key already exists.
  def add_exclusive(key, val)
    open
    val = join_values(val)
    if @bdb[key] then
      raise RuntimeError, "keys must be unique, but key #{key.inspect} already exists"
    end
    @bdb[key] = val
    #DEBUG.print "add_exclusive: key=#{key.inspect}, val=#{val.inspect}\n"
    val
  end

  # Stores val under key, replacing (and logging) an existing entry.
  def add_overwrite(key, val)
    open
    val = join_values(val)
    if @bdb[key] then
      DEBUG.print "Warning: overwrote unique id #{key.inspect}\n"
    end
    @bdb[key] = val
    #DEBUG.print "add_overwrite: key=#{key.inspect}, val=#{val.inspect}\n"
    val
  end

  # Merges val into the values stored under key, keeping the list sorted
  # and free of duplicates. Returns the stored tab-separated string.
  def add_nr(key, val)
    open
    s = @bdb[key]
    a = s ? s.split("\t") : []
    a.concat Array(val)
    a.sort!
    a.uniq!
    str = a.join("\t")
    @bdb[key] = str
    #DEBUG.print "add_nr: key=#{key.inspect}, val=#{str.inspect}\n"
    str
  end

  # methods for searching

  # Array of the values stored under key ([] when the key is absent).
  def search(key)
    open
    s = @bdb[key]
    s ? s.split("\t") : []
  end

  # Turns a single value or a list of values into the tab-separated
  # storage format. Array() is used instead of to_a so that a plain
  # String is accepted as well.
  def join_values(val)
    Array(val).join("\t")
  end
  private :join_values
end #class BDBMappingFile
225
+
226
# Primary (unique key) namespace backed by a Berkeley DB mapping file.
class PrimaryNameSpace < Template::NameSpace
  # Creates the BDBMappingFile object for the given path.
  def mapping(filename)
    BDBMappingFile.new(filename)
  end

  # Path of the index file: "key_<name>" inside the databank directory.
  def filename
    File.join(dbname, "key_#{name}")
  end

  # Looks up key. A non-empty hit list is wrapped in one more array;
  # an empty result is returned unchanged.
  def search(key)
    hits = super(key)
    hits.empty? ? hits : [ hits ]
  end
end #class PrimaryNameSpace
242
+
243
# Secondary namespace backed by a Berkeley DB mapping file.
class SecondaryNameSpace < Template::NameSpace
  # Creates the BDBMappingFile object for the given path.
  def mapping(filename)
    BDBMappingFile.new(filename)
  end

  # Path of the index file: "id_<name>" inside the databank directory.
  def filename
    File.join(dbname, "id_#{name}")
  end

  # Looks up key and closes the mapping file again before returning the
  # hits.
  def search(key)
    hits = super(key)
    file.close
    hits
  end
end #class SecondaryNameSpace
257
+ end #module BDB_1
258
+
259
+ end #class FlatFileIndex
260
+ end #module Bio
261
+
262
+ =begin
263
+
264
+ * Classes/modules in this file are for internal use only.
265
+
266
+ =end
@@ -0,0 +1,1308 @@
1
+ #
2
+ # bio/io/flatfile/index.rb - OBDA flatfile index
3
+ #
4
+ # Copyright (C) 2002 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: index.rb,v 1.15 2005/11/28 05:08:26 k Exp $
21
+ #
22
+
23
+ require 'bio/io/flatfile/indexer'
24
+
25
+ module Bio
26
+ class FlatFileIndex
27
+
28
+ autoload :Indexer, 'bio/io/flatfile/indexer'
29
+ autoload :BDBdefault, 'bio/io/flatfile/bdb'
30
+ autoload :BDBwrapper, 'bio/io/flatfile/bdb'
31
+ autoload :BDB_1, 'bio/io/flatfile/bdb'
32
+
33
+ MAGIC_FLAT = 'flat/1'
34
+ MAGIC_BDB = 'BerkeleyDB/1'
35
+
36
+ #########################################################
37
# Opens the flatfile index called name.
#
# Without a block the new index object is returned. With a block the
# index is yielded, closed afterwards (an IOError from closing an already
# closed index is ignored), and the block's value is returned.
def self.open(name)
  return self.new(name) unless block_given?

  index = nil
  begin
    index = self.new(name)
    yield index
  ensure
    if index then
      begin
        index.close
      rescue IOError
      end
    end
  end
end
55
+
56
# Opens the databank called name through DataBank.open and keeps it as
# the backend of this index.
def initialize(name)
  databank = DataBank.open(name)
  @db = databank
end
59
+
60
+ # common interface defined in registry.rb
61
# Searches for key and returns the result converted with to_s.
def get_by_id(key)
  results = search(key)
  results.to_s
end
64
+
65
+ # original methods
66
# Closes the underlying databank and marks this index as closed.
# Raises IOError (via check_closed?) when it is already closed.
def close
  check_closed?
  databank = @db
  databank.close
  @db = nil
end
71
+
72
# true when the databank has been closed, false otherwise.
def closed?
  !@db
end
79
+
80
# Sets the namespaces used by #search and #include?.
# Each given name is copied with dup; nil (or false) removes the
# restriction.
def default_namespaces=(names)
  return @names = nil unless names
  @names = []
  names.each { |ns| @names << ns.dup }
end

# Namespaces set with default_namespaces=, or nil when none are set.
def default_namespaces
  @names
end
92
+
93
# Searches for key: in the default namespaces when they are set,
# otherwise through the databank's search_all.
# Raises IOError when the index is closed.
def search(key)
  check_closed?
  return @db.search_all(key) unless @names
  @db.search_namespaces(key, *@names)
end
101
+
102
# Searches for key in the given namespaces only.
# Raises IOError when the index is closed.
def search_namespaces(key, *names)
  check_closed?
  databank = @db
  databank.search_namespaces(key, *names)
end
106
+
107
# Searches for key through the databank's search_primary.
# Raises IOError when the index is closed.
def search_primary(key)
  check_closed?
  databank = @db
  databank.search_primary(key)
end
111
+
112
# Looks key up (in the default namespaces when they are set, otherwise
# everywhere) and returns the databank's *_get_unique_id result, or nil
# when that result is empty.
# Raises IOError when the index is closed.
def include?(key)
  check_closed?
  ids = if @names then
          @db.search_namespaces_get_unique_id(key, *@names)
        else
          @db.search_all_get_unique_id(key)
        end
  ids.empty? ? nil : ids
end
125
+
126
# Looks key up in the given namespaces and returns the databank's
# search_namespaces_get_unique_id result, or nil when it is empty.
# Raises IOError when the index is closed.
def include_in_namespaces?(key, *names)
  check_closed?
  ids = @db.search_namespaces_get_unique_id(key, *names)
  ids.empty? ? nil : ids
end
135
+
136
# Returns the databank's search_primary_get_unique_id result for key,
# or nil when that result is empty.
# Raises IOError when the index is closed.
def include_in_primary?(key)
  check_closed?
  ids = @db.search_primary_get_unique_id(key)
  ids.empty? ? nil : ids
end
145
+
146
# Names of all namespaces: the primary one first, followed by the
# secondary ones. Raises IOError when the index is closed.
def namespaces
  check_closed?
  secondary_namespaces.unshift(primary_namespace)
end
152
+
153
# Name of the primary namespace. Raises IOError when the index is closed.
def primary_namespace
  check_closed?
  primary = @db.primary
  primary.name
end
157
+
158
# Names of the secondary namespaces. Raises IOError when the index is
# closed.
def secondary_namespaces
  check_closed?
  secondary = @db.secondary
  secondary.names
end
162
+
163
# Delegates to the databank's check_consistency and returns its result.
# Raises IOError when the index is closed.
def check_consistency
  check_closed?
  databank = @db
  databank.check_consistency
end
167
+
168
# Sets the databank's always_check attribute to bool.
def always_check_consistency=(bool)
  @db.always_check = bool
end

# Returns the databank's current always_check setting.
#
# bool:: ignored. The reader used to require this unused argument, so it
#        could not be called as a plain getter; it is now optional and
#        kept only so that callers passing a value keep working.
def always_check_consistency(bool = nil)
  @db.always_check
end
174
+
175
+ # private methods
176
# Guard used by the public methods: raises IOError when the databank has
# been closed, otherwise returns the databank object.
def check_closed?
  raise IOError, 'closed databank' unless @db
  @db
end
private :check_closed?
180
+
181
+ #########################################################
182
+
183
# Search results: a Hash whose values are the retrieved entries.
# #each and #to_a work on the values only.
class Results < Hash

  # Union: a copy of the receiver updated with the pairs of a (a wins on
  # duplicate keys). a must be of the receiver's class.
  def +(a)
    raise 'argument must be Results class' unless a.is_a?(self.class)
    self.dup.update(a)
  end

  # Intersection: the pairs of a whose key has a true value in the
  # receiver. a must be of the receiver's class.
  def *(a)
    raise 'argument must be Results class' unless a.is_a?(self.class)
    common = self.class.new
    a.each_pair { |k, v| common[k] = v if self[k] }
    common
  end

  # All values concatenated into one string.
  def to_s
    values.join
  end

  # Iterate over / list the values instead of the key-value pairs.
  alias_method :each, :each_value
  alias_method :to_a, :values

end #class Results
209
+
210
+ #########################################################
211
+
212
# Debug message sink shared by the flatfile index classes.
# Messages are written only after output has been enabled with
# DEBUG.out= or when $DEBUG / $VERBOSE is set.
module DEBUG
  @@out = STDERR
  @@flag = nil

  # Enables or disables debug output.
  # io:: an object responding to print, or true for STDERR; nil/false
  #      disables the output.
  def self.out=(io)
    if io then
      @@out = (io == true) ? STDERR : io
      @@flag = true
    else
      @@out = nil
      @@flag = nil
    end
    @@out
  end

  # Current output object (nil when output has been disabled).
  def self.out
    @@out
  end

  # Prints the arguments to the output object when output is enabled.
  # $DEBUG or $VERBOSE switches the enabled flag on permanently.
  def self.print(*arg)
    @@flag = true if $DEBUG or $VERBOSE
    return unless @@out and @@flag
    @@out.print(*arg)
  end
end #module DEBUG
234
+
235
+ #########################################################
236
+
237
# Base classes for the index backends.
module Template
  # One namespace (one index file) of a databank. Child classes supply
  # #filename and #mapping.
  class NameSpace
    # Path of the index file; should be redefined in child class.
    def filename
      raise NotImplementedError, "should be redefined in child class"
    end

    # Creates the mapping-file object for filename; should be redefined
    # in child class.
    def mapping(filename)
      raise NotImplementedError, "should be redefined in child class"
      #Flat_1::FlatMappingFile.new(filename)
    end

    # dbname:: location of the databank
    # name::   namespace name (stored as a frozen copy)
    def initialize(dbname, name)
      @dbname = dbname
      @name = name.dup.freeze
      @file = mapping(filename)
    end
    attr_reader :dbname, :name, :file

    # Opens the mapping file and returns its search result for key.
    def search(key)
      @file.open
      @file.search(key)
    end

    # Closes the mapping file.
    def close
      @file.close
    end

    # Returns key when the search for it gives a non-empty result,
    # otherwise nil.
    def include?(key)
      search(key).empty? ? nil : key
    end
  end #class NameSpace
end #module Template
277
+
278
# One flat file of a databank: its name, the size recorded in the index,
# and a lazily opened read handle.
class FileID
  # Builds a FileID from a "filename<TAB>filesize" string
  # (the size part is optional).
  def self.new_from_string(str)
    filename, filesize = str.split("\t", 2)
    filesize = filesize.to_i if filesize
    self.new(filename, filesize)
  end

  # filename:: path of the flat file
  # filesize:: size recorded for the file, or nil when unknown
  def initialize(filename, filesize = nil)
    @filename = filename
    @filesize = filesize
    @io = nil
  end
  attr_reader :filename, :filesize

  # Compares the current size of the file with the recorded size.
  # Returns true or false, or nil when the file does not exist.
  def check
    begin
      fsize = File.size(@filename)
      r = ( fsize == @filesize)
    rescue Errno::ENOENT
      fsize = -1
      r = nil
    end
    DEBUG.print "FileID: File.size(#{@filename.inspect}) = ",
      fsize, (r ? ' == ' : ' != ') , @filesize,
      (r ? '' : ' bad!'), "\n"
    r
  end

  # Replaces the recorded size with the current size of the file.
  def recalc
    @filesize = File.size(@filename)
  end

  # "filename<TAB>filesize"; when i is given the string is prefixed with
  # "fileid_<i><TAB>".
  def to_s(i = nil)
    prefix = i ? "fileid_#{i}\t" : ''
    "#{prefix}#{@filename}\t#{@filesize}"
  end

  # Opens the file for binary reading. Returns true when it was opened by
  # this call and nil when it was already open.
  def open
    return nil if @io
    DEBUG.print "FileID: open #{@filename}\n"
    @io = File.open(@filename, 'rb')
    true
  end

  # Closes the file. Returns nil when it was open and true when there was
  # nothing to close.
  def close
    return true unless @io
    DEBUG.print "FileID: close #{@filename}\n"
    @io.close
    @io = nil
  end

  # Delegates to IO#seek of the open handle.
  def seek(*arg)
    @io.seek(*arg)
  end

  # Delegates to IO#read of the open handle.
  def read(size)
    @io.read(size)
  end

  # Reads length bytes starting at byte offset pos. The handle is closed
  # afterwards, also when seeking or reading raises, so a failed read
  # does not leave the file open.
  def get(pos, length)
    open
    begin
      seek(pos, IO::SEEK_SET)
      read(length)
    ensure
      close
    end
  end
end #class FileID
357
+
358
# Array of FileID objects indexed by file number. Entries not yet in the
# array are fetched on demand from a hash-like object under the key
# "<prefix><number>".
class FileIDs < Array
  # prefix:: key prefix, e.g. "fileid_"
  # hash::   object answering [] and keys that holds the stored strings
  def initialize(prefix, hash)
    @hash = hash
    @prefix = prefix
  end

  # FileID number n; it is loaded from the hash when not cached yet.
  # Returns nil when the hash has no such entry either.
  def [](n)
    cached = super(n)
    return cached if cached
    raw = @hash["#{@prefix}#{n}"]
    self[n] = raw if raw
    super(n)
  end

  # Stores data at position n. A FileID is stored as is, any other true
  # value is parsed with FileID.new_from_string, nil/false stores nil.
  # Returns self[n] afterwards.
  def []=(n, data)
    entry = if data.is_a?(FileID) then
              data
            elsif data then
              FileID.new_from_string(data)
            else
              nil
            end
    super(n, entry)
    self[n]
  end

  # Appends a new FileID for every given file name.
  def add(*arg)
    arg.each { |filename| self << FileID.new(filename) }
  end

  # Loads every "<prefix><number>" entry of the hash into the array and
  # returns the list of numbers found.
  def cache_all
    pattern = /\A#{Regexp.escape(@prefix)}(\d+)/
    ids = []
    @hash.keys.each { |k| ids << $1.to_i if k =~ pattern }
    ids.each { |i| self[i] }
    ids
  end

  # Yields every non-nil FileID in index order.
  def each
    self.size.times do |i|
      fid = self[i]
      yield(fid) if fid
    end
    self
  end

  # Yields every non-nil FileID together with its index.
  def each_with_index
    self.size.times do |i|
      fid = self[i]
      yield(fid, i) if fid
    end
    self
  end

  # Index numbers that hold a FileID (after loading all entries).
  def keys
    self.cache_all
    (0...self.size).select { |i| self[i] }
  end

  # File names of all FileIDs (after loading all entries).
  def filenames
    self.cache_all
    list = []
    self.each { |fid| list.push(fid.filename) }
    list
  end

  # Runs FileID#check on every entry and stops at the first failure.
  # Returns the result of the last check performed (true when there are
  # no entries).
  def check_all
    self.cache_all
    ok = true
    self.each do |fid|
      ok = fid.check
      break unless ok
    end
    ok
  end
  alias check check_all

  # Closes every cached FileID. Returns nil.
  def close_all
    self.each { |fid| fid.close }
    nil
  end
  alias close close_all

  # Recalculates the recorded size of every entry. Returns true.
  def recalc_all
    self.cache_all
    self.each { |fid| fid.recalc }
    true
  end
  alias recalc recalc_all

end #class FileIDs
473
+
474
+ module Flat_1
475
# One fixed-width record of a flat index file: a key followed by its
# values, tab separated and padded with spaces to the record size.
class Record
  # str::  the raw record; trailing NULs/spaces of each field are removed
  # size:: record width (defaults to the length of str)
  def initialize(str, size = nil)
    fields = str.split("\t")
    fields.each { |f| f.sub!(/[\000 ]+\z/, '') }
    @key = fields.shift.to_s
    @val = fields
    @size = size || str.length
    #DEBUG.print "key=#{@key.inspect},val=#{@val.inspect},size=#{@size}\n"
  end
  attr_reader :key, :val, :size

  # The record in its padded on-disk form.
  def to_s
    self.class.to_string(@size, @key, @val)
  end

  # Formats key and the values of val as one tab-separated string,
  # left-justified in a field of size characters.
  def self.to_string(size, key, val)
    body = key + "\t" + val.join("\t")
    format("%-*s", size, body)
  end

  # Builds a Record from its parts.
  def self.create(size, key, val)
    self.new(self.to_string(size, key, val))
  end

  # Records are equal when their string forms are equal.
  def ==(x)
    to_s == x.to_s
  end
end #class Record
502
+
503
+ class FlatMappingFile
504
+ @@recsize_width = 4
505
+ @@recsize_regex = /\A\d{4}\z/
506
+
507
+ def self.open(*arg)
508
+ self.new(*arg)
509
+ end
510
+
511
+ def initialize(filename, mode = 'rb')
512
+ @filename = filename
513
+ @mode = mode
514
+ @file = nil
515
+ #@file = File.open(filename, mode)
516
+ @record_size = nil
517
+ @records = nil
518
+ end
519
+ attr_accessor :mode
520
+ attr_reader :filename
521
+
522
+ def open
523
+ unless @file then
524
+ DEBUG.print "FlatMappingFile: open #{@filename}\n"
525
+ @file = File.open(@filename, @mode)
526
+ true
527
+ else
528
+ nil
529
+ end
530
+ end
531
+
532
+ def close
533
+ if @file then
534
+ DEBUG.print "FlatMappingFile: close #{@filename}\n"
535
+ @file.close
536
+ @file = nil
537
+ end
538
+ nil
539
+ end
540
+
541
+ def record_size
542
+ unless @record_size then
543
+ open
544
+ @file.seek(0, IO::SEEK_SET)
545
+ s = @file.read(@@recsize_width)
546
+ raise 'strange record size' unless s =~ @@recsize_regex
547
+ @record_size = s.to_i
548
+ DEBUG.print "FlatMappingFile: record_size: #{@record_size}\n"
549
+ end
550
+ @record_size
551
+ end
552
+
553
+ def get_record(i)
554
+ rs = record_size
555
+ seek(i)
556
+ str = @file.read(rs)
557
+ #DEBUG.print "get_record(#{i})=#{str.inspect}\n"
558
+ str
559
+ end
560
+
561
+ def seek(i)
562
+ rs = record_size
563
+ @file.seek(@@recsize_width + rs * i)
564
+ end
565
+
566
+ def records
567
+ unless @records then
568
+ rs = record_size
569
+ @records = (@file.stat.size - @@recsize_width) / rs
570
+ DEBUG.print "FlatMappingFile: records: #{@records}\n"
571
+ end
572
+ @records
573
+ end
574
+ alias size records
575
+
576
+ # methods for writing file
577
+ def write_record(str)
578
+ rs = record_size
579
+ rec = sprintf("%-*s", rs, str)[0..rs]
580
+ @file.write(rec)
581
+ end
582
+
583
+ def add_record(str)
584
+ n = records
585
+ rs = record_size
586
+ @file.seek(0, IO::SEEK_END)
587
+ write_record(str)
588
+ @records += 1
589
+ end
590
+
591
+ def put_record(i, str)
592
+ n = records
593
+ rs = record_size
594
+ if i >= n then
595
+ @file.seek(0, IO::SEEK_END)
596
+ @file.write(sprintf("%-*s", rs, '') * (i - n))
597
+ @records = i + 1
598
+ else
599
+ seek(i)
600
+ end
601
+ write_record(str)
602
+ end
603
+
604
+ def init(rs)
605
+ unless 0 < rs and rs < 10 ** @@recsize_width then
606
+ raise 'record size out of range'
607
+ end
608
+ open
609
+ @record_size = rs
610
+ str = sprintf("%0*d", @@recsize_width, rs)
611
+ @file.truncate(0)
612
+ @file.seek(0, IO::SEEK_SET)
613
+ @file.write(str)
614
+ @records = 0
615
+ end
616
+
617
+ # export/import/edit data
618
+ def each
619
+ n = records
620
+ seek(0)
621
+ (0...n).each do |i|
622
+ yield Record.new(get_record(i))
623
+ end
624
+ self
625
+ end
626
+
627
+ def export_tsv(stream)
628
+ self.each do |x|
629
+ stream << "#{x.to_s}\n"
630
+ end
631
+ stream
632
+ end
633
+
634
+ def init_with_sorted_tsv_file(filename, flag_primary = false)
635
+ rec_size = 1
636
+ f = File.open(filename)
637
+ f.each do |y|
638
+ rec_size = y.chomp.length if rec_size < y.chomp.length
639
+ end
640
+ self.init(rec_size)
641
+
642
+ prev = nil
643
+ f.rewind
644
+ if flag_primary then
645
+ f.each do |y|
646
+ x = Record.new(y.chomp, rec_size)
647
+ if prev then
648
+ if x.key == prev.key
649
+ DEBUG.print "Warining: overwrote unique id #{x.key.inspect}\n"
650
+ else
651
+ self.add_record(prev.to_s)
652
+ end
653
+ end
654
+ prev = x
655
+ end
656
+ self.add_record(prev.to_s) if prev
657
+ else
658
+ f.each do |y|
659
+ x = Record.new(y.chomp, rec_size)
660
+ self.add_record(x.to_s) if x != prev
661
+ prev = x
662
+ end
663
+ end
664
+ f.close
665
+ self
666
+ end
667
+
668
+ def self.external_sort_proc(sort_program = '/usr/bin/sort')
669
+ Proc.new do |out, in1, *files|
670
+ system(sort_program, '-o', out, in1, *files)
671
+ end
672
+ end
673
+
674
+ def self.external_merge_sort_proc(sort_program = '/usr/bin/sort')
675
+ Proc.new do |out, in1, *files|
676
+ # (in1 may be sorted)
677
+ tf_all = []
678
+ tfn_all = []
679
+ files.each do |fn|
680
+ tf = Tempfile.open('sort')
681
+ tf.close(false)
682
+ system(sort_program, '-o', tf.path, fn)
683
+ tf_all << tf
684
+ tfn_all << tf.path
685
+ end
686
+ system(sort_program, '-m', '-o', out, in1, *tfn_all)
687
+ tf_all.each do |tf|
688
+ tf.close(true)
689
+ end
690
+ end
691
+ end
692
+
693
+ def self.external_merge_proc(sort_program = '/usr/bin/sort')
694
+ Proc.new do |out, in1, *files|
695
+ # files (and in1) must be sorted
696
+ system(sort_program, '-m', '-o', out, in1, *files)
697
+ end
698
+ end
699
+
700
+ def self.internal_sort_proc
701
+ Proc.new do |out, in1, *files|
702
+ a = IO.readlines(in1)
703
+ files.each do |fn|
704
+ IO.foreach(fn) do |x|
705
+ a << x
706
+ end
707
+ end
708
+ a.sort!
709
+ of = File.open(out, 'w')
710
+ a.each { |x| of << x }
711
+ of.close
712
+ end
713
+ end
714
+
715
+ def import_tsv_files(flag_primary, mode, sort_proc, *files)
716
+ require 'tempfile'
717
+
718
+ tmpfile1 = Tempfile.open('flat')
719
+ self.export_tsv(tmpfile1) unless mode == :new
720
+ tmpfile1.close(false)
721
+
722
+ tmpfile0 = Tempfile.open('sorted')
723
+ tmpfile0.close(false)
724
+
725
+ sort_proc.call(tmpfile0.path, tmpfile1.path, *files)
726
+
727
+ tmpmap = self.class.new(self.filename + ".#{$$}.tmp~", 'wb+')
728
+ tmpmap.init_with_sorted_tsv_file(tmpfile0.path, flag_primary)
729
+ tmpmap.close
730
+ self.close
731
+
732
+ begin
733
+ File.rename(self.filename, self.filename + ".#{$$}.bak~")
734
+ rescue Errno::ENOENT
735
+ end
736
+ File.rename(tmpmap.filename, self.filename)
737
+ begin
738
+ File.delete(self.filename + ".#{$$}.bak~")
739
+ rescue Errno::ENOENT
740
+ end
741
+
742
+ tmpfile0.close(true)
743
+ tmpfile1.close(true)
744
+ self
745
+ end
746
+
747
+
748
+ # methods for searching
749
# Looks up +key+ by binary search over the sorted records.
#
# Returns an array with the value of every record whose key equals +key+,
# in record order, or an empty array when nothing matches (or the file
# holds no records).
#
# The search keeps both a lower and an upper bound, so each probe halves
# the remaining window.  Once a matching record is found, neighbouring
# records are scanned in both directions to collect duplicates of the key.
def search(key)
  upper = records
  return [] if upper <= 0
  lower = 0
  DEBUG.print "binary search starts...\n"
  while lower < upper
    mid = (lower + upper) / 2
    rec = Record.new(get_record(mid))
    if key < rec.key
      upper = mid
    elsif key > rec.key
      lower = mid + 1
    else # key == rec.key
      result = [rec.val]
      # records below +lower+ are already known to have smaller keys
      j = mid - 1
      while j >= lower && (rec = Record.new(get_record(j))).key == key
        result << rec.val
        j -= 1
      end
      result.reverse!
      # records at or above +upper+ are already known to have larger keys
      j = mid + 1
      while j < upper && (rec = Record.new(get_record(j))).key == key
        result << rec.val
        j += 1
      end
      DEBUG.print "#{result.size} hits found!!\n"
      return result
    end
  end
  DEBUG.print "no hits found\n"
  []
end
786
+ end #class FlatMappingFile
787
+
788
# Name space for the primary keys of a flat-file type index.
class PrimaryNameSpace < Template::NameSpace
  # Opens +filename+ as a FlatMappingFile.
  def mapping(filename)
    FlatMappingFile.new(filename)
  end

  # Path of this name space's index file: "key_<name>.key" inside dbname.
  def filename
    basename = "key_#{name}.key"
    File.join(dbname, basename)
  end
end #class PrimaryNameSpace
796
+
797
# Name space for the secondary identifiers of a flat-file type index.
class SecondaryNameSpace < Template::NameSpace
  # Opens +filename+ as a FlatMappingFile.
  def mapping(filename)
    FlatMappingFile.new(filename)
  end

  # Path of this name space's index file: "id_<name>.index" inside dbname.
  def filename
    basename = "id_#{name}.index"
    File.join(dbname, basename)
  end

  # Runs the inherited search for +key+, closes the underlying file and
  # returns the hits as one flat array.
  def search(key)
    hits = super(key)
    file.close
    hits.flatten!
    hits
  end
end #class SecondaryNameSpace
811
+ end #module Flat_1
812
+
813
+
814
# Collection of name space objects, keyed by name space name.
class NameSpaces < Hash
  # dbname::  handed to every name space object created here.
  # nsclass:: class instantiated as nsclass.new(dbname, name) per name.
  # arg::     either a tab-separated String of names or a list of names.
  def initialize(dbname, nsclass, arg)
    @dbname = dbname
    @nsclass = nsclass
    ns_names = arg.is_a?(String) ? arg.split("\t") : arg
    ns_names.each do |ns_name|
      store(ns_name, @nsclass.new(@dbname, ns_name))
    end
    self
  end

  # Yields each name space name.
  def each_names
    names.each { |ns_name| yield ns_name }
  end

  # Yields each name space object.
  def each_files
    values.each { |ns| yield ns }
  end

  # Names of all registered name spaces.
  def names
    keys
  end

  # Closes the file of every name space.
  def close_all
    values.each do |ns|
      ns.file.close
    end
  end
  alias close close_all

  # Searches +key+ in every name space; the combined hits are returned
  # sorted and without duplicates.
  def search(key)
    hits = values.inject([]) { |acc, ns| acc.concat(ns.search(key)) }
    hits.sort!
    hits.uniq!
    hits
  end

  # Searches +key+ only in the name spaces listed in +names+.
  # Raises RuntimeError when a listed name space is not registered.
  # The hits are returned in lookup order (neither sorted nor uniq'ed).
  def search_names(key, *names)
    names.inject([]) do |acc, ns_name|
      ns = self[ns_name]
      raise "undefined namespace #{ns_name.inspect}" unless ns
      acc.concat(ns.search(key))
    end
  end

  # Tab-separated list of the name space names.
  def to_s
    names.join("\t")
  end
end #class NameSpaces
874
+
875
# A databank: a directory holding a configuration file ("config.dat")
# together with the index files of the primary and secondary name spaces.
# Depending on the index type the configuration and name spaces are backed
# either by flat files or by Berkeley DB.
class DataBank
  # Parses tab-separated "key<TAB>value" lines from +fileobj+ into a Hash.
  # Only the first tab splits, so values may themselves contain tabs.
  def self.file2hash(fileobj)
    hash = {}
    fileobj.each do |line|
      key, value = line.chomp.split("\t", 2)
      hash[key] = value
    end
    hash
  end
  private_class_method :file2hash

  # Path of the configuration file inside the databank directory +dbname+.
  def self.filename(dbname)
    File.join(dbname, 'config.dat')
  end

  # Reads the configuration file of databank +name+ and returns a new
  # DataBank.  +bdbarg+ is forwarded to bdb_open.
  # The file is opened in block form so it is closed even if parsing raises.
  def self.read(name, mode = 'rb', *bdbarg)
    hash = File.open(filename(name), mode) { |f| file2hash(f) }
    db = self.new(name, nil, hash)
    db.bdb_open(*bdbarg)
    db
  end

  # Same as DataBank.read.
  def self.open(*arg)
    self.read(*arg)
  end

  # name::     databank directory (a frozen copy is kept).
  # idx_type:: index type used when +hash+ has no 'index' entry.
  # hash::     configuration entries (as produced from config.dat).
  #
  # Raises RuntimeError for an unknown index type, or when Berkeley DB is
  # requested but BDB is not defined.
  def initialize(name, idx_type = nil, hash = {})
    @dbname = name.dup
    @dbname.freeze
    @bdb = nil

    @always_check = true
    self.index_type = (hash['index'] || idx_type)

    if @bdb
      @config = BDBwrapper.new(@dbname, 'config')
      @bdb_fileids = BDBwrapper.new(@dbname, 'fileids')
      @nsclass_pri = BDB_1::PrimaryNameSpace
      @nsclass_sec = BDB_1::SecondaryNameSpace
    else
      @config = hash
      @nsclass_pri = Flat_1::PrimaryNameSpace
      @nsclass_sec = Flat_1::SecondaryNameSpace
    end
  end

  attr_reader :dbname, :index_type

  # Sets the index type.  MAGIC_BDB selects Berkeley DB; MAGIC_FLAT, '',
  # nil and false select the flat index.  Anything else raises RuntimeError.
  def index_type=(str)
    case str
    when MAGIC_BDB
      @index_type = MAGIC_BDB
      @bdb = true
      unless defined?(BDB)
        raise RuntimeError, "Berkeley DB support not found"
      end
    when MAGIC_FLAT, '', nil, false
      @index_type = MAGIC_FLAT
      @bdb = false
    else
      raise 'unknown or unsupported index type'
    end
  end

  # Serialises the configuration in config.dat format.  For Berkeley DB
  # databanks only the "index" line is emitted.
  #
  # The lazily initialised accessors (format, fileids, primary, secondary)
  # are used instead of the raw instance variables, so this also works on
  # a databank whose attributes have not been touched yet.
  def to_s
    lines = ["index\t#{@index_type}\n"]

    unless @bdb
      lines << "format\t#{format}\n"
      fileids.each_with_index do |x, i|
        lines << "#{x.to_s(i)}\n"
      end
      lines << "primary_namespace\t#{primary.name}\n"
      lines << "secondary_namespaces\t#{secondary.names.join("\t")}\n"
    end
    lines.join
  end

  # (Re)opens the Berkeley DB backed stores with +bdbarg+.
  # Returns true for a Berkeley DB databank, nil otherwise.
  def bdb_open(*bdbarg)
    return nil unless @bdb
    @config.close
    @config.open(*bdbarg)
    @bdb_fileids.close
    @bdb_fileids.open(*bdbarg)
    true
  end

  # Writes the configuration to disk, creating the databank directory when
  # it does not exist.  For Berkeley DB databanks the settings are also
  # stored in the BDB config/fileids stores.  Returns true.
  def write(mode = 'wb', *bdbarg)
    Dir.mkdir(@dbname) unless FileTest.directory?(@dbname)
    # block form: the handle is closed even when writing raises
    File.open(self.class.filename(@dbname), mode) { |f| f.write(to_s) }

    if @bdb
      bdb_open(*bdbarg)
      @config['format'] = format
      @config['primary_namespace'] = primary.name
      @config['secondary_namespaces'] = secondary.names.join("\t")
      @bdb_fileids.writeback_array('', fileids, *bdbarg)
    end
    true
  end

  # Closes the name spaces, the file ids and (for Berkeley DB) the backing
  # stores.  Returns nil.
  def close
    DEBUG.print "DataBank: close #{@dbname}\n"
    primary.close
    secondary.close
    fileids.close
    if @bdb
      @config.close
      @bdb_fileids.close
    end
    nil
  end

  ##parameters

  # Primary name space object, created on first use from the
  # 'primary_namespace' configuration entry.
  def primary
    self.primary = @config['primary_namespace'] unless @primary
    @primary
  end

  # Sets the primary name space by name; nil or '' means 'UNIQUE'.
  def primary=(pri_name)
    pri_name = 'UNIQUE' if !pri_name || pri_name.empty?
    @primary = @nsclass_pri.new(@dbname, pri_name)
  end

  # Secondary name spaces (a NameSpaces object), created on first use from
  # the 'secondary_namespaces' configuration entry.
  def secondary
    self.secondary = @config['secondary_namespaces'] unless @secondary
    @secondary
  end

  # Sets the secondary name spaces from a tab-separated String or a list
  # of names; nil/false means none.
  def secondary=(sec_names)
    sec_names ||= []
    @secondary = NameSpaces.new(@dbname, @nsclass_sec, sec_names)
  end

  # Sets the flatfile format name (stored as a String copy).
  def format=(str)
    @format = str.to_s.dup
  end

  # Flatfile format name, taken from the 'format' configuration entry on
  # first use.
  def format
    self.format = @config['format'] unless @format
    @format
  end

  # FileIDs object of this databank, created on first use.
  def fileids
    init_fileids unless @fileids
    @fileids
  end

  # (Re)creates the FileIDs object from the backing store.
  def init_fileids
    @fileids = if @bdb
                 FileIDs.new('', @bdb_fileids)
               else
                 FileIDs.new('fileid_', @config)
               end
  end

  # high level methods

  # Enables or disables the per-access check done in get_flatfile_data;
  # the stored flag is always exactly true or false.
  def always_check=(bool)
    @always_check = bool ? true : false
  end
  attr_reader :always_check

  # Returns +length+ bytes at offset +pos+ of the flatfile with id +f+.
  # When always_check is true, raises RuntimeError if the file's check
  # fails.
  def get_flatfile_data(f, pos, length)
    fi = fileids[f.to_i]
    if @always_check && !fi.check
      raise "flatfile #{fi.filename.inspect} may be modified"
    end
    fi.get(pos.to_i, length.to_i)
  end

  # Searches +key+ in all name spaces; returns the sorted, de-duplicated
  # hits of the secondary search plus the result of primary.include?.
  def search_all_get_unique_id(key)
    ids = secondary.search(key)
    pri = primary.include?(key)
    ids.push pri if pri
    ids.sort!
    ids.uniq!
    ids
  end

  # Fetches the flatfile data for each given primary key and returns them
  # in a Results object; keys without a match are skipped.
  def search_primary(*arg)
    r = Results.new
    arg.each do |x|
      a = primary.search(x)
      # a is empty or a.size==1 because primary key must be unique
      r.store(x, get_flatfile_data(*a[0])) unless a.empty?
    end
    r
  end

  # Searches +key+ in all name spaces and returns a Results object.
  def search_all(key)
    search_primary(*search_all_get_unique_id(key))
  end

  # Array holding the result of primary.include?(key), or an empty array
  # when that result is nil/false.
  def search_primary_get_unique_id(key)
    pri = primary.include?(key)
    pri ? [pri] : []
  end

  # Like search_all_get_unique_id, but restricted to the name spaces in
  # +names+ (which may include the primary name space).
  def search_namespaces_get_unique_id(key, *names)
    if names.include?(primary.name)
      sec_names = names.dup
      sec_names.delete(primary.name)
      pri = primary.include?(key)
    else
      sec_names = names
      pri = nil
    end
    ids = secondary.search_names(key, *sec_names)
    ids.push pri if pri
    ids.sort!
    ids.uniq!
    ids
  end

  # Searches +key+ in the given name spaces and returns a Results object.
  def search_namespaces(key, *names)
    search_primary(*search_namespaces_get_unique_id(key, *names))
  end

  # Runs the check of every registered flatfile (fileids.check_all).
  def check_consistency
    fileids.check_all
  end
end #class DataBank
1134
+
1135
+ end #class FlatFileIndex
1136
+ end #module Bio
1137
+
1138
+ ######################################################################
1139
+
1140
+ =begin
1141
+
1142
+ = Bio::FlatFileIndex
1143
+
1144
+ --- Bio::FlatFileIndex.new(dbname)
1145
+ --- Bio::FlatFileIndex.open(dbname)
1146
+
1147
+ Opens existing databank. Databank is a directory which contains
1148
+ indexed files and configuration files. The type of the databank
1149
+ (flat or BerkeleyDB) is determined automatically.
1150
+
1151
+ --- Bio::FlatFileIndex#close
1152
+
1153
+ Closes opened databank.
1154
+
1155
+ --- Bio::FlatFileIndex#closed?
1156
+
1157
+ Returns true if already closed. Otherwise, returns false.
1158
+
1159
+ --- Bio::FlatFileIndex#get_by_id(key)
1160
+
1161
+ Common interface defined in registry.rb.
1162
+ Searches the databank and returns the entry (or entries) as a string.
1163
+ Multiple entries (concatenated into one string) may be returned.
1164
+ Returns an empty string if not found.
1165
+
1166
+ --- Bio::FlatFileIndex#search(key)
1167
+
1168
+ Searches the databank and returns a Bio::FlatFileIndex::Results object.
1169
+
1170
+ --- Bio::FlatFileIndex#include?(key)
1171
+
1172
+ Searches the databank.
1173
+ If found, returns an array of unique IDs (primary identifiers).
1174
+ If not found, returns nil.
1175
+
1176
+ --- Bio::FlatFileIndex#search_primary(key)
1177
+
1178
+ Searches only the primary namespace.
1179
+ Returns a Bio::FlatFileIndex::Results object.
1180
+
1181
+ --- Bio::FlatFileIndex#search_namespaces(key, name1, name2, ...)
1182
+
1183
+ Searches only the specified namespaces.
1184
+ Returns a Bio::FlatFileIndex::Results object.
1185
+
1186
+ --- Bio::FlatFileIndex#include_in_primary?(key)
1187
+
1188
+ Same as #include?, but searches only the primary namespace.
1189
+
1190
+ --- Bio::FlatFileIndex#include_in_namespaces?(key, name1, name2, ...)
1191
+
1192
+ Same as #include?, but searches only the specified namespaces.
1193
+
1194
+ --- Bio::FlatFileIndex#namespaces
1195
+
1196
+ Returns names of namespaces defined in the databank.
1197
+ (example: [ 'LOCUS', 'ACCESSION', 'VERSION' ] )
1198
+
1199
+ --- Bio::FlatFileIndex#primary_namespace
1200
+
1201
+ Returns name of primary namespace.
1202
+
1203
+ --- Bio::FlatFileIndex#secondary_namespaces
1204
+
1205
+ Returns names of secondary namespaces.
1206
+
1207
+ --- Bio::FlatFileIndex#default_namespaces= [ str1, str2, ... ]
1208
+ --- Bio::FlatFileIndex#default_namespaces= nil
1209
+
1210
+ Set default namespaces.
1211
+ nil means all namespaces in the databank.
1212
+ Default namespaces specified in this method only affect
1213
+ #get_by_id, #search, and #include? methods.
1214
+ Default of default namespaces is nil (that is, all namespaces
1215
+ are search destinations by default).
1216
+
1217
+ --- Bio::FlatFileIndex#default_namespaces
1218
+
1219
+ Returns default namespaces.
1220
+ nil means all namespaces.
1221
+
1222
+ --- Bio::FlatFileIndex#check_consistency
1223
+
1224
+ Raises RuntimeError if flatfiles were changed after creating
1225
+ the databank. (This check only compares file sizes as
1226
+ described in the OBDA specification.)
1227
+
1228
+ --- Bio::FlatFileIndex#always_check_consistency=(bool)
1229
+ --- Bio::FlatFileIndex#always_check_consistency
1230
+
1231
+ If true, consistency checks are performed every time
1232
+ accessing flatfiles. If nil/false, no checks are performed.
1233
+ Default of always_check_consistency is true.
1234
+
1235
+ == Bio::FlatFileIndex::Results
1236
+
1237
+ This object is made by Bio::FlatFileIndex methods.
1238
+ Currently, this class inherits Hash, but internal
1239
+ structure of this class may be changed anytime.
1240
+ Using only the methods described below is strongly recommended.
1241
+
1242
+ --- Bio::FlatFileIndex::Results#to_a
1243
+
1244
+ Returns an array of strings.
1245
+ If there are no search results, returns an empty array.
1246
+
1247
+ --- Bio::FlatFileIndex::Results#each
1248
+
1249
+ Iterates over each result(string).
1250
+ Same as to_a.each.
1251
+
1252
+ --- Bio::FlatFileIndex::Results#to_s
1253
+
1254
+ Returns a string (concatenated if multiple results exist).
1255
+ Same as to_a.join('').
1256
+
1257
+ --- Bio::FlatFileIndex::Results#size
1258
+
1259
+ Returns number of results.
1260
+ Same as to_a.size.
1261
+
1262
+ --- Bio::FlatFileIndex::Results#+(res)
1263
+
1264
+ Add search results.
1265
+ "a + b" means "a OR b".
1266
+ * Example
1267
+ # I want to search 'ADH_IRON_1' OR 'ADH_IRON_2'
1268
+ db = Bio::FlatFileIndex.new(location)
1269
+ a1 = db.search('ADH_IRON_1')
1270
+ a2 = db.search('ADH_IRON_2')
1271
+ # a1 and a2 are Bio::FlatFileIndex::Results objects.
1272
+ print a1 + a2
1273
+
1274
+ --- Bio::FlatFileIndex::Results#*(res)
1275
+
1276
+ Returns set intersection of results.
1277
+ "a * b" means "a AND b".
1278
+ * Example
1279
+ # I want to search 'HIS_KIN' AND 'human'
1280
+ db = Bio::FlatFileIndex.new(location)
1281
+ hk = db.search('HIS_KIN')
1282
+ hu = db.search('human')
1283
+ # hk and hu are Bio::FlatFileIndex::Results objects.
1284
+ print hk * hu
1285
+
1286
+ == Bio::FlatFileIndex::DEBUG
1287
+
1288
+ Module for output debug messages.
1289
+ Default setting: If $DEBUG or $VERBOSE is true, output debug
1290
+ messages to STDERR; Otherwise, don't output messages.
1291
+
1292
+ --- Bio::FlatFileIndex::DEBUG.out=(io)
1293
+
1294
+ Set debug messages output destination.
1295
+ If true is given, outputs to STDERR.
1296
+ If nil is given, outputs nothing.
1297
+ This method affects ALL of FlatFileIndex related objects/methods.
1298
+
1299
+ == Other classes/modules
1300
+
1301
+ Classes/modules not described in this file are internal use only.
1302
+
1303
+ == SEE ALSO
1304
+
1305
+ * ((<URL:http://obda.open-bio.org/>))
1306
+ * ((<URL:http://cvs.open-bio.org/cgi-bin/viewcvs/viewcvs.cgi/obda-specs/?cvsroot=obf-common>))
1307
+
1308
+ =end