perobs 0.0.1 → 1.0.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 9dd54b9f62dc6b5cc7129d25b9ba87e2d7aa3775
-  data.tar.gz: 1431e7ec23c7bf2c18fa65b7c3e14b33bc696b2b
+  metadata.gz: 059ab4702a14c6ff6328881c528eda77d321de51
+  data.tar.gz: 5a8944222bf1feccb2eda749f05d963598e3b945
 SHA512:
-  metadata.gz: dbf7166adf28acabef48594bb80721512f0b30156f66965e7077f6b4089e429c5d951b621c0c3322bc6c2a0b269e47042994dd9449d75cb00858e0d2a23bbbbc
-  data.tar.gz: 73ae5cbfd48a5bc3a53194961398ec6a0ff57a4ac4ed606ba0e3ab1922fcba89cbc72cb90327e2256e2c9709a2a2707bdea8a8bd9124ff973531b800ffc2f304
+  metadata.gz: eeab29c68225efd8efbfb6a94b14c708fac38e16c6c1a399b55941e11e7dab83ecea925382180b803a7a152b71509eef30c50e40e568d5c9900c0dea1eec1d7f
+  data.tar.gz: 993b3ff327426b2797e4696a43ad0a0483d45a22638241fe34409b8fe4bbdddbe5a27adb0f06b088ae5f45638515fe11aa0b4f1baf6079afbfde4d7c06188ea5
data/.gitignore CHANGED
@@ -1,12 +1,10 @@
-/.bundle/
-/.yardoc
-/Gemfile.lock
-/_yardoc/
-/coverage/
-/doc/
-/pkg/
-/spec/reports/
-/tmp/
+.bundle/
+.yardoc
+Gemfile.lock
+_yardoc/
+coverage/
+doc/
+pkg/
 *.bundle
 *.so
 *.o
data/README.md CHANGED
@@ -100,10 +100,6 @@ Or install it yourself as:
 
     $ gem install perobs
 
-## Usage
-
-TODO: Write usage instructions here
-
 ## Contributing
 
 1. Fork it ( https://github.com/scrapper/perobs/fork )
data/lib/perobs/Array.rb CHANGED
@@ -161,6 +161,12 @@ module PEROBS
       @data = data
     end
 
+    # Textual dump for debugging purposes
+    # @return [String]
+    def inspect
+      "[\n" + @data.map { |v| " #{v.inspect}" }.join(",\n") + "\n]\n"
+    end
+
     private
 
     def _serialize
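The new PEROBS::Array#inspect only affects how objects are printed for debugging. A minimal sketch of the resulting format, using a plain Ruby Array in place of the object's internal @data (no store or PEROBS setup involved):

```ruby
# Simulates the string built by the new inspect method above.
data = [1, 'two', { 'three' => 3 }]
puts "[\n" + data.map { |v| " #{v.inspect}" }.join(",\n") + "\n]\n"
# Prints:
# [
#  1,
#  "two",
#  {"three"=>3}
# ]
```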
data/lib/perobs/BTreeBlob.rb ADDED
@@ -0,0 +1,327 @@
+# encoding: UTF-8
+#
+# = BTreeBlob.rb -- Persistent Ruby Object Store
+#
+# Copyright (c) 2015 by Chris Schlaeger <chris@taskjuggler.org>
+#
+# MIT License
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+
+module PEROBS
+
+  # This class manages the usage of the data blobs in the corresponding
+  # BTreeDB object.
+  class BTreeBlob
+
+    # For performance reasons we use an Array for the entries instead of a
+    # Hash. These constants specify the Array index for the corresponding
+    # value.
+    ID = 0
+    # Number of bytes
+    BYTES = 1
+    # Start Address
+    START = 2
+    # Marked/unmarked flag
+    MARKED = 3
+
+    # Create a new BTreeBlob object.
+    # @param dir [String] Fully qualified directory name
+    # @param btreedb [BTreeDB] Reference to the DB that owns this blob
+    def initialize(dir, btreedb)
+      @dir = dir
+      @btreedb = btreedb
+
+      @index_file_name = File.join(dir, 'index')
+      @blobs_file_name = File.join(dir, 'data')
+      read_index
+    end
+
+    # Write the given bytes with the given ID into the DB.
+    # @param id [Fixnum or Bignum] ID
+    # @param raw [String] sequence of bytes
+    def write_object(id, raw)
+      if @entries.length > @btreedb.max_blob_size
+        # The blob has reached the maximum size. Replace the blob with a BTree
+        # node directory and distribute the blob entries into the sub-blobs of
+        # the new BTree node.
+        split_blob
+        # Insert the passed object into the newly created BTree node.
+        @btreedb.put_raw_object(raw, id)
+      else
+        bytes = raw.bytesize
+        start_address = reserve_bytes(id, bytes)
+        if write_to_blobs_file(raw, start_address) != bytes
+          raise RuntimeError, 'Object length does not match written bytes'
+        end
+        write_index
+      end
+    end
+
+    # Read the entry for the given ID and return it as bytes.
+    # @param id [Fixnum or Bignum] ID
+    # @return [String] sequence of bytes or nil if ID is unknown
+    def read_object(id)
+      return nil unless (bytes_and_start = find(id))
+      read_from_blobs_file(*bytes_and_start)
+    end
+
+
+    # Find the data for the object with given id.
+    # @param id [Fixnum or Bignum] Object ID
+    # @return [Array] Returns an Array with two Fixnum entries. The first is
+    #         the number of bytes and the second is the starting offset in the
+    #         blob storage file.
+    def find(id)
+      if (entry = @entries_by_id[id])
+        return [ entry[BYTES], entry[START] ]
+      end
+
+      nil
+    end
+
+    # Clear the mark on all entries in the index.
+    def clear_marks
+      @entries.each { |e| e[MARKED] = 0 }
+      write_index
+    end
+
+    # Set a mark on the entry with the given ID.
+    # @param id [Fixnum or Bignum] ID of the entry
+    def mark(id)
+      found = false
+      @entries.each do |entry|
+        if entry[ID] == id
+          entry[MARKED] = 1
+          found = true
+          break
+        end
+      end
+
+      unless found
+        raise ArgumentError,
+              "Cannot find an entry for ID #{'%016X' % id} to mark"
+      end
+
+      write_index
+    end
+
+    # Check if the entry for a given ID is marked.
+    # @param id [Fixnum or Bignum] ID of the entry
+    # @return [TrueClass or FalseClass] true if marked, false otherwise
+    def is_marked?(id)
+      @entries.each do |entry|
+        return entry[MARKED] != 0 if entry[ID] == id
+      end
+
+      raise ArgumentError,
+            "Cannot find an entry for ID #{'%016X' % id} to check"
+    end
+
+    # Remove all entries from the index that have not been marked.
+    def delete_unmarked_entries
+      # First remove the entry from the hash table.
+      @entries_by_id.delete_if { |id, e| e[MARKED] == 0 }
+      # Then delete the entry itself.
+      @entries.delete_if { |e| e[MARKED] == 0 }
+      write_index
+    end
+
+    # Run a basic consistency check.
+    # @param repair [TrueClass/FalseClass] Not used right now
+    # @return [TrueClass/FalseClass] Always true right now
+    def check(repair = false)
+      # Determine size of the data blobs file.
+      data_file_size = File.exists?(@blobs_file_name) ?
+                       File.size(@blobs_file_name) : 0
+
+      next_start = 0
+      prev_entry = nil
+      @entries.each do |entry|
+        # Entries should never overlap
+        if prev_entry && next_start > entry[START]
+          raise RuntimeError,
+                "#{@dir}: Index entries are overlapping\n" +
+                "ID: #{'%016X' % prev_entry[ID]} " +
+                "Start: #{prev_entry[START]} " +
+                "Bytes: #{prev_entry[BYTES]}\n" +
+                "ID: #{'%016X' % entry[ID]} Start: #{entry[START]} " +
+                "Bytes: #{entry[BYTES]}"
+        end
+        next_start = entry[START] + entry[BYTES]
+
+        # Entries must fit within the data file
+        if next_start > data_file_size
+          raise RuntimeError,
+                "#{@dir}: Entry for ID #{'%016X' % entry[ID]} " +
+                "goes beyond 'data' file " +
+                "size (#{data_file_size})\n" +
+                "ID: #{'%016X' % entry[ID]} Start: #{entry[START]} " +
+                "Bytes: #{entry[BYTES]}"
+        end
+
+        prev_entry = entry
+      end
+
+      true
+    end
+
+    private
+
+    # Write a string of bytes into the file at the given address.
+    # @param raw [String] bytes to write
+    # @param address [Fixnum] offset in the file
+    # @return [Fixnum] number of bytes written
+    def write_to_blobs_file(raw, address)
+      begin
+        File.write(@blobs_file_name, raw, address)
+      rescue => e
+        raise IOError,
+              "Cannot write blobs file #{@blobs_file_name}: #{e.message}"
+      end
+    end
+
+    # Read _bytes_ bytes from the file starting at offset _address_.
+    # @param bytes [Fixnum] number of bytes to read
+    # @param address [Fixnum] offset in the file
+    def read_from_blobs_file(bytes, address)
+      begin
+        File.read(@blobs_file_name, bytes, address)
+      rescue => e
+        raise IOError,
+              "Cannot read blobs file #{@blobs_file_name}: #{e.message}"
+      end
+    end
+
+    # Reserve the bytes needed for the specified number of bytes with the
+    # given ID.
+    # @param id [Fixnum or Bignum] ID of the entry
+    # @param bytes [Fixnum] number of bytes for this entry
+    # @return [Fixnum] the start address of the reserved blob
+    def reserve_bytes(id, bytes)
+      # offset of the first byte after the last seen entry
+      end_of_last_entry = 0
+      # start address of the best fit segment
+      best_fit_start = nil
+      # best fit segment size in bytes
+      best_fit_bytes = nil
+      # Index where to insert the new entry. Append by default.
+      best_fit_index = -1
+      # If there is already an entry for an object with the _id_, we mark it
+      # for deletion.
+      entry_to_delete = nil
+
+      @entries.each.with_index do |entry, i|
+        if entry[ID] == id
+          # We've found an old entry for this ID. Mark it for deletion.
+          entry_to_delete = entry
+          next
+        end
+
+        gap = entry[START] - end_of_last_entry
+        if gap >= bytes &&
+           (best_fit_bytes.nil? || gap < best_fit_bytes)
+          # We've found a segment that fits the requested bytes and fits
+          # better than any previous find.
+          best_fit_start = end_of_last_entry
+          best_fit_bytes = gap
+          # The old entry gets deleted before the new one gets inserted. We
+          # need to correct the index appropriately.
+          best_fit_index = i - (entry_to_delete ? 1 : 0)
+        end
+        end_of_last_entry = entry[START] + entry[BYTES]
+      end
+
+      # Delete the old entry if requested.
+      @entries.delete(entry_to_delete) if entry_to_delete
+
+      # Create a new entry and insert it. The order must match the above
+      # defined constants!
+      entry = [ id, bytes, best_fit_start || end_of_last_entry, 0 ]
+      @entries.insert(best_fit_index, entry)
+      @entries_by_id[id] = entry
+
+      entry[START]
+    end
+
+    def read_index
+      # The entries are stored in two data structures to provide the fastest
+      # access mechanism for each situation. The Array @entries stores them in
+      # a plain Array. @entries_by_id stores them hashed by their ID.
+      @entries = []
+      @entries_by_id = {}
+      if File.exists?(@index_file_name)
+        begin
+          File.open(@index_file_name, 'rb') do |f|
+            # The index is a binary format. Each entry has exactly 25 bytes.
+            # Bytes
+            #  0 -  7 : 64 bits, little endian : ID
+            #  8 - 15 : 64 bits, little endian : Entry length in bytes
+            # 16 - 23 : 64 bits, little endian : Start address in data file
+            # 24      : 8 bits : 0 if unmarked, 1 if marked
+            while (bytes = f.read(25))
+              @entries << (e = bytes.unpack('QQQC'))
+              @entries_by_id[e[ID]] = e
+            end
+          end
+        rescue => e
+          raise RuntimeError,
+                "BTreeBlob file #{@index_file_name} corrupted: #{e.message}"
+        end
+      end
+    end
+
+    def write_index
+      begin
+        File.open(@index_file_name, 'wb') do |f|
+          # See read_index for data format documentation.
+          @entries.each do |entry|
+            f.write(entry.pack('QQQC'))
+          end
+        end
+      rescue => e
+        raise RuntimeError,
+              "Cannot write BTreeBlob index file #{@index_file_name}: " +
+              e.message
+      end
+    end
+
+    def split_blob
+      # Rename the index file to hide the blob file from the DB.
+      File.rename(@index_file_name, @index_file_name + '.bak')
+
+      # Read all entries from the blob and re-store them into the DB. We've
+      # already created the new BTree node, so these entries will be
+      # distributed into new leaf blobs of this new node.
+      @entries.each do |entry|
+        raw = read_from_blobs_file(entry[BYTES], entry[START])
+        @btreedb.put_raw_object(raw, entry[ID])
+      end
+
+      # Once the entries are re-stored, we can delete the old blob files.
+      File.delete(@index_file_name + '.bak')
+      File.delete(@blobs_file_name)
+    end
+
+  end
+
+end
+
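The index format documented in read_index above is easy to verify standalone: each record is three 64-bit unsigned integers ('Q' fields for ID, entry length, and start address) followed by a one-byte mark flag ('C'), i.e. 25 bytes per entry. A quick round-trip sketch, not part of the gem, with arbitrary values:

```ruby
# One index record: [ID, length in bytes, start address, marked flag],
# packed the same way BTreeBlob#write_index does it.
entry = [ 0x0123456789ABCDEF, 42, 1024, 1 ]
record = entry.pack('QQQC')
puts record.bytesize              # => 25
p record.unpack('QQQC') == entry  # => true
```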
data/lib/perobs/BTreeDB.rb ADDED
@@ -0,0 +1,252 @@
+# encoding: UTF-8
+#
+# = BTreeDB.rb -- Persistent Ruby Object Store
+#
+# Copyright (c) 2015 by Chris Schlaeger <chris@taskjuggler.org>
+#
+# MIT License
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+require 'perobs/DataBase'
+require 'perobs/BTreeBlob'
+
+module PEROBS
+
+  # This class implements a BTree database using filesystem directories as
+  # nodes and blob files as leaves. The BTree grows with the number of stored
+  # entries. Each leaf node blob can hold a fixed number of entries. If more
+  # entries need to be stored, the blob is replaced by a node with multiple
+  # new leaves that store the entries of the previous node. The leaves are
+  # implemented by the BTreeBlob class.
+  class BTreeDB < DataBase
+
+    attr_reader :max_blob_size
+
+    # Create a new BTreeDB object.
+    # @param db_name [String] name of the DB directory
+    # @param options [Hash] options to customize the behavior. Currently only
+    #        the following options are supported:
+    #        :serializer    : Can be :marshal, :json, :yaml
+    #        :dir_bits      : The number of bits to use for the BTree nodes.
+    #                         The value must be between 4 and 14. The larger
+    #                         the number the more back-end directories are
+    #                         being used. The default is 12 which results in
+    #                         4096 directories per node.
+    #        :max_blob_size : The maximum number of entries in the BTree leaf
+    #                         nodes. The insert/find/delete time grows
+    #                         linearly with the size.
+    def initialize(db_name, options = {})
+      super(options[:serializer] || :json)
+
+      @db_dir = db_name
+      # Create the database directory if it doesn't exist yet.
+      ensure_dir_exists(@db_dir)
+
+      # Read the existing DB config.
+      @config = get_hash('config')
+      check_option('serializer')
+
+      # Check and set @dir_bits, the number of bits used for each tree level.
+      @dir_bits = options[:dir_bits] || 12
+      if @dir_bits < 4 || @dir_bits > 14
+        raise ArgumentError,
+              "dir_bits option (#{@dir_bits}) must be between 4 and 14"
+      end
+      check_option('dir_bits')
+
+      @max_blob_size = options[:max_blob_size] || 32
+      if @max_blob_size < 4 || @max_blob_size > 128
+        raise ArgumentError,
+              "max_blob_size option (#{@max_blob_size}) must be between 4 and 128"
+      end
+      check_option('max_blob_size')
+
+      put_hash('config', @config)
+
+      # This format string is used to create the directory name.
+      @dir_format_string = "%0#{(@dir_bits / 4) +
+                                (@dir_bits % 4 == 0 ? 0 : 1)}X"
+      # Bit mask to extract the dir_bits LSBs.
+      @dir_mask = 2 ** @dir_bits - 1
+    end
+
+    # Return true if the object with given ID exists
+    # @param id [Fixnum or Bignum]
+    def include?(id)
+      (blob = find_blob(id)) && blob.find(id)
+    end
+
+    # Store a simple Hash as a JSON encoded file into the DB directory.
+    # @param name [String] Name of the hash. Will be used as file name.
+    # @param hash [Hash] A Hash that maps String objects to strings or
+    #        numbers.
+    def put_hash(name, hash)
+      file_name = File.join(@db_dir, name + '.json')
+      begin
+        File.write(file_name, hash.to_json)
+      rescue => e
+        raise RuntimeError,
+              "Cannot write hash file '#{file_name}': #{e.message}"
+      end
+    end
+
+    # Load the Hash with the given name.
+    # @param name [String] Name of the hash.
+    # @return [Hash] A Hash that maps String objects to strings or numbers.
+    def get_hash(name)
+      file_name = File.join(@db_dir, name + '.json')
+      return ::Hash.new unless File.exists?(file_name)
+
+      begin
+        json = File.read(file_name)
+      rescue => e
+        raise RuntimeError,
+              "Cannot read hash file '#{file_name}': #{e.message}"
+      end
+      JSON.parse(json, :create_additions => true)
+    end
+
+    # Store the given object into the cluster files.
+    # @param obj [Hash] Object as defined by PEROBS::ObjectBase
+    def put_object(obj, id)
+      find_blob(id, true).write_object(id, serialize(obj))
+    end
+
+    # Load the given object from the filesystem.
+    # @param id [Fixnum or Bignum] object ID
+    # @return [Hash] Object as defined by PEROBS::ObjectBase or nil if ID does
+    #         not exist
+    def get_object(id)
+      return nil unless (blob = find_blob(id)) && (obj = blob.read_object(id))
+      deserialize(obj)
+    end
+
+    # This method must be called to initiate the marking process.
+    def clear_marks
+      each_blob { |blob| blob.clear_marks }
+    end
+
+    # Permanently delete all objects that have not been marked. Those are
+    # orphaned and are no longer referenced by any actively used object.
+    def delete_unmarked_objects
+      each_blob { |blob| blob.delete_unmarked_entries }
+    end
+
+    # Mark an object.
+    # @param id [Fixnum or Bignum] ID of the object to mark
+    def mark(id)
+      (blob = find_blob(id)) && blob.mark(id)
+    end
+
+    # Check if the object is marked.
+    # @param id [Fixnum or Bignum] ID of the object to check
+    def is_marked?(id)
+      (blob = find_blob(id)) && blob.is_marked?(id)
+    end
+
+    # Basic consistency check.
+    # @param repair [TrueClass/FalseClass] True if found errors should be
+    #        repaired.
+    def check_db(repair = false)
+      each_blob { |blob| blob.check(repair) }
+    end
+
+    # Check if the stored object is syntactically correct.
+    # @param id [Fixnum/Bignum] Object ID
+    # @param repair [TrueClass/FalseClass] True if a repair attempt should be
+    #        made.
+    # @return [TrueClass/FalseClass] True if the object is OK, otherwise
+    #         false.
+    def check(id, repair)
+      begin
+        get_object(id)
+      rescue => e
+        $stderr.puts "Cannot read object with ID #{id}: #{e.message}"
+        return false
+      end
+
+      true
+    end
+
+    # Store the given serialized object into the cluster files. This method is
+    # for internal use only!
+    # @param raw [String] Serialized Object as defined by PEROBS::ObjectBase
+    # @param id [Fixnum or Bignum] Object ID
+    def put_raw_object(raw, id)
+      find_blob(id, true).write_object(id, raw)
+    end
+
+    private
+
+    def find_blob(id, create_missing_blob = false)
+      dir_name = @db_dir
+      loop do
+        dir_bits = id & @dir_mask
+        dir_name = File.join(dir_name, @dir_format_string % dir_bits)
+
+        if Dir.exists?(dir_name)
+          if File.exists?(File.join(dir_name, 'index'))
+            # The directory is a blob directory and not a BTree node dir.
+            return BTreeBlob.new(dir_name, self)
+          end
+        else
+          if create_missing_blob
+            # Create the new blob directory.
+            Dir.mkdir(dir_name)
+            # And initialize the blob DB.
+            return BTreeBlob.new(dir_name, self)
+          else
+            return nil
+          end
+        end
+
+        # Discard the least significant @dir_bits bits and start over again
+        # with the directory that matches the @dir_bits LSBs of the new ID.
+        id = id >> @dir_bits
+      end
+    end
+
+    def each_blob(&block)
+      each_blob_r(@db_dir, &block)
+    end
+
+    def each_blob_r(dir, &block)
+      Dir.glob(File.join(dir, '*')) do |dir_name|
+        if is_blob_dir?(dir_name)
+          block.call(BTreeBlob.new(dir_name, self))
+        else
+          each_blob_r(dir_name, &block)
+        end
+      end
+    end
+
+    def is_blob_dir?(dir_name)
+      # A blob directory contains an 'index' and 'data' file. This is in
+      # contrast to BTree node directories that only contain other
+      # directories.
+      index_file = File.join(dir_name, 'index')
+      File.exists?(index_file)
+    end
+
+  end
+
+end
+
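BTreeDB#find_blob walks the directory tree by repeatedly taking the dir_bits least significant bits of the object ID as the next directory name. A standalone sketch of that name arithmetic with the default of 12 dir_bits; the ID is an arbitrary example value, and unlike find_blob, which stops at the first existing blob directory, this loop simply lists the name components level by level:

```ruby
# Reproduces the @dir_format_string / @dir_mask arithmetic from
# BTreeDB#initialize and #find_blob, for illustration only.
dir_bits = 12
dir_format_string = "%0#{(dir_bits / 4) + (dir_bits % 4 == 0 ? 0 : 1)}X"
dir_mask = 2 ** dir_bits - 1

id = 0x1234ABC
path = []
until id == 0
  path << dir_format_string % (id & dir_mask)
  id >>= dir_bits
end
puts path.join('/')   # => ABC/234/001
```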