perobs 0.0.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 9dd54b9f62dc6b5cc7129d25b9ba87e2d7aa3775
- data.tar.gz: 1431e7ec23c7bf2c18fa65b7c3e14b33bc696b2b
+ metadata.gz: 059ab4702a14c6ff6328881c528eda77d321de51
+ data.tar.gz: 5a8944222bf1feccb2eda749f05d963598e3b945
  SHA512:
- metadata.gz: dbf7166adf28acabef48594bb80721512f0b30156f66965e7077f6b4089e429c5d951b621c0c3322bc6c2a0b269e47042994dd9449d75cb00858e0d2a23bbbbc
- data.tar.gz: 73ae5cbfd48a5bc3a53194961398ec6a0ff57a4ac4ed606ba0e3ab1922fcba89cbc72cb90327e2256e2c9709a2a2707bdea8a8bd9124ff973531b800ffc2f304
+ metadata.gz: eeab29c68225efd8efbfb6a94b14c708fac38e16c6c1a399b55941e11e7dab83ecea925382180b803a7a152b71509eef30c50e40e568d5c9900c0dea1eec1d7f
+ data.tar.gz: 993b3ff327426b2797e4696a43ad0a0483d45a22638241fe34409b8fe4bbdddbe5a27adb0f06b088ae5f45638515fe11aa0b4f1baf6079afbfde4d7c06188ea5
data/.gitignore CHANGED
@@ -1,12 +1,10 @@
- /.bundle/
- /.yardoc
- /Gemfile.lock
- /_yardoc/
- /coverage/
- /doc/
- /pkg/
- /spec/reports/
- /tmp/
+ .bundle/
+ .yardoc
+ Gemfile.lock
+ _yardoc/
+ coverage/
+ doc/
+ pkg/
  *.bundle
  *.so
  *.o
data/README.md CHANGED
@@ -100,10 +100,6 @@ Or install it yourself as:

  $ gem install perobs

- ## Usage
-
- TODO: Write usage instructions here
-
  ## Contributing

  1. Fork it ( https://github.com/scrapper/perobs/fork )
data/lib/perobs/Array.rb CHANGED
@@ -161,6 +161,12 @@ module PEROBS
  @data = data
  end

+ # Textual dump for debugging purposes
+ # @return [String]
+ def inspect
+ "[\n" + @data.map { |v| " #{v.inspect}" }.join(",\n") + "\n]\n"
+ end
+
  private

  def _serialize
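
The inspect method added above simply renders the wrapped @data member one element per line. As a hedged illustration (plain Ruby, not part of the gem), the same expression applied to an ordinary Array standing in for the internal @data member produces:

# Sketch: the string built by the inspect method above.
data = [1, 'two', nil]
puts "[\n" + data.map { |v| " #{v.inspect}" }.join(",\n") + "\n]\n"
# Prints:
# [
#  1,
#  "two",
#  nil
# ]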
data/lib/perobs/BTreeBlob.rb ADDED
@@ -0,0 +1,327 @@
+ # encoding: UTF-8
+ #
+ # = BTreeBlob.rb -- Persistent Ruby Object Store
+ #
+ # Copyright (c) 2015 by Chris Schlaeger <chris@taskjuggler.org>
+ #
+ # MIT License
+ #
+ # Permission is hereby granted, free of charge, to any person obtaining
+ # a copy of this software and associated documentation files (the
+ # "Software"), to deal in the Software without restriction, including
+ # without limitation the rights to use, copy, modify, merge, publish,
+ # distribute, sublicense, and/or sell copies of the Software, and to
+ # permit persons to whom the Software is furnished to do so, subject to
+ # the following conditions:
+ #
+ # The above copyright notice and this permission notice shall be
+ # included in all copies or substantial portions of the Software.
+ #
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+
+ module PEROBS
+
+ # This class manages the usage of the data blobs in the corresponding
+ # HashedBlobsDB object.
+ class BTreeBlob
+
+ # For performance reasons we use an Array for the entries instead of a
+ # Hash. These constants specify the Array index for the corresponding
+ # value.
+ ID = 0
+ # Number of bytes
+ BYTES = 1
+ # Start Address
+ START = 2
+ # Mark/Unmarked flag
+ MARKED = 3
+
+ # Create a new BTreeBlob object.
+ # @param dir [String] Fully qualified directory name
+ # @param btreedb [BTreeDB] Reference to the DB that owns this blob
+ def initialize(dir, btreedb)
+ @dir = dir
+ @btreedb = btreedb
+
+ @index_file_name = File.join(dir, 'index')
+ @blobs_file_name = File.join(dir, 'data')
+ read_index
+ end
+
+ # Write the given bytes with the given ID into the DB.
+ # @param id [Fixnum or Bignum] ID
+ # @param raw [String] sequence of bytes
+ def write_object(id, raw)
+ if @entries.length > @btreedb.max_blob_size
+ # The blob has reached the maximum size. Replace the blob with a BTree
+ # node directory and distribute the blob entires into the sub-blobs of
+ # the new BTree node.
+ split_blob
+ # Insert the passed object into the newly created BTree node.
+ @btreedb.put_raw_object(raw, id)
+ else
+ bytes = raw.bytesize
+ start_address = reserve_bytes(id, bytes)
+ if write_to_blobs_file(raw, start_address) != bytes
+ raise RuntimeError, 'Object length does not match written bytes'
+ end
+ write_index
+ end
+ end
+
+ # Read the entry for the given ID and return it as bytes.
+ # @param id [Fixnum or Bignum] ID
+ # @return [String] sequence of bytes or nil if ID is unknown
+ def read_object(id)
+ return nil unless (bytes_and_start = find(id))
+ read_from_blobs_file(*bytes_and_start)
+ end
+
+
+ # Find the data for the object with given id.
+ # @param id [Fixnum or Bignum] Object ID
+ # @return [Array] Returns an Array with two Fixnum entries. The first is
+ # the number of bytes and the second is the starting offset in the
+ # blob storage file.
+ def find(id)
+ if (entry = @entries_by_id[id])
+ return [ entry[BYTES], entry[START] ]
+ end
+
+ nil
+ end
+
+ # Clear the mark on all entries in the index.
+ def clear_marks
+ @entries.each { |e| e[MARKED] = 0 }
+ write_index
+ end
+
+ # Set a mark on the entry with the given ID.
+ # @param id [Fixnum or Bignum] ID of the entry
+ def mark(id)
+ found = false
+ @entries.each do |entry|
+ if entry[ID] == id
+ entry[MARKED] = 1
+ found = true
+ break
+ end
+ end
+
+ unless found
+ raise ArgumentError,
+ "Cannot find an entry for ID #{'%016X' % id} to mark"
+ end
+
+ write_index
+ end
+
+ # Check if the entry for a given ID is marked.
+ # @param id [Fixnum or Bignum] ID of the entry
+ # @return [TrueClass or FalseClass] true if marked, false otherwise
+ def is_marked?(id)
+ @entries.each do |entry|
+ return entry[MARKED] != 0 if entry[ID] == id
+ end
+
+ raise ArgumentError,
+ "Cannot find an entry for ID #{'%016X' % id} to check"
+ end
+
+ # Remove all entries from the index that have not been marked.
+ def delete_unmarked_entries
+ # First remove the entry from the hash table.
+ @entries_by_id.delete_if { |id, e| e[MARKED] == 0 }
+ # Then delete the entry itself.
+ @entries.delete_if { |e| e[MARKED] == 0 }
+ write_index
+ end
+
+ # Run a basic consistency check.
+ # @param repair [TrueClass/FalseClass] Not used right now
+ # @return [TrueClass/FalseClass] Always true right now
+ def check(repair = false)
+ # Determine size of the data blobs file.
+ data_file_size = File.exists?(@blobs_file_name) ?
+ File.size(@blobs_file_name) : 0
+
+ next_start = 0
+ prev_entry = nil
+ @entries.each do |entry|
+ # Entries should never overlap
+ if prev_entry && next_start > entry[START]
+ raise RuntimeError,
+ "#{@dir}: Index entries are overlapping\n" +
+ "ID: #{'%016X' % prev_entry[ID]} " +
+ "Start: #{prev_entry[START]} " +
+ "Bytes: #{prev_entry[BYTES]}\n" +
+ "ID: #{'%016X' % entry[ID]} Start: #{entry[START]} " +
+ "Bytes: #{entry[BYTES]}"
+ end
+ next_start = entry[START] + entry[BYTES]
+
+ # Entries must fit within the data file
+ if next_start > data_file_size
+ raise RuntimeError,
+ "#{@dir}: Entry for ID #{'%016X' % entry[ID]} " +
+ "goes beyond 'data' file " +
+ "size (#{data_file_size})\n" +
+ "ID: #{'%016X' % entry[ID]} Start: #{entry[START]} " +
+ "Bytes: #{entry[BYTES]}"
+ end
+
+ prev_entry = entry
+ end
+
+ true
+ end
+
+ private
+
+ # Write a string of bytes into the file at the given address.
+ # @param raw [String] bytes to write
+ # @param address [Fixnum] offset in the file
+ # @return [Fixnum] number of bytes written
+ def write_to_blobs_file(raw, address)
+ begin
+ File.write(@blobs_file_name, raw, address)
+ rescue => e
+ raise IOError,
+ "Cannot write blobs file #{@blobs_file_name}: #{e.message}"
+ end
+ end
+
+ # Read _bytes_ bytes from the file starting at offset _address_.
+ # @param bytes [Fixnum] number of bytes to read
+ # @param address [Fixnum] offset in the file
+ def read_from_blobs_file(bytes, address)
+ begin
+ File.read(@blobs_file_name, bytes, address)
+ rescue => e
+ raise IOError,
+ "Cannot read blobs file #{@blobs_file_name}: #{e.message}"
+ end
+ end
+
+ # Reserve the bytes needed for the specified number of bytes with the
+ # given ID.
+ # @param id [Fixnum or Bignum] ID of the entry
+ # @param bytes [Fixnum] number of bytes for this entry
+ # @return [Fixnum] the start address of the reserved blob
+ def reserve_bytes(id, bytes)
+ # index of first blob after the last seen entry
+ end_of_last_entry = 0
+ # blob index of best fit segment
+ best_fit_start = nil
+ # best fir segment size in bytes
+ best_fit_bytes = nil
+ # Index where to insert the new entry. Append by default.
+ best_fit_index = -1
+ # If there is already an entry for an object with the _id_, we mark it
+ # for deletion.
+ entry_to_delete = nil
+
+ @entries.each.with_index do |entry, i|
+ if entry[ID] == id
+ # We've found an old entry for this ID. Mark it for deletion.
+ entry_to_delete = entry
+ next
+ end
+
+ gap = entry[START] - end_of_last_entry
+ if gap >= bytes &&
+ (best_fit_bytes.nil? || gap < best_fit_bytes)
+ # We've found a segment that fits the requested bytes and fits
+ # better than any previous find.
+ best_fit_start = end_of_last_entry
+ best_fit_bytes = gap
+ # The old entry gets deleted before the new one gets inserted. We
+ # need to correct the index appropriately.
+ best_fit_index = i - (entry_to_delete ? 1 : 0)
+ end
+ end_of_last_entry = entry[START] + entry[BYTES]
+ end
+
+ # Delete the old entry if requested.
+ @entries.delete(entry_to_delete) if entry_to_delete
+
+ # Create a new entry and insert it. The order must match the above
+ # defined constants!
+ entry = [ id, bytes, best_fit_start || end_of_last_entry, 0 ]
+ @entries.insert(best_fit_index, entry)
+ @entries_by_id[id] = entry
+
+ entry[START]
+ end
+
+ def read_index
+ # The entries are stored in two data structures to provide the fastest
+ # access mechanism for each situation. The Array @entries stores them in
+ # a plan Array. @entries_by_id stores them hashed by their ID.
+ @entries = []
+ @entries_by_id = {}
+ if File.exists?(@index_file_name)
+ begin
+ File.open(@index_file_name, 'rb') do |f|
+ # The index is a binary format. Each entry has exactly 25 bytes.
+ # Bytes
+ # 0 - 7 : 64 bits, little endian : ID
+ # 8 - 15 : 64 bits, little endian : Entry length in bytes
+ # 16 - 23 : 64 bits, little endian : Start address in data file
+ # 24 : 8 bits : 0 if unmarked, 1 if marked
+ while (bytes = f.read(25))
+ @entries << (e = bytes.unpack('QQQC'))
+ @entries_by_id[e[ID]] = e
+ end
+ end
+ rescue => e
+ raise RuntimeError,
+ "BTreeBlob file #{@index_file_name} corrupted: #{e.message}"
+ end
+ end
+ end
+
+ def write_index
+ begin
+ File.open(@index_file_name, 'wb') do |f|
+ # See read_index for data format documentation.
+ @entries.each do |entry|
+ f.write(entry.pack('QQQC'))
+ end
+ end
+ rescue => e
+ raise RuntimeError,
+ "Cannot write BTreeBlob index file #{@index_file_name}: " +
+ e.message
+ end
+ end
+
+ def split_blob
+ # Rename the index file to hide the blob file from the DB.
+ File.rename(@index_file_name, @index_file_name + '.bak')
+
+ # Read all entries from the blob and re-store them into the DB. We've
+ # already created the new BTree node, so these entries will be
+ # distributed into new leaf blobs of this new node.
+ @entries.each do |entry|
+ raw = read_from_blobs_file(entry[BYTES], entry[START])
+ @btreedb.put_raw_object(raw, entry[ID])
+ end
+
+ # Once the entries are re-stored, we can delete the old blob files.
+ File.delete(@index_file_name + '.bak')
+ File.delete(@blobs_file_name)
+ end
+
+ end
+
+ end
+
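
The index format documented in read_index above is three 64-bit words plus a one-byte mark flag, 25 bytes per record. A minimal standalone sketch (my own illustration, not gem code) that packs and unpacks one record with the same 'QQQC' template write_index and read_index use:

# Sketch: encode one index record [ID, BYTES, START, MARKED] the way
# BTreeBlob#write_index does, then decode it again as read_index would.
entry = [ 0xCAFE, 128, 4096, 1 ]       # ID, byte count, start address, mark
record = entry.pack('QQQC')
puts record.bytesize                   # => 25
id, bytes, start, marked = record.unpack('QQQC')
puts format('ID: %016X bytes: %d start: %d marked: %d', id, bytes, start, marked)
# => ID: 000000000000CAFE bytes: 128 start: 4096 marked: 1

Note that Ruby's 'Q' pack directive uses native byte order; on the little-endian machines the in-code comments assume, that matches the documented layout.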
data/lib/perobs/BTreeDB.rb ADDED
@@ -0,0 +1,252 @@
+ # encoding: UTF-8
+ #
+ # = BTreeBlob.rb -- Persistent Ruby Object Store
+ #
+ # Copyright (c) 2015 by Chris Schlaeger <chris@taskjuggler.org>
+ #
+ # MIT License
+ #
+ # Permission is hereby granted, free of charge, to any person obtaining
+ # a copy of this software and associated documentation files (the
+ # "Software"), to deal in the Software without restriction, including
+ # without limitation the rights to use, copy, modify, merge, publish,
+ # distribute, sublicense, and/or sell copies of the Software, and to
+ # permit persons to whom the Software is furnished to do so, subject to
+ # the following conditions:
+ #
+ # The above copyright notice and this permission notice shall be
+ # included in all copies or substantial portions of the Software.
+ #
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ require 'perobs/DataBase'
+ require 'perobs/BTreeBlob'
+
+ module PEROBS
+
+ # This class implements a BTree database using filesystem directories as
+ # nodes and blob files as leafs. The BTree grows with the number of stored
+ # entries. Each leaf node blob can hold a fixed number of entries. If more
+ # entries need to be stored, the blob is replaced by a node with multiple
+ # new leafs that store the entries of the previous node. The leafs are
+ # implemented by the BTreeBlob class.
+ class BTreeDB < DataBase
+
+ attr_reader :max_blob_size
+
+ # Create a new BTreeDB object.
+ # @param db_name [String] name of the DB directory
+ # @param options [Hash] options to customize the behavior. Currently only
+ # the following options are supported:
+ # :serializer : Can be :marshal, :json, :yaml
+ # :dir_bits : The number of bits to use for the BTree nodes.
+ # The value must be between 4 and 14. The larger
+ # the number the more back-end directories are
+ # being used. The default is 12 which results in
+ # 4096 directories per node.
+ # :max_blob_size : The maximum number of entries in the BTree leaf
+ # nodes. The insert/find/delete time grows
+ # linearly with the size.
+ def initialize(db_name, options = {})
+ super(options[:serializer] || :json)
+
+ @db_dir = db_name
+ # Create the database directory if it doesn't exist yet.
+ ensure_dir_exists(@db_dir)
+
+ # Read the existing DB config.
+ @config = get_hash('config')
+ check_option('serializer')
+
+ # Check and set @dir_bits, the number of bits used for each tree level.
+ @dir_bits = options[:dir_bits] || 12
+ if @dir_bits < 4 || @dir_bits > 14
+ raise ArgumentError,
+ "dir_bits option (#{@dir_bits}) must be between 4 and 12"
+ end
+ check_option('dir_bits')
+
+ @max_blob_size = options[:max_blob_size] || 32
+ if @max_blob_size < 4 || @max_blob_size > 128
+ raise ArgumentError,
+ "max_blob_size option (#{@max_blob_size}) must be between 4 and 128"
+ end
+ check_option('max_blob_size')
+
+ put_hash('config', @config)
+
+ # This format string is used to create the directory name.
+ @dir_format_string = "%0#{(@dir_bits / 4) +
+ (@dir_bits % 4 == 0 ? 0 : 1)}X"
+ # Bit mask to extract the dir_bits LSBs.
+ @dir_mask = 2 ** @dir_bits - 1
+ end
+
+ # Return true if the object with given ID exists
+ # @param id [Fixnum or Bignum]
+ def include?(id)
+ (blob = find_blob(id)) && blob.find(id)
+ end
+
+ # Store a simple Hash as a JSON encoded file into the DB directory.
+ # @param name [String] Name of the hash. Will be used as file name.
+ # @param hash [Hash] A Hash that maps String objects to strings or
+ # numbers.
+ def put_hash(name, hash)
+ file_name = File.join(@db_dir, name + '.json')
+ begin
+ File.write(file_name, hash.to_json)
+ rescue => e
+ raise RuntimeError,
+ "Cannot write hash file '#{file_name}': #{e.message}"
+ end
+ end
+
+ # Load the Hash with the given name.
+ # @param name [String] Name of the hash.
+ # @return [Hash] A Hash that maps String objects to strings or numbers.
+ def get_hash(name)
+ file_name = File.join(@db_dir, name + '.json')
+ return ::Hash.new unless File.exists?(file_name)
+
+ begin
+ json = File.read(file_name)
+ rescue => e
+ raise RuntimeError,
+ "Cannot read hash file '#{file_name}': #{e.message}"
+ end
+ JSON.parse(json, :create_additions => true)
+ end
+
+ # Store the given object into the cluster files.
+ # @param obj [Hash] Object as defined by PEROBS::ObjectBase
+ def put_object(obj, id)
+ find_blob(id, true).write_object(id, serialize(obj))
+ end
+
+ # Load the given object from the filesystem.
+ # @param id [Fixnum or Bignum] object ID
+ # @return [Hash] Object as defined by PEROBS::ObjectBase or nil if ID does
+ # not exist
+ def get_object(id)
+ return nil unless (blob = find_blob(id)) && (obj = blob.read_object(id))
+ deserialize(obj)
+ end
+
+ # This method must be called to initiate the marking process.
+ def clear_marks
+ each_blob { |blob| blob.clear_marks }
+ end
+
+ # Permanently delete all objects that have not been marked. Those are
+ # orphaned and are no longer referenced by any actively used object.
+ def delete_unmarked_objects
+ each_blob { |blob| blob.delete_unmarked_entries }
+ end
+
+ # Mark an object.
+ # @param id [Fixnum or Bignum] ID of the object to mark
+ def mark(id)
+ (blob = find_blob(id)) && blob.mark(id)
+ end
+
+ # Check if the object is marked.
+ # @param id [Fixnum or Bignum] ID of the object to check
+ def is_marked?(id)
+ (blob = find_blob(id)) && blob.is_marked?(id)
+ end
+
+ # Basic consistency check.
+ # @param repair [TrueClass/FalseClass] True if found errors should be
+ # repaired.
+ def check_db(repair = false)
+ each_blob { |blob| blob.check(repair) }
+ end
+
+ # Check if the stored object is syntactically correct.
+ # @param id [Fixnum/Bignum] Object ID
+ # @param repair [TrueClass/FalseClass] True if an repair attempt should be
+ # made.
+ # @return [TrueClass/FalseClass] True if the object is OK, otherwise
+ # false.
+ def check(id, repair)
+ begin
+ get_object(id)
+ rescue => e
+ $stderr.puts "Cannot read object with ID #{id}: #{e.message}"
+ return false
+ end
+
+ true
+ end
+
+ # Store the given serialized object into the cluster files. This method is
+ # for internal use only!
+ # @param raw [String] Serialized Object as defined by PEROBS::ObjectBase
+ # @param id [Fixnum or Bignum] Object ID
+ def put_raw_object(raw, id)
+ find_blob(id, true).write_object(id, raw)
+ end
+
+ private
+
+ def find_blob(id, create_missing_blob = false)
+ dir_name = @db_dir
+ loop do
+ dir_bits = id & @dir_mask
+ dir_name = File.join(dir_name, @dir_format_string % dir_bits)
+
+ if Dir.exists?(dir_name)
+ if File.exists?(File.join(dir_name, 'index'))
+ # The directory is a blob directory and not a BTree node dir.
+ return BTreeBlob.new(dir_name, self)
+ end
+ else
+ if create_missing_blob
+ # Create the new blob directory.
+ Dir.mkdir(dir_name)
+ # And initialize the blob DB.
+ return BTreeBlob.new(dir_name, self)
+ else
+ return nil
+ end
+ end
+
+ # Discard the least significant @dir_bits bits and start over again
+ # with the directory that matches the @dir_bits LSBs of the new ID.
+ id = id >> @dir_bits
+ end
+ end
+
+ def each_blob(&block)
+ each_blob_r(@db_dir, &block)
+ end
+
+ def each_blob_r(dir, &block)
+ Dir.glob(File.join(dir, '*')) do |dir_name|
+ if is_blob_dir?(dir_name)
+ block.call(BTreeBlob.new(dir_name, self))
+ else
+ each_blob_r(dir_name, &block)
+ end
+ end
+ end
+
+ def is_blob_dir?(dir_name)
+ # A blob directory contains an 'index' and 'data' file. This is in
+ # contrast to BTree node directories that only contain other
+ # directories.
+ index_file = File.join(dir_name, 'index')
+ File.exists?(index_file)
+ end
+
+ end
+
+ end
+
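
To make the directory addressing in find_blob above more tangible: the object ID is consumed dir_bits at a time, least-significant chunk first, and each chunk names one directory level. A rough standalone sketch (my own illustration, not gem code) of that mapping for the default of 12 dir_bits:

# Sketch: derive nested BTree directory names from an object ID using the
# same mask / format-string construction that BTreeDB#initialize sets up.
dir_bits = 12
dir_mask = 2 ** dir_bits - 1
format_string = "%0#{(dir_bits / 4) + (dir_bits % 4 == 0 ? 0 : 1)}X"

id = 0x1A2B3C4D
path = 'db_dir'
3.times do
  path = File.join(path, format_string % (id & dir_mask))
  id >>= dir_bits
end
puts path   # => db_dir/C4D/2B3/01A

The real find_blob stops descending as soon as it reaches a directory that already contains an 'index' file (a leaf blob), so lookups only go as deep as split_blob has grown the tree.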