perobs 0.0.1 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +7 -9
- data/README.md +0 -4
- data/lib/perobs/Array.rb +6 -0
- data/lib/perobs/BTreeBlob.rb +327 -0
- data/lib/perobs/BTreeDB.rb +252 -0
- data/lib/perobs/ClassMap.rb +125 -0
- data/lib/perobs/DataBase.rb +21 -2
- data/lib/perobs/Hash.rb +8 -0
- data/lib/perobs/Object.rb +11 -0
- data/lib/perobs/ObjectBase.rb +12 -2
- data/lib/perobs/Store.rb +21 -7
- data/lib/perobs/version.rb +1 -1
- data/spec/{FileSystemDB_spec.rb → BTreeDB_spec.rb} +53 -9
- data/spec/ClassMap_spec.rb +70 -0
- data/spec/Store_spec.rb +43 -2
- metadata +9 -7
- data/lib/perobs/BlockDB.rb +0 -242
- data/lib/perobs/FileSystemDB.rb +0 -171
- data/lib/perobs/HashedBlocksDB.rb +0 -153
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 059ab4702a14c6ff6328881c528eda77d321de51
|
4
|
+
data.tar.gz: 5a8944222bf1feccb2eda749f05d963598e3b945
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eeab29c68225efd8efbfb6a94b14c708fac38e16c6c1a399b55941e11e7dab83ecea925382180b803a7a152b71509eef30c50e40e568d5c9900c0dea1eec1d7f
|
7
|
+
data.tar.gz: 993b3ff327426b2797e4696a43ad0a0483d45a22638241fe34409b8fe4bbdddbe5a27adb0f06b088ae5f45638515fe11aa0b4f1baf6079afbfde4d7c06188ea5
|
data/.gitignore
CHANGED
data/README.md
CHANGED
data/lib/perobs/Array.rb
CHANGED
@@ -0,0 +1,327 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# = BTreeBlob.rb -- Persistent Ruby Object Store
|
4
|
+
#
|
5
|
+
# Copyright (c) 2015 by Chris Schlaeger <chris@taskjuggler.org>
|
6
|
+
#
|
7
|
+
# MIT License
|
8
|
+
#
|
9
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
10
|
+
# a copy of this software and associated documentation files (the
|
11
|
+
# "Software"), to deal in the Software without restriction, including
|
12
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
13
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
14
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
15
|
+
# the following conditions:
|
16
|
+
#
|
17
|
+
# The above copyright notice and this permission notice shall be
|
18
|
+
# included in all copies or substantial portions of the Software.
|
19
|
+
#
|
20
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
21
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
23
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
24
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
25
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
26
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27
|
+
|
28
|
+
|
29
|
+
module PEROBS

  # This class manages the usage of the data blobs in the corresponding
  # BTreeDB object. A BTreeBlob lives in one directory that holds two files:
  # 'index' (a table with one fixed-size record per stored object) and 'data'
  # (the raw bytes of all stored objects).
  class BTreeBlob

    # For performance reasons we use an Array for the entries instead of a
    # Hash. These constants specify the Array index for the corresponding
    # value.
    ID = 0
    # Number of bytes
    BYTES = 1
    # Start Address
    START = 2
    # Mark/Unmarked flag
    MARKED = 3

    # Size in bytes of one serialized index entry.
    INDEX_ENTRY_BYTES = 25
    # Array#pack / String#unpack template for one index entry.
    INDEX_ENTRY_FORMAT = 'QQQC'.freeze
    private_constant :INDEX_ENTRY_BYTES, :INDEX_ENTRY_FORMAT

    # Create a new BTreeBlob object. An existing index file in _dir_ is
    # loaded immediately.
    # @param dir [String] Fully qualified directory name
    # @param btreedb [BTreeDB] Reference to the DB that owns this blob
    # @raise [RuntimeError] if the index file cannot be read or is corrupted
    def initialize(dir, btreedb)
      @dir = dir
      @btreedb = btreedb

      @index_file_name = File.join(dir, 'index')
      @blobs_file_name = File.join(dir, 'data')
      read_index
    end

    # Write the given bytes with the given ID into the DB.
    # @param id [Fixnum or Bignum] ID
    # @param raw [String] sequence of bytes
    # @raise [RuntimeError] if fewer bytes than expected were written
    def write_object(id, raw)
      if @entries.length > @btreedb.max_blob_size
        # The blob has reached the maximum size. Replace the blob with a BTree
        # node directory and distribute the blob entries into the sub-blobs of
        # the new BTree node.
        split_blob
        # Insert the passed object into the newly created BTree node.
        @btreedb.put_raw_object(raw, id)
      else
        bytes = raw.bytesize
        start_address = reserve_bytes(id, bytes)
        if write_to_blobs_file(raw, start_address) != bytes
          raise RuntimeError, 'Object length does not match written bytes'
        end
        # The index is only persisted after the data has been written.
        write_index
      end
    end

    # Read the entry for the given ID and return it as bytes.
    # @param id [Fixnum or Bignum] ID
    # @return [String] sequence of bytes or nil if ID is unknown
    def read_object(id)
      return nil unless (bytes_and_start = find(id))

      read_from_blobs_file(*bytes_and_start)
    end

    # Find the data for the object with given id.
    # @param id [Fixnum or Bignum] Object ID
    # @return [Array] Returns an Array with two Fixnum entries. The first is
    #         the number of bytes and the second is the starting offset in the
    #         blob storage file. Returns nil if the ID is unknown.
    def find(id)
      return nil unless (entry = @entries_by_id[id])

      [ entry[BYTES], entry[START] ]
    end

    # Clear the mark on all entries in the index and persist the index.
    def clear_marks
      @entries.each { |e| e[MARKED] = 0 }
      write_index
    end

    # Set a mark on the entry with the given ID and persist the index.
    # @param id [Fixnum or Bignum] ID of the entry
    # @raise [ArgumentError] if there is no entry with the given ID
    def mark(id)
      # @entries_by_id references the same entry Arrays as @entries, so the
      # hash lookup replaces a linear scan.
      unless (entry = @entries_by_id[id])
        raise ArgumentError,
              "Cannot find an entry for ID #{'%016X' % id} to mark"
      end

      entry[MARKED] = 1
      write_index
    end

    # Check if the entry for a given ID is marked.
    # @param id [Fixnum or Bignum] ID of the entry
    # @return [TrueClass or FalseClass] true if marked, false otherwise
    # @raise [ArgumentError] if there is no entry with the given ID
    def is_marked?(id)
      unless (entry = @entries_by_id[id])
        raise ArgumentError,
              "Cannot find an entry for ID #{'%016X' % id} to check"
      end

      entry[MARKED] != 0
    end

    # Remove all entries from the index that have not been marked. Only the
    # index is rewritten here; the data file is not touched.
    def delete_unmarked_entries
      # First remove the entry from the hash table.
      @entries_by_id.delete_if { |id, e| e[MARKED] == 0 }
      # Then delete the entry itself.
      @entries.delete_if { |e| e[MARKED] == 0 }
      write_index
    end

    # Run a basic consistency check. Walking the entries in index order, each
    # entry must start at or after the end of its predecessor and must end
    # within the data file.
    # @param repair [TrueClass/FalseClass] Not used right now
    # @return [TrueClass/FalseClass] Always true right now
    # @raise [RuntimeError] if entries overlap or reach beyond the data file
    def check(repair = false)
      # Determine size of the data blobs file.
      data_file_size = File.exist?(@blobs_file_name) ?
        File.size(@blobs_file_name) : 0

      next_start = 0
      prev_entry = nil
      @entries.each do |entry|
        # Entries should never overlap
        if prev_entry && next_start > entry[START]
          raise RuntimeError,
                "#{@dir}: Index entries are overlapping\n" +
                "ID: #{'%016X' % prev_entry[ID]}  " +
                "Start: #{prev_entry[START]}  " +
                "Bytes: #{prev_entry[BYTES]}\n" +
                "ID: #{'%016X' % entry[ID]}  Start: #{entry[START]}  " +
                "Bytes: #{entry[BYTES]}"
        end
        next_start = entry[START] + entry[BYTES]

        # Entries must fit within the data file
        if next_start > data_file_size
          raise RuntimeError,
                "#{@dir}: Entry for ID #{'%016X' % entry[ID]} " +
                "goes beyond 'data' file " +
                "size (#{data_file_size})\n" +
                "ID: #{'%016X' % entry[ID]}  Start: #{entry[START]}  " +
                "Bytes: #{entry[BYTES]}"
        end

        prev_entry = entry
      end

      true
    end

    private

    # Write a string of bytes into the file at the given address. The file is
    # opened in binary mode so that exactly raw.bytesize bytes hit the disk;
    # passing an offset keeps the existing file content instead of truncating.
    # @param raw [String] bytes to write
    # @param address [Fixnum] offset in the file
    # @return [Fixnum] number of bytes written
    # @raise [IOError] if the file cannot be written
    def write_to_blobs_file(raw, address)
      begin
        File.binwrite(@blobs_file_name, raw, address)
      rescue => e
        raise IOError,
              "Cannot write blobs file #{@blobs_file_name}: #{e.message}"
      end
    end

    # Read _bytes_ bytes from the file starting at offset _address_.
    # @param bytes [Fixnum] number of bytes to read
    # @param address [Fixnum] offset in the file
    # @return [String] the bytes read
    # @raise [IOError] if the file cannot be read
    def read_from_blobs_file(bytes, address)
      begin
        File.binread(@blobs_file_name, bytes, address)
      rescue => e
        raise IOError,
              "Cannot read blobs file #{@blobs_file_name}: #{e.message}"
      end
    end

    # Reserve the bytes needed for the specified number of bytes with the
    # given ID. The smallest gap between existing entries that can hold the
    # data is used; if there is none, the space after the last entry is used.
    # An existing entry with the same ID is replaced and its space is treated
    # as free.
    # @param id [Fixnum or Bignum] ID of the entry
    # @param bytes [Fixnum] number of bytes for this entry
    # @return [Fixnum] the start address of the reserved blob
    def reserve_bytes(id, bytes)
      # End address of the last seen entry
      end_of_last_entry = 0
      # Start address of the best fit segment
      best_fit_start = nil
      # Best fit segment size in bytes
      best_fit_bytes = nil
      # Index where to insert the new entry. Append by default.
      best_fit_index = -1
      # If there is already an entry for an object with the _id_, we mark it
      # for deletion.
      entry_to_delete = nil

      @entries.each_with_index do |entry, i|
        if entry[ID] == id
          # We've found an old entry for this ID. Mark it for deletion. Since
          # end_of_last_entry is not advanced, its space counts as free.
          entry_to_delete = entry
          next
        end

        gap = entry[START] - end_of_last_entry
        if gap >= bytes &&
           (best_fit_bytes.nil? || gap < best_fit_bytes)
          # We've found a segment that fits the requested bytes and fits
          # better than any previous find.
          best_fit_start = end_of_last_entry
          best_fit_bytes = gap
          # The old entry gets deleted before the new one gets inserted. We
          # need to correct the index appropriately.
          best_fit_index = i - (entry_to_delete ? 1 : 0)
        end
        end_of_last_entry = entry[START] + entry[BYTES]
      end

      # Delete the old entry if requested.
      @entries.delete(entry_to_delete) if entry_to_delete

      # Create a new entry and insert it. The order must match the above
      # defined constants!
      entry = [ id, bytes, best_fit_start || end_of_last_entry, 0 ]
      @entries.insert(best_fit_index, entry)
      @entries_by_id[id] = entry

      entry[START]
    end

    # Load the index file, if it exists, into @entries and @entries_by_id.
    # @raise [RuntimeError] if the index file cannot be read or ends with an
    #        incomplete entry
    def read_index
      # The entries are stored in two data structures to provide the fastest
      # access mechanism for each situation. The Array @entries stores them in
      # a plain Array. @entries_by_id stores them hashed by their ID.
      @entries = []
      @entries_by_id = {}
      return unless File.exist?(@index_file_name)

      begin
        File.open(@index_file_name, 'rb') do |f|
          # The index is a binary format. Each entry has exactly 25 bytes.
          # The 64 bit values use the native byte order of the machine
          # (pack directive 'Q').
          # Bytes
          #  0 -  7 : 64 bits : ID
          #  8 - 15 : 64 bits : Entry length in bytes
          # 16 - 23 : 64 bits : Start address in data file
          # 24      :  8 bits : 0 if unmarked, 1 if marked
          while (bytes = f.read(INDEX_ENTRY_BYTES))
            # A short read means the file ends in the middle of an entry.
            # Unpacking it would silently produce an entry full of nils.
            if bytes.bytesize != INDEX_ENTRY_BYTES
              raise RuntimeError,
                    "truncated index entry (#{bytes.bytesize} of " +
                    "#{INDEX_ENTRY_BYTES} bytes)"
            end

            entry = bytes.unpack(INDEX_ENTRY_FORMAT)
            @entries << entry
            @entries_by_id[entry[ID]] = entry
          end
        end
      rescue => err
        raise RuntimeError,
              "BTreeBlob file #{@index_file_name} corrupted: #{err.message}"
      end
    end

    # Write all entries of @entries into the index file, replacing any
    # previous content.
    # @raise [RuntimeError] if the index file cannot be written
    def write_index
      begin
        File.open(@index_file_name, 'wb') do |f|
          # See read_index for data format documentation.
          @entries.each do |entry|
            f.write(entry.pack(INDEX_ENTRY_FORMAT))
          end
        end
      rescue => e
        raise RuntimeError,
              "Cannot write BTreeBlob index file #{@index_file_name}: " +
              e.message
      end
    end

    # Convert this blob into a BTree node by moving all its entries back into
    # the owning DB and removing the blob files afterwards.
    def split_blob
      # Rename the index file to hide the blob file from the DB.
      File.rename(@index_file_name, @index_file_name + '.bak')

      # Read all entries from the blob and re-store them into the DB. Without
      # an 'index' file the DB no longer treats this directory as a blob, so
      # these entries will be distributed into new leaf blobs below it.
      @entries.each do |entry|
        raw = read_from_blobs_file(entry[BYTES], entry[START])
        @btreedb.put_raw_object(raw, entry[ID])
      end

      # Once the entries are re-stored, we can delete the old blob files.
      File.delete(@index_file_name + '.bak')
      File.delete(@blobs_file_name)
    end

  end

end
|
327
|
+
|
@@ -0,0 +1,252 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# = BTreeDB.rb -- Persistent Ruby Object Store
|
4
|
+
#
|
5
|
+
# Copyright (c) 2015 by Chris Schlaeger <chris@taskjuggler.org>
|
6
|
+
#
|
7
|
+
# MIT License
|
8
|
+
#
|
9
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
10
|
+
# a copy of this software and associated documentation files (the
|
11
|
+
# "Software"), to deal in the Software without restriction, including
|
12
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
13
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
14
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
15
|
+
# the following conditions:
|
16
|
+
#
|
17
|
+
# The above copyright notice and this permission notice shall be
|
18
|
+
# included in all copies or substantial portions of the Software.
|
19
|
+
#
|
20
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
21
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
23
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
24
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
25
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
26
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27
|
+
|
28
|
+
require 'perobs/DataBase'
|
29
|
+
require 'perobs/BTreeBlob'
|
30
|
+
|
31
|
+
module PEROBS

  # This class implements a BTree database using filesystem directories as
  # nodes and blob files as leafs. The BTree grows with the number of stored
  # entries. Each leaf node blob can hold a fixed number of entries. If more
  # entries need to be stored, the blob is replaced by a node with multiple
  # new leafs that store the entries of the previous node. The leafs are
  # implemented by the BTreeBlob class.
  class BTreeDB < DataBase

    attr_reader :max_blob_size

    # Create a new BTreeDB object.
    # @param db_name [String] name of the DB directory
    # @param options [Hash] options to customize the behavior. Currently only
    #        the following options are supported:
    #        :serializer    : Can be :marshal, :json, :yaml
    #        :dir_bits      : The number of bits to use for the BTree nodes.
    #                         The value must be between 4 and 14. The larger
    #                         the number the more back-end directories are
    #                         being used. The default is 12 which results in
    #                         4096 directories per node.
    #        :max_blob_size : The maximum number of entries in the BTree leaf
    #                         nodes. The insert/find/delete time grows
    #                         linearly with the size.
    # @raise [ArgumentError] if :dir_bits or :max_blob_size is out of range
    def initialize(db_name, options = {})
      super(options[:serializer] || :json)

      @db_dir = db_name
      # Create the database directory if it doesn't exist yet.
      ensure_dir_exists(@db_dir)

      # Read the existing DB config.
      # NOTE(review): check_option is inherited and not visible here;
      # presumably it reconciles the option with the stored @config -- confirm.
      @config = get_hash('config')
      check_option('serializer')

      # Check and set @dir_bits, the number of bits used for each tree level.
      @dir_bits = options[:dir_bits] || 12
      if @dir_bits < 4 || @dir_bits > 14
        raise ArgumentError,
              "dir_bits option (#{@dir_bits}) must be between 4 and 14"
      end
      check_option('dir_bits')

      @max_blob_size = options[:max_blob_size] || 32
      if @max_blob_size < 4 || @max_blob_size > 128
        raise ArgumentError,
              "max_blob_size option (#{@max_blob_size}) must be between 4 and 128"
      end
      check_option('max_blob_size')

      put_hash('config', @config)

      # This format string is used to create the directory name. It uses as
      # many hex digits as are needed to represent @dir_bits bits.
      @dir_format_string = "%0#{(@dir_bits / 4) +
                                (@dir_bits % 4 == 0 ? 0 : 1)}X"
      # Bit mask to extract the dir_bits LSBs.
      @dir_mask = 2 ** @dir_bits - 1
    end

    # Return a true value if the object with given ID exists
    # @param id [Fixnum or Bignum]
    # @return [Array, nil, false] truthy if the ID is stored, falsy otherwise
    def include?(id)
      (blob = find_blob(id)) && blob.find(id)
    end

    # Store a simple Hash as a JSON encoded file into the DB directory.
    # @param name [String] Name of the hash. Will be used as file name.
    # @param hash [Hash] A Hash that maps String objects to strings or
    # numbers.
    # @raise [RuntimeError] if the file cannot be written
    def put_hash(name, hash)
      file_name = File.join(@db_dir, name + '.json')
      begin
        File.write(file_name, hash.to_json)
      rescue => e
        raise RuntimeError,
              "Cannot write hash file '#{file_name}': #{e.message}"
      end
    end

    # Load the Hash with the given name.
    # @param name [String] Name of the hash.
    # @return [Hash] A Hash that maps String objects to strings or numbers.
    #         An empty Hash if no file for this name exists.
    # @raise [RuntimeError] if the file cannot be read
    def get_hash(name)
      file_name = File.join(@db_dir, name + '.json')
      return ::Hash.new unless File.exist?(file_name)

      begin
        json = File.read(file_name)
      rescue => e
        raise RuntimeError,
              "Cannot read hash file '#{file_name}': #{e.message}"
      end
      # NOTE(review): :create_additions instantiates classes named in the
      # JSON document. This is only safe as long as the DB directory is
      # trusted.
      JSON.parse(json, :create_additions => true)
    end

    # Store the given object into the cluster files.
    # @param obj [Hash] Object as defined by PEROBS::ObjectBase
    # @param id [Fixnum or Bignum] object ID
    def put_object(obj, id)
      find_blob(id, true).write_object(id, serialize(obj))
    end

    # Load the given object from the filesystem.
    # @param id [Fixnum or Bignum] object ID
    # @return [Hash] Object as defined by PEROBS::ObjectBase or nil if ID does
    #         not exist
    def get_object(id)
      return nil unless (blob = find_blob(id)) && (obj = blob.read_object(id))

      deserialize(obj)
    end

    # This method must be called to initiate the marking process.
    def clear_marks
      each_blob { |blob| blob.clear_marks }
    end

    # Permanently delete all objects that have not been marked. Those are
    # orphaned and are no longer referenced by any actively used object.
    def delete_unmarked_objects
      each_blob { |blob| blob.delete_unmarked_entries }
    end

    # Mark an object. Nothing happens if no blob exists for the ID.
    # @param id [Fixnum or Bignum] ID of the object to mark
    def mark(id)
      (blob = find_blob(id)) && blob.mark(id)
    end

    # Check if the object is marked.
    # @param id [Fixnum or Bignum] ID of the object to check
    # @return a true value if marked; a false value if unmarked or if no blob
    #         exists for the ID
    def is_marked?(id)
      (blob = find_blob(id)) && blob.is_marked?(id)
    end

    # Basic consistency check. Runs BTreeBlob#check on every blob.
    # @param repair [TrueClass/FalseClass] True if found errors should be
    #        repaired.
    def check_db(repair = false)
      each_blob { |blob| blob.check(repair) }
    end

    # Check if the stored object is syntactically correct.
    # @param id [Fixnum/Bignum] Object ID
    # @param repair [TrueClass/FalseClass] True if an repair attempt should be
    #        made. Not used by this implementation.
    # @return [TrueClass/FalseClass] True if the object is OK, otherwise
    #         false.
    def check(id, repair)
      begin
        get_object(id)
      rescue => e
        $stderr.puts "Cannot read object with ID #{id}: #{e.message}"
        return false
      end

      true
    end

    # Store the given serialized object into the cluster files. This method is
    # for internal use only!
    # @param raw [String] Serialized Object as defined by PEROBS::ObjectBase
    # @param id [Fixnum or Bignum] Object ID
    def put_raw_object(raw, id)
      find_blob(id, true).write_object(id, raw)
    end

    private

    # Walk down the directory tree to the blob that is responsible for the
    # given ID. Each tree level consumes the @dir_bits least significant bits
    # of the remaining ID.
    # @param id [Fixnum or Bignum] Object ID
    # @param create_missing_blob [TrueClass/FalseClass] If true, a missing
    #        blob directory is created
    # @return [BTreeBlob] the blob, or nil if it does not exist and
    #         create_missing_blob is false
    def find_blob(id, create_missing_blob = false)
      dir_name = @db_dir
      loop do
        dir_bits = id & @dir_mask
        dir_name = File.join(dir_name, @dir_format_string % dir_bits)

        if Dir.exist?(dir_name)
          if File.exist?(File.join(dir_name, 'index'))
            # The directory is a blob directory and not a BTree node dir.
            return BTreeBlob.new(dir_name, self)
          end
        else
          if create_missing_blob
            # Create the new blob directory.
            Dir.mkdir(dir_name)
            # And initialize the blob DB.
            return BTreeBlob.new(dir_name, self)
          else
            return nil
          end
        end

        # Discard the least significant @dir_bits bits and start over again
        # with the directory that matches the @dir_bits LSBs of the new ID.
        id = id >> @dir_bits
      end
    end

    # Call the block once for every blob of the DB.
    # @yield [BTreeBlob] each blob found below the DB directory
    def each_blob(&block)
      each_blob_r(@db_dir, &block)
    end

    # Recursive worker for each_blob.
    # @param dir [String] directory to scan
    def each_blob_r(dir, &block)
      Dir.glob(File.join(dir, '*')) do |dir_name|
        # Plain files (e.g. the config file) are neither blobs nor nodes.
        next unless File.directory?(dir_name)

        if is_blob_dir?(dir_name)
          block.call(BTreeBlob.new(dir_name, self))
        else
          each_blob_r(dir_name, &block)
        end
      end
    end

    # @param dir_name [String] directory to examine
    # @return [TrueClass/FalseClass] true if the directory holds a blob
    def is_blob_dir?(dir_name)
      # A blob directory contains an 'index' and 'data' file. This is in
      # contrast to BTree node directories that only contain other
      # directories.
      index_file = File.join(dir_name, 'index')
      File.exist?(index_file)
    end

  end

end
|
252
|
+
|