perobs 2.4.2 → 2.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/perobs/FlatFile.rb +139 -123
- data/lib/perobs/FlatFileBlobHeader.rb +144 -0
- data/lib/perobs/FlatFileDB.rb +19 -4
- data/lib/perobs/IndexTreeNode.rb +2 -2
- data/lib/perobs/Store.rb +0 -1
- data/lib/perobs/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9df83ee37d61319185f94bcaf64a1f48083280fd
|
4
|
+
data.tar.gz: 130fbd021bfe32cad6b45e6b4063f0093552d8f3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a1e61da3e76e0bb4e965ca00c0d4b97105fe5a37f1272cc9a4b0d997d0f8a521547c22c3318ad0bc17eabf9059c54d7d616ad287f1d48743c9f126e17c66ec97
|
7
|
+
data.tar.gz: 0d86ab15a8ffdb18a1b8df4f27a5f2e0fc4f9619f040934ec7904b4e17bc453b383be424190951f2c2d6f1fcb309398913aba6111a360ba908a428f1629c7b67
|
data/lib/perobs/FlatFile.rb
CHANGED
@@ -28,36 +28,17 @@
|
|
28
28
|
require 'zlib'
|
29
29
|
|
30
30
|
require 'perobs/Log'
|
31
|
+
require 'perobs/FlatFileBlobHeader'
|
31
32
|
require 'perobs/IndexTree'
|
32
33
|
require 'perobs/FreeSpaceManager'
|
33
34
|
|
34
35
|
module PEROBS
|
35
36
|
|
36
37
|
# The FlatFile class manages the storage file of the FlatFileDB. It contains
|
37
|
-
# a sequence of blobs Each blob consists of
|
38
|
-
# blob data bytes.
|
39
|
-
#
|
40
|
-
# 1 Byte: Mark byte.
|
41
|
-
# Bit 0: 0 deleted entry, 1 valid entry
|
42
|
-
# Bit 1: 0 unmarked, 1 marked
|
43
|
-
# Bit 2 - 7: reserved, must be 0
|
44
|
-
# 8 bytes: Length of the data blob in bytes
|
45
|
-
# 8 bytes: ID of the value in the data blob
|
46
|
-
# 4 bytes: CRC32 checksum of the data blob
|
47
|
-
#
|
48
|
-
# If the bit 0 of the mark byte is 0, only the length is valid. The blob is
|
49
|
-
# empty. Only of bit 0 is set then entry is valid.
|
38
|
+
# a sequence of blobs. Each blob consists of a header and the actual
|
39
|
+
# blob data bytes.
|
50
40
|
class FlatFile
|
51
41
|
|
52
|
-
# Utility class to hold all the data that is stored in a blob header.
|
53
|
-
class Header < Struct.new(:mark, :length, :id, :crc)
|
54
|
-
end
|
55
|
-
|
56
|
-
# The 'pack()' format of the header.
|
57
|
-
BLOB_HEADER_FORMAT = 'CQQL'
|
58
|
-
# The length of the header in bytes.
|
59
|
-
BLOB_HEADER_LENGTH = 21
|
60
|
-
|
61
42
|
# Create a new FlatFile object for a database in the given path.
|
62
43
|
# @param dir [String] Directory path for the data base file
|
63
44
|
def initialize(dir)
|
@@ -125,13 +106,13 @@ module PEROBS
|
|
125
106
|
# @param id [Integer] ID of the blob to delete
|
126
107
|
def delete_obj_by_address(addr, id)
|
127
108
|
@index.delete_value(id)
|
128
|
-
header =
|
109
|
+
header = FlatFileBlobHeader.read_at(@f, addr, id)
|
129
110
|
begin
|
130
111
|
@f.seek(addr)
|
131
112
|
@f.write([ 0 ].pack('C'))
|
132
113
|
@f.flush
|
133
114
|
@space_list.add_space(addr, header.length)
|
134
|
-
rescue => e
|
115
|
+
rescue IOError => e
|
135
116
|
PEROBS.log.fatal "Cannot erase blob for ID #{header.id}: #{e.message}"
|
136
117
|
end
|
137
118
|
end
|
@@ -142,10 +123,10 @@ module PEROBS
|
|
142
123
|
t = Time.now
|
143
124
|
|
144
125
|
deleted_ids = []
|
145
|
-
each_blob_header do |pos,
|
146
|
-
if
|
147
|
-
delete_obj_by_address(pos,
|
148
|
-
deleted_ids <<
|
126
|
+
each_blob_header do |pos, header|
|
127
|
+
if header.is_valid? && !header.is_marked?
|
128
|
+
delete_obj_by_address(pos, header.id)
|
129
|
+
deleted_ids << header.id
|
149
130
|
end
|
150
131
|
end
|
151
132
|
defragmentize
|
@@ -161,11 +142,23 @@ module PEROBS
|
|
161
142
|
# @param raw_obj [String] Raw object as String
|
162
143
|
# @return [Integer] position of the written blob in the blob file
|
163
144
|
def write_obj_by_id(id, raw_obj)
|
145
|
+
crc = checksum(raw_obj)
|
146
|
+
|
147
|
+
# If the raw_obj is larger than 256 characters we will compress it to
|
148
|
+
# save some space in the database file. For smaller strings the
|
149
|
+
# performance impact of compression is not compensated by writing
|
150
|
+
# less data to the storage.
|
151
|
+
compressed = false
|
152
|
+
if raw_obj.length > 256
|
153
|
+
raw_obj = Zlib.deflate(raw_obj)
|
154
|
+
compressed = true
|
155
|
+
end
|
156
|
+
|
164
157
|
addr, length = find_free_blob(raw_obj.length)
|
165
158
|
begin
|
166
159
|
if length != -1
|
167
160
|
# Just a safeguard so we don't overwrite current data.
|
168
|
-
header =
|
161
|
+
header = FlatFileBlobHeader.read_at(@f, addr)
|
169
162
|
if header.length != length
|
170
163
|
PEROBS.log.fatal "Length in free list (#{length}) and header " +
|
171
164
|
"(#{header.length}) don't match."
|
@@ -174,26 +167,26 @@ module PEROBS
|
|
174
167
|
PEROBS.log.fatal "Object (#{raw_obj.length}) is longer than " +
|
175
168
|
"blob space (#{header.length})."
|
176
169
|
end
|
177
|
-
if header.
|
178
|
-
PEROBS.log.fatal "
|
170
|
+
if header.is_valid?
|
171
|
+
PEROBS.log.fatal "Entry (mark: #{header.mark}) is already used."
|
179
172
|
end
|
180
173
|
end
|
181
174
|
@f.seek(addr)
|
182
|
-
|
183
|
-
|
175
|
+
FlatFileBlobHeader.new(compressed ? (1 << 2) | 1 : 1, raw_obj.length,
|
176
|
+
id, crc).write(@f)
|
184
177
|
@f.write(raw_obj)
|
185
178
|
if length != -1 && raw_obj.length < length
|
186
179
|
# The new object was not appended and it did not completely fill the
|
187
180
|
# free space. So we have to write a new header to mark the remaining
|
188
181
|
# empty space.
|
189
|
-
unless length - raw_obj.length >=
|
182
|
+
unless length - raw_obj.length >= FlatFileBlobHeader::LENGTH
|
190
183
|
PEROBS.log.fatal "Not enough space to append the empty space " +
|
191
184
|
"header (space: #{length} bytes, object: #{raw_obj.length} " +
|
192
185
|
"bytes)."
|
193
186
|
end
|
194
187
|
space_address = @f.pos
|
195
|
-
space_length = length -
|
196
|
-
|
188
|
+
space_length = length - FlatFileBlobHeader::LENGTH - raw_obj.length
|
189
|
+
FlatFileBlobHeader.new(0, space_length, 0, 0).write(@f)
|
197
190
|
# Register the new space with the space list.
|
198
191
|
@space_list.add_space(space_address, space_length) if space_length > 0
|
199
192
|
end
|
@@ -230,21 +223,31 @@ module PEROBS
|
|
230
223
|
# @param id [Integer] ID of the data blob
|
231
224
|
# @return [String] Raw object data
|
232
225
|
def read_obj_by_address(addr, id)
|
233
|
-
header =
|
226
|
+
header = FlatFileBlobHeader.read_at(@f, addr, id)
|
234
227
|
if header.id != id
|
235
228
|
PEROBS.log.fatal "Database index corrupted: Index for object " +
|
236
229
|
"#{id} points to object with ID #{header.id}"
|
237
230
|
end
|
231
|
+
|
232
|
+
buf = nil
|
233
|
+
|
238
234
|
begin
|
239
|
-
@f.seek(addr +
|
235
|
+
@f.seek(addr + FlatFileBlobHeader::LENGTH)
|
240
236
|
buf = @f.read(header.length)
|
241
|
-
|
242
|
-
PEROBS.log.fatal "Checksum failure while reading blob ID #{id}"
|
243
|
-
end
|
244
|
-
return buf
|
245
|
-
rescue => e
|
237
|
+
rescue IOError => e
|
246
238
|
PEROBS.log.fatal "Cannot read blob for ID #{id}: #{e.message}"
|
247
239
|
end
|
240
|
+
|
241
|
+
# Uncompress the data if the compression bit is set in the mark byte.
|
242
|
+
if header.is_compressed?
|
243
|
+
buf = Zlib.inflate(buf)
|
244
|
+
end
|
245
|
+
|
246
|
+
if checksum(buf) != header.crc
|
247
|
+
PEROBS.log.fatal "Checksum failure while reading blob ID #{id}"
|
248
|
+
end
|
249
|
+
|
250
|
+
buf
|
248
251
|
end
|
249
252
|
|
250
253
|
# Mark the object with the given ID.
|
@@ -259,12 +262,12 @@ module PEROBS
|
|
259
262
|
# @param addr [Integer] Offset in the file
|
260
263
|
# @param id [Integer] ID of the object
|
261
264
|
def mark_obj_by_address(addr, id)
|
262
|
-
header =
|
265
|
+
header = FlatFileBlobHeader.read_at(@f, addr, id)
|
263
266
|
begin
|
264
267
|
@f.seek(addr)
|
265
|
-
@f.write([ header.mark |
|
268
|
+
@f.write([ header.mark | (1 << 1) ].pack('C'))
|
266
269
|
@f.flush
|
267
|
-
rescue => e
|
270
|
+
rescue IOError => e
|
268
271
|
PEROBS.log.fatal "Marking of FlatFile blob with ID #{id} " +
|
269
272
|
"failed: #{e.message}"
|
270
273
|
end
|
@@ -274,8 +277,8 @@ module PEROBS
|
|
274
277
|
# @param id [Integer] ID of the object
|
275
278
|
def is_marked_by_id?(id)
|
276
279
|
if (addr = find_obj_addr_by_id(id))
|
277
|
-
header =
|
278
|
-
return
|
280
|
+
header = FlatFileBlobHeader.read_at(@f, addr, id)
|
281
|
+
return header.is_marked?
|
279
282
|
end
|
280
283
|
|
281
284
|
false
|
@@ -289,16 +292,16 @@ module PEROBS
|
|
289
292
|
total_blob_count = 0
|
290
293
|
marked_blob_count = 0
|
291
294
|
|
292
|
-
each_blob_header do |pos,
|
295
|
+
each_blob_header do |pos, header|
|
293
296
|
total_blob_count += 1
|
294
|
-
if
|
297
|
+
if header.is_valid? && header.is_marked?
|
295
298
|
# Clear all valid and marked blocks.
|
296
299
|
marked_blob_count += 1
|
297
300
|
begin
|
298
301
|
@f.seek(pos)
|
299
|
-
@f.write([ mark & 0b11111101 ].pack('C'))
|
302
|
+
@f.write([ header.mark & 0b11111101 ].pack('C'))
|
300
303
|
@f.flush
|
301
|
-
rescue => e
|
304
|
+
rescue IOError => e
|
302
305
|
PEROBS.log.fatal "Unmarking of FlatFile blob with ID #{blob_id} " +
|
303
306
|
"failed: #{e.message}"
|
304
307
|
end
|
@@ -317,10 +320,10 @@ module PEROBS
|
|
317
320
|
t = Time.now
|
318
321
|
PEROBS.log.info "Defragmenting FlatFile"
|
319
322
|
# Iterate over all entries.
|
320
|
-
each_blob_header do |pos,
|
323
|
+
each_blob_header do |pos, header|
|
321
324
|
# Total size of the current entry
|
322
|
-
entry_bytes =
|
323
|
-
if
|
325
|
+
entry_bytes = FlatFileBlobHeader::LENGTH + header.length
|
326
|
+
if header.is_valid?
|
324
327
|
# We have found a valid entry.
|
325
328
|
valid_blobs += 1
|
326
329
|
if distance > 0
|
@@ -332,14 +335,14 @@ module PEROBS
|
|
332
335
|
@f.seek(pos - distance)
|
333
336
|
@f.write(buf)
|
334
337
|
# Update the index with the new position
|
335
|
-
@index.put_value(
|
338
|
+
@index.put_value(header.id, pos - distance)
|
336
339
|
# Mark the space between the relocated current entry and the
|
337
340
|
# next valid entry as deleted space.
|
338
|
-
|
339
|
-
|
341
|
+
FlatFileBlobHeader.new(0, distance - FlatFileBlobHeader::LENGTH,
|
342
|
+
0, 0).write(@f)
|
340
343
|
@f.flush
|
341
|
-
rescue => e
|
342
|
-
PEROBS.log.fatal "Error while moving blob for ID #{
|
344
|
+
rescue IOError => e
|
345
|
+
PEROBS.log.fatal "Error while moving blob for ID #{header.id}: " +
|
343
346
|
e.message
|
344
347
|
end
|
345
348
|
end
|
@@ -361,6 +364,35 @@ module PEROBS
|
|
361
364
|
sync
|
362
365
|
end
|
363
366
|
|
367
|
+
# This method iterates over all entries in the FlatFile and removes the
|
368
|
+
# entry and inserts it again. This is useful to update all entries in
|
369
|
+
# case the storage format has changed.
|
370
|
+
def refresh
|
371
|
+
# This iteration might look scary as we iterate over the entries while
|
372
|
+
# we are rearranging them. Re-inserted items may be inserted
|
373
|
+
# before or at the current entry and this is fine. They also may be
|
374
|
+
# inserted after the current entry and will be re-read again unless they
|
375
|
+
# are inserted after the original file end.
|
376
|
+
file_size = @f.size
|
377
|
+
PEROBS.log.info "Refreshing the DB..."
|
378
|
+
t = Time.now
|
379
|
+
each_blob_header do |pos, header|
|
380
|
+
if header.is_valid?
|
381
|
+
buf = read_obj_by_address(pos, header.id)
|
382
|
+
delete_obj_by_address(pos, header.id)
|
383
|
+
write_obj_by_id(header.id, buf)
|
384
|
+
end
|
385
|
+
|
386
|
+
# Some re-inserted blobs may be inserted after the original file end.
|
387
|
+
# No need to process those blobs again.
|
388
|
+
break if pos >= file_size
|
389
|
+
end
|
390
|
+
PEROBS.log.info "DB refresh completed in #{Time.now - t} seconds"
|
391
|
+
|
392
|
+
# Reclaim the space saved by compressing entries.
|
393
|
+
defragmentize
|
394
|
+
end
|
395
|
+
|
364
396
|
def check(repair = false)
|
365
397
|
return unless @f
|
366
398
|
|
@@ -370,24 +402,28 @@ module PEROBS
|
|
370
402
|
|
371
403
|
# First check the database blob file. Each entry should be readable and
|
372
404
|
# correct.
|
373
|
-
each_blob_header do |pos,
|
374
|
-
if
|
405
|
+
each_blob_header do |pos, header|
|
406
|
+
if header.is_valid?
|
375
407
|
# We have a non-deleted entry.
|
376
408
|
begin
|
377
|
-
@f.seek(pos +
|
378
|
-
buf = @f.read(length)
|
379
|
-
if
|
409
|
+
@f.seek(pos + FlatFileBlobHeader::LENGTH)
|
410
|
+
buf = @f.read(header.length)
|
411
|
+
# Uncompress the data if the compression bit is set in the mark
|
412
|
+
# byte.
|
413
|
+
buf = Zlib.inflate(buf) if header.is_compressed?
|
414
|
+
|
415
|
+
if header.crc && checksum(buf) != header.crc
|
380
416
|
if repair
|
381
417
|
PEROBS.log.error "Checksum failure while checking blob " +
|
382
|
-
"with ID #{id}. Deleting object."
|
383
|
-
delete_obj_by_address(pos,
|
418
|
+
"with ID #{header.id}. Deleting object."
|
419
|
+
delete_obj_by_address(pos, header.id)
|
384
420
|
else
|
385
421
|
PEROBS.log.fatal "Checksum failure while checking blob " +
|
386
|
-
"with ID #{id}"
|
422
|
+
"with ID #{header.id}"
|
387
423
|
end
|
388
424
|
end
|
389
|
-
rescue => e
|
390
|
-
PEROBS.log.fatal "Check of blob with ID #{
|
425
|
+
rescue IOError => e
|
426
|
+
PEROBS.log.fatal "Check of blob with ID #{header.id} failed: " +
|
391
427
|
e.message
|
392
428
|
end
|
393
429
|
end
|
@@ -416,32 +452,33 @@ module PEROBS
|
|
416
452
|
@index.clear
|
417
453
|
@space_list.clear
|
418
454
|
|
419
|
-
each_blob_header do |pos,
|
420
|
-
if
|
421
|
-
@
|
455
|
+
each_blob_header do |pos, header|
|
456
|
+
if header.is_valid?
|
457
|
+
@index.put_value(header.id, pos)
|
422
458
|
else
|
423
|
-
@
|
459
|
+
@space_list.add_space(pos, header.length) if header.length > 0
|
424
460
|
end
|
425
461
|
end
|
426
462
|
end
|
427
463
|
|
428
464
|
def has_space?(address, size)
|
429
|
-
header =
|
465
|
+
header = FlatFileBlobHeader.read_at(@f, address)
|
430
466
|
header.length == size
|
431
467
|
end
|
432
468
|
|
433
469
|
def has_id_at?(id, address)
|
434
|
-
header =
|
470
|
+
header = FlatFileBlobHeader.read_at(@f, address)
|
435
471
|
header.id == id
|
436
472
|
end
|
437
473
|
|
438
474
|
def inspect
|
439
475
|
s = '['
|
440
|
-
each_blob_header do |pos,
|
441
|
-
s << "{ :pos => #{pos}, :mark => #{mark}, " +
|
442
|
-
":length => #{length}, :id => #{
|
443
|
-
|
444
|
-
|
476
|
+
each_blob_header do |pos, header|
|
477
|
+
s << "{ :pos => #{pos}, :mark => #{header.mark}, " +
|
478
|
+
":length => #{header.length}, :id => #{header.id}, " +
|
479
|
+
":crc => #{header.crc}"
|
480
|
+
if header.is_valid?
|
481
|
+
s << ", :value => #{@f.read(header.length)}"
|
445
482
|
end
|
446
483
|
s << " }\n"
|
447
484
|
end
|
@@ -452,26 +489,19 @@ module PEROBS
|
|
452
489
|
|
453
490
|
private
|
454
491
|
|
455
|
-
def
|
456
|
-
|
492
|
+
def each_blob_header(&block)
|
493
|
+
pos = 0
|
457
494
|
begin
|
458
|
-
@f.seek(
|
459
|
-
|
460
|
-
|
495
|
+
@f.seek(0)
|
496
|
+
while (header = FlatFileBlobHeader.read(@f))
|
497
|
+
yield(pos, header)
|
498
|
+
|
499
|
+
pos += FlatFileBlobHeader::LENGTH + header.length
|
500
|
+
@f.seek(pos)
|
501
|
+
end
|
502
|
+
rescue IOError => e
|
461
503
|
PEROBS.log.fatal "Cannot read blob in flat file DB: #{e.message}"
|
462
504
|
end
|
463
|
-
if buf.nil? || buf.length != BLOB_HEADER_LENGTH
|
464
|
-
PEROBS.log.fatal "Cannot read blob header " +
|
465
|
-
"#{id ? "for ID #{id} " : ''}at address " +
|
466
|
-
"#{addr}"
|
467
|
-
end
|
468
|
-
header = Header.new(*buf.unpack(BLOB_HEADER_FORMAT))
|
469
|
-
if id && header.id != id
|
470
|
-
PEROBS.log.fatal "Mismatch between FlatFile index and blob file " +
|
471
|
-
"found for entry with ID #{id}/#{header.id}"
|
472
|
-
end
|
473
|
-
|
474
|
-
return header
|
475
505
|
end
|
476
506
|
|
477
507
|
def find_free_blob(bytes)
|
@@ -480,7 +510,7 @@ module PEROBS
|
|
480
510
|
# We have not found any suitable space. Return the end of the file.
|
481
511
|
return [ @f.size, -1 ]
|
482
512
|
end
|
483
|
-
if size == bytes || size -
|
513
|
+
if size == bytes || size - FlatFileBlobHeader::LENGTH >= bytes
|
484
514
|
return [ address, size ]
|
485
515
|
end
|
486
516
|
|
@@ -490,7 +520,8 @@ module PEROBS
|
|
490
520
|
|
491
521
|
# We need a space that is large enough to hold the bytes and the gap
|
492
522
|
# header.
|
493
|
-
@space_list.get_space(bytes +
|
523
|
+
@space_list.get_space(bytes + FlatFileBlobHeader::LENGTH) ||
|
524
|
+
[ @f.size, -1 ]
|
494
525
|
end
|
495
526
|
|
496
527
|
def checksum(raw_obj)
|
@@ -498,19 +529,20 @@ module PEROBS
|
|
498
529
|
end
|
499
530
|
|
500
531
|
def cross_check_entries
|
501
|
-
each_blob_header do |pos,
|
502
|
-
if
|
503
|
-
if length > 0
|
504
|
-
unless @space_list.has_space?(pos, length)
|
532
|
+
each_blob_header do |pos, header|
|
533
|
+
if !header.is_valid?
|
534
|
+
if header.length > 0
|
535
|
+
unless @space_list.has_space?(pos, header.length)
|
505
536
|
PEROBS.log.error "FlatFile has free space " +
|
506
|
-
"(addr: #{pos}, len: #{length}) that is not in
|
537
|
+
"(addr: #{pos}, len: #{header.length}) that is not in " +
|
538
|
+
"FreeSpaceManager"
|
507
539
|
return false
|
508
540
|
end
|
509
541
|
end
|
510
542
|
else
|
511
|
-
unless @index.get_value(
|
543
|
+
unless @index.get_value(header.id) == pos
|
512
544
|
PEROBS.log.error "FlatFile blob at address #{pos} is listed " +
|
513
|
-
"in index with address #{@index.get_value(
|
545
|
+
"in index with address #{@index.get_value(header.id)}"
|
514
546
|
return false
|
515
547
|
end
|
516
548
|
end
|
@@ -519,22 +551,6 @@ module PEROBS
|
|
519
551
|
true
|
520
552
|
end
|
521
553
|
|
522
|
-
def each_blob_header(&block)
|
523
|
-
pos = 0
|
524
|
-
begin
|
525
|
-
@f.seek(0)
|
526
|
-
while (buf = @f.read(BLOB_HEADER_LENGTH))
|
527
|
-
mark, length, id, crc = buf.unpack(BLOB_HEADER_FORMAT)
|
528
|
-
yield(pos, mark, length, id, crc)
|
529
|
-
|
530
|
-
pos += BLOB_HEADER_LENGTH + length
|
531
|
-
@f.seek(pos)
|
532
|
-
end
|
533
|
-
rescue IOError => e
|
534
|
-
PEROBS.log.fatal "Cannot read blob in flat file DB: #{e.message}"
|
535
|
-
end
|
536
|
-
end
|
537
|
-
|
538
554
|
end
|
539
555
|
|
540
556
|
end
|
@@ -0,0 +1,144 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# = FlatFileBlobHeader.rb -- Persistent Ruby Object Store
|
4
|
+
#
|
5
|
+
# Copyright (c) 2016 by Chris Schlaeger <chris@taskjuggler.org>
|
6
|
+
#
|
7
|
+
# MIT License
|
8
|
+
#
|
9
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
10
|
+
# a copy of this software and associated documentation files (the
|
11
|
+
# "Software"), to deal in the Software without restriction, including
|
12
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
13
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
14
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
15
|
+
# the following conditions:
|
16
|
+
#
|
17
|
+
# The above copyright notice and this permission notice shall be
|
18
|
+
# included in all copies or substantial portions of the Software.
|
19
|
+
#
|
20
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
21
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
23
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
24
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
25
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
26
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27
|
+
|
28
|
+
require 'perobs/Log'
|
29
|
+
|
30
|
+
module PEROBS
|
31
|
+
|
32
|
+
# The FlatFile blob header has the following structure:
|
33
|
+
#
|
34
|
+
# 1 Byte: Mark byte.
|
35
|
+
# Bit 0: 0 deleted entry, 1 valid entry
|
36
|
+
# Bit 1: 0 unmarked, 1 marked
|
37
|
+
# Bit 2: 0 uncompressed data, 1 compressed data
|
38
|
+
# Bit 3 - 7: reserved, must be 0
|
39
|
+
# 8 bytes: Length of the data blob in bytes
|
40
|
+
# 8 bytes: ID of the value in the data blob
|
41
|
+
# 4 bytes: CRC32 checksum of the data blob
|
42
|
+
#
|
43
|
+
# If the bit 0 of the mark byte is 0, only the length is valid. The blob is
|
44
|
+
# empty. Only if bit 0 is set is the entry valid.
|
45
|
+
class FlatFileBlobHeader
|
46
|
+
|
47
|
+
# The 'pack()' format of the header.
|
48
|
+
FORMAT = 'CQQL'
|
49
|
+
# The length of the header in bytes.
|
50
|
+
LENGTH = 21
|
51
|
+
|
52
|
+
attr_reader :mark, :length, :id, :crc
|
53
|
+
|
54
|
+
# Create a new FlatFileBlobHeader with the given mark, length, id and crc.
|
55
|
+
# @param mark [Fixnum] 8 bit number, see above
|
56
|
+
# @param length [Fixnum] length of the data blob in bytes
|
57
|
+
# @param id [Integer] ID of the blob entry
|
58
|
+
# @param crc [Fixnum] CRC32 checksum of the blob entry
|
59
|
+
def initialize(mark, length, id, crc)
|
60
|
+
@mark = mark
|
61
|
+
@length = length
|
62
|
+
@id = id
|
63
|
+
@crc = crc
|
64
|
+
end
|
65
|
+
|
66
|
+
# Read the header from the given File.
|
67
|
+
# @param file [File]
|
68
|
+
# @return FlatFileBlobHeader
|
69
|
+
def FlatFileBlobHeader::read(file)
|
70
|
+
begin
|
71
|
+
buf = file.read(LENGTH)
|
72
|
+
rescue IOError => e
|
73
|
+
PEROBS.log.fatal "Cannot read blob header in flat file DB: #{e.message}"
|
74
|
+
end
|
75
|
+
|
76
|
+
return nil unless buf
|
77
|
+
|
78
|
+
FlatFileBlobHeader.new(*buf.unpack(FORMAT))
|
79
|
+
end
|
80
|
+
|
81
|
+
# Read the header from the given File.
|
82
|
+
# @param file [File]
|
83
|
+
# @param addr [Integer] address in the file to start reading
|
84
|
+
# @param id [Integer] Optional ID that the header should have
|
85
|
+
# @return FlatFileBlobHeader
|
86
|
+
def FlatFileBlobHeader::read_at(file, addr, id = nil)
|
87
|
+
buf = nil
|
88
|
+
begin
|
89
|
+
file.seek(addr)
|
90
|
+
buf = file.read(LENGTH)
|
91
|
+
rescue IOError => e
|
92
|
+
PEROBS.log.fatal "Cannot read blob in flat file DB: #{e.message}"
|
93
|
+
end
|
94
|
+
if buf.nil? || buf.length != LENGTH
|
95
|
+
PEROBS.log.fatal "Cannot read blob header " +
|
96
|
+
"#{id ? "for ID #{id} " : ''}at address " +
|
97
|
+
"#{addr}"
|
98
|
+
end
|
99
|
+
header = FlatFileBlobHeader.new(*buf.unpack(FORMAT))
|
100
|
+
if id && header.id != id
|
101
|
+
PEROBS.log.fatal "Mismatch between FlatFile index and blob file " +
|
102
|
+
"found for entry with ID #{id}/#{header.id}"
|
103
|
+
end
|
104
|
+
|
105
|
+
return header
|
106
|
+
end
|
107
|
+
|
108
|
+
# Write the header to a given File.
|
109
|
+
# @param file [File]
|
110
|
+
def write(file)
|
111
|
+
begin
|
112
|
+
file.write([ @mark, @length, @id, @crc].pack(FORMAT))
|
113
|
+
rescue IOError => e
|
114
|
+
PEROBS.log.fatal "Cannot write blob header into flat file DB: " +
|
115
|
+
e.message
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
# Return true if the header is for a non-empty blob.
|
120
|
+
def is_valid?
|
121
|
+
bit_set?(0)
|
122
|
+
end
|
123
|
+
|
124
|
+
# Return true if the blob has been marked.
|
125
|
+
def is_marked?
|
126
|
+
bit_set?(1)
|
127
|
+
end
|
128
|
+
|
129
|
+
# Return true if the blob contains compressed data.
|
130
|
+
def is_compressed?
|
131
|
+
bit_set?(2)
|
132
|
+
end
|
133
|
+
|
134
|
+
private
|
135
|
+
|
136
|
+
def bit_set?(n)
|
137
|
+
mask = 1 << n
|
138
|
+
@mark & mask == mask
|
139
|
+
end
|
140
|
+
|
141
|
+
end
|
142
|
+
|
143
|
+
end
|
144
|
+
|
data/lib/perobs/FlatFileDB.rb
CHANGED
@@ -41,7 +41,7 @@ module PEROBS
|
|
41
41
|
|
42
42
|
# This version number increases whenever the on-disk format changes in a
|
43
43
|
# way that requires conversion actions after an update.
|
44
|
-
VERSION =
|
44
|
+
VERSION = 2
|
45
45
|
|
46
46
|
attr_reader :max_blob_size
|
47
47
|
|
@@ -57,7 +57,7 @@ module PEROBS
|
|
57
57
|
# Create the database directory if it doesn't exist yet.
|
58
58
|
ensure_dir_exists(@db_dir)
|
59
59
|
PEROBS.log.open(File.join(@db_dir, 'log'))
|
60
|
-
|
60
|
+
check_version_and_upgrade
|
61
61
|
|
62
62
|
# Read the existing DB config.
|
63
63
|
@config = get_hash('config')
|
@@ -202,9 +202,9 @@ module PEROBS
|
|
202
202
|
|
203
203
|
private
|
204
204
|
|
205
|
-
def
|
205
|
+
def check_version_and_upgrade
|
206
206
|
version_file = File.join(@db_dir, 'version')
|
207
|
-
version =
|
207
|
+
version = 1
|
208
208
|
|
209
209
|
if File.exist?(version_file)
|
210
210
|
begin
|
@@ -214,6 +214,7 @@ module PEROBS
|
|
214
214
|
"'#{version_file}': " + e.message
|
215
215
|
end
|
216
216
|
else
|
217
|
+
# Early versions of PEROBS did not have a version file.
|
217
218
|
write_version_file(version_file)
|
218
219
|
end
|
219
220
|
|
@@ -221,6 +222,20 @@ module PEROBS
|
|
221
222
|
PEROBS.log.fatal "Cannot downgrade the FlatFile database from " +
|
222
223
|
"version #{version} to version #{VERSION}"
|
223
224
|
end
|
225
|
+
|
226
|
+
if version == 1
|
227
|
+
# Version 1 had no support for data compression. Make sure all entries
|
228
|
+
# are compressed to save space.
|
229
|
+
open
|
230
|
+
@flat_file.refresh
|
231
|
+
close
|
232
|
+
end
|
233
|
+
|
234
|
+
# After a successful upgrade change the version number in the DB as
|
235
|
+
# well.
|
236
|
+
if version < VERSION
|
237
|
+
write_version_file(version_file)
|
238
|
+
end
|
224
239
|
end
|
225
240
|
|
226
241
|
def write_version_file(version_file)
|
data/lib/perobs/IndexTreeNode.rb
CHANGED
@@ -190,8 +190,8 @@ module PEROBS
|
|
190
190
|
# Recursively check this node and all sub nodes. Compare the found
|
191
191
|
# ID/address pairs with the corresponding entry in the given FlatFile.
|
192
192
|
# @param flat_file [FlatFile]
|
193
|
-
# @tree_level [Fixnum] Assumed level in the tree. Must correspond
|
194
|
-
#
|
193
|
+
# @param tree_level [Fixnum] Assumed level in the tree. Must correspond
|
194
|
+
# with @nibble_idx
|
195
195
|
# @return [Boolean] true if no errors were found, false otherwise
|
196
196
|
def check(flat_file, tree_level)
|
197
197
|
if tree_level >= 16
|
data/lib/perobs/Store.rb
CHANGED
@@ -160,7 +160,6 @@ module PEROBS
|
|
160
160
|
|
161
161
|
# Copy the store content into a new Store. The arguments are identical to
|
162
162
|
# Store.new().
|
163
|
-
# @param data_base [String] the name of the database
|
164
163
|
# @param options [Hash] various options to affect the operation of the
|
165
164
|
def copy(dir, options = {})
|
166
165
|
# Make sure all objects are persisted.
|
data/lib/perobs/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: perobs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chris Schlaeger
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-03-
|
11
|
+
date: 2017-03-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -74,6 +74,7 @@ files:
|
|
74
74
|
- lib/perobs/DynamoDB.rb
|
75
75
|
- lib/perobs/FixedSizeBlobFile.rb
|
76
76
|
- lib/perobs/FlatFile.rb
|
77
|
+
- lib/perobs/FlatFileBlobHeader.rb
|
77
78
|
- lib/perobs/FlatFileDB.rb
|
78
79
|
- lib/perobs/FreeSpaceManager.rb
|
79
80
|
- lib/perobs/Handle.rb
|