perobs 4.0.0 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/perobs.rb +1 -0
- data/lib/perobs/Array.rb +66 -19
- data/lib/perobs/BTree.rb +83 -12
- data/lib/perobs/BTreeBlob.rb +1 -1
- data/lib/perobs/BTreeDB.rb +2 -2
- data/lib/perobs/BTreeNode.rb +365 -85
- data/lib/perobs/BigArray.rb +267 -0
- data/lib/perobs/BigArrayNode.rb +998 -0
- data/lib/perobs/BigHash.rb +262 -0
- data/lib/perobs/BigTree.rb +184 -0
- data/lib/perobs/BigTreeNode.rb +873 -0
- data/lib/perobs/ConsoleProgressMeter.rb +61 -0
- data/lib/perobs/DataBase.rb +4 -3
- data/lib/perobs/DynamoDB.rb +57 -15
- data/lib/perobs/EquiBlobsFile.rb +143 -51
- data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
- data/lib/perobs/FlatFile.rb +363 -203
- data/lib/perobs/FlatFileBlobHeader.rb +98 -54
- data/lib/perobs/FlatFileDB.rb +42 -20
- data/lib/perobs/Hash.rb +58 -13
- data/lib/perobs/IDList.rb +144 -0
- data/lib/perobs/IDListPage.rb +107 -0
- data/lib/perobs/IDListPageFile.rb +180 -0
- data/lib/perobs/IDListPageRecord.rb +142 -0
- data/lib/perobs/Object.rb +18 -15
- data/lib/perobs/ObjectBase.rb +38 -4
- data/lib/perobs/PersistentObjectCache.rb +53 -67
- data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
- data/lib/perobs/ProgressMeter.rb +97 -0
- data/lib/perobs/SpaceTree.rb +21 -12
- data/lib/perobs/SpaceTreeNode.rb +53 -61
- data/lib/perobs/Store.rb +71 -32
- data/lib/perobs/version.rb +1 -1
- data/perobs.gemspec +4 -4
- data/test/Array_spec.rb +15 -6
- data/test/BTree_spec.rb +5 -2
- data/test/BigArray_spec.rb +214 -0
- data/test/BigHash_spec.rb +144 -0
- data/test/BigTreeNode_spec.rb +153 -0
- data/test/BigTree_spec.rb +259 -0
- data/test/EquiBlobsFile_spec.rb +105 -1
- data/test/FNV_Hash_1a_64_spec.rb +59 -0
- data/test/FlatFileDB_spec.rb +63 -14
- data/test/Hash_spec.rb +1 -2
- data/test/IDList_spec.rb +77 -0
- data/test/LegacyDBs/LegacyDB.rb +151 -0
- data/test/LegacyDBs/version_3/class_map.json +1 -0
- data/test/LegacyDBs/version_3/config.json +1 -0
- data/test/LegacyDBs/version_3/database.blobs +0 -0
- data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
- data/test/LegacyDBs/version_3/index.blobs +0 -0
- data/test/LegacyDBs/version_3/version +1 -0
- data/test/LockFile_spec.rb +9 -6
- data/test/SpaceTree_spec.rb +4 -1
- data/test/Store_spec.rb +290 -199
- data/test/spec_helper.rb +9 -4
- metadata +47 -10
- data/lib/perobs/TreeDB.rb +0 -277
@@ -0,0 +1,54 @@
+# encoding: UTF-8
+#
+# = FNV_Hash_1a_64.rb -- Persistent Ruby Object Store
+#
+# Copyright (c) 2019 by Chris Schlaeger <chris@taskjuggler.org>
+#
+# MIT License
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+module PEROBS
+
+  # This is an implementation of the Fowler Noll Vo hashing algorithm in the
+  # 1a variant for 64 bit hash values.
+  # https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
+  class FNV_Hash_1a_64
+
+    @@OFFSET = 14695981039346656037
+    @@PRIME = 1099511628211
+    @@MASK = 2**64 - 1
+
+    def self.digest(item)
+      hash = @@OFFSET
+
+      item.to_s.each_byte do |byte|
+        hash ^= byte
+        hash *= @@PRIME
+        hash &= @@MASK
+      end
+
+      hash
+    end
+
+  end
+
+end
+
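The new FNV_Hash_1a_64 class gives a stable 64 bit hash of an object's string representation (item.to_s). A minimal usage sketch, not taken from the gem's test suite; it assumes the class is loaded directly via the file path shown above:

  require 'perobs/FNV_Hash_1a_64'

  # The same input always yields the same 64 bit Integer.
  h = PEROBS::FNV_Hash_1a_64.digest('some key')

  # A stable hash like this can be folded into a bucket index, e.g. for a
  # fixed number of hash buckets (2048 is an arbitrary illustrative value).
  bucket = h % 2048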
data/lib/perobs/FlatFile.rb
CHANGED
@@ -2,7 +2,7 @@
 #
 # = FlatFile.rb -- Persistent Ruby Object Store
 #
-# Copyright (c) 2016 by Chris Schlaeger <chris@taskjuggler.org>
+# Copyright (c) 2016, 2018 by Chris Schlaeger <chris@taskjuggler.org>
 #
 # MIT License
 #
@@ -31,6 +31,7 @@ require 'perobs/Log'
 require 'perobs/FlatFileBlobHeader'
 require 'perobs/BTree'
 require 'perobs/SpaceTree'
+require 'perobs/IDList'

 module PEROBS

@@ -44,12 +45,13 @@ module PEROBS

 # Create a new FlatFile object for a database in the given path.
 # @param dir [String] Directory path for the data base file
-def initialize(dir)
+def initialize(dir, progressmeter)
 @db_dir = dir
+@progressmeter = progressmeter
 @f = nil
-@
-@
-@space_list = SpaceTree.new(@db_dir)
+@marks = nil
+@index = BTree.new(@db_dir, 'index', INDEX_BTREE_ORDER, @progressmeter)
+@space_list = SpaceTree.new(@db_dir, @progressmeter)
 end

 # Open the flat file for reading and writing.
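FlatFile.new now takes a progress meter as its second argument and hands it on to the BTree index and the SpaceTree, as the hunk above shows. In the hunks below the only calls made on it are start(label, max_value) with a block and update(value) on the yielded object. A minimal stand-in that satisfies exactly those two calls, shown only to illustrate the protocol (the real implementations live in the new ProgressMeter.rb and ConsoleProgressMeter.rb files, whose full API is not part of this diff):

  # No-op progress meter compatible with the calls made in FlatFile.rb.
  class NullProgressMeter
    def start(label, max_value)
      # A real meter would print the label and remember max_value here.
      yield(self)
    end

    def update(value)
      # A real meter would redraw its progress display here.
    end
  end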
@@ -74,33 +76,19 @@ module PEROBS
 end
 @f.sync = true

-
-@index.open(!new_db_created)
-@space_list.open
-rescue FatalError
-# Ensure that the index is really closed.
-@index.close
-# Erase it completely
-@index.erase
-# Then create it again.
-@index.open
-
-# Ensure that the spaces list is really closed.
-@space_list.close
-# Erase it completely
-@space_list.erase
-# Then create it again
-@space_list.open
-
-regenerate_index_and_spaces
-end
+open_index_files(!new_db_created)
 end

 # Close the flat file. This method must be called to ensure that all data
 # is really written into the filesystem.
 def close
-@space_list.close
-@index.close
+@space_list.close if @space_list.is_open?
+@index.close if @index.is_open?
+
+if @marks
+@marks.erase
+@marks = nil
+end

 if @f
 @f.flush
@@ -139,29 +127,36 @@ module PEROBS
 # @param addr [Integer] Address of the blob to delete
 # @param id [Integer] ID of the blob to delete
 def delete_obj_by_address(addr, id)
-@index.remove(id)
-header = FlatFileBlobHeader.
+@index.remove(id) if @index.is_open?
+header = FlatFileBlobHeader.read(@f, addr, id)
 header.clear_flags
-@space_list.add_space(addr, header.length)
+@space_list.add_space(addr, header.length) if @space_list.is_open?
 end

 # Delete all unmarked objects.
 def delete_unmarked_objects
-
-
+# We don't update the index and the space list during this operation as
+# we defragmentize the blob file at the end. We'll end the operation
+# with an empty space list.
+clear_index_files
+
+deleted_objects_count = 0
+@progressmeter.start('Sweeping unmarked objects', @f.size) do |pm|
+each_blob_header do |header|
+if header.is_valid? && !@marks.include?(header.id)
+delete_obj_by_address(header.addr, header.id)
+deleted_objects_count += 1
+end

-
-each_blob_header do |pos, header|
-if header.is_valid? && @marks.get(header.id).nil?
-delete_obj_by_address(pos, header.id)
-deleted_ids << header.id
+pm.update(header.addr)
 end
 end
 defragmentize

-
-
-
+# Update the index file and create a new, empty space list.
+regenerate_index_and_spaces
+
+deleted_objects_count
 end

 # Write the given object into the file. This method never uses in-place
@@ -177,7 +172,7 @@ module PEROBS
 # operation is aborted or interrupted we ensure that we either have the
 # old or the new version available.
 if (old_addr = find_obj_addr_by_id(id))
-old_header = FlatFileBlobHeader.
+old_header = FlatFileBlobHeader.read(@f, old_addr)
 old_header.set_outdated_flag
 end

@@ -197,7 +192,7 @@ module PEROBS
 begin
 if length != -1
 # Just a safeguard so we don't overwrite current data.
-header = FlatFileBlobHeader.
+header = FlatFileBlobHeader.read(@f, addr)
 if header.length != length
 PEROBS.log.fatal "Length in free list (#{length}) and header " +
 "(#{header.length}) for address #{addr} don't match."
@@ -229,18 +224,22 @@ module PEROBS
 FlatFileBlobHeader.new(@f, space_address, 0, space_length,
 0, 0).write
 # Register the new space with the space list.
-@space_list.
+if @space_list.is_open? && space_length > 0
+@space_list.add_space(space_address, space_length)
+end
 end

 # Once the blob has been written we can update the index as well.
-@index.insert(id, addr)
+@index.insert(id, addr) if @index.is_open?

 if old_addr
 # If we had an existing object stored for the ID we have to mark
 # this entry as deleted now.
 old_header.clear_flags
 # And register the newly freed space with the space list.
-@space_list.
+if @space_list.is_open?
+@space_list.add_space(old_addr, old_header.length)
+end
 else
 @f.flush
 end
@@ -270,21 +269,17 @@ module PEROBS
 nil
 end

-
-
-
-end
-
-nil
+# @return [Integer] Number of items stored in the DB.
+def item_counter
+@index.entries_count
 end

-
 # Read the object at the specified address.
 # @param addr [Integer] Offset in the flat file
 # @param id [Integer] ID of the data blob
 # @return [String] Raw object data
 def read_obj_by_address(addr, id)
-header = FlatFileBlobHeader.
+header = FlatFileBlobHeader.read(@f, addr, id)
 if header.id != id
 PEROBS.log.fatal "Database index corrupted: Index for object " +
 "#{id} points to object with ID #{header.id}"
@@ -319,19 +314,22 @@ module PEROBS
 # Mark the object with the given ID.
 # @param id [Integer] ID of the object
 def mark_obj_by_id(id)
-@marks.insert(id
+@marks.insert(id)
 end

 # Return true if the object with the given ID is marked, false otherwise.
 # @param id [Integer] ID of the object
 def is_marked_by_id?(id)
-
+@marks.include?(id)
 end

 # Clear alls marks.
 def clear_all_marks
-@marks
-
+if @marks
+@marks.clear
+else
+@marks = IDList.new(@db_dir, 'marks', 8)
+end
 end

 # Eliminate all the holes in the file. This is an in-place
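The garbage-collection marks are now kept in an on-disk IDList (created as IDList.new(@db_dir, 'marks', 8) above and queried with include?) instead of being looked up with get on an index structure. Together with delete_unmarked_objects from the earlier hunk, these methods form the usual mark-and-sweep cycle. A hedged sketch of that sequence, where flat_file and the object IDs are purely illustrative and the cycle is normally driven by the Store's garbage collector rather than called directly:

  flat_file.clear_all_marks            # clears or creates the 'marks' IDList
  reachable_ids = [ 1, 7, 42 ]         # IDs found while walking the object graph
  reachable_ids.each { |id| flat_file.mark_obj_by_id(id) }
  flat_file.is_marked_by_id?(7)        # => true
  flat_file.delete_unmarked_objects    # sweep; returns the number of deleted objects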
@@ -340,59 +338,72 @@ module PEROBS
 distance = 0
 new_file_size = 0
 deleted_blobs = 0
+corrupted_blobs = 0
 valid_blobs = 0
-
-PEROBS.log.info "Defragmenting FlatFile"
+
 # Iterate over all entries.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+@progressmeter.start('Defragmentizing blobs file', @f.size) do |pm|
+each_blob_header do |header|
+# If we have stumbled over a corrupted blob we treat it similar to a
+# deleted blob and reuse the space.
+if header.corruption_start
+distance += header.addr - header.corruption_start
+corrupted_blobs += 1
+end
+
+# Total size of the current entry
+entry_bytes = FlatFileBlobHeader::LENGTH + header.length
+if header.is_valid?
+# We have found a valid entry.
+valid_blobs += 1
+if distance > 0
+begin
+# Read current entry into a buffer
+@f.seek(header.addr)
+buf = @f.read(entry_bytes)
+# Write the buffer right after the end of the previous entry.
+@f.seek(header.addr - distance)
+@f.write(buf)
+# Mark the space between the relocated current entry and the
+# next valid entry as deleted space.
+FlatFileBlobHeader.new(@f, @f.pos, 0,
+distance - FlatFileBlobHeader::LENGTH,
+0, 0).write
+@f.flush
+rescue IOError => e
+PEROBS.log.fatal "Error while moving blob for ID " +
+"#{header.id}: #{e.message}"
+end
 end
+new_file_size = header.addr - distance +
+FlatFileBlobHeader::LENGTH + header.length
+else
+deleted_blobs += 1
+distance += entry_bytes
 end
-
-
-deleted_blobs += 1
-distance += entry_bytes
+
+pm.update(header.addr)
 end
 end
-
+
 PEROBS.log.info "#{distance / 1000} KiB/#{deleted_blobs} blobs of " +
 "#{@f.size / 1000} KiB/#{valid_blobs} blobs or " +
 "#{'%.1f' % (distance.to_f / @f.size * 100.0)}% reclaimed"
+if corrupted_blobs > 0
+PEROBS.log.info "#{corrupted_blobs} corrupted blob(s) found. Space " +
+"was recycled."
+end

 @f.flush
 @f.truncate(new_file_size)
 @f.flush
-@space_list.clear

 sync
 end

 # This method iterates over all entries in the FlatFile and removes the
 # entry and inserts it again. This is useful to update all entries in
-#
+# case the storage format has changed.
 def refresh
 # This iteration might look scary as we iterate over the entries while
 # while we are rearranging them. Re-inserted items may be inserted
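To make the bookkeeping in the rewritten defragmentize loop concrete, a short worked example with invented numbers: if the file starts with a deleted blob carrying 100 bytes of payload, that entry adds entry_bytes = FlatFileBlobHeader::LENGTH + 100 to distance. The next valid blob is then copied distance bytes towards the start of the file, a fresh deleted-space header covering distance - FlatFileBlobHeader::LENGTH bytes is written right behind the relocated copy, and new_file_size is advanced to the end of that copy. Once the loop has moved every valid blob, truncating the file to new_file_size is what actually reclaims the accumulated distance.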
@@ -400,23 +411,34 @@ module PEROBS
 # inserted after the current entry and will be re-read again unless they
 # are inserted after the original file end.
 file_size = @f.size
-PEROBS.log.info "Refreshing the DB..."
-t = Time.now
-each_blob_header do |pos, header|
-if header.is_valid?
-buf = read_obj_by_address(pos, header.id)
-delete_obj_by_address(pos, header.id)
-write_obj_by_id(header.id, buf)
-end

-
-
-
+# We don't update the index and the space list during this operation as
+# we defragmentize the blob file at the end. We'll end the operation
+# with an empty space list.
+clear_index_files
+
+@progressmeter.start('Converting objects to new storage format',
+@f.size) do |pm|
+each_blob_header do |header|
+if header.is_valid?
+buf = read_obj_by_address(header.addr, header.id)
+delete_obj_by_address(header.addr, header.id)
+write_obj_by_id(header.id, buf)
+end
+
+# Some re-inserted blobs may be inserted after the original file end.
+# No need to process those blobs again.
+break if header.addr >= file_size
+
+pm.update(header.addr)
+end
 end
-PEROBS.log.info "DB refresh completed in #{Time.now - t} seconds"

 # Reclaim the space saved by compressing entries.
 defragmentize
+
+# Recreate the index file and create an empty space list.
+regenerate_index_and_spaces
 end

 # Check (and repair) the FlatFile.
@@ -433,95 +455,117 @@ module PEROBS
 # First check the database blob file. Each entry should be readable and
 # correct and all IDs must be unique. We use a shadow index to keep
 # track of the already found IDs.
-new_index = BTree.new(@db_dir, 'new-index', INDEX_BTREE_ORDER
+new_index = BTree.new(@db_dir, 'new-index', INDEX_BTREE_ORDER,
+@progressmeter)
 new_index.erase
 new_index.open

-
-
-
-
-
-
-
-
-
-
-
-
-
+corrupted_blobs = 0
+@progressmeter.start('Checking blobs file', @f.size) do |pm|
+corrupted_blobs = each_blob_header do |header|
+if header.is_valid?
+# We have a non-deleted entry.
+begin
+@f.seek(header.addr + FlatFileBlobHeader::LENGTH)
+buf = @f.read(header.length)
+if buf.bytesize != header.length
+PEROBS.log.error "Premature end of file in blob with ID " +
+"#{header.id}."
+discard_damaged_blob(header) if repair
+errors += 1
+next
+end

-
-
-
-
-
-
-
-
+# Uncompress the data if the compression bit is set in the mark
+# byte.
+if header.is_compressed?
+begin
+buf = Zlib.inflate(buf)
+rescue Zlib::BufError, Zlib::DataError
+PEROBS.log.error "Corrupted compressed block with ID " +
+"#{header.id} found."
+discard_damaged_blob(header) if repair
+errors += 1
+next
+end
+end
+
+if header.crc && checksum(buf) != header.crc
+PEROBS.log.error "Checksum failure while checking blob " +
+"with ID #{header.id}"
 discard_damaged_blob(header) if repair
 errors += 1
 next
 end
+rescue IOError => e
+PEROBS.log.fatal "Check of blob with ID #{header.id} failed: " +
+e.message
 end

-if
-
-
-
+# Check if the ID has already been found in the file.
+if (previous_address = new_index.get(header.id))
+PEROBS.log.error "Multiple blobs for ID #{header.id} found. " +
+"Addresses: #{previous_address}, #{header.addr}"
 errors += 1
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-discard_damaged_blob(header)
-elsif previous_header.is_outdated?
-discard_damaged_blob(previous_header)
-else
-PEROBS.log.error "None of the blobs with same ID have " +
-"the outdated flag set. Deleting the smaller one."
-discard_damaged_blob(header.length < previous_header.length ?
-header : previous_header)
+previous_header = FlatFileBlobHeader.read(@f, previous_address,
+header.id)
+if repair
+# We have two blobs with the same ID and we must discard one of
+# them.
+if header.is_outdated?
+discard_damaged_blob(header)
+elsif previous_header.is_outdated?
+discard_damaged_blob(previous_header)
+else
+PEROBS.log.error "None of the blobs with same ID have " +
+"the outdated flag set. Deleting the smaller one."
+errors += 1
+discard_damaged_blob(header.length < previous_header.length ?
+header : previous_header)
+end
+next
 end
-
+else
+# ID is unique so far. Add it to the shadow index.
+new_index.insert(header.id, header.addr)
 end
-
-# ID is unique so far. Add it to the shadow index.
-new_index.insert(header.id, pos)
+
 end

+pm.update(header.addr)
 end
+
+errors += corrupted_blobs
 end
+
 # We no longer need the new index.
 new_index.close
 new_index.erase

-
-
-
-
-
-
-
-
+if repair && corrupted_blobs > 0
+erase_index_files
+defragmentize
+regenerate_index_and_spaces
+else
+# Now we check the index data. It must be correct and the entries must
+# match the blob file. All entries in the index must be in the blob file
+# and vise versa.
+begin
+index_ok = @index.check do |id, address|
+has_id_at?(id, address)
+end
+x_check_errs = 0
+space_check_ok = true
+unless index_ok && (space_check_ok = @space_list.check(self)) &&
+(x_check_errs = cross_check_entries) == 0
+errors += 1 unless index_ok && space_check_ok
+errors += x_check_errs
+regenerate_index_and_spaces if repair
+end
+rescue PEROBS::FatalError
+errors += 1
 regenerate_index_and_spaces if repair
 end
-rescue PEROBS::FatalError
-errors += 1
-regenerate_index_and_spaces if repair
 end

 sync if repair
@@ -535,22 +579,32 @@ module PEROBS
 # regenerates them from the FlatFile.
 def regenerate_index_and_spaces
 PEROBS.log.warn "Re-generating FlatFileDB index and space files"
+@index.open unless @index.is_open?
 @index.clear
+@space_list.open unless @space_list.is_open?
 @space_list.clear

-
-
-if
-
-
-
-
-
+@progressmeter.start('Re-generating database index', @f.size) do |pm|
+each_blob_header do |header|
+if header.is_valid?
+if (duplicate_pos = @index.get(header.id))
+PEROBS.log.error "FlatFile contains multiple blobs for ID " +
+"#{header.id}. First blob is at address #{duplicate_pos}. " +
+"Other blob found at address #{header.addr}."
+if header.length > 0
+@space_list.add_space(header.addr, header.length)
+end
+discard_damaged_blob(header)
+else
+@index.insert(header.id, header.addr)
+end
 else
-
+if header.length > 0
+@space_list.add_space(header.addr, header.length)
+end
 end
-
-
+
+pm.update(header.addr)
 end
 end

@@ -558,19 +612,19 @@ module PEROBS
 end

 def has_space?(address, size)
-header = FlatFileBlobHeader.
+header = FlatFileBlobHeader.read(@f, address)
 !header.is_valid? && header.length == size
 end

 def has_id_at?(id, address)
-header = FlatFileBlobHeader.
+header = FlatFileBlobHeader.read(@f, address)
 header.is_valid? && header.id == id
 end

 def inspect
 s = '['
-each_blob_header do |
-s << "{ :pos => #{
+each_blob_header do |header|
+s << "{ :pos => #{header.addr}, :flags => #{header.flags}, " +
 ":length => #{header.length}, :id => #{header.id}, " +
 ":crc => #{header.crc}"
 if header.is_valid?
@@ -581,21 +635,68 @@ module PEROBS
 s + ']'
 end

+def FlatFile::insert_header_checksums(db_dir)
+old_file_name = File.join(db_dir, 'database.blobs')
+new_file_name = File.join(db_dir, 'database_v4.blobs')
+bak_file_name = File.join(db_dir, 'database_v3.blobs')
+
+old_file = File.open(old_file_name, 'rb')
+new_file = File.open(new_file_name, 'wb')
+
+entries = 0
+while (buf = old_file.read(21))
+flags, length, id, crc = *buf.unpack('CQQL')
+blob_data = old_file.read(length)
+
+# Some basic sanity checking to ensure all reserved bits are 0. Older
+# versions of PEROBS used to set bit 1 despite it being reserved now.
+unless flags & 0xF0 == 0
+PEROBS.log.fatal "Blob file #{old_file_name} contains illegal " +
+"flag byte #{'%02x' % flags} at #{old_file.pos - 21}"
+end
+
+# Check if the blob is valid and current.
+if flags & 0x1 == 1 && flags & 0x8 == 0
+# Make sure the bit 1 is not set anymore.
+flags = flags & 0x05
+header_str = [ flags, length, id, crc ].pack('CQQL')
+header_crc = Zlib.crc32(header_str, 0)
+header_str += [ header_crc ].pack('L')
+
+new_file.write(header_str + blob_data)
+entries += 1
+end
+end
+PEROBS.log.info "Header checksum added to #{entries} entries"
+
+old_file.close
+new_file.close
+
+File.rename(old_file_name, bak_file_name)
+File.rename(new_file_name, old_file_name)
+end
+
 private

 def each_blob_header(&block)
-
+corrupted_blobs = 0
+
 begin
 @f.seek(0)
 while (header = FlatFileBlobHeader.read(@f))
-
+if header.corruption_start
+corrupted_blobs += 1
+end
+
+yield(header)

-
-@f.seek(pos)
+@f.seek(header.addr + FlatFileBlobHeader::LENGTH + header.length)
 end
 rescue IOError => e
 PEROBS.log.fatal "Cannot read blob in flat file DB: #{e.message}"
 end
+
+corrupted_blobs
 end

 def find_free_blob(bytes)
|
|
625
726
|
def cross_check_entries
|
626
727
|
errors = 0
|
627
728
|
|
628
|
-
|
629
|
-
|
630
|
-
if header.
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
729
|
+
@progressmeter.start('Cross checking blobs and index', @f.size) do |pm|
|
730
|
+
each_blob_header do |header|
|
731
|
+
if !header.is_valid?
|
732
|
+
if header.length > 0
|
733
|
+
unless @space_list.has_space?(header.addr, header.length)
|
734
|
+
PEROBS.log.error "FlatFile has free space " +
|
735
|
+
"(addr: #{header.addr}, len: #{header.length}) that is " +
|
736
|
+
"not in FreeSpaceManager"
|
737
|
+
errors += 1
|
738
|
+
end
|
739
|
+
end
|
740
|
+
else
|
741
|
+
if (index_address = @index.get(header.id)).nil?
|
742
|
+
PEROBS.log.error "FlatFile blob at address #{header.addr} " +
|
743
|
+
"is not listed in the index"
|
744
|
+
errors +=1
|
745
|
+
elsif index_address != header.addr
|
746
|
+
PEROBS.log.error "FlatFile blob at address #{header.addr} " +
|
747
|
+
"is listed in index with address #{index_address}"
|
748
|
+
errors += 1
|
636
749
|
end
|
637
750
|
end
|
638
|
-
|
639
|
-
|
640
|
-
PEROBS.log.error "FlatFile blob at address #{pos} is listed " +
|
641
|
-
"in index with address #{@index.get(header.id)}"
|
642
|
-
errors += 1
|
643
|
-
end
|
751
|
+
|
752
|
+
pm.update(header.addr)
|
644
753
|
end
|
645
754
|
end
|
646
755
|
|
647
|
-
errors
|
756
|
+
errors
|
648
757
|
end
|
649
758
|
|
650
759
|
def discard_damaged_blob(header)
|
@@ -653,6 +762,57 @@ module PEROBS
|
|
653
762
|
header.clear_flags
|
654
763
|
end
|
655
764
|
|
765
|
+
def open_index_files(abort_on_missing_files = false)
|
766
|
+
begin
|
767
|
+
@index.open(abort_on_missing_files)
|
768
|
+
@space_list.open
|
769
|
+
rescue FatalError
|
770
|
+
# Ensure that the index is really closed.
|
771
|
+
@index.close
|
772
|
+
# Erase it completely
|
773
|
+
@index.erase
|
774
|
+
# Then create it again.
|
775
|
+
@index.open
|
776
|
+
|
777
|
+
# Ensure that the spaces list is really closed.
|
778
|
+
@space_list.close
|
779
|
+
# Erase it completely
|
780
|
+
@space_list.erase
|
781
|
+
# Then create it again
|
782
|
+
@space_list.open
|
783
|
+
|
784
|
+
regenerate_index_and_spaces
|
785
|
+
end
|
786
|
+
end
|
787
|
+
|
788
|
+
def erase_index_files
|
789
|
+
# Ensure that the index is really closed.
|
790
|
+
@index.close
|
791
|
+
# Erase it completely
|
792
|
+
@index.erase
|
793
|
+
|
794
|
+
# Ensure that the spaces list is really closed.
|
795
|
+
@space_list.close
|
796
|
+
# Erase it completely
|
797
|
+
@space_list.erase
|
798
|
+
end
|
799
|
+
|
800
|
+
def clear_index_files
|
801
|
+
# Ensure that the index is really closed.
|
802
|
+
@index.close
|
803
|
+
# Erase it completely
|
804
|
+
@index.erase
|
805
|
+
# Then create it again.
|
806
|
+
@index.open
|
807
|
+
|
808
|
+
# Ensure that the spaces list is really closed.
|
809
|
+
@space_list.close
|
810
|
+
# Erase it completely
|
811
|
+
@space_list.erase
|
812
|
+
# Then create it again
|
813
|
+
@space_list.open
|
814
|
+
end
|
815
|
+
|
656
816
|
end
|
657
817
|
|
658
818
|
end
|