perobs 4.0.0 → 4.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/perobs.rb +1 -0
- data/lib/perobs/Array.rb +66 -19
- data/lib/perobs/BTree.rb +83 -12
- data/lib/perobs/BTreeBlob.rb +1 -1
- data/lib/perobs/BTreeDB.rb +2 -2
- data/lib/perobs/BTreeNode.rb +365 -85
- data/lib/perobs/BigArray.rb +267 -0
- data/lib/perobs/BigArrayNode.rb +998 -0
- data/lib/perobs/BigHash.rb +262 -0
- data/lib/perobs/BigTree.rb +184 -0
- data/lib/perobs/BigTreeNode.rb +873 -0
- data/lib/perobs/ConsoleProgressMeter.rb +61 -0
- data/lib/perobs/DataBase.rb +4 -3
- data/lib/perobs/DynamoDB.rb +57 -15
- data/lib/perobs/EquiBlobsFile.rb +143 -51
- data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
- data/lib/perobs/FlatFile.rb +363 -203
- data/lib/perobs/FlatFileBlobHeader.rb +98 -54
- data/lib/perobs/FlatFileDB.rb +42 -20
- data/lib/perobs/Hash.rb +58 -13
- data/lib/perobs/IDList.rb +144 -0
- data/lib/perobs/IDListPage.rb +107 -0
- data/lib/perobs/IDListPageFile.rb +180 -0
- data/lib/perobs/IDListPageRecord.rb +142 -0
- data/lib/perobs/Object.rb +18 -15
- data/lib/perobs/ObjectBase.rb +38 -4
- data/lib/perobs/PersistentObjectCache.rb +53 -67
- data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
- data/lib/perobs/ProgressMeter.rb +97 -0
- data/lib/perobs/SpaceTree.rb +21 -12
- data/lib/perobs/SpaceTreeNode.rb +53 -61
- data/lib/perobs/Store.rb +71 -32
- data/lib/perobs/version.rb +1 -1
- data/perobs.gemspec +4 -4
- data/test/Array_spec.rb +15 -6
- data/test/BTree_spec.rb +5 -2
- data/test/BigArray_spec.rb +214 -0
- data/test/BigHash_spec.rb +144 -0
- data/test/BigTreeNode_spec.rb +153 -0
- data/test/BigTree_spec.rb +259 -0
- data/test/EquiBlobsFile_spec.rb +105 -1
- data/test/FNV_Hash_1a_64_spec.rb +59 -0
- data/test/FlatFileDB_spec.rb +63 -14
- data/test/Hash_spec.rb +1 -2
- data/test/IDList_spec.rb +77 -0
- data/test/LegacyDBs/LegacyDB.rb +151 -0
- data/test/LegacyDBs/version_3/class_map.json +1 -0
- data/test/LegacyDBs/version_3/config.json +1 -0
- data/test/LegacyDBs/version_3/database.blobs +0 -0
- data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
- data/test/LegacyDBs/version_3/index.blobs +0 -0
- data/test/LegacyDBs/version_3/version +1 -0
- data/test/LockFile_spec.rb +9 -6
- data/test/SpaceTree_spec.rb +4 -1
- data/test/Store_spec.rb +290 -199
- data/test/spec_helper.rb +9 -4
- metadata +47 -10
- data/lib/perobs/TreeDB.rb +0 -277
@@ -0,0 +1,54 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# = FNV_Hash_1a_64.rb -- Persistent Ruby Object Store
|
4
|
+
#
|
5
|
+
# Copyright (c) 2019 by Chris Schlaeger <chris@taskjuggler.org>
|
6
|
+
#
|
7
|
+
# MIT License
|
8
|
+
#
|
9
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
10
|
+
# a copy of this software and associated documentation files (the
|
11
|
+
# "Software"), to deal in the Software without restriction, including
|
12
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
13
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
14
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
15
|
+
# the following conditions:
|
16
|
+
#
|
17
|
+
# The above copyright notice and this permission notice shall be
|
18
|
+
# included in all copies or substantial portions of the Software.
|
19
|
+
#
|
20
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
21
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
23
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
24
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
25
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
26
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27
|
+
|
28
|
+
module PEROBS

  # This is an implementation of the Fowler Noll Vo hashing algorithm in the
  # 1a variant for 64 bit hash values.
  # https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
  class FNV_Hash_1a_64

    # Start value of the hash (the FNV offset basis for 64 bit hashes).
    OFFSET = 14695981039346656037
    # Multiplier that is applied after each byte has been mixed in.
    PRIME = 1099511628211
    # Bit mask that truncates intermediate results to 64 bits. Ruby Integers
    # have arbitrary precision, so the overflow must be cut off explicitly.
    MASK = 2**64 - 1

    # Compute the 64 bit FNV-1a hash of the given item.
    # @param item [#to_s] Object to hash. It is converted with to_s and the
    #        bytes of the resulting String are hashed.
    # @return [Integer] Hash value in the range 0 .. 2**64 - 1
    def self.digest(item)
      item.to_s.each_byte.reduce(OFFSET) do |hash, byte|
        # 1a variant: XOR the byte in first, then multiply with the prime.
        ((hash ^ byte) * PRIME) & MASK
      end
    end

  end

end
|
54
|
+
|
data/lib/perobs/FlatFile.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#
|
3
3
|
# = FlatFile.rb -- Persistent Ruby Object Store
|
4
4
|
#
|
5
|
-
# Copyright (c) 2016 by Chris Schlaeger <chris@taskjuggler.org>
|
5
|
+
# Copyright (c) 2016, 2018 by Chris Schlaeger <chris@taskjuggler.org>
|
6
6
|
#
|
7
7
|
# MIT License
|
8
8
|
#
|
@@ -31,6 +31,7 @@ require 'perobs/Log'
|
|
31
31
|
require 'perobs/FlatFileBlobHeader'
|
32
32
|
require 'perobs/BTree'
|
33
33
|
require 'perobs/SpaceTree'
|
34
|
+
require 'perobs/IDList'
|
34
35
|
|
35
36
|
module PEROBS
|
36
37
|
|
@@ -44,12 +45,13 @@ module PEROBS
|
|
44
45
|
|
45
46
|
# Create a new FlatFile object for a database in the given path.
|
46
47
|
# @param dir [String] Directory path for the data base file
|
47
|
-
def initialize(dir)
|
48
|
+
def initialize(dir, progressmeter)
|
48
49
|
@db_dir = dir
|
50
|
+
@progressmeter = progressmeter
|
49
51
|
@f = nil
|
50
|
-
@
|
51
|
-
@
|
52
|
-
@space_list = SpaceTree.new(@db_dir)
|
52
|
+
@marks = nil
|
53
|
+
@index = BTree.new(@db_dir, 'index', INDEX_BTREE_ORDER, @progressmeter)
|
54
|
+
@space_list = SpaceTree.new(@db_dir, @progressmeter)
|
53
55
|
end
|
54
56
|
|
55
57
|
# Open the flat file for reading and writing.
|
@@ -74,33 +76,19 @@ module PEROBS
|
|
74
76
|
end
|
75
77
|
@f.sync = true
|
76
78
|
|
77
|
-
|
78
|
-
@index.open(!new_db_created)
|
79
|
-
@space_list.open
|
80
|
-
rescue FatalError
|
81
|
-
# Ensure that the index is really closed.
|
82
|
-
@index.close
|
83
|
-
# Erase it completely
|
84
|
-
@index.erase
|
85
|
-
# Then create it again.
|
86
|
-
@index.open
|
87
|
-
|
88
|
-
# Ensure that the spaces list is really closed.
|
89
|
-
@space_list.close
|
90
|
-
# Erase it completely
|
91
|
-
@space_list.erase
|
92
|
-
# Then create it again
|
93
|
-
@space_list.open
|
94
|
-
|
95
|
-
regenerate_index_and_spaces
|
96
|
-
end
|
79
|
+
open_index_files(!new_db_created)
|
97
80
|
end
|
98
81
|
|
99
82
|
# Close the flat file. This method must be called to ensure that all data
|
100
83
|
# is really written into the filesystem.
|
101
84
|
def close
|
102
|
-
@space_list.close
|
103
|
-
@index.close
|
85
|
+
@space_list.close if @space_list.is_open?
|
86
|
+
@index.close if @index.is_open?
|
87
|
+
|
88
|
+
if @marks
|
89
|
+
@marks.erase
|
90
|
+
@marks = nil
|
91
|
+
end
|
104
92
|
|
105
93
|
if @f
|
106
94
|
@f.flush
|
@@ -139,29 +127,36 @@ module PEROBS
|
|
139
127
|
# @param addr [Integer] Address of the blob to delete
|
140
128
|
# @param id [Integer] ID of the blob to delete
|
141
129
|
def delete_obj_by_address(addr, id)
|
142
|
-
@index.remove(id)
|
143
|
-
header = FlatFileBlobHeader.
|
130
|
+
@index.remove(id) if @index.is_open?
|
131
|
+
header = FlatFileBlobHeader.read(@f, addr, id)
|
144
132
|
header.clear_flags
|
145
|
-
@space_list.add_space(addr, header.length)
|
133
|
+
@space_list.add_space(addr, header.length) if @space_list.is_open?
|
146
134
|
end
|
147
135
|
|
148
136
|
# Delete all unmarked objects.
|
149
137
|
def delete_unmarked_objects
|
150
|
-
|
151
|
-
|
138
|
+
# We don't update the index and the space list during this operation as
|
139
|
+
# we defragmentize the blob file at the end. We'll end the operation
|
140
|
+
# with an empty space list.
|
141
|
+
clear_index_files
|
142
|
+
|
143
|
+
deleted_objects_count = 0
|
144
|
+
@progressmeter.start('Sweeping unmarked objects', @f.size) do |pm|
|
145
|
+
each_blob_header do |header|
|
146
|
+
if header.is_valid? && !@marks.include?(header.id)
|
147
|
+
delete_obj_by_address(header.addr, header.id)
|
148
|
+
deleted_objects_count += 1
|
149
|
+
end
|
152
150
|
|
153
|
-
|
154
|
-
each_blob_header do |pos, header|
|
155
|
-
if header.is_valid? && @marks.get(header.id).nil?
|
156
|
-
delete_obj_by_address(pos, header.id)
|
157
|
-
deleted_ids << header.id
|
151
|
+
pm.update(header.addr)
|
158
152
|
end
|
159
153
|
end
|
160
154
|
defragmentize
|
161
155
|
|
162
|
-
|
163
|
-
|
164
|
-
|
156
|
+
# Update the index file and create a new, empty space list.
|
157
|
+
regenerate_index_and_spaces
|
158
|
+
|
159
|
+
deleted_objects_count
|
165
160
|
end
|
166
161
|
|
167
162
|
# Write the given object into the file. This method never uses in-place
|
@@ -177,7 +172,7 @@ module PEROBS
|
|
177
172
|
# operation is aborted or interrupted we ensure that we either have the
|
178
173
|
# old or the new version available.
|
179
174
|
if (old_addr = find_obj_addr_by_id(id))
|
180
|
-
old_header = FlatFileBlobHeader.
|
175
|
+
old_header = FlatFileBlobHeader.read(@f, old_addr)
|
181
176
|
old_header.set_outdated_flag
|
182
177
|
end
|
183
178
|
|
@@ -197,7 +192,7 @@ module PEROBS
|
|
197
192
|
begin
|
198
193
|
if length != -1
|
199
194
|
# Just a safeguard so we don't overwrite current data.
|
200
|
-
header = FlatFileBlobHeader.
|
195
|
+
header = FlatFileBlobHeader.read(@f, addr)
|
201
196
|
if header.length != length
|
202
197
|
PEROBS.log.fatal "Length in free list (#{length}) and header " +
|
203
198
|
"(#{header.length}) for address #{addr} don't match."
|
@@ -229,18 +224,22 @@ module PEROBS
|
|
229
224
|
FlatFileBlobHeader.new(@f, space_address, 0, space_length,
|
230
225
|
0, 0).write
|
231
226
|
# Register the new space with the space list.
|
232
|
-
@space_list.
|
227
|
+
if @space_list.is_open? && space_length > 0
|
228
|
+
@space_list.add_space(space_address, space_length)
|
229
|
+
end
|
233
230
|
end
|
234
231
|
|
235
232
|
# Once the blob has been written we can update the index as well.
|
236
|
-
@index.insert(id, addr)
|
233
|
+
@index.insert(id, addr) if @index.is_open?
|
237
234
|
|
238
235
|
if old_addr
|
239
236
|
# If we had an existing object stored for the ID we have to mark
|
240
237
|
# this entry as deleted now.
|
241
238
|
old_header.clear_flags
|
242
239
|
# And register the newly freed space with the space list.
|
243
|
-
@space_list.
|
240
|
+
if @space_list.is_open?
|
241
|
+
@space_list.add_space(old_addr, old_header.length)
|
242
|
+
end
|
244
243
|
else
|
245
244
|
@f.flush
|
246
245
|
end
|
@@ -270,21 +269,17 @@ module PEROBS
|
|
270
269
|
nil
|
271
270
|
end
|
272
271
|
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
end
|
277
|
-
|
278
|
-
nil
|
272
|
+
# @return [Integer] Number of items stored in the DB.
|
273
|
+
def item_counter
|
274
|
+
@index.entries_count
|
279
275
|
end
|
280
276
|
|
281
|
-
|
282
277
|
# Read the object at the specified address.
|
283
278
|
# @param addr [Integer] Offset in the flat file
|
284
279
|
# @param id [Integer] ID of the data blob
|
285
280
|
# @return [String] Raw object data
|
286
281
|
def read_obj_by_address(addr, id)
|
287
|
-
header = FlatFileBlobHeader.
|
282
|
+
header = FlatFileBlobHeader.read(@f, addr, id)
|
288
283
|
if header.id != id
|
289
284
|
PEROBS.log.fatal "Database index corrupted: Index for object " +
|
290
285
|
"#{id} points to object with ID #{header.id}"
|
@@ -319,19 +314,22 @@ module PEROBS
|
|
319
314
|
# Mark the object with the given ID.
|
320
315
|
# @param id [Integer] ID of the object
|
321
316
|
def mark_obj_by_id(id)
|
322
|
-
@marks.insert(id
|
317
|
+
@marks.insert(id)
|
323
318
|
end
|
324
319
|
|
325
320
|
# Return true if the object with the given ID is marked, false otherwise.
|
326
321
|
# @param id [Integer] ID of the object
|
327
322
|
def is_marked_by_id?(id)
|
328
|
-
|
323
|
+
@marks.include?(id)
|
329
324
|
end
|
330
325
|
|
331
326
|
# Clear all marks.
|
332
327
|
def clear_all_marks
|
333
|
-
@marks
|
334
|
-
|
328
|
+
if @marks
|
329
|
+
@marks.clear
|
330
|
+
else
|
331
|
+
@marks = IDList.new(@db_dir, 'marks', 8)
|
332
|
+
end
|
335
333
|
end
|
336
334
|
|
337
335
|
# Eliminate all the holes in the file. This is an in-place
|
@@ -340,59 +338,72 @@ module PEROBS
|
|
340
338
|
distance = 0
|
341
339
|
new_file_size = 0
|
342
340
|
deleted_blobs = 0
|
341
|
+
corrupted_blobs = 0
|
343
342
|
valid_blobs = 0
|
344
|
-
|
345
|
-
PEROBS.log.info "Defragmenting FlatFile"
|
343
|
+
|
346
344
|
# Iterate over all entries.
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
345
|
+
@progressmeter.start('Defragmentizing blobs file', @f.size) do |pm|
|
346
|
+
each_blob_header do |header|
|
347
|
+
# If we have stumbled over a corrupted blob we treat it similar to a
|
348
|
+
# deleted blob and reuse the space.
|
349
|
+
if header.corruption_start
|
350
|
+
distance += header.addr - header.corruption_start
|
351
|
+
corrupted_blobs += 1
|
352
|
+
end
|
353
|
+
|
354
|
+
# Total size of the current entry
|
355
|
+
entry_bytes = FlatFileBlobHeader::LENGTH + header.length
|
356
|
+
if header.is_valid?
|
357
|
+
# We have found a valid entry.
|
358
|
+
valid_blobs += 1
|
359
|
+
if distance > 0
|
360
|
+
begin
|
361
|
+
# Read current entry into a buffer
|
362
|
+
@f.seek(header.addr)
|
363
|
+
buf = @f.read(entry_bytes)
|
364
|
+
# Write the buffer right after the end of the previous entry.
|
365
|
+
@f.seek(header.addr - distance)
|
366
|
+
@f.write(buf)
|
367
|
+
# Mark the space between the relocated current entry and the
|
368
|
+
# next valid entry as deleted space.
|
369
|
+
FlatFileBlobHeader.new(@f, @f.pos, 0,
|
370
|
+
distance - FlatFileBlobHeader::LENGTH,
|
371
|
+
0, 0).write
|
372
|
+
@f.flush
|
373
|
+
rescue IOError => e
|
374
|
+
PEROBS.log.fatal "Error while moving blob for ID " +
|
375
|
+
"#{header.id}: #{e.message}"
|
376
|
+
end
|
372
377
|
end
|
378
|
+
new_file_size = header.addr - distance +
|
379
|
+
FlatFileBlobHeader::LENGTH + header.length
|
380
|
+
else
|
381
|
+
deleted_blobs += 1
|
382
|
+
distance += entry_bytes
|
373
383
|
end
|
374
|
-
|
375
|
-
|
376
|
-
deleted_blobs += 1
|
377
|
-
distance += entry_bytes
|
384
|
+
|
385
|
+
pm.update(header.addr)
|
378
386
|
end
|
379
387
|
end
|
380
|
-
|
388
|
+
|
381
389
|
PEROBS.log.info "#{distance / 1000} KiB/#{deleted_blobs} blobs of " +
|
382
390
|
"#{@f.size / 1000} KiB/#{valid_blobs} blobs or " +
|
383
391
|
"#{'%.1f' % (distance.to_f / @f.size * 100.0)}% reclaimed"
|
392
|
+
if corrupted_blobs > 0
|
393
|
+
PEROBS.log.info "#{corrupted_blobs} corrupted blob(s) found. Space " +
|
394
|
+
"was recycled."
|
395
|
+
end
|
384
396
|
|
385
397
|
@f.flush
|
386
398
|
@f.truncate(new_file_size)
|
387
399
|
@f.flush
|
388
|
-
@space_list.clear
|
389
400
|
|
390
401
|
sync
|
391
402
|
end
|
392
403
|
|
393
404
|
# This method iterates over all entries in the FlatFile and removes the
|
394
405
|
# entry and inserts it again. This is useful to update all entries in
|
395
|
-
#
|
406
|
+
# case the storage format has changed.
|
396
407
|
def refresh
|
397
408
|
# This iteration might look scary as we iterate over the entries while
|
398
409
|
# we are rearranging them. Re-inserted items may be inserted
|
@@ -400,23 +411,34 @@ module PEROBS
|
|
400
411
|
# inserted after the current entry and will be re-read again unless they
|
401
412
|
# are inserted after the original file end.
|
402
413
|
file_size = @f.size
|
403
|
-
PEROBS.log.info "Refreshing the DB..."
|
404
|
-
t = Time.now
|
405
|
-
each_blob_header do |pos, header|
|
406
|
-
if header.is_valid?
|
407
|
-
buf = read_obj_by_address(pos, header.id)
|
408
|
-
delete_obj_by_address(pos, header.id)
|
409
|
-
write_obj_by_id(header.id, buf)
|
410
|
-
end
|
411
414
|
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
+
# We don't update the index and the space list during this operation as
|
416
|
+
# we defragmentize the blob file at the end. We'll end the operation
|
417
|
+
# with an empty space list.
|
418
|
+
clear_index_files
|
419
|
+
|
420
|
+
@progressmeter.start('Converting objects to new storage format',
|
421
|
+
@f.size) do |pm|
|
422
|
+
each_blob_header do |header|
|
423
|
+
if header.is_valid?
|
424
|
+
buf = read_obj_by_address(header.addr, header.id)
|
425
|
+
delete_obj_by_address(header.addr, header.id)
|
426
|
+
write_obj_by_id(header.id, buf)
|
427
|
+
end
|
428
|
+
|
429
|
+
# Some re-inserted blobs may be inserted after the original file end.
|
430
|
+
# No need to process those blobs again.
|
431
|
+
break if header.addr >= file_size
|
432
|
+
|
433
|
+
pm.update(header.addr)
|
434
|
+
end
|
415
435
|
end
|
416
|
-
PEROBS.log.info "DB refresh completed in #{Time.now - t} seconds"
|
417
436
|
|
418
437
|
# Reclaim the space saved by compressing entries.
|
419
438
|
defragmentize
|
439
|
+
|
440
|
+
# Recreate the index file and create an empty space list.
|
441
|
+
regenerate_index_and_spaces
|
420
442
|
end
|
421
443
|
|
422
444
|
# Check (and repair) the FlatFile.
|
@@ -433,95 +455,117 @@ module PEROBS
|
|
433
455
|
# First check the database blob file. Each entry should be readable and
|
434
456
|
# correct and all IDs must be unique. We use a shadow index to keep
|
435
457
|
# track of the already found IDs.
|
436
|
-
new_index = BTree.new(@db_dir, 'new-index', INDEX_BTREE_ORDER
|
458
|
+
new_index = BTree.new(@db_dir, 'new-index', INDEX_BTREE_ORDER,
|
459
|
+
@progressmeter)
|
437
460
|
new_index.erase
|
438
461
|
new_index.open
|
439
462
|
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
463
|
+
corrupted_blobs = 0
|
464
|
+
@progressmeter.start('Checking blobs file', @f.size) do |pm|
|
465
|
+
corrupted_blobs = each_blob_header do |header|
|
466
|
+
if header.is_valid?
|
467
|
+
# We have a non-deleted entry.
|
468
|
+
begin
|
469
|
+
@f.seek(header.addr + FlatFileBlobHeader::LENGTH)
|
470
|
+
buf = @f.read(header.length)
|
471
|
+
if buf.bytesize != header.length
|
472
|
+
PEROBS.log.error "Premature end of file in blob with ID " +
|
473
|
+
"#{header.id}."
|
474
|
+
discard_damaged_blob(header) if repair
|
475
|
+
errors += 1
|
476
|
+
next
|
477
|
+
end
|
453
478
|
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
479
|
+
# Uncompress the data if the compression bit is set in the mark
|
480
|
+
# byte.
|
481
|
+
if header.is_compressed?
|
482
|
+
begin
|
483
|
+
buf = Zlib.inflate(buf)
|
484
|
+
rescue Zlib::BufError, Zlib::DataError
|
485
|
+
PEROBS.log.error "Corrupted compressed block with ID " +
|
486
|
+
"#{header.id} found."
|
487
|
+
discard_damaged_blob(header) if repair
|
488
|
+
errors += 1
|
489
|
+
next
|
490
|
+
end
|
491
|
+
end
|
492
|
+
|
493
|
+
if header.crc && checksum(buf) != header.crc
|
494
|
+
PEROBS.log.error "Checksum failure while checking blob " +
|
495
|
+
"with ID #{header.id}"
|
462
496
|
discard_damaged_blob(header) if repair
|
463
497
|
errors += 1
|
464
498
|
next
|
465
499
|
end
|
500
|
+
rescue IOError => e
|
501
|
+
PEROBS.log.fatal "Check of blob with ID #{header.id} failed: " +
|
502
|
+
e.message
|
466
503
|
end
|
467
504
|
|
468
|
-
if
|
469
|
-
|
470
|
-
|
471
|
-
|
505
|
+
# Check if the ID has already been found in the file.
|
506
|
+
if (previous_address = new_index.get(header.id))
|
507
|
+
PEROBS.log.error "Multiple blobs for ID #{header.id} found. " +
|
508
|
+
"Addresses: #{previous_address}, #{header.addr}"
|
472
509
|
errors += 1
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
discard_damaged_blob(header)
|
491
|
-
elsif previous_header.is_outdated?
|
492
|
-
discard_damaged_blob(previous_header)
|
493
|
-
else
|
494
|
-
PEROBS.log.error "None of the blobs with same ID have " +
|
495
|
-
"the outdated flag set. Deleting the smaller one."
|
496
|
-
discard_damaged_blob(header.length < previous_header.length ?
|
497
|
-
header : previous_header)
|
510
|
+
previous_header = FlatFileBlobHeader.read(@f, previous_address,
|
511
|
+
header.id)
|
512
|
+
if repair
|
513
|
+
# We have two blobs with the same ID and we must discard one of
|
514
|
+
# them.
|
515
|
+
if header.is_outdated?
|
516
|
+
discard_damaged_blob(header)
|
517
|
+
elsif previous_header.is_outdated?
|
518
|
+
discard_damaged_blob(previous_header)
|
519
|
+
else
|
520
|
+
PEROBS.log.error "None of the blobs with same ID have " +
|
521
|
+
"the outdated flag set. Deleting the smaller one."
|
522
|
+
errors += 1
|
523
|
+
discard_damaged_blob(header.length < previous_header.length ?
|
524
|
+
header : previous_header)
|
525
|
+
end
|
526
|
+
next
|
498
527
|
end
|
499
|
-
|
528
|
+
else
|
529
|
+
# ID is unique so far. Add it to the shadow index.
|
530
|
+
new_index.insert(header.id, header.addr)
|
500
531
|
end
|
501
|
-
|
502
|
-
# ID is unique so far. Add it to the shadow index.
|
503
|
-
new_index.insert(header.id, pos)
|
532
|
+
|
504
533
|
end
|
505
534
|
|
535
|
+
pm.update(header.addr)
|
506
536
|
end
|
537
|
+
|
538
|
+
errors += corrupted_blobs
|
507
539
|
end
|
540
|
+
|
508
541
|
# We no longer need the new index.
|
509
542
|
new_index.close
|
510
543
|
new_index.erase
|
511
544
|
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
545
|
+
if repair && corrupted_blobs > 0
|
546
|
+
erase_index_files
|
547
|
+
defragmentize
|
548
|
+
regenerate_index_and_spaces
|
549
|
+
else
|
550
|
+
# Now we check the index data. It must be correct and the entries must
|
551
|
+
# match the blob file. All entries in the index must be in the blob file
|
552
|
+
# and vice versa.
|
553
|
+
begin
|
554
|
+
index_ok = @index.check do |id, address|
|
555
|
+
has_id_at?(id, address)
|
556
|
+
end
|
557
|
+
x_check_errs = 0
|
558
|
+
space_check_ok = true
|
559
|
+
unless index_ok && (space_check_ok = @space_list.check(self)) &&
|
560
|
+
(x_check_errs = cross_check_entries) == 0
|
561
|
+
errors += 1 unless index_ok && space_check_ok
|
562
|
+
errors += x_check_errs
|
563
|
+
regenerate_index_and_spaces if repair
|
564
|
+
end
|
565
|
+
rescue PEROBS::FatalError
|
566
|
+
errors += 1
|
520
567
|
regenerate_index_and_spaces if repair
|
521
568
|
end
|
522
|
-
rescue PEROBS::FatalError
|
523
|
-
errors += 1
|
524
|
-
regenerate_index_and_spaces if repair
|
525
569
|
end
|
526
570
|
|
527
571
|
sync if repair
|
@@ -535,22 +579,32 @@ module PEROBS
|
|
535
579
|
# regenerates them from the FlatFile.
|
536
580
|
def regenerate_index_and_spaces
|
537
581
|
PEROBS.log.warn "Re-generating FlatFileDB index and space files"
|
582
|
+
@index.open unless @index.is_open?
|
538
583
|
@index.clear
|
584
|
+
@space_list.open unless @space_list.is_open?
|
539
585
|
@space_list.clear
|
540
586
|
|
541
|
-
|
542
|
-
|
543
|
-
if
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
587
|
+
@progressmeter.start('Re-generating database index', @f.size) do |pm|
|
588
|
+
each_blob_header do |header|
|
589
|
+
if header.is_valid?
|
590
|
+
if (duplicate_pos = @index.get(header.id))
|
591
|
+
PEROBS.log.error "FlatFile contains multiple blobs for ID " +
|
592
|
+
"#{header.id}. First blob is at address #{duplicate_pos}. " +
|
593
|
+
"Other blob found at address #{header.addr}."
|
594
|
+
if header.length > 0
|
595
|
+
@space_list.add_space(header.addr, header.length)
|
596
|
+
end
|
597
|
+
discard_damaged_blob(header)
|
598
|
+
else
|
599
|
+
@index.insert(header.id, header.addr)
|
600
|
+
end
|
549
601
|
else
|
550
|
-
|
602
|
+
if header.length > 0
|
603
|
+
@space_list.add_space(header.addr, header.length)
|
604
|
+
end
|
551
605
|
end
|
552
|
-
|
553
|
-
|
606
|
+
|
607
|
+
pm.update(header.addr)
|
554
608
|
end
|
555
609
|
end
|
556
610
|
|
@@ -558,19 +612,19 @@ module PEROBS
|
|
558
612
|
end
|
559
613
|
|
560
614
|
def has_space?(address, size)
|
561
|
-
header = FlatFileBlobHeader.
|
615
|
+
header = FlatFileBlobHeader.read(@f, address)
|
562
616
|
!header.is_valid? && header.length == size
|
563
617
|
end
|
564
618
|
|
565
619
|
def has_id_at?(id, address)
|
566
|
-
header = FlatFileBlobHeader.
|
620
|
+
header = FlatFileBlobHeader.read(@f, address)
|
567
621
|
header.is_valid? && header.id == id
|
568
622
|
end
|
569
623
|
|
570
624
|
def inspect
|
571
625
|
s = '['
|
572
|
-
each_blob_header do |
|
573
|
-
s << "{ :pos => #{
|
626
|
+
each_blob_header do |header|
|
627
|
+
s << "{ :pos => #{header.addr}, :flags => #{header.flags}, " +
|
574
628
|
":length => #{header.length}, :id => #{header.id}, " +
|
575
629
|
":crc => #{header.crc}"
|
576
630
|
if header.is_valid?
|
@@ -581,21 +635,68 @@ module PEROBS
|
|
581
635
|
s + ']'
|
582
636
|
end
|
583
637
|
|
638
|
+
# Rewrite the 'database.blobs' file in the given directory so that every
# blob header is followed by a CRC32 checksum of that header. The old
# headers are 21 bytes long (flags, length, ID and blob CRC). Only entries
# that have flag bit 0 set and flag bit 3 cleared are copied into the new
# file. All other entries are dropped. The original file is kept as
# 'database_v3.blobs'.
# @param db_dir [String] Directory that contains the 'database.blobs' file
def self.insert_header_checksums(db_dir)
  old_file_name = File.join(db_dir, 'database.blobs')
  new_file_name = File.join(db_dir, 'database_v4.blobs')
  bak_file_name = File.join(db_dir, 'database_v3.blobs')
  # Size of an old header: 1 (flags) + 8 (length) + 8 (ID) + 4 (CRC) bytes
  old_header_size = 21

  entries = 0
  # The block form guarantees that both files are closed again, even if the
  # conversion is aborted by an exception.
  File.open(old_file_name, 'rb') do |old_file|
    File.open(new_file_name, 'wb') do |new_file|
      loop do
        # Remember where the header starts. Once the blob data has been
        # read the file position no longer points to the end of the header.
        header_pos = old_file.pos
        break unless (buf = old_file.read(old_header_size))

        flags, length, id, crc = buf.unpack('CQQL')
        blob_data = old_file.read(length)

        # Some basic sanity checking to ensure all reserved bits are 0. Older
        # versions of PEROBS used to set bit 1 despite it being reserved now.
        unless flags & 0xF0 == 0
          PEROBS.log.fatal "Blob file #{old_file_name} contains illegal " +
            "flag byte #{'%02x' % flags} at #{header_pos}"
        end

        # Check if the blob is valid and current.
        if flags & 0x1 == 1 && flags & 0x8 == 0
          # Never copy an entry that was cut off by the end of the file. The
          # new file would otherwise contain a header that announces more
          # data than is actually there.
          if buf.bytesize < old_header_size || blob_data.nil? ||
             blob_data.bytesize != length
            PEROBS.log.fatal "Blob file #{old_file_name} is truncated. " +
              "The entry at #{header_pos} is incomplete"
          end

          # Make sure the bit 1 is not set anymore.
          flags = flags & 0x05
          header_str = [ flags, length, id, crc ].pack('CQQL')
          header_crc = Zlib.crc32(header_str, 0)
          header_str += [ header_crc ].pack('L')

          new_file.write(header_str + blob_data)
          entries += 1
        end
      end
    end
  end
  PEROBS.log.info "Header checksum added to #{entries} entries"

  # Both files are closed at this point. Keep the old file as backup and
  # put the converted file in its place.
  File.rename(old_file_name, bak_file_name)
  File.rename(new_file_name, old_file_name)
end
|
678
|
+
|
584
679
|
private
|
585
680
|
|
586
681
|
def each_blob_header(&block)
|
587
|
-
|
682
|
+
corrupted_blobs = 0
|
683
|
+
|
588
684
|
begin
|
589
685
|
@f.seek(0)
|
590
686
|
while (header = FlatFileBlobHeader.read(@f))
|
591
|
-
|
687
|
+
if header.corruption_start
|
688
|
+
corrupted_blobs += 1
|
689
|
+
end
|
690
|
+
|
691
|
+
yield(header)
|
592
692
|
|
593
|
-
|
594
|
-
@f.seek(pos)
|
693
|
+
@f.seek(header.addr + FlatFileBlobHeader::LENGTH + header.length)
|
595
694
|
end
|
596
695
|
rescue IOError => e
|
597
696
|
PEROBS.log.fatal "Cannot read blob in flat file DB: #{e.message}"
|
598
697
|
end
|
698
|
+
|
699
|
+
corrupted_blobs
|
599
700
|
end
|
600
701
|
|
601
702
|
def find_free_blob(bytes)
|
@@ -625,26 +726,34 @@ module PEROBS
|
|
625
726
|
def cross_check_entries
|
626
727
|
errors = 0
|
627
728
|
|
628
|
-
|
629
|
-
|
630
|
-
if header.
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
729
|
+
@progressmeter.start('Cross checking blobs and index', @f.size) do |pm|
|
730
|
+
each_blob_header do |header|
|
731
|
+
if !header.is_valid?
|
732
|
+
if header.length > 0
|
733
|
+
unless @space_list.has_space?(header.addr, header.length)
|
734
|
+
PEROBS.log.error "FlatFile has free space " +
|
735
|
+
"(addr: #{header.addr}, len: #{header.length}) that is " +
|
736
|
+
"not in FreeSpaceManager"
|
737
|
+
errors += 1
|
738
|
+
end
|
739
|
+
end
|
740
|
+
else
|
741
|
+
if (index_address = @index.get(header.id)).nil?
|
742
|
+
PEROBS.log.error "FlatFile blob at address #{header.addr} " +
|
743
|
+
"is not listed in the index"
|
744
|
+
errors +=1
|
745
|
+
elsif index_address != header.addr
|
746
|
+
PEROBS.log.error "FlatFile blob at address #{header.addr} " +
|
747
|
+
"is listed in index with address #{index_address}"
|
748
|
+
errors += 1
|
636
749
|
end
|
637
750
|
end
|
638
|
-
|
639
|
-
|
640
|
-
PEROBS.log.error "FlatFile blob at address #{pos} is listed " +
|
641
|
-
"in index with address #{@index.get(header.id)}"
|
642
|
-
errors += 1
|
643
|
-
end
|
751
|
+
|
752
|
+
pm.update(header.addr)
|
644
753
|
end
|
645
754
|
end
|
646
755
|
|
647
|
-
errors
|
756
|
+
errors
|
648
757
|
end
|
649
758
|
|
650
759
|
def discard_damaged_blob(header)
|
@@ -653,6 +762,57 @@ module PEROBS
|
|
653
762
|
header.clear_flags
|
654
763
|
end
|
655
764
|
|
765
|
+
# Open the index and the space list. If this fails with a FatalError both
# files are closed, erased and created again. Their content is then
# restored by regenerate_index_and_spaces.
# @param abort_on_missing_files [Boolean] Passed on to the open method of
#        the index. Presumably the index raises an error if this is true
#        and the index files don't exist.
def open_index_files(abort_on_missing_files = false)
  @index.open(abort_on_missing_files)
  @space_list.open
rescue FatalError
  # First the index, then the space list: Ensure that the file is really
  # closed, erase it completely and then create it again.
  [ @index, @space_list ].each do |file|
    file.close
    file.erase
    file.open
  end

  regenerate_index_and_spaces
end
|
787
|
+
|
788
|
+
# Close and erase the index and the space list. Neither of them is opened
# again by this method.
def erase_index_files
  [ @index, @space_list ].each do |file|
    # Ensure that the file is really closed before it is erased completely.
    file.close
    file.erase
  end
end
|
799
|
+
|
800
|
+
# Replace the index and the space list with new, empty ones. Each of them
# is closed, erased and then opened again.
def clear_index_files
  [ @index, @space_list ].each do |file|
    # Ensure that the file is really closed.
    file.close
    # Erase it completely and create it again.
    file.erase
    file.open
  end
end
|
815
|
+
|
656
816
|
end
|
657
817
|
|
658
818
|
end
|