perobs 3.0.1 → 4.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +19 -18
- data/lib/perobs.rb +2 -0
- data/lib/perobs/Array.rb +68 -21
- data/lib/perobs/BTree.rb +110 -54
- data/lib/perobs/BTreeBlob.rb +14 -13
- data/lib/perobs/BTreeDB.rb +11 -10
- data/lib/perobs/BTreeNode.rb +551 -197
- data/lib/perobs/BTreeNodeCache.rb +10 -8
- data/lib/perobs/BTreeNodeLink.rb +11 -1
- data/lib/perobs/BigArray.rb +285 -0
- data/lib/perobs/BigArrayNode.rb +1002 -0
- data/lib/perobs/BigHash.rb +246 -0
- data/lib/perobs/BigTree.rb +197 -0
- data/lib/perobs/BigTreeNode.rb +873 -0
- data/lib/perobs/Cache.rb +47 -22
- data/lib/perobs/ClassMap.rb +2 -2
- data/lib/perobs/ConsoleProgressMeter.rb +61 -0
- data/lib/perobs/DataBase.rb +4 -3
- data/lib/perobs/DynamoDB.rb +62 -20
- data/lib/perobs/EquiBlobsFile.rb +174 -59
- data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
- data/lib/perobs/FlatFile.rb +536 -242
- data/lib/perobs/FlatFileBlobHeader.rb +120 -84
- data/lib/perobs/FlatFileDB.rb +58 -27
- data/lib/perobs/FuzzyStringMatcher.rb +175 -0
- data/lib/perobs/Hash.rb +129 -35
- data/lib/perobs/IDList.rb +144 -0
- data/lib/perobs/IDListPage.rb +107 -0
- data/lib/perobs/IDListPageFile.rb +180 -0
- data/lib/perobs/IDListPageRecord.rb +142 -0
- data/lib/perobs/LockFile.rb +3 -0
- data/lib/perobs/Object.rb +28 -20
- data/lib/perobs/ObjectBase.rb +53 -10
- data/lib/perobs/PersistentObjectCache.rb +142 -0
- data/lib/perobs/PersistentObjectCacheLine.rb +99 -0
- data/lib/perobs/ProgressMeter.rb +97 -0
- data/lib/perobs/SpaceManager.rb +273 -0
- data/lib/perobs/SpaceTree.rb +63 -47
- data/lib/perobs/SpaceTreeNode.rb +134 -115
- data/lib/perobs/SpaceTreeNodeLink.rb +1 -1
- data/lib/perobs/StackFile.rb +1 -1
- data/lib/perobs/Store.rb +180 -70
- data/lib/perobs/version.rb +1 -1
- data/perobs.gemspec +4 -4
- data/test/Array_spec.rb +48 -39
- data/test/BTreeDB_spec.rb +2 -2
- data/test/BTree_spec.rb +50 -1
- data/test/BigArray_spec.rb +261 -0
- data/test/BigHash_spec.rb +152 -0
- data/test/BigTreeNode_spec.rb +153 -0
- data/test/BigTree_spec.rb +259 -0
- data/test/EquiBlobsFile_spec.rb +105 -5
- data/test/FNV_Hash_1a_64_spec.rb +59 -0
- data/test/FlatFileDB_spec.rb +199 -15
- data/test/FuzzyStringMatcher_spec.rb +261 -0
- data/test/Hash_spec.rb +27 -16
- data/test/IDList_spec.rb +77 -0
- data/test/LegacyDBs/LegacyDB.rb +155 -0
- data/test/LegacyDBs/version_3/class_map.json +1 -0
- data/test/LegacyDBs/version_3/config.json +1 -0
- data/test/LegacyDBs/version_3/database.blobs +0 -0
- data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
- data/test/LegacyDBs/version_3/index.blobs +0 -0
- data/test/LegacyDBs/version_3/version +1 -0
- data/test/LockFile_spec.rb +9 -6
- data/test/Object_spec.rb +5 -5
- data/test/SpaceManager_spec.rb +176 -0
- data/test/SpaceTree_spec.rb +27 -9
- data/test/Store_spec.rb +353 -206
- data/test/perobs_spec.rb +7 -3
- data/test/spec_helper.rb +9 -4
- metadata +59 -16
- data/lib/perobs/SpaceTreeNodeCache.rb +0 -76
- data/lib/perobs/TreeDB.rb +0 -277
@@ -0,0 +1,54 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# = FNV_Hash_1a_64.rb -- Persistent Ruby Object Store
|
4
|
+
#
|
5
|
+
# Copyright (c) 2019 by Chris Schlaeger <chris@taskjuggler.org>
|
6
|
+
#
|
7
|
+
# MIT License
|
8
|
+
#
|
9
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
10
|
+
# a copy of this software and associated documentation files (the
|
11
|
+
# "Software"), to deal in the Software without restriction, including
|
12
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
13
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
14
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
15
|
+
# the following conditions:
|
16
|
+
#
|
17
|
+
# The above copyright notice and this permission notice shall be
|
18
|
+
# included in all copies or substantial portions of the Software.
|
19
|
+
#
|
20
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
21
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
23
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
24
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
25
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
26
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27
|
+
|
28
|
+
module PEROBS
|
29
|
+
|
30
|
+
# This is an implementation of the Fowler Noll Vo hashing algorithm in the
|
31
|
+
# 1a variant for 64 bit hash values.
|
32
|
+
# https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
|
33
|
+
class FNV_Hash_1a_64
|
34
|
+
|
35
|
+
@@OFFSET = 14695981039346656037
|
36
|
+
@@PRIME = 1099511628211
|
37
|
+
@@MASK = 2**64 - 1
|
38
|
+
|
39
|
+
def self.digest(item)
|
40
|
+
hash = @@OFFSET
|
41
|
+
|
42
|
+
item.to_s.each_byte do |byte|
|
43
|
+
hash ^= byte
|
44
|
+
hash *= @@PRIME
|
45
|
+
hash &= @@MASK
|
46
|
+
end
|
47
|
+
|
48
|
+
hash
|
49
|
+
end
|
50
|
+
|
51
|
+
end
|
52
|
+
|
53
|
+
end
|
54
|
+
|
data/lib/perobs/FlatFile.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#
|
3
3
|
# = FlatFile.rb -- Persistent Ruby Object Store
|
4
4
|
#
|
5
|
-
# Copyright (c) 2016 by Chris Schlaeger <chris@taskjuggler.org>
|
5
|
+
# Copyright (c) 2016, 2018, 2019 by Chris Schlaeger <chris@taskjuggler.org>
|
6
6
|
#
|
7
7
|
# MIT License
|
8
8
|
#
|
@@ -31,6 +31,8 @@ require 'perobs/Log'
|
|
31
31
|
require 'perobs/FlatFileBlobHeader'
|
32
32
|
require 'perobs/BTree'
|
33
33
|
require 'perobs/SpaceTree'
|
34
|
+
require 'perobs/SpaceManager'
|
35
|
+
require 'perobs/IDList'
|
34
36
|
|
35
37
|
module PEROBS
|
36
38
|
|
@@ -44,11 +46,20 @@ module PEROBS
|
|
44
46
|
|
45
47
|
# Create a new FlatFile object for a database in the given path.
|
46
48
|
# @param dir [String] Directory path for the data base file
|
47
|
-
def initialize(dir)
|
49
|
+
def initialize(dir, progressmeter)
|
48
50
|
@db_dir = dir
|
51
|
+
@progressmeter = progressmeter
|
49
52
|
@f = nil
|
50
|
-
@
|
51
|
-
@
|
53
|
+
@marks = nil
|
54
|
+
@index = BTree.new(@db_dir, 'index', INDEX_BTREE_ORDER, @progressmeter)
|
55
|
+
old_spaces_file = File.join(@db_dir, 'database_spaces.blobs')
|
56
|
+
if File.exist?(old_spaces_file)
|
57
|
+
# PEROBS version 4.1.0 and earlier used this space list format. It is
|
58
|
+
# deprecated now. Newly created DBs use the SpaceManager format.
|
59
|
+
@space_list = SpaceTree.new(@db_dir, @progressmeter)
|
60
|
+
else
|
61
|
+
@space_list = SpaceManager.new(@db_dir, @progressmeter)
|
62
|
+
end
|
52
63
|
end
|
53
64
|
|
54
65
|
# Open the flat file for reading and writing.
|
@@ -71,38 +82,26 @@ module PEROBS
|
|
71
82
|
PEROBS.log.fatal "FlatFile database '#{file_name}' is locked by " +
|
72
83
|
"another process"
|
73
84
|
end
|
85
|
+
@f.sync = true
|
74
86
|
|
75
|
-
|
76
|
-
@index.open(!new_db_created)
|
77
|
-
@space_list.open
|
78
|
-
rescue FatalError
|
79
|
-
# Ensure that the index is really closed.
|
80
|
-
@index.close
|
81
|
-
# Erase it completely
|
82
|
-
@index.erase
|
83
|
-
# Then create it again.
|
84
|
-
@index.open
|
85
|
-
|
86
|
-
# Ensure that the spaces list is really closed.
|
87
|
-
@space_list.close
|
88
|
-
# Erase it completely
|
89
|
-
@space_list.erase
|
90
|
-
# Then create it again
|
91
|
-
@space_list.open
|
92
|
-
|
93
|
-
regenerate_index_and_spaces
|
94
|
-
end
|
87
|
+
open_index_files(!new_db_created)
|
95
88
|
end
|
96
89
|
|
97
90
|
# Close the flat file. This method must be called to ensure that all data
|
98
91
|
# is really written into the filesystem.
|
99
92
|
def close
|
100
|
-
@space_list.close
|
101
|
-
@index.close
|
93
|
+
@space_list.close if @space_list.is_open?
|
94
|
+
@index.close if @index.is_open?
|
95
|
+
|
96
|
+
if @marks
|
97
|
+
@marks.erase
|
98
|
+
@marks = nil
|
99
|
+
end
|
102
100
|
|
103
101
|
if @f
|
104
102
|
@f.flush
|
105
103
|
@f.flock(File::LOCK_UN)
|
104
|
+
@f.fsync
|
106
105
|
@f.close
|
107
106
|
@f = nil
|
108
107
|
end
|
@@ -112,10 +111,12 @@ module PEROBS
|
|
112
111
|
def sync
|
113
112
|
begin
|
114
113
|
@f.flush
|
114
|
+
@f.fsync
|
115
115
|
rescue IOError => e
|
116
116
|
PEROBS.log.fatal "Cannot sync flat file database: #{e.message}"
|
117
117
|
end
|
118
118
|
@index.sync
|
119
|
+
@space_list.sync
|
119
120
|
end
|
120
121
|
|
121
122
|
# Delete the blob for the specified ID.
|
@@ -134,29 +135,37 @@ module PEROBS
|
|
134
135
|
# @param addr [Integer] Address of the blob to delete
|
135
136
|
# @param id [Integer] ID of the blob to delete
|
136
137
|
def delete_obj_by_address(addr, id)
|
137
|
-
@index.remove(id)
|
138
|
-
header = FlatFileBlobHeader.
|
138
|
+
@index.remove(id) if @index.is_open?
|
139
|
+
header = FlatFileBlobHeader.read(@f, addr, id)
|
139
140
|
header.clear_flags
|
140
|
-
@space_list.add_space(addr, header.length)
|
141
|
+
@space_list.add_space(addr, header.length) if @space_list.is_open?
|
141
142
|
end
|
142
143
|
|
143
144
|
# Delete all unmarked objects.
|
144
|
-
def delete_unmarked_objects
|
145
|
-
|
146
|
-
|
145
|
+
def delete_unmarked_objects(&block)
|
146
|
+
# We don't update the index and the space list during this operation as
|
147
|
+
# we defragmentize the blob file at the end. We'll end the operation
|
148
|
+
# with an empty space list.
|
149
|
+
clear_index_files
|
150
|
+
|
151
|
+
deleted_objects_count = 0
|
152
|
+
@progressmeter.start('Sweeping unmarked objects', @f.size) do |pm|
|
153
|
+
each_blob_header do |header|
|
154
|
+
if header.is_valid? && !@marks.include?(header.id)
|
155
|
+
delete_obj_by_address(header.addr, header.id)
|
156
|
+
yield(header.id) if block_given?
|
157
|
+
deleted_objects_count += 1
|
158
|
+
end
|
147
159
|
|
148
|
-
|
149
|
-
each_blob_header do |pos, header|
|
150
|
-
if header.is_valid? && !header.is_marked?
|
151
|
-
delete_obj_by_address(pos, header.id)
|
152
|
-
deleted_ids << header.id
|
160
|
+
pm.update(header.addr)
|
153
161
|
end
|
154
162
|
end
|
155
163
|
defragmentize
|
156
164
|
|
157
|
-
|
158
|
-
|
159
|
-
|
165
|
+
# Update the index file and create a new, empty space list.
|
166
|
+
regenerate_index_and_spaces
|
167
|
+
|
168
|
+
deleted_objects_count
|
160
169
|
end
|
161
170
|
|
162
171
|
# Write the given object into the file. This method never uses in-place
|
@@ -172,7 +181,7 @@ module PEROBS
|
|
172
181
|
# operation is aborted or interrupted we ensure that we either have the
|
173
182
|
# old or the new version available.
|
174
183
|
if (old_addr = find_obj_addr_by_id(id))
|
175
|
-
old_header = FlatFileBlobHeader.
|
184
|
+
old_header = FlatFileBlobHeader.read(@f, old_addr)
|
176
185
|
old_header.set_outdated_flag
|
177
186
|
end
|
178
187
|
|
@@ -183,57 +192,68 @@ module PEROBS
|
|
183
192
|
# performance impact of compression is not compensated by writing
|
184
193
|
# less data to the storage.
|
185
194
|
compressed = false
|
186
|
-
|
195
|
+
raw_obj_bytesize = raw_obj.bytesize
|
196
|
+
if raw_obj_bytesize > 256
|
187
197
|
raw_obj = Zlib.deflate(raw_obj)
|
198
|
+
raw_obj_bytesize = raw_obj.bytesize
|
188
199
|
compressed = true
|
189
200
|
end
|
190
201
|
|
191
|
-
addr, length = find_free_blob(
|
202
|
+
addr, length = find_free_blob(raw_obj_bytesize)
|
192
203
|
begin
|
193
204
|
if length != -1
|
194
205
|
# Just a safeguard so we don't overwrite current data.
|
195
|
-
header = FlatFileBlobHeader.
|
206
|
+
header = FlatFileBlobHeader.read(@f, addr)
|
196
207
|
if header.length != length
|
197
208
|
PEROBS.log.fatal "Length in free list (#{length}) and header " +
|
198
|
-
"(#{header.length}) don't match."
|
209
|
+
"(#{header.length}) for address #{addr} don't match."
|
199
210
|
end
|
200
|
-
if
|
201
|
-
PEROBS.log.fatal "Object (#{
|
211
|
+
if raw_obj_bytesize > header.length
|
212
|
+
PEROBS.log.fatal "Object (#{raw_obj_bytesize}) is longer than " +
|
202
213
|
"blob space (#{header.length})."
|
203
214
|
end
|
204
215
|
if header.is_valid?
|
205
|
-
PEROBS.log.fatal "Entry
|
216
|
+
PEROBS.log.fatal "Entry at address #{addr} with flags: " +
|
217
|
+
"#{header.flags} is already used for ID #{header.id}."
|
206
218
|
end
|
207
219
|
end
|
208
220
|
flags = 1 << FlatFileBlobHeader::VALID_FLAG_BIT
|
209
221
|
flags |= (1 << FlatFileBlobHeader::COMPRESSED_FLAG_BIT) if compressed
|
210
|
-
FlatFileBlobHeader.new(@f, addr, flags,
|
222
|
+
FlatFileBlobHeader.new(@f, addr, flags, raw_obj_bytesize, id, crc).write
|
211
223
|
@f.write(raw_obj)
|
212
|
-
if length != -1 &&
|
224
|
+
if length != -1 && raw_obj_bytesize < length
|
213
225
|
# The new object was not appended and it did not completely fill the
|
214
226
|
# free space. So we have to write a new header to mark the remaining
|
215
227
|
# empty space.
|
216
|
-
unless length -
|
228
|
+
unless length - raw_obj_bytesize >= FlatFileBlobHeader::LENGTH
|
217
229
|
PEROBS.log.fatal "Not enough space to append the empty space " +
|
218
|
-
"header (space: #{length} bytes, object: #{
|
230
|
+
"header (space: #{length} bytes, object: #{raw_obj_bytesize} " +
|
219
231
|
"bytes)."
|
220
232
|
end
|
221
233
|
space_address = @f.pos
|
222
|
-
space_length = length - FlatFileBlobHeader::LENGTH -
|
234
|
+
space_length = length - FlatFileBlobHeader::LENGTH - raw_obj_bytesize
|
223
235
|
FlatFileBlobHeader.new(@f, space_address, 0, space_length,
|
224
236
|
0, 0).write
|
225
237
|
# Register the new space with the space list.
|
226
|
-
@space_list.
|
238
|
+
if @space_list.is_open? && space_length > 0
|
239
|
+
@space_list.add_space(space_address, space_length)
|
240
|
+
end
|
227
241
|
end
|
242
|
+
|
243
|
+
# Once the blob has been written we can update the index as well.
|
244
|
+
@index.insert(id, addr) if @index.is_open?
|
245
|
+
|
228
246
|
if old_addr
|
229
247
|
# If we had an existing object stored for the ID we have to mark
|
230
248
|
# this entry as deleted now.
|
231
249
|
old_header.clear_flags
|
250
|
+
# And register the newly freed space with the space list.
|
251
|
+
if @space_list.is_open?
|
252
|
+
@space_list.add_space(old_addr, old_header.length)
|
253
|
+
end
|
232
254
|
else
|
233
255
|
@f.flush
|
234
256
|
end
|
235
|
-
# Once the blob has been written we can update the index as well.
|
236
|
-
@index.insert(id, addr)
|
237
257
|
rescue IOError => e
|
238
258
|
PEROBS.log.fatal "Cannot write blob for ID #{id} to FlatFileDB: " +
|
239
259
|
e.message
|
@@ -260,15 +280,20 @@ module PEROBS
|
|
260
280
|
nil
|
261
281
|
end
|
262
282
|
|
283
|
+
# @return [Integer] Number of items stored in the DB.
|
284
|
+
def item_counter
|
285
|
+
@index.entries_count
|
286
|
+
end
|
287
|
+
|
263
288
|
# Read the object at the specified address.
|
264
289
|
# @param addr [Integer] Offset in the flat file
|
265
290
|
# @param id [Integer] ID of the data blob
|
266
291
|
# @return [String] Raw object data
|
267
292
|
def read_obj_by_address(addr, id)
|
268
|
-
header = FlatFileBlobHeader.
|
293
|
+
header = FlatFileBlobHeader.read(@f, addr, id)
|
269
294
|
if header.id != id
|
270
295
|
PEROBS.log.fatal "Database index corrupted: Index for object " +
|
271
|
-
"#{id} points to object with ID #{header.id}"
|
296
|
+
"#{id} points to object with ID #{header.id} at address #{addr}"
|
272
297
|
end
|
273
298
|
|
274
299
|
buf = nil
|
@@ -277,7 +302,8 @@ module PEROBS
|
|
277
302
|
@f.seek(addr + FlatFileBlobHeader::LENGTH)
|
278
303
|
buf = @f.read(header.length)
|
279
304
|
rescue IOError => e
|
280
|
-
PEROBS.log.fatal "Cannot read blob for ID #{id}
|
305
|
+
PEROBS.log.fatal "Cannot read blob for ID #{id} at address #{addr}: " +
|
306
|
+
e.message
|
281
307
|
end
|
282
308
|
|
283
309
|
# Uncompress the data if the compression bit is set in the flags byte.
|
@@ -286,12 +312,13 @@ module PEROBS
|
|
286
312
|
buf = Zlib.inflate(buf)
|
287
313
|
rescue Zlib::BufError, Zlib::DataError
|
288
314
|
PEROBS.log.fatal "Corrupted compressed block with ID " +
|
289
|
-
"#{
|
315
|
+
"#{id} found at address #{addr}."
|
290
316
|
end
|
291
317
|
end
|
292
318
|
|
293
319
|
if checksum(buf) != header.crc
|
294
|
-
PEROBS.log.fatal "Checksum failure while reading blob ID #{id}"
|
320
|
+
PEROBS.log.fatal "Checksum failure while reading blob ID #{id} " +
|
321
|
+
"at address #{addr}"
|
295
322
|
end
|
296
323
|
|
297
324
|
buf
|
@@ -300,47 +327,22 @@ module PEROBS
|
|
300
327
|
# Mark the object with the given ID.
|
301
328
|
# @param id [Integer] ID of the object
|
302
329
|
def mark_obj_by_id(id)
|
303
|
-
|
304
|
-
mark_obj_by_address(addr, id)
|
305
|
-
end
|
306
|
-
end
|
307
|
-
|
308
|
-
# Mark the object at the specified address.
|
309
|
-
# @param addr [Integer] Offset in the file
|
310
|
-
# @param id [Integer] ID of the object
|
311
|
-
def mark_obj_by_address(addr, id)
|
312
|
-
FlatFileBlobHeader.read_at(@f, addr, id).set_mark_flag
|
330
|
+
@marks.insert(id)
|
313
331
|
end
|
314
332
|
|
315
333
|
# Return true if the object with the given ID is marked, false otherwise.
|
316
334
|
# @param id [Integer] ID of the object
|
317
335
|
def is_marked_by_id?(id)
|
318
|
-
|
319
|
-
header = FlatFileBlobHeader.read_at(@f, addr, id)
|
320
|
-
return header.is_marked?
|
321
|
-
end
|
322
|
-
|
323
|
-
false
|
336
|
+
@marks.include?(id)
|
324
337
|
end
|
325
338
|
|
326
339
|
# Clear all marks.
|
327
340
|
def clear_all_marks
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
marked_blob_count = 0
|
333
|
-
|
334
|
-
each_blob_header do |pos, header|
|
335
|
-
total_blob_count += 1
|
336
|
-
if header.is_valid? && header.is_marked?
|
337
|
-
# Clear all valid and marked blocks.
|
338
|
-
marked_blob_count += 1
|
339
|
-
header.clear_mark_flag
|
340
|
-
end
|
341
|
+
if @marks
|
342
|
+
@marks.clear
|
343
|
+
else
|
344
|
+
@marks = IDList.new(@db_dir, 'marks', item_counter)
|
341
345
|
end
|
342
|
-
PEROBS.log.info "#{marked_blob_count} marks in #{total_blob_count} " +
|
343
|
-
"objects cleared in #{Time.now - t} seconds"
|
344
346
|
end
|
345
347
|
|
346
348
|
# Eliminate all the holes in the file. This is an in-place
|
@@ -349,59 +351,72 @@ module PEROBS
|
|
349
351
|
distance = 0
|
350
352
|
new_file_size = 0
|
351
353
|
deleted_blobs = 0
|
354
|
+
corrupted_blobs = 0
|
352
355
|
valid_blobs = 0
|
353
|
-
|
354
|
-
PEROBS.log.info "Defragmenting FlatFile"
|
356
|
+
|
355
357
|
# Iterate over all entries.
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
358
|
+
@progressmeter.start('Defragmentizing blobs file', @f.size) do |pm|
|
359
|
+
each_blob_header do |header|
|
360
|
+
# If we have stumbled over a corrupted blob we treat it similar to a
|
361
|
+
# deleted blob and reuse the space.
|
362
|
+
if header.corruption_start
|
363
|
+
distance += header.addr - header.corruption_start
|
364
|
+
corrupted_blobs += 1
|
365
|
+
end
|
366
|
+
|
367
|
+
# Total size of the current entry
|
368
|
+
entry_bytes = FlatFileBlobHeader::LENGTH + header.length
|
369
|
+
if header.is_valid?
|
370
|
+
# We have found a valid entry.
|
371
|
+
valid_blobs += 1
|
372
|
+
if distance > 0
|
373
|
+
begin
|
374
|
+
# Read current entry into a buffer
|
375
|
+
@f.seek(header.addr)
|
376
|
+
buf = @f.read(entry_bytes)
|
377
|
+
# Write the buffer right after the end of the previous entry.
|
378
|
+
@f.seek(header.addr - distance)
|
379
|
+
@f.write(buf)
|
380
|
+
# Mark the space between the relocated current entry and the
|
381
|
+
# next valid entry as deleted space.
|
382
|
+
FlatFileBlobHeader.new(@f, @f.pos, 0,
|
383
|
+
distance - FlatFileBlobHeader::LENGTH,
|
384
|
+
0, 0).write
|
385
|
+
@f.flush
|
386
|
+
rescue IOError => e
|
387
|
+
PEROBS.log.fatal "Error while moving blob for ID " +
|
388
|
+
"#{header.id}: #{e.message}"
|
389
|
+
end
|
381
390
|
end
|
391
|
+
new_file_size = header.addr - distance +
|
392
|
+
FlatFileBlobHeader::LENGTH + header.length
|
393
|
+
else
|
394
|
+
deleted_blobs += 1
|
395
|
+
distance += entry_bytes
|
382
396
|
end
|
383
|
-
|
384
|
-
|
385
|
-
deleted_blobs += 1
|
386
|
-
distance += entry_bytes
|
397
|
+
|
398
|
+
pm.update(header.addr)
|
387
399
|
end
|
388
400
|
end
|
389
|
-
|
401
|
+
|
390
402
|
PEROBS.log.info "#{distance / 1000} KiB/#{deleted_blobs} blobs of " +
|
391
403
|
"#{@f.size / 1000} KiB/#{valid_blobs} blobs or " +
|
392
404
|
"#{'%.1f' % (distance.to_f / @f.size * 100.0)}% reclaimed"
|
405
|
+
if corrupted_blobs > 0
|
406
|
+
PEROBS.log.info "#{corrupted_blobs} corrupted blob(s) found. Space " +
|
407
|
+
"was recycled."
|
408
|
+
end
|
393
409
|
|
394
410
|
@f.flush
|
395
411
|
@f.truncate(new_file_size)
|
396
412
|
@f.flush
|
397
|
-
@space_list.clear
|
398
413
|
|
399
414
|
sync
|
400
415
|
end
|
401
416
|
|
402
417
|
# This method iterates over all entries in the FlatFile and removes the
|
403
418
|
# entry and inserts it again. This is useful to update all entries in
|
404
|
-
#
|
419
|
+
# case the storage format has changed.
|
405
420
|
def refresh
|
406
421
|
# This iteration might look scary as we iterate over the entries while
|
407
422
|
# while we are rearranging them. Re-inserted items may be inserted
|
@@ -409,132 +424,277 @@ module PEROBS
|
|
409
424
|
# inserted after the current entry and will be re-read again unless they
|
410
425
|
# are inserted after the original file end.
|
411
426
|
file_size = @f.size
|
412
|
-
PEROBS.log.info "Refreshing the DB..."
|
413
|
-
t = Time.now
|
414
|
-
each_blob_header do |pos, header|
|
415
|
-
if header.is_valid?
|
416
|
-
buf = read_obj_by_address(pos, header.id)
|
417
|
-
delete_obj_by_address(pos, header.id)
|
418
|
-
write_obj_by_id(header.id, buf)
|
419
|
-
end
|
420
427
|
|
421
|
-
|
422
|
-
|
423
|
-
|
428
|
+
# We don't update the index and the space list during this operation as
|
429
|
+
# we defragmentize the blob file at the end. We'll end the operation
|
430
|
+
# with an empty space list.
|
431
|
+
clear_index_files
|
432
|
+
|
433
|
+
@progressmeter.start('Converting objects to new storage format',
|
434
|
+
@f.size) do |pm|
|
435
|
+
each_blob_header do |header|
|
436
|
+
if header.is_valid?
|
437
|
+
buf = read_obj_by_address(header.addr, header.id)
|
438
|
+
delete_obj_by_address(header.addr, header.id)
|
439
|
+
write_obj_by_id(header.id, buf)
|
440
|
+
end
|
441
|
+
|
442
|
+
# Some re-inserted blobs may be inserted after the original file end.
|
443
|
+
# No need to process those blobs again.
|
444
|
+
break if header.addr >= file_size
|
445
|
+
|
446
|
+
pm.update(header.addr)
|
447
|
+
end
|
424
448
|
end
|
425
|
-
PEROBS.log.info "DB refresh completed in #{Time.now - t} seconds"
|
426
449
|
|
427
450
|
# Reclaim the space saved by compressing entries.
|
428
451
|
defragmentize
|
452
|
+
|
453
|
+
# Recreate the index file and create an empty space list.
|
454
|
+
regenerate_index_and_spaces
|
429
455
|
end
|
430
456
|
|
431
|
-
# Check
|
432
|
-
# @param repair [Boolean] True if errors should be fixed.
|
457
|
+
# Check the FlatFile.
|
433
458
|
# @return [Integer] Number of errors found
|
434
|
-
def check(
|
459
|
+
def check()
|
435
460
|
errors = 0
|
436
461
|
return errors unless @f
|
437
462
|
|
438
463
|
t = Time.now
|
439
|
-
PEROBS.log.info "Checking FlatFile database"
|
440
|
-
"#{repair ? ' in repair mode' : ''}..."
|
464
|
+
PEROBS.log.info "Checking FlatFile database..."
|
441
465
|
|
442
466
|
# First check the database blob file. Each entry should be readable and
|
443
467
|
# correct and all IDs must be unique. We use a shadow index to keep
|
444
468
|
# track of the already found IDs.
|
445
|
-
new_index = BTree.new(@db_dir, 'new-index', INDEX_BTREE_ORDER
|
469
|
+
new_index = BTree.new(@db_dir, 'new-index', INDEX_BTREE_ORDER,
|
470
|
+
@progressmeter)
|
446
471
|
new_index.erase
|
447
472
|
new_index.open
|
448
473
|
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
474
|
+
corrupted_blobs = 0
|
475
|
+
end_of_last_healthy_blob = nil
|
476
|
+
@progressmeter.start('Checking blobs file', @f.size) do |pm|
|
477
|
+
corrupted_blobs = each_blob_header do |header|
|
478
|
+
if header.is_valid?
|
479
|
+
# We have a non-deleted entry.
|
480
|
+
begin
|
481
|
+
@f.seek(header.addr + FlatFileBlobHeader::LENGTH)
|
482
|
+
buf = @f.read(header.length)
|
483
|
+
if buf.bytesize != header.length
|
484
|
+
PEROBS.log.error "Premature end of file in blob with ID " +
|
485
|
+
"#{header.id}."
|
486
|
+
errors += 1
|
487
|
+
next
|
488
|
+
end
|
462
489
|
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
490
|
+
# Uncompress the data if the compression bit is set in the mark
|
491
|
+
# byte.
|
492
|
+
if header.is_compressed?
|
493
|
+
begin
|
494
|
+
buf = Zlib.inflate(buf)
|
495
|
+
rescue Zlib::BufError, Zlib::DataError
|
496
|
+
PEROBS.log.error "Corrupted compressed block with ID " +
|
497
|
+
"#{header.id} found."
|
498
|
+
errors += 1
|
499
|
+
next
|
500
|
+
end
|
501
|
+
end
|
502
|
+
|
503
|
+
if header.crc && checksum(buf) != header.crc
|
504
|
+
PEROBS.log.error "Checksum failure while checking blob " +
|
505
|
+
"with ID #{header.id}"
|
472
506
|
errors += 1
|
473
507
|
next
|
474
508
|
end
|
509
|
+
rescue IOError => e
|
510
|
+
PEROBS.log.fatal "Check of blob with ID #{header.id} failed: " +
|
511
|
+
e.message
|
475
512
|
end
|
476
513
|
|
477
|
-
if
|
478
|
-
|
479
|
-
|
480
|
-
|
514
|
+
# Check if the ID has already been found in the file.
|
515
|
+
if (previous_address = new_index.get(header.id))
|
516
|
+
PEROBS.log.error "Multiple blobs for ID #{header.id} found. " +
|
517
|
+
"Addresses: #{previous_address}, #{header.addr}"
|
481
518
|
errors += 1
|
482
|
-
|
519
|
+
previous_header = FlatFileBlobHeader.read(@f, previous_address,
|
520
|
+
header.id)
|
521
|
+
else
|
522
|
+
# ID is unique so far. Add it to the shadow index.
|
523
|
+
new_index.insert(header.id, header.addr)
|
483
524
|
end
|
484
|
-
rescue IOError => e
|
485
|
-
PEROBS.log.fatal "Check of blob with ID #{header.id} failed: " +
|
486
|
-
e.message
|
487
525
|
end
|
526
|
+
end_of_last_healthy_blob = header.addr +
|
527
|
+
FlatFileBlobHeader::LENGTH + header.length
|
488
528
|
|
489
|
-
|
490
|
-
|
491
|
-
PEROBS.log.error "Multiple blobs for ID #{header.id} found. " +
|
492
|
-
"Addresses: #{previous_address}, #{pos}"
|
493
|
-
previous_header = FlatFileBlobHeader.read_at(@f, previous_address,
|
494
|
-
header.id)
|
495
|
-
if repair
|
496
|
-
# We have two blobs with the same ID and we must discard one of
|
497
|
-
# them.
|
498
|
-
if header.is_outdated?
|
499
|
-
discard_damaged_blob(header)
|
500
|
-
elsif previous_header.is_outdated?
|
501
|
-
discard_damaged_blob(previous_header)
|
502
|
-
else
|
503
|
-
PEROBS.log.error "None of the blobs with same ID have " +
|
504
|
-
"the outdated flag set. Deleting the smaller one."
|
505
|
-
discard_damaged_blob(header.length < previous_header.length ?
|
506
|
-
header : previous_header)
|
507
|
-
end
|
508
|
-
next
|
509
|
-
end
|
510
|
-
else
|
511
|
-
# ID is unique so far. Add it to the shadow index.
|
512
|
-
new_index.insert(header.id, pos)
|
513
|
-
end
|
529
|
+
pm.update(header.addr)
|
530
|
+
end
|
514
531
|
|
532
|
+
if end_of_last_healthy_blob && end_of_last_healthy_blob != @f.size
|
533
|
+
# The blob file ends with a corrupted blob header.
|
534
|
+
PEROBS.log.error "#{@f.size - end_of_last_healthy_blob} corrupted " +
|
535
|
+
'bytes found at the end of FlatFile.'
|
536
|
+
corrupted_blobs += 1
|
515
537
|
end
|
538
|
+
|
539
|
+
errors += corrupted_blobs
|
516
540
|
end
|
541
|
+
|
517
542
|
# We no longer need the new index.
|
518
543
|
new_index.close
|
519
544
|
new_index.erase
|
520
545
|
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
546
|
+
if corrupted_blobs == 0
|
547
|
+
# Now we check the index data. It must be correct and the entries must
|
548
|
+
# match the blob file. All entries in the index must be in the blob file
|
549
|
+
# and vice versa.
|
550
|
+
begin
|
551
|
+
index_ok = @index.check do |id, address|
|
552
|
+
unless has_id_at?(id, address)
|
553
|
+
PEROBS.log.error "Index contains an entry for " +
|
554
|
+
"ID #{id} at address #{address} that is not in FlatFile"
|
555
|
+
false
|
556
|
+
else
|
557
|
+
true
|
558
|
+
end
|
559
|
+
end
|
560
|
+
x_check_errs = 0
|
561
|
+
space_check_ok = true
|
562
|
+
unless index_ok && (space_check_ok = @space_list.check(self)) &&
|
563
|
+
(x_check_errs = cross_check_entries) == 0
|
564
|
+
errors += 1 unless index_ok && space_check_ok
|
565
|
+
errors += x_check_errs
|
566
|
+
end
|
567
|
+
rescue PEROBS::FatalError
|
568
|
+
errors += 1
|
569
|
+
end
|
570
|
+
end
|
571
|
+
|
572
|
+
PEROBS.log.info "FlatFile check completed in #{Time.now - t} seconds. " +
|
573
|
+
"#{errors} errors found."
|
574
|
+
|
575
|
+
errors
|
576
|
+
end
|
577
|
+
|
578
|
+
# Repair the FlatFile. In contrast to the repair functionality in the
|
579
|
+
# check() method this method is much faster. It simply re-creates the
|
580
|
+
# index and space list from the blob file.
|
581
|
+
# @param repair [Boolean] True if errors should be fixed.
|
582
|
+
# @return [Integer] Number of errors found
|
583
|
+
def repair
|
584
|
+
errors = 0
|
585
|
+
return errors unless @f
|
586
|
+
|
587
|
+
t = Time.now
|
588
|
+
PEROBS.log.info "Repairing FlatFile database"
|
589
|
+
|
590
|
+
# Erase and re-open the index and space list files. We purposely don't
|
591
|
+
# close the files as it would trigger needless flushing.
|
592
|
+
clear_index_files(true)
|
593
|
+
|
594
|
+
# Now we scan the blob file and re-index all blobs and spaces. Corrupted
|
595
|
+
# blobs will be skipped.
|
596
|
+
corrupted_blobs = 0
|
597
|
+
end_of_last_healthy_blob = nil
|
598
|
+
@progressmeter.start('Re-indexing blobs file', @f.size) do |pm|
|
599
|
+
corrupted_blobs = each_blob_header do |header|
|
600
|
+
if header.corruption_start
|
601
|
+
# The blob is preceded by a corrupted area. We create a new
|
602
|
+
# header of a deleted blob for this area and write the new blob
|
603
|
+
# over it.
|
604
|
+
if (data_length = header.addr - header.corruption_start -
|
605
|
+
FlatFileBlobHeader::LENGTH) <= 0
|
606
|
+
PEROBS.log.error "Found a corrupted blob that is too small to " +
|
607
|
+
"fit a header (#{data_length}). File must be defragmented."
|
608
|
+
else
|
609
|
+
new_header = FlatFileBlobHeader.new(@f, header.corruption_start,
|
610
|
+
0, data_length, 0, 0)
|
611
|
+
new_header.write
|
612
|
+
@space_list.add_space(header.corruption_start, data_length)
|
613
|
+
end
|
614
|
+
end
|
615
|
+
|
616
|
+
if header.is_valid?
|
617
|
+
# We have a non-deleted entry.
|
618
|
+
begin
|
619
|
+
@f.seek(header.addr + FlatFileBlobHeader::LENGTH)
|
620
|
+
buf = @f.read(header.length)
|
621
|
+
if buf.bytesize != header.length
|
622
|
+
PEROBS.log.error "Premature end of file in blob with ID " +
|
623
|
+
"#{header.id}."
|
624
|
+
discard_damaged_blob(header)
|
625
|
+
errors += 1
|
626
|
+
next
|
627
|
+
end
|
628
|
+
|
629
|
+
# Uncompress the data if the compression bit is set in the mark
|
630
|
+
# byte.
|
631
|
+
if header.is_compressed?
|
632
|
+
begin
|
633
|
+
buf = Zlib.inflate(buf)
|
634
|
+
rescue Zlib::BufError, Zlib::DataError
|
635
|
+
PEROBS.log.error "Corrupted compressed block with ID " +
|
636
|
+
"#{header.id} found."
|
637
|
+
discard_damaged_blob(header)
|
638
|
+
errors += 1
|
639
|
+
next
|
640
|
+
end
|
641
|
+
end
|
642
|
+
|
643
|
+
if header.crc && checksum(buf) != header.crc
|
644
|
+
PEROBS.log.error "Checksum failure while checking blob " +
|
645
|
+
"with ID #{header.id}"
|
646
|
+
discard_damaged_blob(header)
|
647
|
+
errors += 1
|
648
|
+
next
|
649
|
+
end
|
650
|
+
rescue IOError => e
|
651
|
+
PEROBS.log.fatal "Check of blob with ID #{header.id} failed: " +
|
652
|
+
e.message
|
653
|
+
end
|
654
|
+
|
655
|
+
# Check if the ID has already been found in the file.
|
656
|
+
if (previous_address = @index.get(header.id))
|
657
|
+
PEROBS.log.error "Multiple blobs for ID #{header.id} found. " +
|
658
|
+
"Addresses: #{previous_address}, #{header.addr}"
|
659
|
+
errors += 1
|
660
|
+
previous_header = FlatFileBlobHeader.read(@f, previous_address,
|
661
|
+
header.id)
|
662
|
+
# We have two blobs with the same ID and we must discard one of
|
663
|
+
# them.
|
664
|
+
discard_duplicate_blobs(header, previous_header)
|
665
|
+
else
|
666
|
+
# ID is unique so far. Add it to the index.
|
667
|
+
@index.insert(header.id, header.addr)
|
668
|
+
end
|
669
|
+
|
670
|
+
else
|
671
|
+
if header.length > 0
|
672
|
+
@space_list.add_space(header.addr, header.length)
|
673
|
+
end
|
674
|
+
end
|
675
|
+
end_of_last_healthy_blob = header.addr +
|
676
|
+
FlatFileBlobHeader::LENGTH + header.length
|
677
|
+
|
678
|
+
pm.update(header.addr)
|
527
679
|
end
|
528
|
-
|
529
|
-
|
680
|
+
|
681
|
+
if end_of_last_healthy_blob && end_of_last_healthy_blob != @f.size
|
682
|
+
# The blob file ends with a corrupted blob header.
|
683
|
+
PEROBS.log.error "#{@f.size - end_of_last_healthy_blob} corrupted " +
|
684
|
+
'bytes found at the end of FlatFile.'
|
685
|
+
corrupted_blobs += 1
|
686
|
+
|
687
|
+
PEROBS.log.error "Truncating FlatFile to " +
|
688
|
+
"#{end_of_last_healthy_blob} bytes by discarding " +
|
689
|
+
"#{@f.size - end_of_last_healthy_blob} bytes"
|
690
|
+
@f.truncate(end_of_last_healthy_blob)
|
530
691
|
end
|
531
|
-
|
532
|
-
errors +=
|
533
|
-
regenerate_index_and_spaces if repair
|
692
|
+
|
693
|
+
errors += corrupted_blobs
|
534
694
|
end
|
535
695
|
|
536
|
-
sync
|
537
|
-
PEROBS.log.info "
|
696
|
+
sync
|
697
|
+
PEROBS.log.info "FlatFile repair completed in #{Time.now - t} seconds. " +
|
538
698
|
"#{errors} errors found."
|
539
699
|
|
540
700
|
errors
|
@@ -544,32 +704,56 @@ module PEROBS
|
|
544
704
|
# regenerates them from the FlatFile.
|
545
705
|
def regenerate_index_and_spaces
  PEROBS.log.warn "Re-generating FlatFileDB index and space files"

  # Both stores are opened if necessary and then emptied so that they can
  # be rebuilt from scratch.
  @index.open unless @index.is_open?
  @index.clear
  @space_list.open unless @space_list.is_open?
  @space_list.clear

  @progressmeter.start('Re-generating database index', @f.size) do |pm|
    each_blob_header do |header|
      if !header.is_valid?
        # Not a valid entry: register its space in the space list.
        @space_list.add_space(header.addr, header.length) if header.length > 0
      elsif (first_addr = @index.get(header.id))
        # The ID is already in the index. The blob found later in the file
        # is registered as space and discarded.
        PEROBS.log.error "FlatFile contains multiple blobs for ID " \
          "#{header.id}. First blob is at address #{first_addr}. " \
          "Other blob found at address #{header.addr}."
        @space_list.add_space(header.addr, header.length) if header.length > 0
        discard_damaged_blob(header)
      else
        # First valid blob with this ID.
        @index.insert(header.id, header.addr)
      end

      pm.update(header.addr)
    end
  end

  sync
end
|
558
738
|
|
559
739
|
# Check whether the header stored at _address_ describes a non-valid entry
# of exactly _size_ bytes.
# @param address [Integer] file offset of the blob header
# @param size [Integer] expected length stored in the header
# @return [Boolean] true if the header is not valid and its length matches
def has_space?(address, size)
  blob_header = FlatFileBlobHeader.read(@f, address)
  return false if blob_header.is_valid?

  blob_header.length == size
end
|
563
743
|
|
564
744
|
# Check whether the header stored at _address_ is valid and carries _id_.
# A PEROBS::FatalError raised while reading the header counts as "no".
# @param id [Integer] expected blob ID
# @param address [Integer] file offset of the blob header
# @return false if the header could not be read, otherwise the result of
#         the validity and ID comparison
def has_id_at?(id, address)
  blob_header = FlatFileBlobHeader.read(@f, address)
rescue PEROBS::FatalError
  false
else
  blob_header.is_valid? && blob_header.id == id
end
|
568
752
|
|
569
753
|
def inspect
|
570
754
|
s = '['
|
571
|
-
each_blob_header do |
|
572
|
-
s << "{ :pos => #{
|
755
|
+
each_blob_header do |header|
|
756
|
+
s << "{ :pos => #{header.addr}, :flags => #{header.flags}, " +
|
573
757
|
":length => #{header.length}, :id => #{header.id}, " +
|
574
758
|
":crc => #{header.crc}"
|
575
759
|
if header.is_valid?
|
@@ -580,21 +764,68 @@ module PEROBS
|
|
580
764
|
s + ']'
|
581
765
|
end
|
582
766
|
|
767
|
+
# Convert the blob file 'database.blobs' in _db_dir_ from the 21 byte header
# layout to a layout where every header is followed by a CRC32 checksum of
# the header itself. Only entries with flag bit 0 set and flag bit 3 clear
# ("valid and current") are copied; all other entries are dropped. After a
# successful conversion the original file is kept as 'database_v3.blobs'
# and the converted file takes its place.
# @param db_dir [String] directory that contains 'database.blobs'
def FlatFile::insert_header_checksums(db_dir)
  old_file_name = File.join(db_dir, 'database.blobs')
  new_file_name = File.join(db_dir, 'database_v4.blobs')
  bak_file_name = File.join(db_dir, 'database_v3.blobs')

  # Old header layout: flags (1 byte), length (8), id (8), crc (4).
  header_format = 'CQQL'
  header_length = 21

  entries = 0
  # The block form of File.open closes both files even if the conversion
  # is aborted by an exception.
  File.open(old_file_name, 'rb') do |old_file|
    File.open(new_file_name, 'wb') do |new_file|
      # Remember where each header starts so that error messages point at
      # the header and not at the end of the payload that follows it.
      header_pos = old_file.pos
      while (buf = old_file.read(header_length))
        if buf.bytesize != header_length
          # NOTE(review): PEROBS.log.fatal presumably raises FatalError.
          PEROBS.log.fatal "Blob file #{old_file_name} contains a " +
            "truncated blob header at #{header_pos}"
        end
        flags, length, id, crc = *buf.unpack(header_format)

        # Some basic sanity checking to ensure all reserved bits are 0.
        # Older versions of PEROBS used to set bit 1 despite it being
        # reserved now. This is checked before the payload is read so a
        # garbage length field is never used for a read.
        unless flags & 0xF0 == 0
          PEROBS.log.fatal "Blob file #{old_file_name} contains illegal " +
            "flag byte #{'%02x' % flags} at #{header_pos}"
        end

        blob_data = old_file.read(length)

        # Check if the blob is valid and current.
        if flags & 0x1 == 1 && flags & 0x8 == 0
          # IO#read returns nil at end of file and a short string if the
          # file ends inside the payload. Never copy such a blob.
          if blob_data.nil? || blob_data.bytesize != length
            PEROBS.log.fatal "Blob file #{old_file_name} contains a " +
              "truncated blob for header at #{header_pos}"
          end

          # Make sure the bit 1 is not set anymore.
          flags &= 0x05
          header_str = [ flags, length, id, crc ].pack(header_format)
          header_crc = Zlib.crc32(header_str, 0)
          header_str += [ header_crc ].pack('L')

          new_file.write(header_str + blob_data)
          entries += 1
        end

        header_pos = old_file.pos
      end
    end
  end
  PEROBS.log.info "Header checksum added to #{entries} entries"

  # Both files are closed at this point. Keep the old file as backup and
  # move the converted file into its place.
  File.rename(old_file_name, bak_file_name)
  File.rename(new_file_name, old_file_name)
end
|
807
|
+
|
583
808
|
private
|
584
809
|
|
585
810
|
# Walk over all blob headers of the file, starting at offset 0, and yield
# each of them to the given block. After each header the file position is
# moved to the address following that header's payload.
# @yield [header] every header returned by FlatFileBlobHeader.read
# @return [Integer] number of headers that had corruption_start set
def each_blob_header(&block)
  damaged_count = 0

  begin
    @f.seek(0)
    while (header = FlatFileBlobHeader.read(@f))
      damaged_count += 1 if header.corruption_start

      yield(header)

      # The block may have moved the file position, so seek explicitly to
      # the place where the next header is expected.
      next_header_addr = header.addr + FlatFileBlobHeader::LENGTH +
        header.length
      @f.seek(next_header_addr)
    end
  rescue IOError => e
    PEROBS.log.fatal "Cannot read blob in flat file DB: #{e.message}"
  end

  damaged_count
end
|
599
830
|
|
600
831
|
def find_free_blob(bytes)
|
@@ -624,26 +855,34 @@ module PEROBS
|
|
624
855
|
# Compare every blob header in the file against the index and the space
# list. Valid blobs must be listed in the index under their own address;
# non-valid entries with a length above 0 must be known to the space list.
# Every mismatch is logged as an error.
# @return [Integer] number of mismatches found
def cross_check_entries
  errors = 0

  @progressmeter.start('Cross checking blobs and index', @f.size) do |pm|
    each_blob_header do |header|
      if header.is_valid?
        indexed_addr = @index.get(header.id)
        if indexed_addr.nil?
          PEROBS.log.error "FlatFile blob at address #{header.addr} " \
            "is not listed in the index"
          errors += 1
        elsif indexed_addr != header.addr
          PEROBS.log.error "FlatFile blob at address #{header.addr} " \
            "is listed in index with address #{indexed_addr}"
          errors += 1
        end
      elsif header.length > 0 &&
            !@space_list.has_space?(header.addr, header.length)
        PEROBS.log.error "FlatFile has free space " \
          "(addr: #{header.addr}, len: #{header.length}) that is " \
          "not in SpaceManager"
        errors += 1
      end

      pm.update(header.addr)
    end
  end

  errors
end
|
648
887
|
|
649
888
|
def discard_damaged_blob(header)
|
@@ -652,6 +891,61 @@ module PEROBS
|
|
652
891
|
header.clear_flags
|
653
892
|
end
|
654
893
|
|
894
|
+
# Resolve the situation that two blobs carry the same ID. Exactly one of
# them is discarded:
# * if _header_ is flagged as outdated, _header_ is discarded,
# * otherwise, if _previous_header_ is flagged as outdated, it is discarded,
# * otherwise the blob with the smaller length is discarded and an error is
#   logged.
# The space of the discarded blob is registered in the space list and the
# index is pointed at the surviving blob. Without the latter the index would
# keep referring to _previous_header_ even after that blob was discarded.
# @param header [FlatFileBlobHeader] header of the blob found later
# @param previous_header [FlatFileBlobHeader] header of the blob that is
#        currently listed in the index
def discard_duplicate_blobs(header, previous_header)
  if header.is_outdated?
    discarded, kept = header, previous_header
  elsif previous_header.is_outdated?
    discarded, kept = previous_header, header
  else
    discarded, kept = header.length < previous_header.length ?
      [ header, previous_header ] : [ previous_header, header ]
    PEROBS.log.error "None of the blobs with same ID have " +
      "the outdated flag set. Deleting the smaller one " +
      "at address #{discarded.addr}"
  end

  discard_damaged_blob(discarded)
  # Same guard as used by the other callers of add_space in this class.
  if discarded.length > 0
    @space_list.add_space(discarded.addr, discarded.length)
  end
  @index.insert(kept.id, kept.addr)
end
|
910
|
+
|
911
|
+
# Open the index and the space list. If opening raises a FatalError both
# are cleared and rebuilt from the blob file.
# @param abort_on_missing_files [Boolean] passed on to @index.open
def open_index_files(abort_on_missing_files = false)
  @index.open(abort_on_missing_files)
  @space_list.open
rescue FatalError
  # The files could not be opened. Start over with empty files and
  # re-create their content.
  clear_index_files
  regenerate_index_and_spaces
end
|
920
|
+
|
921
|
+
# Erase the index and the space list. Unless _dont_close_files_ is true,
# each of them is closed before it is erased. A space list that is still a
# SpaceTree is replaced by a new SpaceManager afterwards.
# @param dont_close_files [Boolean] skip the close calls when true
def erase_index_files(dont_close_files = false)
  [ @index, @space_list ].each do |store|
    # Ensure that the store is really closed, then erase it completely.
    store.close unless dont_close_files
    store.erase
  end

  return unless @space_list.is_a?(SpaceTree)

  # If we still use the old SpaceTree format, this is the moment to
  # convert it to the new SpaceManager format.
  @space_list = SpaceManager.new(@db_dir, @progressmeter)
  PEROBS.log.warn "Converting space list from SpaceTree format " \
    "to SpaceManager format"
end
|
940
|
+
|
941
|
+
# Replace the index and the space list by freshly created ones: both are
# erased first and opened again afterwards.
# @param dont_close_files [Boolean] passed on to erase_index_files
def clear_index_files(dont_close_files = false)
  # Get rid of the existing files.
  erase_index_files(dont_close_files)

  # Opening them again creates them anew.
  @index.open
  @space_list.open
end
|
948
|
+
|
655
949
|
end
|
656
950
|
|
657
951
|
end
|