perobs 3.0.1 → 4.3.0
- checksums.yaml +5 -5
- data/README.md +19 -18
- data/lib/perobs.rb +2 -0
- data/lib/perobs/Array.rb +68 -21
- data/lib/perobs/BTree.rb +110 -54
- data/lib/perobs/BTreeBlob.rb +14 -13
- data/lib/perobs/BTreeDB.rb +11 -10
- data/lib/perobs/BTreeNode.rb +551 -197
- data/lib/perobs/BTreeNodeCache.rb +10 -8
- data/lib/perobs/BTreeNodeLink.rb +11 -1
- data/lib/perobs/BigArray.rb +285 -0
- data/lib/perobs/BigArrayNode.rb +1002 -0
- data/lib/perobs/BigHash.rb +246 -0
- data/lib/perobs/BigTree.rb +197 -0
- data/lib/perobs/BigTreeNode.rb +873 -0
- data/lib/perobs/Cache.rb +47 -22
- data/lib/perobs/ClassMap.rb +2 -2
- data/lib/perobs/ConsoleProgressMeter.rb +61 -0
- data/lib/perobs/DataBase.rb +4 -3
- data/lib/perobs/DynamoDB.rb +62 -20
- data/lib/perobs/EquiBlobsFile.rb +174 -59
- data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
- data/lib/perobs/FlatFile.rb +536 -242
- data/lib/perobs/FlatFileBlobHeader.rb +120 -84
- data/lib/perobs/FlatFileDB.rb +58 -27
- data/lib/perobs/FuzzyStringMatcher.rb +175 -0
- data/lib/perobs/Hash.rb +129 -35
- data/lib/perobs/IDList.rb +144 -0
- data/lib/perobs/IDListPage.rb +107 -0
- data/lib/perobs/IDListPageFile.rb +180 -0
- data/lib/perobs/IDListPageRecord.rb +142 -0
- data/lib/perobs/LockFile.rb +3 -0
- data/lib/perobs/Object.rb +28 -20
- data/lib/perobs/ObjectBase.rb +53 -10
- data/lib/perobs/PersistentObjectCache.rb +142 -0
- data/lib/perobs/PersistentObjectCacheLine.rb +99 -0
- data/lib/perobs/ProgressMeter.rb +97 -0
- data/lib/perobs/SpaceManager.rb +273 -0
- data/lib/perobs/SpaceTree.rb +63 -47
- data/lib/perobs/SpaceTreeNode.rb +134 -115
- data/lib/perobs/SpaceTreeNodeLink.rb +1 -1
- data/lib/perobs/StackFile.rb +1 -1
- data/lib/perobs/Store.rb +180 -70
- data/lib/perobs/version.rb +1 -1
- data/perobs.gemspec +4 -4
- data/test/Array_spec.rb +48 -39
- data/test/BTreeDB_spec.rb +2 -2
- data/test/BTree_spec.rb +50 -1
- data/test/BigArray_spec.rb +261 -0
- data/test/BigHash_spec.rb +152 -0
- data/test/BigTreeNode_spec.rb +153 -0
- data/test/BigTree_spec.rb +259 -0
- data/test/EquiBlobsFile_spec.rb +105 -5
- data/test/FNV_Hash_1a_64_spec.rb +59 -0
- data/test/FlatFileDB_spec.rb +199 -15
- data/test/FuzzyStringMatcher_spec.rb +261 -0
- data/test/Hash_spec.rb +27 -16
- data/test/IDList_spec.rb +77 -0
- data/test/LegacyDBs/LegacyDB.rb +155 -0
- data/test/LegacyDBs/version_3/class_map.json +1 -0
- data/test/LegacyDBs/version_3/config.json +1 -0
- data/test/LegacyDBs/version_3/database.blobs +0 -0
- data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
- data/test/LegacyDBs/version_3/index.blobs +0 -0
- data/test/LegacyDBs/version_3/version +1 -0
- data/test/LockFile_spec.rb +9 -6
- data/test/Object_spec.rb +5 -5
- data/test/SpaceManager_spec.rb +176 -0
- data/test/SpaceTree_spec.rb +27 -9
- data/test/Store_spec.rb +353 -206
- data/test/perobs_spec.rb +7 -3
- data/test/spec_helper.rb +9 -4
- metadata +59 -16
- data/lib/perobs/SpaceTreeNodeCache.rb +0 -76
- data/lib/perobs/TreeDB.rb +0 -277
data/lib/perobs/FNV_Hash_1a_64.rb
ADDED
@@ -0,0 +1,54 @@
+# encoding: UTF-8
+#
+# = FNV_Hash_1a_64.rb -- Persistent Ruby Object Store
+#
+# Copyright (c) 2019 by Chris Schlaeger <chris@taskjuggler.org>
+#
+# MIT License
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+module PEROBS
+
+  # This is an implementation of the Fowler Noll Vo hashing algorithm in the
+  # 1a variant for 64 bit hash values.
+  # https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
+  class FNV_Hash_1a_64
+
+    @@OFFSET = 14695981039346656037
+    @@PRIME = 1099511628211
+    @@MASK = 2**64 - 1
+
+    def self.digest(item)
+      hash = @@OFFSET
+
+      item.to_s.each_byte do |byte|
+        hash ^= byte
+        hash *= @@PRIME
+        hash &= @@MASK
+      end
+
+      hash
+    end
+
+  end
+
+end
+
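The new FNV_Hash_1a_64 class gives PEROBS a small, dependency-free 64-bit string hash (FNV-1a with the standard offset basis and prime, masked to 64 bits). As a quick illustration, the same algorithm can be reproduced and exercised outside the gem; the standalone constants, function name and sample input below are only illustrative:

```ruby
# Standalone sketch of the FNV-1a 64-bit digest added above.
OFFSET = 14695981039346656037
PRIME  = 1099511628211
MASK   = 2**64 - 1

def fnv1a_64(item)
  item.to_s.each_byte.reduce(OFFSET) do |hash, byte|
    ((hash ^ byte) * PRIME) & MASK
  end
end

puts fnv1a_64('some key')                         # a 64-bit Integer
puts fnv1a_64('some key') == fnv1a_64('some key') # => true, digests are deterministic
```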
data/lib/perobs/FlatFile.rb
CHANGED
@@ -2,7 +2,7 @@
 #
 # = FlatFile.rb -- Persistent Ruby Object Store
 #
-# Copyright (c) 2016 by Chris Schlaeger <chris@taskjuggler.org>
+# Copyright (c) 2016, 2018, 2019 by Chris Schlaeger <chris@taskjuggler.org>
 #
 # MIT License
 #
@@ -31,6 +31,8 @@ require 'perobs/Log'
 require 'perobs/FlatFileBlobHeader'
 require 'perobs/BTree'
 require 'perobs/SpaceTree'
+require 'perobs/SpaceManager'
+require 'perobs/IDList'

 module PEROBS

@@ -44,11 +46,20 @@ module PEROBS

 # Create a new FlatFile object for a database in the given path.
 # @param dir [String] Directory path for the data base file
-def initialize(dir)
+def initialize(dir, progressmeter)
 @db_dir = dir
+@progressmeter = progressmeter
 @f = nil
-@
-@
+@marks = nil
+@index = BTree.new(@db_dir, 'index', INDEX_BTREE_ORDER, @progressmeter)
+old_spaces_file = File.join(@db_dir, 'database_spaces.blobs')
+if File.exist?(old_spaces_file)
+# PEROBS version 4.1.0 and earlier used this space list format. It is
+# deprecated now. Newly created DBs use the SpaceManager format.
+@space_list = SpaceTree.new(@db_dir, @progressmeter)
+else
+@space_list = SpaceManager.new(@db_dir, @progressmeter)
+end
 end

 # Open the flat file for reading and writing.
@@ -71,38 +82,26 @@ module PEROBS
 PEROBS.log.fatal "FlatFile database '#{file_name}' is locked by " +
 "another process"
 end
+@f.sync = true

-
-@index.open(!new_db_created)
-@space_list.open
-rescue FatalError
-# Ensure that the index is really closed.
-@index.close
-# Erase it completely
-@index.erase
-# Then create it again.
-@index.open
-
-# Ensure that the spaces list is really closed.
-@space_list.close
-# Erase it completely
-@space_list.erase
-# Then create it again
-@space_list.open
-
-regenerate_index_and_spaces
-end
+open_index_files(!new_db_created)
 end

 # Close the flat file. This method must be called to ensure that all data
 # is really written into the filesystem.
 def close
-@space_list.close
-@index.close
+@space_list.close if @space_list.is_open?
+@index.close if @index.is_open?
+
+if @marks
+@marks.erase
+@marks = nil
+end

 if @f
 @f.flush
 @f.flock(File::LOCK_UN)
+@f.fsync
 @f.close
 @f = nil
 end
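The open/close changes above tighten durability: `@f.sync = true` disables Ruby's user-space write buffering, and the added `fsync` calls make the kernel push the data to the storage device before the file is closed. A minimal sketch of that pattern on a plain Ruby File (the file name is just an example, not a PEROBS API):

```ruby
# flush empties Ruby's buffer into the kernel; fsync waits until the kernel
# has handed the bytes to the storage device.
File.open('example.blobs', 'ab') do |f|
  f.sync = true      # write through Ruby's internal buffer
  f.write('payload')
  f.flush
  f.fsync
end
```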
@@ -112,10 +111,12 @@ module PEROBS
 def sync
 begin
 @f.flush
+@f.fsync
 rescue IOError => e
 PEROBS.log.fatal "Cannot sync flat file database: #{e.message}"
 end
 @index.sync
+@space_list.sync
 end

 # Delete the blob for the specified ID.
@@ -134,29 +135,37 @@ module PEROBS
 # @param addr [Integer] Address of the blob to delete
 # @param id [Integer] ID of the blob to delete
 def delete_obj_by_address(addr, id)
-@index.remove(id)
-header = FlatFileBlobHeader.
+@index.remove(id) if @index.is_open?
+header = FlatFileBlobHeader.read(@f, addr, id)
 header.clear_flags
-@space_list.add_space(addr, header.length)
+@space_list.add_space(addr, header.length) if @space_list.is_open?
 end

 # Delete all unmarked objects.
-def delete_unmarked_objects
-
-
+def delete_unmarked_objects(&block)
+# We don't update the index and the space list during this operation as
+# we defragmentize the blob file at the end. We'll end the operation
+# with an empty space list.
+clear_index_files
+
+deleted_objects_count = 0
+@progressmeter.start('Sweeping unmarked objects', @f.size) do |pm|
+each_blob_header do |header|
+if header.is_valid? && !@marks.include?(header.id)
+delete_obj_by_address(header.addr, header.id)
+yield(header.id) if block_given?
+deleted_objects_count += 1
+end

-
-each_blob_header do |pos, header|
-if header.is_valid? && !header.is_marked?
-delete_obj_by_address(pos, header.id)
-deleted_ids << header.id
+pm.update(header.addr)
 end
 end
 defragmentize

-
-
-
+# Update the index file and create a new, empty space list.
+regenerate_index_and_spaces
+
+deleted_objects_count
 end

 # Write the given object into the file. This method never uses in-place
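delete_unmarked_objects is the sweep phase of the store's garbage collection. With this release the marks no longer live in each blob header but in a separate IDList (`@marks`), so the sweep only has to test membership per blob. A toy sketch of the idea, with a plain Set standing in for the on-disk IDList and made-up IDs:

```ruby
require 'set'

marks = Set.new
[1, 5, 9].each { |id| marks << id }   # mark phase: IDs reachable from the root objects

stored_ids = [1, 2, 5, 7, 9]          # every blob currently in the flat file
swept = stored_ids.reject { |id| marks.include?(id) }
puts "would delete #{swept.inspect}"  # => would delete [2, 7]
```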
@@ -172,7 +181,7 @@ module PEROBS
 # operation is aborted or interrupted we ensure that we either have the
 # old or the new version available.
 if (old_addr = find_obj_addr_by_id(id))
-old_header = FlatFileBlobHeader.
+old_header = FlatFileBlobHeader.read(@f, old_addr)
 old_header.set_outdated_flag
 end

@@ -183,57 +192,68 @@ module PEROBS
 # performance impact of compression is not compensated by writing
 # less data to the storage.
 compressed = false
-
+raw_obj_bytesize = raw_obj.bytesize
+if raw_obj_bytesize > 256
 raw_obj = Zlib.deflate(raw_obj)
+raw_obj_bytesize = raw_obj.bytesize
 compressed = true
 end

-addr, length = find_free_blob(
+addr, length = find_free_blob(raw_obj_bytesize)
 begin
 if length != -1
 # Just a safeguard so we don't overwrite current data.
-header = FlatFileBlobHeader.
+header = FlatFileBlobHeader.read(@f, addr)
 if header.length != length
 PEROBS.log.fatal "Length in free list (#{length}) and header " +
-"(#{header.length}) don't match."
+"(#{header.length}) for address #{addr} don't match."
 end
-if
-PEROBS.log.fatal "Object (#{
+if raw_obj_bytesize > header.length
+PEROBS.log.fatal "Object (#{raw_obj_bytesize}) is longer than " +
 "blob space (#{header.length})."
 end
 if header.is_valid?
-PEROBS.log.fatal "Entry
+PEROBS.log.fatal "Entry at address #{addr} with flags: " +
+"#{header.flags} is already used for ID #{header.id}."
 end
 end
 flags = 1 << FlatFileBlobHeader::VALID_FLAG_BIT
 flags |= (1 << FlatFileBlobHeader::COMPRESSED_FLAG_BIT) if compressed
-FlatFileBlobHeader.new(@f, addr, flags,
+FlatFileBlobHeader.new(@f, addr, flags, raw_obj_bytesize, id, crc).write
 @f.write(raw_obj)
-if length != -1 &&
+if length != -1 && raw_obj_bytesize < length
 # The new object was not appended and it did not completely fill the
 # free space. So we have to write a new header to mark the remaining
 # empty space.
-unless length -
+unless length - raw_obj_bytesize >= FlatFileBlobHeader::LENGTH
 PEROBS.log.fatal "Not enough space to append the empty space " +
-"header (space: #{length} bytes, object: #{
+"header (space: #{length} bytes, object: #{raw_obj_bytesize} " +
 "bytes)."
 end
 space_address = @f.pos
-space_length = length - FlatFileBlobHeader::LENGTH -
+space_length = length - FlatFileBlobHeader::LENGTH - raw_obj_bytesize
 FlatFileBlobHeader.new(@f, space_address, 0, space_length,
 0, 0).write
 # Register the new space with the space list.
-@space_list.
+if @space_list.is_open? && space_length > 0
+@space_list.add_space(space_address, space_length)
+end
 end
+
+# Once the blob has been written we can update the index as well.
+@index.insert(id, addr) if @index.is_open?
+
 if old_addr
 # If we had an existing object stored for the ID we have to mark
 # this entry as deleted now.
 old_header.clear_flags
+# And register the newly freed space with the space list.
+if @space_list.is_open?
+@space_list.add_space(old_addr, old_header.length)
+end
 else
 @f.flush
 end
-# Once the blob has been written we can update the index as well.
-@index.insert(id, addr)
 rescue IOError => e
 PEROBS.log.fatal "Cannot write blob for ID #{id} to FlatFileDB: " +
 e.message
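write_obj_by_id only compresses blobs above a 256-byte threshold, on the assumption that deflating tiny objects costs more than it saves. The decision can be sketched in isolation like this (the payload string is just an example):

```ruby
require 'zlib'

raw = 'x' * 1_000
compressed = false
if raw.bytesize > 256          # small blobs are stored verbatim
  raw = Zlib.deflate(raw)
  compressed = true
end
puts "storing #{raw.bytesize} bytes (compressed: #{compressed})"
# Reading mirrors the flag: buf = Zlib.inflate(buf) if compressed.
```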
@@ -260,15 +280,20 @@ module PEROBS
 nil
 end

+# @return [Integer] Number of items stored in the DB.
+def item_counter
+@index.entries_count
+end
+
 # Read the object at the specified address.
 # @param addr [Integer] Offset in the flat file
 # @param id [Integer] ID of the data blob
 # @return [String] Raw object data
 def read_obj_by_address(addr, id)
-header = FlatFileBlobHeader.
+header = FlatFileBlobHeader.read(@f, addr, id)
 if header.id != id
 PEROBS.log.fatal "Database index corrupted: Index for object " +
-"#{id} points to object with ID #{header.id}"
+"#{id} points to object with ID #{header.id} at address #{addr}"
 end

 buf = nil
@@ -277,7 +302,8 @@ module PEROBS
 @f.seek(addr + FlatFileBlobHeader::LENGTH)
 buf = @f.read(header.length)
 rescue IOError => e
-PEROBS.log.fatal "Cannot read blob for ID #{id}
+PEROBS.log.fatal "Cannot read blob for ID #{id} at address #{addr}: " +
+e.message
 end

 # Uncompress the data if the compression bit is set in the flags byte.
@@ -286,12 +312,13 @@ module PEROBS
 buf = Zlib.inflate(buf)
 rescue Zlib::BufError, Zlib::DataError
 PEROBS.log.fatal "Corrupted compressed block with ID " +
-"#{
+"#{id} found at address #{addr}."
 end
 end

 if checksum(buf) != header.crc
-PEROBS.log.fatal "Checksum failure while reading blob ID #{id}"
+PEROBS.log.fatal "Checksum failure while reading blob ID #{id} " +
+"at address #{addr}"
 end

 buf
@@ -300,47 +327,22 @@ module PEROBS
 # Mark the object with the given ID.
 # @param id [Integer] ID of the object
 def mark_obj_by_id(id)
-
-mark_obj_by_address(addr, id)
-end
-end
-
-# Mark the object at the specified address.
-# @param addr [Integer] Offset in the file
-# @param id [Integer] ID of the object
-def mark_obj_by_address(addr, id)
-FlatFileBlobHeader.read_at(@f, addr, id).set_mark_flag
+@marks.insert(id)
 end

 # Return true if the object with the given ID is marked, false otherwise.
 # @param id [Integer] ID of the object
 def is_marked_by_id?(id)
-
-header = FlatFileBlobHeader.read_at(@f, addr, id)
-return header.is_marked?
-end
-
-false
+@marks.include?(id)
 end

 # Clear alls marks.
 def clear_all_marks
-
-
-
-
-marked_blob_count = 0
-
-each_blob_header do |pos, header|
-total_blob_count += 1
-if header.is_valid? && header.is_marked?
-# Clear all valid and marked blocks.
-marked_blob_count += 1
-header.clear_mark_flag
-end
+if @marks
+@marks.clear
+else
+@marks = IDList.new(@db_dir, 'marks', item_counter)
 end
-PEROBS.log.info "#{marked_blob_count} marks in #{total_blob_count} " +
-"objects cleared in #{Time.now - t} seconds"
 end

 # Eliminate all the holes in the file. This is an in-place
@@ -349,59 +351,72 @@ module PEROBS
 distance = 0
 new_file_size = 0
 deleted_blobs = 0
+corrupted_blobs = 0
 valid_blobs = 0
-
-PEROBS.log.info "Defragmenting FlatFile"
+
 # Iterate over all entries.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+@progressmeter.start('Defragmentizing blobs file', @f.size) do |pm|
+each_blob_header do |header|
+# If we have stumbled over a corrupted blob we treat it similar to a
+# deleted blob and reuse the space.
+if header.corruption_start
+distance += header.addr - header.corruption_start
+corrupted_blobs += 1
+end
+
+# Total size of the current entry
+entry_bytes = FlatFileBlobHeader::LENGTH + header.length
+if header.is_valid?
+# We have found a valid entry.
+valid_blobs += 1
+if distance > 0
+begin
+# Read current entry into a buffer
+@f.seek(header.addr)
+buf = @f.read(entry_bytes)
+# Write the buffer right after the end of the previous entry.
+@f.seek(header.addr - distance)
+@f.write(buf)
+# Mark the space between the relocated current entry and the
+# next valid entry as deleted space.
+FlatFileBlobHeader.new(@f, @f.pos, 0,
+distance - FlatFileBlobHeader::LENGTH,
+0, 0).write
+@f.flush
+rescue IOError => e
+PEROBS.log.fatal "Error while moving blob for ID " +
+"#{header.id}: #{e.message}"
+end
 end
+new_file_size = header.addr - distance +
+FlatFileBlobHeader::LENGTH + header.length
+else
+deleted_blobs += 1
+distance += entry_bytes
 end
-
-
-deleted_blobs += 1
-distance += entry_bytes
+
+pm.update(header.addr)
 end
 end
-
+
 PEROBS.log.info "#{distance / 1000} KiB/#{deleted_blobs} blobs of " +
 "#{@f.size / 1000} KiB/#{valid_blobs} blobs or " +
 "#{'%.1f' % (distance.to_f / @f.size * 100.0)}% reclaimed"
+if corrupted_blobs > 0
+PEROBS.log.info "#{corrupted_blobs} corrupted blob(s) found. Space " +
+"was recycled."
+end

 @f.flush
 @f.truncate(new_file_size)
 @f.flush
-@space_list.clear

 sync
 end

 # This method iterates over all entries in the FlatFile and removes the
 # entry and inserts it again. This is useful to update all entries in
-#
+# case the storage format has changed.
 def refresh
 # This iteration might look scary as we iterate over the entries while
 # while we are rearranging them. Re-inserted items may be inserted
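defragmentize compacts the blob file in place: it walks all entries, keeps a running `distance` equal to the bytes occupied by deleted or corrupted entries seen so far, copies each valid entry that many bytes to the left, and finally truncates the file. A toy model of the same sliding-compaction idea, with entries simplified to [valid, payload] pairs:

```ruby
entries = [[true, 'A'], [false, '--'], [true, 'BB'], [false, '-'], [true, 'C']]

compacted = ''
distance = 0
entries.each do |valid, payload|
  if valid
    compacted << payload       # a real blob would be rewritten at (addr - distance)
  else
    distance += payload.length # hole that will be reclaimed
  end
end
puts compacted                 # => "ABBC"; the file would be truncated to this length
```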
@@ -409,132 +424,277 @@ module PEROBS
 # inserted after the current entry and will be re-read again unless they
 # are inserted after the original file end.
 file_size = @f.size
-PEROBS.log.info "Refreshing the DB..."
-t = Time.now
-each_blob_header do |pos, header|
-if header.is_valid?
-buf = read_obj_by_address(pos, header.id)
-delete_obj_by_address(pos, header.id)
-write_obj_by_id(header.id, buf)
-end

-
-
-
+# We don't update the index and the space list during this operation as
+# we defragmentize the blob file at the end. We'll end the operation
+# with an empty space list.
+clear_index_files
+
+@progressmeter.start('Converting objects to new storage format',
+@f.size) do |pm|
+each_blob_header do |header|
+if header.is_valid?
+buf = read_obj_by_address(header.addr, header.id)
+delete_obj_by_address(header.addr, header.id)
+write_obj_by_id(header.id, buf)
+end
+
+# Some re-inserted blobs may be inserted after the original file end.
+# No need to process those blobs again.
+break if header.addr >= file_size
+
+pm.update(header.addr)
+end
 end
-PEROBS.log.info "DB refresh completed in #{Time.now - t} seconds"

 # Reclaim the space saved by compressing entries.
 defragmentize
+
+# Recreate the index file and create an empty space list.
+regenerate_index_and_spaces
 end

-# Check
-# @param repair [Boolean] True if errors should be fixed.
+# Check the FlatFile.
 # @return [Integer] Number of errors found
-def check(
+def check()
 errors = 0
 return errors unless @f

 t = Time.now
-PEROBS.log.info "Checking FlatFile database"
-"#{repair ? ' in repair mode' : ''}..."
+PEROBS.log.info "Checking FlatFile database..."

 # First check the database blob file. Each entry should be readable and
 # correct and all IDs must be unique. We use a shadow index to keep
 # track of the already found IDs.
-new_index = BTree.new(@db_dir, 'new-index', INDEX_BTREE_ORDER
+new_index = BTree.new(@db_dir, 'new-index', INDEX_BTREE_ORDER,
+@progressmeter)
 new_index.erase
 new_index.open

-
-
-
-
-
-
-
-
-
-
-
-
-
+corrupted_blobs = 0
+end_of_last_healthy_blob = nil
+@progressmeter.start('Checking blobs file', @f.size) do |pm|
+corrupted_blobs = each_blob_header do |header|
+if header.is_valid?
+# We have a non-deleted entry.
+begin
+@f.seek(header.addr + FlatFileBlobHeader::LENGTH)
+buf = @f.read(header.length)
+if buf.bytesize != header.length
+PEROBS.log.error "Premature end of file in blob with ID " +
+"#{header.id}."
+errors += 1
+next
+end

-
-
-
-
-
-
-
-
-
+# Uncompress the data if the compression bit is set in the mark
+# byte.
+if header.is_compressed?
+begin
+buf = Zlib.inflate(buf)
+rescue Zlib::BufError, Zlib::DataError
+PEROBS.log.error "Corrupted compressed block with ID " +
+"#{header.id} found."
+errors += 1
+next
+end
+end
+
+if header.crc && checksum(buf) != header.crc
+PEROBS.log.error "Checksum failure while checking blob " +
+"with ID #{header.id}"
 errors += 1
 next
 end
+rescue IOError => e
+PEROBS.log.fatal "Check of blob with ID #{header.id} failed: " +
+e.message
 end

-if
-
-
-
+# Check if the ID has already been found in the file.
+if (previous_address = new_index.get(header.id))
+PEROBS.log.error "Multiple blobs for ID #{header.id} found. " +
+"Addresses: #{previous_address}, #{header.addr}"
 errors += 1
-
+previous_header = FlatFileBlobHeader.read(@f, previous_address,
+header.id)
+else
+# ID is unique so far. Add it to the shadow index.
+new_index.insert(header.id, header.addr)
 end
-rescue IOError => e
-PEROBS.log.fatal "Check of blob with ID #{header.id} failed: " +
-e.message
 end
+end_of_last_healthy_blob = header.addr +
+FlatFileBlobHeader::LENGTH + header.length

-
-
-PEROBS.log.error "Multiple blobs for ID #{header.id} found. " +
-"Addresses: #{previous_address}, #{pos}"
-previous_header = FlatFileBlobHeader.read_at(@f, previous_address,
-header.id)
-if repair
-# We have two blobs with the same ID and we must discard one of
-# them.
-if header.is_outdated?
-discard_damaged_blob(header)
-elsif previous_header.is_outdated?
-discard_damaged_blob(previous_header)
-else
-PEROBS.log.error "None of the blobs with same ID have " +
-"the outdated flag set. Deleting the smaller one."
-discard_damaged_blob(header.length < previous_header.length ?
-header : previous_header)
-end
-next
-end
-else
-# ID is unique so far. Add it to the shadow index.
-new_index.insert(header.id, pos)
-end
+pm.update(header.addr)
+end

+if end_of_last_healthy_blob && end_of_last_healthy_blob != @f.size
+# The blob file ends with a corrupted blob header.
+PEROBS.log.error "#{@f.size - end_of_last_healthy_blob} corrupted " +
+'bytes found at the end of FlatFile.'
+corrupted_blobs += 1
 end
+
+errors += corrupted_blobs
 end
+
 # We no longer need the new index.
 new_index.close
 new_index.erase

-
-
-
-
-
-
+if corrupted_blobs == 0
+# Now we check the index data. It must be correct and the entries must
+# match the blob file. All entries in the index must be in the blob file
+# and vise versa.
+begin
+index_ok = @index.check do |id, address|
+unless has_id_at?(id, address)
+PEROBS.log.error "Index contains an entry for " +
+"ID #{id} at address #{address} that is not in FlatFile"
+false
+else
+true
+end
+end
+x_check_errs = 0
+space_check_ok = true
+unless index_ok && (space_check_ok = @space_list.check(self)) &&
+(x_check_errs = cross_check_entries) == 0
+errors += 1 unless index_ok && space_check_ok
+errors += x_check_errs
+end
+rescue PEROBS::FatalError
+errors += 1
+end
+end
+
+PEROBS.log.info "FlatFile check completed in #{Time.now - t} seconds. " +
+"#{errors} errors found."
+
+errors
+end
+
+# Repair the FlatFile. In contrast to the repair functionality in the
+# check() method this method is much faster. It simply re-creates the
+# index and space list from the blob file.
+# @param repair [Boolean] True if errors should be fixed.
+# @return [Integer] Number of errors found
+def repair
+errors = 0
+return errors unless @f
+
+t = Time.now
+PEROBS.log.info "Repairing FlatFile database"
+
+# Erase and re-open the index and space list files. We purposely don't
+# close the files at it would trigger needless flushing.
+clear_index_files(true)
+
+# Now we scan the blob file and re-index all blobs and spaces. Corrupted
+# blobs will be skipped.
+corrupted_blobs = 0
+end_of_last_healthy_blob = nil
+@progressmeter.start('Re-indexing blobs file', @f.size) do |pm|
+corrupted_blobs = each_blob_header do |header|
+if header.corruption_start
+# The blob is preceeded by a corrupted area. We create a new
+# header of a deleted blob for this area and write the new blob
+# over it.
+if (data_length = header.addr - header.corruption_start -
+FlatFileBlobHeader::LENGTH) <= 0
+PEROBS.log.error "Found a corrupted blob that is too small to " +
+"fit a header (#{data_length}). File must be defragmented."
+else
+new_header = FlatFileBlobHeader.new(@f, header.corruption_start,
+0, data_length, 0, 0)
+new_header.write
+@space_list.add_space(header.corruption_start, data_length)
+end
+end
+
+if header.is_valid?
+# We have a non-deleted entry.
+begin
+@f.seek(header.addr + FlatFileBlobHeader::LENGTH)
+buf = @f.read(header.length)
+if buf.bytesize != header.length
+PEROBS.log.error "Premature end of file in blob with ID " +
+"#{header.id}."
+discard_damaged_blob(header)
+errors += 1
+next
+end
+
+# Uncompress the data if the compression bit is set in the mark
+# byte.
+if header.is_compressed?
+begin
+buf = Zlib.inflate(buf)
+rescue Zlib::BufError, Zlib::DataError
+PEROBS.log.error "Corrupted compressed block with ID " +
+"#{header.id} found."
+discard_damaged_blob(header)
+errors += 1
+next
+end
+end
+
+if header.crc && checksum(buf) != header.crc
+PEROBS.log.error "Checksum failure while checking blob " +
+"with ID #{header.id}"
+discard_damaged_blob(header)
+errors += 1
+next
+end
+rescue IOError => e
+PEROBS.log.fatal "Check of blob with ID #{header.id} failed: " +
+e.message
+end
+
+# Check if the ID has already been found in the file.
+if (previous_address = @index.get(header.id))
+PEROBS.log.error "Multiple blobs for ID #{header.id} found. " +
+"Addresses: #{previous_address}, #{header.addr}"
+errors += 1
+previous_header = FlatFileBlobHeader.read(@f, previous_address,
+header.id)
+# We have two blobs with the same ID and we must discard one of
+# them.
+discard_duplicate_blobs(header, previous_header)
+else
+# ID is unique so far. Add it to the shadow index.
+@index.insert(header.id, header.addr)
+end
+
+else
+if header.length > 0
+@space_list.add_space(header.addr, header.length)
+end
+end
+end_of_last_healthy_blob = header.addr +
+FlatFileBlobHeader::LENGTH + header.length
+
+pm.update(header.addr)
 end
-
-
+
+if end_of_last_healthy_blob && end_of_last_healthy_blob != @f.size
+# The blob file ends with a corrupted blob header.
+PEROBS.log.error "#{@f.size - end_of_last_healthy_blob} corrupted " +
+'bytes found at the end of FlatFile.'
+corrupted_blobs += 1
+
+PEROBS.log.error "Truncating FlatFile to " +
+"#{end_of_last_healthy_blob} bytes by discarding " +
+"#{@f.size - end_of_last_healthy_blob} bytes"
+@f.truncate(end_of_last_healthy_blob)
 end
-
-errors +=
-regenerate_index_and_spaces if repair
+
+errors += corrupted_blobs
 end

-sync
-PEROBS.log.info "
+sync
+PEROBS.log.info "FlatFile repair completed in #{Time.now - t} seconds. " +
 "#{errors} errors found."

 errors
@@ -544,32 +704,56 @@ module PEROBS
 # regenerates them from the FlatFile.
 def regenerate_index_and_spaces
 PEROBS.log.warn "Re-generating FlatFileDB index and space files"
+@index.open unless @index.is_open?
 @index.clear
+@space_list.open unless @space_list.is_open?
 @space_list.clear

-
-
-
-
-
+@progressmeter.start('Re-generating database index', @f.size) do |pm|
+each_blob_header do |header|
+if header.is_valid?
+if (duplicate_pos = @index.get(header.id))
+PEROBS.log.error "FlatFile contains multiple blobs for ID " +
+"#{header.id}. First blob is at address #{duplicate_pos}. " +
+"Other blob found at address #{header.addr}."
+if header.length > 0
+@space_list.add_space(header.addr, header.length)
+end
+discard_damaged_blob(header)
+else
+@index.insert(header.id, header.addr)
+end
+else
+if header.length > 0
+@space_list.add_space(header.addr, header.length)
+end
+end
+
+pm.update(header.addr)
 end
 end
+
+sync
 end

 def has_space?(address, size)
-header = FlatFileBlobHeader.
-header.length == size
+header = FlatFileBlobHeader.read(@f, address)
+!header.is_valid? && header.length == size
 end

 def has_id_at?(id, address)
-
-
+begin
+header = FlatFileBlobHeader.read(@f, address)
+rescue PEROBS::FatalError
+return false
+end
+header.is_valid? && header.id == id
 end

 def inspect
 s = '['
-each_blob_header do |
-s << "{ :pos => #{
+each_blob_header do |header|
+s << "{ :pos => #{header.addr}, :flags => #{header.flags}, " +
 ":length => #{header.length}, :id => #{header.id}, " +
 ":crc => #{header.crc}"
 if header.is_valid?
@@ -580,21 +764,68 @@ module PEROBS
 s + ']'
 end

+def FlatFile::insert_header_checksums(db_dir)
+old_file_name = File.join(db_dir, 'database.blobs')
+new_file_name = File.join(db_dir, 'database_v4.blobs')
+bak_file_name = File.join(db_dir, 'database_v3.blobs')
+
+old_file = File.open(old_file_name, 'rb')
+new_file = File.open(new_file_name, 'wb')
+
+entries = 0
+while (buf = old_file.read(21))
+flags, length, id, crc = *buf.unpack('CQQL')
+blob_data = old_file.read(length)
+
+# Some basic sanity checking to ensure all reserved bits are 0. Older
+# versions of PEROBS used to set bit 1 despite it being reserved now.
+unless flags & 0xF0 == 0
+PEROBS.log.fatal "Blob file #{old_file_name} contains illegal " +
+"flag byte #{'%02x' % flags} at #{old_file.pos - 21}"
+end
+
+# Check if the blob is valid and current.
+if flags & 0x1 == 1 && flags & 0x8 == 0
+# Make sure the bit 1 is not set anymore.
+flags = flags & 0x05
+header_str = [ flags, length, id, crc ].pack('CQQL')
+header_crc = Zlib.crc32(header_str, 0)
+header_str += [ header_crc ].pack('L')
+
+new_file.write(header_str + blob_data)
+entries += 1
+end
+end
+PEROBS.log.info "Header checksum added to #{entries} entries"
+
+old_file.close
+new_file.close
+
+File.rename(old_file_name, bak_file_name)
+File.rename(new_file_name, old_file_name)
+end
+
 private

 def each_blob_header(&block)
-
+corrupted_blobs = 0
+
 begin
 @f.seek(0)
 while (header = FlatFileBlobHeader.read(@f))
-
+if header.corruption_start
+corrupted_blobs += 1
+end
+
+yield(header)

-
-@f.seek(pos)
+@f.seek(header.addr + FlatFileBlobHeader::LENGTH + header.length)
 end
 rescue IOError => e
 PEROBS.log.fatal "Cannot read blob in flat file DB: #{e.message}"
 end
+
+corrupted_blobs
 end

 def find_free_blob(bytes)
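The new class method FlatFile::insert_header_checksums migrates a version 3 blob file to the version 4 layout: each retained 21-byte header (flags, length, id, data CRC packed as 'CQQL') gets its own CRC32 appended, growing it to 25 bytes. The packing can be verified in isolation (the field values below are arbitrary examples):

```ruby
require 'zlib'

flags, length, id, crc = 1, 512, 42, 0xDEADBEEF
header = [flags, length, id, crc].pack('CQQL')   # 1 + 8 + 8 + 4 = 21 bytes
header += [Zlib.crc32(header, 0)].pack('L')      # append header CRC32 -> 25 bytes
puts header.bytesize                             # => 25
```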
@@ -624,26 +855,34 @@ module PEROBS
 def cross_check_entries
 errors = 0

-
-
-if header.
-
-
-
-
-
+@progressmeter.start('Cross checking blobs and index', @f.size) do |pm|
+each_blob_header do |header|
+if !header.is_valid?
+if header.length > 0
+unless @space_list.has_space?(header.addr, header.length)
+PEROBS.log.error "FlatFile has free space " +
+"(addr: #{header.addr}, len: #{header.length}) that is " +
+"not in SpaceManager"
+errors += 1
+end
+end
+else
+if (index_address = @index.get(header.id)).nil?
+PEROBS.log.error "FlatFile blob at address #{header.addr} " +
+"is not listed in the index"
+errors +=1
+elsif index_address != header.addr
+PEROBS.log.error "FlatFile blob at address #{header.addr} " +
+"is listed in index with address #{index_address}"
+errors += 1
 end
 end
-
-
-PEROBS.log.error "FlatFile blob at address #{pos} is listed " +
-"in index with address #{@index.get(header.id)}"
-errors += 1
-end
+
+pm.update(header.addr)
 end
 end

-errors
+errors
 end

 def discard_damaged_blob(header)
@@ -652,6 +891,61 @@ module PEROBS
 header.clear_flags
 end

+def discard_duplicate_blobs(header, previous_header)
+if header.is_outdated?
+discard_damaged_blob(header)
+elsif previous_header.is_outdated?
+discard_damaged_blob(previous_header)
+else
+smaller, larger = header.length < previous_header.length ?
+[ header, previous_header ] : [ previous_header, header ]
+PEROBS.log.error "None of the blobs with same ID have " +
+"the outdated flag set. Deleting the smaller one " +
+"at address #{smaller.addr}"
+discard_damaged_blob(smaller)
+@space_list.add_space(smaller.addr, smaller.length)
+@index.insert(larger.id, larger.addr)
+end
+end
+
+def open_index_files(abort_on_missing_files = false)
+begin
+@index.open(abort_on_missing_files)
+@space_list.open
+rescue FatalError
+clear_index_files
+regenerate_index_and_spaces
+end
+end
+
+def erase_index_files(dont_close_files = false)
+# Ensure that the index is really closed.
+@index.close unless dont_close_files
+# Erase it completely
+@index.erase
+
+# Ensure that the spaces list is really closed.
+@space_list.close unless dont_close_files
+# Erase it completely
+@space_list.erase
+
+if @space_list.is_a?(SpaceTree)
+# If we still use the old SpaceTree format, this is the moment to
+# convert it to the new SpaceManager format.
+@space_list = SpaceManager.new(@db_dir, @progressmeter)
+PEROBS.log.warn "Converting space list from SpaceTree format " +
+"to SpaceManager format"
+end
+end
+
+def clear_index_files(dont_close_files = false)
+erase_index_files(dont_close_files)
+
+# Then create them again.
+@index.open
+@space_list.open
+end
+
 end

 end