perobs 4.1.0 → 4.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/lib/perobs/BTree.rb +33 -13
- data/lib/perobs/BTreeBlob.rb +3 -2
- data/lib/perobs/BTreeDB.rb +4 -3
- data/lib/perobs/BTreeNode.rb +107 -78
- data/lib/perobs/BTreeNodeLink.rb +10 -0
- data/lib/perobs/BigArray.rb +19 -1
- data/lib/perobs/BigArrayNode.rb +13 -9
- data/lib/perobs/BigHash.rb +8 -24
- data/lib/perobs/BigTree.rb +14 -1
- data/lib/perobs/BigTreeNode.rb +2 -2
- data/lib/perobs/Cache.rb +31 -6
- data/lib/perobs/EquiBlobsFile.rb +12 -1
- data/lib/perobs/FlatFile.rb +197 -45
- data/lib/perobs/FlatFileBlobHeader.rb +20 -5
- data/lib/perobs/FlatFileDB.rb +8 -4
- data/lib/perobs/FuzzyStringMatcher.rb +192 -0
- data/lib/perobs/Hash.rb +4 -0
- data/lib/perobs/IDListPageFile.rb +1 -2
- data/lib/perobs/ObjectBase.rb +1 -1
- data/lib/perobs/PersistentObjectCache.rb +7 -4
- data/lib/perobs/SpaceManager.rb +273 -0
- data/lib/perobs/SpaceTree.rb +1 -1
- data/lib/perobs/Store.rb +67 -25
- data/lib/perobs/version.rb +1 -1
- data/perobs.gemspec +2 -2
- data/test/BTree_spec.rb +1 -0
- data/test/BigArray_spec.rb +53 -6
- data/test/BigHash_spec.rb +8 -0
- data/test/FlatFileDB_spec.rb +108 -3
- data/test/FuzzyStringMatcher_spec.rb +171 -0
- data/test/LegacyDBs/LegacyDB.rb +4 -0
- data/test/SpaceManager_spec.rb +176 -0
- data/test/Store_spec.rb +2 -5
- metadata +12 -6
data/lib/perobs/BTreeNodeLink.rb
CHANGED
@@ -67,6 +67,8 @@ module PEROBS
|
|
67
67
|
get_node.respond_to?(method)
|
68
68
|
end
|
69
69
|
|
70
|
+
# Directly define some commonly used methods to avoid the method_missing
|
71
|
+
# overhead.
|
70
72
|
def is_leaf
|
71
73
|
get_node.is_leaf
|
72
74
|
end
|
@@ -91,10 +93,18 @@ module PEROBS
|
|
91
93
|
get_node.search_key_index(key)
|
92
94
|
end
|
93
95
|
|
96
|
+
def insert(key, value)
|
97
|
+
get_node.insert(key, value)
|
98
|
+
end
|
99
|
+
|
94
100
|
def insert_element(key, voc)
|
95
101
|
get_node.insert_element(key, voc)
|
96
102
|
end
|
97
103
|
|
104
|
+
def split_node
|
105
|
+
get_node.split_node
|
106
|
+
end
|
107
|
+
|
98
108
|
# Compare this node to another node.
|
99
109
|
# @return [Boolean] true if node address is identical, false otherwise
|
100
110
|
def ==(node)
|
data/lib/perobs/BigArray.rb
CHANGED
@@ -123,7 +123,11 @@ module PEROBS
|
|
123
123
|
# @param index [Integer] Position in the array
|
124
124
|
# @return [Integer or nil] found value or nil
|
125
125
|
def [](index)
|
126
|
-
|
126
|
+
begin
|
127
|
+
index = validate_index_range(index)
|
128
|
+
rescue IndexError
|
129
|
+
return nil
|
130
|
+
end
|
127
131
|
|
128
132
|
return nil if index >= @entry_counter
|
129
133
|
|
@@ -190,6 +194,20 @@ module PEROBS
|
|
190
194
|
@entry_counter == 0
|
191
195
|
end
|
192
196
|
|
197
|
+
# Return the first entry of the Array.
|
198
|
+
def first
|
199
|
+
return nil unless @first_leaf
|
200
|
+
|
201
|
+
@first_leaf.values.first
|
202
|
+
end
|
203
|
+
|
204
|
+
# Return the last entry of the Array.
|
205
|
+
def last
|
206
|
+
return nil unless @last_leaf
|
207
|
+
|
208
|
+
@last_leaf.values.last
|
209
|
+
end
|
210
|
+
|
193
211
|
# Iterate over all entries in the tree. Entries are always sorted by the
|
194
212
|
# key.
|
195
213
|
# @yield [key, value]
|
data/lib/perobs/BigArrayNode.rb
CHANGED
@@ -147,7 +147,9 @@ module PEROBS
|
|
147
147
|
else
|
148
148
|
# Descend into the right child node to add the value to.
|
149
149
|
cidx = node.search_child_index(index)
|
150
|
-
index -= node.offsets[cidx]
|
150
|
+
if (index -= node.offsets[cidx]) < 0
|
151
|
+
node.fatal "Index (#{index}) became negative"
|
152
|
+
end
|
151
153
|
node = node.children[cidx]
|
152
154
|
end
|
153
155
|
end
|
@@ -392,7 +394,7 @@ module PEROBS
|
|
392
394
|
@offsets.each_with_index do |offset, i|
|
393
395
|
if i > 0
|
394
396
|
if offset < last_offset
|
395
|
-
error "
|
397
|
+
error "Offsets are not strictly monotoneously " +
|
396
398
|
"increasing: #{@offsets.inspect}"
|
397
399
|
return false
|
398
400
|
end
|
@@ -471,11 +473,12 @@ module PEROBS
|
|
471
473
|
else
|
472
474
|
begin
|
473
475
|
if node.is_leaf?
|
474
|
-
if
|
476
|
+
if position <= node.size
|
475
477
|
str += "#{node.tree_prefix} " +
|
476
478
|
"#{position == node.size ? '-' : '|'} " +
|
477
479
|
"[ #{node.value_index(position - 1)}: " +
|
478
|
-
"#{node.values[position - 1]
|
480
|
+
"#{node.values[position - 1].nil? ?
|
481
|
+
'nil' : node.values[position - 1]} ]\n"
|
479
482
|
end
|
480
483
|
end
|
481
484
|
rescue => e
|
@@ -613,7 +616,7 @@ module PEROBS
|
|
613
616
|
# Handle special case for empty offsets list.
|
614
617
|
return 0 if @offsets.empty? || offset <= @offsets.first
|
615
618
|
|
616
|
-
(@offsets.bsearch_index { |o| o
|
619
|
+
(@offsets.bsearch_index { |o| o > offset } || @offsets.length) - 1
|
617
620
|
end
|
618
621
|
|
619
622
|
# @return The index of the current node in the children list of the parent
|
@@ -656,7 +659,7 @@ module PEROBS
|
|
656
659
|
|
657
660
|
# This method takes care of adjusting the offsets in tree in case elements
|
658
661
|
# were inserted or removed. All nodes that hold children after the
|
659
|
-
# insert/remove operation
|
662
|
+
# insert/remove operation need to be adjusted. Since child nodes get their
|
660
663
|
# offsets via their parents, only the parent node and the direct ancestor
|
661
664
|
# followers need to be adjusted.
|
662
665
|
# @param after_child [BigArrayNode] specifies the modified leaf node
|
@@ -910,7 +913,7 @@ module PEROBS
|
|
910
913
|
# Root Node +--------------------------------+
|
911
914
|
# Offsets | 0 11 |
|
912
915
|
# Children | |
|
913
|
-
#
|
916
|
+
# pred v child v
|
914
917
|
# Level 1 +--------------------------++--------------------------+
|
915
918
|
# Offsets | 0 4 7 || 0 2 5 |
|
916
919
|
# Children | | | | | |
|
@@ -922,8 +925,9 @@ module PEROBS
|
|
922
925
|
#
|
923
926
|
# Remove the last predecessor offset and update the child offset with
|
924
927
|
# it
|
925
|
-
delta =
|
926
|
-
@offsets[child_index]
|
928
|
+
delta = pred.children.last.values_count
|
929
|
+
@offsets[child_index] -= delta
|
930
|
+
pred.offsets.pop
|
927
931
|
# Adjust all the offsets of the child
|
928
932
|
child.offsets.map! { |o| o += delta }
|
929
933
|
# And prepend the 0 offset
|
data/lib/perobs/BigHash.rb
CHANGED
@@ -68,7 +68,7 @@ module PEROBS
|
|
68
68
|
class Collisions < PEROBS::Array
|
69
69
|
end
|
70
70
|
|
71
|
-
attr_persist :btree
|
71
|
+
attr_persist :btree
|
72
72
|
|
73
73
|
# Create a new BigHash object.
|
74
74
|
# @param p [Handle] Store handle
|
@@ -76,7 +76,6 @@ module PEROBS
|
|
76
76
|
super(p)
|
77
77
|
restore
|
78
78
|
self.btree = @store.new(PEROBS::BigTree)
|
79
|
-
self.entry_counter = 0
|
80
79
|
end
|
81
80
|
|
82
81
|
def restore
|
@@ -106,7 +105,6 @@ module PEROBS
|
|
106
105
|
end
|
107
106
|
index_to_insert += 1
|
108
107
|
end
|
109
|
-
self.entry_counter += 1 unless overwrite
|
110
108
|
existing_entry[index_to_insert] = entry
|
111
109
|
elsif existing_entry.key == key
|
112
110
|
# The existing value is for the identical key. We can safely
|
@@ -119,12 +117,10 @@ module PEROBS
|
|
119
117
|
array_entry << existing_entry
|
120
118
|
array_entry << entry
|
121
119
|
@btree.insert(hashed_key, array_entry)
|
122
|
-
self.entry_counter += 1
|
123
120
|
end
|
124
121
|
else
|
125
122
|
# No existing entry. Insert the new entry.
|
126
123
|
@btree.insert(hashed_key, entry)
|
127
|
-
self.entry_counter += 1
|
128
124
|
end
|
129
125
|
end
|
130
126
|
end
|
@@ -170,6 +166,8 @@ module PEROBS
|
|
170
166
|
false
|
171
167
|
end
|
172
168
|
|
169
|
+
alias include? has_key?
|
170
|
+
|
173
171
|
# Delete and return the entry for the given key. Return nil if no matching
|
174
172
|
# entry exists.
|
175
173
|
# @param key [Integer or String]
|
@@ -183,7 +181,6 @@ module PEROBS
|
|
183
181
|
if entry.is_a?(PEROBS::Array)
|
184
182
|
entry.each_with_index do |ae, i|
|
185
183
|
if ae.key == key
|
186
|
-
self.entry_counter -= 1
|
187
184
|
return entry.delete_at(i).value
|
188
185
|
end
|
189
186
|
end
|
@@ -197,7 +194,7 @@ module PEROBS
|
|
197
194
|
# Return the number of entries stored in the hash.
|
198
195
|
# @return [Integer]
|
199
196
|
def length
|
200
|
-
@entry_counter
|
197
|
+
@btree.entry_counter
|
201
198
|
end
|
202
199
|
|
203
200
|
alias size length
|
@@ -205,7 +202,7 @@ module PEROBS
|
|
205
202
|
# Return true if hash is empty. False otherwise.
|
206
203
|
# @return [TrueClass, FalseClass]
|
207
204
|
def empty?
|
208
|
-
@entry_counter == 0
|
205
|
+
@btree.entry_counter == 0
|
209
206
|
end
|
210
207
|
|
211
208
|
# Calls the given block for each key/value pair.
|
@@ -213,9 +210,9 @@ module PEROBS
|
|
213
210
|
def each(&block)
|
214
211
|
@btree.each do |index, entry|
|
215
212
|
if entry.is_a?(Collisions)
|
216
|
-
break
|
213
|
+
break if entry.each do |c_entry|
|
217
214
|
yield(c_entry.key, c_entry.value)
|
218
|
-
end
|
215
|
+
end.nil?
|
219
216
|
else
|
220
217
|
yield(entry.key, entry.value)
|
221
218
|
end
|
@@ -234,20 +231,7 @@ module PEROBS
|
|
234
231
|
# Check if the data structure contains any errors.
|
235
232
|
# @return [Boolean] true if no errors were found, false otherwise
|
236
233
|
def check
|
237
|
-
return
|
238
|
-
|
239
|
-
i = 0
|
240
|
-
each do |k, v|
|
241
|
-
i += 1
|
242
|
-
end
|
243
|
-
|
244
|
-
unless @entry_counter == i
|
245
|
-
PEROBS.log.error "BigHash contains #{i} values but entry counter " +
|
246
|
-
"is #{@entry_counter}"
|
247
|
-
return false
|
248
|
-
end
|
249
|
-
|
250
|
-
true
|
234
|
+
return @btree.check
|
251
235
|
end
|
252
236
|
|
253
237
|
private
|
data/lib/perobs/BigTree.rb
CHANGED
@@ -140,7 +140,7 @@ module PEROBS
|
|
140
140
|
def each(&block)
|
141
141
|
node = @first_leaf
|
142
142
|
while node
|
143
|
-
node.each_element(&block)
|
143
|
+
break if node.each_element(&block).nil?
|
144
144
|
node = node.next_sibling
|
145
145
|
end
|
146
146
|
end
|
@@ -166,6 +166,19 @@ module PEROBS
|
|
166
166
|
# @return [Boolean] true if no errors were found, false otherwise
|
167
167
|
def check(&block)
|
168
168
|
@root.check(&block)
|
169
|
+
|
170
|
+
i = 0
|
171
|
+
each do |k, v|
|
172
|
+
i += 1
|
173
|
+
end
|
174
|
+
|
175
|
+
unless @entry_counter == i
|
176
|
+
PEROBS.log.error "BigTree contains #{i} values but entry counter " +
|
177
|
+
"is #{@entry_counter}"
|
178
|
+
return false
|
179
|
+
end
|
180
|
+
|
181
|
+
true
|
169
182
|
end
|
170
183
|
|
171
184
|
# Gather some statistics regarding the tree structure.
|
data/lib/perobs/BigTreeNode.rb
CHANGED
@@ -227,7 +227,7 @@ module PEROBS
|
|
227
227
|
# Iterate over all the key/value pairs of the node.
|
228
228
|
# @yield [key, value]
|
229
229
|
def each_element
|
230
|
-
return unless is_leaf?
|
230
|
+
return self unless is_leaf?
|
231
231
|
|
232
232
|
0.upto(@keys.length - 1) do |i|
|
233
233
|
yield(@keys[i], @values[i])
|
@@ -237,7 +237,7 @@ module PEROBS
|
|
237
237
|
# Iterate over all the key/value pairs of the node in reverse order.
|
238
238
|
# @yield [key, value]
|
239
239
|
def reverse_each_element
|
240
|
-
return unless is_leaf?
|
240
|
+
return self unless is_leaf?
|
241
241
|
|
242
242
|
(@keys.length - 1).downto(0) do |i|
|
243
243
|
yield(@keys[i], @values[i])
|
data/lib/perobs/Cache.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#
|
3
3
|
# = Cache.rb -- Persistent Ruby Object Store
|
4
4
|
#
|
5
|
-
# Copyright (c) 2015, 2016 by Chris Schlaeger <chris@taskjuggler.org>
|
5
|
+
# Copyright (c) 2015, 2016, 2019 by Chris Schlaeger <chris@taskjuggler.org>
|
6
6
|
#
|
7
7
|
# MIT License
|
8
8
|
#
|
@@ -66,10 +66,10 @@ module PEROBS
|
|
66
66
|
def cache_write(obj)
|
67
67
|
# This is just a safety check. It can probably be disabled in the future
|
68
68
|
# to increase performance.
|
69
|
-
if obj.respond_to?(:is_poxreference?)
|
70
|
-
|
71
|
-
|
72
|
-
end
|
69
|
+
#if obj.respond_to?(:is_poxreference?)
|
70
|
+
# # If this condition triggers, we have a bug in the library.
|
71
|
+
# PEROBS.log.fatal "POXReference objects should never be cached"
|
72
|
+
#end
|
73
73
|
|
74
74
|
if @transaction_stack.empty?
|
75
75
|
# We are not in transaction mode.
|
@@ -93,6 +93,31 @@ module PEROBS
|
|
93
93
|
end
|
94
94
|
end
|
95
95
|
|
96
|
+
# Evict the object with the given ID from the cache.
|
97
|
+
# @param id [Integer] ID of the cached PEROBS::ObjectBase
|
98
|
+
# @return [True/False] True if object was stored in the cache. False
|
99
|
+
# otherwise.
|
100
|
+
def evict(id)
|
101
|
+
unless @transaction_stack.empty?
|
102
|
+
PEROBS.log.fatal "You cannot evict entries during a transaction."
|
103
|
+
end
|
104
|
+
|
105
|
+
idx = id & @mask
|
106
|
+
# The index is just a hash. We still need to check if the object IDs are
|
107
|
+
# actually the same before we can return the object.
|
108
|
+
if (obj = @writes[idx]) && obj._id == id
|
109
|
+
# The object is in the write cache.
|
110
|
+
@writes[idx] = nil
|
111
|
+
return true
|
112
|
+
elsif (obj = @reads[idx]) && obj._id == id
|
113
|
+
# The object is in the read cache.
|
114
|
+
@reads[idx] = nil
|
115
|
+
return true
|
116
|
+
end
|
117
|
+
|
118
|
+
false
|
119
|
+
end
|
120
|
+
|
96
121
|
# Return the PEROBS::Object with the specified ID or nil if not found.
|
97
122
|
# @param id [Integer] ID of the cached PEROBS::ObjectBase
|
98
123
|
def object_by_id(id)
|
@@ -160,7 +185,7 @@ module PEROBS
|
|
160
185
|
transactions = @transaction_stack.pop
|
161
186
|
# Merge the two lists
|
162
187
|
@transaction_stack.push(@transaction_stack.pop + transactions)
|
163
|
-
# Ensure that each object is only included once in the list.
|
188
|
+
# Ensure that each object ID is only included once in the list.
|
164
189
|
@transaction_stack.last.uniq!
|
165
190
|
end
|
166
191
|
end
|
data/lib/perobs/EquiBlobsFile.rb
CHANGED
@@ -40,7 +40,10 @@ module PEROBS
|
|
40
40
|
# bytes long header that stores the total entry count, the total space
|
41
41
|
# count, the offset of the first entry and the offset of the first space.
|
42
42
|
# The header is followed by a custom entry section. Each entry is also 8
|
43
|
-
# bytes long. After the custom entry section the data blobs start.
|
43
|
+
# bytes long. After the custom entry section the data blobs start. Each data
|
44
|
+
# blob starts with a mark byte that indicates if the blob is valid data (2),
|
45
|
+
# a free space (0) or reserved space (1). Then it is followed by @entry_bytes
|
46
|
+
# number of bytes for the data blob.
|
44
47
|
class EquiBlobsFile
|
45
48
|
|
46
49
|
TOTAL_ENTRIES_OFFSET = 0
|
@@ -501,6 +504,7 @@ module PEROBS
|
|
501
504
|
next_offset = address_to_offset(1)
|
502
505
|
total_entries = 0
|
503
506
|
total_spaces = 0
|
507
|
+
last_entry_is_space = false
|
504
508
|
@progressmeter.start("Checking #{@name} entries",
|
505
509
|
@total_spaces + @total_entries) do |pm|
|
506
510
|
begin
|
@@ -511,6 +515,7 @@ module PEROBS
|
|
511
515
|
case marker
|
512
516
|
when 0
|
513
517
|
total_spaces += 1
|
518
|
+
last_entry_is_space = true
|
514
519
|
when 1
|
515
520
|
PEROBS.log.error "Entry at address " +
|
516
521
|
"#{offset_to_address(next_offset)} in EquiBlobsFile " +
|
@@ -518,6 +523,7 @@ module PEROBS
|
|
518
523
|
return false
|
519
524
|
when 2
|
520
525
|
total_entries += 1
|
526
|
+
last_entry_is_space = false
|
521
527
|
else
|
522
528
|
PEROBS.log.error "Entry at address " +
|
523
529
|
"#{offset_to_address(next_offset)} in EquiBlobsFile " +
|
@@ -535,6 +541,11 @@ module PEROBS
|
|
535
541
|
end
|
536
542
|
end
|
537
543
|
|
544
|
+
if last_entry_is_space
|
545
|
+
PEROBS.log.error "EquiBlobsFile #{@file_name} is not properly trimmed"
|
546
|
+
return false
|
547
|
+
end
|
548
|
+
|
538
549
|
unless total_spaces == @total_spaces
|
539
550
|
PEROBS.log.error "Mismatch between space counter and spaces in " +
|
540
551
|
"EquiBlobsFile #{@file_name}. Counter: #{@total_spaces} " +
|
data/lib/perobs/FlatFile.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#
|
3
3
|
# = FlatFile.rb -- Persistent Ruby Object Store
|
4
4
|
#
|
5
|
-
# Copyright (c) 2016, 2018 by Chris Schlaeger <chris@taskjuggler.org>
|
5
|
+
# Copyright (c) 2016, 2018, 2019 by Chris Schlaeger <chris@taskjuggler.org>
|
6
6
|
#
|
7
7
|
# MIT License
|
8
8
|
#
|
@@ -31,6 +31,7 @@ require 'perobs/Log'
|
|
31
31
|
require 'perobs/FlatFileBlobHeader'
|
32
32
|
require 'perobs/BTree'
|
33
33
|
require 'perobs/SpaceTree'
|
34
|
+
require 'perobs/SpaceManager'
|
34
35
|
require 'perobs/IDList'
|
35
36
|
|
36
37
|
module PEROBS
|
@@ -51,7 +52,14 @@ module PEROBS
|
|
51
52
|
@f = nil
|
52
53
|
@marks = nil
|
53
54
|
@index = BTree.new(@db_dir, 'index', INDEX_BTREE_ORDER, @progressmeter)
|
54
|
-
|
55
|
+
old_spaces_file = File.join(@db_dir, 'database_spaces.blobs')
|
56
|
+
if File.exist?(old_spaces_file)
|
57
|
+
# PEROBS version 4.1.0 and earlier used this space list format. It is
|
58
|
+
# deprecated now. Newly created DBs use the SpaceManager format.
|
59
|
+
@space_list = SpaceTree.new(@db_dir, @progressmeter)
|
60
|
+
else
|
61
|
+
@space_list = SpaceManager.new(@db_dir, @progressmeter)
|
62
|
+
end
|
55
63
|
end
|
56
64
|
|
57
65
|
# Open the flat file for reading and writing.
|
@@ -134,7 +142,7 @@ module PEROBS
|
|
134
142
|
end
|
135
143
|
|
136
144
|
# Delete all unmarked objects.
|
137
|
-
def delete_unmarked_objects
|
145
|
+
def delete_unmarked_objects(&block)
|
138
146
|
# We don't update the index and the space list during this operation as
|
139
147
|
# we defragmentize the blob file at the end. We'll end the operation
|
140
148
|
# with an empty space list.
|
@@ -145,6 +153,7 @@ module PEROBS
|
|
145
153
|
each_blob_header do |header|
|
146
154
|
if header.is_valid? && !@marks.include?(header.id)
|
147
155
|
delete_obj_by_address(header.addr, header.id)
|
156
|
+
yield(header.id) if block_given?
|
148
157
|
deleted_objects_count += 1
|
149
158
|
end
|
150
159
|
|
@@ -183,12 +192,14 @@ module PEROBS
|
|
183
192
|
# performance impact of compression is not compensated by writing
|
184
193
|
# less data to the storage.
|
185
194
|
compressed = false
|
186
|
-
|
195
|
+
raw_obj_bytesize = raw_obj.bytesize
|
196
|
+
if raw_obj_bytesize > 256
|
187
197
|
raw_obj = Zlib.deflate(raw_obj)
|
198
|
+
raw_obj_bytesize = raw_obj.bytesize
|
188
199
|
compressed = true
|
189
200
|
end
|
190
201
|
|
191
|
-
addr, length = find_free_blob(
|
202
|
+
addr, length = find_free_blob(raw_obj_bytesize)
|
192
203
|
begin
|
193
204
|
if length != -1
|
194
205
|
# Just a safeguard so we don't overwrite current data.
|
@@ -197,8 +208,8 @@ module PEROBS
|
|
197
208
|
PEROBS.log.fatal "Length in free list (#{length}) and header " +
|
198
209
|
"(#{header.length}) for address #{addr} don't match."
|
199
210
|
end
|
200
|
-
if
|
201
|
-
PEROBS.log.fatal "Object (#{
|
211
|
+
if raw_obj_bytesize > header.length
|
212
|
+
PEROBS.log.fatal "Object (#{raw_obj_bytesize}) is longer than " +
|
202
213
|
"blob space (#{header.length})."
|
203
214
|
end
|
204
215
|
if header.is_valid?
|
@@ -208,19 +219,19 @@ module PEROBS
|
|
208
219
|
end
|
209
220
|
flags = 1 << FlatFileBlobHeader::VALID_FLAG_BIT
|
210
221
|
flags |= (1 << FlatFileBlobHeader::COMPRESSED_FLAG_BIT) if compressed
|
211
|
-
FlatFileBlobHeader.new(@f, addr, flags,
|
222
|
+
FlatFileBlobHeader.new(@f, addr, flags, raw_obj_bytesize, id, crc).write
|
212
223
|
@f.write(raw_obj)
|
213
|
-
if length != -1 &&
|
224
|
+
if length != -1 && raw_obj_bytesize < length
|
214
225
|
# The new object was not appended and it did not completely fill the
|
215
226
|
# free space. So we have to write a new header to mark the remaining
|
216
227
|
# empty space.
|
217
|
-
unless length -
|
228
|
+
unless length - raw_obj_bytesize >= FlatFileBlobHeader::LENGTH
|
218
229
|
PEROBS.log.fatal "Not enough space to append the empty space " +
|
219
|
-
"header (space: #{length} bytes, object: #{
|
230
|
+
"header (space: #{length} bytes, object: #{raw_obj_bytesize} " +
|
220
231
|
"bytes)."
|
221
232
|
end
|
222
233
|
space_address = @f.pos
|
223
|
-
space_length = length - FlatFileBlobHeader::LENGTH -
|
234
|
+
space_length = length - FlatFileBlobHeader::LENGTH - raw_obj_bytesize
|
224
235
|
FlatFileBlobHeader.new(@f, space_address, 0, space_length,
|
225
236
|
0, 0).write
|
226
237
|
# Register the new space with the space list.
|
@@ -461,6 +472,7 @@ module PEROBS
|
|
461
472
|
new_index.open
|
462
473
|
|
463
474
|
corrupted_blobs = 0
|
475
|
+
end_of_last_healthy_blob = nil
|
464
476
|
@progressmeter.start('Checking blobs file', @f.size) do |pm|
|
465
477
|
corrupted_blobs = each_blob_header do |header|
|
466
478
|
if header.is_valid?
|
@@ -529,12 +541,26 @@ module PEROBS
|
|
529
541
|
# ID is unique so far. Add it to the shadow index.
|
530
542
|
new_index.insert(header.id, header.addr)
|
531
543
|
end
|
532
|
-
|
533
544
|
end
|
545
|
+
end_of_last_healthy_blob = header.addr +
|
546
|
+
FlatFileBlobHeader::LENGTH + header.length
|
534
547
|
|
535
548
|
pm.update(header.addr)
|
536
549
|
end
|
537
550
|
|
551
|
+
if end_of_last_healthy_blob && end_of_last_healthy_blob != @f.size
|
552
|
+
# The blob file ends with a corrupted blob header.
|
553
|
+
PEROBS.log.error "#{@f.size - end_of_last_healthy_blob} corrupted " +
|
554
|
+
'bytes found at the end of FlatFile.'
|
555
|
+
corrupted_blobs += 1
|
556
|
+
if repair
|
557
|
+
PEROBS.log.error "Truncating FlatFile to " +
|
558
|
+
"#{end_of_last_healthy_blob} bytes by discarding " +
|
559
|
+
"#{@f.size - end_of_last_healthy_blob} bytes"
|
560
|
+
@f.truncate(end_of_last_healthy_blob)
|
561
|
+
end
|
562
|
+
end
|
563
|
+
|
538
564
|
errors += corrupted_blobs
|
539
565
|
end
|
540
566
|
|
@@ -546,7 +572,7 @@ module PEROBS
|
|
546
572
|
erase_index_files
|
547
573
|
defragmentize
|
548
574
|
regenerate_index_and_spaces
|
549
|
-
|
575
|
+
elsif corrupted_blobs == 0
|
550
576
|
# Now we check the index data. It must be correct and the entries must
|
551
577
|
# match the blob file. All entries in the index must be in the blob file
|
552
578
|
# and vise versa.
|
@@ -575,6 +601,141 @@ module PEROBS
|
|
575
601
|
errors
|
576
602
|
end
|
577
603
|
|
604
|
+
# Repair the FlatFile. In contrast to the repair functionality in the
|
605
|
+
# check() method this method is much faster. It simply re-creates the
|
606
|
+
# index and space list from the blob file.
|
607
|
+
# @param repair [Boolean] True if errors should be fixed.
|
608
|
+
# @return [Integer] Number of errors found
|
609
|
+
def repair
|
610
|
+
errors = 0
|
611
|
+
return errors unless @f
|
612
|
+
|
613
|
+
t = Time.now
|
614
|
+
PEROBS.log.info "Repairing FlatFile database"
|
615
|
+
|
616
|
+
# Erase and re-open the index and space list files. We purposely don't
|
617
|
+
# close the files as it would trigger needless flushing.
|
618
|
+
clear_index_files(true)
|
619
|
+
|
620
|
+
# Now we scan the blob file and re-index all blobs and spaces. Corrupted
|
621
|
+
# blobs will be skipped.
|
622
|
+
corrupted_blobs = 0
|
623
|
+
end_of_last_healthy_blob = nil
|
624
|
+
@progressmeter.start('Re-indexing blobs file', @f.size) do |pm|
|
625
|
+
corrupted_blobs = each_blob_header do |header|
|
626
|
+
if header.corruption_start
|
627
|
+
# The blob is preceded by a corrupted area. We create a new
|
628
|
+
# header of a deleted blob for this area and write the new blob
|
629
|
+
# over it.
|
630
|
+
if (data_length = header.addr - header.corruption_start -
|
631
|
+
FlatFileBlobHeader::LENGTH) <= 0
|
632
|
+
PEROBS.log.error "Found a corrupted blob that is too small to " +
|
633
|
+
"fit a header (#{data_length}). File must be defragmented."
|
634
|
+
else
|
635
|
+
new_header = FlatFileBlobHeader.new(@f, header.corruption_start,
|
636
|
+
0, data_length, 0, 0)
|
637
|
+
new_header.write
|
638
|
+
@space_list.add_space(header.corruption_start, data_length)
|
639
|
+
end
|
640
|
+
end
|
641
|
+
|
642
|
+
if header.is_valid?
|
643
|
+
# We have a non-deleted entry.
|
644
|
+
begin
|
645
|
+
@f.seek(header.addr + FlatFileBlobHeader::LENGTH)
|
646
|
+
buf = @f.read(header.length)
|
647
|
+
if buf.bytesize != header.length
|
648
|
+
PEROBS.log.error "Premature end of file in blob with ID " +
|
649
|
+
"#{header.id}."
|
650
|
+
discard_damaged_blob(header)
|
651
|
+
errors += 1
|
652
|
+
next
|
653
|
+
end
|
654
|
+
|
655
|
+
# Uncompress the data if the compression bit is set in the mark
|
656
|
+
# byte.
|
657
|
+
if header.is_compressed?
|
658
|
+
begin
|
659
|
+
buf = Zlib.inflate(buf)
|
660
|
+
rescue Zlib::BufError, Zlib::DataError
|
661
|
+
PEROBS.log.error "Corrupted compressed block with ID " +
|
662
|
+
"#{header.id} found."
|
663
|
+
discard_damaged_blob(header)
|
664
|
+
errors += 1
|
665
|
+
next
|
666
|
+
end
|
667
|
+
end
|
668
|
+
|
669
|
+
if header.crc && checksum(buf) != header.crc
|
670
|
+
PEROBS.log.error "Checksum failure while checking blob " +
|
671
|
+
"with ID #{header.id}"
|
672
|
+
discard_damaged_blob(header)
|
673
|
+
errors += 1
|
674
|
+
next
|
675
|
+
end
|
676
|
+
rescue IOError => e
|
677
|
+
PEROBS.log.fatal "Check of blob with ID #{header.id} failed: " +
|
678
|
+
e.message
|
679
|
+
end
|
680
|
+
|
681
|
+
# Check if the ID has already been found in the file.
|
682
|
+
if (previous_address = @index.get(header.id))
|
683
|
+
PEROBS.log.error "Multiple blobs for ID #{header.id} found. " +
|
684
|
+
"Addresses: #{previous_address}, #{header.addr}"
|
685
|
+
errors += 1
|
686
|
+
previous_header = FlatFileBlobHeader.read(@f, previous_address,
|
687
|
+
header.id)
|
688
|
+
# We have two blobs with the same ID and we must discard one of
|
689
|
+
# them.
|
690
|
+
if header.is_outdated?
|
691
|
+
discard_damaged_blob(header)
|
692
|
+
elsif previous_header.is_outdated?
|
693
|
+
discard_damaged_blob(previous_header)
|
694
|
+
else
|
695
|
+
PEROBS.log.error "None of the blobs with same ID have " +
|
696
|
+
"the outdated flag set. Deleting the smaller one."
|
697
|
+
errors += 1
|
698
|
+
discard_damaged_blob(header.length < previous_header.length ?
|
699
|
+
header : previous_header)
|
700
|
+
end
|
701
|
+
else
|
702
|
+
# ID is unique so far. Add it to the shadow index.
|
703
|
+
@index.insert(header.id, header.addr)
|
704
|
+
end
|
705
|
+
|
706
|
+
else
|
707
|
+
if header.length > 0
|
708
|
+
@space_list.add_space(header.addr, header.length)
|
709
|
+
end
|
710
|
+
end
|
711
|
+
end_of_last_healthy_blob = header.addr +
|
712
|
+
FlatFileBlobHeader::LENGTH + header.length
|
713
|
+
|
714
|
+
pm.update(header.addr)
|
715
|
+
end
|
716
|
+
|
717
|
+
if end_of_last_healthy_blob && end_of_last_healthy_blob != @f.size
|
718
|
+
# The blob file ends with a corrupted blob header.
|
719
|
+
PEROBS.log.error "#{@f.size - end_of_last_healthy_blob} corrupted " +
|
720
|
+
'bytes found at the end of FlatFile.'
|
721
|
+
corrupted_blobs += 1
|
722
|
+
|
723
|
+
PEROBS.log.error "Truncating FlatFile to " +
|
724
|
+
"#{end_of_last_healthy_blob} bytes by discarding " +
|
725
|
+
"#{@f.size - end_of_last_healthy_blob} bytes"
|
726
|
+
@f.truncate(end_of_last_healthy_blob)
|
727
|
+
end
|
728
|
+
|
729
|
+
errors += corrupted_blobs
|
730
|
+
end
|
731
|
+
|
732
|
+
sync
|
733
|
+
PEROBS.log.info "FlatFile repair completed in #{Time.now - t} seconds. " +
|
734
|
+
"#{errors} errors found."
|
735
|
+
|
736
|
+
errors
|
737
|
+
end
|
738
|
+
|
578
739
|
# This method clears the index tree and the free space list and
|
579
740
|
# regenerates them from the FlatFile.
|
580
741
|
def regenerate_index_and_spaces
|
@@ -617,7 +778,11 @@ module PEROBS
|
|
617
778
|
end
|
618
779
|
|
619
780
|
def has_id_at?(id, address)
|
620
|
-
|
781
|
+
begin
|
782
|
+
header = FlatFileBlobHeader.read(@f, address)
|
783
|
+
rescue PEROBS::FatalError
|
784
|
+
return false
|
785
|
+
end
|
621
786
|
header.is_valid? && header.id == id
|
622
787
|
end
|
623
788
|
|
@@ -733,7 +898,7 @@ module PEROBS
|
|
733
898
|
unless @space_list.has_space?(header.addr, header.length)
|
734
899
|
PEROBS.log.error "FlatFile has free space " +
|
735
900
|
"(addr: #{header.addr}, len: #{header.length}) that is " +
|
736
|
-
"not in
|
901
|
+
"not in SpaceManager"
|
737
902
|
errors += 1
|
738
903
|
end
|
739
904
|
end
|
@@ -767,49 +932,36 @@ module PEROBS
|
|
767
932
|
@index.open(abort_on_missing_files)
|
768
933
|
@space_list.open
|
769
934
|
rescue FatalError
|
770
|
-
|
771
|
-
@index.close
|
772
|
-
# Erase it completely
|
773
|
-
@index.erase
|
774
|
-
# Then create it again.
|
775
|
-
@index.open
|
776
|
-
|
777
|
-
# Ensure that the spaces list is really closed.
|
778
|
-
@space_list.close
|
779
|
-
# Erase it completely
|
780
|
-
@space_list.erase
|
781
|
-
# Then create it again
|
782
|
-
@space_list.open
|
783
|
-
|
935
|
+
clear_index_files
|
784
936
|
regenerate_index_and_spaces
|
785
937
|
end
|
786
938
|
end
|
787
939
|
|
788
|
-
def erase_index_files
|
940
|
+
def erase_index_files(dont_close_files = false)
|
789
941
|
# Ensure that the index is really closed.
|
790
|
-
@index.close
|
942
|
+
@index.close unless dont_close_files
|
791
943
|
# Erase it completely
|
792
944
|
@index.erase
|
793
945
|
|
794
946
|
# Ensure that the spaces list is really closed.
|
795
|
-
@space_list.close
|
947
|
+
@space_list.close unless dont_close_files
|
796
948
|
# Erase it completely
|
797
949
|
@space_list.erase
|
950
|
+
|
951
|
+
if @space_list.is_a?(SpaceTree)
|
952
|
+
# If we still use the old SpaceTree format, this is the moment to
|
953
|
+
# convert it to the new SpaceManager format.
|
954
|
+
@space_list = SpaceManager.new(@db_dir, @progressmeter)
|
955
|
+
PEROBS.log.warn "Converting space list from SpaceTree format " +
|
956
|
+
"to SpaceManager format"
|
957
|
+
end
|
798
958
|
end
|
799
959
|
|
800
|
-
def clear_index_files
|
801
|
-
|
802
|
-
@index.close
|
803
|
-
# Erase it completely
|
804
|
-
@index.erase
|
805
|
-
# Then create it again.
|
806
|
-
@index.open
|
960
|
+
def clear_index_files(dont_close_files = false)
|
961
|
+
erase_index_files(dont_close_files)
|
807
962
|
|
808
|
-
#
|
809
|
-
@
|
810
|
-
# Erase it completely
|
811
|
-
@space_list.erase
|
812
|
-
# Then create it again
|
963
|
+
# Then create them again.
|
964
|
+
@index.open
|
813
965
|
@space_list.open
|
814
966
|
end
|
815
967
|
|