perobs 4.1.0 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/lib/perobs/BTree.rb +33 -13
- data/lib/perobs/BTreeBlob.rb +3 -2
- data/lib/perobs/BTreeDB.rb +4 -3
- data/lib/perobs/BTreeNode.rb +107 -78
- data/lib/perobs/BTreeNodeLink.rb +10 -0
- data/lib/perobs/BigArray.rb +19 -1
- data/lib/perobs/BigArrayNode.rb +13 -9
- data/lib/perobs/BigHash.rb +8 -24
- data/lib/perobs/BigTree.rb +14 -1
- data/lib/perobs/BigTreeNode.rb +2 -2
- data/lib/perobs/Cache.rb +31 -6
- data/lib/perobs/EquiBlobsFile.rb +12 -1
- data/lib/perobs/FlatFile.rb +197 -45
- data/lib/perobs/FlatFileBlobHeader.rb +20 -5
- data/lib/perobs/FlatFileDB.rb +8 -4
- data/lib/perobs/FuzzyStringMatcher.rb +192 -0
- data/lib/perobs/Hash.rb +4 -0
- data/lib/perobs/IDListPageFile.rb +1 -2
- data/lib/perobs/ObjectBase.rb +1 -1
- data/lib/perobs/PersistentObjectCache.rb +7 -4
- data/lib/perobs/SpaceManager.rb +273 -0
- data/lib/perobs/SpaceTree.rb +1 -1
- data/lib/perobs/Store.rb +67 -25
- data/lib/perobs/version.rb +1 -1
- data/perobs.gemspec +2 -2
- data/test/BTree_spec.rb +1 -0
- data/test/BigArray_spec.rb +53 -6
- data/test/BigHash_spec.rb +8 -0
- data/test/FlatFileDB_spec.rb +108 -3
- data/test/FuzzyStringMatcher_spec.rb +171 -0
- data/test/LegacyDBs/LegacyDB.rb +4 -0
- data/test/SpaceManager_spec.rb +176 -0
- data/test/Store_spec.rb +2 -5
- metadata +12 -6
data/lib/perobs/BTreeNodeLink.rb
CHANGED
@@ -67,6 +67,8 @@ module PEROBS
|
|
67
67
|
get_node.respond_to?(method)
|
68
68
|
end
|
69
69
|
|
70
|
+
# Directly define some commonly used methods to avoid the method_missing
|
71
|
+
# overhead.
|
70
72
|
def is_leaf
|
71
73
|
get_node.is_leaf
|
72
74
|
end
|
@@ -91,10 +93,18 @@ module PEROBS
|
|
91
93
|
get_node.search_key_index(key)
|
92
94
|
end
|
93
95
|
|
96
|
+
def insert(key, value)
|
97
|
+
get_node.insert(key, value)
|
98
|
+
end
|
99
|
+
|
94
100
|
def insert_element(key, voc)
|
95
101
|
get_node.insert_element(key, voc)
|
96
102
|
end
|
97
103
|
|
104
|
+
def split_node
|
105
|
+
get_node.split_node
|
106
|
+
end
|
107
|
+
|
98
108
|
# Compare this node to another node.
|
99
109
|
# @return [Boolean] true if node address is identical, false otherwise
|
100
110
|
def ==(node)
|
data/lib/perobs/BigArray.rb
CHANGED
@@ -123,7 +123,11 @@ module PEROBS
|
|
123
123
|
# @param index [Integer] Position in the array
|
124
124
|
# @return [Integer or nil] found value or nil
|
125
125
|
def [](index)
|
126
|
-
|
126
|
+
begin
|
127
|
+
index = validate_index_range(index)
|
128
|
+
rescue IndexError
|
129
|
+
return nil
|
130
|
+
end
|
127
131
|
|
128
132
|
return nil if index >= @entry_counter
|
129
133
|
|
@@ -190,6 +194,20 @@ module PEROBS
|
|
190
194
|
@entry_counter == 0
|
191
195
|
end
|
192
196
|
|
197
|
+
# Return the first entry of the Array.
|
198
|
+
def first
|
199
|
+
return nil unless @first_leaf
|
200
|
+
|
201
|
+
@first_leaf.values.first
|
202
|
+
end
|
203
|
+
|
204
|
+
# Return the last entry of the Array.
|
205
|
+
def last
|
206
|
+
return nil unless @last_leaf
|
207
|
+
|
208
|
+
@last_leaf.values.last
|
209
|
+
end
|
210
|
+
|
193
211
|
# Iterate over all entries in the tree. Entries are always sorted by the
|
194
212
|
# key.
|
195
213
|
# @yield [key, value]
|
data/lib/perobs/BigArrayNode.rb
CHANGED
@@ -147,7 +147,9 @@ module PEROBS
|
|
147
147
|
else
|
148
148
|
# Descend into the right child node to add the value to.
|
149
149
|
cidx = node.search_child_index(index)
|
150
|
-
index -= node.offsets[cidx]
|
150
|
+
if (index -= node.offsets[cidx]) < 0
|
151
|
+
node.fatal "Index (#{index}) became negative"
|
152
|
+
end
|
151
153
|
node = node.children[cidx]
|
152
154
|
end
|
153
155
|
end
|
@@ -392,7 +394,7 @@ module PEROBS
|
|
392
394
|
@offsets.each_with_index do |offset, i|
|
393
395
|
if i > 0
|
394
396
|
if offset < last_offset
|
395
|
-
error "
|
397
|
+
error "Offsets are not strictly monotoneously " +
|
396
398
|
"increasing: #{@offsets.inspect}"
|
397
399
|
return false
|
398
400
|
end
|
@@ -471,11 +473,12 @@ module PEROBS
|
|
471
473
|
else
|
472
474
|
begin
|
473
475
|
if node.is_leaf?
|
474
|
-
if
|
476
|
+
if position <= node.size
|
475
477
|
str += "#{node.tree_prefix} " +
|
476
478
|
"#{position == node.size ? '-' : '|'} " +
|
477
479
|
"[ #{node.value_index(position - 1)}: " +
|
478
|
-
"#{node.values[position - 1]
|
480
|
+
"#{node.values[position - 1].nil? ?
|
481
|
+
'nil' : node.values[position - 1]} ]\n"
|
479
482
|
end
|
480
483
|
end
|
481
484
|
rescue => e
|
@@ -613,7 +616,7 @@ module PEROBS
|
|
613
616
|
# Handle special case for empty offsets list.
|
614
617
|
return 0 if @offsets.empty? || offset <= @offsets.first
|
615
618
|
|
616
|
-
(@offsets.bsearch_index { |o| o
|
619
|
+
(@offsets.bsearch_index { |o| o > offset } || @offsets.length) - 1
|
617
620
|
end
|
618
621
|
|
619
622
|
# @return The index of the current node in the children list of the parent
|
@@ -656,7 +659,7 @@ module PEROBS
|
|
656
659
|
|
657
660
|
# This method takes care of adjusting the offsets in tree in case elements
|
658
661
|
# were inserted or removed. All nodes that hold children after the
|
659
|
-
# insert/remove operation
|
662
|
+
# insert/remove operation need to be adjusted. Since child nodes get their
|
660
663
|
# offsets via their parents, only the parent node and the direct ancestor
|
661
664
|
# followers need to be adjusted.
|
662
665
|
# @param after_child [BigArrayNode] specifies the modified leaf node
|
@@ -910,7 +913,7 @@ module PEROBS
|
|
910
913
|
# Root Node +--------------------------------+
|
911
914
|
# Offsets | 0 11 |
|
912
915
|
# Children | |
|
913
|
-
#
|
916
|
+
# pred v child v
|
914
917
|
# Level 1 +--------------------------++--------------------------+
|
915
918
|
# Offsets | 0 4 7 || 0 2 5 |
|
916
919
|
# Children | | | | | |
|
@@ -922,8 +925,9 @@ module PEROBS
|
|
922
925
|
#
|
923
926
|
# Remove the last predecessor offset and update the child offset with
|
924
927
|
# it
|
925
|
-
delta =
|
926
|
-
@offsets[child_index]
|
928
|
+
delta = pred.children.last.values_count
|
929
|
+
@offsets[child_index] -= delta
|
930
|
+
pred.offsets.pop
|
927
931
|
# Adjust all the offsets of the child
|
928
932
|
child.offsets.map! { |o| o += delta }
|
929
933
|
# And prepend the 0 offset
|
data/lib/perobs/BigHash.rb
CHANGED
@@ -68,7 +68,7 @@ module PEROBS
|
|
68
68
|
class Collisions < PEROBS::Array
|
69
69
|
end
|
70
70
|
|
71
|
-
attr_persist :btree
|
71
|
+
attr_persist :btree
|
72
72
|
|
73
73
|
# Create a new BigHash object.
|
74
74
|
# @param p [Handle] Store handle
|
@@ -76,7 +76,6 @@ module PEROBS
|
|
76
76
|
super(p)
|
77
77
|
restore
|
78
78
|
self.btree = @store.new(PEROBS::BigTree)
|
79
|
-
self.entry_counter = 0
|
80
79
|
end
|
81
80
|
|
82
81
|
def restore
|
@@ -106,7 +105,6 @@ module PEROBS
|
|
106
105
|
end
|
107
106
|
index_to_insert += 1
|
108
107
|
end
|
109
|
-
self.entry_counter += 1 unless overwrite
|
110
108
|
existing_entry[index_to_insert] = entry
|
111
109
|
elsif existing_entry.key == key
|
112
110
|
# The existing value is for the identical key. We can safely
|
@@ -119,12 +117,10 @@ module PEROBS
|
|
119
117
|
array_entry << existing_entry
|
120
118
|
array_entry << entry
|
121
119
|
@btree.insert(hashed_key, array_entry)
|
122
|
-
self.entry_counter += 1
|
123
120
|
end
|
124
121
|
else
|
125
122
|
# No existing entry. Insert the new entry.
|
126
123
|
@btree.insert(hashed_key, entry)
|
127
|
-
self.entry_counter += 1
|
128
124
|
end
|
129
125
|
end
|
130
126
|
end
|
@@ -170,6 +166,8 @@ module PEROBS
|
|
170
166
|
false
|
171
167
|
end
|
172
168
|
|
169
|
+
alias include? has_key?
|
170
|
+
|
173
171
|
# Delete and return the entry for the given key. Return nil if no matching
|
174
172
|
# entry exists.
|
175
173
|
# @param key [Integer or String]
|
@@ -183,7 +181,6 @@ module PEROBS
|
|
183
181
|
if entry.is_a?(PEROBS::Array)
|
184
182
|
entry.each_with_index do |ae, i|
|
185
183
|
if ae.key == key
|
186
|
-
self.entry_counter -= 1
|
187
184
|
return entry.delete_at(i).value
|
188
185
|
end
|
189
186
|
end
|
@@ -197,7 +194,7 @@ module PEROBS
|
|
197
194
|
# Return the number of entries stored in the hash.
|
198
195
|
# @return [Integer]
|
199
196
|
def length
|
200
|
-
@entry_counter
|
197
|
+
@btree.entry_counter
|
201
198
|
end
|
202
199
|
|
203
200
|
alias size length
|
@@ -205,7 +202,7 @@ module PEROBS
|
|
205
202
|
# Return true if hash is empty. False otherweise.
|
206
203
|
# @return [TrueClass, FalseClass]
|
207
204
|
def empty?
|
208
|
-
@entry_counter == 0
|
205
|
+
@btree.entry_counter == 0
|
209
206
|
end
|
210
207
|
|
211
208
|
# Calls the given block for each key/value pair.
|
@@ -213,9 +210,9 @@ module PEROBS
|
|
213
210
|
def each(&block)
|
214
211
|
@btree.each do |index, entry|
|
215
212
|
if entry.is_a?(Collisions)
|
216
|
-
break
|
213
|
+
break if entry.each do |c_entry|
|
217
214
|
yield(c_entry.key, c_entry.value)
|
218
|
-
end
|
215
|
+
end.nil?
|
219
216
|
else
|
220
217
|
yield(entry.key, entry.value)
|
221
218
|
end
|
@@ -234,20 +231,7 @@ module PEROBS
|
|
234
231
|
# Check if the data structure contains any errors.
|
235
232
|
# @return [Boolean] true if no erros were found, false otherwise
|
236
233
|
def check
|
237
|
-
return
|
238
|
-
|
239
|
-
i = 0
|
240
|
-
each do |k, v|
|
241
|
-
i += 1
|
242
|
-
end
|
243
|
-
|
244
|
-
unless @entry_counter == i
|
245
|
-
PEROBS.log.error "BigHash contains #{i} values but entry counter " +
|
246
|
-
"is #{@entry_counter}"
|
247
|
-
return false
|
248
|
-
end
|
249
|
-
|
250
|
-
true
|
234
|
+
return @btree.check
|
251
235
|
end
|
252
236
|
|
253
237
|
private
|
data/lib/perobs/BigTree.rb
CHANGED
@@ -140,7 +140,7 @@ module PEROBS
|
|
140
140
|
def each(&block)
|
141
141
|
node = @first_leaf
|
142
142
|
while node
|
143
|
-
node.each_element(&block)
|
143
|
+
break if node.each_element(&block).nil?
|
144
144
|
node = node.next_sibling
|
145
145
|
end
|
146
146
|
end
|
@@ -166,6 +166,19 @@ module PEROBS
|
|
166
166
|
# @return [Boolean] true if no erros were found, false otherwise
|
167
167
|
def check(&block)
|
168
168
|
@root.check(&block)
|
169
|
+
|
170
|
+
i = 0
|
171
|
+
each do |k, v|
|
172
|
+
i += 1
|
173
|
+
end
|
174
|
+
|
175
|
+
unless @entry_counter == i
|
176
|
+
PEROBS.log.error "BigTree contains #{i} values but entry counter " +
|
177
|
+
"is #{@entry_counter}"
|
178
|
+
return false
|
179
|
+
end
|
180
|
+
|
181
|
+
true
|
169
182
|
end
|
170
183
|
|
171
184
|
# Gather some statistics regarding the tree structure.
|
data/lib/perobs/BigTreeNode.rb
CHANGED
@@ -227,7 +227,7 @@ module PEROBS
|
|
227
227
|
# Iterate over all the key/value pairs of the node.
|
228
228
|
# @yield [key, value]
|
229
229
|
def each_element
|
230
|
-
return unless is_leaf?
|
230
|
+
return self unless is_leaf?
|
231
231
|
|
232
232
|
0.upto(@keys.length - 1) do |i|
|
233
233
|
yield(@keys[i], @values[i])
|
@@ -237,7 +237,7 @@ module PEROBS
|
|
237
237
|
# Iterate over all the key/value pairs of the node in reverse order.
|
238
238
|
# @yield [key, value]
|
239
239
|
def reverse_each_element
|
240
|
-
return unless is_leaf?
|
240
|
+
return self unless is_leaf?
|
241
241
|
|
242
242
|
(@keys.length - 1).downto(0) do |i|
|
243
243
|
yield(@keys[i], @values[i])
|
data/lib/perobs/Cache.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#
|
3
3
|
# = Cache.rb -- Persistent Ruby Object Store
|
4
4
|
#
|
5
|
-
# Copyright (c) 2015, 2016 by Chris Schlaeger <chris@taskjuggler.org>
|
5
|
+
# Copyright (c) 2015, 2016, 2019 by Chris Schlaeger <chris@taskjuggler.org>
|
6
6
|
#
|
7
7
|
# MIT License
|
8
8
|
#
|
@@ -66,10 +66,10 @@ module PEROBS
|
|
66
66
|
def cache_write(obj)
|
67
67
|
# This is just a safety check. It can probably be disabled in the future
|
68
68
|
# to increase performance.
|
69
|
-
if obj.respond_to?(:is_poxreference?)
|
70
|
-
|
71
|
-
|
72
|
-
end
|
69
|
+
#if obj.respond_to?(:is_poxreference?)
|
70
|
+
# # If this condition triggers, we have a bug in the library.
|
71
|
+
# PEROBS.log.fatal "POXReference objects should never be cached"
|
72
|
+
#end
|
73
73
|
|
74
74
|
if @transaction_stack.empty?
|
75
75
|
# We are not in transaction mode.
|
@@ -93,6 +93,31 @@ module PEROBS
|
|
93
93
|
end
|
94
94
|
end
|
95
95
|
|
96
|
+
# Evict the object with the given ID from the cache.
|
97
|
+
# @param id [Integer] ID of the cached PEROBS::ObjectBase
|
98
|
+
# @return [True/False] True if object was stored in the cache. False
|
99
|
+
# otherwise.
|
100
|
+
def evict(id)
|
101
|
+
unless @transaction_stack.empty?
|
102
|
+
PEROBS.log.fatal "You cannot evict entries during a transaction."
|
103
|
+
end
|
104
|
+
|
105
|
+
idx = id & @mask
|
106
|
+
# The index is just a hash. We still need to check if the object IDs are
|
107
|
+
# actually the same before we can return the object.
|
108
|
+
if (obj = @writes[idx]) && obj._id == id
|
109
|
+
# The object is in the write cache.
|
110
|
+
@writes[idx] = nil
|
111
|
+
return true
|
112
|
+
elsif (obj = @reads[idx]) && obj._id == id
|
113
|
+
# The object is in the read cache.
|
114
|
+
@reads[idx] = nil
|
115
|
+
return true
|
116
|
+
end
|
117
|
+
|
118
|
+
false
|
119
|
+
end
|
120
|
+
|
96
121
|
# Return the PEROBS::Object with the specified ID or nil if not found.
|
97
122
|
# @param id [Integer] ID of the cached PEROBS::ObjectBase
|
98
123
|
def object_by_id(id)
|
@@ -160,7 +185,7 @@ module PEROBS
|
|
160
185
|
transactions = @transaction_stack.pop
|
161
186
|
# Merge the two lists
|
162
187
|
@transaction_stack.push(@transaction_stack.pop + transactions)
|
163
|
-
# Ensure that each object is only included once in the list.
|
188
|
+
# Ensure that each object ID is only included once in the list.
|
164
189
|
@transaction_stack.last.uniq!
|
165
190
|
end
|
166
191
|
end
|
data/lib/perobs/EquiBlobsFile.rb
CHANGED
@@ -40,7 +40,10 @@ module PEROBS
|
|
40
40
|
# bytes long header that stores the total entry count, the total space
|
41
41
|
# count, the offset of the first entry and the offset of the first space.
|
42
42
|
# The header is followed by a custom entry section. Each entry is also 8
|
43
|
-
# bytes long. After the custom entry section the data blobs start.
|
43
|
+
# bytes long. After the custom entry section the data blobs start. Each data
|
44
|
+
# blob starts with a mark byte that indicates if the blob is valid data (2),
|
45
|
+
# a free space (0) or reseved space (1). Then it is followed by @entry_bytes
|
46
|
+
# number of bytes for the data blob.
|
44
47
|
class EquiBlobsFile
|
45
48
|
|
46
49
|
TOTAL_ENTRIES_OFFSET = 0
|
@@ -501,6 +504,7 @@ module PEROBS
|
|
501
504
|
next_offset = address_to_offset(1)
|
502
505
|
total_entries = 0
|
503
506
|
total_spaces = 0
|
507
|
+
last_entry_is_space = false
|
504
508
|
@progressmeter.start("Checking #{@name} entries",
|
505
509
|
@total_spaces + @total_entries) do |pm|
|
506
510
|
begin
|
@@ -511,6 +515,7 @@ module PEROBS
|
|
511
515
|
case marker
|
512
516
|
when 0
|
513
517
|
total_spaces += 1
|
518
|
+
last_entry_is_space = true
|
514
519
|
when 1
|
515
520
|
PEROBS.log.error "Entry at address " +
|
516
521
|
"#{offset_to_address(next_offset)} in EquiBlobsFile " +
|
@@ -518,6 +523,7 @@ module PEROBS
|
|
518
523
|
return false
|
519
524
|
when 2
|
520
525
|
total_entries += 1
|
526
|
+
last_entry_is_space = false
|
521
527
|
else
|
522
528
|
PEROBS.log.error "Entry at address " +
|
523
529
|
"#{offset_to_address(next_offset)} in EquiBlobsFile " +
|
@@ -535,6 +541,11 @@ module PEROBS
|
|
535
541
|
end
|
536
542
|
end
|
537
543
|
|
544
|
+
if last_entry_is_space
|
545
|
+
PEROBS.log.error "EquiBlobsFile #{@file_name} is not properly trimmed"
|
546
|
+
return false
|
547
|
+
end
|
548
|
+
|
538
549
|
unless total_spaces == @total_spaces
|
539
550
|
PEROBS.log.error "Mismatch between space counter and spaces in " +
|
540
551
|
"EquiBlobsFile #{@file_name}. Counter: #{@total_spaces} " +
|
data/lib/perobs/FlatFile.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#
|
3
3
|
# = FlatFile.rb -- Persistent Ruby Object Store
|
4
4
|
#
|
5
|
-
# Copyright (c) 2016, 2018 by Chris Schlaeger <chris@taskjuggler.org>
|
5
|
+
# Copyright (c) 2016, 2018, 2019 by Chris Schlaeger <chris@taskjuggler.org>
|
6
6
|
#
|
7
7
|
# MIT License
|
8
8
|
#
|
@@ -31,6 +31,7 @@ require 'perobs/Log'
|
|
31
31
|
require 'perobs/FlatFileBlobHeader'
|
32
32
|
require 'perobs/BTree'
|
33
33
|
require 'perobs/SpaceTree'
|
34
|
+
require 'perobs/SpaceManager'
|
34
35
|
require 'perobs/IDList'
|
35
36
|
|
36
37
|
module PEROBS
|
@@ -51,7 +52,14 @@ module PEROBS
|
|
51
52
|
@f = nil
|
52
53
|
@marks = nil
|
53
54
|
@index = BTree.new(@db_dir, 'index', INDEX_BTREE_ORDER, @progressmeter)
|
54
|
-
|
55
|
+
old_spaces_file = File.join(@db_dir, 'database_spaces.blobs')
|
56
|
+
if File.exist?(old_spaces_file)
|
57
|
+
# PEROBS version 4.1.0 and earlier used this space list format. It is
|
58
|
+
# deprecated now. Newly created DBs use the SpaceManager format.
|
59
|
+
@space_list = SpaceTree.new(@db_dir, @progressmeter)
|
60
|
+
else
|
61
|
+
@space_list = SpaceManager.new(@db_dir, @progressmeter)
|
62
|
+
end
|
55
63
|
end
|
56
64
|
|
57
65
|
# Open the flat file for reading and writing.
|
@@ -134,7 +142,7 @@ module PEROBS
|
|
134
142
|
end
|
135
143
|
|
136
144
|
# Delete all unmarked objects.
|
137
|
-
def delete_unmarked_objects
|
145
|
+
def delete_unmarked_objects(&block)
|
138
146
|
# We don't update the index and the space list during this operation as
|
139
147
|
# we defragmentize the blob file at the end. We'll end the operation
|
140
148
|
# with an empty space list.
|
@@ -145,6 +153,7 @@ module PEROBS
|
|
145
153
|
each_blob_header do |header|
|
146
154
|
if header.is_valid? && !@marks.include?(header.id)
|
147
155
|
delete_obj_by_address(header.addr, header.id)
|
156
|
+
yield(header.id) if block_given?
|
148
157
|
deleted_objects_count += 1
|
149
158
|
end
|
150
159
|
|
@@ -183,12 +192,14 @@ module PEROBS
|
|
183
192
|
# performance impact of compression is not compensated by writing
|
184
193
|
# less data to the storage.
|
185
194
|
compressed = false
|
186
|
-
|
195
|
+
raw_obj_bytesize = raw_obj.bytesize
|
196
|
+
if raw_obj_bytesize > 256
|
187
197
|
raw_obj = Zlib.deflate(raw_obj)
|
198
|
+
raw_obj_bytesize = raw_obj.bytesize
|
188
199
|
compressed = true
|
189
200
|
end
|
190
201
|
|
191
|
-
addr, length = find_free_blob(
|
202
|
+
addr, length = find_free_blob(raw_obj_bytesize)
|
192
203
|
begin
|
193
204
|
if length != -1
|
194
205
|
# Just a safeguard so we don't overwrite current data.
|
@@ -197,8 +208,8 @@ module PEROBS
|
|
197
208
|
PEROBS.log.fatal "Length in free list (#{length}) and header " +
|
198
209
|
"(#{header.length}) for address #{addr} don't match."
|
199
210
|
end
|
200
|
-
if
|
201
|
-
PEROBS.log.fatal "Object (#{
|
211
|
+
if raw_obj_bytesize > header.length
|
212
|
+
PEROBS.log.fatal "Object (#{raw_obj_bytesize}) is longer than " +
|
202
213
|
"blob space (#{header.length})."
|
203
214
|
end
|
204
215
|
if header.is_valid?
|
@@ -208,19 +219,19 @@ module PEROBS
|
|
208
219
|
end
|
209
220
|
flags = 1 << FlatFileBlobHeader::VALID_FLAG_BIT
|
210
221
|
flags |= (1 << FlatFileBlobHeader::COMPRESSED_FLAG_BIT) if compressed
|
211
|
-
FlatFileBlobHeader.new(@f, addr, flags,
|
222
|
+
FlatFileBlobHeader.new(@f, addr, flags, raw_obj_bytesize, id, crc).write
|
212
223
|
@f.write(raw_obj)
|
213
|
-
if length != -1 &&
|
224
|
+
if length != -1 && raw_obj_bytesize < length
|
214
225
|
# The new object was not appended and it did not completely fill the
|
215
226
|
# free space. So we have to write a new header to mark the remaining
|
216
227
|
# empty space.
|
217
|
-
unless length -
|
228
|
+
unless length - raw_obj_bytesize >= FlatFileBlobHeader::LENGTH
|
218
229
|
PEROBS.log.fatal "Not enough space to append the empty space " +
|
219
|
-
"header (space: #{length} bytes, object: #{
|
230
|
+
"header (space: #{length} bytes, object: #{raw_obj_bytesize} " +
|
220
231
|
"bytes)."
|
221
232
|
end
|
222
233
|
space_address = @f.pos
|
223
|
-
space_length = length - FlatFileBlobHeader::LENGTH -
|
234
|
+
space_length = length - FlatFileBlobHeader::LENGTH - raw_obj_bytesize
|
224
235
|
FlatFileBlobHeader.new(@f, space_address, 0, space_length,
|
225
236
|
0, 0).write
|
226
237
|
# Register the new space with the space list.
|
@@ -461,6 +472,7 @@ module PEROBS
|
|
461
472
|
new_index.open
|
462
473
|
|
463
474
|
corrupted_blobs = 0
|
475
|
+
end_of_last_healthy_blob = nil
|
464
476
|
@progressmeter.start('Checking blobs file', @f.size) do |pm|
|
465
477
|
corrupted_blobs = each_blob_header do |header|
|
466
478
|
if header.is_valid?
|
@@ -529,12 +541,26 @@ module PEROBS
|
|
529
541
|
# ID is unique so far. Add it to the shadow index.
|
530
542
|
new_index.insert(header.id, header.addr)
|
531
543
|
end
|
532
|
-
|
533
544
|
end
|
545
|
+
end_of_last_healthy_blob = header.addr +
|
546
|
+
FlatFileBlobHeader::LENGTH + header.length
|
534
547
|
|
535
548
|
pm.update(header.addr)
|
536
549
|
end
|
537
550
|
|
551
|
+
if end_of_last_healthy_blob && end_of_last_healthy_blob != @f.size
|
552
|
+
# The blob file ends with a corrupted blob header.
|
553
|
+
PEROBS.log.error "#{@f.size - end_of_last_healthy_blob} corrupted " +
|
554
|
+
'bytes found at the end of FlatFile.'
|
555
|
+
corrupted_blobs += 1
|
556
|
+
if repair
|
557
|
+
PEROBS.log.error "Truncating FlatFile to " +
|
558
|
+
"#{end_of_last_healthy_blob} bytes by discarding " +
|
559
|
+
"#{@f.size - end_of_last_healthy_blob} bytes"
|
560
|
+
@f.truncate(end_of_last_healthy_blob)
|
561
|
+
end
|
562
|
+
end
|
563
|
+
|
538
564
|
errors += corrupted_blobs
|
539
565
|
end
|
540
566
|
|
@@ -546,7 +572,7 @@ module PEROBS
|
|
546
572
|
erase_index_files
|
547
573
|
defragmentize
|
548
574
|
regenerate_index_and_spaces
|
549
|
-
|
575
|
+
elsif corrupted_blobs == 0
|
550
576
|
# Now we check the index data. It must be correct and the entries must
|
551
577
|
# match the blob file. All entries in the index must be in the blob file
|
552
578
|
# and vise versa.
|
@@ -575,6 +601,141 @@ module PEROBS
|
|
575
601
|
errors
|
576
602
|
end
|
577
603
|
|
604
|
+
# Repair the FlatFile. In contrast to the repair functionality in the
|
605
|
+
# check() method this method is much faster. It simply re-creates the
|
606
|
+
# index and space list from the blob file.
|
607
|
+
# @param repair [Boolean] True if errors should be fixed.
|
608
|
+
# @return [Integer] Number of errors found
|
609
|
+
def repair
|
610
|
+
errors = 0
|
611
|
+
return errors unless @f
|
612
|
+
|
613
|
+
t = Time.now
|
614
|
+
PEROBS.log.info "Repairing FlatFile database"
|
615
|
+
|
616
|
+
# Erase and re-open the index and space list files. We purposely don't
|
617
|
+
# close the files at it would trigger needless flushing.
|
618
|
+
clear_index_files(true)
|
619
|
+
|
620
|
+
# Now we scan the blob file and re-index all blobs and spaces. Corrupted
|
621
|
+
# blobs will be skipped.
|
622
|
+
corrupted_blobs = 0
|
623
|
+
end_of_last_healthy_blob = nil
|
624
|
+
@progressmeter.start('Re-indexing blobs file', @f.size) do |pm|
|
625
|
+
corrupted_blobs = each_blob_header do |header|
|
626
|
+
if header.corruption_start
|
627
|
+
# The blob is preceeded by a corrupted area. We create a new
|
628
|
+
# header of a deleted blob for this area and write the new blob
|
629
|
+
# over it.
|
630
|
+
if (data_length = header.addr - header.corruption_start -
|
631
|
+
FlatFileBlobHeader::LENGTH) <= 0
|
632
|
+
PEROBS.log.error "Found a corrupted blob that is too small to " +
|
633
|
+
"fit a header (#{data_length}). File must be defragmented."
|
634
|
+
else
|
635
|
+
new_header = FlatFileBlobHeader.new(@f, header.corruption_start,
|
636
|
+
0, data_length, 0, 0)
|
637
|
+
new_header.write
|
638
|
+
@space_list.add_space(header.corruption_start, data_length)
|
639
|
+
end
|
640
|
+
end
|
641
|
+
|
642
|
+
if header.is_valid?
|
643
|
+
# We have a non-deleted entry.
|
644
|
+
begin
|
645
|
+
@f.seek(header.addr + FlatFileBlobHeader::LENGTH)
|
646
|
+
buf = @f.read(header.length)
|
647
|
+
if buf.bytesize != header.length
|
648
|
+
PEROBS.log.error "Premature end of file in blob with ID " +
|
649
|
+
"#{header.id}."
|
650
|
+
discard_damaged_blob(header)
|
651
|
+
errors += 1
|
652
|
+
next
|
653
|
+
end
|
654
|
+
|
655
|
+
# Uncompress the data if the compression bit is set in the mark
|
656
|
+
# byte.
|
657
|
+
if header.is_compressed?
|
658
|
+
begin
|
659
|
+
buf = Zlib.inflate(buf)
|
660
|
+
rescue Zlib::BufError, Zlib::DataError
|
661
|
+
PEROBS.log.error "Corrupted compressed block with ID " +
|
662
|
+
"#{header.id} found."
|
663
|
+
discard_damaged_blob(header)
|
664
|
+
errors += 1
|
665
|
+
next
|
666
|
+
end
|
667
|
+
end
|
668
|
+
|
669
|
+
if header.crc && checksum(buf) != header.crc
|
670
|
+
PEROBS.log.error "Checksum failure while checking blob " +
|
671
|
+
"with ID #{header.id}"
|
672
|
+
discard_damaged_blob(header)
|
673
|
+
errors += 1
|
674
|
+
next
|
675
|
+
end
|
676
|
+
rescue IOError => e
|
677
|
+
PEROBS.log.fatal "Check of blob with ID #{header.id} failed: " +
|
678
|
+
e.message
|
679
|
+
end
|
680
|
+
|
681
|
+
# Check if the ID has already been found in the file.
|
682
|
+
if (previous_address = @index.get(header.id))
|
683
|
+
PEROBS.log.error "Multiple blobs for ID #{header.id} found. " +
|
684
|
+
"Addresses: #{previous_address}, #{header.addr}"
|
685
|
+
errors += 1
|
686
|
+
previous_header = FlatFileBlobHeader.read(@f, previous_address,
|
687
|
+
header.id)
|
688
|
+
# We have two blobs with the same ID and we must discard one of
|
689
|
+
# them.
|
690
|
+
if header.is_outdated?
|
691
|
+
discard_damaged_blob(header)
|
692
|
+
elsif previous_header.is_outdated?
|
693
|
+
discard_damaged_blob(previous_header)
|
694
|
+
else
|
695
|
+
PEROBS.log.error "None of the blobs with same ID have " +
|
696
|
+
"the outdated flag set. Deleting the smaller one."
|
697
|
+
errors += 1
|
698
|
+
discard_damaged_blob(header.length < previous_header.length ?
|
699
|
+
header : previous_header)
|
700
|
+
end
|
701
|
+
else
|
702
|
+
# ID is unique so far. Add it to the shadow index.
|
703
|
+
@index.insert(header.id, header.addr)
|
704
|
+
end
|
705
|
+
|
706
|
+
else
|
707
|
+
if header.length > 0
|
708
|
+
@space_list.add_space(header.addr, header.length)
|
709
|
+
end
|
710
|
+
end
|
711
|
+
end_of_last_healthy_blob = header.addr +
|
712
|
+
FlatFileBlobHeader::LENGTH + header.length
|
713
|
+
|
714
|
+
pm.update(header.addr)
|
715
|
+
end
|
716
|
+
|
717
|
+
if end_of_last_healthy_blob && end_of_last_healthy_blob != @f.size
|
718
|
+
# The blob file ends with a corrupted blob header.
|
719
|
+
PEROBS.log.error "#{@f.size - end_of_last_healthy_blob} corrupted " +
|
720
|
+
'bytes found at the end of FlatFile.'
|
721
|
+
corrupted_blobs += 1
|
722
|
+
|
723
|
+
PEROBS.log.error "Truncating FlatFile to " +
|
724
|
+
"#{end_of_last_healthy_blob} bytes by discarding " +
|
725
|
+
"#{@f.size - end_of_last_healthy_blob} bytes"
|
726
|
+
@f.truncate(end_of_last_healthy_blob)
|
727
|
+
end
|
728
|
+
|
729
|
+
errors += corrupted_blobs
|
730
|
+
end
|
731
|
+
|
732
|
+
sync
|
733
|
+
PEROBS.log.info "FlatFile repair completed in #{Time.now - t} seconds. " +
|
734
|
+
"#{errors} errors found."
|
735
|
+
|
736
|
+
errors
|
737
|
+
end
|
738
|
+
|
578
739
|
# This method clears the index tree and the free space list and
|
579
740
|
# regenerates them from the FlatFile.
|
580
741
|
def regenerate_index_and_spaces
|
@@ -617,7 +778,11 @@ module PEROBS
|
|
617
778
|
end
|
618
779
|
|
619
780
|
def has_id_at?(id, address)
|
620
|
-
|
781
|
+
begin
|
782
|
+
header = FlatFileBlobHeader.read(@f, address)
|
783
|
+
rescue PEROBS::FatalError
|
784
|
+
return false
|
785
|
+
end
|
621
786
|
header.is_valid? && header.id == id
|
622
787
|
end
|
623
788
|
|
@@ -733,7 +898,7 @@ module PEROBS
|
|
733
898
|
unless @space_list.has_space?(header.addr, header.length)
|
734
899
|
PEROBS.log.error "FlatFile has free space " +
|
735
900
|
"(addr: #{header.addr}, len: #{header.length}) that is " +
|
736
|
-
"not in
|
901
|
+
"not in SpaceManager"
|
737
902
|
errors += 1
|
738
903
|
end
|
739
904
|
end
|
@@ -767,49 +932,36 @@ module PEROBS
|
|
767
932
|
@index.open(abort_on_missing_files)
|
768
933
|
@space_list.open
|
769
934
|
rescue FatalError
|
770
|
-
|
771
|
-
@index.close
|
772
|
-
# Erase it completely
|
773
|
-
@index.erase
|
774
|
-
# Then create it again.
|
775
|
-
@index.open
|
776
|
-
|
777
|
-
# Ensure that the spaces list is really closed.
|
778
|
-
@space_list.close
|
779
|
-
# Erase it completely
|
780
|
-
@space_list.erase
|
781
|
-
# Then create it again
|
782
|
-
@space_list.open
|
783
|
-
|
935
|
+
clear_index_files
|
784
936
|
regenerate_index_and_spaces
|
785
937
|
end
|
786
938
|
end
|
787
939
|
|
788
|
-
def erase_index_files
|
940
|
+
def erase_index_files(dont_close_files = false)
|
789
941
|
# Ensure that the index is really closed.
|
790
|
-
@index.close
|
942
|
+
@index.close unless dont_close_files
|
791
943
|
# Erase it completely
|
792
944
|
@index.erase
|
793
945
|
|
794
946
|
# Ensure that the spaces list is really closed.
|
795
|
-
@space_list.close
|
947
|
+
@space_list.close unless dont_close_files
|
796
948
|
# Erase it completely
|
797
949
|
@space_list.erase
|
950
|
+
|
951
|
+
if @space_list.is_a?(SpaceTree)
|
952
|
+
# If we still use the old SpaceTree format, this is the moment to
|
953
|
+
# convert it to the new SpaceManager format.
|
954
|
+
@space_list = SpaceManager.new(@db_dir, @progressmeter)
|
955
|
+
PEROBS.log.warn "Converting space list from SpaceTree format " +
|
956
|
+
"to SpaceManager format"
|
957
|
+
end
|
798
958
|
end
|
799
959
|
|
800
|
-
def clear_index_files
|
801
|
-
|
802
|
-
@index.close
|
803
|
-
# Erase it completely
|
804
|
-
@index.erase
|
805
|
-
# Then create it again.
|
806
|
-
@index.open
|
960
|
+
def clear_index_files(dont_close_files = false)
|
961
|
+
erase_index_files(dont_close_files)
|
807
962
|
|
808
|
-
#
|
809
|
-
@
|
810
|
-
# Erase it completely
|
811
|
-
@space_list.erase
|
812
|
-
# Then create it again
|
963
|
+
# Then create them again.
|
964
|
+
@index.open
|
813
965
|
@space_list.open
|
814
966
|
end
|
815
967
|
|