perobs 4.1.0 → 4.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -67,6 +67,8 @@ module PEROBS
67
67
  get_node.respond_to?(method)
68
68
  end
69
69
 
70
+ # Directly define some commonly used methods to avoid the method_missing
71
+ # overhead.
70
72
  def is_leaf
71
73
  get_node.is_leaf
72
74
  end
@@ -91,10 +93,18 @@ module PEROBS
91
93
  get_node.search_key_index(key)
92
94
  end
93
95
 
96
+ def insert(key, value)
97
+ get_node.insert(key, value)
98
+ end
99
+
94
100
  def insert_element(key, voc)
95
101
  get_node.insert_element(key, voc)
96
102
  end
97
103
 
104
+ def split_node
105
+ get_node.split_node
106
+ end
107
+
98
108
  # Compare this node to another node.
99
109
  # @return [Boolean] true if node address is identical, false otherwise
100
110
  def ==(node)
@@ -123,7 +123,11 @@ module PEROBS
123
123
  # @param index [Integer] Position in the array
124
124
  # @return [Integer or nil] found value or nil
125
125
  def [](index)
126
- index = validate_index_range(index)
126
+ begin
127
+ index = validate_index_range(index)
128
+ rescue IndexError
129
+ return nil
130
+ end
127
131
 
128
132
  return nil if index >= @entry_counter
129
133
 
@@ -190,6 +194,20 @@ module PEROBS
190
194
  @entry_counter == 0
191
195
  end
192
196
 
197
+ # Return the first entry of the Array.
198
+ def first
199
+ return nil unless @first_leaf
200
+
201
+ @first_leaf.values.first
202
+ end
203
+
204
+ # Return the last entry of the Array.
205
+ def last
206
+ return nil unless @last_leaf
207
+
208
+ @last_leaf.values.last
209
+ end
210
+
193
211
  # Iterate over all entries in the tree. Entries are always sorted by the
194
212
  # key.
195
213
  # @yield [key, value]
@@ -147,7 +147,9 @@ module PEROBS
147
147
  else
148
148
  # Descend into the right child node to add the value to.
149
149
  cidx = node.search_child_index(index)
150
- index -= node.offsets[cidx]
150
+ if (index -= node.offsets[cidx]) < 0
151
+ node.fatal "Index (#{index}) became negative"
152
+ end
151
153
  node = node.children[cidx]
152
154
  end
153
155
  end
@@ -392,7 +394,7 @@ module PEROBS
392
394
  @offsets.each_with_index do |offset, i|
393
395
  if i > 0
394
396
  if offset < last_offset
395
- error "Offset are not strictly monotoneously " +
397
+ error "Offsets are not strictly monotoneously " +
396
398
  "increasing: #{@offsets.inspect}"
397
399
  return false
398
400
  end
@@ -471,11 +473,12 @@ module PEROBS
471
473
  else
472
474
  begin
473
475
  if node.is_leaf?
474
- if node.values[position - 1]
476
+ if position <= node.size
475
477
  str += "#{node.tree_prefix} " +
476
478
  "#{position == node.size ? '-' : '|'} " +
477
479
  "[ #{node.value_index(position - 1)}: " +
478
- "#{node.values[position - 1]} ]\n"
480
+ "#{node.values[position - 1].nil? ?
481
+ 'nil' : node.values[position - 1]} ]\n"
479
482
  end
480
483
  end
481
484
  rescue => e
@@ -613,7 +616,7 @@ module PEROBS
613
616
  # Handle special case for empty offsets list.
614
617
  return 0 if @offsets.empty? || offset <= @offsets.first
615
618
 
616
- (@offsets.bsearch_index { |o| o >= offset } || @offsets.length) - 1
619
+ (@offsets.bsearch_index { |o| o > offset } || @offsets.length) - 1
617
620
  end
618
621
 
619
622
  # @return The index of the current node in the children list of the parent
@@ -656,7 +659,7 @@ module PEROBS
656
659
 
657
660
  # This method takes care of adjusting the offsets in tree in case elements
658
661
  # were inserted or removed. All nodes that hold children after the
659
- # insert/remove operation needs to be adjusted. Since child nodes get their
662
+ # insert/remove operation need to be adjusted. Since child nodes get their
660
663
  # offsets via their parents, only the parent node and the direct ancestor
661
664
  # followers need to be adjusted.
662
665
  # @param after_child [BigArrayNode] specifies the modified leaf node
@@ -910,7 +913,7 @@ module PEROBS
910
913
  # Root Node +--------------------------------+
911
914
  # Offsets | 0 11 |
912
915
  # Children | |
913
- # prepd v child v
916
+ # pred v child v
914
917
  # Level 1 +--------------------------++--------------------------+
915
918
  # Offsets | 0 4 7 || 0 2 5 |
916
919
  # Children | | | | | |
@@ -922,8 +925,9 @@ module PEROBS
922
925
  #
923
926
  # Remove the last predecessor offset and update the child offset with
924
927
  # it
925
- delta = @offsets[child_index] - pred.offsets.last
926
- @offsets[child_index] = pred.offsets.pop
928
+ delta = pred.children.last.values_count
929
+ @offsets[child_index] -= delta
930
+ pred.offsets.pop
927
931
  # Adjust all the offsets of the child
928
932
  child.offsets.map! { |o| o += delta }
929
933
  # And prepend the 0 offset
@@ -68,7 +68,7 @@ module PEROBS
68
68
  class Collisions < PEROBS::Array
69
69
  end
70
70
 
71
- attr_persist :btree, :entry_counter
71
+ attr_persist :btree
72
72
 
73
73
  # Create a new BigHash object.
74
74
  # @param p [Handle] Store handle
@@ -76,7 +76,6 @@ module PEROBS
76
76
  super(p)
77
77
  restore
78
78
  self.btree = @store.new(PEROBS::BigTree)
79
- self.entry_counter = 0
80
79
  end
81
80
 
82
81
  def restore
@@ -106,7 +105,6 @@ module PEROBS
106
105
  end
107
106
  index_to_insert += 1
108
107
  end
109
- self.entry_counter += 1 unless overwrite
110
108
  existing_entry[index_to_insert] = entry
111
109
  elsif existing_entry.key == key
112
110
  # The existing value is for the identical key. We can safely
@@ -119,12 +117,10 @@ module PEROBS
119
117
  array_entry << existing_entry
120
118
  array_entry << entry
121
119
  @btree.insert(hashed_key, array_entry)
122
- self.entry_counter += 1
123
120
  end
124
121
  else
125
122
  # No existing entry. Insert the new entry.
126
123
  @btree.insert(hashed_key, entry)
127
- self.entry_counter += 1
128
124
  end
129
125
  end
130
126
  end
@@ -170,6 +166,8 @@ module PEROBS
170
166
  false
171
167
  end
172
168
 
169
+ alias include? has_key?
170
+
173
171
  # Delete and return the entry for the given key. Return nil if no matching
174
172
  # entry exists.
175
173
  # @param key [Integer or String]
@@ -183,7 +181,6 @@ module PEROBS
183
181
  if entry.is_a?(PEROBS::Array)
184
182
  entry.each_with_index do |ae, i|
185
183
  if ae.key == key
186
- self.entry_counter -= 1
187
184
  return entry.delete_at(i).value
188
185
  end
189
186
  end
@@ -197,7 +194,7 @@ module PEROBS
197
194
  # Return the number of entries stored in the hash.
198
195
  # @return [Integer]
199
196
  def length
200
- @entry_counter
197
+ @btree.entry_counter
201
198
  end
202
199
 
203
200
  alias size length
@@ -205,7 +202,7 @@ module PEROBS
205
202
  # Return true if hash is empty. False otherwise.
206
203
  # @return [TrueClass, FalseClass]
207
204
  def empty?
208
- @entry_counter == 0
205
+ @btree.entry_counter == 0
209
206
  end
210
207
 
211
208
  # Calls the given block for each key/value pair.
@@ -213,9 +210,9 @@ module PEROBS
213
210
  def each(&block)
214
211
  @btree.each do |index, entry|
215
212
  if entry.is_a?(Collisions)
216
- break unless entry.each do |c_entry|
213
+ break if entry.each do |c_entry|
217
214
  yield(c_entry.key, c_entry.value)
218
- end
215
+ end.nil?
219
216
  else
220
217
  yield(entry.key, entry.value)
221
218
  end
@@ -234,20 +231,7 @@ module PEROBS
234
231
  # Check if the data structure contains any errors.
235
232
  # @return [Boolean] true if no errors were found, false otherwise
236
233
  def check
237
- return false unless @btree.check
238
-
239
- i = 0
240
- each do |k, v|
241
- i += 1
242
- end
243
-
244
- unless @entry_counter == i
245
- PEROBS.log.error "BigHash contains #{i} values but entry counter " +
246
- "is #{@entry_counter}"
247
- return false
248
- end
249
-
250
- true
234
+ return @btree.check
251
235
  end
252
236
 
253
237
  private
@@ -140,7 +140,7 @@ module PEROBS
140
140
  def each(&block)
141
141
  node = @first_leaf
142
142
  while node
143
- node.each_element(&block)
143
+ break if node.each_element(&block).nil?
144
144
  node = node.next_sibling
145
145
  end
146
146
  end
@@ -166,6 +166,19 @@ module PEROBS
166
166
  # @return [Boolean] true if no errors were found, false otherwise
167
167
  def check(&block)
168
168
  @root.check(&block)
169
+
170
+ i = 0
171
+ each do |k, v|
172
+ i += 1
173
+ end
174
+
175
+ unless @entry_counter == i
176
+ PEROBS.log.error "BigTree contains #{i} values but entry counter " +
177
+ "is #{@entry_counter}"
178
+ return false
179
+ end
180
+
181
+ true
169
182
  end
170
183
 
171
184
  # Gather some statistics regarding the tree structure.
@@ -227,7 +227,7 @@ module PEROBS
227
227
  # Iterate over all the key/value pairs of the node.
228
228
  # @yield [key, value]
229
229
  def each_element
230
- return unless is_leaf?
230
+ return self unless is_leaf?
231
231
 
232
232
  0.upto(@keys.length - 1) do |i|
233
233
  yield(@keys[i], @values[i])
@@ -237,7 +237,7 @@ module PEROBS
237
237
  # Iterate over all the key/value pairs of the node in reverse order.
238
238
  # @yield [key, value]
239
239
  def reverse_each_element
240
- return unless is_leaf?
240
+ return self unless is_leaf?
241
241
 
242
242
  (@keys.length - 1).downto(0) do |i|
243
243
  yield(@keys[i], @values[i])
@@ -2,7 +2,7 @@
2
2
  #
3
3
  # = Cache.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2015, 2016 by Chris Schlaeger <chris@taskjuggler.org>
5
+ # Copyright (c) 2015, 2016, 2019 by Chris Schlaeger <chris@taskjuggler.org>
6
6
  #
7
7
  # MIT License
8
8
  #
@@ -66,10 +66,10 @@ module PEROBS
66
66
  def cache_write(obj)
67
67
  # This is just a safety check. It can probably be disabled in the future
68
68
  # to increase performance.
69
- if obj.respond_to?(:is_poxreference?)
70
- # If this condition triggers, we have a bug in the library.
71
- PEROBS.log.fatal "POXReference objects should never be cached"
72
- end
69
+ #if obj.respond_to?(:is_poxreference?)
70
+ # # If this condition triggers, we have a bug in the library.
71
+ # PEROBS.log.fatal "POXReference objects should never be cached"
72
+ #end
73
73
 
74
74
  if @transaction_stack.empty?
75
75
  # We are not in transaction mode.
@@ -93,6 +93,31 @@ module PEROBS
93
93
  end
94
94
  end
95
95
 
96
+ # Evict the object with the given ID from the cache.
97
+ # @param id [Integer] ID of the cached PEROBS::ObjectBase
98
+ # @return [True/False] True if object was stored in the cache. False
99
+ # otherwise.
100
+ def evict(id)
101
+ unless @transaction_stack.empty?
102
+ PEROBS.log.fatal "You cannot evict entries during a transaction."
103
+ end
104
+
105
+ idx = id & @mask
106
+ # The index is just a hash. We still need to check if the object IDs are
107
+ # actually the same before we can return the object.
108
+ if (obj = @writes[idx]) && obj._id == id
109
+ # The object is in the write cache.
110
+ @writes[idx] = nil
111
+ return true
112
+ elsif (obj = @reads[idx]) && obj._id == id
113
+ # The object is in the read cache.
114
+ @reads[idx] = nil
115
+ return true
116
+ end
117
+
118
+ false
119
+ end
120
+
96
121
  # Return the PEROBS::Object with the specified ID or nil if not found.
97
122
  # @param id [Integer] ID of the cached PEROBS::ObjectBase
98
123
  def object_by_id(id)
@@ -160,7 +185,7 @@ module PEROBS
160
185
  transactions = @transaction_stack.pop
161
186
  # Merge the two lists
162
187
  @transaction_stack.push(@transaction_stack.pop + transactions)
163
- # Ensure that each object is only included once in the list.
188
+ # Ensure that each object ID is only included once in the list.
164
189
  @transaction_stack.last.uniq!
165
190
  end
166
191
  end
@@ -40,7 +40,10 @@ module PEROBS
40
40
  # bytes long header that stores the total entry count, the total space
41
41
  # count, the offset of the first entry and the offset of the first space.
42
42
  # The header is followed by a custom entry section. Each entry is also 8
43
- # bytes long. After the custom entry section the data blobs start.
43
+ # bytes long. After the custom entry section the data blobs start. Each data
44
+ # blob starts with a mark byte that indicates if the blob is valid data (2),
45
+ # a free space (0) or reserved space (1). Then it is followed by @entry_bytes
46
+ # number of bytes for the data blob.
44
47
  class EquiBlobsFile
45
48
 
46
49
  TOTAL_ENTRIES_OFFSET = 0
@@ -501,6 +504,7 @@ module PEROBS
501
504
  next_offset = address_to_offset(1)
502
505
  total_entries = 0
503
506
  total_spaces = 0
507
+ last_entry_is_space = false
504
508
  @progressmeter.start("Checking #{@name} entries",
505
509
  @total_spaces + @total_entries) do |pm|
506
510
  begin
@@ -511,6 +515,7 @@ module PEROBS
511
515
  case marker
512
516
  when 0
513
517
  total_spaces += 1
518
+ last_entry_is_space = true
514
519
  when 1
515
520
  PEROBS.log.error "Entry at address " +
516
521
  "#{offset_to_address(next_offset)} in EquiBlobsFile " +
@@ -518,6 +523,7 @@ module PEROBS
518
523
  return false
519
524
  when 2
520
525
  total_entries += 1
526
+ last_entry_is_space = false
521
527
  else
522
528
  PEROBS.log.error "Entry at address " +
523
529
  "#{offset_to_address(next_offset)} in EquiBlobsFile " +
@@ -535,6 +541,11 @@ module PEROBS
535
541
  end
536
542
  end
537
543
 
544
+ if last_entry_is_space
545
+ PEROBS.log.error "EquiBlobsFile #{@file_name} is not properly trimmed"
546
+ return false
547
+ end
548
+
538
549
  unless total_spaces == @total_spaces
539
550
  PEROBS.log.error "Mismatch between space counter and spaces in " +
540
551
  "EquiBlobsFile #{@file_name}. Counter: #{@total_spaces} " +
@@ -2,7 +2,7 @@
2
2
  #
3
3
  # = FlatFile.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2016, 2018 by Chris Schlaeger <chris@taskjuggler.org>
5
+ # Copyright (c) 2016, 2018, 2019 by Chris Schlaeger <chris@taskjuggler.org>
6
6
  #
7
7
  # MIT License
8
8
  #
@@ -31,6 +31,7 @@ require 'perobs/Log'
31
31
  require 'perobs/FlatFileBlobHeader'
32
32
  require 'perobs/BTree'
33
33
  require 'perobs/SpaceTree'
34
+ require 'perobs/SpaceManager'
34
35
  require 'perobs/IDList'
35
36
 
36
37
  module PEROBS
@@ -51,7 +52,14 @@ module PEROBS
51
52
  @f = nil
52
53
  @marks = nil
53
54
  @index = BTree.new(@db_dir, 'index', INDEX_BTREE_ORDER, @progressmeter)
54
- @space_list = SpaceTree.new(@db_dir, @progressmeter)
55
+ old_spaces_file = File.join(@db_dir, 'database_spaces.blobs')
56
+ if File.exist?(old_spaces_file)
57
+ # PEROBS version 4.1.0 and earlier used this space list format. It is
58
+ # deprecated now. Newly created DBs use the SpaceManager format.
59
+ @space_list = SpaceTree.new(@db_dir, @progressmeter)
60
+ else
61
+ @space_list = SpaceManager.new(@db_dir, @progressmeter)
62
+ end
55
63
  end
56
64
 
57
65
  # Open the flat file for reading and writing.
@@ -134,7 +142,7 @@ module PEROBS
134
142
  end
135
143
 
136
144
  # Delete all unmarked objects.
137
- def delete_unmarked_objects
145
+ def delete_unmarked_objects(&block)
138
146
  # We don't update the index and the space list during this operation as
139
147
  # we defragmentize the blob file at the end. We'll end the operation
140
148
  # with an empty space list.
@@ -145,6 +153,7 @@ module PEROBS
145
153
  each_blob_header do |header|
146
154
  if header.is_valid? && !@marks.include?(header.id)
147
155
  delete_obj_by_address(header.addr, header.id)
156
+ yield(header.id) if block_given?
148
157
  deleted_objects_count += 1
149
158
  end
150
159
 
@@ -183,12 +192,14 @@ module PEROBS
183
192
  # performance impact of compression is not compensated by writing
184
193
  # less data to the storage.
185
194
  compressed = false
186
- if raw_obj.bytesize > 256
195
+ raw_obj_bytesize = raw_obj.bytesize
196
+ if raw_obj_bytesize > 256
187
197
  raw_obj = Zlib.deflate(raw_obj)
198
+ raw_obj_bytesize = raw_obj.bytesize
188
199
  compressed = true
189
200
  end
190
201
 
191
- addr, length = find_free_blob(raw_obj.bytesize)
202
+ addr, length = find_free_blob(raw_obj_bytesize)
192
203
  begin
193
204
  if length != -1
194
205
  # Just a safeguard so we don't overwrite current data.
@@ -197,8 +208,8 @@ module PEROBS
197
208
  PEROBS.log.fatal "Length in free list (#{length}) and header " +
198
209
  "(#{header.length}) for address #{addr} don't match."
199
210
  end
200
- if raw_obj.bytesize > header.length
201
- PEROBS.log.fatal "Object (#{raw_obj.bytesize}) is longer than " +
211
+ if raw_obj_bytesize > header.length
212
+ PEROBS.log.fatal "Object (#{raw_obj_bytesize}) is longer than " +
202
213
  "blob space (#{header.length})."
203
214
  end
204
215
  if header.is_valid?
@@ -208,19 +219,19 @@ module PEROBS
208
219
  end
209
220
  flags = 1 << FlatFileBlobHeader::VALID_FLAG_BIT
210
221
  flags |= (1 << FlatFileBlobHeader::COMPRESSED_FLAG_BIT) if compressed
211
- FlatFileBlobHeader.new(@f, addr, flags, raw_obj.bytesize, id, crc).write
222
+ FlatFileBlobHeader.new(@f, addr, flags, raw_obj_bytesize, id, crc).write
212
223
  @f.write(raw_obj)
213
- if length != -1 && raw_obj.bytesize < length
224
+ if length != -1 && raw_obj_bytesize < length
214
225
  # The new object was not appended and it did not completely fill the
215
226
  # free space. So we have to write a new header to mark the remaining
216
227
  # empty space.
217
- unless length - raw_obj.bytesize >= FlatFileBlobHeader::LENGTH
228
+ unless length - raw_obj_bytesize >= FlatFileBlobHeader::LENGTH
218
229
  PEROBS.log.fatal "Not enough space to append the empty space " +
219
- "header (space: #{length} bytes, object: #{raw_obj.bytesize} " +
230
+ "header (space: #{length} bytes, object: #{raw_obj_bytesize} " +
220
231
  "bytes)."
221
232
  end
222
233
  space_address = @f.pos
223
- space_length = length - FlatFileBlobHeader::LENGTH - raw_obj.bytesize
234
+ space_length = length - FlatFileBlobHeader::LENGTH - raw_obj_bytesize
224
235
  FlatFileBlobHeader.new(@f, space_address, 0, space_length,
225
236
  0, 0).write
226
237
  # Register the new space with the space list.
@@ -461,6 +472,7 @@ module PEROBS
461
472
  new_index.open
462
473
 
463
474
  corrupted_blobs = 0
475
+ end_of_last_healthy_blob = nil
464
476
  @progressmeter.start('Checking blobs file', @f.size) do |pm|
465
477
  corrupted_blobs = each_blob_header do |header|
466
478
  if header.is_valid?
@@ -529,12 +541,26 @@ module PEROBS
529
541
  # ID is unique so far. Add it to the shadow index.
530
542
  new_index.insert(header.id, header.addr)
531
543
  end
532
-
533
544
  end
545
+ end_of_last_healthy_blob = header.addr +
546
+ FlatFileBlobHeader::LENGTH + header.length
534
547
 
535
548
  pm.update(header.addr)
536
549
  end
537
550
 
551
+ if end_of_last_healthy_blob && end_of_last_healthy_blob != @f.size
552
+ # The blob file ends with a corrupted blob header.
553
+ PEROBS.log.error "#{@f.size - end_of_last_healthy_blob} corrupted " +
554
+ 'bytes found at the end of FlatFile.'
555
+ corrupted_blobs += 1
556
+ if repair
557
+ PEROBS.log.error "Truncating FlatFile to " +
558
+ "#{end_of_last_healthy_blob} bytes by discarding " +
559
+ "#{@f.size - end_of_last_healthy_blob} bytes"
560
+ @f.truncate(end_of_last_healthy_blob)
561
+ end
562
+ end
563
+
538
564
  errors += corrupted_blobs
539
565
  end
540
566
 
@@ -546,7 +572,7 @@ module PEROBS
546
572
  erase_index_files
547
573
  defragmentize
548
574
  regenerate_index_and_spaces
549
- else
575
+ elsif corrupted_blobs == 0
550
576
  # Now we check the index data. It must be correct and the entries must
551
577
  # match the blob file. All entries in the index must be in the blob file
552
578
  # and vice versa.
@@ -575,6 +601,141 @@ module PEROBS
575
601
  errors
576
602
  end
577
603
 
604
+ # Repair the FlatFile. In contrast to the repair functionality in the
605
+ # check() method this method is much faster. It simply re-creates the
606
+ # index and space list from the blob file.
607
+ # @param repair [Boolean] True if errors should be fixed.
608
+ # @return [Integer] Number of errors found
609
+ def repair
610
+ errors = 0
611
+ return errors unless @f
612
+
613
+ t = Time.now
614
+ PEROBS.log.info "Repairing FlatFile database"
615
+
616
+ # Erase and re-open the index and space list files. We purposely don't
617
+ # close the files as it would trigger needless flushing.
618
+ clear_index_files(true)
619
+
620
+ # Now we scan the blob file and re-index all blobs and spaces. Corrupted
621
+ # blobs will be skipped.
622
+ corrupted_blobs = 0
623
+ end_of_last_healthy_blob = nil
624
+ @progressmeter.start('Re-indexing blobs file', @f.size) do |pm|
625
+ corrupted_blobs = each_blob_header do |header|
626
+ if header.corruption_start
627
+ # The blob is preceded by a corrupted area. We create a new
628
+ # header of a deleted blob for this area and write the new blob
629
+ # over it.
630
+ if (data_length = header.addr - header.corruption_start -
631
+ FlatFileBlobHeader::LENGTH) <= 0
632
+ PEROBS.log.error "Found a corrupted blob that is too small to " +
633
+ "fit a header (#{data_length}). File must be defragmented."
634
+ else
635
+ new_header = FlatFileBlobHeader.new(@f, header.corruption_start,
636
+ 0, data_length, 0, 0)
637
+ new_header.write
638
+ @space_list.add_space(header.corruption_start, data_length)
639
+ end
640
+ end
641
+
642
+ if header.is_valid?
643
+ # We have a non-deleted entry.
644
+ begin
645
+ @f.seek(header.addr + FlatFileBlobHeader::LENGTH)
646
+ buf = @f.read(header.length)
647
+ if buf.bytesize != header.length
648
+ PEROBS.log.error "Premature end of file in blob with ID " +
649
+ "#{header.id}."
650
+ discard_damaged_blob(header)
651
+ errors += 1
652
+ next
653
+ end
654
+
655
+ # Uncompress the data if the compression bit is set in the mark
656
+ # byte.
657
+ if header.is_compressed?
658
+ begin
659
+ buf = Zlib.inflate(buf)
660
+ rescue Zlib::BufError, Zlib::DataError
661
+ PEROBS.log.error "Corrupted compressed block with ID " +
662
+ "#{header.id} found."
663
+ discard_damaged_blob(header)
664
+ errors += 1
665
+ next
666
+ end
667
+ end
668
+
669
+ if header.crc && checksum(buf) != header.crc
670
+ PEROBS.log.error "Checksum failure while checking blob " +
671
+ "with ID #{header.id}"
672
+ discard_damaged_blob(header)
673
+ errors += 1
674
+ next
675
+ end
676
+ rescue IOError => e
677
+ PEROBS.log.fatal "Check of blob with ID #{header.id} failed: " +
678
+ e.message
679
+ end
680
+
681
+ # Check if the ID has already been found in the file.
682
+ if (previous_address = @index.get(header.id))
683
+ PEROBS.log.error "Multiple blobs for ID #{header.id} found. " +
684
+ "Addresses: #{previous_address}, #{header.addr}"
685
+ errors += 1
686
+ previous_header = FlatFileBlobHeader.read(@f, previous_address,
687
+ header.id)
688
+ # We have two blobs with the same ID and we must discard one of
689
+ # them.
690
+ if header.is_outdated?
691
+ discard_damaged_blob(header)
692
+ elsif previous_header.is_outdated?
693
+ discard_damaged_blob(previous_header)
694
+ else
695
+ PEROBS.log.error "None of the blobs with same ID have " +
696
+ "the outdated flag set. Deleting the smaller one."
697
+ errors += 1
698
+ discard_damaged_blob(header.length < previous_header.length ?
699
+ header : previous_header)
700
+ end
701
+ else
702
+ # ID is unique so far. Add it to the index.
703
+ @index.insert(header.id, header.addr)
704
+ end
705
+
706
+ else
707
+ if header.length > 0
708
+ @space_list.add_space(header.addr, header.length)
709
+ end
710
+ end
711
+ end_of_last_healthy_blob = header.addr +
712
+ FlatFileBlobHeader::LENGTH + header.length
713
+
714
+ pm.update(header.addr)
715
+ end
716
+
717
+ if end_of_last_healthy_blob && end_of_last_healthy_blob != @f.size
718
+ # The blob file ends with a corrupted blob header.
719
+ PEROBS.log.error "#{@f.size - end_of_last_healthy_blob} corrupted " +
720
+ 'bytes found at the end of FlatFile.'
721
+ corrupted_blobs += 1
722
+
723
+ PEROBS.log.error "Truncating FlatFile to " +
724
+ "#{end_of_last_healthy_blob} bytes by discarding " +
725
+ "#{@f.size - end_of_last_healthy_blob} bytes"
726
+ @f.truncate(end_of_last_healthy_blob)
727
+ end
728
+
729
+ errors += corrupted_blobs
730
+ end
731
+
732
+ sync
733
+ PEROBS.log.info "FlatFile repair completed in #{Time.now - t} seconds. " +
734
+ "#{errors} errors found."
735
+
736
+ errors
737
+ end
738
+
578
739
  # This method clears the index tree and the free space list and
579
740
  # regenerates them from the FlatFile.
580
741
  def regenerate_index_and_spaces
@@ -617,7 +778,11 @@ module PEROBS
617
778
  end
618
779
 
619
780
  def has_id_at?(id, address)
620
- header = FlatFileBlobHeader.read(@f, address)
781
+ begin
782
+ header = FlatFileBlobHeader.read(@f, address)
783
+ rescue PEROBS::FatalError
784
+ return false
785
+ end
621
786
  header.is_valid? && header.id == id
622
787
  end
623
788
 
@@ -733,7 +898,7 @@ module PEROBS
733
898
  unless @space_list.has_space?(header.addr, header.length)
734
899
  PEROBS.log.error "FlatFile has free space " +
735
900
  "(addr: #{header.addr}, len: #{header.length}) that is " +
736
- "not in FreeSpaceManager"
901
+ "not in SpaceManager"
737
902
  errors += 1
738
903
  end
739
904
  end
@@ -767,49 +932,36 @@ module PEROBS
767
932
  @index.open(abort_on_missing_files)
768
933
  @space_list.open
769
934
  rescue FatalError
770
- # Ensure that the index is really closed.
771
- @index.close
772
- # Erase it completely
773
- @index.erase
774
- # Then create it again.
775
- @index.open
776
-
777
- # Ensure that the spaces list is really closed.
778
- @space_list.close
779
- # Erase it completely
780
- @space_list.erase
781
- # Then create it again
782
- @space_list.open
783
-
935
+ clear_index_files
784
936
  regenerate_index_and_spaces
785
937
  end
786
938
  end
787
939
 
788
- def erase_index_files
940
+ def erase_index_files(dont_close_files = false)
789
941
  # Ensure that the index is really closed.
790
- @index.close
942
+ @index.close unless dont_close_files
791
943
  # Erase it completely
792
944
  @index.erase
793
945
 
794
946
  # Ensure that the spaces list is really closed.
795
- @space_list.close
947
+ @space_list.close unless dont_close_files
796
948
  # Erase it completely
797
949
  @space_list.erase
950
+
951
+ if @space_list.is_a?(SpaceTree)
952
+ # If we still use the old SpaceTree format, this is the moment to
953
+ # convert it to the new SpaceManager format.
954
+ @space_list = SpaceManager.new(@db_dir, @progressmeter)
955
+ PEROBS.log.warn "Converting space list from SpaceTree format " +
956
+ "to SpaceManager format"
957
+ end
798
958
  end
799
959
 
800
- def clear_index_files
801
- # Ensure that the index is really closed.
802
- @index.close
803
- # Erase it completely
804
- @index.erase
805
- # Then create it again.
806
- @index.open
960
+ def clear_index_files(dont_close_files = false)
961
+ erase_index_files(dont_close_files)
807
962
 
808
- # Ensure that the spaces list is really closed.
809
- @space_list.close
810
- # Erase it completely
811
- @space_list.erase
812
- # Then create it again
963
+ # Then create them again.
964
+ @index.open
813
965
  @space_list.open
814
966
  end
815
967