perobs 4.1.0 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -67,6 +67,8 @@ module PEROBS
67
67
  get_node.respond_to?(method)
68
68
  end
69
69
 
70
+ # Directly define some commonly used methods to avoid the method_missing
71
+ # overhead.
70
72
  def is_leaf
71
73
  get_node.is_leaf
72
74
  end
@@ -91,10 +93,18 @@ module PEROBS
91
93
  get_node.search_key_index(key)
92
94
  end
93
95
 
96
+ def insert(key, value)
97
+ get_node.insert(key, value)
98
+ end
99
+
94
100
  def insert_element(key, voc)
95
101
  get_node.insert_element(key, voc)
96
102
  end
97
103
 
104
+ def split_node
105
+ get_node.split_node
106
+ end
107
+
98
108
  # Compare this node to another node.
99
109
  # @return [Boolean] true if node address is identical, false otherwise
100
110
  def ==(node)
@@ -123,7 +123,11 @@ module PEROBS
123
123
  # @param index [Integer] Position in the array
124
124
  # @return [Integer or nil] found value or nil
125
125
  def [](index)
126
- index = validate_index_range(index)
126
+ begin
127
+ index = validate_index_range(index)
128
+ rescue IndexError
129
+ return nil
130
+ end
127
131
 
128
132
  return nil if index >= @entry_counter
129
133
 
@@ -190,6 +194,20 @@ module PEROBS
190
194
  @entry_counter == 0
191
195
  end
192
196
 
197
+ # Return the first entry of the Array.
198
+ def first
199
+ return nil unless @first_leaf
200
+
201
+ @first_leaf.values.first
202
+ end
203
+
204
+ # Return the last entry of the Array.
205
+ def last
206
+ return nil unless @last_leaf
207
+
208
+ @last_leaf.values.last
209
+ end
210
+
193
211
  # Iterate over all entries in the tree. Entries are always sorted by the
194
212
  # key.
195
213
  # @yield [key, value]
@@ -147,7 +147,9 @@ module PEROBS
147
147
  else
148
148
  # Descend into the right child node to add the value to.
149
149
  cidx = node.search_child_index(index)
150
- index -= node.offsets[cidx]
150
+ if (index -= node.offsets[cidx]) < 0
151
+ node.fatal "Index (#{index}) became negative"
152
+ end
151
153
  node = node.children[cidx]
152
154
  end
153
155
  end
@@ -392,7 +394,7 @@ module PEROBS
392
394
  @offsets.each_with_index do |offset, i|
393
395
  if i > 0
394
396
  if offset < last_offset
395
- error "Offset are not strictly monotoneously " +
397
+ error "Offsets are not strictly monotoneously " +
396
398
  "increasing: #{@offsets.inspect}"
397
399
  return false
398
400
  end
@@ -471,11 +473,12 @@ module PEROBS
471
473
  else
472
474
  begin
473
475
  if node.is_leaf?
474
- if node.values[position - 1]
476
+ if position <= node.size
475
477
  str += "#{node.tree_prefix} " +
476
478
  "#{position == node.size ? '-' : '|'} " +
477
479
  "[ #{node.value_index(position - 1)}: " +
478
- "#{node.values[position - 1]} ]\n"
480
+ "#{node.values[position - 1].nil? ?
481
+ 'nil' : node.values[position - 1]} ]\n"
479
482
  end
480
483
  end
481
484
  rescue => e
@@ -613,7 +616,7 @@ module PEROBS
613
616
  # Handle special case for empty offsets list.
614
617
  return 0 if @offsets.empty? || offset <= @offsets.first
615
618
 
616
- (@offsets.bsearch_index { |o| o >= offset } || @offsets.length) - 1
619
+ (@offsets.bsearch_index { |o| o > offset } || @offsets.length) - 1
617
620
  end
618
621
 
619
622
  # @return The index of the current node in the children list of the parent
@@ -656,7 +659,7 @@ module PEROBS
656
659
 
657
660
  # This method takes care of adjusting the offsets in tree in case elements
658
661
  # were inserted or removed. All nodes that hold children after the
659
- # insert/remove operation needs to be adjusted. Since child nodes get their
662
+ # insert/remove operation need to be adjusted. Since child nodes get their
660
663
  # offsets via their parents, only the parent node and the direct ancestor
661
664
  # followers need to be adjusted.
662
665
  # @param after_child [BigArrayNode] specifies the modified leaf node
@@ -910,7 +913,7 @@ module PEROBS
910
913
  # Root Node +--------------------------------+
911
914
  # Offsets | 0 11 |
912
915
  # Children | |
913
- # prepd v child v
916
+ # pred v child v
914
917
  # Level 1 +--------------------------++--------------------------+
915
918
  # Offsets | 0 4 7 || 0 2 5 |
916
919
  # Children | | | | | |
@@ -922,8 +925,9 @@ module PEROBS
922
925
  #
923
926
  # Remove the last predecessor offset and update the child offset with
924
927
  # it
925
- delta = @offsets[child_index] - pred.offsets.last
926
- @offsets[child_index] = pred.offsets.pop
928
+ delta = pred.children.last.values_count
929
+ @offsets[child_index] -= delta
930
+ pred.offsets.pop
927
931
  # Adjust all the offsets of the child
928
932
  child.offsets.map! { |o| o += delta }
929
933
  # And prepend the 0 offset
@@ -68,7 +68,7 @@ module PEROBS
68
68
  class Collisions < PEROBS::Array
69
69
  end
70
70
 
71
- attr_persist :btree, :entry_counter
71
+ attr_persist :btree
72
72
 
73
73
  # Create a new BigHash object.
74
74
  # @param p [Handle] Store handle
@@ -76,7 +76,6 @@ module PEROBS
76
76
  super(p)
77
77
  restore
78
78
  self.btree = @store.new(PEROBS::BigTree)
79
- self.entry_counter = 0
80
79
  end
81
80
 
82
81
  def restore
@@ -106,7 +105,6 @@ module PEROBS
106
105
  end
107
106
  index_to_insert += 1
108
107
  end
109
- self.entry_counter += 1 unless overwrite
110
108
  existing_entry[index_to_insert] = entry
111
109
  elsif existing_entry.key == key
112
110
  # The existing value is for the identical key. We can safely
@@ -119,12 +117,10 @@ module PEROBS
119
117
  array_entry << existing_entry
120
118
  array_entry << entry
121
119
  @btree.insert(hashed_key, array_entry)
122
- self.entry_counter += 1
123
120
  end
124
121
  else
125
122
  # No existing entry. Insert the new entry.
126
123
  @btree.insert(hashed_key, entry)
127
- self.entry_counter += 1
128
124
  end
129
125
  end
130
126
  end
@@ -170,6 +166,8 @@ module PEROBS
170
166
  false
171
167
  end
172
168
 
169
+ alias include? has_key?
170
+
173
171
  # Delete and return the entry for the given key. Return nil if no matching
174
172
  # entry exists.
175
173
  # @param key [Integer or String]
@@ -183,7 +181,6 @@ module PEROBS
183
181
  if entry.is_a?(PEROBS::Array)
184
182
  entry.each_with_index do |ae, i|
185
183
  if ae.key == key
186
- self.entry_counter -= 1
187
184
  return entry.delete_at(i).value
188
185
  end
189
186
  end
@@ -197,7 +194,7 @@ module PEROBS
197
194
  # Return the number of entries stored in the hash.
198
195
  # @return [Integer]
199
196
  def length
200
- @entry_counter
197
+ @btree.entry_counter
201
198
  end
202
199
 
203
200
  alias size length
@@ -205,7 +202,7 @@ module PEROBS
205
202
  # Return true if hash is empty. False otherwise.
206
203
  # @return [TrueClass, FalseClass]
207
204
  def empty?
208
- @entry_counter == 0
205
+ @btree.entry_counter == 0
209
206
  end
210
207
 
211
208
  # Calls the given block for each key/value pair.
@@ -213,9 +210,9 @@ module PEROBS
213
210
  def each(&block)
214
211
  @btree.each do |index, entry|
215
212
  if entry.is_a?(Collisions)
216
- break unless entry.each do |c_entry|
213
+ break if entry.each do |c_entry|
217
214
  yield(c_entry.key, c_entry.value)
218
- end
215
+ end.nil?
219
216
  else
220
217
  yield(entry.key, entry.value)
221
218
  end
@@ -234,20 +231,7 @@ module PEROBS
234
231
  # Check if the data structure contains any errors.
235
232
  # @return [Boolean] true if no errors were found, false otherwise
236
233
  def check
237
- return false unless @btree.check
238
-
239
- i = 0
240
- each do |k, v|
241
- i += 1
242
- end
243
-
244
- unless @entry_counter == i
245
- PEROBS.log.error "BigHash contains #{i} values but entry counter " +
246
- "is #{@entry_counter}"
247
- return false
248
- end
249
-
250
- true
234
+ return @btree.check
251
235
  end
252
236
 
253
237
  private
@@ -140,7 +140,7 @@ module PEROBS
140
140
  def each(&block)
141
141
  node = @first_leaf
142
142
  while node
143
- node.each_element(&block)
143
+ break if node.each_element(&block).nil?
144
144
  node = node.next_sibling
145
145
  end
146
146
  end
@@ -166,6 +166,19 @@ module PEROBS
166
166
  # @return [Boolean] true if no errors were found, false otherwise
167
167
  def check(&block)
168
168
  @root.check(&block)
169
+
170
+ i = 0
171
+ each do |k, v|
172
+ i += 1
173
+ end
174
+
175
+ unless @entry_counter == i
176
+ PEROBS.log.error "BigTree contains #{i} values but entry counter " +
177
+ "is #{@entry_counter}"
178
+ return false
179
+ end
180
+
181
+ true
169
182
  end
170
183
 
171
184
  # Gather some statistics regarding the tree structure.
@@ -227,7 +227,7 @@ module PEROBS
227
227
  # Iterate over all the key/value pairs of the node.
228
228
  # @yield [key, value]
229
229
  def each_element
230
- return unless is_leaf?
230
+ return self unless is_leaf?
231
231
 
232
232
  0.upto(@keys.length - 1) do |i|
233
233
  yield(@keys[i], @values[i])
@@ -237,7 +237,7 @@ module PEROBS
237
237
  # Iterate over all the key/value pairs of the node in reverse order.
238
238
  # @yield [key, value]
239
239
  def reverse_each_element
240
- return unless is_leaf?
240
+ return self unless is_leaf?
241
241
 
242
242
  (@keys.length - 1).downto(0) do |i|
243
243
  yield(@keys[i], @values[i])
@@ -2,7 +2,7 @@
2
2
  #
3
3
  # = Cache.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2015, 2016 by Chris Schlaeger <chris@taskjuggler.org>
5
+ # Copyright (c) 2015, 2016, 2019 by Chris Schlaeger <chris@taskjuggler.org>
6
6
  #
7
7
  # MIT License
8
8
  #
@@ -66,10 +66,10 @@ module PEROBS
66
66
  def cache_write(obj)
67
67
  # This is just a safety check. It can probably be disabled in the future
68
68
  # to increase performance.
69
- if obj.respond_to?(:is_poxreference?)
70
- # If this condition triggers, we have a bug in the library.
71
- PEROBS.log.fatal "POXReference objects should never be cached"
72
- end
69
+ #if obj.respond_to?(:is_poxreference?)
70
+ # # If this condition triggers, we have a bug in the library.
71
+ # PEROBS.log.fatal "POXReference objects should never be cached"
72
+ #end
73
73
 
74
74
  if @transaction_stack.empty?
75
75
  # We are not in transaction mode.
@@ -93,6 +93,31 @@ module PEROBS
93
93
  end
94
94
  end
95
95
 
96
+ # Evict the object with the given ID from the cache.
97
+ # @param id [Integer] ID of the cached PEROBS::ObjectBase
98
+ # @return [True/False] True if object was stored in the cache. False
99
+ # otherwise.
100
+ def evict(id)
101
+ unless @transaction_stack.empty?
102
+ PEROBS.log.fatal "You cannot evict entries during a transaction."
103
+ end
104
+
105
+ idx = id & @mask
106
+ # The index is just a hash. We still need to check if the object IDs are
107
+ # actually the same before we can return the object.
108
+ if (obj = @writes[idx]) && obj._id == id
109
+ # The object is in the write cache.
110
+ @writes[idx] = nil
111
+ return true
112
+ elsif (obj = @reads[idx]) && obj._id == id
113
+ # The object is in the read cache.
114
+ @reads[idx] = nil
115
+ return true
116
+ end
117
+
118
+ false
119
+ end
120
+
96
121
  # Return the PEROBS::Object with the specified ID or nil if not found.
97
122
  # @param id [Integer] ID of the cached PEROBS::ObjectBase
98
123
  def object_by_id(id)
@@ -160,7 +185,7 @@ module PEROBS
160
185
  transactions = @transaction_stack.pop
161
186
  # Merge the two lists
162
187
  @transaction_stack.push(@transaction_stack.pop + transactions)
163
- # Ensure that each object is only included once in the list.
188
+ # Ensure that each object ID is only included once in the list.
164
189
  @transaction_stack.last.uniq!
165
190
  end
166
191
  end
@@ -40,7 +40,10 @@ module PEROBS
40
40
  # bytes long header that stores the total entry count, the total space
41
41
  # count, the offset of the first entry and the offset of the first space.
42
42
  # The header is followed by a custom entry section. Each entry is also 8
43
- # bytes long. After the custom entry section the data blobs start.
43
+ # bytes long. After the custom entry section the data blobs start. Each data
44
+ # blob starts with a mark byte that indicates if the blob is valid data (2),
45
+ # a free space (0) or reserved space (1). Then it is followed by @entry_bytes
46
+ # number of bytes for the data blob.
44
47
  class EquiBlobsFile
45
48
 
46
49
  TOTAL_ENTRIES_OFFSET = 0
@@ -501,6 +504,7 @@ module PEROBS
501
504
  next_offset = address_to_offset(1)
502
505
  total_entries = 0
503
506
  total_spaces = 0
507
+ last_entry_is_space = false
504
508
  @progressmeter.start("Checking #{@name} entries",
505
509
  @total_spaces + @total_entries) do |pm|
506
510
  begin
@@ -511,6 +515,7 @@ module PEROBS
511
515
  case marker
512
516
  when 0
513
517
  total_spaces += 1
518
+ last_entry_is_space = true
514
519
  when 1
515
520
  PEROBS.log.error "Entry at address " +
516
521
  "#{offset_to_address(next_offset)} in EquiBlobsFile " +
@@ -518,6 +523,7 @@ module PEROBS
518
523
  return false
519
524
  when 2
520
525
  total_entries += 1
526
+ last_entry_is_space = false
521
527
  else
522
528
  PEROBS.log.error "Entry at address " +
523
529
  "#{offset_to_address(next_offset)} in EquiBlobsFile " +
@@ -535,6 +541,11 @@ module PEROBS
535
541
  end
536
542
  end
537
543
 
544
+ if last_entry_is_space
545
+ PEROBS.log.error "EquiBlobsFile #{@file_name} is not properly trimmed"
546
+ return false
547
+ end
548
+
538
549
  unless total_spaces == @total_spaces
539
550
  PEROBS.log.error "Mismatch between space counter and spaces in " +
540
551
  "EquiBlobsFile #{@file_name}. Counter: #{@total_spaces} " +
@@ -2,7 +2,7 @@
2
2
  #
3
3
  # = FlatFile.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2016, 2018 by Chris Schlaeger <chris@taskjuggler.org>
5
+ # Copyright (c) 2016, 2018, 2019 by Chris Schlaeger <chris@taskjuggler.org>
6
6
  #
7
7
  # MIT License
8
8
  #
@@ -31,6 +31,7 @@ require 'perobs/Log'
31
31
  require 'perobs/FlatFileBlobHeader'
32
32
  require 'perobs/BTree'
33
33
  require 'perobs/SpaceTree'
34
+ require 'perobs/SpaceManager'
34
35
  require 'perobs/IDList'
35
36
 
36
37
  module PEROBS
@@ -51,7 +52,14 @@ module PEROBS
51
52
  @f = nil
52
53
  @marks = nil
53
54
  @index = BTree.new(@db_dir, 'index', INDEX_BTREE_ORDER, @progressmeter)
54
- @space_list = SpaceTree.new(@db_dir, @progressmeter)
55
+ old_spaces_file = File.join(@db_dir, 'database_spaces.blobs')
56
+ if File.exist?(old_spaces_file)
57
+ # PEROBS version 4.1.0 and earlier used this space list format. It is
58
+ # deprecated now. Newly created DBs use the SpaceManager format.
59
+ @space_list = SpaceTree.new(@db_dir, @progressmeter)
60
+ else
61
+ @space_list = SpaceManager.new(@db_dir, @progressmeter)
62
+ end
55
63
  end
56
64
 
57
65
  # Open the flat file for reading and writing.
@@ -134,7 +142,7 @@ module PEROBS
134
142
  end
135
143
 
136
144
  # Delete all unmarked objects.
137
- def delete_unmarked_objects
145
+ def delete_unmarked_objects(&block)
138
146
  # We don't update the index and the space list during this operation as
139
147
  # we defragmentize the blob file at the end. We'll end the operation
140
148
  # with an empty space list.
@@ -145,6 +153,7 @@ module PEROBS
145
153
  each_blob_header do |header|
146
154
  if header.is_valid? && !@marks.include?(header.id)
147
155
  delete_obj_by_address(header.addr, header.id)
156
+ yield(header.id) if block_given?
148
157
  deleted_objects_count += 1
149
158
  end
150
159
 
@@ -183,12 +192,14 @@ module PEROBS
183
192
  # performance impact of compression is not compensated by writing
184
193
  # less data to the storage.
185
194
  compressed = false
186
- if raw_obj.bytesize > 256
195
+ raw_obj_bytesize = raw_obj.bytesize
196
+ if raw_obj_bytesize > 256
187
197
  raw_obj = Zlib.deflate(raw_obj)
198
+ raw_obj_bytesize = raw_obj.bytesize
188
199
  compressed = true
189
200
  end
190
201
 
191
- addr, length = find_free_blob(raw_obj.bytesize)
202
+ addr, length = find_free_blob(raw_obj_bytesize)
192
203
  begin
193
204
  if length != -1
194
205
  # Just a safeguard so we don't overwrite current data.
@@ -197,8 +208,8 @@ module PEROBS
197
208
  PEROBS.log.fatal "Length in free list (#{length}) and header " +
198
209
  "(#{header.length}) for address #{addr} don't match."
199
210
  end
200
- if raw_obj.bytesize > header.length
201
- PEROBS.log.fatal "Object (#{raw_obj.bytesize}) is longer than " +
211
+ if raw_obj_bytesize > header.length
212
+ PEROBS.log.fatal "Object (#{raw_obj_bytesize}) is longer than " +
202
213
  "blob space (#{header.length})."
203
214
  end
204
215
  if header.is_valid?
@@ -208,19 +219,19 @@ module PEROBS
208
219
  end
209
220
  flags = 1 << FlatFileBlobHeader::VALID_FLAG_BIT
210
221
  flags |= (1 << FlatFileBlobHeader::COMPRESSED_FLAG_BIT) if compressed
211
- FlatFileBlobHeader.new(@f, addr, flags, raw_obj.bytesize, id, crc).write
222
+ FlatFileBlobHeader.new(@f, addr, flags, raw_obj_bytesize, id, crc).write
212
223
  @f.write(raw_obj)
213
- if length != -1 && raw_obj.bytesize < length
224
+ if length != -1 && raw_obj_bytesize < length
214
225
  # The new object was not appended and it did not completely fill the
215
226
  # free space. So we have to write a new header to mark the remaining
216
227
  # empty space.
217
- unless length - raw_obj.bytesize >= FlatFileBlobHeader::LENGTH
228
+ unless length - raw_obj_bytesize >= FlatFileBlobHeader::LENGTH
218
229
  PEROBS.log.fatal "Not enough space to append the empty space " +
219
- "header (space: #{length} bytes, object: #{raw_obj.bytesize} " +
230
+ "header (space: #{length} bytes, object: #{raw_obj_bytesize} " +
220
231
  "bytes)."
221
232
  end
222
233
  space_address = @f.pos
223
- space_length = length - FlatFileBlobHeader::LENGTH - raw_obj.bytesize
234
+ space_length = length - FlatFileBlobHeader::LENGTH - raw_obj_bytesize
224
235
  FlatFileBlobHeader.new(@f, space_address, 0, space_length,
225
236
  0, 0).write
226
237
  # Register the new space with the space list.
@@ -461,6 +472,7 @@ module PEROBS
461
472
  new_index.open
462
473
 
463
474
  corrupted_blobs = 0
475
+ end_of_last_healthy_blob = nil
464
476
  @progressmeter.start('Checking blobs file', @f.size) do |pm|
465
477
  corrupted_blobs = each_blob_header do |header|
466
478
  if header.is_valid?
@@ -529,12 +541,26 @@ module PEROBS
529
541
  # ID is unique so far. Add it to the shadow index.
530
542
  new_index.insert(header.id, header.addr)
531
543
  end
532
-
533
544
  end
545
+ end_of_last_healthy_blob = header.addr +
546
+ FlatFileBlobHeader::LENGTH + header.length
534
547
 
535
548
  pm.update(header.addr)
536
549
  end
537
550
 
551
+ if end_of_last_healthy_blob && end_of_last_healthy_blob != @f.size
552
+ # The blob file ends with a corrupted blob header.
553
+ PEROBS.log.error "#{@f.size - end_of_last_healthy_blob} corrupted " +
554
+ 'bytes found at the end of FlatFile.'
555
+ corrupted_blobs += 1
556
+ if repair
557
+ PEROBS.log.error "Truncating FlatFile to " +
558
+ "#{end_of_last_healthy_blob} bytes by discarding " +
559
+ "#{@f.size - end_of_last_healthy_blob} bytes"
560
+ @f.truncate(end_of_last_healthy_blob)
561
+ end
562
+ end
563
+
538
564
  errors += corrupted_blobs
539
565
  end
540
566
 
@@ -546,7 +572,7 @@ module PEROBS
546
572
  erase_index_files
547
573
  defragmentize
548
574
  regenerate_index_and_spaces
549
- else
575
+ elsif corrupted_blobs == 0
550
576
  # Now we check the index data. It must be correct and the entries must
551
577
  # match the blob file. All entries in the index must be in the blob file
552
578
  # and vice versa.
@@ -575,6 +601,141 @@ module PEROBS
575
601
  errors
576
602
  end
577
603
 
604
+ # Repair the FlatFile. In contrast to the repair functionality in the
605
+ # check() method this method is much faster. It simply re-creates the
606
+ # index and space list from the blob file.
607
+ # @param repair [Boolean] True if errors should be fixed.
608
+ # @return [Integer] Number of errors found
609
+ def repair
610
+ errors = 0
611
+ return errors unless @f
612
+
613
+ t = Time.now
614
+ PEROBS.log.info "Repairing FlatFile database"
615
+
616
+ # Erase and re-open the index and space list files. We purposely don't
617
+ # close the files as it would trigger needless flushing.
618
+ clear_index_files(true)
619
+
620
+ # Now we scan the blob file and re-index all blobs and spaces. Corrupted
621
+ # blobs will be skipped.
622
+ corrupted_blobs = 0
623
+ end_of_last_healthy_blob = nil
624
+ @progressmeter.start('Re-indexing blobs file', @f.size) do |pm|
625
+ corrupted_blobs = each_blob_header do |header|
626
+ if header.corruption_start
627
+ # The blob is preceded by a corrupted area. We create a new
628
+ # header of a deleted blob for this area and write the new blob
629
+ # over it.
630
+ if (data_length = header.addr - header.corruption_start -
631
+ FlatFileBlobHeader::LENGTH) <= 0
632
+ PEROBS.log.error "Found a corrupted blob that is too small to " +
633
+ "fit a header (#{data_length}). File must be defragmented."
634
+ else
635
+ new_header = FlatFileBlobHeader.new(@f, header.corruption_start,
636
+ 0, data_length, 0, 0)
637
+ new_header.write
638
+ @space_list.add_space(header.corruption_start, data_length)
639
+ end
640
+ end
641
+
642
+ if header.is_valid?
643
+ # We have a non-deleted entry.
644
+ begin
645
+ @f.seek(header.addr + FlatFileBlobHeader::LENGTH)
646
+ buf = @f.read(header.length)
647
+ if buf.bytesize != header.length
648
+ PEROBS.log.error "Premature end of file in blob with ID " +
649
+ "#{header.id}."
650
+ discard_damaged_blob(header)
651
+ errors += 1
652
+ next
653
+ end
654
+
655
+ # Uncompress the data if the compression bit is set in the mark
656
+ # byte.
657
+ if header.is_compressed?
658
+ begin
659
+ buf = Zlib.inflate(buf)
660
+ rescue Zlib::BufError, Zlib::DataError
661
+ PEROBS.log.error "Corrupted compressed block with ID " +
662
+ "#{header.id} found."
663
+ discard_damaged_blob(header)
664
+ errors += 1
665
+ next
666
+ end
667
+ end
668
+
669
+ if header.crc && checksum(buf) != header.crc
670
+ PEROBS.log.error "Checksum failure while checking blob " +
671
+ "with ID #{header.id}"
672
+ discard_damaged_blob(header)
673
+ errors += 1
674
+ next
675
+ end
676
+ rescue IOError => e
677
+ PEROBS.log.fatal "Check of blob with ID #{header.id} failed: " +
678
+ e.message
679
+ end
680
+
681
+ # Check if the ID has already been found in the file.
682
+ if (previous_address = @index.get(header.id))
683
+ PEROBS.log.error "Multiple blobs for ID #{header.id} found. " +
684
+ "Addresses: #{previous_address}, #{header.addr}"
685
+ errors += 1
686
+ previous_header = FlatFileBlobHeader.read(@f, previous_address,
687
+ header.id)
688
+ # We have two blobs with the same ID and we must discard one of
689
+ # them.
690
+ if header.is_outdated?
691
+ discard_damaged_blob(header)
692
+ elsif previous_header.is_outdated?
693
+ discard_damaged_blob(previous_header)
694
+ else
695
+ PEROBS.log.error "None of the blobs with same ID have " +
696
+ "the outdated flag set. Deleting the smaller one."
697
+ errors += 1
698
+ discard_damaged_blob(header.length < previous_header.length ?
699
+ header : previous_header)
700
+ end
701
+ else
702
+ # ID is unique so far. Add it to the shadow index.
703
+ @index.insert(header.id, header.addr)
704
+ end
705
+
706
+ else
707
+ if header.length > 0
708
+ @space_list.add_space(header.addr, header.length)
709
+ end
710
+ end
711
+ end_of_last_healthy_blob = header.addr +
712
+ FlatFileBlobHeader::LENGTH + header.length
713
+
714
+ pm.update(header.addr)
715
+ end
716
+
717
+ if end_of_last_healthy_blob && end_of_last_healthy_blob != @f.size
718
+ # The blob file ends with a corrupted blob header.
719
+ PEROBS.log.error "#{@f.size - end_of_last_healthy_blob} corrupted " +
720
+ 'bytes found at the end of FlatFile.'
721
+ corrupted_blobs += 1
722
+
723
+ PEROBS.log.error "Truncating FlatFile to " +
724
+ "#{end_of_last_healthy_blob} bytes by discarding " +
725
+ "#{@f.size - end_of_last_healthy_blob} bytes"
726
+ @f.truncate(end_of_last_healthy_blob)
727
+ end
728
+
729
+ errors += corrupted_blobs
730
+ end
731
+
732
+ sync
733
+ PEROBS.log.info "FlatFile repair completed in #{Time.now - t} seconds. " +
734
+ "#{errors} errors found."
735
+
736
+ errors
737
+ end
738
+
578
739
  # This method clears the index tree and the free space list and
579
740
  # regenerates them from the FlatFile.
580
741
  def regenerate_index_and_spaces
@@ -617,7 +778,11 @@ module PEROBS
617
778
  end
618
779
 
619
780
  def has_id_at?(id, address)
620
- header = FlatFileBlobHeader.read(@f, address)
781
+ begin
782
+ header = FlatFileBlobHeader.read(@f, address)
783
+ rescue PEROBS::FatalError
784
+ return false
785
+ end
621
786
  header.is_valid? && header.id == id
622
787
  end
623
788
 
@@ -733,7 +898,7 @@ module PEROBS
733
898
  unless @space_list.has_space?(header.addr, header.length)
734
899
  PEROBS.log.error "FlatFile has free space " +
735
900
  "(addr: #{header.addr}, len: #{header.length}) that is " +
736
- "not in FreeSpaceManager"
901
+ "not in SpaceManager"
737
902
  errors += 1
738
903
  end
739
904
  end
@@ -767,49 +932,36 @@ module PEROBS
767
932
  @index.open(abort_on_missing_files)
768
933
  @space_list.open
769
934
  rescue FatalError
770
- # Ensure that the index is really closed.
771
- @index.close
772
- # Erase it completely
773
- @index.erase
774
- # Then create it again.
775
- @index.open
776
-
777
- # Ensure that the spaces list is really closed.
778
- @space_list.close
779
- # Erase it completely
780
- @space_list.erase
781
- # Then create it again
782
- @space_list.open
783
-
935
+ clear_index_files
784
936
  regenerate_index_and_spaces
785
937
  end
786
938
  end
787
939
 
788
- def erase_index_files
940
+ def erase_index_files(dont_close_files = false)
789
941
  # Ensure that the index is really closed.
790
- @index.close
942
+ @index.close unless dont_close_files
791
943
  # Erase it completely
792
944
  @index.erase
793
945
 
794
946
  # Ensure that the spaces list is really closed.
795
- @space_list.close
947
+ @space_list.close unless dont_close_files
796
948
  # Erase it completely
797
949
  @space_list.erase
950
+
951
+ if @space_list.is_a?(SpaceTree)
952
+ # If we still use the old SpaceTree format, this is the moment to
953
+ # convert it to the new SpaceManager format.
954
+ @space_list = SpaceManager.new(@db_dir, @progressmeter)
955
+ PEROBS.log.warn "Converting space list from SpaceTree format " +
956
+ "to SpaceManager format"
957
+ end
798
958
  end
799
959
 
800
- def clear_index_files
801
- # Ensure that the index is really closed.
802
- @index.close
803
- # Erase it completely
804
- @index.erase
805
- # Then create it again.
806
- @index.open
960
+ def clear_index_files(dont_close_files = false)
961
+ erase_index_files(dont_close_files)
807
962
 
808
- # Ensure that the spaces list is really closed.
809
- @space_list.close
810
- # Erase it completely
811
- @space_list.erase
812
- # Then create it again
963
+ # Then create them again.
964
+ @index.open
813
965
  @space_list.open
814
966
  end
815
967