perobs 4.0.0 → 4.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. data/lib/perobs.rb +1 -0
  3. data/lib/perobs/Array.rb +66 -19
  4. data/lib/perobs/BTree.rb +83 -12
  5. data/lib/perobs/BTreeBlob.rb +1 -1
  6. data/lib/perobs/BTreeDB.rb +2 -2
  7. data/lib/perobs/BTreeNode.rb +365 -85
  8. data/lib/perobs/BigArray.rb +267 -0
  9. data/lib/perobs/BigArrayNode.rb +998 -0
  10. data/lib/perobs/BigHash.rb +262 -0
  11. data/lib/perobs/BigTree.rb +184 -0
  12. data/lib/perobs/BigTreeNode.rb +873 -0
  13. data/lib/perobs/ConsoleProgressMeter.rb +61 -0
  14. data/lib/perobs/DataBase.rb +4 -3
  15. data/lib/perobs/DynamoDB.rb +57 -15
  16. data/lib/perobs/EquiBlobsFile.rb +143 -51
  17. data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
  18. data/lib/perobs/FlatFile.rb +363 -203
  19. data/lib/perobs/FlatFileBlobHeader.rb +98 -54
  20. data/lib/perobs/FlatFileDB.rb +42 -20
  21. data/lib/perobs/Hash.rb +58 -13
  22. data/lib/perobs/IDList.rb +144 -0
  23. data/lib/perobs/IDListPage.rb +107 -0
  24. data/lib/perobs/IDListPageFile.rb +180 -0
  25. data/lib/perobs/IDListPageRecord.rb +142 -0
  26. data/lib/perobs/Object.rb +18 -15
  27. data/lib/perobs/ObjectBase.rb +38 -4
  28. data/lib/perobs/PersistentObjectCache.rb +53 -67
  29. data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
  30. data/lib/perobs/ProgressMeter.rb +97 -0
  31. data/lib/perobs/SpaceTree.rb +21 -12
  32. data/lib/perobs/SpaceTreeNode.rb +53 -61
  33. data/lib/perobs/Store.rb +71 -32
  34. data/lib/perobs/version.rb +1 -1
  35. data/perobs.gemspec +4 -4
  36. data/test/Array_spec.rb +15 -6
  37. data/test/BTree_spec.rb +5 -2
  38. data/test/BigArray_spec.rb +214 -0
  39. data/test/BigHash_spec.rb +144 -0
  40. data/test/BigTreeNode_spec.rb +153 -0
  41. data/test/BigTree_spec.rb +259 -0
  42. data/test/EquiBlobsFile_spec.rb +105 -1
  43. data/test/FNV_Hash_1a_64_spec.rb +59 -0
  44. data/test/FlatFileDB_spec.rb +63 -14
  45. data/test/Hash_spec.rb +1 -2
  46. data/test/IDList_spec.rb +77 -0
  47. data/test/LegacyDBs/LegacyDB.rb +151 -0
  48. data/test/LegacyDBs/version_3/class_map.json +1 -0
  49. data/test/LegacyDBs/version_3/config.json +1 -0
  50. data/test/LegacyDBs/version_3/database.blobs +0 -0
  51. data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
  52. data/test/LegacyDBs/version_3/index.blobs +0 -0
  53. data/test/LegacyDBs/version_3/version +1 -0
  54. data/test/LockFile_spec.rb +9 -6
  55. data/test/SpaceTree_spec.rb +4 -1
  56. data/test/Store_spec.rb +290 -199
  57. data/test/spec_helper.rb +9 -4
  58. metadata +47 -10
  59. data/lib/perobs/TreeDB.rb +0 -277
@@ -0,0 +1,54 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = FNV_Hash_1a_64.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2019 by Chris Schlaeger <chris@taskjuggler.org>
6
+ #
7
+ # MIT License
8
+ #
9
+ # Permission is hereby granted, free of charge, to any person obtaining
10
+ # a copy of this software and associated documentation files (the
11
+ # "Software"), to deal in the Software without restriction, including
12
+ # without limitation the rights to use, copy, modify, merge, publish,
13
+ # distribute, sublicense, and/or sell copies of the Software, and to
14
+ # permit persons to whom the Software is furnished to do so, subject to
15
+ # the following conditions:
16
+ #
17
+ # The above copyright notice and this permission notice shall be
18
+ # included in all copies or substantial portions of the Software.
19
+ #
20
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
+
28
+ module PEROBS
29
+
30
+ # This is an implementation of the Fowler Noll Vo hashing algorithm in the
31
+ # 1a variant for 64 bit hash values.
32
+ # https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
33
+ class FNV_Hash_1a_64
34
+
35
+ @@OFFSET = 14695981039346656037
36
+ @@PRIME = 1099511628211
37
+ @@MASK = 2**64 - 1
38
+
39
+ def self.digest(item)
40
+ hash = @@OFFSET
41
+
42
+ item.to_s.each_byte do |byte|
43
+ hash ^= byte
44
+ hash *= @@PRIME
45
+ hash &= @@MASK
46
+ end
47
+
48
+ hash
49
+ end
50
+
51
+ end
52
+
53
+ end
54
+
@@ -2,7 +2,7 @@
2
2
  #
3
3
  # = FlatFile.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2016 by Chris Schlaeger <chris@taskjuggler.org>
5
+ # Copyright (c) 2016, 2018 by Chris Schlaeger <chris@taskjuggler.org>
6
6
  #
7
7
  # MIT License
8
8
  #
@@ -31,6 +31,7 @@ require 'perobs/Log'
31
31
  require 'perobs/FlatFileBlobHeader'
32
32
  require 'perobs/BTree'
33
33
  require 'perobs/SpaceTree'
34
+ require 'perobs/IDList'
34
35
 
35
36
  module PEROBS
36
37
 
@@ -44,12 +45,13 @@ module PEROBS
44
45
 
45
46
  # Create a new FlatFile object for a database in the given path.
46
47
  # @param dir [String] Directory path for the database file
47
- def initialize(dir)
48
+ def initialize(dir, progressmeter)
48
49
  @db_dir = dir
50
+ @progressmeter = progressmeter
49
51
  @f = nil
50
- @index = BTree.new(@db_dir, 'index', INDEX_BTREE_ORDER)
51
- @marks = BTree.new(@db_dir, 'marks', INDEX_BTREE_ORDER)
52
- @space_list = SpaceTree.new(@db_dir)
52
+ @marks = nil
53
+ @index = BTree.new(@db_dir, 'index', INDEX_BTREE_ORDER, @progressmeter)
54
+ @space_list = SpaceTree.new(@db_dir, @progressmeter)
53
55
  end
54
56
 
55
57
  # Open the flat file for reading and writing.
@@ -74,33 +76,19 @@ module PEROBS
74
76
  end
75
77
  @f.sync = true
76
78
 
77
- begin
78
- @index.open(!new_db_created)
79
- @space_list.open
80
- rescue FatalError
81
- # Ensure that the index is really closed.
82
- @index.close
83
- # Erase it completely
84
- @index.erase
85
- # Then create it again.
86
- @index.open
87
-
88
- # Ensure that the spaces list is really closed.
89
- @space_list.close
90
- # Erase it completely
91
- @space_list.erase
92
- # Then create it again
93
- @space_list.open
94
-
95
- regenerate_index_and_spaces
96
- end
79
+ open_index_files(!new_db_created)
97
80
  end
98
81
 
99
82
  # Close the flat file. This method must be called to ensure that all data
100
83
  # is really written into the filesystem.
101
84
  def close
102
- @space_list.close
103
- @index.close
85
+ @space_list.close if @space_list.is_open?
86
+ @index.close if @index.is_open?
87
+
88
+ if @marks
89
+ @marks.erase
90
+ @marks = nil
91
+ end
104
92
 
105
93
  if @f
106
94
  @f.flush
@@ -139,29 +127,36 @@ module PEROBS
139
127
  # @param addr [Integer] Address of the blob to delete
140
128
  # @param id [Integer] ID of the blob to delete
141
129
  def delete_obj_by_address(addr, id)
142
- @index.remove(id)
143
- header = FlatFileBlobHeader.read_at(@f, addr, id)
130
+ @index.remove(id) if @index.is_open?
131
+ header = FlatFileBlobHeader.read(@f, addr, id)
144
132
  header.clear_flags
145
- @space_list.add_space(addr, header.length)
133
+ @space_list.add_space(addr, header.length) if @space_list.is_open?
146
134
  end
147
135
 
148
136
  # Delete all unmarked objects.
149
137
  def delete_unmarked_objects
150
- PEROBS.log.info "Deleting unmarked objects..."
151
- t = Time.now
138
+ # We don't update the index and the space list during this operation as
139
+ # we defragmentize the blob file at the end. We'll end the operation
140
+ # with an empty space list.
141
+ clear_index_files
142
+
143
+ deleted_objects_count = 0
144
+ @progressmeter.start('Sweeping unmarked objects', @f.size) do |pm|
145
+ each_blob_header do |header|
146
+ if header.is_valid? && !@marks.include?(header.id)
147
+ delete_obj_by_address(header.addr, header.id)
148
+ deleted_objects_count += 1
149
+ end
152
150
 
153
- deleted_ids = []
154
- each_blob_header do |pos, header|
155
- if header.is_valid? && @marks.get(header.id).nil?
156
- delete_obj_by_address(pos, header.id)
157
- deleted_ids << header.id
151
+ pm.update(header.addr)
158
152
  end
159
153
  end
160
154
  defragmentize
161
155
 
162
- PEROBS.log.info "#{deleted_ids.length} unmarked objects deleted " +
163
- "in #{Time.now - t} seconds"
164
- deleted_ids
156
+ # Update the index file and create a new, empty space list.
157
+ regenerate_index_and_spaces
158
+
159
+ deleted_objects_count
165
160
  end
166
161
 
167
162
  # Write the given object into the file. This method never uses in-place
@@ -177,7 +172,7 @@ module PEROBS
177
172
  # operation is aborted or interrupted we ensure that we either have the
178
173
  # old or the new version available.
179
174
  if (old_addr = find_obj_addr_by_id(id))
180
- old_header = FlatFileBlobHeader.read_at(@f, old_addr)
175
+ old_header = FlatFileBlobHeader.read(@f, old_addr)
181
176
  old_header.set_outdated_flag
182
177
  end
183
178
 
@@ -197,7 +192,7 @@ module PEROBS
197
192
  begin
198
193
  if length != -1
199
194
  # Just a safeguard so we don't overwrite current data.
200
- header = FlatFileBlobHeader.read_at(@f, addr)
195
+ header = FlatFileBlobHeader.read(@f, addr)
201
196
  if header.length != length
202
197
  PEROBS.log.fatal "Length in free list (#{length}) and header " +
203
198
  "(#{header.length}) for address #{addr} don't match."
@@ -229,18 +224,22 @@ module PEROBS
229
224
  FlatFileBlobHeader.new(@f, space_address, 0, space_length,
230
225
  0, 0).write
231
226
  # Register the new space with the space list.
232
- @space_list.add_space(space_address, space_length) if space_length > 0
227
+ if @space_list.is_open? && space_length > 0
228
+ @space_list.add_space(space_address, space_length)
229
+ end
233
230
  end
234
231
 
235
232
  # Once the blob has been written we can update the index as well.
236
- @index.insert(id, addr)
233
+ @index.insert(id, addr) if @index.is_open?
237
234
 
238
235
  if old_addr
239
236
  # If we had an existing object stored for the ID we have to mark
240
237
  # this entry as deleted now.
241
238
  old_header.clear_flags
242
239
  # And register the newly freed space with the space list.
243
- @space_list.add_space(old_addr, old_header.length)
240
+ if @space_list.is_open?
241
+ @space_list.add_space(old_addr, old_header.length)
242
+ end
244
243
  else
245
244
  @f.flush
246
245
  end
@@ -270,21 +269,17 @@ module PEROBS
270
269
  nil
271
270
  end
272
271
 
273
- def search_object(id)
274
- each_blob_header do |pos, header|
275
- return read_obj_by_address(pos, id)
276
- end
277
-
278
- nil
272
+ # @return [Integer] Number of items stored in the DB.
273
+ def item_counter
274
+ @index.entries_count
279
275
  end
280
276
 
281
-
282
277
  # Read the object at the specified address.
283
278
  # @param addr [Integer] Offset in the flat file
284
279
  # @param id [Integer] ID of the data blob
285
280
  # @return [String] Raw object data
286
281
  def read_obj_by_address(addr, id)
287
- header = FlatFileBlobHeader.read_at(@f, addr, id)
282
+ header = FlatFileBlobHeader.read(@f, addr, id)
288
283
  if header.id != id
289
284
  PEROBS.log.fatal "Database index corrupted: Index for object " +
290
285
  "#{id} points to object with ID #{header.id}"
@@ -319,19 +314,22 @@ module PEROBS
319
314
  # Mark the object with the given ID.
320
315
  # @param id [Integer] ID of the object
321
316
  def mark_obj_by_id(id)
322
- @marks.insert(id, 0)
317
+ @marks.insert(id)
323
318
  end
324
319
 
325
320
  # Return true if the object with the given ID is marked, false otherwise.
326
321
  # @param id [Integer] ID of the object
327
322
  def is_marked_by_id?(id)
328
- !@marks.get(id).nil?
323
+ @marks.include?(id)
329
324
  end
330
325
 
331
326
  # Clear alls marks.
332
327
  def clear_all_marks
333
- @marks.erase
334
- @marks.open
328
+ if @marks
329
+ @marks.clear
330
+ else
331
+ @marks = IDList.new(@db_dir, 'marks', 8)
332
+ end
335
333
  end
336
334
 
337
335
  # Eliminate all the holes in the file. This is an in-place
@@ -340,59 +338,72 @@ module PEROBS
340
338
  distance = 0
341
339
  new_file_size = 0
342
340
  deleted_blobs = 0
341
+ corrupted_blobs = 0
343
342
  valid_blobs = 0
344
- t = Time.now
345
- PEROBS.log.info "Defragmenting FlatFile"
343
+
346
344
  # Iterate over all entries.
347
- each_blob_header do |pos, header|
348
- # Total size of the current entry
349
- entry_bytes = FlatFileBlobHeader::LENGTH + header.length
350
- if header.is_valid?
351
- # We have found a valid entry.
352
- valid_blobs += 1
353
- if distance > 0
354
- begin
355
- # Read current entry into a buffer
356
- @f.seek(pos)
357
- buf = @f.read(entry_bytes)
358
- # Write the buffer right after the end of the previous entry.
359
- @f.seek(pos - distance)
360
- @f.write(buf)
361
- # Update the index with the new position
362
- @index.insert(header.id, pos - distance)
363
- # Mark the space between the relocated current entry and the
364
- # next valid entry as deleted space.
365
- FlatFileBlobHeader.new(@f, @f.pos, 0,
366
- distance - FlatFileBlobHeader::LENGTH,
367
- 0, 0).write
368
- @f.flush
369
- rescue IOError => e
370
- PEROBS.log.fatal "Error while moving blob for ID #{header.id}: " +
371
- e.message
345
+ @progressmeter.start('Defragmentizing blobs file', @f.size) do |pm|
346
+ each_blob_header do |header|
347
+ # If we have stumbled over a corrupted blob we treat it similar to a
348
+ # deleted blob and reuse the space.
349
+ if header.corruption_start
350
+ distance += header.addr - header.corruption_start
351
+ corrupted_blobs += 1
352
+ end
353
+
354
+ # Total size of the current entry
355
+ entry_bytes = FlatFileBlobHeader::LENGTH + header.length
356
+ if header.is_valid?
357
+ # We have found a valid entry.
358
+ valid_blobs += 1
359
+ if distance > 0
360
+ begin
361
+ # Read current entry into a buffer
362
+ @f.seek(header.addr)
363
+ buf = @f.read(entry_bytes)
364
+ # Write the buffer right after the end of the previous entry.
365
+ @f.seek(header.addr - distance)
366
+ @f.write(buf)
367
+ # Mark the space between the relocated current entry and the
368
+ # next valid entry as deleted space.
369
+ FlatFileBlobHeader.new(@f, @f.pos, 0,
370
+ distance - FlatFileBlobHeader::LENGTH,
371
+ 0, 0).write
372
+ @f.flush
373
+ rescue IOError => e
374
+ PEROBS.log.fatal "Error while moving blob for ID " +
375
+ "#{header.id}: #{e.message}"
376
+ end
372
377
  end
378
+ new_file_size = header.addr - distance +
379
+ FlatFileBlobHeader::LENGTH + header.length
380
+ else
381
+ deleted_blobs += 1
382
+ distance += entry_bytes
373
383
  end
374
- new_file_size = pos + FlatFileBlobHeader::LENGTH + header.length
375
- else
376
- deleted_blobs += 1
377
- distance += entry_bytes
384
+
385
+ pm.update(header.addr)
378
386
  end
379
387
  end
380
- PEROBS.log.info "FlatFile defragmented in #{Time.now - t} seconds"
388
+
381
389
  PEROBS.log.info "#{distance / 1000} KiB/#{deleted_blobs} blobs of " +
382
390
  "#{@f.size / 1000} KiB/#{valid_blobs} blobs or " +
383
391
  "#{'%.1f' % (distance.to_f / @f.size * 100.0)}% reclaimed"
392
+ if corrupted_blobs > 0
393
+ PEROBS.log.info "#{corrupted_blobs} corrupted blob(s) found. Space " +
394
+ "was recycled."
395
+ end
384
396
 
385
397
  @f.flush
386
398
  @f.truncate(new_file_size)
387
399
  @f.flush
388
- @space_list.clear
389
400
 
390
401
  sync
391
402
  end
392
403
 
393
404
  # This method iterates over all entries in the FlatFile and removes the
394
405
  # entry and inserts it again. This is useful to update all entries in
395
- # cased the storage format has changed.
406
+ # case the storage format has changed.
396
407
  def refresh
397
408
  # This iteration might look scary as we iterate over the entries while
398
409
  # we are rearranging them. Re-inserted items may be inserted
@@ -400,23 +411,34 @@ module PEROBS
400
411
  # inserted after the current entry and will be re-read again unless they
401
412
  # are inserted after the original file end.
402
413
  file_size = @f.size
403
- PEROBS.log.info "Refreshing the DB..."
404
- t = Time.now
405
- each_blob_header do |pos, header|
406
- if header.is_valid?
407
- buf = read_obj_by_address(pos, header.id)
408
- delete_obj_by_address(pos, header.id)
409
- write_obj_by_id(header.id, buf)
410
- end
411
414
 
412
- # Some re-inserted blobs may be inserted after the original file end.
413
- # No need to process those blobs again.
414
- break if pos >= file_size
415
+ # We don't update the index and the space list during this operation as
416
+ # we defragmentize the blob file at the end. We'll end the operation
417
+ # with an empty space list.
418
+ clear_index_files
419
+
420
+ @progressmeter.start('Converting objects to new storage format',
421
+ @f.size) do |pm|
422
+ each_blob_header do |header|
423
+ if header.is_valid?
424
+ buf = read_obj_by_address(header.addr, header.id)
425
+ delete_obj_by_address(header.addr, header.id)
426
+ write_obj_by_id(header.id, buf)
427
+ end
428
+
429
+ # Some re-inserted blobs may be inserted after the original file end.
430
+ # No need to process those blobs again.
431
+ break if header.addr >= file_size
432
+
433
+ pm.update(header.addr)
434
+ end
415
435
  end
416
- PEROBS.log.info "DB refresh completed in #{Time.now - t} seconds"
417
436
 
418
437
  # Reclaim the space saved by compressing entries.
419
438
  defragmentize
439
+
440
+ # Recreate the index file and create an empty space list.
441
+ regenerate_index_and_spaces
420
442
  end
421
443
 
422
444
  # Check (and repair) the FlatFile.
@@ -433,95 +455,117 @@ module PEROBS
433
455
  # First check the database blob file. Each entry should be readable and
434
456
  # correct and all IDs must be unique. We use a shadow index to keep
435
457
  # track of the already found IDs.
436
- new_index = BTree.new(@db_dir, 'new-index', INDEX_BTREE_ORDER)
458
+ new_index = BTree.new(@db_dir, 'new-index', INDEX_BTREE_ORDER,
459
+ @progressmeter)
437
460
  new_index.erase
438
461
  new_index.open
439
462
 
440
- each_blob_header do |pos, header|
441
- if header.is_valid?
442
- # We have a non-deleted entry.
443
- begin
444
- @f.seek(pos + FlatFileBlobHeader::LENGTH)
445
- buf = @f.read(header.length)
446
- if buf.bytesize != header.length
447
- PEROBS.log.error "Premature end of file in blob with ID " +
448
- "#{header.id}."
449
- discard_damaged_blob(header) if repair
450
- errors += 1
451
- next
452
- end
463
+ corrupted_blobs = 0
464
+ @progressmeter.start('Checking blobs file', @f.size) do |pm|
465
+ corrupted_blobs = each_blob_header do |header|
466
+ if header.is_valid?
467
+ # We have a non-deleted entry.
468
+ begin
469
+ @f.seek(header.addr + FlatFileBlobHeader::LENGTH)
470
+ buf = @f.read(header.length)
471
+ if buf.bytesize != header.length
472
+ PEROBS.log.error "Premature end of file in blob with ID " +
473
+ "#{header.id}."
474
+ discard_damaged_blob(header) if repair
475
+ errors += 1
476
+ next
477
+ end
453
478
 
454
- # Uncompress the data if the compression bit is set in the mark
455
- # byte.
456
- if header.is_compressed?
457
- begin
458
- buf = Zlib.inflate(buf)
459
- rescue Zlib::BufError, Zlib::DataError
460
- PEROBS.log.error "Corrupted compressed block with ID " +
461
- "#{header.id} found."
479
+ # Uncompress the data if the compression bit is set in the mark
480
+ # byte.
481
+ if header.is_compressed?
482
+ begin
483
+ buf = Zlib.inflate(buf)
484
+ rescue Zlib::BufError, Zlib::DataError
485
+ PEROBS.log.error "Corrupted compressed block with ID " +
486
+ "#{header.id} found."
487
+ discard_damaged_blob(header) if repair
488
+ errors += 1
489
+ next
490
+ end
491
+ end
492
+
493
+ if header.crc && checksum(buf) != header.crc
494
+ PEROBS.log.error "Checksum failure while checking blob " +
495
+ "with ID #{header.id}"
462
496
  discard_damaged_blob(header) if repair
463
497
  errors += 1
464
498
  next
465
499
  end
500
+ rescue IOError => e
501
+ PEROBS.log.fatal "Check of blob with ID #{header.id} failed: " +
502
+ e.message
466
503
  end
467
504
 
468
- if header.crc && checksum(buf) != header.crc
469
- PEROBS.log.error "Checksum failure while checking blob " +
470
- "with ID #{header.id}"
471
- discard_damaged_blob(header) if repair
505
+ # Check if the ID has already been found in the file.
506
+ if (previous_address = new_index.get(header.id))
507
+ PEROBS.log.error "Multiple blobs for ID #{header.id} found. " +
508
+ "Addresses: #{previous_address}, #{header.addr}"
472
509
  errors += 1
473
- next
474
- end
475
- rescue IOError => e
476
- PEROBS.log.fatal "Check of blob with ID #{header.id} failed: " +
477
- e.message
478
- end
479
-
480
- # Check if the ID has already been found in the file.
481
- if (previous_address = new_index.get(header.id))
482
- PEROBS.log.error "Multiple blobs for ID #{header.id} found. " +
483
- "Addresses: #{previous_address}, #{pos}"
484
- previous_header = FlatFileBlobHeader.read_at(@f, previous_address,
485
- header.id)
486
- if repair
487
- # We have two blobs with the same ID and we must discard one of
488
- # them.
489
- if header.is_outdated?
490
- discard_damaged_blob(header)
491
- elsif previous_header.is_outdated?
492
- discard_damaged_blob(previous_header)
493
- else
494
- PEROBS.log.error "None of the blobs with same ID have " +
495
- "the outdated flag set. Deleting the smaller one."
496
- discard_damaged_blob(header.length < previous_header.length ?
497
- header : previous_header)
510
+ previous_header = FlatFileBlobHeader.read(@f, previous_address,
511
+ header.id)
512
+ if repair
513
+ # We have two blobs with the same ID and we must discard one of
514
+ # them.
515
+ if header.is_outdated?
516
+ discard_damaged_blob(header)
517
+ elsif previous_header.is_outdated?
518
+ discard_damaged_blob(previous_header)
519
+ else
520
+ PEROBS.log.error "None of the blobs with same ID have " +
521
+ "the outdated flag set. Deleting the smaller one."
522
+ errors += 1
523
+ discard_damaged_blob(header.length < previous_header.length ?
524
+ header : previous_header)
525
+ end
526
+ next
498
527
  end
499
- next
528
+ else
529
+ # ID is unique so far. Add it to the shadow index.
530
+ new_index.insert(header.id, header.addr)
500
531
  end
501
- else
502
- # ID is unique so far. Add it to the shadow index.
503
- new_index.insert(header.id, pos)
532
+
504
533
  end
505
534
 
535
+ pm.update(header.addr)
506
536
  end
537
+
538
+ errors += corrupted_blobs
507
539
  end
540
+
508
541
  # We no longer need the new index.
509
542
  new_index.close
510
543
  new_index.erase
511
544
 
512
- # Now we check the index data. It must be correct and the entries must
513
- # match the blob file. All entries in the index must be in the blob file
514
- # and vice versa.
515
- begin
516
- index_ok = @index.check do |id, address|
517
- has_id_at?(id, address)
518
- end
519
- unless index_ok && @space_list.check(self) && cross_check_entries
545
+ if repair && corrupted_blobs > 0
546
+ erase_index_files
547
+ defragmentize
548
+ regenerate_index_and_spaces
549
+ else
550
+ # Now we check the index data. It must be correct and the entries must
551
+ # match the blob file. All entries in the index must be in the blob file
552
+ # and vise versa.
553
+ begin
554
+ index_ok = @index.check do |id, address|
555
+ has_id_at?(id, address)
556
+ end
557
+ x_check_errs = 0
558
+ space_check_ok = true
559
+ unless index_ok && (space_check_ok = @space_list.check(self)) &&
560
+ (x_check_errs = cross_check_entries) == 0
561
+ errors += 1 unless index_ok && space_check_ok
562
+ errors += x_check_errs
563
+ regenerate_index_and_spaces if repair
564
+ end
565
+ rescue PEROBS::FatalError
566
+ errors += 1
520
567
  regenerate_index_and_spaces if repair
521
568
  end
522
- rescue PEROBS::FatalError
523
- errors += 1
524
- regenerate_index_and_spaces if repair
525
569
  end
526
570
 
527
571
  sync if repair
@@ -535,22 +579,32 @@ module PEROBS
535
579
  # regenerates them from the FlatFile.
536
580
  def regenerate_index_and_spaces
537
581
  PEROBS.log.warn "Re-generating FlatFileDB index and space files"
582
+ @index.open unless @index.is_open?
538
583
  @index.clear
584
+ @space_list.open unless @space_list.is_open?
539
585
  @space_list.clear
540
586
 
541
- each_blob_header do |pos, header|
542
- if header.is_valid?
543
- if (duplicate_pos = @index.get(header.id))
544
- PEROBS.log.error "FlatFile contains multiple blobs for ID " +
545
- "#{header.id}. First blob is at address #{duplicate_pos}. " +
546
- "Other blob found at address #{pos}."
547
- @space_list.add_space(pos, header.length) if header.length > 0
548
- discard_damaged_blob(header)
587
+ @progressmeter.start('Re-generating database index', @f.size) do |pm|
588
+ each_blob_header do |header|
589
+ if header.is_valid?
590
+ if (duplicate_pos = @index.get(header.id))
591
+ PEROBS.log.error "FlatFile contains multiple blobs for ID " +
592
+ "#{header.id}. First blob is at address #{duplicate_pos}. " +
593
+ "Other blob found at address #{header.addr}."
594
+ if header.length > 0
595
+ @space_list.add_space(header.addr, header.length)
596
+ end
597
+ discard_damaged_blob(header)
598
+ else
599
+ @index.insert(header.id, header.addr)
600
+ end
549
601
  else
550
- @index.insert(header.id, pos)
602
+ if header.length > 0
603
+ @space_list.add_space(header.addr, header.length)
604
+ end
551
605
  end
552
- else
553
- @space_list.add_space(pos, header.length) if header.length > 0
606
+
607
+ pm.update(header.addr)
554
608
  end
555
609
  end
556
610
 
@@ -558,19 +612,19 @@ module PEROBS
558
612
  end
559
613
 
560
614
  def has_space?(address, size)
561
- header = FlatFileBlobHeader.read_at(@f, address)
615
+ header = FlatFileBlobHeader.read(@f, address)
562
616
  !header.is_valid? && header.length == size
563
617
  end
564
618
 
565
619
  def has_id_at?(id, address)
566
- header = FlatFileBlobHeader.read_at(@f, address)
620
+ header = FlatFileBlobHeader.read(@f, address)
567
621
  header.is_valid? && header.id == id
568
622
  end
569
623
 
570
624
  def inspect
571
625
  s = '['
572
- each_blob_header do |pos, header|
573
- s << "{ :pos => #{pos}, :flags => #{header.flags}, " +
626
+ each_blob_header do |header|
627
+ s << "{ :pos => #{header.addr}, :flags => #{header.flags}, " +
574
628
  ":length => #{header.length}, :id => #{header.id}, " +
575
629
  ":crc => #{header.crc}"
576
630
  if header.is_valid?
@@ -581,21 +635,68 @@ module PEROBS
581
635
  s + ']'
582
636
  end
583
637
 
638
+ def FlatFile::insert_header_checksums(db_dir)
639
+ old_file_name = File.join(db_dir, 'database.blobs')
640
+ new_file_name = File.join(db_dir, 'database_v4.blobs')
641
+ bak_file_name = File.join(db_dir, 'database_v3.blobs')
642
+
643
+ old_file = File.open(old_file_name, 'rb')
644
+ new_file = File.open(new_file_name, 'wb')
645
+
646
+ entries = 0
647
+ while (buf = old_file.read(21))
648
+ flags, length, id, crc = *buf.unpack('CQQL')
649
+ blob_data = old_file.read(length)
650
+
651
+ # Some basic sanity checking to ensure all reserved bits are 0. Older
652
+ # versions of PEROBS used to set bit 1 despite it being reserved now.
653
+ unless flags & 0xF0 == 0
654
+ PEROBS.log.fatal "Blob file #{old_file_name} contains illegal " +
655
+ "flag byte #{'%02x' % flags} at #{old_file.pos - 21}"
656
+ end
657
+
658
+ # Check if the blob is valid and current.
659
+ if flags & 0x1 == 1 && flags & 0x8 == 0
660
+ # Make sure the bit 1 is not set anymore.
661
+ flags = flags & 0x05
662
+ header_str = [ flags, length, id, crc ].pack('CQQL')
663
+ header_crc = Zlib.crc32(header_str, 0)
664
+ header_str += [ header_crc ].pack('L')
665
+
666
+ new_file.write(header_str + blob_data)
667
+ entries += 1
668
+ end
669
+ end
670
+ PEROBS.log.info "Header checksum added to #{entries} entries"
671
+
672
+ old_file.close
673
+ new_file.close
674
+
675
+ File.rename(old_file_name, bak_file_name)
676
+ File.rename(new_file_name, old_file_name)
677
+ end
678
+
584
679
  private
585
680
 
586
681
  def each_blob_header(&block)
587
- pos = 0
682
+ corrupted_blobs = 0
683
+
588
684
  begin
589
685
  @f.seek(0)
590
686
  while (header = FlatFileBlobHeader.read(@f))
591
- yield(pos, header)
687
+ if header.corruption_start
688
+ corrupted_blobs += 1
689
+ end
690
+
691
+ yield(header)
592
692
 
593
- pos += FlatFileBlobHeader::LENGTH + header.length
594
- @f.seek(pos)
693
+ @f.seek(header.addr + FlatFileBlobHeader::LENGTH + header.length)
595
694
  end
596
695
  rescue IOError => e
597
696
  PEROBS.log.fatal "Cannot read blob in flat file DB: #{e.message}"
598
697
  end
698
+
699
+ corrupted_blobs
599
700
  end
600
701
 
601
702
  def find_free_blob(bytes)
@@ -625,26 +726,34 @@ module PEROBS
625
726
  def cross_check_entries
626
727
  errors = 0
627
728
 
628
- each_blob_header do |pos, header|
629
- if !header.is_valid?
630
- if header.length > 0
631
- unless @space_list.has_space?(pos, header.length)
632
- PEROBS.log.error "FlatFile has free space " +
633
- "(addr: #{pos}, len: #{header.length}) that is not in " +
634
- "FreeSpaceManager"
635
- errors += 1
729
+ @progressmeter.start('Cross checking blobs and index', @f.size) do |pm|
730
+ each_blob_header do |header|
731
+ if !header.is_valid?
732
+ if header.length > 0
733
+ unless @space_list.has_space?(header.addr, header.length)
734
+ PEROBS.log.error "FlatFile has free space " +
735
+ "(addr: #{header.addr}, len: #{header.length}) that is " +
736
+ "not in FreeSpaceManager"
737
+ errors += 1
738
+ end
739
+ end
740
+ else
741
+ if (index_address = @index.get(header.id)).nil?
742
+ PEROBS.log.error "FlatFile blob at address #{header.addr} " +
743
+ "is not listed in the index"
744
+ errors +=1
745
+ elsif index_address != header.addr
746
+ PEROBS.log.error "FlatFile blob at address #{header.addr} " +
747
+ "is listed in index with address #{index_address}"
748
+ errors += 1
636
749
  end
637
750
  end
638
- else
639
- unless @index.get(header.id) == pos
640
- PEROBS.log.error "FlatFile blob at address #{pos} is listed " +
641
- "in index with address #{@index.get(header.id)}"
642
- errors += 1
643
- end
751
+
752
+ pm.update(header.addr)
644
753
  end
645
754
  end
646
755
 
647
- errors == 0
756
+ errors
648
757
  end
649
758
 
650
759
  def discard_damaged_blob(header)
@@ -653,6 +762,57 @@ module PEROBS
653
762
  header.clear_flags
654
763
  end
655
764
 
765
+ def open_index_files(abort_on_missing_files = false)
766
+ begin
767
+ @index.open(abort_on_missing_files)
768
+ @space_list.open
769
+ rescue FatalError
770
+ # Ensure that the index is really closed.
771
+ @index.close
772
+ # Erase it completely
773
+ @index.erase
774
+ # Then create it again.
775
+ @index.open
776
+
777
+ # Ensure that the spaces list is really closed.
778
+ @space_list.close
779
+ # Erase it completely
780
+ @space_list.erase
781
+ # Then create it again
782
+ @space_list.open
783
+
784
+ regenerate_index_and_spaces
785
+ end
786
+ end
787
+
788
+ def erase_index_files
789
+ # Ensure that the index is really closed.
790
+ @index.close
791
+ # Erase it completely
792
+ @index.erase
793
+
794
+ # Ensure that the spaces list is really closed.
795
+ @space_list.close
796
+ # Erase it completely
797
+ @space_list.erase
798
+ end
799
+
800
+ def clear_index_files
801
+ # Ensure that the index is really closed.
802
+ @index.close
803
+ # Erase it completely
804
+ @index.erase
805
+ # Then create it again.
806
+ @index.open
807
+
808
+ # Ensure that the spaces list is really closed.
809
+ @space_list.close
810
+ # Erase it completely
811
+ @space_list.erase
812
+ # Then create it again
813
+ @space_list.open
814
+ end
815
+
656
816
  end
657
817
 
658
818
  end