perobs 4.0.0 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. checksums.yaml +4 -4
  2. data/lib/perobs.rb +1 -0
  3. data/lib/perobs/Array.rb +66 -19
  4. data/lib/perobs/BTree.rb +83 -12
  5. data/lib/perobs/BTreeBlob.rb +1 -1
  6. data/lib/perobs/BTreeDB.rb +2 -2
  7. data/lib/perobs/BTreeNode.rb +365 -85
  8. data/lib/perobs/BigArray.rb +267 -0
  9. data/lib/perobs/BigArrayNode.rb +998 -0
  10. data/lib/perobs/BigHash.rb +262 -0
  11. data/lib/perobs/BigTree.rb +184 -0
  12. data/lib/perobs/BigTreeNode.rb +873 -0
  13. data/lib/perobs/ConsoleProgressMeter.rb +61 -0
  14. data/lib/perobs/DataBase.rb +4 -3
  15. data/lib/perobs/DynamoDB.rb +57 -15
  16. data/lib/perobs/EquiBlobsFile.rb +143 -51
  17. data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
  18. data/lib/perobs/FlatFile.rb +363 -203
  19. data/lib/perobs/FlatFileBlobHeader.rb +98 -54
  20. data/lib/perobs/FlatFileDB.rb +42 -20
  21. data/lib/perobs/Hash.rb +58 -13
  22. data/lib/perobs/IDList.rb +144 -0
  23. data/lib/perobs/IDListPage.rb +107 -0
  24. data/lib/perobs/IDListPageFile.rb +180 -0
  25. data/lib/perobs/IDListPageRecord.rb +142 -0
  26. data/lib/perobs/Object.rb +18 -15
  27. data/lib/perobs/ObjectBase.rb +38 -4
  28. data/lib/perobs/PersistentObjectCache.rb +53 -67
  29. data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
  30. data/lib/perobs/ProgressMeter.rb +97 -0
  31. data/lib/perobs/SpaceTree.rb +21 -12
  32. data/lib/perobs/SpaceTreeNode.rb +53 -61
  33. data/lib/perobs/Store.rb +71 -32
  34. data/lib/perobs/version.rb +1 -1
  35. data/perobs.gemspec +4 -4
  36. data/test/Array_spec.rb +15 -6
  37. data/test/BTree_spec.rb +5 -2
  38. data/test/BigArray_spec.rb +214 -0
  39. data/test/BigHash_spec.rb +144 -0
  40. data/test/BigTreeNode_spec.rb +153 -0
  41. data/test/BigTree_spec.rb +259 -0
  42. data/test/EquiBlobsFile_spec.rb +105 -1
  43. data/test/FNV_Hash_1a_64_spec.rb +59 -0
  44. data/test/FlatFileDB_spec.rb +63 -14
  45. data/test/Hash_spec.rb +1 -2
  46. data/test/IDList_spec.rb +77 -0
  47. data/test/LegacyDBs/LegacyDB.rb +151 -0
  48. data/test/LegacyDBs/version_3/class_map.json +1 -0
  49. data/test/LegacyDBs/version_3/config.json +1 -0
  50. data/test/LegacyDBs/version_3/database.blobs +0 -0
  51. data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
  52. data/test/LegacyDBs/version_3/index.blobs +0 -0
  53. data/test/LegacyDBs/version_3/version +1 -0
  54. data/test/LockFile_spec.rb +9 -6
  55. data/test/SpaceTree_spec.rb +4 -1
  56. data/test/Store_spec.rb +290 -199
  57. data/test/spec_helper.rb +9 -4
  58. metadata +47 -10
  59. data/lib/perobs/TreeDB.rb +0 -277
@@ -0,0 +1,54 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = FNV_Hash_1a_64.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2019 by Chris Schlaeger <chris@taskjuggler.org>
6
+ #
7
+ # MIT License
8
+ #
9
+ # Permission is hereby granted, free of charge, to any person obtaining
10
+ # a copy of this software and associated documentation files (the
11
+ # "Software"), to deal in the Software without restriction, including
12
+ # without limitation the rights to use, copy, modify, merge, publish,
13
+ # distribute, sublicense, and/or sell copies of the Software, and to
14
+ # permit persons to whom the Software is furnished to do so, subject to
15
+ # the following conditions:
16
+ #
17
+ # The above copyright notice and this permission notice shall be
18
+ # included in all copies or substantial portions of the Software.
19
+ #
20
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
+
28
+ module PEROBS
29
+
30
+ # This is an implementation of the Fowler Noll Vo hashing algorithm in the
31
+ # 1a variant for 64 bit hash values.
32
+ # https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
33
+ class FNV_Hash_1a_64
34
+
35
+ @@OFFSET = 14695981039346656037
36
+ @@PRIME = 1099511628211
37
+ @@MASK = 2**64 - 1
38
+
39
+ def self.digest(item)
40
+ hash = @@OFFSET
41
+
42
+ item.to_s.each_byte do |byte|
43
+ hash ^= byte
44
+ hash *= @@PRIME
45
+ hash &= @@MASK
46
+ end
47
+
48
+ hash
49
+ end
50
+
51
+ end
52
+
53
+ end
54
+
@@ -2,7 +2,7 @@
2
2
  #
3
3
  # = FlatFile.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2016 by Chris Schlaeger <chris@taskjuggler.org>
5
+ # Copyright (c) 2016, 2018 by Chris Schlaeger <chris@taskjuggler.org>
6
6
  #
7
7
  # MIT License
8
8
  #
@@ -31,6 +31,7 @@ require 'perobs/Log'
31
31
  require 'perobs/FlatFileBlobHeader'
32
32
  require 'perobs/BTree'
33
33
  require 'perobs/SpaceTree'
34
+ require 'perobs/IDList'
34
35
 
35
36
  module PEROBS
36
37
 
@@ -44,12 +45,13 @@ module PEROBS
44
45
 
45
46
  # Create a new FlatFile object for a database in the given path.
46
47
  # @param dir [String] Directory path for the data base file
47
- def initialize(dir)
48
+ def initialize(dir, progressmeter)
48
49
  @db_dir = dir
50
+ @progressmeter = progressmeter
49
51
  @f = nil
50
- @index = BTree.new(@db_dir, 'index', INDEX_BTREE_ORDER)
51
- @marks = BTree.new(@db_dir, 'marks', INDEX_BTREE_ORDER)
52
- @space_list = SpaceTree.new(@db_dir)
52
+ @marks = nil
53
+ @index = BTree.new(@db_dir, 'index', INDEX_BTREE_ORDER, @progressmeter)
54
+ @space_list = SpaceTree.new(@db_dir, @progressmeter)
53
55
  end
54
56
 
55
57
  # Open the flat file for reading and writing.
@@ -74,33 +76,19 @@ module PEROBS
74
76
  end
75
77
  @f.sync = true
76
78
 
77
- begin
78
- @index.open(!new_db_created)
79
- @space_list.open
80
- rescue FatalError
81
- # Ensure that the index is really closed.
82
- @index.close
83
- # Erase it completely
84
- @index.erase
85
- # Then create it again.
86
- @index.open
87
-
88
- # Ensure that the spaces list is really closed.
89
- @space_list.close
90
- # Erase it completely
91
- @space_list.erase
92
- # Then create it again
93
- @space_list.open
94
-
95
- regenerate_index_and_spaces
96
- end
79
+ open_index_files(!new_db_created)
97
80
  end
98
81
 
99
82
  # Close the flat file. This method must be called to ensure that all data
100
83
  # is really written into the filesystem.
101
84
  def close
102
- @space_list.close
103
- @index.close
85
+ @space_list.close if @space_list.is_open?
86
+ @index.close if @index.is_open?
87
+
88
+ if @marks
89
+ @marks.erase
90
+ @marks = nil
91
+ end
104
92
 
105
93
  if @f
106
94
  @f.flush
@@ -139,29 +127,36 @@ module PEROBS
139
127
  # @param addr [Integer] Address of the blob to delete
140
128
  # @param id [Integer] ID of the blob to delete
141
129
  def delete_obj_by_address(addr, id)
142
- @index.remove(id)
143
- header = FlatFileBlobHeader.read_at(@f, addr, id)
130
+ @index.remove(id) if @index.is_open?
131
+ header = FlatFileBlobHeader.read(@f, addr, id)
144
132
  header.clear_flags
145
- @space_list.add_space(addr, header.length)
133
+ @space_list.add_space(addr, header.length) if @space_list.is_open?
146
134
  end
147
135
 
148
136
  # Delete all unmarked objects.
149
137
  def delete_unmarked_objects
150
- PEROBS.log.info "Deleting unmarked objects..."
151
- t = Time.now
138
+ # We don't update the index and the space list during this operation as
139
+ # we defragmentize the blob file at the end. We'll end the operation
140
+ # with an empty space list.
141
+ clear_index_files
142
+
143
+ deleted_objects_count = 0
144
+ @progressmeter.start('Sweeping unmarked objects', @f.size) do |pm|
145
+ each_blob_header do |header|
146
+ if header.is_valid? && !@marks.include?(header.id)
147
+ delete_obj_by_address(header.addr, header.id)
148
+ deleted_objects_count += 1
149
+ end
152
150
 
153
- deleted_ids = []
154
- each_blob_header do |pos, header|
155
- if header.is_valid? && @marks.get(header.id).nil?
156
- delete_obj_by_address(pos, header.id)
157
- deleted_ids << header.id
151
+ pm.update(header.addr)
158
152
  end
159
153
  end
160
154
  defragmentize
161
155
 
162
- PEROBS.log.info "#{deleted_ids.length} unmarked objects deleted " +
163
- "in #{Time.now - t} seconds"
164
- deleted_ids
156
+ # Update the index file and create a new, empty space list.
157
+ regenerate_index_and_spaces
158
+
159
+ deleted_objects_count
165
160
  end
166
161
 
167
162
  # Write the given object into the file. This method never uses in-place
@@ -177,7 +172,7 @@ module PEROBS
177
172
  # operation is aborted or interrupted we ensure that we either have the
178
173
  # old or the new version available.
179
174
  if (old_addr = find_obj_addr_by_id(id))
180
- old_header = FlatFileBlobHeader.read_at(@f, old_addr)
175
+ old_header = FlatFileBlobHeader.read(@f, old_addr)
181
176
  old_header.set_outdated_flag
182
177
  end
183
178
 
@@ -197,7 +192,7 @@ module PEROBS
197
192
  begin
198
193
  if length != -1
199
194
  # Just a safeguard so we don't overwrite current data.
200
- header = FlatFileBlobHeader.read_at(@f, addr)
195
+ header = FlatFileBlobHeader.read(@f, addr)
201
196
  if header.length != length
202
197
  PEROBS.log.fatal "Length in free list (#{length}) and header " +
203
198
  "(#{header.length}) for address #{addr} don't match."
@@ -229,18 +224,22 @@ module PEROBS
229
224
  FlatFileBlobHeader.new(@f, space_address, 0, space_length,
230
225
  0, 0).write
231
226
  # Register the new space with the space list.
232
- @space_list.add_space(space_address, space_length) if space_length > 0
227
+ if @space_list.is_open? && space_length > 0
228
+ @space_list.add_space(space_address, space_length)
229
+ end
233
230
  end
234
231
 
235
232
  # Once the blob has been written we can update the index as well.
236
- @index.insert(id, addr)
233
+ @index.insert(id, addr) if @index.is_open?
237
234
 
238
235
  if old_addr
239
236
  # If we had an existing object stored for the ID we have to mark
240
237
  # this entry as deleted now.
241
238
  old_header.clear_flags
242
239
  # And register the newly freed space with the space list.
243
- @space_list.add_space(old_addr, old_header.length)
240
+ if @space_list.is_open?
241
+ @space_list.add_space(old_addr, old_header.length)
242
+ end
244
243
  else
245
244
  @f.flush
246
245
  end
@@ -270,21 +269,17 @@ module PEROBS
270
269
  nil
271
270
  end
272
271
 
273
- def search_object(id)
274
- each_blob_header do |pos, header|
275
- return read_obj_by_address(pos, id)
276
- end
277
-
278
- nil
272
+ # @return [Integer] Number of items stored in the DB.
273
+ def item_counter
274
+ @index.entries_count
279
275
  end
280
276
 
281
-
282
277
  # Read the object at the specified address.
283
278
  # @param addr [Integer] Offset in the flat file
284
279
  # @param id [Integer] ID of the data blob
285
280
  # @return [String] Raw object data
286
281
  def read_obj_by_address(addr, id)
287
- header = FlatFileBlobHeader.read_at(@f, addr, id)
282
+ header = FlatFileBlobHeader.read(@f, addr, id)
288
283
  if header.id != id
289
284
  PEROBS.log.fatal "Database index corrupted: Index for object " +
290
285
  "#{id} points to object with ID #{header.id}"
@@ -319,19 +314,22 @@ module PEROBS
319
314
  # Mark the object with the given ID.
320
315
  # @param id [Integer] ID of the object
321
316
  def mark_obj_by_id(id)
322
- @marks.insert(id, 0)
317
+ @marks.insert(id)
323
318
  end
324
319
 
325
320
  # Return true if the object with the given ID is marked, false otherwise.
326
321
  # @param id [Integer] ID of the object
327
322
  def is_marked_by_id?(id)
328
- !@marks.get(id).nil?
323
+ @marks.include?(id)
329
324
  end
330
325
 
331
326
  # Clear alls marks.
332
327
  def clear_all_marks
333
- @marks.erase
334
- @marks.open
328
+ if @marks
329
+ @marks.clear
330
+ else
331
+ @marks = IDList.new(@db_dir, 'marks', 8)
332
+ end
335
333
  end
336
334
 
337
335
  # Eliminate all the holes in the file. This is an in-place
@@ -340,59 +338,72 @@ module PEROBS
340
338
  distance = 0
341
339
  new_file_size = 0
342
340
  deleted_blobs = 0
341
+ corrupted_blobs = 0
343
342
  valid_blobs = 0
344
- t = Time.now
345
- PEROBS.log.info "Defragmenting FlatFile"
343
+
346
344
  # Iterate over all entries.
347
- each_blob_header do |pos, header|
348
- # Total size of the current entry
349
- entry_bytes = FlatFileBlobHeader::LENGTH + header.length
350
- if header.is_valid?
351
- # We have found a valid entry.
352
- valid_blobs += 1
353
- if distance > 0
354
- begin
355
- # Read current entry into a buffer
356
- @f.seek(pos)
357
- buf = @f.read(entry_bytes)
358
- # Write the buffer right after the end of the previous entry.
359
- @f.seek(pos - distance)
360
- @f.write(buf)
361
- # Update the index with the new position
362
- @index.insert(header.id, pos - distance)
363
- # Mark the space between the relocated current entry and the
364
- # next valid entry as deleted space.
365
- FlatFileBlobHeader.new(@f, @f.pos, 0,
366
- distance - FlatFileBlobHeader::LENGTH,
367
- 0, 0).write
368
- @f.flush
369
- rescue IOError => e
370
- PEROBS.log.fatal "Error while moving blob for ID #{header.id}: " +
371
- e.message
345
+ @progressmeter.start('Defragmentizing blobs file', @f.size) do |pm|
346
+ each_blob_header do |header|
347
+ # If we have stumbled over a corrupted blob we treat it similar to a
348
+ # deleted blob and reuse the space.
349
+ if header.corruption_start
350
+ distance += header.addr - header.corruption_start
351
+ corrupted_blobs += 1
352
+ end
353
+
354
+ # Total size of the current entry
355
+ entry_bytes = FlatFileBlobHeader::LENGTH + header.length
356
+ if header.is_valid?
357
+ # We have found a valid entry.
358
+ valid_blobs += 1
359
+ if distance > 0
360
+ begin
361
+ # Read current entry into a buffer
362
+ @f.seek(header.addr)
363
+ buf = @f.read(entry_bytes)
364
+ # Write the buffer right after the end of the previous entry.
365
+ @f.seek(header.addr - distance)
366
+ @f.write(buf)
367
+ # Mark the space between the relocated current entry and the
368
+ # next valid entry as deleted space.
369
+ FlatFileBlobHeader.new(@f, @f.pos, 0,
370
+ distance - FlatFileBlobHeader::LENGTH,
371
+ 0, 0).write
372
+ @f.flush
373
+ rescue IOError => e
374
+ PEROBS.log.fatal "Error while moving blob for ID " +
375
+ "#{header.id}: #{e.message}"
376
+ end
372
377
  end
378
+ new_file_size = header.addr - distance +
379
+ FlatFileBlobHeader::LENGTH + header.length
380
+ else
381
+ deleted_blobs += 1
382
+ distance += entry_bytes
373
383
  end
374
- new_file_size = pos + FlatFileBlobHeader::LENGTH + header.length
375
- else
376
- deleted_blobs += 1
377
- distance += entry_bytes
384
+
385
+ pm.update(header.addr)
378
386
  end
379
387
  end
380
- PEROBS.log.info "FlatFile defragmented in #{Time.now - t} seconds"
388
+
381
389
  PEROBS.log.info "#{distance / 1000} KiB/#{deleted_blobs} blobs of " +
382
390
  "#{@f.size / 1000} KiB/#{valid_blobs} blobs or " +
383
391
  "#{'%.1f' % (distance.to_f / @f.size * 100.0)}% reclaimed"
392
+ if corrupted_blobs > 0
393
+ PEROBS.log.info "#{corrupted_blobs} corrupted blob(s) found. Space " +
394
+ "was recycled."
395
+ end
384
396
 
385
397
  @f.flush
386
398
  @f.truncate(new_file_size)
387
399
  @f.flush
388
- @space_list.clear
389
400
 
390
401
  sync
391
402
  end
392
403
 
393
404
  # This method iterates over all entries in the FlatFile and removes the
394
405
  # entry and inserts it again. This is useful to update all entries in
395
- # cased the storage format has changed.
406
+ # case the storage format has changed.
396
407
  def refresh
397
408
  # This iteration might look scary as we iterate over the entries while
398
409
  # we are rearranging them. Re-inserted items may be inserted
@@ -400,23 +411,34 @@ module PEROBS
400
411
  # inserted after the current entry and will be re-read again unless they
401
412
  # are inserted after the original file end.
402
413
  file_size = @f.size
403
- PEROBS.log.info "Refreshing the DB..."
404
- t = Time.now
405
- each_blob_header do |pos, header|
406
- if header.is_valid?
407
- buf = read_obj_by_address(pos, header.id)
408
- delete_obj_by_address(pos, header.id)
409
- write_obj_by_id(header.id, buf)
410
- end
411
414
 
412
- # Some re-inserted blobs may be inserted after the original file end.
413
- # No need to process those blobs again.
414
- break if pos >= file_size
415
+ # We don't update the index and the space list during this operation as
416
+ # we defragmentize the blob file at the end. We'll end the operation
417
+ # with an empty space list.
418
+ clear_index_files
419
+
420
+ @progressmeter.start('Converting objects to new storage format',
421
+ @f.size) do |pm|
422
+ each_blob_header do |header|
423
+ if header.is_valid?
424
+ buf = read_obj_by_address(header.addr, header.id)
425
+ delete_obj_by_address(header.addr, header.id)
426
+ write_obj_by_id(header.id, buf)
427
+ end
428
+
429
+ # Some re-inserted blobs may be inserted after the original file end.
430
+ # No need to process those blobs again.
431
+ break if header.addr >= file_size
432
+
433
+ pm.update(header.addr)
434
+ end
415
435
  end
416
- PEROBS.log.info "DB refresh completed in #{Time.now - t} seconds"
417
436
 
418
437
  # Reclaim the space saved by compressing entries.
419
438
  defragmentize
439
+
440
+ # Recreate the index file and create an empty space list.
441
+ regenerate_index_and_spaces
420
442
  end
421
443
 
422
444
  # Check (and repair) the FlatFile.
@@ -433,95 +455,117 @@ module PEROBS
433
455
  # First check the database blob file. Each entry should be readable and
434
456
  # correct and all IDs must be unique. We use a shadow index to keep
435
457
  # track of the already found IDs.
436
- new_index = BTree.new(@db_dir, 'new-index', INDEX_BTREE_ORDER)
458
+ new_index = BTree.new(@db_dir, 'new-index', INDEX_BTREE_ORDER,
459
+ @progressmeter)
437
460
  new_index.erase
438
461
  new_index.open
439
462
 
440
- each_blob_header do |pos, header|
441
- if header.is_valid?
442
- # We have a non-deleted entry.
443
- begin
444
- @f.seek(pos + FlatFileBlobHeader::LENGTH)
445
- buf = @f.read(header.length)
446
- if buf.bytesize != header.length
447
- PEROBS.log.error "Premature end of file in blob with ID " +
448
- "#{header.id}."
449
- discard_damaged_blob(header) if repair
450
- errors += 1
451
- next
452
- end
463
+ corrupted_blobs = 0
464
+ @progressmeter.start('Checking blobs file', @f.size) do |pm|
465
+ corrupted_blobs = each_blob_header do |header|
466
+ if header.is_valid?
467
+ # We have a non-deleted entry.
468
+ begin
469
+ @f.seek(header.addr + FlatFileBlobHeader::LENGTH)
470
+ buf = @f.read(header.length)
471
+ if buf.bytesize != header.length
472
+ PEROBS.log.error "Premature end of file in blob with ID " +
473
+ "#{header.id}."
474
+ discard_damaged_blob(header) if repair
475
+ errors += 1
476
+ next
477
+ end
453
478
 
454
- # Uncompress the data if the compression bit is set in the mark
455
- # byte.
456
- if header.is_compressed?
457
- begin
458
- buf = Zlib.inflate(buf)
459
- rescue Zlib::BufError, Zlib::DataError
460
- PEROBS.log.error "Corrupted compressed block with ID " +
461
- "#{header.id} found."
479
+ # Uncompress the data if the compression bit is set in the mark
480
+ # byte.
481
+ if header.is_compressed?
482
+ begin
483
+ buf = Zlib.inflate(buf)
484
+ rescue Zlib::BufError, Zlib::DataError
485
+ PEROBS.log.error "Corrupted compressed block with ID " +
486
+ "#{header.id} found."
487
+ discard_damaged_blob(header) if repair
488
+ errors += 1
489
+ next
490
+ end
491
+ end
492
+
493
+ if header.crc && checksum(buf) != header.crc
494
+ PEROBS.log.error "Checksum failure while checking blob " +
495
+ "with ID #{header.id}"
462
496
  discard_damaged_blob(header) if repair
463
497
  errors += 1
464
498
  next
465
499
  end
500
+ rescue IOError => e
501
+ PEROBS.log.fatal "Check of blob with ID #{header.id} failed: " +
502
+ e.message
466
503
  end
467
504
 
468
- if header.crc && checksum(buf) != header.crc
469
- PEROBS.log.error "Checksum failure while checking blob " +
470
- "with ID #{header.id}"
471
- discard_damaged_blob(header) if repair
505
+ # Check if the ID has already been found in the file.
506
+ if (previous_address = new_index.get(header.id))
507
+ PEROBS.log.error "Multiple blobs for ID #{header.id} found. " +
508
+ "Addresses: #{previous_address}, #{header.addr}"
472
509
  errors += 1
473
- next
474
- end
475
- rescue IOError => e
476
- PEROBS.log.fatal "Check of blob with ID #{header.id} failed: " +
477
- e.message
478
- end
479
-
480
- # Check if the ID has already been found in the file.
481
- if (previous_address = new_index.get(header.id))
482
- PEROBS.log.error "Multiple blobs for ID #{header.id} found. " +
483
- "Addresses: #{previous_address}, #{pos}"
484
- previous_header = FlatFileBlobHeader.read_at(@f, previous_address,
485
- header.id)
486
- if repair
487
- # We have two blobs with the same ID and we must discard one of
488
- # them.
489
- if header.is_outdated?
490
- discard_damaged_blob(header)
491
- elsif previous_header.is_outdated?
492
- discard_damaged_blob(previous_header)
493
- else
494
- PEROBS.log.error "None of the blobs with same ID have " +
495
- "the outdated flag set. Deleting the smaller one."
496
- discard_damaged_blob(header.length < previous_header.length ?
497
- header : previous_header)
510
+ previous_header = FlatFileBlobHeader.read(@f, previous_address,
511
+ header.id)
512
+ if repair
513
+ # We have two blobs with the same ID and we must discard one of
514
+ # them.
515
+ if header.is_outdated?
516
+ discard_damaged_blob(header)
517
+ elsif previous_header.is_outdated?
518
+ discard_damaged_blob(previous_header)
519
+ else
520
+ PEROBS.log.error "None of the blobs with same ID have " +
521
+ "the outdated flag set. Deleting the smaller one."
522
+ errors += 1
523
+ discard_damaged_blob(header.length < previous_header.length ?
524
+ header : previous_header)
525
+ end
526
+ next
498
527
  end
499
- next
528
+ else
529
+ # ID is unique so far. Add it to the shadow index.
530
+ new_index.insert(header.id, header.addr)
500
531
  end
501
- else
502
- # ID is unique so far. Add it to the shadow index.
503
- new_index.insert(header.id, pos)
532
+
504
533
  end
505
534
 
535
+ pm.update(header.addr)
506
536
  end
537
+
538
+ errors += corrupted_blobs
507
539
  end
540
+
508
541
  # We no longer need the new index.
509
542
  new_index.close
510
543
  new_index.erase
511
544
 
512
- # Now we check the index data. It must be correct and the entries must
513
- # match the blob file. All entries in the index must be in the blob file
514
- # and vise versa.
515
- begin
516
- index_ok = @index.check do |id, address|
517
- has_id_at?(id, address)
518
- end
519
- unless index_ok && @space_list.check(self) && cross_check_entries
545
+ if repair && corrupted_blobs > 0
546
+ erase_index_files
547
+ defragmentize
548
+ regenerate_index_and_spaces
549
+ else
550
+ # Now we check the index data. It must be correct and the entries must
551
+ # match the blob file. All entries in the index must be in the blob file
552
+ # and vice versa.
553
+ begin
554
+ index_ok = @index.check do |id, address|
555
+ has_id_at?(id, address)
556
+ end
557
+ x_check_errs = 0
558
+ space_check_ok = true
559
+ unless index_ok && (space_check_ok = @space_list.check(self)) &&
560
+ (x_check_errs = cross_check_entries) == 0
561
+ errors += 1 unless index_ok && space_check_ok
562
+ errors += x_check_errs
563
+ regenerate_index_and_spaces if repair
564
+ end
565
+ rescue PEROBS::FatalError
566
+ errors += 1
520
567
  regenerate_index_and_spaces if repair
521
568
  end
522
- rescue PEROBS::FatalError
523
- errors += 1
524
- regenerate_index_and_spaces if repair
525
569
  end
526
570
 
527
571
  sync if repair
@@ -535,22 +579,32 @@ module PEROBS
535
579
  # regenerates them from the FlatFile.
536
580
  def regenerate_index_and_spaces
537
581
  PEROBS.log.warn "Re-generating FlatFileDB index and space files"
582
+ @index.open unless @index.is_open?
538
583
  @index.clear
584
+ @space_list.open unless @space_list.is_open?
539
585
  @space_list.clear
540
586
 
541
- each_blob_header do |pos, header|
542
- if header.is_valid?
543
- if (duplicate_pos = @index.get(header.id))
544
- PEROBS.log.error "FlatFile contains multiple blobs for ID " +
545
- "#{header.id}. First blob is at address #{duplicate_pos}. " +
546
- "Other blob found at address #{pos}."
547
- @space_list.add_space(pos, header.length) if header.length > 0
548
- discard_damaged_blob(header)
587
+ @progressmeter.start('Re-generating database index', @f.size) do |pm|
588
+ each_blob_header do |header|
589
+ if header.is_valid?
590
+ if (duplicate_pos = @index.get(header.id))
591
+ PEROBS.log.error "FlatFile contains multiple blobs for ID " +
592
+ "#{header.id}. First blob is at address #{duplicate_pos}. " +
593
+ "Other blob found at address #{header.addr}."
594
+ if header.length > 0
595
+ @space_list.add_space(header.addr, header.length)
596
+ end
597
+ discard_damaged_blob(header)
598
+ else
599
+ @index.insert(header.id, header.addr)
600
+ end
549
601
  else
550
- @index.insert(header.id, pos)
602
+ if header.length > 0
603
+ @space_list.add_space(header.addr, header.length)
604
+ end
551
605
  end
552
- else
553
- @space_list.add_space(pos, header.length) if header.length > 0
606
+
607
+ pm.update(header.addr)
554
608
  end
555
609
  end
556
610
 
@@ -558,19 +612,19 @@ module PEROBS
558
612
  end
559
613
 
560
614
  def has_space?(address, size)
561
- header = FlatFileBlobHeader.read_at(@f, address)
615
+ header = FlatFileBlobHeader.read(@f, address)
562
616
  !header.is_valid? && header.length == size
563
617
  end
564
618
 
565
619
  def has_id_at?(id, address)
566
- header = FlatFileBlobHeader.read_at(@f, address)
620
+ header = FlatFileBlobHeader.read(@f, address)
567
621
  header.is_valid? && header.id == id
568
622
  end
569
623
 
570
624
  def inspect
571
625
  s = '['
572
- each_blob_header do |pos, header|
573
- s << "{ :pos => #{pos}, :flags => #{header.flags}, " +
626
+ each_blob_header do |header|
627
+ s << "{ :pos => #{header.addr}, :flags => #{header.flags}, " +
574
628
  ":length => #{header.length}, :id => #{header.id}, " +
575
629
  ":crc => #{header.crc}"
576
630
  if header.is_valid?
@@ -581,21 +635,68 @@ module PEROBS
581
635
  s + ']'
582
636
  end
583
637
 
638
+ def FlatFile::insert_header_checksums(db_dir)
639
+ old_file_name = File.join(db_dir, 'database.blobs')
640
+ new_file_name = File.join(db_dir, 'database_v4.blobs')
641
+ bak_file_name = File.join(db_dir, 'database_v3.blobs')
642
+
643
+ old_file = File.open(old_file_name, 'rb')
644
+ new_file = File.open(new_file_name, 'wb')
645
+
646
+ entries = 0
647
+ while (buf = old_file.read(21))
648
+ flags, length, id, crc = *buf.unpack('CQQL')
649
+ blob_data = old_file.read(length)
650
+
651
+ # Some basic sanity checking to ensure all reserved bits are 0. Older
652
+ # versions of PEROBS used to set bit 1 despite it being reserved now.
653
+ unless flags & 0xF0 == 0
654
+ PEROBS.log.fatal "Blob file #{old_file_name} contains illegal " +
655
+ "flag byte #{'%02x' % flags} at #{old_file.pos - 21}"
656
+ end
657
+
658
+ # Check if the blob is valid and current.
659
+ if flags & 0x1 == 1 && flags & 0x8 == 0
660
+ # Make sure the bit 1 is not set anymore.
661
+ flags = flags & 0x05
662
+ header_str = [ flags, length, id, crc ].pack('CQQL')
663
+ header_crc = Zlib.crc32(header_str, 0)
664
+ header_str += [ header_crc ].pack('L')
665
+
666
+ new_file.write(header_str + blob_data)
667
+ entries += 1
668
+ end
669
+ end
670
+ PEROBS.log.info "Header checksum added to #{entries} entries"
671
+
672
+ old_file.close
673
+ new_file.close
674
+
675
+ File.rename(old_file_name, bak_file_name)
676
+ File.rename(new_file_name, old_file_name)
677
+ end
678
+
584
679
  private
585
680
 
586
681
  def each_blob_header(&block)
587
- pos = 0
682
+ corrupted_blobs = 0
683
+
588
684
  begin
589
685
  @f.seek(0)
590
686
  while (header = FlatFileBlobHeader.read(@f))
591
- yield(pos, header)
687
+ if header.corruption_start
688
+ corrupted_blobs += 1
689
+ end
690
+
691
+ yield(header)
592
692
 
593
- pos += FlatFileBlobHeader::LENGTH + header.length
594
- @f.seek(pos)
693
+ @f.seek(header.addr + FlatFileBlobHeader::LENGTH + header.length)
595
694
  end
596
695
  rescue IOError => e
597
696
  PEROBS.log.fatal "Cannot read blob in flat file DB: #{e.message}"
598
697
  end
698
+
699
+ corrupted_blobs
599
700
  end
600
701
 
601
702
  def find_free_blob(bytes)
@@ -625,26 +726,34 @@ module PEROBS
625
726
  def cross_check_entries
626
727
  errors = 0
627
728
 
628
- each_blob_header do |pos, header|
629
- if !header.is_valid?
630
- if header.length > 0
631
- unless @space_list.has_space?(pos, header.length)
632
- PEROBS.log.error "FlatFile has free space " +
633
- "(addr: #{pos}, len: #{header.length}) that is not in " +
634
- "FreeSpaceManager"
635
- errors += 1
729
+ @progressmeter.start('Cross checking blobs and index', @f.size) do |pm|
730
+ each_blob_header do |header|
731
+ if !header.is_valid?
732
+ if header.length > 0
733
+ unless @space_list.has_space?(header.addr, header.length)
734
+ PEROBS.log.error "FlatFile has free space " +
735
+ "(addr: #{header.addr}, len: #{header.length}) that is " +
736
+ "not in FreeSpaceManager"
737
+ errors += 1
738
+ end
739
+ end
740
+ else
741
+ if (index_address = @index.get(header.id)).nil?
742
+ PEROBS.log.error "FlatFile blob at address #{header.addr} " +
743
+ "is not listed in the index"
744
+ errors +=1
745
+ elsif index_address != header.addr
746
+ PEROBS.log.error "FlatFile blob at address #{header.addr} " +
747
+ "is listed in index with address #{index_address}"
748
+ errors += 1
636
749
  end
637
750
  end
638
- else
639
- unless @index.get(header.id) == pos
640
- PEROBS.log.error "FlatFile blob at address #{pos} is listed " +
641
- "in index with address #{@index.get(header.id)}"
642
- errors += 1
643
- end
751
+
752
+ pm.update(header.addr)
644
753
  end
645
754
  end
646
755
 
647
- errors == 0
756
+ errors
648
757
  end
649
758
 
650
759
  def discard_damaged_blob(header)
@@ -653,6 +762,57 @@ module PEROBS
653
762
  header.clear_flags
654
763
  end
655
764
 
765
+ def open_index_files(abort_on_missing_files = false)
766
+ begin
767
+ @index.open(abort_on_missing_files)
768
+ @space_list.open
769
+ rescue FatalError
770
+ # Ensure that the index is really closed.
771
+ @index.close
772
+ # Erase it completely
773
+ @index.erase
774
+ # Then create it again.
775
+ @index.open
776
+
777
+ # Ensure that the spaces list is really closed.
778
+ @space_list.close
779
+ # Erase it completely
780
+ @space_list.erase
781
+ # Then create it again
782
+ @space_list.open
783
+
784
+ regenerate_index_and_spaces
785
+ end
786
+ end
787
+
788
+ def erase_index_files
789
+ # Ensure that the index is really closed.
790
+ @index.close
791
+ # Erase it completely
792
+ @index.erase
793
+
794
+ # Ensure that the spaces list is really closed.
795
+ @space_list.close
796
+ # Erase it completely
797
+ @space_list.erase
798
+ end
799
+
800
+ def clear_index_files
801
+ # Ensure that the index is really closed.
802
+ @index.close
803
+ # Erase it completely
804
+ @index.erase
805
+ # Then create it again.
806
+ @index.open
807
+
808
+ # Ensure that the spaces list is really closed.
809
+ @space_list.close
810
+ # Erase it completely
811
+ @space_list.erase
812
+ # Then create it again
813
+ @space_list.open
814
+ end
815
+
656
816
  end
657
817
 
658
818
  end