perobs 4.0.0 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. data/lib/perobs.rb +1 -0
  3. data/lib/perobs/Array.rb +66 -19
  4. data/lib/perobs/BTree.rb +83 -12
  5. data/lib/perobs/BTreeBlob.rb +1 -1
  6. data/lib/perobs/BTreeDB.rb +2 -2
  7. data/lib/perobs/BTreeNode.rb +365 -85
  8. data/lib/perobs/BigArray.rb +267 -0
  9. data/lib/perobs/BigArrayNode.rb +998 -0
  10. data/lib/perobs/BigHash.rb +262 -0
  11. data/lib/perobs/BigTree.rb +184 -0
  12. data/lib/perobs/BigTreeNode.rb +873 -0
  13. data/lib/perobs/ConsoleProgressMeter.rb +61 -0
  14. data/lib/perobs/DataBase.rb +4 -3
  15. data/lib/perobs/DynamoDB.rb +57 -15
  16. data/lib/perobs/EquiBlobsFile.rb +143 -51
  17. data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
  18. data/lib/perobs/FlatFile.rb +363 -203
  19. data/lib/perobs/FlatFileBlobHeader.rb +98 -54
  20. data/lib/perobs/FlatFileDB.rb +42 -20
  21. data/lib/perobs/Hash.rb +58 -13
  22. data/lib/perobs/IDList.rb +144 -0
  23. data/lib/perobs/IDListPage.rb +107 -0
  24. data/lib/perobs/IDListPageFile.rb +180 -0
  25. data/lib/perobs/IDListPageRecord.rb +142 -0
  26. data/lib/perobs/Object.rb +18 -15
  27. data/lib/perobs/ObjectBase.rb +38 -4
  28. data/lib/perobs/PersistentObjectCache.rb +53 -67
  29. data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
  30. data/lib/perobs/ProgressMeter.rb +97 -0
  31. data/lib/perobs/SpaceTree.rb +21 -12
  32. data/lib/perobs/SpaceTreeNode.rb +53 -61
  33. data/lib/perobs/Store.rb +71 -32
  34. data/lib/perobs/version.rb +1 -1
  35. data/perobs.gemspec +4 -4
  36. data/test/Array_spec.rb +15 -6
  37. data/test/BTree_spec.rb +5 -2
  38. data/test/BigArray_spec.rb +214 -0
  39. data/test/BigHash_spec.rb +144 -0
  40. data/test/BigTreeNode_spec.rb +153 -0
  41. data/test/BigTree_spec.rb +259 -0
  42. data/test/EquiBlobsFile_spec.rb +105 -1
  43. data/test/FNV_Hash_1a_64_spec.rb +59 -0
  44. data/test/FlatFileDB_spec.rb +63 -14
  45. data/test/Hash_spec.rb +1 -2
  46. data/test/IDList_spec.rb +77 -0
  47. data/test/LegacyDBs/LegacyDB.rb +151 -0
  48. data/test/LegacyDBs/version_3/class_map.json +1 -0
  49. data/test/LegacyDBs/version_3/config.json +1 -0
  50. data/test/LegacyDBs/version_3/database.blobs +0 -0
  51. data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
  52. data/test/LegacyDBs/version_3/index.blobs +0 -0
  53. data/test/LegacyDBs/version_3/version +1 -0
  54. data/test/LockFile_spec.rb +9 -6
  55. data/test/SpaceTree_spec.rb +4 -1
  56. data/test/Store_spec.rb +290 -199
  57. data/test/spec_helper.rb +9 -4
  58. metadata +47 -10
  59. data/lib/perobs/TreeDB.rb +0 -277
@@ -2,7 +2,7 @@
2
2
  #
3
3
  # = SpaceTree.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
5
+ # Copyright (c) 2016, 2017, 2018 by Chris Schlaeger <chris@taskjuggler.org>
6
6
  #
7
7
  # MIT License
8
8
  #
@@ -40,20 +40,21 @@ module PEROBS
40
40
  # size which drastically simplifies the backing store operation.
41
41
  class SpaceTree
42
42
 
43
- attr_reader :nodes, :cache
43
+ attr_reader :nodes, :cache, :progressmeter
44
44
 
45
45
  # Manage the free spaces tree in the specified directory
46
46
  # @param dir [String] directory path of an existing directory
47
- def initialize(dir)
47
+ def initialize(dir, progressmeter)
48
48
  @dir = dir
49
+ @progressmeter = progressmeter
49
50
 
50
51
  # This EquiBlobsFile contains the nodes of the SpaceTree.
51
- @nodes = EquiBlobsFile.new(@dir, 'database_spaces',
52
+ @nodes = EquiBlobsFile.new(@dir, 'database_spaces', progressmeter,
52
53
  SpaceTreeNode::NODE_BYTES, 1)
53
54
 
54
55
  # Benchmark runs showed a cache size of 128 to be a good compromise
55
56
  # between read and write performance trade-offs and memory consumption.
56
- @cache = PersistentObjectCache.new(128, SpaceTreeNode, self)
57
+ @cache = PersistentObjectCache.new(128, 5000, SpaceTreeNode, self)
57
58
  end
58
59
 
59
60
  # Open the SpaceTree file.
@@ -74,9 +75,14 @@ module PEROBS
74
75
  @cache.clear
75
76
  end
76
77
 
78
+ # @return true if file is currently open.
79
+ def is_open?
80
+ !@root_address.nil?
81
+ end
82
+
77
83
  # Flush all pending writes to the file system.
78
84
  def sync
79
- @cache.flush
85
+ @cache.flush(true)
80
86
  @nodes.sync
81
87
  end
82
88
 
@@ -105,10 +111,12 @@ module PEROBS
105
111
  if size <= 0
106
112
  PEROBS.log.fatal "Size (#{size}) must be larger than 0."
107
113
  end
108
- if has_space?(address, size)
109
- PEROBS.log.fatal "The space with address #{address} and size #{size} " +
110
- "can't be added twice."
111
- end
114
+ # The following check is fairly costly and should never trigger unless
115
+ # there is a bug in the PEROBS code. Only use this for debugging.
116
+ #if has_space?(address, size)
117
+ # PEROBS.log.fatal "The space with address #{address} and size " +
118
+ # "#{size} can't be added twice."
119
+ #end
112
120
  root.add_space(address, size)
113
121
  end
114
122
 
@@ -157,8 +165,9 @@ module PEROBS
157
165
  # @param flat_file [FlatFile] Flat file to compare with
158
166
  # @return True if space list matches, false otherwise
159
167
  def check(flat_file = nil)
160
- @nodes.check
161
- root.check(flat_file)
168
+ sync
169
+ return false unless @nodes.check
170
+ root.check(flat_file, @nodes.total_entries)
162
171
  end
163
172
 
164
173
  # Iterate over all entries and yield address and size.
@@ -74,24 +74,11 @@ module PEROBS
74
74
  @smaller = smaller
75
75
  @equal = equal
76
76
  @larger = larger
77
-
78
- ObjectSpace.define_finalizer(
79
- self, SpaceTreeNode._finalize(@tree, @node_address, object_id))
80
- @tree.cache.insert(self, false)
81
- end
82
-
83
- # This method generates the destructor for the objects of this class. It
84
- # is done this way to prevent the Proc object hanging on to a reference to
85
- # self which would prevent the object from being collected. This internal
86
- # method is not intended for users to call.
87
- def SpaceTreeNode._finalize(tree, node_address, ruby_object_id)
88
- proc { tree.cache._collect(node_address, ruby_object_id) }
89
77
  end
90
78
 
91
79
  # Create a new SpaceTreeNode. This method should be used for the creation
92
80
  # of new nodes instead of calling the constructor directly.
93
81
  # @param tree [SpaceTree] The tree the node should belong to
94
- # @param node_address [Integer] Address of the node in the file
95
82
  # @param blob_address [Integer] Address of the free space blob
96
83
  # @param size [Integer] Size of the free space blob
97
84
  # @param parent [SpaceTreeNode] Parent node in the tree
@@ -99,7 +86,7 @@ module PEROBS
99
86
  node_address = tree.nodes.free_address
100
87
 
101
88
  node = SpaceTreeNode.new(tree, node_address, blob_address, size, parent)
102
- node.save
89
+ tree.cache.insert(node)
103
90
 
104
91
  node
105
92
  end
@@ -107,7 +94,7 @@ module PEROBS
107
94
  # Restore a node from the backing store at the given address and tree.
108
95
  # @param tree [SpaceTree] The tree the node belongs to
109
96
  # @param node_address [Integer] The address in the file.
110
- def SpaceTreeNode::load(tree, node_address)
97
+ def SpaceTreeNode::load(tree, node_address, unused = nil)
111
98
  unless node_address > 0
112
99
  PEROBS.log.fatal "node_address (#{node_address}) must be larger than 0"
113
100
  end
@@ -132,6 +119,8 @@ module PEROBS
132
119
  node = SpaceTreeNode.new(tree, node_address, blob_address, size,
133
120
  parent, smaller, equal, larger)
134
121
 
122
+ tree.cache.insert(node, false)
123
+
135
124
  node
136
125
  end
137
126
 
@@ -529,62 +518,65 @@ module PEROBS
529
518
  # errors.
530
519
  # @param flat_file [FlatFile] If given, check that the space is also
531
520
  # present in the given flat file.
521
+ # @param count [Integer] The total number of entries in the tree
532
522
  # @return [false,true] True if OK, false otherwise
533
- def check(flat_file)
523
+ def check(flat_file, count)
534
524
  node_counter = 0
535
525
  max_depth = 0
536
526
 
537
- each do |node, mode, stack|
538
- max_depth = stack.size if stack.size > max_depth
539
-
540
- case mode
541
- when :smaller
542
- if node.smaller
543
- return false unless node.check_node_link('smaller', stack)
544
- smaller_node = node.smaller
545
- if smaller_node.size >= node.size
546
- PEROBS.log.error "Smaller SpaceTreeNode size " +
547
- "(#{smaller_node}) is not smaller than #{node}"
548
- return false
527
+ @tree.progressmeter.start('Checking space list entries', count) do |pm|
528
+ each do |node, mode, stack|
529
+ max_depth = stack.size if stack.size > max_depth
530
+
531
+ case mode
532
+ when :smaller
533
+ if node.smaller
534
+ return false unless node.check_node_link('smaller', stack)
535
+ smaller_node = node.smaller
536
+ if smaller_node.size >= node.size
537
+ PEROBS.log.error "Smaller SpaceTreeNode size " +
538
+ "(#{smaller_node}) is not smaller than #{node}"
539
+ return false
540
+ end
549
541
  end
550
- end
551
- when :equal
552
- if node.equal
553
- return false unless node.check_node_link('equal', stack)
554
- equal_node = node.equal
555
-
556
- if equal_node.smaller || equal_node.larger
557
- PEROBS.log.error "Equal node #{equal_node} must not have " +
558
- "smaller/larger childs"
559
- return false
542
+ when :equal
543
+ if node.equal
544
+ return false unless node.check_node_link('equal', stack)
545
+ equal_node = node.equal
546
+
547
+ if equal_node.smaller || equal_node.larger
548
+ PEROBS.log.error "Equal node #{equal_node} must not have " +
549
+ "smaller/larger childs"
550
+ return false
551
+ end
552
+
553
+ if node.size != equal_node.size
554
+ PEROBS.log.error "Equal SpaceTreeNode size (#{equal_node}) " +
555
+ "is not equal parent node #{node}"
556
+ return false
557
+ end
560
558
  end
561
-
562
- if node.size != equal_node.size
563
- PEROBS.log.error "Equal SpaceTreeNode size (#{equal_node}) is " +
564
- "not equal parent node #{node}"
565
- return false
559
+ when :larger
560
+ if node.larger
561
+ return false unless node.check_node_link('larger', stack)
562
+ larger_node = node.larger
563
+ if larger_node.size <= node.size
564
+ PEROBS.log.error "Larger SpaceTreeNode size " +
565
+ "(#{larger_node}) is not larger than #{node}"
566
+ return false
567
+ end
566
568
  end
567
- end
568
- when :larger
569
- if node.larger
570
- return false unless node.check_node_link('larger', stack)
571
- larger_node = node.larger
572
- if larger_node.size <= node.size
573
- PEROBS.log.error "Larger SpaceTreeNode size " +
574
- "(#{larger_node}) is not larger than #{node}"
569
+ when :on_exit
570
+ if flat_file &&
571
+ !flat_file.has_space?(node.blob_address, node.size)
572
+ PEROBS.log.error "SpaceTreeNode has space at offset " +
573
+ "#{node.blob_address} of size #{node.size} that isn't " +
574
+ "available in the FlatFile."
575
575
  return false
576
576
  end
577
- end
578
- when :on_exit
579
- if flat_file &&
580
- !flat_file.has_space?(node.blob_address, node.size)
581
- PEROBS.log.error "SpaceTreeNode has space at offset " +
582
- "#{node.blob_address} of size #{node.size} that isn't " +
583
- "available in the FlatFile."
584
- return false
585
- end
586
577
 
587
- node_counter += 1
578
+ pm.update(node_counter += 1)
579
+ end
588
580
  end
589
581
  end
590
582
  PEROBS.log.debug "#{node_counter} SpaceTree nodes checked"
@@ -2,7 +2,8 @@
2
2
  #
3
3
  # = Store.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2015, 2016 by Chris Schlaeger <chris@taskjuggler.org>
5
+ # Copyright (c) 2015, 2016, 2017, 2018, 2019
6
+ # by Chris Schlaeger <chris@taskjuggler.org>
6
7
  #
7
8
  # MIT License
8
9
  #
@@ -36,6 +37,11 @@ require 'perobs/FlatFileDB'
36
37
  require 'perobs/Object'
37
38
  require 'perobs/Hash'
38
39
  require 'perobs/Array'
40
+ require 'perobs/BigTree'
41
+ require 'perobs/BigHash'
42
+ require 'perobs/BigArray'
43
+ require 'perobs/ProgressMeter'
44
+ require 'perobs/ConsoleProgressMeter'
39
45
 
40
46
  # PErsistent Ruby OBject Store
41
47
  module PEROBS
@@ -132,8 +138,14 @@ module PEROBS
132
138
  # :yaml : Can also handle most Ruby data types and is
133
139
  # portable between Ruby versions (1.9 and later).
134
140
  # Unfortunately, it is 10x slower than marshal.
141
+ # :progressmeter : reference to a ProgressMeter object that receives
142
+ # progress information during longer running tasks.
143
+ # It defaults to ProgressMeter which only logs into
144
+ # the log. Use ConsoleProgressMeter or a derived
145
+ # class for more fancy progress reporting.
135
146
  def initialize(data_base, options = {})
136
147
  # Create a backing store handler
148
+ @progressmeter = (options[:progressmeter] ||= ProgressMeter.new)
137
149
  @db = (options[:engine] || FlatFileDB).new(data_base, options)
138
150
  @db.open
139
151
  # Create a map that can translate classes to numerical IDs and vice
@@ -197,7 +209,10 @@ module PEROBS
197
209
  # method was called.
198
210
  def exit
199
211
  if @cache && @cache.in_transaction?
200
- PEROBS.log.fatal 'You cannot call exit() during a transaction'
212
+ @cache.abort_transaction
213
+ @cache.flush
214
+ @db.close if @db
215
+ PEROBS.log.fatal "You cannot call exit() during a transaction: #{Kernel.caller}"
201
216
  end
202
217
  @cache.flush if @cache
203
218
  @db.close if @db
@@ -297,20 +312,30 @@ module PEROBS
297
312
  # needed.
298
313
  def sync
299
314
  if @cache.in_transaction?
300
- PEROBS.log.fatal 'You cannot call sync() during a transaction'
315
+ @cache.abort_transaction
316
+ @cache.flush
317
+ PEROBS.log.fatal "You cannot call sync() during a transaction: \n" +
318
+ Kernel.caller.join("\n")
301
319
  end
302
320
  @cache.flush
303
321
  end
304
322
 
323
+ # Return the number of objects stored in the store. CAVEAT: This method
324
+ # will only return correct values when it is separated from any mutating
325
+ # call by a call to sync().
326
+ # @return [Integer] Number of persistently stored objects in the Store.
327
+ def size
328
+ # We don't include the Hash that stores the root objects into the object
329
+ # count.
330
+ @db.item_counter - 1
331
+ end
332
+
305
333
  # Discard all objects that are not somehow connected to the root objects
306
334
  # from the back-end storage. The garbage collector is not invoked
307
335
  # automatically. Depending on your usage pattern, you need to call this
308
336
  # method periodically.
309
337
  # @return [Integer] The number of collected objects
310
338
  def gc
311
- if @cache.in_transaction?
312
- PEROBS.log.fatal 'You cannot call gc() during a transaction'
313
- end
314
339
  sync
315
340
  mark
316
341
  sweep
@@ -354,10 +379,6 @@ module PEROBS
354
379
  return obj
355
380
  end
356
381
 
357
- #if (obj = @db.search_object(id))
358
- # PEROBS.log.fatal "Object was not in index but in DB"
359
- #end
360
-
361
382
  # The requested object does not exist. Return nil.
362
383
  nil
363
384
  end
@@ -370,38 +391,42 @@ module PEROBS
370
391
  # made.
371
392
  # @return [Integer] The number of references to bad objects found.
372
393
  def check(repair = false)
394
+ stats = { :errors => 0, :object_cnt => 0 }
395
+
373
396
  # All objects must have in-db version.
374
397
  sync
375
398
  # Run basic consistency checks first.
376
- errors = @db.check_db(repair)
399
+ stats[:errors] += @db.check_db(repair)
377
400
 
378
401
  # We will use the mark to mark all objects that we have checked already.
379
402
  # Before we start, we need to clear all marks.
380
403
  @db.clear_marks
381
404
 
382
- objects = 0
383
- @root_objects.each do |name, id|
384
- objects += 1
385
- errors += check_object(id, repair)
405
+ @progressmeter.start("Checking object link structure",
406
+ @db.item_counter) do
407
+ @root_objects.each do |name, id|
408
+ check_object(id, repair, stats)
409
+ end
386
410
  end
387
411
 
388
412
  # Delete all broken root objects.
389
413
  if repair
390
414
  @root_objects.delete_if do |name, id|
391
- unless (res = @db.check(id, repair))
415
+ unless @db.check(id, repair)
392
416
  PEROBS.log.error "Discarding broken root object '#{name}' " +
393
417
  "with ID #{id}"
394
- errors += 1
418
+ stats[:errors] += 1
395
419
  end
396
- !res
397
420
  end
398
421
  end
399
422
 
400
- if errors > 0
423
+ if stats[:errors] > 0
401
424
  if repair
402
- PEROBS.log.error "#{errors} errors found in #{objects} objects"
425
+ PEROBS.log.error "#{stats[:errors]} errors found in " +
426
+ "#{stats[:object_cnt]} objects"
403
427
  else
404
- PEROBS.log.fatal "#{errors} errors found in #{objects} objects"
428
+ PEROBS.log.fatal "#{stats[:errors]} errors found in " +
429
+ "#{stats[:object_cnt]} objects"
405
430
  end
406
431
  else
407
432
  PEROBS.log.debug "No errors found"
@@ -410,7 +435,7 @@ module PEROBS
410
435
  # Ensure that any fixes are written into the DB.
411
436
  sync if repair
412
437
 
413
- errors
438
+ stats[:errors]
414
439
  end
415
440
 
416
441
  # This method will execute the provided block as an atomic transaction
@@ -439,7 +464,10 @@ module PEROBS
439
464
  stack = [ 0 ] + @root_objects.values
440
465
  while !stack.empty?
441
466
  # Get an object index from the stack.
442
- unless (obj = object_by_id(id = stack.pop))
467
+ id = stack.pop
468
+ next if @db.is_marked?(id)
469
+
470
+ unless (obj = object_by_id(id))
443
471
  PEROBS.log.fatal "Database is corrupted. Object with ID #{id} " +
444
472
  "not found."
445
473
  end
@@ -510,19 +538,25 @@ module PEROBS
510
538
  def mark
511
539
  classes = Set.new
512
540
  marked_objects = 0
513
- each { |obj| classes.add(obj.class); marked_objects += 1 }
541
+ @progressmeter.start("Marking linked objects", @db.item_counter) do
542
+ each do |obj|
543
+ classes.add(obj.class)
544
+ @progressmeter.update(marked_objects += 1)
545
+ end
546
+ end
514
547
  @class_map.keep(classes.map { |c| c.to_s })
515
548
 
516
549
  # The root_objects object is included in the count, but we only want to
517
550
  # count user objects here.
518
- PEROBS.log.debug "#{marked_objects - 1} objects marked"
551
+ PEROBS.log.debug "#{marked_objects - 1} of #{@db.item_counter} " +
552
+ "objects marked"
519
553
  @stats.marked_objects = marked_objects - 1
520
554
  end
521
555
 
522
556
  # Sweep phase of a mark-and-sweep garbage collector. It will remove all
523
557
  # unmarked objects from the store.
524
558
  def sweep
525
- @stats.swept_objects = @db.delete_unmarked_objects.length
559
+ @stats.swept_objects = @db.delete_unmarked_objects
526
560
  @cache.reset
527
561
  PEROBS.log.debug "#{@stats.swept_objects} objects collected"
528
562
  @stats.swept_objects
@@ -534,8 +568,7 @@ module PEROBS
534
568
  # with
535
569
  # @param repair [Boolean] Delete references to broken objects if true
536
570
  # @return [Integer] The number of references to bad objects.
537
- def check_object(start_id, repair)
538
- errors = 0
571
+ def check_object(start_id, repair, stats)
539
572
  @db.mark(start_id)
540
573
  # The todo list holds a tuple for each object that still needs to be
541
574
  # checked. The first item is the referring object and the second is the
@@ -546,7 +579,13 @@ module PEROBS
546
579
  # Get the next PEROBS object to check
547
580
  ref_obj, id = todo_list.pop
548
581
 
549
- if (obj = object_by_id(id))
582
+ begin
583
+ obj = object_by_id(id)
584
+ rescue PEROBS::FatalError
585
+ obj = nil
586
+ end
587
+
588
+ if obj
550
589
  # The object exists and is OK. Mark it as checked.
551
590
  @db.mark(id)
552
591
  # Now look at all other objects referenced by this object.
@@ -569,11 +608,11 @@ module PEROBS
569
608
  ref_obj.inspect
570
609
  end
571
610
  end
572
- errors += 1
611
+ stats[:errors] += 1
573
612
  end
574
- end
575
613
 
576
- errors
614
+ @progressmeter.update(stats[:object_cnt] += 1)
615
+ end
577
616
  end
578
617
 
579
618
  end