perobs 4.0.0 → 4.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. data/lib/perobs.rb +1 -0
  3. data/lib/perobs/Array.rb +66 -19
  4. data/lib/perobs/BTree.rb +83 -12
  5. data/lib/perobs/BTreeBlob.rb +1 -1
  6. data/lib/perobs/BTreeDB.rb +2 -2
  7. data/lib/perobs/BTreeNode.rb +365 -85
  8. data/lib/perobs/BigArray.rb +267 -0
  9. data/lib/perobs/BigArrayNode.rb +998 -0
  10. data/lib/perobs/BigHash.rb +262 -0
  11. data/lib/perobs/BigTree.rb +184 -0
  12. data/lib/perobs/BigTreeNode.rb +873 -0
  13. data/lib/perobs/ConsoleProgressMeter.rb +61 -0
  14. data/lib/perobs/DataBase.rb +4 -3
  15. data/lib/perobs/DynamoDB.rb +57 -15
  16. data/lib/perobs/EquiBlobsFile.rb +143 -51
  17. data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
  18. data/lib/perobs/FlatFile.rb +363 -203
  19. data/lib/perobs/FlatFileBlobHeader.rb +98 -54
  20. data/lib/perobs/FlatFileDB.rb +42 -20
  21. data/lib/perobs/Hash.rb +58 -13
  22. data/lib/perobs/IDList.rb +144 -0
  23. data/lib/perobs/IDListPage.rb +107 -0
  24. data/lib/perobs/IDListPageFile.rb +180 -0
  25. data/lib/perobs/IDListPageRecord.rb +142 -0
  26. data/lib/perobs/Object.rb +18 -15
  27. data/lib/perobs/ObjectBase.rb +38 -4
  28. data/lib/perobs/PersistentObjectCache.rb +53 -67
  29. data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
  30. data/lib/perobs/ProgressMeter.rb +97 -0
  31. data/lib/perobs/SpaceTree.rb +21 -12
  32. data/lib/perobs/SpaceTreeNode.rb +53 -61
  33. data/lib/perobs/Store.rb +71 -32
  34. data/lib/perobs/version.rb +1 -1
  35. data/perobs.gemspec +4 -4
  36. data/test/Array_spec.rb +15 -6
  37. data/test/BTree_spec.rb +5 -2
  38. data/test/BigArray_spec.rb +214 -0
  39. data/test/BigHash_spec.rb +144 -0
  40. data/test/BigTreeNode_spec.rb +153 -0
  41. data/test/BigTree_spec.rb +259 -0
  42. data/test/EquiBlobsFile_spec.rb +105 -1
  43. data/test/FNV_Hash_1a_64_spec.rb +59 -0
  44. data/test/FlatFileDB_spec.rb +63 -14
  45. data/test/Hash_spec.rb +1 -2
  46. data/test/IDList_spec.rb +77 -0
  47. data/test/LegacyDBs/LegacyDB.rb +151 -0
  48. data/test/LegacyDBs/version_3/class_map.json +1 -0
  49. data/test/LegacyDBs/version_3/config.json +1 -0
  50. data/test/LegacyDBs/version_3/database.blobs +0 -0
  51. data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
  52. data/test/LegacyDBs/version_3/index.blobs +0 -0
  53. data/test/LegacyDBs/version_3/version +1 -0
  54. data/test/LockFile_spec.rb +9 -6
  55. data/test/SpaceTree_spec.rb +4 -1
  56. data/test/Store_spec.rb +290 -199
  57. data/test/spec_helper.rb +9 -4
  58. metadata +47 -10
  59. data/lib/perobs/TreeDB.rb +0 -277
@@ -2,7 +2,7 @@
2
2
  #
3
3
  # = SpaceTree.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
5
+ # Copyright (c) 2016, 2017, 2018 by Chris Schlaeger <chris@taskjuggler.org>
6
6
  #
7
7
  # MIT License
8
8
  #
@@ -40,20 +40,21 @@ module PEROBS
40
40
  # size which drastically simplifies the backing store operation.
41
41
  class SpaceTree
42
42
 
43
- attr_reader :nodes, :cache
43
+ attr_reader :nodes, :cache, :progressmeter
44
44
 
45
45
  # Manage the free spaces tree in the specified directory
46
46
  # @param dir [String] directory path of an existing directory
47
- def initialize(dir)
47
+ def initialize(dir, progressmeter)
48
48
  @dir = dir
49
+ @progressmeter = progressmeter
49
50
 
50
51
  # This EquiBlobsFile contains the nodes of the SpaceTree.
51
- @nodes = EquiBlobsFile.new(@dir, 'database_spaces',
52
+ @nodes = EquiBlobsFile.new(@dir, 'database_spaces', progressmeter,
52
53
  SpaceTreeNode::NODE_BYTES, 1)
53
54
 
54
55
  # Benchmark runs showed a cache size of 128 to be a good compromise
55
56
  # between read and write performance trade-offs and memory consumption.
56
- @cache = PersistentObjectCache.new(128, SpaceTreeNode, self)
57
+ @cache = PersistentObjectCache.new(128, 5000, SpaceTreeNode, self)
57
58
  end
58
59
 
59
60
  # Open the SpaceTree file.
@@ -74,9 +75,14 @@ module PEROBS
74
75
  @cache.clear
75
76
  end
76
77
 
78
+ # @return true if file is currently open.
79
+ def is_open?
80
+ !@root_address.nil?
81
+ end
82
+
77
83
  # Flush all pending writes to the file system.
78
84
  def sync
79
- @cache.flush
85
+ @cache.flush(true)
80
86
  @nodes.sync
81
87
  end
82
88
 
@@ -105,10 +111,12 @@ module PEROBS
105
111
  if size <= 0
106
112
  PEROBS.log.fatal "Size (#{size}) must be larger than 0."
107
113
  end
108
- if has_space?(address, size)
109
- PEROBS.log.fatal "The space with address #{address} and size #{size} " +
110
- "can't be added twice."
111
- end
114
+ # The following check is fairly costly and should never trigger unless
115
+ # there is a bug in the PEROBS code. Only use this for debugging.
116
+ #if has_space?(address, size)
117
+ # PEROBS.log.fatal "The space with address #{address} and size " +
118
+ # "#{size} can't be added twice."
119
+ #end
112
120
  root.add_space(address, size)
113
121
  end
114
122
 
@@ -157,8 +165,9 @@ module PEROBS
157
165
  # @param flat_file [FlatFile] Flat file to compare with
158
166
  # @return True if space list matches, false otherwise
159
167
  def check(flat_file = nil)
160
- @nodes.check
161
- root.check(flat_file)
168
+ sync
169
+ return false unless @nodes.check
170
+ root.check(flat_file, @nodes.total_entries)
162
171
  end
163
172
 
164
173
  # Iterate over all entries and yield address and size.
@@ -74,24 +74,11 @@ module PEROBS
74
74
  @smaller = smaller
75
75
  @equal = equal
76
76
  @larger = larger
77
-
78
- ObjectSpace.define_finalizer(
79
- self, SpaceTreeNode._finalize(@tree, @node_address, object_id))
80
- @tree.cache.insert(self, false)
81
- end
82
-
83
- # This method generates the destructor for the objects of this class. It
84
- # is done this way to prevent the Proc object hanging on to a reference to
85
- # self which would prevent the object from being collected. This internal
86
- # method is not intended for users to call.
87
- def SpaceTreeNode._finalize(tree, node_address, ruby_object_id)
88
- proc { tree.cache._collect(node_address, ruby_object_id) }
89
77
  end
90
78
 
91
79
  # Create a new SpaceTreeNode. This method should be used for the creation
92
80
  # of new nodes instead of calling the constructor directly.
93
81
  # @param tree [SpaceTree] The tree the node should belong to
94
- # @param node_address [Integer] Address of the node in the file
95
82
  # @param blob_address [Integer] Address of the free space blob
96
83
  # @param size [Integer] Size of the free space blob
97
84
  # @param parent [SpaceTreeNode] Parent node in the tree
@@ -99,7 +86,7 @@ module PEROBS
99
86
  node_address = tree.nodes.free_address
100
87
 
101
88
  node = SpaceTreeNode.new(tree, node_address, blob_address, size, parent)
102
- node.save
89
+ tree.cache.insert(node)
103
90
 
104
91
  node
105
92
  end
@@ -107,7 +94,7 @@ module PEROBS
107
94
  # Restore a node from the backing store at the given address and tree.
108
95
  # @param tree [SpaceTree] The tree the node belongs to
109
96
  # @param node_address [Integer] The address in the file.
110
- def SpaceTreeNode::load(tree, node_address)
97
+ def SpaceTreeNode::load(tree, node_address, unused = nil)
111
98
  unless node_address > 0
112
99
  PEROBS.log.fatal "node_address (#{node_address}) must be larger than 0"
113
100
  end
@@ -132,6 +119,8 @@ module PEROBS
132
119
  node = SpaceTreeNode.new(tree, node_address, blob_address, size,
133
120
  parent, smaller, equal, larger)
134
121
 
122
+ tree.cache.insert(node, false)
123
+
135
124
  node
136
125
  end
137
126
 
@@ -529,62 +518,65 @@ module PEROBS
529
518
  # errors.
530
519
  # @param flat_file [FlatFile] If given, check that the space is also
531
520
  # present in the given flat file.
521
+ # @param count [Integer] The total number of entries in the tree
532
522
  # @return [false,true] True if OK, false otherwise
533
- def check(flat_file)
523
+ def check(flat_file, count)
534
524
  node_counter = 0
535
525
  max_depth = 0
536
526
 
537
- each do |node, mode, stack|
538
- max_depth = stack.size if stack.size > max_depth
539
-
540
- case mode
541
- when :smaller
542
- if node.smaller
543
- return false unless node.check_node_link('smaller', stack)
544
- smaller_node = node.smaller
545
- if smaller_node.size >= node.size
546
- PEROBS.log.error "Smaller SpaceTreeNode size " +
547
- "(#{smaller_node}) is not smaller than #{node}"
548
- return false
527
+ @tree.progressmeter.start('Checking space list entries', count) do |pm|
528
+ each do |node, mode, stack|
529
+ max_depth = stack.size if stack.size > max_depth
530
+
531
+ case mode
532
+ when :smaller
533
+ if node.smaller
534
+ return false unless node.check_node_link('smaller', stack)
535
+ smaller_node = node.smaller
536
+ if smaller_node.size >= node.size
537
+ PEROBS.log.error "Smaller SpaceTreeNode size " +
538
+ "(#{smaller_node}) is not smaller than #{node}"
539
+ return false
540
+ end
549
541
  end
550
- end
551
- when :equal
552
- if node.equal
553
- return false unless node.check_node_link('equal', stack)
554
- equal_node = node.equal
555
-
556
- if equal_node.smaller || equal_node.larger
557
- PEROBS.log.error "Equal node #{equal_node} must not have " +
558
- "smaller/larger childs"
559
- return false
542
+ when :equal
543
+ if node.equal
544
+ return false unless node.check_node_link('equal', stack)
545
+ equal_node = node.equal
546
+
547
+ if equal_node.smaller || equal_node.larger
548
+ PEROBS.log.error "Equal node #{equal_node} must not have " +
549
+ "smaller/larger childs"
550
+ return false
551
+ end
552
+
553
+ if node.size != equal_node.size
554
+ PEROBS.log.error "Equal SpaceTreeNode size (#{equal_node}) " +
555
+ "is not equal parent node #{node}"
556
+ return false
557
+ end
560
558
  end
561
-
562
- if node.size != equal_node.size
563
- PEROBS.log.error "Equal SpaceTreeNode size (#{equal_node}) is " +
564
- "not equal parent node #{node}"
565
- return false
559
+ when :larger
560
+ if node.larger
561
+ return false unless node.check_node_link('larger', stack)
562
+ larger_node = node.larger
563
+ if larger_node.size <= node.size
564
+ PEROBS.log.error "Larger SpaceTreeNode size " +
565
+ "(#{larger_node}) is not larger than #{node}"
566
+ return false
567
+ end
566
568
  end
567
- end
568
- when :larger
569
- if node.larger
570
- return false unless node.check_node_link('larger', stack)
571
- larger_node = node.larger
572
- if larger_node.size <= node.size
573
- PEROBS.log.error "Larger SpaceTreeNode size " +
574
- "(#{larger_node}) is not larger than #{node}"
569
+ when :on_exit
570
+ if flat_file &&
571
+ !flat_file.has_space?(node.blob_address, node.size)
572
+ PEROBS.log.error "SpaceTreeNode has space at offset " +
573
+ "#{node.blob_address} of size #{node.size} that isn't " +
574
+ "available in the FlatFile."
575
575
  return false
576
576
  end
577
- end
578
- when :on_exit
579
- if flat_file &&
580
- !flat_file.has_space?(node.blob_address, node.size)
581
- PEROBS.log.error "SpaceTreeNode has space at offset " +
582
- "#{node.blob_address} of size #{node.size} that isn't " +
583
- "available in the FlatFile."
584
- return false
585
- end
586
577
 
587
- node_counter += 1
578
+ pm.update(node_counter += 1)
579
+ end
588
580
  end
589
581
  end
590
582
  PEROBS.log.debug "#{node_counter} SpaceTree nodes checked"
@@ -2,7 +2,8 @@
2
2
  #
3
3
  # = Store.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2015, 2016 by Chris Schlaeger <chris@taskjuggler.org>
5
+ # Copyright (c) 2015, 2016, 2017, 2018, 2019
6
+ # by Chris Schlaeger <chris@taskjuggler.org>
6
7
  #
7
8
  # MIT License
8
9
  #
@@ -36,6 +37,11 @@ require 'perobs/FlatFileDB'
36
37
  require 'perobs/Object'
37
38
  require 'perobs/Hash'
38
39
  require 'perobs/Array'
40
+ require 'perobs/BigTree'
41
+ require 'perobs/BigHash'
42
+ require 'perobs/BigArray'
43
+ require 'perobs/ProgressMeter'
44
+ require 'perobs/ConsoleProgressMeter'
39
45
 
40
46
  # PErsistent Ruby OBject Store
41
47
  module PEROBS
@@ -132,8 +138,14 @@ module PEROBS
132
138
  # :yaml : Can also handle most Ruby data types and is
133
139
  # portable between Ruby versions (1.9 and later).
134
140
  # Unfortunately, it is 10x slower than marshal.
141
+ # :progressmeter : reference to a ProgressMeter object that receives
142
+ # progress information during longer running tasks.
143
+ # It defaults to ProgressMeter which only logs into
144
+ # the log. Use ConsoleProgressMeter or a derived
145
+ # class for more fancy progress reporting.
135
146
  def initialize(data_base, options = {})
136
147
  # Create a backing store handler
148
+ @progressmeter = (options[:progressmeter] ||= ProgressMeter.new)
137
149
  @db = (options[:engine] || FlatFileDB).new(data_base, options)
138
150
  @db.open
139
151
  # Create a map that can translate classes to numerical IDs and vice
@@ -197,7 +209,10 @@ module PEROBS
197
209
  # method was called.
198
210
  def exit
199
211
  if @cache && @cache.in_transaction?
200
- PEROBS.log.fatal 'You cannot call exit() during a transaction'
212
+ @cache.abort_transaction
213
+ @cache.flush
214
+ @db.close if @db
215
+ PEROBS.log.fatal "You cannot call exit() during a transaction: #{Kernel.caller}"
201
216
  end
202
217
  @cache.flush if @cache
203
218
  @db.close if @db
@@ -297,20 +312,30 @@ module PEROBS
297
312
  # needed.
298
313
  def sync
299
314
  if @cache.in_transaction?
300
- PEROBS.log.fatal 'You cannot call sync() during a transaction'
315
+ @cache.abort_transaction
316
+ @cache.flush
317
+ PEROBS.log.fatal "You cannot call sync() during a transaction: \n" +
318
+ Kernel.caller.join("\n")
301
319
  end
302
320
  @cache.flush
303
321
  end
304
322
 
323
+ # Return the number of objects stored in the store. CAVEAT: This method
324
+ # will only return correct values when it is separated from any mutating
325
+ # call by a call to sync().
326
+ # @return [Integer] Number of persistently stored objects in the Store.
327
+ def size
328
+ # We don't include the Hash that stores the root objects into the object
329
+ # count.
330
+ @db.item_counter - 1
331
+ end
332
+
305
333
  # Discard all objects that are not somehow connected to the root objects
306
334
  # from the back-end storage. The garbage collector is not invoked
307
335
  # automatically. Depending on your usage pattern, you need to call this
308
336
  # method periodically.
309
337
  # @return [Integer] The number of collected objects
310
338
  def gc
311
- if @cache.in_transaction?
312
- PEROBS.log.fatal 'You cannot call gc() during a transaction'
313
- end
314
339
  sync
315
340
  mark
316
341
  sweep
@@ -354,10 +379,6 @@ module PEROBS
354
379
  return obj
355
380
  end
356
381
 
357
- #if (obj = @db.search_object(id))
358
- # PEROBS.log.fatal "Object was not in index but in DB"
359
- #end
360
-
361
382
  # The requested object does not exist. Return nil.
362
383
  nil
363
384
  end
@@ -370,38 +391,42 @@ module PEROBS
370
391
  # made.
371
392
  # @return [Integer] The number of references to bad objects found.
372
393
  def check(repair = false)
394
+ stats = { :errors => 0, :object_cnt => 0 }
395
+
373
396
  # All objects must have in-db version.
374
397
  sync
375
398
  # Run basic consistency checks first.
376
- errors = @db.check_db(repair)
399
+ stats[:errors] += @db.check_db(repair)
377
400
 
378
401
  # We will use the mark to mark all objects that we have checked already.
379
402
  # Before we start, we need to clear all marks.
380
403
  @db.clear_marks
381
404
 
382
- objects = 0
383
- @root_objects.each do |name, id|
384
- objects += 1
385
- errors += check_object(id, repair)
405
+ @progressmeter.start("Checking object link structure",
406
+ @db.item_counter) do
407
+ @root_objects.each do |name, id|
408
+ check_object(id, repair, stats)
409
+ end
386
410
  end
387
411
 
388
412
  # Delete all broken root objects.
389
413
  if repair
390
414
  @root_objects.delete_if do |name, id|
391
- unless (res = @db.check(id, repair))
415
+ unless @db.check(id, repair)
392
416
  PEROBS.log.error "Discarding broken root object '#{name}' " +
393
417
  "with ID #{id}"
394
- errors += 1
418
+ stats[:errors] += 1
395
419
  end
396
- !res
397
420
  end
398
421
  end
399
422
 
400
- if errors > 0
423
+ if stats[:errors] > 0
401
424
  if repair
402
- PEROBS.log.error "#{errors} errors found in #{objects} objects"
425
+ PEROBS.log.error "#{stats[:errors]} errors found in " +
426
+ "#{stats[:object_cnt]} objects"
403
427
  else
404
- PEROBS.log.fatal "#{errors} errors found in #{objects} objects"
428
+ PEROBS.log.fatal "#{stats[:errors]} errors found in " +
429
+ "#{stats[:object_cnt]} objects"
405
430
  end
406
431
  else
407
432
  PEROBS.log.debug "No errors found"
@@ -410,7 +435,7 @@ module PEROBS
410
435
  # Ensure that any fixes are written into the DB.
411
436
  sync if repair
412
437
 
413
- errors
438
+ stats[:errors]
414
439
  end
415
440
 
416
441
  # This method will execute the provided block as an atomic transaction
@@ -439,7 +464,10 @@ module PEROBS
439
464
  stack = [ 0 ] + @root_objects.values
440
465
  while !stack.empty?
441
466
  # Get an object index from the stack.
442
- unless (obj = object_by_id(id = stack.pop))
467
+ id = stack.pop
468
+ next if @db.is_marked?(id)
469
+
470
+ unless (obj = object_by_id(id))
443
471
  PEROBS.log.fatal "Database is corrupted. Object with ID #{id} " +
444
472
  "not found."
445
473
  end
@@ -510,19 +538,25 @@ module PEROBS
510
538
  def mark
511
539
  classes = Set.new
512
540
  marked_objects = 0
513
- each { |obj| classes.add(obj.class); marked_objects += 1 }
541
+ @progressmeter.start("Marking linked objects", @db.item_counter) do
542
+ each do |obj|
543
+ classes.add(obj.class)
544
+ @progressmeter.update(marked_objects += 1)
545
+ end
546
+ end
514
547
  @class_map.keep(classes.map { |c| c.to_s })
515
548
 
516
549
  # The root_objects object is included in the count, but we only want to
517
550
  # count user objects here.
518
- PEROBS.log.debug "#{marked_objects - 1} objects marked"
551
+ PEROBS.log.debug "#{marked_objects - 1} of #{@db.item_counter} " +
552
+ "objects marked"
519
553
  @stats.marked_objects = marked_objects - 1
520
554
  end
521
555
 
522
556
  # Sweep phase of a mark-and-sweep garbage collector. It will remove all
523
557
  # unmarked objects from the store.
524
558
  def sweep
525
- @stats.swept_objects = @db.delete_unmarked_objects.length
559
+ @stats.swept_objects = @db.delete_unmarked_objects
526
560
  @cache.reset
527
561
  PEROBS.log.debug "#{@stats.swept_objects} objects collected"
528
562
  @stats.swept_objects
@@ -534,8 +568,7 @@ module PEROBS
534
568
  # with
535
569
  # @param repair [Boolean] Delete references to broken objects if true
536
570
  # @return [Integer] The number of references to bad objects.
537
- def check_object(start_id, repair)
538
- errors = 0
571
+ def check_object(start_id, repair, stats)
539
572
  @db.mark(start_id)
540
573
  # The todo list holds a tuple for each object that still needs to be
541
574
  # checked. The first item is the referring object and the second is the
@@ -546,7 +579,13 @@ module PEROBS
546
579
  # Get the next PEROBS object to check
547
580
  ref_obj, id = todo_list.pop
548
581
 
549
- if (obj = object_by_id(id))
582
+ begin
583
+ obj = object_by_id(id)
584
+ rescue PEROBS::FatalError
585
+ obj = nil
586
+ end
587
+
588
+ if obj
550
589
  # The object exists and is OK. Mark it as checked.
551
590
  @db.mark(id)
552
591
  # Now look at all other objects referenced by this object.
@@ -569,11 +608,11 @@ module PEROBS
569
608
  ref_obj.inspect
570
609
  end
571
610
  end
572
- errors += 1
611
+ stats[:errors] += 1
573
612
  end
574
- end
575
613
 
576
- errors
614
+ @progressmeter.update(stats[:object_cnt] += 1)
615
+ end
577
616
  end
578
617
 
579
618
  end