perobs 4.0.0 → 4.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +27 -16
  3. data/lib/perobs/Array.rb +66 -19
  4. data/lib/perobs/BTree.rb +106 -15
  5. data/lib/perobs/BTreeBlob.rb +4 -3
  6. data/lib/perobs/BTreeDB.rb +5 -4
  7. data/lib/perobs/BTreeNode.rb +482 -156
  8. data/lib/perobs/BTreeNodeLink.rb +10 -0
  9. data/lib/perobs/BigArray.rb +285 -0
  10. data/lib/perobs/BigArrayNode.rb +1002 -0
  11. data/lib/perobs/BigHash.rb +246 -0
  12. data/lib/perobs/BigTree.rb +197 -0
  13. data/lib/perobs/BigTreeNode.rb +873 -0
  14. data/lib/perobs/Cache.rb +48 -10
  15. data/lib/perobs/ConsoleProgressMeter.rb +61 -0
  16. data/lib/perobs/DataBase.rb +4 -3
  17. data/lib/perobs/DynamoDB.rb +57 -15
  18. data/lib/perobs/EquiBlobsFile.rb +155 -50
  19. data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
  20. data/lib/perobs/FlatFile.rb +519 -227
  21. data/lib/perobs/FlatFileBlobHeader.rb +113 -54
  22. data/lib/perobs/FlatFileDB.rb +49 -23
  23. data/lib/perobs/FuzzyStringMatcher.rb +175 -0
  24. data/lib/perobs/Hash.rb +127 -33
  25. data/lib/perobs/IDList.rb +144 -0
  26. data/lib/perobs/IDListPage.rb +107 -0
  27. data/lib/perobs/IDListPageFile.rb +180 -0
  28. data/lib/perobs/IDListPageRecord.rb +142 -0
  29. data/lib/perobs/Object.rb +18 -15
  30. data/lib/perobs/ObjectBase.rb +46 -5
  31. data/lib/perobs/PersistentObjectCache.rb +57 -68
  32. data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
  33. data/lib/perobs/ProgressMeter.rb +97 -0
  34. data/lib/perobs/SpaceManager.rb +273 -0
  35. data/lib/perobs/SpaceTree.rb +21 -12
  36. data/lib/perobs/SpaceTreeNode.rb +53 -61
  37. data/lib/perobs/Store.rb +264 -145
  38. data/lib/perobs/version.rb +1 -1
  39. data/lib/perobs.rb +2 -0
  40. data/perobs.gemspec +4 -4
  41. data/test/Array_spec.rb +15 -6
  42. data/test/BTree_spec.rb +6 -2
  43. data/test/BigArray_spec.rb +261 -0
  44. data/test/BigHash_spec.rb +152 -0
  45. data/test/BigTreeNode_spec.rb +153 -0
  46. data/test/BigTree_spec.rb +259 -0
  47. data/test/EquiBlobsFile_spec.rb +105 -1
  48. data/test/FNV_Hash_1a_64_spec.rb +59 -0
  49. data/test/FlatFileDB_spec.rb +198 -14
  50. data/test/FuzzyStringMatcher_spec.rb +261 -0
  51. data/test/Hash_spec.rb +13 -3
  52. data/test/IDList_spec.rb +77 -0
  53. data/test/LegacyDBs/LegacyDB.rb +155 -0
  54. data/test/LegacyDBs/version_3/class_map.json +1 -0
  55. data/test/LegacyDBs/version_3/config.json +1 -0
  56. data/test/LegacyDBs/version_3/database.blobs +0 -0
  57. data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
  58. data/test/LegacyDBs/version_3/index.blobs +0 -0
  59. data/test/LegacyDBs/version_3/version +1 -0
  60. data/test/LockFile_spec.rb +9 -6
  61. data/test/SpaceManager_spec.rb +176 -0
  62. data/test/SpaceTree_spec.rb +4 -1
  63. data/test/Store_spec.rb +305 -203
  64. data/test/spec_helper.rb +9 -4
  65. metadata +57 -16
  66. data/lib/perobs/BTreeNodeCache.rb +0 -109
  67. data/lib/perobs/TreeDB.rb +0 -277
@@ -0,0 +1,273 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = SpaceManager.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2020 by Chris Schlaeger <chris@taskjuggler.org>
6
+ #
7
+ # MIT License
8
+ #
9
+ # Permission is hereby granted, free of charge, to any person obtaining
10
+ # a copy of this software and associated documentation files (the
11
+ # "Software"), to deal in the Software without restriction, including
12
+ # without limitation the rights to use, copy, modify, merge, publish,
13
+ # distribute, sublicense, and/or sell copies of the Software, and to
14
+ # permit persons to whom the Software is furnished to do so, subject to
15
+ # the following conditions:
16
+ #
17
+ # The above copyright notice and this permission notice shall be
18
+ # included in all copies or substantial portions of the Software.
19
+ #
20
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
+
28
+ require 'perobs/BTree'
29
+ require 'perobs/EquiBlobsFile'
30
+ require 'perobs/FlatFile'
31
+ require 'perobs/FlatFileBlobHeader'
32
+
33
+ module PEROBS
34
+
35
+ # The SpaceManager is used to keep a list of all the empty spaces in a
36
+ # FlatFileDB file. An empty space is described by its starting address and
37
+ # its length in bytes. The SpaceManager keeps a list of all the spaces and
38
+ # can find the best fit space when a new blob needs to be added to the
39
+ # FlatFileDB.
40
+ #
41
+ # The SpaceManager uses two files to store the list. The first is a file
42
+ # with the actual addresses. This is a set of linked address lists. Each
43
+ # list holds the addresses for spaces that have exactly the same size. The
44
+ # second file is a BTree file that serves as the index. It is used to map
45
+ # the length of a space to the address of the linked list for that
46
+ # particular length. The linked list consists of elements that only hold 2
47
+ # items. The actual address in the FlatFileDB and the address of the next
48
+ # entry in the linked list in the list file.
49
class SpaceManager

  attr_reader :added_spaces, :recycled_spaces, :failed_requests

  # Create the index and the space list objects. No file is opened until
  # #open is called.
  # @param db_dir [String] directory that holds the index and list files
  # @param progressmeter [Object] progress meter that is handed on to the
  #        BTree and EquiBlobsFile
  # @param btree_order [Integer] order that is handed on to the index BTree
  def initialize(db_dir, progressmeter, btree_order = 65)
    @db_dir = db_dir
    @progressmeter = progressmeter

    @index = BTree.new(@db_dir, 'space_index', btree_order, @progressmeter)
    # The space list contains blobs that each have 2 entries: the address
    # of the space in the FlatFile and the address of the next blob in the
    # space list file that is an entry for the same space size. An address
    # of 0 marks the end of the list.
    @list = EquiBlobsFile.new(@db_dir, 'space_list', @progressmeter, 2 * 8, 1)
  end

  # Open the index and the space list and reset the statistics counters.
  def open
    @index.open
    @list.open
    reset_stats
  end

  # Log the usage statistics and close both files. Does nothing if the
  # index is not open.
  def close
    if @index.is_open?
      PEROBS.log.info "SpaceManager has currently #{@list.total_entries} " +
        "used blobs and #{@list.total_spaces} unused blobs in list " +
        "EquiBlobsFile"
      PEROBS.log.info "#{@added_spaces} were added, #{@recycled_spaces} " +
        "spaces were recycled and #{@failed_requests} requests failed"

      @list.close
      @index.close
    end
  end

  # @return true if the index is currently open.
  def is_open?
    @index.is_open?
  end

  # Sync the space list and the index.
  def sync
    @list.sync
    @index.sync
  end

  # Register a new space. The new list entry becomes the head of the
  # linked list for the given length.
  # @param address [Integer] address of the space
  # @param length [Integer] length of the space
  def add_space(address, length)
    # If there is already at least one more entry for this length, the new
    # entry links to it. Otherwise 0 terminates the list.
    next_entry_addr = @index.get(length) || 0
    @index.insert(length, insert_space_in_list(address, next_entry_addr))
    @added_spaces += 1
  end

  # Check if a space with the given address and length is registered.
  # @param address [Integer] address of the space
  # @param length [Integer] length of the space
  # @return [Boolean] true if the space was found, false otherwise
  def has_space?(address, length)
    if (list_entry_addr = @index.get(length))
      while list_entry_addr > 0
        blob = @list.retrieve_blob(list_entry_addr)
        space_address, next_entry_addr = blob.unpack('QQ')
        return true if space_address == address
        list_entry_addr = next_entry_addr
      end
    end

    false
  end

  # Take a space of exactly the requested length out of the list.
  # @param length [Integer] requested length
  # @return [Array, nil] [ address, length ] of the space or nil if no
  #         space of that length is registered
  def get_space(length)
    # We use a simple exact fit strategy. All attempts to use a more
    # elaborate scheme were actually less efficient. Non-exact matches
    # generate new spaces for the remainder and fragment the blob file with
    # lots of unusable small spaces. Most applications seem to have
    # clustered their blob sizes around a number of popular sizes. So exact
    # match is very efficient to implement and results in the highest
    # probability that a space will be reused soon.
    list_entry_addr = @index.get(length)

    if list_entry_addr
      blob = @list.retrieve_blob(list_entry_addr)
      space_address, next_entry_addr = blob.unpack('QQ')
      @list.delete_blob(list_entry_addr)

      if next_entry_addr > 0
        # Update the index entry for the length to point to the
        # following space list entry.
        @index.insert(length, next_entry_addr)
      else
        # The space list for this length is empty. Remove the entry
        # from the index.
        @index.remove(length)
      end
      @recycled_spaces += 1

      # We return the length to remain compatible with the old SpaceTree
      # API.
      return [ space_address, length ]
    end

    @failed_requests += 1
    nil
  end

  # Clear the space list and the index and reset the statistics counters.
  def clear
    @list.clear
    @index.clear
    reset_stats
  end

  # Erase the space list and the index.
  def erase
    @list.erase
    @index.erase
  end

  # Check the index and all space lists for consistency and log some
  # statistics about the registered spaces.
  # @param flat_file [Object, nil] If given, every registered space is
  #        also verified with flat_file.has_space?(address, length).
  # @return [Boolean] true if no error was found, false otherwise
  def check(flat_file = nil)
    sync
    return false unless @index.check
    return false unless @list.check

    smallest_space = nil
    largest_space = nil
    total_space_bytes = 0
    space_distribution = ::Hash.new(0)

    @index.each do |length, list_entry_addr|
      if list_entry_addr <= 0
        PEROBS.log.error "list_entry_addr (#{list_entry_addr}) " +
          "must be positive"
        return false
      end

      # Detect smallest and largest space
      if smallest_space.nil? || length < smallest_space
        smallest_space = length
      end
      if largest_space.nil? || length > largest_space
        largest_space = length
      end

      entries = check_space_list(length, list_entry_addr, flat_file)
      return false unless entries

      total_space_bytes += length * entries
      space_distribution[msb(length)] += entries
    end

    # Join the buckets into one string. Interpolating the Array directly
    # would log its inspect representation.
    distribution = space_distribution.map do |bits, count|
      "#{2 ** (bits - 1)}-#{2 ** bits - 1}:#{count}; "
    end.join
    PEROBS.log.info "SpaceManager stats: smallest: #{smallest_space}; " +
      "largest: #{largest_space}; total bytes: #{total_space_bytes}; " +
      "distribution: #{distribution}"

    true
  end

  # Collect all registered spaces.
  # @return [Array] list of [ address, length ] pairs sorted by address
  def to_a
    spaces = []

    @index.each do |length, list_entry_addr|
      while list_entry_addr > 0
        blob = @list.retrieve_blob(list_entry_addr)
        space_address, next_entry_addr = blob.unpack('QQ')

        spaces << [ space_address, length ]

        list_entry_addr = next_entry_addr
      end
    end

    spaces.sort { |x, y| x[0] <=> y[0] }
  end

  private

  # Walk the linked list of spaces that starts at the given list entry and
  # validate every entry.
  # @param length [Integer] length shared by all spaces of this list
  # @param list_entry_addr [Integer] address of the first list entry
  # @param flat_file [Object, nil] see #check
  # @return [Integer, nil] number of entries in the list or nil if an
  #         error was found (the error has been logged)
  def check_space_list(length, list_entry_addr, flat_file)
    # Every list entry address seen so far, in visiting order. A Hash is
    # used so the membership test does not scan the whole list each time.
    visited = { list_entry_addr => true }
    entries = 0

    while list_entry_addr > 0
      entries += 1
      unless (blob = @list.retrieve_blob(list_entry_addr))
        PEROBS.log.error "SpaceManager points to non-existing " +
          "space list entry at address #{list_entry_addr}"
        return nil
      end
      space_address, next_entry_addr = blob.unpack('QQ')

      if visited.key?(next_entry_addr)
        PEROBS.log.error "Space list is cyclic: " +
          "#{visited.keys + [ next_entry_addr ]}"
        return nil
      end
      # Remember every entry, not just the head. Otherwise a cycle that
      # does not lead back to the first entry would never terminate.
      visited[next_entry_addr] = true

      if flat_file &&
         !flat_file.has_space?(space_address, length)
        PEROBS.log.error "SpaceManager has space at offset " +
          "#{space_address} of size #{length} that isn't " +
          "available in the FlatFile."
        return nil
      end
      list_entry_addr = next_entry_addr
    end

    entries
  end

  # Store a new entry in the space list file.
  # @param space_address [Integer] address of the space
  # @param next_element_addr [Integer] address of the next list entry for
  #        the same space size or 0 if this is the last entry
  # @return [Integer] address of the new entry in the space list file
  def insert_space_in_list(space_address, next_element_addr)
    # The order must match the unpack('QQ') calls of the readers: space
    # address first, then the address of the next list entry.
    blob = [ space_address, next_element_addr ].pack('QQ')
    blob_addr = @list.free_address
    @list.store_blob(blob_addr, blob)

    blob_addr
  end

  # Determine the position of the most significant bit.
  # @param i [Integer] the value to examine
  # @return [Integer] 1-based position of the highest set bit, 0 if i is
  #         0 and 63 for all negative values
  def msb(i)
    return 63 if i < 0

    i.bit_length
  end

  # Set all statistics counters back to 0.
  def reset_stats
    @added_spaces = 0
    @recycled_spaces = 0
    @failed_requests = 0
  end

end
271
+
272
+ end
273
+
@@ -2,7 +2,7 @@
2
2
  #
3
3
  # = SpaceTree.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
5
+ # Copyright (c) 2016, 2017, 2018 by Chris Schlaeger <chris@taskjuggler.org>
6
6
  #
7
7
  # MIT License
8
8
  #
@@ -40,20 +40,21 @@ module PEROBS
40
40
  # size which drastically simplifies the backing store operation.
41
41
  class SpaceTree
42
42
 
43
- attr_reader :nodes, :cache
43
+ attr_reader :nodes, :cache, :progressmeter
44
44
 
45
45
  # Manage the free spaces tree in the specified directory
46
46
  # @param dir [String] directory path of an existing directory
47
- def initialize(dir)
47
+ def initialize(dir, progressmeter)
48
48
  @dir = dir
49
+ @progressmeter = progressmeter
49
50
 
50
51
  # This EquiBlobsFile contains the nodes of the SpaceTree.
51
- @nodes = EquiBlobsFile.new(@dir, 'database_spaces',
52
+ @nodes = EquiBlobsFile.new(@dir, 'database_spaces', progressmeter,
52
53
  SpaceTreeNode::NODE_BYTES, 1)
53
54
 
54
55
  # Benchmark runs showed a cache size of 128 to be a good compromise
55
56
  # between read and write performance trade-offs and memory consumption.
56
- @cache = PersistentObjectCache.new(128, SpaceTreeNode, self)
57
+ @cache = PersistentObjectCache.new(256, 256, SpaceTreeNode, self)
57
58
  end
58
59
 
59
60
  # Open the SpaceTree file.
@@ -74,9 +75,14 @@ module PEROBS
74
75
  @cache.clear
75
76
  end
76
77
 
78
+ # @return true if file is currently open.
79
+ def is_open?
80
+ !@root_address.nil?
81
+ end
82
+
77
83
  # Flush all pending writes to the file system.
78
84
  def sync
79
- @cache.flush
85
+ @cache.flush(true)
80
86
  @nodes.sync
81
87
  end
82
88
 
@@ -105,10 +111,12 @@ module PEROBS
105
111
  if size <= 0
106
112
  PEROBS.log.fatal "Size (#{size}) must be larger than 0."
107
113
  end
108
- if has_space?(address, size)
109
- PEROBS.log.fatal "The space with address #{address} and size #{size} " +
110
- "can't be added twice."
111
- end
114
+ # The following check is fairly costly and should never trigger unless
115
+ # there is a bug in the PEROBS code. Only use this for debugging.
116
+ #if has_space?(address, size)
117
+ # PEROBS.log.fatal "The space with address #{address} and size " +
118
+ # "#{size} can't be added twice."
119
+ #end
112
120
  root.add_space(address, size)
113
121
  end
114
122
 
@@ -157,8 +165,9 @@ module PEROBS
157
165
  # @param flat_file [FlatFile] Flat file to compare with
158
166
  # @return True if space list matches, false otherwise
159
167
  def check(flat_file = nil)
160
- @nodes.check
161
- root.check(flat_file)
168
+ sync
169
+ return false unless @nodes.check
170
+ root.check(flat_file, @nodes.total_entries)
162
171
  end
163
172
 
164
173
  # Iterate over all entries and yield address and size.
@@ -74,24 +74,11 @@ module PEROBS
74
74
  @smaller = smaller
75
75
  @equal = equal
76
76
  @larger = larger
77
-
78
- ObjectSpace.define_finalizer(
79
- self, SpaceTreeNode._finalize(@tree, @node_address, object_id))
80
- @tree.cache.insert(self, false)
81
- end
82
-
83
- # This method generates the destructor for the objects of this class. It
84
- # is done this way to prevent the Proc object hanging on to a reference to
85
- # self which would prevent the object from being collected. This internal
86
- # method is not intended for users to call.
87
- def SpaceTreeNode._finalize(tree, node_address, ruby_object_id)
88
- proc { tree.cache._collect(node_address, ruby_object_id) }
89
77
  end
90
78
 
91
79
  # Create a new SpaceTreeNode. This method should be used for the creation
92
80
  # of new nodes instead of calling the constructor directly.
93
81
  # @param tree [SpaceTree] The tree the node should belong to
94
- # @param node_address [Integer] Address of the node in the file
95
82
  # @param blob_address [Integer] Address of the free space blob
96
83
  # @param size [Integer] Size of the free space blob
97
84
  # @param parent [SpaceTreeNode] Parent node in the tree
@@ -99,7 +86,7 @@ module PEROBS
99
86
  node_address = tree.nodes.free_address
100
87
 
101
88
  node = SpaceTreeNode.new(tree, node_address, blob_address, size, parent)
102
- node.save
89
+ tree.cache.insert(node)
103
90
 
104
91
  node
105
92
  end
@@ -107,7 +94,7 @@ module PEROBS
107
94
  # Restore a node from the backing store at the given address and tree.
108
95
  # @param tree [SpaceTree] The tree the node belongs to
109
96
  # @param node_address [Integer] The address in the file.
110
- def SpaceTreeNode::load(tree, node_address)
97
+ def SpaceTreeNode::load(tree, node_address, unused = nil)
111
98
  unless node_address > 0
112
99
  PEROBS.log.fatal "node_address (#{node_address}) must be larger than 0"
113
100
  end
@@ -132,6 +119,8 @@ module PEROBS
132
119
  node = SpaceTreeNode.new(tree, node_address, blob_address, size,
133
120
  parent, smaller, equal, larger)
134
121
 
122
+ tree.cache.insert(node, false)
123
+
135
124
  node
136
125
  end
137
126
 
@@ -529,62 +518,65 @@ module PEROBS
529
518
  # errors.
530
519
  # @param flat_file [FlatFile] If given, check that the space is also
531
520
  # present in the given flat file.
521
+ # @param count [Integer] The total number of entries in the tree
532
522
  # @return [false,true] True if OK, false otherwise
533
- def check(flat_file)
523
+ def check(flat_file, count)
534
524
  node_counter = 0
535
525
  max_depth = 0
536
526
 
537
- each do |node, mode, stack|
538
- max_depth = stack.size if stack.size > max_depth
539
-
540
- case mode
541
- when :smaller
542
- if node.smaller
543
- return false unless node.check_node_link('smaller', stack)
544
- smaller_node = node.smaller
545
- if smaller_node.size >= node.size
546
- PEROBS.log.error "Smaller SpaceTreeNode size " +
547
- "(#{smaller_node}) is not smaller than #{node}"
548
- return false
527
+ @tree.progressmeter.start('Checking space list entries', count) do |pm|
528
+ each do |node, mode, stack|
529
+ max_depth = stack.size if stack.size > max_depth
530
+
531
+ case mode
532
+ when :smaller
533
+ if node.smaller
534
+ return false unless node.check_node_link('smaller', stack)
535
+ smaller_node = node.smaller
536
+ if smaller_node.size >= node.size
537
+ PEROBS.log.error "Smaller SpaceTreeNode size " +
538
+ "(#{smaller_node}) is not smaller than #{node}"
539
+ return false
540
+ end
549
541
  end
550
- end
551
- when :equal
552
- if node.equal
553
- return false unless node.check_node_link('equal', stack)
554
- equal_node = node.equal
555
-
556
- if equal_node.smaller || equal_node.larger
557
- PEROBS.log.error "Equal node #{equal_node} must not have " +
558
- "smaller/larger childs"
559
- return false
542
+ when :equal
543
+ if node.equal
544
+ return false unless node.check_node_link('equal', stack)
545
+ equal_node = node.equal
546
+
547
+ if equal_node.smaller || equal_node.larger
548
+ PEROBS.log.error "Equal node #{equal_node} must not have " +
549
+ "smaller/larger childs"
550
+ return false
551
+ end
552
+
553
+ if node.size != equal_node.size
554
+ PEROBS.log.error "Equal SpaceTreeNode size (#{equal_node}) " +
555
+ "is not equal parent node #{node}"
556
+ return false
557
+ end
560
558
  end
561
-
562
- if node.size != equal_node.size
563
- PEROBS.log.error "Equal SpaceTreeNode size (#{equal_node}) is " +
564
- "not equal parent node #{node}"
565
- return false
559
+ when :larger
560
+ if node.larger
561
+ return false unless node.check_node_link('larger', stack)
562
+ larger_node = node.larger
563
+ if larger_node.size <= node.size
564
+ PEROBS.log.error "Larger SpaceTreeNode size " +
565
+ "(#{larger_node}) is not larger than #{node}"
566
+ return false
567
+ end
566
568
  end
567
- end
568
- when :larger
569
- if node.larger
570
- return false unless node.check_node_link('larger', stack)
571
- larger_node = node.larger
572
- if larger_node.size <= node.size
573
- PEROBS.log.error "Larger SpaceTreeNode size " +
574
- "(#{larger_node}) is not larger than #{node}"
569
+ when :on_exit
570
+ if flat_file &&
571
+ !flat_file.has_space?(node.blob_address, node.size)
572
+ PEROBS.log.error "SpaceTreeNode has space at offset " +
573
+ "#{node.blob_address} of size #{node.size} that isn't " +
574
+ "available in the FlatFile."
575
575
  return false
576
576
  end
577
- end
578
- when :on_exit
579
- if flat_file &&
580
- !flat_file.has_space?(node.blob_address, node.size)
581
- PEROBS.log.error "SpaceTreeNode has space at offset " +
582
- "#{node.blob_address} of size #{node.size} that isn't " +
583
- "available in the FlatFile."
584
- return false
585
- end
586
577
 
587
- node_counter += 1
578
+ pm.update(node_counter += 1)
579
+ end
588
580
  end
589
581
  end
590
582
  PEROBS.log.debug "#{node_counter} SpaceTree nodes checked"