perobs 4.0.0 → 4.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +27 -16
  3. data/lib/perobs/Array.rb +66 -19
  4. data/lib/perobs/BTree.rb +106 -15
  5. data/lib/perobs/BTreeBlob.rb +4 -3
  6. data/lib/perobs/BTreeDB.rb +5 -4
  7. data/lib/perobs/BTreeNode.rb +482 -156
  8. data/lib/perobs/BTreeNodeLink.rb +10 -0
  9. data/lib/perobs/BigArray.rb +285 -0
  10. data/lib/perobs/BigArrayNode.rb +1002 -0
  11. data/lib/perobs/BigHash.rb +246 -0
  12. data/lib/perobs/BigTree.rb +197 -0
  13. data/lib/perobs/BigTreeNode.rb +873 -0
  14. data/lib/perobs/Cache.rb +48 -10
  15. data/lib/perobs/ConsoleProgressMeter.rb +61 -0
  16. data/lib/perobs/DataBase.rb +4 -3
  17. data/lib/perobs/DynamoDB.rb +57 -15
  18. data/lib/perobs/EquiBlobsFile.rb +155 -50
  19. data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
  20. data/lib/perobs/FlatFile.rb +519 -227
  21. data/lib/perobs/FlatFileBlobHeader.rb +113 -54
  22. data/lib/perobs/FlatFileDB.rb +49 -23
  23. data/lib/perobs/FuzzyStringMatcher.rb +175 -0
  24. data/lib/perobs/Hash.rb +127 -33
  25. data/lib/perobs/IDList.rb +144 -0
  26. data/lib/perobs/IDListPage.rb +107 -0
  27. data/lib/perobs/IDListPageFile.rb +180 -0
  28. data/lib/perobs/IDListPageRecord.rb +142 -0
  29. data/lib/perobs/Object.rb +18 -15
  30. data/lib/perobs/ObjectBase.rb +46 -5
  31. data/lib/perobs/PersistentObjectCache.rb +57 -68
  32. data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
  33. data/lib/perobs/ProgressMeter.rb +97 -0
  34. data/lib/perobs/SpaceManager.rb +273 -0
  35. data/lib/perobs/SpaceTree.rb +21 -12
  36. data/lib/perobs/SpaceTreeNode.rb +53 -61
  37. data/lib/perobs/Store.rb +264 -145
  38. data/lib/perobs/version.rb +1 -1
  39. data/lib/perobs.rb +2 -0
  40. data/perobs.gemspec +4 -4
  41. data/test/Array_spec.rb +15 -6
  42. data/test/BTree_spec.rb +6 -2
  43. data/test/BigArray_spec.rb +261 -0
  44. data/test/BigHash_spec.rb +152 -0
  45. data/test/BigTreeNode_spec.rb +153 -0
  46. data/test/BigTree_spec.rb +259 -0
  47. data/test/EquiBlobsFile_spec.rb +105 -1
  48. data/test/FNV_Hash_1a_64_spec.rb +59 -0
  49. data/test/FlatFileDB_spec.rb +198 -14
  50. data/test/FuzzyStringMatcher_spec.rb +261 -0
  51. data/test/Hash_spec.rb +13 -3
  52. data/test/IDList_spec.rb +77 -0
  53. data/test/LegacyDBs/LegacyDB.rb +155 -0
  54. data/test/LegacyDBs/version_3/class_map.json +1 -0
  55. data/test/LegacyDBs/version_3/config.json +1 -0
  56. data/test/LegacyDBs/version_3/database.blobs +0 -0
  57. data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
  58. data/test/LegacyDBs/version_3/index.blobs +0 -0
  59. data/test/LegacyDBs/version_3/version +1 -0
  60. data/test/LockFile_spec.rb +9 -6
  61. data/test/SpaceManager_spec.rb +176 -0
  62. data/test/SpaceTree_spec.rb +4 -1
  63. data/test/Store_spec.rb +305 -203
  64. data/test/spec_helper.rb +9 -4
  65. metadata +57 -16
  66. data/lib/perobs/BTreeNodeCache.rb +0 -109
  67. data/lib/perobs/TreeDB.rb +0 -277
@@ -0,0 +1,273 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = SpaceManager.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2020 by Chris Schlaeger <chris@taskjuggler.org>
6
+ #
7
+ # MIT License
8
+ #
9
+ # Permission is hereby granted, free of charge, to any person obtaining
10
+ # a copy of this software and associated documentation files (the
11
+ # "Software"), to deal in the Software without restriction, including
12
+ # without limitation the rights to use, copy, modify, merge, publish,
13
+ # distribute, sublicense, and/or sell copies of the Software, and to
14
+ # permit persons to whom the Software is furnished to do so, subject to
15
+ # the following conditions:
16
+ #
17
+ # The above copyright notice and this permission notice shall be
18
+ # included in all copies or substantial portions of the Software.
19
+ #
20
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
+
28
+ require 'perobs/BTree'
29
+ require 'perobs/EquiBlobsFile'
30
+ require 'perobs/FlatFile'
31
+ require 'perobs/FlatFileBlobHeader'
32
+
33
+ module PEROBS
34
+
35
# The SpaceManager is used to keep a list of all the empty spaces in a
# FlatFileDB file. An empty space is described by its starting address and
# its length in bytes. The SpaceManager keeps a list of all the spaces and
# can find the best fit space when a new blob needs to be added to the
# FlatFileDB.
#
# The SpaceManager uses two files to store the list. The first is a file
# with the actual addresses. This is a set of linked address lists. Each
# list holds the addresses for spaces that have exactly the same size. The
# second file is a BTree file that serves as the index. It is used to map
# the length of a space to the address of the linked list for that
# particular length. The linked list consists of elements that only hold 2
# items: the actual address in the FlatFileDB and the address of the next
# entry in the linked list in the list file.
class SpaceManager

  # Pack/unpack format of a space list entry: two 64 bit unsigned integers,
  # the space address followed by the address of the next list entry.
  LIST_ENTRY_FORMAT = 'QQ'.freeze
  # Size of a packed space list entry in bytes.
  LIST_ENTRY_BYTES = 2 * 8

  attr_reader :added_spaces, :recycled_spaces, :failed_requests

  # @param db_dir [String] directory that holds the index and list files
  # @param progressmeter [Object] passed through to BTree and EquiBlobsFile
  # @param btree_order [Integer] order handed to the index BTree
  def initialize(db_dir, progressmeter, btree_order = 65)
    @db_dir = db_dir
    @progressmeter = progressmeter

    @index = BTree.new(@db_dir, 'space_index', btree_order, @progressmeter)
    # The space list contains blobs that have each 2 entries. The address of
    # the space in the FlatFile and the address of the next blob in the
    # space list file that is an entry for the same space size. An address
    # of 0 marks the end of the list.
    @list = EquiBlobsFile.new(@db_dir, 'space_list', @progressmeter,
                              LIST_ENTRY_BYTES, 1)
  end

  # Open the index and the list file and reset the statistics counters.
  def open
    @index.open
    @list.open
    reset_stats
  end

  # Log the usage statistics and close both files. Does nothing if the
  # index is not open.
  def close
    return unless @index.is_open?

    PEROBS.log.info "SpaceManager has currently #{@list.total_entries} " +
      "used blobs and #{@list.total_spaces} unused blobs in list " +
      "EquiBlobsFile"
    PEROBS.log.info "#{@added_spaces} were added, #{@recycled_spaces} " +
      "spaces were recycled and #{@failed_requests} requests failed"

    @list.close
    @index.close
  end

  # @return [Boolean] whatever the index reports as its open state
  def is_open?
    @index.is_open?
  end

  # Sync the list file and then the index file.
  def sync
    @list.sync
    @index.sync
  end

  # Register a new empty space.
  # @param address [Integer] start address of the space
  # @param length [Integer] size of the space in bytes
  def add_space(address, length)
    # If there is already at least one more entry for this length the new
    # entry is put in front of it. Otherwise it becomes a single element
    # list that is terminated by the next address 0.
    next_entry_addr = @index.get(length) || 0
    @index.insert(length, insert_space_in_list(address, next_entry_addr))
    @added_spaces += 1
  end

  # Check if a space with the given address and length is registered.
  # @param address [Integer] start address of the space
  # @param length [Integer] size of the space in bytes
  # @return [Boolean] true if such a space is in the list
  def has_space?(address, length)
    list_entry_addr = @index.get(length)
    return false unless list_entry_addr

    while list_entry_addr > 0
      space_address, list_entry_addr = read_list_entry(list_entry_addr)
      return true if space_address == address
    end

    false
  end

  # Remove and return a space of exactly the requested length.
  # @param length [Integer] required size in bytes
  # @return [Array(Integer, Integer), nil] address and length of the space
  #   or nil if no space of that length is registered
  def get_space(length)
    # We use a simple exact fit strategy. All attempts to use a more
    # elaborate scheme were actually less efficient. Non-exact matches
    # generate new spaces for the remainder and fragment the blob file with
    # lots of unusable small spaces. Most applications seem to have
    # clustered their blob sizes around a number of popular sizes. So exact
    # match is very efficient to implement and results in the highest
    # probability that a space will be reused soon.
    list_entry_addr = @index.get(length)

    unless list_entry_addr
      @failed_requests += 1
      return nil
    end

    space_address, next_entry_addr = read_list_entry(list_entry_addr)
    @list.delete_blob(list_entry_addr)

    if next_entry_addr > 0
      # Update the index entry for the length to point to the
      # following space list entry.
      @index.insert(length, next_entry_addr)
    else
      # The space list for this length is empty. Remove the entry
      # from the index.
      @index.remove(length)
    end
    @recycled_spaces += 1

    # We return the length to remain compatible with the old SpaceTree
    # API.
    [ space_address, length ]
  end

  # Clear the list and the index and reset the statistics counters.
  def clear
    @list.clear
    @index.clear
    reset_stats
  end

  # Erase the list file and the index file.
  def erase
    @list.erase
    @index.erase
  end

  # Check the consistency of the index and of all space lists.
  # @param flat_file [#has_space?, nil] If given, every listed space must
  #   also be reported as available by flat_file.has_space?
  # @return [Boolean] true if no problem was found, false otherwise
  def check(flat_file = nil)
    sync
    return false unless @index.check
    return false unless @list.check

    smallest_space = nil
    largest_space = nil
    total_space_bytes = 0
    # ::Hash is spelled out as in the original file; inside the PEROBS
    # module a plain Hash may resolve to a different class.
    space_distribution = ::Hash.new(0)

    @index.each do |length, list_entry_addr|
      if list_entry_addr <= 0
        PEROBS.log.error "list_entry_addr (#{list_entry_addr}) " +
          "must be positive"
        return false
      end

      # Detect smallest and largest space
      smallest_space = length if smallest_space.nil? || length < smallest_space
      largest_space = length if largest_space.nil? || length > largest_space

      entries = check_space_list(length, list_entry_addr, flat_file)
      return false unless entries

      total_space_bytes += length * entries
      space_distribution[msb(length)] += entries
    end

    distribution = space_distribution.map do |bits, count|
      "#{2 ** (bits - 1)}-#{2 ** bits - 1}:#{count}; "
    end.join
    PEROBS.log.info "SpaceManager stats: smallest: #{smallest_space}; " +
      "largest: #{largest_space}; total bytes: #{total_space_bytes}; " +
      "distribution: " + distribution

    true
  end

  # @return [Array<Array(Integer, Integer)>] all registered spaces as
  #   [ address, length ] pairs, sorted by address
  def to_a
    spaces = []

    @index.each do |length, list_entry_addr|
      while list_entry_addr > 0
        space_address, list_entry_addr = read_list_entry(list_entry_addr)
        spaces << [ space_address, length ]
      end
    end

    spaces.sort { |x, y| x[0] <=> y[0] }
  end

  private

  # Store a new list entry in the list file.
  # @param space_address [Integer] address of the space in the FlatFile
  # @param next_entry_addr [Integer] address of the following list entry
  #   or 0 if this is the last entry
  # @return [Integer] address of the new entry in the list file
  def insert_space_in_list(space_address, next_entry_addr)
    blob = [ space_address, next_entry_addr ].pack(LIST_ENTRY_FORMAT)
    @list.store_blob(blob_addr = @list.free_address, blob)

    blob_addr
  end

  # Read the list entry stored at the given list file address.
  # @return [Array(Integer, Integer)] space address and next entry address
  def read_list_entry(list_entry_addr)
    @list.retrieve_blob(list_entry_addr).unpack(LIST_ENTRY_FORMAT)
  end

  # Walk the space list that starts at head_addr and validate every entry.
  # @return [Integer, nil] number of entries in the list or nil if the
  #   list is cyclic, has a dangling link or disagrees with flat_file
  def check_space_list(length, head_addr, flat_file)
    # Every visited entry address is remembered so that a cycle is found
    # no matter which entry it leads back to.
    visited = {}
    list_entry_addr = head_addr

    while list_entry_addr > 0
      if visited.key?(list_entry_addr)
        PEROBS.log.error "Space list is cyclic: " +
          "#{visited.keys + [ list_entry_addr ]}"
        return nil
      end
      visited[list_entry_addr] = true

      unless (blob = @list.retrieve_blob(list_entry_addr))
        PEROBS.log.error "SpaceManager points to non-existing " +
          "space list entry at address #{list_entry_addr}"
        return nil
      end
      space_address, next_entry_addr = blob.unpack(LIST_ENTRY_FORMAT)

      if flat_file && !flat_file.has_space?(space_address, length)
        PEROBS.log.error "SpaceManager has space at offset " +
          "#{space_address} of size #{length} that isn't " +
          "available in the FlatFile."
        return nil
      end
      list_entry_addr = next_entry_addr
    end

    visited.size
  end

  # @return [Integer] number of bits needed to represent i; 63 for
  #   negative values
  def msb(i)
    return 63 if i < 0

    i.bit_length
  end

  # Set all statistics counters back to 0.
  def reset_stats
    @added_spaces = 0
    @recycled_spaces = 0
    @failed_requests = 0
  end

end
271
+
272
+ end
273
+
@@ -2,7 +2,7 @@
2
2
  #
3
3
  # = SpaceTree.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
5
+ # Copyright (c) 2016, 2017, 2018 by Chris Schlaeger <chris@taskjuggler.org>
6
6
  #
7
7
  # MIT License
8
8
  #
@@ -40,20 +40,21 @@ module PEROBS
40
40
  # size which drastically simplifies the backing store operation.
41
41
  class SpaceTree
42
42
 
43
- attr_reader :nodes, :cache
43
+ attr_reader :nodes, :cache, :progressmeter
44
44
 
45
45
  # Manage the free spaces tree in the specified directory
46
46
  # @param dir [String] directory path of an existing directory
47
- def initialize(dir)
47
+ def initialize(dir, progressmeter)
48
48
  @dir = dir
49
+ @progressmeter = progressmeter
49
50
 
50
51
  # This EquiBlobsFile contains the nodes of the SpaceTree.
51
- @nodes = EquiBlobsFile.new(@dir, 'database_spaces',
52
+ @nodes = EquiBlobsFile.new(@dir, 'database_spaces', progressmeter,
52
53
  SpaceTreeNode::NODE_BYTES, 1)
53
54
 
54
55
  # Benchmark runs showed a cache size of 128 to be a good compromise
55
56
  # between read and write performance trade-offs and memory consumption.
56
- @cache = PersistentObjectCache.new(128, SpaceTreeNode, self)
57
+ @cache = PersistentObjectCache.new(256, 256, SpaceTreeNode, self)
57
58
  end
58
59
 
59
60
  # Open the SpaceTree file.
@@ -74,9 +75,14 @@ module PEROBS
74
75
  @cache.clear
75
76
  end
76
77
 
78
+ # @return true if file is currently open.
79
+ def is_open?
80
+ !@root_address.nil?
81
+ end
82
+
77
83
  # Flush all pending writes to the file system.
78
84
  def sync
79
- @cache.flush
85
+ @cache.flush(true)
80
86
  @nodes.sync
81
87
  end
82
88
 
@@ -105,10 +111,12 @@ module PEROBS
105
111
  if size <= 0
106
112
  PEROBS.log.fatal "Size (#{size}) must be larger than 0."
107
113
  end
108
- if has_space?(address, size)
109
- PEROBS.log.fatal "The space with address #{address} and size #{size} " +
110
- "can't be added twice."
111
- end
114
+ # The following check is fairly costly and should never trigger unless
115
+ # there is a bug in the PEROBS code. Only use this for debugging.
116
+ #if has_space?(address, size)
117
+ # PEROBS.log.fatal "The space with address #{address} and size " +
118
+ # "#{size} can't be added twice."
119
+ #end
112
120
  root.add_space(address, size)
113
121
  end
114
122
 
@@ -157,8 +165,9 @@ module PEROBS
157
165
  # @param flat_file [FlatFile] Flat file to compare with
158
166
  # @return True if space list matches, false otherwise
159
167
  def check(flat_file = nil)
160
- @nodes.check
161
- root.check(flat_file)
168
+ sync
169
+ return false unless @nodes.check
170
+ root.check(flat_file, @nodes.total_entries)
162
171
  end
163
172
 
164
173
  # Iterate over all entries and yield address and size.
@@ -74,24 +74,11 @@ module PEROBS
74
74
  @smaller = smaller
75
75
  @equal = equal
76
76
  @larger = larger
77
-
78
- ObjectSpace.define_finalizer(
79
- self, SpaceTreeNode._finalize(@tree, @node_address, object_id))
80
- @tree.cache.insert(self, false)
81
- end
82
-
83
- # This method generates the destructor for the objects of this class. It
84
- # is done this way to prevent the Proc object hanging on to a reference to
85
- # self which would prevent the object from being collected. This internal
86
- # method is not intended for users to call.
87
- def SpaceTreeNode._finalize(tree, node_address, ruby_object_id)
88
- proc { tree.cache._collect(node_address, ruby_object_id) }
89
77
  end
90
78
 
91
79
  # Create a new SpaceTreeNode. This method should be used for the creation
92
80
  # of new nodes instead of calling the constructor directly.
93
81
  # @param tree [SpaceTree] The tree the node should belong to
94
- # @param node_address [Integer] Address of the node in the file
95
82
  # @param blob_address [Integer] Address of the free space blob
96
83
  # @param size [Integer] Size of the free space blob
97
84
  # @param parent [SpaceTreeNode] Parent node in the tree
@@ -99,7 +86,7 @@ module PEROBS
99
86
  node_address = tree.nodes.free_address
100
87
 
101
88
  node = SpaceTreeNode.new(tree, node_address, blob_address, size, parent)
102
- node.save
89
+ tree.cache.insert(node)
103
90
 
104
91
  node
105
92
  end
@@ -107,7 +94,7 @@ module PEROBS
107
94
  # Restore a node from the backing store at the given address and tree.
108
95
  # @param tree [SpaceTree] The tree the node belongs to
109
96
  # @param node_address [Integer] The address in the file.
110
- def SpaceTreeNode::load(tree, node_address)
97
+ def SpaceTreeNode::load(tree, node_address, unused = nil)
111
98
  unless node_address > 0
112
99
  PEROBS.log.fatal "node_address (#{node_address}) must be larger than 0"
113
100
  end
@@ -132,6 +119,8 @@ module PEROBS
132
119
  node = SpaceTreeNode.new(tree, node_address, blob_address, size,
133
120
  parent, smaller, equal, larger)
134
121
 
122
+ tree.cache.insert(node, false)
123
+
135
124
  node
136
125
  end
137
126
 
@@ -529,62 +518,65 @@ module PEROBS
529
518
  # errors.
530
519
  # @param flat_file [FlatFile] If given, check that the space is also
531
520
  # present in the given flat file.
521
+ # @param count [Integer] The total number of entries in the tree
532
522
  # @return [false,true] True if OK, false otherwise
533
- def check(flat_file)
523
+ def check(flat_file, count)
534
524
  node_counter = 0
535
525
  max_depth = 0
536
526
 
537
- each do |node, mode, stack|
538
- max_depth = stack.size if stack.size > max_depth
539
-
540
- case mode
541
- when :smaller
542
- if node.smaller
543
- return false unless node.check_node_link('smaller', stack)
544
- smaller_node = node.smaller
545
- if smaller_node.size >= node.size
546
- PEROBS.log.error "Smaller SpaceTreeNode size " +
547
- "(#{smaller_node}) is not smaller than #{node}"
548
- return false
527
+ @tree.progressmeter.start('Checking space list entries', count) do |pm|
528
+ each do |node, mode, stack|
529
+ max_depth = stack.size if stack.size > max_depth
530
+
531
+ case mode
532
+ when :smaller
533
+ if node.smaller
534
+ return false unless node.check_node_link('smaller', stack)
535
+ smaller_node = node.smaller
536
+ if smaller_node.size >= node.size
537
+ PEROBS.log.error "Smaller SpaceTreeNode size " +
538
+ "(#{smaller_node}) is not smaller than #{node}"
539
+ return false
540
+ end
549
541
  end
550
- end
551
- when :equal
552
- if node.equal
553
- return false unless node.check_node_link('equal', stack)
554
- equal_node = node.equal
555
-
556
- if equal_node.smaller || equal_node.larger
557
- PEROBS.log.error "Equal node #{equal_node} must not have " +
558
- "smaller/larger childs"
559
- return false
542
+ when :equal
543
+ if node.equal
544
+ return false unless node.check_node_link('equal', stack)
545
+ equal_node = node.equal
546
+
547
+ if equal_node.smaller || equal_node.larger
548
+ PEROBS.log.error "Equal node #{equal_node} must not have " +
549
+ "smaller/larger childs"
550
+ return false
551
+ end
552
+
553
+ if node.size != equal_node.size
554
+ PEROBS.log.error "Equal SpaceTreeNode size (#{equal_node}) " +
555
+ "is not equal parent node #{node}"
556
+ return false
557
+ end
560
558
  end
561
-
562
- if node.size != equal_node.size
563
- PEROBS.log.error "Equal SpaceTreeNode size (#{equal_node}) is " +
564
- "not equal parent node #{node}"
565
- return false
559
+ when :larger
560
+ if node.larger
561
+ return false unless node.check_node_link('larger', stack)
562
+ larger_node = node.larger
563
+ if larger_node.size <= node.size
564
+ PEROBS.log.error "Larger SpaceTreeNode size " +
565
+ "(#{larger_node}) is not larger than #{node}"
566
+ return false
567
+ end
566
568
  end
567
- end
568
- when :larger
569
- if node.larger
570
- return false unless node.check_node_link('larger', stack)
571
- larger_node = node.larger
572
- if larger_node.size <= node.size
573
- PEROBS.log.error "Larger SpaceTreeNode size " +
574
- "(#{larger_node}) is not larger than #{node}"
569
+ when :on_exit
570
+ if flat_file &&
571
+ !flat_file.has_space?(node.blob_address, node.size)
572
+ PEROBS.log.error "SpaceTreeNode has space at offset " +
573
+ "#{node.blob_address} of size #{node.size} that isn't " +
574
+ "available in the FlatFile."
575
575
  return false
576
576
  end
577
- end
578
- when :on_exit
579
- if flat_file &&
580
- !flat_file.has_space?(node.blob_address, node.size)
581
- PEROBS.log.error "SpaceTreeNode has space at offset " +
582
- "#{node.blob_address} of size #{node.size} that isn't " +
583
- "available in the FlatFile."
584
- return false
585
- end
586
577
 
587
- node_counter += 1
578
+ pm.update(node_counter += 1)
579
+ end
588
580
  end
589
581
  end
590
582
  PEROBS.log.debug "#{node_counter} SpaceTree nodes checked"