perobs 4.0.0 → 4.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +27 -16
  3. data/lib/perobs/Array.rb +66 -19
  4. data/lib/perobs/BTree.rb +106 -15
  5. data/lib/perobs/BTreeBlob.rb +4 -3
  6. data/lib/perobs/BTreeDB.rb +5 -4
  7. data/lib/perobs/BTreeNode.rb +482 -156
  8. data/lib/perobs/BTreeNodeLink.rb +10 -0
  9. data/lib/perobs/BigArray.rb +285 -0
  10. data/lib/perobs/BigArrayNode.rb +1002 -0
  11. data/lib/perobs/BigHash.rb +246 -0
  12. data/lib/perobs/BigTree.rb +197 -0
  13. data/lib/perobs/BigTreeNode.rb +873 -0
  14. data/lib/perobs/Cache.rb +48 -10
  15. data/lib/perobs/ConsoleProgressMeter.rb +61 -0
  16. data/lib/perobs/DataBase.rb +4 -3
  17. data/lib/perobs/DynamoDB.rb +57 -15
  18. data/lib/perobs/EquiBlobsFile.rb +155 -50
  19. data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
  20. data/lib/perobs/FlatFile.rb +519 -227
  21. data/lib/perobs/FlatFileBlobHeader.rb +113 -54
  22. data/lib/perobs/FlatFileDB.rb +49 -23
  23. data/lib/perobs/FuzzyStringMatcher.rb +175 -0
  24. data/lib/perobs/Hash.rb +127 -33
  25. data/lib/perobs/IDList.rb +144 -0
  26. data/lib/perobs/IDListPage.rb +107 -0
  27. data/lib/perobs/IDListPageFile.rb +180 -0
  28. data/lib/perobs/IDListPageRecord.rb +142 -0
  29. data/lib/perobs/Object.rb +18 -15
  30. data/lib/perobs/ObjectBase.rb +46 -5
  31. data/lib/perobs/PersistentObjectCache.rb +57 -68
  32. data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
  33. data/lib/perobs/ProgressMeter.rb +97 -0
  34. data/lib/perobs/SpaceManager.rb +273 -0
  35. data/lib/perobs/SpaceTree.rb +21 -12
  36. data/lib/perobs/SpaceTreeNode.rb +53 -61
  37. data/lib/perobs/Store.rb +264 -145
  38. data/lib/perobs/version.rb +1 -1
  39. data/lib/perobs.rb +2 -0
  40. data/perobs.gemspec +4 -4
  41. data/test/Array_spec.rb +15 -6
  42. data/test/BTree_spec.rb +6 -2
  43. data/test/BigArray_spec.rb +261 -0
  44. data/test/BigHash_spec.rb +152 -0
  45. data/test/BigTreeNode_spec.rb +153 -0
  46. data/test/BigTree_spec.rb +259 -0
  47. data/test/EquiBlobsFile_spec.rb +105 -1
  48. data/test/FNV_Hash_1a_64_spec.rb +59 -0
  49. data/test/FlatFileDB_spec.rb +198 -14
  50. data/test/FuzzyStringMatcher_spec.rb +261 -0
  51. data/test/Hash_spec.rb +13 -3
  52. data/test/IDList_spec.rb +77 -0
  53. data/test/LegacyDBs/LegacyDB.rb +155 -0
  54. data/test/LegacyDBs/version_3/class_map.json +1 -0
  55. data/test/LegacyDBs/version_3/config.json +1 -0
  56. data/test/LegacyDBs/version_3/database.blobs +0 -0
  57. data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
  58. data/test/LegacyDBs/version_3/index.blobs +0 -0
  59. data/test/LegacyDBs/version_3/version +1 -0
  60. data/test/LockFile_spec.rb +9 -6
  61. data/test/SpaceManager_spec.rb +176 -0
  62. data/test/SpaceTree_spec.rb +4 -1
  63. data/test/Store_spec.rb +305 -203
  64. data/test/spec_helper.rb +9 -4
  65. metadata +57 -16
  66. data/lib/perobs/BTreeNodeCache.rb +0 -109
  67. data/lib/perobs/TreeDB.rb +0 -277
@@ -67,6 +67,8 @@ module PEROBS
67
67
  get_node.respond_to?(method)
68
68
  end
69
69
 
70
+ # Directly define some commonly used methods to avoid the method_missing
71
+ # overhead.
70
72
  def is_leaf
71
73
  get_node.is_leaf
72
74
  end
@@ -91,10 +93,18 @@ module PEROBS
91
93
  get_node.search_key_index(key)
92
94
  end
93
95
 
96
# Forward insert to the node returned by get_node; both arguments are
# passed through unchanged and the node's result is returned.
def insert(key, value)
  get_node.insert(key, value)
end
99
+
94
100
  def insert_element(key, voc)
95
101
  get_node.insert_element(key, voc)
96
102
  end
97
103
 
104
# Forward split_node to the node returned by get_node and return the
# node's result.
def split_node
  get_node.split_node
end
107
+
98
108
  # Compare this node to another node.
99
109
  # @return [Boolean] true if node address is identical, false otherwise
100
110
  def ==(node)
@@ -0,0 +1,285 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = BigArray.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2016, 2017, 2018, 2019
6
+ # by Chris Schlaeger <chris@taskjuggler.org>
7
+ #
8
+ # MIT License
9
+ #
10
+ # Permission is hereby granted, free of charge, to any person obtaining
11
+ # a copy of this software and associated documentation files (the
12
+ # "Software"), to deal in the Software without restriction, including
13
+ # without limitation the rights to use, copy, modify, merge, publish,
14
+ # distribute, sublicense, and/or sell copies of the Software, and to
15
+ # permit persons to whom the Software is furnished to do so, subject to
16
+ # the following conditions:
17
+ #
18
+ # The above copyright notice and this permission notice shall be
19
+ # included in all copies or substantial portions of the Software.
20
+ #
21
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28
+
29
+ require 'perobs/Object'
30
+ require 'perobs/BigArrayNode'
31
+
32
+ module PEROBS
33
+
34
+ # The BigArray class implements an Array that stores the data in segments. It
35
+ # only loads the currently needed parts of the Array into memory. To provide
36
+ # an efficient access to the data by index a B+Tree like data structure is
37
+ # used. Each segment is stored in a leaf node of the B+Tree.
38
+ class BigArray < PEROBS::Object
39
+
40
# Record that #statistics hands to the root node to be filled in; one
# member per gathered metric. The Struct is assigned to the constant
# directly instead of being subclassed: subclassing an anonymous Struct
# adds a superfluous intermediate class and raises a "superclass mismatch"
# TypeError if this definition is ever evaluated a second time.
Stats = Struct.new(:leaf_nodes, :branch_nodes, :min_depth, :max_depth)
43
+
44
+ attr_persist :node_size, :root, :first_leaf, :last_leaf, :entry_counter
45
+
46
# Internal constructor. Use Store.new() instead.
# @param p [Handle]
# @param node_size [Integer] The size of the tree nodes. This determines
#        how many entries must be read/written for each operation. The
#        default of 150 was empirically found to be a performance sweet
#        spot. Smaller values will improve write operations. Larger
#        values will improve read operations. 20 - 500 is a reasonable
#        range to try. Values that are not larger than 3 or that are odd
#        are reported via PEROBS.log.fatal.
def initialize(p, node_size = 150)
  super(p)

  PEROBS.log.fatal "Node size (#{node_size}) must be larger than 3" unless node_size > 3
  PEROBS.log.fatal "Node size (#{node_size}) must be an even number" unless (node_size % 2).zero?

  self.node_size = node_size
  # Start out with an empty tree.
  clear
end
66
+
67
# Remove all entries from the BigArray. The current tree is replaced by a
# single newly created BigArrayNode that is at the same time the root, the
# first leaf and the last leaf, and the entry counter is reset to 0.
def clear
  # The trailing `true` presumably marks the new node as a leaf -- confirm
  # against BigArrayNode#initialize.
  fresh_node = @store.new(BigArrayNode, myself, true)
  self.last_leaf = fresh_node
  self.first_leaf = fresh_node
  self.root = fresh_node
  self.entry_counter = 0
end
73
+
74
# Store the value at the given index. If the index already exists the old
# value will be overwritten. Writing at or beyond the current end grows the
# array; any positions skipped over are filled with nil.
# @param index [Integer] Position in the array; negative values are
#        resolved by validate_index_range
# @param value [Object] value to store
def []=(index, value)
  index = validate_index_range(index)

  @store.transaction do
    if index < @entry_counter
      # Overwrite of an existing element
      @root.set(index, value)
    else
      # Append. The range is empty when index == @entry_counter, so the
      # plain append needs no separate branch; otherwise it supplies the
      # nil padding.
      (@entry_counter...index).each { |gap| @root.insert(gap, nil) }
      @root.insert(index, value)
      self.entry_counter = index + 1
    end
  end
end
99
+
100
# Append the value to the end of the BigArray by storing it at index
# @entry_counter.
# NOTE(review): the method evaluates to the assigned value, not to the
# BigArray, so calls cannot be chained the way ::Array#<< allows --
# confirm whether that is intended.
# @param value value to append
def <<(value)
  self[@entry_counter] = value
end
103
+
104
# Insert the value at the given index. When the index refers to an existing
# position the value is inserted into the tree at that position and the
# entry counter grows by one; nothing is overwritten. An index at or beyond
# the current end is handled by []=, i.e. the value is appended with nil
# padding where needed.
# @param index [Integer] Position in the array; negative values are
#        resolved by validate_index_range
# @param value [Object] value to insert
def insert(index, value)
  index = validate_index_range(index)

  # Nothing to shift at or past the end: fall back to plain assignment.
  return (self[index] = value) if index >= @entry_counter

  # Insert in between existing elements
  @store.transaction do
    @root.insert(index, value)
    self.entry_counter += 1
  end
end
121
+
122
# Return the value stored at the given index.
# @param index [Integer] Position in the array; negative values are
#        resolved by validate_index_range
# @return [Object or nil] found value, or nil when the index lies outside
#         the array in either direction
def [](index)
  position =
    begin
      validate_index_range(index)
    rescue IndexError
      # A negative index reaching before the first element is answered
      # with nil instead of propagating the error.
      return nil
    end

  position < @entry_counter ? @root.get(position) : nil
end
136
+
137
# Check if there is an entry for the given key. The lookup is delegated to
# the root node.
# @param key [Integer] Unique key
# @return [Boolean] True if key is present, false otherwise.
def has_key?(key)
  @root.has_key?(key)
end
143
+
144
# Delete the element at the specified index, returning that element, or
# nil if the index is out of range.
# @param index [Integer] Index in the BigArray; a negative index counts
#        from the end of the array
# @return [Object] found value or nil
def delete_at(index)
  index += @entry_counter if index < 0

  return nil if index < 0 || index >= @entry_counter

  deleted_value = nil
  @store.transaction do
    deleted_value = @root.delete_at(index)
    self.entry_counter -= 1

    # Eliminate single entry nodes at the top.
    while !@root.is_leaf? && @root.size == 1
      # Go through the root= accessor like every other write to a
      # persistent attribute in this class instead of assigning @root
      # directly. Presumably the accessor generated by attr_persist is what
      # reports the change to the store -- confirm against PEROBS::Object.
      self.root = @root.children.first
      @root.parent = nil
    end
  end

  deleted_value
end
169
+
170
# Delete all entries for which the passed block yields true. The
# implementation is optimized for large bulk deletes: it keeps a reference
# to the current root, resets the BigArray with clear and re-inserts every
# entry the block does not reject. If only few elements are deleted the
# overhead of rebuilding the tree is rather high.
# NOTE(review): kept entries are re-inserted under the key yielded by
# old_root.each. If that key is the original index, #insert pads the
# positions of rejected entries with nil instead of closing the gaps --
# verify against BigArrayNode#each.
# NOTE(review): unlike the other mutating methods this one does not open a
# @store.transaction around the whole rebuild; only the nested insert
# calls do.
# @yield [key, value]
def delete_if
  old_root = @root
  clear
  old_root.each do |k, v|
    if !yield(k, v)
      insert(k, v)
    end
  end
end
184
+
185
# @return [Integer] The number of entries stored in the tree, as tracked
#         by the persistent entry counter.
def length
  @entry_counter
end

# size is a synonym for length.
alias size length
191
+
192
# Return true if the BigArray has no stored entries, i.e. the entry
# counter is 0.
# @return [Boolean]
def empty?
  @entry_counter == 0
end
196
+
197
# Return the first entry of the Array, read from the values of the first
# leaf node.
# @return [Object or nil] the first value, or nil when there is no first
#         leaf or it holds no values
def first
  @first_leaf ? @first_leaf.values.first : nil
end
203
+
204
# Return the last entry of the Array, read from the values of the last
# leaf node.
# @return [Object or nil] the last value, or nil when there is no last
#         leaf or it holds no values
def last
  @last_leaf ? @last_leaf.values.last : nil
end
210
+
211
# Iterate over all entries in the tree. Entries are always sorted by the
# key. The walk starts at the first leaf and follows the next_sibling
# links; it stops early as soon as a leaf's each returns a false value.
# @yield [key, value]
# @return [nil]
def each(&block)
  leaf = @first_leaf
  leaf = leaf.next_sibling while leaf && leaf.each(&block)
  nil
end
221
+
222
# Iterate over all entries in the tree in reverse order. Entries are
# always sorted by the key. The walk starts at the last leaf and follows
# the prev_sibling links; it stops early as soon as a leaf's reverse_each
# returns a false value.
# @yield [key, value]
# @return [nil]
def reverse_each(&block)
  leaf = @last_leaf
  leaf = leaf.prev_sibling while leaf && leaf.reverse_each(&block)
  nil
end
232
+
233
# Convert the BigArray into a Ruby Array. This is primarily intended for
# debugging as real-world BigArray objects are likely too big to fit into
# memory.
# @return [::Array] the values of all leaf nodes, in leaf order
def to_a
  ary = []
  node = @first_leaf
  while node
    # Append in place. `ary += node.values` would allocate a new Array and
    # copy everything gathered so far for each leaf visited.
    ary.concat(node.values)
    node = node.next_sibling
  end

  ary
end
246
+
247
# @return [String] Human readable form of the tree, as produced by the
#         root node's to_s. This is only intended for debugging and should
#         only be used with small BigArray objects.
def to_s
  @root.to_s
end
252
+
253
# Check if the tree contains any errors. The check is delegated to the
# root node, which also receives the optional block.
# @return [Boolean] true if no errors were found, false otherwise
def check(&block)
  @root.check(&block)
end
258
+
259
# Gather some statistics regarding the tree structure. A Stats record with
# both node counters at 0 and both depth fields unset is handed to the root
# node, which fills it in.
# @return [Stats] Structs with gathered data
def statistics
  Stats.new(0, 0, nil, nil).tap { |collected| @root.statistics(collected) }
end
266
+
267
+ private
268
+
269
# Translate a possibly negative index into a non-negative position.
# Non-negative indexes are returned unchanged (no upper bound is checked
# here); negative indexes count from the end of the array.
# @param index [Integer] index as passed in by the caller
# @return [Integer] index, or @entry_counter + index for negative indexes
# @raise [IndexError] if a negative index reaches before the first entry
def validate_index_range(index)
  return index unless index < 0

  if -index > @entry_counter
    raise IndexError,
          "index #{index} too small for array; minimum #{-@entry_counter}"
  end

  @entry_counter + index
end
281
+
282
+ end
283
+
284
+ end
285
+