perobs 4.0.0 → 4.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +27 -16
  3. data/lib/perobs/Array.rb +66 -19
  4. data/lib/perobs/BTree.rb +106 -15
  5. data/lib/perobs/BTreeBlob.rb +4 -3
  6. data/lib/perobs/BTreeDB.rb +5 -4
  7. data/lib/perobs/BTreeNode.rb +482 -156
  8. data/lib/perobs/BTreeNodeLink.rb +10 -0
  9. data/lib/perobs/BigArray.rb +285 -0
  10. data/lib/perobs/BigArrayNode.rb +1002 -0
  11. data/lib/perobs/BigHash.rb +246 -0
  12. data/lib/perobs/BigTree.rb +197 -0
  13. data/lib/perobs/BigTreeNode.rb +873 -0
  14. data/lib/perobs/Cache.rb +48 -10
  15. data/lib/perobs/ConsoleProgressMeter.rb +61 -0
  16. data/lib/perobs/DataBase.rb +4 -3
  17. data/lib/perobs/DynamoDB.rb +57 -15
  18. data/lib/perobs/EquiBlobsFile.rb +155 -50
  19. data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
  20. data/lib/perobs/FlatFile.rb +519 -227
  21. data/lib/perobs/FlatFileBlobHeader.rb +113 -54
  22. data/lib/perobs/FlatFileDB.rb +49 -23
  23. data/lib/perobs/FuzzyStringMatcher.rb +175 -0
  24. data/lib/perobs/Hash.rb +127 -33
  25. data/lib/perobs/IDList.rb +144 -0
  26. data/lib/perobs/IDListPage.rb +107 -0
  27. data/lib/perobs/IDListPageFile.rb +180 -0
  28. data/lib/perobs/IDListPageRecord.rb +142 -0
  29. data/lib/perobs/Object.rb +18 -15
  30. data/lib/perobs/ObjectBase.rb +46 -5
  31. data/lib/perobs/PersistentObjectCache.rb +57 -68
  32. data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
  33. data/lib/perobs/ProgressMeter.rb +97 -0
  34. data/lib/perobs/SpaceManager.rb +273 -0
  35. data/lib/perobs/SpaceTree.rb +21 -12
  36. data/lib/perobs/SpaceTreeNode.rb +53 -61
  37. data/lib/perobs/Store.rb +264 -145
  38. data/lib/perobs/version.rb +1 -1
  39. data/lib/perobs.rb +2 -0
  40. data/perobs.gemspec +4 -4
  41. data/test/Array_spec.rb +15 -6
  42. data/test/BTree_spec.rb +6 -2
  43. data/test/BigArray_spec.rb +261 -0
  44. data/test/BigHash_spec.rb +152 -0
  45. data/test/BigTreeNode_spec.rb +153 -0
  46. data/test/BigTree_spec.rb +259 -0
  47. data/test/EquiBlobsFile_spec.rb +105 -1
  48. data/test/FNV_Hash_1a_64_spec.rb +59 -0
  49. data/test/FlatFileDB_spec.rb +198 -14
  50. data/test/FuzzyStringMatcher_spec.rb +261 -0
  51. data/test/Hash_spec.rb +13 -3
  52. data/test/IDList_spec.rb +77 -0
  53. data/test/LegacyDBs/LegacyDB.rb +155 -0
  54. data/test/LegacyDBs/version_3/class_map.json +1 -0
  55. data/test/LegacyDBs/version_3/config.json +1 -0
  56. data/test/LegacyDBs/version_3/database.blobs +0 -0
  57. data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
  58. data/test/LegacyDBs/version_3/index.blobs +0 -0
  59. data/test/LegacyDBs/version_3/version +1 -0
  60. data/test/LockFile_spec.rb +9 -6
  61. data/test/SpaceManager_spec.rb +176 -0
  62. data/test/SpaceTree_spec.rb +4 -1
  63. data/test/Store_spec.rb +305 -203
  64. data/test/spec_helper.rb +9 -4
  65. metadata +57 -16
  66. data/lib/perobs/BTreeNodeCache.rb +0 -109
  67. data/lib/perobs/TreeDB.rb +0 -277
@@ -67,6 +67,8 @@ module PEROBS
67
67
  get_node.respond_to?(method)
68
68
  end
69
69
 
70
+ # Directly define some commonly used methods to avoid the method_missing
71
+ # overhead.
70
72
  def is_leaf
71
73
  get_node.is_leaf
72
74
  end
@@ -91,10 +93,18 @@ module PEROBS
91
93
  get_node.search_key_index(key)
92
94
  end
93
95
 
96
+ def insert(key, value)
97
+ get_node.insert(key, value)
98
+ end
99
+
94
100
  def insert_element(key, voc)
95
101
  get_node.insert_element(key, voc)
96
102
  end
97
103
 
104
+ def split_node
105
+ get_node.split_node
106
+ end
107
+
98
108
  # Compare this node to another node.
99
109
  # @return [Boolean] true if node address is identical, false otherwise
100
110
  def ==(node)
@@ -0,0 +1,285 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = BigArray.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2016, 2017, 2018, 2019
6
+ # by Chris Schlaeger <chris@taskjuggler.org>
7
+ #
8
+ # MIT License
9
+ #
10
+ # Permission is hereby granted, free of charge, to any person obtaining
11
+ # a copy of this software and associated documentation files (the
12
+ # "Software"), to deal in the Software without restriction, including
13
+ # without limitation the rights to use, copy, modify, merge, publish,
14
+ # distribute, sublicense, and/or sell copies of the Software, and to
15
+ # permit persons to whom the Software is furnished to do so, subject to
16
+ # the following conditions:
17
+ #
18
+ # The above copyright notice and this permission notice shall be
19
+ # included in all copies or substantial portions of the Software.
20
+ #
21
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28
+
29
+ require 'perobs/Object'
30
+ require 'perobs/BigArrayNode'
31
+
32
+ module PEROBS
33
+
34
+ # The BigArray class implements an Array that stores the data in segments. It
35
+ # only loads the currently needed parts of the Array into memory. To provide
36
+ # an efficient access to the data by index a B+Tree like data structure is
37
+ # used. Each segment is stored in a leaf node of the B+Tree.
38
+ class BigArray < PEROBS::Object
39
+
40
+ class Stats < Struct.new(:leaf_nodes, :branch_nodes, :min_depth,
41
+ :max_depth)
42
+ end
43
+
44
+ attr_persist :node_size, :root, :first_leaf, :last_leaf, :entry_counter
45
+
46
+ # Internal constructor. Use Store.new() instead.
47
+ # @param p [Handle]
48
+ # @param node_size [Integer] The size of the tree nodes. This determines
49
+ # how many entries must be read/written for each operation. The
50
+ # default of 150 was empirically found to be a performance sweet
51
+ # spot. Smaller values will improve write operations. Larger
52
+ # values will improve read operations. 20 - 500 is a reasonable
53
+ # range to try.
54
+ def initialize(p, node_size = 150)
55
+ super(p)
56
+ unless node_size > 3
57
+ PEROBS.log.fatal "Node size (#{node_size}) must be larger than 3"
58
+ end
59
+ unless node_size % 2 == 0
60
+ PEROBS.log.fatal "Node size (#{node_size}) must be an even number"
61
+ end
62
+
63
+ self.node_size = node_size
64
+ clear
65
+ end
66
+
67
+ # Remove all entries from the BigArray.
68
+ def clear
69
+ self.root = self.first_leaf = self.last_leaf =
70
+ @store.new(BigArrayNode, myself, true)
71
+ self.entry_counter = 0
72
+ end
73
+
74
+ # Store the value at the given index. If the index already exists the old
75
+ # value will be overwritten.
76
+ # @param index [Integer] Position in the array
77
+ # @param value [Integer] value
78
+ def []=(index, value)
79
+ index = validate_index_range(index)
80
+
81
+ @store.transaction do
82
+ if index < @entry_counter
83
+ # Overwrite of an existing element
84
+ @root.set(index, value)
85
+ elsif index == @entry_counter
86
+ # Append right at the end
87
+ @root.insert(index, value)
88
+ self.entry_counter += 1
89
+ else
90
+ # Append with nil padding
91
+ @entry_counter.upto(index - 1) do |i|
92
+ @root.insert(i, nil)
93
+ end
94
+ @root.insert(index, value)
95
+ self.entry_counter = index + 1
96
+ end
97
+ end
98
+ end
99
+
100
+ def <<(value)
101
+ self[@entry_counter] = value
102
+ end
103
+
104
+ # Insert the value at the given index. If the index already exists the value
105
+ # is inserted in between the existing elements; nothing is overwritten.
106
+ # @param index [Integer] Position in the array
107
+ # @param value [Integer] value
108
+ def insert(index, value)
109
+ index = validate_index_range(index)
110
+
111
+ if index < @entry_counter
112
+ # Insert in between existing elements
113
+ @store.transaction do
114
+ @root.insert(index, value)
115
+ self.entry_counter += 1
116
+ end
117
+ else
118
+ self[index] = value
119
+ end
120
+ end
121
+
122
+ # Return the value stored at the given index.
123
+ # @param index [Integer] Position in the array
124
+ # @return [Integer or nil] found value or nil
125
+ def [](index)
126
+ begin
127
+ index = validate_index_range(index)
128
+ rescue IndexError
129
+ return nil
130
+ end
131
+
132
+ return nil if index >= @entry_counter
133
+
134
+ @root.get(index)
135
+ end
136
+
137
+ # Check if there is an entry for the given key.
138
+ # @param key [Integer] Unique key
139
+ # @return [Boolean] True if key is present, false otherwise.
140
+ def has_key?(key)
141
+ @root.has_key?(key)
142
+ end
143
+
144
+ # Delete the element at the specified index, returning that element, or
145
+ # nil if the index is out of range.
146
+ # @param index [Integer] Index in the BigArray
147
+ # @return [Object] found value or nil
148
+ def delete_at(index)
149
+ if index < 0
150
+ index = @entry_counter + index
151
+ end
152
+
153
+ return nil if index < 0 || index >= @entry_counter
154
+
155
+ deleted_value = nil
156
+ @store.transaction do
157
+ deleted_value = @root.delete_at(index)
158
+ self.entry_counter -= 1
159
+
160
+ # Eliminate single entry nodes at the top.
161
+ while !@root.is_leaf? && @root.size == 1
162
+ @root = @root.children.first
163
+ @root.parent = nil
164
+ end
165
+ end
166
+
167
+ deleted_value
168
+ end
169
+
170
+ # Delete all entries for which the passed block yields true. The
171
+ # implementation is optimized for large bulk deletes. It rebuilds a new
172
+ # BTree for the elements to keep. If only few elements are deleted the
173
+ # overhead of rebuilding the BTree is rather high.
174
+ # @yield [key, value]
175
+ def delete_if
176
+ old_root = @root
177
+ clear
178
+ old_root.each do |k, v|
179
+ if !yield(k, v)
180
+ insert(k, v)
181
+ end
182
+ end
183
+ end
184
+
185
+ # @return [Integer] The number of entries stored in the tree.
186
+ def length
187
+ @entry_counter
188
+ end
189
+
190
+ alias size length
191
+
192
+ # Return true if the BigArray has no stored entries.
193
+ def empty?
194
+ @entry_counter == 0
195
+ end
196
+
197
+ # Return the first entry of the Array.
198
+ def first
199
+ return nil unless @first_leaf
200
+
201
+ @first_leaf.values.first
202
+ end
203
+
204
+ # Return the last entry of the Array.
205
+ def last
206
+ return nil unless @last_leaf
207
+
208
+ @last_leaf.values.last
209
+ end
210
+
211
+ # Iterate over all entries in the tree. Entries are always sorted by the
212
+ # key.
213
+ # @yield [key, value]
214
+ def each(&block)
215
+ node = @first_leaf
216
+ while node
217
+ break unless node.each(&block)
218
+ node = node.next_sibling
219
+ end
220
+ end
221
+
222
+ # Iterate over all entries in the tree in reverse order. Entries are
223
+ # always sorted by the key.
224
+ # @yield [key, value]
225
+ def reverse_each(&block)
226
+ node = @last_leaf
227
+ while node
228
+ break unless node.reverse_each(&block)
229
+ node = node.prev_sibling
230
+ end
231
+ end
232
+
233
+ # Convert the BigArray into a Ruby Array. This is primarily intended for
234
+ # debugging as real-world BigArray objects are likely too big to fit into
235
+ # memory.
236
+ def to_a
237
+ ary = []
238
+ node = @first_leaf
239
+ while node do
240
+ ary += node.values
241
+ node = node.next_sibling
242
+ end
243
+
244
+ ary
245
+ end
246
+
247
+ # @return [String] Human readable form of the tree. This is only intended
248
+ # for debugging and should only be used with small BigArray objects.
249
+ def to_s
250
+ @root.to_s
251
+ end
252
+
253
+ # Check if the tree file contains any errors.
254
+ # @return [Boolean] true if no errors were found, false otherwise
255
+ def check(&block)
256
+ @root.check(&block)
257
+ end
258
+
259
+ # Gather some statistics regarding the tree structure.
260
+ # @return [Stats] Struct with gathered data
261
+ def statistics
262
+ stats = Stats.new(0, 0, nil, nil)
263
+ @root.statistics(stats)
264
+ stats
265
+ end
266
+
267
+ private
268
+
269
+ def validate_index_range(index)
270
+ if index < 0
271
+ if -index > @entry_counter
272
+ raise IndexError, "index #{index} too small for array; " +
273
+ "minimum #{-@entry_counter}"
274
+ end
275
+
276
+ index = @entry_counter + index
277
+ end
278
+
279
+ index
280
+ end
281
+
282
+ end
283
+
284
+ end
285
+