perobs 4.0.0 → 4.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +27 -16
- data/lib/perobs/Array.rb +66 -19
- data/lib/perobs/BTree.rb +106 -15
- data/lib/perobs/BTreeBlob.rb +4 -3
- data/lib/perobs/BTreeDB.rb +5 -4
- data/lib/perobs/BTreeNode.rb +482 -156
- data/lib/perobs/BTreeNodeLink.rb +10 -0
- data/lib/perobs/BigArray.rb +285 -0
- data/lib/perobs/BigArrayNode.rb +1002 -0
- data/lib/perobs/BigHash.rb +246 -0
- data/lib/perobs/BigTree.rb +197 -0
- data/lib/perobs/BigTreeNode.rb +873 -0
- data/lib/perobs/Cache.rb +48 -10
- data/lib/perobs/ConsoleProgressMeter.rb +61 -0
- data/lib/perobs/DataBase.rb +4 -3
- data/lib/perobs/DynamoDB.rb +57 -15
- data/lib/perobs/EquiBlobsFile.rb +155 -50
- data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
- data/lib/perobs/FlatFile.rb +519 -227
- data/lib/perobs/FlatFileBlobHeader.rb +113 -54
- data/lib/perobs/FlatFileDB.rb +49 -23
- data/lib/perobs/FuzzyStringMatcher.rb +175 -0
- data/lib/perobs/Hash.rb +127 -33
- data/lib/perobs/IDList.rb +144 -0
- data/lib/perobs/IDListPage.rb +107 -0
- data/lib/perobs/IDListPageFile.rb +180 -0
- data/lib/perobs/IDListPageRecord.rb +142 -0
- data/lib/perobs/Object.rb +18 -15
- data/lib/perobs/ObjectBase.rb +46 -5
- data/lib/perobs/PersistentObjectCache.rb +57 -68
- data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
- data/lib/perobs/ProgressMeter.rb +97 -0
- data/lib/perobs/SpaceManager.rb +273 -0
- data/lib/perobs/SpaceTree.rb +21 -12
- data/lib/perobs/SpaceTreeNode.rb +53 -61
- data/lib/perobs/Store.rb +264 -145
- data/lib/perobs/version.rb +1 -1
- data/lib/perobs.rb +2 -0
- data/perobs.gemspec +4 -4
- data/test/Array_spec.rb +15 -6
- data/test/BTree_spec.rb +6 -2
- data/test/BigArray_spec.rb +261 -0
- data/test/BigHash_spec.rb +152 -0
- data/test/BigTreeNode_spec.rb +153 -0
- data/test/BigTree_spec.rb +259 -0
- data/test/EquiBlobsFile_spec.rb +105 -1
- data/test/FNV_Hash_1a_64_spec.rb +59 -0
- data/test/FlatFileDB_spec.rb +198 -14
- data/test/FuzzyStringMatcher_spec.rb +261 -0
- data/test/Hash_spec.rb +13 -3
- data/test/IDList_spec.rb +77 -0
- data/test/LegacyDBs/LegacyDB.rb +155 -0
- data/test/LegacyDBs/version_3/class_map.json +1 -0
- data/test/LegacyDBs/version_3/config.json +1 -0
- data/test/LegacyDBs/version_3/database.blobs +0 -0
- data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
- data/test/LegacyDBs/version_3/index.blobs +0 -0
- data/test/LegacyDBs/version_3/version +1 -0
- data/test/LockFile_spec.rb +9 -6
- data/test/SpaceManager_spec.rb +176 -0
- data/test/SpaceTree_spec.rb +4 -1
- data/test/Store_spec.rb +305 -203
- data/test/spec_helper.rb +9 -4
- metadata +57 -16
- data/lib/perobs/BTreeNodeCache.rb +0 -109
- data/lib/perobs/TreeDB.rb +0 -277
data/lib/perobs/BTreeNodeLink.rb
CHANGED
@@ -67,6 +67,8 @@ module PEROBS
|
|
67
67
|
get_node.respond_to?(method)
|
68
68
|
end
|
69
69
|
|
70
|
+
# Directly define some commonly used methods to avoid the method_missing
|
71
|
+
# overhead.
|
70
72
|
# Forward is_leaf to the node returned by get_node. Defined explicitly so
# the call does not have to go through method_missing.
def is_leaf
  get_node.is_leaf
end
|
@@ -91,10 +93,18 @@ module PEROBS
|
|
91
93
|
get_node.search_key_index(key)
|
92
94
|
end
|
93
95
|
|
96
|
+
# Forward insert to the node returned by get_node.
# @param key passed through unchanged to the node's insert
# @param value passed through unchanged to the node's insert
# @return whatever the node's insert returns
def insert(key, value)
  get_node.insert(key, value)
end
|
99
|
+
|
94
100
|
# Forward insert_element to the node returned by get_node.
# @param key passed through unchanged to the node's insert_element
# @param voc passed through unchanged to the node's insert_element
# @return whatever the node's insert_element returns
def insert_element(key, voc)
  get_node.insert_element(key, voc)
end
|
97
103
|
|
104
|
+
# Forward split_node to the node returned by get_node.
# @return whatever the node's split_node returns
def split_node
  get_node.split_node
end
|
107
|
+
|
98
108
|
# Compare this node to another node.
|
99
109
|
# @return [Boolean] true if node address is identical, false otherwise
|
100
110
|
def ==(node)
|
@@ -0,0 +1,285 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# = BigArray.rb -- Persistent Ruby Object Store
|
4
|
+
#
|
5
|
+
# Copyright (c) 2016, 2017, 2018, 2019
|
6
|
+
# by Chris Schlaeger <chris@taskjuggler.org>
|
7
|
+
#
|
8
|
+
# MIT License
|
9
|
+
#
|
10
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
11
|
+
# a copy of this software and associated documentation files (the
|
12
|
+
# "Software"), to deal in the Software without restriction, including
|
13
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
14
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
15
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
16
|
+
# the following conditions:
|
17
|
+
#
|
18
|
+
# The above copyright notice and this permission notice shall be
|
19
|
+
# included in all copies or substantial portions of the Software.
|
20
|
+
#
|
21
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
22
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
23
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
24
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
25
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
26
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
27
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
28
|
+
|
29
|
+
require 'perobs/Object'
|
30
|
+
require 'perobs/BigArrayNode'
|
31
|
+
|
32
|
+
module PEROBS
|
33
|
+
|
34
|
+
# The BigArray class implements an Array that stores the data in segments. It
|
35
|
+
# only loads the currently needed parts of the Array into memory. To provide
|
36
|
+
# an efficient access to the data by index a B+Tree like data structure is
|
37
|
+
# used. Each segment is stored in a leaf node of the B+Tree.
|
38
|
+
class BigArray < PEROBS::Object
|
39
|
+
|
40
|
+
# Value object that collects structural statistics of the tree: number of
# leaf nodes, number of branch nodes and the minimum/maximum depth. It is
# created with (0, 0, nil, nil) and filled in by the nodes' statistics
# method.
# The Struct is assigned directly instead of being subclassed. Subclassing
# an anonymous Struct.new adds a pointless anonymous class to the ancestor
# chain and raises a "superclass mismatch" TypeError if the file is ever
# loaded a second time.
Stats = Struct.new(:leaf_nodes, :branch_nodes, :min_depth, :max_depth)
|
43
|
+
|
44
|
+
attr_persist :node_size, :root, :first_leaf, :last_leaf, :entry_counter
|
45
|
+
|
46
|
+
# Internal constructor. Use Store.new() instead.
# @param p [Handle]
# @param node_size [Integer] The size of the tree nodes. This determines
#        how many entries must be read/written for each operation. The
#        default of 150 was empirically found to be a performance sweet
#        spot. Smaller values will improve write operations. Larger
#        values will improve read operations. 20 - 500 is a reasonable
#        range to try. Must be an even number larger than 3.
def initialize(p, node_size = 150)
  super(p)

  # Report unusable node sizes through the PEROBS logger's fatal channel.
  PEROBS.log.fatal "Node size (#{node_size}) must be larger than 3" \
    unless node_size > 3
  PEROBS.log.fatal "Node size (#{node_size}) must be an even number" \
    unless node_size % 2 == 0

  self.node_size = node_size
  # clear creates the initial node and resets the entry counter to 0.
  clear
end
|
66
|
+
|
67
|
+
# Remove all entries from the BigArray. A single new BigArrayNode is
# created in the store and becomes the root, the first leaf and the last
# leaf at the same time; the entry counter is reset to 0.
# @return [Integer] 0
def clear
  fresh_node = @store.new(BigArrayNode, myself, true)
  # Same setter order as a right-to-left chained assignment.
  self.last_leaf = fresh_node
  self.first_leaf = fresh_node
  self.root = fresh_node
  self.entry_counter = 0
end
|
73
|
+
|
74
|
+
# Store the value at the given index. If the index already exists the old
# value is overwritten. Writing at or beyond the current end grows the
# array; any skipped positions in between are filled with nil.
# @param index [Integer] Position in the array. Negative values count from
#        the end (see validate_index_range).
# @param value [Object] value to store
# @raise [IndexError] if a negative index reaches before the first element
def []=(index, value)
  index = validate_index_range(index)

  @store.transaction do
    if index < @entry_counter
      # Overwrite of an existing element
      @root.set(index, value)
    else
      # Append; the range is empty when index is exactly the current end,
      # otherwise every skipped position is padded with nil first.
      (@entry_counter...index).each { |gap| @root.insert(gap, nil) }
      @root.insert(index, value)
      self.entry_counter = index + 1
    end
  end
end
|
99
|
+
|
100
|
+
# Append the value to the end of the BigArray by assigning it to the index
# right after the current last element.
# @param value [Object] value to append
# @return [Object] the appended value (the value of the assignment
#         expression), not the BigArray itself
def <<(value)
  self[@entry_counter] = value
end
|
103
|
+
|
104
|
+
# Insert the value at the given index. When the index refers to an existing
# element the value is inserted into the tree at that position and the
# entry counter grows by one; nothing is overwritten. When the index is at
# or beyond the current end the call behaves like an index assignment
# (append, with nil padding if needed).
# @param index [Integer] Position in the array. Negative values count from
#        the end (see validate_index_range).
# @param value [Object] value to insert
# @raise [IndexError] if a negative index reaches before the first element
def insert(index, value)
  index = validate_index_range(index)

  # At or past the end: plain assignment takes care of appending.
  unless index < @entry_counter
    return (self[index] = value)
  end

  # Insert in between existing elements
  @store.transaction do
    @root.insert(index, value)
    self.entry_counter += 1
  end
end
|
121
|
+
|
122
|
+
# Return the value stored at the given index.
# @param index [Integer] Position in the array. Negative values count from
#        the end (-1 is the last element).
# @return [Object or nil] found value, or nil if the index lies outside
#         the array on either side
def [](index)
  # An out-of-range read is an expected case, so it is handled with a plain
  # range check instead of raising and rescuing an IndexError.
  index += @entry_counter if index < 0
  return nil if index < 0 || index >= @entry_counter

  @root.get(index)
end
|
136
|
+
|
137
|
+
# Check if there is an entry for the given key. The question is passed on
# to the root node's has_key? method unchanged.
# @param key [Integer] Unique key
# @return [Boolean] True if key is present, false otherwise.
def has_key?(key)
  @root.has_key?(key)
end
|
143
|
+
|
144
|
+
# Delete the element at the specified index, returning that element, or
# nil if the index is out of range.
# @param index [Integer] Index in the BigArray. Negative values count from
#        the end.
# @return [Object] deleted value or nil
def delete_at(index)
  index += @entry_counter if index < 0
  return nil if index < 0 || index >= @entry_counter

  deleted_value = nil
  @store.transaction do
    deleted_value = @root.delete_at(index)
    self.entry_counter -= 1

    # Eliminate single entry nodes at the top. The new root is assigned
    # through the attribute setter (self.root=), like every other write to
    # a persistent attribute in this class, instead of writing the instance
    # variable directly.
    while !@root.is_leaf? && @root.size == 1
      self.root = @root.children.first
      @root.parent = nil
    end
  end

  deleted_value
end
|
169
|
+
|
170
|
+
# Delete all entries for which the passed block yields true. The
# implementation is geared towards large bulk deletes: the array is cleared
# and every entry the block does not reject is inserted again, which is
# costly when only a few entries are removed.
# NOTE(review): kept entries are re-inserted under the key yielded by the
# old root's each. If that key is the original index, insert will pad the
# positions of removed entries with nil instead of closing the gap --
# confirm against BigArrayNode#each.
# NOTE(review): unlike the other mutating methods this one does not open a
# transaction of its own around the whole rebuild -- confirm this is
# intended.
# @yield [key, value]
def delete_if
  previous_root = @root
  clear
  previous_root.each do |k, v|
    insert(k, v) unless yield(k, v)
  end
end
|
184
|
+
|
185
|
+
# Report how many entries the BigArray currently holds, as tracked by the
# entry counter.
# @return [Integer] The number of entries stored in the tree.
def length
  @entry_counter
end
|
189
|
+
|
190
|
+
alias size length
|
191
|
+
|
192
|
+
# Tell whether the BigArray holds no entries at all.
# @return [Boolean] true if the entry counter is 0, false otherwise
def empty?
  @entry_counter == 0
end
|
196
|
+
|
197
|
+
# Return the first entry of the Array, i.e. the first value held by the
# first leaf node.
# @return [Object or nil] nil if there is no first leaf or it has no values
def first
  @first_leaf ? @first_leaf.values.first : nil
end
|
203
|
+
|
204
|
+
# Return the last entry of the Array, i.e. the last value held by the last
# leaf node.
# @return [Object or nil] nil if there is no last leaf or it has no values
def last
  @last_leaf ? @last_leaf.values.last : nil
end
|
210
|
+
|
211
|
+
# Iterate over all entries in the tree in ascending order by walking the
# chain of leaf nodes from the first leaf via next_sibling. The block is
# handed to each leaf's own each method; the walk stops early as soon as a
# leaf's each returns nil or false.
# @yield whatever the leaf node's each yields (presumably the stored
#        entries -- confirm against BigArrayNode#each)
# @return [nil]
def each(&block)
  leaf = @first_leaf
  while leaf && leaf.each(&block)
    leaf = leaf.next_sibling
  end
end
|
221
|
+
|
222
|
+
# Iterate over all entries in the tree in reverse order by walking the
# chain of leaf nodes from the last leaf via prev_sibling. The block is
# handed to each leaf's own reverse_each method; the walk stops early as
# soon as a leaf's reverse_each returns nil or false.
# @yield whatever the leaf node's reverse_each yields (presumably the
#        stored entries -- confirm against BigArrayNode#reverse_each)
# @return [nil]
def reverse_each(&block)
  leaf = @last_leaf
  while leaf && leaf.reverse_each(&block)
    leaf = leaf.prev_sibling
  end
end
|
232
|
+
|
233
|
+
# Convert the BigArray into a Ruby Array. This is primarily intended for
# debugging as real-world BigArray objects are likely too big to fit into
# memory.
# @return [Array] the values of all leaf nodes, in leaf order
def to_a
  ary = []
  node = @first_leaf
  while node
    # concat appends in place. Using += here would copy the whole
    # accumulated array once per leaf.
    ary.concat(node.values)
    node = node.next_sibling
  end

  ary
end
|
246
|
+
|
247
|
+
# @return [String] Human reable form of the tree. This is only intended
|
248
|
+
# for debugging and should only be used with small BigArray objects.
|
249
|
+
def to_s
|
250
|
+
@root.to_s
|
251
|
+
end
|
252
|
+
|
253
|
+
# Check if the tree contains any errors. The check itself, including the
# optional block, is handed over to the root node.
# @return [Boolean] true if no errors were found, false otherwise
def check(&block)
  @root.check(&block)
end
|
258
|
+
|
259
|
+
# Gather some statistics regarding the tree structure. A Stats record
# starting at (0, 0, nil, nil) is handed to the root node's statistics
# method, which is expected to fill it in.
# @return [Stats] Structs with gathered data
def statistics
  Stats.new(0, 0, nil, nil).tap { |collected| @root.statistics(collected) }
end
|
266
|
+
|
267
|
+
private
|
268
|
+
|
269
|
+
# Translate a possibly negative index into its non-negative equivalent.
# Non-negative indexes are returned unchanged (no upper bound is checked);
# negative ones count backwards from the current end of the array.
# @param index [Integer] index as passed in by the caller
# @return [Integer] the corresponding non-negative index
# @raise [IndexError] if a negative index reaches before the first element
def validate_index_range(index)
  return index unless index < 0

  if -index > @entry_counter
    raise IndexError, "index #{index} too small for array; " +
                      "minimum #{-@entry_counter}"
  end

  @entry_counter + index
end
|
281
|
+
|
282
|
+
end
|
283
|
+
|
284
|
+
end
|
285
|
+
|