perobs 4.0.0 → 4.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +27 -16
- data/lib/perobs/Array.rb +66 -19
- data/lib/perobs/BTree.rb +106 -15
- data/lib/perobs/BTreeBlob.rb +4 -3
- data/lib/perobs/BTreeDB.rb +5 -4
- data/lib/perobs/BTreeNode.rb +482 -156
- data/lib/perobs/BTreeNodeLink.rb +10 -0
- data/lib/perobs/BigArray.rb +285 -0
- data/lib/perobs/BigArrayNode.rb +1002 -0
- data/lib/perobs/BigHash.rb +246 -0
- data/lib/perobs/BigTree.rb +197 -0
- data/lib/perobs/BigTreeNode.rb +873 -0
- data/lib/perobs/Cache.rb +48 -10
- data/lib/perobs/ConsoleProgressMeter.rb +61 -0
- data/lib/perobs/DataBase.rb +4 -3
- data/lib/perobs/DynamoDB.rb +57 -15
- data/lib/perobs/EquiBlobsFile.rb +155 -50
- data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
- data/lib/perobs/FlatFile.rb +519 -227
- data/lib/perobs/FlatFileBlobHeader.rb +113 -54
- data/lib/perobs/FlatFileDB.rb +49 -23
- data/lib/perobs/FuzzyStringMatcher.rb +175 -0
- data/lib/perobs/Hash.rb +127 -33
- data/lib/perobs/IDList.rb +144 -0
- data/lib/perobs/IDListPage.rb +107 -0
- data/lib/perobs/IDListPageFile.rb +180 -0
- data/lib/perobs/IDListPageRecord.rb +142 -0
- data/lib/perobs/Object.rb +18 -15
- data/lib/perobs/ObjectBase.rb +46 -5
- data/lib/perobs/PersistentObjectCache.rb +57 -68
- data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
- data/lib/perobs/ProgressMeter.rb +97 -0
- data/lib/perobs/SpaceManager.rb +273 -0
- data/lib/perobs/SpaceTree.rb +21 -12
- data/lib/perobs/SpaceTreeNode.rb +53 -61
- data/lib/perobs/Store.rb +264 -145
- data/lib/perobs/version.rb +1 -1
- data/lib/perobs.rb +2 -0
- data/perobs.gemspec +4 -4
- data/test/Array_spec.rb +15 -6
- data/test/BTree_spec.rb +6 -2
- data/test/BigArray_spec.rb +261 -0
- data/test/BigHash_spec.rb +152 -0
- data/test/BigTreeNode_spec.rb +153 -0
- data/test/BigTree_spec.rb +259 -0
- data/test/EquiBlobsFile_spec.rb +105 -1
- data/test/FNV_Hash_1a_64_spec.rb +59 -0
- data/test/FlatFileDB_spec.rb +198 -14
- data/test/FuzzyStringMatcher_spec.rb +261 -0
- data/test/Hash_spec.rb +13 -3
- data/test/IDList_spec.rb +77 -0
- data/test/LegacyDBs/LegacyDB.rb +155 -0
- data/test/LegacyDBs/version_3/class_map.json +1 -0
- data/test/LegacyDBs/version_3/config.json +1 -0
- data/test/LegacyDBs/version_3/database.blobs +0 -0
- data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
- data/test/LegacyDBs/version_3/index.blobs +0 -0
- data/test/LegacyDBs/version_3/version +1 -0
- data/test/LockFile_spec.rb +9 -6
- data/test/SpaceManager_spec.rb +176 -0
- data/test/SpaceTree_spec.rb +4 -1
- data/test/Store_spec.rb +305 -203
- data/test/spec_helper.rb +9 -4
- metadata +57 -16
- data/lib/perobs/BTreeNodeCache.rb +0 -109
- data/lib/perobs/TreeDB.rb +0 -277
@@ -0,0 +1,246 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# = BigHash.rb -- Persistent Ruby Object Store
|
4
|
+
#
|
5
|
+
# Copyright (c) 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
|
6
|
+
#
|
7
|
+
# MIT License
|
8
|
+
#
|
9
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
10
|
+
# a copy of this software and associated documentation files (the
|
11
|
+
# "Software"), to deal in the Software without restriction, including
|
12
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
13
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
14
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
15
|
+
# the following conditions:
|
16
|
+
#
|
17
|
+
# The above copyright notice and this permission notice shall be
|
18
|
+
# included in all copies or substantial portions of the Software.
|
19
|
+
#
|
20
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
21
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
23
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
24
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
25
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
26
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27
|
+
|
28
|
+
require 'perobs/Object'
|
29
|
+
require 'perobs/BigTree'
|
30
|
+
require 'perobs/Array'
|
31
|
+
require 'perobs/FNV_Hash_1a_64'
|
32
|
+
|
33
|
+
module PEROBS
|
34
|
+
|
35
|
+
# The BigHash is similar to the Hash object in that it provides a simple
|
36
|
+
# hash functionality. The difference is that this class scales to much
|
37
|
+
# larger data sets essentially limited to the amount of space available on
|
38
|
+
# your backing store. The data is persisted immediately and uses
|
39
|
+
# transactions to ensure the data is consistent. It only provides a small
|
40
|
+
# subset of the methods provided by the native Hash class that make sense
|
41
|
+
# for giant data sets.
|
42
|
+
class BigHash < PEROBS::Object
|
43
|
+
|
44
|
+
# Internally this class uses BigTree to store the values by the hashed
|
45
|
+
# key. We are using a 64 bit hash space so collisions are fairly unlikely
|
46
|
+
# but not impossible. Therefore we have to store the original key with the
|
47
|
+
# value to ensure that we got the right value. The key and value are
|
48
|
+
# stored in an Entry object.
|
49
|
+
#
|
50
|
+
# In case we have a collision we need to store multiple values for the
|
51
|
+
# same hashed key. In that case we store the Entry objects for the same
|
52
|
+
# hashed key in a Collisions object instead of storing the Entry
|
53
|
+
# directly in the BigTree.
|
54
|
+
# Persistent pair of the caller-supplied key and its value. BigHash stores
# these objects in its tree so a lookup can compare the original key and
# not only the hashed one.
class Entry < PEROBS::Object
  attr_persist :key, :value

  # @param p [Handle] handle forwarded to the PEROBS::Object constructor
  # @param key the un-hashed key as given by the caller
  # @param value the value associated with the key
  def initialize(p, key, value)
    super(p)
    # Go through the generated setters (not raw instance variables), in the
    # same order as before: key first, then value.
    self.key, self.value = key, value
  end
end
|
65
|
+
|
66
|
+
# Since the BigHash can also store PEROBS::Array values we need to
|
67
|
+
# introduce a new class so we can tell apart collisions from Arrays.
|
68
|
+
# Marker subclass: a bucket of Entry objects that share one hashed key.
# It adds no behavior of its own; only the class identity matters.
class Collisions < PEROBS::Array; end
|
70
|
+
|
71
|
+
attr_persist :btree
|
72
|
+
|
73
|
+
# Create a new BigHash object.
|
74
|
+
# @param p [Handle] Store handle
|
75
|
+
def initialize(p)
  # Bare super forwards the unchanged argument list, i.e. just p.
  super
  restore
  # Every BigHash owns a dedicated BigTree, keyed by the hashed key.
  tree = @store.new(PEROBS::BigTree)
  self.btree = tree
end
|
80
|
+
|
81
|
+
def restore
  # Intentionally a no-op; initialize calls it before the tree is created.
  # NOTE(review): presumably also the PEROBS hook run after loading an
  # object from the store -- confirm against PEROBS::Object.
  nil
end
|
83
|
+
|
84
|
+
# Insert a value that is associated with the given key. If a value for
|
85
|
+
# this key already exists, the value will be overwritten with the newly
|
86
|
+
# provided value.
|
87
|
+
# @param key [Integer or String]
|
88
|
+
# @param value [Any PEROBS storable object]
|
89
|
+
def []=(key, value)
  # Store +value+ under +key+, replacing any value already stored for an
  # identical key. Everything happens inside one store transaction.
  #
  # The tree slot for the hashed key holds one of:
  #   * nothing        -> the new Entry is inserted directly
  #   * a Collisions   -> the Entry with the same key is replaced, or the
  #                       new Entry is appended at the end
  #   * a single Entry -> replaced when the keys match, otherwise both
  #                       entries move into a fresh Collisions bucket
  hashed_key = hash_key(key)
  @store.transaction do
    entry = @store.new(Entry, key, value)
    existing = @btree.get(hashed_key)

    if !existing
      # No existing entry. Insert the new entry.
      @btree.insert(hashed_key, entry)
    elsif existing.is_a?(Collisions)
      # Count the entries in front of the first one with an identical key.
      # Without a match the counter ends at the bucket length, so the
      # assignment below appends instead of overwriting.
      slot = 0
      existing.each do |candidate|
        break if candidate.key == key

        slot += 1
      end
      existing[slot] = entry
    elsif existing.key == key
      # The existing value is for the identical key. We can safely
      # overwrite.
      @btree.insert(hashed_key, entry)
    else
      # A single entry for a different key: a real hash collision. Keep
      # both entries in a Collisions bucket.
      bucket = @store.new(Collisions)
      bucket << existing
      bucket << entry
      @btree.insert(hashed_key, bucket)
    end
  end
end
|
127
|
+
|
128
|
+
# Retrieve the value for the given key. If no value for the key is found
|
129
|
+
# nil is returned.
|
130
|
+
# @param key [Integer or String]
|
131
|
+
# @return [Any PEROBS storable object]
|
132
|
+
def [](key)
  # Look up the tree slot for the hashed key; nil means nothing is stored.
  stored = @btree.get(hash_key(key))
  return nil unless stored

  unless stored.is_a?(PEROBS::Array)
    # Single entry: it only counts when the original keys match, because
    # a different key can produce the same hash.
    return stored.key == key ? stored.value : nil
  end

  # Collision bucket: scan for the entry carrying the requested key.
  stored.each { |candidate| return candidate.value if candidate.key == key }
  nil
end
|
148
|
+
|
149
|
+
# Check if there is a value stored for the given key.
|
150
|
+
# @param key [Integer or String]
|
151
|
+
# @return [TrueClass or FalseClass]
|
152
|
+
def has_key?(key)
  # An empty tree slot means the key cannot be present.
  stored = @btree.get(hash_key(key))
  return false unless stored

  unless stored.is_a?(PEROBS::Array)
    # Single entry: present only if the original key matches.
    return stored.key == key ? true : false
  end

  # Collision bucket: present if any entry carries the requested key.
  stored.each { |candidate| return true if candidate.key == key }
  false
end
|
168
|
+
|
169
|
+
alias include? has_key?
|
170
|
+
|
171
|
+
# Delete and return the entry for the given key. Return nil if no matching
|
172
|
+
# entry exists.
|
173
|
+
# @param key [Integer or String]
|
174
|
+
# @return [Object] Deleted entry
|
175
|
+
def delete(key)
  # Remove the entry stored for +key+ and return its value, or return nil
  # when no entry with exactly this key exists.
  #
  # The single-entry case previously returned the value but left the entry
  # in the tree, so the key stayed retrievable. It is now removed through
  # BigTree#remove. A collision bucket that becomes empty is dropped from
  # the tree as well.
  hashed_key = hash_key(key)
  stored = @btree.get(hashed_key)
  return nil unless stored

  unless stored.is_a?(PEROBS::Array)
    # Single entry: a different key with the same hash must not be touched.
    return nil unless stored.key == key

    @btree.remove(hashed_key)
    return stored.value
  end

  # Collision bucket: locate the entry first so that the bucket is not
  # modified while it is being iterated.
  position = nil
  stored.each_with_index do |candidate, i|
    if candidate.key == key
      position = i
      break
    end
  end
  return nil unless position

  removed = nil
  # No early return inside the transaction block; the result is handed out
  # through a local instead, mirroring BigTree#remove.
  @store.transaction do
    removed = stored.delete_at(position)
    @btree.remove(hashed_key) if stored.empty?
  end
  removed.value
end
|
193
|
+
|
194
|
+
# Return the number of entries stored in the hash.
|
195
|
+
# @return [Integer]
|
196
|
+
def length
  # Delegates to the tree's persisted entry counter.
  # NOTE(review): keys whose hashes collide share a single tree slot (see
  # []=), so this presumably undercounts when collisions exist -- confirm.
  tree = @btree
  tree.entry_counter
end
|
199
|
+
|
200
|
+
alias size length
|
201
|
+
|
202
|
+
# Return true if the hash is empty, false otherwise.
|
203
|
+
# @return [TrueClass, FalseClass]
|
204
|
+
def empty?
  # Empty means the tree's entry counter reads zero.
  stored_entries = @btree.entry_counter
  stored_entries == 0
end
|
207
|
+
|
208
|
+
# Calls the given block for each key/value pair.
|
209
|
+
# @yield(key, value)
|
210
|
+
def each(&block)
  # Walk the tree in its own iteration order; the hashed key is not needed.
  @btree.each do |_hashed_key, entry|
    if entry.is_a?(Collisions)
      # A bucket holds several entries for one hashed key; yield each one.
      # A nil result from the bucket iteration ends the whole walk.
      outcome = entry.each do |colliding|
        yield(colliding.key, colliding.value)
      end
      break if outcome.nil?
    else
      yield(entry.key, entry.value)
    end
  end
end
|
221
|
+
|
222
|
+
# This is mostly intended for debugging as the result can be very big.
|
223
|
+
# It returns an Array of keys stored in the hash.
|
224
|
+
# @return [Array] A list of all keys
|
225
|
+
def keys
  # Materializes every key in memory, in the order each yields them.
  collected = []
  each do |key, _value|
    collected.push(key)
  end
  collected
end
|
230
|
+
|
231
|
+
# Check if the data structure contains any errors.
|
232
|
+
# @return [Boolean] true if no errors were found, false otherwise
|
233
|
+
def check
  # The consistency check is delegated entirely to the underlying tree;
  # its result is passed through unchanged.
  @btree.check
end
|
236
|
+
|
237
|
+
private
|
238
|
+
|
239
|
+
def hash_key(key)
  # Map the caller's key to the key used in the tree via the
  # FNV_Hash_1a_64 digest.
  FNV_Hash_1a_64.digest(key)
end
|
242
|
+
|
243
|
+
end
|
244
|
+
|
245
|
+
end
|
246
|
+
|
@@ -0,0 +1,197 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# = BigTree.rb -- Persistent Ruby Object Store
|
4
|
+
#
|
5
|
+
# Copyright (c) 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
|
6
|
+
#
|
7
|
+
# MIT License
|
8
|
+
#
|
9
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
10
|
+
# a copy of this software and associated documentation files (the
|
11
|
+
# "Software"), to deal in the Software without restriction, including
|
12
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
13
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
14
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
15
|
+
# the following conditions:
|
16
|
+
#
|
17
|
+
# The above copyright notice and this permission notice shall be
|
18
|
+
# included in all copies or substantial portions of the Software.
|
19
|
+
#
|
20
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
21
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
23
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
24
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
25
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
26
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27
|
+
|
28
|
+
require 'perobs/Object'
|
29
|
+
require 'perobs/BigTreeNode'
|
30
|
+
|
31
|
+
module PEROBS
|
32
|
+
|
33
|
+
# The BigTree class implements a BTree as a PEROBS object. It allows to
|
34
|
+
# manage huge amounts of data in a reasonably efficient way. The number of
|
35
|
+
# entries is limited by the space on the backing store, not the main
|
36
|
+
# memory. Entries are addressed by an Integer key.
|
37
|
+
class BigTree < PEROBS::Object
|
38
|
+
|
39
|
+
# Plain value holder for the numbers gathered by BigTree#statistics:
# leaf node count, branch node count and the minimum / maximum depth.
class Stats < Struct.new(:leaf_nodes, :branch_nodes, :min_depth, :max_depth); end
|
42
|
+
|
43
|
+
attr_persist :node_size, :root, :first_leaf, :last_leaf, :entry_counter
|
44
|
+
|
45
|
+
# Internal constructor. Use Store.new() instead.
|
46
|
+
# @param p [Handle]
|
47
|
+
# @param node_size [Integer] The size of the tree nodes. This determines
|
48
|
+
# how many entries must be read/written for each operation.
|
49
|
+
def initialize(p, node_size = 127)
  super(p)
  # Node sizes of 2 or less are reported through the fatal log channel.
  PEROBS.log.fatal "Node size (#{node_size}) must be larger than 2" unless node_size > 2
  attr_init(:node_size, node_size)
  # Only start with a fresh, empty tree when no root has been set yet.
  root_present = instance_variable_defined?('@root')
  clear unless root_present
end
|
57
|
+
|
58
|
+
# Remove all entries from the BigTree.
|
59
|
+
def clear
  # A brand-new node becomes the root and, at the same time, the first and
  # the last leaf of the tree.
  fresh_node = @store.new(BigTreeNode, myself, true)
  # Same assignment order as the chained form: last_leaf, first_leaf, root.
  self.last_leaf = fresh_node
  self.first_leaf = fresh_node
  self.root = fresh_node
  self.entry_counter = 0
end
|
64
|
+
|
65
|
+
# Insert a new value into the tree using the key as a unique index. If the
|
66
|
+
# key already exists the old value will be overwritten.
|
67
|
+
# @param key [Integer] Unique key
|
68
|
+
# @param value [Integer] value
|
69
|
+
def insert(key, value)
  # The root node performs the insertion; the call is wrapped in a store
  # transaction.
  @store.transaction { @root.insert(key, value) }
end
|
74
|
+
|
75
|
+
# Retrieve the value associated with the given key. If no entry was found,
|
76
|
+
# return nil.
|
77
|
+
# @param key [Integer] Unique key
|
78
|
+
# @return [Integer or nil] found value or nil
|
79
|
+
def get(key)
  # The lookup starts at the root node and is handled by the node itself.
  root_node = @root
  root_node.get(key)
end
|
82
|
+
|
83
|
+
# Return the node chain from the root to the leaf node storing the
|
84
|
+
# key/value pair.
|
85
|
+
# @param key [Integer] key to search for
|
86
|
+
# @return [Array of BigTreeNode] node list (may be empty)
|
87
|
+
def node_chain(key)
  # The chain is assembled by the root node; this method only forwards.
  root_node = @root
  root_node.node_chain(key)
end
|
90
|
+
|
91
|
+
# Check if there is an entry for the given key.
|
92
|
+
# @param key [Integer] Unique key
|
93
|
+
# @return [Boolean] True if key is present, false otherwise.
|
94
|
+
def has_key?(key)
  # Forwarded to the root node, which answers for the whole tree.
  root_node = @root
  root_node.has_key?(key)
end
|
97
|
+
|
98
|
+
# Find and remove the value associated with the given key. If no entry was
|
99
|
+
# found, return nil, otherwise the found value.
|
100
|
+
# @param key [Integer] Unique key
|
101
|
+
# @return [Integer or nil] found value or nil
|
102
|
+
def remove(key)
  # The result is captured in a local because it is produced inside the
  # transaction block but returned from the method.
  result = nil
  @store.transaction { result = @root.remove(key) }
  result
end
|
111
|
+
|
112
|
+
# Delete all entries for which the passed block yields true. The
|
113
|
+
# implementation is optimized for large bulk deletes. It rebuilds a new
|
114
|
+
# BTree for the elements to keep. If only few elements are deleted the
|
115
|
+
# overhead of rebuilding the BTree is rather high.
|
116
|
+
# @yield [key, value]
|
117
|
+
def delete_if
  # Keep a reference to the current root, reset the tree, then re-insert
  # every pair for which the block does not ask for deletion.
  previous_root = @root
  clear
  previous_root.each do |key, value|
    insert(key, value) unless yield(key, value)
  end
end
|
126
|
+
|
127
|
+
# @return [Integer] The number of entries stored in the tree.
|
128
|
+
def length
  # The count is read from the entry counter, not computed by walking
  # the tree.
  counter = @entry_counter
  counter
end
|
131
|
+
|
132
|
+
# Return true if the BigTree has no stored entries.
|
133
|
+
def empty?
  # Empty means the entry counter reads zero.
  counter = @entry_counter
  counter == 0
end
|
136
|
+
|
137
|
+
# Iterate over all entries in the tree. Entries are always sorted by the
|
138
|
+
# key.
|
139
|
+
# @yield [key, value]
|
140
|
+
def each(&block)
  # Walk the leaf chain from the first leaf via next_sibling; each leaf
  # yields its own elements to the caller's block.
  leaf = @first_leaf
  while leaf
    outcome = leaf.each_element(&block)
    # A nil result from a leaf ends the walk early.
    break if outcome.nil?

    leaf = leaf.next_sibling
  end
end
|
147
|
+
|
148
|
+
# Iterate over all entries in the tree in reverse order. Entries are
|
149
|
+
# always sorted by the key.
|
150
|
+
# @yield [key, value]
|
151
|
+
def reverse_each(&block)
  # Mirror image of each: start at the last leaf and follow prev_sibling.
  # Unlike each, the leaf's result is not inspected here.
  leaf = @last_leaf
  while leaf
    leaf.reverse_each_element(&block)
    leaf = leaf.prev_sibling
  end
end
|
158
|
+
|
159
|
+
|
160
|
+
# @return [String] Human readable form of the tree.
|
161
|
+
def to_s
|
162
|
+
@root.to_s
|
163
|
+
end
|
164
|
+
|
165
|
+
# Check if the tree file contains any errors.
|
166
|
+
# @return [Boolean] true if no errors were found, false otherwise
|
167
|
+
def check(&block)
  # Run two consistency checks and report whether both passed:
  #   1. the node-level check starting at the root (the optional block is
  #      forwarded to it),
  #   2. the number of entries reachable through each must equal the
  #      entry counter.
  #
  # The result of the node-level check used to be discarded, so a failure
  # there could still end in true. A falsy result now fails the check.
  # NOTE(review): this relies on the root node's check returning a truthy
  # value on success -- confirm against BigTreeNode#check.
  return false unless @root.check(&block)

  counted = 0
  each { |_key, _value| counted += 1 }

  unless @entry_counter == counted
    PEROBS.log.error "BigTree contains #{counted} values but entry counter " +
      "is #{@entry_counter}"
    return false
  end

  true
end
|
183
|
+
|
184
|
+
# Gather some statistics regarding the tree structure.
|
185
|
+
# @return [Stats] Structs with gathered data
|
186
|
+
def statistics
  # Start from zero node counts and unknown depths; the root node fills in
  # the struct, which is then returned.
  Stats.new(0, 0, nil, nil).tap { |collected| @root.statistics(collected) }
end
|
191
|
+
|
192
|
+
private
|
193
|
+
|
194
|
+
end
|
195
|
+
|
196
|
+
end
|
197
|
+
|