perobs 4.0.0 → 4.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +27 -16
  3. data/lib/perobs/Array.rb +66 -19
  4. data/lib/perobs/BTree.rb +106 -15
  5. data/lib/perobs/BTreeBlob.rb +4 -3
  6. data/lib/perobs/BTreeDB.rb +5 -4
  7. data/lib/perobs/BTreeNode.rb +482 -156
  8. data/lib/perobs/BTreeNodeLink.rb +10 -0
  9. data/lib/perobs/BigArray.rb +285 -0
  10. data/lib/perobs/BigArrayNode.rb +1002 -0
  11. data/lib/perobs/BigHash.rb +246 -0
  12. data/lib/perobs/BigTree.rb +197 -0
  13. data/lib/perobs/BigTreeNode.rb +873 -0
  14. data/lib/perobs/Cache.rb +48 -10
  15. data/lib/perobs/ConsoleProgressMeter.rb +61 -0
  16. data/lib/perobs/DataBase.rb +4 -3
  17. data/lib/perobs/DynamoDB.rb +57 -15
  18. data/lib/perobs/EquiBlobsFile.rb +155 -50
  19. data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
  20. data/lib/perobs/FlatFile.rb +519 -227
  21. data/lib/perobs/FlatFileBlobHeader.rb +113 -54
  22. data/lib/perobs/FlatFileDB.rb +49 -23
  23. data/lib/perobs/FuzzyStringMatcher.rb +175 -0
  24. data/lib/perobs/Hash.rb +127 -33
  25. data/lib/perobs/IDList.rb +144 -0
  26. data/lib/perobs/IDListPage.rb +107 -0
  27. data/lib/perobs/IDListPageFile.rb +180 -0
  28. data/lib/perobs/IDListPageRecord.rb +142 -0
  29. data/lib/perobs/Object.rb +18 -15
  30. data/lib/perobs/ObjectBase.rb +46 -5
  31. data/lib/perobs/PersistentObjectCache.rb +57 -68
  32. data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
  33. data/lib/perobs/ProgressMeter.rb +97 -0
  34. data/lib/perobs/SpaceManager.rb +273 -0
  35. data/lib/perobs/SpaceTree.rb +21 -12
  36. data/lib/perobs/SpaceTreeNode.rb +53 -61
  37. data/lib/perobs/Store.rb +264 -145
  38. data/lib/perobs/version.rb +1 -1
  39. data/lib/perobs.rb +2 -0
  40. data/perobs.gemspec +4 -4
  41. data/test/Array_spec.rb +15 -6
  42. data/test/BTree_spec.rb +6 -2
  43. data/test/BigArray_spec.rb +261 -0
  44. data/test/BigHash_spec.rb +152 -0
  45. data/test/BigTreeNode_spec.rb +153 -0
  46. data/test/BigTree_spec.rb +259 -0
  47. data/test/EquiBlobsFile_spec.rb +105 -1
  48. data/test/FNV_Hash_1a_64_spec.rb +59 -0
  49. data/test/FlatFileDB_spec.rb +198 -14
  50. data/test/FuzzyStringMatcher_spec.rb +261 -0
  51. data/test/Hash_spec.rb +13 -3
  52. data/test/IDList_spec.rb +77 -0
  53. data/test/LegacyDBs/LegacyDB.rb +155 -0
  54. data/test/LegacyDBs/version_3/class_map.json +1 -0
  55. data/test/LegacyDBs/version_3/config.json +1 -0
  56. data/test/LegacyDBs/version_3/database.blobs +0 -0
  57. data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
  58. data/test/LegacyDBs/version_3/index.blobs +0 -0
  59. data/test/LegacyDBs/version_3/version +1 -0
  60. data/test/LockFile_spec.rb +9 -6
  61. data/test/SpaceManager_spec.rb +176 -0
  62. data/test/SpaceTree_spec.rb +4 -1
  63. data/test/Store_spec.rb +305 -203
  64. data/test/spec_helper.rb +9 -4
  65. metadata +57 -16
  66. data/lib/perobs/BTreeNodeCache.rb +0 -109
  67. data/lib/perobs/TreeDB.rb +0 -277
@@ -0,0 +1,246 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = BigHash.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
6
+ #
7
+ # MIT License
8
+ #
9
+ # Permission is hereby granted, free of charge, to any person obtaining
10
+ # a copy of this software and associated documentation files (the
11
+ # "Software"), to deal in the Software without restriction, including
12
+ # without limitation the rights to use, copy, modify, merge, publish,
13
+ # distribute, sublicense, and/or sell copies of the Software, and to
14
+ # permit persons to whom the Software is furnished to do so, subject to
15
+ # the following conditions:
16
+ #
17
+ # The above copyright notice and this permission notice shall be
18
+ # included in all copies or substantial portions of the Software.
19
+ #
20
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
+
28
+ require 'perobs/Object'
29
+ require 'perobs/BigTree'
30
+ require 'perobs/Array'
31
+ require 'perobs/FNV_Hash_1a_64'
32
+
33
+ module PEROBS
34
+
35
+ # The BigHash is similar to the Hash object in that it provides a simple
36
+ # hash functionality. The difference is that this class scales to much
37
+ # larger data sets essentially limited to the amount of space available on
38
+ # your backing store. The data is persisted immediately and uses
39
+ # transactions to keep the data consistent. It only provides a small
40
+ # subset of the methods provided by the native Hash class that make sense
41
+ # for giant data sets.
42
class BigHash < PEROBS::Object

  # Internally this class uses a BigTree to store the values by the hashed
  # key. We are using a 64 bit hash space so collisions are fairly unlikely
  # but not impossible. Therefore we have to store the original key with the
  # value to ensure that we got the right value. The key and value are
  # stored in an Entry object.
  #
  # In case we have a collision we need to store multiple values for the
  # same hashed key. In that case we store the Entry objects for the same
  # hashed key in a Collisions object instead of storing the Entry
  # directly in the BigTree.
  class Entry < PEROBS::Object

    attr_persist :key, :value

    # @param p [Handle] Store handle
    # @param key [Integer or String] The original (unhashed) key
    # @param value [Any PEROBS storable object] The value stored for the key
    def initialize(p, key, value)
      super(p)
      self.key = key
      self.value = value
    end

  end

  # Since the BigHash can also store PEROBS::Array values we need to
  # introduce a new class so we can tell apart collisions from Arrays.
  class Collisions < PEROBS::Array
  end

  attr_persist :btree

  # Create a new BigHash object.
  # @param p [Handle] Store handle
  def initialize(p)
    super(p)
    restore
    self.btree = @store.new(PEROBS::BigTree)
  end

  # Hook called from the constructor. This class currently has no transient
  # state to set up, so the method is intentionally empty.
  # NOTE(review): presumably PEROBS also calls this when the object is
  # loaded from the store -- confirm against PEROBS::Object.
  def restore
  end

  # Insert a value that is associated with the given key. If a value for
  # this key already exists, the value will be overwritten with the newly
  # provided value.
  # @param key [Integer or String]
  # @param value [Any PEROBS storable object]
  def []=(key, value)
    hashed_key = hash_key(key)
    @store.transaction do
      entry = @store.new(Entry, key, value)

      if (existing_entry = @btree.get(hashed_key))
        # There is already an existing entry for this hashed key.
        if existing_entry.is_a?(Collisions)
          # Find the slot of the entry with the same key. If there is none
          # the index ends up one past the last element, so the assignment
          # below appends the new entry.
          index_to_insert = 0
          existing_entry.each do |ae|
            break if ae.key == key

            index_to_insert += 1
          end
          existing_entry[index_to_insert] = entry
        elsif existing_entry.key == key
          # The existing value is for the identical key. We can safely
          # overwrite it.
          @btree.insert(hashed_key, entry)
        else
          # There is a single existing entry, but for a different key.
          # Create a Collisions list and store both entries in it.
          collisions = @store.new(Collisions)
          collisions << existing_entry
          collisions << entry
          @btree.insert(hashed_key, collisions)
        end
      else
        # No existing entry. Insert the new entry.
        @btree.insert(hashed_key, entry)
      end
    end
  end

  # Retrieve the value for the given key. If no value for the key is found
  # nil is returned.
  # @param key [Integer or String]
  # @return [Any PEROBS storable object]
  def [](key)
    entry = find_entry(key)
    entry ? entry.value : nil
  end

  # Check if there is a value stored for the given key.
  # @param key [Integer or String]
  # @return [TrueClass or FalseClass]
  def has_key?(key)
    find_entry(key) ? true : false
  end

  alias include? has_key?

  # Delete the entry for the given key and return its value. Return nil if
  # no matching entry exists.
  # @param key [Integer or String]
  # @return [Object] Value of the deleted entry or nil
  def delete(key)
    hashed_key = hash_key(key)
    return nil unless (entry = @btree.get(hashed_key))

    deleted_value = nil
    if entry.is_a?(Collisions)
      # Locate the matching entry first and count the bucket size while we
      # are at it. The list is only modified after the iteration is done.
      match_index = nil
      bucket_size = 0
      entry.each_with_index do |ae, i|
        match_index = i if match_index.nil? && ae.key == key
        bucket_size += 1
      end
      return nil unless match_index

      @store.transaction do
        deleted_value = entry.delete_at(match_index).value
        # Don't leave an empty Collisions list behind. Otherwise the hashed
        # key would still be counted by the BigTree.
        @btree.remove(hashed_key) if bucket_size == 1
      end
    elsif entry.key == key
      # Single entry for this hashed key. It must be removed from the tree,
      # not just looked up.
      deleted_value = entry.value
      @btree.remove(hashed_key)
    end

    deleted_value
  end

  # Return the number of entries stored in the hash.
  # NOTE(review): this reports the entry counter of the underlying BigTree.
  # Keys with colliding hashes share a single tree slot, so they are
  # presumably counted only once -- confirm against BigTreeNode.
  # @return [Integer]
  def length
    @btree.entry_counter
  end

  alias size length

  # Return true if hash is empty. False otherwise.
  # @return [TrueClass, FalseClass]
  def empty?
    @btree.entry_counter == 0
  end

  # Calls the given block for each key/value pair.
  # @yield(key, value)
  def each(&block)
    @btree.each do |_hashed_key, entry|
      if entry.is_a?(Collisions)
        # Stop the outer iteration as well when the inner iteration
        # returned nil.
        # NOTE(review): presumably each only returns nil when it was cut
        # short -- confirm against PEROBS::Array#each.
        break if entry.each do |c_entry|
          yield(c_entry.key, c_entry.value)
        end.nil?
      else
        yield(entry.key, entry.value)
      end
    end
  end

  # This is mostly intended for debugging as the result can be very big.
  # It returns an Array of keys stored in the hash.
  # @return [Array] A list of all keys
  def keys
    ks = []
    each { |k, _v| ks << k }
    ks
  end

  # Check if the data structure contains any errors.
  # @return [Boolean] true if no errors were found, false otherwise
  def check
    @btree.check
  end

  private

  # Look up the Entry object that holds the given key.
  # @param key [Integer or String]
  # @return [Entry or nil] The matching entry or nil if there is none
  def find_entry(key)
    entry = @btree.get(hash_key(key))
    return nil unless entry

    if entry.is_a?(Collisions)
      # Several keys share this hash value. Compare the original keys.
      entry.each { |ae| return ae if ae.key == key }
      nil
    elsif entry.key == key
      entry
    end
  end

  # Map the key to the 64 bit hash value used as index into the BigTree.
  # @param key [Integer or String]
  def hash_key(key)
    FNV_Hash_1a_64::digest(key)
  end

end
244
+
245
+ end
246
+
@@ -0,0 +1,197 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = BigTree.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
6
+ #
7
+ # MIT License
8
+ #
9
+ # Permission is hereby granted, free of charge, to any person obtaining
10
+ # a copy of this software and associated documentation files (the
11
+ # "Software"), to deal in the Software without restriction, including
12
+ # without limitation the rights to use, copy, modify, merge, publish,
13
+ # distribute, sublicense, and/or sell copies of the Software, and to
14
+ # permit persons to whom the Software is furnished to do so, subject to
15
+ # the following conditions:
16
+ #
17
+ # The above copyright notice and this permission notice shall be
18
+ # included in all copies or substantial portions of the Software.
19
+ #
20
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
+
28
+ require 'perobs/Object'
29
+ require 'perobs/BigTreeNode'
30
+
31
+ module PEROBS
32
+
33
+ # The BigTree class implements a BTree as a PEROBS object. It allows to
34
+ # manage huge amounts of data in a reasonably efficient way. The number of
35
+ # entries is limited by the space on the backing store, not the main
36
+ # memory. Entries are addressed by an Integer key.
37
class BigTree < PEROBS::Object

  # Container for the figures collected by BigTree#statistics.
  class Stats < Struct.new(:leaf_nodes, :branch_nodes, :min_depth,
                           :max_depth)
  end

  attr_persist :node_size, :root, :first_leaf, :last_leaf, :entry_counter

  # Internal constructor. Use Store.new() instead.
  # @param p [Handle]
  # @param node_size [Integer] The size of the tree nodes. This determines
  #        how many entries must be read/written for each operation.
  def initialize(p, node_size = 127)
    super(p)
    unless node_size > 2
      PEROBS.log.fatal "Node size (#{node_size}) must be larger than 2"
    end
    attr_init(:node_size, node_size)
    # Only create the initial empty root node if no root is set yet.
    clear unless instance_variable_defined?('@root')
  end

  # Remove all entries from the BigTree. The root, first leaf and last leaf
  # all point to one newly created node afterwards.
  def clear
    self.root = self.first_leaf = self.last_leaf =
      @store.new(BigTreeNode, myself, true)
    self.entry_counter = 0
  end

  # Insert a new value into the tree using the key as a unique index. If the
  # key already exists the old value will be overwritten.
  # @param key [Integer] Unique key
  # @param value [Any PEROBS storable object] value
  def insert(key, value)
    @store.transaction do
      @root.insert(key, value)
    end
  end

  # Retrieve the value associated with the given key. If no entry was found,
  # return nil.
  # @param key [Integer] Unique key
  # @return [Object or nil] found value or nil
  def get(key)
    @root.get(key)
  end

  # Return the node chain from the root to the leaf node storing the
  # key/value pair.
  # @param key [Integer] key to search for
  # @return [Array of BigTreeNode] node list (may be empty)
  def node_chain(key)
    @root.node_chain(key)
  end

  # Check if there is an entry for the given key.
  # @param key [Integer] Unique key
  # @return [Boolean] True if key is present, false otherwise.
  def has_key?(key)
    @root.has_key?(key)
  end

  # Find and remove the value associated with the given key. If no entry was
  # found, return nil, otherwise the found value.
  # @param key [Integer] Unique key
  # @return [Object or nil] found value or nil
  def remove(key)
    removed_value = nil

    # The result is captured in a local variable so that the value of the
    # transaction call itself does not matter.
    @store.transaction do
      removed_value = @root.remove(key)
    end

    removed_value
  end

  # Delete all entries for which the passed block yields true. The
  # implementation is optimized for large bulk deletes. It rebuilds a new
  # BTree for the elements to keep. If only few elements are deleted the
  # overhead of rebuilding the BTree is rather high.
  # @yield [key, value]
  def delete_if
    old_root = @root
    clear
    old_root.each do |k, v|
      insert(k, v) unless yield(k, v)
    end
  end

  # @return [Integer] The number of entries stored in the tree.
  def length
    @entry_counter
  end

  # Return true if the BigTree has no stored entries.
  def empty?
    @entry_counter == 0
  end

  # Iterate over all entries in the tree. Entries are always sorted by the
  # key.
  # @yield [key, value]
  def each(&block)
    node = @first_leaf
    while node
      # Stop walking the leaf chain when the node iteration returned nil.
      break if node.each_element(&block).nil?

      node = node.next_sibling
    end
  end

  # Iterate over all entries in the tree in reverse order. Entries are
  # always sorted by the key.
  # @yield [key, value]
  def reverse_each(&block)
    node = @last_leaf
    while node
      node.reverse_each_element(&block)
      node = node.prev_sibling
    end
  end

  # @return [String] Human readable form of the tree.
  def to_s
    @root.to_s
  end

  # Check if the tree contains any errors. The nodes are checked first, then
  # the number of stored entries is compared with the entry counter.
  # @return [Boolean] true if no errors were found, false otherwise
  def check(&block)
    # A failed node check must fail the whole check. Its result must not be
    # discarded.
    # NOTE(review): relies on BigTreeNode#check returning a truthy value on
    # success -- confirm against BigTreeNode.
    return false unless @root.check(&block)

    i = 0
    each { |_k, _v| i += 1 }

    unless @entry_counter == i
      PEROBS.log.error "BigTree contains #{i} values but entry counter " +
        "is #{@entry_counter}"
      return false
    end

    true
  end

  # Gather some statistics regarding the tree structure.
  # @return [Stats] Structs with gathered data
  def statistics
    stats = Stats.new(0, 0, nil, nil)
    @root.statistics(stats)
    stats
  end

end
195
+
196
+ end
197
+