perobs 4.0.0 → 4.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +27 -16
  3. data/lib/perobs/Array.rb +66 -19
  4. data/lib/perobs/BTree.rb +106 -15
  5. data/lib/perobs/BTreeBlob.rb +4 -3
  6. data/lib/perobs/BTreeDB.rb +5 -4
  7. data/lib/perobs/BTreeNode.rb +482 -156
  8. data/lib/perobs/BTreeNodeLink.rb +10 -0
  9. data/lib/perobs/BigArray.rb +285 -0
  10. data/lib/perobs/BigArrayNode.rb +1002 -0
  11. data/lib/perobs/BigHash.rb +246 -0
  12. data/lib/perobs/BigTree.rb +197 -0
  13. data/lib/perobs/BigTreeNode.rb +873 -0
  14. data/lib/perobs/Cache.rb +48 -10
  15. data/lib/perobs/ConsoleProgressMeter.rb +61 -0
  16. data/lib/perobs/DataBase.rb +4 -3
  17. data/lib/perobs/DynamoDB.rb +57 -15
  18. data/lib/perobs/EquiBlobsFile.rb +155 -50
  19. data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
  20. data/lib/perobs/FlatFile.rb +519 -227
  21. data/lib/perobs/FlatFileBlobHeader.rb +113 -54
  22. data/lib/perobs/FlatFileDB.rb +49 -23
  23. data/lib/perobs/FuzzyStringMatcher.rb +175 -0
  24. data/lib/perobs/Hash.rb +127 -33
  25. data/lib/perobs/IDList.rb +144 -0
  26. data/lib/perobs/IDListPage.rb +107 -0
  27. data/lib/perobs/IDListPageFile.rb +180 -0
  28. data/lib/perobs/IDListPageRecord.rb +142 -0
  29. data/lib/perobs/Object.rb +18 -15
  30. data/lib/perobs/ObjectBase.rb +46 -5
  31. data/lib/perobs/PersistentObjectCache.rb +57 -68
  32. data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
  33. data/lib/perobs/ProgressMeter.rb +97 -0
  34. data/lib/perobs/SpaceManager.rb +273 -0
  35. data/lib/perobs/SpaceTree.rb +21 -12
  36. data/lib/perobs/SpaceTreeNode.rb +53 -61
  37. data/lib/perobs/Store.rb +264 -145
  38. data/lib/perobs/version.rb +1 -1
  39. data/lib/perobs.rb +2 -0
  40. data/perobs.gemspec +4 -4
  41. data/test/Array_spec.rb +15 -6
  42. data/test/BTree_spec.rb +6 -2
  43. data/test/BigArray_spec.rb +261 -0
  44. data/test/BigHash_spec.rb +152 -0
  45. data/test/BigTreeNode_spec.rb +153 -0
  46. data/test/BigTree_spec.rb +259 -0
  47. data/test/EquiBlobsFile_spec.rb +105 -1
  48. data/test/FNV_Hash_1a_64_spec.rb +59 -0
  49. data/test/FlatFileDB_spec.rb +198 -14
  50. data/test/FuzzyStringMatcher_spec.rb +261 -0
  51. data/test/Hash_spec.rb +13 -3
  52. data/test/IDList_spec.rb +77 -0
  53. data/test/LegacyDBs/LegacyDB.rb +155 -0
  54. data/test/LegacyDBs/version_3/class_map.json +1 -0
  55. data/test/LegacyDBs/version_3/config.json +1 -0
  56. data/test/LegacyDBs/version_3/database.blobs +0 -0
  57. data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
  58. data/test/LegacyDBs/version_3/index.blobs +0 -0
  59. data/test/LegacyDBs/version_3/version +1 -0
  60. data/test/LockFile_spec.rb +9 -6
  61. data/test/SpaceManager_spec.rb +176 -0
  62. data/test/SpaceTree_spec.rb +4 -1
  63. data/test/Store_spec.rb +305 -203
  64. data/test/spec_helper.rb +9 -4
  65. metadata +57 -16
  66. data/lib/perobs/BTreeNodeCache.rb +0 -109
  67. data/lib/perobs/TreeDB.rb +0 -277
@@ -0,0 +1,246 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = BigHash.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
6
+ #
7
+ # MIT License
8
+ #
9
+ # Permission is hereby granted, free of charge, to any person obtaining
10
+ # a copy of this software and associated documentation files (the
11
+ # "Software"), to deal in the Software without restriction, including
12
+ # without limitation the rights to use, copy, modify, merge, publish,
13
+ # distribute, sublicense, and/or sell copies of the Software, and to
14
+ # permit persons to whom the Software is furnished to do so, subject to
15
+ # the following conditions:
16
+ #
17
+ # The above copyright notice and this permission notice shall be
18
+ # included in all copies or substantial portions of the Software.
19
+ #
20
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
+
28
+ require 'perobs/Object'
29
+ require 'perobs/BigTree'
30
+ require 'perobs/Array'
31
+ require 'perobs/FNV_Hash_1a_64'
32
+
33
+ module PEROBS
34
+
35
# The BigHash is similar to the Hash object in that it provides simple
# hash functionality. The difference is that this class scales to much
# larger data sets, essentially limited only by the amount of space
# available on your backing store. The data is persisted immediately and
# transactions are used to keep the data consistent. It only provides a
# small subset of the methods provided by the native Hash class that make
# sense for giant data sets.
class BigHash < PEROBS::Object

  # Internally this class uses BigTree to store the values by the hashed
  # key. We are using a 64 bit hash space so collisions are fairly unlikely
  # but not impossible. Therefore we have to store the original key with
  # the value to ensure that we got the right value. The key and value are
  # stored in an Entry object.
  #
  # In case we have a collision we need to store multiple values for the
  # same hashed key. In that case we store the Entry objects for the same
  # hashed key in a Collisions object instead of storing the Entry
  # directly in the BigTree.
  class Entry < PEROBS::Object

    attr_persist :key, :value

    # @param p [Handle] Store handle
    # @param key [Integer or String] The original (unhashed) key
    # @param value [Any PEROBS storable object] The value stored for key
    def initialize(p, key, value)
      super(p)
      self.key = key
      self.value = value
    end

  end

  # Since the BigHash can also store PEROBS::Array values we need to
  # introduce a new class so we can tell apart collisions from Arrays.
  class Collisions < PEROBS::Array
  end

  attr_persist :btree

  # Create a new BigHash object.
  # @param p [Handle] Store handle
  def initialize(p)
    super(p)
    restore
    self.btree = @store.new(PEROBS::BigTree)
  end

  # Hook without any work to do for this class; it is intentionally empty.
  def restore
  end

  # Insert a value that is associated with the given key. If a value for
  # this key already exists, the value will be overwritten with the newly
  # provided value.
  # @param key [Integer or String]
  # @param value [Any PEROBS storable object]
  def []=(key, value)
    hashed_key = hash_key(key)
    @store.transaction do
      entry = @store.new(Entry, key, value)

      if (existing_entry = @btree.get(hashed_key))
        # There is already an existing entry for this hashed key.
        if existing_entry.is_a?(Collisions)
          # Find the slot of the entry with the same key. If no such entry
          # exists the counter ends up one past the last element, so the
          # assignment below appends the new entry.
          slot = 0
          existing_entry.each do |ae|
            break if ae.key == key

            slot += 1
          end
          existing_entry[slot] = entry
        elsif existing_entry.key == key
          # The existing value is for the identical key. We can safely
          # overwrite it.
          @btree.insert(hashed_key, entry)
        else
          # There is a single existing entry, but for a different key.
          # Create a new Collisions list and store both entries in it.
          collisions = @store.new(Collisions)
          collisions << existing_entry
          collisions << entry
          @btree.insert(hashed_key, collisions)
        end
      else
        # No existing entry. Insert the new entry.
        @btree.insert(hashed_key, entry)
      end
    end
  end

  # Retrieve the value for the given key. If no value for the key is found
  # nil is returned.
  # @param key [Integer or String]
  # @return [Any PEROBS storable object]
  def [](key)
    hashed_key = hash_key(key)
    return nil unless (entry = @btree.get(hashed_key))

    if entry.is_a?(PEROBS::Array)
      # Several keys share this hashed key. Compare the original keys.
      entry.each do |ae|
        return ae.value if ae.key == key
      end
    elsif entry.key == key
      return entry.value
    end

    nil
  end

  # Check if there is a value stored for the given key.
  # @param key [Integer or String]
  # @return [TrueClass or FalseClass]
  def has_key?(key)
    hashed_key = hash_key(key)
    return false unless (entry = @btree.get(hashed_key))

    if entry.is_a?(PEROBS::Array)
      # Several keys share this hashed key. Compare the original keys.
      entry.each do |ae|
        return true if ae.key == key
      end
    elsif entry.key == key
      return true
    end

    false
  end

  alias include? has_key?

  # Delete the entry for the given key and return its value. Return nil if
  # no matching entry exists.
  # @param key [Integer or String]
  # @return [Object] Value of the deleted entry or nil
  def delete(key)
    hashed_key = hash_key(key)
    return nil unless (entry = @btree.get(hashed_key))

    deleted_value = nil
    # The block is deliberately left without a 'return' so that it always
    # runs to its end. NOTE(review): presumably Store#transaction needs the
    # block to finish normally to close the transaction -- confirm.
    @store.transaction do
      if entry.is_a?(PEROBS::Array)
        # Several keys share this hashed key. Locate the matching entry.
        slot = nil
        entry.each_with_index do |ae, i|
          if ae.key == key
            slot = i
            break
          end
        end

        if slot
          deleted_value = entry.delete_at(slot).value
          # Don't leave an empty collision list behind in the tree.
          @btree.remove(hashed_key) if entry.empty?
        end
      elsif entry.key == key
        # A single entry for exactly this key. It must be removed from the
        # tree, otherwise the key would still be found after the delete.
        deleted_value = entry.value
        @btree.remove(hashed_key)
      end
    end

    deleted_value
  end

  # Return the entry counter of the underlying BigTree. Keys whose hashes
  # collide share a single tree slot (see Collisions), so this value is the
  # number of distinct hashed keys as tracked by the BigTree.
  # @return [Integer]
  def length
    @btree.entry_counter
  end

  alias size length

  # Return true if hash is empty. False otherwise.
  # @return [TrueClass, FalseClass]
  def empty?
    @btree.entry_counter == 0
  end

  # Calls the given block for each key/value pair.
  # @yield(key, value)
  def each(&block)
    @btree.each do |index, entry|
      if entry.is_a?(Collisions)
        # Stop the outer iteration when iterating the collision list
        # yields nil.
        # NOTE(review): presumably nil signals an aborted iteration --
        # confirm against PEROBS::Array#each.
        break if entry.each do |c_entry|
          yield(c_entry.key, c_entry.value)
        end.nil?
      else
        yield(entry.key, entry.value)
      end
    end
  end

  # This is mostly intended for debugging as the result can be very big.
  # It returns an Array of keys stored in the hash.
  # @return [Array] A list of all keys
  def keys
    ks = []
    each { |k, _v| ks << k }
    ks
  end

  # Check if the data structure contains any errors.
  # @return [Boolean] true if no errors were found, false otherwise
  def check
    @btree.check
  end

  private

  # Map the user provided key to the Integer key used in the BigTree.
  # @param key [Integer or String]
  # @return [Integer] presumably a 64 bit FNV-1a hash value -- the digest
  #         implementation is not part of this file
  def hash_key(key)
    FNV_Hash_1a_64::digest(key)
  end

end
244
+
245
+ end
246
+
@@ -0,0 +1,197 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = BigTree.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
6
+ #
7
+ # MIT License
8
+ #
9
+ # Permission is hereby granted, free of charge, to any person obtaining
10
+ # a copy of this software and associated documentation files (the
11
+ # "Software"), to deal in the Software without restriction, including
12
+ # without limitation the rights to use, copy, modify, merge, publish,
13
+ # distribute, sublicense, and/or sell copies of the Software, and to
14
+ # permit persons to whom the Software is furnished to do so, subject to
15
+ # the following conditions:
16
+ #
17
+ # The above copyright notice and this permission notice shall be
18
+ # included in all copies or substantial portions of the Software.
19
+ #
20
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
+
28
+ require 'perobs/Object'
29
+ require 'perobs/BigTreeNode'
30
+
31
+ module PEROBS
32
+
33
# The BigTree class implements a BTree as a PEROBS object. It allows
# managing huge amounts of data in a reasonably efficient way. The number
# of entries is limited by the space on the backing store, not the main
# memory. Entries are addressed by an Integer key.
class BigTree < PEROBS::Object

  # Container for the numbers gathered by BigTree#statistics.
  class Stats < Struct.new(:leaf_nodes, :branch_nodes, :min_depth,
                           :max_depth)
  end

  attr_persist :node_size, :root, :first_leaf, :last_leaf, :entry_counter

  # Internal constructor. Use Store.new() instead.
  # @param p [Handle]
  # @param node_size [Integer] The size of the tree nodes. This determines
  #        how many entries must be read/written for each operation.
  def initialize(p, node_size = 127)
    super(p)
    unless node_size > 2
      PEROBS.log.fatal "Node size (#{node_size}) must be larger than 2"
    end
    attr_init(:node_size, node_size)
    # Only create the initial empty root node when no root is set yet.
    clear unless instance_variable_defined?('@root')
  end

  # Remove all entries from the BigTree. The tree is reset to a single new
  # node that serves as root, first leaf and last leaf at the same time.
  def clear
    self.root = self.first_leaf = self.last_leaf =
      @store.new(BigTreeNode, myself, true)
    self.entry_counter = 0
  end

  # Insert a new value into the tree using the key as a unique index. If
  # the key already exists the old value will be overwritten.
  # @param key [Integer] Unique key
  # @param value [Any PEROBS storable object] value
  def insert(key, value)
    @store.transaction do
      @root.insert(key, value)
    end
  end

  # Retrieve the value associated with the given key. If no entry was
  # found, return nil.
  # @param key [Integer] Unique key
  # @return [Object or nil] found value or nil
  def get(key)
    @root.get(key)
  end

  # Return the node chain from the root to the leaf node storing the
  # key/value pair.
  # @param key [Integer] key to search for
  # @return [Array of BigTreeNode] node list (may be empty)
  def node_chain(key)
    @root.node_chain(key)
  end

  # Check if there is an entry for the given key.
  # @param key [Integer] Unique key
  # @return [Boolean] True if key is present, false otherwise.
  def has_key?(key)
    @root.has_key?(key)
  end

  # Find and remove the value associated with the given key. If no entry
  # was found, return nil, otherwise the found value.
  # @param key [Integer] Unique key
  # @return [Object or nil] found value or nil
  def remove(key)
    removed_value = nil

    # The result is carried out of the block in a local variable instead
    # of using 'return' inside the transaction block.
    @store.transaction do
      removed_value = @root.remove(key)
    end

    removed_value
  end

  # Delete all entries for which the passed block yields true. The
  # implementation is optimized for large bulk deletes. It rebuilds a new
  # BTree for the elements to keep. If only few elements are deleted the
  # overhead of rebuilding the BTree is rather high.
  # @yield [key, value]
  def delete_if
    # Keep a handle on the old tree, then start over with an empty one and
    # re-insert only the entries that should survive.
    old_root = @root
    clear
    old_root.each do |k, v|
      insert(k, v) unless yield(k, v)
    end
  end

  # @return [Integer] The number of entries stored in the tree.
  def length
    @entry_counter
  end

  # Return true if the BigTree has no stored entries.
  def empty?
    @entry_counter == 0
  end

  # Iterate over all entries in the tree. Entries are always sorted by the
  # key.
  # @yield [key, value]
  def each(&block)
    node = @first_leaf
    while node
      # Stop walking the leaf chain when the node iteration yields nil.
      break if node.each_element(&block).nil?
      node = node.next_sibling
    end
  end

  # Iterate over all entries in the tree in reverse order. Entries are
  # always sorted by the key.
  # @yield [key, value]
  def reverse_each(&block)
    node = @last_leaf
    while node
      node.reverse_each_element(&block)
      node = node.prev_sibling
    end
  end

  # @return [String] Human readable form of the tree.
  def to_s
    @root.to_s
  end

  # Check if the tree contains any errors. The nodes are checked first,
  # then the number of reachable entries is compared with the persisted
  # entry counter.
  # @return [Boolean] true if no errors were found, false otherwise
  def check(&block)
    # A failed node check must not be masked by a matching entry count.
    # NOTE(review): assumes BigTreeNode#check returns a true value on
    # success and false/nil on failure -- confirm.
    return false unless @root.check(&block)

    i = 0
    each { |_k, _v| i += 1 }

    unless @entry_counter == i
      PEROBS.log.error "BigTree contains #{i} values but entry counter " +
        "is #{@entry_counter}"
      return false
    end

    true
  end

  # Gather some statistics regarding the tree structure.
  # @return [Stats] Struct with gathered data
  def statistics
    stats = Stats.new(0, 0, nil, nil)
    @root.statistics(stats)
    stats
  end

end
+
196
+ end
197
+