perobs 4.0.0 → 4.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +27 -16
- data/lib/perobs/Array.rb +66 -19
- data/lib/perobs/BTree.rb +106 -15
- data/lib/perobs/BTreeBlob.rb +4 -3
- data/lib/perobs/BTreeDB.rb +5 -4
- data/lib/perobs/BTreeNode.rb +482 -156
- data/lib/perobs/BTreeNodeLink.rb +10 -0
- data/lib/perobs/BigArray.rb +285 -0
- data/lib/perobs/BigArrayNode.rb +1002 -0
- data/lib/perobs/BigHash.rb +246 -0
- data/lib/perobs/BigTree.rb +197 -0
- data/lib/perobs/BigTreeNode.rb +873 -0
- data/lib/perobs/Cache.rb +48 -10
- data/lib/perobs/ConsoleProgressMeter.rb +61 -0
- data/lib/perobs/DataBase.rb +4 -3
- data/lib/perobs/DynamoDB.rb +57 -15
- data/lib/perobs/EquiBlobsFile.rb +155 -50
- data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
- data/lib/perobs/FlatFile.rb +519 -227
- data/lib/perobs/FlatFileBlobHeader.rb +113 -54
- data/lib/perobs/FlatFileDB.rb +49 -23
- data/lib/perobs/FuzzyStringMatcher.rb +175 -0
- data/lib/perobs/Hash.rb +127 -33
- data/lib/perobs/IDList.rb +144 -0
- data/lib/perobs/IDListPage.rb +107 -0
- data/lib/perobs/IDListPageFile.rb +180 -0
- data/lib/perobs/IDListPageRecord.rb +142 -0
- data/lib/perobs/Object.rb +18 -15
- data/lib/perobs/ObjectBase.rb +46 -5
- data/lib/perobs/PersistentObjectCache.rb +57 -68
- data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
- data/lib/perobs/ProgressMeter.rb +97 -0
- data/lib/perobs/SpaceManager.rb +273 -0
- data/lib/perobs/SpaceTree.rb +21 -12
- data/lib/perobs/SpaceTreeNode.rb +53 -61
- data/lib/perobs/Store.rb +264 -145
- data/lib/perobs/version.rb +1 -1
- data/lib/perobs.rb +2 -0
- data/perobs.gemspec +4 -4
- data/test/Array_spec.rb +15 -6
- data/test/BTree_spec.rb +6 -2
- data/test/BigArray_spec.rb +261 -0
- data/test/BigHash_spec.rb +152 -0
- data/test/BigTreeNode_spec.rb +153 -0
- data/test/BigTree_spec.rb +259 -0
- data/test/EquiBlobsFile_spec.rb +105 -1
- data/test/FNV_Hash_1a_64_spec.rb +59 -0
- data/test/FlatFileDB_spec.rb +198 -14
- data/test/FuzzyStringMatcher_spec.rb +261 -0
- data/test/Hash_spec.rb +13 -3
- data/test/IDList_spec.rb +77 -0
- data/test/LegacyDBs/LegacyDB.rb +155 -0
- data/test/LegacyDBs/version_3/class_map.json +1 -0
- data/test/LegacyDBs/version_3/config.json +1 -0
- data/test/LegacyDBs/version_3/database.blobs +0 -0
- data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
- data/test/LegacyDBs/version_3/index.blobs +0 -0
- data/test/LegacyDBs/version_3/version +1 -0
- data/test/LockFile_spec.rb +9 -6
- data/test/SpaceManager_spec.rb +176 -0
- data/test/SpaceTree_spec.rb +4 -1
- data/test/Store_spec.rb +305 -203
- data/test/spec_helper.rb +9 -4
- metadata +57 -16
- data/lib/perobs/BTreeNodeCache.rb +0 -109
- data/lib/perobs/TreeDB.rb +0 -277
data/lib/perobs/Hash.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#
|
3
3
|
# = Hash.rb -- Persistent Ruby Object Store
|
4
4
|
#
|
5
|
-
# Copyright (c) 2015, 2016 by Chris Schlaeger <chris@taskjuggler.org>
|
5
|
+
# Copyright (c) 2015, 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
|
6
6
|
#
|
7
7
|
# MIT License
|
8
8
|
#
|
@@ -37,20 +37,36 @@ module PEROBS
|
|
37
37
|
# The implementation is largely a proxy around the standard Hash class. But
|
38
38
|
# all mutating methods must be re-implemented to convert PEROBS::Objects to
|
39
39
|
# POXReference objects and to register the object as modified with the
|
40
|
-
# cache.
|
40
|
+
# cache. However, it is not designed for large data sets as it always reads
|
41
|
+
# and writes the full data set for every access (unless it is cached). For
|
42
|
+
# data sets that could have more than a few hundred entries BigHash is the
|
43
|
+
# recommended alternative.
|
41
44
|
#
|
42
45
|
# We explicitly don't support Hash::store() as it conflicts with
|
43
46
|
# ObjectBase::store() method to access the store.
|
44
47
|
class Hash < ObjectBase
|
45
48
|
|
49
|
+
# These methods do not mutate the Hash. They only perform read
|
50
|
+
# operations and return a new PEROBS::Hash object.
|
51
|
+
([
|
52
|
+
:invert, :merge, :reject, :select
|
53
|
+
] + Enumerable.instance_methods).uniq.each do |method_sym|
|
54
|
+
# Create a wrapper method that passes the call to @data.
|
55
|
+
define_method(method_sym) do |*args, &block|
|
56
|
+
# Register the read operation with the cache.
|
57
|
+
@store.cache.cache_read(self)
|
58
|
+
@store.new(PEROBS::Hash, @data.send(method_sym, *args, &block))
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
46
62
|
# These methods do not mutate the Hash. They only perform read
|
47
63
|
# operations.
|
48
64
|
([
|
49
65
|
:==, :[], :assoc, :compare_by_identity, :compare_by_identity?, :default,
|
50
66
|
:default_proc, :each, :each_key, :each_pair, :each_value, :empty?,
|
51
67
|
:eql?, :fetch, :flatten, :has_key?, :has_value?, :hash, :include?,
|
52
|
-
:
|
53
|
-
:pretty_print, :pretty_print_cycle, :rassoc, :
|
68
|
+
:key, :key?, :keys, :length, :member?,
|
69
|
+
:pretty_print, :pretty_print_cycle, :rassoc, :size,
|
54
70
|
:to_a, :to_h, :to_hash, :to_s, :value?, :values, :values_at
|
55
71
|
] + Enumerable.instance_methods).uniq.each do |method_sym|
|
56
72
|
# Create a wrapper method that passes the call to @data.
|
@@ -61,11 +77,22 @@ module PEROBS
|
|
61
77
|
end
|
62
78
|
end
|
63
79
|
|
64
|
-
# These methods mutate the Hash
|
80
|
+
# These methods mutate the Hash and return self
|
81
|
+
[
|
82
|
+
:clear, :keep_if, :merge!, :rehash, :reject!, :replace, :select!, :update
|
83
|
+
].each do |method_sym|
|
84
|
+
# Create a wrapper method that passes the call to @data.
|
85
|
+
define_method(method_sym) do |*args, &block|
|
86
|
+
# Register the write operation with the cache.
|
87
|
+
@store.cache.cache_write(self)
|
88
|
+
@data.send(method_sym, *args, &block)
|
89
|
+
myself
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
# These methods mutate the Hash and return basic Ruby type objects.
|
65
94
|
[
|
66
|
-
:
|
67
|
-
:initialize_copy, :keep_if, :merge!, :rehash, :reject!, :replace,
|
68
|
-
:select!, :shift, :update
|
95
|
+
:delete, :delete_if, :shift
|
69
96
|
].each do |method_sym|
|
70
97
|
# Create a wrapper method that passes the call to @data.
|
71
98
|
define_method(method_sym) do |*args, &block|
|
@@ -79,33 +106,70 @@ module PEROBS
|
|
79
106
|
# PEROBS users should never call this method or equivalents of derived
|
80
107
|
# methods directly.
|
81
108
|
# @param p [PEROBS::Handle] PEROBS handle
|
82
|
-
# @param default [
|
83
|
-
# stored for a specific key.
|
84
|
-
|
109
|
+
# @param default [Object] The default value that is returned when no value
|
110
|
+
# is stored for a specific key. The default must be of the
|
111
|
+
# supported type.
|
112
|
+
def initialize(p, default = nil, &block)
|
85
113
|
super(p)
|
86
|
-
|
87
|
-
|
114
|
+
_check_assignment_value(default)
|
115
|
+
if block_given?
|
116
|
+
@data = ::Hash.new(&block)
|
117
|
+
else
|
118
|
+
@data = ::Hash.new(default)
|
119
|
+
end
|
88
120
|
|
89
121
|
# Ensure that the newly created object will be pushed into the database.
|
90
122
|
@store.cache.cache_write(self)
|
91
123
|
end
|
92
124
|
|
125
|
+
# Proxy for assignment method.
|
126
|
+
def []=(key, value)
|
127
|
+
unless key.is_a?(String) || key.respond_to?(:is_poxreference?)
|
128
|
+
raise ArgumentError, "PEROBS::Hash[] key must be a String or " +
|
129
|
+
"a PEROBS object but is a #{key.class}"
|
130
|
+
end
|
131
|
+
_check_assignment_value(value)
|
132
|
+
@store.cache.cache_write(self)
|
133
|
+
@data[key] = value
|
134
|
+
end
|
135
|
+
|
136
|
+
# Proxy for default= method.
|
137
|
+
def default=(value)
|
138
|
+
_check_assignment_value(value)
|
139
|
+
@data.default=(value)
|
140
|
+
end
|
141
|
+
|
93
142
|
# Return a list of all object IDs of all persistent objects that this Hash
|
94
143
|
# is referencing.
|
95
144
|
# @return [Array of Integer] IDs of referenced objects
|
96
145
|
def _referenced_object_ids
|
97
|
-
|
98
|
-
|
146
|
+
ids = []
|
147
|
+
@data.each do |k, v|
|
148
|
+
if k && k.respond_to?(:is_poxreference?)
|
149
|
+
ids << k.id
|
150
|
+
end
|
151
|
+
if v && v.respond_to?(:is_poxreference?)
|
152
|
+
ids << v.id
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
156
|
+
ids
|
99
157
|
end
|
100
158
|
|
101
159
|
# This method should only be used during store repair operations. It will
|
102
160
|
# delete all references to the given object ID.
|
103
161
|
# @param id [Integer] targeted object ID
|
104
162
|
def _delete_reference_to_id(id)
|
163
|
+
original_length = @data.length
|
164
|
+
|
105
165
|
@data.delete_if do |k, v|
|
106
|
-
|
166
|
+
(k && k.respond_to?(:is_poxreference?) && k.id == id) ||
|
167
|
+
(v && v.respond_to?(:is_poxreference?) && v.id == id)
|
168
|
+
end
|
169
|
+
|
170
|
+
if @data.length != original_length
|
171
|
+
@store.cache.cache_write(self)
|
107
172
|
end
|
108
|
-
@store.cache.cache_write(self)
|
109
173
|
end
|
110
174
|
|
111
175
|
# Restore the persistent data from a single data structure.
|
@@ -114,8 +178,18 @@ module PEROBS
|
|
114
178
|
# @private
|
115
179
|
def _deserialize(data)
  # Rebuild the in-memory Hash from the key/value pairs in +data+ and
  # return it.
  @data = {}

  data.each do |k, v|
    # References to other PEROBS Objects are marshalled with our own
    # format. If we detect such a marshalled String we convert it into a
    # POXReference object. The pattern is anchored with \A and \z so that
    # only a key that consists entirely of the marshalled form is
    # converted. With ^ and $ a multi-line String key that merely contains
    # such a line would be mistaken for a reference.
    if (match = /\A#<PEROBS::POReference id=([0-9]+)>\z/.match(k))
      k = POXReference.new(@store, match[1].to_i)
    end
    # Values stored as POReference are turned back into POXReference
    # proxies. All other values are taken over unchanged.
    dv = v.is_a?(POReference) ? POXReference.new(@store, v.id) : v
    @data[k] = dv
  end

  @data
end
|
121
195
|
|
@@ -136,26 +210,46 @@ module PEROBS
|
|
136
210
|
data = {}
|
137
211
|
|
138
212
|
@data.each do |k, v|
|
139
|
-
if
|
140
|
-
|
141
|
-
|
142
|
-
#
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
v.inspect
|
151
|
-
end
|
152
|
-
data[k] = v
|
213
|
+
if k.respond_to?(:is_poxreference?)
|
214
|
+
# JSON only supports Strings as hash keys. Since JSON is the default
|
215
|
+
# internal storage format in the database, we have to marshall
|
216
|
+
# PEROBS::Object references ourselves.
|
217
|
+
k = "#<PEROBS::POReference id=#{k.id}>"
|
218
|
+
elsif k[0..24] == '#<PEROBS::POReference id='
|
219
|
+
# This could obviously result in conflicts with 'normal' String hash
|
220
|
+
# keys. This is extremely unlikely, but we better catch this case
|
221
|
+
# before it causes hard to debug trouble.
|
222
|
+
raise ArgumentError, "Hash key #{k} conflicts with PEROBS " +
|
223
|
+
"internal representation of marshalled hash keys!"
|
153
224
|
end
|
225
|
+
data[k] = serialize_helper(v)
|
154
226
|
end
|
155
227
|
|
156
228
|
data
|
157
229
|
end
|
158
230
|
|
231
|
+
def serialize_helper(v)
|
232
|
+
if v.respond_to?(:is_poxreference?)
|
233
|
+
# References to other PEROBS objects (POXReference) are stored as
|
234
|
+
# POReference in the database.
|
235
|
+
return POReference.new(v.id)
|
236
|
+
else
|
237
|
+
# Outside of the PEROBS library all PEROBS::ObjectBase derived
|
238
|
+
# objects should not be used directly. The library only exposes them
|
239
|
+
# via POXReference proxy objects.
|
240
|
+
if v.is_a?(ObjectBase)
|
241
|
+
PEROBS.log.fatal 'A PEROBS::ObjectBase object escaped! ' +
|
242
|
+
"It is stored in a PEROBS::Hash. " +
|
243
|
+
'Have you used self() instead of myself() to ' +
|
244
|
+
"get the reference of this PEROBS object?\n" +
|
245
|
+
v.inspect
|
246
|
+
end
|
247
|
+
|
248
|
+
# All other objects are serialized by their native methods.
|
249
|
+
return v
|
250
|
+
end
|
251
|
+
end
|
252
|
+
|
159
253
|
end
|
160
254
|
|
161
255
|
end
|
@@ -0,0 +1,144 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# = IDList.rb -- Persistent Ruby Object Store
|
4
|
+
#
|
5
|
+
# Copyright (c) 2018 by Chris Schlaeger <chris@taskjuggler.org>
|
6
|
+
#
|
7
|
+
# MIT License
|
8
|
+
#
|
9
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
10
|
+
# a copy of this software and associated documentation files (the
|
11
|
+
# "Software"), to deal in the Software without restriction, including
|
12
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
13
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
14
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
15
|
+
# the following conditions:
|
16
|
+
#
|
17
|
+
# The above copyright notice and this permission notice shall be
|
18
|
+
# included in all copies or substantial portions of the Software.
|
19
|
+
#
|
20
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
21
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
23
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
24
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
25
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
26
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27
|
+
|
28
|
+
require 'perobs/IDListPageFile'
|
29
|
+
require 'perobs/IDListPageRecord'
|
30
|
+
|
31
|
+
module PEROBS
|
32
|
+
|
33
|
+
# This class stores a list of 64 bit values. Values can be added to the list
|
34
|
+
# and the presence of a certain value can be checked. It can hold up to 2^64
|
35
|
+
# values. It tries to keep values in memory but can store them in a file if
|
36
|
+
# needed. A threshold for the in-memory values can be set in the
|
37
|
+
# constructor. The stored values are grouped in pages. Each page can hold up
|
38
|
+
# to page_size entries.
|
39
|
+
class IDList
|
40
|
+
|
41
|
+
# Create a new IDList object. The data that can't be kept in memory will
|
42
|
+
# be stored in the specified directory under the given name.
|
43
|
+
# @param dir [String] Path of the directory
|
44
|
+
# @param name [String] Name of the file
|
45
|
+
# @param max_in_memory [Integer] Specifies the maximum number of values
|
46
|
+
# that will be kept in memory. If the list is larger, values will
|
47
|
+
# be cached in the specified file.
|
48
|
+
# @param page_size [Integer] The number of values per page. The default
|
49
|
+
# value is 32, which was found to be the best-performing config in tests.
|
50
|
+
def initialize(dir, name, max_in_memory, page_size = 32)
|
51
|
+
# The page_file manages the pages that store the values.
|
52
|
+
@page_file = IDListPageFile.new(self, dir, name,
|
53
|
+
max_in_memory, page_size)
|
54
|
+
clear
|
55
|
+
end
|
56
|
+
|
57
|
+
# Insert a new value into the list.
|
58
|
+
# @param id [Integer] The value to add
|
59
|
+
def insert(id)
  # Find the index of the first page record whose max_id is >= id. That is
  # the record that should hold the ID.
  index = @page_records.bsearch_index { |pr| pr.max_id >= id }
  # No record reaches up to the given ID. Report this explicitly instead of
  # failing with a TypeError on @page_records[nil] below.
  unless index
    raise ArgumentError, "IDList cannot store ID #{id}: it is larger " +
      "than the largest supported value"
  end
  # Get the corresponding IDListPageRecord object.
  page = @page_records[index]

  # In case the page is already full we'll have to create a new page.
  # There is no guarantee that a split will yield a page with space as we
  # split by ID range, not by distributing the values evenly across the
  # two pages.
  while page.is_full?
    new_page = page.split
    # Store the newly created page into the page_records list.
    @page_records.insert(index + 1, new_page)
    if id >= new_page.min_id
      # We need to insert the ID into the newly created page. Adjust index
      # and page reference accordingly.
      index += 1
      page = new_page
    end
  end

  # Insert the ID into the page.
  page.insert(id)
end
|
84
|
+
|
85
|
+
# Check if a given value is already stored in the list.
|
86
|
+
# @param id [Integer] The value to check for
|
87
|
+
def include?(id)
  # Locate the first page record whose max_id is >= id.
  record = @page_records.bsearch { |pr| pr.max_id >= id }
  # If no record reaches up to the given ID it cannot be part of the list.
  # Answer false instead of failing with a NoMethodError on nil.
  record ? record.include?(id) : false
end
|
90
|
+
|
91
|
+
# Clear the list and empty the filesystem cache file.
|
92
|
+
def clear
|
93
|
+
@page_file.clear
|
94
|
+
@page_records = [ IDListPageRecord.new(@page_file, 0, 2 ** 64) ]
|
95
|
+
end
|
96
|
+
|
97
|
+
# Erase the list including the filesystem cache file. The IDList is no
|
98
|
+
# longer usable after this call but the cache file is removed from the
|
99
|
+
# filesystem.
|
100
|
+
def erase
|
101
|
+
@page_file.erase
|
102
|
+
@page_records = nil
|
103
|
+
end
|
104
|
+
|
105
|
+
# Perform some consistency checks on the internal data structures. Raises
|
106
|
+
# a RuntimeError in case a problem is found.
|
107
|
+
def check
  # The ID ranges of the page records must start at 0, must be contiguous
  # and must end at 2 ** 64. Each record is also asked to check itself.
  last_max = -1
  unless (min_id = @page_records.first.min_id) == 0
    raise RuntimeError, "min_id of first record (#{min_id}) " +
      "must be 0."
  end

  @page_records.each do |pr|
    unless pr.min_id == last_max + 1
      raise RuntimeError, "max_id of previous record (#{last_max}) " +
        "must be exactly 1 smaller than current record (#{pr.min_id})."
    end
    last_max = pr.max_id
    pr.check
  end

  # last_max now holds the max_id of the last record.
  unless last_max == 2 ** 64
    raise RuntimeError, "max_id of last record " +
      "(#{last_max}) must be #{2 ** 64}."
  end
end
|
128
|
+
|
129
|
+
# Return the values of all page records, in page record order, as one new
# Array.
def to_a
  # flat_map concatenates the per-record value lists in a single pass.
  # Using += in a loop would build a new, ever-growing Array per record.
  @page_records.flat_map { |pr| pr.values }
end
|
134
|
+
|
135
|
+
# Print a human readable form of the page records that store the list. This is
|
136
|
+
# only meant for debugging purposes and does not scale for larger lists.
|
137
|
+
def to_s
  # The list keeps its content in @page_records. Render one line per page
  # record with the values of that record. @page_records is nil after
  # erase(); in that case only the leading newline is returned.
  "\n" + (@page_records || []).map do |pr|
    "[ #{pr.values.join(', ')} ]"
  end.join("\n")
end
|
140
|
+
|
141
|
+
end
|
142
|
+
|
143
|
+
end
|
144
|
+
|
@@ -0,0 +1,107 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# = IDListPage.rb -- Persistent Ruby Object Store
|
4
|
+
#
|
5
|
+
# Copyright (c) 2018 by Chris Schlaeger <chris@taskjuggler.org>
|
6
|
+
#
|
7
|
+
# MIT License
|
8
|
+
#
|
9
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
10
|
+
# a copy of this software and associated documentation files (the
|
11
|
+
# "Software"), to deal in the Software without restriction, including
|
12
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
13
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
14
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
15
|
+
# the following conditions:
|
16
|
+
#
|
17
|
+
# The above copyright notice and this permission notice shall be
|
18
|
+
# included in all copies or substantial portions of the Software.
|
19
|
+
#
|
20
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
21
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
23
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
24
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
25
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
26
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27
|
+
|
28
|
+
module PEROBS
|
29
|
+
|
30
|
+
class IDListPage
|
31
|
+
|
32
|
+
attr_reader :uid, :values
|
33
|
+
attr_accessor :record
|
34
|
+
|
35
|
+
def initialize(page_file, record, uid, values = [])
|
36
|
+
@page_file = page_file
|
37
|
+
@record = record
|
38
|
+
@uid = uid
|
39
|
+
@values = values
|
40
|
+
@record.page_entries = @values.length
|
41
|
+
end
|
42
|
+
|
43
|
+
def IDListPage::load(page_file, uid, ref)
|
44
|
+
page_file.load(uid, ref)
|
45
|
+
end
|
46
|
+
|
47
|
+
def is_full?
|
48
|
+
@values.length >= @page_file.page_size
|
49
|
+
end
|
50
|
+
|
51
|
+
def length
|
52
|
+
@values.length
|
53
|
+
end
|
54
|
+
|
55
|
+
def save
|
56
|
+
@page_file.save_page(self)
|
57
|
+
end
|
58
|
+
|
59
|
+
def insert(id)
|
60
|
+
if is_full?
|
61
|
+
raise ArgumentError, "IDListPage is already full"
|
62
|
+
end
|
63
|
+
index = @values.bsearch_index { |v| v >= id } || @values.length
|
64
|
+
|
65
|
+
# If the value isn't stored already, insert it.
|
66
|
+
if @values[index] != id
|
67
|
+
@values.insert(index, id)
|
68
|
+
@record.page_entries = @values.length
|
69
|
+
@page_file.mark_page_as_modified(self)
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
# Check if the given value is stored in this page.
# @param id [Integer] The value to look for
# @return [true, false]
def include?(id)
  # @values is kept sorted in ascending order (see insert and check), so
  # a binary search finds the first value that is >= id.
  found = @values.bsearch { |v| v >= id }
  !found.nil? && found == id
end
|
76
|
+
|
77
|
+
# Remove all values that are larger than max_id from this page.
# @param max_id [Integer] The largest value that stays in the page
# @return [Array of Integer] The removed values
def delete(max_id)
  removed, kept = @values.partition { |v| v > max_id }

  unless removed.empty?
    # Update @values in place so that the Array object itself stays the
    # same.
    @values.replace(kept)
    @record.page_entries = @values.length
    @page_file.mark_page_as_modified(self)
  end

  removed
end
|
88
|
+
|
89
|
+
# Check that the stored values are in strictly ascending order. Raises a
# RuntimeError if two neighbouring values violate this.
def check
  @values.each_cons(2) do |last_value, v|
    if last_value >= v
      raise RuntimeError, "The values #{last_value} and #{v} must be " +
        "strictly ascending: #{@values.inspect}"
    end
  end

  # Returns @values, the same object the previous each_with_index based
  # loop returned.
  @values
end
|
99
|
+
|
100
|
+
def to_s
|
101
|
+
"[ #{@values.join(', ')} ]"
|
102
|
+
end
|
103
|
+
|
104
|
+
end
|
105
|
+
|
106
|
+
end
|
107
|
+
|
@@ -0,0 +1,180 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# = IDListPageFile.rb -- Persistent Ruby Object Store
|
4
|
+
#
|
5
|
+
# Copyright (c) 2018 by Chris Schlaeger <chris@taskjuggler.org>
|
6
|
+
#
|
7
|
+
# MIT License
|
8
|
+
#
|
9
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
10
|
+
# a copy of this software and associated documentation files (the
|
11
|
+
# "Software"), to deal in the Software without restriction, including
|
12
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
13
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
14
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
15
|
+
# the following conditions:
|
16
|
+
#
|
17
|
+
# The above copyright notice and this permission notice shall be
|
18
|
+
# included in all copies or substantial portions of the Software.
|
19
|
+
#
|
20
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
21
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
23
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
24
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
25
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
26
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27
|
+
|
28
|
+
require 'perobs/IDListPage'
|
29
|
+
require 'perobs/IDListPageRecord'
|
30
|
+
require 'perobs/Log'
|
31
|
+
require 'perobs/PersistentObjectCache'
|
32
|
+
|
33
|
+
module PEROBS
|
34
|
+
|
35
|
+
# The IDListPageFile class provides filesystem based cache for the
|
36
|
+
# IDListPage objects. The IDListPageRecord objects only hold the index of the
|
37
|
+
# page in this cache. This allows the pages to be garbage collected and
|
38
|
+
# swapped to the file. If accessed, the pages will be swapped in again. While
|
39
|
+
# this process is similar to the demand paging of the OS it has absolutely
|
40
|
+
# nothing to do with it.
|
41
|
+
class IDListPageFile
|
42
|
+
|
43
|
+
attr_reader :page_size, :pages
|
44
|
+
|
45
|
+
# Create a new IDListPageFile object that uses the given file in the given
|
46
|
+
# directory as cache file.
|
47
|
+
# @param list [IDList] The IDList object that caches pages here
|
48
|
+
# @param dir [String] An existing directory
|
49
|
+
# @param name [String] A file name (without path)
|
50
|
+
# @param max_in_memory [Integer] Maximum number of pages to keep in memory
|
51
|
+
# @param page_size [Integer] The number of values in each page
|
52
|
+
def initialize(list, dir, name, max_in_memory, page_size)
|
53
|
+
@list = list
|
54
|
+
@file_name = File.join(dir, name + '.cache')
|
55
|
+
@page_size = page_size
|
56
|
+
open
|
57
|
+
@pages = PersistentObjectCache.new(max_in_memory, max_in_memory,
|
58
|
+
IDListPage, self)
|
59
|
+
@page_counter = 0
|
60
|
+
end
|
61
|
+
|
62
|
+
# Load the IDListPage from the cache file.
|
63
|
+
# @param page_idx [Integer] The page index in the page file
|
64
|
+
# @param record [IDListPageRecord] the corresponding IDListPageRecord
|
65
|
+
# @return [IDListPage] The loaded values
|
66
|
+
def load(page_idx, record)
  # The IDListPageRecord will tell us the actual number of values stored
  # in this page.
  values = []
  unless (entries = record.page_entries) == 0
    byte_count = entries * 8
    raw = nil
    begin
      # Each page occupies @page_size slots of 8 bytes in the file.
      @f.seek(page_idx * @page_size * 8)
      raw = @f.read(byte_count)
    rescue IOError, SystemCallError => e
      # File operations report OS level failures as Errno::* exceptions
      # (SystemCallError), not as IOError.
      PEROBS.log.fatal "Cannot read cache file #{@file_name}: #{e.message}"
    end

    # read() returns nil at end of file and a shorter String if the file
    # ends early. Unpacking that would fail or yield nil values.
    if raw.nil? || raw.bytesize != byte_count
      raise RuntimeError, "Cache file #{@file_name} is too short: " +
        "cannot read #{byte_count} bytes for page #{page_idx}"
    end
    values = raw.unpack("Q#{entries}")
  end

  # Create the IDListPage object with the given values.
  page = IDListPage.new(self, record, page_idx, values)
  @pages.insert(page, false)

  page
end
|
85
|
+
|
86
|
+
# Return the number of registered pages.
|
87
|
+
def page_count
|
88
|
+
@page_counter
|
89
|
+
end
|
90
|
+
|
91
|
+
# Create a new IDListPage and register it.
|
92
|
+
# @param record [IDListPageRecord] The corresponding record.
|
93
|
+
# @param values [Array of Integer] The values stored in the page
|
94
|
+
# @return [IDListPage]
|
95
|
+
def new_page(record, values = [])
|
96
|
+
idx = @page_counter
|
97
|
+
@page_counter += 1
|
98
|
+
mark_page_as_modified(IDListPage.new(self, record, idx, values))
|
99
|
+
idx
|
100
|
+
end
|
101
|
+
|
102
|
+
# Return the IDListPage object with the given index.
|
103
|
+
# @param record [IDListPageRecord] the corresponding IDListPageRecord
|
104
|
+
# @return [IDListPage] The page corresponding to the index.
|
105
|
+
def page(record)
|
106
|
+
p = @pages.get(record.page_idx, record) || load(record.page_idx, record)
|
107
|
+
unless p.uid == record.page_idx
|
108
|
+
raise RuntimeError, "Page reference mismatch. Record " +
|
109
|
+
"#{record.page_idx} points to page #{p.uid}"
|
110
|
+
end
|
111
|
+
|
112
|
+
p
|
113
|
+
end
|
114
|
+
|
115
|
+
# Mark a page as modified. This means it has to be written into the cache
|
116
|
+
# before it is removed from memory.
|
117
|
+
# @param p [IDListPage] page reference
|
118
|
+
def mark_page_as_modified(p)
|
119
|
+
@pages.insert(p)
|
120
|
+
@pages.flush
|
121
|
+
end
|
122
|
+
|
123
|
+
# Clear all pages, erase the cache and re-open it again.
|
124
|
+
def clear
  @pages.clear
  @page_counter = 0
  begin
    @f.truncate(0)
  rescue IOError, SystemCallError => e
    # OS level failures are raised as Errno::* (SystemCallError). Report
    # them the same way as IOError.
    raise RuntimeError, "Cannot truncate cache file #{@file_name}: " +
      e.message
  end
end
|
134
|
+
|
135
|
+
# Discard all pages and erase the cache file.
|
136
|
+
def erase
|
137
|
+
@pages.clear
|
138
|
+
@page_counter = 0
|
139
|
+
close
|
140
|
+
end
|
141
|
+
|
142
|
+
# Save the given IDListPage into the cache file.
|
143
|
+
# @param p [IDListPage] page to store
|
144
|
+
def save_page(p)
  # The record and the page must agree on the number of stored values.
  if p.record.page_entries != p.values.length
    raise RuntimeError, "page_entries mismatch for node #{p.uid}"
  end
  begin
    # Each page occupies @page_size slots of 8 bytes in the file.
    @f.seek(p.uid * @page_size * 8)
    @f.write(p.values.pack('Q*'))
  rescue IOError, SystemCallError => e
    # OS level write failures are raised as Errno::* (SystemCallError).
    PEROBS.log.fatal "Cannot write cache file #{@file_name}: #{e.message}"
  end
end
|
155
|
+
|
156
|
+
private
|
157
|
+
|
158
|
+
def open
  # Mode 'wb+' creates the file, or truncates an existing one, and opens
  # it for binary reading and writing.
  @f = File.open(@file_name, 'wb+')
rescue IOError, SystemCallError => e
  # File.open reports failures such as a missing directory or missing
  # permissions as Errno::* exceptions (SystemCallError), not as IOError.
  PEROBS.log.fatal "Cannot open cache file #{@file_name}: #{e.message}"
end
|
166
|
+
|
167
|
+
def close
  begin
    # @f is nil once the file has been closed. Skip the close in that case
    # so a repeated call does not fail.
    @f.close if @f
    File.delete(@file_name) if File.exist?(@file_name)
  rescue IOError, SystemCallError => e
    # File.delete reports failures as Errno::* exceptions
    # (SystemCallError), not as IOError.
    PEROBS.log.fatal "Cannot erase cache file #{@file_name}: #{e.message}"
  end
  @f = nil
end
|
176
|
+
|
177
|
+
end
|
178
|
+
|
179
|
+
end
|
180
|
+
|