perobs 4.0.0 → 4.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/README.md +27 -16
- data/lib/perobs/Array.rb +66 -19
- data/lib/perobs/BTree.rb +106 -15
- data/lib/perobs/BTreeBlob.rb +4 -3
- data/lib/perobs/BTreeDB.rb +5 -4
- data/lib/perobs/BTreeNode.rb +482 -156
- data/lib/perobs/BTreeNodeLink.rb +10 -0
- data/lib/perobs/BigArray.rb +285 -0
- data/lib/perobs/BigArrayNode.rb +1002 -0
- data/lib/perobs/BigHash.rb +246 -0
- data/lib/perobs/BigTree.rb +197 -0
- data/lib/perobs/BigTreeNode.rb +873 -0
- data/lib/perobs/Cache.rb +48 -10
- data/lib/perobs/ConsoleProgressMeter.rb +61 -0
- data/lib/perobs/DataBase.rb +4 -3
- data/lib/perobs/DynamoDB.rb +57 -15
- data/lib/perobs/EquiBlobsFile.rb +155 -50
- data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
- data/lib/perobs/FlatFile.rb +519 -227
- data/lib/perobs/FlatFileBlobHeader.rb +113 -54
- data/lib/perobs/FlatFileDB.rb +49 -23
- data/lib/perobs/FuzzyStringMatcher.rb +175 -0
- data/lib/perobs/Hash.rb +127 -33
- data/lib/perobs/IDList.rb +144 -0
- data/lib/perobs/IDListPage.rb +107 -0
- data/lib/perobs/IDListPageFile.rb +180 -0
- data/lib/perobs/IDListPageRecord.rb +142 -0
- data/lib/perobs/Object.rb +18 -15
- data/lib/perobs/ObjectBase.rb +46 -5
- data/lib/perobs/PersistentObjectCache.rb +57 -68
- data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
- data/lib/perobs/ProgressMeter.rb +97 -0
- data/lib/perobs/SpaceManager.rb +273 -0
- data/lib/perobs/SpaceTree.rb +21 -12
- data/lib/perobs/SpaceTreeNode.rb +53 -61
- data/lib/perobs/Store.rb +264 -145
- data/lib/perobs/version.rb +1 -1
- data/lib/perobs.rb +2 -0
- data/perobs.gemspec +4 -4
- data/test/Array_spec.rb +15 -6
- data/test/BTree_spec.rb +6 -2
- data/test/BigArray_spec.rb +261 -0
- data/test/BigHash_spec.rb +152 -0
- data/test/BigTreeNode_spec.rb +153 -0
- data/test/BigTree_spec.rb +259 -0
- data/test/EquiBlobsFile_spec.rb +105 -1
- data/test/FNV_Hash_1a_64_spec.rb +59 -0
- data/test/FlatFileDB_spec.rb +198 -14
- data/test/FuzzyStringMatcher_spec.rb +261 -0
- data/test/Hash_spec.rb +13 -3
- data/test/IDList_spec.rb +77 -0
- data/test/LegacyDBs/LegacyDB.rb +155 -0
- data/test/LegacyDBs/version_3/class_map.json +1 -0
- data/test/LegacyDBs/version_3/config.json +1 -0
- data/test/LegacyDBs/version_3/database.blobs +0 -0
- data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
- data/test/LegacyDBs/version_3/index.blobs +0 -0
- data/test/LegacyDBs/version_3/version +1 -0
- data/test/LockFile_spec.rb +9 -6
- data/test/SpaceManager_spec.rb +176 -0
- data/test/SpaceTree_spec.rb +4 -1
- data/test/Store_spec.rb +305 -203
- data/test/spec_helper.rb +9 -4
- metadata +57 -16
- data/lib/perobs/BTreeNodeCache.rb +0 -109
- data/lib/perobs/TreeDB.rb +0 -277
data/lib/perobs/Hash.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#
|
3
3
|
# = Hash.rb -- Persistent Ruby Object Store
|
4
4
|
#
|
5
|
-
# Copyright (c) 2015, 2016 by Chris Schlaeger <chris@taskjuggler.org>
|
5
|
+
# Copyright (c) 2015, 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
|
6
6
|
#
|
7
7
|
# MIT License
|
8
8
|
#
|
@@ -37,20 +37,36 @@ module PEROBS
|
|
37
37
|
# The implementation is largely a proxy around the standard Hash class. But
|
38
38
|
# all mutating methods must be re-implemented to convert PEROBS::Objects to
|
39
39
|
# POXReference objects and to register the object as modified with the
|
40
|
-
# cache.
|
40
|
+
# cache. However, it is not designed for large data sets as it always reads
|
41
|
+
# and writes the full data set for every access (unless it is cached). For
|
42
|
+
# data sets that could have more than a few hundred entries BigHash is the
|
43
|
+
# recommended alternative.
|
41
44
|
#
|
42
45
|
# We explicitly don't support Hash::store() as it conflicts with
|
43
46
|
# ObjectBase::store() method to access the store.
|
44
47
|
class Hash < ObjectBase
|
45
48
|
|
49
|
+
# These methods do not mutate the Hash. They only perform read
|
50
|
+
# operations and return a new PEROBS::Hash object.
|
51
|
+
([
|
52
|
+
:invert, :merge, :reject, :select
|
53
|
+
] + Enumerable.instance_methods).uniq.each do |method_sym|
|
54
|
+
# Create a wrapper method that passes the call to @data.
|
55
|
+
define_method(method_sym) do |*args, &block|
|
56
|
+
# Register the read operation with the cache.
|
57
|
+
@store.cache.cache_read(self)
|
58
|
+
@store.new(PEROBS::Hash, @data.send(method_sym, *args, &block))
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
46
62
|
# These methods do not mutate the Hash. They only perform read
|
47
63
|
# operations.
|
48
64
|
([
|
49
65
|
:==, :[], :assoc, :compare_by_identity, :compare_by_identity?, :default,
|
50
66
|
:default_proc, :each, :each_key, :each_pair, :each_value, :empty?,
|
51
67
|
:eql?, :fetch, :flatten, :has_key?, :has_value?, :hash, :include?,
|
52
|
-
:
|
53
|
-
:pretty_print, :pretty_print_cycle, :rassoc, :
|
68
|
+
:key, :key?, :keys, :length, :member?,
|
69
|
+
:pretty_print, :pretty_print_cycle, :rassoc, :size,
|
54
70
|
:to_a, :to_h, :to_hash, :to_s, :value?, :values, :values_at
|
55
71
|
] + Enumerable.instance_methods).uniq.each do |method_sym|
|
56
72
|
# Create a wrapper method that passes the call to @data.
|
@@ -61,11 +77,22 @@ module PEROBS
|
|
61
77
|
end
|
62
78
|
end
|
63
79
|
|
64
|
-
# These methods mutate the Hash
|
80
|
+
# These methods mutate the Hash and return self
|
81
|
+
[
|
82
|
+
:clear, :keep_if, :merge!, :rehash, :reject!, :replace, :select!, :update
|
83
|
+
].each do |method_sym|
|
84
|
+
# Create a wrapper method that passes the call to @data.
|
85
|
+
define_method(method_sym) do |*args, &block|
|
86
|
+
# Register the write operation with the cache.
|
87
|
+
@store.cache.cache_write(self)
|
88
|
+
@data.send(method_sym, *args, &block)
|
89
|
+
myself
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
# These methods mutate the Hash and return basic Ruby type objects.
|
65
94
|
[
|
66
|
-
:
|
67
|
-
:initialize_copy, :keep_if, :merge!, :rehash, :reject!, :replace,
|
68
|
-
:select!, :shift, :update
|
95
|
+
:delete, :delete_if, :shift
|
69
96
|
].each do |method_sym|
|
70
97
|
# Create a wrapper method that passes the call to @data.
|
71
98
|
define_method(method_sym) do |*args, &block|
|
@@ -79,33 +106,70 @@ module PEROBS
|
|
79
106
|
# PEROBS users should never call this method or equivalents of derived
|
80
107
|
# methods directly.
|
81
108
|
# @param p [PEROBS::Handle] PEROBS handle
|
82
|
-
# @param default [
|
83
|
-
# stored for a specific key.
|
84
|
-
|
109
|
+
# @param default [Object] The default value that is returned when no value
|
110
|
+
# is stored for a specific key. The default must be of the
|
111
|
+
# supported type.
|
112
|
+
def initialize(p, default = nil, &block)
|
85
113
|
super(p)
|
86
|
-
|
87
|
-
|
114
|
+
_check_assignment_value(default)
|
115
|
+
if block_given?
|
116
|
+
@data = ::Hash.new(&block)
|
117
|
+
else
|
118
|
+
@data = ::Hash.new(default)
|
119
|
+
end
|
88
120
|
|
89
121
|
# Ensure that the newly created object will be pushed into the database.
|
90
122
|
@store.cache.cache_write(self)
|
91
123
|
end
|
92
124
|
|
125
|
+
# Proxy for assignment method.
|
126
|
+
def []=(key, value)
|
127
|
+
unless key.is_a?(String) || key.respond_to?(:is_poxreference?)
|
128
|
+
raise ArgumentError, "PEROBS::Hash[] key must be a String or " +
|
129
|
+
"a PEROBS object but is a #{key.class}"
|
130
|
+
end
|
131
|
+
_check_assignment_value(value)
|
132
|
+
@store.cache.cache_write(self)
|
133
|
+
@data[key] = value
|
134
|
+
end
|
135
|
+
|
136
|
+
# Proxy for default= method.
|
137
|
+
def default=(value)
|
138
|
+
_check_assignment_value(value)
|
139
|
+
@data.default=(value)
|
140
|
+
end
|
141
|
+
|
93
142
|
# Return a list of all object IDs of all persistent objects that this Hash
|
94
143
|
# is referencing.
|
95
144
|
# @return [Array of Integer] IDs of referenced objects
|
96
145
|
def _referenced_object_ids
|
97
|
-
|
98
|
-
|
146
|
+
ids = []
|
147
|
+
@data.each do |k, v|
|
148
|
+
if k && k.respond_to?(:is_poxreference?)
|
149
|
+
ids << k.id
|
150
|
+
end
|
151
|
+
if v && v.respond_to?(:is_poxreference?)
|
152
|
+
ids << v.id
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
156
|
+
ids
|
99
157
|
end
|
100
158
|
|
101
159
|
# This method should only be used during store repair operations. It will
|
102
160
|
# delete all references to the given object ID.
|
103
161
|
# @param id [Integer] targeted object ID
|
104
162
|
def _delete_reference_to_id(id)
|
163
|
+
original_length = @data.length
|
164
|
+
|
105
165
|
@data.delete_if do |k, v|
|
106
|
-
|
166
|
+
(k && k.respond_to?(:is_poxreference?) && k.id == id) ||
|
167
|
+
(v && v.respond_to?(:is_poxreference?) && v.id == id)
|
168
|
+
end
|
169
|
+
|
170
|
+
if @data.length != original_length
|
171
|
+
@store.cache.cache_write(self)
|
107
172
|
end
|
108
|
-
@store.cache.cache_write(self)
|
109
173
|
end
|
110
174
|
|
111
175
|
# Restore the persistent data from a single data structure.
|
@@ -114,8 +178,18 @@ module PEROBS
|
|
114
178
|
# @private
|
115
179
|
def _deserialize(data)
|
116
180
|
@data = {}
|
117
|
-
|
118
|
-
|
181
|
+
|
182
|
+
data.each do |k, v|
|
183
|
+
# References to other PEROBS Objects are marshalled with our own
|
184
|
+
# format. If we detect such a marshalled String we convert it into a
|
185
|
+
# POXReference object.
|
186
|
+
if (match = /^#<PEROBS::POReference id=([0-9]+)>$/.match(k))
|
187
|
+
k = POXReference.new(@store, match[1].to_i)
|
188
|
+
end
|
189
|
+
dv = v.is_a?(POReference) ? POXReference.new(@store, v.id) : v
|
190
|
+
@data[k] = dv
|
191
|
+
end
|
192
|
+
|
119
193
|
@data
|
120
194
|
end
|
121
195
|
|
@@ -136,26 +210,46 @@ module PEROBS
|
|
136
210
|
data = {}
|
137
211
|
|
138
212
|
@data.each do |k, v|
|
139
|
-
if
|
140
|
-
|
141
|
-
|
142
|
-
#
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
v.inspect
|
151
|
-
end
|
152
|
-
data[k] = v
|
213
|
+
if k.respond_to?(:is_poxreference?)
|
214
|
+
# JSON only supports Strings as hash keys. Since JSON is the default
|
215
|
+
# internal storage format in the database, we have to marshall
|
216
|
+
# PEROBS::Object references ourselves.
|
217
|
+
k = "#<PEROBS::POReference id=#{k.id}>"
|
218
|
+
elsif k[0..24] == '#<PEROBS::POReference id='
|
219
|
+
# This could obviously result in conflicts with 'normal' String hash
|
220
|
+
# keys. This is extremely unlikely, but we better catch this case
|
221
|
+
# before it causes hard to debug trouble.
|
222
|
+
raise ArgumentError, "Hash key #{k} conflicts with PEROBS " +
|
223
|
+
"internal representation of marshalled hash keys!"
|
153
224
|
end
|
225
|
+
data[k] = serialize_helper(v)
|
154
226
|
end
|
155
227
|
|
156
228
|
data
|
157
229
|
end
|
158
230
|
|
231
|
+
def serialize_helper(v)
|
232
|
+
if v.respond_to?(:is_poxreference?)
|
233
|
+
# References to other PEROBS objects (POXReference) are stored as
|
234
|
+
# POReference in the database.
|
235
|
+
return POReference.new(v.id)
|
236
|
+
else
|
237
|
+
# Outside of the PEROBS library all PEROBS::ObjectBase derived
|
238
|
+
# objects should not be used directly. The library only exposes them
|
239
|
+
# via POXReference proxy objects.
|
240
|
+
if v.is_a?(ObjectBase)
|
241
|
+
PEROBS.log.fatal 'A PEROBS::ObjectBase object escaped! ' +
|
242
|
+
"It is stored in a PEROBS::Hash. " +
|
243
|
+
'Have you used self() instead of myself() to ' +
|
244
|
+
"get the reference of this PEROBS object?\n" +
|
245
|
+
v.inspect
|
246
|
+
end
|
247
|
+
|
248
|
+
# All other objects are serialized by their native methods.
|
249
|
+
return v
|
250
|
+
end
|
251
|
+
end
|
252
|
+
|
159
253
|
end
|
160
254
|
|
161
255
|
end
|
@@ -0,0 +1,144 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# = IDList.rb -- Persistent Ruby Object Store
|
4
|
+
#
|
5
|
+
# Copyright (c) 2018 by Chris Schlaeger <chris@taskjuggler.org>
|
6
|
+
#
|
7
|
+
# MIT License
|
8
|
+
#
|
9
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
10
|
+
# a copy of this software and associated documentation files (the
|
11
|
+
# "Software"), to deal in the Software without restriction, including
|
12
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
13
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
14
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
15
|
+
# the following conditions:
|
16
|
+
#
|
17
|
+
# The above copyright notice and this permission notice shall be
|
18
|
+
# included in all copies or substantial portions of the Software.
|
19
|
+
#
|
20
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
21
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
23
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
24
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
25
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
26
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27
|
+
|
28
|
+
require 'perobs/IDListPageFile'
|
29
|
+
require 'perobs/IDListPageRecord'
|
30
|
+
|
31
|
+
module PEROBS
|
32
|
+
|
33
|
+
# This class stores a list of 64 bit values. Values can be added to the list
|
34
|
+
# and the presence of a certain value can be checked. It can hold up to 2^64
|
35
|
+
# values. It tries to keep values in memory but can store them in a file if
|
36
|
+
# needed. A threshold for the in-memory values can be set in the
|
37
|
+
# constructor. The stored values are grouped in pages. Each page can hold up
|
38
|
+
# to page_size entries.
|
39
|
+
class IDList
|
40
|
+
|
41
|
+
# Create a new IDList object. The data that can't be kept in memory will
|
42
|
+
# be stored in the specified directory under the given name.
|
43
|
+
# @param dir [String] Path of the directory
|
44
|
+
# @param name [String] Name of the file
|
45
|
+
# @param max_in_memory [Integer] Specifies the maximum number of values
|
46
|
+
# that will be kept in memory. If the list is larger, values will
|
47
|
+
# be cached in the specified file.
|
48
|
+
# @param page_size [Integer] The number of values per page. The default
|
49
|
+
# value is 32, which was found to be the best-performing setting in tests.
|
50
|
+
def initialize(dir, name, max_in_memory, page_size = 32)
|
51
|
+
# The page_file manages the pages that store the values.
|
52
|
+
@page_file = IDListPageFile.new(self, dir, name,
|
53
|
+
max_in_memory, page_size)
|
54
|
+
clear
|
55
|
+
end
|
56
|
+
|
57
|
+
# Insert a new value into the list.
|
58
|
+
# @param id [Integer] The value to add
|
59
|
+
def insert(id)
|
60
|
+
# Find the index of the page that should hold ID.
|
61
|
+
index = @page_records.bsearch_index { |pr| pr.max_id >= id }
|
62
|
+
# Get the corresponding IDListPageRecord object.
|
63
|
+
page = @page_records[index]
|
64
|
+
|
65
|
+
# In case the page is already full we'll have to create a new page.
|
66
|
+
# There is no guarantee that a split will yield a page with space as we
|
67
|
+
# split by ID range, not by distributing the values evenly across the
|
68
|
+
# two pages.
|
69
|
+
while page.is_full?
|
70
|
+
new_page = page.split
|
71
|
+
# Store the newly created page into the page_records list.
|
72
|
+
@page_records.insert(index + 1, new_page)
|
73
|
+
if id >= new_page.min_id
|
74
|
+
# We need to insert the ID into the newly created page. Adjust index
|
75
|
+
# and page reference accordingly.
|
76
|
+
index += 1
|
77
|
+
page = new_page
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
# Insert the ID into the page.
|
82
|
+
page.insert(id)
|
83
|
+
end
|
84
|
+
|
85
|
+
# Check if a given value is already stored in the list.
|
86
|
+
# @param id [Integer] The value to check for
|
87
|
+
def include?(id)
|
88
|
+
@page_records.bsearch { |pr| pr.max_id >= id }.include?(id)
|
89
|
+
end
|
90
|
+
|
91
|
+
# Clear the list and empty the filesystem cache file.
|
92
|
+
def clear
|
93
|
+
@page_file.clear
|
94
|
+
@page_records = [ IDListPageRecord.new(@page_file, 0, 2 ** 64) ]
|
95
|
+
end
|
96
|
+
|
97
|
+
# Erase the list including the filesystem cache file. The IDList is no
|
98
|
+
# longer usable after this call but the cache file is removed from the
|
99
|
+
# filesystem.
|
100
|
+
def erase
|
101
|
+
@page_file.erase
|
102
|
+
@page_records = nil
|
103
|
+
end
|
104
|
+
|
105
|
+
# Perform some consistency checks on the internal data structures. Raises
|
106
|
+
# a RuntimeError in case a problem is found.
|
107
|
+
def check
|
108
|
+
last_max = -1
|
109
|
+
unless (min_id = @page_records.first.min_id) == 0
|
110
|
+
raise RuntimeError, "min_id of first record (#{min_id}) " +
|
111
|
+
"must be 0."
|
112
|
+
end
|
113
|
+
|
114
|
+
@page_records.each do |pr|
|
115
|
+
unless pr.min_id == last_max + 1
|
116
|
+
raise RuntimeError, "max_id of previous record (#{last_max}) " +
|
117
|
+
"must be exactly 1 smaller than current record (#{pr.min_id})."
|
118
|
+
end
|
119
|
+
last_max = pr.max_id
|
120
|
+
pr.check
|
121
|
+
end
|
122
|
+
|
123
|
+
unless last_max == 2 ** 64
|
124
|
+
raise RuntimeError, "max_id of last records " +
|
125
|
+
"(#{@page_records.last.max_id}) must be #{2 ** 64})."
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
def to_a
|
130
|
+
a = []
|
131
|
+
@page_records.each { |pr| a += pr.values }
|
132
|
+
a
|
133
|
+
end
|
134
|
+
|
135
|
+
# Print a human readable form of the tree that stores the list. This is
|
136
|
+
# only meant for debugging purposes and does not scale for larger trees.
|
137
|
+
def to_s
|
138
|
+
"\n" + @root.to_s
|
139
|
+
end
|
140
|
+
|
141
|
+
end
|
142
|
+
|
143
|
+
end
|
144
|
+
|
@@ -0,0 +1,107 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# = IDListPage.rb -- Persistent Ruby Object Store
|
4
|
+
#
|
5
|
+
# Copyright (c) 2018 by Chris Schlaeger <chris@taskjuggler.org>
|
6
|
+
#
|
7
|
+
# MIT License
|
8
|
+
#
|
9
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
10
|
+
# a copy of this software and associated documentation files (the
|
11
|
+
# "Software"), to deal in the Software without restriction, including
|
12
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
13
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
14
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
15
|
+
# the following conditions:
|
16
|
+
#
|
17
|
+
# The above copyright notice and this permission notice shall be
|
18
|
+
# included in all copies or substantial portions of the Software.
|
19
|
+
#
|
20
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
21
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
23
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
24
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
25
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
26
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27
|
+
|
28
|
+
module PEROBS
|
29
|
+
|
30
|
+
class IDListPage
|
31
|
+
|
32
|
+
attr_reader :uid, :values
|
33
|
+
attr_accessor :record
|
34
|
+
|
35
|
+
def initialize(page_file, record, uid, values = [])
|
36
|
+
@page_file = page_file
|
37
|
+
@record = record
|
38
|
+
@uid = uid
|
39
|
+
@values = values
|
40
|
+
@record.page_entries = @values.length
|
41
|
+
end
|
42
|
+
|
43
|
+
def IDListPage::load(page_file, uid, ref)
|
44
|
+
page_file.load(uid, ref)
|
45
|
+
end
|
46
|
+
|
47
|
+
def is_full?
|
48
|
+
@values.length >= @page_file.page_size
|
49
|
+
end
|
50
|
+
|
51
|
+
def length
|
52
|
+
@values.length
|
53
|
+
end
|
54
|
+
|
55
|
+
def save
|
56
|
+
@page_file.save_page(self)
|
57
|
+
end
|
58
|
+
|
59
|
+
def insert(id)
|
60
|
+
if is_full?
|
61
|
+
raise ArgumentError, "IDListPage is already full"
|
62
|
+
end
|
63
|
+
index = @values.bsearch_index { |v| v >= id } || @values.length
|
64
|
+
|
65
|
+
# If the value isn't stored already, insert it.
|
66
|
+
if @values[index] != id
|
67
|
+
@values.insert(index, id)
|
68
|
+
@record.page_entries = @values.length
|
69
|
+
@page_file.mark_page_as_modified(self)
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
def include?(id)
|
74
|
+
!(v = @values.bsearch { |v| v >= id }).nil? && v == id
|
75
|
+
end
|
76
|
+
|
77
|
+
def delete(max_id)
|
78
|
+
a = []
|
79
|
+
@values.delete_if { |v| v > max_id ? a << v : false }
|
80
|
+
|
81
|
+
unless a.empty?
|
82
|
+
@record.page_entries = @values.length
|
83
|
+
@page_file.mark_page_as_modified(self)
|
84
|
+
end
|
85
|
+
|
86
|
+
a
|
87
|
+
end
|
88
|
+
|
89
|
+
def check
|
90
|
+
last_value = nil
|
91
|
+
@values.each_with_index do |v, i|
|
92
|
+
if last_value && last_value >= v
|
93
|
+
raise RuntimeError, "The values #{last_value} and #{v} must be " +
|
94
|
+
"strictly ascending: #{@values.inspect}"
|
95
|
+
end
|
96
|
+
last_value = v
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
def to_s
|
101
|
+
"[ #{@values.join(', ')} ]"
|
102
|
+
end
|
103
|
+
|
104
|
+
end
|
105
|
+
|
106
|
+
end
|
107
|
+
|
@@ -0,0 +1,180 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# = IDListPageFile.rb -- Persistent Ruby Object Store
|
4
|
+
#
|
5
|
+
# Copyright (c) 2018 by Chris Schlaeger <chris@taskjuggler.org>
|
6
|
+
#
|
7
|
+
# MIT License
|
8
|
+
#
|
9
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
10
|
+
# a copy of this software and associated documentation files (the
|
11
|
+
# "Software"), to deal in the Software without restriction, including
|
12
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
13
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
14
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
15
|
+
# the following conditions:
|
16
|
+
#
|
17
|
+
# The above copyright notice and this permission notice shall be
|
18
|
+
# included in all copies or substantial portions of the Software.
|
19
|
+
#
|
20
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
21
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
23
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
24
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
25
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
26
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27
|
+
|
28
|
+
require 'perobs/IDListPage'
|
29
|
+
require 'perobs/IDListPageRecord'
|
30
|
+
require 'perobs/Log'
|
31
|
+
require 'perobs/PersistentObjectCache'
|
32
|
+
|
33
|
+
module PEROBS
|
34
|
+
|
35
|
+
# The IDListPageFile class provides filesystem based cache for the
|
36
|
+
# IDListPage objects. The IDListPageRecord objects only hold the index of the
|
37
|
+
# page in this cache. This allows the pages to be garbage collected and
|
38
|
+
# swapped to the file. If accessed, the pages will be swapped in again. While
|
39
|
+
# this process is similar to the demand paging of the OS it has absolutely
|
40
|
+
# nothing to do with it.
|
41
|
+
class IDListPageFile
|
42
|
+
|
43
|
+
attr_reader :page_size, :pages
|
44
|
+
|
45
|
+
# Create a new IDListPageFile object that uses the given file in the given
|
46
|
+
# directory as cache file.
|
47
|
+
# @param list [IDList] The IDList object that caches pages here
|
48
|
+
# @param dir [String] An existing directory
|
49
|
+
# @param name [String] A file name (without path)
|
50
|
+
# @param max_in_memory [Integer] Maximum number of pages to keep in memory
|
51
|
+
# @param page_size [Integer] The number of values in each page
|
52
|
+
def initialize(list, dir, name, max_in_memory, page_size)
|
53
|
+
@list = list
|
54
|
+
@file_name = File.join(dir, name + '.cache')
|
55
|
+
@page_size = page_size
|
56
|
+
open
|
57
|
+
@pages = PersistentObjectCache.new(max_in_memory, max_in_memory,
|
58
|
+
IDListPage, self)
|
59
|
+
@page_counter = 0
|
60
|
+
end
|
61
|
+
|
62
|
+
# Load the IDListPage from the cache file.
|
63
|
+
# @param page_idx [Integer] The page index in the page file
|
64
|
+
# @param record [IDListPageRecord] the corresponding IDListPageRecord
|
65
|
+
# @return [IDListPage] The page holding the loaded values
|
66
|
+
def load(page_idx, record)
|
67
|
+
# The IDListPageRecord will tell us the actual number of values stored
|
68
|
+
# in this page.
|
69
|
+
values = []
|
70
|
+
unless (entries = record.page_entries) == 0
|
71
|
+
begin
|
72
|
+
@f.seek(page_idx * @page_size * 8)
|
73
|
+
values = @f.read(entries * 8).unpack("Q#{entries}")
|
74
|
+
rescue IOError => e
|
75
|
+
PEROBS.log.fatal "Cannot read cache file #{@file_name}: #{e.message}"
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
# Create the IDListPage object with the given values.
|
80
|
+
p = IDListPage.new(self, record, page_idx, values)
|
81
|
+
@pages.insert(p, false)
|
82
|
+
|
83
|
+
p
|
84
|
+
end
|
85
|
+
|
86
|
+
# Return the number of registered pages.
|
87
|
+
def page_count
|
88
|
+
@page_counter
|
89
|
+
end
|
90
|
+
|
91
|
+
# Create a new IDListPage and register it.
|
92
|
+
# @param record [IDListPageRecord] The corresponding record.
|
93
|
+
# @param values [Array of Integer] The values stored in the page
|
94
|
+
# @return [IDListPage]
|
95
|
+
def new_page(record, values = [])
|
96
|
+
idx = @page_counter
|
97
|
+
@page_counter += 1
|
98
|
+
mark_page_as_modified(IDListPage.new(self, record, idx, values))
|
99
|
+
idx
|
100
|
+
end
|
101
|
+
|
102
|
+
# Return the IDListPage object with the given index.
|
103
|
+
# @param record [IDListPageRecord] the corresponding IDListPageRecord
|
104
|
+
# @return [IDListPage] The page corresponding to the index.
|
105
|
+
def page(record)
|
106
|
+
p = @pages.get(record.page_idx, record) || load(record.page_idx, record)
|
107
|
+
unless p.uid == record.page_idx
|
108
|
+
raise RuntimeError, "Page reference mismatch. Record " +
|
109
|
+
"#{record.page_idx} points to page #{p.uid}"
|
110
|
+
end
|
111
|
+
|
112
|
+
p
|
113
|
+
end
|
114
|
+
|
115
|
+
# Mark a page as modified. This means it has to be written into the cache
|
116
|
+
# before it is removed from memory.
|
117
|
+
# @param p [IDListPage] page reference
|
118
|
+
def mark_page_as_modified(p)
|
119
|
+
@pages.insert(p)
|
120
|
+
@pages.flush
|
121
|
+
end
|
122
|
+
|
123
|
+
# Clear all pages, erase the cache and re-open it again.
|
124
|
+
def clear
|
125
|
+
@pages.clear
|
126
|
+
@page_counter = 0
|
127
|
+
begin
|
128
|
+
@f.truncate(0)
|
129
|
+
rescue IOError => e
|
130
|
+
raise RuntimeError, "Cannote truncate cache file #{@file_name}: " +
|
131
|
+
e.message
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
# Discard all pages and erase the cache file.
|
136
|
+
def erase
|
137
|
+
@pages.clear
|
138
|
+
@page_counter = 0
|
139
|
+
close
|
140
|
+
end
|
141
|
+
|
142
|
+
# Save the given IDListPage into the cache file.
|
143
|
+
# @param p [IDListPage] page to store
|
144
|
+
def save_page(p)
|
145
|
+
if p.record.page_entries != p.values.length
|
146
|
+
raise RuntimeError, "page_entries mismatch for node #{p.uid}"
|
147
|
+
end
|
148
|
+
begin
|
149
|
+
@f.seek(p.uid * @page_size * 8)
|
150
|
+
@f.write(p.values.pack('Q*'))
|
151
|
+
rescue IOError => e
|
152
|
+
PEROBS.log.fatal "Cannot write cache file #{@file_name}: #{e.message}"
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
156
|
+
private
|
157
|
+
|
158
|
+
def open
|
159
|
+
begin
|
160
|
+
# Create a new, empty cache file (mode 'wb+' truncates an existing file).
|
161
|
+
@f = File.open(@file_name, 'wb+')
|
162
|
+
rescue IOError => e
|
163
|
+
PEROBS.log.fatal "Cannot open cache file #{@file_name}: #{e.message}"
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
def close
|
168
|
+
begin
|
169
|
+
@f.close
|
170
|
+
File.delete(@file_name) if File.exist?(@file_name)
|
171
|
+
rescue IOError => e
|
172
|
+
PEROBS.log.fatal "Cannot erase cache file #{@file_name}: #{e.message}"
|
173
|
+
end
|
174
|
+
@f = nil
|
175
|
+
end
|
176
|
+
|
177
|
+
end
|
178
|
+
|
179
|
+
end
|
180
|
+
|