perobs 4.1.0 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/lib/perobs/BTree.rb +33 -13
- data/lib/perobs/BTreeBlob.rb +3 -2
- data/lib/perobs/BTreeDB.rb +4 -3
- data/lib/perobs/BTreeNode.rb +107 -78
- data/lib/perobs/BTreeNodeLink.rb +10 -0
- data/lib/perobs/BigArray.rb +19 -1
- data/lib/perobs/BigArrayNode.rb +13 -9
- data/lib/perobs/BigHash.rb +8 -24
- data/lib/perobs/BigTree.rb +14 -1
- data/lib/perobs/BigTreeNode.rb +2 -2
- data/lib/perobs/Cache.rb +31 -6
- data/lib/perobs/EquiBlobsFile.rb +12 -1
- data/lib/perobs/FlatFile.rb +197 -45
- data/lib/perobs/FlatFileBlobHeader.rb +20 -5
- data/lib/perobs/FlatFileDB.rb +8 -4
- data/lib/perobs/FuzzyStringMatcher.rb +192 -0
- data/lib/perobs/Hash.rb +4 -0
- data/lib/perobs/IDListPageFile.rb +1 -2
- data/lib/perobs/ObjectBase.rb +1 -1
- data/lib/perobs/PersistentObjectCache.rb +7 -4
- data/lib/perobs/SpaceManager.rb +273 -0
- data/lib/perobs/SpaceTree.rb +1 -1
- data/lib/perobs/Store.rb +67 -25
- data/lib/perobs/version.rb +1 -1
- data/perobs.gemspec +2 -2
- data/test/BTree_spec.rb +1 -0
- data/test/BigArray_spec.rb +53 -6
- data/test/BigHash_spec.rb +8 -0
- data/test/FlatFileDB_spec.rb +108 -3
- data/test/FuzzyStringMatcher_spec.rb +171 -0
- data/test/LegacyDBs/LegacyDB.rb +4 -0
- data/test/SpaceManager_spec.rb +176 -0
- data/test/Store_spec.rb +2 -5
- metadata +12 -6
@@ -115,6 +115,10 @@ module PEROBS
|
|
115
115
|
PEROBS.log.fatal "Cannot read blob header " +
|
116
116
|
"#{id ? "for ID #{id} " : ''}at address #{addr}"
|
117
117
|
else
|
118
|
+
if corruption_start
|
119
|
+
PEROBS.log.error "Corruption found at end of blob file at " +
|
120
|
+
"address #{addr}"
|
121
|
+
end
|
118
122
|
# We have reached the end of the file.
|
119
123
|
return nil
|
120
124
|
end
|
@@ -122,10 +126,15 @@ module PEROBS
|
|
122
126
|
|
123
127
|
# Did we get the full header?
|
124
128
|
if buf_with_crc.length != LENGTH
|
125
|
-
|
129
|
+
msg = "Incomplete FlatFileBlobHeader: Only " +
|
126
130
|
"#{buf_with_crc.length} " +
|
127
131
|
"bytes of #{LENGTH} could be read "
|
128
132
|
"#{id ? "for ID #{id} " : ''}at address #{addr}"
|
133
|
+
if errors_are_fatal
|
134
|
+
PEROBS.log.fatal msg
|
135
|
+
else
|
136
|
+
PEROBS.log.error msg
|
137
|
+
end
|
129
138
|
return nil
|
130
139
|
end
|
131
140
|
|
@@ -148,10 +157,16 @@ module PEROBS
|
|
148
157
|
"#{'%08x' % crc}."
|
149
158
|
else
|
150
159
|
if corruption_start.nil?
|
151
|
-
|
152
|
-
"
|
153
|
-
|
154
|
-
|
160
|
+
if errors_are_fatal
|
161
|
+
PEROBS.log.fatal "FlatFile corruption found. The FlatFile " +
|
162
|
+
"Header CRC mismatch at address #{addr}. Header CRC is " +
|
163
|
+
"#{'%08x' % read_crc} but should be #{'%08x' % crc}."
|
164
|
+
else
|
165
|
+
PEROBS.log.error "FlatFile corruption found. The FlatFile " +
|
166
|
+
"Header CRC mismatch at address #{addr}. Header CRC is " +
|
167
|
+
"#{'%08x' % read_crc} but should be #{'%08x' % crc}. " +
|
168
|
+
"Trying to find the next header."
|
169
|
+
end
|
155
170
|
corruption_start = addr
|
156
171
|
end
|
157
172
|
# The blob file is corrupted. There is no valid header at the
|
data/lib/perobs/FlatFileDB.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#
|
3
3
|
# = FlatFileDB.rb -- Persistent Ruby Object Store
|
4
4
|
#
|
5
|
-
# Copyright (c) 2015, 2016, 2017, 2018
|
5
|
+
# Copyright (c) 2015, 2016, 2017, 2018, 2019
|
6
6
|
# by Chris Schlaeger <chris@taskjuggler.org>
|
7
7
|
#
|
8
8
|
# MIT License
|
@@ -161,8 +161,8 @@ module PEROBS
|
|
161
161
|
# Permanently delete all objects that have not been marked. Those are
|
162
162
|
# orphaned and are no longer referenced by any actively used object.
|
163
163
|
# @return [Integer] Number of the removed objects from the DB.
|
164
|
-
def delete_unmarked_objects
|
165
|
-
@flat_file.delete_unmarked_objects
|
164
|
+
def delete_unmarked_objects(&block)
|
165
|
+
@flat_file.delete_unmarked_objects(&block)
|
166
166
|
end
|
167
167
|
|
168
168
|
# Mark an object.
|
@@ -184,7 +184,11 @@ module PEROBS
|
|
184
184
|
# repaired.
|
185
185
|
# @return number of errors found
|
186
186
|
def check_db(repair = false)
|
187
|
-
|
187
|
+
if repair
|
188
|
+
@flat_file.repair
|
189
|
+
else
|
190
|
+
@flat_file.check
|
191
|
+
end
|
188
192
|
end
|
189
193
|
|
190
194
|
# Check if the stored object is syntactically correct.
|
@@ -0,0 +1,192 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# = FuzzyStringMatcher.rb -- Persistent Ruby Object Store
|
4
|
+
#
|
5
|
+
# Copyright (c) 2020 by Chris Schlaeger <chris@taskjuggler.org>
|
6
|
+
#
|
7
|
+
# MIT License
|
8
|
+
#
|
9
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
10
|
+
# a copy of this software and associated documentation files (the
|
11
|
+
# "Software"), to deal in the Software without restriction, including
|
12
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
13
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
14
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
15
|
+
# the following conditions:
|
16
|
+
#
|
17
|
+
# The above copyright notice and this permission notice shall be
|
18
|
+
# included in all copies or substantial portions of the Software.
|
19
|
+
#
|
20
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
21
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
23
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
24
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
25
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
26
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27
|
+
|
28
|
+
require 'perobs/Log'
|
29
|
+
require 'perobs/ObjectBase'
|
30
|
+
|
31
|
+
module PEROBS
|
32
|
+
|
33
|
+
# The fuzzy string matcher can be used to perform a fuzzy string search
|
34
|
+
# against a known set of strings. The dictionary of known strings does not
|
35
|
+
# store the actual strings but references to arbitrary objects. These could
|
36
|
+
# be the string, but can be something else related to the learned strings.
|
37
|
+
# To use this class a list of strings with their references must be learned.
|
38
|
+
# Once the dictionary has been established, fuzzy matches can be done.
|
39
|
+
class FuzzyStringMatcher

  # Create a new FuzzyStringMatcher.
  # @param store [PEROBS::Store] place to store the dictionary
  # @param name [String] Unique name of the string matcher
  # @param case_sensitive [Boolean] True if case matters for matching
  # @param n [Integer] Determines what kind of n-gram is used to store the
  #        references in the dictionary. It also determines the minimum word
  #        length that can be used for fuzzy matches.
  # @raise [ArgumentError] if n is smaller than 2 or larger than 10
  def initialize(store, name, case_sensitive = false, n = 4)
    @store = store
    @dict_name = "FuzzyStringMatcher::#{name}"
    if n < 2 || n > 10
      raise ArgumentError, 'n must be between 2 and 10'
    end
    @case_sensitive = case_sensitive
    @n = n

    # Reuse the dictionary that is already registered in the store under
    # this name. Only create a fresh one if there is none yet.
    clear unless (@dict = @store[@dict_name])
  end

  # Wipe the dictionary. A new, empty BigHash is created and registered in
  # the store under the name of this matcher.
  def clear
    @store[@dict_name] = @dict = @store.new(BigHash)
  end

  # Add a string with its reference to the dictionary.
  # @param string [String] The string to store
  # @param reference [Object] Any object that is associated with the string.
  #        Defaults to the (unmodified) string itself.
  #        NOTE(review): the reference is used as key of the per-n-gram
  #        Hash created via @store.new(Hash). If that Hash only accepts
  #        String keys, references must be Strings -- confirm.
  # @return [nil]
  def learn(string, reference = string)
    reference = string if reference.nil?

    each_n_gramm(normalize(string)) do |n_gramm|
      unless (ng_list = @dict[n_gramm])
        @dict[n_gramm] = ng_list = @store.new(Hash)
      end

      # The stored counter starts at 0 for the first occurrence and is
      # incremented for every further occurrence of the n-gram for the
      # same reference.
      if ng_list.include?(reference)
        ng_list[reference] += 1
      else
        ng_list[reference] = 0
      end
    end

    nil
  end

  # Find the references whose string best matches the given string.
  # @param string [String] string to search for
  # @param min_score [Float] Value between 0.01 and 1.0 that specifies how
  #        strict the matching should be done. The larger the value the
  #        closer the given string needs to be.
  # @param max_count [Integer] The maximum number of matches that should be
  #        returned.
  # @return [Array] The result is an Array of Arrays. The nested Arrays only
  #         have 2 entries. The reference and a Float value between 0 and
  #         1.0 that describes how good the match is. The matches are sorted
  #         in descending order by the match score.
  def best_matches(string, min_score = 0.5, max_count = 100)
    matches = {}

    # This will be the best possible score for a perfect match. It is the
    # number of n-grams of the search string.
    best_possible_score = 0
    each_n_gramm(normalize(string)) do |n_gramm|
      best_possible_score += 1
      next unless (ng_list = @dict[n_gramm])

      ng_list.each do |reference, _count|
        if matches.include?(reference)
          matches[reference] += 1
        else
          # We use internally a 10 times larger list so that we don't
          # throw away good matches too early. If the max_count value is
          # chosen too small there is a risk of not finding the best
          # matches!
          if matches.size > 10 * max_count
            matches = discard_worst_match(matches)
          end
          matches[reference] = 1
        end
      end
    end

    return [] if matches.empty?

    # Sort in the order of occurrence count downwards.
    match_list = matches.to_a.sort do |a, b|
      b[1] <=> a[1]
    end

    # Set occurrence counters to scores relative to the best possible score.
    match_list.map! do |reference, count|
      [ reference, count.to_f / best_possible_score ]
    end

    # Delete all matches that score below the requested minimum score.
    match_list.delete_if { |_reference, score| score < min_score }

    # Return at most max_count entries. A Range of 0..max_count would
    # return one entry too many.
    match_list.first([ max_count, 0 ].max)
  end

  # Returns some internal stats about the dictionary.
  # @return [Hash] with the String keys 'dictionary_size' (number of
  #         n-grams), 'max_list_size' (largest number of references stored
  #         for a single n-gram) and 'avg_list_size' (average number of
  #         references per n-gram, 0 for an empty dictionary)
  def stats
    dictionary_size = @dict.size
    max = total = 0
    @dict.each do |_n_gramm, ng_list|
      size = ng_list.length
      max = size if size > max
      total += size
    end

    {
      'dictionary_size' => dictionary_size,
      'max_list_size' => max,
      'avg_list_size' => total > 0 ? total.to_f / dictionary_size : 0
    }
  end

  private

  # Bring a string into the form that is used for the n-gram generation.
  # Unless the matcher is case sensitive the string is converted to lower
  # case. It is then enclosed in 'start of text' and 'end of text' ASCII
  # values so that the beginning and the end of the string become part of
  # the n-grams.
  def normalize(string)
    string = string.downcase unless @case_sensitive
    "\002" + string + "\003"
  end

  # Yield all substrings of length @n of the given string, from left to
  # right. Strings that are shorter than @n yield nothing.
  def each_n_gramm(string)
    return if string.length < @n

    0.upto(string.length - @n) do |i|
      yield(string[i, @n])
    end
  end

  # Return a new Hash that only contains the better half (plus one entry)
  # of the given reference => occurrence count pairs.
  def discard_worst_match(matches)
    # Sort in the order of occurrence count downwards.
    match_list = matches.to_a.sort do |a, b|
      b[1] <=> a[1]
    end
    # Discard the lowest half of the matches
    match_list[0..match_list.length / 2].to_h
  end

end
|
190
|
+
|
191
|
+
end
|
192
|
+
|
data/lib/perobs/Hash.rb
CHANGED
@@ -124,6 +124,10 @@ module PEROBS
|
|
124
124
|
|
125
125
|
# Proxy for assignment method.
|
126
126
|
def []=(key, value)
|
127
|
+
unless key.is_a?(String)
|
128
|
+
raise ArgumentError, "PEROBS::Hash[] key must be a String but is a " +
|
129
|
+
"#{key.class}"
|
130
|
+
end
|
127
131
|
_check_assignment_value(value)
|
128
132
|
@store.cache.cache_write(self)
|
129
133
|
@data[key] = value
|
@@ -54,8 +54,7 @@ module PEROBS
|
|
54
54
|
@file_name = File.join(dir, name + '.cache')
|
55
55
|
@page_size = page_size
|
56
56
|
open
|
57
|
-
@pages = PersistentObjectCache.new(max_in_memory,
|
58
|
-
IDListPage, self)
|
57
|
+
@pages = PersistentObjectCache.new(max_in_memory, -1, IDListPage, self)
|
59
58
|
@page_counter = 0
|
60
59
|
end
|
61
60
|
|
data/lib/perobs/ObjectBase.rb
CHANGED
@@ -250,7 +250,7 @@ module PEROBS
|
|
250
250
|
def _restore(level)
|
251
251
|
# Find the most recently stored state of this object. This could be on
|
252
252
|
# any previous stash level or in the regular object DB. If the object
|
253
|
-
# was created during the transaction, there is
|
253
|
+
# was created during the transaction, there is no previous state to
|
254
254
|
# restore to.
|
255
255
|
data = nil
|
256
256
|
if @_stash_map
|
@@ -44,7 +44,8 @@ module PEROBS
|
|
44
44
|
# cache objects.
|
45
45
|
# @param size [Integer] Minimum number of objects to be cached at a time
|
46
46
|
# @param flush_delay [Integer] Determines how often non-forced flushes are
|
47
|
-
# ignored in a row before the flush is really done.
|
47
|
+
# ignored in a row before the flush is really done. If flush_delay
|
48
|
+
# is smaller than 0 non-forced flushes will always be ignored.
|
48
49
|
# @param klass [Class] The class of the objects to be cached. Objects must
|
49
50
|
# provide a uid() method that returns a unique ID for every object.
|
50
51
|
# @param collection [] The object collection the objects belong to. It
|
@@ -71,8 +72,7 @@ module PEROBS
|
|
71
72
|
if modified
|
72
73
|
@modified_entries[object.uid] = object
|
73
74
|
else
|
74
|
-
|
75
|
-
@unmodified_entries[index] = object
|
75
|
+
@unmodified_entries[object.uid % @size] = object
|
76
76
|
end
|
77
77
|
|
78
78
|
nil
|
@@ -111,9 +111,12 @@ module PEROBS
|
|
111
111
|
# all modified objects will be written.
|
112
112
|
# @param now [Boolean]
|
113
113
|
def flush(now = false)
|
114
|
-
if now || (@flush_counter -= 1) <= 0
|
114
|
+
if now || (@flush_delay >= 0 && (@flush_counter -= 1) <= 0)
|
115
115
|
@modified_entries.each do |id, object|
|
116
116
|
object.save
|
117
|
+
# Add the object to the unmodified object cache. We might still need
|
118
|
+
# it again soon.
|
119
|
+
@unmodified_entries[object.uid % @size] = object
|
117
120
|
end
|
118
121
|
@modified_entries = ::Hash.new
|
119
122
|
@flush_counter = @flush_delay
|
@@ -0,0 +1,273 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# = SpaceManager.rb -- Persistent Ruby Object Store
|
4
|
+
#
|
5
|
+
# Copyright (c) 2020 by Chris Schlaeger <chris@taskjuggler.org>
|
6
|
+
#
|
7
|
+
# MIT License
|
8
|
+
#
|
9
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
10
|
+
# a copy of this software and associated documentation files (the
|
11
|
+
# "Software"), to deal in the Software without restriction, including
|
12
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
13
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
14
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
15
|
+
# the following conditions:
|
16
|
+
#
|
17
|
+
# The above copyright notice and this permission notice shall be
|
18
|
+
# included in all copies or substantial portions of the Software.
|
19
|
+
#
|
20
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
21
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
23
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
24
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
25
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
26
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27
|
+
|
28
|
+
require 'perobs/BTree'
|
29
|
+
require 'perobs/EquiBlobsFile'
|
30
|
+
require 'perobs/FlatFile'
|
31
|
+
require 'perobs/FlatFileBlobHeader'
|
32
|
+
|
33
|
+
module PEROBS
|
34
|
+
|
35
|
+
# The SpaceManager is used to keep a list of all the empty spaces in a
|
36
|
+
# FlatFileDB file. An empty space is described by its starting address and
|
37
|
+
# its length in bytes. The SpaceManager keeps a list of all the spaces and
|
38
|
+
# can find the best fit space when a new blob needs to be added to the
|
39
|
+
# FlatFileDB.
|
40
|
+
#
|
41
|
+
# The SpaceManager uses two files to store the list. The first is a file
|
42
|
+
# with the actual addresses. This is a set of linked address lists. Each
|
43
|
+
# list holds the addresses for spaces that have exactly the same size. The
|
44
|
+
# second file is a BTree file that serves as the index. It is used to map
|
45
|
+
# the length of a space to the address of the linked list for that
|
46
|
+
# particular length. The linked list consists of elements that only hold 2
|
47
|
+
# items. The actual address in the FlatFileDB and the address of the next
|
48
|
+
# entry in the linked list in the list file.
|
49
|
+
class SpaceManager

  # Counters for the spaces added, the spaces handed out again and the
  # requests that could not be satisfied. They are reset by open() and
  # clear().
  attr_reader :added_spaces, :recycled_spaces, :failed_requests

  # Create a new SpaceManager. The backing files are not opened yet.
  # @param db_dir [String] passed on to the BTree and EquiBlobsFile
  #        constructors -- presumably the directory that holds the files
  # @param progressmeter passed on to the BTree and EquiBlobsFile
  #        constructors
  # @param btree_order [Integer] order of the BTree used as index
  def initialize(db_dir, progressmeter, btree_order = 65)
    @db_dir = db_dir
    @progressmeter = progressmeter

    @index = BTree.new(@db_dir, 'space_index', btree_order, @progressmeter)
    # The space list contains blobs that have each 2 entries. The address of
    # the space in the FlatFile and the address of the next blob in the
    # space list file that is an entry for the same space size. An address
    # of 0 marks the end of the list. Both entries are packed as 64 bit
    # values, hence the 2 * 8 bytes.
    @list = EquiBlobsFile.new(@db_dir, 'space_list', @progressmeter, 2 * 8, 1)
  end

  # Open the index and the list file and reset the statistics counters.
  def open
    @index.open
    @list.open
    reset_stats
  end

  # Log the usage statistics and close both files. Does nothing if the
  # index is not open.
  def close
    return unless @index.is_open?

    PEROBS.log.info "SpaceManager has currently #{@list.total_entries} " +
      "used blobs and #{@list.total_spaces} unused blobs in list " +
      "EquiBlobsFile"
    PEROBS.log.info "#{@added_spaces} were added, #{@recycled_spaces} " +
      "spaces were recycled and #{@failed_requests} requests failed"

    @list.close
    @index.close
  end

  # @return true if the index file is open
  def is_open?
    @index.is_open?
  end

  # Sync the list and the index file.
  def sync
    @list.sync
    @index.sync
  end

  # Register a new space. The space becomes the new head of the linked list
  # for its length.
  # @param address [Integer] Address of the space in the FlatFile
  # @param length [Integer] Length of the space in bytes
  def add_space(address, length)
    # If there is already a list for this length, the current head becomes
    # the successor of the new entry. Otherwise the new entry terminates
    # the list (successor address 0).
    new_list_entry_addr =
      insert_space_in_list(address, @index.get(length) || 0)
    @index.insert(length, new_list_entry_addr)
    @added_spaces += 1
  end

  # Check if a space with the given address and length is registered.
  # @param address [Integer] Address of the space in the FlatFile
  # @param length [Integer] Length of the space in bytes
  # @return [Boolean] true if the space is known, false otherwise
  def has_space?(address, length)
    list_entry_addr = @index.get(length)
    while list_entry_addr && list_entry_addr > 0
      blob = @list.retrieve_blob(list_entry_addr)
      space_address, next_entry_addr = blob.unpack('QQ')
      return true if space_address == address

      list_entry_addr = next_entry_addr
    end

    false
  end

  # Request a space of exactly the given length. If one is found, it is
  # removed from the SpaceManager.
  # @param length [Integer] Requested length in bytes
  # @return [Array, nil] The address and the length of the space or nil if
  #         no space of that length is available
  def get_space(length)
    # We use a simple exact fit strategy. All attempts to use a more
    # elaborate scheme were actually less efficient. Non-exact matches
    # generate new spaces for the remainder and fragment the blob file with
    # lots of unusable small spaces. Most applications seem to have
    # clustered their blob sizes around a number of popular sizes. So exact
    # match is very efficient to implement and results in the highest
    # probability that a space will be reused soon.
    unless (list_entry_addr = @index.get(length))
      @failed_requests += 1
      return nil
    end

    # Take the head of the list for this length.
    blob = @list.retrieve_blob(list_entry_addr)
    space_address, next_entry_addr = blob.unpack('QQ')
    @list.delete_blob(list_entry_addr)

    if next_entry_addr > 0
      # Update the index entry for the length to point to the
      # following space list entry.
      @index.insert(length, next_entry_addr)
    else
      # The space list for this length is empty. Remove the entry
      # from the index.
      @index.remove(length)
    end
    @recycled_spaces += 1

    # We return the length to remain compatible with the old SpaceTree
    # API.
    [ space_address, length ]
  end

  # Remove all entries from the list and the index and reset the
  # statistics counters.
  def clear
    @list.clear
    @index.clear
    reset_stats
  end

  # Erase the list and the index file.
  def erase
    @list.erase
    @index.erase
  end

  # Check the consistency of the index and all space lists.
  # @param flat_file [FlatFile, nil] If given, every space is verified via
  #        flat_file.has_space?(address, length)
  # @return [Boolean] true if no error was found, false otherwise
  def check(flat_file = nil)
    sync
    return false unless @index.check
    return false unless @list.check

    smallest_space = nil
    largest_space = nil
    total_space_bytes = 0
    space_distribution = ::Hash.new(0)

    @index.each do |length, list_entry_addr|
      if list_entry_addr <= 0
        PEROBS.log.error "list_entry_addr (#{list_entry_addr}) " +
          "must be positive"
        return false
      end

      # Detect smallest and largest space
      if smallest_space.nil? || length < smallest_space
        smallest_space = length
      end
      if largest_space.nil? || length > largest_space
        largest_space = length
      end

      # All list entry addresses seen so far for this length. Every entry
      # that is walked gets recorded so that a cycle is detected no matter
      # which entry it leads back to.
      visited = {}
      entries = 0
      while list_entry_addr > 0
        entries += 1
        visited[list_entry_addr] = true
        unless (blob = @list.retrieve_blob(list_entry_addr))
          PEROBS.log.error "SpaceManager points to non-existing " +
            "space list entry at address #{list_entry_addr}"
          return false
        end
        space_address, next_entry_addr = blob.unpack('QQ')

        if visited.key?(next_entry_addr)
          PEROBS.log.error "Space list is cyclic: " +
            "#{visited.keys + [ next_entry_addr ]}"
          return false
        end
        if flat_file &&
           !flat_file.has_space?(space_address, length)
          PEROBS.log.error "SpaceManager has space at offset " +
            "#{space_address} of size #{length} that isn't " +
            "available in the FlatFile."
          return false
        end
        list_entry_addr = next_entry_addr
      end

      total_space_bytes += length * entries
      # Group the spaces by the position of the highest set bit of their
      # length, i. e. into power-of-2 size classes.
      space_distribution[msb(length)] += entries
    end

    distribution = space_distribution.map do |l, c|
      "#{2 ** (l - 1)}-#{2 ** l - 1}:#{c}; "
    end.join
    PEROBS.log.info "SpaceManager stats: smallest: #{smallest_space}; " +
      "largest: #{largest_space}; total bytes: #{total_space_bytes}; " +
      "distribution: " + distribution

    true
  end

  # Convert the managed spaces into an Array.
  # @return [Array] Array of [ address, length ] Arrays, sorted by address
  def to_a
    spaces = []

    @index.each do |length, list_entry_addr|
      while list_entry_addr > 0
        blob = @list.retrieve_blob(list_entry_addr)
        space_address, next_entry_addr = blob.unpack('QQ')

        spaces << [ space_address, length ]

        list_entry_addr = next_entry_addr
      end
    end

    spaces.sort { |x, y| x[0] <=> y[0] }
  end

  private

  # Store a new list entry in the list file.
  # @param space_address [Integer] Address of the space in the FlatFile
  # @param next_entry_addr [Integer] Address of the following list entry or
  #        0 if this is the last entry of the list
  # @return [Integer] Address of the new entry in the list file
  def insert_space_in_list(space_address, next_entry_addr)
    # The order must match the blob.unpack('QQ') calls of the readers.
    blob = [ space_address, next_entry_addr ].pack('QQ')
    @list.store_blob(blob_addr = @list.free_address, blob)

    blob_addr
  end

  # Determine the 1-based position of the most significant set bit of the
  # given Integer. 0 is returned for 0, 63 for all negative values.
  def msb(i)
    i < 0 ? 63 : i.bit_length
  end

  # Set all statistics counters back to 0.
  def reset_stats
    @added_spaces = 0
    @recycled_spaces = 0
    @failed_requests = 0
  end

end
|
271
|
+
|
272
|
+
end
|
273
|
+
|