perobs 4.1.0 → 4.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/lib/perobs/BTree.rb +33 -13
- data/lib/perobs/BTreeBlob.rb +3 -2
- data/lib/perobs/BTreeDB.rb +4 -3
- data/lib/perobs/BTreeNode.rb +107 -78
- data/lib/perobs/BTreeNodeLink.rb +10 -0
- data/lib/perobs/BigArray.rb +19 -1
- data/lib/perobs/BigArrayNode.rb +13 -9
- data/lib/perobs/BigHash.rb +8 -24
- data/lib/perobs/BigTree.rb +14 -1
- data/lib/perobs/BigTreeNode.rb +2 -2
- data/lib/perobs/Cache.rb +31 -6
- data/lib/perobs/EquiBlobsFile.rb +12 -1
- data/lib/perobs/FlatFile.rb +197 -45
- data/lib/perobs/FlatFileBlobHeader.rb +20 -5
- data/lib/perobs/FlatFileDB.rb +8 -4
- data/lib/perobs/FuzzyStringMatcher.rb +192 -0
- data/lib/perobs/Hash.rb +4 -0
- data/lib/perobs/IDListPageFile.rb +1 -2
- data/lib/perobs/ObjectBase.rb +1 -1
- data/lib/perobs/PersistentObjectCache.rb +7 -4
- data/lib/perobs/SpaceManager.rb +273 -0
- data/lib/perobs/SpaceTree.rb +1 -1
- data/lib/perobs/Store.rb +67 -25
- data/lib/perobs/version.rb +1 -1
- data/perobs.gemspec +2 -2
- data/test/BTree_spec.rb +1 -0
- data/test/BigArray_spec.rb +53 -6
- data/test/BigHash_spec.rb +8 -0
- data/test/FlatFileDB_spec.rb +108 -3
- data/test/FuzzyStringMatcher_spec.rb +171 -0
- data/test/LegacyDBs/LegacyDB.rb +4 -0
- data/test/SpaceManager_spec.rb +176 -0
- data/test/Store_spec.rb +2 -5
- metadata +12 -6
@@ -115,6 +115,10 @@ module PEROBS
|
|
115
115
|
PEROBS.log.fatal "Cannot read blob header " +
|
116
116
|
"#{id ? "for ID #{id} " : ''}at address #{addr}"
|
117
117
|
else
|
118
|
+
if corruption_start
|
119
|
+
PEROBS.log.error "Corruption found at end of blob file at " +
|
120
|
+
"address #{addr}"
|
121
|
+
end
|
118
122
|
# We have reached the end of the file.
|
119
123
|
return nil
|
120
124
|
end
|
@@ -122,10 +126,15 @@ module PEROBS
|
|
122
126
|
|
123
127
|
# Did we get the full header?
|
124
128
|
if buf_with_crc.length != LENGTH
|
125
|
-
|
129
|
+
msg = "Incomplete FlatFileBlobHeader: Only " +
|
126
130
|
"#{buf_with_crc.length} " +
|
127
131
|
"bytes of #{LENGTH} could be read "
|
128
132
|
"#{id ? "for ID #{id} " : ''}at address #{addr}"
|
133
|
+
if errors_are_fatal
|
134
|
+
PEROBS.log.fatal msg
|
135
|
+
else
|
136
|
+
PEROBS.log.error msg
|
137
|
+
end
|
129
138
|
return nil
|
130
139
|
end
|
131
140
|
|
@@ -148,10 +157,16 @@ module PEROBS
|
|
148
157
|
"#{'%08x' % crc}."
|
149
158
|
else
|
150
159
|
if corruption_start.nil?
|
151
|
-
|
152
|
-
"
|
153
|
-
|
154
|
-
|
160
|
+
if errors_are_fatal
|
161
|
+
PEROBS.log.fatal "FlatFile corruption found. The FlatFile " +
|
162
|
+
"Header CRC mismatch at address #{addr}. Header CRC is " +
|
163
|
+
"#{'%08x' % read_crc} but should be #{'%08x' % crc}."
|
164
|
+
else
|
165
|
+
PEROBS.log.error "FlatFile corruption found. The FlatFile " +
|
166
|
+
"Header CRC mismatch at address #{addr}. Header CRC is " +
|
167
|
+
"#{'%08x' % read_crc} but should be #{'%08x' % crc}. " +
|
168
|
+
"Trying to find the next header."
|
169
|
+
end
|
155
170
|
corruption_start = addr
|
156
171
|
end
|
157
172
|
# The blob file is corrupted. There is no valid header at the
|
data/lib/perobs/FlatFileDB.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#
|
3
3
|
# = FlatFileDB.rb -- Persistent Ruby Object Store
|
4
4
|
#
|
5
|
-
# Copyright (c) 2015, 2016, 2017, 2018
|
5
|
+
# Copyright (c) 2015, 2016, 2017, 2018, 2019
|
6
6
|
# by Chris Schlaeger <chris@taskjuggler.org>
|
7
7
|
#
|
8
8
|
# MIT License
|
@@ -161,8 +161,8 @@ module PEROBS
|
|
161
161
|
# Permanently delete all objects that have not been marked. Those are
|
162
162
|
# orphaned and are no longer referenced by any actively used object.
|
163
163
|
# @return [Integer] Number of the removed objects from the DB.
|
164
|
-
def delete_unmarked_objects
|
165
|
-
@flat_file.delete_unmarked_objects
|
164
|
+
def delete_unmarked_objects(&block)
|
165
|
+
@flat_file.delete_unmarked_objects(&block)
|
166
166
|
end
|
167
167
|
|
168
168
|
# Mark an object.
|
@@ -184,7 +184,11 @@ module PEROBS
|
|
184
184
|
# repaired.
|
185
185
|
# @return number of errors found
|
186
186
|
def check_db(repair = false)
|
187
|
-
|
187
|
+
if repair
|
188
|
+
@flat_file.repair
|
189
|
+
else
|
190
|
+
@flat_file.check
|
191
|
+
end
|
188
192
|
end
|
189
193
|
|
190
194
|
# Check if the stored object is syntactically correct.
|
@@ -0,0 +1,192 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# = FuzzyStringMatcher.rb -- Persistent Ruby Object Store
|
4
|
+
#
|
5
|
+
# Copyright (c) 2020 by Chris Schlaeger <chris@taskjuggler.org>
|
6
|
+
#
|
7
|
+
# MIT License
|
8
|
+
#
|
9
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
10
|
+
# a copy of this software and associated documentation files (the
|
11
|
+
# "Software"), to deal in the Software without restriction, including
|
12
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
13
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
14
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
15
|
+
# the following conditions:
|
16
|
+
#
|
17
|
+
# The above copyright notice and this permission notice shall be
|
18
|
+
# included in all copies or substantial portions of the Software.
|
19
|
+
#
|
20
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
21
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
23
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
24
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
25
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
26
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27
|
+
|
28
|
+
require 'perobs/Log'
|
29
|
+
require 'perobs/ObjectBase'
|
30
|
+
|
31
|
+
module PEROBS
|
32
|
+
|
33
|
+
# The fuzzy string matcher can be used to perform a fuzzy string search
|
34
|
+
# against a known set of strings. The dictionary of known strings does not
|
35
|
+
# store the actual strings but references to arbitrary objects. These could
|
36
|
+
# be the string, but can be something else related to the learned strings.
|
37
|
+
# To use this class a list of strings with their references must be learned.
|
38
|
+
# Once the dictionary has been established, fuzzy matches can be done.
|
39
|
+
class FuzzyStringMatcher

  # Create a new FuzzyStringMatcher.
  # @param store [PEROBS::Store] place to store the dictionary
  # @param name [String] Unique name of the string matcher
  # @param case_sensitive [Boolean] True if case matters for matching
  # @param n [Integer] Determines what kind of n-gram is used to store the
  #        references in the dictionary. It also determines the minimum word
  #        length that can be used for fuzzy matches. Must be within 2..10.
  # @raise [ArgumentError] if n is smaller than 2 or larger than 10
  def initialize(store, name, case_sensitive = false, n = 4)
    @store = store
    @dict_name = "FuzzyStringMatcher::#{name}"
    if n < 2 || n > 10
      raise ArgumentError, 'n must be between 2 and 10'
    end
    @case_sensitive = case_sensitive
    @n = n

    # Re-use the dictionary that is already registered in the store under
    # this name. Only create a fresh one if there is none yet.
    clear unless (@dict = @store[@dict_name])
  end

  # Wipe the dictionary. A new, empty dictionary is registered in the store
  # under the name of this matcher.
  def clear
    @store[@dict_name] = @dict = @store.new(BigHash)
  end

  # Add a string with its reference to the dictionary.
  # @param string [String] The string to store
  # @param reference [Object] Any object that is associated with the string.
  #        Defaults to the string itself (as passed in, before any case
  #        folding).
  # @return [nil]
  def learn(string, reference = string)
    reference = string if reference.nil?

    each_n_gramm(normalize(string)) do |n_gramm|
      unless (ng_list = @dict[n_gramm])
        @dict[n_gramm] = ng_list = @store.new(Hash)
      end

      # NOTE(review): the counter starts at 0 for the first occurrence, so
      # it holds "occurrences - 1". It is not read anywhere in this class;
      # it is left unchanged to stay compatible with stored dictionaries.
      if ng_list.include?(reference)
        ng_list[reference] += 1
      else
        ng_list[reference] = 0
      end
    end

    nil
  end

  # Find the references whose string best matches the given string.
  # @param string [String] string to search for
  # @param min_score [Float] Value between 0.01 and 1.0 that specifies how
  #        strict the matching should be done. The larger the value the
  #        closer the given string needs to be.
  # @param max_count [Integer] The maximum number of matches that should be
  #        returned.
  # @return [Array] The result is an Array of Arrays. The nested Arrays only
  #         have 2 entries. The reference and a Float value between 0 and
  #         1.0 that describes how good the match is. The matches are sorted
  #         in descending order by the match score. At most max_count
  #         entries are returned.
  def best_matches(string, min_score = 0.5, max_count = 100)
    matches = {}

    # Number of n-grams of the search string. This is the score a perfect
    # match would reach.
    best_possible_score = 0
    each_n_gramm(normalize(string)) do |n_gramm|
      best_possible_score += 1
      next unless (ng_list = @dict[n_gramm])

      ng_list.each do |reference, _count|
        if matches.include?(reference)
          matches[reference] += 1
        else
          # We use internally a 10 times larger list so that we don't
          # throw away good matches too early. If the max_count value is
          # chosen too small there is a risk of not finding the best
          # matches!
          if matches.size > 10 * max_count
            matches = discard_worst_match(matches)
          end
          matches[reference] = 1
        end
      end
    end

    return [] if matches.empty?

    # Sort in the order of occurrence count downwards.
    match_list = matches.to_a.sort { |a, b| b[1] <=> a[1] }

    # Set occurrence counters to scores relative to the best possible score.
    match_list.map! do |reference, hits|
      [ reference, hits.to_f / best_possible_score ]
    end

    # Delete all matches that scored lower than the requested minimum.
    match_list.delete_if { |entry| entry[1] < min_score }

    # Return no more than max_count entries. The previous 0..max_count slice
    # returned one entry too many.
    match_list.first([ max_count, 0 ].max)
  end

  # Returns some internal stats about the dictionary.
  # @return [::Hash] with the String keys 'dictionary_size' (number of
  #         n-grams), 'max_list_size' (largest number of references stored
  #         for a single n-gram) and 'avg_list_size' (average number of
  #         references per n-gram, 0 for an empty dictionary)
  def stats
    s = {}
    s['dictionary_size'] = @dict.size
    max = total = 0
    @dict.each do |_n_gramm, ng_list|
      size = ng_list.length
      max = size if size > max
      total += size
    end
    s['max_list_size'] = max
    s['avg_list_size'] = total > 0 ? total.to_f / s['dictionary_size'] : 0

    s
  end

  private

  # Bring a string into the form that is used for the n-gram generation.
  # Unless the matcher is case sensitive the string is converted to lower
  # case. It is then enclosed in the 'start of text' and 'end of text' ASCII
  # values so that word boundaries become part of the n-grams.
  def normalize(string)
    string = string.downcase unless @case_sensitive

    "\002" + string + "\003"
  end

  # Yield every substring of length @n of the given string, from left to
  # right. Nothing is yielded if the string is shorter than @n.
  def each_n_gramm(string)
    return if string.length < @n

    0.upto(string.length - @n) do |i|
      yield(string[i, @n])
    end
  end

  # Return a new Hash that only holds the better half (plus one entry) of
  # the given reference => count pairs.
  def discard_worst_match(matches)
    # Sort in the order of occurrence count downwards.
    match_list = matches.to_a.sort { |a, b| b[1] <=> a[1] }

    # Discard the lowest half of the matches
    match_list[0..match_list.length / 2].to_h
  end

end
|
190
|
+
|
191
|
+
end
|
192
|
+
|
data/lib/perobs/Hash.rb
CHANGED
@@ -124,6 +124,10 @@ module PEROBS
|
|
124
124
|
|
125
125
|
# Proxy for assignment method.
|
126
126
|
def []=(key, value)
|
127
|
+
unless key.is_a?(String)
|
128
|
+
raise ArgumentError, "PEROBS::Hash[] key must be a String but is a " +
|
129
|
+
"#{key.class}"
|
130
|
+
end
|
127
131
|
_check_assignment_value(value)
|
128
132
|
@store.cache.cache_write(self)
|
129
133
|
@data[key] = value
|
@@ -54,8 +54,7 @@ module PEROBS
|
|
54
54
|
@file_name = File.join(dir, name + '.cache')
|
55
55
|
@page_size = page_size
|
56
56
|
open
|
57
|
-
@pages = PersistentObjectCache.new(max_in_memory,
|
58
|
-
IDListPage, self)
|
57
|
+
@pages = PersistentObjectCache.new(max_in_memory, -1, IDListPage, self)
|
59
58
|
@page_counter = 0
|
60
59
|
end
|
61
60
|
|
data/lib/perobs/ObjectBase.rb
CHANGED
@@ -250,7 +250,7 @@ module PEROBS
|
|
250
250
|
def _restore(level)
|
251
251
|
# Find the most recently stored state of this object. This could be on
|
252
252
|
# any previous stash level or in the regular object DB. If the object
|
253
|
-
# was created during the transaction, there is
|
253
|
+
# was created during the transaction, there is no previous state to
|
254
254
|
# restore to.
|
255
255
|
data = nil
|
256
256
|
if @_stash_map
|
@@ -44,7 +44,8 @@ module PEROBS
|
|
44
44
|
# cache objects.
|
45
45
|
# @param size [Integer] Minimum number of objects to be cached at a time
|
46
46
|
# @param flush_delay [Integer] Determines how often non-forced flushes are
|
47
|
-
# ignored in a row before the flush is really done.
|
47
|
+
# ignored in a row before the flush is really done. If flush_delay
|
48
|
+
# is smaller than 0, non-forced flushes will always be ignored.
|
48
49
|
# @param klass [Class] The class of the objects to be cached. Objects must
|
49
50
|
# provide a uid() method that returns a unique ID for every object.
|
50
51
|
# @param collection [] The object collection the objects belong to. It
|
@@ -71,8 +72,7 @@ module PEROBS
|
|
71
72
|
if modified
|
72
73
|
@modified_entries[object.uid] = object
|
73
74
|
else
|
74
|
-
|
75
|
-
@unmodified_entries[index] = object
|
75
|
+
@unmodified_entries[object.uid % @size] = object
|
76
76
|
end
|
77
77
|
|
78
78
|
nil
|
@@ -111,9 +111,12 @@ module PEROBS
|
|
111
111
|
# all modified objects will be written.
|
112
112
|
# @param now [Boolean]
|
113
113
|
def flush(now = false)
|
114
|
-
if now || (@flush_counter -= 1) <= 0
|
114
|
+
if now || (@flush_delay >= 0 && (@flush_counter -= 1) <= 0)
|
115
115
|
@modified_entries.each do |id, object|
|
116
116
|
object.save
|
117
|
+
# Add the object to the unmodified object cache. We might still need
|
118
|
+
# it again soon.
|
119
|
+
@unmodified_entries[object.uid % @size] = object
|
117
120
|
end
|
118
121
|
@modified_entries = ::Hash.new
|
119
122
|
@flush_counter = @flush_delay
|
@@ -0,0 +1,273 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# = SpaceManager.rb -- Persistent Ruby Object Store
|
4
|
+
#
|
5
|
+
# Copyright (c) 2020 by Chris Schlaeger <chris@taskjuggler.org>
|
6
|
+
#
|
7
|
+
# MIT License
|
8
|
+
#
|
9
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
10
|
+
# a copy of this software and associated documentation files (the
|
11
|
+
# "Software"), to deal in the Software without restriction, including
|
12
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
13
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
14
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
15
|
+
# the following conditions:
|
16
|
+
#
|
17
|
+
# The above copyright notice and this permission notice shall be
|
18
|
+
# included in all copies or substantial portions of the Software.
|
19
|
+
#
|
20
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
21
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
23
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
24
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
25
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
26
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27
|
+
|
28
|
+
require 'perobs/BTree'
|
29
|
+
require 'perobs/EquiBlobsFile'
|
30
|
+
require 'perobs/FlatFile'
|
31
|
+
require 'perobs/FlatFileBlobHeader'
|
32
|
+
|
33
|
+
module PEROBS
|
34
|
+
|
35
|
+
# The SpaceManager is used to keep a list of all the empty spaces in a
|
36
|
+
# FlatFileDB file. An empty space is described by its starting address and
|
37
|
+
# its length in bytes. The SpaceManager keeps a list of all the spaces and
|
38
|
+
# can find the best fit space when a new blob needs to be added to the
|
39
|
+
# FlatFileDB.
|
40
|
+
#
|
41
|
+
# The SpaceManager uses two files to store the list. The first is a file
|
42
|
+
# with the actual addresses. This is a set of linked address lists. Each
|
43
|
+
# list holds the addresses for spaces that have exactly the same size. The
|
44
|
+
# second file is a BTree file that serves as the index. It is used to map
|
45
|
+
# the length of a space to the address of the linked list for that
|
46
|
+
# particular length. The linked list consists of elements that only hold 2
|
47
|
+
# items. The actual address in the FlatFileDB and the address of the next
|
48
|
+
# entry in the linked list in the list file.
|
49
|
+
class SpaceManager

  attr_reader :added_spaces, :recycled_spaces, :failed_requests

  # Create a new SpaceManager. The files are not opened yet, call open()
  # before using the object.
  # @param db_dir [String] directory that holds the index and list files
  # @param progressmeter progress meter that is handed on to the BTree and
  #        the EquiBlobsFile
  # @param btree_order [Integer] order of the BTree used as index
  def initialize(db_dir, progressmeter, btree_order = 65)
    @db_dir = db_dir
    @progressmeter = progressmeter

    @index = BTree.new(@db_dir, 'space_index', btree_order, @progressmeter)
    # The space list contains blobs that have each 2 entries. The address of
    # the space in the FlatFile and the address of the next blob in the
    # space list file that is an entry for the same space size. An address
    # of 0 marks the end of the list.
    @list = EquiBlobsFile.new(@db_dir, 'space_list', @progressmeter, 2 * 8, 1)
  end

  # Open the index and the list file and reset the usage counters.
  def open
    @index.open
    @list.open
    reset_stats
  end

  # Log the usage statistics and close both files. Does nothing if the
  # index is not open.
  def close
    if @index.is_open?
      PEROBS.log.info "SpaceManager has currently #{@list.total_entries} " +
        "used blobs and #{@list.total_spaces} unused blobs in list " +
        "EquiBlobsFile"
      PEROBS.log.info "#{@added_spaces} were added, #{@recycled_spaces} " +
        "spaces were recycled and #{@failed_requests} requests failed"

      @list.close
      @index.close
    end
  end

  # @return true if the index file is open
  def is_open?
    @index.is_open?
  end

  # Sync the list file and the index file.
  def sync
    @list.sync
    @index.sync
  end

  # Register a new space. The new list entry becomes the head of the linked
  # list for the given length.
  # @param address [Integer] address of the space in the FlatFile
  # @param length [Integer] size of the space in bytes
  def add_space(address, length)
    # If there is already at least one more entry for this length the new
    # entry points to it. Otherwise 0 marks the new entry as end of list.
    new_list_entry_addr = insert_space_in_list(address,
                                               @index.get(length) || 0)
    @index.insert(length, new_list_entry_addr)
    @added_spaces += 1
  end

  # Check if a space with the given address and length is registered.
  # @param address [Integer] address of the space in the FlatFile
  # @param length [Integer] size of the space in bytes
  # @return [Boolean]
  def has_space?(address, length)
    if (list_entry_addr = @index.get(length))
      while list_entry_addr > 0
        blob = @list.retrieve_blob(list_entry_addr)
        space_address, next_entry_addr = blob.unpack('QQ')
        return true if space_address == address
        list_entry_addr = next_entry_addr
      end
    end

    false
  end

  # Find a space of exactly the given length and remove it from the list.
  # @param length [Integer] requested size in bytes
  # @return [Array, nil] [ address, length ] of the space or nil if no space
  #         of that length is registered
  def get_space(length)
    # We use a simple exact fit strategy. All attempts to use a more
    # elaborate scheme were actually less efficient. Non-exact matches
    # generate new spaces for the remainder and fragment the blob file with
    # lots of unusable small spaces. Most applications seem to have
    # clustered their blob sizes around a number of popular sizes. So exact
    # match is very efficient to implement and results in the highest
    # probability that a space will be reused soon.
    list_entry_addr = @index.get(length)

    if list_entry_addr
      blob = @list.retrieve_blob(list_entry_addr)
      space_address, next_entry_addr = blob.unpack('QQ')
      @list.delete_blob(list_entry_addr)

      if next_entry_addr > 0
        # Update the index entry for the length to point to the
        # following space list entry.
        @index.insert(length, next_entry_addr)
      else
        # The space list for this length is empty. Remove the entry
        # from the index.
        @index.remove(length)
      end
      @recycled_spaces += 1

      # We return the length to remain compatible with the old SpaceTree
      # API.
      return [ space_address, length ]
    end

    @failed_requests += 1
    nil
  end

  # Remove all entries from the list and the index and reset the counters.
  def clear
    @list.clear
    @index.clear
    reset_stats
  end

  # Erase the list file and the index file.
  def erase
    @list.erase
    @index.erase
  end

  # Check the consistency of the index and the space lists and log some
  # statistics about the registered spaces.
  # @param flat_file [FlatFile, nil] If given, every registered space is
  #        verified against flat_file.has_space?.
  # @return [Boolean] true if no problem was found, false otherwise
  def check(flat_file = nil)
    sync
    return false unless @index.check
    return false unless @list.check

    smallest_space = nil
    largest_space = nil
    total_space_bytes = 0
    space_distribution = ::Hash.new(0)

    @index.each do |length, list_entry_addr|
      if list_entry_addr <= 0
        PEROBS.log.error "list_entry_addr (#{list_entry_addr}) " +
          "must be positive"
        return false
      end

      # Detect smallest and largest space
      if smallest_space.nil? || length < smallest_space
        smallest_space = length
      end
      if largest_space.nil? || length > largest_space
        largest_space = length
      end

      # All list entry addresses seen so far for this length. Every visited
      # entry is recorded so that a cycle anywhere in the list is found,
      # not only one that leads back to the head of the list.
      known_addresses = { list_entry_addr => true }
      entries = 0
      while list_entry_addr > 0
        entries += 1
        unless (blob = @list.retrieve_blob(list_entry_addr))
          PEROBS.log.error "SpaceManager points to non-existing " +
            "space list entry at address #{list_entry_addr}"
          return false
        end
        space_address, next_entry_addr = blob.unpack('QQ')

        if known_addresses.include?(next_entry_addr)
          PEROBS.log.error "Space list is cyclic: " +
            "#{known_addresses.keys + [ next_entry_addr ]}"
          return false
        end
        known_addresses[next_entry_addr] = true

        if flat_file &&
           !flat_file.has_space?(space_address, length)
          PEROBS.log.error "SpaceManager has space at offset " +
            "#{space_address} of size #{length} that isn't " +
            "available in the FlatFile."
          return false
        end
        list_entry_addr = next_entry_addr
      end

      total_space_bytes += length * entries
      # Group the spaces by the position of the most significant bit of
      # their length, i. e. by power-of-2 size classes.
      space_distribution[msb(length)] += entries
    end

    distribution = space_distribution.map do |l, c|
      "#{2 ** (l - 1)}-#{2 ** l - 1}:#{c}; "
    end.join
    PEROBS.log.info "SpaceManager stats: smallest: #{smallest_space}; " +
      "largest: #{largest_space}; total bytes: #{total_space_bytes}; " +
      "distribution: " +
      "#{distribution}"

    true
  end

  # Convert the registered spaces into an Array.
  # @return [Array] of [ address, length ] pairs sorted by address
  def to_a
    spaces = []

    @index.each do |length, list_entry_addr|
      while list_entry_addr > 0
        blob = @list.retrieve_blob(list_entry_addr)
        space_address, next_entry_addr = blob.unpack('QQ')

        spaces << [ space_address, length ]

        list_entry_addr = next_entry_addr
      end
    end

    spaces.sort { |x, y| x[0] <=> y[0] }
  end

  private

  # Store a new entry in the space list file.
  # @param space_address [Integer] address of the space in the FlatFile
  # @param next_entry_addr [Integer] list file address of the next entry
  #        for the same length or 0 if this is the last entry
  # @return [Integer] address of the new entry in the list file
  def insert_space_in_list(space_address, next_entry_addr)
    # The order must match the unpack('QQ') calls of the readers: space
    # address first, next entry address second.
    blob = [ space_address, next_entry_addr ].pack('QQ')
    @list.store_blob(blob_addr = @list.free_address, blob)

    blob_addr
  end

  # @return [Integer] 1-based position of the most significant set bit of
  #         i, 0 if i is 0 and 63 for negative values
  def msb(i)
    return 63 if i < 0

    bit = 0
    while (i > 0)
      bit += 1
      i = i >> 1
    end

    bit
  end

  # Set all usage counters back to 0.
  def reset_stats
    @added_spaces = 0
    @recycled_spaces = 0
    @failed_requests = 0
  end

end
|
271
|
+
|
272
|
+
end
|
273
|
+
|