perobs 4.0.0 → 4.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +27 -16
  3. data/lib/perobs/Array.rb +66 -19
  4. data/lib/perobs/BTree.rb +106 -15
  5. data/lib/perobs/BTreeBlob.rb +4 -3
  6. data/lib/perobs/BTreeDB.rb +5 -4
  7. data/lib/perobs/BTreeNode.rb +482 -156
  8. data/lib/perobs/BTreeNodeLink.rb +10 -0
  9. data/lib/perobs/BigArray.rb +285 -0
  10. data/lib/perobs/BigArrayNode.rb +1002 -0
  11. data/lib/perobs/BigHash.rb +246 -0
  12. data/lib/perobs/BigTree.rb +197 -0
  13. data/lib/perobs/BigTreeNode.rb +873 -0
  14. data/lib/perobs/Cache.rb +48 -10
  15. data/lib/perobs/ConsoleProgressMeter.rb +61 -0
  16. data/lib/perobs/DataBase.rb +4 -3
  17. data/lib/perobs/DynamoDB.rb +57 -15
  18. data/lib/perobs/EquiBlobsFile.rb +155 -50
  19. data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
  20. data/lib/perobs/FlatFile.rb +519 -227
  21. data/lib/perobs/FlatFileBlobHeader.rb +113 -54
  22. data/lib/perobs/FlatFileDB.rb +49 -23
  23. data/lib/perobs/FuzzyStringMatcher.rb +175 -0
  24. data/lib/perobs/Hash.rb +127 -33
  25. data/lib/perobs/IDList.rb +144 -0
  26. data/lib/perobs/IDListPage.rb +107 -0
  27. data/lib/perobs/IDListPageFile.rb +180 -0
  28. data/lib/perobs/IDListPageRecord.rb +142 -0
  29. data/lib/perobs/Object.rb +18 -15
  30. data/lib/perobs/ObjectBase.rb +46 -5
  31. data/lib/perobs/PersistentObjectCache.rb +57 -68
  32. data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
  33. data/lib/perobs/ProgressMeter.rb +97 -0
  34. data/lib/perobs/SpaceManager.rb +273 -0
  35. data/lib/perobs/SpaceTree.rb +21 -12
  36. data/lib/perobs/SpaceTreeNode.rb +53 -61
  37. data/lib/perobs/Store.rb +264 -145
  38. data/lib/perobs/version.rb +1 -1
  39. data/lib/perobs.rb +2 -0
  40. data/perobs.gemspec +4 -4
  41. data/test/Array_spec.rb +15 -6
  42. data/test/BTree_spec.rb +6 -2
  43. data/test/BigArray_spec.rb +261 -0
  44. data/test/BigHash_spec.rb +152 -0
  45. data/test/BigTreeNode_spec.rb +153 -0
  46. data/test/BigTree_spec.rb +259 -0
  47. data/test/EquiBlobsFile_spec.rb +105 -1
  48. data/test/FNV_Hash_1a_64_spec.rb +59 -0
  49. data/test/FlatFileDB_spec.rb +198 -14
  50. data/test/FuzzyStringMatcher_spec.rb +261 -0
  51. data/test/Hash_spec.rb +13 -3
  52. data/test/IDList_spec.rb +77 -0
  53. data/test/LegacyDBs/LegacyDB.rb +155 -0
  54. data/test/LegacyDBs/version_3/class_map.json +1 -0
  55. data/test/LegacyDBs/version_3/config.json +1 -0
  56. data/test/LegacyDBs/version_3/database.blobs +0 -0
  57. data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
  58. data/test/LegacyDBs/version_3/index.blobs +0 -0
  59. data/test/LegacyDBs/version_3/version +1 -0
  60. data/test/LockFile_spec.rb +9 -6
  61. data/test/SpaceManager_spec.rb +176 -0
  62. data/test/SpaceTree_spec.rb +4 -1
  63. data/test/Store_spec.rb +305 -203
  64. data/test/spec_helper.rb +9 -4
  65. metadata +57 -16
  66. data/lib/perobs/BTreeNodeCache.rb +0 -109
  67. data/lib/perobs/TreeDB.rb +0 -277
data/lib/perobs/Hash.rb CHANGED
@@ -2,7 +2,7 @@
2
2
  #
3
3
  # = Hash.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2015, 2016 by Chris Schlaeger <chris@taskjuggler.org>
5
+ # Copyright (c) 2015, 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
6
6
  #
7
7
  # MIT License
8
8
  #
@@ -37,20 +37,36 @@ module PEROBS
37
37
  # The implementation is largely a proxy around the standard Hash class. But
38
38
  # all mutating methods must be re-implemented to convert PEROBS::Objects to
39
39
  # POXReference objects and to register the object as modified with the
40
- # cache.
40
+ # cache. However, it is not designed for large data sets as it always reads
41
+ # and writes the full data set for every access (unless it is cached). For
42
+ # data sets that could have more than a few hundred entries BigHash is the
43
+ # recommended alternative.
41
44
  #
42
45
  # We explicitly don't support Hash::store() as it conflicts with
43
46
  # ObjectBase::store() method to access the store.
44
47
  class Hash < ObjectBase
45
48
 
49
+ # These methods do not mutate the Hash. They only perform read
50
+ # operations and return a new PEROBS::Hash object.
51
+ ([
52
+ :invert, :merge, :reject, :select
53
+ ] + Enumerable.instance_methods).uniq.each do |method_sym|
54
+ # Create a wrapper method that passes the call to @data.
55
+ define_method(method_sym) do |*args, &block|
56
+ # Register the read operation with the cache.
57
+ @store.cache.cache_read(self)
58
+ @store.new(PEROBS::Hash, @data.send(method_sym, *args, &block))
59
+ end
60
+ end
61
+
46
62
  # These methods do not mutate the Hash. They only perform read
47
63
  # operations.
48
64
  ([
49
65
  :==, :[], :assoc, :compare_by_identity, :compare_by_identity?, :default,
50
66
  :default_proc, :each, :each_key, :each_pair, :each_value, :empty?,
51
67
  :eql?, :fetch, :flatten, :has_key?, :has_value?, :hash, :include?,
52
- :invert, :key, :key?, :keys, :length, :member?, :merge,
53
- :pretty_print, :pretty_print_cycle, :rassoc, :reject, :select, :size,
68
+ :key, :key?, :keys, :length, :member?,
69
+ :pretty_print, :pretty_print_cycle, :rassoc, :size,
54
70
  :to_a, :to_h, :to_hash, :to_s, :value?, :values, :values_at
55
71
  ] + Enumerable.instance_methods).uniq.each do |method_sym|
56
72
  # Create a wrapper method that passes the call to @data.
@@ -61,11 +77,22 @@ module PEROBS
61
77
  end
62
78
  end
63
79
 
64
- # These methods mutate the Hash.
80
+ # These methods mutate the Hash and return self
81
+ [
82
+ :clear, :keep_if, :merge!, :rehash, :reject!, :replace, :select!, :update
83
+ ].each do |method_sym|
84
+ # Create a wrapper method that passes the call to @data.
85
+ define_method(method_sym) do |*args, &block|
86
+ # Register the write operation with the cache.
87
+ @store.cache.cache_write(self)
88
+ @data.send(method_sym, *args, &block)
89
+ myself
90
+ end
91
+ end
92
+
93
+ # These methods mutate the Hash and return basic Ruby type objects.
65
94
  [
66
- :[]=, :clear, :default=, :default_proc=, :delete, :delete_if,
67
- :initialize_copy, :keep_if, :merge!, :rehash, :reject!, :replace,
68
- :select!, :shift, :update
95
+ :delete, :delete_if, :shift
69
96
  ].each do |method_sym|
70
97
  # Create a wrapper method that passes the call to @data.
71
98
  define_method(method_sym) do |*args, &block|
@@ -79,33 +106,70 @@ module PEROBS
79
106
  # PEROBS users should never call this method or equivalents of derived
80
107
  # methods directly.
81
108
  # @param p [PEROBS::Handle] PEROBS handle
82
- # @param default [Any] The default value that is returned when no value is
83
- # stored for a specific key.
84
- def initialize(p, default = nil)
109
+ # @param default [Object] The default value that is returned when no value
110
+ # is stored for a specific key. The default must be of the
111
+ # supported type.
112
+ def initialize(p, default = nil, &block)
85
113
  super(p)
86
- @default = nil
87
- @data = {}
114
+ _check_assignment_value(default)
115
+ if block_given?
116
+ @data = ::Hash.new(&block)
117
+ else
118
+ @data = ::Hash.new(default)
119
+ end
88
120
 
89
121
  # Ensure that the newly created object will be pushed into the database.
90
122
  @store.cache.cache_write(self)
91
123
  end
92
124
 
125
+ # Proxy for assignment method.
126
+ def []=(key, value)
127
+ unless key.is_a?(String) || key.respond_to?(:is_poxreference?)
128
+ raise ArgumentError, "PEROBS::Hash[] key must be a String or " +
129
+ "a PEROBS object but is a #{key.class}"
130
+ end
131
+ _check_assignment_value(value)
132
+ @store.cache.cache_write(self)
133
+ @data[key] = value
134
+ end
135
+
136
+ # Proxy for default= method.
137
+ def default=(value)
138
+ _check_assignment_value(value)
139
+ @data.default=(value)
140
+ end
141
+
93
142
  # Return a list of all object IDs of all persistent objects that this Hash
94
143
  # is referencing.
95
144
  # @return [Array of Integer] IDs of referenced objects
96
145
  def _referenced_object_ids
97
- @data.each_value.select { |v| v && v.respond_to?(:is_poxreference?) }.
98
- map { |o| o.id }
146
+ ids = []
147
+ @data.each do |k, v|
148
+ if k && k.respond_to?(:is_poxreference?)
149
+ ids << k.id
150
+ end
151
+ if v && v.respond_to?(:is_poxreference?)
152
+ ids << v.id
153
+ end
154
+ end
155
+
156
+ ids
99
157
  end
100
158
 
101
159
  # This method should only be used during store repair operations. It will
102
160
  # delete all references to the given object ID.
103
161
  # @param id [Integer] targeted object ID
104
162
  def _delete_reference_to_id(id)
163
+ original_length = @data.length
164
+
105
165
  @data.delete_if do |k, v|
106
- v && v.respond_to?(:is_poxreference?) && v.id == id
166
+ (k && k.respond_to?(:is_poxreference?) && k.id == id) ||
167
+ (v && v.respond_to?(:is_poxreference?) && v.id == id)
168
+ end
169
+
170
+ if @data.length != original_length
171
+ @store.cache.cache_write(self)
107
172
  end
108
- @store.cache.cache_write(self)
109
173
  end
110
174
 
111
175
  # Restore the persistent data from a single data structure.
@@ -114,8 +178,18 @@ module PEROBS
114
178
  # @private
115
179
  def _deserialize(data)
116
180
  @data = {}
117
- data.each { |k, v| @data[k] = v.is_a?(POReference) ?
118
- POXReference.new(@store, v.id) : v }
181
+
182
+ data.each do |k, v|
183
+ # References to other PEROBS Objects are marshalled with our own
184
+ # format. If we detect such a marshalled String we convert it into a
185
+ # POXReference object.
186
+ if (match = /^#<PEROBS::POReference id=([0-9]+)>$/.match(k))
187
+ k = POXReference.new(@store, match[1].to_i)
188
+ end
189
+ dv = v.is_a?(POReference) ? POXReference.new(@store, v.id) : v
190
+ @data[k] = dv
191
+ end
192
+
119
193
  @data
120
194
  end
121
195
 
@@ -136,26 +210,46 @@ module PEROBS
136
210
  data = {}
137
211
 
138
212
  @data.each do |k, v|
139
- if v.respond_to?(:is_poxreference?)
140
- data[k] = POReference.new(v.id)
141
- else
142
- # Outside of the PEROBS library all PEROBS::ObjectBase derived
143
- # objects should not be used directly. The library only exposes them
144
- # via POXReference proxy objects.
145
- if v.is_a?(ObjectBase)
146
- PEROBS.log.fatal 'A PEROBS::ObjectBase object escaped! ' +
147
- "It is stored in a PEROBS::Hash with key #{k.inspect}. " +
148
- 'Have you used self() instead of myself() to ' +
149
- "get the reference of this PEROBS object?\n" +
150
- v.inspect
151
- end
152
- data[k] = v
213
+ if k.respond_to?(:is_poxreference?)
214
+ # JSON only supports Strings as hash keys. Since JSON is the default
215
+ # internal storage format in the database, we have to marshall
216
+ # PEROBS::Object references ourselves.
217
+ k = "#<PEROBS::POReference id=#{k.id}>"
218
+ elsif k[0..24] == '#<PEROBS::POReference id='
219
+ # This could obviously result in conflicts with 'normal' String hash
220
+ # keys. This is extremely unlikely, but we better catch this case
221
+ # before it causes hard to debug trouble.
222
+ raise ArgumentError, "Hash key #{k} conflicts with PEROBS " +
223
+ "internal representation of marshalled hash keys!"
153
224
  end
225
+ data[k] = serialize_helper(v)
154
226
  end
155
227
 
156
228
  data
157
229
  end
158
230
 
231
+ def serialize_helper(v)
232
+ if v.respond_to?(:is_poxreference?)
233
+ # References to other PEROBS objects (POXReference) are stored as
234
+ # POReference in the database.
235
+ return POReference.new(v.id)
236
+ else
237
+ # Outside of the PEROBS library all PEROBS::ObjectBase derived
238
+ # objects should not be used directly. The library only exposes them
239
+ # via POXReference proxy objects.
240
+ if v.is_a?(ObjectBase)
241
+ PEROBS.log.fatal 'A PEROBS::ObjectBase object escaped! ' +
242
+ "It is stored in a PEROBS::Hash. " +
243
+ 'Have you used self() instead of myself() to ' +
244
+ "get the reference of this PEROBS object?\n" +
245
+ v.inspect
246
+ end
247
+
248
+ # All other objects are serialized by their native methods.
249
+ return v
250
+ end
251
+ end
252
+
159
253
  end
160
254
 
161
255
  end
@@ -0,0 +1,144 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = IDList.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2018 by Chris Schlaeger <chris@taskjuggler.org>
6
+ #
7
+ # MIT License
8
+ #
9
+ # Permission is hereby granted, free of charge, to any person obtaining
10
+ # a copy of this software and associated documentation files (the
11
+ # "Software"), to deal in the Software without restriction, including
12
+ # without limitation the rights to use, copy, modify, merge, publish,
13
+ # distribute, sublicense, and/or sell copies of the Software, and to
14
+ # permit persons to whom the Software is furnished to do so, subject to
15
+ # the following conditions:
16
+ #
17
+ # The above copyright notice and this permission notice shall be
18
+ # included in all copies or substantial portions of the Software.
19
+ #
20
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
+
28
+ require 'perobs/IDListPageFile'
29
+ require 'perobs/IDListPageRecord'
30
+
31
+ module PEROBS
32
+
33
+ # This class stores a list of 64 bit values. Values can be added to the list
34
+ # and the presence of a certain value can be checked. It can hold up to 2^64
35
+ # values. It tries to keep values in memory but can store them in a file if
36
+ # needed. A threshold for the in-memory values can be set in the
37
+ # constructor. The stored values are grouped in pages. Each page can hold up
38
+ # to page_size entries.
39
class IDList

  # Create a new IDList object. The data that can't be kept in memory will
  # be stored in the specified directory under the given name.
  # @param dir [String] Path of the directory
  # @param name [String] Name of the file
  # @param max_in_memory [Integer] Specifies the maximum number of values
  #        that will be kept in memory. If the list is larger, values will
  #        be cached in the specified file.
  # @param page_size [Integer] The number of values per page. The default
  #        value is 32 which was found the best performing config in tests.
  def initialize(dir, name, max_in_memory, page_size = 32)
    # The page_file manages the pages that store the values.
    @page_file = IDListPageFile.new(self, dir, name,
                                    max_in_memory, page_size)
    clear
  end

  # Insert a new value into the list.
  # @param id [Integer] The value to add
  def insert(id)
    # Find the index of the page record whose ID range covers the value.
    index = @page_records.bsearch_index { |pr| pr.max_id >= id }
    page = @page_records[index]

    # A full page must be split before we can insert. Pages are split by ID
    # range, not by distributing the values evenly, so a single split is not
    # guaranteed to yield a page with free space. Keep splitting until the
    # page that covers the ID has room.
    while page.is_full?
      new_page = page.split
      # Register the newly created page right behind the one it was split
      # from so that the records stay ordered by ID range.
      @page_records.insert(index + 1, new_page)
      if id >= new_page.min_id
        # The ID belongs into the newly created page. Adjust index and page
        # reference accordingly.
        index += 1
        page = new_page
      end
    end

    page.insert(id)
  end

  # Check if a given value is already stored in the list.
  # @param id [Integer] The value to check for
  def include?(id)
    @page_records.bsearch { |pr| pr.max_id >= id }.include?(id)
  end

  # Clear the list and empty the filesystem cache file. Afterwards the list
  # consists of a single page record that covers the full ID range.
  def clear
    @page_file.clear
    @page_records = [ IDListPageRecord.new(@page_file, 0, 2 ** 64) ]
  end

  # Erase the list including the filesystem cache file. The IDList is no
  # longer usable after this call but the cache file is removed from the
  # filesystem.
  def erase
    @page_file.erase
    @page_records = nil
  end

  # Perform some consistency checks on the internal data structures. The
  # page records must cover the range 0 to 2 ** 64 without gaps or overlaps.
  # @raise [RuntimeError] in case a problem is found
  def check
    first_min = @page_records.first.min_id
    unless first_min == 0
      raise RuntimeError, "min_id of first record (#{first_min}) " +
        "must be 0."
    end

    last_max = -1
    @page_records.each do |pr|
      unless pr.min_id == last_max + 1
        raise RuntimeError, "max_id of previous record (#{last_max}) " +
          "must be exactly 1 smaller than current record (#{pr.min_id})."
      end
      last_max = pr.max_id
      pr.check
    end

    unless last_max == 2 ** 64
      raise RuntimeError, "max_id of last record " +
        "(#{@page_records.last.max_id}) must be #{2 ** 64}."
    end
  end

  # Collect the values of all page records, in page record order.
  # @return [Array of Integer] All stored values
  def to_a
    @page_records.flat_map(&:values)
  end

  # Return a human readable form of the page records that store the list,
  # one line per record with its ID range and values. This is only meant
  # for debugging purposes and does not scale for larger lists.
  # @return [String]
  def to_s
    # After erase() there are no page records left to show.
    lines = (@page_records || []).map do |pr|
      "#{pr.min_id}..#{pr.max_id}: [ #{pr.values.join(', ')} ]"
    end

    "\n" + lines.join("\n")
  end

end
142
+
143
+ end
144
+
@@ -0,0 +1,107 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = IDListPage.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2018 by Chris Schlaeger <chris@taskjuggler.org>
6
+ #
7
+ # MIT License
8
+ #
9
+ # Permission is hereby granted, free of charge, to any person obtaining
10
+ # a copy of this software and associated documentation files (the
11
+ # "Software"), to deal in the Software without restriction, including
12
+ # without limitation the rights to use, copy, modify, merge, publish,
13
+ # distribute, sublicense, and/or sell copies of the Software, and to
14
+ # permit persons to whom the Software is furnished to do so, subject to
15
+ # the following conditions:
16
+ #
17
+ # The above copyright notice and this permission notice shall be
18
+ # included in all copies or substantial portions of the Software.
19
+ #
20
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
+
28
+ module PEROBS
29
+
30
# A page holds an ascending list of unique values. It reports its current
# number of entries to the associated record and notifies the page file
# whenever its content was modified.
class IDListPage

  attr_reader :uid, :values
  attr_accessor :record

  # Create a new page.
  # @param page_file [IDListPageFile] The page file this page belongs to.
  #        It must provide page_size, save_page and mark_page_as_modified.
  # @param record [IDListPageRecord] The record that refers to this page.
  #        Its page_entries count is set to the number of values.
  # @param uid [Integer] Unique ID of the page
  # @param values [Array of Integer] Initial values of the page. They are
  #        expected to be in strictly ascending order (see check()).
  def initialize(page_file, record, uid, values = [])
    @page_file = page_file
    @record = record
    @uid = uid
    @values = values
    @record.page_entries = @values.length
  end

  # Load the page with the given uid via the page file.
  # @param page_file [IDListPageFile] The page file to load from
  # @param uid [Integer] Unique ID of the page
  # @param ref [IDListPageRecord] The record that refers to the page
  def self.load(page_file, uid, ref)
    page_file.load(uid, ref)
  end

  # @return [Boolean] true if the page holds page_size or more values
  def is_full?
    @values.length >= @page_file.page_size
  end

  # @return [Integer] Number of values stored in the page
  def length
    @values.length
  end

  # Ask the page file to save this page.
  def save
    @page_file.save_page(self)
  end

  # Insert a value into the page, keeping the values sorted. Values that
  # are already stored are ignored.
  # @param id [Integer] The value to insert
  # @raise [ArgumentError] if a new value must be added to a full page
  def insert(id)
    index = @values.bsearch_index { |v| v >= id } || @values.length

    # The value is stored already. There is nothing to add, so this is fine
    # even when the page is full.
    return if @values[index] == id

    raise ArgumentError, "IDListPage is already full" if is_full?

    @values.insert(index, id)
    @record.page_entries = @values.length
    @page_file.mark_page_as_modified(self)
  end

  # Check if the given value is stored in the page.
  # @param id [Integer] The value to look for
  # @return [Boolean]
  def include?(id)
    found = @values.bsearch { |v| v >= id }
    !found.nil? && found == id
  end

  # Remove all values that are larger than max_id from the page.
  # @param max_id [Integer] Largest value that remains in the page
  # @return [Array of Integer] The removed values
  def delete(max_id)
    removed, kept = @values.partition { |v| v > max_id }

    unless removed.empty?
      # Modify the Array in place so other holders of the reference see the
      # same content.
      @values.replace(kept)
      @record.page_entries = @values.length
      @page_file.mark_page_as_modified(self)
    end

    removed
  end

  # Ensure that the values are stored in strictly ascending order.
  # @raise [RuntimeError] if two neighbouring values are out of order
  def check
    @values.each_cons(2) do |previous, current|
      if previous >= current
        raise RuntimeError, "The values #{previous} and #{current} must be " +
          "strictly ascending: #{@values.inspect}"
      end
    end
  end

  # @return [String] Human readable list of the values
  def to_s
    "[ #{@values.join(', ')} ]"
  end

end
105
+
106
+ end
107
+
@@ -0,0 +1,180 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = IDListPageFile.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2018 by Chris Schlaeger <chris@taskjuggler.org>
6
+ #
7
+ # MIT License
8
+ #
9
+ # Permission is hereby granted, free of charge, to any person obtaining
10
+ # a copy of this software and associated documentation files (the
11
+ # "Software"), to deal in the Software without restriction, including
12
+ # without limitation the rights to use, copy, modify, merge, publish,
13
+ # distribute, sublicense, and/or sell copies of the Software, and to
14
+ # permit persons to whom the Software is furnished to do so, subject to
15
+ # the following conditions:
16
+ #
17
+ # The above copyright notice and this permission notice shall be
18
+ # included in all copies or substantial portions of the Software.
19
+ #
20
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
+
28
+ require 'perobs/IDListPage'
29
+ require 'perobs/IDListPageRecord'
30
+ require 'perobs/Log'
31
+ require 'perobs/PersistentObjectCache'
32
+
33
+ module PEROBS
34
+
35
+ # The IDListPageFile class provides filesystem based cache for the
36
+ # IDListPage objects. The IDListPageRecord objects only hold the index of the
37
+ # page in this cache. This allows the pages to be garbage collected and
38
+ # swapped to the file. If accessed, the pages will be swapped in again. While
39
+ # this process is similar to the demand paging of the OS it has absolutely
40
+ # nothing to do with it.
41
class IDListPageFile

  attr_reader :page_size, :pages

  # Create a new IDListPageFile object that uses the given file in the given
  # directory as cache file. The file name gets the suffix '.cache'.
  # @param list [IDList] The IDList object that caches pages here
  # @param dir [String] An existing directory
  # @param name [String] A file name (without path)
  # @param max_in_memory [Integer] Maximum number of pages to keep in memory
  # @param page_size [Integer] The number of values in each page
  def initialize(list, dir, name, max_in_memory, page_size)
    @list = list
    @file_name = File.join(dir, name + '.cache')
    @page_size = page_size
    open
    @pages = PersistentObjectCache.new(max_in_memory, max_in_memory,
                                       IDListPage, self)
    @page_counter = 0
  end

  # Load the IDListPage from the cache file.
  # @param page_idx [Integer] The page index in the page file
  # @param record [IDListPageRecord] the corresponding IDListPageRecord
  # @return [IDListPage] The loaded values
  def load(page_idx, record)
    # The IDListPageRecord will tell us the actual number of values stored
    # in this page.
    values = []
    unless (entries = record.page_entries) == 0
      begin
        # Every page occupies a fixed slot of page_size 8 byte values.
        @f.seek(page_idx * @page_size * 8)
        bytes = @f.read(entries * 8)
        # A read at or close to the end of the file yields nil or too few
        # bytes. Report this like any other read problem instead of
        # unpacking garbage.
        if bytes.nil? || bytes.bytesize < entries * 8
          raise IOError, "premature end of file in page #{page_idx}"
        end
        values = bytes.unpack("Q#{entries}")
      rescue IOError, SystemCallError => e
        # Operating system level failures are reported as SystemCallError
        # (Errno::*), not as IOError.
        PEROBS.log.fatal "Cannot read cache file #{@file_name}: #{e.message}"
      end
    end

    # Create the IDListPage object with the given values and register it
    # with the page cache.
    p = IDListPage.new(self, record, page_idx, values)
    @pages.insert(p, false)

    p
  end

  # Return the number of registered pages.
  def page_count
    @page_counter
  end

  # Create a new IDListPage and register it.
  # @param record [IDListPageRecord] The corresponding record.
  # @param values [Array of Integer] The values stored in the page
  # @return [Integer] The index of the new page
  def new_page(record, values = [])
    idx = @page_counter
    @page_counter += 1
    mark_page_as_modified(IDListPage.new(self, record, idx, values))
    idx
  end

  # Return the IDListPage object that the given record refers to. The page
  # is taken from the page cache or loaded from the cache file.
  # @param record [IDListPageRecord] the corresponding IDListPageRecord
  # @return [IDListPage] The page corresponding to the record
  # @raise [RuntimeError] if the found page has a different index
  def page(record)
    p = @pages.get(record.page_idx, record) || load(record.page_idx, record)
    unless p.uid == record.page_idx
      raise RuntimeError, "Page reference mismatch. Record " +
        "#{record.page_idx} points to page #{p.uid}"
    end

    p
  end

  # Mark a page as modified. This means it has to be written into the cache
  # before it is removed from memory.
  # @param p [IDListPage] page reference
  def mark_page_as_modified(p)
    @pages.insert(p)
    @pages.flush
  end

  # Discard all pages and truncate the cache file to zero length.
  # @raise [RuntimeError] if the cache file cannot be truncated
  def clear
    @pages.clear
    @page_counter = 0
    begin
      @f.truncate(0)
    rescue IOError, SystemCallError => e
      raise RuntimeError, "Cannot truncate cache file #{@file_name}: " +
        e.message
    end
  end

  # Discard all pages and erase the cache file.
  def erase
    @pages.clear
    @page_counter = 0
    close
  end

  # Save the given IDListPage into the cache file.
  # @param p [IDListPage] page to store
  # @raise [RuntimeError] if the record and the page disagree about the
  #        number of stored values
  def save_page(p)
    if p.record.page_entries != p.values.length
      raise RuntimeError, "page_entries mismatch for node #{p.uid}"
    end
    begin
      @f.seek(p.uid * @page_size * 8)
      @f.write(p.values.pack('Q*'))
    rescue IOError, SystemCallError => e
      PEROBS.log.fatal "Cannot write cache file #{@file_name}: #{e.message}"
    end
  end

  private

  # Open the cache file for binary reading and writing. Mode 'wb+' creates
  # the file or truncates an existing one.
  def open
    @f = File.open(@file_name, 'wb+')
  rescue IOError, SystemCallError => e
    PEROBS.log.fatal "Cannot open cache file #{@file_name}: #{e.message}"
  end

  # Close the cache file and delete it from the filesystem. Calling this
  # again after the file was closed is harmless.
  def close
    begin
      @f.close if @f
      File.delete(@file_name) if File.exist?(@file_name)
    rescue IOError, SystemCallError => e
      PEROBS.log.fatal "Cannot erase cache file #{@file_name}: #{e.message}"
    end
    @f = nil
  end

end
178
+
179
+ end
180
+