perobs 4.0.0 → 4.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +27 -16
  3. data/lib/perobs/Array.rb +66 -19
  4. data/lib/perobs/BTree.rb +106 -15
  5. data/lib/perobs/BTreeBlob.rb +4 -3
  6. data/lib/perobs/BTreeDB.rb +5 -4
  7. data/lib/perobs/BTreeNode.rb +482 -156
  8. data/lib/perobs/BTreeNodeLink.rb +10 -0
  9. data/lib/perobs/BigArray.rb +285 -0
  10. data/lib/perobs/BigArrayNode.rb +1002 -0
  11. data/lib/perobs/BigHash.rb +246 -0
  12. data/lib/perobs/BigTree.rb +197 -0
  13. data/lib/perobs/BigTreeNode.rb +873 -0
  14. data/lib/perobs/Cache.rb +48 -10
  15. data/lib/perobs/ConsoleProgressMeter.rb +61 -0
  16. data/lib/perobs/DataBase.rb +4 -3
  17. data/lib/perobs/DynamoDB.rb +57 -15
  18. data/lib/perobs/EquiBlobsFile.rb +155 -50
  19. data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
  20. data/lib/perobs/FlatFile.rb +519 -227
  21. data/lib/perobs/FlatFileBlobHeader.rb +113 -54
  22. data/lib/perobs/FlatFileDB.rb +49 -23
  23. data/lib/perobs/FuzzyStringMatcher.rb +175 -0
  24. data/lib/perobs/Hash.rb +127 -33
  25. data/lib/perobs/IDList.rb +144 -0
  26. data/lib/perobs/IDListPage.rb +107 -0
  27. data/lib/perobs/IDListPageFile.rb +180 -0
  28. data/lib/perobs/IDListPageRecord.rb +142 -0
  29. data/lib/perobs/Object.rb +18 -15
  30. data/lib/perobs/ObjectBase.rb +46 -5
  31. data/lib/perobs/PersistentObjectCache.rb +57 -68
  32. data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
  33. data/lib/perobs/ProgressMeter.rb +97 -0
  34. data/lib/perobs/SpaceManager.rb +273 -0
  35. data/lib/perobs/SpaceTree.rb +21 -12
  36. data/lib/perobs/SpaceTreeNode.rb +53 -61
  37. data/lib/perobs/Store.rb +264 -145
  38. data/lib/perobs/version.rb +1 -1
  39. data/lib/perobs.rb +2 -0
  40. data/perobs.gemspec +4 -4
  41. data/test/Array_spec.rb +15 -6
  42. data/test/BTree_spec.rb +6 -2
  43. data/test/BigArray_spec.rb +261 -0
  44. data/test/BigHash_spec.rb +152 -0
  45. data/test/BigTreeNode_spec.rb +153 -0
  46. data/test/BigTree_spec.rb +259 -0
  47. data/test/EquiBlobsFile_spec.rb +105 -1
  48. data/test/FNV_Hash_1a_64_spec.rb +59 -0
  49. data/test/FlatFileDB_spec.rb +198 -14
  50. data/test/FuzzyStringMatcher_spec.rb +261 -0
  51. data/test/Hash_spec.rb +13 -3
  52. data/test/IDList_spec.rb +77 -0
  53. data/test/LegacyDBs/LegacyDB.rb +155 -0
  54. data/test/LegacyDBs/version_3/class_map.json +1 -0
  55. data/test/LegacyDBs/version_3/config.json +1 -0
  56. data/test/LegacyDBs/version_3/database.blobs +0 -0
  57. data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
  58. data/test/LegacyDBs/version_3/index.blobs +0 -0
  59. data/test/LegacyDBs/version_3/version +1 -0
  60. data/test/LockFile_spec.rb +9 -6
  61. data/test/SpaceManager_spec.rb +176 -0
  62. data/test/SpaceTree_spec.rb +4 -1
  63. data/test/Store_spec.rb +305 -203
  64. data/test/spec_helper.rb +9 -4
  65. metadata +57 -16
  66. data/lib/perobs/BTreeNodeCache.rb +0 -109
  67. data/lib/perobs/TreeDB.rb +0 -277
data/lib/perobs/Hash.rb CHANGED
@@ -2,7 +2,7 @@
2
2
  #
3
3
  # = Hash.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2015, 2016 by Chris Schlaeger <chris@taskjuggler.org>
5
+ # Copyright (c) 2015, 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
6
6
  #
7
7
  # MIT License
8
8
  #
@@ -37,20 +37,36 @@ module PEROBS
37
37
  # The implementation is largely a proxy around the standard Hash class. But
38
38
  # all mutating methods must be re-implemented to convert PEROBS::Objects to
39
39
  # POXReference objects and to register the object as modified with the
40
- # cache.
40
+ # cache. However, it is not designed for large data sets as it always reads
41
+ # and writes the full data set for every access (unless it is cached). For
42
+ # data sets that could have more than a few hundred entries BigHash is the
43
+ # recommended alternative.
41
44
  #
42
45
  # We explicitly don't support Hash::store() as it conflicts with
43
46
  # ObjectBase::store() method to access the store.
44
47
  class Hash < ObjectBase
45
48
 
49
+ # These methods do not mutate the Hash. They only perform read
50
+ # operations and return a new PEROBS::Hash object.
51
+ ([
52
+ :invert, :merge, :reject, :select
53
+ ] + Enumerable.instance_methods).uniq.each do |method_sym|
54
+ # Create a wrapper method that passes the call to @data.
55
+ define_method(method_sym) do |*args, &block|
56
+ # Register the read operation with the cache.
57
+ @store.cache.cache_read(self)
58
+ @store.new(PEROBS::Hash, @data.send(method_sym, *args, &block))
59
+ end
60
+ end
61
+
46
62
  # These methods do not mutate the Hash. They only perform read
47
63
  # operations.
48
64
  ([
49
65
  :==, :[], :assoc, :compare_by_identity, :compare_by_identity?, :default,
50
66
  :default_proc, :each, :each_key, :each_pair, :each_value, :empty?,
51
67
  :eql?, :fetch, :flatten, :has_key?, :has_value?, :hash, :include?,
52
- :invert, :key, :key?, :keys, :length, :member?, :merge,
53
- :pretty_print, :pretty_print_cycle, :rassoc, :reject, :select, :size,
68
+ :key, :key?, :keys, :length, :member?,
69
+ :pretty_print, :pretty_print_cycle, :rassoc, :size,
54
70
  :to_a, :to_h, :to_hash, :to_s, :value?, :values, :values_at
55
71
  ] + Enumerable.instance_methods).uniq.each do |method_sym|
56
72
  # Create a wrapper method that passes the call to @data.
@@ -61,11 +77,22 @@ module PEROBS
61
77
  end
62
78
  end
63
79
 
64
- # These methods mutate the Hash.
80
+ # These methods mutate the Hash and return self
81
+ [
82
+ :clear, :keep_if, :merge!, :rehash, :reject!, :replace, :select!, :update
83
+ ].each do |method_sym|
84
+ # Create a wrapper method that passes the call to @data.
85
+ define_method(method_sym) do |*args, &block|
86
+ # Register the write operation with the cache.
87
+ @store.cache.cache_write(self)
88
+ @data.send(method_sym, *args, &block)
89
+ myself
90
+ end
91
+ end
92
+
93
+ # These methods mutate the Hash and return basic Ruby type objects.
65
94
  [
66
- :[]=, :clear, :default=, :default_proc=, :delete, :delete_if,
67
- :initialize_copy, :keep_if, :merge!, :rehash, :reject!, :replace,
68
- :select!, :shift, :update
95
+ :delete, :delete_if, :shift
69
96
  ].each do |method_sym|
70
97
  # Create a wrapper method that passes the call to @data.
71
98
  define_method(method_sym) do |*args, &block|
@@ -79,33 +106,70 @@ module PEROBS
79
106
  # PEROBS users should never call this method or equivalents of derived
80
107
  # methods directly.
81
108
  # @param p [PEROBS::Handle] PEROBS handle
82
- # @param default [Any] The default value that is returned when no value is
83
- # stored for a specific key.
84
- def initialize(p, default = nil)
109
+ # @param default [Object] The default value that is returned when no value
110
+ # is stored for a specific key. The default must be of the
111
+ # supported type.
112
+ def initialize(p, default = nil, &block)
85
113
  super(p)
86
- @default = nil
87
- @data = {}
114
+ _check_assignment_value(default)
115
+ if block_given?
116
+ @data = ::Hash.new(&block)
117
+ else
118
+ @data = ::Hash.new(default)
119
+ end
88
120
 
89
121
  # Ensure that the newly created object will be pushed into the database.
90
122
  @store.cache.cache_write(self)
91
123
  end
92
124
 
125
+ # Proxy for assignment method.
126
+ def []=(key, value)
127
+ unless key.is_a?(String) || key.respond_to?(:is_poxreference?)
128
+ raise ArgumentError, "PEROBS::Hash[] key must be a String or " +
129
+ "a PEROBS object but is a #{key.class}"
130
+ end
131
+ _check_assignment_value(value)
132
+ @store.cache.cache_write(self)
133
+ @data[key] = value
134
+ end
135
+
136
+ # Proxy for default= method.
137
+ def default=(value)
138
+ _check_assignment_value(value)
139
+ @data.default=(value)
140
+ end
141
+
93
142
  # Return a list of all object IDs of all persistent objects that this Hash
94
143
  # is referencing.
95
144
  # @return [Array of Integer] IDs of referenced objects
96
145
  def _referenced_object_ids
97
- @data.each_value.select { |v| v && v.respond_to?(:is_poxreference?) }.
98
- map { |o| o.id }
146
+ ids = []
147
+ @data.each do |k, v|
148
+ if k && k.respond_to?(:is_poxreference?)
149
+ ids << k.id
150
+ end
151
+ if v && v.respond_to?(:is_poxreference?)
152
+ ids << v.id
153
+ end
154
+ end
155
+
156
+ ids
99
157
  end
100
158
 
101
159
  # This method should only be used during store repair operations. It will
102
160
  # delete all references to the given object ID.
103
161
  # @param id [Integer] targeted object ID
104
162
  def _delete_reference_to_id(id)
163
+ original_length = @data.length
164
+
105
165
  @data.delete_if do |k, v|
106
- v && v.respond_to?(:is_poxreference?) && v.id == id
166
+ (k && k.respond_to?(:is_poxreference?) && k.id == id) ||
167
+ (v && v.respond_to?(:is_poxreference?) && v.id == id)
168
+ end
169
+
170
+ if @data.length != original_length
171
+ @store.cache.cache_write(self)
107
172
  end
108
- @store.cache.cache_write(self)
109
173
  end
110
174
 
111
175
  # Restore the persistent data from a single data structure.
@@ -114,8 +178,18 @@ module PEROBS
114
178
  # @private
115
179
  def _deserialize(data)
116
180
  @data = {}
117
- data.each { |k, v| @data[k] = v.is_a?(POReference) ?
118
- POXReference.new(@store, v.id) : v }
181
+
182
+ data.each do |k, v|
183
+ # References to other PEROBS Objects are marshalled with our own
184
+ # format. If we detect such a marshalled String we convert it into a
185
+ # POXReference object.
186
+ if (match = /^#<PEROBS::POReference id=([0-9]+)>$/.match(k))
187
+ k = POXReference.new(@store, match[1].to_i)
188
+ end
189
+ dv = v.is_a?(POReference) ? POXReference.new(@store, v.id) : v
190
+ @data[k] = dv
191
+ end
192
+
119
193
  @data
120
194
  end
121
195
 
@@ -136,26 +210,46 @@ module PEROBS
136
210
  data = {}
137
211
 
138
212
  @data.each do |k, v|
139
- if v.respond_to?(:is_poxreference?)
140
- data[k] = POReference.new(v.id)
141
- else
142
- # Outside of the PEROBS library all PEROBS::ObjectBase derived
143
- # objects should not be used directly. The library only exposes them
144
- # via POXReference proxy objects.
145
- if v.is_a?(ObjectBase)
146
- PEROBS.log.fatal 'A PEROBS::ObjectBase object escaped! ' +
147
- "It is stored in a PEROBS::Hash with key #{k.inspect}. " +
148
- 'Have you used self() instead of myself() to ' +
149
- "get the reference of this PEROBS object?\n" +
150
- v.inspect
151
- end
152
- data[k] = v
213
+ if k.respond_to?(:is_poxreference?)
214
+ # JSON only supports Strings as hash keys. Since JSON is the default
215
+ # internal storage format in the database, we have to marshall
216
+ # PEROBS::Object references ourselves.
217
+ k = "#<PEROBS::POReference id=#{k.id}>"
218
+ elsif k[0..24] == '#<PEROBS::POReference id='
219
+ # This could obviously result in conflicts with 'normal' String hash
220
+ # keys. This is extremely unlikely, but we better catch this case
221
+ # before it causes hard to debug trouble.
222
+ raise ArgumentError, "Hash key #{k} conflicts with PEROBS " +
223
+ "internal representation of marshalled hash keys!"
153
224
  end
225
+ data[k] = serialize_helper(v)
154
226
  end
155
227
 
156
228
  data
157
229
  end
158
230
 
231
+ def serialize_helper(v)
232
+ if v.respond_to?(:is_poxreference?)
233
+ # References to other PEROBS objects (POXReference) are stored as
234
+ # POReference in the database.
235
+ return POReference.new(v.id)
236
+ else
237
+ # Outside of the PEROBS library all PEROBS::ObjectBase derived
238
+ # objects should not be used directly. The library only exposes them
239
+ # via POXReference proxy objects.
240
+ if v.is_a?(ObjectBase)
241
+ PEROBS.log.fatal 'A PEROBS::ObjectBase object escaped! ' +
242
+ "It is stored in a PEROBS::Hash. " +
243
+ 'Have you used self() instead of myself() to ' +
244
+ "get the reference of this PEROBS object?\n" +
245
+ v.inspect
246
+ end
247
+
248
+ # All other objects are serialized by their native methods.
249
+ return v
250
+ end
251
+ end
252
+
159
253
  end
160
254
 
161
255
  end
@@ -0,0 +1,144 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = IDList.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2018 by Chris Schlaeger <chris@taskjuggler.org>
6
+ #
7
+ # MIT License
8
+ #
9
+ # Permission is hereby granted, free of charge, to any person obtaining
10
+ # a copy of this software and associated documentation files (the
11
+ # "Software"), to deal in the Software without restriction, including
12
+ # without limitation the rights to use, copy, modify, merge, publish,
13
+ # distribute, sublicense, and/or sell copies of the Software, and to
14
+ # permit persons to whom the Software is furnished to do so, subject to
15
+ # the following conditions:
16
+ #
17
+ # The above copyright notice and this permission notice shall be
18
+ # included in all copies or substantial portions of the Software.
19
+ #
20
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
+
28
+ require 'perobs/IDListPageFile'
29
+ require 'perobs/IDListPageRecord'
30
+
31
+ module PEROBS
32
+
33
+ # This class stores a list of 64 bit values. Values can be added to the list
34
+ # and the presence of a certain value can be checked. It can hold up to 2^64
35
+ # values. It tries to keep values in memory but can store them in a file if
36
+ # needed. A threshold for the in-memory values can be set in the
37
+ # constructor. The stored values are grouped in pages. Each page can hold up
38
+ # to page_size entries.
39
+ class IDList
40
+
41
+ # Create a new IDList object. The data that can't be kept in memory will
42
+ # be stored in the specified directory under the given name.
43
+ # @param dir [String] Path of the directory
44
+ # @param name [String] Name of the file
45
+ # @param max_in_memory [Integer] Specifies the maximum number of values
46
+ # that will be kept in memory. If the list is larger, values will
47
+ # be cached in the specified file.
48
+ # @param page_size [Integer] The number of values per page. The default
49
+ # value is 32, which was found to be the best performing config in tests.
50
+ def initialize(dir, name, max_in_memory, page_size = 32)
51
+ # The page_file manages the pages that store the values.
52
+ @page_file = IDListPageFile.new(self, dir, name,
53
+ max_in_memory, page_size)
54
+ clear
55
+ end
56
+
57
+ # Insert a new value into the list.
58
+ # @param id [Integer] The value to add
59
+ def insert(id)
60
+ # Find the index of the page that should hold ID.
61
+ index = @page_records.bsearch_index { |pr| pr.max_id >= id }
62
+ # Get the corresponding IDListPageRecord object.
63
+ page = @page_records[index]
64
+
65
+ # In case the page is already full we'll have to create a new page.
66
+ # There is no guarantee that a split will yield an page with space as we
67
+ # split by ID range, not by distributing the values evenly across the
68
+ # two pages.
69
+ while page.is_full?
70
+ new_page = page.split
71
+ # Store the newly created page into the page_records list.
72
+ @page_records.insert(index + 1, new_page)
73
+ if id >= new_page.min_id
74
+ # We need to insert the ID into the newly created page. Adjust index
75
+ # and page reference accordingly.
76
+ index += 1
77
+ page = new_page
78
+ end
79
+ end
80
+
81
+ # Insert the ID into the page.
82
+ page.insert(id)
83
+ end
84
+
85
+ # Check if a given value is already stored in the list.
86
+ # @param id [Integer] The value to check for
87
+ def include?(id)
88
+ @page_records.bsearch { |pr| pr.max_id >= id }.include?(id)
89
+ end
90
+
91
+ # Clear the list and empty the filesystem cache file.
92
+ def clear
93
+ @page_file.clear
94
+ @page_records = [ IDListPageRecord.new(@page_file, 0, 2 ** 64) ]
95
+ end
96
+
97
+ # Erase the list including the filesystem cache file. The IDList is no
98
+ # longer usable after this call but the cache file is removed from the
99
+ # filesystem.
100
+ def erase
101
+ @page_file.erase
102
+ @page_records = nil
103
+ end
104
+
105
+ # Perform some consistency checks on the internal data structures. Raises
106
+ # a RuntimeError in case a problem is found.
107
+ def check
108
+ last_max = -1
109
+ unless (min_id = @page_records.first.min_id) == 0
110
+ raise RuntimeError, "min_id of first record (#{min_id}) " +
111
+ "must be 0."
112
+ end
113
+
114
+ @page_records.each do |pr|
115
+ unless pr.min_id == last_max + 1
116
+ raise RuntimeError, "max_id of previous record (#{last_max}) " +
117
+ "must be exactly 1 smaller than current record (#{pr.min_id})."
118
+ end
119
+ last_max = pr.max_id
120
+ pr.check
121
+ end
122
+
123
+ unless last_max == 2 ** 64
124
+ raise RuntimeError, "max_id of last records " +
125
+ "(#{@page_records.last.max_id}) must be #{2 ** 64})."
126
+ end
127
+ end
128
+
129
+ def to_a
130
+ a = []
131
+ @page_records.each { |pr| a += pr.values }
132
+ a
133
+ end
134
+
135
+ # Print a human readable form of the tree that stores the list. This is
136
+ # only meant for debugging purposes and does not scale for larger trees.
137
+ def to_s
138
+ "\n" + @root.to_s
139
+ end
140
+
141
+ end
142
+
143
+ end
144
+
@@ -0,0 +1,107 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = IDListPage.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2018 by Chris Schlaeger <chris@taskjuggler.org>
6
+ #
7
+ # MIT License
8
+ #
9
+ # Permission is hereby granted, free of charge, to any person obtaining
10
+ # a copy of this software and associated documentation files (the
11
+ # "Software"), to deal in the Software without restriction, including
12
+ # without limitation the rights to use, copy, modify, merge, publish,
13
+ # distribute, sublicense, and/or sell copies of the Software, and to
14
+ # permit persons to whom the Software is furnished to do so, subject to
15
+ # the following conditions:
16
+ #
17
+ # The above copyright notice and this permission notice shall be
18
+ # included in all copies or substantial portions of the Software.
19
+ #
20
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
+
28
+ module PEROBS
29
+
30
+ class IDListPage
31
+
32
+ attr_reader :uid, :values
33
+ attr_accessor :record
34
+
35
+ def initialize(page_file, record, uid, values = [])
36
+ @page_file = page_file
37
+ @record = record
38
+ @uid = uid
39
+ @values = values
40
+ @record.page_entries = @values.length
41
+ end
42
+
43
+ def IDListPage::load(page_file, uid, ref)
44
+ page_file.load(uid, ref)
45
+ end
46
+
47
+ def is_full?
48
+ @values.length >= @page_file.page_size
49
+ end
50
+
51
+ def length
52
+ @values.length
53
+ end
54
+
55
+ def save
56
+ @page_file.save_page(self)
57
+ end
58
+
59
+ def insert(id)
60
+ if is_full?
61
+ raise ArgumentError, "IDListPage is already full"
62
+ end
63
+ index = @values.bsearch_index { |v| v >= id } || @values.length
64
+
65
+ # If the value isn't stored already, insert it.
66
+ if @values[index] != id
67
+ @values.insert(index, id)
68
+ @record.page_entries = @values.length
69
+ @page_file.mark_page_as_modified(self)
70
+ end
71
+ end
72
+
73
+ def include?(id)
74
+ !(v = @values.bsearch { |v| v >= id }).nil? && v == id
75
+ end
76
+
77
+ def delete(max_id)
78
+ a = []
79
+ @values.delete_if { |v| v > max_id ? a << v : false }
80
+
81
+ unless a.empty?
82
+ @record.page_entries = @values.length
83
+ @page_file.mark_page_as_modified(self)
84
+ end
85
+
86
+ a
87
+ end
88
+
89
+ def check
90
+ last_value = nil
91
+ @values.each_with_index do |v, i|
92
+ if last_value && last_value >= v
93
+ raise RuntimeError, "The values #{last_value} and #{v} must be " +
94
+ "strictly ascending: #{@values.inspect}"
95
+ end
96
+ last_value = v
97
+ end
98
+ end
99
+
100
+ def to_s
101
+ "[ #{@values.join(', ')} ]"
102
+ end
103
+
104
+ end
105
+
106
+ end
107
+
@@ -0,0 +1,180 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = IDListPageFile.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2018 by Chris Schlaeger <chris@taskjuggler.org>
6
+ #
7
+ # MIT License
8
+ #
9
+ # Permission is hereby granted, free of charge, to any person obtaining
10
+ # a copy of this software and associated documentation files (the
11
+ # "Software"), to deal in the Software without restriction, including
12
+ # without limitation the rights to use, copy, modify, merge, publish,
13
+ # distribute, sublicense, and/or sell copies of the Software, and to
14
+ # permit persons to whom the Software is furnished to do so, subject to
15
+ # the following conditions:
16
+ #
17
+ # The above copyright notice and this permission notice shall be
18
+ # included in all copies or substantial portions of the Software.
19
+ #
20
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
+
28
+ require 'perobs/IDListPage'
29
+ require 'perobs/IDListPageRecord'
30
+ require 'perobs/Log'
31
+ require 'perobs/PersistentObjectCache'
32
+
33
+ module PEROBS
34
+
35
+ # The IDListPageFile class provides a filesystem based cache for the
36
+ # IDListPage objects. The IDListPageRecord objects only hold the index of the
37
+ # page in this cache. This allows the pages to be garbage collected and
38
+ # swapped to the file. If accessed, the pages will be swapped in again. While
39
+ # this process is similar to the demand paging of the OS it has absolutely
40
+ # nothing to do with it.
41
+ class IDListPageFile
42
+
43
+ attr_reader :page_size, :pages
44
+
45
+ # Create a new IDListPageFile object that uses the given file in the given
46
+ # directory as cache file.
47
+ # @param list [IDList] The IDList object that caches pages here
48
+ # @param dir [String] An existing directory
49
+ # @param name [String] A file name (without path)
50
+ # @param max_in_memory [Integer] Maximum number of pages to keep in memory
51
+ # @param page_size [Integer] The number of values in each page
52
+ def initialize(list, dir, name, max_in_memory, page_size)
53
+ @list = list
54
+ @file_name = File.join(dir, name + '.cache')
55
+ @page_size = page_size
56
+ open
57
+ @pages = PersistentObjectCache.new(max_in_memory, max_in_memory,
58
+ IDListPage, self)
59
+ @page_counter = 0
60
+ end
61
+
62
+ # Load the IDListPage from the cache file.
63
+ # @param page_idx [Integer] The page index in the page file
64
+ # @param record [IDListPageRecord] the corresponding IDListPageRecord
65
+ # @return [IDListPage] The loaded values
66
+ def load(page_idx, record)
67
+ # The IDListPageRecord will tell us the actual number of values stored
68
+ # in this page.
69
+ values = []
70
+ unless (entries = record.page_entries) == 0
71
+ begin
72
+ @f.seek(page_idx * @page_size * 8)
73
+ values = @f.read(entries * 8).unpack("Q#{entries}")
74
+ rescue IOError => e
75
+ PEROBS.log.fatal "Cannot read cache file #{@file_name}: #{e.message}"
76
+ end
77
+ end
78
+
79
+ # Create the IDListPage object with the given values.
80
+ p = IDListPage.new(self, record, page_idx, values)
81
+ @pages.insert(p, false)
82
+
83
+ p
84
+ end
85
+
86
+ # Return the number of registered pages.
87
+ def page_count
88
+ @page_counter
89
+ end
90
+
91
+ # Create a new IDListPage and register it.
92
+ # @param record [IDListPageRecord] The corresponding record.
93
+ # @param values [Array of Integer] The values stored in the page
94
+ # @return [IDListPage]
95
+ def new_page(record, values = [])
96
+ idx = @page_counter
97
+ @page_counter += 1
98
+ mark_page_as_modified(IDListPage.new(self, record, idx, values))
99
+ idx
100
+ end
101
+
102
+ # Return the IDListPage object with the given index.
103
+ # @param record [IDListPageRecord] the corresponding IDListPageRecord
104
+ # @return [IDListPage] The page corresponding to the index.
105
+ def page(record)
106
+ p = @pages.get(record.page_idx, record) || load(record.page_idx, record)
107
+ unless p.uid == record.page_idx
108
+ raise RuntimeError, "Page reference mismatch. Record " +
109
+ "#{record.page_idx} points to page #{p.uid}"
110
+ end
111
+
112
+ p
113
+ end
114
+
115
+ # Mark a page as modified. This means it has to be written into the cache
116
+ # before it is removed from memory.
117
+ # @param p [IDListPage] page reference
118
+ def mark_page_as_modified(p)
119
+ @pages.insert(p)
120
+ @pages.flush
121
+ end
122
+
123
+ # Clear all pages, erase the cache and re-open it again.
124
+ def clear
125
+ @pages.clear
126
+ @page_counter = 0
127
+ begin
128
+ @f.truncate(0)
129
+ rescue IOError => e
130
+ raise RuntimeError, "Cannote truncate cache file #{@file_name}: " +
131
+ e.message
132
+ end
133
+ end
134
+
135
+ # Discard all pages and erase the cache file.
136
+ def erase
137
+ @pages.clear
138
+ @page_counter = 0
139
+ close
140
+ end
141
+
142
+ # Save the given IDListPage into the cache file.
143
+ # @param p [IDListPage] page to store
144
+ def save_page(p)
145
+ if p.record.page_entries != p.values.length
146
+ raise RuntimeError, "page_entries mismatch for node #{p.uid}"
147
+ end
148
+ begin
149
+ @f.seek(p.uid * @page_size * 8)
150
+ @f.write(p.values.pack('Q*'))
151
+ rescue IOError => e
152
+ PEROBS.log.fatal "Cannot write cache file #{@file_name}: #{e.message}"
153
+ end
154
+ end
155
+
156
+ private
157
+
158
+ def open
159
+ begin
160
+ # Create a new, empty cache file (mode 'wb+') for reading and writing.
161
+ @f = File.open(@file_name, 'wb+')
162
+ rescue IOError => e
163
+ PEROBS.log.fatal "Cannot open cache file #{@file_name}: #{e.message}"
164
+ end
165
+ end
166
+
167
+ def close
168
+ begin
169
+ @f.close
170
+ File.delete(@file_name) if File.exist?(@file_name)
171
+ rescue IOError => e
172
+ PEROBS.log.fatal "Cannot erase cache file #{@file_name}: #{e.message}"
173
+ end
174
+ @f = nil
175
+ end
176
+
177
+ end
178
+
179
+ end
180
+