perobs 4.0.0 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. data/lib/perobs.rb +1 -0
  3. data/lib/perobs/Array.rb +66 -19
  4. data/lib/perobs/BTree.rb +83 -12
  5. data/lib/perobs/BTreeBlob.rb +1 -1
  6. data/lib/perobs/BTreeDB.rb +2 -2
  7. data/lib/perobs/BTreeNode.rb +365 -85
  8. data/lib/perobs/BigArray.rb +267 -0
  9. data/lib/perobs/BigArrayNode.rb +998 -0
  10. data/lib/perobs/BigHash.rb +262 -0
  11. data/lib/perobs/BigTree.rb +184 -0
  12. data/lib/perobs/BigTreeNode.rb +873 -0
  13. data/lib/perobs/ConsoleProgressMeter.rb +61 -0
  14. data/lib/perobs/DataBase.rb +4 -3
  15. data/lib/perobs/DynamoDB.rb +57 -15
  16. data/lib/perobs/EquiBlobsFile.rb +143 -51
  17. data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
  18. data/lib/perobs/FlatFile.rb +363 -203
  19. data/lib/perobs/FlatFileBlobHeader.rb +98 -54
  20. data/lib/perobs/FlatFileDB.rb +42 -20
  21. data/lib/perobs/Hash.rb +58 -13
  22. data/lib/perobs/IDList.rb +144 -0
  23. data/lib/perobs/IDListPage.rb +107 -0
  24. data/lib/perobs/IDListPageFile.rb +180 -0
  25. data/lib/perobs/IDListPageRecord.rb +142 -0
  26. data/lib/perobs/Object.rb +18 -15
  27. data/lib/perobs/ObjectBase.rb +38 -4
  28. data/lib/perobs/PersistentObjectCache.rb +53 -67
  29. data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
  30. data/lib/perobs/ProgressMeter.rb +97 -0
  31. data/lib/perobs/SpaceTree.rb +21 -12
  32. data/lib/perobs/SpaceTreeNode.rb +53 -61
  33. data/lib/perobs/Store.rb +71 -32
  34. data/lib/perobs/version.rb +1 -1
  35. data/perobs.gemspec +4 -4
  36. data/test/Array_spec.rb +15 -6
  37. data/test/BTree_spec.rb +5 -2
  38. data/test/BigArray_spec.rb +214 -0
  39. data/test/BigHash_spec.rb +144 -0
  40. data/test/BigTreeNode_spec.rb +153 -0
  41. data/test/BigTree_spec.rb +259 -0
  42. data/test/EquiBlobsFile_spec.rb +105 -1
  43. data/test/FNV_Hash_1a_64_spec.rb +59 -0
  44. data/test/FlatFileDB_spec.rb +63 -14
  45. data/test/Hash_spec.rb +1 -2
  46. data/test/IDList_spec.rb +77 -0
  47. data/test/LegacyDBs/LegacyDB.rb +151 -0
  48. data/test/LegacyDBs/version_3/class_map.json +1 -0
  49. data/test/LegacyDBs/version_3/config.json +1 -0
  50. data/test/LegacyDBs/version_3/database.blobs +0 -0
  51. data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
  52. data/test/LegacyDBs/version_3/index.blobs +0 -0
  53. data/test/LegacyDBs/version_3/version +1 -0
  54. data/test/LockFile_spec.rb +9 -6
  55. data/test/SpaceTree_spec.rb +4 -1
  56. data/test/Store_spec.rb +290 -199
  57. data/test/spec_helper.rb +9 -4
  58. metadata +47 -10
  59. data/lib/perobs/TreeDB.rb +0 -277
@@ -48,12 +48,13 @@ module PEROBS
48
48
  # The 'pack()' format of the header.
49
49
  FORMAT = 'CQQL'
50
50
  # The length of the header in bytes.
51
- LENGTH = 21
51
+ LENGTH = 25
52
52
  VALID_FLAG_BIT = 0
53
53
  COMPRESSED_FLAG_BIT = 2
54
54
  OUTDATED_FLAG_BIT = 3
55
55
 
56
56
  attr_reader :addr, :flags, :length, :id, :crc
57
+ attr_accessor :corruption_start
57
58
 
58
59
  # Create a new FlatFileBlobHeader with the given flags, length, id and crc.
59
60
  # @param file [File] the FlatFile that contains the header
@@ -69,50 +70,105 @@ module PEROBS
69
70
  @length = length
70
71
  @id = id
71
72
  @crc = crc
73
+ # This is only set if the header is preceded by a corrupted blob.
74
+ @corruption_start = nil
72
75
  end
73
76
 
74
77
  # Read the header from the given File.
75
78
  # @param file [File]
76
- # @return FlatFileBlobHeader
77
- def FlatFileBlobHeader::read(file)
78
- begin
79
- addr = file.pos
80
- buf = file.read(LENGTH)
81
- rescue IOError => e
82
- PEROBS.log.error "Cannot read blob header in flat file DB: #{e.message}"
83
- return nil
79
+ # @param addr [Integer] address in the file to start reading. If no
80
+ # address is specified use the current position in the file.
81
+ # @param id [Integer] Optional ID that the header should have. If no id is
82
+ # specified there is no check against the actual ID done.
83
+ # @return FlatFileBlobHeader or nil if there are no more blobs to read in
84
+ # the file.
85
+ def FlatFileBlobHeader::read(file, addr = nil, id = nil)
86
+ # If an address was specified we expect the read to always succeed. If
87
+ # no address is specified and we can't read the header we generate an
88
+ # error message but it is not fatal.
89
+ errors_are_fatal = !addr.nil?
90
+
91
+ mode = :searching_next_header
92
+ addr = file.pos unless addr
93
+ buf = nil
94
+ corruption_start = nil
95
+
96
+ loop do
97
+ buf_with_crc = nil
98
+ begin
99
+ file.seek(addr)
100
+ buf_with_crc = file.read(LENGTH)
101
+ rescue IOError => e
102
+ if errors_are_fatal
103
+ PEROBS.log.fatal "Cannot read blob header in flat file DB at " +
104
+ "address #{addr}: #{e.message}"
105
+ else
106
+ PEROBS.log.error "Cannot read blob header in flat file DB: " +
107
+ e.message
108
+ return nil
109
+ end
110
+ end
111
+
112
+ # Did we read anything?
113
+ if buf_with_crc.nil?
114
+ if errors_are_fatal
115
+ PEROBS.log.fatal "Cannot read blob header " +
116
+ "#{id ? "for ID #{id} " : ''}at address #{addr}"
117
+ else
118
+ # We have reached the end of the file.
119
+ return nil
120
+ end
121
+ end
122
+
123
+ # Did we get the full header?
124
+ if buf_with_crc.length != LENGTH
125
+ PEROBS.log.error "Incomplete FlatFileBlobHeader: Only " +
126
+ "#{buf_with_crc.length} " +
127
+ "bytes of #{LENGTH} could be read "
128
+ "#{id ? "for ID #{id} " : ''}at address #{addr}"
129
+ return nil
130
+ end
131
+
132
+ # Check the CRC of the header
133
+ buf = buf_with_crc[0..-5]
134
+ crc = buf_with_crc[-4..-1].unpack('L')[0]
135
+
136
+ if (read_crc = Zlib.crc32(buf, 0)) == crc
137
+ # We have found a valid header.
138
+ if corruption_start
139
+ PEROBS.log.error "FlatFile corruption ends at #{addr}. " +
140
+ "#{addr - corruption_start} bytes skipped. Some data may " +
141
+ "not be recoverable."
142
+ end
143
+ break
144
+ else
145
+ if errors_are_fatal
146
+ PEROBS.log.fatal "FlatFile Header CRC mismatch at address " +
147
+ "#{addr}. Header CRC is #{'%08x' % read_crc} but should be " +
148
+ "#{'%08x' % crc}."
149
+ else
150
+ if corruption_start.nil?
151
+ PEROBS.log.error "FlatFile corruption found. The FlatFile " +
152
+ "Header CRC mismatch at address #{addr}. Header CRC is " +
153
+ "#{'%08x' % read_crc} but should be #{'%08x' % crc}. Trying " +
154
+ "to find the next header."
155
+ corruption_start = addr
156
+ end
157
+ # The blob file is corrupted. There is no valid header at the
158
+ # current position in the file. We now try to find the next valid
159
+ # header by iterating over the remainder of the file advancing one
160
+ # byte with each step until we hit the end of the file or find the
161
+ # next valid header.
162
+ addr += 1
163
+ end
164
+ end
84
165
  end
85
166
 
86
- return nil unless buf
87
-
88
- if buf.length != LENGTH
89
- PEROBS.log.error "Incomplete FlatFileBlobHeader: Only #{buf.length} " +
90
- "bytes of #{LENGTH} could be read"
91
- return nil
167
+ header = FlatFileBlobHeader.new(file, addr, *buf.unpack(FORMAT))
168
+ if corruption_start
169
+ header.corruption_start = corruption_start
92
170
  end
93
171
 
94
- FlatFileBlobHeader.new(file, addr, *buf.unpack(FORMAT))
95
- end
96
-
97
- # Read the header from the given File.
98
- # @param file [File]
99
- # @param addr [Integer] address in the file to start reading
100
- # @param id [Integer] Optional ID that the header should have
101
- # @return FlatFileBlobHeader
102
- def FlatFileBlobHeader::read_at(file, addr, id = nil)
103
- buf = nil
104
- begin
105
- file.seek(addr)
106
- buf = file.read(LENGTH)
107
- rescue IOError => e
108
- PEROBS.log.fatal "Cannot read blob in flat file DB: #{e.message}"
109
- end
110
- if buf.nil? || buf.length != LENGTH
111
- PEROBS.log.fatal "Cannot read blob header " +
112
- "#{id ? "for ID #{id} " : ''}at address " +
113
- "#{addr}"
114
- end
115
- header = FlatFileBlobHeader.new(file, addr, *buf.unpack(FORMAT))
116
172
  if id && header.id != id
117
173
  PEROBS.log.fatal "Mismatch between FlatFile index and blob file " +
118
174
  "found. FlatFile has entry with ID #{header.id} at address " +
@@ -123,11 +179,12 @@ module PEROBS
123
179
  end
124
180
 
125
181
  # Write the header to a given File.
126
- # @param file [File]
127
182
  def write
128
183
  begin
184
+ buf = [ @flags, @length, @id, @crc].pack(FORMAT)
185
+ crc = Zlib.crc32(buf, 0)
129
186
  @file.seek(@addr)
130
- @file.write([ @flags, @length, @id, @crc].pack(FORMAT))
187
+ @file.write(buf + [ crc ].pack('L'))
131
188
  rescue IOError => e
132
189
  PEROBS.log.fatal "Cannot write blob header into flat file DB: " +
133
190
  e.message
@@ -135,11 +192,9 @@ module PEROBS
135
192
  end
136
193
 
137
194
  # Reset all the flag bits to 0. This marks the blob as invalid.
138
- # @param file [File] The file handle of the blob file.
139
- # @param addr [Integer] The address of the header
140
195
  def clear_flags
141
196
  @flags = 0
142
- write_flags
197
+ write
143
198
  end
144
199
 
145
200
  # Return true if the header is for a non-empty blob.
@@ -156,7 +211,7 @@ module PEROBS
156
211
  # transaction has been completed.
157
212
  def set_outdated_flag
158
213
  set_flag(OUTDATED_FLAG_BIT)
159
- write_flags
214
+ write
160
215
  end
161
216
 
162
217
  # Return true if the blob contains outdated data.
@@ -166,17 +221,6 @@ module PEROBS
166
221
 
167
222
  private
168
223
 
169
- def write_flags
170
- begin
171
- @file.seek(@addr)
172
- @file.write([ @flags ].pack('C'))
173
- @file.flush
174
- rescue IOError => e
175
- PEROBS.log.fatal "Writing flags of FlatFileBlobHeader with ID #{@id} " +
176
- "failed: #{e.message}"
177
- end
178
- end
179
-
180
224
  def bit_set?(n)
181
225
  mask = 1 << n
182
226
  @flags & mask == mask
@@ -2,7 +2,8 @@
2
2
  #
3
3
  # = FlatFileDB.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2015, 2016 by Chris Schlaeger <chris@taskjuggler.org>
5
+ # Copyright (c) 2015, 2016, 2017, 2018
6
+ # by Chris Schlaeger <chris@taskjuggler.org>
6
7
  #
7
8
  # MIT License
8
9
  #
@@ -41,7 +42,7 @@ module PEROBS
41
42
 
42
43
  # This version number increases whenever the on-disk format changes in a
43
44
  # way that requires conversion actions after an update.
44
- VERSION = 2
45
+ VERSION = 4
45
46
 
46
47
  attr_reader :max_blob_size
47
48
 
@@ -50,13 +51,17 @@ module PEROBS
50
51
  # @param options [Hash] options to customize the behavior. Currently only
51
52
  # the following options are supported:
52
53
  # :serializer : Can be :marshal, :json, :yaml
54
+ # :progressmeter : Reference to a ProgressMeter object
55
+ # :log : IO that should be used for logging
56
+ # :log_level : Minimum Logger level to log
53
57
  def initialize(db_name, options = {})
54
- super(options[:serializer] || :json)
58
+ super(options)
55
59
 
56
60
  @db_dir = db_name
57
61
  # Create the database directory if it doesn't exist yet.
58
62
  ensure_dir_exists(@db_dir)
59
- PEROBS.log.open(File.join(@db_dir, 'log'))
63
+ PEROBS.log.level = options[:log_level] if options[:log_level]
64
+ PEROBS.log.open(options[:log] || File.join(@db_dir, 'log'))
60
65
  check_version_and_upgrade
61
66
 
62
67
  # Read the existing DB config.
@@ -68,7 +73,7 @@ module PEROBS
68
73
 
69
74
  # Open the FlatFileDB for transactions.
70
75
  def open
71
- @flat_file = FlatFile.new(@db_dir)
76
+ @flat_file = FlatFile.new(@db_dir, @progressmeter)
72
77
  @flat_file.open
73
78
  PEROBS.log.info "FlatFile '#{@db_dir}' opened"
74
79
  end
@@ -143,8 +148,9 @@ module PEROBS
143
148
  end
144
149
  end
145
150
 
146
- def search_object(id)
147
- @flat_file.search_object(id)
151
+ # @return [Integer] Number of objects stored in the DB.
152
+ def item_counter
153
+ @flat_file.item_counter
148
154
  end
149
155
 
150
156
  # This method must be called to initiate the marking process.
@@ -154,7 +160,7 @@ module PEROBS
154
160
 
155
161
  # Permanently delete all objects that have not been marked. Those are
156
162
  # orphaned and are no longer referenced by any actively used object.
157
- # @return [Array] List of IDs that have been removed from the DB.
163
+ # @return [Integer] Number of the removed objects from the DB.
158
164
  def delete_unmarked_objects
159
165
  @flat_file.delete_unmarked_objects
160
166
  end
@@ -226,7 +232,8 @@ module PEROBS
226
232
  "'#{version_file}': " + e.message
227
233
  end
228
234
  else
229
- # Early versions of PEROBS did not have a version file.
235
+ # The DB is brand new.
236
+ version = VERSION
230
237
  write_version_file(version_file)
231
238
  end
232
239
 
@@ -234,25 +241,40 @@ module PEROBS
234
241
  PEROBS.log.fatal "Cannot downgrade the FlatFile database from " +
235
242
  "version #{version} to version #{VERSION}"
236
243
  end
237
-
238
- if version == 1
239
- # Version 1 had no support for data compression. Make sure all entries
240
- # are compressed to save space.
241
- open
242
- @flat_file.refresh
243
- close
244
+ if version < 3
245
+ PEROBS.log.fatal "The upgrade of this version of the PEROBS database " +
246
+ "is not supported by this version of PEROBS. Please try an earlier " +
247
+ "version of PEROBS to upgrade the database before using this version."
244
248
  end
245
249
 
246
- # After a successful upgrade change the version number in the DB as
247
- # well.
248
- if version < VERSION
250
+ # Version upgrades must be done one version number at a time. If the
251
+ # existing DB is multiple versions older than what the current PEROBS
252
+ # version expects then multiple upgrade runs will be needed.
253
+ while version < VERSION
254
+ if version == 3
255
+ PEROBS.log.warn "Updating FlatFileDB #{@db_dir} from version 3 to " +
256
+ "version 4 ..."
257
+ # Version 4 adds checksums for blob file headers. We have to convert
258
+ # the blob file to include the checksums.
259
+ FlatFile.insert_header_checksums(@db_dir)
260
+ open
261
+ @flat_file.regenerate_index_and_spaces
262
+ close
263
+ end
264
+
265
+ # After a successful upgrade change the version number in the DB as
266
+ # well.
249
267
  write_version_file(version_file)
250
268
  PEROBS.log.warn "Update of FlatFileDB '#{@db_dir}' from version " +
251
- "#{version} to version #{VERSION} completed"
269
+ "#{version} to version #{version + 1} completed"
270
+
271
+ # Update version variable to new version.
272
+ version += 1
252
273
  end
253
274
  end
254
275
 
255
276
  def write_version_file(version_file)
277
+
256
278
  begin
257
279
  RobustFile.write(version_file, VERSION)
258
280
  rescue IOError => e
@@ -2,7 +2,7 @@
2
2
  #
3
3
  # = Hash.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2015, 2016 by Chris Schlaeger <chris@taskjuggler.org>
5
+ # Copyright (c) 2015, 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
6
6
  #
7
7
  # MIT License
8
8
  #
@@ -37,20 +37,36 @@ module PEROBS
37
37
  # The implementation is largely a proxy around the standard Hash class. But
38
38
  # all mutating methods must be re-implemented to convert PEROBS::Objects to
39
39
  # POXReference objects and to register the object as modified with the
40
- # cache.
40
+ # cache. However, it is not designed for large data sets as it always reads
41
+ # and writes the full data set for every access (unless it is cached). For
42
+ # data sets that could have more than a few hundred entries BigHash is the
43
+ # recommended alternative.
41
44
  #
42
45
  # We explicitly don't support Hash::store() as it conflicts with
43
46
  # ObjectBase::store() method to access the store.
44
47
  class Hash < ObjectBase
45
48
 
49
+ # These methods do not mutate the Hash. They only perform read
50
+ # operations and return a new PEROBS::Hash object.
51
+ ([
52
+ :invert, :merge, :reject, :select
53
+ ] + Enumerable.instance_methods).uniq.each do |method_sym|
54
+ # Create a wrapper method that passes the call to @data.
55
+ define_method(method_sym) do |*args, &block|
56
+ # Register the read operation with the cache.
57
+ @store.cache.cache_read(self)
58
+ @store.new(PEROBS::Hash, @data.send(method_sym, *args, &block))
59
+ end
60
+ end
61
+
46
62
  # These methods do not mutate the Hash. They only perform read
47
63
  # operations.
48
64
  ([
49
65
  :==, :[], :assoc, :compare_by_identity, :compare_by_identity?, :default,
50
66
  :default_proc, :each, :each_key, :each_pair, :each_value, :empty?,
51
67
  :eql?, :fetch, :flatten, :has_key?, :has_value?, :hash, :include?,
52
- :invert, :key, :key?, :keys, :length, :member?, :merge,
53
- :pretty_print, :pretty_print_cycle, :rassoc, :reject, :select, :size,
68
+ :key, :key?, :keys, :length, :member?,
69
+ :pretty_print, :pretty_print_cycle, :rassoc, :size,
54
70
  :to_a, :to_h, :to_hash, :to_s, :value?, :values, :values_at
55
71
  ] + Enumerable.instance_methods).uniq.each do |method_sym|
56
72
  # Create a wrapper method that passes the call to @data.
@@ -61,11 +77,22 @@ module PEROBS
61
77
  end
62
78
  end
63
79
 
64
- # These methods mutate the Hash.
80
+ # These methods mutate the Hash and return self
65
81
  [
66
- :[]=, :clear, :default=, :default_proc=, :delete, :delete_if,
67
- :initialize_copy, :keep_if, :merge!, :rehash, :reject!, :replace,
68
- :select!, :shift, :update
82
+ :clear, :keep_if, :merge!, :rehash, :reject!, :replace, :select!, :update
83
+ ].each do |method_sym|
84
+ # Create a wrapper method that passes the call to @data.
85
+ define_method(method_sym) do |*args, &block|
86
+ # Register the write operation with the cache.
87
+ @store.cache.cache_write(self)
88
+ @data.send(method_sym, *args, &block)
89
+ myself
90
+ end
91
+ end
92
+
93
+ # These methods mutate the Hash and return basic Ruby type objects.
94
+ [
95
+ :delete, :delete_if, :shift
69
96
  ].each do |method_sym|
70
97
  # Create a wrapper method that passes the call to @data.
71
98
  define_method(method_sym) do |*args, &block|
@@ -79,17 +106,35 @@ module PEROBS
79
106
  # PEROBS users should never call this method or equivalents of derived
80
107
  # methods directly.
81
108
  # @param p [PEROBS::Handle] PEROBS handle
82
- # @param default [Any] The default value that is returned when no value is
83
- # stored for a specific key.
84
- def initialize(p, default = nil)
109
+ # @param default [Object] The default value that is returned when no value
110
+ # is stored for a specific key. The default must be of the
111
+ # supported type.
112
+ def initialize(p, default = nil, &block)
85
113
  super(p)
86
- @default = nil
87
- @data = {}
114
+ _check_assignment_value(default)
115
+ if block_given?
116
+ @data = ::Hash.new(&block)
117
+ else
118
+ @data = ::Hash.new(default)
119
+ end
88
120
 
89
121
  # Ensure that the newly created object will be pushed into the database.
90
122
  @store.cache.cache_write(self)
91
123
  end
92
124
 
125
+ # Proxy for assignment method.
126
+ def []=(key, value)
127
+ _check_assignment_value(value)
128
+ @store.cache.cache_write(self)
129
+ @data[key] = value
130
+ end
131
+
132
+ # Proxy for default= method.
133
+ def default=(value)
134
+ _check_assignment_value(value)
135
+ @data.default=(value)
136
+ end
137
+
93
138
  # Return a list of all object IDs of all persistent objects that this Hash
94
139
  # is referencing.
95
140
  # @return [Array of Integer] IDs of referenced objects