perobs 4.0.0 → 4.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. data/lib/perobs.rb +1 -0
  3. data/lib/perobs/Array.rb +66 -19
  4. data/lib/perobs/BTree.rb +83 -12
  5. data/lib/perobs/BTreeBlob.rb +1 -1
  6. data/lib/perobs/BTreeDB.rb +2 -2
  7. data/lib/perobs/BTreeNode.rb +365 -85
  8. data/lib/perobs/BigArray.rb +267 -0
  9. data/lib/perobs/BigArrayNode.rb +998 -0
  10. data/lib/perobs/BigHash.rb +262 -0
  11. data/lib/perobs/BigTree.rb +184 -0
  12. data/lib/perobs/BigTreeNode.rb +873 -0
  13. data/lib/perobs/ConsoleProgressMeter.rb +61 -0
  14. data/lib/perobs/DataBase.rb +4 -3
  15. data/lib/perobs/DynamoDB.rb +57 -15
  16. data/lib/perobs/EquiBlobsFile.rb +143 -51
  17. data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
  18. data/lib/perobs/FlatFile.rb +363 -203
  19. data/lib/perobs/FlatFileBlobHeader.rb +98 -54
  20. data/lib/perobs/FlatFileDB.rb +42 -20
  21. data/lib/perobs/Hash.rb +58 -13
  22. data/lib/perobs/IDList.rb +144 -0
  23. data/lib/perobs/IDListPage.rb +107 -0
  24. data/lib/perobs/IDListPageFile.rb +180 -0
  25. data/lib/perobs/IDListPageRecord.rb +142 -0
  26. data/lib/perobs/Object.rb +18 -15
  27. data/lib/perobs/ObjectBase.rb +38 -4
  28. data/lib/perobs/PersistentObjectCache.rb +53 -67
  29. data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
  30. data/lib/perobs/ProgressMeter.rb +97 -0
  31. data/lib/perobs/SpaceTree.rb +21 -12
  32. data/lib/perobs/SpaceTreeNode.rb +53 -61
  33. data/lib/perobs/Store.rb +71 -32
  34. data/lib/perobs/version.rb +1 -1
  35. data/perobs.gemspec +4 -4
  36. data/test/Array_spec.rb +15 -6
  37. data/test/BTree_spec.rb +5 -2
  38. data/test/BigArray_spec.rb +214 -0
  39. data/test/BigHash_spec.rb +144 -0
  40. data/test/BigTreeNode_spec.rb +153 -0
  41. data/test/BigTree_spec.rb +259 -0
  42. data/test/EquiBlobsFile_spec.rb +105 -1
  43. data/test/FNV_Hash_1a_64_spec.rb +59 -0
  44. data/test/FlatFileDB_spec.rb +63 -14
  45. data/test/Hash_spec.rb +1 -2
  46. data/test/IDList_spec.rb +77 -0
  47. data/test/LegacyDBs/LegacyDB.rb +151 -0
  48. data/test/LegacyDBs/version_3/class_map.json +1 -0
  49. data/test/LegacyDBs/version_3/config.json +1 -0
  50. data/test/LegacyDBs/version_3/database.blobs +0 -0
  51. data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
  52. data/test/LegacyDBs/version_3/index.blobs +0 -0
  53. data/test/LegacyDBs/version_3/version +1 -0
  54. data/test/LockFile_spec.rb +9 -6
  55. data/test/SpaceTree_spec.rb +4 -1
  56. data/test/Store_spec.rb +290 -199
  57. data/test/spec_helper.rb +9 -4
  58. metadata +47 -10
  59. data/lib/perobs/TreeDB.rb +0 -277
@@ -48,12 +48,13 @@ module PEROBS
48
48
  # The 'pack()' format of the header.
49
49
  FORMAT = 'CQQL'
50
50
  # The length of the header in bytes.
51
- LENGTH = 21
51
+ LENGTH = 25
52
52
  VALID_FLAG_BIT = 0
53
53
  COMPRESSED_FLAG_BIT = 2
54
54
  OUTDATED_FLAG_BIT = 3
55
55
 
56
56
  attr_reader :addr, :flags, :length, :id, :crc
57
+ attr_accessor :corruption_start
57
58
 
58
59
  # Create a new FlatFileBlobHeader with the given flags, length, id and crc.
59
60
  # @param file [File] the FlatFile that contains the header
@@ -69,50 +70,105 @@ module PEROBS
69
70
  @length = length
70
71
  @id = id
71
72
  @crc = crc
73
+ # This is only set if the header is preceded by a corrupted blob.
74
+ @corruption_start = nil
72
75
  end
73
76
 
74
77
  # Read the header from the given File.
75
78
  # @param file [File]
76
- # @return FlatFileBlobHeader
77
- def FlatFileBlobHeader::read(file)
78
- begin
79
- addr = file.pos
80
- buf = file.read(LENGTH)
81
- rescue IOError => e
82
- PEROBS.log.error "Cannot read blob header in flat file DB: #{e.message}"
83
- return nil
79
+ # @param addr [Integer] address in the file to start reading. If no
80
+ # address is specified use the current position in the file.
81
+ # @param id [Integer] Optional ID that the header should have. If no id is
82
+ # specified there is no check against the actual ID done.
83
+ # @return FlatFileBlobHeader or nil if there are no more blobs to read in
84
+ # the file.
85
+ def FlatFileBlobHeader::read(file, addr = nil, id = nil)
86
+ # If an address was specified we expect the read to always succeed. If
87
+ # no address is specified and we can't read the header we generate an
88
+ # error message but it is not fatal.
89
+ errors_are_fatal = !addr.nil?
90
+
91
+ mode = :searching_next_header
92
+ addr = file.pos unless addr
93
+ buf = nil
94
+ corruption_start = nil
95
+
96
+ loop do
97
+ buf_with_crc = nil
98
+ begin
99
+ file.seek(addr)
100
+ buf_with_crc = file.read(LENGTH)
101
+ rescue IOError => e
102
+ if errors_are_fatal
103
+ PEROBS.log.fatal "Cannot read blob header in flat file DB at " +
104
+ "address #{addr}: #{e.message}"
105
+ else
106
+ PEROBS.log.error "Cannot read blob header in flat file DB: " +
107
+ e.message
108
+ return nil
109
+ end
110
+ end
111
+
112
+ # Did we read anything?
113
+ if buf_with_crc.nil?
114
+ if errors_are_fatal
115
+ PEROBS.log.fatal "Cannot read blob header " +
116
+ "#{id ? "for ID #{id} " : ''}at address #{addr}"
117
+ else
118
+ # We have reached the end of the file.
119
+ return nil
120
+ end
121
+ end
122
+
123
+ # Did we get the full header?
124
+ if buf_with_crc.length != LENGTH
125
+ PEROBS.log.error "Incomplete FlatFileBlobHeader: Only " +
126
+ "#{buf_with_crc.length} " +
127
+ "bytes of #{LENGTH} could be read "
128
+ "#{id ? "for ID #{id} " : ''}at address #{addr}"
129
+ return nil
130
+ end
131
+
132
+ # Check the CRC of the header
133
+ buf = buf_with_crc[0..-5]
134
+ crc = buf_with_crc[-4..-1].unpack('L')[0]
135
+
136
+ if (read_crc = Zlib.crc32(buf, 0)) == crc
137
+ # We have found a valid header.
138
+ if corruption_start
139
+ PEROBS.log.error "FlatFile corruption ends at #{addr}. " +
140
+ "#{addr - corruption_start} bytes skipped. Some data may " +
141
+ "not be recoverable."
142
+ end
143
+ break
144
+ else
145
+ if errors_are_fatal
146
+ PEROBS.log.fatal "FlatFile Header CRC mismatch at address " +
147
+ "#{addr}. Header CRC is #{'%08x' % read_crc} but should be " +
148
+ "#{'%08x' % crc}."
149
+ else
150
+ if corruption_start.nil?
151
+ PEROBS.log.error "FlatFile corruption found. The FlatFile " +
152
+ "Header CRC mismatch at address #{addr}. Header CRC is " +
153
+ "#{'%08x' % read_crc} but should be #{'%08x' % crc}. Trying " +
154
+ "to find the next header."
155
+ corruption_start = addr
156
+ end
157
+ # The blob file is corrupted. There is no valid header at the
158
+ # current position in the file. We now try to find the next valid
159
+ # header by iterating over the remainder of the file advancing one
160
+ # byte with each step until we hit the end of the file or find the
161
+ # next valid header.
162
+ addr += 1
163
+ end
164
+ end
84
165
  end
85
166
 
86
- return nil unless buf
87
-
88
- if buf.length != LENGTH
89
- PEROBS.log.error "Incomplete FlatFileBlobHeader: Only #{buf.length} " +
90
- "bytes of #{LENGTH} could be read"
91
- return nil
167
+ header = FlatFileBlobHeader.new(file, addr, *buf.unpack(FORMAT))
168
+ if corruption_start
169
+ header.corruption_start = corruption_start
92
170
  end
93
171
 
94
- FlatFileBlobHeader.new(file, addr, *buf.unpack(FORMAT))
95
- end
96
-
97
- # Read the header from the given File.
98
- # @param file [File]
99
- # @param addr [Integer] address in the file to start reading
100
- # @param id [Integer] Optional ID that the header should have
101
- # @return FlatFileBlobHeader
102
- def FlatFileBlobHeader::read_at(file, addr, id = nil)
103
- buf = nil
104
- begin
105
- file.seek(addr)
106
- buf = file.read(LENGTH)
107
- rescue IOError => e
108
- PEROBS.log.fatal "Cannot read blob in flat file DB: #{e.message}"
109
- end
110
- if buf.nil? || buf.length != LENGTH
111
- PEROBS.log.fatal "Cannot read blob header " +
112
- "#{id ? "for ID #{id} " : ''}at address " +
113
- "#{addr}"
114
- end
115
- header = FlatFileBlobHeader.new(file, addr, *buf.unpack(FORMAT))
116
172
  if id && header.id != id
117
173
  PEROBS.log.fatal "Mismatch between FlatFile index and blob file " +
118
174
  "found. FlatFile has entry with ID #{header.id} at address " +
@@ -123,11 +179,12 @@ module PEROBS
123
179
  end
124
180
 
125
181
  # Write the header to a given File.
126
- # @param file [File]
127
182
  def write
128
183
  begin
184
+ buf = [ @flags, @length, @id, @crc].pack(FORMAT)
185
+ crc = Zlib.crc32(buf, 0)
129
186
  @file.seek(@addr)
130
- @file.write([ @flags, @length, @id, @crc].pack(FORMAT))
187
+ @file.write(buf + [ crc ].pack('L'))
131
188
  rescue IOError => e
132
189
  PEROBS.log.fatal "Cannot write blob header into flat file DB: " +
133
190
  e.message
@@ -135,11 +192,9 @@ module PEROBS
135
192
  end
136
193
 
137
194
  # Reset all the flag bits to 0. This marks the blob as invalid.
138
- # @param file [File] The file handle of the blob file.
139
- # @param addr [Integer] The address of the header
140
195
  def clear_flags
141
196
  @flags = 0
142
- write_flags
197
+ write
143
198
  end
144
199
 
145
200
  # Return true if the header is for a non-empty blob.
@@ -156,7 +211,7 @@ module PEROBS
156
211
  # transaction has been completed.
157
212
  def set_outdated_flag
158
213
  set_flag(OUTDATED_FLAG_BIT)
159
- write_flags
214
+ write
160
215
  end
161
216
 
162
217
  # Return true if the blob contains outdated data.
@@ -166,17 +221,6 @@ module PEROBS
166
221
 
167
222
  private
168
223
 
169
- def write_flags
170
- begin
171
- @file.seek(@addr)
172
- @file.write([ @flags ].pack('C'))
173
- @file.flush
174
- rescue IOError => e
175
- PEROBS.log.fatal "Writing flags of FlatFileBlobHeader with ID #{@id} " +
176
- "failed: #{e.message}"
177
- end
178
- end
179
-
180
224
  def bit_set?(n)
181
225
  mask = 1 << n
182
226
  @flags & mask == mask
@@ -2,7 +2,8 @@
2
2
  #
3
3
  # = FlatFileDB.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2015, 2016 by Chris Schlaeger <chris@taskjuggler.org>
5
+ # Copyright (c) 2015, 2016, 2017, 2018
6
+ # by Chris Schlaeger <chris@taskjuggler.org>
6
7
  #
7
8
  # MIT License
8
9
  #
@@ -41,7 +42,7 @@ module PEROBS
41
42
 
42
43
  # This version number increases whenever the on-disk format changes in a
43
44
  # way that requires conversion actions after an update.
44
- VERSION = 2
45
+ VERSION = 4
45
46
 
46
47
  attr_reader :max_blob_size
47
48
 
@@ -50,13 +51,17 @@ module PEROBS
50
51
  # @param options [Hash] options to customize the behavior. Currently only
51
52
  # the following options are supported:
52
53
  # :serializer : Can be :marshal, :json, :yaml
54
+ # :progressmeter : Reference to a ProgressMeter object
55
+ # :log : IO that should be used for logging
56
+ # :log_level : Minimum Logger level to log
53
57
  def initialize(db_name, options = {})
54
- super(options[:serializer] || :json)
58
+ super(options)
55
59
 
56
60
  @db_dir = db_name
57
61
  # Create the database directory if it doesn't exist yet.
58
62
  ensure_dir_exists(@db_dir)
59
- PEROBS.log.open(File.join(@db_dir, 'log'))
63
+ PEROBS.log.level = options[:log_level] if options[:log_level]
64
+ PEROBS.log.open(options[:log] || File.join(@db_dir, 'log'))
60
65
  check_version_and_upgrade
61
66
 
62
67
  # Read the existing DB config.
@@ -68,7 +73,7 @@ module PEROBS
68
73
 
69
74
  # Open the FlatFileDB for transactions.
70
75
  def open
71
- @flat_file = FlatFile.new(@db_dir)
76
+ @flat_file = FlatFile.new(@db_dir, @progressmeter)
72
77
  @flat_file.open
73
78
  PEROBS.log.info "FlatFile '#{@db_dir}' opened"
74
79
  end
@@ -143,8 +148,9 @@ module PEROBS
143
148
  end
144
149
  end
145
150
 
146
- def search_object(id)
147
- @flat_file.search_object(id)
151
+ # @return [Integer] Number of objects stored in the DB.
152
+ def item_counter
153
+ @flat_file.item_counter
148
154
  end
149
155
 
150
156
  # This method must be called to initiate the marking process.
@@ -154,7 +160,7 @@ module PEROBS
154
160
 
155
161
  # Permanently delete all objects that have not been marked. Those are
156
162
  # orphaned and are no longer referenced by any actively used object.
157
- # @return [Array] List of IDs that have been removed from the DB.
163
+ # @return [Integer] Number of objects removed from the DB.
158
164
  def delete_unmarked_objects
159
165
  @flat_file.delete_unmarked_objects
160
166
  end
@@ -226,7 +232,8 @@ module PEROBS
226
232
  "'#{version_file}': " + e.message
227
233
  end
228
234
  else
229
- # Early versions of PEROBS did not have a version file.
235
+ # The DB is brand new.
236
+ version = VERSION
230
237
  write_version_file(version_file)
231
238
  end
232
239
 
@@ -234,25 +241,40 @@ module PEROBS
234
241
  PEROBS.log.fatal "Cannot downgrade the FlatFile database from " +
235
242
  "version #{version} to version #{VERSION}"
236
243
  end
237
-
238
- if version == 1
239
- # Version 1 had no support for data compression. Make sure all entries
240
- # are compressed to save space.
241
- open
242
- @flat_file.refresh
243
- close
244
+ if version < 3
245
+ PEROBS.log.fatal "The upgrade of this version of the PEROBS database " +
246
+ "is not supported by this version of PEROBS. Please try an earlier " +
247
+ "version of PEROBS to upgrade the database before using this version."
244
248
  end
245
249
 
246
- # After a successful upgrade change the version number in the DB as
247
- # well.
248
- if version < VERSION
250
+ # Version upgrades must be done one version number at a time. If the
251
+ # existing DB is multiple versions older than what the current PEROBS
252
+ # version expects then multiple upgrade runs will be needed.
253
+ while version < VERSION
254
+ if version == 3
255
+ PEROBS.log.warn "Updating FlatFileDB #{@db_dir} from version 3 to " +
256
+ "version 4 ..."
257
+ # Version 4 adds checksums for blob file headers. We have to convert
258
+ # the blob file to include the checksums.
259
+ FlatFile.insert_header_checksums(@db_dir)
260
+ open
261
+ @flat_file.regenerate_index_and_spaces
262
+ close
263
+ end
264
+
265
+ # After a successful upgrade change the version number in the DB as
266
+ # well.
249
267
  write_version_file(version_file)
250
268
  PEROBS.log.warn "Update of FlatFileDB '#{@db_dir}' from version " +
251
- "#{version} to version #{VERSION} completed"
269
+ "#{version} to version #{version + 1} completed"
270
+
271
+ # Update version variable to new version.
272
+ version += 1
252
273
  end
253
274
  end
254
275
 
255
276
  def write_version_file(version_file)
277
+
256
278
  begin
257
279
  RobustFile.write(version_file, VERSION)
258
280
  rescue IOError => e
@@ -2,7 +2,7 @@
2
2
  #
3
3
  # = Hash.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2015, 2016 by Chris Schlaeger <chris@taskjuggler.org>
5
+ # Copyright (c) 2015, 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
6
6
  #
7
7
  # MIT License
8
8
  #
@@ -37,20 +37,36 @@ module PEROBS
37
37
  # The implementation is largely a proxy around the standard Hash class. But
38
38
  # all mutating methods must be re-implemented to convert PEROBS::Objects to
39
39
  # POXReference objects and to register the object as modified with the
40
- # cache.
40
+ # cache. However, it is not designed for large data sets as it always reads
41
+ # and writes the full data set for every access (unless it is cached). For
42
+ # data sets that could have more than a few hundred entries BigHash is the
43
+ # recommended alternative.
41
44
  #
42
45
  # We explicitly don't support Hash::store() as it conflicts with
43
46
  # ObjectBase::store() method to access the store.
44
47
  class Hash < ObjectBase
45
48
 
49
+ # These methods do not mutate the Hash. They only perform read
50
+ # operations and return a new PEROBS::Hash object.
51
+ ([
52
+ :invert, :merge, :reject, :select
53
+ ] + Enumerable.instance_methods).uniq.each do |method_sym|
54
+ # Create a wrapper method that passes the call to @data.
55
+ define_method(method_sym) do |*args, &block|
56
+ # Register the read operation with the cache.
57
+ @store.cache.cache_read(self)
58
+ @store.new(PEROBS::Hash, @data.send(method_sym, *args, &block))
59
+ end
60
+ end
61
+
46
62
  # These methods do not mutate the Hash. They only perform read
47
63
  # operations.
48
64
  ([
49
65
  :==, :[], :assoc, :compare_by_identity, :compare_by_identity?, :default,
50
66
  :default_proc, :each, :each_key, :each_pair, :each_value, :empty?,
51
67
  :eql?, :fetch, :flatten, :has_key?, :has_value?, :hash, :include?,
52
- :invert, :key, :key?, :keys, :length, :member?, :merge,
53
- :pretty_print, :pretty_print_cycle, :rassoc, :reject, :select, :size,
68
+ :key, :key?, :keys, :length, :member?,
69
+ :pretty_print, :pretty_print_cycle, :rassoc, :size,
54
70
  :to_a, :to_h, :to_hash, :to_s, :value?, :values, :values_at
55
71
  ] + Enumerable.instance_methods).uniq.each do |method_sym|
56
72
  # Create a wrapper method that passes the call to @data.
@@ -61,11 +77,22 @@ module PEROBS
61
77
  end
62
78
  end
63
79
 
64
- # These methods mutate the Hash.
80
+ # These methods mutate the Hash and return self
65
81
  [
66
- :[]=, :clear, :default=, :default_proc=, :delete, :delete_if,
67
- :initialize_copy, :keep_if, :merge!, :rehash, :reject!, :replace,
68
- :select!, :shift, :update
82
+ :clear, :keep_if, :merge!, :rehash, :reject!, :replace, :select!, :update
83
+ ].each do |method_sym|
84
+ # Create a wrapper method that passes the call to @data.
85
+ define_method(method_sym) do |*args, &block|
86
+ # Register the write operation with the cache.
87
+ @store.cache.cache_write(self)
88
+ @data.send(method_sym, *args, &block)
89
+ myself
90
+ end
91
+ end
92
+
93
+ # These methods mutate the Hash and return basic Ruby type objects.
94
+ [
95
+ :delete, :delete_if, :shift
69
96
  ].each do |method_sym|
70
97
  # Create a wrapper method that passes the call to @data.
71
98
  define_method(method_sym) do |*args, &block|
@@ -79,17 +106,35 @@ module PEROBS
79
106
  # PEROBS users should never call this method or equivalents of derived
80
107
  # methods directly.
81
108
  # @param p [PEROBS::Handle] PEROBS handle
82
- # @param default [Any] The default value that is returned when no value is
83
- # stored for a specific key.
84
- def initialize(p, default = nil)
109
+ # @param default [Object] The default value that is returned when no value
110
+ # is stored for a specific key. The default must be of the
111
+ # supported type.
112
+ def initialize(p, default = nil, &block)
85
113
  super(p)
86
- @default = nil
87
- @data = {}
114
+ _check_assignment_value(default)
115
+ if block_given?
116
+ @data = ::Hash.new(&block)
117
+ else
118
+ @data = ::Hash.new(default)
119
+ end
88
120
 
89
121
  # Ensure that the newly created object will be pushed into the database.
90
122
  @store.cache.cache_write(self)
91
123
  end
92
124
 
125
+ # Proxy for assignment method.
126
+ def []=(key, value)
127
+ _check_assignment_value(value)
128
+ @store.cache.cache_write(self)
129
+ @data[key] = value
130
+ end
131
+
132
+ # Proxy for default= method.
133
+ def default=(value)
134
+ _check_assignment_value(value)
135
+ @data.default=(value)
136
+ end
137
+
93
138
  # Return a list of all object IDs of all persistent objects that this Hash
94
139
  # is referencing.
95
140
  # @return [Array of Integer] IDs of referenced objects