perobs 3.0.1 → 4.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +19 -18
  3. data/lib/perobs.rb +2 -0
  4. data/lib/perobs/Array.rb +68 -21
  5. data/lib/perobs/BTree.rb +110 -54
  6. data/lib/perobs/BTreeBlob.rb +14 -13
  7. data/lib/perobs/BTreeDB.rb +11 -10
  8. data/lib/perobs/BTreeNode.rb +551 -197
  9. data/lib/perobs/BTreeNodeCache.rb +10 -8
  10. data/lib/perobs/BTreeNodeLink.rb +11 -1
  11. data/lib/perobs/BigArray.rb +285 -0
  12. data/lib/perobs/BigArrayNode.rb +1002 -0
  13. data/lib/perobs/BigHash.rb +246 -0
  14. data/lib/perobs/BigTree.rb +197 -0
  15. data/lib/perobs/BigTreeNode.rb +873 -0
  16. data/lib/perobs/Cache.rb +47 -22
  17. data/lib/perobs/ClassMap.rb +2 -2
  18. data/lib/perobs/ConsoleProgressMeter.rb +61 -0
  19. data/lib/perobs/DataBase.rb +4 -3
  20. data/lib/perobs/DynamoDB.rb +62 -20
  21. data/lib/perobs/EquiBlobsFile.rb +174 -59
  22. data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
  23. data/lib/perobs/FlatFile.rb +536 -242
  24. data/lib/perobs/FlatFileBlobHeader.rb +120 -84
  25. data/lib/perobs/FlatFileDB.rb +58 -27
  26. data/lib/perobs/FuzzyStringMatcher.rb +175 -0
  27. data/lib/perobs/Hash.rb +129 -35
  28. data/lib/perobs/IDList.rb +144 -0
  29. data/lib/perobs/IDListPage.rb +107 -0
  30. data/lib/perobs/IDListPageFile.rb +180 -0
  31. data/lib/perobs/IDListPageRecord.rb +142 -0
  32. data/lib/perobs/LockFile.rb +3 -0
  33. data/lib/perobs/Object.rb +28 -20
  34. data/lib/perobs/ObjectBase.rb +53 -10
  35. data/lib/perobs/PersistentObjectCache.rb +142 -0
  36. data/lib/perobs/PersistentObjectCacheLine.rb +99 -0
  37. data/lib/perobs/ProgressMeter.rb +97 -0
  38. data/lib/perobs/SpaceManager.rb +273 -0
  39. data/lib/perobs/SpaceTree.rb +63 -47
  40. data/lib/perobs/SpaceTreeNode.rb +134 -115
  41. data/lib/perobs/SpaceTreeNodeLink.rb +1 -1
  42. data/lib/perobs/StackFile.rb +1 -1
  43. data/lib/perobs/Store.rb +180 -70
  44. data/lib/perobs/version.rb +1 -1
  45. data/perobs.gemspec +4 -4
  46. data/test/Array_spec.rb +48 -39
  47. data/test/BTreeDB_spec.rb +2 -2
  48. data/test/BTree_spec.rb +50 -1
  49. data/test/BigArray_spec.rb +261 -0
  50. data/test/BigHash_spec.rb +152 -0
  51. data/test/BigTreeNode_spec.rb +153 -0
  52. data/test/BigTree_spec.rb +259 -0
  53. data/test/EquiBlobsFile_spec.rb +105 -5
  54. data/test/FNV_Hash_1a_64_spec.rb +59 -0
  55. data/test/FlatFileDB_spec.rb +199 -15
  56. data/test/FuzzyStringMatcher_spec.rb +261 -0
  57. data/test/Hash_spec.rb +27 -16
  58. data/test/IDList_spec.rb +77 -0
  59. data/test/LegacyDBs/LegacyDB.rb +155 -0
  60. data/test/LegacyDBs/version_3/class_map.json +1 -0
  61. data/test/LegacyDBs/version_3/config.json +1 -0
  62. data/test/LegacyDBs/version_3/database.blobs +0 -0
  63. data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
  64. data/test/LegacyDBs/version_3/index.blobs +0 -0
  65. data/test/LegacyDBs/version_3/version +1 -0
  66. data/test/LockFile_spec.rb +9 -6
  67. data/test/Object_spec.rb +5 -5
  68. data/test/SpaceManager_spec.rb +176 -0
  69. data/test/SpaceTree_spec.rb +27 -9
  70. data/test/Store_spec.rb +353 -206
  71. data/test/perobs_spec.rb +7 -3
  72. data/test/spec_helper.rb +9 -4
  73. metadata +59 -16
  74. data/lib/perobs/SpaceTreeNodeCache.rb +0 -76
  75. data/lib/perobs/TreeDB.rb +0 -277
@@ -33,7 +33,7 @@ module PEROBS
33
33
  #
34
34
  # 1 Byte: Flags byte.
35
35
  # Bit 0: 0 deleted entry, 1 valid entry
36
- # Bit 1: 0 unmarked, 1 marked
36
+ # Bit 1: 0 reserved, must be 0
37
37
  # Bit 2: 0 uncompressed data, 1 compressed data
38
38
  # Bit 3: 0 current entry, 1 outdated entry
39
39
  # Bit 4 - 7: reserved, must be 0
@@ -48,21 +48,21 @@ module PEROBS
48
48
  # The 'pack()' format of the header.
49
49
  FORMAT = 'CQQL'
50
50
  # The length of the header in bytes.
51
- LENGTH = 21
51
+ LENGTH = 25
52
52
  VALID_FLAG_BIT = 0
53
- MARK_FLAG_BIT = 1
54
53
  COMPRESSED_FLAG_BIT = 2
55
54
  OUTDATED_FLAG_BIT = 3
56
55
 
57
56
  attr_reader :addr, :flags, :length, :id, :crc
57
+ attr_accessor :corruption_start
58
58
 
59
59
  # Create a new FlatFileBlobHeader with the given flags, length, id and crc.
60
60
  # @param file [File] the FlatFile that contains the header
61
61
  # @param addr [Integer] the offset address of the header in the file
62
- # @param flags [Fixnum] 8 bit number, see above
63
- # @param length [Fixnum] length of the header in bytes
62
+ # @param flags [Integer] 8 bit number, see above
63
+ # @param length [Integer] length of the header in bytes
64
64
  # @param id [Integer] ID of the blob entry
65
- # @param crc [Fixnum] CRC32 checksum of the blob entry
65
+ # @param crc [Integer] CRC32 checksum of the blob entry
66
66
  def initialize(file, addr, flags, length, id, crc)
67
67
  @file = file
68
68
  @addr = addr
@@ -70,64 +70,136 @@ module PEROBS
70
70
  @length = length
71
71
  @id = id
72
72
  @crc = crc
73
+ # This is only set if the header is preceded by a corrupted blob.
74
+ @corruption_start = nil
73
75
  end
74
76
 
75
77
  # Read the header from the given File.
76
78
  # @param file [File]
77
- # @return FlatFileBlobHeader
78
- def FlatFileBlobHeader::read(file)
79
- begin
80
- addr = file.pos
81
- buf = file.read(LENGTH)
82
- rescue IOError => e
83
- PEROBS.log.error "Cannot read blob header in flat file DB: #{e.message}"
84
- return nil
79
+ # @param addr [Integer] address in the file to start reading. If no
80
+ # address is specified use the current position in the file.
81
+ # @param id [Integer] Optional ID that the header should have. If no id is
82
+ # specified there is no check against the actual ID done.
83
+ # @return FlatFileBlobHeader or nil if there are no more blobs to read in
84
+ # the file.
85
+ def FlatFileBlobHeader::read(file, addr = nil, id = nil)
86
+ # If an address was specified we expect the read to always succeed. If
87
+ # no address is specified and we can't read the header we generate an
88
+ # error message but it is not fatal.
89
+ errors_are_fatal = !addr.nil?
90
+
91
+ mode = :searching_next_header
92
+ addr = file.pos unless addr
93
+ buf = nil
94
+ corruption_start = nil
95
+
96
+ loop do
97
+ buf_with_crc = nil
98
+ begin
99
+ file.seek(addr)
100
+ buf_with_crc = file.read(LENGTH)
101
+ rescue IOError => e
102
+ if errors_are_fatal
103
+ PEROBS.log.fatal "Cannot read blob header in flat file DB at " +
104
+ "address #{addr}: #{e.message}"
105
+ else
106
+ PEROBS.log.error "Cannot read blob header in flat file DB: " +
107
+ e.message
108
+ return nil
109
+ end
110
+ end
111
+
112
+ # Did we read anything?
113
+ if buf_with_crc.nil?
114
+ if errors_are_fatal
115
+ PEROBS.log.fatal "Cannot read blob header " +
116
+ "#{id ? "for ID #{id} " : ''}at address #{addr}"
117
+ else
118
+ if corruption_start
119
+ PEROBS.log.error "Corruption found at end of blob file at " +
120
+ "address #{addr}"
121
+ end
122
+ # We have reached the end of the file.
123
+ return nil
124
+ end
125
+ end
126
+
127
+ # Did we get the full header?
128
+ if buf_with_crc.length != LENGTH
129
+ msg = "Incomplete FlatFileBlobHeader: Only " +
130
+ "#{buf_with_crc.length} " +
131
+ "bytes of #{LENGTH} could be read "
132
+ "#{id ? "for ID #{id} " : ''}at address #{addr}"
133
+ if errors_are_fatal
134
+ PEROBS.log.fatal msg
135
+ else
136
+ PEROBS.log.error msg
137
+ end
138
+ return nil
139
+ end
140
+
141
+ # Check the CRC of the header
142
+ buf = buf_with_crc[0..-5]
143
+ crc = buf_with_crc[-4..-1].unpack('L')[0]
144
+
145
+ if (read_crc = Zlib.crc32(buf, 0)) == crc
146
+ # We have found a valid header.
147
+ if corruption_start
148
+ PEROBS.log.error "FlatFile corruption ends at #{addr}. " +
149
+ "#{addr - corruption_start} bytes skipped. Some data may " +
150
+ "not be recoverable."
151
+ end
152
+ break
153
+ else
154
+ if errors_are_fatal
155
+ PEROBS.log.fatal "FlatFile Header CRC mismatch at address " +
156
+ "#{addr}. Header CRC is #{'%08x' % read_crc} but should be " +
157
+ "#{'%08x' % crc}."
158
+ else
159
+ if corruption_start.nil?
160
+ if errors_are_fatal
161
+ PEROBS.log.fatal "FlatFile corruption found. The FlatFile " +
162
+ "Header CRC mismatch at address #{addr}. Header CRC is " +
163
+ "#{'%08x' % read_crc} but should be #{'%08x' % crc}."
164
+ else
165
+ PEROBS.log.error "FlatFile corruption found. The FlatFile " +
166
+ "Header CRC mismatch at address #{addr}. Header CRC is " +
167
+ "#{'%08x' % read_crc} but should be #{'%08x' % crc}. " +
168
+ "Trying to find the next header."
169
+ end
170
+ corruption_start = addr
171
+ end
172
+ # The blob file is corrupted. There is no valid header at the
173
+ # current position in the file. We now try to find the next valid
174
+ # header by iterating over the remainder of the file advancing one
175
+ # byte with each step until we hit the end of the file or find the
176
+ # next valid header.
177
+ addr += 1
178
+ end
179
+ end
85
180
  end
86
181
 
87
- return nil unless buf
88
-
89
- if buf.length != LENGTH
90
- PEROBS.log.error "Incomplete FlatFileBlobHeader: Only #{buf.length} " +
91
- "bytes of #{LENGTH} could be read"
92
- return nil
182
+ header = FlatFileBlobHeader.new(file, addr, *buf.unpack(FORMAT))
183
+ if corruption_start
184
+ header.corruption_start = corruption_start
93
185
  end
94
186
 
95
- FlatFileBlobHeader.new(file, addr, *buf.unpack(FORMAT))
96
- end
97
-
98
- # Read the header from the given File.
99
- # @param file [File]
100
- # @param addr [Integer] address in the file to start reading
101
- # @param id [Integer] Optional ID that the header should have
102
- # @return FlatFileBlobHeader
103
- def FlatFileBlobHeader::read_at(file, addr, id = nil)
104
- buf = nil
105
- begin
106
- file.seek(addr)
107
- buf = file.read(LENGTH)
108
- rescue IOError => e
109
- PEROBS.log.fatal "Cannot read blob in flat file DB: #{e.message}"
110
- end
111
- if buf.nil? || buf.length != LENGTH
112
- PEROBS.log.fatal "Cannot read blob header " +
113
- "#{id ? "for ID #{id} " : ''}at address " +
114
- "#{addr}"
115
- end
116
- header = FlatFileBlobHeader.new(file, addr, *buf.unpack(FORMAT))
117
187
  if id && header.id != id
118
188
  PEROBS.log.fatal "Mismatch between FlatFile index and blob file " +
119
- "found for entry with ID #{id}/#{header.id}"
189
+ "found. FlatFile has entry with ID #{header.id} at address " +
190
+ "#{addr}. Index has ID #{id} for this address."
120
191
  end
121
192
 
122
193
  return header
123
194
  end
124
195
 
125
196
  # Write the header to a given File.
126
- # @param file [File]
127
197
  def write
128
198
  begin
199
+ buf = [ @flags, @length, @id, @crc].pack(FORMAT)
200
+ crc = Zlib.crc32(buf, 0)
129
201
  @file.seek(@addr)
130
- @file.write([ @flags, @length, @id, @crc].pack(FORMAT))
202
+ @file.write(buf + [ crc ].pack('L'))
131
203
  rescue IOError => e
132
204
  PEROBS.log.fatal "Cannot write blob header into flat file DB: " +
133
205
  e.message
@@ -135,17 +207,9 @@ module PEROBS
135
207
  end
136
208
 
137
209
  # Reset all the flag bits to 0. This marks the blob as invalid.
138
- # @param file [File] The file handle of the blob file.
139
- # @param addr [Integer] The address of the header
140
210
  def clear_flags
141
- begin
142
- @file.seek(@addr)
143
- @file.write([ 0 ].pack('C'))
144
- @file.flush
145
- rescue IOError => e
146
- PEROBS.log.fatal "Clearing flags of FlatFileBlobHeader with ID " +
147
- "#{@id} failed: #{e.message}"
148
- end
211
+ @flags = 0
212
+ write
149
213
  end
150
214
 
151
215
  # Return true if the header is for a non-empty blob.
@@ -153,23 +217,6 @@ module PEROBS
153
217
  bit_set?(VALID_FLAG_BIT)
154
218
  end
155
219
 
156
- # Return true if the blob has been marked.
157
- def is_marked?
158
- bit_set?(MARK_FLAG_BIT)
159
- end
160
-
161
- # Set the mark bit.
162
- def set_mark_flag
163
- set_flag(MARK_FLAG_BIT)
164
- write_flags
165
- end
166
-
167
- # Clear the mark bit.
168
- def clear_mark_flag
169
- clear_flag(MARK_FLAG_BIT)
170
- write_flags
171
- end
172
-
173
220
  # Return true if the blob contains compressed data.
174
221
  def is_compressed?
175
222
  bit_set?(COMPRESSED_FLAG_BIT)
@@ -179,7 +226,7 @@ module PEROBS
179
226
  # transaction has been completed.
180
227
  def set_outdated_flag
181
228
  set_flag(OUTDATED_FLAG_BIT)
182
- write_flags
229
+ write
183
230
  end
184
231
 
185
232
  # Return true if the blob contains outdated data.
@@ -189,17 +236,6 @@ module PEROBS
189
236
 
190
237
  private
191
238
 
192
- def write_flags
193
- begin
194
- @file.seek(@addr)
195
- @file.write([ @flags ].pack('C'))
196
- @file.flush
197
- rescue IOError => e
198
- PEROBS.log.fatal "Writing flags of FlatFileBlobHeader with ID #{@id} " +
199
- "failed: #{e.message}"
200
- end
201
- end
202
-
203
239
  def bit_set?(n)
204
240
  mask = 1 << n
205
241
  @flags & mask == mask
@@ -2,7 +2,8 @@
2
2
  #
3
3
  # = FlatFileDB.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2015, 2016 by Chris Schlaeger <chris@taskjuggler.org>
5
+ # Copyright (c) 2015, 2016, 2017, 2018, 2019
6
+ # by Chris Schlaeger <chris@taskjuggler.org>
6
7
  #
7
8
  # MIT License
8
9
  #
@@ -41,7 +42,7 @@ module PEROBS
41
42
 
42
43
  # This version number increases whenever the on-disk format changes in a
43
44
  # way that requires conversion actions after an update.
44
- VERSION = 2
45
+ VERSION = 4
45
46
 
46
47
  attr_reader :max_blob_size
47
48
 
@@ -50,13 +51,17 @@ module PEROBS
50
51
  # @param options [Hash] options to customize the behavior. Currently only
51
52
  # the following options are supported:
52
53
  # :serializer : Can be :marshal, :json, :yaml
54
+ # :progressmeter : Reference to a ProgressMeter object
55
+ # :log : IO that should be used for logging
56
+ # :log_level : Minimum Logger level to log
53
57
  def initialize(db_name, options = {})
54
- super(options[:serializer] || :json)
58
+ super(options)
55
59
 
56
60
  @db_dir = db_name
57
61
  # Create the database directory if it doesn't exist yet.
58
62
  ensure_dir_exists(@db_dir)
59
- PEROBS.log.open(File.join(@db_dir, 'log'))
63
+ PEROBS.log.level = options[:log_level] if options[:log_level]
64
+ PEROBS.log.open(options[:log] || File.join(@db_dir, 'log'))
60
65
  check_version_and_upgrade
61
66
 
62
67
  # Read the existing DB config.
@@ -68,7 +73,7 @@ module PEROBS
68
73
 
69
74
  # Open the FlatFileDB for transactions.
70
75
  def open
71
- @flat_file = FlatFile.new(@db_dir)
76
+ @flat_file = FlatFile.new(@db_dir, @progressmeter)
72
77
  @flat_file.open
73
78
  PEROBS.log.info "FlatFile '#{@db_dir}' opened"
74
79
  end
@@ -87,11 +92,12 @@ module PEROBS
87
92
  end
88
93
 
89
94
  def FlatFileDB::delete_db(db_name)
95
+ close
90
96
  FileUtils.rm_rf(db_name)
91
97
  end
92
98
 
93
99
  # Return true if the object with given ID exists
94
- # @param id [Fixnum or Bignum]
100
+ # @param id [Integer]
95
101
  def include?(id)
96
102
  !@flat_file.find_obj_addr_by_id(id).nil?
97
103
  end
@@ -131,7 +137,7 @@ module PEROBS
131
137
  end
132
138
 
133
139
  # Load the given object from the filesystem.
134
- # @param id [Fixnum or Bignum] object ID
140
+ # @param id [Integer] object ID
135
141
  # @return [Hash] Object as defined by PEROBS::ObjectBase or nil if ID does
136
142
  # not exist
137
143
  def get_object(id)
@@ -142,6 +148,11 @@ module PEROBS
142
148
  end
143
149
  end
144
150
 
151
+ # @return [Integer] Number of objects stored in the DB.
152
+ def item_counter
153
+ @flat_file.item_counter
154
+ end
155
+
145
156
  # This method must be called to initiate the marking process.
146
157
  def clear_marks
147
158
  @flat_file.clear_all_marks
@@ -149,19 +160,19 @@ module PEROBS
149
160
 
150
161
  # Permanently delete all objects that have not been marked. Those are
151
162
  # orphaned and are no longer referenced by any actively used object.
152
- # @return [Array] List of IDs that have been removed from the DB.
153
- def delete_unmarked_objects
154
- @flat_file.delete_unmarked_objects
163
+ # @return [Integer] Number of the removed objects from the DB.
164
+ def delete_unmarked_objects(&block)
165
+ @flat_file.delete_unmarked_objects(&block)
155
166
  end
156
167
 
157
168
  # Mark an object.
158
- # @param id [Fixnum or Bignum] ID of the object to mark
169
+ # @param id [Integer] ID of the object to mark
159
170
  def mark(id)
160
171
  @flat_file.mark_obj_by_id(id)
161
172
  end
162
173
 
163
174
  # Check if the object is marked.
164
- # @param id [Fixnum or Bignum] ID of the object to check
175
+ # @param id [Integer] ID of the object to check
165
176
  # @param ignore_errors [Boolean] If set to true no errors will be raised
166
177
  # for non-existing objects.
167
178
  def is_marked?(id, ignore_errors = false)
@@ -173,11 +184,15 @@ module PEROBS
173
184
  # repaired.
174
185
  # @return number of errors found
175
186
  def check_db(repair = false)
176
- @flat_file.check(repair)
187
+ if repair
188
+ @flat_file.repair
189
+ else
190
+ @flat_file.check
191
+ end
177
192
  end
178
193
 
179
194
  # Check if the stored object is syntactically correct.
180
- # @param id [Fixnum/Bignum] Object ID
195
+ # @param id [Integer] Object ID
181
196
  # @param repair [TrueClass/FalseClass] True if a repair attempt should be
182
197
  # made.
183
198
  # @return [TrueClass/FalseClass] True if the object is OK, otherwise
@@ -202,7 +217,7 @@ module PEROBS
202
217
  # Store the given serialized object into the cluster files. This method is
203
218
  # for internal use only!
204
219
  # @param raw [String] Serialized Object as defined by PEROBS::ObjectBase
205
- # @param id [Fixnum or Bignum] Object ID
220
+ # @param id [Integer] Object ID
206
221
  def put_raw_object(raw, id)
207
222
  @flat_file.write_obj_by_id(id, raw)
208
223
  end
@@ -221,7 +236,8 @@ module PEROBS
221
236
  "'#{version_file}': " + e.message
222
237
  end
223
238
  else
224
- # Early versions of PEROBS did not have a version file.
239
+ # The DB is brand new.
240
+ version = VERSION
225
241
  write_version_file(version_file)
226
242
  end
227
243
 
@@ -229,25 +245,40 @@ module PEROBS
229
245
  PEROBS.log.fatal "Cannot downgrade the FlatFile database from " +
230
246
  "version #{version} to version #{VERSION}"
231
247
  end
232
-
233
- if version == 1
234
- # Version 1 had no support for data compression. Make sure all entries
235
- # are compressed to save space.
236
- open
237
- @flat_file.refresh
238
- close
248
+ if version < 3
249
+ PEROBS.log.fatal "The upgrade of this version of the PEROBS database " +
250
+ "is not supported by this version of PEROBS. Please try an earlier " +
251
+ "version of PEROBS to upgrade the database before using this version."
239
252
  end
240
253
 
241
- # After a successful upgrade change the version number in the DB as
242
- # well.
243
- if version < VERSION
254
+ # Version upgrades must be done one version number at a time. If the
255
+ # existing DB is multiple versions older than what the current PEROBS
256
+ # version expects then multiple upgrade runs will be needed.
257
+ while version < VERSION
258
+ if version == 3
259
+ PEROBS.log.warn "Updating FlatFileDB #{@db_dir} from version 3 to " +
260
+ "version 4 ..."
261
+ # Version 4 adds checksums for blob file headers. We have to convert
262
+ # the blob file to include the checksums.
263
+ FlatFile.insert_header_checksums(@db_dir)
264
+ open
265
+ @flat_file.regenerate_index_and_spaces
266
+ close
267
+ end
268
+
269
+ # After a successful upgrade change the version number in the DB as
270
+ # well.
244
271
  write_version_file(version_file)
245
272
  PEROBS.log.warn "Update of FlatFileDB '#{@db_dir}' from version " +
246
- "#{version} to version #{VERSION} completed"
273
+ "#{version} to version #{version + 1} completed"
274
+
275
+ # Update version variable to new version.
276
+ version += 1
247
277
  end
248
278
  end
249
279
 
250
280
  def write_version_file(version_file)
281
+
251
282
  begin
252
283
  RobustFile.write(version_file, VERSION)
253
284
  rescue IOError => e