perobs 3.0.1 → 4.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (75) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +19 -18
  3. data/lib/perobs.rb +2 -0
  4. data/lib/perobs/Array.rb +68 -21
  5. data/lib/perobs/BTree.rb +110 -54
  6. data/lib/perobs/BTreeBlob.rb +14 -13
  7. data/lib/perobs/BTreeDB.rb +11 -10
  8. data/lib/perobs/BTreeNode.rb +551 -197
  9. data/lib/perobs/BTreeNodeCache.rb +10 -8
  10. data/lib/perobs/BTreeNodeLink.rb +11 -1
  11. data/lib/perobs/BigArray.rb +285 -0
  12. data/lib/perobs/BigArrayNode.rb +1002 -0
  13. data/lib/perobs/BigHash.rb +246 -0
  14. data/lib/perobs/BigTree.rb +197 -0
  15. data/lib/perobs/BigTreeNode.rb +873 -0
  16. data/lib/perobs/Cache.rb +47 -22
  17. data/lib/perobs/ClassMap.rb +2 -2
  18. data/lib/perobs/ConsoleProgressMeter.rb +61 -0
  19. data/lib/perobs/DataBase.rb +4 -3
  20. data/lib/perobs/DynamoDB.rb +62 -20
  21. data/lib/perobs/EquiBlobsFile.rb +174 -59
  22. data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
  23. data/lib/perobs/FlatFile.rb +536 -242
  24. data/lib/perobs/FlatFileBlobHeader.rb +120 -84
  25. data/lib/perobs/FlatFileDB.rb +58 -27
  26. data/lib/perobs/FuzzyStringMatcher.rb +175 -0
  27. data/lib/perobs/Hash.rb +129 -35
  28. data/lib/perobs/IDList.rb +144 -0
  29. data/lib/perobs/IDListPage.rb +107 -0
  30. data/lib/perobs/IDListPageFile.rb +180 -0
  31. data/lib/perobs/IDListPageRecord.rb +142 -0
  32. data/lib/perobs/LockFile.rb +3 -0
  33. data/lib/perobs/Object.rb +28 -20
  34. data/lib/perobs/ObjectBase.rb +53 -10
  35. data/lib/perobs/PersistentObjectCache.rb +142 -0
  36. data/lib/perobs/PersistentObjectCacheLine.rb +99 -0
  37. data/lib/perobs/ProgressMeter.rb +97 -0
  38. data/lib/perobs/SpaceManager.rb +273 -0
  39. data/lib/perobs/SpaceTree.rb +63 -47
  40. data/lib/perobs/SpaceTreeNode.rb +134 -115
  41. data/lib/perobs/SpaceTreeNodeLink.rb +1 -1
  42. data/lib/perobs/StackFile.rb +1 -1
  43. data/lib/perobs/Store.rb +180 -70
  44. data/lib/perobs/version.rb +1 -1
  45. data/perobs.gemspec +4 -4
  46. data/test/Array_spec.rb +48 -39
  47. data/test/BTreeDB_spec.rb +2 -2
  48. data/test/BTree_spec.rb +50 -1
  49. data/test/BigArray_spec.rb +261 -0
  50. data/test/BigHash_spec.rb +152 -0
  51. data/test/BigTreeNode_spec.rb +153 -0
  52. data/test/BigTree_spec.rb +259 -0
  53. data/test/EquiBlobsFile_spec.rb +105 -5
  54. data/test/FNV_Hash_1a_64_spec.rb +59 -0
  55. data/test/FlatFileDB_spec.rb +199 -15
  56. data/test/FuzzyStringMatcher_spec.rb +261 -0
  57. data/test/Hash_spec.rb +27 -16
  58. data/test/IDList_spec.rb +77 -0
  59. data/test/LegacyDBs/LegacyDB.rb +155 -0
  60. data/test/LegacyDBs/version_3/class_map.json +1 -0
  61. data/test/LegacyDBs/version_3/config.json +1 -0
  62. data/test/LegacyDBs/version_3/database.blobs +0 -0
  63. data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
  64. data/test/LegacyDBs/version_3/index.blobs +0 -0
  65. data/test/LegacyDBs/version_3/version +1 -0
  66. data/test/LockFile_spec.rb +9 -6
  67. data/test/Object_spec.rb +5 -5
  68. data/test/SpaceManager_spec.rb +176 -0
  69. data/test/SpaceTree_spec.rb +27 -9
  70. data/test/Store_spec.rb +353 -206
  71. data/test/perobs_spec.rb +7 -3
  72. data/test/spec_helper.rb +9 -4
  73. metadata +59 -16
  74. data/lib/perobs/SpaceTreeNodeCache.rb +0 -76
  75. data/lib/perobs/TreeDB.rb +0 -277
@@ -33,7 +33,7 @@ module PEROBS
33
33
  #
34
34
  # 1 Byte: Flags byte.
35
35
  # Bit 0: 0 deleted entry, 1 valid entry
36
- # Bit 1: 0 unmarked, 1 marked
36
+ # Bit 1: 0 reserved, must be 0
37
37
  # Bit 2: 0 uncompressed data, 1 compressed data
38
38
  # Bit 3: 0 current entry, 1 outdated entry
39
39
  # Bit 4 - 7: reserved, must be 0
@@ -48,21 +48,21 @@ module PEROBS
48
48
  # The 'pack()' format of the header.
49
49
  FORMAT = 'CQQL'
50
50
  # The length of the header in bytes.
51
- LENGTH = 21
51
+ LENGTH = 25
52
52
  VALID_FLAG_BIT = 0
53
- MARK_FLAG_BIT = 1
54
53
  COMPRESSED_FLAG_BIT = 2
55
54
  OUTDATED_FLAG_BIT = 3
56
55
 
57
56
  attr_reader :addr, :flags, :length, :id, :crc
57
+ attr_accessor :corruption_start
58
58
 
59
59
  # Create a new FlatFileBlobHeader with the given flags, length, id and crc.
60
60
  # @param file [File] the FlatFile that contains the header
61
61
  # @param addr [Integer] the offset address of the header in the file
62
- # @param flags [Fixnum] 8 bit number, see above
63
- # @param length [Fixnum] length of the header in bytes
62
+ # @param flags [Integer] 8 bit number, see above
63
+ # @param length [Integer] length of the header in bytes
64
64
  # @param id [Integer] ID of the blob entry
65
- # @param crc [Fixnum] CRC32 checksum of the blob entry
65
+ # @param crc [Integer] CRC32 checksum of the blob entry
66
66
  def initialize(file, addr, flags, length, id, crc)
67
67
  @file = file
68
68
  @addr = addr
@@ -70,64 +70,136 @@ module PEROBS
70
70
  @length = length
71
71
  @id = id
72
72
  @crc = crc
73
+ # This is only set if the header is preceded by a corrupted blob.
74
+ @corruption_start = nil
73
75
  end
74
76
 
75
77
  # Read the header from the given File.
76
78
  # @param file [File]
77
- # @return FlatFileBlobHeader
78
- def FlatFileBlobHeader::read(file)
79
- begin
80
- addr = file.pos
81
- buf = file.read(LENGTH)
82
- rescue IOError => e
83
- PEROBS.log.error "Cannot read blob header in flat file DB: #{e.message}"
84
- return nil
79
+ # @param addr [Integer] address in the file to start reading. If no
80
+ # address is specified use the current position in the file.
81
+ # @param id [Integer] Optional ID that the header should have. If no id is
82
+ # specified there is no check against the actual ID done.
83
+ # @return FlatFileBlobHeader or nil if there are no more blobs to read in
84
+ # the file.
85
+ def FlatFileBlobHeader::read(file, addr = nil, id = nil)
86
+ # If an address was specified we expect the read to always succeed. If
87
+ # no address is specified and we can't read the header we generate an
88
+ # error message but it is not fatal.
89
+ errors_are_fatal = !addr.nil?
90
+
91
+ mode = :searching_next_header
92
+ addr = file.pos unless addr
93
+ buf = nil
94
+ corruption_start = nil
95
+
96
+ loop do
97
+ buf_with_crc = nil
98
+ begin
99
+ file.seek(addr)
100
+ buf_with_crc = file.read(LENGTH)
101
+ rescue IOError => e
102
+ if errors_are_fatal
103
+ PEROBS.log.fatal "Cannot read blob header in flat file DB at " +
104
+ "address #{addr}: #{e.message}"
105
+ else
106
+ PEROBS.log.error "Cannot read blob header in flat file DB: " +
107
+ e.message
108
+ return nil
109
+ end
110
+ end
111
+
112
+ # Did we read anything?
113
+ if buf_with_crc.nil?
114
+ if errors_are_fatal
115
+ PEROBS.log.fatal "Cannot read blob header " +
116
+ "#{id ? "for ID #{id} " : ''}at address #{addr}"
117
+ else
118
+ if corruption_start
119
+ PEROBS.log.error "Corruption found at end of blob file at " +
120
+ "address #{addr}"
121
+ end
122
+ # We have reached the end of the file.
123
+ return nil
124
+ end
125
+ end
126
+
127
+ # Did we get the full header?
128
+ if buf_with_crc.length != LENGTH
129
+ msg = "Incomplete FlatFileBlobHeader: Only " +
130
+ "#{buf_with_crc.length} " +
131
+ "bytes of #{LENGTH} could be read "
132
+ "#{id ? "for ID #{id} " : ''}at address #{addr}"
133
+ if errors_are_fatal
134
+ PEROBS.log.fatal msg
135
+ else
136
+ PEROBS.log.error msg
137
+ end
138
+ return nil
139
+ end
140
+
141
+ # Check the CRC of the header
142
+ buf = buf_with_crc[0..-5]
143
+ crc = buf_with_crc[-4..-1].unpack('L')[0]
144
+
145
+ if (read_crc = Zlib.crc32(buf, 0)) == crc
146
+ # We have found a valid header.
147
+ if corruption_start
148
+ PEROBS.log.error "FlatFile corruption ends at #{addr}. " +
149
+ "#{addr - corruption_start} bytes skipped. Some data may " +
150
+ "not be recoverable."
151
+ end
152
+ break
153
+ else
154
+ if errors_are_fatal
155
+ PEROBS.log.fatal "FlatFile Header CRC mismatch at address " +
156
+ "#{addr}. Header CRC is #{'%08x' % read_crc} but should be " +
157
+ "#{'%08x' % crc}."
158
+ else
159
+ if corruption_start.nil?
160
+ if errors_are_fatal
161
+ PEROBS.log.fatal "FlatFile corruption found. The FlatFile " +
162
+ "Header CRC mismatch at address #{addr}. Header CRC is " +
163
+ "#{'%08x' % read_crc} but should be #{'%08x' % crc}."
164
+ else
165
+ PEROBS.log.error "FlatFile corruption found. The FlatFile " +
166
+ "Header CRC mismatch at address #{addr}. Header CRC is " +
167
+ "#{'%08x' % read_crc} but should be #{'%08x' % crc}. " +
168
+ "Trying to find the next header."
169
+ end
170
+ corruption_start = addr
171
+ end
172
+ # The blob file is corrupted. There is no valid header at the
173
+ # current position in the file. We now try to find the next valid
174
+ # header by iterating over the remainder of the file advancing one
175
+ # byte with each step until we hit the end of the file or find the
176
+ # next valid header.
177
+ addr += 1
178
+ end
179
+ end
85
180
  end
86
181
 
87
- return nil unless buf
88
-
89
- if buf.length != LENGTH
90
- PEROBS.log.error "Incomplete FlatFileBlobHeader: Only #{buf.length} " +
91
- "bytes of #{LENGTH} could be read"
92
- return nil
182
+ header = FlatFileBlobHeader.new(file, addr, *buf.unpack(FORMAT))
183
+ if corruption_start
184
+ header.corruption_start = corruption_start
93
185
  end
94
186
 
95
- FlatFileBlobHeader.new(file, addr, *buf.unpack(FORMAT))
96
- end
97
-
98
- # Read the header from the given File.
99
- # @param file [File]
100
- # @param addr [Integer] address in the file to start reading
101
- # @param id [Integer] Optional ID that the header should have
102
- # @return FlatFileBlobHeader
103
- def FlatFileBlobHeader::read_at(file, addr, id = nil)
104
- buf = nil
105
- begin
106
- file.seek(addr)
107
- buf = file.read(LENGTH)
108
- rescue IOError => e
109
- PEROBS.log.fatal "Cannot read blob in flat file DB: #{e.message}"
110
- end
111
- if buf.nil? || buf.length != LENGTH
112
- PEROBS.log.fatal "Cannot read blob header " +
113
- "#{id ? "for ID #{id} " : ''}at address " +
114
- "#{addr}"
115
- end
116
- header = FlatFileBlobHeader.new(file, addr, *buf.unpack(FORMAT))
117
187
  if id && header.id != id
118
188
  PEROBS.log.fatal "Mismatch between FlatFile index and blob file " +
119
- "found for entry with ID #{id}/#{header.id}"
189
+ "found. FlatFile has entry with ID #{header.id} at address " +
190
+ "#{addr}. Index has ID #{id} for this address."
120
191
  end
121
192
 
122
193
  return header
123
194
  end
124
195
 
125
196
  # Write the header to a given File.
126
- # @param file [File]
127
197
  def write
128
198
  begin
199
+ buf = [ @flags, @length, @id, @crc].pack(FORMAT)
200
+ crc = Zlib.crc32(buf, 0)
129
201
  @file.seek(@addr)
130
- @file.write([ @flags, @length, @id, @crc].pack(FORMAT))
202
+ @file.write(buf + [ crc ].pack('L'))
131
203
  rescue IOError => e
132
204
  PEROBS.log.fatal "Cannot write blob header into flat file DB: " +
133
205
  e.message
@@ -135,17 +207,9 @@ module PEROBS
135
207
  end
136
208
 
137
209
  # Reset all the flag bits to 0. This marks the blob as invalid.
138
- # @param file [File] The file handle of the blob file.
139
- # @param addr [Integer] The address of the header
140
210
  def clear_flags
141
- begin
142
- @file.seek(@addr)
143
- @file.write([ 0 ].pack('C'))
144
- @file.flush
145
- rescue IOError => e
146
- PEROBS.log.fatal "Clearing flags of FlatFileBlobHeader with ID " +
147
- "#{@id} failed: #{e.message}"
148
- end
211
+ @flags = 0
212
+ write
149
213
  end
150
214
 
151
215
  # Return true if the header is for a non-empty blob.
@@ -153,23 +217,6 @@ module PEROBS
153
217
  bit_set?(VALID_FLAG_BIT)
154
218
  end
155
219
 
156
- # Return true if the blob has been marked.
157
- def is_marked?
158
- bit_set?(MARK_FLAG_BIT)
159
- end
160
-
161
- # Set the mark bit.
162
- def set_mark_flag
163
- set_flag(MARK_FLAG_BIT)
164
- write_flags
165
- end
166
-
167
- # Clear the mark bit.
168
- def clear_mark_flag
169
- clear_flag(MARK_FLAG_BIT)
170
- write_flags
171
- end
172
-
173
220
  # Return true if the blob contains compressed data.
174
221
  def is_compressed?
175
222
  bit_set?(COMPRESSED_FLAG_BIT)
@@ -179,7 +226,7 @@ module PEROBS
179
226
  # transaction has been completed.
180
227
  def set_outdated_flag
181
228
  set_flag(OUTDATED_FLAG_BIT)
182
- write_flags
229
+ write
183
230
  end
184
231
 
185
232
  # Return true if the blob contains outdated data.
@@ -189,17 +236,6 @@ module PEROBS
189
236
 
190
237
  private
191
238
 
192
- def write_flags
193
- begin
194
- @file.seek(@addr)
195
- @file.write([ @flags ].pack('C'))
196
- @file.flush
197
- rescue IOError => e
198
- PEROBS.log.fatal "Writing flags of FlatFileBlobHeader with ID #{@id} " +
199
- "failed: #{e.message}"
200
- end
201
- end
202
-
203
239
  def bit_set?(n)
204
240
  mask = 1 << n
205
241
  @flags & mask == mask
@@ -2,7 +2,8 @@
2
2
  #
3
3
  # = FlatFileDB.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2015, 2016 by Chris Schlaeger <chris@taskjuggler.org>
5
+ # Copyright (c) 2015, 2016, 2017, 2018, 2019
6
+ # by Chris Schlaeger <chris@taskjuggler.org>
6
7
  #
7
8
  # MIT License
8
9
  #
@@ -41,7 +42,7 @@ module PEROBS
41
42
 
42
43
  # This version number increases whenever the on-disk format changes in a
43
44
  # way that requires conversion actions after an update.
44
- VERSION = 2
45
+ VERSION = 4
45
46
 
46
47
  attr_reader :max_blob_size
47
48
 
@@ -50,13 +51,17 @@ module PEROBS
50
51
  # @param options [Hash] options to customize the behavior. Currently only
51
52
  # the following options are supported:
52
53
  # :serializer : Can be :marshal, :json, :yaml
54
+ # :progressmeter : Reference to a ProgressMeter object
55
+ # :log : IO that should be used for logging
56
+ # :log_level : Minimum Logger level to log
53
57
  def initialize(db_name, options = {})
54
- super(options[:serializer] || :json)
58
+ super(options)
55
59
 
56
60
  @db_dir = db_name
57
61
  # Create the database directory if it doesn't exist yet.
58
62
  ensure_dir_exists(@db_dir)
59
- PEROBS.log.open(File.join(@db_dir, 'log'))
63
+ PEROBS.log.level = options[:log_level] if options[:log_level]
64
+ PEROBS.log.open(options[:log] || File.join(@db_dir, 'log'))
60
65
  check_version_and_upgrade
61
66
 
62
67
  # Read the existing DB config.
@@ -68,7 +73,7 @@ module PEROBS
68
73
 
69
74
  # Open the FlatFileDB for transactions.
70
75
  def open
71
- @flat_file = FlatFile.new(@db_dir)
76
+ @flat_file = FlatFile.new(@db_dir, @progressmeter)
72
77
  @flat_file.open
73
78
  PEROBS.log.info "FlatFile '#{@db_dir}' opened"
74
79
  end
@@ -87,11 +92,12 @@ module PEROBS
87
92
  end
88
93
 
89
94
  def FlatFileDB::delete_db(db_name)
95
+ close
90
96
  FileUtils.rm_rf(db_name)
91
97
  end
92
98
 
93
99
  # Return true if the object with given ID exists
94
- # @param id [Fixnum or Bignum]
100
+ # @param id [Integer]
95
101
  def include?(id)
96
102
  !@flat_file.find_obj_addr_by_id(id).nil?
97
103
  end
@@ -131,7 +137,7 @@ module PEROBS
131
137
  end
132
138
 
133
139
  # Load the given object from the filesystem.
134
- # @param id [Fixnum or Bignum] object ID
140
+ # @param id [Integer] object ID
135
141
  # @return [Hash] Object as defined by PEROBS::ObjectBase or nil if ID does
136
142
  # not exist
137
143
  def get_object(id)
@@ -142,6 +148,11 @@ module PEROBS
142
148
  end
143
149
  end
144
150
 
151
+ # @return [Integer] Number of objects stored in the DB.
152
+ def item_counter
153
+ @flat_file.item_counter
154
+ end
155
+
145
156
  # This method must be called to initiate the marking process.
146
157
  def clear_marks
147
158
  @flat_file.clear_all_marks
@@ -149,19 +160,19 @@ module PEROBS
149
160
 
150
161
  # Permanently delete all objects that have not been marked. Those are
151
162
  # orphaned and are no longer referenced by any actively used object.
152
- # @return [Array] List of IDs that have been removed from the DB.
153
- def delete_unmarked_objects
154
- @flat_file.delete_unmarked_objects
163
+ # @return [Integer] Number of objects removed from the DB.
164
+ def delete_unmarked_objects(&block)
165
+ @flat_file.delete_unmarked_objects(&block)
155
166
  end
156
167
 
157
168
  # Mark an object.
158
- # @param id [Fixnum or Bignum] ID of the object to mark
169
+ # @param id [Integer] ID of the object to mark
159
170
  def mark(id)
160
171
  @flat_file.mark_obj_by_id(id)
161
172
  end
162
173
 
163
174
  # Check if the object is marked.
164
- # @param id [Fixnum or Bignum] ID of the object to check
175
+ # @param id [Integer] ID of the object to check
165
176
  # @param ignore_errors [Boolean] If set to true no errors will be raised
166
177
  # for non-existing objects.
167
178
  def is_marked?(id, ignore_errors = false)
@@ -173,11 +184,15 @@ module PEROBS
173
184
  # repaired.
174
185
  # @return number of errors found
175
186
  def check_db(repair = false)
176
- @flat_file.check(repair)
187
+ if repair
188
+ @flat_file.repair
189
+ else
190
+ @flat_file.check
191
+ end
177
192
  end
178
193
 
179
194
  # Check if the stored object is syntactically correct.
180
- # @param id [Fixnum/Bignum] Object ID
195
+ # @param id [Integer] Object ID
181
196
  # @param repair [TrueClass/FalseClass] True if a repair attempt should be
182
197
  # made.
183
198
  # @return [TrueClass/FalseClass] True if the object is OK, otherwise
@@ -202,7 +217,7 @@ module PEROBS
202
217
  # Store the given serialized object into the cluster files. This method is
203
218
  # for internal use only!
204
219
  # @param raw [String] Serialized Object as defined by PEROBS::ObjectBase
205
- # @param id [Fixnum or Bignum] Object ID
220
+ # @param id [Integer] Object ID
206
221
  def put_raw_object(raw, id)
207
222
  @flat_file.write_obj_by_id(id, raw)
208
223
  end
@@ -221,7 +236,8 @@ module PEROBS
221
236
  "'#{version_file}': " + e.message
222
237
  end
223
238
  else
224
- # Early versions of PEROBS did not have a version file.
239
+ # The DB is brand new.
240
+ version = VERSION
225
241
  write_version_file(version_file)
226
242
  end
227
243
 
@@ -229,25 +245,40 @@ module PEROBS
229
245
  PEROBS.log.fatal "Cannot downgrade the FlatFile database from " +
230
246
  "version #{version} to version #{VERSION}"
231
247
  end
232
-
233
- if version == 1
234
- # Version 1 had no support for data compression. Make sure all entries
235
- # are compressed to save space.
236
- open
237
- @flat_file.refresh
238
- close
248
+ if version < 3
249
+ PEROBS.log.fatal "The upgrade of this version of the PEROBS database " +
250
+ "is not supported by this version of PEROBS. Please try an earlier " +
251
+ "version of PEROBS to upgrade the database before using this version."
239
252
  end
240
253
 
241
- # After a successful upgrade change the version number in the DB as
242
- # well.
243
- if version < VERSION
254
+ # Version upgrades must be done one version number at a time. If the
255
+ # existing DB is multiple versions older than what the current PEROBS
256
+ # version expects then multiple upgrade runs will be needed.
257
+ while version < VERSION
258
+ if version == 3
259
+ PEROBS.log.warn "Updating FlatFileDB #{@db_dir} from version 3 to " +
260
+ "version 4 ..."
261
+ # Version 4 adds checksums for blob file headers. We have to convert
262
+ # the blob file to include the checksums.
263
+ FlatFile.insert_header_checksums(@db_dir)
264
+ open
265
+ @flat_file.regenerate_index_and_spaces
266
+ close
267
+ end
268
+
269
+ # After a successful upgrade change the version number in the DB as
270
+ # well.
244
271
  write_version_file(version_file)
245
272
  PEROBS.log.warn "Update of FlatFileDB '#{@db_dir}' from version " +
246
- "#{version} to version #{VERSION} completed"
273
+ "#{version} to version #{version + 1} completed"
274
+
275
+ # Update version variable to new version.
276
+ version += 1
247
277
  end
248
278
  end
249
279
 
250
280
  def write_version_file(version_file)
281
+
251
282
  begin
252
283
  RobustFile.write(version_file, VERSION)
253
284
  rescue IOError => e