perobs 2.2.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 862c67d14741c0fe0145af7c5eb1c1c147ebe189
4
- data.tar.gz: c88f1c19c16db2c6b3f2fd046cbb4d41e9fce2c3
3
+ metadata.gz: 0a8b4212f5d64ee97ea6d7592236792f51aac734
4
+ data.tar.gz: 6ed42c80925703e1f6224506ce0b04ef468699c2
5
5
  SHA512:
6
- metadata.gz: 11fd013bb0da3088a4ed88dfac390aac5b8cef823fd2bdeb06ab8c7ad4fedbde997365e0f135bae63b72acf5ee9ad3ac10f5640ac6b703785876514be3a480f1
7
- data.tar.gz: d13b9dd12084975bc5d6e84e6fcc0f91b3f455e56d4532c7a37983c53dd1054db211e8244c8317da5f0cb85e32ce4ce01564e9a3684b442c01bfdf889735a393
6
+ metadata.gz: 00439aa93cf50d9d06b7eb9986c5eaaac67c012c6ece983445de014d80c40b1234313b4a24e2092a5e5c9c48fe83f3ceeefdb60487d0de016dcf54741d374639
7
+ data.tar.gz: bb202f217c3a0a2529d40277c56ede5cbd3d020203b72580cef04dad09b853f1d6607ccd7cceea83d9f9d568d1d86b2783f51d346ebc98e53ba1c7c7d9c05b95
@@ -25,6 +25,7 @@
25
25
  # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
26
  # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
27
 
28
+ require 'zlib'
28
29
 
29
30
  module PEROBS
30
31
 
@@ -32,6 +33,9 @@ module PEROBS
32
33
  # HashedBlobsDB object.
33
34
  class BTreeBlob
34
35
 
36
+ # Magic number used for index files.
37
+ PEROBS_MAGIC = 0xB78EEDB
38
+
35
39
  # For performance reasons we use an Array for the entries instead of a
36
40
  # Hash. These constants specify the Array index for the corresponding
37
41
  # value.
@@ -42,6 +46,8 @@ module PEROBS
42
46
  START = 2
43
47
  # Mark/Unmarked flag
44
48
  MARKED = 3
49
+ # CRC Checksum of the data blob
50
+ CRC = 4
45
51
 
46
52
  # Create a new BTreeBlob object.
47
53
  # @param dir [String] Fully qualified directory name
@@ -68,7 +74,8 @@ module PEROBS
68
74
  @btreedb.put_raw_object(raw, id)
69
75
  else
70
76
  bytes = raw.bytesize
71
- start_address = reserve_bytes(id, bytes)
77
+ crc32 = Zlib.crc32(raw, 0)
78
+ start_address = reserve_bytes(id, bytes, crc32)
72
79
  if write_to_blobs_file(raw, start_address) != bytes
73
80
  raise RuntimeError, 'Object length does not match written bytes'
74
81
  end
@@ -80,22 +87,16 @@ module PEROBS
80
87
  # @param id [Fixnum or Bignum] ID
81
88
  # @return [String] sequence of bytes or nil if ID is unknown
82
89
  def read_object(id)
83
- return nil unless (bytes_and_start = find(id))
84
- read_from_blobs_file(*bytes_and_start)
90
+ return nil unless (index_entry = find(id))
91
+ read_from_blobs_file(index_entry)
85
92
  end
86
93
 
87
-
88
94
  # Find the data for the object with given id.
89
95
  # @param id [Fixnum or Bignum] Object ID
90
- # @return [Array] Returns an Array with two Fixnum entries. The first is
91
- # the number of bytes and the second is the starting offset in the
92
- # blob storage file.
96
+ # @return [Array] Returns an Array that represents the index entry for the
97
+ # given object.
93
98
  def find(id)
94
- if (entry = @entries_by_id[id])
95
- return [ entry[BYTES], entry[START] ]
96
- end
97
-
98
- nil
99
+ @entries_by_id[id]
99
100
  end
100
101
 
101
102
  # Clear the mark on all entries in the index.
@@ -214,15 +215,22 @@ module PEROBS
214
215
  end
215
216
 
216
217
  # Read _bytes_ bytes from the file starting at offset _address_.
217
- # @param bytes [Fixnum] number of bytes to read
218
- # @param address [Fixnum] offset in the file
219
- def read_from_blobs_file(bytes, address)
218
+ # @param entry [Array] Index entry for the object
219
+ # @return [String] Raw bytes of the blob.
220
+ def read_from_blobs_file(entry)
220
221
  begin
221
- File.read(@blobs_file_name, bytes, address)
222
+ raw = File.read(@blobs_file_name, entry[BYTES], entry[START])
222
223
  rescue => e
223
224
  raise IOError,
224
225
  "Cannot read blobs file #{@blobs_file_name}: #{e.message}"
225
226
  end
227
+ if Zlib.crc32(raw, 0) != entry[CRC]
228
+ raise RuntimeError,
229
+ "BTreeBlob for object #{entry[ID]} has been corrupted: " +
230
+ "Checksum mismatch"
231
+ end
232
+
233
+ raw
226
234
  end
227
235
 
228
236
  # Reserve the bytes needed for the specified number of bytes with the
@@ -230,7 +238,7 @@ module PEROBS
230
238
  # @param id [Fixnum or Bignum] ID of the entry
231
239
  # @param bytes [Fixnum] number of bytes for this entry
232
240
  # @return [Fixnum] the start address of the reserved blob
233
- def reserve_bytes(id, bytes)
241
+ def reserve_bytes(id, bytes, crc32)
234
242
  # index of first blob after the last seen entry
235
243
  end_of_last_entry = 0
236
244
  # blob index of best fit segment
@@ -272,7 +280,7 @@ module PEROBS
272
280
  # Object reads can trigger creation of new objects. As the marking
273
281
  # process triggers reads as well, all newly created objects are always
274
282
  # marked to prevent them from being collected right after creation.
275
- entry = [ id, bytes, best_fit_start || end_of_last_entry, 1 ]
283
+ entry = [ id, bytes, best_fit_start || end_of_last_entry, 1, crc32 ]
276
284
  @entries.insert(best_fit_index, entry)
277
285
  @entries_by_id[id] = entry
278
286
 
@@ -285,17 +293,57 @@ module PEROBS
285
293
  # a plain Array. @entries_by_id stores them hashed by their ID.
286
294
  @entries = []
287
295
  @entries_by_id = {}
296
+ entry_bytes = 29
297
+ entry_format = 'QQQCL'
298
+ restore_crc = false
288
299
  if File.exists?(@index_file_name)
289
300
  begin
290
301
  File.open(@index_file_name, 'rb') do |f|
291
- # The index is a binary format. Each entry has exactly 25 bytes.
302
+ # Since version 2.3.0, all index files start with a header.
303
+ # Earlier versions did not yet have this header. The header is 24
304
+ # bytes long. The 2nd set of 8 bytes must be 0 to distinguish the
305
+ # header from regular entries. The first 8 bytes are a magic
306
+ # number and the 3rd 8 bytes mark the schema version. We are
307
+ # currently at version 1.
308
+ if f.size >= 24
309
+ header = f.read(24).unpack('QQQ')
310
+ if header[0] != PEROBS_MAGIC && header[1] != 0
311
+ # These are the settings for the pre 2.3.0 entry format.
312
+ entry_bytes = 25
313
+ entry_format = 'QQQC'
314
+ restore_crc = true
315
+ # Rewind to start as we have an older version index file that
316
+ # has no header.
317
+ f.seek(0)
318
+ end
319
+ end
320
+
321
+ # The index is a binary format. Each entry has exactly 29 bytes.
322
+ # Version 2.2.0 and earlier did not have the CRC field. To ensure
323
+ # backwards compatibility with older databases, we reconstruct the
324
+ # CRC for older index files and convert it to the new format on
325
+ # the next index write.
326
+ #
292
327
  # Bytes
293
328
  # 0 - 7 : 64 bits, little endian : ID
294
329
  # 8 - 15 : 64 bits, little endian : Entry length in bytes
295
330
  # 16 - 23 : 64 bits, little endian : Start address in data file
296
331
  # 24 : 8 bits : 0 if unmarked, 1 if marked
297
- while (bytes = f.read(25))
298
- @entries << (e = bytes.unpack('QQQC'))
332
+ # 25 - 28 : 32 bits, CRC32 checksum of the data blob
333
+ while (bytes = f.read(entry_bytes))
334
+ e = bytes.unpack(entry_format)
335
+ if restore_crc
336
+ # If the index file was written with version <= 2.2.0 we have
337
+ # to compute the CRC from the data blob.
338
+ begin
339
+ raw = File.read(@blobs_file_name, e[BYTES], e[START])
340
+ rescue => e
341
+ raise IOError,
342
+ "Cannot read blobs file #{@blobs_file_name}: #{e.message}"
343
+ end
344
+ e[CRC] = Zlib.crc32(raw)
345
+ end
346
+ @entries << e
299
347
  @entries_by_id[e[ID]] = e
300
348
  end
301
349
  end
@@ -310,8 +358,9 @@ module PEROBS
310
358
  begin
311
359
  File.open(@index_file_name, 'wb') do |f|
312
360
  # See read_index for data format documentation.
361
+ f.write([ PEROBS_MAGIC, 0, 1].pack('QQQ'))
313
362
  @entries.each do |entry|
314
- f.write(entry.pack('QQQC'))
363
+ f.write(entry.pack('QQQCL'))
315
364
  end
316
365
  end
317
366
  rescue => e
@@ -329,7 +378,7 @@ module PEROBS
329
378
  # already created the new BTree node, so these entries will be
330
379
  # distributed into new leaf blobs of this new node.
331
380
  @entries.each do |entry|
332
- raw = read_from_blobs_file(entry[BYTES], entry[START])
381
+ raw = read_from_blobs_file(entry)
333
382
  @btreedb.put_raw_object(raw, entry[ID])
334
383
  end
335
384
 
@@ -1,4 +1,4 @@
1
1
  module PEROBS
2
2
  # The version number
3
- VERSION = "2.2.0"
3
+ VERSION = "2.3.0"
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: perobs
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.0
4
+ version: 2.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Schlaeger
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-01-24 00:00:00.000000000 Z
11
+ date: 2016-01-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler