perobs 2.2.0 → 2.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 862c67d14741c0fe0145af7c5eb1c1c147ebe189
4
- data.tar.gz: c88f1c19c16db2c6b3f2fd046cbb4d41e9fce2c3
3
+ metadata.gz: 0a8b4212f5d64ee97ea6d7592236792f51aac734
4
+ data.tar.gz: 6ed42c80925703e1f6224506ce0b04ef468699c2
5
5
  SHA512:
6
- metadata.gz: 11fd013bb0da3088a4ed88dfac390aac5b8cef823fd2bdeb06ab8c7ad4fedbde997365e0f135bae63b72acf5ee9ad3ac10f5640ac6b703785876514be3a480f1
7
- data.tar.gz: d13b9dd12084975bc5d6e84e6fcc0f91b3f455e56d4532c7a37983c53dd1054db211e8244c8317da5f0cb85e32ce4ce01564e9a3684b442c01bfdf889735a393
6
+ metadata.gz: 00439aa93cf50d9d06b7eb9986c5eaaac67c012c6ece983445de014d80c40b1234313b4a24e2092a5e5c9c48fe83f3ceeefdb60487d0de016dcf54741d374639
7
+ data.tar.gz: bb202f217c3a0a2529d40277c56ede5cbd3d020203b72580cef04dad09b853f1d6607ccd7cceea83d9f9d568d1d86b2783f51d346ebc98e53ba1c7c7d9c05b95
@@ -25,6 +25,7 @@
25
25
  # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
26
  # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
27
 
28
+ require 'zlib'
28
29
 
29
30
  module PEROBS
30
31
 
@@ -32,6 +33,9 @@ module PEROBS
32
33
  # HashedBlobsDB object.
33
34
  class BTreeBlob
34
35
 
36
+ # Magic number used for index files.
37
+ PEROBS_MAGIC = 0xB78EEDB
38
+
35
39
  # For performance reasons we use an Array for the entries instead of a
36
40
  # Hash. These constants specify the Array index for the corresponding
37
41
  # value.
@@ -42,6 +46,8 @@ module PEROBS
42
46
  START = 2
43
47
  # Mark/Unmarked flag
44
48
  MARKED = 3
49
+ # CRC Checksum of the data blob
50
+ CRC = 4
45
51
 
46
52
  # Create a new BTreeBlob object.
47
53
  # @param dir [String] Fully qualified directory name
@@ -68,7 +74,8 @@ module PEROBS
68
74
  @btreedb.put_raw_object(raw, id)
69
75
  else
70
76
  bytes = raw.bytesize
71
- start_address = reserve_bytes(id, bytes)
77
+ crc32 = Zlib.crc32(raw, 0)
78
+ start_address = reserve_bytes(id, bytes, crc32)
72
79
  if write_to_blobs_file(raw, start_address) != bytes
73
80
  raise RuntimeError, 'Object length does not match written bytes'
74
81
  end
@@ -80,22 +87,16 @@ module PEROBS
80
87
  # @param id [Fixnum or Bignum] ID
81
88
  # @return [String] sequence of bytes or nil if ID is unknown
82
89
  def read_object(id)
83
- return nil unless (bytes_and_start = find(id))
84
- read_from_blobs_file(*bytes_and_start)
90
+ return nil unless (index_entry = find(id))
91
+ read_from_blobs_file(index_entry)
85
92
  end
86
93
 
87
-
88
94
  # Find the data for the object with given id.
89
95
  # @param id [Fixnum or Bignum] Object ID
90
- # @return [Array] Returns an Array with two Fixnum entries. The first is
91
- # the number of bytes and the second is the starting offset in the
92
- # blob storage file.
96
+ # @return [Array] Returns an Array that represents the index entry for the
97
+ # given object.
93
98
  def find(id)
94
- if (entry = @entries_by_id[id])
95
- return [ entry[BYTES], entry[START] ]
96
- end
97
-
98
- nil
99
+ @entries_by_id[id]
99
100
  end
100
101
 
101
102
  # Clear the mark on all entries in the index.
@@ -214,15 +215,22 @@ module PEROBS
214
215
  end
215
216
 
216
217
  # Read _bytes_ bytes from the file starting at offset _address_.
217
- # @param bytes [Fixnum] number of bytes to read
218
- # @param address [Fixnum] offset in the file
219
- def read_from_blobs_file(bytes, address)
218
+ # @param entry [Array] Index entry for the object
219
+ # @return [String] Raw bytes of the blob.
220
+ def read_from_blobs_file(entry)
220
221
  begin
221
- File.read(@blobs_file_name, bytes, address)
222
+ raw = File.read(@blobs_file_name, entry[BYTES], entry[START])
222
223
  rescue => e
223
224
  raise IOError,
224
225
  "Cannot read blobs file #{@blobs_file_name}: #{e.message}"
225
226
  end
227
+ if Zlib.crc32(raw, 0) != entry[CRC]
228
+ raise RuntimeError,
229
+ "BTreeBlob for object #{entry[ID]} has been corrupted: " +
230
+ "Checksum mismatch"
231
+ end
232
+
233
+ raw
226
234
  end
227
235
 
228
236
  # Reserve the bytes needed for the specified number of bytes with the
@@ -230,7 +238,7 @@ module PEROBS
230
238
  # @param id [Fixnum or Bignum] ID of the entry
231
239
  # @param bytes [Fixnum] number of bytes for this entry
232
240
  # @return [Fixnum] the start address of the reserved blob
233
- def reserve_bytes(id, bytes)
241
+ def reserve_bytes(id, bytes, crc32)
234
242
  # index of first blob after the last seen entry
235
243
  end_of_last_entry = 0
236
244
  # blob index of best fit segment
@@ -272,7 +280,7 @@ module PEROBS
272
280
  # Object reads can trigger creation of new objects. As the marking
273
281
  # process triggers reads as well, all newly created objects are always
274
282
  # marked to prevent them from being collected right after creation.
275
- entry = [ id, bytes, best_fit_start || end_of_last_entry, 1 ]
283
+ entry = [ id, bytes, best_fit_start || end_of_last_entry, 1, crc32 ]
276
284
  @entries.insert(best_fit_index, entry)
277
285
  @entries_by_id[id] = entry
278
286
 
@@ -285,17 +293,57 @@ module PEROBS
285
293
  # a plain Array. @entries_by_id stores them hashed by their ID.
286
294
  @entries = []
287
295
  @entries_by_id = {}
296
+ entry_bytes = 29
297
+ entry_format = 'QQQCL'
298
+ restore_crc = false
288
299
  if File.exists?(@index_file_name)
289
300
  begin
290
301
  File.open(@index_file_name, 'rb') do |f|
291
- # The index is a binary format. Each entry has exactly 25 bytes.
302
+ # Since version 2.3.0, all index files start with a header.
303
+ # Earlier versions did not yet have this header. The header is 24
304
+ # bytes long. The 2nd set of 8 bytes must be 0 to distinguish the
305
+ # header from regular entries. The first 8 bytes are a magic
306
+ # number and the 3rd 8 bytes mark the schema version. We are
307
+ # currently at version 1.
308
+ if f.size >= 24
309
+ header = f.read(24).unpack('QQQ')
310
+ if header[0] != PEROBS_MAGIC && header[1] != 0
311
+ # These are the settings for the pre 2.3.0 entry format.
312
+ entry_bytes = 25
313
+ entry_format = 'QQQC'
314
+ restore_crc = true
315
+ # Rewind to start as we have an older version index file that
316
+ # has no header.
317
+ f.seek(0)
318
+ end
319
+ end
320
+
321
+ # The index is a binary format. Each entry has exactly 29 bytes.
322
+ # Version 2.2.0 and earlier did not have the CRC field. To ensure
323
+ # backwards compatibility with older databases, we reconstruct the
324
+ # CRC for older index files and convert it to the new format on
325
+ # the next index write.
326
+ #
292
327
  # Bytes
293
328
  # 0 - 7 : 64 bits, little endian : ID
294
329
  # 8 - 15 : 64 bits, little endian : Entry length in bytes
295
330
  # 16 - 23 : 64 bits, little endian : Start address in data file
296
331
  # 24 : 8 bits : 0 if unmarked, 1 if marked
297
- while (bytes = f.read(25))
298
- @entries << (e = bytes.unpack('QQQC'))
332
+ # 25 - 28 : 32 bits, CRC32 checksum of the data blob
333
+ while (bytes = f.read(entry_bytes))
334
+ e = bytes.unpack(entry_format)
335
+ if restore_crc
336
+ # If the index file was written with version <= 2.2.0 we have
337
+ # to compute the CRC from the data blob.
338
+ begin
339
+ raw = File.read(@blobs_file_name, e[BYTES], e[START])
340
+ rescue => e
341
+ raise IOError,
342
+ "Cannot read blobs file #{@blobs_file_name}: #{e.message}"
343
+ end
344
+ e[CRC] = Zlib.crc32(raw)
345
+ end
346
+ @entries << e
299
347
  @entries_by_id[e[ID]] = e
300
348
  end
301
349
  end
@@ -310,8 +358,9 @@ module PEROBS
310
358
  begin
311
359
  File.open(@index_file_name, 'wb') do |f|
312
360
  # See read_index for data format documentation.
361
+ f.write([ PEROBS_MAGIC, 0, 1].pack('QQQ'))
313
362
  @entries.each do |entry|
314
- f.write(entry.pack('QQQC'))
363
+ f.write(entry.pack('QQQCL'))
315
364
  end
316
365
  end
317
366
  rescue => e
@@ -329,7 +378,7 @@ module PEROBS
329
378
  # already created the new BTree node, so these entries will be
330
379
  # distributed into new leaf blobs of this new node.
331
380
  @entries.each do |entry|
332
- raw = read_from_blobs_file(entry[BYTES], entry[START])
381
+ raw = read_from_blobs_file(entry)
333
382
  @btreedb.put_raw_object(raw, entry[ID])
334
383
  end
335
384
 
@@ -1,4 +1,4 @@
1
1
  module PEROBS
2
2
  # The version number
3
- VERSION = "2.2.0"
3
+ VERSION = "2.3.0"
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: perobs
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.0
4
+ version: 2.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Schlaeger
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-01-24 00:00:00.000000000 Z
11
+ date: 2016-01-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler