perobs 2.2.0 → 2.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/perobs/BTreeBlob.rb +72 -23
- data/lib/perobs/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0a8b4212f5d64ee97ea6d7592236792f51aac734
|
4
|
+
data.tar.gz: 6ed42c80925703e1f6224506ce0b04ef468699c2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 00439aa93cf50d9d06b7eb9986c5eaaac67c012c6ece983445de014d80c40b1234313b4a24e2092a5e5c9c48fe83f3ceeefdb60487d0de016dcf54741d374639
|
7
|
+
data.tar.gz: bb202f217c3a0a2529d40277c56ede5cbd3d020203b72580cef04dad09b853f1d6607ccd7cceea83d9f9d568d1d86b2783f51d346ebc98e53ba1c7c7d9c05b95
|
data/lib/perobs/BTreeBlob.rb
CHANGED
@@ -25,6 +25,7 @@
|
|
25
25
|
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
26
26
|
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27
27
|
|
28
|
+
require 'zlib'
|
28
29
|
|
29
30
|
module PEROBS
|
30
31
|
|
@@ -32,6 +33,9 @@ module PEROBS
|
|
32
33
|
# HashedBlobsDB object.
|
33
34
|
class BTreeBlob
|
34
35
|
|
36
|
+
# Magic number used for index files.
|
37
|
+
PEROBS_MAGIC = 0xB78EEDB
|
38
|
+
|
35
39
|
# For performance reasons we use an Array for the entries instead of a
|
36
40
|
# Hash. These constants specify the Array index for the corresponding
|
37
41
|
# value.
|
@@ -42,6 +46,8 @@ module PEROBS
|
|
42
46
|
START = 2
|
43
47
|
# Mark/Unmarked flag
|
44
48
|
MARKED = 3
|
49
|
+
# CRC Checksum of the data blob
|
50
|
+
CRC = 4
|
45
51
|
|
46
52
|
# Create a new BTreeBlob object.
|
47
53
|
# @param dir [String] Fully qualified directory name
|
@@ -68,7 +74,8 @@ module PEROBS
|
|
68
74
|
@btreedb.put_raw_object(raw, id)
|
69
75
|
else
|
70
76
|
bytes = raw.bytesize
|
71
|
-
|
77
|
+
crc32 = Zlib.crc32(raw, 0)
|
78
|
+
start_address = reserve_bytes(id, bytes, crc32)
|
72
79
|
if write_to_blobs_file(raw, start_address) != bytes
|
73
80
|
raise RuntimeError, 'Object length does not match written bytes'
|
74
81
|
end
|
@@ -80,22 +87,16 @@ module PEROBS
|
|
80
87
|
# @param id [Fixnum or Bignum] ID
|
81
88
|
# @return [String] sequence of bytes or nil if ID is unknown
|
82
89
|
def read_object(id)
|
83
|
-
return nil unless (
|
84
|
-
read_from_blobs_file(
|
90
|
+
return nil unless (index_entry = find(id))
|
91
|
+
read_from_blobs_file(index_entry)
|
85
92
|
end
|
86
93
|
|
87
|
-
|
88
94
|
# Find the data for the object with given id.
|
89
95
|
# @param id [Fixnum or Bignum] Object ID
|
90
|
-
# @return [Array] Returns an Array
|
91
|
-
#
|
92
|
-
# blob storage file.
|
96
|
+
# @return [Array] Returns an Array that represents the index entry for the
|
97
|
+
# given object.
|
93
98
|
def find(id)
|
94
|
-
|
95
|
-
return [ entry[BYTES], entry[START] ]
|
96
|
-
end
|
97
|
-
|
98
|
-
nil
|
99
|
+
@entries_by_id[id]
|
99
100
|
end
|
100
101
|
|
101
102
|
# Clear the mark on all entries in the index.
|
@@ -214,15 +215,22 @@ module PEROBS
|
|
214
215
|
end
|
215
216
|
|
216
217
|
# Read _bytes_ bytes from the file starting at offset _address_.
|
217
|
-
# @param
|
218
|
-
# @
|
219
|
-
def read_from_blobs_file(
|
218
|
+
# @param entry [Array] Index entry for the object
|
219
|
+
# @return [String] Raw bytes of the blob.
|
220
|
+
def read_from_blobs_file(entry)
|
220
221
|
begin
|
221
|
-
File.read(@blobs_file_name,
|
222
|
+
raw = File.read(@blobs_file_name, entry[BYTES], entry[START])
|
222
223
|
rescue => e
|
223
224
|
raise IOError,
|
224
225
|
"Cannot read blobs file #{@blobs_file_name}: #{e.message}"
|
225
226
|
end
|
227
|
+
if Zlib.crc32(raw, 0) != entry[CRC]
|
228
|
+
raise RuntimeError,
|
229
|
+
"BTreeBlob for object #{entry[ID]} has been corrupted: " +
|
230
|
+
"Checksum mismatch"
|
231
|
+
end
|
232
|
+
|
233
|
+
raw
|
226
234
|
end
|
227
235
|
|
228
236
|
# Reserve the bytes needed for the specified number of bytes with the
|
@@ -230,7 +238,7 @@ module PEROBS
|
|
230
238
|
# @param id [Fixnum or Bignum] ID of the entry
|
231
239
|
# @param bytes [Fixnum] number of bytes for this entry
|
232
240
|
# @return [Fixnum] the start address of the reserved blob
|
233
|
-
def reserve_bytes(id, bytes)
|
241
|
+
def reserve_bytes(id, bytes, crc32)
|
234
242
|
# index of first blob after the last seen entry
|
235
243
|
end_of_last_entry = 0
|
236
244
|
# blob index of best fit segment
|
@@ -272,7 +280,7 @@ module PEROBS
|
|
272
280
|
# Object reads can trigger creation of new objects. As the marking
|
273
281
|
# process triggers reads as well, all newly created objects are always
|
274
282
|
# marked to prevent them from being collected right after creation.
|
275
|
-
entry = [ id, bytes, best_fit_start || end_of_last_entry, 1 ]
|
283
|
+
entry = [ id, bytes, best_fit_start || end_of_last_entry, 1, crc32 ]
|
276
284
|
@entries.insert(best_fit_index, entry)
|
277
285
|
@entries_by_id[id] = entry
|
278
286
|
|
@@ -285,17 +293,57 @@ module PEROBS
|
|
285
293
|
# a plain Array. @entries_by_id stores them hashed by their ID.
|
286
294
|
@entries = []
|
287
295
|
@entries_by_id = {}
|
296
|
+
entry_bytes = 29
|
297
|
+
entry_format = 'QQQCL'
|
298
|
+
restore_crc = false
|
288
299
|
if File.exists?(@index_file_name)
|
289
300
|
begin
|
290
301
|
File.open(@index_file_name, 'rb') do |f|
|
291
|
-
#
|
302
|
+
# Since version 2.3.0, all index files start with a header.
|
303
|
+
# Earlier versions did not yet have this header. The header is 24
|
304
|
+
# bytes long. The 2nd set of 8 bytes must be 0 to distinguish the
|
305
|
+
# header from regular entries. The first 8 bytes are a magic
|
306
|
+
# number and the 3rd 8 bytes mark the schema version. We are
|
307
|
+
# currently at version 1.
|
308
|
+
if f.size >= 24
|
309
|
+
header = f.read(24).unpack('QQQ')
|
310
|
+
if header[0] != PEROBS_MAGIC && header[1] != 0
|
311
|
+
# These are the settings for the pre 2.3.0 entry format.
|
312
|
+
entry_bytes = 25
|
313
|
+
entry_format = 'QQQC'
|
314
|
+
restore_crc = true
|
315
|
+
# Rewind to start as we have an older version index file that
|
316
|
+
# has no header.
|
317
|
+
f.seek(0)
|
318
|
+
end
|
319
|
+
end
|
320
|
+
|
321
|
+
# The index is a binary format. Each entry has exactly 29 bytes.
|
322
|
+
# Version 2.2.0 and earlier did not have the CRC field. To ensure
|
323
|
+
# backwards compatibility with older databases, we reconstruct the
|
324
|
+
# CRC for older index files and convert it to the new format on
|
325
|
+
# the next index write.
|
326
|
+
#
|
292
327
|
# Bytes
|
293
328
|
# 0 - 7 : 64 bits, little endian : ID
|
294
329
|
# 8 - 15 : 64 bits, little endian : Entry length in bytes
|
295
330
|
# 16 - 23 : 64 bits, little endian : Start address in data file
|
296
331
|
# 24 : 8 bits : 0 if unmarked, 1 if marked
|
297
|
-
|
298
|
-
|
332
|
+
# 25 - 28 : 32 bits, CRC32 checksum of the data blob
|
333
|
+
while (bytes = f.read(entry_bytes))
|
334
|
+
e = bytes.unpack(entry_format)
|
335
|
+
if restore_crc
|
336
|
+
# If the index file was written with version <= 2.2.0 we have
|
337
|
+
# to compute the CRC from the data blob.
|
338
|
+
begin
|
339
|
+
raw = File.read(@blobs_file_name, e[BYTES], e[START])
|
340
|
+
rescue => e
|
341
|
+
raise IOError,
|
342
|
+
"Cannot read blobs file #{@blobs_file_name}: #{e.message}"
|
343
|
+
end
|
344
|
+
e[CRC] = Zlib.crc32(raw)
|
345
|
+
end
|
346
|
+
@entries << e
|
299
347
|
@entries_by_id[e[ID]] = e
|
300
348
|
end
|
301
349
|
end
|
@@ -310,8 +358,9 @@ module PEROBS
|
|
310
358
|
begin
|
311
359
|
File.open(@index_file_name, 'wb') do |f|
|
312
360
|
# See read_index for data format documentation.
|
361
|
+
f.write([ PEROBS_MAGIC, 0, 1].pack('QQQ'))
|
313
362
|
@entries.each do |entry|
|
314
|
-
f.write(entry.pack('
|
363
|
+
f.write(entry.pack('QQQCL'))
|
315
364
|
end
|
316
365
|
end
|
317
366
|
rescue => e
|
@@ -329,7 +378,7 @@ module PEROBS
|
|
329
378
|
# already created the new BTree node, so these entries will be
|
330
379
|
# distributed into new leaf blobs of this new node.
|
331
380
|
@entries.each do |entry|
|
332
|
-
raw = read_from_blobs_file(entry
|
381
|
+
raw = read_from_blobs_file(entry)
|
333
382
|
@btreedb.put_raw_object(raw, entry[ID])
|
334
383
|
end
|
335
384
|
|
data/lib/perobs/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: perobs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.2.0
|
4
|
+
version: 2.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chris Schlaeger
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-01-
|
11
|
+
date: 2016-01-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|