perobs 2.2.0 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/perobs/BTreeBlob.rb +72 -23
- data/lib/perobs/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0a8b4212f5d64ee97ea6d7592236792f51aac734
|
4
|
+
data.tar.gz: 6ed42c80925703e1f6224506ce0b04ef468699c2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 00439aa93cf50d9d06b7eb9986c5eaaac67c012c6ece983445de014d80c40b1234313b4a24e2092a5e5c9c48fe83f3ceeefdb60487d0de016dcf54741d374639
|
7
|
+
data.tar.gz: bb202f217c3a0a2529d40277c56ede5cbd3d020203b72580cef04dad09b853f1d6607ccd7cceea83d9f9d568d1d86b2783f51d346ebc98e53ba1c7c7d9c05b95
|
data/lib/perobs/BTreeBlob.rb
CHANGED
@@ -25,6 +25,7 @@
|
|
25
25
|
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
26
26
|
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27
27
|
|
28
|
+
require 'zlib'
|
28
29
|
|
29
30
|
module PEROBS
|
30
31
|
|
@@ -32,6 +33,9 @@ module PEROBS
|
|
32
33
|
# HashedBlobsDB object.
|
33
34
|
class BTreeBlob
|
34
35
|
|
36
|
+
# Magic number used for index files.
|
37
|
+
PEROBS_MAGIC = 0xB78EEDB
|
38
|
+
|
35
39
|
# For performance reasons we use an Array for the entries instead of a
|
36
40
|
# Hash. These constants specify the Array index for the corresponding
|
37
41
|
# value.
|
@@ -42,6 +46,8 @@ module PEROBS
|
|
42
46
|
START = 2
|
43
47
|
# Mark/Unmarked flag
|
44
48
|
MARKED = 3
|
49
|
+
# CRC Checksum of the data blob
|
50
|
+
CRC = 4
|
45
51
|
|
46
52
|
# Create a new BTreeBlob object.
|
47
53
|
# @param dir [String] Fully qualified directory name
|
@@ -68,7 +74,8 @@ module PEROBS
|
|
68
74
|
@btreedb.put_raw_object(raw, id)
|
69
75
|
else
|
70
76
|
bytes = raw.bytesize
|
71
|
-
|
77
|
+
crc32 = Zlib.crc32(raw, 0)
|
78
|
+
start_address = reserve_bytes(id, bytes, crc32)
|
72
79
|
if write_to_blobs_file(raw, start_address) != bytes
|
73
80
|
raise RuntimeError, 'Object length does not match written bytes'
|
74
81
|
end
|
@@ -80,22 +87,16 @@ module PEROBS
|
|
80
87
|
# @param id [Fixnum or Bignum] ID
|
81
88
|
# @return [String] sequence of bytes or nil if ID is unknown
|
82
89
|
def read_object(id)
|
83
|
-
return nil unless (
|
84
|
-
read_from_blobs_file(
|
90
|
+
return nil unless (index_entry = find(id))
|
91
|
+
read_from_blobs_file(index_entry)
|
85
92
|
end
|
86
93
|
|
87
|
-
|
88
94
|
# Find the data for the object with given id.
|
89
95
|
# @param id [Fixnum or Bignum] Object ID
|
90
|
-
# @return [Array] Returns an Array
|
91
|
-
#
|
92
|
-
# blob storage file.
|
96
|
+
# @return [Array] Returns an Array that represents the index entry for the
|
97
|
+
# given object.
|
93
98
|
def find(id)
|
94
|
-
|
95
|
-
return [ entry[BYTES], entry[START] ]
|
96
|
-
end
|
97
|
-
|
98
|
-
nil
|
99
|
+
@entries_by_id[id]
|
99
100
|
end
|
100
101
|
|
101
102
|
# Clear the mark on all entries in the index.
|
@@ -214,15 +215,22 @@ module PEROBS
|
|
214
215
|
end
|
215
216
|
|
216
217
|
# Read the blob described by the given index entry from the blobs file and verify its CRC32 checksum.
|
217
|
-
# @param
|
218
|
-
# @
|
219
|
-
def read_from_blobs_file(
|
218
|
+
# @param entry [Array] Index entry for the object
|
219
|
+
# @return [String] Raw bytes of the blob.
|
220
|
+
def read_from_blobs_file(entry)
|
220
221
|
begin
|
221
|
-
File.read(@blobs_file_name,
|
222
|
+
raw = File.read(@blobs_file_name, entry[BYTES], entry[START])
|
222
223
|
rescue => e
|
223
224
|
raise IOError,
|
224
225
|
"Cannot read blobs file #{@blobs_file_name}: #{e.message}"
|
225
226
|
end
|
227
|
+
if Zlib.crc32(raw, 0) != entry[CRC]
|
228
|
+
raise RuntimeError,
|
229
|
+
"BTreeBlob for object #{entry[ID]} has been corrupted: " +
|
230
|
+
"Checksum mismatch"
|
231
|
+
end
|
232
|
+
|
233
|
+
raw
|
226
234
|
end
|
227
235
|
|
228
236
|
# Reserve the bytes needed for the specified number of bytes with the
|
@@ -230,7 +238,7 @@ module PEROBS
|
|
230
238
|
# @param id [Fixnum or Bignum] ID of the entry
|
231
239
|
# @param bytes [Fixnum] number of bytes for this entry
|
232
240
|
# @return [Fixnum] the start address of the reserved blob
|
233
|
-
def reserve_bytes(id, bytes)
|
241
|
+
def reserve_bytes(id, bytes, crc32)
|
234
242
|
# index of first blob after the last seen entry
|
235
243
|
end_of_last_entry = 0
|
236
244
|
# blob index of best fit segment
|
@@ -272,7 +280,7 @@ module PEROBS
|
|
272
280
|
# Object reads can trigger creation of new objects. As the marking
|
273
281
|
# process triggers reads as well, all newly created objects are always
|
274
282
|
# marked to prevent them from being collected right after creation.
|
275
|
-
entry = [ id, bytes, best_fit_start || end_of_last_entry, 1 ]
|
283
|
+
entry = [ id, bytes, best_fit_start || end_of_last_entry, 1, crc32 ]
|
276
284
|
@entries.insert(best_fit_index, entry)
|
277
285
|
@entries_by_id[id] = entry
|
278
286
|
|
@@ -285,17 +293,57 @@ module PEROBS
|
|
285
293
|
# a plain Array. @entries_by_id stores them hashed by their ID.
|
286
294
|
@entries = []
|
287
295
|
@entries_by_id = {}
|
296
|
+
entry_bytes = 29
|
297
|
+
entry_format = 'QQQCL'
|
298
|
+
restore_crc = false
|
288
299
|
if File.exists?(@index_file_name)
|
289
300
|
begin
|
290
301
|
File.open(@index_file_name, 'rb') do |f|
|
291
|
-
#
|
302
|
+
# Since version 2.3.0, all index files start with a header.
|
303
|
+
# Earlier versions did not yet have this header. The header is 24
|
304
|
+
# bytes long. The 2nd set of 8 bytes must be 0 to distinguish the
|
305
|
+
# header from regular entries. The first 8 bytes are a magic
|
306
|
+
# number and the 3rd 8 bytes mark the schema version. We are
|
307
|
+
# currently at version 1.
|
308
|
+
if f.size >= 24
|
309
|
+
header = f.read(24).unpack('QQQ')
|
310
|
+
if header[0] != PEROBS_MAGIC && header[1] != 0
|
311
|
+
# These are the settings for the pre 2.3.0 entry format.
|
312
|
+
entry_bytes = 25
|
313
|
+
entry_format = 'QQQC'
|
314
|
+
restore_crc = true
|
315
|
+
# Rewind to start as we have an older version index file that
|
316
|
+
# has no header.
|
317
|
+
f.seek(0)
|
318
|
+
end
|
319
|
+
end
|
320
|
+
|
321
|
+
# The index is a binary format. Each entry has exactly 29 bytes.
|
322
|
+
# Version 2.2.0 and earlier did not have the CRC field. To ensure
|
323
|
+
# backwards compatibility with older databases, we reconstruct the
|
324
|
+
# CRC for older index files and convert it to the new format on
|
325
|
+
# the next index write.
|
326
|
+
#
|
292
327
|
# Bytes
|
293
328
|
# 0 - 7 : 64 bits, little endian : ID
|
294
329
|
# 8 - 15 : 64 bits, little endian : Entry length in bytes
|
295
330
|
# 16 - 23 : 64 bits, little endian : Start address in data file
|
296
331
|
# 24 : 8 bits : 0 if unmarked, 1 if marked
|
297
|
-
|
298
|
-
|
332
|
+
# 25 - 28 : 32 bits, CRC32 checksum of the data blob
|
333
|
+
while (bytes = f.read(entry_bytes))
|
334
|
+
e = bytes.unpack(entry_format)
|
335
|
+
if restore_crc
|
336
|
+
# If the index file was written with version <= 2.2.0 we have
|
337
|
+
# to compute the CRC from the data blob.
|
338
|
+
begin
|
339
|
+
raw = File.read(@blobs_file_name, e[BYTES], e[START])
|
340
|
+
rescue => e
|
341
|
+
raise IOError,
|
342
|
+
"Cannot read blobs file #{@blobs_file_name}: #{e.message}"
|
343
|
+
end
|
344
|
+
e[CRC] = Zlib.crc32(raw)
|
345
|
+
end
|
346
|
+
@entries << e
|
299
347
|
@entries_by_id[e[ID]] = e
|
300
348
|
end
|
301
349
|
end
|
@@ -310,8 +358,9 @@ module PEROBS
|
|
310
358
|
begin
|
311
359
|
File.open(@index_file_name, 'wb') do |f|
|
312
360
|
# See read_index for data format documentation.
|
361
|
+
f.write([ PEROBS_MAGIC, 0, 1].pack('QQQ'))
|
313
362
|
@entries.each do |entry|
|
314
|
-
f.write(entry.pack('
|
363
|
+
f.write(entry.pack('QQQCL'))
|
315
364
|
end
|
316
365
|
end
|
317
366
|
rescue => e
|
@@ -329,7 +378,7 @@ module PEROBS
|
|
329
378
|
# already created the new BTree node, so these entries will be
|
330
379
|
# distributed into new leaf blobs of this new node.
|
331
380
|
@entries.each do |entry|
|
332
|
-
raw = read_from_blobs_file(entry
|
381
|
+
raw = read_from_blobs_file(entry)
|
333
382
|
@btreedb.put_raw_object(raw, entry[ID])
|
334
383
|
end
|
335
384
|
|
data/lib/perobs/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: perobs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chris Schlaeger
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-01-
|
11
|
+
date: 2016-01-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|