RubyGems - perobs - Versions diffs - 2.2.0 → 2.3.0 - Mend

perobs 2.2.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 862c67d14741c0fe0145af7c5eb1c1c147ebe189
-  data.tar.gz: c88f1c19c16db2c6b3f2fd046cbb4d41e9fce2c3
+  metadata.gz: 0a8b4212f5d64ee97ea6d7592236792f51aac734
+  data.tar.gz: 6ed42c80925703e1f6224506ce0b04ef468699c2
 SHA512:
-  metadata.gz: 11fd013bb0da3088a4ed88dfac390aac5b8cef823fd2bdeb06ab8c7ad4fedbde997365e0f135bae63b72acf5ee9ad3ac10f5640ac6b703785876514be3a480f1
-  data.tar.gz: d13b9dd12084975bc5d6e84e6fcc0f91b3f455e56d4532c7a37983c53dd1054db211e8244c8317da5f0cb85e32ce4ce01564e9a3684b442c01bfdf889735a393
+  metadata.gz: 00439aa93cf50d9d06b7eb9986c5eaaac67c012c6ece983445de014d80c40b1234313b4a24e2092a5e5c9c48fe83f3ceeefdb60487d0de016dcf54741d374639
+  data.tar.gz: bb202f217c3a0a2529d40277c56ede5cbd3d020203b72580cef04dad09b853f1d6607ccd7cceea83d9f9d568d1d86b2783f51d346ebc98e53ba1c7c7d9c05b95

data/lib/perobs/BTreeBlob.rb CHANGED Viewed

@@ -25,6 +25,7 @@
 # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+require 'zlib'
 module PEROBS
@@ -32,6 +33,9 @@ module PEROBS
   # HashedBlobsDB object.
   class BTreeBlob
+    # Magic number used for index files.
+    PEROBS_MAGIC = 0xB78EEDB
     # For performance reasons we use an Array for the entries instead of a
     # Hash. These constants specify the Array index for the corresponding
     # value.
@@ -42,6 +46,8 @@ module PEROBS
     START = 2
     # Mark/Unmarked flag
     MARKED = 3
+    # CRC Checksum of the data blob
+    CRC = 4
     # Create a new BTreeBlob object.
     # @param dir [String] Fully qualified directory name
@@ -68,7 +74,8 @@ module PEROBS
         @btreedb.put_raw_object(raw, id)
       else
         bytes = raw.bytesize
-        start_address = reserve_bytes(id, bytes)
+        crc32 = Zlib.crc32(raw, 0)
+        start_address = reserve_bytes(id, bytes, crc32)
         if write_to_blobs_file(raw, start_address) != bytes
           raise RuntimeError, 'Object length does not match written bytes'
         end
@@ -80,22 +87,16 @@ module PEROBS
     # @param id [Fixnum or Bignum] ID
     # @return [String] sequence of bytes or nil if ID is unknown
     def read_object(id)
-      return nil unless (bytes_and_start = find(id))
-      read_from_blobs_file(*bytes_and_start)
+      return nil unless (index_entry = find(id))
+      read_from_blobs_file(index_entry)
     end
     # Find the data for the object with given id.
     # @param id [Fixnum or Bignum] Object ID
-    # @return [Array] Returns an Array with two Fixnum entries. The first is
-    #         the number of bytes and the second is the starting offset in the
-    #         blob storage file.
+    # @return [Array] Returns an Array that represents the index entry for the
+    #         given object.
     def find(id)
-      if (entry = @entries_by_id[id])
-        return [ entry[BYTES], entry[START] ]
-      end
-      nil
+      @entries_by_id[id]
     end
     # Clear the mark on all entries in the index.
@@ -214,15 +215,22 @@ module PEROBS
     end
     # Read _bytes_ bytes from the file starting at offset _address_.
-    # @param bytes [Fixnum] number of bytes to read
-    # @param address [Fixnum] offset in the file
-    def read_from_blobs_file(bytes, address)
+    # @param entry [Array] Index entry for the object
+    # @return [String] Raw bytes of the blob.
+    def read_from_blobs_file(entry)
       begin
-        File.read(@blobs_file_name, bytes, address)
+        raw = File.read(@blobs_file_name, entry[BYTES], entry[START])
       rescue => e
         raise IOError,
               "Cannot read blobs file #{@blobs_file_name}: #{e.message}"
       end
+      if Zlib.crc32(raw, 0) != entry[CRC]
+        raise RuntimeError,
+              "BTreeBlob for object #{entry[ID]} has been corrupted: " +
+              "Checksum mismatch"
+      end
+      raw
     end
     # Reserve the bytes needed for the specified number of bytes with the
@@ -230,7 +238,7 @@ module PEROBS
     # @param id [Fixnum or Bignum] ID of the entry
     # @param bytes [Fixnum] number of bytes for this entry
     # @return [Fixnum] the start address of the reserved blob
-    def reserve_bytes(id, bytes)
+    def reserve_bytes(id, bytes, crc32)
       # index of first blob after the last seen entry
       end_of_last_entry = 0
       # blob index of best fit segment
@@ -272,7 +280,7 @@ module PEROBS
       # Object reads can trigger creation of new objects. As the marking
       # process triggers reads as well, all newly created objects are always
       # marked to prevent them from being collected right after creation.
-      entry = [ id, bytes, best_fit_start || end_of_last_entry, 1 ]
+      entry = [ id, bytes, best_fit_start || end_of_last_entry, 1, crc32 ]
       @entries.insert(best_fit_index, entry)
       @entries_by_id[id] = entry
@@ -285,17 +293,57 @@ module PEROBS
       # a plain Array. @entries_by_id stores them hashed by their ID.
       @entries = []
       @entries_by_id = {}
+      entry_bytes = 29
+      entry_format = 'QQQCL'
+      restore_crc = false
       if File.exists?(@index_file_name)
         begin
           File.open(@index_file_name, 'rb') do |f|
-            # The index is a binary format. Each entry has exactly 25 bytes.
+            # Since version 2.3.0, all index files start with a header.
+            # Earlier versions did not yet have this header. The header is 24
+            # bytes long. The 2nd set of 8 bytes must be 0 to distinguish the
+            # header from regular entries. The first 8 bytes are a magic
+            # number and the 3rd 8 bytes mark the schema version. We are
+            # currently at version 1.
+            if f.size >= 24
+              header = f.read(24).unpack('QQQ')
+              if header[0] != PEROBS_MAGIC && header[1] != 0
+                # These are the settings for the pre 2.3.0 entry format.
+                entry_bytes = 25
+                entry_format = 'QQQC'
+                restore_crc = true
+                # Rewind to start as we have an older version index file that
+                # has no header.
+                f.seek(0)
+              end
+            end
+            # The index is a binary format. Each entry has exactly 29 bytes.
+            # Version 2.2.0 and earlier did not have the CRC field. To ensure
+            # backwards compatibility with older databases, we reconstruct the
+            # CRC for older index files and convert it to the new format on
+            # the next index write.
+            #
             # Bytes
             #  0 -  7 : 64 bits, little endian : ID
             #  8 - 15 : 64 bits, little endian : Entry length in bytes
             # 16 - 23 : 64 bits, little endian : Start address in data file
             # 24      : 8 bits : 0 if unmarked, 1 if marked
-            while (bytes = f.read(25))
-              @entries << (e = bytes.unpack('QQQC'))
+            # 25 - 28 : 32 bits, CRC32 checksum of the data blob
+            while (bytes = f.read(entry_bytes))
+              e = bytes.unpack(entry_format)
+              if restore_crc
+                # If the index file was written with version <= 2.2.0 we have
+                # to compute the CRC from the data blob.
+                begin
+                  raw = File.read(@blobs_file_name, e[BYTES], e[START])
+                rescue => e
+                  raise IOError,
+                    "Cannot read blobs file #{@blobs_file_name}: #{e.message}"
+                end
+                e[CRC] = Zlib.crc32(raw)
+              end
+              @entries << e
               @entries_by_id[e[ID]] = e
             end
           end
@@ -310,8 +358,9 @@ module PEROBS
       begin
         File.open(@index_file_name, 'wb') do |f|
           # See read_index for data format documentation.
+          f.write([ PEROBS_MAGIC, 0, 1].pack('QQQ'))
           @entries.each do |entry|
-            f.write(entry.pack('QQQC'))
+            f.write(entry.pack('QQQCL'))
           end
         end
       rescue => e
@@ -329,7 +378,7 @@ module PEROBS
       # already created the new BTree node, so these entries will be
       # distributed into new leaf blobs of this new node.
       @entries.each do |entry|
-        raw = read_from_blobs_file(entry[BYTES], entry[START])
+        raw = read_from_blobs_file(entry)
         @btreedb.put_raw_object(raw, entry[ID])
       end

data/lib/perobs/version.rb CHANGED Viewed

@@ -1,4 +1,4 @@
 module PEROBS
   # The version number
-  VERSION = "2.2.0"
+  VERSION = "2.3.0"
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: perobs
 version: !ruby/object:Gem::Version
-  version: 2.2.0
+  version: 2.3.0
 platform: ruby
 authors:
 - Chris Schlaeger
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-01-24 00:00:00.000000000 Z
+date: 2016-01-31 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler