perobs 4.0.0 → 4.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +27 -16
- data/lib/perobs/Array.rb +66 -19
- data/lib/perobs/BTree.rb +106 -15
- data/lib/perobs/BTreeBlob.rb +4 -3
- data/lib/perobs/BTreeDB.rb +5 -4
- data/lib/perobs/BTreeNode.rb +482 -156
- data/lib/perobs/BTreeNodeLink.rb +10 -0
- data/lib/perobs/BigArray.rb +285 -0
- data/lib/perobs/BigArrayNode.rb +1002 -0
- data/lib/perobs/BigHash.rb +246 -0
- data/lib/perobs/BigTree.rb +197 -0
- data/lib/perobs/BigTreeNode.rb +873 -0
- data/lib/perobs/Cache.rb +48 -10
- data/lib/perobs/ConsoleProgressMeter.rb +61 -0
- data/lib/perobs/DataBase.rb +4 -3
- data/lib/perobs/DynamoDB.rb +57 -15
- data/lib/perobs/EquiBlobsFile.rb +155 -50
- data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
- data/lib/perobs/FlatFile.rb +519 -227
- data/lib/perobs/FlatFileBlobHeader.rb +113 -54
- data/lib/perobs/FlatFileDB.rb +49 -23
- data/lib/perobs/FuzzyStringMatcher.rb +175 -0
- data/lib/perobs/Hash.rb +127 -33
- data/lib/perobs/IDList.rb +144 -0
- data/lib/perobs/IDListPage.rb +107 -0
- data/lib/perobs/IDListPageFile.rb +180 -0
- data/lib/perobs/IDListPageRecord.rb +142 -0
- data/lib/perobs/Object.rb +18 -15
- data/lib/perobs/ObjectBase.rb +46 -5
- data/lib/perobs/PersistentObjectCache.rb +57 -68
- data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
- data/lib/perobs/ProgressMeter.rb +97 -0
- data/lib/perobs/SpaceManager.rb +273 -0
- data/lib/perobs/SpaceTree.rb +21 -12
- data/lib/perobs/SpaceTreeNode.rb +53 -61
- data/lib/perobs/Store.rb +264 -145
- data/lib/perobs/version.rb +1 -1
- data/lib/perobs.rb +2 -0
- data/perobs.gemspec +4 -4
- data/test/Array_spec.rb +15 -6
- data/test/BTree_spec.rb +6 -2
- data/test/BigArray_spec.rb +261 -0
- data/test/BigHash_spec.rb +152 -0
- data/test/BigTreeNode_spec.rb +153 -0
- data/test/BigTree_spec.rb +259 -0
- data/test/EquiBlobsFile_spec.rb +105 -1
- data/test/FNV_Hash_1a_64_spec.rb +59 -0
- data/test/FlatFileDB_spec.rb +198 -14
- data/test/FuzzyStringMatcher_spec.rb +261 -0
- data/test/Hash_spec.rb +13 -3
- data/test/IDList_spec.rb +77 -0
- data/test/LegacyDBs/LegacyDB.rb +155 -0
- data/test/LegacyDBs/version_3/class_map.json +1 -0
- data/test/LegacyDBs/version_3/config.json +1 -0
- data/test/LegacyDBs/version_3/database.blobs +0 -0
- data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
- data/test/LegacyDBs/version_3/index.blobs +0 -0
- data/test/LegacyDBs/version_3/version +1 -0
- data/test/LockFile_spec.rb +9 -6
- data/test/SpaceManager_spec.rb +176 -0
- data/test/SpaceTree_spec.rb +4 -1
- data/test/Store_spec.rb +305 -203
- data/test/spec_helper.rb +9 -4
- metadata +57 -16
- data/lib/perobs/BTreeNodeCache.rb +0 -109
- data/lib/perobs/TreeDB.rb +0 -277
    
        data/lib/perobs/FlatFile.rb
    CHANGED
    
    | @@ -2,7 +2,7 @@ | |
| 2 2 | 
             
            #
         | 
| 3 3 | 
             
            # = FlatFile.rb -- Persistent Ruby Object Store
         | 
| 4 4 | 
             
            #
         | 
| 5 | 
            -
            # Copyright (c) 2016 by Chris Schlaeger <chris@taskjuggler.org>
         | 
| 5 | 
            +
            # Copyright (c) 2016, 2018, 2019 by Chris Schlaeger <chris@taskjuggler.org>
         | 
| 6 6 | 
             
            #
         | 
| 7 7 | 
             
            # MIT License
         | 
| 8 8 | 
             
            #
         | 
| @@ -31,6 +31,8 @@ require 'perobs/Log' | |
| 31 31 | 
             
            require 'perobs/FlatFileBlobHeader'
         | 
| 32 32 | 
             
            require 'perobs/BTree'
         | 
| 33 33 | 
             
            require 'perobs/SpaceTree'
         | 
| 34 | 
            +
            require 'perobs/SpaceManager'
         | 
| 35 | 
            +
            require 'perobs/IDList'
         | 
| 34 36 |  | 
| 35 37 | 
             
            module PEROBS
         | 
| 36 38 |  | 
| @@ -44,12 +46,20 @@ module PEROBS | |
| 44 46 |  | 
| 45 47 | 
             
                # Create a new FlatFile object for a database in the given path.
         | 
| 46 48 | 
             
                # @param dir [String] Directory path for the data base file
         | 
| 47 | 
            -
                def initialize(dir)
         | 
| 49 | 
            +
                def initialize(dir, progressmeter)
         | 
| 48 50 | 
             
                  @db_dir = dir
         | 
| 51 | 
            +
                  @progressmeter = progressmeter
         | 
| 49 52 | 
             
                  @f = nil
         | 
| 50 | 
            -
                  @ | 
| 51 | 
            -
                  @ | 
| 52 | 
            -
                   | 
| 53 | 
            +
                  @marks = nil
         | 
| 54 | 
            +
                  @index = BTree.new(@db_dir, 'index', INDEX_BTREE_ORDER, @progressmeter)
         | 
| 55 | 
            +
                  old_spaces_file = File.join(@db_dir, 'database_spaces.blobs')
         | 
| 56 | 
            +
                  if File.exist?(old_spaces_file)
         | 
| 57 | 
            +
                    # PEROBS version 4.1.0 and earlier used this space list format. It is
         | 
| 58 | 
            +
                    # deprecated now. Newly created DBs use the SpaceManager format.
         | 
| 59 | 
            +
                    @space_list = SpaceTree.new(@db_dir, @progressmeter)
         | 
| 60 | 
            +
                  else
         | 
| 61 | 
            +
                    @space_list = SpaceManager.new(@db_dir, @progressmeter)
         | 
| 62 | 
            +
                  end
         | 
| 53 63 | 
             
                end
         | 
| 54 64 |  | 
| 55 65 | 
             
                # Open the flat file for reading and writing.
         | 
| @@ -74,33 +84,19 @@ module PEROBS | |
| 74 84 | 
             
                  end
         | 
| 75 85 | 
             
                  @f.sync = true
         | 
| 76 86 |  | 
| 77 | 
            -
                   | 
| 78 | 
            -
                    @index.open(!new_db_created)
         | 
| 79 | 
            -
                    @space_list.open
         | 
| 80 | 
            -
                  rescue FatalError
         | 
| 81 | 
            -
                    # Ensure that the index is really closed.
         | 
| 82 | 
            -
                    @index.close
         | 
| 83 | 
            -
                    # Erase it completely
         | 
| 84 | 
            -
                    @index.erase
         | 
| 85 | 
            -
                    # Then create it again.
         | 
| 86 | 
            -
                    @index.open
         | 
| 87 | 
            -
             | 
| 88 | 
            -
                    # Ensure that the spaces list is really closed.
         | 
| 89 | 
            -
                    @space_list.close
         | 
| 90 | 
            -
                    # Erase it completely
         | 
| 91 | 
            -
                    @space_list.erase
         | 
| 92 | 
            -
                    # Then create it again
         | 
| 93 | 
            -
                    @space_list.open
         | 
| 94 | 
            -
             | 
| 95 | 
            -
                    regenerate_index_and_spaces
         | 
| 96 | 
            -
                  end
         | 
| 87 | 
            +
                  open_index_files(!new_db_created)
         | 
| 97 88 | 
             
                end
         | 
| 98 89 |  | 
| 99 90 | 
             
                # Close the flat file. This method must be called to ensure that all data
         | 
| 100 91 | 
             
                # is really written into the filesystem.
         | 
| 101 92 | 
             
                def close
         | 
| 102 | 
            -
                  @space_list.close
         | 
| 103 | 
            -
                  @index.close
         | 
| 93 | 
            +
                  @space_list.close if @space_list.is_open?
         | 
| 94 | 
            +
                  @index.close if @index.is_open?
         | 
| 95 | 
            +
             | 
| 96 | 
            +
                  if @marks
         | 
| 97 | 
            +
                    @marks.erase
         | 
| 98 | 
            +
                    @marks = nil
         | 
| 99 | 
            +
                  end
         | 
| 104 100 |  | 
| 105 101 | 
             
                  if @f
         | 
| 106 102 | 
             
                    @f.flush
         | 
| @@ -139,29 +135,37 @@ module PEROBS | |
| 139 135 | 
             
                # @param addr [Integer] Address of the blob to delete
         | 
| 140 136 | 
             
                # @param id [Integer] ID of the blob to delete
         | 
| 141 137 | 
             
                def delete_obj_by_address(addr, id)
         | 
| 142 | 
            -
                  @index.remove(id)
         | 
| 143 | 
            -
                  header = FlatFileBlobHeader. | 
| 138 | 
            +
                  @index.remove(id) if @index.is_open?
         | 
| 139 | 
            +
                  header = FlatFileBlobHeader.read(@f, addr, id)
         | 
| 144 140 | 
             
                  header.clear_flags
         | 
| 145 | 
            -
                  @space_list.add_space(addr, header.length)
         | 
| 141 | 
            +
                  @space_list.add_space(addr, header.length) if @space_list.is_open?
         | 
| 146 142 | 
             
                end
         | 
| 147 143 |  | 
| 148 144 | 
             
                # Delete all unmarked objects.
         | 
| 149 | 
            -
                def delete_unmarked_objects
         | 
| 150 | 
            -
                   | 
| 151 | 
            -
                   | 
| 145 | 
            +
                def delete_unmarked_objects(&block)
         | 
| 146 | 
            +
                  # We don't update the index and the space list during this operation as
         | 
| 147 | 
            +
                  # we defragmentize the blob file at the end. We'll end the operation
         | 
| 148 | 
            +
                  # with an empty space list.
         | 
| 149 | 
            +
                  clear_index_files
         | 
| 150 | 
            +
             | 
| 151 | 
            +
                  deleted_objects_count = 0
         | 
| 152 | 
            +
                  @progressmeter.start('Sweeping unmarked objects', @f.size) do |pm|
         | 
| 153 | 
            +
                    each_blob_header do |header|
         | 
| 154 | 
            +
                      if header.is_valid? && !@marks.include?(header.id)
         | 
| 155 | 
            +
                        delete_obj_by_address(header.addr, header.id)
         | 
| 156 | 
            +
                        yield(header.id) if block_given?
         | 
| 157 | 
            +
                        deleted_objects_count += 1
         | 
| 158 | 
            +
                      end
         | 
| 152 159 |  | 
| 153 | 
            -
             | 
| 154 | 
            -
                  each_blob_header do |pos, header|
         | 
| 155 | 
            -
                    if header.is_valid? && @marks.get(header.id).nil?
         | 
| 156 | 
            -
                      delete_obj_by_address(pos, header.id)
         | 
| 157 | 
            -
                      deleted_ids << header.id
         | 
| 160 | 
            +
                      pm.update(header.addr)
         | 
| 158 161 | 
             
                    end
         | 
| 159 162 | 
             
                  end
         | 
| 160 163 | 
             
                  defragmentize
         | 
| 161 164 |  | 
| 162 | 
            -
                   | 
| 163 | 
            -
             | 
| 164 | 
            -
             | 
| 165 | 
            +
                  # Update the index file and create a new, empty space list.
         | 
| 166 | 
            +
                  regenerate_index_and_spaces
         | 
| 167 | 
            +
             | 
| 168 | 
            +
                  deleted_objects_count
         | 
| 165 169 | 
             
                end
         | 
| 166 170 |  | 
| 167 171 | 
             
                # Write the given object into the file. This method never uses in-place
         | 
| @@ -177,7 +181,7 @@ module PEROBS | |
| 177 181 | 
             
                  # operation is aborted or interrupted we ensure that we either have the
         | 
| 178 182 | 
             
                  # old or the new version available.
         | 
| 179 183 | 
             
                  if (old_addr = find_obj_addr_by_id(id))
         | 
| 180 | 
            -
                    old_header = FlatFileBlobHeader. | 
| 184 | 
            +
                    old_header = FlatFileBlobHeader.read(@f, old_addr)
         | 
| 181 185 | 
             
                    old_header.set_outdated_flag
         | 
| 182 186 | 
             
                  end
         | 
| 183 187 |  | 
| @@ -188,22 +192,24 @@ module PEROBS | |
| 188 192 | 
             
                  # performance impact of compression is not compensated by writing
         | 
| 189 193 | 
             
                  # less data to the storage.
         | 
| 190 194 | 
             
                  compressed = false
         | 
| 191 | 
            -
                   | 
| 195 | 
            +
                  raw_obj_bytesize = raw_obj.bytesize
         | 
| 196 | 
            +
                  if raw_obj_bytesize > 256
         | 
| 192 197 | 
             
                    raw_obj = Zlib.deflate(raw_obj)
         | 
| 198 | 
            +
                    raw_obj_bytesize = raw_obj.bytesize
         | 
| 193 199 | 
             
                    compressed = true
         | 
| 194 200 | 
             
                  end
         | 
| 195 201 |  | 
| 196 | 
            -
                  addr, length = find_free_blob( | 
| 202 | 
            +
                  addr, length = find_free_blob(raw_obj_bytesize)
         | 
| 197 203 | 
             
                  begin
         | 
| 198 204 | 
             
                    if length != -1
         | 
| 199 205 | 
             
                      # Just a safeguard so we don't overwrite current data.
         | 
| 200 | 
            -
                      header = FlatFileBlobHeader. | 
| 206 | 
            +
                      header = FlatFileBlobHeader.read(@f, addr)
         | 
| 201 207 | 
             
                      if header.length != length
         | 
| 202 208 | 
             
                        PEROBS.log.fatal "Length in free list (#{length}) and header " +
         | 
| 203 209 | 
             
                          "(#{header.length}) for address #{addr} don't match."
         | 
| 204 210 | 
             
                      end
         | 
| 205 | 
            -
                      if  | 
| 206 | 
            -
                        PEROBS.log.fatal "Object (#{ | 
| 211 | 
            +
                      if raw_obj_bytesize > header.length
         | 
| 212 | 
            +
                        PEROBS.log.fatal "Object (#{raw_obj_bytesize}) is longer than " +
         | 
| 207 213 | 
             
                          "blob space (#{header.length})."
         | 
| 208 214 | 
             
                      end
         | 
| 209 215 | 
             
                      if header.is_valid?
         | 
| @@ -213,36 +219,40 @@ module PEROBS | |
| 213 219 | 
             
                    end
         | 
| 214 220 | 
             
                    flags = 1 << FlatFileBlobHeader::VALID_FLAG_BIT
         | 
| 215 221 | 
             
                    flags |= (1 << FlatFileBlobHeader::COMPRESSED_FLAG_BIT) if compressed
         | 
| 216 | 
            -
                    FlatFileBlobHeader.new(@f, addr, flags,  | 
| 222 | 
            +
                    FlatFileBlobHeader.new(@f, addr, flags, raw_obj_bytesize, id, crc).write
         | 
| 217 223 | 
             
                    @f.write(raw_obj)
         | 
| 218 | 
            -
                     | 
| 224 | 
            +
                    @f.flush
         | 
| 225 | 
            +
                    if length != -1 && raw_obj_bytesize < length
         | 
| 219 226 | 
             
                      # The new object was not appended and it did not completely fill the
         | 
| 220 227 | 
             
                      # free space. So we have to write a new header to mark the remaining
         | 
| 221 228 | 
             
                      # empty space.
         | 
| 222 | 
            -
                      unless length -  | 
| 229 | 
            +
                      unless length - raw_obj_bytesize >= FlatFileBlobHeader::LENGTH
         | 
| 223 230 | 
             
                        PEROBS.log.fatal "Not enough space to append the empty space " +
         | 
| 224 | 
            -
                          "header (space: #{length} bytes, object: #{ | 
| 231 | 
            +
                          "header (space: #{length} bytes, object: #{raw_obj_bytesize} " +
         | 
| 225 232 | 
             
                          "bytes)."
         | 
| 226 233 | 
             
                      end
         | 
| 227 234 | 
             
                      space_address = @f.pos
         | 
| 228 | 
            -
                      space_length = length - FlatFileBlobHeader::LENGTH -  | 
| 235 | 
            +
                      space_length = length - FlatFileBlobHeader::LENGTH - raw_obj_bytesize
         | 
| 229 236 | 
             
                      FlatFileBlobHeader.new(@f, space_address, 0, space_length,
         | 
| 230 237 | 
             
                                             0, 0).write
         | 
| 231 238 | 
             
                      # Register the new space with the space list.
         | 
| 232 | 
            -
                      @space_list. | 
| 239 | 
            +
                      if @space_list.is_open? && space_length > 0
         | 
| 240 | 
            +
                        @space_list.add_space(space_address, space_length)
         | 
| 241 | 
            +
                      end
         | 
| 233 242 | 
             
                    end
         | 
| 234 243 |  | 
| 235 244 | 
             
                    # Once the blob has been written we can update the index as well.
         | 
| 236 | 
            -
                    @index.insert(id, addr)
         | 
| 245 | 
            +
                    @index.insert(id, addr) if @index.is_open?
         | 
| 237 246 |  | 
| 238 247 | 
             
                    if old_addr
         | 
| 239 248 | 
             
                      # If we had an existing object stored for the ID we have to mark
         | 
| 240 249 | 
             
                      # this entry as deleted now.
         | 
| 241 250 | 
             
                      old_header.clear_flags
         | 
| 242 | 
            -
                      # And register the newly freed space with the space list.
         | 
| 243 | 
            -
                      @space_list.add_space(old_addr, old_header.length)
         | 
| 244 | 
            -
                    else
         | 
| 245 251 | 
             
                      @f.flush
         | 
| 252 | 
            +
                      # And register the newly freed space with the space list.
         | 
| 253 | 
            +
                      if @space_list.is_open?
         | 
| 254 | 
            +
                        @space_list.add_space(old_addr, old_header.length)
         | 
| 255 | 
            +
                      end
         | 
| 246 256 | 
             
                    end
         | 
| 247 257 | 
             
                  rescue IOError => e
         | 
| 248 258 | 
             
                    PEROBS.log.fatal "Cannot write blob for ID #{id} to FlatFileDB: " +
         | 
| @@ -270,24 +280,20 @@ module PEROBS | |
| 270 280 | 
             
                  nil
         | 
| 271 281 | 
             
                end
         | 
| 272 282 |  | 
| 273 | 
            -
                 | 
| 274 | 
            -
             | 
| 275 | 
            -
             | 
| 276 | 
            -
                  end
         | 
| 277 | 
            -
             | 
| 278 | 
            -
                  nil
         | 
| 283 | 
            +
                # @return [Integer] Number of items stored in the DB.
         | 
| 284 | 
            +
                def item_counter
         | 
| 285 | 
            +
                  @index.entries_count
         | 
| 279 286 | 
             
                end
         | 
| 280 287 |  | 
| 281 | 
            -
             | 
| 282 288 | 
             
                # Read the object at the specified address.
         | 
| 283 289 | 
             
                # @param addr [Integer] Offset in the flat file
         | 
| 284 290 | 
             
                # @param id [Integer] ID of the data blob
         | 
| 285 291 | 
             
                # @return [String] Raw object data
         | 
| 286 292 | 
             
                def read_obj_by_address(addr, id)
         | 
| 287 | 
            -
                  header = FlatFileBlobHeader. | 
| 293 | 
            +
                  header = FlatFileBlobHeader.read(@f, addr, id)
         | 
| 288 294 | 
             
                  if header.id != id
         | 
| 289 295 | 
             
                    PEROBS.log.fatal "Database index corrupted: Index for object " +
         | 
| 290 | 
            -
                      "#{id} points to object with ID #{header.id}"
         | 
| 296 | 
            +
                      "#{id} points to object with ID #{header.id} at address #{addr}"
         | 
| 291 297 | 
             
                  end
         | 
| 292 298 |  | 
| 293 299 | 
             
                  buf = nil
         | 
| @@ -296,7 +302,8 @@ module PEROBS | |
| 296 302 | 
             
                    @f.seek(addr + FlatFileBlobHeader::LENGTH)
         | 
| 297 303 | 
             
                    buf = @f.read(header.length)
         | 
| 298 304 | 
             
                  rescue IOError => e
         | 
| 299 | 
            -
                    PEROBS.log.fatal "Cannot read blob for ID #{id} | 
| 305 | 
            +
                    PEROBS.log.fatal "Cannot read blob for ID #{id} at address #{addr}: " +
         | 
| 306 | 
            +
                      e.message
         | 
| 300 307 | 
             
                  end
         | 
| 301 308 |  | 
| 302 309 | 
             
                  # Uncompress the data if the compression bit is set in the flags byte.
         | 
| @@ -305,12 +312,13 @@ module PEROBS | |
| 305 312 | 
             
                      buf = Zlib.inflate(buf)
         | 
| 306 313 | 
             
                    rescue Zlib::BufError, Zlib::DataError
         | 
| 307 314 | 
             
                      PEROBS.log.fatal "Corrupted compressed block with ID " +
         | 
| 308 | 
            -
                        "#{ | 
| 315 | 
            +
                        "#{id} found at address #{addr}."
         | 
| 309 316 | 
             
                    end
         | 
| 310 317 | 
             
                  end
         | 
| 311 318 |  | 
| 312 319 | 
             
                  if checksum(buf) != header.crc
         | 
| 313 | 
            -
                    PEROBS.log.fatal "Checksum failure while reading blob ID #{id}"
         | 
| 320 | 
            +
                    PEROBS.log.fatal "Checksum failure while reading blob ID #{id} " +
         | 
| 321 | 
            +
                      "at address #{addr}"
         | 
| 314 322 | 
             
                  end
         | 
| 315 323 |  | 
| 316 324 | 
             
                  buf
         | 
| @@ -319,19 +327,22 @@ module PEROBS | |
| 319 327 | 
             
                # Mark the object with the given ID.
         | 
| 320 328 | 
             
                # @param id [Integer] ID of the object
         | 
| 321 329 | 
             
                def mark_obj_by_id(id)
         | 
| 322 | 
            -
                  @marks.insert(id | 
| 330 | 
            +
                  @marks.insert(id)
         | 
| 323 331 | 
             
                end
         | 
| 324 332 |  | 
| 325 333 | 
             
                # Return true if the object with the given ID is marked, false otherwise.
         | 
| 326 334 | 
             
                # @param id [Integer] ID of the object
         | 
| 327 335 | 
             
                def is_marked_by_id?(id)
         | 
| 328 | 
            -
                   | 
| 336 | 
            +
                  @marks.include?(id)
         | 
| 329 337 | 
             
                end
         | 
| 330 338 |  | 
| 331 339 | 
             
                # Clear alls marks.
         | 
| 332 340 | 
             
                def clear_all_marks
         | 
| 333 | 
            -
                  @marks | 
| 334 | 
            -
             | 
| 341 | 
            +
                  if @marks
         | 
| 342 | 
            +
                    @marks.clear
         | 
| 343 | 
            +
                  else
         | 
| 344 | 
            +
                    @marks = IDList.new(@db_dir, 'marks', item_counter)
         | 
| 345 | 
            +
                  end
         | 
| 335 346 | 
             
                end
         | 
| 336 347 |  | 
| 337 348 | 
             
                # Eliminate all the holes in the file. This is an in-place
         | 
| @@ -340,59 +351,72 @@ module PEROBS | |
| 340 351 | 
             
                  distance = 0
         | 
| 341 352 | 
             
                  new_file_size = 0
         | 
| 342 353 | 
             
                  deleted_blobs = 0
         | 
| 354 | 
            +
                  corrupted_blobs = 0
         | 
| 343 355 | 
             
                  valid_blobs = 0
         | 
| 344 | 
            -
             | 
| 345 | 
            -
                  PEROBS.log.info "Defragmenting FlatFile"
         | 
| 356 | 
            +
             | 
| 346 357 | 
             
                  # Iterate over all entries.
         | 
| 347 | 
            -
                   | 
| 348 | 
            -
                     | 
| 349 | 
            -
             | 
| 350 | 
            -
             | 
| 351 | 
            -
                       | 
| 352 | 
            -
             | 
| 353 | 
            -
             | 
| 354 | 
            -
             | 
| 355 | 
            -
             | 
| 356 | 
            -
             | 
| 357 | 
            -
             | 
| 358 | 
            -
             | 
| 359 | 
            -
             | 
| 360 | 
            -
             | 
| 361 | 
            -
             | 
| 362 | 
            -
                           | 
| 363 | 
            -
             | 
| 364 | 
            -
             | 
| 365 | 
            -
             | 
| 366 | 
            -
             | 
| 367 | 
            -
             | 
| 368 | 
            -
             | 
| 369 | 
            -
             | 
| 370 | 
            -
             | 
| 371 | 
            -
                             | 
| 358 | 
            +
                  @progressmeter.start('Defragmenting blobs file', @f.size) do |pm|
         | 
| 359 | 
            +
                    each_blob_header do |header|
         | 
| 360 | 
            +
                      # If we have stumbled over a corrupted blob we treat it similar to a
         | 
| 361 | 
            +
                      # deleted blob and reuse the space.
         | 
| 362 | 
            +
                      if header.corruption_start
         | 
| 363 | 
            +
                        distance += header.addr - header.corruption_start
         | 
| 364 | 
            +
                        corrupted_blobs += 1
         | 
| 365 | 
            +
                      end
         | 
| 366 | 
            +
             | 
| 367 | 
            +
                      # Total size of the current entry
         | 
| 368 | 
            +
                      entry_bytes = FlatFileBlobHeader::LENGTH + header.length
         | 
| 369 | 
            +
                      if header.is_valid?
         | 
| 370 | 
            +
                        # We have found a valid entry.
         | 
| 371 | 
            +
                        valid_blobs += 1
         | 
| 372 | 
            +
                        if distance > 0
         | 
| 373 | 
            +
                          begin
         | 
| 374 | 
            +
                            # Read current entry into a buffer
         | 
| 375 | 
            +
                            @f.seek(header.addr)
         | 
| 376 | 
            +
                            buf = @f.read(entry_bytes)
         | 
| 377 | 
            +
                            # Write the buffer right after the end of the previous entry.
         | 
| 378 | 
            +
                            @f.seek(header.addr - distance)
         | 
| 379 | 
            +
                            @f.write(buf)
         | 
| 380 | 
            +
                            # Mark the space between the relocated current entry and the
         | 
| 381 | 
            +
                            # next valid entry as deleted space.
         | 
| 382 | 
            +
                            FlatFileBlobHeader.new(@f, @f.pos, 0,
         | 
| 383 | 
            +
                                                   distance - FlatFileBlobHeader::LENGTH,
         | 
| 384 | 
            +
                                                   0, 0).write
         | 
| 385 | 
            +
                            @f.flush
         | 
| 386 | 
            +
                          rescue IOError => e
         | 
| 387 | 
            +
                            PEROBS.log.fatal "Error while moving blob for ID " +
         | 
| 388 | 
            +
                              "#{header.id}: #{e.message}"
         | 
| 389 | 
            +
                          end
         | 
| 372 390 | 
             
                        end
         | 
| 391 | 
            +
                        new_file_size = header.addr - distance +
         | 
| 392 | 
            +
                          FlatFileBlobHeader::LENGTH + header.length
         | 
| 393 | 
            +
                      else
         | 
| 394 | 
            +
                        deleted_blobs += 1
         | 
| 395 | 
            +
                        distance += entry_bytes
         | 
| 373 396 | 
             
                      end
         | 
| 374 | 
            -
             | 
| 375 | 
            -
             | 
| 376 | 
            -
                      deleted_blobs += 1
         | 
| 377 | 
            -
                      distance += entry_bytes
         | 
| 397 | 
            +
             | 
| 398 | 
            +
                      pm.update(header.addr)
         | 
| 378 399 | 
             
                    end
         | 
| 379 400 | 
             
                  end
         | 
| 380 | 
            -
             | 
| 401 | 
            +
             | 
| 381 402 | 
             
                  PEROBS.log.info "#{distance / 1000} KiB/#{deleted_blobs} blobs of " +
         | 
| 382 403 | 
             
                    "#{@f.size / 1000} KiB/#{valid_blobs} blobs or " +
         | 
| 383 404 | 
             
                    "#{'%.1f' % (distance.to_f / @f.size * 100.0)}% reclaimed"
         | 
| 405 | 
            +
                  if corrupted_blobs > 0
         | 
| 406 | 
            +
                    PEROBS.log.info "#{corrupted_blobs} corrupted blob(s) found. Space " +
         | 
| 407 | 
            +
                      "was recycled."
         | 
| 408 | 
            +
                  end
         | 
| 384 409 |  | 
| 385 410 | 
             
                  @f.flush
         | 
| 386 411 | 
             
                  @f.truncate(new_file_size)
         | 
| 387 412 | 
             
                  @f.flush
         | 
| 388 | 
            -
                  @space_list.clear
         | 
| 389 413 |  | 
| 390 414 | 
             
                  sync
         | 
| 391 415 | 
             
                end
         | 
| 392 416 |  | 
| 393 417 | 
             
                # This method iterates over all entries in the FlatFile and removes the
         | 
| 394 418 | 
             
                # entry and inserts it again. This is useful to update all entries in
         | 
| 395 | 
            -
                #  | 
| 419 | 
            +
                # case the storage format has changed.
         | 
| 396 420 | 
             
                def refresh
         | 
| 397 421 | 
             
                  # This iteration might look scary as we iterate over the entries while
         | 
| 398 422 | 
             
                  # we are rearranging them. Re-inserted items may be inserted
         | 
| @@ -400,132 +424,276 @@ module PEROBS | |
| 400 424 | 
             
                  # inserted after the current entry and will be re-read again unless they
         | 
| 401 425 | 
             
                  # are inserted after the original file end.
         | 
| 402 426 | 
             
                  file_size = @f.size
         | 
| 403 | 
            -
                  PEROBS.log.info "Refreshing the DB..."
         | 
| 404 | 
            -
                  t = Time.now
         | 
| 405 | 
            -
                  each_blob_header do |pos, header|
         | 
| 406 | 
            -
                    if header.is_valid?
         | 
| 407 | 
            -
                      buf = read_obj_by_address(pos, header.id)
         | 
| 408 | 
            -
                      delete_obj_by_address(pos, header.id)
         | 
| 409 | 
            -
                      write_obj_by_id(header.id, buf)
         | 
| 410 | 
            -
                    end
         | 
| 411 427 |  | 
| 412 | 
            -
             | 
| 413 | 
            -
             | 
| 414 | 
            -
             | 
| 428 | 
            +
                  # We don't update the index and the space list during this operation as
         | 
| 429 | 
            +
                  # we defragmentize the blob file at the end. We'll end the operation
         | 
| 430 | 
            +
                  # with an empty space list.
         | 
| 431 | 
            +
                  clear_index_files
         | 
| 432 | 
            +
             | 
| 433 | 
            +
                  @progressmeter.start('Converting objects to new storage format',
         | 
| 434 | 
            +
                                       @f.size) do |pm|
         | 
| 435 | 
            +
                    each_blob_header do |header|
         | 
| 436 | 
            +
                      if header.is_valid?
         | 
| 437 | 
            +
                        buf = read_obj_by_address(header.addr, header.id)
         | 
| 438 | 
            +
                        delete_obj_by_address(header.addr, header.id)
         | 
| 439 | 
            +
                        write_obj_by_id(header.id, buf)
         | 
| 440 | 
            +
                      end
         | 
| 441 | 
            +
             | 
| 442 | 
            +
                      # Some re-inserted blobs may be inserted after the original file end.
         | 
| 443 | 
            +
                      # No need to process those blobs again.
         | 
| 444 | 
            +
                      break if header.addr >= file_size
         | 
| 445 | 
            +
             | 
| 446 | 
            +
                      pm.update(header.addr)
         | 
| 447 | 
            +
                    end
         | 
| 415 448 | 
             
                  end
         | 
| 416 | 
            -
                  PEROBS.log.info "DB refresh completed in #{Time.now - t} seconds"
         | 
| 417 449 |  | 
| 418 450 | 
             
                  # Reclaim the space saved by compressing entries.
         | 
| 419 451 | 
             
                  defragmentize
         | 
| 452 | 
            +
             | 
| 453 | 
            +
                  # Recreate the index file and create an empty space list.
         | 
| 454 | 
            +
                  regenerate_index_and_spaces
         | 
| 420 455 | 
             
                end
         | 
| 421 456 |  | 
| 422 | 
            -
                # Check  | 
| 423 | 
            -
                # @param repair [Boolean] True if errors should be fixed.
         | 
| 457 | 
            +
                # Check the FlatFile.
         | 
| 424 458 | 
             
                # @return [Integer] Number of errors found
         | 
| 425 | 
            -
                def check( | 
| 459 | 
            +
                def check()
         | 
| 426 460 | 
             
                  errors = 0
         | 
| 427 461 | 
             
                  return errors unless @f
         | 
| 428 462 |  | 
| 429 463 | 
             
                  t = Time.now
         | 
| 430 | 
            -
                  PEROBS.log.info "Checking FlatFile database" | 
| 431 | 
            -
                    "#{repair ? ' in repair mode' : ''}..."
         | 
| 464 | 
            +
                  PEROBS.log.info "Checking FlatFile database..."
         | 
| 432 465 |  | 
| 433 466 | 
             
                  # First check the database blob file. Each entry should be readable and
         | 
| 434 467 | 
             
                  # correct and all IDs must be unique. We use a shadow index to keep
         | 
| 435 468 | 
             
                  # track of the already found IDs.
         | 
| 436 | 
            -
                  new_index = BTree.new(@db_dir, 'new-index', INDEX_BTREE_ORDER | 
| 469 | 
            +
                  new_index = BTree.new(@db_dir, 'new-index', INDEX_BTREE_ORDER,
         | 
| 470 | 
            +
                                        @progressmeter)
         | 
| 437 471 | 
             
                  new_index.erase
         | 
| 438 472 | 
             
                  new_index.open
         | 
| 439 473 |  | 
| 440 | 
            -
                   | 
| 441 | 
            -
             | 
| 442 | 
            -
             | 
| 443 | 
            -
             | 
| 444 | 
            -
             | 
| 445 | 
            -
                         | 
| 446 | 
            -
                         | 
| 447 | 
            -
                           | 
| 448 | 
            -
             | 
| 449 | 
            -
                           | 
| 450 | 
            -
             | 
| 451 | 
            -
             | 
| 452 | 
            -
             | 
| 474 | 
            +
                  corrupted_blobs = 0
         | 
| 475 | 
            +
                  end_of_last_healthy_blob = nil
         | 
| 476 | 
            +
                  @progressmeter.start('Checking blobs file', @f.size) do |pm|
         | 
| 477 | 
            +
                    corrupted_blobs = each_blob_header do |header|
         | 
| 478 | 
            +
                      if header.is_valid?
         | 
| 479 | 
            +
                        # We have a non-deleted entry.
         | 
| 480 | 
            +
                        begin
         | 
| 481 | 
            +
                          @f.seek(header.addr + FlatFileBlobHeader::LENGTH)
         | 
| 482 | 
            +
                          buf = @f.read(header.length)
         | 
| 483 | 
            +
                          if buf.bytesize != header.length
         | 
| 484 | 
            +
                            PEROBS.log.error "Premature end of file in blob with ID " +
         | 
| 485 | 
            +
                              "#{header.id}."
         | 
| 486 | 
            +
                            errors += 1
         | 
| 487 | 
            +
                            next
         | 
| 488 | 
            +
                          end
         | 
| 453 489 |  | 
| 454 | 
            -
             | 
| 455 | 
            -
             | 
| 456 | 
            -
             | 
| 457 | 
            -
             | 
| 458 | 
            -
             | 
| 459 | 
            -
             | 
| 460 | 
            -
             | 
| 461 | 
            -
             | 
| 462 | 
            -
             | 
| 490 | 
            +
                          # Uncompress the data if the compression bit is set in the mark
         | 
| 491 | 
            +
                          # byte.
         | 
| 492 | 
            +
                          if header.is_compressed?
         | 
| 493 | 
            +
                            begin
         | 
| 494 | 
            +
                              buf = Zlib.inflate(buf)
         | 
| 495 | 
            +
                            rescue Zlib::BufError, Zlib::DataError
         | 
| 496 | 
            +
                              PEROBS.log.error "Corrupted compressed block with ID " +
         | 
| 497 | 
            +
                                "#{header.id} found."
         | 
| 498 | 
            +
                              errors += 1
         | 
| 499 | 
            +
                              next
         | 
| 500 | 
            +
                            end
         | 
| 501 | 
            +
                          end
         | 
| 502 | 
            +
             | 
| 503 | 
            +
                          if header.crc && checksum(buf) != header.crc
         | 
| 504 | 
            +
                            PEROBS.log.error "Checksum failure while checking blob " +
         | 
| 505 | 
            +
                              "with ID #{header.id}"
         | 
| 463 506 | 
             
                            errors += 1
         | 
| 464 507 | 
             
                            next
         | 
| 465 508 | 
             
                          end
         | 
| 509 | 
            +
                        rescue IOError => e
         | 
| 510 | 
            +
                          PEROBS.log.fatal "Check of blob with ID #{header.id} failed: " +
         | 
| 511 | 
            +
                            e.message
         | 
| 466 512 | 
             
                        end
         | 
| 467 513 |  | 
| 468 | 
            -
                        if  | 
| 469 | 
            -
             | 
| 470 | 
            -
             | 
| 471 | 
            -
             | 
| 514 | 
            +
                        # Check if the ID has already been found in the file.
         | 
| 515 | 
            +
                        if (previous_address = new_index.get(header.id))
         | 
| 516 | 
            +
                          PEROBS.log.error "Multiple blobs for ID #{header.id} found. " +
         | 
| 517 | 
            +
                            "Addresses: #{previous_address}, #{header.addr}"
         | 
| 472 518 | 
             
                          errors += 1
         | 
| 473 | 
            -
                           | 
| 519 | 
            +
                          previous_header = FlatFileBlobHeader.read(@f, previous_address,
         | 
| 520 | 
            +
                                                                    header.id)
         | 
| 521 | 
            +
                        else
         | 
| 522 | 
            +
                          # ID is unique so far. Add it to the shadow index.
         | 
| 523 | 
            +
                          new_index.insert(header.id, header.addr)
         | 
| 474 524 | 
             
                        end
         | 
| 475 | 
            -
                      rescue IOError => e
         | 
| 476 | 
            -
                        PEROBS.log.fatal "Check of blob with ID #{header.id} failed: " +
         | 
| 477 | 
            -
                          e.message
         | 
| 478 525 | 
             
                      end
         | 
| 526 | 
            +
                      end_of_last_healthy_blob = header.addr +
         | 
| 527 | 
            +
                        FlatFileBlobHeader::LENGTH + header.length
         | 
| 479 528 |  | 
| 480 | 
            -
                       | 
| 481 | 
            -
             | 
| 482 | 
            -
                        PEROBS.log.error "Multiple blobs for ID #{header.id} found. " +
         | 
| 483 | 
            -
                          "Addresses: #{previous_address}, #{pos}"
         | 
| 484 | 
            -
                        previous_header = FlatFileBlobHeader.read_at(@f, previous_address,
         | 
| 485 | 
            -
                                                                     header.id)
         | 
| 486 | 
            -
                        if repair
         | 
| 487 | 
            -
                          # We have two blobs with the same ID and we must discard one of
         | 
| 488 | 
            -
                          # them.
         | 
| 489 | 
            -
                          if header.is_outdated?
         | 
| 490 | 
            -
                            discard_damaged_blob(header)
         | 
| 491 | 
            -
                          elsif previous_header.is_outdated?
         | 
| 492 | 
            -
                            discard_damaged_blob(previous_header)
         | 
| 493 | 
            -
                          else
         | 
| 494 | 
            -
                            PEROBS.log.error "None of the blobs with same ID have " +
         | 
| 495 | 
            -
                              "the outdated flag set. Deleting the smaller one."
         | 
| 496 | 
            -
                            discard_damaged_blob(header.length < previous_header.length ?
         | 
| 497 | 
            -
                                                 header : previous_header)
         | 
| 498 | 
            -
                          end
         | 
| 499 | 
            -
                          next
         | 
| 500 | 
            -
                        end
         | 
| 501 | 
            -
                      else
         | 
| 502 | 
            -
                        # ID is unique so far. Add it to the shadow index.
         | 
| 503 | 
            -
                        new_index.insert(header.id, pos)
         | 
| 504 | 
            -
                      end
         | 
| 529 | 
            +
                      pm.update(header.addr)
         | 
| 530 | 
            +
                    end
         | 
| 505 531 |  | 
| 532 | 
            +
                    if end_of_last_healthy_blob && end_of_last_healthy_blob != @f.size
         | 
| 533 | 
            +
                      # The blob file ends with a corrupted blob header.
         | 
| 534 | 
            +
                      PEROBS.log.error "#{@f.size - end_of_last_healthy_blob} corrupted " +
         | 
| 535 | 
            +
                        'bytes found at the end of FlatFile.'
         | 
| 536 | 
            +
                      corrupted_blobs += 1
         | 
| 506 537 | 
             
                    end
         | 
| 538 | 
            +
             | 
| 539 | 
            +
                    errors += corrupted_blobs
         | 
| 507 540 | 
             
                  end
         | 
| 541 | 
            +
             | 
| 508 542 | 
             
                  # We no longer need the new index.
         | 
| 509 543 | 
             
                  new_index.close
         | 
| 510 544 | 
             
                  new_index.erase
         | 
| 511 545 |  | 
| 512 | 
            -
                   | 
| 513 | 
            -
             | 
| 514 | 
            -
             | 
| 515 | 
            -
             | 
| 516 | 
            -
                     | 
| 517 | 
            -
                       | 
| 546 | 
            +
                  if corrupted_blobs == 0
         | 
| 547 | 
            +
                    # Now we check the index data. It must be correct and the entries must
         | 
| 548 | 
            +
                    # match the blob file. All entries in the index must be in the blob file
         | 
| 549 | 
            +
                    # and vice versa.
         | 
| 550 | 
            +
                    begin
         | 
| 551 | 
            +
                      index_ok = @index.check do |id, address|
         | 
| 552 | 
            +
                        unless has_id_at?(id, address)
         | 
| 553 | 
            +
                          PEROBS.log.error "Index contains an entry for " +
         | 
| 554 | 
            +
                            "ID #{id} at address #{address} that is not in FlatFile"
         | 
| 555 | 
            +
                          false
         | 
| 556 | 
            +
                        else
         | 
| 557 | 
            +
                          true
         | 
| 558 | 
            +
                        end
         | 
| 559 | 
            +
                      end
         | 
| 560 | 
            +
                      x_check_errs = 0
         | 
| 561 | 
            +
                      space_check_ok = true
         | 
| 562 | 
            +
                      unless index_ok && (space_check_ok = @space_list.check(self)) &&
         | 
| 563 | 
            +
                        (x_check_errs = cross_check_entries) == 0
         | 
| 564 | 
            +
                        errors += 1 unless index_ok && space_check_ok
         | 
| 565 | 
            +
                        errors += x_check_errs
         | 
| 566 | 
            +
                      end
         | 
| 567 | 
            +
                    rescue PEROBS::FatalError
         | 
| 568 | 
            +
                      errors += 1
         | 
| 518 569 | 
             
                    end
         | 
| 519 | 
            -
             | 
| 520 | 
            -
             | 
| 570 | 
            +
                  end
         | 
| 571 | 
            +
             | 
| 572 | 
            +
                  PEROBS.log.info "FlatFile check completed in #{Time.now - t} seconds. " +
         | 
| 573 | 
            +
                    "#{errors} errors found."
         | 
| 574 | 
            +
             | 
| 575 | 
            +
                  errors
         | 
| 576 | 
            +
                end
         | 
| 577 | 
            +
             | 
| 578 | 
            +
                # Repair the FlatFile. In contrast to check(), which only reports
         | 
| 579 | 
            +
                # errors, this method erases the index and space list and re-creates
         | 
| 580 | 
            +
                # them from the blob file, discarding damaged blobs along the way.
         | 
| 581 | 
            +
                # @return [Integer] Number of errors found
         | 
| 582 | 
            +
                def repair
         | 
| 583 | 
            +
                  errors = 0
         | 
| 584 | 
            +
                  return errors unless @f
         | 
| 585 | 
            +
             | 
| 586 | 
            +
                  t = Time.now
         | 
| 587 | 
            +
                  PEROBS.log.info "Repairing FlatFile database"
         | 
| 588 | 
            +
             | 
| 589 | 
            +
                  # Erase and re-open the index and space list files. We purposely don't
         | 
| 590 | 
            +
                  # close the files as it would trigger needless flushing.
         | 
| 591 | 
            +
                  clear_index_files(true)
         | 
| 592 | 
            +
             | 
| 593 | 
            +
                  # Now we scan the blob file and re-index all blobs and spaces. Corrupted
         | 
| 594 | 
            +
                  # blobs will be skipped.
         | 
| 595 | 
            +
                  corrupted_blobs = 0
         | 
| 596 | 
            +
                  end_of_last_healthy_blob = nil
         | 
| 597 | 
            +
                  @progressmeter.start('Re-indexing blobs file', @f.size) do |pm|
         | 
| 598 | 
            +
                    corrupted_blobs = each_blob_header do |header|
         | 
| 599 | 
            +
                      if header.corruption_start
         | 
| 600 | 
            +
                        # The blob is preceded by a corrupted area. We create a new
         | 
| 601 | 
            +
                        # header of a deleted blob for this area and write the new blob
         | 
| 602 | 
            +
                        # over it.
         | 
| 603 | 
            +
                        if (data_length = header.addr - header.corruption_start -
         | 
| 604 | 
            +
                            FlatFileBlobHeader::LENGTH) <= 0
         | 
| 605 | 
            +
                          PEROBS.log.error "Found a corrupted blob that is too small to " +
         | 
| 606 | 
            +
                            "fit a header (#{data_length}). File must be defragmented."
         | 
| 607 | 
            +
                        else
         | 
| 608 | 
            +
                          new_header = FlatFileBlobHeader.new(@f, header.corruption_start,
         | 
| 609 | 
            +
                                                              0, data_length, 0, 0)
         | 
| 610 | 
            +
                          new_header.write
         | 
| 611 | 
            +
                          @space_list.add_space(header.corruption_start, data_length)
         | 
| 612 | 
            +
                        end
         | 
| 613 | 
            +
                      end
         | 
| 614 | 
            +
             | 
| 615 | 
            +
                      if header.is_valid?
         | 
| 616 | 
            +
                        # We have a non-deleted entry.
         | 
| 617 | 
            +
                        begin
         | 
| 618 | 
            +
                          @f.seek(header.addr + FlatFileBlobHeader::LENGTH)
         | 
| 619 | 
            +
                          buf = @f.read(header.length)
         | 
| 620 | 
            +
                          if buf.bytesize != header.length
         | 
| 621 | 
            +
                            PEROBS.log.error "Premature end of file in blob with ID " +
         | 
| 622 | 
            +
                              "#{header.id}."
         | 
| 623 | 
            +
                            discard_damaged_blob(header)
         | 
| 624 | 
            +
                            errors += 1
         | 
| 625 | 
            +
                            next
         | 
| 626 | 
            +
                          end
         | 
| 627 | 
            +
             | 
| 628 | 
            +
                          # Uncompress the data if the compression bit is set in the mark
         | 
| 629 | 
            +
                          # byte.
         | 
| 630 | 
            +
                          if header.is_compressed?
         | 
| 631 | 
            +
                            begin
         | 
| 632 | 
            +
                              buf = Zlib.inflate(buf)
         | 
| 633 | 
            +
                            rescue Zlib::BufError, Zlib::DataError
         | 
| 634 | 
            +
                              PEROBS.log.error "Corrupted compressed block with ID " +
         | 
| 635 | 
            +
                                "#{header.id} found."
         | 
| 636 | 
            +
                              discard_damaged_blob(header)
         | 
| 637 | 
            +
                              errors += 1
         | 
| 638 | 
            +
                              next
         | 
| 639 | 
            +
                            end
         | 
| 640 | 
            +
                          end
         | 
| 641 | 
            +
             | 
| 642 | 
            +
                          if header.crc && checksum(buf) != header.crc
         | 
| 643 | 
            +
                            PEROBS.log.error "Checksum failure while checking blob " +
         | 
| 644 | 
            +
                              "with ID #{header.id}"
         | 
| 645 | 
            +
                            discard_damaged_blob(header)
         | 
| 646 | 
            +
                            errors += 1
         | 
| 647 | 
            +
                            next
         | 
| 648 | 
            +
                          end
         | 
| 649 | 
            +
                        rescue IOError => e
         | 
| 650 | 
            +
                          PEROBS.log.fatal "Check of blob with ID #{header.id} failed: " +
         | 
| 651 | 
            +
                            e.message
         | 
| 652 | 
            +
                        end
         | 
| 653 | 
            +
             | 
| 654 | 
            +
                        # Check if the ID has already been found in the file.
         | 
| 655 | 
            +
                        if (previous_address = @index.get(header.id))
         | 
| 656 | 
            +
                          PEROBS.log.error "Multiple blobs for ID #{header.id} found. " +
         | 
| 657 | 
            +
                            "Addresses: #{previous_address}, #{header.addr}"
         | 
| 658 | 
            +
                          errors += 1
         | 
| 659 | 
            +
                          previous_header = FlatFileBlobHeader.read(@f, previous_address,
         | 
| 660 | 
            +
                                                                    header.id)
         | 
| 661 | 
            +
                          # We have two blobs with the same ID and we must discard one of
         | 
| 662 | 
            +
                          # them.
         | 
| 663 | 
            +
                          discard_duplicate_blobs(header, previous_header)
         | 
| 664 | 
            +
                        else
         | 
| 665 | 
            +
                          # ID is unique so far. Add it to the index.
         | 
| 666 | 
            +
                          @index.insert(header.id, header.addr)
         | 
| 667 | 
            +
                        end
         | 
| 668 | 
            +
             | 
| 669 | 
            +
                      else
         | 
| 670 | 
            +
                        if header.length > 0
         | 
| 671 | 
            +
                          @space_list.add_space(header.addr, header.length)
         | 
| 672 | 
            +
                        end
         | 
| 673 | 
            +
                      end
         | 
| 674 | 
            +
                      end_of_last_healthy_blob = header.addr +
         | 
| 675 | 
            +
                        FlatFileBlobHeader::LENGTH + header.length
         | 
| 676 | 
            +
             | 
| 677 | 
            +
                      pm.update(header.addr)
         | 
| 521 678 | 
             
                    end
         | 
| 522 | 
            -
             | 
| 523 | 
            -
                     | 
| 524 | 
            -
             | 
| 679 | 
            +
             | 
| 680 | 
            +
                    if end_of_last_healthy_blob && end_of_last_healthy_blob != @f.size
         | 
| 681 | 
            +
                      # The blob file ends with a corrupted blob header.
         | 
| 682 | 
            +
                      PEROBS.log.error "#{@f.size - end_of_last_healthy_blob} corrupted " +
         | 
| 683 | 
            +
                        'bytes found at the end of FlatFile.'
         | 
| 684 | 
            +
                      corrupted_blobs += 1
         | 
| 685 | 
            +
             | 
| 686 | 
            +
                      PEROBS.log.error "Truncating FlatFile to " +
         | 
| 687 | 
            +
                        "#{end_of_last_healthy_blob} bytes by discarding " +
         | 
| 688 | 
            +
                        "#{@f.size - end_of_last_healthy_blob} bytes"
         | 
| 689 | 
            +
                      @f.truncate(end_of_last_healthy_blob)
         | 
| 690 | 
            +
                    end
         | 
| 691 | 
            +
             | 
| 692 | 
            +
                    errors += corrupted_blobs
         | 
| 525 693 | 
             
                  end
         | 
| 526 694 |  | 
| 527 | 
            -
                  sync | 
| 528 | 
            -
                  PEROBS.log.info " | 
| 695 | 
            +
                  sync
         | 
| 696 | 
            +
                  PEROBS.log.info "FlatFile repair completed in #{Time.now - t} seconds. " +
         | 
| 529 697 | 
             
                    "#{errors} errors found."
         | 
| 530 698 |  | 
| 531 699 | 
             
                  errors
         | 
| @@ -535,22 +703,32 @@ module PEROBS | |
| 535 703 | 
             
                # regenerates them from the FlatFile.
         | 
| 536 704 | 
             
                def regenerate_index_and_spaces
         | 
| 537 705 | 
             
                  PEROBS.log.warn "Re-generating FlatFileDB index and space files"
         | 
| 706 | 
            +
                  @index.open unless @index.is_open?
         | 
| 538 707 | 
             
                  @index.clear
         | 
| 708 | 
            +
                  @space_list.open unless @space_list.is_open?
         | 
| 539 709 | 
             
                  @space_list.clear
         | 
| 540 710 |  | 
| 541 | 
            -
                   | 
| 542 | 
            -
                     | 
| 543 | 
            -
                      if  | 
| 544 | 
            -
                         | 
| 545 | 
            -
                           | 
| 546 | 
            -
             | 
| 547 | 
            -
             | 
| 548 | 
            -
             | 
| 711 | 
            +
                  @progressmeter.start('Re-generating database index', @f.size) do |pm|
         | 
| 712 | 
            +
                    each_blob_header do |header|
         | 
| 713 | 
            +
                      if header.is_valid?
         | 
| 714 | 
            +
                        if (duplicate_pos = @index.get(header.id))
         | 
| 715 | 
            +
                          PEROBS.log.error "FlatFile contains multiple blobs for ID " +
         | 
| 716 | 
            +
                            "#{header.id}. First blob is at address #{duplicate_pos}. " +
         | 
| 717 | 
            +
                            "Other blob found at address #{header.addr}."
         | 
| 718 | 
            +
                          if header.length > 0
         | 
| 719 | 
            +
                            @space_list.add_space(header.addr, header.length)
         | 
| 720 | 
            +
                          end
         | 
| 721 | 
            +
                          discard_damaged_blob(header)
         | 
| 722 | 
            +
                        else
         | 
| 723 | 
            +
                          @index.insert(header.id, header.addr)
         | 
| 724 | 
            +
                        end
         | 
| 549 725 | 
             
                      else
         | 
| 550 | 
            -
                         | 
| 726 | 
            +
                        if header.length > 0
         | 
| 727 | 
            +
                          @space_list.add_space(header.addr, header.length)
         | 
| 728 | 
            +
                        end
         | 
| 551 729 | 
             
                      end
         | 
| 552 | 
            -
             | 
| 553 | 
            -
                       | 
| 730 | 
            +
             | 
| 731 | 
            +
                      pm.update(header.addr)
         | 
| 554 732 | 
             
                    end
         | 
| 555 733 | 
             
                  end
         | 
| 556 734 |  | 
| @@ -558,19 +736,23 @@ module PEROBS | |
| 558 736 | 
             
                end
         | 
| 559 737 |  | 
| 560 738 | 
             
                def has_space?(address, size)
         | 
| 561 | 
            -
                  header = FlatFileBlobHeader. | 
| 739 | 
            +
                  header = FlatFileBlobHeader.read(@f, address)
         | 
| 562 740 | 
             
                  !header.is_valid? && header.length == size
         | 
| 563 741 | 
             
                end
         | 
| 564 742 |  | 
| 565 743 | 
             
                def has_id_at?(id, address)
         | 
| 566 | 
            -
                   | 
| 744 | 
            +
                  begin
         | 
| 745 | 
            +
                    header = FlatFileBlobHeader.read(@f, address)
         | 
| 746 | 
            +
                  rescue PEROBS::FatalError
         | 
| 747 | 
            +
                    return false
         | 
| 748 | 
            +
                  end
         | 
| 567 749 | 
             
                  header.is_valid? && header.id == id
         | 
| 568 750 | 
             
                end
         | 
| 569 751 |  | 
| 570 752 | 
             
                def inspect
         | 
| 571 753 | 
             
                  s = '['
         | 
| 572 | 
            -
                  each_blob_header do | | 
| 573 | 
            -
                    s << "{ :pos => #{ | 
| 754 | 
            +
                  each_blob_header do |header|
         | 
| 755 | 
            +
                    s << "{ :pos => #{header.addr}, :flags => #{header.flags}, " +
         | 
| 574 756 | 
             
                         ":length => #{header.length}, :id => #{header.id}, " +
         | 
| 575 757 | 
             
                         ":crc => #{header.crc}"
         | 
| 576 758 | 
             
                    if header.is_valid?
         | 
| @@ -581,21 +763,68 @@ module PEROBS | |
| 581 763 | 
             
                  s + ']'
         | 
| 582 764 | 
             
                end
         | 
| 583 765 |  | 
| 766 | 
            +
                def FlatFile::insert_header_checksums(db_dir)
         | 
| 767 | 
            +
                  old_file_name = File.join(db_dir, 'database.blobs')
         | 
| 768 | 
            +
                  new_file_name = File.join(db_dir, 'database_v4.blobs')
         | 
| 769 | 
            +
                  bak_file_name = File.join(db_dir, 'database_v3.blobs')
         | 
| 770 | 
            +
             | 
| 771 | 
            +
                  old_file = File.open(old_file_name, 'rb')
         | 
| 772 | 
            +
                  new_file = File.open(new_file_name, 'wb')
         | 
| 773 | 
            +
             | 
| 774 | 
            +
                  entries = 0
         | 
| 775 | 
            +
                  while (buf = old_file.read(21))
         | 
| 776 | 
            +
                    flags, length, id, crc = *buf.unpack('CQQL')
         | 
| 777 | 
            +
                    blob_data = old_file.read(length)
         | 
| 778 | 
            +
             | 
| 779 | 
            +
                    # Some basic sanity checking to ensure all reserved bits are 0. Older
         | 
| 780 | 
            +
                    # versions of PEROBS used to set bit 1 despite it being reserved now.
         | 
| 781 | 
            +
                    unless flags & 0xF0 == 0
         | 
| 782 | 
            +
                      PEROBS.log.fatal "Blob file #{old_file_name} contains illegal " +
         | 
| 783 | 
            +
                        "flag byte #{'%02x' % flags} at #{old_file.pos - 21}"
         | 
| 784 | 
            +
                    end
         | 
| 785 | 
            +
             | 
| 786 | 
            +
                    # Check if the blob is valid and current.
         | 
| 787 | 
            +
                    if flags & 0x1 == 1 && flags & 0x8 == 0
         | 
| 788 | 
            +
                      # Make sure the bit 1 is not set anymore.
         | 
| 789 | 
            +
                      flags = flags & 0x05
         | 
| 790 | 
            +
                      header_str = [ flags, length, id, crc ].pack('CQQL')
         | 
| 791 | 
            +
                      header_crc = Zlib.crc32(header_str, 0)
         | 
| 792 | 
            +
                      header_str += [ header_crc ].pack('L')
         | 
| 793 | 
            +
             | 
| 794 | 
            +
                      new_file.write(header_str + blob_data)
         | 
| 795 | 
            +
                      entries += 1
         | 
| 796 | 
            +
                    end
         | 
| 797 | 
            +
                  end
         | 
| 798 | 
            +
                  PEROBS.log.info "Header checksum added to #{entries} entries"
         | 
| 799 | 
            +
             | 
| 800 | 
            +
                  old_file.close
         | 
| 801 | 
            +
                  new_file.close
         | 
| 802 | 
            +
             | 
| 803 | 
            +
                  File.rename(old_file_name, bak_file_name)
         | 
| 804 | 
            +
                  File.rename(new_file_name, old_file_name)
         | 
| 805 | 
            +
                end
         | 
| 806 | 
            +
             | 
| 584 807 | 
             
                private
         | 
| 585 808 |  | 
| 586 809 | 
             
                def each_blob_header(&block)
         | 
| 587 | 
            -
                   | 
| 810 | 
            +
                  corrupted_blobs = 0
         | 
| 811 | 
            +
             | 
| 588 812 | 
             
                  begin
         | 
| 589 813 | 
             
                    @f.seek(0)
         | 
| 590 814 | 
             
                    while (header = FlatFileBlobHeader.read(@f))
         | 
| 591 | 
            -
                       | 
| 815 | 
            +
                      if header.corruption_start
         | 
| 816 | 
            +
                        corrupted_blobs += 1
         | 
| 817 | 
            +
                      end
         | 
| 818 | 
            +
             | 
| 819 | 
            +
                      yield(header)
         | 
| 592 820 |  | 
| 593 | 
            -
                       | 
| 594 | 
            -
                      @f.seek(pos)
         | 
| 821 | 
            +
                      @f.seek(header.addr + FlatFileBlobHeader::LENGTH + header.length)
         | 
| 595 822 | 
             
                    end
         | 
| 596 823 | 
             
                  rescue IOError => e
         | 
| 597 824 | 
             
                    PEROBS.log.fatal "Cannot read blob in flat file DB: #{e.message}"
         | 
| 598 825 | 
             
                  end
         | 
| 826 | 
            +
             | 
| 827 | 
            +
                  corrupted_blobs
         | 
| 599 828 | 
             
                end
         | 
| 600 829 |  | 
| 601 830 | 
             
                def find_free_blob(bytes)
         | 
| @@ -625,26 +854,34 @@ module PEROBS | |
| 625 854 | 
             
                def cross_check_entries
         | 
| 626 855 | 
             
                  errors = 0
         | 
| 627 856 |  | 
| 628 | 
            -
                   | 
| 629 | 
            -
                     | 
| 630 | 
            -
                      if header. | 
| 631 | 
            -
                         | 
| 632 | 
            -
                           | 
| 633 | 
            -
                             | 
| 634 | 
            -
             | 
| 635 | 
            -
             | 
| 857 | 
            +
                  @progressmeter.start('Cross checking blobs and index', @f.size) do |pm|
         | 
| 858 | 
            +
                    each_blob_header do |header|
         | 
| 859 | 
            +
                      if !header.is_valid?
         | 
| 860 | 
            +
                        if header.length > 0
         | 
| 861 | 
            +
                          unless @space_list.has_space?(header.addr, header.length)
         | 
| 862 | 
            +
                            PEROBS.log.error "FlatFile has free space " +
         | 
| 863 | 
            +
                              "(addr: #{header.addr}, len: #{header.length}) that is " +
         | 
| 864 | 
            +
                              "not in SpaceManager"
         | 
| 865 | 
            +
                            errors += 1
         | 
| 866 | 
            +
                          end
         | 
| 867 | 
            +
                        end
         | 
| 868 | 
            +
                      else
         | 
| 869 | 
            +
                        if (index_address = @index.get(header.id)).nil?
         | 
| 870 | 
            +
                          PEROBS.log.error "FlatFile blob at address #{header.addr} " +
         | 
| 871 | 
            +
                            "is not listed in the index"
         | 
| 872 | 
            +
                          errors +=1
         | 
| 873 | 
            +
                        elsif index_address != header.addr
         | 
| 874 | 
            +
                            PEROBS.log.error "FlatFile blob at address #{header.addr} " +
         | 
| 875 | 
            +
                              "is listed in index with address #{index_address}"
         | 
| 876 | 
            +
                            errors += 1
         | 
| 636 877 | 
             
                        end
         | 
| 637 878 | 
             
                      end
         | 
| 638 | 
            -
             | 
| 639 | 
            -
                       | 
| 640 | 
            -
                        PEROBS.log.error "FlatFile blob at address #{pos} is listed " +
         | 
| 641 | 
            -
                          "in index with address #{@index.get(header.id)}"
         | 
| 642 | 
            -
                        errors += 1
         | 
| 643 | 
            -
                      end
         | 
| 879 | 
            +
             | 
| 880 | 
            +
                      pm.update(header.addr)
         | 
| 644 881 | 
             
                    end
         | 
| 645 882 | 
             
                  end
         | 
| 646 883 |  | 
| 647 | 
            -
                  errors | 
| 884 | 
            +
                  errors
         | 
| 648 885 | 
             
                end
         | 
| 649 886 |  | 
| 650 887 | 
             
                def discard_damaged_blob(header)
         | 
| @@ -653,6 +890,61 @@ module PEROBS | |
| 653 890 | 
             
                  header.clear_flags
         | 
| 654 891 | 
             
                end
         | 
| 655 892 |  | 
| 893 | 
            +
                def discard_duplicate_blobs(header, previous_header)
         | 
| 894 | 
            +
                  if header.is_outdated?
         | 
| 895 | 
            +
                    discard_damaged_blob(header)
         | 
| 896 | 
            +
                  elsif previous_header.is_outdated?
         | 
| 897 | 
            +
                    discard_damaged_blob(previous_header)
         | 
| 898 | 
            +
                  else
         | 
| 899 | 
            +
                    smaller, larger = header.length < previous_header.length ?
         | 
| 900 | 
            +
                      [ header, previous_header ] : [ previous_header, header ]
         | 
| 901 | 
            +
                    PEROBS.log.error "None of the blobs with same ID have " +
         | 
| 902 | 
            +
                      "the outdated flag set. Deleting the smaller one " +
         | 
| 903 | 
            +
                      "at address #{smaller.addr}"
         | 
| 904 | 
            +
                    discard_damaged_blob(smaller)
         | 
| 905 | 
            +
                    @space_list.add_space(smaller.addr, smaller.length)
         | 
| 906 | 
            +
                    @index.insert(larger.id, larger.addr)
         | 
| 907 | 
            +
                  end
         | 
| 908 | 
            +
                end
         | 
| 909 | 
            +
             | 
| 910 | 
            +
                def open_index_files(abort_on_missing_files = false)
         | 
| 911 | 
            +
                  begin
         | 
| 912 | 
            +
                    @index.open(abort_on_missing_files)
         | 
| 913 | 
            +
                    @space_list.open
         | 
| 914 | 
            +
                  rescue FatalError
         | 
| 915 | 
            +
                    clear_index_files
         | 
| 916 | 
            +
                    regenerate_index_and_spaces
         | 
| 917 | 
            +
                  end
         | 
| 918 | 
            +
                end
         | 
| 919 | 
            +
             | 
| 920 | 
            +
                def erase_index_files(dont_close_files = false)
         | 
| 921 | 
            +
                  # Ensure that the index is really closed.
         | 
| 922 | 
            +
                  @index.close unless dont_close_files
         | 
| 923 | 
            +
                  # Erase it completely
         | 
| 924 | 
            +
                  @index.erase
         | 
| 925 | 
            +
             | 
| 926 | 
            +
                  # Ensure that the spaces list is really closed.
         | 
| 927 | 
            +
                  @space_list.close unless dont_close_files
         | 
| 928 | 
            +
                  # Erase it completely
         | 
| 929 | 
            +
                  @space_list.erase
         | 
| 930 | 
            +
             | 
| 931 | 
            +
                  if @space_list.is_a?(SpaceTree)
         | 
| 932 | 
            +
                    # If we still use the old SpaceTree format, this is the moment to
         | 
| 933 | 
            +
                    # convert it to the new SpaceManager format.
         | 
| 934 | 
            +
                    @space_list = SpaceManager.new(@db_dir, @progressmeter)
         | 
| 935 | 
            +
                    PEROBS.log.warn "Converting space list from SpaceTree format " +
         | 
| 936 | 
            +
                      "to SpaceManager format"
         | 
| 937 | 
            +
                  end
         | 
| 938 | 
            +
                end
         | 
| 939 | 
            +
             | 
| 940 | 
            +
                def clear_index_files(dont_close_files = false)
         | 
| 941 | 
            +
                  erase_index_files(dont_close_files)
         | 
| 942 | 
            +
             | 
| 943 | 
            +
                  # Then create them again.
         | 
| 944 | 
            +
                  @index.open
         | 
| 945 | 
            +
                  @space_list.open
         | 
| 946 | 
            +
                end
         | 
| 947 | 
            +
             | 
| 656 948 | 
             
              end
         | 
| 657 949 |  | 
| 658 950 | 
             
            end
         |