RubyGems - bc3 - Versions diffs - 0.1.0 - Mend

bc3 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

data/BCSS_Binary_Format.txt +239 -0
data/bin/bc3_merge.rb +155 -0
data/examples/folder1_2011-01-21.bcss +0 -0
data/examples/folder2_2011-01-21.bcss +0 -0
data/examples/test_combine.rb +43 -0
data/examples/test_filesystem.rb +6 -0
data/examples/test_hardcoded.rb +15 -0
data/examples/test_merge.bat +5 -0
data/examples/test_yaml.rb +188 -0
data/lib/bc3.rb +89 -0
data/lib/bc3/file.rb +120 -0
data/lib/bc3/folder.rb +239 -0
data/lib/bc3/helper.rb +101 -0
data/lib/bc3/parse.rb +312 -0
data/lib/bc3/snapshot.rb +264 -0
data/lib/bc3/time.rb +1 -0
data/unittest/unittest_bc3.rb +66 -0
data/unittest/unittest_bc3_file.rb +35 -0
data/unittest/unittest_bc3_folder.rb +179 -0
data/unittest/unittest_bc3_merge.rb +102 -0
data/unittest/unittest_bc3_snapshot.rb +121 -0
metadata +119 -0

data/lib/bc3/helper.rb ADDED Viewed

@@ -0,0 +1,101 @@
+module BC3
+=begin rdoc
+Define DOS attributes.
+http://www.xxcopy.com/xxcopy06.htm
+       Bit 0     Read-Only
+       Bit 1     Hidden
+       Bit 2     System
+       Bit 3     Volume Label
+       Bit 4     Directory
+       Bit 5     Archive
+  http://www.computerhope.com/attribhl.htm
+  Bit Positions	Hex	Description
+  0 0 0 0 0 0 0 1	01h	Read Only file
+  0 0 0 0 0 0 1 0	02h	Hidden file
+  0 0 0 0 0 1 0 0	04h	System file
+  0 0 0 0 1 0 0 0	08h	Volume Label
+  0 0 0 1 0 0 0 0	10h	Subdirectory
+  0 0 1 0 0 0 0 0	20h	Archive
+  0 1 0 0 0 0 0 0	40h	Reserved
+  1 0 0 0 0 0 0 0	80h	Reserved
+Usage:
+  attrib = Attrib::ReadOnly | Attrib::Hidden
+=end
+module Attrib
+  ReadOnly = 1
+  Hidden  = 2
+  System = 4
+  VolumeLabel = 8
+  Directory = 16
+  Archive = 32
+end
+=begin rdoc
+Helper methods to be included to File and Folder.
+=end
+module Helper
+=begin rdoc
+Calculate the CRC32 for a string.
+Based on http://www.ruby-forum.com/topic/179452
+(with minor ruby 1.9 adaptions).
+=end
+  def self.crc32(c)
+    #Alternative solution:
+    #~ require 'zlib'
+    #~ return Zlib.crc32(c, 0)
+    #Encoding of c should be binary or similar.
+    n = c.length
+    r = 0xFFFFFFFF
+    c.each_byte do |i|
+        r ^= i
+        8.times do
+            if (r & 1)!=0
+                r = (r>>1) ^ 0xEDB88320
+            else
+                r >>= 1
+            end
+        end
+    end
+    r ^ 0xFFFFFFFF
+  end
+=begin rdoc
+Return a Number (e.g. AD epoch) as a binary string with 8 bytes in little endian order.
+With
+  String#<< (Integer)
+the integer is a codepoint, the character is added.
+AD's epoch isn't a codepoint, but a binary data.
+So we need a little conversion, to get
+AD's epoch (Bignum) as an bit sequence.
+=end
+  def fixnum2int64( int )
+    bindata = ''.force_encoding('BINARY')
+    #Ugly code, but it works ;-)
+    #.reverse to get "little endian"
+    ('%064b' % int).scan(/(\d{8})/).flatten.reverse.each{|b|
+        bindata << b.to_i(2)
+    }
+    raise ArgumentError unless bindata.size == 8  #int was too big
+    bindata
+  end
+=begin rdoc
+Same as Helper#fixnum2int64, but as 4 bytes string.
+=end
+  def fixnum2int32( int )
+    bindata = ''.force_encoding('BINARY')
+    ('%032b' % int).scan(/(\d{8})/).flatten.reverse.each{|b|
+        bindata << b.to_i(2)
+    }
+    raise ArgumentError unless bindata.size == 4  #int was too big
+    bindata
+  end
+end #Helper
+end #BC3

data/lib/bc3/parse.rb ADDED Viewed

@@ -0,0 +1,312 @@
+=begin rdoc
+=end
+$:.unshift('..')
+require 'bc3'
+require "zlib"
+module BC3
+=begin rdoc
+Parser for a given bcss-file.
+=end
+  class SnapshotParser
+=begin rdoc
+=end
+    def initialize( filename )
+      rawdata = nil
+      @log = $log #fixme replace with sublogger
+      @log.info("Read and parse #{filename}")
+      ::File.open( filename, 'rb' ){|f|
+        rawdata = f.read()
+      }
+=begin
+ - HEADER STRUCTURE -
+    [0..3]   = 'BCSS'
+    [4]      = Major version (UByte)
+    [5]      = Minor version (UByte)
+    [6]	     = Minimum Supported Major Version (UByte)
+    [7]	     = Minimum Supported Minor Version (UByte)
+    [8..F]   = Creation Time (FileTime)
+    [10..11] = Flags         (UWord)
+            Bit : Meaning
+              0 : Compressed
+              1 : Source Path included
+              2 : Reserved
+              3 : UTF-8
+           4-15 : Reserved
+    [12..13] = Path Length (UWord)   | Optional
+    [14..N]  = Path        (char[])  |
+=end
+      #~ header =  rawdata[0..17]
+      @timestamp = Time.now
+      @timestamp, tail = parse_filetime(rawdata[8,8])
+      #Analyse flags - byte position 16/hex10
+      @compressed = rawdata[16].getbyte(0) & 1 != 0
+      @sourcepath = rawdata[16].getbyte(0) & 2 != 0
+      @reserved    = rawdata[16].getbyte(0) & 4 != 0
+      @utf           = rawdata[16].getbyte(0) & 8 != 0
+      if rawdata[17] != "\x0"
+        @log.warn("2nd flag byte is filled")
+      end
+      #Analyse Source path
+      #Delete second length parameter for source path
+      if rawdata.slice!(19) != "\x0"
+        @log.warn("Path > 255 not supported")
+        raise "Path > 255 not supported"
+      end
+      path, body = parse_shortstring(rawdata[18..-1])
+      if @compressed
+=begin
+Flags:
+    Compressed: If set everything following the header is compressed as a raw
+deflate stream, as defined by RFC 1951.  It is the same compression used by
+.zip and .gz archives.
+Code from http://www.ruby-forum.com/topic/136825
+=end
+        @log.debug("uncompress body data")
+        begin
+          body= Zlib::Inflate.inflate(body);  #Unclear problem
+        rescue Zlib::DataError
+          @log.debug("Zlib::DataError occured - try with raw  deflate")
+          #no luck with Zlib decompression. Let's try with raw  deflate,
+          #like some broken browsers do.
+          body= Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(body)
+        end
+      end
+      @snapshot = Snapshot.new(path, @timestamp)
+      parse_body(body)
+    end #initialize
+    #Snapshot-object, result of the parsing.
+    attr_reader :snapshot
+    attr_reader :timestamp
+=begin
+Parse the body data.
+This method will change the given parameter.
+=end
+    def parse_body(body)
+      folderstack = [ @snapshot ]
+      while ! body.empty?
+=begin
+Each record starts with a single UByte ID value and then the data defined below.
+=end
+        case last_flag = body.slice!(0)
+=begin
+ID_DIRECTORY (0x01)
+    Represents a directory on the system, or an expanded archive file.
+    Name           : ShortString
+    Last Modified  : FileTime
+    DOS Attributes : UInt32
+=end
+          when "\x01" #folder
+            dirname, tail = parse_shortstring(body)
+            filetime, tail = parse_filetime(tail)
+            attributes, tail = parse_dosattributes(tail)
+            folder = Folder.newh(
+              dirname: dirname,
+              timestamp: filetime,
+              attributes: attributes
+            )
+            folderstack.last << folder
+            folderstack << folder
+=begin
+ID_FILE (0x02)
+    Represents a file on the system.
+    Name           : ShortString
+    Last Modified  : FileTime
+    DOS Attributes : UInt32
+    Size           : Int32[+Int64]
+       If Size > 2GB, store as Int32(-1) followed by Int64
+    CRC32          : UInt32
+=end
+          when "\x02" #file
+            filename, tail = parse_shortstring(body)
+            filetime, tail = parse_filetime(tail)
+            attributes, tail = parse_dosattributes(tail)
+            filesize, tail = parse_uint32(tail)
+            crc32, tail = parse_uint32(tail)
+            folderstack.last << File.new(
+              filename: filename,
+              timestamp: filetime,
+              attributes: attributes,
+              filesize: filesize,
+              crc: crc32
+            )
+=begin
+ID_FILE_EX (0x03)
+    Represents a file on the system, with extended headers.
+    Name..CRC32 is the same as ID_FILE
+    ExtraLen       : UInt16
+    ExtraData      : Byte[ExtraLen]
+=end
+          when "\x03" #file
+            filename, tail = parse_shortstring(body)
+            filetime, tail = parse_filetime(tail)
+            attributes, tail = parse_dosattributes(tail)
+            filesize, tail = parse_uint32(tail)
+            crc32, tail = parse_uint32(tail)
+            extradata, tail = parse_longstring(tail)
+            extradata = parse_file_extended_headers(extradata)
+            unless extradata #Skip at prob
+              @log.warn("Skip #{filename}")
+              next
+            end
+            folderstack.last << File.new({
+              filename: filename,
+              timestamp: filetime,
+              attributes: attributes,
+              filesize: filesize,
+              crc: crc32,
+              }.merge(extradata)
+            )
+=begin
+ID_DIRECTORY_END (0xFF)
+    Represents the end of a directory listing.  No data.
+=end
+          when "\xff" #end of folder
+            folderstack.pop
+          else
+            @log.fatal("Undefined body-parse element #{last_flag.inspect}")
+      p body
+            body.slice!(0..-1)  #close further pasring
+          end
+        end
+        if folderstack.size > 1
+          @log.error("Folders in Folderstack not closed correct")
+          p folderstack.size
+        end
+    end
+=begin rdoc
+Get a "shortstring".
+1 Byte with length, then the string.
+Return shortstring and rest of string.
+=end
+    def parse_shortstring( string )
+      #Get length of path
+      pathsize = string.slice!(0).bytes.first
+                      # + rawdata[19].bytes.first * 255  #--test it
+      return [string.slice!(0,pathsize), string]
+    end
+=begin rdoc
+Get a "longstring".
+2 Bytes with length, then the string.
+The length is including the 2 bytes for the length
+Return longstring and rest of string.
+=end
+    def parse_longstring( string )
+      stringsize = string.slice!(0).bytes.first - 2
+      if string.slice!(0) != "\x0"
+        @log.warn("longstring > 255 not supported")
+        raise "longstring > 255 not supported"
+      end
+      return [string.slice!(0,stringsize), string]
+    end
+=begin rdoc
+Get Unsigned 32-bit number
+=end
+    def parse_uint32( string )
+      num = string.slice!(0,4).reverse.each_byte.map{|x| '%08b' % x}.join.to_i(2)
+      return [num, string]
+    end
+=begin rdoc
+Get a "filetime".
+FileTime:
+    Windows FILETIME structure.  64-bit value representing the number of
+100-nanosecond intervals since January 1, 1601 UTC.  Stored in local time.
+Return time and rest of string.
+=end
+    def parse_filetime( string )
+      ad_time = string.slice!(0,8)  #Integer with filetime
+      time = Time.ad2time(ad_time.reverse.each_byte.map{|x| '%08b' % x}.join.to_i(2))
+      return [time, string]
+    end
+=begin rdoc
+Get DOS-attributes.
+=end
+    def parse_dosattributes( string )
+      #Get length of path
+      attributes = string.slice!(0).bytes.first
+      string.slice!(0,3)  #skip next 3 bytes
+      return [attributes, string]
+    end
+=begin
+=====================
+File Extended Headers
+=====================
+Like extended headers, file extended headers should be written in ascending
+numeric order.
+FILE_EX_VERSION (0x01)
+    String representation of an executable file's Major/Minor/Maint/Build
+version (e.g., "2.11.28.3542").
+    Length : UByte
+    Data   : char[Length]
+FILE_EX_UTF8 (0x02)
+    UTF-8 encoded filename.  Stored as a FileExString.  Only used if the UTF-8
+name doesn't match the ANSI encoded one or if the filename is longer than 255
+characters.
+FILE_EX_LINK_PATH (0x03)
+    UTF-8 encoded symbolic link path.  Stored as a FileExString.
+=end
+    def parse_file_extended_headers(extradata_string)
+      #Evaluate the extended header data of a ID_FILE_EX record.
+      #Only the first header is evaluated, remaining data is reported
+      #with a warning.
+      #
+      #Returns a hash with the found data (key :version) or false,
+      #if the header type is not supported (UTF-8 name, link path).
+      extradata = {}
+      case flag = extradata_string.slice!(0)
+        when "\x01" #FILE_EX_VERSION
+          #parse_shortstring returns [text, rest of string].
+          #Only the text is the version.
+          extradata[:version], _ = parse_shortstring( extradata_string )
+        when "\x02" #FILE_EX_UTF8
+          @log.warn("Undefined extra data handling for UTF-8 encoded filename")
+          return false #fixme
+        when "\x03" #FILE_EX_LINK_PATH
+          @log.warn("Undefined extra data handling for UTF-8 encoded symbolic")
+          return false #fixme
+        else
+          #fixme handling extradata_string
+          @log.warn("Undefined extra data handling #{flag.inspect} <#{extradata_string.inspect}>")
+      end
+      unless extradata_string.empty?
+        @log.warn("Undefined extra data handling <#{extradata_string.inspect}>")
+      end
+      extradata
+    end
+  end #SnapshotParser
+  def to_hash; @snapshot.to_hash; end
+end
+if $0 == __FILE__
+require 'yaml'
+#~ x = BC3::SnapshotParser.new('../../examples/results/testdir_2011-01-16.bcssx' )
+x = BC3::SnapshotParser.new('../../examples/results/bc3_2011-01-16.bcss' )
+#~ x = BC3::SnapshotParser.new('../../Uncompressed Sample/Uncompressed Sample.bcss' )
+puts x.snapshot.to_hash.to_yaml
+x.snapshot.save('../../Uncompressed Sample/Uncompressed Sample_reconstructed.bcss')
+end

data/lib/bc3/snapshot.rb ADDED Viewed

@@ -0,0 +1,264 @@
+require 'bc3/helper'
+module BC3
+=begin rdoc
+Container for a snapshot.
+=end
+  class Snapshot
+    include Helper
+=begin rdoc
+=end
+  def initialize( path, timestamp = Time.now )
+    $log.debug("Create Snapshot #{path}")
+    @path = path
+    @timestamp = timestamp || Time.now
+    $log.debug("Create base folder for snapshot #{path}")
+    @basefolder = Folder.new('SnapshotRoot', @timestamp)
+  end
+=begin rdoc
+Create a snapshot from a hash.
+A snapshot-hash must contain:
+* snapshot - dirname of the snapshot
+* content - array of folders (see Folder.newh) and files (File.new)
+* timestamp (optional)
+=end
+  def self.newh( data )
+    $log.info("Build Snapshot from hash")
+    raise ArgumentError, "No hash given" unless data.is_a?(Hash)
+    raise ArgumentError, "snapshot name missing" unless data.has_key?(:snapshot)
+    raise ArgumentError, "content missing" unless data.has_key?(:content)
+    raise ArgumentError, "Content is no array" unless data[:content].is_a?(Array)
+    snapshot = new( data[:snapshot], data[:timestamp] )
+    data[:content].each{| element |
+      if element.has_key?(:dirname)
+        snapshot << Folder.newh(element)
+      elsif element.has_key?(:filename)
+        snapshot << File.new(element)
+      else
+        raise ArgumentError, "element without dir/filename"
+      end
+    }
+    snapshot
+  end #newh
+=begin rdoc
+Create a snapshot from a directory.
+=end
+  def self.newd( dirname )
+    $log.info("Build Snapshot from directory #{dirname}")
+    #~ raise ArgumentError, "No hash given" unless data.is_a?(Hash)
+    snapshot = new( ::File.expand_path("./#{dirname}") )
+    Dir.chdir(dirname){
+      Dir['*'].each{|f|
+        if ::File.directory?(f)
+          snapshot << Folder.new_by_dirname(f)
+        elsif ::File.exist?(f)
+          snapshot << File.new_by_filename(f)
+        else
+          raise ArgumentError, "#{f} not found in #{dirname}"
+        end
+      }
+    }
+    snapshot
+  end #newh
+  #homepath of the snapshot
+  attr_reader :path
+  #Time stamp from snapshot. Default 'now'
+  attr_reader :timestamp
+=begin rdoc
+Add content (folders/files) to snapshot.
+=end
+  def << (content)
+      @basefolder << content
+  end
+=begin rdoc
+Loop on content of the folder.
+Options see BC3::Folder#each
+=end
+    def each(*options)
+      if block_given?
+        @basefolder.each(*options){|key, content| yield key, content }
+      else
+        @basefolder.each
+      end
+    end
+=begin rdoc
+=end
+    def save( filename, compressed = nil )
+      $log.debug("Prepare snapshot for #{filename}")
+      #Check if compressed or uncompressed output wanted
+      compressed = ( filename =~ /\.bcssx/ ) if compressed.nil?
+      #Must be binary, else a \n get's \r\n under windows.
+      ::File.open(filename,'wb'){|f|
+        f << bcss( compressed )
+      }
+      $log.info("Saved snapshot as #{filename}")
+    end
+=begin rdoc
+Collect the data in a hash.
+Useful in combination with yaml:
+  require 'bc3'
+  require 'yaml'
+  #...
+  snapshot = snapshot.new(...)
+  #...
+  puts snapshot.to_hash.to_yaml
+=end
+    def to_hash()
+      {
+        snapshot: @path,
+        timestamp: @timestamp,
+        content: @basefolder.each.values.map{| x | x.to_hash }
+      }
+    end
+=begin rdoc
+Prepare a snapshot (bcss-file).
+Only uncompressed structure.
+===Beyond Compare Snapshot Format                                      Version 1.1
+Beyond Compare snapshots (.bcss) are binary files containing the file metadata
+(names, sizes, last modified times) of a directory structure without storing
+any of the file content.  They are designed to be read sequentially.  File
+record sizes are variable, so there's no way to seek to arbitrary records
+without reading all of the records before it.
+=end
+    def bcss( compressed = false )
+      bcss = "".force_encoding('BINARY')
+      bcss << bcss_header( compressed )
+      if compressed
+        $log.debug("Compress bcss-data")
+        $log.fatal("Compress bcss-data not supported - only for test purposes")
+=begin
+Flags:
+    Compressed: If set everything following the header is compressed as a raw
+deflate stream, as defined by RFC 1951.  It is the same compression used by
+.zip and .gz archives.
+=end
+        #see for truncations http://www.ruby-forum.com/topic/101078
+        # http://ilovett.com/blog/programming/ruby-zlib-deflate
+        #~ puts "%-2i %s" % [ 99, bcss_data.inspect ]
+        -1.upto(9){|i|
+          puts "%-2i %s" % [ i, Zlib::Deflate.deflate( bcss_data, i )[2..-5].inspect ]
+        }
+        bcss << Zlib::Deflate.deflate( bcss_data)[2..-5]
+          #~ bcss << Zlib::Deflate.new(nil, -Zlib::MAX_WBITS).deflate(bcss_data, Zlib::FINISH)
+      else  #uncompressed
+        bcss << bcss_data
+      end
+      bcss << 255
+      bcss
+    end
+=begin rdoc
+Create the header data for bcss-file
+Snapshots start with a fixed size header that contains an ID value, version
+information, a creation date, and various flags, optionally followed by the
+source folder's path:
+ - HEADER STRUCTURE -
+    [0..3]   = 'BCSS'
+    [4]      = Major version (UByte)
+    [5]      = Minor version (UByte)
+    [6]	     = Minimum Supported Major Version (UByte)
+    [7]	     = Minimum Supported Minor Version (UByte)
+    [8..F]   = Creation Time (FileTime)
+    [10..11] = Flags         (UWord)
+            Bit : Meaning
+              0 : Compressed
+              1 : Source Path included
+              2 : Reserved
+              3 : UTF-8
+           4-15 : Reserved
+    [12..13] = Path Length (UWord)   | Optional
+    [14..N]  = Path        (char[])  |
+Version Information:
+    The first two version bytes represent the actual major and minor versions
+of the file, and reference a specific version of this specification.  The
+second pair of version bytes represent the minimum snapshot version which must
+be supported in order to read the snapshot file.  Version 1.1 can be read by
+Version 1.0 applications, so currently Major/Minor should be set to 1.1 and
+Minimum should be 1.0.
+Flags:
+    Compressed: If set everything following the header is compressed as a raw
+deflate stream, as defined by RFC 1951.  It is the same compression used by
+.zip and .gz archives.
+    Source Path included: If set the original folder's path is included
+immediately after the header.  This is only on part of the file besides the
+fixed header that is not compressed.
+    UTF-8: If set the snapshot was compressed on a system where the default
+character encoding is UTF-8 (Linux, OS X).  Filenames, paths, and link targets
+will all be stored as UTF-8.  If this isn't set the paths are stored using the
+original OS's ANSI codepage (Windows).  In that case any paths may be stored a
+second time as UTF-8 in extended headers.
+=end
+    def bcss_header( compressed )
+      header = "".force_encoding('BINARY')
+      header << 'BCSS'
+      header << 1 #Major version (UByte)
+      header << 1 #Minor version (UByte)
+      header << 1 #Minimum Supported Major Version (UByte)
+      header << 0 #Minimum Supported Minor Version (UByte)
+      #[8..F]   = Creation Time (FileTime)
+      #Windows FILETIME structure.  64-bit value representing the number of
+      #100-nanosecond intervals since January 1, 1601 UTC.  Stored in local time.
+      #8 Byte long
+      #~ header << "%x" % Time.now.time2ad  #-> bignum too big to convert into `unsigned long' (RangeError)
+      header << fixnum2int64(@timestamp.time2ad)
+      #~ header << "\x70\x57\x5C\x25\x69\xB2\xCB\x01" #Data from example
+      # [10..11] = Flags         (UWord)
+            #~ Bit : Meaning
+              #~ 0 : Compressed
+              #~ 1 : Source Path included
+              #~ 2 : Reserved
+              #~ 3 : UTF-8
+           #~ 4-15 : Reserved
+      flag = 0  #no flag set
+      flag += 2 #Source Path included
+      flag += 1 if compressed
+      header << flag
+      header << 0
+      # [12..13] = Path Length (UWord)   | Optional
+      header << @path.size
+      header << 0 #fixme if path > 255
+      raise "too long path" if @path.size > 155 #fixme
+      # [14..N]  = Path        (char[])  |
+      header << @path
+      header
+    end #header
+=begin rdoc
+Return the data part of the snapshot.
+This part may be packed.
+=end
+    def bcss_data()
+      data = "".force_encoding('BINARY')
+      @basefolder.each{|key, folder|
+        data << folder.bcss
+      }
+      data
+    end
+  end #Snapshot
+end #module BC3