RubyGems - property-list - Versions diffs - 1.0 - Mend

property-list 1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

checksums.yaml +7 -0
data/.gitignore +50 -0
data/LICENSE +29 -0
data/README.md +52 -0
data/Rakefile +7 -0
data/gems.locked +41 -0
data/gems.rb +3 -0
data/lib/property-list.rb +83 -0
data/lib/property-list/ascii_generator.rb +148 -0
data/lib/property-list/ascii_parser.rb +217 -0
data/lib/property-list/binary_generator.rb +306 -0
data/lib/property-list/binary_markers.rb +34 -0
data/lib/property-list/binary_parser.rb +169 -0
data/lib/property-list/version.rb +3 -0
data/lib/property-list/xml_generator.rb +147 -0
data/lib/property-list/xml_parser.rb +121 -0
data/property-list.gemspec +22 -0
metadata +131 -0

data/lib/property-list/binary_generator.rb ADDED Viewed

@@ -0,0 +1,306 @@
+# encoding: binary
+module PropertyList
+  def self.dump_binary obj, options=nil
+    generator = BinaryGenerator.new options
+    generator.generate obj
+    generator.output.join
+  end
+  # Modified from:
+  #   https://github.com/jarib/plist/blob/master/lib/plist/binary.rb
+  #
+  # With improved performance
+  class BinaryGenerator
+    include BinaryMarkers
+    def initialize opts
+      @output = []
+      @offset = 0
+    end
+    attr_reader :output
+    # Encodes +obj+ as a binary property list. If +obj+ is an Array, Hash, or
+    # Set, the property list includes its contents.
+    def generate object
+      flatten_objects = flatten_collection object
+      ref_byte_size = min_byte_size flatten_objects.size - 1
+      # Write header and encoded objects.
+      # TODO use bplist10 when there are version 1x elements
+      add_output "bplist00"
+      offset_table = []
+      flatten_objects.each do |o|
+        offset_table << @offset
+        binary_object o, ref_byte_size
+      end
+      # Write offset table.
+      offset_table_addr = @offset
+      offset_byte_size = min_byte_size @offset
+      offset_table.each do |offset|
+        binary_integer offset, offset_byte_size
+      end
+      # Write trailer. (6 + 2 + 24 = 32 bytes)
+      add_output [
+        "\0\0\0\0\0\0", # padding
+        offset_byte_size, ref_byte_size,
+        flatten_objects.size,
+        0, # index of root object
+        offset_table_addr
+      ].pack("a*C2Q>3")
+    end
+    private
+    # Takes an object (nominally a collection, like an Array, Set, or Hash, but
+    # any object is acceptable) and flattens it into a one-dimensional array.
+    # Non-collection objects appear in the array as-is, but the contents of
+    # Arrays, Sets, and Hashes are modified like so: (1) The contents of the
+    # collection are added, one-by-one, to the one-dimensional array. (2) The
+    # collection itself is modified so that it contains indexes pointing to the
+    # objects in the one-dimensional array. Here's an example with an Array:
+    #
+    #   ary = [:a, :b, :c]
+    #   flatten_collection(ary) # => [[1, 2, 3], :a, :b, :c]
+    #
+    # In the case of a Hash, keys and values are both appended to the one-
+    # dimensional array and then replaced with indexes.
+    #
+    #   hsh = {:a => "blue", :b => "purple", :c => "green"}
+    #   flatten_collection(hsh)
+    #   # => [{1 => 2, 3 => 4, 5 => 6}, :a, "blue", :b, "purple", :c, "green"]
+    #
+    # An object will never be added to the one-dimensional array twice. If a
+    # collection refers to an object more than once, the object will be added
+    # to the one-dimensional array only once.
+    #
+    #   ary = [:a, :a, :a]
+    #   flatten_collection(ary) # => [[1, 1, 1], :a]
+    #
+    # The +obj_list+ and +id_refs+ parameters are private; they're used for
+    # descending into sub-collections recursively.
+    def flatten_collection collection, obj_list=[], id_refs={}
+      case collection
+      when Array, Set
+        if id_refs[collection.object_id]
+          return obj_list[id_refs[collection.object_id]]
+        end
+        obj_refs = collection.class.new
+        id_refs[collection.object_id] = obj_list.length
+        obj_list << obj_refs
+        collection.each do |obj|
+          flatten_collection(obj, obj_list, id_refs)
+          obj_refs << id_refs[obj.object_id]
+        end
+        return obj_list
+      when Hash
+        if id_refs[collection.object_id]
+          return obj_list[id_refs[collection.object_id]]
+        end
+        obj_refs = {}
+        id_refs[collection.object_id] = obj_list.length
+        obj_list << obj_refs
+        collection.keys.sort.each do |key|
+          value = collection[key]
+          key = key.to_s if key.is_a?(Symbol)
+          flatten_collection(key, obj_list, id_refs)
+          flatten_collection(value, obj_list, id_refs)
+          obj_refs[id_refs[key.object_id]] = id_refs[value.object_id]
+        end
+        return obj_list
+      else
+        unless id_refs[collection.object_id]
+          id_refs[collection.object_id] = obj_list.length
+          obj_list << collection
+        end
+        return obj_list
+      end
+    end
+    def add_output data
+      @output << data
+      @offset += data.bytesize
+    end
+    # Returns a binary property list fragment that represents +obj+. The
+    # returned string is not a complete property list, just a fragment that
+    # describes +obj+, and is not useful without a header, offset table, and
+    # trailer.
+    #
+    # The following classes are recognized: String, Float, Integer, the Boolean
+    # classes, Time, IO, StringIO, Array, Set, and Hash. IO and StringIO
+    # objects are rewound, read, and the contents stored as data (i.e., Cocoa
+    # applications will decode them as NSData). All other classes are dumped
+    # with Marshal and stored as data.
+    #
+    # Note that subclasses of the supported classes will be encoded as though
+    # they were the supported superclass. Thus, a subclass of (for example)
+    # String will be encoded and decoded as a String, not as the subclass:
+    #
+    #   class ExampleString < String
+    #     ...
+    #   end
+    #
+    #   s = ExampleString.new("disquieting plantlike mystery")
+    #   encoded_s = binary_object(s)
+    #   decoded_s = decode_binary_object(encoded_s)
+    #   puts decoded_s.class # => String
+    #
+    # +ref_byte_size+ is the number of bytes to use for storing references to
+    # other objects.
+    def binary_object obj, ref_byte_size = 4
+      case obj
+      when Symbol
+        binary_string obj.to_s
+      when String
+        binary_string obj
+      when URL
+        binary_url obj.url
+      when Float
+        add_output [(MARKER_REAL | 3), obj].pack("CG")
+      when Integer
+        nbytes = min_byte_size obj
+        size_bits = { 1 => 0, 2 => 1, 4 => 2, 8 => 3, 16 => 4 }[nbytes]
+        add_output (MARKER_INT | size_bits).chr
+        binary_integer obj, nbytes
+      when TrueClass
+        add_output MARKER_TRUE.chr
+      when FalseClass
+        add_output MARKER_FALSE.chr
+      when Time
+        add_output [MARKER_DATE, obj.to_f - TIME_INTERVAL_SINCE_1970].pack("CG")
+      when Date # also covers DateTime
+        add_output [MARKER_DATE, obj.to_time.to_f - TIME_INTERVAL_SINCE_1970].pack("CG")
+      when IO, StringIO
+        obj.rewind
+        obj.binmode
+        data = obj.read
+        binary_marker MARKER_DATA, data.bytesize
+        add_output data
+      when Array
+        # Must be an array of object references as returned by flatten_collection.
+        binary_marker MARKER_ARRAY, obj.size
+        obj.each do |i|
+          binary_integer i, ref_byte_size
+        end
+      when Set
+        # Must be a set of object references as returned by flatten_collection.
+        binary_marker MARKER_SET, obj.size
+        obj.each do |i|
+          binary_integer i, ref_byte_size
+        end
+      when Hash
+        # Must be a table of object references as returned by flatten_collection.
+        binary_marker MARKER_DICT, obj.size
+        obj.keys.each do |k|
+          binary_integer k, ref_byte_size
+        end
+        obj.values.each do |v|
+          binary_integer v, ref_byte_size
+        end
+      else
+        raise "Unsupported class: #{obj.class}"
+      end
+    end
+    def binary_marker marker, size
+      if size < 15
+        add_output (marker | size).chr
+      else
+        add_output (marker | 0xf).chr
+        binary_object size
+      end
+    end
+    def binary_string obj
+      if obj.encoding == Encoding.find('binary')
+        binary_marker MARKER_ASCII_STRING, obj.bytesize
+        add_output obj
+      elsif obj.ascii_only?
+        obj = obj.dup.force_encoding 'binary'
+        binary_marker MARKER_ASCII_STRING, obj.bytesize
+        add_output obj
+      else
+        data = obj.encode('utf-16be').force_encoding 'binary'
+        cp_size = data.bytesize / 2
+        binary_marker MARKER_UTF16BE_STRING, cp_size # TODO check if it works for 4 bytes
+        add_output data
+      end
+    end
+    def binary_url obj
+      @v1 = true
+      if obj =~ /\A\w+:/
+        add_output MARKER_WITH_BASE_URL.chr
+      else
+        add_output MARKER_NO_BASE_URL.chr
+      end
+      binary_marker MARKER_ASCII_STRING, obj.bytesize
+      add_output obj
+    end
+    def binary_uuid obj
+      # TODO
+    end
+    def binary_ordered_set obj
+      # TODO
+    end
+    # Packs an integer +i+ into its binary representation in the specified
+    # number of bytes. Byte order is big-endian. Negative integers cannot be
+    # stored in 1, 2, or 4 bytes.
+    def binary_integer i, num_bytes
+      if i < 0 && num_bytes < 8
+        raise ArgumentError, "negative integers require 8 or 16 bytes of storage"
+      end
+      case num_bytes
+      when 1
+        add_output [i].pack("C")
+      when 2
+        add_output [i].pack("n")
+      when 4
+        add_output [i].pack("N")
+      when 8
+        add_output [i].pack("q>")
+      when 16
+        # TODO verify 128 bit integer encoding
+        if i < 0
+          i = 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff ^ i.abs + 1
+        end
+        add_output [i >> 64, i & 0xffff_ffff_ffff_ffff].pack("q>2")
+      else
+        raise ArgumentError, "num_bytes must be 1, 2, 4, 8, or 16"
+      end
+    end
+    # Determines the minimum number of bytes that is a power of two and can
+    # represent the integer +i+. Raises a RangeError if the number of bytes
+    # exceeds 16. Note that the property list format considers integers of 1,
+    # 2, and 4 bytes to be unsigned, while 8- and 16-byte integers are signed;
+    # thus negative integers will always require at least 8 bytes of storage.
+    def min_byte_size i
+      if i < 0
+        i = i.abs - 1
+      else
+        if i <= 0xff
+          return 1
+        elsif i <= 0xffff
+          return 2
+        elsif i <= 0xffffffff
+          return 4
+        end
+      end
+      if i <= 0x7fffffffffffffff
+        8
+      elsif i <= 0x7fffffffffffffffffffffffffffffff
+        16
+      else
+        raise RangeError, "integer too big - exceeds 128 bits"
+      end
+    end
+  end
+end

data/lib/property-list/binary_markers.rb ADDED Viewed

@@ -0,0 +1,34 @@
+module PropertyList
+  module BinaryMarkers
+    # These marker bytes are prefixed to objects in a binary property list to
+    # indicate the type of the object.
+    MARKER_NULL           = 0b0000_0000 # v1?+ only
+    MARKER_FALSE          = 0b0000_1000
+    MARKER_TRUE           = 0b0000_1001
+    MARKER_NO_BASE_URL    = 0b0000_1100 # followed by string, v1?+ only
+    MARKER_WITH_BASE_URL  = 0b0000_1101 # followed by string, v1?+ only
+    MARKER_UUID           = 0b0000_1110 # 16 byte uuid, v1?+ only
+    MARKER_FILL           = 0b0000_1111 # fill byte
+    MARKER_INT            = 0b0001_0000 # 0nnn
+    MARKER_REAL           = 0b0010_0000 # 0nnn
+    MARKER_DATE           = 0b0011_0011 # follows 8 byte big endian float
+    MARKER_DATA           = 0b0100_0000 # [int]
+    MARKER_ASCII_STRING   = 0b0101_0000 # [int]
+    MARKER_UTF16BE_STRING = 0b0110_0000 # [int]
+    MARKER_UTF8_STRING    = 0b0111_0000 # [int], v1?+ only
+    MARKER_UID            = 0b1000_0000 # nnnn, followed by nnnn+1 bytes
+                          # 0b1001_xxxx # unused
+    MARKER_ARRAY          = 0b1010_0000
+    MARKER_ORD_SET        = 0b1011_0000 # v1?+ only
+    MARKER_SET            = 0b1100_0000 # v1?+ only
+    MARKER_DICT           = 0b1101_0000
+                          # 0b1110_xxxx # unused
+                          # 0b1111_xxxx # unused
+    # POSIX uses a reference time of 1970-01-01T00:00:00Z; Cocoa's reference
+    # time is in 2001. This interval is for converting between the two.
+    TIME_INTERVAL_SINCE_1970 = 978307200.0
+  end
+end

data/lib/property-list/binary_parser.rb ADDED Viewed

@@ -0,0 +1,169 @@
+module PropertyList
+  def self.load_binary(data)
+    BinaryParser.new(data).parse
+  end
+  # Reference:
+  #   https://opensource.apple.com/source/CF/CF-1151.16/CFBinaryPList.c.auto.html
+  class BinaryParser
+    include BinaryMarkers
+    def initialize src
+      @src = src
+      @offset_byte_size, @ref_byte_size, @flatten_objects_size, @root_object_index, @offset_table_addr = \
+        @src.byteslice((-32)..(-1)).unpack '@6C2Q>3'
+    end
+    def parse
+      @offset_table = decode_offset_table
+      decode_id @root_object_index
+    end
+    private
+    def decode_object offset
+      first_byte, = @src.unpack "@#{offset}C"
+      marker = first_byte & 0xF0
+      if marker == 0 or first_byte == MARKER_DATE
+        marker = first_byte
+      end
+      case marker
+      when MARKER_NULL
+        nil
+      when MARKER_FALSE
+        false
+      when MARKER_TRUE
+        true
+      when MARKER_NO_BASE_URL
+        raise 'todo'
+      when MARKER_WITH_BASE_URL
+        raise 'todo'
+      when MARKER_UUID
+        raise 'todo'
+      when MARKER_FILL
+        decode_object offest + 1
+      when MARKER_INT
+        size_bits = first_byte & 0x0F
+        num_bytes = 2 ** size_bits
+        decode_integer offset + 1, num_bytes
+      when MARKER_REAL
+        r, = @src.unpack "@#{offset + 1}G"
+        r
+      when MARKER_DATE
+        seconds_since_2001, = @src.unpack "@#{offset + 1}G"
+        Time.at(TIME_INTERVAL_SINCE_1970 + seconds_since_2001).to_datetime
+      when MARKER_DATA
+        data = @src.byteslice *(decode_vl_info offset)
+        StringIO.new data
+      when MARKER_ASCII_STRING
+        @src.byteslice *(decode_vl_info offset)
+      when MARKER_UTF16BE_STRING
+        str_offset, str_size = decode_vl_info offset
+        s = @src.byteslice str_offset, str_size * 2
+        s.force_encoding('utf-16be').encode 'utf-8'
+      when MARKER_UTF8_STRING
+        s = @src.byteslice *(decode_vl_info offset)
+        s.force_encoding 'utf-8'
+      when MARKER_UID
+        # Encoding is as integers, except values are unsigned.
+        # These are used extensively in files written using NSKeyedArchiver, a serializer for Objective-C objects.
+        # The value is the index in parse_result["$objects"]
+        size = (first_byte & 0xF) + 1
+        bytes = @src.byteslice offset + 1, size
+        res = 0
+        bytes.unpack('C*').each do |byte|
+          res *= 256
+          res += byte
+        end
+        UID[res]
+      when MARKER_ARRAY
+        offset, size = decode_vl_info offset
+        size.times.map do |i|
+          id = decode_ref_id offset + i * @ref_byte_size
+          decode_id id
+        end
+      when MARKER_ORD_SET, MARKER_SET
+        r = Set.new
+        offset, size = decode_vl_info offset
+        size.times do |i|
+          id = decode_ref_id offset + i * @ref_byte_size
+          r << (decode_id id)
+        end
+        r
+      when MARKER_DICT
+        offset, size = decode_vl_info offset
+        keys_byte_size = @ref_byte_size * size
+        entries = []
+        size.times do |i|
+          k_offset = offset + i * @ref_byte_size
+          v_offset = k_offset + keys_byte_size
+          entries << [
+            decode_id(decode_ref_id k_offset),
+            decode_id(decode_ref_id v_offset)
+          ]
+        end
+        entries.sort_by! &:first
+        Hash[entries]
+      else
+        raise "unused marker: 0b#{marker.to_s(2).rjust 8, '0'}"
+      end
+    end
+    def decode_vl_info offset
+      marker, = @src.unpack "@#{offset}C"
+      vl_size_bits = marker & 0x0F
+      if vl_size_bits == 0x0F
+        # size is followed by marker int
+        int_marker, = @src.unpack "@#{offset + 1}C"
+        num_bytes = 2 ** (int_marker & 0x0F)
+        size = decode_integer offset + 2, num_bytes
+        [offset + 2 + num_bytes, size]
+      else
+        [offset + 1, vl_size_bits]
+      end
+    end
+    def decode_offset_table
+      @flatten_objects_size.times.map do |i|
+        offset_index = @offset_table_addr + i * @offset_byte_size
+        decode_integer offset_index, @offset_byte_size
+      end
+    end
+    # decode the i-th entry in offset table
+    def decode_id i
+      raise "ref-id should be positive, but got #{i}" if i < 0
+      offset = @offset_table[i]
+      raise "offset not found for ref-id #{i}" if !offset
+      decode_object offset
+    end
+    # decode integer of ref byte size
+    def decode_ref_id offset
+      decode_integer offset, @ref_byte_size
+    end
+    def decode_integer offset, num_bytes
+      # NOTE: only num_bytes = 8 or 16 it can be negative
+      case num_bytes
+      when 1
+        i, = @src.unpack "@#{offset}C"
+      when 2
+        i, = @src.unpack "@#{offset}n"
+      when 4
+        i, = @src.unpack "@#{offset}N"
+      when 8
+        i, = @src.unpack "@#{offset}q>"
+      when 16
+        hi, lo = @src.unpack "@#{offset}q>2"
+        i = (hi << 64) | lo
+      else
+        raise ArgumentError, "num_bytes must be 1, 2, 4, 8, or 16"
+      end
+      i
+    end
+  end
+end