property-list 1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,306 @@
1
# encoding: binary

module PropertyList
  # Serializes +obj+ as a binary property list and returns the whole document
  # as a single String.
  #
  # +options+ is handed to BinaryGenerator.new, which currently ignores it.
  def self.dump_binary obj, options=nil
    generator = BinaryGenerator.new options
    generator.generate obj
    generator.output.join
  end

  # Modified from:
  # https://github.com/jarib/plist/blob/master/lib/plist/binary.rb
  #
  # With improved performance
  #
  # The generator appends encoded fragments to +output+ and keeps a running
  # byte offset, so one instance should be used for one document.
  class BinaryGenerator
    include BinaryMarkers

    # Maps an integer's byte width to the exponent stored in the low nibble of
    # an integer marker (width == 2 ** exponent).
    INT_SIZE_BITS = { 1 => 0, 2 => 1, 4 => 2, 8 => 3, 16 => 4 }.freeze

    # +opts+ is accepted for interface compatibility; it is not used yet.
    def initialize opts
      @output = []
      @offset = 0
    end

    # Array of binary String fragments in output order.
    attr_reader :output

    # Encodes +object+ as a binary property list. If +object+ is an Array,
    # Hash, or Set, the property list includes its contents.
    #
    # The encoded fragments are appended to +output+; nothing useful is
    # returned.
    def generate object
      flatten_objects = flatten_collection object
      ref_byte_size = min_byte_size flatten_objects.size - 1

      # Write header and encoded objects.
      # TODO use bplist10 when there are version 1x elements
      add_output "bplist00"
      offset_table = []
      flatten_objects.each do |o|
        # Record where each object starts before writing it.
        offset_table << @offset
        binary_object o, ref_byte_size
      end

      # Write offset table.
      offset_table_addr = @offset
      offset_byte_size = min_byte_size @offset
      offset_table.each do |offset|
        binary_integer offset, offset_byte_size
      end

      # Write trailer. (6 + 2 + 24 = 32 bytes)
      add_output([
        "\0\0\0\0\0\0", # padding
        offset_byte_size, ref_byte_size,
        flatten_objects.size,
        0, # index of root object
        offset_table_addr
      ].pack("a*C2Q>3"))
    end

    private

    # Takes an object (nominally a collection, like an Array, Set, or Hash, but
    # any object is acceptable) and flattens it into a one-dimensional array.
    # Non-collection objects appear in the array as-is, but the contents of
    # Arrays, Sets, and Hashes are modified like so: (1) The contents of the
    # collection are added, one-by-one, to the one-dimensional array. (2) A
    # copy of the collection is stored that contains indexes pointing to the
    # objects in the one-dimensional array. Here's an example with an Array:
    #
    #   ary = [:a, :b, :c]
    #   flatten_collection(ary) # => [[1, 2, 3], :a, :b, :c]
    #
    # In the case of a Hash, keys and values are both appended to the one-
    # dimensional array and then replaced with indexes. Keys are visited in
    # sorted order (so they must be mutually comparable) and Symbol keys are
    # converted to Strings.
    #
    #   hsh = {:a => "blue", :b => "purple", :c => "green"}
    #   flatten_collection(hsh)
    #   # => [{1 => 2, 3 => 4, 5 => 6}, "a", "blue", "b", "purple", "c", "green"]
    #
    # Objects are de-duplicated by +object_id+, so the same object is never
    # added to the one-dimensional array twice.
    #
    #   ary = [:a, :a, :a]
    #   flatten_collection(ary) # => [[1, 1, 1], :a]
    #
    # The +obj_list+ and +id_refs+ parameters are private; they're used for
    # descending into sub-collections recursively. The return value is only
    # meaningful for the outermost call.
    def flatten_collection collection, obj_list=[], id_refs={}
      case collection
      when Array, Set
        if id_refs[collection.object_id]
          return obj_list[id_refs[collection.object_id]]
        end
        obj_refs = collection.class.new
        # Register before descending so children get later indexes.
        id_refs[collection.object_id] = obj_list.length
        obj_list << obj_refs
        collection.each do |obj|
          flatten_collection(obj, obj_list, id_refs)
          obj_refs << id_refs[obj.object_id]
        end
        return obj_list

      when Hash
        if id_refs[collection.object_id]
          return obj_list[id_refs[collection.object_id]]
        end
        obj_refs = {}
        id_refs[collection.object_id] = obj_list.length
        obj_list << obj_refs
        collection.keys.sort.each do |key|
          value = collection[key]
          key = key.to_s if key.is_a?(Symbol)
          flatten_collection(key, obj_list, id_refs)
          flatten_collection(value, obj_list, id_refs)
          obj_refs[id_refs[key.object_id]] = id_refs[value.object_id]
        end
        return obj_list
      else
        unless id_refs[collection.object_id]
          id_refs[collection.object_id] = obj_list.length
          obj_list << collection
        end
        return obj_list
      end
    end

    # Appends +data+ to the output and advances the running byte offset.
    def add_output data
      @output << data
      @offset += data.bytesize
    end

    # Writes the binary property list fragment that represents +obj+. The
    # fragment is not a complete property list and is not useful without a
    # header, offset table, and trailer.
    #
    # The following classes are recognized: Symbol, String, URL, Float,
    # Integer, the Boolean classes, Time, Date, IO, StringIO, Array, Set, and
    # Hash. IO and StringIO objects are rewound, switched to binary mode, read,
    # and the contents stored as data. Any other class (including NilClass)
    # raises a RuntimeError.
    #
    # Matching uses +case+/+when+, so subclasses of the supported classes are
    # encoded as though they were the supported superclass.
    #
    # Array, Set and Hash arguments must already hold object references as
    # produced by flatten_collection.
    #
    # +ref_byte_size+ is the number of bytes to use for storing references to
    # other objects.
    def binary_object obj, ref_byte_size = 4
      case obj
      when Symbol
        binary_string obj.to_s
      when String
        binary_string obj
      when URL
        binary_url obj.url
      when Float
        # Low nibble 3 => 2**3 = 8 byte big-endian double.
        add_output([MARKER_REAL | 3, obj].pack("CG"))
      when Integer
        nbytes = min_byte_size obj
        add_output((MARKER_INT | INT_SIZE_BITS[nbytes]).chr)
        binary_integer obj, nbytes
      when TrueClass
        add_output MARKER_TRUE.chr
      when FalseClass
        add_output MARKER_FALSE.chr
      when Time
        add_output([MARKER_DATE, obj.to_f - TIME_INTERVAL_SINCE_1970].pack("CG"))
      when Date # also covers DateTime
        add_output([MARKER_DATE, obj.to_time.to_f - TIME_INTERVAL_SINCE_1970].pack("CG"))
      when IO, StringIO
        obj.rewind
        obj.binmode
        data = obj.read
        binary_marker MARKER_DATA, data.bytesize
        add_output data
      when Array
        # Must be an array of object references as returned by flatten_collection.
        binary_marker MARKER_ARRAY, obj.size
        obj.each do |i|
          binary_integer i, ref_byte_size
        end
      when Set
        # Must be a set of object references as returned by flatten_collection.
        binary_marker MARKER_SET, obj.size
        obj.each do |i|
          binary_integer i, ref_byte_size
        end
      when Hash
        # Must be a table of object references as returned by flatten_collection.
        # All key references are written first, then all value references.
        binary_marker MARKER_DICT, obj.size
        obj.each_key do |k|
          binary_integer k, ref_byte_size
        end
        obj.each_value do |v|
          binary_integer v, ref_byte_size
        end
      else
        raise "Unsupported class: #{obj.class}"
      end
    end

    # Writes a marker byte for a variable-length object. Sizes below 15 are
    # stored in the marker's low nibble; otherwise the nibble is 0xF and the
    # size follows as an encoded integer object.
    def binary_marker marker, size
      if size < 15
        add_output((marker | size).chr)
      else
        add_output((marker | 0xf).chr)
        binary_object size
      end
    end

    # Writes +obj+ as a string object. Binary-encoded and ASCII-only strings
    # are written byte-for-byte under the ASCII marker; everything else is
    # transcoded to UTF-16BE.
    def binary_string obj
      if obj.encoding == Encoding::BINARY
        binary_marker MARKER_ASCII_STRING, obj.bytesize
        add_output obj
      elsif obj.ascii_only?
        # dup so the caller's string keeps its encoding.
        obj = obj.dup.force_encoding 'binary'
        binary_marker MARKER_ASCII_STRING, obj.bytesize
        add_output obj
      else
        data = obj.encode('utf-16be').force_encoding 'binary'
        # Length is counted in 2-byte units, so a character that needs 4 bytes
        # in UTF-16 counts as two.
        cp_size = data.bytesize / 2
        binary_marker MARKER_UTF16BE_STRING, cp_size # TODO check if it works for 4 bytes
        add_output data
      end
    end

    # Writes the URL string +obj+: a URL marker byte followed by the string
    # written with the ASCII string marker.
    #
    # NOTE(review): Apple's CFBinaryPList.c appears to describe 0x0C as "URL
    # with no base URL" and 0x0D as "URL with base URL" (base URL encoded
    # first). Tagging a string that has a scheme prefix as WITH_BASE_URL looks
    # inverted - confirm before relying on this output.
    def binary_url obj
      @v1 = true # marks that a version-1x element was emitted; not read here
      if obj =~ /\A\w+:/
        add_output MARKER_WITH_BASE_URL.chr
      else
        add_output MARKER_NO_BASE_URL.chr
      end
      binary_marker MARKER_ASCII_STRING, obj.bytesize
      add_output obj
    end

    def binary_uuid obj
      # TODO
    end

    def binary_ordered_set obj
      # TODO
    end

    # Packs an integer +i+ into its binary representation in the specified
    # number of bytes. Byte order is big-endian. Negative integers cannot be
    # stored in 1, 2, or 4 bytes.
    #
    # Raises ArgumentError for a negative +i+ with fewer than 8 bytes, or when
    # +num_bytes+ is not 1, 2, 4, 8, or 16.
    def binary_integer i, num_bytes
      if i < 0 && num_bytes < 8
        raise ArgumentError, "negative integers require 8 or 16 bytes of storage"
      end
      case num_bytes
      when 1
        add_output [i].pack("C")
      when 2
        add_output [i].pack("n")
      when 4
        add_output [i].pack("N")
      when 8
        add_output [i].pack("q>")
      when 16
        # Masking a Ruby Integer yields its two's complement bit pattern, so
        # this handles negative and non-negative values alike.
        bits = i & 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff
        add_output [bits >> 64, bits & 0xffff_ffff_ffff_ffff].pack("Q>2")
      else
        raise ArgumentError, "num_bytes must be 1, 2, 4, 8, or 16"
      end
    end

    # Determines the minimum number of bytes that is a power of two and can
    # represent the integer +i+. Raises a RangeError if the number of bytes
    # exceeds 16. Note that the property list format considers integers of 1,
    # 2, and 4 bytes to be unsigned, while 8- and 16-byte integers are signed;
    # thus negative integers will always require at least 8 bytes of storage.
    def min_byte_size i
      if i < 0
        # -(2**63) still fits in 8 signed bytes, hence the "- 1".
        magnitude = i.abs - 1
      else
        return 1 if i <= 0xff
        return 2 if i <= 0xffff
        return 4 if i <= 0xffffffff
        magnitude = i
      end

      if magnitude <= 0x7fffffffffffffff
        8
      elsif magnitude <= 0x7fffffffffffffffffffffffffffffff
        16
      else
        raise RangeError, "integer too big - exceeds 128 bits"
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
module PropertyList
  # Marker bytes and shared constants for the binary property list format.
  module BinaryMarkers
    # These marker bytes are prefixed to objects in a binary property list to
    # indicate the type of the object. Markers whose low nibble is zero carry
    # extra information (a size or a size exponent) in that nibble.
    MARKER_NULL           = 0b0000_0000 # v1?+ only
    MARKER_FALSE          = 0b0000_1000
    MARKER_TRUE           = 0b0000_1001
    MARKER_NO_BASE_URL    = 0b0000_1100 # followed by string, v1?+ only
    MARKER_WITH_BASE_URL  = 0b0000_1101 # followed by string, v1?+ only
    MARKER_UUID           = 0b0000_1110 # 16 byte uuid, v1?+ only
    MARKER_FILL           = 0b0000_1111 # fill byte
    MARKER_INT            = 0b0001_0000 # 0nnn
    MARKER_REAL           = 0b0010_0000 # 0nnn
    MARKER_DATE           = 0b0011_0011 # followed by an 8 byte big endian float

    MARKER_DATA           = 0b0100_0000 # [int]
    MARKER_ASCII_STRING   = 0b0101_0000 # [int]
    MARKER_UTF16BE_STRING = 0b0110_0000 # [int]
    MARKER_UTF8_STRING    = 0b0111_0000 # [int], v1?+ only
    MARKER_UID            = 0b1000_0000 # nnnn, followed by nnnn+1 bytes
    #                       0b1001_xxxx # unused

    MARKER_ARRAY          = 0b1010_0000
    MARKER_ORD_SET        = 0b1011_0000 # v1?+ only
    MARKER_SET            = 0b1100_0000 # v1?+ only
    MARKER_DICT           = 0b1101_0000
    #                       0b1110_xxxx # unused
    #                       0b1111_xxxx # unused

    # POSIX uses a reference time of 1970-01-01T00:00:00Z; Cocoa's reference
    # time is in 2001. This interval (in seconds) is for converting between
    # the two.
    TIME_INTERVAL_SINCE_1970 = 978307200.0
  end
end
@@ -0,0 +1,169 @@
1
module PropertyList
  # Parses +data+, a String holding a complete binary property list, and
  # returns the decoded root object.
  def self.load_binary(data)
    BinaryParser.new(data).parse
  end

  # Reference:
  # https://opensource.apple.com/source/CF/CF-1151.16/CFBinaryPList.c.auto.html
  class BinaryParser
    include BinaryMarkers

    # Reads the 32-byte trailer at the end of +src+: 6 skipped bytes, the
    # offset-entry width, the object-reference width, then three big-endian
    # 64-bit values (object count, root object index, offset table address).
    #
    # Raises ArgumentError when +src+ is too short to contain a trailer.
    def initialize src
      @src = src

      trailer = @src.byteslice(-32, 32)
      if trailer.nil?
        raise ArgumentError, "binary plist needs a 32 byte trailer, got #{@src.bytesize} bytes"
      end

      @offset_byte_size, @ref_byte_size, @flatten_objects_size, @root_object_index, @offset_table_addr =
        trailer.unpack '@6C2Q>3'
    end

    # Decodes the offset table and returns the decoded root object.
    def parse
      @offset_table = decode_offset_table
      decode_id @root_object_index
    end

    private

    # Decodes the object whose marker byte sits at byte +offset+ of the source
    # and returns the corresponding Ruby value. Collections are decoded
    # recursively through their object references.
    #
    # Raises a RuntimeError for URL/UUID markers (not implemented), for unused
    # markers, for unsupported real sizes, and when +offset+ is at the end of
    # the data.
    def decode_object offset
      first_byte, = @src.unpack "@#{offset}C"
      raise "no object marker at offset #{offset}" if first_byte.nil?

      # Most types keep their tag in the high nibble; the 0x0_ singletons and
      # the date marker are identified by the whole byte.
      marker = first_byte & 0xF0
      if marker == 0 || first_byte == MARKER_DATE
        marker = first_byte
      end

      case marker
      when MARKER_NULL
        nil
      when MARKER_FALSE
        false
      when MARKER_TRUE
        true
      when MARKER_NO_BASE_URL
        raise 'todo'
      when MARKER_WITH_BASE_URL
        raise 'todo'
      when MARKER_UUID
        raise 'todo'
      when MARKER_FILL
        # Skip the fill byte and decode whatever follows it.
        decode_object offset + 1
      when MARKER_INT
        size_bits = first_byte & 0x0F
        num_bytes = 2 ** size_bits
        decode_integer offset + 1, num_bytes
      when MARKER_REAL
        # The low nibble is the size exponent: 2 => 4 byte float, 3 => 8 byte double.
        case first_byte & 0x0F
        when 2
          r, = @src.unpack "@#{offset + 1}g"
        when 3
          r, = @src.unpack "@#{offset + 1}G"
        else
          raise "unsupported real size: #{2 ** (first_byte & 0x0F)} bytes"
        end
        r
      when MARKER_DATE
        seconds_since_2001, = @src.unpack "@#{offset + 1}G"
        Time.at(TIME_INTERVAL_SINCE_1970 + seconds_since_2001).to_datetime
      when MARKER_DATA
        data = @src.byteslice(*decode_vl_info(offset))
        StringIO.new data
      when MARKER_ASCII_STRING
        @src.byteslice(*decode_vl_info(offset))
      when MARKER_UTF16BE_STRING
        str_offset, str_size = decode_vl_info offset
        # The stored length counts 2-byte units, not bytes.
        s = @src.byteslice str_offset, str_size * 2
        s.force_encoding('utf-16be').encode 'utf-8'
      when MARKER_UTF8_STRING
        s = @src.byteslice(*decode_vl_info(offset))
        s.force_encoding 'utf-8'
      when MARKER_UID
        # Encoding is as integers, except values are unsigned.
        # These are used extensively in files written using NSKeyedArchiver, a serializer for Objective-C objects.
        # The value is the index in parse_result["$objects"]
        size = (first_byte & 0xF) + 1
        bytes = @src.byteslice offset + 1, size
        res = 0
        bytes.unpack('C*').each do |byte|
          res *= 256
          res += byte
        end
        UID[res]
      when MARKER_ARRAY
        refs_offset, size = decode_vl_info offset
        size.times.map do |i|
          id = decode_ref_id refs_offset + i * @ref_byte_size
          decode_id id
        end
      when MARKER_ORD_SET, MARKER_SET
        r = Set.new
        refs_offset, size = decode_vl_info offset
        size.times do |i|
          id = decode_ref_id refs_offset + i * @ref_byte_size
          r << (decode_id id)
        end
        r
      when MARKER_DICT
        refs_offset, size = decode_vl_info offset
        # All key references come first, followed by all value references.
        keys_byte_size = @ref_byte_size * size
        entries = []
        size.times do |i|
          k_offset = refs_offset + i * @ref_byte_size
          v_offset = k_offset + keys_byte_size
          entries << [
            decode_id(decode_ref_id(k_offset)),
            decode_id(decode_ref_id(v_offset))
          ]
        end
        entries.sort_by!(&:first)
        entries.to_h
      else
        raise "unused marker: 0b#{marker.to_s(2).rjust 8, '0'}"
      end
    end

    # Reads the size information of the variable-length object whose marker is
    # at +offset+. Returns +[payload_offset, size]+: the size is the marker's
    # low nibble, or, when that nibble is 0xF, the integer object that
    # immediately follows the marker.
    def decode_vl_info offset
      marker, = @src.unpack "@#{offset}C"
      vl_size_bits = marker & 0x0F

      if vl_size_bits == 0x0F
        # size is followed by marker int
        int_marker, = @src.unpack "@#{offset + 1}C"
        num_bytes = 2 ** (int_marker & 0x0F)
        size = decode_integer offset + 2, num_bytes
        [offset + 2 + num_bytes, size]
      else
        [offset + 1, vl_size_bits]
      end
    end

    # Returns the offset table as an Array of byte offsets, one per object.
    def decode_offset_table
      @flatten_objects_size.times.map do |i|
        offset_index = @offset_table_addr + i * @offset_byte_size
        decode_integer offset_index, @offset_byte_size
      end
    end

    # decode the i-th entry in offset table
    def decode_id i
      raise "ref-id should be positive, but got #{i}" if i < 0
      offset = @offset_table[i]
      raise "offset not found for ref-id #{i}" if !offset
      decode_object offset
    end

    # decode integer of ref byte size
    def decode_ref_id offset
      decode_integer offset, @ref_byte_size
    end

    # Decodes a big-endian integer of +num_bytes+ bytes at +offset+.
    # Raises ArgumentError when +num_bytes+ is not 1, 2, 4, 8, or 16.
    def decode_integer offset, num_bytes
      # NOTE: only num_bytes = 8 or 16 it can be negative
      case num_bytes
      when 1
        i, = @src.unpack "@#{offset}C"
      when 2
        i, = @src.unpack "@#{offset}n"
      when 4
        i, = @src.unpack "@#{offset}N"
      when 8
        i, = @src.unpack "@#{offset}q>"
      when 16
        # Only the high word carries the sign; the low word must be read as
        # unsigned or its top bit would corrupt the combined value.
        hi, lo = @src.unpack "@#{offset}q>Q>"
        i = (hi << 64) | lo
      else
        raise ArgumentError, "num_bytes must be 1, 2, 4, 8, or 16"
      end
      i
    end
  end
end