property-list 1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,306 @@
1
+ # encoding: binary
2
+
3
+ module PropertyList
4
# Serializes +obj+ as a binary property list and returns the result as a
# single String. +options+ is passed to BinaryGenerator.new unchanged.
def self.dump_binary obj, options=nil
  BinaryGenerator.new(options).tap { |gen| gen.generate obj }.output.join
end
9
+
10
+ # Modified from:
11
+ # https://github.com/jarib/plist/blob/master/lib/plist/binary.rb
12
+ #
13
+ # With improved performance
14
+ class BinaryGenerator
15
+ include BinaryMarkers
16
+
17
# Starts with no output chunks and a byte offset of zero. +opts+ is accepted
# but not read by this method.
def initialize opts
  @output, @offset = [], 0
end
21
+ attr_reader :output
22
+
23
+ # Encodes +obj+ as a binary property list. If +obj+ is an Array, Hash, or
24
+ # Set, the property list includes its contents.
25
def generate object
  objects = flatten_collection object
  # References are indexes into the flattened list, so the largest index
  # decides how wide each reference must be.
  ref_byte_size = min_byte_size(objects.size - 1)

  # Header, then every object in order; record where each object starts.
  # TODO use bplist10 when there are version 1x elements
  add_output "bplist00"
  offset_table = objects.map do |item|
    start = @offset
    binary_object item, ref_byte_size
    start
  end

  # The offset table begins where the last object ended.
  offset_table_addr = @offset
  offset_byte_size = min_byte_size offset_table_addr
  offset_table.each { |start| binary_integer start, offset_byte_size }

  # Trailer: 6 padding bytes + 2 size bytes + 3 big-endian 64-bit integers
  # (object count, root object index, offset table address) = 32 bytes.
  trailer = [
    "\0\0\0\0\0\0",
    offset_byte_size, ref_byte_size,
    objects.size,
    0,
    offset_table_addr
  ]
  add_output trailer.pack("a*C2Q>3")
end
54
+
55
+ private
56
+
57
+ # Takes an object (nominally a collection, like an Array, Set, or Hash, but
58
+ # any object is acceptable) and flattens it into a one-dimensional array.
59
+ # Non-collection objects appear in the array as-is, but the contents of
60
+ # Arrays, Sets, and Hashes are modified like so: (1) The contents of the
61
+ # collection are added, one-by-one, to the one-dimensional array. (2) The
62
+ # collection itself is modified so that it contains indexes pointing to the
63
+ # objects in the one-dimensional array. Here's an example with an Array:
64
+ #
65
+ # ary = [:a, :b, :c]
66
+ # flatten_collection(ary) # => [[1, 2, 3], :a, :b, :c]
67
+ #
68
+ # In the case of a Hash, keys and values are both appended to the one-
69
+ # dimensional array and then replaced with indexes.
70
+ #
71
+ # hsh = {:a => "blue", :b => "purple", :c => "green"}
72
+ # flatten_collection(hsh)
73
+ # # => [{1 => 2, 3 => 4, 5 => 6}, :a, "blue", :b, "purple", :c, "green"]
74
+ #
75
+ # An object will never be added to the one-dimensional array twice. If a
76
+ # collection refers to an object more than once, the object will be added
77
+ # to the one-dimensional array only once.
78
+ #
79
+ # ary = [:a, :a, :a]
80
+ # flatten_collection(ary) # => [[1, 1, 1], :a]
81
+ #
82
+ # The +obj_list+ and +id_refs+ parameters are private; they're used for
83
+ # descending into sub-collections recursively.
84
def flatten_collection collection, obj_list=[], id_refs={}
  case collection
  when Array, Set
    # Already visited (shared or cyclic reference): its slot exists, so there
    # is nothing more to append.
    return obj_list if id_refs[collection.object_id]
    obj_refs = collection.class.new
    id_refs[collection.object_id] = obj_list.length
    obj_list << obj_refs
    collection.each do |obj|
      flatten_collection(obj, obj_list, id_refs)
      obj_refs << id_refs[obj.object_id]
    end

  when Hash
    return obj_list if id_refs[collection.object_id]
    obj_refs = {}
    id_refs[collection.object_id] = obj_list.length
    obj_list << obj_refs
    # Symbol keys are written as Strings, so order them by their String form.
    # A plain `sort` raises when Symbol and String keys are mixed.
    ordered_keys = collection.keys.sort_by { |k| k.is_a?(Symbol) ? k.to_s : k }
    ordered_keys.each do |key|
      value = collection[key]
      key = key.to_s if key.is_a?(Symbol)
      flatten_collection(key, obj_list, id_refs)
      flatten_collection(value, obj_list, id_refs)
      obj_refs[id_refs[key.object_id]] = id_refs[value.object_id]
    end

  else
    # Non-collection object: append it once, keyed by object identity.
    unless id_refs[collection.object_id]
      id_refs[collection.object_id] = obj_list.length
      obj_list << collection
    end
  end
  # Every path returns the flattened list itself.
  obj_list
end
122
+
123
# Appends one chunk to the output list and advances the running byte offset
# by the chunk's size. Returns the new offset.
def add_output data
  @output.push data
  @offset = @offset + data.bytesize
end
127
+
128
+ # Returns a binary property list fragment that represents +obj+. The
129
+ # returned string is not a complete property list, just a fragment that
130
+ # describes +obj+, and is not useful without a header, offset table, and
131
+ # trailer.
132
+ #
133
+ # The following classes are recognized: String, Float, Integer, the Boolean
134
+ # classes, Time, IO, StringIO, Array, Set, and Hash. IO and StringIO
135
+ # objects are rewound, read, and the contents stored as data (i.e., Cocoa
136
+ # applications will decode them as NSData). All other classes are rejected
137
+ # with a RuntimeError ("Unsupported class: ...").
138
+ #
139
+ # Note that subclasses of the supported classes will be encoded as though
140
+ # they were the supported superclass. Thus, a subclass of (for example)
141
+ # String will be encoded and decoded as a String, not as the subclass:
142
+ #
143
+ # class ExampleString < String
144
+ # ...
145
+ # end
146
+ #
147
+ # s = ExampleString.new("disquieting plantlike mystery")
148
+ # encoded_s = binary_object(s)
149
+ # decoded_s = decode_binary_object(encoded_s)
150
+ # puts decoded_s.class # => String
151
+ #
152
+ # +ref_byte_size+ is the number of bytes to use for storing references to
153
+ # other objects.
154
def binary_object obj, ref_byte_size = 4
  case obj
  when Symbol then binary_string obj.to_s
  when String then binary_string obj
  when URL    then binary_url obj.url
  when Float
    # Size nibble 3 means 2**3 = 8 bytes: a big-endian double follows.
    add_output([(MARKER_REAL | 3), obj].pack("CG"))
  when Integer
    width = min_byte_size obj
    # The low nibble of an int marker stores log2 of the byte width.
    exponent = { 1 => 0, 2 => 1, 4 => 2, 8 => 3, 16 => 4 }[width]
    add_output((MARKER_INT | exponent).chr)
    binary_integer obj, width
  when TrueClass  then add_output(MARKER_TRUE.chr)
  when FalseClass then add_output(MARKER_FALSE.chr)
  when Time
    # Dates are stored as seconds relative to the 2001 reference date.
    add_output([MARKER_DATE, obj.to_f - TIME_INTERVAL_SINCE_1970].pack("CG"))
  when Date # also covers DateTime
    add_output([MARKER_DATE, obj.to_time.to_f - TIME_INTERVAL_SINCE_1970].pack("CG"))
  when IO, StringIO
    obj.rewind
    obj.binmode
    payload = obj.read
    binary_marker MARKER_DATA, payload.bytesize
    add_output payload
  when Array
    # Must be an array of object references as returned by flatten_collection.
    binary_marker MARKER_ARRAY, obj.size
    obj.each { |ref| binary_integer ref, ref_byte_size }
  when Set
    # Must be a set of object references as returned by flatten_collection.
    binary_marker MARKER_SET, obj.size
    obj.each { |ref| binary_integer ref, ref_byte_size }
  when Hash
    # Must be a table of object references as returned by flatten_collection.
    # All key references are written first, then all value references.
    binary_marker MARKER_DICT, obj.size
    obj.keys.each { |key_ref| binary_integer key_ref, ref_byte_size }
    obj.values.each { |value_ref| binary_integer value_ref, ref_byte_size }
  else
    raise "Unsupported class: #{obj.class}"
  end
end
208
+
209
# Writes a marker byte whose low nibble carries +size+. Sizes of 15 or more
# do not fit in the nibble: the nibble is set to 0xf and the real size
# follows as an encoded integer object.
def binary_marker marker, size
  return add_output((marker | size).chr) if size < 15

  add_output((marker | 0xf).chr)
  binary_object size
end
217
+
218
# Writes +obj+ as a string object. Binary-encoded and ASCII-only strings are
# written byte-for-byte under the ASCII marker; anything else is transcoded
# to UTF-16BE and written under the UTF-16 marker.
def binary_string obj
  binary = Encoding.find('binary')
  unless obj.encoding == binary || obj.ascii_only?
    utf16 = obj.encode('utf-16be').force_encoding 'binary'
    # The count is in 16-bit units, not bytes.
    # TODO check if it works for 4 bytes
    binary_marker MARKER_UTF16BE_STRING, utf16.bytesize / 2
    return add_output(utf16)
  end

  # An already-binary string is written as-is; an ASCII-only string is copied
  # first so the caller's string keeps its encoding.
  raw = obj.encoding == binary ? obj : obj.dup.force_encoding('binary')
  binary_marker MARKER_ASCII_STRING, raw.bytesize
  add_output raw
end
233
+
234
# Writes the URL string +obj+ as a URL marker byte followed by an ASCII-marked
# string. A string starting with "<word characters>:" gets the
# MARKER_WITH_BASE_URL byte, anything else gets MARKER_NO_BASE_URL.
def binary_url obj
  # Records that a version-1x element was emitted.
  @v1 = true
  if obj =~ /\A\w+:/
    add_output MARKER_WITH_BASE_URL.chr
  else
    add_output MARKER_NO_BASE_URL.chr
  end
  # Write a binary-encoded copy: a non-ASCII string in another encoding would
  # make joining the output chunks raise Encoding::CompatibilityError.
  raw = obj.dup.force_encoding 'binary'
  binary_marker MARKER_ASCII_STRING, raw.bytesize
  add_output raw
end
244
+
245
+ def binary_uuid obj
246
+ # TODO
247
+ end
248
+
249
+ def binary_ordered_set obj
250
+ # TODO
251
+ end
252
+
253
+ # Packs an integer +i+ into its binary representation in the specified
254
+ # number of bytes. Byte order is big-endian. Negative integers cannot be
255
+ # stored in 1, 2, or 4 bytes.
256
def binary_integer i, num_bytes
  if i < 0 && num_bytes < 8
    raise ArgumentError, "negative integers require 8 or 16 bytes of storage"
  end
  case num_bytes
  when 1
    add_output([i].pack("C"))
  when 2
    add_output([i].pack("n"))
  when 4
    add_output([i].pack("N"))
  when 8
    add_output([i].pack("q>"))
  when 16
    # 128-bit two's complement. Integer#& treats a negative receiver as
    # infinitely sign-extended, so masking gives the unsigned bit pattern.
    # (The earlier `mask ^ i.abs + 1` parsed as `mask ^ (i.abs + 1)` and so
    # produced wrong bytes for negative values.)
    bits = i & 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff
    # Both halves are now unsigned 64-bit values, so pack them as such.
    add_output([bits >> 64, bits & 0xffff_ffff_ffff_ffff].pack("Q>2"))
  else
    raise ArgumentError, "num_bytes must be 1, 2, 4, 8, or 16"
  end
end
279
+
280
+ # Determines the minimum number of bytes that is a power of two and can
281
+ # represent the integer +i+. Raises a RangeError if the number of bytes
282
+ # exceeds 16. Note that the property list format considers integers of 1,
283
+ # 2, and 4 bytes to be unsigned, while 8- and 16-byte integers are signed;
284
+ # thus negative integers will always require at least 8 bytes of storage.
285
def min_byte_size i
  # Only non-negative values may use the unsigned 1-, 2- and 4-byte widths.
  unless i < 0
    return 1 if i <= 0xff
    return 2 if i <= 0xffff
    return 4 if i <= 0xffffffff
  end

  # The 8- and 16-byte widths are signed. A negative n fits in the same width
  # as the non-negative value |n| - 1.
  magnitude = i < 0 ? i.abs - 1 : i
  return 8 if magnitude <= 0x7fffffffffffffff
  return 16 if magnitude <= 0x7fffffffffffffffffffffffffffffff

  raise RangeError, "integer too big - exceeds 128 bits"
end
305
+ end
306
+ end
@@ -0,0 +1,34 @@
1
+ module PropertyList
2
module BinaryMarkers
  # Marker bytes: each object in a binary property list starts with one of
  # these to indicate its type. Values are written in hex; the high nibble is
  # the type and, where noted, the low nibble carries a size or count.

  # Whole-byte markers (high nibble 0x0).
  MARKER_NULL           = 0x00 # v1?+ only
  MARKER_FALSE          = 0x08
  MARKER_TRUE           = 0x09
  MARKER_NO_BASE_URL    = 0x0C # followed by string, v1?+ only
  MARKER_WITH_BASE_URL  = 0x0D # followed by string, v1?+ only
  MARKER_UUID           = 0x0E # 16 byte uuid, v1?+ only
  MARKER_FILL           = 0x0F # fill byte

  # Numbers and dates.
  MARKER_INT            = 0x10 # low nibble 0nnn
  MARKER_REAL           = 0x20 # low nibble 0nnn
  MARKER_DATE           = 0x33 # followed by an 8 byte big endian float

  # Byte and character sequences; the low nibble holds [int].
  MARKER_DATA           = 0x40 # [int]
  MARKER_ASCII_STRING   = 0x50 # [int]
  MARKER_UTF16BE_STRING = 0x60 # [int]
  MARKER_UTF8_STRING    = 0x70 # [int], v1?+ only
  MARKER_UID            = 0x80 # low nibble nnnn, followed by nnnn+1 bytes
  # 0x9x is unused

  # Collections.
  MARKER_ARRAY          = 0xA0
  MARKER_ORD_SET        = 0xB0 # v1?+ only
  MARKER_SET            = 0xC0 # v1?+ only
  MARKER_DICT           = 0xD0
  # 0xEx and 0xFx are unused

  # POSIX uses a reference time of 1970-01-01T00:00:00Z; Cocoa's reference
  # time is in 2001. This interval converts between the two.
  TIME_INTERVAL_SINCE_1970 = 978307200.0
end
34
+ end
@@ -0,0 +1,169 @@
1
+ module PropertyList
2
# Parses +data+ as a binary property list and returns the decoded root
# object.
def self.load_binary(data)
  parser = BinaryParser.new(data)
  parser.parse
end
5
+
6
+ # Reference:
7
+ # https://opensource.apple.com/source/CF/CF-1151.16/CFBinaryPList.c.auto.html
8
+ class BinaryParser
9
+ include BinaryMarkers
10
+
11
# Stores +src+ and reads the five trailer fields from its last 32 bytes.
#
# Raises ArgumentError when +src+ is nil or shorter than the trailer.
def initialize src
  # Without this check byteslice returns nil for short input and the failure
  # shows up as an unrelated NoMethodError.
  if src.nil? || src.bytesize < 32
    raise ArgumentError, "binary property list is too short: a 32-byte trailer is required"
  end
  @src = src

  # Trailer layout: 6 bytes skipped, two single-byte sizes, then three
  # big-endian unsigned 64-bit integers.
  @offset_byte_size, @ref_byte_size, @flatten_objects_size, @root_object_index, @offset_table_addr =
    @src.byteslice(-32, 32).unpack('@6C2Q>3')
end
17
+
18
# Decodes the offset table, then decodes and returns the root object.
def parse
  # The table must be stored first: decode_id looks offsets up in it.
  @offset_table = decode_offset_table
  decode_id(@root_object_index)
end
22
+
23
+ private
24
+
25
# Decodes and returns the object whose marker byte is at byte +offset+ of the
# source. Collections are decoded recursively through their references.
#
# Raises RuntimeError for URL and UUID markers (not implemented) and for
# unused marker values.
def decode_object offset
  first_byte, = @src.unpack "@#{offset}C"
  # The high nibble names the type. When it is zero, and for the date
  # marker, the whole byte is the marker.
  marker = first_byte & 0xF0
  marker = first_byte if marker == 0 || first_byte == MARKER_DATE

  case marker
  when MARKER_NULL
    nil
  when MARKER_FALSE
    false
  when MARKER_TRUE
    true
  when MARKER_NO_BASE_URL
    raise 'todo'
  when MARKER_WITH_BASE_URL
    raise 'todo'
  when MARKER_UUID
    raise 'todo'
  when MARKER_FILL
    # A fill byte is padding: decode whatever follows it.
    decode_object offset + 1
  when MARKER_INT
    size_bits = first_byte & 0x0F
    num_bytes = 2 ** size_bits
    decode_integer offset + 1, num_bytes
  when MARKER_REAL
    r, = @src.unpack "@#{offset + 1}G"
    r
  when MARKER_DATE
    seconds_since_2001, = @src.unpack "@#{offset + 1}G"
    Time.at(TIME_INTERVAL_SINCE_1970 + seconds_since_2001).to_datetime
  when MARKER_DATA
    data = @src.byteslice(*decode_vl_info(offset))
    StringIO.new data
  when MARKER_ASCII_STRING
    @src.byteslice(*decode_vl_info(offset))
  when MARKER_UTF16BE_STRING
    # The stored size counts 16-bit units, hence the doubling.
    str_offset, str_size = decode_vl_info offset
    s = @src.byteslice str_offset, str_size * 2
    s.force_encoding('utf-16be').encode 'utf-8'
  when MARKER_UTF8_STRING
    s = @src.byteslice(*decode_vl_info(offset))
    s.force_encoding 'utf-8'
  when MARKER_UID
    # Encoding is as integers, except values are unsigned.
    # These are used extensively in files written using NSKeyedArchiver, a serializer for Objective-C objects.
    # The value is the index in parse_result["$objects"]
    size = (first_byte & 0xF) + 1
    bytes = @src.byteslice offset + 1, size
    res = 0
    bytes.unpack('C*').each do |byte|
      res *= 256
      res += byte
    end
    UID[res]
  when MARKER_ARRAY
    offset, size = decode_vl_info offset
    size.times.map do |i|
      id = decode_ref_id offset + i * @ref_byte_size
      decode_id id
    end
  when MARKER_ORD_SET, MARKER_SET
    r = Set.new
    offset, size = decode_vl_info offset
    size.times do |i|
      id = decode_ref_id offset + i * @ref_byte_size
      r << (decode_id id)
    end
    r
  when MARKER_DICT
    # All key references come first, followed by all value references.
    offset, size = decode_vl_info offset
    keys_byte_size = @ref_byte_size * size
    entries = []
    size.times do |i|
      k_offset = offset + i * @ref_byte_size
      v_offset = k_offset + keys_byte_size
      entries << [
        decode_id(decode_ref_id(k_offset)),
        decode_id(decode_ref_id(v_offset))
      ]
    end
    entries.sort_by!(&:first)
    Hash[entries]
  else
    raise "unused marker: 0b#{marker.to_s(2).rjust 8, '0'}"
  end
end
113
+
114
# Reads the size of the variable-length object whose marker byte is at
# +offset+. Returns a two-element array: the offset where the payload starts
# and the size.
def decode_vl_info offset
  head, = @src.unpack "@#{offset}C"
  inline_size = head & 0x0F

  # Any nibble other than 0xF is the size itself; the payload follows directly.
  return [offset + 1, inline_size] unless inline_size == 0x0F

  # Otherwise an int marker byte follows; its low nibble is log2 of the
  # width of the size integer that comes after it.
  int_marker, = @src.unpack "@#{offset + 1}C"
  width = 2 ** (int_marker & 0x0F)
  [offset + 2 + width, decode_integer(offset + 2, width)]
end
128
+
129
# Reads the offset table: one fixed-width integer per object, stored back to
# back starting at the table address. Returns the offsets as an Array.
def decode_offset_table
  Array.new(@flatten_objects_size) do |index|
    decode_integer(@offset_table_addr + index * @offset_byte_size, @offset_byte_size)
  end
end
135
+
136
+ # decode the i-th entry in offset table
137
# Looks up entry +i+ of the offset table and decodes the object stored at
# that offset. Raises RuntimeError for a negative index or when the table
# has no entry for it.
def decode_id i
  if i < 0
    raise "ref-id should be positive, but got #{i}"
  end

  target = @offset_table[i]
  unless target
    raise "offset not found for ref-id #{i}"
  end

  decode_object target
end
143
+
144
+ # decode integer of ref byte size
145
# Reads the object reference stored at +offset+, using the reference width
# taken from the trailer.
def decode_ref_id offset
  decode_integer(offset, @ref_byte_size)
end
148
+
149
# Reads a big-endian integer of +num_bytes+ bytes starting at byte +offset+
# of the source. Widths of 1, 2 and 4 bytes are unsigned; 8 and 16 bytes are
# signed (two's complement).
#
# Raises ArgumentError for any other width.
def decode_integer offset, num_bytes
  case num_bytes
  when 1
    i, = @src.unpack "@#{offset}C"
  when 2
    i, = @src.unpack "@#{offset}n"
  when 4
    i, = @src.unpack "@#{offset}N"
  when 8
    i, = @src.unpack "@#{offset}q>"
  when 16
    # Only the high word carries the sign. The low word must be read
    # unsigned: a signed (negative) low word would set every high bit when
    # OR-ed in and corrupt the result.
    hi, lo = @src.unpack "@#{offset}q>Q>"
    i = (hi << 64) | lo
  else
    raise ArgumentError, "num_bytes must be 1, 2, 4, 8, or 16"
  end
  i
end
168
+ end
169
+ end