sj-plist 3.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,510 @@
1
+ require "date"
2
+ require "nkf"
3
+ require "set"
4
+ require "stringio"
5
+
6
+ module Plist
7
+ module Binary
8
# Encodes +obj+ as a binary property list and returns the encoded bytes as a
# binary-tagged String. If +obj+ is an Array, Hash, or Set, the property list
# includes its contents.
#
# The output is laid out as: the 8-byte header "bplist00", every encoded
# object back to back, an offset table with one entry per object, and a
# 32-byte trailer that describes the offset table.
def self.binary_plist(obj)
  encoded_objs = flatten_collection(obj)
  ref_byte_size = min_byte_size(encoded_objs.length - 1)

  # Accumulate into a binary-tagged String so that fragments with differing
  # encodings can be appended and every size below is counted in bytes.
  header = "bplist00"
  plist = String.new(header).force_encoding(Encoding::BINARY)

  # Write the objects, remembering the byte offset at which each one starts.
  offset_table = []
  encoded_objs.each do |o|
    fragment = binary_plist_obj(o, ref_byte_size).dup
    fragment.force_encoding(Encoding::BINARY)
    offset_table << plist.bytesize
    plist << fragment
  end

  # Write offset table. Entries are sized so that the largest possible
  # offset (the address of the table itself) fits.
  offset_table_addr = plist.bytesize
  offset_byte_size = min_byte_size(offset_table_addr)
  offset_table.each do |object_offset|
    plist << pack_int(object_offset, offset_byte_size)
  end

  # Write trailer. The three counters are 64-bit big-endian values, emitted
  # here as pairs of 32-bit words.
  plist << "\0\0\0\0\0\0" # Six unused bytes
  plist << [
    offset_byte_size,
    ref_byte_size,
    encoded_objs.length >> 32, encoded_objs.length & 0xffffffff,
    0, 0, # Index of root object
    offset_table_addr >> 32, offset_table_addr & 0xffffffff
  ].pack("CCNNNNNN")
  plist
end
45
+
46
# Decodes the binary property list held in the String +plist+ and returns
# its root object with all nested collections resolved.
#
# Raises ArgumentError if +plist+ does not start with the "bplist" magic, has
# a format version other than "00", is too short to contain a header and a
# trailer, or names a root object index outside the object count recorded in
# the trailer.
def self.decode_binary_plist(plist)
  # Work on a binary-tagged copy so that every index below is a byte index
  # and the caller's string is left untouched.
  plist = plist.dup.force_encoding(Encoding::BINARY)

  # Check header.
  unless plist[0, 6] == "bplist"
    raise ArgumentError, "argument is not a binary property list"
  end
  version = plist[6, 2]
  unless version == "00"
    raise ArgumentError,
      "don't know how to decode format version #{version}"
  end
  # 8 header bytes + 32 trailer bytes is the smallest possible file.
  if plist.bytesize < 40
    raise ArgumentError, "binary property list is truncated"
  end

  # Read trailer. Only its last 26 bytes carry data; each 64-bit counter is
  # read as two 32-bit words and recombined.
  trailer = plist[-26, 26].unpack("CCNNNNNN")
  offset_byte_size = trailer[0]
  ref_byte_size = trailer[1]
  encoded_objs_length = combine_ints(32, trailer[2], trailer[3])
  root_index = combine_ints(32, trailer[4], trailer[5])
  offset_table_addr = combine_ints(32, trailer[6], trailer[7])
  unless root_index < encoded_objs_length
    raise ArgumentError, "root object index #{root_index} is out of range"
  end

  # Decode objects. The cache of decoded objects is indexed by object
  # number, so the root must be stored at its own index (not always 0).
  root_offset = offset_for_index(plist, offset_table_addr,
                                 offset_byte_size, root_index)
  root_obj = decode_binary_plist_obj(plist, root_offset, ref_byte_size)
  obj_list = []
  obj_list[root_index] = root_obj
  unflatten_collection(root_obj, obj_list, plist, offset_table_addr,
                       offset_byte_size, ref_byte_size)
end
70
+
71
# NOTE: a bare +private+ only changes the visibility of instance methods
# defined after it. It does not hide the +def self.+ helpers or the constants
# that follow, so they remain callable from outside the module.
private

# These marker bytes are prefixed to objects in a binary property list to
# indicate the type of the object. For most types the high nibble selects
# the type and the low nibble carries a size or element count.
CFBinaryPlistMarkerNull = 0x00 # :nodoc:
CFBinaryPlistMarkerFalse = 0x08 # :nodoc:
CFBinaryPlistMarkerTrue = 0x09 # :nodoc:
CFBinaryPlistMarkerFill = 0x0F # :nodoc:
CFBinaryPlistMarkerInt = 0x10 # :nodoc:
CFBinaryPlistMarkerReal = 0x20 # :nodoc:
CFBinaryPlistMarkerDate = 0x33 # :nodoc:
CFBinaryPlistMarkerData = 0x40 # :nodoc:
CFBinaryPlistMarkerASCIIString = 0x50 # :nodoc:
CFBinaryPlistMarkerUnicode16String = 0x60 # :nodoc:
CFBinaryPlistMarkerUID = 0x80 # :nodoc:
CFBinaryPlistMarkerArray = 0xA0 # :nodoc:
CFBinaryPlistMarkerSet = 0xC0 # :nodoc:
CFBinaryPlistMarkerDict = 0xD0 # :nodoc:

# POSIX uses a reference time of 1970-01-01T00:00:00Z; Cocoa's reference
# time is 2001-01-01T00:00:00Z. This interval, in seconds, is for converting
# between the two.
NSTimeIntervalSince1970 = 978307200.0 # :nodoc:
93
+
94
# Takes an object (nominally a collection, like an Array, Set, or Hash, but
# any object is acceptable) and flattens it into a one-dimensional array,
# which is returned.
#
# Non-collection objects appear in the array as-is. For each Array, Set, or
# Hash, a new collection of the same kind is placed in the array; it holds
# the indexes at which the original's members were stored rather than the
# members themselves. The argument is not modified. Here's an example with
# an Array:
#
#   ary = [:a, :b, :c]
#   flatten_collection(ary)  # => [[1, 2, 3], :a, :b, :c]
#
# In the case of a Hash, keys and values are both appended to the one-
# dimensional array and then replaced with indexes. Symbol keys are
# converted to Strings first.
#
#   hsh = {:a => "blue", :b => "purple", :c => "green"}
#   flatten_collection(hsh)
#   # => [{1 => 2, 3 => 4, 5 => 6}, "a", "blue", "b", "purple", "c", "green"]
#
# Objects are tracked by +object_id+, so the same object is never added to
# the one-dimensional array twice; this also lets self-referencing
# collections terminate.
#
#   ary = [:a, :a, :a]
#   flatten_collection(ary)  # => [[1, 1, 1], :a]
#
# The +obj_list+ and +id_refs+ parameters are private; they're used for
# descending into sub-collections recursively. +id_refs+ maps an object_id
# to that object's index in +obj_list+.
def self.flatten_collection(collection, obj_list = [], id_refs = {})
  # Already recorded (shared or cyclic reference): nothing more to add.
  return obj_list if id_refs[collection.object_id]

  # Register the index before descending so that members which refer back
  # to this collection find it.
  id_refs[collection.object_id] = obj_list.length

  case collection
  when Array, Set
    obj_refs = collection.class.new
    obj_list << obj_refs
    collection.each do |member|
      flatten_collection(member, obj_list, id_refs)
      obj_refs << id_refs[member.object_id]
    end
  when Hash
    obj_refs = {}
    obj_list << obj_refs
    collection.each do |key, value|
      key = key.to_s if key.is_a?(Symbol)
      flatten_collection(key, obj_list, id_refs)
      flatten_collection(value, obj_list, id_refs)
      obj_refs[id_refs[key.object_id]] = id_refs[value.object_id]
    end
  else
    obj_list << collection
  end

  obj_list
end
157
+
158
# Reverses flatten_collection for one decoded object. If +collection+ is an
# Array, Set, or Hash of object indexes, every index is replaced in place
# with the object it refers to, descending into nested collections. Any
# other object is returned unchanged. Returns +collection+.
#
# +obj_list+ is a cache of already-decoded objects indexed by object number.
# An index with no cached object is looked up in the offset table of +plist+
# (located by +offset_table_addr+ and +offset_byte_size+), decoded with
# references of +ref_byte_size+ bytes, and added to the cache.
def self.unflatten_collection(collection, obj_list, plist,
                              offset_table_addr, offset_byte_size, ref_byte_size)
  resolve = lambda do |index|
    cached = obj_list[index]
    # Test for nil rather than truthiness so a decoded +false+ stays cached.
    next cached unless cached.nil?

    offset = offset_for_index(plist, offset_table_addr, offset_byte_size,
                              index)
    obj = decode_binary_plist_obj(plist, offset, ref_byte_size)
    # Cache before descending so that cyclic references terminate.
    obj_list[index] = obj
    unflatten_collection(obj, obj_list, plist, offset_table_addr,
                         offset_byte_size, ref_byte_size)
  end

  case collection
  when Array, Set
    collection.collect! { |index| resolve.call(index) }
  when Hash
    resolved = {}
    collection.each do |key_index, value_index|
      key_obj = resolve.call(key_index)
      value_obj = resolve.call(value_index)
      resolved[key_obj] = value_obj
    end
    collection.replace(resolved)
  end
  collection
end
199
+
200
# Returns a binary property list fragment that represents +obj+. The
# returned string is not a complete property list, just a fragment that
# describes +obj+, and is not useful without a header, offset table, and
# trailer.
#
# The following classes are recognized: String, Symbol, Float, Integer, the
# Boolean classes, Time, IO, StringIO, Array, Set, and Hash.
#
# * Strings (and Symbols, via +to_s+) that are ASCII-only are stored as
#   ASCII strings. Strings tagged as binary or with an invalid encoding are
#   also stored under the ASCII marker, byte for byte. All other strings are
#   transcoded to UTF-16BE and stored as Unicode strings, with the length
#   counted in 16-bit code units; an Encoding error from the transcoding is
#   not rescued.
# * IO and StringIO objects are rewound, read, and the contents stored as
#   data.
# * Array, Set, and Hash arguments must contain object indexes as produced
#   by flatten_collection, not the objects themselves.
# * All other classes are dumped with Marshal and stored as data.
#
# Subclasses of the supported classes are encoded as though they were the
# supported superclass.
#
# +ref_byte_size+ is the number of bytes to use for storing references to
# other objects.
def self.binary_plist_obj(obj, ref_byte_size = 4)
  case obj
  when String, Symbol
    str = obj.to_s
    if str.ascii_only? || str.encoding == Encoding::BINARY || !str.valid_encoding?
      payload = str.dup.force_encoding(Encoding::BINARY)
      result = binary_plist_header(CFBinaryPlistMarkerASCIIString,
                                   payload.bytesize)
    else
      payload = str.encode(Encoding::UTF_16BE)
      payload.force_encoding(Encoding::BINARY)
      # The length of a Unicode string is its number of 16-bit code units.
      result = binary_plist_header(CFBinaryPlistMarkerUnicode16String,
                                   payload.bytesize / 2)
    end
    result << payload
  when Float
    # Low nibble 3 means 2**3 = 8 bytes: a big-endian double.
    [CFBinaryPlistMarkerReal | 3].pack("C") + [obj].pack("G")
  when Integer
    nbytes = min_byte_size(obj)
    # The low nibble stores log2 of the byte count.
    size_bits = { 1 => 0, 2 => 1, 4 => 2, 8 => 3, 16 => 4 }[nbytes]
    [CFBinaryPlistMarkerInt | size_bits].pack("C") + pack_int(obj, nbytes)
  when TrueClass
    [CFBinaryPlistMarkerTrue].pack("C")
  when FalseClass
    [CFBinaryPlistMarkerFalse].pack("C")
  when Time
    # Dates are stored as seconds relative to the Cocoa reference date.
    [CFBinaryPlistMarkerDate].pack("C") +
      [obj.to_f - NSTimeIntervalSince1970].pack("G")
  when IO, StringIO
    obj.rewind
    binary_plist_data(obj.read)
  when Array, Set
    # Must hold object references as returned by flatten_collection.
    marker = obj.is_a?(Set) ? CFBinaryPlistMarkerSet : CFBinaryPlistMarkerArray
    result = binary_plist_header(marker, obj.length)
    obj.each { |ref| result << pack_int(ref, ref_byte_size) }
    result
  when Hash
    # Must be a table of object references as returned by
    # flatten_collection. All key references are written first, followed by
    # all value references in the same order.
    result = binary_plist_header(CFBinaryPlistMarkerDict, obj.length)
    obj.each_key { |ref| result << pack_int(ref, ref_byte_size) }
    obj.each_value { |ref| result << pack_int(ref, ref_byte_size) }
    result
  else
    binary_plist_data(Marshal.dump(obj))
  end
end

# Returns the marker byte for an object of type +marker+ holding +count+
# items. Counts below 15 are stored in the marker's low nibble; otherwise the
# nibble is 0xF and the count follows as an encoded integer object.
def self.binary_plist_header(marker, count)
  header = [marker | (count < 15 ? count : 0xf)].pack("C")
  header << binary_plist_obj(count) if count >= 15
  header
end
333
+
334
# Decodes the single object whose marker byte is at byte +offset+ of the
# String +plist+ and returns it.
#
# * ASCII strings are returned as Strings (tagged UTF-8 when they contain
#   only ASCII bytes); Unicode strings are converted from UTF-16BE to UTF-8,
#   with undecodable sequences replaced.
# * 4- and 8-byte reals are returned as Floats, integers as Integers, dates
#   as Times, and data as a StringIO.
# * Arrays, Sets, and dictionaries are returned as an Array, Set, or Hash of
#   object indexes, each read as a +ref_byte_size+-byte integer; they are not
#   resolved here (see unflatten_collection).
# * Any other marker (including null and UID) yields nil.
def self.decode_binary_plist_obj(plist, offset, ref_byte_size)
  # Read the marker as a byte value; String#[] would return a String.
  marker = plist.getbyte(offset)
  case marker
  when CFBinaryPlistMarkerASCIIString..(CFBinaryPlistMarkerASCIIString | 0xf)
    length, offset = decode_length(plist, offset)
    str = plist.byteslice(offset, length)
    str.force_encoding(Encoding::UTF_8) if str && str.ascii_only?
    str
  when CFBinaryPlistMarkerUnicode16String..(CFBinaryPlistMarkerUnicode16String | 0xf)
    # The length counts 16-bit code units, i.e. two bytes each.
    length, offset = decode_length(plist, offset)
    utf16 = plist.byteslice(offset, length * 2)
    utf16.force_encoding(Encoding::UTF_16BE)
    utf16.encode(Encoding::UTF_8, :invalid => :replace, :undef => :replace)
  when CFBinaryPlistMarkerReal | 2
    plist.byteslice(offset + 1, 4).unpack("g").first
  when CFBinaryPlistMarkerReal | 3
    plist.byteslice(offset + 1, 8).unpack("G").first
  when CFBinaryPlistMarkerInt..(CFBinaryPlistMarkerInt | 0xf)
    # The low nibble stores log2 of the byte count.
    num_bytes = 1 << (marker & 0xf)
    unpack_int(plist.byteslice(offset + 1, num_bytes))
  when CFBinaryPlistMarkerTrue
    true
  when CFBinaryPlistMarkerFalse
    false
  when CFBinaryPlistMarkerDate
    secs = plist.byteslice(offset + 1, 8).unpack("G").first +
           NSTimeIntervalSince1970
    Time.at(secs)
  when CFBinaryPlistMarkerData..(CFBinaryPlistMarkerData | 0xf)
    length, offset = decode_length(plist, offset)
    StringIO.new(plist.byteslice(offset, length))
  when CFBinaryPlistMarkerArray..(CFBinaryPlistMarkerArray | 0xf),
       CFBinaryPlistMarkerSet..(CFBinaryPlistMarkerSet | 0xf)
    length, offset = decode_length(plist, offset)
    refs = (0...length).map do |n|
      unpack_int(plist.byteslice(offset + n * ref_byte_size, ref_byte_size))
    end
    (marker & 0xf0) == CFBinaryPlistMarkerSet ? Set.new(refs) : refs
  when CFBinaryPlistMarkerDict..(CFBinaryPlistMarkerDict | 0xf)
    # All key references come first, followed by all value references.
    length, offset = decode_length(plist, offset)
    values_offset = offset + length * ref_byte_size
    hsh = {}
    length.times do |n|
      step = n * ref_byte_size
      key_ref = unpack_int(plist.byteslice(offset + step, ref_byte_size))
      value_ref = unpack_int(plist.byteslice(values_offset + step, ref_byte_size))
      hsh[key_ref] = value_ref
    end
    hsh
  else
    nil
  end
end
398
+
399
# Returns a binary property list fragment that represents a data object
# with the contents of the string +data+. A Cocoa application would decode
# this fragment as NSData. Like binary_plist_obj, the value returned by
# this method is not usable by itself; it is only useful as part of a
# complete binary property list with a header, offset table, and trailer.
#
# The size is counted in bytes. Sizes below 15 are stored in the marker's
# low nibble; for 15 bytes or more the nibble is 0xF and the size follows as
# an encoded integer object. +data+ itself is not modified.
def self.binary_plist_data(data)
  payload = data.dup.force_encoding(Encoding::BINARY)
  size = payload.bytesize
  result = [CFBinaryPlistMarkerData | (size < 15 ? size : 0xf)].pack("C")
  # The 0xF nibble always announces a length object, including at exactly 15.
  result << binary_plist_obj(size) if size >= 15
  result << payload
end
411
+
412
# Determines the minimum number of bytes that is a power of two and can
# represent the integer +i+. Raises a RangeError if the number of bytes
# exceeds 16. Note that the property list format considers integers of 1,
# 2, and 4 bytes to be unsigned, while 8- and 16-byte integers are signed;
# thus negative integers will always require at least 8 bytes of storage.
def self.min_byte_size(i)
  if i >= 0
    return 1 if i <= 0xff
    return 2 if i <= 0xffff
    return 4 if i <= 0xffffffff
    magnitude = i
  else
    # In two's complement -n needs as many bits as n - 1 does.
    magnitude = i.abs - 1
  end

  # Signed widths: one bit is reserved for the sign.
  return 8 if magnitude <= 0x7fffffffffffffff
  return 16 if magnitude <= 0x7fffffffffffffffffffffffffffffff

  raise(RangeError, "integer too big - exceeds 128 bits")
end
436
+
437
# Packs an integer +i+ into its binary representation in the specified
# number of bytes and returns it as a String. Byte order is big-endian.
# Negative integers are written in two's complement.
#
# Raises ArgumentError if +i+ is negative and +num_bytes+ is less than 8
# (negative integers cannot be stored in 1, 2, or 4 bytes) or if +num_bytes+
# is not 1, 2, 4, 8, or 16. Raises RangeError if +i+ does not fit in
# +num_bytes+ bytes, rather than silently dropping its high bits.
def self.pack_int(i, num_bytes)
  if i < 0 && num_bytes < 8
    raise(ArgumentError, "negative integers require 8 or 16 bytes of storage")
  end
  unless [1, 2, 4, 8, 16].include?(num_bytes)
    raise(ArgumentError, "num_bytes must be 1, 2, 4, 8, or 16")
  end

  bits = num_bytes * 8
  if i >= (1 << bits) || i < -(1 << (bits - 1))
    raise(RangeError, "integer #{i} does not fit in #{num_bytes} bytes")
  end

  case num_bytes
  when 1
    [i].pack("C")
  when 2
    [i].pack("n")
  when 4
    [i].pack("N")
  else
    # 8 and 16 bytes: emit 32-bit words, most significant first. Masking
    # each word yields the two's-complement pattern for negative values.
    words = (num_bytes / 4 - 1).downto(0).map do |word|
      (i >> (32 * word)) & 0xffffffff
    end
    words.pack("N*")
  end
end
460
+
461
# Combines +ints+, given most significant first, into a single integer by
# treating each one as a +num_bits+-wide digit. For example,
# combine_ints(32, hi, lo) rebuilds a 64-bit value from two 32-bit words.
# Returns 0 when no parts are given.
def self.combine_ints(num_bits, *ints)
  ints.inject(0) { |combined, part| (combined << num_bits) + part }
end
470
+
471
# Returns the byte offset of object number +index+, read from the offset
# table of +plist+. The table starts at byte +table_addr+ and each entry is
# +offset_byte_size+ bytes wide.
#
# Raises ArgumentError if the entry lies partly or wholly outside +plist+.
def self.offset_for_index(plist, table_addr, offset_byte_size, index)
  # byteslice keeps the arithmetic in bytes regardless of the string's
  # encoding tag.
  entry = plist.byteslice(table_addr + index * offset_byte_size,
                          offset_byte_size)
  if entry.nil? || entry.bytesize != offset_byte_size
    raise ArgumentError,
      "offset table entry #{index} lies outside the property list"
  end
  unpack_int(entry)
end
475
+
476
# Decodes the big-endian integer stored in the bytes of the String +s+.
# Values of 1, 2, and 4 bytes are unsigned; values of 8 and 16 bytes are
# signed (two's complement).
#
# Raises ArgumentError if +s+ is not 1, 2, 4, 8, or 16 bytes long.
def self.unpack_int(s)
  size = s.bytesize
  unless [1, 2, 4, 8, 16].include?(size)
    raise(ArgumentError, "length must be 1, 2, 4, 8, or 16 bytes")
  end

  value = s.unpack("C*").inject(0) { |acc, byte| (acc << 8) | byte }

  # For the signed widths, a set top bit means the value is negative.
  bits = size * 8
  value -= 1 << bits if size >= 8 && value[bits - 1] == 1
  value
end
498
+
499
# Reads the length of the variable-length object whose marker byte is at
# byte +offset+ of +plist+. Returns a two-element array: the length, and the
# offset of the first byte after the length information (where the object's
# contents begin).
#
# A low nibble below 0xF in the marker is the length itself. A nibble of 0xF
# means the length follows as an encoded integer object, whose own marker
# states how many bytes it occupies.
#
# Raises ArgumentError if +offset+ is outside +plist+ or if the bytes after a
# 0xF nibble are not a complete integer object.
def self.decode_length(plist, offset)
  marker = plist.getbyte(offset)
  if marker.nil?
    raise ArgumentError, "offset #{offset} lies outside the property list"
  end

  nibble = marker & 0xf
  return [nibble, offset + 1] if nibble < 0xf

  int_marker = plist.getbyte(offset + 1)
  unless int_marker && (int_marker & 0xf0) == CFBinaryPlistMarkerInt
    raise ArgumentError, "malformed length at offset #{offset}"
  end

  # Take the width from the integer's marker, not from the decoded value: a
  # writer may store a small length in a wider integer.
  num_bytes = 1 << (int_marker & 0xf)
  digits = plist.byteslice(offset + 2, num_bytes)
  unless digits && digits.bytesize == num_bytes
    raise ArgumentError, "malformed length at offset #{offset}"
  end

  # Lengths are never negative, so the bytes are read as unsigned.
  length = digits.unpack("C*").inject(0) { |acc, byte| (acc << 8) | byte }
  [length, offset + 2 + num_bytes]
end
509
+ end
510
+ end