sj-plist 3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,510 @@
1
+ require "date"
2
+ require "nkf"
3
+ require "set"
4
+ require "stringio"
5
+
6
module Plist
  # Encoder and decoder for Apple's binary property list format, version
  # "bplist00".
  #
  # All strings produced by the encoder are binary (ASCII-8BIT) and all offsets
  # and lengths are measured in bytes, never characters.
  #
  # Every method and constant in this module is public. The original source
  # placed a bare +private+ before the helpers, but +private+ does not affect
  # +def self.+ methods or constants, so it has been dropped rather than
  # silently changing visibility for existing callers.
  module Binary
    # Encodes +obj+ as a binary property list. If +obj+ is an Array, Hash, or
    # Set, the property list includes its contents.
    #
    # Returns the complete property list as a binary String: the "bplist00"
    # header, the encoded objects, the offset table, and the 32-byte trailer.
    def self.binary_plist(obj)
      encoded_objs = flatten_collection(obj)
      ref_byte_size = min_byte_size(encoded_objs.length - 1)

      # Write header.
      plist = "bplist00".b

      # Write the objects, remembering the byte offset at which each starts.
      offsets = encoded_objs.map do |o|
        start = plist.bytesize
        plist << binary_plist_obj(o, ref_byte_size)
        start
      end

      # Write offset table. The table begins right after the last object, and
      # that address is also the largest value the table entries must hold.
      offset_table_addr = plist.bytesize
      offset_byte_size = min_byte_size(offset_table_addr)
      offsets.each { |start| plist << pack_int(start, offset_byte_size) }

      # Write trailer: six unused bytes, the two size bytes, then three
      # 64-bit big-endian integers (object count, root index, table address).
      plist << ("\0" * 6)
      plist << [
        offset_byte_size,
        ref_byte_size,
        encoded_objs.length,
        0, # Index of root object
        offset_table_addr
      ].pack("CCQ>Q>Q>")
      plist
    end

    # Decodes the binary property list in the String +plist+ and returns the
    # root object with all of its contents resolved.
    #
    # Raises ArgumentError if +plist+ does not start with "bplist", if the
    # format version is not "00", or if the data is too short to contain a
    # trailer.
    def self.decode_binary_plist(plist)
      data = plist.b

      # Check header.
      unless data.byteslice(0, 6) == "bplist"
        raise ArgumentError, "argument is not a binary property list"
      end
      version = data.byteslice(6, 2)
      unless version == "00"
        raise ArgumentError,
          "don't know how to decode format version #{version}"
      end

      # Read trailer (the last 26 bytes of the 32-byte trailer carry data).
      if data.bytesize < 8 + 32
        raise ArgumentError, "binary property list is truncated or corrupt"
      end
      offset_byte_size, ref_byte_size, _encoded_objs_length, root_index,
        offset_table_addr = data.byteslice(-26, 26).unpack("CCQ>Q>Q>")

      # Decode objects, starting from the root named by the trailer.
      root_offset = offset_for_index(data, offset_table_addr,
        offset_byte_size, root_index)
      root_obj = decode_binary_plist_obj(data, root_offset, ref_byte_size)
      obj_list = []
      obj_list[root_index] = root_obj
      unflatten_collection(root_obj, obj_list, data, offset_table_addr,
        offset_byte_size, ref_byte_size)
    end

    # These marker bytes are prefixed to objects in a binary property list to
    # indicate the type of the object.
    CFBinaryPlistMarkerNull = 0x00 # :nodoc:
    CFBinaryPlistMarkerFalse = 0x08 # :nodoc:
    CFBinaryPlistMarkerTrue = 0x09 # :nodoc:
    CFBinaryPlistMarkerFill = 0x0F # :nodoc:
    CFBinaryPlistMarkerInt = 0x10 # :nodoc:
    CFBinaryPlistMarkerReal = 0x20 # :nodoc:
    CFBinaryPlistMarkerDate = 0x33 # :nodoc:
    CFBinaryPlistMarkerData = 0x40 # :nodoc:
    CFBinaryPlistMarkerASCIIString = 0x50 # :nodoc:
    CFBinaryPlistMarkerUnicode16String = 0x60 # :nodoc:
    CFBinaryPlistMarkerUID = 0x80 # :nodoc:
    CFBinaryPlistMarkerArray = 0xA0 # :nodoc:
    CFBinaryPlistMarkerSet = 0xC0 # :nodoc:
    CFBinaryPlistMarkerDict = 0xD0 # :nodoc:

    # POSIX uses a reference time of 1970-01-01T00:00:00Z; Cocoa's reference
    # time is in 2001. This interval is for converting between the two.
    NSTimeIntervalSince1970 = 978307200.0 # :nodoc:

    # Takes an object (nominally a collection, like an Array, Set, or Hash, but
    # any object is acceptable) and flattens it into a one-dimensional array.
    # Non-collection objects appear in the array as-is, but the contents of
    # Arrays, Sets, and Hashes are modified like so: (1) The contents of the
    # collection are added, one-by-one, to the one-dimensional array. (2) The
    # collection itself is replaced by a new collection of the same class that
    # contains indexes pointing to the objects in the one-dimensional array.
    # Here's an example with an Array:
    #
    #   ary = [:a, :b, :c]
    #   flatten_collection(ary) # => [[1, 2, 3], :a, :b, :c]
    #
    # In the case of a Hash, keys and values are both appended to the one-
    # dimensional array and then replaced with indexes. Symbol keys are
    # converted to Strings first.
    #
    #   hsh = {"a" => "blue", "b" => "purple", "c" => "green"}
    #   flatten_collection(hsh)
    #   # => [{1 => 2, 3 => 4, 5 => 6}, "a", "blue", "b", "purple", "c", "green"]
    #
    # An object will never be added to the one-dimensional array twice. If a
    # collection refers to the same object (by object_id) more than once, the
    # object will be added to the one-dimensional array only once.
    #
    #   ary = [:a, :a, :a]
    #   flatten_collection(ary) # => [[1, 1, 1], :a]
    #
    # The +obj_list+ and +id_refs+ parameters are private; they're used for
    # descending into sub-collections recursively. +id_refs+ maps an object_id
    # to that object's index in +obj_list+. Always returns +obj_list+.
    def self.flatten_collection(collection, obj_list = [], id_refs = {})
      oid = collection.object_id
      # Already visited (shared or cyclic reference): nothing more to add.
      return obj_list if id_refs[oid]

      id_refs[oid] = obj_list.length
      case collection
      when Array, Set
        obj_refs = collection.class.new
        obj_list << obj_refs
        collection.each do |member|
          flatten_collection(member, obj_list, id_refs)
          obj_refs << id_refs[member.object_id]
        end
      when Hash
        obj_refs = {}
        obj_list << obj_refs
        collection.each do |key, value|
          key = key.to_s if key.is_a?(Symbol)
          flatten_collection(key, obj_list, id_refs)
          flatten_collection(value, obj_list, id_refs)
          obj_refs[id_refs[key.object_id]] = id_refs[value.object_id]
        end
      else
        obj_list << collection
      end
      obj_list
    end

    # Reverses flatten_collection for a decoded object. If +collection+ is an
    # Array, Set, or Hash of object indexes (as returned by
    # decode_binary_plist_obj), every index is replaced in place by the object
    # it refers to, decoding that object from +plist+ on first use and
    # recursing into nested collections. Any other object is returned as-is.
    #
    # +obj_list+ caches decoded objects by index so shared and cyclic
    # references resolve to the same Ruby object. Returns +collection+.
    def self.unflatten_collection(collection, obj_list, plist,
      offset_table_addr, offset_byte_size, ref_byte_size)
      case collection
      when Array, Set
        collection.collect! do |index|
          resolve_reference(index, obj_list, plist, offset_table_addr,
            offset_byte_size, ref_byte_size)
        end
      when Hash
        resolved = {}
        collection.each do |key_index, value_index|
          key_obj = resolve_reference(key_index, obj_list, plist,
            offset_table_addr, offset_byte_size, ref_byte_size)
          value_obj = resolve_reference(value_index, obj_list, plist,
            offset_table_addr, offset_byte_size, ref_byte_size)
          resolved[key_obj] = value_obj
        end
        collection.replace(resolved)
      end
      collection
    end

    # Returns the object stored at +index+ in the property list, using
    # +obj_list+ as a cache. A newly decoded object is stored in +obj_list+
    # before its own references are resolved, so that cycles terminate.
    def self.resolve_reference(index, obj_list, plist, offset_table_addr,
      offset_byte_size, ref_byte_size)
      cached = obj_list[index]
      # Compare against nil rather than truthiness so a decoded +false+ counts
      # as cached.
      return cached unless cached.nil?

      offset = offset_for_index(plist, offset_table_addr, offset_byte_size,
        index)
      obj = decode_binary_plist_obj(plist, offset, ref_byte_size)
      obj_list[index] = obj
      unflatten_collection(obj, obj_list, plist, offset_table_addr,
        offset_byte_size, ref_byte_size)
    end

    # Returns a binary property list fragment that represents +obj+. The
    # returned string is not a complete property list, just a fragment that
    # describes +obj+, and is not useful without a header, offset table, and
    # trailer.
    #
    # The following classes are recognized: String, Symbol, Float, Integer, the
    # Boolean classes, Time, IO, StringIO, Array, Set, and Hash. Symbols are
    # encoded as strings. IO and StringIO objects are rewound, read, and the
    # contents stored as data (i.e., Cocoa applications will decode them as
    # NSData). All other classes are dumped with Marshal and stored as data.
    #
    # Arrays, Sets, and Hashes must contain object references (integer
    # indexes) as produced by flatten_collection, not the objects themselves.
    #
    # Note that subclasses of the supported classes will be encoded as though
    # they were the supported superclass. Thus, a subclass of (for example)
    # String will be encoded and decoded as a String, not as the subclass:
    #
    #   class ExampleString < String
    #     ...
    #   end
    #
    #   s = ExampleString.new("disquieting plantlike mystery")
    #   encoded_s = binary_plist_obj(s)
    #   decoded_s = decode_binary_plist_obj(encoded_s)
    #   puts decoded_s.class # => String
    #
    # +ref_byte_size+ is the number of bytes to use for storing references to
    # other objects.
    def self.binary_plist_obj(obj, ref_byte_size = 4)
      case obj
      when String, Symbol
        binary_plist_string(obj.to_s)
      when Float
        [CFBinaryPlistMarkerReal | 3].pack("C") + [obj].pack("G")
      when Integer
        nbytes = min_byte_size(obj)
        # The low nibble stores log2 of the byte count (1 => 0 ... 16 => 4).
        size_bits = nbytes.bit_length - 1
        [CFBinaryPlistMarkerInt | size_bits].pack("C") + pack_int(obj, nbytes)
      when TrueClass
        [CFBinaryPlistMarkerTrue].pack("C")
      when FalseClass
        [CFBinaryPlistMarkerFalse].pack("C")
      when Time
        [CFBinaryPlistMarkerDate].pack("C") +
          [obj.to_f - NSTimeIntervalSince1970].pack("G")
      when IO, StringIO
        obj.rewind
        binary_plist_data(obj.read)
      when Array
        # Must be an array of object references as returned by flatten_collection.
        result = marker_and_count(CFBinaryPlistMarkerArray, obj.length)
        obj.each { |ref| result << pack_int(ref, ref_byte_size) }
        result
      when Set
        # Must be a set of object references as returned by flatten_collection.
        result = marker_and_count(CFBinaryPlistMarkerSet, obj.length)
        obj.each { |ref| result << pack_int(ref, ref_byte_size) }
        result
      when Hash
        # Must be a table of object references as returned by flatten_collection.
        # All key references are written first, followed by all value references.
        result = marker_and_count(CFBinaryPlistMarkerDict, obj.length)
        obj.each_key { |ref| result << pack_int(ref, ref_byte_size) }
        obj.each_value { |ref| result << pack_int(ref, ref_byte_size) }
        result
      else
        binary_plist_data(Marshal.dump(obj))
      end
    end

    # Returns the fragment for the String +str+. Strings containing only
    # 7-bit ASCII are written with the ASCII marker. Other strings are written
    # as big-endian UTF-16 with a count of 16-bit code units, so characters
    # outside the Basic Multilingual Plane become surrogate pairs. Strings
    # tagged as binary are interpreted as UTF-8 when their bytes are valid
    # UTF-8. Strings that cannot be converted are written as raw bytes under
    # the ASCII marker.
    def self.binary_plist_string(str)
      utf16 = str.ascii_only? ? nil : utf16_bytes(str)
      if utf16
        marker_and_count(CFBinaryPlistMarkerUnicode16String,
          utf16.bytesize / 2) + utf16
      else
        marker_and_count(CFBinaryPlistMarkerASCIIString, str.bytesize) + str.b
      end
    end

    # Returns +str+ converted to big-endian UTF-16 as a binary String, or nil
    # if +str+ is not valid in its encoding or cannot be converted.
    def self.utf16_bytes(str)
      text = str
      if str.encoding == Encoding::BINARY
        text = str.dup.force_encoding(Encoding::UTF_8)
      end
      return nil unless text.valid_encoding?
      text.encode(Encoding::UTF_16BE).b
    rescue EncodingError
      nil
    end

    # Returns the marker byte for +marker+ combined with +count+ as a new
    # binary String. Counts below 15 are stored in the low nibble of the marker
    # byte. Otherwise the nibble is 0xf and the count follows as an encoded
    # integer object.
    def self.marker_and_count(marker, count)
      return [marker | count].pack("C") if count < 15
      [marker | 0xf].pack("C") + binary_plist_obj(count)
    end

    # Decodes the object fragment that starts at byte +offset+ of +plist+.
    #
    # Strings are returned as Strings, reals as Floats, integers as Integers,
    # dates as Times, and data as StringIO objects. Arrays, Sets, and
    # dictionaries are returned as an Array, Set, or Hash of object
    # references (integer indexes), each +ref_byte_size+ bytes wide; use
    # unflatten_collection to resolve them. Returns nil for any marker this
    # decoder does not handle (including null and UID).
    #
    # Raises ArgumentError if the fragment extends past the end of +plist+.
    def self.decode_binary_plist_obj(plist, offset = 0, ref_byte_size = 4)
      marker = plist.getbyte(offset)
      case marker
      when CFBinaryPlistMarkerASCIIString..(CFBinaryPlistMarkerASCIIString | 0xf)
        length, offset = decode_length(plist, offset)
        text = read_bytes(plist, offset, length).force_encoding(Encoding::UTF_8)
        # Raw non-UTF-8 bytes stay tagged as binary instead of invalid UTF-8.
        text.valid_encoding? ? text : text.force_encoding(Encoding::BINARY)
      when CFBinaryPlistMarkerUnicode16String..(CFBinaryPlistMarkerUnicode16String | 0xf)
        length, offset = decode_length(plist, offset)
        utf16 = read_bytes(plist, offset, length * 2)
        utf16.force_encoding(Encoding::UTF_16BE)
          .encode(Encoding::UTF_8, invalid: :replace, undef: :replace)
      when CFBinaryPlistMarkerReal | 2
        read_bytes(plist, offset + 1, 4).unpack("g").first
      when CFBinaryPlistMarkerReal | 3
        read_bytes(plist, offset + 1, 8).unpack("G").first
      when CFBinaryPlistMarkerInt..(CFBinaryPlistMarkerInt | 0xf)
        # The low nibble is log2 of the integer's width in bytes.
        unpack_int(read_bytes(plist, offset + 1, 1 << (marker & 0xf)))
      when CFBinaryPlistMarkerTrue
        true
      when CFBinaryPlistMarkerFalse
        false
      when CFBinaryPlistMarkerDate
        secs = read_bytes(plist, offset + 1, 8).unpack("G").first
        Time.at(secs + NSTimeIntervalSince1970)
      when CFBinaryPlistMarkerData..(CFBinaryPlistMarkerData | 0xf)
        length, offset = decode_length(plist, offset)
        StringIO.new(read_bytes(plist, offset, length))
      when CFBinaryPlistMarkerArray..(CFBinaryPlistMarkerArray | 0xf)
        length, offset = decode_length(plist, offset)
        read_refs(plist, offset, length, ref_byte_size)
      when CFBinaryPlistMarkerSet..(CFBinaryPlistMarkerSet | 0xf)
        length, offset = decode_length(plist, offset)
        Set.new(read_refs(plist, offset, length, ref_byte_size))
      when CFBinaryPlistMarkerDict..(CFBinaryPlistMarkerDict | 0xf)
        length, offset = decode_length(plist, offset)
        keys = read_refs(plist, offset, length, ref_byte_size)
        values = read_refs(plist, offset + length * ref_byte_size, length,
          ref_byte_size)
        hsh = {}
        keys.each_with_index { |key, i| hsh[key] = values[i] }
        hsh
      end
    end

    # Returns exactly +length+ bytes of +plist+ starting at byte +offset+, as
    # a new binary String. Raises ArgumentError if that range is not fully
    # inside +plist+.
    def self.read_bytes(plist, offset, length)
      bytes = offset < 0 ? nil : plist.byteslice(offset, length)
      if bytes.nil? || bytes.bytesize < length
        raise ArgumentError, "binary property list is truncated or corrupt"
      end
      bytes.b
    end

    # Reads +count+ consecutive object references, each +ref_byte_size+ bytes
    # wide, starting at byte +offset+ of +plist+. Returns them as an Array of
    # Integers.
    def self.read_refs(plist, offset, count, ref_byte_size)
      # Read the whole run first so a corrupt count fails before any array is
      # allocated.
      bytes = read_bytes(plist, offset, count * ref_byte_size)
      Array.new(count) do |i|
        unpack_int(bytes.byteslice(i * ref_byte_size, ref_byte_size))
      end
    end

    # Returns a binary property list fragment that represents a data object
    # with the contents of the string +data+. A Cocoa application would decode
    # this fragment as NSData. Like binary_plist_obj, the value returned by
    # this method is not usable by itself; it is only useful as part of a
    # complete binary property list with a header, offset table, and trailer.
    def self.binary_plist_data(data)
      # marker_and_count writes the extended length for every size >= 15,
      # which is exactly when the marker nibble is 0xf.
      marker_and_count(CFBinaryPlistMarkerData, data.bytesize) + data.b
    end

    # Determines the minimum number of bytes that is a power of two and can
    # represent the integer +i+. Raises a RangeError if the number of bytes
    # exceeds 16. Note that the property list format considers integers of 1,
    # 2, and 4 bytes to be unsigned, while 8- and 16-byte integers are signed;
    # thus negative integers will always require at least 8 bytes of storage.
    def self.min_byte_size(i)
      if i >= 0
        return 1 if i <= 0xff
        return 2 if i <= 0xffff
        return 4 if i <= 0xffffffff
      end
      # For a negative number, -i - 1 is the largest magnitude that must fit
      # in the signed range.
      magnitude = i < 0 ? i.abs - 1 : i
      return 8 if magnitude <= 0x7fffffffffffffff
      return 16 if magnitude <= 0x7fffffffffffffffffffffffffffffff
      raise(RangeError, "integer too big - exceeds 128 bits")
    end

    # Packs an integer +i+ into its binary representation in the specified
    # number of bytes. Byte order is big-endian. Negative integers cannot be
    # stored in 1, 2, or 4 bytes; in 8 or 16 bytes they are stored in two's
    # complement.
    #
    # Raises ArgumentError if +i+ is negative and +num_bytes+ is less than 8,
    # or if +num_bytes+ is not 1, 2, 4, 8, or 16.
    def self.pack_int(i, num_bytes)
      if i < 0 && num_bytes < 8
        raise(ArgumentError, "negative integers require 8 or 16 bytes of storage")
      end
      case num_bytes
      when 1
        [i].pack("C")
      when 2
        [i].pack("n")
      when 4
        [i].pack("N")
      when 8
        [i & 0xffffffff_ffffffff].pack("Q>")
      when 16
        [(i >> 64) & 0xffffffff_ffffffff, i & 0xffffffff_ffffffff].pack("Q>Q>")
      else
        raise(ArgumentError, "num_bytes must be 1, 2, 4, 8, or 16")
      end
    end

    # Combines +ints+, given most significant first, into a single integer in
    # which each part occupies +num_bits+ bits. Returns nil if no parts are
    # given.
    #
    #   combine_ints(32, 1, 2) # => 0x1_00000002
    def self.combine_ints(num_bits, *ints)
      return nil if ints.empty?
      ints.inject(0) { |acc, part| (acc << num_bits) + part }
    end

    # Returns the byte offset of the object at +index+, read from the offset
    # table that starts at byte +table_addr+ of +plist+ and whose entries are
    # +offset_byte_size+ bytes wide.
    def self.offset_for_index(plist, table_addr, offset_byte_size, index)
      unpack_int(read_bytes(plist, table_addr + index * offset_byte_size,
        offset_byte_size))
    end

    # Decodes the big-endian integer stored in the binary string +s+. Strings
    # of 1, 2, and 4 bytes are read as unsigned; strings of 8 and 16 bytes are
    # read as signed (two's complement).
    #
    # Raises ArgumentError if +s+ is nil or has any other length.
    def self.unpack_int(s)
      case s && s.bytesize
      when 1
        s.unpack("C").first
      when 2
        s.unpack("n").first
      when 4
        s.unpack("N").first
      when 8
        s.unpack("q>").first
      when 16
        # Only the high half carries the sign.
        high, low = s.unpack("q>Q>")
        (high << 64) | low
      else
        raise(ArgumentError, "length must be 1, 2, 4, 8, or 16 bytes")
      end
    end

    # Reads the element count of the object whose marker byte is at byte
    # +offset+ of +plist+. Returns a two-element array: the count, and the
    # offset of the first byte after the count (where the object's payload
    # begins).
    #
    # A low nibble below 0xf is the count itself. A low nibble of 0xf means
    # the count follows as an encoded integer object, whose width is taken
    # from that integer's own marker byte.
    def self.decode_length(plist, offset)
      marker = plist.getbyte(offset)
      if marker.nil?
        raise ArgumentError, "binary property list is truncated or corrupt"
      end
      nibble = marker & 0xf
      return nibble, offset + 1 unless nibble == 0xf

      int_offset = offset + 1
      int_marker = plist.getbyte(int_offset)
      unless int_marker && (int_marker & 0xf0) == CFBinaryPlistMarkerInt
        raise ArgumentError, "binary property list is truncated or corrupt"
      end
      length = decode_binary_plist_obj(plist, int_offset, 0)
      if length < 0
        raise ArgumentError, "binary property list is truncated or corrupt"
      end
      # Skip the integer's marker byte plus its payload.
      return length, int_offset + 1 + (1 << (int_marker & 0xf))
    end
  end
end