conify 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,606 @@
# encoding: UTF-8
#
# Copyright 2011, 2012 Keith Rarick
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

# See https://github.com/kr/okjson for updates.
24
+
25
+ require 'stringio'
26
+
27
# Some parts adapted from
# http://golang.org/src/pkg/json/decode.go and
# http://golang.org/src/pkg/utf8/utf8.go
#
# A small, dependency-free JSON encoder/decoder. Every failure that is
# caused by the input is reported as OkJson::Error.
module OkJson
  extend self


  # Decodes a json document in string s and
  # returns the corresponding ruby value.
  # String s must be valid UTF-8. If you have
  # a string in some other encoding, convert
  # it first.
  #
  # String values in the resulting structure
  # will be UTF-8.
  #
  # Raises Error if s is not a well-formed json text.
  def decode(s)
    ts = lex(s)
    v, ts = textparse(ts)
    raise Error, 'trailing garbage' unless ts.empty?
    v
  end


  # Parses a "json text" in the sense of RFC 4627.
  # Returns the parsed value and any trailing tokens.
  # Note: this is almost the same as valparse,
  # except that it does not accept atomic values.
  def textparse(ts)
    raise Error, 'empty' if ts.empty?

    typ, _, val = ts[0]
    case typ
    when '{' then objparse(ts)
    when '[' then arrparse(ts)
    else
      raise Error, "unexpected #{val.inspect}"
    end
  end


  # Parses a "value" in the sense of RFC 4627.
  # Returns the parsed value and any trailing tokens.
  def valparse(ts)
    raise Error, 'empty' if ts.empty?

    typ, _, val = ts[0]
    case typ
    when '{' then objparse(ts)
    when '[' then arrparse(ts)
    when :val, :str then [val, ts[1..-1]]
    else
      raise Error, "unexpected #{val.inspect}"
    end
  end


  # Parses an "object" in the sense of RFC 4627.
  # Returns the parsed value and any trailing tokens.
  # Raises Error if the token list ends before the closing '}'.
  def objparse(ts)
    ts = eat('{', ts)
    obj = {}
    return obj, ts[1..-1] if peektype(ts, 'object') == '}'

    loop do
      k, v, ts = pairparse(ts)
      obj[k] = v
      return obj, ts[1..-1] if peektype(ts, 'object') == '}'
      ts = eat(',', ts)
    end
  end


  # Parses a "member" in the sense of RFC 4627.
  # Returns the key, the value and any trailing tokens.
  def pairparse(ts)
    raise Error, 'unexpected end of object' if ts.empty?

    (typ, _, k), ts = ts[0], ts[1..-1]
    raise Error, "unexpected #{k.inspect}" if typ != :str

    ts = eat(':', ts)
    v, ts = valparse(ts)
    [k, v, ts]
  end


  # Parses an "array" in the sense of RFC 4627.
  # Returns the parsed value and any trailing tokens.
  # Raises Error if the token list ends before the closing ']'.
  def arrparse(ts)
    ts = eat('[', ts)
    arr = []
    return arr, ts[1..-1] if peektype(ts, 'array') == ']'

    loop do
      v, ts = valparse(ts)
      arr << v
      return arr, ts[1..-1] if peektype(ts, 'array') == ']'
      ts = eat(',', ts)
    end
  end


  # Consumes one token of type typ from the front of ts and
  # returns the remaining tokens. Raises Error if the next
  # token has another type or if there is no next token.
  def eat(typ, ts)
    raise Error, "expected #{typ} (got end of input)" if ts.empty?
    if ts[0][0] != typ
      raise Error, "expected #{typ} (got #{ts[0].inspect})"
    end
    ts[1..-1]
  end


  # Scans s and returns a list of json tokens,
  # excluding white space (as defined in RFC 4627).
  def lex(s)
    ts = []
    while s.length > 0
      typ, lexeme, val = tok(s)
      # The scanners signal "no token here" with a falsy result.
      unless typ
        raise Error, "invalid character at #{s[0,10].inspect}"
      end
      ts << [typ, lexeme, val] if typ != :space
      s = s[lexeme.length..-1]
    end
    ts
  end


  # Scans the first token in s and
  # returns a 3-element list, or nil
  # if s does not begin with a valid token.
  #
  # The first list element is one of
  # '{', '}', ':', ',', '[', ']',
  # :val, :str, and :space.
  #
  # The second element is the lexeme.
  #
  # The third element is the value of the
  # token for :val and :str, otherwise
  # it is the lexeme.
  def tok(s)
    case s[0]
    when ?{ then ['{', s[0,1], s[0,1]]
    when ?} then ['}', s[0,1], s[0,1]]
    when ?: then [':', s[0,1], s[0,1]]
    when ?, then [',', s[0,1], s[0,1]]
    when ?[ then ['[', s[0,1], s[0,1]]
    when ?] then [']', s[0,1], s[0,1]]
    when ?n then nulltok(s)
    when ?t then truetok(s)
    when ?f then falsetok(s)
    when ?" then strtok(s)
    when Spc, ?\t, ?\n, ?\r then [:space, s[0,1], s[0,1]]
    else numtok(s)
    end
  end


  # Literal scanners: each returns a :val token when s starts with
  # the literal, and nil otherwise.
  def nulltok(s);  s[0,4] == 'null'  ? [:val, 'null',  nil]   : nil end
  def truetok(s);  s[0,4] == 'true'  ? [:val, 'true',  true]  : nil end
  def falsetok(s); s[0,5] == 'false' ? [:val, 'false', false] : nil end


  # Scans a json number at the start of s.
  # Returns a :val token, or nil if s does not start with a number.
  #
  # The value is an Integer when the literal has no fraction and no
  # negative exponent, and a Float otherwise.
  def numtok(s)
    # Anchored, so only the start of s is examined.
    m = /\A(-?(?:[1-9][0-9]+|[0-9]))([.][0-9]+)?([eE][+-]?[0-9]+)?/.match(s)
    return nil unless m

    int, frac, exp = m[1], m[2], m[3]
    if frac
      [:val, m[0], Float(m[0])]
    elsif exp
      # to_i reads base 10, so a leading zero is not taken as octal.
      e = exp[1..-1].to_i
      if e < 0
        [:val, m[0], Float(m[0])]
      else
        # int includes the sign.
        [:val, m[0], Integer(int) * (10**e)]
      end
    else
      [:val, m[0], Integer(int)]
    end
  end


  # Scans a json string literal at the start of s.
  # Returns a :str token whose value is the unquoted string.
  # Raises Error if s does not start with a valid literal.
  def strtok(s)
    # Anchored: without \A a later, unrelated literal could match
    # and the invalid text in front of it would be skipped silently.
    m = /\A"(?:[^"\\]|\\["\/\\bfnrt]|\\u[0-9a-fA-F]{4})*"/.match(s)
    raise Error, "invalid string literal at #{abbrev(s)}" unless m
    [:str, m[0], unquote(m[0])]
  end


  # Returns a short excerpt of s for use in error messages:
  # at most 10 characters, cut before any backtick, followed
  # by '...' when shortened, and wrapped in backticks.
  def abbrev(s)
    t = s[0,10]
    cut = t.index('`')
    t = t[0,cut] if cut
    t = t + '...' if t.length < s.length
    '`' + t + '`'
  end


  # Converts a quoted json string literal q into a UTF-8-encoded string.
  # The rules are different than for Ruby, so we cannot use eval.
  # Unquote will raise an error if q contains control characters.
  #
  # A \u escape that encodes an unpaired surrogate is replaced by
  # U+FFFD (the unicode replacement character).
  def unquote(q)
    q = q[1...-1]
    # In ruby >= 1.9, q[r] is a codepoint, not a byte. Index the
    # (private) copy as UTF-8 so reading and writing agree.
    q.force_encoding('UTF-8') if q.respond_to?(:force_encoding)
    out = q[0,0] # empty string with the same encoding as q
    r = 0
    while r < q.length
      c = q[r]
      if c == ?\\
        r += 1
        if r >= q.length
          raise Error, "string literal ends with a \"\\\": \"#{q}\""
        end

        esc = q[r]
        case esc
        when ?", ?\\, ?/, ?'
          out << esc
          r += 1
        when ?b, ?f, ?n, ?r, ?t
          out << Unesc[esc]
          r += 1
        when ?u
          r += 1
          uchar = begin
            hexdec4(q[r,4])
          rescue Error => err
            raise Error, "invalid escape sequence \\u#{q[r,4]}: #{err}"
          end
          r += 4
          if surrogate?(uchar)
            pair = Ucharerr
            # Only look for a low surrogate if another \u escape follows.
            if q[r,2] == '\\u' && q.length >= r+6
              begin
                pair = subst(uchar, hexdec4(q[r+2,4]))
              rescue Error
                # Malformed second escape: the first one is unpaired.
                # The next loop iteration reports the malformed escape.
                pair = Ucharerr
              end
            end
            r += 6 if pair != Ucharerr # a valid pair; consume.
            uchar = pair
          end
          out << [uchar].pack('U')
        else
          raise Error, "invalid escape char #{q[r]} in \"#{q}\""
        end
      elsif c == ?" || c < Spc
        raise Error, "invalid character in string literal \"#{q}\""
      else
        # Copy anything else unchanged.
        # Valid UTF-8 will remain valid UTF-8.
        # Invalid UTF-8 will remain invalid UTF-8.
        out << c
        r += 1
      end
    end
    out
  end


  # Encodes unicode character u as UTF-8
  # bytes in string a at position i.
  # Returns the number of bytes written.
  #
  # NOTE(review): this writes one byte per index, so it presumably
  # targets byte-indexed strings (ruby 1.8); unquote no longer uses it.
  def ucharenc(a, i, u)
    if u <= Uchar1max
      a[i] = (u & 0xff).chr
      1
    elsif u <= Uchar2max
      a[i+0] = (Utag2 | ((u>>6)&0xff)).chr
      a[i+1] = (Utagx | (u&Umaskx)).chr
      2
    elsif u <= Uchar3max
      a[i+0] = (Utag3 | ((u>>12)&0xff)).chr
      a[i+1] = (Utagx | ((u>>6)&Umaskx)).chr
      a[i+2] = (Utagx | (u&Umaskx)).chr
      3
    else
      a[i+0] = (Utag4 | ((u>>18)&0xff)).chr
      a[i+1] = (Utagx | ((u>>12)&Umaskx)).chr
      a[i+2] = (Utagx | ((u>>6)&Umaskx)).chr
      a[i+3] = (Utagx | (u&Umaskx)).chr
      4
    end
  end


  # Decodes exactly four hexadecimal digits in s into an integer.
  # Raises Error if s is not four characters long or contains
  # a character that is not a hex digit.
  def hexdec4(s)
    raise Error, 'short' if s.nil? || s.length != 4
    (nibble(s[0])<<12) | (nibble(s[1])<<8) | (nibble(s[2])<<4) | nibble(s[3])
  end


  # Combines the surrogate pair u1 (high), u2 (low) into one
  # code point. Returns Ucharerr if the pair is not valid.
  def subst(u1, u2)
    if Usurr1 <= u1 && u1 < Usurr2 && Usurr2 <= u2 && u2 < Usurr3
      # Parenthesized: in ruby '+' binds tighter than '|'.
      return (((u1-Usurr1)<<10) | (u2-Usurr2)) + Usurrself
    end
    Ucharerr
  end


  # Splits code point u into a surrogate pair [high, low].
  # Returns [Ucharerr, Ucharerr] if u cannot be represented as a pair.
  def unsubst(u)
    if u < Usurrself || u > Umax || surrogate?(u)
      return Ucharerr, Ucharerr
    end
    u -= Usurrself
    [Usurr1 + ((u>>10)&0x3ff), Usurr2 + (u&0x3ff)]
  end


  # Reports whether u lies in the surrogate range.
  def surrogate?(u)
    Usurr1 <= u && u < Usurr3
  end


  # Returns the value (0..15) of the hexadecimal digit c.
  # Raises Error if c is not a hex digit.
  def nibble(c)
    if ?0 <= c && c <= ?9
      c.ord - ?0.ord
    elsif ?a <= c && c <= ?f
      c.ord - ?a.ord + 10
    elsif ?A <= c && c <= ?F
      c.ord - ?A.ord + 10
    else
      raise Error, "invalid hex code #{c}"
    end
  end


  # Encodes x into a json text. It may contain only
  # Array, Hash, String, Numeric, true, false, nil.
  # (Note, this list excludes Symbol.)
  # X itself must be an Array or a Hash.
  # No other value can be encoded, and an error will
  # be raised if x contains any other value, such as
  # Nan, Infinity, Symbol, and Proc, or if a Hash key
  # is not a String.
  # Strings contained in x must be valid UTF-8.
  def encode(x)
    case x
    when Hash  then objenc(x)
    when Array then arrenc(x)
    else
      raise Error, 'root value must be an Array or a Hash'
    end
  end


  # Encodes any supported value x; raises Error for anything else.
  def valenc(x)
    case x
    when Hash    then objenc(x)
    when Array   then arrenc(x)
    when String  then strenc(x)
    when Numeric then numenc(x)
    when true    then "true"
    when false   then "false"
    when nil     then "null"
    else
      raise Error, "cannot encode #{x.class}: #{x.inspect}"
    end
  end


  # Encodes Hash x as a json object.
  def objenc(x)
    '{' + x.map { |k, v| keyenc(k) + ':' + valenc(v) }.join(',') + '}'
  end


  # Encodes Array a as a json array.
  def arrenc(a)
    '[' + a.map { |x| valenc(x) }.join(',') + ']'
  end


  # Encodes hash key k, which must be a String.
  def keyenc(k)
    case k
    when String then strenc(k)
    else
      raise Error, "Hash key is not a string: #{k.inspect}"
    end
  end


  # Encodes string s as a json string literal. Printable ASCII is
  # written as is; every other character becomes a \u escape, so
  # the result is pure ASCII.
  # Raises Error if s contains bytes that are not valid in its encoding.
  def strenc(s)
    t = StringIO.new
    t.putc(?")
    r = 0

    # In ruby >= 1.9, s[r] is a codepoint, not a byte.
    rubydoesenc = s.class.method_defined?(:encoding)

    while r < s.length
      c = s[r]
      case c
      when ?"  then t.print('\\"')
      when ?\\ then t.print('\\\\')
      when ?\b then t.print('\\b')
      when ?\f then t.print('\\f')
      when ?\n then t.print('\\n')
      when ?\r then t.print('\\r')
      when ?\t then t.print('\\t')
      else
        if Spc <= c && c <= ?~
          t.putc(c)
        elsif rubydoesenc
          unless c.valid_encoding?
            raise Error, "invalid byte sequence in string: #{s.inspect}"
          end
          surrenc(t, c.ord)
        else
          u, size = uchardec(s, r)
          r += size - 1 # we add one more at the bottom of the loop
          surrenc(t, u)
        end
      end
      r += 1
    end
    t.putc(?")
    t.string
  end


  # Writes code point u to t as one \u escape, or as a
  # surrogate pair of two escapes when u is 0x10000 or above.
  def surrenc(t, u)
    if u < 0x10000
      t.print('\\u')
      hexenc4(t, u)
    else
      u1, u2 = unsubst(u)
      t.print('\\u')
      hexenc4(t, u1)
      t.print('\\u')
      hexenc4(t, u2)
    end
  end


  # Writes the low 16 bits of u to t as four lowercase hex digits.
  def hexenc4(t, u)
    t.putc(Hex[(u>>12)&0xf])
    t.putc(Hex[(u>>8)&0xf])
    t.putc(Hex[(u>>4)&0xf])
    t.putc(Hex[u&0xf])
  end


  # Encodes number x. Raises Error for NaN and infinite values,
  # which json cannot represent.
  def numenc(x)
    # Not every Numeric defines nan?/infinite?, so ask first.
    if (x.respond_to?(:nan?) && x.nan?) ||
       (x.respond_to?(:infinite?) && x.infinite?)
      raise Error, "Numeric cannot be represented: #{x}"
    end
    "#{x}"
  end


  # Decodes unicode character u from UTF-8
  # bytes in string s at position i.
  # Returns u and the number of bytes read.
  # Malformed or overlong sequences yield [Ucharerr, 1].
  def uchardec(s, i)
    n = s.length - i
    return [Ucharerr, 1] if n < 1

    c0 = s[i].ord

    # 1-byte, 7-bit sequence?
    return [c0, 1] if c0 < Utagx

    # unexpected continuation byte?
    return [Ucharerr, 1] if c0 < Utag2

    # need continuation byte
    return [Ucharerr, 1] if n < 2
    c1 = s[i+1].ord
    return [Ucharerr, 1] if c1 < Utagx || Utag2 <= c1

    # 2-byte, 11-bit sequence?
    if c0 < Utag3
      u = (c0&Umask2)<<6 | (c1&Umaskx)
      return [Ucharerr, 1] if u <= Uchar1max
      return [u, 2]
    end

    # need second continuation byte
    return [Ucharerr, 1] if n < 3
    c2 = s[i+2].ord
    return [Ucharerr, 1] if c2 < Utagx || Utag2 <= c2

    # 3-byte, 16-bit sequence?
    if c0 < Utag4
      u = (c0&Umask3)<<12 | (c1&Umaskx)<<6 | (c2&Umaskx)
      return [Ucharerr, 1] if u <= Uchar2max
      return [u, 3]
    end

    # need third continuation byte
    return [Ucharerr, 1] if n < 4
    c3 = s[i+3].ord
    return [Ucharerr, 1] if c3 < Utagx || Utag2 <= c3

    # 4-byte, 21-bit sequence?
    if c0 < Utag5
      u = (c0&Umask4)<<18 | (c1&Umaskx)<<12 | (c2&Umaskx)<<6 | (c3&Umaskx)
      return [Ucharerr, 1] if u <= Uchar3max
      return [u, 4]
    end

    [Ucharerr, 1]
  end


  # Returns the type of the next token in ts. Raises Error if
  # there is none; what names the construct being parsed.
  def peektype(ts, what)
    raise Error, "unexpected end of #{what}" if ts.empty?
    ts[0][0]
  end
  private :peektype


  # Raised for every encoding or decoding failure.
  class Error < ::StandardError
  end


  Utagx = 0x80 # 1000 0000
  Utag2 = 0xc0 # 1100 0000
  Utag3 = 0xe0 # 1110 0000
  Utag4 = 0xf0 # 1111 0000
  Utag5 = 0xF8 # 1111 1000
  Umaskx = 0x3f # 0011 1111
  Umask2 = 0x1f # 0001 1111
  Umask3 = 0x0f # 0000 1111
  Umask4 = 0x07 # 0000 0111
  Uchar1max = (1<<7) - 1
  Uchar2max = (1<<11) - 1
  Uchar3max = (1<<16) - 1
  Ucharerr = 0xFFFD # unicode "replacement char"
  Usurrself = 0x10000
  Usurr1 = 0xd800
  Usurr2 = 0xdc00
  Usurr3 = 0xe000
  Umax = 0x10ffff

  Spc = ' '[0]
  Unesc = {?b=>?\b, ?f=>?\f, ?n=>?\n, ?r=>?\r, ?t=>?\t}.freeze
  Hex = '0123456789abcdef'.freeze
end