conify 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,606 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright 2011, 2012 Keith Rarick
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ # See https://github.com/kr/okjson for updates.
24
+
25
+ require 'stringio'
26
+
27
+ # Some parts adapted from
28
+ # http://golang.org/src/pkg/json/decode.go and
29
+ # http://golang.org/src/pkg/utf8/utf8.go
30
module OkJson
  extend self


  # Decodes a json document in string s and
  # returns the corresponding ruby value.
  # String s must be valid UTF-8. If you have
  # a string in some other encoding, convert
  # it first.
  #
  # String values in the resulting structure
  # will be UTF-8.
  #
  # Raises OkJson::Error if s is not a valid json text
  # (including empty or truncated input).
  def decode(s)
    ts = lex(s)
    v, ts = textparse(ts)
    raise Error, 'trailing garbage' unless ts.empty?
    v
  end


  # Parses a "json text" in the sense of RFC 4627.
  # Returns the parsed value and any trailing tokens.
  # Note: this is almost the same as valparse,
  # except that it does not accept atomic values.
  def textparse(ts)
    raise Error, 'empty' if ts.nil? || ts.empty?

    typ, _, val = ts[0]
    case typ
    when '{' then objparse(ts)
    when '[' then arrparse(ts)
    else
      raise Error, "unexpected #{val.inspect}"
    end
  end


  # Parses a "value" in the sense of RFC 4627.
  # Returns the parsed value and any trailing tokens.
  def valparse(ts)
    raise Error, 'empty' if ts.nil? || ts.empty?

    typ, _, val = ts[0]
    case typ
    when '{' then objparse(ts)
    when '[' then arrparse(ts)
    when :val,:str then [val, ts[1..-1]]
    else
      raise Error, "unexpected #{val.inspect}"
    end
  end


  # Parses an "object" in the sense of RFC 4627.
  # Returns the parsed value and any trailing tokens.
  # Raises Error if the token list ends before the closing '}'.
  def objparse(ts)
    ts = eat('{', ts)
    obj = {}

    return obj, ts[1..-1] if peektype(ts) == '}'

    loop do
      k, v, ts = pairparse(ts)
      obj[k] = v

      return obj, ts[1..-1] if peektype(ts) == '}'

      # Anything other than '}' after a member must be a separator.
      ts = eat(',', ts)
    end
  end


  # Parses a "member" in the sense of RFC 4627.
  # Returns the parsed values and any trailing tokens.
  def pairparse(ts)
    raise Error, 'unexpected end of input' if ts.nil? || ts.empty?

    (typ, _, k), ts = ts[0], ts[1..-1]
    if typ != :str
      raise Error, "unexpected #{k.inspect}"
    end
    ts = eat(':', ts)
    v, ts = valparse(ts)
    [k, v, ts]
  end


  # Parses an "array" in the sense of RFC 4627.
  # Returns the parsed value and any trailing tokens.
  # Raises Error if the token list ends before the closing ']'.
  def arrparse(ts)
    ts = eat('[', ts)
    arr = []

    return arr, ts[1..-1] if peektype(ts) == ']'

    loop do
      v, ts = valparse(ts)
      arr << v

      return arr, ts[1..-1] if peektype(ts) == ']'

      # Anything other than ']' after an element must be a separator.
      ts = eat(',', ts)
    end
  end


  # Consumes one token of type typ from the front of ts
  # and returns the remaining tokens. Raises Error if the
  # next token has another type or if there is no next token.
  def eat(typ, ts)
    if ts.nil? || ts.empty?
      raise Error, "expected #{typ} (got end of input)"
    end
    if ts[0][0] != typ
      raise Error, "expected #{typ} (got #{ts[0].inspect})"
    end
    ts[1..-1]
  end


  # Returns the type of the next token in ts without consuming it.
  # Raises Error (rather than failing on nil) when ts is exhausted,
  # so truncated documents are reported as parse errors.
  def peektype(ts)
    raise Error, 'unexpected end of input' if ts.nil? || ts.empty?
    ts[0][0]
  end
  private :peektype


  # Scans s and returns a list of json tokens,
  # excluding white space (as defined in RFC 4627).
  # Raises Error at the first position that does not
  # start a valid token.
  def lex(s)
    ts = []
    while s.length > 0
      typ, lexeme, val = tok(s)
      # tok yields nil when nothing matches; also treat false as
      # "no token" so a failed keyword match can never be enqueued.
      unless typ
        raise Error, "invalid character at #{s[0,10].inspect}"
      end
      ts << [typ, lexeme, val] if typ != :space
      s = s[lexeme.length..-1]
    end
    ts
  end


  # Scans the first token in s and
  # returns a 3-element list, or nil
  # if s does not begin with a valid token.
  #
  # The first list element is one of
  # '{', '}', ':', ',', '[', ']',
  # :val, :str, and :space.
  #
  # The second element is the lexeme.
  #
  # The third element is the value of the
  # token for :val and :str, otherwise
  # it is the lexeme.
  def tok(s)
    case s[0]
    when ?{ then ['{', s[0,1], s[0,1]]
    when ?} then ['}', s[0,1], s[0,1]]
    when ?: then [':', s[0,1], s[0,1]]
    when ?, then [',', s[0,1], s[0,1]]
    when ?[ then ['[', s[0,1], s[0,1]]
    when ?] then [']', s[0,1], s[0,1]]
    when ?n then nulltok(s)
    when ?t then truetok(s)
    when ?f then falsetok(s)
    when ?" then strtok(s)
    when Spc, ?\t, ?\n, ?\r then [:space, s[0,1], s[0,1]]
    else numtok(s)
    end
  end


  # Keyword scanners. Each returns a :val token when s starts with
  # the keyword, and nil otherwise (so lex reports a parse error).
  def nulltok(s);  s[0,4] == 'null'  ? [:val, 'null',  nil]   : nil end
  def truetok(s);  s[0,4] == 'true'  ? [:val, 'true',  true]  : nil end
  def falsetok(s); s[0,5] == 'false' ? [:val, 'false', false] : nil end


  # Scans a json number at the start of s.
  # Returns a :val token, or nil if s does not start with a number.
  #
  # The value is an Integer when the literal has no fraction and no
  # negative exponent, and a Float otherwise.
  def numtok(s)
    # Anchored so that only a number at the very start of s matches.
    # Group 1 includes the sign so it is preserved for "-1e2".
    m = /\A(-?(?:[1-9][0-9]+|[0-9]))([.][0-9]+)?([eE][+-]?[0-9]+)?/.match(s)
    return nil unless m

    int, frac, exp = m[1], m[2], m[3]
    if frac
      [:val, m[0], Float(m[0])]
    elsif exp
      # Explicit base 10: a leading zero ("e08") must not be read as octal.
      e = Integer(exp[1..-1], 10)
      if e < 0
        # 10**negative is a Rational on ruby >= 1.9; json wants a float.
        [:val, m[0], Float(m[0])]
      else
        [:val, m[0], Integer(int, 10) * (10**e)]
      end
    else
      [:val, m[0], Integer(m[0], 10)]
    end
  end


  # Scans a json string literal at the start of s.
  # Returns a :str token whose value is the unquoted string.
  # Raises Error if s does not start with a well-formed literal.
  def strtok(s)
    # Anchored: an unanchored match could skip a malformed literal
    # and pick up a later, unrelated pair of quotes.
    m = /\A"(?:[^"\\]|\\["\/\\bfnrt]|\\u[0-9a-fA-F]{4})*"/.match(s)
    if ! m
      raise Error, "invalid string literal at #{abbrev(s)}"
    end
    [:str, m[0], unquote(m[0])]
  end


  # Returns a short backquoted excerpt of s for error messages:
  # at most the first 10 characters, cut before any backquote,
  # followed by '...' when something was left out.
  def abbrev(s)
    t = s[0,10]
    p = t.index('`')
    t = t[0,p] if p
    t = t + '...' if t.length < s.length
    '`' + t + '`'
  end


  # Converts a quoted json string literal q into a UTF-8-encoded string.
  # The rules are different than for Ruby, so we cannot use eval.
  # Unquote will raise an error if q contains control characters.
  #
  # A \u escape that is an unpaired surrogate is replaced by
  # U+FFFD (the unicode replacement character).
  def unquote(q)
    q = q[1...-1]
    # In ruby >= 1.9, strings are indexed by codepoint, not byte.
    rubydoesenc = q.class.method_defined?(:force_encoding)
    # Read and write through the same encoding so that the read
    # index r and write index w address the same kind of unit.
    q.force_encoding('UTF-8') if rubydoesenc
    a = q.dup # allocate a big enough string
    r, w = 0, 0
    while r < q.length
      c = q[r]
      case true
      when c == ?\\
        r += 1
        if r >= q.length
          raise Error, "string literal ends with a \"\\\": \"#{q}\""
        end

        case q[r]
        when ?",?\\,?/,?'
          a[w] = q[r]
          r += 1
          w += 1
        when ?b,?f,?n,?r,?t
          a[w] = Unesc[q[r]]
          r += 1
          w += 1
        when ?u
          r += 1
          uchar = begin
            hexdec4(q[r,4])
          rescue Error => e
            raise Error, "invalid escape sequence \\u#{q[r,4]}: #{e}"
          end
          r += 4
          if surrogate?(uchar)
            pair = Ucharerr
            # Only look for the second half if another \u escape
            # really follows; otherwise plain text could be misread.
            if q[r,2] == '\\u' && q.length >= r+6
              pair = subst(uchar, hexdec4(q[r+2,4]))
            end
            # A valid pair; consume the second escape too.
            r += 6 if pair != Ucharerr
            uchar = pair
          end
          if rubydoesenc
            a[w] = [uchar].pack('U')
            w += 1
          else
            w += ucharenc(a, w, uchar)
          end
        else
          raise Error, "invalid escape char #{q[r]} in \"#{q}\""
        end
      when c == ?", c < Spc
        raise Error, "invalid character in string literal \"#{q}\""
      else
        # Copy anything else byte-for-byte.
        # Valid UTF-8 will remain valid UTF-8.
        # Invalid UTF-8 will remain invalid UTF-8.
        # In ruby >= 1.9, c is a codepoint, not a byte,
        # in which case this is still what we want.
        a[w] = c
        r += 1
        w += 1
      end
    end
    a[0,w]
  end


  # Encodes unicode character u as UTF-8
  # bytes in string a at position i.
  # Returns the number of bytes written.
  def ucharenc(a, i, u)
    case true
    when u <= Uchar1max
      a[i] = (u & 0xff).chr
      1
    when u <= Uchar2max
      a[i+0] = (Utag2 | ((u>>6)&0xff)).chr
      a[i+1] = (Utagx | (u&Umaskx)).chr
      2
    when u <= Uchar3max
      a[i+0] = (Utag3 | ((u>>12)&0xff)).chr
      a[i+1] = (Utagx | ((u>>6)&Umaskx)).chr
      a[i+2] = (Utagx | (u&Umaskx)).chr
      3
    else
      a[i+0] = (Utag4 | ((u>>18)&0xff)).chr
      a[i+1] = (Utagx | ((u>>12)&Umaskx)).chr
      a[i+2] = (Utagx | ((u>>6)&Umaskx)).chr
      a[i+3] = (Utagx | (u&Umaskx)).chr
      4
    end
  end


  # Decodes exactly four hex digits in s into an integer.
  # Raises Error if s is not four characters long or
  # contains a character that is not a hex digit.
  def hexdec4(s)
    if s.length != 4
      raise Error, 'short'
    end
    (nibble(s[0])<<12) | (nibble(s[1])<<8) | (nibble(s[2])<<4) | nibble(s[3])
  end


  # Combines the surrogate pair u1 (high), u2 (low) into the
  # codepoint it represents. Returns Ucharerr if the two values
  # do not form a valid pair.
  def subst(u1, u2)
    if Usurr1 <= u1 && u1 < Usurr2 && Usurr2 <= u2 && u2 < Usurr3
      # Parenthesized: in ruby "+" binds tighter than "|", and the
      # offset must be added to the combined 20-bit value.
      return (((u1-Usurr1)<<10) | (u2-Usurr2)) + Usurrself
    end
    return Ucharerr
  end


  # Splits codepoint u into its surrogate pair [high, low].
  # Returns [Ucharerr, Ucharerr] if u cannot be represented
  # as a surrogate pair.
  def unsubst(u)
    if u < Usurrself || u > Umax || surrogate?(u)
      return Ucharerr, Ucharerr
    end
    u -= Usurrself
    [Usurr1 + ((u>>10)&0x3ff), Usurr2 + (u&0x3ff)]
  end


  # Reports whether u lies in the surrogate range [Usurr1, Usurr3).
  def surrogate?(u)
    Usurr1 <= u && u < Usurr3
  end


  # Returns the value (0-15) of the hex digit c.
  # Raises Error if c is not one of 0-9, a-f, A-F.
  def nibble(c)
    case true
    when ?0 <= c && c <= ?9 then c.ord - ?0.ord
    when ?a <= c && c <= ?f then c.ord - ?a.ord + 10
    when ?A <= c && c <= ?F then c.ord - ?A.ord + 10
    else
      raise Error, "invalid hex code #{c}"
    end
  end


  # Encodes x into a json text. It may contain only
  # Array, Hash, String, Numeric, true, false, nil.
  # (Note, this list excludes Symbol.)
  # X itself must be an Array or a Hash.
  # No other value can be encoded, and an error will
  # be raised if x contains any other value, such as
  # Nan, Infinity, Symbol, and Proc, or if a Hash key
  # is not a String.
  # Strings contained in x must be valid UTF-8.
  def encode(x)
    case x
    when Hash    then objenc(x)
    when Array   then arrenc(x)
    else
      raise Error, 'root value must be an Array or a Hash'
    end
  end


  # Encodes any supported value x as a json value.
  # Raises Error for unsupported classes.
  def valenc(x)
    case x
    when Hash    then objenc(x)
    when Array   then arrenc(x)
    when String  then strenc(x)
    when Numeric then numenc(x)
    when true    then "true"
    when false   then "false"
    when nil     then "null"
    else
      raise Error, "cannot encode #{x.class}: #{x.inspect}"
    end
  end


  # Encodes Hash x as a json object.
  def objenc(x)
    '{' + x.map{|k,v| keyenc(k) + ':' + valenc(v)}.join(',') + '}'
  end


  # Encodes Array a as a json array.
  def arrenc(a)
    '[' + a.map{|x| valenc(x)}.join(',') + ']'
  end


  # Encodes hash key k, which must be a String.
  def keyenc(k)
    case k
    when String then strenc(k)
    else
      raise Error, "Hash key is not a string: #{k.inspect}"
    end
  end


  # Encodes String s as a json string literal.
  # Printable ASCII is copied (with the usual short escapes);
  # every other character is written as \uXXXX, using a
  # surrogate pair for codepoints above U+FFFF.
  # Raises Error if s contains bytes that are not valid UTF-8
  # (on rubies that track string encodings).
  def strenc(s)
    t = StringIO.new
    t.putc(?")
    r = 0

    # In ruby >= 1.9, s[r] is a codepoint, not a byte.
    rubydoesenc = s.class.method_defined?(:encoding)

    while r < s.length
      case s[r]
      when ?"  then t.print('\\"')
      when ?\\ then t.print('\\\\')
      when ?\b then t.print('\\b')
      when ?\f then t.print('\\f')
      when ?\n then t.print('\\n')
      when ?\r then t.print('\\r')
      when ?\t then t.print('\\t')
      else
        c = s[r]
        case true
        when Spc <= c && c <= ?~
          t.putc(c)
        when rubydoesenc
          u = begin
            c.ord
          rescue ArgumentError
            # String#ord rejects malformed byte sequences.
            raise Error, "invalid UTF-8 in string: #{s.inspect}"
          end
          surrenc(t, u)
        else
          u, size = uchardec(s, r)
          r += size - 1 # we add one more at the bottom of the loop
          surrenc(t, u)
        end
      end
      r += 1
    end
    t.putc(?")
    t.string
  end


  # Writes codepoint u to t as one \uXXXX escape, or as a
  # surrogate pair of two escapes when u is above U+FFFF.
  def surrenc(t, u)
    if u < 0x10000
      t.print('\\u')
      hexenc4(t, u)
    else
      u1, u2 = unsubst(u)
      t.print('\\u')
      hexenc4(t, u1)
      t.print('\\u')
      hexenc4(t, u2)
    end
  end


  # Writes the low 16 bits of u to t as four lowercase hex digits.
  def hexenc4(t, u)
    t.putc(Hex[(u>>12)&0xf])
    t.putc(Hex[(u>>8)&0xf])
    t.putc(Hex[(u>>4)&0xf])
    t.putc(Hex[u&0xf])
  end


  # Encodes Numeric x using its to_s form.
  # Raises Error for NaN and infinities, which json cannot represent.
  def numenc(x)
    # Not every Numeric defines nan?/infinite?, so ask first
    # instead of rescuing the NoMethodError.
    if (x.respond_to?(:nan?) && x.nan?) ||
       (x.respond_to?(:infinite?) && x.infinite?)
      raise Error, "Numeric cannot be represented: #{x}"
    end
    "#{x}"
  end


  # Decodes unicode character u from UTF-8
  # bytes in string s at position i.
  # Returns u and the number of bytes read.
  # Malformed or overlong sequences yield [Ucharerr, 1].
  def uchardec(s, i)
    n = s.length - i
    return [Ucharerr, 1] if n < 1

    c0 = s[i].ord

    # 1-byte, 7-bit sequence?
    if c0 < Utagx
      return [c0, 1]
    end

    # unexpected continuation byte?
    return [Ucharerr, 1] if c0 < Utag2

    # need continuation byte
    return [Ucharerr, 1] if n < 2
    c1 = s[i+1].ord
    return [Ucharerr, 1] if c1 < Utagx || Utag2 <= c1

    # 2-byte, 11-bit sequence?
    if c0 < Utag3
      u = (c0&Umask2)<<6 | (c1&Umaskx)
      return [Ucharerr, 1] if u <= Uchar1max
      return [u, 2]
    end

    # need second continuation byte
    return [Ucharerr, 1] if n < 3
    c2 = s[i+2].ord
    return [Ucharerr, 1] if c2 < Utagx || Utag2 <= c2

    # 3-byte, 16-bit sequence?
    if c0 < Utag4
      u = (c0&Umask3)<<12 | (c1&Umaskx)<<6 | (c2&Umaskx)
      return [Ucharerr, 1] if u <= Uchar2max
      return [u, 3]
    end

    # need third continuation byte
    return [Ucharerr, 1] if n < 4
    c3 = s[i+3].ord
    return [Ucharerr, 1] if c3 < Utagx || Utag2 <= c3

    # 4-byte, 21-bit sequence?
    if c0 < Utag5
      u = (c0&Umask4)<<18 | (c1&Umaskx)<<12 | (c2&Umaskx)<<6 | (c3&Umaskx)
      return [Ucharerr, 1] if u <= Uchar3max
      return [u, 4]
    end

    return [Ucharerr, 1]
  end


  # Raised for every decoding and encoding failure.
  class Error < ::StandardError
  end


  Utagx = 0x80 # 1000 0000
  Utag2 = 0xc0 # 1100 0000
  Utag3 = 0xe0 # 1110 0000
  Utag4 = 0xf0 # 1111 0000
  Utag5 = 0xF8 # 1111 1000
  Umaskx = 0x3f # 0011 1111
  Umask2 = 0x1f # 0001 1111
  Umask3 = 0x0f # 0000 1111
  Umask4 = 0x07 # 0000 0111
  Uchar1max = (1<<7) - 1
  Uchar2max = (1<<11) - 1
  Uchar3max = (1<<16) - 1
  Ucharerr = 0xFFFD # unicode "replacement char"
  Usurrself = 0x10000
  Usurr1 = 0xd800
  Usurr2 = 0xdc00
  Usurr3 = 0xe000
  Umax = 0x10ffff

  Spc = ' '[0]
  Unesc = {?b=>?\b, ?f=>?\f, ?n=>?\n, ?r=>?\r, ?t=>?\t}.freeze
  Hex = '0123456789abcdef'.freeze
end