multi_json 1.19.1 → 1.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,545 +0,0 @@
1
- # Copyright 2011, 2012 Keith Rarick
2
- #
3
- # Permission is hereby granted, free of charge, to any person obtaining a copy
4
- # of this software and associated documentation files (the "Software"), to deal
5
- # in the Software without restriction, including without limitation the rights
6
- # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
- # copies of the Software, and to permit persons to whom the Software is
8
- # furnished to do so, subject to the following conditions:
9
- #
10
- # The above copyright notice and this permission notice shall be included in
11
- # all copies or substantial portions of the Software.
12
- #
13
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
- # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
- # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
- # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
- # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
- # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
- # THE SOFTWARE.
20
-
21
- # See https://github.com/kr/okjson for updates.
22
-
23
- require "stringio"
24
-
25
module MultiJson
  # A small, dependency-free JSON encoder and decoder.
  #
  # Some parts adapted from
  # https://golang.org/src/encoding/json/decode.go and
  # https://golang.org/src/unicode/utf8/utf8.go
  #
  # Public entry points are +decode+, +encode+ and +valenc+; everything
  # after the +private+ marker is an implementation detail.
  module OkJson
    Upstream = "45"
    extend self

    # Raised internally by ucharcopy when a byte sequence is not valid UTF-8.
    class Utf8Error < ::StandardError
    end

    # Raised for every malformed document or unencodable value.
    class Error < ::StandardError
    end

    # UTF-8 lead/continuation byte tags and payload masks.
    Utagx = 0b1000_0000
    Utag2 = 0b1100_0000
    Utag3 = 0b1110_0000
    Utag4 = 0b1111_0000
    Utag5 = 0b1111_1000
    Umaskx = 0b0011_1111
    Umask2 = 0b0001_1111
    Umask3 = 0b0000_1111
    Umask4 = 0b0000_0111
    # Largest code point representable in 1, 2 and 3 UTF-8 bytes.
    Uchar1max = (1 << 7) - 1
    Uchar2max = (1 << 11) - 1
    Uchar3max = (1 << 16) - 1
    Ucharerr = 0xFFFD # unicode "replacement char"
    Ustrerr = "\xef\xbf\xbd".freeze # unicode "replacement char"
    # UTF-16 surrogate ranges: [Usurr1, Usurr2) is high, [Usurr2, Usurr3) is low.
    Usurrself = 0x10000
    Usurr1 = 0xd800
    Usurr2 = 0xdc00
    Usurr3 = 0xe000

    Spc = " "[0].freeze
    Unesc = {"b" => "\b", "f" => "\f", "n" => "\n", "r" => "\r", "t" => "\t"}.freeze

    # Both token patterns are anchored with \A so that they can only match
    # at the current lexer position, never further into the input.
    NumberPattern = /\A(-?(?:[1-9][0-9]+|[0-9]))([.][0-9]+)?([eE][+-]?[0-9]+)?/
    StringPattern = %r{\A"(?:[^"\\]|\\["/\\bfnrt]|\\u[0-9a-fA-F]{4})*"}

    # Decodes a json document in string s and
    # returns the corresponding ruby value.
    # String s must be valid UTF-8. If you have
    # a string in some other encoding, convert
    # it first.
    #
    # String values in the resulting structure
    # will be UTF-8.
    #
    # Raises Error if s is not a well-formed document whose root is an
    # array or an object, including when s is truncated.
    def decode(s)
      ts = lex(s)
      v, ts = textparse(ts)
      raise Error, "trailing garbage" unless ts.empty?

      v
    end

    # Encodes x into a json text. It may contain only
    # Array, Hash, String, Numeric, true, false, nil.
    # (Note, this list excludes Symbol.)
    # X itself must be an Array or a Hash.
    # No other value can be encoded, and an error will
    # be raised if x contains any other value, such as
    # Nan, Infinity, Symbol, and Proc, or if a Hash key
    # is not a String.
    # Strings contained in x must be valid UTF-8.
    def encode(x)
      case x
      when Hash then objenc(x)
      when Array then arrenc(x)
      else
        raise Error, "root value must be an Array or a Hash"
      end
    end

    # Encodes a single value of any supported type. Values of other
    # classes are delegated to their own #to_json when they respond to it;
    # otherwise Error is raised.
    def valenc(x)
      case x
      when Hash then objenc(x)
      when Array then arrenc(x)
      when String then strenc(x)
      when Numeric then numenc(x)
      when true then "true"
      when false then "false"
      when nil then "null"
      else
        raise Error, "cannot encode #{x.class}: #{x.inspect}" unless x.respond_to?(:to_json)

        x.to_json
      end
    end

    private

    # Parses a "json text" in the sense of RFC 4627.
    # Returns the parsed value and any trailing tokens.
    # Note: this is almost the same as valparse,
    # except that it does not accept atomic values.
    def textparse(ts)
      raise Error, "empty" if ts.empty?

      typ, _, val = ts[0]
      case typ
      when "{" then objparse(ts)
      when "[" then arrparse(ts)
      else
        raise Error, "unexpected #{val.inspect}"
      end
    end

    # Parses a "value" in the sense of RFC 4627.
    # Returns the parsed value and any trailing tokens.
    def valparse(ts)
      raise Error, "empty" if ts.empty?

      typ, _, val = ts[0]
      case typ
      when "{" then objparse(ts)
      when "[" then arrparse(ts)
      when :val, :str then [val, ts[1..]]
      else
        raise Error, "unexpected #{val.inspect}"
      end
    end

    # Parses an "object" in the sense of RFC 4627.
    # Returns the parsed value and any trailing tokens.
    def objparse(ts)
      ts = eat("{", ts)
      obj = {}
      return obj, ts[1..] if peek(ts) == "}"

      loop do
        k, v, ts = pairparse(ts)
        obj[k] = v
        return obj, ts[1..] if peek(ts) == "}"

        ts = eat(",", ts)
      end
    end

    # Parses a "member" in the sense of RFC 4627.
    # Returns the parsed values and any trailing tokens.
    def pairparse(ts)
      raise Error, "unexpected end of input" if ts.empty?

      typ, _, k = ts[0]
      raise Error, "unexpected #{k.inspect}" if typ != :str

      ts = eat(":", ts[1..])
      v, ts = valparse(ts)
      [k, v, ts]
    end

    # Parses an "array" in the sense of RFC 4627.
    # Returns the parsed value and any trailing tokens.
    def arrparse(ts)
      ts = eat("[", ts)
      arr = []
      return arr, ts[1..] if peek(ts) == "]"

      loop do
        v, ts = valparse(ts)
        arr << v
        return arr, ts[1..] if peek(ts) == "]"

        ts = eat(",", ts)
      end
    end

    # Returns the type of the next token without consuming it.
    # Raises Error (rather than failing on nil) when the token list is
    # exhausted, which is what happens for truncated documents.
    def peek(ts)
      raise Error, "unexpected end of input" if ts.empty?

      ts[0][0]
    end

    # Consumes one token of type typ and returns the remaining tokens.
    # Raises Error if the next token has another type or is missing.
    def eat(typ, ts)
      raise Error, "expected #{typ} (got end of input)" if ts.empty?
      raise Error, "expected #{typ} (got #{ts[0].inspect})" if ts[0][0] != typ

      ts[1..]
    end

    # Scans s and returns a list of json tokens,
    # excluding white space (as defined in RFC 4627).
    def lex(s)
      ts = []
      until s.empty?
        typ, lexeme, val = tok(s)
        raise Error, "invalid character at #{s[0, 10].inspect}" if typ.nil?

        ts << [typ, lexeme, val] if typ != :space
        s = s[lexeme.length..]
      end
      ts
    end

    # Scans the first token in s and
    # returns a 3-element list, or an empty list
    # if s does not begin with a valid token.
    #
    # The first list element is one of
    # '{', '}', ':', ',', '[', ']',
    # :val, :str, and :space.
    #
    # The second element is the lexeme.
    #
    # The third element is the value of the
    # token for :val and :str, otherwise
    # it is the lexeme.
    def tok(s)
      c = s[0]
      case c
      when "{", "}", ":", ",", "[", "]" then [c, c, c]
      when "n" then nulltok(s)
      when "t" then truetok(s)
      when "f" then falsetok(s)
      when '"' then strtok(s)
      when Spc, "\t", "\n", "\r" then [:space, c, c]
      else
        numtok(s)
      end
    end

    # Literal tokens; each returns [] when s does not start with the literal.
    def nulltok(s) = (s[0, 4] == "null") ? [:val, "null", nil] : []

    def truetok(s) = (s[0, 4] == "true") ? [:val, "true", true] : []

    def falsetok(s) = (s[0, 5] == "false") ? [:val, "false", false] : []

    # Scans a number at the start of s; returns [] if there is none.
    # Plain integers stay Integer, anything with a fraction becomes Float.
    # An exponent without a fraction stays Integer when it is non-negative
    # and becomes Float when it is negative (so 1e-2 is 0.01, not a Rational).
    def numtok(s)
      m = NumberPattern.match(s)
      return [] unless m

      lexeme = m[0]
      return [:val, lexeme, Float(lexeme)] if m[2]
      return [:val, lexeme, Integer(lexeme)] unless m[3]

      exponent = m[3][1..].to_i(10)
      # NOTE(review): a very large positive exponent builds a very large
      # Integer here; callers handling untrusted input may want a cap.
      value = exponent.negative? ? Float(lexeme) : Integer(m[1]) * (10**exponent)
      [:val, lexeme, value]
    end

    # Scans a string literal at the start of s.
    # Raises Error if s does not begin with a complete, valid literal.
    def strtok(s)
      m = StringPattern.match(s)
      raise Error, "invalid string literal at #{abbrev(s)}" unless m

      [:str, m[0], unquote(m[0])]
    end

    # Returns at most the first 10 characters of s, cut before any
    # backtick, wrapped in backticks for use in error messages.
    def abbrev(s)
      t = s[0, 10]
      cut = t.index("`")
      t = t[0, cut] if cut
      t += "..." if t.length < s.length
      "`#{t}`"
    end

    # Converts a quoted json string literal q into a UTF-8-encoded string.
    # The rules are different than for Ruby, so we cannot use eval.
    # Unquote will raise an error if q contains control characters.
    def unquote(q)
      body = q[1...-1]
      chars = body.chars
      out = String.new(encoding: Encoding::UTF_8)
      r = 0
      while r < chars.length
        c = chars[r]
        if c == "\\"
          r += 1
          raise Error, "string literal ends with a \"\\\": \"#{body}\"" if r >= chars.length

          r = unescape(body, chars, r, out)
        elsif c == '"' || c < Spc
          raise Error, "invalid character in string literal \"#{body}\""
        else
          # Copy anything else unchanged.
          out << c
          r += 1
        end
      end
      out
    end

    # Decodes the escape whose selector character is chars[r] (the
    # character right after the backslash), appends the result to out and
    # returns the index of the first character after the escape.
    def unescape(body, chars, r, out)
      esc = chars[r]
      case esc
      when '"', "\\", "/", "'"
        out << esc
        r + 1
      when "b", "f", "n", "r", "t"
        out << Unesc[esc]
        r + 1
      when "u"
        uchar, r = unicode_escape(chars, r + 1)
        out << uchar.chr(Encoding::UTF_8)
        r
      else
        raise Error, "invalid escape char #{esc} in \"#{body}\""
      end
    end

    # Decodes the four hex digits at chars[r, 4] and, when they form a high
    # surrogate that is followed by a "\uXXXX" low surrogate, the pair.
    # Returns [code point, index after the consumed characters].
    # Unpaired surrogates become Ucharerr and consume only their own escape.
    def unicode_escape(chars, r)
      hex = chars[r, 4].join
      uchar = begin
        hexdec4(hex)
      rescue Error => e
        raise Error, "invalid escape sequence \\u#{hex}: #{e}"
      end
      r += 4
      return [uchar, r] unless surrogate?(uchar)

      # Only treat the next six characters as the second half of a pair
      # when they really are a \u escape.
      if chars[r] == "\\" && chars[r + 1] == "u" && chars.length >= r + 6
        pair = subst(uchar, hexdec4(chars[r + 2, 4].join))
        return [pair, r + 6] if pair != Ucharerr
      end
      [Ucharerr, r]
    end

    # Encodes unicode character u as UTF-8
    # bytes in string a at position i.
    # Returns the number of bytes written.
    # No longer called within this module; kept so the private helper set
    # stays backward-compatible.
    def ucharenc(a, i, u)
      if u <= Uchar1max
        a[i] = (u & 0xff).chr
        1
      elsif u <= Uchar2max
        a[i + 0] = (Utag2 | ((u >> 6) & 0xff)).chr
        a[i + 1] = (Utagx | (u & Umaskx)).chr
        2
      elsif u <= Uchar3max
        a[i + 0] = (Utag3 | ((u >> 12) & 0xff)).chr
        a[i + 1] = (Utagx | ((u >> 6) & Umaskx)).chr
        a[i + 2] = (Utagx | (u & Umaskx)).chr
        3
      else
        a[i + 0] = (Utag4 | ((u >> 18) & 0xff)).chr
        a[i + 1] = (Utagx | ((u >> 12) & Umaskx)).chr
        a[i + 2] = (Utagx | ((u >> 6) & Umaskx)).chr
        a[i + 3] = (Utagx | (u & Umaskx)).chr
        4
      end
    end

    # Converts exactly four hex digits to an Integer.
    # Raises Error if s is not four characters long or contains a
    # character that is not a hex digit.
    def hexdec4(s)
      raise Error, "short" if s.length != 4

      (nibble(s[0]) << 12) | (nibble(s[1]) << 8) | (nibble(s[2]) << 4) | nibble(s[3])
    end

    # Combines a high surrogate u1 and a low surrogate u2 into one code
    # point, or returns Ucharerr if they do not form a valid pair.
    def subst(u1, u2)
      if u1 >= Usurr1 && u1 < Usurr2 && u2 >= Usurr2 && u2 < Usurr3
        # The offset must be ADDED to the combined 20 bits; OR-ing it in
        # loses bit 16 for every supplementary plane above the first.
        return (((u1 - Usurr1) << 10) | (u2 - Usurr2)) + Usurrself
      end

      Ucharerr
    end

    # True if u lies in the UTF-16 surrogate range (high or low).
    def surrogate?(u)
      u >= Usurr1 && u < Usurr3
    end

    # Returns the value (0..15) of hex digit c; raises Error otherwise.
    def nibble(c)
      if c >= "0" && c <= "9" then c.ord - "0".ord
      elsif c >= "a" && c <= "f" then c.ord - "a".ord + 10
      elsif c >= "A" && c <= "F" then c.ord - "A".ord + 10
      else
        raise Error, "invalid hex code #{c}"
      end
    end

    # Encodes Hash x as a json object; keys must be Strings.
    def objenc(x)
      members = x.map { |k, v| keyenc(k) + ":" + valenc(v) }
      "{" + members.join(",") + "}"
    end

    # Encodes Array a as a json array.
    def arrenc(a)
      "[" + a.map { |x| valenc(x) }.join(",") + "]"
    end

    # Encodes a Hash key; raises Error unless it is a String.
    def keyenc(k)
      case k
      when String then strenc(k)
      else
        raise Error, "Hash key is not a string: #{k.inspect}"
      end
    end

    # Encodes String s as a json string literal. Control characters are
    # escaped; a character whose code point cannot be determined (invalid
    # UTF-8) is replaced by Ustrerr.
    def strenc(s)
      t = StringIO.new(String.new(encoding: Encoding::UTF_8))
      t.putc('"')
      s.each_char do |c|
        case c
        when '"' then t.write('\\"')
        when "\\" then t.write("\\\\")
        when "\b" then t.write('\\b')
        when "\f" then t.write('\\f')
        when "\n" then t.write('\\n')
        when "\r" then t.write('\\r')
        when "\t" then t.write('\\t')
        else
          begin
            # c.ord will raise an error if c is invalid UTF-8
            code = c.ord
            t.write((code < Spc.ord) ? format("\\u%04x", code) : c)
          rescue StandardError
            t.write(Ustrerr)
          end
        end
      end
      t.putc('"')
      t.string
    end

    # Encodes Numeric x; raises Error for NaN and infinities, which json
    # cannot represent.
    def numenc(x)
      unrepresentable = (x.respond_to?(:nan?) && x.nan?) || (x.respond_to?(:infinite?) && x.infinite?)
      raise Error, "Numeric cannot be represented: #{x}" if unrepresentable

      x.to_s
    end

    # Copies the valid UTF-8 bytes of a single character
    # from string s at position i to I/O object t, and
    # returns the number of bytes copied.
    # If no valid UTF-8 char exists at position i,
    # ucharcopy writes Ustrerr and returns 1.
    # No longer called within this module; kept so the private helper set
    # stays backward-compatible.
    def ucharcopy(t, s, i)
      n = s.length - i
      raise Utf8Error if n < 1

      c0 = s[i].ord

      # 1-byte, 7-bit sequence?
      if c0 < Utagx
        t.putc(c0)
        return 1
      end

      raise Utf8Error if c0 < Utag2 # unexpected continuation byte?

      raise Utf8Error if n < 2 # need continuation byte

      c1 = s[i + 1].ord
      raise Utf8Error if c1 < Utagx || c1 >= Utag2

      # 2-byte, 11-bit sequence?
      if c0 < Utag3
        raise Utf8Error if (((c0 & Umask2) << 6) | (c1 & Umaskx)) <= Uchar1max

        t.putc(c0)
        t.putc(c1)
        return 2
      end

      # need second continuation byte
      raise Utf8Error if n < 3

      c2 = s[i + 2].ord
      raise Utf8Error if c2 < Utagx || c2 >= Utag2

      # 3-byte, 16-bit sequence?
      if c0 < Utag4
        u = ((c0 & Umask3) << 12) | ((c1 & Umaskx) << 6) | (c2 & Umaskx)
        raise Utf8Error if u <= Uchar2max

        t.putc(c0)
        t.putc(c1)
        t.putc(c2)
        return 3
      end

      # need third continuation byte
      raise Utf8Error if n < 4

      c3 = s[i + 3].ord
      raise Utf8Error if c3 < Utagx || c3 >= Utag2

      # 4-byte, 21-bit sequence?
      if c0 < Utag5
        u = ((c0 & Umask4) << 18) | ((c1 & Umaskx) << 12) | ((c2 & Umaskx) << 6) | (c3 & Umaskx)
        raise Utf8Error if u <= Uchar3max

        t.putc(c0)
        t.putc(c1)
        t.putc(c2)
        t.putc(c3)
        return 4
      end

      raise Utf8Error
    rescue Utf8Error
      t.write(Ustrerr)
      1
    end

    # True when String supports encodings (String#force_encoding exists).
    def rubydoesenc?
      ::String.method_defined?(:force_encoding)
    end
  end
end