RFC8259 8259
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +61 -0
- data/.rspec +4 -0
- data/.yardopts +2 -0
- data/Gemfile +58 -0
- data/LICENSE.txt +40 -0
- data/README.txt +1 -0
- data/RFC8259 +899 -0
- data/RFC8259.gemspec +89 -0
- data/Rakefile +86 -0
- data/lib/RFC8259.rb +94 -0
- data/lib/RFC8259/array.rb +136 -0
- data/lib/RFC8259/dumper.rb +255 -0
- data/lib/RFC8259/false.rb +79 -0
- data/lib/RFC8259/null.rb +79 -0
- data/lib/RFC8259/number.rb +149 -0
- data/lib/RFC8259/object.rb +137 -0
- data/lib/RFC8259/parser.ry +275 -0
- data/lib/RFC8259/string.rb +226 -0
- data/lib/RFC8259/true.rb +79 -0
- data/lib/RFC8259/value.rb +96 -0
- data/lib/RFC8259/version.rb +46 -0
- data/spec/RFC8259_spec.rb +305 -0
- data/spec/acceptance/README.txt +66 -0
- data/spec/acceptance/invalid/0001-ws/0001-verical-tab.txt +1 -0
- data/spec/acceptance/invalid/0001-ws/0002-null.txt +0 -0
- data/spec/acceptance/invalid/0001-ws/0003-space-in-number-1.txt +1 -0
- data/spec/acceptance/invalid/0001-ws/0004-space-in-number-2.txt +1 -0
- data/spec/acceptance/invalid/0001-ws/0005-space-in-number-3.txt +1 -0
- data/spec/acceptance/invalid/0001-ws/0006-space-in-number-4.txt +1 -0
- data/spec/acceptance/invalid/0001-ws/0007-space-in-number-5.txt +1 -0
- data/spec/acceptance/invalid/0001-ws/0008-space-in-number-6.txt +1 -0
- data/spec/acceptance/invalid/0001-ws/0009-space-in-literal.txt +1 -0
- data/spec/acceptance/invalid/0001-ws/0010-Unicode-LINE_SEPARATOR.txt +1 -0
- data/spec/acceptance/invalid/0002-comments/0001-C-style.txt +1 -0
- data/spec/acceptance/invalid/0002-comments/0002-C-plusplus-style.txt +2 -0
- data/spec/acceptance/invalid/0002-comments/0003-sh-style.txt +2 -0
- data/spec/acceptance/invalid/0002-comments/0004-python-docstring.txt +3 -0
- data/spec/acceptance/invalid/0002-comments/0005-SQL-style.txt +2 -0
- data/spec/acceptance/invalid/0002-comments/0006-BASIC-style.txt +2 -0
- data/spec/acceptance/invalid/0003-literals/0001-FALSE.txt +1 -0
- data/spec/acceptance/invalid/0003-literals/0002-NULL.txt +1 -0
- data/spec/acceptance/invalid/0003-literals/0003-TRUE.txt +1 -0
- data/spec/acceptance/invalid/0003-literals/0004-NUL.txt +1 -0
- data/spec/acceptance/invalid/0003-literals/0005-nil.txt +1 -0
- data/spec/acceptance/invalid/0003-literals/0006-undef.txt +1 -0
- data/spec/acceptance/invalid/0003-literals/0007-modifier.txt +1 -0
- data/spec/acceptance/invalid/0003-literals/0008-undefined.txt +1 -0
- data/spec/acceptance/invalid/0004-numbers/0001-omit-zero.txt +1 -0
- data/spec/acceptance/invalid/0004-numbers/0002-minus-dot.txt +1 -0
- data/spec/acceptance/invalid/0004-numbers/0003-missing-frac.txt +1 -0
- data/spec/acceptance/invalid/0004-numbers/0004-missing-exp.txt +1 -0
- data/spec/acceptance/invalid/0004-numbers/0005-octal.txt +1 -0
- data/spec/acceptance/invalid/0004-numbers/0006-hexadecimal.txt +1 -0
- data/spec/acceptance/invalid/0004-numbers/0007-comma.txt +1 -0
- data/spec/acceptance/invalid/0004-numbers/0008-perl-underscore.txt +1 -0
- data/spec/acceptance/invalid/0004-numbers/0009-NaN.txt +1 -0
- data/spec/acceptance/invalid/0004-numbers/0010-Inf.txt +1 -0
- data/spec/acceptance/invalid/0004-numbers/0011-Infinity.txt +1 -0
- data/spec/acceptance/invalid/0005-strings/0000-NUL.txt +0 -0
- data/spec/acceptance/invalid/0005-strings/0001-not-terminated.txt +1 -0
- data/spec/acceptance/invalid/0005-strings/0002-single-quote.txt +1 -0
- data/spec/acceptance/invalid/0005-strings/0003-back-quote.txt +1 -0
- data/spec/acceptance/invalid/0005-strings/0004-carriage-return.txt +1 -0
- data/spec/acceptance/invalid/0005-strings/0005-line-feed.txt +2 -0
- data/spec/acceptance/invalid/0005-strings/0006-unknown-escape-a.txt +1 -0
- data/spec/acceptance/invalid/0005-strings/0007-unknown-escape-perl-style.txt +1 -0
- data/spec/acceptance/invalid/0005-strings/0008-unknown-escape-C-style.txt +1 -0
- data/spec/acceptance/invalid/0005-strings/0009-unknown-escape-ruby-style.txt +1 -0
- data/spec/acceptance/invalid/0005-strings/0010-escape-too-short.txt +1 -0
- data/spec/acceptance/invalid/0005-strings/0011-C-string-concat.txt +1 -0
- data/spec/acceptance/invalid/0005-strings/0012-perl-string-concat.txt +1 -0
- data/spec/acceptance/invalid/0005-strings/0013-Java-string-concat.txt +1 -0
- data/spec/acceptance/invalid/0006-encodings/0001-CESU-8.txt +1 -0
- data/spec/acceptance/invalid/0006-encodings/0002-Windows-31J.txt +1 -0
- data/spec/acceptance/invalid/0006-encodings/0003-EBCDIC.txt +1 -0
- data/spec/acceptance/invalid/0006-encodings/0004-overlong-utf8.txt +1 -0
- data/spec/acceptance/invalid/0007-arrays/0001-lacks-open.txt +1 -0
- data/spec/acceptance/invalid/0007-arrays/0002-lacks-close.txt +1 -0
- data/spec/acceptance/invalid/0007-arrays/0003-interleaving-parens.txt +1 -0
- data/spec/acceptance/invalid/0007-arrays/0004-dangling-comma.txt +1 -0
- data/spec/acceptance/invalid/0007-arrays/0005-missing-comma.txt +1 -0
- data/spec/acceptance/invalid/0007-arrays/0006-colon-instead-of-comma.txt +1 -0
- data/spec/acceptance/invalid/0008-hashes/0001-key-missing.txt +1 -0
- data/spec/acceptance/invalid/0008-hashes/0002-value-missing.txt +1 -0
- data/spec/acceptance/invalid/0008-hashes/0003-true-key.txt +1 -0
- data/spec/acceptance/invalid/0008-hashes/0004-false-key.txt +1 -0
- data/spec/acceptance/invalid/0008-hashes/0005-null-key.txt +1 -0
- data/spec/acceptance/invalid/0008-hashes/0006-numeric-key.txt +1 -0
- data/spec/acceptance/invalid/0008-hashes/0007-array-key.txt +1 -0
- data/spec/acceptance/invalid/0008-hashes/0008-hash-key.txt +1 -0
- data/spec/acceptance/invalid/0008-hashes/0009-key-not-escaped.txt +4 -0
- data/spec/acceptance/invalid/0009-javascriptisms/0001-JSONP.txt +1 -0
- data/spec/acceptance/invalid/0009-javascriptisms/0002-new-Array.txt +3 -0
- data/spec/acceptance/invalid/0009-javascriptisms/0003-new-Date.txt +1 -0
- data/spec/acceptance/invalid/0009-javascriptisms/0004-new-Error.txt +1 -0
- data/spec/acceptance/invalid/0009-javascriptisms/0005-Math.txt +1 -0
- data/spec/acceptance/invalid/0009-javascriptisms/0006-regular-expression.txt +1 -0
- data/spec/acceptance/invalid/0009-javascriptisms/0007-function.txt +7 -0
- data/spec/acceptance/invalid/0009-javascriptisms/0008-this.txt +1 -0
- data/spec/acceptance/invalid/0009-javascriptisms/0009-plusplus.txt +3 -0
- data/spec/acceptance/invalid/0009-javascriptisms/0010-ternary-operator.txt +1 -0
- data/spec/acceptance/valid/0001-ws/0001-space.json +1 -0
- data/spec/acceptance/valid/0001-ws/0002-tab.json +1 -0
- data/spec/acceptance/valid/0001-ws/0003-lf.json +1 -0
- data/spec/acceptance/valid/0001-ws/0004-cr.json +1 -0
- data/spec/acceptance/valid/0001-ws/0005-before.json +1 -0
- data/spec/acceptance/valid/0001-ws/0006-after.json +1 -0
- data/spec/acceptance/valid/0001-ws/0007-around-comma.json +3 -0
- data/spec/acceptance/valid/0001-ws/0008-around-colon.json +3 -0
- data/spec/acceptance/valid/0002-bare-values/0001-false.json +1 -0
- data/spec/acceptance/valid/0002-bare-values/0002-null.json +1 -0
- data/spec/acceptance/valid/0002-bare-values/0003-true.json +1 -0
- data/spec/acceptance/valid/0002-bare-values/0004-number.json +1 -0
- data/spec/acceptance/valid/0002-bare-values/0005-string.json +1 -0
- data/spec/acceptance/valid/0003-literals/0001-false.json +1 -0
- data/spec/acceptance/valid/0003-literals/0002-null.json +1 -0
- data/spec/acceptance/valid/0003-literals/0003-true.json +1 -0
- data/spec/acceptance/valid/0004-numbers/0000-zero.json +1 -0
- data/spec/acceptance/valid/0004-numbers/0001-one.json +1 -0
- data/spec/acceptance/valid/0004-numbers/0002-two.json +1 -0
- data/spec/acceptance/valid/0004-numbers/0003-three.json +1 -0
- data/spec/acceptance/valid/0004-numbers/0004-four.json +1 -0
- data/spec/acceptance/valid/0004-numbers/0005-five.json +1 -0
- data/spec/acceptance/valid/0004-numbers/0006-six.json +1 -0
- data/spec/acceptance/valid/0004-numbers/0007-seven.json +1 -0
- data/spec/acceptance/valid/0004-numbers/0008-eight.json +1 -0
- data/spec/acceptance/valid/0004-numbers/0009-nine.json +1 -0
- data/spec/acceptance/valid/0004-numbers/0010-ten.json +1 -0
- data/spec/acceptance/valid/0004-numbers/0011-minus.json +1 -0
- data/spec/acceptance/valid/0004-numbers/0012-fraction.json +1 -0
- data/spec/acceptance/valid/0004-numbers/0013-exponent.json +1 -0
- data/spec/acceptance/valid/0004-numbers/0014-exponent-minus.json +1 -0
- data/spec/acceptance/valid/0004-numbers/0015-exponent-plus.json +1 -0
- data/spec/acceptance/valid/0004-numbers/0016-complex.json +1 -0
- data/spec/acceptance/valid/0004-numbers/0017-DBL_MAX.json +1 -0
- data/spec/acceptance/valid/0004-numbers/0018-DBL_MIN.json +1 -0
- data/spec/acceptance/valid/0004-numbers/0019-subnormal-number.json +1 -0
- data/spec/acceptance/valid/0004-numbers/0020-1E400.json +1 -0
- data/spec/acceptance/valid/0004-numbers/0021-pi.json +1 -0
- data/spec/acceptance/valid/0004-numbers/0022-UINT32_MAX.json +1 -0
- data/spec/acceptance/valid/0004-numbers/0023-UINT64_MAX.json +1 -0
- data/spec/acceptance/valid/0004-numbers/0024-INT64_MIN.json +1 -0
- data/spec/acceptance/valid/0004-numbers/0025-high-resolution-zero.json +1 -0
- data/spec/acceptance/valid/0004-numbers/0026-high-resolution-100.json +1 -0
- data/spec/acceptance/valid/0005-strings/0001-empty.json +1 -0
- data/spec/acceptance/valid/0005-strings/0002-basic-latin.json +1 -0
- data/spec/acceptance/valid/0005-strings/0003-escapes.json +1 -0
- data/spec/acceptance/valid/0005-strings/0004-raw-unicode.json +1 -0
- data/spec/acceptance/valid/0005-strings/0005-escaped-unicode.json +1 -0
- data/spec/acceptance/valid/0005-strings/0006-escaped-NUL.json +1 -0
- data/spec/acceptance/valid/0005-strings/0007-escaped-invalid-unicode-still-valid-as-json.json +1 -0
- data/spec/acceptance/valid/0005-strings/0008-ruby-json-gem-cant-handle-this.json +1 -0
- data/spec/acceptance/valid/0005-strings/0009-unescaped-invalid-javascript-still-valid-as-json.json +1 -0
- data/spec/acceptance/valid/0005-strings/0010-escaped-separated-surrogate.json +1 -0
- data/spec/acceptance/valid/0005-strings/0011-escaped-surrogate-then-normal.json +1 -0
- data/spec/acceptance/valid/0006-m17n/0001-genesis.json +6 -0
- data/spec/acceptance/valid/0006-m17n/0002-heart-sutra.json +5 -0
- data/spec/acceptance/valid/0006-m17n/0003-escaped-valid-surrogate-pair.json +1 -0
- data/spec/acceptance/valid/0006-m17n/0004-unescaped-valid-supplementary-multilingual-plane.json +1 -0
- data/spec/acceptance/valid/0007-arrays/0000-empty.json +1 -0
- data/spec/acceptance/valid/0007-arrays/0001-one-element.json +1 -0
- data/spec/acceptance/valid/0007-arrays/0002-multiple-elements.json +33 -0
- data/spec/acceptance/valid/0007-arrays/0003-various-types.json +1 -0
- data/spec/acceptance/valid/0007-arrays/0004-nested.json +17 -0
- data/spec/acceptance/valid/0008-hashes/0000-empty.json +1 -0
- data/spec/acceptance/valid/0008-hashes/0001-onekey.json +1 -0
- data/spec/acceptance/valid/0008-hashes/0002-many-keys.json +5 -0
- data/spec/acceptance/valid/0008-hashes/0003-empty-key.json +3 -0
- data/spec/acceptance/valid/0008-hashes/0004-true-value.json +3 -0
- data/spec/acceptance/valid/0008-hashes/0005-false-value.json +3 -0
- data/spec/acceptance/valid/0008-hashes/0006-null-value.json +3 -0
- data/spec/acceptance/valid/0008-hashes/0007-string-value.json +3 -0
- data/spec/acceptance/valid/0008-hashes/0008-numeric-value.json +3 -0
- data/spec/acceptance/valid/0008-hashes/0009-array-value.json +8 -0
- data/spec/acceptance/valid/0008-hashes/0010-hash-value.json +20 -0
- data/spec/acceptance/valid/0008-hashes/0011-duplicate-key.json +4 -0
- data/spec/acceptance/valid/0008-hashes/0012-duplicate-key-in-different-representations.json +4 -0
- data/spec/acceptance/valid/0009-complicated/0001-jsonschema.json +46 -0
- data/spec/acceptance/valid/0009-complicated/0002-example-in-RFC7159-section-13.json +14 -0
- data/spec/acceptance/valid/0009-complicated/0003-example-in-RFC7159-section-13.json +22 -0
- data/spec/spec_helper.rb +60 -0
- metadata +530 -0
@@ -0,0 +1,226 @@
|
|
1
|
+
#! /your/favourite/path/to/ruby
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
# Copyright (c) 2014 Urabe, Shyouhei. All rights reserved.
|
5
|
+
#
|
6
|
+
# Redistribution and use in source and binary forms, with or without
|
7
|
+
# modification, are permitted provided that the following conditions are met:
|
8
|
+
#
|
9
|
+
# - Redistributions of source code must retain the above copyright
|
10
|
+
# notice, this list of conditions and the following disclaimer.
|
11
|
+
#
|
12
|
+
# - Redistributions in binary form must reproduce the above copyright
|
13
|
+
# notice, this list of conditions and the following disclaimer in
|
14
|
+
# the documentation and/or other materials provided with the
|
15
|
+
# distribution.
|
16
|
+
#
|
17
|
+
# - Neither the name of Internet Society, IETF or IETF Trust, nor the
|
18
|
+
# names of specific contributors, may be used to endorse or promote
|
19
|
+
# products derived from this software without specific prior written
|
20
|
+
# permission.
|
21
|
+
#
|
22
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
|
23
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
24
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
25
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
26
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
27
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
28
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
29
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
30
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
31
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
32
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
33
|
+
|
34
|
+
# The Strings, as described in RFC8259 section 7.
|
35
|
+
class RFC8259::String < RFC8259::Value
  # Parse the AST from the parser and convert it into the corresponding value.
  #
  # @param [::Array] ast  the AST generated by the parser.  Its first element
  #   must be the tag +:string+; the remaining elements are the contents.
  # @return [RFC8259::String] evaluated instance
  # @raise [ArgumentError] the AST is not tagged +:string+
  def self.from_ast ast
    type, *ary = *ast
    raise ArgumentError, "not a string: #{ast.inspect}" if type != :string
    new ary
  end

  # @return [::String] the converted (unescaped, frozen) string
  def plain_old_ruby_object
    @str
  end

  alias to_s plain_old_ruby_object
  alias to_str plain_old_ruby_object

  # @return [::String] the class, the object id shifted left by one bit (in
  #   hex), and the converted string in its inspected form
  def inspect
    sprintf "#<%p:%#016x %p>", self.class, self.object_id << 1, @str
  end

  # For pretty print
  # @param [PP] pp the pp
  def pretty_print pp
    hdr = sprintf '#<%p:%#016x', self.class, self.object_id << 1
    pp.group 1, hdr, '>' do
      pp.text ' '
      @str.pretty_print pp
    end
  end

  # JSON gem compat.  Any arguments (e.g. a generator state) are accepted and
  # ignored.
  # @return [::String] the original elements, as received from the parser,
  #   joined, transcoded to UTF-8 and wrapped in double quotes
  def to_json(*)
    # Here '"', which is UTF-8, and @orig, which might be UTF-16, should be
    # aligned.  We take UTF-8 because we are not interested in generating
    # UTF-16 JSON and so on.  The literal is dup'ed so that it is never
    # mutated in place (it may be frozen).
    '"'.dup << @orig.flatten.join('').encode(Encoding::UTF_8) << '"'
  end

  # String comparisons are defined in RFC8259 section 8.3.  We follow that.
  #
  # @param [Object] other anything; only objects responding to +to_str+ can
  #   be equal to this one
  # @return [true, false] whether both sides convert to equal strings
  def == other
    # Ask instead of rescuing NoMethodError, so that such an error raised
    # inside the other object's own +to_str+ is not silently swallowed.
    other.respond_to?(:to_str) && to_str == other.to_str
  end

  private

  private_class_method :new

  # @private
  # @param [::Array] ary  the string contents from the parser: each element
  #   is either a one-character string, or an array of characters that forms
  #   an escape sequence such as ['\\', 'u', 'F', 'F', 'E', 'E'].
  # @raise [RuntimeError] an escape sequence is not one of those handled here
  def initialize ary
    @orig = ary
    enc = ary[0][0].encoding rescue Encoding::US_ASCII # empty string

    # Pass 1: turn every element into an integer.  Escapes are decoded;
    # everything else is taken as-is by its ordinal.
    units = ary.map do |i|
      case i
      when Array
        # ['\\', 'u', 'F', 'F', 'E', 'E'] or something
        case i[1].encode(Encoding::US_ASCII)
        when "\x22" then 0x0022 # "    quotation mark  U+0022
        when "\x5C" then 0x005C # \    reverse solidus U+005C
        when "\x2F" then 0x002F # /    solidus         U+002F
        when "\x62" then 0x0008 # b    backspace       U+0008
        when "\x66" then 0x000C # f    form feed       U+000C
        when "\x6E" then 0x000A # n    line feed       U+000A
        when "\x72" then 0x000D # r    carriage return U+000D
        when "\x74" then 0x0009 # t    tab             U+0009
        when "\x75" then        # uXXXX                U+XXXX
          i[2..5].join.encode(Encoding::US_ASCII).to_i 16
        else
          raise "invalid escape: #{i.inspect}"
        end
      else
        i.ord
      end
    end

    # Pass 2: a high surrogate immediately followed by a low surrogate is
    # merged into the one code point the pair stands for.  Anything else,
    # including a lone surrogate, is passed through untouched.  `pending` is
    # a one-element look-behind buffer.
    codepoints = []
    pending    = nil
    units.each do |u|
      if pending && (0xD800..0xDBFF).cover?(pending) && (0xDC00..0xDFFF).cover?(u)
        merged = 0x10000 + ((pending - 0xD800) << 10) + (u - 0xDC00)
        codepoints << merged
        pending = nil # consumed
      else
        codepoints << pending if pending
        pending = u
      end
    end
    codepoints << pending if pending # the buffer might remain

    # Pass 3: serialise the code points.
    #
    # RFC8259 section 8.1 states that the JSON text itself shall be written
    # in a sort of Unicode.  However the parsed JSON value's content strings
    # are not always Unicode-valid, according to its section 8.2.  Then what?
    # It says nothing.  Here, we preserve the JSON text's encoding, i.e. if
    # the JSON text is in UTF-16, the result is tagged UTF-16.  Bytes are
    # accumulated as BINARY so that invalid sequences can be appended, and
    # the result is tagged with the encoding at the very end, whether or not
    # it turns out to be valid in that encoding.
    bytes = codepoints.each_with_object(''.b) do |cp, r|
      packed =
        case enc
        when Encoding::UTF_32BE then [cp].pack 'N'
        when Encoding::UTF_32LE then [cp].pack 'V'
        # 'n' / 'v' hold 16 bits only; a code point above U+FFFF has to be
        # written as two code units, or its upper bits would be dropped.
        when Encoding::UTF_16BE then utf16_code_units(cp).pack 'n*'
        when Encoding::UTF_16LE then utf16_code_units(cp).pack 'v*'
        else                         [cp].pack 'U' # sort of UTF-8
        end
      r << packed.b
    end
    @str = bytes.force_encoding(enc).freeze
  end

  # @private
  # Split a code point into its UTF-16 code unit(s).
  # @param [Integer] cp a code point
  # @return [::Array<Integer>] one unit up to U+FFFF, a high/low surrogate
  #   pair above that
  def utf16_code_units cp
    return [cp] if cp <= 0xFFFF
    offset = cp - 0x10000
    [0xD800 | (offset >> 10), 0xDC00 | (offset & 0x3FF)]
  end
end
|
160
|
+
|
161
|
+
#
|
162
|
+
# Dialogue about evaluating JSON's string
|
163
|
+
# ----
|
164
|
+
# 2014.03.17.txt:20:50:01 >#ruby-ja@ircnet:shyouhei < JSONのRFC、文字列が"\uDEAD"とかなっててもvalidだよって書いてあるけど、
|
165
|
+
# 2014.03.17.txt:20:50:14 >#ruby-ja@ircnet:shyouhei < それはいいのだが
|
166
|
+
# 2014.03.17.txt:20:50:32 >#ruby-ja@ircnet:shyouhei < たとえばそのJSONがUTF-16で書かれているとして
|
167
|
+
# 2014.03.17.txt:20:50:59 >#ruby-ja@ircnet:shyouhei < UTF-16の"\uDEAD"的なのをRubyで作ろうと思うとなかなかむずかしいな
|
168
|
+
# 2014.03.17.txt:20:51:55 >#ruby-ja@ircnet:shyouhei < "\\uDEAD"という文字列(ただしUTF-16)を入力したら"\u{DEAD}"という文字列(ただしUTF-16)を出力する関数
|
169
|
+
# 2014.03.17.txt:20:52:08 >#ruby-ja@ircnet:shyouhei < むずい。
|
170
|
+
# 2014.03.17.txt:20:52:09 <#ruby-ja@ircnet:nurse > "\xDE\xAD".force_encoding("utf-16be")とかになっちゃいますなぁ
|
171
|
+
# 2014.03.17.txt:20:52:34 <#ruby-ja@ircnet:nurse > [0xDEAD].pack("n").force_encoding("utf-16be")のが素直かな
|
172
|
+
# 2014.03.17.txt:20:53:35 >#ruby-ja@ircnet:shyouhei < なんか実務上はそこまでがんばるより例外で死んだ方がしあわせになれそうではある
|
173
|
+
# 2014.03.17.txt:20:54:00 >#ruby-ja@ircnet:shyouhei < 誰も幸せにしなさそう
|
174
|
+
# 2014.03.17.txt:20:54:26 <#ruby-ja@ircnet:nurse > 死んじゃダメで、ゲタにするのが正解じゃないっけ
|
175
|
+
# 2014.03.17.txt:20:54:54 >#ruby-ja@ircnet:shyouhei < それがより正しそうですね
|
176
|
+
# 2014.03.17.txt:20:55:56 >#ruby-ja@ircnet:shyouhei < JSONはサロゲートペアもなんとかせねばならんので面倒そうだ
|
177
|
+
# 2014.03.17.txt:20:57:06 >#ruby-ja@ircnet:shyouhei < (\uXYZW が単体でNGぽいくても次にサロゲートペアが続くかもしれん)
|
178
|
+
# 2014.03.17.txt:20:57:37 >#ruby-ja@ircnet:shyouhei < めんどう!
|
179
|
+
# 2014.03.17.txt:20:57:42 >#ruby-ja@ircnet:shyouhei < UTF16しねばいいのに
|
180
|
+
# 2014.03.17.txt:20:59:06 <#ruby-ja@ircnet:nurse > とりあえずそのままUTF-16にしてみて、encodeでinvalid replaceすればいい気がする
|
181
|
+
# 2014.03.17.txt:21:00:33 >#ruby-ja@ircnet:shyouhei < すでにUTF16な文字列にサロゲートペアの片割れ的なバイナリをがしょがしょって後ろから足してからencodeするとよしなにする?
|
182
|
+
# 2014.03.17.txt:21:01:13 >#ruby-ja@ircnet:shyouhei < (頭の悪い発言なのは自覚しております)
|
183
|
+
# 2014.03.17.txt:21:01:29 <#ruby-ja@ircnet:nurse > invalid: :replaceつけてUTF-8にするなり、UTF-16のままscrubすれば
|
184
|
+
# 2014.03.17.txt:21:02:45 >#ruby-ja@ircnet:shyouhei < invalidなのはよいとして "\uFOO\uBAR" てきなサロゲートペアてきJSON文字列をちゃんとRuby的に(正しいUTF16文字列)に復元するシナリオ
|
185
|
+
# 2014.03.17.txt:21:03:46 <#ruby-ja@ircnet:nurse > たぶんAScii-8BITで足さないとエラーになる気がする
|
186
|
+
# 2014.03.17.txt:21:04:05 <#ruby-ja@ircnet:nurse > そこいがいは、無心につなげて、最後にencodeまたはscrubが正解ではないかと
|
187
|
+
# 2014.03.17.txt:21:04:13 >#ruby-ja@ircnet:shyouhei < あきらめて全部バイナリと思ってくっつけておいてから最後にencodeか
|
188
|
+
# 2014.03.17.txt:21:05:20 <#ruby-ja@ircnet:nurse > ASCII-8BITだと文字列のvalidチェックしない分速いし。
|
189
|
+
# 2014.03.17.txt:21:06:33 >#ruby-ja@ircnet:shyouhei < 世の中のJSONパーザがUTF16サポートしないという姿勢にはそれなりの理由があることがわかった。
|
190
|
+
# 2014.03.17.txt:21:07:17 <#ruby-ja@ircnet:nurse > そもそもHTTPで文字列流すのにASCII非互換ってのが邪悪である
|
191
|
+
# 2014.03.17.txt:21:15:04 <#ruby-ja@ircnet:nurse > 例のOpenBSDのsignifyをportableにしたらRubyでも使えるかなぁ
|
192
|
+
# 2014.03.17.txt:21:18:39 <#ruby-ja@ircnet:nurse > ていうか卜部さんはJSONパーサでも書いてるのかしら
|
193
|
+
# 2014.03.17.txt:21:18:56 <#ruby-ja@ircnet:nurse > って、聞いちゃいけない質問な気がした
|
194
|
+
# ----
|
195
|
+
# 2014.03.25.txt:16:08:14 >#ruby-ja@ircnet:shyouhei < "\u{dead}" を入力されたときに "\\uDEAD" を出力する関数を作成せよ
|
196
|
+
# 2014.03.25.txt:16:09:21 >#ruby-ja@ircnet:shyouhei < str.force_encoding('utf-8').scrub {|c| "\\u" + c.unpack('H*") } はだめぽい
|
197
|
+
# 2014.03.25.txt:16:14:13 >#ruby-ja@ircnet:shyouhei < primitive_convertでなんとかなるのかこれ
|
198
|
+
# 2014.03.25.txt:16:20:10 <#ruby-ja@ircnet:n0kada > "\u{dead}"ってinvalidなんだっけ
|
199
|
+
# 2014.03.25.txt:16:22:29 >#ruby-ja@ircnet:shyouhei < サロゲートペアのかたほう
|
200
|
+
# 2014.03.25.txt:16:22:44 >#ruby-ja@ircnet:shyouhei < それだけではinvalidすね
|
201
|
+
# 2014.03.25.txt:16:34:47 >#ruby-ja@ircnet:shyouhei < お、"\u{dead}".unpack('U*')で0xdeadが取得できる
|
202
|
+
# 2014.03.25.txt:16:34:57 >#ruby-ja@ircnet:shyouhei < ここからなんとかすればいいのか…?
|
203
|
+
# 2014.03.25.txt:16:35:00 >#ruby-ja@ircnet:shyouhei < しかしどうする
|
204
|
+
# 2014.03.25.txt:16:35:08 <#ruby-ja@ircnet:akr > "\u{dead}".unpack("U*").map {|c| 0xD800 <= c && c <= 0xDFFF ? "\\u%04X" % c : [c].pack("U") }.join
|
205
|
+
# 2014.03.25.txt:16:38:16 >#ruby-ja@ircnet:shyouhei < おお。
|
206
|
+
# 2014.03.25.txt:16:38:46 >#ruby-ja@ircnet:shyouhei < scrubでなんとかするのは筋が悪いことが分かりつつある
|
207
|
+
# 2014.03.25.txt:16:39:36 >#ruby-ja@ircnet:shyouhei < まずは文字列じゃなくてコードポイントの配列にして、そこでごにょってから、さいごに文字列になおすのが色々正しい雰囲気を感じる
|
208
|
+
# 2014.03.25.txt:16:39:53 <#ruby-ja@ircnet:akr > encoding が壊れている時に、文字の範囲を確定するのは難しいので。
|
209
|
+
# 2014.03.25.txt:16:43:08 <#ruby-ja@ircnet:n0kada > unpackはサロゲートペアの片割れも扱える仕様なんだっけ
|
210
|
+
# 2014.03.25.txt:16:43:41 <#ruby-ja@ircnet:akr > 仕様かどうかは知らない
|
211
|
+
# 2014.03.25.txt:16:44:36 <#ruby-ja@ircnet:akr > 伝統的に寛大だったとは思う
|
212
|
+
# 2014.03.25.txt:16:45:41 (#ruby-ja@ircnet:n0kada ) $ grep -r surrogate spec/rubyspec/core/string/unpack/
|
213
|
+
# 2014.03.25.txt:16:45:42 (#ruby-ja@ircnet:n0kada ) bash: exit 1
|
214
|
+
# 2014.03.25.txt:16:46:06 <#ruby-ja@ircnet:n0kada > rubyspecが持ってないとは意外だな
|
215
|
+
# 2014.03.25.txt:16:46:18 <#ruby-ja@ircnet:n0kada > こういう重箱の隅はお得意だろうに
|
216
|
+
|
217
|
+
#
|
218
|
+
# Local Variables:
|
219
|
+
# mode: ruby
|
220
|
+
# coding: utf-8-unix
|
221
|
+
# indent-tabs-mode: t
|
222
|
+
# tab-width: 3
|
223
|
+
# ruby-indent-level: 3
|
224
|
+
# fill-column: 79
|
225
|
+
# default-justification: full
|
226
|
+
# End:
|
data/lib/RFC8259/true.rb
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
#! /your/favourite/path/to/ruby
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
# Copyright (c) 2014 Urabe, Shyouhei. All rights reserved.
|
5
|
+
#
|
6
|
+
# Redistribution and use in source and binary forms, with or without
|
7
|
+
# modification, are permitted provided that the following conditions are met:
|
8
|
+
#
|
9
|
+
# - Redistributions of source code must retain the above copyright
|
10
|
+
# notice, this list of conditions and the following disclaimer.
|
11
|
+
#
|
12
|
+
# - Redistributions in binary form must reproduce the above copyright
|
13
|
+
# notice, this list of conditions and the following disclaimer in
|
14
|
+
# the documentation and/or other materials provided with the
|
15
|
+
# distribution.
|
16
|
+
#
|
17
|
+
# - Neither the name of Internet Society, IETF or IETF Trust, nor the
|
18
|
+
# names of specific contributors, may be used to endorse or promote
|
19
|
+
# products derived from this software without specific prior written
|
20
|
+
# permission.
|
21
|
+
#
|
22
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
|
23
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
24
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
25
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
26
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
27
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
28
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
29
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
30
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
31
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
32
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
33
|
+
|
34
|
+
# The true, as defined in RFC8259 section 3.
|
35
|
+
class RFC8259::True < RFC8259::Value

  # Parse the AST from the parser and convert it into the corresponding value.
  #
  # @param [Array] ast  the AST generated by the parser; it must consist of
  #   the single tag +:true+
  # @return [RFC8259::True] evaluated instance
  # @raise [ArgumentError] the AST has extra elements, or is not +:true+
  def self.from_ast ast
    # NOTE: the comma after the exception class is required.  Without it
    # Ruby tries to call a method named ArgumentError and ends up raising
    # NoMethodError instead.
    raise ArgumentError, "garbage included: #{ast.inspect}" if ast.length > 1
    raise ArgumentError, "not a true: #{ast.inspect}" if ast[0] != :true
    new
  end

  # Convert to Ruby's true.
  # @return [true] the ruby counter part.
  def plain_old_ruby_object
    true
  end

  # JSON gem compat.  Any arguments (e.g. a generator state) are accepted and
  # ignored, like RFC8259::String#to_json does.
  # @return [::String] JSONified string representation
  def to_json(*)
    'true'
  end

  # Equality.  All true instances are equal to each other, plus a TrueClass
  # instance is also equal to this.
  #
  # @param [Object] other the object to compare with
  # @return [true, false] whether other is Ruby's true or another instance
  #   of this class
  def == other
    case other
    when TrueClass, self.class then true
    else false
    end
  end
end
|
69
|
+
|
70
|
+
#
|
71
|
+
# Local Variables:
|
72
|
+
# mode: ruby
|
73
|
+
# coding: utf-8-unix
|
74
|
+
# indent-tabs-mode: t
|
75
|
+
# tab-width: 3
|
76
|
+
# ruby-indent-level: 3
|
77
|
+
# fill-column: 79
|
78
|
+
# default-justification: full
|
79
|
+
# End:
|
@@ -0,0 +1,96 @@
|
|
1
|
+
#! /your/favourite/path/to/ruby
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
# Copyright (c) 2014 Urabe, Shyouhei. All rights reserved.
|
5
|
+
#
|
6
|
+
# Redistribution and use in source and binary forms, with or without
|
7
|
+
# modification, are permitted provided that the following conditions are met:
|
8
|
+
#
|
9
|
+
# - Redistributions of source code must retain the above copyright
|
10
|
+
# notice, this list of conditions and the following disclaimer.
|
11
|
+
#
|
12
|
+
# - Redistributions in binary form must reproduce the above copyright
|
13
|
+
# notice, this list of conditions and the following disclaimer in
|
14
|
+
# the documentation and/or other materials provided with the
|
15
|
+
# distribution.
|
16
|
+
#
|
17
|
+
# - Neither the name of Internet Society, IETF or IETF Trust, nor the
|
18
|
+
# names of specific contributors, may be used to endorse or promote
|
19
|
+
# products derived from this software without specific prior written
|
20
|
+
# permission.
|
21
|
+
#
|
22
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
|
23
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
24
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
25
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
26
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
27
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
28
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
29
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
30
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
31
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
32
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
33
|
+
|
34
|
+
# @abstract
|
35
|
+
# The Values, as described in RFC8259 section 3.
|
36
|
+
#
|
37
|
+
# > value = false / null / true / object / array / number / string
|
38
|
+
class RFC8259::Value

  # Parse the AST from the parser and convert it into the corresponding
  # value, by dispatching on the tag found in the first element of the AST.
  #
  # @param [::Array] ast  the AST, generated by the parser
  # @return [Value] evaluated instance
  # @raise [ArgumentError] malformed input
  #
  # @note the input MUST be a tree.  No cyclic lists, nor even directed
  #       acyclic graphs are allowed by definition.  Such input can cause
  #       undefined behaviour.
  def self.from_ast ast
    klass =
      case ast.first
      when :false  then RFC8259::False
      when :null   then RFC8259::Null
      when :true   then RFC8259::True
      when :object then RFC8259::Object
      when :array  then RFC8259::Array
      when :number then RFC8259::Number
      when :string then RFC8259::String
      else
        # Without this arm an unknown tag would leave klass nil and show up
        # as a NoMethodError on nil, not as the documented ArgumentError.
        raise ArgumentError, "unknown value: #{ast.inspect}"
      end
    klass.from_ast ast
  end

  # Convert to Ruby's value.  This base implementation always raises.
  # @raise [RuntimeError] always
  def plain_old_ruby_object
    raise RuntimeError, 'abstruct method called'
  end

  # @return [::String] the class in string
  def inspect
    sprintf "#<%p>", self.class
  end

  # For pretty print (require 'pp' beforehand)
  # @param [PP] pp the pp
  def pretty_print pp
    pp.object_group self do end # nothing inside the group
  end
end
|
77
|
+
|
78
|
+
# *WARN* order matters, do not move below.
|
79
|
+
require_relative 'false'
|
80
|
+
require_relative 'null'
|
81
|
+
require_relative 'true'
|
82
|
+
require_relative 'object'
|
83
|
+
require_relative 'array'
|
84
|
+
require_relative 'number'
|
85
|
+
require_relative 'string'
|
86
|
+
|
87
|
+
#
|
88
|
+
# Local Variables:
|
89
|
+
# mode: ruby
|
90
|
+
# coding: utf-8-unix
|
91
|
+
# indent-tabs-mode: t
|
92
|
+
# tab-width: 3
|
93
|
+
# ruby-indent-level: 3
|
94
|
+
# fill-column: 79
|
95
|
+
# default-justification: full
|
96
|
+
# End:
|
@@ -0,0 +1,46 @@
|
|
1
|
+
#! /your/favourite/path/to/ruby
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
# Copyright (c) 2014 Urabe, Shyouhei. All rights reserved.
|
5
|
+
#
|
6
|
+
# Redistribution and use in source and binary forms, with or without
|
7
|
+
# modification, are permitted provided that the following conditions are met:
|
8
|
+
#
|
9
|
+
# - Redistributions of source code must retain the above copyright
|
10
|
+
# notice, this list of conditions and the following disclaimer.
|
11
|
+
#
|
12
|
+
# - Redistributions in binary form must reproduce the above copyright
|
13
|
+
# notice, this list of conditions and the following disclaimer in
|
14
|
+
# the documentation and/or other materials provided with the
|
15
|
+
# distribution.
|
16
|
+
#
|
17
|
+
# - Neither the name of Internet Society, IETF or IETF Trust, nor the
|
18
|
+
# names of specific contributors, may be used to endorse or promote
|
19
|
+
# products derived from this software without specific prior written
|
20
|
+
# permission.
|
21
|
+
#
|
22
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
|
23
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
24
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
25
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
26
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
27
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
28
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
29
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
30
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
31
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
32
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
33
|
+
|
34
|
+
# The version
|
35
|
+
# An Integer rather than a dotted version string; the value is the same
# number that appears in the library's name.
RFC8259::VERSION = 8259
|
36
|
+
|
37
|
+
#
|
38
|
+
# Local Variables:
|
39
|
+
# mode: ruby
|
40
|
+
# coding: utf-8-unix
|
41
|
+
# indent-tabs-mode: t
|
42
|
+
# tab-width: 3
|
43
|
+
# ruby-indent-level: 3
|
44
|
+
# fill-column: 79
|
45
|
+
# default-justification: full
|
46
|
+
# End:
|