RFC8259 8259

Sign up to get free protection for your applications and to get access to all the features.
Files changed (183) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +61 -0
  3. data/.rspec +4 -0
  4. data/.yardopts +2 -0
  5. data/Gemfile +58 -0
  6. data/LICENSE.txt +40 -0
  7. data/README.txt +1 -0
  8. data/RFC8259 +899 -0
  9. data/RFC8259.gemspec +89 -0
  10. data/Rakefile +86 -0
  11. data/lib/RFC8259.rb +94 -0
  12. data/lib/RFC8259/array.rb +136 -0
  13. data/lib/RFC8259/dumper.rb +255 -0
  14. data/lib/RFC8259/false.rb +79 -0
  15. data/lib/RFC8259/null.rb +79 -0
  16. data/lib/RFC8259/number.rb +149 -0
  17. data/lib/RFC8259/object.rb +137 -0
  18. data/lib/RFC8259/parser.ry +275 -0
  19. data/lib/RFC8259/string.rb +226 -0
  20. data/lib/RFC8259/true.rb +79 -0
  21. data/lib/RFC8259/value.rb +96 -0
  22. data/lib/RFC8259/version.rb +46 -0
  23. data/spec/RFC8259_spec.rb +305 -0
  24. data/spec/acceptance/README.txt +66 -0
  25. data/spec/acceptance/invalid/0001-ws/0001-verical-tab.txt +1 -0
  26. data/spec/acceptance/invalid/0001-ws/0002-null.txt +0 -0
  27. data/spec/acceptance/invalid/0001-ws/0003-space-in-number-1.txt +1 -0
  28. data/spec/acceptance/invalid/0001-ws/0004-space-in-number-2.txt +1 -0
  29. data/spec/acceptance/invalid/0001-ws/0005-space-in-number-3.txt +1 -0
  30. data/spec/acceptance/invalid/0001-ws/0006-space-in-number-4.txt +1 -0
  31. data/spec/acceptance/invalid/0001-ws/0007-space-in-number-5.txt +1 -0
  32. data/spec/acceptance/invalid/0001-ws/0008-space-in-number-6.txt +1 -0
  33. data/spec/acceptance/invalid/0001-ws/0009-space-in-literal.txt +1 -0
  34. data/spec/acceptance/invalid/0001-ws/0010-Unicode-LINE_SEPARATOR.txt +1 -0
  35. data/spec/acceptance/invalid/0002-comments/0001-C-style.txt +1 -0
  36. data/spec/acceptance/invalid/0002-comments/0002-C-plusplus-style.txt +2 -0
  37. data/spec/acceptance/invalid/0002-comments/0003-sh-style.txt +2 -0
  38. data/spec/acceptance/invalid/0002-comments/0004-python-docstring.txt +3 -0
  39. data/spec/acceptance/invalid/0002-comments/0005-SQL-style.txt +2 -0
  40. data/spec/acceptance/invalid/0002-comments/0006-BASIC-style.txt +2 -0
  41. data/spec/acceptance/invalid/0003-literals/0001-FALSE.txt +1 -0
  42. data/spec/acceptance/invalid/0003-literals/0002-NULL.txt +1 -0
  43. data/spec/acceptance/invalid/0003-literals/0003-TRUE.txt +1 -0
  44. data/spec/acceptance/invalid/0003-literals/0004-NUL.txt +1 -0
  45. data/spec/acceptance/invalid/0003-literals/0005-nil.txt +1 -0
  46. data/spec/acceptance/invalid/0003-literals/0006-undef.txt +1 -0
  47. data/spec/acceptance/invalid/0003-literals/0007-modifier.txt +1 -0
  48. data/spec/acceptance/invalid/0003-literals/0008-undefined.txt +1 -0
  49. data/spec/acceptance/invalid/0004-numbers/0001-omit-zero.txt +1 -0
  50. data/spec/acceptance/invalid/0004-numbers/0002-minus-dot.txt +1 -0
  51. data/spec/acceptance/invalid/0004-numbers/0003-missing-frac.txt +1 -0
  52. data/spec/acceptance/invalid/0004-numbers/0004-missing-exp.txt +1 -0
  53. data/spec/acceptance/invalid/0004-numbers/0005-octal.txt +1 -0
  54. data/spec/acceptance/invalid/0004-numbers/0006-hexadecimal.txt +1 -0
  55. data/spec/acceptance/invalid/0004-numbers/0007-comma.txt +1 -0
  56. data/spec/acceptance/invalid/0004-numbers/0008-perl-underscore.txt +1 -0
  57. data/spec/acceptance/invalid/0004-numbers/0009-NaN.txt +1 -0
  58. data/spec/acceptance/invalid/0004-numbers/0010-Inf.txt +1 -0
  59. data/spec/acceptance/invalid/0004-numbers/0011-Infinity.txt +1 -0
  60. data/spec/acceptance/invalid/0005-strings/0000-NUL.txt +0 -0
  61. data/spec/acceptance/invalid/0005-strings/0001-not-terminated.txt +1 -0
  62. data/spec/acceptance/invalid/0005-strings/0002-single-quote.txt +1 -0
  63. data/spec/acceptance/invalid/0005-strings/0003-back-quote.txt +1 -0
  64. data/spec/acceptance/invalid/0005-strings/0004-carriage-return.txt +1 -0
  65. data/spec/acceptance/invalid/0005-strings/0005-line-feed.txt +2 -0
  66. data/spec/acceptance/invalid/0005-strings/0006-unknown-escape-a.txt +1 -0
  67. data/spec/acceptance/invalid/0005-strings/0007-unknown-escape-perl-style.txt +1 -0
  68. data/spec/acceptance/invalid/0005-strings/0008-unknown-escape-C-style.txt +1 -0
  69. data/spec/acceptance/invalid/0005-strings/0009-unknown-escape-ruby-style.txt +1 -0
  70. data/spec/acceptance/invalid/0005-strings/0010-escape-too-short.txt +1 -0
  71. data/spec/acceptance/invalid/0005-strings/0011-C-string-concat.txt +1 -0
  72. data/spec/acceptance/invalid/0005-strings/0012-perl-string-concat.txt +1 -0
  73. data/spec/acceptance/invalid/0005-strings/0013-Java-string-concat.txt +1 -0
  74. data/spec/acceptance/invalid/0006-encodings/0001-CESU-8.txt +1 -0
  75. data/spec/acceptance/invalid/0006-encodings/0002-Windows-31J.txt +1 -0
  76. data/spec/acceptance/invalid/0006-encodings/0003-EBCDIC.txt +1 -0
  77. data/spec/acceptance/invalid/0006-encodings/0004-overlong-utf8.txt +1 -0
  78. data/spec/acceptance/invalid/0007-arrays/0001-lacks-open.txt +1 -0
  79. data/spec/acceptance/invalid/0007-arrays/0002-lacks-close.txt +1 -0
  80. data/spec/acceptance/invalid/0007-arrays/0003-interleaving-parens.txt +1 -0
  81. data/spec/acceptance/invalid/0007-arrays/0004-dangling-comma.txt +1 -0
  82. data/spec/acceptance/invalid/0007-arrays/0005-missing-comma.txt +1 -0
  83. data/spec/acceptance/invalid/0007-arrays/0006-colon-instead-of-comma.txt +1 -0
  84. data/spec/acceptance/invalid/0008-hashes/0001-key-missing.txt +1 -0
  85. data/spec/acceptance/invalid/0008-hashes/0002-value-missing.txt +1 -0
  86. data/spec/acceptance/invalid/0008-hashes/0003-true-key.txt +1 -0
  87. data/spec/acceptance/invalid/0008-hashes/0004-false-key.txt +1 -0
  88. data/spec/acceptance/invalid/0008-hashes/0005-null-key.txt +1 -0
  89. data/spec/acceptance/invalid/0008-hashes/0006-numeric-key.txt +1 -0
  90. data/spec/acceptance/invalid/0008-hashes/0007-array-key.txt +1 -0
  91. data/spec/acceptance/invalid/0008-hashes/0008-hash-key.txt +1 -0
  92. data/spec/acceptance/invalid/0008-hashes/0009-key-not-escaped.txt +4 -0
  93. data/spec/acceptance/invalid/0009-javascriptisms/0001-JSONP.txt +1 -0
  94. data/spec/acceptance/invalid/0009-javascriptisms/0002-new-Array.txt +3 -0
  95. data/spec/acceptance/invalid/0009-javascriptisms/0003-new-Date.txt +1 -0
  96. data/spec/acceptance/invalid/0009-javascriptisms/0004-new-Error.txt +1 -0
  97. data/spec/acceptance/invalid/0009-javascriptisms/0005-Math.txt +1 -0
  98. data/spec/acceptance/invalid/0009-javascriptisms/0006-regular-expression.txt +1 -0
  99. data/spec/acceptance/invalid/0009-javascriptisms/0007-function.txt +7 -0
  100. data/spec/acceptance/invalid/0009-javascriptisms/0008-this.txt +1 -0
  101. data/spec/acceptance/invalid/0009-javascriptisms/0009-plusplus.txt +3 -0
  102. data/spec/acceptance/invalid/0009-javascriptisms/0010-ternary-operator.txt +1 -0
  103. data/spec/acceptance/valid/0001-ws/0001-space.json +1 -0
  104. data/spec/acceptance/valid/0001-ws/0002-tab.json +1 -0
  105. data/spec/acceptance/valid/0001-ws/0003-lf.json +1 -0
  106. data/spec/acceptance/valid/0001-ws/0004-cr.json +1 -0
  107. data/spec/acceptance/valid/0001-ws/0005-before.json +1 -0
  108. data/spec/acceptance/valid/0001-ws/0006-after.json +1 -0
  109. data/spec/acceptance/valid/0001-ws/0007-around-comma.json +3 -0
  110. data/spec/acceptance/valid/0001-ws/0008-around-colon.json +3 -0
  111. data/spec/acceptance/valid/0002-bare-values/0001-false.json +1 -0
  112. data/spec/acceptance/valid/0002-bare-values/0002-null.json +1 -0
  113. data/spec/acceptance/valid/0002-bare-values/0003-true.json +1 -0
  114. data/spec/acceptance/valid/0002-bare-values/0004-number.json +1 -0
  115. data/spec/acceptance/valid/0002-bare-values/0005-string.json +1 -0
  116. data/spec/acceptance/valid/0003-literals/0001-false.json +1 -0
  117. data/spec/acceptance/valid/0003-literals/0002-null.json +1 -0
  118. data/spec/acceptance/valid/0003-literals/0003-true.json +1 -0
  119. data/spec/acceptance/valid/0004-numbers/0000-zero.json +1 -0
  120. data/spec/acceptance/valid/0004-numbers/0001-one.json +1 -0
  121. data/spec/acceptance/valid/0004-numbers/0002-two.json +1 -0
  122. data/spec/acceptance/valid/0004-numbers/0003-three.json +1 -0
  123. data/spec/acceptance/valid/0004-numbers/0004-four.json +1 -0
  124. data/spec/acceptance/valid/0004-numbers/0005-five.json +1 -0
  125. data/spec/acceptance/valid/0004-numbers/0006-six.json +1 -0
  126. data/spec/acceptance/valid/0004-numbers/0007-seven.json +1 -0
  127. data/spec/acceptance/valid/0004-numbers/0008-eight.json +1 -0
  128. data/spec/acceptance/valid/0004-numbers/0009-nine.json +1 -0
  129. data/spec/acceptance/valid/0004-numbers/0010-ten.json +1 -0
  130. data/spec/acceptance/valid/0004-numbers/0011-minus.json +1 -0
  131. data/spec/acceptance/valid/0004-numbers/0012-fraction.json +1 -0
  132. data/spec/acceptance/valid/0004-numbers/0013-exponent.json +1 -0
  133. data/spec/acceptance/valid/0004-numbers/0014-exponent-minus.json +1 -0
  134. data/spec/acceptance/valid/0004-numbers/0015-exponent-plus.json +1 -0
  135. data/spec/acceptance/valid/0004-numbers/0016-complex.json +1 -0
  136. data/spec/acceptance/valid/0004-numbers/0017-DBL_MAX.json +1 -0
  137. data/spec/acceptance/valid/0004-numbers/0018-DBL_MIN.json +1 -0
  138. data/spec/acceptance/valid/0004-numbers/0019-subnormal-number.json +1 -0
  139. data/spec/acceptance/valid/0004-numbers/0020-1E400.json +1 -0
  140. data/spec/acceptance/valid/0004-numbers/0021-pi.json +1 -0
  141. data/spec/acceptance/valid/0004-numbers/0022-UINT32_MAX.json +1 -0
  142. data/spec/acceptance/valid/0004-numbers/0023-UINT64_MAX.json +1 -0
  143. data/spec/acceptance/valid/0004-numbers/0024-INT64_MIN.json +1 -0
  144. data/spec/acceptance/valid/0004-numbers/0025-high-resolution-zero.json +1 -0
  145. data/spec/acceptance/valid/0004-numbers/0026-high-resolution-100.json +1 -0
  146. data/spec/acceptance/valid/0005-strings/0001-empty.json +1 -0
  147. data/spec/acceptance/valid/0005-strings/0002-basic-latin.json +1 -0
  148. data/spec/acceptance/valid/0005-strings/0003-escapes.json +1 -0
  149. data/spec/acceptance/valid/0005-strings/0004-raw-unicode.json +1 -0
  150. data/spec/acceptance/valid/0005-strings/0005-escaped-unicode.json +1 -0
  151. data/spec/acceptance/valid/0005-strings/0006-escaped-NUL.json +1 -0
  152. data/spec/acceptance/valid/0005-strings/0007-escaped-invalid-unicode-still-valid-as-json.json +1 -0
  153. data/spec/acceptance/valid/0005-strings/0008-ruby-json-gem-cant-handle-this.json +1 -0
  154. data/spec/acceptance/valid/0005-strings/0009-unescaped-invalid-javascript-still-valid-as-json.json +1 -0
  155. data/spec/acceptance/valid/0005-strings/0010-escaped-separated-surrogate.json +1 -0
  156. data/spec/acceptance/valid/0005-strings/0011-escaped-surrogate-then-normal.json +1 -0
  157. data/spec/acceptance/valid/0006-m17n/0001-genesis.json +6 -0
  158. data/spec/acceptance/valid/0006-m17n/0002-heart-sutra.json +5 -0
  159. data/spec/acceptance/valid/0006-m17n/0003-escaped-valid-surrogate-pair.json +1 -0
  160. data/spec/acceptance/valid/0006-m17n/0004-unescaped-valid-supplementary-multilingual-plane.json +1 -0
  161. data/spec/acceptance/valid/0007-arrays/0000-empty.json +1 -0
  162. data/spec/acceptance/valid/0007-arrays/0001-one-element.json +1 -0
  163. data/spec/acceptance/valid/0007-arrays/0002-multiple-elements.json +33 -0
  164. data/spec/acceptance/valid/0007-arrays/0003-various-types.json +1 -0
  165. data/spec/acceptance/valid/0007-arrays/0004-nested.json +17 -0
  166. data/spec/acceptance/valid/0008-hashes/0000-empty.json +1 -0
  167. data/spec/acceptance/valid/0008-hashes/0001-onekey.json +1 -0
  168. data/spec/acceptance/valid/0008-hashes/0002-many-keys.json +5 -0
  169. data/spec/acceptance/valid/0008-hashes/0003-empty-key.json +3 -0
  170. data/spec/acceptance/valid/0008-hashes/0004-true-value.json +3 -0
  171. data/spec/acceptance/valid/0008-hashes/0005-false-value.json +3 -0
  172. data/spec/acceptance/valid/0008-hashes/0006-null-value.json +3 -0
  173. data/spec/acceptance/valid/0008-hashes/0007-string-value.json +3 -0
  174. data/spec/acceptance/valid/0008-hashes/0008-numeric-value.json +3 -0
  175. data/spec/acceptance/valid/0008-hashes/0009-array-value.json +8 -0
  176. data/spec/acceptance/valid/0008-hashes/0010-hash-value.json +20 -0
  177. data/spec/acceptance/valid/0008-hashes/0011-duplicate-key.json +4 -0
  178. data/spec/acceptance/valid/0008-hashes/0012-duplicate-key-in-different-representations.json +4 -0
  179. data/spec/acceptance/valid/0009-complicated/0001-jsonschema.json +46 -0
  180. data/spec/acceptance/valid/0009-complicated/0002-example-in-RFC7159-section-13.json +14 -0
  181. data/spec/acceptance/valid/0009-complicated/0003-example-in-RFC7159-section-13.json +22 -0
  182. data/spec/spec_helper.rb +60 -0
  183. metadata +530 -0
@@ -0,0 +1,226 @@
1
+ #! /your/favourite/path/to/ruby
2
+ # -*- coding: utf-8 -*-
3
+
4
+ # Copyright (c) 2014 Urabe, Shyouhei. All rights reserved.
5
+ #
6
+ # Redistribution and use in source and binary forms, with or without
7
+ # modification, are permitted provided that the following conditions are met:
8
+ #
9
+ # - Redistributions of source code must retain the above copyright
10
+ # notice, this list of conditions and the following disclaimer.
11
+ #
12
+ # - Redistributions in binary form must reproduce the above copyright
13
+ # notice, this list of conditions and the following disclaimer in
14
+ # the documentation and/or other materials provided with the
15
+ # distribution.
16
+ #
17
+ # - Neither the name of Internet Society, IETF or IETF Trust, nor the
18
+ # names of specific contributors, may be used to endorse or promote
19
+ # products derived from this software without specific prior written
20
+ # permission.
21
+ #
22
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
23
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32
+ # POSSIBILITY OF SUCH DAMAGE.
33
+
34
# The Strings, as described in RFC8259 section 7.
#
# An instance remembers two things: the token sequence handed over by the
# parser (`@orig`, used to regenerate JSON), and the evaluated Ruby string
# (`@str`, frozen), in which every escape sequence has been resolved.
class RFC8259::String < RFC8259::Value
   # Parse the AST from parser, and convert into corresponding values.
   # @param [::Array] ast the AST, generated by the parser
   # @return [String] evaluated instance
   # @raise [ArgumentError] malformed input
   def self.from_ast ast
      type, *ary = *ast
      raise ArgumentError, "not a string: #{ast.inspect}" if type != :string
      new ary
   end

   # @return [::String] converted string
   def plain_old_ruby_object
      return @str
   end

   alias to_s plain_old_ruby_object
   alias to_str plain_old_ruby_object

   # @return [::String] the string, escaped
   def inspect
      sprintf "#<%p:%#016x %p>", self.class, self.object_id << 1, @str
   end

   # For pretty print
   # @param [PP] pp the pp
   def pretty_print pp
      hdr = sprintf '#<%p:%#016x', self.class, self.object_id << 1
      pp.group 1, hdr, '>' do
         pp.text ' '
         @str.pretty_print pp
      end
   end

   # Regenerates the JSON text from the tokens the parser gave us, so the
   # escape sequences come out as they were written.  Arguments (such as the
   # JSON gem's generator state) are accepted and ignored.
   # @return [::String] original string, in UTF-8, wrapped in quotation marks
   def to_json(*)
      # Here '"', which is UTF-8, and @orig, which might be UTF-16, should be
      # aligned.  We take UTF-8 because we are not interested in generating
      # UTF-16 JSON and so on.
      body = @orig.flatten.join('').encode(Encoding::UTF_8)
      '"' + body + '"'
   end

   # String comparisons are defined in RFC8259 section 8.3.  We follow that.
   # @param  [Object] other anything; only objects responding to `to_str`
   #                       can be equal
   # @return [true, false] whether both sides evaluate to the same string
   def == other
      other.respond_to?(:to_str) && to_str == other.to_str
   end

   private
   private_class_method:new

   # @private
   # @param [::Array] ary the parser's tokens: each element is either a
   #                     one-character string, or an array of characters
   #                     that spells an escape sequence
   def initialize ary
      @orig = ary
      # Peek at the first character to learn the encoding of the JSON text.
      # An empty string has no character; the lookup then raises, and we
      # fall back to US-ASCII.
      enc   = ary[0][0].encoding rescue Encoding::US_ASCII
      units = ary.map {|token| code_unit_of token }
      chars = join_surrogates units

      # RFC8259 section 8.1 states that the JSON text itself shall be written
      # in a sort of Unicode.  However the parsed JSON value's content strings
      # are not always Unicode-valid, according to its section 8.2.  Then
      # what?  It says nothing.  Here, we try to preserve the JSON text's
      # encoding i.e. if the JSON text is in UTF-16, we try UTF-16.  The bytes
      # are assembled as binary and tagged afterwards, so that an invalid
      # sequence (e.g. a lone surrogate) does not raise.
      @str = pack_code_points(chars, enc).force_encoding(enc)
      @str.freeze
   end

   # Turns one parser token into the integer it denotes.
   # @param  [::String, ::Array] token a character or an escape sequence
   # @return [Integer] code point, or UTF-16 code unit for a \uXXXX escape
   # @raise  [RuntimeError] unknown escape character
   def code_unit_of token
      return token.ord unless token.is_a? Array

      # ['\\', 'u', 'F', 'F', 'E', 'E'] or something
      case token[1].encode(Encoding::US_ASCII)
      when "\x22" then 0x0022 # "  quotation mark  U+0022
      when "\x5C" then 0x005C # \  reverse solidus U+005C
      when "\x2F" then 0x002F # /  solidus         U+002F
      when "\x62" then 0x0008 # b  backspace       U+0008
      when "\x66" then 0x000C # f  form feed       U+000C
      when "\x6E" then 0x000A # n  line feed       U+000A
      when "\x72" then 0x000D # r  carriage return U+000D
      when "\x74" then 0x0009 # t  tab             U+0009
      when "\x75" then        # uXXXX              U+XXXX
         token[2..5].join.encode(Encoding::US_ASCII).to_i(16)
      else
         raise "invalid escape: #{token.inspect}"
      end
   end

   # Merges every high surrogate that is immediately followed by a low
   # surrogate into the single code point the pair stands for.  Unpaired
   # surrogates are passed through untouched.
   # @param  [::Array<Integer>] units output of #code_unit_of
   # @return [::Array<Integer>] code points
   def join_surrogates units
      pending = nil # the previous unit, not yet emitted
      joined  = units.each_with_object [] do |unit, acc|
         if pending.nil?
            pending = unit
         elsif (0xD800..0xDBFF).cover?(pending) && (0xDC00..0xDFFF).cover?(unit)
            # valid surrogate pair
            acc << (0x10000 + ((pending - 0xD800) << 10) + (unit - 0xDC00))
            pending = nil # consumed
         else
            # pending is either a normal char or a garbage
            acc << pending
            pending = unit
         end
      end
      joined << pending if pending # one might remain
      joined
   end

   # Serializes code points into the byte representation of the encoding.
   # @param  [::Array<Integer>] chars code points
   # @param  [Encoding]         enc   encoding of the JSON text
   # @return [::String] binary string
   def pack_code_points chars, enc
      chars.each_with_object ''.b do |cp, bin|
         bytes = case enc
                 when Encoding::UTF_32BE then [cp].pack('N')
                 when Encoding::UTF_32LE then [cp].pack('V')
                 when Encoding::UTF_16BE then utf16_units(cp).pack('n*')
                 when Encoding::UTF_16LE then utf16_units(cp).pack('v*')
                 else                         [cp].pack('U') # sort of UTF-8
                 end
         bin << bytes.b
      end
   end

   # A 16-bit pack directive keeps only the low 16 bits of its argument, so
   # code points beyond the BMP have to be split into a surrogate pair before
   # they are packed as UTF-16.
   # @param  [Integer] cp a code point
   # @return [::Array<Integer>] one or two UTF-16 code units
   def utf16_units cp
      return [cp] if cp <= 0xFFFF
      offset = cp - 0x10000
      [0xD800 | (offset >> 10), 0xDC00 | (offset & 0x3FF)]
   end
end
160
+
161
+ #
162
+ # Dialogue about evaluating JSON's string
163
+ # ----
164
+ # 2014.03.17.txt:20:50:01 >#ruby-ja@ircnet:shyouhei < JSONのRFC、文字列が"\uDEAD"とかなっててもvalidだよって書いてあるけど、
165
+ # 2014.03.17.txt:20:50:14 >#ruby-ja@ircnet:shyouhei < それはいいのだが
166
+ # 2014.03.17.txt:20:50:32 >#ruby-ja@ircnet:shyouhei < たとえばそのJSONがUTF-16で書かれているとして
167
+ # 2014.03.17.txt:20:50:59 >#ruby-ja@ircnet:shyouhei < UTF-16の"\uDEAD"的なのをRubyで作ろうと思うとなかなかむずかしいな
168
+ # 2014.03.17.txt:20:51:55 >#ruby-ja@ircnet:shyouhei < "\\uDEAD"という文字列(ただしUTF-16)を入力したら"\u{DEAD}"という文字列(ただしUTF-16)を出力する関数
169
+ # 2014.03.17.txt:20:52:08 >#ruby-ja@ircnet:shyouhei < むずい。
170
+ # 2014.03.17.txt:20:52:09 <#ruby-ja@ircnet:nurse > "\xDE\xAD".force_encoding("utf-16be")とかになっちゃいますなぁ
171
+ # 2014.03.17.txt:20:52:34 <#ruby-ja@ircnet:nurse > [0xDEAD].pack("n").force_encoding("utf-16be")のが素直かな
172
+ # 2014.03.17.txt:20:53:35 >#ruby-ja@ircnet:shyouhei < なんか実務上はそこまでがんばるより例外で死んだ方がしあわせになれそうではある
173
+ # 2014.03.17.txt:20:54:00 >#ruby-ja@ircnet:shyouhei < 誰も幸せにしなさそう
174
+ # 2014.03.17.txt:20:54:26 <#ruby-ja@ircnet:nurse > 死んじゃダメで、ゲタにするのが正解じゃないっけ
175
+ # 2014.03.17.txt:20:54:54 >#ruby-ja@ircnet:shyouhei < それがより正しそうですね
176
+ # 2014.03.17.txt:20:55:56 >#ruby-ja@ircnet:shyouhei < JSONはサロゲートペアもなんとかせねばならんので面倒そうだ
177
+ # 2014.03.17.txt:20:57:06 >#ruby-ja@ircnet:shyouhei < (\uXYZW が単体でNGぽいくても次にサロゲートペアが続くかもしれん)
178
+ # 2014.03.17.txt:20:57:37 >#ruby-ja@ircnet:shyouhei < めんどう!
179
+ # 2014.03.17.txt:20:57:42 >#ruby-ja@ircnet:shyouhei < UTF16しねばいいのに
180
+ # 2014.03.17.txt:20:59:06 <#ruby-ja@ircnet:nurse > とりあえずそのままUTF-16にしてみて、encodeでinvalid replaceすればいい気がする
181
+ # 2014.03.17.txt:21:00:33 >#ruby-ja@ircnet:shyouhei < すでにUTF16な文字列にサロゲートペアの片割れ的なバイナリをがしょがしょって後ろから足してからencodeするとよしなにする?
182
+ # 2014.03.17.txt:21:01:13 >#ruby-ja@ircnet:shyouhei < (頭の悪い発言なのは自覚しております)
183
+ # 2014.03.17.txt:21:01:29 <#ruby-ja@ircnet:nurse > invalid: :replaceつけてUTF-8にするなり、UTF-16のままscrubすれば
184
+ # 2014.03.17.txt:21:02:45 >#ruby-ja@ircnet:shyouhei < invalidなのはよいとして "\uFOO\uBAR" てきなサロゲートペアてきJSON文字列をちゃんとRuby的に(正しいUTF16文字列)に復元するシナリオ
185
+ # 2014.03.17.txt:21:03:46 <#ruby-ja@ircnet:nurse > たぶんAScii-8BITで足さないとエラーになる気がする
186
+ # 2014.03.17.txt:21:04:05 <#ruby-ja@ircnet:nurse > そこいがいは、無心につなげて、最後にencodeまたはscrubが正解ではないかと
187
+ # 2014.03.17.txt:21:04:13 >#ruby-ja@ircnet:shyouhei < あきらめて全部バイナリと思ってくっつけておいてから最後にencodeか
188
+ # 2014.03.17.txt:21:05:20 <#ruby-ja@ircnet:nurse > ASCII-8BITだと文字列のvalidチェックしない分速いし。
189
+ # 2014.03.17.txt:21:06:33 >#ruby-ja@ircnet:shyouhei < 世の中のJSONパーザがUTF16サポートしないという姿勢にはそれなりの理由があることがわかった。
190
+ # 2014.03.17.txt:21:07:17 <#ruby-ja@ircnet:nurse > そもそもHTTPで文字列流すのにASCII非互換ってのが邪悪である
191
+ # 2014.03.17.txt:21:15:04 <#ruby-ja@ircnet:nurse > 例のOpenBSDのsignifyをportableにしたらRubyでも使えるかなぁ
192
+ # 2014.03.17.txt:21:18:39 <#ruby-ja@ircnet:nurse > ていうか卜部さんはJSONパーサでも書いてるのかしら
193
+ # 2014.03.17.txt:21:18:56 <#ruby-ja@ircnet:nurse > って、聞いちゃいけない質問な気がした
194
+ # ----
195
+ # 2014.03.25.txt:16:08:14 >#ruby-ja@ircnet:shyouhei < "\u{dead}" を入力されたときに "\\uDEAD" を出力する関数を作成せよ
196
+ # 2014.03.25.txt:16:09:21 >#ruby-ja@ircnet:shyouhei < str.force_encoding('utf-8').scrub {|c| "\\u" + c.unpack('H*") } はだめぽい
197
+ # 2014.03.25.txt:16:14:13 >#ruby-ja@ircnet:shyouhei < primitive_convertでなんとかなるのかこれ
198
+ # 2014.03.25.txt:16:20:10 <#ruby-ja@ircnet:n0kada > "\u{dead}"ってinvalidなんだっけ
199
+ # 2014.03.25.txt:16:22:29 >#ruby-ja@ircnet:shyouhei < サロゲートペアのかたほう
200
+ # 2014.03.25.txt:16:22:44 >#ruby-ja@ircnet:shyouhei < それだけではinvalidすね
201
+ # 2014.03.25.txt:16:34:47 >#ruby-ja@ircnet:shyouhei < お、"\u{dead}".unpack('U*')で0xdeadが取得できる
202
+ # 2014.03.25.txt:16:34:57 >#ruby-ja@ircnet:shyouhei < ここからなんとかすればいいのか…?
203
+ # 2014.03.25.txt:16:35:00 >#ruby-ja@ircnet:shyouhei < しかしどうする
204
+ # 2014.03.25.txt:16:35:08 <#ruby-ja@ircnet:akr > "\u{dead}".unpack("U*").map {|c| 0xD800 <= c && c <= 0xDFFF ? "\\u%04X" % c : [c].pack("U") }.join
205
+ # 2014.03.25.txt:16:38:16 >#ruby-ja@ircnet:shyouhei < おお。
206
+ # 2014.03.25.txt:16:38:46 >#ruby-ja@ircnet:shyouhei < scrubでなんとかするのは筋が悪いことが分かりつつある
207
+ # 2014.03.25.txt:16:39:36 >#ruby-ja@ircnet:shyouhei < まずは文字列じゃなくてコードポイントの配列にして、そこでごにょってから、さいごに文字列になおすのが色々正しい雰囲気を感じる
208
+ # 2014.03.25.txt:16:39:53 <#ruby-ja@ircnet:akr > encoding が壊れている時に、文字の範囲を確定するのは難しいので。
209
+ # 2014.03.25.txt:16:43:08 <#ruby-ja@ircnet:n0kada > unpackはサロゲートペアの片割れも扱える仕様なんだっけ
210
+ # 2014.03.25.txt:16:43:41 <#ruby-ja@ircnet:akr > 仕様かどうかは知らない
211
+ # 2014.03.25.txt:16:44:36 <#ruby-ja@ircnet:akr > 伝統的に寛大だったとは思う
212
+ # 2014.03.25.txt:16:45:41 (#ruby-ja@ircnet:n0kada ) $ grep -r surrogate spec/rubyspec/core/string/unpack/
213
+ # 2014.03.25.txt:16:45:42 (#ruby-ja@ircnet:n0kada ) bash: exit 1
214
+ # 2014.03.25.txt:16:46:06 <#ruby-ja@ircnet:n0kada > rubyspecが持ってないとは意外だな
215
+ # 2014.03.25.txt:16:46:18 <#ruby-ja@ircnet:n0kada > こういう重箱の隅はお得意だろうに
216
+
217
+ #
218
+ # Local Variables:
219
+ # mode: ruby
220
+ # coding: utf-8-unix
221
+ # indent-tabs-mode: t
222
+ # tab-width: 3
223
+ # ruby-indent-level: 3
224
+ # fill-column: 79
225
+ # default-justification: full
226
+ # End:
@@ -0,0 +1,79 @@
1
+ #! /your/favourite/path/to/ruby
2
+ # -*- coding: utf-8 -*-
3
+
4
+ # Copyright (c) 2014 Urabe, Shyouhei. All rights reserved.
5
+ #
6
+ # Redistribution and use in source and binary forms, with or without
7
+ # modification, are permitted provided that the following conditions are met:
8
+ #
9
+ # - Redistributions of source code must retain the above copyright
10
+ # notice, this list of conditions and the following disclaimer.
11
+ #
12
+ # - Redistributions in binary form must reproduce the above copyright
13
+ # notice, this list of conditions and the following disclaimer in
14
+ # the documentation and/or other materials provided with the
15
+ # distribution.
16
+ #
17
+ # - Neither the name of Internet Society, IETF or IETF Trust, nor the
18
+ # names of specific contributors, may be used to endorse or promote
19
+ # products derived from this software without specific prior written
20
+ # permission.
21
+ #
22
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
23
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32
+ # POSSIBILITY OF SUCH DAMAGE.
33
+
34
# The true, as defined in RFC8259 section 3.
class RFC8259::True < RFC8259::Value

   # Parse the AST from parser, and convert into corresponding value.
   # @param [Array] ast the AST, generated by the parser
   # @return [self] evaluated instance
   # @raise [ArgumentError] malformed input
   def self.from_ast ast
      # The comma after the exception class is essential: without it Ruby
      # looks for a method named ArgumentError and raises NoMethodError.
      raise ArgumentError, "garbage included: #{ast.inspect}" if ast.length > 1
      raise ArgumentError, "not a true: #{ast.inspect}" if ast[0] != :true
      return new
   end

   # convert to Ruby's true
   # @return [true] the ruby counter part.
   def plain_old_ruby_object
      return true
   end

   # JSON gem compat.  Arguments (such as the generator state) are accepted
   # and ignored.
   # @return [::String] JSONified string representation
   def to_json(*)
      return 'true'
   end

   # Equality.  All true instances are equal to each other, plus a TrueClass
   # instance is also equal to this.
   # @param  [Object] other the object to compare with
   # @return [true, false] whether other is a true
   def == other
      case other when TrueClass, self.class
         true
      else
         false
      end
   end
end
69
+
70
+ #
71
+ # Local Variables:
72
+ # mode: ruby
73
+ # coding: utf-8-unix
74
+ # indent-tabs-mode: t
75
+ # tab-width: 3
76
+ # ruby-indent-level: 3
77
+ # fill-column: 79
78
+ # default-justification: full
79
+ # End:
@@ -0,0 +1,96 @@
1
+ #! /your/favourite/path/to/ruby
2
+ # -*- coding: utf-8 -*-
3
+
4
+ # Copyright (c) 2014 Urabe, Shyouhei. All rights reserved.
5
+ #
6
+ # Redistribution and use in source and binary forms, with or without
7
+ # modification, are permitted provided that the following conditions are met:
8
+ #
9
+ # - Redistributions of source code must retain the above copyright
10
+ # notice, this list of conditions and the following disclaimer.
11
+ #
12
+ # - Redistributions in binary form must reproduce the above copyright
13
+ # notice, this list of conditions and the following disclaimer in
14
+ # the documentation and/or other materials provided with the
15
+ # distribution.
16
+ #
17
+ # - Neither the name of Internet Society, IETF or IETF Trust, nor the
18
+ # names of specific contributors, may be used to endorse or promote
19
+ # products derived from this software without specific prior written
20
+ # permission.
21
+ #
22
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
23
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32
+ # POSSIBILITY OF SUCH DAMAGE.
33
+
34
# @abstract
# The Values, as described in RFC8259 section 3.
#
# > value = false / null / true / object / array / number / string
class RFC8259::Value

   # Parse the AST from parser, and convert into corresponding values.  The
   # first element of the AST is a tag; it selects the class whose own
   # `from_ast` then does the conversion.
   # @param [::Array] ast the AST, generated by the parser
   # @return [Value] evaluated instance
   # @raise [ArgumentError] malformed input
   #
   # @note the input MUST be a tree.  No cyclic lists, nor even directed
   #   acyclic graphs are allowed by definition.  Such input can cause
   #   undefined behaviour.
   def self.from_ast ast
      klass = case ast.first
              when :false  then RFC8259::False
              when :null   then RFC8259::Null
              when :true   then RFC8259::True
              when :object then RFC8259::Object
              when :array  then RFC8259::Array
              when :number then RFC8259::Number
              when :string then RFC8259::String
              else
                 # Without this branch klass would be nil, and the failure
                 # would surface as a NoMethodError instead.
                 raise ArgumentError, "unknown type of AST: #{ast.inspect}"
              end
      return klass.from_ast ast
   end

   # convert to Ruby's value
   # @raise [RuntimeError] always; subclasses are expected to override this
   def plain_old_ruby_object
      raise RuntimeError, 'abstruct method called'
   end

   # @return [::String] the class in string
   def inspect
      sprintf "#<%p>", self.class
   end

   # For pretty print (require 'pp' beforehand)
   # @param [PP] pp the pp
   def pretty_print pp
      pp.object_group self do end # nothing
   end
end
77
+
78
+ # *WARN* order matters: keep these requires below the class definition (the required files subclass RFC8259::Value), and do not reorder them.
79
+ require_relative 'false'
80
+ require_relative 'null'
81
+ require_relative 'true'
82
+ require_relative 'object'
83
+ require_relative 'array'
84
+ require_relative 'number'
85
+ require_relative 'string'
86
+
87
+ #
88
+ # Local Variables:
89
+ # mode: ruby
90
+ # coding: utf-8-unix
91
+ # indent-tabs-mode: t
92
+ # tab-width: 3
93
+ # ruby-indent-level: 3
94
+ # fill-column: 79
95
+ # default-justification: full
96
+ # End:
@@ -0,0 +1,46 @@
1
+ #! /your/favourite/path/to/ruby
2
+ # -*- coding: utf-8 -*-
3
+
4
+ # Copyright (c) 2014 Urabe, Shyouhei. All rights reserved.
5
+ #
6
+ # Redistribution and use in source and binary forms, with or without
7
+ # modification, are permitted provided that the following conditions are met:
8
+ #
9
+ # - Redistributions of source code must retain the above copyright
10
+ # notice, this list of conditions and the following disclaimer.
11
+ #
12
+ # - Redistributions in binary form must reproduce the above copyright
13
+ # notice, this list of conditions and the following disclaimer in
14
+ # the documentation and/or other materials provided with the
15
+ # distribution.
16
+ #
17
+ # - Neither the name of Internet Society, IETF or IETF Trust, nor the
18
+ # names of specific contributors, may be used to endorse or promote
19
+ # products derived from this software without specific prior written
20
+ # permission.
21
+ #
22
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
23
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32
+ # POSSIBILITY OF SUCH DAMAGE.
33
+
34
+ # The version of this library (an Integer, the same number as the RFC).
35
+ RFC8259::VERSION = 8259
36
+
37
+ #
38
+ # Local Variables:
39
+ # mode: ruby
40
+ # coding: utf-8-unix
41
+ # indent-tabs-mode: t
42
+ # tab-width: 3
43
+ # ruby-indent-level: 3
44
+ # fill-column: 79
45
+ # default-justification: full
46
+ # End: