oj 2.18.5 → 3.16.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +1452 -0
  3. data/README.md +53 -221
  4. data/RELEASE_NOTES.md +61 -0
  5. data/ext/oj/buf.h +54 -72
  6. data/ext/oj/cache.c +329 -0
  7. data/ext/oj/cache.h +22 -0
  8. data/ext/oj/cache8.c +61 -63
  9. data/ext/oj/cache8.h +12 -39
  10. data/ext/oj/circarray.c +38 -67
  11. data/ext/oj/circarray.h +16 -42
  12. data/ext/oj/code.c +214 -0
  13. data/ext/oj/code.h +40 -0
  14. data/ext/oj/compat.c +194 -110
  15. data/ext/oj/custom.c +1074 -0
  16. data/ext/oj/debug.c +126 -0
  17. data/ext/oj/dump.c +1276 -2494
  18. data/ext/oj/dump.h +110 -0
  19. data/ext/oj/dump_compat.c +897 -0
  20. data/ext/oj/dump_leaf.c +162 -0
  21. data/ext/oj/dump_object.c +710 -0
  22. data/ext/oj/dump_strict.c +399 -0
  23. data/ext/oj/encode.h +7 -42
  24. data/ext/oj/encoder.c +43 -0
  25. data/ext/oj/err.c +28 -53
  26. data/ext/oj/err.h +49 -46
  27. data/ext/oj/extconf.rb +33 -32
  28. data/ext/oj/fast.c +1082 -1098
  29. data/ext/oj/intern.c +313 -0
  30. data/ext/oj/intern.h +22 -0
  31. data/ext/oj/mem.c +318 -0
  32. data/ext/oj/mem.h +53 -0
  33. data/ext/oj/mimic_json.c +919 -0
  34. data/ext/oj/object.c +545 -625
  35. data/ext/oj/odd.c +158 -168
  36. data/ext/oj/odd.h +32 -58
  37. data/ext/oj/oj.c +1727 -2080
  38. data/ext/oj/oj.h +334 -259
  39. data/ext/oj/parse.c +974 -753
  40. data/ext/oj/parse.h +97 -90
  41. data/ext/oj/parser.c +1600 -0
  42. data/ext/oj/parser.h +103 -0
  43. data/ext/oj/rails.c +1478 -0
  44. data/ext/oj/rails.h +18 -0
  45. data/ext/oj/reader.c +136 -163
  46. data/ext/oj/reader.h +76 -112
  47. data/ext/oj/resolve.c +45 -94
  48. data/ext/oj/resolve.h +7 -34
  49. data/ext/oj/rxclass.c +144 -0
  50. data/ext/oj/rxclass.h +26 -0
  51. data/ext/oj/saj.c +445 -511
  52. data/ext/oj/saj2.c +584 -0
  53. data/ext/oj/saj2.h +23 -0
  54. data/ext/oj/scp.c +82 -143
  55. data/ext/oj/simd.h +10 -0
  56. data/ext/oj/sparse.c +749 -644
  57. data/ext/oj/stream_writer.c +329 -0
  58. data/ext/oj/strict.c +114 -112
  59. data/ext/oj/string_writer.c +517 -0
  60. data/ext/oj/trace.c +72 -0
  61. data/ext/oj/trace.h +55 -0
  62. data/ext/oj/usual.c +1218 -0
  63. data/ext/oj/usual.h +69 -0
  64. data/ext/oj/util.c +136 -0
  65. data/ext/oj/util.h +20 -0
  66. data/ext/oj/val_stack.c +75 -72
  67. data/ext/oj/val_stack.h +94 -127
  68. data/ext/oj/validate.c +46 -0
  69. data/ext/oj/wab.c +586 -0
  70. data/lib/oj/active_support_helper.rb +1 -3
  71. data/lib/oj/bag.rb +8 -1
  72. data/lib/oj/easy_hash.rb +21 -13
  73. data/lib/oj/error.rb +10 -12
  74. data/lib/oj/json.rb +188 -0
  75. data/lib/oj/mimic.rb +165 -26
  76. data/lib/oj/saj.rb +20 -6
  77. data/lib/oj/schandler.rb +5 -4
  78. data/lib/oj/state.rb +135 -0
  79. data/lib/oj/version.rb +2 -3
  80. data/lib/oj.rb +3 -31
  81. data/pages/Advanced.md +22 -0
  82. data/pages/Compatibility.md +25 -0
  83. data/pages/Custom.md +23 -0
  84. data/pages/Encoding.md +65 -0
  85. data/pages/InstallOptions.md +20 -0
  86. data/pages/JsonGem.md +94 -0
  87. data/pages/Modes.md +161 -0
  88. data/pages/Options.md +337 -0
  89. data/pages/Parser.md +309 -0
  90. data/pages/Rails.md +167 -0
  91. data/pages/Security.md +20 -0
  92. data/pages/WAB.md +13 -0
  93. metadata +126 -163
  94. data/ext/oj/hash.c +0 -163
  95. data/ext/oj/hash.h +0 -46
  96. data/ext/oj/hash_test.c +0 -512
  97. data/test/_test_active.rb +0 -76
  98. data/test/_test_active_mimic.rb +0 -96
  99. data/test/_test_mimic_rails.rb +0 -126
  100. data/test/activesupport_datetime_test.rb +0 -23
  101. data/test/bug.rb +0 -51
  102. data/test/bug2.rb +0 -10
  103. data/test/bug3.rb +0 -46
  104. data/test/bug_fast.rb +0 -32
  105. data/test/bug_load.rb +0 -24
  106. data/test/crash.rb +0 -111
  107. data/test/curl/curl_oj.rb +0 -46
  108. data/test/curl/get_oj.rb +0 -24
  109. data/test/curl/just_curl.rb +0 -31
  110. data/test/curl/just_oj.rb +0 -51
  111. data/test/example.rb +0 -11
  112. data/test/files.rb +0 -29
  113. data/test/foo.rb +0 -24
  114. data/test/helper.rb +0 -27
  115. data/test/io.rb +0 -48
  116. data/test/isolated/shared.rb +0 -310
  117. data/test/isolated/test_mimic_after.rb +0 -13
  118. data/test/isolated/test_mimic_alone.rb +0 -12
  119. data/test/isolated/test_mimic_as_json.rb +0 -45
  120. data/test/isolated/test_mimic_before.rb +0 -13
  121. data/test/isolated/test_mimic_define.rb +0 -28
  122. data/test/isolated/test_mimic_rails_after.rb +0 -22
  123. data/test/isolated/test_mimic_rails_before.rb +0 -21
  124. data/test/isolated/test_mimic_rails_datetime.rb +0 -27
  125. data/test/isolated/test_mimic_redefine.rb +0 -15
  126. data/test/mod.rb +0 -16
  127. data/test/perf.rb +0 -107
  128. data/test/perf_compat.rb +0 -128
  129. data/test/perf_fast.rb +0 -164
  130. data/test/perf_file.rb +0 -64
  131. data/test/perf_object.rb +0 -138
  132. data/test/perf_saj.rb +0 -109
  133. data/test/perf_scp.rb +0 -151
  134. data/test/perf_simple.rb +0 -287
  135. data/test/perf_strict.rb +0 -128
  136. data/test/rails.rb +0 -50
  137. data/test/russian.rb +0 -18
  138. data/test/sample/change.rb +0 -14
  139. data/test/sample/dir.rb +0 -19
  140. data/test/sample/doc.rb +0 -36
  141. data/test/sample/file.rb +0 -48
  142. data/test/sample/group.rb +0 -16
  143. data/test/sample/hasprops.rb +0 -16
  144. data/test/sample/layer.rb +0 -12
  145. data/test/sample/line.rb +0 -20
  146. data/test/sample/oval.rb +0 -10
  147. data/test/sample/rect.rb +0 -10
  148. data/test/sample/shape.rb +0 -35
  149. data/test/sample/text.rb +0 -20
  150. data/test/sample.rb +0 -55
  151. data/test/sample_json.rb +0 -37
  152. data/test/struct.rb +0 -29
  153. data/test/test_compat.rb +0 -398
  154. data/test/test_debian.rb +0 -53
  155. data/test/test_fast.rb +0 -458
  156. data/test/test_file.rb +0 -245
  157. data/test/test_gc.rb +0 -49
  158. data/test/test_hash.rb +0 -29
  159. data/test/test_object.rb +0 -745
  160. data/test/test_saj.rb +0 -186
  161. data/test/test_scp.rb +0 -396
  162. data/test/test_serializer.rb +0 -59
  163. data/test/test_strict.rb +0 -254
  164. data/test/test_various.rb +0 -1383
  165. data/test/test_writer.rb +0 -308
  166. data/test/write_timebars.rb +0 -31
data/ext/oj/parse.c CHANGED
@@ -1,790 +1,964 @@
1
- /* parse.c
2
- * Copyright (c) 2013, Peter Ohler
3
- * All rights reserved.
4
- *
5
- * Redistribution and use in source and binary forms, with or without
6
- * modification, are permitted provided that the following conditions are met:
7
- *
8
- * - Redistributions of source code must retain the above copyright notice, this
9
- * list of conditions and the following disclaimer.
10
- *
11
- * - Redistributions in binary form must reproduce the above copyright notice,
12
- * this list of conditions and the following disclaimer in the documentation
13
- * and/or other materials provided with the distribution.
14
- *
15
- * - Neither the name of Peter Ohler nor the names of its contributors may be
16
- * used to endorse or promote products derived from this software without
17
- * specific prior written permission.
18
- *
19
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
- * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
- */
1
+ // Copyright (c) 2013 Peter Ohler. All rights reserved.
2
+ // Licensed under the MIT License. See LICENSE file in the project root for license details.
30
3
 
31
- #include <stdlib.h>
4
+ #include "parse.h"
5
+
6
+ #include <math.h>
7
+ #include <ruby/util.h>
32
8
  #include <stdio.h>
9
+ #include <stdlib.h>
33
10
  #include <string.h>
34
11
  #include <unistd.h>
35
- #include <math.h>
36
12
 
37
- #include "oj.h"
38
- #include "parse.h"
39
13
  #include "buf.h"
14
+ #include "encode.h"
15
+ #include "mem.h"
16
+ #include "oj.h"
17
+ #include "rxclass.h"
40
18
  #include "val_stack.h"
41
19
 
20
+ #ifdef OJ_USE_SSE4_2
21
+ #include <nmmintrin.h>
22
+ #endif
23
+
42
24
  // Workaround in case INFINITY is not defined in math.h or if the OS is CentOS
43
- #define OJ_INFINITY (1.0/0.0)
25
+ #define OJ_INFINITY (1.0 / 0.0)
44
26
 
45
- //#define EXP_MAX 1023
46
- #define EXP_MAX 100000
47
- #define DEC_MAX 15
27
+ // #define EXP_MAX 1023
28
+ #define EXP_MAX 100000
29
+ #define DEC_MAX 15
48
30
 
49
- static void
50
- next_non_white(ParseInfo pi) {
31
+ static void next_non_white(ParseInfo pi) {
51
32
  for (; 1; pi->cur++) {
52
- switch(*pi->cur) {
53
- case ' ':
54
- case '\t':
55
- case '\f':
56
- case '\n':
57
- case '\r':
58
- break;
59
- default:
60
- return;
61
- }
33
+ switch (*pi->cur) {
34
+ case ' ':
35
+ case '\t':
36
+ case '\f':
37
+ case '\n':
38
+ case '\r': break;
39
+ default: return;
40
+ }
62
41
  }
63
42
  }
64
43
 
65
- static void
66
- skip_comment(ParseInfo pi) {
44
+ static void skip_comment(ParseInfo pi) {
67
45
  if ('*' == *pi->cur) {
68
- pi->cur++;
69
- for (; pi->cur < pi->end; pi->cur++) {
70
- if ('*' == *pi->cur && '/' == *(pi->cur + 1)) {
71
- pi->cur += 2;
72
- return;
73
- } else if (pi->end <= pi->cur) {
74
- oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "comment not terminated");
75
- return;
76
- }
77
- }
46
+ pi->cur++;
47
+ for (; pi->cur < pi->end; pi->cur++) {
48
+ if ('*' == *pi->cur && '/' == *(pi->cur + 1)) {
49
+ pi->cur += 2;
50
+ return;
51
+ } else if (pi->end <= pi->cur) {
52
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "comment not terminated");
53
+ return;
54
+ }
55
+ }
78
56
  } else if ('/' == *pi->cur) {
79
- for (; 1; pi->cur++) {
80
- switch (*pi->cur) {
81
- case '\n':
82
- case '\r':
83
- case '\f':
84
- case '\0':
85
- return;
86
- default:
87
- break;
88
- }
89
- }
57
+ for (; 1; pi->cur++) {
58
+ switch (*pi->cur) {
59
+ case '\n':
60
+ case '\r':
61
+ case '\f':
62
+ case '\0': return;
63
+ default: break;
64
+ }
65
+ }
90
66
  } else {
91
- oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "invalid comment format");
67
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "invalid comment format");
92
68
  }
93
69
  }
94
70
 
95
- static void
96
- add_value(ParseInfo pi, VALUE rval) {
97
- Val parent = stack_peek(&pi->stack);
71
+ static void add_value(ParseInfo pi, VALUE rval) {
72
+ Val parent = stack_peek(&pi->stack);
98
73
 
99
- if (0 == parent) { // simple add
100
- pi->add_value(pi, rval);
74
+ if (0 == parent) { // simple add
75
+ pi->add_value(pi, rval);
101
76
  } else {
102
- switch (parent->next) {
103
- case NEXT_ARRAY_NEW:
104
- case NEXT_ARRAY_ELEMENT:
105
- pi->array_append_value(pi, rval);
106
- parent->next = NEXT_ARRAY_COMMA;
107
- break;
108
- case NEXT_HASH_VALUE:
109
- pi->hash_set_value(pi, parent, rval);
110
- if (0 != parent->key && 0 < parent->klen && (parent->key < pi->json || pi->cur < parent->key)) {
111
- xfree((char*)parent->key);
112
- parent->key = 0;
113
- }
114
- parent->next = NEXT_HASH_COMMA;
115
- break;
116
- case NEXT_HASH_NEW:
117
- case NEXT_HASH_KEY:
118
- case NEXT_HASH_COMMA:
119
- case NEXT_NONE:
120
- case NEXT_ARRAY_COMMA:
121
- case NEXT_HASH_COLON:
122
- default:
123
- oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "expected %s", oj_stack_next_string(parent->next));
124
- break;
125
- }
77
+ switch (parent->next) {
78
+ case NEXT_ARRAY_NEW:
79
+ case NEXT_ARRAY_ELEMENT:
80
+ pi->array_append_value(pi, rval);
81
+ parent->next = NEXT_ARRAY_COMMA;
82
+ break;
83
+ case NEXT_HASH_VALUE:
84
+ pi->hash_set_value(pi, parent, rval);
85
+ if (0 != parent->key && 0 < parent->klen && (parent->key < pi->json || pi->cur < parent->key)) {
86
+ OJ_R_FREE((char *)parent->key);
87
+ parent->key = 0;
88
+ }
89
+ parent->next = NEXT_HASH_COMMA;
90
+ break;
91
+ case NEXT_HASH_NEW:
92
+ case NEXT_HASH_KEY:
93
+ case NEXT_HASH_COMMA:
94
+ case NEXT_NONE:
95
+ case NEXT_ARRAY_COMMA:
96
+ case NEXT_HASH_COLON:
97
+ default:
98
+ oj_set_error_at(pi,
99
+ oj_parse_error_class,
100
+ __FILE__,
101
+ __LINE__,
102
+ "expected %s",
103
+ oj_stack_next_string(parent->next));
104
+ break;
105
+ }
126
106
  }
127
107
  }
128
108
 
129
- static void
130
- read_null(ParseInfo pi) {
109
+ static void read_null(ParseInfo pi) {
131
110
  if ('u' == *pi->cur++ && 'l' == *pi->cur++ && 'l' == *pi->cur++) {
132
- add_value(pi, Qnil);
111
+ add_value(pi, Qnil);
133
112
  } else {
134
- oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "expected null");
113
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "expected null");
135
114
  }
136
115
  }
137
116
 
138
- static void
139
- read_true(ParseInfo pi) {
117
+ static void read_true(ParseInfo pi) {
140
118
  if ('r' == *pi->cur++ && 'u' == *pi->cur++ && 'e' == *pi->cur++) {
141
- add_value(pi, Qtrue);
119
+ add_value(pi, Qtrue);
142
120
  } else {
143
- oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "expected true");
121
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "expected true");
144
122
  }
145
123
  }
146
124
 
147
- static void
148
- read_false(ParseInfo pi) {
125
+ static void read_false(ParseInfo pi) {
149
126
  if ('a' == *pi->cur++ && 'l' == *pi->cur++ && 's' == *pi->cur++ && 'e' == *pi->cur++) {
150
- add_value(pi, Qfalse);
127
+ add_value(pi, Qfalse);
151
128
  } else {
152
- oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "expected false");
129
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "expected false");
153
130
  }
154
131
  }
155
132
 
156
- static uint32_t
157
- read_hex(ParseInfo pi, const char *h) {
158
- uint32_t b = 0;
159
- int i;
133
+ static uint32_t read_hex(ParseInfo pi, const char *h) {
134
+ uint32_t b = 0;
135
+ int i;
160
136
 
161
137
  for (i = 0; i < 4; i++, h++) {
162
- b = b << 4;
163
- if ('0' <= *h && *h <= '9') {
164
- b += *h - '0';
165
- } else if ('A' <= *h && *h <= 'F') {
166
- b += *h - 'A' + 10;
167
- } else if ('a' <= *h && *h <= 'f') {
168
- b += *h - 'a' + 10;
169
- } else {
170
- oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "invalid hex character");
171
- return 0;
172
- }
138
+ b = b << 4;
139
+ if ('0' <= *h && *h <= '9') {
140
+ b += *h - '0';
141
+ } else if ('A' <= *h && *h <= 'F') {
142
+ b += *h - 'A' + 10;
143
+ } else if ('a' <= *h && *h <= 'f') {
144
+ b += *h - 'a' + 10;
145
+ } else {
146
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "invalid hex character");
147
+ return 0;
148
+ }
173
149
  }
174
150
  return b;
175
151
  }
176
152
 
177
- static void
178
- unicode_to_chars(ParseInfo pi, Buf buf, uint32_t code) {
153
+ static void unicode_to_chars(ParseInfo pi, Buf buf, uint32_t code) {
179
154
  if (0x0000007F >= code) {
180
- buf_append(buf, (char)code);
155
+ buf_append(buf, (char)code);
181
156
  } else if (0x000007FF >= code) {
182
- buf_append(buf, 0xC0 | (code >> 6));
183
- buf_append(buf, 0x80 | (0x3F & code));
157
+ buf_append(buf, 0xC0 | (code >> 6));
158
+ buf_append(buf, 0x80 | (0x3F & code));
184
159
  } else if (0x0000FFFF >= code) {
185
- buf_append(buf, 0xE0 | (code >> 12));
186
- buf_append(buf, 0x80 | ((code >> 6) & 0x3F));
187
- buf_append(buf, 0x80 | (0x3F & code));
160
+ buf_append(buf, 0xE0 | (code >> 12));
161
+ buf_append(buf, 0x80 | ((code >> 6) & 0x3F));
162
+ buf_append(buf, 0x80 | (0x3F & code));
188
163
  } else if (0x001FFFFF >= code) {
189
- buf_append(buf, 0xF0 | (code >> 18));
190
- buf_append(buf, 0x80 | ((code >> 12) & 0x3F));
191
- buf_append(buf, 0x80 | ((code >> 6) & 0x3F));
192
- buf_append(buf, 0x80 | (0x3F & code));
164
+ buf_append(buf, 0xF0 | (code >> 18));
165
+ buf_append(buf, 0x80 | ((code >> 12) & 0x3F));
166
+ buf_append(buf, 0x80 | ((code >> 6) & 0x3F));
167
+ buf_append(buf, 0x80 | (0x3F & code));
193
168
  } else if (0x03FFFFFF >= code) {
194
- buf_append(buf, 0xF8 | (code >> 24));
195
- buf_append(buf, 0x80 | ((code >> 18) & 0x3F));
196
- buf_append(buf, 0x80 | ((code >> 12) & 0x3F));
197
- buf_append(buf, 0x80 | ((code >> 6) & 0x3F));
198
- buf_append(buf, 0x80 | (0x3F & code));
169
+ buf_append(buf, 0xF8 | (code >> 24));
170
+ buf_append(buf, 0x80 | ((code >> 18) & 0x3F));
171
+ buf_append(buf, 0x80 | ((code >> 12) & 0x3F));
172
+ buf_append(buf, 0x80 | ((code >> 6) & 0x3F));
173
+ buf_append(buf, 0x80 | (0x3F & code));
199
174
  } else if (0x7FFFFFFF >= code) {
200
- buf_append(buf, 0xFC | (code >> 30));
201
- buf_append(buf, 0x80 | ((code >> 24) & 0x3F));
202
- buf_append(buf, 0x80 | ((code >> 18) & 0x3F));
203
- buf_append(buf, 0x80 | ((code >> 12) & 0x3F));
204
- buf_append(buf, 0x80 | ((code >> 6) & 0x3F));
205
- buf_append(buf, 0x80 | (0x3F & code));
175
+ buf_append(buf, 0xFC | (code >> 30));
176
+ buf_append(buf, 0x80 | ((code >> 24) & 0x3F));
177
+ buf_append(buf, 0x80 | ((code >> 18) & 0x3F));
178
+ buf_append(buf, 0x80 | ((code >> 12) & 0x3F));
179
+ buf_append(buf, 0x80 | ((code >> 6) & 0x3F));
180
+ buf_append(buf, 0x80 | (0x3F & code));
206
181
  } else {
207
- oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "invalid Unicode character");
182
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "invalid Unicode character");
183
+ }
184
+ }
185
+
186
+ static const unsigned char end_of_scan_string[] = {
187
+ // Filled 1 at the positions of '\0', '\\', and '"'
188
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
189
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
190
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
191
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
192
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
193
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
194
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
195
+ };
196
+ static inline const char *scan_string_noSIMD(const char *str, const char *end) {
197
+ for (; str < end; str++) {
198
+ if (end_of_scan_string[(unsigned char)*str]) {
199
+ break;
200
+ }
201
+ }
202
+ return str;
203
+ }
204
+
205
+ #ifdef OJ_USE_SSE4_2
206
+ static inline const char *scan_string_SIMD(const char *str, const char *end) {
207
+ static const char chars[16] = "\x00\\\"";
208
+ const __m128i terminate = _mm_loadu_si128((const __m128i *)&chars[0]);
209
+ const char *_end = (const char *)(end - 16);
210
+
211
+ for (; str <= _end; str += 16) {
212
+ const __m128i string = _mm_loadu_si128((const __m128i *)str);
213
+ const int r = _mm_cmpestri(terminate,
214
+ 3,
215
+ string,
216
+ 16,
217
+ _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT);
218
+ if (r != 16) {
219
+ str = (char *)(str + r);
220
+ return str;
221
+ }
208
222
  }
223
+
224
+ return scan_string_noSIMD(str, end);
225
+ }
226
+ #endif
227
+
228
+ static const char *(*scan_func)(const char *str, const char *end) = scan_string_noSIMD;
229
+
230
+ void oj_scanner_init(void) {
231
+ #ifdef OJ_USE_SSE4_2
232
+ scan_func = scan_string_SIMD;
233
+ #endif
209
234
  }
210
235
 
211
236
  // entered at /
212
- static void
213
- read_escaped_str(ParseInfo pi, const char *start) {
214
- struct _Buf buf;
215
- const char *s;
216
- int cnt = (int)(pi->cur - start);
217
- uint32_t code;
218
- Val parent = stack_peek(&pi->stack);
237
+ static void read_escaped_str(ParseInfo pi, const char *start) {
238
+ struct _buf buf;
239
+ const char *s;
240
+ int cnt = (int)(pi->cur - start);
241
+ uint32_t code;
242
+ Val parent = stack_peek(&pi->stack);
219
243
 
220
244
  buf_init(&buf);
221
- if (0 < cnt) {
222
- buf_append_string(&buf, start, cnt);
223
- }
224
- for (s = pi->cur; '"' != *s; s++) {
225
- if (s >= pi->end) {
226
- oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "quoted string not terminated");
227
- buf_cleanup(&buf);
228
- return;
229
- } else if ('\\' == *s) {
230
- s++;
231
- switch (*s) {
232
- case 'n': buf_append(&buf, '\n'); break;
233
- case 'r': buf_append(&buf, '\r'); break;
234
- case 't': buf_append(&buf, '\t'); break;
235
- case 'f': buf_append(&buf, '\f'); break;
236
- case 'b': buf_append(&buf, '\b'); break;
237
- case '"': buf_append(&buf, '"'); break;
238
- case '/': buf_append(&buf, '/'); break;
239
- case '\\': buf_append(&buf, '\\'); break;
240
- case 'u':
241
- s++;
242
- if (0 == (code = read_hex(pi, s)) && err_has(&pi->err)) {
243
- buf_cleanup(&buf);
244
- return;
245
- }
246
- s += 3;
247
- if (0x0000D800 <= code && code <= 0x0000DFFF) {
248
- uint32_t c1 = (code - 0x0000D800) & 0x000003FF;
249
- uint32_t c2;
250
-
251
- s++;
252
- if ('\\' != *s || 'u' != *(s + 1)) {
253
- if (Yes == pi->options.allow_invalid) {
254
- s--;
255
- unicode_to_chars(pi, &buf, code);
256
- break;
257
- }
258
- pi->cur = s;
259
- oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "invalid escaped character");
260
- buf_cleanup(&buf);
261
- return;
262
- }
263
- s += 2;
264
- if (0 == (c2 = read_hex(pi, s)) && err_has(&pi->err)) {
265
- buf_cleanup(&buf);
266
- return;
267
- }
268
- s += 3;
269
- c2 = (c2 - 0x0000DC00) & 0x000003FF;
270
- code = ((c1 << 10) | c2) + 0x00010000;
271
- }
272
- unicode_to_chars(pi, &buf, code);
273
- if (err_has(&pi->err)) {
274
- buf_cleanup(&buf);
275
- return;
276
- }
277
- break;
278
- default:
279
- pi->cur = s;
280
- oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "invalid escaped character");
281
- buf_cleanup(&buf);
282
- return;
283
- }
284
- } else {
285
- buf_append(&buf, *s);
286
- }
245
+ buf_append_string(&buf, start, cnt);
246
+
247
+ for (s = pi->cur; '"' != *s;) {
248
+ const char *scanned = scan_func(s, pi->end);
249
+ if (scanned >= pi->end || '\0' == *scanned) {
250
+ // if (scanned >= pi->end) {
251
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "quoted string not terminated");
252
+ buf_cleanup(&buf);
253
+ return;
254
+ }
255
+ buf_append_string(&buf, s, (size_t)(scanned - s));
256
+ s = scanned;
257
+
258
+ if ('\\' == *s) {
259
+ s++;
260
+ switch (*s) {
261
+ case 'n': buf_append(&buf, '\n'); break;
262
+ case 'r': buf_append(&buf, '\r'); break;
263
+ case 't': buf_append(&buf, '\t'); break;
264
+ case 'f': buf_append(&buf, '\f'); break;
265
+ case 'b': buf_append(&buf, '\b'); break;
266
+ case '"': buf_append(&buf, '"'); break;
267
+ case '/': buf_append(&buf, '/'); break;
268
+ case '\\': buf_append(&buf, '\\'); break;
269
+ case 'u':
270
+ s++;
271
+ if (0 == (code = read_hex(pi, s)) && err_has(&pi->err)) {
272
+ buf_cleanup(&buf);
273
+ return;
274
+ }
275
+ s += 3;
276
+ if (0x0000D800 <= code && code <= 0x0000DFFF) {
277
+ uint32_t c1 = (code - 0x0000D800) & 0x000003FF;
278
+ uint32_t c2;
279
+
280
+ s++;
281
+ if ('\\' != *s || 'u' != *(s + 1)) {
282
+ if (Yes == pi->options.allow_invalid) {
283
+ s--;
284
+ unicode_to_chars(pi, &buf, code);
285
+ break;
286
+ }
287
+ pi->cur = s;
288
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "invalid escaped character");
289
+ buf_cleanup(&buf);
290
+ return;
291
+ }
292
+ s += 2;
293
+ if (0 == (c2 = read_hex(pi, s)) && err_has(&pi->err)) {
294
+ buf_cleanup(&buf);
295
+ return;
296
+ }
297
+ s += 3;
298
+ c2 = (c2 - 0x0000DC00) & 0x000003FF;
299
+ code = ((c1 << 10) | c2) + 0x00010000;
300
+ }
301
+ unicode_to_chars(pi, &buf, code);
302
+ if (err_has(&pi->err)) {
303
+ buf_cleanup(&buf);
304
+ return;
305
+ }
306
+ break;
307
+ default:
308
+ // The json gem claims this is not an error despite the
309
+ // ECMA-404 indicating it is not valid.
310
+ if (CompatMode == pi->options.mode) {
311
+ buf_append(&buf, *s);
312
+ break;
313
+ }
314
+ pi->cur = s;
315
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "invalid escaped character");
316
+ buf_cleanup(&buf);
317
+ return;
318
+ }
319
+ s++;
320
+ }
287
321
  }
288
322
  if (0 == parent) {
289
- pi->add_cstr(pi, buf.head, buf_len(&buf), start);
323
+ pi->add_cstr(pi, buf.head, buf_len(&buf), start);
290
324
  } else {
291
- switch (parent->next) {
292
- case NEXT_ARRAY_NEW:
293
- case NEXT_ARRAY_ELEMENT:
294
- pi->array_append_cstr(pi, buf.head, buf_len(&buf), start);
295
- parent->next = NEXT_ARRAY_COMMA;
296
- break;
297
- case NEXT_HASH_NEW:
298
- case NEXT_HASH_KEY:
299
- if (Qundef == (parent->key_val = pi->hash_key(pi, buf.head, buf_len(&buf)))) {
300
- parent->key = strdup(buf.head);
301
- parent->klen = buf_len(&buf);
302
- } else {
303
- parent->key = "";
304
- parent->klen = 0;
305
- }
306
- parent->k1 = *start;
307
- parent->next = NEXT_HASH_COLON;
308
- break;
309
- case NEXT_HASH_VALUE:
310
- pi->hash_set_cstr(pi, parent, buf.head, buf_len(&buf), start);
311
- if (0 != parent->key && 0 < parent->klen && (parent->key < pi->json || pi->cur < parent->key)) {
312
- xfree((char*)parent->key);
313
- parent->key = 0;
314
- }
315
- parent->next = NEXT_HASH_COMMA;
316
- break;
317
- case NEXT_HASH_COMMA:
318
- case NEXT_NONE:
319
- case NEXT_ARRAY_COMMA:
320
- case NEXT_HASH_COLON:
321
- default:
322
- oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "expected %s, not a string", oj_stack_next_string(parent->next));
323
- break;
324
- }
325
+ switch (parent->next) {
326
+ case NEXT_ARRAY_NEW:
327
+ case NEXT_ARRAY_ELEMENT:
328
+ pi->array_append_cstr(pi, buf.head, buf_len(&buf), start);
329
+ parent->next = NEXT_ARRAY_COMMA;
330
+ break;
331
+ case NEXT_HASH_NEW:
332
+ case NEXT_HASH_KEY:
333
+ if (Qundef == (parent->key_val = pi->hash_key(pi, buf.head, buf_len(&buf)))) {
334
+ parent->klen = buf_len(&buf);
335
+ parent->key = OJ_MALLOC(parent->klen + 1);
336
+ memcpy((char *)parent->key, buf.head, parent->klen);
337
+ *(char *)(parent->key + parent->klen) = '\0';
338
+ } else {
339
+ parent->key = "";
340
+ parent->klen = 0;
341
+ }
342
+ parent->k1 = *start;
343
+ parent->next = NEXT_HASH_COLON;
344
+ break;
345
+ case NEXT_HASH_VALUE:
346
+ pi->hash_set_cstr(pi, parent, buf.head, buf_len(&buf), start);
347
+ if (0 != parent->key && 0 < parent->klen && (parent->key < pi->json || pi->cur < parent->key)) {
348
+ OJ_R_FREE((char *)parent->key);
349
+ parent->key = 0;
350
+ }
351
+ parent->next = NEXT_HASH_COMMA;
352
+ break;
353
+ case NEXT_HASH_COMMA:
354
+ case NEXT_NONE:
355
+ case NEXT_ARRAY_COMMA:
356
+ case NEXT_HASH_COLON:
357
+ default:
358
+ oj_set_error_at(pi,
359
+ oj_parse_error_class,
360
+ __FILE__,
361
+ __LINE__,
362
+ "expected %s, not a string",
363
+ oj_stack_next_string(parent->next));
364
+ break;
365
+ }
325
366
  }
326
367
  pi->cur = s + 1;
327
368
  buf_cleanup(&buf);
328
369
  }
329
370
 
330
- static void
331
- read_str(ParseInfo pi) {
332
- const char *str = pi->cur;
333
- Val parent = stack_peek(&pi->stack);
334
-
335
- for (; '"' != *pi->cur; pi->cur++) {
336
- if (pi->end <= pi->cur) {
337
- oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "quoted string not terminated");
338
- return;
339
- } else if ('\0' == *pi->cur) {
340
- oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "NULL byte in string");
341
- return;
342
- } else if ('\\' == *pi->cur) {
343
- read_escaped_str(pi, str);
344
- return;
345
- }
346
- }
347
- if (0 == parent) { // simple add
348
- pi->add_cstr(pi, str, pi->cur - str, str);
371
+ static void read_str(ParseInfo pi) {
372
+ const char *str = pi->cur;
373
+ Val parent = stack_peek(&pi->stack);
374
+
375
+ pi->cur = scan_func(pi->cur, pi->end);
376
+ if (RB_UNLIKELY(pi->end <= pi->cur)) {
377
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "quoted string not terminated");
378
+ return;
379
+ }
380
+ if (RB_UNLIKELY('\0' == *pi->cur)) {
381
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "NULL byte in string");
382
+ return;
383
+ }
384
+ if ('\\' == *pi->cur) {
385
+ read_escaped_str(pi, str);
386
+ return;
387
+ }
388
+
389
+ if (0 == parent) { // simple add
390
+ pi->add_cstr(pi, str, pi->cur - str, str);
349
391
  } else {
350
- switch (parent->next) {
351
- case NEXT_ARRAY_NEW:
352
- case NEXT_ARRAY_ELEMENT:
353
- pi->array_append_cstr(pi, str, pi->cur - str, str);
354
- parent->next = NEXT_ARRAY_COMMA;
355
- break;
356
- case NEXT_HASH_NEW:
357
- case NEXT_HASH_KEY:
358
- if (Qundef == (parent->key_val = pi->hash_key(pi, str, pi->cur - str))) {
359
- parent->key = str;
360
- parent->klen = pi->cur - str;
361
- } else {
362
- parent->key = "";
363
- parent->klen = 0;
364
- }
365
- parent->k1 = *str;
366
- parent->next = NEXT_HASH_COLON;
367
- break;
368
- case NEXT_HASH_VALUE:
369
- pi->hash_set_cstr(pi, parent, str, pi->cur - str, str);
370
- if (0 != parent->key && 0 < parent->klen && (parent->key < pi->json || pi->cur < parent->key)) {
371
- xfree((char*)parent->key);
372
- parent->key = 0;
373
- }
374
- parent->next = NEXT_HASH_COMMA;
375
- break;
376
- case NEXT_HASH_COMMA:
377
- case NEXT_NONE:
378
- case NEXT_ARRAY_COMMA:
379
- case NEXT_HASH_COLON:
380
- default:
381
- oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "expected %s, not a string", oj_stack_next_string(parent->next));
382
- break;
383
- }
384
- }
385
- pi->cur++; // move past "
392
+ switch (parent->next) {
393
+ case NEXT_ARRAY_NEW:
394
+ case NEXT_ARRAY_ELEMENT:
395
+ pi->array_append_cstr(pi, str, pi->cur - str, str);
396
+ parent->next = NEXT_ARRAY_COMMA;
397
+ break;
398
+ case NEXT_HASH_NEW:
399
+ case NEXT_HASH_KEY:
400
+ if (Qundef == (parent->key_val = pi->hash_key(pi, str, pi->cur - str))) {
401
+ parent->key = str;
402
+ parent->klen = pi->cur - str;
403
+ } else {
404
+ parent->key = "";
405
+ parent->klen = 0;
406
+ }
407
+ parent->k1 = *str;
408
+ parent->next = NEXT_HASH_COLON;
409
+ break;
410
+ case NEXT_HASH_VALUE:
411
+ pi->hash_set_cstr(pi, parent, str, pi->cur - str, str);
412
+ if (0 != parent->key && 0 < parent->klen && (parent->key < pi->json || pi->cur < parent->key)) {
413
+ OJ_R_FREE((char *)parent->key);
414
+ parent->key = 0;
415
+ }
416
+ parent->next = NEXT_HASH_COMMA;
417
+ break;
418
+ case NEXT_HASH_COMMA:
419
+ case NEXT_NONE:
420
+ case NEXT_ARRAY_COMMA:
421
+ case NEXT_HASH_COLON:
422
+ default:
423
+ oj_set_error_at(pi,
424
+ oj_parse_error_class,
425
+ __FILE__,
426
+ __LINE__,
427
+ "expected %s, not a string",
428
+ oj_stack_next_string(parent->next));
429
+ break;
430
+ }
431
+ }
432
+ pi->cur++; // move past "
386
433
  }
387
434
 
388
- static void
389
- read_num(ParseInfo pi) {
390
- struct _NumInfo ni;
391
- Val parent = stack_peek(&pi->stack);
392
-
393
- ni.str = pi->cur;
394
- ni.i = 0;
395
- ni.num = 0;
396
- ni.div = 1;
397
- ni.di = 0;
398
- ni.len = 0;
399
- ni.exp = 0;
400
- ni.big = 0;
435
+ static void read_num(ParseInfo pi) {
436
+ struct _numInfo ni;
437
+ Val parent = stack_peek(&pi->stack);
438
+
439
+ ni.pi = pi;
440
+ ni.str = pi->cur;
441
+ ni.i = 0;
442
+ ni.num = 0;
443
+ ni.div = 1;
444
+ ni.di = 0;
445
+ ni.len = 0;
446
+ ni.exp = 0;
447
+ ni.big = 0;
401
448
  ni.infinity = 0;
402
- ni.nan = 0;
403
- ni.neg = 0;
404
- ni.hasExp = 0;
405
- ni.no_big = (FloatDec == pi->options.bigdec_load);
449
+ ni.nan = 0;
450
+ ni.neg = 0;
451
+ ni.has_exp = 0;
452
+ if (CompatMode == pi->options.mode) {
453
+ ni.no_big = !pi->options.compat_bigdec;
454
+ ni.bigdec_load = pi->options.compat_bigdec;
455
+ } else {
456
+ ni.no_big = (FloatDec == pi->options.bigdec_load || FastDec == pi->options.bigdec_load ||
457
+ RubyDec == pi->options.bigdec_load);
458
+ ni.bigdec_load = pi->options.bigdec_load;
459
+ }
406
460
 
407
461
  if ('-' == *pi->cur) {
408
- pi->cur++;
409
- ni.neg = 1;
462
+ pi->cur++;
463
+ ni.neg = 1;
410
464
  } else if ('+' == *pi->cur) {
411
- pi->cur++;
465
+ if (StrictMode == pi->options.mode) {
466
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "not a number or other value");
467
+ return;
468
+ }
469
+ pi->cur++;
412
470
  }
413
471
  if ('I' == *pi->cur) {
414
- if (0 != strncmp("Infinity", pi->cur, 8)) {
415
- oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "not a number or other value");
416
- return;
417
- }
418
- pi->cur += 8;
419
- ni.infinity = 1;
472
+ if (No == pi->options.allow_nan || 0 != strncmp("Infinity", pi->cur, 8)) {
473
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "not a number or other value");
474
+ return;
475
+ }
476
+ pi->cur += 8;
477
+ ni.infinity = 1;
420
478
  } else if ('N' == *pi->cur || 'n' == *pi->cur) {
421
- if ('a' != pi->cur[1] || ('N' != pi->cur[2] && 'n' != pi->cur[2])) {
422
- oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "not a number or other value");
423
- return;
424
- }
425
- pi->cur += 3;
426
- ni.nan = 1;
479
+ if ('a' != pi->cur[1] || ('N' != pi->cur[2] && 'n' != pi->cur[2])) {
480
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "not a number or other value");
481
+ return;
482
+ }
483
+ pi->cur += 3;
484
+ ni.nan = 1;
427
485
  } else {
428
- int dec_cnt = 0;
429
-
430
- for (; '0' <= *pi->cur && *pi->cur <= '9'; pi->cur++) {
431
- if (0 < ni.i) {
432
- dec_cnt++;
433
- }
434
- if (!ni.big) {
435
- int d = (*pi->cur - '0');
436
-
437
- ni.i = ni.i * 10 + d;
438
- if (INT64_MAX <= ni.i || DEC_MAX < dec_cnt) {
439
- ni.big = 1;
440
- }
441
- }
442
- }
443
- if ('.' == *pi->cur) {
444
- pi->cur++;
445
- for (; '0' <= *pi->cur && *pi->cur <= '9'; pi->cur++) {
446
- int d = (*pi->cur - '0');
447
-
448
- if (0 < ni.num || 0 < ni.i) {
449
- dec_cnt++;
450
- }
451
- ni.num = ni.num * 10 + d;
452
- ni.div *= 10;
453
- ni.di++;
454
- if (INT64_MAX <= ni.div || DEC_MAX < dec_cnt) {
455
- ni.big = 1;
456
- }
457
- }
458
- }
459
- if ('e' == *pi->cur || 'E' == *pi->cur) {
460
- int eneg = 0;
461
-
462
- ni.hasExp = 1;
463
- pi->cur++;
464
- if ('-' == *pi->cur) {
465
- pi->cur++;
466
- eneg = 1;
467
- } else if ('+' == *pi->cur) {
468
- pi->cur++;
469
- }
470
- for (; '0' <= *pi->cur && *pi->cur <= '9'; pi->cur++) {
471
- ni.exp = ni.exp * 10 + (*pi->cur - '0');
472
- if (EXP_MAX <= ni.exp) {
473
- ni.big = 1;
474
- }
475
- }
476
- if (eneg) {
477
- ni.exp = -ni.exp;
478
- }
479
- }
480
- ni.len = pi->cur - ni.str;
486
+ int dec_cnt = 0;
487
+ bool zero1 = false;
488
+
489
+ // Skip leading zeros.
490
+ for (; '0' == *pi->cur; pi->cur++) {
491
+ zero1 = true;
492
+ }
493
+
494
+ for (; '0' <= *pi->cur && *pi->cur <= '9'; pi->cur++) {
495
+ int d = (*pi->cur - '0');
496
+
497
+ if (RB_LIKELY(0 != ni.i)) {
498
+ dec_cnt++;
499
+ }
500
+ ni.i = ni.i * 10 + d;
501
+ }
502
+ if (RB_UNLIKELY(0 != ni.i && zero1 && CompatMode == pi->options.mode)) {
503
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "not a number");
504
+ return;
505
+ }
506
+ if (INT64_MAX <= ni.i || DEC_MAX < dec_cnt) {
507
+ ni.big = true;
508
+ }
509
+
510
+ if ('.' == *pi->cur) {
511
+ pi->cur++;
512
+ // A trailing . is not a valid decimal but if encountered allow it
513
+ // except when mimicking the JSON gem or in strict mode.
514
+ if (StrictMode == pi->options.mode || CompatMode == pi->options.mode) {
515
+ int pos = (int)(pi->cur - ni.str);
516
+
517
+ if (1 == pos || (2 == pos && ni.neg)) {
518
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "not a number");
519
+ return;
520
+ }
521
+ if (*pi->cur < '0' || '9' < *pi->cur) {
522
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "not a number");
523
+ return;
524
+ }
525
+ }
526
+ for (; '0' <= *pi->cur && *pi->cur <= '9'; pi->cur++) {
527
+ int d = (*pi->cur - '0');
528
+
529
+ if (RB_LIKELY(0 != ni.num || 0 != ni.i)) {
530
+ dec_cnt++;
531
+ }
532
+ ni.num = ni.num * 10 + d;
533
+ ni.div *= 10;
534
+ ni.di++;
535
+ }
536
+ }
537
+ if (INT64_MAX <= ni.div || DEC_MAX < dec_cnt) {
538
+ if (!ni.no_big) {
539
+ ni.big = true;
540
+ }
541
+ }
542
+
543
+ if ('e' == *pi->cur || 'E' == *pi->cur) {
544
+ int eneg = 0;
545
+
546
+ ni.has_exp = 1;
547
+ pi->cur++;
548
+ if ('-' == *pi->cur) {
549
+ pi->cur++;
550
+ eneg = 1;
551
+ } else if ('+' == *pi->cur) {
552
+ pi->cur++;
553
+ }
554
+ for (; '0' <= *pi->cur && *pi->cur <= '9'; pi->cur++) {
555
+ ni.exp = ni.exp * 10 + (*pi->cur - '0');
556
+ if (EXP_MAX <= ni.exp) {
557
+ ni.big = true;
558
+ }
559
+ }
560
+ if (eneg) {
561
+ ni.exp = -ni.exp;
562
+ }
563
+ }
564
+ ni.len = pi->cur - ni.str;
481
565
  }
482
566
  // Check for special reserved values for Infinity and NaN.
483
567
  if (ni.big) {
484
- if (0 == strcasecmp(INF_VAL, ni.str)) {
485
- ni.infinity = 1;
486
- } else if (0 == strcasecmp(NINF_VAL, ni.str)) {
487
- ni.infinity = 1;
488
- ni.neg = 1;
489
- } else if (0 == strcasecmp(NAN_VAL, ni.str)) {
490
- ni.nan = 1;
491
- }
492
- }
493
- if (BigDec == pi->options.bigdec_load) {
494
- ni.big = 1;
568
+ if (0 == strcasecmp(INF_VAL, ni.str)) {
569
+ ni.infinity = 1;
570
+ } else if (0 == strcasecmp(NINF_VAL, ni.str)) {
571
+ ni.infinity = 1;
572
+ ni.neg = 1;
573
+ } else if (0 == strcasecmp(NAN_VAL, ni.str)) {
574
+ ni.nan = 1;
575
+ }
576
+ }
577
+ if (CompatMode == pi->options.mode) {
578
+ if (pi->options.compat_bigdec) {
579
+ ni.big = 1;
580
+ }
581
+ } else if (BigDec == pi->options.bigdec_load) {
582
+ ni.big = 1;
495
583
  }
496
584
  if (0 == parent) {
497
- pi->add_num(pi, &ni);
585
+ pi->add_num(pi, &ni);
498
586
  } else {
499
- switch (parent->next) {
500
- case NEXT_ARRAY_NEW:
501
- case NEXT_ARRAY_ELEMENT:
502
- pi->array_append_num(pi, &ni);
503
- parent->next = NEXT_ARRAY_COMMA;
504
- break;
505
- case NEXT_HASH_VALUE:
506
- pi->hash_set_num(pi, parent, &ni);
507
- if (0 != parent->key && 0 < parent->klen && (parent->key < pi->json || pi->cur < parent->key)) {
508
- xfree((char*)parent->key);
509
- parent->key = 0;
510
- }
511
- parent->next = NEXT_HASH_COMMA;
512
- break;
513
- default:
514
- oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "expected %s", oj_stack_next_string(parent->next));
515
- break;
516
- }
587
+ switch (parent->next) {
588
+ case NEXT_ARRAY_NEW:
589
+ case NEXT_ARRAY_ELEMENT:
590
+ pi->array_append_num(pi, &ni);
591
+ parent->next = NEXT_ARRAY_COMMA;
592
+ break;
593
+ case NEXT_HASH_VALUE:
594
+ pi->hash_set_num(pi, parent, &ni);
595
+ if (0 != parent->key && 0 < parent->klen && (parent->key < pi->json || pi->cur < parent->key)) {
596
+ OJ_R_FREE((char *)parent->key);
597
+ parent->key = 0;
598
+ }
599
+ parent->next = NEXT_HASH_COMMA;
600
+ break;
601
+ default:
602
+ oj_set_error_at(pi,
603
+ oj_parse_error_class,
604
+ __FILE__,
605
+ __LINE__,
606
+ "expected %s",
607
+ oj_stack_next_string(parent->next));
608
+ break;
609
+ }
517
610
  }
518
611
  }
519
612
 
520
- static void
521
- array_start(ParseInfo pi) {
522
- volatile VALUE v = pi->start_array(pi);
613
+ static void array_start(ParseInfo pi) {
614
+ VALUE v = pi->start_array(pi);
523
615
 
524
616
  stack_push(&pi->stack, v, NEXT_ARRAY_NEW);
525
617
  }
526
618
 
527
- static void
528
- array_end(ParseInfo pi) {
529
- Val array = stack_pop(&pi->stack);
619
+ static void array_end(ParseInfo pi) {
620
+ Val array = stack_pop(&pi->stack);
530
621
 
531
622
  if (0 == array) {
532
- oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "unexpected array close");
623
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "unexpected array close");
533
624
  } else if (NEXT_ARRAY_COMMA != array->next && NEXT_ARRAY_NEW != array->next) {
534
- oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "expected %s, not an array close", oj_stack_next_string(array->next));
625
+ oj_set_error_at(pi,
626
+ oj_parse_error_class,
627
+ __FILE__,
628
+ __LINE__,
629
+ "expected %s, not an array close",
630
+ oj_stack_next_string(array->next));
535
631
  } else {
536
- pi->end_array(pi);
537
- add_value(pi, array->val);
632
+ pi->end_array(pi);
633
+ add_value(pi, array->val);
538
634
  }
539
635
  }
540
636
 
541
- static void
542
- hash_start(ParseInfo pi) {
543
- volatile VALUE v = pi->start_hash(pi);
637
+ static void hash_start(ParseInfo pi) {
638
+ VALUE v = pi->start_hash(pi);
544
639
 
545
640
  stack_push(&pi->stack, v, NEXT_HASH_NEW);
546
641
  }
547
642
 
548
- static void
549
- hash_end(ParseInfo pi) {
550
- volatile Val hash = stack_peek(&pi->stack);
643
+ static void hash_end(ParseInfo pi) {
644
+ Val hash = stack_peek(&pi->stack);
551
645
 
552
646
  // leave hash on stack until just before
553
647
  if (0 == hash) {
554
- oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "unexpected hash close");
648
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "unexpected hash close");
555
649
  } else if (NEXT_HASH_COMMA != hash->next && NEXT_HASH_NEW != hash->next) {
556
- oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "expected %s, not a hash close", oj_stack_next_string(hash->next));
650
+ oj_set_error_at(pi,
651
+ oj_parse_error_class,
652
+ __FILE__,
653
+ __LINE__,
654
+ "expected %s, not a hash close",
655
+ oj_stack_next_string(hash->next));
557
656
  } else {
558
- pi->end_hash(pi);
559
- stack_pop(&pi->stack);
560
- add_value(pi, hash->val);
657
+ pi->end_hash(pi);
658
+ stack_pop(&pi->stack);
659
+ add_value(pi, hash->val);
561
660
  }
562
661
  }
563
662
 
564
- static void
565
- comma(ParseInfo pi) {
566
- Val parent = stack_peek(&pi->stack);
663
+ static void comma(ParseInfo pi) {
664
+ Val parent = stack_peek(&pi->stack);
567
665
 
568
666
  if (0 == parent) {
569
- oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "unexpected comma");
667
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "unexpected comma");
570
668
  } else if (NEXT_ARRAY_COMMA == parent->next) {
571
- parent->next = NEXT_ARRAY_ELEMENT;
669
+ parent->next = NEXT_ARRAY_ELEMENT;
572
670
  } else if (NEXT_HASH_COMMA == parent->next) {
573
- parent->next = NEXT_HASH_KEY;
671
+ parent->next = NEXT_HASH_KEY;
574
672
  } else {
575
- oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "unexpected comma");
673
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "unexpected comma");
576
674
  }
577
675
  }
578
676
 
579
- static void
580
- colon(ParseInfo pi) {
581
- Val parent = stack_peek(&pi->stack);
677
+ static void colon(ParseInfo pi) {
678
+ Val parent = stack_peek(&pi->stack);
582
679
 
583
680
  if (0 != parent && NEXT_HASH_COLON == parent->next) {
584
- parent->next = NEXT_HASH_VALUE;
681
+ parent->next = NEXT_HASH_VALUE;
585
682
  } else {
586
- oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "unexpected colon");
683
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "unexpected colon");
587
684
  }
588
685
  }
589
686
 
590
- void
591
- oj_parse2(ParseInfo pi) {
592
- int first = 1;
687
+ void oj_parse2(ParseInfo pi) {
688
+ int first = 1;
689
+ long start = 0;
593
690
 
594
691
  pi->cur = pi->json;
595
692
  err_init(&pi->err);
596
693
  while (1) {
597
- next_non_white(pi);
598
- if (!first && '\0' != *pi->cur) {
599
- oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "unexpected characters after the JSON document");
600
- }
601
-
602
- // if no tokens are consumed (i.e. empty string), throw a parse error
603
- // this is the behavior of JSON.parse in both Ruby and JS
604
- if (No == pi->options.empty_string && 1 == first && '\0' == *pi->cur) {
605
- oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "unexpected character");
606
- }
607
-
608
- switch (*pi->cur++) {
609
- case '{':
610
- hash_start(pi);
611
- break;
612
- case '}':
613
- hash_end(pi);
614
- break;
615
- case ':':
616
- colon(pi);
617
- break;
618
- case '[':
619
- array_start(pi);
620
- break;
621
- case ']':
622
- array_end(pi);
623
- break;
624
- case ',':
625
- comma(pi);
626
- break;
627
- case '"':
628
- read_str(pi);
629
- break;
630
- case '+':
631
- case '-':
632
- case '0':
633
- case '1':
634
- case '2':
635
- case '3':
636
- case '4':
637
- case '5':
638
- case '6':
639
- case '7':
640
- case '8':
641
- case '9':
642
- case 'I':
643
- case 'N':
644
- pi->cur--;
645
- read_num(pi);
646
- break;
647
- case 't':
648
- read_true(pi);
649
- break;
650
- case 'f':
651
- read_false(pi);
652
- break;
653
- case 'n':
654
- if ('u' == *pi->cur) {
655
- read_null(pi);
656
- } else {
657
- pi->cur--;
658
- read_num(pi);
659
- }
660
- break;
661
- case '/':
662
- skip_comment(pi);
663
- if (first) {
664
- continue;
665
- }
666
- break;
667
- case '\0':
668
- pi->cur--;
669
- return;
670
- default:
671
- oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "unexpected character");
672
- return;
673
- }
674
- if (err_has(&pi->err)) {
675
- return;
676
- }
677
- if (stack_empty(&pi->stack)) {
678
- if (Qundef != pi->proc) {
679
- if (Qnil == pi->proc) {
680
- rb_yield(stack_head_val(&pi->stack));
681
- } else {
682
- #if HAS_PROC_WITH_BLOCK
683
- VALUE args[1];
684
-
685
- *args = stack_head_val(&pi->stack);
686
- rb_proc_call_with_block(pi->proc, 1, args, Qnil);
687
- #else
688
- rb_raise(rb_eNotImpError,
689
- "Calling a Proc with a block not supported in this version. Use func() {|x| } syntax instead.");
690
- #endif
691
- }
692
- } else {
693
- first = 0;
694
- }
695
- }
694
+ if (RB_UNLIKELY(0 < pi->max_depth && pi->max_depth <= pi->stack.tail - pi->stack.head - 1)) {
695
+ VALUE err_clas = oj_get_json_err_class("NestingError");
696
+
697
+ oj_set_error_at(pi, err_clas, __FILE__, __LINE__, "Too deeply nested.");
698
+ pi->err_class = err_clas;
699
+ return;
700
+ }
701
+ next_non_white(pi);
702
+ if (first) {
703
+ // If no tokens are consumed (i.e. empty string), throw a parse error
704
+ // this is the behavior of JSON.parse in both Ruby and JS.
705
+ if (RB_UNLIKELY('\0' == *pi->cur && No == pi->options.empty_string)) {
706
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "unexpected character");
707
+ }
708
+ } else {
709
+ if (RB_UNLIKELY('\0' != *pi->cur)) {
710
+ oj_set_error_at(pi,
711
+ oj_parse_error_class,
712
+ __FILE__,
713
+ __LINE__,
714
+ "unexpected characters after the JSON document");
715
+ }
716
+ }
717
+
718
+ switch (*pi->cur++) {
719
+ case '{': hash_start(pi); break;
720
+ case '}': hash_end(pi); break;
721
+ case ':': colon(pi); break;
722
+ case '[': array_start(pi); break;
723
+ case ']': array_end(pi); break;
724
+ case ',': comma(pi); break;
725
+ case '"': read_str(pi); break;
726
+ case '+':
727
+ if (CompatMode == pi->options.mode) {
728
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "unexpected character");
729
+ return;
730
+ }
731
+ pi->cur--;
732
+ read_num(pi);
733
+ break;
734
+ case '-':
735
+ case '0':
736
+ case '1':
737
+ case '2':
738
+ case '3':
739
+ case '4':
740
+ case '5':
741
+ case '6':
742
+ case '7':
743
+ case '8':
744
+ case '9':
745
+ pi->cur--;
746
+ read_num(pi);
747
+ break;
748
+ case 'I':
749
+ case 'N':
750
+ if (Yes == pi->options.allow_nan) {
751
+ pi->cur--;
752
+ read_num(pi);
753
+ } else {
754
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "unexpected character");
755
+ }
756
+ break;
757
+ case 't': read_true(pi); break;
758
+ case 'f': read_false(pi); break;
759
+ case 'n':
760
+ if ('u' == *pi->cur) {
761
+ read_null(pi);
762
+ } else {
763
+ pi->cur--;
764
+ read_num(pi);
765
+ }
766
+ break;
767
+ case '/':
768
+ skip_comment(pi);
769
+ if (first) {
770
+ continue;
771
+ }
772
+ break;
773
+ case '\0': pi->cur--; return;
774
+ default: oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "unexpected character"); return;
775
+ }
776
+ if (RB_UNLIKELY(err_has(&pi->err))) {
777
+ return;
778
+ }
779
+ if (stack_empty(&pi->stack)) {
780
+ if (Qundef != pi->proc) {
781
+ VALUE args[3];
782
+ long len = (pi->cur - pi->json) - start;
783
+
784
+ *args = stack_head_val(&pi->stack);
785
+ args[1] = LONG2NUM(start);
786
+ args[2] = LONG2NUM(len);
787
+
788
+ if (Qnil == pi->proc) {
789
+ rb_yield_values2(3, args);
790
+ } else {
791
+ rb_proc_call_with_block(pi->proc, 3, args, Qnil);
792
+ }
793
+ } else if (!pi->has_callbacks) {
794
+ first = 0;
795
+ }
796
+ start = pi->cur - pi->json;
797
+ }
696
798
  }
697
799
  }
698
800
 
801
+ static VALUE rescue_big_decimal(VALUE str, VALUE ignore) {
802
+ rb_raise(oj_parse_error_class, "Invalid value for BigDecimal()");
803
+ return Qnil;
804
+ }
805
+
806
+ static VALUE parse_big_decimal(VALUE str) {
807
+ return rb_funcall(rb_cObject, oj_bigdecimal_id, 1, str);
808
+ }
809
+
810
+ static long double exp_plus[] = {
811
+ 1.0, 1.0e1, 1.0e2, 1.0e3, 1.0e4, 1.0e5, 1.0e6, 1.0e7, 1.0e8, 1.0e9, 1.0e10, 1.0e11, 1.0e12,
812
+ 1.0e13, 1.0e14, 1.0e15, 1.0e16, 1.0e17, 1.0e18, 1.0e19, 1.0e20, 1.0e21, 1.0e22, 1.0e23, 1.0e24, 1.0e25,
813
+ 1.0e26, 1.0e27, 1.0e28, 1.0e29, 1.0e30, 1.0e31, 1.0e32, 1.0e33, 1.0e34, 1.0e35, 1.0e36, 1.0e37, 1.0e38,
814
+ 1.0e39, 1.0e40, 1.0e41, 1.0e42, 1.0e43, 1.0e44, 1.0e45, 1.0e46, 1.0e47, 1.0e48, 1.0e49,
815
+ };
816
+
699
817
  VALUE
700
818
  oj_num_as_value(NumInfo ni) {
701
- volatile VALUE rnum = Qnil;
819
+ VALUE rnum = Qnil;
702
820
 
703
821
  if (ni->infinity) {
704
- if (ni->neg) {
705
- rnum = rb_float_new(-OJ_INFINITY);
706
- } else {
707
- rnum = rb_float_new(OJ_INFINITY);
708
- }
822
+ if (ni->neg) {
823
+ rnum = rb_float_new(-OJ_INFINITY);
824
+ } else {
825
+ rnum = rb_float_new(OJ_INFINITY);
826
+ }
709
827
  } else if (ni->nan) {
710
- rnum = rb_float_new(0.0/0.0);
711
- } else if (1 == ni->div && 0 == ni->exp) { // fixnum
712
- if (ni->big) {
713
- if (256 > ni->len) {
714
- char buf[256];
715
-
716
- memcpy(buf, ni->str, ni->len);
717
- buf[ni->len] = '\0';
718
- rnum = rb_cstr_to_inum(buf, 10, 0);
719
- } else {
720
- char *buf = ALLOC_N(char, ni->len + 1);
721
-
722
- memcpy(buf, ni->str, ni->len);
723
- buf[ni->len] = '\0';
724
- rnum = rb_cstr_to_inum(buf, 10, 0);
725
- xfree(buf);
726
- }
727
- } else {
728
- if (ni->neg) {
729
- rnum = rb_ll2inum(-ni->i);
730
- } else {
731
- rnum = rb_ll2inum(ni->i);
732
- }
733
- }
734
- } else { // decimal
735
- if (ni->big) {
736
- rnum = rb_funcall(oj_bigdecimal_class, oj_new_id, 1, rb_str_new(ni->str, ni->len));
737
- if (ni->no_big) {
738
- rnum = rb_funcall(rnum, rb_intern("to_f"), 0);
739
- }
740
- } else {
741
- // All these machinations are to get rounding to work better.
742
- long double d = (long double)ni->i * (long double)ni->div + (long double)ni->num;
743
- int x = ni->exp - ni->di;
744
-
745
- // Rounding sometimes cuts off the last digit even if there are only
746
- // 15 digits. This attempts to fix those few cases where this
747
- // occurs.
748
- if ((long double)INT64_MAX > d && (int64_t)d != (ni->i * ni->div + ni->num)) {
749
- rnum = rb_funcall(oj_bigdecimal_class, oj_new_id, 1, rb_str_new(ni->str, ni->len));
750
- if (ni->no_big) {
751
- rnum = rb_funcall(rnum, rb_intern("to_f"), 0);
752
- }
753
- } else {
754
- d = roundl(d);
755
- if (0 < x) {
756
- d *= powl(10.0L, x);
757
- } else if (0 > x) {
758
- d /= powl(10.0L, -x);
759
- }
760
- if (ni->neg) {
761
- d = -d;
762
- }
763
- rnum = rb_float_new((double)d);
764
- }
765
- }
828
+ rnum = rb_float_new(0.0 / 0.0);
829
+ } else if (1 == ni->div && 0 == ni->exp && !ni->has_exp) { // fixnum
830
+ if (ni->big) {
831
+ if (256 > ni->len) {
832
+ char buf[256];
833
+
834
+ memcpy(buf, ni->str, ni->len);
835
+ buf[ni->len] = '\0';
836
+ rnum = rb_cstr_to_inum(buf, 10, 0);
837
+ } else {
838
+ char *buf = OJ_R_ALLOC_N(char, ni->len + 1);
839
+
840
+ memcpy(buf, ni->str, ni->len);
841
+ buf[ni->len] = '\0';
842
+ rnum = rb_cstr_to_inum(buf, 10, 0);
843
+ OJ_R_FREE(buf);
844
+ }
845
+ } else {
846
+ if (ni->neg) {
847
+ rnum = rb_ll2inum(-ni->i);
848
+ } else {
849
+ rnum = rb_ll2inum(ni->i);
850
+ }
851
+ }
852
+ } else { // decimal
853
+ if (ni->big) {
854
+ VALUE bd = rb_str_new(ni->str, ni->len);
855
+
856
+ rnum = rb_rescue2(parse_big_decimal, bd, rescue_big_decimal, bd, rb_eException, 0);
857
+ if (ni->no_big) {
858
+ rnum = rb_funcall(rnum, rb_intern("to_f"), 0);
859
+ }
860
+ } else if (FastDec == ni->bigdec_load) {
861
+ long double ld = (long double)ni->i * (long double)ni->div + (long double)ni->num;
862
+ int x = (int)((int64_t)ni->exp - ni->di);
863
+
864
+ if (0 < x) {
865
+ if (x < (int)(sizeof(exp_plus) / sizeof(*exp_plus))) {
866
+ ld *= exp_plus[x];
867
+ } else {
868
+ ld *= powl(10.0, x);
869
+ }
870
+ } else if (x < 0) {
871
+ if (-x < (int)(sizeof(exp_plus) / sizeof(*exp_plus))) {
872
+ ld /= exp_plus[-x];
873
+ } else {
874
+ ld /= powl(10.0, -x);
875
+ }
876
+ }
877
+ if (ni->neg) {
878
+ ld = -ld;
879
+ }
880
+ rnum = rb_float_new((double)ld);
881
+ } else if (RubyDec == ni->bigdec_load) {
882
+ VALUE sv = rb_str_new(ni->str, ni->len);
883
+
884
+ rnum = rb_funcall(sv, rb_intern("to_f"), 0);
885
+ } else {
886
+ char *end;
887
+ double d = strtod(ni->str, &end);
888
+
889
+ if ((long)ni->len != (long)(end - ni->str)) {
890
+ if (Qnil == ni->pi->err_class) {
891
+ rb_raise(oj_parse_error_class, "Invalid float");
892
+ } else {
893
+ rb_raise(ni->pi->err_class, "Invalid float");
894
+ }
895
+ }
896
+ rnum = rb_float_new(d);
897
+ }
766
898
  }
767
899
  return rnum;
768
900
  }
769
901
 
770
- void
771
- oj_set_error_at(ParseInfo pi, VALUE err_clas, const char* file, int line, const char *format, ...) {
772
- va_list ap;
773
- char msg[128];
902
+ void oj_set_error_at(ParseInfo pi, VALUE err_clas, const char *file, int line, const char *format, ...) {
903
+ va_list ap;
904
+ char msg[256];
905
+ char *p = msg;
906
+ char *end = p + sizeof(msg) - 2;
907
+ char *start;
908
+ Val vp;
909
+ int mlen;
774
910
 
775
911
  va_start(ap, format);
776
- vsnprintf(msg, sizeof(msg) - 1, format, ap);
912
+ mlen = vsnprintf(msg, sizeof(msg) - 1, format, ap);
913
+ if (0 < mlen) {
914
+ if (sizeof(msg) - 2 < (size_t)mlen) {
915
+ p = end - 2;
916
+ } else {
917
+ p += mlen;
918
+ }
919
+ }
777
920
  va_end(ap);
778
921
  pi->err.clas = err_clas;
922
+ if (p + 3 < end) {
923
+ *p++ = ' ';
924
+ *p++ = '(';
925
+ *p++ = 'a';
926
+ *p++ = 'f';
927
+ *p++ = 't';
928
+ *p++ = 'e';
929
+ *p++ = 'r';
930
+ *p++ = ' ';
931
+ start = p;
932
+ for (vp = pi->stack.head; vp < pi->stack.tail; vp++) {
933
+ if (end <= p + 1 + vp->klen) {
934
+ break;
935
+ }
936
+ if (NULL != vp->key) {
937
+ if (start < p) {
938
+ *p++ = '.';
939
+ }
940
+ memcpy(p, vp->key, vp->klen);
941
+ p += vp->klen;
942
+ } else {
943
+ if (RUBY_T_ARRAY == rb_type(vp->val)) {
944
+ if (end <= p + 12) {
945
+ break;
946
+ }
947
+ p += snprintf(p, end - p, "[%ld]", RARRAY_LEN(vp->val));
948
+ }
949
+ }
950
+ }
951
+ *p++ = ')';
952
+ }
953
+ *p = '\0';
779
954
  if (0 == pi->json) {
780
- oj_err_set(&pi->err, err_clas, "%s at line %d, column %d [%s:%d]", msg, pi->rd.line, pi->rd.col, file, line);
955
+ oj_err_set(&pi->err, err_clas, "%s at line %d, column %d [%s:%d]", msg, pi->rd.line, pi->rd.col, file, line);
781
956
  } else {
782
- _oj_err_set_with_location(&pi->err, err_clas, msg, pi->json, pi->cur - 1, file, line);
957
+ _oj_err_set_with_location(&pi->err, err_clas, msg, pi->json, pi->cur - 1, file, line);
783
958
  }
784
959
  }
785
960
 
786
- static VALUE
787
- protect_parse(VALUE pip) {
961
+ static VALUE protect_parse(VALUE pip) {
788
962
  oj_parse2((ParseInfo)pip);
789
963
 
790
964
  return Qnil;
@@ -792,91 +966,102 @@ protect_parse(VALUE pip) {
792
966
 
793
967
  extern int oj_utf8_index;
794
968
 
795
- static void
796
- oj_pi_set_input_str(ParseInfo pi, volatile VALUE *inputp) {
797
- #if HAS_ENCODING_SUPPORT
798
- rb_encoding *enc = rb_to_encoding(rb_obj_encoding(*inputp));
969
+ static void oj_pi_set_input_str(ParseInfo pi, VALUE *inputp) {
970
+ int idx = RB_ENCODING_GET(*inputp);
799
971
 
800
- if (rb_utf8_encoding() != enc) {
801
- *inputp = rb_str_conv_enc(*inputp, enc, rb_utf8_encoding());
972
+ if (oj_utf8_encoding_index != idx) {
973
+ rb_encoding *enc = rb_enc_from_index(idx);
974
+ *inputp = rb_str_conv_enc(*inputp, enc, oj_utf8_encoding);
802
975
  }
803
- #endif
804
- pi->json = rb_string_value_ptr((VALUE*)inputp);
805
- pi->end = pi->json + RSTRING_LEN(*inputp);
976
+ pi->json = RSTRING_PTR(*inputp);
977
+ pi->end = pi->json + RSTRING_LEN(*inputp);
806
978
  }
807
979
 
808
980
  VALUE
809
981
  oj_pi_parse(int argc, VALUE *argv, ParseInfo pi, char *json, size_t len, int yieldOk) {
810
- char *buf = 0;
811
- volatile VALUE input;
812
- volatile VALUE wrapped_stack;
813
- volatile VALUE result = Qnil;
814
- int line = 0;
815
- int free_json = 0;
982
+ char *buf = 0;
983
+ VALUE input;
984
+ VALUE wrapped_stack;
985
+ VALUE result = Qnil;
986
+ int line = 0;
987
+ int free_json = 0;
816
988
 
817
989
  if (argc < 1) {
818
- rb_raise(rb_eArgError, "Wrong number of arguments to parse.");
990
+ rb_raise(rb_eArgError, "Wrong number of arguments to parse.");
819
991
  }
820
992
  input = argv[0];
821
- if (2 == argc) {
822
- oj_parse_options(argv[1], &pi->options);
993
+ if (2 <= argc) {
994
+ if (T_HASH == rb_type(argv[1])) {
995
+ oj_parse_options(argv[1], &pi->options);
996
+ } else if (3 <= argc && T_HASH == rb_type(argv[2])) {
997
+ oj_parse_options(argv[2], &pi->options);
998
+ }
823
999
  }
824
1000
  if (yieldOk && rb_block_given_p()) {
825
- pi->proc = Qnil;
1001
+ pi->proc = Qnil;
826
1002
  } else {
827
- pi->proc = Qundef;
1003
+ pi->proc = Qundef;
828
1004
  }
829
1005
  if (0 != json) {
830
- pi->json = json;
831
- pi->end = json + len;
832
- free_json = 1;
1006
+ pi->json = json;
1007
+ pi->end = json + len;
1008
+ free_json = 1;
833
1009
  } else if (T_STRING == rb_type(input)) {
834
- oj_pi_set_input_str(pi, &input);
835
- } else if (Qnil == input && Yes == pi->options.nilnil) {
836
- return Qnil;
1010
+ if (CompatMode == pi->options.mode) {
1011
+ if (No == pi->options.nilnil && 0 == RSTRING_LEN(input)) {
1012
+ rb_raise(oj_json_parser_error_class, "An empty string is not a valid JSON string.");
1013
+ }
1014
+ }
1015
+ oj_pi_set_input_str(pi, &input);
1016
+ } else if (Qnil == input) {
1017
+ if (Yes == pi->options.nilnil) {
1018
+ return Qnil;
1019
+ } else {
1020
+ rb_raise(rb_eTypeError, "Nil is not a valid JSON source.");
1021
+ }
837
1022
  } else {
838
- VALUE clas = rb_obj_class(input);
839
- volatile VALUE s;
1023
+ VALUE clas = rb_obj_class(input);
1024
+ VALUE s;
840
1025
 
841
- if (oj_stringio_class == clas) {
842
- s = rb_funcall2(input, oj_string_id, 0, 0);
843
- oj_pi_set_input_str(pi, &s);
1026
+ if (oj_stringio_class == clas) {
1027
+ s = rb_funcall2(input, oj_string_id, 0, 0);
1028
+ oj_pi_set_input_str(pi, &s);
844
1029
  #if !IS_WINDOWS
845
- } else if (rb_cFile == clas && 0 == FIX2INT(rb_funcall(input, oj_pos_id, 0))) {
846
- int fd = FIX2INT(rb_funcall(input, oj_fileno_id, 0));
847
- ssize_t cnt;
848
- size_t len = lseek(fd, 0, SEEK_END);
849
-
850
- lseek(fd, 0, SEEK_SET);
851
- buf = ALLOC_N(char, len + 1);
852
- pi->json = buf;
853
- pi->end = buf + len;
854
- if (0 >= (cnt = read(fd, (char*)pi->json, len)) || cnt != (ssize_t)len) {
855
- if (0 != buf) {
856
- xfree(buf);
857
- }
858
- rb_raise(rb_eIOError, "failed to read from IO Object.");
859
- }
860
- ((char*)pi->json)[len] = '\0';
861
- /* skip UTF-8 BOM if present */
862
- if (0xEF == (uint8_t)*pi->json && 0xBB == (uint8_t)pi->json[1] && 0xBF == (uint8_t)pi->json[2]) {
863
- pi->json += 3;
864
- }
1030
+ } else if (rb_cFile == clas && 0 == FIX2INT(rb_funcall(input, oj_pos_id, 0))) {
1031
+ int fd = FIX2INT(rb_funcall(input, oj_fileno_id, 0));
1032
+ ssize_t cnt;
1033
+ size_t len = lseek(fd, 0, SEEK_END);
1034
+
1035
+ lseek(fd, 0, SEEK_SET);
1036
+ buf = OJ_R_ALLOC_N(char, len + 1);
1037
+ pi->json = buf;
1038
+ pi->end = buf + len;
1039
+ if (0 >= (cnt = read(fd, (char *)pi->json, len)) || cnt != (ssize_t)len) {
1040
+ if (0 != buf) {
1041
+ OJ_R_FREE(buf);
1042
+ }
1043
+ rb_raise(rb_eIOError, "failed to read from IO Object.");
1044
+ }
1045
+ ((char *)pi->json)[len] = '\0';
1046
+ /* skip UTF-8 BOM if present */
1047
+ if (0xEF == (uint8_t)*pi->json && 0xBB == (uint8_t)pi->json[1] && 0xBF == (uint8_t)pi->json[2]) {
1048
+ pi->cur += 3;
1049
+ }
865
1050
  #endif
866
- } else if (rb_respond_to(input, oj_read_id)) {
867
- // use stream parser instead
868
- return oj_pi_sparse(argc, argv, pi, 0);
869
- } else {
870
- rb_raise(rb_eArgError, "strict_parse() expected a String or IO Object.");
871
- }
1051
+ } else if (rb_respond_to(input, oj_read_id)) {
1052
+ // use stream parser instead
1053
+ return oj_pi_sparse(argc, argv, pi, 0);
1054
+ } else {
1055
+ rb_raise(rb_eArgError, "parse() expected a String or IO Object.");
1056
+ }
872
1057
  }
873
1058
  if (Yes == pi->options.circular) {
874
- pi->circ_array = oj_circ_array_new();
1059
+ pi->circ_array = oj_circ_array_new();
875
1060
  } else {
876
- pi->circ_array = 0;
1061
+ pi->circ_array = 0;
877
1062
  }
878
1063
  if (No == pi->options.allow_gc) {
879
- rb_gc_disable();
1064
+ rb_gc_disable();
880
1065
  }
881
1066
  // GC can run at any time. When it runs any Object created by C will be
882
1067
  // freed. We protect against this by wrapping the value stack in a ruby
@@ -884,78 +1069,114 @@ oj_pi_parse(int argc, VALUE *argv, ParseInfo pi, char *json, size_t len, int yie
884
1069
  // value stack (while it is in scope).
885
1070
  wrapped_stack = oj_stack_init(&pi->stack);
886
1071
  rb_protect(protect_parse, (VALUE)pi, &line);
887
- result = stack_head_val(&pi->stack);
1072
+ if (Qundef == pi->stack.head->val && !empty_ok(&pi->options)) {
1073
+ if (No == pi->options.nilnil || (CompatMode == pi->options.mode && 0 < pi->cur - pi->json)) {
1074
+ oj_set_error_at(pi, oj_json_parser_error_class, __FILE__, __LINE__, "Empty input");
1075
+ }
1076
+ }
1077
+ result = stack_head_val(&pi->stack);
888
1078
  DATA_PTR(wrapped_stack) = 0;
889
1079
  if (No == pi->options.allow_gc) {
890
- rb_gc_enable();
1080
+ rb_gc_enable();
891
1081
  }
892
1082
  if (!err_has(&pi->err)) {
893
- // If the stack is not empty then the JSON terminated early.
894
- Val v;
895
-
896
- if (0 != (v = stack_peek(&pi->stack))) {
897
- switch (v->next) {
898
- case NEXT_ARRAY_NEW:
899
- case NEXT_ARRAY_ELEMENT:
900
- case NEXT_ARRAY_COMMA:
901
- oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "Array not terminated");
902
- break;
903
- case NEXT_HASH_NEW:
904
- case NEXT_HASH_KEY:
905
- case NEXT_HASH_COLON:
906
- case NEXT_HASH_VALUE:
907
- case NEXT_HASH_COMMA:
908
- oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "Hash/Object not terminated");
909
- break;
910
- default:
911
- oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "not terminated");
912
- }
913
- }
1083
+ // If the stack is not empty then the JSON terminated early.
1084
+ Val v;
1085
+ VALUE err_class = oj_parse_error_class;
1086
+
1087
+ if (0 != line) {
1088
+ VALUE ec = rb_obj_class(rb_errinfo());
1089
+
1090
+ if (rb_eArgError != ec && 0 != ec) {
1091
+ err_class = ec;
1092
+ }
1093
+ if (rb_eIOError != ec) {
1094
+ goto CLEANUP;
1095
+ }
1096
+ }
1097
+ if (NULL != (v = stack_peek(&pi->stack))) {
1098
+ switch (v->next) {
1099
+ case NEXT_ARRAY_NEW:
1100
+ case NEXT_ARRAY_ELEMENT:
1101
+ case NEXT_ARRAY_COMMA: oj_set_error_at(pi, err_class, __FILE__, __LINE__, "Array not terminated"); break;
1102
+ case NEXT_HASH_NEW:
1103
+ case NEXT_HASH_KEY:
1104
+ case NEXT_HASH_COLON:
1105
+ case NEXT_HASH_VALUE:
1106
+ case NEXT_HASH_COMMA:
1107
+ oj_set_error_at(pi, err_class, __FILE__, __LINE__, "Hash/Object not terminated");
1108
+ break;
1109
+ default: oj_set_error_at(pi, err_class, __FILE__, __LINE__, "not terminated");
1110
+ }
1111
+ }
914
1112
  }
1113
+ CLEANUP:
915
1114
  // proceed with cleanup
916
1115
  if (0 != pi->circ_array) {
917
- oj_circ_array_free(pi->circ_array);
1116
+ oj_circ_array_free(pi->circ_array);
918
1117
  }
919
1118
  if (0 != buf) {
920
- xfree(buf);
1119
+ OJ_R_FREE(buf);
921
1120
  } else if (free_json) {
922
- xfree(json);
1121
+ OJ_R_FREE(json);
923
1122
  }
924
1123
  stack_cleanup(&pi->stack);
925
- if (0 != line) {
926
- rb_jump_tag(line);
1124
+ if (pi->str_rx.head != oj_default_options.str_rx.head) {
1125
+ oj_rxclass_cleanup(&pi->str_rx);
927
1126
  }
928
1127
  if (err_has(&pi->err)) {
929
- if (Qnil != pi->err_class) {
930
- pi->err.clas = pi->err_class;
931
- }
932
- oj_err_raise(&pi->err);
1128
+ rb_set_errinfo(Qnil);
1129
+ if (Qnil != pi->err_class) {
1130
+ pi->err.clas = pi->err_class;
1131
+ }
1132
+ if ((CompatMode == pi->options.mode || RailsMode == pi->options.mode) && Yes != pi->options.safe) {
1133
+ // The json gem requires the error message be UTF-8 encoded. In
1134
+ // additional the complete JSON source must be returned. There
1135
+ // does not seem to be a size limit.
1136
+ VALUE msg = rb_utf8_str_new_cstr(pi->err.msg);
1137
+ VALUE args[1];
1138
+
1139
+ if (NULL != pi->json) {
1140
+ msg = rb_str_append(msg, rb_utf8_str_new_cstr(" in '"));
1141
+ msg = rb_str_append(msg, rb_utf8_str_new_cstr(pi->json));
1142
+ }
1143
+ args[0] = msg;
1144
+ if (pi->err.clas == oj_parse_error_class) {
1145
+ // The error was an Oj::ParseError so change to a JSON::ParserError.
1146
+ pi->err.clas = oj_json_parser_error_class;
1147
+ }
1148
+ rb_exc_raise(rb_class_new_instance(1, args, pi->err.clas));
1149
+ } else {
1150
+ oj_err_raise(&pi->err);
1151
+ }
1152
+ } else if (0 != line) {
1153
+ rb_jump_tag(line);
933
1154
  }
934
1155
  if (pi->options.quirks_mode == No) {
935
- switch (rb_type(result)) {
936
- case T_NIL:
937
- case T_TRUE:
938
- case T_FALSE:
939
- case T_FIXNUM:
940
- case T_FLOAT:
941
- case T_CLASS:
942
- case T_STRING:
943
- case T_SYMBOL: {
944
- struct _Err err;
945
-
946
- if (Qnil == pi->err_class) {
947
- err.clas = oj_parse_error_class;
948
- } else {
949
- err.clas = pi->err_class;
950
- }
951
- snprintf(err.msg, sizeof(err.msg), "unexpected non-document value");
952
- oj_err_raise(&err);
953
- break;
954
- }
955
- default:
956
- // okay
957
- break;
958
- }
1156
+ switch (rb_type(result)) {
1157
+ case T_NIL:
1158
+ case T_TRUE:
1159
+ case T_FALSE:
1160
+ case T_FIXNUM:
1161
+ case T_FLOAT:
1162
+ case T_CLASS:
1163
+ case T_STRING:
1164
+ case T_SYMBOL: {
1165
+ struct _err err;
1166
+
1167
+ if (Qnil == pi->err_class) {
1168
+ err.clas = oj_parse_error_class;
1169
+ } else {
1170
+ err.clas = pi->err_class;
1171
+ }
1172
+ snprintf(err.msg, sizeof(err.msg), "unexpected non-document value");
1173
+ oj_err_raise(&err);
1174
+ break;
1175
+ }
1176
+ default:
1177
+ // okay
1178
+ break;
1179
+ }
959
1180
  }
960
1181
  return result;
961
1182
  }