oj_windows 3.16.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +44 -0
  3. data/LICENSE +21 -0
  4. data/README.md +164 -0
  5. data/ext/oj_windows/buf.h +85 -0
  6. data/ext/oj_windows/cache.c +339 -0
  7. data/ext/oj_windows/cache.h +22 -0
  8. data/ext/oj_windows/cache8.c +105 -0
  9. data/ext/oj_windows/cache8.h +21 -0
  10. data/ext/oj_windows/circarray.c +64 -0
  11. data/ext/oj_windows/circarray.h +22 -0
  12. data/ext/oj_windows/code.c +214 -0
  13. data/ext/oj_windows/code.h +40 -0
  14. data/ext/oj_windows/compat.c +239 -0
  15. data/ext/oj_windows/custom.c +1074 -0
  16. data/ext/oj_windows/debug.c +126 -0
  17. data/ext/oj_windows/dump.c +1556 -0
  18. data/ext/oj_windows/dump.h +110 -0
  19. data/ext/oj_windows/dump_compat.c +901 -0
  20. data/ext/oj_windows/dump_leaf.c +162 -0
  21. data/ext/oj_windows/dump_object.c +710 -0
  22. data/ext/oj_windows/dump_strict.c +405 -0
  23. data/ext/oj_windows/encode.h +16 -0
  24. data/ext/oj_windows/err.c +57 -0
  25. data/ext/oj_windows/err.h +67 -0
  26. data/ext/oj_windows/extconf.rb +77 -0
  27. data/ext/oj_windows/fast.c +1710 -0
  28. data/ext/oj_windows/intern.c +325 -0
  29. data/ext/oj_windows/intern.h +22 -0
  30. data/ext/oj_windows/mem.c +320 -0
  31. data/ext/oj_windows/mem.h +53 -0
  32. data/ext/oj_windows/mimic_json.c +919 -0
  33. data/ext/oj_windows/object.c +726 -0
  34. data/ext/oj_windows/odd.c +245 -0
  35. data/ext/oj_windows/odd.h +43 -0
  36. data/ext/oj_windows/oj.c +2097 -0
  37. data/ext/oj_windows/oj.h +420 -0
  38. data/ext/oj_windows/parse.c +1317 -0
  39. data/ext/oj_windows/parse.h +113 -0
  40. data/ext/oj_windows/parser.c +1600 -0
  41. data/ext/oj_windows/parser.h +103 -0
  42. data/ext/oj_windows/rails.c +1484 -0
  43. data/ext/oj_windows/rails.h +18 -0
  44. data/ext/oj_windows/reader.c +222 -0
  45. data/ext/oj_windows/reader.h +137 -0
  46. data/ext/oj_windows/resolve.c +80 -0
  47. data/ext/oj_windows/resolve.h +12 -0
  48. data/ext/oj_windows/rxclass.c +144 -0
  49. data/ext/oj_windows/rxclass.h +26 -0
  50. data/ext/oj_windows/saj.c +675 -0
  51. data/ext/oj_windows/saj2.c +584 -0
  52. data/ext/oj_windows/saj2.h +23 -0
  53. data/ext/oj_windows/scp.c +187 -0
  54. data/ext/oj_windows/simd.h +47 -0
  55. data/ext/oj_windows/sparse.c +946 -0
  56. data/ext/oj_windows/stream_writer.c +329 -0
  57. data/ext/oj_windows/strict.c +189 -0
  58. data/ext/oj_windows/string_writer.c +517 -0
  59. data/ext/oj_windows/trace.c +72 -0
  60. data/ext/oj_windows/trace.h +55 -0
  61. data/ext/oj_windows/usual.c +1218 -0
  62. data/ext/oj_windows/usual.h +69 -0
  63. data/ext/oj_windows/util.c +136 -0
  64. data/ext/oj_windows/util.h +20 -0
  65. data/ext/oj_windows/val_stack.c +101 -0
  66. data/ext/oj_windows/val_stack.h +151 -0
  67. data/ext/oj_windows/validate.c +46 -0
  68. data/ext/oj_windows/wab.c +584 -0
  69. data/lib/oj/active_support_helper.rb +39 -0
  70. data/lib/oj/bag.rb +95 -0
  71. data/lib/oj/easy_hash.rb +52 -0
  72. data/lib/oj/error.rb +21 -0
  73. data/lib/oj/json.rb +188 -0
  74. data/lib/oj/mimic.rb +301 -0
  75. data/lib/oj/saj.rb +80 -0
  76. data/lib/oj/schandler.rb +143 -0
  77. data/lib/oj/state.rb +135 -0
  78. data/lib/oj/version.rb +4 -0
  79. data/lib/oj_windows/active_support_helper.rb +39 -0
  80. data/lib/oj_windows/bag.rb +95 -0
  81. data/lib/oj_windows/easy_hash.rb +52 -0
  82. data/lib/oj_windows/error.rb +21 -0
  83. data/lib/oj_windows/json.rb +188 -0
  84. data/lib/oj_windows/mimic.rb +301 -0
  85. data/lib/oj_windows/saj.rb +80 -0
  86. data/lib/oj_windows/schandler.rb +143 -0
  87. data/lib/oj_windows/state.rb +135 -0
  88. data/lib/oj_windows/version.rb +4 -0
  89. data/lib/oj_windows.rb +15 -0
  90. data/pages/Advanced.md +38 -0
  91. data/pages/Compatibility.md +49 -0
  92. data/pages/Custom.md +37 -0
  93. data/pages/Encoding.md +61 -0
  94. data/pages/InstallOptions.md +20 -0
  95. data/pages/JsonGem.md +60 -0
  96. data/pages/Modes.md +94 -0
  97. data/pages/Options.md +339 -0
  98. data/pages/Parser.md +134 -0
  99. data/pages/Rails.md +85 -0
  100. data/pages/Security.md +43 -0
  101. data/pages/WAB.md +12 -0
  102. metadata +242 -0
@@ -0,0 +1,1317 @@
1
+ // Copyright (c) 2013 Peter Ohler. All rights reserved.
2
+ // Licensed under the MIT License. See LICENSE file in the project root for license details.
3
+
4
+ #include "parse.h"
5
+
6
+ #include <math.h>
7
+ #include <ruby/util.h>
8
+ #include <stdio.h>
9
+ #include <stdlib.h>
10
+ #include <string.h>
11
+ #if !IS_WINDOWS
12
+ #include <unistd.h>
13
+ #endif
14
+
15
+ #include "buf.h"
16
+ #include "encode.h"
17
+ #include "mem.h"
18
+ #include "oj.h"
19
+ #include "rxclass.h"
20
+ #include "simd.h"
21
+ #include "val_stack.h"
22
+
// OJ_INFINITY is used in place of INFINITY, which is not guaranteed to be
// provided by every math.h. With MSVC a constant division by zero is reported
// as an error, so HUGE_VAL is used there instead of 1.0 / 0.0.
#ifdef _MSC_VER
#define OJ_INFINITY HUGE_VAL
#ifndef NAN
#include <float.h>
// Fallback NaN for toolchains whose math.h lacks NAN. The value is built from
// the bit pattern 0x7fffffffffffffff and read back through a union, which
// avoids both a reserved identifier and an incompatible pointer cast.
static const union {
    unsigned long long bits;
    double             value;
} oj_nan_fallback = {0x7fffffffffffffffULL};
#define NAN (oj_nan_fallback.value)
#endif
#else
#define OJ_INFINITY (1.0 / 0.0)
#endif

// An exponent at or above EXP_MAX marks a number as "big" (the limit was
// previously 1023).
#define EXP_MAX 100000
// More than DEC_MAX digits after the first significant digit marks a number
// as "big".
#define DEC_MAX 15
42
+
43
+ static void next_non_white(ParseInfo pi) {
44
+ for (; 1; pi->cur++) {
45
+ switch (*pi->cur) {
46
+ case ' ':
47
+ case '\t':
48
+ case '\f':
49
+ case '\n':
50
+ case '\r': break;
51
+ default: return;
52
+ }
53
+ }
54
+ }
55
+
56
+ static void skip_comment(ParseInfo pi) {
57
+ if ('*' == *pi->cur) {
58
+ pi->cur++;
59
+ for (; pi->cur < pi->end; pi->cur++) {
60
+ if ('*' == *pi->cur && '/' == *(pi->cur + 1)) {
61
+ pi->cur += 2;
62
+ return;
63
+ } else if (pi->end <= pi->cur) {
64
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "comment not terminated");
65
+ return;
66
+ }
67
+ }
68
+ } else if ('/' == *pi->cur) {
69
+ for (; 1; pi->cur++) {
70
+ switch (*pi->cur) {
71
+ case '\n':
72
+ case '\r':
73
+ case '\f':
74
+ case '\0': return;
75
+ default: break;
76
+ }
77
+ }
78
+ } else {
79
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "invalid comment format");
80
+ }
81
+ }
82
+
83
+ static void add_value(ParseInfo pi, VALUE rval) {
84
+ Val parent = stack_peek(&pi->stack);
85
+
86
+ if (0 == parent) { // simple add
87
+ pi->add_value(pi, rval);
88
+ } else {
89
+ switch (parent->next) {
90
+ case NEXT_ARRAY_NEW:
91
+ case NEXT_ARRAY_ELEMENT:
92
+ pi->array_append_value(pi, rval);
93
+ parent->next = NEXT_ARRAY_COMMA;
94
+ break;
95
+ case NEXT_HASH_VALUE:
96
+ pi->hash_set_value(pi, parent, rval);
97
+ if (0 != parent->key && 0 < parent->klen && (parent->key < pi->json || pi->cur < parent->key)) {
98
+ OJ_R_FREE((char *)parent->key);
99
+ parent->key = 0;
100
+ }
101
+ parent->next = NEXT_HASH_COMMA;
102
+ break;
103
+ case NEXT_HASH_NEW:
104
+ case NEXT_HASH_KEY:
105
+ case NEXT_HASH_COMMA:
106
+ case NEXT_NONE:
107
+ case NEXT_ARRAY_COMMA:
108
+ case NEXT_HASH_COLON:
109
+ default:
110
+ oj_set_error_at(pi,
111
+ oj_parse_error_class,
112
+ __FILE__,
113
+ __LINE__,
114
+ "expected %s",
115
+ oj_stack_next_string(parent->next));
116
+ break;
117
+ }
118
+ }
119
+ }
120
+
121
+ static void read_null(ParseInfo pi) {
122
+ if ('u' == *pi->cur++ && 'l' == *pi->cur++ && 'l' == *pi->cur++) {
123
+ add_value(pi, Qnil);
124
+ } else {
125
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "expected null");
126
+ }
127
+ }
128
+
129
+ static void read_true(ParseInfo pi) {
130
+ if ('r' == *pi->cur++ && 'u' == *pi->cur++ && 'e' == *pi->cur++) {
131
+ add_value(pi, Qtrue);
132
+ } else {
133
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "expected true");
134
+ }
135
+ }
136
+
137
+ static void read_false(ParseInfo pi) {
138
+ if ('a' == *pi->cur++ && 'l' == *pi->cur++ && 's' == *pi->cur++ && 'e' == *pi->cur++) {
139
+ add_value(pi, Qfalse);
140
+ } else {
141
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "expected false");
142
+ }
143
+ }
144
+
145
+ static uint32_t read_hex(ParseInfo pi, const char *h) {
146
+ uint32_t b = 0;
147
+ int i;
148
+
149
+ for (i = 0; i < 4; i++, h++) {
150
+ b = b << 4;
151
+ if ('0' <= *h && *h <= '9') {
152
+ b += *h - '0';
153
+ } else if ('A' <= *h && *h <= 'F') {
154
+ b += *h - 'A' + 10;
155
+ } else if ('a' <= *h && *h <= 'f') {
156
+ b += *h - 'a' + 10;
157
+ } else {
158
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "invalid hex character");
159
+ return 0;
160
+ }
161
+ }
162
+ return b;
163
+ }
164
+
165
+ static void unicode_to_chars(ParseInfo pi, Buf buf, uint32_t code) {
166
+ if (0x0000007F >= code) {
167
+ buf_append(buf, (char)code);
168
+ } else if (0x000007FF >= code) {
169
+ buf_append(buf, 0xC0 | (code >> 6));
170
+ buf_append(buf, 0x80 | (0x3F & code));
171
+ } else if (0x0000FFFF >= code) {
172
+ buf_append(buf, 0xE0 | (code >> 12));
173
+ buf_append(buf, 0x80 | ((code >> 6) & 0x3F));
174
+ buf_append(buf, 0x80 | (0x3F & code));
175
+ } else if (0x001FFFFF >= code) {
176
+ buf_append(buf, 0xF0 | (code >> 18));
177
+ buf_append(buf, 0x80 | ((code >> 12) & 0x3F));
178
+ buf_append(buf, 0x80 | ((code >> 6) & 0x3F));
179
+ buf_append(buf, 0x80 | (0x3F & code));
180
+ } else if (0x03FFFFFF >= code) {
181
+ buf_append(buf, 0xF8 | (code >> 24));
182
+ buf_append(buf, 0x80 | ((code >> 18) & 0x3F));
183
+ buf_append(buf, 0x80 | ((code >> 12) & 0x3F));
184
+ buf_append(buf, 0x80 | ((code >> 6) & 0x3F));
185
+ buf_append(buf, 0x80 | (0x3F & code));
186
+ } else if (0x7FFFFFFF >= code) {
187
+ buf_append(buf, 0xFC | (code >> 30));
188
+ buf_append(buf, 0x80 | ((code >> 24) & 0x3F));
189
+ buf_append(buf, 0x80 | ((code >> 18) & 0x3F));
190
+ buf_append(buf, 0x80 | ((code >> 12) & 0x3F));
191
+ buf_append(buf, 0x80 | ((code >> 6) & 0x3F));
192
+ buf_append(buf, 0x80 | (0x3F & code));
193
+ } else {
194
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "invalid Unicode character");
195
+ }
196
+ }
197
+
// Lookup table indexed by byte value: 1 for the bytes that stop a string
// scan ('\0', '"', and '\\'), 0 for every other byte.
static const unsigned char end_of_scan_string[256] = {
    ['\0'] = 1,
    ['"']  = 1,
    ['\\'] = 1,
};

// Portable string scanner. Returns a pointer to the first NUL, double quote,
// or backslash in [str, end), or end when none is found.
static inline const char *scan_string_noSIMD(const char *str, const char *end) {
    while (str < end && !end_of_scan_string[(unsigned char)*str]) {
        str++;
    }
    return str;
}
216
+
#ifdef HAVE_SIMD_SSE4_2
// SSE4.2 string scanner. Returns a pointer to the first NUL, backslash, or
// double quote in [str, end), or end when none is found.
//
// The input is examined 64 bytes at a time (four 16 byte compares), then 16
// bytes at a time, and the remaining tail is handed to scan_string_noSIMD().
// Loop bounds are expressed as "bytes remaining" rather than as end - 64 and
// end - 16, so no pointer outside the buffer is formed for short inputs.
static inline const char *scan_string_SSE42(const char *str, const char *end) {
    static const char chars[16] = "\x00\\\"";
    const __m128i     terminate = _mm_loadu_si128((const __m128i *)&chars[0]);

    // Process 64 bytes per iteration.
    while (64 <= end - str) {
        // Prefetch the block after this one.
        __builtin_prefetch(str + 64, 0, 0);

        // Load all four chunks before comparing them.
        const __m128i chunk0 = _mm_loadu_si128((const __m128i *)(str));
        const __m128i chunk1 = _mm_loadu_si128((const __m128i *)(str + 16));
        const __m128i chunk2 = _mm_loadu_si128((const __m128i *)(str + 32));
        const __m128i chunk3 = _mm_loadu_si128((const __m128i *)(str + 48));

        // Each compare yields the index of the first matching byte, or 16
        // when the chunk holds none of the three terminators.
        const int r0 = _mm_cmpestri(terminate,
                                    3,
                                    chunk0,
                                    16,
                                    _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT);
        if (__builtin_expect(r0 != 16, 0))
            return str + r0;

        const int r1 = _mm_cmpestri(terminate,
                                    3,
                                    chunk1,
                                    16,
                                    _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT);
        if (__builtin_expect(r1 != 16, 0))
            return str + 16 + r1;

        const int r2 = _mm_cmpestri(terminate,
                                    3,
                                    chunk2,
                                    16,
                                    _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT);
        if (__builtin_expect(r2 != 16, 0))
            return str + 32 + r2;

        const int r3 = _mm_cmpestri(terminate,
                                    3,
                                    chunk3,
                                    16,
                                    _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT);
        if (__builtin_expect(r3 != 16, 0))
            return str + 48 + r3;

        str += 64;
    }

    // Handle the remaining whole 16 byte chunks.
    for (; 16 <= end - str; str += 16) {
        const __m128i string = _mm_loadu_si128((const __m128i *)str);
        const int     r      = _mm_cmpestri(terminate,
                                   3,
                                   string,
                                   16,
                                   _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT);
        if (r != 16)
            return str + r;
    }

    // Fewer than 16 bytes left: finish byte by byte.
    return scan_string_noSIMD(str, end);
}
#endif
288
+
#ifdef HAVE_SIMD_SSE2
// SSE2 string scanner. Returns a pointer to the first NUL, backslash, or
// double quote in [str, end), or end when none is found.
//
// The input is examined 64 bytes at a time (four 16 byte chunks), then 16
// bytes at a time, and the remaining tail is handed to scan_string_noSIMD().
// Loop bounds are expressed as "bytes remaining" rather than as end - 64 and
// end - 16, so no pointer outside the buffer is formed for short inputs.
static inline const char *scan_string_SSE2(const char *str, const char *end) {
    // One comparison vector per terminating byte.
    const __m128i null_char = _mm_setzero_si128();
    const __m128i backslash = _mm_set1_epi8('\\');
    const __m128i quote     = _mm_set1_epi8('"');

    // Process 64 bytes per iteration.
    while (64 <= end - str) {
        __builtin_prefetch(str + 64, 0, 0);

        // Load 4 chunks.
        const __m128i chunk0 = _mm_loadu_si128((const __m128i *)(str));
        const __m128i chunk1 = _mm_loadu_si128((const __m128i *)(str + 16));
        const __m128i chunk2 = _mm_loadu_si128((const __m128i *)(str + 32));
        const __m128i chunk3 = _mm_loadu_si128((const __m128i *)(str + 48));

        // Compare every chunk against the three terminators up front; the
        // comparisons do not depend on each other.
        const __m128i cmp0 = _mm_or_si128(
            _mm_or_si128(_mm_cmpeq_epi8(chunk0, null_char), _mm_cmpeq_epi8(chunk0, backslash)),
            _mm_cmpeq_epi8(chunk0, quote));
        const __m128i cmp1 = _mm_or_si128(
            _mm_or_si128(_mm_cmpeq_epi8(chunk1, null_char), _mm_cmpeq_epi8(chunk1, backslash)),
            _mm_cmpeq_epi8(chunk1, quote));
        const __m128i cmp2 = _mm_or_si128(
            _mm_or_si128(_mm_cmpeq_epi8(chunk2, null_char), _mm_cmpeq_epi8(chunk2, backslash)),
            _mm_cmpeq_epi8(chunk2, quote));
        const __m128i cmp3 = _mm_or_si128(
            _mm_or_si128(_mm_cmpeq_epi8(chunk3, null_char), _mm_cmpeq_epi8(chunk3, backslash)),
            _mm_cmpeq_epi8(chunk3, quote));

        // Each mask has one bit per byte; the lowest set bit is the first
        // match within that chunk.
        int mask0 = _mm_movemask_epi8(cmp0);
        if (__builtin_expect(mask0 != 0, 0))
            return str + __builtin_ctz(mask0);

        int mask1 = _mm_movemask_epi8(cmp1);
        if (__builtin_expect(mask1 != 0, 0))
            return str + 16 + __builtin_ctz(mask1);

        int mask2 = _mm_movemask_epi8(cmp2);
        if (__builtin_expect(mask2 != 0, 0))
            return str + 32 + __builtin_ctz(mask2);

        int mask3 = _mm_movemask_epi8(cmp3);
        if (__builtin_expect(mask3 != 0, 0))
            return str + 48 + __builtin_ctz(mask3);

        str += 64;
    }

    // Handle the remaining whole 16 byte chunks.
    for (; 16 <= end - str; str += 16) {
        const __m128i chunk   = _mm_loadu_si128((const __m128i *)str);
        const __m128i matches = _mm_or_si128(
            _mm_or_si128(_mm_cmpeq_epi8(chunk, null_char), _mm_cmpeq_epi8(chunk, backslash)),
            _mm_cmpeq_epi8(chunk, quote));
        int mask = _mm_movemask_epi8(matches);
        if (mask != 0)
            return str + __builtin_ctz(mask);
    }

    // Fewer than 16 bytes left: finish byte by byte.
    return scan_string_noSIMD(str, end);
}
#endif
359
+
360
+ static const char *(*scan_func)(const char *str, const char *end) = scan_string_noSIMD;
361
+
362
+ void oj_scanner_init(void) {
363
+ #ifdef HAVE_SIMD_SSE4_2
364
+ scan_func = scan_string_SSE42;
365
+ #elif defined(HAVE_SIMD_SSE2)
366
+ scan_func = scan_string_SSE2;
367
+ #endif
368
+ // Note: ARM NEON string scanning would be added here if needed
369
+ }
370
+
371
+ // entered at /
372
+ static void read_escaped_str(ParseInfo pi, const char *start) {
373
+ struct _buf buf;
374
+ const char *s;
375
+ int cnt = (int)(pi->cur - start);
376
+ uint32_t code;
377
+ Val parent = stack_peek(&pi->stack);
378
+
379
+ buf_init(&buf);
380
+ buf_append_string(&buf, start, cnt);
381
+
382
+ for (s = pi->cur; '"' != *s;) {
383
+ const char *scanned = scan_func(s, pi->end);
384
+ if (scanned >= pi->end || '\0' == *scanned) {
385
+ // if (scanned >= pi->end) {
386
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "quoted string not terminated");
387
+ buf_cleanup(&buf);
388
+ return;
389
+ }
390
+ buf_append_string(&buf, s, (size_t)(scanned - s));
391
+ s = scanned;
392
+
393
+ if ('\\' == *s) {
394
+ s++;
395
+ switch (*s) {
396
+ case 'n': buf_append(&buf, '\n'); break;
397
+ case 'r': buf_append(&buf, '\r'); break;
398
+ case 't': buf_append(&buf, '\t'); break;
399
+ case 'f': buf_append(&buf, '\f'); break;
400
+ case 'b': buf_append(&buf, '\b'); break;
401
+ case '"': buf_append(&buf, '"'); break;
402
+ case '/': buf_append(&buf, '/'); break;
403
+ case '\\': buf_append(&buf, '\\'); break;
404
+ case 'u':
405
+ s++;
406
+ if (0 == (code = read_hex(pi, s)) && err_has(&pi->err)) {
407
+ buf_cleanup(&buf);
408
+ return;
409
+ }
410
+ s += 3;
411
+ if (0x0000D800 <= code && code <= 0x0000DFFF) {
412
+ uint32_t c1 = (code - 0x0000D800) & 0x000003FF;
413
+ uint32_t c2;
414
+
415
+ s++;
416
+ if ('\\' != *s || 'u' != *(s + 1)) {
417
+ if (Yes == pi->options.allow_invalid) {
418
+ s--;
419
+ unicode_to_chars(pi, &buf, code);
420
+ break;
421
+ }
422
+ pi->cur = s;
423
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "invalid escaped character");
424
+ buf_cleanup(&buf);
425
+ return;
426
+ }
427
+ s += 2;
428
+ if (0 == (c2 = read_hex(pi, s)) && err_has(&pi->err)) {
429
+ buf_cleanup(&buf);
430
+ return;
431
+ }
432
+ s += 3;
433
+ c2 = (c2 - 0x0000DC00) & 0x000003FF;
434
+ code = ((c1 << 10) | c2) + 0x00010000;
435
+ }
436
+ unicode_to_chars(pi, &buf, code);
437
+ if (err_has(&pi->err)) {
438
+ buf_cleanup(&buf);
439
+ return;
440
+ }
441
+ break;
442
+ default:
443
+ // The json gem claims this is not an error despite the
444
+ // ECMA-404 indicating it is not valid.
445
+ if (CompatMode == pi->options.mode) {
446
+ buf_append(&buf, *s);
447
+ break;
448
+ }
449
+ pi->cur = s;
450
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "invalid escaped character");
451
+ buf_cleanup(&buf);
452
+ return;
453
+ }
454
+ s++;
455
+ }
456
+ }
457
+ if (0 == parent) {
458
+ pi->add_cstr(pi, buf.head, buf_len(&buf), start);
459
+ } else {
460
+ switch (parent->next) {
461
+ case NEXT_ARRAY_NEW:
462
+ case NEXT_ARRAY_ELEMENT:
463
+ pi->array_append_cstr(pi, buf.head, buf_len(&buf), start);
464
+ parent->next = NEXT_ARRAY_COMMA;
465
+ break;
466
+ case NEXT_HASH_NEW:
467
+ case NEXT_HASH_KEY:
468
+ if (Qundef == (parent->key_val = pi->hash_key(pi, buf.head, buf_len(&buf)))) {
469
+ parent->klen = buf_len(&buf);
470
+ parent->key = OJ_MALLOC(parent->klen + 1);
471
+ memcpy((char *)parent->key, buf.head, parent->klen);
472
+ *(char *)(parent->key + parent->klen) = '\0';
473
+ } else {
474
+ parent->key = "";
475
+ parent->klen = 0;
476
+ }
477
+ parent->k1 = *start;
478
+ parent->next = NEXT_HASH_COLON;
479
+ break;
480
+ case NEXT_HASH_VALUE:
481
+ pi->hash_set_cstr(pi, parent, buf.head, buf_len(&buf), start);
482
+ if (0 != parent->key && 0 < parent->klen && (parent->key < pi->json || pi->cur < parent->key)) {
483
+ OJ_R_FREE((char *)parent->key);
484
+ parent->key = 0;
485
+ }
486
+ parent->next = NEXT_HASH_COMMA;
487
+ break;
488
+ case NEXT_HASH_COMMA:
489
+ case NEXT_NONE:
490
+ case NEXT_ARRAY_COMMA:
491
+ case NEXT_HASH_COLON:
492
+ default:
493
+ oj_set_error_at(pi,
494
+ oj_parse_error_class,
495
+ __FILE__,
496
+ __LINE__,
497
+ "expected %s, not a string",
498
+ oj_stack_next_string(parent->next));
499
+ break;
500
+ }
501
+ }
502
+ pi->cur = s + 1;
503
+ buf_cleanup(&buf);
504
+ }
505
+
506
+ static void read_str(ParseInfo pi) {
507
+ const char *str = pi->cur;
508
+ Val parent = stack_peek(&pi->stack);
509
+
510
+ pi->cur = scan_func(pi->cur, pi->end);
511
+ if (RB_UNLIKELY(pi->end <= pi->cur)) {
512
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "quoted string not terminated");
513
+ return;
514
+ }
515
+ if (RB_UNLIKELY('\0' == *pi->cur)) {
516
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "NULL byte in string");
517
+ return;
518
+ }
519
+ if ('\\' == *pi->cur) {
520
+ read_escaped_str(pi, str);
521
+ return;
522
+ }
523
+
524
+ if (0 == parent) { // simple add
525
+ pi->add_cstr(pi, str, pi->cur - str, str);
526
+ } else {
527
+ switch (parent->next) {
528
+ case NEXT_ARRAY_NEW:
529
+ case NEXT_ARRAY_ELEMENT:
530
+ pi->array_append_cstr(pi, str, pi->cur - str, str);
531
+ parent->next = NEXT_ARRAY_COMMA;
532
+ break;
533
+ case NEXT_HASH_NEW:
534
+ case NEXT_HASH_KEY:
535
+ if (Qundef == (parent->key_val = pi->hash_key(pi, str, pi->cur - str))) {
536
+ parent->key = str;
537
+ parent->klen = pi->cur - str;
538
+ } else {
539
+ parent->key = "";
540
+ parent->klen = 0;
541
+ }
542
+ parent->k1 = *str;
543
+ parent->next = NEXT_HASH_COLON;
544
+ break;
545
+ case NEXT_HASH_VALUE:
546
+ pi->hash_set_cstr(pi, parent, str, pi->cur - str, str);
547
+ if (0 != parent->key && 0 < parent->klen && (parent->key < pi->json || pi->cur < parent->key)) {
548
+ OJ_R_FREE((char *)parent->key);
549
+ parent->key = 0;
550
+ }
551
+ parent->next = NEXT_HASH_COMMA;
552
+ break;
553
+ case NEXT_HASH_COMMA:
554
+ case NEXT_NONE:
555
+ case NEXT_ARRAY_COMMA:
556
+ case NEXT_HASH_COLON:
557
+ default:
558
+ oj_set_error_at(pi,
559
+ oj_parse_error_class,
560
+ __FILE__,
561
+ __LINE__,
562
+ "expected %s, not a string",
563
+ oj_stack_next_string(parent->next));
564
+ break;
565
+ }
566
+ }
567
+ pi->cur++; // move past "
568
+ }
569
+
570
+ static void read_num(ParseInfo pi) {
571
+ struct _numInfo ni;
572
+ Val parent = stack_peek(&pi->stack);
573
+
574
+ ni.pi = pi;
575
+ ni.str = pi->cur;
576
+ ni.i = 0;
577
+ ni.num = 0;
578
+ ni.div = 1;
579
+ ni.di = 0;
580
+ ni.len = 0;
581
+ ni.exp = 0;
582
+ ni.big = 0;
583
+ ni.infinity = 0;
584
+ ni.nan = 0;
585
+ ni.neg = 0;
586
+ ni.has_exp = 0;
587
+ if (CompatMode == pi->options.mode) {
588
+ ni.no_big = !pi->options.compat_bigdec;
589
+ ni.bigdec_load = pi->options.compat_bigdec;
590
+ } else {
591
+ ni.no_big = (FloatDec == pi->options.bigdec_load || FastDec == pi->options.bigdec_load ||
592
+ RubyDec == pi->options.bigdec_load);
593
+ ni.bigdec_load = pi->options.bigdec_load;
594
+ }
595
+
596
+ if ('-' == *pi->cur) {
597
+ pi->cur++;
598
+ ni.neg = 1;
599
+ } else if ('+' == *pi->cur) {
600
+ if (StrictMode == pi->options.mode) {
601
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "not a number or other value");
602
+ return;
603
+ }
604
+ pi->cur++;
605
+ }
606
+ if ('I' == *pi->cur) {
607
+ if (No == pi->options.allow_nan || 0 != strncmp("Infinity", pi->cur, 8)) {
608
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "not a number or other value");
609
+ return;
610
+ }
611
+ pi->cur += 8;
612
+ ni.infinity = 1;
613
+ } else if ('N' == *pi->cur || 'n' == *pi->cur) {
614
+ if ('a' != pi->cur[1] || ('N' != pi->cur[2] && 'n' != pi->cur[2])) {
615
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "not a number or other value");
616
+ return;
617
+ }
618
+ pi->cur += 3;
619
+ ni.nan = 1;
620
+ } else {
621
+ int dec_cnt = 0;
622
+ bool zero1 = false;
623
+
624
+ // Skip leading zeros.
625
+ for (; '0' == *pi->cur; pi->cur++) {
626
+ zero1 = true;
627
+ }
628
+
629
+ for (; '0' <= *pi->cur && *pi->cur <= '9'; pi->cur++) {
630
+ int d = (*pi->cur - '0');
631
+
632
+ if (RB_LIKELY(0 != ni.i)) {
633
+ dec_cnt++;
634
+ }
635
+ ni.i = ni.i * 10 + d;
636
+ }
637
+ if (RB_UNLIKELY(0 != ni.i && zero1 && CompatMode == pi->options.mode)) {
638
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "not a number");
639
+ return;
640
+ }
641
+ if (INT64_MAX <= ni.i || DEC_MAX < dec_cnt) {
642
+ ni.big = true;
643
+ }
644
+
645
+ if ('.' == *pi->cur) {
646
+ pi->cur++;
647
+ // A trailing . is not a valid decimal but if encountered allow it
648
+ // except when mimicking the JSON gem or in strict mode.
649
+ if (StrictMode == pi->options.mode || CompatMode == pi->options.mode) {
650
+ int pos = (int)(pi->cur - ni.str);
651
+
652
+ if (1 == pos || (2 == pos && ni.neg)) {
653
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "not a number");
654
+ return;
655
+ }
656
+ if (*pi->cur < '0' || '9' < *pi->cur) {
657
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "not a number");
658
+ return;
659
+ }
660
+ }
661
+ for (; '0' <= *pi->cur && *pi->cur <= '9'; pi->cur++) {
662
+ int d = (*pi->cur - '0');
663
+
664
+ if (RB_LIKELY(0 != ni.num || 0 != ni.i)) {
665
+ dec_cnt++;
666
+ }
667
+ ni.num = ni.num * 10 + d;
668
+ ni.div *= 10;
669
+ ni.di++;
670
+ }
671
+ }
672
+ if (INT64_MAX <= ni.div || DEC_MAX < dec_cnt) {
673
+ if (!ni.no_big) {
674
+ ni.big = true;
675
+ }
676
+ }
677
+
678
+ if ('e' == *pi->cur || 'E' == *pi->cur) {
679
+ int eneg = 0;
680
+
681
+ ni.has_exp = 1;
682
+ pi->cur++;
683
+ if ('-' == *pi->cur) {
684
+ pi->cur++;
685
+ eneg = 1;
686
+ } else if ('+' == *pi->cur) {
687
+ pi->cur++;
688
+ }
689
+ for (; '0' <= *pi->cur && *pi->cur <= '9'; pi->cur++) {
690
+ ni.exp = ni.exp * 10 + (*pi->cur - '0');
691
+ if (EXP_MAX <= ni.exp) {
692
+ ni.big = true;
693
+ }
694
+ }
695
+ if (eneg) {
696
+ ni.exp = -ni.exp;
697
+ }
698
+ }
699
+ ni.len = pi->cur - ni.str;
700
+ }
701
+ // Check for special reserved values for Infinity and NaN.
702
+ if (ni.big) {
703
+ if (0 == strcasecmp(INF_VAL, ni.str)) {
704
+ ni.infinity = 1;
705
+ } else if (0 == strcasecmp(NINF_VAL, ni.str)) {
706
+ ni.infinity = 1;
707
+ ni.neg = 1;
708
+ } else if (0 == strcasecmp(NAN_VAL, ni.str)) {
709
+ ni.nan = 1;
710
+ }
711
+ }
712
+ if (CompatMode == pi->options.mode) {
713
+ if (pi->options.compat_bigdec) {
714
+ ni.big = 1;
715
+ }
716
+ } else if (BigDec == pi->options.bigdec_load) {
717
+ ni.big = 1;
718
+ }
719
+ if (0 == parent) {
720
+ pi->add_num(pi, &ni);
721
+ } else {
722
+ switch (parent->next) {
723
+ case NEXT_ARRAY_NEW:
724
+ case NEXT_ARRAY_ELEMENT:
725
+ pi->array_append_num(pi, &ni);
726
+ parent->next = NEXT_ARRAY_COMMA;
727
+ break;
728
+ case NEXT_HASH_VALUE:
729
+ pi->hash_set_num(pi, parent, &ni);
730
+ if (0 != parent->key && 0 < parent->klen && (parent->key < pi->json || pi->cur < parent->key)) {
731
+ OJ_R_FREE((char *)parent->key);
732
+ parent->key = 0;
733
+ }
734
+ parent->next = NEXT_HASH_COMMA;
735
+ break;
736
+ default:
737
+ oj_set_error_at(pi,
738
+ oj_parse_error_class,
739
+ __FILE__,
740
+ __LINE__,
741
+ "expected %s",
742
+ oj_stack_next_string(parent->next));
743
+ break;
744
+ }
745
+ }
746
+ }
747
+
748
+ static void array_start(ParseInfo pi) {
749
+ VALUE v = pi->start_array(pi);
750
+
751
+ stack_push(&pi->stack, v, NEXT_ARRAY_NEW);
752
+ }
753
+
754
+ static void array_end(ParseInfo pi) {
755
+ Val array = stack_pop(&pi->stack);
756
+
757
+ if (0 == array) {
758
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "unexpected array close");
759
+ } else if (NEXT_ARRAY_COMMA != array->next && NEXT_ARRAY_NEW != array->next) {
760
+ oj_set_error_at(pi,
761
+ oj_parse_error_class,
762
+ __FILE__,
763
+ __LINE__,
764
+ "expected %s, not an array close",
765
+ oj_stack_next_string(array->next));
766
+ } else {
767
+ pi->end_array(pi);
768
+ add_value(pi, array->val);
769
+ }
770
+ }
771
+
772
+ static void hash_start(ParseInfo pi) {
773
+ VALUE v = pi->start_hash(pi);
774
+
775
+ stack_push(&pi->stack, v, NEXT_HASH_NEW);
776
+ }
777
+
778
+ static void hash_end(ParseInfo pi) {
779
+ Val hash = stack_peek(&pi->stack);
780
+
781
+ // leave hash on stack until just before
782
+ if (0 == hash) {
783
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "unexpected hash close");
784
+ } else if (NEXT_HASH_COMMA != hash->next && NEXT_HASH_NEW != hash->next) {
785
+ oj_set_error_at(pi,
786
+ oj_parse_error_class,
787
+ __FILE__,
788
+ __LINE__,
789
+ "expected %s, not a hash close",
790
+ oj_stack_next_string(hash->next));
791
+ } else {
792
+ pi->end_hash(pi);
793
+ stack_pop(&pi->stack);
794
+ add_value(pi, hash->val);
795
+ }
796
+ }
797
+
798
+ static void comma(ParseInfo pi) {
799
+ Val parent = stack_peek(&pi->stack);
800
+
801
+ if (0 == parent) {
802
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "unexpected comma");
803
+ } else if (NEXT_ARRAY_COMMA == parent->next) {
804
+ parent->next = NEXT_ARRAY_ELEMENT;
805
+ } else if (NEXT_HASH_COMMA == parent->next) {
806
+ parent->next = NEXT_HASH_KEY;
807
+ } else {
808
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "unexpected comma");
809
+ }
810
+ }
811
+
812
+ static void colon(ParseInfo pi) {
813
+ Val parent = stack_peek(&pi->stack);
814
+
815
+ if (0 != parent && NEXT_HASH_COLON == parent->next) {
816
+ parent->next = NEXT_HASH_VALUE;
817
+ } else {
818
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "unexpected colon");
819
+ }
820
+ }
821
+
822
+ void oj_parse2(ParseInfo pi) {
823
+ int first = 1;
824
+ long start = 0;
825
+
826
+ pi->cur = pi->json;
827
+ err_init(&pi->err);
828
+ while (1) {
829
+ if (RB_UNLIKELY(0 < pi->max_depth && pi->max_depth <= pi->stack.tail - pi->stack.head - 1)) {
830
+ VALUE err_clas = oj_get_json_err_class("NestingError");
831
+
832
+ oj_set_error_at(pi, err_clas, __FILE__, __LINE__, "Too deeply nested.");
833
+ pi->err_class = err_clas;
834
+ return;
835
+ }
836
+ next_non_white(pi);
837
+ if (first) {
838
+ // If no tokens are consumed (i.e. empty string), throw a parse error
839
+ // this is the behavior of JSON.parse in both Ruby and JS.
840
+ if (RB_UNLIKELY('\0' == *pi->cur && No == pi->options.empty_string)) {
841
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "unexpected character");
842
+ }
843
+ } else {
844
+ if (RB_UNLIKELY('\0' != *pi->cur)) {
845
+ oj_set_error_at(pi,
846
+ oj_parse_error_class,
847
+ __FILE__,
848
+ __LINE__,
849
+ "unexpected characters after the JSON document");
850
+ }
851
+ }
852
+
853
+ switch (*pi->cur++) {
854
+ case '{': hash_start(pi); break;
855
+ case '}': hash_end(pi); break;
856
+ case ':': colon(pi); break;
857
+ case '[': array_start(pi); break;
858
+ case ']': array_end(pi); break;
859
+ case ',': comma(pi); break;
860
+ case '"': read_str(pi); break;
861
+ case '+':
862
+ if (CompatMode == pi->options.mode) {
863
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "unexpected character");
864
+ return;
865
+ }
866
+ pi->cur--;
867
+ read_num(pi);
868
+ break;
869
+ case '-':
870
+ case '0':
871
+ case '1':
872
+ case '2':
873
+ case '3':
874
+ case '4':
875
+ case '5':
876
+ case '6':
877
+ case '7':
878
+ case '8':
879
+ case '9':
880
+ pi->cur--;
881
+ read_num(pi);
882
+ break;
883
+ case 'I':
884
+ case 'N':
885
+ if (Yes == pi->options.allow_nan) {
886
+ pi->cur--;
887
+ read_num(pi);
888
+ } else {
889
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "unexpected character");
890
+ }
891
+ break;
892
+ case 't': read_true(pi); break;
893
+ case 'f': read_false(pi); break;
894
+ case 'n':
895
+ if ('u' == *pi->cur) {
896
+ read_null(pi);
897
+ } else {
898
+ pi->cur--;
899
+ read_num(pi);
900
+ }
901
+ break;
902
+ case '/':
903
+ skip_comment(pi);
904
+ if (first) {
905
+ continue;
906
+ }
907
+ break;
908
+ case '\0': pi->cur--; return;
909
+ default: oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "unexpected character"); return;
910
+ }
911
+ if (RB_UNLIKELY(err_has(&pi->err))) {
912
+ return;
913
+ }
914
+ if (stack_empty(&pi->stack)) {
915
+ if (Qundef != pi->proc) {
916
+ VALUE args[3];
917
+ long len = (pi->cur - pi->json) - start;
918
+
919
+ *args = stack_head_val(&pi->stack);
920
+ args[1] = LONG2NUM(start);
921
+ args[2] = LONG2NUM(len);
922
+
923
+ if (Qnil == pi->proc) {
924
+ rb_yield_values2(3, args);
925
+ } else {
926
+ rb_proc_call_with_block(pi->proc, 3, args, Qnil);
927
+ }
928
+ } else if (!pi->has_callbacks) {
929
+ first = 0;
930
+ }
931
+ start = pi->cur - pi->json;
932
+ }
933
+ }
934
+ }
935
+
936
+ static VALUE rescue_big_decimal(VALUE str, VALUE ignore) {
937
+ rb_raise(oj_parse_error_class, "Invalid value for BigDecimal()");
938
+ return Qnil;
939
+ }
940
+
941
+ static VALUE parse_big_decimal(VALUE str) {
942
+ return rb_funcall(rb_cObject, oj_bigdecimal_id, 1, str);
943
+ }
944
+
945
/* Powers of ten, 10^0 through 10^49, indexed by exponent. One decade of
 * exponents per row.
 */
static long double exp_plus[] = {
    1.0,    1.0e1,  1.0e2,  1.0e3,  1.0e4,  1.0e5,  1.0e6,  1.0e7,  1.0e8,  1.0e9,
    1.0e10, 1.0e11, 1.0e12, 1.0e13, 1.0e14, 1.0e15, 1.0e16, 1.0e17, 1.0e18, 1.0e19,
    1.0e20, 1.0e21, 1.0e22, 1.0e23, 1.0e24, 1.0e25, 1.0e26, 1.0e27, 1.0e28, 1.0e29,
    1.0e30, 1.0e31, 1.0e32, 1.0e33, 1.0e34, 1.0e35, 1.0e36, 1.0e37, 1.0e38, 1.0e39,
    1.0e40, 1.0e41, 1.0e42, 1.0e43, 1.0e44, 1.0e45, 1.0e46, 1.0e47, 1.0e48, 1.0e49,
};
951
+
952
+ VALUE
953
+ oj_num_as_value(NumInfo ni) {
954
+ VALUE rnum = Qnil;
955
+
956
+ if (ni->infinity) {
957
+ if (ni->neg) {
958
+ rnum = rb_float_new(-OJ_INFINITY);
959
+ } else {
960
+ rnum = rb_float_new(OJ_INFINITY);
961
+ }
962
+ } else if (ni->nan) {
963
+ rnum = rb_float_new(NAN);
964
+ } else if (1 == ni->div && 0 == ni->exp && !ni->has_exp) { // fixnum
965
+ if (ni->big) {
966
+ if (256 > ni->len) {
967
+ char buf[256];
968
+
969
+ memcpy(buf, ni->str, ni->len);
970
+ buf[ni->len] = '\0';
971
+ rnum = rb_cstr_to_inum(buf, 10, 0);
972
+ } else {
973
+ char *buf = OJ_R_ALLOC_N(char, ni->len + 1);
974
+
975
+ memcpy(buf, ni->str, ni->len);
976
+ buf[ni->len] = '\0';
977
+ rnum = rb_cstr_to_inum(buf, 10, 0);
978
+ OJ_R_FREE(buf);
979
+ }
980
+ } else {
981
+ if (ni->neg) {
982
+ rnum = rb_ll2inum(-ni->i);
983
+ } else {
984
+ rnum = rb_ll2inum(ni->i);
985
+ }
986
+ }
987
+ } else { // decimal
988
+ if (ni->big) {
989
+ VALUE bd = rb_str_new(ni->str, ni->len);
990
+
991
+ rnum = rb_rescue2(parse_big_decimal, bd, rescue_big_decimal, bd, rb_eException, 0);
992
+ if (ni->no_big) {
993
+ rnum = rb_funcall(rnum, rb_intern("to_f"), 0);
994
+ }
995
+ } else if (FastDec == ni->bigdec_load) {
996
+ long double ld = (long double)ni->i * (long double)ni->div + (long double)ni->num;
997
+ int x = (int)((int64_t)ni->exp - ni->di);
998
+
999
+ if (0 < x) {
1000
+ if (x < (int)(sizeof(exp_plus) / sizeof(*exp_plus))) {
1001
+ ld *= exp_plus[x];
1002
+ } else {
1003
+ ld *= powl(10.0, x);
1004
+ }
1005
+ } else if (x < 0) {
1006
+ if (-x < (int)(sizeof(exp_plus) / sizeof(*exp_plus))) {
1007
+ ld /= exp_plus[-x];
1008
+ } else {
1009
+ ld /= powl(10.0, -x);
1010
+ }
1011
+ }
1012
+ if (ni->neg) {
1013
+ ld = -ld;
1014
+ }
1015
+ rnum = rb_float_new((double)ld);
1016
+ } else if (RubyDec == ni->bigdec_load) {
1017
+ VALUE sv = rb_str_new(ni->str, ni->len);
1018
+
1019
+ rnum = rb_funcall(sv, rb_intern("to_f"), 0);
1020
+ } else {
1021
+ char *end;
1022
+ double d = strtod(ni->str, &end);
1023
+
1024
+ if ((long)ni->len != (long)(end - ni->str)) {
1025
+ if (Qnil == ni->pi->err_class) {
1026
+ rb_raise(oj_parse_error_class, "Invalid float");
1027
+ } else {
1028
+ rb_raise(ni->pi->err_class, "Invalid float");
1029
+ }
1030
+ }
1031
+ rnum = rb_float_new(d);
1032
+ }
1033
+ }
1034
+ return rnum;
1035
+ }
1036
+
1037
+ void oj_set_error_at(ParseInfo pi, VALUE err_clas, const char *file, int line, const char *format, ...) {
1038
+ va_list ap;
1039
+ char msg[256];
1040
+ char *p = msg;
1041
+ char *end = p + sizeof(msg) - 2;
1042
+ char *start;
1043
+ Val vp;
1044
+ int mlen;
1045
+
1046
+ va_start(ap, format);
1047
+ mlen = vsnprintf(msg, sizeof(msg) - 1, format, ap);
1048
+ if (0 < mlen) {
1049
+ if (sizeof(msg) - 2 < (size_t)mlen) {
1050
+ p = end - 2;
1051
+ } else {
1052
+ p += mlen;
1053
+ }
1054
+ }
1055
+ va_end(ap);
1056
+ pi->err.clas = err_clas;
1057
+ if (p + 3 < end) {
1058
+ *p++ = ' ';
1059
+ *p++ = '(';
1060
+ *p++ = 'a';
1061
+ *p++ = 'f';
1062
+ *p++ = 't';
1063
+ *p++ = 'e';
1064
+ *p++ = 'r';
1065
+ *p++ = ' ';
1066
+ start = p;
1067
+ for (vp = pi->stack.head; vp < pi->stack.tail; vp++) {
1068
+ if (end <= p + 1 + vp->klen) {
1069
+ break;
1070
+ }
1071
+ if (NULL != vp->key) {
1072
+ if (start < p) {
1073
+ *p++ = '.';
1074
+ }
1075
+ memcpy(p, vp->key, vp->klen);
1076
+ p += vp->klen;
1077
+ } else {
1078
+ if (RUBY_T_ARRAY == rb_type(vp->val)) {
1079
+ if (end <= p + 12) {
1080
+ break;
1081
+ }
1082
+ p += snprintf(p, end - p, "[%ld]", RARRAY_LEN(vp->val));
1083
+ }
1084
+ }
1085
+ }
1086
+ *p++ = ')';
1087
+ }
1088
+ *p = '\0';
1089
+ if (0 == pi->json) {
1090
+ oj_err_set(&pi->err, err_clas, "%s at line %d, column %d [%s:%d]", msg, pi->rd.line, pi->rd.col, file, line);
1091
+ } else {
1092
+ _oj_err_set_with_location(&pi->err, err_clas, msg, pi->json, pi->cur - 1, file, line);
1093
+ }
1094
+ }
1095
+
1096
+ static VALUE protect_parse(VALUE pip) {
1097
+ oj_parse2((ParseInfo)pip);
1098
+
1099
+ return Qnil;
1100
+ }
1101
+
1102
+ extern int oj_utf8_index;
1103
+
1104
+ static void oj_pi_set_input_str(ParseInfo pi, VALUE *inputp) {
1105
+ int idx = RB_ENCODING_GET(*inputp);
1106
+
1107
+ if (oj_utf8_encoding_index != idx) {
1108
+ rb_encoding *enc = rb_enc_from_index(idx);
1109
+ *inputp = rb_str_conv_enc(*inputp, enc, oj_utf8_encoding);
1110
+ }
1111
+ pi->json = RSTRING_PTR(*inputp);
1112
+ pi->end = pi->json + RSTRING_LEN(*inputp);
1113
+ }
1114
+
1115
+ VALUE
1116
+ oj_pi_parse(int argc, VALUE *argv, ParseInfo pi, char *json, size_t len, int yieldOk) {
1117
+ char *buf = 0;
1118
+ VALUE input;
1119
+ VALUE wrapped_stack;
1120
+ VALUE result = Qnil;
1121
+ int line = 0;
1122
+ int free_json = 0;
1123
+
1124
+ if (argc < 1) {
1125
+ rb_raise(rb_eArgError, "Wrong number of arguments to parse.");
1126
+ }
1127
+ input = argv[0];
1128
+ if (2 <= argc) {
1129
+ if (T_HASH == rb_type(argv[1])) {
1130
+ oj_parse_options(argv[1], &pi->options);
1131
+ } else if (3 <= argc && T_HASH == rb_type(argv[2])) {
1132
+ oj_parse_options(argv[2], &pi->options);
1133
+ }
1134
+ }
1135
+ if (yieldOk && rb_block_given_p()) {
1136
+ pi->proc = Qnil;
1137
+ } else {
1138
+ pi->proc = Qundef;
1139
+ }
1140
+ if (0 != json) {
1141
+ pi->json = json;
1142
+ pi->end = json + len;
1143
+ free_json = 1;
1144
+ } else if (T_STRING == rb_type(input)) {
1145
+ if (CompatMode == pi->options.mode) {
1146
+ if (No == pi->options.nilnil && 0 == RSTRING_LEN(input)) {
1147
+ rb_raise(oj_json_parser_error_class, "An empty string is not a valid JSON string.");
1148
+ }
1149
+ }
1150
+ oj_pi_set_input_str(pi, &input);
1151
+ } else if (Qnil == input) {
1152
+ if (Yes == pi->options.nilnil) {
1153
+ return Qnil;
1154
+ } else {
1155
+ rb_raise(rb_eTypeError, "Nil is not a valid JSON source.");
1156
+ }
1157
+ } else {
1158
+ VALUE clas = rb_obj_class(input);
1159
+ VALUE s;
1160
+
1161
+ if (oj_stringio_class == clas) {
1162
+ s = rb_funcall2(input, oj_string_id, 0, 0);
1163
+ oj_pi_set_input_str(pi, &s);
1164
+ #if !IS_WINDOWS
1165
+ } else if (rb_cFile == clas && 0 == FIX2INT(rb_funcall(input, oj_pos_id, 0))) {
1166
+ int fd = FIX2INT(rb_funcall(input, oj_fileno_id, 0));
1167
+ ssize_t cnt;
1168
+ size_t len = lseek(fd, 0, SEEK_END);
1169
+
1170
+ lseek(fd, 0, SEEK_SET);
1171
+ buf = OJ_R_ALLOC_N(char, len + 1);
1172
+ pi->json = buf;
1173
+ pi->end = buf + len;
1174
+ if (0 >= (cnt = read(fd, (char *)pi->json, len)) || cnt != (ssize_t)len) {
1175
+ if (0 != buf) {
1176
+ OJ_R_FREE(buf);
1177
+ }
1178
+ rb_raise(rb_eIOError, "failed to read from IO Object.");
1179
+ }
1180
+ ((char *)pi->json)[len] = '\0';
1181
+ /* skip UTF-8 BOM if present */
1182
+ if (0xEF == (uint8_t)*pi->json && 0xBB == (uint8_t)pi->json[1] && 0xBF == (uint8_t)pi->json[2]) {
1183
+ pi->cur += 3;
1184
+ }
1185
+ #endif
1186
+ } else if (rb_respond_to(input, oj_read_id)) {
1187
+ // use stream parser instead
1188
+ return oj_pi_sparse(argc, argv, pi, 0);
1189
+ } else {
1190
+ rb_raise(rb_eArgError, "parse() expected a String or IO Object.");
1191
+ }
1192
+ }
1193
+ if (Yes == pi->options.circular) {
1194
+ pi->circ_array = oj_circ_array_new();
1195
+ } else {
1196
+ pi->circ_array = 0;
1197
+ }
1198
+ if (No == pi->options.allow_gc) {
1199
+ rb_gc_disable();
1200
+ }
1201
+ // GC can run at any time. When it runs any Object created by C will be
1202
+ // freed. We protect against this by wrapping the value stack in a ruby
1203
+ // data object and poviding a mark function for ruby objects on the
1204
+ // value stack (while it is in scope).
1205
+ wrapped_stack = oj_stack_init(&pi->stack);
1206
+ rb_protect(protect_parse, (VALUE)pi, &line);
1207
+ if (Qundef == pi->stack.head->val && !empty_ok(&pi->options)) {
1208
+ if (No == pi->options.nilnil || (CompatMode == pi->options.mode && 0 < pi->cur - pi->json)) {
1209
+ oj_set_error_at(pi, oj_json_parser_error_class, __FILE__, __LINE__, "Empty input");
1210
+ }
1211
+ }
1212
+ result = stack_head_val(&pi->stack);
1213
+ DATA_PTR(wrapped_stack) = 0;
1214
+ if (No == pi->options.allow_gc) {
1215
+ rb_gc_enable();
1216
+ }
1217
+ if (!err_has(&pi->err)) {
1218
+ // If the stack is not empty then the JSON terminated early.
1219
+ Val v;
1220
+ VALUE err_class = oj_parse_error_class;
1221
+
1222
+ if (0 != line) {
1223
+ VALUE ec = rb_obj_class(rb_errinfo());
1224
+
1225
+ if (rb_eArgError != ec && 0 != ec) {
1226
+ err_class = ec;
1227
+ }
1228
+ if (rb_eIOError != ec) {
1229
+ goto CLEANUP;
1230
+ }
1231
+ }
1232
+ if (NULL != (v = stack_peek(&pi->stack))) {
1233
+ switch (v->next) {
1234
+ case NEXT_ARRAY_NEW:
1235
+ case NEXT_ARRAY_ELEMENT:
1236
+ case NEXT_ARRAY_COMMA: oj_set_error_at(pi, err_class, __FILE__, __LINE__, "Array not terminated"); break;
1237
+ case NEXT_HASH_NEW:
1238
+ case NEXT_HASH_KEY:
1239
+ case NEXT_HASH_COLON:
1240
+ case NEXT_HASH_VALUE:
1241
+ case NEXT_HASH_COMMA:
1242
+ oj_set_error_at(pi, err_class, __FILE__, __LINE__, "Hash/Object not terminated");
1243
+ break;
1244
+ default: oj_set_error_at(pi, err_class, __FILE__, __LINE__, "not terminated");
1245
+ }
1246
+ }
1247
+ }
1248
+ CLEANUP:
1249
+ // proceed with cleanup
1250
+ if (0 != pi->circ_array) {
1251
+ oj_circ_array_free(pi->circ_array);
1252
+ }
1253
+ if (0 != buf) {
1254
+ OJ_R_FREE(buf);
1255
+ } else if (free_json) {
1256
+ OJ_R_FREE(json);
1257
+ }
1258
+ stack_cleanup(&pi->stack);
1259
+ if (pi->str_rx.head != oj_default_options.str_rx.head) {
1260
+ oj_rxclass_cleanup(&pi->str_rx);
1261
+ }
1262
+ if (err_has(&pi->err)) {
1263
+ rb_set_errinfo(Qnil);
1264
+ if (Qnil != pi->err_class) {
1265
+ pi->err.clas = pi->err_class;
1266
+ }
1267
+ if ((CompatMode == pi->options.mode || RailsMode == pi->options.mode) && Yes != pi->options.safe) {
1268
+ // The json gem requires the error message be UTF-8 encoded. In
1269
+ // additional the complete JSON source must be returned. There
1270
+ // does not seem to be a size limit.
1271
+ VALUE msg = rb_utf8_str_new_cstr(pi->err.msg);
1272
+ VALUE args[1];
1273
+
1274
+ if (NULL != pi->json) {
1275
+ msg = rb_str_append(msg, rb_utf8_str_new_cstr(" in '"));
1276
+ msg = rb_str_append(msg, rb_utf8_str_new_cstr(pi->json));
1277
+ }
1278
+ args[0] = msg;
1279
+ if (pi->err.clas == oj_parse_error_class) {
1280
+ // The error was an Oj::ParseError so change to a JSON::ParserError.
1281
+ pi->err.clas = oj_json_parser_error_class;
1282
+ }
1283
+ rb_exc_raise(rb_class_new_instance(1, args, pi->err.clas));
1284
+ } else {
1285
+ oj_err_raise(&pi->err);
1286
+ }
1287
+ } else if (0 != line) {
1288
+ rb_jump_tag(line);
1289
+ }
1290
+ if (pi->options.quirks_mode == No) {
1291
+ switch (rb_type(result)) {
1292
+ case T_NIL:
1293
+ case T_TRUE:
1294
+ case T_FALSE:
1295
+ case T_FIXNUM:
1296
+ case T_FLOAT:
1297
+ case T_CLASS:
1298
+ case T_STRING:
1299
+ case T_SYMBOL: {
1300
+ struct _err err;
1301
+
1302
+ if (Qnil == pi->err_class) {
1303
+ err.clas = oj_parse_error_class;
1304
+ } else {
1305
+ err.clas = pi->err_class;
1306
+ }
1307
+ snprintf(err.msg, sizeof(err.msg), "unexpected non-document value");
1308
+ oj_err_raise(&err);
1309
+ break;
1310
+ }
1311
+ default:
1312
+ // okay
1313
+ break;
1314
+ }
1315
+ }
1316
+ return result;
1317
+ }