oj 3.12.2 → 3.13.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +2 -3
  3. data/ext/oj/buf.h +9 -0
  4. data/ext/oj/cache.c +193 -0
  5. data/ext/oj/cache.h +20 -0
  6. data/ext/oj/compat.c +8 -22
  7. data/ext/oj/custom.c +15 -14
  8. data/ext/oj/debug.c +132 -0
  9. data/ext/oj/dump.c +12 -15
  10. data/ext/oj/dump_compat.c +3 -3
  11. data/ext/oj/dump_object.c +9 -9
  12. data/ext/oj/dump_strict.c +3 -3
  13. data/ext/oj/err.h +19 -0
  14. data/ext/oj/extconf.rb +4 -0
  15. data/ext/oj/fast.c +7 -18
  16. data/ext/oj/intern.c +398 -0
  17. data/ext/oj/intern.h +27 -0
  18. data/ext/oj/mimic_json.c +9 -9
  19. data/ext/oj/object.c +11 -59
  20. data/ext/oj/odd.c +1 -1
  21. data/ext/oj/oj.c +167 -109
  22. data/ext/oj/oj.h +2 -2
  23. data/ext/oj/parse.c +5 -5
  24. data/ext/oj/parser.c +1512 -0
  25. data/ext/oj/parser.h +90 -0
  26. data/ext/oj/rails.c +5 -5
  27. data/ext/oj/resolve.c +2 -20
  28. data/ext/oj/rxclass.c +1 -1
  29. data/ext/oj/saj.c +1 -1
  30. data/ext/oj/saj2.c +348 -0
  31. data/ext/oj/scp.c +1 -1
  32. data/ext/oj/sparse.c +2 -2
  33. data/ext/oj/stream_writer.c +4 -4
  34. data/ext/oj/strict.c +10 -27
  35. data/ext/oj/string_writer.c +2 -2
  36. data/ext/oj/usual.c +1228 -0
  37. data/ext/oj/validate.c +51 -0
  38. data/ext/oj/wab.c +9 -17
  39. data/lib/oj/error.rb +1 -1
  40. data/lib/oj/mimic.rb +1 -1
  41. data/lib/oj/version.rb +1 -1
  42. data/pages/Modes.md +2 -0
  43. data/pages/Options.md +17 -5
  44. data/pages/Parser.md +309 -0
  45. data/pages/Rails.md +2 -2
  46. data/test/json_gem/json_generator_test.rb +1 -1
  47. data/test/perf_parser.rb +184 -0
  48. data/test/test_hash.rb +1 -1
  49. data/test/test_parser.rb +27 -0
  50. data/test/test_parser_saj.rb +245 -0
  51. data/test/test_parser_usual.rb +213 -0
  52. metadata +22 -5
  53. data/ext/oj/hash.c +0 -168
  54. data/ext/oj/hash.h +0 -21
  55. data/ext/oj/hash_test.c +0 -491
data/ext/oj/oj.h CHANGED
@@ -143,7 +143,7 @@ typedef struct _options {
143
143
  char safe; // YesNo
144
144
  char sec_prec_set; // boolean (0 or 1)
145
145
  char ignore_under; // YesNo - ignore attrs starting with _ if true in object and custom modes
146
- char cache_keys; // YexNo
146
+ char cache_keys; // YesNo
147
147
  char cache_str; // string short than or equal to this are cache
148
148
  int64_t int_range_min; // dump numbers below as string
149
149
  int64_t int_range_max; // dump numbers above as string
@@ -245,6 +245,7 @@ extern VALUE oj_compat_parse_cstr(int argc, VALUE *argv, char *json, size_t len)
245
245
  extern VALUE oj_object_parse_cstr(int argc, VALUE *argv, char *json, size_t len);
246
246
  extern VALUE oj_custom_parse_cstr(int argc, VALUE *argv, char *json, size_t len);
247
247
 
248
+ extern bool oj_hash_has_key(VALUE hash, VALUE key);
248
249
  extern void oj_parse_options(VALUE ropts, Options copts);
249
250
 
250
251
  extern void oj_dump_obj_to_json(VALUE obj, Options copts, Out out);
@@ -327,7 +328,6 @@ extern ID oj_exclude_end_id;
327
328
  extern ID oj_file_id;
328
329
  extern ID oj_fileno_id;
329
330
  extern ID oj_ftype_id;
330
- extern ID oj_has_key_id;
331
331
  extern ID oj_hash_end_id;
332
332
  extern ID oj_hash_key_id;
333
333
  extern ID oj_hash_set_id;
data/ext/oj/parse.c CHANGED
@@ -489,7 +489,7 @@ static void read_num(ParseInfo pi) {
489
489
  if ('.' == *pi->cur) {
490
490
  pi->cur++;
491
491
  // A trailing . is not a valid decimal but if encountered allow it
492
- // except when mimicing the JSON gem or in strict mode.
492
+ // except when mimicking the JSON gem or in strict mode.
493
493
  if (StrictMode == pi->options.mode || CompatMode == pi->options.mode) {
494
494
  int pos = (int)(pi->cur - ni.str);
495
495
 
@@ -964,12 +964,12 @@ static VALUE protect_parse(VALUE pip) {
964
964
  extern int oj_utf8_index;
965
965
 
966
966
  static void oj_pi_set_input_str(ParseInfo pi, volatile VALUE *inputp) {
967
- rb_encoding *enc = rb_to_encoding(rb_obj_encoding(*inputp));
967
+ rb_encoding *enc = rb_enc_get(*inputp);
968
968
 
969
- if (rb_utf8_encoding() != enc) {
970
- *inputp = rb_str_conv_enc(*inputp, enc, rb_utf8_encoding());
969
+ if (oj_utf8_encoding != enc) {
970
+ *inputp = rb_str_conv_enc(*inputp, enc, oj_utf8_encoding);
971
971
  }
972
- pi->json = rb_string_value_ptr((VALUE *)inputp);
972
+ pi->json = RSTRING_PTR(*inputp);
973
973
  pi->end = pi->json + RSTRING_LEN(*inputp);
974
974
  }
975
975
 
data/ext/oj/parser.c ADDED
@@ -0,0 +1,1512 @@
1
+ // Copyright (c) 2020, 2021, Peter Ohler, All rights reserved.
2
+
3
+ #include <fcntl.h>
4
+
5
+ #include "parser.h"
6
+ #include "oj.h"
7
+
8
// Set to 1 to enable the printf tracing that is guarded by "#if DEBUG".
#define DEBUG 0

#define USE_THREAD_LIMIT 0
// #define USE_THREAD_LIMIT 100000

// Largest decimal exponent accepted before a number is handed to the "big"
// (text based) number path.
#define MAX_EXP 4932
// Highest index available in pow_map.
#define MAX_POW 400

#define MIN_SLEEP (1000000000LL / (double)CLOCKS_PER_SEC)

// LLONG_MAX is 9,223,372,036,854,775,807 with a 64-bit long long. The
// expansion is parenthesized so the macro keeps its meaning inside larger
// expressions such as (n % BIG_LIMIT) or (2 * BIG_LIMIT).
#define BIG_LIMIT (LLONG_MAX / 10)
#define FRAC_LIMIT 10000000000000000ULL

// Give better performance with indented JSON but worse with unindented.
//#define SPACE_JUMP
23
+
24
/*
 * Action codes. Each 256-entry lookup map in this file stores one of these
 * characters per input byte, and parse() switches on the code it finds for
 * the byte being examined. CHAR_ERR ('.') fills every slot that has no
 * dedicated case in the maps.
 */
enum {
    SKIP_CHAR        = 'a',
    SKIP_NEWLINE     = 'b',
    VAL_NULL         = 'c',
    VAL_TRUE         = 'd',
    VAL_FALSE        = 'e',
    VAL_NEG          = 'f',
    VAL0             = 'g',
    VAL_DIGIT        = 'h',
    VAL_QUOTE        = 'i',
    OPEN_ARRAY       = 'k',
    OPEN_OBJECT      = 'l',
    CLOSE_ARRAY      = 'm',
    CLOSE_OBJECT     = 'n',
    AFTER_COMMA      = 'o',
    KEY_QUOTE        = 'p',
    COLON_COLON      = 'q',
    NUM_SPC          = 'r',
    NUM_NEWLINE      = 's',
    NUM_DOT          = 't',
    NUM_COMMA        = 'u',
    NUM_FRAC         = 'v',
    FRAC_E           = 'w',
    EXP_SIGN         = 'x',
    EXP_DIGIT        = 'y',
    STR_QUOTE        = 'z',
    NEG_DIGIT        = '-',
    STR_SLASH        = 'A',
    ESC_OK           = 'B',
    BIG_DIGIT        = 'C',
    BIG_DOT          = 'D',
    U_OK             = 'E',
    TOKEN_OK         = 'F',
    NUM_CLOSE_OBJECT = 'G',
    NUM_CLOSE_ARRAY  = 'H',
    BIG_FRAC         = 'I',
    BIG_E            = 'J',
    BIG_EXP_SIGN     = 'K',
    BIG_EXP          = 'L',
    UTF1             = 'M',  // expect 1 more follow byte
    NUM_DIGIT        = 'N',
    NUM_ZERO         = 'O',
    UTF2             = 'P',  // expect 2 more follow bytes
    UTF3             = 'Q',  // expect 3 more follow bytes
    STR_OK           = 'R',
    UTFX             = 'S',  // following bytes
    ESC_U            = 'U',
    CHAR_ERR         = '.',
    DONE             = 'X',
};
74
+
75
+ /*
76
+ 0123456789abcdef0123456789abcdef */
77
+ static const char value_map[257] = "\
78
+ X........ab..a..................\
79
+ a.i..........f..ghhhhhhhhh......\
80
+ ...........................k.m..\
81
+ ......e.......c.....d......l.n..\
82
+ ................................\
83
+ ................................\
84
+ ................................\
85
+ ................................v";
86
+
87
+ static const char null_map[257] = "\
88
+ ................................\
89
+ ............o...................\
90
+ ................................\
91
+ ............F........F..........\
92
+ ................................\
93
+ ................................\
94
+ ................................\
95
+ ................................N";
96
+
97
+ static const char true_map[257] = "\
98
+ ................................\
99
+ ............o...................\
100
+ ................................\
101
+ .....F............F..F..........\
102
+ ................................\
103
+ ................................\
104
+ ................................\
105
+ ................................T";
106
+
107
+ static const char false_map[257] = "\
108
+ ................................\
109
+ ............o...................\
110
+ ................................\
111
+ .F...F......F......F............\
112
+ ................................\
113
+ ................................\
114
+ ................................\
115
+ ................................F";
116
+
117
+ static const char comma_map[257] = "\
118
+ .........ab..a..................\
119
+ a.i..........f..ghhhhhhhhh......\
120
+ ...........................k....\
121
+ ......e.......c.....d......l....\
122
+ ................................\
123
+ ................................\
124
+ ................................\
125
+ ................................,";
126
+
127
+ static const char after_map[257] = "\
128
+ X........ab..a..................\
129
+ a...........o...................\
130
+ .............................m..\
131
+ .............................n..\
132
+ ................................\
133
+ ................................\
134
+ ................................\
135
+ ................................a";
136
+
137
+ static const char key1_map[257] = "\
138
+ .........ab..a..................\
139
+ a.p.............................\
140
+ ................................\
141
+ .............................n..\
142
+ ................................\
143
+ ................................\
144
+ ................................\
145
+ ................................K";
146
+
147
+ static const char key_map[257] = "\
148
+ .........ab..a..................\
149
+ a.p.............................\
150
+ ................................\
151
+ ................................\
152
+ ................................\
153
+ ................................\
154
+ ................................\
155
+ ................................k";
156
+
157
+ static const char colon_map[257] = "\
158
+ .........ab..a..................\
159
+ a.........................q.....\
160
+ ................................\
161
+ ................................\
162
+ ................................\
163
+ ................................\
164
+ ................................\
165
+ ................................:";
166
+
167
+ static const char neg_map[257] = "\
168
+ ................................\
169
+ ................O---------......\
170
+ ................................\
171
+ ................................\
172
+ ................................\
173
+ ................................\
174
+ ................................\
175
+ ................................-";
176
+
177
+ static const char zero_map[257] = "\
178
+ .........rs..r..................\
179
+ r...........u.t.................\
180
+ .............................H..\
181
+ .............................G..\
182
+ ................................\
183
+ ................................\
184
+ ................................\
185
+ ................................0";
186
+
187
+ static const char digit_map[257] = "\
188
+ .........rs..r..................\
189
+ r...........u.t.NNNNNNNNNN......\
190
+ .....w.......................H..\
191
+ .....w.......................G..\
192
+ ................................\
193
+ ................................\
194
+ ................................\
195
+ ................................d";
196
+
197
+ static const char dot_map[257] = "\
198
+ ................................\
199
+ ................vvvvvvvvvv......\
200
+ ................................\
201
+ ................................\
202
+ ................................\
203
+ ................................\
204
+ ................................\
205
+ .................................";
206
+
207
+ static const char frac_map[257] = "\
208
+ .........rs..r..................\
209
+ r...........u...vvvvvvvvvv......\
210
+ .....w.......................H..\
211
+ .....w.......................G..\
212
+ ................................\
213
+ ................................\
214
+ ................................\
215
+ ................................f";
216
+
217
+ static const char exp_sign_map[257] = "\
218
+ ................................\
219
+ ...........x.x..yyyyyyyyyy......\
220
+ ................................\
221
+ ................................\
222
+ ................................\
223
+ ................................\
224
+ ................................\
225
+ ................................x";
226
+
227
+ static const char exp_zero_map[257] = "\
228
+ ................................\
229
+ ................yyyyyyyyyy......\
230
+ ................................\
231
+ ................................\
232
+ ................................\
233
+ ................................\
234
+ ................................\
235
+ ................................z";
236
+
237
+ static const char exp_map[257] = "\
238
+ .........rs..r..................\
239
+ r...........u...yyyyyyyyyy......\
240
+ .............................H..\
241
+ .............................G..\
242
+ ................................\
243
+ ................................\
244
+ ................................\
245
+ ................................X";
246
+
247
+ static const char big_digit_map[257] = "\
248
+ .........rs..r..................\
249
+ r...........u.D.CCCCCCCCCC......\
250
+ .....J.......................H..\
251
+ .....J.......................G..\
252
+ ................................\
253
+ ................................\
254
+ ................................\
255
+ ................................D";
256
+
257
+ static const char big_dot_map[257] = "\
258
+ ................................\
259
+ ................IIIIIIIIII......\
260
+ ................................\
261
+ ................................\
262
+ ................................\
263
+ ................................\
264
+ ................................\
265
+ ................................o";
266
+
267
+ static const char big_frac_map[257] = "\
268
+ .........rs..r..................\
269
+ r...........u...IIIIIIIIII......\
270
+ .....J.......................H..\
271
+ .....J.......................G..\
272
+ ................................\
273
+ ................................\
274
+ ................................\
275
+ ................................g";
276
+
277
+ static const char big_exp_sign_map[257] = "\
278
+ ................................\
279
+ ...........K.K..LLLLLLLLLL......\
280
+ ................................\
281
+ ................................\
282
+ ................................\
283
+ ................................\
284
+ ................................\
285
+ ................................B";
286
+
287
+ static const char big_exp_zero_map[257] = "\
288
+ ................................\
289
+ ................LLLLLLLLLL......\
290
+ ................................\
291
+ ................................\
292
+ ................................\
293
+ ................................\
294
+ ................................\
295
+ ................................Z";
296
+
297
+ static const char big_exp_map[257] = "\
298
+ .........rs..r..................\
299
+ r...........u...LLLLLLLLLL......\
300
+ .............................H..\
301
+ .............................G..\
302
+ ................................\
303
+ ................................\
304
+ ................................\
305
+ ................................Y";
306
+
307
+ static const char string_map[257] = "\
308
+ ................................\
309
+ RRzRRRRRRRRRRRRRRRRRRRRRRRRRRRRR\
310
+ RRRRRRRRRRRRRRRRRRRRRRRRRRRRARRR\
311
+ RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR\
312
+ ................................\
313
+ ................................\
314
+ MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\
315
+ PPPPPPPPPPPPPPPPQQQQQQQQ........s";
316
+
317
+ static const char esc_map[257] = "\
318
+ ................................\
319
+ ..B............B................\
320
+ ............................B...\
321
+ ..B...B.......B...B.BU..........\
322
+ ................................\
323
+ ................................\
324
+ ................................\
325
+ ................................~";
326
+
327
+ static const char esc_byte_map[257] = "\
328
+ ................................\
329
+ ..\"............/................\
330
+ ............................\\...\
331
+ ..\b...\f.......\n...\r.\t..........\
332
+ ................................\
333
+ ................................\
334
+ ................................\
335
+ ................................b";
336
+
337
+ static const char u_map[257] = "\
338
+ ................................\
339
+ ................EEEEEEEEEE......\
340
+ .EEEEEE.........................\
341
+ .EEEEEE.........................\
342
+ ................................\
343
+ ................................\
344
+ ................................\
345
+ ................................u";
346
+
347
+ static const char utf_map[257] = "\
348
+ ................................\
349
+ ................................\
350
+ ................................\
351
+ ................................\
352
+ SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS\
353
+ SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS\
354
+ ................................\
355
+ ................................8";
356
+
357
+ static const char space_map[257] = "\
358
+ .........ab..a..................\
359
+ a...............................\
360
+ ................................\
361
+ ................................\
362
+ ................................\
363
+ ................................\
364
+ ................................\
365
+ ................................S";
366
+
367
+ static const char trail_map[257] = "\
368
+ .........ab..a..................\
369
+ a...............................\
370
+ ................................\
371
+ ................................\
372
+ ................................\
373
+ ................................\
374
+ ................................\
375
+ ................................R";
376
+
377
+ static const byte hex_map[256] = "\
378
+ ................................\
379
+ ................\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09......\
380
+ .\x0a\x0b\x0c\x0d\x0e\x0f.........................\
381
+ .\x0a\x0b\x0c\x0d\x0e\x0f.........................\
382
+ ................................\
383
+ ................................\
384
+ ................................\
385
+ ................................";
386
+
387
+ static long double pow_map[401] = {1.0L, 1.0e1L, 1.0e2L, 1.0e3L, 1.0e4L,
388
+ 1.0e5L, 1.0e6L, 1.0e7L, 1.0e8L, 1.0e9L, // 00
389
+ 1.0e10L, 1.0e11L, 1.0e12L, 1.0e13L, 1.0e14L,
390
+ 1.0e15L, 1.0e16L, 1.0e17L, 1.0e18L, 1.0e19L, // 10
391
+ 1.0e20L, 1.0e21L, 1.0e22L, 1.0e23L, 1.0e24L,
392
+ 1.0e25L, 1.0e26L, 1.0e27L, 1.0e28L, 1.0e29L, // 20
393
+ 1.0e30L, 1.0e31L, 1.0e32L, 1.0e33L, 1.0e34L,
394
+ 1.0e35L, 1.0e36L, 1.0e37L, 1.0e38L, 1.0e39L, // 30
395
+ 1.0e40L, 1.0e41L, 1.0e42L, 1.0e43L, 1.0e44L,
396
+ 1.0e45L, 1.0e46L, 1.0e47L, 1.0e48L, 1.0e49L, // 40
397
+ 1.0e50L, 1.0e51L, 1.0e52L, 1.0e53L, 1.0e54L,
398
+ 1.0e55L, 1.0e56L, 1.0e57L, 1.0e58L, 1.0e59L, // 50
399
+ 1.0e60L, 1.0e61L, 1.0e62L, 1.0e63L, 1.0e64L,
400
+ 1.0e65L, 1.0e66L, 1.0e67L, 1.0e68L, 1.0e69L, // 60
401
+ 1.0e70L, 1.0e71L, 1.0e72L, 1.0e73L, 1.0e74L,
402
+ 1.0e75L, 1.0e76L, 1.0e77L, 1.0e78L, 1.0e79L, // 70
403
+ 1.0e80L, 1.0e81L, 1.0e82L, 1.0e83L, 1.0e84L,
404
+ 1.0e85L, 1.0e86L, 1.0e87L, 1.0e88L, 1.0e89L, // 80
405
+ 1.0e90L, 1.0e91L, 1.0e92L, 1.0e93L, 1.0e94L,
406
+ 1.0e95L, 1.0e96L, 1.0e97L, 1.0e98L, 1.0e99L, // 90
407
+ 1.0e100L, 1.0e101L, 1.0e102L, 1.0e103L, 1.0e104L,
408
+ 1.0e105L, 1.0e106L, 1.0e107L, 1.0e108L, 1.0e109L, // 100
409
+ 1.0e110L, 1.0e111L, 1.0e112L, 1.0e113L, 1.0e114L,
410
+ 1.0e115L, 1.0e116L, 1.0e117L, 1.0e118L, 1.0e119L, // 110
411
+ 1.0e120L, 1.0e121L, 1.0e122L, 1.0e123L, 1.0e124L,
412
+ 1.0e125L, 1.0e126L, 1.0e127L, 1.0e128L, 1.0e129L, // 120
413
+ 1.0e130L, 1.0e131L, 1.0e132L, 1.0e133L, 1.0e134L,
414
+ 1.0e135L, 1.0e136L, 1.0e137L, 1.0e138L, 1.0e139L, // 130
415
+ 1.0e140L, 1.0e141L, 1.0e142L, 1.0e143L, 1.0e144L,
416
+ 1.0e145L, 1.0e146L, 1.0e147L, 1.0e148L, 1.0e149L, // 140
417
+ 1.0e150L, 1.0e151L, 1.0e152L, 1.0e153L, 1.0e154L,
418
+ 1.0e155L, 1.0e156L, 1.0e157L, 1.0e158L, 1.0e159L, // 150
419
+ 1.0e160L, 1.0e161L, 1.0e162L, 1.0e163L, 1.0e164L,
420
+ 1.0e165L, 1.0e166L, 1.0e167L, 1.0e168L, 1.0e169L, // 160
421
+ 1.0e170L, 1.0e171L, 1.0e172L, 1.0e173L, 1.0e174L,
422
+ 1.0e175L, 1.0e176L, 1.0e177L, 1.0e178L, 1.0e179L, // 170
423
+ 1.0e180L, 1.0e181L, 1.0e182L, 1.0e183L, 1.0e184L,
424
+ 1.0e185L, 1.0e186L, 1.0e187L, 1.0e188L, 1.0e189L, // 180
425
+ 1.0e190L, 1.0e191L, 1.0e192L, 1.0e193L, 1.0e194L,
426
+ 1.0e195L, 1.0e196L, 1.0e197L, 1.0e198L, 1.0e199L, // 190
427
+ 1.0e200L, 1.0e201L, 1.0e202L, 1.0e203L, 1.0e204L,
428
+ 1.0e205L, 1.0e206L, 1.0e207L, 1.0e208L, 1.0e209L, // 200
429
+ 1.0e210L, 1.0e211L, 1.0e212L, 1.0e213L, 1.0e214L,
430
+ 1.0e215L, 1.0e216L, 1.0e217L, 1.0e218L, 1.0e219L, // 210
431
+ 1.0e220L, 1.0e221L, 1.0e222L, 1.0e223L, 1.0e224L,
432
+ 1.0e225L, 1.0e226L, 1.0e227L, 1.0e228L, 1.0e229L, // 220
433
+ 1.0e230L, 1.0e231L, 1.0e232L, 1.0e233L, 1.0e234L,
434
+ 1.0e235L, 1.0e236L, 1.0e237L, 1.0e238L, 1.0e239L, // 230
435
+ 1.0e240L, 1.0e241L, 1.0e242L, 1.0e243L, 1.0e244L,
436
+ 1.0e245L, 1.0e246L, 1.0e247L, 1.0e248L, 1.0e249L, // 240
437
+ 1.0e250L, 1.0e251L, 1.0e252L, 1.0e253L, 1.0e254L,
438
+ 1.0e255L, 1.0e256L, 1.0e257L, 1.0e258L, 1.0e259L, // 250
439
+ 1.0e260L, 1.0e261L, 1.0e262L, 1.0e263L, 1.0e264L,
440
+ 1.0e265L, 1.0e266L, 1.0e267L, 1.0e268L, 1.0e269L, // 260
441
+ 1.0e270L, 1.0e271L, 1.0e272L, 1.0e273L, 1.0e274L,
442
+ 1.0e275L, 1.0e276L, 1.0e277L, 1.0e278L, 1.0e279L, // 270
443
+ 1.0e280L, 1.0e281L, 1.0e282L, 1.0e283L, 1.0e284L,
444
+ 1.0e285L, 1.0e286L, 1.0e287L, 1.0e288L, 1.0e289L, // 280
445
+ 1.0e290L, 1.0e291L, 1.0e292L, 1.0e293L, 1.0e294L,
446
+ 1.0e295L, 1.0e296L, 1.0e297L, 1.0e298L, 1.0e299L, // 290
447
+ 1.0e300L, 1.0e301L, 1.0e302L, 1.0e303L, 1.0e304L,
448
+ 1.0e305L, 1.0e306L, 1.0e307L, 1.0e308L, 1.0e309L, // 300
449
+ 1.0e310L, 1.0e311L, 1.0e312L, 1.0e313L, 1.0e314L,
450
+ 1.0e315L, 1.0e316L, 1.0e317L, 1.0e318L, 1.0e319L, // 310
451
+ 1.0e320L, 1.0e321L, 1.0e322L, 1.0e323L, 1.0e324L,
452
+ 1.0e325L, 1.0e326L, 1.0e327L, 1.0e328L, 1.0e329L, // 320
453
+ 1.0e330L, 1.0e331L, 1.0e332L, 1.0e333L, 1.0e334L,
454
+ 1.0e335L, 1.0e336L, 1.0e337L, 1.0e338L, 1.0e339L, // 330
455
+ 1.0e340L, 1.0e341L, 1.0e342L, 1.0e343L, 1.0e344L,
456
+ 1.0e345L, 1.0e346L, 1.0e347L, 1.0e348L, 1.0e349L, // 340
457
+ 1.0e350L, 1.0e351L, 1.0e352L, 1.0e353L, 1.0e354L,
458
+ 1.0e355L, 1.0e356L, 1.0e357L, 1.0e358L, 1.0e359L, // 350
459
+ 1.0e360L, 1.0e361L, 1.0e362L, 1.0e363L, 1.0e364L,
460
+ 1.0e365L, 1.0e366L, 1.0e367L, 1.0e368L, 1.0e369L, // 360
461
+ 1.0e370L, 1.0e371L, 1.0e372L, 1.0e373L, 1.0e374L,
462
+ 1.0e375L, 1.0e376L, 1.0e377L, 1.0e378L, 1.0e379L, // 370
463
+ 1.0e380L, 1.0e381L, 1.0e382L, 1.0e383L, 1.0e384L,
464
+ 1.0e385L, 1.0e386L, 1.0e387L, 1.0e388L, 1.0e389L, // 380
465
+ 1.0e390L, 1.0e391L, 1.0e392L, 1.0e393L, 1.0e394L,
466
+ 1.0e395L, 1.0e396L, 1.0e397L, 1.0e398L, 1.0e399L, // 390
467
+ 1.0e400L};
468
+
469
+ static VALUE parser_class;
470
+
471
+ // Works with extended unicode as well. \Uffffffff if support is desired in
472
+ // the future.
473
+ static size_t unicodeToUtf8(uint32_t code, byte *buf) {
474
+ byte *start = buf;
475
+
476
+ if (0x0000007F >= code) {
477
+ *buf++ = (byte)code;
478
+ } else if (0x000007FF >= code) {
479
+ *buf++ = 0xC0 | (code >> 6);
480
+ *buf++ = 0x80 | (0x3F & code);
481
+ } else if (0x0000FFFF >= code) {
482
+ *buf++ = 0xE0 | (code >> 12);
483
+ *buf++ = 0x80 | ((code >> 6) & 0x3F);
484
+ *buf++ = 0x80 | (0x3F & code);
485
+ } else if (0x001FFFFF >= code) {
486
+ *buf++ = 0xF0 | (code >> 18);
487
+ *buf++ = 0x80 | ((code >> 12) & 0x3F);
488
+ *buf++ = 0x80 | ((code >> 6) & 0x3F);
489
+ *buf++ = 0x80 | (0x3F & code);
490
+ } else if (0x03FFFFFF >= code) {
491
+ *buf++ = 0xF8 | (code >> 24);
492
+ *buf++ = 0x80 | ((code >> 18) & 0x3F);
493
+ *buf++ = 0x80 | ((code >> 12) & 0x3F);
494
+ *buf++ = 0x80 | ((code >> 6) & 0x3F);
495
+ *buf++ = 0x80 | (0x3F & code);
496
+ } else if (0x7FFFFFFF >= code) {
497
+ *buf++ = 0xFC | (code >> 30);
498
+ *buf++ = 0x80 | ((code >> 24) & 0x3F);
499
+ *buf++ = 0x80 | ((code >> 18) & 0x3F);
500
+ *buf++ = 0x80 | ((code >> 12) & 0x3F);
501
+ *buf++ = 0x80 | ((code >> 6) & 0x3F);
502
+ *buf++ = 0x80 | (0x3F & code);
503
+ }
504
+ return buf - start;
505
+ }
506
+
507
+ static void parser_reset(ojParser p) {
508
+ p->reader = 0;
509
+ memset(&p->num, 0, sizeof(p->num));
510
+ buf_reset(&p->key);
511
+ buf_reset(&p->buf);
512
+ p->map = value_map;
513
+ p->next_map = NULL;
514
+ p->depth = 0;
515
+ }
516
+
517
+ static void parse_error(ojParser p, const char *fmt, ...) {
518
+ va_list ap;
519
+ char buf[256];
520
+
521
+ va_start(ap, fmt);
522
+ vsnprintf(buf, sizeof(buf), fmt, ap);
523
+ va_end(ap);
524
+ rb_raise(oj_json_parser_error_class, "%s at %ld:%ld", buf, p->line, p->col);
525
+ }
526
+
527
+ static void byte_error(ojParser p, byte b) {
528
+ switch (p->map[256]) {
529
+ case 'N': // null_map
530
+ parse_error(p, "expected null");
531
+ break;
532
+ case 'T': // true_map
533
+ parse_error(p, "expected true");
534
+ break;
535
+ case 'F': // false_map
536
+ parse_error(p, "expected false");
537
+ break;
538
+ case 's': // string_map
539
+ parse_error(p, "invalid JSON character 0x%02x", b);
540
+ break;
541
+ default: parse_error(p, "unexpected character '%c' in '%c' mode", b, p->map[256]); break;
542
+ }
543
+ }
544
+
545
+ static void calc_num(ojParser p) {
546
+ switch (p->type) {
547
+ case OJ_INT:
548
+ if (p->num.neg) {
549
+ p->num.fixnum = -p->num.fixnum;
550
+ p->num.neg = false;
551
+ }
552
+ p->funcs[p->stack[p->depth]].add_int(p);
553
+ break;
554
+ case OJ_DECIMAL: {
555
+ long double d = (long double)p->num.fixnum;
556
+
557
+ if (p->num.neg) {
558
+ d = -d;
559
+ }
560
+ if (0 < p->num.shift) {
561
+ d /= pow_map[p->num.shift];
562
+ }
563
+ if (0 < p->num.exp) {
564
+ long double x;
565
+
566
+ if (MAX_POW < p->num.exp) {
567
+ x = powl(10.0L, (long double)p->num.exp);
568
+ } else {
569
+ x = pow_map[p->num.exp];
570
+ }
571
+ if (p->num.exp_neg) {
572
+ d /= x;
573
+ } else {
574
+ d *= x;
575
+ }
576
+ }
577
+ p->num.dub = d;
578
+ p->funcs[p->stack[p->depth]].add_float(p);
579
+ break;
580
+ }
581
+ case OJ_BIG: p->funcs[p->stack[p->depth]].add_big(p);
582
+ default:
583
+ // nothing to do
584
+ break;
585
+ }
586
+ }
587
+
588
+ static void big_change(ojParser p) {
589
+ char buf[32];
590
+ int64_t i = p->num.fixnum;
591
+ int len = 0;
592
+
593
+ buf[sizeof(buf) - 1] = '\0';
594
+ p->buf.tail = p->buf.head;
595
+ switch (p->type) {
596
+ case OJ_INT:
597
+ // If an int then it will fit in the num.raw so no need to check length;
598
+ for (len = sizeof(buf) - 1; 0 < i; len--, i /= 10) {
599
+ buf[len] = '0' + (i % 10);
600
+ }
601
+ if (p->num.neg) {
602
+ buf[len] = '-';
603
+ len--;
604
+ }
605
+ buf_append_string(&p->buf, buf + len + 1, sizeof(buf) - len - 1);
606
+ p->type = OJ_BIG;
607
+ break;
608
+ case OJ_DECIMAL: {
609
+ int shift = p->num.shift;
610
+
611
+ for (len = sizeof(buf) - 1; 0 < i; len--, i /= 10, shift--) {
612
+ if (0 == shift) {
613
+ buf[len] = '.';
614
+ len--;
615
+ }
616
+ buf[len] = '0' + (i % 10);
617
+ }
618
+ if (p->num.neg) {
619
+ buf[len] = '-';
620
+ len--;
621
+ }
622
+ buf_append_string(&p->buf, buf + len + 1, sizeof(buf) - len - 1);
623
+ if (0 < p->num.exp) {
624
+ int x = p->num.exp;
625
+ int d, div;
626
+ bool started = false;
627
+
628
+ buf_append(&p->buf, 'e');
629
+ if (0 < p->num.exp_neg) {
630
+ buf_append(&p->buf, '-');
631
+ }
632
+ for (div = 1000; 0 < div; div /= 10) {
633
+ d = x / div % 10;
634
+ if (started || 0 < d) {
635
+ buf_append(&p->buf, '0' + d);
636
+ }
637
+ }
638
+ }
639
+ p->type = OJ_BIG;
640
+ break;
641
+ }
642
+ default: break;
643
+ }
644
+ }
645
+
646
+ static void parse(ojParser p, const byte *json) {
647
+ const byte *start;
648
+ const byte *b = json;
649
+ int i;
650
+
651
+ #if DEBUG
652
+ printf("*** parse - mode: %c %s\n", p->map[256], (const char *)json);
653
+ #endif
654
+ for (; '\0' != *b; b++) {
655
+ switch (p->map[*b]) {
656
+ case SKIP_NEWLINE:
657
+ p->line++;
658
+ p->col = b - json;
659
+ b++;
660
+ #ifdef SPACE_JUMP
661
+ // for (uint32_t *sj = (uint32_t*)b; 0x20202020 == *sj; sj++) { b += 4; }
662
+ for (uint16_t *sj = (uint16_t *)b; 0x2020 == *sj; sj++) {
663
+ b += 2;
664
+ }
665
+ #endif
666
+ for (; SKIP_CHAR == space_map[*b]; b++) {
667
+ }
668
+ b--;
669
+ break;
670
+ case COLON_COLON: p->map = value_map; break;
671
+ case SKIP_CHAR: break;
672
+ case KEY_QUOTE:
673
+ b++;
674
+ p->key.tail = p->key.head;
675
+ start = b;
676
+ for (; STR_OK == string_map[*b]; b++) {
677
+ }
678
+ buf_append_string(&p->key, (const char *)start, b - start);
679
+ if ('"' == *b) {
680
+ p->map = colon_map;
681
+ break;
682
+ }
683
+ b--;
684
+ p->map = string_map;
685
+ p->next_map = colon_map;
686
+ break;
687
+ case AFTER_COMMA:
688
+ if (0 < p->depth && OBJECT_FUN == p->stack[p->depth]) {
689
+ p->map = key_map;
690
+ } else {
691
+ p->map = comma_map;
692
+ }
693
+ break;
694
+ case VAL_QUOTE:
695
+ b++;
696
+ start = b;
697
+ p->buf.tail = p->buf.head;
698
+ for (; STR_OK == string_map[*b]; b++) {
699
+ }
700
+ buf_append_string(&p->buf, (const char *)start, b - start);
701
+ if ('"' == *b) {
702
+ p->funcs[p->stack[p->depth]].add_str(p);
703
+ p->map = (0 == p->depth) ? value_map : after_map;
704
+ break;
705
+ }
706
+ b--;
707
+ p->map = string_map;
708
+ p->next_map = (0 == p->depth) ? value_map : after_map;
709
+ break;
710
+ case OPEN_OBJECT:
711
+ p->funcs[p->stack[p->depth]].open_object(p);
712
+ p->depth++;
713
+ p->stack[p->depth] = OBJECT_FUN;
714
+ p->map = key1_map;
715
+ break;
716
+ case NUM_CLOSE_OBJECT:
717
+ calc_num(p);
718
+ // flow through
719
+ case CLOSE_OBJECT:
720
+ p->map = (1 == p->depth) ? value_map : after_map;
721
+ if (p->depth <= 0 || OBJECT_FUN != p->stack[p->depth]) {
722
+ p->col = b - json - p->col + 1;
723
+ parse_error(p, "unexpected object close");
724
+ return;
725
+ }
726
+ p->depth--;
727
+ p->funcs[p->stack[p->depth]].close_object(p);
728
+ break;
729
+ case OPEN_ARRAY:
730
+ p->funcs[p->stack[p->depth]].open_array(p);
731
+ p->depth++;
732
+ p->stack[p->depth] = ARRAY_FUN;
733
+ p->map = value_map;
734
+ break;
735
+ case NUM_CLOSE_ARRAY:
736
+ calc_num(p);
737
+ // flow through
738
+ case CLOSE_ARRAY:
739
+ p->map = (1 == p->depth) ? value_map : after_map;
740
+ if (p->depth <= 0 || ARRAY_FUN != p->stack[p->depth]) {
741
+ p->col = b - json - p->col + 1;
742
+ parse_error(p, "unexpected array close");
743
+ return;
744
+ }
745
+ p->depth--;
746
+ p->funcs[p->stack[p->depth]].close_array(p);
747
+ break;
748
+ case NUM_COMMA:
749
+ calc_num(p);
750
+ if (0 < p->depth && OBJECT_FUN == p->stack[p->depth]) {
751
+ p->map = key_map;
752
+ } else {
753
+ p->map = comma_map;
754
+ }
755
+ break;
756
+ case VAL0:
757
+ p->type = OJ_INT;
758
+ p->num.fixnum = 0;
759
+ p->num.neg = false;
760
+ p->num.shift = 0;
761
+ p->num.len = 0;
762
+ p->num.exp = 0;
763
+ p->num.exp_neg = false;
764
+ p->map = zero_map;
765
+ break;
766
+ case VAL_NEG:
767
+ p->type = OJ_INT;
768
+ p->num.fixnum = 0;
769
+ p->num.neg = true;
770
+ p->num.shift = 0;
771
+ p->num.len = 0;
772
+ p->num.exp = 0;
773
+ p->num.exp_neg = false;
774
+ p->map = neg_map;
775
+ break;
776
+ ;
777
+ case VAL_DIGIT:
778
+ p->type = OJ_INT;
779
+ p->num.fixnum = 0;
780
+ p->num.neg = false;
781
+ p->num.shift = 0;
782
+ p->num.exp = 0;
783
+ p->num.exp_neg = false;
784
+ p->num.len = 0;
785
+ p->map = digit_map;
786
+ for (; NUM_DIGIT == digit_map[*b]; b++) {
787
+ uint64_t x = (uint64_t)p->num.fixnum * 10 + (uint64_t)(*b - '0');
788
+
789
+ // Tried just checking for an int less than zero but that
790
+ // fails when optimization is on for some reason with the
791
+ // clang compiler so us a bit mask instead.
792
+ if (x < BIG_LIMIT) {
793
+ p->num.fixnum = (int64_t)x;
794
+ } else {
795
+ big_change(p);
796
+ p->map = big_digit_map;
797
+ break;
798
+ }
799
+ }
800
+ b--;
801
+ break;
802
+ case NUM_DIGIT:
803
+ for (; NUM_DIGIT == digit_map[*b]; b++) {
804
+ uint64_t x = p->num.fixnum * 10 + (uint64_t)(*b - '0');
805
+
806
+ if (x < BIG_LIMIT) {
807
+ p->num.fixnum = (int64_t)x;
808
+ } else {
809
+ big_change(p);
810
+ p->map = big_digit_map;
811
+ break;
812
+ }
813
+ }
814
+ b--;
815
+ break;
816
+ case NUM_DOT:
817
+ p->type = OJ_DECIMAL;
818
+ p->map = dot_map;
819
+ break;
820
+ case NUM_FRAC:
821
+ p->map = frac_map;
822
+ for (; NUM_FRAC == frac_map[*b]; b++) {
823
+ uint64_t x = p->num.fixnum * 10 + (uint64_t)(*b - '0');
824
+
825
+ if (x < FRAC_LIMIT) {
826
+ p->num.fixnum = (int64_t)x;
827
+ p->num.shift++;
828
+ } else {
829
+ big_change(p);
830
+ p->map = big_frac_map;
831
+ break;
832
+ }
833
+ }
834
+ b--;
835
+ break;
836
+ case FRAC_E:
837
+ p->type = OJ_DECIMAL;
838
+ p->map = exp_sign_map;
839
+ break;
840
+ case NUM_ZERO: p->map = zero_map; break;
841
+ case NEG_DIGIT:
842
+ for (; NUM_DIGIT == digit_map[*b]; b++) {
843
+ uint64_t x = p->num.fixnum * 10 + (uint64_t)(*b - '0');
844
+
845
+ if (x < BIG_LIMIT) {
846
+ p->num.fixnum = (int64_t)x;
847
+ } else {
848
+ big_change(p);
849
+ p->map = big_digit_map;
850
+ break;
851
+ }
852
+ }
853
+ b--;
854
+ p->map = digit_map;
855
+ break;
856
+ case EXP_SIGN:
857
+ p->num.exp_neg = ('-' == *b);
858
+ p->map = exp_zero_map;
859
+ break;
860
+ case EXP_DIGIT:
861
+ p->map = exp_map;
862
+ for (; NUM_DIGIT == digit_map[*b]; b++) {
863
+ int16_t x = p->num.exp * 10 + (int16_t)(*b - '0');
864
+
865
+ if (x <= MAX_EXP) {
866
+ p->num.exp = x;
867
+ } else {
868
+ big_change(p);
869
+ p->map = big_exp_map;
870
+ break;
871
+ }
872
+ }
873
+ b--;
874
+ break;
875
+ case BIG_DIGIT:
876
+ start = b;
877
+ for (; NUM_DIGIT == digit_map[*b]; b++) {
878
+ }
879
+ buf_append_string(&p->buf, (const char *)start, b - start);
880
+ b--;
881
+ break;
882
+ case BIG_DOT:
883
+ buf_append(&p->buf, '.');
884
+ p->map = big_dot_map;
885
+ break;
886
+ case BIG_FRAC:
887
+ p->map = big_frac_map;
888
+ start = b;
889
+ for (; NUM_FRAC == frac_map[*b]; b++) {
890
+ }
891
+ buf_append_string(&p->buf, (const char *)start, b - start);
892
+ b--;
893
+ break;
894
+ case BIG_E:
895
+ buf_append(&p->buf, *b);
896
+ p->map = big_exp_sign_map;
897
+ break;
898
+ case BIG_EXP_SIGN:
899
+ buf_append(&p->buf, *b);
900
+ p->map = big_exp_zero_map;
901
+ break;
902
+ case BIG_EXP:
903
+ start = b;
904
+ for (; NUM_DIGIT == digit_map[*b]; b++) {
905
+ }
906
+ buf_append_string(&p->buf, (const char *)start, b - start);
907
+ b--;
908
+ p->map = big_exp_map;
909
+ break;
910
+ case NUM_SPC: calc_num(p); break;
911
+ case NUM_NEWLINE: calc_num(p); b++;
912
+ #ifdef SPACE_JUMP
913
+ // for (uint32_t *sj = (uint32_t*)b; 0x20202020 == *sj; sj++) { b += 4; }
914
+ for (uint16_t *sj = (uint16_t *)b; 0x2020 == *sj; sj++) {
915
+ b += 2;
916
+ }
917
+ #endif
918
+ for (; SKIP_CHAR == space_map[*b]; b++) {
919
+ }
920
+ b--;
921
+ break;
922
+ case STR_OK:
923
+ start = b;
924
+ for (; STR_OK == string_map[*b]; b++) {
925
+ }
926
+ if (':' == p->next_map[256]) {
927
+ buf_append_string(&p->key, (const char *)start, b - start);
928
+ } else {
929
+ buf_append_string(&p->buf, (const char *)start, b - start);
930
+ }
931
+ if ('"' == *b) {
932
+ p->map = p->next_map;
933
+ break;
934
+ }
935
+ b--;
936
+ break;
937
+ case STR_SLASH: p->map = esc_map; break;
938
+ case STR_QUOTE: p->map = p->next_map; break;
939
+ case ESC_U:
940
+ p->map = u_map;
941
+ p->ri = 0;
942
+ p->ucode = 0;
943
+ break;
944
+ case U_OK:
945
+ p->ri++;
946
+ p->ucode = p->ucode << 4 | (uint32_t)hex_map[*b];
947
+ if (4 <= p->ri) {
948
+ byte utf8[8];
949
+ size_t ulen = unicodeToUtf8(p->ucode, utf8);
950
+
951
+ if (0 < ulen) {
952
+ if (':' == p->next_map[256]) {
953
+ buf_append_string(&p->key, (const char *)utf8, ulen);
954
+ } else {
955
+ buf_append_string(&p->buf, (const char *)utf8, ulen);
956
+ }
957
+ } else {
958
+ parse_error(p, "invalid unicode");
959
+ return;
960
+ }
961
+ p->map = string_map;
962
+ }
963
+ break;
964
+ case ESC_OK:
965
+ if (':' == p->next_map[256]) {
966
+ buf_append(&p->key, esc_byte_map[*b]);
967
+ } else {
968
+ buf_append(&p->buf, esc_byte_map[*b]);
969
+ }
970
+ p->map = string_map;
971
+ break;
972
+ case UTF1:
973
+ p->ri = 1;
974
+ p->map = utf_map;
975
+ if (':' == p->next_map[256]) {
976
+ buf_append(&p->key, *b);
977
+ } else {
978
+ buf_append(&p->buf, *b);
979
+ }
980
+ break;
981
+ case UTF2:
982
+ p->ri = 2;
983
+ p->map = utf_map;
984
+ if (':' == p->next_map[256]) {
985
+ buf_append(&p->key, *b);
986
+ } else {
987
+ buf_append(&p->buf, *b);
988
+ }
989
+ break;
990
+ case UTF3:
991
+ p->ri = 3;
992
+ p->map = utf_map;
993
+ if (':' == p->next_map[256]) {
994
+ buf_append(&p->key, *b);
995
+ } else {
996
+ buf_append(&p->buf, *b);
997
+ }
998
+ break;
999
+ case UTFX:
1000
+ p->ri--;
1001
+ if (':' == p->next_map[256]) {
1002
+ buf_append(&p->key, *b);
1003
+ } else {
1004
+ buf_append(&p->buf, *b);
1005
+ }
1006
+ if (p->ri <= 0) {
1007
+ p->map = string_map;
1008
+ }
1009
+ break;
1010
+ case VAL_NULL:
1011
+ if ('u' == b[1] && 'l' == b[2] && 'l' == b[3]) {
1012
+ b += 3;
1013
+ p->funcs[p->stack[p->depth]].add_null(p);
1014
+ p->map = (0 == p->depth) ? value_map : after_map;
1015
+ break;
1016
+ }
1017
+ p->ri = 0;
1018
+ *p->token = *b++;
1019
+ for (i = 1; i < 4; i++) {
1020
+ if ('\0' == *b) {
1021
+ p->ri = i;
1022
+ break;
1023
+ } else {
1024
+ p->token[i] = *b++;
1025
+ }
1026
+ }
1027
+ if (0 < p->ri) {
1028
+ p->map = null_map;
1029
+ b--;
1030
+ break;
1031
+ }
1032
+ p->col = b - json - p->col;
1033
+ parse_error(p, "expected null");
1034
+ return;
1035
+ case VAL_TRUE:
1036
+ if ('r' == b[1] && 'u' == b[2] && 'e' == b[3]) {
1037
+ b += 3;
1038
+ p->funcs[p->stack[p->depth]].add_true(p);
1039
+ p->map = (0 == p->depth) ? value_map : after_map;
1040
+ break;
1041
+ }
1042
+ p->ri = 0;
1043
+ *p->token = *b++;
1044
+ for (i = 1; i < 4; i++) {
1045
+ if ('\0' == *b) {
1046
+ p->ri = i;
1047
+ break;
1048
+ } else {
1049
+ p->token[i] = *b++;
1050
+ }
1051
+ }
1052
+ if (0 < p->ri) {
1053
+ p->map = true_map;
1054
+ b--;
1055
+ break;
1056
+ }
1057
+ p->col = b - json - p->col;
1058
+ parse_error(p, "expected true");
1059
+ return;
1060
+ case VAL_FALSE:
1061
+ if ('a' == b[1] && 'l' == b[2] && 's' == b[3] && 'e' == b[4]) {
1062
+ b += 4;
1063
+ p->funcs[p->stack[p->depth]].add_false(p);
1064
+ p->map = (0 == p->depth) ? value_map : after_map;
1065
+ break;
1066
+ }
1067
+ p->ri = 0;
1068
+ *p->token = *b++;
1069
+ for (i = 1; i < 5; i++) {
1070
+ if ('\0' == *b) {
1071
+ p->ri = i;
1072
+ break;
1073
+ } else {
1074
+ p->token[i] = *b++;
1075
+ }
1076
+ }
1077
+ if (0 < p->ri) {
1078
+ p->map = false_map;
1079
+ b--;
1080
+ break;
1081
+ }
1082
+ p->col = b - json - p->col;
1083
+ parse_error(p, "expected false");
1084
+ return;
1085
+ case TOKEN_OK:
1086
+ p->token[p->ri] = *b;
1087
+ p->ri++;
1088
+ switch (p->map[256]) {
1089
+ case 'N':
1090
+ if (4 == p->ri) {
1091
+ if (0 != strncmp("null", p->token, 4)) {
1092
+ p->col = b - json - p->col;
1093
+ parse_error(p, "expected null");
1094
+ return;
1095
+ }
1096
+ p->funcs[p->stack[p->depth]].add_null(p);
1097
+ p->map = (0 == p->depth) ? value_map : after_map;
1098
+ }
1099
+ break;
1100
+ case 'F':
1101
+ if (5 == p->ri) {
1102
+ if (0 != strncmp("false", p->token, 5)) {
1103
+ p->col = b - json - p->col;
1104
+ parse_error(p, "expected false");
1105
+ return;
1106
+ }
1107
+ p->funcs[p->stack[p->depth]].add_false(p);
1108
+ p->map = (0 == p->depth) ? value_map : after_map;
1109
+ }
1110
+ break;
1111
+ case 'T':
1112
+ if (4 == p->ri) {
1113
+ if (0 != strncmp("true", p->token, 4)) {
1114
+ p->col = b - json - p->col;
1115
+ parse_error(p, "expected true");
1116
+ return;
1117
+ }
1118
+ p->funcs[p->stack[p->depth]].add_true(p);
1119
+ p->map = (0 == p->depth) ? value_map : after_map;
1120
+ }
1121
+ break;
1122
+ default:
1123
+ p->col = b - json - p->col;
1124
+ parse_error(p, "parse error");
1125
+ return;
1126
+ }
1127
+ break;
1128
+ case CHAR_ERR: byte_error(p, *b); return;
1129
+ default: break;
1130
+ }
1131
+ if (0 == p->depth && 'v' == p->map[256] && p->just_one) {
1132
+ p->map = trail_map;
1133
+ }
1134
+ }
1135
+ if (0 == p->depth) {
1136
+ switch (p->map[256]) {
1137
+ case '0':
1138
+ case 'd':
1139
+ case 'f':
1140
+ case 'z':
1141
+ case 'X':
1142
+ case 'D':
1143
+ case 'g':
1144
+ case 'B':
1145
+ case 'Y': calc_num(p); break;
1146
+ }
1147
+ }
1148
+ return;
1149
+ }
1150
+
1151
+ static void parser_free(void *ptr) {
1152
+ ojParser p;
1153
+
1154
+ if (0 == ptr) {
1155
+ return;
1156
+ }
1157
+ p = (ojParser)ptr;
1158
+ buf_cleanup(&p->key);
1159
+ buf_cleanup(&p->buf);
1160
+ p->free(p);
1161
+ xfree(ptr);
1162
+ }
1163
+
1164
+ static void parser_mark(void *ptr) {
1165
+ if (NULL != ptr) {
1166
+ ojParser p = (ojParser)ptr;
1167
+
1168
+ if (0 != p->reader) {
1169
+ rb_gc_mark(p->reader);
1170
+ }
1171
+ p->mark(p);
1172
+ }
1173
+ }
1174
+
1175
+ extern void oj_set_parser_validator(ojParser p);
1176
+ extern void oj_set_parser_saj(ojParser p);
1177
+ extern void oj_set_parser_usual(ojParser p);
1178
+ extern void oj_set_parser_debug(ojParser p);
1179
+
1180
+ /* Document-method: new
1181
+ * call-seq: new(mode=nil)
1182
+ *
1183
+ * Creates a new Parser with the specified mode. If no mode is provided
1184
+ * validation is assumed.
1185
+ */
1186
+ static VALUE parser_new(VALUE self, VALUE mode) {
1187
+ ojParser p = ALLOC(struct _ojParser);
1188
+
1189
+ #if HAVE_RB_EXT_RACTOR_SAFE
1190
+ // This doesn't seem to do anything.
1191
+ rb_ext_ractor_safe(true);
1192
+ #endif
1193
+ memset(p, 0, sizeof(struct _ojParser));
1194
+ buf_init(&p->key);
1195
+ buf_init(&p->buf);
1196
+
1197
+ p->map = value_map;
1198
+ if (Qnil == mode) {
1199
+ oj_set_parser_validator(p);
1200
+ } else {
1201
+ const char *ms = NULL;
1202
+
1203
+ switch (rb_type(mode)) {
1204
+ case RUBY_T_SYMBOL:
1205
+ mode = rb_sym2str(mode);
1206
+ // fall through
1207
+ case RUBY_T_STRING: ms = RSTRING_PTR(mode); break;
1208
+ default:
1209
+ rb_raise(rb_eArgError, "mode must be :validate, :usual, :saj, or :object");
1210
+ }
1211
+ if (0 == strcmp("usual", ms) || 0 == strcmp("standard", ms) || 0 == strcmp("strict", ms) ||
1212
+ 0 == strcmp("compat", ms)) {
1213
+ oj_set_parser_usual(p);
1214
+ } else if (0 == strcmp("object", ms)) {
1215
+ // TBD
1216
+ } else if (0 == strcmp("saj", ms)) {
1217
+ oj_set_parser_saj(p);
1218
+ } else if (0 == strcmp("validate", ms)) {
1219
+ oj_set_parser_validator(p);
1220
+ } else if (0 == strcmp("debug", ms)) {
1221
+ oj_set_parser_debug(p);
1222
+ } else {
1223
+ rb_raise(rb_eArgError, "mode must be :validate, :usual, :saj, or :object");
1224
+ }
1225
+ }
1226
+ return Data_Wrap_Struct(parser_class, parser_mark, parser_free, p);
1227
+ }
1228
+
1229
+ /* Document-method: method_missing(value)
1230
+ * call-seq: method_missing(value)
1231
+ *
1232
+ * Methods not handled by the parser are passed to the delegate. The methods
1233
+ * supported by delegate are:
1234
+ *
1235
+ * - *:validate*
1236
+ * - no options
1237
+ *
1238
+ * - *:saj*
1239
+ * - _cache_keys=_ sets the value of the _cache_keys_ flag.
1240
+ * - _cache_keys_ returns the value of the _cache_keys_ flag.
1241
+ * - _cache_strings=_ sets the value of the _cache_strings_ to an positive integer less than 35. Strings shorter than that length are cached.
1242
+ * - _cache_strings_ returns the value of the _cache_strings_ integer value.
1243
+ * - _handler=_ sets the SAJ handler
1244
+ * - _handler_ returns the SAJ handler
1245
+ *
1246
+ * - *:usual*
1247
+ * - _cache_keys=_ sets the value of the _cache_keys_ flag.
1248
+ * - _cache_keys_ returns the value of the _cache_keys_ flag.
1249
+ * - _cache_strings=_ sets the value of the _cache_strings_ to an positive integer less than 35. Strings shorter than that length are cached.
1250
+ * - _cache_strings_ returns the value of the _cache_strings_ integer value.
1251
+ * - _capacity=_ sets the capacity of the parser. The parser grows automatically but can be updated directly with this call.
1252
+ * - _capacity_ returns the current capacity of the parser's internal stack.
1253
+ * - _create_id_ returns the value _create_id_ or _nil_ if there is no _create_id_.
1254
+ * - _create_id=_ sets the value _create_id_ or if _nil_ unsets it. Parsed JSON objects that include the specified element use the element value as the name of the class to create an object from instead of a Hash.
1255
+ * - _decimal=_ sets the approach to how decimals are parser. If _:auto_ then the decimals with significant digits are 16 or less are Floats and long ones are BigDecimal. _:ruby_ uses a call to Ruby to convert a string to a Float. _:float_ always generates a Float. _:bigdecimal_ always results in a BigDecimal.
1256
+ * - _decimal_ returns the value of the decimal conversion option which can be :auto (default), :ruby, :float, or :bigdecimal.
1257
+ * - _ignore_json_create_ returns the value of the _ignore_json_create_ flag.
1258
+ * - _ignore_json_create=_ sets the value of the _ignore_json_create_ flag. When set the class json_create method is ignored on parsing in favor of creating an instance and populating directly.
1259
+ * - _missing_class_ return the value of the _missing_class_ indicator.
1260
+ * - _missing_class=_ sets the value of the _missing_class_ flag. Valid values are _:auto_ which creates any missing classes on parse, :ignore which ignores and continues as a Hash (default), and :raise which raises an exception if the class is not found.
1261
+ * - _omit_null=_ sets the _omit_null_ flag. If true then null values in a map or object are omitted from the resulting Hash or Object.
1262
+ * - _omit_null_ returns the value of the _omit_null_ flag.
1263
+ * - _symbol_keys=_ sets the flag that indicates Hash keys should be parsed to Symbols versus Strings.
1264
+ * - _symbol_keys_ returns the value of the _symbol_keys_ flag.
1265
+ */
1266
+ static VALUE parser_missing(int argc, VALUE *argv, VALUE self) {
1267
+ ojParser p = (ojParser)DATA_PTR(self);
1268
+ const char * key = NULL;
1269
+ volatile VALUE rkey = *argv;
1270
+ volatile VALUE rv = Qnil;
1271
+
1272
+ #if HAVE_RB_EXT_RACTOR_SAFE
1273
+ // This doesn't seem to do anything.
1274
+ rb_ext_ractor_safe(true);
1275
+ #endif
1276
+ switch (rb_type(rkey)) {
1277
+ case RUBY_T_SYMBOL:
1278
+ rkey = rb_sym2str(rkey);
1279
+ // fall through
1280
+ case RUBY_T_STRING: key = rb_string_value_ptr(&rkey); break;
1281
+ default: rb_raise(rb_eArgError, "option method must be a symbol or string");
1282
+ }
1283
+ if (1 < argc) {
1284
+ rv = argv[1];
1285
+ }
1286
+ return p->option(p, key, rv);
1287
+ }
1288
+
1289
+ /* Document-method: parse(json)
1290
+ * call-seq: parse(json)
1291
+ *
1292
+ * Parse a JSON string.
1293
+ *
1294
+ * Returns the result according to the delegate of the parser.
1295
+ */
1296
+ static VALUE parser_parse(VALUE self, VALUE json) {
1297
+ ojParser p = (ojParser)DATA_PTR(self);
1298
+
1299
+ Check_Type(json, T_STRING);
1300
+ parser_reset(p);
1301
+ p->start(p);
1302
+ parse(p, (const byte *)rb_string_value_ptr(&json));
1303
+
1304
+ return p->result(p);
1305
+ }
1306
+
1307
+ static VALUE load_rescue(VALUE self, VALUE x) {
1308
+ // Normal EOF. No action needed other than to stop loading.
1309
+ return Qfalse;
1310
+ }
1311
+
1312
+ static VALUE load(VALUE self) {
1313
+ ojParser p = (ojParser)DATA_PTR(self);
1314
+ volatile VALUE rbuf = rb_str_new2("");
1315
+
1316
+ p->start(p);
1317
+ while (true) {
1318
+ rb_funcall(p->reader, oj_readpartial_id, 2, INT2NUM(16385), rbuf);
1319
+ if (0 < RSTRING_LEN(rbuf)) {
1320
+ parse(p, (byte *)StringValuePtr(rbuf));
1321
+ }
1322
+ }
1323
+ return Qtrue;
1324
+ }
1325
+
1326
+ /* Document-method: load(reader)
1327
+ * call-seq: load(reader)
1328
+ *
1329
+ * Parse a JSON stream.
1330
+ *
1331
+ * Returns the result according to the delegate of the parser.
1332
+ */
1333
+ static VALUE parser_load(VALUE self, VALUE reader) {
1334
+ ojParser p = (ojParser)DATA_PTR(self);
1335
+
1336
+ parser_reset(p);
1337
+ p->reader = reader;
1338
+ rb_rescue2(load, self, load_rescue, Qnil, rb_eEOFError, 0);
1339
+
1340
+ return p->result(p);
1341
+ }
1342
+
1343
+ /* Document-method: file(filename)
1344
+ * call-seq: file(filename)
1345
+ *
1346
+ * Parse a JSON file.
1347
+ *
1348
+ * Returns the result according to the delegate of the parser.
1349
+ */
1350
+ static VALUE parser_file(VALUE self, VALUE filename) {
1351
+ ojParser p = (ojParser)DATA_PTR(self);
1352
+ const char *path;
1353
+ int fd;
1354
+
1355
+ Check_Type(filename, T_STRING);
1356
+ path = rb_string_value_ptr(&filename);
1357
+
1358
+ parser_reset(p);
1359
+ p->start(p);
1360
+
1361
+ if (0 > (fd = open(path, O_RDONLY))) {
1362
+ rb_raise(rb_eIOError, "error opening %s", path);
1363
+ }
1364
+ #if USE_THREAD_LIMIT
1365
+ struct stat info;
1366
+ // st_size will be 0 if not a file
1367
+ if (0 == fstat(fd, &info) && USE_THREAD_LIMIT < info.st_size) {
1368
+ // Use threaded version.
1369
+ // TBD only if has pthreads
1370
+ // TBD parse_large(p, fd);
1371
+ return p->result(p);
1372
+ }
1373
+ #endif
1374
+ byte buf[16385];
1375
+ size_t size = sizeof(buf) - 1;
1376
+ size_t rsize;
1377
+
1378
+ while (true) {
1379
+ if (0 < (rsize = read(fd, buf, size))) {
1380
+ buf[rsize] = '\0';
1381
+ parse(p, buf);
1382
+ }
1383
+ if (rsize <= 0) {
1384
+ if (0 != rsize) {
1385
+ rb_raise(rb_eIOError, "error reading from %s", path);
1386
+ }
1387
+ break;
1388
+ }
1389
+ }
1390
+ return p->result(p);
1391
+ }
1392
+
1393
+ /* Document-method: just_one
1394
+ * call-seq: just_one
1395
+ *
1396
+ * Returns the current state of the just_one [_Boolean_] option.
1397
+ */
1398
+ static VALUE parser_just_one(VALUE self) {
1399
+ ojParser p = (ojParser)DATA_PTR(self);
1400
+
1401
+ return p->just_one ? Qtrue : Qfalse;
1402
+ }
1403
+
1404
+ /* Document-method: just_one=
1405
+ * call-seq: just_one=(value)
1406
+ *
1407
+ * Sets the *just_one* option which limits the parsing of a string or or
1408
+ * stream to a single JSON element.
1409
+ *
1410
+ * Returns the current state of the just_one [_Boolean_] option.
1411
+ */
1412
+ static VALUE parser_just_one_set(VALUE self, VALUE v) {
1413
+ ojParser p = (ojParser)DATA_PTR(self);
1414
+
1415
+ p->just_one = (Qtrue == v);
1416
+
1417
+ return p->just_one ? Qtrue : Qfalse;
1418
+ }
1419
+
1420
+ static VALUE usual_parser = Qundef;
1421
+
1422
+ /* Document-method: usual
1423
+ * call-seq: usual
1424
+ *
1425
+ * Returns the default usual parser. Note the default usual parser can not be
1426
+ * used concurrently in more than one thread.
1427
+ */
1428
+ static VALUE parser_usual(VALUE self) {
1429
+ if (Qundef == usual_parser) {
1430
+ ojParser p = ALLOC(struct _ojParser);
1431
+
1432
+ memset(p, 0, sizeof(struct _ojParser));
1433
+ buf_init(&p->key);
1434
+ buf_init(&p->buf);
1435
+ p->map = value_map;
1436
+ oj_set_parser_usual(p);
1437
+ usual_parser = Data_Wrap_Struct(parser_class, parser_mark, parser_free, p);
1438
+ rb_gc_register_address(&usual_parser);
1439
+ }
1440
+ return usual_parser;
1441
+ }
1442
+
1443
+ static VALUE saj_parser = Qundef;
1444
+
1445
+ /* Document-method: saj
1446
+ * call-seq: saj
1447
+ *
1448
+ * Returns the default saj parser. Note the default SAJ parser can not be used
1449
+ * concurrently in more than one thread.
1450
+ */
1451
+ static VALUE parser_saj(VALUE self) {
1452
+ if (Qundef == saj_parser) {
1453
+ ojParser p = ALLOC(struct _ojParser);
1454
+
1455
+ memset(p, 0, sizeof(struct _ojParser));
1456
+ buf_init(&p->key);
1457
+ buf_init(&p->buf);
1458
+ p->map = value_map;
1459
+ oj_set_parser_saj(p);
1460
+ saj_parser = Data_Wrap_Struct(parser_class, parser_mark, parser_free, p);
1461
+ rb_gc_register_address(&saj_parser);
1462
+ }
1463
+ return saj_parser;
1464
+ }
1465
+
1466
+ static VALUE validate_parser = Qundef;
1467
+
1468
+ /* Document-method: validate
1469
+ * call-seq: validate
1470
+ *
1471
+ * Returns the default validate parser.
1472
+ */
1473
+ static VALUE parser_validate(VALUE self) {
1474
+ if (Qundef == validate_parser) {
1475
+ ojParser p = ALLOC(struct _ojParser);
1476
+
1477
+ memset(p, 0, sizeof(struct _ojParser));
1478
+ buf_init(&p->key);
1479
+ buf_init(&p->buf);
1480
+ p->map = value_map;
1481
+ oj_set_parser_validator(p);
1482
+ validate_parser = Data_Wrap_Struct(parser_class, parser_mark, parser_free, p);
1483
+ rb_gc_register_address(&validate_parser);
1484
+ }
1485
+ return validate_parser;
1486
+ }
1487
+
1488
+ /* Document-class: Oj::Parser
1489
+ *
1490
+ * A reusable parser that makes use of named delegates to determine the
1491
+ * handling of parsed data. Delegates are available for validation, a callback
1492
+ * parser (SAJ), and a usual delegate that builds Ruby objects as parsing
1493
+ * proceeds.
1494
+ *
1495
+ * This parser is considerably faster than the older Oj.parse call and
1496
+ * isolates options to just the parser so that other parts of the code are not
1497
+ * forced to use the same options.
1498
+ */
1499
+ void oj_parser_init() {
1500
+ parser_class = rb_define_class_under(Oj, "Parser", rb_cObject);
1501
+ rb_define_module_function(parser_class, "new", parser_new, 1);
1502
+ rb_define_method(parser_class, "parse", parser_parse, 1);
1503
+ rb_define_method(parser_class, "load", parser_load, 1);
1504
+ rb_define_method(parser_class, "file", parser_file, 1);
1505
+ rb_define_method(parser_class, "just_one", parser_just_one, 0);
1506
+ rb_define_method(parser_class, "just_one=", parser_just_one_set, 1);
1507
+ rb_define_method(parser_class, "method_missing", parser_missing, -1);
1508
+
1509
+ rb_define_module_function(parser_class, "usual", parser_usual, 0);
1510
+ rb_define_module_function(parser_class, "saj", parser_saj, 0);
1511
+ rb_define_module_function(parser_class, "validate", parser_validate, 0);
1512
+ }