oj 3.12.3 → 3.13.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +2 -3
  3. data/ext/oj/buf.h +9 -0
  4. data/ext/oj/cache.c +341 -0
  5. data/ext/oj/cache.h +21 -0
  6. data/ext/oj/compat.c +7 -22
  7. data/ext/oj/custom.c +15 -17
  8. data/ext/oj/debug.c +132 -0
  9. data/ext/oj/dump.c +12 -15
  10. data/ext/oj/dump_compat.c +3 -3
  11. data/ext/oj/dump_object.c +9 -9
  12. data/ext/oj/dump_strict.c +3 -3
  13. data/ext/oj/err.h +19 -0
  14. data/ext/oj/extconf.rb +5 -0
  15. data/ext/oj/fast.c +7 -18
  16. data/ext/oj/intern.c +281 -0
  17. data/ext/oj/intern.h +26 -0
  18. data/ext/oj/mimic_json.c +2 -2
  19. data/ext/oj/object.c +15 -92
  20. data/ext/oj/odd.c +1 -1
  21. data/ext/oj/oj.c +117 -94
  22. data/ext/oj/oj.h +1 -1
  23. data/ext/oj/parse.c +5 -5
  24. data/ext/oj/parser.c +1483 -0
  25. data/ext/oj/parser.h +90 -0
  26. data/ext/oj/rails.c +5 -5
  27. data/ext/oj/resolve.c +2 -20
  28. data/ext/oj/rxclass.c +1 -1
  29. data/ext/oj/saj.c +1 -1
  30. data/ext/oj/saj2.c +348 -0
  31. data/ext/oj/scp.c +1 -1
  32. data/ext/oj/sparse.c +2 -2
  33. data/ext/oj/stream_writer.c +4 -4
  34. data/ext/oj/strict.c +9 -27
  35. data/ext/oj/string_writer.c +2 -2
  36. data/ext/oj/usual.c +1252 -0
  37. data/ext/oj/validate.c +51 -0
  38. data/ext/oj/wab.c +14 -19
  39. data/lib/oj/error.rb +1 -1
  40. data/lib/oj/state.rb +8 -7
  41. data/lib/oj/version.rb +1 -1
  42. data/pages/Options.md +1 -1
  43. data/pages/Parser.md +309 -0
  44. data/pages/Rails.md +2 -2
  45. data/test/json_gem/json_generator_test.rb +1 -1
  46. data/test/mem.rb +33 -0
  47. data/test/perf_once.rb +58 -0
  48. data/test/perf_parser.rb +189 -0
  49. data/test/test_hash.rb +1 -1
  50. data/test/test_parser.rb +27 -0
  51. data/test/test_parser_saj.rb +245 -0
  52. data/test/test_parser_usual.rb +213 -0
  53. metadata +26 -5
  54. data/ext/oj/hash.c +0 -168
  55. data/ext/oj/hash.h +0 -21
  56. data/ext/oj/hash_test.c +0 -491
data/ext/oj/parser.c ADDED
@@ -0,0 +1,1483 @@
1
+ // Copyright (c) 2020, 2021, Peter Ohler, All rights reserved.
2
+
3
+ #include "parser.h"
4
+
5
+ #include <fcntl.h>
6
+
7
+ #include "oj.h"
8
+
9
+ #define DEBUG 0
10
+
11
+ #define USE_THREAD_LIMIT 0
12
+ // #define USE_THREAD_LIMIT 100000
13
+ #define MAX_EXP 4932
14
+ // max in the pow_map
15
+ #define MAX_POW 400
16
+
17
+ #define MIN_SLEEP (1000000000LL / (double)CLOCKS_PER_SEC)
18
+ // 9,223,372,036,854,775,807
19
+ #define BIG_LIMIT LLONG_MAX / 10
20
+ #define FRAC_LIMIT 10000000000000000ULL
21
+
22
+ // Give better performance with indented JSON but worse with unindented.
23
+ //#define SPACE_JUMP
24
+
25
+ enum {
26
+ SKIP_CHAR = 'a',
27
+ SKIP_NEWLINE = 'b',
28
+ VAL_NULL = 'c',
29
+ VAL_TRUE = 'd',
30
+ VAL_FALSE = 'e',
31
+ VAL_NEG = 'f',
32
+ VAL0 = 'g',
33
+ VAL_DIGIT = 'h',
34
+ VAL_QUOTE = 'i',
35
+ OPEN_ARRAY = 'k',
36
+ OPEN_OBJECT = 'l',
37
+ CLOSE_ARRAY = 'm',
38
+ CLOSE_OBJECT = 'n',
39
+ AFTER_COMMA = 'o',
40
+ KEY_QUOTE = 'p',
41
+ COLON_COLON = 'q',
42
+ NUM_SPC = 'r',
43
+ NUM_NEWLINE = 's',
44
+ NUM_DOT = 't',
45
+ NUM_COMMA = 'u',
46
+ NUM_FRAC = 'v',
47
+ FRAC_E = 'w',
48
+ EXP_SIGN = 'x',
49
+ EXP_DIGIT = 'y',
50
+ STR_QUOTE = 'z',
51
+ NEG_DIGIT = '-',
52
+ STR_SLASH = 'A',
53
+ ESC_OK = 'B',
54
+ BIG_DIGIT = 'C',
55
+ BIG_DOT = 'D',
56
+ U_OK = 'E',
57
+ TOKEN_OK = 'F',
58
+ NUM_CLOSE_OBJECT = 'G',
59
+ NUM_CLOSE_ARRAY = 'H',
60
+ BIG_FRAC = 'I',
61
+ BIG_E = 'J',
62
+ BIG_EXP_SIGN = 'K',
63
+ BIG_EXP = 'L',
64
+ UTF1 = 'M', // expect 1 more follow byte
65
+ NUM_DIGIT = 'N',
66
+ NUM_ZERO = 'O',
67
+ UTF2 = 'P', // expect 2 more follow byte
68
+ UTF3 = 'Q', // expect 3 more follow byte
69
+ STR_OK = 'R',
70
+ UTFX = 'S', // following bytes
71
+ ESC_U = 'U',
72
+ CHAR_ERR = '.',
73
+ DONE = 'X',
74
+ };
75
+
76
+ /*
77
+ 0123456789abcdef0123456789abcdef */
78
+ static const char value_map[257] = "\
79
+ X........ab..a..................\
80
+ a.i..........f..ghhhhhhhhh......\
81
+ ...........................k.m..\
82
+ ......e.......c.....d......l.n..\
83
+ ................................\
84
+ ................................\
85
+ ................................\
86
+ ................................v";
87
+
88
+ static const char null_map[257] = "\
89
+ ................................\
90
+ ............o...................\
91
+ ................................\
92
+ ............F........F..........\
93
+ ................................\
94
+ ................................\
95
+ ................................\
96
+ ................................N";
97
+
98
+ static const char true_map[257] = "\
99
+ ................................\
100
+ ............o...................\
101
+ ................................\
102
+ .....F............F..F..........\
103
+ ................................\
104
+ ................................\
105
+ ................................\
106
+ ................................T";
107
+
108
+ static const char false_map[257] = "\
109
+ ................................\
110
+ ............o...................\
111
+ ................................\
112
+ .F...F......F......F............\
113
+ ................................\
114
+ ................................\
115
+ ................................\
116
+ ................................F";
117
+
118
+ static const char comma_map[257] = "\
119
+ .........ab..a..................\
120
+ a.i..........f..ghhhhhhhhh......\
121
+ ...........................k....\
122
+ ......e.......c.....d......l....\
123
+ ................................\
124
+ ................................\
125
+ ................................\
126
+ ................................,";
127
+
128
+ static const char after_map[257] = "\
129
+ X........ab..a..................\
130
+ a...........o...................\
131
+ .............................m..\
132
+ .............................n..\
133
+ ................................\
134
+ ................................\
135
+ ................................\
136
+ ................................a";
137
+
138
+ static const char key1_map[257] = "\
139
+ .........ab..a..................\
140
+ a.p.............................\
141
+ ................................\
142
+ .............................n..\
143
+ ................................\
144
+ ................................\
145
+ ................................\
146
+ ................................K";
147
+
148
+ static const char key_map[257] = "\
149
+ .........ab..a..................\
150
+ a.p.............................\
151
+ ................................\
152
+ ................................\
153
+ ................................\
154
+ ................................\
155
+ ................................\
156
+ ................................k";
157
+
158
+ static const char colon_map[257] = "\
159
+ .........ab..a..................\
160
+ a.........................q.....\
161
+ ................................\
162
+ ................................\
163
+ ................................\
164
+ ................................\
165
+ ................................\
166
+ ................................:";
167
+
168
+ static const char neg_map[257] = "\
169
+ ................................\
170
+ ................O---------......\
171
+ ................................\
172
+ ................................\
173
+ ................................\
174
+ ................................\
175
+ ................................\
176
+ ................................-";
177
+
178
+ static const char zero_map[257] = "\
179
+ .........rs..r..................\
180
+ r...........u.t.................\
181
+ .............................H..\
182
+ .............................G..\
183
+ ................................\
184
+ ................................\
185
+ ................................\
186
+ ................................0";
187
+
188
+ static const char digit_map[257] = "\
189
+ .........rs..r..................\
190
+ r...........u.t.NNNNNNNNNN......\
191
+ .....w.......................H..\
192
+ .....w.......................G..\
193
+ ................................\
194
+ ................................\
195
+ ................................\
196
+ ................................d";
197
+
198
+ static const char dot_map[257] = "\
199
+ ................................\
200
+ ................vvvvvvvvvv......\
201
+ ................................\
202
+ ................................\
203
+ ................................\
204
+ ................................\
205
+ ................................\
206
+ .................................";
207
+
208
+ static const char frac_map[257] = "\
209
+ .........rs..r..................\
210
+ r...........u...vvvvvvvvvv......\
211
+ .....w.......................H..\
212
+ .....w.......................G..\
213
+ ................................\
214
+ ................................\
215
+ ................................\
216
+ ................................f";
217
+
218
+ static const char exp_sign_map[257] = "\
219
+ ................................\
220
+ ...........x.x..yyyyyyyyyy......\
221
+ ................................\
222
+ ................................\
223
+ ................................\
224
+ ................................\
225
+ ................................\
226
+ ................................x";
227
+
228
+ static const char exp_zero_map[257] = "\
229
+ ................................\
230
+ ................yyyyyyyyyy......\
231
+ ................................\
232
+ ................................\
233
+ ................................\
234
+ ................................\
235
+ ................................\
236
+ ................................z";
237
+
238
+ static const char exp_map[257] = "\
239
+ .........rs..r..................\
240
+ r...........u...yyyyyyyyyy......\
241
+ .............................H..\
242
+ .............................G..\
243
+ ................................\
244
+ ................................\
245
+ ................................\
246
+ ................................X";
247
+
248
+ static const char big_digit_map[257] = "\
249
+ .........rs..r..................\
250
+ r...........u.D.CCCCCCCCCC......\
251
+ .....J.......................H..\
252
+ .....J.......................G..\
253
+ ................................\
254
+ ................................\
255
+ ................................\
256
+ ................................D";
257
+
258
+ static const char big_dot_map[257] = "\
259
+ ................................\
260
+ ................IIIIIIIIII......\
261
+ ................................\
262
+ ................................\
263
+ ................................\
264
+ ................................\
265
+ ................................\
266
+ ................................o";
267
+
268
+ static const char big_frac_map[257] = "\
269
+ .........rs..r..................\
270
+ r...........u...IIIIIIIIII......\
271
+ .....J.......................H..\
272
+ .....J.......................G..\
273
+ ................................\
274
+ ................................\
275
+ ................................\
276
+ ................................g";
277
+
278
+ static const char big_exp_sign_map[257] = "\
279
+ ................................\
280
+ ...........K.K..LLLLLLLLLL......\
281
+ ................................\
282
+ ................................\
283
+ ................................\
284
+ ................................\
285
+ ................................\
286
+ ................................B";
287
+
288
+ static const char big_exp_zero_map[257] = "\
289
+ ................................\
290
+ ................LLLLLLLLLL......\
291
+ ................................\
292
+ ................................\
293
+ ................................\
294
+ ................................\
295
+ ................................\
296
+ ................................Z";
297
+
298
+ static const char big_exp_map[257] = "\
299
+ .........rs..r..................\
300
+ r...........u...LLLLLLLLLL......\
301
+ .............................H..\
302
+ .............................G..\
303
+ ................................\
304
+ ................................\
305
+ ................................\
306
+ ................................Y";
307
+
308
+ static const char string_map[257] = "\
309
+ ................................\
310
+ RRzRRRRRRRRRRRRRRRRRRRRRRRRRRRRR\
311
+ RRRRRRRRRRRRRRRRRRRRRRRRRRRRARRR\
312
+ RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR\
313
+ ................................\
314
+ ................................\
315
+ MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\
316
+ PPPPPPPPPPPPPPPPQQQQQQQQ........s";
317
+
318
+ static const char esc_map[257] = "\
319
+ ................................\
320
+ ..B............B................\
321
+ ............................B...\
322
+ ..B...B.......B...B.BU..........\
323
+ ................................\
324
+ ................................\
325
+ ................................\
326
+ ................................~";
327
+
328
+ static const char esc_byte_map[257] = "\
329
+ ................................\
330
+ ..\"............/................\
331
+ ............................\\...\
332
+ ..\b...\f.......\n...\r.\t..........\
333
+ ................................\
334
+ ................................\
335
+ ................................\
336
+ ................................b";
337
+
338
+ static const char u_map[257] = "\
339
+ ................................\
340
+ ................EEEEEEEEEE......\
341
+ .EEEEEE.........................\
342
+ .EEEEEE.........................\
343
+ ................................\
344
+ ................................\
345
+ ................................\
346
+ ................................u";
347
+
348
+ static const char utf_map[257] = "\
349
+ ................................\
350
+ ................................\
351
+ ................................\
352
+ ................................\
353
+ SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS\
354
+ SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS\
355
+ ................................\
356
+ ................................8";
357
+
358
+ static const char space_map[257] = "\
359
+ .........ab..a..................\
360
+ a...............................\
361
+ ................................\
362
+ ................................\
363
+ ................................\
364
+ ................................\
365
+ ................................\
366
+ ................................S";
367
+
368
+ static const char trail_map[257] = "\
369
+ .........ab..a..................\
370
+ a...............................\
371
+ ................................\
372
+ ................................\
373
+ ................................\
374
+ ................................\
375
+ ................................\
376
+ ................................R";
377
+
378
+ static const byte hex_map[256] = "\
379
+ ................................\
380
+ ................\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09......\
381
+ .\x0a\x0b\x0c\x0d\x0e\x0f.........................\
382
+ .\x0a\x0b\x0c\x0d\x0e\x0f.........................\
383
+ ................................\
384
+ ................................\
385
+ ................................\
386
+ ................................";
387
+
388
+ static long double pow_map[401] = {
389
+ 1.0L, 1.0e1L, 1.0e2L, 1.0e3L, 1.0e4L, 1.0e5L, 1.0e6L, 1.0e7L, 1.0e8L, 1.0e9L, 1.0e10L,
390
+ 1.0e11L, 1.0e12L, 1.0e13L, 1.0e14L, 1.0e15L, 1.0e16L, 1.0e17L, 1.0e18L, 1.0e19L, 1.0e20L, 1.0e21L,
391
+ 1.0e22L, 1.0e23L, 1.0e24L, 1.0e25L, 1.0e26L, 1.0e27L, 1.0e28L, 1.0e29L, 1.0e30L, 1.0e31L, 1.0e32L,
392
+ 1.0e33L, 1.0e34L, 1.0e35L, 1.0e36L, 1.0e37L, 1.0e38L, 1.0e39L, 1.0e40L, 1.0e41L, 1.0e42L, 1.0e43L,
393
+ 1.0e44L, 1.0e45L, 1.0e46L, 1.0e47L, 1.0e48L, 1.0e49L, 1.0e50L, 1.0e51L, 1.0e52L, 1.0e53L, 1.0e54L,
394
+ 1.0e55L, 1.0e56L, 1.0e57L, 1.0e58L, 1.0e59L, 1.0e60L, 1.0e61L, 1.0e62L, 1.0e63L, 1.0e64L, 1.0e65L,
395
+ 1.0e66L, 1.0e67L, 1.0e68L, 1.0e69L, 1.0e70L, 1.0e71L, 1.0e72L, 1.0e73L, 1.0e74L, 1.0e75L, 1.0e76L,
396
+ 1.0e77L, 1.0e78L, 1.0e79L, 1.0e80L, 1.0e81L, 1.0e82L, 1.0e83L, 1.0e84L, 1.0e85L, 1.0e86L, 1.0e87L,
397
+ 1.0e88L, 1.0e89L, 1.0e90L, 1.0e91L, 1.0e92L, 1.0e93L, 1.0e94L, 1.0e95L, 1.0e96L, 1.0e97L, 1.0e98L,
398
+ 1.0e99L, 1.0e100L, 1.0e101L, 1.0e102L, 1.0e103L, 1.0e104L, 1.0e105L, 1.0e106L, 1.0e107L, 1.0e108L, 1.0e109L,
399
+ 1.0e110L, 1.0e111L, 1.0e112L, 1.0e113L, 1.0e114L, 1.0e115L, 1.0e116L, 1.0e117L, 1.0e118L, 1.0e119L, 1.0e120L,
400
+ 1.0e121L, 1.0e122L, 1.0e123L, 1.0e124L, 1.0e125L, 1.0e126L, 1.0e127L, 1.0e128L, 1.0e129L, 1.0e130L, 1.0e131L,
401
+ 1.0e132L, 1.0e133L, 1.0e134L, 1.0e135L, 1.0e136L, 1.0e137L, 1.0e138L, 1.0e139L, 1.0e140L, 1.0e141L, 1.0e142L,
402
+ 1.0e143L, 1.0e144L, 1.0e145L, 1.0e146L, 1.0e147L, 1.0e148L, 1.0e149L, 1.0e150L, 1.0e151L, 1.0e152L, 1.0e153L,
403
+ 1.0e154L, 1.0e155L, 1.0e156L, 1.0e157L, 1.0e158L, 1.0e159L, 1.0e160L, 1.0e161L, 1.0e162L, 1.0e163L, 1.0e164L,
404
+ 1.0e165L, 1.0e166L, 1.0e167L, 1.0e168L, 1.0e169L, 1.0e170L, 1.0e171L, 1.0e172L, 1.0e173L, 1.0e174L, 1.0e175L,
405
+ 1.0e176L, 1.0e177L, 1.0e178L, 1.0e179L, 1.0e180L, 1.0e181L, 1.0e182L, 1.0e183L, 1.0e184L, 1.0e185L, 1.0e186L,
406
+ 1.0e187L, 1.0e188L, 1.0e189L, 1.0e190L, 1.0e191L, 1.0e192L, 1.0e193L, 1.0e194L, 1.0e195L, 1.0e196L, 1.0e197L,
407
+ 1.0e198L, 1.0e199L, 1.0e200L, 1.0e201L, 1.0e202L, 1.0e203L, 1.0e204L, 1.0e205L, 1.0e206L, 1.0e207L, 1.0e208L,
408
+ 1.0e209L, 1.0e210L, 1.0e211L, 1.0e212L, 1.0e213L, 1.0e214L, 1.0e215L, 1.0e216L, 1.0e217L, 1.0e218L, 1.0e219L,
409
+ 1.0e220L, 1.0e221L, 1.0e222L, 1.0e223L, 1.0e224L, 1.0e225L, 1.0e226L, 1.0e227L, 1.0e228L, 1.0e229L, 1.0e230L,
410
+ 1.0e231L, 1.0e232L, 1.0e233L, 1.0e234L, 1.0e235L, 1.0e236L, 1.0e237L, 1.0e238L, 1.0e239L, 1.0e240L, 1.0e241L,
411
+ 1.0e242L, 1.0e243L, 1.0e244L, 1.0e245L, 1.0e246L, 1.0e247L, 1.0e248L, 1.0e249L, 1.0e250L, 1.0e251L, 1.0e252L,
412
+ 1.0e253L, 1.0e254L, 1.0e255L, 1.0e256L, 1.0e257L, 1.0e258L, 1.0e259L, 1.0e260L, 1.0e261L, 1.0e262L, 1.0e263L,
413
+ 1.0e264L, 1.0e265L, 1.0e266L, 1.0e267L, 1.0e268L, 1.0e269L, 1.0e270L, 1.0e271L, 1.0e272L, 1.0e273L, 1.0e274L,
414
+ 1.0e275L, 1.0e276L, 1.0e277L, 1.0e278L, 1.0e279L, 1.0e280L, 1.0e281L, 1.0e282L, 1.0e283L, 1.0e284L, 1.0e285L,
415
+ 1.0e286L, 1.0e287L, 1.0e288L, 1.0e289L, 1.0e290L, 1.0e291L, 1.0e292L, 1.0e293L, 1.0e294L, 1.0e295L, 1.0e296L,
416
+ 1.0e297L, 1.0e298L, 1.0e299L, 1.0e300L, 1.0e301L, 1.0e302L, 1.0e303L, 1.0e304L, 1.0e305L, 1.0e306L, 1.0e307L,
417
+ 1.0e308L, 1.0e309L, 1.0e310L, 1.0e311L, 1.0e312L, 1.0e313L, 1.0e314L, 1.0e315L, 1.0e316L, 1.0e317L, 1.0e318L,
418
+ 1.0e319L, 1.0e320L, 1.0e321L, 1.0e322L, 1.0e323L, 1.0e324L, 1.0e325L, 1.0e326L, 1.0e327L, 1.0e328L, 1.0e329L,
419
+ 1.0e330L, 1.0e331L, 1.0e332L, 1.0e333L, 1.0e334L, 1.0e335L, 1.0e336L, 1.0e337L, 1.0e338L, 1.0e339L, 1.0e340L,
420
+ 1.0e341L, 1.0e342L, 1.0e343L, 1.0e344L, 1.0e345L, 1.0e346L, 1.0e347L, 1.0e348L, 1.0e349L, 1.0e350L, 1.0e351L,
421
+ 1.0e352L, 1.0e353L, 1.0e354L, 1.0e355L, 1.0e356L, 1.0e357L, 1.0e358L, 1.0e359L, 1.0e360L, 1.0e361L, 1.0e362L,
422
+ 1.0e363L, 1.0e364L, 1.0e365L, 1.0e366L, 1.0e367L, 1.0e368L, 1.0e369L, 1.0e370L, 1.0e371L, 1.0e372L, 1.0e373L,
423
+ 1.0e374L, 1.0e375L, 1.0e376L, 1.0e377L, 1.0e378L, 1.0e379L, 1.0e380L, 1.0e381L, 1.0e382L, 1.0e383L, 1.0e384L,
424
+ 1.0e385L, 1.0e386L, 1.0e387L, 1.0e388L, 1.0e389L, 1.0e390L, 1.0e391L, 1.0e392L, 1.0e393L, 1.0e394L, 1.0e395L,
425
+ 1.0e396L, 1.0e397L, 1.0e398L, 1.0e399L, 1.0e400L};
426
+
427
+ static VALUE parser_class;
428
+
429
// Encode a code point as a UTF-8 style byte sequence into buf.
//
// Besides the standard 1-4 byte forms this also emits the historical 5 and 6
// byte forms, so any value up to 0x7FFFFFFF is accepted (useful if
// \Uffffffff style escapes are ever supported).
//
// code - value to encode
// buf  - destination, must have room for up to 6 bytes
//
// Returns the number of bytes written, or 0 if code is above 0x7FFFFFFF (in
// which case buf is left untouched).
static size_t unicodeToUtf8(uint32_t code, byte *buf) {
    uint32_t lead;   // marker bits of the first byte
    int      extra;  // number of continuation bytes that follow the first

    if (code <= 0x0000007F) {
        buf[0] = (byte)code;
        return 1;
    }
    if (code <= 0x000007FF) {
        lead  = 0xC0;
        extra = 1;
    } else if (code <= 0x0000FFFF) {
        lead  = 0xE0;
        extra = 2;
    } else if (code <= 0x001FFFFF) {
        lead  = 0xF0;
        extra = 3;
    } else if (code <= 0x03FFFFFF) {
        lead  = 0xF8;
        extra = 4;
    } else if (code <= 0x7FFFFFFF) {
        lead  = 0xFC;
        extra = 5;
    } else {
        return 0;
    }
    // The first byte carries the marker plus the highest bits; every
    // following byte carries the next 6 bits behind a 10xxxxxx marker.
    buf[0] = (byte)(lead | (code >> (6 * extra)));
    for (int k = 1; k <= extra; k++) {
        buf[k] = (byte)(0x80 | ((code >> (6 * (extra - k))) & 0x3F));
    }
    return (size_t)extra + 1;
}
464
+
465
+ static void parser_reset(ojParser p) {
466
+ p->reader = 0;
467
+ memset(&p->num, 0, sizeof(p->num));
468
+ buf_reset(&p->key);
469
+ buf_reset(&p->buf);
470
+ p->map = value_map;
471
+ p->next_map = NULL;
472
+ p->depth = 0;
473
+ }
474
+
475
+ static void parse_error(ojParser p, const char *fmt, ...) {
476
+ va_list ap;
477
+ char buf[256];
478
+
479
+ va_start(ap, fmt);
480
+ vsnprintf(buf, sizeof(buf), fmt, ap);
481
+ va_end(ap);
482
+ rb_raise(oj_json_parser_error_class, "%s at %ld:%ld", buf, p->line, p->col);
483
+ }
484
+
485
+ static void byte_error(ojParser p, byte b) {
486
+ switch (p->map[256]) {
487
+ case 'N': // null_map
488
+ parse_error(p, "expected null");
489
+ break;
490
+ case 'T': // true_map
491
+ parse_error(p, "expected true");
492
+ break;
493
+ case 'F': // false_map
494
+ parse_error(p, "expected false");
495
+ break;
496
+ case 's': // string_map
497
+ parse_error(p, "invalid JSON character 0x%02x", b);
498
+ break;
499
+ default: parse_error(p, "unexpected character '%c' in '%c' mode", b, p->map[256]); break;
500
+ }
501
+ }
502
+
503
+ static void calc_num(ojParser p) {
504
+ switch (p->type) {
505
+ case OJ_INT:
506
+ if (p->num.neg) {
507
+ p->num.fixnum = -p->num.fixnum;
508
+ p->num.neg = false;
509
+ }
510
+ p->funcs[p->stack[p->depth]].add_int(p);
511
+ break;
512
+ case OJ_DECIMAL: {
513
+ long double d = (long double)p->num.fixnum;
514
+
515
+ if (p->num.neg) {
516
+ d = -d;
517
+ }
518
+ if (0 < p->num.shift) {
519
+ d /= pow_map[p->num.shift];
520
+ }
521
+ if (0 < p->num.exp) {
522
+ long double x;
523
+
524
+ if (MAX_POW < p->num.exp) {
525
+ x = powl(10.0L, (long double)p->num.exp);
526
+ } else {
527
+ x = pow_map[p->num.exp];
528
+ }
529
+ if (p->num.exp_neg) {
530
+ d /= x;
531
+ } else {
532
+ d *= x;
533
+ }
534
+ }
535
+ p->num.dub = d;
536
+ p->funcs[p->stack[p->depth]].add_float(p);
537
+ break;
538
+ }
539
+ case OJ_BIG: p->funcs[p->stack[p->depth]].add_big(p);
540
+ default:
541
+ // nothing to do
542
+ break;
543
+ }
544
+ }
545
+
546
+ static void big_change(ojParser p) {
547
+ char buf[32];
548
+ int64_t i = p->num.fixnum;
549
+ int len = 0;
550
+
551
+ buf[sizeof(buf) - 1] = '\0';
552
+ p->buf.tail = p->buf.head;
553
+ switch (p->type) {
554
+ case OJ_INT:
555
+ // If an int then it will fit in the num.raw so no need to check length;
556
+ for (len = sizeof(buf) - 1; 0 < i; len--, i /= 10) {
557
+ buf[len] = '0' + (i % 10);
558
+ }
559
+ if (p->num.neg) {
560
+ buf[len] = '-';
561
+ len--;
562
+ }
563
+ buf_append_string(&p->buf, buf + len + 1, sizeof(buf) - len - 1);
564
+ p->type = OJ_BIG;
565
+ break;
566
+ case OJ_DECIMAL: {
567
+ int shift = p->num.shift;
568
+
569
+ for (len = sizeof(buf) - 1; 0 < i; len--, i /= 10, shift--) {
570
+ if (0 == shift) {
571
+ buf[len] = '.';
572
+ len--;
573
+ }
574
+ buf[len] = '0' + (i % 10);
575
+ }
576
+ if (p->num.neg) {
577
+ buf[len] = '-';
578
+ len--;
579
+ }
580
+ buf_append_string(&p->buf, buf + len + 1, sizeof(buf) - len - 1);
581
+ if (0 < p->num.exp) {
582
+ int x = p->num.exp;
583
+ int d, div;
584
+ bool started = false;
585
+
586
+ buf_append(&p->buf, 'e');
587
+ if (0 < p->num.exp_neg) {
588
+ buf_append(&p->buf, '-');
589
+ }
590
+ for (div = 1000; 0 < div; div /= 10) {
591
+ d = x / div % 10;
592
+ if (started || 0 < d) {
593
+ buf_append(&p->buf, '0' + d);
594
+ }
595
+ }
596
+ }
597
+ p->type = OJ_BIG;
598
+ break;
599
+ }
600
+ default: break;
601
+ }
602
+ }
603
+
604
+ static void parse(ojParser p, const byte *json) {
605
+ const byte *start;
606
+ const byte *b = json;
607
+ int i;
608
+
609
+ #if DEBUG
610
+ printf("*** parse - mode: %c %s\n", p->map[256], (const char *)json);
611
+ #endif
612
+ for (; '\0' != *b; b++) {
613
+ switch (p->map[*b]) {
614
+ case SKIP_NEWLINE:
615
+ p->line++;
616
+ p->col = b - json;
617
+ b++;
618
+ #ifdef SPACE_JUMP
619
+ // for (uint32_t *sj = (uint32_t*)b; 0x20202020 == *sj; sj++) { b += 4; }
620
+ for (uint16_t *sj = (uint16_t *)b; 0x2020 == *sj; sj++) {
621
+ b += 2;
622
+ }
623
+ #endif
624
+ for (; SKIP_CHAR == space_map[*b]; b++) {
625
+ }
626
+ b--;
627
+ break;
628
+ case COLON_COLON: p->map = value_map; break;
629
+ case SKIP_CHAR: break;
630
+ case KEY_QUOTE:
631
+ b++;
632
+ p->key.tail = p->key.head;
633
+ start = b;
634
+ for (; STR_OK == string_map[*b]; b++) {
635
+ }
636
+ buf_append_string(&p->key, (const char *)start, b - start);
637
+ if ('"' == *b) {
638
+ p->map = colon_map;
639
+ break;
640
+ }
641
+ b--;
642
+ p->map = string_map;
643
+ p->next_map = colon_map;
644
+ break;
645
+ case AFTER_COMMA:
646
+ if (0 < p->depth && OBJECT_FUN == p->stack[p->depth]) {
647
+ p->map = key_map;
648
+ } else {
649
+ p->map = comma_map;
650
+ }
651
+ break;
652
+ case VAL_QUOTE:
653
+ b++;
654
+ start = b;
655
+ p->buf.tail = p->buf.head;
656
+ for (; STR_OK == string_map[*b]; b++) {
657
+ }
658
+ buf_append_string(&p->buf, (const char *)start, b - start);
659
+ if ('"' == *b) {
660
+ p->funcs[p->stack[p->depth]].add_str(p);
661
+ p->map = (0 == p->depth) ? value_map : after_map;
662
+ break;
663
+ }
664
+ b--;
665
+ p->map = string_map;
666
+ p->next_map = (0 == p->depth) ? value_map : after_map;
667
+ break;
668
+ case OPEN_OBJECT:
669
+ p->funcs[p->stack[p->depth]].open_object(p);
670
+ p->depth++;
671
+ p->stack[p->depth] = OBJECT_FUN;
672
+ p->map = key1_map;
673
+ break;
674
+ case NUM_CLOSE_OBJECT:
675
+ calc_num(p);
676
+ // flow through
677
+ case CLOSE_OBJECT:
678
+ p->map = (1 == p->depth) ? value_map : after_map;
679
+ if (p->depth <= 0 || OBJECT_FUN != p->stack[p->depth]) {
680
+ p->col = b - json - p->col + 1;
681
+ parse_error(p, "unexpected object close");
682
+ return;
683
+ }
684
+ p->depth--;
685
+ p->funcs[p->stack[p->depth]].close_object(p);
686
+ break;
687
+ case OPEN_ARRAY:
688
+ p->funcs[p->stack[p->depth]].open_array(p);
689
+ p->depth++;
690
+ p->stack[p->depth] = ARRAY_FUN;
691
+ p->map = value_map;
692
+ break;
693
+ case NUM_CLOSE_ARRAY:
694
+ calc_num(p);
695
+ // flow through
696
+ case CLOSE_ARRAY:
697
+ p->map = (1 == p->depth) ? value_map : after_map;
698
+ if (p->depth <= 0 || ARRAY_FUN != p->stack[p->depth]) {
699
+ p->col = b - json - p->col + 1;
700
+ parse_error(p, "unexpected array close");
701
+ return;
702
+ }
703
+ p->depth--;
704
+ p->funcs[p->stack[p->depth]].close_array(p);
705
+ break;
706
+ case NUM_COMMA:
707
+ calc_num(p);
708
+ if (0 < p->depth && OBJECT_FUN == p->stack[p->depth]) {
709
+ p->map = key_map;
710
+ } else {
711
+ p->map = comma_map;
712
+ }
713
+ break;
714
+ case VAL0:
715
+ p->type = OJ_INT;
716
+ p->num.fixnum = 0;
717
+ p->num.neg = false;
718
+ p->num.shift = 0;
719
+ p->num.len = 0;
720
+ p->num.exp = 0;
721
+ p->num.exp_neg = false;
722
+ p->map = zero_map;
723
+ break;
724
+ case VAL_NEG:
725
+ p->type = OJ_INT;
726
+ p->num.fixnum = 0;
727
+ p->num.neg = true;
728
+ p->num.shift = 0;
729
+ p->num.len = 0;
730
+ p->num.exp = 0;
731
+ p->num.exp_neg = false;
732
+ p->map = neg_map;
733
+ break;
734
+ ;
735
+ case VAL_DIGIT:
736
+ p->type = OJ_INT;
737
+ p->num.fixnum = 0;
738
+ p->num.neg = false;
739
+ p->num.shift = 0;
740
+ p->num.exp = 0;
741
+ p->num.exp_neg = false;
742
+ p->num.len = 0;
743
+ p->map = digit_map;
744
+ for (; NUM_DIGIT == digit_map[*b]; b++) {
745
+ uint64_t x = (uint64_t)p->num.fixnum * 10 + (uint64_t)(*b - '0');
746
+
747
+ // Tried just checking for an int less than zero but that
748
+ // fails when optimization is on for some reason with the
749
+ // clang compiler so us a bit mask instead.
750
+ if (x < BIG_LIMIT) {
751
+ p->num.fixnum = (int64_t)x;
752
+ } else {
753
+ big_change(p);
754
+ p->map = big_digit_map;
755
+ break;
756
+ }
757
+ }
758
+ b--;
759
+ break;
760
+ case NUM_DIGIT:
761
+ for (; NUM_DIGIT == digit_map[*b]; b++) {
762
+ uint64_t x = p->num.fixnum * 10 + (uint64_t)(*b - '0');
763
+
764
+ if (x < BIG_LIMIT) {
765
+ p->num.fixnum = (int64_t)x;
766
+ } else {
767
+ big_change(p);
768
+ p->map = big_digit_map;
769
+ break;
770
+ }
771
+ }
772
+ b--;
773
+ break;
774
+ case NUM_DOT:
775
+ p->type = OJ_DECIMAL;
776
+ p->map = dot_map;
777
+ break;
778
+ case NUM_FRAC:
779
+ p->map = frac_map;
780
+ for (; NUM_FRAC == frac_map[*b]; b++) {
781
+ uint64_t x = p->num.fixnum * 10 + (uint64_t)(*b - '0');
782
+
783
+ if (x < FRAC_LIMIT) {
784
+ p->num.fixnum = (int64_t)x;
785
+ p->num.shift++;
786
+ } else {
787
+ big_change(p);
788
+ p->map = big_frac_map;
789
+ break;
790
+ }
791
+ }
792
+ b--;
793
+ break;
794
+ case FRAC_E:
795
+ p->type = OJ_DECIMAL;
796
+ p->map = exp_sign_map;
797
+ break;
798
+ case NUM_ZERO: p->map = zero_map; break;
799
+ case NEG_DIGIT:
800
+ for (; NUM_DIGIT == digit_map[*b]; b++) {
801
+ uint64_t x = p->num.fixnum * 10 + (uint64_t)(*b - '0');
802
+
803
+ if (x < BIG_LIMIT) {
804
+ p->num.fixnum = (int64_t)x;
805
+ } else {
806
+ big_change(p);
807
+ p->map = big_digit_map;
808
+ break;
809
+ }
810
+ }
811
+ b--;
812
+ p->map = digit_map;
813
+ break;
814
+ case EXP_SIGN:
815
+ p->num.exp_neg = ('-' == *b);
816
+ p->map = exp_zero_map;
817
+ break;
818
+ case EXP_DIGIT:
819
+ p->map = exp_map;
820
+ for (; NUM_DIGIT == digit_map[*b]; b++) {
821
+ int16_t x = p->num.exp * 10 + (int16_t)(*b - '0');
822
+
823
+ if (x <= MAX_EXP) {
824
+ p->num.exp = x;
825
+ } else {
826
+ big_change(p);
827
+ p->map = big_exp_map;
828
+ break;
829
+ }
830
+ }
831
+ b--;
832
+ break;
833
+ case BIG_DIGIT:
834
+ start = b;
835
+ for (; NUM_DIGIT == digit_map[*b]; b++) {
836
+ }
837
+ buf_append_string(&p->buf, (const char *)start, b - start);
838
+ b--;
839
+ break;
840
+ case BIG_DOT:
841
+ buf_append(&p->buf, '.');
842
+ p->map = big_dot_map;
843
+ break;
844
+ case BIG_FRAC:
845
+ p->map = big_frac_map;
846
+ start = b;
847
+ for (; NUM_FRAC == frac_map[*b]; b++) {
848
+ }
849
+ buf_append_string(&p->buf, (const char *)start, b - start);
850
+ b--;
851
+ break;
852
+ case BIG_E:
853
+ buf_append(&p->buf, *b);
854
+ p->map = big_exp_sign_map;
855
+ break;
856
+ case BIG_EXP_SIGN:
857
+ buf_append(&p->buf, *b);
858
+ p->map = big_exp_zero_map;
859
+ break;
860
+ case BIG_EXP:
861
+ start = b;
862
+ for (; NUM_DIGIT == digit_map[*b]; b++) {
863
+ }
864
+ buf_append_string(&p->buf, (const char *)start, b - start);
865
+ b--;
866
+ p->map = big_exp_map;
867
+ break;
868
+ case NUM_SPC: calc_num(p); break;
869
+ case NUM_NEWLINE: calc_num(p); b++;
870
+ #ifdef SPACE_JUMP
871
+ // for (uint32_t *sj = (uint32_t*)b; 0x20202020 == *sj; sj++) { b += 4; }
872
+ for (uint16_t *sj = (uint16_t *)b; 0x2020 == *sj; sj++) {
873
+ b += 2;
874
+ }
875
+ #endif
876
+ for (; SKIP_CHAR == space_map[*b]; b++) {
877
+ }
878
+ b--;
879
+ break;
880
+ case STR_OK:
881
+ start = b;
882
+ for (; STR_OK == string_map[*b]; b++) {
883
+ }
884
+ if (':' == p->next_map[256]) {
885
+ buf_append_string(&p->key, (const char *)start, b - start);
886
+ } else {
887
+ buf_append_string(&p->buf, (const char *)start, b - start);
888
+ }
889
+ if ('"' == *b) {
890
+ p->map = p->next_map;
891
+ break;
892
+ }
893
+ b--;
894
+ break;
895
+ case STR_SLASH: p->map = esc_map; break;
896
+ case STR_QUOTE: p->map = p->next_map; break;
897
+ case ESC_U:
898
+ p->map = u_map;
899
+ p->ri = 0;
900
+ p->ucode = 0;
901
+ break;
902
+ case U_OK:
903
+ p->ri++;
904
+ p->ucode = p->ucode << 4 | (uint32_t)hex_map[*b];
905
+ if (4 <= p->ri) {
906
+ byte utf8[8];
907
+ size_t ulen = unicodeToUtf8(p->ucode, utf8);
908
+
909
+ if (0 < ulen) {
910
+ if (':' == p->next_map[256]) {
911
+ buf_append_string(&p->key, (const char *)utf8, ulen);
912
+ } else {
913
+ buf_append_string(&p->buf, (const char *)utf8, ulen);
914
+ }
915
+ } else {
916
+ parse_error(p, "invalid unicode");
917
+ return;
918
+ }
919
+ p->map = string_map;
920
+ }
921
+ break;
922
+ case ESC_OK:
923
+ if (':' == p->next_map[256]) {
924
+ buf_append(&p->key, esc_byte_map[*b]);
925
+ } else {
926
+ buf_append(&p->buf, esc_byte_map[*b]);
927
+ }
928
+ p->map = string_map;
929
+ break;
930
+ case UTF1:
931
+ p->ri = 1;
932
+ p->map = utf_map;
933
+ if (':' == p->next_map[256]) {
934
+ buf_append(&p->key, *b);
935
+ } else {
936
+ buf_append(&p->buf, *b);
937
+ }
938
+ break;
939
+ case UTF2:
940
+ p->ri = 2;
941
+ p->map = utf_map;
942
+ if (':' == p->next_map[256]) {
943
+ buf_append(&p->key, *b);
944
+ } else {
945
+ buf_append(&p->buf, *b);
946
+ }
947
+ break;
948
+ case UTF3:
949
+ p->ri = 3;
950
+ p->map = utf_map;
951
+ if (':' == p->next_map[256]) {
952
+ buf_append(&p->key, *b);
953
+ } else {
954
+ buf_append(&p->buf, *b);
955
+ }
956
+ break;
957
+ case UTFX:
958
+ p->ri--;
959
+ if (':' == p->next_map[256]) {
960
+ buf_append(&p->key, *b);
961
+ } else {
962
+ buf_append(&p->buf, *b);
963
+ }
964
+ if (p->ri <= 0) {
965
+ p->map = string_map;
966
+ }
967
+ break;
968
+ case VAL_NULL:
969
+ if ('u' == b[1] && 'l' == b[2] && 'l' == b[3]) {
970
+ b += 3;
971
+ p->funcs[p->stack[p->depth]].add_null(p);
972
+ p->map = (0 == p->depth) ? value_map : after_map;
973
+ break;
974
+ }
975
+ p->ri = 0;
976
+ *p->token = *b++;
977
+ for (i = 1; i < 4; i++) {
978
+ if ('\0' == *b) {
979
+ p->ri = i;
980
+ break;
981
+ } else {
982
+ p->token[i] = *b++;
983
+ }
984
+ }
985
+ if (0 < p->ri) {
986
+ p->map = null_map;
987
+ b--;
988
+ break;
989
+ }
990
+ p->col = b - json - p->col;
991
+ parse_error(p, "expected null");
992
+ return;
993
+ case VAL_TRUE:
994
+ if ('r' == b[1] && 'u' == b[2] && 'e' == b[3]) {
995
+ b += 3;
996
+ p->funcs[p->stack[p->depth]].add_true(p);
997
+ p->map = (0 == p->depth) ? value_map : after_map;
998
+ break;
999
+ }
1000
+ p->ri = 0;
1001
+ *p->token = *b++;
1002
+ for (i = 1; i < 4; i++) {
1003
+ if ('\0' == *b) {
1004
+ p->ri = i;
1005
+ break;
1006
+ } else {
1007
+ p->token[i] = *b++;
1008
+ }
1009
+ }
1010
+ if (0 < p->ri) {
1011
+ p->map = true_map;
1012
+ b--;
1013
+ break;
1014
+ }
1015
+ p->col = b - json - p->col;
1016
+ parse_error(p, "expected true");
1017
+ return;
1018
+ case VAL_FALSE:
1019
+ if ('a' == b[1] && 'l' == b[2] && 's' == b[3] && 'e' == b[4]) {
1020
+ b += 4;
1021
+ p->funcs[p->stack[p->depth]].add_false(p);
1022
+ p->map = (0 == p->depth) ? value_map : after_map;
1023
+ break;
1024
+ }
1025
+ p->ri = 0;
1026
+ *p->token = *b++;
1027
+ for (i = 1; i < 5; i++) {
1028
+ if ('\0' == *b) {
1029
+ p->ri = i;
1030
+ break;
1031
+ } else {
1032
+ p->token[i] = *b++;
1033
+ }
1034
+ }
1035
+ if (0 < p->ri) {
1036
+ p->map = false_map;
1037
+ b--;
1038
+ break;
1039
+ }
1040
+ p->col = b - json - p->col;
1041
+ parse_error(p, "expected false");
1042
+ return;
1043
+ case TOKEN_OK:
1044
+ p->token[p->ri] = *b;
1045
+ p->ri++;
1046
+ switch (p->map[256]) {
1047
+ case 'N':
1048
+ if (4 == p->ri) {
1049
+ if (0 != strncmp("null", p->token, 4)) {
1050
+ p->col = b - json - p->col;
1051
+ parse_error(p, "expected null");
1052
+ return;
1053
+ }
1054
+ p->funcs[p->stack[p->depth]].add_null(p);
1055
+ p->map = (0 == p->depth) ? value_map : after_map;
1056
+ }
1057
+ break;
1058
+ case 'F':
1059
+ if (5 == p->ri) {
1060
+ if (0 != strncmp("false", p->token, 5)) {
1061
+ p->col = b - json - p->col;
1062
+ parse_error(p, "expected false");
1063
+ return;
1064
+ }
1065
+ p->funcs[p->stack[p->depth]].add_false(p);
1066
+ p->map = (0 == p->depth) ? value_map : after_map;
1067
+ }
1068
+ break;
1069
+ case 'T':
1070
+ if (4 == p->ri) {
1071
+ if (0 != strncmp("true", p->token, 4)) {
1072
+ p->col = b - json - p->col;
1073
+ parse_error(p, "expected true");
1074
+ return;
1075
+ }
1076
+ p->funcs[p->stack[p->depth]].add_true(p);
1077
+ p->map = (0 == p->depth) ? value_map : after_map;
1078
+ }
1079
+ break;
1080
+ default:
1081
+ p->col = b - json - p->col;
1082
+ parse_error(p, "parse error");
1083
+ return;
1084
+ }
1085
+ break;
1086
+ case CHAR_ERR: byte_error(p, *b); return;
1087
+ default: break;
1088
+ }
1089
+ if (0 == p->depth && 'v' == p->map[256] && p->just_one) {
1090
+ p->map = trail_map;
1091
+ }
1092
+ }
1093
+ if (0 == p->depth) {
1094
+ switch (p->map[256]) {
1095
+ case '0':
1096
+ case 'd':
1097
+ case 'f':
1098
+ case 'z':
1099
+ case 'X':
1100
+ case 'D':
1101
+ case 'g':
1102
+ case 'B':
1103
+ case 'Y': calc_num(p); break;
1104
+ }
1105
+ }
1106
+ return;
1107
+ }
1108
+
1109
+ static void parser_free(void *ptr) {
1110
+ ojParser p;
1111
+
1112
+ if (0 == ptr) {
1113
+ return;
1114
+ }
1115
+ p = (ojParser)ptr;
1116
+ buf_cleanup(&p->key);
1117
+ buf_cleanup(&p->buf);
1118
+ p->free(p);
1119
+ xfree(ptr);
1120
+ }
1121
+
1122
+ static void parser_mark(void *ptr) {
1123
+ if (NULL != ptr) {
1124
+ ojParser p = (ojParser)ptr;
1125
+
1126
+ if (0 != p->reader) {
1127
+ rb_gc_mark(p->reader);
1128
+ }
1129
+ p->mark(p);
1130
+ }
1131
+ }
1132
+
1133
+ extern void oj_set_parser_validator(ojParser p);
1134
+ extern void oj_set_parser_saj(ojParser p);
1135
+ extern void oj_set_parser_usual(ojParser p);
1136
+ extern void oj_set_parser_debug(ojParser p);
1137
+
1138
+ /* Document-method: new
1139
+ * call-seq: new(mode=nil)
1140
+ *
1141
+ * Creates a new Parser with the specified mode. If no mode is provided
1142
+ * validation is assumed.
1143
+ */
1144
+ static VALUE parser_new(VALUE self, VALUE mode) {
1145
+ ojParser p = ALLOC(struct _ojParser);
1146
+
1147
+ #if HAVE_RB_EXT_RACTOR_SAFE
1148
+ // This doesn't seem to do anything.
1149
+ rb_ext_ractor_safe(true);
1150
+ #endif
1151
+ memset(p, 0, sizeof(struct _ojParser));
1152
+ buf_init(&p->key);
1153
+ buf_init(&p->buf);
1154
+
1155
+ p->map = value_map;
1156
+ if (Qnil == mode) {
1157
+ oj_set_parser_validator(p);
1158
+ } else {
1159
+ const char *ms = NULL;
1160
+
1161
+ switch (rb_type(mode)) {
1162
+ case RUBY_T_SYMBOL:
1163
+ mode = rb_sym2str(mode);
1164
+ // fall through
1165
+ case RUBY_T_STRING: ms = RSTRING_PTR(mode); break;
1166
+ default: rb_raise(rb_eArgError, "mode must be :validate, :usual, :saj, or :object");
1167
+ }
1168
+ if (0 == strcmp("usual", ms) || 0 == strcmp("standard", ms) || 0 == strcmp("strict", ms) ||
1169
+ 0 == strcmp("compat", ms)) {
1170
+ oj_set_parser_usual(p);
1171
+ } else if (0 == strcmp("object", ms)) {
1172
+ // TBD
1173
+ } else if (0 == strcmp("saj", ms)) {
1174
+ oj_set_parser_saj(p);
1175
+ } else if (0 == strcmp("validate", ms)) {
1176
+ oj_set_parser_validator(p);
1177
+ } else if (0 == strcmp("debug", ms)) {
1178
+ oj_set_parser_debug(p);
1179
+ } else {
1180
+ rb_raise(rb_eArgError, "mode must be :validate, :usual, :saj, or :object");
1181
+ }
1182
+ }
1183
+ return Data_Wrap_Struct(parser_class, parser_mark, parser_free, p);
1184
+ }
1185
+
1186
+ /* Document-method: method_missing(value)
1187
+ * call-seq: method_missing(value)
1188
+ *
1189
+ * Methods not handled by the parser are passed to the delegate. The methods
1190
+ * supported by delegate are:
1191
+ *
1192
+ * - *:validate*
1193
+ * - no options
1194
+ *
1195
+ * - *:saj*
1196
+ * - _cache_keys=_ sets the value of the _cache_keys_ flag.
1197
+ * - _cache_keys_ returns the value of the _cache_keys_ flag.
1198
+ * - _cache_strings=_ sets the value of the _cache_strings_ to an positive integer less than 35. Strings shorter than
1199
+ * that length are cached.
1200
+ * - _cache_strings_ returns the value of the _cache_strings_ integer value.
1201
+ * - _handler=_ sets the SAJ handler
1202
+ * - _handler_ returns the SAJ handler
1203
+ *
1204
+ * - *:usual*
1205
+ * - _cache_keys=_ sets the value of the _cache_keys_ flag.
1206
+ * - _cache_keys_ returns the value of the _cache_keys_ flag.
1207
+ * - _cache_strings=_ sets the value of the _cache_strings_ to an positive integer less than 35. Strings shorter than
1208
+ * that length are cached.
1209
+ * - _cache_strings_ returns the value of the _cache_strings_ integer value.
1210
+ * - _cache_expunge=_ sets the value of the _cache_expunge_ where 0 never expunges, 1 expunges slowly, 2 expunges
1211
+ * faster, and 3 or higher expunges agressively.
1212
+ * - _cache_expunge_ returns the value of the _cache_expunge_ integer value.
1213
+ * - _capacity=_ sets the capacity of the parser. The parser grows automatically but can be updated directly with this
1214
+ * call.
1215
+ * - _capacity_ returns the current capacity of the parser's internal stack.
1216
+ * - _create_id_ returns the value _create_id_ or _nil_ if there is no _create_id_.
1217
+ * - _create_id=_ sets the value _create_id_ or if _nil_ unsets it. Parsed JSON objects that include the specified
1218
+ * element use the element value as the name of the class to create an object from instead of a Hash.
1219
+ * - _decimal=_ sets the approach to how decimals are parser. If _:auto_ then the decimals with significant digits are
1220
+ * 16 or less are Floats and long ones are BigDecimal. _:ruby_ uses a call to Ruby to convert a string to a Float.
1221
+ * _:float_ always generates a Float. _:bigdecimal_ always results in a BigDecimal.
1222
+ * - _decimal_ returns the value of the decimal conversion option which can be :auto (default), :ruby, :float, or
1223
+ * :bigdecimal.
1224
+ * - _ignore_json_create_ returns the value of the _ignore_json_create_ flag.
1225
+ * - _ignore_json_create=_ sets the value of the _ignore_json_create_ flag. When set the class json_create method is
1226
+ * ignored on parsing in favor of creating an instance and populating directly.
1227
+ * - _missing_class_ return the value of the _missing_class_ indicator.
1228
+ * - _missing_class=_ sets the value of the _missing_class_ flag. Valid values are _:auto_ which creates any missing
1229
+ * classes on parse, :ignore which ignores and continues as a Hash (default), and :raise which raises an exception if
1230
+ * the class is not found.
1231
+ * - _omit_null=_ sets the _omit_null_ flag. If true then null values in a map or object are omitted from the
1232
+ * resulting Hash or Object.
1233
+ * - _omit_null_ returns the value of the _omit_null_ flag.
1234
+ * - _symbol_keys=_ sets the flag that indicates Hash keys should be parsed to Symbols versus Strings.
1235
+ * - _symbol_keys_ returns the value of the _symbol_keys_ flag.
1236
+ */
1237
+ static VALUE parser_missing(int argc, VALUE *argv, VALUE self) {
1238
+ ojParser p = (ojParser)DATA_PTR(self);
1239
+ const char * key = NULL;
1240
+ volatile VALUE rkey = *argv;
1241
+ volatile VALUE rv = Qnil;
1242
+
1243
+ #if HAVE_RB_EXT_RACTOR_SAFE
1244
+ // This doesn't seem to do anything.
1245
+ rb_ext_ractor_safe(true);
1246
+ #endif
1247
+ switch (rb_type(rkey)) {
1248
+ case RUBY_T_SYMBOL:
1249
+ rkey = rb_sym2str(rkey);
1250
+ // fall through
1251
+ case RUBY_T_STRING: key = rb_string_value_ptr(&rkey); break;
1252
+ default: rb_raise(rb_eArgError, "option method must be a symbol or string");
1253
+ }
1254
+ if (1 < argc) {
1255
+ rv = argv[1];
1256
+ }
1257
+ return p->option(p, key, rv);
1258
+ }
1259
+
1260
+ /* Document-method: parse(json)
1261
+ * call-seq: parse(json)
1262
+ *
1263
+ * Parse a JSON string.
1264
+ *
1265
+ * Returns the result according to the delegate of the parser.
1266
+ */
1267
+ static VALUE parser_parse(VALUE self, VALUE json) {
1268
+ ojParser p = (ojParser)DATA_PTR(self);
1269
+
1270
+ Check_Type(json, T_STRING);
1271
+ parser_reset(p);
1272
+ p->start(p);
1273
+ parse(p, (const byte *)rb_string_value_ptr(&json));
1274
+
1275
+ return p->result(p);
1276
+ }
1277
+
1278
+ static VALUE load_rescue(VALUE self, VALUE x) {
1279
+ // Normal EOF. No action needed other than to stop loading.
1280
+ return Qfalse;
1281
+ }
1282
+
1283
+ static VALUE load(VALUE self) {
1284
+ ojParser p = (ojParser)DATA_PTR(self);
1285
+ volatile VALUE rbuf = rb_str_new2("");
1286
+
1287
+ p->start(p);
1288
+ while (true) {
1289
+ rb_funcall(p->reader, oj_readpartial_id, 2, INT2NUM(16385), rbuf);
1290
+ if (0 < RSTRING_LEN(rbuf)) {
1291
+ parse(p, (byte *)StringValuePtr(rbuf));
1292
+ }
1293
+ }
1294
+ return Qtrue;
1295
+ }
1296
+
1297
+ /* Document-method: load(reader)
1298
+ * call-seq: load(reader)
1299
+ *
1300
+ * Parse a JSON stream.
1301
+ *
1302
+ * Returns the result according to the delegate of the parser.
1303
+ */
1304
+ static VALUE parser_load(VALUE self, VALUE reader) {
1305
+ ojParser p = (ojParser)DATA_PTR(self);
1306
+
1307
+ parser_reset(p);
1308
+ p->reader = reader;
1309
+ rb_rescue2(load, self, load_rescue, Qnil, rb_eEOFError, 0);
1310
+
1311
+ return p->result(p);
1312
+ }
1313
+
1314
+ /* Document-method: file(filename)
1315
+ * call-seq: file(filename)
1316
+ *
1317
+ * Parse a JSON file.
1318
+ *
1319
+ * Returns the result according to the delegate of the parser.
1320
+ */
1321
+ static VALUE parser_file(VALUE self, VALUE filename) {
1322
+ ojParser p = (ojParser)DATA_PTR(self);
1323
+ const char *path;
1324
+ int fd;
1325
+
1326
+ Check_Type(filename, T_STRING);
1327
+ path = rb_string_value_ptr(&filename);
1328
+
1329
+ parser_reset(p);
1330
+ p->start(p);
1331
+
1332
+ if (0 > (fd = open(path, O_RDONLY))) {
1333
+ rb_raise(rb_eIOError, "error opening %s", path);
1334
+ }
1335
+ #if USE_THREAD_LIMIT
1336
+ struct stat info;
1337
+ // st_size will be 0 if not a file
1338
+ if (0 == fstat(fd, &info) && USE_THREAD_LIMIT < info.st_size) {
1339
+ // Use threaded version.
1340
+ // TBD only if has pthreads
1341
+ // TBD parse_large(p, fd);
1342
+ return p->result(p);
1343
+ }
1344
+ #endif
1345
+ byte buf[16385];
1346
+ size_t size = sizeof(buf) - 1;
1347
+ size_t rsize;
1348
+
1349
+ while (true) {
1350
+ if (0 < (rsize = read(fd, buf, size))) {
1351
+ buf[rsize] = '\0';
1352
+ parse(p, buf);
1353
+ }
1354
+ if (rsize <= 0) {
1355
+ if (0 != rsize) {
1356
+ rb_raise(rb_eIOError, "error reading from %s", path);
1357
+ }
1358
+ break;
1359
+ }
1360
+ }
1361
+ return p->result(p);
1362
+ }
1363
+
1364
+ /* Document-method: just_one
1365
+ * call-seq: just_one
1366
+ *
1367
+ * Returns the current state of the just_one [_Boolean_] option.
1368
+ */
1369
+ static VALUE parser_just_one(VALUE self) {
1370
+ ojParser p = (ojParser)DATA_PTR(self);
1371
+
1372
+ return p->just_one ? Qtrue : Qfalse;
1373
+ }
1374
+
1375
+ /* Document-method: just_one=
1376
+ * call-seq: just_one=(value)
1377
+ *
1378
+ * Sets the *just_one* option which limits the parsing of a string or or
1379
+ * stream to a single JSON element.
1380
+ *
1381
+ * Returns the current state of the just_one [_Boolean_] option.
1382
+ */
1383
+ static VALUE parser_just_one_set(VALUE self, VALUE v) {
1384
+ ojParser p = (ojParser)DATA_PTR(self);
1385
+
1386
+ p->just_one = (Qtrue == v);
1387
+
1388
+ return p->just_one ? Qtrue : Qfalse;
1389
+ }
1390
+
1391
+ static VALUE usual_parser = Qundef;
1392
+
1393
+ /* Document-method: usual
1394
+ * call-seq: usual
1395
+ *
1396
+ * Returns the default usual parser. Note the default usual parser can not be
1397
+ * used concurrently in more than one thread.
1398
+ */
1399
+ static VALUE parser_usual(VALUE self) {
1400
+ if (Qundef == usual_parser) {
1401
+ ojParser p = ALLOC(struct _ojParser);
1402
+
1403
+ memset(p, 0, sizeof(struct _ojParser));
1404
+ buf_init(&p->key);
1405
+ buf_init(&p->buf);
1406
+ p->map = value_map;
1407
+ oj_set_parser_usual(p);
1408
+ usual_parser = Data_Wrap_Struct(parser_class, parser_mark, parser_free, p);
1409
+ rb_gc_register_address(&usual_parser);
1410
+ }
1411
+ return usual_parser;
1412
+ }
1413
+
1414
+ static VALUE saj_parser = Qundef;
1415
+
1416
+ /* Document-method: saj
1417
+ * call-seq: saj
1418
+ *
1419
+ * Returns the default saj parser. Note the default SAJ parser can not be used
1420
+ * concurrently in more than one thread.
1421
+ */
1422
+ static VALUE parser_saj(VALUE self) {
1423
+ if (Qundef == saj_parser) {
1424
+ ojParser p = ALLOC(struct _ojParser);
1425
+
1426
+ memset(p, 0, sizeof(struct _ojParser));
1427
+ buf_init(&p->key);
1428
+ buf_init(&p->buf);
1429
+ p->map = value_map;
1430
+ oj_set_parser_saj(p);
1431
+ saj_parser = Data_Wrap_Struct(parser_class, parser_mark, parser_free, p);
1432
+ rb_gc_register_address(&saj_parser);
1433
+ }
1434
+ return saj_parser;
1435
+ }
1436
+
1437
+ static VALUE validate_parser = Qundef;
1438
+
1439
+ /* Document-method: validate
1440
+ * call-seq: validate
1441
+ *
1442
+ * Returns the default validate parser.
1443
+ */
1444
+ static VALUE parser_validate(VALUE self) {
1445
+ if (Qundef == validate_parser) {
1446
+ ojParser p = ALLOC(struct _ojParser);
1447
+
1448
+ memset(p, 0, sizeof(struct _ojParser));
1449
+ buf_init(&p->key);
1450
+ buf_init(&p->buf);
1451
+ p->map = value_map;
1452
+ oj_set_parser_validator(p);
1453
+ validate_parser = Data_Wrap_Struct(parser_class, parser_mark, parser_free, p);
1454
+ rb_gc_register_address(&validate_parser);
1455
+ }
1456
+ return validate_parser;
1457
+ }
1458
+
1459
+ /* Document-class: Oj::Parser
1460
+ *
1461
+ * A reusable parser that makes use of named delegates to determine the
1462
+ * handling of parsed data. Delegates are available for validation, a callback
1463
+ * parser (SAJ), and a usual delegate that builds Ruby objects as parsing
1464
+ * proceeds.
1465
+ *
1466
+ * This parser is considerably faster than the older Oj.parse call and
1467
+ * isolates options to just the parser so that other parts of the code are not
1468
+ * forced to use the same options.
1469
+ */
1470
+ void oj_parser_init() {
1471
+ parser_class = rb_define_class_under(Oj, "Parser", rb_cObject);
1472
+ rb_define_module_function(parser_class, "new", parser_new, 1);
1473
+ rb_define_method(parser_class, "parse", parser_parse, 1);
1474
+ rb_define_method(parser_class, "load", parser_load, 1);
1475
+ rb_define_method(parser_class, "file", parser_file, 1);
1476
+ rb_define_method(parser_class, "just_one", parser_just_one, 0);
1477
+ rb_define_method(parser_class, "just_one=", parser_just_one_set, 1);
1478
+ rb_define_method(parser_class, "method_missing", parser_missing, -1);
1479
+
1480
+ rb_define_module_function(parser_class, "usual", parser_usual, 0);
1481
+ rb_define_module_function(parser_class, "saj", parser_saj, 0);
1482
+ rb_define_module_function(parser_class, "validate", parser_validate, 0);
1483
+ }