oj 3.12.3 → 3.13.0

Sign up to get free protection for your applications and to get access to all the features.
data/ext/oj/oj.h CHANGED
@@ -143,7 +143,7 @@ typedef struct _options {
143
143
  char safe; // YesNo
144
144
  char sec_prec_set; // boolean (0 or 1)
145
145
  char ignore_under; // YesNo - ignore attrs starting with _ if true in object and custom modes
146
- char cache_keys; // YexNo
146
+ char cache_keys; // YesNo
147
147
  char cache_str; // string short than or equal to this are cache
148
148
  int64_t int_range_min; // dump numbers below as string
149
149
  int64_t int_range_max; // dump numbers above as string
data/ext/oj/parse.c CHANGED
@@ -964,12 +964,12 @@ static VALUE protect_parse(VALUE pip) {
964
964
  extern int oj_utf8_index;
965
965
 
966
966
  static void oj_pi_set_input_str(ParseInfo pi, volatile VALUE *inputp) {
967
- rb_encoding *enc = rb_to_encoding(rb_obj_encoding(*inputp));
967
+ rb_encoding *enc = rb_enc_get(*inputp);
968
968
 
969
- if (rb_utf8_encoding() != enc) {
970
- *inputp = rb_str_conv_enc(*inputp, enc, rb_utf8_encoding());
969
+ if (oj_utf8_encoding != enc) {
970
+ *inputp = rb_str_conv_enc(*inputp, enc, oj_utf8_encoding);
971
971
  }
972
- pi->json = rb_string_value_ptr((VALUE *)inputp);
972
+ pi->json = RSTRING_PTR(*inputp);
973
973
  pi->end = pi->json + RSTRING_LEN(*inputp);
974
974
  }
975
975
 
data/ext/oj/parser.c ADDED
@@ -0,0 +1,1527 @@
1
+ // Copyright (c) 2020, 2021, Peter Ohler, All rights reserved.
2
+
3
+ #include "parser.h"
4
+
5
+ #include <errno.h>
6
+ #include <fcntl.h>
7
+ #include <limits.h>
8
+ #include <math.h>
9
+ #include <pthread.h>
10
+ #include <signal.h>
11
+ #include <stdarg.h>
12
+ #include <stdbool.h>
13
+ #include <stdint.h>
14
+ #include <stdio.h>
15
+ #include <stdlib.h>
16
+ #include <string.h>
17
+ #include <sys/stat.h>
18
+ #include <sys/time.h>
19
+ #include <time.h>
20
+ #include <unistd.h>
21
+
22
+ #include "oj.h"
23
+
24
// Set to 1 to have parse() print each chunk it is given (see the #if DEBUG
// section in parse()).
#define DEBUG 0

#define USE_THREAD_LIMIT 0
// #define USE_THREAD_LIMIT 100000

// Largest exponent kept in num.exp before the number is handed over to the
// big (textual) number path.
#define MAX_EXP 4932
// max in the pow_map
#define MAX_POW 400

#define MIN_SLEEP (1000000000LL / (double)CLOCKS_PER_SEC)
// LLONG_MAX is 9,223,372,036,854,775,807. Accumulated integer digits must
// stay below one tenth of that. The expansion is parenthesized so the macro
// behaves as a single value next to any operator (e.g. `n % BIG_LIMIT`).
#define BIG_LIMIT (LLONG_MAX / 10)
// Upper bound for the accumulated digits once a fraction is being read.
#define FRAC_LIMIT 10000000000000000ULL

// Give better performance with indented JSON but worse with unindented.
//#define SPACE_JUMP
39
+
40
// Action codes. Every lookup map below holds one of these characters for each
// possible input byte; parse() switches on p->map[*b] to pick what to do with
// the byte. The values are the literal characters used in the map strings, so
// they must not be changed independently of the maps.
enum {
    // whitespace
    SKIP_CHAR    = 'a',
    SKIP_NEWLINE = 'b',

    // start of a value
    VAL_NULL  = 'c',
    VAL_TRUE  = 'd',
    VAL_FALSE = 'e',
    VAL_NEG   = 'f',
    VAL0      = 'g',
    VAL_DIGIT = 'h',
    VAL_QUOTE = 'i',

    // structure
    OPEN_ARRAY   = 'k',
    OPEN_OBJECT  = 'l',
    CLOSE_ARRAY  = 'm',
    CLOSE_OBJECT = 'n',
    AFTER_COMMA  = 'o',
    KEY_QUOTE    = 'p',
    COLON_COLON  = 'q',

    // numbers held in num.fixnum / num.exp
    NUM_SPC          = 'r',
    NUM_NEWLINE      = 's',
    NUM_DOT          = 't',
    NUM_COMMA        = 'u',
    NUM_FRAC         = 'v',
    FRAC_E           = 'w',
    EXP_SIGN         = 'x',
    EXP_DIGIT        = 'y',
    NEG_DIGIT        = '-',
    NUM_DIGIT        = 'N',
    NUM_ZERO         = 'O',
    NUM_CLOSE_OBJECT = 'G',
    NUM_CLOSE_ARRAY  = 'H',

    // numbers kept as text in the buffer
    BIG_DIGIT    = 'C',
    BIG_DOT      = 'D',
    BIG_FRAC     = 'I',
    BIG_E        = 'J',
    BIG_EXP_SIGN = 'K',
    BIG_EXP      = 'L',

    // strings, escapes and UTF-8 sequences
    STR_QUOTE = 'z',
    STR_SLASH = 'A',
    STR_OK    = 'R',
    ESC_OK    = 'B',
    ESC_U     = 'U',
    U_OK      = 'E',
    UTF1      = 'M',  // expect 1 more follow byte
    UTF2      = 'P',  // expect 2 more follow bytes
    UTF3      = 'Q',  // expect 3 more follow bytes
    UTFX      = 'S',  // following bytes

    // literal tokens, errors and end of input
    TOKEN_OK = 'F',
    CHAR_ERR = '.',
    DONE     = 'X',
};
90
+
91
+ /*
92
+ 0123456789abcdef0123456789abcdef */
93
+ static const char value_map[257] = "\
94
+ X........ab..a..................\
95
+ a.i..........f..ghhhhhhhhh......\
96
+ ...........................k.m..\
97
+ ......e.......c.....d......l.n..\
98
+ ................................\
99
+ ................................\
100
+ ................................\
101
+ ................................v";
102
+
103
+ static const char null_map[257] = "\
104
+ ................................\
105
+ ............o...................\
106
+ ................................\
107
+ ............F........F..........\
108
+ ................................\
109
+ ................................\
110
+ ................................\
111
+ ................................N";
112
+
113
+ static const char true_map[257] = "\
114
+ ................................\
115
+ ............o...................\
116
+ ................................\
117
+ .....F............F..F..........\
118
+ ................................\
119
+ ................................\
120
+ ................................\
121
+ ................................T";
122
+
123
+ static const char false_map[257] = "\
124
+ ................................\
125
+ ............o...................\
126
+ ................................\
127
+ .F...F......F......F............\
128
+ ................................\
129
+ ................................\
130
+ ................................\
131
+ ................................F";
132
+
133
+ static const char comma_map[257] = "\
134
+ .........ab..a..................\
135
+ a.i..........f..ghhhhhhhhh......\
136
+ ...........................k....\
137
+ ......e.......c.....d......l....\
138
+ ................................\
139
+ ................................\
140
+ ................................\
141
+ ................................,";
142
+
143
+ static const char after_map[257] = "\
144
+ X........ab..a..................\
145
+ a...........o...................\
146
+ .............................m..\
147
+ .............................n..\
148
+ ................................\
149
+ ................................\
150
+ ................................\
151
+ ................................a";
152
+
153
+ static const char key1_map[257] = "\
154
+ .........ab..a..................\
155
+ a.p.............................\
156
+ ................................\
157
+ .............................n..\
158
+ ................................\
159
+ ................................\
160
+ ................................\
161
+ ................................K";
162
+
163
+ static const char key_map[257] = "\
164
+ .........ab..a..................\
165
+ a.p.............................\
166
+ ................................\
167
+ ................................\
168
+ ................................\
169
+ ................................\
170
+ ................................\
171
+ ................................k";
172
+
173
+ static const char colon_map[257] = "\
174
+ .........ab..a..................\
175
+ a.........................q.....\
176
+ ................................\
177
+ ................................\
178
+ ................................\
179
+ ................................\
180
+ ................................\
181
+ ................................:";
182
+
183
+ static const char neg_map[257] = "\
184
+ ................................\
185
+ ................O---------......\
186
+ ................................\
187
+ ................................\
188
+ ................................\
189
+ ................................\
190
+ ................................\
191
+ ................................-";
192
+
193
+ static const char zero_map[257] = "\
194
+ .........rs..r..................\
195
+ r...........u.t.................\
196
+ .............................H..\
197
+ .............................G..\
198
+ ................................\
199
+ ................................\
200
+ ................................\
201
+ ................................0";
202
+
203
+ static const char digit_map[257] = "\
204
+ .........rs..r..................\
205
+ r...........u.t.NNNNNNNNNN......\
206
+ .....w.......................H..\
207
+ .....w.......................G..\
208
+ ................................\
209
+ ................................\
210
+ ................................\
211
+ ................................d";
212
+
213
+ static const char dot_map[257] = "\
214
+ ................................\
215
+ ................vvvvvvvvvv......\
216
+ ................................\
217
+ ................................\
218
+ ................................\
219
+ ................................\
220
+ ................................\
221
+ .................................";
222
+
223
+ static const char frac_map[257] = "\
224
+ .........rs..r..................\
225
+ r...........u...vvvvvvvvvv......\
226
+ .....w.......................H..\
227
+ .....w.......................G..\
228
+ ................................\
229
+ ................................\
230
+ ................................\
231
+ ................................f";
232
+
233
+ static const char exp_sign_map[257] = "\
234
+ ................................\
235
+ ...........x.x..yyyyyyyyyy......\
236
+ ................................\
237
+ ................................\
238
+ ................................\
239
+ ................................\
240
+ ................................\
241
+ ................................x";
242
+
243
+ static const char exp_zero_map[257] = "\
244
+ ................................\
245
+ ................yyyyyyyyyy......\
246
+ ................................\
247
+ ................................\
248
+ ................................\
249
+ ................................\
250
+ ................................\
251
+ ................................z";
252
+
253
+ static const char exp_map[257] = "\
254
+ .........rs..r..................\
255
+ r...........u...yyyyyyyyyy......\
256
+ .............................H..\
257
+ .............................G..\
258
+ ................................\
259
+ ................................\
260
+ ................................\
261
+ ................................X";
262
+
263
+ static const char big_digit_map[257] = "\
264
+ .........rs..r..................\
265
+ r...........u.D.CCCCCCCCCC......\
266
+ .....J.......................H..\
267
+ .....J.......................G..\
268
+ ................................\
269
+ ................................\
270
+ ................................\
271
+ ................................D";
272
+
273
+ static const char big_dot_map[257] = "\
274
+ ................................\
275
+ ................IIIIIIIIII......\
276
+ ................................\
277
+ ................................\
278
+ ................................\
279
+ ................................\
280
+ ................................\
281
+ ................................o";
282
+
283
+ static const char big_frac_map[257] = "\
284
+ .........rs..r..................\
285
+ r...........u...IIIIIIIIII......\
286
+ .....J.......................H..\
287
+ .....J.......................G..\
288
+ ................................\
289
+ ................................\
290
+ ................................\
291
+ ................................g";
292
+
293
+ static const char big_exp_sign_map[257] = "\
294
+ ................................\
295
+ ...........K.K..LLLLLLLLLL......\
296
+ ................................\
297
+ ................................\
298
+ ................................\
299
+ ................................\
300
+ ................................\
301
+ ................................B";
302
+
303
+ static const char big_exp_zero_map[257] = "\
304
+ ................................\
305
+ ................LLLLLLLLLL......\
306
+ ................................\
307
+ ................................\
308
+ ................................\
309
+ ................................\
310
+ ................................\
311
+ ................................Z";
312
+
313
+ static const char big_exp_map[257] = "\
314
+ .........rs..r..................\
315
+ r...........u...LLLLLLLLLL......\
316
+ .............................H..\
317
+ .............................G..\
318
+ ................................\
319
+ ................................\
320
+ ................................\
321
+ ................................Y";
322
+
323
+ static const char string_map[257] = "\
324
+ ................................\
325
+ RRzRRRRRRRRRRRRRRRRRRRRRRRRRRRRR\
326
+ RRRRRRRRRRRRRRRRRRRRRRRRRRRRARRR\
327
+ RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR\
328
+ ................................\
329
+ ................................\
330
+ MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\
331
+ PPPPPPPPPPPPPPPPQQQQQQQQ........s";
332
+
333
+ static const char esc_map[257] = "\
334
+ ................................\
335
+ ..B............B................\
336
+ ............................B...\
337
+ ..B...B.......B...B.BU..........\
338
+ ................................\
339
+ ................................\
340
+ ................................\
341
+ ................................~";
342
+
343
+ static const char esc_byte_map[257] = "\
344
+ ................................\
345
+ ..\"............/................\
346
+ ............................\\...\
347
+ ..\b...\f.......\n...\r.\t..........\
348
+ ................................\
349
+ ................................\
350
+ ................................\
351
+ ................................b";
352
+
353
+ static const char u_map[257] = "\
354
+ ................................\
355
+ ................EEEEEEEEEE......\
356
+ .EEEEEE.........................\
357
+ .EEEEEE.........................\
358
+ ................................\
359
+ ................................\
360
+ ................................\
361
+ ................................u";
362
+
363
+ static const char utf_map[257] = "\
364
+ ................................\
365
+ ................................\
366
+ ................................\
367
+ ................................\
368
+ SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS\
369
+ SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS\
370
+ ................................\
371
+ ................................8";
372
+
373
+ static const char space_map[257] = "\
374
+ .........ab..a..................\
375
+ a...............................\
376
+ ................................\
377
+ ................................\
378
+ ................................\
379
+ ................................\
380
+ ................................\
381
+ ................................S";
382
+
383
+ static const char trail_map[257] = "\
384
+ .........ab..a..................\
385
+ a...............................\
386
+ ................................\
387
+ ................................\
388
+ ................................\
389
+ ................................\
390
+ ................................\
391
+ ................................R";
392
+
393
+ static const byte hex_map[256] = "\
394
+ ................................\
395
+ ................\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09......\
396
+ .\x0a\x0b\x0c\x0d\x0e\x0f.........................\
397
+ .\x0a\x0b\x0c\x0d\x0e\x0f.........................\
398
+ ................................\
399
+ ................................\
400
+ ................................\
401
+ ................................";
402
+
403
+ static long double pow_map[401] = {1.0L, 1.0e1L, 1.0e2L, 1.0e3L, 1.0e4L,
404
+ 1.0e5L, 1.0e6L, 1.0e7L, 1.0e8L, 1.0e9L, // 00
405
+ 1.0e10L, 1.0e11L, 1.0e12L, 1.0e13L, 1.0e14L,
406
+ 1.0e15L, 1.0e16L, 1.0e17L, 1.0e18L, 1.0e19L, // 10
407
+ 1.0e20L, 1.0e21L, 1.0e22L, 1.0e23L, 1.0e24L,
408
+ 1.0e25L, 1.0e26L, 1.0e27L, 1.0e28L, 1.0e29L, // 20
409
+ 1.0e30L, 1.0e31L, 1.0e32L, 1.0e33L, 1.0e34L,
410
+ 1.0e35L, 1.0e36L, 1.0e37L, 1.0e38L, 1.0e39L, // 30
411
+ 1.0e40L, 1.0e41L, 1.0e42L, 1.0e43L, 1.0e44L,
412
+ 1.0e45L, 1.0e46L, 1.0e47L, 1.0e48L, 1.0e49L, // 40
413
+ 1.0e50L, 1.0e51L, 1.0e52L, 1.0e53L, 1.0e54L,
414
+ 1.0e55L, 1.0e56L, 1.0e57L, 1.0e58L, 1.0e59L, // 50
415
+ 1.0e60L, 1.0e61L, 1.0e62L, 1.0e63L, 1.0e64L,
416
+ 1.0e65L, 1.0e66L, 1.0e67L, 1.0e68L, 1.0e69L, // 60
417
+ 1.0e70L, 1.0e71L, 1.0e72L, 1.0e73L, 1.0e74L,
418
+ 1.0e75L, 1.0e76L, 1.0e77L, 1.0e78L, 1.0e79L, // 70
419
+ 1.0e80L, 1.0e81L, 1.0e82L, 1.0e83L, 1.0e84L,
420
+ 1.0e85L, 1.0e86L, 1.0e87L, 1.0e88L, 1.0e89L, // 80
421
+ 1.0e90L, 1.0e91L, 1.0e92L, 1.0e93L, 1.0e94L,
422
+ 1.0e95L, 1.0e96L, 1.0e97L, 1.0e98L, 1.0e99L, // 90
423
+ 1.0e100L, 1.0e101L, 1.0e102L, 1.0e103L, 1.0e104L,
424
+ 1.0e105L, 1.0e106L, 1.0e107L, 1.0e108L, 1.0e109L, // 100
425
+ 1.0e110L, 1.0e111L, 1.0e112L, 1.0e113L, 1.0e114L,
426
+ 1.0e115L, 1.0e116L, 1.0e117L, 1.0e118L, 1.0e119L, // 110
427
+ 1.0e120L, 1.0e121L, 1.0e122L, 1.0e123L, 1.0e124L,
428
+ 1.0e125L, 1.0e126L, 1.0e127L, 1.0e128L, 1.0e129L, // 120
429
+ 1.0e130L, 1.0e131L, 1.0e132L, 1.0e133L, 1.0e134L,
430
+ 1.0e135L, 1.0e136L, 1.0e137L, 1.0e138L, 1.0e139L, // 130
431
+ 1.0e140L, 1.0e141L, 1.0e142L, 1.0e143L, 1.0e144L,
432
+ 1.0e145L, 1.0e146L, 1.0e147L, 1.0e148L, 1.0e149L, // 140
433
+ 1.0e150L, 1.0e151L, 1.0e152L, 1.0e153L, 1.0e154L,
434
+ 1.0e155L, 1.0e156L, 1.0e157L, 1.0e158L, 1.0e159L, // 150
435
+ 1.0e160L, 1.0e161L, 1.0e162L, 1.0e163L, 1.0e164L,
436
+ 1.0e165L, 1.0e166L, 1.0e167L, 1.0e168L, 1.0e169L, // 160
437
+ 1.0e170L, 1.0e171L, 1.0e172L, 1.0e173L, 1.0e174L,
438
+ 1.0e175L, 1.0e176L, 1.0e177L, 1.0e178L, 1.0e179L, // 170
439
+ 1.0e180L, 1.0e181L, 1.0e182L, 1.0e183L, 1.0e184L,
440
+ 1.0e185L, 1.0e186L, 1.0e187L, 1.0e188L, 1.0e189L, // 180
441
+ 1.0e190L, 1.0e191L, 1.0e192L, 1.0e193L, 1.0e194L,
442
+ 1.0e195L, 1.0e196L, 1.0e197L, 1.0e198L, 1.0e199L, // 190
443
+ 1.0e200L, 1.0e201L, 1.0e202L, 1.0e203L, 1.0e204L,
444
+ 1.0e205L, 1.0e206L, 1.0e207L, 1.0e208L, 1.0e209L, // 200
445
+ 1.0e210L, 1.0e211L, 1.0e212L, 1.0e213L, 1.0e214L,
446
+ 1.0e215L, 1.0e216L, 1.0e217L, 1.0e218L, 1.0e219L, // 210
447
+ 1.0e220L, 1.0e221L, 1.0e222L, 1.0e223L, 1.0e224L,
448
+ 1.0e225L, 1.0e226L, 1.0e227L, 1.0e228L, 1.0e229L, // 220
449
+ 1.0e230L, 1.0e231L, 1.0e232L, 1.0e233L, 1.0e234L,
450
+ 1.0e235L, 1.0e236L, 1.0e237L, 1.0e238L, 1.0e239L, // 230
451
+ 1.0e240L, 1.0e241L, 1.0e242L, 1.0e243L, 1.0e244L,
452
+ 1.0e245L, 1.0e246L, 1.0e247L, 1.0e248L, 1.0e249L, // 240
453
+ 1.0e250L, 1.0e251L, 1.0e252L, 1.0e253L, 1.0e254L,
454
+ 1.0e255L, 1.0e256L, 1.0e257L, 1.0e258L, 1.0e259L, // 250
455
+ 1.0e260L, 1.0e261L, 1.0e262L, 1.0e263L, 1.0e264L,
456
+ 1.0e265L, 1.0e266L, 1.0e267L, 1.0e268L, 1.0e269L, // 260
457
+ 1.0e270L, 1.0e271L, 1.0e272L, 1.0e273L, 1.0e274L,
458
+ 1.0e275L, 1.0e276L, 1.0e277L, 1.0e278L, 1.0e279L, // 270
459
+ 1.0e280L, 1.0e281L, 1.0e282L, 1.0e283L, 1.0e284L,
460
+ 1.0e285L, 1.0e286L, 1.0e287L, 1.0e288L, 1.0e289L, // 280
461
+ 1.0e290L, 1.0e291L, 1.0e292L, 1.0e293L, 1.0e294L,
462
+ 1.0e295L, 1.0e296L, 1.0e297L, 1.0e298L, 1.0e299L, // 290
463
+ 1.0e300L, 1.0e301L, 1.0e302L, 1.0e303L, 1.0e304L,
464
+ 1.0e305L, 1.0e306L, 1.0e307L, 1.0e308L, 1.0e309L, // 300
465
+ 1.0e310L, 1.0e311L, 1.0e312L, 1.0e313L, 1.0e314L,
466
+ 1.0e315L, 1.0e316L, 1.0e317L, 1.0e318L, 1.0e319L, // 310
467
+ 1.0e320L, 1.0e321L, 1.0e322L, 1.0e323L, 1.0e324L,
468
+ 1.0e325L, 1.0e326L, 1.0e327L, 1.0e328L, 1.0e329L, // 320
469
+ 1.0e330L, 1.0e331L, 1.0e332L, 1.0e333L, 1.0e334L,
470
+ 1.0e335L, 1.0e336L, 1.0e337L, 1.0e338L, 1.0e339L, // 330
471
+ 1.0e340L, 1.0e341L, 1.0e342L, 1.0e343L, 1.0e344L,
472
+ 1.0e345L, 1.0e346L, 1.0e347L, 1.0e348L, 1.0e349L, // 340
473
+ 1.0e350L, 1.0e351L, 1.0e352L, 1.0e353L, 1.0e354L,
474
+ 1.0e355L, 1.0e356L, 1.0e357L, 1.0e358L, 1.0e359L, // 350
475
+ 1.0e360L, 1.0e361L, 1.0e362L, 1.0e363L, 1.0e364L,
476
+ 1.0e365L, 1.0e366L, 1.0e367L, 1.0e368L, 1.0e369L, // 360
477
+ 1.0e370L, 1.0e371L, 1.0e372L, 1.0e373L, 1.0e374L,
478
+ 1.0e375L, 1.0e376L, 1.0e377L, 1.0e378L, 1.0e379L, // 370
479
+ 1.0e380L, 1.0e381L, 1.0e382L, 1.0e383L, 1.0e384L,
480
+ 1.0e385L, 1.0e386L, 1.0e387L, 1.0e388L, 1.0e389L, // 380
481
+ 1.0e390L, 1.0e391L, 1.0e392L, 1.0e393L, 1.0e394L,
482
+ 1.0e395L, 1.0e396L, 1.0e397L, 1.0e398L, 1.0e399L, // 390
483
+ 1.0e400L};
484
+
485
+ static VALUE parser_class;
486
+
487
+ // Works with extended unicode as well. \Uffffffff if support is desired in
488
+ // the future.
489
+ static size_t unicodeToUtf8(uint32_t code, byte *buf) {
490
+ byte *start = buf;
491
+
492
+ if (0x0000007F >= code) {
493
+ *buf++ = (byte)code;
494
+ } else if (0x000007FF >= code) {
495
+ *buf++ = 0xC0 | (code >> 6);
496
+ *buf++ = 0x80 | (0x3F & code);
497
+ } else if (0x0000FFFF >= code) {
498
+ *buf++ = 0xE0 | (code >> 12);
499
+ *buf++ = 0x80 | ((code >> 6) & 0x3F);
500
+ *buf++ = 0x80 | (0x3F & code);
501
+ } else if (0x001FFFFF >= code) {
502
+ *buf++ = 0xF0 | (code >> 18);
503
+ *buf++ = 0x80 | ((code >> 12) & 0x3F);
504
+ *buf++ = 0x80 | ((code >> 6) & 0x3F);
505
+ *buf++ = 0x80 | (0x3F & code);
506
+ } else if (0x03FFFFFF >= code) {
507
+ *buf++ = 0xF8 | (code >> 24);
508
+ *buf++ = 0x80 | ((code >> 18) & 0x3F);
509
+ *buf++ = 0x80 | ((code >> 12) & 0x3F);
510
+ *buf++ = 0x80 | ((code >> 6) & 0x3F);
511
+ *buf++ = 0x80 | (0x3F & code);
512
+ } else if (0x7FFFFFFF >= code) {
513
+ *buf++ = 0xFC | (code >> 30);
514
+ *buf++ = 0x80 | ((code >> 24) & 0x3F);
515
+ *buf++ = 0x80 | ((code >> 18) & 0x3F);
516
+ *buf++ = 0x80 | ((code >> 12) & 0x3F);
517
+ *buf++ = 0x80 | ((code >> 6) & 0x3F);
518
+ *buf++ = 0x80 | (0x3F & code);
519
+ }
520
+ return buf - start;
521
+ }
522
+
523
+ static void parser_reset(ojParser p) {
524
+ p->reader = 0;
525
+ memset(&p->num, 0, sizeof(p->num));
526
+ buf_reset(&p->key);
527
+ buf_reset(&p->buf);
528
+ p->map = value_map;
529
+ p->next_map = NULL;
530
+ p->depth = 0;
531
+ }
532
+
533
+ static void parse_error(ojParser p, const char *fmt, ...) {
534
+ va_list ap;
535
+ char buf[256];
536
+
537
+ va_start(ap, fmt);
538
+ vsnprintf(buf, sizeof(buf), fmt, ap);
539
+ va_end(ap);
540
+ rb_raise(oj_json_parser_error_class, "%s at %ld:%ld", buf, p->line, p->col);
541
+ }
542
+
543
+ static void byte_error(ojParser p, byte b) {
544
+ switch (p->map[256]) {
545
+ case 'N': // null_map
546
+ parse_error(p, "expected null");
547
+ break;
548
+ case 'T': // true_map
549
+ parse_error(p, "expected true");
550
+ break;
551
+ case 'F': // false_map
552
+ parse_error(p, "expected false");
553
+ break;
554
+ case 's': // string_map
555
+ parse_error(p, "invalid JSON character 0x%02x", b);
556
+ break;
557
+ default: parse_error(p, "unexpected character '%c' in '%c' mode", b, p->map[256]); break;
558
+ }
559
+ }
560
+
561
+ static void calc_num(ojParser p) {
562
+ switch (p->type) {
563
+ case OJ_INT:
564
+ if (p->num.neg) {
565
+ p->num.fixnum = -p->num.fixnum;
566
+ p->num.neg = false;
567
+ }
568
+ p->funcs[p->stack[p->depth]].add_int(p);
569
+ break;
570
+ case OJ_DECIMAL: {
571
+ long double d = (long double)p->num.fixnum;
572
+
573
+ if (p->num.neg) {
574
+ d = -d;
575
+ }
576
+ if (0 < p->num.shift) {
577
+ d /= pow_map[p->num.shift];
578
+ }
579
+ if (0 < p->num.exp) {
580
+ long double x;
581
+
582
+ if (MAX_POW < p->num.exp) {
583
+ x = powl(10.0L, (long double)p->num.exp);
584
+ } else {
585
+ x = pow_map[p->num.exp];
586
+ }
587
+ if (p->num.exp_neg) {
588
+ d /= x;
589
+ } else {
590
+ d *= x;
591
+ }
592
+ }
593
+ p->num.dub = d;
594
+ p->funcs[p->stack[p->depth]].add_float(p);
595
+ break;
596
+ }
597
+ case OJ_BIG: p->funcs[p->stack[p->depth]].add_big(p);
598
+ default:
599
+ // nothing to do
600
+ break;
601
+ }
602
+ }
603
+
604
+ static void big_change(ojParser p) {
605
+ char buf[32];
606
+ int64_t i = p->num.fixnum;
607
+ int len = 0;
608
+
609
+ buf[sizeof(buf) - 1] = '\0';
610
+ p->buf.tail = p->buf.head;
611
+ switch (p->type) {
612
+ case OJ_INT:
613
+ // If an int then it will fit in the num.raw so no need to check length;
614
+ for (len = sizeof(buf) - 1; 0 < i; len--, i /= 10) {
615
+ buf[len] = '0' + (i % 10);
616
+ }
617
+ if (p->num.neg) {
618
+ buf[len] = '-';
619
+ len--;
620
+ }
621
+ buf_append_string(&p->buf, buf + len + 1, sizeof(buf) - len - 1);
622
+ p->type = OJ_BIG;
623
+ break;
624
+ case OJ_DECIMAL: {
625
+ int shift = p->num.shift;
626
+
627
+ for (len = sizeof(buf) - 1; 0 < i; len--, i /= 10, shift--) {
628
+ if (0 == shift) {
629
+ buf[len] = '.';
630
+ len--;
631
+ }
632
+ buf[len] = '0' + (i % 10);
633
+ }
634
+ if (p->num.neg) {
635
+ buf[len] = '-';
636
+ len--;
637
+ }
638
+ buf_append_string(&p->buf, buf + len + 1, sizeof(buf) - len - 1);
639
+ if (0 < p->num.exp) {
640
+ int x = p->num.exp;
641
+ int d;
642
+ bool started = false;
643
+
644
+ buf_append(&p->buf, 'e');
645
+ if (0 < p->num.exp_neg) {
646
+ buf_append(&p->buf, '-');
647
+ }
648
+ for (int div = 1000; 0 < div; div /= 10) {
649
+ d = x / div % 10;
650
+ if (started || 0 < d) {
651
+ buf_append(&p->buf, '0' + d);
652
+ }
653
+ }
654
+ }
655
+ p->type = OJ_BIG;
656
+ break;
657
+ }
658
+ default: break;
659
+ }
660
+ }
661
+
662
+ static void parse(ojParser p, const byte *json) {
663
+ const byte *start;
664
+ const byte *b = json;
665
+
666
+ #if DEBUG
667
+ printf("*** parse - mode: %c %s\n", p->map[256], (const char *)json);
668
+ #endif
669
+ for (; '\0' != *b; b++) {
670
+ switch (p->map[*b]) {
671
+ case SKIP_NEWLINE:
672
+ p->line++;
673
+ p->col = b - json;
674
+ b++;
675
+ #ifdef SPACE_JUMP
676
+ // for (uint32_t *sj = (uint32_t*)b; 0x20202020 == *sj; sj++) { b += 4; }
677
+ for (uint16_t *sj = (uint16_t *)b; 0x2020 == *sj; sj++) {
678
+ b += 2;
679
+ }
680
+ #endif
681
+ for (; SKIP_CHAR == space_map[*b]; b++) {
682
+ }
683
+ b--;
684
+ break;
685
+ case COLON_COLON: p->map = value_map; break;
686
+ case SKIP_CHAR: break;
687
+ case KEY_QUOTE:
688
+ b++;
689
+ p->key.tail = p->key.head;
690
+ start = b;
691
+ for (; STR_OK == string_map[*b]; b++) {
692
+ }
693
+ buf_append_string(&p->key, (const char *)start, b - start);
694
+ if ('"' == *b) {
695
+ p->map = colon_map;
696
+ break;
697
+ }
698
+ b--;
699
+ p->map = string_map;
700
+ p->next_map = colon_map;
701
+ break;
702
+ case AFTER_COMMA:
703
+ if (0 < p->depth && OBJECT_FUN == p->stack[p->depth]) {
704
+ p->map = key_map;
705
+ } else {
706
+ p->map = comma_map;
707
+ }
708
+ break;
709
+ case VAL_QUOTE:
710
+ b++;
711
+ start = b;
712
+ p->buf.tail = p->buf.head;
713
+ for (; STR_OK == string_map[*b]; b++) {
714
+ }
715
+ buf_append_string(&p->buf, (const char *)start, b - start);
716
+ if ('"' == *b) {
717
+ p->funcs[p->stack[p->depth]].add_str(p);
718
+ p->map = (0 == p->depth) ? value_map : after_map;
719
+ break;
720
+ }
721
+ b--;
722
+ p->map = string_map;
723
+ p->next_map = (0 == p->depth) ? value_map : after_map;
724
+ break;
725
+ case OPEN_OBJECT:
726
+ p->funcs[p->stack[p->depth]].open_object(p);
727
+ p->depth++;
728
+ p->stack[p->depth] = OBJECT_FUN;
729
+ p->map = key1_map;
730
+ break;
731
+ case NUM_CLOSE_OBJECT:
732
+ calc_num(p);
733
+ // flow through
734
+ case CLOSE_OBJECT:
735
+ p->map = (1 == p->depth) ? value_map : after_map;
736
+ if (p->depth <= 0 || OBJECT_FUN != p->stack[p->depth]) {
737
+ p->col = b - json - p->col + 1;
738
+ parse_error(p, "unexpected object close");
739
+ return;
740
+ }
741
+ p->depth--;
742
+ p->funcs[p->stack[p->depth]].close_object(p);
743
+ break;
744
+ case OPEN_ARRAY:
745
+ p->funcs[p->stack[p->depth]].open_array(p);
746
+ p->depth++;
747
+ p->stack[p->depth] = ARRAY_FUN;
748
+ p->map = value_map;
749
+ break;
750
+ case NUM_CLOSE_ARRAY:
751
+ calc_num(p);
752
+ // flow through
753
+ case CLOSE_ARRAY:
754
+ p->map = (1 == p->depth) ? value_map : after_map;
755
+ if (p->depth <= 0 || ARRAY_FUN != p->stack[p->depth]) {
756
+ p->col = b - json - p->col + 1;
757
+ parse_error(p, "unexpected array close");
758
+ return;
759
+ }
760
+ p->depth--;
761
+ p->funcs[p->stack[p->depth]].close_array(p);
762
+ break;
763
+ case NUM_COMMA:
764
+ calc_num(p);
765
+ if (0 < p->depth && OBJECT_FUN == p->stack[p->depth]) {
766
+ p->map = key_map;
767
+ } else {
768
+ p->map = comma_map;
769
+ }
770
+ break;
771
+ case VAL0:
772
+ p->type = OJ_INT;
773
+ p->num.fixnum = 0;
774
+ p->num.neg = false;
775
+ p->num.shift = 0;
776
+ p->num.len = 0;
777
+ p->num.exp = 0;
778
+ p->num.exp_neg = false;
779
+ p->map = zero_map;
780
+ break;
781
+ case VAL_NEG:
782
+ p->type = OJ_INT;
783
+ p->num.fixnum = 0;
784
+ p->num.neg = true;
785
+ p->num.shift = 0;
786
+ p->num.len = 0;
787
+ p->num.exp = 0;
788
+ p->num.exp_neg = false;
789
+ p->map = neg_map;
790
+ break;
791
+ ;
792
+ case VAL_DIGIT:
793
+ p->type = OJ_INT;
794
+ p->num.fixnum = 0;
795
+ p->num.neg = false;
796
+ p->num.shift = 0;
797
+ p->num.exp = 0;
798
+ p->num.exp_neg = false;
799
+ p->num.len = 0;
800
+ p->map = digit_map;
801
+ for (; NUM_DIGIT == digit_map[*b]; b++) {
802
+ uint64_t x = (uint64_t)p->num.fixnum * 10 + (uint64_t)(*b - '0');
803
+
804
+ // Tried just checking for an int less than zero but that
805
+ // fails when optimization is on for some reason with the
806
+ // clang compiler so us a bit mask instead.
807
+ if (x < BIG_LIMIT) {
808
+ p->num.fixnum = (int64_t)x;
809
+ } else {
810
+ big_change(p);
811
+ p->map = big_digit_map;
812
+ break;
813
+ }
814
+ }
815
+ b--;
816
+ break;
817
+ case NUM_DIGIT:
818
+ for (; NUM_DIGIT == digit_map[*b]; b++) {
819
+ uint64_t x = p->num.fixnum * 10 + (uint64_t)(*b - '0');
820
+
821
+ if (x < BIG_LIMIT) {
822
+ p->num.fixnum = (int64_t)x;
823
+ } else {
824
+ big_change(p);
825
+ p->map = big_digit_map;
826
+ break;
827
+ }
828
+ }
829
+ b--;
830
+ break;
831
+ case NUM_DOT:
832
+ p->type = OJ_DECIMAL;
833
+ p->map = dot_map;
834
+ break;
835
+ case NUM_FRAC:
836
+ p->map = frac_map;
837
+ for (; NUM_FRAC == frac_map[*b]; b++) {
838
+ uint64_t x = p->num.fixnum * 10 + (uint64_t)(*b - '0');
839
+
840
+ if (x < FRAC_LIMIT) {
841
+ p->num.fixnum = (int64_t)x;
842
+ p->num.shift++;
843
+ } else {
844
+ big_change(p);
845
+ p->map = big_frac_map;
846
+ break;
847
+ }
848
+ }
849
+ b--;
850
+ break;
851
+ case FRAC_E:
852
+ p->type = OJ_DECIMAL;
853
+ p->map = exp_sign_map;
854
+ break;
855
+ case NUM_ZERO: p->map = zero_map; break;
856
+ case NEG_DIGIT:
857
+ for (; NUM_DIGIT == digit_map[*b]; b++) {
858
+ uint64_t x = p->num.fixnum * 10 + (uint64_t)(*b - '0');
859
+
860
+ if (x < BIG_LIMIT) {
861
+ p->num.fixnum = (int64_t)x;
862
+ } else {
863
+ big_change(p);
864
+ p->map = big_digit_map;
865
+ break;
866
+ }
867
+ }
868
+ b--;
869
+ p->map = digit_map;
870
+ break;
871
+ case EXP_SIGN:
872
+ p->num.exp_neg = ('-' == *b);
873
+ p->map = exp_zero_map;
874
+ break;
875
+ case EXP_DIGIT:
876
+ p->map = exp_map;
877
+ for (; NUM_DIGIT == digit_map[*b]; b++) {
878
+ int16_t x = p->num.exp * 10 + (int16_t)(*b - '0');
879
+
880
+ if (x <= MAX_EXP) {
881
+ p->num.exp = x;
882
+ } else {
883
+ big_change(p);
884
+ p->map = big_exp_map;
885
+ break;
886
+ }
887
+ }
888
+ b--;
889
+ break;
890
+ case BIG_DIGIT:
891
+ start = b;
892
+ for (; NUM_DIGIT == digit_map[*b]; b++) {
893
+ }
894
+ buf_append_string(&p->buf, (const char *)start, b - start);
895
+ b--;
896
+ break;
897
+ case BIG_DOT:
898
+ buf_append(&p->buf, '.');
899
+ p->map = big_dot_map;
900
+ break;
901
+ case BIG_FRAC:
902
+ p->map = big_frac_map;
903
+ start = b;
904
+ for (; NUM_FRAC == frac_map[*b]; b++) {
905
+ }
906
+ buf_append_string(&p->buf, (const char *)start, b - start);
907
+ b--;
908
+ break;
909
+ case BIG_E:
910
+ buf_append(&p->buf, *b);
911
+ p->map = big_exp_sign_map;
912
+ break;
913
+ case BIG_EXP_SIGN:
914
+ buf_append(&p->buf, *b);
915
+ p->map = big_exp_zero_map;
916
+ break;
917
+ case BIG_EXP:
918
+ start = b;
919
+ for (; NUM_DIGIT == digit_map[*b]; b++) {
920
+ }
921
+ buf_append_string(&p->buf, (const char *)start, b - start);
922
+ b--;
923
+ p->map = big_exp_map;
924
+ break;
925
+ case NUM_SPC: calc_num(p); break;
926
+ case NUM_NEWLINE: calc_num(p); b++;
927
+ #ifdef SPACE_JUMP
928
+ // for (uint32_t *sj = (uint32_t*)b; 0x20202020 == *sj; sj++) { b += 4; }
929
+ for (uint16_t *sj = (uint16_t *)b; 0x2020 == *sj; sj++) {
930
+ b += 2;
931
+ }
932
+ #endif
933
+ for (; SKIP_CHAR == space_map[*b]; b++) {
934
+ }
935
+ b--;
936
+ break;
937
+ case STR_OK:
938
+ start = b;
939
+ for (; STR_OK == string_map[*b]; b++) {
940
+ }
941
+ if (':' == p->next_map[256]) {
942
+ buf_append_string(&p->key, (const char *)start, b - start);
943
+ } else {
944
+ buf_append_string(&p->buf, (const char *)start, b - start);
945
+ }
946
+ if ('"' == *b) {
947
+ p->map = p->next_map;
948
+ break;
949
+ }
950
+ b--;
951
+ break;
952
+ case STR_SLASH: p->map = esc_map; break;
953
+ case STR_QUOTE: p->map = p->next_map; break;
954
+ case ESC_U:
955
+ p->map = u_map;
956
+ p->ri = 0;
957
+ p->ucode = 0;
958
+ break;
959
+ case U_OK:
960
+ p->ri++;
961
+ p->ucode = p->ucode << 4 | (uint32_t)hex_map[*b];
962
+ if (4 <= p->ri) {
963
+ byte utf8[8];
964
+ size_t ulen = unicodeToUtf8(p->ucode, utf8);
965
+
966
+ if (0 < ulen) {
967
+ if (':' == p->next_map[256]) {
968
+ buf_append_string(&p->key, (const char *)utf8, ulen);
969
+ } else {
970
+ buf_append_string(&p->buf, (const char *)utf8, ulen);
971
+ }
972
+ } else {
973
+ parse_error(p, "invalid unicode");
974
+ return;
975
+ }
976
+ p->map = string_map;
977
+ }
978
+ break;
979
+ case ESC_OK:
980
+ if (':' == p->next_map[256]) {
981
+ buf_append(&p->key, esc_byte_map[*b]);
982
+ } else {
983
+ buf_append(&p->buf, esc_byte_map[*b]);
984
+ }
985
+ p->map = string_map;
986
+ break;
987
+ case UTF1:
988
+ p->ri = 1;
989
+ p->map = utf_map;
990
+ if (':' == p->next_map[256]) {
991
+ buf_append(&p->key, *b);
992
+ } else {
993
+ buf_append(&p->buf, *b);
994
+ }
995
+ break;
996
+ case UTF2:
997
+ p->ri = 2;
998
+ p->map = utf_map;
999
+ if (':' == p->next_map[256]) {
1000
+ buf_append(&p->key, *b);
1001
+ } else {
1002
+ buf_append(&p->buf, *b);
1003
+ }
1004
+ break;
1005
+ case UTF3:
1006
+ p->ri = 3;
1007
+ p->map = utf_map;
1008
+ if (':' == p->next_map[256]) {
1009
+ buf_append(&p->key, *b);
1010
+ } else {
1011
+ buf_append(&p->buf, *b);
1012
+ }
1013
+ break;
1014
+ case UTFX:
1015
+ p->ri--;
1016
+ if (':' == p->next_map[256]) {
1017
+ buf_append(&p->key, *b);
1018
+ } else {
1019
+ buf_append(&p->buf, *b);
1020
+ }
1021
+ if (p->ri <= 0) {
1022
+ p->map = string_map;
1023
+ }
1024
+ break;
1025
+ case VAL_NULL:
1026
+ if ('u' == b[1] && 'l' == b[2] && 'l' == b[3]) {
1027
+ b += 3;
1028
+ p->funcs[p->stack[p->depth]].add_null(p);
1029
+ p->map = (0 == p->depth) ? value_map : after_map;
1030
+ break;
1031
+ }
1032
+ p->ri = 0;
1033
+ *p->token = *b++;
1034
+ for (int i = 1; i < 4; i++) {
1035
+ if ('\0' == *b) {
1036
+ p->ri = i;
1037
+ break;
1038
+ } else {
1039
+ p->token[i] = *b++;
1040
+ }
1041
+ }
1042
+ if (0 < p->ri) {
1043
+ p->map = null_map;
1044
+ b--;
1045
+ break;
1046
+ }
1047
+ p->col = b - json - p->col;
1048
+ parse_error(p, "expected null");
1049
+ return;
1050
+ case VAL_TRUE:
1051
+ if ('r' == b[1] && 'u' == b[2] && 'e' == b[3]) {
1052
+ b += 3;
1053
+ p->funcs[p->stack[p->depth]].add_true(p);
1054
+ p->map = (0 == p->depth) ? value_map : after_map;
1055
+ break;
1056
+ }
1057
+ p->ri = 0;
1058
+ *p->token = *b++;
1059
+ for (int i = 1; i < 4; i++) {
1060
+ if ('\0' == *b) {
1061
+ p->ri = i;
1062
+ break;
1063
+ } else {
1064
+ p->token[i] = *b++;
1065
+ }
1066
+ }
1067
+ if (0 < p->ri) {
1068
+ p->map = true_map;
1069
+ b--;
1070
+ break;
1071
+ }
1072
+ p->col = b - json - p->col;
1073
+ parse_error(p, "expected true");
1074
+ return;
1075
+ case VAL_FALSE:
1076
+ if ('a' == b[1] && 'l' == b[2] && 's' == b[3] && 'e' == b[4]) {
1077
+ b += 4;
1078
+ p->funcs[p->stack[p->depth]].add_false(p);
1079
+ p->map = (0 == p->depth) ? value_map : after_map;
1080
+ break;
1081
+ }
1082
+ p->ri = 0;
1083
+ *p->token = *b++;
1084
+ for (int i = 1; i < 5; i++) {
1085
+ if ('\0' == *b) {
1086
+ p->ri = i;
1087
+ break;
1088
+ } else {
1089
+ p->token[i] = *b++;
1090
+ }
1091
+ }
1092
+ if (0 < p->ri) {
1093
+ p->map = false_map;
1094
+ b--;
1095
+ break;
1096
+ }
1097
+ p->col = b - json - p->col;
1098
+ parse_error(p, "expected false");
1099
+ return;
1100
+ case TOKEN_OK:
1101
+ p->token[p->ri] = *b;
1102
+ p->ri++;
1103
+ switch (p->map[256]) {
1104
+ case 'N':
1105
+ if (4 == p->ri) {
1106
+ if (0 != strncmp("null", p->token, 4)) {
1107
+ p->col = b - json - p->col;
1108
+ parse_error(p, "expected null");
1109
+ return;
1110
+ }
1111
+ p->funcs[p->stack[p->depth]].add_null(p);
1112
+ p->map = (0 == p->depth) ? value_map : after_map;
1113
+ }
1114
+ break;
1115
+ case 'F':
1116
+ if (5 == p->ri) {
1117
+ if (0 != strncmp("false", p->token, 5)) {
1118
+ p->col = b - json - p->col;
1119
+ parse_error(p, "expected false");
1120
+ return;
1121
+ }
1122
+ p->funcs[p->stack[p->depth]].add_false(p);
1123
+ p->map = (0 == p->depth) ? value_map : after_map;
1124
+ }
1125
+ break;
1126
+ case 'T':
1127
+ if (4 == p->ri) {
1128
+ if (0 != strncmp("true", p->token, 4)) {
1129
+ p->col = b - json - p->col;
1130
+ parse_error(p, "expected true");
1131
+ return;
1132
+ }
1133
+ p->funcs[p->stack[p->depth]].add_true(p);
1134
+ p->map = (0 == p->depth) ? value_map : after_map;
1135
+ }
1136
+ break;
1137
+ default:
1138
+ p->col = b - json - p->col;
1139
+ parse_error(p, "parse error");
1140
+ return;
1141
+ }
1142
+ break;
1143
+ case CHAR_ERR: byte_error(p, *b); return;
1144
+ default: break;
1145
+ }
1146
+ if (0 == p->depth && 'v' == p->map[256] && p->just_one) {
1147
+ p->map = trail_map;
1148
+ }
1149
+ }
1150
+ if (0 == p->depth) {
1151
+ switch (p->map[256]) {
1152
+ case '0':
1153
+ case 'd':
1154
+ case 'f':
1155
+ case 'z':
1156
+ case 'X':
1157
+ case 'D':
1158
+ case 'g':
1159
+ case 'B':
1160
+ case 'Y': calc_num(p); break;
1161
+ }
1162
+ }
1163
+ return;
1164
+ }
1165
+
1166
+ static void parser_free(void *ptr) {
1167
+ ojParser p;
1168
+
1169
+ if (0 == ptr) {
1170
+ return;
1171
+ }
1172
+ p = (ojParser)ptr;
1173
+ buf_cleanup(&p->key);
1174
+ buf_cleanup(&p->buf);
1175
+ p->free(p);
1176
+ xfree(ptr);
1177
+ }
1178
+
1179
+ static void parser_mark(void *ptr) {
1180
+ if (NULL != ptr) {
1181
+ ojParser p = (ojParser)ptr;
1182
+
1183
+ if (0 != p->reader) {
1184
+ rb_gc_mark(p->reader);
1185
+ }
1186
+ p->mark(p);
1187
+ }
1188
+ }
1189
+
1190
+ extern void oj_set_parser_validator(ojParser p);
1191
+ extern void oj_set_parser_saj(ojParser p);
1192
+ extern void oj_set_parser_usual(ojParser p);
1193
+ extern void oj_set_parser_debug(ojParser p);
1194
+
1195
+ /* Document-method: new
1196
+ * call-seq: new(mode=nil)
1197
+ *
1198
+ * Creates a new Parser with the specified mode. If no mode is provided
1199
+ * validation is assumed.
1200
+ */
1201
+ static VALUE parser_new(VALUE self, VALUE mode) {
1202
+ ojParser p = ALLOC(struct _ojParser);
1203
+
1204
+ #if HAVE_RB_EXT_RACTOR_SAFE
1205
+ // This doesn't seem to do anything.
1206
+ rb_ext_ractor_safe(true);
1207
+ #endif
1208
+ memset(p, 0, sizeof(struct _ojParser));
1209
+ buf_init(&p->key);
1210
+ buf_init(&p->buf);
1211
+
1212
+ p->map = value_map;
1213
+ if (Qnil == mode) {
1214
+ oj_set_parser_validator(p);
1215
+ } else {
1216
+ const char *ms = NULL;
1217
+
1218
+ switch (rb_type(mode)) {
1219
+ case RUBY_T_SYMBOL:
1220
+ mode = rb_sym_to_s(mode);
1221
+ // fall through
1222
+ case RUBY_T_STRING: ms = RSTRING_PTR(mode); break;
1223
+ default:
1224
+ rb_raise(rb_eArgError, "mode must be :validate, :usual, :saj, or :object");
1225
+ }
1226
+ if (0 == strcmp("usual", ms) || 0 == strcmp("standard", ms) || 0 == strcmp("strict", ms) ||
1227
+ 0 == strcmp("compat", ms)) {
1228
+ oj_set_parser_usual(p);
1229
+ } else if (0 == strcmp("object", ms)) {
1230
+ // TBD
1231
+ } else if (0 == strcmp("saj", ms)) {
1232
+ oj_set_parser_saj(p);
1233
+ } else if (0 == strcmp("validate", ms)) {
1234
+ oj_set_parser_validator(p);
1235
+ } else if (0 == strcmp("debug", ms)) {
1236
+ oj_set_parser_debug(p);
1237
+ } else {
1238
+ rb_raise(rb_eArgError, "mode must be :validate, :usual, :saj, or :object");
1239
+ }
1240
+ }
1241
+ return Data_Wrap_Struct(parser_class, parser_mark, parser_free, p);
1242
+ }
1243
+
1244
+ /* Document-method: method_missing(value)
1245
+ * call-seq: method_missing(value)
1246
+ *
1247
+ * Methods not handled by the parser are passed to the delegate. The methods
1248
+ * supported by delegate are:
1249
+ *
1250
+ * - *:validate*
1251
+ * - no options
1252
+ *
1253
+ * - *:saj*
1254
+ * - _cache_keys=_ sets the value of the _cache_keys_ flag.
1255
+ * - _cache_keys_ returns the value of the _cache_keys_ flag.
1256
+ * - _cache_strings=_ sets the value of the _cache_strings_ to an positive integer less than 35. Strings shorter than that length are cached.
1257
+ * - _cache_strings_ returns the value of the _cache_strings_ integer value.
1258
+ * - _handler=_ sets the SAJ handler
1259
+ * - _handler_ returns the SAJ handler
1260
+ *
1261
+ * - *:usual*
1262
+ * - _cache_keys=_ sets the value of the _cache_keys_ flag.
1263
+ * - _cache_keys_ returns the value of the _cache_keys_ flag.
1264
+ * - _cache_strings=_ sets the value of the _cache_strings_ to an positive integer less than 35. Strings shorter than that length are cached.
1265
+ * - _cache_strings_ returns the value of the _cache_strings_ integer value.
1266
+ * - _capacity=_ sets the capacity of the parser. The parser grows automatically but can be updated directly with this call.
1267
+ * - _capacity_ returns the current capacity of the parser's internal stack.
1268
+ * - _create_id_ returns the value _create_id_ or _nil_ if there is no _create_id_.
1269
+ * - _create_id=_ sets the value _create_id_ or if _nil_ unsets it. Parsed JSON objects that include the specified element use the element value as the name of the class to create an object from instead of a Hash.
1270
+ * - _decimal=_ sets the approach to how decimals are parser. If _:auto_ then the decimals with significant digits are 16 or less are Floats and long ones are BigDecimal. _:ruby_ uses a call to Ruby to convert a string to a Float. _:float_ always generates a Float. _:bigdecimal_ always results in a BigDecimal.
1271
+ * - _decimal_ returns the value of the decimal conversion option which can be :auto (default), :ruby, :float, or :bigdecimal.
1272
+ * - _ignore_json_create_ returns the value of the _ignore_json_create_ flag.
1273
+ * - _ignore_json_create=_ sets the value of the _ignore_json_create_ flag. When set the class json_create method is ignored on parsing in favor of creating an instance and populating directly.
1274
+ * - _missing_class_ return the value of the _missing_class_ indicator.
1275
+ * - _missing_class=_ sets the value of the _missing_class_ flag. Valid values are _:auto_ which creates any missing classes on parse, :ignore which ignores and continues as a Hash (default), and :raise which raises an exception if the class is not found.
1276
+ * - _omit_null=_ sets the _omit_null_ flag. If true then null values in a map or object are omitted from the resulting Hash or Object.
1277
+ * - _omit_null_ returns the value of the _omit_null_ flag.
1278
+ * - _symbol_keys=_ sets the flag that indicates Hash keys should be parsed to Symbols versus Strings.
1279
+ * - _symbol_keys_ returns the value of the _symbol_keys_ flag.
1280
+ */
1281
+ static VALUE parser_missing(int argc, VALUE *argv, VALUE self) {
1282
+ ojParser p = (ojParser)DATA_PTR(self);
1283
+ const char * key = NULL;
1284
+ volatile VALUE rkey = *argv;
1285
+ volatile VALUE rv = Qnil;
1286
+
1287
+ #if HAVE_RB_EXT_RACTOR_SAFE
1288
+ // This doesn't seem to do anything.
1289
+ rb_ext_ractor_safe(true);
1290
+ #endif
1291
+ switch (rb_type(rkey)) {
1292
+ case RUBY_T_SYMBOL:
1293
+ rkey = rb_sym_to_s(rkey);
1294
+ // fall through
1295
+ case RUBY_T_STRING: key = rb_string_value_ptr(&rkey); break;
1296
+ default: rb_raise(rb_eArgError, "option method must be a symbol or string");
1297
+ }
1298
+ if (1 < argc) {
1299
+ rv = argv[1];
1300
+ }
1301
+ return p->option(p, key, rv);
1302
+ }
1303
+
1304
+ /* Document-method: parse(json)
1305
+ * call-seq: parse(json)
1306
+ *
1307
+ * Parse a JSON string.
1308
+ *
1309
+ * Returns the result according to the delegate of the parser.
1310
+ */
1311
+ static VALUE parser_parse(VALUE self, VALUE json) {
1312
+ ojParser p = (ojParser)DATA_PTR(self);
1313
+
1314
+ Check_Type(json, T_STRING);
1315
+ parser_reset(p);
1316
+ p->start(p);
1317
+ parse(p, (const byte *)rb_string_value_ptr(&json));
1318
+
1319
+ return p->result(p);
1320
+ }
1321
+
1322
+ static VALUE load_rescue(VALUE self, VALUE x) {
1323
+ // Normal EOF. No action needed other than to stop loading.
1324
+ return Qfalse;
1325
+ }
1326
+
1327
+ static VALUE load(VALUE self) {
1328
+ ojParser p = (ojParser)DATA_PTR(self);
1329
+ volatile VALUE rbuf = rb_str_new2("");
1330
+
1331
+ p->start(p);
1332
+ while (true) {
1333
+ rb_funcall(p->reader, oj_readpartial_id, 2, INT2NUM(16385), rbuf);
1334
+ if (0 < RSTRING_LEN(rbuf)) {
1335
+ parse(p, (byte *)StringValuePtr(rbuf));
1336
+ }
1337
+ }
1338
+ return Qtrue;
1339
+ }
1340
+
1341
+ /* Document-method: load(reader)
1342
+ * call-seq: load(reader)
1343
+ *
1344
+ * Parse a JSON stream.
1345
+ *
1346
+ * Returns the result according to the delegate of the parser.
1347
+ */
1348
+ static VALUE parser_load(VALUE self, VALUE reader) {
1349
+ ojParser p = (ojParser)DATA_PTR(self);
1350
+
1351
+ parser_reset(p);
1352
+ p->reader = reader;
1353
+ rb_rescue2(load, self, load_rescue, Qnil, rb_eEOFError, 0);
1354
+
1355
+ return p->result(p);
1356
+ }
1357
+
1358
+ /* Document-method: file(filename)
1359
+ * call-seq: file(filename)
1360
+ *
1361
+ * Parse a JSON file.
1362
+ *
1363
+ * Returns the result according to the delegate of the parser.
1364
+ */
1365
+ static VALUE parser_file(VALUE self, VALUE filename) {
1366
+ ojParser p = (ojParser)DATA_PTR(self);
1367
+ const char *path;
1368
+ int fd;
1369
+
1370
+ Check_Type(filename, T_STRING);
1371
+ path = rb_string_value_ptr(&filename);
1372
+
1373
+ parser_reset(p);
1374
+ p->start(p);
1375
+
1376
+ if (0 > (fd = open(path, O_RDONLY))) {
1377
+ rb_raise(rb_eIOError, "error opening %s", path);
1378
+ }
1379
+ #if USE_THREAD_LIMIT
1380
+ struct stat info;
1381
+ // st_size will be 0 if not a file
1382
+ if (0 == fstat(fd, &info) && USE_THREAD_LIMIT < info.st_size) {
1383
+ // Use threaded version.
1384
+ // TBD only if has pthreads
1385
+ // TBD parse_large(p, fd);
1386
+ return p->result(p);
1387
+ }
1388
+ #endif
1389
+ byte buf[16385];
1390
+ size_t size = sizeof(buf) - 1;
1391
+ size_t rsize;
1392
+
1393
+ while (true) {
1394
+ if (0 < (rsize = read(fd, buf, size))) {
1395
+ buf[rsize] = '\0';
1396
+ parse(p, buf);
1397
+ }
1398
+ if (rsize <= 0) {
1399
+ if (0 != rsize) {
1400
+ rb_raise(rb_eIOError, "error reading from %s", path);
1401
+ }
1402
+ break;
1403
+ }
1404
+ }
1405
+ return p->result(p);
1406
+ }
1407
+
1408
+ /* Document-method: just_one
1409
+ * call-seq: just_one
1410
+ *
1411
+ * Returns the current state of the just_one [_Boolean_] option.
1412
+ */
1413
+ static VALUE parser_just_one(VALUE self) {
1414
+ ojParser p = (ojParser)DATA_PTR(self);
1415
+
1416
+ return p->just_one ? Qtrue : Qfalse;
1417
+ }
1418
+
1419
+ /* Document-method: just_one=
1420
+ * call-seq: just_one=(value)
1421
+ *
1422
+ * Sets the *just_one* option which limits the parsing of a string or or
1423
+ * stream to a single JSON element.
1424
+ *
1425
+ * Returns the current state of the just_one [_Boolean_] option.
1426
+ */
1427
+ static VALUE parser_just_one_set(VALUE self, VALUE v) {
1428
+ ojParser p = (ojParser)DATA_PTR(self);
1429
+
1430
+ p->just_one = (Qtrue == v);
1431
+
1432
+ return p->just_one ? Qtrue : Qfalse;
1433
+ }
1434
+
1435
+ static VALUE usual_parser = Qundef;
1436
+
1437
+ /* Document-method: usual
1438
+ * call-seq: usual
1439
+ *
1440
+ * Returns the default usual parser. Note the default usual parser can not be
1441
+ * used concurrently in more than one thread.
1442
+ */
1443
+ static VALUE parser_usual(VALUE self) {
1444
+ if (Qundef == usual_parser) {
1445
+ ojParser p = ALLOC(struct _ojParser);
1446
+
1447
+ memset(p, 0, sizeof(struct _ojParser));
1448
+ buf_init(&p->key);
1449
+ buf_init(&p->buf);
1450
+ p->map = value_map;
1451
+ oj_set_parser_usual(p);
1452
+ usual_parser = Data_Wrap_Struct(parser_class, parser_mark, parser_free, p);
1453
+ rb_gc_register_address(&usual_parser);
1454
+ }
1455
+ return usual_parser;
1456
+ }
1457
+
1458
+ static VALUE saj_parser = Qundef;
1459
+
1460
+ /* Document-method: saj
1461
+ * call-seq: saj
1462
+ *
1463
+ * Returns the default saj parser. Note the default SAJ parser can not be used
1464
+ * concurrently in more than one thread.
1465
+ */
1466
+ static VALUE parser_saj(VALUE self) {
1467
+ if (Qundef == saj_parser) {
1468
+ ojParser p = ALLOC(struct _ojParser);
1469
+
1470
+ memset(p, 0, sizeof(struct _ojParser));
1471
+ buf_init(&p->key);
1472
+ buf_init(&p->buf);
1473
+ p->map = value_map;
1474
+ oj_set_parser_saj(p);
1475
+ saj_parser = Data_Wrap_Struct(parser_class, parser_mark, parser_free, p);
1476
+ rb_gc_register_address(&saj_parser);
1477
+ }
1478
+ return saj_parser;
1479
+ }
1480
+
1481
+ static VALUE validate_parser = Qundef;
1482
+
1483
+ /* Document-method: validate
1484
+ * call-seq: validate
1485
+ *
1486
+ * Returns the default validate parser.
1487
+ */
1488
+ static VALUE parser_validate(VALUE self) {
1489
+ if (Qundef == validate_parser) {
1490
+ ojParser p = ALLOC(struct _ojParser);
1491
+
1492
+ memset(p, 0, sizeof(struct _ojParser));
1493
+ buf_init(&p->key);
1494
+ buf_init(&p->buf);
1495
+ p->map = value_map;
1496
+ oj_set_parser_validator(p);
1497
+ validate_parser = Data_Wrap_Struct(parser_class, parser_mark, parser_free, p);
1498
+ rb_gc_register_address(&validate_parser);
1499
+ }
1500
+ return validate_parser;
1501
+ }
1502
+
1503
+ /* Document-class: Oj::Parser
1504
+ *
1505
+ * A reusable parser that makes use of named delegates to determine the
1506
+ * handling of parsed data. Delegates are available for validation, a callback
1507
+ * parser (SAJ), and a usual delegate that builds Ruby objects as parsing
1508
+ * proceeds.
1509
+ *
1510
+ * This parser is considerably faster than the older Oj.parse call and
1511
+ * isolates options to just the parser so that other parts of the code are not
1512
+ * forced to use the same options.
1513
+ */
1514
+ void oj_parser_init() {
1515
+ parser_class = rb_define_class_under(Oj, "Parser", rb_cObject);
1516
+ rb_define_module_function(parser_class, "new", parser_new, 1);
1517
+ rb_define_method(parser_class, "parse", parser_parse, 1);
1518
+ rb_define_method(parser_class, "load", parser_load, 1);
1519
+ rb_define_method(parser_class, "file", parser_file, 1);
1520
+ rb_define_method(parser_class, "just_one", parser_just_one, 0);
1521
+ rb_define_method(parser_class, "just_one=", parser_just_one_set, 1);
1522
+ rb_define_method(parser_class, "method_missing", parser_missing, -1);
1523
+
1524
+ rb_define_module_function(parser_class, "usual", parser_usual, 0);
1525
+ rb_define_module_function(parser_class, "saj", parser_saj, 0);
1526
+ rb_define_module_function(parser_class, "validate", parser_validate, 0);
1527
+ }