oj 3.12.1 → 3.13.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -2
- data/ext/oj/buf.h +9 -0
- data/ext/oj/cache.c +187 -0
- data/ext/oj/cache.h +20 -0
- data/ext/oj/compat.c +8 -22
- data/ext/oj/custom.c +14 -13
- data/ext/oj/debug.c +131 -0
- data/ext/oj/dump.c +50 -56
- data/ext/oj/dump_compat.c +3 -3
- data/ext/oj/dump_object.c +9 -9
- data/ext/oj/dump_strict.c +3 -3
- data/ext/oj/err.h +19 -0
- data/ext/oj/extconf.rb +4 -0
- data/ext/oj/fast.c +6 -17
- data/ext/oj/intern.c +398 -0
- data/ext/oj/intern.h +27 -0
- data/ext/oj/mimic_json.c +9 -9
- data/ext/oj/object.c +10 -58
- data/ext/oj/odd.c +1 -1
- data/ext/oj/oj.c +164 -106
- data/ext/oj/oj.h +2 -2
- data/ext/oj/parse.c +4 -4
- data/ext/oj/parser.c +1511 -0
- data/ext/oj/parser.h +90 -0
- data/ext/oj/rails.c +5 -5
- data/ext/oj/resolve.c +2 -20
- data/ext/oj/saj2.c +346 -0
- data/ext/oj/scp.c +1 -1
- data/ext/oj/sparse.c +1 -1
- data/ext/oj/stream_writer.c +3 -3
- data/ext/oj/strict.c +10 -27
- data/ext/oj/usual.c +1222 -0
- data/ext/oj/validate.c +50 -0
- data/ext/oj/wab.c +9 -17
- data/lib/oj/mimic.rb +1 -1
- data/lib/oj/version.rb +1 -1
- data/pages/Modes.md +2 -0
- data/pages/Options.md +23 -5
- data/pages/Parser.md +309 -0
- data/test/foo.rb +2 -9
- data/test/perf_parser.rb +184 -0
- data/test/test_parser.rb +27 -0
- data/test/test_parser_saj.rb +245 -0
- data/test/test_parser_usual.rb +213 -0
- metadata +23 -6
- data/ext/oj/hash.c +0 -168
- data/ext/oj/hash.h +0 -21
- data/ext/oj/hash_test.c +0 -491
data/ext/oj/oj.h
CHANGED
@@ -143,7 +143,7 @@ typedef struct _options {
|
|
143
143
|
char safe; // YesNo
|
144
144
|
char sec_prec_set; // boolean (0 or 1)
|
145
145
|
char ignore_under; // YesNo - ignore attrs starting with _ if true in object and custom modes
|
146
|
-
char cache_keys; //
|
146
|
+
char cache_keys; // YesNo
|
147
147
|
char cache_str; // string short than or equal to this are cache
|
148
148
|
int64_t int_range_min; // dump numbers below as string
|
149
149
|
int64_t int_range_max; // dump numbers above as string
|
@@ -245,6 +245,7 @@ extern VALUE oj_compat_parse_cstr(int argc, VALUE *argv, char *json, size_t len)
|
|
245
245
|
extern VALUE oj_object_parse_cstr(int argc, VALUE *argv, char *json, size_t len);
|
246
246
|
extern VALUE oj_custom_parse_cstr(int argc, VALUE *argv, char *json, size_t len);
|
247
247
|
|
248
|
+
extern bool oj_hash_has_key(VALUE hash, VALUE key);
|
248
249
|
extern void oj_parse_options(VALUE ropts, Options copts);
|
249
250
|
|
250
251
|
extern void oj_dump_obj_to_json(VALUE obj, Options copts, Out out);
|
@@ -327,7 +328,6 @@ extern ID oj_exclude_end_id;
|
|
327
328
|
extern ID oj_file_id;
|
328
329
|
extern ID oj_fileno_id;
|
329
330
|
extern ID oj_ftype_id;
|
330
|
-
extern ID oj_has_key_id;
|
331
331
|
extern ID oj_hash_end_id;
|
332
332
|
extern ID oj_hash_key_id;
|
333
333
|
extern ID oj_hash_set_id;
|
data/ext/oj/parse.c
CHANGED
@@ -964,12 +964,12 @@ static VALUE protect_parse(VALUE pip) {
|
|
964
964
|
extern int oj_utf8_index;
|
965
965
|
|
966
966
|
static void oj_pi_set_input_str(ParseInfo pi, volatile VALUE *inputp) {
|
967
|
-
rb_encoding *enc =
|
967
|
+
rb_encoding *enc = rb_enc_get(*inputp);
|
968
968
|
|
969
|
-
if (
|
970
|
-
*inputp = rb_str_conv_enc(*inputp, enc,
|
969
|
+
if (oj_utf8_encoding != enc) {
|
970
|
+
*inputp = rb_str_conv_enc(*inputp, enc, oj_utf8_encoding);
|
971
971
|
}
|
972
|
-
pi->json =
|
972
|
+
pi->json = RSTRING_PTR(*inputp);
|
973
973
|
pi->end = pi->json + RSTRING_LEN(*inputp);
|
974
974
|
}
|
975
975
|
|
data/ext/oj/parser.c
ADDED
@@ -0,0 +1,1511 @@
|
|
1
|
+
// Copyright (c) 2020, 2021, Peter Ohler, All rights reserved.
|
2
|
+
|
3
|
+
#include <fcntl.h>
|
4
|
+
|
5
|
+
#include "parser.h"
|
6
|
+
#include "oj.h"
|
7
|
+
|
8
|
+
#define DEBUG 0
|
9
|
+
|
10
|
+
#define USE_THREAD_LIMIT 0
|
11
|
+
// #define USE_THREAD_LIMIT 100000
|
12
|
+
#define MAX_EXP 4932
|
13
|
+
// max in the pow_map
|
14
|
+
#define MAX_POW 400
|
15
|
+
|
16
|
+
#define MIN_SLEEP (1000000000LL / (double)CLOCKS_PER_SEC)
|
17
|
+
// 9,223,372,036,854,775,807
|
18
|
+
#define BIG_LIMIT LLONG_MAX / 10
|
19
|
+
#define FRAC_LIMIT 10000000000000000ULL
|
20
|
+
|
21
|
+
// Gives better performance with indented JSON but worse with unindented JSON.
|
22
|
+
//#define SPACE_JUMP
|
23
|
+
|
24
|
+
enum {
|
25
|
+
SKIP_CHAR = 'a',
|
26
|
+
SKIP_NEWLINE = 'b',
|
27
|
+
VAL_NULL = 'c',
|
28
|
+
VAL_TRUE = 'd',
|
29
|
+
VAL_FALSE = 'e',
|
30
|
+
VAL_NEG = 'f',
|
31
|
+
VAL0 = 'g',
|
32
|
+
VAL_DIGIT = 'h',
|
33
|
+
VAL_QUOTE = 'i',
|
34
|
+
OPEN_ARRAY = 'k',
|
35
|
+
OPEN_OBJECT = 'l',
|
36
|
+
CLOSE_ARRAY = 'm',
|
37
|
+
CLOSE_OBJECT = 'n',
|
38
|
+
AFTER_COMMA = 'o',
|
39
|
+
KEY_QUOTE = 'p',
|
40
|
+
COLON_COLON = 'q',
|
41
|
+
NUM_SPC = 'r',
|
42
|
+
NUM_NEWLINE = 's',
|
43
|
+
NUM_DOT = 't',
|
44
|
+
NUM_COMMA = 'u',
|
45
|
+
NUM_FRAC = 'v',
|
46
|
+
FRAC_E = 'w',
|
47
|
+
EXP_SIGN = 'x',
|
48
|
+
EXP_DIGIT = 'y',
|
49
|
+
STR_QUOTE = 'z',
|
50
|
+
NEG_DIGIT = '-',
|
51
|
+
STR_SLASH = 'A',
|
52
|
+
ESC_OK = 'B',
|
53
|
+
BIG_DIGIT = 'C',
|
54
|
+
BIG_DOT = 'D',
|
55
|
+
U_OK = 'E',
|
56
|
+
TOKEN_OK = 'F',
|
57
|
+
NUM_CLOSE_OBJECT = 'G',
|
58
|
+
NUM_CLOSE_ARRAY = 'H',
|
59
|
+
BIG_FRAC = 'I',
|
60
|
+
BIG_E = 'J',
|
61
|
+
BIG_EXP_SIGN = 'K',
|
62
|
+
BIG_EXP = 'L',
|
63
|
+
UTF1 = 'M', // expect 1 more follow byte
|
64
|
+
NUM_DIGIT = 'N',
|
65
|
+
NUM_ZERO = 'O',
|
66
|
+
UTF2 = 'P', // expect 2 more follow byte
|
67
|
+
UTF3 = 'Q', // expect 3 more follow byte
|
68
|
+
STR_OK = 'R',
|
69
|
+
UTFX = 'S', // following bytes
|
70
|
+
ESC_U = 'U',
|
71
|
+
CHAR_ERR = '.',
|
72
|
+
DONE = 'X',
|
73
|
+
};
|
74
|
+
|
75
|
+
/*
|
76
|
+
0123456789abcdef0123456789abcdef */
|
77
|
+
static const char value_map[257] = "\
|
78
|
+
X........ab..a..................\
|
79
|
+
a.i..........f..ghhhhhhhhh......\
|
80
|
+
...........................k.m..\
|
81
|
+
......e.......c.....d......l.n..\
|
82
|
+
................................\
|
83
|
+
................................\
|
84
|
+
................................\
|
85
|
+
................................v";
|
86
|
+
|
87
|
+
static const char null_map[257] = "\
|
88
|
+
................................\
|
89
|
+
............o...................\
|
90
|
+
................................\
|
91
|
+
............F........F..........\
|
92
|
+
................................\
|
93
|
+
................................\
|
94
|
+
................................\
|
95
|
+
................................N";
|
96
|
+
|
97
|
+
static const char true_map[257] = "\
|
98
|
+
................................\
|
99
|
+
............o...................\
|
100
|
+
................................\
|
101
|
+
.....F............F..F..........\
|
102
|
+
................................\
|
103
|
+
................................\
|
104
|
+
................................\
|
105
|
+
................................T";
|
106
|
+
|
107
|
+
static const char false_map[257] = "\
|
108
|
+
................................\
|
109
|
+
............o...................\
|
110
|
+
................................\
|
111
|
+
.F...F......F......F............\
|
112
|
+
................................\
|
113
|
+
................................\
|
114
|
+
................................\
|
115
|
+
................................F";
|
116
|
+
|
117
|
+
static const char comma_map[257] = "\
|
118
|
+
.........ab..a..................\
|
119
|
+
a.i..........f..ghhhhhhhhh......\
|
120
|
+
...........................k....\
|
121
|
+
......e.......c.....d......l....\
|
122
|
+
................................\
|
123
|
+
................................\
|
124
|
+
................................\
|
125
|
+
................................,";
|
126
|
+
|
127
|
+
static const char after_map[257] = "\
|
128
|
+
X........ab..a..................\
|
129
|
+
a...........o...................\
|
130
|
+
.............................m..\
|
131
|
+
.............................n..\
|
132
|
+
................................\
|
133
|
+
................................\
|
134
|
+
................................\
|
135
|
+
................................a";
|
136
|
+
|
137
|
+
static const char key1_map[257] = "\
|
138
|
+
.........ab..a..................\
|
139
|
+
a.p.............................\
|
140
|
+
................................\
|
141
|
+
.............................n..\
|
142
|
+
................................\
|
143
|
+
................................\
|
144
|
+
................................\
|
145
|
+
................................K";
|
146
|
+
|
147
|
+
static const char key_map[257] = "\
|
148
|
+
.........ab..a..................\
|
149
|
+
a.p.............................\
|
150
|
+
................................\
|
151
|
+
................................\
|
152
|
+
................................\
|
153
|
+
................................\
|
154
|
+
................................\
|
155
|
+
................................k";
|
156
|
+
|
157
|
+
static const char colon_map[257] = "\
|
158
|
+
.........ab..a..................\
|
159
|
+
a.........................q.....\
|
160
|
+
................................\
|
161
|
+
................................\
|
162
|
+
................................\
|
163
|
+
................................\
|
164
|
+
................................\
|
165
|
+
................................:";
|
166
|
+
|
167
|
+
static const char neg_map[257] = "\
|
168
|
+
................................\
|
169
|
+
................O---------......\
|
170
|
+
................................\
|
171
|
+
................................\
|
172
|
+
................................\
|
173
|
+
................................\
|
174
|
+
................................\
|
175
|
+
................................-";
|
176
|
+
|
177
|
+
static const char zero_map[257] = "\
|
178
|
+
.........rs..r..................\
|
179
|
+
r...........u.t.................\
|
180
|
+
.............................H..\
|
181
|
+
.............................G..\
|
182
|
+
................................\
|
183
|
+
................................\
|
184
|
+
................................\
|
185
|
+
................................0";
|
186
|
+
|
187
|
+
static const char digit_map[257] = "\
|
188
|
+
.........rs..r..................\
|
189
|
+
r...........u.t.NNNNNNNNNN......\
|
190
|
+
.....w.......................H..\
|
191
|
+
.....w.......................G..\
|
192
|
+
................................\
|
193
|
+
................................\
|
194
|
+
................................\
|
195
|
+
................................d";
|
196
|
+
|
197
|
+
static const char dot_map[257] = "\
|
198
|
+
................................\
|
199
|
+
................vvvvvvvvvv......\
|
200
|
+
................................\
|
201
|
+
................................\
|
202
|
+
................................\
|
203
|
+
................................\
|
204
|
+
................................\
|
205
|
+
.................................";
|
206
|
+
|
207
|
+
static const char frac_map[257] = "\
|
208
|
+
.........rs..r..................\
|
209
|
+
r...........u...vvvvvvvvvv......\
|
210
|
+
.....w.......................H..\
|
211
|
+
.....w.......................G..\
|
212
|
+
................................\
|
213
|
+
................................\
|
214
|
+
................................\
|
215
|
+
................................f";
|
216
|
+
|
217
|
+
static const char exp_sign_map[257] = "\
|
218
|
+
................................\
|
219
|
+
...........x.x..yyyyyyyyyy......\
|
220
|
+
................................\
|
221
|
+
................................\
|
222
|
+
................................\
|
223
|
+
................................\
|
224
|
+
................................\
|
225
|
+
................................x";
|
226
|
+
|
227
|
+
static const char exp_zero_map[257] = "\
|
228
|
+
................................\
|
229
|
+
................yyyyyyyyyy......\
|
230
|
+
................................\
|
231
|
+
................................\
|
232
|
+
................................\
|
233
|
+
................................\
|
234
|
+
................................\
|
235
|
+
................................z";
|
236
|
+
|
237
|
+
static const char exp_map[257] = "\
|
238
|
+
.........rs..r..................\
|
239
|
+
r...........u...yyyyyyyyyy......\
|
240
|
+
.............................H..\
|
241
|
+
.............................G..\
|
242
|
+
................................\
|
243
|
+
................................\
|
244
|
+
................................\
|
245
|
+
................................X";
|
246
|
+
|
247
|
+
static const char big_digit_map[257] = "\
|
248
|
+
.........rs..r..................\
|
249
|
+
r...........u.D.CCCCCCCCCC......\
|
250
|
+
.....J.......................H..\
|
251
|
+
.....J.......................G..\
|
252
|
+
................................\
|
253
|
+
................................\
|
254
|
+
................................\
|
255
|
+
................................D";
|
256
|
+
|
257
|
+
static const char big_dot_map[257] = "\
|
258
|
+
................................\
|
259
|
+
................IIIIIIIIII......\
|
260
|
+
................................\
|
261
|
+
................................\
|
262
|
+
................................\
|
263
|
+
................................\
|
264
|
+
................................\
|
265
|
+
................................o";
|
266
|
+
|
267
|
+
static const char big_frac_map[257] = "\
|
268
|
+
.........rs..r..................\
|
269
|
+
r...........u...IIIIIIIIII......\
|
270
|
+
.....J.......................H..\
|
271
|
+
.....J.......................G..\
|
272
|
+
................................\
|
273
|
+
................................\
|
274
|
+
................................\
|
275
|
+
................................g";
|
276
|
+
|
277
|
+
static const char big_exp_sign_map[257] = "\
|
278
|
+
................................\
|
279
|
+
...........K.K..LLLLLLLLLL......\
|
280
|
+
................................\
|
281
|
+
................................\
|
282
|
+
................................\
|
283
|
+
................................\
|
284
|
+
................................\
|
285
|
+
................................B";
|
286
|
+
|
287
|
+
static const char big_exp_zero_map[257] = "\
|
288
|
+
................................\
|
289
|
+
................LLLLLLLLLL......\
|
290
|
+
................................\
|
291
|
+
................................\
|
292
|
+
................................\
|
293
|
+
................................\
|
294
|
+
................................\
|
295
|
+
................................Z";
|
296
|
+
|
297
|
+
static const char big_exp_map[257] = "\
|
298
|
+
.........rs..r..................\
|
299
|
+
r...........u...LLLLLLLLLL......\
|
300
|
+
.............................H..\
|
301
|
+
.............................G..\
|
302
|
+
................................\
|
303
|
+
................................\
|
304
|
+
................................\
|
305
|
+
................................Y";
|
306
|
+
|
307
|
+
static const char string_map[257] = "\
|
308
|
+
................................\
|
309
|
+
RRzRRRRRRRRRRRRRRRRRRRRRRRRRRRRR\
|
310
|
+
RRRRRRRRRRRRRRRRRRRRRRRRRRRRARRR\
|
311
|
+
RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR\
|
312
|
+
................................\
|
313
|
+
................................\
|
314
|
+
MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\
|
315
|
+
PPPPPPPPPPPPPPPPQQQQQQQQ........s";
|
316
|
+
|
317
|
+
static const char esc_map[257] = "\
|
318
|
+
................................\
|
319
|
+
..B............B................\
|
320
|
+
............................B...\
|
321
|
+
..B...B.......B...B.BU..........\
|
322
|
+
................................\
|
323
|
+
................................\
|
324
|
+
................................\
|
325
|
+
................................~";
|
326
|
+
|
327
|
+
static const char esc_byte_map[257] = "\
|
328
|
+
................................\
|
329
|
+
..\"............/................\
|
330
|
+
............................\\...\
|
331
|
+
..\b...\f.......\n...\r.\t..........\
|
332
|
+
................................\
|
333
|
+
................................\
|
334
|
+
................................\
|
335
|
+
................................b";
|
336
|
+
|
337
|
+
static const char u_map[257] = "\
|
338
|
+
................................\
|
339
|
+
................EEEEEEEEEE......\
|
340
|
+
.EEEEEE.........................\
|
341
|
+
.EEEEEE.........................\
|
342
|
+
................................\
|
343
|
+
................................\
|
344
|
+
................................\
|
345
|
+
................................u";
|
346
|
+
|
347
|
+
static const char utf_map[257] = "\
|
348
|
+
................................\
|
349
|
+
................................\
|
350
|
+
................................\
|
351
|
+
................................\
|
352
|
+
SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS\
|
353
|
+
SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS\
|
354
|
+
................................\
|
355
|
+
................................8";
|
356
|
+
|
357
|
+
static const char space_map[257] = "\
|
358
|
+
.........ab..a..................\
|
359
|
+
a...............................\
|
360
|
+
................................\
|
361
|
+
................................\
|
362
|
+
................................\
|
363
|
+
................................\
|
364
|
+
................................\
|
365
|
+
................................S";
|
366
|
+
|
367
|
+
static const char trail_map[257] = "\
|
368
|
+
.........ab..a..................\
|
369
|
+
a...............................\
|
370
|
+
................................\
|
371
|
+
................................\
|
372
|
+
................................\
|
373
|
+
................................\
|
374
|
+
................................\
|
375
|
+
................................R";
|
376
|
+
|
377
|
+
static const byte hex_map[256] = "\
|
378
|
+
................................\
|
379
|
+
................\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09......\
|
380
|
+
.\x0a\x0b\x0c\x0d\x0e\x0f.........................\
|
381
|
+
.\x0a\x0b\x0c\x0d\x0e\x0f.........................\
|
382
|
+
................................\
|
383
|
+
................................\
|
384
|
+
................................\
|
385
|
+
................................";
|
386
|
+
|
387
|
+
static long double pow_map[401] = {1.0L, 1.0e1L, 1.0e2L, 1.0e3L, 1.0e4L,
|
388
|
+
1.0e5L, 1.0e6L, 1.0e7L, 1.0e8L, 1.0e9L, // 00
|
389
|
+
1.0e10L, 1.0e11L, 1.0e12L, 1.0e13L, 1.0e14L,
|
390
|
+
1.0e15L, 1.0e16L, 1.0e17L, 1.0e18L, 1.0e19L, // 10
|
391
|
+
1.0e20L, 1.0e21L, 1.0e22L, 1.0e23L, 1.0e24L,
|
392
|
+
1.0e25L, 1.0e26L, 1.0e27L, 1.0e28L, 1.0e29L, // 20
|
393
|
+
1.0e30L, 1.0e31L, 1.0e32L, 1.0e33L, 1.0e34L,
|
394
|
+
1.0e35L, 1.0e36L, 1.0e37L, 1.0e38L, 1.0e39L, // 30
|
395
|
+
1.0e40L, 1.0e41L, 1.0e42L, 1.0e43L, 1.0e44L,
|
396
|
+
1.0e45L, 1.0e46L, 1.0e47L, 1.0e48L, 1.0e49L, // 40
|
397
|
+
1.0e50L, 1.0e51L, 1.0e52L, 1.0e53L, 1.0e54L,
|
398
|
+
1.0e55L, 1.0e56L, 1.0e57L, 1.0e58L, 1.0e59L, // 50
|
399
|
+
1.0e60L, 1.0e61L, 1.0e62L, 1.0e63L, 1.0e64L,
|
400
|
+
1.0e65L, 1.0e66L, 1.0e67L, 1.0e68L, 1.0e69L, // 60
|
401
|
+
1.0e70L, 1.0e71L, 1.0e72L, 1.0e73L, 1.0e74L,
|
402
|
+
1.0e75L, 1.0e76L, 1.0e77L, 1.0e78L, 1.0e79L, // 70
|
403
|
+
1.0e80L, 1.0e81L, 1.0e82L, 1.0e83L, 1.0e84L,
|
404
|
+
1.0e85L, 1.0e86L, 1.0e87L, 1.0e88L, 1.0e89L, // 80
|
405
|
+
1.0e90L, 1.0e91L, 1.0e92L, 1.0e93L, 1.0e94L,
|
406
|
+
1.0e95L, 1.0e96L, 1.0e97L, 1.0e98L, 1.0e99L, // 90
|
407
|
+
1.0e100L, 1.0e101L, 1.0e102L, 1.0e103L, 1.0e104L,
|
408
|
+
1.0e105L, 1.0e106L, 1.0e107L, 1.0e108L, 1.0e109L, // 100
|
409
|
+
1.0e110L, 1.0e111L, 1.0e112L, 1.0e113L, 1.0e114L,
|
410
|
+
1.0e115L, 1.0e116L, 1.0e117L, 1.0e118L, 1.0e119L, // 110
|
411
|
+
1.0e120L, 1.0e121L, 1.0e122L, 1.0e123L, 1.0e124L,
|
412
|
+
1.0e125L, 1.0e126L, 1.0e127L, 1.0e128L, 1.0e129L, // 120
|
413
|
+
1.0e130L, 1.0e131L, 1.0e132L, 1.0e133L, 1.0e134L,
|
414
|
+
1.0e135L, 1.0e136L, 1.0e137L, 1.0e138L, 1.0e139L, // 130
|
415
|
+
1.0e140L, 1.0e141L, 1.0e142L, 1.0e143L, 1.0e144L,
|
416
|
+
1.0e145L, 1.0e146L, 1.0e147L, 1.0e148L, 1.0e149L, // 140
|
417
|
+
1.0e150L, 1.0e151L, 1.0e152L, 1.0e153L, 1.0e154L,
|
418
|
+
1.0e155L, 1.0e156L, 1.0e157L, 1.0e158L, 1.0e159L, // 150
|
419
|
+
1.0e160L, 1.0e161L, 1.0e162L, 1.0e163L, 1.0e164L,
|
420
|
+
1.0e165L, 1.0e166L, 1.0e167L, 1.0e168L, 1.0e169L, // 160
|
421
|
+
1.0e170L, 1.0e171L, 1.0e172L, 1.0e173L, 1.0e174L,
|
422
|
+
1.0e175L, 1.0e176L, 1.0e177L, 1.0e178L, 1.0e179L, // 170
|
423
|
+
1.0e180L, 1.0e181L, 1.0e182L, 1.0e183L, 1.0e184L,
|
424
|
+
1.0e185L, 1.0e186L, 1.0e187L, 1.0e188L, 1.0e189L, // 180
|
425
|
+
1.0e190L, 1.0e191L, 1.0e192L, 1.0e193L, 1.0e194L,
|
426
|
+
1.0e195L, 1.0e196L, 1.0e197L, 1.0e198L, 1.0e199L, // 190
|
427
|
+
1.0e200L, 1.0e201L, 1.0e202L, 1.0e203L, 1.0e204L,
|
428
|
+
1.0e205L, 1.0e206L, 1.0e207L, 1.0e208L, 1.0e209L, // 200
|
429
|
+
1.0e210L, 1.0e211L, 1.0e212L, 1.0e213L, 1.0e214L,
|
430
|
+
1.0e215L, 1.0e216L, 1.0e217L, 1.0e218L, 1.0e219L, // 210
|
431
|
+
1.0e220L, 1.0e221L, 1.0e222L, 1.0e223L, 1.0e224L,
|
432
|
+
1.0e225L, 1.0e226L, 1.0e227L, 1.0e228L, 1.0e229L, // 220
|
433
|
+
1.0e230L, 1.0e231L, 1.0e232L, 1.0e233L, 1.0e234L,
|
434
|
+
1.0e235L, 1.0e236L, 1.0e237L, 1.0e238L, 1.0e239L, // 230
|
435
|
+
1.0e240L, 1.0e241L, 1.0e242L, 1.0e243L, 1.0e244L,
|
436
|
+
1.0e245L, 1.0e246L, 1.0e247L, 1.0e248L, 1.0e249L, // 240
|
437
|
+
1.0e250L, 1.0e251L, 1.0e252L, 1.0e253L, 1.0e254L,
|
438
|
+
1.0e255L, 1.0e256L, 1.0e257L, 1.0e258L, 1.0e259L, // 250
|
439
|
+
1.0e260L, 1.0e261L, 1.0e262L, 1.0e263L, 1.0e264L,
|
440
|
+
1.0e265L, 1.0e266L, 1.0e267L, 1.0e268L, 1.0e269L, // 260
|
441
|
+
1.0e270L, 1.0e271L, 1.0e272L, 1.0e273L, 1.0e274L,
|
442
|
+
1.0e275L, 1.0e276L, 1.0e277L, 1.0e278L, 1.0e279L, // 270
|
443
|
+
1.0e280L, 1.0e281L, 1.0e282L, 1.0e283L, 1.0e284L,
|
444
|
+
1.0e285L, 1.0e286L, 1.0e287L, 1.0e288L, 1.0e289L, // 280
|
445
|
+
1.0e290L, 1.0e291L, 1.0e292L, 1.0e293L, 1.0e294L,
|
446
|
+
1.0e295L, 1.0e296L, 1.0e297L, 1.0e298L, 1.0e299L, // 290
|
447
|
+
1.0e300L, 1.0e301L, 1.0e302L, 1.0e303L, 1.0e304L,
|
448
|
+
1.0e305L, 1.0e306L, 1.0e307L, 1.0e308L, 1.0e309L, // 300
|
449
|
+
1.0e310L, 1.0e311L, 1.0e312L, 1.0e313L, 1.0e314L,
|
450
|
+
1.0e315L, 1.0e316L, 1.0e317L, 1.0e318L, 1.0e319L, // 310
|
451
|
+
1.0e320L, 1.0e321L, 1.0e322L, 1.0e323L, 1.0e324L,
|
452
|
+
1.0e325L, 1.0e326L, 1.0e327L, 1.0e328L, 1.0e329L, // 320
|
453
|
+
1.0e330L, 1.0e331L, 1.0e332L, 1.0e333L, 1.0e334L,
|
454
|
+
1.0e335L, 1.0e336L, 1.0e337L, 1.0e338L, 1.0e339L, // 330
|
455
|
+
1.0e340L, 1.0e341L, 1.0e342L, 1.0e343L, 1.0e344L,
|
456
|
+
1.0e345L, 1.0e346L, 1.0e347L, 1.0e348L, 1.0e349L, // 340
|
457
|
+
1.0e350L, 1.0e351L, 1.0e352L, 1.0e353L, 1.0e354L,
|
458
|
+
1.0e355L, 1.0e356L, 1.0e357L, 1.0e358L, 1.0e359L, // 350
|
459
|
+
1.0e360L, 1.0e361L, 1.0e362L, 1.0e363L, 1.0e364L,
|
460
|
+
1.0e365L, 1.0e366L, 1.0e367L, 1.0e368L, 1.0e369L, // 360
|
461
|
+
1.0e370L, 1.0e371L, 1.0e372L, 1.0e373L, 1.0e374L,
|
462
|
+
1.0e375L, 1.0e376L, 1.0e377L, 1.0e378L, 1.0e379L, // 370
|
463
|
+
1.0e380L, 1.0e381L, 1.0e382L, 1.0e383L, 1.0e384L,
|
464
|
+
1.0e385L, 1.0e386L, 1.0e387L, 1.0e388L, 1.0e389L, // 380
|
465
|
+
1.0e390L, 1.0e391L, 1.0e392L, 1.0e393L, 1.0e394L,
|
466
|
+
1.0e395L, 1.0e396L, 1.0e397L, 1.0e398L, 1.0e399L, // 390
|
467
|
+
1.0e400L};
|
468
|
+
|
469
|
+
static VALUE parser_class;
|
470
|
+
|
471
|
+
// Works with extended unicode as well. \Uffffffff if support is desired in
|
472
|
+
// the future.
|
473
|
+
static size_t unicodeToUtf8(uint32_t code, byte *buf) {
|
474
|
+
byte *start = buf;
|
475
|
+
|
476
|
+
if (0x0000007F >= code) {
|
477
|
+
*buf++ = (byte)code;
|
478
|
+
} else if (0x000007FF >= code) {
|
479
|
+
*buf++ = 0xC0 | (code >> 6);
|
480
|
+
*buf++ = 0x80 | (0x3F & code);
|
481
|
+
} else if (0x0000FFFF >= code) {
|
482
|
+
*buf++ = 0xE0 | (code >> 12);
|
483
|
+
*buf++ = 0x80 | ((code >> 6) & 0x3F);
|
484
|
+
*buf++ = 0x80 | (0x3F & code);
|
485
|
+
} else if (0x001FFFFF >= code) {
|
486
|
+
*buf++ = 0xF0 | (code >> 18);
|
487
|
+
*buf++ = 0x80 | ((code >> 12) & 0x3F);
|
488
|
+
*buf++ = 0x80 | ((code >> 6) & 0x3F);
|
489
|
+
*buf++ = 0x80 | (0x3F & code);
|
490
|
+
} else if (0x03FFFFFF >= code) {
|
491
|
+
*buf++ = 0xF8 | (code >> 24);
|
492
|
+
*buf++ = 0x80 | ((code >> 18) & 0x3F);
|
493
|
+
*buf++ = 0x80 | ((code >> 12) & 0x3F);
|
494
|
+
*buf++ = 0x80 | ((code >> 6) & 0x3F);
|
495
|
+
*buf++ = 0x80 | (0x3F & code);
|
496
|
+
} else if (0x7FFFFFFF >= code) {
|
497
|
+
*buf++ = 0xFC | (code >> 30);
|
498
|
+
*buf++ = 0x80 | ((code >> 24) & 0x3F);
|
499
|
+
*buf++ = 0x80 | ((code >> 18) & 0x3F);
|
500
|
+
*buf++ = 0x80 | ((code >> 12) & 0x3F);
|
501
|
+
*buf++ = 0x80 | ((code >> 6) & 0x3F);
|
502
|
+
*buf++ = 0x80 | (0x3F & code);
|
503
|
+
}
|
504
|
+
return buf - start;
|
505
|
+
}
|
506
|
+
|
507
|
+
// Put the parser back into its starting state: no reader, a zeroed number
// accumulator, reset key and value buffers, depth zero and the value map
// selected as the active character map.
static void parser_reset(ojParser p) {
    // Scalar state.
    p->depth  = 0;
    p->reader = 0;

    // Character map selection; no follow-up map is pending.
    p->next_map = NULL;
    p->map      = value_map;

    // Scratch storage for the number, key and value being collected.
    memset(&p->num, 0, sizeof p->num);
    buf_reset(&p->buf);
    buf_reset(&p->key);
}
|
516
|
+
|
517
|
+
// Format a printf style message and raise it as a Ruby exception of class
// oj_json_parser_error_class, with the parser's line and col appended as
// " at <line>:<col>".
//
// p   - parser supplying the line and col values
// fmt - printf style format followed by its arguments
//
// The formatted message is limited to the 256 byte local buffer; anything
// longer is truncated by vsnprintf().
static void parse_error(ojParser p, const char *fmt, ...) {
    char    msg[256];
    va_list args;

    va_start(args, fmt);
    vsnprintf(msg, sizeof msg, fmt, args);
    va_end(args);

    rb_raise(oj_json_parser_error_class, "%s at %ld:%ld", msg, p->line, p->col);
}
|
526
|
+
|
527
|
+
// Report a byte that the active character map does not accept.
//
// Slot 256 of every map holds a one character tag identifying the map. The
// tag selects a specific message for the null, true, false and string maps;
// every other map gets a generic message naming the byte and the tag.
//
// p - parser whose active map rejected the byte
// b - the offending byte
static void byte_error(ojParser p, byte b) {
    const char mode = p->map[256];

    if ('N' == mode) {  // null_map
        parse_error(p, "expected null");
    } else if ('T' == mode) {  // true_map
        parse_error(p, "expected true");
    } else if ('F' == mode) {  // false_map
        parse_error(p, "expected false");
    } else if ('s' == mode) {  // string_map
        parse_error(p, "invalid JSON character 0x%02x", b);
    } else {
        parse_error(p, "unexpected character '%c' in '%c' mode", b, p->map[256]);
    }
}
|
544
|
+
|
545
|
+
// Finish the number collected in p->num and hand it to the callback set
// selected by the current stack entry.
//
// OJ_INT     - applies the sign to num.fixnum, clears num.neg and calls
//              add_int.
// OJ_DECIMAL - builds a long double from num.fixnum, the sign, the fraction
//              shift and the exponent, stores it in num.dub and calls
//              add_float.
// OJ_BIG     - calls add_big.
// Any other type is ignored.
static void calc_num(ojParser p) {
    if (OJ_INT == p->type) {
        if (p->num.neg) {
            p->num.fixnum = -p->num.fixnum;
            p->num.neg    = false;
        }
        p->funcs[p->stack[p->depth]].add_int(p);
    } else if (OJ_DECIMAL == p->type) {
        long double value = (long double)p->num.fixnum;

        if (p->num.neg) {
            value = -value;
        }
        // Move the decimal point left by the number of fraction digits.
        if (0 < p->num.shift) {
            value /= pow_map[p->num.shift];
        }
        if (0 < p->num.exp) {
            // pow_map only covers exponents up to MAX_POW; larger ones are
            // computed with powl().
            long double scale = (MAX_POW < p->num.exp) ? powl(10.0L, (long double)p->num.exp)
                                                       : pow_map[p->num.exp];

            if (p->num.exp_neg) {
                value /= scale;
            } else {
                value *= scale;
            }
        }
        p->num.dub = value;
        p->funcs[p->stack[p->depth]].add_float(p);
    } else if (OJ_BIG == p->type) {
        p->funcs[p->stack[p->depth]].add_big(p);
    }
    // Any other type: nothing to do.
}
|
587
|
+
|
588
|
+
// Switch the number being parsed from the fixed width representation
// (num.fixnum plus sign, shift and exponent) to its text form in p->buf and
// mark it as OJ_BIG. Types other than OJ_INT and OJ_DECIMAL are left alone
// apart from p->buf being emptied.
//
// The digits are produced right to left into a local buffer and then
// appended to p->buf using an explicit length.
static void big_change(ojParser p) {
    char    buf[32];
    int64_t i   = p->num.fixnum;
    int     len = 0;

    buf[sizeof(buf) - 1] = '\0';
    // Start with an empty value buffer.
    p->buf.tail = p->buf.head;
    switch (p->type) {
    case OJ_INT:
        // If an int then it will fit in the num.raw so no need to check length;
        for (len = sizeof(buf) - 1; 0 < i; len--, i /= 10) {
            buf[len] = '0' + (i % 10);
        }
        if (p->num.neg) {
            buf[len] = '-';
            len--;
        }
        // len is the slot just before the first character written.
        buf_append_string(&p->buf, buf + len + 1, sizeof(buf) - len - 1);
        p->type = OJ_BIG;
        break;
    case OJ_DECIMAL: {
        int shift = p->num.shift;

        // NOTE(review): the '.' is only written while digits remain, so if
        // the digits run out before shift reaches zero no decimal point is
        // emitted - confirm callers never reach this with so few digits.
        for (len = sizeof(buf) - 1; 0 < i; len--, i /= 10, shift--) {
            if (0 == shift) {
                buf[len] = '.';
                len--;
            }
            buf[len] = '0' + (i % 10);
        }
        if (p->num.neg) {
            buf[len] = '-';
            len--;
        }
        buf_append_string(&p->buf, buf + len + 1, sizeof(buf) - len - 1);
        if (0 < p->num.exp) {
            int  x = p->num.exp;
            int  d;
            bool started = false;

            buf_append(&p->buf, 'e');
            if (0 < p->num.exp_neg) {
                buf_append(&p->buf, '-');
            }
            // Emit up to four exponent digits, skipping leading zeros only.
            for (int div = 1000; 0 < div; div /= 10) {
                d = x / div % 10;
                if (started || 0 < d) {
                    // Once the first digit is out every later digit must be
                    // written, zeros included, or 100 would come out as 1.
                    started = true;
                    buf_append(&p->buf, '0' + d);
                }
            }
        }
        p->type = OJ_BIG;
        break;
    }
    default: break;
    }
}
|
645
|
+
|
646
|
+
static void parse(ojParser p, const byte *json) {
|
647
|
+
const byte *start;
|
648
|
+
const byte *b = json;
|
649
|
+
|
650
|
+
#if DEBUG
|
651
|
+
printf("*** parse - mode: %c %s\n", p->map[256], (const char *)json);
|
652
|
+
#endif
|
653
|
+
for (; '\0' != *b; b++) {
|
654
|
+
switch (p->map[*b]) {
|
655
|
+
case SKIP_NEWLINE:
|
656
|
+
p->line++;
|
657
|
+
p->col = b - json;
|
658
|
+
b++;
|
659
|
+
#ifdef SPACE_JUMP
|
660
|
+
// for (uint32_t *sj = (uint32_t*)b; 0x20202020 == *sj; sj++) { b += 4; }
|
661
|
+
for (uint16_t *sj = (uint16_t *)b; 0x2020 == *sj; sj++) {
|
662
|
+
b += 2;
|
663
|
+
}
|
664
|
+
#endif
|
665
|
+
for (; SKIP_CHAR == space_map[*b]; b++) {
|
666
|
+
}
|
667
|
+
b--;
|
668
|
+
break;
|
669
|
+
case COLON_COLON: p->map = value_map; break;
|
670
|
+
case SKIP_CHAR: break;
|
671
|
+
case KEY_QUOTE:
|
672
|
+
b++;
|
673
|
+
p->key.tail = p->key.head;
|
674
|
+
start = b;
|
675
|
+
for (; STR_OK == string_map[*b]; b++) {
|
676
|
+
}
|
677
|
+
buf_append_string(&p->key, (const char *)start, b - start);
|
678
|
+
if ('"' == *b) {
|
679
|
+
p->map = colon_map;
|
680
|
+
break;
|
681
|
+
}
|
682
|
+
b--;
|
683
|
+
p->map = string_map;
|
684
|
+
p->next_map = colon_map;
|
685
|
+
break;
|
686
|
+
case AFTER_COMMA:
|
687
|
+
if (0 < p->depth && OBJECT_FUN == p->stack[p->depth]) {
|
688
|
+
p->map = key_map;
|
689
|
+
} else {
|
690
|
+
p->map = comma_map;
|
691
|
+
}
|
692
|
+
break;
|
693
|
+
case VAL_QUOTE:
|
694
|
+
b++;
|
695
|
+
start = b;
|
696
|
+
p->buf.tail = p->buf.head;
|
697
|
+
for (; STR_OK == string_map[*b]; b++) {
|
698
|
+
}
|
699
|
+
buf_append_string(&p->buf, (const char *)start, b - start);
|
700
|
+
if ('"' == *b) {
|
701
|
+
p->funcs[p->stack[p->depth]].add_str(p);
|
702
|
+
p->map = (0 == p->depth) ? value_map : after_map;
|
703
|
+
break;
|
704
|
+
}
|
705
|
+
b--;
|
706
|
+
p->map = string_map;
|
707
|
+
p->next_map = (0 == p->depth) ? value_map : after_map;
|
708
|
+
break;
|
709
|
+
case OPEN_OBJECT:
|
710
|
+
p->funcs[p->stack[p->depth]].open_object(p);
|
711
|
+
p->depth++;
|
712
|
+
p->stack[p->depth] = OBJECT_FUN;
|
713
|
+
p->map = key1_map;
|
714
|
+
break;
|
715
|
+
case NUM_CLOSE_OBJECT:
|
716
|
+
calc_num(p);
|
717
|
+
// flow through
|
718
|
+
case CLOSE_OBJECT:
|
719
|
+
p->map = (1 == p->depth) ? value_map : after_map;
|
720
|
+
if (p->depth <= 0 || OBJECT_FUN != p->stack[p->depth]) {
|
721
|
+
p->col = b - json - p->col + 1;
|
722
|
+
parse_error(p, "unexpected object close");
|
723
|
+
return;
|
724
|
+
}
|
725
|
+
p->depth--;
|
726
|
+
p->funcs[p->stack[p->depth]].close_object(p);
|
727
|
+
break;
|
728
|
+
case OPEN_ARRAY:
|
729
|
+
p->funcs[p->stack[p->depth]].open_array(p);
|
730
|
+
p->depth++;
|
731
|
+
p->stack[p->depth] = ARRAY_FUN;
|
732
|
+
p->map = value_map;
|
733
|
+
break;
|
734
|
+
case NUM_CLOSE_ARRAY:
|
735
|
+
calc_num(p);
|
736
|
+
// flow through
|
737
|
+
case CLOSE_ARRAY:
|
738
|
+
p->map = (1 == p->depth) ? value_map : after_map;
|
739
|
+
if (p->depth <= 0 || ARRAY_FUN != p->stack[p->depth]) {
|
740
|
+
p->col = b - json - p->col + 1;
|
741
|
+
parse_error(p, "unexpected array close");
|
742
|
+
return;
|
743
|
+
}
|
744
|
+
p->depth--;
|
745
|
+
p->funcs[p->stack[p->depth]].close_array(p);
|
746
|
+
break;
|
747
|
+
case NUM_COMMA:
|
748
|
+
calc_num(p);
|
749
|
+
if (0 < p->depth && OBJECT_FUN == p->stack[p->depth]) {
|
750
|
+
p->map = key_map;
|
751
|
+
} else {
|
752
|
+
p->map = comma_map;
|
753
|
+
}
|
754
|
+
break;
|
755
|
+
case VAL0:
|
756
|
+
p->type = OJ_INT;
|
757
|
+
p->num.fixnum = 0;
|
758
|
+
p->num.neg = false;
|
759
|
+
p->num.shift = 0;
|
760
|
+
p->num.len = 0;
|
761
|
+
p->num.exp = 0;
|
762
|
+
p->num.exp_neg = false;
|
763
|
+
p->map = zero_map;
|
764
|
+
break;
|
765
|
+
case VAL_NEG:
|
766
|
+
p->type = OJ_INT;
|
767
|
+
p->num.fixnum = 0;
|
768
|
+
p->num.neg = true;
|
769
|
+
p->num.shift = 0;
|
770
|
+
p->num.len = 0;
|
771
|
+
p->num.exp = 0;
|
772
|
+
p->num.exp_neg = false;
|
773
|
+
p->map = neg_map;
|
774
|
+
break;
|
775
|
+
;
|
776
|
+
case VAL_DIGIT:
|
777
|
+
p->type = OJ_INT;
|
778
|
+
p->num.fixnum = 0;
|
779
|
+
p->num.neg = false;
|
780
|
+
p->num.shift = 0;
|
781
|
+
p->num.exp = 0;
|
782
|
+
p->num.exp_neg = false;
|
783
|
+
p->num.len = 0;
|
784
|
+
p->map = digit_map;
|
785
|
+
for (; NUM_DIGIT == digit_map[*b]; b++) {
|
786
|
+
uint64_t x = (uint64_t)p->num.fixnum * 10 + (uint64_t)(*b - '0');
|
787
|
+
|
788
|
+
// Tried just checking for an int less than zero but that
|
789
|
+
// fails when optimization is on for some reason with the
|
790
|
+
// clang compiler so use a bit mask instead.
|
791
|
+
if (x < BIG_LIMIT) {
|
792
|
+
p->num.fixnum = (int64_t)x;
|
793
|
+
} else {
|
794
|
+
big_change(p);
|
795
|
+
p->map = big_digit_map;
|
796
|
+
break;
|
797
|
+
}
|
798
|
+
}
|
799
|
+
b--;
|
800
|
+
break;
|
801
|
+
case NUM_DIGIT:
|
802
|
+
for (; NUM_DIGIT == digit_map[*b]; b++) {
|
803
|
+
uint64_t x = p->num.fixnum * 10 + (uint64_t)(*b - '0');
|
804
|
+
|
805
|
+
if (x < BIG_LIMIT) {
|
806
|
+
p->num.fixnum = (int64_t)x;
|
807
|
+
} else {
|
808
|
+
big_change(p);
|
809
|
+
p->map = big_digit_map;
|
810
|
+
break;
|
811
|
+
}
|
812
|
+
}
|
813
|
+
b--;
|
814
|
+
break;
|
815
|
+
case NUM_DOT:
|
816
|
+
p->type = OJ_DECIMAL;
|
817
|
+
p->map = dot_map;
|
818
|
+
break;
|
819
|
+
case NUM_FRAC:
|
820
|
+
p->map = frac_map;
|
821
|
+
for (; NUM_FRAC == frac_map[*b]; b++) {
|
822
|
+
uint64_t x = p->num.fixnum * 10 + (uint64_t)(*b - '0');
|
823
|
+
|
824
|
+
if (x < FRAC_LIMIT) {
|
825
|
+
p->num.fixnum = (int64_t)x;
|
826
|
+
p->num.shift++;
|
827
|
+
} else {
|
828
|
+
big_change(p);
|
829
|
+
p->map = big_frac_map;
|
830
|
+
break;
|
831
|
+
}
|
832
|
+
}
|
833
|
+
b--;
|
834
|
+
break;
|
835
|
+
case FRAC_E:
|
836
|
+
p->type = OJ_DECIMAL;
|
837
|
+
p->map = exp_sign_map;
|
838
|
+
break;
|
839
|
+
case NUM_ZERO: p->map = zero_map; break;
|
840
|
+
case NEG_DIGIT:
|
841
|
+
for (; NUM_DIGIT == digit_map[*b]; b++) {
|
842
|
+
uint64_t x = p->num.fixnum * 10 + (uint64_t)(*b - '0');
|
843
|
+
|
844
|
+
if (x < BIG_LIMIT) {
|
845
|
+
p->num.fixnum = (int64_t)x;
|
846
|
+
} else {
|
847
|
+
big_change(p);
|
848
|
+
p->map = big_digit_map;
|
849
|
+
break;
|
850
|
+
}
|
851
|
+
}
|
852
|
+
b--;
|
853
|
+
p->map = digit_map;
|
854
|
+
break;
|
855
|
+
case EXP_SIGN:
|
856
|
+
p->num.exp_neg = ('-' == *b);
|
857
|
+
p->map = exp_zero_map;
|
858
|
+
break;
|
859
|
+
case EXP_DIGIT:
|
860
|
+
p->map = exp_map;
|
861
|
+
for (; NUM_DIGIT == digit_map[*b]; b++) {
|
862
|
+
int16_t x = p->num.exp * 10 + (int16_t)(*b - '0');
|
863
|
+
|
864
|
+
if (x <= MAX_EXP) {
|
865
|
+
p->num.exp = x;
|
866
|
+
} else {
|
867
|
+
big_change(p);
|
868
|
+
p->map = big_exp_map;
|
869
|
+
break;
|
870
|
+
}
|
871
|
+
}
|
872
|
+
b--;
|
873
|
+
break;
|
874
|
+
case BIG_DIGIT:
|
875
|
+
start = b;
|
876
|
+
for (; NUM_DIGIT == digit_map[*b]; b++) {
|
877
|
+
}
|
878
|
+
buf_append_string(&p->buf, (const char *)start, b - start);
|
879
|
+
b--;
|
880
|
+
break;
|
881
|
+
case BIG_DOT:
|
882
|
+
buf_append(&p->buf, '.');
|
883
|
+
p->map = big_dot_map;
|
884
|
+
break;
|
885
|
+
case BIG_FRAC:
|
886
|
+
p->map = big_frac_map;
|
887
|
+
start = b;
|
888
|
+
for (; NUM_FRAC == frac_map[*b]; b++) {
|
889
|
+
}
|
890
|
+
buf_append_string(&p->buf, (const char *)start, b - start);
|
891
|
+
b--;
|
892
|
+
break;
|
893
|
+
case BIG_E:
|
894
|
+
buf_append(&p->buf, *b);
|
895
|
+
p->map = big_exp_sign_map;
|
896
|
+
break;
|
897
|
+
case BIG_EXP_SIGN:
|
898
|
+
buf_append(&p->buf, *b);
|
899
|
+
p->map = big_exp_zero_map;
|
900
|
+
break;
|
901
|
+
case BIG_EXP:
|
902
|
+
start = b;
|
903
|
+
for (; NUM_DIGIT == digit_map[*b]; b++) {
|
904
|
+
}
|
905
|
+
buf_append_string(&p->buf, (const char *)start, b - start);
|
906
|
+
b--;
|
907
|
+
p->map = big_exp_map;
|
908
|
+
break;
|
909
|
+
case NUM_SPC: calc_num(p); break;
|
910
|
+
case NUM_NEWLINE: calc_num(p); b++;
|
911
|
+
#ifdef SPACE_JUMP
|
912
|
+
// for (uint32_t *sj = (uint32_t*)b; 0x20202020 == *sj; sj++) { b += 4; }
|
913
|
+
for (uint16_t *sj = (uint16_t *)b; 0x2020 == *sj; sj++) {
|
914
|
+
b += 2;
|
915
|
+
}
|
916
|
+
#endif
|
917
|
+
for (; SKIP_CHAR == space_map[*b]; b++) {
|
918
|
+
}
|
919
|
+
b--;
|
920
|
+
break;
|
921
|
+
case STR_OK:
|
922
|
+
start = b;
|
923
|
+
for (; STR_OK == string_map[*b]; b++) {
|
924
|
+
}
|
925
|
+
if (':' == p->next_map[256]) {
|
926
|
+
buf_append_string(&p->key, (const char *)start, b - start);
|
927
|
+
} else {
|
928
|
+
buf_append_string(&p->buf, (const char *)start, b - start);
|
929
|
+
}
|
930
|
+
if ('"' == *b) {
|
931
|
+
p->map = p->next_map;
|
932
|
+
break;
|
933
|
+
}
|
934
|
+
b--;
|
935
|
+
break;
|
936
|
+
case STR_SLASH: p->map = esc_map; break;
|
937
|
+
case STR_QUOTE: p->map = p->next_map; break;
|
938
|
+
case ESC_U:
|
939
|
+
p->map = u_map;
|
940
|
+
p->ri = 0;
|
941
|
+
p->ucode = 0;
|
942
|
+
break;
|
943
|
+
case U_OK:
|
944
|
+
p->ri++;
|
945
|
+
p->ucode = p->ucode << 4 | (uint32_t)hex_map[*b];
|
946
|
+
if (4 <= p->ri) {
|
947
|
+
byte utf8[8];
|
948
|
+
size_t ulen = unicodeToUtf8(p->ucode, utf8);
|
949
|
+
|
950
|
+
if (0 < ulen) {
|
951
|
+
if (':' == p->next_map[256]) {
|
952
|
+
buf_append_string(&p->key, (const char *)utf8, ulen);
|
953
|
+
} else {
|
954
|
+
buf_append_string(&p->buf, (const char *)utf8, ulen);
|
955
|
+
}
|
956
|
+
} else {
|
957
|
+
parse_error(p, "invalid unicode");
|
958
|
+
return;
|
959
|
+
}
|
960
|
+
p->map = string_map;
|
961
|
+
}
|
962
|
+
break;
|
963
|
+
case ESC_OK:
|
964
|
+
if (':' == p->next_map[256]) {
|
965
|
+
buf_append(&p->key, esc_byte_map[*b]);
|
966
|
+
} else {
|
967
|
+
buf_append(&p->buf, esc_byte_map[*b]);
|
968
|
+
}
|
969
|
+
p->map = string_map;
|
970
|
+
break;
|
971
|
+
case UTF1:
|
972
|
+
p->ri = 1;
|
973
|
+
p->map = utf_map;
|
974
|
+
if (':' == p->next_map[256]) {
|
975
|
+
buf_append(&p->key, *b);
|
976
|
+
} else {
|
977
|
+
buf_append(&p->buf, *b);
|
978
|
+
}
|
979
|
+
break;
|
980
|
+
case UTF2:
|
981
|
+
p->ri = 2;
|
982
|
+
p->map = utf_map;
|
983
|
+
if (':' == p->next_map[256]) {
|
984
|
+
buf_append(&p->key, *b);
|
985
|
+
} else {
|
986
|
+
buf_append(&p->buf, *b);
|
987
|
+
}
|
988
|
+
break;
|
989
|
+
case UTF3:
|
990
|
+
p->ri = 3;
|
991
|
+
p->map = utf_map;
|
992
|
+
if (':' == p->next_map[256]) {
|
993
|
+
buf_append(&p->key, *b);
|
994
|
+
} else {
|
995
|
+
buf_append(&p->buf, *b);
|
996
|
+
}
|
997
|
+
break;
|
998
|
+
case UTFX:
|
999
|
+
p->ri--;
|
1000
|
+
if (':' == p->next_map[256]) {
|
1001
|
+
buf_append(&p->key, *b);
|
1002
|
+
} else {
|
1003
|
+
buf_append(&p->buf, *b);
|
1004
|
+
}
|
1005
|
+
if (p->ri <= 0) {
|
1006
|
+
p->map = string_map;
|
1007
|
+
}
|
1008
|
+
break;
|
1009
|
+
case VAL_NULL:
|
1010
|
+
if ('u' == b[1] && 'l' == b[2] && 'l' == b[3]) {
|
1011
|
+
b += 3;
|
1012
|
+
p->funcs[p->stack[p->depth]].add_null(p);
|
1013
|
+
p->map = (0 == p->depth) ? value_map : after_map;
|
1014
|
+
break;
|
1015
|
+
}
|
1016
|
+
p->ri = 0;
|
1017
|
+
*p->token = *b++;
|
1018
|
+
for (int i = 1; i < 4; i++) {
|
1019
|
+
if ('\0' == *b) {
|
1020
|
+
p->ri = i;
|
1021
|
+
break;
|
1022
|
+
} else {
|
1023
|
+
p->token[i] = *b++;
|
1024
|
+
}
|
1025
|
+
}
|
1026
|
+
if (0 < p->ri) {
|
1027
|
+
p->map = null_map;
|
1028
|
+
b--;
|
1029
|
+
break;
|
1030
|
+
}
|
1031
|
+
p->col = b - json - p->col;
|
1032
|
+
parse_error(p, "expected null");
|
1033
|
+
return;
|
1034
|
+
case VAL_TRUE:
|
1035
|
+
if ('r' == b[1] && 'u' == b[2] && 'e' == b[3]) {
|
1036
|
+
b += 3;
|
1037
|
+
p->funcs[p->stack[p->depth]].add_true(p);
|
1038
|
+
p->map = (0 == p->depth) ? value_map : after_map;
|
1039
|
+
break;
|
1040
|
+
}
|
1041
|
+
p->ri = 0;
|
1042
|
+
*p->token = *b++;
|
1043
|
+
for (int i = 1; i < 4; i++) {
|
1044
|
+
if ('\0' == *b) {
|
1045
|
+
p->ri = i;
|
1046
|
+
break;
|
1047
|
+
} else {
|
1048
|
+
p->token[i] = *b++;
|
1049
|
+
}
|
1050
|
+
}
|
1051
|
+
if (0 < p->ri) {
|
1052
|
+
p->map = true_map;
|
1053
|
+
b--;
|
1054
|
+
break;
|
1055
|
+
}
|
1056
|
+
p->col = b - json - p->col;
|
1057
|
+
parse_error(p, "expected true");
|
1058
|
+
return;
|
1059
|
+
case VAL_FALSE:
|
1060
|
+
if ('a' == b[1] && 'l' == b[2] && 's' == b[3] && 'e' == b[4]) {
|
1061
|
+
b += 4;
|
1062
|
+
p->funcs[p->stack[p->depth]].add_false(p);
|
1063
|
+
p->map = (0 == p->depth) ? value_map : after_map;
|
1064
|
+
break;
|
1065
|
+
}
|
1066
|
+
p->ri = 0;
|
1067
|
+
*p->token = *b++;
|
1068
|
+
for (int i = 1; i < 5; i++) {
|
1069
|
+
if ('\0' == *b) {
|
1070
|
+
p->ri = i;
|
1071
|
+
break;
|
1072
|
+
} else {
|
1073
|
+
p->token[i] = *b++;
|
1074
|
+
}
|
1075
|
+
}
|
1076
|
+
if (0 < p->ri) {
|
1077
|
+
p->map = false_map;
|
1078
|
+
b--;
|
1079
|
+
break;
|
1080
|
+
}
|
1081
|
+
p->col = b - json - p->col;
|
1082
|
+
parse_error(p, "expected false");
|
1083
|
+
return;
|
1084
|
+
case TOKEN_OK:
|
1085
|
+
p->token[p->ri] = *b;
|
1086
|
+
p->ri++;
|
1087
|
+
switch (p->map[256]) {
|
1088
|
+
case 'N':
|
1089
|
+
if (4 == p->ri) {
|
1090
|
+
if (0 != strncmp("null", p->token, 4)) {
|
1091
|
+
p->col = b - json - p->col;
|
1092
|
+
parse_error(p, "expected null");
|
1093
|
+
return;
|
1094
|
+
}
|
1095
|
+
p->funcs[p->stack[p->depth]].add_null(p);
|
1096
|
+
p->map = (0 == p->depth) ? value_map : after_map;
|
1097
|
+
}
|
1098
|
+
break;
|
1099
|
+
case 'F':
|
1100
|
+
if (5 == p->ri) {
|
1101
|
+
if (0 != strncmp("false", p->token, 5)) {
|
1102
|
+
p->col = b - json - p->col;
|
1103
|
+
parse_error(p, "expected false");
|
1104
|
+
return;
|
1105
|
+
}
|
1106
|
+
p->funcs[p->stack[p->depth]].add_false(p);
|
1107
|
+
p->map = (0 == p->depth) ? value_map : after_map;
|
1108
|
+
}
|
1109
|
+
break;
|
1110
|
+
case 'T':
|
1111
|
+
if (4 == p->ri) {
|
1112
|
+
if (0 != strncmp("true", p->token, 4)) {
|
1113
|
+
p->col = b - json - p->col;
|
1114
|
+
parse_error(p, "expected true");
|
1115
|
+
return;
|
1116
|
+
}
|
1117
|
+
p->funcs[p->stack[p->depth]].add_true(p);
|
1118
|
+
p->map = (0 == p->depth) ? value_map : after_map;
|
1119
|
+
}
|
1120
|
+
break;
|
1121
|
+
default:
|
1122
|
+
p->col = b - json - p->col;
|
1123
|
+
parse_error(p, "parse error");
|
1124
|
+
return;
|
1125
|
+
}
|
1126
|
+
break;
|
1127
|
+
case CHAR_ERR: byte_error(p, *b); return;
|
1128
|
+
default: break;
|
1129
|
+
}
|
1130
|
+
if (0 == p->depth && 'v' == p->map[256] && p->just_one) {
|
1131
|
+
p->map = trail_map;
|
1132
|
+
}
|
1133
|
+
}
|
1134
|
+
if (0 == p->depth) {
|
1135
|
+
switch (p->map[256]) {
|
1136
|
+
case '0':
|
1137
|
+
case 'd':
|
1138
|
+
case 'f':
|
1139
|
+
case 'z':
|
1140
|
+
case 'X':
|
1141
|
+
case 'D':
|
1142
|
+
case 'g':
|
1143
|
+
case 'B':
|
1144
|
+
case 'Y': calc_num(p); break;
|
1145
|
+
}
|
1146
|
+
}
|
1147
|
+
return;
|
1148
|
+
}
|
1149
|
+
|
1150
|
+
static void parser_free(void *ptr) {
|
1151
|
+
ojParser p;
|
1152
|
+
|
1153
|
+
if (0 == ptr) {
|
1154
|
+
return;
|
1155
|
+
}
|
1156
|
+
p = (ojParser)ptr;
|
1157
|
+
buf_cleanup(&p->key);
|
1158
|
+
buf_cleanup(&p->buf);
|
1159
|
+
p->free(p);
|
1160
|
+
xfree(ptr);
|
1161
|
+
}
|
1162
|
+
|
1163
|
+
static void parser_mark(void *ptr) {
|
1164
|
+
if (NULL != ptr) {
|
1165
|
+
ojParser p = (ojParser)ptr;
|
1166
|
+
|
1167
|
+
if (0 != p->reader) {
|
1168
|
+
rb_gc_mark(p->reader);
|
1169
|
+
}
|
1170
|
+
p->mark(p);
|
1171
|
+
}
|
1172
|
+
}
|
1173
|
+
|
1174
|
+
extern void oj_set_parser_validator(ojParser p);
|
1175
|
+
extern void oj_set_parser_saj(ojParser p);
|
1176
|
+
extern void oj_set_parser_usual(ojParser p);
|
1177
|
+
extern void oj_set_parser_debug(ojParser p);
|
1178
|
+
|
1179
|
+
/* Document-method: new
|
1180
|
+
* call-seq: new(mode=nil)
|
1181
|
+
*
|
1182
|
+
* Creates a new Parser with the specified mode. If no mode is provided
|
1183
|
+
* validation is assumed.
|
1184
|
+
*/
|
1185
|
+
static VALUE parser_new(VALUE self, VALUE mode) {
|
1186
|
+
ojParser p = ALLOC(struct _ojParser);
|
1187
|
+
|
1188
|
+
#if HAVE_RB_EXT_RACTOR_SAFE
|
1189
|
+
// This doesn't seem to do anything.
|
1190
|
+
rb_ext_ractor_safe(true);
|
1191
|
+
#endif
|
1192
|
+
memset(p, 0, sizeof(struct _ojParser));
|
1193
|
+
buf_init(&p->key);
|
1194
|
+
buf_init(&p->buf);
|
1195
|
+
|
1196
|
+
p->map = value_map;
|
1197
|
+
if (Qnil == mode) {
|
1198
|
+
oj_set_parser_validator(p);
|
1199
|
+
} else {
|
1200
|
+
const char *ms = NULL;
|
1201
|
+
|
1202
|
+
switch (rb_type(mode)) {
|
1203
|
+
case RUBY_T_SYMBOL:
|
1204
|
+
mode = rb_sym2str(mode);
|
1205
|
+
// fall through
|
1206
|
+
case RUBY_T_STRING: ms = RSTRING_PTR(mode); break;
|
1207
|
+
default:
|
1208
|
+
rb_raise(rb_eArgError, "mode must be :validate, :usual, :saj, or :object");
|
1209
|
+
}
|
1210
|
+
if (0 == strcmp("usual", ms) || 0 == strcmp("standard", ms) || 0 == strcmp("strict", ms) ||
|
1211
|
+
0 == strcmp("compat", ms)) {
|
1212
|
+
oj_set_parser_usual(p);
|
1213
|
+
} else if (0 == strcmp("object", ms)) {
|
1214
|
+
// TBD
|
1215
|
+
} else if (0 == strcmp("saj", ms)) {
|
1216
|
+
oj_set_parser_saj(p);
|
1217
|
+
} else if (0 == strcmp("validate", ms)) {
|
1218
|
+
oj_set_parser_validator(p);
|
1219
|
+
} else if (0 == strcmp("debug", ms)) {
|
1220
|
+
oj_set_parser_debug(p);
|
1221
|
+
} else {
|
1222
|
+
rb_raise(rb_eArgError, "mode must be :validate, :usual, :saj, or :object");
|
1223
|
+
}
|
1224
|
+
}
|
1225
|
+
return Data_Wrap_Struct(parser_class, parser_mark, parser_free, p);
|
1226
|
+
}
|
1227
|
+
|
1228
|
+
/* Document-method: method_missing(value)
|
1229
|
+
* call-seq: method_missing(value)
|
1230
|
+
*
|
1231
|
+
* Methods not handled by the parser are passed to the delegate. The methods
|
1232
|
+
* supported by delegate are:
|
1233
|
+
*
|
1234
|
+
* - *:validate*
|
1235
|
+
* - no options
|
1236
|
+
*
|
1237
|
+
* - *:saj*
|
1238
|
+
* - _cache_keys=_ sets the value of the _cache_keys_ flag.
|
1239
|
+
* - _cache_keys_ returns the value of the _cache_keys_ flag.
|
1240
|
+
* - _cache_strings=_ sets the value of _cache_strings_ to a positive integer less than 35. Strings shorter than that length are cached.
|
1241
|
+
* - _cache_strings_ returns the value of the _cache_strings_ integer value.
|
1242
|
+
* - _handler=_ sets the SAJ handler
|
1243
|
+
* - _handler_ returns the SAJ handler
|
1244
|
+
*
|
1245
|
+
* - *:usual*
|
1246
|
+
* - _cache_keys=_ sets the value of the _cache_keys_ flag.
|
1247
|
+
* - _cache_keys_ returns the value of the _cache_keys_ flag.
|
1248
|
+
* - _cache_strings=_ sets the value of the _cache_strings_ to an positive integer less than 35. Strings shorter than that length are cached.
|
1249
|
+
* - _cache_strings_ returns the value of the _cache_strings_ integer value.
|
1250
|
+
* - _capacity=_ sets the capacity of the parser. The parser grows automatically but can be updated directly with this call.
|
1251
|
+
* - _capacity_ returns the current capacity of the parser's internal stack.
|
1252
|
+
* - _create_id_ returns the value _create_id_ or _nil_ if there is no _create_id_.
|
1253
|
+
* - _create_id=_ sets the value _create_id_ or if _nil_ unsets it. Parsed JSON objects that include the specified element use the element value as the name of the class to create an object from instead of a Hash.
|
1254
|
+
* - _decimal=_ sets the approach to how decimals are parsed. If _:auto_ then decimals with 16 or fewer significant digits are Floats and longer ones are BigDecimal. _:ruby_ uses a call to Ruby to convert a string to a Float. _:float_ always generates a Float. _:bigdecimal_ always results in a BigDecimal.
|
1255
|
+
* - _decimal_ returns the value of the decimal conversion option which can be :auto (default), :ruby, :float, or :bigdecimal.
|
1256
|
+
* - _ignore_json_create_ returns the value of the _ignore_json_create_ flag.
|
1257
|
+
* - _ignore_json_create=_ sets the value of the _ignore_json_create_ flag. When set the class json_create method is ignored on parsing in favor of creating an instance and populating directly.
|
1258
|
+
* - _missing_class_ returns the value of the _missing_class_ indicator.
|
1259
|
+
* - _missing_class=_ sets the value of the _missing_class_ flag. Valid values are _:auto_ which creates any missing classes on parse, :ignore which ignores and continues as a Hash (default), and :raise which raises an exception if the class is not found.
|
1260
|
+
* - _omit_null=_ sets the _omit_null_ flag. If true then null values in a map or object are omitted from the resulting Hash or Object.
|
1261
|
+
* - _omit_null_ returns the value of the _omit_null_ flag.
|
1262
|
+
* - _symbol_keys=_ sets the flag that indicates Hash keys should be parsed to Symbols versus Strings.
|
1263
|
+
* - _symbol_keys_ returns the value of the _symbol_keys_ flag.
|
1264
|
+
*/
|
1265
|
+
// Forwards an unknown method call to the delegate's option handler.
//
// argv[0] is the method name as a Symbol or String and, when present,
// argv[1] is the single argument passed to that method. The delegate's
// option callback receives the name as a C string and the argument (or nil)
// and its return value is returned to the caller.
//
// Raises an ArgumentError if no method name was supplied or if it is neither
// a Symbol nor a String.
static VALUE parser_missing(int argc, VALUE *argv, VALUE self) {
    ojParser       p    = (ojParser)DATA_PTR(self);
    const char *   key  = NULL;
    volatile VALUE rkey = Qnil;
    volatile VALUE rv   = Qnil;

#if HAVE_RB_EXT_RACTOR_SAFE
    // This doesn't seem to do anything.
    rb_ext_ractor_safe(true);
#endif
    // The method is registered with arity -1, so an explicit call with no
    // arguments arrives with argc == 0 and argv[0] must not be read.
    if (argc < 1) {
        rb_raise(rb_eArgError, "option method must be a symbol or string");
    }
    rkey = argv[0];
    switch (rb_type(rkey)) {
    case RUBY_T_SYMBOL:
        rkey = rb_sym2str(rkey);
        // fall through
    case RUBY_T_STRING: key = rb_string_value_ptr(&rkey); break;
    default: rb_raise(rb_eArgError, "option method must be a symbol or string");
    }
    if (1 < argc) {
        rv = argv[1];
    }
    return p->option(p, key, rv);
}
|
1287
|
+
|
1288
|
+
/* Document-method: parse(json)
|
1289
|
+
* call-seq: parse(json)
|
1290
|
+
*
|
1291
|
+
* Parse a JSON string.
|
1292
|
+
*
|
1293
|
+
* Returns the result according to the delegate of the parser.
|
1294
|
+
*/
|
1295
|
+
// Implements Parser#parse. Verifies the argument is a String, resets the
// parser state, signals the delegate that a parse is starting, feeds the
// string bytes to the state machine, and returns the delegate's result.
static VALUE parser_parse(VALUE self, VALUE json) {
    ojParser    parser = (ojParser)DATA_PTR(self);
    const byte *text;

    Check_Type(json, T_STRING);
    parser_reset(parser);
    parser->start(parser);
    // Fetch the byte pointer only after the delegate's start callback has
    // run, as the original call order did.
    text = (const byte *)rb_string_value_ptr(&json);
    parse(parser, text);

    return parser->result(parser);
}
|
1305
|
+
|
1306
|
+
// Rescue handler handed to rb_rescue2() by parser_load() for EOFError. The
// end of the stream is the normal way for load() to finish so there is
// nothing to do other than let the read loop stop.
static VALUE load_rescue(VALUE self, VALUE x) {
    return Qfalse;
}
|
1310
|
+
|
1311
|
+
// Body run under rb_rescue2() by parser_load(). Repeatedly asks the reader
// for up to 16385 bytes with readpartial, reusing one Ruby string as the
// read buffer, and feeds every non-empty chunk to the parser. The loop has
// no exit of its own; it ends when the reader raises.
static VALUE load(VALUE self) {
    ojParser       parser = (ojParser)DATA_PTR(self);
    volatile VALUE chunk  = rb_str_new2("");

    parser->start(parser);
    for (;;) {
        rb_funcall(parser->reader, oj_readpartial_id, 2, INT2NUM(16385), chunk);
        if (RSTRING_LEN(chunk) <= 0) {
            continue;
        }
        parse(parser, (byte *)StringValuePtr(chunk));
    }
    return Qtrue;
}
|
1324
|
+
|
1325
|
+
/* Document-method: load(reader)
|
1326
|
+
* call-seq: load(reader)
|
1327
|
+
*
|
1328
|
+
* Parse a JSON stream.
|
1329
|
+
*
|
1330
|
+
* Returns the result according to the delegate of the parser.
|
1331
|
+
*/
|
1332
|
+
// Implements Parser#load. Resets the parser, remembers the reader so the
// read loop and the GC mark callback can reach it, and runs the read loop
// with EOFError rescued so that reaching the end of the stream ends the load
// normally. Any other exception raised while reading or parsing propagates.
//
// Returns the result produced by the delegate.
static VALUE parser_load(VALUE self, VALUE reader) {
    ojParser p = (ojParser)DATA_PTR(self);

    parser_reset(p);
    p->reader = reader;
    // The exception class list is read as VALUEs through varargs, so the
    // terminator has to be passed as a VALUE sized zero and not an int.
    rb_rescue2(load, self, load_rescue, Qnil, rb_eEOFError, (VALUE)0);

    return p->result(p);
}
|
1341
|
+
|
1342
|
+
/* Document-method: file(filename)
|
1343
|
+
* call-seq: file(filename)
|
1344
|
+
*
|
1345
|
+
* Parse a JSON file.
|
1346
|
+
*
|
1347
|
+
* Returns the result according to the delegate of the parser.
|
1348
|
+
*/
|
1349
|
+
static VALUE parser_file(VALUE self, VALUE filename) {
|
1350
|
+
ojParser p = (ojParser)DATA_PTR(self);
|
1351
|
+
const char *path;
|
1352
|
+
int fd;
|
1353
|
+
|
1354
|
+
Check_Type(filename, T_STRING);
|
1355
|
+
path = rb_string_value_ptr(&filename);
|
1356
|
+
|
1357
|
+
parser_reset(p);
|
1358
|
+
p->start(p);
|
1359
|
+
|
1360
|
+
if (0 > (fd = open(path, O_RDONLY))) {
|
1361
|
+
rb_raise(rb_eIOError, "error opening %s", path);
|
1362
|
+
}
|
1363
|
+
#if USE_THREAD_LIMIT
|
1364
|
+
struct stat info;
|
1365
|
+
// st_size will be 0 if not a file
|
1366
|
+
if (0 == fstat(fd, &info) && USE_THREAD_LIMIT < info.st_size) {
|
1367
|
+
// Use threaded version.
|
1368
|
+
// TBD only if has pthreads
|
1369
|
+
// TBD parse_large(p, fd);
|
1370
|
+
return p->result(p);
|
1371
|
+
}
|
1372
|
+
#endif
|
1373
|
+
byte buf[16385];
|
1374
|
+
size_t size = sizeof(buf) - 1;
|
1375
|
+
size_t rsize;
|
1376
|
+
|
1377
|
+
while (true) {
|
1378
|
+
if (0 < (rsize = read(fd, buf, size))) {
|
1379
|
+
buf[rsize] = '\0';
|
1380
|
+
parse(p, buf);
|
1381
|
+
}
|
1382
|
+
if (rsize <= 0) {
|
1383
|
+
if (0 != rsize) {
|
1384
|
+
rb_raise(rb_eIOError, "error reading from %s", path);
|
1385
|
+
}
|
1386
|
+
break;
|
1387
|
+
}
|
1388
|
+
}
|
1389
|
+
return p->result(p);
|
1390
|
+
}
|
1391
|
+
|
1392
|
+
/* Document-method: just_one
|
1393
|
+
* call-seq: just_one
|
1394
|
+
*
|
1395
|
+
* Returns the current state of the just_one [_Boolean_] option.
|
1396
|
+
*/
|
1397
|
+
// Implements Parser#just_one. Reports the parser's just_one flag as a Ruby
// boolean.
static VALUE parser_just_one(VALUE self) {
    ojParser parser = (ojParser)DATA_PTR(self);

    if (parser->just_one) {
        return Qtrue;
    }
    return Qfalse;
}
|
1402
|
+
|
1403
|
+
/* Document-method: just_one=
|
1404
|
+
* call-seq: just_one=(value)
|
1405
|
+
*
|
1406
|
+
* Sets the *just_one* option which limits the parsing of a string or
|
1407
|
+
* stream to a single JSON element.
|
1408
|
+
*
|
1409
|
+
* Returns the current state of the just_one [_Boolean_] option.
|
1410
|
+
*/
|
1411
|
+
// Implements Parser#just_one=. Only the Ruby value true turns the flag on;
// every other value turns it off. Returns the new state of the flag as a
// Ruby boolean.
static VALUE parser_just_one_set(VALUE self, VALUE v) {
    ojParser parser = (ojParser)DATA_PTR(self);

    if (Qtrue == v) {
        parser->just_one = true;
        return Qtrue;
    }
    parser->just_one = false;

    return Qfalse;
}
|
1418
|
+
|
1419
|
+
// Lazily builds the shared parser kept in *slot. On the first call the parser
// struct is allocated and zeroed, its buffers are initialized, the delegate
// is installed by the setup function, and the wrapping Ruby object is stored
// in the slot and registered with the GC so it is never collected. Later
// calls return the stored object unchanged.
static VALUE default_parser(VALUE *slot, void (*setup)(ojParser)) {
    if (Qundef == *slot) {
        ojParser p = ALLOC(struct _ojParser);

        memset(p, 0, sizeof(struct _ojParser));
        buf_init(&p->key);
        buf_init(&p->buf);
        p->map = value_map;
        setup(p);
        *slot = Data_Wrap_Struct(parser_class, parser_mark, parser_free, p);
        rb_gc_register_address(slot);
    }
    return *slot;
}

static VALUE usual_parser = Qundef;

/* Document-method: usual
 * call-seq: usual
 *
 * Returns the default usual parser. Note the default usual parser can not be
 * used concurrently in more than one thread.
 */
static VALUE parser_usual(VALUE self) {
    return default_parser(&usual_parser, oj_set_parser_usual);
}

static VALUE saj_parser = Qundef;

/* Document-method: saj
 * call-seq: saj
 *
 * Returns the default saj parser. Note the default SAJ parser can not be used
 * concurrently in more than one thread.
 */
static VALUE parser_saj(VALUE self) {
    return default_parser(&saj_parser, oj_set_parser_saj);
}

static VALUE validate_parser = Qundef;

/* Document-method: validate
 * call-seq: validate
 *
 * Returns the default validate parser.
 */
static VALUE parser_validate(VALUE self) {
    return default_parser(&validate_parser, oj_set_parser_validator);
}
|
1486
|
+
|
1487
|
+
/* Document-class: Oj::Parser
|
1488
|
+
*
|
1489
|
+
* A reusable parser that makes use of named delegates to determine the
|
1490
|
+
* handling of parsed data. Delegates are available for validation, a callback
|
1491
|
+
* parser (SAJ), and a usual delegate that builds Ruby objects as parsing
|
1492
|
+
* proceeds.
|
1493
|
+
*
|
1494
|
+
* This parser is considerably faster than the older Oj.parse call and
|
1495
|
+
* isolates options to just the parser so that other parts of the code are not
|
1496
|
+
* forced to use the same options.
|
1497
|
+
*/
|
1498
|
+
void oj_parser_init() {
|
1499
|
+
parser_class = rb_define_class_under(Oj, "Parser", rb_cObject);
|
1500
|
+
rb_define_module_function(parser_class, "new", parser_new, 1);
|
1501
|
+
rb_define_method(parser_class, "parse", parser_parse, 1);
|
1502
|
+
rb_define_method(parser_class, "load", parser_load, 1);
|
1503
|
+
rb_define_method(parser_class, "file", parser_file, 1);
|
1504
|
+
rb_define_method(parser_class, "just_one", parser_just_one, 0);
|
1505
|
+
rb_define_method(parser_class, "just_one=", parser_just_one_set, 1);
|
1506
|
+
rb_define_method(parser_class, "method_missing", parser_missing, -1);
|
1507
|
+
|
1508
|
+
rb_define_module_function(parser_class, "usual", parser_usual, 0);
|
1509
|
+
rb_define_module_function(parser_class, "saj", parser_saj, 0);
|
1510
|
+
rb_define_module_function(parser_class, "validate", parser_validate, 0);
|
1511
|
+
}
|