oj 3.11.5 → 3.16.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (168) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +1421 -0
  3. data/README.md +19 -5
  4. data/RELEASE_NOTES.md +61 -0
  5. data/ext/oj/buf.h +20 -6
  6. data/ext/oj/cache.c +329 -0
  7. data/ext/oj/cache.h +22 -0
  8. data/ext/oj/cache8.c +10 -9
  9. data/ext/oj/circarray.c +8 -6
  10. data/ext/oj/circarray.h +2 -2
  11. data/ext/oj/code.c +19 -33
  12. data/ext/oj/code.h +2 -2
  13. data/ext/oj/compat.c +27 -77
  14. data/ext/oj/custom.c +86 -179
  15. data/ext/oj/debug.c +126 -0
  16. data/ext/oj/dump.c +256 -249
  17. data/ext/oj/dump.h +26 -12
  18. data/ext/oj/dump_compat.c +565 -642
  19. data/ext/oj/dump_leaf.c +17 -63
  20. data/ext/oj/dump_object.c +65 -187
  21. data/ext/oj/dump_strict.c +27 -51
  22. data/ext/oj/encoder.c +43 -0
  23. data/ext/oj/err.c +2 -13
  24. data/ext/oj/err.h +24 -8
  25. data/ext/oj/extconf.rb +21 -6
  26. data/ext/oj/fast.c +149 -149
  27. data/ext/oj/intern.c +313 -0
  28. data/ext/oj/intern.h +22 -0
  29. data/ext/oj/mem.c +318 -0
  30. data/ext/oj/mem.h +53 -0
  31. data/ext/oj/mimic_json.c +121 -106
  32. data/ext/oj/object.c +85 -162
  33. data/ext/oj/odd.c +89 -67
  34. data/ext/oj/odd.h +15 -15
  35. data/ext/oj/oj.c +542 -411
  36. data/ext/oj/oj.h +99 -73
  37. data/ext/oj/parse.c +175 -187
  38. data/ext/oj/parse.h +26 -24
  39. data/ext/oj/parser.c +1600 -0
  40. data/ext/oj/parser.h +101 -0
  41. data/ext/oj/rails.c +112 -159
  42. data/ext/oj/rails.h +1 -1
  43. data/ext/oj/reader.c +11 -14
  44. data/ext/oj/reader.h +4 -2
  45. data/ext/oj/resolve.c +5 -24
  46. data/ext/oj/rxclass.c +7 -6
  47. data/ext/oj/rxclass.h +1 -1
  48. data/ext/oj/saj.c +22 -33
  49. data/ext/oj/saj2.c +584 -0
  50. data/ext/oj/saj2.h +23 -0
  51. data/ext/oj/scp.c +5 -28
  52. data/ext/oj/sparse.c +28 -72
  53. data/ext/oj/stream_writer.c +50 -40
  54. data/ext/oj/strict.c +56 -61
  55. data/ext/oj/string_writer.c +72 -39
  56. data/ext/oj/trace.h +31 -4
  57. data/ext/oj/usual.c +1218 -0
  58. data/ext/oj/usual.h +69 -0
  59. data/ext/oj/util.h +1 -1
  60. data/ext/oj/val_stack.c +14 -3
  61. data/ext/oj/val_stack.h +8 -7
  62. data/ext/oj/validate.c +46 -0
  63. data/ext/oj/wab.c +63 -88
  64. data/lib/oj/active_support_helper.rb +1 -3
  65. data/lib/oj/bag.rb +7 -1
  66. data/lib/oj/easy_hash.rb +4 -5
  67. data/lib/oj/error.rb +1 -2
  68. data/lib/oj/json.rb +162 -150
  69. data/lib/oj/mimic.rb +9 -7
  70. data/lib/oj/saj.rb +20 -6
  71. data/lib/oj/schandler.rb +5 -4
  72. data/lib/oj/state.rb +12 -8
  73. data/lib/oj/version.rb +1 -2
  74. data/lib/oj.rb +2 -0
  75. data/pages/Compatibility.md +1 -1
  76. data/pages/InstallOptions.md +20 -0
  77. data/pages/JsonGem.md +15 -0
  78. data/pages/Modes.md +8 -3
  79. data/pages/Options.md +43 -5
  80. data/pages/Parser.md +309 -0
  81. data/pages/Rails.md +14 -2
  82. data/test/_test_active.rb +8 -9
  83. data/test/_test_active_mimic.rb +7 -8
  84. data/test/_test_mimic_rails.rb +17 -20
  85. data/test/activerecord/result_test.rb +5 -6
  86. data/test/activesupport6/encoding_test.rb +63 -28
  87. data/test/{activesupport5 → activesupport7}/abstract_unit.rb +16 -12
  88. data/test/{activesupport5 → activesupport7}/decoding_test.rb +2 -10
  89. data/test/{activesupport5 → activesupport7}/encoding_test.rb +86 -50
  90. data/test/{activesupport5 → activesupport7}/encoding_test_cases.rb +6 -0
  91. data/test/{activesupport5 → activesupport7}/time_zone_test_helpers.rb +8 -0
  92. data/test/files.rb +15 -15
  93. data/test/foo.rb +16 -45
  94. data/test/helper.rb +11 -8
  95. data/test/isolated/shared.rb +3 -2
  96. data/test/json_gem/json_addition_test.rb +2 -2
  97. data/test/json_gem/json_common_interface_test.rb +8 -6
  98. data/test/json_gem/json_encoding_test.rb +0 -0
  99. data/test/json_gem/json_ext_parser_test.rb +1 -0
  100. data/test/json_gem/json_fixtures_test.rb +3 -2
  101. data/test/json_gem/json_generator_test.rb +56 -38
  102. data/test/json_gem/json_generic_object_test.rb +11 -11
  103. data/test/json_gem/json_parser_test.rb +54 -47
  104. data/test/json_gem/json_string_matching_test.rb +9 -9
  105. data/test/json_gem/test_helper.rb +7 -3
  106. data/test/mem.rb +34 -0
  107. data/test/perf.rb +22 -27
  108. data/test/perf_compat.rb +31 -33
  109. data/test/perf_dump.rb +50 -0
  110. data/test/perf_fast.rb +80 -82
  111. data/test/perf_file.rb +27 -29
  112. data/test/perf_object.rb +65 -69
  113. data/test/perf_once.rb +59 -0
  114. data/test/perf_parser.rb +183 -0
  115. data/test/perf_saj.rb +46 -54
  116. data/test/perf_scp.rb +58 -69
  117. data/test/perf_simple.rb +41 -39
  118. data/test/perf_strict.rb +74 -82
  119. data/test/perf_wab.rb +67 -69
  120. data/test/prec.rb +5 -5
  121. data/test/sample/change.rb +0 -1
  122. data/test/sample/dir.rb +0 -1
  123. data/test/sample/doc.rb +0 -1
  124. data/test/sample/file.rb +0 -1
  125. data/test/sample/group.rb +0 -1
  126. data/test/sample/hasprops.rb +0 -1
  127. data/test/sample/layer.rb +0 -1
  128. data/test/sample/rect.rb +0 -1
  129. data/test/sample/shape.rb +0 -1
  130. data/test/sample/text.rb +0 -1
  131. data/test/sample.rb +16 -16
  132. data/test/sample_json.rb +8 -8
  133. data/test/test_compat.rb +95 -43
  134. data/test/test_custom.rb +73 -51
  135. data/test/test_debian.rb +7 -10
  136. data/test/test_fast.rb +135 -79
  137. data/test/test_file.rb +41 -30
  138. data/test/test_gc.rb +16 -5
  139. data/test/test_generate.rb +5 -5
  140. data/test/test_hash.rb +5 -5
  141. data/test/test_integer_range.rb +9 -9
  142. data/test/test_null.rb +20 -20
  143. data/test/test_object.rb +99 -96
  144. data/test/test_parser.rb +11 -0
  145. data/test/test_parser_debug.rb +27 -0
  146. data/test/test_parser_saj.rb +337 -0
  147. data/test/test_parser_usual.rb +251 -0
  148. data/test/test_rails.rb +2 -2
  149. data/test/test_saj.rb +10 -8
  150. data/test/test_scp.rb +37 -39
  151. data/test/test_strict.rb +40 -32
  152. data/test/test_various.rb +165 -84
  153. data/test/test_wab.rb +48 -44
  154. data/test/test_writer.rb +47 -47
  155. data/test/tests.rb +13 -5
  156. data/test/tests_mimic.rb +12 -3
  157. data/test/tests_mimic_addition.rb +12 -3
  158. metadata +74 -128
  159. data/ext/oj/hash.c +0 -131
  160. data/ext/oj/hash.h +0 -19
  161. data/ext/oj/hash_test.c +0 -491
  162. data/test/activesupport4/decoding_test.rb +0 -108
  163. data/test/activesupport4/encoding_test.rb +0 -531
  164. data/test/activesupport4/test_helper.rb +0 -41
  165. data/test/activesupport5/test_helper.rb +0 -72
  166. data/test/bar.rb +0 -35
  167. data/test/baz.rb +0 -16
  168. data/test/zoo.rb +0 -13
data/ext/oj/parser.c ADDED
@@ -0,0 +1,1600 @@
1
+ // Copyright (c) 2020, 2021, Peter Ohler, All rights reserved.
2
+
3
+ #include "parser.h"
4
+
5
+ #include <fcntl.h>
6
+
7
+ #include "oj.h"
8
+
9
// Set to 1 to trace every byte and map transition in parse() on stdout.
#define DEBUG 0

#define USE_THREAD_LIMIT 0
// #define USE_THREAD_LIMIT 100000
// Largest exponent handled before a number is treated as a big number.
#define MAX_EXP 4932
// max in the pow_map which is the limit for double
#define MAX_POW 308

#define MIN_SLEEP (1000000000LL / (double)CLOCKS_PER_SEC)
// LLONG_MAX is 9,223,372,036,854,775,807. Integer accumulation switches to
// the big number path once the running value reaches a tenth of that. The
// expansion is parenthesized so the macro is safe inside larger expressions.
#define BIG_LIMIT (LLONG_MAX / 10)
// Fraction digits are accumulated in fixnum only while it stays below this.
#define FRAC_LIMIT 10000000000000000ULL
21
+
22
+ // Give better performance with indented JSON but worse with unindented.
23
+ // #define SPACE_JUMP
24
+
25
+ enum { // action codes stored in the 256-entry map tables; parse() switches on p->map[*b]
26
+ SKIP_CHAR = 'a', // whitespace other than newline: parse() does nothing
27
+ SKIP_NEWLINE = 'b', // newline: parse() bumps p->line and skips the whitespace that follows
28
+ VAL_NULL = 'c',
29
+ VAL_TRUE = 'd',
30
+ VAL_FALSE = 'e',
31
+ VAL_NEG = 'f', // '-' at the start of a value
32
+ VAL0 = 'g', // '0' at the start of a value
33
+ VAL_DIGIT = 'h', // '1'..'9' at the start of a value
34
+ VAL_QUOTE = 'i', // opening quote of a string value
35
+ OPEN_ARRAY = 'k',
36
+ OPEN_OBJECT = 'l',
37
+ CLOSE_ARRAY = 'm',
38
+ CLOSE_OBJECT = 'n',
39
+ AFTER_COMMA = 'o',
40
+ KEY_QUOTE = 'p', // opening quote of an object key
41
+ COLON_COLON = 'q',
42
+ NUM_SPC = 'r',
43
+ NUM_NEWLINE = 's',
44
+ NUM_DOT = 't',
45
+ NUM_COMMA = 'u', // comma directly after a number: calc_num() runs first
46
+ NUM_FRAC = 'v',
47
+ FRAC_E = 'w',
48
+ EXP_SIGN = 'x',
49
+ EXP_DIGIT = 'y',
50
+ STR_QUOTE = 'z',
51
+ NEG_DIGIT = '-', // '1'..'9' directly after a minus sign (see neg_map)
52
+ STR_SLASH = 'A',
53
+ ESC_OK = 'B',
54
+ BIG_DIGIT = 'C',
55
+ BIG_DOT = 'D',
56
+ U_OK = 'E',
57
+ TOKEN_OK = 'F',
58
+ NUM_CLOSE_OBJECT = 'G', // '}' directly after a number: calc_num() runs, then the close
59
+ NUM_CLOSE_ARRAY = 'H', // ']' directly after a number: calc_num() runs, then the close
60
+ BIG_FRAC = 'I',
61
+ BIG_E = 'J',
62
+ BIG_EXP_SIGN = 'K',
63
+ BIG_EXP = 'L',
64
+ UTF1 = 'M', // expect 1 more following byte
65
+ NUM_DIGIT = 'N',
66
+ NUM_ZERO = 'O',
67
+ UTF2 = 'P', // expect 2 more following bytes
68
+ UTF3 = 'Q', // expect 3 more following bytes
69
+ STR_OK = 'R', // byte that can be copied into a string as is
70
+ UTFX = 'S', // following bytes
71
+ ESC_U = 'U',
72
+ CHAR_ERR = '.', // filler for bytes a map does not accept; NOTE(review): handling is outside this view
73
+ DONE = 'X',
74
+ };
75
+
76
+ /*
77
+ 0123456789abcdef0123456789abcdef */
78
+ static const char value_map[257] = "\
79
+ X........ab..a..................\
80
+ a.i..........f..ghhhhhhhhh......\
81
+ ...........................k.m..\
82
+ ......e.......c.....d......l.n..\
83
+ ................................\
84
+ ................................\
85
+ ................................\
86
+ ................................v"; // start-of-value map (set by parser_reset); entry 256 is the mode tag 'v' read as p->map[256]
87
+
88
+ static const char null_map[257] = "\
89
+ ................................\
90
+ ............o...................\
91
+ ................................\
92
+ ............F........F..........\
93
+ ................................\
94
+ ................................\
95
+ ................................\
96
+ ................................N"; // accepts 'l', 'u' and ','; tag 'N' makes byte_error() report "expected null"
97
+
98
+ static const char true_map[257] = "\
99
+ ................................\
100
+ ............o...................\
101
+ ................................\
102
+ .....F............F..F..........\
103
+ ................................\
104
+ ................................\
105
+ ................................\
106
+ ................................T"; // accepts 'e', 'r', 'u' and ','; tag 'T' makes byte_error() report "expected true"
107
+
108
+ static const char false_map[257] = "\
109
+ ................................\
110
+ ............o...................\
111
+ ................................\
112
+ .F...F......F......F............\
113
+ ................................\
114
+ ................................\
115
+ ................................\
116
+ ................................F"; // accepts 'a', 'e', 'l', 's' and ','; tag 'F' makes byte_error() report "expected false"
117
+
118
+ static const char comma_map[257] = "\
119
+ .........ab..a..................\
120
+ a.i..........f..ghhhhhhhhh......\
121
+ ...........................k....\
122
+ ......e.......c.....d......l....\
123
+ ................................\
124
+ ................................\
125
+ ................................\
126
+ ................................,"; // selected after a comma when not inside an object; like value_map but without the closers; tag ','
127
+
128
+ static const char after_map[257] = "\
129
+ X........ab..a..................\
130
+ a...........o...................\
131
+ .............................m..\
132
+ .............................n..\
133
+ ................................\
134
+ ................................\
135
+ ................................\
136
+ ................................a"; // selected after a nested value: accepts whitespace, ',', ']' and '}'; tag 'a'
137
+
138
+ static const char key1_map[257] = "\
139
+ .........ab..a..................\
140
+ a.p.............................\
141
+ ................................\
142
+ .............................n..\
143
+ ................................\
144
+ ................................\
145
+ ................................\
146
+ ................................K"; // selected right after '{': accepts a key quote or an immediate '}'; tag 'K'
147
+
148
+ static const char key_map[257] = "\
149
+ .........ab..a..................\
150
+ a.p.............................\
151
+ ................................\
152
+ ................................\
153
+ ................................\
154
+ ................................\
155
+ ................................\
156
+ ................................k"; // selected after a comma inside an object: only whitespace or a key quote; tag 'k'
157
+
158
+ static const char colon_map[257] = "\
159
+ .........ab..a..................\
160
+ a.........................q.....\
161
+ ................................\
162
+ ................................\
163
+ ................................\
164
+ ................................\
165
+ ................................\
166
+ ................................:"; // selected after an object key: only whitespace or ':'; tag ':'
167
+
168
+ static const char neg_map[257] = "\
169
+ ................................\
170
+ ................O---------......\
171
+ ................................\
172
+ ................................\
173
+ ................................\
174
+ ................................\
175
+ ................................\
176
+ ................................-"; // selected by VAL_NEG: '0' gives NUM_ZERO, '1'..'9' give NEG_DIGIT; tag '-'
177
+
178
+ static const char zero_map[257] = "\
179
+ .........rs..r..................\
180
+ r...........u.t.................\
181
+ .............................H..\
182
+ .............................G..\
183
+ ................................\
184
+ ................................\
185
+ ................................\
186
+ ................................0"; // selected by VAL0 and NUM_ZERO: no further digits are accepted after a leading zero; tag '0'
187
+
188
+ static const char digit_map[257] = "\
189
+ .........rs..r..................\
190
+ r...........u.t.NNNNNNNNNN......\
191
+ .....w.......................H..\
192
+ .....w.......................G..\
193
+ ................................\
194
+ ................................\
195
+ ................................\
196
+ ................................d"; // integer digits; also indexed directly by the digit loops in parse(); tag 'd'
197
+
198
+ static const char dot_map[257] = "\
199
+ ................................\
200
+ ................vvvvvvvvvv......\
201
+ ................................\
202
+ ................................\
203
+ ................................\
204
+ ................................\
205
+ ................................\
206
+ ................................."; // selected by NUM_DOT: a digit must follow the decimal point; tag '.'
207
+
208
+ static const char frac_map[257] = "\
209
+ .........rs..r..................\
210
+ r...........u...vvvvvvvvvv......\
211
+ .....w.......................H..\
212
+ .....w.......................G..\
213
+ ................................\
214
+ ................................\
215
+ ................................\
216
+ ................................f"; // selected by NUM_FRAC and indexed directly by its digit loop; tag 'f'
217
+
218
+ static const char exp_sign_map[257] = "\
219
+ ................................\
220
+ ...........x.x..yyyyyyyyyy......\
221
+ ................................\
222
+ ................................\
223
+ ................................\
224
+ ................................\
225
+ ................................\
226
+ ................................x"; // selected by FRAC_E: accepts '+', '-' or an exponent digit; tag 'x'
227
+
228
+ static const char exp_zero_map[257] = "\
229
+ ................................\
230
+ ................yyyyyyyyyy......\
231
+ ................................\
232
+ ................................\
233
+ ................................\
234
+ ................................\
235
+ ................................\
236
+ ................................z"; // accepts only exponent digits; tag 'z'; NOTE(review): where it is selected is outside this view
237
+
238
+ static const char exp_map[257] = "\
239
+ .........rs..r..................\
240
+ r...........u...yyyyyyyyyy......\
241
+ .............................H..\
242
+ .............................G..\
243
+ ................................\
244
+ ................................\
245
+ ................................\
246
+ ................................X"; // exponent digits or a number terminator; tag 'X'; NOTE(review): where it is selected is outside this view
247
+
248
+ static const char big_digit_map[257] = "\
249
+ .........rs..r..................\
250
+ r...........u.D.CCCCCCCCCC......\
251
+ .....J.......................H..\
252
+ .....J.......................G..\
253
+ ................................\
254
+ ................................\
255
+ ................................\
256
+ ................................D"; // selected after big_change() in the integer digit loops; tag 'D'
257
+
258
+ static const char big_dot_map[257] = "\
259
+ ................................\
260
+ ................IIIIIIIIII......\
261
+ ................................\
262
+ ................................\
263
+ ................................\
264
+ ................................\
265
+ ................................\
266
+ ................................o"; // accepts only fraction digits of a big number; tag 'o'; NOTE(review): where it is selected is outside this view
267
+
268
+ static const char big_frac_map[257] = "\
269
+ .........rs..r..................\
270
+ r...........u...IIIIIIIIII......\
271
+ .....J.......................H..\
272
+ .....J.......................G..\
273
+ ................................\
274
+ ................................\
275
+ ................................\
276
+ ................................g"; // selected after big_change() in the NUM_FRAC digit loop; tag 'g'
277
+
278
+ static const char big_exp_sign_map[257] = "\
279
+ ................................\
280
+ ...........K.K..LLLLLLLLLL......\
281
+ ................................\
282
+ ................................\
283
+ ................................\
284
+ ................................\
285
+ ................................\
286
+ ................................B"; // accepts '+', '-' or an exponent digit of a big number; tag 'B'; NOTE(review): where it is selected is outside this view
287
+
288
+ static const char big_exp_zero_map[257] = "\
289
+ ................................\
290
+ ................LLLLLLLLLL......\
291
+ ................................\
292
+ ................................\
293
+ ................................\
294
+ ................................\
295
+ ................................\
296
+ ................................Z"; // accepts only exponent digits of a big number; tag 'Z'; NOTE(review): where it is selected is outside this view
297
+
298
+ static const char big_exp_map[257] = "\
299
+ .........rs..r..................\
300
+ r...........u...LLLLLLLLLL......\
301
+ .............................H..\
302
+ .............................G..\
303
+ ................................\
304
+ ................................\
305
+ ................................\
306
+ ................................Y"; // big number exponent digits or a number terminator; tag 'Y'; NOTE(review): where it is selected is outside this view
307
+
308
+ static const char string_map[257] = "\
309
+ ................................\
310
+ RRzRRRRRRRRRRRRRRRRRRRRRRRRRRRRR\
311
+ RRRRRRRRRRRRRRRRRRRRRRRRRRRRARRR\
312
+ RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR\
313
+ ................................\
314
+ ................................\
315
+ MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\
316
+ PPPPPPPPPPPPPPPPQQQQQQQQ........s"; // string body map, also indexed directly by the STR_OK scans in parse(); tag 's' makes byte_error() report "invalid JSON character"
317
+
318
+ static const char esc_map[257] = "\
319
+ ................................\
320
+ ..B............B................\
321
+ ............................B...\
322
+ ..B...B.......B...B.BU..........\
323
+ ................................\
324
+ ................................\
325
+ ................................\
326
+ ................................~"; // bytes allowed after a backslash: ESC_OK for the simple escapes, ESC_U for 'u'; tag '~'; NOTE(review): selection is outside this view
327
+
328
+ static const char esc_byte_map[257] = "\
329
+ ................................\
330
+ ..\"............/................\
331
+ ............................\\...\
332
+ ..\b...\f.......\n...\r.\t..........\
333
+ ................................\
334
+ ................................\
335
+ ................................\
336
+ ................................b"; // holds the replacement byte at the position of each escape letter; NOTE(review): presumably indexed by the byte after a backslash, usage is outside this view
337
+
338
+ static const char u_map[257] = "\
339
+ ................................\
340
+ ................EEEEEEEEEE......\
341
+ .EEEEEE.........................\
342
+ .EEEEEE.........................\
343
+ ................................\
344
+ ................................\
345
+ ................................\
346
+ ................................u"; // accepts hex digits (U_OK) only; tag 'u'; NOTE(review): presumably active while reading \u escapes, selection is outside this view
347
+
348
+ static const char utf_map[257] = "\
349
+ ................................\
350
+ ................................\
351
+ ................................\
352
+ ................................\
353
+ SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS\
354
+ SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS\
355
+ ................................\
356
+ ................................8"; // accepts only bytes 0x80..0xBF (UTFX); tag '8'; NOTE(review): selection is outside this view
357
+
358
+ static const char space_map[257] = "\
359
+ .........ab..a..................\
360
+ a...............................\
361
+ ................................\
362
+ ................................\
363
+ ................................\
364
+ ................................\
365
+ ................................\
366
+ ................................S"; // whitespace only; indexed directly by the SKIP_NEWLINE case in parse() to skip indentation; tag 'S'
367
+
368
+ static const char trail_map[257] = "\
369
+ .........ab..a..................\
370
+ a...............................\
371
+ ................................\
372
+ ................................\
373
+ ................................\
374
+ ................................\
375
+ ................................\
376
+ ................................R"; // same entries as space_map with tag 'R'; NOTE(review): presumably for trailing whitespace, selection is outside this view
377
+
378
+ static const byte hex_map[256] = "\
379
+ ................................\
380
+ ................\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09......\
381
+ .\x0a\x0b\x0c\x0d\x0e\x0f.........................\
382
+ .\x0a\x0b\x0c\x0d\x0e\x0f.........................\
383
+ ................................\
384
+ ................................\
385
+ ................................\
386
+ ................................"; // value 0..15 at each hex digit position ('0'-'9', 'A'-'F', 'a'-'f'); exactly 256 entries, so no tag and no terminating NUL
387
+
388
+ static long double pow_map[309] = { // pow_map[i] is 10 to the power i for i in 0..308 (MAX_POW); calc_num() uses it to scale decimals
389
+ 1.0L, 1.0e1L, 1.0e2L, 1.0e3L, 1.0e4L, 1.0e5L, 1.0e6L, 1.0e7L, 1.0e8L, 1.0e9L, 1.0e10L,
390
+ 1.0e11L, 1.0e12L, 1.0e13L, 1.0e14L, 1.0e15L, 1.0e16L, 1.0e17L, 1.0e18L, 1.0e19L, 1.0e20L, 1.0e21L,
391
+ 1.0e22L, 1.0e23L, 1.0e24L, 1.0e25L, 1.0e26L, 1.0e27L, 1.0e28L, 1.0e29L, 1.0e30L, 1.0e31L, 1.0e32L,
392
+ 1.0e33L, 1.0e34L, 1.0e35L, 1.0e36L, 1.0e37L, 1.0e38L, 1.0e39L, 1.0e40L, 1.0e41L, 1.0e42L, 1.0e43L,
393
+ 1.0e44L, 1.0e45L, 1.0e46L, 1.0e47L, 1.0e48L, 1.0e49L, 1.0e50L, 1.0e51L, 1.0e52L, 1.0e53L, 1.0e54L,
394
+ 1.0e55L, 1.0e56L, 1.0e57L, 1.0e58L, 1.0e59L, 1.0e60L, 1.0e61L, 1.0e62L, 1.0e63L, 1.0e64L, 1.0e65L,
395
+ 1.0e66L, 1.0e67L, 1.0e68L, 1.0e69L, 1.0e70L, 1.0e71L, 1.0e72L, 1.0e73L, 1.0e74L, 1.0e75L, 1.0e76L,
396
+ 1.0e77L, 1.0e78L, 1.0e79L, 1.0e80L, 1.0e81L, 1.0e82L, 1.0e83L, 1.0e84L, 1.0e85L, 1.0e86L, 1.0e87L,
397
+ 1.0e88L, 1.0e89L, 1.0e90L, 1.0e91L, 1.0e92L, 1.0e93L, 1.0e94L, 1.0e95L, 1.0e96L, 1.0e97L, 1.0e98L,
398
+ 1.0e99L, 1.0e100L, 1.0e101L, 1.0e102L, 1.0e103L, 1.0e104L, 1.0e105L, 1.0e106L, 1.0e107L, 1.0e108L, 1.0e109L,
399
+ 1.0e110L, 1.0e111L, 1.0e112L, 1.0e113L, 1.0e114L, 1.0e115L, 1.0e116L, 1.0e117L, 1.0e118L, 1.0e119L, 1.0e120L,
400
+ 1.0e121L, 1.0e122L, 1.0e123L, 1.0e124L, 1.0e125L, 1.0e126L, 1.0e127L, 1.0e128L, 1.0e129L, 1.0e130L, 1.0e131L,
401
+ 1.0e132L, 1.0e133L, 1.0e134L, 1.0e135L, 1.0e136L, 1.0e137L, 1.0e138L, 1.0e139L, 1.0e140L, 1.0e141L, 1.0e142L,
402
+ 1.0e143L, 1.0e144L, 1.0e145L, 1.0e146L, 1.0e147L, 1.0e148L, 1.0e149L, 1.0e150L, 1.0e151L, 1.0e152L, 1.0e153L,
403
+ 1.0e154L, 1.0e155L, 1.0e156L, 1.0e157L, 1.0e158L, 1.0e159L, 1.0e160L, 1.0e161L, 1.0e162L, 1.0e163L, 1.0e164L,
404
+ 1.0e165L, 1.0e166L, 1.0e167L, 1.0e168L, 1.0e169L, 1.0e170L, 1.0e171L, 1.0e172L, 1.0e173L, 1.0e174L, 1.0e175L,
405
+ 1.0e176L, 1.0e177L, 1.0e178L, 1.0e179L, 1.0e180L, 1.0e181L, 1.0e182L, 1.0e183L, 1.0e184L, 1.0e185L, 1.0e186L,
406
+ 1.0e187L, 1.0e188L, 1.0e189L, 1.0e190L, 1.0e191L, 1.0e192L, 1.0e193L, 1.0e194L, 1.0e195L, 1.0e196L, 1.0e197L,
407
+ 1.0e198L, 1.0e199L, 1.0e200L, 1.0e201L, 1.0e202L, 1.0e203L, 1.0e204L, 1.0e205L, 1.0e206L, 1.0e207L, 1.0e208L,
408
+ 1.0e209L, 1.0e210L, 1.0e211L, 1.0e212L, 1.0e213L, 1.0e214L, 1.0e215L, 1.0e216L, 1.0e217L, 1.0e218L, 1.0e219L,
409
+ 1.0e220L, 1.0e221L, 1.0e222L, 1.0e223L, 1.0e224L, 1.0e225L, 1.0e226L, 1.0e227L, 1.0e228L, 1.0e229L, 1.0e230L,
410
+ 1.0e231L, 1.0e232L, 1.0e233L, 1.0e234L, 1.0e235L, 1.0e236L, 1.0e237L, 1.0e238L, 1.0e239L, 1.0e240L, 1.0e241L,
411
+ 1.0e242L, 1.0e243L, 1.0e244L, 1.0e245L, 1.0e246L, 1.0e247L, 1.0e248L, 1.0e249L, 1.0e250L, 1.0e251L, 1.0e252L,
412
+ 1.0e253L, 1.0e254L, 1.0e255L, 1.0e256L, 1.0e257L, 1.0e258L, 1.0e259L, 1.0e260L, 1.0e261L, 1.0e262L, 1.0e263L,
413
+ 1.0e264L, 1.0e265L, 1.0e266L, 1.0e267L, 1.0e268L, 1.0e269L, 1.0e270L, 1.0e271L, 1.0e272L, 1.0e273L, 1.0e274L,
414
+ 1.0e275L, 1.0e276L, 1.0e277L, 1.0e278L, 1.0e279L, 1.0e280L, 1.0e281L, 1.0e282L, 1.0e283L, 1.0e284L, 1.0e285L,
415
+ 1.0e286L, 1.0e287L, 1.0e288L, 1.0e289L, 1.0e290L, 1.0e291L, 1.0e292L, 1.0e293L, 1.0e294L, 1.0e295L, 1.0e296L,
416
+ 1.0e297L, 1.0e298L, 1.0e299L, 1.0e300L, 1.0e301L, 1.0e302L, 1.0e303L, 1.0e304L, 1.0e305L, 1.0e306L, 1.0e307L,
417
+ 1.0e308L};
418
+
419
+ static VALUE parser_class; // NOTE(review): presumably the Ruby class object for the parser; it is assigned outside this view
420
+
421
+ // Works with extended unicode as well. \Uffffffff if support is desired in
422
+ // the future.
423
+ static size_t unicodeToUtf8(uint32_t code, byte *buf) {
424
+ byte *start = buf;
425
+
426
+ if (0x0000007F >= code) {
427
+ *buf++ = (byte)code;
428
+ } else if (0x000007FF >= code) {
429
+ *buf++ = 0xC0 | (code >> 6);
430
+ *buf++ = 0x80 | (0x3F & code);
431
+ } else if (0x0000FFFF >= code) {
432
+ *buf++ = 0xE0 | (code >> 12);
433
+ *buf++ = 0x80 | ((code >> 6) & 0x3F);
434
+ *buf++ = 0x80 | (0x3F & code);
435
+ } else if (0x001FFFFF >= code) {
436
+ *buf++ = 0xF0 | (code >> 18);
437
+ *buf++ = 0x80 | ((code >> 12) & 0x3F);
438
+ *buf++ = 0x80 | ((code >> 6) & 0x3F);
439
+ *buf++ = 0x80 | (0x3F & code);
440
+ } else if (0x03FFFFFF >= code) {
441
+ *buf++ = 0xF8 | (code >> 24);
442
+ *buf++ = 0x80 | ((code >> 18) & 0x3F);
443
+ *buf++ = 0x80 | ((code >> 12) & 0x3F);
444
+ *buf++ = 0x80 | ((code >> 6) & 0x3F);
445
+ *buf++ = 0x80 | (0x3F & code);
446
+ } else if (0x7FFFFFFF >= code) {
447
+ *buf++ = 0xFC | (code >> 30);
448
+ *buf++ = 0x80 | ((code >> 24) & 0x3F);
449
+ *buf++ = 0x80 | ((code >> 18) & 0x3F);
450
+ *buf++ = 0x80 | ((code >> 12) & 0x3F);
451
+ *buf++ = 0x80 | ((code >> 6) & 0x3F);
452
+ *buf++ = 0x80 | (0x3F & code);
453
+ }
454
+ return buf - start;
455
+ }
456
+
457
+ static void parser_reset(ojParser p) {
458
+ p->reader = 0;
459
+ memset(&p->num, 0, sizeof(p->num));
460
+ buf_reset(&p->key);
461
+ buf_reset(&p->buf);
462
+ p->map = value_map;
463
+ p->next_map = NULL;
464
+ p->depth = 0;
465
+ }
466
+
467
+ static void parse_error(ojParser p, const char *fmt, ...) {
468
+ va_list ap;
469
+ char buf[256];
470
+
471
+ va_start(ap, fmt);
472
+ vsnprintf(buf, sizeof(buf), fmt, ap);
473
+ va_end(ap);
474
+ rb_raise(oj_json_parser_error_class, "%s at %ld:%ld", buf, p->line, p->col);
475
+ }
476
+
477
+ static void byte_error(ojParser p, byte b) {
478
+ switch (p->map[256]) {
479
+ case 'N': // null_map
480
+ parse_error(p, "expected null");
481
+ break;
482
+ case 'T': // true_map
483
+ parse_error(p, "expected true");
484
+ break;
485
+ case 'F': // false_map
486
+ parse_error(p, "expected false");
487
+ break;
488
+ case 's': // string_map
489
+ parse_error(p, "invalid JSON character 0x%02x", b);
490
+ break;
491
+ default: parse_error(p, "unexpected character '%c' in '%c' mode", b, p->map[256]); break;
492
+ }
493
+ }
494
+
495
+ static void calc_num(ojParser p) {
496
+ switch (p->type) {
497
+ case OJ_INT:
498
+ if (p->num.neg) {
499
+ p->num.fixnum = -p->num.fixnum;
500
+ p->num.neg = false;
501
+ }
502
+ p->funcs[p->stack[p->depth]].add_int(p);
503
+ break;
504
+ case OJ_DECIMAL: {
505
+ long double d = (long double)p->num.fixnum;
506
+
507
+ if (p->num.neg) {
508
+ d = -d;
509
+ }
510
+ if (0 < p->num.shift) {
511
+ d /= pow_map[p->num.shift];
512
+ }
513
+ if (0 < p->num.exp) {
514
+ long double x;
515
+
516
+ if (MAX_POW < p->num.exp) {
517
+ x = powl(10.0L, (long double)p->num.exp);
518
+ } else {
519
+ x = pow_map[p->num.exp];
520
+ }
521
+ if (p->num.exp_neg) {
522
+ d /= x;
523
+ } else {
524
+ d *= x;
525
+ }
526
+ }
527
+ p->num.dub = d;
528
+ p->funcs[p->stack[p->depth]].add_float(p);
529
+ break;
530
+ }
531
+ case OJ_BIG: p->funcs[p->stack[p->depth]].add_big(p);
532
+ default:
533
+ // nothing to do
534
+ break;
535
+ }
536
+ p->type = OJ_NONE;
537
+ }
538
+
539
+ static void big_change(ojParser p) {
540
+ char buf[32];
541
+ int64_t i = p->num.fixnum;
542
+ int len = 0;
543
+
544
+ buf[sizeof(buf) - 1] = '\0';
545
+ p->buf.tail = p->buf.head;
546
+ switch (p->type) {
547
+ case OJ_INT:
548
+ // If an int then it will fit in the num.raw so no need to check length;
549
+ for (len = sizeof(buf) - 1; 0 < i; len--, i /= 10) {
550
+ buf[len] = '0' + (i % 10);
551
+ }
552
+ if (p->num.neg) {
553
+ buf[len] = '-';
554
+ len--;
555
+ }
556
+ buf_append_string(&p->buf, buf + len + 1, sizeof(buf) - len - 1);
557
+ p->type = OJ_BIG;
558
+ break;
559
+ case OJ_DECIMAL: {
560
+ int shift = p->num.shift;
561
+
562
+ for (len = sizeof(buf) - 1; 0 < i; len--, i /= 10, shift--) {
563
+ if (0 == shift) {
564
+ buf[len] = '.';
565
+ len--;
566
+ }
567
+ buf[len] = '0' + (i % 10);
568
+ }
569
+ if (p->num.neg) {
570
+ buf[len] = '-';
571
+ len--;
572
+ }
573
+ buf_append_string(&p->buf, buf + len + 1, sizeof(buf) - len - 1);
574
+ if (0 < p->num.exp) {
575
+ int x = p->num.exp;
576
+ int d, div;
577
+ bool started = false;
578
+
579
+ buf_append(&p->buf, 'e');
580
+ if (0 < p->num.exp_neg) {
581
+ buf_append(&p->buf, '-');
582
+ }
583
+ for (div = 1000; 0 < div; div /= 10) {
584
+ d = x / div % 10;
585
+ if (started || 0 < d) {
586
+ buf_append(&p->buf, '0' + d);
587
+ }
588
+ }
589
+ }
590
+ p->type = OJ_BIG;
591
+ break;
592
+ }
593
+ default: break;
594
+ }
595
+ }
596
+
597
+ static void parse(ojParser p, const byte *json) {
598
+ const byte *start;
599
+ const byte *b = json;
600
+ int i;
601
+
602
+ p->line = 1;
603
+ p->col = -1;
604
+ #if DEBUG
605
+ printf("*** parse - mode: %c %s\n", p->map[256], (const char *)json);
606
+ #endif
607
+ for (; '\0' != *b; b++) {
608
+ #if DEBUG
609
+ printf("*** parse - mode: %c %02x %s => %c\n", p->map[256], *b, b, p->map[*b]);
610
+ #endif
611
+ switch (p->map[*b]) {
612
+ case SKIP_NEWLINE:
613
+ p->line++;
614
+ p->col = b - json;
615
+ b++;
616
+ #ifdef SPACE_JUMP
617
+ // for (uint32_t *sj = (uint32_t*)b; 0x20202020 == *sj; sj++) { b += 4; }
618
+ for (uint16_t *sj = (uint16_t *)b; 0x2020 == *sj; sj++) {
619
+ b += 2;
620
+ }
621
+ #endif
622
+ for (; SKIP_CHAR == space_map[*b]; b++) {
623
+ }
624
+ b--;
625
+ break;
626
+ case COLON_COLON: p->map = value_map; break;
627
+ case SKIP_CHAR: break;
628
+ case KEY_QUOTE:
629
+ b++;
630
+ p->key.tail = p->key.head;
631
+ start = b;
632
+ for (; STR_OK == string_map[*b]; b++) {
633
+ }
634
+ buf_append_string(&p->key, (const char *)start, b - start);
635
+ if ('"' == *b) {
636
+ p->map = colon_map;
637
+ break;
638
+ }
639
+ b--;
640
+ p->map = string_map;
641
+ p->next_map = colon_map;
642
+ break;
643
+ case AFTER_COMMA:
644
+ if (0 < p->depth && OBJECT_FUN == p->stack[p->depth]) {
645
+ p->map = key_map;
646
+ } else {
647
+ p->map = comma_map;
648
+ }
649
+ break;
650
+ case VAL_QUOTE:
651
+ b++;
652
+ start = b;
653
+ p->buf.tail = p->buf.head;
654
+ for (; STR_OK == string_map[*b]; b++) {
655
+ }
656
+ buf_append_string(&p->buf, (const char *)start, b - start);
657
+ if ('"' == *b) {
658
+ p->cur = b - json;
659
+ p->funcs[p->stack[p->depth]].add_str(p);
660
+ p->map = (0 == p->depth) ? value_map : after_map;
661
+ break;
662
+ }
663
+ b--;
664
+ p->map = string_map;
665
+ p->next_map = (0 == p->depth) ? value_map : after_map;
666
+ break;
667
+ case OPEN_OBJECT:
668
+ p->cur = b - json;
669
+ p->funcs[p->stack[p->depth]].open_object(p);
670
+ p->depth++;
671
+ p->stack[p->depth] = OBJECT_FUN;
672
+ p->map = key1_map;
673
+ break;
674
+ case NUM_CLOSE_OBJECT:
675
+ p->cur = b - json;
676
+ calc_num(p);
677
+ // flow through
678
+ case CLOSE_OBJECT:
679
+ p->map = (1 == p->depth) ? value_map : after_map;
680
+ if (p->depth <= 0 || OBJECT_FUN != p->stack[p->depth]) {
681
+ p->col = b - json - p->col + 1;
682
+ parse_error(p, "unexpected object close");
683
+ return;
684
+ }
685
+ p->depth--;
686
+ p->cur = b - json;
687
+ p->funcs[p->stack[p->depth]].close_object(p);
688
+ break;
689
+ case OPEN_ARRAY:
690
+ p->cur = b - json;
691
+ p->funcs[p->stack[p->depth]].open_array(p);
692
+ p->depth++;
693
+ p->stack[p->depth] = ARRAY_FUN;
694
+ p->map = value_map;
695
+ break;
696
+ case NUM_CLOSE_ARRAY:
697
+ p->cur = b - json;
698
+ calc_num(p);
699
+ // flow through
700
+ case CLOSE_ARRAY:
701
+ p->map = (1 == p->depth) ? value_map : after_map;
702
+ if (p->depth <= 0 || ARRAY_FUN != p->stack[p->depth]) {
703
+ p->col = b - json - p->col + 1;
704
+ parse_error(p, "unexpected array close");
705
+ return;
706
+ }
707
+ p->depth--;
708
+ p->cur = b - json;
709
+ p->funcs[p->stack[p->depth]].close_array(p);
710
+ break;
711
+ case NUM_COMMA:
712
+ p->cur = b - json;
713
+ calc_num(p);
714
+ if (0 < p->depth && OBJECT_FUN == p->stack[p->depth]) {
715
+ p->map = key_map;
716
+ } else {
717
+ p->map = comma_map;
718
+ }
719
+ break;
720
+ case VAL0:
721
+ p->type = OJ_INT;
722
+ p->num.fixnum = 0;
723
+ p->num.neg = false;
724
+ p->num.shift = 0;
725
+ p->num.len = 0;
726
+ p->num.exp = 0;
727
+ p->num.exp_neg = false;
728
+ p->map = zero_map;
729
+ break;
730
+ case VAL_NEG:
731
+ p->type = OJ_INT;
732
+ p->num.fixnum = 0;
733
+ p->num.neg = true;
734
+ p->num.shift = 0;
735
+ p->num.len = 0;
736
+ p->num.exp = 0;
737
+ p->num.exp_neg = false;
738
+ p->map = neg_map;
739
+ break;
740
+ ;
741
+ case VAL_DIGIT:
742
+ p->type = OJ_INT;
743
+ p->num.fixnum = 0;
744
+ p->num.neg = false;
745
+ p->num.shift = 0;
746
+ p->num.exp = 0;
747
+ p->num.exp_neg = false;
748
+ p->num.len = 0;
749
+ p->map = digit_map;
750
+ for (; NUM_DIGIT == digit_map[*b]; b++) {
751
+ uint64_t x = (uint64_t)p->num.fixnum * 10 + (uint64_t)(*b - '0');
752
+
753
+ // Tried just checking for an int less than zero but that
754
+ // fails when optimization is on for some reason with the
755
+ // clang compiler so use a bit mask instead.
756
+ if (x < BIG_LIMIT) {
757
+ p->num.fixnum = (int64_t)x;
758
+ } else {
759
+ big_change(p);
760
+ p->map = big_digit_map;
761
+ break;
762
+ }
763
+ }
764
+ b--;
765
+ break;
766
+ case NUM_DIGIT:
767
+ for (; NUM_DIGIT == digit_map[*b]; b++) {
768
+ uint64_t x = p->num.fixnum * 10 + (uint64_t)(*b - '0');
769
+
770
+ if (x < BIG_LIMIT) {
771
+ p->num.fixnum = (int64_t)x;
772
+ } else {
773
+ big_change(p);
774
+ p->map = big_digit_map;
775
+ break;
776
+ }
777
+ }
778
+ b--;
779
+ break;
780
+ case NUM_DOT:
781
+ p->type = OJ_DECIMAL;
782
+ p->map = dot_map;
783
+ break;
784
+ case NUM_FRAC:
785
+ p->map = frac_map;
786
+ for (; NUM_FRAC == frac_map[*b]; b++) {
787
+ uint64_t x = p->num.fixnum * 10 + (uint64_t)(*b - '0');
788
+
789
+ if (x < FRAC_LIMIT) {
790
+ p->num.fixnum = (int64_t)x;
791
+ p->num.shift++;
792
+ } else {
793
+ big_change(p);
794
+ p->map = big_frac_map;
795
+ break;
796
+ }
797
+ }
798
+ b--;
799
+ break;
800
+ case FRAC_E:
801
+ p->type = OJ_DECIMAL;
802
+ p->map = exp_sign_map;
803
+ break;
804
+ case NUM_ZERO: p->map = zero_map; break;
805
+ case NEG_DIGIT:
806
+ for (; NUM_DIGIT == digit_map[*b]; b++) {
807
+ uint64_t x = p->num.fixnum * 10 + (uint64_t)(*b - '0');
808
+
809
+ if (x < BIG_LIMIT) {
810
+ p->num.fixnum = (int64_t)x;
811
+ } else {
812
+ big_change(p);
813
+ p->map = big_digit_map;
814
+ break;
815
+ }
816
+ }
817
+ b--;
818
+ p->map = digit_map;
819
+ break;
820
+ case EXP_SIGN:
821
+ p->num.exp_neg = ('-' == *b);
822
+ p->map = exp_zero_map;
823
+ break;
824
+ case EXP_DIGIT:
825
+ p->map = exp_map;
826
+ for (; NUM_DIGIT == digit_map[*b]; b++) {
827
+ int16_t x = p->num.exp * 10 + (int16_t)(*b - '0');
828
+
829
+ if (x <= MAX_EXP) {
830
+ p->num.exp = x;
831
+ } else {
832
+ big_change(p);
833
+ p->map = big_exp_map;
834
+ break;
835
+ }
836
+ }
837
+ b--;
838
+ break;
839
+ case BIG_DIGIT:
840
+ start = b;
841
+ for (; NUM_DIGIT == digit_map[*b]; b++) {
842
+ }
843
+ buf_append_string(&p->buf, (const char *)start, b - start);
844
+ b--;
845
+ break;
846
+ case BIG_DOT:
847
+ buf_append(&p->buf, '.');
848
+ p->map = big_dot_map;
849
+ break;
850
+ case BIG_FRAC:
851
+ p->map = big_frac_map;
852
+ start = b;
853
+ for (; NUM_FRAC == frac_map[*b]; b++) {
854
+ }
855
+ buf_append_string(&p->buf, (const char *)start, b - start);
856
+ b--;
857
+ break;
858
+ case BIG_E:
859
+ buf_append(&p->buf, *b);
860
+ p->map = big_exp_sign_map;
861
+ break;
862
+ case BIG_EXP_SIGN:
863
+ buf_append(&p->buf, *b);
864
+ p->map = big_exp_zero_map;
865
+ break;
866
+ case BIG_EXP:
867
+ start = b;
868
+ for (; NUM_DIGIT == digit_map[*b]; b++) {
869
+ }
870
+ buf_append_string(&p->buf, (const char *)start, b - start);
871
+ b--;
872
+ p->map = big_exp_map;
873
+ break;
874
+ case NUM_SPC:
875
+ p->cur = b - json;
876
+ calc_num(p);
877
+ break;
878
+ case NUM_NEWLINE:
879
+ p->cur = b - json;
880
+ calc_num(p);
881
+ b++;
882
+ #ifdef SPACE_JUMP
883
+ // for (uint32_t *sj = (uint32_t*)b; 0x20202020 == *sj; sj++) { b += 4; }
884
+ for (uint16_t *sj = (uint16_t *)b; 0x2020 == *sj; sj++) {
885
+ b += 2;
886
+ }
887
+ #endif
888
+ for (; SKIP_CHAR == space_map[*b]; b++) {
889
+ }
890
+ b--;
891
+ break;
892
+ case STR_OK:
893
+ start = b;
894
+ for (; STR_OK == string_map[*b]; b++) {
895
+ }
896
+ if (':' == p->next_map[256]) {
897
+ buf_append_string(&p->key, (const char *)start, b - start);
898
+ } else {
899
+ buf_append_string(&p->buf, (const char *)start, b - start);
900
+ }
901
+ if ('"' == *b) {
902
+ p->cur = b - json;
903
+ p->funcs[p->stack[p->depth]].add_str(p);
904
+ p->map = p->next_map;
905
+ break;
906
+ }
907
+ b--;
908
+ break;
909
+ case STR_SLASH: p->map = esc_map; break;
910
+ case STR_QUOTE:
911
+ p->cur = b - json;
912
+ p->funcs[p->stack[p->depth]].add_str(p);
913
+ p->map = p->next_map;
914
+ break;
915
+ case ESC_U:
916
+ p->map = u_map;
917
+ p->ri = 0;
918
+ p->ucode = 0;
919
+ break;
920
+ case U_OK:
921
+ p->ri++;
922
+ p->ucode = p->ucode << 4 | (uint32_t)hex_map[*b];
923
+ if (4 <= p->ri) {
924
+ byte utf8[8];
925
+ size_t ulen = unicodeToUtf8(p->ucode, utf8);
926
+
927
+ if (0 < ulen) {
928
+ if (':' == p->next_map[256]) {
929
+ buf_append_string(&p->key, (const char *)utf8, ulen);
930
+ } else {
931
+ buf_append_string(&p->buf, (const char *)utf8, ulen);
932
+ }
933
+ } else {
934
+ parse_error(p, "invalid unicode");
935
+ return;
936
+ }
937
+ p->map = string_map;
938
+ }
939
+ break;
940
+ case ESC_OK:
941
+ if (':' == p->next_map[256]) {
942
+ buf_append(&p->key, esc_byte_map[*b]);
943
+ } else {
944
+ buf_append(&p->buf, esc_byte_map[*b]);
945
+ }
946
+ p->map = string_map;
947
+ break;
948
+ case UTF1:
949
+ p->ri = 1;
950
+ p->map = utf_map;
951
+ if (':' == p->next_map[256]) {
952
+ buf_append(&p->key, *b);
953
+ } else {
954
+ buf_append(&p->buf, *b);
955
+ }
956
+ break;
957
+ case UTF2:
958
+ p->ri = 2;
959
+ p->map = utf_map;
960
+ if (':' == p->next_map[256]) {
961
+ buf_append(&p->key, *b);
962
+ } else {
963
+ buf_append(&p->buf, *b);
964
+ }
965
+ break;
966
+ case UTF3:
967
+ p->ri = 3;
968
+ p->map = utf_map;
969
+ if (':' == p->next_map[256]) {
970
+ buf_append(&p->key, *b);
971
+ } else {
972
+ buf_append(&p->buf, *b);
973
+ }
974
+ break;
975
+ case UTFX:
976
+ p->ri--;
977
+ if (':' == p->next_map[256]) {
978
+ buf_append(&p->key, *b);
979
+ } else {
980
+ buf_append(&p->buf, *b);
981
+ }
982
+ if (p->ri <= 0) {
983
+ p->map = string_map;
984
+ }
985
+ break;
986
+ case VAL_NULL:
987
+ if ('u' == b[1] && 'l' == b[2] && 'l' == b[3]) {
988
+ b += 3;
989
+ p->cur = b - json;
990
+ p->funcs[p->stack[p->depth]].add_null(p);
991
+ p->map = (0 == p->depth) ? value_map : after_map;
992
+ break;
993
+ }
994
+ p->ri = 0;
995
+ *p->token = *b++;
996
+ for (i = 1; i < 4; i++) {
997
+ if ('\0' == *b) {
998
+ p->ri = i;
999
+ break;
1000
+ } else {
1001
+ p->token[i] = *b++;
1002
+ }
1003
+ }
1004
+ if (0 < p->ri) {
1005
+ p->map = null_map;
1006
+ b--;
1007
+ break;
1008
+ }
1009
+ p->col = b - json - p->col;
1010
+ parse_error(p, "expected null");
1011
+ return;
1012
+ case VAL_TRUE:
1013
+ if ('r' == b[1] && 'u' == b[2] && 'e' == b[3]) {
1014
+ b += 3;
1015
+ p->cur = b - json;
1016
+ p->funcs[p->stack[p->depth]].add_true(p);
1017
+ p->map = (0 == p->depth) ? value_map : after_map;
1018
+ break;
1019
+ }
1020
+ p->ri = 0;
1021
+ *p->token = *b++;
1022
+ for (i = 1; i < 4; i++) {
1023
+ if ('\0' == *b) {
1024
+ p->ri = i;
1025
+ break;
1026
+ } else {
1027
+ p->token[i] = *b++;
1028
+ }
1029
+ }
1030
+ if (0 < p->ri) {
1031
+ p->map = true_map;
1032
+ b--;
1033
+ break;
1034
+ }
1035
+ p->col = b - json - p->col;
1036
+ parse_error(p, "expected true");
1037
+ return;
1038
+ case VAL_FALSE:
1039
+ if ('a' == b[1] && 'l' == b[2] && 's' == b[3] && 'e' == b[4]) {
1040
+ b += 4;
1041
+ p->cur = b - json;
1042
+ p->funcs[p->stack[p->depth]].add_false(p);
1043
+ p->map = (0 == p->depth) ? value_map : after_map;
1044
+ break;
1045
+ }
1046
+ p->ri = 0;
1047
+ *p->token = *b++;
1048
+ for (i = 1; i < 5; i++) {
1049
+ if ('\0' == *b) {
1050
+ p->ri = i;
1051
+ break;
1052
+ } else {
1053
+ p->token[i] = *b++;
1054
+ }
1055
+ }
1056
+ if (0 < p->ri) {
1057
+ p->map = false_map;
1058
+ b--;
1059
+ break;
1060
+ }
1061
+ p->col = b - json - p->col;
1062
+ parse_error(p, "expected false");
1063
+ return;
1064
+ case TOKEN_OK:
1065
+ p->token[p->ri] = *b;
1066
+ p->ri++;
1067
+ switch (p->map[256]) {
1068
+ case 'N':
1069
+ if (4 == p->ri) {
1070
+ if (0 != strncmp("null", p->token, 4)) {
1071
+ p->col = b - json - p->col;
1072
+ parse_error(p, "expected null");
1073
+ return;
1074
+ }
1075
+ p->cur = b - json;
1076
+ p->funcs[p->stack[p->depth]].add_null(p);
1077
+ p->map = (0 == p->depth) ? value_map : after_map;
1078
+ }
1079
+ break;
1080
+ case 'F':
1081
+ if (5 == p->ri) {
1082
+ if (0 != strncmp("false", p->token, 5)) {
1083
+ p->col = b - json - p->col;
1084
+ parse_error(p, "expected false");
1085
+ return;
1086
+ }
1087
+ p->cur = b - json;
1088
+ p->funcs[p->stack[p->depth]].add_false(p);
1089
+ p->map = (0 == p->depth) ? value_map : after_map;
1090
+ }
1091
+ break;
1092
+ case 'T':
1093
+ if (4 == p->ri) {
1094
+ if (0 != strncmp("true", p->token, 4)) {
1095
+ p->col = b - json - p->col;
1096
+ parse_error(p, "expected true");
1097
+ return;
1098
+ }
1099
+ p->cur = b - json;
1100
+ p->funcs[p->stack[p->depth]].add_true(p);
1101
+ p->map = (0 == p->depth) ? value_map : after_map;
1102
+ }
1103
+ break;
1104
+ default:
1105
+ p->col = b - json - p->col;
1106
+ parse_error(p, "parse error");
1107
+ return;
1108
+ }
1109
+ break;
1110
+ case CHAR_ERR: byte_error(p, *b); return;
1111
+ default: break;
1112
+ }
1113
+ if (0 == p->depth && 'v' == p->map[256] && p->just_one) {
1114
+ p->map = trail_map;
1115
+ }
1116
+ }
1117
+ if (0 == p->depth) {
1118
+ switch (p->map[256]) {
1119
+ case '0':
1120
+ case 'd':
1121
+ case 'f':
1122
+ case 'z':
1123
+ case 'X':
1124
+ case 'D':
1125
+ case 'g':
1126
+ case 'B':
1127
+ case 'Y':
1128
+ p->cur = b - json;
1129
+ calc_num(p);
1130
+ break;
1131
+ }
1132
+ }
1133
+ return;
1134
+ }
1135
+
1136
+ static void parser_free(void *ptr) {
1137
+ ojParser p;
1138
+
1139
+ if (0 == ptr) {
1140
+ return;
1141
+ }
1142
+ p = (ojParser)ptr;
1143
+ buf_cleanup(&p->key);
1144
+ buf_cleanup(&p->buf);
1145
+ if (NULL != p->free) {
1146
+ p->free(p);
1147
+ }
1148
+ OJ_R_FREE(ptr);
1149
+ }
1150
+
1151
+ static void parser_mark(void *ptr) {
1152
+ if (NULL != ptr) {
1153
+ ojParser p = (ojParser)ptr;
1154
+
1155
+ if (0 != p->reader) {
1156
+ rb_gc_mark(p->reader);
1157
+ }
1158
+ if (NULL != p->mark) {
1159
+ p->mark(p);
1160
+ }
1161
+ }
1162
+ }
1163
+
1164
+ static const rb_data_type_t oj_parser_type = {
1165
+ "Oj/parser",
1166
+ {
1167
+ parser_mark,
1168
+ parser_free,
1169
+ NULL,
1170
+ },
1171
+ 0,
1172
+ 0,
1173
+ };
1174
+
1175
+ extern void oj_set_parser_validator(ojParser p);
1176
+ extern void oj_set_parser_saj(ojParser p);
1177
+ extern void oj_set_parser_usual(ojParser p);
1178
+ extern void oj_set_parser_debug(ojParser p);
1179
+
1180
+ static int opt_cb(VALUE rkey, VALUE value, VALUE ptr) {
1181
+ ojParser p = (ojParser)ptr;
1182
+ const char *key = NULL;
1183
+ char set_key[64];
1184
+ long klen;
1185
+
1186
+ switch (rb_type(rkey)) {
1187
+ case RUBY_T_SYMBOL:
1188
+ rkey = rb_sym2str(rkey);
1189
+ // fall through
1190
+ case RUBY_T_STRING:
1191
+ key = StringValuePtr(rkey);
1192
+ klen = RSTRING_LEN(rkey);
1193
+ break;
1194
+ default: rb_raise(rb_eArgError, "option keys must be a symbol or string");
1195
+ }
1196
+ if ((long)sizeof(set_key) - 1 <= klen) {
1197
+ return ST_CONTINUE;
1198
+ }
1199
+ memcpy(set_key, key, klen);
1200
+ set_key[klen] = '=';
1201
+ set_key[klen + 1] = '\0';
1202
+ p->option(p, set_key, value);
1203
+
1204
+ return ST_CONTINUE;
1205
+ }
1206
+
1207
+ /* Document-method: new
1208
+ * call-seq: new(mode=nil)
1209
+ *
1210
+ * Creates a new Parser with the specified mode. If no mode is provided
1211
+ * validation is assumed. Optional arguments can be provided that match the
1212
+ * mode. For example with the :usual mode the call might look like
1213
+ * Oj::Parser.new(:usual, cache_keys: true).
1214
+ */
1215
+ static VALUE parser_new(int argc, VALUE *argv, VALUE self) {
1216
+ ojParser p = OJ_R_ALLOC(struct _ojParser);
1217
+
1218
+ #if HAVE_RB_EXT_RACTOR_SAFE
1219
+ // This doesn't seem to do anything.
1220
+ rb_ext_ractor_safe(true);
1221
+ #endif
1222
+ memset(p, 0, sizeof(struct _ojParser));
1223
+ buf_init(&p->key);
1224
+ buf_init(&p->buf);
1225
+ p->map = value_map;
1226
+
1227
+ if (argc < 1) {
1228
+ oj_set_parser_validator(p);
1229
+ } else {
1230
+ VALUE mode = argv[0];
1231
+
1232
+ if (Qnil == mode) {
1233
+ oj_set_parser_validator(p);
1234
+ } else {
1235
+ const char *ms = NULL;
1236
+
1237
+ switch (rb_type(mode)) {
1238
+ case RUBY_T_SYMBOL:
1239
+ mode = rb_sym2str(mode);
1240
+ // fall through
1241
+ case RUBY_T_STRING: ms = RSTRING_PTR(mode); break;
1242
+ default: rb_raise(rb_eArgError, "mode must be :validate, :usual, :saj, or :object");
1243
+ }
1244
+ if (0 == strcmp("usual", ms) || 0 == strcmp("standard", ms) || 0 == strcmp("strict", ms) ||
1245
+ 0 == strcmp("compat", ms)) {
1246
+ oj_set_parser_usual(p);
1247
+ } else if (0 == strcmp("object", ms)) {
1248
+ // TBD
1249
+ } else if (0 == strcmp("saj", ms)) {
1250
+ oj_set_parser_saj(p);
1251
+ } else if (0 == strcmp("validate", ms)) {
1252
+ oj_set_parser_validator(p);
1253
+ } else if (0 == strcmp("debug", ms)) {
1254
+ oj_set_parser_debug(p);
1255
+ } else {
1256
+ rb_raise(rb_eArgError, "mode must be :validate, :usual, :saj, or :object");
1257
+ }
1258
+ }
1259
+ if (1 < argc) {
1260
+ VALUE ropts = argv[1];
1261
+
1262
+ Check_Type(ropts, T_HASH);
1263
+ rb_hash_foreach(ropts, opt_cb, (VALUE)p);
1264
+ }
1265
+ }
1266
+ return TypedData_Wrap_Struct(parser_class, &oj_parser_type, p);
1267
+ }
1268
+
1269
+ // Create a new parser without setting the delegate. The parser is
1270
+ // wrapped. The parser is (ojParser)DATA_PTR(value) where value is the return
1271
+ // from this function. A delegate must be added before the parser can be
1272
+ // used. Optionally oj_parser_set_options can be called if the options are not
1273
+ // set directly.
1274
+ VALUE oj_parser_new(void) {
1275
+ ojParser p = OJ_R_ALLOC(struct _ojParser);
1276
+
1277
+ #if HAVE_RB_EXT_RACTOR_SAFE
1278
+ // This doesn't seem to do anything.
1279
+ rb_ext_ractor_safe(true);
1280
+ #endif
1281
+ memset(p, 0, sizeof(struct _ojParser));
1282
+ buf_init(&p->key);
1283
+ buf_init(&p->buf);
1284
+ p->map = value_map;
1285
+
1286
+ return TypedData_Wrap_Struct(parser_class, &oj_parser_type, p);
1287
+ }
1288
+
1289
+ // Set set the options from a hash (ropts).
1290
+ void oj_parser_set_option(ojParser p, VALUE ropts) {
1291
+ Check_Type(ropts, T_HASH);
1292
+ rb_hash_foreach(ropts, opt_cb, (VALUE)p);
1293
+ }
1294
+
1295
+ /* Document-method: method_missing(value)
1296
+ * call-seq: method_missing(value)
1297
+ *
1298
+ * Methods not handled by the parser are passed to the delegate. The methods
1299
+ * supported by delegate are:
1300
+ *
1301
+ * - *:validate*
1302
+ * - no options
1303
+ *
1304
+ * - *:saj*
1305
+ * - _cache_keys_ is a flag indicating hash keys should be cached.
1306
+ * - _cache_strings_ is a positive integer less than 35. Strings shorter than that length are cached.
1307
+ * - _handler_ is the SAJ handler
1308
+ *
1309
+ * - *:usual*
1310
+ * - _cache_keys_ is a flag indicating hash keys should be cached.
1311
+ * - _cache_strings_ is a positive integer less than 35. Strings shorter than that length are cached.
1312
+ * - _cache_expunge_ dictates when the cache will be expunged where 0 never expunges,
1313
+ * 1 expunges slowly, 2 expunges faster, and 3 or higher expunges agressively.
1314
+ * - _capacity_ is the capacity of the parser's internal stack. The parser grows automatically
1315
+ * but can be updated directly with this call.
1316
+ * - _create_id_ if non-nil is the key that is used to specify the type of object to create
1317
+ * when parsing. Parsed JSON objects that include the specified element use the element
1318
+ * value as the name of the class to create an object from instead of a Hash.
1319
+ * - _decimal_ is the approach to how decimals are parsed. If _:auto_ then
1320
+ * the decimals with significant digits are 16 or less are Floats and long
1321
+ * ones are BigDecimal. _:ruby_ uses a call to Ruby to convert a string to a Float.
1322
+ * _:float_ always generates a Float. _:bigdecimal_ always results in a BigDecimal.
1323
+ * - _ignore_json_create_ is a flag that when set the class json_create method is
1324
+ * ignored on parsing in favor of creating an instance and populating directly.
1325
+ * - _missing_class_ is an indicator that determines how unknown class names are handled.
1326
+ * Valid values are _:auto_ which creates any missing classes on parse, :ignore which ignores
1327
+ * and continues as a Hash (default), and :raise which raises an exception if the class is not found.
1328
+ * - _omit_null_ is a flag that if true then null values in a map or object are omitted
1329
+ * from the resulting Hash or Object.
1330
+ * - _symbol_keys_ is a flag that indicates Hash keys should be parsed to Symbols versus Strings.
1331
+ */
1332
+ static VALUE parser_missing(int argc, VALUE *argv, VALUE self) {
1333
+ ojParser p;
1334
+ const char *key = NULL;
1335
+ volatile VALUE rkey = *argv;
1336
+ volatile VALUE rv = Qnil;
1337
+
1338
+ TypedData_Get_Struct(self, struct _ojParser, &oj_parser_type, p);
1339
+
1340
+ #if HAVE_RB_EXT_RACTOR_SAFE
1341
+ // This doesn't seem to do anything.
1342
+ rb_ext_ractor_safe(true);
1343
+ #endif
1344
+ switch (rb_type(rkey)) {
1345
+ case RUBY_T_SYMBOL:
1346
+ rkey = rb_sym2str(rkey);
1347
+ // fall through
1348
+ case RUBY_T_STRING: key = StringValuePtr(rkey); break;
1349
+ default: rb_raise(rb_eArgError, "option method must be a symbol or string");
1350
+ }
1351
+ if (1 < argc) {
1352
+ rv = argv[1];
1353
+ }
1354
+ return p->option(p, key, rv);
1355
+ }
1356
+
1357
+ /* Document-method: parse(json)
1358
+ * call-seq: parse(json)
1359
+ *
1360
+ * Parse a JSON string.
1361
+ *
1362
+ * Returns the result according to the delegate of the parser.
1363
+ */
1364
+ static VALUE parser_parse(VALUE self, VALUE json) {
1365
+ ojParser p;
1366
+ const byte *ptr = (const byte *)StringValuePtr(json);
1367
+
1368
+ TypedData_Get_Struct(self, struct _ojParser, &oj_parser_type, p);
1369
+
1370
+ parser_reset(p);
1371
+ p->start(p);
1372
+ parse(p, ptr);
1373
+
1374
+ return p->result(p);
1375
+ }
1376
+
1377
+ static VALUE load_rescue(VALUE self, VALUE x) {
1378
+ // Normal EOF. No action needed other than to stop loading.
1379
+ return Qfalse;
1380
+ }
1381
+
1382
+ static VALUE load(VALUE self) {
1383
+ ojParser p;
1384
+ volatile VALUE rbuf = rb_str_new2("");
1385
+
1386
+ TypedData_Get_Struct(self, struct _ojParser, &oj_parser_type, p);
1387
+
1388
+ p->start(p);
1389
+ while (true) {
1390
+ rb_funcall(p->reader, oj_readpartial_id, 2, INT2NUM(16385), rbuf);
1391
+ if (0 < RSTRING_LEN(rbuf)) {
1392
+ parse(p, (byte *)StringValuePtr(rbuf));
1393
+ }
1394
+ if (Qtrue == rb_funcall(p->reader, oj_eofq_id, 0)) {
1395
+ if (0 < p->depth) {
1396
+ parse_error(p, "parse error, not closed");
1397
+ }
1398
+ break;
1399
+ }
1400
+ }
1401
+ return Qtrue;
1402
+ }
1403
+
1404
+ /* Document-method: load(reader)
1405
+ * call-seq: load(reader)
1406
+ *
1407
+ * Parse a JSON stream.
1408
+ *
1409
+ * Returns the result according to the delegate of the parser.
1410
+ */
1411
+ static VALUE parser_load(VALUE self, VALUE reader) {
1412
+ ojParser p;
1413
+
1414
+ TypedData_Get_Struct(self, struct _ojParser, &oj_parser_type, p);
1415
+
1416
+ parser_reset(p);
1417
+ p->reader = reader;
1418
+ rb_rescue2(load, self, load_rescue, Qnil, rb_eEOFError, 0);
1419
+
1420
+ return p->result(p);
1421
+ }
1422
+
1423
+ /* Document-method: file(filename)
1424
+ * call-seq: file(filename)
1425
+ *
1426
+ * Parse a JSON file.
1427
+ *
1428
+ * Returns the result according to the delegate of the parser.
1429
+ */
1430
+ static VALUE parser_file(VALUE self, VALUE filename) {
1431
+ ojParser p;
1432
+ const char *path;
1433
+ int fd;
1434
+
1435
+ TypedData_Get_Struct(self, struct _ojParser, &oj_parser_type, p);
1436
+
1437
+ path = StringValuePtr(filename);
1438
+
1439
+ parser_reset(p);
1440
+ p->start(p);
1441
+
1442
+ if (0 > (fd = open(path, O_RDONLY))) {
1443
+ rb_raise(rb_eIOError, "error opening %s", path);
1444
+ }
1445
+ #if USE_THREAD_LIMIT
1446
+ struct stat info;
1447
+ // st_size will be 0 if not a file
1448
+ if (0 == fstat(fd, &info) && USE_THREAD_LIMIT < info.st_size) {
1449
+ // Use threaded version.
1450
+ // TBD only if has pthreads
1451
+ // TBD parse_large(p, fd);
1452
+ return p->result(p);
1453
+ }
1454
+ #endif
1455
+ byte buf[16385];
1456
+ size_t size = sizeof(buf) - 1;
1457
+ size_t rsize;
1458
+
1459
+ while (true) {
1460
+ if (0 < (rsize = read(fd, buf, size))) {
1461
+ buf[rsize] = '\0';
1462
+ parse(p, buf);
1463
+ }
1464
+ if (rsize <= 0) {
1465
+ if (0 != rsize) {
1466
+ rb_raise(rb_eIOError, "error reading from %s", path);
1467
+ }
1468
+ break;
1469
+ }
1470
+ }
1471
+ return p->result(p);
1472
+ }
1473
+
1474
+ /* Document-method: just_one
1475
+ * call-seq: just_one
1476
+ *
1477
+ * Returns the current state of the just_one [_Boolean_] option.
1478
+ */
1479
+ static VALUE parser_just_one(VALUE self) {
1480
+ ojParser p;
1481
+
1482
+ TypedData_Get_Struct(self, struct _ojParser, &oj_parser_type, p);
1483
+
1484
+ return p->just_one ? Qtrue : Qfalse;
1485
+ }
1486
+
1487
+ /* Document-method: just_one=
1488
+ * call-seq: just_one=(value)
1489
+ *
1490
+ * Sets the *just_one* option which limits the parsing of a string or or
1491
+ * stream to a single JSON element.
1492
+ *
1493
+ * Returns the current state of the just_one [_Boolean_] option.
1494
+ */
1495
+ static VALUE parser_just_one_set(VALUE self, VALUE v) {
1496
+ ojParser p;
1497
+
1498
+ TypedData_Get_Struct(self, struct _ojParser, &oj_parser_type, p);
1499
+
1500
+ p->just_one = (Qtrue == v);
1501
+
1502
+ return p->just_one ? Qtrue : Qfalse;
1503
+ }
1504
+
1505
+ static VALUE usual_parser = Qundef;
1506
+
1507
+ /* Document-method: usual
1508
+ * call-seq: usual
1509
+ *
1510
+ * Returns the default usual parser. Note the default usual parser can not be
1511
+ * used concurrently in more than one thread.
1512
+ */
1513
+ static VALUE parser_usual(VALUE self) {
1514
+ if (Qundef == usual_parser) {
1515
+ ojParser p = OJ_R_ALLOC(struct _ojParser);
1516
+
1517
+ memset(p, 0, sizeof(struct _ojParser));
1518
+ buf_init(&p->key);
1519
+ buf_init(&p->buf);
1520
+ p->map = value_map;
1521
+ oj_set_parser_usual(p);
1522
+ usual_parser = TypedData_Wrap_Struct(parser_class, &oj_parser_type, p);
1523
+ rb_gc_register_address(&usual_parser);
1524
+ }
1525
+ return usual_parser;
1526
+ }
1527
+
1528
+ static VALUE saj_parser = Qundef;
1529
+
1530
+ /* Document-method: saj
1531
+ * call-seq: saj
1532
+ *
1533
+ * Returns the default SAJ parser. Note the default SAJ parser can not be used
1534
+ * concurrently in more than one thread.
1535
+ */
1536
+ static VALUE parser_saj(VALUE self) {
1537
+ if (Qundef == saj_parser) {
1538
+ ojParser p = OJ_R_ALLOC(struct _ojParser);
1539
+
1540
+ memset(p, 0, sizeof(struct _ojParser));
1541
+ buf_init(&p->key);
1542
+ buf_init(&p->buf);
1543
+ p->map = value_map;
1544
+ oj_set_parser_saj(p);
1545
+ saj_parser = TypedData_Wrap_Struct(parser_class, &oj_parser_type, p);
1546
+ rb_gc_register_address(&saj_parser);
1547
+ }
1548
+ return saj_parser;
1549
+ }
1550
+
1551
+ static VALUE validate_parser = Qundef;
1552
+
1553
+ /* Document-method: validate
1554
+ * call-seq: validate
1555
+ *
1556
+ * Returns the default validate parser.
1557
+ */
1558
+ static VALUE parser_validate(VALUE self) {
1559
+ if (Qundef == validate_parser) {
1560
+ ojParser p = OJ_R_ALLOC(struct _ojParser);
1561
+
1562
+ memset(p, 0, sizeof(struct _ojParser));
1563
+ buf_init(&p->key);
1564
+ buf_init(&p->buf);
1565
+ p->map = value_map;
1566
+ oj_set_parser_validator(p);
1567
+ validate_parser = TypedData_Wrap_Struct(parser_class, &oj_parser_type, p);
1568
+ rb_gc_register_address(&validate_parser);
1569
+ }
1570
+ return validate_parser;
1571
+ }
1572
+
1573
+ /* Document-class: Oj::Parser
1574
+ *
1575
+ * A reusable parser that makes use of named delegates to determine the
1576
+ * handling of parsed data. Delegates are available for validation, a callback
1577
+ * parser (SAJ), and a usual delegate that builds Ruby objects as parsing
1578
+ * proceeds.
1579
+ *
1580
+ * This parser is considerably faster than the older Oj.parse call and
1581
+ * isolates options to just the parser so that other parts of the code are not
1582
+ * forced to use the same options.
1583
+ */
1584
+ void oj_parser_init(void) {
1585
+ parser_class = rb_define_class_under(Oj, "Parser", rb_cObject);
1586
+ rb_gc_register_address(&parser_class);
1587
+ rb_undef_alloc_func(parser_class);
1588
+
1589
+ rb_define_module_function(parser_class, "new", parser_new, -1);
1590
+ rb_define_method(parser_class, "parse", parser_parse, 1);
1591
+ rb_define_method(parser_class, "load", parser_load, 1);
1592
+ rb_define_method(parser_class, "file", parser_file, 1);
1593
+ rb_define_method(parser_class, "just_one", parser_just_one, 0);
1594
+ rb_define_method(parser_class, "just_one=", parser_just_one_set, 1);
1595
+ rb_define_method(parser_class, "method_missing", parser_missing, -1);
1596
+
1597
+ rb_define_module_function(parser_class, "usual", parser_usual, 0);
1598
+ rb_define_module_function(parser_class, "saj", parser_saj, 0);
1599
+ rb_define_module_function(parser_class, "validate", parser_validate, 0);
1600
+ }