oj 3.11.5 → 3.16.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +1421 -0
  3. data/README.md +19 -5
  4. data/RELEASE_NOTES.md +61 -0
  5. data/ext/oj/buf.h +20 -6
  6. data/ext/oj/cache.c +329 -0
  7. data/ext/oj/cache.h +22 -0
  8. data/ext/oj/cache8.c +10 -9
  9. data/ext/oj/circarray.c +8 -6
  10. data/ext/oj/circarray.h +2 -2
  11. data/ext/oj/code.c +19 -33
  12. data/ext/oj/code.h +2 -2
  13. data/ext/oj/compat.c +27 -77
  14. data/ext/oj/custom.c +86 -179
  15. data/ext/oj/debug.c +126 -0
  16. data/ext/oj/dump.c +256 -249
  17. data/ext/oj/dump.h +26 -12
  18. data/ext/oj/dump_compat.c +565 -642
  19. data/ext/oj/dump_leaf.c +17 -63
  20. data/ext/oj/dump_object.c +65 -187
  21. data/ext/oj/dump_strict.c +27 -51
  22. data/ext/oj/encoder.c +43 -0
  23. data/ext/oj/err.c +2 -13
  24. data/ext/oj/err.h +24 -8
  25. data/ext/oj/extconf.rb +21 -6
  26. data/ext/oj/fast.c +149 -149
  27. data/ext/oj/intern.c +313 -0
  28. data/ext/oj/intern.h +22 -0
  29. data/ext/oj/mem.c +318 -0
  30. data/ext/oj/mem.h +53 -0
  31. data/ext/oj/mimic_json.c +121 -106
  32. data/ext/oj/object.c +85 -162
  33. data/ext/oj/odd.c +89 -67
  34. data/ext/oj/odd.h +15 -15
  35. data/ext/oj/oj.c +542 -411
  36. data/ext/oj/oj.h +99 -73
  37. data/ext/oj/parse.c +175 -187
  38. data/ext/oj/parse.h +26 -24
  39. data/ext/oj/parser.c +1600 -0
  40. data/ext/oj/parser.h +101 -0
  41. data/ext/oj/rails.c +112 -159
  42. data/ext/oj/rails.h +1 -1
  43. data/ext/oj/reader.c +11 -14
  44. data/ext/oj/reader.h +4 -2
  45. data/ext/oj/resolve.c +5 -24
  46. data/ext/oj/rxclass.c +7 -6
  47. data/ext/oj/rxclass.h +1 -1
  48. data/ext/oj/saj.c +22 -33
  49. data/ext/oj/saj2.c +584 -0
  50. data/ext/oj/saj2.h +23 -0
  51. data/ext/oj/scp.c +5 -28
  52. data/ext/oj/sparse.c +28 -72
  53. data/ext/oj/stream_writer.c +50 -40
  54. data/ext/oj/strict.c +56 -61
  55. data/ext/oj/string_writer.c +72 -39
  56. data/ext/oj/trace.h +31 -4
  57. data/ext/oj/usual.c +1218 -0
  58. data/ext/oj/usual.h +69 -0
  59. data/ext/oj/util.h +1 -1
  60. data/ext/oj/val_stack.c +14 -3
  61. data/ext/oj/val_stack.h +8 -7
  62. data/ext/oj/validate.c +46 -0
  63. data/ext/oj/wab.c +63 -88
  64. data/lib/oj/active_support_helper.rb +1 -3
  65. data/lib/oj/bag.rb +7 -1
  66. data/lib/oj/easy_hash.rb +4 -5
  67. data/lib/oj/error.rb +1 -2
  68. data/lib/oj/json.rb +162 -150
  69. data/lib/oj/mimic.rb +9 -7
  70. data/lib/oj/saj.rb +20 -6
  71. data/lib/oj/schandler.rb +5 -4
  72. data/lib/oj/state.rb +12 -8
  73. data/lib/oj/version.rb +1 -2
  74. data/lib/oj.rb +2 -0
  75. data/pages/Compatibility.md +1 -1
  76. data/pages/InstallOptions.md +20 -0
  77. data/pages/JsonGem.md +15 -0
  78. data/pages/Modes.md +8 -3
  79. data/pages/Options.md +43 -5
  80. data/pages/Parser.md +309 -0
  81. data/pages/Rails.md +14 -2
  82. data/test/_test_active.rb +8 -9
  83. data/test/_test_active_mimic.rb +7 -8
  84. data/test/_test_mimic_rails.rb +17 -20
  85. data/test/activerecord/result_test.rb +5 -6
  86. data/test/activesupport6/encoding_test.rb +63 -28
  87. data/test/{activesupport5 → activesupport7}/abstract_unit.rb +16 -12
  88. data/test/{activesupport5 → activesupport7}/decoding_test.rb +2 -10
  89. data/test/{activesupport5 → activesupport7}/encoding_test.rb +86 -50
  90. data/test/{activesupport5 → activesupport7}/encoding_test_cases.rb +6 -0
  91. data/test/{activesupport5 → activesupport7}/time_zone_test_helpers.rb +8 -0
  92. data/test/files.rb +15 -15
  93. data/test/foo.rb +16 -45
  94. data/test/helper.rb +11 -8
  95. data/test/isolated/shared.rb +3 -2
  96. data/test/json_gem/json_addition_test.rb +2 -2
  97. data/test/json_gem/json_common_interface_test.rb +8 -6
  98. data/test/json_gem/json_encoding_test.rb +0 -0
  99. data/test/json_gem/json_ext_parser_test.rb +1 -0
  100. data/test/json_gem/json_fixtures_test.rb +3 -2
  101. data/test/json_gem/json_generator_test.rb +56 -38
  102. data/test/json_gem/json_generic_object_test.rb +11 -11
  103. data/test/json_gem/json_parser_test.rb +54 -47
  104. data/test/json_gem/json_string_matching_test.rb +9 -9
  105. data/test/json_gem/test_helper.rb +7 -3
  106. data/test/mem.rb +34 -0
  107. data/test/perf.rb +22 -27
  108. data/test/perf_compat.rb +31 -33
  109. data/test/perf_dump.rb +50 -0
  110. data/test/perf_fast.rb +80 -82
  111. data/test/perf_file.rb +27 -29
  112. data/test/perf_object.rb +65 -69
  113. data/test/perf_once.rb +59 -0
  114. data/test/perf_parser.rb +183 -0
  115. data/test/perf_saj.rb +46 -54
  116. data/test/perf_scp.rb +58 -69
  117. data/test/perf_simple.rb +41 -39
  118. data/test/perf_strict.rb +74 -82
  119. data/test/perf_wab.rb +67 -69
  120. data/test/prec.rb +5 -5
  121. data/test/sample/change.rb +0 -1
  122. data/test/sample/dir.rb +0 -1
  123. data/test/sample/doc.rb +0 -1
  124. data/test/sample/file.rb +0 -1
  125. data/test/sample/group.rb +0 -1
  126. data/test/sample/hasprops.rb +0 -1
  127. data/test/sample/layer.rb +0 -1
  128. data/test/sample/rect.rb +0 -1
  129. data/test/sample/shape.rb +0 -1
  130. data/test/sample/text.rb +0 -1
  131. data/test/sample.rb +16 -16
  132. data/test/sample_json.rb +8 -8
  133. data/test/test_compat.rb +95 -43
  134. data/test/test_custom.rb +73 -51
  135. data/test/test_debian.rb +7 -10
  136. data/test/test_fast.rb +135 -79
  137. data/test/test_file.rb +41 -30
  138. data/test/test_gc.rb +16 -5
  139. data/test/test_generate.rb +5 -5
  140. data/test/test_hash.rb +5 -5
  141. data/test/test_integer_range.rb +9 -9
  142. data/test/test_null.rb +20 -20
  143. data/test/test_object.rb +99 -96
  144. data/test/test_parser.rb +11 -0
  145. data/test/test_parser_debug.rb +27 -0
  146. data/test/test_parser_saj.rb +337 -0
  147. data/test/test_parser_usual.rb +251 -0
  148. data/test/test_rails.rb +2 -2
  149. data/test/test_saj.rb +10 -8
  150. data/test/test_scp.rb +37 -39
  151. data/test/test_strict.rb +40 -32
  152. data/test/test_various.rb +165 -84
  153. data/test/test_wab.rb +48 -44
  154. data/test/test_writer.rb +47 -47
  155. data/test/tests.rb +13 -5
  156. data/test/tests_mimic.rb +12 -3
  157. data/test/tests_mimic_addition.rb +12 -3
  158. metadata +74 -128
  159. data/ext/oj/hash.c +0 -131
  160. data/ext/oj/hash.h +0 -19
  161. data/ext/oj/hash_test.c +0 -491
  162. data/test/activesupport4/decoding_test.rb +0 -108
  163. data/test/activesupport4/encoding_test.rb +0 -531
  164. data/test/activesupport4/test_helper.rb +0 -41
  165. data/test/activesupport5/test_helper.rb +0 -72
  166. data/test/bar.rb +0 -35
  167. data/test/baz.rb +0 -16
  168. data/test/zoo.rb +0 -13
data/ext/oj/parser.c ADDED
@@ -0,0 +1,1600 @@
1
+ // Copyright (c) 2020, 2021, Peter Ohler, All rights reserved.
2
+
3
+ #include "parser.h"
4
+
5
+ #include <fcntl.h>
6
+
7
+ #include "oj.h"
8
+
9
+ #define DEBUG 0 // set to 1 to compile in the printf tracing inside parse()
10
+
11
+ #define USE_THREAD_LIMIT 0
12
+ // #define USE_THREAD_LIMIT 100000
13
+ #define MAX_EXP 4932 // NOTE(review): not referenced in the visible part of this file; 4932 looks like the long double decimal exponent limit - confirm
14
+ // Highest index in pow_map (10^308), which is the largest power of ten a double can hold.
15
+ #define MAX_POW 308
16
+
17
+ #define MIN_SLEEP (1000000000LL / (double)CLOCKS_PER_SEC) // nanoseconds per clock tick
18
+ // LLONG_MAX is 9,223,372,036,854,775,807; an integer that reaches a tenth of it is handed to big_change().
19
+ #define BIG_LIMIT LLONG_MAX / 10 // NOTE(review): expansion is not parenthesized; fine for the plain `x < BIG_LIMIT` comparisons in parse()
20
+ #define FRAC_LIMIT 10000000000000000ULL // 10^16; fraction digits that reach this also go through big_change()
21
+
22
+ // Give better performance with indented JSON but worse with unindented.
23
+ // #define SPACE_JUMP
24
+ // Action codes stored in the *_map tables below: parse() switches on p->map[byte] to pick what to do. CHAR_ERR ('.') fills every slot with no other action.
25
+ enum {
26
+ SKIP_CHAR = 'a',
27
+ SKIP_NEWLINE = 'b',
28
+ VAL_NULL = 'c',
29
+ VAL_TRUE = 'd',
30
+ VAL_FALSE = 'e',
31
+ VAL_NEG = 'f',
32
+ VAL0 = 'g',
33
+ VAL_DIGIT = 'h',
34
+ VAL_QUOTE = 'i',
35
+ OPEN_ARRAY = 'k',
36
+ OPEN_OBJECT = 'l',
37
+ CLOSE_ARRAY = 'm',
38
+ CLOSE_OBJECT = 'n',
39
+ AFTER_COMMA = 'o',
40
+ KEY_QUOTE = 'p',
41
+ COLON_COLON = 'q',
42
+ NUM_SPC = 'r',
43
+ NUM_NEWLINE = 's',
44
+ NUM_DOT = 't',
45
+ NUM_COMMA = 'u',
46
+ NUM_FRAC = 'v',
47
+ FRAC_E = 'w',
48
+ EXP_SIGN = 'x',
49
+ EXP_DIGIT = 'y',
50
+ STR_QUOTE = 'z',
51
+ NEG_DIGIT = '-',
52
+ STR_SLASH = 'A',
53
+ ESC_OK = 'B',
54
+ BIG_DIGIT = 'C',
55
+ BIG_DOT = 'D',
56
+ U_OK = 'E',
57
+ TOKEN_OK = 'F',
58
+ NUM_CLOSE_OBJECT = 'G',
59
+ NUM_CLOSE_ARRAY = 'H',
60
+ BIG_FRAC = 'I',
61
+ BIG_E = 'J',
62
+ BIG_EXP_SIGN = 'K',
63
+ BIG_EXP = 'L',
64
+ UTF1 = 'M', // expect 1 more follow byte
65
+ NUM_DIGIT = 'N',
66
+ NUM_ZERO = 'O',
67
+ UTF2 = 'P', // expect 2 more follow bytes
68
+ UTF3 = 'Q', // expect 3 more follow bytes
69
+ STR_OK = 'R',
70
+ UTFX = 'S', // following bytes
71
+ ESC_U = 'U',
72
+ CHAR_ERR = '.',
73
+ DONE = 'X',
74
+ };
75
+ // value_map (mode tag 'v' at index 256): start of a JSON value. Installed by parser_reset() and, among other places, after ':' and '['. Each table has 256 byte entries plus the tag.
76
+ /*
77
+ 0123456789abcdef0123456789abcdef */
78
+ static const char value_map[257] = "\
79
+ X........ab..a..................\
80
+ a.i..........f..ghhhhhhhhh......\
81
+ ...........................k.m..\
82
+ ......e.......c.....d......l.n..\
83
+ ................................\
84
+ ................................\
85
+ ................................\
86
+ ................................v";
87
+ // null_map (tag 'N'): accepts 'l' and 'u' as TOKEN_OK and ',' as AFTER_COMMA; byte_error() reports this mode as "expected null".
88
+ static const char null_map[257] = "\
89
+ ................................\
90
+ ............o...................\
91
+ ................................\
92
+ ............F........F..........\
93
+ ................................\
94
+ ................................\
95
+ ................................\
96
+ ................................N";
97
+ // true_map (tag 'T'): accepts 'e', 'r' and 'u' as TOKEN_OK and ',' as AFTER_COMMA; byte_error() reports this mode as "expected true".
98
+ static const char true_map[257] = "\
99
+ ................................\
100
+ ............o...................\
101
+ ................................\
102
+ .....F............F..F..........\
103
+ ................................\
104
+ ................................\
105
+ ................................\
106
+ ................................T";
107
+ // false_map (tag 'F'): accepts 'a', 'e', 'l' and 's' as TOKEN_OK and ',' as AFTER_COMMA; byte_error() reports this mode as "expected false".
108
+ static const char false_map[257] = "\
109
+ ................................\
110
+ ............o...................\
111
+ ................................\
112
+ .F...F......F......F............\
113
+ ................................\
114
+ ................................\
115
+ ................................\
116
+ ................................F";
117
+ // comma_map (tag ','): installed after a ',' that is not inside an object; same entries as value_map except that NUL, ']' and '}' are errors.
118
+ static const char comma_map[257] = "\
119
+ .........ab..a..................\
120
+ a.i..........f..ghhhhhhhhh......\
121
+ ...........................k....\
122
+ ......e.......c.....d......l....\
123
+ ................................\
124
+ ................................\
125
+ ................................\
126
+ ................................,";
127
+ // after_map (tag 'a'): installed after a value completes inside a container; allows whitespace, ',' (AFTER_COMMA), ']' , '}' and NUL (DONE).
128
+ static const char after_map[257] = "\
129
+ X........ab..a..................\
130
+ a...........o...................\
131
+ .............................m..\
132
+ .............................n..\
133
+ ................................\
134
+ ................................\
135
+ ................................\
136
+ ................................a";
137
+ // key1_map (tag 'K'): installed right after '{'; allows whitespace, '"' (KEY_QUOTE) or '}' (CLOSE_OBJECT) for an empty object.
138
+ static const char key1_map[257] = "\
139
+ .........ab..a..................\
140
+ a.p.............................\
141
+ ................................\
142
+ .............................n..\
143
+ ................................\
144
+ ................................\
145
+ ................................\
146
+ ................................K";
147
+ // key_map (tag 'k'): installed after a ',' inside an object; allows only whitespace or '"' (KEY_QUOTE), so '}' directly after a comma is an error.
148
+ static const char key_map[257] = "\
149
+ .........ab..a..................\
150
+ a.p.............................\
151
+ ................................\
152
+ ................................\
153
+ ................................\
154
+ ................................\
155
+ ................................\
156
+ ................................k";
157
+ // colon_map (tag ':'): installed after an object key; allows whitespace or ':' (COLON_COLON).
158
+ static const char colon_map[257] = "\
159
+ .........ab..a..................\
160
+ a.........................q.....\
161
+ ................................\
162
+ ................................\
163
+ ................................\
164
+ ................................\
165
+ ................................\
166
+ ................................:";
167
+ // neg_map (tag '-'): installed after a leading '-'; '0' maps to NUM_ZERO and '1' through '9' map to NEG_DIGIT, anything else is an error.
168
+ static const char neg_map[257] = "\
169
+ ................................\
170
+ ................O---------......\
171
+ ................................\
172
+ ................................\
173
+ ................................\
174
+ ................................\
175
+ ................................\
176
+ ................................-";
177
+ // zero_map (tag '0'): installed after a leading zero; allows '.' (NUM_DOT) or a terminator (whitespace, ',', ']' or '}') but no further digits.
178
+ static const char zero_map[257] = "\
179
+ .........rs..r..................\
180
+ r...........u.t.................\
181
+ .............................H..\
182
+ .............................G..\
183
+ ................................\
184
+ ................................\
185
+ ................................\
186
+ ................................0";
187
+ // digit_map (tag 'd'): integer digits; allows more digits (NUM_DIGIT), '.' (NUM_DOT), 'e' or 'E' (FRAC_E) or a terminator (whitespace, ',', ']' or '}').
188
+ static const char digit_map[257] = "\
189
+ .........rs..r..................\
190
+ r...........u.t.NNNNNNNNNN......\
191
+ .....w.......................H..\
192
+ .....w.......................G..\
193
+ ................................\
194
+ ................................\
195
+ ................................\
196
+ ................................d";
197
+ // dot_map (tag '.'): installed right after the decimal point; only a digit (NUM_FRAC) is allowed.
198
+ static const char dot_map[257] = "\
199
+ ................................\
200
+ ................vvvvvvvvvv......\
201
+ ................................\
202
+ ................................\
203
+ ................................\
204
+ ................................\
205
+ ................................\
206
+ .................................";
207
+ // frac_map (tag 'f'): fraction digits; allows more digits (NUM_FRAC), 'e' or 'E' (FRAC_E) or a terminator (whitespace, ',', ']' or '}').
208
+ static const char frac_map[257] = "\
209
+ .........rs..r..................\
210
+ r...........u...vvvvvvvvvv......\
211
+ .....w.......................H..\
212
+ .....w.......................G..\
213
+ ................................\
214
+ ................................\
215
+ ................................\
216
+ ................................f";
217
+ // exp_sign_map (tag 'x'): installed after 'e' or 'E'; allows '+' or '-' (EXP_SIGN) or a digit (EXP_DIGIT).
218
+ static const char exp_sign_map[257] = "\
219
+ ................................\
220
+ ...........x.x..yyyyyyyyyy......\
221
+ ................................\
222
+ ................................\
223
+ ................................\
224
+ ................................\
225
+ ................................\
226
+ ................................x";
227
+ // exp_zero_map (tag 'z'): only digits (EXP_DIGIT) are allowed. NOTE(review): presumably installed after an exponent sign; that code is outside this view - confirm.
228
+ static const char exp_zero_map[257] = "\
229
+ ................................\
230
+ ................yyyyyyyyyy......\
231
+ ................................\
232
+ ................................\
233
+ ................................\
234
+ ................................\
235
+ ................................\
236
+ ................................z";
237
+ // exp_map (tag 'X'): allows digits (EXP_DIGIT) or a number terminator (whitespace, ',', ']' or '}'). NOTE(review): where it is installed is outside this view.
238
+ static const char exp_map[257] = "\
239
+ .........rs..r..................\
240
+ r...........u...yyyyyyyyyy......\
241
+ .............................H..\
242
+ .............................G..\
243
+ ................................\
244
+ ................................\
245
+ ................................\
246
+ ................................X";
247
+ // big_digit_map (tag 'D'): integer digits once big_change() has run; same shape as digit_map but yields BIG_DIGIT, BIG_DOT and BIG_E.
248
+ static const char big_digit_map[257] = "\
249
+ .........rs..r..................\
250
+ r...........u.D.CCCCCCCCCC......\
251
+ .....J.......................H..\
252
+ .....J.......................G..\
253
+ ................................\
254
+ ................................\
255
+ ................................\
256
+ ................................D";
257
+ // big_dot_map (tag 'o'): only digits (BIG_FRAC) are allowed. NOTE(review): presumably installed after BIG_DOT; that code is outside this view - confirm.
258
+ static const char big_dot_map[257] = "\
259
+ ................................\
260
+ ................IIIIIIIIII......\
261
+ ................................\
262
+ ................................\
263
+ ................................\
264
+ ................................\
265
+ ................................\
266
+ ................................o";
267
+ // big_frac_map (tag 'g'): fraction digits once big_change() has run; allows digits (BIG_FRAC), 'e' or 'E' (BIG_E) or a terminator.
268
+ static const char big_frac_map[257] = "\
269
+ .........rs..r..................\
270
+ r...........u...IIIIIIIIII......\
271
+ .....J.......................H..\
272
+ .....J.......................G..\
273
+ ................................\
274
+ ................................\
275
+ ................................\
276
+ ................................g";
277
+ // big_exp_sign_map (tag 'B'): allows '+' or '-' (BIG_EXP_SIGN) or a digit (BIG_EXP). NOTE(review): presumably installed after BIG_E; that code is outside this view.
278
+ static const char big_exp_sign_map[257] = "\
279
+ ................................\
280
+ ...........K.K..LLLLLLLLLL......\
281
+ ................................\
282
+ ................................\
283
+ ................................\
284
+ ................................\
285
+ ................................\
286
+ ................................B";
287
+ // big_exp_zero_map (tag 'Z'): only digits (BIG_EXP) are allowed. NOTE(review): where it is installed is outside this view.
288
+ static const char big_exp_zero_map[257] = "\
289
+ ................................\
290
+ ................LLLLLLLLLL......\
291
+ ................................\
292
+ ................................\
293
+ ................................\
294
+ ................................\
295
+ ................................\
296
+ ................................Z";
297
+ // big_exp_map (tag 'Y'): allows digits (BIG_EXP) or a number terminator (whitespace, ',', ']' or '}'). NOTE(review): where it is installed is outside this view.
298
+ static const char big_exp_map[257] = "\
299
+ .........rs..r..................\
300
+ r...........u...LLLLLLLLLL......\
301
+ .............................H..\
302
+ .............................G..\
303
+ ................................\
304
+ ................................\
305
+ ................................\
306
+ ................................Y";
307
+ // string_map (tag 's'): inside a string. 0x20-0x7f are STR_OK except '"' (STR_QUOTE) and backslash (STR_SLASH); 0xc0-0xdf, 0xe0-0xef and 0xf0-0xf7 start UTF1, UTF2 and UTF3 sequences; all other bytes are errors.
308
+ static const char string_map[257] = "\
309
+ ................................\
310
+ RRzRRRRRRRRRRRRRRRRRRRRRRRRRRRRR\
311
+ RRRRRRRRRRRRRRRRRRRRRRRRRRRRARRR\
312
+ RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR\
313
+ ................................\
314
+ ................................\
315
+ MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\
316
+ PPPPPPPPPPPPPPPPQQQQQQQQ........s";
317
+ // esc_map (tag '~'): the character after a backslash; '"', '/', backslash, 'b', 'f', 'n', 'r' and 't' are ESC_OK and 'u' is ESC_U.
318
+ static const char esc_map[257] = "\
319
+ ................................\
320
+ ..B............B................\
321
+ ............................B...\
322
+ ..B...B.......B...B.BU..........\
323
+ ................................\
324
+ ................................\
325
+ ................................\
326
+ ................................~";
327
+ // esc_byte_map (tag 'b'): indexed by the escape letter, holds the byte that escape stands for (for example 'n' holds a newline); other slots hold '.'.
328
+ static const char esc_byte_map[257] = "\
329
+ ................................\
330
+ ..\"............/................\
331
+ ............................\\...\
332
+ ..\b...\f.......\n...\r.\t..........\
333
+ ................................\
334
+ ................................\
335
+ ................................\
336
+ ................................b";
337
+ // u_map (tag 'u'): hex digits '0'-'9', 'A'-'F' and 'a'-'f' are U_OK; everything else is an error.
338
+ static const char u_map[257] = "\
339
+ ................................\
340
+ ................EEEEEEEEEE......\
341
+ .EEEEEE.........................\
342
+ .EEEEEE.........................\
343
+ ................................\
344
+ ................................\
345
+ ................................\
346
+ ................................u";
347
+ // utf_map (tag '8'): only bytes 0x80-0xbf are accepted (UTFX), i.e. the follow bytes of a multi-byte sequence.
348
+ static const char utf_map[257] = "\
349
+ ................................\
350
+ ................................\
351
+ ................................\
352
+ ................................\
353
+ SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS\
354
+ SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS\
355
+ ................................\
356
+ ................................8";
357
+ // space_map (tag 'S'): whitespace only; parse() uses it to skip tab, carriage return and space (SKIP_CHAR) after a newline.
358
+ static const char space_map[257] = "\
359
+ .........ab..a..................\
360
+ a...............................\
361
+ ................................\
362
+ ................................\
363
+ ................................\
364
+ ................................\
365
+ ................................\
366
+ ................................S";
367
+ // trail_map (tag 'R'): same entries as space_map, only whitespace is allowed. NOTE(review): not referenced in the visible part of this file.
368
+ static const char trail_map[257] = "\
369
+ .........ab..a..................\
370
+ a...............................\
371
+ ................................\
372
+ ................................\
373
+ ................................\
374
+ ................................\
375
+ ................................\
376
+ ................................R";
377
+ // hex_map: indexed by a character, holds the value 0-15 of a hex digit ('0'-'9', 'A'-'F', 'a'-'f') and '.' elsewhere. Exactly 256 entries, so no terminating NUL is stored.
378
+ static const byte hex_map[256] = "\
379
+ ................................\
380
+ ................\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09......\
381
+ .\x0a\x0b\x0c\x0d\x0e\x0f.........................\
382
+ .\x0a\x0b\x0c\x0d\x0e\x0f.........................\
383
+ ................................\
384
+ ................................\
385
+ ................................\
386
+ ................................";
387
+ // pow_map[n] is 10^n for n from 0 to 308 (MAX_POW); calc_num() indexes it with the decimal shift and the exponent.
388
+ static long double pow_map[309] = {
389
+ 1.0L, 1.0e1L, 1.0e2L, 1.0e3L, 1.0e4L, 1.0e5L, 1.0e6L, 1.0e7L, 1.0e8L, 1.0e9L, 1.0e10L,
390
+ 1.0e11L, 1.0e12L, 1.0e13L, 1.0e14L, 1.0e15L, 1.0e16L, 1.0e17L, 1.0e18L, 1.0e19L, 1.0e20L, 1.0e21L,
391
+ 1.0e22L, 1.0e23L, 1.0e24L, 1.0e25L, 1.0e26L, 1.0e27L, 1.0e28L, 1.0e29L, 1.0e30L, 1.0e31L, 1.0e32L,
392
+ 1.0e33L, 1.0e34L, 1.0e35L, 1.0e36L, 1.0e37L, 1.0e38L, 1.0e39L, 1.0e40L, 1.0e41L, 1.0e42L, 1.0e43L,
393
+ 1.0e44L, 1.0e45L, 1.0e46L, 1.0e47L, 1.0e48L, 1.0e49L, 1.0e50L, 1.0e51L, 1.0e52L, 1.0e53L, 1.0e54L,
394
+ 1.0e55L, 1.0e56L, 1.0e57L, 1.0e58L, 1.0e59L, 1.0e60L, 1.0e61L, 1.0e62L, 1.0e63L, 1.0e64L, 1.0e65L,
395
+ 1.0e66L, 1.0e67L, 1.0e68L, 1.0e69L, 1.0e70L, 1.0e71L, 1.0e72L, 1.0e73L, 1.0e74L, 1.0e75L, 1.0e76L,
396
+ 1.0e77L, 1.0e78L, 1.0e79L, 1.0e80L, 1.0e81L, 1.0e82L, 1.0e83L, 1.0e84L, 1.0e85L, 1.0e86L, 1.0e87L,
397
+ 1.0e88L, 1.0e89L, 1.0e90L, 1.0e91L, 1.0e92L, 1.0e93L, 1.0e94L, 1.0e95L, 1.0e96L, 1.0e97L, 1.0e98L,
398
+ 1.0e99L, 1.0e100L, 1.0e101L, 1.0e102L, 1.0e103L, 1.0e104L, 1.0e105L, 1.0e106L, 1.0e107L, 1.0e108L, 1.0e109L,
399
+ 1.0e110L, 1.0e111L, 1.0e112L, 1.0e113L, 1.0e114L, 1.0e115L, 1.0e116L, 1.0e117L, 1.0e118L, 1.0e119L, 1.0e120L,
400
+ 1.0e121L, 1.0e122L, 1.0e123L, 1.0e124L, 1.0e125L, 1.0e126L, 1.0e127L, 1.0e128L, 1.0e129L, 1.0e130L, 1.0e131L,
401
+ 1.0e132L, 1.0e133L, 1.0e134L, 1.0e135L, 1.0e136L, 1.0e137L, 1.0e138L, 1.0e139L, 1.0e140L, 1.0e141L, 1.0e142L,
402
+ 1.0e143L, 1.0e144L, 1.0e145L, 1.0e146L, 1.0e147L, 1.0e148L, 1.0e149L, 1.0e150L, 1.0e151L, 1.0e152L, 1.0e153L,
403
+ 1.0e154L, 1.0e155L, 1.0e156L, 1.0e157L, 1.0e158L, 1.0e159L, 1.0e160L, 1.0e161L, 1.0e162L, 1.0e163L, 1.0e164L,
404
+ 1.0e165L, 1.0e166L, 1.0e167L, 1.0e168L, 1.0e169L, 1.0e170L, 1.0e171L, 1.0e172L, 1.0e173L, 1.0e174L, 1.0e175L,
405
+ 1.0e176L, 1.0e177L, 1.0e178L, 1.0e179L, 1.0e180L, 1.0e181L, 1.0e182L, 1.0e183L, 1.0e184L, 1.0e185L, 1.0e186L,
406
+ 1.0e187L, 1.0e188L, 1.0e189L, 1.0e190L, 1.0e191L, 1.0e192L, 1.0e193L, 1.0e194L, 1.0e195L, 1.0e196L, 1.0e197L,
407
+ 1.0e198L, 1.0e199L, 1.0e200L, 1.0e201L, 1.0e202L, 1.0e203L, 1.0e204L, 1.0e205L, 1.0e206L, 1.0e207L, 1.0e208L,
408
+ 1.0e209L, 1.0e210L, 1.0e211L, 1.0e212L, 1.0e213L, 1.0e214L, 1.0e215L, 1.0e216L, 1.0e217L, 1.0e218L, 1.0e219L,
409
+ 1.0e220L, 1.0e221L, 1.0e222L, 1.0e223L, 1.0e224L, 1.0e225L, 1.0e226L, 1.0e227L, 1.0e228L, 1.0e229L, 1.0e230L,
410
+ 1.0e231L, 1.0e232L, 1.0e233L, 1.0e234L, 1.0e235L, 1.0e236L, 1.0e237L, 1.0e238L, 1.0e239L, 1.0e240L, 1.0e241L,
411
+ 1.0e242L, 1.0e243L, 1.0e244L, 1.0e245L, 1.0e246L, 1.0e247L, 1.0e248L, 1.0e249L, 1.0e250L, 1.0e251L, 1.0e252L,
412
+ 1.0e253L, 1.0e254L, 1.0e255L, 1.0e256L, 1.0e257L, 1.0e258L, 1.0e259L, 1.0e260L, 1.0e261L, 1.0e262L, 1.0e263L,
413
+ 1.0e264L, 1.0e265L, 1.0e266L, 1.0e267L, 1.0e268L, 1.0e269L, 1.0e270L, 1.0e271L, 1.0e272L, 1.0e273L, 1.0e274L,
414
+ 1.0e275L, 1.0e276L, 1.0e277L, 1.0e278L, 1.0e279L, 1.0e280L, 1.0e281L, 1.0e282L, 1.0e283L, 1.0e284L, 1.0e285L,
415
+ 1.0e286L, 1.0e287L, 1.0e288L, 1.0e289L, 1.0e290L, 1.0e291L, 1.0e292L, 1.0e293L, 1.0e294L, 1.0e295L, 1.0e296L,
416
+ 1.0e297L, 1.0e298L, 1.0e299L, 1.0e300L, 1.0e301L, 1.0e302L, 1.0e303L, 1.0e304L, 1.0e305L, 1.0e306L, 1.0e307L,
417
+ 1.0e308L};
418
+ // Ruby VALUE for the parser class. NOTE(review): it is assigned and used outside the visible part of this file - confirm where.
419
+ static VALUE parser_class;
420
+
421
+ // Writes the UTF-8 encoding of code into buf and returns the number of bytes written (0, with buf untouched, when code is above 0x7FFFFFFF).
422
+ // Works with extended unicode as well (5 and 6 byte forms) if \Uffffffff style support is desired in the future, so buf needs room for up to 6 bytes.
423
+ static size_t unicodeToUtf8(uint32_t code, byte *buf) {
424
+ byte *start = buf;
425
+
426
+ if (0x0000007F >= code) { // 1 byte, 7 bits
427
+ *buf++ = (byte)code;
428
+ } else if (0x000007FF >= code) { // 2 bytes, 11 bits
429
+ *buf++ = 0xC0 | (code >> 6);
430
+ *buf++ = 0x80 | (0x3F & code);
431
+ } else if (0x0000FFFF >= code) { // 3 bytes, 16 bits
432
+ *buf++ = 0xE0 | (code >> 12);
433
+ *buf++ = 0x80 | ((code >> 6) & 0x3F);
434
+ *buf++ = 0x80 | (0x3F & code);
435
+ } else if (0x001FFFFF >= code) { // 4 bytes, 21 bits
436
+ *buf++ = 0xF0 | (code >> 18);
437
+ *buf++ = 0x80 | ((code >> 12) & 0x3F);
438
+ *buf++ = 0x80 | ((code >> 6) & 0x3F);
439
+ *buf++ = 0x80 | (0x3F & code);
440
+ } else if (0x03FFFFFF >= code) { // 5 bytes, 26 bits (extended form)
441
+ *buf++ = 0xF8 | (code >> 24);
442
+ *buf++ = 0x80 | ((code >> 18) & 0x3F);
443
+ *buf++ = 0x80 | ((code >> 12) & 0x3F);
444
+ *buf++ = 0x80 | ((code >> 6) & 0x3F);
445
+ *buf++ = 0x80 | (0x3F & code);
446
+ } else if (0x7FFFFFFF >= code) { // 6 bytes, 31 bits (extended form)
447
+ *buf++ = 0xFC | (code >> 30);
448
+ *buf++ = 0x80 | ((code >> 24) & 0x3F);
449
+ *buf++ = 0x80 | ((code >> 18) & 0x3F);
450
+ *buf++ = 0x80 | ((code >> 12) & 0x3F);
451
+ *buf++ = 0x80 | ((code >> 6) & 0x3F);
452
+ *buf++ = 0x80 | (0x3F & code);
453
+ }
454
+ return buf - start;
455
+ }
456
+ // Puts the parser back in its start state: no reader, zeroed number state, key and buf reset, value_map active with no pending map, and depth 0.
457
+ static void parser_reset(ojParser p) {
458
+ p->reader = 0;
459
+ memset(&p->num, 0, sizeof(p->num));
460
+ buf_reset(&p->key);
461
+ buf_reset(&p->buf);
462
+ p->map = value_map; // a document starts with a value
463
+ p->next_map = NULL;
464
+ p->depth = 0;
465
+ }
466
+ // Formats the printf style message into a 256 byte buffer (vsnprintf truncates longer text) and raises oj_json_parser_error_class with " at line:col" appended.
467
+ static void parse_error(ojParser p, const char *fmt, ...) {
468
+ va_list ap;
469
+ char buf[256];
470
+
471
+ va_start(ap, fmt);
472
+ vsnprintf(buf, sizeof(buf), fmt, ap);
473
+ va_end(ap);
474
+ rb_raise(oj_json_parser_error_class, "%s at %ld:%ld", buf, p->line, p->col); // NOTE(review): %ld assumes p->line and p->col are long - confirm in parser.h
475
+ }
476
+ // Reports byte b as invalid in the current mode. The mode is read from the tag stored at index 256 of the active map and selects the message.
477
+ static void byte_error(ojParser p, byte b) {
478
+ switch (p->map[256]) {
479
+ case 'N': // null_map
480
+ parse_error(p, "expected null");
481
+ break;
482
+ case 'T': // true_map
483
+ parse_error(p, "expected true");
484
+ break;
485
+ case 'F': // false_map
486
+ parse_error(p, "expected false");
487
+ break;
488
+ case 's': // string_map
489
+ parse_error(p, "invalid JSON character 0x%02x", b);
490
+ break;
491
+ default: parse_error(p, "unexpected character '%c' in '%c' mode", b, p->map[256]); break; // every other map: show the byte and the mode tag
492
+ }
493
+ }
494
+ // Finishes the number being parsed and passes it to the add_int, add_float or add_big callback of the current container (p->funcs[p->stack[p->depth]]), then sets p->type to OJ_NONE.
495
+ static void calc_num(ojParser p) {
496
+ switch (p->type) {
497
+ case OJ_INT:
498
+ if (p->num.neg) {
499
+ p->num.fixnum = -p->num.fixnum;
500
+ p->num.neg = false;
501
+ }
502
+ p->funcs[p->stack[p->depth]].add_int(p);
503
+ break;
504
+ case OJ_DECIMAL: {
505
+ long double d = (long double)p->num.fixnum;
506
+
507
+ if (p->num.neg) {
508
+ d = -d;
509
+ }
510
+ if (0 < p->num.shift) { // shift counts fraction digits and can pass MAX_POW when the fraction starts with many zeros
511
+ d /= (MAX_POW < p->num.shift) ? powl(10.0L, (long double)p->num.shift) : pow_map[p->num.shift]; // never index past the end of pow_map
512
+ }
513
+ if (0 < p->num.exp) {
514
+ long double x;
515
+
516
+ if (MAX_POW < p->num.exp) {
517
+ x = powl(10.0L, (long double)p->num.exp);
518
+ } else {
519
+ x = pow_map[p->num.exp];
520
+ }
521
+ if (p->num.exp_neg) {
522
+ d /= x;
523
+ } else {
524
+ d *= x;
525
+ }
526
+ }
527
+ p->num.dub = d;
528
+ p->funcs[p->stack[p->depth]].add_float(p);
529
+ break;
530
+ }
531
+ case OJ_BIG: p->funcs[p->stack[p->depth]].add_big(p); break; // the text of a big number is already in p->buf
532
+ default:
533
+ // nothing to do
534
+ break;
535
+ }
536
+ p->type = OJ_NONE;
537
+ }
538
+ // Switches the number being parsed to OJ_BIG: rewinds p->buf and writes the text form of what has been accumulated so far (sign, digits of num.fixnum, decimal point and exponent).
539
+ static void big_change(ojParser p) {
540
+ char buf[32];
541
+ int64_t i = p->num.fixnum;
542
+ int len = 0;
543
+
544
+ buf[sizeof(buf) - 1] = '\0';
545
+ p->buf.tail = p->buf.head;
546
+ switch (p->type) {
547
+ case OJ_INT:
548
+ // An int64_t has at most 19 digits plus a sign so it always fits in buf; no need to check length.
549
+ for (len = sizeof(buf) - 1; 0 < i; len--, i /= 10) { // digits are written right to left from the end of buf
550
+ buf[len] = '0' + (i % 10);
551
+ }
552
+ if (p->num.neg) {
553
+ buf[len] = '-';
554
+ len--;
555
+ }
556
+ buf_append_string(&p->buf, buf + len + 1, sizeof(buf) - len - 1);
557
+ p->type = OJ_BIG;
558
+ break;
559
+ case OJ_DECIMAL: {
560
+ int shift = p->num.shift;
561
+
562
+ for (len = sizeof(buf) - 1; 0 < i; len--, i /= 10, shift--) { // NOTE(review): no '.' is written when shift exceeds the digit count of fixnum - confirm
563
+ if (0 == shift) {
564
+ buf[len] = '.';
565
+ len--;
566
+ }
567
+ buf[len] = '0' + (i % 10);
568
+ }
569
+ if (p->num.neg) {
570
+ buf[len] = '-';
571
+ len--;
572
+ }
573
+ buf_append_string(&p->buf, buf + len + 1, sizeof(buf) - len - 1);
574
+ if (0 < p->num.exp) {
575
+ int x = p->num.exp;
576
+ int d, div;
577
+ bool started = false;
578
+ buf_append(&p->buf, 'e');
579
+ if (0 < p->num.exp_neg) {
580
+ buf_append(&p->buf, '-');
581
+ }
582
+ for (div = 1000; 0 < div; div /= 10) { // up to four exponent digits, most significant first
583
+ d = x / div % 10;
584
+ if (started || 0 < d) {
585
+ started = true; // only leading zeros are skipped; zeros after the first digit must be kept (100 is "100", not "1")
586
+ buf_append(&p->buf, '0' + d);
587
+ }
588
+ }
589
+ }
590
+ p->type = OJ_BIG;
591
+ break;
592
+ }
593
+ default: break;
594
+ }
595
+ }
596
+
597
+ static void parse(ojParser p, const byte *json) {
598
+ const byte *start;
599
+ const byte *b = json;
600
+ int i;
601
+
602
+ p->line = 1;
603
+ p->col = -1;
604
+ #if DEBUG
605
+ printf("*** parse - mode: %c %s\n", p->map[256], (const char *)json);
606
+ #endif
607
+ for (; '\0' != *b; b++) {
608
+ #if DEBUG
609
+ printf("*** parse - mode: %c %02x %s => %c\n", p->map[256], *b, b, p->map[*b]);
610
+ #endif
611
+ switch (p->map[*b]) {
612
+ case SKIP_NEWLINE:
613
+ p->line++;
614
+ p->col = b - json;
615
+ b++;
616
+ #ifdef SPACE_JUMP
617
+ // for (uint32_t *sj = (uint32_t*)b; 0x20202020 == *sj; sj++) { b += 4; }
618
+ for (uint16_t *sj = (uint16_t *)b; 0x2020 == *sj; sj++) {
619
+ b += 2;
620
+ }
621
+ #endif
622
+ for (; SKIP_CHAR == space_map[*b]; b++) {
623
+ }
624
+ b--;
625
+ break;
626
+ case COLON_COLON: p->map = value_map; break;
627
+ case SKIP_CHAR: break;
628
+ case KEY_QUOTE:
629
+ b++;
630
+ p->key.tail = p->key.head;
631
+ start = b;
632
+ for (; STR_OK == string_map[*b]; b++) {
633
+ }
634
+ buf_append_string(&p->key, (const char *)start, b - start);
635
+ if ('"' == *b) {
636
+ p->map = colon_map;
637
+ break;
638
+ }
639
+ b--;
640
+ p->map = string_map;
641
+ p->next_map = colon_map;
642
+ break;
643
+ case AFTER_COMMA:
644
+ if (0 < p->depth && OBJECT_FUN == p->stack[p->depth]) {
645
+ p->map = key_map;
646
+ } else {
647
+ p->map = comma_map;
648
+ }
649
+ break;
650
+ case VAL_QUOTE:
651
+ b++;
652
+ start = b;
653
+ p->buf.tail = p->buf.head;
654
+ for (; STR_OK == string_map[*b]; b++) {
655
+ }
656
+ buf_append_string(&p->buf, (const char *)start, b - start);
657
+ if ('"' == *b) {
658
+ p->cur = b - json;
659
+ p->funcs[p->stack[p->depth]].add_str(p);
660
+ p->map = (0 == p->depth) ? value_map : after_map;
661
+ break;
662
+ }
663
+ b--;
664
+ p->map = string_map;
665
+ p->next_map = (0 == p->depth) ? value_map : after_map;
666
+ break;
667
+ case OPEN_OBJECT:
668
+ p->cur = b - json;
669
+ p->funcs[p->stack[p->depth]].open_object(p);
670
+ p->depth++;
671
+ p->stack[p->depth] = OBJECT_FUN;
672
+ p->map = key1_map;
673
+ break;
674
+ case NUM_CLOSE_OBJECT:
675
+ p->cur = b - json;
676
+ calc_num(p);
677
+ // flow through
678
+ case CLOSE_OBJECT:
679
+ p->map = (1 == p->depth) ? value_map : after_map;
680
+ if (p->depth <= 0 || OBJECT_FUN != p->stack[p->depth]) {
681
+ p->col = b - json - p->col + 1;
682
+ parse_error(p, "unexpected object close");
683
+ return;
684
+ }
685
+ p->depth--;
686
+ p->cur = b - json;
687
+ p->funcs[p->stack[p->depth]].close_object(p);
688
+ break;
689
+ case OPEN_ARRAY:
690
+ p->cur = b - json;
691
+ p->funcs[p->stack[p->depth]].open_array(p);
692
+ p->depth++;
693
+ p->stack[p->depth] = ARRAY_FUN;
694
+ p->map = value_map;
695
+ break;
696
+ case NUM_CLOSE_ARRAY:
697
+ p->cur = b - json;
698
+ calc_num(p);
699
+ // flow through
700
+ case CLOSE_ARRAY:
701
+ p->map = (1 == p->depth) ? value_map : after_map;
702
+ if (p->depth <= 0 || ARRAY_FUN != p->stack[p->depth]) {
703
+ p->col = b - json - p->col + 1;
704
+ parse_error(p, "unexpected array close");
705
+ return;
706
+ }
707
+ p->depth--;
708
+ p->cur = b - json;
709
+ p->funcs[p->stack[p->depth]].close_array(p);
710
+ break;
711
+ case NUM_COMMA:
712
+ p->cur = b - json;
713
+ calc_num(p);
714
+ if (0 < p->depth && OBJECT_FUN == p->stack[p->depth]) {
715
+ p->map = key_map;
716
+ } else {
717
+ p->map = comma_map;
718
+ }
719
+ break;
720
+ case VAL0:
721
+ p->type = OJ_INT;
722
+ p->num.fixnum = 0;
723
+ p->num.neg = false;
724
+ p->num.shift = 0;
725
+ p->num.len = 0;
726
+ p->num.exp = 0;
727
+ p->num.exp_neg = false;
728
+ p->map = zero_map;
729
+ break;
730
+ case VAL_NEG:
731
+ p->type = OJ_INT;
732
+ p->num.fixnum = 0;
733
+ p->num.neg = true;
734
+ p->num.shift = 0;
735
+ p->num.len = 0;
736
+ p->num.exp = 0;
737
+ p->num.exp_neg = false;
738
+ p->map = neg_map;
739
+ break;
740
+ ;
741
+ case VAL_DIGIT:
742
+ p->type = OJ_INT;
743
+ p->num.fixnum = 0;
744
+ p->num.neg = false;
745
+ p->num.shift = 0;
746
+ p->num.exp = 0;
747
+ p->num.exp_neg = false;
748
+ p->num.len = 0;
749
+ p->map = digit_map;
750
+ for (; NUM_DIGIT == digit_map[*b]; b++) {
751
+ uint64_t x = (uint64_t)p->num.fixnum * 10 + (uint64_t)(*b - '0');
752
+
753
+ // Tried just checking for an int less than zero but that
754
+ // fails when optimization is on for some reason with the
755
+ // clang compiler so us a bit mask instead.
756
+ if (x < BIG_LIMIT) {
757
+ p->num.fixnum = (int64_t)x;
758
+ } else {
759
+ big_change(p);
760
+ p->map = big_digit_map;
761
+ break;
762
+ }
763
+ }
764
+ b--;
765
+ break;
766
+ case NUM_DIGIT:
767
+ for (; NUM_DIGIT == digit_map[*b]; b++) {
768
+ uint64_t x = p->num.fixnum * 10 + (uint64_t)(*b - '0');
769
+
770
+ if (x < BIG_LIMIT) {
771
+ p->num.fixnum = (int64_t)x;
772
+ } else {
773
+ big_change(p);
774
+ p->map = big_digit_map;
775
+ break;
776
+ }
777
+ }
778
+ b--;
779
+ break;
780
+ case NUM_DOT:
781
+ p->type = OJ_DECIMAL;
782
+ p->map = dot_map;
783
+ break;
784
+ case NUM_FRAC:
785
+ p->map = frac_map;
786
+ for (; NUM_FRAC == frac_map[*b]; b++) {
787
+ uint64_t x = p->num.fixnum * 10 + (uint64_t)(*b - '0');
788
+
789
+ if (x < FRAC_LIMIT) {
790
+ p->num.fixnum = (int64_t)x;
791
+ p->num.shift++;
792
+ } else {
793
+ big_change(p);
794
+ p->map = big_frac_map;
795
+ break;
796
+ }
797
+ }
798
+ b--;
799
+ break;
800
+ case FRAC_E:
801
+ p->type = OJ_DECIMAL;
802
+ p->map = exp_sign_map;
803
+ break;
804
+ case NUM_ZERO: p->map = zero_map; break;
805
+ case NEG_DIGIT:
806
+ for (; NUM_DIGIT == digit_map[*b]; b++) {
807
+ uint64_t x = p->num.fixnum * 10 + (uint64_t)(*b - '0');
808
+
809
+ if (x < BIG_LIMIT) {
810
+ p->num.fixnum = (int64_t)x;
811
+ } else {
812
+ big_change(p);
813
+ p->map = big_digit_map;
814
+ break;
815
+ }
816
+ }
817
+ b--;
818
+ p->map = digit_map;
819
+ break;
820
+ case EXP_SIGN:
821
+ p->num.exp_neg = ('-' == *b);
822
+ p->map = exp_zero_map;
823
+ break;
824
+ case EXP_DIGIT:
825
+ p->map = exp_map;
826
+ for (; NUM_DIGIT == digit_map[*b]; b++) {
827
+ int16_t x = p->num.exp * 10 + (int16_t)(*b - '0');
828
+
829
+ if (x <= MAX_EXP) {
830
+ p->num.exp = x;
831
+ } else {
832
+ big_change(p);
833
+ p->map = big_exp_map;
834
+ break;
835
+ }
836
+ }
837
+ b--;
838
+ break;
839
+ case BIG_DIGIT:
840
+ start = b;
841
+ for (; NUM_DIGIT == digit_map[*b]; b++) {
842
+ }
843
+ buf_append_string(&p->buf, (const char *)start, b - start);
844
+ b--;
845
+ break;
846
+ case BIG_DOT:
847
+ buf_append(&p->buf, '.');
848
+ p->map = big_dot_map;
849
+ break;
850
+ case BIG_FRAC:
851
+ p->map = big_frac_map;
852
+ start = b;
853
+ for (; NUM_FRAC == frac_map[*b]; b++) {
854
+ }
855
+ buf_append_string(&p->buf, (const char *)start, b - start);
856
+ b--;
857
+ break;
858
+ case BIG_E:
859
+ buf_append(&p->buf, *b);
860
+ p->map = big_exp_sign_map;
861
+ break;
862
+ case BIG_EXP_SIGN:
863
+ buf_append(&p->buf, *b);
864
+ p->map = big_exp_zero_map;
865
+ break;
866
+ case BIG_EXP:
867
+ start = b;
868
+ for (; NUM_DIGIT == digit_map[*b]; b++) {
869
+ }
870
+ buf_append_string(&p->buf, (const char *)start, b - start);
871
+ b--;
872
+ p->map = big_exp_map;
873
+ break;
874
+ case NUM_SPC:
875
+ p->cur = b - json;
876
+ calc_num(p);
877
+ break;
878
+ case NUM_NEWLINE:
879
+ p->cur = b - json;
880
+ calc_num(p);
881
+ b++;
882
+ #ifdef SPACE_JUMP
883
+ // for (uint32_t *sj = (uint32_t*)b; 0x20202020 == *sj; sj++) { b += 4; }
884
+ for (uint16_t *sj = (uint16_t *)b; 0x2020 == *sj; sj++) {
885
+ b += 2;
886
+ }
887
+ #endif
888
+ for (; SKIP_CHAR == space_map[*b]; b++) {
889
+ }
890
+ b--;
891
+ break;
892
+ case STR_OK:
893
+ start = b;
894
+ for (; STR_OK == string_map[*b]; b++) {
895
+ }
896
+ if (':' == p->next_map[256]) {
897
+ buf_append_string(&p->key, (const char *)start, b - start);
898
+ } else {
899
+ buf_append_string(&p->buf, (const char *)start, b - start);
900
+ }
901
+ if ('"' == *b) {
902
+ p->cur = b - json;
903
+ p->funcs[p->stack[p->depth]].add_str(p);
904
+ p->map = p->next_map;
905
+ break;
906
+ }
907
+ b--;
908
+ break;
909
+ case STR_SLASH: p->map = esc_map; break;
910
+ case STR_QUOTE:
911
+ p->cur = b - json;
912
+ p->funcs[p->stack[p->depth]].add_str(p);
913
+ p->map = p->next_map;
914
+ break;
915
+ case ESC_U:
916
+ p->map = u_map;
917
+ p->ri = 0;
918
+ p->ucode = 0;
919
+ break;
920
+ case U_OK:
921
+ p->ri++;
922
+ p->ucode = p->ucode << 4 | (uint32_t)hex_map[*b];
923
+ if (4 <= p->ri) {
924
+ byte utf8[8];
925
+ size_t ulen = unicodeToUtf8(p->ucode, utf8);
926
+
927
+ if (0 < ulen) {
928
+ if (':' == p->next_map[256]) {
929
+ buf_append_string(&p->key, (const char *)utf8, ulen);
930
+ } else {
931
+ buf_append_string(&p->buf, (const char *)utf8, ulen);
932
+ }
933
+ } else {
934
+ parse_error(p, "invalid unicode");
935
+ return;
936
+ }
937
+ p->map = string_map;
938
+ }
939
+ break;
940
+ case ESC_OK:
941
+ if (':' == p->next_map[256]) {
942
+ buf_append(&p->key, esc_byte_map[*b]);
943
+ } else {
944
+ buf_append(&p->buf, esc_byte_map[*b]);
945
+ }
946
+ p->map = string_map;
947
+ break;
948
+ case UTF1:
949
+ p->ri = 1;
950
+ p->map = utf_map;
951
+ if (':' == p->next_map[256]) {
952
+ buf_append(&p->key, *b);
953
+ } else {
954
+ buf_append(&p->buf, *b);
955
+ }
956
+ break;
957
+ case UTF2:
958
+ p->ri = 2;
959
+ p->map = utf_map;
960
+ if (':' == p->next_map[256]) {
961
+ buf_append(&p->key, *b);
962
+ } else {
963
+ buf_append(&p->buf, *b);
964
+ }
965
+ break;
966
+ case UTF3:
967
+ p->ri = 3;
968
+ p->map = utf_map;
969
+ if (':' == p->next_map[256]) {
970
+ buf_append(&p->key, *b);
971
+ } else {
972
+ buf_append(&p->buf, *b);
973
+ }
974
+ break;
975
+ case UTFX:
976
+ p->ri--;
977
+ if (':' == p->next_map[256]) {
978
+ buf_append(&p->key, *b);
979
+ } else {
980
+ buf_append(&p->buf, *b);
981
+ }
982
+ if (p->ri <= 0) {
983
+ p->map = string_map;
984
+ }
985
+ break;
986
+ case VAL_NULL:
987
+ if ('u' == b[1] && 'l' == b[2] && 'l' == b[3]) {
988
+ b += 3;
989
+ p->cur = b - json;
990
+ p->funcs[p->stack[p->depth]].add_null(p);
991
+ p->map = (0 == p->depth) ? value_map : after_map;
992
+ break;
993
+ }
994
+ p->ri = 0;
995
+ *p->token = *b++;
996
+ for (i = 1; i < 4; i++) {
997
+ if ('\0' == *b) {
998
+ p->ri = i;
999
+ break;
1000
+ } else {
1001
+ p->token[i] = *b++;
1002
+ }
1003
+ }
1004
+ if (0 < p->ri) {
1005
+ p->map = null_map;
1006
+ b--;
1007
+ break;
1008
+ }
1009
+ p->col = b - json - p->col;
1010
+ parse_error(p, "expected null");
1011
+ return;
1012
+ case VAL_TRUE:
1013
+ if ('r' == b[1] && 'u' == b[2] && 'e' == b[3]) {
1014
+ b += 3;
1015
+ p->cur = b - json;
1016
+ p->funcs[p->stack[p->depth]].add_true(p);
1017
+ p->map = (0 == p->depth) ? value_map : after_map;
1018
+ break;
1019
+ }
1020
+ p->ri = 0;
1021
+ *p->token = *b++;
1022
+ for (i = 1; i < 4; i++) {
1023
+ if ('\0' == *b) {
1024
+ p->ri = i;
1025
+ break;
1026
+ } else {
1027
+ p->token[i] = *b++;
1028
+ }
1029
+ }
1030
+ if (0 < p->ri) {
1031
+ p->map = true_map;
1032
+ b--;
1033
+ break;
1034
+ }
1035
+ p->col = b - json - p->col;
1036
+ parse_error(p, "expected true");
1037
+ return;
1038
+ case VAL_FALSE:
1039
+ if ('a' == b[1] && 'l' == b[2] && 's' == b[3] && 'e' == b[4]) {
1040
+ b += 4;
1041
+ p->cur = b - json;
1042
+ p->funcs[p->stack[p->depth]].add_false(p);
1043
+ p->map = (0 == p->depth) ? value_map : after_map;
1044
+ break;
1045
+ }
1046
+ p->ri = 0;
1047
+ *p->token = *b++;
1048
+ for (i = 1; i < 5; i++) {
1049
+ if ('\0' == *b) {
1050
+ p->ri = i;
1051
+ break;
1052
+ } else {
1053
+ p->token[i] = *b++;
1054
+ }
1055
+ }
1056
+ if (0 < p->ri) {
1057
+ p->map = false_map;
1058
+ b--;
1059
+ break;
1060
+ }
1061
+ p->col = b - json - p->col;
1062
+ parse_error(p, "expected false");
1063
+ return;
1064
+ case TOKEN_OK:
1065
+ p->token[p->ri] = *b;
1066
+ p->ri++;
1067
+ switch (p->map[256]) {
1068
+ case 'N':
1069
+ if (4 == p->ri) {
1070
+ if (0 != strncmp("null", p->token, 4)) {
1071
+ p->col = b - json - p->col;
1072
+ parse_error(p, "expected null");
1073
+ return;
1074
+ }
1075
+ p->cur = b - json;
1076
+ p->funcs[p->stack[p->depth]].add_null(p);
1077
+ p->map = (0 == p->depth) ? value_map : after_map;
1078
+ }
1079
+ break;
1080
+ case 'F':
1081
+ if (5 == p->ri) {
1082
+ if (0 != strncmp("false", p->token, 5)) {
1083
+ p->col = b - json - p->col;
1084
+ parse_error(p, "expected false");
1085
+ return;
1086
+ }
1087
+ p->cur = b - json;
1088
+ p->funcs[p->stack[p->depth]].add_false(p);
1089
+ p->map = (0 == p->depth) ? value_map : after_map;
1090
+ }
1091
+ break;
1092
+ case 'T':
1093
+ if (4 == p->ri) {
1094
+ if (0 != strncmp("true", p->token, 4)) {
1095
+ p->col = b - json - p->col;
1096
+ parse_error(p, "expected true");
1097
+ return;
1098
+ }
1099
+ p->cur = b - json;
1100
+ p->funcs[p->stack[p->depth]].add_true(p);
1101
+ p->map = (0 == p->depth) ? value_map : after_map;
1102
+ }
1103
+ break;
1104
+ default:
1105
+ p->col = b - json - p->col;
1106
+ parse_error(p, "parse error");
1107
+ return;
1108
+ }
1109
+ break;
1110
+ case CHAR_ERR: byte_error(p, *b); return;
1111
+ default: break;
1112
+ }
1113
+ if (0 == p->depth && 'v' == p->map[256] && p->just_one) {
1114
+ p->map = trail_map;
1115
+ }
1116
+ }
1117
+ if (0 == p->depth) {
1118
+ switch (p->map[256]) {
1119
+ case '0':
1120
+ case 'd':
1121
+ case 'f':
1122
+ case 'z':
1123
+ case 'X':
1124
+ case 'D':
1125
+ case 'g':
1126
+ case 'B':
1127
+ case 'Y':
1128
+ p->cur = b - json;
1129
+ calc_num(p);
1130
+ break;
1131
+ }
1132
+ }
1133
+ return;
1134
+ }
1135
+
1136
+ static void parser_free(void *ptr) {
1137
+ ojParser p;
1138
+
1139
+ if (0 == ptr) {
1140
+ return;
1141
+ }
1142
+ p = (ojParser)ptr;
1143
+ buf_cleanup(&p->key);
1144
+ buf_cleanup(&p->buf);
1145
+ if (NULL != p->free) {
1146
+ p->free(p);
1147
+ }
1148
+ OJ_R_FREE(ptr);
1149
+ }
1150
+
1151
+ static void parser_mark(void *ptr) {
1152
+ if (NULL != ptr) {
1153
+ ojParser p = (ojParser)ptr;
1154
+
1155
+ if (0 != p->reader) {
1156
+ rb_gc_mark(p->reader);
1157
+ }
1158
+ if (NULL != p->mark) {
1159
+ p->mark(p);
1160
+ }
1161
+ }
1162
+ }
1163
+
1164
+ static const rb_data_type_t oj_parser_type = {
1165
+ "Oj/parser",
1166
+ {
1167
+ parser_mark,
1168
+ parser_free,
1169
+ NULL,
1170
+ },
1171
+ 0,
1172
+ 0,
1173
+ };
1174
+
1175
+ extern void oj_set_parser_validator(ojParser p);
1176
+ extern void oj_set_parser_saj(ojParser p);
1177
+ extern void oj_set_parser_usual(ojParser p);
1178
+ extern void oj_set_parser_debug(ojParser p);
1179
+
1180
+ static int opt_cb(VALUE rkey, VALUE value, VALUE ptr) {
1181
+ ojParser p = (ojParser)ptr;
1182
+ const char *key = NULL;
1183
+ char set_key[64];
1184
+ long klen;
1185
+
1186
+ switch (rb_type(rkey)) {
1187
+ case RUBY_T_SYMBOL:
1188
+ rkey = rb_sym2str(rkey);
1189
+ // fall through
1190
+ case RUBY_T_STRING:
1191
+ key = StringValuePtr(rkey);
1192
+ klen = RSTRING_LEN(rkey);
1193
+ break;
1194
+ default: rb_raise(rb_eArgError, "option keys must be a symbol or string");
1195
+ }
1196
+ if ((long)sizeof(set_key) - 1 <= klen) {
1197
+ return ST_CONTINUE;
1198
+ }
1199
+ memcpy(set_key, key, klen);
1200
+ set_key[klen] = '=';
1201
+ set_key[klen + 1] = '\0';
1202
+ p->option(p, set_key, value);
1203
+
1204
+ return ST_CONTINUE;
1205
+ }
1206
+
1207
+ /* Document-method: new
1208
+ * call-seq: new(mode=nil)
1209
+ *
1210
+ * Creates a new Parser with the specified mode. If no mode is provided
1211
+ * validation is assumed. Optional arguments can be provided that match the
1212
+ * mode. For example with the :usual mode the call might look like
1213
+ * Oj::Parser.new(:usual, cache_keys: true).
1214
+ */
1215
+ static VALUE parser_new(int argc, VALUE *argv, VALUE self) {
1216
+ ojParser p = OJ_R_ALLOC(struct _ojParser);
1217
+
1218
+ #if HAVE_RB_EXT_RACTOR_SAFE
1219
+ // This doesn't seem to do anything.
1220
+ rb_ext_ractor_safe(true);
1221
+ #endif
1222
+ memset(p, 0, sizeof(struct _ojParser));
1223
+ buf_init(&p->key);
1224
+ buf_init(&p->buf);
1225
+ p->map = value_map;
1226
+
1227
+ if (argc < 1) {
1228
+ oj_set_parser_validator(p);
1229
+ } else {
1230
+ VALUE mode = argv[0];
1231
+
1232
+ if (Qnil == mode) {
1233
+ oj_set_parser_validator(p);
1234
+ } else {
1235
+ const char *ms = NULL;
1236
+
1237
+ switch (rb_type(mode)) {
1238
+ case RUBY_T_SYMBOL:
1239
+ mode = rb_sym2str(mode);
1240
+ // fall through
1241
+ case RUBY_T_STRING: ms = RSTRING_PTR(mode); break;
1242
+ default: rb_raise(rb_eArgError, "mode must be :validate, :usual, :saj, or :object");
1243
+ }
1244
+ if (0 == strcmp("usual", ms) || 0 == strcmp("standard", ms) || 0 == strcmp("strict", ms) ||
1245
+ 0 == strcmp("compat", ms)) {
1246
+ oj_set_parser_usual(p);
1247
+ } else if (0 == strcmp("object", ms)) {
1248
+ // TBD
1249
+ } else if (0 == strcmp("saj", ms)) {
1250
+ oj_set_parser_saj(p);
1251
+ } else if (0 == strcmp("validate", ms)) {
1252
+ oj_set_parser_validator(p);
1253
+ } else if (0 == strcmp("debug", ms)) {
1254
+ oj_set_parser_debug(p);
1255
+ } else {
1256
+ rb_raise(rb_eArgError, "mode must be :validate, :usual, :saj, or :object");
1257
+ }
1258
+ }
1259
+ if (1 < argc) {
1260
+ VALUE ropts = argv[1];
1261
+
1262
+ Check_Type(ropts, T_HASH);
1263
+ rb_hash_foreach(ropts, opt_cb, (VALUE)p);
1264
+ }
1265
+ }
1266
+ return TypedData_Wrap_Struct(parser_class, &oj_parser_type, p);
1267
+ }
1268
+
1269
+ // Create a new parser without setting the delegate. The parser is
1270
+ // wrapped. The parser is (ojParser)DATA_PTR(value) where value is the return
1271
+ // from this function. A delegate must be added before the parser can be
1272
+ // used. Optionally oj_parser_set_options can be called if the options are not
1273
+ // set directly.
1274
+ VALUE oj_parser_new(void) {
1275
+ ojParser p = OJ_R_ALLOC(struct _ojParser);
1276
+
1277
+ #if HAVE_RB_EXT_RACTOR_SAFE
1278
+ // This doesn't seem to do anything.
1279
+ rb_ext_ractor_safe(true);
1280
+ #endif
1281
+ memset(p, 0, sizeof(struct _ojParser));
1282
+ buf_init(&p->key);
1283
+ buf_init(&p->buf);
1284
+ p->map = value_map;
1285
+
1286
+ return TypedData_Wrap_Struct(parser_class, &oj_parser_type, p);
1287
+ }
1288
+
1289
+ // Set set the options from a hash (ropts).
1290
+ void oj_parser_set_option(ojParser p, VALUE ropts) {
1291
+ Check_Type(ropts, T_HASH);
1292
+ rb_hash_foreach(ropts, opt_cb, (VALUE)p);
1293
+ }
1294
+
1295
+ /* Document-method: method_missing(value)
1296
+ * call-seq: method_missing(value)
1297
+ *
1298
+ * Methods not handled by the parser are passed to the delegate. The methods
1299
+ * supported by delegate are:
1300
+ *
1301
+ * - *:validate*
1302
+ * - no options
1303
+ *
1304
+ * - *:saj*
1305
+ * - _cache_keys_ is a flag indicating hash keys should be cached.
1306
+ * - _cache_strings_ is a positive integer less than 35. Strings shorter than that length are cached.
1307
+ * - _handler_ is the SAJ handler
1308
+ *
1309
+ * - *:usual*
1310
+ * - _cache_keys_ is a flag indicating hash keys should be cached.
1311
+ * - _cache_strings_ is a positive integer less than 35. Strings shorter than that length are cached.
1312
+ * - _cache_expunge_ dictates when the cache will be expunged where 0 never expunges,
1313
+ * 1 expunges slowly, 2 expunges faster, and 3 or higher expunges agressively.
1314
+ * - _capacity_ is the capacity of the parser's internal stack. The parser grows automatically
1315
+ * but can be updated directly with this call.
1316
+ * - _create_id_ if non-nil is the key that is used to specify the type of object to create
1317
+ * when parsing. Parsed JSON objects that include the specified element use the element
1318
+ * value as the name of the class to create an object from instead of a Hash.
1319
+ * - _decimal_ is the approach to how decimals are parsed. If _:auto_ then
1320
+ * the decimals with significant digits are 16 or less are Floats and long
1321
+ * ones are BigDecimal. _:ruby_ uses a call to Ruby to convert a string to a Float.
1322
+ * _:float_ always generates a Float. _:bigdecimal_ always results in a BigDecimal.
1323
+ * - _ignore_json_create_ is a flag that when set the class json_create method is
1324
+ * ignored on parsing in favor of creating an instance and populating directly.
1325
+ * - _missing_class_ is an indicator that determines how unknown class names are handled.
1326
+ * Valid values are _:auto_ which creates any missing classes on parse, :ignore which ignores
1327
+ * and continues as a Hash (default), and :raise which raises an exception if the class is not found.
1328
+ * - _omit_null_ is a flag that if true then null values in a map or object are omitted
1329
+ * from the resulting Hash or Object.
1330
+ * - _symbol_keys_ is a flag that indicates Hash keys should be parsed to Symbols versus Strings.
1331
+ */
1332
+ static VALUE parser_missing(int argc, VALUE *argv, VALUE self) {
1333
+ ojParser p;
1334
+ const char *key = NULL;
1335
+ volatile VALUE rkey = *argv;
1336
+ volatile VALUE rv = Qnil;
1337
+
1338
+ TypedData_Get_Struct(self, struct _ojParser, &oj_parser_type, p);
1339
+
1340
+ #if HAVE_RB_EXT_RACTOR_SAFE
1341
+ // This doesn't seem to do anything.
1342
+ rb_ext_ractor_safe(true);
1343
+ #endif
1344
+ switch (rb_type(rkey)) {
1345
+ case RUBY_T_SYMBOL:
1346
+ rkey = rb_sym2str(rkey);
1347
+ // fall through
1348
+ case RUBY_T_STRING: key = StringValuePtr(rkey); break;
1349
+ default: rb_raise(rb_eArgError, "option method must be a symbol or string");
1350
+ }
1351
+ if (1 < argc) {
1352
+ rv = argv[1];
1353
+ }
1354
+ return p->option(p, key, rv);
1355
+ }
1356
+
1357
+ /* Document-method: parse(json)
1358
+ * call-seq: parse(json)
1359
+ *
1360
+ * Parse a JSON string.
1361
+ *
1362
+ * Returns the result according to the delegate of the parser.
1363
+ */
1364
+ static VALUE parser_parse(VALUE self, VALUE json) {
1365
+ ojParser p;
1366
+ const byte *ptr = (const byte *)StringValuePtr(json);
1367
+
1368
+ TypedData_Get_Struct(self, struct _ojParser, &oj_parser_type, p);
1369
+
1370
+ parser_reset(p);
1371
+ p->start(p);
1372
+ parse(p, ptr);
1373
+
1374
+ return p->result(p);
1375
+ }
1376
+
1377
+ static VALUE load_rescue(VALUE self, VALUE x) {
1378
+ // Normal EOF. No action needed other than to stop loading.
1379
+ return Qfalse;
1380
+ }
1381
+
1382
+ static VALUE load(VALUE self) {
1383
+ ojParser p;
1384
+ volatile VALUE rbuf = rb_str_new2("");
1385
+
1386
+ TypedData_Get_Struct(self, struct _ojParser, &oj_parser_type, p);
1387
+
1388
+ p->start(p);
1389
+ while (true) {
1390
+ rb_funcall(p->reader, oj_readpartial_id, 2, INT2NUM(16385), rbuf);
1391
+ if (0 < RSTRING_LEN(rbuf)) {
1392
+ parse(p, (byte *)StringValuePtr(rbuf));
1393
+ }
1394
+ if (Qtrue == rb_funcall(p->reader, oj_eofq_id, 0)) {
1395
+ if (0 < p->depth) {
1396
+ parse_error(p, "parse error, not closed");
1397
+ }
1398
+ break;
1399
+ }
1400
+ }
1401
+ return Qtrue;
1402
+ }
1403
+
1404
+ /* Document-method: load(reader)
1405
+ * call-seq: load(reader)
1406
+ *
1407
+ * Parse a JSON stream.
1408
+ *
1409
+ * Returns the result according to the delegate of the parser.
1410
+ */
1411
+ static VALUE parser_load(VALUE self, VALUE reader) {
1412
+ ojParser p;
1413
+
1414
+ TypedData_Get_Struct(self, struct _ojParser, &oj_parser_type, p);
1415
+
1416
+ parser_reset(p);
1417
+ p->reader = reader;
1418
+ rb_rescue2(load, self, load_rescue, Qnil, rb_eEOFError, 0);
1419
+
1420
+ return p->result(p);
1421
+ }
1422
+
1423
+ /* Document-method: file(filename)
1424
+ * call-seq: file(filename)
1425
+ *
1426
+ * Parse a JSON file.
1427
+ *
1428
+ * Returns the result according to the delegate of the parser.
1429
+ */
1430
+ static VALUE parser_file(VALUE self, VALUE filename) {
1431
+ ojParser p;
1432
+ const char *path;
1433
+ int fd;
1434
+
1435
+ TypedData_Get_Struct(self, struct _ojParser, &oj_parser_type, p);
1436
+
1437
+ path = StringValuePtr(filename);
1438
+
1439
+ parser_reset(p);
1440
+ p->start(p);
1441
+
1442
+ if (0 > (fd = open(path, O_RDONLY))) {
1443
+ rb_raise(rb_eIOError, "error opening %s", path);
1444
+ }
1445
+ #if USE_THREAD_LIMIT
1446
+ struct stat info;
1447
+ // st_size will be 0 if not a file
1448
+ if (0 == fstat(fd, &info) && USE_THREAD_LIMIT < info.st_size) {
1449
+ // Use threaded version.
1450
+ // TBD only if has pthreads
1451
+ // TBD parse_large(p, fd);
1452
+ return p->result(p);
1453
+ }
1454
+ #endif
1455
+ byte buf[16385];
1456
+ size_t size = sizeof(buf) - 1;
1457
+ size_t rsize;
1458
+
1459
+ while (true) {
1460
+ if (0 < (rsize = read(fd, buf, size))) {
1461
+ buf[rsize] = '\0';
1462
+ parse(p, buf);
1463
+ }
1464
+ if (rsize <= 0) {
1465
+ if (0 != rsize) {
1466
+ rb_raise(rb_eIOError, "error reading from %s", path);
1467
+ }
1468
+ break;
1469
+ }
1470
+ }
1471
+ return p->result(p);
1472
+ }
1473
+
1474
+ /* Document-method: just_one
1475
+ * call-seq: just_one
1476
+ *
1477
+ * Returns the current state of the just_one [_Boolean_] option.
1478
+ */
1479
+ static VALUE parser_just_one(VALUE self) {
1480
+ ojParser p;
1481
+
1482
+ TypedData_Get_Struct(self, struct _ojParser, &oj_parser_type, p);
1483
+
1484
+ return p->just_one ? Qtrue : Qfalse;
1485
+ }
1486
+
1487
+ /* Document-method: just_one=
1488
+ * call-seq: just_one=(value)
1489
+ *
1490
+ * Sets the *just_one* option which limits the parsing of a string or or
1491
+ * stream to a single JSON element.
1492
+ *
1493
+ * Returns the current state of the just_one [_Boolean_] option.
1494
+ */
1495
+ static VALUE parser_just_one_set(VALUE self, VALUE v) {
1496
+ ojParser p;
1497
+
1498
+ TypedData_Get_Struct(self, struct _ojParser, &oj_parser_type, p);
1499
+
1500
+ p->just_one = (Qtrue == v);
1501
+
1502
+ return p->just_one ? Qtrue : Qfalse;
1503
+ }
1504
+
1505
+ static VALUE usual_parser = Qundef;
1506
+
1507
+ /* Document-method: usual
1508
+ * call-seq: usual
1509
+ *
1510
+ * Returns the default usual parser. Note the default usual parser can not be
1511
+ * used concurrently in more than one thread.
1512
+ */
1513
+ static VALUE parser_usual(VALUE self) {
1514
+ if (Qundef == usual_parser) {
1515
+ ojParser p = OJ_R_ALLOC(struct _ojParser);
1516
+
1517
+ memset(p, 0, sizeof(struct _ojParser));
1518
+ buf_init(&p->key);
1519
+ buf_init(&p->buf);
1520
+ p->map = value_map;
1521
+ oj_set_parser_usual(p);
1522
+ usual_parser = TypedData_Wrap_Struct(parser_class, &oj_parser_type, p);
1523
+ rb_gc_register_address(&usual_parser);
1524
+ }
1525
+ return usual_parser;
1526
+ }
1527
+
1528
+ static VALUE saj_parser = Qundef;
1529
+
1530
+ /* Document-method: saj
1531
+ * call-seq: saj
1532
+ *
1533
+ * Returns the default SAJ parser. Note the default SAJ parser can not be used
1534
+ * concurrently in more than one thread.
1535
+ */
1536
+ static VALUE parser_saj(VALUE self) {
1537
+ if (Qundef == saj_parser) {
1538
+ ojParser p = OJ_R_ALLOC(struct _ojParser);
1539
+
1540
+ memset(p, 0, sizeof(struct _ojParser));
1541
+ buf_init(&p->key);
1542
+ buf_init(&p->buf);
1543
+ p->map = value_map;
1544
+ oj_set_parser_saj(p);
1545
+ saj_parser = TypedData_Wrap_Struct(parser_class, &oj_parser_type, p);
1546
+ rb_gc_register_address(&saj_parser);
1547
+ }
1548
+ return saj_parser;
1549
+ }
1550
+
1551
+ static VALUE validate_parser = Qundef;
1552
+
1553
+ /* Document-method: validate
1554
+ * call-seq: validate
1555
+ *
1556
+ * Returns the default validate parser.
1557
+ */
1558
+ static VALUE parser_validate(VALUE self) {
1559
+ if (Qundef == validate_parser) {
1560
+ ojParser p = OJ_R_ALLOC(struct _ojParser);
1561
+
1562
+ memset(p, 0, sizeof(struct _ojParser));
1563
+ buf_init(&p->key);
1564
+ buf_init(&p->buf);
1565
+ p->map = value_map;
1566
+ oj_set_parser_validator(p);
1567
+ validate_parser = TypedData_Wrap_Struct(parser_class, &oj_parser_type, p);
1568
+ rb_gc_register_address(&validate_parser);
1569
+ }
1570
+ return validate_parser;
1571
+ }
1572
+
1573
+ /* Document-class: Oj::Parser
1574
+ *
1575
+ * A reusable parser that makes use of named delegates to determine the
1576
+ * handling of parsed data. Delegates are available for validation, a callback
1577
+ * parser (SAJ), and a usual delegate that builds Ruby objects as parsing
1578
+ * proceeds.
1579
+ *
1580
+ * This parser is considerably faster than the older Oj.parse call and
1581
+ * isolates options to just the parser so that other parts of the code are not
1582
+ * forced to use the same options.
1583
+ */
1584
+ void oj_parser_init(void) {
1585
+ parser_class = rb_define_class_under(Oj, "Parser", rb_cObject);
1586
+ rb_gc_register_address(&parser_class);
1587
+ rb_undef_alloc_func(parser_class);
1588
+
1589
+ rb_define_module_function(parser_class, "new", parser_new, -1);
1590
+ rb_define_method(parser_class, "parse", parser_parse, 1);
1591
+ rb_define_method(parser_class, "load", parser_load, 1);
1592
+ rb_define_method(parser_class, "file", parser_file, 1);
1593
+ rb_define_method(parser_class, "just_one", parser_just_one, 0);
1594
+ rb_define_method(parser_class, "just_one=", parser_just_one_set, 1);
1595
+ rb_define_method(parser_class, "method_missing", parser_missing, -1);
1596
+
1597
+ rb_define_module_function(parser_class, "usual", parser_usual, 0);
1598
+ rb_define_module_function(parser_class, "saj", parser_saj, 0);
1599
+ rb_define_module_function(parser_class, "validate", parser_validate, 0);
1600
+ }