oj_windows 3.16.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +44 -0
  3. data/LICENSE +21 -0
  4. data/README.md +164 -0
  5. data/ext/oj_windows/buf.h +85 -0
  6. data/ext/oj_windows/cache.c +339 -0
  7. data/ext/oj_windows/cache.h +22 -0
  8. data/ext/oj_windows/cache8.c +105 -0
  9. data/ext/oj_windows/cache8.h +21 -0
  10. data/ext/oj_windows/circarray.c +64 -0
  11. data/ext/oj_windows/circarray.h +22 -0
  12. data/ext/oj_windows/code.c +214 -0
  13. data/ext/oj_windows/code.h +40 -0
  14. data/ext/oj_windows/compat.c +239 -0
  15. data/ext/oj_windows/custom.c +1074 -0
  16. data/ext/oj_windows/debug.c +126 -0
  17. data/ext/oj_windows/dump.c +1556 -0
  18. data/ext/oj_windows/dump.h +110 -0
  19. data/ext/oj_windows/dump_compat.c +901 -0
  20. data/ext/oj_windows/dump_leaf.c +162 -0
  21. data/ext/oj_windows/dump_object.c +710 -0
  22. data/ext/oj_windows/dump_strict.c +405 -0
  23. data/ext/oj_windows/encode.h +16 -0
  24. data/ext/oj_windows/err.c +57 -0
  25. data/ext/oj_windows/err.h +67 -0
  26. data/ext/oj_windows/extconf.rb +77 -0
  27. data/ext/oj_windows/fast.c +1710 -0
  28. data/ext/oj_windows/intern.c +325 -0
  29. data/ext/oj_windows/intern.h +22 -0
  30. data/ext/oj_windows/mem.c +320 -0
  31. data/ext/oj_windows/mem.h +53 -0
  32. data/ext/oj_windows/mimic_json.c +919 -0
  33. data/ext/oj_windows/object.c +726 -0
  34. data/ext/oj_windows/odd.c +245 -0
  35. data/ext/oj_windows/odd.h +43 -0
  36. data/ext/oj_windows/oj.c +2097 -0
  37. data/ext/oj_windows/oj.h +420 -0
  38. data/ext/oj_windows/parse.c +1317 -0
  39. data/ext/oj_windows/parse.h +113 -0
  40. data/ext/oj_windows/parser.c +1600 -0
  41. data/ext/oj_windows/parser.h +103 -0
  42. data/ext/oj_windows/rails.c +1484 -0
  43. data/ext/oj_windows/rails.h +18 -0
  44. data/ext/oj_windows/reader.c +222 -0
  45. data/ext/oj_windows/reader.h +137 -0
  46. data/ext/oj_windows/resolve.c +80 -0
  47. data/ext/oj_windows/resolve.h +12 -0
  48. data/ext/oj_windows/rxclass.c +144 -0
  49. data/ext/oj_windows/rxclass.h +26 -0
  50. data/ext/oj_windows/saj.c +675 -0
  51. data/ext/oj_windows/saj2.c +584 -0
  52. data/ext/oj_windows/saj2.h +23 -0
  53. data/ext/oj_windows/scp.c +187 -0
  54. data/ext/oj_windows/simd.h +47 -0
  55. data/ext/oj_windows/sparse.c +946 -0
  56. data/ext/oj_windows/stream_writer.c +329 -0
  57. data/ext/oj_windows/strict.c +189 -0
  58. data/ext/oj_windows/string_writer.c +517 -0
  59. data/ext/oj_windows/trace.c +72 -0
  60. data/ext/oj_windows/trace.h +55 -0
  61. data/ext/oj_windows/usual.c +1218 -0
  62. data/ext/oj_windows/usual.h +69 -0
  63. data/ext/oj_windows/util.c +136 -0
  64. data/ext/oj_windows/util.h +20 -0
  65. data/ext/oj_windows/val_stack.c +101 -0
  66. data/ext/oj_windows/val_stack.h +151 -0
  67. data/ext/oj_windows/validate.c +46 -0
  68. data/ext/oj_windows/wab.c +584 -0
  69. data/lib/oj/active_support_helper.rb +39 -0
  70. data/lib/oj/bag.rb +95 -0
  71. data/lib/oj/easy_hash.rb +52 -0
  72. data/lib/oj/error.rb +21 -0
  73. data/lib/oj/json.rb +188 -0
  74. data/lib/oj/mimic.rb +301 -0
  75. data/lib/oj/saj.rb +80 -0
  76. data/lib/oj/schandler.rb +143 -0
  77. data/lib/oj/state.rb +135 -0
  78. data/lib/oj/version.rb +4 -0
  79. data/lib/oj_windows/active_support_helper.rb +39 -0
  80. data/lib/oj_windows/bag.rb +95 -0
  81. data/lib/oj_windows/easy_hash.rb +52 -0
  82. data/lib/oj_windows/error.rb +21 -0
  83. data/lib/oj_windows/json.rb +188 -0
  84. data/lib/oj_windows/mimic.rb +301 -0
  85. data/lib/oj_windows/saj.rb +80 -0
  86. data/lib/oj_windows/schandler.rb +143 -0
  87. data/lib/oj_windows/state.rb +135 -0
  88. data/lib/oj_windows/version.rb +4 -0
  89. data/lib/oj_windows.rb +15 -0
  90. data/pages/Advanced.md +38 -0
  91. data/pages/Compatibility.md +49 -0
  92. data/pages/Custom.md +37 -0
  93. data/pages/Encoding.md +61 -0
  94. data/pages/InstallOptions.md +20 -0
  95. data/pages/JsonGem.md +60 -0
  96. data/pages/Modes.md +94 -0
  97. data/pages/Options.md +339 -0
  98. data/pages/Parser.md +134 -0
  99. data/pages/Rails.md +85 -0
  100. data/pages/Security.md +43 -0
  101. data/pages/WAB.md +12 -0
  102. metadata +242 -0
@@ -0,0 +1,1556 @@
1
+ // Copyright (c) 2012, 2017 Peter Ohler. All rights reserved.
2
+ // Licensed under the MIT License. See LICENSE file in the project root for license details.
3
+
4
+ #include "dump.h"
5
+
6
+ #include <errno.h>
7
+ #include <math.h>
8
+ #include <stdint.h>
9
+ #include <stdio.h>
10
+ #include <stdlib.h>
11
+ #include <string.h>
12
+ #if !IS_WINDOWS
13
+ #include <unistd.h>
14
+ #endif
15
+ #if !IS_WINDOWS
16
+ #include <poll.h>
17
+ #endif
18
+
19
+ #include "cache8.h"
20
+ #include "mem.h"
21
+ #include "odd.h"
22
+ #include "oj.h"
23
+ #include "trace.h"
24
+ #include "util.h"
25
+
26
+ // Workaround in case INFINITY is not defined in math.h or if the OS is CentOS
27
+ #ifdef _MSC_VER
28
+ #define OJ_INFINITY HUGE_VAL
29
+ #else
30
+ #define OJ_INFINITY (1.0 / 0.0)
31
+ #endif
32
+
33
+ #define MAX_DEPTH 1000
34
+
35
+ static const char inf_val[] = INF_VAL;
36
+ static const char ninf_val[] = NINF_VAL;
37
+ static const char nan_val[] = NAN_VAL;
38
+
39
+ typedef unsigned long ulong;
40
+
41
+ static size_t hibit_friendly_size(const uint8_t *str, size_t len);
42
+ static size_t slash_friendly_size(const uint8_t *str, size_t len);
43
+ static size_t xss_friendly_size(const uint8_t *str, size_t len);
44
+ static size_t ascii_friendly_size(const uint8_t *str, size_t len);
45
+
46
+ static const char hex_chars[17] = "0123456789abcdef";
47
+
48
// JSON standard except newlines are not escaped.
//
// One entry per byte value (8 rows of 32 entries, row N starting at byte
// 32 * N). Each entry is an ASCII digit; calculate_string_size() sums the
// entries for a string and subtracts '0' per byte, so the digit is that byte's
// contribution to the computed size. Here byte 0x0A ('\n') contributes 1.
static char newline_friendly_chars[256] = "\
66666666221622666666666666666666\
11211111111111111111111111111111\
11111111111111111111111111112111\
11111111111111111111111111111111\
11111111111111111111111111111111\
11111111111111111111111111111111\
11111111111111111111111111111111\
11111111111111111111111111111111";
58
+
59
// JSON standard.
//
// One entry per byte value (8 rows of 32 entries). Each entry is an ASCII
// digit giving that byte's contribution to the size computed by
// calculate_string_size(). Bytes 0x80 and above all contribute 1; the NEON
// setup in this file relies on that.
static char hibit_friendly_chars[256] = "\
66666666222622666666666666666666\
11211111111111111111111111111111\
11111111111111111111111111112111\
11111111111111111111111111111111\
11111111111111111111111111111111\
11111111111111111111111111111111\
11111111111111111111111111111111\
11111111111111111111111111111111";
69
+
70
// JSON standard but escape forward slashes `/`.
//
// One entry per byte value (8 rows of 32 entries). Each entry is an ASCII
// digit giving that byte's contribution to the size computed by
// calculate_string_size(). Byte 0x2F ('/') contributes 2 here.
static char slash_friendly_chars[256] = "\
66666666222622666666666666666666\
11211111111111121111111111111111\
11111111111111111111111111112111\
11111111111111111111111111111111\
11111111111111111111111111111111\
11111111111111111111111111111111\
11111111111111111111111111111111\
11111111111111111111111111111111";
80
+
81
// High bit set characters are always encoded as unicode. Worst case is 3
// bytes per character in the output. That makes this conservative.
//
// One entry per byte value (8 rows of 32 entries). Each entry is an ASCII
// digit giving that byte's contribution to the size computed by
// calculate_string_size(). Byte 0x7F contributes 6 and every byte from 0x80
// up contributes 3.
static char ascii_friendly_chars[256] = "\
66666666222622666666666666666666\
11211111111111111111111111111111\
11111111111111111111111111112111\
11111111111111111111111111111116\
33333333333333333333333333333333\
33333333333333333333333333333333\
33333333333333333333333333333333\
33333333333333333333333333333333";
92
+
93
// XSS safe mode.
//
// One entry per byte value (8 rows of 32 entries). Each entry is an ASCII
// digit giving that byte's contribution to the size computed by
// calculate_string_size(). Compared with the ASCII table, '&' (0x26),
// '<' (0x3C) and '>' (0x3E) contribute 6 and '/' (0x2F) contributes 2.
static char xss_friendly_chars[256] = "\
66666666222622666666666666666666\
11211161111111121111111111116161\
11111111111111111111111111112111\
11111111111111111111111111111116\
33333333333333333333333333333333\
33333333333333333333333333333333\
33333333333333333333333333333333\
33333333333333333333333333333333";
103
+
104
// JSON XSS combo.
//
// One entry per byte value (8 rows of 32 entries). Each entry is an ASCII
// digit; hixss_friendly_size() sums the entries and subtracts '0' per byte.
// Identical to the plain JSON table except that byte 0xE2 contributes 6.
// NOTE(review): 0xE2 is presumably singled out as the lead byte of
// U+2028/U+2029 -- confirm against the code that consumes this table.
static char hixss_friendly_chars[256] = "\
66666666222622666666666666666666\
11211111111111111111111111111111\
11111111111111111111111111112111\
11111111111111111111111111111111\
11111111111111111111111111111111\
11111111111111111111111111111111\
11111111111111111111111111111111\
11611111111111111111111111111111";
114
+
115
// Rails XSS combo.
//
// One entry per byte value (8 rows of 32 entries). Each entry is an ASCII
// digit; rails_xss_friendly_size() sums the entries and subtracts '0' per
// byte. '&' (0x26), '<' (0x3C), '>' (0x3E) and byte 0xE2 contribute 6.
static char rails_xss_friendly_chars[256] = "\
66666666222622666666666666666666\
11211161111111111111111111116161\
11111111111111111111111111112111\
11111111111111111111111111111111\
11111111111111111111111111111111\
11111111111111111111111111111111\
11111111111111111111111111111111\
11611111111111111111111111111111";
125
+
126
// Rails HTML non-escape.
//
// One entry per byte value (8 rows of 32 entries). Each entry is an ASCII
// digit; rails_friendly_size() sums the entries and subtracts '0' per byte.
// The contents are currently identical to hibit_friendly_chars; bytes 0x80
// and above all contribute 1, which the NEON setup in this file relies on.
static char rails_friendly_chars[256] = "\
66666666222622666666666666666666\
11211111111111111111111111111111\
11111111111111111111111111112111\
11111111111111111111111111111111\
11111111111111111111111111111111\
11111111111111111111111111111111\
11111111111111111111111111111111\
11111111111111111111111111111111";
136
+
137
+ static void raise_strict(VALUE obj) {
138
+ rb_raise(rb_eTypeError, "Failed to dump %s Object to JSON in strict mode.", rb_class2name(rb_obj_class(obj)));
139
+ }
140
+
141
// Sums the per-byte size digits for the len bytes at str.
//
// table holds one ASCII digit per byte value; the digit's numeric value is
// what that byte adds to the result. The digits are added as characters and
// the '0' bias is removed once at the end (len * '0').
inline static size_t calculate_string_size(const uint8_t *str, size_t len, const char *table) {
    size_t total = 0;
    size_t k;

    for (k = 0; k < len; k++) {
        total += table[str[k]];
    }
    return total - len * (size_t)'0';
}
156
+
157
+ inline static size_t newline_friendly_size(const uint8_t *str, size_t len) {
158
+ return calculate_string_size(str, len, newline_friendly_chars);
159
+ }
160
+
161
+ #ifdef HAVE_SIMD_NEON
162
+ inline static uint8x16x4_t load_uint8x16_4(const unsigned char *table) {
163
+ uint8x16x4_t tab;
164
+ tab.val[0] = vld1q_u8(table);
165
+ tab.val[1] = vld1q_u8(table + 16);
166
+ tab.val[2] = vld1q_u8(table + 32);
167
+ tab.val[3] = vld1q_u8(table + 48);
168
+ return tab;
169
+ }
170
+
171
+ static uint8x16x4_t hibit_friendly_chars_neon[2];
172
+ static uint8x16x4_t rails_friendly_chars_neon[2];
173
+ static uint8x16x4_t rails_xss_friendly_chars_neon[4];
174
+
175
+ void initialize_neon(void) {
176
+ // We only need the first 128 bytes of the hibit friendly chars table. Everything above 127 is
177
+ // set to 1. If that ever changes, the code will need to be updated.
178
+ hibit_friendly_chars_neon[0] = load_uint8x16_4((const unsigned char *)hibit_friendly_chars);
179
+ hibit_friendly_chars_neon[1] = load_uint8x16_4((const unsigned char *)hibit_friendly_chars + 64);
180
+
181
+ // rails_friendly_chars is the same as hibit_friendly_chars. Only the first 128 bytes have values
182
+ // that are not '1'. If that ever changes, the code will need to be updated.
183
+ rails_friendly_chars_neon[0] = load_uint8x16_4((const unsigned char *)rails_friendly_chars);
184
+ rails_friendly_chars_neon[1] = load_uint8x16_4((const unsigned char *)rails_friendly_chars + 64);
185
+
186
+ rails_xss_friendly_chars_neon[0] = load_uint8x16_4((const unsigned char *)rails_xss_friendly_chars);
187
+ rails_xss_friendly_chars_neon[1] = load_uint8x16_4((const unsigned char *)rails_xss_friendly_chars + 64);
188
+ rails_xss_friendly_chars_neon[2] = load_uint8x16_4((const unsigned char *)rails_xss_friendly_chars + 128);
189
+ rails_xss_friendly_chars_neon[3] = load_uint8x16_4((const unsigned char *)rails_xss_friendly_chars + 192);
190
+
191
+ // All bytes should be 0 except for those that need more than 1 byte of output. This will allow the
192
+ // code to limit the lookups to the first 128 bytes (values 0 - 127). Bytes above 127 will result
193
+ // in 0 with the vqtbl4q_u8 instruction.
194
+ uint8x16_t one = vdupq_n_u8('1');
195
+ for (int i = 0; i < 2; i++) {
196
+ for (int j = 0; j < 4; j++) {
197
+ hibit_friendly_chars_neon[i].val[j] = vsubq_u8(hibit_friendly_chars_neon[i].val[j], one);
198
+ rails_friendly_chars_neon[i].val[j] = vsubq_u8(rails_friendly_chars_neon[i].val[j], one);
199
+ }
200
+ }
201
+
202
+ for (int i = 0; i < 4; i++) {
203
+ for (int j = 0; j < 4; j++) {
204
+ rails_xss_friendly_chars_neon[i].val[j] = vsubq_u8(rails_xss_friendly_chars_neon[i].val[j], one);
205
+ }
206
+ }
207
+ }
208
+ #endif
209
+
210
+ inline static size_t hibit_friendly_size(const uint8_t *str, size_t len) {
211
+ #ifdef HAVE_SIMD_NEON
212
+ size_t size = 0;
213
+ size_t i = 0;
214
+
215
+ for (; i + sizeof(uint8x16_t) <= len; i += sizeof(uint8x16_t), str += sizeof(uint8x16_t)) {
216
+ size += sizeof(uint8x16_t);
217
+
218
+ // See https://lemire.me/blog/2019/07/23/arbitrary-byte-to-byte-maps-using-arm-neon/
219
+ uint8x16_t chunk = vld1q_u8(str);
220
+ uint8x16_t tmp1 = vqtbl4q_u8(hibit_friendly_chars_neon[0], chunk);
221
+ uint8x16_t tmp2 = vqtbl4q_u8(hibit_friendly_chars_neon[1], veorq_u8(chunk, vdupq_n_u8(0x40)));
222
+ uint8x16_t result = vorrq_u8(tmp1, tmp2);
223
+ uint8_t tmp = vaddvq_u8(result);
224
+ size += tmp;
225
+ }
226
+
227
+ size_t total = size + calculate_string_size(str, len - i, hibit_friendly_chars);
228
+ return total;
229
+ #else
230
+ return calculate_string_size(str, len, hibit_friendly_chars);
231
+ #endif
232
+ }
233
+
234
+ inline static size_t slash_friendly_size(const uint8_t *str, size_t len) {
235
+ return calculate_string_size(str, len, slash_friendly_chars);
236
+ }
237
+
238
+ inline static size_t ascii_friendly_size(const uint8_t *str, size_t len) {
239
+ return calculate_string_size(str, len, ascii_friendly_chars);
240
+ }
241
+
242
+ inline static size_t xss_friendly_size(const uint8_t *str, size_t len) {
243
+ return calculate_string_size(str, len, xss_friendly_chars);
244
+ }
245
+
246
+ inline static size_t hixss_friendly_size(const uint8_t *str, size_t len) {
247
+ size_t size = 0;
248
+ size_t i = len;
249
+ bool check = false;
250
+
251
+ for (; 0 < i; str++, i--) {
252
+ size += hixss_friendly_chars[*str];
253
+ if (0 != (0x80 & *str)) {
254
+ check = true;
255
+ }
256
+ }
257
+ return size - len * (size_t)'0' + check;
258
+ }
259
+
260
+ inline static long rails_xss_friendly_size(const uint8_t *str, size_t len) {
261
+ long size = 0;
262
+ uint32_t hi = 0;
263
+
264
+ #ifdef HAVE_SIMD_NEON
265
+ size_t i = 0;
266
+
267
+ if (len >= sizeof(uint8x16_t)) {
268
+ uint8x16_t has_some_hibit = vdupq_n_u8(0);
269
+ uint8x16_t hibit = vdupq_n_u8(0x80);
270
+
271
+ for (; i + sizeof(uint8x16_t) <= len; i += sizeof(uint8x16_t), str += sizeof(uint8x16_t)) {
272
+ size += sizeof(uint8x16_t);
273
+
274
+ uint8x16_t chunk = vld1q_u8(str);
275
+
276
+ // Check to see if any of these bytes have the high bit set.
277
+ has_some_hibit = vorrq_u8(has_some_hibit, vandq_u8(chunk, hibit));
278
+
279
+ uint8x16_t tmp1 = vqtbl4q_u8(rails_xss_friendly_chars_neon[0], chunk);
280
+ uint8x16_t tmp2 = vqtbl4q_u8(rails_xss_friendly_chars_neon[1], veorq_u8(chunk, vdupq_n_u8(0x40)));
281
+ uint8x16_t tmp3 = vqtbl4q_u8(rails_xss_friendly_chars_neon[2], veorq_u8(chunk, vdupq_n_u8(0x80)));
282
+ uint8x16_t tmp4 = vqtbl4q_u8(rails_xss_friendly_chars_neon[3], veorq_u8(chunk, vdupq_n_u8(0xc0)));
283
+ uint8x16_t result = vorrq_u8(tmp4, vorrq_u8(tmp3, vorrq_u8(tmp1, tmp2)));
284
+ uint8_t tmp = vaddvq_u8(result);
285
+ size += tmp;
286
+ }
287
+
288
+ // 'hi' should be set if any of the bytes we processed have the high bit set. It doesn't matter which ones.
289
+ hi = vmaxvq_u8(has_some_hibit) != 0;
290
+ }
291
+
292
+ size_t len_remaining = len - i;
293
+
294
+ for (; i < len; str++, i++) {
295
+ size += rails_xss_friendly_chars[*str];
296
+ hi |= *str & 0x80;
297
+ }
298
+
299
+ size -= (len_remaining * ((size_t)'0'));
300
+
301
+ if (0 == hi) {
302
+ return size;
303
+ }
304
+ return -(size);
305
+ #else
306
+ size_t i = len;
307
+ for (; 0 < i; str++, i--) {
308
+ size += rails_xss_friendly_chars[*str];
309
+ hi |= *str & 0x80;
310
+ }
311
+ if (0 == hi) {
312
+ return size - len * (size_t)'0';
313
+ }
314
+ return -(size - len * (size_t)'0');
315
+ #endif /* HAVE_SIMD_NEON */
316
+ }
317
+
318
+ inline static size_t rails_friendly_size(const uint8_t *str, size_t len) {
319
+ long size = 0;
320
+ uint32_t hi = 0;
321
+ #ifdef HAVE_SIMD_NEON
322
+ size_t i = 0;
323
+ long extra = 0;
324
+
325
+ if (len >= sizeof(uint8x16_t)) {
326
+ uint8x16_t has_some_hibit = vdupq_n_u8(0);
327
+ uint8x16_t hibit = vdupq_n_u8(0x80);
328
+
329
+ for (; i + sizeof(uint8x16_t) <= len; i += sizeof(uint8x16_t), str += sizeof(uint8x16_t)) {
330
+ size += sizeof(uint8x16_t);
331
+
332
+ // See https://lemire.me/blog/2019/07/23/arbitrary-byte-to-byte-maps-using-arm-neon/
333
+ uint8x16_t chunk = vld1q_u8(str);
334
+
335
+ // Check to see if any of these bytes have the high bit set.
336
+ has_some_hibit = vorrq_u8(has_some_hibit, vandq_u8(chunk, hibit));
337
+
338
+ uint8x16_t tmp1 = vqtbl4q_u8(rails_friendly_chars_neon[0], chunk);
339
+ uint8x16_t tmp2 = vqtbl4q_u8(rails_friendly_chars_neon[1], veorq_u8(chunk, vdupq_n_u8(0x40)));
340
+ uint8x16_t result = vorrq_u8(tmp1, tmp2);
341
+ uint8_t tmp = vaddvq_u8(result);
342
+ size += tmp;
343
+ }
344
+
345
+ // 'hi' should be set if any of the bytes we processed have the high bit set. It doesn't matter which ones.
346
+ hi = vmaxvq_u8(has_some_hibit) != 0;
347
+ }
348
+
349
+ for (; i < len; str++, i++, extra++) {
350
+ size += rails_friendly_chars[*str];
351
+ hi |= *str & 0x80;
352
+ }
353
+
354
+ size -= (extra * ((size_t)'0'));
355
+
356
+ if (0 == hi) {
357
+ return size;
358
+ }
359
+ return -(size);
360
+ #else
361
+ size_t i = len;
362
+ for (; 0 < i; str++, i--) {
363
+ size += rails_friendly_chars[*str];
364
+ hi |= *str & 0x80;
365
+ }
366
+ if (0 == hi) {
367
+ return size - len * (size_t)'0';
368
+ }
369
+ return -(size - len * (size_t)'0');
370
+ #endif /* HAVE_SIMD_NEON */
371
+ }
372
+
373
+ const char *oj_nan_str(VALUE obj, int opt, int mode, bool plus, size_t *lenp) {
374
+ const char *str = NULL;
375
+
376
+ if (AutoNan == opt) {
377
+ switch (mode) {
378
+ case CompatMode: opt = WordNan; break;
379
+ case StrictMode: opt = RaiseNan; break;
380
+ default: break;
381
+ }
382
+ }
383
+ switch (opt) {
384
+ case RaiseNan: raise_strict(obj); break;
385
+ case WordNan:
386
+ if (plus) {
387
+ str = "Infinity";
388
+ *lenp = 8;
389
+ } else {
390
+ str = "-Infinity";
391
+ *lenp = 9;
392
+ }
393
+ break;
394
+ case NullNan:
395
+ str = "null";
396
+ *lenp = 4;
397
+ break;
398
+ case HugeNan:
399
+ default:
400
+ if (plus) {
401
+ str = inf_val;
402
+ *lenp = sizeof(inf_val) - 1;
403
+ } else {
404
+ str = ninf_val;
405
+ *lenp = sizeof(ninf_val) - 1;
406
+ }
407
+ break;
408
+ }
409
+ return str;
410
+ }
411
+
412
+ inline static void dump_hex(uint8_t c, Out out) {
413
+ uint8_t d = (c >> 4) & 0x0F;
414
+
415
+ *out->cur++ = hex_chars[d];
416
+ d = c & 0x0F;
417
+ *out->cur++ = hex_chars[d];
418
+ }
419
+
420
+ static void raise_invalid_unicode(const char *str, int len, int pos) {
421
+ char c;
422
+ char code[32];
423
+ char *cp = code;
424
+ int i;
425
+ uint8_t d;
426
+
427
+ *cp++ = '[';
428
+ for (i = pos; i < len && i - pos < 5; i++) {
429
+ c = str[i];
430
+ d = (c >> 4) & 0x0F;
431
+ *cp++ = hex_chars[d];
432
+ d = c & 0x0F;
433
+ *cp++ = hex_chars[d];
434
+ *cp++ = ' ';
435
+ }
436
+ cp--;
437
+ *cp++ = ']';
438
+ *cp = '\0';
439
+ rb_raise(oj_json_generator_error_class, "Invalid Unicode %s at %d", code, pos);
440
+ }
441
+
442
+ static const char *dump_unicode(const char *str, const char *end, Out out, const char *orig) {
443
+ uint32_t code = 0;
444
+ uint8_t b = *(uint8_t *)str;
445
+ int i, cnt;
446
+
447
+ if (0xC0 == (0xE0 & b)) {
448
+ cnt = 1;
449
+ code = b & 0x0000001F;
450
+ } else if (0xE0 == (0xF0 & b)) {
451
+ cnt = 2;
452
+ code = b & 0x0000000F;
453
+ } else if (0xF0 == (0xF8 & b)) {
454
+ cnt = 3;
455
+ code = b & 0x00000007;
456
+ } else if (0xF8 == (0xFC & b)) {
457
+ cnt = 4;
458
+ code = b & 0x00000003;
459
+ } else if (0xFC == (0xFE & b)) {
460
+ cnt = 5;
461
+ code = b & 0x00000001;
462
+ } else {
463
+ cnt = 0;
464
+ raise_invalid_unicode(orig, (int)(end - orig), (int)(str - orig));
465
+ }
466
+ str++;
467
+ for (; 0 < cnt; cnt--, str++) {
468
+ b = *(uint8_t *)str;
469
+ if (end <= str || 0x80 != (0xC0 & b)) {
470
+ raise_invalid_unicode(orig, (int)(end - orig), (int)(str - orig));
471
+ }
472
+ code = (code << 6) | (b & 0x0000003F);
473
+ }
474
+ if (0x0000FFFF < code) {
475
+ uint32_t c1;
476
+
477
+ code -= 0x00010000;
478
+ c1 = ((code >> 10) & 0x000003FF) + 0x0000D800;
479
+ code = (code & 0x000003FF) + 0x0000DC00;
480
+ APPEND_CHARS(out->cur, "\\u", 2);
481
+ for (i = 3; 0 <= i; i--) {
482
+ *out->cur++ = hex_chars[(uint8_t)(c1 >> (i * 4)) & 0x0F];
483
+ }
484
+ }
485
+ APPEND_CHARS(out->cur, "\\u", 2);
486
+ for (i = 3; 0 <= i; i--) {
487
+ *out->cur++ = hex_chars[(uint8_t)(code >> (i * 4)) & 0x0F];
488
+ }
489
+ return str - 1;
490
+ }
491
+
492
// Validates the multi-byte UTF-8 sequence starting at str without writing
// anything.
//
// str  - first byte of the sequence; must be below end.
// end  - one past the last byte of the string.
// orig - start of the string, used only for error reporting.
//
// Returns a pointer just past the sequence. Raises through
// raise_invalid_unicode() if the lead byte is not a valid multi-byte lead, if
// a continuation byte is malformed, or if the sequence runs past end.
static const char *check_unicode(const char *str, const char *end, const char *orig) {
    uint8_t b   = *(uint8_t *)str;
    int     cnt = 0;

    // The lead byte gives the number of continuation bytes expected.
    if (0xC0 == (0xE0 & b)) {
        cnt = 1;
    } else if (0xE0 == (0xF0 & b)) {
        cnt = 2;
    } else if (0xF0 == (0xF8 & b)) {
        cnt = 3;
    } else if (0xF8 == (0xFC & b)) {
        cnt = 4;
    } else if (0xFC == (0xFE & b)) {
        cnt = 5;
    } else {
        raise_invalid_unicode(orig, (int)(end - orig), (int)(str - orig));
    }
    str++;
    for (; 0 < cnt; cnt--, str++) {
        // Check the bound before reading so a truncated sequence never reads
        // past the end of the string.
        if (end <= str) {
            raise_invalid_unicode(orig, (int)(end - orig), (int)(str - orig));
        }
        b = *(uint8_t *)str;
        if (0x80 != (0xC0 & b)) {
            raise_invalid_unicode(orig, (int)(end - orig), (int)(str - orig));
        }
    }
    return str;
}
518
+
519
+ // Returns 0 if not using circular references, -1 if no further writing is
520
+ // needed (duplicate), and a positive value if the object was added to the
521
+ // cache.
522
+ long oj_check_circular(VALUE obj, Out out) {
523
+ slot_t id = 0;
524
+ slot_t *slot;
525
+
526
+ if (Yes == out->opts->circular) {
527
+ if (0 == (id = oj_cache8_get(out->circ_cache, obj, &slot))) {
528
+ out->circ_cnt++;
529
+ id = out->circ_cnt;
530
+ *slot = id;
531
+ } else {
532
+ if (ObjectMode == out->opts->mode) {
533
+ assure_size(out, 18);
534
+ APPEND_CHARS(out->cur, "\"^r", 3);
535
+ dump_ulong(id, out);
536
+ *out->cur++ = '"';
537
+ }
538
+ return -1;
539
+ }
540
+ }
541
+ return (long)id;
542
+ }
543
+
544
// Writes a Time as a decimal number of seconds: "[-]sec[.frac][e[-]offset]".
//
// The text is built right to left in a local buffer, starting at the
// terminating NUL and working back towards the sign, then copied to out.
//
// obj      - the Ruby Time value.
// out      - output buffer; out->opts->sec_prec sets the fraction digits.
// withZone - when non-zero an 'e' followed by the UTC offset in seconds is
//            appended (86400 is used when the offset is 0 and utc? is true).
void oj_dump_time(VALUE obj, Out out, int withZone) {
    char buf[64];
    char *b = buf + sizeof(buf) - 1;  // write cursor, moves backwards
    long size;
    char *dot;
    int neg = 0;
    long one = 1000000000;  // value of one whole second at the current precision
    long long sec;
    long long nsec;

    // rb_time_timespec as well as rb_time_timeeval have a bug that causes an
    // exception to be raised if a time is before 1970 on 32 bit systems so
    // check the timespec size and use the ruby calls if a 32 bit system.
    if (16 <= sizeof(struct timespec)) {
        struct timespec ts = rb_time_timespec(obj);

        sec = (long long)ts.tv_sec;
        nsec = ts.tv_nsec;
    } else {
        sec = NUM2LL(rb_funcall2(obj, oj_tv_sec_id, 0, 0));
        nsec = NUM2LL(rb_funcall2(obj, oj_tv_nsec_id, 0, 0));
    }

    *b-- = '\0';
    if (withZone) {
        long tzsecs = NUM2LONG(rb_funcall2(obj, oj_utc_offset_id, 0, 0));
        int zneg = (0 > tzsecs);

        // A zero offset on a time flagged as UTC is written as 86400.
        if (0 == tzsecs && rb_funcall2(obj, oj_utcq_id, 0, 0)) {
            tzsecs = 86400;
        }
        if (zneg) {
            tzsecs = -tzsecs;
        }
        if (0 == tzsecs) {
            *b-- = '0';
        } else {
            // Least significant digit first, since the cursor moves backwards.
            for (; 0 < tzsecs; b--, tzsecs /= 10) {
                *b = '0' + (tzsecs % 10);
            }
            if (zneg) {
                *b-- = '-';
            }
        }
        *b-- = 'e';
    }
    if (0 > sec) {
        // Work with magnitudes and emit the sign last.
        neg = 1;
        sec = -sec;
        if (0 < nsec) {
            nsec = 1000000000 - nsec;
            sec--;
        }
    }
    // dot marks where the '.' goes: 9 fraction digits by default, fewer when
    // sec_prec is below 9.
    dot = b - 9;
    if (0 < out->opts->sec_prec) {
        if (9 > out->opts->sec_prec) {
            int i;

            // Drop one digit per step, rounding half up each time.
            for (i = 9 - out->opts->sec_prec; 0 < i; i--) {
                dot++;
                nsec = (nsec + 5) / 10;
                one /= 10;
            }
        }
        // Rounding may have carried into a whole second.
        if (one <= nsec) {
            nsec -= one;
            sec++;
        }
        for (; dot < b; b--, nsec /= 10) {
            *b = '0' + (nsec % 10);
        }
        *b-- = '.';
    }
    if (0 == sec) {
        *b-- = '0';
    } else {
        for (; 0 < sec; b--, sec /= 10) {
            *b = '0' + (sec % 10);
        }
    }
    if (neg) {
        *b-- = '-';
    }
    // The cursor sits one before the first character written.
    b++;
    size = sizeof(buf) - (b - buf) - 1;
    assure_size(out, size);
    APPEND_CHARS(out->cur, b, size);
    *out->cur = '\0';
}
634
+
635
+ void oj_dump_ruby_time(VALUE obj, Out out) {
636
+ volatile VALUE rstr = oj_safe_string_convert(obj);
637
+
638
+ oj_dump_cstr(RSTRING_PTR(rstr), RSTRING_LEN(rstr), 0, 0, out);
639
+ }
640
+
641
+ void oj_dump_xml_time(VALUE obj, Out out) {
642
+ char buf[64];
643
+ struct _timeInfo ti;
644
+ long one = 1000000000;
645
+ int64_t sec;
646
+ long long nsec;
647
+ long tzsecs = NUM2LONG(rb_funcall2(obj, oj_utc_offset_id, 0, 0));
648
+ int tzhour, tzmin;
649
+ char tzsign = '+';
650
+
651
+ if (16 <= sizeof(struct timespec)) {
652
+ struct timespec ts = rb_time_timespec(obj);
653
+
654
+ sec = ts.tv_sec;
655
+ nsec = ts.tv_nsec;
656
+ } else {
657
+ sec = NUM2LL(rb_funcall2(obj, oj_tv_sec_id, 0, 0));
658
+ nsec = NUM2LL(rb_funcall2(obj, oj_tv_nsec_id, 0, 0));
659
+ }
660
+
661
+ assure_size(out, 36);
662
+ if (9 > out->opts->sec_prec) {
663
+ int i;
664
+
665
+ // This is pretty lame but to be compatible with rails and active
666
+ // support rounding is not done but instead a floor is done when
667
+ // second precision is 3 just to be like rails. sigh.
668
+ if (3 == out->opts->sec_prec) {
669
+ nsec /= 1000000;
670
+ one = 1000;
671
+ } else {
672
+ for (i = 9 - out->opts->sec_prec; 0 < i; i--) {
673
+ nsec = (nsec + 5) / 10;
674
+ one /= 10;
675
+ }
676
+ if (one <= nsec) {
677
+ nsec -= one;
678
+ sec++;
679
+ }
680
+ }
681
+ }
682
+ // 2012-01-05T23:58:07.123456000+09:00
683
+ // tm = localtime(&sec);
684
+ sec += tzsecs;
685
+ sec_as_time((int64_t)sec, &ti);
686
+ if (0 > tzsecs) {
687
+ tzsign = '-';
688
+ tzhour = (int)(tzsecs / -3600);
689
+ tzmin = (int)(tzsecs / -60) - (tzhour * 60);
690
+ } else {
691
+ tzhour = (int)(tzsecs / 3600);
692
+ tzmin = (int)(tzsecs / 60) - (tzhour * 60);
693
+ }
694
+ if ((0 == nsec && !out->opts->sec_prec_set) || 0 == out->opts->sec_prec) {
695
+ if (0 == tzsecs && rb_funcall2(obj, oj_utcq_id, 0, 0)) {
696
+ int len = sprintf(buf, "%04d-%02d-%02dT%02d:%02d:%02dZ", ti.year, ti.mon, ti.day, ti.hour, ti.min, ti.sec);
697
+ oj_dump_cstr(buf, len, 0, 0, out);
698
+ } else {
699
+ int len = sprintf(buf,
700
+ "%04d-%02d-%02dT%02d:%02d:%02d%c%02d:%02d",
701
+ ti.year,
702
+ ti.mon,
703
+ ti.day,
704
+ ti.hour,
705
+ ti.min,
706
+ ti.sec,
707
+ tzsign,
708
+ tzhour,
709
+ tzmin);
710
+ oj_dump_cstr(buf, len, 0, 0, out);
711
+ }
712
+ } else if (0 == tzsecs && rb_funcall2(obj, oj_utcq_id, 0, 0)) {
713
+ char format[64] = "%04d-%02d-%02dT%02d:%02d:%02d.%09ldZ";
714
+ int len;
715
+
716
+ if (9 > out->opts->sec_prec) {
717
+ format[32] = '0' + out->opts->sec_prec;
718
+ }
719
+ len = sprintf(buf, format, ti.year, ti.mon, ti.day, ti.hour, ti.min, ti.sec, (long)nsec);
720
+ oj_dump_cstr(buf, len, 0, 0, out);
721
+ } else {
722
+ char format[64] = "%04d-%02d-%02dT%02d:%02d:%02d.%09ld%c%02d:%02d";
723
+ int len;
724
+
725
+ if (9 > out->opts->sec_prec) {
726
+ format[32] = '0' + out->opts->sec_prec;
727
+ }
728
+ len = sprintf(buf, format, ti.year, ti.mon, ti.day, ti.hour, ti.min, ti.sec, (long)nsec, tzsign, tzhour, tzmin);
729
+ oj_dump_cstr(buf, len, 0, 0, out);
730
+ }
731
+ }
732
+
733
+ void oj_dump_obj_to_json(VALUE obj, Options copts, Out out) {
734
+ oj_dump_obj_to_json_using_params(obj, copts, out, 0, 0);
735
+ }
736
+
737
+ void oj_dump_obj_to_json_using_params(VALUE obj, Options copts, Out out, int argc, VALUE *argv) {
738
+ if (0 == out->buf) {
739
+ oj_out_init(out);
740
+ }
741
+ out->circ_cnt = 0;
742
+ out->opts = copts;
743
+ out->hash_cnt = 0;
744
+ out->indent = copts->indent;
745
+ out->argc = argc;
746
+ out->argv = argv;
747
+ out->ropts = NULL;
748
+ if (Yes == copts->circular) {
749
+ oj_cache8_new(&out->circ_cache);
750
+ }
751
+ switch (copts->mode) {
752
+ case StrictMode: oj_dump_strict_val(obj, 0, out); break;
753
+ case NullMode: oj_dump_null_val(obj, 0, out); break;
754
+ case ObjectMode: oj_dump_obj_val(obj, 0, out); break;
755
+ case CompatMode: oj_dump_compat_val(obj, 0, out, Yes == copts->to_json); break;
756
+ case RailsMode: oj_dump_rails_val(obj, 0, out); break;
757
+ case CustomMode: oj_dump_custom_val(obj, 0, out, true); break;
758
+ case WabMode: oj_dump_wab_val(obj, 0, out); break;
759
+ default: oj_dump_custom_val(obj, 0, out, true); break;
760
+ }
761
+ if (0 < out->indent) {
762
+ switch (*(out->cur - 1)) {
763
+ case ']':
764
+ case '}': assure_size(out, 1); *out->cur++ = '\n';
765
+ default: break;
766
+ }
767
+ }
768
+ *out->cur = '\0';
769
+ if (Yes == copts->circular) {
770
+ oj_cache8_delete(out->circ_cache);
771
+ }
772
+ }
773
+
774
+ void oj_write_obj_to_file(VALUE obj, const char *path, Options copts) {
775
+ struct _out out;
776
+ size_t size;
777
+ FILE *f;
778
+ int ok;
779
+
780
+ oj_out_init(&out);
781
+
782
+ out.omit_nil = copts->dump_opts.omit_nil;
783
+ oj_dump_obj_to_json(obj, copts, &out);
784
+ size = out.cur - out.buf;
785
+ if (0 == (f = fopen(path, "w"))) {
786
+ oj_out_free(&out);
787
+ rb_raise(rb_eIOError, "%s", strerror(errno));
788
+ }
789
+ ok = (size == fwrite(out.buf, 1, size, f));
790
+
791
+ oj_out_free(&out);
792
+
793
+ if (!ok) {
794
+ int err = ferror(f);
795
+ fclose(f);
796
+
797
+ rb_raise(rb_eIOError, "Write failed. [%d:%s]", err, strerror(err));
798
+ }
799
+ fclose(f);
800
+ }
801
+
802
+ #if !IS_WINDOWS
803
+ static void write_ready(int fd) {
804
+ struct pollfd pp;
805
+ int i;
806
+
807
+ pp.fd = fd;
808
+ pp.events = POLLERR | POLLOUT;
809
+ pp.revents = 0;
810
+ if (0 >= (i = poll(&pp, 1, 5000))) {
811
+ if (0 == i || EAGAIN == errno) {
812
+ rb_raise(rb_eIOError, "write timed out");
813
+ }
814
+ rb_raise(rb_eIOError, "write failed. %d %s.", errno, strerror(errno));
815
+ }
816
+ }
817
+ #endif
818
+
819
+ void oj_write_obj_to_stream(VALUE obj, VALUE stream, Options copts) {
820
+ struct _out out;
821
+ ssize_t size;
822
+ VALUE clas = rb_obj_class(stream);
823
+ #if !IS_WINDOWS
824
+ int fd;
825
+ VALUE s;
826
+ #endif
827
+
828
+ oj_out_init(&out);
829
+
830
+ out.omit_nil = copts->dump_opts.omit_nil;
831
+ oj_dump_obj_to_json(obj, copts, &out);
832
+ size = out.cur - out.buf;
833
+ if (oj_stringio_class == clas) {
834
+ rb_funcall(stream, oj_write_id, 1, rb_str_new(out.buf, size));
835
+ #if !IS_WINDOWS
836
+ } else if (rb_respond_to(stream, oj_fileno_id) && Qnil != (s = rb_funcall(stream, oj_fileno_id, 0)) &&
837
+ 0 != (fd = FIX2INT(s))) {
838
+ ssize_t cnt;
839
+ ssize_t total = 0;
840
+
841
+ while (true) {
842
+ if (0 > (cnt = write(fd, out.buf + total, size - total))) {
843
+ if (EAGAIN != errno) {
844
+ rb_raise(rb_eIOError, "write failed. %d %s.", errno, strerror(errno));
845
+ break;
846
+ }
847
+ }
848
+ total += cnt;
849
+ if (size <= total) {
850
+ // Completed
851
+ break;
852
+ }
853
+ write_ready(fd);
854
+ }
855
+ #endif
856
+ } else if (rb_respond_to(stream, oj_write_id)) {
857
+ rb_funcall(stream, oj_write_id, 1, rb_str_new(out.buf, size));
858
+ } else {
859
+ oj_out_free(&out);
860
+ rb_raise(rb_eArgError, "to_stream() expected an IO Object.");
861
+ }
862
+ oj_out_free(&out);
863
+ }
864
+
865
+ void oj_dump_str(VALUE obj, int depth, Out out, bool as_ok) {
866
+ int idx = RB_ENCODING_GET(obj);
867
+
868
+ if (oj_utf8_encoding_index != idx) {
869
+ rb_encoding *enc = rb_enc_from_index(idx);
870
+ obj = rb_str_conv_enc(obj, enc, oj_utf8_encoding);
871
+ }
872
+ oj_dump_cstr(RSTRING_PTR(obj), RSTRING_LEN(obj), 0, 0, out);
873
+ }
874
+
875
+ void oj_dump_sym(VALUE obj, int depth, Out out, bool as_ok) {
876
+ volatile VALUE s = rb_sym2str(obj);
877
+
878
+ oj_dump_cstr(RSTRING_PTR(s), RSTRING_LEN(s), 0, 0, out);
879
+ }
880
+
881
+ static void debug_raise(const char *orig, size_t cnt, int line) {
882
+ char buf[1024];
883
+ char *b = buf;
884
+ const char *s = orig;
885
+ const char *s_end = s + cnt;
886
+
887
+ if (32 < s_end - s) {
888
+ s_end = s + 32;
889
+ }
890
+ for (; s < s_end; s++) {
891
+ b += sprintf(b, " %02x", *s);
892
+ }
893
+ *b = '\0';
894
+ rb_raise(oj_json_generator_error_class, "Partial character in string. %s @ %d", buf, line);
895
+ }
896
+
897
+ void oj_dump_raw_json(VALUE obj, int depth, Out out) {
898
+ if (oj_string_writer_class == rb_obj_class(obj)) {
899
+ StrWriter sw;
900
+ size_t len;
901
+
902
+ sw = oj_str_writer_unwrap(obj);
903
+ len = sw->out.cur - sw->out.buf;
904
+
905
+ if (0 < len) {
906
+ len--;
907
+ }
908
+ oj_dump_raw(sw->out.buf, len, out);
909
+ } else {
910
+ volatile VALUE jv;
911
+
912
+ TRACE(out->opts->trace, "raw_json", obj, depth + 1, TraceRubyIn);
913
+ jv = rb_funcall(obj, oj_raw_json_id, 2, RB_INT2NUM(depth), RB_INT2NUM(out->indent));
914
+ TRACE(out->opts->trace, "raw_json", obj, depth + 1, TraceRubyOut);
915
+ oj_dump_raw(RSTRING_PTR(jv), (size_t)RSTRING_LEN(jv), out);
916
+ }
917
+ }
918
+
919
/*
 * FORCE_INLINE: expands to the always_inline attribute on GCC and Clang
 * and to nothing on every other compiler.
 */
#if defined(__GNUC__) || defined(__clang__)
#define FORCE_INLINE __attribute__((always_inline))
#else
#define FORCE_INLINE
#endif
924
+
925
#ifdef HAVE_SIMD_NEON
/*
 * Result of classifying one 16 byte chunk.
 *
 * needs_escape          - per-byte lookup result from the escape tables
 * has_some_hibit        - true if any byte in the chunk has bit 0x80 set
 *                         (only computed when validation was requested)
 * do_unicode_validation - true if the chunk must go through the per-byte
 *                         path so its UTF-8 sequences get validated
 */
typedef struct _neon_match_result {
    uint8x16_t needs_escape;
    bool       has_some_hibit;
    bool       do_unicode_validation;
} neon_match_result;

/*
 * Classifies the 16 bytes at str against the escape tables in cmap_neon.
 *
 * Table 0 is indexed with the raw bytes and table 1 with the bytes XORed
 * with 0x40; when neon_table_size is greater than 2, tables 2 and 3 are
 * also consulted with XOR masks 0x80 and 0xc0. All lookups are ORed into
 * needs_escape.
 *
 * The high-bit scan is only performed when both has_hi and
 * do_unicode_validation are true; otherwise both flags in the result stay
 * false.
 */
static inline FORCE_INLINE neon_match_result
neon_update(const char *str, uint8x16x4_t *cmap_neon, int neon_table_size, bool do_unicode_validation, bool has_hi) {
    neon_match_result res;
    const uint8x16_t  bytes  = vld1q_u8((const unsigned char *)str);
    const uint8x16_t  low_a  = vqtbl4q_u8(cmap_neon[0], bytes);
    const uint8x16_t  low_b  = vqtbl4q_u8(cmap_neon[1], veorq_u8(bytes, vdupq_n_u8(0x40)));
    uint8x16_t        escape = vorrq_u8(low_a, low_b);

    if (2 < neon_table_size) {
        const uint8x16_t high_a = vqtbl4q_u8(cmap_neon[2], veorq_u8(bytes, vdupq_n_u8(0x80)));
        const uint8x16_t high_b = vqtbl4q_u8(cmap_neon[3], veorq_u8(bytes, vdupq_n_u8(0xc0)));

        escape = vorrq_u8(escape, vorrq_u8(high_b, high_a));
    }
    res.needs_escape          = escape;
    res.has_some_hibit        = false;
    res.do_unicode_validation = false;
    if (has_hi && do_unicode_validation) {
        res.has_some_hibit = 0 != vmaxvq_u8(vandq_u8(bytes, vdupq_n_u8(0x80)));
        // Both request flags are already known to be true in this branch.
        res.do_unicode_validation = res.has_some_hibit;
    }
    return res;
}

#endif /* HAVE_SIMD_NEON */
954
+
955
+ static inline FORCE_INLINE const char *process_character(char action,
956
+ const char *str,
957
+ const char *end,
958
+ Out out,
959
+ const char *orig,
960
+ bool do_unicode_validation,
961
+ const char **check_start_) {
962
+ const char *check_start = *check_start_;
963
+ switch (action) {
964
+ case '1':
965
+ if (do_unicode_validation && check_start <= str) {
966
+ if (0 != (0x80 & (uint8_t)*str)) {
967
+ if (0xC0 == (0xC0 & (uint8_t)*str)) {
968
+ *check_start_ = check_unicode(str, end, orig);
969
+ } else {
970
+ raise_invalid_unicode(orig, (int)(end - orig), (int)(str - orig));
971
+ }
972
+ }
973
+ }
974
+ *out->cur++ = *str;
975
+ break;
976
+ case '2':
977
+ *out->cur++ = '\\';
978
+ switch (*str) {
979
+ case '\\': *out->cur++ = '\\'; break;
980
+ case '\b': *out->cur++ = 'b'; break;
981
+ case '\t': *out->cur++ = 't'; break;
982
+ case '\n': *out->cur++ = 'n'; break;
983
+ case '\f': *out->cur++ = 'f'; break;
984
+ case '\r': *out->cur++ = 'r'; break;
985
+ default: *out->cur++ = *str; break;
986
+ }
987
+ break;
988
+ case '3': // Unicode
989
+ if (0xe2 == (uint8_t)*str && do_unicode_validation && 2 <= end - str) {
990
+ if (0x80 == (uint8_t)str[1] && (0xa8 == (uint8_t)str[2] || 0xa9 == (uint8_t)str[2])) {
991
+ str = dump_unicode(str, end, out, orig);
992
+ } else {
993
+ *check_start_ = check_unicode(str, end, orig);
994
+ *out->cur++ = *str;
995
+ }
996
+ break;
997
+ }
998
+ str = dump_unicode(str, end, out, orig);
999
+ break;
1000
+ case '6': // control characters
1001
+ if (*(uint8_t *)str < 0x80) {
1002
+ if (0 == (uint8_t)*str && out->opts->dump_opts.omit_null_byte) {
1003
+ break;
1004
+ }
1005
+ APPEND_CHARS(out->cur, "\\u00", 4);
1006
+ dump_hex((uint8_t)*str, out);
1007
+ } else {
1008
+ if (0xe2 == (uint8_t)*str && do_unicode_validation && 2 <= end - str) {
1009
+ if (0x80 == (uint8_t)str[1] && (0xa8 == (uint8_t)str[2] || 0xa9 == (uint8_t)str[2])) {
1010
+ str = dump_unicode(str, end, out, orig);
1011
+ } else {
1012
+ *check_start_ = check_unicode(str, end, orig);
1013
+ *out->cur++ = *str;
1014
+ }
1015
+ break;
1016
+ }
1017
+ str = dump_unicode(str, end, out, orig);
1018
+ }
1019
+ break;
1020
+ default: break; // ignore, should never happen if the table is correct
1021
+ }
1022
+
1023
+ return str;
1024
+ }
1025
+
1026
+ void oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out out) {
1027
+ size_t size;
1028
+ char *cmap;
1029
+ #ifdef HAVE_SIMD_NEON
1030
+ uint8x16x4_t *cmap_neon = NULL;
1031
+ int neon_table_size = 0;
1032
+ #endif /* HAVE_SIMD_NEON */
1033
+ const char *orig = str;
1034
+ bool has_hi = false;
1035
+ bool do_unicode_validation = false;
1036
+
1037
+ switch (out->opts->escape_mode) {
1038
+ case NLEsc:
1039
+ cmap = newline_friendly_chars;
1040
+ size = newline_friendly_size((uint8_t *)str, cnt);
1041
+ break;
1042
+ case ASCIIEsc:
1043
+ cmap = ascii_friendly_chars;
1044
+ size = ascii_friendly_size((uint8_t *)str, cnt);
1045
+ break;
1046
+ case SlashEsc:
1047
+ has_hi = true;
1048
+ cmap = slash_friendly_chars;
1049
+ size = slash_friendly_size((uint8_t *)str, cnt);
1050
+ break;
1051
+ case XSSEsc:
1052
+ cmap = xss_friendly_chars;
1053
+ size = xss_friendly_size((uint8_t *)str, cnt);
1054
+ break;
1055
+ case JXEsc:
1056
+ cmap = hixss_friendly_chars;
1057
+ size = hixss_friendly_size((uint8_t *)str, cnt);
1058
+ do_unicode_validation = true;
1059
+ break;
1060
+ case RailsXEsc: {
1061
+ long sz;
1062
+
1063
+ cmap = rails_xss_friendly_chars;
1064
+ #ifdef HAVE_SIMD_NEON
1065
+ cmap_neon = rails_xss_friendly_chars_neon;
1066
+ neon_table_size = 4;
1067
+ #endif /* HAVE_NEON_SIMD */
1068
+ sz = rails_xss_friendly_size((uint8_t *)str, cnt);
1069
+ if (sz < 0) {
1070
+ has_hi = true;
1071
+ size = (size_t)-sz;
1072
+ } else {
1073
+ size = (size_t)sz;
1074
+ }
1075
+ do_unicode_validation = true;
1076
+ break;
1077
+ }
1078
+ case RailsEsc: {
1079
+ long sz;
1080
+ cmap = rails_friendly_chars;
1081
+ #ifdef HAVE_SIMD_NEON
1082
+ cmap_neon = rails_friendly_chars_neon;
1083
+ neon_table_size = 2;
1084
+ #endif /* HAVE_NEON_SIMD */
1085
+ sz = rails_friendly_size((uint8_t *)str, cnt);
1086
+ if (sz < 0) {
1087
+ has_hi = true;
1088
+ size = (size_t)-sz;
1089
+ } else {
1090
+ size = (size_t)sz;
1091
+ }
1092
+ do_unicode_validation = true;
1093
+ break;
1094
+ }
1095
+ case JSONEsc:
1096
+ default: cmap = hibit_friendly_chars;
1097
+ #ifdef HAVE_SIMD_NEON
1098
+ cmap_neon = hibit_friendly_chars_neon;
1099
+ neon_table_size = 2;
1100
+ #endif /* HAVE_NEON_SIMD */
1101
+ size = hibit_friendly_size((uint8_t *)str, cnt);
1102
+ }
1103
+ assure_size(out, size + BUFFER_EXTRA);
1104
+ *out->cur++ = '"';
1105
+
1106
+ if (escape1) {
1107
+ APPEND_CHARS(out->cur, "\\u00", 4);
1108
+ dump_hex((uint8_t)*str, out);
1109
+ cnt--;
1110
+ size--;
1111
+ str++;
1112
+ is_sym = 0; // just to make sure
1113
+ }
1114
+ if (cnt == size && !has_hi) {
1115
+ if (is_sym) {
1116
+ *out->cur++ = ':';
1117
+ }
1118
+ APPEND_CHARS(out->cur, str, cnt);
1119
+ *out->cur++ = '"';
1120
+ } else {
1121
+ const char *end = str + cnt;
1122
+ const char *check_start = str;
1123
+
1124
+ if (is_sym) {
1125
+ *out->cur++ = ':';
1126
+ }
1127
+ #ifdef HAVE_SIMD_NEON
1128
+ const char *chunk_start;
1129
+ const char *chunk_end;
1130
+ const char *cursor = str;
1131
+ bool use_neon = (cmap_neon != NULL && cnt >= (sizeof(uint8x16_t))) ? true : false;
1132
+ char matches[16];
1133
+ #define SEARCH_FLUSH \
1134
+ if (str > cursor) { \
1135
+ APPEND_CHARS(out->cur, cursor, str - cursor); \
1136
+ cursor = str; \
1137
+ }
1138
+
1139
+ #endif /* HAVE_SIMD_NEON */
1140
+ #ifdef HAVE_SIMD_NEON
1141
+ if (use_neon) {
1142
+ while (str < end) {
1143
+ const char *chunk_ptr = NULL;
1144
+ if (str + sizeof(uint8x16_t) <= end) {
1145
+ chunk_ptr = str;
1146
+ chunk_start = str;
1147
+ chunk_end = str + sizeof(uint8x16_t);
1148
+ } else if ((end - str) >= SIMD_MINIMUM_THRESHOLD) {
1149
+ memset(out->cur, 'A', sizeof(uint8x16_t));
1150
+ memcpy(out->cur, str, (end - str));
1151
+ chunk_ptr = out->cur;
1152
+ chunk_start = str;
1153
+ chunk_end = end;
1154
+ } else {
1155
+ break;
1156
+ }
1157
+ neon_match_result result = neon_update(chunk_ptr,
1158
+ cmap_neon,
1159
+ neon_table_size,
1160
+ do_unicode_validation,
1161
+ has_hi);
1162
+ if ((result.do_unicode_validation) || vmaxvq_u8(result.needs_escape) != 0) {
1163
+ SEARCH_FLUSH;
1164
+ uint8x16_t actions = vaddq_u8(result.needs_escape, vdupq_n_u8('1'));
1165
+ uint8_t num_matches = vaddvq_u8(vandq_u8(result.needs_escape, vdupq_n_u8(0x1)));
1166
+ vst1q_u8((unsigned char *)matches, actions);
1167
+ bool process_each = result.do_unicode_validation || (num_matches > sizeof(uint8x16_t) / 2);
1168
+ // If no byte in this chunk had the high bit set then we can skip
1169
+ // all of the '1' bytes by directly copying them to the output.
1170
+ if (!process_each) {
1171
+ while (str < chunk_end) {
1172
+ long i = str - chunk_start;
1173
+ char action;
1174
+ while (str < chunk_end && (action = matches[i++]) == '1') {
1175
+ *out->cur++ = *str++;
1176
+ }
1177
+ cursor = str;
1178
+ if (str >= chunk_end) {
1179
+ break;
1180
+ }
1181
+ str = process_character(action, str, end, out, orig, do_unicode_validation, &check_start);
1182
+ str++;
1183
+ }
1184
+ } else {
1185
+ while (str < chunk_end) {
1186
+ long match_index = str - chunk_start;
1187
+ str = process_character(matches[match_index],
1188
+ str,
1189
+ end,
1190
+ out,
1191
+ orig,
1192
+ do_unicode_validation,
1193
+ &check_start);
1194
+ str++;
1195
+ }
1196
+ }
1197
+ cursor = str;
1198
+ continue;
1199
+ }
1200
+ str = chunk_end;
1201
+ }
1202
+ SEARCH_FLUSH;
1203
+ }
1204
+ #endif /* HAVE_SIMD_NEON */
1205
+ for (; str < end; str++) {
1206
+ str = process_character(cmap[(uint8_t)*str], str, end, out, orig, do_unicode_validation, &check_start);
1207
+ }
1208
+ *out->cur++ = '"';
1209
+ }
1210
+ if (do_unicode_validation && 0 < str - orig && 0 != (0x80 & *(str - 1))) {
1211
+ uint8_t c = (uint8_t)*(str - 1);
1212
+ int i;
1213
+ int scnt = (int)(str - orig);
1214
+
1215
+ // Last utf-8 characters must be 0x10xxxxxx. The start must be
1216
+ // 0x110xxxxx for 2 characters, 0x1110xxxx for 3, and 0x11110xxx for
1217
+ // 4.
1218
+ if (0 != (0x40 & c)) {
1219
+ debug_raise(orig, cnt, __LINE__);
1220
+ }
1221
+ for (i = 1; i < (int)scnt && i < 4; i++) {
1222
+ c = str[-1 - i];
1223
+ if (0x80 != (0xC0 & c)) {
1224
+ switch (i) {
1225
+ case 1:
1226
+ if (0xC0 != (0xE0 & c)) {
1227
+ debug_raise(orig, cnt, __LINE__);
1228
+ }
1229
+ break;
1230
+ case 2:
1231
+ if (0xE0 != (0xF0 & c)) {
1232
+ debug_raise(orig, cnt, __LINE__);
1233
+ }
1234
+ break;
1235
+ case 3:
1236
+ if (0xF0 != (0xF8 & c)) {
1237
+ debug_raise(orig, cnt, __LINE__);
1238
+ }
1239
+ break;
1240
+ default: // can't get here
1241
+ break;
1242
+ }
1243
+ break;
1244
+ }
1245
+ }
1246
+ if (i == (int)scnt || 4 <= i) {
1247
+ debug_raise(orig, cnt, __LINE__);
1248
+ }
1249
+ }
1250
+ *out->cur = '\0';
1251
+ }
1252
+
1253
+ void oj_dump_class(VALUE obj, int depth, Out out, bool as_ok) {
1254
+ const char *s = rb_class2name(obj);
1255
+
1256
+ oj_dump_cstr(s, strlen(s), 0, 0, out);
1257
+ }
1258
+
1259
+ void oj_dump_obj_to_s(VALUE obj, Out out) {
1260
+ volatile VALUE rstr = oj_safe_string_convert(obj);
1261
+
1262
+ oj_dump_cstr(RSTRING_PTR(rstr), RSTRING_LEN(rstr), 0, 0, out);
1263
+ }
1264
+
1265
+ void oj_dump_raw(const char *str, size_t cnt, Out out) {
1266
+ assure_size(out, cnt + 10);
1267
+ APPEND_CHARS(out->cur, str, cnt);
1268
+ *out->cur = '\0';
1269
+ }
1270
+
1271
+ void oj_out_init(Out out) {
1272
+ out->buf = out->stack_buffer;
1273
+ out->cur = out->buf;
1274
+ out->end = out->buf + sizeof(out->stack_buffer) - BUFFER_EXTRA;
1275
+ out->allocated = false;
1276
+ }
1277
+
1278
+ void oj_out_free(Out out) {
1279
+ if (out->allocated) {
1280
+ OJ_R_FREE(out->buf); // TBD
1281
+ }
1282
+ }
1283
+
1284
+ void oj_grow_out(Out out, size_t len) {
1285
+ size_t size = out->end - out->buf;
1286
+ long pos = out->cur - out->buf;
1287
+ char *buf = out->buf;
1288
+
1289
+ size *= 2;
1290
+ if (size <= len * 2 + pos) {
1291
+ size += len;
1292
+ }
1293
+ if (out->allocated) {
1294
+ OJ_R_REALLOC_N(buf, char, (size + BUFFER_EXTRA));
1295
+ } else {
1296
+ buf = OJ_R_ALLOC_N(char, (size + BUFFER_EXTRA));
1297
+ out->allocated = true;
1298
+ memcpy(buf, out->buf, out->end - out->buf + BUFFER_EXTRA);
1299
+ }
1300
+ if (0 == buf) {
1301
+ rb_raise(rb_eNoMemError, "Failed to create string. [%d:%s]", ENOSPC, strerror(ENOSPC));
1302
+ }
1303
+ out->buf = buf;
1304
+ out->end = buf + size;
1305
+ out->cur = out->buf + pos;
1306
+ }
1307
+
1308
+ void oj_dump_nil(VALUE obj, int depth, Out out, bool as_ok) {
1309
+ assure_size(out, 4);
1310
+ APPEND_CHARS(out->cur, "null", 4);
1311
+ *out->cur = '\0';
1312
+ }
1313
+
1314
+ void oj_dump_true(VALUE obj, int depth, Out out, bool as_ok) {
1315
+ assure_size(out, 4);
1316
+ APPEND_CHARS(out->cur, "true", 4);
1317
+ *out->cur = '\0';
1318
+ }
1319
+
1320
+ void oj_dump_false(VALUE obj, int depth, Out out, bool as_ok) {
1321
+ assure_size(out, 5);
1322
+ APPEND_CHARS(out->cur, "false", 5);
1323
+ *out->cur = '\0';
1324
+ }
1325
+
1326
/* Two-character decimal renderings of 00..99, packed back to back. */
static const char digits_table[] =
    "00010203040506070809"
    "10111213141516171819"
    "20212223242526272829"
    "30313233343536373839"
    "40414243444546474849"
    "50515253545556575859"
    "60616263646566676869"
    "70717273747576777879"
    "80818283848586878889"
    "90919293949596979899";

/*
 * Renders a non-negative num in decimal, writing backwards from buf.
 *
 * num      - magnitude to render; callers pass the absolute value
 * negative - when true a '-' is placed in front of the digits
 * buf      - position of the LAST character to write; digits grow towards
 *            lower addresses, so the caller must provide room before it
 *
 * Returns a pointer to the first character written. No terminator is
 * added; the caller is expected to have placed one after buf.
 */
char *oj_longlong_to_string(long long num, bool negative, char *buf) {
    const char *pair;

    // Peel off two digits per iteration.
    for (; 100 <= num; num /= 100) {
        pair   = digits_table + (num % 100) * 2;
        *buf-- = pair[1];
        *buf-- = pair[0];
    }
    if (10 <= num) {
        pair   = digits_table + num * 2;
        *buf-- = pair[1];
        *buf-- = pair[0];
    } else {
        *buf-- = (char)(num + '0');
    }
    if (negative) {
        *buf = '-';
        return buf;
    }
    // buf sits one slot before the first digit; step back onto it.
    return buf + 1;
}
1359
+
1360
+ void oj_dump_fixnum(VALUE obj, int depth, Out out, bool as_ok) {
1361
+ char buf[32];
1362
+ char *b = buf + sizeof(buf) - 1;
1363
+ long long num = NUM2LL(obj);
1364
+ bool neg = false;
1365
+ size_t cnt = 0;
1366
+ bool dump_as_string = false;
1367
+
1368
+ if (out->opts->int_range_max != 0 && out->opts->int_range_min != 0 &&
1369
+ (out->opts->int_range_max < num || out->opts->int_range_min > num)) {
1370
+ dump_as_string = true;
1371
+ }
1372
+ if (0 > num) {
1373
+ neg = true;
1374
+ num = -num;
1375
+ }
1376
+ *b-- = '\0';
1377
+
1378
+ if (dump_as_string) {
1379
+ *b-- = '"';
1380
+ }
1381
+ if (0 < num) {
1382
+ b = oj_longlong_to_string(num, neg, b);
1383
+ } else {
1384
+ *b = '0';
1385
+ }
1386
+ if (dump_as_string) {
1387
+ *--b = '"';
1388
+ }
1389
+ cnt = sizeof(buf) - (b - buf) - 1;
1390
+ assure_size(out, cnt);
1391
+ APPEND_CHARS(out->cur, b, cnt);
1392
+ *out->cur = '\0';
1393
+ }
1394
+
1395
+ void oj_dump_bignum(VALUE obj, int depth, Out out, bool as_ok) {
1396
+ volatile VALUE rs = rb_big2str(obj, 10);
1397
+ size_t cnt = RSTRING_LEN(rs);
1398
+ bool dump_as_string = false;
1399
+
1400
+ if (out->opts->int_range_max != 0 || out->opts->int_range_min != 0) { // Bignum cannot be inside of Fixnum range
1401
+ dump_as_string = true;
1402
+ assure_size(out, cnt + 2);
1403
+ *out->cur++ = '"';
1404
+ } else {
1405
+ assure_size(out, cnt);
1406
+ }
1407
+ APPEND_CHARS(out->cur, RSTRING_PTR(rs), cnt);
1408
+ if (dump_as_string) {
1409
+ *out->cur++ = '"';
1410
+ }
1411
+ *out->cur = '\0';
1412
+ }
1413
+
1414
+ // Removed dependencies on math due to problems with CentOS 5.4.
1415
+ void oj_dump_float(VALUE obj, int depth, Out out, bool as_ok) {
1416
+ char buf[64];
1417
+ char *b;
1418
+ double d = rb_num2dbl(obj);
1419
+ size_t cnt = 0;
1420
+
1421
+ if (0.0 == d) {
1422
+ b = buf;
1423
+ *b++ = '0';
1424
+ *b++ = '.';
1425
+ *b++ = '0';
1426
+ *b++ = '\0';
1427
+ cnt = 3;
1428
+ } else if (OJ_INFINITY == d) {
1429
+ if (ObjectMode == out->opts->mode) {
1430
+ strcpy(buf, inf_val);
1431
+ cnt = sizeof(inf_val) - 1;
1432
+ } else {
1433
+ NanDump nd = out->opts->dump_opts.nan_dump;
1434
+
1435
+ if (AutoNan == nd) {
1436
+ switch (out->opts->mode) {
1437
+ case CompatMode: nd = WordNan; break;
1438
+ case StrictMode: nd = RaiseNan; break;
1439
+ case NullMode: nd = NullNan; break;
1440
+ case CustomMode: nd = NullNan; break;
1441
+ default: break;
1442
+ }
1443
+ }
1444
+ switch (nd) {
1445
+ case RaiseNan: raise_strict(obj); break;
1446
+ case WordNan:
1447
+ strcpy(buf, "Infinity");
1448
+ cnt = 8;
1449
+ break;
1450
+ case NullNan:
1451
+ strcpy(buf, "null");
1452
+ cnt = 4;
1453
+ break;
1454
+ case HugeNan:
1455
+ default:
1456
+ strcpy(buf, inf_val);
1457
+ cnt = sizeof(inf_val) - 1;
1458
+ break;
1459
+ }
1460
+ }
1461
+ } else if (-OJ_INFINITY == d) {
1462
+ if (ObjectMode == out->opts->mode) {
1463
+ strcpy(buf, ninf_val);
1464
+ cnt = sizeof(ninf_val) - 1;
1465
+ } else {
1466
+ NanDump nd = out->opts->dump_opts.nan_dump;
1467
+
1468
+ if (AutoNan == nd) {
1469
+ switch (out->opts->mode) {
1470
+ case CompatMode: nd = WordNan; break;
1471
+ case StrictMode: nd = RaiseNan; break;
1472
+ case NullMode: nd = NullNan; break;
1473
+ default: break;
1474
+ }
1475
+ }
1476
+ switch (nd) {
1477
+ case RaiseNan: raise_strict(obj); break;
1478
+ case WordNan:
1479
+ strcpy(buf, "-Infinity");
1480
+ cnt = 9;
1481
+ break;
1482
+ case NullNan:
1483
+ strcpy(buf, "null");
1484
+ cnt = 4;
1485
+ break;
1486
+ case HugeNan:
1487
+ default:
1488
+ strcpy(buf, ninf_val);
1489
+ cnt = sizeof(ninf_val) - 1;
1490
+ break;
1491
+ }
1492
+ }
1493
+ } else if (isnan(d)) {
1494
+ if (ObjectMode == out->opts->mode) {
1495
+ strcpy(buf, nan_val);
1496
+ cnt = sizeof(nan_val) - 1;
1497
+ } else {
1498
+ NanDump nd = out->opts->dump_opts.nan_dump;
1499
+
1500
+ if (AutoNan == nd) {
1501
+ switch (out->opts->mode) {
1502
+ case ObjectMode: nd = HugeNan; break;
1503
+ case StrictMode: nd = RaiseNan; break;
1504
+ case NullMode: nd = NullNan; break;
1505
+ default: break;
1506
+ }
1507
+ }
1508
+ switch (nd) {
1509
+ case RaiseNan: raise_strict(obj); break;
1510
+ case WordNan:
1511
+ strcpy(buf, "NaN");
1512
+ cnt = 3;
1513
+ break;
1514
+ case NullNan:
1515
+ strcpy(buf, "null");
1516
+ cnt = 4;
1517
+ break;
1518
+ case HugeNan:
1519
+ default:
1520
+ strcpy(buf, nan_val);
1521
+ cnt = sizeof(nan_val) - 1;
1522
+ break;
1523
+ }
1524
+ }
1525
+ } else if (d == (double)(long long int)d) {
1526
+ cnt = snprintf(buf, sizeof(buf), "%.1f", d);
1527
+ } else if (0 == out->opts->float_prec) {
1528
+ volatile VALUE rstr = oj_safe_string_convert(obj);
1529
+
1530
+ cnt = RSTRING_LEN(rstr);
1531
+ if ((int)sizeof(buf) <= cnt) {
1532
+ cnt = sizeof(buf) - 1;
1533
+ }
1534
+ memcpy(buf, RSTRING_PTR(rstr), cnt);
1535
+ buf[cnt] = '\0';
1536
+ } else {
1537
+ cnt = oj_dump_float_printf(buf, sizeof(buf), obj, d, out->opts->float_fmt);
1538
+ }
1539
+ assure_size(out, cnt);
1540
+ APPEND_CHARS(out->cur, buf, cnt);
1541
+ *out->cur = '\0';
1542
+ }
1543
+
1544
+ size_t oj_dump_float_printf(char *buf, size_t blen, VALUE obj, double d, const char *format) {
1545
+ size_t cnt = snprintf(buf, blen, format, d);
1546
+
1547
+ // Round off issues at 16 significant digits so check for obvious ones of
1548
+ // 0001 and 9999.
1549
+ if (17 <= cnt && (0 == strcmp("0001", buf + cnt - 4) || 0 == strcmp("9999", buf + cnt - 4))) {
1550
+ volatile VALUE rstr = oj_safe_string_convert(obj);
1551
+
1552
+ strcpy(buf, RSTRING_PTR(rstr));
1553
+ cnt = RSTRING_LEN(rstr);
1554
+ }
1555
+ return cnt;
1556
+ }