oj 3.9.1 → 3.16.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +1452 -0
  3. data/README.md +21 -6
  4. data/RELEASE_NOTES.md +61 -0
  5. data/ext/oj/buf.h +50 -68
  6. data/ext/oj/cache.c +329 -0
  7. data/ext/oj/cache.h +22 -0
  8. data/ext/oj/cache8.c +60 -62
  9. data/ext/oj/cache8.h +9 -36
  10. data/ext/oj/circarray.c +38 -42
  11. data/ext/oj/circarray.h +12 -13
  12. data/ext/oj/code.c +158 -179
  13. data/ext/oj/code.h +20 -22
  14. data/ext/oj/compat.c +145 -205
  15. data/ext/oj/custom.c +740 -880
  16. data/ext/oj/debug.c +126 -0
  17. data/ext/oj/dump.c +1145 -844
  18. data/ext/oj/dump.h +71 -57
  19. data/ext/oj/dump_compat.c +575 -655
  20. data/ext/oj/dump_leaf.c +96 -186
  21. data/ext/oj/dump_object.c +533 -660
  22. data/ext/oj/dump_strict.c +306 -340
  23. data/ext/oj/encode.h +4 -33
  24. data/ext/oj/encoder.c +43 -0
  25. data/ext/oj/err.c +28 -28
  26. data/ext/oj/err.h +39 -42
  27. data/ext/oj/extconf.rb +28 -7
  28. data/ext/oj/fast.c +1052 -1113
  29. data/ext/oj/intern.c +313 -0
  30. data/ext/oj/intern.h +22 -0
  31. data/ext/oj/mem.c +318 -0
  32. data/ext/oj/mem.h +53 -0
  33. data/ext/oj/mimic_json.c +471 -430
  34. data/ext/oj/object.c +532 -580
  35. data/ext/oj/odd.c +156 -142
  36. data/ext/oj/odd.h +25 -26
  37. data/ext/oj/oj.c +1346 -961
  38. data/ext/oj/oj.h +307 -290
  39. data/ext/oj/parse.c +954 -858
  40. data/ext/oj/parse.h +74 -72
  41. data/ext/oj/parser.c +1600 -0
  42. data/ext/oj/parser.h +103 -0
  43. data/ext/oj/rails.c +819 -836
  44. data/ext/oj/rails.h +8 -11
  45. data/ext/oj/reader.c +136 -147
  46. data/ext/oj/reader.h +69 -83
  47. data/ext/oj/resolve.c +41 -63
  48. data/ext/oj/resolve.h +4 -6
  49. data/ext/oj/rxclass.c +69 -72
  50. data/ext/oj/rxclass.h +12 -13
  51. data/ext/oj/saj.c +440 -485
  52. data/ext/oj/saj2.c +584 -0
  53. data/ext/oj/saj2.h +23 -0
  54. data/ext/oj/scp.c +79 -118
  55. data/ext/oj/simd.h +10 -0
  56. data/ext/oj/sparse.c +739 -709
  57. data/ext/oj/stream_writer.c +141 -175
  58. data/ext/oj/strict.c +103 -128
  59. data/ext/oj/string_writer.c +244 -261
  60. data/ext/oj/trace.c +34 -41
  61. data/ext/oj/trace.h +42 -15
  62. data/ext/oj/usual.c +1218 -0
  63. data/ext/oj/usual.h +69 -0
  64. data/ext/oj/util.c +107 -107
  65. data/ext/oj/util.h +4 -3
  66. data/ext/oj/val_stack.c +61 -78
  67. data/ext/oj/val_stack.h +80 -114
  68. data/ext/oj/validate.c +46 -0
  69. data/ext/oj/wab.c +316 -361
  70. data/lib/oj/active_support_helper.rb +1 -3
  71. data/lib/oj/bag.rb +8 -1
  72. data/lib/oj/easy_hash.rb +9 -9
  73. data/lib/oj/error.rb +1 -2
  74. data/lib/oj/json.rb +162 -150
  75. data/lib/oj/mimic.rb +54 -20
  76. data/lib/oj/saj.rb +20 -6
  77. data/lib/oj/schandler.rb +5 -4
  78. data/lib/oj/state.rb +12 -8
  79. data/lib/oj/version.rb +1 -2
  80. data/lib/oj.rb +2 -8
  81. data/pages/Compatibility.md +1 -1
  82. data/pages/Encoding.md +1 -1
  83. data/pages/InstallOptions.md +20 -0
  84. data/pages/JsonGem.md +15 -0
  85. data/pages/Modes.md +9 -3
  86. data/pages/Options.md +62 -12
  87. data/pages/Parser.md +309 -0
  88. data/pages/Rails.md +73 -22
  89. metadata +68 -192
  90. data/ext/oj/hash.c +0 -163
  91. data/ext/oj/hash.h +0 -46
  92. data/ext/oj/hash_test.c +0 -512
  93. data/test/_test_active.rb +0 -76
  94. data/test/_test_active_mimic.rb +0 -96
  95. data/test/_test_mimic_rails.rb +0 -126
  96. data/test/activerecord/result_test.rb +0 -27
  97. data/test/activesupport4/decoding_test.rb +0 -108
  98. data/test/activesupport4/encoding_test.rb +0 -531
  99. data/test/activesupport4/test_helper.rb +0 -41
  100. data/test/activesupport5/decoding_test.rb +0 -125
  101. data/test/activesupport5/encoding_test.rb +0 -485
  102. data/test/activesupport5/encoding_test_cases.rb +0 -90
  103. data/test/activesupport5/test_helper.rb +0 -50
  104. data/test/activesupport5/time_zone_test_helpers.rb +0 -24
  105. data/test/bar.rb +0 -25
  106. data/test/files.rb +0 -29
  107. data/test/foo.rb +0 -21
  108. data/test/helper.rb +0 -26
  109. data/test/isolated/shared.rb +0 -308
  110. data/test/isolated/test_mimic_after.rb +0 -13
  111. data/test/isolated/test_mimic_alone.rb +0 -12
  112. data/test/isolated/test_mimic_as_json.rb +0 -45
  113. data/test/isolated/test_mimic_before.rb +0 -13
  114. data/test/isolated/test_mimic_define.rb +0 -28
  115. data/test/isolated/test_mimic_rails_after.rb +0 -22
  116. data/test/isolated/test_mimic_rails_before.rb +0 -21
  117. data/test/isolated/test_mimic_redefine.rb +0 -15
  118. data/test/json_gem/json_addition_test.rb +0 -216
  119. data/test/json_gem/json_common_interface_test.rb +0 -148
  120. data/test/json_gem/json_encoding_test.rb +0 -107
  121. data/test/json_gem/json_ext_parser_test.rb +0 -20
  122. data/test/json_gem/json_fixtures_test.rb +0 -35
  123. data/test/json_gem/json_generator_test.rb +0 -383
  124. data/test/json_gem/json_generic_object_test.rb +0 -90
  125. data/test/json_gem/json_parser_test.rb +0 -470
  126. data/test/json_gem/json_string_matching_test.rb +0 -42
  127. data/test/json_gem/test_helper.rb +0 -18
  128. data/test/perf.rb +0 -107
  129. data/test/perf_compat.rb +0 -130
  130. data/test/perf_fast.rb +0 -164
  131. data/test/perf_file.rb +0 -64
  132. data/test/perf_object.rb +0 -138
  133. data/test/perf_saj.rb +0 -109
  134. data/test/perf_scp.rb +0 -151
  135. data/test/perf_simple.rb +0 -287
  136. data/test/perf_strict.rb +0 -145
  137. data/test/perf_wab.rb +0 -131
  138. data/test/sample/change.rb +0 -14
  139. data/test/sample/dir.rb +0 -19
  140. data/test/sample/doc.rb +0 -36
  141. data/test/sample/file.rb +0 -48
  142. data/test/sample/group.rb +0 -16
  143. data/test/sample/hasprops.rb +0 -16
  144. data/test/sample/layer.rb +0 -12
  145. data/test/sample/line.rb +0 -20
  146. data/test/sample/oval.rb +0 -10
  147. data/test/sample/rect.rb +0 -10
  148. data/test/sample/shape.rb +0 -35
  149. data/test/sample/text.rb +0 -20
  150. data/test/sample.rb +0 -54
  151. data/test/sample_json.rb +0 -37
  152. data/test/test_compat.rb +0 -509
  153. data/test/test_custom.rb +0 -503
  154. data/test/test_debian.rb +0 -53
  155. data/test/test_fast.rb +0 -470
  156. data/test/test_file.rb +0 -239
  157. data/test/test_gc.rb +0 -49
  158. data/test/test_hash.rb +0 -29
  159. data/test/test_integer_range.rb +0 -73
  160. data/test/test_null.rb +0 -376
  161. data/test/test_object.rb +0 -1018
  162. data/test/test_saj.rb +0 -186
  163. data/test/test_scp.rb +0 -433
  164. data/test/test_strict.rb +0 -410
  165. data/test/test_various.rb +0 -741
  166. data/test/test_wab.rb +0 -307
  167. data/test/test_writer.rb +0 -380
  168. data/test/tests.rb +0 -24
  169. data/test/tests_mimic.rb +0 -14
  170. data/test/tests_mimic_addition.rb +0 -7
  171. data/test/zoo.rb +0 -13
data/ext/oj/dump.c CHANGED
@@ -1,7 +1,7 @@
1
- /* dump.c
2
- * Copyright (c) 2012, 2017, Peter Ohler
3
- * All rights reserved.
4
- */
1
+ // Copyright (c) 2012, 2017 Peter Ohler. All rights reserved.
2
+ // Licensed under the MIT License. See LICENSE file in the project root for license details.
3
+
4
+ #include "dump.h"
5
5
 
6
6
  #include <errno.h>
7
7
  #include <math.h>
@@ -10,33 +10,37 @@
10
10
  #include <stdlib.h>
11
11
  #include <string.h>
12
12
  #include <unistd.h>
13
+ #if !IS_WINDOWS
14
+ #include <poll.h>
15
+ #endif
13
16
 
14
- #include "oj.h"
15
17
  #include "cache8.h"
16
- #include "dump.h"
18
+ #include "mem.h"
17
19
  #include "odd.h"
20
+ #include "oj.h"
18
21
  #include "trace.h"
19
22
  #include "util.h"
20
23
 
21
24
  // Workaround in case INFINITY is not defined in math.h or if the OS is CentOS
22
- #define OJ_INFINITY (1.0/0.0)
25
+ #define OJ_INFINITY (1.0 / 0.0)
23
26
 
24
27
  #define MAX_DEPTH 1000
25
28
 
26
- static const char inf_val[] = INF_VAL;
27
- static const char ninf_val[] = NINF_VAL;
28
- static const char nan_val[] = NAN_VAL;
29
+ static const char inf_val[] = INF_VAL;
30
+ static const char ninf_val[] = NINF_VAL;
31
+ static const char nan_val[] = NAN_VAL;
29
32
 
30
- typedef unsigned long ulong;
33
+ typedef unsigned long ulong;
31
34
 
32
- static size_t hibit_friendly_size(const uint8_t *str, size_t len);
33
- static size_t xss_friendly_size(const uint8_t *str, size_t len);
34
- static size_t ascii_friendly_size(const uint8_t *str, size_t len);
35
+ static size_t hibit_friendly_size(const uint8_t *str, size_t len);
36
+ static size_t slash_friendly_size(const uint8_t *str, size_t len);
37
+ static size_t xss_friendly_size(const uint8_t *str, size_t len);
38
+ static size_t ascii_friendly_size(const uint8_t *str, size_t len);
35
39
 
36
- static const char hex_chars[17] = "0123456789abcdef";
40
+ static const char hex_chars[17] = "0123456789abcdef";
37
41
 
38
42
  // JSON standard except newlines are not escaped
39
- static char newline_friendly_chars[256] = "\
43
+ static char newline_friendly_chars[256] = "\
40
44
  66666666221622666666666666666666\
41
45
  11211111111111111111111111111111\
42
46
  11111111111111111111111111112111\
@@ -47,7 +51,7 @@ static char newline_friendly_chars[256] = "\
47
51
  11111111111111111111111111111111";
48
52
 
49
53
  // JSON standard
50
- static char hibit_friendly_chars[256] = "\
54
+ static char hibit_friendly_chars[256] = "\
51
55
  66666666222622666666666666666666\
52
56
  11211111111111111111111111111111\
53
57
  11111111111111111111111111112111\
@@ -57,9 +61,20 @@ static char hibit_friendly_chars[256] = "\
57
61
  11111111111111111111111111111111\
58
62
  11111111111111111111111111111111";
59
63
 
64
+ // JSON standard but escape forward slashes `/`
65
+ static char slash_friendly_chars[256] = "\
66
+ 66666666222622666666666666666666\
67
+ 11211111111111121111111111111111\
68
+ 11111111111111111111111111112111\
69
+ 11111111111111111111111111111111\
70
+ 11111111111111111111111111111111\
71
+ 11111111111111111111111111111111\
72
+ 11111111111111111111111111111111\
73
+ 11111111111111111111111111111111";
74
+
60
75
  // High bit set characters are always encoded as unicode. Worst case is 3
61
76
  // bytes per character in the output. That makes this conservative.
62
- static char ascii_friendly_chars[256] = "\
77
+ static char ascii_friendly_chars[256] = "\
63
78
  66666666222622666666666666666666\
64
79
  11211111111111111111111111111111\
65
80
  11111111111111111111111111112111\
@@ -70,7 +85,7 @@ static char ascii_friendly_chars[256] = "\
70
85
  33333333333333333333333333333333";
71
86
 
72
87
  // XSS safe mode
73
- static char xss_friendly_chars[256] = "\
88
+ static char xss_friendly_chars[256] = "\
74
89
  66666666222622666666666666666666\
75
90
  11211161111111121111111111116161\
76
91
  11111111111111111111111111112111\
@@ -81,7 +96,7 @@ static char xss_friendly_chars[256] = "\
81
96
  33333333333333333333333333333333";
82
97
 
83
98
  // JSON XSS combo
84
- static char hixss_friendly_chars[256] = "\
99
+ static char hixss_friendly_chars[256] = "\
85
100
  66666666222622666666666666666666\
86
101
  11211111111111111111111111111111\
87
102
  11111111111111111111111111112111\
@@ -92,7 +107,7 @@ static char hixss_friendly_chars[256] = "\
92
107
  11611111111111111111111111111111";
93
108
 
94
109
  // Rails XSS combo
95
- static char rails_xss_friendly_chars[256] = "\
110
+ static char rails_xss_friendly_chars[256] = "\
96
111
  66666666222622666666666666666666\
97
112
  11211161111111111111111111116161\
98
113
  11111111111111111111111111112111\
@@ -103,7 +118,7 @@ static char rails_xss_friendly_chars[256] = "\
103
118
  11611111111111111111111111111111";
104
119
 
105
120
  // Rails HTML non-escape
106
- static char rails_friendly_chars[256] = "\
121
+ static char rails_friendly_chars[256] = "\
107
122
  66666666222622666666666666666666\
108
123
  11211111111111111111111111111111\
109
124
  11111111111111111111111111112111\
@@ -111,247 +126,372 @@ static char rails_friendly_chars[256] = "\
111
126
  11111111111111111111111111111111\
112
127
  11111111111111111111111111111111\
113
128
  11111111111111111111111111111111\
114
- 11611111111111111111111111111111";
129
+ 11111111111111111111111111111111";
115
130
 
116
- static void
117
- raise_strict(VALUE obj) {
131
+ static void raise_strict(VALUE obj) {
118
132
  rb_raise(rb_eTypeError, "Failed to dump %s Object to JSON in strict mode.", rb_class2name(rb_obj_class(obj)));
119
133
  }
120
134
 
121
- inline static size_t
122
- newline_friendly_size(const uint8_t *str, size_t len) {
123
- size_t size = 0;
124
- size_t i = len;
135
+ inline static size_t calculate_string_size(const uint8_t *str, size_t len, const char *table) {
136
+ size_t size = 0;
137
+ size_t i = len;
125
138
 
126
- for (; 0 < i; str++, i--) {
127
- size += newline_friendly_chars[*str];
139
+ for (; 3 < i; i -= 4) {
140
+ size += table[*str++];
141
+ size += table[*str++];
142
+ size += table[*str++];
143
+ size += table[*str++];
144
+ }
145
+ for (; 0 < i; i--) {
146
+ size += table[*str++];
128
147
  }
129
148
  return size - len * (size_t)'0';
130
149
  }
131
150
 
132
- inline static size_t
133
- hibit_friendly_size(const uint8_t *str, size_t len) {
134
- size_t size = 0;
135
- size_t i = len;
151
+ inline static size_t newline_friendly_size(const uint8_t *str, size_t len) {
152
+ return calculate_string_size(str, len, newline_friendly_chars);
153
+ }
136
154
 
137
- for (; 0 < i; str++, i--) {
138
- size += hibit_friendly_chars[*str];
139
- }
140
- return size - len * (size_t)'0';
155
+ #ifdef HAVE_SIMD_NEON
156
+ inline static uint8x16x4_t load_uint8x16_4(const unsigned char *table) {
157
+ uint8x16x4_t tab;
158
+ tab.val[0] = vld1q_u8(table);
159
+ tab.val[1] = vld1q_u8(table + 16);
160
+ tab.val[2] = vld1q_u8(table + 32);
161
+ tab.val[3] = vld1q_u8(table + 48);
162
+ return tab;
141
163
  }
142
164
 
143
- inline static size_t
144
- ascii_friendly_size(const uint8_t *str, size_t len) {
145
- size_t size = 0;
146
- size_t i = len;
165
+ static uint8x16x4_t hibit_friendly_chars_neon[2];
166
+ static uint8x16x4_t rails_friendly_chars_neon[2];
167
+ static uint8x16x4_t rails_xss_friendly_chars_neon[4];
168
+
169
+ void initialize_neon(void) {
170
+ // We only need the first 128 bytes of the hibit friendly chars table. Everything above 127 is
171
+ // set to 1. If that ever changes, the code will need to be updated.
172
+ hibit_friendly_chars_neon[0] = load_uint8x16_4((const unsigned char *)hibit_friendly_chars);
173
+ hibit_friendly_chars_neon[1] = load_uint8x16_4((const unsigned char *)hibit_friendly_chars + 64);
174
+
175
+ // rails_friendly_chars is the same as hibit_friendly_chars. Only the first 128 bytes have values
176
+ // that are not '1'. If that ever changes, the code will need to be updated.
177
+ rails_friendly_chars_neon[0] = load_uint8x16_4((const unsigned char *)rails_friendly_chars);
178
+ rails_friendly_chars_neon[1] = load_uint8x16_4((const unsigned char *)rails_friendly_chars + 64);
179
+
180
+ rails_xss_friendly_chars_neon[0] = load_uint8x16_4((const unsigned char *)rails_xss_friendly_chars);
181
+ rails_xss_friendly_chars_neon[1] = load_uint8x16_4((const unsigned char *)rails_xss_friendly_chars + 64);
182
+ rails_xss_friendly_chars_neon[2] = load_uint8x16_4((const unsigned char *)rails_xss_friendly_chars + 128);
183
+ rails_xss_friendly_chars_neon[3] = load_uint8x16_4((const unsigned char *)rails_xss_friendly_chars + 192);
184
+
185
+ // All bytes should be 0 except for those that need more than 1 byte of output. This will allow the
186
+ // code to limit the lookups to the first 128 bytes (values 0 - 127). Bytes above 127 will result
187
+ // in 0 with the vqtbl4q_u8 instruction.
188
+ uint8x16_t one = vdupq_n_u8('1');
189
+ for (int i = 0; i < 2; i++) {
190
+ for (int j = 0; j < 4; j++) {
191
+ hibit_friendly_chars_neon[i].val[j] = vsubq_u8(hibit_friendly_chars_neon[i].val[j], one);
192
+ rails_friendly_chars_neon[i].val[j] = vsubq_u8(rails_friendly_chars_neon[i].val[j], one);
193
+ }
194
+ }
147
195
 
148
- for (; 0 < i; str++, i--) {
149
- size += ascii_friendly_chars[*str];
196
+ for (int i = 0; i < 4; i++) {
197
+ for (int j = 0; j < 4; j++) {
198
+ rails_xss_friendly_chars_neon[i].val[j] = vsubq_u8(rails_xss_friendly_chars_neon[i].val[j], one);
199
+ }
150
200
  }
151
- return size - len * (size_t)'0';
152
201
  }
202
+ #endif
153
203
 
154
- inline static size_t
155
- xss_friendly_size(const uint8_t *str, size_t len) {
156
- size_t size = 0;
157
- size_t i = len;
158
-
159
- for (; 0 < i; str++, i--) {
160
- size += xss_friendly_chars[*str];
204
+ inline static size_t hibit_friendly_size(const uint8_t *str, size_t len) {
205
+ #ifdef HAVE_SIMD_NEON
206
+ size_t size = 0;
207
+ size_t i = 0;
208
+
209
+ for (; i + sizeof(uint8x16_t) <= len; i += sizeof(uint8x16_t), str += sizeof(uint8x16_t)) {
210
+ size += sizeof(uint8x16_t);
211
+
212
+ // See https://lemire.me/blog/2019/07/23/arbitrary-byte-to-byte-maps-using-arm-neon/
213
+ uint8x16_t chunk = vld1q_u8(str);
214
+ uint8x16_t tmp1 = vqtbl4q_u8(hibit_friendly_chars_neon[0], chunk);
215
+ uint8x16_t tmp2 = vqtbl4q_u8(hibit_friendly_chars_neon[1], veorq_u8(chunk, vdupq_n_u8(0x40)));
216
+ uint8x16_t result = vorrq_u8(tmp1, tmp2);
217
+ uint8_t tmp = vaddvq_u8(result);
218
+ size += tmp;
161
219
  }
162
- return size - len * (size_t)'0';
220
+
221
+ size_t total = size + calculate_string_size(str, len - i, hibit_friendly_chars);
222
+ return total;
223
+ #else
224
+ return calculate_string_size(str, len, hibit_friendly_chars);
225
+ #endif
163
226
  }
164
227
 
165
- inline static size_t
166
- hixss_friendly_size(const uint8_t *str, size_t len) {
167
- size_t size = 0;
168
- size_t i = len;
169
- bool check = false;
228
+ inline static size_t slash_friendly_size(const uint8_t *str, size_t len) {
229
+ return calculate_string_size(str, len, slash_friendly_chars);
230
+ }
231
+
232
+ inline static size_t ascii_friendly_size(const uint8_t *str, size_t len) {
233
+ return calculate_string_size(str, len, ascii_friendly_chars);
234
+ }
235
+
236
+ inline static size_t xss_friendly_size(const uint8_t *str, size_t len) {
237
+ return calculate_string_size(str, len, xss_friendly_chars);
238
+ }
239
+
240
+ inline static size_t hixss_friendly_size(const uint8_t *str, size_t len) {
241
+ size_t size = 0;
242
+ size_t i = len;
243
+ bool check = false;
170
244
 
171
245
  for (; 0 < i; str++, i--) {
172
- size += hixss_friendly_chars[*str];
173
- if (0 != (0x80 & *str)) {
174
- check = true;
175
- }
246
+ size += hixss_friendly_chars[*str];
247
+ if (0 != (0x80 & *str)) {
248
+ check = true;
249
+ }
176
250
  }
177
251
  return size - len * (size_t)'0' + check;
178
252
  }
179
253
 
180
- inline static size_t
181
- rails_xss_friendly_size(const uint8_t *str, size_t len) {
182
- size_t size = 0;
183
- size_t i = len;
254
+ inline static long rails_xss_friendly_size(const uint8_t *str, size_t len) {
255
+ long size = 0;
256
+ uint8_t hi = 0;
257
+
258
+ #ifdef HAVE_SIMD_NEON
259
+ size_t i = 0;
260
+
261
+ uint8x16_t has_some_hibit = vdupq_n_u8(0);
262
+ uint8x16_t hibit = vdupq_n_u8(0x80);
263
+ for (; i + sizeof(uint8x16_t) <= len; i += sizeof(uint8x16_t), str += sizeof(uint8x16_t)) {
264
+ size += sizeof(uint8x16_t);
184
265
 
266
+ uint8x16_t chunk = vld1q_u8(str);
267
+
268
+ // Check to see if any of these bytes have the high bit set.
269
+ has_some_hibit = vorrq_u8(has_some_hibit, vandq_u8(chunk, hibit));
270
+
271
+ uint8x16_t tmp1 = vqtbl4q_u8(rails_xss_friendly_chars_neon[0], chunk);
272
+ uint8x16_t tmp2 = vqtbl4q_u8(rails_xss_friendly_chars_neon[1], veorq_u8(chunk, vdupq_n_u8(0x40)));
273
+ uint8x16_t tmp3 = vqtbl4q_u8(rails_xss_friendly_chars_neon[2], veorq_u8(chunk, vdupq_n_u8(0x80)));
274
+ uint8x16_t tmp4 = vqtbl4q_u8(rails_xss_friendly_chars_neon[3], veorq_u8(chunk, vdupq_n_u8(0xc0)));
275
+ uint8x16_t result = vorrq_u8(tmp4, vorrq_u8(tmp3, vorrq_u8(tmp1, tmp2)));
276
+ uint8_t tmp = vaddvq_u8(result);
277
+ size += tmp;
278
+ }
279
+
280
+ // 'hi' should be set if any of the bytes we processed have the high bit set. It doesn't matter which ones.
281
+ hi = vmaxvq_u8(has_some_hibit) != 0;
282
+
283
+ for (; i < len; str++, i++) {
284
+ size += rails_xss_friendly_chars[*str] - '0';
285
+ hi |= *str & 0x80;
286
+ }
287
+ if (0 == hi) {
288
+ return size;
289
+ }
290
+ return -(size);
291
+ #else
292
+ size_t i = len;
185
293
  for (; 0 < i; str++, i--) {
186
- size += rails_xss_friendly_chars[*str];
294
+ size += rails_xss_friendly_chars[*str];
295
+ hi |= *str & 0x80;
187
296
  }
188
- return size - len * (size_t)'0';
297
+ if (0 == hi) {
298
+ return size - len * (size_t)'0';
299
+ }
300
+ return -(size - len * (size_t)'0');
301
+ #endif /* HAVE_SIMD_NEON */
189
302
  }
190
303
 
191
- inline static size_t
192
- rails_friendly_size(const uint8_t *str, size_t len) {
193
- size_t size = 0;
194
- size_t i = len;
304
+ inline static size_t rails_friendly_size(const uint8_t *str, size_t len) {
305
+ long size = 0;
306
+ uint8_t hi = 0;
307
+ #ifdef HAVE_SIMD_NEON
308
+ size_t i = 0;
309
+
310
+ uint8x16_t has_some_hibit = vdupq_n_u8(0);
311
+ uint8x16_t hibit = vdupq_n_u8(0x80);
312
+
313
+ for (; i + sizeof(uint8x16_t) <= len; i += sizeof(uint8x16_t), str += sizeof(uint8x16_t)) {
314
+ size += sizeof(uint8x16_t);
315
+
316
+ // See https://lemire.me/blog/2019/07/23/arbitrary-byte-to-byte-maps-using-arm-neon/
317
+ uint8x16_t chunk = vld1q_u8(str);
318
+
319
+ // Check to see if any of these bytes have the high bit set.
320
+ has_some_hibit = vorrq_u8(has_some_hibit, vandq_u8(chunk, hibit));
321
+
322
+ uint8x16_t tmp1 = vqtbl4q_u8(rails_friendly_chars_neon[0], chunk);
323
+ uint8x16_t tmp2 = vqtbl4q_u8(rails_friendly_chars_neon[1], veorq_u8(chunk, vdupq_n_u8(0x40)));
324
+ uint8x16_t result = vorrq_u8(tmp1, tmp2);
325
+ uint8_t tmp = vaddvq_u8(result);
326
+ size += tmp;
327
+ }
328
+
329
+ // 'hi' should be set if any of the bytes we processed have the high bit set. It doesn't matter which ones.
330
+ hi = vmaxvq_u8(has_some_hibit) != 0;
195
331
 
332
+ for (; i < len; str++, i++) {
333
+ size += rails_friendly_chars[*str] - '0';
334
+ hi |= *str & 0x80;
335
+ }
336
+ if (0 == hi) {
337
+ return size;
338
+ }
339
+ return -(size);
340
+ #else
341
+ size_t i = len;
196
342
  for (; 0 < i; str++, i--) {
197
- size += rails_friendly_chars[*str];
343
+ size += rails_friendly_chars[*str];
344
+ hi |= *str & 0x80;
198
345
  }
199
- return size - len * (size_t)'0';
346
+ if (0 == hi) {
347
+ return size - len * (size_t)'0';
348
+ }
349
+ return -(size - len * (size_t)'0');
350
+ #endif /* HAVE_SIMD_NEON */
200
351
  }
201
352
 
202
- const char*
203
- oj_nan_str(VALUE obj, int opt, int mode, bool plus, int *lenp) {
204
- const char *str = NULL;
353
+ const char *oj_nan_str(VALUE obj, int opt, int mode, bool plus, size_t *lenp) {
354
+ const char *str = NULL;
205
355
 
206
356
  if (AutoNan == opt) {
207
- switch (mode) {
208
- case CompatMode: opt = WordNan; break;
209
- case StrictMode: opt = RaiseNan; break;
210
- default: break;
211
- }
357
+ switch (mode) {
358
+ case CompatMode: opt = WordNan; break;
359
+ case StrictMode: opt = RaiseNan; break;
360
+ default: break;
361
+ }
212
362
  }
213
363
  switch (opt) {
214
- case RaiseNan:
215
- raise_strict(obj);
216
- break;
364
+ case RaiseNan: raise_strict(obj); break;
217
365
  case WordNan:
218
- if (plus) {
219
- str = "Infinity";
220
- *lenp = 8;
221
- } else {
222
- str = "-Infinity";
223
- *lenp = 9;
224
- }
225
- break;
366
+ if (plus) {
367
+ str = "Infinity";
368
+ *lenp = 8;
369
+ } else {
370
+ str = "-Infinity";
371
+ *lenp = 9;
372
+ }
373
+ break;
226
374
  case NullNan:
227
- str = "null";
228
- *lenp = 4;
229
- break;
375
+ str = "null";
376
+ *lenp = 4;
377
+ break;
230
378
  case HugeNan:
231
379
  default:
232
- if (plus) {
233
- str = inf_val;
234
- *lenp = sizeof(inf_val) - 1;
235
- } else {
236
- str = ninf_val;
237
- *lenp = sizeof(ninf_val) - 1;
238
- }
239
- break;
380
+ if (plus) {
381
+ str = inf_val;
382
+ *lenp = sizeof(inf_val) - 1;
383
+ } else {
384
+ str = ninf_val;
385
+ *lenp = sizeof(ninf_val) - 1;
386
+ }
387
+ break;
240
388
  }
241
389
  return str;
242
390
  }
243
391
 
244
- inline static void
245
- dump_hex(uint8_t c, Out out) {
246
- uint8_t d = (c >> 4) & 0x0F;
392
+ inline static void dump_hex(uint8_t c, Out out) {
393
+ uint8_t d = (c >> 4) & 0x0F;
247
394
 
248
395
  *out->cur++ = hex_chars[d];
249
- d = c & 0x0F;
396
+ d = c & 0x0F;
250
397
  *out->cur++ = hex_chars[d];
251
398
  }
252
399
 
253
- static void
254
- raise_invalid_unicode(const char *str, int len, int pos) {
255
- char buf[len + 1];
256
- char c;
257
- char code[32];
258
- char *cp = code;
259
- int i;
260
- uint8_t d;
400
+ static void raise_invalid_unicode(const char *str, int len, int pos) {
401
+ char c;
402
+ char code[32];
403
+ char *cp = code;
404
+ int i;
405
+ uint8_t d;
261
406
 
262
407
  *cp++ = '[';
263
408
  for (i = pos; i < len && i - pos < 5; i++) {
264
- c = str[i];
265
- d = (c >> 4) & 0x0F;
266
- *cp++ = hex_chars[d];
267
- d = c & 0x0F;
268
- *cp++ = hex_chars[d];
269
- *cp++ = ' ';
409
+ c = str[i];
410
+ d = (c >> 4) & 0x0F;
411
+ *cp++ = hex_chars[d];
412
+ d = c & 0x0F;
413
+ *cp++ = hex_chars[d];
414
+ *cp++ = ' ';
270
415
  }
271
416
  cp--;
272
417
  *cp++ = ']';
273
- *cp = '\0';
274
- strncpy(buf, str, len);
275
- rb_raise(oj_json_generator_error_class, "Invalid Unicode %s at %d in '%s'", code, pos, buf);
418
+ *cp = '\0';
419
+ rb_raise(oj_json_generator_error_class, "Invalid Unicode %s at %d", code, pos);
276
420
  }
277
421
 
278
- static const char*
279
- dump_unicode(const char *str, const char *end, Out out, const char *orig) {
280
- uint32_t code = 0;
281
- uint8_t b = *(uint8_t*)str;
282
- int i, cnt;
422
+ static const char *dump_unicode(const char *str, const char *end, Out out, const char *orig) {
423
+ uint32_t code = 0;
424
+ uint8_t b = *(uint8_t *)str;
425
+ int i, cnt;
283
426
 
284
427
  if (0xC0 == (0xE0 & b)) {
285
- cnt = 1;
286
- code = b & 0x0000001F;
428
+ cnt = 1;
429
+ code = b & 0x0000001F;
287
430
  } else if (0xE0 == (0xF0 & b)) {
288
- cnt = 2;
289
- code = b & 0x0000000F;
431
+ cnt = 2;
432
+ code = b & 0x0000000F;
290
433
  } else if (0xF0 == (0xF8 & b)) {
291
- cnt = 3;
292
- code = b & 0x00000007;
434
+ cnt = 3;
435
+ code = b & 0x00000007;
293
436
  } else if (0xF8 == (0xFC & b)) {
294
- cnt = 4;
295
- code = b & 0x00000003;
437
+ cnt = 4;
438
+ code = b & 0x00000003;
296
439
  } else if (0xFC == (0xFE & b)) {
297
- cnt = 5;
298
- code = b & 0x00000001;
440
+ cnt = 5;
441
+ code = b & 0x00000001;
299
442
  } else {
300
- cnt = 0;
301
- raise_invalid_unicode(orig, (int)(end - orig), (int)(str - orig));
443
+ cnt = 0;
444
+ raise_invalid_unicode(orig, (int)(end - orig), (int)(str - orig));
302
445
  }
303
446
  str++;
304
447
  for (; 0 < cnt; cnt--, str++) {
305
- b = *(uint8_t*)str;
306
- if (end <= str || 0x80 != (0xC0 & b)) {
307
- raise_invalid_unicode(orig, (int)(end - orig), (int)(str - orig));
308
- }
309
- code = (code << 6) | (b & 0x0000003F);
448
+ b = *(uint8_t *)str;
449
+ if (end <= str || 0x80 != (0xC0 & b)) {
450
+ raise_invalid_unicode(orig, (int)(end - orig), (int)(str - orig));
451
+ }
452
+ code = (code << 6) | (b & 0x0000003F);
310
453
  }
311
454
  if (0x0000FFFF < code) {
312
- uint32_t c1;
313
-
314
- code -= 0x00010000;
315
- c1 = ((code >> 10) & 0x000003FF) + 0x0000D800;
316
- code = (code & 0x000003FF) + 0x0000DC00;
317
- *out->cur++ = '\\';
318
- *out->cur++ = 'u';
319
- for (i = 3; 0 <= i; i--) {
320
- *out->cur++ = hex_chars[(uint8_t)(c1 >> (i * 4)) & 0x0F];
321
- }
322
- }
323
- *out->cur++ = '\\';
324
- *out->cur++ = 'u';
455
+ uint32_t c1;
456
+
457
+ code -= 0x00010000;
458
+ c1 = ((code >> 10) & 0x000003FF) + 0x0000D800;
459
+ code = (code & 0x000003FF) + 0x0000DC00;
460
+ APPEND_CHARS(out->cur, "\\u", 2);
461
+ for (i = 3; 0 <= i; i--) {
462
+ *out->cur++ = hex_chars[(uint8_t)(c1 >> (i * 4)) & 0x0F];
463
+ }
464
+ }
465
+ APPEND_CHARS(out->cur, "\\u", 2);
325
466
  for (i = 3; 0 <= i; i--) {
326
- *out->cur++ = hex_chars[(uint8_t)(code >> (i * 4)) & 0x0F];
467
+ *out->cur++ = hex_chars[(uint8_t)(code >> (i * 4)) & 0x0F];
327
468
  }
328
469
  return str - 1;
329
470
  }
330
471
 
331
- static const char*
332
- check_unicode(const char *str, const char *end, const char *orig) {
333
- uint8_t b = *(uint8_t*)str;
334
- int cnt = 0;
472
+ static const char *check_unicode(const char *str, const char *end, const char *orig) {
473
+ uint8_t b = *(uint8_t *)str;
474
+ int cnt = 0;
335
475
 
336
476
  if (0xC0 == (0xE0 & b)) {
337
- cnt = 1;
477
+ cnt = 1;
338
478
  } else if (0xE0 == (0xF0 & b)) {
339
- cnt = 2;
479
+ cnt = 2;
340
480
  } else if (0xF0 == (0xF8 & b)) {
341
- cnt = 3;
481
+ cnt = 3;
342
482
  } else if (0xF8 == (0xFC & b)) {
343
- cnt = 4;
483
+ cnt = 4;
344
484
  } else if (0xFC == (0xFE & b)) {
345
- cnt = 5;
485
+ cnt = 5;
346
486
  } else {
347
- raise_invalid_unicode(orig, (int)(end - orig), (int)(str - orig));
487
+ raise_invalid_unicode(orig, (int)(end - orig), (int)(str - orig));
348
488
  }
349
489
  str++;
350
490
  for (; 0 < cnt; cnt--, str++) {
351
- b = *(uint8_t*)str;
352
- if (end <= str || 0x80 != (0xC0 & b)) {
353
- raise_invalid_unicode(orig, (int)(end - orig), (int)(str - orig));
354
- }
491
+ b = *(uint8_t *)str;
492
+ if (end <= str || 0x80 != (0xC0 & b)) {
493
+ raise_invalid_unicode(orig, (int)(end - orig), (int)(str - orig));
494
+ }
355
495
  }
356
496
  return str;
357
497
  }
@@ -359,894 +499,1055 @@ check_unicode(const char *str, const char *end, const char *orig) {
359
499
  // Returns 0 if not using circular references, -1 if no further writing is
360
500
  // needed (duplicate), and a positive value if the object was added to the
361
501
  // cache.
362
- long
363
- oj_check_circular(VALUE obj, Out out) {
364
- slot_t id = 0;
365
- slot_t *slot;
502
+ long oj_check_circular(VALUE obj, Out out) {
503
+ slot_t id = 0;
504
+ slot_t *slot;
366
505
 
367
506
  if (Yes == out->opts->circular) {
368
- if (0 == (id = oj_cache8_get(out->circ_cache, obj, &slot))) {
369
- out->circ_cnt++;
370
- id = out->circ_cnt;
371
- *slot = id;
372
- } else {
373
- if (ObjectMode == out->opts->mode) {
374
- assure_size(out, 18);
375
- *out->cur++ = '"';
376
- *out->cur++ = '^';
377
- *out->cur++ = 'r';
378
- dump_ulong(id, out);
379
- *out->cur++ = '"';
380
- }
381
- return -1;
382
- }
507
+ if (0 == (id = oj_cache8_get(out->circ_cache, obj, &slot))) {
508
+ out->circ_cnt++;
509
+ id = out->circ_cnt;
510
+ *slot = id;
511
+ } else {
512
+ if (ObjectMode == out->opts->mode) {
513
+ assure_size(out, 18);
514
+ APPEND_CHARS(out->cur, "\"^r", 3);
515
+ dump_ulong(id, out);
516
+ *out->cur++ = '"';
517
+ }
518
+ return -1;
519
+ }
383
520
  }
384
521
  return (long)id;
385
522
  }
386
523
 
387
- void
388
- oj_dump_time(VALUE obj, Out out, int withZone) {
389
- char buf[64];
390
- char *b = buf + sizeof(buf) - 1;
391
- long size;
392
- char *dot;
393
- int neg = 0;
394
- long one = 1000000000;
395
- long long sec;
396
- long long nsec;
397
-
398
- #ifdef HAVE_RB_TIME_TIMESPEC
524
+ void oj_dump_time(VALUE obj, Out out, int withZone) {
525
+ char buf[64];
526
+ char *b = buf + sizeof(buf) - 1;
527
+ long size;
528
+ char *dot;
529
+ int neg = 0;
530
+ long one = 1000000000;
531
+ long long sec;
532
+ long long nsec;
533
+
399
534
  // rb_time_timespec as well as rb_time_timeeval have a bug that causes an
400
535
  // exception to be raised if a time is before 1970 on 32 bit systems so
401
536
  // check the timespec size and use the ruby calls if a 32 bit system.
402
537
  if (16 <= sizeof(struct timespec)) {
403
- struct timespec ts = rb_time_timespec(obj);
538
+ struct timespec ts = rb_time_timespec(obj);
404
539
 
405
- sec = (long long)ts.tv_sec;
406
- nsec = ts.tv_nsec;
540
+ sec = (long long)ts.tv_sec;
541
+ nsec = ts.tv_nsec;
407
542
  } else {
408
- sec = rb_num2ll(rb_funcall2(obj, oj_tv_sec_id, 0, 0));
409
- nsec = rb_num2ll(rb_funcall2(obj, oj_tv_nsec_id, 0, 0));
543
+ sec = NUM2LL(rb_funcall2(obj, oj_tv_sec_id, 0, 0));
544
+ nsec = NUM2LL(rb_funcall2(obj, oj_tv_nsec_id, 0, 0));
410
545
  }
411
- #else
412
- sec = rb_num2ll(rb_funcall2(obj, oj_tv_sec_id, 0, 0));
413
- nsec = rb_num2ll(rb_funcall2(obj, oj_tv_nsec_id, 0, 0));
414
- #endif
415
546
 
416
547
  *b-- = '\0';
417
548
  if (withZone) {
418
- long tzsecs = NUM2LONG(rb_funcall2(obj, oj_utc_offset_id, 0, 0));
419
- int zneg = (0 > tzsecs);
420
-
421
- if (0 == tzsecs && rb_funcall2(obj, oj_utcq_id, 0, 0)) {
422
- tzsecs = 86400;
423
- }
424
- if (zneg) {
425
- tzsecs = -tzsecs;
426
- }
427
- if (0 == tzsecs) {
428
- *b-- = '0';
429
- } else {
430
- for (; 0 < tzsecs; b--, tzsecs /= 10) {
431
- *b = '0' + (tzsecs % 10);
432
- }
433
- if (zneg) {
434
- *b-- = '-';
435
- }
436
- }
437
- *b-- = 'e';
549
+ long tzsecs = NUM2LONG(rb_funcall2(obj, oj_utc_offset_id, 0, 0));
550
+ int zneg = (0 > tzsecs);
551
+
552
+ if (0 == tzsecs && rb_funcall2(obj, oj_utcq_id, 0, 0)) {
553
+ tzsecs = 86400;
554
+ }
555
+ if (zneg) {
556
+ tzsecs = -tzsecs;
557
+ }
558
+ if (0 == tzsecs) {
559
+ *b-- = '0';
560
+ } else {
561
+ for (; 0 < tzsecs; b--, tzsecs /= 10) {
562
+ *b = '0' + (tzsecs % 10);
563
+ }
564
+ if (zneg) {
565
+ *b-- = '-';
566
+ }
567
+ }
568
+ *b-- = 'e';
438
569
  }
439
570
  if (0 > sec) {
440
- neg = 1;
441
- sec = -sec;
442
- if (0 < nsec) {
443
- nsec = 1000000000 - nsec;
444
- sec--;
445
- }
571
+ neg = 1;
572
+ sec = -sec;
573
+ if (0 < nsec) {
574
+ nsec = 1000000000 - nsec;
575
+ sec--;
576
+ }
446
577
  }
447
578
  dot = b - 9;
448
579
  if (0 < out->opts->sec_prec) {
449
- if (9 > out->opts->sec_prec) {
450
- int i;
451
-
452
- for (i = 9 - out->opts->sec_prec; 0 < i; i--) {
453
- dot++;
454
- nsec = (nsec + 5) / 10;
455
- one /= 10;
456
- }
457
- }
458
- if (one <= nsec) {
459
- nsec -= one;
460
- sec++;
461
- }
462
- for (; dot < b; b--, nsec /= 10) {
463
- *b = '0' + (nsec % 10);
464
- }
465
- *b-- = '.';
580
+ if (9 > out->opts->sec_prec) {
581
+ int i;
582
+
583
+ for (i = 9 - out->opts->sec_prec; 0 < i; i--) {
584
+ dot++;
585
+ nsec = (nsec + 5) / 10;
586
+ one /= 10;
587
+ }
588
+ }
589
+ if (one <= nsec) {
590
+ nsec -= one;
591
+ sec++;
592
+ }
593
+ for (; dot < b; b--, nsec /= 10) {
594
+ *b = '0' + (nsec % 10);
595
+ }
596
+ *b-- = '.';
466
597
  }
467
598
  if (0 == sec) {
468
- *b-- = '0';
599
+ *b-- = '0';
469
600
  } else {
470
- for (; 0 < sec; b--, sec /= 10) {
471
- *b = '0' + (sec % 10);
472
- }
601
+ for (; 0 < sec; b--, sec /= 10) {
602
+ *b = '0' + (sec % 10);
603
+ }
473
604
  }
474
605
  if (neg) {
475
- *b-- = '-';
606
+ *b-- = '-';
476
607
  }
477
608
  b++;
478
609
  size = sizeof(buf) - (b - buf) - 1;
479
610
  assure_size(out, size);
480
- memcpy(out->cur, b, size);
481
- out->cur += size;
611
+ APPEND_CHARS(out->cur, b, size);
482
612
  *out->cur = '\0';
483
613
  }
484
614
 
485
- void
486
- oj_dump_ruby_time(VALUE obj, Out out) {
487
- volatile VALUE rstr = rb_funcall(obj, oj_to_s_id, 0);
615
+ void oj_dump_ruby_time(VALUE obj, Out out) {
616
+ volatile VALUE rstr = oj_safe_string_convert(obj);
488
617
 
489
- oj_dump_cstr(rb_string_value_ptr((VALUE*)&rstr), (int)RSTRING_LEN(rstr), 0, 0, out);
618
+ oj_dump_cstr(RSTRING_PTR(rstr), RSTRING_LEN(rstr), 0, 0, out);
490
619
  }
491
620
 
492
- void
493
- oj_dump_xml_time(VALUE obj, Out out) {
494
- char buf[64];
495
- struct _timeInfo ti;
496
- long one = 1000000000;
497
- int64_t sec;
498
- long long nsec;
499
- long tzsecs = NUM2LONG(rb_funcall2(obj, oj_utc_offset_id, 0, 0));
500
- int tzhour, tzmin;
501
- char tzsign = '+';
502
-
503
- #ifdef HAVE_RB_TIME_TIMESPEC
621
+ void oj_dump_xml_time(VALUE obj, Out out) {
622
+ char buf[64];
623
+ struct _timeInfo ti;
624
+ long one = 1000000000;
625
+ int64_t sec;
626
+ long long nsec;
627
+ long tzsecs = NUM2LONG(rb_funcall2(obj, oj_utc_offset_id, 0, 0));
628
+ int tzhour, tzmin;
629
+ char tzsign = '+';
630
+
504
631
  if (16 <= sizeof(struct timespec)) {
505
- struct timespec ts = rb_time_timespec(obj);
632
+ struct timespec ts = rb_time_timespec(obj);
506
633
 
507
- sec = ts.tv_sec;
508
- nsec = ts.tv_nsec;
634
+ sec = ts.tv_sec;
635
+ nsec = ts.tv_nsec;
509
636
  } else {
510
- sec = rb_num2ll(rb_funcall2(obj, oj_tv_sec_id, 0, 0));
511
- nsec = rb_num2ll(rb_funcall2(obj, oj_tv_nsec_id, 0, 0));
637
+ sec = NUM2LL(rb_funcall2(obj, oj_tv_sec_id, 0, 0));
638
+ nsec = NUM2LL(rb_funcall2(obj, oj_tv_nsec_id, 0, 0));
512
639
  }
513
- #else
514
- sec = rb_num2ll(rb_funcall2(obj, oj_tv_sec_id, 0, 0));
515
- nsec = rb_num2ll(rb_funcall2(obj, oj_tv_nsec_id, 0, 0));
516
- #endif
517
640
 
518
641
  assure_size(out, 36);
519
642
  if (9 > out->opts->sec_prec) {
520
- int i;
521
-
522
- // This is pretty lame but to be compatible with rails and active
523
- // support rounding is not done but instead a floor is done when
524
- // second precision is 3 just to be like rails. sigh.
525
- if (3 == out->opts->sec_prec) {
526
- nsec /= 1000000;
527
- one = 1000;
528
- } else {
529
- for (i = 9 - out->opts->sec_prec; 0 < i; i--) {
530
- nsec = (nsec + 5) / 10;
531
- one /= 10;
532
- }
533
- if (one <= nsec) {
534
- nsec -= one;
535
- sec++;
536
- }
537
- }
643
+ int i;
644
+
645
+ // This is pretty lame but to be compatible with rails and active
646
+ // support rounding is not done but instead a floor is done when
647
+ // second precision is 3 just to be like rails. sigh.
648
+ if (3 == out->opts->sec_prec) {
649
+ nsec /= 1000000;
650
+ one = 1000;
651
+ } else {
652
+ for (i = 9 - out->opts->sec_prec; 0 < i; i--) {
653
+ nsec = (nsec + 5) / 10;
654
+ one /= 10;
655
+ }
656
+ if (one <= nsec) {
657
+ nsec -= one;
658
+ sec++;
659
+ }
660
+ }
538
661
  }
539
662
  // 2012-01-05T23:58:07.123456000+09:00
540
- //tm = localtime(&sec);
663
+ // tm = localtime(&sec);
541
664
  sec += tzsecs;
542
665
  sec_as_time((int64_t)sec, &ti);
543
666
  if (0 > tzsecs) {
544
667
  tzsign = '-';
545
668
  tzhour = (int)(tzsecs / -3600);
546
- tzmin = (int)(tzsecs / -60) - (tzhour * 60);
669
+ tzmin = (int)(tzsecs / -60) - (tzhour * 60);
547
670
  } else {
548
671
  tzhour = (int)(tzsecs / 3600);
549
- tzmin = (int)(tzsecs / 60) - (tzhour * 60);
550
- }
551
- if (0 == nsec || 0 == out->opts->sec_prec) {
552
- if (0 == tzsecs && rb_funcall2(obj, oj_utcq_id, 0, 0)) {
553
- sprintf(buf, "%04d-%02d-%02dT%02d:%02d:%02dZ", ti.year, ti.mon, ti.day, ti.hour, ti.min, ti.sec);
554
- oj_dump_cstr(buf, 20, 0, 0, out);
555
- } else {
556
- sprintf(buf, "%04d-%02d-%02dT%02d:%02d:%02d%c%02d:%02d", ti.year, ti.mon, ti.day, ti.hour, ti.min, ti.sec,
557
- tzsign, tzhour, tzmin);
558
- oj_dump_cstr(buf, 25, 0, 0, out);
559
- }
672
+ tzmin = (int)(tzsecs / 60) - (tzhour * 60);
673
+ }
674
+ if ((0 == nsec && !out->opts->sec_prec_set) || 0 == out->opts->sec_prec) {
675
+ if (0 == tzsecs && rb_funcall2(obj, oj_utcq_id, 0, 0)) {
676
+ int len = sprintf(buf, "%04d-%02d-%02dT%02d:%02d:%02dZ", ti.year, ti.mon, ti.day, ti.hour, ti.min, ti.sec);
677
+ oj_dump_cstr(buf, len, 0, 0, out);
678
+ } else {
679
+ int len = sprintf(buf,
680
+ "%04d-%02d-%02dT%02d:%02d:%02d%c%02d:%02d",
681
+ ti.year,
682
+ ti.mon,
683
+ ti.day,
684
+ ti.hour,
685
+ ti.min,
686
+ ti.sec,
687
+ tzsign,
688
+ tzhour,
689
+ tzmin);
690
+ oj_dump_cstr(buf, len, 0, 0, out);
691
+ }
560
692
  } else if (0 == tzsecs && rb_funcall2(obj, oj_utcq_id, 0, 0)) {
561
- char format[64] = "%04d-%02d-%02dT%02d:%02d:%02d.%09ldZ";
562
- int len = 30;
563
-
564
- if (9 > out->opts->sec_prec) {
565
- format[32] = '0' + out->opts->sec_prec;
566
- len -= 9 - out->opts->sec_prec;
567
- }
568
- sprintf(buf, format, ti.year, ti.mon, ti.day, ti.hour, ti.min, ti.sec, (long)nsec);
569
- oj_dump_cstr(buf, len, 0, 0, out);
693
+ char format[64] = "%04d-%02d-%02dT%02d:%02d:%02d.%09ldZ";
694
+ int len;
695
+
696
+ if (9 > out->opts->sec_prec) {
697
+ format[32] = '0' + out->opts->sec_prec;
698
+ }
699
+ len = sprintf(buf, format, ti.year, ti.mon, ti.day, ti.hour, ti.min, ti.sec, (long)nsec);
700
+ oj_dump_cstr(buf, len, 0, 0, out);
570
701
  } else {
571
- char format[64] = "%04d-%02d-%02dT%02d:%02d:%02d.%09ld%c%02d:%02d";
572
- int len = 35;
573
-
574
- if (9 > out->opts->sec_prec) {
575
- format[32] = '0' + out->opts->sec_prec;
576
- len -= 9 - out->opts->sec_prec;
577
- }
578
- sprintf(buf, format, ti.year, ti.mon, ti.day, ti.hour, ti.min, ti.sec, (long)nsec, tzsign, tzhour, tzmin);
579
- oj_dump_cstr(buf, len, 0, 0, out);
702
+ char format[64] = "%04d-%02d-%02dT%02d:%02d:%02d.%09ld%c%02d:%02d";
703
+ int len;
704
+
705
+ if (9 > out->opts->sec_prec) {
706
+ format[32] = '0' + out->opts->sec_prec;
707
+ }
708
+ len = sprintf(buf, format, ti.year, ti.mon, ti.day, ti.hour, ti.min, ti.sec, (long)nsec, tzsign, tzhour, tzmin);
709
+ oj_dump_cstr(buf, len, 0, 0, out);
580
710
  }
581
711
  }
582
712
 
583
- void
584
- oj_dump_obj_to_json(VALUE obj, Options copts, Out out) {
713
+ void oj_dump_obj_to_json(VALUE obj, Options copts, Out out) {
585
714
  oj_dump_obj_to_json_using_params(obj, copts, out, 0, 0);
586
715
  }
587
716
 
588
- void
589
- oj_dump_obj_to_json_using_params(VALUE obj, Options copts, Out out, int argc, VALUE *argv) {
717
+ void oj_dump_obj_to_json_using_params(VALUE obj, Options copts, Out out, int argc, VALUE *argv) {
590
718
  if (0 == out->buf) {
591
- out->buf = ALLOC_N(char, 4096);
592
- out->end = out->buf + 4095 - BUFFER_EXTRA; // 1 less than end plus extra for possible errors
593
- out->allocated = true;
719
+ oj_out_init(out);
594
720
  }
595
- out->cur = out->buf;
596
721
  out->circ_cnt = 0;
597
- out->opts = copts;
722
+ out->opts = copts;
598
723
  out->hash_cnt = 0;
599
- out->indent = copts->indent;
600
- out->argc = argc;
601
- out->argv = argv;
602
- out->ropts = NULL;
724
+ out->indent = copts->indent;
725
+ out->argc = argc;
726
+ out->argv = argv;
727
+ out->ropts = NULL;
603
728
  if (Yes == copts->circular) {
604
- oj_cache8_new(&out->circ_cache);
729
+ oj_cache8_new(&out->circ_cache);
605
730
  }
606
731
  switch (copts->mode) {
607
- case StrictMode: oj_dump_strict_val(obj, 0, out); break;
608
- case NullMode: oj_dump_null_val(obj, 0, out); break;
609
- case ObjectMode: oj_dump_obj_val(obj, 0, out); break;
610
- case CompatMode: oj_dump_compat_val(obj, 0, out, Yes == copts->to_json); break;
611
- case RailsMode: oj_dump_rails_val(obj, 0, out); break;
612
- case CustomMode: oj_dump_custom_val(obj, 0, out, true); break;
613
- case WabMode: oj_dump_wab_val(obj, 0, out); break;
614
- default: oj_dump_custom_val(obj, 0, out, true); break;
732
+ case StrictMode: oj_dump_strict_val(obj, 0, out); break;
733
+ case NullMode: oj_dump_null_val(obj, 0, out); break;
734
+ case ObjectMode: oj_dump_obj_val(obj, 0, out); break;
735
+ case CompatMode: oj_dump_compat_val(obj, 0, out, Yes == copts->to_json); break;
736
+ case RailsMode: oj_dump_rails_val(obj, 0, out); break;
737
+ case CustomMode: oj_dump_custom_val(obj, 0, out, true); break;
738
+ case WabMode: oj_dump_wab_val(obj, 0, out); break;
739
+ default: oj_dump_custom_val(obj, 0, out, true); break;
615
740
  }
616
741
  if (0 < out->indent) {
617
- switch (*(out->cur - 1)) {
618
- case ']':
619
- case '}':
620
- assure_size(out, 1);
621
- *out->cur++ = '\n';
622
- default:
623
- break;
624
- }
742
+ switch (*(out->cur - 1)) {
743
+ case ']':
744
+ case '}': assure_size(out, 1); *out->cur++ = '\n';
745
+ default: break;
746
+ }
625
747
  }
626
748
  *out->cur = '\0';
627
749
  if (Yes == copts->circular) {
628
- oj_cache8_delete(out->circ_cache);
750
+ oj_cache8_delete(out->circ_cache);
629
751
  }
630
752
  }
631
753
 
632
- void
633
- oj_write_obj_to_file(VALUE obj, const char *path, Options copts) {
634
- char buf[4096];
754
+ void oj_write_obj_to_file(VALUE obj, const char *path, Options copts) {
635
755
  struct _out out;
636
- size_t size;
637
- FILE *f;
638
- int ok;
756
+ size_t size;
757
+ FILE *f;
758
+ int ok;
759
+
760
+ oj_out_init(&out);
639
761
 
640
- out.buf = buf;
641
- out.end = buf + sizeof(buf) - BUFFER_EXTRA;
642
- out.allocated = false;
643
762
  out.omit_nil = copts->dump_opts.omit_nil;
644
763
  oj_dump_obj_to_json(obj, copts, &out);
645
764
  size = out.cur - out.buf;
646
765
  if (0 == (f = fopen(path, "w"))) {
647
- if (out.allocated) {
648
- xfree(out.buf);
649
- }
650
- rb_raise(rb_eIOError, "%s", strerror(errno));
766
+ oj_out_free(&out);
767
+ rb_raise(rb_eIOError, "%s", strerror(errno));
651
768
  }
652
769
  ok = (size == fwrite(out.buf, 1, size, f));
653
- if (out.allocated) {
654
- xfree(out.buf);
770
+
771
+ oj_out_free(&out);
772
+
773
+ if (!ok) {
774
+ int err = ferror(f);
775
+ fclose(f);
776
+
777
+ rb_raise(rb_eIOError, "Write failed. [%d:%s]", err, strerror(err));
655
778
  }
656
779
  fclose(f);
657
- if (!ok) {
658
- int err = ferror(f);
780
+ }
659
781
 
660
- rb_raise(rb_eIOError, "Write failed. [%d:%s]", err, strerror(err));
782
+ #if !IS_WINDOWS
783
+ static void write_ready(int fd) {
784
+ struct pollfd pp;
785
+ int i;
786
+
787
+ pp.fd = fd;
788
+ pp.events = POLLERR | POLLOUT;
789
+ pp.revents = 0;
790
+ if (0 >= (i = poll(&pp, 1, 5000))) {
791
+ if (0 == i || EAGAIN == errno) {
792
+ rb_raise(rb_eIOError, "write timed out");
793
+ }
794
+ rb_raise(rb_eIOError, "write failed. %d %s.", errno, strerror(errno));
661
795
  }
662
796
  }
797
+ #endif
663
798
 
664
- void
665
- oj_write_obj_to_stream(VALUE obj, VALUE stream, Options copts) {
666
- char buf[4096];
799
+ void oj_write_obj_to_stream(VALUE obj, VALUE stream, Options copts) {
667
800
  struct _out out;
668
- ssize_t size;
669
- VALUE clas = rb_obj_class(stream);
801
+ ssize_t size;
802
+ VALUE clas = rb_obj_class(stream);
670
803
  #if !IS_WINDOWS
671
- int fd;
672
- VALUE s;
804
+ int fd;
805
+ VALUE s;
673
806
  #endif
674
807
 
675
- out.buf = buf;
676
- out.end = buf + sizeof(buf) - BUFFER_EXTRA;
677
- out.allocated = false;
808
+ oj_out_init(&out);
809
+
678
810
  out.omit_nil = copts->dump_opts.omit_nil;
679
811
  oj_dump_obj_to_json(obj, copts, &out);
680
812
  size = out.cur - out.buf;
681
813
  if (oj_stringio_class == clas) {
682
- rb_funcall(stream, oj_write_id, 1, rb_str_new(out.buf, size));
814
+ rb_funcall(stream, oj_write_id, 1, rb_str_new(out.buf, size));
683
815
  #if !IS_WINDOWS
684
- } else if (rb_respond_to(stream, oj_fileno_id) &&
685
- Qnil != (s = rb_funcall(stream, oj_fileno_id, 0)) &&
686
- 0 != (fd = FIX2INT(s))) {
687
- if (size != write(fd, out.buf, size)) {
688
- if (out.allocated) {
689
- xfree(out.buf);
690
- }
691
- rb_raise(rb_eIOError, "Write failed. [%d:%s]", errno, strerror(errno));
692
- }
816
+ } else if (rb_respond_to(stream, oj_fileno_id) && Qnil != (s = rb_funcall(stream, oj_fileno_id, 0)) &&
817
+ 0 != (fd = FIX2INT(s))) {
818
+ ssize_t cnt;
819
+ ssize_t total = 0;
820
+
821
+ while (true) {
822
+ if (0 > (cnt = write(fd, out.buf + total, size - total))) {
823
+ if (EAGAIN != errno) {
824
+ rb_raise(rb_eIOError, "write failed. %d %s.", errno, strerror(errno));
825
+ break;
826
+ }
827
+ }
828
+ total += cnt;
829
+ if (size <= total) {
830
+ // Completed
831
+ break;
832
+ }
833
+ write_ready(fd);
834
+ }
693
835
  #endif
694
836
  } else if (rb_respond_to(stream, oj_write_id)) {
695
- rb_funcall(stream, oj_write_id, 1, rb_str_new(out.buf, size));
837
+ rb_funcall(stream, oj_write_id, 1, rb_str_new(out.buf, size));
696
838
  } else {
697
- if (out.allocated) {
698
- xfree(out.buf);
699
- }
700
- rb_raise(rb_eArgError, "to_stream() expected an IO Object.");
701
- }
702
- if (out.allocated) {
703
- xfree(out.buf);
839
+ oj_out_free(&out);
840
+ rb_raise(rb_eArgError, "to_stream() expected an IO Object.");
704
841
  }
842
+ oj_out_free(&out);
705
843
  }
706
844
 
707
- void
708
- oj_dump_str(VALUE obj, int depth, Out out, bool as_ok) {
709
- rb_encoding *enc = rb_to_encoding(rb_obj_encoding(obj));
845
+ void oj_dump_str(VALUE obj, int depth, Out out, bool as_ok) {
846
+ int idx = RB_ENCODING_GET(obj);
710
847
 
711
- if (rb_utf8_encoding() != enc) {
712
- obj = rb_str_conv_enc(obj, enc, rb_utf8_encoding());
848
+ if (oj_utf8_encoding_index != idx) {
849
+ rb_encoding *enc = rb_enc_from_index(idx);
850
+ obj = rb_str_conv_enc(obj, enc, oj_utf8_encoding);
713
851
  }
714
- oj_dump_cstr(rb_string_value_ptr((VALUE*)&obj), (int)RSTRING_LEN(obj), 0, 0, out);
852
+ oj_dump_cstr(RSTRING_PTR(obj), RSTRING_LEN(obj), 0, 0, out);
715
853
  }
716
854
 
717
- void
718
- oj_dump_sym(VALUE obj, int depth, Out out, bool as_ok) {
719
- // This causes a memory leak in 2.5.1. Maybe in other versions as well.
720
- //const char *sym = rb_id2name(SYM2ID(obj));
855
+ void oj_dump_sym(VALUE obj, int depth, Out out, bool as_ok) {
856
+ volatile VALUE s = rb_sym2str(obj);
721
857
 
722
- volatile VALUE s = rb_sym_to_s(obj);
723
-
724
- oj_dump_cstr(rb_string_value_ptr((VALUE*)&s), (int)RSTRING_LEN(s), 0, 0, out);
858
+ oj_dump_cstr(RSTRING_PTR(s), RSTRING_LEN(s), 0, 0, out);
725
859
  }
726
860
 
727
- static void
728
- debug_raise(const char *orig, size_t cnt, int line) {
729
- char buf[1024];
730
- char *b = buf;
731
- const char *s = orig;
732
- const char *s_end = s + cnt;
861
+ static void debug_raise(const char *orig, size_t cnt, int line) {
862
+ char buf[1024];
863
+ char *b = buf;
864
+ const char *s = orig;
865
+ const char *s_end = s + cnt;
733
866
 
734
867
  if (32 < s_end - s) {
735
- s_end = s + 32;
868
+ s_end = s + 32;
736
869
  }
737
870
  for (; s < s_end; s++) {
738
- b += sprintf(b, " %02x", *s);
871
+ b += sprintf(b, " %02x", *s);
739
872
  }
740
873
  *b = '\0';
741
874
  rb_raise(oj_json_generator_error_class, "Partial character in string. %s @ %d", buf, line);
742
875
  }
743
876
 
744
- void
745
- oj_dump_raw_json(VALUE obj, int depth, Out out) {
877
+ void oj_dump_raw_json(VALUE obj, int depth, Out out) {
746
878
  if (oj_string_writer_class == rb_obj_class(obj)) {
747
- StrWriter sw = (StrWriter)DATA_PTR(obj);
748
- size_t len = sw->out.cur - sw->out.buf;
879
+ StrWriter sw;
880
+ size_t len;
881
+
882
+ sw = oj_str_writer_unwrap(obj);
883
+ len = sw->out.cur - sw->out.buf;
749
884
 
750
- if (0 < len) {
751
- len--;
752
- }
753
- oj_dump_raw(sw->out.buf, len, out);
885
+ if (0 < len) {
886
+ len--;
887
+ }
888
+ oj_dump_raw(sw->out.buf, len, out);
754
889
  } else {
755
- volatile VALUE jv;
890
+ volatile VALUE jv;
891
+
892
+ TRACE(out->opts->trace, "raw_json", obj, depth + 1, TraceRubyIn);
893
+ jv = rb_funcall(obj, oj_raw_json_id, 2, RB_INT2NUM(depth), RB_INT2NUM(out->indent));
894
+ TRACE(out->opts->trace, "raw_json", obj, depth + 1, TraceRubyOut);
895
+ oj_dump_raw(RSTRING_PTR(jv), (size_t)RSTRING_LEN(jv), out);
896
+ }
897
+ }
898
+
899
+ #ifdef HAVE_SIMD_NEON
900
+ typedef struct _neon_match_result {
901
+ uint8x16_t needs_escape;
902
+ bool has_some_hibit;
903
+ bool do_unicode_validation;
904
+ } neon_match_result;
756
905
 
757
- if (Yes == out->opts->trace) {
758
- oj_trace("raw_json", obj, __FILE__, __LINE__, depth + 1, TraceRubyIn);
759
- }
760
- jv = rb_funcall(obj, oj_raw_json_id, 2, RB_INT2NUM(depth), RB_INT2NUM(out->indent));
761
- if (Yes == out->opts->trace) {
762
- oj_trace("raw_json", obj, __FILE__, __LINE__, depth + 1, TraceRubyOut);
763
- }
764
- oj_dump_raw(rb_string_value_ptr((VALUE*)&jv), (size_t)RSTRING_LEN(jv), out);
906
+ #if defined(__clang__) || defined(__GNUC__)
907
+ #define FORCE_INLINE __attribute__((always_inline))
908
+ #else
909
+ #define FORCE_INLINE
910
+ #endif
911
+
912
+ static inline FORCE_INLINE neon_match_result
913
+ neon_update(const char *str, uint8x16x4_t *cmap_neon, int neon_table_size, bool do_unicode_validation, bool has_hi) {
914
+ neon_match_result result = {.has_some_hibit = false, .do_unicode_validation = false};
915
+
916
+ uint8x16_t chunk = vld1q_u8((const unsigned char *)str);
917
+ uint8x16_t tmp1 = vqtbl4q_u8(cmap_neon[0], chunk);
918
+ uint8x16_t tmp2 = vqtbl4q_u8(cmap_neon[1], veorq_u8(chunk, vdupq_n_u8(0x40)));
919
+ result.needs_escape = vorrq_u8(tmp1, tmp2);
920
+ if (neon_table_size > 2) {
921
+ uint8x16_t tmp3 = vqtbl4q_u8(cmap_neon[2], veorq_u8(chunk, vdupq_n_u8(0x80)));
922
+ uint8x16_t tmp4 = vqtbl4q_u8(cmap_neon[3], veorq_u8(chunk, vdupq_n_u8(0xc0)));
923
+ result.needs_escape = vorrq_u8(result.needs_escape, vorrq_u8(tmp4, tmp3));
924
+ }
925
+ if (has_hi && do_unicode_validation) {
926
+ uint8x16_t has_some_hibit = vandq_u8(chunk, vdupq_n_u8(0x80));
927
+ result.has_some_hibit = vmaxvq_u8(has_some_hibit) != 0;
928
+ result.do_unicode_validation = has_hi && do_unicode_validation && result.has_some_hibit;
765
929
  }
930
+ return result;
766
931
  }
767
932
 
768
- void
769
- oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out out) {
770
- size_t size;
771
- char *cmap;
772
- const char *orig = str;
933
+ #endif /* HAVE_SIMD_NEON */
934
+
935
+ void oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out out) {
936
+ size_t size;
937
+ char *cmap;
938
+ #ifdef HAVE_SIMD_NEON
939
+ uint8x16x4_t *cmap_neon = NULL;
940
+ int neon_table_size;
941
+ #endif /* HAVE_SIMD_NEON */
942
+ const char *orig = str;
943
+ bool has_hi = false;
944
+ bool do_unicode_validation = false;
773
945
 
774
946
  switch (out->opts->escape_mode) {
775
947
  case NLEsc:
776
- cmap = newline_friendly_chars;
777
- size = newline_friendly_size((uint8_t*)str, cnt);
778
- break;
948
+ cmap = newline_friendly_chars;
949
+ size = newline_friendly_size((uint8_t *)str, cnt);
950
+ break;
779
951
  case ASCIIEsc:
780
- cmap = ascii_friendly_chars;
781
- size = ascii_friendly_size((uint8_t*)str, cnt);
782
- break;
952
+ cmap = ascii_friendly_chars;
953
+ size = ascii_friendly_size((uint8_t *)str, cnt);
954
+ break;
955
+ case SlashEsc:
956
+ has_hi = true;
957
+ cmap = slash_friendly_chars;
958
+ size = slash_friendly_size((uint8_t *)str, cnt);
959
+ break;
783
960
  case XSSEsc:
784
- cmap = xss_friendly_chars;
785
- size = xss_friendly_size((uint8_t*)str, cnt);
786
- break;
961
+ cmap = xss_friendly_chars;
962
+ size = xss_friendly_size((uint8_t *)str, cnt);
963
+ break;
787
964
  case JXEsc:
788
- cmap = hixss_friendly_chars;
789
- size = hixss_friendly_size((uint8_t*)str, cnt);
790
- break;
791
- case RailsXEsc:
792
- cmap = rails_xss_friendly_chars;
793
- size = rails_xss_friendly_size((uint8_t*)str, cnt);
794
- break;
795
- case RailsEsc:
796
- cmap = rails_friendly_chars;
797
- size = rails_friendly_size((uint8_t*)str, cnt);
798
- break;
965
+ cmap = hixss_friendly_chars;
966
+ size = hixss_friendly_size((uint8_t *)str, cnt);
967
+ do_unicode_validation = true;
968
+ break;
969
+ case RailsXEsc: {
970
+ long sz;
971
+
972
+ cmap = rails_xss_friendly_chars;
973
+ #ifdef HAVE_SIMD_NEON
974
+ cmap_neon = rails_xss_friendly_chars_neon;
975
+ neon_table_size = 4;
976
+ #endif /* HAVE_NEON_SIMD */
977
+ sz = rails_xss_friendly_size((uint8_t *)str, cnt);
978
+ if (sz < 0) {
979
+ has_hi = true;
980
+ size = (size_t)-sz;
981
+ } else {
982
+ size = (size_t)sz;
983
+ }
984
+ do_unicode_validation = true;
985
+ break;
986
+ }
987
+ case RailsEsc: {
988
+ long sz;
989
+ cmap = rails_friendly_chars;
990
+ #ifdef HAVE_SIMD_NEON
991
+ cmap_neon = rails_friendly_chars_neon;
992
+ neon_table_size = 2;
993
+ #endif /* HAVE_NEON_SIMD */
994
+ sz = rails_friendly_size((uint8_t *)str, cnt);
995
+ if (sz < 0) {
996
+ has_hi = true;
997
+ size = (size_t)-sz;
998
+ } else {
999
+ size = (size_t)sz;
1000
+ }
1001
+ do_unicode_validation = true;
1002
+ break;
1003
+ }
799
1004
  case JSONEsc:
800
- default:
801
- cmap = hibit_friendly_chars;
802
- size = hibit_friendly_size((uint8_t*)str, cnt);
1005
+ default: cmap = hibit_friendly_chars;
1006
+ #ifdef HAVE_SIMD_NEON
1007
+ cmap_neon = hibit_friendly_chars_neon;
1008
+ neon_table_size = 2;
1009
+ #endif /* HAVE_NEON_SIMD */
1010
+ size = hibit_friendly_size((uint8_t *)str, cnt);
803
1011
  }
804
1012
  assure_size(out, size + BUFFER_EXTRA);
805
1013
  *out->cur++ = '"';
806
1014
 
807
1015
  if (escape1) {
808
- *out->cur++ = '\\';
809
- *out->cur++ = 'u';
810
- *out->cur++ = '0';
811
- *out->cur++ = '0';
812
- dump_hex((uint8_t)*str, out);
813
- cnt--;
814
- size--;
815
- str++;
816
- is_sym = 0; // just to make sure
817
- }
818
- if (cnt == size) {
819
- if (is_sym) {
820
- *out->cur++ = ':';
821
- }
822
- for (; '\0' != *str; str++) {
823
- *out->cur++ = *str;
824
- }
825
- *out->cur++ = '"';
1016
+ APPEND_CHARS(out->cur, "\\u00", 4);
1017
+ dump_hex((uint8_t)*str, out);
1018
+ cnt--;
1019
+ size--;
1020
+ str++;
1021
+ is_sym = 0; // just to make sure
1022
+ }
1023
+ if (cnt == size && !has_hi) {
1024
+ if (is_sym) {
1025
+ *out->cur++ = ':';
1026
+ }
1027
+ APPEND_CHARS(out->cur, str, cnt);
1028
+ *out->cur++ = '"';
826
1029
  } else {
827
- const char *end = str + cnt;
828
- const char *check_start = str;
829
-
830
- if (is_sym) {
831
- *out->cur++ = ':';
832
- }
833
- for (; str < end; str++) {
834
- switch (cmap[(uint8_t)*str]) {
835
- case '1':
836
- if ((JXEsc == out->opts->escape_mode || RailsXEsc == out->opts->escape_mode) && check_start <= str) {
837
- if (0 != (0x80 & (uint8_t)*str)) {
838
- if (0xC0 == (0xC0 & (uint8_t)*str)) {
839
- check_start = check_unicode(str, end, orig);
840
- } else {
841
- raise_invalid_unicode(orig, (int)(end - orig), (int)(str - orig));
842
- }
843
- }
844
- }
845
- *out->cur++ = *str;
846
- break;
847
- case '2':
848
- *out->cur++ = '\\';
849
- switch (*str) {
850
- case '\\': *out->cur++ = '\\'; break;
851
- case '\b': *out->cur++ = 'b'; break;
852
- case '\t': *out->cur++ = 't'; break;
853
- case '\n': *out->cur++ = 'n'; break;
854
- case '\f': *out->cur++ = 'f'; break;
855
- case '\r': *out->cur++ = 'r'; break;
856
- default: *out->cur++ = *str; break;
857
- }
858
- break;
859
- case '3': // Unicode
860
- if (0xe2 == (uint8_t)*str && (JXEsc == out->opts->escape_mode || RailsXEsc == out->opts->escape_mode) && 2 <= end - str) {
861
- if (0x80 == (uint8_t)str[1] && (0xa8 == (uint8_t)str[2] || 0xa9 == (uint8_t)str[2])) {
862
- str = dump_unicode(str, end, out, orig);
863
- } else {
864
- check_start = check_unicode(str, end, orig);
865
- *out->cur++ = *str;
866
- }
867
- break;
868
- }
869
- str = dump_unicode(str, end, out, orig);
870
- break;
871
- case '6': // control characters
872
- if (*(uint8_t*)str < 0x80) {
873
- *out->cur++ = '\\';
874
- *out->cur++ = 'u';
875
- *out->cur++ = '0';
876
- *out->cur++ = '0';
877
- dump_hex((uint8_t)*str, out);
878
- } else {
879
- if (0xe2 == (uint8_t)*str && (JXEsc == out->opts->escape_mode || RailsXEsc == out->opts->escape_mode) && 2 <= end - str) {
880
- if (0x80 == (uint8_t)str[1] && (0xa8 == (uint8_t)str[2] || 0xa9 == (uint8_t)str[2])) {
881
- str = dump_unicode(str, end, out, orig);
882
- } else {
883
- check_start = check_unicode(str, end, orig);
884
- *out->cur++ = *str;
885
- }
886
- break;
887
- }
888
- str = dump_unicode(str, end, out, orig);
889
- }
890
- break;
891
- default:
892
- break; // ignore, should never happen if the table is correct
893
- }
894
- }
895
- *out->cur++ = '"';
896
- }
897
- if ((JXEsc == out->opts->escape_mode || RailsXEsc == out->opts->escape_mode) && 0 < str - orig && 0 != (0x80 & *(str - 1))) {
898
- uint8_t c = (uint8_t)*(str - 1);
899
- int i;
900
- int scnt = (int)(str - orig);
901
-
902
- // Last utf-8 characters must be 0x10xxxxxx. The start must be
903
- // 0x110xxxxx for 2 characters, 0x1110xxxx for 3, and 0x11110xxx for
904
- // 4.
905
- if (0 != (0x40 & c)) {
906
- debug_raise(orig, cnt, __LINE__);
907
- }
908
- for (i = 1; i < (int)scnt && i < 4; i++) {
909
- c = str[-1 - i];
910
- if (0x80 != (0xC0 & c)) {
911
- switch (i) {
912
- case 1:
913
- if (0xC0 != (0xE0 & c)) {
914
- debug_raise(orig, cnt, __LINE__);
915
- }
916
- break;
917
- case 2:
918
- if (0xE0 != (0xF0 & c)) {
919
- debug_raise(orig, cnt, __LINE__);
920
- }
921
- break;
922
- case 3:
923
- if (0xF0 != (0xF8 & c)) {
924
- debug_raise(orig, cnt, __LINE__);
925
- }
926
- break;
927
- default: // can't get here
928
- break;
929
- }
930
- break;
931
- }
932
- }
933
- if (i == (int)scnt || 4 <= i) {
934
- debug_raise(orig, cnt, __LINE__);
935
- }
1030
+ const char *end = str + cnt;
1031
+ const char *check_start = str;
1032
+
1033
+ if (is_sym) {
1034
+ *out->cur++ = ':';
1035
+ }
1036
+ #ifdef HAVE_SIMD_NEON
1037
+ const char *chunk_start;
1038
+ const char *chunk_end;
1039
+ const char *cursor = str;
1040
+ int neon_state = (cmap_neon != NULL) ? 1 : 4;
1041
+ char matches[16];
1042
+ bool do_hi_validation = false;
1043
+ // uint64_t neon_match_mask = 0;
1044
+ #define SEARCH_FLUSH \
1045
+ if (str > cursor) { \
1046
+ APPEND_CHARS(out->cur, cursor, str - cursor); \
1047
+ cursor = str; \
1048
+ }
1049
+
1050
+ loop:
1051
+ #endif /* HAVE_SIMD_NEON */
1052
+ for (; str < end; str++) {
1053
+ char action = 0;
1054
+ #ifdef HAVE_SIMD_NEON
1055
+ /* neon_state:
1056
+ * 1: Scanning for matches. There must be at least
1057
+ sizeof(uint8x16_t) bytes of input data to use SIMD and
1058
+ cmap_neon must be non-null.
1059
+ * 2: Matches have been found. Will set str to the position of the
1060
+ * next match and set the state to 3.
1061
+ * If there are no more matches it will transition to state 1.
1062
+ * 4: Fallback to the scalar algorithm. Not enough data to use
1063
+ * SIMD.
1064
+ */
1065
+ #define NEON_SET_STATE(state) \
1066
+ neon_state = state; \
1067
+ goto loop;
1068
+ #define NEON_RETURN_TO_STATE(state) neon_state = state;
1069
+ switch (neon_state) {
1070
+ case 1: {
1071
+ while (true) {
1072
+ const char *chunk_ptr = NULL;
1073
+ if (str + sizeof(uint8x16_t) <= end) {
1074
+ chunk_ptr = str;
1075
+ chunk_start = str;
1076
+ chunk_end = str + sizeof(uint8x16_t);
1077
+ } else if ((end - str) >= SIMD_MINIMUM_THRESHOLD) {
1078
+ memset(out->cur, 'A', sizeof(uint8x16_t));
1079
+ memcpy(out->cur, str, (end - str));
1080
+ chunk_ptr = out->cur;
1081
+ chunk_start = str;
1082
+ chunk_end = end;
1083
+ } else {
1084
+ SEARCH_FLUSH;
1085
+ NEON_SET_STATE(4);
1086
+ break; /* Unreachable */
1087
+ }
1088
+ neon_match_result result = neon_update(chunk_ptr,
1089
+ cmap_neon,
1090
+ neon_table_size,
1091
+ do_unicode_validation,
1092
+ has_hi);
1093
+ if ((result.do_unicode_validation) || vmaxvq_u8(result.needs_escape) != 0) {
1094
+ SEARCH_FLUSH;
1095
+ uint8x16_t actions = vaddq_u8(result.needs_escape, vdupq_n_u8('1'));
1096
+ do_hi_validation = result.do_unicode_validation;
1097
+ vst1q_u8((unsigned char *)matches, actions);
1098
+ NEON_SET_STATE(2);
1099
+ break; /* Unreachable */
1100
+ }
1101
+ str = chunk_end;
1102
+ }
1103
+ // We must have run out of data to use SIMD. Go to state 4.
1104
+ SEARCH_FLUSH;
1105
+ NEON_SET_STATE(4);
1106
+ } break;
1107
+ case 3:
1108
+ cursor = str;
1109
+ // This fall through is intentional. We return to state 3 after we process
1110
+ // a byte (or multiple). We return to this state to ensure the cursor is
1111
+ // pointing to the correct location. We then resume looking for matches
1112
+ // within the previously processed chunk.
1113
+ case 2:
1114
+ if (str >= chunk_end) {
1115
+ NEON_SET_STATE(1);
1116
+ }
1117
+ if (!do_hi_validation) {
1118
+ long i = str - chunk_start;
1119
+ for (; str < chunk_end; i++) {
1120
+ if ((action = matches[i]) != '1') {
1121
+ break;
1122
+ }
1123
+ *out->cur++ = *str++;
1124
+ }
1125
+ // The loop above may have advanced str and directly output them to out->cur.
1126
+ // Ensure cursor is set appropriately.
1127
+ cursor = str;
1128
+ if (str >= chunk_end) {
1129
+ // We must have advanced past the end... we are done.
1130
+ NEON_SET_STATE(1);
1131
+ }
1132
+ } else {
1133
+ long match_index = str - chunk_start;
1134
+ action = matches[match_index];
1135
+ }
1136
+ NEON_RETURN_TO_STATE(3);
1137
+ break;
1138
+ case 4: action = cmap[(uint8_t)*str];
1139
+ }
1140
+ #undef NEON_SET_STATE
1141
+ #undef NEON_RETURN_TO_STATE
1142
+ #else
1143
+ action = cmap[(uint8_t)*str];
1144
+ #endif /* HAVE_SIMD_NEON */
1145
+ switch (action) {
1146
+ case '1':
1147
+ if (do_unicode_validation && check_start <= str) {
1148
+ if (0 != (0x80 & (uint8_t)*str)) {
1149
+ if (0xC0 == (0xC0 & (uint8_t)*str)) {
1150
+ check_start = check_unicode(str, end, orig);
1151
+ } else {
1152
+ raise_invalid_unicode(orig, (int)(end - orig), (int)(str - orig));
1153
+ }
1154
+ }
1155
+ }
1156
+ *out->cur++ = *str;
1157
+ break;
1158
+ case '2':
1159
+ *out->cur++ = '\\';
1160
+ switch (*str) {
1161
+ case '\\': *out->cur++ = '\\'; break;
1162
+ case '\b': *out->cur++ = 'b'; break;
1163
+ case '\t': *out->cur++ = 't'; break;
1164
+ case '\n': *out->cur++ = 'n'; break;
1165
+ case '\f': *out->cur++ = 'f'; break;
1166
+ case '\r': *out->cur++ = 'r'; break;
1167
+ default: *out->cur++ = *str; break;
1168
+ }
1169
+ break;
1170
+ case '3': // Unicode
1171
+ if (0xe2 == (uint8_t)*str && do_unicode_validation && 2 <= end - str) {
1172
+ if (0x80 == (uint8_t)str[1] && (0xa8 == (uint8_t)str[2] || 0xa9 == (uint8_t)str[2])) {
1173
+ str = dump_unicode(str, end, out, orig);
1174
+ } else {
1175
+ check_start = check_unicode(str, end, orig);
1176
+ *out->cur++ = *str;
1177
+ }
1178
+ break;
1179
+ }
1180
+ str = dump_unicode(str, end, out, orig);
1181
+ break;
1182
+ case '6': // control characters
1183
+ if (*(uint8_t *)str < 0x80) {
1184
+ if (0 == (uint8_t)*str && out->opts->dump_opts.omit_null_byte) {
1185
+ break;
1186
+ }
1187
+ APPEND_CHARS(out->cur, "\\u00", 4);
1188
+ dump_hex((uint8_t)*str, out);
1189
+ } else {
1190
+ if (0xe2 == (uint8_t)*str && do_unicode_validation && 2 <= end - str) {
1191
+ if (0x80 == (uint8_t)str[1] && (0xa8 == (uint8_t)str[2] || 0xa9 == (uint8_t)str[2])) {
1192
+ str = dump_unicode(str, end, out, orig);
1193
+ } else {
1194
+ check_start = check_unicode(str, end, orig);
1195
+ *out->cur++ = *str;
1196
+ }
1197
+ break;
1198
+ }
1199
+ str = dump_unicode(str, end, out, orig);
1200
+ }
1201
+ break;
1202
+ default: break; // ignore, should never happen if the table is correct
1203
+ }
1204
+ }
1205
+ *out->cur++ = '"';
1206
+ }
1207
+ if (do_unicode_validation && 0 < str - orig && 0 != (0x80 & *(str - 1))) {
1208
+ uint8_t c = (uint8_t)*(str - 1);
1209
+ int i;
1210
+ int scnt = (int)(str - orig);
1211
+
1212
+ // Last utf-8 characters must be 0x10xxxxxx. The start must be
1213
+ // 0x110xxxxx for 2 characters, 0x1110xxxx for 3, and 0x11110xxx for
1214
+ // 4.
1215
+ if (0 != (0x40 & c)) {
1216
+ debug_raise(orig, cnt, __LINE__);
1217
+ }
1218
+ for (i = 1; i < (int)scnt && i < 4; i++) {
1219
+ c = str[-1 - i];
1220
+ if (0x80 != (0xC0 & c)) {
1221
+ switch (i) {
1222
+ case 1:
1223
+ if (0xC0 != (0xE0 & c)) {
1224
+ debug_raise(orig, cnt, __LINE__);
1225
+ }
1226
+ break;
1227
+ case 2:
1228
+ if (0xE0 != (0xF0 & c)) {
1229
+ debug_raise(orig, cnt, __LINE__);
1230
+ }
1231
+ break;
1232
+ case 3:
1233
+ if (0xF0 != (0xF8 & c)) {
1234
+ debug_raise(orig, cnt, __LINE__);
1235
+ }
1236
+ break;
1237
+ default: // can't get here
1238
+ break;
1239
+ }
1240
+ break;
1241
+ }
1242
+ }
1243
+ if (i == (int)scnt || 4 <= i) {
1244
+ debug_raise(orig, cnt, __LINE__);
1245
+ }
936
1246
  }
937
1247
  *out->cur = '\0';
938
1248
  }
939
1249
 
940
- void
941
- oj_dump_class(VALUE obj, int depth, Out out, bool as_ok) {
942
- const char *s = rb_class2name(obj);
1250
+ void oj_dump_class(VALUE obj, int depth, Out out, bool as_ok) {
1251
+ const char *s = rb_class2name(obj);
943
1252
 
944
1253
  oj_dump_cstr(s, strlen(s), 0, 0, out);
945
1254
  }
946
1255
 
947
- void
948
- oj_dump_obj_to_s(VALUE obj, Out out) {
949
- volatile VALUE rstr = rb_funcall(obj, oj_to_s_id, 0);
1256
+ void oj_dump_obj_to_s(VALUE obj, Out out) {
1257
+ volatile VALUE rstr = oj_safe_string_convert(obj);
950
1258
 
951
- oj_dump_cstr(rb_string_value_ptr((VALUE*)&rstr), (int)RSTRING_LEN(rstr), 0, 0, out);
1259
+ oj_dump_cstr(RSTRING_PTR(rstr), RSTRING_LEN(rstr), 0, 0, out);
952
1260
  }
953
1261
 
954
- void
955
- oj_dump_raw(const char *str, size_t cnt, Out out) {
1262
+ void oj_dump_raw(const char *str, size_t cnt, Out out) {
956
1263
  assure_size(out, cnt + 10);
957
- memcpy(out->cur, str, cnt);
958
- out->cur += cnt;
1264
+ APPEND_CHARS(out->cur, str, cnt);
959
1265
  *out->cur = '\0';
960
1266
  }
961
1267
 
962
- void
963
- oj_grow_out(Out out, size_t len) {
964
- size_t size = out->end - out->buf;
965
- long pos = out->cur - out->buf;
966
- char *buf = out->buf;
1268
+ void oj_out_init(Out out) {
1269
+ out->buf = out->stack_buffer;
1270
+ out->cur = out->buf;
1271
+ out->end = out->buf + sizeof(out->stack_buffer) - BUFFER_EXTRA;
1272
+ out->allocated = false;
1273
+ }
1274
+
1275
+ void oj_out_free(Out out) {
1276
+ if (out->allocated) {
1277
+ OJ_R_FREE(out->buf); // TBD
1278
+ }
1279
+ }
1280
+
1281
+ void oj_grow_out(Out out, size_t len) {
1282
+ size_t size = out->end - out->buf;
1283
+ long pos = out->cur - out->buf;
1284
+ char *buf = out->buf;
967
1285
 
968
1286
  size *= 2;
969
1287
  if (size <= len * 2 + pos) {
970
- size += len;
1288
+ size += len;
971
1289
  }
972
1290
  if (out->allocated) {
973
- REALLOC_N(buf, char, (size + BUFFER_EXTRA));
1291
+ OJ_R_REALLOC_N(buf, char, (size + BUFFER_EXTRA));
974
1292
  } else {
975
- buf = ALLOC_N(char, (size + BUFFER_EXTRA));
976
- out->allocated = true;
977
- memcpy(buf, out->buf, out->end - out->buf + BUFFER_EXTRA);
1293
+ buf = OJ_R_ALLOC_N(char, (size + BUFFER_EXTRA));
1294
+ out->allocated = true;
1295
+ memcpy(buf, out->buf, out->end - out->buf + BUFFER_EXTRA);
978
1296
  }
979
1297
  if (0 == buf) {
980
- rb_raise(rb_eNoMemError, "Failed to create string. [%d:%s]", ENOSPC, strerror(ENOSPC));
1298
+ rb_raise(rb_eNoMemError, "Failed to create string. [%d:%s]", ENOSPC, strerror(ENOSPC));
981
1299
  }
982
1300
  out->buf = buf;
983
1301
  out->end = buf + size;
984
1302
  out->cur = out->buf + pos;
985
1303
  }
986
1304
 
987
- void
988
- oj_dump_nil(VALUE obj, int depth, Out out, bool as_ok) {
1305
+ void oj_dump_nil(VALUE obj, int depth, Out out, bool as_ok) {
989
1306
  assure_size(out, 4);
990
- *out->cur++ = 'n';
991
- *out->cur++ = 'u';
992
- *out->cur++ = 'l';
993
- *out->cur++ = 'l';
1307
+ APPEND_CHARS(out->cur, "null", 4);
994
1308
  *out->cur = '\0';
995
1309
  }
996
1310
 
997
- void
998
- oj_dump_true(VALUE obj, int depth, Out out, bool as_ok) {
1311
+ void oj_dump_true(VALUE obj, int depth, Out out, bool as_ok) {
999
1312
  assure_size(out, 4);
1000
- *out->cur++ = 't';
1001
- *out->cur++ = 'r';
1002
- *out->cur++ = 'u';
1003
- *out->cur++ = 'e';
1313
+ APPEND_CHARS(out->cur, "true", 4);
1004
1314
  *out->cur = '\0';
1005
1315
  }
1006
1316
 
1007
- void
1008
- oj_dump_false(VALUE obj, int depth, Out out, bool as_ok) {
1317
+ void oj_dump_false(VALUE obj, int depth, Out out, bool as_ok) {
1009
1318
  assure_size(out, 5);
1010
- *out->cur++ = 'f';
1011
- *out->cur++ = 'a';
1012
- *out->cur++ = 'l';
1013
- *out->cur++ = 's';
1014
- *out->cur++ = 'e';
1319
+ APPEND_CHARS(out->cur, "false", 5);
1015
1320
  *out->cur = '\0';
1016
1321
  }
1017
1322
 
1018
- void
1019
- oj_dump_fixnum(VALUE obj, int depth, Out out, bool as_ok) {
1020
- char buf[32];
1021
- char *b = buf + sizeof(buf) - 1;
1022
- long long num = rb_num2ll(obj);
1023
- int neg = 0;
1024
- bool dump_as_string = false;
1323
+ static const char digits_table[] = "\
1324
+ 00010203040506070809\
1325
+ 10111213141516171819\
1326
+ 20212223242526272829\
1327
+ 30313233343536373839\
1328
+ 40414243444546474849\
1329
+ 50515253545556575859\
1330
+ 60616263646566676869\
1331
+ 70717273747576777879\
1332
+ 80818283848586878889\
1333
+ 90919293949596979899";
1334
+
1335
+ char *oj_longlong_to_string(long long num, bool negative, char *buf) {
1336
+ while (100 <= num) {
1337
+ unsigned idx = num % 100 * 2;
1338
+ *buf-- = digits_table[idx + 1];
1339
+ *buf-- = digits_table[idx];
1340
+ num /= 100;
1341
+ }
1342
+ if (num < 10) {
1343
+ *buf-- = num + '0';
1344
+ } else {
1345
+ *buf-- = digits_table[num * 2 + 1];
1346
+ *buf-- = digits_table[num * 2];
1347
+ }
1348
+
1349
+ if (negative) {
1350
+ *buf = '-';
1351
+ } else {
1352
+ buf++;
1353
+ }
1354
+ return buf;
1355
+ }
1025
1356
 
1026
- if (out->opts->integer_range_max != 0 && out->opts->integer_range_min != 0 &&
1027
- (out->opts->integer_range_max < num || out->opts->integer_range_min > num)) {
1028
- dump_as_string = true;
1357
+ void oj_dump_fixnum(VALUE obj, int depth, Out out, bool as_ok) {
1358
+ char buf[32];
1359
+ char *b = buf + sizeof(buf) - 1;
1360
+ long long num = NUM2LL(obj);
1361
+ bool neg = false;
1362
+ size_t cnt = 0;
1363
+ bool dump_as_string = false;
1364
+
1365
+ if (out->opts->int_range_max != 0 && out->opts->int_range_min != 0 &&
1366
+ (out->opts->int_range_max < num || out->opts->int_range_min > num)) {
1367
+ dump_as_string = true;
1029
1368
  }
1030
1369
  if (0 > num) {
1031
- neg = 1;
1032
- num = -num;
1370
+ neg = true;
1371
+ num = -num;
1033
1372
  }
1034
1373
  *b-- = '\0';
1035
1374
 
1036
1375
  if (dump_as_string) {
1037
- *b-- = '"';
1376
+ *b-- = '"';
1038
1377
  }
1039
1378
  if (0 < num) {
1040
- for (; 0 < num; num /= 10, b--) {
1041
- *b = (num % 10) + '0';
1042
- }
1043
- if (neg) {
1044
- *b = '-';
1045
- } else {
1046
- b++;
1047
- }
1379
+ b = oj_longlong_to_string(num, neg, b);
1048
1380
  } else {
1049
- *b = '0';
1381
+ *b = '0';
1050
1382
  }
1051
1383
  if (dump_as_string) {
1052
- *--b = '"';
1053
- }
1054
- assure_size(out, (sizeof(buf) - (b - buf)));
1055
- for (; '\0' != *b; b++) {
1056
- *out->cur++ = *b;
1384
+ *--b = '"';
1057
1385
  }
1386
+ cnt = sizeof(buf) - (b - buf) - 1;
1387
+ assure_size(out, cnt);
1388
+ APPEND_CHARS(out->cur, b, cnt);
1058
1389
  *out->cur = '\0';
1059
1390
  }
1060
1391
 
1061
- void
1062
- oj_dump_bignum(VALUE obj, int depth, Out out, bool as_ok) {
1063
- volatile VALUE rs = rb_big2str(obj, 10);
1064
- int cnt = (int)RSTRING_LEN(rs);
1065
- bool dump_as_string = false;
1066
-
1067
- if (out->opts->integer_range_max != 0 || out->opts->integer_range_min != 0) { // Bignum cannot be inside of Fixnum range
1068
- dump_as_string = true;
1069
- assure_size(out, cnt + 2);
1070
- *out->cur++ = '"';
1071
- } else {
1072
- assure_size(out, cnt);
1073
- }
1074
-
1075
- memcpy(out->cur, rb_string_value_ptr((VALUE*)&rs), cnt);
1076
- out->cur += cnt;
1077
-
1078
- if(dump_as_string) {
1079
- *out->cur++ = '"';
1080
- }
1392
+ void oj_dump_bignum(VALUE obj, int depth, Out out, bool as_ok) {
1393
+ volatile VALUE rs = rb_big2str(obj, 10);
1394
+ size_t cnt = RSTRING_LEN(rs);
1395
+ bool dump_as_string = false;
1081
1396
 
1397
+ if (out->opts->int_range_max != 0 || out->opts->int_range_min != 0) { // Bignum cannot be inside of Fixnum range
1398
+ dump_as_string = true;
1399
+ assure_size(out, cnt + 2);
1400
+ *out->cur++ = '"';
1401
+ } else {
1402
+ assure_size(out, cnt);
1403
+ }
1404
+ APPEND_CHARS(out->cur, RSTRING_PTR(rs), cnt);
1405
+ if (dump_as_string) {
1406
+ *out->cur++ = '"';
1407
+ }
1082
1408
  *out->cur = '\0';
1083
1409
  }
1084
1410
 
1085
1411
  // Removed dependencies on math due to problems with CentOS 5.4.
1086
- void
1087
- oj_dump_float(VALUE obj, int depth, Out out, bool as_ok) {
1088
- char buf[64];
1089
- char *b;
1090
- double d = rb_num2dbl(obj);
1091
- int cnt = 0;
1412
+ void oj_dump_float(VALUE obj, int depth, Out out, bool as_ok) {
1413
+ char buf[64];
1414
+ char *b;
1415
+ double d = rb_num2dbl(obj);
1416
+ size_t cnt = 0;
1092
1417
 
1093
1418
  if (0.0 == d) {
1094
- b = buf;
1095
- *b++ = '0';
1096
- *b++ = '.';
1097
- *b++ = '0';
1098
- *b++ = '\0';
1099
- cnt = 3;
1419
+ b = buf;
1420
+ *b++ = '0';
1421
+ *b++ = '.';
1422
+ *b++ = '0';
1423
+ *b++ = '\0';
1424
+ cnt = 3;
1100
1425
  } else if (OJ_INFINITY == d) {
1101
- if (ObjectMode == out->opts->mode) {
1102
- strcpy(buf, inf_val);
1103
- cnt = sizeof(inf_val) - 1;
1104
- } else {
1105
- NanDump nd = out->opts->dump_opts.nan_dump;
1106
-
1107
- if (AutoNan == nd) {
1108
- switch (out->opts->mode) {
1109
- case CompatMode: nd = WordNan; break;
1110
- case StrictMode: nd = RaiseNan; break;
1111
- case NullMode: nd = NullNan; break;
1112
- case CustomMode: nd = NullNan; break;
1113
- default: break;
1114
- }
1115
- }
1116
- switch (nd) {
1117
- case RaiseNan:
1118
- raise_strict(obj);
1119
- break;
1120
- case WordNan:
1121
- strcpy(buf, "Infinity");
1122
- cnt = 8;
1123
- break;
1124
- case NullNan:
1125
- strcpy(buf, "null");
1126
- cnt = 4;
1127
- break;
1128
- case HugeNan:
1129
- default:
1130
- strcpy(buf, inf_val);
1131
- cnt = sizeof(inf_val) - 1;
1132
- break;
1133
- }
1134
- }
1426
+ if (ObjectMode == out->opts->mode) {
1427
+ strcpy(buf, inf_val);
1428
+ cnt = sizeof(inf_val) - 1;
1429
+ } else {
1430
+ NanDump nd = out->opts->dump_opts.nan_dump;
1431
+
1432
+ if (AutoNan == nd) {
1433
+ switch (out->opts->mode) {
1434
+ case CompatMode: nd = WordNan; break;
1435
+ case StrictMode: nd = RaiseNan; break;
1436
+ case NullMode: nd = NullNan; break;
1437
+ case CustomMode: nd = NullNan; break;
1438
+ default: break;
1439
+ }
1440
+ }
1441
+ switch (nd) {
1442
+ case RaiseNan: raise_strict(obj); break;
1443
+ case WordNan:
1444
+ strcpy(buf, "Infinity");
1445
+ cnt = 8;
1446
+ break;
1447
+ case NullNan:
1448
+ strcpy(buf, "null");
1449
+ cnt = 4;
1450
+ break;
1451
+ case HugeNan:
1452
+ default:
1453
+ strcpy(buf, inf_val);
1454
+ cnt = sizeof(inf_val) - 1;
1455
+ break;
1456
+ }
1457
+ }
1135
1458
  } else if (-OJ_INFINITY == d) {
1136
- if (ObjectMode == out->opts->mode) {
1137
- strcpy(buf, ninf_val);
1138
- cnt = sizeof(ninf_val) - 1;
1139
- } else {
1140
- NanDump nd = out->opts->dump_opts.nan_dump;
1141
-
1142
- if (AutoNan == nd) {
1143
- switch (out->opts->mode) {
1144
- case CompatMode: nd = WordNan; break;
1145
- case StrictMode: nd = RaiseNan; break;
1146
- case NullMode: nd = NullNan; break;
1147
- default: break;
1148
- }
1149
- }
1150
- switch (nd) {
1151
- case RaiseNan:
1152
- raise_strict(obj);
1153
- break;
1154
- case WordNan:
1155
- strcpy(buf, "-Infinity");
1156
- cnt = 9;
1157
- break;
1158
- case NullNan:
1159
- strcpy(buf, "null");
1160
- cnt = 4;
1161
- break;
1162
- case HugeNan:
1163
- default:
1164
- strcpy(buf, ninf_val);
1165
- cnt = sizeof(ninf_val) - 1;
1166
- break;
1167
- }
1168
- }
1459
+ if (ObjectMode == out->opts->mode) {
1460
+ strcpy(buf, ninf_val);
1461
+ cnt = sizeof(ninf_val) - 1;
1462
+ } else {
1463
+ NanDump nd = out->opts->dump_opts.nan_dump;
1464
+
1465
+ if (AutoNan == nd) {
1466
+ switch (out->opts->mode) {
1467
+ case CompatMode: nd = WordNan; break;
1468
+ case StrictMode: nd = RaiseNan; break;
1469
+ case NullMode: nd = NullNan; break;
1470
+ default: break;
1471
+ }
1472
+ }
1473
+ switch (nd) {
1474
+ case RaiseNan: raise_strict(obj); break;
1475
+ case WordNan:
1476
+ strcpy(buf, "-Infinity");
1477
+ cnt = 9;
1478
+ break;
1479
+ case NullNan:
1480
+ strcpy(buf, "null");
1481
+ cnt = 4;
1482
+ break;
1483
+ case HugeNan:
1484
+ default:
1485
+ strcpy(buf, ninf_val);
1486
+ cnt = sizeof(ninf_val) - 1;
1487
+ break;
1488
+ }
1489
+ }
1169
1490
  } else if (isnan(d)) {
1170
- if (ObjectMode == out->opts->mode) {
1171
- strcpy(buf, nan_val);
1172
- cnt = sizeof(ninf_val) - 1;
1173
- } else {
1174
- NanDump nd = out->opts->dump_opts.nan_dump;
1175
-
1176
- if (AutoNan == nd) {
1177
- switch (out->opts->mode) {
1178
- case ObjectMode: nd = HugeNan; break;
1179
- case StrictMode: nd = RaiseNan; break;
1180
- case NullMode: nd = NullNan; break;
1181
- default: break;
1182
- }
1183
- }
1184
- switch (nd) {
1185
- case RaiseNan:
1186
- raise_strict(obj);
1187
- break;
1188
- case WordNan:
1189
- strcpy(buf, "NaN");
1190
- cnt = 3;
1191
- break;
1192
- case NullNan:
1193
- strcpy(buf, "null");
1194
- cnt = 4;
1195
- break;
1196
- case HugeNan:
1197
- default:
1198
- strcpy(buf, nan_val);
1199
- cnt = sizeof(nan_val) - 1;
1200
- break;
1201
- }
1202
- }
1491
+ if (ObjectMode == out->opts->mode) {
1492
+ strcpy(buf, nan_val);
1493
+ cnt = sizeof(nan_val) - 1;
1494
+ } else {
1495
+ NanDump nd = out->opts->dump_opts.nan_dump;
1496
+
1497
+ if (AutoNan == nd) {
1498
+ switch (out->opts->mode) {
1499
+ case ObjectMode: nd = HugeNan; break;
1500
+ case StrictMode: nd = RaiseNan; break;
1501
+ case NullMode: nd = NullNan; break;
1502
+ default: break;
1503
+ }
1504
+ }
1505
+ switch (nd) {
1506
+ case RaiseNan: raise_strict(obj); break;
1507
+ case WordNan:
1508
+ strcpy(buf, "NaN");
1509
+ cnt = 3;
1510
+ break;
1511
+ case NullNan:
1512
+ strcpy(buf, "null");
1513
+ cnt = 4;
1514
+ break;
1515
+ case HugeNan:
1516
+ default:
1517
+ strcpy(buf, nan_val);
1518
+ cnt = sizeof(nan_val) - 1;
1519
+ break;
1520
+ }
1521
+ }
1203
1522
  } else if (d == (double)(long long int)d) {
1204
- cnt = snprintf(buf, sizeof(buf), "%.1f", d);
1523
+ cnt = snprintf(buf, sizeof(buf), "%.1f", d);
1205
1524
  } else if (0 == out->opts->float_prec) {
1206
- volatile VALUE rstr = rb_funcall(obj, oj_to_s_id, 0);
1207
-
1208
- cnt = (int)RSTRING_LEN(rstr);
1209
- if ((int)sizeof(buf) <= cnt) {
1210
- cnt = sizeof(buf) - 1;
1211
- }
1212
- strncpy(buf, rb_string_value_ptr((VALUE*)&rstr), cnt);
1213
- buf[cnt] = '\0';
1525
+ volatile VALUE rstr = oj_safe_string_convert(obj);
1526
+
1527
+ cnt = RSTRING_LEN(rstr);
1528
+ if ((int)sizeof(buf) <= cnt) {
1529
+ cnt = sizeof(buf) - 1;
1530
+ }
1531
+ memcpy(buf, RSTRING_PTR(rstr), cnt);
1532
+ buf[cnt] = '\0';
1214
1533
  } else {
1215
- cnt = oj_dump_float_printf(buf, sizeof(buf), obj, d, out->opts->float_fmt);
1534
+ cnt = oj_dump_float_printf(buf, sizeof(buf), obj, d, out->opts->float_fmt);
1216
1535
  }
1217
1536
  assure_size(out, cnt);
1218
- for (b = buf; '\0' != *b; b++) {
1219
- *out->cur++ = *b;
1220
- }
1537
+ APPEND_CHARS(out->cur, buf, cnt);
1221
1538
  *out->cur = '\0';
1222
1539
  }
1223
1540
 
1224
- int
1225
- oj_dump_float_printf(char *buf, size_t blen, VALUE obj, double d, const char *format) {
1226
- int cnt = snprintf(buf, blen, format, d);
1541
+ size_t oj_dump_float_printf(char *buf, size_t blen, VALUE obj, double d, const char *format) {
1542
+ size_t cnt = snprintf(buf, blen, format, d);
1227
1543
 
1228
1544
  // Round off issues at 16 significant digits so check for obvious ones of
1229
1545
  // 0001 and 9999.
1230
1546
  if (17 <= cnt && (0 == strcmp("0001", buf + cnt - 4) || 0 == strcmp("9999", buf + cnt - 4))) {
1231
- volatile VALUE rstr = rb_funcall(obj, oj_to_s_id, 0);
1547
+ volatile VALUE rstr = oj_safe_string_convert(obj);
1232
1548
 
1233
- strcpy(buf, rb_string_value_ptr((VALUE*)&rstr));
1234
- cnt = (int)RSTRING_LEN(rstr);
1549
+ strcpy(buf, RSTRING_PTR(rstr));
1550
+ cnt = RSTRING_LEN(rstr);
1235
1551
  }
1236
1552
  return cnt;
1237
1553
  }
1238
-
1239
- bool
1240
- oj_dump_ignore(Options opts, VALUE obj) {
1241
- if (NULL != opts->ignore && (ObjectMode == opts->mode || CustomMode == opts->mode)) {
1242
- VALUE *vp = opts->ignore;
1243
- VALUE clas = rb_obj_class(obj);
1244
-
1245
- for (; Qnil != *vp; vp++) {
1246
- if (clas == *vp) {
1247
- return true;
1248
- }
1249
- }
1250
- }
1251
- return false;
1252
- }