oj 3.16.9 → 3.16.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/ext/oj/custom.c +10 -9
- data/ext/oj/dump.c +319 -20
- data/ext/oj/dump.h +7 -2
- data/ext/oj/dump_compat.c +9 -8
- data/ext/oj/dump_leaf.c +1 -1
- data/ext/oj/dump_object.c +27 -17
- data/ext/oj/dump_strict.c +7 -6
- data/ext/oj/fast.c +4 -7
- data/ext/oj/mimic_json.c +3 -6
- data/ext/oj/object.c +8 -8
- data/ext/oj/oj.c +12 -9
- data/ext/oj/parse.c +15 -5
- data/ext/oj/parser.c +1 -1
- data/ext/oj/parser.h +2 -0
- data/ext/oj/rails.c +20 -19
- data/ext/oj/saj.c +3 -6
- data/ext/oj/scp.c +3 -6
- data/ext/oj/simd.h +10 -0
- data/ext/oj/stream_writer.c +1 -7
- data/ext/oj/strict.c +2 -4
- data/ext/oj/string_writer.c +1 -3
- data/ext/oj/wab.c +4 -3
- data/lib/oj/version.rb +1 -1
- data/pages/Encoding.md +1 -1
- metadata +4 -98
- data/test/_test_active.rb +0 -75
- data/test/_test_active_mimic.rb +0 -95
- data/test/_test_mimic_rails.rb +0 -123
- data/test/activerecord/result_test.rb +0 -31
- data/test/activesupport6/abstract_unit.rb +0 -44
- data/test/activesupport6/decoding_test.rb +0 -133
- data/test/activesupport6/encoding_test.rb +0 -542
- data/test/activesupport6/encoding_test_cases.rb +0 -98
- data/test/activesupport6/test_common.rb +0 -17
- data/test/activesupport6/test_helper.rb +0 -163
- data/test/activesupport6/time_zone_test_helpers.rb +0 -39
- data/test/activesupport7/abstract_unit.rb +0 -52
- data/test/activesupport7/decoding_test.rb +0 -125
- data/test/activesupport7/encoding_test.rb +0 -536
- data/test/activesupport7/encoding_test_cases.rb +0 -104
- data/test/activesupport7/time_zone_test_helpers.rb +0 -47
- data/test/files.rb +0 -29
- data/test/foo.rb +0 -26
- data/test/helper.rb +0 -39
- data/test/isolated/shared.rb +0 -309
- data/test/isolated/test_mimic_after.rb +0 -13
- data/test/isolated/test_mimic_alone.rb +0 -12
- data/test/isolated/test_mimic_as_json.rb +0 -45
- data/test/isolated/test_mimic_before.rb +0 -13
- data/test/isolated/test_mimic_define.rb +0 -28
- data/test/isolated/test_mimic_rails_after.rb +0 -22
- data/test/isolated/test_mimic_rails_before.rb +0 -21
- data/test/isolated/test_mimic_redefine.rb +0 -15
- data/test/json_gem/json_addition_test.rb +0 -216
- data/test/json_gem/json_common_interface_test.rb +0 -155
- data/test/json_gem/json_encoding_test.rb +0 -107
- data/test/json_gem/json_ext_parser_test.rb +0 -21
- data/test/json_gem/json_fixtures_test.rb +0 -36
- data/test/json_gem/json_generator_test.rb +0 -413
- data/test/json_gem/json_generic_object_test.rb +0 -90
- data/test/json_gem/json_parser_test.rb +0 -477
- data/test/json_gem/json_string_matching_test.rb +0 -42
- data/test/json_gem/test_helper.rb +0 -30
- data/test/mem.rb +0 -34
- data/test/perf.rb +0 -102
- data/test/perf_compat.rb +0 -128
- data/test/perf_dump.rb +0 -50
- data/test/perf_fast.rb +0 -162
- data/test/perf_file.rb +0 -62
- data/test/perf_object.rb +0 -134
- data/test/perf_once.rb +0 -59
- data/test/perf_parser.rb +0 -183
- data/test/perf_saj.rb +0 -101
- data/test/perf_scp.rb +0 -140
- data/test/perf_simple.rb +0 -289
- data/test/perf_strict.rb +0 -137
- data/test/perf_wab.rb +0 -129
- data/test/prec.rb +0 -23
- data/test/sample/change.rb +0 -13
- data/test/sample/dir.rb +0 -18
- data/test/sample/doc.rb +0 -35
- data/test/sample/file.rb +0 -47
- data/test/sample/group.rb +0 -15
- data/test/sample/hasprops.rb +0 -15
- data/test/sample/layer.rb +0 -11
- data/test/sample/line.rb +0 -20
- data/test/sample/oval.rb +0 -10
- data/test/sample/rect.rb +0 -9
- data/test/sample/shape.rb +0 -34
- data/test/sample/text.rb +0 -19
- data/test/sample.rb +0 -54
- data/test/sample_json.rb +0 -37
- data/test/test_compat.rb +0 -567
- data/test/test_custom.rb +0 -555
- data/test/test_debian.rb +0 -50
- data/test/test_fast.rb +0 -526
- data/test/test_file.rb +0 -250
- data/test/test_gc.rb +0 -60
- data/test/test_generate.rb +0 -21
- data/test/test_hash.rb +0 -39
- data/test/test_integer_range.rb +0 -72
- data/test/test_null.rb +0 -376
- data/test/test_object.rb +0 -1030
- data/test/test_parser.rb +0 -11
- data/test/test_parser_debug.rb +0 -27
- data/test/test_parser_saj.rb +0 -337
- data/test/test_parser_usual.rb +0 -255
- data/test/test_rails.rb +0 -35
- data/test/test_saj.rb +0 -188
- data/test/test_scp.rb +0 -431
- data/test/test_strict.rb +0 -441
- data/test/test_various.rb +0 -801
- data/test/test_wab.rb +0 -311
- data/test/test_writer.rb +0 -396
- data/test/tests.rb +0 -33
- data/test/tests_mimic.rb +0 -23
- data/test/tests_mimic_addition.rb +0 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 30aea721380a4e3edc306dd19906d8777f230a639ba3427e9394dd543a3a7e3b
|
4
|
+
data.tar.gz: b024a9d4513c16c1bfe4fc3c4adeacb1afd9a0e670987476d32cfa2fa74e9b1e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 527ea1162cb135bbe16eefc10a7cb05444182767aca6fa0b6986622e52d7082bcec020c43e663251406c81602018f7d0842c2c5cee37aeca0269560e502d99dd
|
7
|
+
data.tar.gz: e49e9f63e373cb0ec21f604f97899f87815b86ef5a5eafad30e7bddbd11e71156f92beaa1259c83609c2d45d2a8aac87c8b27e3e266fcc2bd99a1908327c796d
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,15 @@
|
|
1
1
|
# CHANGELOG
|
2
2
|
|
3
|
+
## 3.16.11 - 2025-05-29
|
4
|
+
|
5
|
+
- Fixed range encoding with the :circular option
|
6
|
+
|
7
|
+
## 3.16.10 - 2025-02-24
|
8
|
+
|
9
|
+
- Changed oj_parser_type to be non-static.
|
10
|
+
|
11
|
+
- Changed ARM versions to use Neon instructions thanks to @samyron.
|
12
|
+
|
3
13
|
## 3.16.9 - 2024-12-28
|
4
14
|
|
5
15
|
- Fixed `Oj::Parser` create_id size issue #931.
|
@@ -64,6 +74,7 @@
|
|
64
74
|
## 3.14.3 - 2023-04-07
|
65
75
|
|
66
76
|
- Fixed compat parse with optimized Hash when parsing a JSON::GenericObject.
|
77
|
+
- Deprecated Ruby <= 2.6.10
|
67
78
|
|
68
79
|
## 3.14.2 - 2023-02-10
|
69
80
|
|
data/ext/oj/custom.c
CHANGED
@@ -40,7 +40,7 @@ static void dump_obj_as_str(VALUE obj, int depth, Out out) {
|
|
40
40
|
static void bigdecimal_dump(VALUE obj, int depth, Out out) {
|
41
41
|
volatile VALUE rstr = oj_safe_string_convert(obj);
|
42
42
|
const char *str = RSTRING_PTR(rstr);
|
43
|
-
|
43
|
+
size_t len = RSTRING_LEN(rstr);
|
44
44
|
|
45
45
|
if (0 == strcasecmp("Infinity", str)) {
|
46
46
|
str = oj_nan_str(obj, out->opts->dump_opts.nan_dump, out->opts->mode, true, &len);
|
@@ -123,7 +123,7 @@ static void date_dump(VALUE obj, int depth, Out out) {
|
|
123
123
|
case RubyTime:
|
124
124
|
case XmlTime:
|
125
125
|
v = rb_funcall(obj, rb_intern("iso8601"), 0);
|
126
|
-
oj_dump_cstr(RSTRING_PTR(v),
|
126
|
+
oj_dump_cstr(RSTRING_PTR(v), RSTRING_LEN(v), 0, 0, out);
|
127
127
|
break;
|
128
128
|
case UnixZTime:
|
129
129
|
v = rb_funcall(obj, rb_intern("to_time"), 0);
|
@@ -405,7 +405,7 @@ static void dump_odd(VALUE obj, Odd odd, VALUE clas, int depth, Out out) {
|
|
405
405
|
rb_raise(rb_eEncodingError, "Invalid type for raw JSON.\n");
|
406
406
|
} else {
|
407
407
|
const char *s = RSTRING_PTR(v);
|
408
|
-
|
408
|
+
size_t len = RSTRING_LEN(v);
|
409
409
|
const char *name = rb_id2name(*odd->attrs);
|
410
410
|
size_t nlen = strlen(name);
|
411
411
|
|
@@ -478,7 +478,7 @@ static VALUE dump_common(VALUE obj, int depth, Out out) {
|
|
478
478
|
} else if (Yes == out->opts->to_json && rb_respond_to(obj, oj_to_json_id)) {
|
479
479
|
volatile VALUE rs;
|
480
480
|
const char *s;
|
481
|
-
|
481
|
+
size_t len;
|
482
482
|
|
483
483
|
TRACE(out->opts->trace, "to_json", obj, depth + 1, TraceRubyIn);
|
484
484
|
if (0 == rb_obj_method_arity(obj, oj_to_json_id)) {
|
@@ -488,7 +488,7 @@ static VALUE dump_common(VALUE obj, int depth, Out out) {
|
|
488
488
|
}
|
489
489
|
TRACE(out->opts->trace, "to_json", obj, depth + 1, TraceRubyOut);
|
490
490
|
s = RSTRING_PTR(rs);
|
491
|
-
len =
|
491
|
+
len = RSTRING_LEN(rs);
|
492
492
|
|
493
493
|
assure_size(out, len + 1);
|
494
494
|
APPEND_CHARS(out->cur, s, len);
|
@@ -509,7 +509,7 @@ static VALUE dump_common(VALUE obj, int depth, Out out) {
|
|
509
509
|
if (aj == obj) {
|
510
510
|
volatile VALUE rstr = oj_safe_string_convert(obj);
|
511
511
|
|
512
|
-
oj_dump_cstr(RSTRING_PTR(rstr),
|
512
|
+
oj_dump_cstr(RSTRING_PTR(rstr), RSTRING_LEN(rstr), false, false, out);
|
513
513
|
} else {
|
514
514
|
oj_dump_custom_val(aj, depth, out, true);
|
515
515
|
}
|
@@ -676,7 +676,8 @@ static void dump_obj(VALUE obj, int depth, Out out, bool as_ok) {
|
|
676
676
|
|
677
677
|
static void dump_array(VALUE a, int depth, Out out, bool as_ok) {
|
678
678
|
size_t size;
|
679
|
-
|
679
|
+
size_t i;
|
680
|
+
size_t cnt;
|
680
681
|
int d2 = depth + 1;
|
681
682
|
long id = oj_check_circular(a, out);
|
682
683
|
|
@@ -684,7 +685,7 @@ static void dump_array(VALUE a, int depth, Out out, bool as_ok) {
|
|
684
685
|
oj_dump_nil(Qnil, depth, out, false);
|
685
686
|
return;
|
686
687
|
}
|
687
|
-
cnt =
|
688
|
+
cnt = RARRAY_LEN(a);
|
688
689
|
*out->cur++ = '[';
|
689
690
|
assure_size(out, 2);
|
690
691
|
if (0 == cnt) {
|
@@ -795,7 +796,7 @@ static void dump_struct(VALUE obj, int depth, Out out, bool as_ok) {
|
|
795
796
|
volatile VALUE s = rb_sym2str(RARRAY_AREF(ma, i));
|
796
797
|
|
797
798
|
name = RSTRING_PTR(s);
|
798
|
-
len =
|
799
|
+
len = RSTRING_LEN(s);
|
799
800
|
} else {
|
800
801
|
len = snprintf(num_id, sizeof(num_id), "%d", i);
|
801
802
|
name = num_id;
|
data/ext/oj/dump.c
CHANGED
@@ -152,8 +152,77 @@ inline static size_t newline_friendly_size(const uint8_t *str, size_t len) {
|
|
152
152
|
return calculate_string_size(str, len, newline_friendly_chars);
|
153
153
|
}
|
154
154
|
|
155
|
+
#ifdef HAVE_SIMD_NEON
|
156
|
+
inline static uint8x16x4_t load_uint8x16_4(const unsigned char *table) {
|
157
|
+
uint8x16x4_t tab;
|
158
|
+
tab.val[0] = vld1q_u8(table);
|
159
|
+
tab.val[1] = vld1q_u8(table + 16);
|
160
|
+
tab.val[2] = vld1q_u8(table + 32);
|
161
|
+
tab.val[3] = vld1q_u8(table + 48);
|
162
|
+
return tab;
|
163
|
+
}
|
164
|
+
|
165
|
+
static uint8x16x4_t hibit_friendly_chars_neon[2];
|
166
|
+
static uint8x16x4_t rails_friendly_chars_neon[2];
|
167
|
+
static uint8x16x4_t rails_xss_friendly_chars_neon[4];
|
168
|
+
|
169
|
+
void initialize_neon(void) {
|
170
|
+
// We only need the first 128 bytes of the hibit friendly chars table. Everything above 127 is
|
171
|
+
// set to '1'. If that ever changes, the code will need to be updated.
|
172
|
+
hibit_friendly_chars_neon[0] = load_uint8x16_4((const unsigned char *)hibit_friendly_chars);
|
173
|
+
hibit_friendly_chars_neon[1] = load_uint8x16_4((const unsigned char *)hibit_friendly_chars + 64);
|
174
|
+
|
175
|
+
// rails_friendly_chars is the same as hibit_friendly_chars. Only the first 128 bytes have values
|
176
|
+
// that are not '1'. If that ever changes, the code will need to be updated.
|
177
|
+
rails_friendly_chars_neon[0] = load_uint8x16_4((const unsigned char *)rails_friendly_chars);
|
178
|
+
rails_friendly_chars_neon[1] = load_uint8x16_4((const unsigned char *)rails_friendly_chars + 64);
|
179
|
+
|
180
|
+
rails_xss_friendly_chars_neon[0] = load_uint8x16_4((const unsigned char *)rails_xss_friendly_chars);
|
181
|
+
rails_xss_friendly_chars_neon[1] = load_uint8x16_4((const unsigned char *)rails_xss_friendly_chars + 64);
|
182
|
+
rails_xss_friendly_chars_neon[2] = load_uint8x16_4((const unsigned char *)rails_xss_friendly_chars + 128);
|
183
|
+
rails_xss_friendly_chars_neon[3] = load_uint8x16_4((const unsigned char *)rails_xss_friendly_chars + 192);
|
184
|
+
|
185
|
+
// All bytes should be 0 except for those that need more than 1 byte of output. This will allow the
|
186
|
+
// code to limit the lookups to the first 128 bytes (values 0 - 127). Bytes above 127 will result
|
187
|
+
// in 0 with the vqtbl4q_u8 instruction.
|
188
|
+
uint8x16_t one = vdupq_n_u8('1');
|
189
|
+
for (int i = 0; i < 2; i++) {
|
190
|
+
for (int j = 0; j < 4; j++) {
|
191
|
+
hibit_friendly_chars_neon[i].val[j] = vsubq_u8(hibit_friendly_chars_neon[i].val[j], one);
|
192
|
+
rails_friendly_chars_neon[i].val[j] = vsubq_u8(rails_friendly_chars_neon[i].val[j], one);
|
193
|
+
}
|
194
|
+
}
|
195
|
+
|
196
|
+
for (int i = 0; i < 4; i++) {
|
197
|
+
for (int j = 0; j < 4; j++) {
|
198
|
+
rails_xss_friendly_chars_neon[i].val[j] = vsubq_u8(rails_xss_friendly_chars_neon[i].val[j], one);
|
199
|
+
}
|
200
|
+
}
|
201
|
+
}
|
202
|
+
#endif
|
203
|
+
|
155
204
|
inline static size_t hibit_friendly_size(const uint8_t *str, size_t len) {
|
205
|
+
#ifdef HAVE_SIMD_NEON
|
206
|
+
size_t size = 0;
|
207
|
+
size_t i = 0;
|
208
|
+
|
209
|
+
for (; i + sizeof(uint8x16_t) <= len; i += sizeof(uint8x16_t), str += sizeof(uint8x16_t)) {
|
210
|
+
size += sizeof(uint8x16_t);
|
211
|
+
|
212
|
+
// See https://lemire.me/blog/2019/07/23/arbitrary-byte-to-byte-maps-using-arm-neon/
|
213
|
+
uint8x16_t chunk = vld1q_u8(str);
|
214
|
+
uint8x16_t tmp1 = vqtbl4q_u8(hibit_friendly_chars_neon[0], chunk);
|
215
|
+
uint8x16_t tmp2 = vqtbl4q_u8(hibit_friendly_chars_neon[1], veorq_u8(chunk, vdupq_n_u8(0x40)));
|
216
|
+
uint8x16_t result = vorrq_u8(tmp1, tmp2);
|
217
|
+
uint8_t tmp = vaddvq_u8(result);
|
218
|
+
size += tmp;
|
219
|
+
}
|
220
|
+
|
221
|
+
size_t total = size + calculate_string_size(str, len - i, hibit_friendly_chars);
|
222
|
+
return total;
|
223
|
+
#else
|
156
224
|
return calculate_string_size(str, len, hibit_friendly_chars);
|
225
|
+
#endif
|
157
226
|
}
|
158
227
|
|
159
228
|
inline static size_t slash_friendly_size(const uint8_t *str, size_t len) {
|
@@ -184,9 +253,43 @@ inline static size_t hixss_friendly_size(const uint8_t *str, size_t len) {
|
|
184
253
|
|
185
254
|
inline static long rails_xss_friendly_size(const uint8_t *str, size_t len) {
|
186
255
|
long size = 0;
|
187
|
-
size_t i = len;
|
188
256
|
uint8_t hi = 0;
|
189
257
|
|
258
|
+
#ifdef HAVE_SIMD_NEON
|
259
|
+
size_t i = 0;
|
260
|
+
|
261
|
+
uint8x16_t has_some_hibit = vdupq_n_u8(0);
|
262
|
+
uint8x16_t hibit = vdupq_n_u8(0x80);
|
263
|
+
for (; i + sizeof(uint8x16_t) <= len; i += sizeof(uint8x16_t), str += sizeof(uint8x16_t)) {
|
264
|
+
size += sizeof(uint8x16_t);
|
265
|
+
|
266
|
+
uint8x16_t chunk = vld1q_u8(str);
|
267
|
+
|
268
|
+
// Check to see if any of these bytes have the high bit set.
|
269
|
+
has_some_hibit = vorrq_u8(has_some_hibit, vandq_u8(chunk, hibit));
|
270
|
+
|
271
|
+
uint8x16_t tmp1 = vqtbl4q_u8(rails_xss_friendly_chars_neon[0], chunk);
|
272
|
+
uint8x16_t tmp2 = vqtbl4q_u8(rails_xss_friendly_chars_neon[1], veorq_u8(chunk, vdupq_n_u8(0x40)));
|
273
|
+
uint8x16_t tmp3 = vqtbl4q_u8(rails_xss_friendly_chars_neon[2], veorq_u8(chunk, vdupq_n_u8(0x80)));
|
274
|
+
uint8x16_t tmp4 = vqtbl4q_u8(rails_xss_friendly_chars_neon[3], veorq_u8(chunk, vdupq_n_u8(0xc0)));
|
275
|
+
uint8x16_t result = vorrq_u8(tmp4, vorrq_u8(tmp3, vorrq_u8(tmp1, tmp2)));
|
276
|
+
uint8_t tmp = vaddvq_u8(result);
|
277
|
+
size += tmp;
|
278
|
+
}
|
279
|
+
|
280
|
+
// 'hi' should be set if any of the bytes we processed have the high bit set. It doesn't matter which ones.
|
281
|
+
hi = vmaxvq_u8(has_some_hibit) != 0;
|
282
|
+
|
283
|
+
for (; i < len; str++, i++) {
|
284
|
+
size += rails_xss_friendly_chars[*str] - '0';
|
285
|
+
hi |= *str & 0x80;
|
286
|
+
}
|
287
|
+
if (0 == hi) {
|
288
|
+
return size;
|
289
|
+
}
|
290
|
+
return -(size);
|
291
|
+
#else
|
292
|
+
size_t i = len;
|
190
293
|
for (; 0 < i; str++, i--) {
|
191
294
|
size += rails_xss_friendly_chars[*str];
|
192
295
|
hi |= *str & 0x80;
|
@@ -195,13 +298,47 @@ inline static long rails_xss_friendly_size(const uint8_t *str, size_t len) {
|
|
195
298
|
return size - len * (size_t)'0';
|
196
299
|
}
|
197
300
|
return -(size - len * (size_t)'0');
|
301
|
+
#endif /* HAVE_SIMD_NEON */
|
198
302
|
}
|
199
303
|
|
200
304
|
inline static size_t rails_friendly_size(const uint8_t *str, size_t len) {
|
201
305
|
long size = 0;
|
202
|
-
size_t i = len;
|
203
306
|
uint8_t hi = 0;
|
307
|
+
#ifdef HAVE_SIMD_NEON
|
308
|
+
size_t i = 0;
|
309
|
+
|
310
|
+
uint8x16_t has_some_hibit = vdupq_n_u8(0);
|
311
|
+
uint8x16_t hibit = vdupq_n_u8(0x80);
|
312
|
+
|
313
|
+
for (; i + sizeof(uint8x16_t) <= len; i += sizeof(uint8x16_t), str += sizeof(uint8x16_t)) {
|
314
|
+
size += sizeof(uint8x16_t);
|
315
|
+
|
316
|
+
// See https://lemire.me/blog/2019/07/23/arbitrary-byte-to-byte-maps-using-arm-neon/
|
317
|
+
uint8x16_t chunk = vld1q_u8(str);
|
318
|
+
|
319
|
+
// Check to see if any of these bytes have the high bit set.
|
320
|
+
has_some_hibit = vorrq_u8(has_some_hibit, vandq_u8(chunk, hibit));
|
204
321
|
|
322
|
+
uint8x16_t tmp1 = vqtbl4q_u8(rails_friendly_chars_neon[0], chunk);
|
323
|
+
uint8x16_t tmp2 = vqtbl4q_u8(rails_friendly_chars_neon[1], veorq_u8(chunk, vdupq_n_u8(0x40)));
|
324
|
+
uint8x16_t result = vorrq_u8(tmp1, tmp2);
|
325
|
+
uint8_t tmp = vaddvq_u8(result);
|
326
|
+
size += tmp;
|
327
|
+
}
|
328
|
+
|
329
|
+
// 'hi' should be set if any of the bytes we processed have the high bit set. It doesn't matter which ones.
|
330
|
+
hi = vmaxvq_u8(has_some_hibit) != 0;
|
331
|
+
|
332
|
+
for (; i < len; str++, i++) {
|
333
|
+
size += rails_friendly_chars[*str] - '0';
|
334
|
+
hi |= *str & 0x80;
|
335
|
+
}
|
336
|
+
if (0 == hi) {
|
337
|
+
return size;
|
338
|
+
}
|
339
|
+
return -(size);
|
340
|
+
#else
|
341
|
+
size_t i = len;
|
205
342
|
for (; 0 < i; str++, i--) {
|
206
343
|
size += rails_friendly_chars[*str];
|
207
344
|
hi |= *str & 0x80;
|
@@ -210,9 +347,10 @@ inline static size_t rails_friendly_size(const uint8_t *str, size_t len) {
|
|
210
347
|
return size - len * (size_t)'0';
|
211
348
|
}
|
212
349
|
return -(size - len * (size_t)'0');
|
350
|
+
#endif /* HAVE_SIMD_NEON */
|
213
351
|
}
|
214
352
|
|
215
|
-
const char *oj_nan_str(VALUE obj, int opt, int mode, bool plus,
|
353
|
+
const char *oj_nan_str(VALUE obj, int opt, int mode, bool plus, size_t *lenp) {
|
216
354
|
const char *str = NULL;
|
217
355
|
|
218
356
|
if (AutoNan == opt) {
|
@@ -477,7 +615,7 @@ void oj_dump_time(VALUE obj, Out out, int withZone) {
|
|
477
615
|
void oj_dump_ruby_time(VALUE obj, Out out) {
|
478
616
|
volatile VALUE rstr = oj_safe_string_convert(obj);
|
479
617
|
|
480
|
-
oj_dump_cstr(RSTRING_PTR(rstr),
|
618
|
+
oj_dump_cstr(RSTRING_PTR(rstr), RSTRING_LEN(rstr), 0, 0, out);
|
481
619
|
}
|
482
620
|
|
483
621
|
void oj_dump_xml_time(VALUE obj, Out out) {
|
@@ -711,13 +849,13 @@ void oj_dump_str(VALUE obj, int depth, Out out, bool as_ok) {
|
|
711
849
|
rb_encoding *enc = rb_enc_from_index(idx);
|
712
850
|
obj = rb_str_conv_enc(obj, enc, oj_utf8_encoding);
|
713
851
|
}
|
714
|
-
oj_dump_cstr(RSTRING_PTR(obj),
|
852
|
+
oj_dump_cstr(RSTRING_PTR(obj), RSTRING_LEN(obj), 0, 0, out);
|
715
853
|
}
|
716
854
|
|
717
855
|
void oj_dump_sym(VALUE obj, int depth, Out out, bool as_ok) {
|
718
856
|
volatile VALUE s = rb_sym2str(obj);
|
719
857
|
|
720
|
-
oj_dump_cstr(RSTRING_PTR(s),
|
858
|
+
oj_dump_cstr(RSTRING_PTR(s), RSTRING_LEN(s), 0, 0, out);
|
721
859
|
}
|
722
860
|
|
723
861
|
static void debug_raise(const char *orig, size_t cnt, int line) {
|
@@ -758,9 +896,49 @@ void oj_dump_raw_json(VALUE obj, int depth, Out out) {
|
|
758
896
|
}
|
759
897
|
}
|
760
898
|
|
899
|
+
#ifdef HAVE_SIMD_NEON
|
900
|
+
typedef struct _neon_match_result {
|
901
|
+
uint8x16_t needs_escape;
|
902
|
+
bool has_some_hibit;
|
903
|
+
bool do_unicode_validation;
|
904
|
+
} neon_match_result;
|
905
|
+
|
906
|
+
#if defined(__clang__) || defined(__GNUC__)
|
907
|
+
#define FORCE_INLINE __attribute__((always_inline))
|
908
|
+
#else
|
909
|
+
#define FORCE_INLINE
|
910
|
+
#endif
|
911
|
+
|
912
|
+
static inline FORCE_INLINE neon_match_result
|
913
|
+
neon_update(const char *str, uint8x16x4_t *cmap_neon, int neon_table_size, bool do_unicode_validation, bool has_hi) {
|
914
|
+
neon_match_result result = {.has_some_hibit = false, .do_unicode_validation = false};
|
915
|
+
|
916
|
+
uint8x16_t chunk = vld1q_u8((const unsigned char *)str);
|
917
|
+
uint8x16_t tmp1 = vqtbl4q_u8(cmap_neon[0], chunk);
|
918
|
+
uint8x16_t tmp2 = vqtbl4q_u8(cmap_neon[1], veorq_u8(chunk, vdupq_n_u8(0x40)));
|
919
|
+
result.needs_escape = vorrq_u8(tmp1, tmp2);
|
920
|
+
if (neon_table_size > 2) {
|
921
|
+
uint8x16_t tmp3 = vqtbl4q_u8(cmap_neon[2], veorq_u8(chunk, vdupq_n_u8(0x80)));
|
922
|
+
uint8x16_t tmp4 = vqtbl4q_u8(cmap_neon[3], veorq_u8(chunk, vdupq_n_u8(0xc0)));
|
923
|
+
result.needs_escape = vorrq_u8(result.needs_escape, vorrq_u8(tmp4, tmp3));
|
924
|
+
}
|
925
|
+
if (has_hi && do_unicode_validation) {
|
926
|
+
uint8x16_t has_some_hibit = vandq_u8(chunk, vdupq_n_u8(0x80));
|
927
|
+
result.has_some_hibit = vmaxvq_u8(has_some_hibit) != 0;
|
928
|
+
result.do_unicode_validation = has_hi && do_unicode_validation && result.has_some_hibit;
|
929
|
+
}
|
930
|
+
return result;
|
931
|
+
}
|
932
|
+
|
933
|
+
#endif /* HAVE_SIMD_NEON */
|
934
|
+
|
761
935
|
void oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out out) {
|
762
|
-
size_t
|
763
|
-
char
|
936
|
+
size_t size;
|
937
|
+
char *cmap;
|
938
|
+
#ifdef HAVE_SIMD_NEON
|
939
|
+
uint8x16x4_t *cmap_neon = NULL;
|
940
|
+
int neon_table_size;
|
941
|
+
#endif /* HAVE_SIMD_NEON */
|
764
942
|
const char *orig = str;
|
765
943
|
bool has_hi = false;
|
766
944
|
bool do_unicode_validation = false;
|
@@ -792,7 +970,11 @@ void oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out ou
|
|
792
970
|
long sz;
|
793
971
|
|
794
972
|
cmap = rails_xss_friendly_chars;
|
795
|
-
|
973
|
+
#ifdef HAVE_SIMD_NEON
|
974
|
+
cmap_neon = rails_xss_friendly_chars_neon;
|
975
|
+
neon_table_size = 4;
|
976
|
+
#endif /* HAVE_SIMD_NEON */
|
977
|
+
sz = rails_xss_friendly_size((uint8_t *)str, cnt);
|
796
978
|
if (sz < 0) {
|
797
979
|
has_hi = true;
|
798
980
|
size = (size_t)-sz;
|
@@ -805,7 +987,11 @@ void oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out ou
|
|
805
987
|
case RailsEsc: {
|
806
988
|
long sz;
|
807
989
|
cmap = rails_friendly_chars;
|
808
|
-
|
990
|
+
#ifdef HAVE_SIMD_NEON
|
991
|
+
cmap_neon = rails_friendly_chars_neon;
|
992
|
+
neon_table_size = 2;
|
993
|
+
#endif /* HAVE_SIMD_NEON */
|
994
|
+
sz = rails_friendly_size((uint8_t *)str, cnt);
|
809
995
|
if (sz < 0) {
|
810
996
|
has_hi = true;
|
811
997
|
size = (size_t)-sz;
|
@@ -816,7 +1002,12 @@ void oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out ou
|
|
816
1002
|
break;
|
817
1003
|
}
|
818
1004
|
case JSONEsc:
|
819
|
-
default: cmap = hibit_friendly_chars;
|
1005
|
+
default: cmap = hibit_friendly_chars;
|
1006
|
+
#ifdef HAVE_SIMD_NEON
|
1007
|
+
cmap_neon = hibit_friendly_chars_neon;
|
1008
|
+
neon_table_size = 2;
|
1009
|
+
#endif /* HAVE_SIMD_NEON */
|
1010
|
+
size = hibit_friendly_size((uint8_t *)str, cnt);
|
820
1011
|
}
|
821
1012
|
assure_size(out, size + BUFFER_EXTRA);
|
822
1013
|
*out->cur++ = '"';
|
@@ -842,8 +1033,116 @@ void oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out ou
|
|
842
1033
|
if (is_sym) {
|
843
1034
|
*out->cur++ = ':';
|
844
1035
|
}
|
1036
|
+
#ifdef HAVE_SIMD_NEON
|
1037
|
+
const char *chunk_start;
|
1038
|
+
const char *chunk_end;
|
1039
|
+
const char *cursor = str;
|
1040
|
+
int neon_state = (cmap_neon != NULL) ? 1 : 4;
|
1041
|
+
char matches[16];
|
1042
|
+
bool do_hi_validation = false;
|
1043
|
+
// uint64_t neon_match_mask = 0;
|
1044
|
+
#define SEARCH_FLUSH \
|
1045
|
+
if (str > cursor) { \
|
1046
|
+
APPEND_CHARS(out->cur, cursor, str - cursor); \
|
1047
|
+
cursor = str; \
|
1048
|
+
}
|
1049
|
+
|
1050
|
+
loop:
|
1051
|
+
#endif /* HAVE_SIMD_NEON */
|
845
1052
|
for (; str < end; str++) {
|
846
|
-
|
1053
|
+
char action = 0;
|
1054
|
+
#ifdef HAVE_SIMD_NEON
|
1055
|
+
/* neon_state:
|
1056
|
+
* 1: Scanning for matches. There must be at least
|
1057
|
+
sizeof(uint8x16_t) bytes of input data to use SIMD and
|
1058
|
+
cmap_neon must be non-null.
|
1059
|
+
* 2: Matches have been found. Will set str to the position of the
|
1060
|
+
* next match and set the state to 3.
|
1061
|
+
* If there are no more matches it will transition to state 1.
|
1062
|
+
* 4: Fallback to the scalar algorithm. Not enough data to use
|
1063
|
+
* SIMD.
|
1064
|
+
*/
|
1065
|
+
#define NEON_SET_STATE(state) \
|
1066
|
+
neon_state = state; \
|
1067
|
+
goto loop;
|
1068
|
+
#define NEON_RETURN_TO_STATE(state) neon_state = state;
|
1069
|
+
switch (neon_state) {
|
1070
|
+
case 1: {
|
1071
|
+
while (true) {
|
1072
|
+
const char *chunk_ptr = NULL;
|
1073
|
+
if (str + sizeof(uint8x16_t) <= end) {
|
1074
|
+
chunk_ptr = str;
|
1075
|
+
chunk_start = str;
|
1076
|
+
chunk_end = str + sizeof(uint8x16_t);
|
1077
|
+
} else if ((end - str) >= SIMD_MINIMUM_THRESHOLD) {
|
1078
|
+
memset(out->cur, 'A', sizeof(uint8x16_t));
|
1079
|
+
memcpy(out->cur, str, (end - str));
|
1080
|
+
chunk_ptr = out->cur;
|
1081
|
+
chunk_start = str;
|
1082
|
+
chunk_end = end;
|
1083
|
+
} else {
|
1084
|
+
SEARCH_FLUSH;
|
1085
|
+
NEON_SET_STATE(4);
|
1086
|
+
break; /* Unreachable */
|
1087
|
+
}
|
1088
|
+
neon_match_result result = neon_update(chunk_ptr,
|
1089
|
+
cmap_neon,
|
1090
|
+
neon_table_size,
|
1091
|
+
do_unicode_validation,
|
1092
|
+
has_hi);
|
1093
|
+
if ((result.do_unicode_validation) || vmaxvq_u8(result.needs_escape) != 0) {
|
1094
|
+
SEARCH_FLUSH;
|
1095
|
+
uint8x16_t actions = vaddq_u8(result.needs_escape, vdupq_n_u8('1'));
|
1096
|
+
do_hi_validation = result.do_unicode_validation;
|
1097
|
+
vst1q_u8((unsigned char *)matches, actions);
|
1098
|
+
NEON_SET_STATE(2);
|
1099
|
+
break; /* Unreachable */
|
1100
|
+
}
|
1101
|
+
str = chunk_end;
|
1102
|
+
}
|
1103
|
+
// We must have run out of data to use SIMD. Go to state 4.
|
1104
|
+
SEARCH_FLUSH;
|
1105
|
+
NEON_SET_STATE(4);
|
1106
|
+
} break;
|
1107
|
+
case 3:
|
1108
|
+
cursor = str;
|
1109
|
+
// This fall through is intentional. We return to state 3 after we process
|
1110
|
+
// a byte (or multiple). We return to this state to ensure the cursor is
|
1111
|
+
// pointing to the correct location. We then resume looking for matches
|
1112
|
+
// within the previously processed chunk.
|
1113
|
+
case 2:
|
1114
|
+
if (str >= chunk_end) {
|
1115
|
+
NEON_SET_STATE(1);
|
1116
|
+
}
|
1117
|
+
if (!do_hi_validation) {
|
1118
|
+
long i = str - chunk_start;
|
1119
|
+
for (; str < chunk_end; i++) {
|
1120
|
+
if ((action = matches[i]) != '1') {
|
1121
|
+
break;
|
1122
|
+
}
|
1123
|
+
*out->cur++ = *str++;
|
1124
|
+
}
|
1125
|
+
// The loop above may have advanced str, copying the bytes it passed directly to out->cur.
|
1126
|
+
// Ensure cursor is set appropriately.
|
1127
|
+
cursor = str;
|
1128
|
+
if (str >= chunk_end) {
|
1129
|
+
// We must have advanced past the end... we are done.
|
1130
|
+
NEON_SET_STATE(1);
|
1131
|
+
}
|
1132
|
+
} else {
|
1133
|
+
long match_index = str - chunk_start;
|
1134
|
+
action = matches[match_index];
|
1135
|
+
}
|
1136
|
+
NEON_RETURN_TO_STATE(3);
|
1137
|
+
break;
|
1138
|
+
case 4: action = cmap[(uint8_t)*str];
|
1139
|
+
}
|
1140
|
+
#undef NEON_SET_STATE
|
1141
|
+
#undef NEON_RETURN_TO_STATE
|
1142
|
+
#else
|
1143
|
+
action = cmap[(uint8_t)*str];
|
1144
|
+
#endif /* HAVE_SIMD_NEON */
|
1145
|
+
switch (action) {
|
847
1146
|
case '1':
|
848
1147
|
if (do_unicode_validation && check_start <= str) {
|
849
1148
|
if (0 != (0x80 & (uint8_t)*str)) {
|
@@ -906,7 +1205,7 @@ void oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out ou
|
|
906
1205
|
*out->cur++ = '"';
|
907
1206
|
}
|
908
1207
|
if (do_unicode_validation && 0 < str - orig && 0 != (0x80 & *(str - 1))) {
|
909
|
-
uint8_t c = (uint8_t)
|
1208
|
+
uint8_t c = (uint8_t)*(str - 1);
|
910
1209
|
int i;
|
911
1210
|
int scnt = (int)(str - orig);
|
912
1211
|
|
@@ -957,7 +1256,7 @@ void oj_dump_class(VALUE obj, int depth, Out out, bool as_ok) {
|
|
957
1256
|
void oj_dump_obj_to_s(VALUE obj, Out out) {
|
958
1257
|
volatile VALUE rstr = oj_safe_string_convert(obj);
|
959
1258
|
|
960
|
-
oj_dump_cstr(RSTRING_PTR(rstr),
|
1259
|
+
oj_dump_cstr(RSTRING_PTR(rstr), RSTRING_LEN(rstr), 0, 0, out);
|
961
1260
|
}
|
962
1261
|
|
963
1262
|
void oj_dump_raw(const char *str, size_t cnt, Out out) {
|
@@ -1092,7 +1391,7 @@ void oj_dump_fixnum(VALUE obj, int depth, Out out, bool as_ok) {
|
|
1092
1391
|
|
1093
1392
|
void oj_dump_bignum(VALUE obj, int depth, Out out, bool as_ok) {
|
1094
1393
|
volatile VALUE rs = rb_big2str(obj, 10);
|
1095
|
-
|
1394
|
+
size_t cnt = RSTRING_LEN(rs);
|
1096
1395
|
bool dump_as_string = false;
|
1097
1396
|
|
1098
1397
|
if (out->opts->int_range_max != 0 || out->opts->int_range_min != 0) { // Bignum cannot be inside of Fixnum range
|
@@ -1114,7 +1413,7 @@ void oj_dump_float(VALUE obj, int depth, Out out, bool as_ok) {
|
|
1114
1413
|
char buf[64];
|
1115
1414
|
char *b;
|
1116
1415
|
double d = rb_num2dbl(obj);
|
1117
|
-
|
1416
|
+
size_t cnt = 0;
|
1118
1417
|
|
1119
1418
|
if (0.0 == d) {
|
1120
1419
|
b = buf;
|
@@ -1225,7 +1524,7 @@ void oj_dump_float(VALUE obj, int depth, Out out, bool as_ok) {
|
|
1225
1524
|
} else if (0 == out->opts->float_prec) {
|
1226
1525
|
volatile VALUE rstr = oj_safe_string_convert(obj);
|
1227
1526
|
|
1228
|
-
cnt =
|
1527
|
+
cnt = RSTRING_LEN(rstr);
|
1229
1528
|
if ((int)sizeof(buf) <= cnt) {
|
1230
1529
|
cnt = sizeof(buf) - 1;
|
1231
1530
|
}
|
@@ -1239,8 +1538,8 @@ void oj_dump_float(VALUE obj, int depth, Out out, bool as_ok) {
|
|
1239
1538
|
*out->cur = '\0';
|
1240
1539
|
}
|
1241
1540
|
|
1242
|
-
|
1243
|
-
|
1541
|
+
size_t oj_dump_float_printf(char *buf, size_t blen, VALUE obj, double d, const char *format) {
|
1542
|
+
size_t cnt = snprintf(buf, blen, format, d);
|
1244
1543
|
|
1245
1544
|
// Round off issues at 16 significant digits so check for obvious ones of
|
1246
1545
|
// 0001 and 9999.
|
@@ -1248,7 +1547,7 @@ int oj_dump_float_printf(char *buf, size_t blen, VALUE obj, double d, const char
|
|
1248
1547
|
volatile VALUE rstr = oj_safe_string_convert(obj);
|
1249
1548
|
|
1250
1549
|
strcpy(buf, RSTRING_PTR(rstr));
|
1251
|
-
cnt =
|
1550
|
+
cnt = RSTRING_LEN(rstr);
|
1252
1551
|
}
|
1253
1552
|
return cnt;
|
1254
1553
|
}
|
data/ext/oj/dump.h
CHANGED
@@ -7,12 +7,17 @@
|
|
7
7
|
#include <ruby.h>
|
8
8
|
|
9
9
|
#include "oj.h"
|
10
|
+
#include "simd.h"
|
10
11
|
|
11
12
|
#define MAX_DEPTH 1000
|
12
13
|
|
13
14
|
// Extra padding at end of buffer.
|
14
15
|
#define BUFFER_EXTRA 64
|
15
16
|
|
17
|
+
#ifdef HAVE_SIMD_NEON
|
18
|
+
extern void initialize_neon(void);
|
19
|
+
#endif /* HAVE_SIMD_NEON */
|
20
|
+
|
16
21
|
extern void oj_dump_nil(VALUE obj, int depth, Out out, bool as_ok);
|
17
22
|
extern void oj_dump_true(VALUE obj, int depth, Out out, bool as_ok);
|
18
23
|
extern void oj_dump_false(VALUE obj, int depth, Out out, bool as_ok);
|
@@ -30,7 +35,7 @@ extern void oj_dump_xml_time(VALUE obj, Out out);
|
|
30
35
|
extern void oj_dump_time(VALUE obj, Out out, int withZone);
|
31
36
|
extern void oj_dump_obj_to_s(VALUE obj, Out out);
|
32
37
|
|
33
|
-
extern const char *oj_nan_str(VALUE obj, int opt, int mode, bool plus,
|
38
|
+
extern const char *oj_nan_str(VALUE obj, int opt, int mode, bool plus, size_t *lenp);
|
34
39
|
|
35
40
|
// initialize an out buffer with the provided stack allocated memory
|
36
41
|
extern void oj_out_init(Out out);
|
@@ -53,7 +58,7 @@ extern void oj_dump_raw_json(VALUE obj, int depth, Out out);
|
|
53
58
|
extern VALUE oj_add_to_json(int argc, VALUE *argv, VALUE self);
|
54
59
|
extern VALUE oj_remove_to_json(int argc, VALUE *argv, VALUE self);
|
55
60
|
|
56
|
-
extern
|
61
|
+
extern size_t oj_dump_float_printf(char *buf, size_t blen, VALUE obj, double d, const char *format);
|
57
62
|
|
58
63
|
extern time_t oj_sec_from_time_hard_way(VALUE obj);
|
59
64
|
|