oj 3.16.11 → 3.16.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/ext/oj/dump.c +193 -196
- data/ext/oj/dump_compat.c +1 -1
- data/ext/oj/extconf.rb +6 -7
- data/ext/oj/parse.c +136 -16
- data/ext/oj/rails.c +4 -2
- data/ext/oj/simd.h +38 -1
- data/lib/oj/version.rb +1 -1
- metadata +5 -5
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: fc0290fa1cfe6af1094de1d7188836e0c09cb04f2f08401de118253026604650
|
|
4
|
+
data.tar.gz: de5258e96984a21afb2fac946fe28ad255926893a6157f2874445edf10aa8bbe
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d7870818fd86043a17b834756b67a4009a6f7ef60baf53b02a0b0d4431ccba723d9e533553bea04ae46ae9f233e447b79c4106e6827782bd0c2ffb9c332081a3
|
|
7
|
+
data.tar.gz: fd3966ac7fb5da9f1a5ebb68f4a8f5b9a5f9fa1a1255e93dfef078f66f00a6af5bb7e37676441f7d6229b29222741a2bc7b75164fd445a39b906ef904946d41b
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
# CHANGELOG
|
|
2
2
|
|
|
3
|
+
## 3.16.13 - 2025-12-05
|
|
4
|
+
|
|
5
|
+
- Fixed rails encoding for Hash and Array subclasses.
|
|
6
|
+
|
|
7
|
+
## 3.16.12 - 2025-10-29
|
|
8
|
+
|
|
9
|
+
- Fixed dump realloc bug that occurred when using the compat mode dump options.
|
|
10
|
+
|
|
3
11
|
## 3.16.11 - 2025-05-29
|
|
4
12
|
|
|
5
13
|
- Fixed range encoding with the :circular option
|
data/ext/oj/dump.c
CHANGED
|
@@ -252,38 +252,46 @@ inline static size_t hixss_friendly_size(const uint8_t *str, size_t len) {
|
|
|
252
252
|
}
|
|
253
253
|
|
|
254
254
|
inline static long rails_xss_friendly_size(const uint8_t *str, size_t len) {
|
|
255
|
-
long
|
|
256
|
-
|
|
255
|
+
long size = 0;
|
|
256
|
+
uint32_t hi = 0;
|
|
257
257
|
|
|
258
258
|
#ifdef HAVE_SIMD_NEON
|
|
259
259
|
size_t i = 0;
|
|
260
260
|
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
size += sizeof(uint8x16_t);
|
|
261
|
+
if (len >= sizeof(uint8x16_t)) {
|
|
262
|
+
uint8x16_t has_some_hibit = vdupq_n_u8(0);
|
|
263
|
+
uint8x16_t hibit = vdupq_n_u8(0x80);
|
|
265
264
|
|
|
266
|
-
uint8x16_t
|
|
265
|
+
for (; i + sizeof(uint8x16_t) <= len; i += sizeof(uint8x16_t), str += sizeof(uint8x16_t)) {
|
|
266
|
+
size += sizeof(uint8x16_t);
|
|
267
267
|
|
|
268
|
-
|
|
269
|
-
has_some_hibit = vorrq_u8(has_some_hibit, vandq_u8(chunk, hibit));
|
|
268
|
+
uint8x16_t chunk = vld1q_u8(str);
|
|
270
269
|
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
270
|
+
// Check to see if any of these bytes have the high bit set.
|
|
271
|
+
has_some_hibit = vorrq_u8(has_some_hibit, vandq_u8(chunk, hibit));
|
|
272
|
+
|
|
273
|
+
uint8x16_t tmp1 = vqtbl4q_u8(rails_xss_friendly_chars_neon[0], chunk);
|
|
274
|
+
uint8x16_t tmp2 = vqtbl4q_u8(rails_xss_friendly_chars_neon[1], veorq_u8(chunk, vdupq_n_u8(0x40)));
|
|
275
|
+
uint8x16_t tmp3 = vqtbl4q_u8(rails_xss_friendly_chars_neon[2], veorq_u8(chunk, vdupq_n_u8(0x80)));
|
|
276
|
+
uint8x16_t tmp4 = vqtbl4q_u8(rails_xss_friendly_chars_neon[3], veorq_u8(chunk, vdupq_n_u8(0xc0)));
|
|
277
|
+
uint8x16_t result = vorrq_u8(tmp4, vorrq_u8(tmp3, vorrq_u8(tmp1, tmp2)));
|
|
278
|
+
uint8_t tmp = vaddvq_u8(result);
|
|
279
|
+
size += tmp;
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
// 'hi' should be set if any of the bytes we processed have the high bit set. It doesn't matter which ones.
|
|
283
|
+
hi = vmaxvq_u8(has_some_hibit) != 0;
|
|
278
284
|
}
|
|
279
285
|
|
|
280
|
-
|
|
281
|
-
hi = vmaxvq_u8(has_some_hibit) != 0;
|
|
286
|
+
size_t len_remaining = len - i;
|
|
282
287
|
|
|
283
288
|
for (; i < len; str++, i++) {
|
|
284
|
-
size += rails_xss_friendly_chars[*str]
|
|
289
|
+
size += rails_xss_friendly_chars[*str];
|
|
285
290
|
hi |= *str & 0x80;
|
|
286
291
|
}
|
|
292
|
+
|
|
293
|
+
size -= (len_remaining * ((size_t)'0'));
|
|
294
|
+
|
|
287
295
|
if (0 == hi) {
|
|
288
296
|
return size;
|
|
289
297
|
}
|
|
@@ -302,37 +310,43 @@ inline static long rails_xss_friendly_size(const uint8_t *str, size_t len) {
|
|
|
302
310
|
}
|
|
303
311
|
|
|
304
312
|
inline static size_t rails_friendly_size(const uint8_t *str, size_t len) {
|
|
305
|
-
long
|
|
306
|
-
|
|
313
|
+
long size = 0;
|
|
314
|
+
uint32_t hi = 0;
|
|
307
315
|
#ifdef HAVE_SIMD_NEON
|
|
308
|
-
size_t i
|
|
316
|
+
size_t i = 0;
|
|
317
|
+
long extra = 0;
|
|
309
318
|
|
|
310
|
-
|
|
311
|
-
|
|
319
|
+
if (len >= sizeof(uint8x16_t)) {
|
|
320
|
+
uint8x16_t has_some_hibit = vdupq_n_u8(0);
|
|
321
|
+
uint8x16_t hibit = vdupq_n_u8(0x80);
|
|
312
322
|
|
|
313
|
-
|
|
314
|
-
|
|
323
|
+
for (; i + sizeof(uint8x16_t) <= len; i += sizeof(uint8x16_t), str += sizeof(uint8x16_t)) {
|
|
324
|
+
size += sizeof(uint8x16_t);
|
|
315
325
|
|
|
316
|
-
|
|
317
|
-
|
|
326
|
+
// See https://lemire.me/blog/2019/07/23/arbitrary-byte-to-byte-maps-using-arm-neon/
|
|
327
|
+
uint8x16_t chunk = vld1q_u8(str);
|
|
318
328
|
|
|
319
|
-
|
|
320
|
-
|
|
329
|
+
// Check to see if any of these bytes have the high bit set.
|
|
330
|
+
has_some_hibit = vorrq_u8(has_some_hibit, vandq_u8(chunk, hibit));
|
|
321
331
|
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
332
|
+
uint8x16_t tmp1 = vqtbl4q_u8(rails_friendly_chars_neon[0], chunk);
|
|
333
|
+
uint8x16_t tmp2 = vqtbl4q_u8(rails_friendly_chars_neon[1], veorq_u8(chunk, vdupq_n_u8(0x40)));
|
|
334
|
+
uint8x16_t result = vorrq_u8(tmp1, tmp2);
|
|
335
|
+
uint8_t tmp = vaddvq_u8(result);
|
|
336
|
+
size += tmp;
|
|
337
|
+
}
|
|
328
338
|
|
|
329
|
-
|
|
330
|
-
|
|
339
|
+
// 'hi' should be set if any of the bytes we processed have the high bit set. It doesn't matter which ones.
|
|
340
|
+
hi = vmaxvq_u8(has_some_hibit) != 0;
|
|
341
|
+
}
|
|
331
342
|
|
|
332
|
-
for (; i < len; str++, i++) {
|
|
333
|
-
size += rails_friendly_chars[*str]
|
|
343
|
+
for (; i < len; str++, i++, extra++) {
|
|
344
|
+
size += rails_friendly_chars[*str];
|
|
334
345
|
hi |= *str & 0x80;
|
|
335
346
|
}
|
|
347
|
+
|
|
348
|
+
size -= (extra * ((size_t)'0'));
|
|
349
|
+
|
|
336
350
|
if (0 == hi) {
|
|
337
351
|
return size;
|
|
338
352
|
}
|
|
@@ -896,6 +910,12 @@ void oj_dump_raw_json(VALUE obj, int depth, Out out) {
|
|
|
896
910
|
}
|
|
897
911
|
}
|
|
898
912
|
|
|
913
|
+
#if defined(__clang__) || defined(__GNUC__)
|
|
914
|
+
#define FORCE_INLINE __attribute__((always_inline))
|
|
915
|
+
#else
|
|
916
|
+
#define FORCE_INLINE
|
|
917
|
+
#endif
|
|
918
|
+
|
|
899
919
|
#ifdef HAVE_SIMD_NEON
|
|
900
920
|
typedef struct _neon_match_result {
|
|
901
921
|
uint8x16_t needs_escape;
|
|
@@ -903,12 +923,6 @@ typedef struct _neon_match_result {
|
|
|
903
923
|
bool do_unicode_validation;
|
|
904
924
|
} neon_match_result;
|
|
905
925
|
|
|
906
|
-
#if defined(__clang__) || defined(__GNUC__)
|
|
907
|
-
#define FORCE_INLINE __attribute__((always_inline))
|
|
908
|
-
#else
|
|
909
|
-
#define FORCE_INLINE
|
|
910
|
-
#endif
|
|
911
|
-
|
|
912
926
|
static inline FORCE_INLINE neon_match_result
|
|
913
927
|
neon_update(const char *str, uint8x16x4_t *cmap_neon, int neon_table_size, bool do_unicode_validation, bool has_hi) {
|
|
914
928
|
neon_match_result result = {.has_some_hibit = false, .do_unicode_validation = false};
|
|
@@ -932,12 +946,83 @@ neon_update(const char *str, uint8x16x4_t *cmap_neon, int neon_table_size, bool
|
|
|
932
946
|
|
|
933
947
|
#endif /* HAVE_SIMD_NEON */
|
|
934
948
|
|
|
949
|
+
static inline FORCE_INLINE const char *process_character(char action,
|
|
950
|
+
const char *str,
|
|
951
|
+
const char *end,
|
|
952
|
+
Out out,
|
|
953
|
+
const char *orig,
|
|
954
|
+
bool do_unicode_validation,
|
|
955
|
+
const char **check_start_) {
|
|
956
|
+
const char *check_start = *check_start_;
|
|
957
|
+
switch (action) {
|
|
958
|
+
case '1':
|
|
959
|
+
if (do_unicode_validation && check_start <= str) {
|
|
960
|
+
if (0 != (0x80 & (uint8_t)*str)) {
|
|
961
|
+
if (0xC0 == (0xC0 & (uint8_t)*str)) {
|
|
962
|
+
*check_start_ = check_unicode(str, end, orig);
|
|
963
|
+
} else {
|
|
964
|
+
raise_invalid_unicode(orig, (int)(end - orig), (int)(str - orig));
|
|
965
|
+
}
|
|
966
|
+
}
|
|
967
|
+
}
|
|
968
|
+
*out->cur++ = *str;
|
|
969
|
+
break;
|
|
970
|
+
case '2':
|
|
971
|
+
*out->cur++ = '\\';
|
|
972
|
+
switch (*str) {
|
|
973
|
+
case '\\': *out->cur++ = '\\'; break;
|
|
974
|
+
case '\b': *out->cur++ = 'b'; break;
|
|
975
|
+
case '\t': *out->cur++ = 't'; break;
|
|
976
|
+
case '\n': *out->cur++ = 'n'; break;
|
|
977
|
+
case '\f': *out->cur++ = 'f'; break;
|
|
978
|
+
case '\r': *out->cur++ = 'r'; break;
|
|
979
|
+
default: *out->cur++ = *str; break;
|
|
980
|
+
}
|
|
981
|
+
break;
|
|
982
|
+
case '3': // Unicode
|
|
983
|
+
if (0xe2 == (uint8_t)*str && do_unicode_validation && 2 <= end - str) {
|
|
984
|
+
if (0x80 == (uint8_t)str[1] && (0xa8 == (uint8_t)str[2] || 0xa9 == (uint8_t)str[2])) {
|
|
985
|
+
str = dump_unicode(str, end, out, orig);
|
|
986
|
+
} else {
|
|
987
|
+
*check_start_ = check_unicode(str, end, orig);
|
|
988
|
+
*out->cur++ = *str;
|
|
989
|
+
}
|
|
990
|
+
break;
|
|
991
|
+
}
|
|
992
|
+
str = dump_unicode(str, end, out, orig);
|
|
993
|
+
break;
|
|
994
|
+
case '6': // control characters
|
|
995
|
+
if (*(uint8_t *)str < 0x80) {
|
|
996
|
+
if (0 == (uint8_t)*str && out->opts->dump_opts.omit_null_byte) {
|
|
997
|
+
break;
|
|
998
|
+
}
|
|
999
|
+
APPEND_CHARS(out->cur, "\\u00", 4);
|
|
1000
|
+
dump_hex((uint8_t)*str, out);
|
|
1001
|
+
} else {
|
|
1002
|
+
if (0xe2 == (uint8_t)*str && do_unicode_validation && 2 <= end - str) {
|
|
1003
|
+
if (0x80 == (uint8_t)str[1] && (0xa8 == (uint8_t)str[2] || 0xa9 == (uint8_t)str[2])) {
|
|
1004
|
+
str = dump_unicode(str, end, out, orig);
|
|
1005
|
+
} else {
|
|
1006
|
+
*check_start_ = check_unicode(str, end, orig);
|
|
1007
|
+
*out->cur++ = *str;
|
|
1008
|
+
}
|
|
1009
|
+
break;
|
|
1010
|
+
}
|
|
1011
|
+
str = dump_unicode(str, end, out, orig);
|
|
1012
|
+
}
|
|
1013
|
+
break;
|
|
1014
|
+
default: break; // ignore, should never happen if the table is correct
|
|
1015
|
+
}
|
|
1016
|
+
|
|
1017
|
+
return str;
|
|
1018
|
+
}
|
|
1019
|
+
|
|
935
1020
|
void oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out out) {
|
|
936
1021
|
size_t size;
|
|
937
1022
|
char *cmap;
|
|
938
1023
|
#ifdef HAVE_SIMD_NEON
|
|
939
|
-
uint8x16x4_t *cmap_neon
|
|
940
|
-
int neon_table_size;
|
|
1024
|
+
uint8x16x4_t *cmap_neon = NULL;
|
|
1025
|
+
int neon_table_size = 0;
|
|
941
1026
|
#endif /* HAVE_SIMD_NEON */
|
|
942
1027
|
const char *orig = str;
|
|
943
1028
|
bool has_hi = false;
|
|
@@ -1036,171 +1121,83 @@ void oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out ou
|
|
|
1036
1121
|
#ifdef HAVE_SIMD_NEON
|
|
1037
1122
|
const char *chunk_start;
|
|
1038
1123
|
const char *chunk_end;
|
|
1039
|
-
const char *cursor
|
|
1040
|
-
|
|
1124
|
+
const char *cursor = str;
|
|
1125
|
+
bool use_neon = (cmap_neon != NULL && cnt >= (sizeof(uint8x16_t))) ? true : false;
|
|
1041
1126
|
char matches[16];
|
|
1042
|
-
bool do_hi_validation = false;
|
|
1043
|
-
// uint64_t neon_match_mask = 0;
|
|
1044
1127
|
#define SEARCH_FLUSH \
|
|
1045
1128
|
if (str > cursor) { \
|
|
1046
1129
|
APPEND_CHARS(out->cur, cursor, str - cursor); \
|
|
1047
1130
|
cursor = str; \
|
|
1048
1131
|
}
|
|
1049
1132
|
|
|
1050
|
-
loop:
|
|
1051
1133
|
#endif /* HAVE_SIMD_NEON */
|
|
1052
|
-
for (; str < end; str++) {
|
|
1053
|
-
char action = 0;
|
|
1054
1134
|
#ifdef HAVE_SIMD_NEON
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
#define NEON_RETURN_TO_STATE(state) neon_state = state;
|
|
1069
|
-
switch (neon_state) {
|
|
1070
|
-
case 1: {
|
|
1071
|
-
while (true) {
|
|
1072
|
-
const char *chunk_ptr = NULL;
|
|
1073
|
-
if (str + sizeof(uint8x16_t) <= end) {
|
|
1074
|
-
chunk_ptr = str;
|
|
1075
|
-
chunk_start = str;
|
|
1076
|
-
chunk_end = str + sizeof(uint8x16_t);
|
|
1077
|
-
} else if ((end - str) >= SIMD_MINIMUM_THRESHOLD) {
|
|
1078
|
-
memset(out->cur, 'A', sizeof(uint8x16_t));
|
|
1079
|
-
memcpy(out->cur, str, (end - str));
|
|
1080
|
-
chunk_ptr = out->cur;
|
|
1081
|
-
chunk_start = str;
|
|
1082
|
-
chunk_end = end;
|
|
1083
|
-
} else {
|
|
1084
|
-
SEARCH_FLUSH;
|
|
1085
|
-
NEON_SET_STATE(4);
|
|
1086
|
-
break; /* Unreachable */
|
|
1087
|
-
}
|
|
1088
|
-
neon_match_result result = neon_update(chunk_ptr,
|
|
1089
|
-
cmap_neon,
|
|
1090
|
-
neon_table_size,
|
|
1091
|
-
do_unicode_validation,
|
|
1092
|
-
has_hi);
|
|
1093
|
-
if ((result.do_unicode_validation) || vmaxvq_u8(result.needs_escape) != 0) {
|
|
1094
|
-
SEARCH_FLUSH;
|
|
1095
|
-
uint8x16_t actions = vaddq_u8(result.needs_escape, vdupq_n_u8('1'));
|
|
1096
|
-
do_hi_validation = result.do_unicode_validation;
|
|
1097
|
-
vst1q_u8((unsigned char *)matches, actions);
|
|
1098
|
-
NEON_SET_STATE(2);
|
|
1099
|
-
break; /* Unreachable */
|
|
1100
|
-
}
|
|
1101
|
-
str = chunk_end;
|
|
1102
|
-
}
|
|
1103
|
-
// We must have run out of data to use SIMD. Go to state 4.
|
|
1104
|
-
SEARCH_FLUSH;
|
|
1105
|
-
NEON_SET_STATE(4);
|
|
1106
|
-
} break;
|
|
1107
|
-
case 3:
|
|
1108
|
-
cursor = str;
|
|
1109
|
-
// This fall through is intentional. We return to state 3 after we process
|
|
1110
|
-
// a byte (or multiple). We return to this state to ensure the cursor is
|
|
1111
|
-
// pointing to the correct location. We then resume looking for matches
|
|
1112
|
-
// within the previously processed chunk.
|
|
1113
|
-
case 2:
|
|
1114
|
-
if (str >= chunk_end) {
|
|
1115
|
-
NEON_SET_STATE(1);
|
|
1116
|
-
}
|
|
1117
|
-
if (!do_hi_validation) {
|
|
1118
|
-
long i = str - chunk_start;
|
|
1119
|
-
for (; str < chunk_end; i++) {
|
|
1120
|
-
if ((action = matches[i]) != '1') {
|
|
1121
|
-
break;
|
|
1122
|
-
}
|
|
1123
|
-
*out->cur++ = *str++;
|
|
1124
|
-
}
|
|
1125
|
-
// The loop above may have advanced str and directly output them to out->cur.
|
|
1126
|
-
// Ensure cursor is set appropriately.
|
|
1127
|
-
cursor = str;
|
|
1128
|
-
if (str >= chunk_end) {
|
|
1129
|
-
// We must have advanced past the end... we are done.
|
|
1130
|
-
NEON_SET_STATE(1);
|
|
1131
|
-
}
|
|
1135
|
+
if (use_neon) {
|
|
1136
|
+
while (str < end) {
|
|
1137
|
+
const char *chunk_ptr = NULL;
|
|
1138
|
+
if (str + sizeof(uint8x16_t) <= end) {
|
|
1139
|
+
chunk_ptr = str;
|
|
1140
|
+
chunk_start = str;
|
|
1141
|
+
chunk_end = str + sizeof(uint8x16_t);
|
|
1142
|
+
} else if ((end - str) >= SIMD_MINIMUM_THRESHOLD) {
|
|
1143
|
+
memset(out->cur, 'A', sizeof(uint8x16_t));
|
|
1144
|
+
memcpy(out->cur, str, (end - str));
|
|
1145
|
+
chunk_ptr = out->cur;
|
|
1146
|
+
chunk_start = str;
|
|
1147
|
+
chunk_end = end;
|
|
1132
1148
|
} else {
|
|
1133
|
-
|
|
1134
|
-
action = matches[match_index];
|
|
1149
|
+
break;
|
|
1135
1150
|
}
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1151
|
+
neon_match_result result = neon_update(chunk_ptr,
|
|
1152
|
+
cmap_neon,
|
|
1153
|
+
neon_table_size,
|
|
1154
|
+
do_unicode_validation,
|
|
1155
|
+
has_hi);
|
|
1156
|
+
if ((result.do_unicode_validation) || vmaxvq_u8(result.needs_escape) != 0) {
|
|
1157
|
+
SEARCH_FLUSH;
|
|
1158
|
+
uint8x16_t actions = vaddq_u8(result.needs_escape, vdupq_n_u8('1'));
|
|
1159
|
+
uint8_t num_matches = vaddvq_u8(vandq_u8(result.needs_escape, vdupq_n_u8(0x1)));
|
|
1160
|
+
vst1q_u8((unsigned char *)matches, actions);
|
|
1161
|
+
bool process_each = result.do_unicode_validation || (num_matches > sizeof(uint8x16_t) / 2);
|
|
1162
|
+
// If no byte in this chunk had the high bit set then we can skip
|
|
1163
|
+
// all of the '1' bytes by directly copying them to the output.
|
|
1164
|
+
if (!process_each) {
|
|
1165
|
+
while (str < chunk_end) {
|
|
1166
|
+
long i = str - chunk_start;
|
|
1167
|
+
char action;
|
|
1168
|
+
while (str < chunk_end && (action = matches[i++]) == '1') {
|
|
1169
|
+
*out->cur++ = *str++;
|
|
1170
|
+
}
|
|
1171
|
+
cursor = str;
|
|
1172
|
+
if (str >= chunk_end) {
|
|
1173
|
+
break;
|
|
1174
|
+
}
|
|
1175
|
+
str = process_character(action, str, end, out, orig, do_unicode_validation, &check_start);
|
|
1176
|
+
str++;
|
|
1153
1177
|
}
|
|
1154
|
-
}
|
|
1155
|
-
}
|
|
1156
|
-
*out->cur++ = *str;
|
|
1157
|
-
break;
|
|
1158
|
-
case '2':
|
|
1159
|
-
*out->cur++ = '\\';
|
|
1160
|
-
switch (*str) {
|
|
1161
|
-
case '\\': *out->cur++ = '\\'; break;
|
|
1162
|
-
case '\b': *out->cur++ = 'b'; break;
|
|
1163
|
-
case '\t': *out->cur++ = 't'; break;
|
|
1164
|
-
case '\n': *out->cur++ = 'n'; break;
|
|
1165
|
-
case '\f': *out->cur++ = 'f'; break;
|
|
1166
|
-
case '\r': *out->cur++ = 'r'; break;
|
|
1167
|
-
default: *out->cur++ = *str; break;
|
|
1168
|
-
}
|
|
1169
|
-
break;
|
|
1170
|
-
case '3': // Unicode
|
|
1171
|
-
if (0xe2 == (uint8_t)*str && do_unicode_validation && 2 <= end - str) {
|
|
1172
|
-
if (0x80 == (uint8_t)str[1] && (0xa8 == (uint8_t)str[2] || 0xa9 == (uint8_t)str[2])) {
|
|
1173
|
-
str = dump_unicode(str, end, out, orig);
|
|
1174
1178
|
} else {
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
|
|
1178
|
-
|
|
1179
|
-
|
|
1180
|
-
|
|
1181
|
-
|
|
1182
|
-
|
|
1183
|
-
|
|
1184
|
-
|
|
1185
|
-
break;
|
|
1186
|
-
}
|
|
1187
|
-
APPEND_CHARS(out->cur, "\\u00", 4);
|
|
1188
|
-
dump_hex((uint8_t)*str, out);
|
|
1189
|
-
} else {
|
|
1190
|
-
if (0xe2 == (uint8_t)*str && do_unicode_validation && 2 <= end - str) {
|
|
1191
|
-
if (0x80 == (uint8_t)str[1] && (0xa8 == (uint8_t)str[2] || 0xa9 == (uint8_t)str[2])) {
|
|
1192
|
-
str = dump_unicode(str, end, out, orig);
|
|
1193
|
-
} else {
|
|
1194
|
-
check_start = check_unicode(str, end, orig);
|
|
1195
|
-
*out->cur++ = *str;
|
|
1179
|
+
while (str < chunk_end) {
|
|
1180
|
+
long match_index = str - chunk_start;
|
|
1181
|
+
str = process_character(matches[match_index],
|
|
1182
|
+
str,
|
|
1183
|
+
end,
|
|
1184
|
+
out,
|
|
1185
|
+
orig,
|
|
1186
|
+
do_unicode_validation,
|
|
1187
|
+
&check_start);
|
|
1188
|
+
str++;
|
|
1196
1189
|
}
|
|
1197
|
-
break;
|
|
1198
1190
|
}
|
|
1199
|
-
|
|
1191
|
+
cursor = str;
|
|
1192
|
+
continue;
|
|
1200
1193
|
}
|
|
1201
|
-
|
|
1202
|
-
default: break; // ignore, should never happen if the table is correct
|
|
1194
|
+
str = chunk_end;
|
|
1203
1195
|
}
|
|
1196
|
+
SEARCH_FLUSH;
|
|
1197
|
+
}
|
|
1198
|
+
#endif /* HAVE_SIMD_NEON */
|
|
1199
|
+
for (; str < end; str++) {
|
|
1200
|
+
str = process_character(cmap[(uint8_t)*str], str, end, out, orig, do_unicode_validation, &check_start);
|
|
1204
1201
|
}
|
|
1205
1202
|
*out->cur++ = '"';
|
|
1206
1203
|
}
|
data/ext/oj/dump_compat.c
CHANGED
|
@@ -148,10 +148,10 @@ static void dump_array(VALUE a, int depth, Out out, bool as_ok) {
|
|
|
148
148
|
} else {
|
|
149
149
|
size = d2 * out->indent + 2;
|
|
150
150
|
}
|
|
151
|
-
assure_size(out, size * cnt);
|
|
152
151
|
cnt--;
|
|
153
152
|
for (i = 0; i <= cnt; i++) {
|
|
154
153
|
if (out->opts->dump_opts.use) {
|
|
154
|
+
assure_size(out, size);
|
|
155
155
|
if (0 < out->opts->dump_opts.array_size) {
|
|
156
156
|
APPEND_CHARS(out->cur, out->opts->dump_opts.array_nl, out->opts->dump_opts.array_size);
|
|
157
157
|
}
|
data/ext/oj/extconf.rb
CHANGED
|
@@ -35,13 +35,12 @@ have_func('rb_ext_ractor_safe', 'ruby.h')
|
|
|
35
35
|
|
|
36
36
|
dflags['OJ_DEBUG'] = true unless ENV['OJ_DEBUG'].nil?
|
|
37
37
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
end
|
|
38
|
+
# Enable SIMD optimizations - try SSE4.2 on x86_64 for best performance
|
|
39
|
+
# Falls back to SSE2 or compiler defaults if not available
|
|
40
|
+
if try_cflags('-msse4.2')
|
|
41
|
+
$CPPFLAGS += ' -msse4.2'
|
|
42
|
+
elsif try_cflags('-msse2')
|
|
43
|
+
$CPPFLAGS += ' -msse2'
|
|
45
44
|
end
|
|
46
45
|
|
|
47
46
|
if enable_config('trace-log', false)
|
data/ext/oj/parse.c
CHANGED
|
@@ -15,12 +15,9 @@
|
|
|
15
15
|
#include "mem.h"
|
|
16
16
|
#include "oj.h"
|
|
17
17
|
#include "rxclass.h"
|
|
18
|
+
#include "simd.h"
|
|
18
19
|
#include "val_stack.h"
|
|
19
20
|
|
|
20
|
-
#ifdef OJ_USE_SSE4_2
|
|
21
|
-
#include <nmmintrin.h>
|
|
22
|
-
#endif
|
|
23
|
-
|
|
24
21
|
// Workaround in case INFINITY is not defined in math.h or if the OS is CentOS
|
|
25
22
|
#define OJ_INFINITY (1.0 / 0.0)
|
|
26
23
|
|
|
@@ -202,23 +199,143 @@ static inline const char *scan_string_noSIMD(const char *str, const char *end) {
|
|
|
202
199
|
return str;
|
|
203
200
|
}
|
|
204
201
|
|
|
205
|
-
#ifdef
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
202
|
+
#ifdef HAVE_SIMD_SSE4_2
|
|
203
|
+
// Optimized SIMD string scanner using SSE4.2 instructions
|
|
204
|
+
// Uses prefetching and processes multiple chunks in parallel to reduce latency
|
|
205
|
+
static inline const char *scan_string_SSE42(const char *str, const char *end) {
|
|
206
|
+
static const char chars[16] = "\x00\\\"";
|
|
207
|
+
const __m128i terminate = _mm_loadu_si128((const __m128i *)&chars[0]);
|
|
208
|
+
const char *safe_end_64 = end - 64;
|
|
209
|
+
const char *safe_end_16 = end - 16;
|
|
210
|
+
|
|
211
|
+
// Process 64 bytes at a time with parallel SIMD operations
|
|
212
|
+
// This reduces pipeline stalls and improves instruction-level parallelism
|
|
213
|
+
while (str <= safe_end_64) {
|
|
214
|
+
// Prefetch next cache line for better memory throughput
|
|
215
|
+
__builtin_prefetch(str + 64, 0, 0);
|
|
216
|
+
|
|
217
|
+
// Load and compare 4 chunks in parallel
|
|
218
|
+
const __m128i chunk0 = _mm_loadu_si128((const __m128i *)(str));
|
|
219
|
+
const __m128i chunk1 = _mm_loadu_si128((const __m128i *)(str + 16));
|
|
220
|
+
const __m128i chunk2 = _mm_loadu_si128((const __m128i *)(str + 32));
|
|
221
|
+
const __m128i chunk3 = _mm_loadu_si128((const __m128i *)(str + 48));
|
|
222
|
+
|
|
223
|
+
const int r0 = _mm_cmpestri(terminate,
|
|
224
|
+
3,
|
|
225
|
+
chunk0,
|
|
226
|
+
16,
|
|
227
|
+
_SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT);
|
|
228
|
+
if (__builtin_expect(r0 != 16, 0))
|
|
229
|
+
return str + r0;
|
|
230
|
+
|
|
231
|
+
const int r1 = _mm_cmpestri(terminate,
|
|
232
|
+
3,
|
|
233
|
+
chunk1,
|
|
234
|
+
16,
|
|
235
|
+
_SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT);
|
|
236
|
+
if (__builtin_expect(r1 != 16, 0))
|
|
237
|
+
return str + 16 + r1;
|
|
238
|
+
|
|
239
|
+
const int r2 = _mm_cmpestri(terminate,
|
|
240
|
+
3,
|
|
241
|
+
chunk2,
|
|
242
|
+
16,
|
|
243
|
+
_SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT);
|
|
244
|
+
if (__builtin_expect(r2 != 16, 0))
|
|
245
|
+
return str + 32 + r2;
|
|
246
|
+
|
|
247
|
+
const int r3 = _mm_cmpestri(terminate,
|
|
248
|
+
3,
|
|
249
|
+
chunk3,
|
|
250
|
+
16,
|
|
251
|
+
_SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT);
|
|
252
|
+
if (__builtin_expect(r3 != 16, 0))
|
|
253
|
+
return str + 48 + r3;
|
|
254
|
+
|
|
255
|
+
str += 64;
|
|
256
|
+
}
|
|
210
257
|
|
|
211
|
-
|
|
258
|
+
// Handle remaining 16-byte chunks
|
|
259
|
+
for (; str <= safe_end_16; str += 16) {
|
|
212
260
|
const __m128i string = _mm_loadu_si128((const __m128i *)str);
|
|
213
261
|
const int r = _mm_cmpestri(terminate,
|
|
214
262
|
3,
|
|
215
263
|
string,
|
|
216
264
|
16,
|
|
217
265
|
_SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT);
|
|
218
|
-
if (r != 16)
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
266
|
+
if (r != 16)
|
|
267
|
+
return str + r;
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
return scan_string_noSIMD(str, end);
|
|
271
|
+
}
|
|
272
|
+
#endif
|
|
273
|
+
|
|
274
|
+
#ifdef HAVE_SIMD_SSE2
|
|
275
|
+
// Optimized SSE2 string scanner (fallback for older x86_64 CPUs)
|
|
276
|
+
// Uses SSE2 instructions with prefetching and parallel processing
|
|
277
|
+
static inline const char *scan_string_SSE2(const char *str, const char *end) {
|
|
278
|
+
const char *safe_end_64 = end - 64;
|
|
279
|
+
const char *safe_end_16 = end - 16;
|
|
280
|
+
|
|
281
|
+
// Create comparison vectors for our three special characters
|
|
282
|
+
const __m128i null_char = _mm_setzero_si128();
|
|
283
|
+
const __m128i backslash = _mm_set1_epi8('\\');
|
|
284
|
+
const __m128i quote = _mm_set1_epi8('"');
|
|
285
|
+
|
|
286
|
+
// Process 64 bytes at a time for better throughput
|
|
287
|
+
while (str <= safe_end_64) {
|
|
288
|
+
__builtin_prefetch(str + 64, 0, 0);
|
|
289
|
+
|
|
290
|
+
// Load 4 chunks
|
|
291
|
+
const __m128i chunk0 = _mm_loadu_si128((const __m128i *)(str));
|
|
292
|
+
const __m128i chunk1 = _mm_loadu_si128((const __m128i *)(str + 16));
|
|
293
|
+
const __m128i chunk2 = _mm_loadu_si128((const __m128i *)(str + 32));
|
|
294
|
+
const __m128i chunk3 = _mm_loadu_si128((const __m128i *)(str + 48));
|
|
295
|
+
|
|
296
|
+
// Compare all chunks (allows CPU to parallelize)
|
|
297
|
+
const __m128i cmp0 = _mm_or_si128(
|
|
298
|
+
_mm_or_si128(_mm_cmpeq_epi8(chunk0, null_char), _mm_cmpeq_epi8(chunk0, backslash)),
|
|
299
|
+
_mm_cmpeq_epi8(chunk0, quote));
|
|
300
|
+
const __m128i cmp1 = _mm_or_si128(
|
|
301
|
+
_mm_or_si128(_mm_cmpeq_epi8(chunk1, null_char), _mm_cmpeq_epi8(chunk1, backslash)),
|
|
302
|
+
_mm_cmpeq_epi8(chunk1, quote));
|
|
303
|
+
const __m128i cmp2 = _mm_or_si128(
|
|
304
|
+
_mm_or_si128(_mm_cmpeq_epi8(chunk2, null_char), _mm_cmpeq_epi8(chunk2, backslash)),
|
|
305
|
+
_mm_cmpeq_epi8(chunk2, quote));
|
|
306
|
+
const __m128i cmp3 = _mm_or_si128(
|
|
307
|
+
_mm_or_si128(_mm_cmpeq_epi8(chunk3, null_char), _mm_cmpeq_epi8(chunk3, backslash)),
|
|
308
|
+
_mm_cmpeq_epi8(chunk3, quote));
|
|
309
|
+
|
|
310
|
+
// Convert to masks
|
|
311
|
+
int mask0 = _mm_movemask_epi8(cmp0);
|
|
312
|
+
if (__builtin_expect(mask0 != 0, 0))
|
|
313
|
+
return str + __builtin_ctz(mask0);
|
|
314
|
+
|
|
315
|
+
int mask1 = _mm_movemask_epi8(cmp1);
|
|
316
|
+
if (__builtin_expect(mask1 != 0, 0))
|
|
317
|
+
return str + 16 + __builtin_ctz(mask1);
|
|
318
|
+
|
|
319
|
+
int mask2 = _mm_movemask_epi8(cmp2);
|
|
320
|
+
if (__builtin_expect(mask2 != 0, 0))
|
|
321
|
+
return str + 32 + __builtin_ctz(mask2);
|
|
322
|
+
|
|
323
|
+
int mask3 = _mm_movemask_epi8(cmp3);
|
|
324
|
+
if (__builtin_expect(mask3 != 0, 0))
|
|
325
|
+
return str + 48 + __builtin_ctz(mask3);
|
|
326
|
+
|
|
327
|
+
str += 64;
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
// Handle remaining 16-byte chunks
|
|
331
|
+
for (; str <= safe_end_16; str += 16) {
|
|
332
|
+
const __m128i chunk = _mm_loadu_si128((const __m128i *)str);
|
|
333
|
+
const __m128i matches = _mm_or_si128(
|
|
334
|
+
_mm_or_si128(_mm_cmpeq_epi8(chunk, null_char), _mm_cmpeq_epi8(chunk, backslash)),
|
|
335
|
+
_mm_cmpeq_epi8(chunk, quote));
|
|
336
|
+
int mask = _mm_movemask_epi8(matches);
|
|
337
|
+
if (mask != 0)
|
|
338
|
+
return str + __builtin_ctz(mask);
|
|
222
339
|
}
|
|
223
340
|
|
|
224
341
|
return scan_string_noSIMD(str, end);
|
|
@@ -228,9 +345,12 @@ static inline const char *scan_string_SIMD(const char *str, const char *end) {
|
|
|
228
345
|
static const char *(*scan_func)(const char *str, const char *end) = scan_string_noSIMD;
|
|
229
346
|
|
|
230
347
|
void oj_scanner_init(void) {
|
|
231
|
-
#ifdef
|
|
232
|
-
scan_func =
|
|
348
|
+
#ifdef HAVE_SIMD_SSE4_2
|
|
349
|
+
scan_func = scan_string_SSE42;
|
|
350
|
+
#elif defined(HAVE_SIMD_SSE2)
|
|
351
|
+
scan_func = scan_string_SSE2;
|
|
233
352
|
#endif
|
|
353
|
+
// Note: ARM NEON string scanning would be added here if needed
|
|
234
354
|
}
|
|
235
355
|
|
|
236
356
|
// entered at /
|
data/ext/oj/rails.c
CHANGED
|
@@ -661,13 +661,15 @@ static VALUE encoder_new(int argc, VALUE *argv, VALUE self) {
|
|
|
661
661
|
Encoder e = OJ_R_ALLOC(struct _encoder);
|
|
662
662
|
|
|
663
663
|
e->opts = oj_default_options;
|
|
664
|
-
e->arg = Qnil;
|
|
665
664
|
copy_opts(&ropts, &e->ropts);
|
|
666
665
|
|
|
667
666
|
if (1 <= argc && Qnil != *argv) {
|
|
668
|
-
oj_parse_options(*argv, &e->opts);
|
|
669
667
|
e->arg = *argv;
|
|
668
|
+
} else {
|
|
669
|
+
e->arg = rb_hash_new();
|
|
670
670
|
}
|
|
671
|
+
oj_parse_options(*argv, &e->opts);
|
|
672
|
+
|
|
671
673
|
return TypedData_Wrap_Struct(encoder_class, &oj_encoder_type, e);
|
|
672
674
|
}
|
|
673
675
|
|
data/ext/oj/simd.h
CHANGED
|
@@ -1,10 +1,47 @@
|
|
|
1
1
|
#ifndef OJ_SIMD_H
|
|
2
2
|
#define OJ_SIMD_H
|
|
3
3
|
|
|
4
|
+
// SIMD architecture detection and configuration
|
|
5
|
+
// This header provides unified SIMD support across different CPU architectures
|
|
6
|
+
|
|
7
|
+
// x86/x86_64 SIMD detection
|
|
8
|
+
#if defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64)
|
|
9
|
+
#define HAVE_SIMD_X86 1
|
|
10
|
+
|
|
11
|
+
// SSE4.2 support (Intel Core i7+, AMD Bulldozer+)
|
|
12
|
+
// Enabled automatically when compiler has -msse4.2 flag
|
|
13
|
+
#if defined(__SSE4_2__)
|
|
14
|
+
#define HAVE_SIMD_SSE4_2 1
|
|
15
|
+
#include <nmmintrin.h>
|
|
16
|
+
#endif
|
|
17
|
+
|
|
18
|
+
// SSE2 support (fallback for older x86_64 CPUs - all x86_64 CPUs support SSE2)
|
|
19
|
+
#if defined(__SSE2__) && !defined(HAVE_SIMD_SSE4_2)
|
|
20
|
+
#define HAVE_SIMD_SSE2 1
|
|
21
|
+
#include <emmintrin.h>
|
|
22
|
+
#endif
|
|
23
|
+
|
|
24
|
+
#endif // x86/x86_64
|
|
25
|
+
|
|
26
|
+
// ARM NEON detection
|
|
4
27
|
#if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM64)
|
|
5
28
|
#define HAVE_SIMD_NEON 1
|
|
6
29
|
#define SIMD_MINIMUM_THRESHOLD 6
|
|
7
30
|
#include <arm_neon.h>
|
|
8
31
|
#endif
|
|
9
32
|
|
|
10
|
-
|
|
33
|
+
// Define which SIMD implementation to use (priority order: SSE4.2 > NEON > SSE2)
|
|
34
|
+
#if defined(HAVE_SIMD_SSE4_2)
|
|
35
|
+
#define HAVE_SIMD_STRING_SCAN 1
|
|
36
|
+
#define SIMD_TYPE "SSE4.2"
|
|
37
|
+
#elif defined(HAVE_SIMD_NEON)
|
|
38
|
+
#define HAVE_SIMD_STRING_SCAN 1
|
|
39
|
+
#define SIMD_TYPE "NEON"
|
|
40
|
+
#elif defined(HAVE_SIMD_SSE2)
|
|
41
|
+
#define HAVE_SIMD_STRING_SCAN 1
|
|
42
|
+
#define SIMD_TYPE "SSE2"
|
|
43
|
+
#else
|
|
44
|
+
#define SIMD_TYPE "none"
|
|
45
|
+
#endif
|
|
46
|
+
|
|
47
|
+
#endif /* OJ_SIMD_H */
|
data/lib/oj/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: oj
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 3.16.
|
|
4
|
+
version: 3.16.13
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Peter Ohler
|
|
8
8
|
bindir: bin
|
|
9
9
|
cert_chain: []
|
|
10
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
11
|
dependencies:
|
|
12
12
|
- !ruby/object:Gem::Dependency
|
|
13
13
|
name: bigdecimal
|
|
@@ -91,9 +91,9 @@ executables: []
|
|
|
91
91
|
extensions:
|
|
92
92
|
- ext/oj/extconf.rb
|
|
93
93
|
extra_rdoc_files:
|
|
94
|
-
- README.md
|
|
95
|
-
- LICENSE
|
|
96
94
|
- CHANGELOG.md
|
|
95
|
+
- LICENSE
|
|
96
|
+
- README.md
|
|
97
97
|
- RELEASE_NOTES.md
|
|
98
98
|
- pages/Advanced.md
|
|
99
99
|
- pages/Compatibility.md
|
|
@@ -229,7 +229,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
229
229
|
- !ruby/object:Gem::Version
|
|
230
230
|
version: '0'
|
|
231
231
|
requirements: []
|
|
232
|
-
rubygems_version: 3.6.
|
|
232
|
+
rubygems_version: 3.6.9
|
|
233
233
|
specification_version: 4
|
|
234
234
|
summary: A fast JSON parser and serializer.
|
|
235
235
|
test_files: []
|