oj 3.16.10 → 3.16.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/oj/dump.c CHANGED
@@ -152,8 +152,77 @@ inline static size_t newline_friendly_size(const uint8_t *str, size_t len) {
152
152
  return calculate_string_size(str, len, newline_friendly_chars);
153
153
  }
154
154
 
155
+ #ifdef HAVE_SIMD_NEON
156
+ inline static uint8x16x4_t load_uint8x16_4(const unsigned char *table) { // Loads four consecutive 16-byte vectors starting at table and returns them as one uint8x16x4_t.
157
+ uint8x16x4_t tab;
158
+ tab.val[0] = vld1q_u8(table); // bytes 0..15
159
+ tab.val[1] = vld1q_u8(table + 16); // bytes 16..31
160
+ tab.val[2] = vld1q_u8(table + 32); // bytes 32..47
161
+ tab.val[3] = vld1q_u8(table + 48); // bytes 48..63
162
+ return tab;
163
+ }
164
+
165
+ static uint8x16x4_t hibit_friendly_chars_neon[2]; // 2 groups of 64 table bytes (entries 0..127); filled by initialize_neon()
166
+ static uint8x16x4_t rails_friendly_chars_neon[2]; // 2 groups of 64 table bytes (entries 0..127); filled by initialize_neon()
167
+ static uint8x16x4_t rails_xss_friendly_chars_neon[4]; // 4 groups of 64 table bytes (entries 0..255); filled by initialize_neon()
168
+
169
+ void initialize_neon(void) { // Fills the file-scope *_friendly_chars_neon lookup tables from the scalar *_friendly_chars tables.
170
+ // Only the first 128 bytes of the hibit friendly chars table are loaded. Every entry above 127
171
+ // is the character '1'. If that ever changes, the code will need to be updated.
172
+ hibit_friendly_chars_neon[0] = load_uint8x16_4((const unsigned char *)hibit_friendly_chars);
173
+ hibit_friendly_chars_neon[1] = load_uint8x16_4((const unsigned char *)hibit_friendly_chars + 64);
174
+
175
+ // rails_friendly_chars is handled the same way as hibit_friendly_chars. Only the first 128 bytes
176
+ // have values that are not '1'. If that ever changes, the code will need to be updated.
177
+ rails_friendly_chars_neon[0] = load_uint8x16_4((const unsigned char *)rails_friendly_chars);
178
+ rails_friendly_chars_neon[1] = load_uint8x16_4((const unsigned char *)rails_friendly_chars + 64);
179
+
180
+ rails_xss_friendly_chars_neon[0] = load_uint8x16_4((const unsigned char *)rails_xss_friendly_chars); // all 256 entries are loaded for this table
181
+ rails_xss_friendly_chars_neon[1] = load_uint8x16_4((const unsigned char *)rails_xss_friendly_chars + 64);
182
+ rails_xss_friendly_chars_neon[2] = load_uint8x16_4((const unsigned char *)rails_xss_friendly_chars + 128);
183
+ rails_xss_friendly_chars_neon[3] = load_uint8x16_4((const unsigned char *)rails_xss_friendly_chars + 192);
184
+
185
+ // Subtract '1' from every entry so that all bytes are 0 except for those that need more than
186
+ // 1 byte of output. This allows the two-group tables to cover only the first 128 bytes
187
+ // (values 0 - 127): an out-of-range index results in 0 with the vqtbl4q_u8 instruction.
188
+ uint8x16_t one = vdupq_n_u8('1');
189
+ for (int i = 0; i < 2; i++) { // the two 64-byte groups of the 128-entry tables
190
+ for (int j = 0; j < 4; j++) { // the four 16-byte vectors inside each group
191
+ hibit_friendly_chars_neon[i].val[j] = vsubq_u8(hibit_friendly_chars_neon[i].val[j], one);
192
+ rails_friendly_chars_neon[i].val[j] = vsubq_u8(rails_friendly_chars_neon[i].val[j], one);
193
+ }
194
+ }
195
+
196
+ for (int i = 0; i < 4; i++) { // the four 64-byte groups of the 256-entry table
197
+ for (int j = 0; j < 4; j++) {
198
+ rails_xss_friendly_chars_neon[i].val[j] = vsubq_u8(rails_xss_friendly_chars_neon[i].val[j], one);
199
+ }
200
+ }
201
+ }
202
+ #endif
203
+
155
204
  inline static size_t hibit_friendly_size(const uint8_t *str, size_t len) { // Returns the size computed for str using the hibit_friendly_chars table; with NEON, full 16-byte chunks are summed with vector lookups.
205
+ #ifdef HAVE_SIMD_NEON
206
+ size_t size = 0;
207
+ size_t i = 0;
208
+
209
+ for (; i + sizeof(uint8x16_t) <= len; i += sizeof(uint8x16_t), str += sizeof(uint8x16_t)) {
210
+ size += sizeof(uint8x16_t); // one output byte per input byte; the lookups below add the rest
211
+
212
+ // See https://lemire.me/blog/2019/07/23/arbitrary-byte-to-byte-maps-using-arm-neon/
213
+ uint8x16_t chunk = vld1q_u8(str);
214
+ uint8x16_t tmp1 = vqtbl4q_u8(hibit_friendly_chars_neon[0], chunk); // table entries 0..63
215
+ uint8x16_t tmp2 = vqtbl4q_u8(hibit_friendly_chars_neon[1], veorq_u8(chunk, vdupq_n_u8(0x40))); // XOR with 0x40 maps bytes 64..127 onto indexes 0..63
216
+ uint8x16_t result = vorrq_u8(tmp1, tmp2);
217
+ uint8_t tmp = vaddvq_u8(result); // sum of the 16 per-byte lookup results
218
+ size += tmp;
219
+ }
220
+
221
+ size_t total = size + calculate_string_size(str, len - i, hibit_friendly_chars); // remaining tail of fewer than 16 bytes uses the scalar path
222
+ return total;
223
+ #else
156
224
  return calculate_string_size(str, len, hibit_friendly_chars);
225
+ #endif
157
226
  }
158
227
 
159
228
  inline static size_t slash_friendly_size(const uint8_t *str, size_t len) {
@@ -183,10 +252,52 @@ inline static size_t hixss_friendly_size(const uint8_t *str, size_t len) {
183
252
  }
184
253
 
185
254
  inline static long rails_xss_friendly_size(const uint8_t *str, size_t len) {
186
- long size = 0;
187
- size_t i = len;
188
- uint8_t hi = 0;
255
+ long size = 0;
256
+ uint32_t hi = 0;
257
+
258
+ #ifdef HAVE_SIMD_NEON
259
+ size_t i = 0;
260
+
261
+ if (len >= sizeof(uint8x16_t)) {
262
+ uint8x16_t has_some_hibit = vdupq_n_u8(0);
263
+ uint8x16_t hibit = vdupq_n_u8(0x80);
264
+
265
+ for (; i + sizeof(uint8x16_t) <= len; i += sizeof(uint8x16_t), str += sizeof(uint8x16_t)) {
266
+ size += sizeof(uint8x16_t);
267
+
268
+ uint8x16_t chunk = vld1q_u8(str);
269
+
270
+ // Check to see if any of these bytes have the high bit set.
271
+ has_some_hibit = vorrq_u8(has_some_hibit, vandq_u8(chunk, hibit));
272
+
273
+ uint8x16_t tmp1 = vqtbl4q_u8(rails_xss_friendly_chars_neon[0], chunk);
274
+ uint8x16_t tmp2 = vqtbl4q_u8(rails_xss_friendly_chars_neon[1], veorq_u8(chunk, vdupq_n_u8(0x40)));
275
+ uint8x16_t tmp3 = vqtbl4q_u8(rails_xss_friendly_chars_neon[2], veorq_u8(chunk, vdupq_n_u8(0x80)));
276
+ uint8x16_t tmp4 = vqtbl4q_u8(rails_xss_friendly_chars_neon[3], veorq_u8(chunk, vdupq_n_u8(0xc0)));
277
+ uint8x16_t result = vorrq_u8(tmp4, vorrq_u8(tmp3, vorrq_u8(tmp1, tmp2)));
278
+ uint8_t tmp = vaddvq_u8(result);
279
+ size += tmp;
280
+ }
281
+
282
+ // 'hi' should be set if any of the bytes we processed have the high bit set. It doesn't matter which ones.
283
+ hi = vmaxvq_u8(has_some_hibit) != 0;
284
+ }
285
+
286
+ size_t len_remaining = len - i;
189
287
 
288
+ for (; i < len; str++, i++) {
289
+ size += rails_xss_friendly_chars[*str];
290
+ hi |= *str & 0x80;
291
+ }
292
+
293
+ size -= (len_remaining * ((size_t)'0'));
294
+
295
+ if (0 == hi) {
296
+ return size;
297
+ }
298
+ return -(size);
299
+ #else
300
+ size_t i = len;
190
301
  for (; 0 < i; str++, i--) {
191
302
  size += rails_xss_friendly_chars[*str];
192
303
  hi |= *str & 0x80;
@@ -195,13 +306,53 @@ inline static long rails_xss_friendly_size(const uint8_t *str, size_t len) {
195
306
  return size - len * (size_t)'0';
196
307
  }
197
308
  return -(size - len * (size_t)'0');
309
+ #endif /* HAVE_SIMD_NEON */
198
310
  }
199
311
 
200
312
  inline static size_t rails_friendly_size(const uint8_t *str, size_t len) {
201
- long size = 0;
202
- size_t i = len;
203
- uint8_t hi = 0;
313
+ long size = 0;
314
+ uint32_t hi = 0;
315
+ #ifdef HAVE_SIMD_NEON
316
+ size_t i = 0;
317
+ long extra = 0;
318
+
319
+ if (len >= sizeof(uint8x16_t)) {
320
+ uint8x16_t has_some_hibit = vdupq_n_u8(0);
321
+ uint8x16_t hibit = vdupq_n_u8(0x80);
322
+
323
+ for (; i + sizeof(uint8x16_t) <= len; i += sizeof(uint8x16_t), str += sizeof(uint8x16_t)) {
324
+ size += sizeof(uint8x16_t);
325
+
326
+ // See https://lemire.me/blog/2019/07/23/arbitrary-byte-to-byte-maps-using-arm-neon/
327
+ uint8x16_t chunk = vld1q_u8(str);
328
+
329
+ // Check to see if any of these bytes have the high bit set.
330
+ has_some_hibit = vorrq_u8(has_some_hibit, vandq_u8(chunk, hibit));
331
+
332
+ uint8x16_t tmp1 = vqtbl4q_u8(rails_friendly_chars_neon[0], chunk);
333
+ uint8x16_t tmp2 = vqtbl4q_u8(rails_friendly_chars_neon[1], veorq_u8(chunk, vdupq_n_u8(0x40)));
334
+ uint8x16_t result = vorrq_u8(tmp1, tmp2);
335
+ uint8_t tmp = vaddvq_u8(result);
336
+ size += tmp;
337
+ }
338
+
339
+ // 'hi' should be set if any of the bytes we processed have the high bit set. It doesn't matter which ones.
340
+ hi = vmaxvq_u8(has_some_hibit) != 0;
341
+ }
342
+
343
+ for (; i < len; str++, i++, extra++) {
344
+ size += rails_friendly_chars[*str];
345
+ hi |= *str & 0x80;
346
+ }
347
+
348
+ size -= (extra * ((size_t)'0'));
204
349
 
350
+ if (0 == hi) {
351
+ return size;
352
+ }
353
+ return -(size);
354
+ #else
355
+ size_t i = len;
205
356
  for (; 0 < i; str++, i--) {
206
357
  size += rails_friendly_chars[*str];
207
358
  hi |= *str & 0x80;
@@ -210,9 +361,10 @@ inline static size_t rails_friendly_size(const uint8_t *str, size_t len) {
210
361
  return size - len * (size_t)'0';
211
362
  }
212
363
  return -(size - len * (size_t)'0');
364
+ #endif /* HAVE_SIMD_NEON */
213
365
  }
214
366
 
215
- const char *oj_nan_str(VALUE obj, int opt, int mode, bool plus, int *lenp) {
367
+ const char *oj_nan_str(VALUE obj, int opt, int mode, bool plus, size_t *lenp) {
216
368
  const char *str = NULL;
217
369
 
218
370
  if (AutoNan == opt) {
@@ -477,7 +629,7 @@ void oj_dump_time(VALUE obj, Out out, int withZone) {
477
629
  void oj_dump_ruby_time(VALUE obj, Out out) { // Converts obj with oj_safe_string_convert() and dumps the resulting string through oj_dump_cstr().
478
630
  volatile VALUE rstr = oj_safe_string_convert(obj);
479
631
 
480
- oj_dump_cstr(RSTRING_PTR(rstr), (int)RSTRING_LEN(rstr), 0, 0, out);
632
+ oj_dump_cstr(RSTRING_PTR(rstr), RSTRING_LEN(rstr), 0, 0, out); // length is no longer narrowed with an (int) cast
481
633
  }
482
634
 
483
635
  void oj_dump_xml_time(VALUE obj, Out out) {
@@ -711,13 +863,13 @@ void oj_dump_str(VALUE obj, int depth, Out out, bool as_ok) {
711
863
  rb_encoding *enc = rb_enc_from_index(idx);
712
864
  obj = rb_str_conv_enc(obj, enc, oj_utf8_encoding);
713
865
  }
714
- oj_dump_cstr(RSTRING_PTR(obj), (int)RSTRING_LEN(obj), 0, 0, out);
866
+ oj_dump_cstr(RSTRING_PTR(obj), RSTRING_LEN(obj), 0, 0, out);
715
867
  }
716
868
 
717
869
  void oj_dump_sym(VALUE obj, int depth, Out out, bool as_ok) { // Converts the symbol with rb_sym2str() and dumps the resulting string through oj_dump_cstr(); depth and as_ok are not used here.
718
870
  volatile VALUE s = rb_sym2str(obj);
719
871
 
720
- oj_dump_cstr(RSTRING_PTR(s), (int)RSTRING_LEN(s), 0, 0, out);
872
+ oj_dump_cstr(RSTRING_PTR(s), RSTRING_LEN(s), 0, 0, out); // length is no longer narrowed with an (int) cast
721
873
  }
722
874
 
723
875
  static void debug_raise(const char *orig, size_t cnt, int line) {
@@ -758,9 +910,120 @@ void oj_dump_raw_json(VALUE obj, int depth, Out out) {
758
910
  }
759
911
  }
760
912
 
913
+ #if defined(__clang__) || defined(__GNUC__) // FORCE_INLINE: always_inline attribute when compiling with clang or GCC
914
+ #define FORCE_INLINE __attribute__((always_inline))
915
+ #else // any other compiler: FORCE_INLINE expands to nothing
916
+ #define FORCE_INLINE
917
+ #endif
918
+
919
+ #ifdef HAVE_SIMD_NEON
920
+ typedef struct _neon_match_result { // Value returned by neon_update() for one 16-byte chunk.
921
+ uint8x16_t needs_escape; // per-byte lookup results from the cmap_neon tables, OR-ed together
922
+ bool has_some_hibit; // set by neon_update() when a byte of the chunk has bit 0x80 set (only checked when validation is requested)
923
+ bool do_unicode_validation; // true only when validation was requested and has_some_hibit is true
924
+ } neon_match_result;
925
+
926
+ static inline FORCE_INLINE neon_match_result // Looks up the 16 bytes at str in the cmap_neon tables and reports whether the chunk needs unicode validation.
927
+ neon_update(const char *str, uint8x16x4_t *cmap_neon, int neon_table_size, bool do_unicode_validation, bool has_hi) {
928
+ neon_match_result result = {.has_some_hibit = false, .do_unicode_validation = false};
929
+
930
+ uint8x16_t chunk = vld1q_u8((const unsigned char *)str); // reads 16 bytes starting at str
931
+ uint8x16_t tmp1 = vqtbl4q_u8(cmap_neon[0], chunk); // table entries 0..63
932
+ uint8x16_t tmp2 = vqtbl4q_u8(cmap_neon[1], veorq_u8(chunk, vdupq_n_u8(0x40))); // XOR with 0x40 maps bytes 64..127 onto indexes 0..63
933
+ result.needs_escape = vorrq_u8(tmp1, tmp2);
934
+ if (neon_table_size > 2) { // tables with more than two groups also cover bytes 128..255
935
+ uint8x16_t tmp3 = vqtbl4q_u8(cmap_neon[2], veorq_u8(chunk, vdupq_n_u8(0x80)));
936
+ uint8x16_t tmp4 = vqtbl4q_u8(cmap_neon[3], veorq_u8(chunk, vdupq_n_u8(0xc0)));
937
+ result.needs_escape = vorrq_u8(result.needs_escape, vorrq_u8(tmp4, tmp3));
938
+ }
939
+ if (has_hi && do_unicode_validation) { // the high-bit scan is skipped unless validation was requested for a string with high-bit bytes
940
+ uint8x16_t has_some_hibit = vandq_u8(chunk, vdupq_n_u8(0x80));
941
+ result.has_some_hibit = vmaxvq_u8(has_some_hibit) != 0;
942
+ result.do_unicode_validation = has_hi && do_unicode_validation && result.has_some_hibit; // has_hi and do_unicode_validation are already true inside this branch
943
+ }
944
+ return result;
945
+ }
946
+
947
+ #endif /* HAVE_SIMD_NEON */
948
+
949
+ static inline FORCE_INLINE const char *process_character(char action, // Writes the output for the byte at *str according to action ('1', '2', '3' or '6'); returns str, or the pointer returned by dump_unicode() when that path is taken.
949
+ const char *str, // byte to process
950
+ const char *end, // end of the input; used for the remaining-length checks and passed to the unicode helpers
951
+ Out out, // output buffer; out->cur is advanced as bytes are written
952
+ const char *orig, // start of the whole string; passed to the unicode helpers and to raise_invalid_unicode()
953
+ bool do_unicode_validation,
954
+ const char **check_start_) { // in/out: overwritten with the value returned by check_unicode()
955
+ const char *check_start = *check_start_; // local copy is only read; updates go through check_start_
956
+ switch (action) {
957
+ case '1': // byte is copied to the output unchanged
958
+ if (do_unicode_validation && check_start <= str) {
959
+ if (0 != (0x80 & (uint8_t)*str)) {
960
+ if (0xC0 == (0xC0 & (uint8_t)*str)) { // both top bits set
961
+ *check_start_ = check_unicode(str, end, orig);
962
+ } else {
963
+ raise_invalid_unicode(orig, (int)(end - orig), (int)(str - orig));
964
+ }
965
+ }
966
+ }
967
+ *out->cur++ = *str;
968
+ break;
969
+ case '2': // backslash followed by one character
970
+ *out->cur++ = '\\';
971
+ switch (*str) {
972
+ case '\\': *out->cur++ = '\\'; break;
973
+ case '\b': *out->cur++ = 'b'; break;
974
+ case '\t': *out->cur++ = 't'; break;
975
+ case '\n': *out->cur++ = 'n'; break;
976
+ case '\f': *out->cur++ = 'f'; break;
977
+ case '\r': *out->cur++ = 'r'; break;
978
+ default: *out->cur++ = *str; break;
979
+ }
980
+ break;
981
+ case '3': // Unicode
982
+ if (0xe2 == (uint8_t)*str && do_unicode_validation && 2 <= end - str) { // NOTE(review): str[2] is read below; with exactly 2 bytes remaining that is *end - confirm a readable byte always exists there
983
+ if (0x80 == (uint8_t)str[1] && (0xa8 == (uint8_t)str[2] || 0xa9 == (uint8_t)str[2])) { // bytes E2 80 A8 / E2 80 A9
984
+ str = dump_unicode(str, end, out, orig);
985
+ } else {
986
+ *check_start_ = check_unicode(str, end, orig);
987
+ *out->cur++ = *str;
988
+ }
989
+ break;
990
+ }
991
+ str = dump_unicode(str, end, out, orig);
992
+ break;
993
+ case '6': // control characters
994
+ if (*(uint8_t *)str < 0x80) {
995
+ if (0 == (uint8_t)*str && out->opts->dump_opts.omit_null_byte) {
996
+ break; // NUL byte produces no output when omit_null_byte is set
997
+ }
998
+ APPEND_CHARS(out->cur, "\\u00", 4);
999
+ dump_hex((uint8_t)*str, out);
1000
+ } else { // same handling as case '3'
1001
+ if (0xe2 == (uint8_t)*str && do_unicode_validation && 2 <= end - str) { // NOTE(review): same str[2] read as in case '3' - confirm
1002
+ if (0x80 == (uint8_t)str[1] && (0xa8 == (uint8_t)str[2] || 0xa9 == (uint8_t)str[2])) {
1003
+ str = dump_unicode(str, end, out, orig);
1004
+ } else {
1005
+ *check_start_ = check_unicode(str, end, orig);
1006
+ *out->cur++ = *str;
1007
+ }
1008
+ break;
1009
+ }
1010
+ str = dump_unicode(str, end, out, orig);
1011
+ }
1012
+ break;
1013
+ default: break; // ignore, should never happen if the table is correct
1014
+ }
1015
+
1016
+ return str; // callers in this file increment the returned pointer before the next byte
1017
+ }
1019
+
761
1020
  void oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out out) {
762
- size_t size;
763
- char *cmap;
1021
+ size_t size;
1022
+ char *cmap;
1023
+ #ifdef HAVE_SIMD_NEON
1024
+ uint8x16x4_t *cmap_neon = NULL;
1025
+ int neon_table_size = 0;
1026
+ #endif /* HAVE_SIMD_NEON */
764
1027
  const char *orig = str;
765
1028
  bool has_hi = false;
766
1029
  bool do_unicode_validation = false;
@@ -792,7 +1055,11 @@ void oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out ou
792
1055
  long sz;
793
1056
 
794
1057
  cmap = rails_xss_friendly_chars;
795
- sz = rails_xss_friendly_size((uint8_t *)str, cnt);
1058
+ #ifdef HAVE_SIMD_NEON
1059
+ cmap_neon = rails_xss_friendly_chars_neon;
1060
+ neon_table_size = 4;
1061
+ #endif /* HAVE_SIMD_NEON */
1062
+ sz = rails_xss_friendly_size((uint8_t *)str, cnt);
796
1063
  if (sz < 0) {
797
1064
  has_hi = true;
798
1065
  size = (size_t)-sz;
@@ -805,7 +1072,11 @@ void oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out ou
805
1072
  case RailsEsc: {
806
1073
  long sz;
807
1074
  cmap = rails_friendly_chars;
808
- sz = rails_friendly_size((uint8_t *)str, cnt);
1075
+ #ifdef HAVE_SIMD_NEON
1076
+ cmap_neon = rails_friendly_chars_neon;
1077
+ neon_table_size = 2;
1078
+ #endif /* HAVE_NEON_SIMD */
1079
+ sz = rails_friendly_size((uint8_t *)str, cnt);
809
1080
  if (sz < 0) {
810
1081
  has_hi = true;
811
1082
  size = (size_t)-sz;
@@ -816,7 +1087,12 @@ void oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out ou
816
1087
  break;
817
1088
  }
818
1089
  case JSONEsc:
819
- default: cmap = hibit_friendly_chars; size = hibit_friendly_size((uint8_t *)str, cnt);
1090
+ default: cmap = hibit_friendly_chars;
1091
+ #ifdef HAVE_SIMD_NEON
1092
+ cmap_neon = hibit_friendly_chars_neon;
1093
+ neon_table_size = 2;
1094
+ #endif /* HAVE_SIMD_NEON */
1095
+ size = hibit_friendly_size((uint8_t *)str, cnt);
820
1096
  }
821
1097
  assure_size(out, size + BUFFER_EXTRA);
822
1098
  *out->cur++ = '"';
@@ -842,71 +1118,91 @@ void oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out ou
842
1118
  if (is_sym) {
843
1119
  *out->cur++ = ':';
844
1120
  }
845
- for (; str < end; str++) {
846
- switch (cmap[(uint8_t)*str]) {
847
- case '1':
848
- if (do_unicode_validation && check_start <= str) {
849
- if (0 != (0x80 & (uint8_t)*str)) {
850
- if (0xC0 == (0xC0 & (uint8_t)*str)) {
851
- check_start = check_unicode(str, end, orig);
852
- } else {
853
- raise_invalid_unicode(orig, (int)(end - orig), (int)(str - orig));
854
- }
855
- }
856
- }
857
- *out->cur++ = *str;
858
- break;
859
- case '2':
860
- *out->cur++ = '\\';
861
- switch (*str) {
862
- case '\\': *out->cur++ = '\\'; break;
863
- case '\b': *out->cur++ = 'b'; break;
864
- case '\t': *out->cur++ = 't'; break;
865
- case '\n': *out->cur++ = 'n'; break;
866
- case '\f': *out->cur++ = 'f'; break;
867
- case '\r': *out->cur++ = 'r'; break;
868
- default: *out->cur++ = *str; break;
869
- }
870
- break;
871
- case '3': // Unicode
872
- if (0xe2 == (uint8_t)*str && do_unicode_validation && 2 <= end - str) {
873
- if (0x80 == (uint8_t)str[1] && (0xa8 == (uint8_t)str[2] || 0xa9 == (uint8_t)str[2])) {
874
- str = dump_unicode(str, end, out, orig);
875
- } else {
876
- check_start = check_unicode(str, end, orig);
877
- *out->cur++ = *str;
878
- }
1121
+ #ifdef HAVE_SIMD_NEON
1122
+ const char *chunk_start;
1123
+ const char *chunk_end;
1124
+ const char *cursor = str;
1125
+ bool use_neon = (cmap_neon != NULL && cnt >= (sizeof(uint8x16_t))) ? true : false;
1126
+ char matches[16];
1127
+ #define SEARCH_FLUSH \
1128
+ if (str > cursor) { \
1129
+ APPEND_CHARS(out->cur, cursor, str - cursor); \
1130
+ cursor = str; \
1131
+ }
1132
+
1133
+ #endif /* HAVE_SIMD_NEON */
1134
+ #ifdef HAVE_SIMD_NEON
1135
+ if (use_neon) {
1136
+ while (str < end) {
1137
+ const char *chunk_ptr = NULL;
1138
+ if (str + sizeof(uint8x16_t) <= end) {
1139
+ chunk_ptr = str;
1140
+ chunk_start = str;
1141
+ chunk_end = str + sizeof(uint8x16_t);
1142
+ } else if ((end - str) >= SIMD_MINIMUM_THRESHOLD) {
1143
+ memset(out->cur, 'A', sizeof(uint8x16_t));
1144
+ memcpy(out->cur, str, (end - str));
1145
+ chunk_ptr = out->cur;
1146
+ chunk_start = str;
1147
+ chunk_end = end;
1148
+ } else {
879
1149
  break;
880
1150
  }
881
- str = dump_unicode(str, end, out, orig);
882
- break;
883
- case '6': // control characters
884
- if (*(uint8_t *)str < 0x80) {
885
- if (0 == (uint8_t)*str && out->opts->dump_opts.omit_null_byte) {
886
- break;
887
- }
888
- APPEND_CHARS(out->cur, "\\u00", 4);
889
- dump_hex((uint8_t)*str, out);
890
- } else {
891
- if (0xe2 == (uint8_t)*str && do_unicode_validation && 2 <= end - str) {
892
- if (0x80 == (uint8_t)str[1] && (0xa8 == (uint8_t)str[2] || 0xa9 == (uint8_t)str[2])) {
893
- str = dump_unicode(str, end, out, orig);
894
- } else {
895
- check_start = check_unicode(str, end, orig);
896
- *out->cur++ = *str;
1151
+ neon_match_result result = neon_update(chunk_ptr,
1152
+ cmap_neon,
1153
+ neon_table_size,
1154
+ do_unicode_validation,
1155
+ has_hi);
1156
+ if ((result.do_unicode_validation) || vmaxvq_u8(result.needs_escape) != 0) {
1157
+ SEARCH_FLUSH;
1158
+ uint8x16_t actions = vaddq_u8(result.needs_escape, vdupq_n_u8('1'));
1159
+ uint8_t num_matches = vaddvq_u8(vandq_u8(result.needs_escape, vdupq_n_u8(0x1)));
1160
+ vst1q_u8((unsigned char *)matches, actions);
1161
+ bool process_each = result.do_unicode_validation || (num_matches > sizeof(uint8x16_t) / 2);
1162
+ // If this chunk requires no unicode validation and num_matches is at most half the
1163
+ // chunk size, runs of '1' bytes are skipped by directly copying them to the output.
1164
+ if (!process_each) {
1165
+ while (str < chunk_end) {
1166
+ long i = str - chunk_start;
1167
+ char action;
1168
+ while (str < chunk_end && (action = matches[i++]) == '1') {
1169
+ *out->cur++ = *str++;
1170
+ }
1171
+ cursor = str;
1172
+ if (str >= chunk_end) {
1173
+ break;
1174
+ }
1175
+ str = process_character(action, str, end, out, orig, do_unicode_validation, &check_start);
1176
+ str++;
1177
+ }
1178
+ } else {
1179
+ while (str < chunk_end) {
1180
+ long match_index = str - chunk_start;
1181
+ str = process_character(matches[match_index],
1182
+ str,
1183
+ end,
1184
+ out,
1185
+ orig,
1186
+ do_unicode_validation,
1187
+ &check_start);
1188
+ str++;
897
1189
  }
898
- break;
899
1190
  }
900
- str = dump_unicode(str, end, out, orig);
1191
+ cursor = str;
1192
+ continue;
901
1193
  }
902
- break;
903
- default: break; // ignore, should never happen if the table is correct
1194
+ str = chunk_end;
904
1195
  }
1196
+ SEARCH_FLUSH;
1197
+ }
1198
+ #endif /* HAVE_SIMD_NEON */
1199
+ for (; str < end; str++) {
1200
+ str = process_character(cmap[(uint8_t)*str], str, end, out, orig, do_unicode_validation, &check_start);
905
1201
  }
906
1202
  *out->cur++ = '"';
907
1203
  }
908
1204
  if (do_unicode_validation && 0 < str - orig && 0 != (0x80 & *(str - 1))) {
909
- uint8_t c = (uint8_t) * (str - 1);
1205
+ uint8_t c = (uint8_t)*(str - 1);
910
1206
  int i;
911
1207
  int scnt = (int)(str - orig);
912
1208
 
@@ -957,7 +1253,7 @@ void oj_dump_class(VALUE obj, int depth, Out out, bool as_ok) {
957
1253
  void oj_dump_obj_to_s(VALUE obj, Out out) { // Converts obj with oj_safe_string_convert() and dumps the resulting string through oj_dump_cstr().
958
1254
  volatile VALUE rstr = oj_safe_string_convert(obj);
959
1255
 
960
- oj_dump_cstr(RSTRING_PTR(rstr), (int)RSTRING_LEN(rstr), 0, 0, out);
1256
+ oj_dump_cstr(RSTRING_PTR(rstr), RSTRING_LEN(rstr), 0, 0, out); // length is no longer narrowed with an (int) cast
961
1257
  }
962
1258
 
963
1259
  void oj_dump_raw(const char *str, size_t cnt, Out out) {
@@ -1092,7 +1388,7 @@ void oj_dump_fixnum(VALUE obj, int depth, Out out, bool as_ok) {
1092
1388
 
1093
1389
  void oj_dump_bignum(VALUE obj, int depth, Out out, bool as_ok) {
1094
1390
  volatile VALUE rs = rb_big2str(obj, 10);
1095
- int cnt = (int)RSTRING_LEN(rs);
1391
+ size_t cnt = RSTRING_LEN(rs);
1096
1392
  bool dump_as_string = false;
1097
1393
 
1098
1394
  if (out->opts->int_range_max != 0 || out->opts->int_range_min != 0) { // Bignum cannot be inside of Fixnum range
@@ -1114,7 +1410,7 @@ void oj_dump_float(VALUE obj, int depth, Out out, bool as_ok) {
1114
1410
  char buf[64];
1115
1411
  char *b;
1116
1412
  double d = rb_num2dbl(obj);
1117
- int cnt = 0;
1413
+ size_t cnt = 0;
1118
1414
 
1119
1415
  if (0.0 == d) {
1120
1416
  b = buf;
@@ -1225,7 +1521,7 @@ void oj_dump_float(VALUE obj, int depth, Out out, bool as_ok) {
1225
1521
  } else if (0 == out->opts->float_prec) {
1226
1522
  volatile VALUE rstr = oj_safe_string_convert(obj);
1227
1523
 
1228
- cnt = (int)RSTRING_LEN(rstr);
1524
+ cnt = RSTRING_LEN(rstr);
1229
1525
  if ((int)sizeof(buf) <= cnt) {
1230
1526
  cnt = sizeof(buf) - 1;
1231
1527
  }
@@ -1239,8 +1535,8 @@ void oj_dump_float(VALUE obj, int depth, Out out, bool as_ok) {
1239
1535
  *out->cur = '\0';
1240
1536
  }
1241
1537
 
1242
- int oj_dump_float_printf(char *buf, size_t blen, VALUE obj, double d, const char *format) {
1243
- int cnt = snprintf(buf, blen, format, d);
1538
+ size_t oj_dump_float_printf(char *buf, size_t blen, VALUE obj, double d, const char *format) {
1539
+ size_t cnt = snprintf(buf, blen, format, d);
1244
1540
 
1245
1541
  // Round off issues at 16 significant digits so check for obvious ones of
1246
1542
  // 0001 and 9999.
@@ -1248,7 +1544,7 @@ int oj_dump_float_printf(char *buf, size_t blen, VALUE obj, double d, const char
1248
1544
  volatile VALUE rstr = oj_safe_string_convert(obj);
1249
1545
 
1250
1546
  strcpy(buf, RSTRING_PTR(rstr));
1251
- cnt = (int)RSTRING_LEN(rstr);
1547
+ cnt = RSTRING_LEN(rstr);
1252
1548
  }
1253
1549
  return cnt;
1254
1550
  }
data/ext/oj/dump.h CHANGED
@@ -7,12 +7,17 @@
7
7
  #include <ruby.h>
8
8
 
9
9
  #include "oj.h"
10
+ #include "simd.h"
10
11
 
11
12
  #define MAX_DEPTH 1000
12
13
 
13
14
  // Extra padding at end of buffer.
14
15
  #define BUFFER_EXTRA 64
15
16
 
17
+ #ifdef HAVE_SIMD_NEON
18
+ extern void initialize_neon(void);
19
+ #endif /* HAVE_SIMD_NEON */
20
+
16
21
  extern void oj_dump_nil(VALUE obj, int depth, Out out, bool as_ok);
17
22
  extern void oj_dump_true(VALUE obj, int depth, Out out, bool as_ok);
18
23
  extern void oj_dump_false(VALUE obj, int depth, Out out, bool as_ok);
@@ -30,7 +35,7 @@ extern void oj_dump_xml_time(VALUE obj, Out out);
30
35
  extern void oj_dump_time(VALUE obj, Out out, int withZone);
31
36
  extern void oj_dump_obj_to_s(VALUE obj, Out out);
32
37
 
33
- extern const char *oj_nan_str(VALUE obj, int opt, int mode, bool plus, int *lenp);
38
+ extern const char *oj_nan_str(VALUE obj, int opt, int mode, bool plus, size_t *lenp);
34
39
 
35
40
  // initialize an out buffer with the provided stack allocated memory
36
41
  extern void oj_out_init(Out out);
@@ -53,7 +58,7 @@ extern void oj_dump_raw_json(VALUE obj, int depth, Out out);
53
58
  extern VALUE oj_add_to_json(int argc, VALUE *argv, VALUE self);
54
59
  extern VALUE oj_remove_to_json(int argc, VALUE *argv, VALUE self);
55
60
 
56
- extern int oj_dump_float_printf(char *buf, size_t blen, VALUE obj, double d, const char *format);
61
+ extern size_t oj_dump_float_printf(char *buf, size_t blen, VALUE obj, double d, const char *format);
57
62
 
58
63
  extern time_t oj_sec_from_time_hard_way(VALUE obj);
59
64