oj 3.16.9 → 3.16.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +11 -0
  3. data/ext/oj/custom.c +10 -9
  4. data/ext/oj/dump.c +319 -20
  5. data/ext/oj/dump.h +7 -2
  6. data/ext/oj/dump_compat.c +9 -8
  7. data/ext/oj/dump_leaf.c +1 -1
  8. data/ext/oj/dump_object.c +27 -17
  9. data/ext/oj/dump_strict.c +7 -6
  10. data/ext/oj/fast.c +4 -7
  11. data/ext/oj/mimic_json.c +3 -6
  12. data/ext/oj/object.c +8 -8
  13. data/ext/oj/oj.c +12 -9
  14. data/ext/oj/parse.c +15 -5
  15. data/ext/oj/parser.c +1 -1
  16. data/ext/oj/parser.h +2 -0
  17. data/ext/oj/rails.c +20 -19
  18. data/ext/oj/saj.c +3 -6
  19. data/ext/oj/scp.c +3 -6
  20. data/ext/oj/simd.h +10 -0
  21. data/ext/oj/stream_writer.c +1 -7
  22. data/ext/oj/strict.c +2 -4
  23. data/ext/oj/string_writer.c +1 -3
  24. data/ext/oj/wab.c +4 -3
  25. data/lib/oj/version.rb +1 -1
  26. data/pages/Encoding.md +1 -1
  27. metadata +4 -98
  28. data/test/_test_active.rb +0 -75
  29. data/test/_test_active_mimic.rb +0 -95
  30. data/test/_test_mimic_rails.rb +0 -123
  31. data/test/activerecord/result_test.rb +0 -31
  32. data/test/activesupport6/abstract_unit.rb +0 -44
  33. data/test/activesupport6/decoding_test.rb +0 -133
  34. data/test/activesupport6/encoding_test.rb +0 -542
  35. data/test/activesupport6/encoding_test_cases.rb +0 -98
  36. data/test/activesupport6/test_common.rb +0 -17
  37. data/test/activesupport6/test_helper.rb +0 -163
  38. data/test/activesupport6/time_zone_test_helpers.rb +0 -39
  39. data/test/activesupport7/abstract_unit.rb +0 -52
  40. data/test/activesupport7/decoding_test.rb +0 -125
  41. data/test/activesupport7/encoding_test.rb +0 -536
  42. data/test/activesupport7/encoding_test_cases.rb +0 -104
  43. data/test/activesupport7/time_zone_test_helpers.rb +0 -47
  44. data/test/files.rb +0 -29
  45. data/test/foo.rb +0 -26
  46. data/test/helper.rb +0 -39
  47. data/test/isolated/shared.rb +0 -309
  48. data/test/isolated/test_mimic_after.rb +0 -13
  49. data/test/isolated/test_mimic_alone.rb +0 -12
  50. data/test/isolated/test_mimic_as_json.rb +0 -45
  51. data/test/isolated/test_mimic_before.rb +0 -13
  52. data/test/isolated/test_mimic_define.rb +0 -28
  53. data/test/isolated/test_mimic_rails_after.rb +0 -22
  54. data/test/isolated/test_mimic_rails_before.rb +0 -21
  55. data/test/isolated/test_mimic_redefine.rb +0 -15
  56. data/test/json_gem/json_addition_test.rb +0 -216
  57. data/test/json_gem/json_common_interface_test.rb +0 -155
  58. data/test/json_gem/json_encoding_test.rb +0 -107
  59. data/test/json_gem/json_ext_parser_test.rb +0 -21
  60. data/test/json_gem/json_fixtures_test.rb +0 -36
  61. data/test/json_gem/json_generator_test.rb +0 -413
  62. data/test/json_gem/json_generic_object_test.rb +0 -90
  63. data/test/json_gem/json_parser_test.rb +0 -477
  64. data/test/json_gem/json_string_matching_test.rb +0 -42
  65. data/test/json_gem/test_helper.rb +0 -30
  66. data/test/mem.rb +0 -34
  67. data/test/perf.rb +0 -102
  68. data/test/perf_compat.rb +0 -128
  69. data/test/perf_dump.rb +0 -50
  70. data/test/perf_fast.rb +0 -162
  71. data/test/perf_file.rb +0 -62
  72. data/test/perf_object.rb +0 -134
  73. data/test/perf_once.rb +0 -59
  74. data/test/perf_parser.rb +0 -183
  75. data/test/perf_saj.rb +0 -101
  76. data/test/perf_scp.rb +0 -140
  77. data/test/perf_simple.rb +0 -289
  78. data/test/perf_strict.rb +0 -137
  79. data/test/perf_wab.rb +0 -129
  80. data/test/prec.rb +0 -23
  81. data/test/sample/change.rb +0 -13
  82. data/test/sample/dir.rb +0 -18
  83. data/test/sample/doc.rb +0 -35
  84. data/test/sample/file.rb +0 -47
  85. data/test/sample/group.rb +0 -15
  86. data/test/sample/hasprops.rb +0 -15
  87. data/test/sample/layer.rb +0 -11
  88. data/test/sample/line.rb +0 -20
  89. data/test/sample/oval.rb +0 -10
  90. data/test/sample/rect.rb +0 -9
  91. data/test/sample/shape.rb +0 -34
  92. data/test/sample/text.rb +0 -19
  93. data/test/sample.rb +0 -54
  94. data/test/sample_json.rb +0 -37
  95. data/test/test_compat.rb +0 -567
  96. data/test/test_custom.rb +0 -555
  97. data/test/test_debian.rb +0 -50
  98. data/test/test_fast.rb +0 -526
  99. data/test/test_file.rb +0 -250
  100. data/test/test_gc.rb +0 -60
  101. data/test/test_generate.rb +0 -21
  102. data/test/test_hash.rb +0 -39
  103. data/test/test_integer_range.rb +0 -72
  104. data/test/test_null.rb +0 -376
  105. data/test/test_object.rb +0 -1030
  106. data/test/test_parser.rb +0 -11
  107. data/test/test_parser_debug.rb +0 -27
  108. data/test/test_parser_saj.rb +0 -337
  109. data/test/test_parser_usual.rb +0 -255
  110. data/test/test_rails.rb +0 -35
  111. data/test/test_saj.rb +0 -188
  112. data/test/test_scp.rb +0 -431
  113. data/test/test_strict.rb +0 -441
  114. data/test/test_various.rb +0 -801
  115. data/test/test_wab.rb +0 -311
  116. data/test/test_writer.rb +0 -396
  117. data/test/tests.rb +0 -33
  118. data/test/tests_mimic.rb +0 -23
  119. data/test/tests_mimic_addition.rb +0 -16
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d3e57c02a1fe6782596953f34b8e2a2b729a09b5d8e7128dd4633d430ca7aa0c
4
- data.tar.gz: 7698f8c0203459d62f4421f11a9c3637b04b5b808ecf87ce4ca057e2fd073762
3
+ metadata.gz: 30aea721380a4e3edc306dd19906d8777f230a639ba3427e9394dd543a3a7e3b
4
+ data.tar.gz: b024a9d4513c16c1bfe4fc3c4adeacb1afd9a0e670987476d32cfa2fa74e9b1e
5
5
  SHA512:
6
- metadata.gz: 417ada5b645a6ba48e81b52bb72cec97bb4a64595a61252989346a975c1026b3cbc039cbf7cef166b5dbfbdd79554d5c9e5786da3299ce1fd3f2ec70d3ef479f
7
- data.tar.gz: ceffb29c6732b107d42091bc8754e8b4174e26e6d8eb9c4162ed8a12a65d37aa2450f9f5398d39242d92fabd46d63be5fc14e0dc003774378e4e6211b02e3f0d
6
+ metadata.gz: 527ea1162cb135bbe16eefc10a7cb05444182767aca6fa0b6986622e52d7082bcec020c43e663251406c81602018f7d0842c2c5cee37aeca0269560e502d99dd
7
+ data.tar.gz: e49e9f63e373cb0ec21f604f97899f87815b86ef5a5eafad30e7bddbd11e71156f92beaa1259c83609c2d45d2a8aac87c8b27e3e266fcc2bd99a1908327c796d
data/CHANGELOG.md CHANGED
@@ -1,5 +1,15 @@
1
1
  # CHANGELOG
2
2
 
3
+ ## 3.16.11 - 2025-05-29
4
+
5
+ - Fixed range encoding with the :circular option
6
+
7
+ ## 3.16.10 - 2025-02-24
8
+
9
+ - Changed oj_parser_type to be non-static.
10
+
11
+ - Changed ARM versions to use Neon instructions thanks to @samyron.
12
+
3
13
  ## 3.16.9 - 2024-12-28
4
14
 
5
15
  - Fixed `Oj::Parser` create_id size issue #931.
@@ -64,6 +74,7 @@
64
74
  ## 3.14.3 - 2023-04-07
65
75
 
66
76
  - Fixed compat parse with optimized Hash when parsing a JSON::GenericObject.
77
+ - Deprecated Ruby <= 2.6.10
67
78
 
68
79
  ## 3.14.2 - 2023-02-10
69
80
 
data/ext/oj/custom.c CHANGED
@@ -40,7 +40,7 @@ static void dump_obj_as_str(VALUE obj, int depth, Out out) {
40
40
  static void bigdecimal_dump(VALUE obj, int depth, Out out) {
41
41
  volatile VALUE rstr = oj_safe_string_convert(obj);
42
42
  const char *str = RSTRING_PTR(rstr);
43
- int len = (int)RSTRING_LEN(rstr);
43
+ size_t len = RSTRING_LEN(rstr);
44
44
 
45
45
  if (0 == strcasecmp("Infinity", str)) {
46
46
  str = oj_nan_str(obj, out->opts->dump_opts.nan_dump, out->opts->mode, true, &len);
@@ -123,7 +123,7 @@ static void date_dump(VALUE obj, int depth, Out out) {
123
123
  case RubyTime:
124
124
  case XmlTime:
125
125
  v = rb_funcall(obj, rb_intern("iso8601"), 0);
126
- oj_dump_cstr(RSTRING_PTR(v), (int)RSTRING_LEN(v), 0, 0, out);
126
+ oj_dump_cstr(RSTRING_PTR(v), RSTRING_LEN(v), 0, 0, out);
127
127
  break;
128
128
  case UnixZTime:
129
129
  v = rb_funcall(obj, rb_intern("to_time"), 0);
@@ -405,7 +405,7 @@ static void dump_odd(VALUE obj, Odd odd, VALUE clas, int depth, Out out) {
405
405
  rb_raise(rb_eEncodingError, "Invalid type for raw JSON.\n");
406
406
  } else {
407
407
  const char *s = RSTRING_PTR(v);
408
- int len = (int)RSTRING_LEN(v);
408
+ size_t len = RSTRING_LEN(v);
409
409
  const char *name = rb_id2name(*odd->attrs);
410
410
  size_t nlen = strlen(name);
411
411
 
@@ -478,7 +478,7 @@ static VALUE dump_common(VALUE obj, int depth, Out out) {
478
478
  } else if (Yes == out->opts->to_json && rb_respond_to(obj, oj_to_json_id)) {
479
479
  volatile VALUE rs;
480
480
  const char *s;
481
- int len;
481
+ size_t len;
482
482
 
483
483
  TRACE(out->opts->trace, "to_json", obj, depth + 1, TraceRubyIn);
484
484
  if (0 == rb_obj_method_arity(obj, oj_to_json_id)) {
@@ -488,7 +488,7 @@ static VALUE dump_common(VALUE obj, int depth, Out out) {
488
488
  }
489
489
  TRACE(out->opts->trace, "to_json", obj, depth + 1, TraceRubyOut);
490
490
  s = RSTRING_PTR(rs);
491
- len = (int)RSTRING_LEN(rs);
491
+ len = RSTRING_LEN(rs);
492
492
 
493
493
  assure_size(out, len + 1);
494
494
  APPEND_CHARS(out->cur, s, len);
@@ -509,7 +509,7 @@ static VALUE dump_common(VALUE obj, int depth, Out out) {
509
509
  if (aj == obj) {
510
510
  volatile VALUE rstr = oj_safe_string_convert(obj);
511
511
 
512
- oj_dump_cstr(RSTRING_PTR(rstr), (int)RSTRING_LEN(rstr), false, false, out);
512
+ oj_dump_cstr(RSTRING_PTR(rstr), RSTRING_LEN(rstr), false, false, out);
513
513
  } else {
514
514
  oj_dump_custom_val(aj, depth, out, true);
515
515
  }
@@ -676,7 +676,8 @@ static void dump_obj(VALUE obj, int depth, Out out, bool as_ok) {
676
676
 
677
677
  static void dump_array(VALUE a, int depth, Out out, bool as_ok) {
678
678
  size_t size;
679
- int i, cnt;
679
+ size_t i;
680
+ size_t cnt;
680
681
  int d2 = depth + 1;
681
682
  long id = oj_check_circular(a, out);
682
683
 
@@ -684,7 +685,7 @@ static void dump_array(VALUE a, int depth, Out out, bool as_ok) {
684
685
  oj_dump_nil(Qnil, depth, out, false);
685
686
  return;
686
687
  }
687
- cnt = (int)RARRAY_LEN(a);
688
+ cnt = RARRAY_LEN(a);
688
689
  *out->cur++ = '[';
689
690
  assure_size(out, 2);
690
691
  if (0 == cnt) {
@@ -795,7 +796,7 @@ static void dump_struct(VALUE obj, int depth, Out out, bool as_ok) {
795
796
  volatile VALUE s = rb_sym2str(RARRAY_AREF(ma, i));
796
797
 
797
798
  name = RSTRING_PTR(s);
798
- len = (int)RSTRING_LEN(s);
799
+ len = RSTRING_LEN(s);
799
800
  } else {
800
801
  len = snprintf(num_id, sizeof(num_id), "%d", i);
801
802
  name = num_id;
data/ext/oj/dump.c CHANGED
@@ -152,8 +152,77 @@ inline static size_t newline_friendly_size(const uint8_t *str, size_t len) {
152
152
  return calculate_string_size(str, len, newline_friendly_chars);
153
153
  }
154
154
 
155
+ #ifdef HAVE_SIMD_NEON
156
+ inline static uint8x16x4_t load_uint8x16_4(const unsigned char *table) {
157
+ uint8x16x4_t tab;
158
+ tab.val[0] = vld1q_u8(table);
159
+ tab.val[1] = vld1q_u8(table + 16);
160
+ tab.val[2] = vld1q_u8(table + 32);
161
+ tab.val[3] = vld1q_u8(table + 48);
162
+ return tab;
163
+ }
164
+
165
+ static uint8x16x4_t hibit_friendly_chars_neon[2];
166
+ static uint8x16x4_t rails_friendly_chars_neon[2];
167
+ static uint8x16x4_t rails_xss_friendly_chars_neon[4];
168
+
169
+ void initialize_neon(void) {
170
+ // We only need the first 128 bytes of the hibit friendly chars table. Everything above 127 is
171
+ // set to 1. If that ever changes, the code will need to be updated.
172
+ hibit_friendly_chars_neon[0] = load_uint8x16_4((const unsigned char *)hibit_friendly_chars);
173
+ hibit_friendly_chars_neon[1] = load_uint8x16_4((const unsigned char *)hibit_friendly_chars + 64);
174
+
175
+ // rails_friendly_chars is the same as hibit_friendly_chars. Only the first 128 bytes have values
176
+ // that are not '1'. If that ever changes, the code will need to be updated.
177
+ rails_friendly_chars_neon[0] = load_uint8x16_4((const unsigned char *)rails_friendly_chars);
178
+ rails_friendly_chars_neon[1] = load_uint8x16_4((const unsigned char *)rails_friendly_chars + 64);
179
+
180
+ rails_xss_friendly_chars_neon[0] = load_uint8x16_4((const unsigned char *)rails_xss_friendly_chars);
181
+ rails_xss_friendly_chars_neon[1] = load_uint8x16_4((const unsigned char *)rails_xss_friendly_chars + 64);
182
+ rails_xss_friendly_chars_neon[2] = load_uint8x16_4((const unsigned char *)rails_xss_friendly_chars + 128);
183
+ rails_xss_friendly_chars_neon[3] = load_uint8x16_4((const unsigned char *)rails_xss_friendly_chars + 192);
184
+
185
+ // All bytes should be 0 except for those that need more than 1 byte of output. This will allow the
186
+ // code to limit the lookups to the first 128 bytes (values 0 - 127). Bytes above 127 will result
187
+ // in 0 with the vqtbl4q_u8 instruction.
188
+ uint8x16_t one = vdupq_n_u8('1');
189
+ for (int i = 0; i < 2; i++) {
190
+ for (int j = 0; j < 4; j++) {
191
+ hibit_friendly_chars_neon[i].val[j] = vsubq_u8(hibit_friendly_chars_neon[i].val[j], one);
192
+ rails_friendly_chars_neon[i].val[j] = vsubq_u8(rails_friendly_chars_neon[i].val[j], one);
193
+ }
194
+ }
195
+
196
+ for (int i = 0; i < 4; i++) {
197
+ for (int j = 0; j < 4; j++) {
198
+ rails_xss_friendly_chars_neon[i].val[j] = vsubq_u8(rails_xss_friendly_chars_neon[i].val[j], one);
199
+ }
200
+ }
201
+ }
202
+ #endif
203
+
155
204
  inline static size_t hibit_friendly_size(const uint8_t *str, size_t len) {
205
+ #ifdef HAVE_SIMD_NEON
206
+ size_t size = 0;
207
+ size_t i = 0;
208
+
209
+ for (; i + sizeof(uint8x16_t) <= len; i += sizeof(uint8x16_t), str += sizeof(uint8x16_t)) {
210
+ size += sizeof(uint8x16_t);
211
+
212
+ // See https://lemire.me/blog/2019/07/23/arbitrary-byte-to-byte-maps-using-arm-neon/
213
+ uint8x16_t chunk = vld1q_u8(str);
214
+ uint8x16_t tmp1 = vqtbl4q_u8(hibit_friendly_chars_neon[0], chunk);
215
+ uint8x16_t tmp2 = vqtbl4q_u8(hibit_friendly_chars_neon[1], veorq_u8(chunk, vdupq_n_u8(0x40)));
216
+ uint8x16_t result = vorrq_u8(tmp1, tmp2);
217
+ uint8_t tmp = vaddvq_u8(result);
218
+ size += tmp;
219
+ }
220
+
221
+ size_t total = size + calculate_string_size(str, len - i, hibit_friendly_chars);
222
+ return total;
223
+ #else
156
224
  return calculate_string_size(str, len, hibit_friendly_chars);
225
+ #endif
157
226
  }
158
227
 
159
228
  inline static size_t slash_friendly_size(const uint8_t *str, size_t len) {
@@ -184,9 +253,43 @@ inline static size_t hixss_friendly_size(const uint8_t *str, size_t len) {
184
253
 
185
254
  inline static long rails_xss_friendly_size(const uint8_t *str, size_t len) {
186
255
  long size = 0;
187
- size_t i = len;
188
256
  uint8_t hi = 0;
189
257
 
258
+ #ifdef HAVE_SIMD_NEON
259
+ size_t i = 0;
260
+
261
+ uint8x16_t has_some_hibit = vdupq_n_u8(0);
262
+ uint8x16_t hibit = vdupq_n_u8(0x80);
263
+ for (; i + sizeof(uint8x16_t) <= len; i += sizeof(uint8x16_t), str += sizeof(uint8x16_t)) {
264
+ size += sizeof(uint8x16_t);
265
+
266
+ uint8x16_t chunk = vld1q_u8(str);
267
+
268
+ // Check to see if any of these bytes have the high bit set.
269
+ has_some_hibit = vorrq_u8(has_some_hibit, vandq_u8(chunk, hibit));
270
+
271
+ uint8x16_t tmp1 = vqtbl4q_u8(rails_xss_friendly_chars_neon[0], chunk);
272
+ uint8x16_t tmp2 = vqtbl4q_u8(rails_xss_friendly_chars_neon[1], veorq_u8(chunk, vdupq_n_u8(0x40)));
273
+ uint8x16_t tmp3 = vqtbl4q_u8(rails_xss_friendly_chars_neon[2], veorq_u8(chunk, vdupq_n_u8(0x80)));
274
+ uint8x16_t tmp4 = vqtbl4q_u8(rails_xss_friendly_chars_neon[3], veorq_u8(chunk, vdupq_n_u8(0xc0)));
275
+ uint8x16_t result = vorrq_u8(tmp4, vorrq_u8(tmp3, vorrq_u8(tmp1, tmp2)));
276
+ uint8_t tmp = vaddvq_u8(result);
277
+ size += tmp;
278
+ }
279
+
280
+ // 'hi' should be set if any of the bytes we processed have the high bit set. It doesn't matter which ones.
281
+ hi = vmaxvq_u8(has_some_hibit) != 0;
282
+
283
+ for (; i < len; str++, i++) {
284
+ size += rails_xss_friendly_chars[*str] - '0';
285
+ hi |= *str & 0x80;
286
+ }
287
+ if (0 == hi) {
288
+ return size;
289
+ }
290
+ return -(size);
291
+ #else
292
+ size_t i = len;
190
293
  for (; 0 < i; str++, i--) {
191
294
  size += rails_xss_friendly_chars[*str];
192
295
  hi |= *str & 0x80;
@@ -195,13 +298,47 @@ inline static long rails_xss_friendly_size(const uint8_t *str, size_t len) {
195
298
  return size - len * (size_t)'0';
196
299
  }
197
300
  return -(size - len * (size_t)'0');
301
+ #endif /* HAVE_SIMD_NEON */
198
302
  }
199
303
 
200
304
  inline static size_t rails_friendly_size(const uint8_t *str, size_t len) {
201
305
  long size = 0;
202
- size_t i = len;
203
306
  uint8_t hi = 0;
307
+ #ifdef HAVE_SIMD_NEON
308
+ size_t i = 0;
309
+
310
+ uint8x16_t has_some_hibit = vdupq_n_u8(0);
311
+ uint8x16_t hibit = vdupq_n_u8(0x80);
312
+
313
+ for (; i + sizeof(uint8x16_t) <= len; i += sizeof(uint8x16_t), str += sizeof(uint8x16_t)) {
314
+ size += sizeof(uint8x16_t);
315
+
316
+ // See https://lemire.me/blog/2019/07/23/arbitrary-byte-to-byte-maps-using-arm-neon/
317
+ uint8x16_t chunk = vld1q_u8(str);
318
+
319
+ // Check to see if any of these bytes have the high bit set.
320
+ has_some_hibit = vorrq_u8(has_some_hibit, vandq_u8(chunk, hibit));
204
321
 
322
+ uint8x16_t tmp1 = vqtbl4q_u8(rails_friendly_chars_neon[0], chunk);
323
+ uint8x16_t tmp2 = vqtbl4q_u8(rails_friendly_chars_neon[1], veorq_u8(chunk, vdupq_n_u8(0x40)));
324
+ uint8x16_t result = vorrq_u8(tmp1, tmp2);
325
+ uint8_t tmp = vaddvq_u8(result);
326
+ size += tmp;
327
+ }
328
+
329
+ // 'hi' should be set if any of the bytes we processed have the high bit set. It doesn't matter which ones.
330
+ hi = vmaxvq_u8(has_some_hibit) != 0;
331
+
332
+ for (; i < len; str++, i++) {
333
+ size += rails_friendly_chars[*str] - '0';
334
+ hi |= *str & 0x80;
335
+ }
336
+ if (0 == hi) {
337
+ return size;
338
+ }
339
+ return -(size);
340
+ #else
341
+ size_t i = len;
205
342
  for (; 0 < i; str++, i--) {
206
343
  size += rails_friendly_chars[*str];
207
344
  hi |= *str & 0x80;
@@ -210,9 +347,10 @@ inline static size_t rails_friendly_size(const uint8_t *str, size_t len) {
210
347
  return size - len * (size_t)'0';
211
348
  }
212
349
  return -(size - len * (size_t)'0');
350
+ #endif /* HAVE_SIMD_NEON */
213
351
  }
214
352
 
215
- const char *oj_nan_str(VALUE obj, int opt, int mode, bool plus, int *lenp) {
353
+ const char *oj_nan_str(VALUE obj, int opt, int mode, bool plus, size_t *lenp) {
216
354
  const char *str = NULL;
217
355
 
218
356
  if (AutoNan == opt) {
@@ -477,7 +615,7 @@ void oj_dump_time(VALUE obj, Out out, int withZone) {
477
615
  void oj_dump_ruby_time(VALUE obj, Out out) {
478
616
  volatile VALUE rstr = oj_safe_string_convert(obj);
479
617
 
480
- oj_dump_cstr(RSTRING_PTR(rstr), (int)RSTRING_LEN(rstr), 0, 0, out);
618
+ oj_dump_cstr(RSTRING_PTR(rstr), RSTRING_LEN(rstr), 0, 0, out);
481
619
  }
482
620
 
483
621
  void oj_dump_xml_time(VALUE obj, Out out) {
@@ -711,13 +849,13 @@ void oj_dump_str(VALUE obj, int depth, Out out, bool as_ok) {
711
849
  rb_encoding *enc = rb_enc_from_index(idx);
712
850
  obj = rb_str_conv_enc(obj, enc, oj_utf8_encoding);
713
851
  }
714
- oj_dump_cstr(RSTRING_PTR(obj), (int)RSTRING_LEN(obj), 0, 0, out);
852
+ oj_dump_cstr(RSTRING_PTR(obj), RSTRING_LEN(obj), 0, 0, out);
715
853
  }
716
854
 
717
855
  void oj_dump_sym(VALUE obj, int depth, Out out, bool as_ok) {
718
856
  volatile VALUE s = rb_sym2str(obj);
719
857
 
720
- oj_dump_cstr(RSTRING_PTR(s), (int)RSTRING_LEN(s), 0, 0, out);
858
+ oj_dump_cstr(RSTRING_PTR(s), RSTRING_LEN(s), 0, 0, out);
721
859
  }
722
860
 
723
861
  static void debug_raise(const char *orig, size_t cnt, int line) {
@@ -758,9 +896,49 @@ void oj_dump_raw_json(VALUE obj, int depth, Out out) {
758
896
  }
759
897
  }
760
898
 
899
+ #ifdef HAVE_SIMD_NEON
900
+ typedef struct _neon_match_result {
901
+ uint8x16_t needs_escape;
902
+ bool has_some_hibit;
903
+ bool do_unicode_validation;
904
+ } neon_match_result;
905
+
906
+ #if defined(__clang__) || defined(__GNUC__)
907
+ #define FORCE_INLINE __attribute__((always_inline))
908
+ #else
909
+ #define FORCE_INLINE
910
+ #endif
911
+
912
+ static inline FORCE_INLINE neon_match_result
913
+ neon_update(const char *str, uint8x16x4_t *cmap_neon, int neon_table_size, bool do_unicode_validation, bool has_hi) {
914
+ neon_match_result result = {.has_some_hibit = false, .do_unicode_validation = false};
915
+
916
+ uint8x16_t chunk = vld1q_u8((const unsigned char *)str);
917
+ uint8x16_t tmp1 = vqtbl4q_u8(cmap_neon[0], chunk);
918
+ uint8x16_t tmp2 = vqtbl4q_u8(cmap_neon[1], veorq_u8(chunk, vdupq_n_u8(0x40)));
919
+ result.needs_escape = vorrq_u8(tmp1, tmp2);
920
+ if (neon_table_size > 2) {
921
+ uint8x16_t tmp3 = vqtbl4q_u8(cmap_neon[2], veorq_u8(chunk, vdupq_n_u8(0x80)));
922
+ uint8x16_t tmp4 = vqtbl4q_u8(cmap_neon[3], veorq_u8(chunk, vdupq_n_u8(0xc0)));
923
+ result.needs_escape = vorrq_u8(result.needs_escape, vorrq_u8(tmp4, tmp3));
924
+ }
925
+ if (has_hi && do_unicode_validation) {
926
+ uint8x16_t has_some_hibit = vandq_u8(chunk, vdupq_n_u8(0x80));
927
+ result.has_some_hibit = vmaxvq_u8(has_some_hibit) != 0;
928
+ result.do_unicode_validation = has_hi && do_unicode_validation && result.has_some_hibit;
929
+ }
930
+ return result;
931
+ }
932
+
933
+ #endif /* HAVE_SIMD_NEON */
934
+
761
935
  void oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out out) {
762
- size_t size;
763
- char *cmap;
936
+ size_t size;
937
+ char *cmap;
938
+ #ifdef HAVE_SIMD_NEON
939
+ uint8x16x4_t *cmap_neon = NULL;
940
+ int neon_table_size;
941
+ #endif /* HAVE_SIMD_NEON */
764
942
  const char *orig = str;
765
943
  bool has_hi = false;
766
944
  bool do_unicode_validation = false;
@@ -792,7 +970,11 @@ void oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out ou
792
970
  long sz;
793
971
 
794
972
  cmap = rails_xss_friendly_chars;
795
- sz = rails_xss_friendly_size((uint8_t *)str, cnt);
973
+ #ifdef HAVE_SIMD_NEON
974
+ cmap_neon = rails_xss_friendly_chars_neon;
975
+ neon_table_size = 4;
976
+ #endif /* HAVE_SIMD_NEON */
977
+ sz = rails_xss_friendly_size((uint8_t *)str, cnt);
796
978
  if (sz < 0) {
797
979
  has_hi = true;
798
980
  size = (size_t)-sz;
@@ -805,7 +987,11 @@ void oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out ou
805
987
  case RailsEsc: {
806
988
  long sz;
807
989
  cmap = rails_friendly_chars;
808
- sz = rails_friendly_size((uint8_t *)str, cnt);
990
+ #ifdef HAVE_SIMD_NEON
991
+ cmap_neon = rails_friendly_chars_neon;
992
+ neon_table_size = 2;
993
+ #endif /* HAVE_SIMD_NEON */
994
+ sz = rails_friendly_size((uint8_t *)str, cnt);
809
995
  if (sz < 0) {
810
996
  has_hi = true;
811
997
  size = (size_t)-sz;
@@ -816,7 +1002,12 @@ void oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out ou
816
1002
  break;
817
1003
  }
818
1004
  case JSONEsc:
819
- default: cmap = hibit_friendly_chars; size = hibit_friendly_size((uint8_t *)str, cnt);
1005
+ default: cmap = hibit_friendly_chars;
1006
+ #ifdef HAVE_SIMD_NEON
1007
+ cmap_neon = hibit_friendly_chars_neon;
1008
+ neon_table_size = 2;
1009
+ #endif /* HAVE_SIMD_NEON */
1010
+ size = hibit_friendly_size((uint8_t *)str, cnt);
820
1011
  }
821
1012
  assure_size(out, size + BUFFER_EXTRA);
822
1013
  *out->cur++ = '"';
@@ -842,8 +1033,116 @@ void oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out ou
842
1033
  if (is_sym) {
843
1034
  *out->cur++ = ':';
844
1035
  }
1036
+ #ifdef HAVE_SIMD_NEON
1037
+ const char *chunk_start;
1038
+ const char *chunk_end;
1039
+ const char *cursor = str;
1040
+ int neon_state = (cmap_neon != NULL) ? 1 : 4;
1041
+ char matches[16];
1042
+ bool do_hi_validation = false;
1043
+ // uint64_t neon_match_mask = 0;
1044
+ #define SEARCH_FLUSH \
1045
+ if (str > cursor) { \
1046
+ APPEND_CHARS(out->cur, cursor, str - cursor); \
1047
+ cursor = str; \
1048
+ }
1049
+
1050
+ loop:
1051
+ #endif /* HAVE_SIMD_NEON */
845
1052
  for (; str < end; str++) {
846
- switch (cmap[(uint8_t)*str]) {
1053
+ char action = 0;
1054
+ #ifdef HAVE_SIMD_NEON
1055
+ /* neon_state:
1056
+ * 1: Scanning for matches. There must be at least
1057
+ sizeof(uint8x16_t) bytes of input data to use SIMD and
1058
+ cmap_neon must be non-null.
1059
+ * 2: Matches have been found. Will set str to the position of the
1060
+ * next match and set the state to 3.
1061
+ * If there are no more matches it will transition to state 1.
1062
+ * 4: Fallback to the scalar algorithm. Not enough data to use
1063
+ * SIMD.
1064
+ */
1065
+ #define NEON_SET_STATE(state) \
1066
+ neon_state = state; \
1067
+ goto loop;
1068
+ #define NEON_RETURN_TO_STATE(state) neon_state = state;
1069
+ switch (neon_state) {
1070
+ case 1: {
1071
+ while (true) {
1072
+ const char *chunk_ptr = NULL;
1073
+ if (str + sizeof(uint8x16_t) <= end) {
1074
+ chunk_ptr = str;
1075
+ chunk_start = str;
1076
+ chunk_end = str + sizeof(uint8x16_t);
1077
+ } else if ((end - str) >= SIMD_MINIMUM_THRESHOLD) {
1078
+ memset(out->cur, 'A', sizeof(uint8x16_t));
1079
+ memcpy(out->cur, str, (end - str));
1080
+ chunk_ptr = out->cur;
1081
+ chunk_start = str;
1082
+ chunk_end = end;
1083
+ } else {
1084
+ SEARCH_FLUSH;
1085
+ NEON_SET_STATE(4);
1086
+ break; /* Unreachable */
1087
+ }
1088
+ neon_match_result result = neon_update(chunk_ptr,
1089
+ cmap_neon,
1090
+ neon_table_size,
1091
+ do_unicode_validation,
1092
+ has_hi);
1093
+ if ((result.do_unicode_validation) || vmaxvq_u8(result.needs_escape) != 0) {
1094
+ SEARCH_FLUSH;
1095
+ uint8x16_t actions = vaddq_u8(result.needs_escape, vdupq_n_u8('1'));
1096
+ do_hi_validation = result.do_unicode_validation;
1097
+ vst1q_u8((unsigned char *)matches, actions);
1098
+ NEON_SET_STATE(2);
1099
+ break; /* Unreachable */
1100
+ }
1101
+ str = chunk_end;
1102
+ }
1103
+ // We must have run out of data to use SIMD. Go to state 4.
1104
+ SEARCH_FLUSH;
1105
+ NEON_SET_STATE(4);
1106
+ } break;
1107
+ case 3:
1108
+ cursor = str;
1109
+ // This fall through is intentional. We return to state 3 after we process
1110
+ // a byte (or multiple). We return to this state to ensure the cursor is
1111
+ // pointing to the correct location. We then resume looking for matches
1112
+ // within the previously processed chunk.
1113
+ case 2:
1114
+ if (str >= chunk_end) {
1115
+ NEON_SET_STATE(1);
1116
+ }
1117
+ if (!do_hi_validation) {
1118
+ long i = str - chunk_start;
1119
+ for (; str < chunk_end; i++) {
1120
+ if ((action = matches[i]) != '1') {
1121
+ break;
1122
+ }
1123
+ *out->cur++ = *str++;
1124
+ }
1125
+ // The loop above may have advanced str, copying each byte it passed directly to out->cur.
1126
+ // Ensure cursor is set appropriately.
1127
+ cursor = str;
1128
+ if (str >= chunk_end) {
1129
+ // We must have advanced past the end... we are done.
1130
+ NEON_SET_STATE(1);
1131
+ }
1132
+ } else {
1133
+ long match_index = str - chunk_start;
1134
+ action = matches[match_index];
1135
+ }
1136
+ NEON_RETURN_TO_STATE(3);
1137
+ break;
1138
+ case 4: action = cmap[(uint8_t)*str];
1139
+ }
1140
+ #undef NEON_SET_STATE
1141
+ #undef NEON_RETURN_TO_STATE
1142
+ #else
1143
+ action = cmap[(uint8_t)*str];
1144
+ #endif /* HAVE_SIMD_NEON */
1145
+ switch (action) {
847
1146
  case '1':
848
1147
  if (do_unicode_validation && check_start <= str) {
849
1148
  if (0 != (0x80 & (uint8_t)*str)) {
@@ -906,7 +1205,7 @@ void oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out ou
906
1205
  *out->cur++ = '"';
907
1206
  }
908
1207
  if (do_unicode_validation && 0 < str - orig && 0 != (0x80 & *(str - 1))) {
909
- uint8_t c = (uint8_t) * (str - 1);
1208
+ uint8_t c = (uint8_t)*(str - 1);
910
1209
  int i;
911
1210
  int scnt = (int)(str - orig);
912
1211
 
@@ -957,7 +1256,7 @@ void oj_dump_class(VALUE obj, int depth, Out out, bool as_ok) {
957
1256
  void oj_dump_obj_to_s(VALUE obj, Out out) {
958
1257
  volatile VALUE rstr = oj_safe_string_convert(obj);
959
1258
 
960
- oj_dump_cstr(RSTRING_PTR(rstr), (int)RSTRING_LEN(rstr), 0, 0, out);
1259
+ oj_dump_cstr(RSTRING_PTR(rstr), RSTRING_LEN(rstr), 0, 0, out);
961
1260
  }
962
1261
 
963
1262
  void oj_dump_raw(const char *str, size_t cnt, Out out) {
@@ -1092,7 +1391,7 @@ void oj_dump_fixnum(VALUE obj, int depth, Out out, bool as_ok) {
1092
1391
 
1093
1392
  void oj_dump_bignum(VALUE obj, int depth, Out out, bool as_ok) {
1094
1393
  volatile VALUE rs = rb_big2str(obj, 10);
1095
- int cnt = (int)RSTRING_LEN(rs);
1394
+ size_t cnt = RSTRING_LEN(rs);
1096
1395
  bool dump_as_string = false;
1097
1396
 
1098
1397
  if (out->opts->int_range_max != 0 || out->opts->int_range_min != 0) { // Bignum cannot be inside of Fixnum range
@@ -1114,7 +1413,7 @@ void oj_dump_float(VALUE obj, int depth, Out out, bool as_ok) {
1114
1413
  char buf[64];
1115
1414
  char *b;
1116
1415
  double d = rb_num2dbl(obj);
1117
- int cnt = 0;
1416
+ size_t cnt = 0;
1118
1417
 
1119
1418
  if (0.0 == d) {
1120
1419
  b = buf;
@@ -1225,7 +1524,7 @@ void oj_dump_float(VALUE obj, int depth, Out out, bool as_ok) {
1225
1524
  } else if (0 == out->opts->float_prec) {
1226
1525
  volatile VALUE rstr = oj_safe_string_convert(obj);
1227
1526
 
1228
- cnt = (int)RSTRING_LEN(rstr);
1527
+ cnt = RSTRING_LEN(rstr);
1229
1528
  if ((int)sizeof(buf) <= cnt) {
1230
1529
  cnt = sizeof(buf) - 1;
1231
1530
  }
@@ -1239,8 +1538,8 @@ void oj_dump_float(VALUE obj, int depth, Out out, bool as_ok) {
1239
1538
  *out->cur = '\0';
1240
1539
  }
1241
1540
 
1242
- int oj_dump_float_printf(char *buf, size_t blen, VALUE obj, double d, const char *format) {
1243
- int cnt = snprintf(buf, blen, format, d);
1541
+ size_t oj_dump_float_printf(char *buf, size_t blen, VALUE obj, double d, const char *format) {
1542
+ size_t cnt = snprintf(buf, blen, format, d);
1244
1543
 
1245
1544
  // Round off issues at 16 significant digits so check for obvious ones of
1246
1545
  // 0001 and 9999.
@@ -1248,7 +1547,7 @@ int oj_dump_float_printf(char *buf, size_t blen, VALUE obj, double d, const char
1248
1547
  volatile VALUE rstr = oj_safe_string_convert(obj);
1249
1548
 
1250
1549
  strcpy(buf, RSTRING_PTR(rstr));
1251
- cnt = (int)RSTRING_LEN(rstr);
1550
+ cnt = RSTRING_LEN(rstr);
1252
1551
  }
1253
1552
  return cnt;
1254
1553
  }
data/ext/oj/dump.h CHANGED
@@ -7,12 +7,17 @@
7
7
  #include <ruby.h>
8
8
 
9
9
  #include "oj.h"
10
+ #include "simd.h"
10
11
 
11
12
  #define MAX_DEPTH 1000
12
13
 
13
14
  // Extra padding at end of buffer.
14
15
  #define BUFFER_EXTRA 64
15
16
 
17
+ #ifdef HAVE_SIMD_NEON
18
+ extern void initialize_neon(void);
19
+ #endif /* HAVE_SIMD_NEON */
20
+
16
21
  extern void oj_dump_nil(VALUE obj, int depth, Out out, bool as_ok);
17
22
  extern void oj_dump_true(VALUE obj, int depth, Out out, bool as_ok);
18
23
  extern void oj_dump_false(VALUE obj, int depth, Out out, bool as_ok);
@@ -30,7 +35,7 @@ extern void oj_dump_xml_time(VALUE obj, Out out);
30
35
  extern void oj_dump_time(VALUE obj, Out out, int withZone);
31
36
  extern void oj_dump_obj_to_s(VALUE obj, Out out);
32
37
 
33
- extern const char *oj_nan_str(VALUE obj, int opt, int mode, bool plus, int *lenp);
38
+ extern const char *oj_nan_str(VALUE obj, int opt, int mode, bool plus, size_t *lenp);
34
39
 
35
40
  // initialize an out buffer with the provided stack allocated memory
36
41
  extern void oj_out_init(Out out);
@@ -53,7 +58,7 @@ extern void oj_dump_raw_json(VALUE obj, int depth, Out out);
53
58
  extern VALUE oj_add_to_json(int argc, VALUE *argv, VALUE self);
54
59
  extern VALUE oj_remove_to_json(int argc, VALUE *argv, VALUE self);
55
60
 
56
- extern int oj_dump_float_printf(char *buf, size_t blen, VALUE obj, double d, const char *format);
61
+ extern size_t oj_dump_float_printf(char *buf, size_t blen, VALUE obj, double d, const char *format);
57
62
 
58
63
  extern time_t oj_sec_from_time_hard_way(VALUE obj);
59
64