json 2.12.2 → 2.13.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b76bc749fd1bc82cd84df8c7d14317305d5426e0962d0e034864b218e952e474
4
- data.tar.gz: 8e8d9179b6ca7ce69c7a281691973dc9cb1e6b147a5812b96d02c3b41d8f8cec
3
+ metadata.gz: 71499860706a6f27871853ec88fe26d5bd6a53f85fba2e9764b9ef0aadc170d9
4
+ data.tar.gz: c07cd26190c4f36864490465890c162e1b9ce0ae1f262069e51c94d7cf74b117
5
5
  SHA512:
6
- metadata.gz: a241873428f8de2106604f3a31484aa243b6f62c6c6f38f8e381669f937b19f03b35add48ddf4f1d24c5a9f469c2dbdc8bafca36d8f3bcf678313ad8d9ea11d3
7
- data.tar.gz: 99d07dbf3dcacda3cf662ee05bc181aa3ca6e43ce8779b9f5edc454fba8e718abe876eb81b4918e0c0bb1a3b4286e1d85df086425e552743420942a8e0098f0e
6
+ metadata.gz: f1c626d30c67e99c56d9f411b7944b6263261676567c02f6d57ba7566087743b26835d3c6c4c1636a0dc76cc2b8c4e2739a1130f286cd069406c38c66138df97
7
+ data.tar.gz: eb4fd62079fc730962359e4128f7abd36ca04c17b580506c3e8e3ebdd0e8e0c54c3cf75d1ae899dc53e13697bfa0c11c6c04277d701cfc83fa9ee611785ac85e
data/CHANGES.md CHANGED
@@ -2,6 +2,20 @@
2
2
 
3
3
  ### Unreleased
4
4
 
5
+ ### 2025-07-28 (2.13.2)
6
+
7
+ * Improve duplicate key warning and errors to include the key name and point to the right caller.
8
+
9
+ ### 2025-07-24 (2.13.1)
10
+
11
+ * Fix support for older compilers without `__builtin_cpu_supports`.
12
+
13
+ ### 2025-07-17 (2.13.0)
14
+
15
+ * Add a new `allow_duplicate_key` parsing option. By default a warning is now emitted when a duplicated key is encountered.
16
+ In `json 3.0` an error will be raised.
17
+ * Optimize parsing further using SIMD to scan strings.
18
+
5
19
  ### 2025-05-23 (2.12.2)
6
20
 
7
21
  * Fix compiler optimization level.
@@ -9,31 +9,7 @@ else
9
9
  $defs << "-DJSON_DEBUG" if ENV["JSON_DEBUG"]
10
10
 
11
11
  if enable_config('generator-use-simd', default=!ENV["JSON_DISABLE_SIMD"])
12
- if RbConfig::CONFIG['host_cpu'] =~ /^(arm.*|aarch64.*)/
13
- # Try to compile a small program using NEON instructions
14
- if have_header('arm_neon.h')
15
- have_type('uint8x16_t', headers=['arm_neon.h']) && try_compile(<<~'SRC')
16
- #include <arm_neon.h>
17
- int main() {
18
- uint8x16_t test = vdupq_n_u8(32);
19
- return 0;
20
- }
21
- SRC
22
- $defs.push("-DJSON_ENABLE_SIMD")
23
- end
24
- end
25
-
26
- if have_header('x86intrin.h') && have_type('__m128i', headers=['x86intrin.h']) && try_compile(<<~'SRC')
27
- #include <x86intrin.h>
28
- int main() {
29
- __m128i test = _mm_set1_epi8(32);
30
- return 0;
31
- }
32
- SRC
33
- $defs.push("-DJSON_ENABLE_SIMD")
34
- end
35
-
36
- have_header('cpuid.h')
12
+ load __dir__ + "/../simd/conf.rb"
37
13
  end
38
14
 
39
15
  create_makefile 'json/ext/generator'
@@ -5,7 +5,7 @@
5
5
  #include <math.h>
6
6
  #include <ctype.h>
7
7
 
8
- #include "simd.h"
8
+ #include "../simd/simd.h"
9
9
 
10
10
  /* ruby api and some helpers */
11
11
 
@@ -304,28 +304,6 @@ static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
304
304
  return 1;
305
305
  }
306
306
 
307
- // See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
308
- static inline FORCE_INLINE uint64_t neon_match_mask(uint8x16_t matches)
309
- {
310
- const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4);
311
- const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0);
312
- return mask & 0x8888888888888888ull;
313
- }
314
-
315
- static inline FORCE_INLINE uint64_t neon_rules_update(const char *ptr)
316
- {
317
- uint8x16_t chunk = vld1q_u8((const unsigned char *)ptr);
318
-
319
- // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
320
- // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
321
- const uint8x16_t too_low_or_dbl_quote = vcltq_u8(veorq_u8(chunk, vdupq_n_u8(2)), vdupq_n_u8(33));
322
-
323
- uint8x16_t has_backslash = vceqq_u8(chunk, vdupq_n_u8('\\'));
324
- uint8x16_t needs_escape = vorrq_u8(too_low_or_dbl_quote, has_backslash);
325
-
326
- return neon_match_mask(needs_escape);
327
- }
328
-
329
307
  static inline unsigned char search_escape_basic_neon(search_state *search)
330
308
  {
331
309
  if (RB_UNLIKELY(search->has_matches)) {
@@ -333,7 +311,7 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
333
311
  if (search->matches_mask > 0) {
334
312
  return neon_next_match(search);
335
313
  } else {
336
- // neon_next_match will only advance search->ptr up to the last matching character.
314
+ // neon_next_match will only advance search->ptr up to the last matching character.
337
315
  // Skip over any characters in the last chunk that occur after the last match.
338
316
  search->has_matches = false;
339
317
  search->ptr = search->chunk_end;
@@ -342,67 +320,61 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
342
320
 
343
321
  /*
344
322
  * The code below implements an SIMD-based algorithm to determine if N bytes at a time
345
- * need to be escaped.
346
- *
323
+ * need to be escaped.
324
+ *
347
325
  * Assume the ptr = "Te\sting!" (the double quotes are included in the string)
348
- *
326
+ *
349
327
  * The explanation will be limited to the first 8 bytes of the string for simplicity. However
350
328
  * the vector instructions may work on larger vectors.
351
- *
329
+ *
352
330
  * First, we load three constants 'lower_bound', 'backslash' and 'dblquote' in vector registers.
353
- *
354
- * lower_bound: [20 20 20 20 20 20 20 20]
355
- * backslash: [5C 5C 5C 5C 5C 5C 5C 5C]
356
- * dblquote: [22 22 22 22 22 22 22 22]
357
- *
358
- * Next we load the first chunk of the ptr:
331
+ *
332
+ * lower_bound: [20 20 20 20 20 20 20 20]
333
+ * backslash: [5C 5C 5C 5C 5C 5C 5C 5C]
334
+ * dblquote: [22 22 22 22 22 22 22 22]
335
+ *
336
+ * Next we load the first chunk of the ptr:
359
337
  * [22 54 65 5C 73 74 69 6E] (" T e \ s t i n)
360
- *
338
+ *
361
339
  * First we check if any byte in chunk is less than 32 (0x20). This returns the following vector
362
340
  * as no bytes are less than 32 (0x20):
363
341
  * [0 0 0 0 0 0 0 0]
364
- *
342
+ *
365
343
  * Next, we check if any byte in chunk is equal to a backslash:
366
344
  * [0 0 0 FF 0 0 0 0]
367
- *
345
+ *
368
346
  * Finally we check if any byte in chunk is equal to a double quote:
369
- * [FF 0 0 0 0 0 0 0]
370
- *
347
+ * [FF 0 0 0 0 0 0 0]
348
+ *
371
349
  * Now we have three vectors where each byte indicates if the corresponding byte in chunk
372
350
  * needs to be escaped. We combine these vectors with a series of logical OR instructions.
373
351
  * This is the needs_escape vector and it is equal to:
374
- * [FF 0 0 FF 0 0 0 0]
375
- *
352
+ * [FF 0 0 FF 0 0 0 0]
353
+ *
376
354
  * Next we compute the bitwise AND between each byte and 0x1 and compute the horizontal sum of
377
355
  * the values in the vector. This computes how many bytes need to be escaped within this chunk.
378
- *
356
+ *
379
357
  * Finally we compute a mask that indicates which bytes need to be escaped. If the mask is 0 then,
380
358
  * no bytes need to be escaped and we can continue to the next chunk. If the mask is not 0 then we
381
359
  * have at least one byte that needs to be escaped.
382
360
  */
383
- while (search->ptr + sizeof(uint8x16_t) <= search->end) {
384
- uint64_t mask = neon_rules_update(search->ptr);
385
361
 
386
- if (!mask) {
387
- search->ptr += sizeof(uint8x16_t);
388
- continue;
389
- }
390
- search->matches_mask = mask;
362
+ if (string_scan_simd_neon(&search->ptr, search->end, &search->matches_mask)) {
391
363
  search->has_matches = true;
392
364
  search->chunk_base = search->ptr;
393
365
  search->chunk_end = search->ptr + sizeof(uint8x16_t);
394
366
  return neon_next_match(search);
395
367
  }
396
368
 
397
- // There are fewer than 16 bytes left.
369
+ // There are fewer than 16 bytes left.
398
370
  unsigned long remaining = (search->end - search->ptr);
399
371
  if (remaining >= SIMD_MINIMUM_THRESHOLD) {
400
372
  char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining);
401
373
 
402
- uint64_t mask = neon_rules_update(s);
374
+ uint64_t mask = compute_chunk_mask_neon(s);
403
375
 
404
376
  if (!mask) {
405
- // Nothing to escape, ensure search_flush doesn't do anything by setting
377
+ // Nothing to escape, ensure search_flush doesn't do anything by setting
406
378
  // search->cursor to search->ptr.
407
379
  fbuffer_consumed(search->buffer, remaining);
408
380
  search->ptr = search->end;
@@ -428,11 +400,6 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
428
400
 
429
401
  #ifdef HAVE_SIMD_SSE2
430
402
 
431
- #define _mm_cmpge_epu8(a, b) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a)
432
- #define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a)
433
- #define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1))
434
- #define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a)
435
-
436
403
  static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
437
404
  {
438
405
  int mask = search->matches_mask;
@@ -457,18 +424,6 @@ static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
457
424
  #define TARGET_SSE2
458
425
  #endif
459
426
 
460
- static inline TARGET_SSE2 FORCE_INLINE int sse2_update(const char *ptr)
461
- {
462
- __m128i chunk = _mm_loadu_si128((__m128i const*)ptr);
463
-
464
- // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
465
- // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
466
- __m128i too_low_or_dbl_quote = _mm_cmplt_epu8(_mm_xor_si128(chunk, _mm_set1_epi8(2)), _mm_set1_epi8(33));
467
- __m128i has_backslash = _mm_cmpeq_epi8(chunk, _mm_set1_epi8('\\'));
468
- __m128i needs_escape = _mm_or_si128(too_low_or_dbl_quote, has_backslash);
469
- return _mm_movemask_epi8(needs_escape);
470
- }
471
-
472
427
  static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(search_state *search)
473
428
  {
474
429
  if (RB_UNLIKELY(search->has_matches)) {
@@ -476,7 +431,7 @@ static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(se
476
431
  if (search->matches_mask > 0) {
477
432
  return sse2_next_match(search);
478
433
  } else {
479
- // sse2_next_match will only advance search->ptr up to the last matching character.
434
+ // sse2_next_match will only advance search->ptr up to the last matching character.
480
435
  // Skip over any characters in the last chunk that occur after the last match.
481
436
  search->has_matches = false;
482
437
  if (RB_UNLIKELY(search->chunk_base + sizeof(__m128i) >= search->end)) {
@@ -487,29 +442,22 @@ static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(se
487
442
  }
488
443
  }
489
444
 
490
- while (search->ptr + sizeof(__m128i) <= search->end) {
491
- int needs_escape_mask = sse2_update(search->ptr);
492
-
493
- if (needs_escape_mask == 0) {
494
- search->ptr += sizeof(__m128i);
495
- continue;
496
- }
497
-
445
+ if (string_scan_simd_sse2(&search->ptr, search->end, &search->matches_mask)) {
498
446
  search->has_matches = true;
499
- search->matches_mask = needs_escape_mask;
500
447
  search->chunk_base = search->ptr;
448
+ search->chunk_end = search->ptr + sizeof(__m128i);
501
449
  return sse2_next_match(search);
502
450
  }
503
451
 
504
- // There are fewer than 16 bytes left.
452
+ // There are fewer than 16 bytes left.
505
453
  unsigned long remaining = (search->end - search->ptr);
506
454
  if (remaining >= SIMD_MINIMUM_THRESHOLD) {
507
455
  char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining);
508
456
 
509
- int needs_escape_mask = sse2_update(s);
457
+ int needs_escape_mask = compute_chunk_mask_sse2(s);
510
458
 
511
459
  if (needs_escape_mask == 0) {
512
- // Nothing to escape, ensure search_flush doesn't do anything by setting
460
+ // Nothing to escape, ensure search_flush doesn't do anything by setting
513
461
  // search->cursor to search->ptr.
514
462
  fbuffer_consumed(search->buffer, remaining);
515
463
  search->ptr = search->end;
@@ -638,7 +586,8 @@ static inline unsigned char search_ascii_only_escape(search_state *search, const
638
586
  return 0;
639
587
  }
640
588
 
641
- static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_len) {
589
+ static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_len)
590
+ {
642
591
  const unsigned char ch = (unsigned char)*search->ptr;
643
592
  switch (ch_len) {
644
593
  case 1: {
@@ -668,7 +617,7 @@ static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_
668
617
 
669
618
  uint32_t wchar = 0;
670
619
 
671
- switch(ch_len) {
620
+ switch (ch_len) {
672
621
  case 2:
673
622
  wchar = ch & 0x1F;
674
623
  break;
@@ -828,7 +777,8 @@ static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self)
828
777
  * _state_ is a JSON::State object, that can also be used to configure the
829
778
  * produced JSON string output further.
830
779
  */
831
- static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) {
780
+ static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self)
781
+ {
832
782
  rb_check_arity(argc, 0, 1);
833
783
  VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
834
784
  return cState_partial_generate(Vstate, self, generate_json_array, Qfalse);
@@ -890,7 +840,8 @@ static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self)
890
840
  *
891
841
  * Extends _modul_ with the String::Extend module.
892
842
  */
893
- static VALUE mString_included_s(VALUE self, VALUE modul) {
843
+ static VALUE mString_included_s(VALUE self, VALUE modul)
844
+ {
894
845
  VALUE result = rb_funcall(modul, i_extend, 1, mString_Extend);
895
846
  rb_call_super(1, &modul);
896
847
  return result;
@@ -1135,7 +1086,7 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
1135
1086
  }
1136
1087
 
1137
1088
  VALUE key_to_s;
1138
- switch(rb_type(key)) {
1089
+ switch (rb_type(key)) {
1139
1090
  case T_STRING:
1140
1091
  if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) {
1141
1092
  key_to_s = key;
@@ -1219,7 +1170,7 @@ static void generate_json_array(FBuffer *buffer, struct generate_json_data *data
1219
1170
 
1220
1171
  fbuffer_append_char(buffer, '[');
1221
1172
  if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
1222
- for(i = 0; i < RARRAY_LEN(obj); i++) {
1173
+ for (i = 0; i < RARRAY_LEN(obj); i++) {
1223
1174
  if (i > 0) {
1224
1175
  fbuffer_append_char(buffer, ',');
1225
1176
  if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
@@ -1304,7 +1255,7 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat
1304
1255
  search.chunk_base = NULL;
1305
1256
  #endif /* HAVE_SIMD */
1306
1257
 
1307
- switch(rb_enc_str_coderange(obj)) {
1258
+ switch (rb_enc_str_coderange(obj)) {
1308
1259
  case ENC_CODERANGE_7BIT:
1309
1260
  case ENC_CODERANGE_VALID:
1310
1261
  if (RB_UNLIKELY(data->state->ascii_only)) {
@@ -1956,15 +1907,30 @@ static VALUE cState_buffer_initial_length_set(VALUE self, VALUE buffer_initial_l
1956
1907
  return Qnil;
1957
1908
  }
1958
1909
 
1910
+ struct configure_state_data {
1911
+ JSON_Generator_State *state;
1912
+ VALUE vstate; // Ruby object that owns the state, or Qfalse if stack-allocated
1913
+ };
1914
+
1915
+ static inline void state_write_value(struct configure_state_data *data, VALUE *field, VALUE value)
1916
+ {
1917
+ if (RTEST(data->vstate)) {
1918
+ RB_OBJ_WRITE(data->vstate, field, value);
1919
+ } else {
1920
+ *field = value;
1921
+ }
1922
+ }
1923
+
1959
1924
  static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
1960
1925
  {
1961
- JSON_Generator_State *state = (JSON_Generator_State *)_arg;
1926
+ struct configure_state_data *data = (struct configure_state_data *)_arg;
1927
+ JSON_Generator_State *state = data->state;
1962
1928
 
1963
- if (key == sym_indent) { state->indent = string_config(val); }
1964
- else if (key == sym_space) { state->space = string_config(val); }
1965
- else if (key == sym_space_before) { state->space_before = string_config(val); }
1966
- else if (key == sym_object_nl) { state->object_nl = string_config(val); }
1967
- else if (key == sym_array_nl) { state->array_nl = string_config(val); }
1929
+ if (key == sym_indent) { state_write_value(data, &state->indent, string_config(val)); }
1930
+ else if (key == sym_space) { state_write_value(data, &state->space, string_config(val)); }
1931
+ else if (key == sym_space_before) { state_write_value(data, &state->space_before, string_config(val)); }
1932
+ else if (key == sym_object_nl) { state_write_value(data, &state->object_nl, string_config(val)); }
1933
+ else if (key == sym_array_nl) { state_write_value(data, &state->array_nl, string_config(val)); }
1968
1934
  else if (key == sym_max_nesting) { state->max_nesting = long_config(val); }
1969
1935
  else if (key == sym_allow_nan) { state->allow_nan = RTEST(val); }
1970
1936
  else if (key == sym_ascii_only) { state->ascii_only = RTEST(val); }
@@ -1973,11 +1939,14 @@ static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
1973
1939
  else if (key == sym_script_safe) { state->script_safe = RTEST(val); }
1974
1940
  else if (key == sym_escape_slash) { state->script_safe = RTEST(val); }
1975
1941
  else if (key == sym_strict) { state->strict = RTEST(val); }
1976
- else if (key == sym_as_json) { state->as_json = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse; }
1942
+ else if (key == sym_as_json) {
1943
+ VALUE proc = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse;
1944
+ state_write_value(data, &state->as_json, proc);
1945
+ }
1977
1946
  return ST_CONTINUE;
1978
1947
  }
1979
1948
 
1980
- static void configure_state(JSON_Generator_State *state, VALUE config)
1949
+ static void configure_state(JSON_Generator_State *state, VALUE vstate, VALUE config)
1981
1950
  {
1982
1951
  if (!RTEST(config)) return;
1983
1952
 
@@ -1985,15 +1954,20 @@ static void configure_state(JSON_Generator_State *state, VALUE config)
1985
1954
 
1986
1955
  if (!RHASH_SIZE(config)) return;
1987
1956
 
1957
+ struct configure_state_data data = {
1958
+ .state = state,
1959
+ .vstate = vstate
1960
+ };
1961
+
1988
1962
  // We assume in most cases few keys are set so it's faster to go over
1989
1963
  // the provided keys than to check all possible keys.
1990
- rb_hash_foreach(config, configure_state_i, (VALUE)state);
1964
+ rb_hash_foreach(config, configure_state_i, (VALUE)&data);
1991
1965
  }
1992
1966
 
1993
1967
  static VALUE cState_configure(VALUE self, VALUE opts)
1994
1968
  {
1995
1969
  GET_STATE(self);
1996
- configure_state(state, opts);
1970
+ configure_state(state, self, opts);
1997
1971
  return self;
1998
1972
  }
1999
1973
 
@@ -2001,7 +1975,7 @@ static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io)
2001
1975
  {
2002
1976
  JSON_Generator_State state = {0};
2003
1977
  state_init(&state);
2004
- configure_state(&state, opts);
1978
+ configure_state(&state, Qfalse, opts);
2005
1979
 
2006
1980
  char stack_buffer[FBUFFER_STACK_SIZE];
2007
1981
  FBuffer buffer = {
@@ -2168,7 +2142,7 @@ void Init_generator(void)
2168
2142
  rb_require("json/ext/generator/state");
2169
2143
 
2170
2144
 
2171
- switch(find_simd_implementation()) {
2145
+ switch (find_simd_implementation()) {
2172
2146
  #ifdef HAVE_SIMD
2173
2147
  #ifdef HAVE_SIMD_NEON
2174
2148
  case SIMD_NEON:
@@ -1,11 +1,15 @@
1
1
  # frozen_string_literal: true
2
2
  require 'mkmf'
3
3
 
4
- have_func("rb_enc_interned_str", "ruby.h") # RUBY_VERSION >= 3.0
4
+ have_func("rb_enc_interned_str", "ruby/encoding.h") # RUBY_VERSION >= 3.0
5
5
  have_func("rb_hash_new_capa", "ruby.h") # RUBY_VERSION >= 3.2
6
6
  have_func("rb_hash_bulk_insert", "ruby.h") # Missing on TruffleRuby
7
7
  have_func("strnlen", "string.h") # Missing on Solaris 10
8
8
 
9
9
  append_cflags("-std=c99")
10
10
 
11
+ if enable_config('parser-use-simd', default=!ENV["JSON_DISABLE_SIMD"])
12
+ load __dir__ + "/../simd/conf.rb"
13
+ end
14
+
11
15
  create_makefile 'json/ext/parser'
@@ -20,6 +20,8 @@ typedef unsigned char _Bool;
20
20
  #endif
21
21
  #endif
22
22
 
23
+ #include "../simd/simd.h"
24
+
23
25
  #ifndef RB_UNLIKELY
24
26
  #define RB_UNLIKELY(expr) expr
25
27
  #endif
@@ -35,7 +37,7 @@ static ID i_chr, i_aset, i_aref,
35
37
  i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
36
38
 
37
39
  static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
38
- sym_decimal_class, sym_on_load;
40
+ sym_decimal_class, sym_on_load, sym_allow_duplicate_key;
39
41
 
40
42
  static int binary_encindex;
41
43
  static int utf8_encindex;
@@ -363,10 +365,17 @@ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
363
365
  return len;
364
366
  }
365
367
 
368
+ enum duplicate_key_action {
369
+ JSON_DEPRECATED = 0,
370
+ JSON_IGNORE,
371
+ JSON_RAISE,
372
+ };
373
+
366
374
  typedef struct JSON_ParserStruct {
367
375
  VALUE on_load_proc;
368
376
  VALUE decimal_class;
369
377
  ID decimal_method_id;
378
+ enum duplicate_key_action on_duplicate_key;
370
379
  int max_nesting;
371
380
  bool allow_nan;
372
381
  bool allow_trailing_comma;
@@ -386,15 +395,8 @@ typedef struct JSON_ParserStateStruct {
386
395
  int current_nesting;
387
396
  } JSON_ParserState;
388
397
 
389
-
390
- #define PARSE_ERROR_FRAGMENT_LEN 32
391
- #ifdef RBIMPL_ATTR_NORETURN
392
- RBIMPL_ATTR_NORETURN()
393
- #endif
394
- static void raise_parse_error(const char *format, JSON_ParserState *state)
398
+ static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out)
395
399
  {
396
- unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
397
-
398
400
  const char *cursor = state->cursor;
399
401
  long column = 0;
400
402
  long line = 1;
@@ -411,6 +413,29 @@ static void raise_parse_error(const char *format, JSON_ParserState *state)
411
413
  line++;
412
414
  }
413
415
  }
416
+ *line_out = line;
417
+ *column_out = column;
418
+ }
419
+
420
+ static void emit_parse_warning(const char *message, JSON_ParserState *state)
421
+ {
422
+ long line, column;
423
+ cursor_position(state, &line, &column);
424
+
425
+ VALUE warning = rb_sprintf("%s at line %ld column %ld", message, line, column);
426
+ rb_funcall(mJSON, rb_intern("deprecation_warning"), 1, warning);
427
+ }
428
+
429
+ #define PARSE_ERROR_FRAGMENT_LEN 32
430
+
431
+ #ifdef RBIMPL_ATTR_NORETURN
432
+ RBIMPL_ATTR_NORETURN()
433
+ #endif
434
+ static void raise_parse_error(const char *format, JSON_ParserState *state)
435
+ {
436
+ unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
437
+ long line, column;
438
+ cursor_position(state, &line, &column);
414
439
 
415
440
  const char *ptr = "EOF";
416
441
  if (state->cursor && state->cursor < state->end) {
@@ -517,7 +542,7 @@ static void
517
542
  json_eat_comments(JSON_ParserState *state)
518
543
  {
519
544
  if (state->cursor + 1 < state->end) {
520
- switch(state->cursor[1]) {
545
+ switch (state->cursor[1]) {
521
546
  case '/': {
522
547
  state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
523
548
  if (!state->cursor) {
@@ -807,10 +832,67 @@ static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig
807
832
  return array;
808
833
  }
809
834
 
810
- static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, long count)
835
+ static VALUE json_find_duplicated_key(size_t count, const VALUE *pairs)
836
+ {
837
+ VALUE set = rb_hash_new_capa(count / 2);
838
+ for (size_t index = 0; index < count; index += 2) {
839
+ size_t before = RHASH_SIZE(set);
840
+ VALUE key = pairs[index];
841
+ rb_hash_aset(set, key, Qtrue);
842
+ if (RHASH_SIZE(set) == before) {
843
+ if (RB_SYMBOL_P(key)) {
844
+ return rb_sym2str(key);
845
+ }
846
+ return key;
847
+ }
848
+ }
849
+ return Qfalse;
850
+ }
851
+
852
+ static void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key)
853
+ {
854
+ VALUE message = rb_sprintf(
855
+ "detected duplicate key %"PRIsVALUE" in JSON object. This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`",
856
+ rb_inspect(duplicate_key)
857
+ );
858
+
859
+ emit_parse_warning(RSTRING_PTR(message), state);
860
+ RB_GC_GUARD(message);
861
+ }
862
+
863
+ #ifdef RBIMPL_ATTR_NORETURN
864
+ RBIMPL_ATTR_NORETURN()
865
+ #endif
866
+ static void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key)
867
+ {
868
+ VALUE message = rb_sprintf(
869
+ "duplicate key %"PRIsVALUE,
870
+ rb_inspect(duplicate_key)
871
+ );
872
+
873
+ raise_parse_error(RSTRING_PTR(message), state);
874
+ RB_GC_GUARD(message);
875
+ }
876
+
877
+ static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, size_t count)
811
878
  {
812
- VALUE object = rb_hash_new_capa(count);
813
- rb_hash_bulk_insert(count, rvalue_stack_peek(state->stack, count), object);
879
+ size_t entries_count = count / 2;
880
+ VALUE object = rb_hash_new_capa(entries_count);
881
+ const VALUE *pairs = rvalue_stack_peek(state->stack, count);
882
+ rb_hash_bulk_insert(count, pairs, object);
883
+
884
+ if (RB_UNLIKELY(RHASH_SIZE(object) < entries_count)) {
885
+ switch (config->on_duplicate_key) {
886
+ case JSON_IGNORE:
887
+ break;
888
+ case JSON_DEPRECATED:
889
+ emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs));
890
+ break;
891
+ case JSON_RAISE:
892
+ raise_duplicate_key_error(state, json_find_duplicated_key(count, pairs));
893
+ break;
894
+ }
895
+ }
814
896
 
815
897
  rvalue_stack_pop(state->stack, count);
816
898
 
@@ -844,7 +926,7 @@ static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *
844
926
  return value;
845
927
  }
846
928
 
847
- static const bool string_scan[256] = {
929
+ static const bool string_scan_table[256] = {
848
930
  // ASCII Control Characters
849
931
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
850
932
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -857,32 +939,71 @@ static const bool string_scan[256] = {
857
939
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
858
940
  };
859
941
 
942
+ #if (defined(__GNUC__ ) || defined(__clang__))
943
+ #define FORCE_INLINE __attribute__((always_inline))
944
+ #else
945
+ #define FORCE_INLINE
946
+ #endif
947
+
948
+ #ifdef HAVE_SIMD
949
+ static SIMD_Implementation simd_impl = SIMD_NONE;
950
+ #endif /* HAVE_SIMD */
951
+
952
+ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
953
+ {
954
+ #ifdef HAVE_SIMD
955
+ #if defined(HAVE_SIMD_NEON)
956
+
957
+ uint64_t mask = 0;
958
+ if (string_scan_simd_neon(&state->cursor, state->end, &mask)) {
959
+ state->cursor += trailing_zeros64(mask) >> 2;
960
+ return 1;
961
+ }
962
+
963
+ #elif defined(HAVE_SIMD_SSE2)
964
+ if (simd_impl == SIMD_SSE2) {
965
+ int mask = 0;
966
+ if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) {
967
+ state->cursor += trailing_zeros(mask);
968
+ return 1;
969
+ }
970
+ }
971
+ #endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */
972
+ #endif /* HAVE_SIMD */
973
+
974
+ while (state->cursor < state->end) {
975
+ if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) {
976
+ return 1;
977
+ }
978
+ *state->cursor++;
979
+ }
980
+ return 0;
981
+ }
982
+
860
983
  static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
861
984
  {
862
985
  state->cursor++;
863
986
  const char *start = state->cursor;
864
987
  bool escaped = false;
865
988
 
866
- while (state->cursor < state->end) {
867
- if (RB_UNLIKELY(string_scan[(unsigned char)*state->cursor])) {
868
- switch (*state->cursor) {
869
- case '"': {
870
- VALUE string = json_decode_string(state, config, start, state->cursor, escaped, is_name);
871
- state->cursor++;
872
- return json_push_value(state, config, string);
873
- }
874
- case '\\': {
875
- state->cursor++;
876
- escaped = true;
877
- if ((unsigned char)*state->cursor < 0x20) {
878
- raise_parse_error("invalid ASCII control character in string: %s", state);
879
- }
880
- break;
881
- }
882
- default:
989
+ while (RB_UNLIKELY(string_scan(state))) {
990
+ switch (*state->cursor) {
991
+ case '"': {
992
+ VALUE string = json_decode_string(state, config, start, state->cursor, escaped, is_name);
993
+ state->cursor++;
994
+ return json_push_value(state, config, string);
995
+ }
996
+ case '\\': {
997
+ state->cursor++;
998
+ escaped = true;
999
+ if ((unsigned char)*state->cursor < 0x20) {
883
1000
  raise_parse_error("invalid ASCII control character in string: %s", state);
884
- break;
1001
+ }
1002
+ break;
885
1003
  }
1004
+ default:
1005
+ raise_parse_error("invalid ASCII control character in string: %s", state);
1006
+ break;
886
1007
  }
887
1008
 
888
1009
  state->cursor++;
@@ -1060,6 +1181,8 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1060
1181
  break;
1061
1182
  }
1062
1183
  case '{': {
1184
+ const char *object_start_cursor = state->cursor;
1185
+
1063
1186
  state->cursor++;
1064
1187
  json_eat_whitespace(state);
1065
1188
  long stack_head = state->stack->head;
@@ -1094,8 +1217,15 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1094
1217
  if (*state->cursor == '}') {
1095
1218
  state->cursor++;
1096
1219
  state->current_nesting--;
1097
- long count = state->stack->head - stack_head;
1098
- return json_push_value(state, config, json_decode_object(state, config, count));
1220
+ size_t count = state->stack->head - stack_head;
1221
+
1222
+ // Temporarily rewind the cursor in case an error is raised
1223
+ const char *final_cursor = state->cursor;
1224
+ state->cursor = object_start_cursor;
1225
+ VALUE object = json_decode_object(state, config, count);
1226
+ state->cursor = final_cursor;
1227
+
1228
+ return json_push_value(state, config, object);
1099
1229
  }
1100
1230
 
1101
1231
  if (*state->cursor == ',') {
@@ -1184,6 +1314,7 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
1184
1314
  else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
1185
1315
  else if (key == sym_freeze) { config->freeze = RTEST(val); }
1186
1316
  else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
1317
+ else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
1187
1318
  else if (key == sym_decimal_class) {
1188
1319
  if (RTEST(val)) {
1189
1320
  if (rb_respond_to(val, i_try_convert)) {
@@ -1400,6 +1531,7 @@ void Init_parser(void)
1400
1531
  sym_freeze = ID2SYM(rb_intern("freeze"));
1401
1532
  sym_on_load = ID2SYM(rb_intern("on_load"));
1402
1533
  sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
1534
+ sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
1403
1535
 
1404
1536
  i_chr = rb_intern("chr");
1405
1537
  i_aset = rb_intern("[]=");
@@ -1413,4 +1545,8 @@ void Init_parser(void)
1413
1545
  binary_encindex = rb_ascii8bit_encindex();
1414
1546
  utf8_encindex = rb_utf8_encindex();
1415
1547
  enc_utf8 = rb_utf8_encoding();
1548
+
1549
+ #ifdef HAVE_SIMD
1550
+ simd_impl = find_simd_implementation();
1551
+ #endif
1416
1552
  }
@@ -0,0 +1,24 @@
1
+ case RbConfig::CONFIG['host_cpu']
2
+ when /^(arm|aarch64)/
3
+ # Try to compile a small program using NEON instructions
4
+ header, type, init, extra = 'arm_neon.h', 'uint8x16_t', 'vdupq_n_u8(32)', nil
5
+ when /^(x86_64|x64)/
6
+ header, type, init, extra = 'x86intrin.h', '__m128i', '_mm_set1_epi8(32)', 'if (__builtin_cpu_supports("sse2")) { printf("OK"); }'
7
+ end
8
+ if header
9
+ if have_header(header) && try_compile(<<~SRC, '-Werror=implicit-function-declaration')
10
+ #{cpp_include(header)}
11
+ int main(int argc, char **argv) {
12
+ #{type} test = #{init};
13
+ #{extra}
14
+ if (argc > 100000) printf("%p", &test);
15
+ return 0;
16
+ }
17
+ SRC
18
+ $defs.push("-DJSON_ENABLE_SIMD")
19
+ else
20
+ puts "Disable SIMD"
21
+ end
22
+ end
23
+
24
+ have_header('cpuid.h')
@@ -0,0 +1,188 @@
1
+ typedef enum {
2
+ SIMD_NONE,
3
+ SIMD_NEON,
4
+ SIMD_SSE2
5
+ } SIMD_Implementation;
6
+
7
+ #ifdef JSON_ENABLE_SIMD
8
+
9
+ #ifdef __clang__
10
+ # if __has_builtin(__builtin_ctzll)
11
+ # define HAVE_BUILTIN_CTZLL 1
12
+ # else
13
+ # define HAVE_BUILTIN_CTZLL 0
14
+ # endif
15
+ #elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
16
+ # define HAVE_BUILTIN_CTZLL 1
17
+ #else
18
+ # define HAVE_BUILTIN_CTZLL 0
19
+ #endif
20
+
21
+ static inline uint32_t trailing_zeros64(uint64_t input)
22
+ {
23
+ #if HAVE_BUILTIN_CTZLL
24
+ return __builtin_ctzll(input);
25
+ #else
26
+ uint32_t trailing_zeros = 0;
27
+ uint64_t temp = input;
28
+ while ((temp & 1) == 0 && temp > 0) {
29
+ trailing_zeros++;
30
+ temp >>= 1;
31
+ }
32
+ return trailing_zeros;
33
+ #endif
34
+ }
35
+
36
+ static inline int trailing_zeros(int input)
37
+ {
38
+ #if HAVE_BUILTIN_CTZLL
39
+ return __builtin_ctz(input);
40
+ #else
41
+ int trailing_zeros = 0;
42
+ int temp = input;
43
+ while ((temp & 1) == 0 && temp > 0) {
44
+ trailing_zeros++;
45
+ temp >>= 1;
46
+ }
47
+ return trailing_zeros;
48
+ #endif
49
+ }
50
+
51
+ #if (defined(__GNUC__ ) || defined(__clang__))
52
+ #define FORCE_INLINE __attribute__((always_inline))
53
+ #else
54
+ #define FORCE_INLINE
55
+ #endif
56
+
57
+
58
+ #define SIMD_MINIMUM_THRESHOLD 6
59
+
60
+ #if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM64)
61
+ #include <arm_neon.h>
62
+
63
+ #define FIND_SIMD_IMPLEMENTATION_DEFINED 1
64
+ static inline SIMD_Implementation find_simd_implementation(void)
65
+ {
66
+ return SIMD_NEON;
67
+ }
68
+
69
+ #define HAVE_SIMD 1
70
+ #define HAVE_SIMD_NEON 1
71
+
72
+ // See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
73
+ static inline FORCE_INLINE uint64_t neon_match_mask(uint8x16_t matches)
74
+ {
75
+ const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4);
76
+ const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0);
77
+ return mask & 0x8888888888888888ull;
78
+ }
79
+
80
+ static inline FORCE_INLINE uint64_t compute_chunk_mask_neon(const char *ptr)
81
+ {
82
+ uint8x16_t chunk = vld1q_u8((const unsigned char *)ptr);
83
+
84
+ // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
85
+ // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
86
+ const uint8x16_t too_low_or_dbl_quote = vcltq_u8(veorq_u8(chunk, vdupq_n_u8(2)), vdupq_n_u8(33));
87
+
88
+ uint8x16_t has_backslash = vceqq_u8(chunk, vdupq_n_u8('\\'));
89
+ uint8x16_t needs_escape = vorrq_u8(too_low_or_dbl_quote, has_backslash);
90
+ return neon_match_mask(needs_escape);
91
+ }
92
+
93
+ static inline FORCE_INLINE int string_scan_simd_neon(const char **ptr, const char *end, uint64_t *mask)
94
+ {
95
+ while (*ptr + sizeof(uint8x16_t) <= end) {
96
+ uint64_t chunk_mask = compute_chunk_mask_neon(*ptr);
97
+ if (chunk_mask) {
98
+ *mask = chunk_mask;
99
+ return 1;
100
+ }
101
+ *ptr += sizeof(uint8x16_t);
102
+ }
103
+ return 0;
104
+ }
105
+
106
+ static inline uint8x16x4_t load_uint8x16_4(const unsigned char *table)
107
+ {
108
+ uint8x16x4_t tab;
109
+ tab.val[0] = vld1q_u8(table);
110
+ tab.val[1] = vld1q_u8(table+16);
111
+ tab.val[2] = vld1q_u8(table+32);
112
+ tab.val[3] = vld1q_u8(table+48);
113
+ return tab;
114
+ }
115
+
116
+ #endif /* ARM Neon Support.*/
117
+
118
+ #if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
119
+
120
+ #ifdef HAVE_X86INTRIN_H
121
+ #include <x86intrin.h>
122
+
123
+ #define HAVE_SIMD 1
124
+ #define HAVE_SIMD_SSE2 1
125
+
126
+ #ifdef HAVE_CPUID_H
127
+ #define FIND_SIMD_IMPLEMENTATION_DEFINED 1
128
+
129
+ #if defined(__clang__) || defined(__GNUC__)
130
+ #define TARGET_SSE2 __attribute__((target("sse2")))
131
+ #else
132
+ #define TARGET_SSE2
133
+ #endif
134
+
135
+ #define _mm_cmpge_epu8(a, b) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a)
136
+ #define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a)
137
+ #define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1))
138
+ #define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a)
139
+
140
+ static inline TARGET_SSE2 FORCE_INLINE int compute_chunk_mask_sse2(const char *ptr)
141
+ {
142
+ __m128i chunk = _mm_loadu_si128((__m128i const*)ptr);
143
+ // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
144
+ // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
145
+ __m128i too_low_or_dbl_quote = _mm_cmplt_epu8(_mm_xor_si128(chunk, _mm_set1_epi8(2)), _mm_set1_epi8(33));
146
+ __m128i has_backslash = _mm_cmpeq_epi8(chunk, _mm_set1_epi8('\\'));
147
+ __m128i needs_escape = _mm_or_si128(too_low_or_dbl_quote, has_backslash);
148
+ return _mm_movemask_epi8(needs_escape);
149
+ }
150
+
151
+ static inline TARGET_SSE2 FORCE_INLINE int string_scan_simd_sse2(const char **ptr, const char *end, int *mask)
152
+ {
153
+ while (*ptr + sizeof(__m128i) <= end) {
154
+ int chunk_mask = compute_chunk_mask_sse2(*ptr);
155
+ if (chunk_mask) {
156
+ *mask = chunk_mask;
157
+ return 1;
158
+ }
159
+ *ptr += sizeof(__m128i);
160
+ }
161
+
162
+ return 0;
163
+ }
164
+
165
+ #include <cpuid.h>
166
+ #endif /* HAVE_CPUID_H */
167
+
168
+ static inline SIMD_Implementation find_simd_implementation(void)
169
+ {
170
+ // TODO Revisit. I think the SSE version now only uses SSE2 instructions.
171
+ if (__builtin_cpu_supports("sse2")) {
172
+ return SIMD_SSE2;
173
+ }
174
+
175
+ return SIMD_NONE;
176
+ }
177
+
178
+ #endif /* HAVE_X86INTRIN_H */
179
+ #endif /* X86_64 Support */
180
+
181
+ #endif /* JSON_ENABLE_SIMD */
182
+
183
+ #ifndef FIND_SIMD_IMPLEMENTATION_DEFINED
184
+ static inline SIMD_Implementation find_simd_implementation(void)
185
+ {
186
+ return SIMD_NONE;
187
+ }
188
+ #endif
data/json.gemspec CHANGED
@@ -44,15 +44,14 @@ spec = Gem::Specification.new do |s|
44
44
  "LEGAL",
45
45
  "README.md",
46
46
  "json.gemspec",
47
- *Dir["lib/**/*.rb"],
48
- ]
47
+ ] + Dir.glob("lib/**/*.rb", base: File.expand_path("..", __FILE__))
49
48
 
50
49
  if java_ext
51
50
  s.platform = 'java'
52
51
  s.files += Dir["lib/json/ext/**/*.jar"]
53
52
  else
54
53
  s.extensions = Dir["ext/json/**/extconf.rb"]
55
- s.files += Dir["ext/json/**/*.{c,h}"]
54
+ s.files += Dir["ext/json/**/*.{c,h,rb}"]
56
55
  end
57
56
  end
58
57
 
data/lib/json/common.rb CHANGED
@@ -48,7 +48,7 @@ module JSON
48
48
  end
49
49
  end
50
50
 
51
- # TODO: exctract :create_additions support to another gem for version 3.0
51
+ # TODO: extract :create_additions support to another gem for version 3.0
52
52
  def create_additions_proc(opts)
53
53
  if opts[:symbolize_names]
54
54
  raise ArgumentError, "options :symbolize_names and :create_additions cannot be used in conjunction"
@@ -87,31 +87,32 @@ module JSON
87
87
  opts
88
88
  end
89
89
 
90
- GEM_ROOT = File.expand_path("../../../", __FILE__) + "/"
91
90
  def create_additions_warning
92
- message = "JSON.load implicit support for `create_additions: true` is deprecated " \
91
+ JSON.deprecation_warning "JSON.load implicit support for `create_additions: true` is deprecated " \
93
92
  "and will be removed in 3.0, use JSON.unsafe_load or explicitly " \
94
93
  "pass `create_additions: true`"
94
+ end
95
+ end
96
+ end
95
97
 
96
- uplevel = 4
97
- caller_locations(uplevel, 10).each do |frame|
98
- if frame.path.nil? || frame.path.start_with?(GEM_ROOT) || frame.path.end_with?("/truffle/cext_ruby.rb", ".c")
99
- uplevel += 1
100
- else
101
- break
102
- end
103
- end
104
-
105
- if RUBY_VERSION >= "3.0"
106
- warn(message, uplevel: uplevel - 1, category: :deprecated)
98
+ class << self
99
+ def deprecation_warning(message, uplevel = 3) # :nodoc:
100
+ gem_root = File.expand_path("../../../", __FILE__) + "/"
101
+ caller_locations(uplevel, 10).each do |frame|
102
+ if frame.path.nil? || frame.path.start_with?(gem_root) || frame.path.end_with?("/truffle/cext_ruby.rb", ".c")
103
+ uplevel += 1
107
104
  else
108
- warn(message, uplevel: uplevel - 1)
105
+ break
109
106
  end
110
107
  end
108
+
109
+ if RUBY_VERSION >= "3.0"
110
+ warn(message, uplevel: uplevel, category: :deprecated)
111
+ else
112
+ warn(message, uplevel: uplevel)
113
+ end
111
114
  end
112
- end
113
115
 
114
- class << self
115
116
  # :call-seq:
116
117
  # JSON[object] -> new_array or new_string
117
118
  #
@@ -268,7 +269,7 @@ module JSON
268
269
  # to string interpolation.
269
270
  #
270
271
  # Note: no validation is performed on the provided string. It is the
271
- # responsability of the caller to ensure the string contains valid JSON.
272
+ # responsibility of the caller to ensure the string contains valid JSON.
272
273
  Fragment = Struct.new(:json) do
273
274
  def initialize(json)
274
275
  unless string = String.try_convert(json)
data/lib/json/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module JSON
4
- VERSION = '2.12.2'
4
+ VERSION = '2.13.2'
5
5
  end
data/lib/json.rb CHANGED
@@ -127,6 +127,24 @@ require 'json/common'
127
127
  #
128
128
  # ---
129
129
  #
130
+ # Option +allow_duplicate_key+ specifies whether duplicate keys in objects
131
+ # should be ignored or cause an error to be raised:
132
+ #
133
+ # When not specified:
134
+ # # The last value is used and a deprecation warning is emitted.
135
+ # JSON.parse('{"a": 1, "a":2}') => {"a" => 2}
136
+ # # warning: detected duplicate keys in JSON object.
137
+ # # This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`
138
+ #
139
+ # When set to `+true+`:
140
+ # # The last value is used.
141
+ # JSON.parse('{"a": 1, "a":2}') => {"a" => 2}
142
+ #
143
+ # When set to `+false+`, the future default:
144
+ # JSON.parse('{"a": 1, "a":2}') => duplicate key at line 1 column 1 (JSON::ParserError)
145
+ #
146
+ # ---
147
+ #
130
148
  # Option +allow_nan+ (boolean) specifies whether to allow
131
149
  # NaN, Infinity, and MinusInfinity in +source+;
132
150
  # defaults to +false+.
@@ -143,8 +161,23 @@ require 'json/common'
143
161
  # ruby = JSON.parse(source, {allow_nan: true})
144
162
  # ruby # => [NaN, Infinity, -Infinity]
145
163
  #
164
+ # ---
165
+ #
166
+ # Option +allow_trailing_comma+ (boolean) specifies whether to allow
167
+ # trailing commas in objects and arrays;
168
+ # defaults to +false+.
169
+ #
170
+ # With the default, +false+:
171
+ # JSON.parse('[1,]') # unexpected character: ']' at line 1 column 4 (JSON::ParserError)
172
+ #
173
+ # When enabled:
174
+ # JSON.parse('[1,]', allow_trailing_comma: true) # => [1]
175
+ #
146
176
  # ====== Output Options
147
177
  #
178
+ # Option +freeze+ (boolean) specifies whether the returned objects will be frozen;
179
+ # defaults to +false+.
180
+ #
148
181
  # Option +symbolize_names+ (boolean) specifies whether returned \Hash keys
149
182
  # should be Symbols;
150
183
  # defaults to +false+ (use Strings).
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: json
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.12.2
4
+ version: 2.13.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Florian Frank
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2025-05-23 00:00:00.000000000 Z
10
+ date: 2025-07-28 00:00:00.000000000 Z
11
11
  dependencies: []
12
12
  description: This is a JSON implementation as a Ruby extension in C.
13
13
  email: flori@ping.de
@@ -26,9 +26,10 @@ files:
26
26
  - ext/json/ext/fbuffer/fbuffer.h
27
27
  - ext/json/ext/generator/extconf.rb
28
28
  - ext/json/ext/generator/generator.c
29
- - ext/json/ext/generator/simd.h
30
29
  - ext/json/ext/parser/extconf.rb
31
30
  - ext/json/ext/parser/parser.c
31
+ - ext/json/ext/simd/conf.rb
32
+ - ext/json/ext/simd/simd.h
32
33
  - ext/json/ext/vendor/fpconv.c
33
34
  - ext/json/ext/vendor/jeaiii-ltoa.h
34
35
  - json.gemspec
@@ -1,112 +0,0 @@
1
- typedef enum {
2
- SIMD_NONE,
3
- SIMD_NEON,
4
- SIMD_SSE2
5
- } SIMD_Implementation;
6
-
7
- #ifdef JSON_ENABLE_SIMD
8
-
9
- #ifdef __clang__
10
- #if __has_builtin(__builtin_ctzll)
11
- #define HAVE_BUILTIN_CTZLL 1
12
- #else
13
- #define HAVE_BUILTIN_CTZLL 0
14
- #endif
15
- #elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
16
- #define HAVE_BUILTIN_CTZLL 1
17
- #else
18
- #define HAVE_BUILTIN_CTZLL 0
19
- #endif
20
-
21
- static inline uint32_t trailing_zeros64(uint64_t input) {
22
- #if HAVE_BUILTIN_CTZLL
23
- return __builtin_ctzll(input);
24
- #else
25
- uint32_t trailing_zeros = 0;
26
- uint64_t temp = input;
27
- while ((temp & 1) == 0 && temp > 0) {
28
- trailing_zeros++;
29
- temp >>= 1;
30
- }
31
- return trailing_zeros;
32
- #endif
33
- }
34
-
35
- static inline int trailing_zeros(int input) {
36
- #if HAVE_BUILTIN_CTZLL
37
- return __builtin_ctz(input);
38
- #else
39
- int trailing_zeros = 0;
40
- int temp = input;
41
- while ((temp & 1) == 0 && temp > 0) {
42
- trailing_zeros++;
43
- temp >>= 1;
44
- }
45
- return trailing_zeros;
46
- #endif
47
- }
48
-
49
- #define SIMD_MINIMUM_THRESHOLD 6
50
-
51
- #if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM64)
52
- #include <arm_neon.h>
53
-
54
- #define FIND_SIMD_IMPLEMENTATION_DEFINED 1
55
- static SIMD_Implementation find_simd_implementation(void) {
56
- return SIMD_NEON;
57
- }
58
-
59
- #define HAVE_SIMD 1
60
- #define HAVE_SIMD_NEON 1
61
-
62
- uint8x16x4_t load_uint8x16_4(const unsigned char *table) {
63
- uint8x16x4_t tab;
64
- tab.val[0] = vld1q_u8(table);
65
- tab.val[1] = vld1q_u8(table+16);
66
- tab.val[2] = vld1q_u8(table+32);
67
- tab.val[3] = vld1q_u8(table+48);
68
- return tab;
69
- }
70
-
71
- #endif /* ARM Neon Support.*/
72
-
73
- #if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
74
-
75
- #ifdef HAVE_X86INTRIN_H
76
- #include <x86intrin.h>
77
-
78
- #define HAVE_SIMD 1
79
- #define HAVE_SIMD_SSE2 1
80
-
81
- #ifdef HAVE_CPUID_H
82
- #define FIND_SIMD_IMPLEMENTATION_DEFINED 1
83
-
84
- #include <cpuid.h>
85
- #endif /* HAVE_CPUID_H */
86
-
87
- static SIMD_Implementation find_simd_implementation(void) {
88
-
89
- #if defined(__GNUC__ ) || defined(__clang__)
90
- #ifdef __GNUC__
91
- __builtin_cpu_init();
92
- #endif /* __GNUC__ */
93
-
94
- // TODO Revisit. I think the SSE version now only uses SSE2 instructions.
95
- if (__builtin_cpu_supports("sse2")) {
96
- return SIMD_SSE2;
97
- }
98
- #endif /* __GNUC__ || __clang__*/
99
-
100
- return SIMD_NONE;
101
- }
102
-
103
- #endif /* HAVE_X86INTRIN_H */
104
- #endif /* X86_64 Support */
105
-
106
- #endif /* JSON_ENABLE_SIMD */
107
-
108
- #ifndef FIND_SIMD_IMPLEMENTATION_DEFINED
109
- static SIMD_Implementation find_simd_implementation(void) {
110
- return SIMD_NONE;
111
- }
112
- #endif