json 2.18.0 → 2.18.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 51eab66896e862b679d424133f11e1367d5d8e71add943e67cf0673d0d562fcd
4
- data.tar.gz: 7b69d4a42137897fe9a45bd60a21b759d133c112cb4ba16020099f27074ac2fd
3
+ metadata.gz: cb2890db4c527125d27bc7c21fc64d3ac532ffbec8080f89a678daf48c36e09e
4
+ data.tar.gz: c4b37d085d05d3c43df97b3c24898dc6be61c76ba64c749b5a8a86bf4fc1198d
5
5
  SHA512:
6
- metadata.gz: ea3b026c8ccd6cb477858bf06f07f8b5adc5bcf7b52a175487c19dc2835ef63db2e4f87074a00ec2fe2c70e588c205f679116536da40f15e767f35351a52fc5c
7
- data.tar.gz: f58144a5329ad95128e00bbc5670280f6a699e04cf05060bdfc7acaf112e62eb44c14c36328a9683d86e138c6eb2f3599f5ec35347867ac99ab9f0e16813c4df
6
+ metadata.gz: fb55ef5a0aa6961ef0fe3bb30f398834820357045ad27a8fdb7e53eaba3af7c4d356ef26c0e73b7a87d2d9d51e500eae7193d7d1ae3aa1058c7973bcc462674b
7
+ data.tar.gz: bfb499789bbcee7f5f8d67e32ded664dc62c632ae39fe80bf4bff3d6aec16eee3730a7c4883216a393326f2ab33e94e5f9c58da4c5b31627347108c36c2b211c
data/CHANGES.md CHANGED
@@ -2,6 +2,11 @@
2
2
 
3
3
  ### Unreleased
4
4
 
5
+ ### 2026-02-03 (2.18.1)
6
+
7
+ * Fix a potential crash in very specific circumstances if GC triggers during a call to `to_json`
8
+ without first invoking a user defined `#to_json` method.
9
+
5
10
  ### 2025-12-11 (2.18.0)
6
11
 
7
12
  * Add `:allow_control_characters` parser options, to allow JSON strings containing unescaped ASCII control characters (e.g. newlines).
@@ -66,7 +71,7 @@
66
71
  * Fix `JSON.generate` `strict: true` mode to also restrict hash keys.
67
72
  * Fix `JSON::Coder` to also invoke block for hash keys that aren't strings nor symbols.
68
73
  * Fix `JSON.unsafe_load` usage with proc
69
- * Fix the parser to more consistently reject invalid UTF-16 surogate pairs.
74
+ * Fix the parser to more consistently reject invalid UTF-16 surrogate pairs.
70
75
  * Stop defining `String.json_create`, `String#to_json_raw`, `String#to_json_raw_object` when `json/add` isn't loaded.
71
76
 
72
77
  ### 2025-07-28 (2.13.2)
@@ -161,23 +161,25 @@ static inline void fbuffer_append_reserved_char(FBuffer *fb, char chr)
161
161
 
162
162
  static void fbuffer_append_str(FBuffer *fb, VALUE str)
163
163
  {
164
- const char *newstr = StringValuePtr(str);
165
- unsigned long len = RSTRING_LEN(str);
164
+ const char *ptr;
165
+ unsigned long len;
166
+ RSTRING_GETMEM(str, ptr, len);
166
167
 
167
- fbuffer_append(fb, newstr, len);
168
+ fbuffer_append(fb, ptr, len);
168
169
  }
169
170
 
170
171
  static void fbuffer_append_str_repeat(FBuffer *fb, VALUE str, size_t repeat)
171
172
  {
172
- const char *newstr = StringValuePtr(str);
173
- unsigned long len = RSTRING_LEN(str);
173
+ const char *ptr;
174
+ unsigned long len;
175
+ RSTRING_GETMEM(str, ptr, len);
174
176
 
175
177
  fbuffer_inc_capa(fb, repeat * len);
176
178
  while (repeat) {
177
179
  #if JSON_DEBUG
178
180
  fb->requested = len;
179
181
  #endif
180
- fbuffer_append_reserved(fb, newstr, len);
182
+ fbuffer_append_reserved(fb, ptr, len);
181
183
  repeat--;
182
184
  }
183
185
  }
@@ -63,6 +63,8 @@ struct generate_json_data {
63
63
  long depth;
64
64
  };
65
65
 
66
+ static SIMD_Implementation simd_impl;
67
+
66
68
  static VALUE cState_from_state_s(VALUE self, VALUE opts);
67
69
  static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func, VALUE io);
68
70
  static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
@@ -155,8 +157,6 @@ static const unsigned char escape_table_basic[256] = {
155
157
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
156
158
  };
157
159
 
158
- static unsigned char (*search_escape_basic_impl)(search_state *);
159
-
160
160
  static inline unsigned char search_escape_basic(search_state *search)
161
161
  {
162
162
  while (search->ptr < search->end) {
@@ -212,11 +212,39 @@ ALWAYS_INLINE(static) void escape_UTF8_char_basic(search_state *search)
212
212
  * Everything else (should be UTF-8) is just passed through and
213
213
  * appended to the result.
214
214
  */
215
+
216
+
217
+ #if defined(HAVE_SIMD_NEON)
218
+ static inline unsigned char search_escape_basic_neon(search_state *search);
219
+ #elif defined(HAVE_SIMD_SSE2)
220
+ static inline unsigned char search_escape_basic_sse2(search_state *search);
221
+ #endif
222
+
223
+ static inline unsigned char search_escape_basic(search_state *search);
224
+
215
225
  static inline void convert_UTF8_to_JSON(search_state *search)
216
226
  {
217
- while (search_escape_basic_impl(search)) {
227
+ #ifdef HAVE_SIMD
228
+ #if defined(HAVE_SIMD_NEON)
229
+ while (search_escape_basic_neon(search)) {
230
+ escape_UTF8_char_basic(search);
231
+ }
232
+ #elif defined(HAVE_SIMD_SSE2)
233
+ if (simd_impl == SIMD_SSE2) {
234
+ while (search_escape_basic_sse2(search)) {
235
+ escape_UTF8_char_basic(search);
236
+ }
237
+ return;
238
+ }
239
+ while (search_escape_basic(search)) {
240
+ escape_UTF8_char_basic(search);
241
+ }
242
+ #endif
243
+ #else
244
+ while (search_escape_basic(search)) {
218
245
  escape_UTF8_char_basic(search);
219
246
  }
247
+ #endif /* HAVE_SIMD */
220
248
  }
221
249
 
222
250
  static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
@@ -260,6 +288,8 @@ static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
260
288
 
261
289
  ALWAYS_INLINE(static) char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)
262
290
  {
291
+ RBIMPL_ASSERT_OR_ASSUME(len < vec_len);
292
+
263
293
  // Flush the buffer so everything up until the last 'len' characters are unflushed.
264
294
  search_flush(search);
265
295
 
@@ -269,12 +299,18 @@ ALWAYS_INLINE(static) char *copy_remaining_bytes(search_state *search, unsigned
269
299
  char *s = (buf->ptr + buf->len);
270
300
 
271
301
  // Pad the buffer with dummy characters that won't need escaping.
272
- // This seem wateful at first sight, but memset of vector length is very fast.
273
- memset(s, 'X', vec_len);
302
+ // This seems wasteful at first sight, but memset of vector length is very fast.
303
+ // This is a space as it can be directly represented as an immediate on AArch64.
304
+ memset(s, ' ', vec_len);
274
305
 
275
306
  // Optimistically copy the remaining 'len' characters to the output FBuffer. If there are no characters
276
307
  // to escape, then everything ends up in the correct spot. Otherwise it was convenient temporary storage.
277
- MEMCPY(s, search->ptr, char, len);
308
+ if (vec_len == 16) {
309
+ RBIMPL_ASSERT_OR_ASSUME(len >= SIMD_MINIMUM_THRESHOLD);
310
+ json_fast_memcpy16(s, search->ptr, len);
311
+ } else {
312
+ MEMCPY(s, search->ptr, char, len);
313
+ }
278
314
 
279
315
  return s;
280
316
  }
@@ -1091,6 +1127,7 @@ static void raw_generate_json_string(FBuffer *buffer, struct generate_json_data
1091
1127
  search.matches_mask = 0;
1092
1128
  search.has_matches = false;
1093
1129
  search.chunk_base = NULL;
1130
+ search.chunk_end = NULL;
1094
1131
  #endif /* HAVE_SIMD */
1095
1132
 
1096
1133
  switch (rb_enc_str_coderange(obj)) {
@@ -1337,7 +1374,7 @@ static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *dat
1337
1374
  static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1338
1375
  {
1339
1376
  VALUE tmp = rb_funcall(obj, i_to_s, 0);
1340
- fbuffer_append_str(buffer, tmp);
1377
+ fbuffer_append_str(buffer, StringValue(tmp));
1341
1378
  }
1342
1379
 
1343
1380
  #ifdef RUBY_INTEGER_UNIFICATION
@@ -1503,7 +1540,9 @@ static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func,
1503
1540
  .obj = obj,
1504
1541
  .func = func
1505
1542
  };
1506
- return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data);
1543
+ VALUE result = rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data);
1544
+ RB_GC_GUARD(self);
1545
+ return result;
1507
1546
  }
1508
1547
 
1509
1548
  /* call-seq:
@@ -2181,22 +2220,5 @@ void Init_generator(void)
2181
2220
 
2182
2221
  rb_require("json/ext/generator/state");
2183
2222
 
2184
-
2185
- switch (find_simd_implementation()) {
2186
- #ifdef HAVE_SIMD
2187
- #ifdef HAVE_SIMD_NEON
2188
- case SIMD_NEON:
2189
- search_escape_basic_impl = search_escape_basic_neon;
2190
- break;
2191
- #endif /* HAVE_SIMD_NEON */
2192
- #ifdef HAVE_SIMD_SSE2
2193
- case SIMD_SSE2:
2194
- search_escape_basic_impl = search_escape_basic_sse2;
2195
- break;
2196
- #endif /* HAVE_SIMD_SSE2 */
2197
- #endif /* HAVE_SIMD */
2198
- default:
2199
- search_escape_basic_impl = search_escape_basic;
2200
- break;
2201
- }
2223
+ simd_impl = find_simd_implementation();
2202
2224
  }
data/ext/json/ext/json.h CHANGED
@@ -5,6 +5,10 @@
5
5
  #include "ruby/encoding.h"
6
6
  #include <stdint.h>
7
7
 
8
+ #ifndef RBIMPL_ASSERT_OR_ASSUME
9
+ # define RBIMPL_ASSERT_OR_ASSUME(x)
10
+ #endif
11
+
8
12
  #if defined(RUBY_DEBUG) && RUBY_DEBUG
9
13
  # define JSON_ASSERT RUBY_ASSERT
10
14
  #else
@@ -477,23 +477,24 @@ static const signed char digit_values[256] = {
477
477
  -1, -1, -1, -1, -1, -1, -1
478
478
  };
479
479
 
480
- static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p)
480
+ static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const char *spe)
481
481
  {
482
- signed char b;
483
- uint32_t result = 0;
484
- b = digit_values[p[0]];
485
- if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
486
- result = (result << 4) | (unsigned char)b;
487
- b = digit_values[p[1]];
488
- if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
489
- result = (result << 4) | (unsigned char)b;
490
- b = digit_values[p[2]];
491
- if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
492
- result = (result << 4) | (unsigned char)b;
493
- b = digit_values[p[3]];
494
- if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
495
- result = (result << 4) | (unsigned char)b;
496
- return result;
482
+ if (RB_UNLIKELY(sp > spe - 4)) {
483
+ raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
484
+ }
485
+
486
+ const unsigned char *p = (const unsigned char *)sp;
487
+
488
+ const signed char b0 = digit_values[p[0]];
489
+ const signed char b1 = digit_values[p[1]];
490
+ const signed char b2 = digit_values[p[2]];
491
+ const signed char b3 = digit_values[p[3]];
492
+
493
+ if (RB_UNLIKELY((signed char)(b0 | b1 | b2 | b3) < 0)) {
494
+ raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
495
+ }
496
+
497
+ return ((uint32_t)b0 << 12) | ((uint32_t)b1 << 8) | ((uint32_t)b2 << 4) | (uint32_t)b3;
497
498
  }
498
499
 
499
500
  #define GET_PARSER_CONFIG \
@@ -643,7 +644,7 @@ static inline VALUE json_string_fastpath(JSON_ParserState *state, JSON_ParserCon
643
644
  typedef struct _json_unescape_positions {
644
645
  long size;
645
646
  const char **positions;
646
- bool has_more;
647
+ unsigned long additional_backslashes;
647
648
  } JSON_UnescapePositions;
648
649
 
649
650
  static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions)
@@ -657,7 +658,8 @@ static inline const char *json_next_backslash(const char *pe, const char *string
657
658
  }
658
659
  }
659
660
 
660
- if (positions->has_more) {
661
+ if (positions->additional_backslashes) {
662
+ positions->additional_backslashes--;
661
663
  return memchr(pe, '\\', stringEnd - pe);
662
664
  }
663
665
 
@@ -707,50 +709,41 @@ NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_Parser
707
709
  case 'f':
708
710
  APPEND_CHAR('\f');
709
711
  break;
710
- case 'u':
711
- if (pe > stringEnd - 5) {
712
- raise_parse_error_at("incomplete unicode character escape sequence at %s", state, p);
713
- } else {
714
- uint32_t ch = unescape_unicode(state, (unsigned char *) ++pe);
715
- pe += 3;
716
- /* To handle values above U+FFFF, we take a sequence of
717
- * \uXXXX escapes in the U+D800..U+DBFF then
718
- * U+DC00..U+DFFF ranges, take the low 10 bits from each
719
- * to make a 20-bit number, then add 0x10000 to get the
720
- * final codepoint.
721
- *
722
- * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
723
- * Surrogate Pairs in UTF-16", and 23.6 "Surrogates
724
- * Area".
725
- */
726
- if ((ch & 0xFC00) == 0xD800) {
727
- pe++;
728
- if (pe > stringEnd - 6) {
729
- raise_parse_error_at("incomplete surrogate pair at %s", state, p);
730
- }
731
- if (pe[0] == '\\' && pe[1] == 'u') {
732
- uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
733
-
734
- if ((sur & 0xFC00) != 0xDC00) {
735
- raise_parse_error_at("invalid surrogate pair at %s", state, p);
736
- }
737
-
738
- ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
739
- | (sur & 0x3FF));
740
- pe += 5;
741
- } else {
742
- raise_parse_error_at("incomplete surrogate pair at %s", state, p);
743
- break;
712
+ case 'u': {
713
+ uint32_t ch = unescape_unicode(state, ++pe, stringEnd);
714
+ pe += 3;
715
+ /* To handle values above U+FFFF, we take a sequence of
716
+ * \uXXXX escapes in the U+D800..U+DBFF then
717
+ * U+DC00..U+DFFF ranges, take the low 10 bits from each
718
+ * to make a 20-bit number, then add 0x10000 to get the
719
+ * final codepoint.
720
+ *
721
+ * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
722
+ * Surrogate Pairs in UTF-16", and 23.6 "Surrogates
723
+ * Area".
724
+ */
725
+ if ((ch & 0xFC00) == 0xD800) {
726
+ pe++;
727
+ if (RB_LIKELY((pe <= stringEnd - 6) && memcmp(pe, "\\u", 2) == 0)) {
728
+ uint32_t sur = unescape_unicode(state, pe + 2, stringEnd);
729
+
730
+ if (RB_UNLIKELY((sur & 0xFC00) != 0xDC00)) {
731
+ raise_parse_error_at("invalid surrogate pair at %s", state, p);
744
732
  }
745
- }
746
733
 
747
- char buf[4];
748
- int unescape_len = convert_UTF32_to_UTF8(buf, ch);
749
- MEMCPY(buffer, buf, char, unescape_len);
750
- buffer += unescape_len;
751
- p = ++pe;
734
+ ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) | (sur & 0x3FF));
735
+ pe += 5;
736
+ } else {
737
+ raise_parse_error_at("incomplete surrogate pair at %s", state, p);
738
+ break;
739
+ }
752
740
  }
741
+
742
+ int unescape_len = convert_UTF32_to_UTF8(buffer, ch);
743
+ buffer += unescape_len;
744
+ p = ++pe;
753
745
  break;
746
+ }
754
747
  default:
755
748
  if ((unsigned char)*pe < 0x20) {
756
749
  if (!config->allow_control_characters) {
@@ -992,7 +985,7 @@ static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfi
992
985
  JSON_UnescapePositions positions = {
993
986
  .size = 0,
994
987
  .positions = backslashes,
995
- .has_more = false,
988
+ .additional_backslashes = 0,
996
989
  };
997
990
 
998
991
  do {
@@ -1007,7 +1000,7 @@ static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfi
1007
1000
  backslashes[positions.size] = state->cursor;
1008
1001
  positions.size++;
1009
1002
  } else {
1010
- positions.has_more = true;
1003
+ positions.additional_backslashes++;
1011
1004
  }
1012
1005
  state->cursor++;
1013
1006
  break;
@@ -58,7 +58,34 @@ static inline int trailing_zeros(int input)
58
58
 
59
59
  #ifdef JSON_ENABLE_SIMD
60
60
 
61
- #define SIMD_MINIMUM_THRESHOLD 6
61
+ #define SIMD_MINIMUM_THRESHOLD 4
62
+
63
+ ALWAYS_INLINE(static) void json_fast_memcpy16(char *dst, const char *src, size_t len)
64
+ {
65
+ RBIMPL_ASSERT_OR_ASSUME(len < 16);
66
+ RBIMPL_ASSERT_OR_ASSUME(len >= SIMD_MINIMUM_THRESHOLD); // 4
67
+ #if defined(__has_builtin) && __has_builtin(__builtin_memcpy)
68
+ // If __builtin_memcpy is available, use it to copy between SIMD_MINIMUM_THRESHOLD (4) and vec_len-1 (15) bytes.
69
+ // These copies overlap. The first copy will copy the first 8 (or 4) bytes. The second copy will copy
70
+ // the last 8 (or 4) bytes but overlap with the first copy. The overlapping bytes will be in the correct
71
+ // position in both copies.
72
+
73
+ // Please do not attempt to replace __builtin_memcpy with memcpy without profiling and/or looking at the
74
+ // generated assembly. On clang specifically (tested on Apple clang version 17.0.0 (clang-1700.0.13.3)),
75
+ // when using memcpy, the compiler will notice the only difference is a 4 or 8 and generate a conditional
76
+ // select instruction instead of direct loads and stores with a branch. This ends up slower than the branch
77
+ // plus two loads and stores generated when using __builtin_memcpy.
78
+ if (len >= 8) {
79
+ __builtin_memcpy(dst, src, 8);
80
+ __builtin_memcpy(dst + len - 8, src + len - 8, 8);
81
+ } else {
82
+ __builtin_memcpy(dst, src, 4);
83
+ __builtin_memcpy(dst + len - 4, src + len - 4, 4);
84
+ }
85
+ #else
86
+ MEMCPY(dst, src, char, len);
87
+ #endif
88
+ }
62
89
 
63
90
  #if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM64)
64
91
  #include <arm_neon.h>
@@ -449,7 +449,7 @@ static int filter_special(double fp, char* dest)
449
449
  * }
450
450
  *
451
451
  */
452
- static int fpconv_dtoa(double d, char dest[28])
452
+ static int fpconv_dtoa(double d, char dest[32])
453
453
  {
454
454
  char digits[18];
455
455
 
data/lib/json/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module JSON
4
- VERSION = '2.18.0'
4
+ VERSION = '2.18.1'
5
5
  end
data/lib/json.rb CHANGED
@@ -6,6 +6,15 @@ require 'json/common'
6
6
  #
7
7
  # \JSON is a lightweight data-interchange format.
8
8
  #
9
+ # \JSON is easy for us humans to read and write,
10
+ # and equally simple for machines to read (parse) and write (generate).
11
+ #
12
+ # \JSON is language-independent, making it an ideal interchange format
13
+ # for applications in differing programming languages
14
+ # and on differing operating systems.
15
+ #
16
+ # == \JSON Values
17
+ #
9
18
  # A \JSON value is one of the following:
10
19
  # - Double-quoted text: <tt>"foo"</tt>.
11
20
  # - Number: +1+, +1.0+, +2.0e2+.
@@ -173,6 +182,18 @@ require 'json/common'
173
182
  # When enabled:
174
183
  # JSON.parse('[1,]', allow_trailing_comma: true) # => [1]
175
184
  #
185
+ # ---
186
+ #
187
+ # Option +allow_control_characters+ (boolean) specifies whether to allow
188
+ # unescaped ASCII control characters, such as newlines, in strings;
189
+ # defaults to +false+.
190
+ #
191
+ # With the default, +false+:
192
+ # JSON.parse(%{"Hello\nWorld"}) # invalid ASCII control character in string (JSON::ParserError)
193
+ #
194
+ # When enabled:
195
+ # JSON.parse(%{"Hello\nWorld"}, allow_control_characters: true) # => "Hello\nWorld"
196
+ #
176
197
  # ====== Output Options
177
198
  #
178
199
  # Option +freeze+ (boolean) specifies whether the returned objects will be frozen;
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: json
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.18.0
4
+ version: 2.18.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Florian Frank
@@ -84,7 +84,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
84
84
  - !ruby/object:Gem::Version
85
85
  version: '0'
86
86
  requirements: []
87
- rubygems_version: 3.6.9
87
+ rubygems_version: 4.1.0.dev
88
88
  specification_version: 4
89
89
  summary: JSON Implementation for Ruby
90
90
  test_files: []