json 2.12.2 → 2.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,15 @@
1
1
  # frozen_string_literal: true
2
2
  require 'mkmf'
3
3
 
4
- have_func("rb_enc_interned_str", "ruby.h") # RUBY_VERSION >= 3.0
4
+ have_func("rb_enc_interned_str", "ruby/encoding.h") # RUBY_VERSION >= 3.0
5
5
  have_func("rb_hash_new_capa", "ruby.h") # RUBY_VERSION >= 3.2
6
6
  have_func("rb_hash_bulk_insert", "ruby.h") # Missing on TruffleRuby
7
7
  have_func("strnlen", "string.h") # Missing on Solaris 10
8
8
 
9
9
  append_cflags("-std=c99")
10
10
 
11
+ if enable_config('parser-use-simd', default=!ENV["JSON_DISABLE_SIMD"])
12
+ load __dir__ + "/../simd/conf.rb"
13
+ end
14
+
11
15
  create_makefile 'json/ext/parser'
@@ -20,6 +20,8 @@ typedef unsigned char _Bool;
20
20
  #endif
21
21
  #endif
22
22
 
23
+ #include "../simd/simd.h"
24
+
23
25
  #ifndef RB_UNLIKELY
24
26
  #define RB_UNLIKELY(expr) expr
25
27
  #endif
@@ -35,7 +37,7 @@ static ID i_chr, i_aset, i_aref,
35
37
  i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
36
38
 
37
39
  static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
38
- sym_decimal_class, sym_on_load;
40
+ sym_decimal_class, sym_on_load, sym_allow_duplicate_key;
39
41
 
40
42
  static int binary_encindex;
41
43
  static int utf8_encindex;
@@ -363,10 +365,17 @@ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
363
365
  return len;
364
366
  }
365
367
 
368
/*
 * What the parser does when a JSON object repeats a key.
 *
 * JSON_DEPRECATED is the zero value (presumably what a config gets when the
 * `allow_duplicate_key` option is not given -- confirm against the config
 * initialization): a deprecation warning is emitted.
 * JSON_IGNORE is selected by a truthy `allow_duplicate_key`, JSON_RAISE by a
 * falsy one.
 */
enum duplicate_key_action {
    JSON_DEPRECATED = 0,
    JSON_IGNORE = 1,
    JSON_RAISE = 2,
};
373
+
366
374
  typedef struct JSON_ParserStruct {
367
375
  VALUE on_load_proc;
368
376
  VALUE decimal_class;
369
377
  ID decimal_method_id;
378
+ enum duplicate_key_action on_duplicate_key;
370
379
  int max_nesting;
371
380
  bool allow_nan;
372
381
  bool allow_trailing_comma;
@@ -386,15 +395,8 @@ typedef struct JSON_ParserStateStruct {
386
395
  int current_nesting;
387
396
  } JSON_ParserState;
388
397
 
389
-
390
- #define PARSE_ERROR_FRAGMENT_LEN 32
391
- #ifdef RBIMPL_ATTR_NORETURN
392
- RBIMPL_ATTR_NORETURN()
393
- #endif
394
- static void raise_parse_error(const char *format, JSON_ParserState *state)
398
+ static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out)
395
399
  {
396
- unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
397
-
398
400
  const char *cursor = state->cursor;
399
401
  long column = 0;
400
402
  long line = 1;
@@ -411,6 +413,29 @@ static void raise_parse_error(const char *format, JSON_ParserState *state)
411
413
  line++;
412
414
  }
413
415
  }
416
+ *line_out = line;
417
+ *column_out = column;
418
+ }
419
+
420
+ static void emit_parse_warning(const char *message, JSON_ParserState *state)
421
+ {
422
+ long line, column;
423
+ cursor_position(state, &line, &column);
424
+
425
+ VALUE warning = rb_sprintf("%s at line %ld column %ld", message, line, column);
426
+ rb_funcall(mJSON, rb_intern("deprecation_warning"), 1, warning);
427
+ }
428
+
429
+ #define PARSE_ERROR_FRAGMENT_LEN 32
430
+
431
+ #ifdef RBIMPL_ATTR_NORETURN
432
+ RBIMPL_ATTR_NORETURN()
433
+ #endif
434
+ static void raise_parse_error(const char *format, JSON_ParserState *state)
435
+ {
436
+ unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
437
+ long line, column;
438
+ cursor_position(state, &line, &column);
414
439
 
415
440
  const char *ptr = "EOF";
416
441
  if (state->cursor && state->cursor < state->end) {
@@ -517,7 +542,7 @@ static void
517
542
  json_eat_comments(JSON_ParserState *state)
518
543
  {
519
544
  if (state->cursor + 1 < state->end) {
520
- switch(state->cursor[1]) {
545
+ switch (state->cursor[1]) {
521
546
  case '/': {
522
547
  state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
523
548
  if (!state->cursor) {
@@ -688,11 +713,16 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
688
713
  }
689
714
  if (pe[0] == '\\' && pe[1] == 'u') {
690
715
  uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
716
+
717
+ if ((sur & 0xFC00) != 0xDC00) {
718
+ raise_parse_error_at("invalid surrogate pair at %s", state, p);
719
+ }
720
+
691
721
  ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
692
722
  | (sur & 0x3FF));
693
723
  pe += 5;
694
724
  } else {
695
- unescape = (char *) "?";
725
+ raise_parse_error_at("incomplete surrogate pair at %s", state, p);
696
726
  break;
697
727
  }
698
728
  }
@@ -807,10 +837,67 @@ static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig
807
837
  return array;
808
838
  }
809
839
 
810
- static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, long count)
840
+ static VALUE json_find_duplicated_key(size_t count, const VALUE *pairs)
841
+ {
842
+ VALUE set = rb_hash_new_capa(count / 2);
843
+ for (size_t index = 0; index < count; index += 2) {
844
+ size_t before = RHASH_SIZE(set);
845
+ VALUE key = pairs[index];
846
+ rb_hash_aset(set, key, Qtrue);
847
+ if (RHASH_SIZE(set) == before) {
848
+ if (RB_SYMBOL_P(key)) {
849
+ return rb_sym2str(key);
850
+ }
851
+ return key;
852
+ }
853
+ }
854
+ return Qfalse;
855
+ }
856
+
857
/*
 * New side of this hunk: emit_duplicate_key_warning() formats a deprecation
 * message naming the inspected duplicate key and hands it to
 * emit_parse_warning(), which appends the cursor's line and column.
 * The message is passed as a "%s" argument there, so '%' characters coming
 * from the key are not interpreted as format directives.
 * RB_GC_GUARD keeps `message` alive while its RSTRING_PTR buffer is in use.
 * The two "-" lines below are the removed start of the old
 * json_decode_object() body, shown by the diff in the same span.
 */
+ static void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key)
811
858
  {
812
- VALUE object = rb_hash_new_capa(count);
813
- rb_hash_bulk_insert(count, rvalue_stack_peek(state->stack, count), object);
859
+ VALUE message = rb_sprintf(
860
+ "detected duplicate key %"PRIsVALUE" in JSON object. This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`",
861
+ rb_inspect(duplicate_key)
862
+ );
863
+
864
+ emit_parse_warning(RSTRING_PTR(message), state);
865
+ RB_GC_GUARD(message);
866
+ }
867
+
868
+ #ifdef RBIMPL_ATTR_NORETURN
869
+ RBIMPL_ATTR_NORETURN()
870
+ #endif
871
+ static void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key)
872
+ {
873
+ VALUE message = rb_sprintf(
874
+ "duplicate key %"PRIsVALUE,
875
+ rb_inspect(duplicate_key)
876
+ );
877
+
878
+ raise_parse_error(RSTRING_PTR(message), state);
879
+ RB_GC_GUARD(message);
880
+ }
881
+
882
+ static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, size_t count)
883
+ {
884
+ size_t entries_count = count / 2;
885
+ VALUE object = rb_hash_new_capa(entries_count);
886
+ const VALUE *pairs = rvalue_stack_peek(state->stack, count);
887
+ rb_hash_bulk_insert(count, pairs, object);
888
+
889
+ if (RB_UNLIKELY(RHASH_SIZE(object) < entries_count)) {
890
+ switch (config->on_duplicate_key) {
891
+ case JSON_IGNORE:
892
+ break;
893
+ case JSON_DEPRECATED:
894
+ emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs));
895
+ break;
896
+ case JSON_RAISE:
897
+ raise_duplicate_key_error(state, json_find_duplicated_key(count, pairs));
898
+ break;
899
+ }
900
+ }
814
901
 
815
902
  rvalue_stack_pop(state->stack, count);
816
903
 
@@ -844,7 +931,7 @@ static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *
844
931
  return value;
845
932
  }
846
933
 
847
- static const bool string_scan[256] = {
934
+ static const bool string_scan_table[256] = {
848
935
  // ASCII Control Characters
849
936
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
850
937
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -857,32 +944,71 @@ static const bool string_scan[256] = {
857
944
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
858
945
  };
859
946
 
947
+ #if (defined(__GNUC__ ) || defined(__clang__))
948
+ #define FORCE_INLINE __attribute__((always_inline))
949
+ #else
950
+ #define FORCE_INLINE
951
+ #endif
952
+
953
+ #ifdef HAVE_SIMD
954
+ static SIMD_Implementation simd_impl = SIMD_NONE;
955
+ #endif /* HAVE_SIMD */
956
+
957
+ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
958
+ {
959
+ #ifdef HAVE_SIMD
960
+ #if defined(HAVE_SIMD_NEON)
961
+
962
+ uint64_t mask = 0;
963
+ if (string_scan_simd_neon(&state->cursor, state->end, &mask)) {
964
+ state->cursor += trailing_zeros64(mask) >> 2;
965
+ return 1;
966
+ }
967
+
968
+ #elif defined(HAVE_SIMD_SSE2)
969
+ if (simd_impl == SIMD_SSE2) {
970
+ int mask = 0;
971
+ if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) {
972
+ state->cursor += trailing_zeros(mask);
973
+ return 1;
974
+ }
975
+ }
976
+ #endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */
977
+ #endif /* HAVE_SIMD */
978
+
979
+ while (state->cursor < state->end) {
980
+ if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) {
981
+ return 1;
982
+ }
983
+ state->cursor++;
984
+ }
985
+ return 0;
986
+ }
987
+
860
988
  static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
861
989
  {
862
990
  state->cursor++;
863
991
  const char *start = state->cursor;
864
992
  bool escaped = false;
865
993
 
866
- while (state->cursor < state->end) {
867
- if (RB_UNLIKELY(string_scan[(unsigned char)*state->cursor])) {
868
- switch (*state->cursor) {
869
- case '"': {
870
- VALUE string = json_decode_string(state, config, start, state->cursor, escaped, is_name);
871
- state->cursor++;
872
- return json_push_value(state, config, string);
873
- }
874
- case '\\': {
875
- state->cursor++;
876
- escaped = true;
877
- if ((unsigned char)*state->cursor < 0x20) {
878
- raise_parse_error("invalid ASCII control character in string: %s", state);
879
- }
880
- break;
881
- }
882
- default:
994
+ while (RB_UNLIKELY(string_scan(state))) {
995
+ switch (*state->cursor) {
996
+ case '"': {
997
+ VALUE string = json_decode_string(state, config, start, state->cursor, escaped, is_name);
998
+ state->cursor++;
999
+ return json_push_value(state, config, string);
1000
+ }
1001
+ case '\\': {
1002
+ state->cursor++;
1003
+ escaped = true;
1004
+ if ((unsigned char)*state->cursor < 0x20) {
883
1005
  raise_parse_error("invalid ASCII control character in string: %s", state);
884
- break;
1006
+ }
1007
+ break;
885
1008
  }
1009
+ default:
1010
+ raise_parse_error("invalid ASCII control character in string: %s", state);
1011
+ break;
886
1012
  }
887
1013
 
888
1014
  state->cursor++;
@@ -1060,6 +1186,8 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1060
1186
  break;
1061
1187
  }
1062
1188
  case '{': {
1189
+ const char *object_start_cursor = state->cursor;
1190
+
1063
1191
  state->cursor++;
1064
1192
  json_eat_whitespace(state);
1065
1193
  long stack_head = state->stack->head;
@@ -1094,8 +1222,15 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1094
1222
  if (*state->cursor == '}') {
1095
1223
  state->cursor++;
1096
1224
  state->current_nesting--;
1097
- long count = state->stack->head - stack_head;
1098
- return json_push_value(state, config, json_decode_object(state, config, count));
1225
+ size_t count = state->stack->head - stack_head;
1226
+
1227
+ // Temporarily rewind the cursor in case an error is raised
1228
+ const char *final_cursor = state->cursor;
1229
+ state->cursor = object_start_cursor;
1230
+ VALUE object = json_decode_object(state, config, count);
1231
+ state->cursor = final_cursor;
1232
+
1233
+ return json_push_value(state, config, object);
1099
1234
  }
1100
1235
 
1101
1236
  if (*state->cursor == ',') {
@@ -1135,7 +1270,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1135
1270
  break;
1136
1271
  }
1137
1272
 
1138
- raise_parse_error("unreacheable: %s", state);
1273
+ raise_parse_error("unreachable: %s", state);
1139
1274
  }
1140
1275
 
1141
1276
  static void json_ensure_eof(JSON_ParserState *state)
@@ -1184,6 +1319,7 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
1184
1319
  else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
1185
1320
  else if (key == sym_freeze) { config->freeze = RTEST(val); }
1186
1321
  else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
1322
+ else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
1187
1323
  else if (key == sym_decimal_class) {
1188
1324
  if (RTEST(val)) {
1189
1325
  if (rb_respond_to(val, i_try_convert)) {
@@ -1400,6 +1536,7 @@ void Init_parser(void)
1400
1536
  sym_freeze = ID2SYM(rb_intern("freeze"));
1401
1537
  sym_on_load = ID2SYM(rb_intern("on_load"));
1402
1538
  sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
1539
+ sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
1403
1540
 
1404
1541
  i_chr = rb_intern("chr");
1405
1542
  i_aset = rb_intern("[]=");
@@ -1413,4 +1550,8 @@ void Init_parser(void)
1413
1550
  binary_encindex = rb_ascii8bit_encindex();
1414
1551
  utf8_encindex = rb_utf8_encindex();
1415
1552
  enc_utf8 = rb_utf8_encoding();
1553
+
1554
+ #ifdef HAVE_SIMD
1555
+ simd_impl = find_simd_implementation();
1556
+ #endif
1416
1557
  }
@@ -0,0 +1,24 @@
1
# Decide whether the SIMD code paths can be built for the host CPU.
#
# For a known CPU family a tiny C program using that family's vector type is
# test-compiled; on success -DJSON_ENABLE_SIMD is added to the build flags.
# Other CPU families are left untouched.
simd_probe =
  case RbConfig::CONFIG['host_cpu']
  when /^(arm|aarch64)/
    # Try to compile a small program using NEON instructions
    ['arm_neon.h', 'uint8x16_t', 'vdupq_n_u8(32)', nil]
  when /^(x86_64|x64)/
    ['x86intrin.h', '__m128i', '_mm_set1_epi8(32)', 'if (__builtin_cpu_supports("sse2")) { printf("OK"); }']
  end

if simd_probe
  header, type, init, extra = simd_probe

  if have_header(header) && try_compile(<<~SRC, '-Werror=implicit-function-declaration')
    #{cpp_include(header)}
    int main(int argc, char **argv) {
      #{type} test = #{init};
      #{extra}
      if (argc > 100000) printf("%p", &test);
      return 0;
    }
  SRC
    $defs.push("-DJSON_ENABLE_SIMD")
  else
    puts "Disable SIMD"
  end
end

# Checked for every CPU family.
have_header('cpuid.h')
@@ -0,0 +1,188 @@
1
/* Identifies which SIMD string-scanning implementation is in use, if any. */
typedef enum {
    SIMD_NONE = 0, /* no SIMD implementation selected */
    SIMD_NEON = 1, /* ARM NEON */
    SIMD_SSE2 = 2  /* x86-64 SSE2 */
} SIMD_Implementation;
6
+
7
+ #ifdef JSON_ENABLE_SIMD
8
+
9
+ #ifdef __clang__
10
+ # if __has_builtin(__builtin_ctzll)
11
+ # define HAVE_BUILTIN_CTZLL 1
12
+ # else
13
+ # define HAVE_BUILTIN_CTZLL 0
14
+ # endif
15
+ #elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
16
+ # define HAVE_BUILTIN_CTZLL 1
17
+ #else
18
+ # define HAVE_BUILTIN_CTZLL 0
19
+ #endif
20
+
21
/*
 * Number of consecutive zero bits at the low end of a 64-bit value.
 *
 * Uses the compiler builtin when HAVE_BUILTIN_CTZLL is set, otherwise a
 * bit-by-bit loop. The loop yields 0 for an input of 0; callers are expected
 * to pass a non-zero mask.
 */
static inline uint32_t trailing_zeros64(uint64_t input)
{
#if HAVE_BUILTIN_CTZLL
    return __builtin_ctzll(input);
#else
    uint32_t count = 0;
    for (uint64_t bits = input; bits != 0 && (bits & 1) == 0; bits >>= 1) {
        count++;
    }
    return count;
#endif
}
35
+
36
/*
 * Number of consecutive zero bits at the low end of an int-sized mask.
 *
 * Uses the compiler builtin when HAVE_BUILTIN_CTZLL is set, otherwise a
 * bit-by-bit loop. Both paths work on the unsigned bit pattern of `input`,
 * so a value with the sign bit set gives the same answer either way.
 * The loop yields 0 for an input of 0; callers are expected to pass a
 * non-zero mask.
 */
static inline int trailing_zeros(int input)
{
#if HAVE_BUILTIN_CTZLL
    return __builtin_ctz((unsigned int)input);
#else
    /* A signed `> 0` test would stop at once for negative inputs. */
    unsigned int bits = (unsigned int)input;
    int count = 0;
    while (bits != 0 && (bits & 1u) == 0) {
        count++;
        bits >>= 1;
    }
    return count;
#endif
}
50
+
51
+ #if (defined(__GNUC__ ) || defined(__clang__))
52
+ #define FORCE_INLINE __attribute__((always_inline))
53
+ #else
54
+ #define FORCE_INLINE
55
+ #endif
56
+
57
+
58
+ #define SIMD_MINIMUM_THRESHOLD 6
59
+
60
+ #if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM64)
61
+ #include <arm_neon.h>
62
+
63
+ #define FIND_SIMD_IMPLEMENTATION_DEFINED 1
64
/*
 * ARM variant: compiled only inside the NEON/AArch64 preprocessor branch and
 * unconditionally reports SIMD_NEON (no runtime CPU detection is performed).
 */
+ static inline SIMD_Implementation find_simd_implementation(void)
65
+ {
66
+ return SIMD_NEON;
67
+ }
68
+
69
+ #define HAVE_SIMD 1
70
+ #define HAVE_SIMD_NEON 1
71
+
72
+ // See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
73
+ static inline FORCE_INLINE uint64_t neon_match_mask(uint8x16_t matches)
74
+ {
75
+ const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4);
76
+ const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0);
77
+ return mask & 0x8888888888888888ull;
78
+ }
79
+
80
+ static inline FORCE_INLINE uint64_t compute_chunk_mask_neon(const char *ptr)
81
+ {
82
+ uint8x16_t chunk = vld1q_u8((const unsigned char *)ptr);
83
+
84
+ // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
85
+ // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
86
+ const uint8x16_t too_low_or_dbl_quote = vcltq_u8(veorq_u8(chunk, vdupq_n_u8(2)), vdupq_n_u8(33));
87
+
88
+ uint8x16_t has_backslash = vceqq_u8(chunk, vdupq_n_u8('\\'));
89
+ uint8x16_t needs_escape = vorrq_u8(too_low_or_dbl_quote, has_backslash);
90
+ return neon_match_mask(needs_escape);
91
+ }
92
+
93
+ static inline FORCE_INLINE int string_scan_simd_neon(const char **ptr, const char *end, uint64_t *mask)
94
+ {
95
+ while (*ptr + sizeof(uint8x16_t) <= end) {
96
+ uint64_t chunk_mask = compute_chunk_mask_neon(*ptr);
97
+ if (chunk_mask) {
98
+ *mask = chunk_mask;
99
+ return 1;
100
+ }
101
+ *ptr += sizeof(uint8x16_t);
102
+ }
103
+ return 0;
104
+ }
105
+
106
+ static inline uint8x16x4_t load_uint8x16_4(const unsigned char *table)
107
+ {
108
+ uint8x16x4_t tab;
109
+ tab.val[0] = vld1q_u8(table);
110
+ tab.val[1] = vld1q_u8(table+16);
111
+ tab.val[2] = vld1q_u8(table+32);
112
+ tab.val[3] = vld1q_u8(table+48);
113
+ return tab;
114
+ }
115
+
116
+ #endif /* ARM Neon Support.*/
117
+
118
#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)

/*
 * x86-64 SSE2 string scanning.
 *
 * The whole section is enabled only when the build detected both
 * <x86intrin.h> and <cpuid.h>. HAVE_SIMD_SSE2 advertises that
 * string_scan_simd_sse2() exists, and FIND_SIMD_IMPLEMENTATION_DEFINED
 * suppresses the generic find_simd_implementation() at the bottom of this
 * header, so each macro has to be defined together with the function it
 * stands for. Gating everything on both headers keeps a build that has
 * <x86intrin.h> but lacks <cpuid.h> from referencing an undefined scanner
 * and from defining find_simd_implementation() twice; such a build simply
 * gets no SIMD support.
 */
#if defined(HAVE_X86INTRIN_H) && defined(HAVE_CPUID_H)
#include <x86intrin.h>
#include <cpuid.h>

#define HAVE_SIMD 1
#define HAVE_SIMD_SSE2 1
#define FIND_SIMD_IMPLEMENTATION_DEFINED 1

#if defined(__clang__) || defined(__GNUC__)
#define TARGET_SSE2 __attribute__((target("sse2")))
#else
#define TARGET_SSE2
#endif

/* Unsigned byte comparisons built from the SSE2 primitives. */
#define _mm_cmpge_epu8(a, b) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a)
#define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a)
#define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1))
#define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a)

/*
 * Loads 16 bytes from `ptr` (unaligned) and returns a mask with one bit per
 * byte, set for bytes that are below 32, a double quote, or a backslash.
 */
static inline TARGET_SSE2 FORCE_INLINE int compute_chunk_mask_sse2(const char *ptr)
{
    __m128i chunk = _mm_loadu_si128((__m128i const*)ptr);
    // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
    // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
    __m128i too_low_or_dbl_quote = _mm_cmplt_epu8(_mm_xor_si128(chunk, _mm_set1_epi8(2)), _mm_set1_epi8(33));
    __m128i has_backslash = _mm_cmpeq_epi8(chunk, _mm_set1_epi8('\\'));
    __m128i needs_escape = _mm_or_si128(too_low_or_dbl_quote, has_backslash);
    return _mm_movemask_epi8(needs_escape);
}

/*
 * Scans [*ptr, end) one full 16-byte chunk at a time.
 *
 * Returns 1 with *mask set to the chunk's match mask and *ptr left at the
 * start of that chunk when a match is found. Returns 0 once fewer than 16
 * bytes remain, with *ptr advanced past every chunk examined.
 */
static inline TARGET_SSE2 FORCE_INLINE int string_scan_simd_sse2(const char **ptr, const char *end, int *mask)
{
    while (*ptr + sizeof(__m128i) <= end) {
        int chunk_mask = compute_chunk_mask_sse2(*ptr);
        if (chunk_mask) {
            *mask = chunk_mask;
            return 1;
        }
        *ptr += sizeof(__m128i);
    }

    return 0;
}

/* Runtime check: SIMD_SSE2 when the running CPU reports SSE2, else SIMD_NONE. */
static inline SIMD_Implementation find_simd_implementation(void)
{
    // TODO Revisit. I think the SSE version now only uses SSE2 instructions.
    if (__builtin_cpu_supports("sse2")) {
        return SIMD_SSE2;
    }

    return SIMD_NONE;
}

#endif /* HAVE_X86INTRIN_H && HAVE_CPUID_H */
#endif /* X86_64 Support */
180
+
181
+ #endif /* JSON_ENABLE_SIMD */
182
+
183
/*
 * Generic fallback: used when no platform section above defined
 * FIND_SIMD_IMPLEMENTATION_DEFINED; always reports SIMD_NONE.
 */
+ #ifndef FIND_SIMD_IMPLEMENTATION_DEFINED
184
+ static inline SIMD_Implementation find_simd_implementation(void)
185
+ {
186
+ return SIMD_NONE;
187
+ }
188
+ #endif
@@ -29,6 +29,10 @@
29
29
  #include <string.h>
30
30
  #include <stdint.h>
31
31
 
32
+ #ifdef JSON_DEBUG
33
+ #include <assert.h>
34
+ #endif
35
+
32
36
  #define npowers 87
33
37
  #define steppowers 8
34
38
  #define firstpower -348 /* 10 ^ -348 */
@@ -320,15 +324,7 @@ static int emit_digits(char* digits, int ndigits, char* dest, int K, bool neg)
320
324
  {
321
325
  int exp = absv(K + ndigits - 1);
322
326
 
323
- int max_trailing_zeros = 7;
324
-
325
- if(neg) {
326
- max_trailing_zeros -= 1;
327
- }
328
-
329
- /* write plain integer */
330
- if(K >= 0 && (exp < (ndigits + max_trailing_zeros))) {
331
-
327
+ if(K >= 0 && exp < 15) {
332
328
  memcpy(dest, digits, ndigits);
333
329
  memset(dest + ndigits, '0', K);
334
330
 
@@ -432,10 +428,12 @@ static int filter_special(double fp, char* dest)
432
428
  *
433
429
  * Input:
434
430
  * fp -> the double to convert, dest -> destination buffer.
435
- * The generated string will never be longer than 28 characters.
436
- * Make sure to pass a pointer to at least 28 bytes of memory.
431
+ * The generated string will never be longer than 32 characters.
432
+ * Make sure to pass a pointer to at least 32 bytes of memory.
437
433
  * The emitted string will not be null terminated.
438
434
  *
435
+ *
436
+ *
439
437
  * Output:
440
438
  * The number of written characters.
441
439
  *
@@ -474,6 +472,9 @@ static int fpconv_dtoa(double d, char dest[28])
474
472
  int ndigits = grisu2(d, digits, &K);
475
473
 
476
474
  str_len += emit_digits(digits, ndigits, dest + str_len, K, neg);
475
+ #ifdef JSON_DEBUG
476
+ assert(str_len <= 32);
477
+ #endif
477
478
 
478
479
  return str_len;
479
480
  }
data/json.gemspec CHANGED
@@ -44,15 +44,14 @@ spec = Gem::Specification.new do |s|
44
44
  "LEGAL",
45
45
  "README.md",
46
46
  "json.gemspec",
47
- *Dir["lib/**/*.rb"],
48
- ]
47
+ ] + Dir.glob("lib/**/*.rb", base: File.expand_path("..", __FILE__))
49
48
 
50
49
  if java_ext
51
50
  s.platform = 'java'
52
51
  s.files += Dir["lib/json/ext/**/*.jar"]
53
52
  else
54
53
  s.extensions = Dir["ext/json/**/extconf.rb"]
55
- s.files += Dir["ext/json/**/*.{c,h}"]
54
+ s.files += Dir["ext/json/**/*.{c,h,rb}"]
56
55
  end
57
56
  end
58
57
 
data/lib/json/add/core.rb CHANGED
@@ -7,6 +7,7 @@ require 'json/add/date_time'
7
7
  require 'json/add/exception'
8
8
  require 'json/add/range'
9
9
  require 'json/add/regexp'
10
+ require 'json/add/string'
10
11
  require 'json/add/struct'
11
12
  require 'json/add/symbol'
12
13
  require 'json/add/time'
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+ unless defined?(::JSON::JSON_LOADED) and ::JSON::JSON_LOADED
3
+ require 'json'
4
+ end
5
+
6
class String
  # call-seq: json_create(o)
  #
  # Rebuilds a String from its raw-object form: the byte values stored in the
  # array under the "raw" key of +object+ are packed back into a String.
  def self.json_create(object)
    raw_bytes = object["raw"]
    raw_bytes.pack("C*")
  end

  # call-seq: to_json_raw_object()
  #
  # Returns a Hash describing this String byte by byte: the class name under
  # JSON.create_id and the byte values as an array under "raw". The Hash can
  # be nested into other data structures; use it for strings that should be
  # serialized as raw bytes rather than as UTF-8 text, e.g. binary data.
  def to_json_raw_object
    raw_object = {}
    raw_object[JSON.create_id] = self.class.name
    raw_object["raw"] = unpack("C*")
    raw_object
  end

  # call-seq: to_json_raw(*args)
  #
  # Generates the JSON text for the Hash returned by to_json_raw_object,
  # forwarding all arguments to its to_json.
  def to_json_raw(...)
    raw_object = to_json_raw_object
    raw_object.to_json(...)
  end
end