oj 3.16.16 → 3.17.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/oj/oj.c CHANGED
@@ -20,6 +20,8 @@
20
20
  #include "rails.h"
21
21
  #include "simd.h"
22
22
 
23
+ #define MAX_INDENT 16
24
+
23
25
  typedef struct _yesNoOpt {
24
26
  VALUE sym;
25
27
  char *attr;
@@ -120,7 +122,9 @@ static VALUE create_id_sym;
120
122
  static VALUE custom_sym;
121
123
  static VALUE empty_string_sym;
122
124
  static VALUE escape_mode_sym;
125
+ static VALUE except_sym;
123
126
  static VALUE integer_range_sym;
127
+ static VALUE max_integer_digits_sym;
124
128
  static VALUE fast_sym;
125
129
  static VALUE float_prec_sym;
126
130
  static VALUE float_format_sym;
@@ -138,6 +142,7 @@ static VALUE null_sym;
138
142
  static VALUE object_sym;
139
143
  static VALUE omit_null_byte_sym;
140
144
  static VALUE omit_nil_sym;
145
+ static VALUE only_sym;
141
146
  static VALUE rails_sym;
142
147
  static VALUE raise_sym;
143
148
  static VALUE ruby_sym;
@@ -204,6 +209,7 @@ struct _options oj_default_options = {
204
209
  0, // cache_str
205
210
  0, // int_range_min
206
211
  0, // int_range_max
212
+ 0, // max_integer_digits
207
213
  oj_json_class, // create_id
208
214
  10, // create_id_len
209
215
  9, // sec_prec
@@ -228,6 +234,8 @@ struct _options oj_default_options = {
228
234
  false, // omit_nil
229
235
  false, // omit_null_byte
230
236
  MAX_DEPTH, // max_depth
237
+ NULL, // only
238
+ NULL, // except
231
239
  },
232
240
  {
233
241
  // str_rx
@@ -238,6 +246,22 @@ struct _options oj_default_options = {
238
246
  NULL,
239
247
  };
240
248
 
249
+ static VALUE only_array_from_string(const char *str) {
250
+ volatile VALUE a = Qnil;
251
+
252
+ if (NULL != str && 2 < strlen(str)) {
253
+ str++;
254
+ char *cp;
255
+
256
+ a = rb_ary_new();
257
+ while (NULL != (cp = strchr(str, ':'))) {
258
+ rb_ary_push(a, rb_id2sym(rb_intern2(str, cp - str)));
259
+ str = cp + 1;
260
+ }
261
+ }
262
+ return a;
263
+ }
264
+
241
265
  /* Document-method: default_options()
242
266
  * call-seq: default_options()
243
267
  *
@@ -314,10 +338,17 @@ struct _options oj_default_options = {
314
338
  * - *:cache_str* [_Fixnum_] maximum string value length to cache (strings less
315
339
  * than this are cached)
316
340
  * - *:integer_range* [_Range_] Dump integers outside range as strings.
341
+ * - *:max_integer_digits* [_Fixnum_] Maximum number of decimal digits allowed in a
342
+ * parsed integer. When the limit is exceeded a parse error is raised. 0 (the
343
+ * default) disables the limit. Setting a reasonable limit is recommended when
344
+ * parsing untrusted input to mitigate CPU-DoS attacks. Only applies to the
345
+ * legacy parsers (Oj.load, Oj::Doc, JSON.parse mimic); Oj::Parser is unaffected.
317
346
  * - *:trace* [_true,_|_false_] Trace all load and dump calls, default is false
318
347
  * (trace is off)
319
348
  * - *:safe* [_true,_|_false_] Safe mimic breaks JSON mimic to be safer, default
320
349
  * is false (safe is off)
350
+ * - *:only* [_nil,_|_Array_] A list of the fields to encode. All others are skipped.
351
+ * - *:except* [_nil,_|_Array_] A list of the fields to not encode. All others are encoded.
321
352
  *
322
353
  * Return [_Hash_] all current option settings.
323
354
  */
@@ -426,6 +457,7 @@ static VALUE get_def_opts(VALUE self) {
426
457
  } else {
427
458
  rb_hash_aset(opts, integer_range_sym, Qnil);
428
459
  }
460
+ rb_hash_aset(opts, max_integer_digits_sym, LONG2NUM((long)oj_default_options.max_integer_digits));
429
461
  switch (oj_default_options.escape_mode) {
430
462
  case NLEsc: rb_hash_aset(opts, escape_mode_sym, newline_sym); break;
431
463
  case JSONEsc: rb_hash_aset(opts, escape_mode_sym, json_sym); break;
@@ -483,6 +515,9 @@ static VALUE get_def_opts(VALUE self) {
483
515
  rb_hash_aset(opts, oj_hash_class_sym, oj_default_options.hash_class);
484
516
  rb_hash_aset(opts, oj_array_class_sym, oj_default_options.array_class);
485
517
 
518
+ rb_hash_aset(opts, only_sym, only_array_from_string(oj_default_options.dump_opts.only));
519
+ rb_hash_aset(opts, except_sym, only_array_from_string(oj_default_options.dump_opts.except));
520
+
486
521
  if (NULL == oj_default_options.ignore) {
487
522
  rb_hash_aset(opts, ignore_sym, Qnil);
488
523
  } else {
@@ -566,6 +601,8 @@ static VALUE get_def_opts(VALUE self) {
566
601
  * - *:cache_keys* [_Boolean_] if true then hash keys are cached
567
602
  * - *:cache_str* [_Fixnum_] maximum string value length to cache (strings less than this are cached)
568
603
  * - *:integer_range* [_Range_] Dump integers outside range as strings.
604
+ * - *:max_integer_digits* [_Fixnum_] Maximum decimal digits in a parsed integer
605
+ * (0 = unlimited). Use to mitigate CPU-DoS via huge integer values in JSON.
569
606
  * - *:trace* [_Boolean_] turn trace on or off.
570
607
  * - *:safe* [_Boolean_] turn safe mimic on or off.
571
608
  */
@@ -625,6 +662,95 @@ bool set_yesno_options(VALUE key, VALUE value, Options copts) {
625
662
  return false;
626
663
  }
627
664
 
665
+ static const char *make_only_value(VALUE v) {
666
+ switch (rb_type(v)) {
667
+ case RUBY_T_NIL:
668
+ case RUBY_T_NONE: return NULL;
669
+ case RUBY_T_ARRAY: {
670
+ long len = rb_array_len(v);
671
+ long i;
672
+ long size = 0;
673
+ char *buf;
674
+ char *bp;
675
+
676
+ for (i = 0; i < len; i++) {
677
+ VALUE x = rb_ary_entry(v, i);
678
+
679
+ switch (rb_type(x)) {
680
+ case RUBY_T_SYMBOL:
681
+ size += strlen(rb_id2name(rb_sym2id(x)));
682
+ size++;
683
+ break;
684
+ case RUBY_T_STRING:
685
+ size += strlen(StringValueCStr(x));
686
+ size++;
687
+ break;
688
+ default: rb_raise(rb_eArgError, ":only and :except must be nil, symbol, string, or array."); break;
689
+ }
690
+ }
691
+ if (0 == size) {
692
+ return NULL;
693
+ }
694
+ buf = OJ_R_ALLOC_N(char, size + 2);
695
+ bp = buf;
696
+ *bp++ = ':';
697
+ for (i = 0; i < len; i++) {
698
+ VALUE x = rb_ary_entry(v, i);
699
+ const char *str;
700
+
701
+ switch (rb_type(x)) {
702
+ case RUBY_T_SYMBOL:
703
+ str = rb_id2name(rb_sym2id(x));
704
+ size = strlen(str);
705
+ memcpy(bp, str, size);
706
+ bp += size;
707
+ *bp++ = ':';
708
+ break;
709
+ case RUBY_T_STRING:
710
+ str = StringValueCStr(x);
711
+ size = strlen(str);
712
+ memcpy(bp, str, size);
713
+ bp += size;
714
+ *bp++ = ':';
715
+ break;
716
+ default:
717
+ // ignore
718
+ break;
719
+ }
720
+ }
721
+ *bp = '\0';
722
+
723
+ return buf;
724
+ }
725
+ case RUBY_T_STRING: {
726
+ const char *str = StringValueCStr(v);
727
+ size_t size = strlen(str);
728
+ char *buf = OJ_R_ALLOC_N(char, size + 3);
729
+
730
+ buf[0] = ':';
731
+ strcpy(buf + 1, str);
732
+ buf[size + 1] = ':';
733
+ buf[size + 2] = '\0';
734
+
735
+ return buf;
736
+ }
737
+ case RUBY_T_SYMBOL: {
738
+ const char *str = rb_id2name(rb_sym2id(v));
739
+ size_t size = strlen(str);
740
+ char *buf = OJ_R_ALLOC_N(char, size + 3);
741
+
742
+ buf[0] = ':';
743
+ strcpy(buf + 1, str);
744
+ buf[size + 1] = ':';
745
+ buf[size + 2] = '\0';
746
+
747
+ return buf;
748
+ }
749
+ default: rb_raise(rb_eArgError, ":only and zzz :except must be nil, symbol, string, or array."); break;
750
+ }
751
+ return NULL;
752
+ }
753
+
628
754
  static int parse_options_cb(VALUE k, VALUE v, VALUE opts) {
629
755
  Options copts = (Options)opts;
630
756
  size_t len;
@@ -642,7 +768,10 @@ static int parse_options_cb(VALUE k, VALUE v, VALUE opts) {
642
768
  case T_FIXNUM:
643
769
  copts->dump_opts.indent_size = 0;
644
770
  *copts->dump_opts.indent_str = '\0';
645
- copts->indent = FIX2INT(v);
771
+ if (MAX_INDENT < FIX2INT(v)) {
772
+ rb_raise(rb_eArgError, "indent is limited to %d characters.", MAX_INDENT);
773
+ }
774
+ copts->indent = FIX2INT(v);
646
775
  break;
647
776
  case T_STRING:
648
777
  if (sizeof(copts->dump_opts.indent_str) <= (len = RSTRING_LEN(v))) {
@@ -957,6 +1086,20 @@ static int parse_options_cb(VALUE k, VALUE v, VALUE opts) {
957
1086
  } else if (Qfalse != v) {
958
1087
  rb_raise(rb_eArgError, ":integer_range must be a range of Fixnum.");
959
1088
  }
1089
+ } else if (max_integer_digits_sym == k) {
1090
+ if (Qnil == v || Qfalse == v) {
1091
+ copts->max_integer_digits = 0;
1092
+ } else if (T_FIXNUM == rb_type(v)) {
1093
+ long n = FIX2LONG(v);
1094
+
1095
+ if (n < 0) {
1096
+ rb_raise(rb_eArgError, ":max_integer_digits must be >= 0.");
1097
+ }
1098
+
1099
+ copts->max_integer_digits = (size_t)n;
1100
+ } else {
1101
+ rb_raise(rb_eArgError, ":max_integer_digits must be a non-negative Integer.");
1102
+ }
960
1103
  } else if (symbol_keys_sym == k || oj_symbolize_names_sym == k) {
961
1104
  if (Qnil == v) {
962
1105
  return ST_CONTINUE;
@@ -981,6 +1124,18 @@ static int parse_options_cb(VALUE k, VALUE v, VALUE opts) {
981
1124
  }
982
1125
  strncpy(copts->float_fmt, RSTRING_PTR(v), (size_t)RSTRING_LEN(v));
983
1126
  copts->float_fmt[RSTRING_LEN(v)] = '\0';
1127
+ } else if (only_sym == k) {
1128
+ if (NULL != copts->dump_opts.only) {
1129
+ OJ_R_FREE((void *)copts->dump_opts.only);
1130
+ copts->dump_opts.only = NULL;
1131
+ }
1132
+ copts->dump_opts.only = make_only_value(v);
1133
+ } else if (except_sym == k) {
1134
+ if (NULL != copts->dump_opts.except) {
1135
+ OJ_R_FREE((void *)copts->dump_opts.except);
1136
+ copts->dump_opts.except = NULL;
1137
+ }
1138
+ copts->dump_opts.except = make_only_value(v);
984
1139
  }
985
1140
  return ST_CONTINUE;
986
1141
  }
@@ -2027,6 +2182,8 @@ void Init_oj(void) {
2027
2182
  rb_gc_register_address(&escape_mode_sym);
2028
2183
  integer_range_sym = ID2SYM(rb_intern("integer_range"));
2029
2184
  rb_gc_register_address(&integer_range_sym);
2185
+ max_integer_digits_sym = ID2SYM(rb_intern("max_integer_digits"));
2186
+ rb_gc_register_address(&max_integer_digits_sym);
2030
2187
  fast_sym = ID2SYM(rb_intern("fast"));
2031
2188
  rb_gc_register_address(&fast_sym);
2032
2189
  float_format_sym = ID2SYM(rb_intern("float_format"));
@@ -2133,6 +2290,10 @@ void Init_oj(void) {
2133
2290
  rb_gc_register_address(&xmlschema_sym);
2134
2291
  xss_safe_sym = ID2SYM(rb_intern("xss_safe"));
2135
2292
  rb_gc_register_address(&xss_safe_sym);
2293
+ only_sym = ID2SYM(rb_intern("only"));
2294
+ rb_gc_register_address(&only_sym);
2295
+ except_sym = ID2SYM(rb_intern("except"));
2296
+ rb_gc_register_address(&except_sym);
2136
2297
 
2137
2298
  oj_slash_string = rb_str_new2("/");
2138
2299
  rb_gc_register_address(&oj_slash_string);
data/ext/oj/oj.h CHANGED
@@ -104,61 +104,64 @@ typedef enum {
104
104
  } StreamWriterType;
105
105
 
106
106
  typedef struct _dumpOpts {
107
- bool use;
108
- char indent_str[16];
109
- char before_sep[16];
110
- char after_sep[16];
111
- char hash_nl[16];
112
- char array_nl[16];
113
- uint8_t indent_size;
114
- uint8_t before_size;
115
- uint8_t after_size;
116
- uint8_t hash_size;
117
- uint8_t array_size;
118
- char nan_dump; // NanDump
119
- bool omit_nil;
120
- bool omit_null_byte;
121
- int max_depth;
107
+ bool use;
108
+ char indent_str[16];
109
+ char before_sep[16];
110
+ char after_sep[16];
111
+ char hash_nl[16];
112
+ char array_nl[16];
113
+ uint8_t indent_size;
114
+ uint8_t before_size;
115
+ uint8_t after_size;
116
+ uint8_t hash_size;
117
+ uint8_t array_size;
118
+ char nan_dump; // NanDump
119
+ bool omit_nil;
120
+ bool omit_null_byte;
121
+ int max_depth;
122
+ const char *only;
123
+ const char *except;
122
124
  } *DumpOpts;
123
125
 
124
126
  typedef struct _options {
125
- int indent; // indention for dump, default 2
126
- char circular; // YesNo
127
- char auto_define; // YesNo
128
- char sym_key; // YesNo
129
- char escape_mode; // Escape_Mode
130
- char mode; // Mode
131
- char class_cache; // YesNo
132
- char time_format; // TimeFormat
133
- char bigdec_as_num; // YesNo
134
- char bigdec_load; // BigLoad
135
- char compat_bigdec; // boolean (0 or 1)
136
- char to_hash; // YesNo
137
- char to_json; // YesNo
138
- char as_json; // YesNo
139
- char raw_json; // YesNo
140
- char nilnil; // YesNo
141
- char empty_string; // YesNo
142
- char allow_gc; // allow GC during parse
143
- char quirks_mode; // allow single JSON values instead of documents
144
- char allow_invalid; // YesNo - allow invalid unicode
145
- char create_ok; // YesNo allow create_id
146
- char allow_nan; // YEsyNo for parsing only
147
- char trace; // YesNo
148
- char safe; // YesNo
149
- char sec_prec_set; // boolean (0 or 1)
150
- char ignore_under; // YesNo - ignore attrs starting with _ if true in object and custom modes
151
- char cache_keys; // YesNo
152
- char cache_str; // string short than or equal to this are cache
153
- int64_t int_range_min; // dump numbers below as string
154
- int64_t int_range_max; // dump numbers above as string
155
- const char *create_id; // 0 or string
156
- size_t create_id_len; // length of create_id
157
- int sec_prec; // second precision when dumping time
158
- char float_prec; // float precision, linked to float_fmt
159
- char float_fmt[7]; // float format for dumping, if empty use Ruby
160
- VALUE hash_class; // class to use in place of Hash on load
161
- VALUE array_class; // class to use in place of Array on load
127
+ int indent; // indention for dump, default 2
128
+ char circular; // YesNo
129
+ char auto_define; // YesNo
130
+ char sym_key; // YesNo
131
+ char escape_mode; // Escape_Mode
132
+ char mode; // Mode
133
+ char class_cache; // YesNo
134
+ char time_format; // TimeFormat
135
+ char bigdec_as_num; // YesNo
136
+ char bigdec_load; // BigLoad
137
+ char compat_bigdec; // boolean (0 or 1)
138
+ char to_hash; // YesNo
139
+ char to_json; // YesNo
140
+ char as_json; // YesNo
141
+ char raw_json; // YesNo
142
+ char nilnil; // YesNo
143
+ char empty_string; // YesNo
144
+ char allow_gc; // allow GC during parse
145
+ char quirks_mode; // allow single JSON values instead of documents
146
+ char allow_invalid; // YesNo - allow invalid unicode
147
+ char create_ok; // YesNo allow create_id
148
+ char allow_nan; // YesNo for parsing only
149
+ char trace; // YesNo
150
+ char safe; // YesNo
151
+ char sec_prec_set; // boolean (0 or 1)
152
+ char ignore_under; // YesNo - ignore attrs starting with _ if true in object and custom modes
153
+ char cache_keys; // YesNo
154
+ char cache_str; // string short than or equal to this are cache
155
+ int64_t int_range_min; // dump numbers below as string
156
+ int64_t int_range_max; // dump numbers above as string
157
+ size_t max_integer_digits; // 0 = unlimited; max decimal digits for parsed integers
158
+ const char *create_id; // 0 or string
159
+ size_t create_id_len; // length of create_id
160
+ int sec_prec; // second precision when dumping time
161
+ char float_prec; // float precision, linked to float_fmt
162
+ char float_fmt[7]; // float format for dumping, if empty use Ruby
163
+ VALUE hash_class; // class to use in place of Hash on load
164
+ VALUE array_class; // class to use in place of Array on load
162
165
  struct _dumpOpts dump_opts;
163
166
  struct _rxClass str_rx;
164
167
  VALUE *ignore; // Qnil terminated array of classes or NULL
data/ext/oj/parse.c CHANGED
@@ -209,7 +209,7 @@ static inline const char *string_scan_neon(const char *str, const char *end) {
209
209
  while (str + sizeof(uint8x16_t) <= end) {
210
210
  uint8x16_t chunk = vld1q_u8((const uint8_t *)str);
211
211
  uint8x16_t tmp = vorrq_u8(vorrq_u8(vceqq_u8(chunk, null_char), vceqq_u8(chunk, backslash)),
212
- vceqq_u8(chunk, quote));
212
+ vceqq_u8(chunk, quote));
213
213
  const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(tmp), 4);
214
214
  uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0);
215
215
  if (mask != 0) {
@@ -285,10 +285,10 @@ static OJ_TARGET_SSE42 const char *scan_string_SSE42(const char *str, const char
285
285
  for (; str <= safe_end_16; str += 16) {
286
286
  const __m128i string = _mm_loadu_si128((const __m128i *)str);
287
287
  const int r = _mm_cmpestri(terminate,
288
- 3,
289
- string,
290
- 16,
291
- _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT);
288
+ 3,
289
+ string,
290
+ 16,
291
+ _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT);
292
292
  if (r != 16)
293
293
  return str + r;
294
294
  }
@@ -394,7 +394,7 @@ void oj_scanner_init(void) {
394
394
  static void read_escaped_str(ParseInfo pi, const char *start) {
395
395
  struct _buf buf;
396
396
  const char *s;
397
- int cnt = (int)(pi->cur - start);
397
+ size_t cnt = pi->cur - start;
398
398
  uint32_t code;
399
399
  Val parent = stack_peek(&pi->stack);
400
400
 
@@ -611,7 +611,7 @@ static void read_num(ParseInfo pi) {
611
611
  ni.bigdec_load = pi->options.compat_bigdec;
612
612
  } else {
613
613
  ni.no_big = (FloatDec == pi->options.bigdec_load || FastDec == pi->options.bigdec_load ||
614
- RubyDec == pi->options.bigdec_load);
614
+ RubyDec == pi->options.bigdec_load);
615
615
  ni.bigdec_load = pi->options.bigdec_load;
616
616
  }
617
617
 
@@ -669,7 +669,7 @@ static void read_num(ParseInfo pi) {
669
669
  // A trailing . is not a valid decimal but if encountered allow it
670
670
  // except when mimicking the JSON gem or in strict mode.
671
671
  if (StrictMode == pi->options.mode || CompatMode == pi->options.mode) {
672
- int pos = (int)(pi->cur - ni.str);
672
+ size_t pos = pi->cur - ni.str;
673
673
 
674
674
  if (1 == pos || (2 == pos && ni.neg)) {
675
675
  oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "not a number");
@@ -971,6 +971,20 @@ static long double exp_plus[] = {
971
971
  1.0e39, 1.0e40, 1.0e41, 1.0e42, 1.0e43, 1.0e44, 1.0e45, 1.0e46, 1.0e47, 1.0e48, 1.0e49,
972
972
  };
973
973
 
974
+ static void validate_integer_size(size_t limit, NumInfo ni) {
975
+ size_t digit_count = ni->len - (ni->neg ? 1 : 0);
976
+
977
+ if (digit_count > limit) {
978
+ oj_set_error_at(ni->pi,
979
+ (Qnil != ni->pi->err_class) ? ni->pi->err_class : oj_parse_error_class,
980
+ __FILE__,
981
+ __LINE__,
982
+ "integer exceeds :max_integer_digits (%lu > %lu)",
983
+ (unsigned long)digit_count,
984
+ (unsigned long)limit);
985
+ }
986
+ }
987
+
974
988
  VALUE
975
989
  oj_num_as_value(NumInfo ni) {
976
990
  VALUE rnum = Qnil;
@@ -984,6 +998,12 @@ oj_num_as_value(NumInfo ni) {
984
998
  } else if (ni->nan) {
985
999
  rnum = rb_float_new(0.0 / 0.0);
986
1000
  } else if (1 == ni->div && 0 == ni->exp && !ni->has_exp) { // fixnum
1001
+ size_t limit = (NULL != ni->pi) ? ni->pi->options.max_integer_digits : 0;
1002
+
1003
+ if (0 < limit) {
1004
+ validate_integer_size(limit, ni);
1005
+ }
1006
+
987
1007
  if (ni->big) {
988
1008
  if (256 > ni->len) {
989
1009
  char buf[256];
@@ -1193,11 +1213,19 @@ oj_pi_parse(int argc, VALUE *argv, ParseInfo pi, char *json, size_t len, int yie
1193
1213
  buf = OJ_R_ALLOC_N(char, len + 1);
1194
1214
  pi->json = buf;
1195
1215
  pi->end = buf + len;
1196
- if (0 >= (cnt = read(fd, (char *)pi->json, len)) || cnt != (ssize_t)len) {
1197
- if (0 != buf) {
1198
- OJ_R_FREE(buf);
1216
+ {
1217
+ size_t total = 0;
1218
+
1219
+ while (total < len) {
1220
+ cnt = read(fd, (char *)pi->json + total, len - total);
1221
+ if (cnt <= 0) {
1222
+ if (0 != buf) {
1223
+ OJ_R_FREE(buf);
1224
+ }
1225
+ rb_raise(rb_eIOError, "failed to read from IO Object.");
1226
+ }
1227
+ total += cnt;
1199
1228
  }
1200
- rb_raise(rb_eIOError, "failed to read from IO Object.");
1201
1229
  }
1202
1230
  ((char *)pi->json)[len] = '\0';
1203
1231
  /* skip UTF-8 BOM if present */