oj 3.13.15 → 3.13.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/oj/parse.c CHANGED
@@ -183,6 +183,74 @@ static void unicode_to_chars(ParseInfo pi, Buf buf, uint32_t code) {
183
183
  }
184
184
  }
185
185
 
186
/*
 * Portable byte-at-a-time string scanner.
 *
 * Advances from str until it reaches a double quote, a backslash, a NUL
 * byte, or end, whichever comes first, and returns that position. When no
 * terminator is found inside [str, end) the returned pointer is end; when
 * str is already at or past end it is returned unchanged.
 *
 * The bound is tested before each byte is read, so the byte located at end
 * is never dereferenced.
 */
static inline const char *scan_string_noSIMD(const char *str, const char *end) {
    for (; str < end; str++) {
        if ('"' == *str || '\0' == *str || '\\' == *str) {
            break;
        }
    }
    return str;
}
194
+
195
+ // Taken from Tensorflow:
196
+ // https://github.com/tensorflow/tensorflow/blob/5dcfc51118817f27fad5246812d83e5dccdc5f72/tensorflow/core/lib/hash/crc32c_accelerate.cc#L21-L38
197
+ #ifdef __SSE4_2__
198
+ #if defined(__x86_64__) && defined(__GNUC__) && \
199
+ (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
200
+ #define USE_SSE_DETECT 1
201
+ #elif defined(__x86_64__) && defined(__clang__)
202
+ #if __has_builtin(__builtin_cpu_supports)
203
+ #define USE_SSE_DETECT 1
204
+ #endif
205
+ #endif
206
+ #endif /* __SSE4_2__ */
207
+
208
+ // This version of Apple clang has a bug:
209
+ // https://llvm.org/bugs/show_bug.cgi?id=25510
210
+ #if defined(__APPLE__) && (__clang_major__ <= 8)
211
+ #undef USE_SSE_DETECT
212
+ #endif
213
+
214
#ifdef USE_SSE_DETECT
#include <nmmintrin.h>

/*
 * SSE4.2 string scanner.
 *
 * Examines the input 16 bytes at a time and returns a pointer to the first
 * NUL, backslash, or double quote found. Once fewer than 16 bytes remain
 * before end, the rest is handed to scan_string_noSIMD(), whose result is
 * returned as-is.
 */
static inline const char *scan_string_SIMD(const char *str, const char *end) {
    // Needle set for the string-compare instruction; only the first 3 bytes
    // (NUL, backslash, double quote) are used.
    static const char chars[16] = "\x00\\\"";
    const __m128i     terminate = _mm_loadu_si128((const __m128i *)&chars[0]);

    // Compare the remaining length instead of computing `end - 16`, which
    // would form a pointer before the buffer for inputs shorter than 16 bytes.
    for (; 16 <= end - str; str += 16) {
        const __m128i string = _mm_loadu_si128((const __m128i *)str);
        const int     r      = _mm_cmpestri(terminate,
                                   3,
                                   string,
                                   16,
                                   _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT);

        // An index of 16 means none of the needle bytes occurred in this chunk.
        if (r != 16) {
            return str + r;
        }
    }

    return scan_string_noSIMD(str, end);
}
#endif
234
+
235
/*
 * Reports whether the running CPU supports SSE4.2.
 *
 * When USE_SSE_DETECT is not defined this always returns false. Otherwise
 * the compiler's CPU-feature data is initialized with __builtin_cpu_init()
 * and then queried through __builtin_cpu_supports().
 */
static bool cpu_supports_sse42(void) {
// Tested with #ifdef to match the guard around scan_string_SIMD.
#ifdef USE_SSE_DETECT
    __builtin_cpu_init();
    return (__builtin_cpu_supports("sse4.2"));
#else
    return false;
#endif
}
243
+
244
+ static const char *(*scan_func) (const char *str, const char *end) = scan_string_noSIMD;
245
+
246
+ void oj_scanner_init(void) {
247
+ if (cpu_supports_sse42()) {
248
+ #if USE_SSE_DETECT
249
+ scan_func = scan_string_SIMD;
250
+ #endif
251
+ }
252
+ }
253
+
186
254
  // entered at /
187
255
  static void read_escaped_str(ParseInfo pi, const char *start) {
188
256
  struct _buf buf;
@@ -192,11 +260,11 @@ static void read_escaped_str(ParseInfo pi, const char *start) {
192
260
  Val parent = stack_peek(&pi->stack);
193
261
 
194
262
  buf_init(&buf);
195
- if (0 < cnt) {
196
- buf_append_string(&buf, start, cnt);
197
- }
198
- for (s = pi->cur; '"' != *s; s++) {
199
- if (s >= pi->end) {
263
+ buf_append_string(&buf, start, cnt);
264
+
265
+ for (s = pi->cur; '"' != *s;) {
266
+ const char *scanned = scan_func(s, pi->end);
267
+ if (scanned >= pi->end) {
200
268
  oj_set_error_at(pi,
201
269
  oj_parse_error_class,
202
270
  __FILE__,
@@ -204,7 +272,12 @@ static void read_escaped_str(ParseInfo pi, const char *start) {
204
272
  "quoted string not terminated");
205
273
  buf_cleanup(&buf);
206
274
  return;
207
- } else if ('\\' == *s) {
275
+ }
276
+
277
+ buf_append_string(&buf, s, (size_t)(scanned - s));
278
+ s = scanned;
279
+
280
+ if ('\\' == *s) {
208
281
  s++;
209
282
  switch (*s) {
210
283
  case 'n': buf_append(&buf, '\n'); break;
@@ -273,8 +346,7 @@ static void read_escaped_str(ParseInfo pi, const char *start) {
273
346
  buf_cleanup(&buf);
274
347
  return;
275
348
  }
276
- } else {
277
- buf_append(&buf, *s);
349
+ s++;
278
350
  }
279
351
  }
280
352
  if (0 == parent) {
@@ -327,44 +399,11 @@ static void read_escaped_str(ParseInfo pi, const char *start) {
327
399
  buf_cleanup(&buf);
328
400
  }
329
401
 
330
- static inline void scan_string_noSIMD(ParseInfo pi) {
331
- for (; '"' != *pi->cur; pi->cur++) {
332
- if (pi->end <= pi->cur || '\0' == *pi->cur || '\\' == *pi->cur) {
333
- return;
334
- }
335
- }
336
- }
337
-
338
- #if defined(OJ_USE_SSE4_2)
339
- #include <nmmintrin.h>
340
-
341
- static inline void scan_string_SIMD(ParseInfo pi) {
342
- static const char chars[16] = "\x00\\\"";
343
- const __m128i terminate = _mm_loadu_si128((const __m128i *)&chars[0]);
344
- const char *end = (const char *)(pi->end - 16);
345
-
346
- for (; pi->cur <= end; pi->cur += 16) {
347
- const __m128i string = _mm_loadu_si128((const __m128i *)pi->cur);
348
- const int r = _mm_cmpestri(terminate, 3, string, 16, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT);
349
- if (r != 16) {
350
- pi->cur = (const char*)(pi->cur + r);
351
- return;
352
- }
353
- }
354
-
355
- scan_string_noSIMD(pi);
356
- }
357
- #endif
358
-
359
402
  static void read_str(ParseInfo pi) {
360
403
  const char *str = pi->cur;
361
404
  Val parent = stack_peek(&pi->stack);
362
405
 
363
- #if defined(OJ_USE_SSE4_2)
364
- scan_string_SIMD(pi);
365
- #else
366
- scan_string_noSIMD(pi);
367
- #endif
406
+ pi->cur = scan_func(pi->cur, pi->end);
368
407
  if (RB_UNLIKELY(pi->end <= pi->cur)) {
369
408
  oj_set_error_at(pi,
370
409
  oj_parse_error_class,
@@ -494,33 +533,31 @@ static void read_num(ParseInfo pi) {
494
533
  int dec_cnt = 0;
495
534
  bool zero1 = false;
496
535
 
536
+ // Skip leading zeros.
537
+ for (; '0' == *pi->cur; pi->cur++) {
538
+ zero1 = true;
539
+ }
540
+
497
541
  for (; '0' <= *pi->cur && *pi->cur <= '9'; pi->cur++) {
498
- if (0 == ni.i && '0' == *pi->cur) {
499
- zero1 = true;
500
- }
501
- if (0 < ni.i) {
502
- dec_cnt++;
503
- }
504
- if (!ni.big) {
505
- int d = (*pi->cur - '0');
542
+ int d = (*pi->cur - '0');
506
543
 
507
- if (0 < d) {
508
- if (zero1 && CompatMode == pi->options.mode) {
509
- oj_set_error_at(pi,
510
- oj_parse_error_class,
511
- __FILE__,
512
- __LINE__,
513
- "not a number");
514
- return;
515
- }
516
- zero1 = false;
517
- }
518
- ni.i = ni.i * 10 + d;
519
- if (INT64_MAX <= ni.i || DEC_MAX < dec_cnt) {
520
- ni.big = 1;
521
- }
544
+ if (RB_LIKELY(0 != ni.i)) {
545
+ dec_cnt++;
522
546
  }
547
+ ni.i = ni.i * 10 + d;
548
+ }
549
+ if (RB_UNLIKELY(0 != ni.i && zero1 && CompatMode == pi->options.mode)) {
550
+ oj_set_error_at(pi,
551
+ oj_parse_error_class,
552
+ __FILE__,
553
+ __LINE__,
554
+ "not a number");
555
+ return;
556
+ }
557
+ if (INT64_MAX <= ni.i || DEC_MAX < dec_cnt) {
558
+ ni.big = true;
523
559
  }
560
+
524
561
  if ('.' == *pi->cur) {
525
562
  pi->cur++;
526
563
  // A trailing . is not a valid decimal but if encountered allow it
@@ -540,25 +577,20 @@ static void read_num(ParseInfo pi) {
540
577
  for (; '0' <= *pi->cur && *pi->cur <= '9'; pi->cur++) {
541
578
  int d = (*pi->cur - '0');
542
579
 
543
- if (0 < ni.num || 0 < ni.i) {
580
+ if (RB_LIKELY(0 != ni.num || 0 != ni.i)) {
544
581
  dec_cnt++;
545
582
  }
546
- if (INT64_MAX <= ni.div) {
547
- if (!ni.no_big) {
548
- ni.big = true;
549
- }
550
- } else {
551
- ni.num = ni.num * 10 + d;
552
- ni.div *= 10;
553
- ni.di++;
554
- if (INT64_MAX <= ni.div || DEC_MAX < dec_cnt) {
555
- if (!ni.no_big) {
556
- ni.big = true;
557
- }
558
- }
559
- }
583
+ ni.num = ni.num * 10 + d;
584
+ ni.div *= 10;
585
+ ni.di++;
560
586
  }
561
587
  }
588
+ if (INT64_MAX <= ni.div || DEC_MAX < dec_cnt) {
589
+ if (!ni.no_big) {
590
+ ni.big = true;
591
+ }
592
+ }
593
+
562
594
  if ('e' == *pi->cur || 'E' == *pi->cur) {
563
595
  int eneg = 0;
564
596
 
data/ext/oj/parse.h CHANGED
@@ -97,6 +97,8 @@ static inline void parse_info_init(ParseInfo pi) {
97
97
  memset(pi, 0, sizeof(struct _parseInfo));
98
98
  }
99
99
 
100
// Declared with an explicit (void) prototype; an empty parameter list would
// leave the arguments unspecified in C prior to C23.
extern void oj_scanner_init(void);
101
+
100
102
  static inline bool empty_ok(Options options) {
101
103
  switch (options->mode) {
102
104
  case ObjectMode:
data/ext/oj/parser.c CHANGED
@@ -533,6 +533,7 @@ static void calc_num(ojParser p) {
533
533
  // nothing to do
534
534
  break;
535
535
  }
536
+ p->type = OJ_NONE;
536
537
  }
537
538
 
538
539
  static void big_change(ojParser p) {
@@ -598,6 +599,8 @@ static void parse(ojParser p, const byte *json) {
598
599
  const byte *b = json;
599
600
  int i;
600
601
 
602
+ p->line = 1;
603
+ p->col = -1;
601
604
  #if DEBUG
602
605
  printf("*** parse - mode: %c %s\n", p->map[256], (const char *)json);
603
606
  #endif
@@ -652,6 +655,7 @@ static void parse(ojParser p, const byte *json) {
652
655
  }
653
656
  buf_append_string(&p->buf, (const char *)start, b - start);
654
657
  if ('"' == *b) {
658
+ p->cur = b - json;
655
659
  p->funcs[p->stack[p->depth]].add_str(p);
656
660
  p->map = (0 == p->depth) ? value_map : after_map;
657
661
  break;
@@ -661,12 +665,14 @@ static void parse(ojParser p, const byte *json) {
661
665
  p->next_map = (0 == p->depth) ? value_map : after_map;
662
666
  break;
663
667
  case OPEN_OBJECT:
668
+ p->cur = b - json;
664
669
  p->funcs[p->stack[p->depth]].open_object(p);
665
670
  p->depth++;
666
671
  p->stack[p->depth] = OBJECT_FUN;
667
672
  p->map = key1_map;
668
673
  break;
669
674
  case NUM_CLOSE_OBJECT:
675
+ p->cur = b - json;
670
676
  calc_num(p);
671
677
  // flow through
672
678
  case CLOSE_OBJECT:
@@ -677,15 +683,18 @@ static void parse(ojParser p, const byte *json) {
677
683
  return;
678
684
  }
679
685
  p->depth--;
686
+ p->cur = b - json;
680
687
  p->funcs[p->stack[p->depth]].close_object(p);
681
688
  break;
682
689
  case OPEN_ARRAY:
690
+ p->cur = b - json;
683
691
  p->funcs[p->stack[p->depth]].open_array(p);
684
692
  p->depth++;
685
693
  p->stack[p->depth] = ARRAY_FUN;
686
694
  p->map = value_map;
687
695
  break;
688
696
  case NUM_CLOSE_ARRAY:
697
+ p->cur = b - json;
689
698
  calc_num(p);
690
699
  // flow through
691
700
  case CLOSE_ARRAY:
@@ -696,9 +705,11 @@ static void parse(ojParser p, const byte *json) {
696
705
  return;
697
706
  }
698
707
  p->depth--;
708
+ p->cur = b - json;
699
709
  p->funcs[p->stack[p->depth]].close_array(p);
700
710
  break;
701
711
  case NUM_COMMA:
712
+ p->cur = b - json;
702
713
  calc_num(p);
703
714
  if (0 < p->depth && OBJECT_FUN == p->stack[p->depth]) {
704
715
  p->map = key_map;
@@ -860,8 +871,14 @@ static void parse(ojParser p, const byte *json) {
860
871
  b--;
861
872
  p->map = big_exp_map;
862
873
  break;
863
- case NUM_SPC: calc_num(p); break;
864
- case NUM_NEWLINE: calc_num(p); b++;
874
+ case NUM_SPC:
875
+ p->cur = b - json;
876
+ calc_num(p);
877
+ break;
878
+ case NUM_NEWLINE:
879
+ p->cur = b - json;
880
+ calc_num(p);
881
+ b++;
865
882
  #ifdef SPACE_JUMP
866
883
  // for (uint32_t *sj = (uint32_t*)b; 0x20202020 == *sj; sj++) { b += 4; }
867
884
  for (uint16_t *sj = (uint16_t *)b; 0x2020 == *sj; sj++) {
@@ -882,6 +899,7 @@ static void parse(ojParser p, const byte *json) {
882
899
  buf_append_string(&p->buf, (const char *)start, b - start);
883
900
  }
884
901
  if ('"' == *b) {
902
+ p->cur = b - json;
885
903
  p->funcs[p->stack[p->depth]].add_str(p);
886
904
  p->map = p->next_map;
887
905
  break;
@@ -890,6 +908,7 @@ static void parse(ojParser p, const byte *json) {
890
908
  break;
891
909
  case STR_SLASH: p->map = esc_map; break;
892
910
  case STR_QUOTE:
911
+ p->cur = b - json;
893
912
  p->funcs[p->stack[p->depth]].add_str(p);
894
913
  p->map = p->next_map;
895
914
  break;
@@ -967,6 +986,7 @@ static void parse(ojParser p, const byte *json) {
967
986
  case VAL_NULL:
968
987
  if ('u' == b[1] && 'l' == b[2] && 'l' == b[3]) {
969
988
  b += 3;
989
+ p->cur = b - json;
970
990
  p->funcs[p->stack[p->depth]].add_null(p);
971
991
  p->map = (0 == p->depth) ? value_map : after_map;
972
992
  break;
@@ -992,6 +1012,7 @@ static void parse(ojParser p, const byte *json) {
992
1012
  case VAL_TRUE:
993
1013
  if ('r' == b[1] && 'u' == b[2] && 'e' == b[3]) {
994
1014
  b += 3;
1015
+ p->cur = b - json;
995
1016
  p->funcs[p->stack[p->depth]].add_true(p);
996
1017
  p->map = (0 == p->depth) ? value_map : after_map;
997
1018
  break;
@@ -1017,6 +1038,7 @@ static void parse(ojParser p, const byte *json) {
1017
1038
  case VAL_FALSE:
1018
1039
  if ('a' == b[1] && 'l' == b[2] && 's' == b[3] && 'e' == b[4]) {
1019
1040
  b += 4;
1041
+ p->cur = b - json;
1020
1042
  p->funcs[p->stack[p->depth]].add_false(p);
1021
1043
  p->map = (0 == p->depth) ? value_map : after_map;
1022
1044
  break;
@@ -1050,6 +1072,7 @@ static void parse(ojParser p, const byte *json) {
1050
1072
  parse_error(p, "expected null");
1051
1073
  return;
1052
1074
  }
1075
+ p->cur = b - json;
1053
1076
  p->funcs[p->stack[p->depth]].add_null(p);
1054
1077
  p->map = (0 == p->depth) ? value_map : after_map;
1055
1078
  }
@@ -1061,6 +1084,7 @@ static void parse(ojParser p, const byte *json) {
1061
1084
  parse_error(p, "expected false");
1062
1085
  return;
1063
1086
  }
1087
+ p->cur = b - json;
1064
1088
  p->funcs[p->stack[p->depth]].add_false(p);
1065
1089
  p->map = (0 == p->depth) ? value_map : after_map;
1066
1090
  }
@@ -1072,6 +1096,7 @@ static void parse(ojParser p, const byte *json) {
1072
1096
  parse_error(p, "expected true");
1073
1097
  return;
1074
1098
  }
1099
+ p->cur = b - json;
1075
1100
  p->funcs[p->stack[p->depth]].add_true(p);
1076
1101
  p->map = (0 == p->depth) ? value_map : after_map;
1077
1102
  }
@@ -1089,6 +1114,9 @@ static void parse(ojParser p, const byte *json) {
1089
1114
  p->map = trail_map;
1090
1115
  }
1091
1116
  }
1117
+ if (0 < p->depth) {
1118
+ parse_error(p, "parse error, not closed");
1119
+ }
1092
1120
  if (0 == p->depth) {
1093
1121
  switch (p->map[256]) {
1094
1122
  case '0':
@@ -1099,7 +1127,10 @@ static void parse(ojParser p, const byte *json) {
1099
1127
  case 'D':
1100
1128
  case 'g':
1101
1129
  case 'B':
1102
- case 'Y': calc_num(p); break;
1130
+ case 'Y':
1131
+ p->cur = b - json;
1132
+ calc_num(p);
1133
+ break;
1103
1134
  }
1104
1135
  }
1105
1136
  return;
@@ -1456,7 +1487,7 @@ static VALUE saj_parser = Qundef;
1456
1487
  /* Document-method: saj
1457
1488
  * call-seq: saj
1458
1489
  *
1459
- * Returns the default saj parser. Note the default SAJ parser can not be used
1490
+ * Returns the default SAJ parser. Note the default SAJ parser can not be used
1460
1491
  * concurrently in more than one thread.
1461
1492
  */
1462
1493
  static VALUE parser_saj(VALUE self) {
data/ext/oj/parser.h CHANGED
@@ -80,6 +80,7 @@ typedef struct _ojParser {
80
80
 
81
81
  char token[8];
82
82
  long line;
83
+ long cur; // only set before call to a function
83
84
  long col;
84
85
  int ri;
85
86
  uint32_t ucode;
data/ext/oj/rails.c CHANGED
@@ -517,7 +517,7 @@ static void dump_as_string(VALUE obj, int depth, Out out, bool as_ok) {
517
517
  static void dump_as_json(VALUE obj, int depth, Out out, bool as_ok) {
518
518
  volatile VALUE ja;
519
519
 
520
- if (Yes == out->opts->trace) {
520
+ if (RB_UNLIKELY(Yes == out->opts->trace)) {
521
521
  oj_trace("as_json", obj, __FILE__, __LINE__, depth + 1, TraceRubyIn);
522
522
  }
523
523
  // Some classes elect to not take an options argument so check the arity
@@ -527,7 +527,7 @@ static void dump_as_json(VALUE obj, int depth, Out out, bool as_ok) {
527
527
  } else {
528
528
  ja = rb_funcall2(obj, oj_as_json_id, out->argc, out->argv);
529
529
  }
530
- if (Yes == out->opts->trace) {
530
+ if (RB_UNLIKELY(Yes == out->opts->trace)) {
531
531
  oj_trace("as_json", obj, __FILE__, __LINE__, depth + 1, TraceRubyOut);
532
532
  }
533
533
 
@@ -1464,7 +1464,7 @@ static DumpFunc rails_funcs[] = {
1464
1464
  static void dump_rails_val(VALUE obj, int depth, Out out, bool as_ok) {
1465
1465
  int type = rb_type(obj);
1466
1466
 
1467
- if (Yes == out->opts->trace) {
1467
+ if (RB_UNLIKELY(Yes == out->opts->trace)) {
1468
1468
  oj_trace("dump", obj, __FILE__, __LINE__, depth, TraceIn);
1469
1469
  }
1470
1470
  if (MAX_DEPTH < depth) {
@@ -1475,14 +1475,14 @@ static void dump_rails_val(VALUE obj, int depth, Out out, bool as_ok) {
1475
1475
 
1476
1476
  if (NULL != f) {
1477
1477
  f(obj, depth, out, as_ok);
1478
- if (Yes == out->opts->trace) {
1478
+ if (RB_UNLIKELY(Yes == out->opts->trace)) {
1479
1479
  oj_trace("dump", obj, __FILE__, __LINE__, depth, TraceOut);
1480
1480
  }
1481
1481
  return;
1482
1482
  }
1483
1483
  }
1484
1484
  oj_dump_nil(Qnil, depth, out, false);
1485
- if (Yes == out->opts->trace) {
1485
+ if (RB_UNLIKELY(Yes == out->opts->trace)) {
1486
1486
  oj_trace("dump", Qnil, __FILE__, __LINE__, depth, TraceOut);
1487
1487
  }
1488
1488
  }