oj 3.13.15 → 3.13.18

Sign up to get free protection for your applications and to get access to all the features.
data/ext/oj/parse.c CHANGED
@@ -183,6 +183,74 @@ static void unicode_to_chars(ParseInfo pi, Buf buf, uint32_t code) {
183
183
  }
184
184
  }
185
185
 
186
+ static inline const char *scan_string_noSIMD(const char *str, const char *end) {
187
+ for (; '"' != *str; str++) {
188
+ if (end <= str || '\0' == *str || '\\' == *str) {
189
+ break;
190
+ }
191
+ }
192
+ return str;
193
+ }
194
+
195
+ // Taken from Tensorflow:
196
+ // https://github.com/tensorflow/tensorflow/blob/5dcfc51118817f27fad5246812d83e5dccdc5f72/tensorflow/core/lib/hash/crc32c_accelerate.cc#L21-L38
197
+ #ifdef __SSE4_2__
198
+ #if defined(__x86_64__) && defined(__GNUC__) && \
199
+ (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
200
+ #define USE_SSE_DETECT 1
201
+ #elif defined(__x86_64__) && defined(__clang__)
202
+ #if __has_builtin(__builtin_cpu_supports)
203
+ #define USE_SSE_DETECT 1
204
+ #endif
205
+ #endif
206
+ #endif /* __SSE4_2__ */
207
+
208
+ // This version of Apple clang has a bug:
209
+ // https://llvm.org/bugs/show_bug.cgi?id=25510
210
+ #if defined(__APPLE__) && (__clang_major__ <= 8)
211
+ #undef USE_SSE_DETECT
212
+ #endif
213
+
214
+ #ifdef USE_SSE_DETECT
215
+ #include <nmmintrin.h>
216
+
217
+ static inline const char *scan_string_SIMD(const char *str, const char *end) {
218
+ static const char chars[16] = "\x00\\\"";
219
+ const __m128i terminate = _mm_loadu_si128((const __m128i *)&chars[0]);
220
+ const char *_end = (const char *)(end - 16);
221
+
222
+ for (; str <= _end; str += 16) {
223
+ const __m128i string = _mm_loadu_si128((const __m128i *)str);
224
+ const int r = _mm_cmpestri(terminate, 3, string, 16, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT);
225
+ if (r != 16) {
226
+ str = (char*)(str + r);
227
+ return str;
228
+ }
229
+ }
230
+
231
+ return scan_string_noSIMD(str, end);
232
+ }
233
+ #endif
234
+
235
+ static bool cpu_supports_sse42(void) {
236
+ #if USE_SSE_DETECT
237
+ __builtin_cpu_init();
238
+ return (__builtin_cpu_supports("sse4.2"));
239
+ #else
240
+ return false;
241
+ #endif
242
+ }
243
+
244
+ static const char *(*scan_func) (const char *str, const char *end) = scan_string_noSIMD;
245
+
246
+ void oj_scanner_init(void) {
247
+ if (cpu_supports_sse42()) {
248
+ #if USE_SSE_DETECT
249
+ scan_func = scan_string_SIMD;
250
+ #endif
251
+ }
252
+ }
253
+
186
254
  // entered at /
187
255
  static void read_escaped_str(ParseInfo pi, const char *start) {
188
256
  struct _buf buf;
@@ -192,11 +260,11 @@ static void read_escaped_str(ParseInfo pi, const char *start) {
192
260
  Val parent = stack_peek(&pi->stack);
193
261
 
194
262
  buf_init(&buf);
195
- if (0 < cnt) {
196
- buf_append_string(&buf, start, cnt);
197
- }
198
- for (s = pi->cur; '"' != *s; s++) {
199
- if (s >= pi->end) {
263
+ buf_append_string(&buf, start, cnt);
264
+
265
+ for (s = pi->cur; '"' != *s;) {
266
+ const char *scanned = scan_func(s, pi->end);
267
+ if (scanned >= pi->end) {
200
268
  oj_set_error_at(pi,
201
269
  oj_parse_error_class,
202
270
  __FILE__,
@@ -204,7 +272,12 @@ static void read_escaped_str(ParseInfo pi, const char *start) {
204
272
  "quoted string not terminated");
205
273
  buf_cleanup(&buf);
206
274
  return;
207
- } else if ('\\' == *s) {
275
+ }
276
+
277
+ buf_append_string(&buf, s, (size_t)(scanned - s));
278
+ s = scanned;
279
+
280
+ if ('\\' == *s) {
208
281
  s++;
209
282
  switch (*s) {
210
283
  case 'n': buf_append(&buf, '\n'); break;
@@ -273,8 +346,7 @@ static void read_escaped_str(ParseInfo pi, const char *start) {
273
346
  buf_cleanup(&buf);
274
347
  return;
275
348
  }
276
- } else {
277
- buf_append(&buf, *s);
349
+ s++;
278
350
  }
279
351
  }
280
352
  if (0 == parent) {
@@ -327,44 +399,11 @@ static void read_escaped_str(ParseInfo pi, const char *start) {
327
399
  buf_cleanup(&buf);
328
400
  }
329
401
 
330
- static inline void scan_string_noSIMD(ParseInfo pi) {
331
- for (; '"' != *pi->cur; pi->cur++) {
332
- if (pi->end <= pi->cur || '\0' == *pi->cur || '\\' == *pi->cur) {
333
- return;
334
- }
335
- }
336
- }
337
-
338
- #if defined(OJ_USE_SSE4_2)
339
- #include <nmmintrin.h>
340
-
341
- static inline void scan_string_SIMD(ParseInfo pi) {
342
- static const char chars[16] = "\x00\\\"";
343
- const __m128i terminate = _mm_loadu_si128((const __m128i *)&chars[0]);
344
- const char *end = (const char *)(pi->end - 16);
345
-
346
- for (; pi->cur <= end; pi->cur += 16) {
347
- const __m128i string = _mm_loadu_si128((const __m128i *)pi->cur);
348
- const int r = _mm_cmpestri(terminate, 3, string, 16, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT);
349
- if (r != 16) {
350
- pi->cur = (const char*)(pi->cur + r);
351
- return;
352
- }
353
- }
354
-
355
- scan_string_noSIMD(pi);
356
- }
357
- #endif
358
-
359
402
  static void read_str(ParseInfo pi) {
360
403
  const char *str = pi->cur;
361
404
  Val parent = stack_peek(&pi->stack);
362
405
 
363
- #if defined(OJ_USE_SSE4_2)
364
- scan_string_SIMD(pi);
365
- #else
366
- scan_string_noSIMD(pi);
367
- #endif
406
+ pi->cur = scan_func(pi->cur, pi->end);
368
407
  if (RB_UNLIKELY(pi->end <= pi->cur)) {
369
408
  oj_set_error_at(pi,
370
409
  oj_parse_error_class,
@@ -494,33 +533,31 @@ static void read_num(ParseInfo pi) {
494
533
  int dec_cnt = 0;
495
534
  bool zero1 = false;
496
535
 
536
+ // Skip leading zeros.
537
+ for (; '0' == *pi->cur; pi->cur++) {
538
+ zero1 = true;
539
+ }
540
+
497
541
  for (; '0' <= *pi->cur && *pi->cur <= '9'; pi->cur++) {
498
- if (0 == ni.i && '0' == *pi->cur) {
499
- zero1 = true;
500
- }
501
- if (0 < ni.i) {
502
- dec_cnt++;
503
- }
504
- if (!ni.big) {
505
- int d = (*pi->cur - '0');
542
+ int d = (*pi->cur - '0');
506
543
 
507
- if (0 < d) {
508
- if (zero1 && CompatMode == pi->options.mode) {
509
- oj_set_error_at(pi,
510
- oj_parse_error_class,
511
- __FILE__,
512
- __LINE__,
513
- "not a number");
514
- return;
515
- }
516
- zero1 = false;
517
- }
518
- ni.i = ni.i * 10 + d;
519
- if (INT64_MAX <= ni.i || DEC_MAX < dec_cnt) {
520
- ni.big = 1;
521
- }
544
+ if (RB_LIKELY(0 != ni.i)) {
545
+ dec_cnt++;
522
546
  }
547
+ ni.i = ni.i * 10 + d;
548
+ }
549
+ if (RB_UNLIKELY(0 != ni.i && zero1 && CompatMode == pi->options.mode)) {
550
+ oj_set_error_at(pi,
551
+ oj_parse_error_class,
552
+ __FILE__,
553
+ __LINE__,
554
+ "not a number");
555
+ return;
556
+ }
557
+ if (INT64_MAX <= ni.i || DEC_MAX < dec_cnt) {
558
+ ni.big = true;
523
559
  }
560
+
524
561
  if ('.' == *pi->cur) {
525
562
  pi->cur++;
526
563
  // A trailing . is not a valid decimal but if encountered allow it
@@ -540,25 +577,20 @@ static void read_num(ParseInfo pi) {
540
577
  for (; '0' <= *pi->cur && *pi->cur <= '9'; pi->cur++) {
541
578
  int d = (*pi->cur - '0');
542
579
 
543
- if (0 < ni.num || 0 < ni.i) {
580
+ if (RB_LIKELY(0 != ni.num || 0 != ni.i)) {
544
581
  dec_cnt++;
545
582
  }
546
- if (INT64_MAX <= ni.div) {
547
- if (!ni.no_big) {
548
- ni.big = true;
549
- }
550
- } else {
551
- ni.num = ni.num * 10 + d;
552
- ni.div *= 10;
553
- ni.di++;
554
- if (INT64_MAX <= ni.div || DEC_MAX < dec_cnt) {
555
- if (!ni.no_big) {
556
- ni.big = true;
557
- }
558
- }
559
- }
583
+ ni.num = ni.num * 10 + d;
584
+ ni.div *= 10;
585
+ ni.di++;
560
586
  }
561
587
  }
588
+ if (INT64_MAX <= ni.div || DEC_MAX < dec_cnt) {
589
+ if (!ni.no_big) {
590
+ ni.big = true;
591
+ }
592
+ }
593
+
562
594
  if ('e' == *pi->cur || 'E' == *pi->cur) {
563
595
  int eneg = 0;
564
596
 
data/ext/oj/parse.h CHANGED
@@ -97,6 +97,8 @@ static inline void parse_info_init(ParseInfo pi) {
97
97
  memset(pi, 0, sizeof(struct _parseInfo));
98
98
  }
99
99
 
100
+ extern void oj_scanner_init();
101
+
100
102
  static inline bool empty_ok(Options options) {
101
103
  switch (options->mode) {
102
104
  case ObjectMode:
data/ext/oj/parser.c CHANGED
@@ -533,6 +533,7 @@ static void calc_num(ojParser p) {
533
533
  // nothing to do
534
534
  break;
535
535
  }
536
+ p->type = OJ_NONE;
536
537
  }
537
538
 
538
539
  static void big_change(ojParser p) {
@@ -598,6 +599,8 @@ static void parse(ojParser p, const byte *json) {
598
599
  const byte *b = json;
599
600
  int i;
600
601
 
602
+ p->line = 1;
603
+ p->col = -1;
601
604
  #if DEBUG
602
605
  printf("*** parse - mode: %c %s\n", p->map[256], (const char *)json);
603
606
  #endif
@@ -652,6 +655,7 @@ static void parse(ojParser p, const byte *json) {
652
655
  }
653
656
  buf_append_string(&p->buf, (const char *)start, b - start);
654
657
  if ('"' == *b) {
658
+ p->cur = b - json;
655
659
  p->funcs[p->stack[p->depth]].add_str(p);
656
660
  p->map = (0 == p->depth) ? value_map : after_map;
657
661
  break;
@@ -661,12 +665,14 @@ static void parse(ojParser p, const byte *json) {
661
665
  p->next_map = (0 == p->depth) ? value_map : after_map;
662
666
  break;
663
667
  case OPEN_OBJECT:
668
+ p->cur = b - json;
664
669
  p->funcs[p->stack[p->depth]].open_object(p);
665
670
  p->depth++;
666
671
  p->stack[p->depth] = OBJECT_FUN;
667
672
  p->map = key1_map;
668
673
  break;
669
674
  case NUM_CLOSE_OBJECT:
675
+ p->cur = b - json;
670
676
  calc_num(p);
671
677
  // flow through
672
678
  case CLOSE_OBJECT:
@@ -677,15 +683,18 @@ static void parse(ojParser p, const byte *json) {
677
683
  return;
678
684
  }
679
685
  p->depth--;
686
+ p->cur = b - json;
680
687
  p->funcs[p->stack[p->depth]].close_object(p);
681
688
  break;
682
689
  case OPEN_ARRAY:
690
+ p->cur = b - json;
683
691
  p->funcs[p->stack[p->depth]].open_array(p);
684
692
  p->depth++;
685
693
  p->stack[p->depth] = ARRAY_FUN;
686
694
  p->map = value_map;
687
695
  break;
688
696
  case NUM_CLOSE_ARRAY:
697
+ p->cur = b - json;
689
698
  calc_num(p);
690
699
  // flow through
691
700
  case CLOSE_ARRAY:
@@ -696,9 +705,11 @@ static void parse(ojParser p, const byte *json) {
696
705
  return;
697
706
  }
698
707
  p->depth--;
708
+ p->cur = b - json;
699
709
  p->funcs[p->stack[p->depth]].close_array(p);
700
710
  break;
701
711
  case NUM_COMMA:
712
+ p->cur = b - json;
702
713
  calc_num(p);
703
714
  if (0 < p->depth && OBJECT_FUN == p->stack[p->depth]) {
704
715
  p->map = key_map;
@@ -860,8 +871,14 @@ static void parse(ojParser p, const byte *json) {
860
871
  b--;
861
872
  p->map = big_exp_map;
862
873
  break;
863
- case NUM_SPC: calc_num(p); break;
864
- case NUM_NEWLINE: calc_num(p); b++;
874
+ case NUM_SPC:
875
+ p->cur = b - json;
876
+ calc_num(p);
877
+ break;
878
+ case NUM_NEWLINE:
879
+ p->cur = b - json;
880
+ calc_num(p);
881
+ b++;
865
882
  #ifdef SPACE_JUMP
866
883
  // for (uint32_t *sj = (uint32_t*)b; 0x20202020 == *sj; sj++) { b += 4; }
867
884
  for (uint16_t *sj = (uint16_t *)b; 0x2020 == *sj; sj++) {
@@ -882,6 +899,7 @@ static void parse(ojParser p, const byte *json) {
882
899
  buf_append_string(&p->buf, (const char *)start, b - start);
883
900
  }
884
901
  if ('"' == *b) {
902
+ p->cur = b - json;
885
903
  p->funcs[p->stack[p->depth]].add_str(p);
886
904
  p->map = p->next_map;
887
905
  break;
@@ -890,6 +908,7 @@ static void parse(ojParser p, const byte *json) {
890
908
  break;
891
909
  case STR_SLASH: p->map = esc_map; break;
892
910
  case STR_QUOTE:
911
+ p->cur = b - json;
893
912
  p->funcs[p->stack[p->depth]].add_str(p);
894
913
  p->map = p->next_map;
895
914
  break;
@@ -967,6 +986,7 @@ static void parse(ojParser p, const byte *json) {
967
986
  case VAL_NULL:
968
987
  if ('u' == b[1] && 'l' == b[2] && 'l' == b[3]) {
969
988
  b += 3;
989
+ p->cur = b - json;
970
990
  p->funcs[p->stack[p->depth]].add_null(p);
971
991
  p->map = (0 == p->depth) ? value_map : after_map;
972
992
  break;
@@ -992,6 +1012,7 @@ static void parse(ojParser p, const byte *json) {
992
1012
  case VAL_TRUE:
993
1013
  if ('r' == b[1] && 'u' == b[2] && 'e' == b[3]) {
994
1014
  b += 3;
1015
+ p->cur = b - json;
995
1016
  p->funcs[p->stack[p->depth]].add_true(p);
996
1017
  p->map = (0 == p->depth) ? value_map : after_map;
997
1018
  break;
@@ -1017,6 +1038,7 @@ static void parse(ojParser p, const byte *json) {
1017
1038
  case VAL_FALSE:
1018
1039
  if ('a' == b[1] && 'l' == b[2] && 's' == b[3] && 'e' == b[4]) {
1019
1040
  b += 4;
1041
+ p->cur = b - json;
1020
1042
  p->funcs[p->stack[p->depth]].add_false(p);
1021
1043
  p->map = (0 == p->depth) ? value_map : after_map;
1022
1044
  break;
@@ -1050,6 +1072,7 @@ static void parse(ojParser p, const byte *json) {
1050
1072
  parse_error(p, "expected null");
1051
1073
  return;
1052
1074
  }
1075
+ p->cur = b - json;
1053
1076
  p->funcs[p->stack[p->depth]].add_null(p);
1054
1077
  p->map = (0 == p->depth) ? value_map : after_map;
1055
1078
  }
@@ -1061,6 +1084,7 @@ static void parse(ojParser p, const byte *json) {
1061
1084
  parse_error(p, "expected false");
1062
1085
  return;
1063
1086
  }
1087
+ p->cur = b - json;
1064
1088
  p->funcs[p->stack[p->depth]].add_false(p);
1065
1089
  p->map = (0 == p->depth) ? value_map : after_map;
1066
1090
  }
@@ -1072,6 +1096,7 @@ static void parse(ojParser p, const byte *json) {
1072
1096
  parse_error(p, "expected true");
1073
1097
  return;
1074
1098
  }
1099
+ p->cur = b - json;
1075
1100
  p->funcs[p->stack[p->depth]].add_true(p);
1076
1101
  p->map = (0 == p->depth) ? value_map : after_map;
1077
1102
  }
@@ -1089,6 +1114,9 @@ static void parse(ojParser p, const byte *json) {
1089
1114
  p->map = trail_map;
1090
1115
  }
1091
1116
  }
1117
+ if (0 < p->depth) {
1118
+ parse_error(p, "parse error, not closed");
1119
+ }
1092
1120
  if (0 == p->depth) {
1093
1121
  switch (p->map[256]) {
1094
1122
  case '0':
@@ -1099,7 +1127,10 @@ static void parse(ojParser p, const byte *json) {
1099
1127
  case 'D':
1100
1128
  case 'g':
1101
1129
  case 'B':
1102
- case 'Y': calc_num(p); break;
1130
+ case 'Y':
1131
+ p->cur = b - json;
1132
+ calc_num(p);
1133
+ break;
1103
1134
  }
1104
1135
  }
1105
1136
  return;
@@ -1456,7 +1487,7 @@ static VALUE saj_parser = Qundef;
1456
1487
  /* Document-method: saj
1457
1488
  * call-seq: saj
1458
1489
  *
1459
- * Returns the default saj parser. Note the default SAJ parser can not be used
1490
+ * Returns the default SAJ parser. Note the default SAJ parser can not be used
1460
1491
  * concurrently in more than one thread.
1461
1492
  */
1462
1493
  static VALUE parser_saj(VALUE self) {
data/ext/oj/parser.h CHANGED
@@ -80,6 +80,7 @@ typedef struct _ojParser {
80
80
 
81
81
  char token[8];
82
82
  long line;
83
+ long cur; // only set before call to a function
83
84
  long col;
84
85
  int ri;
85
86
  uint32_t ucode;
data/ext/oj/rails.c CHANGED
@@ -517,7 +517,7 @@ static void dump_as_string(VALUE obj, int depth, Out out, bool as_ok) {
517
517
  static void dump_as_json(VALUE obj, int depth, Out out, bool as_ok) {
518
518
  volatile VALUE ja;
519
519
 
520
- if (Yes == out->opts->trace) {
520
+ if (RB_UNLIKELY(Yes == out->opts->trace)) {
521
521
  oj_trace("as_json", obj, __FILE__, __LINE__, depth + 1, TraceRubyIn);
522
522
  }
523
523
  // Some classes elect to not take an options argument so check the arity
@@ -527,7 +527,7 @@ static void dump_as_json(VALUE obj, int depth, Out out, bool as_ok) {
527
527
  } else {
528
528
  ja = rb_funcall2(obj, oj_as_json_id, out->argc, out->argv);
529
529
  }
530
- if (Yes == out->opts->trace) {
530
+ if (RB_UNLIKELY(Yes == out->opts->trace)) {
531
531
  oj_trace("as_json", obj, __FILE__, __LINE__, depth + 1, TraceRubyOut);
532
532
  }
533
533
 
@@ -1464,7 +1464,7 @@ static DumpFunc rails_funcs[] = {
1464
1464
  static void dump_rails_val(VALUE obj, int depth, Out out, bool as_ok) {
1465
1465
  int type = rb_type(obj);
1466
1466
 
1467
- if (Yes == out->opts->trace) {
1467
+ if (RB_UNLIKELY(Yes == out->opts->trace)) {
1468
1468
  oj_trace("dump", obj, __FILE__, __LINE__, depth, TraceIn);
1469
1469
  }
1470
1470
  if (MAX_DEPTH < depth) {
@@ -1475,14 +1475,14 @@ static void dump_rails_val(VALUE obj, int depth, Out out, bool as_ok) {
1475
1475
 
1476
1476
  if (NULL != f) {
1477
1477
  f(obj, depth, out, as_ok);
1478
- if (Yes == out->opts->trace) {
1478
+ if (RB_UNLIKELY(Yes == out->opts->trace)) {
1479
1479
  oj_trace("dump", obj, __FILE__, __LINE__, depth, TraceOut);
1480
1480
  }
1481
1481
  return;
1482
1482
  }
1483
1483
  }
1484
1484
  oj_dump_nil(Qnil, depth, out, false);
1485
- if (Yes == out->opts->trace) {
1485
+ if (RB_UNLIKELY(Yes == out->opts->trace)) {
1486
1486
  oj_trace("dump", Qnil, __FILE__, __LINE__, depth, TraceOut);
1487
1487
  }
1488
1488
  }