oj 3.13.14 → 3.13.17

Sign up to get free protection for your applications and to get access to all the features.
data/ext/oj/parse.c CHANGED
@@ -183,6 +183,36 @@ static void unicode_to_chars(ParseInfo pi, Buf buf, uint32_t code) {
183
183
  }
184
184
  }
185
185
 
186
/*
 * Scalar scan over the body of a quoted string.
 *
 * Advances from str until one of the following is found:
 *   - a double quote ('"'),
 *   - a backslash ('\\'),
 *   - a NUL byte ('\0'),
 *   - or the end of the range is reached.
 *
 * str - first byte to examine
 * end - one past the last byte that may be examined
 *
 * Returns a pointer to the byte that stopped the scan. The result is str
 * itself when str is already at or beyond end; callers tell "ran out of
 * input" apart from "found a stop byte" by comparing the result with end.
 */
static inline const char *scan_string_noSIMD(const char *str, const char *end) {
    // The bound is tested before the dereference so that the byte located
    // at end (or beyond) is never read.
    while (str < end && '"' != *str && '\0' != *str && '\\' != *str) {
        str++;
    }
    return str;
}
194
+
195
#if defined(OJ_USE_SSE4_2)
#include <nmmintrin.h>

/*
 * SSE4.2 scan over the body of a quoted string.
 *
 * Examines the input 16 bytes at a time for as long as a full 16-byte chunk
 * fits before end, looking for a NUL byte, a backslash or a double quote.
 * Whatever is left over (fewer than 16 bytes) is handed to
 * scan_string_noSIMD().
 *
 * str - first byte to examine
 * end - one past the last byte that may be examined
 *
 * Returns a pointer to the first stop byte found in a full chunk, otherwise
 * the result of scan_string_noSIMD() on the remaining tail.
 */
static inline const char *scan_string_SIMD(const char *str, const char *end) {
    // Set of stop bytes: NUL, backslash, double quote. Only the first three
    // entries are passed to the comparison below; the rest is padding.
    static const char chars[16] = "\x00\\\"";
    const __m128i     terminate = _mm_loadu_si128((const __m128i *)&chars[0]);

    // Compare the remaining length instead of computing `end - 16`, which
    // would form a pointer before the buffer when fewer than 16 bytes remain.
    for (; 16 <= end - str; str += 16) {
        const __m128i string = _mm_loadu_si128((const __m128i *)str);
        // Index of the first byte in the chunk equal to any stop byte, or 16
        // when the chunk contains none of them.
        const int r = _mm_cmpestri(terminate,
                                   3,
                                   string,
                                   16,
                                   _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT);
        if (r != 16) {
            return str + r;
        }
    }

    return scan_string_noSIMD(str, end);
}
#endif
215
+
186
216
  // entered at /
187
217
  static void read_escaped_str(ParseInfo pi, const char *start) {
188
218
  struct _buf buf;
@@ -192,11 +222,15 @@ static void read_escaped_str(ParseInfo pi, const char *start) {
192
222
  Val parent = stack_peek(&pi->stack);
193
223
 
194
224
  buf_init(&buf);
195
- if (0 < cnt) {
196
- buf_append_string(&buf, start, cnt);
197
- }
198
- for (s = pi->cur; '"' != *s; s++) {
199
- if (s >= pi->end) {
225
+ buf_append_string(&buf, start, cnt);
226
+
227
+ for (s = pi->cur; '"' != *s;) {
228
+ #if defined(OJ_USE_SSE4_2)
229
+ const char *scanned = scan_string_SIMD(s, pi->end);
230
+ #else
231
+ const char *scanned = scan_string_noSIMD(s, pi->end);
232
+ #endif
233
+ if (scanned >= pi->end) {
200
234
  oj_set_error_at(pi,
201
235
  oj_parse_error_class,
202
236
  __FILE__,
@@ -204,7 +238,12 @@ static void read_escaped_str(ParseInfo pi, const char *start) {
204
238
  "quoted string not terminated");
205
239
  buf_cleanup(&buf);
206
240
  return;
207
- } else if ('\\' == *s) {
241
+ }
242
+
243
+ buf_append_string(&buf, s, (size_t)(scanned - s));
244
+ s = scanned;
245
+
246
+ if ('\\' == *s) {
208
247
  s++;
209
248
  switch (*s) {
210
249
  case 'n': buf_append(&buf, '\n'); break;
@@ -273,8 +312,7 @@ static void read_escaped_str(ParseInfo pi, const char *start) {
273
312
  buf_cleanup(&buf);
274
313
  return;
275
314
  }
276
- } else {
277
- buf_append(&buf, *s);
315
+ s++;
278
316
  }
279
317
  }
280
318
  if (0 == parent) {
@@ -331,22 +369,28 @@ static void read_str(ParseInfo pi) {
331
369
  const char *str = pi->cur;
332
370
  Val parent = stack_peek(&pi->stack);
333
371
 
334
- for (; '"' != *pi->cur; pi->cur++) {
335
- if (pi->end <= pi->cur) {
336
- oj_set_error_at(pi,
337
- oj_parse_error_class,
338
- __FILE__,
339
- __LINE__,
340
- "quoted string not terminated");
341
- return;
342
- } else if ('\0' == *pi->cur) {
343
- oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "NULL byte in string");
344
- return;
345
- } else if ('\\' == *pi->cur) {
346
- read_escaped_str(pi, str);
347
- return;
348
- }
372
+ #if defined(OJ_USE_SSE4_2)
373
+ pi->cur = scan_string_SIMD(pi->cur, pi->end);
374
+ #else
375
+ pi->cur = scan_string_noSIMD(pi->cur, pi->end);
376
+ #endif
377
+ if (RB_UNLIKELY(pi->end <= pi->cur)) {
378
+ oj_set_error_at(pi,
379
+ oj_parse_error_class,
380
+ __FILE__,
381
+ __LINE__,
382
+ "quoted string not terminated");
383
+ return;
384
+ }
385
+ if (RB_UNLIKELY('\0' == *pi->cur)) {
386
+ oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "NULL byte in string");
387
+ return;
349
388
  }
389
+ if ('\\' == *pi->cur) {
390
+ read_escaped_str(pi, str);
391
+ return;
392
+ }
393
+
350
394
  if (0 == parent) { // simple add
351
395
  pi->add_cstr(pi, str, pi->cur - str, str);
352
396
  } else {
data/ext/oj/parser.c CHANGED
@@ -533,6 +533,7 @@ static void calc_num(ojParser p) {
533
533
  // nothing to do
534
534
  break;
535
535
  }
536
+ p->type = OJ_NONE;
536
537
  }
537
538
 
538
539
  static void big_change(ojParser p) {
@@ -598,6 +599,8 @@ static void parse(ojParser p, const byte *json) {
598
599
  const byte *b = json;
599
600
  int i;
600
601
 
602
+ p->line = 1;
603
+ p->col = -1;
601
604
  #if DEBUG
602
605
  printf("*** parse - mode: %c %s\n", p->map[256], (const char *)json);
603
606
  #endif
@@ -652,6 +655,7 @@ static void parse(ojParser p, const byte *json) {
652
655
  }
653
656
  buf_append_string(&p->buf, (const char *)start, b - start);
654
657
  if ('"' == *b) {
658
+ p->cur = b - json;
655
659
  p->funcs[p->stack[p->depth]].add_str(p);
656
660
  p->map = (0 == p->depth) ? value_map : after_map;
657
661
  break;
@@ -661,12 +665,14 @@ static void parse(ojParser p, const byte *json) {
661
665
  p->next_map = (0 == p->depth) ? value_map : after_map;
662
666
  break;
663
667
  case OPEN_OBJECT:
668
+ p->cur = b - json;
664
669
  p->funcs[p->stack[p->depth]].open_object(p);
665
670
  p->depth++;
666
671
  p->stack[p->depth] = OBJECT_FUN;
667
672
  p->map = key1_map;
668
673
  break;
669
674
  case NUM_CLOSE_OBJECT:
675
+ p->cur = b - json;
670
676
  calc_num(p);
671
677
  // flow through
672
678
  case CLOSE_OBJECT:
@@ -677,15 +683,18 @@ static void parse(ojParser p, const byte *json) {
677
683
  return;
678
684
  }
679
685
  p->depth--;
686
+ p->cur = b - json;
680
687
  p->funcs[p->stack[p->depth]].close_object(p);
681
688
  break;
682
689
  case OPEN_ARRAY:
690
+ p->cur = b - json;
683
691
  p->funcs[p->stack[p->depth]].open_array(p);
684
692
  p->depth++;
685
693
  p->stack[p->depth] = ARRAY_FUN;
686
694
  p->map = value_map;
687
695
  break;
688
696
  case NUM_CLOSE_ARRAY:
697
+ p->cur = b - json;
689
698
  calc_num(p);
690
699
  // flow through
691
700
  case CLOSE_ARRAY:
@@ -696,9 +705,11 @@ static void parse(ojParser p, const byte *json) {
696
705
  return;
697
706
  }
698
707
  p->depth--;
708
+ p->cur = b - json;
699
709
  p->funcs[p->stack[p->depth]].close_array(p);
700
710
  break;
701
711
  case NUM_COMMA:
712
+ p->cur = b - json;
702
713
  calc_num(p);
703
714
  if (0 < p->depth && OBJECT_FUN == p->stack[p->depth]) {
704
715
  p->map = key_map;
@@ -860,8 +871,14 @@ static void parse(ojParser p, const byte *json) {
860
871
  b--;
861
872
  p->map = big_exp_map;
862
873
  break;
863
- case NUM_SPC: calc_num(p); break;
864
- case NUM_NEWLINE: calc_num(p); b++;
874
+ case NUM_SPC:
875
+ p->cur = b - json;
876
+ calc_num(p);
877
+ break;
878
+ case NUM_NEWLINE:
879
+ p->cur = b - json;
880
+ calc_num(p);
881
+ b++;
865
882
  #ifdef SPACE_JUMP
866
883
  // for (uint32_t *sj = (uint32_t*)b; 0x20202020 == *sj; sj++) { b += 4; }
867
884
  for (uint16_t *sj = (uint16_t *)b; 0x2020 == *sj; sj++) {
@@ -882,6 +899,7 @@ static void parse(ojParser p, const byte *json) {
882
899
  buf_append_string(&p->buf, (const char *)start, b - start);
883
900
  }
884
901
  if ('"' == *b) {
902
+ p->cur = b - json;
885
903
  p->funcs[p->stack[p->depth]].add_str(p);
886
904
  p->map = p->next_map;
887
905
  break;
@@ -890,6 +908,7 @@ static void parse(ojParser p, const byte *json) {
890
908
  break;
891
909
  case STR_SLASH: p->map = esc_map; break;
892
910
  case STR_QUOTE:
911
+ p->cur = b - json;
893
912
  p->funcs[p->stack[p->depth]].add_str(p);
894
913
  p->map = p->next_map;
895
914
  break;
@@ -967,6 +986,7 @@ static void parse(ojParser p, const byte *json) {
967
986
  case VAL_NULL:
968
987
  if ('u' == b[1] && 'l' == b[2] && 'l' == b[3]) {
969
988
  b += 3;
989
+ p->cur = b - json;
970
990
  p->funcs[p->stack[p->depth]].add_null(p);
971
991
  p->map = (0 == p->depth) ? value_map : after_map;
972
992
  break;
@@ -992,6 +1012,7 @@ static void parse(ojParser p, const byte *json) {
992
1012
  case VAL_TRUE:
993
1013
  if ('r' == b[1] && 'u' == b[2] && 'e' == b[3]) {
994
1014
  b += 3;
1015
+ p->cur = b - json;
995
1016
  p->funcs[p->stack[p->depth]].add_true(p);
996
1017
  p->map = (0 == p->depth) ? value_map : after_map;
997
1018
  break;
@@ -1017,6 +1038,7 @@ static void parse(ojParser p, const byte *json) {
1017
1038
  case VAL_FALSE:
1018
1039
  if ('a' == b[1] && 'l' == b[2] && 's' == b[3] && 'e' == b[4]) {
1019
1040
  b += 4;
1041
+ p->cur = b - json;
1020
1042
  p->funcs[p->stack[p->depth]].add_false(p);
1021
1043
  p->map = (0 == p->depth) ? value_map : after_map;
1022
1044
  break;
@@ -1050,6 +1072,7 @@ static void parse(ojParser p, const byte *json) {
1050
1072
  parse_error(p, "expected null");
1051
1073
  return;
1052
1074
  }
1075
+ p->cur = b - json;
1053
1076
  p->funcs[p->stack[p->depth]].add_null(p);
1054
1077
  p->map = (0 == p->depth) ? value_map : after_map;
1055
1078
  }
@@ -1061,6 +1084,7 @@ static void parse(ojParser p, const byte *json) {
1061
1084
  parse_error(p, "expected false");
1062
1085
  return;
1063
1086
  }
1087
+ p->cur = b - json;
1064
1088
  p->funcs[p->stack[p->depth]].add_false(p);
1065
1089
  p->map = (0 == p->depth) ? value_map : after_map;
1066
1090
  }
@@ -1072,6 +1096,7 @@ static void parse(ojParser p, const byte *json) {
1072
1096
  parse_error(p, "expected true");
1073
1097
  return;
1074
1098
  }
1099
+ p->cur = b - json;
1075
1100
  p->funcs[p->stack[p->depth]].add_true(p);
1076
1101
  p->map = (0 == p->depth) ? value_map : after_map;
1077
1102
  }
@@ -1089,6 +1114,9 @@ static void parse(ojParser p, const byte *json) {
1089
1114
  p->map = trail_map;
1090
1115
  }
1091
1116
  }
1117
+ if (0 < p->depth) {
1118
+ parse_error(p, "parse error, not closed");
1119
+ }
1092
1120
  if (0 == p->depth) {
1093
1121
  switch (p->map[256]) {
1094
1122
  case '0':
@@ -1099,7 +1127,10 @@ static void parse(ojParser p, const byte *json) {
1099
1127
  case 'D':
1100
1128
  case 'g':
1101
1129
  case 'B':
1102
- case 'Y': calc_num(p); break;
1130
+ case 'Y':
1131
+ p->cur = b - json;
1132
+ calc_num(p);
1133
+ break;
1103
1134
  }
1104
1135
  }
1105
1136
  return;
@@ -1456,7 +1487,7 @@ static VALUE saj_parser = Qundef;
1456
1487
  /* Document-method: saj
1457
1488
  * call-seq: saj
1458
1489
  *
1459
- * Returns the default saj parser. Note the default SAJ parser can not be used
1490
+ * Returns the default SAJ parser. Note the default SAJ parser can not be used
1460
1491
  * concurrently in more than one thread.
1461
1492
  */
1462
1493
  static VALUE parser_saj(VALUE self) {
data/ext/oj/parser.h CHANGED
@@ -80,6 +80,7 @@ typedef struct _ojParser {
80
80
 
81
81
  char token[8];
82
82
  long line;
83
+ long cur; // only set before call to a function
83
84
  long col;
84
85
  int ri;
85
86
  uint32_t ucode;
data/ext/oj/rails.c CHANGED
@@ -320,7 +320,6 @@ static void dump_time(VALUE obj, int depth, Out out, bool as_ok) {
320
320
  long long sec;
321
321
  long long nsec;
322
322
 
323
- #ifdef HAVE_RB_TIME_TIMESPEC
324
323
  if (16 <= sizeof(struct timespec)) {
325
324
  struct timespec ts = rb_time_timespec(obj);
326
325
 
@@ -330,10 +329,6 @@ static void dump_time(VALUE obj, int depth, Out out, bool as_ok) {
330
329
  sec = NUM2LL(rb_funcall2(obj, oj_tv_sec_id, 0, 0));
331
330
  nsec = NUM2LL(rb_funcall2(obj, oj_tv_nsec_id, 0, 0));
332
331
  }
333
- #else
334
- sec = NUM2LL(rb_funcall2(obj, oj_tv_sec_id, 0, 0));
335
- nsec = NUM2LL(rb_funcall2(obj, oj_tv_nsec_id, 0, 0));
336
- #endif
337
332
  dump_sec_nano(obj, sec, nsec, out);
338
333
  }
339
334