google-protobuf 3.0.0.alpha.1.0 → 3.0.0.alpha.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of google-protobuf might be problematic. Click here for more details.

@@ -1269,6 +1269,7 @@ upb_msgdef *upb_msgdef_new(const void *owner) {
1269
1269
  if (!upb_def_init(UPB_UPCAST(m), UPB_DEF_MSG, &vtbl, owner)) goto err2;
1270
1270
  if (!upb_inttable_init(&m->itof, UPB_CTYPE_PTR)) goto err2;
1271
1271
  if (!upb_strtable_init(&m->ntof, UPB_CTYPE_PTR)) goto err1;
1272
+ m->map_entry = false;
1272
1273
  return m;
1273
1274
 
1274
1275
  err1:
@@ -1283,6 +1284,7 @@ upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner) {
1283
1284
  if (!newm) return NULL;
1284
1285
  bool ok = upb_def_setfullname(UPB_UPCAST(newm),
1285
1286
  upb_def_fullname(UPB_UPCAST(m)), NULL);
1287
+ newm->map_entry = m->map_entry;
1286
1288
  UPB_ASSERT_VAR(ok, ok);
1287
1289
  upb_msg_iter i;
1288
1290
  for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
@@ -1386,6 +1388,15 @@ int upb_msgdef_numfields(const upb_msgdef *m) {
1386
1388
  return upb_strtable_count(&m->ntof);
1387
1389
  }
1388
1390
 
1391
+ void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry) {
1392
+ assert(!upb_msgdef_isfrozen(m));
1393
+ m->map_entry = map_entry;
1394
+ }
1395
+
1396
+ bool upb_msgdef_mapentry(const upb_msgdef *m) {
1397
+ return m->map_entry;
1398
+ }
1399
+
1389
1400
  void upb_msg_begin(upb_msg_iter *iter, const upb_msgdef *m) {
1390
1401
  upb_inttable_begin(iter, &m->itof);
1391
1402
  }
@@ -3401,31 +3412,32 @@ int log2ceil(uint64_t v) {
3401
3412
  }
3402
3413
 
3403
3414
  char *upb_strdup(const char *s) {
3404
- size_t n = strlen(s) + 1;
3415
+ return upb_strdup2(s, strlen(s));
3416
+ }
3417
+
3418
+ char *upb_strdup2(const char *s, size_t len) {
3419
+ // Prevent overflow errors.
3420
+ if (len == SIZE_MAX) return NULL;
3421
+ // Always null-terminate, even if binary data; but don't rely on the input to
3422
+ // have a null-terminating byte since it may be a raw binary buffer.
3423
+ size_t n = len + 1;
3405
3424
  char *p = malloc(n);
3406
- if (p) memcpy(p, s, n);
3425
+ if (p) {
3426
+ memcpy(p, s, len);
3427
+ p[len] = 0;
3428
+ }
3407
3429
  return p;
3408
3430
  }
3409
3431
 
3410
3432
  // A type to represent the lookup key of either a strtable or an inttable.
3411
- // This is like upb_tabkey, but can carry a size also to allow lookups of
3412
- // non-NULL-terminated strings (we don't store string lengths in the table).
3413
3433
  typedef struct {
3414
3434
  upb_tabkey key;
3415
- uint32_t len; // For string keys only.
3416
3435
  } lookupkey_t;
3417
3436
 
3418
- static lookupkey_t strkey(const char *str) {
3419
- lookupkey_t k;
3420
- k.key.str = (char*)str;
3421
- k.len = strlen(str);
3422
- return k;
3423
- }
3424
-
3425
3437
  static lookupkey_t strkey2(const char *str, size_t len) {
3426
3438
  lookupkey_t k;
3427
- k.key.str = (char*)str;
3428
- k.len = len;
3439
+ k.key.s.str = (char*)str;
3440
+ k.key.s.length = len;
3429
3441
  return k;
3430
3442
  }
3431
3443
 
@@ -3607,11 +3619,12 @@ static size_t begin(const upb_table *t) {
3607
3619
  // A simple "subclass" of upb_table that only adds a hash function for strings.
3608
3620
 
3609
3621
  static uint32_t strhash(upb_tabkey key) {
3610
- return MurmurHash2(key.str, strlen(key.str), 0);
3622
+ return MurmurHash2(key.s.str, key.s.length, 0);
3611
3623
  }
3612
3624
 
3613
3625
  static bool streql(upb_tabkey k1, lookupkey_t k2) {
3614
- return strncmp(k1.str, k2.key.str, k2.len) == 0 && k1.str[k2.len] == '\0';
3626
+ return k1.s.length == k2.key.s.length &&
3627
+ memcmp(k1.s.str, k2.key.s.str, k1.s.length) == 0;
3615
3628
  }
3616
3629
 
3617
3630
  bool upb_strtable_init(upb_strtable *t, upb_ctype_t ctype) {
@@ -3620,7 +3633,7 @@ bool upb_strtable_init(upb_strtable *t, upb_ctype_t ctype) {
3620
3633
 
3621
3634
  void upb_strtable_uninit(upb_strtable *t) {
3622
3635
  for (size_t i = 0; i < upb_table_size(&t->t); i++)
3623
- free((void*)t->t.entries[i].key.str);
3636
+ free((void*)t->t.entries[i].key.s.str);
3624
3637
  uninit(&t->t);
3625
3638
  }
3626
3639
 
@@ -3631,26 +3644,30 @@ bool upb_strtable_resize(upb_strtable *t, size_t size_lg2) {
3631
3644
  upb_strtable_iter i;
3632
3645
  upb_strtable_begin(&i, t);
3633
3646
  for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) {
3634
- upb_strtable_insert(
3635
- &new_table, upb_strtable_iter_key(&i), upb_strtable_iter_value(&i));
3647
+ upb_strtable_insert2(
3648
+ &new_table,
3649
+ upb_strtable_iter_key(&i),
3650
+ upb_strtable_iter_keylength(&i),
3651
+ upb_strtable_iter_value(&i));
3636
3652
  }
3637
3653
  upb_strtable_uninit(t);
3638
3654
  *t = new_table;
3639
3655
  return true;
3640
3656
  }
3641
3657
 
3642
- bool upb_strtable_insert(upb_strtable *t, const char *k, upb_value v) {
3658
+ bool upb_strtable_insert2(upb_strtable *t, const char *k, size_t len,
3659
+ upb_value v) {
3643
3660
  if (isfull(&t->t)) {
3644
3661
  // Need to resize. New table of double the size, add old elements to it.
3645
3662
  if (!upb_strtable_resize(t, t->t.size_lg2 + 1)) {
3646
3663
  return false;
3647
3664
  }
3648
3665
  }
3649
- if ((k = upb_strdup(k)) == NULL) return false;
3666
+ if ((k = upb_strdup2(k, len)) == NULL) return false;
3650
3667
 
3651
- lookupkey_t key = strkey(k);
3652
- uint32_t hash = MurmurHash2(key.key.str, key.len, 0);
3653
- insert(&t->t, strkey(k), v, hash, &strhash, &streql);
3668
+ lookupkey_t key = strkey2(k, len);
3669
+ uint32_t hash = MurmurHash2(key.key.s.str, key.key.s.length, 0);
3670
+ insert(&t->t, key, v, hash, &strhash, &streql);
3654
3671
  return true;
3655
3672
  }
3656
3673
 
@@ -3660,11 +3677,12 @@ bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
3660
3677
  return lookup(&t->t, strkey2(key, len), v, hash, &streql);
3661
3678
  }
3662
3679
 
3663
- bool upb_strtable_remove(upb_strtable *t, const char *key, upb_value *val) {
3680
+ bool upb_strtable_remove2(upb_strtable *t, const char *key, size_t len,
3681
+ upb_value *val) {
3664
3682
  uint32_t hash = MurmurHash2(key, strlen(key), 0);
3665
3683
  upb_tabkey tabkey;
3666
- if (rm(&t->t, strkey(key), val, &tabkey, hash, &streql)) {
3667
- free((void*)tabkey.str);
3684
+ if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) {
3685
+ free((void*)tabkey.s.str);
3668
3686
  return true;
3669
3687
  } else {
3670
3688
  return false;
@@ -3693,7 +3711,12 @@ bool upb_strtable_done(const upb_strtable_iter *i) {
3693
3711
 
3694
3712
  const char *upb_strtable_iter_key(upb_strtable_iter *i) {
3695
3713
  assert(!upb_strtable_done(i));
3696
- return str_tabent(i)->key.str;
3714
+ return str_tabent(i)->key.s.str;
3715
+ }
3716
+
3717
+ size_t upb_strtable_iter_keylength(upb_strtable_iter *i) {
3718
+ assert(!upb_strtable_done(i));
3719
+ return str_tabent(i)->key.s.length;
3697
3720
  }
3698
3721
 
3699
3722
  upb_value upb_strtable_iter_value(const upb_strtable_iter *i) {
@@ -4209,8 +4232,10 @@ static void nullz(upb_status *status) {
4209
4232
  }
4210
4233
 
4211
4234
  void upb_status_clear(upb_status *status) {
4212
- upb_status blank = UPB_STATUS_INIT;
4213
- upb_status_copy(status, &blank);
4235
+ if (!status) return;
4236
+ status->ok_ = true;
4237
+ status->code_ = 0;
4238
+ status->msg[0] = '\0';
4214
4239
  }
4215
4240
 
4216
4241
  bool upb_ok(const upb_status *status) { return status->ok_; }
@@ -5977,6 +6002,7 @@ static void putop(compiler *c, opcode op, ...) {
5977
6002
  case OP_SETDELIM:
5978
6003
  case OP_HALT:
5979
6004
  case OP_RET:
6005
+ case OP_DISPATCH:
5980
6006
  put32(c, op);
5981
6007
  break;
5982
6008
  case OP_PARSE_DOUBLE:
@@ -6057,7 +6083,7 @@ const char *upb_pbdecoder_getopname(unsigned int op) {
6057
6083
  OP(ENDSUBMSG), OP(STARTSTR), OP(STRING), OP(ENDSTR), OP(CALL), OP(RET),
6058
6084
  OP(PUSHLENDELIM), OP(PUSHTAGDELIM), OP(SETDELIM), OP(CHECKDELIM),
6059
6085
  OP(BRANCH), OP(TAG1), OP(TAG2), OP(TAGN), OP(SETDISPATCH), OP(POP),
6060
- OP(SETBIGGROUPNUM), OP(HALT),
6086
+ OP(SETBIGGROUPNUM), OP(DISPATCH), OP(HALT),
6061
6087
  };
6062
6088
  return op > OP_HALT ? names[0] : names[op];
6063
6089
  #undef OP
@@ -6089,6 +6115,7 @@ static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) {
6089
6115
  upb_handlers_msgdef(method->dest_handlers_)));
6090
6116
  break;
6091
6117
  }
6118
+ case OP_DISPATCH:
6092
6119
  case OP_STARTMSG:
6093
6120
  case OP_ENDMSG:
6094
6121
  case OP_PUSHLENDELIM:
@@ -6434,6 +6461,7 @@ static void compile_method(compiler *c, upb_pbdecodermethod *method) {
6434
6461
  putop(c, OP_SETDISPATCH, &method->dispatch);
6435
6462
  putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h);
6436
6463
  label(c, LABEL_FIELD);
6464
+ uint32_t* start_pc = c->pc;
6437
6465
  upb_msg_iter i;
6438
6466
  for(upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) {
6439
6467
  const upb_fielddef *f = upb_msg_iter_field(&i);
@@ -6449,8 +6477,18 @@ static void compile_method(compiler *c, upb_pbdecodermethod *method) {
6449
6477
  }
6450
6478
  }
6451
6479
 
6480
+ // If there were no fields, or if no handlers were defined, we need to
6481
+ // generate a non-empty loop body so that we can at least dispatch for unknown
6482
+ // fields and check for the end of the message.
6483
+ if (c->pc == start_pc) {
6484
+ // Check for end-of-message.
6485
+ putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
6486
+ // Unconditionally dispatch.
6487
+ putop(c, OP_DISPATCH, 0);
6488
+ }
6489
+
6452
6490
  // For now we just loop back to the last field of the message (or if none,
6453
- // the DISPATCH opcode for the message.
6491
+ // the DISPATCH opcode for the message).
6454
6492
  putop(c, OP_BRANCH, -LABEL_FIELD);
6455
6493
 
6456
6494
  // Insert both a label and a dispatch table entry for this end-of-msg.
@@ -7434,6 +7472,9 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
7434
7472
  if (result == DECODE_MISMATCH) goto badtag;
7435
7473
  if (result >= 0) return result;
7436
7474
  })
7475
+ VMCASE(OP_DISPATCH, {
7476
+ CHECK_RETURN(dispatch(d));
7477
+ })
7437
7478
  VMCASE(OP_HALT, {
7438
7479
  return size;
7439
7480
  })
@@ -7492,7 +7533,8 @@ bool upb_pbdecoder_end(void *closure, const void *handler_data) {
7492
7533
  // Rewind from OP_TAG* to OP_CHECKDELIM.
7493
7534
  assert(getop(*d->pc) == OP_TAG1 ||
7494
7535
  getop(*d->pc) == OP_TAG2 ||
7495
- getop(*d->pc) == OP_TAGN);
7536
+ getop(*d->pc) == OP_TAGN ||
7537
+ getop(*d->pc == OP_DISPATCH));
7496
7538
  d->pc = p;
7497
7539
  }
7498
7540
  upb_pbdecoder_decode(closure, handler_data, &dummy, 0, NULL);
@@ -8627,6 +8669,9 @@ upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) {
8627
8669
 
8628
8670
  #define PARSER_CHECK_RETURN(x) if (!(x)) return false
8629
8671
 
8672
+ // Used to signal that a capture has been suspended.
8673
+ static char suspend_capture;
8674
+
8630
8675
  static upb_selector_t getsel_for_handlertype(upb_json_parser *p,
8631
8676
  upb_handlertype_t type) {
8632
8677
  upb_selector_t sel;
@@ -8640,41 +8685,6 @@ static upb_selector_t parser_getsel(upb_json_parser *p) {
8640
8685
  p, upb_handlers_getprimitivehandlertype(p->top->f));
8641
8686
  }
8642
8687
 
8643
- static void start_member(upb_json_parser *p) {
8644
- assert(!p->top->f);
8645
- assert(!p->accumulated);
8646
- p->accumulated_len = 0;
8647
- }
8648
-
8649
- static bool end_member(upb_json_parser *p) {
8650
- // TODO(haberman): support keys that span buffers or have escape sequences.
8651
- assert(!p->top->f);
8652
- assert(p->accumulated);
8653
- const upb_fielddef *f =
8654
- upb_msgdef_ntof(p->top->m, p->accumulated, p->accumulated_len);
8655
-
8656
- if (!f) {
8657
- // TODO(haberman): Ignore unknown fields if requested/configured to do so.
8658
- upb_status_seterrf(p->status, "No such field: %.*s\n",
8659
- (int)p->accumulated_len, p->accumulated);
8660
- return false;
8661
- }
8662
-
8663
- p->top->f = f;
8664
- p->accumulated = NULL;
8665
-
8666
- return true;
8667
- }
8668
-
8669
- static void start_object(upb_json_parser *p) {
8670
- upb_sink_startmsg(&p->top->sink);
8671
- }
8672
-
8673
- static void end_object(upb_json_parser *p) {
8674
- upb_status status;
8675
- upb_sink_endmsg(&p->top->sink, &status);
8676
- }
8677
-
8678
8688
  static bool check_stack(upb_json_parser *p) {
8679
8689
  if ((p->top + 1) == p->limit) {
8680
8690
  upb_status_seterrmsg(p->status, "Nesting too deep");
@@ -8684,83 +8694,28 @@ static bool check_stack(upb_json_parser *p) {
8684
8694
  return true;
8685
8695
  }
8686
8696
 
8687
- static bool start_subobject(upb_json_parser *p) {
8688
- assert(p->top->f);
8689
-
8690
- if (!upb_fielddef_issubmsg(p->top->f)) {
8691
- upb_status_seterrf(p->status,
8692
- "Object specified for non-message/group field: %s",
8693
- upb_fielddef_name(p->top->f));
8694
- return false;
8695
- }
8696
-
8697
- if (!check_stack(p)) return false;
8698
-
8699
- upb_jsonparser_frame *inner = p->top + 1;
8700
-
8701
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
8702
- upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
8703
- inner->m = upb_fielddef_msgsubdef(p->top->f);
8704
- inner->f = NULL;
8705
- p->top = inner;
8697
+ // There are GCC/Clang built-ins for overflow checking which we could start
8698
+ // using if there was any performance benefit to it.
8706
8699
 
8700
+ static bool checked_add(size_t a, size_t b, size_t *c) {
8701
+ if (SIZE_MAX - a < b) return false;
8702
+ *c = a + b;
8707
8703
  return true;
8708
8704
  }
8709
8705
 
8710
- static void end_subobject(upb_json_parser *p) {
8711
- p->top--;
8712
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
8713
- upb_sink_endsubmsg(&p->top->sink, sel);
8714
- }
8715
-
8716
- static bool start_array(upb_json_parser *p) {
8717
- assert(p->top->f);
8718
-
8719
- if (!upb_fielddef_isseq(p->top->f)) {
8720
- upb_status_seterrf(p->status,
8721
- "Array specified for non-repeated field: %s",
8722
- upb_fielddef_name(p->top->f));
8723
- return false;
8706
+ static size_t saturating_multiply(size_t a, size_t b) {
8707
+ // size_t is unsigned, so this is defined behavior even on overflow.
8708
+ size_t ret = a * b;
8709
+ if (b != 0 && ret / b != a) {
8710
+ ret = SIZE_MAX;
8724
8711
  }
8725
-
8726
- if (!check_stack(p)) return false;
8727
-
8728
- upb_jsonparser_frame *inner = p->top + 1;
8729
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
8730
- upb_sink_startseq(&p->top->sink, sel, &inner->sink);
8731
- inner->m = p->top->m;
8732
- inner->f = p->top->f;
8733
- p->top = inner;
8734
-
8735
- return true;
8736
- }
8737
-
8738
- static void end_array(upb_json_parser *p) {
8739
- assert(p->top > p->stack);
8740
-
8741
- p->top--;
8742
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
8743
- upb_sink_endseq(&p->top->sink, sel);
8712
+ return ret;
8744
8713
  }
8745
8714
 
8746
- static void clear_member(upb_json_parser *p) { p->top->f = NULL; }
8747
-
8748
- static bool parser_putbool(upb_json_parser *p, bool val) {
8749
- if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) {
8750
- upb_status_seterrf(p->status,
8751
- "Boolean value specified for non-bool field: %s",
8752
- upb_fielddef_name(p->top->f));
8753
- return false;
8754
- }
8755
8715
 
8756
- bool ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val);
8757
- UPB_ASSERT_VAR(ok, ok);
8758
- return true;
8759
- }
8716
+ /* Base64 decoding ************************************************************/
8760
8717
 
8761
- static void start_text(upb_json_parser *p, const char *ptr) {
8762
- p->text_begin = ptr;
8763
- }
8718
+ // TODO(haberman): make this streaming.
8764
8719
 
8765
8720
  static const signed char b64table[] = {
8766
8721
  -1, -1, -1, -1, -1, -1, -1, -1,
@@ -8880,193 +8835,274 @@ badpadding:
8880
8835
  return false;
8881
8836
  }
8882
8837
 
8883
- static bool end_text(upb_json_parser *p, const char *ptr, bool is_num) {
8884
- assert(!p->accumulated); // TODO: handle this case.
8885
- p->accumulated = p->text_begin;
8886
- p->accumulated_len = ptr - p->text_begin;
8887
8838
 
8888
- if (p->top->f && upb_fielddef_isstring(p->top->f)) {
8889
- // This is a string field (as opposed to a member name).
8890
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
8891
- if (upb_fielddef_type(p->top->f) == UPB_TYPE_BYTES) {
8892
- PARSER_CHECK_RETURN(base64_push(p, sel, p->accumulated,
8893
- p->accumulated_len));
8894
- } else {
8895
- upb_sink_putstring(&p->top->sink, sel, p->accumulated, p->accumulated_len, NULL);
8896
- }
8897
- p->accumulated = NULL;
8898
- } else if (p->top->f &&
8899
- upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM &&
8900
- !is_num) {
8901
-
8902
- // Enum case: resolve enum symbolic name to integer value.
8903
- const upb_enumdef *enumdef =
8904
- (const upb_enumdef*)upb_fielddef_subdef(p->top->f);
8905
-
8906
- int32_t int_val = 0;
8907
- if (upb_enumdef_ntoi(enumdef, p->accumulated, p->accumulated_len,
8908
- &int_val)) {
8909
- upb_selector_t sel = parser_getsel(p);
8910
- upb_sink_putint32(&p->top->sink, sel, int_val);
8911
- } else {
8912
- upb_status_seterrmsg(p->status, "Enum value name unknown");
8913
- return false;
8914
- }
8915
- p->accumulated = NULL;
8916
- }
8839
+ /* Accumulate buffer **********************************************************/
8917
8840
 
8918
- return true;
8841
+ // Functionality for accumulating a buffer.
8842
+ //
8843
+ // Some parts of the parser need an entire value as a contiguous string. For
8844
+ // example, to look up a member name in a hash table, or to turn a string into
8845
+ // a number, the relevant library routines need the input string to be in
8846
+ // contiguous memory, even if the value spanned two or more buffers in the
8847
+ // input. These routines handle that.
8848
+ //
8849
+ // In the common case we can just point to the input buffer to get this
8850
+ // contiguous string and avoid any actual copy. So we optimistically begin
8851
+ // this way. But there are a few cases where we must instead copy into a
8852
+ // separate buffer:
8853
+ //
8854
+ // 1. The string was not contiguous in the input (it spanned buffers).
8855
+ //
8856
+ // 2. The string included escape sequences that need to be interpreted to get
8857
+ // the true value in a contiguous buffer.
8858
+
8859
+ static void assert_accumulate_empty(upb_json_parser *p) {
8860
+ assert(p->accumulated == NULL);
8861
+ assert(p->accumulated_len == 0);
8919
8862
  }
8920
8863
 
8921
- static bool start_stringval(upb_json_parser *p) {
8922
- assert(p->top->f);
8864
+ static void accumulate_clear(upb_json_parser *p) {
8865
+ p->accumulated = NULL;
8866
+ p->accumulated_len = 0;
8867
+ }
8923
8868
 
8924
- if (upb_fielddef_isstring(p->top->f)) {
8925
- if (!check_stack(p)) return false;
8869
+ // Used internally by accumulate_append().
8870
+ static bool accumulate_realloc(upb_json_parser *p, size_t need) {
8871
+ size_t new_size = UPB_MAX(p->accumulate_buf_size, 128);
8872
+ while (new_size < need) {
8873
+ new_size = saturating_multiply(new_size, 2);
8874
+ }
8926
8875
 
8927
- // Start a new parser frame: parser frames correspond one-to-one with
8928
- // handler frames, and string events occur in a sub-frame.
8929
- upb_jsonparser_frame *inner = p->top + 1;
8930
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
8931
- upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
8932
- inner->m = p->top->m;
8933
- inner->f = p->top->f;
8934
- p->top = inner;
8876
+ void *mem = realloc(p->accumulate_buf, new_size);
8877
+ if (!mem) {
8878
+ upb_status_seterrmsg(p->status, "Out of memory allocating buffer.");
8879
+ return false;
8880
+ }
8935
8881
 
8882
+ p->accumulate_buf = mem;
8883
+ p->accumulate_buf_size = new_size;
8884
+ return true;
8885
+ }
8886
+
8887
+ // Logically appends the given data to the append buffer.
8888
+ // If "can_alias" is true, we will try to avoid actually copying, but the buffer
8889
+ // must be valid until the next accumulate_append() call (if any).
8890
+ static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len,
8891
+ bool can_alias) {
8892
+ if (!p->accumulated && can_alias) {
8893
+ p->accumulated = buf;
8894
+ p->accumulated_len = len;
8936
8895
  return true;
8937
- } else if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) {
8938
- // Do nothing -- symbolic enum names in quotes remain in the
8939
- // current parser frame.
8940
- return true;
8941
- } else {
8942
- upb_status_seterrf(p->status,
8943
- "String specified for non-string/non-enum field: %s",
8944
- upb_fielddef_name(p->top->f));
8896
+ }
8897
+
8898
+ size_t need;
8899
+ if (!checked_add(p->accumulated_len, len, &need)) {
8900
+ upb_status_seterrmsg(p->status, "Integer overflow.");
8945
8901
  return false;
8946
8902
  }
8947
8903
 
8948
- }
8904
+ if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) {
8905
+ return false;
8906
+ }
8949
8907
 
8950
- static void end_stringval(upb_json_parser *p) {
8951
- if (upb_fielddef_isstring(p->top->f)) {
8952
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
8953
- upb_sink_endstr(&p->top->sink, sel);
8954
- p->top--;
8908
+ if (p->accumulated != p->accumulate_buf) {
8909
+ memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len);
8910
+ p->accumulated = p->accumulate_buf;
8955
8911
  }
8912
+
8913
+ memcpy(p->accumulate_buf + p->accumulated_len, buf, len);
8914
+ p->accumulated_len += len;
8915
+ return true;
8956
8916
  }
8957
8917
 
8958
- static void start_number(upb_json_parser *p, const char *ptr) {
8959
- start_text(p, ptr);
8960
- assert(p->accumulated == NULL);
8918
+ // Returns a pointer to the data accumulated since the last accumulate_clear()
8919
+ // call, and writes the length to *len. This will point either to the input
8920
+ // buffer or a temporary accumulate buffer.
8921
+ static const char *accumulate_getptr(upb_json_parser *p, size_t *len) {
8922
+ assert(p->accumulated);
8923
+ *len = p->accumulated_len;
8924
+ return p->accumulated;
8961
8925
  }
8962
8926
 
8963
- static void end_number(upb_json_parser *p, const char *ptr) {
8964
- end_text(p, ptr, true);
8965
- const char *myend = p->accumulated + p->accumulated_len;
8966
- char *end;
8967
8927
 
8968
- switch (upb_fielddef_type(p->top->f)) {
8969
- case UPB_TYPE_ENUM:
8970
- case UPB_TYPE_INT32: {
8971
- long val = strtol(p->accumulated, &end, 0);
8972
- if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || end != myend)
8973
- assert(false);
8974
- else
8975
- upb_sink_putint32(&p->top->sink, parser_getsel(p), val);
8976
- break;
8977
- }
8978
- case UPB_TYPE_INT64: {
8979
- long long val = strtoll(p->accumulated, &end, 0);
8980
- if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || end != myend)
8981
- assert(false);
8982
- else
8983
- upb_sink_putint64(&p->top->sink, parser_getsel(p), val);
8984
- break;
8985
- }
8986
- case UPB_TYPE_UINT32: {
8987
- unsigned long val = strtoul(p->accumulated, &end, 0);
8988
- if (val > UINT32_MAX || errno == ERANGE || end != myend)
8989
- assert(false);
8990
- else
8991
- upb_sink_putuint32(&p->top->sink, parser_getsel(p), val);
8992
- break;
8993
- }
8994
- case UPB_TYPE_UINT64: {
8995
- unsigned long long val = strtoull(p->accumulated, &end, 0);
8996
- if (val > UINT64_MAX || errno == ERANGE || end != myend)
8997
- assert(false);
8998
- else
8999
- upb_sink_putuint64(&p->top->sink, parser_getsel(p), val);
9000
- break;
9001
- }
9002
- case UPB_TYPE_DOUBLE: {
9003
- double val = strtod(p->accumulated, &end);
9004
- if (errno == ERANGE || end != myend)
9005
- assert(false);
9006
- else
9007
- upb_sink_putdouble(&p->top->sink, parser_getsel(p), val);
8928
+ /* Multi-part text data *******************************************************/
8929
+
8930
+ // When we have text data in the input, it can often come in multiple segments.
8931
+ // For example, there may be some raw string data followed by an escape
8932
+ // sequence. The two segments are processed with different logic. Also buffer
8933
+ // seams in the input can cause multiple segments.
8934
+ //
8935
+ // As we see segments, there are two main cases for how we want to process them:
8936
+ //
8937
+ // 1. we want to push the captured input directly to string handlers.
8938
+ //
8939
+ // 2. we need to accumulate all the parts into a contiguous buffer for further
8940
+ // processing (field name lookup, string->number conversion, etc).
8941
+
8942
+ // This is the set of states for p->multipart_state.
8943
+ enum {
8944
+ // We are not currently processing multipart data.
8945
+ MULTIPART_INACTIVE = 0,
8946
+
8947
+ // We are processing multipart data by accumulating it into a contiguous
8948
+ // buffer.
8949
+ MULTIPART_ACCUMULATE = 1,
8950
+
8951
+ // We are processing multipart data by pushing each part directly to the
8952
+ // current string handlers.
8953
+ MULTIPART_PUSHEAGERLY = 2
8954
+ };
8955
+
8956
+ // Start a multi-part text value where we accumulate the data for processing at
8957
+ // the end.
8958
+ static void multipart_startaccum(upb_json_parser *p) {
8959
+ assert_accumulate_empty(p);
8960
+ assert(p->multipart_state == MULTIPART_INACTIVE);
8961
+ p->multipart_state = MULTIPART_ACCUMULATE;
8962
+ }
8963
+
8964
+ // Start a multi-part text value where we immediately push text data to a string
8965
+ // value with the given selector.
8966
+ static void multipart_start(upb_json_parser *p, upb_selector_t sel) {
8967
+ assert_accumulate_empty(p);
8968
+ assert(p->multipart_state == MULTIPART_INACTIVE);
8969
+ p->multipart_state = MULTIPART_PUSHEAGERLY;
8970
+ p->string_selector = sel;
8971
+ }
8972
+
8973
+ static bool multipart_text(upb_json_parser *p, const char *buf, size_t len,
8974
+ bool can_alias) {
8975
+ switch (p->multipart_state) {
8976
+ case MULTIPART_INACTIVE:
8977
+ upb_status_seterrmsg(
8978
+ p->status, "Internal error: unexpected state MULTIPART_INACTIVE");
8979
+ return false;
8980
+
8981
+ case MULTIPART_ACCUMULATE:
8982
+ if (!accumulate_append(p, buf, len, can_alias)) {
8983
+ return false;
8984
+ }
9008
8985
  break;
9009
- }
9010
- case UPB_TYPE_FLOAT: {
9011
- float val = strtof(p->accumulated, &end);
9012
- if (errno == ERANGE || end != myend)
9013
- assert(false);
9014
- else
9015
- upb_sink_putfloat(&p->top->sink, parser_getsel(p), val);
8986
+
8987
+ case MULTIPART_PUSHEAGERLY: {
8988
+ const upb_bufhandle *handle = can_alias ? p->handle : NULL;
8989
+ upb_sink_putstring(&p->top->sink, p->string_selector, buf, len, handle);
9016
8990
  break;
9017
8991
  }
9018
- default:
9019
- assert(false);
9020
8992
  }
9021
8993
 
9022
- p->accumulated = NULL;
8994
+ return true;
9023
8995
  }
9024
8996
 
9025
- static char escape_char(char in) {
9026
- switch (in) {
9027
- case 'r': return '\r';
9028
- case 't': return '\t';
9029
- case 'n': return '\n';
9030
- case 'f': return '\f';
9031
- case 'b': return '\b';
9032
- case '/': return '/';
9033
- case '"': return '"';
9034
- case '\\': return '\\';
9035
- default:
9036
- assert(0);
9037
- return 'x';
9038
- }
8997
+ // Note: this invalidates the accumulate buffer! Call only after reading its
8998
+ // contents.
8999
+ static void multipart_end(upb_json_parser *p) {
9000
+ assert(p->multipart_state != MULTIPART_INACTIVE);
9001
+ p->multipart_state = MULTIPART_INACTIVE;
9002
+ accumulate_clear(p);
9039
9003
  }
9040
9004
 
9041
- static void escape(upb_json_parser *p, const char *ptr) {
9042
- char ch = escape_char(*ptr);
9043
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
9044
- upb_sink_putstring(&p->top->sink, sel, &ch, 1, NULL);
9005
+
9006
+ /* Input capture **************************************************************/
9007
+
9008
+ // Functionality for capturing a region of the input as text. Gracefully
9009
+ // handles the case where a buffer seam occurs in the middle of the captured
9010
+ // region.
9011
+
9012
+ static void capture_begin(upb_json_parser *p, const char *ptr) {
9013
+ assert(p->multipart_state != MULTIPART_INACTIVE);
9014
+ assert(p->capture == NULL);
9015
+ p->capture = ptr;
9045
9016
  }
9046
9017
 
9047
- static uint8_t hexdigit(char ch) {
9048
- if (ch >= '0' && ch <= '9') {
9049
- return ch - '0';
9050
- } else if (ch >= 'a' && ch <= 'f') {
9051
- return ch - 'a' + 10;
9018
+ static bool capture_end(upb_json_parser *p, const char *ptr) {
9019
+ assert(p->capture);
9020
+ if (multipart_text(p, p->capture, ptr - p->capture, true)) {
9021
+ p->capture = NULL;
9022
+ return true;
9052
9023
  } else {
9053
- assert(ch >= 'A' && ch <= 'F');
9054
- return ch - 'A' + 10;
9024
+ return false;
9055
9025
  }
9056
9026
  }
9057
9027
 
9058
- static void start_hex(upb_json_parser *p, const char *ptr) {
9059
- start_text(p, ptr);
9028
+ // This is called at the end of each input buffer (i.e., when we have hit a
9029
+ // buffer seam). If we are in the middle of capturing the input, this
9030
+ // processes the unprocessed capture region.
9031
+ static void capture_suspend(upb_json_parser *p, const char **ptr) {
9032
+ if (!p->capture) return;
9033
+
9034
+ if (multipart_text(p, p->capture, *ptr - p->capture, false)) {
9035
+ // We use this as a signal that we were in the middle of capturing, and
9036
+ // that capturing should resume at the beginning of the next buffer.
9037
+ //
9038
+ // We can't use *ptr here, because we have no guarantee that this pointer
9039
+ // will be valid when we resume (if the underlying memory is freed, then
9040
+ // using the pointer at all, even to compare to NULL, is likely undefined
9041
+ // behavior).
9042
+ p->capture = &suspend_capture;
9043
+ } else {
9044
+ // Need to back up the pointer to the beginning of the capture, since
9045
+ // we were not able to actually preserve it.
9046
+ *ptr = p->capture;
9047
+ }
9048
+ }
9049
+
9050
+ static void capture_resume(upb_json_parser *p, const char *ptr) {
9051
+ if (p->capture) {
9052
+ assert(p->capture == &suspend_capture);
9053
+ p->capture = ptr;
9054
+ }
9055
+ }
9056
+
9057
+
9058
+ /* Callbacks from the parser **************************************************/
9059
+
9060
+ // These are the functions called directly from the parser itself.
9061
+ // We define these in the same order as their declarations in the parser.
9062
+
9063
+ static char escape_char(char in) {
9064
+ switch (in) {
9065
+ case 'r': return '\r';
9066
+ case 't': return '\t';
9067
+ case 'n': return '\n';
9068
+ case 'f': return '\f';
9069
+ case 'b': return '\b';
9070
+ case '/': return '/';
9071
+ case '"': return '"';
9072
+ case '\\': return '\\';
9073
+ default:
9074
+ assert(0);
9075
+ return 'x';
9076
+ }
9077
+ }
9078
+
9079
+ static bool escape(upb_json_parser *p, const char *ptr) {
9080
+ char ch = escape_char(*ptr);
9081
+ return multipart_text(p, &ch, 1, false);
9082
+ }
9083
+
9084
+ static void start_hex(upb_json_parser *p) {
9085
+ p->digit = 0;
9086
+ }
9087
+
9088
+ static void hexdigit(upb_json_parser *p, const char *ptr) {
9089
+ char ch = *ptr;
9090
+
9091
+ p->digit <<= 4;
9092
+
9093
+ if (ch >= '0' && ch <= '9') {
9094
+ p->digit += (ch - '0');
9095
+ } else if (ch >= 'a' && ch <= 'f') {
9096
+ p->digit += ((ch - 'a') + 10);
9097
+ } else {
9098
+ assert(ch >= 'A' && ch <= 'F');
9099
+ p->digit += ((ch - 'A') + 10);
9100
+ }
9060
9101
  }
9061
9102
 
9062
- static void hex(upb_json_parser *p, const char *end) {
9063
- const char *start = p->text_begin;
9064
- UPB_ASSERT_VAR(end, end - start == 4);
9065
- uint16_t codepoint =
9066
- (hexdigit(start[0]) << 12) |
9067
- (hexdigit(start[1]) << 8) |
9068
- (hexdigit(start[2]) << 4) |
9069
- hexdigit(start[3]);
9103
+ static bool end_hex(upb_json_parser *p) {
9104
+ uint32_t codepoint = p->digit;
9105
+
9070
9106
  // emit the codepoint as UTF-8.
9071
9107
  char utf8[3]; // support \u0000 -- \uFFFF -- need only three bytes.
9072
9108
  int length = 0;
@@ -9089,160 +9125,466 @@ static void hex(upb_json_parser *p, const char *end) {
9089
9125
  // TODO(haberman): Handle high surrogates: if codepoint is a high surrogate
9090
9126
  // we have to wait for the next escape to get the full code point).
9091
9127
 
9092
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
9093
- upb_sink_putstring(&p->top->sink, sel, utf8, length, NULL);
9128
+ return multipart_text(p, utf8, length, false);
9129
+ }
9130
+
9131
+ static void start_text(upb_json_parser *p, const char *ptr) {
9132
+ capture_begin(p, ptr);
9133
+ }
9134
+
9135
+ static bool end_text(upb_json_parser *p, const char *ptr) {
9136
+ return capture_end(p, ptr);
9137
+ }
9138
+
9139
+ static void start_number(upb_json_parser *p, const char *ptr) {
9140
+ multipart_startaccum(p);
9141
+ capture_begin(p, ptr);
9142
+ }
9143
+
9144
+ static bool end_number(upb_json_parser *p, const char *ptr) {
9145
+ if (!capture_end(p, ptr)) {
9146
+ return false;
9147
+ }
9148
+
9149
+ // strtol() and friends unfortunately do not support specifying the length of
9150
+ // the input string, so we need to force a copy into a NULL-terminated buffer.
9151
+ if (!multipart_text(p, "\0", 1, false)) {
9152
+ return false;
9153
+ }
9154
+
9155
+ size_t len;
9156
+ const char *buf = accumulate_getptr(p, &len);
9157
+ const char *myend = buf + len - 1; // One for NULL.
9158
+ char *end;
9159
+
9160
+ switch (upb_fielddef_type(p->top->f)) {
9161
+ case UPB_TYPE_ENUM:
9162
+ case UPB_TYPE_INT32: {
9163
+ long val = strtol(p->accumulated, &end, 0);
9164
+ if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || end != myend)
9165
+ goto err;
9166
+ else
9167
+ upb_sink_putint32(&p->top->sink, parser_getsel(p), val);
9168
+ break;
9169
+ }
9170
+ case UPB_TYPE_INT64: {
9171
+ long long val = strtoll(p->accumulated, &end, 0);
9172
+ if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || end != myend)
9173
+ goto err;
9174
+ else
9175
+ upb_sink_putint64(&p->top->sink, parser_getsel(p), val);
9176
+ break;
9177
+ }
9178
+ case UPB_TYPE_UINT32: {
9179
+ unsigned long val = strtoul(p->accumulated, &end, 0);
9180
+ if (val > UINT32_MAX || errno == ERANGE || end != myend)
9181
+ goto err;
9182
+ else
9183
+ upb_sink_putuint32(&p->top->sink, parser_getsel(p), val);
9184
+ break;
9185
+ }
9186
+ case UPB_TYPE_UINT64: {
9187
+ unsigned long long val = strtoull(p->accumulated, &end, 0);
9188
+ if (val > UINT64_MAX || errno == ERANGE || end != myend)
9189
+ goto err;
9190
+ else
9191
+ upb_sink_putuint64(&p->top->sink, parser_getsel(p), val);
9192
+ break;
9193
+ }
9194
+ case UPB_TYPE_DOUBLE: {
9195
+ double val = strtod(p->accumulated, &end);
9196
+ if (errno == ERANGE || end != myend)
9197
+ goto err;
9198
+ else
9199
+ upb_sink_putdouble(&p->top->sink, parser_getsel(p), val);
9200
+ break;
9201
+ }
9202
+ case UPB_TYPE_FLOAT: {
9203
+ float val = strtof(p->accumulated, &end);
9204
+ if (errno == ERANGE || end != myend)
9205
+ goto err;
9206
+ else
9207
+ upb_sink_putfloat(&p->top->sink, parser_getsel(p), val);
9208
+ break;
9209
+ }
9210
+ default:
9211
+ assert(false);
9212
+ }
9213
+
9214
+ multipart_end(p);
9215
+ return true;
9216
+
9217
+ err:
9218
+ upb_status_seterrf(p->status, "error parsing number: %s", buf);
9219
+ multipart_end(p);
9220
+ return false;
9221
+ }
9222
+
9223
+ static bool parser_putbool(upb_json_parser *p, bool val) {
9224
+ if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) {
9225
+ upb_status_seterrf(p->status,
9226
+ "Boolean value specified for non-bool field: %s",
9227
+ upb_fielddef_name(p->top->f));
9228
+ return false;
9229
+ }
9230
+
9231
+ bool ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val);
9232
+ UPB_ASSERT_VAR(ok, ok);
9233
+ return true;
9234
+ }
9235
+
9236
+ static bool start_stringval(upb_json_parser *p) {
9237
+ assert(p->top->f);
9238
+
9239
+ if (upb_fielddef_isstring(p->top->f)) {
9240
+ if (!check_stack(p)) return false;
9241
+
9242
+ // Start a new parser frame: parser frames correspond one-to-one with
9243
+ // handler frames, and string events occur in a sub-frame.
9244
+ upb_jsonparser_frame *inner = p->top + 1;
9245
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
9246
+ upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
9247
+ inner->m = p->top->m;
9248
+ inner->f = p->top->f;
9249
+ p->top = inner;
9250
+
9251
+ if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) {
9252
+ // For STRING fields we push data directly to the handlers as it is
9253
+ // parsed. We don't do this yet for BYTES fields, because our base64
9254
+ // decoder is not streaming.
9255
+ //
9256
+ // TODO(haberman): make base64 decoding streaming also.
9257
+ multipart_start(p, getsel_for_handlertype(p, UPB_HANDLER_STRING));
9258
+ return true;
9259
+ } else {
9260
+ multipart_startaccum(p);
9261
+ return true;
9262
+ }
9263
+ } else if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) {
9264
+ // No need to push a frame -- symbolic enum names in quotes remain in the
9265
+ // current parser frame.
9266
+ //
9267
+ // Enum string values must accumulate so we can look up the value in a table
9268
+ // once it is complete.
9269
+ multipart_startaccum(p);
9270
+ return true;
9271
+ } else {
9272
+ upb_status_seterrf(p->status,
9273
+ "String specified for non-string/non-enum field: %s",
9274
+ upb_fielddef_name(p->top->f));
9275
+ return false;
9276
+ }
9094
9277
  }
9095
9278
 
9279
+ static bool end_stringval(upb_json_parser *p) {
9280
+ bool ok = true;
9281
+
9282
+ switch (upb_fielddef_type(p->top->f)) {
9283
+ case UPB_TYPE_BYTES:
9284
+ if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING),
9285
+ p->accumulated, p->accumulated_len)) {
9286
+ return false;
9287
+ }
9288
+ // Fall through.
9289
+
9290
+ case UPB_TYPE_STRING: {
9291
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
9292
+ upb_sink_endstr(&p->top->sink, sel);
9293
+ p->top--;
9294
+ break;
9295
+ }
9296
+
9297
+ case UPB_TYPE_ENUM: {
9298
+ // Resolve enum symbolic name to integer value.
9299
+ const upb_enumdef *enumdef =
9300
+ (const upb_enumdef*)upb_fielddef_subdef(p->top->f);
9301
+
9302
+ size_t len;
9303
+ const char *buf = accumulate_getptr(p, &len);
9304
+
9305
+ int32_t int_val = 0;
9306
+ ok = upb_enumdef_ntoi(enumdef, buf, len, &int_val);
9307
+
9308
+ if (ok) {
9309
+ upb_selector_t sel = parser_getsel(p);
9310
+ upb_sink_putint32(&p->top->sink, sel, int_val);
9311
+ } else {
9312
+ upb_status_seterrf(p->status, "Enum value unknown: '%.*s'", len, buf);
9313
+ }
9314
+
9315
+ break;
9316
+ }
9317
+
9318
+ default:
9319
+ assert(false);
9320
+ upb_status_seterrmsg(p->status, "Internal error in JSON decoder");
9321
+ ok = false;
9322
+ break;
9323
+ }
9324
+
9325
+ multipart_end(p);
9326
+ return ok;
9327
+ }
9328
+
9329
+ static void start_member(upb_json_parser *p) {
9330
+ assert(!p->top->f);
9331
+ multipart_startaccum(p);
9332
+ }
9333
+
9334
+ static bool end_member(upb_json_parser *p) {
9335
+ assert(!p->top->f);
9336
+ size_t len;
9337
+ const char *buf = accumulate_getptr(p, &len);
9338
+
9339
+ const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len);
9340
+
9341
+ if (!f) {
9342
+ // TODO(haberman): Ignore unknown fields if requested/configured to do so.
9343
+ upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf);
9344
+ return false;
9345
+ }
9346
+
9347
+ p->top->f = f;
9348
+ multipart_end(p);
9349
+
9350
+ return true;
9351
+ }
9352
+
9353
+ static void clear_member(upb_json_parser *p) { p->top->f = NULL; }
9354
+
9355
+ static bool start_subobject(upb_json_parser *p) {
9356
+ assert(p->top->f);
9357
+
9358
+ if (!upb_fielddef_issubmsg(p->top->f)) {
9359
+ upb_status_seterrf(p->status,
9360
+ "Object specified for non-message/group field: %s",
9361
+ upb_fielddef_name(p->top->f));
9362
+ return false;
9363
+ }
9364
+
9365
+ if (!check_stack(p)) return false;
9366
+
9367
+ upb_jsonparser_frame *inner = p->top + 1;
9368
+
9369
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
9370
+ upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
9371
+ inner->m = upb_fielddef_msgsubdef(p->top->f);
9372
+ inner->f = NULL;
9373
+ p->top = inner;
9374
+
9375
+ return true;
9376
+ }
9377
+
9378
+ static void end_subobject(upb_json_parser *p) {
9379
+ p->top--;
9380
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
9381
+ upb_sink_endsubmsg(&p->top->sink, sel);
9382
+ }
9383
+
9384
+ static bool start_array(upb_json_parser *p) {
9385
+ assert(p->top->f);
9386
+
9387
+ if (!upb_fielddef_isseq(p->top->f)) {
9388
+ upb_status_seterrf(p->status,
9389
+ "Array specified for non-repeated field: %s",
9390
+ upb_fielddef_name(p->top->f));
9391
+ return false;
9392
+ }
9393
+
9394
+ if (!check_stack(p)) return false;
9395
+
9396
+ upb_jsonparser_frame *inner = p->top + 1;
9397
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
9398
+ upb_sink_startseq(&p->top->sink, sel, &inner->sink);
9399
+ inner->m = p->top->m;
9400
+ inner->f = p->top->f;
9401
+ p->top = inner;
9402
+
9403
+ return true;
9404
+ }
9405
+
9406
+ static void end_array(upb_json_parser *p) {
9407
+ assert(p->top > p->stack);
9408
+
9409
+ p->top--;
9410
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
9411
+ upb_sink_endseq(&p->top->sink, sel);
9412
+ }
9413
+
9414
+ static void start_object(upb_json_parser *p) {
9415
+ upb_sink_startmsg(&p->top->sink);
9416
+ }
9417
+
9418
+ static void end_object(upb_json_parser *p) {
9419
+ upb_status status;
9420
+ upb_sink_endmsg(&p->top->sink, &status);
9421
+ }
9422
+
9423
+
9096
9424
  #define CHECK_RETURN_TOP(x) if (!(x)) goto error
9097
9425
 
9426
+
9427
+ /* The actual parser **********************************************************/
9428
+
9098
9429
  // What follows is the Ragel parser itself. The language is specified in Ragel
9099
9430
  // and the actions call our C functions above.
9431
+ //
9432
+ // Ragel has an extensive set of functionality, and we use only a small part of
9433
+ // it. There are many action types but we only use a few:
9434
+ //
9435
+ // ">" -- transition into a machine
9436
+ // "%" -- transition out of a machine
9437
+ // "@" -- transition into a final state of a machine.
9438
+ //
9439
+ // "@" transitions are tricky because a machine can transition into a final
9440
+ // state repeatedly. But in some cases we know this can't happen, for example
9441
+ // a string which is delimited by a final '"' can only transition into its
9442
+ // final state once, when the closing '"' is seen.
9100
9443
 
9101
- #line 596 "upb/json/parser.rl"
9102
9444
 
9445
+ #line 904 "upb/json/parser.rl"
9103
9446
 
9104
9447
 
9105
- #line 514 "upb/json/parser.c"
9448
+
9449
+ #line 816 "upb/json/parser.c"
9106
9450
  static const char _json_actions[] = {
9107
9451
  0, 1, 0, 1, 2, 1, 3, 1,
9108
- 4, 1, 5, 1, 6, 1, 7, 1,
9109
- 9, 1, 11, 1, 12, 1, 13, 1,
9110
- 14, 1, 15, 1, 16, 1, 24, 1,
9111
- 26, 2, 3, 7, 2, 5, 2, 2,
9112
- 5, 7, 2, 10, 8, 2, 12, 14,
9113
- 2, 13, 14, 2, 17, 1, 2, 18,
9114
- 26, 2, 19, 8, 2, 20, 26, 2,
9115
- 21, 26, 2, 22, 26, 2, 23, 26,
9116
- 2, 25, 26, 3, 13, 10, 8
9452
+ 5, 1, 6, 1, 7, 1, 8, 1,
9453
+ 10, 1, 12, 1, 13, 1, 14, 1,
9454
+ 15, 1, 16, 1, 17, 1, 21, 1,
9455
+ 25, 1, 27, 2, 3, 8, 2, 4,
9456
+ 5, 2, 6, 2, 2, 6, 8, 2,
9457
+ 11, 9, 2, 13, 15, 2, 14, 15,
9458
+ 2, 18, 1, 2, 19, 27, 2, 20,
9459
+ 9, 2, 22, 27, 2, 23, 27, 2,
9460
+ 24, 27, 2, 26, 27, 3, 14, 11,
9461
+ 9
9117
9462
  };
9118
9463
 
9119
9464
  static const unsigned char _json_key_offsets[] = {
9120
- 0, 0, 4, 9, 14, 18, 22, 27,
9121
- 32, 37, 41, 45, 48, 51, 53, 57,
9122
- 61, 63, 65, 70, 72, 74, 83, 89,
9123
- 95, 101, 107, 109, 118, 118, 118, 123,
9124
- 128, 133, 133, 134, 135, 136, 137, 137,
9125
- 138, 139, 140, 140, 141, 142, 143, 143,
9126
- 148, 153, 157, 161, 166, 171, 176, 180,
9127
- 180, 183, 183, 183
9465
+ 0, 0, 4, 9, 14, 15, 19, 24,
9466
+ 29, 34, 38, 42, 45, 48, 50, 54,
9467
+ 58, 60, 62, 67, 69, 71, 80, 86,
9468
+ 92, 98, 104, 106, 115, 116, 116, 116,
9469
+ 121, 126, 131, 132, 133, 134, 135, 135,
9470
+ 136, 137, 138, 138, 139, 140, 141, 141,
9471
+ 146, 151, 152, 156, 161, 166, 171, 175,
9472
+ 175, 178, 178, 178
9128
9473
  };
9129
9474
 
9130
9475
  static const char _json_trans_keys[] = {
9131
9476
  32, 123, 9, 13, 32, 34, 125, 9,
9132
- 13, 32, 34, 125, 9, 13, 32, 58,
9133
- 9, 13, 32, 58, 9, 13, 32, 93,
9134
- 125, 9, 13, 32, 44, 125, 9, 13,
9135
- 32, 44, 125, 9, 13, 32, 34, 9,
9136
- 13, 45, 48, 49, 57, 48, 49, 57,
9137
- 46, 69, 101, 48, 57, 69, 101, 48,
9138
- 57, 43, 45, 48, 57, 48, 57, 48,
9139
- 57, 46, 69, 101, 48, 57, 34, 92,
9140
- 34, 92, 34, 47, 92, 98, 102, 110,
9141
- 114, 116, 117, 48, 57, 65, 70, 97,
9142
- 102, 48, 57, 65, 70, 97, 102, 48,
9143
- 57, 65, 70, 97, 102, 48, 57, 65,
9144
- 70, 97, 102, 34, 92, 34, 45, 91,
9145
- 102, 110, 116, 123, 48, 57, 32, 93,
9146
- 125, 9, 13, 32, 44, 93, 9, 13,
9147
- 32, 93, 125, 9, 13, 97, 108, 115,
9148
- 101, 117, 108, 108, 114, 117, 101, 32,
9149
- 34, 125, 9, 13, 32, 34, 125, 9,
9150
- 13, 32, 58, 9, 13, 32, 58, 9,
9151
- 13, 32, 93, 125, 9, 13, 32, 44,
9152
- 125, 9, 13, 32, 44, 125, 9, 13,
9153
- 32, 34, 9, 13, 32, 9, 13, 0
9477
+ 13, 32, 34, 125, 9, 13, 34, 32,
9478
+ 58, 9, 13, 32, 93, 125, 9, 13,
9479
+ 32, 44, 125, 9, 13, 32, 44, 125,
9480
+ 9, 13, 32, 34, 9, 13, 45, 48,
9481
+ 49, 57, 48, 49, 57, 46, 69, 101,
9482
+ 48, 57, 69, 101, 48, 57, 43, 45,
9483
+ 48, 57, 48, 57, 48, 57, 46, 69,
9484
+ 101, 48, 57, 34, 92, 34, 92, 34,
9485
+ 47, 92, 98, 102, 110, 114, 116, 117,
9486
+ 48, 57, 65, 70, 97, 102, 48, 57,
9487
+ 65, 70, 97, 102, 48, 57, 65, 70,
9488
+ 97, 102, 48, 57, 65, 70, 97, 102,
9489
+ 34, 92, 34, 45, 91, 102, 110, 116,
9490
+ 123, 48, 57, 34, 32, 93, 125, 9,
9491
+ 13, 32, 44, 93, 9, 13, 32, 93,
9492
+ 125, 9, 13, 97, 108, 115, 101, 117,
9493
+ 108, 108, 114, 117, 101, 32, 34, 125,
9494
+ 9, 13, 32, 34, 125, 9, 13, 34,
9495
+ 32, 58, 9, 13, 32, 93, 125, 9,
9496
+ 13, 32, 44, 125, 9, 13, 32, 44,
9497
+ 125, 9, 13, 32, 34, 9, 13, 32,
9498
+ 9, 13, 0
9154
9499
  };
9155
9500
 
9156
9501
  static const char _json_single_lengths[] = {
9157
- 0, 2, 3, 3, 2, 2, 3, 3,
9502
+ 0, 2, 3, 3, 1, 2, 3, 3,
9158
9503
  3, 2, 2, 1, 3, 0, 2, 2,
9159
9504
  0, 0, 3, 2, 2, 9, 0, 0,
9160
- 0, 0, 2, 7, 0, 0, 3, 3,
9161
- 3, 0, 1, 1, 1, 1, 0, 1,
9505
+ 0, 0, 2, 7, 1, 0, 0, 3,
9506
+ 3, 3, 1, 1, 1, 1, 0, 1,
9162
9507
  1, 1, 0, 1, 1, 1, 0, 3,
9163
- 3, 2, 2, 3, 3, 3, 2, 0,
9508
+ 3, 1, 2, 3, 3, 3, 2, 0,
9164
9509
  1, 0, 0, 0
9165
9510
  };
9166
9511
 
9167
9512
  static const char _json_range_lengths[] = {
9168
- 0, 1, 1, 1, 1, 1, 1, 1,
9513
+ 0, 1, 1, 1, 0, 1, 1, 1,
9169
9514
  1, 1, 1, 1, 0, 1, 1, 1,
9170
9515
  1, 1, 1, 0, 0, 0, 3, 3,
9171
- 3, 3, 0, 1, 0, 0, 1, 1,
9172
- 1, 0, 0, 0, 0, 0, 0, 0,
9516
+ 3, 3, 0, 1, 0, 0, 0, 1,
9517
+ 1, 1, 0, 0, 0, 0, 0, 0,
9173
9518
  0, 0, 0, 0, 0, 0, 0, 1,
9174
- 1, 1, 1, 1, 1, 1, 1, 0,
9519
+ 1, 0, 1, 1, 1, 1, 1, 0,
9175
9520
  1, 0, 0, 0
9176
9521
  };
9177
9522
 
9178
9523
  static const short _json_index_offsets[] = {
9179
- 0, 0, 4, 9, 14, 18, 22, 27,
9180
- 32, 37, 41, 45, 48, 52, 54, 58,
9181
- 62, 64, 66, 71, 74, 77, 87, 91,
9182
- 95, 99, 103, 106, 115, 116, 117, 122,
9183
- 127, 132, 133, 135, 137, 139, 141, 142,
9184
- 144, 146, 148, 149, 151, 153, 155, 156,
9185
- 161, 166, 170, 174, 179, 184, 189, 193,
9186
- 194, 197, 198, 199
9524
+ 0, 0, 4, 9, 14, 16, 20, 25,
9525
+ 30, 35, 39, 43, 46, 50, 52, 56,
9526
+ 60, 62, 64, 69, 72, 75, 85, 89,
9527
+ 93, 97, 101, 104, 113, 115, 116, 117,
9528
+ 122, 127, 132, 134, 136, 138, 140, 141,
9529
+ 143, 145, 147, 148, 150, 152, 154, 155,
9530
+ 160, 165, 167, 171, 176, 181, 186, 190,
9531
+ 191, 194, 195, 196
9187
9532
  };
9188
9533
 
9189
9534
  static const char _json_indicies[] = {
9190
9535
  0, 2, 0, 1, 3, 4, 5, 3,
9191
- 1, 6, 7, 8, 6, 1, 9, 10,
9192
- 9, 1, 11, 12, 11, 1, 12, 1,
9193
- 1, 12, 13, 14, 15, 16, 14, 1,
9194
- 17, 18, 8, 17, 1, 18, 7, 18,
9195
- 1, 19, 20, 21, 1, 20, 21, 1,
9196
- 23, 24, 24, 22, 25, 1, 24, 24,
9197
- 25, 22, 26, 26, 27, 1, 27, 1,
9198
- 27, 22, 23, 24, 24, 21, 22, 29,
9199
- 30, 28, 32, 33, 31, 34, 34, 34,
9200
- 34, 34, 34, 34, 34, 35, 1, 36,
9201
- 36, 36, 1, 37, 37, 37, 1, 38,
9202
- 38, 38, 1, 39, 39, 39, 1, 41,
9203
- 42, 40, 43, 44, 45, 46, 47, 48,
9204
- 49, 44, 1, 50, 51, 53, 54, 1,
9536
+ 1, 6, 7, 8, 6, 1, 9, 1,
9537
+ 10, 11, 10, 1, 11, 1, 1, 11,
9538
+ 12, 13, 14, 15, 13, 1, 16, 17,
9539
+ 8, 16, 1, 17, 7, 17, 1, 18,
9540
+ 19, 20, 1, 19, 20, 1, 22, 23,
9541
+ 23, 21, 24, 1, 23, 23, 24, 21,
9542
+ 25, 25, 26, 1, 26, 1, 26, 21,
9543
+ 22, 23, 23, 20, 21, 28, 29, 27,
9544
+ 31, 32, 30, 33, 33, 33, 33, 33,
9545
+ 33, 33, 33, 34, 1, 35, 35, 35,
9546
+ 1, 36, 36, 36, 1, 37, 37, 37,
9547
+ 1, 38, 38, 38, 1, 40, 41, 39,
9548
+ 42, 43, 44, 45, 46, 47, 48, 43,
9549
+ 1, 49, 1, 50, 51, 53, 54, 1,
9205
9550
  53, 52, 55, 56, 54, 55, 1, 56,
9206
- 1, 1, 56, 52, 57, 58, 1, 59,
9207
- 1, 60, 1, 61, 1, 62, 63, 1,
9208
- 64, 1, 65, 1, 66, 67, 1, 68,
9209
- 1, 69, 1, 70, 71, 72, 73, 71,
9210
- 1, 74, 75, 76, 74, 1, 77, 78,
9211
- 77, 1, 79, 80, 79, 1, 80, 1,
9212
- 1, 80, 81, 82, 83, 84, 82, 1,
9213
- 85, 86, 76, 85, 1, 86, 75, 86,
9214
- 1, 87, 88, 88, 1, 1, 1, 1,
9215
- 0
9551
+ 1, 1, 56, 52, 57, 1, 58, 1,
9552
+ 59, 1, 60, 1, 61, 62, 1, 63,
9553
+ 1, 64, 1, 65, 66, 1, 67, 1,
9554
+ 68, 1, 69, 70, 71, 72, 70, 1,
9555
+ 73, 74, 75, 73, 1, 76, 1, 77,
9556
+ 78, 77, 1, 78, 1, 1, 78, 79,
9557
+ 80, 81, 82, 80, 1, 83, 84, 75,
9558
+ 83, 1, 84, 74, 84, 1, 85, 86,
9559
+ 86, 1, 1, 1, 1, 0
9216
9560
  };
9217
9561
 
9218
9562
  static const char _json_trans_targs[] = {
9219
9563
  1, 0, 2, 3, 4, 56, 3, 4,
9220
- 56, 5, 6, 5, 6, 7, 8, 9,
9221
- 56, 8, 9, 11, 12, 18, 57, 13,
9222
- 15, 14, 16, 17, 20, 58, 21, 20,
9223
- 58, 21, 19, 22, 23, 24, 25, 26,
9224
- 20, 58, 21, 28, 29, 30, 34, 39,
9225
- 43, 47, 59, 59, 31, 30, 33, 31,
9226
- 32, 59, 35, 36, 37, 38, 59, 40,
9227
- 41, 42, 59, 44, 45, 46, 59, 48,
9228
- 49, 55, 48, 49, 55, 50, 51, 50,
9229
- 51, 52, 53, 54, 55, 53, 54, 59,
9230
- 56
9564
+ 56, 5, 5, 6, 7, 8, 9, 56,
9565
+ 8, 9, 11, 12, 18, 57, 13, 15,
9566
+ 14, 16, 17, 20, 58, 21, 20, 58,
9567
+ 21, 19, 22, 23, 24, 25, 26, 20,
9568
+ 58, 21, 28, 30, 31, 34, 39, 43,
9569
+ 47, 29, 59, 59, 32, 31, 29, 32,
9570
+ 33, 35, 36, 37, 38, 59, 40, 41,
9571
+ 42, 59, 44, 45, 46, 59, 48, 49,
9572
+ 55, 48, 49, 55, 50, 50, 51, 52,
9573
+ 53, 54, 55, 53, 54, 59, 56
9231
9574
  };
9232
9575
 
9233
9576
  static const char _json_trans_actions[] = {
9234
- 0, 0, 0, 21, 75, 48, 0, 42,
9235
- 23, 17, 17, 0, 0, 15, 19, 19,
9236
- 45, 0, 0, 0, 0, 0, 1, 0,
9237
- 0, 0, 0, 0, 3, 13, 0, 0,
9238
- 33, 5, 11, 0, 7, 0, 0, 0,
9239
- 36, 39, 9, 57, 51, 25, 0, 0,
9240
- 0, 29, 60, 54, 15, 0, 27, 0,
9241
- 0, 31, 0, 0, 0, 0, 66, 0,
9242
- 0, 0, 69, 0, 0, 0, 63, 21,
9243
- 75, 48, 0, 42, 23, 17, 17, 0,
9244
- 0, 15, 19, 19, 45, 0, 0, 72,
9245
- 0
9577
+ 0, 0, 0, 21, 77, 53, 0, 47,
9578
+ 23, 17, 0, 0, 15, 19, 19, 50,
9579
+ 0, 0, 0, 0, 0, 1, 0, 0,
9580
+ 0, 0, 0, 3, 13, 0, 0, 35,
9581
+ 5, 11, 0, 38, 7, 7, 7, 41,
9582
+ 44, 9, 62, 56, 25, 0, 0, 0,
9583
+ 31, 29, 33, 59, 15, 0, 27, 0,
9584
+ 0, 0, 0, 0, 0, 68, 0, 0,
9585
+ 0, 71, 0, 0, 0, 65, 21, 77,
9586
+ 53, 0, 47, 23, 17, 0, 0, 15,
9587
+ 19, 19, 50, 0, 0, 74, 0
9246
9588
  };
9247
9589
 
9248
9590
  static const int json_start = 1;
@@ -9255,13 +9597,14 @@ static const int json_en_value_machine = 27;
9255
9597
  static const int json_en_main = 1;
9256
9598
 
9257
9599
 
9258
- #line 599 "upb/json/parser.rl"
9600
+ #line 907 "upb/json/parser.rl"
9259
9601
 
9260
9602
  size_t parse(void *closure, const void *hd, const char *buf, size_t size,
9261
9603
  const upb_bufhandle *handle) {
9262
9604
  UPB_UNUSED(hd);
9263
9605
  UPB_UNUSED(handle);
9264
9606
  upb_json_parser *parser = closure;
9607
+ parser->handle = handle;
9265
9608
 
9266
9609
  // Variables used by Ragel's generated code.
9267
9610
  int cs = parser->current_state;
@@ -9271,8 +9614,10 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size,
9271
9614
  const char *p = buf;
9272
9615
  const char *pe = buf + size;
9273
9616
 
9617
+ capture_resume(parser, buf);
9618
+
9274
9619
 
9275
- #line 684 "upb/json/parser.c"
9620
+ #line 987 "upb/json/parser.c"
9276
9621
  {
9277
9622
  int _klen;
9278
9623
  unsigned int _trans;
@@ -9347,114 +9692,118 @@ _match:
9347
9692
  switch ( *_acts++ )
9348
9693
  {
9349
9694
  case 0:
9350
- #line 517 "upb/json/parser.rl"
9695
+ #line 819 "upb/json/parser.rl"
9351
9696
  { p--; {cs = stack[--top]; goto _again;} }
9352
9697
  break;
9353
9698
  case 1:
9354
- #line 518 "upb/json/parser.rl"
9699
+ #line 820 "upb/json/parser.rl"
9355
9700
  { p--; {stack[top++] = cs; cs = 10; goto _again;} }
9356
9701
  break;
9357
9702
  case 2:
9358
- #line 522 "upb/json/parser.rl"
9703
+ #line 824 "upb/json/parser.rl"
9359
9704
  { start_text(parser, p); }
9360
9705
  break;
9361
9706
  case 3:
9362
- #line 523 "upb/json/parser.rl"
9363
- { CHECK_RETURN_TOP(end_text(parser, p, false)); }
9707
+ #line 825 "upb/json/parser.rl"
9708
+ { CHECK_RETURN_TOP(end_text(parser, p)); }
9364
9709
  break;
9365
9710
  case 4:
9366
- #line 529 "upb/json/parser.rl"
9367
- { start_hex(parser, p); }
9711
+ #line 831 "upb/json/parser.rl"
9712
+ { start_hex(parser); }
9368
9713
  break;
9369
9714
  case 5:
9370
- #line 530 "upb/json/parser.rl"
9371
- { hex(parser, p); }
9715
+ #line 832 "upb/json/parser.rl"
9716
+ { hexdigit(parser, p); }
9372
9717
  break;
9373
9718
  case 6:
9374
- #line 536 "upb/json/parser.rl"
9375
- { escape(parser, p); }
9719
+ #line 833 "upb/json/parser.rl"
9720
+ { CHECK_RETURN_TOP(end_hex(parser)); }
9376
9721
  break;
9377
9722
  case 7:
9378
- #line 539 "upb/json/parser.rl"
9379
- { {cs = stack[--top]; goto _again;} }
9723
+ #line 839 "upb/json/parser.rl"
9724
+ { CHECK_RETURN_TOP(escape(parser, p)); }
9380
9725
  break;
9381
9726
  case 8:
9382
- #line 540 "upb/json/parser.rl"
9383
- { {stack[top++] = cs; cs = 19; goto _again;} }
9727
+ #line 845 "upb/json/parser.rl"
9728
+ { p--; {cs = stack[--top]; goto _again;} }
9384
9729
  break;
9385
9730
  case 9:
9386
- #line 542 "upb/json/parser.rl"
9387
- { p--; {stack[top++] = cs; cs = 27; goto _again;} }
9731
+ #line 848 "upb/json/parser.rl"
9732
+ { {stack[top++] = cs; cs = 19; goto _again;} }
9388
9733
  break;
9389
9734
  case 10:
9390
- #line 547 "upb/json/parser.rl"
9391
- { start_member(parser); }
9735
+ #line 850 "upb/json/parser.rl"
9736
+ { p--; {stack[top++] = cs; cs = 27; goto _again;} }
9392
9737
  break;
9393
9738
  case 11:
9394
- #line 548 "upb/json/parser.rl"
9395
- { CHECK_RETURN_TOP(end_member(parser)); }
9739
+ #line 855 "upb/json/parser.rl"
9740
+ { start_member(parser); }
9396
9741
  break;
9397
9742
  case 12:
9398
- #line 551 "upb/json/parser.rl"
9399
- { clear_member(parser); }
9743
+ #line 856 "upb/json/parser.rl"
9744
+ { CHECK_RETURN_TOP(end_member(parser)); }
9400
9745
  break;
9401
9746
  case 13:
9402
- #line 557 "upb/json/parser.rl"
9403
- { start_object(parser); }
9747
+ #line 859 "upb/json/parser.rl"
9748
+ { clear_member(parser); }
9404
9749
  break;
9405
9750
  case 14:
9406
- #line 560 "upb/json/parser.rl"
9407
- { end_object(parser); }
9751
+ #line 865 "upb/json/parser.rl"
9752
+ { start_object(parser); }
9408
9753
  break;
9409
9754
  case 15:
9410
- #line 566 "upb/json/parser.rl"
9411
- { CHECK_RETURN_TOP(start_array(parser)); }
9755
+ #line 868 "upb/json/parser.rl"
9756
+ { end_object(parser); }
9412
9757
  break;
9413
9758
  case 16:
9414
- #line 570 "upb/json/parser.rl"
9415
- { end_array(parser); }
9759
+ #line 874 "upb/json/parser.rl"
9760
+ { CHECK_RETURN_TOP(start_array(parser)); }
9416
9761
  break;
9417
9762
  case 17:
9418
- #line 575 "upb/json/parser.rl"
9419
- { start_number(parser, p); }
9763
+ #line 878 "upb/json/parser.rl"
9764
+ { end_array(parser); }
9420
9765
  break;
9421
9766
  case 18:
9422
- #line 576 "upb/json/parser.rl"
9423
- { end_number(parser, p); }
9767
+ #line 883 "upb/json/parser.rl"
9768
+ { start_number(parser, p); }
9424
9769
  break;
9425
9770
  case 19:
9426
- #line 578 "upb/json/parser.rl"
9427
- { CHECK_RETURN_TOP(start_stringval(parser)); }
9771
+ #line 884 "upb/json/parser.rl"
9772
+ { CHECK_RETURN_TOP(end_number(parser, p)); }
9428
9773
  break;
9429
9774
  case 20:
9430
- #line 579 "upb/json/parser.rl"
9431
- { end_stringval(parser); }
9775
+ #line 886 "upb/json/parser.rl"
9776
+ { CHECK_RETURN_TOP(start_stringval(parser)); }
9432
9777
  break;
9433
9778
  case 21:
9434
- #line 581 "upb/json/parser.rl"
9435
- { CHECK_RETURN_TOP(parser_putbool(parser, true)); }
9779
+ #line 887 "upb/json/parser.rl"
9780
+ { CHECK_RETURN_TOP(end_stringval(parser)); }
9436
9781
  break;
9437
9782
  case 22:
9438
- #line 583 "upb/json/parser.rl"
9439
- { CHECK_RETURN_TOP(parser_putbool(parser, false)); }
9783
+ #line 889 "upb/json/parser.rl"
9784
+ { CHECK_RETURN_TOP(parser_putbool(parser, true)); }
9440
9785
  break;
9441
9786
  case 23:
9442
- #line 585 "upb/json/parser.rl"
9443
- { /* null value */ }
9787
+ #line 891 "upb/json/parser.rl"
9788
+ { CHECK_RETURN_TOP(parser_putbool(parser, false)); }
9444
9789
  break;
9445
9790
  case 24:
9446
- #line 587 "upb/json/parser.rl"
9447
- { CHECK_RETURN_TOP(start_subobject(parser)); }
9791
+ #line 893 "upb/json/parser.rl"
9792
+ { /* null value */ }
9448
9793
  break;
9449
9794
  case 25:
9450
- #line 588 "upb/json/parser.rl"
9451
- { end_subobject(parser); }
9795
+ #line 895 "upb/json/parser.rl"
9796
+ { CHECK_RETURN_TOP(start_subobject(parser)); }
9452
9797
  break;
9453
9798
  case 26:
9454
- #line 593 "upb/json/parser.rl"
9799
+ #line 896 "upb/json/parser.rl"
9800
+ { end_subobject(parser); }
9801
+ break;
9802
+ case 27:
9803
+ #line 901 "upb/json/parser.rl"
9455
9804
  { p--; {cs = stack[--top]; goto _again;} }
9456
9805
  break;
9457
- #line 866 "upb/json/parser.c"
9806
+ #line 1173 "upb/json/parser.c"
9458
9807
  }
9459
9808
  }
9460
9809
 
@@ -9467,10 +9816,12 @@ _again:
9467
9816
  _out: {}
9468
9817
  }
9469
9818
 
9470
- #line 615 "upb/json/parser.rl"
9819
+ #line 926 "upb/json/parser.rl"
9471
9820
 
9472
9821
  if (p != pe) {
9473
9822
  upb_status_seterrf(parser->status, "Parse error at %s\n", p);
9823
+ } else {
9824
+ capture_suspend(parser, &p);
9474
9825
  }
9475
9826
 
9476
9827
  error:
@@ -9487,8 +9838,13 @@ bool end(void *closure, const void *hd) {
9487
9838
  return true;
9488
9839
  }
9489
9840
 
9841
+
9842
+ /* Public API *****************************************************************/
9843
+
9490
9844
  void upb_json_parser_init(upb_json_parser *p, upb_status *status) {
9491
9845
  p->limit = p->stack + UPB_JSON_MAX_DEPTH;
9846
+ p->accumulate_buf = NULL;
9847
+ p->accumulate_buf_size = 0;
9492
9848
  upb_byteshandler_init(&p->input_handler_);
9493
9849
  upb_byteshandler_setstring(&p->input_handler_, parse, NULL);
9494
9850
  upb_byteshandler_setendstr(&p->input_handler_, end, NULL);
@@ -9498,6 +9854,7 @@ void upb_json_parser_init(upb_json_parser *p, upb_status *status) {
9498
9854
 
9499
9855
  void upb_json_parser_uninit(upb_json_parser *p) {
9500
9856
  upb_byteshandler_uninit(&p->input_handler_);
9857
+ free(p->accumulate_buf);
9501
9858
  }
9502
9859
 
9503
9860
  void upb_json_parser_reset(upb_json_parser *p) {
@@ -9508,18 +9865,18 @@ void upb_json_parser_reset(upb_json_parser *p) {
9508
9865
  int top;
9509
9866
  // Emit Ragel initialization of the parser.
9510
9867
 
9511
- #line 920 "upb/json/parser.c"
9868
+ #line 1235 "upb/json/parser.c"
9512
9869
  {
9513
9870
  cs = json_start;
9514
9871
  top = 0;
9515
9872
  }
9516
9873
 
9517
- #line 655 "upb/json/parser.rl"
9874
+ #line 974 "upb/json/parser.rl"
9518
9875
  p->current_state = cs;
9519
9876
  p->parser_top = top;
9520
- p->text_begin = NULL;
9521
- p->accumulated = NULL;
9522
- p->accumulated_len = 0;
9877
+ accumulate_clear(p);
9878
+ p->multipart_state = MULTIPART_INACTIVE;
9879
+ p->capture = NULL;
9523
9880
  }
9524
9881
 
9525
9882
  void upb_json_parser_resetoutput(upb_json_parser *p, upb_sink *sink) {