google-protobuf 3.0.0.alpha.1.0 → 3.0.0.alpha.1.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of google-protobuf might be problematic. Click here for more details.

@@ -1269,6 +1269,7 @@ upb_msgdef *upb_msgdef_new(const void *owner) {
1269
1269
  if (!upb_def_init(UPB_UPCAST(m), UPB_DEF_MSG, &vtbl, owner)) goto err2;
1270
1270
  if (!upb_inttable_init(&m->itof, UPB_CTYPE_PTR)) goto err2;
1271
1271
  if (!upb_strtable_init(&m->ntof, UPB_CTYPE_PTR)) goto err1;
1272
+ m->map_entry = false;
1272
1273
  return m;
1273
1274
 
1274
1275
  err1:
@@ -1283,6 +1284,7 @@ upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner) {
1283
1284
  if (!newm) return NULL;
1284
1285
  bool ok = upb_def_setfullname(UPB_UPCAST(newm),
1285
1286
  upb_def_fullname(UPB_UPCAST(m)), NULL);
1287
+ newm->map_entry = m->map_entry;
1286
1288
  UPB_ASSERT_VAR(ok, ok);
1287
1289
  upb_msg_iter i;
1288
1290
  for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
@@ -1386,6 +1388,15 @@ int upb_msgdef_numfields(const upb_msgdef *m) {
1386
1388
  return upb_strtable_count(&m->ntof);
1387
1389
  }
1388
1390
 
1391
+ void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry) {
1392
+ assert(!upb_msgdef_isfrozen(m));
1393
+ m->map_entry = map_entry;
1394
+ }
1395
+
1396
+ bool upb_msgdef_mapentry(const upb_msgdef *m) {
1397
+ return m->map_entry;
1398
+ }
1399
+
1389
1400
  void upb_msg_begin(upb_msg_iter *iter, const upb_msgdef *m) {
1390
1401
  upb_inttable_begin(iter, &m->itof);
1391
1402
  }
@@ -3401,31 +3412,32 @@ int log2ceil(uint64_t v) {
3401
3412
  }
3402
3413
 
3403
3414
  char *upb_strdup(const char *s) {
3404
- size_t n = strlen(s) + 1;
3415
+ return upb_strdup2(s, strlen(s));
3416
+ }
3417
+
3418
+ char *upb_strdup2(const char *s, size_t len) {
3419
+ // Prevent overflow errors.
3420
+ if (len == SIZE_MAX) return NULL;
3421
+ // Always null-terminate, even if binary data; but don't rely on the input to
3422
+ // have a null-terminating byte since it may be a raw binary buffer.
3423
+ size_t n = len + 1;
3405
3424
  char *p = malloc(n);
3406
- if (p) memcpy(p, s, n);
3425
+ if (p) {
3426
+ memcpy(p, s, len);
3427
+ p[len] = 0;
3428
+ }
3407
3429
  return p;
3408
3430
  }
3409
3431
 
3410
3432
  // A type to represent the lookup key of either a strtable or an inttable.
3411
- // This is like upb_tabkey, but can carry a size also to allow lookups of
3412
- // non-NULL-terminated strings (we don't store string lengths in the table).
3413
3433
  typedef struct {
3414
3434
  upb_tabkey key;
3415
- uint32_t len; // For string keys only.
3416
3435
  } lookupkey_t;
3417
3436
 
3418
- static lookupkey_t strkey(const char *str) {
3419
- lookupkey_t k;
3420
- k.key.str = (char*)str;
3421
- k.len = strlen(str);
3422
- return k;
3423
- }
3424
-
3425
3437
  static lookupkey_t strkey2(const char *str, size_t len) {
3426
3438
  lookupkey_t k;
3427
- k.key.str = (char*)str;
3428
- k.len = len;
3439
+ k.key.s.str = (char*)str;
3440
+ k.key.s.length = len;
3429
3441
  return k;
3430
3442
  }
3431
3443
 
@@ -3607,11 +3619,12 @@ static size_t begin(const upb_table *t) {
3607
3619
  // A simple "subclass" of upb_table that only adds a hash function for strings.
3608
3620
 
3609
3621
  static uint32_t strhash(upb_tabkey key) {
3610
- return MurmurHash2(key.str, strlen(key.str), 0);
3622
+ return MurmurHash2(key.s.str, key.s.length, 0);
3611
3623
  }
3612
3624
 
3613
3625
  static bool streql(upb_tabkey k1, lookupkey_t k2) {
3614
- return strncmp(k1.str, k2.key.str, k2.len) == 0 && k1.str[k2.len] == '\0';
3626
+ return k1.s.length == k2.key.s.length &&
3627
+ memcmp(k1.s.str, k2.key.s.str, k1.s.length) == 0;
3615
3628
  }
3616
3629
 
3617
3630
  bool upb_strtable_init(upb_strtable *t, upb_ctype_t ctype) {
@@ -3620,7 +3633,7 @@ bool upb_strtable_init(upb_strtable *t, upb_ctype_t ctype) {
3620
3633
 
3621
3634
  void upb_strtable_uninit(upb_strtable *t) {
3622
3635
  for (size_t i = 0; i < upb_table_size(&t->t); i++)
3623
- free((void*)t->t.entries[i].key.str);
3636
+ free((void*)t->t.entries[i].key.s.str);
3624
3637
  uninit(&t->t);
3625
3638
  }
3626
3639
 
@@ -3631,26 +3644,30 @@ bool upb_strtable_resize(upb_strtable *t, size_t size_lg2) {
3631
3644
  upb_strtable_iter i;
3632
3645
  upb_strtable_begin(&i, t);
3633
3646
  for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) {
3634
- upb_strtable_insert(
3635
- &new_table, upb_strtable_iter_key(&i), upb_strtable_iter_value(&i));
3647
+ upb_strtable_insert2(
3648
+ &new_table,
3649
+ upb_strtable_iter_key(&i),
3650
+ upb_strtable_iter_keylength(&i),
3651
+ upb_strtable_iter_value(&i));
3636
3652
  }
3637
3653
  upb_strtable_uninit(t);
3638
3654
  *t = new_table;
3639
3655
  return true;
3640
3656
  }
3641
3657
 
3642
- bool upb_strtable_insert(upb_strtable *t, const char *k, upb_value v) {
3658
+ bool upb_strtable_insert2(upb_strtable *t, const char *k, size_t len,
3659
+ upb_value v) {
3643
3660
  if (isfull(&t->t)) {
3644
3661
  // Need to resize. New table of double the size, add old elements to it.
3645
3662
  if (!upb_strtable_resize(t, t->t.size_lg2 + 1)) {
3646
3663
  return false;
3647
3664
  }
3648
3665
  }
3649
- if ((k = upb_strdup(k)) == NULL) return false;
3666
+ if ((k = upb_strdup2(k, len)) == NULL) return false;
3650
3667
 
3651
- lookupkey_t key = strkey(k);
3652
- uint32_t hash = MurmurHash2(key.key.str, key.len, 0);
3653
- insert(&t->t, strkey(k), v, hash, &strhash, &streql);
3668
+ lookupkey_t key = strkey2(k, len);
3669
+ uint32_t hash = MurmurHash2(key.key.s.str, key.key.s.length, 0);
3670
+ insert(&t->t, key, v, hash, &strhash, &streql);
3654
3671
  return true;
3655
3672
  }
3656
3673
 
@@ -3660,11 +3677,12 @@ bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
3660
3677
  return lookup(&t->t, strkey2(key, len), v, hash, &streql);
3661
3678
  }
3662
3679
 
3663
- bool upb_strtable_remove(upb_strtable *t, const char *key, upb_value *val) {
3680
+ bool upb_strtable_remove2(upb_strtable *t, const char *key, size_t len,
3681
+ upb_value *val) {
3664
3682
  uint32_t hash = MurmurHash2(key, strlen(key), 0);
3665
3683
  upb_tabkey tabkey;
3666
- if (rm(&t->t, strkey(key), val, &tabkey, hash, &streql)) {
3667
- free((void*)tabkey.str);
3684
+ if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) {
3685
+ free((void*)tabkey.s.str);
3668
3686
  return true;
3669
3687
  } else {
3670
3688
  return false;
@@ -3693,7 +3711,12 @@ bool upb_strtable_done(const upb_strtable_iter *i) {
3693
3711
 
3694
3712
  const char *upb_strtable_iter_key(upb_strtable_iter *i) {
3695
3713
  assert(!upb_strtable_done(i));
3696
- return str_tabent(i)->key.str;
3714
+ return str_tabent(i)->key.s.str;
3715
+ }
3716
+
3717
+ size_t upb_strtable_iter_keylength(upb_strtable_iter *i) {
3718
+ assert(!upb_strtable_done(i));
3719
+ return str_tabent(i)->key.s.length;
3697
3720
  }
3698
3721
 
3699
3722
  upb_value upb_strtable_iter_value(const upb_strtable_iter *i) {
@@ -4209,8 +4232,10 @@ static void nullz(upb_status *status) {
4209
4232
  }
4210
4233
 
4211
4234
  void upb_status_clear(upb_status *status) {
4212
- upb_status blank = UPB_STATUS_INIT;
4213
- upb_status_copy(status, &blank);
4235
+ if (!status) return;
4236
+ status->ok_ = true;
4237
+ status->code_ = 0;
4238
+ status->msg[0] = '\0';
4214
4239
  }
4215
4240
 
4216
4241
  bool upb_ok(const upb_status *status) { return status->ok_; }
@@ -5977,6 +6002,7 @@ static void putop(compiler *c, opcode op, ...) {
5977
6002
  case OP_SETDELIM:
5978
6003
  case OP_HALT:
5979
6004
  case OP_RET:
6005
+ case OP_DISPATCH:
5980
6006
  put32(c, op);
5981
6007
  break;
5982
6008
  case OP_PARSE_DOUBLE:
@@ -6057,7 +6083,7 @@ const char *upb_pbdecoder_getopname(unsigned int op) {
6057
6083
  OP(ENDSUBMSG), OP(STARTSTR), OP(STRING), OP(ENDSTR), OP(CALL), OP(RET),
6058
6084
  OP(PUSHLENDELIM), OP(PUSHTAGDELIM), OP(SETDELIM), OP(CHECKDELIM),
6059
6085
  OP(BRANCH), OP(TAG1), OP(TAG2), OP(TAGN), OP(SETDISPATCH), OP(POP),
6060
- OP(SETBIGGROUPNUM), OP(HALT),
6086
+ OP(SETBIGGROUPNUM), OP(DISPATCH), OP(HALT),
6061
6087
  };
6062
6088
  return op > OP_HALT ? names[0] : names[op];
6063
6089
  #undef OP
@@ -6089,6 +6115,7 @@ static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) {
6089
6115
  upb_handlers_msgdef(method->dest_handlers_)));
6090
6116
  break;
6091
6117
  }
6118
+ case OP_DISPATCH:
6092
6119
  case OP_STARTMSG:
6093
6120
  case OP_ENDMSG:
6094
6121
  case OP_PUSHLENDELIM:
@@ -6434,6 +6461,7 @@ static void compile_method(compiler *c, upb_pbdecodermethod *method) {
6434
6461
  putop(c, OP_SETDISPATCH, &method->dispatch);
6435
6462
  putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h);
6436
6463
  label(c, LABEL_FIELD);
6464
+ uint32_t* start_pc = c->pc;
6437
6465
  upb_msg_iter i;
6438
6466
  for(upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) {
6439
6467
  const upb_fielddef *f = upb_msg_iter_field(&i);
@@ -6449,8 +6477,18 @@ static void compile_method(compiler *c, upb_pbdecodermethod *method) {
6449
6477
  }
6450
6478
  }
6451
6479
 
6480
+ // If there were no fields, or if no handlers were defined, we need to
6481
+ // generate a non-empty loop body so that we can at least dispatch for unknown
6482
+ // fields and check for the end of the message.
6483
+ if (c->pc == start_pc) {
6484
+ // Check for end-of-message.
6485
+ putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
6486
+ // Unconditionally dispatch.
6487
+ putop(c, OP_DISPATCH, 0);
6488
+ }
6489
+
6452
6490
  // For now we just loop back to the last field of the message (or if none,
6453
- // the DISPATCH opcode for the message.
6491
+ // the DISPATCH opcode for the message).
6454
6492
  putop(c, OP_BRANCH, -LABEL_FIELD);
6455
6493
 
6456
6494
  // Insert both a label and a dispatch table entry for this end-of-msg.
@@ -7434,6 +7472,9 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
7434
7472
  if (result == DECODE_MISMATCH) goto badtag;
7435
7473
  if (result >= 0) return result;
7436
7474
  })
7475
+ VMCASE(OP_DISPATCH, {
7476
+ CHECK_RETURN(dispatch(d));
7477
+ })
7437
7478
  VMCASE(OP_HALT, {
7438
7479
  return size;
7439
7480
  })
@@ -7492,7 +7533,8 @@ bool upb_pbdecoder_end(void *closure, const void *handler_data) {
7492
7533
  // Rewind from OP_TAG* to OP_CHECKDELIM.
7493
7534
  assert(getop(*d->pc) == OP_TAG1 ||
7494
7535
  getop(*d->pc) == OP_TAG2 ||
7495
- getop(*d->pc) == OP_TAGN);
7536
+ getop(*d->pc) == OP_TAGN ||
7537
+ getop(*d->pc == OP_DISPATCH));
7496
7538
  d->pc = p;
7497
7539
  }
7498
7540
  upb_pbdecoder_decode(closure, handler_data, &dummy, 0, NULL);
@@ -8627,6 +8669,9 @@ upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) {
8627
8669
 
8628
8670
  #define PARSER_CHECK_RETURN(x) if (!(x)) return false
8629
8671
 
8672
+ // Used to signal that a capture has been suspended.
8673
+ static char suspend_capture;
8674
+
8630
8675
  static upb_selector_t getsel_for_handlertype(upb_json_parser *p,
8631
8676
  upb_handlertype_t type) {
8632
8677
  upb_selector_t sel;
@@ -8640,41 +8685,6 @@ static upb_selector_t parser_getsel(upb_json_parser *p) {
8640
8685
  p, upb_handlers_getprimitivehandlertype(p->top->f));
8641
8686
  }
8642
8687
 
8643
- static void start_member(upb_json_parser *p) {
8644
- assert(!p->top->f);
8645
- assert(!p->accumulated);
8646
- p->accumulated_len = 0;
8647
- }
8648
-
8649
- static bool end_member(upb_json_parser *p) {
8650
- // TODO(haberman): support keys that span buffers or have escape sequences.
8651
- assert(!p->top->f);
8652
- assert(p->accumulated);
8653
- const upb_fielddef *f =
8654
- upb_msgdef_ntof(p->top->m, p->accumulated, p->accumulated_len);
8655
-
8656
- if (!f) {
8657
- // TODO(haberman): Ignore unknown fields if requested/configured to do so.
8658
- upb_status_seterrf(p->status, "No such field: %.*s\n",
8659
- (int)p->accumulated_len, p->accumulated);
8660
- return false;
8661
- }
8662
-
8663
- p->top->f = f;
8664
- p->accumulated = NULL;
8665
-
8666
- return true;
8667
- }
8668
-
8669
- static void start_object(upb_json_parser *p) {
8670
- upb_sink_startmsg(&p->top->sink);
8671
- }
8672
-
8673
- static void end_object(upb_json_parser *p) {
8674
- upb_status status;
8675
- upb_sink_endmsg(&p->top->sink, &status);
8676
- }
8677
-
8678
8688
  static bool check_stack(upb_json_parser *p) {
8679
8689
  if ((p->top + 1) == p->limit) {
8680
8690
  upb_status_seterrmsg(p->status, "Nesting too deep");
@@ -8684,83 +8694,28 @@ static bool check_stack(upb_json_parser *p) {
8684
8694
  return true;
8685
8695
  }
8686
8696
 
8687
- static bool start_subobject(upb_json_parser *p) {
8688
- assert(p->top->f);
8689
-
8690
- if (!upb_fielddef_issubmsg(p->top->f)) {
8691
- upb_status_seterrf(p->status,
8692
- "Object specified for non-message/group field: %s",
8693
- upb_fielddef_name(p->top->f));
8694
- return false;
8695
- }
8696
-
8697
- if (!check_stack(p)) return false;
8698
-
8699
- upb_jsonparser_frame *inner = p->top + 1;
8700
-
8701
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
8702
- upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
8703
- inner->m = upb_fielddef_msgsubdef(p->top->f);
8704
- inner->f = NULL;
8705
- p->top = inner;
8697
+ // There are GCC/Clang built-ins for overflow checking which we could start
8698
+ // using if there was any performance benefit to it.
8706
8699
 
8700
+ static bool checked_add(size_t a, size_t b, size_t *c) {
8701
+ if (SIZE_MAX - a < b) return false;
8702
+ *c = a + b;
8707
8703
  return true;
8708
8704
  }
8709
8705
 
8710
- static void end_subobject(upb_json_parser *p) {
8711
- p->top--;
8712
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
8713
- upb_sink_endsubmsg(&p->top->sink, sel);
8714
- }
8715
-
8716
- static bool start_array(upb_json_parser *p) {
8717
- assert(p->top->f);
8718
-
8719
- if (!upb_fielddef_isseq(p->top->f)) {
8720
- upb_status_seterrf(p->status,
8721
- "Array specified for non-repeated field: %s",
8722
- upb_fielddef_name(p->top->f));
8723
- return false;
8706
+ static size_t saturating_multiply(size_t a, size_t b) {
8707
+ // size_t is unsigned, so this is defined behavior even on overflow.
8708
+ size_t ret = a * b;
8709
+ if (b != 0 && ret / b != a) {
8710
+ ret = SIZE_MAX;
8724
8711
  }
8725
-
8726
- if (!check_stack(p)) return false;
8727
-
8728
- upb_jsonparser_frame *inner = p->top + 1;
8729
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
8730
- upb_sink_startseq(&p->top->sink, sel, &inner->sink);
8731
- inner->m = p->top->m;
8732
- inner->f = p->top->f;
8733
- p->top = inner;
8734
-
8735
- return true;
8736
- }
8737
-
8738
- static void end_array(upb_json_parser *p) {
8739
- assert(p->top > p->stack);
8740
-
8741
- p->top--;
8742
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
8743
- upb_sink_endseq(&p->top->sink, sel);
8712
+ return ret;
8744
8713
  }
8745
8714
 
8746
- static void clear_member(upb_json_parser *p) { p->top->f = NULL; }
8747
-
8748
- static bool parser_putbool(upb_json_parser *p, bool val) {
8749
- if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) {
8750
- upb_status_seterrf(p->status,
8751
- "Boolean value specified for non-bool field: %s",
8752
- upb_fielddef_name(p->top->f));
8753
- return false;
8754
- }
8755
8715
 
8756
- bool ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val);
8757
- UPB_ASSERT_VAR(ok, ok);
8758
- return true;
8759
- }
8716
+ /* Base64 decoding ************************************************************/
8760
8717
 
8761
- static void start_text(upb_json_parser *p, const char *ptr) {
8762
- p->text_begin = ptr;
8763
- }
8718
+ // TODO(haberman): make this streaming.
8764
8719
 
8765
8720
  static const signed char b64table[] = {
8766
8721
  -1, -1, -1, -1, -1, -1, -1, -1,
@@ -8880,193 +8835,274 @@ badpadding:
8880
8835
  return false;
8881
8836
  }
8882
8837
 
8883
- static bool end_text(upb_json_parser *p, const char *ptr, bool is_num) {
8884
- assert(!p->accumulated); // TODO: handle this case.
8885
- p->accumulated = p->text_begin;
8886
- p->accumulated_len = ptr - p->text_begin;
8887
8838
 
8888
- if (p->top->f && upb_fielddef_isstring(p->top->f)) {
8889
- // This is a string field (as opposed to a member name).
8890
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
8891
- if (upb_fielddef_type(p->top->f) == UPB_TYPE_BYTES) {
8892
- PARSER_CHECK_RETURN(base64_push(p, sel, p->accumulated,
8893
- p->accumulated_len));
8894
- } else {
8895
- upb_sink_putstring(&p->top->sink, sel, p->accumulated, p->accumulated_len, NULL);
8896
- }
8897
- p->accumulated = NULL;
8898
- } else if (p->top->f &&
8899
- upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM &&
8900
- !is_num) {
8901
-
8902
- // Enum case: resolve enum symbolic name to integer value.
8903
- const upb_enumdef *enumdef =
8904
- (const upb_enumdef*)upb_fielddef_subdef(p->top->f);
8905
-
8906
- int32_t int_val = 0;
8907
- if (upb_enumdef_ntoi(enumdef, p->accumulated, p->accumulated_len,
8908
- &int_val)) {
8909
- upb_selector_t sel = parser_getsel(p);
8910
- upb_sink_putint32(&p->top->sink, sel, int_val);
8911
- } else {
8912
- upb_status_seterrmsg(p->status, "Enum value name unknown");
8913
- return false;
8914
- }
8915
- p->accumulated = NULL;
8916
- }
8839
+ /* Accumulate buffer **********************************************************/
8917
8840
 
8918
- return true;
8841
+ // Functionality for accumulating a buffer.
8842
+ //
8843
+ // Some parts of the parser need an entire value as a contiguous string. For
8844
+ // example, to look up a member name in a hash table, or to turn a string into
8845
+ // a number, the relevant library routines need the input string to be in
8846
+ // contiguous memory, even if the value spanned two or more buffers in the
8847
+ // input. These routines handle that.
8848
+ //
8849
+ // In the common case we can just point to the input buffer to get this
8850
+ // contiguous string and avoid any actual copy. So we optimistically begin
8851
+ // this way. But there are a few cases where we must instead copy into a
8852
+ // separate buffer:
8853
+ //
8854
+ // 1. The string was not contiguous in the input (it spanned buffers).
8855
+ //
8856
+ // 2. The string included escape sequences that need to be interpreted to get
8857
+ // the true value in a contiguous buffer.
8858
+
8859
+ static void assert_accumulate_empty(upb_json_parser *p) {
8860
+ assert(p->accumulated == NULL);
8861
+ assert(p->accumulated_len == 0);
8919
8862
  }
8920
8863
 
8921
- static bool start_stringval(upb_json_parser *p) {
8922
- assert(p->top->f);
8864
+ static void accumulate_clear(upb_json_parser *p) {
8865
+ p->accumulated = NULL;
8866
+ p->accumulated_len = 0;
8867
+ }
8923
8868
 
8924
- if (upb_fielddef_isstring(p->top->f)) {
8925
- if (!check_stack(p)) return false;
8869
+ // Used internally by accumulate_append().
8870
+ static bool accumulate_realloc(upb_json_parser *p, size_t need) {
8871
+ size_t new_size = UPB_MAX(p->accumulate_buf_size, 128);
8872
+ while (new_size < need) {
8873
+ new_size = saturating_multiply(new_size, 2);
8874
+ }
8926
8875
 
8927
- // Start a new parser frame: parser frames correspond one-to-one with
8928
- // handler frames, and string events occur in a sub-frame.
8929
- upb_jsonparser_frame *inner = p->top + 1;
8930
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
8931
- upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
8932
- inner->m = p->top->m;
8933
- inner->f = p->top->f;
8934
- p->top = inner;
8876
+ void *mem = realloc(p->accumulate_buf, new_size);
8877
+ if (!mem) {
8878
+ upb_status_seterrmsg(p->status, "Out of memory allocating buffer.");
8879
+ return false;
8880
+ }
8935
8881
 
8882
+ p->accumulate_buf = mem;
8883
+ p->accumulate_buf_size = new_size;
8884
+ return true;
8885
+ }
8886
+
8887
+ // Logically appends the given data to the accumulate buffer.
8888
+ // If "can_alias" is true, we will try to avoid actually copying, but the buffer
8889
+ // must be valid until the next accumulate_append() call (if any).
8890
+ static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len,
8891
+ bool can_alias) {
8892
+ if (!p->accumulated && can_alias) {
8893
+ p->accumulated = buf;
8894
+ p->accumulated_len = len;
8936
8895
  return true;
8937
- } else if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) {
8938
- // Do nothing -- symbolic enum names in quotes remain in the
8939
- // current parser frame.
8940
- return true;
8941
- } else {
8942
- upb_status_seterrf(p->status,
8943
- "String specified for non-string/non-enum field: %s",
8944
- upb_fielddef_name(p->top->f));
8896
+ }
8897
+
8898
+ size_t need;
8899
+ if (!checked_add(p->accumulated_len, len, &need)) {
8900
+ upb_status_seterrmsg(p->status, "Integer overflow.");
8945
8901
  return false;
8946
8902
  }
8947
8903
 
8948
- }
8904
+ if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) {
8905
+ return false;
8906
+ }
8949
8907
 
8950
- static void end_stringval(upb_json_parser *p) {
8951
- if (upb_fielddef_isstring(p->top->f)) {
8952
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
8953
- upb_sink_endstr(&p->top->sink, sel);
8954
- p->top--;
8908
+ if (p->accumulated != p->accumulate_buf) {
8909
+ memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len);
8910
+ p->accumulated = p->accumulate_buf;
8955
8911
  }
8912
+
8913
+ memcpy(p->accumulate_buf + p->accumulated_len, buf, len);
8914
+ p->accumulated_len += len;
8915
+ return true;
8956
8916
  }
8957
8917
 
8958
- static void start_number(upb_json_parser *p, const char *ptr) {
8959
- start_text(p, ptr);
8960
- assert(p->accumulated == NULL);
8918
+ // Returns a pointer to the data accumulated since the last accumulate_clear()
8919
+ // call, and writes the length to *len. This will point either to the input
8920
+ // buffer or a temporary accumulate buffer.
8921
+ static const char *accumulate_getptr(upb_json_parser *p, size_t *len) {
8922
+ assert(p->accumulated);
8923
+ *len = p->accumulated_len;
8924
+ return p->accumulated;
8961
8925
  }
8962
8926
 
8963
- static void end_number(upb_json_parser *p, const char *ptr) {
8964
- end_text(p, ptr, true);
8965
- const char *myend = p->accumulated + p->accumulated_len;
8966
- char *end;
8967
8927
 
8968
- switch (upb_fielddef_type(p->top->f)) {
8969
- case UPB_TYPE_ENUM:
8970
- case UPB_TYPE_INT32: {
8971
- long val = strtol(p->accumulated, &end, 0);
8972
- if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || end != myend)
8973
- assert(false);
8974
- else
8975
- upb_sink_putint32(&p->top->sink, parser_getsel(p), val);
8976
- break;
8977
- }
8978
- case UPB_TYPE_INT64: {
8979
- long long val = strtoll(p->accumulated, &end, 0);
8980
- if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || end != myend)
8981
- assert(false);
8982
- else
8983
- upb_sink_putint64(&p->top->sink, parser_getsel(p), val);
8984
- break;
8985
- }
8986
- case UPB_TYPE_UINT32: {
8987
- unsigned long val = strtoul(p->accumulated, &end, 0);
8988
- if (val > UINT32_MAX || errno == ERANGE || end != myend)
8989
- assert(false);
8990
- else
8991
- upb_sink_putuint32(&p->top->sink, parser_getsel(p), val);
8992
- break;
8993
- }
8994
- case UPB_TYPE_UINT64: {
8995
- unsigned long long val = strtoull(p->accumulated, &end, 0);
8996
- if (val > UINT64_MAX || errno == ERANGE || end != myend)
8997
- assert(false);
8998
- else
8999
- upb_sink_putuint64(&p->top->sink, parser_getsel(p), val);
9000
- break;
9001
- }
9002
- case UPB_TYPE_DOUBLE: {
9003
- double val = strtod(p->accumulated, &end);
9004
- if (errno == ERANGE || end != myend)
9005
- assert(false);
9006
- else
9007
- upb_sink_putdouble(&p->top->sink, parser_getsel(p), val);
8928
+ /* Multi-part text data *******************************************************/
8929
+
8930
+ // When we have text data in the input, it can often come in multiple segments.
8931
+ // For example, there may be some raw string data followed by an escape
8932
+ // sequence. The two segments are processed with different logic. Also buffer
8933
+ // seams in the input can cause multiple segments.
8934
+ //
8935
+ // As we see segments, there are two main cases for how we want to process them:
8936
+ //
8937
+ // 1. we want to push the captured input directly to string handlers.
8938
+ //
8939
+ // 2. we need to accumulate all the parts into a contiguous buffer for further
8940
+ // processing (field name lookup, string->number conversion, etc).
8941
+
8942
+ // This is the set of states for p->multipart_state.
8943
+ enum {
8944
+ // We are not currently processing multipart data.
8945
+ MULTIPART_INACTIVE = 0,
8946
+
8947
+ // We are processing multipart data by accumulating it into a contiguous
8948
+ // buffer.
8949
+ MULTIPART_ACCUMULATE = 1,
8950
+
8951
+ // We are processing multipart data by pushing each part directly to the
8952
+ // current string handlers.
8953
+ MULTIPART_PUSHEAGERLY = 2
8954
+ };
8955
+
8956
+ // Start a multi-part text value where we accumulate the data for processing at
8957
+ // the end.
8958
+ static void multipart_startaccum(upb_json_parser *p) {
8959
+ assert_accumulate_empty(p);
8960
+ assert(p->multipart_state == MULTIPART_INACTIVE);
8961
+ p->multipart_state = MULTIPART_ACCUMULATE;
8962
+ }
8963
+
8964
+ // Start a multi-part text value where we immediately push text data to a string
8965
+ // value with the given selector.
8966
+ static void multipart_start(upb_json_parser *p, upb_selector_t sel) {
8967
+ assert_accumulate_empty(p);
8968
+ assert(p->multipart_state == MULTIPART_INACTIVE);
8969
+ p->multipart_state = MULTIPART_PUSHEAGERLY;
8970
+ p->string_selector = sel;
8971
+ }
8972
+
8973
+ static bool multipart_text(upb_json_parser *p, const char *buf, size_t len,
8974
+ bool can_alias) {
8975
+ switch (p->multipart_state) {
8976
+ case MULTIPART_INACTIVE:
8977
+ upb_status_seterrmsg(
8978
+ p->status, "Internal error: unexpected state MULTIPART_INACTIVE");
8979
+ return false;
8980
+
8981
+ case MULTIPART_ACCUMULATE:
8982
+ if (!accumulate_append(p, buf, len, can_alias)) {
8983
+ return false;
8984
+ }
9008
8985
  break;
9009
- }
9010
- case UPB_TYPE_FLOAT: {
9011
- float val = strtof(p->accumulated, &end);
9012
- if (errno == ERANGE || end != myend)
9013
- assert(false);
9014
- else
9015
- upb_sink_putfloat(&p->top->sink, parser_getsel(p), val);
8986
+
8987
+ case MULTIPART_PUSHEAGERLY: {
8988
+ const upb_bufhandle *handle = can_alias ? p->handle : NULL;
8989
+ upb_sink_putstring(&p->top->sink, p->string_selector, buf, len, handle);
9016
8990
  break;
9017
8991
  }
9018
- default:
9019
- assert(false);
9020
8992
  }
9021
8993
 
9022
- p->accumulated = NULL;
8994
+ return true;
9023
8995
  }
9024
8996
 
9025
- static char escape_char(char in) {
9026
- switch (in) {
9027
- case 'r': return '\r';
9028
- case 't': return '\t';
9029
- case 'n': return '\n';
9030
- case 'f': return '\f';
9031
- case 'b': return '\b';
9032
- case '/': return '/';
9033
- case '"': return '"';
9034
- case '\\': return '\\';
9035
- default:
9036
- assert(0);
9037
- return 'x';
9038
- }
8997
+ // Note: this invalidates the accumulate buffer! Call only after reading its
8998
+ // contents.
8999
+ static void multipart_end(upb_json_parser *p) {
9000
+ assert(p->multipart_state != MULTIPART_INACTIVE);
9001
+ p->multipart_state = MULTIPART_INACTIVE;
9002
+ accumulate_clear(p);
9039
9003
  }
9040
9004
 
9041
- static void escape(upb_json_parser *p, const char *ptr) {
9042
- char ch = escape_char(*ptr);
9043
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
9044
- upb_sink_putstring(&p->top->sink, sel, &ch, 1, NULL);
9005
+
9006
+ /* Input capture **************************************************************/
9007
+
9008
+ // Functionality for capturing a region of the input as text. Gracefully
9009
+ // handles the case where a buffer seam occurs in the middle of the captured
9010
+ // region.
9011
+
9012
+ static void capture_begin(upb_json_parser *p, const char *ptr) {
9013
+ assert(p->multipart_state != MULTIPART_INACTIVE);
9014
+ assert(p->capture == NULL);
9015
+ p->capture = ptr;
9045
9016
  }
9046
9017
 
9047
- static uint8_t hexdigit(char ch) {
9048
- if (ch >= '0' && ch <= '9') {
9049
- return ch - '0';
9050
- } else if (ch >= 'a' && ch <= 'f') {
9051
- return ch - 'a' + 10;
9018
+ static bool capture_end(upb_json_parser *p, const char *ptr) {
9019
+ assert(p->capture);
9020
+ if (multipart_text(p, p->capture, ptr - p->capture, true)) {
9021
+ p->capture = NULL;
9022
+ return true;
9052
9023
  } else {
9053
- assert(ch >= 'A' && ch <= 'F');
9054
- return ch - 'A' + 10;
9024
+ return false;
9055
9025
  }
9056
9026
  }
9057
9027
 
9058
- static void start_hex(upb_json_parser *p, const char *ptr) {
9059
- start_text(p, ptr);
9028
+ // This is called at the end of each input buffer (i.e. when we have hit a
9029
+ // buffer seam). If we are in the middle of capturing the input, this
9030
+ // processes the unprocessed capture region.
9031
+ static void capture_suspend(upb_json_parser *p, const char **ptr) {
9032
+ if (!p->capture) return;
9033
+
9034
+ if (multipart_text(p, p->capture, *ptr - p->capture, false)) {
9035
+ // We use this as a signal that we were in the middle of capturing, and
9036
+ // that capturing should resume at the beginning of the next buffer.
9037
+ //
9038
+ // We can't use *ptr here, because we have no guarantee that this pointer
9039
+ // will be valid when we resume (if the underlying memory is freed, then
9040
+ // using the pointer at all, even to compare to NULL, is likely undefined
9041
+ // behavior).
9042
+ p->capture = &suspend_capture;
9043
+ } else {
9044
+ // Need to back up the pointer to the beginning of the capture, since
9045
+ // we were not able to actually preserve it.
9046
+ *ptr = p->capture;
9047
+ }
9048
+ }
9049
+
9050
+ static void capture_resume(upb_json_parser *p, const char *ptr) {
9051
+ if (p->capture) {
9052
+ assert(p->capture == &suspend_capture);
9053
+ p->capture = ptr;
9054
+ }
9055
+ }
9056
+
9057
+
9058
+ /* Callbacks from the parser **************************************************/
9059
+
9060
+ // These are the functions called directly from the parser itself.
9061
+ // We define these in the same order as their declarations in the parser.
9062
+
9063
+ static char escape_char(char in) {
9064
+ switch (in) {
9065
+ case 'r': return '\r';
9066
+ case 't': return '\t';
9067
+ case 'n': return '\n';
9068
+ case 'f': return '\f';
9069
+ case 'b': return '\b';
9070
+ case '/': return '/';
9071
+ case '"': return '"';
9072
+ case '\\': return '\\';
9073
+ default:
9074
+ assert(0);
9075
+ return 'x';
9076
+ }
9077
+ }
9078
+
9079
+ static bool escape(upb_json_parser *p, const char *ptr) {
9080
+ char ch = escape_char(*ptr);
9081
+ return multipart_text(p, &ch, 1, false);
9082
+ }
9083
+
9084
+ static void start_hex(upb_json_parser *p) {
9085
+ p->digit = 0;
9086
+ }
9087
+
9088
+ static void hexdigit(upb_json_parser *p, const char *ptr) {
9089
+ char ch = *ptr;
9090
+
9091
+ p->digit <<= 4;
9092
+
9093
+ if (ch >= '0' && ch <= '9') {
9094
+ p->digit += (ch - '0');
9095
+ } else if (ch >= 'a' && ch <= 'f') {
9096
+ p->digit += ((ch - 'a') + 10);
9097
+ } else {
9098
+ assert(ch >= 'A' && ch <= 'F');
9099
+ p->digit += ((ch - 'A') + 10);
9100
+ }
9060
9101
  }
9061
9102
 
9062
- static void hex(upb_json_parser *p, const char *end) {
9063
- const char *start = p->text_begin;
9064
- UPB_ASSERT_VAR(end, end - start == 4);
9065
- uint16_t codepoint =
9066
- (hexdigit(start[0]) << 12) |
9067
- (hexdigit(start[1]) << 8) |
9068
- (hexdigit(start[2]) << 4) |
9069
- hexdigit(start[3]);
9103
+ static bool end_hex(upb_json_parser *p) {
9104
+ uint32_t codepoint = p->digit;
9105
+
9070
9106
  // emit the codepoint as UTF-8.
9071
9107
  char utf8[3]; // support \u0000 -- \uFFFF -- need only three bytes.
9072
9108
  int length = 0;
@@ -9089,160 +9125,466 @@ static void hex(upb_json_parser *p, const char *end) {
9089
9125
  // TODO(haberman): Handle high surrogates: if codepoint is a high surrogate
9090
9126
  // we have to wait for the next escape to get the full code point).
9091
9127
 
9092
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
9093
- upb_sink_putstring(&p->top->sink, sel, utf8, length, NULL);
9128
+ return multipart_text(p, utf8, length, false);
9129
+ }
9130
+
9131
+ static void start_text(upb_json_parser *p, const char *ptr) {
9132
+ capture_begin(p, ptr);
9133
+ }
9134
+
9135
+ static bool end_text(upb_json_parser *p, const char *ptr) {
9136
+ return capture_end(p, ptr);
9137
+ }
9138
+
9139
+ static void start_number(upb_json_parser *p, const char *ptr) {
9140
+ multipart_startaccum(p);
9141
+ capture_begin(p, ptr);
9142
+ }
9143
+
9144
+ static bool end_number(upb_json_parser *p, const char *ptr) {
9145
+ if (!capture_end(p, ptr)) {
9146
+ return false;
9147
+ }
9148
+
9149
+ // strtol() and friends unfortunately do not support specifying the length of
9150
+ // the input string, so we need to force a copy into a NULL-terminated buffer.
9151
+ if (!multipart_text(p, "\0", 1, false)) {
9152
+ return false;
9153
+ }
9154
+
9155
+ size_t len;
9156
+ const char *buf = accumulate_getptr(p, &len);
9157
+ const char *myend = buf + len - 1; // One for NULL.
9158
+ char *end;
9159
+
9160
+ switch (upb_fielddef_type(p->top->f)) {
9161
+ case UPB_TYPE_ENUM:
9162
+ case UPB_TYPE_INT32: {
9163
+ long val = strtol(p->accumulated, &end, 0);
9164
+ if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || end != myend)
9165
+ goto err;
9166
+ else
9167
+ upb_sink_putint32(&p->top->sink, parser_getsel(p), val);
9168
+ break;
9169
+ }
9170
+ case UPB_TYPE_INT64: {
9171
+ long long val = strtoll(p->accumulated, &end, 0);
9172
+ if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || end != myend)
9173
+ goto err;
9174
+ else
9175
+ upb_sink_putint64(&p->top->sink, parser_getsel(p), val);
9176
+ break;
9177
+ }
9178
+ case UPB_TYPE_UINT32: {
9179
+ unsigned long val = strtoul(p->accumulated, &end, 0);
9180
+ if (val > UINT32_MAX || errno == ERANGE || end != myend)
9181
+ goto err;
9182
+ else
9183
+ upb_sink_putuint32(&p->top->sink, parser_getsel(p), val);
9184
+ break;
9185
+ }
9186
+ case UPB_TYPE_UINT64: {
9187
+ unsigned long long val = strtoull(p->accumulated, &end, 0);
9188
+ if (val > UINT64_MAX || errno == ERANGE || end != myend)
9189
+ goto err;
9190
+ else
9191
+ upb_sink_putuint64(&p->top->sink, parser_getsel(p), val);
9192
+ break;
9193
+ }
9194
+ case UPB_TYPE_DOUBLE: {
9195
+ double val = strtod(p->accumulated, &end);
9196
+ if (errno == ERANGE || end != myend)
9197
+ goto err;
9198
+ else
9199
+ upb_sink_putdouble(&p->top->sink, parser_getsel(p), val);
9200
+ break;
9201
+ }
9202
+ case UPB_TYPE_FLOAT: {
9203
+ float val = strtof(p->accumulated, &end);
9204
+ if (errno == ERANGE || end != myend)
9205
+ goto err;
9206
+ else
9207
+ upb_sink_putfloat(&p->top->sink, parser_getsel(p), val);
9208
+ break;
9209
+ }
9210
+ default:
9211
+ assert(false);
9212
+ }
9213
+
9214
+ multipart_end(p);
9215
+ return true;
9216
+
9217
+ err:
9218
+ upb_status_seterrf(p->status, "error parsing number: %s", buf);
9219
+ multipart_end(p);
9220
+ return false;
9221
+ }
9222
+
9223
+ static bool parser_putbool(upb_json_parser *p, bool val) {
9224
+ if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) {
9225
+ upb_status_seterrf(p->status,
9226
+ "Boolean value specified for non-bool field: %s",
9227
+ upb_fielddef_name(p->top->f));
9228
+ return false;
9229
+ }
9230
+
9231
+ bool ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val);
9232
+ UPB_ASSERT_VAR(ok, ok);
9233
+ return true;
9234
+ }
9235
+
9236
+ static bool start_stringval(upb_json_parser *p) {
9237
+ assert(p->top->f);
9238
+
9239
+ if (upb_fielddef_isstring(p->top->f)) {
9240
+ if (!check_stack(p)) return false;
9241
+
9242
+ // Start a new parser frame: parser frames correspond one-to-one with
9243
+ // handler frames, and string events occur in a sub-frame.
9244
+ upb_jsonparser_frame *inner = p->top + 1;
9245
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
9246
+ upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
9247
+ inner->m = p->top->m;
9248
+ inner->f = p->top->f;
9249
+ p->top = inner;
9250
+
9251
+ if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) {
9252
+ // For STRING fields we push data directly to the handlers as it is
9253
+ // parsed. We don't do this yet for BYTES fields, because our base64
9254
+ // decoder is not streaming.
9255
+ //
9256
+ // TODO(haberman): make base64 decoding streaming also.
9257
+ multipart_start(p, getsel_for_handlertype(p, UPB_HANDLER_STRING));
9258
+ return true;
9259
+ } else {
9260
+ multipart_startaccum(p);
9261
+ return true;
9262
+ }
9263
+ } else if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) {
9264
+ // No need to push a frame -- symbolic enum names in quotes remain in the
9265
+ // current parser frame.
9266
+ //
9267
+ // Enum string values must accumulate so we can look up the value in a table
9268
+ // once it is complete.
9269
+ multipart_startaccum(p);
9270
+ return true;
9271
+ } else {
9272
+ upb_status_seterrf(p->status,
9273
+ "String specified for non-string/non-enum field: %s",
9274
+ upb_fielddef_name(p->top->f));
9275
+ return false;
9276
+ }
9094
9277
  }
9095
9278
 
9279
+ static bool end_stringval(upb_json_parser *p) {
9280
+ bool ok = true;
9281
+
9282
+ switch (upb_fielddef_type(p->top->f)) {
9283
+ case UPB_TYPE_BYTES:
9284
+ if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING),
9285
+ p->accumulated, p->accumulated_len)) {
9286
+ return false;
9287
+ }
9288
+ // Fall through.
9289
+
9290
+ case UPB_TYPE_STRING: {
9291
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
9292
+ upb_sink_endstr(&p->top->sink, sel);
9293
+ p->top--;
9294
+ break;
9295
+ }
9296
+
9297
+ case UPB_TYPE_ENUM: {
9298
+ // Resolve enum symbolic name to integer value.
9299
+ const upb_enumdef *enumdef =
9300
+ (const upb_enumdef*)upb_fielddef_subdef(p->top->f);
9301
+
9302
+ size_t len;
9303
+ const char *buf = accumulate_getptr(p, &len);
9304
+
9305
+ int32_t int_val = 0;
9306
+ ok = upb_enumdef_ntoi(enumdef, buf, len, &int_val);
9307
+
9308
+ if (ok) {
9309
+ upb_selector_t sel = parser_getsel(p);
9310
+ upb_sink_putint32(&p->top->sink, sel, int_val);
9311
+ } else {
9312
+ upb_status_seterrf(p->status, "Enum value unknown: '%.*s'", len, buf);
9313
+ }
9314
+
9315
+ break;
9316
+ }
9317
+
9318
+ default:
9319
+ assert(false);
9320
+ upb_status_seterrmsg(p->status, "Internal error in JSON decoder");
9321
+ ok = false;
9322
+ break;
9323
+ }
9324
+
9325
+ multipart_end(p);
9326
+ return ok;
9327
+ }
9328
+
9329
+ static void start_member(upb_json_parser *p) {
9330
+ assert(!p->top->f);
9331
+ multipart_startaccum(p);
9332
+ }
9333
+
9334
+ static bool end_member(upb_json_parser *p) {
9335
+ assert(!p->top->f);
9336
+ size_t len;
9337
+ const char *buf = accumulate_getptr(p, &len);
9338
+
9339
+ const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len);
9340
+
9341
+ if (!f) {
9342
+ // TODO(haberman): Ignore unknown fields if requested/configured to do so.
9343
+ upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf);
9344
+ return false;
9345
+ }
9346
+
9347
+ p->top->f = f;
9348
+ multipart_end(p);
9349
+
9350
+ return true;
9351
+ }
9352
+
9353
+ static void clear_member(upb_json_parser *p) { p->top->f = NULL; }
9354
+
9355
+ static bool start_subobject(upb_json_parser *p) {
9356
+ assert(p->top->f);
9357
+
9358
+ if (!upb_fielddef_issubmsg(p->top->f)) {
9359
+ upb_status_seterrf(p->status,
9360
+ "Object specified for non-message/group field: %s",
9361
+ upb_fielddef_name(p->top->f));
9362
+ return false;
9363
+ }
9364
+
9365
+ if (!check_stack(p)) return false;
9366
+
9367
+ upb_jsonparser_frame *inner = p->top + 1;
9368
+
9369
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
9370
+ upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
9371
+ inner->m = upb_fielddef_msgsubdef(p->top->f);
9372
+ inner->f = NULL;
9373
+ p->top = inner;
9374
+
9375
+ return true;
9376
+ }
9377
+
9378
+ static void end_subobject(upb_json_parser *p) {
9379
+ p->top--;
9380
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
9381
+ upb_sink_endsubmsg(&p->top->sink, sel);
9382
+ }
9383
+
9384
+ static bool start_array(upb_json_parser *p) {
9385
+ assert(p->top->f);
9386
+
9387
+ if (!upb_fielddef_isseq(p->top->f)) {
9388
+ upb_status_seterrf(p->status,
9389
+ "Array specified for non-repeated field: %s",
9390
+ upb_fielddef_name(p->top->f));
9391
+ return false;
9392
+ }
9393
+
9394
+ if (!check_stack(p)) return false;
9395
+
9396
+ upb_jsonparser_frame *inner = p->top + 1;
9397
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
9398
+ upb_sink_startseq(&p->top->sink, sel, &inner->sink);
9399
+ inner->m = p->top->m;
9400
+ inner->f = p->top->f;
9401
+ p->top = inner;
9402
+
9403
+ return true;
9404
+ }
9405
+
9406
+ static void end_array(upb_json_parser *p) {
9407
+ assert(p->top > p->stack);
9408
+
9409
+ p->top--;
9410
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
9411
+ upb_sink_endseq(&p->top->sink, sel);
9412
+ }
9413
+
9414
+ static void start_object(upb_json_parser *p) {
9415
+ upb_sink_startmsg(&p->top->sink);
9416
+ }
9417
+
9418
+ static void end_object(upb_json_parser *p) {
9419
+ upb_status status;
9420
+ upb_sink_endmsg(&p->top->sink, &status);
9421
+ }
9422
+
9423
+
9096
9424
  #define CHECK_RETURN_TOP(x) if (!(x)) goto error
9097
9425
 
9426
+
9427
+ /* The actual parser **********************************************************/
9428
+
9098
9429
  // What follows is the Ragel parser itself. The language is specified in Ragel
9099
9430
  // and the actions call our C functions above.
9431
+ //
9432
+ // Ragel has an extensive set of functionality, and we use only a small part of
9433
+ // it. There are many action types but we only use a few:
9434
+ //
9435
+ // ">" -- transition into a machine
9436
+ // "%" -- transition out of a machine
9437
+ // "@" -- transition into a final state of a machine.
9438
+ //
9439
+ // "@" transitions are tricky because a machine can transition into a final
9440
+ // state repeatedly. But in some cases we know this can't happen, for example
9441
+ // a string which is delimited by a final '"' can only transition into its
9442
+ // final state once, when the closing '"' is seen.
9100
9443
 
9101
- #line 596 "upb/json/parser.rl"
9102
9444
 
9445
+ #line 904 "upb/json/parser.rl"
9103
9446
 
9104
9447
 
9105
- #line 514 "upb/json/parser.c"
9448
+
9449
+ #line 816 "upb/json/parser.c"
9106
9450
  static const char _json_actions[] = {
9107
9451
  0, 1, 0, 1, 2, 1, 3, 1,
9108
- 4, 1, 5, 1, 6, 1, 7, 1,
9109
- 9, 1, 11, 1, 12, 1, 13, 1,
9110
- 14, 1, 15, 1, 16, 1, 24, 1,
9111
- 26, 2, 3, 7, 2, 5, 2, 2,
9112
- 5, 7, 2, 10, 8, 2, 12, 14,
9113
- 2, 13, 14, 2, 17, 1, 2, 18,
9114
- 26, 2, 19, 8, 2, 20, 26, 2,
9115
- 21, 26, 2, 22, 26, 2, 23, 26,
9116
- 2, 25, 26, 3, 13, 10, 8
9452
+ 5, 1, 6, 1, 7, 1, 8, 1,
9453
+ 10, 1, 12, 1, 13, 1, 14, 1,
9454
+ 15, 1, 16, 1, 17, 1, 21, 1,
9455
+ 25, 1, 27, 2, 3, 8, 2, 4,
9456
+ 5, 2, 6, 2, 2, 6, 8, 2,
9457
+ 11, 9, 2, 13, 15, 2, 14, 15,
9458
+ 2, 18, 1, 2, 19, 27, 2, 20,
9459
+ 9, 2, 22, 27, 2, 23, 27, 2,
9460
+ 24, 27, 2, 26, 27, 3, 14, 11,
9461
+ 9
9117
9462
  };
9118
9463
 
9119
9464
  static const unsigned char _json_key_offsets[] = {
9120
- 0, 0, 4, 9, 14, 18, 22, 27,
9121
- 32, 37, 41, 45, 48, 51, 53, 57,
9122
- 61, 63, 65, 70, 72, 74, 83, 89,
9123
- 95, 101, 107, 109, 118, 118, 118, 123,
9124
- 128, 133, 133, 134, 135, 136, 137, 137,
9125
- 138, 139, 140, 140, 141, 142, 143, 143,
9126
- 148, 153, 157, 161, 166, 171, 176, 180,
9127
- 180, 183, 183, 183
9465
+ 0, 0, 4, 9, 14, 15, 19, 24,
9466
+ 29, 34, 38, 42, 45, 48, 50, 54,
9467
+ 58, 60, 62, 67, 69, 71, 80, 86,
9468
+ 92, 98, 104, 106, 115, 116, 116, 116,
9469
+ 121, 126, 131, 132, 133, 134, 135, 135,
9470
+ 136, 137, 138, 138, 139, 140, 141, 141,
9471
+ 146, 151, 152, 156, 161, 166, 171, 175,
9472
+ 175, 178, 178, 178
9128
9473
  };
9129
9474
 
9130
9475
  static const char _json_trans_keys[] = {
9131
9476
  32, 123, 9, 13, 32, 34, 125, 9,
9132
- 13, 32, 34, 125, 9, 13, 32, 58,
9133
- 9, 13, 32, 58, 9, 13, 32, 93,
9134
- 125, 9, 13, 32, 44, 125, 9, 13,
9135
- 32, 44, 125, 9, 13, 32, 34, 9,
9136
- 13, 45, 48, 49, 57, 48, 49, 57,
9137
- 46, 69, 101, 48, 57, 69, 101, 48,
9138
- 57, 43, 45, 48, 57, 48, 57, 48,
9139
- 57, 46, 69, 101, 48, 57, 34, 92,
9140
- 34, 92, 34, 47, 92, 98, 102, 110,
9141
- 114, 116, 117, 48, 57, 65, 70, 97,
9142
- 102, 48, 57, 65, 70, 97, 102, 48,
9143
- 57, 65, 70, 97, 102, 48, 57, 65,
9144
- 70, 97, 102, 34, 92, 34, 45, 91,
9145
- 102, 110, 116, 123, 48, 57, 32, 93,
9146
- 125, 9, 13, 32, 44, 93, 9, 13,
9147
- 32, 93, 125, 9, 13, 97, 108, 115,
9148
- 101, 117, 108, 108, 114, 117, 101, 32,
9149
- 34, 125, 9, 13, 32, 34, 125, 9,
9150
- 13, 32, 58, 9, 13, 32, 58, 9,
9151
- 13, 32, 93, 125, 9, 13, 32, 44,
9152
- 125, 9, 13, 32, 44, 125, 9, 13,
9153
- 32, 34, 9, 13, 32, 9, 13, 0
9477
+ 13, 32, 34, 125, 9, 13, 34, 32,
9478
+ 58, 9, 13, 32, 93, 125, 9, 13,
9479
+ 32, 44, 125, 9, 13, 32, 44, 125,
9480
+ 9, 13, 32, 34, 9, 13, 45, 48,
9481
+ 49, 57, 48, 49, 57, 46, 69, 101,
9482
+ 48, 57, 69, 101, 48, 57, 43, 45,
9483
+ 48, 57, 48, 57, 48, 57, 46, 69,
9484
+ 101, 48, 57, 34, 92, 34, 92, 34,
9485
+ 47, 92, 98, 102, 110, 114, 116, 117,
9486
+ 48, 57, 65, 70, 97, 102, 48, 57,
9487
+ 65, 70, 97, 102, 48, 57, 65, 70,
9488
+ 97, 102, 48, 57, 65, 70, 97, 102,
9489
+ 34, 92, 34, 45, 91, 102, 110, 116,
9490
+ 123, 48, 57, 34, 32, 93, 125, 9,
9491
+ 13, 32, 44, 93, 9, 13, 32, 93,
9492
+ 125, 9, 13, 97, 108, 115, 101, 117,
9493
+ 108, 108, 114, 117, 101, 32, 34, 125,
9494
+ 9, 13, 32, 34, 125, 9, 13, 34,
9495
+ 32, 58, 9, 13, 32, 93, 125, 9,
9496
+ 13, 32, 44, 125, 9, 13, 32, 44,
9497
+ 125, 9, 13, 32, 34, 9, 13, 32,
9498
+ 9, 13, 0
9154
9499
  };
9155
9500
 
9156
9501
  static const char _json_single_lengths[] = {
9157
- 0, 2, 3, 3, 2, 2, 3, 3,
9502
+ 0, 2, 3, 3, 1, 2, 3, 3,
9158
9503
  3, 2, 2, 1, 3, 0, 2, 2,
9159
9504
  0, 0, 3, 2, 2, 9, 0, 0,
9160
- 0, 0, 2, 7, 0, 0, 3, 3,
9161
- 3, 0, 1, 1, 1, 1, 0, 1,
9505
+ 0, 0, 2, 7, 1, 0, 0, 3,
9506
+ 3, 3, 1, 1, 1, 1, 0, 1,
9162
9507
  1, 1, 0, 1, 1, 1, 0, 3,
9163
- 3, 2, 2, 3, 3, 3, 2, 0,
9508
+ 3, 1, 2, 3, 3, 3, 2, 0,
9164
9509
  1, 0, 0, 0
9165
9510
  };
9166
9511
 
9167
9512
  static const char _json_range_lengths[] = {
9168
- 0, 1, 1, 1, 1, 1, 1, 1,
9513
+ 0, 1, 1, 1, 0, 1, 1, 1,
9169
9514
  1, 1, 1, 1, 0, 1, 1, 1,
9170
9515
  1, 1, 1, 0, 0, 0, 3, 3,
9171
- 3, 3, 0, 1, 0, 0, 1, 1,
9172
- 1, 0, 0, 0, 0, 0, 0, 0,
9516
+ 3, 3, 0, 1, 0, 0, 0, 1,
9517
+ 1, 1, 0, 0, 0, 0, 0, 0,
9173
9518
  0, 0, 0, 0, 0, 0, 0, 1,
9174
- 1, 1, 1, 1, 1, 1, 1, 0,
9519
+ 1, 0, 1, 1, 1, 1, 1, 0,
9175
9520
  1, 0, 0, 0
9176
9521
  };
9177
9522
 
9178
9523
  static const short _json_index_offsets[] = {
9179
- 0, 0, 4, 9, 14, 18, 22, 27,
9180
- 32, 37, 41, 45, 48, 52, 54, 58,
9181
- 62, 64, 66, 71, 74, 77, 87, 91,
9182
- 95, 99, 103, 106, 115, 116, 117, 122,
9183
- 127, 132, 133, 135, 137, 139, 141, 142,
9184
- 144, 146, 148, 149, 151, 153, 155, 156,
9185
- 161, 166, 170, 174, 179, 184, 189, 193,
9186
- 194, 197, 198, 199
9524
+ 0, 0, 4, 9, 14, 16, 20, 25,
9525
+ 30, 35, 39, 43, 46, 50, 52, 56,
9526
+ 60, 62, 64, 69, 72, 75, 85, 89,
9527
+ 93, 97, 101, 104, 113, 115, 116, 117,
9528
+ 122, 127, 132, 134, 136, 138, 140, 141,
9529
+ 143, 145, 147, 148, 150, 152, 154, 155,
9530
+ 160, 165, 167, 171, 176, 181, 186, 190,
9531
+ 191, 194, 195, 196
9187
9532
  };
9188
9533
 
9189
9534
  static const char _json_indicies[] = {
9190
9535
  0, 2, 0, 1, 3, 4, 5, 3,
9191
- 1, 6, 7, 8, 6, 1, 9, 10,
9192
- 9, 1, 11, 12, 11, 1, 12, 1,
9193
- 1, 12, 13, 14, 15, 16, 14, 1,
9194
- 17, 18, 8, 17, 1, 18, 7, 18,
9195
- 1, 19, 20, 21, 1, 20, 21, 1,
9196
- 23, 24, 24, 22, 25, 1, 24, 24,
9197
- 25, 22, 26, 26, 27, 1, 27, 1,
9198
- 27, 22, 23, 24, 24, 21, 22, 29,
9199
- 30, 28, 32, 33, 31, 34, 34, 34,
9200
- 34, 34, 34, 34, 34, 35, 1, 36,
9201
- 36, 36, 1, 37, 37, 37, 1, 38,
9202
- 38, 38, 1, 39, 39, 39, 1, 41,
9203
- 42, 40, 43, 44, 45, 46, 47, 48,
9204
- 49, 44, 1, 50, 51, 53, 54, 1,
9536
+ 1, 6, 7, 8, 6, 1, 9, 1,
9537
+ 10, 11, 10, 1, 11, 1, 1, 11,
9538
+ 12, 13, 14, 15, 13, 1, 16, 17,
9539
+ 8, 16, 1, 17, 7, 17, 1, 18,
9540
+ 19, 20, 1, 19, 20, 1, 22, 23,
9541
+ 23, 21, 24, 1, 23, 23, 24, 21,
9542
+ 25, 25, 26, 1, 26, 1, 26, 21,
9543
+ 22, 23, 23, 20, 21, 28, 29, 27,
9544
+ 31, 32, 30, 33, 33, 33, 33, 33,
9545
+ 33, 33, 33, 34, 1, 35, 35, 35,
9546
+ 1, 36, 36, 36, 1, 37, 37, 37,
9547
+ 1, 38, 38, 38, 1, 40, 41, 39,
9548
+ 42, 43, 44, 45, 46, 47, 48, 43,
9549
+ 1, 49, 1, 50, 51, 53, 54, 1,
9205
9550
  53, 52, 55, 56, 54, 55, 1, 56,
9206
- 1, 1, 56, 52, 57, 58, 1, 59,
9207
- 1, 60, 1, 61, 1, 62, 63, 1,
9208
- 64, 1, 65, 1, 66, 67, 1, 68,
9209
- 1, 69, 1, 70, 71, 72, 73, 71,
9210
- 1, 74, 75, 76, 74, 1, 77, 78,
9211
- 77, 1, 79, 80, 79, 1, 80, 1,
9212
- 1, 80, 81, 82, 83, 84, 82, 1,
9213
- 85, 86, 76, 85, 1, 86, 75, 86,
9214
- 1, 87, 88, 88, 1, 1, 1, 1,
9215
- 0
9551
+ 1, 1, 56, 52, 57, 1, 58, 1,
9552
+ 59, 1, 60, 1, 61, 62, 1, 63,
9553
+ 1, 64, 1, 65, 66, 1, 67, 1,
9554
+ 68, 1, 69, 70, 71, 72, 70, 1,
9555
+ 73, 74, 75, 73, 1, 76, 1, 77,
9556
+ 78, 77, 1, 78, 1, 1, 78, 79,
9557
+ 80, 81, 82, 80, 1, 83, 84, 75,
9558
+ 83, 1, 84, 74, 84, 1, 85, 86,
9559
+ 86, 1, 1, 1, 1, 0
9216
9560
  };
9217
9561
 
9218
9562
  static const char _json_trans_targs[] = {
9219
9563
  1, 0, 2, 3, 4, 56, 3, 4,
9220
- 56, 5, 6, 5, 6, 7, 8, 9,
9221
- 56, 8, 9, 11, 12, 18, 57, 13,
9222
- 15, 14, 16, 17, 20, 58, 21, 20,
9223
- 58, 21, 19, 22, 23, 24, 25, 26,
9224
- 20, 58, 21, 28, 29, 30, 34, 39,
9225
- 43, 47, 59, 59, 31, 30, 33, 31,
9226
- 32, 59, 35, 36, 37, 38, 59, 40,
9227
- 41, 42, 59, 44, 45, 46, 59, 48,
9228
- 49, 55, 48, 49, 55, 50, 51, 50,
9229
- 51, 52, 53, 54, 55, 53, 54, 59,
9230
- 56
9564
+ 56, 5, 5, 6, 7, 8, 9, 56,
9565
+ 8, 9, 11, 12, 18, 57, 13, 15,
9566
+ 14, 16, 17, 20, 58, 21, 20, 58,
9567
+ 21, 19, 22, 23, 24, 25, 26, 20,
9568
+ 58, 21, 28, 30, 31, 34, 39, 43,
9569
+ 47, 29, 59, 59, 32, 31, 29, 32,
9570
+ 33, 35, 36, 37, 38, 59, 40, 41,
9571
+ 42, 59, 44, 45, 46, 59, 48, 49,
9572
+ 55, 48, 49, 55, 50, 50, 51, 52,
9573
+ 53, 54, 55, 53, 54, 59, 56
9231
9574
  };
9232
9575
 
9233
9576
  static const char _json_trans_actions[] = {
9234
- 0, 0, 0, 21, 75, 48, 0, 42,
9235
- 23, 17, 17, 0, 0, 15, 19, 19,
9236
- 45, 0, 0, 0, 0, 0, 1, 0,
9237
- 0, 0, 0, 0, 3, 13, 0, 0,
9238
- 33, 5, 11, 0, 7, 0, 0, 0,
9239
- 36, 39, 9, 57, 51, 25, 0, 0,
9240
- 0, 29, 60, 54, 15, 0, 27, 0,
9241
- 0, 31, 0, 0, 0, 0, 66, 0,
9242
- 0, 0, 69, 0, 0, 0, 63, 21,
9243
- 75, 48, 0, 42, 23, 17, 17, 0,
9244
- 0, 15, 19, 19, 45, 0, 0, 72,
9245
- 0
9577
+ 0, 0, 0, 21, 77, 53, 0, 47,
9578
+ 23, 17, 0, 0, 15, 19, 19, 50,
9579
+ 0, 0, 0, 0, 0, 1, 0, 0,
9580
+ 0, 0, 0, 3, 13, 0, 0, 35,
9581
+ 5, 11, 0, 38, 7, 7, 7, 41,
9582
+ 44, 9, 62, 56, 25, 0, 0, 0,
9583
+ 31, 29, 33, 59, 15, 0, 27, 0,
9584
+ 0, 0, 0, 0, 0, 68, 0, 0,
9585
+ 0, 71, 0, 0, 0, 65, 21, 77,
9586
+ 53, 0, 47, 23, 17, 0, 0, 15,
9587
+ 19, 19, 50, 0, 0, 74, 0
9246
9588
  };
9247
9589
 
9248
9590
  static const int json_start = 1;
@@ -9255,13 +9597,14 @@ static const int json_en_value_machine = 27;
9255
9597
  static const int json_en_main = 1;
9256
9598
 
9257
9599
 
9258
- #line 599 "upb/json/parser.rl"
9600
+ #line 907 "upb/json/parser.rl"
9259
9601
 
9260
9602
  size_t parse(void *closure, const void *hd, const char *buf, size_t size,
9261
9603
  const upb_bufhandle *handle) {
9262
9604
  UPB_UNUSED(hd);
9263
9605
  UPB_UNUSED(handle);
9264
9606
  upb_json_parser *parser = closure;
9607
+ parser->handle = handle;
9265
9608
 
9266
9609
  // Variables used by Ragel's generated code.
9267
9610
  int cs = parser->current_state;
@@ -9271,8 +9614,10 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size,
9271
9614
  const char *p = buf;
9272
9615
  const char *pe = buf + size;
9273
9616
 
9617
+ capture_resume(parser, buf);
9618
+
9274
9619
 
9275
- #line 684 "upb/json/parser.c"
9620
+ #line 987 "upb/json/parser.c"
9276
9621
  {
9277
9622
  int _klen;
9278
9623
  unsigned int _trans;
@@ -9347,114 +9692,118 @@ _match:
9347
9692
  switch ( *_acts++ )
9348
9693
  {
9349
9694
  case 0:
9350
- #line 517 "upb/json/parser.rl"
9695
+ #line 819 "upb/json/parser.rl"
9351
9696
  { p--; {cs = stack[--top]; goto _again;} }
9352
9697
  break;
9353
9698
  case 1:
9354
- #line 518 "upb/json/parser.rl"
9699
+ #line 820 "upb/json/parser.rl"
9355
9700
  { p--; {stack[top++] = cs; cs = 10; goto _again;} }
9356
9701
  break;
9357
9702
  case 2:
9358
- #line 522 "upb/json/parser.rl"
9703
+ #line 824 "upb/json/parser.rl"
9359
9704
  { start_text(parser, p); }
9360
9705
  break;
9361
9706
  case 3:
9362
- #line 523 "upb/json/parser.rl"
9363
- { CHECK_RETURN_TOP(end_text(parser, p, false)); }
9707
+ #line 825 "upb/json/parser.rl"
9708
+ { CHECK_RETURN_TOP(end_text(parser, p)); }
9364
9709
  break;
9365
9710
  case 4:
9366
- #line 529 "upb/json/parser.rl"
9367
- { start_hex(parser, p); }
9711
+ #line 831 "upb/json/parser.rl"
9712
+ { start_hex(parser); }
9368
9713
  break;
9369
9714
  case 5:
9370
- #line 530 "upb/json/parser.rl"
9371
- { hex(parser, p); }
9715
+ #line 832 "upb/json/parser.rl"
9716
+ { hexdigit(parser, p); }
9372
9717
  break;
9373
9718
  case 6:
9374
- #line 536 "upb/json/parser.rl"
9375
- { escape(parser, p); }
9719
+ #line 833 "upb/json/parser.rl"
9720
+ { CHECK_RETURN_TOP(end_hex(parser)); }
9376
9721
  break;
9377
9722
  case 7:
9378
- #line 539 "upb/json/parser.rl"
9379
- { {cs = stack[--top]; goto _again;} }
9723
+ #line 839 "upb/json/parser.rl"
9724
+ { CHECK_RETURN_TOP(escape(parser, p)); }
9380
9725
  break;
9381
9726
  case 8:
9382
- #line 540 "upb/json/parser.rl"
9383
- { {stack[top++] = cs; cs = 19; goto _again;} }
9727
+ #line 845 "upb/json/parser.rl"
9728
+ { p--; {cs = stack[--top]; goto _again;} }
9384
9729
  break;
9385
9730
  case 9:
9386
- #line 542 "upb/json/parser.rl"
9387
- { p--; {stack[top++] = cs; cs = 27; goto _again;} }
9731
+ #line 848 "upb/json/parser.rl"
9732
+ { {stack[top++] = cs; cs = 19; goto _again;} }
9388
9733
  break;
9389
9734
  case 10:
9390
- #line 547 "upb/json/parser.rl"
9391
- { start_member(parser); }
9735
+ #line 850 "upb/json/parser.rl"
9736
+ { p--; {stack[top++] = cs; cs = 27; goto _again;} }
9392
9737
  break;
9393
9738
  case 11:
9394
- #line 548 "upb/json/parser.rl"
9395
- { CHECK_RETURN_TOP(end_member(parser)); }
9739
+ #line 855 "upb/json/parser.rl"
9740
+ { start_member(parser); }
9396
9741
  break;
9397
9742
  case 12:
9398
- #line 551 "upb/json/parser.rl"
9399
- { clear_member(parser); }
9743
+ #line 856 "upb/json/parser.rl"
9744
+ { CHECK_RETURN_TOP(end_member(parser)); }
9400
9745
  break;
9401
9746
  case 13:
9402
- #line 557 "upb/json/parser.rl"
9403
- { start_object(parser); }
9747
+ #line 859 "upb/json/parser.rl"
9748
+ { clear_member(parser); }
9404
9749
  break;
9405
9750
  case 14:
9406
- #line 560 "upb/json/parser.rl"
9407
- { end_object(parser); }
9751
+ #line 865 "upb/json/parser.rl"
9752
+ { start_object(parser); }
9408
9753
  break;
9409
9754
  case 15:
9410
- #line 566 "upb/json/parser.rl"
9411
- { CHECK_RETURN_TOP(start_array(parser)); }
9755
+ #line 868 "upb/json/parser.rl"
9756
+ { end_object(parser); }
9412
9757
  break;
9413
9758
  case 16:
9414
- #line 570 "upb/json/parser.rl"
9415
- { end_array(parser); }
9759
+ #line 874 "upb/json/parser.rl"
9760
+ { CHECK_RETURN_TOP(start_array(parser)); }
9416
9761
  break;
9417
9762
  case 17:
9418
- #line 575 "upb/json/parser.rl"
9419
- { start_number(parser, p); }
9763
+ #line 878 "upb/json/parser.rl"
9764
+ { end_array(parser); }
9420
9765
  break;
9421
9766
  case 18:
9422
- #line 576 "upb/json/parser.rl"
9423
- { end_number(parser, p); }
9767
+ #line 883 "upb/json/parser.rl"
9768
+ { start_number(parser, p); }
9424
9769
  break;
9425
9770
  case 19:
9426
- #line 578 "upb/json/parser.rl"
9427
- { CHECK_RETURN_TOP(start_stringval(parser)); }
9771
+ #line 884 "upb/json/parser.rl"
9772
+ { CHECK_RETURN_TOP(end_number(parser, p)); }
9428
9773
  break;
9429
9774
  case 20:
9430
- #line 579 "upb/json/parser.rl"
9431
- { end_stringval(parser); }
9775
+ #line 886 "upb/json/parser.rl"
9776
+ { CHECK_RETURN_TOP(start_stringval(parser)); }
9432
9777
  break;
9433
9778
  case 21:
9434
- #line 581 "upb/json/parser.rl"
9435
- { CHECK_RETURN_TOP(parser_putbool(parser, true)); }
9779
+ #line 887 "upb/json/parser.rl"
9780
+ { CHECK_RETURN_TOP(end_stringval(parser)); }
9436
9781
  break;
9437
9782
  case 22:
9438
- #line 583 "upb/json/parser.rl"
9439
- { CHECK_RETURN_TOP(parser_putbool(parser, false)); }
9783
+ #line 889 "upb/json/parser.rl"
9784
+ { CHECK_RETURN_TOP(parser_putbool(parser, true)); }
9440
9785
  break;
9441
9786
  case 23:
9442
- #line 585 "upb/json/parser.rl"
9443
- { /* null value */ }
9787
+ #line 891 "upb/json/parser.rl"
9788
+ { CHECK_RETURN_TOP(parser_putbool(parser, false)); }
9444
9789
  break;
9445
9790
  case 24:
9446
- #line 587 "upb/json/parser.rl"
9447
- { CHECK_RETURN_TOP(start_subobject(parser)); }
9791
+ #line 893 "upb/json/parser.rl"
9792
+ { /* null value */ }
9448
9793
  break;
9449
9794
  case 25:
9450
- #line 588 "upb/json/parser.rl"
9451
- { end_subobject(parser); }
9795
+ #line 895 "upb/json/parser.rl"
9796
+ { CHECK_RETURN_TOP(start_subobject(parser)); }
9452
9797
  break;
9453
9798
  case 26:
9454
- #line 593 "upb/json/parser.rl"
9799
+ #line 896 "upb/json/parser.rl"
9800
+ { end_subobject(parser); }
9801
+ break;
9802
+ case 27:
9803
+ #line 901 "upb/json/parser.rl"
9455
9804
  { p--; {cs = stack[--top]; goto _again;} }
9456
9805
  break;
9457
- #line 866 "upb/json/parser.c"
9806
+ #line 1173 "upb/json/parser.c"
9458
9807
  }
9459
9808
  }
9460
9809
 
@@ -9467,10 +9816,12 @@ _again:
9467
9816
  _out: {}
9468
9817
  }
9469
9818
 
9470
- #line 615 "upb/json/parser.rl"
9819
+ #line 926 "upb/json/parser.rl"
9471
9820
 
9472
9821
  if (p != pe) {
9473
9822
  upb_status_seterrf(parser->status, "Parse error at %s\n", p);
9823
+ } else {
9824
+ capture_suspend(parser, &p);
9474
9825
  }
9475
9826
 
9476
9827
  error:
@@ -9487,8 +9838,13 @@ bool end(void *closure, const void *hd) {
9487
9838
  return true;
9488
9839
  }
9489
9840
 
9841
+
9842
+ /* Public API *****************************************************************/
9843
+
9490
9844
  void upb_json_parser_init(upb_json_parser *p, upb_status *status) {
9491
9845
  p->limit = p->stack + UPB_JSON_MAX_DEPTH;
9846
+ p->accumulate_buf = NULL;
9847
+ p->accumulate_buf_size = 0;
9492
9848
  upb_byteshandler_init(&p->input_handler_);
9493
9849
  upb_byteshandler_setstring(&p->input_handler_, parse, NULL);
9494
9850
  upb_byteshandler_setendstr(&p->input_handler_, end, NULL);
@@ -9498,6 +9854,7 @@ void upb_json_parser_init(upb_json_parser *p, upb_status *status) {
9498
9854
 
9499
9855
  void upb_json_parser_uninit(upb_json_parser *p) {
9500
9856
  upb_byteshandler_uninit(&p->input_handler_);
9857
+ free(p->accumulate_buf);
9501
9858
  }
9502
9859
 
9503
9860
  void upb_json_parser_reset(upb_json_parser *p) {
@@ -9508,18 +9865,18 @@ void upb_json_parser_reset(upb_json_parser *p) {
9508
9865
  int top;
9509
9866
  // Emit Ragel initialization of the parser.
9510
9867
 
9511
- #line 920 "upb/json/parser.c"
9868
+ #line 1235 "upb/json/parser.c"
9512
9869
  {
9513
9870
  cs = json_start;
9514
9871
  top = 0;
9515
9872
  }
9516
9873
 
9517
- #line 655 "upb/json/parser.rl"
9874
+ #line 974 "upb/json/parser.rl"
9518
9875
  p->current_state = cs;
9519
9876
  p->parser_top = top;
9520
- p->text_begin = NULL;
9521
- p->accumulated = NULL;
9522
- p->accumulated_len = 0;
9877
+ accumulate_clear(p);
9878
+ p->multipart_state = MULTIPART_INACTIVE;
9879
+ p->capture = NULL;
9523
9880
  }
9524
9881
 
9525
9882
  void upb_json_parser_resetoutput(upb_json_parser *p, upb_sink *sink) {