google-protobuf 3.0.0.alpha.1.0 → 3.0.0.alpha.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of google-protobuf might be problematic. Click here for more details.
- checksums.yaml +8 -8
- data/ext/google/protobuf_c/defs.c +107 -6
- data/ext/google/protobuf_c/encode_decode.c +327 -90
- data/ext/google/protobuf_c/extconf.rb +1 -1
- data/ext/google/protobuf_c/map.c +805 -0
- data/ext/google/protobuf_c/message.c +12 -3
- data/ext/google/protobuf_c/protobuf.c +1 -0
- data/ext/google/protobuf_c/protobuf.h +82 -4
- data/ext/google/protobuf_c/repeated_field.c +5 -1
- data/ext/google/protobuf_c/storage.c +148 -41
- data/ext/google/protobuf_c/upb.c +810 -453
- data/ext/google/protobuf_c/upb.h +71 -12
- data/tests/basic.rb +262 -3
- metadata +3 -2
data/ext/google/protobuf_c/upb.c
CHANGED
@@ -1269,6 +1269,7 @@ upb_msgdef *upb_msgdef_new(const void *owner) {
|
|
1269
1269
|
if (!upb_def_init(UPB_UPCAST(m), UPB_DEF_MSG, &vtbl, owner)) goto err2;
|
1270
1270
|
if (!upb_inttable_init(&m->itof, UPB_CTYPE_PTR)) goto err2;
|
1271
1271
|
if (!upb_strtable_init(&m->ntof, UPB_CTYPE_PTR)) goto err1;
|
1272
|
+
m->map_entry = false;
|
1272
1273
|
return m;
|
1273
1274
|
|
1274
1275
|
err1:
|
@@ -1283,6 +1284,7 @@ upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner) {
|
|
1283
1284
|
if (!newm) return NULL;
|
1284
1285
|
bool ok = upb_def_setfullname(UPB_UPCAST(newm),
|
1285
1286
|
upb_def_fullname(UPB_UPCAST(m)), NULL);
|
1287
|
+
newm->map_entry = m->map_entry;
|
1286
1288
|
UPB_ASSERT_VAR(ok, ok);
|
1287
1289
|
upb_msg_iter i;
|
1288
1290
|
for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
|
@@ -1386,6 +1388,15 @@ int upb_msgdef_numfields(const upb_msgdef *m) {
|
|
1386
1388
|
return upb_strtable_count(&m->ntof);
|
1387
1389
|
}
|
1388
1390
|
|
1391
|
+
void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry) {
|
1392
|
+
assert(!upb_msgdef_isfrozen(m));
|
1393
|
+
m->map_entry = map_entry;
|
1394
|
+
}
|
1395
|
+
|
1396
|
+
bool upb_msgdef_mapentry(const upb_msgdef *m) {
|
1397
|
+
return m->map_entry;
|
1398
|
+
}
|
1399
|
+
|
1389
1400
|
void upb_msg_begin(upb_msg_iter *iter, const upb_msgdef *m) {
|
1390
1401
|
upb_inttable_begin(iter, &m->itof);
|
1391
1402
|
}
|
@@ -3401,31 +3412,32 @@ int log2ceil(uint64_t v) {
|
|
3401
3412
|
}
|
3402
3413
|
|
3403
3414
|
char *upb_strdup(const char *s) {
|
3404
|
-
|
3415
|
+
return upb_strdup2(s, strlen(s));
|
3416
|
+
}
|
3417
|
+
|
3418
|
+
char *upb_strdup2(const char *s, size_t len) {
|
3419
|
+
// Prevent overflow errors.
|
3420
|
+
if (len == SIZE_MAX) return NULL;
|
3421
|
+
// Always null-terminate, even if binary data; but don't rely on the input to
|
3422
|
+
// have a null-terminating byte since it may be a raw binary buffer.
|
3423
|
+
size_t n = len + 1;
|
3405
3424
|
char *p = malloc(n);
|
3406
|
-
if (p)
|
3425
|
+
if (p) {
|
3426
|
+
memcpy(p, s, len);
|
3427
|
+
p[len] = 0;
|
3428
|
+
}
|
3407
3429
|
return p;
|
3408
3430
|
}
|
3409
3431
|
|
3410
3432
|
// A type to represent the lookup key of either a strtable or an inttable.
|
3411
|
-
// This is like upb_tabkey, but can carry a size also to allow lookups of
|
3412
|
-
// non-NULL-terminated strings (we don't store string lengths in the table).
|
3413
3433
|
typedef struct {
|
3414
3434
|
upb_tabkey key;
|
3415
|
-
uint32_t len; // For string keys only.
|
3416
3435
|
} lookupkey_t;
|
3417
3436
|
|
3418
|
-
static lookupkey_t strkey(const char *str) {
|
3419
|
-
lookupkey_t k;
|
3420
|
-
k.key.str = (char*)str;
|
3421
|
-
k.len = strlen(str);
|
3422
|
-
return k;
|
3423
|
-
}
|
3424
|
-
|
3425
3437
|
static lookupkey_t strkey2(const char *str, size_t len) {
|
3426
3438
|
lookupkey_t k;
|
3427
|
-
k.key.str = (char*)str;
|
3428
|
-
k.
|
3439
|
+
k.key.s.str = (char*)str;
|
3440
|
+
k.key.s.length = len;
|
3429
3441
|
return k;
|
3430
3442
|
}
|
3431
3443
|
|
@@ -3607,11 +3619,12 @@ static size_t begin(const upb_table *t) {
|
|
3607
3619
|
// A simple "subclass" of upb_table that only adds a hash function for strings.
|
3608
3620
|
|
3609
3621
|
static uint32_t strhash(upb_tabkey key) {
|
3610
|
-
return MurmurHash2(key.str,
|
3622
|
+
return MurmurHash2(key.s.str, key.s.length, 0);
|
3611
3623
|
}
|
3612
3624
|
|
3613
3625
|
static bool streql(upb_tabkey k1, lookupkey_t k2) {
|
3614
|
-
return
|
3626
|
+
return k1.s.length == k2.key.s.length &&
|
3627
|
+
memcmp(k1.s.str, k2.key.s.str, k1.s.length) == 0;
|
3615
3628
|
}
|
3616
3629
|
|
3617
3630
|
bool upb_strtable_init(upb_strtable *t, upb_ctype_t ctype) {
|
@@ -3620,7 +3633,7 @@ bool upb_strtable_init(upb_strtable *t, upb_ctype_t ctype) {
|
|
3620
3633
|
|
3621
3634
|
void upb_strtable_uninit(upb_strtable *t) {
|
3622
3635
|
for (size_t i = 0; i < upb_table_size(&t->t); i++)
|
3623
|
-
free((void*)t->t.entries[i].key.str);
|
3636
|
+
free((void*)t->t.entries[i].key.s.str);
|
3624
3637
|
uninit(&t->t);
|
3625
3638
|
}
|
3626
3639
|
|
@@ -3631,26 +3644,30 @@ bool upb_strtable_resize(upb_strtable *t, size_t size_lg2) {
|
|
3631
3644
|
upb_strtable_iter i;
|
3632
3645
|
upb_strtable_begin(&i, t);
|
3633
3646
|
for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) {
|
3634
|
-
|
3635
|
-
&new_table,
|
3647
|
+
upb_strtable_insert2(
|
3648
|
+
&new_table,
|
3649
|
+
upb_strtable_iter_key(&i),
|
3650
|
+
upb_strtable_iter_keylength(&i),
|
3651
|
+
upb_strtable_iter_value(&i));
|
3636
3652
|
}
|
3637
3653
|
upb_strtable_uninit(t);
|
3638
3654
|
*t = new_table;
|
3639
3655
|
return true;
|
3640
3656
|
}
|
3641
3657
|
|
3642
|
-
bool
|
3658
|
+
bool upb_strtable_insert2(upb_strtable *t, const char *k, size_t len,
|
3659
|
+
upb_value v) {
|
3643
3660
|
if (isfull(&t->t)) {
|
3644
3661
|
// Need to resize. New table of double the size, add old elements to it.
|
3645
3662
|
if (!upb_strtable_resize(t, t->t.size_lg2 + 1)) {
|
3646
3663
|
return false;
|
3647
3664
|
}
|
3648
3665
|
}
|
3649
|
-
if ((k =
|
3666
|
+
if ((k = upb_strdup2(k, len)) == NULL) return false;
|
3650
3667
|
|
3651
|
-
lookupkey_t key =
|
3652
|
-
uint32_t hash = MurmurHash2(key.key.str, key.
|
3653
|
-
insert(&t->t,
|
3668
|
+
lookupkey_t key = strkey2(k, len);
|
3669
|
+
uint32_t hash = MurmurHash2(key.key.s.str, key.key.s.length, 0);
|
3670
|
+
insert(&t->t, key, v, hash, &strhash, &streql);
|
3654
3671
|
return true;
|
3655
3672
|
}
|
3656
3673
|
|
@@ -3660,11 +3677,12 @@ bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
|
|
3660
3677
|
return lookup(&t->t, strkey2(key, len), v, hash, &streql);
|
3661
3678
|
}
|
3662
3679
|
|
3663
|
-
bool
|
3680
|
+
bool upb_strtable_remove2(upb_strtable *t, const char *key, size_t len,
|
3681
|
+
upb_value *val) {
|
3664
3682
|
uint32_t hash = MurmurHash2(key, strlen(key), 0);
|
3665
3683
|
upb_tabkey tabkey;
|
3666
|
-
if (rm(&t->t,
|
3667
|
-
free((void*)tabkey.str);
|
3684
|
+
if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) {
|
3685
|
+
free((void*)tabkey.s.str);
|
3668
3686
|
return true;
|
3669
3687
|
} else {
|
3670
3688
|
return false;
|
@@ -3693,7 +3711,12 @@ bool upb_strtable_done(const upb_strtable_iter *i) {
|
|
3693
3711
|
|
3694
3712
|
const char *upb_strtable_iter_key(upb_strtable_iter *i) {
|
3695
3713
|
assert(!upb_strtable_done(i));
|
3696
|
-
return str_tabent(i)->key.str;
|
3714
|
+
return str_tabent(i)->key.s.str;
|
3715
|
+
}
|
3716
|
+
|
3717
|
+
size_t upb_strtable_iter_keylength(upb_strtable_iter *i) {
|
3718
|
+
assert(!upb_strtable_done(i));
|
3719
|
+
return str_tabent(i)->key.s.length;
|
3697
3720
|
}
|
3698
3721
|
|
3699
3722
|
upb_value upb_strtable_iter_value(const upb_strtable_iter *i) {
|
@@ -4209,8 +4232,10 @@ static void nullz(upb_status *status) {
|
|
4209
4232
|
}
|
4210
4233
|
|
4211
4234
|
void upb_status_clear(upb_status *status) {
|
4212
|
-
|
4213
|
-
|
4235
|
+
if (!status) return;
|
4236
|
+
status->ok_ = true;
|
4237
|
+
status->code_ = 0;
|
4238
|
+
status->msg[0] = '\0';
|
4214
4239
|
}
|
4215
4240
|
|
4216
4241
|
bool upb_ok(const upb_status *status) { return status->ok_; }
|
@@ -5977,6 +6002,7 @@ static void putop(compiler *c, opcode op, ...) {
|
|
5977
6002
|
case OP_SETDELIM:
|
5978
6003
|
case OP_HALT:
|
5979
6004
|
case OP_RET:
|
6005
|
+
case OP_DISPATCH:
|
5980
6006
|
put32(c, op);
|
5981
6007
|
break;
|
5982
6008
|
case OP_PARSE_DOUBLE:
|
@@ -6057,7 +6083,7 @@ const char *upb_pbdecoder_getopname(unsigned int op) {
|
|
6057
6083
|
OP(ENDSUBMSG), OP(STARTSTR), OP(STRING), OP(ENDSTR), OP(CALL), OP(RET),
|
6058
6084
|
OP(PUSHLENDELIM), OP(PUSHTAGDELIM), OP(SETDELIM), OP(CHECKDELIM),
|
6059
6085
|
OP(BRANCH), OP(TAG1), OP(TAG2), OP(TAGN), OP(SETDISPATCH), OP(POP),
|
6060
|
-
OP(SETBIGGROUPNUM), OP(HALT),
|
6086
|
+
OP(SETBIGGROUPNUM), OP(DISPATCH), OP(HALT),
|
6061
6087
|
};
|
6062
6088
|
return op > OP_HALT ? names[0] : names[op];
|
6063
6089
|
#undef OP
|
@@ -6089,6 +6115,7 @@ static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) {
|
|
6089
6115
|
upb_handlers_msgdef(method->dest_handlers_)));
|
6090
6116
|
break;
|
6091
6117
|
}
|
6118
|
+
case OP_DISPATCH:
|
6092
6119
|
case OP_STARTMSG:
|
6093
6120
|
case OP_ENDMSG:
|
6094
6121
|
case OP_PUSHLENDELIM:
|
@@ -6434,6 +6461,7 @@ static void compile_method(compiler *c, upb_pbdecodermethod *method) {
|
|
6434
6461
|
putop(c, OP_SETDISPATCH, &method->dispatch);
|
6435
6462
|
putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h);
|
6436
6463
|
label(c, LABEL_FIELD);
|
6464
|
+
uint32_t* start_pc = c->pc;
|
6437
6465
|
upb_msg_iter i;
|
6438
6466
|
for(upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) {
|
6439
6467
|
const upb_fielddef *f = upb_msg_iter_field(&i);
|
@@ -6449,8 +6477,18 @@ static void compile_method(compiler *c, upb_pbdecodermethod *method) {
|
|
6449
6477
|
}
|
6450
6478
|
}
|
6451
6479
|
|
6480
|
+
// If there were no fields, or if no handlers were defined, we need to
|
6481
|
+
// generate a non-empty loop body so that we can at least dispatch for unknown
|
6482
|
+
// fields and check for the end of the message.
|
6483
|
+
if (c->pc == start_pc) {
|
6484
|
+
// Check for end-of-message.
|
6485
|
+
putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
|
6486
|
+
// Unconditionally dispatch.
|
6487
|
+
putop(c, OP_DISPATCH, 0);
|
6488
|
+
}
|
6489
|
+
|
6452
6490
|
// For now we just loop back to the last field of the message (or if none,
|
6453
|
-
// the DISPATCH opcode for the message.
|
6491
|
+
// the DISPATCH opcode for the message).
|
6454
6492
|
putop(c, OP_BRANCH, -LABEL_FIELD);
|
6455
6493
|
|
6456
6494
|
// Insert both a label and a dispatch table entry for this end-of-msg.
|
@@ -7434,6 +7472,9 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
|
|
7434
7472
|
if (result == DECODE_MISMATCH) goto badtag;
|
7435
7473
|
if (result >= 0) return result;
|
7436
7474
|
})
|
7475
|
+
VMCASE(OP_DISPATCH, {
|
7476
|
+
CHECK_RETURN(dispatch(d));
|
7477
|
+
})
|
7437
7478
|
VMCASE(OP_HALT, {
|
7438
7479
|
return size;
|
7439
7480
|
})
|
@@ -7492,7 +7533,8 @@ bool upb_pbdecoder_end(void *closure, const void *handler_data) {
|
|
7492
7533
|
// Rewind from OP_TAG* to OP_CHECKDELIM.
|
7493
7534
|
assert(getop(*d->pc) == OP_TAG1 ||
|
7494
7535
|
getop(*d->pc) == OP_TAG2 ||
|
7495
|
-
getop(*d->pc) == OP_TAGN
|
7536
|
+
getop(*d->pc) == OP_TAGN ||
|
7537
|
+
getop(*d->pc) == OP_DISPATCH);
|
7496
7538
|
d->pc = p;
|
7497
7539
|
}
|
7498
7540
|
upb_pbdecoder_decode(closure, handler_data, &dummy, 0, NULL);
|
@@ -8627,6 +8669,9 @@ upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) {
|
|
8627
8669
|
|
8628
8670
|
#define PARSER_CHECK_RETURN(x) if (!(x)) return false
|
8629
8671
|
|
8672
|
+
// Used to signal that a capture has been suspended.
|
8673
|
+
static char suspend_capture;
|
8674
|
+
|
8630
8675
|
static upb_selector_t getsel_for_handlertype(upb_json_parser *p,
|
8631
8676
|
upb_handlertype_t type) {
|
8632
8677
|
upb_selector_t sel;
|
@@ -8640,41 +8685,6 @@ static upb_selector_t parser_getsel(upb_json_parser *p) {
|
|
8640
8685
|
p, upb_handlers_getprimitivehandlertype(p->top->f));
|
8641
8686
|
}
|
8642
8687
|
|
8643
|
-
static void start_member(upb_json_parser *p) {
|
8644
|
-
assert(!p->top->f);
|
8645
|
-
assert(!p->accumulated);
|
8646
|
-
p->accumulated_len = 0;
|
8647
|
-
}
|
8648
|
-
|
8649
|
-
static bool end_member(upb_json_parser *p) {
|
8650
|
-
// TODO(haberman): support keys that span buffers or have escape sequences.
|
8651
|
-
assert(!p->top->f);
|
8652
|
-
assert(p->accumulated);
|
8653
|
-
const upb_fielddef *f =
|
8654
|
-
upb_msgdef_ntof(p->top->m, p->accumulated, p->accumulated_len);
|
8655
|
-
|
8656
|
-
if (!f) {
|
8657
|
-
// TODO(haberman): Ignore unknown fields if requested/configured to do so.
|
8658
|
-
upb_status_seterrf(p->status, "No such field: %.*s\n",
|
8659
|
-
(int)p->accumulated_len, p->accumulated);
|
8660
|
-
return false;
|
8661
|
-
}
|
8662
|
-
|
8663
|
-
p->top->f = f;
|
8664
|
-
p->accumulated = NULL;
|
8665
|
-
|
8666
|
-
return true;
|
8667
|
-
}
|
8668
|
-
|
8669
|
-
static void start_object(upb_json_parser *p) {
|
8670
|
-
upb_sink_startmsg(&p->top->sink);
|
8671
|
-
}
|
8672
|
-
|
8673
|
-
static void end_object(upb_json_parser *p) {
|
8674
|
-
upb_status status;
|
8675
|
-
upb_sink_endmsg(&p->top->sink, &status);
|
8676
|
-
}
|
8677
|
-
|
8678
8688
|
static bool check_stack(upb_json_parser *p) {
|
8679
8689
|
if ((p->top + 1) == p->limit) {
|
8680
8690
|
upb_status_seterrmsg(p->status, "Nesting too deep");
|
@@ -8684,83 +8694,28 @@ static bool check_stack(upb_json_parser *p) {
|
|
8684
8694
|
return true;
|
8685
8695
|
}
|
8686
8696
|
|
8687
|
-
|
8688
|
-
|
8689
|
-
|
8690
|
-
if (!upb_fielddef_issubmsg(p->top->f)) {
|
8691
|
-
upb_status_seterrf(p->status,
|
8692
|
-
"Object specified for non-message/group field: %s",
|
8693
|
-
upb_fielddef_name(p->top->f));
|
8694
|
-
return false;
|
8695
|
-
}
|
8696
|
-
|
8697
|
-
if (!check_stack(p)) return false;
|
8698
|
-
|
8699
|
-
upb_jsonparser_frame *inner = p->top + 1;
|
8700
|
-
|
8701
|
-
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
|
8702
|
-
upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
|
8703
|
-
inner->m = upb_fielddef_msgsubdef(p->top->f);
|
8704
|
-
inner->f = NULL;
|
8705
|
-
p->top = inner;
|
8697
|
+
// There are GCC/Clang built-ins for overflow checking which we could start
|
8698
|
+
// using if there was any performance benefit to it.
|
8706
8699
|
|
8700
|
+
static bool checked_add(size_t a, size_t b, size_t *c) {
|
8701
|
+
if (SIZE_MAX - a < b) return false;
|
8702
|
+
*c = a + b;
|
8707
8703
|
return true;
|
8708
8704
|
}
|
8709
8705
|
|
8710
|
-
static
|
8711
|
-
|
8712
|
-
|
8713
|
-
|
8714
|
-
|
8715
|
-
|
8716
|
-
static bool start_array(upb_json_parser *p) {
|
8717
|
-
assert(p->top->f);
|
8718
|
-
|
8719
|
-
if (!upb_fielddef_isseq(p->top->f)) {
|
8720
|
-
upb_status_seterrf(p->status,
|
8721
|
-
"Array specified for non-repeated field: %s",
|
8722
|
-
upb_fielddef_name(p->top->f));
|
8723
|
-
return false;
|
8706
|
+
static size_t saturating_multiply(size_t a, size_t b) {
|
8707
|
+
// size_t is unsigned, so this is defined behavior even on overflow.
|
8708
|
+
size_t ret = a * b;
|
8709
|
+
if (b != 0 && ret / b != a) {
|
8710
|
+
ret = SIZE_MAX;
|
8724
8711
|
}
|
8725
|
-
|
8726
|
-
if (!check_stack(p)) return false;
|
8727
|
-
|
8728
|
-
upb_jsonparser_frame *inner = p->top + 1;
|
8729
|
-
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
|
8730
|
-
upb_sink_startseq(&p->top->sink, sel, &inner->sink);
|
8731
|
-
inner->m = p->top->m;
|
8732
|
-
inner->f = p->top->f;
|
8733
|
-
p->top = inner;
|
8734
|
-
|
8735
|
-
return true;
|
8736
|
-
}
|
8737
|
-
|
8738
|
-
static void end_array(upb_json_parser *p) {
|
8739
|
-
assert(p->top > p->stack);
|
8740
|
-
|
8741
|
-
p->top--;
|
8742
|
-
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
|
8743
|
-
upb_sink_endseq(&p->top->sink, sel);
|
8712
|
+
return ret;
|
8744
8713
|
}
|
8745
8714
|
|
8746
|
-
static void clear_member(upb_json_parser *p) { p->top->f = NULL; }
|
8747
|
-
|
8748
|
-
static bool parser_putbool(upb_json_parser *p, bool val) {
|
8749
|
-
if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) {
|
8750
|
-
upb_status_seterrf(p->status,
|
8751
|
-
"Boolean value specified for non-bool field: %s",
|
8752
|
-
upb_fielddef_name(p->top->f));
|
8753
|
-
return false;
|
8754
|
-
}
|
8755
8715
|
|
8756
|
-
|
8757
|
-
UPB_ASSERT_VAR(ok, ok);
|
8758
|
-
return true;
|
8759
|
-
}
|
8716
|
+
/* Base64 decoding ************************************************************/
|
8760
8717
|
|
8761
|
-
|
8762
|
-
p->text_begin = ptr;
|
8763
|
-
}
|
8718
|
+
// TODO(haberman): make this streaming.
|
8764
8719
|
|
8765
8720
|
static const signed char b64table[] = {
|
8766
8721
|
-1, -1, -1, -1, -1, -1, -1, -1,
|
@@ -8880,193 +8835,274 @@ badpadding:
|
|
8880
8835
|
return false;
|
8881
8836
|
}
|
8882
8837
|
|
8883
|
-
static bool end_text(upb_json_parser *p, const char *ptr, bool is_num) {
|
8884
|
-
assert(!p->accumulated); // TODO: handle this case.
|
8885
|
-
p->accumulated = p->text_begin;
|
8886
|
-
p->accumulated_len = ptr - p->text_begin;
|
8887
8838
|
|
8888
|
-
|
8889
|
-
// This is a string field (as opposed to a member name).
|
8890
|
-
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
|
8891
|
-
if (upb_fielddef_type(p->top->f) == UPB_TYPE_BYTES) {
|
8892
|
-
PARSER_CHECK_RETURN(base64_push(p, sel, p->accumulated,
|
8893
|
-
p->accumulated_len));
|
8894
|
-
} else {
|
8895
|
-
upb_sink_putstring(&p->top->sink, sel, p->accumulated, p->accumulated_len, NULL);
|
8896
|
-
}
|
8897
|
-
p->accumulated = NULL;
|
8898
|
-
} else if (p->top->f &&
|
8899
|
-
upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM &&
|
8900
|
-
!is_num) {
|
8901
|
-
|
8902
|
-
// Enum case: resolve enum symbolic name to integer value.
|
8903
|
-
const upb_enumdef *enumdef =
|
8904
|
-
(const upb_enumdef*)upb_fielddef_subdef(p->top->f);
|
8905
|
-
|
8906
|
-
int32_t int_val = 0;
|
8907
|
-
if (upb_enumdef_ntoi(enumdef, p->accumulated, p->accumulated_len,
|
8908
|
-
&int_val)) {
|
8909
|
-
upb_selector_t sel = parser_getsel(p);
|
8910
|
-
upb_sink_putint32(&p->top->sink, sel, int_val);
|
8911
|
-
} else {
|
8912
|
-
upb_status_seterrmsg(p->status, "Enum value name unknown");
|
8913
|
-
return false;
|
8914
|
-
}
|
8915
|
-
p->accumulated = NULL;
|
8916
|
-
}
|
8839
|
+
/* Accumulate buffer **********************************************************/
|
8917
8840
|
|
8918
|
-
|
8841
|
+
// Functionality for accumulating a buffer.
|
8842
|
+
//
|
8843
|
+
// Some parts of the parser need an entire value as a contiguous string. For
|
8844
|
+
// example, to look up a member name in a hash table, or to turn a string into
|
8845
|
+
// a number, the relevant library routines need the input string to be in
|
8846
|
+
// contiguous memory, even if the value spanned two or more buffers in the
|
8847
|
+
// input. These routines handle that.
|
8848
|
+
//
|
8849
|
+
// In the common case we can just point to the input buffer to get this
|
8850
|
+
// contiguous string and avoid any actual copy. So we optimistically begin
|
8851
|
+
// this way. But there are a few cases where we must instead copy into a
|
8852
|
+
// separate buffer:
|
8853
|
+
//
|
8854
|
+
// 1. The string was not contiguous in the input (it spanned buffers).
|
8855
|
+
//
|
8856
|
+
// 2. The string included escape sequences that need to be interpreted to get
|
8857
|
+
// the true value in a contiguous buffer.
|
8858
|
+
|
8859
|
+
static void assert_accumulate_empty(upb_json_parser *p) {
|
8860
|
+
assert(p->accumulated == NULL);
|
8861
|
+
assert(p->accumulated_len == 0);
|
8919
8862
|
}
|
8920
8863
|
|
8921
|
-
static
|
8922
|
-
|
8864
|
+
static void accumulate_clear(upb_json_parser *p) {
|
8865
|
+
p->accumulated = NULL;
|
8866
|
+
p->accumulated_len = 0;
|
8867
|
+
}
|
8923
8868
|
|
8924
|
-
|
8925
|
-
|
8869
|
+
// Used internally by accumulate_append().
|
8870
|
+
static bool accumulate_realloc(upb_json_parser *p, size_t need) {
|
8871
|
+
size_t new_size = UPB_MAX(p->accumulate_buf_size, 128);
|
8872
|
+
while (new_size < need) {
|
8873
|
+
new_size = saturating_multiply(new_size, 2);
|
8874
|
+
}
|
8926
8875
|
|
8927
|
-
|
8928
|
-
|
8929
|
-
|
8930
|
-
|
8931
|
-
|
8932
|
-
inner->m = p->top->m;
|
8933
|
-
inner->f = p->top->f;
|
8934
|
-
p->top = inner;
|
8876
|
+
void *mem = realloc(p->accumulate_buf, new_size);
|
8877
|
+
if (!mem) {
|
8878
|
+
upb_status_seterrmsg(p->status, "Out of memory allocating buffer.");
|
8879
|
+
return false;
|
8880
|
+
}
|
8935
8881
|
|
8882
|
+
p->accumulate_buf = mem;
|
8883
|
+
p->accumulate_buf_size = new_size;
|
8884
|
+
return true;
|
8885
|
+
}
|
8886
|
+
|
8887
|
+
// Logically appends the given data to the accumulate buffer.
|
8888
|
+
// If "can_alias" is true, we will try to avoid actually copying, but the buffer
|
8889
|
+
// must be valid until the next accumulate_append() call (if any).
|
8890
|
+
static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len,
|
8891
|
+
bool can_alias) {
|
8892
|
+
if (!p->accumulated && can_alias) {
|
8893
|
+
p->accumulated = buf;
|
8894
|
+
p->accumulated_len = len;
|
8936
8895
|
return true;
|
8937
|
-
}
|
8938
|
-
|
8939
|
-
|
8940
|
-
|
8941
|
-
|
8942
|
-
upb_status_seterrf(p->status,
|
8943
|
-
"String specified for non-string/non-enum field: %s",
|
8944
|
-
upb_fielddef_name(p->top->f));
|
8896
|
+
}
|
8897
|
+
|
8898
|
+
size_t need;
|
8899
|
+
if (!checked_add(p->accumulated_len, len, &need)) {
|
8900
|
+
upb_status_seterrmsg(p->status, "Integer overflow.");
|
8945
8901
|
return false;
|
8946
8902
|
}
|
8947
8903
|
|
8948
|
-
|
8904
|
+
if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) {
|
8905
|
+
return false;
|
8906
|
+
}
|
8949
8907
|
|
8950
|
-
|
8951
|
-
|
8952
|
-
|
8953
|
-
upb_sink_endstr(&p->top->sink, sel);
|
8954
|
-
p->top--;
|
8908
|
+
if (p->accumulated != p->accumulate_buf) {
|
8909
|
+
memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len);
|
8910
|
+
p->accumulated = p->accumulate_buf;
|
8955
8911
|
}
|
8912
|
+
|
8913
|
+
memcpy(p->accumulate_buf + p->accumulated_len, buf, len);
|
8914
|
+
p->accumulated_len += len;
|
8915
|
+
return true;
|
8956
8916
|
}
|
8957
8917
|
|
8958
|
-
|
8959
|
-
|
8960
|
-
|
8918
|
+
// Returns a pointer to the data accumulated since the last accumulate_clear()
|
8919
|
+
// call, and writes the length to *len. This will point either to the input
|
8920
|
+
// buffer or a temporary accumulate buffer.
|
8921
|
+
static const char *accumulate_getptr(upb_json_parser *p, size_t *len) {
|
8922
|
+
assert(p->accumulated);
|
8923
|
+
*len = p->accumulated_len;
|
8924
|
+
return p->accumulated;
|
8961
8925
|
}
|
8962
8926
|
|
8963
|
-
static void end_number(upb_json_parser *p, const char *ptr) {
|
8964
|
-
end_text(p, ptr, true);
|
8965
|
-
const char *myend = p->accumulated + p->accumulated_len;
|
8966
|
-
char *end;
|
8967
8927
|
|
8968
|
-
|
8969
|
-
|
8970
|
-
|
8971
|
-
|
8972
|
-
|
8973
|
-
|
8974
|
-
|
8975
|
-
|
8976
|
-
|
8977
|
-
|
8978
|
-
|
8979
|
-
|
8980
|
-
|
8981
|
-
|
8982
|
-
|
8983
|
-
|
8984
|
-
|
8985
|
-
|
8986
|
-
|
8987
|
-
|
8988
|
-
|
8989
|
-
|
8990
|
-
|
8991
|
-
|
8992
|
-
|
8993
|
-
|
8994
|
-
|
8995
|
-
|
8996
|
-
|
8997
|
-
|
8998
|
-
|
8999
|
-
|
9000
|
-
|
9001
|
-
|
9002
|
-
|
9003
|
-
|
9004
|
-
|
9005
|
-
|
9006
|
-
|
9007
|
-
|
8928
|
+
/* Multi-part text data *******************************************************/
|
8929
|
+
|
8930
|
+
// When we have text data in the input, it can often come in multiple segments.
|
8931
|
+
// For example, there may be some raw string data followed by an escape
|
8932
|
+
// sequence. The two segments are processed with different logic. Also buffer
|
8933
|
+
// seams in the input can cause multiple segments.
|
8934
|
+
//
|
8935
|
+
// As we see segments, there are two main cases for how we want to process them:
|
8936
|
+
//
|
8937
|
+
// 1. we want to push the captured input directly to string handlers.
|
8938
|
+
//
|
8939
|
+
// 2. we need to accumulate all the parts into a contiguous buffer for further
|
8940
|
+
// processing (field name lookup, string->number conversion, etc).
|
8941
|
+
|
8942
|
+
// This is the set of states for p->multipart_state.
|
8943
|
+
enum {
|
8944
|
+
// We are not currently processing multipart data.
|
8945
|
+
MULTIPART_INACTIVE = 0,
|
8946
|
+
|
8947
|
+
// We are processing multipart data by accumulating it into a contiguous
|
8948
|
+
// buffer.
|
8949
|
+
MULTIPART_ACCUMULATE = 1,
|
8950
|
+
|
8951
|
+
// We are processing multipart data by pushing each part directly to the
|
8952
|
+
// current string handlers.
|
8953
|
+
MULTIPART_PUSHEAGERLY = 2
|
8954
|
+
};
|
8955
|
+
|
8956
|
+
// Start a multi-part text value where we accumulate the data for processing at
|
8957
|
+
// the end.
|
8958
|
+
static void multipart_startaccum(upb_json_parser *p) {
|
8959
|
+
assert_accumulate_empty(p);
|
8960
|
+
assert(p->multipart_state == MULTIPART_INACTIVE);
|
8961
|
+
p->multipart_state = MULTIPART_ACCUMULATE;
|
8962
|
+
}
|
8963
|
+
|
8964
|
+
// Start a multi-part text value where we immediately push text data to a string
|
8965
|
+
// value with the given selector.
|
8966
|
+
static void multipart_start(upb_json_parser *p, upb_selector_t sel) {
|
8967
|
+
assert_accumulate_empty(p);
|
8968
|
+
assert(p->multipart_state == MULTIPART_INACTIVE);
|
8969
|
+
p->multipart_state = MULTIPART_PUSHEAGERLY;
|
8970
|
+
p->string_selector = sel;
|
8971
|
+
}
|
8972
|
+
|
8973
|
+
static bool multipart_text(upb_json_parser *p, const char *buf, size_t len,
|
8974
|
+
bool can_alias) {
|
8975
|
+
switch (p->multipart_state) {
|
8976
|
+
case MULTIPART_INACTIVE:
|
8977
|
+
upb_status_seterrmsg(
|
8978
|
+
p->status, "Internal error: unexpected state MULTIPART_INACTIVE");
|
8979
|
+
return false;
|
8980
|
+
|
8981
|
+
case MULTIPART_ACCUMULATE:
|
8982
|
+
if (!accumulate_append(p, buf, len, can_alias)) {
|
8983
|
+
return false;
|
8984
|
+
}
|
9008
8985
|
break;
|
9009
|
-
|
9010
|
-
case
|
9011
|
-
|
9012
|
-
|
9013
|
-
assert(false);
|
9014
|
-
else
|
9015
|
-
upb_sink_putfloat(&p->top->sink, parser_getsel(p), val);
|
8986
|
+
|
8987
|
+
case MULTIPART_PUSHEAGERLY: {
|
8988
|
+
const upb_bufhandle *handle = can_alias ? p->handle : NULL;
|
8989
|
+
upb_sink_putstring(&p->top->sink, p->string_selector, buf, len, handle);
|
9016
8990
|
break;
|
9017
8991
|
}
|
9018
|
-
default:
|
9019
|
-
assert(false);
|
9020
8992
|
}
|
9021
8993
|
|
9022
|
-
|
8994
|
+
return true;
|
9023
8995
|
}
|
9024
8996
|
|
9025
|
-
|
9026
|
-
|
9027
|
-
|
9028
|
-
|
9029
|
-
|
9030
|
-
|
9031
|
-
case 'b': return '\b';
|
9032
|
-
case '/': return '/';
|
9033
|
-
case '"': return '"';
|
9034
|
-
case '\\': return '\\';
|
9035
|
-
default:
|
9036
|
-
assert(0);
|
9037
|
-
return 'x';
|
9038
|
-
}
|
8997
|
+
// Note: this invalidates the accumulate buffer! Call only after reading its
|
8998
|
+
// contents.
|
8999
|
+
static void multipart_end(upb_json_parser *p) {
|
9000
|
+
assert(p->multipart_state != MULTIPART_INACTIVE);
|
9001
|
+
p->multipart_state = MULTIPART_INACTIVE;
|
9002
|
+
accumulate_clear(p);
|
9039
9003
|
}
|
9040
9004
|
|
9041
|
-
|
9042
|
-
|
9043
|
-
|
9044
|
-
|
9005
|
+
|
9006
|
+
/* Input capture **************************************************************/
|
9007
|
+
|
9008
|
+
// Functionality for capturing a region of the input as text. Gracefully
|
9009
|
+
// handles the case where a buffer seam occurs in the middle of the captured
|
9010
|
+
// region.
|
9011
|
+
|
9012
|
+
static void capture_begin(upb_json_parser *p, const char *ptr) {
|
9013
|
+
assert(p->multipart_state != MULTIPART_INACTIVE);
|
9014
|
+
assert(p->capture == NULL);
|
9015
|
+
p->capture = ptr;
|
9045
9016
|
}
|
9046
9017
|
|
9047
|
-
static
|
9048
|
-
|
9049
|
-
|
9050
|
-
|
9051
|
-
return
|
9018
|
+
static bool capture_end(upb_json_parser *p, const char *ptr) {
|
9019
|
+
assert(p->capture);
|
9020
|
+
if (multipart_text(p, p->capture, ptr - p->capture, true)) {
|
9021
|
+
p->capture = NULL;
|
9022
|
+
return true;
|
9052
9023
|
} else {
|
9053
|
-
|
9054
|
-
return ch - 'A' + 10;
|
9024
|
+
return false;
|
9055
9025
|
}
|
9056
9026
|
}
|
9057
9027
|
|
9058
|
-
|
9059
|
-
|
9028
|
+
// This is called at the end of each input buffer (i.e. when we have hit a
|
9029
|
+
// buffer seam). If we are in the middle of capturing the input, this
|
9030
|
+
// processes the unprocessed capture region.
|
9031
|
+
static void capture_suspend(upb_json_parser *p, const char **ptr) {
|
9032
|
+
if (!p->capture) return;
|
9033
|
+
|
9034
|
+
if (multipart_text(p, p->capture, *ptr - p->capture, false)) {
|
9035
|
+
// We use this as a signal that we were in the middle of capturing, and
|
9036
|
+
// that capturing should resume at the beginning of the next buffer.
|
9037
|
+
//
|
9038
|
+
// We can't use *ptr here, because we have no guarantee that this pointer
|
9039
|
+
// will be valid when we resume (if the underlying memory is freed, then
|
9040
|
+
// using the pointer at all, even to compare to NULL, is likely undefined
|
9041
|
+
// behavior).
|
9042
|
+
p->capture = &suspend_capture;
|
9043
|
+
} else {
|
9044
|
+
// Need to back up the pointer to the beginning of the capture, since
|
9045
|
+
// we were not able to actually preserve it.
|
9046
|
+
*ptr = p->capture;
|
9047
|
+
}
|
9048
|
+
}
|
9049
|
+
|
9050
|
+
static void capture_resume(upb_json_parser *p, const char *ptr) {
|
9051
|
+
if (p->capture) {
|
9052
|
+
assert(p->capture == &suspend_capture);
|
9053
|
+
p->capture = ptr;
|
9054
|
+
}
|
9055
|
+
}
|
9056
|
+
|
9057
|
+
|
9058
|
+
/* Callbacks from the parser **************************************************/
|
9059
|
+
|
9060
|
+
// These are the functions called directly from the parser itself.
|
9061
|
+
// We define these in the same order as their declarations in the parser.
|
9062
|
+
|
9063
|
+
static char escape_char(char in) {
|
9064
|
+
switch (in) {
|
9065
|
+
case 'r': return '\r';
|
9066
|
+
case 't': return '\t';
|
9067
|
+
case 'n': return '\n';
|
9068
|
+
case 'f': return '\f';
|
9069
|
+
case 'b': return '\b';
|
9070
|
+
case '/': return '/';
|
9071
|
+
case '"': return '"';
|
9072
|
+
case '\\': return '\\';
|
9073
|
+
default:
|
9074
|
+
assert(0);
|
9075
|
+
return 'x';
|
9076
|
+
}
|
9077
|
+
}
|
9078
|
+
|
9079
|
+
static bool escape(upb_json_parser *p, const char *ptr) {
|
9080
|
+
char ch = escape_char(*ptr);
|
9081
|
+
return multipart_text(p, &ch, 1, false);
|
9082
|
+
}
|
9083
|
+
|
9084
|
+
static void start_hex(upb_json_parser *p) {
|
9085
|
+
p->digit = 0;
|
9086
|
+
}
|
9087
|
+
|
9088
|
+
static void hexdigit(upb_json_parser *p, const char *ptr) {
|
9089
|
+
char ch = *ptr;
|
9090
|
+
|
9091
|
+
p->digit <<= 4;
|
9092
|
+
|
9093
|
+
if (ch >= '0' && ch <= '9') {
|
9094
|
+
p->digit += (ch - '0');
|
9095
|
+
} else if (ch >= 'a' && ch <= 'f') {
|
9096
|
+
p->digit += ((ch - 'a') + 10);
|
9097
|
+
} else {
|
9098
|
+
assert(ch >= 'A' && ch <= 'F');
|
9099
|
+
p->digit += ((ch - 'A') + 10);
|
9100
|
+
}
|
9060
9101
|
}
|
9061
9102
|
|
9062
|
-
static
|
9063
|
-
|
9064
|
-
|
9065
|
-
uint16_t codepoint =
|
9066
|
-
(hexdigit(start[0]) << 12) |
|
9067
|
-
(hexdigit(start[1]) << 8) |
|
9068
|
-
(hexdigit(start[2]) << 4) |
|
9069
|
-
hexdigit(start[3]);
|
9103
|
+
static bool end_hex(upb_json_parser *p) {
|
9104
|
+
uint32_t codepoint = p->digit;
|
9105
|
+
|
9070
9106
|
// emit the codepoint as UTF-8.
|
9071
9107
|
char utf8[3]; // support \u0000 -- \uFFFF -- need only three bytes.
|
9072
9108
|
int length = 0;
|
@@ -9089,160 +9125,466 @@ static void hex(upb_json_parser *p, const char *end) {
|
|
9089
9125
|
// TODO(haberman): Handle high surrogates: if codepoint is a high surrogate
|
9090
9126
|
// we have to wait for the next escape to get the full code point).
|
9091
9127
|
|
9092
|
-
|
9093
|
-
|
9128
|
+
return multipart_text(p, utf8, length, false);
|
9129
|
+
}
|
9130
|
+
|
9131
|
+
// Parser callback at the start of a run of literal text: begins a capture at
// ptr.
static void start_text(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
|
9134
|
+
|
9135
|
+
// Parser callback at the end of a run of literal text: ends the capture at
// ptr and returns the result of capture_end().
static bool end_text(upb_json_parser *p, const char *ptr) {
  bool ok = capture_end(p, ptr);

  return ok;
}
|
9138
|
+
|
9139
|
+
// Parser callback at the first character of a number.  Numbers are always
// accumulated (see end_number(), which needs the whole text at once), so this
// switches multipart handling to accumulate mode and then begins a capture
// at ptr.
static void start_number(upb_json_parser *p, const char *ptr) {
  multipart_startaccum(p);
  capture_begin(p, ptr);
}
|
9143
|
+
|
9144
|
+
// Parser callback at the end of a number.  Finishes the capture, converts the
// accumulated text according to the type of the current field (p->top->f) and
// pushes the value to the current sink.
//
// Returns true on success.  Returns false if the capture or accumulation
// fails, or -- after setting p->status -- if the text is out of range for the
// field type or is not consumed entirely by the conversion.
static bool end_number(upb_json_parser *p, const char *ptr) {
  if (!capture_end(p, ptr)) {
    return false;
  }

  // strtol() and friends unfortunately do not support specifying the length of
  // the input string, so we need to force a copy into a NUL-terminated buffer.
  if (!multipart_text(p, "\0", 1, false)) {
    return false;
  }

  size_t len;
  const char *buf = accumulate_getptr(p, &len);
  const char *myend = buf + len - 1;  // Points at the NUL appended above.
  char *end;

  // The strto*() functions set errno only on failure and never clear it, so a
  // stale ERANGE left by earlier code would otherwise reject a valid number.
  errno = 0;

  switch (upb_fielddef_type(p->top->f)) {
    case UPB_TYPE_ENUM:
    case UPB_TYPE_INT32: {
      long val = strtol(buf, &end, 0);
      if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || end != myend)
        goto err;
      else
        upb_sink_putint32(&p->top->sink, parser_getsel(p), val);
      break;
    }
    case UPB_TYPE_INT64: {
      long long val = strtoll(buf, &end, 0);
      if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || end != myend)
        goto err;
      else
        upb_sink_putint64(&p->top->sink, parser_getsel(p), val);
      break;
    }
    case UPB_TYPE_UINT32: {
      unsigned long val = strtoul(buf, &end, 0);
      if (val > UINT32_MAX || errno == ERANGE || end != myend)
        goto err;
      else
        upb_sink_putuint32(&p->top->sink, parser_getsel(p), val);
      break;
    }
    case UPB_TYPE_UINT64: {
      unsigned long long val = strtoull(buf, &end, 0);
      if (val > UINT64_MAX || errno == ERANGE || end != myend)
        goto err;
      else
        upb_sink_putuint64(&p->top->sink, parser_getsel(p), val);
      break;
    }
    case UPB_TYPE_DOUBLE: {
      double val = strtod(buf, &end);
      if (errno == ERANGE || end != myend)
        goto err;
      else
        upb_sink_putdouble(&p->top->sink, parser_getsel(p), val);
      break;
    }
    case UPB_TYPE_FLOAT: {
      float val = strtof(buf, &end);
      if (errno == ERANGE || end != myend)
        goto err;
      else
        upb_sink_putfloat(&p->top->sink, parser_getsel(p), val);
      break;
    }
    default:
      assert(false);
  }

  multipart_end(p);
  return true;

err:
  upb_status_seterrf(p->status, "error parsing number: %s", buf);
  multipart_end(p);
  return false;
}
|
9222
|
+
|
9223
|
+
// Pushes a boolean literal to the current sink.  Fails, setting p->status,
// when the current field (p->top->f) is not a bool field.
static bool parser_putbool(upb_json_parser *p, bool val) {
  if (upb_fielddef_type(p->top->f) == UPB_TYPE_BOOL) {
    bool ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val);
    UPB_ASSERT_VAR(ok, ok);
    return true;
  }

  upb_status_seterrf(p->status,
                     "Boolean value specified for non-bool field: %s",
                     upb_fielddef_name(p->top->f));
  return false;
}
|
9235
|
+
|
9236
|
+
// Parser callback at the opening quote of a string value.
//
// String and bytes fields push a new parser frame and emit a STARTSTR event;
// enum fields stay in the current frame and only start accumulating text.
// A string given for any other field type is an error recorded in p->status.
static bool start_stringval(upb_json_parser *p) {
  assert(p->top->f);

  if (!upb_fielddef_isstring(p->top->f)) {
    if (upb_fielddef_type(p->top->f) != UPB_TYPE_ENUM) {
      upb_status_seterrf(p->status,
                         "String specified for non-string/non-enum field: %s",
                         upb_fielddef_name(p->top->f));
      return false;
    }

    // No need to push a frame -- symbolic enum names in quotes remain in the
    // current parser frame.
    //
    // Enum string values must accumulate so we can look up the value in a
    // table once it is complete.
    multipart_startaccum(p);
    return true;
  }

  if (!check_stack(p)) return false;

  // Start a new parser frame: parser frames correspond one-to-one with
  // handler frames, and string events occur in a sub-frame.
  upb_jsonparser_frame *frame = p->top + 1;
  upb_selector_t startstr = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
  upb_sink_startstr(&p->top->sink, startstr, 0, &frame->sink);
  frame->m = p->top->m;
  frame->f = p->top->f;
  p->top = frame;

  if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) {
    // For STRING fields we push data directly to the handlers as it is
    // parsed.  We don't do this yet for BYTES fields, because our base64
    // decoder is not streaming.
    //
    // TODO(haberman): make base64 decoding streaming also.
    multipart_start(p, getsel_for_handlertype(p, UPB_HANDLER_STRING));
  } else {
    multipart_startaccum(p);
  }

  return true;
}
|
9095
9278
|
|
9279
|
+
// Parser callback at the closing quote of a string value.
//
// BYTES fields pass the accumulated text to base64_push() and then, like
// STRING fields, emit ENDSTR and pop the frame pushed by start_stringval().
// ENUM fields resolve the accumulated symbolic name to its integer value and
// push that value to the sink.
//
// Returns false if base64_push() fails or the enum name is unknown (in which
// case p->status is set); true otherwise.
static bool end_stringval(upb_json_parser *p) {
  bool ok = true;

  switch (upb_fielddef_type(p->top->f)) {
    case UPB_TYPE_BYTES:
      if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING),
                       p->accumulated, p->accumulated_len)) {
        return false;
      }
      // Fall through.

    case UPB_TYPE_STRING: {
      upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
      upb_sink_endstr(&p->top->sink, sel);
      p->top--;
      break;
    }

    case UPB_TYPE_ENUM: {
      // Resolve enum symbolic name to integer value.
      const upb_enumdef *enumdef =
          (const upb_enumdef*)upb_fielddef_subdef(p->top->f);

      size_t len;
      const char *buf = accumulate_getptr(p, &len);

      int32_t int_val = 0;
      ok = upb_enumdef_ntoi(enumdef, buf, len, &int_val);

      if (ok) {
        upb_selector_t sel = parser_getsel(p);
        upb_sink_putint32(&p->top->sink, sel, int_val);
      } else {
        // The "*" precision of a printf-style format consumes an int; passing
        // a size_t through varargs is undefined, so narrow it explicitly
        // (end_member() does the same).
        upb_status_seterrf(p->status, "Enum value unknown: '%.*s'", (int)len,
                           buf);
      }

      break;
    }

    default:
      assert(false);
      upb_status_seterrmsg(p->status, "Internal error in JSON decoder");
      ok = false;
      break;
  }

  multipart_end(p);
  return ok;
}
|
9328
|
+
|
9329
|
+
// Parser callback at the start of an object member name.  No field may be
// selected yet in the current frame; the name is accumulated so end_member()
// can look it up once it is complete.
static void start_member(upb_json_parser *p) {
  assert(!p->top->f);

  multipart_startaccum(p);
}
|
9333
|
+
|
9334
|
+
// Parser callback at the end of an object member name.  Looks the accumulated
// name up in the current frame's message definition and, if found, makes that
// field the current one.  An unknown name is an error recorded in p->status.
static bool end_member(upb_json_parser *p) {
  assert(!p->top->f);

  size_t name_len;
  const char *name = accumulate_getptr(p, &name_len);
  const upb_fielddef *field = upb_msgdef_ntof(p->top->m, name, name_len);

  if (field) {
    p->top->f = field;
    multipart_end(p);
    return true;
  }

  // TODO(haberman): Ignore unknown fields if requested/configured to do so.
  upb_status_seterrf(p->status, "No such field: %.*s\n", (int)name_len, name);
  return false;
}
|
9352
|
+
|
9353
|
+
// Forgets the field selected by end_member() so the current frame is ready
// for the next member name.
static void clear_member(upb_json_parser *p) {
  p->top->f = NULL;
}
|
9354
|
+
|
9355
|
+
// Parser callback when an object appears as a field value.  Requires the
// current field to be a submessage field; pushes a new frame for the
// submessage and emits STARTSUBMSG.  Returns false (with p->status set for
// the type mismatch) on error.
static bool start_subobject(upb_json_parser *p) {
  assert(p->top->f);

  if (upb_fielddef_issubmsg(p->top->f)) {
    if (!check_stack(p)) return false;

    upb_jsonparser_frame *child = p->top + 1;
    upb_selector_t startsubmsg =
        getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);

    upb_sink_startsubmsg(&p->top->sink, startsubmsg, &child->sink);
    child->m = upb_fielddef_msgsubdef(p->top->f);
    child->f = NULL;  // No member selected yet inside the new object.
    p->top = child;

    return true;
  }

  upb_status_seterrf(p->status,
                     "Object specified for non-message/group field: %s",
                     upb_fielddef_name(p->top->f));
  return false;
}
|
9377
|
+
|
9378
|
+
// Parser callback at the end of a sub-object: pops the frame pushed by
// start_subobject() and emits ENDSUBMSG on the parent frame's sink.
static void end_subobject(upb_json_parser *p) {
  upb_selector_t endsubmsg;

  // Pop first: the selector is computed for the parent frame.
  p->top--;
  endsubmsg = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
  upb_sink_endsubmsg(&p->top->sink, endsubmsg);
}
|
9383
|
+
|
9384
|
+
// Parser callback at the start of a JSON array.  Requires the current field
// to be a sequence (repeated) field; pushes a new frame that keeps the same
// message and field, and emits STARTSEQ.  Returns false (with p->status set
// for the type mismatch) on error.
static bool start_array(upb_json_parser *p) {
  assert(p->top->f);

  if (upb_fielddef_isseq(p->top->f)) {
    if (!check_stack(p)) return false;

    upb_jsonparser_frame *child = p->top + 1;
    upb_selector_t startseq = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);

    upb_sink_startseq(&p->top->sink, startseq, &child->sink);
    child->m = p->top->m;
    child->f = p->top->f;
    p->top = child;

    return true;
  }

  upb_status_seterrf(p->status,
                     "Array specified for non-repeated field: %s",
                     upb_fielddef_name(p->top->f));
  return false;
}
|
9405
|
+
|
9406
|
+
// Parser callback at the end of a JSON array: pops the frame pushed by
// start_array() and emits ENDSEQ on the parent frame's sink.
static void end_array(upb_json_parser *p) {
  upb_selector_t endseq;

  assert(p->top > p->stack);

  // Pop first: the selector is computed for the parent frame.
  p->top--;
  endseq = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
  upb_sink_endseq(&p->top->sink, endseq);
}
|
9413
|
+
|
9414
|
+
// Parser callback at the start of a JSON object: emits STARTMSG on the current
// frame's sink.
static void start_object(upb_json_parser *p) {
  upb_sink_startmsg(&p->top->sink);
}
|
9417
|
+
|
9418
|
+
// Parser callback at the end of a JSON object: emits ENDMSG on the current
// frame's sink.  The status filled in by the sink is discarded.
//
// NOTE(review): "status" is handed to upb_sink_endmsg() without being
// initialized first -- confirm the handlers never read it before writing.
static void end_object(upb_json_parser *p) {
  upb_status status;

  upb_sink_endmsg(&p->top->sink, &status);
}
|
9422
|
+
|
9423
|
+
|
9096
9424
|
// Evaluates x and jumps to the enclosing function's "error" label when it is
// false.  Wrapped in do/while(0) so the expansion is a single statement and
// cannot capture an "else" that follows the macro invocation.
#define CHECK_RETURN_TOP(x) do { if (!(x)) goto error; } while (0)
|
9097
9425
|
|
9426
|
+
|
9427
|
+
/* The actual parser **********************************************************/
|
9428
|
+
|
9098
9429
|
// What follows is the Ragel parser itself. The language is specified in Ragel
|
9099
9430
|
// and the actions call our C functions above.
|
9431
|
+
//
|
9432
|
+
// Ragel has an extensive set of functionality, and we use only a small part of
|
9433
|
+
// it. There are many action types but we only use a few:
|
9434
|
+
//
|
9435
|
+
// ">" -- transition into a machine
|
9436
|
+
// "%" -- transition out of a machine
|
9437
|
+
// "@" -- transition into a final state of a machine.
|
9438
|
+
//
|
9439
|
+
// "@" transitions are tricky because a machine can transition into a final
|
9440
|
+
// state repeatedly. But in some cases we know this can't happen, for example
|
9441
|
+
// a string which is delimited by a final '"' can only transition into its
|
9442
|
+
// final state once, when the closing '"' is seen.
|
9100
9443
|
|
9101
|
-
#line 596 "upb/json/parser.rl"
|
9102
9444
|
|
9445
|
+
#line 904 "upb/json/parser.rl"
|
9103
9446
|
|
9104
9447
|
|
9105
|
-
|
9448
|
+
|
9449
|
+
#line 816 "upb/json/parser.c"
|
9106
9450
|
static const char _json_actions[] = {
|
9107
9451
|
0, 1, 0, 1, 2, 1, 3, 1,
|
9108
|
-
|
9109
|
-
|
9110
|
-
|
9111
|
-
|
9112
|
-
5,
|
9113
|
-
|
9114
|
-
|
9115
|
-
|
9116
|
-
|
9452
|
+
5, 1, 6, 1, 7, 1, 8, 1,
|
9453
|
+
10, 1, 12, 1, 13, 1, 14, 1,
|
9454
|
+
15, 1, 16, 1, 17, 1, 21, 1,
|
9455
|
+
25, 1, 27, 2, 3, 8, 2, 4,
|
9456
|
+
5, 2, 6, 2, 2, 6, 8, 2,
|
9457
|
+
11, 9, 2, 13, 15, 2, 14, 15,
|
9458
|
+
2, 18, 1, 2, 19, 27, 2, 20,
|
9459
|
+
9, 2, 22, 27, 2, 23, 27, 2,
|
9460
|
+
24, 27, 2, 26, 27, 3, 14, 11,
|
9461
|
+
9
|
9117
9462
|
};
|
9118
9463
|
|
9119
9464
|
static const unsigned char _json_key_offsets[] = {
|
9120
|
-
0, 0, 4, 9, 14,
|
9121
|
-
|
9122
|
-
|
9123
|
-
|
9124
|
-
|
9125
|
-
|
9126
|
-
|
9127
|
-
|
9465
|
+
0, 0, 4, 9, 14, 15, 19, 24,
|
9466
|
+
29, 34, 38, 42, 45, 48, 50, 54,
|
9467
|
+
58, 60, 62, 67, 69, 71, 80, 86,
|
9468
|
+
92, 98, 104, 106, 115, 116, 116, 116,
|
9469
|
+
121, 126, 131, 132, 133, 134, 135, 135,
|
9470
|
+
136, 137, 138, 138, 139, 140, 141, 141,
|
9471
|
+
146, 151, 152, 156, 161, 166, 171, 175,
|
9472
|
+
175, 178, 178, 178
|
9128
9473
|
};
|
9129
9474
|
|
9130
9475
|
static const char _json_trans_keys[] = {
|
9131
9476
|
32, 123, 9, 13, 32, 34, 125, 9,
|
9132
|
-
13, 32, 34, 125, 9, 13,
|
9133
|
-
9, 13, 32,
|
9134
|
-
125, 9, 13, 32, 44, 125,
|
9135
|
-
|
9136
|
-
|
9137
|
-
|
9138
|
-
|
9139
|
-
|
9140
|
-
|
9141
|
-
|
9142
|
-
|
9143
|
-
|
9144
|
-
|
9145
|
-
|
9146
|
-
|
9147
|
-
|
9148
|
-
|
9149
|
-
|
9150
|
-
|
9151
|
-
13, 32,
|
9152
|
-
125, 9, 13, 32,
|
9153
|
-
|
9477
|
+
13, 32, 34, 125, 9, 13, 34, 32,
|
9478
|
+
58, 9, 13, 32, 93, 125, 9, 13,
|
9479
|
+
32, 44, 125, 9, 13, 32, 44, 125,
|
9480
|
+
9, 13, 32, 34, 9, 13, 45, 48,
|
9481
|
+
49, 57, 48, 49, 57, 46, 69, 101,
|
9482
|
+
48, 57, 69, 101, 48, 57, 43, 45,
|
9483
|
+
48, 57, 48, 57, 48, 57, 46, 69,
|
9484
|
+
101, 48, 57, 34, 92, 34, 92, 34,
|
9485
|
+
47, 92, 98, 102, 110, 114, 116, 117,
|
9486
|
+
48, 57, 65, 70, 97, 102, 48, 57,
|
9487
|
+
65, 70, 97, 102, 48, 57, 65, 70,
|
9488
|
+
97, 102, 48, 57, 65, 70, 97, 102,
|
9489
|
+
34, 92, 34, 45, 91, 102, 110, 116,
|
9490
|
+
123, 48, 57, 34, 32, 93, 125, 9,
|
9491
|
+
13, 32, 44, 93, 9, 13, 32, 93,
|
9492
|
+
125, 9, 13, 97, 108, 115, 101, 117,
|
9493
|
+
108, 108, 114, 117, 101, 32, 34, 125,
|
9494
|
+
9, 13, 32, 34, 125, 9, 13, 34,
|
9495
|
+
32, 58, 9, 13, 32, 93, 125, 9,
|
9496
|
+
13, 32, 44, 125, 9, 13, 32, 44,
|
9497
|
+
125, 9, 13, 32, 34, 9, 13, 32,
|
9498
|
+
9, 13, 0
|
9154
9499
|
};
|
9155
9500
|
|
9156
9501
|
static const char _json_single_lengths[] = {
|
9157
|
-
0, 2, 3, 3,
|
9502
|
+
0, 2, 3, 3, 1, 2, 3, 3,
|
9158
9503
|
3, 2, 2, 1, 3, 0, 2, 2,
|
9159
9504
|
0, 0, 3, 2, 2, 9, 0, 0,
|
9160
|
-
0, 0, 2, 7,
|
9161
|
-
3,
|
9505
|
+
0, 0, 2, 7, 1, 0, 0, 3,
|
9506
|
+
3, 3, 1, 1, 1, 1, 0, 1,
|
9162
9507
|
1, 1, 0, 1, 1, 1, 0, 3,
|
9163
|
-
3,
|
9508
|
+
3, 1, 2, 3, 3, 3, 2, 0,
|
9164
9509
|
1, 0, 0, 0
|
9165
9510
|
};
|
9166
9511
|
|
9167
9512
|
static const char _json_range_lengths[] = {
|
9168
|
-
0, 1, 1, 1,
|
9513
|
+
0, 1, 1, 1, 0, 1, 1, 1,
|
9169
9514
|
1, 1, 1, 1, 0, 1, 1, 1,
|
9170
9515
|
1, 1, 1, 0, 0, 0, 3, 3,
|
9171
|
-
3, 3, 0, 1, 0, 0,
|
9172
|
-
1,
|
9516
|
+
3, 3, 0, 1, 0, 0, 0, 1,
|
9517
|
+
1, 1, 0, 0, 0, 0, 0, 0,
|
9173
9518
|
0, 0, 0, 0, 0, 0, 0, 1,
|
9174
|
-
1,
|
9519
|
+
1, 0, 1, 1, 1, 1, 1, 0,
|
9175
9520
|
1, 0, 0, 0
|
9176
9521
|
};
|
9177
9522
|
|
9178
9523
|
static const short _json_index_offsets[] = {
|
9179
|
-
0, 0, 4, 9, 14,
|
9180
|
-
|
9181
|
-
62, 64,
|
9182
|
-
|
9183
|
-
127, 132,
|
9184
|
-
|
9185
|
-
|
9186
|
-
|
9524
|
+
0, 0, 4, 9, 14, 16, 20, 25,
|
9525
|
+
30, 35, 39, 43, 46, 50, 52, 56,
|
9526
|
+
60, 62, 64, 69, 72, 75, 85, 89,
|
9527
|
+
93, 97, 101, 104, 113, 115, 116, 117,
|
9528
|
+
122, 127, 132, 134, 136, 138, 140, 141,
|
9529
|
+
143, 145, 147, 148, 150, 152, 154, 155,
|
9530
|
+
160, 165, 167, 171, 176, 181, 186, 190,
|
9531
|
+
191, 194, 195, 196
|
9187
9532
|
};
|
9188
9533
|
|
9189
9534
|
static const char _json_indicies[] = {
|
9190
9535
|
0, 2, 0, 1, 3, 4, 5, 3,
|
9191
|
-
1, 6, 7, 8, 6, 1, 9,
|
9192
|
-
|
9193
|
-
|
9194
|
-
|
9195
|
-
|
9196
|
-
23,
|
9197
|
-
25,
|
9198
|
-
|
9199
|
-
|
9200
|
-
|
9201
|
-
36, 36, 1, 37, 37, 37,
|
9202
|
-
|
9203
|
-
42,
|
9204
|
-
|
9536
|
+
1, 6, 7, 8, 6, 1, 9, 1,
|
9537
|
+
10, 11, 10, 1, 11, 1, 1, 11,
|
9538
|
+
12, 13, 14, 15, 13, 1, 16, 17,
|
9539
|
+
8, 16, 1, 17, 7, 17, 1, 18,
|
9540
|
+
19, 20, 1, 19, 20, 1, 22, 23,
|
9541
|
+
23, 21, 24, 1, 23, 23, 24, 21,
|
9542
|
+
25, 25, 26, 1, 26, 1, 26, 21,
|
9543
|
+
22, 23, 23, 20, 21, 28, 29, 27,
|
9544
|
+
31, 32, 30, 33, 33, 33, 33, 33,
|
9545
|
+
33, 33, 33, 34, 1, 35, 35, 35,
|
9546
|
+
1, 36, 36, 36, 1, 37, 37, 37,
|
9547
|
+
1, 38, 38, 38, 1, 40, 41, 39,
|
9548
|
+
42, 43, 44, 45, 46, 47, 48, 43,
|
9549
|
+
1, 49, 1, 50, 51, 53, 54, 1,
|
9205
9550
|
53, 52, 55, 56, 54, 55, 1, 56,
|
9206
|
-
1, 1, 56, 52, 57, 58, 1,
|
9207
|
-
1, 60, 1, 61,
|
9208
|
-
64, 1, 65,
|
9209
|
-
1, 69,
|
9210
|
-
|
9211
|
-
77, 1,
|
9212
|
-
|
9213
|
-
|
9214
|
-
|
9215
|
-
0
|
9551
|
+
1, 1, 56, 52, 57, 1, 58, 1,
|
9552
|
+
59, 1, 60, 1, 61, 62, 1, 63,
|
9553
|
+
1, 64, 1, 65, 66, 1, 67, 1,
|
9554
|
+
68, 1, 69, 70, 71, 72, 70, 1,
|
9555
|
+
73, 74, 75, 73, 1, 76, 1, 77,
|
9556
|
+
78, 77, 1, 78, 1, 1, 78, 79,
|
9557
|
+
80, 81, 82, 80, 1, 83, 84, 75,
|
9558
|
+
83, 1, 84, 74, 84, 1, 85, 86,
|
9559
|
+
86, 1, 1, 1, 1, 0
|
9216
9560
|
};
|
9217
9561
|
|
9218
9562
|
static const char _json_trans_targs[] = {
|
9219
9563
|
1, 0, 2, 3, 4, 56, 3, 4,
|
9220
|
-
56, 5,
|
9221
|
-
|
9222
|
-
|
9223
|
-
|
9224
|
-
|
9225
|
-
|
9226
|
-
|
9227
|
-
|
9228
|
-
|
9229
|
-
|
9230
|
-
56
|
9564
|
+
56, 5, 5, 6, 7, 8, 9, 56,
|
9565
|
+
8, 9, 11, 12, 18, 57, 13, 15,
|
9566
|
+
14, 16, 17, 20, 58, 21, 20, 58,
|
9567
|
+
21, 19, 22, 23, 24, 25, 26, 20,
|
9568
|
+
58, 21, 28, 30, 31, 34, 39, 43,
|
9569
|
+
47, 29, 59, 59, 32, 31, 29, 32,
|
9570
|
+
33, 35, 36, 37, 38, 59, 40, 41,
|
9571
|
+
42, 59, 44, 45, 46, 59, 48, 49,
|
9572
|
+
55, 48, 49, 55, 50, 50, 51, 52,
|
9573
|
+
53, 54, 55, 53, 54, 59, 56
|
9231
9574
|
};
|
9232
9575
|
|
9233
9576
|
static const char _json_trans_actions[] = {
|
9234
|
-
0, 0, 0, 21,
|
9235
|
-
23, 17,
|
9236
|
-
|
9237
|
-
0, 0, 0,
|
9238
|
-
|
9239
|
-
|
9240
|
-
|
9241
|
-
0,
|
9242
|
-
0,
|
9243
|
-
|
9244
|
-
|
9245
|
-
0
|
9577
|
+
0, 0, 0, 21, 77, 53, 0, 47,
|
9578
|
+
23, 17, 0, 0, 15, 19, 19, 50,
|
9579
|
+
0, 0, 0, 0, 0, 1, 0, 0,
|
9580
|
+
0, 0, 0, 3, 13, 0, 0, 35,
|
9581
|
+
5, 11, 0, 38, 7, 7, 7, 41,
|
9582
|
+
44, 9, 62, 56, 25, 0, 0, 0,
|
9583
|
+
31, 29, 33, 59, 15, 0, 27, 0,
|
9584
|
+
0, 0, 0, 0, 0, 68, 0, 0,
|
9585
|
+
0, 71, 0, 0, 0, 65, 21, 77,
|
9586
|
+
53, 0, 47, 23, 17, 0, 0, 15,
|
9587
|
+
19, 19, 50, 0, 0, 74, 0
|
9246
9588
|
};
|
9247
9589
|
|
9248
9590
|
static const int json_start = 1;
|
@@ -9255,13 +9597,14 @@ static const int json_en_value_machine = 27;
|
|
9255
9597
|
static const int json_en_main = 1;
|
9256
9598
|
|
9257
9599
|
|
9258
|
-
#line
|
9600
|
+
#line 907 "upb/json/parser.rl"
|
9259
9601
|
|
9260
9602
|
size_t parse(void *closure, const void *hd, const char *buf, size_t size,
|
9261
9603
|
const upb_bufhandle *handle) {
|
9262
9604
|
UPB_UNUSED(hd);
|
9263
9605
|
UPB_UNUSED(handle);
|
9264
9606
|
upb_json_parser *parser = closure;
|
9607
|
+
parser->handle = handle;
|
9265
9608
|
|
9266
9609
|
// Variables used by Ragel's generated code.
|
9267
9610
|
int cs = parser->current_state;
|
@@ -9271,8 +9614,10 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size,
|
|
9271
9614
|
const char *p = buf;
|
9272
9615
|
const char *pe = buf + size;
|
9273
9616
|
|
9617
|
+
capture_resume(parser, buf);
|
9618
|
+
|
9274
9619
|
|
9275
|
-
#line
|
9620
|
+
#line 987 "upb/json/parser.c"
|
9276
9621
|
{
|
9277
9622
|
int _klen;
|
9278
9623
|
unsigned int _trans;
|
@@ -9347,114 +9692,118 @@ _match:
|
|
9347
9692
|
switch ( *_acts++ )
|
9348
9693
|
{
|
9349
9694
|
case 0:
|
9350
|
-
#line
|
9695
|
+
#line 819 "upb/json/parser.rl"
|
9351
9696
|
{ p--; {cs = stack[--top]; goto _again;} }
|
9352
9697
|
break;
|
9353
9698
|
case 1:
|
9354
|
-
#line
|
9699
|
+
#line 820 "upb/json/parser.rl"
|
9355
9700
|
{ p--; {stack[top++] = cs; cs = 10; goto _again;} }
|
9356
9701
|
break;
|
9357
9702
|
case 2:
|
9358
|
-
#line
|
9703
|
+
#line 824 "upb/json/parser.rl"
|
9359
9704
|
{ start_text(parser, p); }
|
9360
9705
|
break;
|
9361
9706
|
case 3:
|
9362
|
-
#line
|
9363
|
-
{ CHECK_RETURN_TOP(end_text(parser, p
|
9707
|
+
#line 825 "upb/json/parser.rl"
|
9708
|
+
{ CHECK_RETURN_TOP(end_text(parser, p)); }
|
9364
9709
|
break;
|
9365
9710
|
case 4:
|
9366
|
-
#line
|
9367
|
-
{ start_hex(parser
|
9711
|
+
#line 831 "upb/json/parser.rl"
|
9712
|
+
{ start_hex(parser); }
|
9368
9713
|
break;
|
9369
9714
|
case 5:
|
9370
|
-
#line
|
9371
|
-
{
|
9715
|
+
#line 832 "upb/json/parser.rl"
|
9716
|
+
{ hexdigit(parser, p); }
|
9372
9717
|
break;
|
9373
9718
|
case 6:
|
9374
|
-
#line
|
9375
|
-
{
|
9719
|
+
#line 833 "upb/json/parser.rl"
|
9720
|
+
{ CHECK_RETURN_TOP(end_hex(parser)); }
|
9376
9721
|
break;
|
9377
9722
|
case 7:
|
9378
|
-
#line
|
9379
|
-
{
|
9723
|
+
#line 839 "upb/json/parser.rl"
|
9724
|
+
{ CHECK_RETURN_TOP(escape(parser, p)); }
|
9380
9725
|
break;
|
9381
9726
|
case 8:
|
9382
|
-
#line
|
9383
|
-
{ {stack[top
|
9727
|
+
#line 845 "upb/json/parser.rl"
|
9728
|
+
{ p--; {cs = stack[--top]; goto _again;} }
|
9384
9729
|
break;
|
9385
9730
|
case 9:
|
9386
|
-
#line
|
9387
|
-
{
|
9731
|
+
#line 848 "upb/json/parser.rl"
|
9732
|
+
{ {stack[top++] = cs; cs = 19; goto _again;} }
|
9388
9733
|
break;
|
9389
9734
|
case 10:
|
9390
|
-
#line
|
9391
|
-
{
|
9735
|
+
#line 850 "upb/json/parser.rl"
|
9736
|
+
{ p--; {stack[top++] = cs; cs = 27; goto _again;} }
|
9392
9737
|
break;
|
9393
9738
|
case 11:
|
9394
|
-
#line
|
9395
|
-
{
|
9739
|
+
#line 855 "upb/json/parser.rl"
|
9740
|
+
{ start_member(parser); }
|
9396
9741
|
break;
|
9397
9742
|
case 12:
|
9398
|
-
#line
|
9399
|
-
{
|
9743
|
+
#line 856 "upb/json/parser.rl"
|
9744
|
+
{ CHECK_RETURN_TOP(end_member(parser)); }
|
9400
9745
|
break;
|
9401
9746
|
case 13:
|
9402
|
-
#line
|
9403
|
-
{
|
9747
|
+
#line 859 "upb/json/parser.rl"
|
9748
|
+
{ clear_member(parser); }
|
9404
9749
|
break;
|
9405
9750
|
case 14:
|
9406
|
-
#line
|
9407
|
-
{
|
9751
|
+
#line 865 "upb/json/parser.rl"
|
9752
|
+
{ start_object(parser); }
|
9408
9753
|
break;
|
9409
9754
|
case 15:
|
9410
|
-
#line
|
9411
|
-
{
|
9755
|
+
#line 868 "upb/json/parser.rl"
|
9756
|
+
{ end_object(parser); }
|
9412
9757
|
break;
|
9413
9758
|
case 16:
|
9414
|
-
#line
|
9415
|
-
{
|
9759
|
+
#line 874 "upb/json/parser.rl"
|
9760
|
+
{ CHECK_RETURN_TOP(start_array(parser)); }
|
9416
9761
|
break;
|
9417
9762
|
case 17:
|
9418
|
-
#line
|
9419
|
-
{
|
9763
|
+
#line 878 "upb/json/parser.rl"
|
9764
|
+
{ end_array(parser); }
|
9420
9765
|
break;
|
9421
9766
|
case 18:
|
9422
|
-
#line
|
9423
|
-
{
|
9767
|
+
#line 883 "upb/json/parser.rl"
|
9768
|
+
{ start_number(parser, p); }
|
9424
9769
|
break;
|
9425
9770
|
case 19:
|
9426
|
-
#line
|
9427
|
-
{ CHECK_RETURN_TOP(
|
9771
|
+
#line 884 "upb/json/parser.rl"
|
9772
|
+
{ CHECK_RETURN_TOP(end_number(parser, p)); }
|
9428
9773
|
break;
|
9429
9774
|
case 20:
|
9430
|
-
#line
|
9431
|
-
{
|
9775
|
+
#line 886 "upb/json/parser.rl"
|
9776
|
+
{ CHECK_RETURN_TOP(start_stringval(parser)); }
|
9432
9777
|
break;
|
9433
9778
|
case 21:
|
9434
|
-
#line
|
9435
|
-
{ CHECK_RETURN_TOP(
|
9779
|
+
#line 887 "upb/json/parser.rl"
|
9780
|
+
{ CHECK_RETURN_TOP(end_stringval(parser)); }
|
9436
9781
|
break;
|
9437
9782
|
case 22:
|
9438
|
-
#line
|
9439
|
-
{ CHECK_RETURN_TOP(parser_putbool(parser,
|
9783
|
+
#line 889 "upb/json/parser.rl"
|
9784
|
+
{ CHECK_RETURN_TOP(parser_putbool(parser, true)); }
|
9440
9785
|
break;
|
9441
9786
|
case 23:
|
9442
|
-
#line
|
9443
|
-
{
|
9787
|
+
#line 891 "upb/json/parser.rl"
|
9788
|
+
{ CHECK_RETURN_TOP(parser_putbool(parser, false)); }
|
9444
9789
|
break;
|
9445
9790
|
case 24:
|
9446
|
-
#line
|
9447
|
-
{
|
9791
|
+
#line 893 "upb/json/parser.rl"
|
9792
|
+
{ /* null value */ }
|
9448
9793
|
break;
|
9449
9794
|
case 25:
|
9450
|
-
#line
|
9451
|
-
{
|
9795
|
+
#line 895 "upb/json/parser.rl"
|
9796
|
+
{ CHECK_RETURN_TOP(start_subobject(parser)); }
|
9452
9797
|
break;
|
9453
9798
|
case 26:
|
9454
|
-
#line
|
9799
|
+
#line 896 "upb/json/parser.rl"
|
9800
|
+
{ end_subobject(parser); }
|
9801
|
+
break;
|
9802
|
+
case 27:
|
9803
|
+
#line 901 "upb/json/parser.rl"
|
9455
9804
|
{ p--; {cs = stack[--top]; goto _again;} }
|
9456
9805
|
break;
|
9457
|
-
#line
|
9806
|
+
#line 1173 "upb/json/parser.c"
|
9458
9807
|
}
|
9459
9808
|
}
|
9460
9809
|
|
@@ -9467,10 +9816,12 @@ _again:
|
|
9467
9816
|
_out: {}
|
9468
9817
|
}
|
9469
9818
|
|
9470
|
-
#line
|
9819
|
+
#line 926 "upb/json/parser.rl"
|
9471
9820
|
|
9472
9821
|
if (p != pe) {
|
9473
9822
|
upb_status_seterrf(parser->status, "Parse error at %s\n", p);
|
9823
|
+
} else {
|
9824
|
+
capture_suspend(parser, &p);
|
9474
9825
|
}
|
9475
9826
|
|
9476
9827
|
error:
|
@@ -9487,8 +9838,13 @@ bool end(void *closure, const void *hd) {
|
|
9487
9838
|
return true;
|
9488
9839
|
}
|
9489
9840
|
|
9841
|
+
|
9842
|
+
/* Public API *****************************************************************/
|
9843
|
+
|
9490
9844
|
void upb_json_parser_init(upb_json_parser *p, upb_status *status) {
|
9491
9845
|
p->limit = p->stack + UPB_JSON_MAX_DEPTH;
|
9846
|
+
p->accumulate_buf = NULL;
|
9847
|
+
p->accumulate_buf_size = 0;
|
9492
9848
|
upb_byteshandler_init(&p->input_handler_);
|
9493
9849
|
upb_byteshandler_setstring(&p->input_handler_, parse, NULL);
|
9494
9850
|
upb_byteshandler_setendstr(&p->input_handler_, end, NULL);
|
@@ -9498,6 +9854,7 @@ void upb_json_parser_init(upb_json_parser *p, upb_status *status) {
|
|
9498
9854
|
|
9499
9855
|
// Releases the resources owned by the parser: uninitializes the input bytes
// handler and frees the accumulate buffer.  The buffer fields are reset
// afterwards so the freed pointer is not left dangling and a repeated call
// cannot free it twice.
void upb_json_parser_uninit(upb_json_parser *p) {
  upb_byteshandler_uninit(&p->input_handler_);
  free(p->accumulate_buf);
  p->accumulate_buf = NULL;
  p->accumulate_buf_size = 0;
}
|
9502
9859
|
|
9503
9860
|
void upb_json_parser_reset(upb_json_parser *p) {
|
@@ -9508,18 +9865,18 @@ void upb_json_parser_reset(upb_json_parser *p) {
|
|
9508
9865
|
int top;
|
9509
9866
|
// Emit Ragel initialization of the parser.
|
9510
9867
|
|
9511
|
-
#line
|
9868
|
+
#line 1235 "upb/json/parser.c"
|
9512
9869
|
{
|
9513
9870
|
cs = json_start;
|
9514
9871
|
top = 0;
|
9515
9872
|
}
|
9516
9873
|
|
9517
|
-
#line
|
9874
|
+
#line 974 "upb/json/parser.rl"
|
9518
9875
|
p->current_state = cs;
|
9519
9876
|
p->parser_top = top;
|
9520
|
-
p
|
9521
|
-
p->
|
9522
|
-
p->
|
9877
|
+
accumulate_clear(p);
|
9878
|
+
p->multipart_state = MULTIPART_INACTIVE;
|
9879
|
+
p->capture = NULL;
|
9523
9880
|
}
|
9524
9881
|
|
9525
9882
|
void upb_json_parser_resetoutput(upb_json_parser *p, upb_sink *sink) {
|