google-protobuf 3.0.0.alpha.2.0 → 3.0.0.alpha.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of google-protobuf might be problematic. Click here for more details.

@@ -161,8 +161,6 @@ extern VALUE cOneofBuilderContext;
161
161
  extern VALUE cEnumBuilderContext;
162
162
  extern VALUE cBuilder;
163
163
 
164
- extern const char* kDescriptorInstanceVar;
165
-
166
164
  // We forward-declare all of the Ruby method implementations here because we
167
165
  // sometimes call the methods directly across .c files, rather than going
168
166
  // through Ruby's method dispatching (e.g. during message parse). It's cleaner
@@ -361,19 +359,20 @@ extern VALUE cRepeatedField;
361
359
  RepeatedField* ruby_to_RepeatedField(VALUE value);
362
360
 
363
361
  VALUE RepeatedField_each(VALUE _self);
364
- VALUE RepeatedField_index(VALUE _self, VALUE _index);
362
+ VALUE RepeatedField_index(int argc, VALUE* argv, VALUE _self);
365
363
  void* RepeatedField_index_native(VALUE _self, int index);
366
364
  VALUE RepeatedField_index_set(VALUE _self, VALUE _index, VALUE val);
367
365
  void RepeatedField_reserve(RepeatedField* self, int new_size);
368
366
  VALUE RepeatedField_push(VALUE _self, VALUE val);
369
367
  void RepeatedField_push_native(VALUE _self, void* data);
370
- VALUE RepeatedField_pop(VALUE _self);
368
+ VALUE RepeatedField_pop_one(VALUE _self);
371
369
  VALUE RepeatedField_insert(int argc, VALUE* argv, VALUE _self);
372
370
  VALUE RepeatedField_replace(VALUE _self, VALUE list);
373
371
  VALUE RepeatedField_clear(VALUE _self);
374
372
  VALUE RepeatedField_length(VALUE _self);
375
373
  VALUE RepeatedField_dup(VALUE _self);
376
374
  VALUE RepeatedField_deep_copy(VALUE _self);
375
+ VALUE RepeatedField_to_ary(VALUE _self);
377
376
  VALUE RepeatedField_eq(VALUE _self, VALUE _other);
378
377
  VALUE RepeatedField_hash(VALUE _self);
379
378
  VALUE RepeatedField_inspect(VALUE _self);
@@ -497,11 +496,6 @@ VALUE Message_encode(VALUE klass, VALUE msg_rb);
497
496
  VALUE Message_decode_json(VALUE klass, VALUE data);
498
497
  VALUE Message_encode_json(VALUE klass, VALUE msg_rb);
499
498
 
500
- VALUE Google_Protobuf_encode(VALUE self, VALUE msg_rb);
501
- VALUE Google_Protobuf_decode(VALUE self, VALUE klass, VALUE msg_rb);
502
- VALUE Google_Protobuf_encode_json(VALUE self, VALUE msg_rb);
503
- VALUE Google_Protobuf_decode_json(VALUE self, VALUE klass, VALUE msg_rb);
504
-
505
499
  VALUE Google_Protobuf_deep_copy(VALUE self, VALUE obj);
506
500
 
507
501
  VALUE build_module_from_enumdesc(EnumDescriptor* enumdef);
@@ -511,6 +505,10 @@ VALUE enum_resolve(VALUE self, VALUE sym);
511
505
  const upb_pbdecodermethod *new_fillmsg_decodermethod(
512
506
  Descriptor* descriptor, const void *owner);
513
507
 
508
+ // Maximum depth allowed during encoding, to avoid stack overflows due to
509
+ // cycles.
510
+ #define ENCODE_MAX_NESTING 63
511
+
514
512
  // -----------------------------------------------------------------------------
515
513
  // Global map from upb {msg,enum}defs to wrapper Descriptor/EnumDescriptor
516
514
  // instances.
@@ -530,4 +528,6 @@ void check_upb_status(const upb_status* status, const char* msg);
530
528
  check_upb_status(&status, msg); \
531
529
  } while (0)
532
530
 
531
+ extern ID descriptor_instancevar_interned;
532
+
533
533
  #endif // __GOOGLE_PROTOBUF_RUBY_PROTOBUF_H__
@@ -47,6 +47,30 @@ RepeatedField* ruby_to_RepeatedField(VALUE _self) {
47
47
  return self;
48
48
  }
49
49
 
50
+ static int index_position(VALUE _index, RepeatedField* repeated_field) {
51
+ int index = NUM2INT(_index);
52
+ if (index < 0 && repeated_field->size > 0) {
53
+ index = repeated_field->size + index;
54
+ }
55
+ return index;
56
+ }
57
+
58
+ VALUE RepeatedField_subarray(VALUE _self, long beg, long len) {
59
+ RepeatedField* self = ruby_to_RepeatedField(_self);
60
+ int element_size = native_slot_size(self->field_type);
61
+ upb_fieldtype_t field_type = self->field_type;
62
+ VALUE field_type_class = self->field_type_class;
63
+
64
+ size_t off = beg * element_size;
65
+ VALUE ary = rb_ary_new2(len);
66
+ for (int i = beg; i < beg + len; i++, off += element_size) {
67
+ void* mem = ((uint8_t *)self->elements) + off;
68
+ VALUE elem = native_slot_get(field_type, field_type_class, mem);
69
+ rb_ary_push(ary, elem);
70
+ }
71
+ return ary;
72
+ }
73
+
50
74
  /*
51
75
  * call-seq:
52
76
  * RepeatedField.each(&block)
@@ -67,29 +91,58 @@ VALUE RepeatedField_each(VALUE _self) {
67
91
  VALUE val = native_slot_get(field_type, field_type_class, memory);
68
92
  rb_yield(val);
69
93
  }
70
- return Qnil;
94
+ return _self;
71
95
  }
72
96
 
97
+
73
98
  /*
74
99
  * call-seq:
75
100
  * RepeatedField.[](index) => value
76
101
  *
77
- * Accesses the element at the given index. Throws an exception on out-of-bounds
78
- * errors.
102
+ * Accesses the element at the given index. Returns nil if the index is out of bounds.
79
103
  */
80
- VALUE RepeatedField_index(VALUE _self, VALUE _index) {
104
+ VALUE RepeatedField_index(int argc, VALUE* argv, VALUE _self) {
81
105
  RepeatedField* self = ruby_to_RepeatedField(_self);
82
106
  int element_size = native_slot_size(self->field_type);
83
107
  upb_fieldtype_t field_type = self->field_type;
84
108
  VALUE field_type_class = self->field_type_class;
85
109
 
86
- int index = NUM2INT(_index);
87
- if (index < 0 || index >= self->size) {
88
- rb_raise(rb_eRangeError, "Index out of range");
110
+ VALUE arg = argv[0];
111
+ long beg, len;
112
+
113
+ if (argc == 1){
114
+ if (FIXNUM_P(arg)) {
115
+ /* standard case */
116
+ int index = index_position(argv[0], self);
117
+ if (index < 0 || index >= self->size) {
118
+ return Qnil;
119
+ }
120
+ void* memory = (void *) (((uint8_t *)self->elements) +
121
+ index * element_size);
122
+ return native_slot_get(field_type, field_type_class, memory);
123
+ }else{
124
+ /* check if idx is Range */
125
+ size_t off;
126
+ switch (rb_range_beg_len(arg, &beg, &len, self->size, 0)) {
127
+ case Qfalse:
128
+ break;
129
+ case Qnil:
130
+ return Qnil;
131
+ default:
132
+ return RepeatedField_subarray(_self, beg, len);
133
+ }
134
+ }
89
135
  }
90
-
91
- void* memory = (void *) (((uint8_t *)self->elements) + index * element_size);
92
- return native_slot_get(field_type, field_type_class, memory);
136
+ /* assume 2 arguments */
137
+ beg = NUM2LONG(argv[0]);
138
+ len = NUM2LONG(argv[1]);
139
+ if (beg < 0) {
140
+ beg += self->size;
141
+ }
142
+ if (beg >= self->size) {
143
+ return Qnil;
144
+ }
145
+ return RepeatedField_subarray(_self, beg, len);
93
146
  }
94
147
 
95
148
  /*
@@ -105,9 +158,9 @@ VALUE RepeatedField_index_set(VALUE _self, VALUE _index, VALUE val) {
105
158
  VALUE field_type_class = self->field_type_class;
106
159
  int element_size = native_slot_size(field_type);
107
160
 
108
- int index = NUM2INT(_index);
161
+ int index = index_position(_index, self);
109
162
  if (index < 0 || index >= (INT_MAX - 1)) {
110
- rb_raise(rb_eRangeError, "Index out of range");
163
+ return Qnil;
111
164
  }
112
165
  if (index >= self->size) {
113
166
  RepeatedField_reserve(self, index + 1);
@@ -165,6 +218,7 @@ VALUE RepeatedField_push(VALUE _self, VALUE val) {
165
218
  return _self;
166
219
  }
167
220
 
221
+
168
222
  // Used by parsing handlers.
169
223
  void RepeatedField_push_native(VALUE _self, void* data) {
170
224
  RepeatedField* self = ruby_to_RepeatedField(_self);
@@ -185,19 +239,15 @@ void* RepeatedField_index_native(VALUE _self, int index) {
185
239
  }
186
240
 
187
241
  /*
188
- * call-seq:
189
- * RepeatedField.pop => value
190
- *
191
- * Removes the last element and returns it. Throws an exception if the repeated
192
- * field is empty.
242
+ * Private Ruby method, used by RepeatedField.pop.
193
243
  */
194
- VALUE RepeatedField_pop(VALUE _self) {
244
+ VALUE RepeatedField_pop_one(VALUE _self) {
195
245
  RepeatedField* self = ruby_to_RepeatedField(_self);
196
246
  upb_fieldtype_t field_type = self->field_type;
197
247
  VALUE field_type_class = self->field_type_class;
198
248
  int element_size = native_slot_size(field_type);
199
249
  if (self->size == 0) {
200
- rb_raise(rb_eRangeError, "Pop from empty repeated field is not allowed.");
250
+ return Qnil;
201
251
  }
202
252
  int index = self->size - 1;
203
253
  void* memory = (void *) (((uint8_t *)self->elements) + index * element_size);
@@ -206,19 +256,6 @@ VALUE RepeatedField_pop(VALUE _self) {
206
256
  return ret;
207
257
  }
208
258
 
209
- /*
210
- * call-seq:
211
- * RepeatedField.insert(*args)
212
- *
213
- * Pushes each arg in turn onto the end of the repeated field.
214
- */
215
- VALUE RepeatedField_insert(int argc, VALUE* argv, VALUE _self) {
216
- for (int i = 0; i < argc; i++) {
217
- RepeatedField_push(_self, argv[i]);
218
- }
219
- return Qnil;
220
- }
221
-
222
259
  /*
223
260
  * call-seq:
224
261
  * RepeatedField.replace(list)
@@ -232,7 +269,7 @@ VALUE RepeatedField_replace(VALUE _self, VALUE list) {
232
269
  for (int i = 0; i < RARRAY_LEN(list); i++) {
233
270
  RepeatedField_push(_self, rb_ary_entry(list, i));
234
271
  }
235
- return Qnil;
272
+ return list;
236
273
  }
237
274
 
238
275
  /*
@@ -244,7 +281,7 @@ VALUE RepeatedField_replace(VALUE _self, VALUE list) {
244
281
  VALUE RepeatedField_clear(VALUE _self) {
245
282
  RepeatedField* self = ruby_to_RepeatedField(_self);
246
283
  self->size = 0;
247
- return Qnil;
284
+ return _self;
248
285
  }
249
286
 
250
287
  /*
@@ -333,7 +370,6 @@ VALUE RepeatedField_to_ary(VALUE _self) {
333
370
  for (int i = 0; i < self->size; i++, off += elem_size) {
334
371
  void* mem = ((uint8_t *)self->elements) + off;
335
372
  VALUE elem = native_slot_get(field_type, self->field_type_class, mem);
336
-
337
373
  rb_ary_push(ary, elem);
338
374
  }
339
375
  return ary;
@@ -409,19 +445,6 @@ VALUE RepeatedField_hash(VALUE _self) {
409
445
  return hash;
410
446
  }
411
447
 
412
- /*
413
- * call-seq:
414
- * RepeatedField.inspect => string
415
- *
416
- * Returns a string representing this repeated field's elements. It will be
417
- * formated as "[<element>, <element>, ...]", with each element's string
418
- * representation computed by its own #inspect method.
419
- */
420
- VALUE RepeatedField_inspect(VALUE _self) {
421
- VALUE self_ary = RepeatedField_to_ary(_self);
422
- return rb_funcall(self_ary, rb_intern("inspect"), 0);
423
- }
424
-
425
448
  /*
426
449
  * call-seq:
427
450
  * RepeatedField.+(other) => repeated field
@@ -458,14 +481,30 @@ VALUE RepeatedField_plus(VALUE _self, VALUE list) {
458
481
  return dupped;
459
482
  }
460
483
 
484
+ /*
485
+ * call-seq:
486
+ * RepeatedField.concat(other) => self
487
+ *
488
+ * Appends each element of the passed-in array to self. Returns self.
489
+ */
490
+ VALUE RepeatedField_concat(VALUE _self, VALUE list) {
491
+ RepeatedField* self = ruby_to_RepeatedField(_self);
492
+ Check_Type(list, T_ARRAY);
493
+ for (int i = 0; i < RARRAY_LEN(list); i++) {
494
+ RepeatedField_push(_self, rb_ary_entry(list, i));
495
+ }
496
+ return _self;
497
+ }
498
+
499
+
461
500
  void validate_type_class(upb_fieldtype_t type, VALUE klass) {
462
- if (rb_iv_get(klass, kDescriptorInstanceVar) == Qnil) {
501
+ if (rb_ivar_get(klass, descriptor_instancevar_interned) == Qnil) {
463
502
  rb_raise(rb_eArgError,
464
503
  "Type class has no descriptor. Please pass a "
465
504
  "class or enum as returned by the DescriptorPool.");
466
505
  }
467
506
  if (type == UPB_TYPE_MESSAGE) {
468
- VALUE desc = rb_iv_get(klass, kDescriptorInstanceVar);
507
+ VALUE desc = rb_ivar_get(klass, descriptor_instancevar_interned);
469
508
  if (!RB_TYPE_P(desc, T_DATA) || !RTYPEDDATA_P(desc) ||
470
509
  RTYPEDDATA_TYPE(desc) != &_Descriptor_type) {
471
510
  rb_raise(rb_eArgError, "Descriptor has an incorrect type.");
@@ -475,7 +514,7 @@ void validate_type_class(upb_fieldtype_t type, VALUE klass) {
475
514
  "Message class was not returned by the DescriptorPool.");
476
515
  }
477
516
  } else if (type == UPB_TYPE_ENUM) {
478
- VALUE enumdesc = rb_iv_get(klass, kDescriptorInstanceVar);
517
+ VALUE enumdesc = rb_ivar_get(klass, descriptor_instancevar_interned);
479
518
  if (!RB_TYPE_P(enumdesc, T_DATA) || !RTYPEDDATA_P(enumdesc) ||
480
519
  RTYPEDDATA_TYPE(enumdesc) != &_EnumDescriptor_type) {
481
520
  rb_raise(rb_eArgError, "Descriptor has an incorrect type.");
@@ -577,22 +616,23 @@ void RepeatedField_register(VALUE module) {
577
616
  rb_define_method(klass, "initialize",
578
617
  RepeatedField_init, -1);
579
618
  rb_define_method(klass, "each", RepeatedField_each, 0);
580
- rb_define_method(klass, "[]", RepeatedField_index, 1);
619
+ rb_define_method(klass, "[]", RepeatedField_index, -1);
620
+ rb_define_method(klass, "at", RepeatedField_index, -1);
581
621
  rb_define_method(klass, "[]=", RepeatedField_index_set, 2);
582
622
  rb_define_method(klass, "push", RepeatedField_push, 1);
583
623
  rb_define_method(klass, "<<", RepeatedField_push, 1);
584
- rb_define_method(klass, "pop", RepeatedField_pop, 0);
585
- rb_define_method(klass, "insert", RepeatedField_insert, -1);
624
+ rb_define_private_method(klass, "pop_one", RepeatedField_pop_one, 0);
586
625
  rb_define_method(klass, "replace", RepeatedField_replace, 1);
587
626
  rb_define_method(klass, "clear", RepeatedField_clear, 0);
588
627
  rb_define_method(klass, "length", RepeatedField_length, 0);
628
+ rb_define_method(klass, "size", RepeatedField_length, 0);
589
629
  rb_define_method(klass, "dup", RepeatedField_dup, 0);
590
630
  // Also define #clone so that we don't inherit Object#clone.
591
631
  rb_define_method(klass, "clone", RepeatedField_dup, 0);
592
632
  rb_define_method(klass, "==", RepeatedField_eq, 1);
593
633
  rb_define_method(klass, "to_ary", RepeatedField_to_ary, 0);
594
634
  rb_define_method(klass, "hash", RepeatedField_hash, 0);
595
- rb_define_method(klass, "inspect", RepeatedField_inspect, 0);
596
635
  rb_define_method(klass, "+", RepeatedField_plus, 1);
636
+ rb_define_method(klass, "concat", RepeatedField_concat, 1);
597
637
  rb_include_module(klass, rb_mEnumerable);
598
638
  }
@@ -155,7 +155,9 @@ void native_slot_set_value_and_case(upb_fieldtype_t type, VALUE type_class,
155
155
  break;
156
156
  }
157
157
  case UPB_TYPE_MESSAGE: {
158
- if (CLASS_OF(value) != type_class) {
158
+ if (CLASS_OF(value) == CLASS_OF(Qnil)) {
159
+ value = Qnil;
160
+ } else if (CLASS_OF(value) != type_class) {
159
161
  rb_raise(rb_eTypeError,
160
162
  "Invalid type %s to assign to submessage field.",
161
163
  rb_class2name(CLASS_OF(value)));
@@ -413,7 +415,8 @@ MessageLayout* create_layout(const upb_msgdef* msgdef) {
413
415
  // Align current offset up to |size| granularity.
414
416
  off = align_up_to(off, field_size);
415
417
  layout->fields[upb_fielddef_index(field)].offset = off;
416
- layout->fields[upb_fielddef_index(field)].case_offset = MESSAGE_FIELD_NO_CASE;
418
+ layout->fields[upb_fielddef_index(field)].case_offset =
419
+ MESSAGE_FIELD_NO_CASE;
417
420
  off += field_size;
418
421
  }
419
422
 
@@ -1779,6 +1779,275 @@ upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter) {
1779
1779
  void upb_oneof_iter_setdone(upb_oneof_iter *iter) {
1780
1780
  upb_inttable_iter_setdone(iter);
1781
1781
  }
1782
+ /*
1783
+ * upb - a minimalist implementation of protocol buffers.
1784
+ *
1785
+ * Copyright (c) 2014 Google Inc. See LICENSE for details.
1786
+ * Author: Josh Haberman <jhaberman@gmail.com>
1787
+ */
1788
+
1789
+
1790
+ #include <stdlib.h>
1791
+ #include <stdio.h>
1792
+ #include <string.h>
1793
+
1794
+ typedef struct cleanup_ent {
1795
+ upb_cleanup_func *cleanup;
1796
+ void *ud;
1797
+ struct cleanup_ent *next;
1798
+ } cleanup_ent;
1799
+
1800
+ static void *seeded_alloc(void *ud, void *ptr, size_t oldsize, size_t size);
1801
+
1802
+ /* Default allocator **********************************************************/
1803
+
1804
+ // Just use realloc, keeping all allocated blocks in a linked list to destroy at
1805
+ // the end.
1806
+
1807
+ typedef struct mem_block {
1808
+ // List is doubly-linked, because in cases where realloc() moves an existing
1809
+ // block, we need to be able to remove the old pointer from the list
1810
+ // efficiently.
1811
+ struct mem_block *prev, *next;
1812
+ #ifndef NDEBUG
1813
+ size_t size; // Doesn't include mem_block structure.
1814
+ #endif
1815
+ char data[];
1816
+ } mem_block;
1817
+
1818
+ typedef struct {
1819
+ mem_block *head;
1820
+ } default_alloc_ud;
1821
+
1822
+ static void *default_alloc(void *_ud, void *ptr, size_t oldsize, size_t size) {
1823
+ UPB_UNUSED(oldsize);
1824
+ default_alloc_ud *ud = _ud;
1825
+
1826
+ mem_block *from = ptr ? (void*)((char*)ptr - sizeof(mem_block)) : NULL;
1827
+
1828
+ #ifndef NDEBUG
1829
+ if (from) {
1830
+ assert(oldsize <= from->size);
1831
+ }
1832
+ #endif
1833
+
1834
+ mem_block *block = realloc(from, size + sizeof(mem_block));
1835
+ if (!block) return NULL;
1836
+
1837
+ #ifndef NDEBUG
1838
+ block->size = size;
1839
+ #endif
1840
+
1841
+ if (from) {
1842
+ if (block != from) {
1843
+ // The block was moved, so pointers in next and prev blocks must be
1844
+ // updated to its new location.
1845
+ if (block->next) block->next->prev = block;
1846
+ if (block->prev) block->prev->next = block;
1847
+ }
1848
+ } else {
1849
+ // Insert at head of linked list.
1850
+ block->prev = NULL;
1851
+ block->next = ud->head;
1852
+ if (block->next) block->next->prev = block;
1853
+ ud->head = block;
1854
+ }
1855
+
1856
+ return &block->data;
1857
+ }
1858
+
1859
+ static void default_alloc_cleanup(void *_ud) {
1860
+ default_alloc_ud *ud = _ud;
1861
+ mem_block *block = ud->head;
1862
+
1863
+ while (block) {
1864
+ void *to_free = block;
1865
+ block = block->next;
1866
+ free(to_free);
1867
+ }
1868
+ }
1869
+
1870
+
1871
+ /* Standard error functions ***************************************************/
1872
+
1873
+ static bool default_err(void *ud, const upb_status *status) {
1874
+ UPB_UNUSED(ud);
1875
+ fprintf(stderr, "upb error: %s\n", upb_status_errmsg(status));
1876
+ return false;
1877
+ }
1878
+
1879
+ static bool write_err_to(void *ud, const upb_status *status) {
1880
+ upb_status *copy_to = ud;
1881
+ upb_status_copy(copy_to, status);
1882
+ return false;
1883
+ }
1884
+
1885
+
1886
+ /* upb_env ********************************************************************/
1887
+
1888
+ void upb_env_init(upb_env *e) {
1889
+ e->ok_ = true;
1890
+ e->bytes_allocated = 0;
1891
+ e->cleanup_head = NULL;
1892
+
1893
+ default_alloc_ud *ud = (default_alloc_ud*)&e->default_alloc_ud;
1894
+ ud->head = NULL;
1895
+
1896
+ // Set default functions.
1897
+ upb_env_setallocfunc(e, default_alloc, ud);
1898
+ upb_env_seterrorfunc(e, default_err, NULL);
1899
+ }
1900
+
1901
+ void upb_env_uninit(upb_env *e) {
1902
+ cleanup_ent *ent = e->cleanup_head;
1903
+
1904
+ while (ent) {
1905
+ ent->cleanup(ent->ud);
1906
+ ent = ent->next;
1907
+ }
1908
+
1909
+ // Must do this after running cleanup functions, because this will delete
1910
+ // the memory we store our cleanup entries in!
1911
+ if (e->alloc == default_alloc) {
1912
+ default_alloc_cleanup(e->alloc_ud);
1913
+ }
1914
+ }
1915
+
1916
+ UPB_FORCEINLINE void upb_env_setallocfunc(upb_env *e, upb_alloc_func *alloc,
1917
+ void *ud) {
1918
+ e->alloc = alloc;
1919
+ e->alloc_ud = ud;
1920
+ }
1921
+
1922
+ UPB_FORCEINLINE void upb_env_seterrorfunc(upb_env *e, upb_error_func *func,
1923
+ void *ud) {
1924
+ e->err = func;
1925
+ e->err_ud = ud;
1926
+ }
1927
+
1928
+ void upb_env_reporterrorsto(upb_env *e, upb_status *status) {
1929
+ e->err = write_err_to;
1930
+ e->err_ud = status;
1931
+ }
1932
+
1933
+ bool upb_env_ok(const upb_env *e) {
1934
+ return e->ok_;
1935
+ }
1936
+
1937
+ bool upb_env_reporterror(upb_env *e, const upb_status *status) {
1938
+ e->ok_ = false;
1939
+ return e->err(e->err_ud, status);
1940
+ }
1941
+
1942
+ bool upb_env_addcleanup(upb_env *e, upb_cleanup_func *func, void *ud) {
1943
+ cleanup_ent *ent = upb_env_malloc(e, sizeof(cleanup_ent));
1944
+ if (!ent) return false;
1945
+
1946
+ ent->cleanup = func;
1947
+ ent->ud = ud;
1948
+ ent->next = e->cleanup_head;
1949
+ e->cleanup_head = ent;
1950
+
1951
+ return true;
1952
+ }
1953
+
1954
+ void *upb_env_malloc(upb_env *e, size_t size) {
1955
+ e->bytes_allocated += size;
1956
+ if (e->alloc == seeded_alloc) {
1957
+ // This is equivalent to the next branch, but allows inlining for a
1958
+ // measurable perf benefit.
1959
+ return seeded_alloc(e->alloc_ud, NULL, 0, size);
1960
+ } else {
1961
+ return e->alloc(e->alloc_ud, NULL, 0, size);
1962
+ }
1963
+ }
1964
+
1965
+ void *upb_env_realloc(upb_env *e, void *ptr, size_t oldsize, size_t size) {
1966
+ assert(oldsize <= size);
1967
+ char *ret = e->alloc(e->alloc_ud, ptr, oldsize, size);
1968
+
1969
+ #ifndef NDEBUG
1970
+ // Overwrite non-preserved memory to ensure callers are passing the oldsize
1971
+ // that they truly require.
1972
+ memset(ret + oldsize, 0xff, size - oldsize);
1973
+ #endif
1974
+
1975
+ return ret;
1976
+ }
1977
+
1978
+ size_t upb_env_bytesallocated(const upb_env *e) {
1979
+ return e->bytes_allocated;
1980
+ }
1981
+
1982
+
1983
+ /* upb_seededalloc ************************************************************/
1984
+
1985
+ // Be conservative and choose 16 in case anyone is using SSE.
1986
+ static const size_t maxalign = 16;
1987
+
1988
+ static size_t align_up(size_t size) {
1989
+ return ((size + maxalign - 1) / maxalign) * maxalign;
1990
+ }
1991
+
1992
+ UPB_FORCEINLINE static void *seeded_alloc(void *ud, void *ptr, size_t oldsize,
1993
+ size_t size) {
1994
+ UPB_UNUSED(ptr);
1995
+
1996
+ upb_seededalloc *a = ud;
1997
+ size = align_up(size);
1998
+
1999
+ assert(a->mem_limit >= a->mem_ptr);
2000
+
2001
+ if (oldsize == 0 && size <= (size_t)(a->mem_limit - a->mem_ptr)) {
2002
+ // Fast path: we can satisfy from the initial allocation.
2003
+ void *ret = a->mem_ptr;
2004
+ a->mem_ptr += size;
2005
+ return ret;
2006
+ } else {
2007
+ // Slow path: fallback to other allocator.
2008
+ a->need_cleanup = true;
2009
+ // Is `ptr` part of the user-provided initial block? Don't pass it to the
2010
+ // default allocator if so; otherwise, it may try to realloc() the block.
2011
+ char *chptr = ptr;
2012
+ if (chptr >= a->mem_base && chptr < a->mem_limit) {
2013
+ return a->alloc(a->alloc_ud, NULL, 0, size);
2014
+ } else {
2015
+ return a->alloc(a->alloc_ud, ptr, oldsize, size);
2016
+ }
2017
+ }
2018
+ }
2019
+
2020
+ void upb_seededalloc_init(upb_seededalloc *a, void *mem, size_t len) {
2021
+ a->mem_base = mem;
2022
+ a->mem_ptr = mem;
2023
+ a->mem_limit = (char*)mem + len;
2024
+ a->need_cleanup = false;
2025
+ a->returned_allocfunc = false;
2026
+
2027
+ default_alloc_ud *ud = (default_alloc_ud*)&a->default_alloc_ud;
2028
+ ud->head = NULL;
2029
+
2030
+ upb_seededalloc_setfallbackalloc(a, default_alloc, ud);
2031
+ }
2032
+
2033
+ void upb_seededalloc_uninit(upb_seededalloc *a) {
2034
+ if (a->alloc == default_alloc && a->need_cleanup) {
2035
+ default_alloc_cleanup(a->alloc_ud);
2036
+ }
2037
+ }
2038
+
2039
+ UPB_FORCEINLINE void upb_seededalloc_setfallbackalloc(upb_seededalloc *a,
2040
+ upb_alloc_func *alloc,
2041
+ void *ud) {
2042
+ assert(!a->returned_allocfunc);
2043
+ a->alloc = alloc;
2044
+ a->alloc_ud = ud;
2045
+ }
2046
+
2047
+ upb_alloc_func *upb_seededalloc_getallocfunc(upb_seededalloc *a) {
2048
+ a->returned_allocfunc = true;
2049
+ return seeded_alloc;
2050
+ }
1782
2051
  /*
1783
2052
  * upb - a minimalist implementation of protocol buffers.
1784
2053
  *
@@ -1955,7 +2224,14 @@ static bool doset(upb_handlers *h, int32_t sel, const upb_fielddef *f,
1955
2224
  if (closure_type && *context_closure_type &&
1956
2225
  closure_type != *context_closure_type) {
1957
2226
  // TODO(haberman): better message for debugging.
1958
- upb_status_seterrmsg(&h->status_, "closure type does not match");
2227
+ if (f) {
2228
+ upb_status_seterrf(&h->status_,
2229
+ "closure type does not match for field %s",
2230
+ upb_fielddef_name(f));
2231
+ } else {
2232
+ upb_status_seterrmsg(
2233
+ &h->status_, "closure type does not match for message-level handler");
2234
+ }
1959
2235
  return false;
1960
2236
  }
1961
2237
 
@@ -2353,7 +2629,7 @@ bool upb_handlers_getselector(const upb_fielddef *f, upb_handlertype_t type,
2353
2629
  *s = f->selector_base;
2354
2630
  break;
2355
2631
  }
2356
- assert(*s < upb_fielddef_containingtype(f)->selector_count);
2632
+ assert((size_t)*s < upb_fielddef_containingtype(f)->selector_count);
2357
2633
  return true;
2358
2634
  }
2359
2635
 
@@ -4295,7 +4571,7 @@ void upb_inttable_compact(upb_inttable *t) {
4295
4571
  counts[log2ceil(key)]++;
4296
4572
  }
4297
4573
 
4298
- int arr_size;
4574
+ size_t arr_size = 1;
4299
4575
  int arr_count = upb_inttable_count(t);
4300
4576
 
4301
4577
  if (upb_inttable_count(t) >= max_key * MIN_DENSITY) {
@@ -5522,6 +5798,54 @@ static upb_inttable reftables[212] = {
5522
5798
  #include <stdlib.h>
5523
5799
  #include <string.h>
5524
5800
 
5801
+ // upb_deflist is an internal-only dynamic array for storing a growing list of
5802
+ // upb_defs.
5803
+ typedef struct {
5804
+ upb_def **defs;
5805
+ size_t len;
5806
+ size_t size;
5807
+ bool owned;
5808
+ } upb_deflist;
5809
+
5810
+ // We keep a stack of all the message scopes we are currently in, as well as
5811
+ // the top-level file scope. This is necessary to correctly qualify the
5812
+ // definitions that are contained inside. "name" tracks the name of the
5813
+ // message or package (a bare name -- not qualified by any enclosing scopes).
5814
+ typedef struct {
5815
+ char *name;
5816
+ // Index of the first def that is under this scope. For msgdefs, the
5817
+ // msgdef itself is at start-1.
5818
+ int start;
5819
+ } upb_descreader_frame;
5820
+
5821
+ // The maximum number of nested declarations that are allowed, i.e.
5822
+ // message Foo {
5823
+ // message Bar {
5824
+ // message Baz {
5825
+ // }
5826
+ // }
5827
+ // }
5828
+ //
5829
+ // This is a resource limit that affects how big our runtime stack can grow.
5830
+ // TODO: make this a runtime-settable property of the Reader instance.
5831
+ #define UPB_MAX_MESSAGE_NESTING 64
5832
+
5833
+ struct upb_descreader {
5834
+ upb_sink sink;
5835
+ upb_deflist defs;
5836
+ upb_descreader_frame stack[UPB_MAX_MESSAGE_NESTING];
5837
+ int stack_len;
5838
+
5839
+ uint32_t number;
5840
+ char *name;
5841
+ bool saw_number;
5842
+ bool saw_name;
5843
+
5844
+ char *default_string;
5845
+
5846
+ upb_fielddef *f;
5847
+ };
5848
+
5525
5849
  static char *upb_strndup(const char *buf, size_t n) {
5526
5850
  char *ret = malloc(n + 1);
5527
5851
  if (!ret) return NULL;
@@ -5601,36 +5925,6 @@ static void upb_deflist_qualify(upb_deflist *l, char *str, int32_t start) {
5601
5925
 
5602
5926
  /* upb_descreader ************************************************************/
5603
5927
 
5604
- void upb_descreader_init(upb_descreader *r, const upb_handlers *handlers,
5605
- upb_status *status) {
5606
- UPB_UNUSED(status);
5607
- upb_deflist_init(&r->defs);
5608
- upb_sink_reset(upb_descreader_input(r), handlers, r);
5609
- r->stack_len = 0;
5610
- r->name = NULL;
5611
- r->default_string = NULL;
5612
- }
5613
-
5614
- void upb_descreader_uninit(upb_descreader *r) {
5615
- free(r->name);
5616
- upb_deflist_uninit(&r->defs);
5617
- free(r->default_string);
5618
- while (r->stack_len > 0) {
5619
- upb_descreader_frame *f = &r->stack[--r->stack_len];
5620
- free(f->name);
5621
- }
5622
- }
5623
-
5624
- upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n) {
5625
- *n = r->defs.len;
5626
- upb_deflist_donaterefs(&r->defs, owner);
5627
- return r->defs.defs;
5628
- }
5629
-
5630
- upb_sink *upb_descreader_input(upb_descreader *r) {
5631
- return &r->sink;
5632
- }
5633
-
5634
5928
  static upb_msgdef *upb_descreader_top(upb_descreader *r) {
5635
5929
  assert(r->stack_len > 1);
5636
5930
  int index = r->stack[r->stack_len-1].start - 1;
@@ -5803,7 +6097,7 @@ static bool parse_default(char *str, upb_fielddef *f) {
5803
6097
  break;
5804
6098
  }
5805
6099
  case UPB_TYPE_UINT32: {
5806
- long val = strtoul(str, &end, 0);
6100
+ unsigned long val = strtoul(str, &end, 0);
5807
6101
  if (val > UINT32_MAX || errno == ERANGE || *end)
5808
6102
  success = false;
5809
6103
  else
@@ -6070,6 +6364,45 @@ static void reghandlers(const void *closure, upb_handlers *h) {
6070
6364
 
6071
6365
  #undef D
6072
6366
 
6367
+ void descreader_cleanup(void *_r) {
6368
+ upb_descreader *r = _r;
6369
+ free(r->name);
6370
+ upb_deflist_uninit(&r->defs);
6371
+ free(r->default_string);
6372
+ while (r->stack_len > 0) {
6373
+ upb_descreader_frame *f = &r->stack[--r->stack_len];
6374
+ free(f->name);
6375
+ }
6376
+ }
6377
+
6378
+
6379
+ /* Public API ****************************************************************/
6380
+
6381
+ upb_descreader *upb_descreader_create(upb_env *e, const upb_handlers *h) {
6382
+ upb_descreader *r = upb_env_malloc(e, sizeof(upb_descreader));
6383
+ if (!r || !upb_env_addcleanup(e, descreader_cleanup, r)) {
6384
+ return NULL;
6385
+ }
6386
+
6387
+ upb_deflist_init(&r->defs);
6388
+ upb_sink_reset(upb_descreader_input(r), h, r);
6389
+ r->stack_len = 0;
6390
+ r->name = NULL;
6391
+ r->default_string = NULL;
6392
+
6393
+ return r;
6394
+ }
6395
+
6396
+ upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n) {
6397
+ *n = r->defs.len;
6398
+ upb_deflist_donaterefs(&r->defs, owner);
6399
+ return r->defs.defs;
6400
+ }
6401
+
6402
+ upb_sink *upb_descreader_input(upb_descreader *r) {
6403
+ return &r->sink;
6404
+ }
6405
+
6073
6406
  const upb_handlers *upb_descreader_newhandlers(const void *owner) {
6074
6407
  const upb_symtab *s = upbdefs_google_protobuf_descriptor(&s);
6075
6408
  const upb_handlers *h = upb_handlers_newfrozen(
@@ -6141,7 +6474,6 @@ mgroup *newgroup(const void *owner) {
6141
6474
 
6142
6475
  static void freemethod(upb_refcounted *r) {
6143
6476
  upb_pbdecodermethod *method = (upb_pbdecodermethod*)r;
6144
- upb_byteshandler_uninit(&method->input_handler_);
6145
6477
 
6146
6478
  if (method->dest_handlers_) {
6147
6479
  upb_handlers_unref(method->dest_handlers_, method);
@@ -7073,10 +7405,7 @@ void upb_pbdecodermethodopts_setlazy(upb_pbdecodermethodopts *opts, bool lazy) {
7073
7405
  */
7074
7406
 
7075
7407
  #include <inttypes.h>
7076
- #include <setjmp.h>
7077
- #include <stdarg.h>
7078
7408
  #include <stddef.h>
7079
- #include <stdlib.h>
7080
7409
 
7081
7410
  #ifdef UPB_DUMP_BYTECODE
7082
7411
  #include <stdio.h>
@@ -7122,18 +7451,17 @@ static bool consumes_input(opcode op) {
7122
7451
 
7123
7452
  static bool in_residual_buf(const upb_pbdecoder *d, const char *p);
7124
7453
 
7125
- // It's unfortunate that we have to micro-manage the compiler this way,
7126
- // especially since this tuning is necessarily specific to one hardware
7127
- // configuration. But emperically on a Core i7, performance increases 30-50%
7128
- // with these annotations. Every instance where these appear, gcc 4.2.1 made
7129
- // the wrong decision and degraded performance in benchmarks.
7130
- #define FORCEINLINE static inline __attribute__((always_inline))
7131
- #define NOINLINE __attribute__((noinline))
7454
+ // It's unfortunate that we have to micro-manage the compiler with
7455
+ // UPB_FORCEINLINE and UPB_NOINLINE, especially since this tuning is necessarily
7456
+ // specific to one hardware configuration. But empirically on a Core i7,
7457
+ // performance increases 30-50% with these annotations. Every instance where
7458
+ // these appear, gcc 4.2.1 made the wrong decision and degraded performance in
7459
+ // benchmarks.
7132
7460
 
7133
7461
  static void seterr(upb_pbdecoder *d, const char *msg) {
7134
- // TODO(haberman): encapsulate this access to pipeline->status, but not sure
7135
- // exactly what that interface should look like.
7136
- upb_status_seterrmsg(d->status, msg);
7462
+ upb_status status = UPB_STATUS_INIT;
7463
+ upb_status_seterrmsg(&status, msg);
7464
+ upb_env_reporterror(d->env, &status);
7137
7465
  }
7138
7466
 
7139
7467
  void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg) {
@@ -7176,7 +7504,7 @@ static bool in_residual_buf(const upb_pbdecoder *d, const char *p) {
7176
7504
  // and the parsing stack, so must be called whenever either is updated.
7177
7505
  static void set_delim_end(upb_pbdecoder *d) {
7178
7506
  size_t delim_ofs = d->top->end_ofs - d->bufstart_ofs;
7179
- if (delim_ofs <= (d->end - d->buf)) {
7507
+ if (delim_ofs <= (size_t)(d->end - d->buf)) {
7180
7508
  d->delim_end = d->buf + delim_ofs;
7181
7509
  d->data_end = d->delim_end;
7182
7510
  } else {
@@ -7301,7 +7629,8 @@ static int32_t skip(upb_pbdecoder *d, size_t bytes) {
7301
7629
 
7302
7630
  // Copies the next "bytes" bytes into "buf" and advances the stream.
7303
7631
  // Requires that this many bytes are available in the current buffer.
7304
- FORCEINLINE void consumebytes(upb_pbdecoder *d, void *buf, size_t bytes) {
7632
+ UPB_FORCEINLINE static void consumebytes(upb_pbdecoder *d, void *buf,
7633
+ size_t bytes) {
7305
7634
  assert(bytes <= curbufleft(d));
7306
7635
  memcpy(buf, d->ptr, bytes);
7307
7636
  advance(d, bytes);
@@ -7310,8 +7639,8 @@ FORCEINLINE void consumebytes(upb_pbdecoder *d, void *buf, size_t bytes) {
7310
7639
  // Slow path for getting the next "bytes" bytes, regardless of whether they are
7311
7640
  // available in the current buffer or not. Returns a status code as described
7312
7641
  // in decoder.int.h.
7313
- static NOINLINE int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
7314
- size_t bytes) {
7642
+ UPB_NOINLINE static int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
7643
+ size_t bytes) {
7315
7644
  const size_t avail = curbufleft(d);
7316
7645
  consumebytes(d, buf, avail);
7317
7646
  bytes -= avail;
@@ -7320,7 +7649,7 @@ static NOINLINE int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
7320
7649
  advancetobuf(d, d->buf_param, d->size_param);
7321
7650
  }
7322
7651
  if (curbufleft(d) >= bytes) {
7323
- consumebytes(d, buf + avail, bytes);
7652
+ consumebytes(d, (char *)buf + avail, bytes);
7324
7653
  return DECODE_OK;
7325
7654
  } else if (d->data_end == d->delim_end) {
7326
7655
  seterr(d, "Submessage ended in the middle of a value or group");
@@ -7332,7 +7661,8 @@ static NOINLINE int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
7332
7661
 
7333
7662
  // Gets the next "bytes" bytes, regardless of whether they are available in the
7334
7663
  // current buffer or not. Returns a status code as described in decoder.int.h.
7335
- FORCEINLINE int32_t getbytes(upb_pbdecoder *d, void *buf, size_t bytes) {
7664
+ UPB_FORCEINLINE static int32_t getbytes(upb_pbdecoder *d, void *buf,
7665
+ size_t bytes) {
7336
7666
  if (curbufleft(d) >= bytes) {
7337
7667
  // Buffer has enough data to satisfy.
7338
7668
  consumebytes(d, buf, bytes);
@@ -7342,19 +7672,20 @@ FORCEINLINE int32_t getbytes(upb_pbdecoder *d, void *buf, size_t bytes) {
7342
7672
  }
7343
7673
  }
7344
7674
 
7345
- static NOINLINE size_t peekbytes_slow(upb_pbdecoder *d, void *buf,
7346
- size_t bytes) {
7675
+ UPB_NOINLINE static size_t peekbytes_slow(upb_pbdecoder *d, void *buf,
7676
+ size_t bytes) {
7347
7677
  size_t ret = curbufleft(d);
7348
7678
  memcpy(buf, d->ptr, ret);
7349
7679
  if (in_residual_buf(d, d->ptr)) {
7350
7680
  size_t copy = UPB_MIN(bytes - ret, d->size_param);
7351
- memcpy(buf + ret, d->buf_param, copy);
7681
+ memcpy((char *)buf + ret, d->buf_param, copy);
7352
7682
  ret += copy;
7353
7683
  }
7354
7684
  return ret;
7355
7685
  }
7356
7686
 
7357
- FORCEINLINE size_t peekbytes(upb_pbdecoder *d, void *buf, size_t bytes) {
7687
+ UPB_FORCEINLINE static size_t peekbytes(upb_pbdecoder *d, void *buf,
7688
+ size_t bytes) {
7358
7689
  if (curbufleft(d) >= bytes) {
7359
7690
  memcpy(buf, d->ptr, bytes);
7360
7691
  return bytes;
@@ -7368,8 +7699,8 @@ FORCEINLINE size_t peekbytes(upb_pbdecoder *d, void *buf, size_t bytes) {
7368
7699
 
7369
7700
  // Slow path for decoding a varint from the current buffer position.
7370
7701
  // Returns a status code as described in decoder.int.h.
7371
- NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
7372
- uint64_t *u64) {
7702
+ UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
7703
+ uint64_t *u64) {
7373
7704
  *u64 = 0;
7374
7705
  uint8_t byte = 0x80;
7375
7706
  int bitpos;
@@ -7387,7 +7718,7 @@ NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
7387
7718
 
7388
7719
  // Decodes a varint from the current buffer position.
7389
7720
  // Returns a status code as described in decoder.int.h.
7390
- FORCEINLINE int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) {
7721
+ UPB_FORCEINLINE static int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) {
7391
7722
  if (curbufleft(d) > 0 && !(*d->ptr & 0x80)) {
7392
7723
  *u64 = *d->ptr;
7393
7724
  advance(d, 1);
@@ -7410,7 +7741,7 @@ FORCEINLINE int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) {
7410
7741
 
7411
7742
  // Decodes a 32-bit varint from the current buffer position.
7412
7743
  // Returns a status code as described in decoder.int.h.
7413
- FORCEINLINE int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) {
7744
+ UPB_FORCEINLINE static int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) {
7414
7745
  uint64_t u64;
7415
7746
  int32_t ret = decode_varint(d, &u64);
7416
7747
  if (ret >= 0) return ret;
@@ -7429,14 +7760,14 @@ FORCEINLINE int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) {
7429
7760
  // Decodes a fixed32 from the current buffer position.
7430
7761
  // Returns a status code as described in decoder.int.h.
7431
7762
  // TODO: proper byte swapping for big-endian machines.
7432
- FORCEINLINE int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) {
7763
+ UPB_FORCEINLINE static int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) {
7433
7764
  return getbytes(d, u32, 4);
7434
7765
  }
7435
7766
 
7436
7767
  // Decodes a fixed64 from the current buffer position.
7437
7768
  // Returns a status code as described in decoder.int.h.
7438
7769
  // TODO: proper byte swapping for big-endian machines.
7439
- FORCEINLINE int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) {
7770
+ UPB_FORCEINLINE static int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) {
7440
7771
  return getbytes(d, u64, 8);
7441
7772
  }
7442
7773
 
@@ -7460,7 +7791,7 @@ static bool decoder_push(upb_pbdecoder *d, uint64_t end) {
7460
7791
  if (end > fr->end_ofs) {
7461
7792
  seterr(d, "Submessage end extends past enclosing submessage.");
7462
7793
  return false;
7463
- } else if ((fr + 1) == d->limit) {
7794
+ } else if (fr == d->limit) {
7464
7795
  seterr(d, kPbDecoderStackOverflow);
7465
7796
  return false;
7466
7797
  }
@@ -7487,8 +7818,8 @@ static bool pushtagdelim(upb_pbdecoder *d, uint32_t arg) {
7487
7818
  // Pops a frame from the decoder stack.
7488
7819
  static void decoder_pop(upb_pbdecoder *d) { d->top--; }
7489
7820
 
7490
- NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d,
7491
- uint64_t expected) {
7821
+ UPB_NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d,
7822
+ uint64_t expected) {
7492
7823
  uint64_t data = 0;
7493
7824
  size_t bytes = upb_value_size(expected);
7494
7825
  size_t read = peekbytes(d, &data, bytes);
@@ -7640,10 +7971,17 @@ static int32_t dispatch(upb_pbdecoder *d) {
7640
7971
  if (ret == DECODE_ENDGROUP) {
7641
7972
  goto_endmsg(d);
7642
7973
  return DECODE_OK;
7643
- } else {
7644
- d->pc = d->last - 1; // Rewind to CHECKDELIM.
7645
- return ret;
7974
+ } else if (ret == DECODE_OK) {
7975
+ // We just consumed some input, so we might now have consumed all the data
7976
+ // in the delimited region. Since every opcode that can trigger dispatch is
7977
+ // directly preceded by OP_CHECKDELIM, rewind to it now to re-check the
7978
+ // delimited end.
7979
+ d->pc = d->last - 1;
7980
+ assert(getop(*d->pc) == OP_CHECKDELIM);
7981
+ return DECODE_OK;
7646
7982
  }
7983
+
7984
+ return ret;
7647
7985
  }
7648
7986
 
7649
7987
  // Callers know that the stack is more than one deep because the opcodes that
@@ -7866,7 +8204,10 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
7866
8204
  void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) {
7867
8205
  upb_pbdecoder *d = closure;
7868
8206
  UPB_UNUSED(size_hint);
8207
+ d->top->end_ofs = UINT64_MAX;
8208
+ d->bufstart_ofs = 0;
7869
8209
  d->call_len = 1;
8210
+ d->callstack[0] = &halt;
7870
8211
  d->pc = pc;
7871
8212
  return d;
7872
8213
  }
@@ -7875,6 +8216,8 @@ void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint) {
7875
8216
  UPB_UNUSED(hd);
7876
8217
  UPB_UNUSED(size_hint);
7877
8218
  upb_pbdecoder *d = closure;
8219
+ d->top->end_ofs = UINT64_MAX;
8220
+ d->bufstart_ofs = 0;
7878
8221
  d->call_len = 0;
7879
8222
  return d;
7880
8223
  }
@@ -7931,58 +8274,120 @@ bool upb_pbdecoder_end(void *closure, const void *handler_data) {
7931
8274
  return true;
7932
8275
  }
7933
8276
 
7934
- void upb_pbdecoder_init(upb_pbdecoder *d, const upb_pbdecodermethod *m,
7935
- upb_status *s) {
7936
- d->limit = &d->stack[UPB_DECODER_MAX_NESTING];
7937
- upb_bytessink_reset(&d->input_, &m->input_handler_, d);
7938
- d->method_ = m;
7939
- d->callstack[0] = &halt;
7940
- d->status = s;
7941
- upb_pbdecoder_reset(d);
7942
- }
7943
-
7944
8277
  void upb_pbdecoder_reset(upb_pbdecoder *d) {
7945
8278
  d->top = d->stack;
7946
- d->top->end_ofs = UINT64_MAX;
7947
8279
  d->top->groupnum = 0;
7948
- d->bufstart_ofs = 0;
7949
8280
  d->ptr = d->residual;
7950
8281
  d->buf = d->residual;
7951
8282
  d->end = d->residual;
7952
8283
  d->residual_end = d->residual;
7953
- d->call_len = 1;
7954
8284
  }
7955
8285
 
7956
- uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d) {
7957
- return offset(d);
8286
+ static size_t stacksize(upb_pbdecoder *d, size_t entries) {
8287
+ UPB_UNUSED(d);
8288
+ return entries * sizeof(upb_pbdecoder_frame);
7958
8289
  }
7959
8290
 
7960
- // Not currently required, but to support outgrowing the static stack we need
7961
- // this.
7962
- void upb_pbdecoder_uninit(upb_pbdecoder *d) {
8291
+ static size_t callstacksize(upb_pbdecoder *d, size_t entries) {
7963
8292
  UPB_UNUSED(d);
7964
- }
7965
8293
 
7966
- const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) {
7967
- return d->method_;
8294
+ #ifdef UPB_USE_JIT_X64
8295
+ if (d->method_->is_native_) {
8296
+ // Each native stack frame needs two pointers, plus we need a few frames for
8297
+ // the enter/exit trampolines.
8298
+ size_t ret = entries * sizeof(void*) * 2;
8299
+ ret += sizeof(void*) * 10;
8300
+ return ret;
8301
+ }
8302
+ #endif
8303
+
8304
+ return entries * sizeof(uint32_t*);
7968
8305
  }
7969
8306
 
7970
- bool upb_pbdecoder_resetoutput(upb_pbdecoder *d, upb_sink* sink) {
7971
- // TODO(haberman): do we need to test whether the decoder is already on the
7972
- // stack (like calling this from within a callback)? Should we support
7973
- // rebinding the output at all?
8307
+ upb_pbdecoder *upb_pbdecoder_create(upb_env *e, const upb_pbdecodermethod *m,
8308
+ upb_sink *sink) {
8309
+ const size_t default_max_nesting = 64;
8310
+ #ifndef NDEBUG
8311
+ size_t size_before = upb_env_bytesallocated(e);
8312
+ #endif
8313
+
8314
+ upb_pbdecoder *d = upb_env_malloc(e, sizeof(upb_pbdecoder));
8315
+ if (!d) return NULL;
8316
+
8317
+ d->method_ = m;
8318
+ d->callstack = upb_env_malloc(e, callstacksize(d, default_max_nesting));
8319
+ d->stack = upb_env_malloc(e, stacksize(d, default_max_nesting));
8320
+ if (!d->stack || !d->callstack) {
8321
+ return NULL;
8322
+ }
8323
+
8324
+ d->env = e;
8325
+ d->limit = d->stack + default_max_nesting - 1;
8326
+ d->stack_size = default_max_nesting;
8327
+
8328
+ upb_pbdecoder_reset(d);
8329
+ upb_bytessink_reset(&d->input_, &m->input_handler_, d);
8330
+
7974
8331
  assert(sink);
7975
8332
  if (d->method_->dest_handlers_) {
7976
8333
  if (sink->handlers != d->method_->dest_handlers_)
7977
- return false;
8334
+ return NULL;
7978
8335
  }
7979
8336
  upb_sink_reset(&d->top->sink, sink->handlers, sink->closure);
7980
- return true;
8337
+
8338
+ // If this fails, increase the value in decoder.h.
8339
+ assert(upb_env_bytesallocated(e) - size_before <= UPB_PB_DECODER_SIZE);
8340
+ return d;
8341
+ }
8342
+
8343
+ uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d) {
8344
+ return offset(d);
8345
+ }
8346
+
8347
+ const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) {
8348
+ return d->method_;
7981
8349
  }
7982
8350
 
7983
8351
  upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d) {
7984
8352
  return &d->input_;
7985
8353
  }
8354
+
8355
+ size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d) {
8356
+ return d->stack_size;
8357
+ }
8358
+
8359
+ bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max) {
8360
+ assert(d->top >= d->stack);
8361
+
8362
+ if (max < (size_t)(d->top - d->stack)) {
8363
+ // Can't set a limit smaller than what we are currently at.
8364
+ return false;
8365
+ }
8366
+
8367
+ if (max > d->stack_size) {
8368
+ // Need to reallocate stack and callstack to accommodate.
8369
+ size_t old_size = stacksize(d, d->stack_size);
8370
+ size_t new_size = stacksize(d, max);
8371
+ void *p = upb_env_realloc(d->env, d->stack, old_size, new_size);
8372
+ if (!p) {
8373
+ return false;
8374
+ }
8375
+ d->stack = p;
8376
+
8377
+ old_size = callstacksize(d, d->stack_size);
8378
+ new_size = callstacksize(d, max);
8379
+ p = upb_env_realloc(d->env, d->callstack, old_size, new_size);
8380
+ if (!p) {
8381
+ return false;
8382
+ }
8383
+ d->callstack = p;
8384
+
8385
+ d->stack_size = max;
8386
+ }
8387
+
8388
+ d->limit = d->stack + max - 1;
8389
+ return true;
8390
+ }
7986
8391
  /*
7987
8392
  * upb - a minimalist implementation of protocol buffers.
7988
8393
  *
@@ -8045,6 +8450,68 @@ upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d) {
8045
8450
 
8046
8451
  #include <stdlib.h>
8047
8452
 
8453
+ // The output buffer is divided into segments; a segment is a string of data
8454
+ // that is "ready to go" -- it does not need any varint lengths inserted into
8455
+ // the middle. The seams between segments are where varints will be inserted
8456
+ // once they are known.
8457
+ //
8458
+ // We also use the concept of a "run", which is a range of encoded bytes that
8459
+ // occur at a single submessage level. Every segment contains one or more runs.
8460
+ //
8461
+ // A segment can span messages. Consider:
8462
+ //
8463
+ // .--Submessage lengths---------.
8464
+ // | | |
8465
+ // | V V
8466
+ // V | |--------------- | |-----------------
8467
+ // Submessages: | |-----------------------------------------------
8468
+ // Top-level msg: ------------------------------------------------------------
8469
+ //
8470
+ // Segments: ----- ------------------- -----------------
8471
+ // Runs: *---- *--------------*--- *----------------
8472
+ // (* marks the start)
8473
+ //
8474
+ // Note that the top-level message is not in any segment because it does not
8475
+ // have any length preceding it.
8476
+ //
8477
+ // A segment is only interrupted when another length needs to be inserted. So
8478
+ // observe how the second segment spans both the inner submessage and part of
8479
+ // the next enclosing message.
8480
+ typedef struct {
8481
+ uint32_t msglen; // The length to varint-encode before this segment.
8482
+ uint32_t seglen; // Length of the segment.
8483
+ } upb_pb_encoder_segment;
8484
+
8485
+ struct upb_pb_encoder {
8486
+ upb_env *env;
8487
+
8488
+ // Our input and output.
8489
+ upb_sink input_;
8490
+ upb_bytessink *output_;
8491
+
8492
+ // The "subclosure" -- used as the inner closure as part of the bytessink
8493
+ // protocol.
8494
+ void *subc;
8495
+
8496
+ // The output buffer and limit, and our current write position. "buf"
8497
+ // initially points to "initbuf", but is dynamically allocated if we need to
8498
+ // grow beyond the initial size.
8499
+ char *buf, *ptr, *limit;
8500
+
8501
+ // The beginning of the current run, or undefined if we are at the top level.
8502
+ char *runbegin;
8503
+
8504
+ // The list of segments we are accumulating.
8505
+ upb_pb_encoder_segment *segbuf, *segptr, *seglimit;
8506
+
8507
+ // The stack of enclosing submessages. Each entry in the stack points to the
8508
+ // segment where this submessage's length is being accumulated.
8509
+ int *stack, *top, *stacklimit;
8510
+
8511
+ // Depth of startmsg/endmsg calls.
8512
+ int depth;
8513
+ };
8514
+
8048
8515
  /* low-level buffering ********************************************************/
8049
8516
 
8050
8517
  // Low-level functions for interacting with the output buffer.
@@ -8062,25 +8529,23 @@ static upb_pb_encoder_segment *top(upb_pb_encoder *e) {
8062
8529
  // Call to ensure that at least "bytes" bytes are available for writing at
8063
8530
  // e->ptr. Returns false if the bytes could not be allocated.
8064
8531
  static bool reserve(upb_pb_encoder *e, size_t bytes) {
8065
- if ((e->limit - e->ptr) < bytes) {
8532
+ if ((size_t)(e->limit - e->ptr) < bytes) {
8533
+ // Grow buffer.
8066
8534
  size_t needed = bytes + (e->ptr - e->buf);
8067
8535
  size_t old_size = e->limit - e->buf;
8536
+
8068
8537
  size_t new_size = old_size;
8538
+
8069
8539
  while (new_size < needed) {
8070
8540
  new_size *= 2;
8071
8541
  }
8072
8542
 
8073
- char *realloc_from = (e->buf == e->initbuf) ? NULL : e->buf;
8074
- char *new_buf = realloc(realloc_from, new_size);
8543
+ char *new_buf = upb_env_realloc(e->env, e->buf, old_size, new_size);
8075
8544
 
8076
8545
  if (new_buf == NULL) {
8077
8546
  return false;
8078
8547
  }
8079
8548
 
8080
- if (realloc_from == NULL) {
8081
- memcpy(new_buf, e->initbuf, old_size);
8082
- }
8083
-
8084
8549
  e->ptr = new_buf + (e->ptr - e->buf);
8085
8550
  e->runbegin = new_buf + (e->runbegin - e->buf);
8086
8551
  e->limit = new_buf + new_size;
@@ -8093,7 +8558,7 @@ static bool reserve(upb_pb_encoder *e, size_t bytes) {
8093
8558
  // Call when "bytes" bytes have been written at e->ptr. The caller *must* have
8094
8559
  // previously called reserve() with at least this many bytes.
8095
8560
  static void encoder_advance(upb_pb_encoder *e, size_t bytes) {
8096
- assert((e->limit - e->ptr) >= bytes);
8561
+ assert((size_t)(e->limit - e->ptr) >= bytes);
8097
8562
  e->ptr += bytes;
8098
8563
  }
8099
8564
 
@@ -8149,21 +8614,17 @@ static bool start_delim(upb_pb_encoder *e) {
8149
8614
  }
8150
8615
 
8151
8616
  if (++e->segptr == e->seglimit) {
8152
- upb_pb_encoder_segment *realloc_from =
8153
- (e->segbuf == e->seginitbuf) ? NULL : e->segbuf;
8617
+ // Grow segment buffer.
8154
8618
  size_t old_size =
8155
8619
  (e->seglimit - e->segbuf) * sizeof(upb_pb_encoder_segment);
8156
8620
  size_t new_size = old_size * 2;
8157
- upb_pb_encoder_segment *new_buf = realloc(realloc_from, new_size);
8621
+ upb_pb_encoder_segment *new_buf =
8622
+ upb_env_realloc(e->env, e->segbuf, old_size, new_size);
8158
8623
 
8159
8624
  if (new_buf == NULL) {
8160
8625
  return false;
8161
8626
  }
8162
8627
 
8163
- if (realloc_from == NULL) {
8164
- memcpy(new_buf, e->seginitbuf, old_size);
8165
- }
8166
-
8167
8628
  e->segptr = new_buf + (e->segptr - e->segbuf);
8168
8629
  e->seglimit = new_buf + (new_size / sizeof(upb_pb_encoder_segment));
8169
8630
  e->segbuf = new_buf;
@@ -8434,6 +8895,12 @@ static void newhandlers_callback(const void *closure, upb_handlers *h) {
8434
8895
  }
8435
8896
  }
8436
8897
 
8898
+ void upb_pb_encoder_reset(upb_pb_encoder *e) {
8899
+ e->segptr = NULL;
8900
+ e->top = NULL;
8901
+ e->depth = 0;
8902
+ }
8903
+
8437
8904
 
8438
8905
  /* public API *****************************************************************/
8439
8906
 
@@ -8442,40 +8909,42 @@ const upb_handlers *upb_pb_encoder_newhandlers(const upb_msgdef *m,
8442
8909
  return upb_handlers_newfrozen(m, owner, newhandlers_callback, NULL);
8443
8910
  }
8444
8911
 
8445
- #define ARRAYSIZE(x) (sizeof(x) / sizeof(x[0]))
8912
+ upb_pb_encoder *upb_pb_encoder_create(upb_env *env, const upb_handlers *h,
8913
+ upb_bytessink *output) {
8914
+ const size_t initial_bufsize = 256;
8915
+ const size_t initial_segbufsize = 16;
8916
+ // TODO(haberman): make this configurable.
8917
+ const size_t stack_size = 64;
8918
+ #ifndef NDEBUG
8919
+ const size_t size_before = upb_env_bytesallocated(env);
8920
+ #endif
8446
8921
 
8447
- void upb_pb_encoder_init(upb_pb_encoder *e, const upb_handlers *h) {
8448
- e->output_ = NULL;
8449
- e->subc = NULL;
8450
- e->buf = e->initbuf;
8451
- e->ptr = e->buf;
8452
- e->limit = e->buf + ARRAYSIZE(e->initbuf);
8453
- e->segbuf = e->seginitbuf;
8454
- e->seglimit = e->segbuf + ARRAYSIZE(e->seginitbuf);
8455
- e->stacklimit = e->stack + ARRAYSIZE(e->stack);
8456
- upb_sink_reset(&e->input_, h, e);
8457
- }
8922
+ upb_pb_encoder *e = upb_env_malloc(env, sizeof(upb_pb_encoder));
8923
+ if (!e) return NULL;
8458
8924
 
8459
- void upb_pb_encoder_uninit(upb_pb_encoder *e) {
8460
- if (e->buf != e->initbuf) {
8461
- free(e->buf);
8462
- }
8925
+ e->buf = upb_env_malloc(env, initial_bufsize);
8926
+ e->segbuf = upb_env_malloc(env, initial_segbufsize * sizeof(*e->segbuf));
8927
+ e->stack = upb_env_malloc(env, stack_size * sizeof(*e->stack));
8463
8928
 
8464
- if (e->segbuf != e->seginitbuf) {
8465
- free(e->segbuf);
8929
+ if (!e->buf || !e->segbuf || !e->stack) {
8930
+ return NULL;
8466
8931
  }
8467
- }
8468
8932
 
8469
- void upb_pb_encoder_resetoutput(upb_pb_encoder *e, upb_bytessink *output) {
8933
+ e->limit = e->buf + initial_bufsize;
8934
+ e->seglimit = e->segbuf + initial_segbufsize;
8935
+ e->stacklimit = e->stack + stack_size;
8936
+
8470
8937
  upb_pb_encoder_reset(e);
8938
+ upb_sink_reset(&e->input_, h, e);
8939
+
8940
+ e->env = env;
8471
8941
  e->output_ = output;
8472
8942
  e->subc = output->closure;
8473
- }
8943
+ e->ptr = e->buf;
8474
8944
 
8475
- void upb_pb_encoder_reset(upb_pb_encoder *e) {
8476
- e->segptr = NULL;
8477
- e->top = NULL;
8478
- e->depth = 0;
8945
+ // If this fails, increase the value in encoder.h.
8946
+ assert(upb_env_bytesallocated(env) - size_before <= UPB_PB_ENCODER_SIZE);
8947
+ return e;
8479
8948
  }
8480
8949
 
8481
8950
  upb_sink *upb_pb_encoder_input(upb_pb_encoder *e) { return &e->input_; }
@@ -8500,26 +8969,26 @@ upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
8500
8969
  const upb_pbdecodermethod *decoder_m =
8501
8970
  upb_pbdecodermethod_new(&opts, &decoder_m);
8502
8971
 
8503
- upb_pbdecoder decoder;
8504
- upb_descreader reader;
8972
+ upb_env env;
8973
+ upb_env_init(&env);
8974
+ upb_env_reporterrorsto(&env, status);
8505
8975
 
8506
- upb_pbdecoder_init(&decoder, decoder_m, status);
8507
- upb_descreader_init(&reader, reader_h, status);
8508
- upb_pbdecoder_resetoutput(&decoder, upb_descreader_input(&reader));
8976
+ upb_descreader *reader = upb_descreader_create(&env, reader_h);
8977
+ upb_pbdecoder *decoder =
8978
+ upb_pbdecoder_create(&env, decoder_m, upb_descreader_input(reader));
8509
8979
 
8510
8980
  // Push input data.
8511
- bool ok = upb_bufsrc_putbuf(str, len, upb_pbdecoder_input(&decoder));
8981
+ bool ok = upb_bufsrc_putbuf(str, len, upb_pbdecoder_input(decoder));
8512
8982
 
8513
8983
  upb_def **ret = NULL;
8514
8984
 
8515
8985
  if (!ok) goto cleanup;
8516
- upb_def **defs = upb_descreader_getdefs(&reader, owner, n);
8986
+ upb_def **defs = upb_descreader_getdefs(reader, owner, n);
8517
8987
  ret = malloc(sizeof(upb_def*) * (*n));
8518
8988
  memcpy(ret, defs, sizeof(upb_def*) * (*n));
8519
8989
 
8520
8990
  cleanup:
8521
- upb_pbdecoder_uninit(&decoder);
8522
- upb_descreader_uninit(&reader);
8991
+ upb_env_uninit(&env);
8523
8992
  upb_handlers_unref(reader_h, &reader_h);
8524
8993
  upb_pbdecodermethod_unref(decoder_m, &decoder_m);
8525
8994
  return ret;
@@ -8584,6 +9053,14 @@ bool upb_load_descriptor_file_into_symtab(upb_symtab *symtab, const char *fname,
8584
9053
  #include <string.h>
8585
9054
 
8586
9055
 
9056
+ struct upb_textprinter {
9057
+ upb_sink input_;
9058
+ upb_bytessink *output_;
9059
+ int indent_depth_;
9060
+ bool single_line_;
9061
+ void *subc;
9062
+ };
9063
+
8587
9064
  #define CHECK(x) if ((x) < 0) goto err;
8588
9065
 
8589
9066
  static const char *shortname(const char *longname) {
@@ -8801,24 +9278,6 @@ err:
8801
9278
  return false;
8802
9279
  }
8803
9280
 
8804
-
8805
- /* Public API *****************************************************************/
8806
-
8807
- void upb_textprinter_init(upb_textprinter *p, const upb_handlers *h) {
8808
- p->single_line_ = false;
8809
- p->indent_depth_ = 0;
8810
- upb_sink_reset(&p->input_, h, p);
8811
- }
8812
-
8813
- void upb_textprinter_uninit(upb_textprinter *p) {
8814
- UPB_UNUSED(p);
8815
- }
8816
-
8817
- void upb_textprinter_reset(upb_textprinter *p, bool single_line) {
8818
- p->single_line_ = single_line;
8819
- p->indent_depth_ = 0;
8820
- }
8821
-
8822
9281
  static void onmreg(const void *c, upb_handlers *h) {
8823
9282
  UPB_UNUSED(c);
8824
9283
  const upb_msgdef *m = upb_handlers_msgdef(h);
@@ -8878,6 +9337,26 @@ static void onmreg(const void *c, upb_handlers *h) {
8878
9337
  }
8879
9338
  }
8880
9339
 
9340
+ static void textprinter_reset(upb_textprinter *p, bool single_line) {
9341
+ p->single_line_ = single_line;
9342
+ p->indent_depth_ = 0;
9343
+ }
9344
+
9345
+
9346
+ /* Public API *****************************************************************/
9347
+
9348
+ upb_textprinter *upb_textprinter_create(upb_env *env, const upb_handlers *h,
9349
+ upb_bytessink *output) {
9350
+ upb_textprinter *p = upb_env_malloc(env, sizeof(upb_textprinter));
9351
+ if (!p) return NULL;
9352
+
9353
+ p->output_ = output;
9354
+ upb_sink_reset(&p->input_, h, p);
9355
+ textprinter_reset(p, false);
9356
+
9357
+ return p;
9358
+ }
9359
+
8881
9360
  const upb_handlers *upb_textprinter_newhandlers(const upb_msgdef *m,
8882
9361
  const void *owner) {
8883
9362
  return upb_handlers_newfrozen(m, owner, &onmreg, NULL);
@@ -8885,11 +9364,6 @@ const upb_handlers *upb_textprinter_newhandlers(const upb_msgdef *m,
8885
9364
 
8886
9365
  upb_sink *upb_textprinter_input(upb_textprinter *p) { return &p->input_; }
8887
9366
 
8888
- bool upb_textprinter_resetoutput(upb_textprinter *p, upb_bytessink *output) {
8889
- p->output_ = output;
8890
- return true;
8891
- }
8892
-
8893
9367
  void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line) {
8894
9368
  p->single_line_ = single_line;
8895
9369
  }
@@ -9052,6 +9526,71 @@ upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) {
9052
9526
  #include <errno.h>
9053
9527
 
9054
9528
 
9529
+ #define UPB_JSON_MAX_DEPTH 64
9530
+
9531
+ typedef struct {
9532
+ upb_sink sink;
9533
+
9534
+ // The current message in which we're parsing, and the field whose value we're
9535
+ // expecting next.
9536
+ const upb_msgdef *m;
9537
+ const upb_fielddef *f;
9538
+
9539
+ // We are in a repeated-field context, ready to emit mapentries as
9540
+ // submessages. This flag alters the start-of-object (open-brace) behavior to
9541
+ // begin a sequence of mapentry messages rather than a single submessage.
9542
+ bool is_map;
9543
+
9544
+ // We are in a map-entry message context. This flag is set when parsing the
9545
+ // value field of a single map entry and indicates to all value-field parsers
9546
+ // (subobjects, strings, numbers, and bools) that the map-entry submessage
9547
+ // should end as soon as the value is parsed.
9548
+ bool is_mapentry;
9549
+
9550
+ // If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent
9551
+ // message's map field that we're currently parsing. This differs from |f|
9552
+ // because |f| is the field in the *current* message (i.e., the map-entry
9553
+ // message itself), not the parent's field that leads to this map.
9554
+ const upb_fielddef *mapfield;
9555
+ } upb_jsonparser_frame;
9556
+
9557
+ struct upb_json_parser {
9558
+ upb_env *env;
9559
+ upb_byteshandler input_handler_;
9560
+ upb_bytessink input_;
9561
+
9562
+ // Stack to track the JSON scopes we are in.
9563
+ upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH];
9564
+ upb_jsonparser_frame *top;
9565
+ upb_jsonparser_frame *limit;
9566
+
9567
+ upb_status *status;
9568
+
9569
+ // Ragel's internal parsing stack for the parsing state machine.
9570
+ int current_state;
9571
+ int parser_stack[UPB_JSON_MAX_DEPTH];
9572
+ int parser_top;
9573
+
9574
+ // The handle for the current buffer.
9575
+ const upb_bufhandle *handle;
9576
+
9577
+ // Accumulate buffer. See details in parser.rl.
9578
+ const char *accumulated;
9579
+ size_t accumulated_len;
9580
+ char *accumulate_buf;
9581
+ size_t accumulate_buf_size;
9582
+
9583
+ // Multi-part text data. See details in parser.rl.
9584
+ int multipart_state;
9585
+ upb_selector_t string_selector;
9586
+
9587
+ // Input capture. See details in parser.rl.
9588
+ const char *capture;
9589
+
9590
+ // Intermediate result of parsing a unicode escape sequence.
9591
+ uint32_t digit;
9592
+ };
9593
+
9055
9594
  #define PARSER_CHECK_RETURN(x) if (!(x)) return false
9056
9595
 
9057
9596
  // Used to signal that a capture has been suspended.
@@ -9254,12 +9793,13 @@ static void accumulate_clear(upb_json_parser *p) {
9254
9793
 
9255
9794
  // Used internally by accumulate_append().
9256
9795
  static bool accumulate_realloc(upb_json_parser *p, size_t need) {
9257
- size_t new_size = UPB_MAX(p->accumulate_buf_size, 128);
9796
+ size_t old_size = p->accumulate_buf_size;
9797
+ size_t new_size = UPB_MAX(old_size, 128);
9258
9798
  while (new_size < need) {
9259
9799
  new_size = saturating_multiply(new_size, 2);
9260
9800
  }
9261
9801
 
9262
- void *mem = realloc(p->accumulate_buf, new_size);
9802
+ void *mem = upb_env_realloc(p->env, p->accumulate_buf, old_size, new_size);
9263
9803
  if (!mem) {
9264
9804
  upb_status_seterrmsg(p->status, "Out of memory allocating buffer.");
9265
9805
  return false;
@@ -10008,11 +10548,11 @@ static void end_object(upb_json_parser *p) {
10008
10548
  // final state once, when the closing '"' is seen.
10009
10549
 
10010
10550
 
10011
- #line 1085 "upb/json/parser.rl"
10551
+ #line 1151 "upb/json/parser.rl"
10012
10552
 
10013
10553
 
10014
10554
 
10015
- #line 997 "upb/json/parser.c"
10555
+ #line 1063 "upb/json/parser.c"
10016
10556
  static const char _json_actions[] = {
10017
10557
  0, 1, 0, 1, 2, 1, 3, 1,
10018
10558
  5, 1, 6, 1, 7, 1, 8, 1,
@@ -10154,8 +10694,6 @@ static const char _json_trans_actions[] = {
10154
10694
  };
10155
10695
 
10156
10696
  static const int json_start = 1;
10157
- static const int json_first_final = 56;
10158
- static const int json_error = 0;
10159
10697
 
10160
10698
  static const int json_en_number_machine = 10;
10161
10699
  static const int json_en_string_machine = 19;
@@ -10163,7 +10701,7 @@ static const int json_en_value_machine = 27;
10163
10701
  static const int json_en_main = 1;
10164
10702
 
10165
10703
 
10166
- #line 1088 "upb/json/parser.rl"
10704
+ #line 1154 "upb/json/parser.rl"
10167
10705
 
10168
10706
  size_t parse(void *closure, const void *hd, const char *buf, size_t size,
10169
10707
  const upb_bufhandle *handle) {
@@ -10183,7 +10721,7 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size,
10183
10721
  capture_resume(parser, buf);
10184
10722
 
10185
10723
 
10186
- #line 1168 "upb/json/parser.c"
10724
+ #line 1232 "upb/json/parser.c"
10187
10725
  {
10188
10726
  int _klen;
10189
10727
  unsigned int _trans;
@@ -10258,118 +10796,118 @@ _match:
10258
10796
  switch ( *_acts++ )
10259
10797
  {
10260
10798
  case 0:
10261
- #line 1000 "upb/json/parser.rl"
10799
+ #line 1066 "upb/json/parser.rl"
10262
10800
  { p--; {cs = stack[--top]; goto _again;} }
10263
10801
  break;
10264
10802
  case 1:
10265
- #line 1001 "upb/json/parser.rl"
10803
+ #line 1067 "upb/json/parser.rl"
10266
10804
  { p--; {stack[top++] = cs; cs = 10; goto _again;} }
10267
10805
  break;
10268
10806
  case 2:
10269
- #line 1005 "upb/json/parser.rl"
10807
+ #line 1071 "upb/json/parser.rl"
10270
10808
  { start_text(parser, p); }
10271
10809
  break;
10272
10810
  case 3:
10273
- #line 1006 "upb/json/parser.rl"
10811
+ #line 1072 "upb/json/parser.rl"
10274
10812
  { CHECK_RETURN_TOP(end_text(parser, p)); }
10275
10813
  break;
10276
10814
  case 4:
10277
- #line 1012 "upb/json/parser.rl"
10815
+ #line 1078 "upb/json/parser.rl"
10278
10816
  { start_hex(parser); }
10279
10817
  break;
10280
10818
  case 5:
10281
- #line 1013 "upb/json/parser.rl"
10819
+ #line 1079 "upb/json/parser.rl"
10282
10820
  { hexdigit(parser, p); }
10283
10821
  break;
10284
10822
  case 6:
10285
- #line 1014 "upb/json/parser.rl"
10823
+ #line 1080 "upb/json/parser.rl"
10286
10824
  { CHECK_RETURN_TOP(end_hex(parser)); }
10287
10825
  break;
10288
10826
  case 7:
10289
- #line 1020 "upb/json/parser.rl"
10827
+ #line 1086 "upb/json/parser.rl"
10290
10828
  { CHECK_RETURN_TOP(escape(parser, p)); }
10291
10829
  break;
10292
10830
  case 8:
10293
- #line 1026 "upb/json/parser.rl"
10831
+ #line 1092 "upb/json/parser.rl"
10294
10832
  { p--; {cs = stack[--top]; goto _again;} }
10295
10833
  break;
10296
10834
  case 9:
10297
- #line 1029 "upb/json/parser.rl"
10835
+ #line 1095 "upb/json/parser.rl"
10298
10836
  { {stack[top++] = cs; cs = 19; goto _again;} }
10299
10837
  break;
10300
10838
  case 10:
10301
- #line 1031 "upb/json/parser.rl"
10839
+ #line 1097 "upb/json/parser.rl"
10302
10840
  { p--; {stack[top++] = cs; cs = 27; goto _again;} }
10303
10841
  break;
10304
10842
  case 11:
10305
- #line 1036 "upb/json/parser.rl"
10843
+ #line 1102 "upb/json/parser.rl"
10306
10844
  { start_member(parser); }
10307
10845
  break;
10308
10846
  case 12:
10309
- #line 1037 "upb/json/parser.rl"
10847
+ #line 1103 "upb/json/parser.rl"
10310
10848
  { CHECK_RETURN_TOP(end_membername(parser)); }
10311
10849
  break;
10312
10850
  case 13:
10313
- #line 1040 "upb/json/parser.rl"
10851
+ #line 1106 "upb/json/parser.rl"
10314
10852
  { end_member(parser); }
10315
10853
  break;
10316
10854
  case 14:
10317
- #line 1046 "upb/json/parser.rl"
10855
+ #line 1112 "upb/json/parser.rl"
10318
10856
  { start_object(parser); }
10319
10857
  break;
10320
10858
  case 15:
10321
- #line 1049 "upb/json/parser.rl"
10859
+ #line 1115 "upb/json/parser.rl"
10322
10860
  { end_object(parser); }
10323
10861
  break;
10324
10862
  case 16:
10325
- #line 1055 "upb/json/parser.rl"
10863
+ #line 1121 "upb/json/parser.rl"
10326
10864
  { CHECK_RETURN_TOP(start_array(parser)); }
10327
10865
  break;
10328
10866
  case 17:
10329
- #line 1059 "upb/json/parser.rl"
10867
+ #line 1125 "upb/json/parser.rl"
10330
10868
  { end_array(parser); }
10331
10869
  break;
10332
10870
  case 18:
10333
- #line 1064 "upb/json/parser.rl"
10871
+ #line 1130 "upb/json/parser.rl"
10334
10872
  { start_number(parser, p); }
10335
10873
  break;
10336
10874
  case 19:
10337
- #line 1065 "upb/json/parser.rl"
10875
+ #line 1131 "upb/json/parser.rl"
10338
10876
  { CHECK_RETURN_TOP(end_number(parser, p)); }
10339
10877
  break;
10340
10878
  case 20:
10341
- #line 1067 "upb/json/parser.rl"
10879
+ #line 1133 "upb/json/parser.rl"
10342
10880
  { CHECK_RETURN_TOP(start_stringval(parser)); }
10343
10881
  break;
10344
10882
  case 21:
10345
- #line 1068 "upb/json/parser.rl"
10883
+ #line 1134 "upb/json/parser.rl"
10346
10884
  { CHECK_RETURN_TOP(end_stringval(parser)); }
10347
10885
  break;
10348
10886
  case 22:
10349
- #line 1070 "upb/json/parser.rl"
10887
+ #line 1136 "upb/json/parser.rl"
10350
10888
  { CHECK_RETURN_TOP(parser_putbool(parser, true)); }
10351
10889
  break;
10352
10890
  case 23:
10353
- #line 1072 "upb/json/parser.rl"
10891
+ #line 1138 "upb/json/parser.rl"
10354
10892
  { CHECK_RETURN_TOP(parser_putbool(parser, false)); }
10355
10893
  break;
10356
10894
  case 24:
10357
- #line 1074 "upb/json/parser.rl"
10895
+ #line 1140 "upb/json/parser.rl"
10358
10896
  { /* null value */ }
10359
10897
  break;
10360
10898
  case 25:
10361
- #line 1076 "upb/json/parser.rl"
10899
+ #line 1142 "upb/json/parser.rl"
10362
10900
  { CHECK_RETURN_TOP(start_subobject(parser)); }
10363
10901
  break;
10364
10902
  case 26:
10365
- #line 1077 "upb/json/parser.rl"
10903
+ #line 1143 "upb/json/parser.rl"
10366
10904
  { end_subobject(parser); }
10367
10905
  break;
10368
10906
  case 27:
10369
- #line 1082 "upb/json/parser.rl"
10907
+ #line 1148 "upb/json/parser.rl"
10370
10908
  { p--; {cs = stack[--top]; goto _again;} }
10371
10909
  break;
10372
- #line 1354 "upb/json/parser.c"
10910
+ #line 1418 "upb/json/parser.c"
10373
10911
  }
10374
10912
  }
10375
10913
 
@@ -10382,7 +10920,7 @@ _again:
10382
10920
  _out: {}
10383
10921
  }
10384
10922
 
10385
- #line 1107 "upb/json/parser.rl"
10923
+ #line 1173 "upb/json/parser.rl"
10386
10924
 
10387
10925
  if (p != pe) {
10388
10926
  upb_status_seterrf(parser->status, "Parse error at %s\n", p);
@@ -10401,29 +10939,17 @@ error:
10401
10939
  bool end(void *closure, const void *hd) {
10402
10940
  UPB_UNUSED(closure);
10403
10941
  UPB_UNUSED(hd);
10404
- return true;
10405
- }
10406
10942
 
10407
-
10408
- /* Public API *****************************************************************/
10409
-
10410
- void upb_json_parser_init(upb_json_parser *p, upb_status *status) {
10411
- p->limit = p->stack + UPB_JSON_MAX_DEPTH;
10412
- p->accumulate_buf = NULL;
10413
- p->accumulate_buf_size = 0;
10414
- upb_byteshandler_init(&p->input_handler_);
10415
- upb_byteshandler_setstring(&p->input_handler_, parse, NULL);
10416
- upb_byteshandler_setendstr(&p->input_handler_, end, NULL);
10417
- upb_bytessink_reset(&p->input_, &p->input_handler_, p);
10418
- p->status = status;
10419
- }
10420
-
10421
- void upb_json_parser_uninit(upb_json_parser *p) {
10422
- upb_byteshandler_uninit(&p->input_handler_);
10423
- free(p->accumulate_buf);
10943
+ // Prevent compile warning on unused static constants.
10944
+ UPB_UNUSED(json_start);
10945
+ UPB_UNUSED(json_en_number_machine);
10946
+ UPB_UNUSED(json_en_string_machine);
10947
+ UPB_UNUSED(json_en_value_machine);
10948
+ UPB_UNUSED(json_en_main);
10949
+ return true;
10424
10950
  }
10425
10951
 
10426
- void upb_json_parser_reset(upb_json_parser *p) {
10952
+ static void json_parser_reset(upb_json_parser *p) {
10427
10953
  p->top = p->stack;
10428
10954
  p->top->f = NULL;
10429
10955
  p->top->is_map = false;
@@ -10433,25 +10959,48 @@ void upb_json_parser_reset(upb_json_parser *p) {
10433
10959
  int top;
10434
10960
  // Emit Ragel initialization of the parser.
10435
10961
 
10436
- #line 1418 "upb/json/parser.c"
10962
+ #line 1470 "upb/json/parser.c"
10437
10963
  {
10438
10964
  cs = json_start;
10439
10965
  top = 0;
10440
10966
  }
10441
10967
 
10442
- #line 1157 "upb/json/parser.rl"
10968
+ #line 1211 "upb/json/parser.rl"
10443
10969
  p->current_state = cs;
10444
10970
  p->parser_top = top;
10445
10971
  accumulate_clear(p);
10446
10972
  p->multipart_state = MULTIPART_INACTIVE;
10447
10973
  p->capture = NULL;
10974
+ p->accumulated = NULL;
10448
10975
  }
10449
10976
 
10450
- void upb_json_parser_resetoutput(upb_json_parser *p, upb_sink *sink) {
10451
- upb_json_parser_reset(p);
10452
- upb_sink_reset(&p->top->sink, sink->handlers, sink->closure);
10453
- p->top->m = upb_handlers_msgdef(sink->handlers);
10454
- p->accumulated = NULL;
10977
+
10978
+ /* Public API *****************************************************************/
10979
+
10980
+ upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) {
10981
+ #ifndef NDEBUG
10982
+ const size_t size_before = upb_env_bytesallocated(env);
10983
+ #endif
10984
+ upb_json_parser *p = upb_env_malloc(env, sizeof(upb_json_parser));
10985
+ if (!p) return false;
10986
+
10987
+ p->env = env;
10988
+ p->limit = p->stack + UPB_JSON_MAX_DEPTH;
10989
+ p->accumulate_buf = NULL;
10990
+ p->accumulate_buf_size = 0;
10991
+ upb_byteshandler_init(&p->input_handler_);
10992
+ upb_byteshandler_setstring(&p->input_handler_, parse, NULL);
10993
+ upb_byteshandler_setendstr(&p->input_handler_, end, NULL);
10994
+ upb_bytessink_reset(&p->input_, &p->input_handler_, p);
10995
+
10996
+ json_parser_reset(p);
10997
+ upb_sink_reset(&p->top->sink, output->handlers, output->closure);
10998
+ p->top->m = upb_handlers_msgdef(output->handlers);
10999
+
11000
+ // If this fails, uncomment and increase the value in parser.h.
11001
+ // fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before);
11002
+ assert(upb_env_bytesallocated(env) - size_before <= UPB_JSON_PARSER_SIZE);
11003
+ return p;
10455
11004
  }
10456
11005
 
10457
11006
  upb_bytessink *upb_json_parser_input(upb_json_parser *p) {
@@ -10473,6 +11022,27 @@ upb_bytessink *upb_json_parser_input(upb_json_parser *p) {
10473
11022
  #include <string.h>
10474
11023
  #include <stdint.h>
10475
11024
 
11025
+ struct upb_json_printer {
11026
+ upb_sink input_;
11027
+ // BytesSink closure.
11028
+ void *subc_;
11029
+ upb_bytessink *output_;
11030
+
11031
+ // We track the depth so that we know when to emit startstr/endstr on the
11032
+ // output.
11033
+ int depth_;
11034
+
11035
+ // Have we emitted the first element? This state is necessary to emit commas
11036
+ // without leaving a trailing comma in arrays/maps. We keep this state per
11037
+ // frame depth.
11038
+ //
11039
+ // Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages.
11040
+ // We count frames (contexts in which we separate elements by commas) as both
11041
+ // repeated fields and messages (maps), and the worst case is a
11042
+ // message->repeated field->submessage->repeated field->... nesting.
11043
+ bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2];
11044
+ };
11045
+
10476
11046
  // StringPiece; a pointer plus a length.
10477
11047
  typedef struct {
10478
11048
  const char *ptr;
@@ -10620,7 +11190,7 @@ static bool putkey(void *closure, const void *handler_data) {
10620
11190
  return true;
10621
11191
  }
10622
11192
 
10623
- #define CHKFMT(val) if ((val) == -1) return false;
11193
+ #define CHKFMT(val) if ((val) == (size_t)-1) return false;
10624
11194
  #define CHK(val) if (!(val)) return false;
10625
11195
 
10626
11196
  #define TYPE_HANDLERS(type, fmt_func) \
@@ -11189,25 +11759,29 @@ void printer_sethandlers(const void *closure, upb_handlers *h) {
11189
11759
  #undef TYPE
11190
11760
  }
11191
11761
 
11192
- /* Public API *****************************************************************/
11193
-
11194
- void upb_json_printer_init(upb_json_printer *p, const upb_handlers *h) {
11195
- p->output_ = NULL;
11762
+ static void json_printer_reset(upb_json_printer *p) {
11196
11763
  p->depth_ = 0;
11197
- upb_sink_reset(&p->input_, h, p);
11198
11764
  }
11199
11765
 
11200
- void upb_json_printer_uninit(upb_json_printer *p) {
11201
- UPB_UNUSED(p);
11202
- }
11203
11766
 
11204
- void upb_json_printer_reset(upb_json_printer *p) {
11205
- p->depth_ = 0;
11206
- }
11767
+ /* Public API *****************************************************************/
11768
+
11769
+ upb_json_printer *upb_json_printer_create(upb_env *e, const upb_handlers *h,
11770
+ upb_bytessink *output) {
11771
+ #ifndef NDEBUG
11772
+ size_t size_before = upb_env_bytesallocated(e);
11773
+ #endif
11774
+
11775
+ upb_json_printer *p = upb_env_malloc(e, sizeof(upb_json_printer));
11776
+ if (!p) return NULL;
11207
11777
 
11208
- void upb_json_printer_resetoutput(upb_json_printer *p, upb_bytessink *output) {
11209
- upb_json_printer_reset(p);
11210
11778
  p->output_ = output;
11779
+ json_printer_reset(p);
11780
+ upb_sink_reset(&p->input_, h, p);
11781
+
11782
+ // If this fails, increase the value in printer.h.
11783
+ assert(upb_env_bytesallocated(e) - size_before <= UPB_JSON_PRINTER_SIZE);
11784
+ return p;
11211
11785
  }
11212
11786
 
11213
11787
  upb_sink *upb_json_printer_input(upb_json_printer *p) {