google-protobuf 3.0.0.alpha.2.0 → 3.0.0.alpha.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of google-protobuf has been flagged as a potentially problematic release.

@@ -161,8 +161,6 @@ extern VALUE cOneofBuilderContext;
161
161
  extern VALUE cEnumBuilderContext;
162
162
  extern VALUE cBuilder;
163
163
 
164
- extern const char* kDescriptorInstanceVar;
165
-
166
164
  // We forward-declare all of the Ruby method implementations here because we
167
165
  // sometimes call the methods directly across .c files, rather than going
168
166
  // through Ruby's method dispatching (e.g. during message parse). It's cleaner
@@ -361,19 +359,20 @@ extern VALUE cRepeatedField;
361
359
  RepeatedField* ruby_to_RepeatedField(VALUE value);
362
360
 
363
361
  VALUE RepeatedField_each(VALUE _self);
364
- VALUE RepeatedField_index(VALUE _self, VALUE _index);
362
+ VALUE RepeatedField_index(int argc, VALUE* argv, VALUE _self);
365
363
  void* RepeatedField_index_native(VALUE _self, int index);
366
364
  VALUE RepeatedField_index_set(VALUE _self, VALUE _index, VALUE val);
367
365
  void RepeatedField_reserve(RepeatedField* self, int new_size);
368
366
  VALUE RepeatedField_push(VALUE _self, VALUE val);
369
367
  void RepeatedField_push_native(VALUE _self, void* data);
370
- VALUE RepeatedField_pop(VALUE _self);
368
+ VALUE RepeatedField_pop_one(VALUE _self);
371
369
  VALUE RepeatedField_insert(int argc, VALUE* argv, VALUE _self);
372
370
  VALUE RepeatedField_replace(VALUE _self, VALUE list);
373
371
  VALUE RepeatedField_clear(VALUE _self);
374
372
  VALUE RepeatedField_length(VALUE _self);
375
373
  VALUE RepeatedField_dup(VALUE _self);
376
374
  VALUE RepeatedField_deep_copy(VALUE _self);
375
+ VALUE RepeatedField_to_ary(VALUE _self);
377
376
  VALUE RepeatedField_eq(VALUE _self, VALUE _other);
378
377
  VALUE RepeatedField_hash(VALUE _self);
379
378
  VALUE RepeatedField_inspect(VALUE _self);
@@ -497,11 +496,6 @@ VALUE Message_encode(VALUE klass, VALUE msg_rb);
497
496
  VALUE Message_decode_json(VALUE klass, VALUE data);
498
497
  VALUE Message_encode_json(VALUE klass, VALUE msg_rb);
499
498
 
500
- VALUE Google_Protobuf_encode(VALUE self, VALUE msg_rb);
501
- VALUE Google_Protobuf_decode(VALUE self, VALUE klass, VALUE msg_rb);
502
- VALUE Google_Protobuf_encode_json(VALUE self, VALUE msg_rb);
503
- VALUE Google_Protobuf_decode_json(VALUE self, VALUE klass, VALUE msg_rb);
504
-
505
499
  VALUE Google_Protobuf_deep_copy(VALUE self, VALUE obj);
506
500
 
507
501
  VALUE build_module_from_enumdesc(EnumDescriptor* enumdef);
@@ -511,6 +505,10 @@ VALUE enum_resolve(VALUE self, VALUE sym);
511
505
  const upb_pbdecodermethod *new_fillmsg_decodermethod(
512
506
  Descriptor* descriptor, const void *owner);
513
507
 
508
+ // Maximum depth allowed during encoding, to avoid stack overflows due to
509
+ // cycles.
510
+ #define ENCODE_MAX_NESTING 63
511
+
514
512
  // -----------------------------------------------------------------------------
515
513
  // Global map from upb {msg,enum}defs to wrapper Descriptor/EnumDescriptor
516
514
  // instances.
@@ -530,4 +528,6 @@ void check_upb_status(const upb_status* status, const char* msg);
530
528
  check_upb_status(&status, msg); \
531
529
  } while (0)
532
530
 
531
+ extern ID descriptor_instancevar_interned;
532
+
533
533
  #endif // __GOOGLE_PROTOBUF_RUBY_PROTOBUF_H__
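
Note on the header changes above: the string constant kDescriptorInstanceVar is replaced by a pre-interned ID (descriptor_instancevar_interned), so descriptor lookups can use rb_ivar_get directly instead of rb_iv_get, which re-interns its C-string argument on every call. A minimal sketch of how such an ID would typically be set up at extension load time; the actual instance-variable name and the init hook are not shown in this diff, so both are assumptions:

    // Sketch only: the ivar name "descriptor" and the init hook are assumed.
    ID descriptor_instancevar_interned;

    static void init_descriptor_ivar_sketch(void) {
      descriptor_instancevar_interned = rb_intern("descriptor");
    }

    // Subsequent lookups reuse the interned ID:
    //   VALUE desc = rb_ivar_get(klass, descriptor_instancevar_interned);
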
@@ -47,6 +47,30 @@ RepeatedField* ruby_to_RepeatedField(VALUE _self) {
47
47
  return self;
48
48
  }
49
49
 
50
+ static int index_position(VALUE _index, RepeatedField* repeated_field) {
51
+ int index = NUM2INT(_index);
52
+ if (index < 0 && repeated_field->size > 0) {
53
+ index = repeated_field->size + index;
54
+ }
55
+ return index;
56
+ }
57
+
58
+ VALUE RepeatedField_subarray(VALUE _self, long beg, long len) {
59
+ RepeatedField* self = ruby_to_RepeatedField(_self);
60
+ int element_size = native_slot_size(self->field_type);
61
+ upb_fieldtype_t field_type = self->field_type;
62
+ VALUE field_type_class = self->field_type_class;
63
+
64
+ size_t off = beg * element_size;
65
+ VALUE ary = rb_ary_new2(len);
66
+ for (int i = beg; i < beg + len; i++, off += element_size) {
67
+ void* mem = ((uint8_t *)self->elements) + off;
68
+ VALUE elem = native_slot_get(field_type, field_type_class, mem);
69
+ rb_ary_push(ary, elem);
70
+ }
71
+ return ary;
72
+ }
73
+
50
74
  /*
51
75
  * call-seq:
52
76
  * RepeatedField.each(&block)
@@ -67,29 +91,58 @@ VALUE RepeatedField_each(VALUE _self) {
67
91
  VALUE val = native_slot_get(field_type, field_type_class, memory);
68
92
  rb_yield(val);
69
93
  }
70
- return Qnil;
94
+ return _self;
71
95
  }
72
96
 
97
+
73
98
  /*
74
99
  * call-seq:
75
100
  * RepeatedField.[](index) => value
76
101
  *
77
- * Accesses the element at the given index. Throws an exception on out-of-bounds
78
- * errors.
102
+ * Accesses the element at the given index. Returns nil on out-of-bounds
79
103
  */
80
- VALUE RepeatedField_index(VALUE _self, VALUE _index) {
104
+ VALUE RepeatedField_index(int argc, VALUE* argv, VALUE _self) {
81
105
  RepeatedField* self = ruby_to_RepeatedField(_self);
82
106
  int element_size = native_slot_size(self->field_type);
83
107
  upb_fieldtype_t field_type = self->field_type;
84
108
  VALUE field_type_class = self->field_type_class;
85
109
 
86
- int index = NUM2INT(_index);
87
- if (index < 0 || index >= self->size) {
88
- rb_raise(rb_eRangeError, "Index out of range");
110
+ VALUE arg = argv[0];
111
+ long beg, len;
112
+
113
+ if (argc == 1){
114
+ if (FIXNUM_P(arg)) {
115
+ /* standard case */
116
+ int index = index_position(argv[0], self);
117
+ if (index < 0 || index >= self->size) {
118
+ return Qnil;
119
+ }
120
+ void* memory = (void *) (((uint8_t *)self->elements) +
121
+ index * element_size);
122
+ return native_slot_get(field_type, field_type_class, memory);
123
+ }else{
124
+ /* check if idx is Range */
125
+ size_t off;
126
+ switch (rb_range_beg_len(arg, &beg, &len, self->size, 0)) {
127
+ case Qfalse:
128
+ break;
129
+ case Qnil:
130
+ return Qnil;
131
+ default:
132
+ return RepeatedField_subarray(_self, beg, len);
133
+ }
134
+ }
89
135
  }
90
-
91
- void* memory = (void *) (((uint8_t *)self->elements) + index * element_size);
92
- return native_slot_get(field_type, field_type_class, memory);
136
+ /* assume 2 arguments */
137
+ beg = NUM2LONG(argv[0]);
138
+ len = NUM2LONG(argv[1]);
139
+ if (beg < 0) {
140
+ beg += self->size;
141
+ }
142
+ if (beg >= self->size) {
143
+ return Qnil;
144
+ }
145
+ return RepeatedField_subarray(_self, beg, len);
93
146
  }
94
147
 
95
148
  /*
@@ -105,9 +158,9 @@ VALUE RepeatedField_index_set(VALUE _self, VALUE _index, VALUE val) {
105
158
  VALUE field_type_class = self->field_type_class;
106
159
  int element_size = native_slot_size(field_type);
107
160
 
108
- int index = NUM2INT(_index);
161
+ int index = index_position(_index, self);
109
162
  if (index < 0 || index >= (INT_MAX - 1)) {
110
- rb_raise(rb_eRangeError, "Index out of range");
163
+ return Qnil;
111
164
  }
112
165
  if (index >= self->size) {
113
166
  RepeatedField_reserve(self, index + 1);
@@ -165,6 +218,7 @@ VALUE RepeatedField_push(VALUE _self, VALUE val) {
165
218
  return _self;
166
219
  }
167
220
 
221
+
168
222
  // Used by parsing handlers.
169
223
  void RepeatedField_push_native(VALUE _self, void* data) {
170
224
  RepeatedField* self = ruby_to_RepeatedField(_self);
@@ -185,19 +239,15 @@ void* RepeatedField_index_native(VALUE _self, int index) {
185
239
  }
186
240
 
187
241
  /*
188
- * call-seq:
189
- * RepeatedField.pop => value
190
- *
191
- * Removes the last element and returns it. Throws an exception if the repeated
192
- * field is empty.
242
+ * Private ruby method, used by RepeatedField.pop
193
243
  */
194
- VALUE RepeatedField_pop(VALUE _self) {
244
+ VALUE RepeatedField_pop_one(VALUE _self) {
195
245
  RepeatedField* self = ruby_to_RepeatedField(_self);
196
246
  upb_fieldtype_t field_type = self->field_type;
197
247
  VALUE field_type_class = self->field_type_class;
198
248
  int element_size = native_slot_size(field_type);
199
249
  if (self->size == 0) {
200
- rb_raise(rb_eRangeError, "Pop from empty repeated field is not allowed.");
250
+ return Qnil;
201
251
  }
202
252
  int index = self->size - 1;
203
253
  void* memory = (void *) (((uint8_t *)self->elements) + index * element_size);
@@ -206,19 +256,6 @@ VALUE RepeatedField_pop(VALUE _self) {
206
256
  return ret;
207
257
  }
208
258
 
209
- /*
210
- * call-seq:
211
- * RepeatedField.insert(*args)
212
- *
213
- * Pushes each arg in turn onto the end of the repeated field.
214
- */
215
- VALUE RepeatedField_insert(int argc, VALUE* argv, VALUE _self) {
216
- for (int i = 0; i < argc; i++) {
217
- RepeatedField_push(_self, argv[i]);
218
- }
219
- return Qnil;
220
- }
221
-
222
259
  /*
223
260
  * call-seq:
224
261
  * RepeatedField.replace(list)
@@ -232,7 +269,7 @@ VALUE RepeatedField_replace(VALUE _self, VALUE list) {
232
269
  for (int i = 0; i < RARRAY_LEN(list); i++) {
233
270
  RepeatedField_push(_self, rb_ary_entry(list, i));
234
271
  }
235
- return Qnil;
272
+ return list;
236
273
  }
237
274
 
238
275
  /*
@@ -244,7 +281,7 @@ VALUE RepeatedField_replace(VALUE _self, VALUE list) {
244
281
  VALUE RepeatedField_clear(VALUE _self) {
245
282
  RepeatedField* self = ruby_to_RepeatedField(_self);
246
283
  self->size = 0;
247
- return Qnil;
284
+ return _self;
248
285
  }
249
286
 
250
287
  /*
@@ -333,7 +370,6 @@ VALUE RepeatedField_to_ary(VALUE _self) {
333
370
  for (int i = 0; i < self->size; i++, off += elem_size) {
334
371
  void* mem = ((uint8_t *)self->elements) + off;
335
372
  VALUE elem = native_slot_get(field_type, self->field_type_class, mem);
336
-
337
373
  rb_ary_push(ary, elem);
338
374
  }
339
375
  return ary;
@@ -409,19 +445,6 @@ VALUE RepeatedField_hash(VALUE _self) {
409
445
  return hash;
410
446
  }
411
447
 
412
- /*
413
- * call-seq:
414
- * RepeatedField.inspect => string
415
- *
416
- * Returns a string representing this repeated field's elements. It will be
417
- * formated as "[<element>, <element>, ...]", with each element's string
418
- * representation computed by its own #inspect method.
419
- */
420
- VALUE RepeatedField_inspect(VALUE _self) {
421
- VALUE self_ary = RepeatedField_to_ary(_self);
422
- return rb_funcall(self_ary, rb_intern("inspect"), 0);
423
- }
424
-
425
448
  /*
426
449
  * call-seq:
427
450
  * RepeatedField.+(other) => repeated field
@@ -458,14 +481,30 @@ VALUE RepeatedField_plus(VALUE _self, VALUE list) {
458
481
  return dupped;
459
482
  }
460
483
 
484
+ /*
485
+ * call-seq:
486
+ * RepeatedField.concat(other) => self
487
+ *
488
+ * concats the passed in array to self. Returns a Ruby array.
489
+ */
490
+ VALUE RepeatedField_concat(VALUE _self, VALUE list) {
491
+ RepeatedField* self = ruby_to_RepeatedField(_self);
492
+ Check_Type(list, T_ARRAY);
493
+ for (int i = 0; i < RARRAY_LEN(list); i++) {
494
+ RepeatedField_push(_self, rb_ary_entry(list, i));
495
+ }
496
+ return _self;
497
+ }
498
+
499
+
461
500
  void validate_type_class(upb_fieldtype_t type, VALUE klass) {
462
- if (rb_iv_get(klass, kDescriptorInstanceVar) == Qnil) {
501
+ if (rb_ivar_get(klass, descriptor_instancevar_interned) == Qnil) {
463
502
  rb_raise(rb_eArgError,
464
503
  "Type class has no descriptor. Please pass a "
465
504
  "class or enum as returned by the DescriptorPool.");
466
505
  }
467
506
  if (type == UPB_TYPE_MESSAGE) {
468
- VALUE desc = rb_iv_get(klass, kDescriptorInstanceVar);
507
+ VALUE desc = rb_ivar_get(klass, descriptor_instancevar_interned);
469
508
  if (!RB_TYPE_P(desc, T_DATA) || !RTYPEDDATA_P(desc) ||
470
509
  RTYPEDDATA_TYPE(desc) != &_Descriptor_type) {
471
510
  rb_raise(rb_eArgError, "Descriptor has an incorrect type.");
@@ -475,7 +514,7 @@ void validate_type_class(upb_fieldtype_t type, VALUE klass) {
475
514
  "Message class was not returned by the DescriptorPool.");
476
515
  }
477
516
  } else if (type == UPB_TYPE_ENUM) {
478
- VALUE enumdesc = rb_iv_get(klass, kDescriptorInstanceVar);
517
+ VALUE enumdesc = rb_ivar_get(klass, descriptor_instancevar_interned);
479
518
  if (!RB_TYPE_P(enumdesc, T_DATA) || !RTYPEDDATA_P(enumdesc) ||
480
519
  RTYPEDDATA_TYPE(enumdesc) != &_EnumDescriptor_type) {
481
520
  rb_raise(rb_eArgError, "Descriptor has an incorrect type.");
@@ -577,22 +616,23 @@ void RepeatedField_register(VALUE module) {
577
616
  rb_define_method(klass, "initialize",
578
617
  RepeatedField_init, -1);
579
618
  rb_define_method(klass, "each", RepeatedField_each, 0);
580
- rb_define_method(klass, "[]", RepeatedField_index, 1);
619
+ rb_define_method(klass, "[]", RepeatedField_index, -1);
620
+ rb_define_method(klass, "at", RepeatedField_index, -1);
581
621
  rb_define_method(klass, "[]=", RepeatedField_index_set, 2);
582
622
  rb_define_method(klass, "push", RepeatedField_push, 1);
583
623
  rb_define_method(klass, "<<", RepeatedField_push, 1);
584
- rb_define_method(klass, "pop", RepeatedField_pop, 0);
585
- rb_define_method(klass, "insert", RepeatedField_insert, -1);
624
+ rb_define_private_method(klass, "pop_one", RepeatedField_pop_one, 0);
586
625
  rb_define_method(klass, "replace", RepeatedField_replace, 1);
587
626
  rb_define_method(klass, "clear", RepeatedField_clear, 0);
588
627
  rb_define_method(klass, "length", RepeatedField_length, 0);
628
+ rb_define_method(klass, "size", RepeatedField_length, 0);
589
629
  rb_define_method(klass, "dup", RepeatedField_dup, 0);
590
630
  // Also define #clone so that we don't inherit Object#clone.
591
631
  rb_define_method(klass, "clone", RepeatedField_dup, 0);
592
632
  rb_define_method(klass, "==", RepeatedField_eq, 1);
593
633
  rb_define_method(klass, "to_ary", RepeatedField_to_ary, 0);
594
634
  rb_define_method(klass, "hash", RepeatedField_hash, 0);
595
- rb_define_method(klass, "inspect", RepeatedField_inspect, 0);
596
635
  rb_define_method(klass, "+", RepeatedField_plus, 1);
636
+ rb_define_method(klass, "concat", RepeatedField_concat, 1);
597
637
  rb_include_module(klass, rb_mEnumerable);
598
638
  }
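
Note on the registrations above: [] and at now use arity -1, so the accessor accepts a single index (including negative indices counted from the end), a Range, or a (start, length) pair, and returns nil on out-of-range access instead of raising; size aliases length, and the private pop_one, per its comment, backs a Ruby-level RepeatedField.pop. A minimal sketch of the three call forms through the Ruby C API, assuming field holds a RepeatedField VALUE (the variable is illustrative only):

    // Sketch only; `field` is an assumed RepeatedField VALUE.
    VALUE last  = rb_funcall(field, rb_intern("[]"), 1, INT2NUM(-1));             // single index
    VALUE range = rb_range_new(INT2NUM(1), INT2NUM(3), 0);                        // 1..3
    VALUE slice = rb_funcall(field, rb_intern("[]"), 1, range);                   // Range form
    VALUE pair  = rb_funcall(field, rb_intern("[]"), 2, INT2NUM(1), INT2NUM(2));  // (start, length)
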
@@ -155,7 +155,9 @@ void native_slot_set_value_and_case(upb_fieldtype_t type, VALUE type_class,
155
155
  break;
156
156
  }
157
157
  case UPB_TYPE_MESSAGE: {
158
- if (CLASS_OF(value) != type_class) {
158
+ if (CLASS_OF(value) == CLASS_OF(Qnil)) {
159
+ value = Qnil;
160
+ } else if (CLASS_OF(value) != type_class) {
159
161
  rb_raise(rb_eTypeError,
160
162
  "Invalid type %s to assign to submessage field.",
161
163
  rb_class2name(CLASS_OF(value)));
@@ -413,7 +415,8 @@ MessageLayout* create_layout(const upb_msgdef* msgdef) {
413
415
  // Align current offset up to |size| granularity.
414
416
  off = align_up_to(off, field_size);
415
417
  layout->fields[upb_fielddef_index(field)].offset = off;
416
- layout->fields[upb_fielddef_index(field)].case_offset = MESSAGE_FIELD_NO_CASE;
418
+ layout->fields[upb_fielddef_index(field)].case_offset =
419
+ MESSAGE_FIELD_NO_CASE;
417
420
  off += field_size;
418
421
  }
419
422
 
@@ -1779,6 +1779,275 @@ upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter) {
1779
1779
  void upb_oneof_iter_setdone(upb_oneof_iter *iter) {
1780
1780
  upb_inttable_iter_setdone(iter);
1781
1781
  }
1782
+ /*
1783
+ * upb - a minimalist implementation of protocol buffers.
1784
+ *
1785
+ * Copyright (c) 2014 Google Inc. See LICENSE for details.
1786
+ * Author: Josh Haberman <jhaberman@gmail.com>
1787
+ */
1788
+
1789
+
1790
+ #include <stdlib.h>
1791
+ #include <stdio.h>
1792
+ #include <string.h>
1793
+
1794
+ typedef struct cleanup_ent {
1795
+ upb_cleanup_func *cleanup;
1796
+ void *ud;
1797
+ struct cleanup_ent *next;
1798
+ } cleanup_ent;
1799
+
1800
+ static void *seeded_alloc(void *ud, void *ptr, size_t oldsize, size_t size);
1801
+
1802
+ /* Default allocator **********************************************************/
1803
+
1804
+ // Just use realloc, keeping all allocated blocks in a linked list to destroy at
1805
+ // the end.
1806
+
1807
+ typedef struct mem_block {
1808
+ // List is doubly-linked, because in cases where realloc() moves an existing
1809
+ // block, we need to be able to remove the old pointer from the list
1810
+ // efficiently.
1811
+ struct mem_block *prev, *next;
1812
+ #ifndef NDEBUG
1813
+ size_t size; // Doesn't include mem_block structure.
1814
+ #endif
1815
+ char data[];
1816
+ } mem_block;
1817
+
1818
+ typedef struct {
1819
+ mem_block *head;
1820
+ } default_alloc_ud;
1821
+
1822
+ static void *default_alloc(void *_ud, void *ptr, size_t oldsize, size_t size) {
1823
+ UPB_UNUSED(oldsize);
1824
+ default_alloc_ud *ud = _ud;
1825
+
1826
+ mem_block *from = ptr ? (void*)((char*)ptr - sizeof(mem_block)) : NULL;
1827
+
1828
+ #ifndef NDEBUG
1829
+ if (from) {
1830
+ assert(oldsize <= from->size);
1831
+ }
1832
+ #endif
1833
+
1834
+ mem_block *block = realloc(from, size + sizeof(mem_block));
1835
+ if (!block) return NULL;
1836
+
1837
+ #ifndef NDEBUG
1838
+ block->size = size;
1839
+ #endif
1840
+
1841
+ if (from) {
1842
+ if (block != from) {
1843
+ // The block was moved, so pointers in next and prev blocks must be
1844
+ // updated to its new location.
1845
+ if (block->next) block->next->prev = block;
1846
+ if (block->prev) block->prev->next = block;
1847
+ }
1848
+ } else {
1849
+ // Insert at head of linked list.
1850
+ block->prev = NULL;
1851
+ block->next = ud->head;
1852
+ if (block->next) block->next->prev = block;
1853
+ ud->head = block;
1854
+ }
1855
+
1856
+ return &block->data;
1857
+ }
1858
+
1859
+ static void default_alloc_cleanup(void *_ud) {
1860
+ default_alloc_ud *ud = _ud;
1861
+ mem_block *block = ud->head;
1862
+
1863
+ while (block) {
1864
+ void *to_free = block;
1865
+ block = block->next;
1866
+ free(to_free);
1867
+ }
1868
+ }
1869
+
1870
+
1871
+ /* Standard error functions ***************************************************/
1872
+
1873
+ static bool default_err(void *ud, const upb_status *status) {
1874
+ UPB_UNUSED(ud);
1875
+ fprintf(stderr, "upb error: %s\n", upb_status_errmsg(status));
1876
+ return false;
1877
+ }
1878
+
1879
+ static bool write_err_to(void *ud, const upb_status *status) {
1880
+ upb_status *copy_to = ud;
1881
+ upb_status_copy(copy_to, status);
1882
+ return false;
1883
+ }
1884
+
1885
+
1886
+ /* upb_env ********************************************************************/
1887
+
1888
+ void upb_env_init(upb_env *e) {
1889
+ e->ok_ = true;
1890
+ e->bytes_allocated = 0;
1891
+ e->cleanup_head = NULL;
1892
+
1893
+ default_alloc_ud *ud = (default_alloc_ud*)&e->default_alloc_ud;
1894
+ ud->head = NULL;
1895
+
1896
+ // Set default functions.
1897
+ upb_env_setallocfunc(e, default_alloc, ud);
1898
+ upb_env_seterrorfunc(e, default_err, NULL);
1899
+ }
1900
+
1901
+ void upb_env_uninit(upb_env *e) {
1902
+ cleanup_ent *ent = e->cleanup_head;
1903
+
1904
+ while (ent) {
1905
+ ent->cleanup(ent->ud);
1906
+ ent = ent->next;
1907
+ }
1908
+
1909
+ // Must do this after running cleanup functions, because this will delete
1910
+ // the memory we store our cleanup entries in!
1911
+ if (e->alloc == default_alloc) {
1912
+ default_alloc_cleanup(e->alloc_ud);
1913
+ }
1914
+ }
1915
+
1916
+ UPB_FORCEINLINE void upb_env_setallocfunc(upb_env *e, upb_alloc_func *alloc,
1917
+ void *ud) {
1918
+ e->alloc = alloc;
1919
+ e->alloc_ud = ud;
1920
+ }
1921
+
1922
+ UPB_FORCEINLINE void upb_env_seterrorfunc(upb_env *e, upb_error_func *func,
1923
+ void *ud) {
1924
+ e->err = func;
1925
+ e->err_ud = ud;
1926
+ }
1927
+
1928
+ void upb_env_reporterrorsto(upb_env *e, upb_status *status) {
1929
+ e->err = write_err_to;
1930
+ e->err_ud = status;
1931
+ }
1932
+
1933
+ bool upb_env_ok(const upb_env *e) {
1934
+ return e->ok_;
1935
+ }
1936
+
1937
+ bool upb_env_reporterror(upb_env *e, const upb_status *status) {
1938
+ e->ok_ = false;
1939
+ return e->err(e->err_ud, status);
1940
+ }
1941
+
1942
+ bool upb_env_addcleanup(upb_env *e, upb_cleanup_func *func, void *ud) {
1943
+ cleanup_ent *ent = upb_env_malloc(e, sizeof(cleanup_ent));
1944
+ if (!ent) return false;
1945
+
1946
+ ent->cleanup = func;
1947
+ ent->ud = ud;
1948
+ ent->next = e->cleanup_head;
1949
+ e->cleanup_head = ent;
1950
+
1951
+ return true;
1952
+ }
1953
+
1954
+ void *upb_env_malloc(upb_env *e, size_t size) {
1955
+ e->bytes_allocated += size;
1956
+ if (e->alloc == seeded_alloc) {
1957
+ // This is equivalent to the next branch, but allows inlining for a
1958
+ // measurable perf benefit.
1959
+ return seeded_alloc(e->alloc_ud, NULL, 0, size);
1960
+ } else {
1961
+ return e->alloc(e->alloc_ud, NULL, 0, size);
1962
+ }
1963
+ }
1964
+
1965
+ void *upb_env_realloc(upb_env *e, void *ptr, size_t oldsize, size_t size) {
1966
+ assert(oldsize <= size);
1967
+ char *ret = e->alloc(e->alloc_ud, ptr, oldsize, size);
1968
+
1969
+ #ifndef NDEBUG
1970
+ // Overwrite non-preserved memory to ensure callers are passing the oldsize
1971
+ // that they truly require.
1972
+ memset(ret + oldsize, 0xff, size - oldsize);
1973
+ #endif
1974
+
1975
+ return ret;
1976
+ }
1977
+
1978
+ size_t upb_env_bytesallocated(const upb_env *e) {
1979
+ return e->bytes_allocated;
1980
+ }
1981
+
1982
+
1983
+ /* upb_seededalloc ************************************************************/
1984
+
1985
+ // Be conservative and choose 16 in case anyone is using SSE.
1986
+ static const size_t maxalign = 16;
1987
+
1988
+ static size_t align_up(size_t size) {
1989
+ return ((size + maxalign - 1) / maxalign) * maxalign;
1990
+ }
1991
+
1992
+ UPB_FORCEINLINE static void *seeded_alloc(void *ud, void *ptr, size_t oldsize,
1993
+ size_t size) {
1994
+ UPB_UNUSED(ptr);
1995
+
1996
+ upb_seededalloc *a = ud;
1997
+ size = align_up(size);
1998
+
1999
+ assert(a->mem_limit >= a->mem_ptr);
2000
+
2001
+ if (oldsize == 0 && size <= (size_t)(a->mem_limit - a->mem_ptr)) {
2002
+ // Fast path: we can satisfy from the initial allocation.
2003
+ void *ret = a->mem_ptr;
2004
+ a->mem_ptr += size;
2005
+ return ret;
2006
+ } else {
2007
+ // Slow path: fallback to other allocator.
2008
+ a->need_cleanup = true;
2009
+ // Is `ptr` part of the user-provided initial block? Don't pass it to the
2010
+ // default allocator if so; otherwise, it may try to realloc() the block.
2011
+ char *chptr = ptr;
2012
+ if (chptr >= a->mem_base && chptr < a->mem_limit) {
2013
+ return a->alloc(a->alloc_ud, NULL, 0, size);
2014
+ } else {
2015
+ return a->alloc(a->alloc_ud, ptr, oldsize, size);
2016
+ }
2017
+ }
2018
+ }
2019
+
2020
+ void upb_seededalloc_init(upb_seededalloc *a, void *mem, size_t len) {
2021
+ a->mem_base = mem;
2022
+ a->mem_ptr = mem;
2023
+ a->mem_limit = (char*)mem + len;
2024
+ a->need_cleanup = false;
2025
+ a->returned_allocfunc = false;
2026
+
2027
+ default_alloc_ud *ud = (default_alloc_ud*)&a->default_alloc_ud;
2028
+ ud->head = NULL;
2029
+
2030
+ upb_seededalloc_setfallbackalloc(a, default_alloc, ud);
2031
+ }
2032
+
2033
+ void upb_seededalloc_uninit(upb_seededalloc *a) {
2034
+ if (a->alloc == default_alloc && a->need_cleanup) {
2035
+ default_alloc_cleanup(a->alloc_ud);
2036
+ }
2037
+ }
2038
+
2039
+ UPB_FORCEINLINE void upb_seededalloc_setfallbackalloc(upb_seededalloc *a,
2040
+ upb_alloc_func *alloc,
2041
+ void *ud) {
2042
+ assert(!a->returned_allocfunc);
2043
+ a->alloc = alloc;
2044
+ a->alloc_ud = ud;
2045
+ }
2046
+
2047
+ upb_alloc_func *upb_seededalloc_getallocfunc(upb_seededalloc *a) {
2048
+ a->returned_allocfunc = true;
2049
+ return seeded_alloc;
2050
+ }
1782
2051
  /*
1783
2052
  * upb - a minimalist implementation of protocol buffers.
1784
2053
  *
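
Note on the block added above: upb_env is a new per-parse environment that owns allocation and error reporting, replacing the per-object init/uninit pairs that later hunks in this diff delete (for example upb_pbdecoder_init/uninit and upb_descreader_init/uninit). A minimal sketch of its lifecycle using only the functions defined above; my_cleanup and the surrounding status handling are illustrative, not part of the diff:

    // Sketch only.
    static void my_cleanup(void *ud) { /* release `ud` */ }

    upb_status status = UPB_STATUS_INIT;
    upb_env env;
    upb_env_init(&env);
    upb_env_reporterrorsto(&env, &status);      // route errors into `status`

    void *scratch = upb_env_malloc(&env, 128);  // tracked by the env's allocator
    upb_env_addcleanup(&env, my_cleanup, scratch);

    /* ... create decoders/readers against &env and push input ... */

    upb_env_uninit(&env);  // runs cleanup entries, then frees all env-owned blocks
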
@@ -1955,7 +2224,14 @@ static bool doset(upb_handlers *h, int32_t sel, const upb_fielddef *f,
1955
2224
  if (closure_type && *context_closure_type &&
1956
2225
  closure_type != *context_closure_type) {
1957
2226
  // TODO(haberman): better message for debugging.
1958
- upb_status_seterrmsg(&h->status_, "closure type does not match");
2227
+ if (f) {
2228
+ upb_status_seterrf(&h->status_,
2229
+ "closure type does not match for field %s",
2230
+ upb_fielddef_name(f));
2231
+ } else {
2232
+ upb_status_seterrmsg(
2233
+ &h->status_, "closure type does not match for message-level handler");
2234
+ }
1959
2235
  return false;
1960
2236
  }
1961
2237
 
@@ -2353,7 +2629,7 @@ bool upb_handlers_getselector(const upb_fielddef *f, upb_handlertype_t type,
2353
2629
  *s = f->selector_base;
2354
2630
  break;
2355
2631
  }
2356
- assert(*s < upb_fielddef_containingtype(f)->selector_count);
2632
+ assert((size_t)*s < upb_fielddef_containingtype(f)->selector_count);
2357
2633
  return true;
2358
2634
  }
2359
2635
 
@@ -4295,7 +4571,7 @@ void upb_inttable_compact(upb_inttable *t) {
4295
4571
  counts[log2ceil(key)]++;
4296
4572
  }
4297
4573
 
4298
- int arr_size;
4574
+ size_t arr_size = 1;
4299
4575
  int arr_count = upb_inttable_count(t);
4300
4576
 
4301
4577
  if (upb_inttable_count(t) >= max_key * MIN_DENSITY) {
@@ -5522,6 +5798,54 @@ static upb_inttable reftables[212] = {
5522
5798
  #include <stdlib.h>
5523
5799
  #include <string.h>
5524
5800
 
5801
+ // upb_deflist is an internal-only dynamic array for storing a growing list of
5802
+ // upb_defs.
5803
+ typedef struct {
5804
+ upb_def **defs;
5805
+ size_t len;
5806
+ size_t size;
5807
+ bool owned;
5808
+ } upb_deflist;
5809
+
5810
+ // We keep a stack of all the messages scopes we are currently in, as well as
5811
+ // the top-level file scope. This is necessary to correctly qualify the
5812
+ // definitions that are contained inside. "name" tracks the name of the
5813
+ // message or package (a bare name -- not qualified by any enclosing scopes).
5814
+ typedef struct {
5815
+ char *name;
5816
+ // Index of the first def that is under this scope. For msgdefs, the
5817
+ // msgdef itself is at start-1.
5818
+ int start;
5819
+ } upb_descreader_frame;
5820
+
5821
+ // The maximum number of nested declarations that are allowed, ie.
5822
+ // message Foo {
5823
+ // message Bar {
5824
+ // message Baz {
5825
+ // }
5826
+ // }
5827
+ // }
5828
+ //
5829
+ // This is a resource limit that affects how big our runtime stack can grow.
5830
+ // TODO: make this a runtime-settable property of the Reader instance.
5831
+ #define UPB_MAX_MESSAGE_NESTING 64
5832
+
5833
+ struct upb_descreader {
5834
+ upb_sink sink;
5835
+ upb_deflist defs;
5836
+ upb_descreader_frame stack[UPB_MAX_MESSAGE_NESTING];
5837
+ int stack_len;
5838
+
5839
+ uint32_t number;
5840
+ char *name;
5841
+ bool saw_number;
5842
+ bool saw_name;
5843
+
5844
+ char *default_string;
5845
+
5846
+ upb_fielddef *f;
5847
+ };
5848
+
5525
5849
  static char *upb_strndup(const char *buf, size_t n) {
5526
5850
  char *ret = malloc(n + 1);
5527
5851
  if (!ret) return NULL;
@@ -5601,36 +5925,6 @@ static void upb_deflist_qualify(upb_deflist *l, char *str, int32_t start) {
5601
5925
 
5602
5926
  /* upb_descreader ************************************************************/
5603
5927
 
5604
- void upb_descreader_init(upb_descreader *r, const upb_handlers *handlers,
5605
- upb_status *status) {
5606
- UPB_UNUSED(status);
5607
- upb_deflist_init(&r->defs);
5608
- upb_sink_reset(upb_descreader_input(r), handlers, r);
5609
- r->stack_len = 0;
5610
- r->name = NULL;
5611
- r->default_string = NULL;
5612
- }
5613
-
5614
- void upb_descreader_uninit(upb_descreader *r) {
5615
- free(r->name);
5616
- upb_deflist_uninit(&r->defs);
5617
- free(r->default_string);
5618
- while (r->stack_len > 0) {
5619
- upb_descreader_frame *f = &r->stack[--r->stack_len];
5620
- free(f->name);
5621
- }
5622
- }
5623
-
5624
- upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n) {
5625
- *n = r->defs.len;
5626
- upb_deflist_donaterefs(&r->defs, owner);
5627
- return r->defs.defs;
5628
- }
5629
-
5630
- upb_sink *upb_descreader_input(upb_descreader *r) {
5631
- return &r->sink;
5632
- }
5633
-
5634
5928
  static upb_msgdef *upb_descreader_top(upb_descreader *r) {
5635
5929
  assert(r->stack_len > 1);
5636
5930
  int index = r->stack[r->stack_len-1].start - 1;
@@ -5803,7 +6097,7 @@ static bool parse_default(char *str, upb_fielddef *f) {
5803
6097
  break;
5804
6098
  }
5805
6099
  case UPB_TYPE_UINT32: {
5806
- long val = strtoul(str, &end, 0);
6100
+ unsigned long val = strtoul(str, &end, 0);
5807
6101
  if (val > UINT32_MAX || errno == ERANGE || *end)
5808
6102
  success = false;
5809
6103
  else
@@ -6070,6 +6364,45 @@ static void reghandlers(const void *closure, upb_handlers *h) {
6070
6364
 
6071
6365
  #undef D
6072
6366
 
6367
+ void descreader_cleanup(void *_r) {
6368
+ upb_descreader *r = _r;
6369
+ free(r->name);
6370
+ upb_deflist_uninit(&r->defs);
6371
+ free(r->default_string);
6372
+ while (r->stack_len > 0) {
6373
+ upb_descreader_frame *f = &r->stack[--r->stack_len];
6374
+ free(f->name);
6375
+ }
6376
+ }
6377
+
6378
+
6379
+ /* Public API ****************************************************************/
6380
+
6381
+ upb_descreader *upb_descreader_create(upb_env *e, const upb_handlers *h) {
6382
+ upb_descreader *r = upb_env_malloc(e, sizeof(upb_descreader));
6383
+ if (!r || !upb_env_addcleanup(e, descreader_cleanup, r)) {
6384
+ return NULL;
6385
+ }
6386
+
6387
+ upb_deflist_init(&r->defs);
6388
+ upb_sink_reset(upb_descreader_input(r), h, r);
6389
+ r->stack_len = 0;
6390
+ r->name = NULL;
6391
+ r->default_string = NULL;
6392
+
6393
+ return r;
6394
+ }
6395
+
6396
+ upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n) {
6397
+ *n = r->defs.len;
6398
+ upb_deflist_donaterefs(&r->defs, owner);
6399
+ return r->defs.defs;
6400
+ }
6401
+
6402
+ upb_sink *upb_descreader_input(upb_descreader *r) {
6403
+ return &r->sink;
6404
+ }
6405
+
6073
6406
  const upb_handlers *upb_descreader_newhandlers(const void *owner) {
6074
6407
  const upb_symtab *s = upbdefs_google_protobuf_descriptor(&s);
6075
6408
  const upb_handlers *h = upb_handlers_newfrozen(
@@ -6141,7 +6474,6 @@ mgroup *newgroup(const void *owner) {
6141
6474
 
6142
6475
  static void freemethod(upb_refcounted *r) {
6143
6476
  upb_pbdecodermethod *method = (upb_pbdecodermethod*)r;
6144
- upb_byteshandler_uninit(&method->input_handler_);
6145
6477
 
6146
6478
  if (method->dest_handlers_) {
6147
6479
  upb_handlers_unref(method->dest_handlers_, method);
@@ -7073,10 +7405,7 @@ void upb_pbdecodermethodopts_setlazy(upb_pbdecodermethodopts *opts, bool lazy) {
7073
7405
  */
7074
7406
 
7075
7407
  #include <inttypes.h>
7076
- #include <setjmp.h>
7077
- #include <stdarg.h>
7078
7408
  #include <stddef.h>
7079
- #include <stdlib.h>
7080
7409
 
7081
7410
  #ifdef UPB_DUMP_BYTECODE
7082
7411
  #include <stdio.h>
@@ -7122,18 +7451,17 @@ static bool consumes_input(opcode op) {
7122
7451
 
7123
7452
  static bool in_residual_buf(const upb_pbdecoder *d, const char *p);
7124
7453
 
7125
- // It's unfortunate that we have to micro-manage the compiler this way,
7126
- // especially since this tuning is necessarily specific to one hardware
7127
- // configuration. But emperically on a Core i7, performance increases 30-50%
7128
- // with these annotations. Every instance where these appear, gcc 4.2.1 made
7129
- // the wrong decision and degraded performance in benchmarks.
7130
- #define FORCEINLINE static inline __attribute__((always_inline))
7131
- #define NOINLINE __attribute__((noinline))
7454
+ // It's unfortunate that we have to micro-manage the compiler with
7455
+ // UPB_FORCEINLINE and UPB_NOINLINE, especially since this tuning is necessarily
7456
+ // specific to one hardware configuration. But empirically on a Core i7,
7457
+ // performance increases 30-50% with these annotations. Every instance where
7458
+ // these appear, gcc 4.2.1 made the wrong decision and degraded performance in
7459
+ // benchmarks.
7132
7460
 
7133
7461
  static void seterr(upb_pbdecoder *d, const char *msg) {
7134
- // TODO(haberman): encapsulate this access to pipeline->status, but not sure
7135
- // exactly what that interface should look like.
7136
- upb_status_seterrmsg(d->status, msg);
7462
+ upb_status status = UPB_STATUS_INIT;
7463
+ upb_status_seterrmsg(&status, msg);
7464
+ upb_env_reporterror(d->env, &status);
7137
7465
  }
7138
7466
 
7139
7467
  void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg) {
@@ -7176,7 +7504,7 @@ static bool in_residual_buf(const upb_pbdecoder *d, const char *p) {
7176
7504
  // and the parsing stack, so must be called whenever either is updated.
7177
7505
  static void set_delim_end(upb_pbdecoder *d) {
7178
7506
  size_t delim_ofs = d->top->end_ofs - d->bufstart_ofs;
7179
- if (delim_ofs <= (d->end - d->buf)) {
7507
+ if (delim_ofs <= (size_t)(d->end - d->buf)) {
7180
7508
  d->delim_end = d->buf + delim_ofs;
7181
7509
  d->data_end = d->delim_end;
7182
7510
  } else {
@@ -7301,7 +7629,8 @@ static int32_t skip(upb_pbdecoder *d, size_t bytes) {
7301
7629
 
7302
7630
  // Copies the next "bytes" bytes into "buf" and advances the stream.
7303
7631
  // Requires that this many bytes are available in the current buffer.
7304
- FORCEINLINE void consumebytes(upb_pbdecoder *d, void *buf, size_t bytes) {
7632
+ UPB_FORCEINLINE static void consumebytes(upb_pbdecoder *d, void *buf,
7633
+ size_t bytes) {
7305
7634
  assert(bytes <= curbufleft(d));
7306
7635
  memcpy(buf, d->ptr, bytes);
7307
7636
  advance(d, bytes);
@@ -7310,8 +7639,8 @@ FORCEINLINE void consumebytes(upb_pbdecoder *d, void *buf, size_t bytes) {
7310
7639
  // Slow path for getting the next "bytes" bytes, regardless of whether they are
7311
7640
  // available in the current buffer or not. Returns a status code as described
7312
7641
  // in decoder.int.h.
7313
- static NOINLINE int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
7314
- size_t bytes) {
7642
+ UPB_NOINLINE static int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
7643
+ size_t bytes) {
7315
7644
  const size_t avail = curbufleft(d);
7316
7645
  consumebytes(d, buf, avail);
7317
7646
  bytes -= avail;
@@ -7320,7 +7649,7 @@ static NOINLINE int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
7320
7649
  advancetobuf(d, d->buf_param, d->size_param);
7321
7650
  }
7322
7651
  if (curbufleft(d) >= bytes) {
7323
- consumebytes(d, buf + avail, bytes);
7652
+ consumebytes(d, (char *)buf + avail, bytes);
7324
7653
  return DECODE_OK;
7325
7654
  } else if (d->data_end == d->delim_end) {
7326
7655
  seterr(d, "Submessage ended in the middle of a value or group");
@@ -7332,7 +7661,8 @@ static NOINLINE int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
7332
7661
 
7333
7662
  // Gets the next "bytes" bytes, regardless of whether they are available in the
7334
7663
  // current buffer or not. Returns a status code as described in decoder.int.h.
7335
- FORCEINLINE int32_t getbytes(upb_pbdecoder *d, void *buf, size_t bytes) {
7664
+ UPB_FORCEINLINE static int32_t getbytes(upb_pbdecoder *d, void *buf,
7665
+ size_t bytes) {
7336
7666
  if (curbufleft(d) >= bytes) {
7337
7667
  // Buffer has enough data to satisfy.
7338
7668
  consumebytes(d, buf, bytes);
@@ -7342,19 +7672,20 @@ FORCEINLINE int32_t getbytes(upb_pbdecoder *d, void *buf, size_t bytes) {
7342
7672
  }
7343
7673
  }
7344
7674
 
7345
- static NOINLINE size_t peekbytes_slow(upb_pbdecoder *d, void *buf,
7346
- size_t bytes) {
7675
+ UPB_NOINLINE static size_t peekbytes_slow(upb_pbdecoder *d, void *buf,
7676
+ size_t bytes) {
7347
7677
  size_t ret = curbufleft(d);
7348
7678
  memcpy(buf, d->ptr, ret);
7349
7679
  if (in_residual_buf(d, d->ptr)) {
7350
7680
  size_t copy = UPB_MIN(bytes - ret, d->size_param);
7351
- memcpy(buf + ret, d->buf_param, copy);
7681
+ memcpy((char *)buf + ret, d->buf_param, copy);
7352
7682
  ret += copy;
7353
7683
  }
7354
7684
  return ret;
7355
7685
  }
7356
7686
 
7357
- FORCEINLINE size_t peekbytes(upb_pbdecoder *d, void *buf, size_t bytes) {
7687
+ UPB_FORCEINLINE static size_t peekbytes(upb_pbdecoder *d, void *buf,
7688
+ size_t bytes) {
7358
7689
  if (curbufleft(d) >= bytes) {
7359
7690
  memcpy(buf, d->ptr, bytes);
7360
7691
  return bytes;
@@ -7368,8 +7699,8 @@ FORCEINLINE size_t peekbytes(upb_pbdecoder *d, void *buf, size_t bytes) {
7368
7699
 
7369
7700
  // Slow path for decoding a varint from the current buffer position.
7370
7701
  // Returns a status code as described in decoder.int.h.
7371
- NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
7372
- uint64_t *u64) {
7702
+ UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
7703
+ uint64_t *u64) {
7373
7704
  *u64 = 0;
7374
7705
  uint8_t byte = 0x80;
7375
7706
  int bitpos;
@@ -7387,7 +7718,7 @@ NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
7387
7718
 
7388
7719
  // Decodes a varint from the current buffer position.
7389
7720
  // Returns a status code as described in decoder.int.h.
7390
- FORCEINLINE int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) {
7721
+ UPB_FORCEINLINE static int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) {
7391
7722
  if (curbufleft(d) > 0 && !(*d->ptr & 0x80)) {
7392
7723
  *u64 = *d->ptr;
7393
7724
  advance(d, 1);
@@ -7410,7 +7741,7 @@ FORCEINLINE int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) {
7410
7741
 
7411
7742
  // Decodes a 32-bit varint from the current buffer position.
7412
7743
  // Returns a status code as described in decoder.int.h.
7413
- FORCEINLINE int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) {
7744
+ UPB_FORCEINLINE static int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) {
7414
7745
  uint64_t u64;
7415
7746
  int32_t ret = decode_varint(d, &u64);
7416
7747
  if (ret >= 0) return ret;
@@ -7429,14 +7760,14 @@ FORCEINLINE int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) {
7429
7760
  // Decodes a fixed32 from the current buffer position.
7430
7761
  // Returns a status code as described in decoder.int.h.
7431
7762
  // TODO: proper byte swapping for big-endian machines.
7432
- FORCEINLINE int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) {
7763
+ UPB_FORCEINLINE static int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) {
7433
7764
  return getbytes(d, u32, 4);
7434
7765
  }
7435
7766
 
7436
7767
  // Decodes a fixed64 from the current buffer position.
7437
7768
  // Returns a status code as described in decoder.int.h.
7438
7769
  // TODO: proper byte swapping for big-endian machines.
7439
- FORCEINLINE int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) {
7770
+ UPB_FORCEINLINE static int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) {
7440
7771
  return getbytes(d, u64, 8);
7441
7772
  }
7442
7773
 
@@ -7460,7 +7791,7 @@ static bool decoder_push(upb_pbdecoder *d, uint64_t end) {
7460
7791
  if (end > fr->end_ofs) {
7461
7792
  seterr(d, "Submessage end extends past enclosing submessage.");
7462
7793
  return false;
7463
- } else if ((fr + 1) == d->limit) {
7794
+ } else if (fr == d->limit) {
7464
7795
  seterr(d, kPbDecoderStackOverflow);
7465
7796
  return false;
7466
7797
  }
@@ -7487,8 +7818,8 @@ static bool pushtagdelim(upb_pbdecoder *d, uint32_t arg) {
7487
7818
  // Pops a frame from the decoder stack.
7488
7819
  static void decoder_pop(upb_pbdecoder *d) { d->top--; }
7489
7820
 
7490
- NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d,
7491
- uint64_t expected) {
7821
+ UPB_NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d,
7822
+ uint64_t expected) {
7492
7823
  uint64_t data = 0;
7493
7824
  size_t bytes = upb_value_size(expected);
7494
7825
  size_t read = peekbytes(d, &data, bytes);
@@ -7640,10 +7971,17 @@ static int32_t dispatch(upb_pbdecoder *d) {
7640
7971
  if (ret == DECODE_ENDGROUP) {
7641
7972
  goto_endmsg(d);
7642
7973
  return DECODE_OK;
7643
- } else {
7644
- d->pc = d->last - 1; // Rewind to CHECKDELIM.
7645
- return ret;
7974
+ } else if (ret == DECODE_OK) {
7975
+ // We just consumed some input, so we might now have consumed all the data
7976
+ // in the delmited region. Since every opcode that can trigger dispatch is
7977
+ // directly preceded by OP_CHECKDELIM, rewind to it now to re-check the
7978
+ // delimited end.
7979
+ d->pc = d->last - 1;
7980
+ assert(getop(*d->pc) == OP_CHECKDELIM);
7981
+ return DECODE_OK;
7646
7982
  }
7983
+
7984
+ return ret;
7647
7985
  }
7648
7986
 
7649
7987
  // Callers know that the stack is more than one deep because the opcodes that
@@ -7866,7 +8204,10 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
7866
8204
  void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) {
7867
8205
  upb_pbdecoder *d = closure;
7868
8206
  UPB_UNUSED(size_hint);
8207
+ d->top->end_ofs = UINT64_MAX;
8208
+ d->bufstart_ofs = 0;
7869
8209
  d->call_len = 1;
8210
+ d->callstack[0] = &halt;
7870
8211
  d->pc = pc;
7871
8212
  return d;
7872
8213
  }
@@ -7875,6 +8216,8 @@ void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint) {
7875
8216
  UPB_UNUSED(hd);
7876
8217
  UPB_UNUSED(size_hint);
7877
8218
  upb_pbdecoder *d = closure;
8219
+ d->top->end_ofs = UINT64_MAX;
8220
+ d->bufstart_ofs = 0;
7878
8221
  d->call_len = 0;
7879
8222
  return d;
7880
8223
  }
@@ -7931,58 +8274,120 @@ bool upb_pbdecoder_end(void *closure, const void *handler_data) {
7931
8274
  return true;
7932
8275
  }
7933
8276
 
7934
- void upb_pbdecoder_init(upb_pbdecoder *d, const upb_pbdecodermethod *m,
7935
- upb_status *s) {
7936
- d->limit = &d->stack[UPB_DECODER_MAX_NESTING];
7937
- upb_bytessink_reset(&d->input_, &m->input_handler_, d);
7938
- d->method_ = m;
7939
- d->callstack[0] = &halt;
7940
- d->status = s;
7941
- upb_pbdecoder_reset(d);
7942
- }
7943
-
7944
8277
  void upb_pbdecoder_reset(upb_pbdecoder *d) {
7945
8278
  d->top = d->stack;
7946
- d->top->end_ofs = UINT64_MAX;
7947
8279
  d->top->groupnum = 0;
7948
- d->bufstart_ofs = 0;
7949
8280
  d->ptr = d->residual;
7950
8281
  d->buf = d->residual;
7951
8282
  d->end = d->residual;
7952
8283
  d->residual_end = d->residual;
7953
- d->call_len = 1;
7954
8284
  }
7955
8285
 
7956
- uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d) {
7957
- return offset(d);
8286
+ static size_t stacksize(upb_pbdecoder *d, size_t entries) {
8287
+ UPB_UNUSED(d);
8288
+ return entries * sizeof(upb_pbdecoder_frame);
7958
8289
  }
7959
8290
 
7960
- // Not currently required, but to support outgrowing the static stack we need
7961
- // this.
7962
- void upb_pbdecoder_uninit(upb_pbdecoder *d) {
8291
+ static size_t callstacksize(upb_pbdecoder *d, size_t entries) {
7963
8292
  UPB_UNUSED(d);
7964
- }
7965
8293
 
7966
- const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) {
7967
- return d->method_;
8294
+ #ifdef UPB_USE_JIT_X64
8295
+ if (d->method_->is_native_) {
8296
+ // Each native stack frame needs two pointers, plus we need a few frames for
8297
+ // the enter/exit trampolines.
8298
+ size_t ret = entries * sizeof(void*) * 2;
8299
+ ret += sizeof(void*) * 10;
8300
+ return ret;
8301
+ }
8302
+ #endif
8303
+
8304
+ return entries * sizeof(uint32_t*);
7968
8305
  }
7969
8306
 
7970
- bool upb_pbdecoder_resetoutput(upb_pbdecoder *d, upb_sink* sink) {
7971
- // TODO(haberman): do we need to test whether the decoder is already on the
7972
- // stack (like calling this from within a callback)? Should we support
7973
- // rebinding the output at all?
8307
+ upb_pbdecoder *upb_pbdecoder_create(upb_env *e, const upb_pbdecodermethod *m,
8308
+ upb_sink *sink) {
8309
+ const size_t default_max_nesting = 64;
8310
+ #ifndef NDEBUG
8311
+ size_t size_before = upb_env_bytesallocated(e);
8312
+ #endif
8313
+
8314
+ upb_pbdecoder *d = upb_env_malloc(e, sizeof(upb_pbdecoder));
8315
+ if (!d) return NULL;
8316
+
8317
+ d->method_ = m;
8318
+ d->callstack = upb_env_malloc(e, callstacksize(d, default_max_nesting));
8319
+ d->stack = upb_env_malloc(e, stacksize(d, default_max_nesting));
8320
+ if (!d->stack || !d->callstack) {
8321
+ return NULL;
8322
+ }
8323
+
8324
+ d->env = e;
8325
+ d->limit = d->stack + default_max_nesting - 1;
8326
+ d->stack_size = default_max_nesting;
8327
+
8328
+ upb_pbdecoder_reset(d);
8329
+ upb_bytessink_reset(&d->input_, &m->input_handler_, d);
8330
+
7974
8331
  assert(sink);
7975
8332
  if (d->method_->dest_handlers_) {
7976
8333
  if (sink->handlers != d->method_->dest_handlers_)
7977
- return false;
8334
+ return NULL;
7978
8335
  }
7979
8336
  upb_sink_reset(&d->top->sink, sink->handlers, sink->closure);
7980
- return true;
8337
+
8338
+ // If this fails, increase the value in decoder.h.
8339
+ assert(upb_env_bytesallocated(e) - size_before <= UPB_PB_DECODER_SIZE);
8340
+ return d;
8341
+ }
8342
+
8343
+ uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d) {
8344
+ return offset(d);
8345
+ }
8346
+
8347
+ const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) {
8348
+ return d->method_;
7981
8349
  }
7982
8350
 
7983
8351
  upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d) {
7984
8352
  return &d->input_;
7985
8353
  }
8354
+
8355
+ size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d) {
8356
+ return d->stack_size;
8357
+ }
8358
+
8359
+ bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max) {
8360
+ assert(d->top >= d->stack);
8361
+
8362
+ if (max < (size_t)(d->top - d->stack)) {
8363
+ // Can't set a limit smaller than what we are currently at.
8364
+ return false;
8365
+ }
8366
+
8367
+ if (max > d->stack_size) {
8368
+ // Need to reallocate stack and callstack to accommodate.
8369
+ size_t old_size = stacksize(d, d->stack_size);
8370
+ size_t new_size = stacksize(d, max);
8371
+ void *p = upb_env_realloc(d->env, d->stack, old_size, new_size);
8372
+ if (!p) {
8373
+ return false;
8374
+ }
8375
+ d->stack = p;
8376
+
8377
+ old_size = callstacksize(d, d->stack_size);
8378
+ new_size = callstacksize(d, max);
8379
+ p = upb_env_realloc(d->env, d->callstack, old_size, new_size);
8380
+ if (!p) {
8381
+ return false;
8382
+ }
8383
+ d->callstack = p;
8384
+
8385
+ d->stack_size = max;
8386
+ }
8387
+
8388
+ d->limit = d->stack + max - 1;
8389
+ return true;
8390
+ }
7986
8391
  /*
7987
8392
  * upb - a minimalist implementation of protocol buffers.
7988
8393
  *
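
Note on the decoder changes above: upb_pbdecoder_create replaces upb_pbdecoder_init/resetoutput, allocating the decoder and its frame/call stacks from a upb_env and returning NULL on failure (including a handler mismatch with the destination sink), and the nesting limit is now a runtime property (default 64) adjustable with upb_pbdecoder_setmaxnesting. A minimal sketch, assuming env, method, sink, buf, and len are already set up (all are illustrative here):

    // Sketch only.
    upb_pbdecoder *d = upb_pbdecoder_create(env, method, sink);
    if (!d) { /* allocation failed or sink handlers did not match the method */ }

    // Raise the nesting limit beyond the default of 64; this reallocates the
    // decoder's stacks through the env.
    upb_pbdecoder_setmaxnesting(d, 128);

    bool ok = upb_bufsrc_putbuf(buf, len, upb_pbdecoder_input(d));
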
@@ -8045,6 +8450,68 @@ upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d) {
8045
8450
 
8046
8451
  #include <stdlib.h>
8047
8452
 
8453
+ // The output buffer is divided into segments; a segment is a string of data
8454
+ // that is "ready to go" -- it does not need any varint lengths inserted into
8455
+ // the middle. The seams between segments are where varints will be inserted
8456
+ // once they are known.
8457
+ //
8458
+ // We also use the concept of a "run", which is a range of encoded bytes that
8459
+ // occur at a single submessage level. Every segment contains one or more runs.
8460
+ //
8461
+ // A segment can span messages. Consider:
8462
+ //
8463
+ // .--Submessage lengths---------.
8464
+ // | | |
8465
+ // | V V
8466
+ // V | |--------------- | |-----------------
8467
+ // Submessages: | |-----------------------------------------------
8468
+ // Top-level msg: ------------------------------------------------------------
8469
+ //
8470
+ // Segments: ----- ------------------- -----------------
8471
+ // Runs: *---- *--------------*--- *----------------
8472
+ // (* marks the start)
8473
+ //
8474
+ // Note that the top-level menssage is not in any segment because it does not
8475
+ // have any length preceding it.
8476
+ //
8477
+ // A segment is only interrupted when another length needs to be inserted. So
8478
+ // observe how the second segment spans both the inner submessage and part of
8479
+ // the next enclosing message.
8480
+ typedef struct {
8481
+ uint32_t msglen; // The length to varint-encode before this segment.
8482
+ uint32_t seglen; // Length of the segment.
8483
+ } upb_pb_encoder_segment;
8484
+
8485
+ struct upb_pb_encoder {
8486
+ upb_env *env;
8487
+
8488
+ // Our input and output.
8489
+ upb_sink input_;
8490
+ upb_bytessink *output_;
8491
+
8492
+ // The "subclosure" -- used as the inner closure as part of the bytessink
8493
+ // protocol.
8494
+ void *subc;
8495
+
8496
+ // The output buffer and limit, and our current write position. "buf"
8497
+ // initially points to "initbuf", but is dynamically allocated if we need to
8498
+ // grow beyond the initial size.
8499
+ char *buf, *ptr, *limit;
8500
+
8501
+ // The beginning of the current run, or undefined if we are at the top level.
8502
+ char *runbegin;
8503
+
8504
+ // The list of segments we are accumulating.
8505
+ upb_pb_encoder_segment *segbuf, *segptr, *seglimit;
8506
+
8507
+ // The stack of enclosing submessages. Each entry in the stack points to the
8508
+ // segment where this submessage's length is being accumulated.
8509
+ int *stack, *top, *stacklimit;
8510
+
8511
+ // Depth of startmsg/endmsg calls.
8512
+ int depth;
8513
+ };
8514
+
8048
8515
  /* low-level buffering ********************************************************/
8049
8516
 
8050
8517
  // Low-level functions for interacting with the output buffer.
@@ -8062,25 +8529,23 @@ static upb_pb_encoder_segment *top(upb_pb_encoder *e) {
8062
8529
  // Call to ensure that at least "bytes" bytes are available for writing at
8063
8530
  // e->ptr. Returns false if the bytes could not be allocated.
8064
8531
  static bool reserve(upb_pb_encoder *e, size_t bytes) {
8065
- if ((e->limit - e->ptr) < bytes) {
8532
+ if ((size_t)(e->limit - e->ptr) < bytes) {
8533
+ // Grow buffer.
8066
8534
  size_t needed = bytes + (e->ptr - e->buf);
8067
8535
  size_t old_size = e->limit - e->buf;
8536
+
8068
8537
  size_t new_size = old_size;
8538
+
8069
8539
  while (new_size < needed) {
8070
8540
  new_size *= 2;
8071
8541
  }
8072
8542
 
8073
- char *realloc_from = (e->buf == e->initbuf) ? NULL : e->buf;
8074
- char *new_buf = realloc(realloc_from, new_size);
8543
+ char *new_buf = upb_env_realloc(e->env, e->buf, old_size, new_size);
8075
8544
 
8076
8545
  if (new_buf == NULL) {
8077
8546
  return false;
8078
8547
  }
8079
8548
 
8080
- if (realloc_from == NULL) {
8081
- memcpy(new_buf, e->initbuf, old_size);
8082
- }
8083
-
8084
8549
  e->ptr = new_buf + (e->ptr - e->buf);
8085
8550
  e->runbegin = new_buf + (e->runbegin - e->buf);
8086
8551
  e->limit = new_buf + new_size;
@@ -8093,7 +8558,7 @@ static bool reserve(upb_pb_encoder *e, size_t bytes) {
8093
8558
  // Call when "bytes" bytes have been writte at e->ptr. The caller *must* have
8094
8559
  // previously called reserve() with at least this many bytes.
8095
8560
  static void encoder_advance(upb_pb_encoder *e, size_t bytes) {
8096
- assert((e->limit - e->ptr) >= bytes);
8561
+ assert((size_t)(e->limit - e->ptr) >= bytes);
8097
8562
  e->ptr += bytes;
8098
8563
  }
8099
8564
 
@@ -8149,21 +8614,17 @@ static bool start_delim(upb_pb_encoder *e) {
8149
8614
  }
8150
8615
 
8151
8616
  if (++e->segptr == e->seglimit) {
8152
- upb_pb_encoder_segment *realloc_from =
8153
- (e->segbuf == e->seginitbuf) ? NULL : e->segbuf;
8617
+ // Grow segment buffer.
8154
8618
  size_t old_size =
8155
8619
  (e->seglimit - e->segbuf) * sizeof(upb_pb_encoder_segment);
8156
8620
  size_t new_size = old_size * 2;
8157
- upb_pb_encoder_segment *new_buf = realloc(realloc_from, new_size);
8621
+ upb_pb_encoder_segment *new_buf =
8622
+ upb_env_realloc(e->env, e->segbuf, old_size, new_size);
8158
8623
 
8159
8624
  if (new_buf == NULL) {
8160
8625
  return false;
8161
8626
  }
8162
8627
 
8163
- if (realloc_from == NULL) {
8164
- memcpy(new_buf, e->seginitbuf, old_size);
8165
- }
8166
-
8167
8628
  e->segptr = new_buf + (e->segptr - e->segbuf);
8168
8629
  e->seglimit = new_buf + (new_size / sizeof(upb_pb_encoder_segment));
8169
8630
  e->segbuf = new_buf;
@@ -8434,6 +8895,12 @@ static void newhandlers_callback(const void *closure, upb_handlers *h) {
8434
8895
  }
8435
8896
  }
8436
8897
 
8898
+ void upb_pb_encoder_reset(upb_pb_encoder *e) {
8899
+ e->segptr = NULL;
8900
+ e->top = NULL;
8901
+ e->depth = 0;
8902
+ }
8903
+
8437
8904
 
8438
8905
  /* public API *****************************************************************/
8439
8906
 
@@ -8442,40 +8909,42 @@ const upb_handlers *upb_pb_encoder_newhandlers(const upb_msgdef *m,
8442
8909
  return upb_handlers_newfrozen(m, owner, newhandlers_callback, NULL);
8443
8910
  }
8444
8911
 
8445
- #define ARRAYSIZE(x) (sizeof(x) / sizeof(x[0]))
8912
+ upb_pb_encoder *upb_pb_encoder_create(upb_env *env, const upb_handlers *h,
8913
+ upb_bytessink *output) {
8914
+ const size_t initial_bufsize = 256;
8915
+ const size_t initial_segbufsize = 16;
8916
+ // TODO(haberman): make this configurable.
8917
+ const size_t stack_size = 64;
8918
+ #ifndef NDEBUG
8919
+ const size_t size_before = upb_env_bytesallocated(env);
8920
+ #endif
8446
8921
 
8447
- void upb_pb_encoder_init(upb_pb_encoder *e, const upb_handlers *h) {
8448
- e->output_ = NULL;
8449
- e->subc = NULL;
8450
- e->buf = e->initbuf;
8451
- e->ptr = e->buf;
8452
- e->limit = e->buf + ARRAYSIZE(e->initbuf);
8453
- e->segbuf = e->seginitbuf;
8454
- e->seglimit = e->segbuf + ARRAYSIZE(e->seginitbuf);
8455
- e->stacklimit = e->stack + ARRAYSIZE(e->stack);
8456
- upb_sink_reset(&e->input_, h, e);
8457
- }
8922
+ upb_pb_encoder *e = upb_env_malloc(env, sizeof(upb_pb_encoder));
8923
+ if (!e) return NULL;
8458
8924
 
8459
- void upb_pb_encoder_uninit(upb_pb_encoder *e) {
8460
- if (e->buf != e->initbuf) {
8461
- free(e->buf);
8462
- }
8925
+ e->buf = upb_env_malloc(env, initial_bufsize);
8926
+ e->segbuf = upb_env_malloc(env, initial_segbufsize * sizeof(*e->segbuf));
8927
+ e->stack = upb_env_malloc(env, stack_size * sizeof(*e->stack));
8463
8928
 
8464
- if (e->segbuf != e->seginitbuf) {
8465
- free(e->segbuf);
8929
+ if (!e->buf || !e->segbuf || !e->stack) {
8930
+ return NULL;
8466
8931
  }
8467
- }
8468
8932
 
8469
- void upb_pb_encoder_resetoutput(upb_pb_encoder *e, upb_bytessink *output) {
8933
+ e->limit = e->buf + initial_bufsize;
8934
+ e->seglimit = e->segbuf + initial_segbufsize;
8935
+ e->stacklimit = e->stack + stack_size;
8936
+
8470
8937
  upb_pb_encoder_reset(e);
8938
+ upb_sink_reset(&e->input_, h, e);
8939
+
8940
+ e->env = env;
8471
8941
  e->output_ = output;
8472
8942
  e->subc = output->closure;
8473
- }
8943
+ e->ptr = e->buf;
8474
8944
 
8475
- void upb_pb_encoder_reset(upb_pb_encoder *e) {
8476
- e->segptr = NULL;
8477
- e->top = NULL;
8478
- e->depth = 0;
8945
+ // If this fails, increase the value in encoder.h.
8946
+ assert(upb_env_bytesallocated(env) - size_before <= UPB_PB_ENCODER_SIZE);
8947
+ return e;
8479
8948
  }
8480
8949
 
8481
8950
  upb_sink *upb_pb_encoder_input(upb_pb_encoder *e) { return &e->input_; }
@@ -8500,26 +8969,26 @@ upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
8500
8969
  const upb_pbdecodermethod *decoder_m =
8501
8970
  upb_pbdecodermethod_new(&opts, &decoder_m);
8502
8971
 
8503
- upb_pbdecoder decoder;
8504
- upb_descreader reader;
8972
+ upb_env env;
8973
+ upb_env_init(&env);
8974
+ upb_env_reporterrorsto(&env, status);
8505
8975
 
8506
- upb_pbdecoder_init(&decoder, decoder_m, status);
8507
- upb_descreader_init(&reader, reader_h, status);
8508
- upb_pbdecoder_resetoutput(&decoder, upb_descreader_input(&reader));
8976
+ upb_descreader *reader = upb_descreader_create(&env, reader_h);
8977
+ upb_pbdecoder *decoder =
8978
+ upb_pbdecoder_create(&env, decoder_m, upb_descreader_input(reader));
8509
8979
 
8510
8980
  // Push input data.
8511
- bool ok = upb_bufsrc_putbuf(str, len, upb_pbdecoder_input(&decoder));
8981
+ bool ok = upb_bufsrc_putbuf(str, len, upb_pbdecoder_input(decoder));
8512
8982
 
8513
8983
  upb_def **ret = NULL;
8514
8984
 
8515
8985
  if (!ok) goto cleanup;
8516
- upb_def **defs = upb_descreader_getdefs(&reader, owner, n);
8986
+ upb_def **defs = upb_descreader_getdefs(reader, owner, n);
8517
8987
  ret = malloc(sizeof(upb_def*) * (*n));
8518
8988
  memcpy(ret, defs, sizeof(upb_def*) * (*n));
8519
8989
 
8520
8990
  cleanup:
8521
- upb_pbdecoder_uninit(&decoder);
8522
- upb_descreader_uninit(&reader);
8991
+ upb_env_uninit(&env);
8523
8992
  upb_handlers_unref(reader_h, &reader_h);
8524
8993
  upb_pbdecodermethod_unref(decoder_m, &decoder_m);
8525
8994
  return ret;
@@ -8584,6 +9053,14 @@ bool upb_load_descriptor_file_into_symtab(upb_symtab *symtab, const char *fname,
8584
9053
  #include <string.h>
8585
9054
 
8586
9055
 
9056
+ struct upb_textprinter {
9057
+ upb_sink input_;
9058
+ upb_bytessink *output_;
9059
+ int indent_depth_;
9060
+ bool single_line_;
9061
+ void *subc;
9062
+ };
9063
+
8587
9064
  #define CHECK(x) if ((x) < 0) goto err;
8588
9065
 
8589
9066
  static const char *shortname(const char *longname) {
@@ -8801,24 +9278,6 @@ err:
  return false;
  }
 
-
- /* Public API *****************************************************************/
-
- void upb_textprinter_init(upb_textprinter *p, const upb_handlers *h) {
- p->single_line_ = false;
- p->indent_depth_ = 0;
- upb_sink_reset(&p->input_, h, p);
- }
-
- void upb_textprinter_uninit(upb_textprinter *p) {
- UPB_UNUSED(p);
- }
-
- void upb_textprinter_reset(upb_textprinter *p, bool single_line) {
- p->single_line_ = single_line;
- p->indent_depth_ = 0;
- }
-
  static void onmreg(const void *c, upb_handlers *h) {
  UPB_UNUSED(c);
  const upb_msgdef *m = upb_handlers_msgdef(h);
@@ -8878,6 +9337,26 @@ static void onmreg(const void *c, upb_handlers *h) {
  }
  }
 
+ static void textprinter_reset(upb_textprinter *p, bool single_line) {
+ p->single_line_ = single_line;
+ p->indent_depth_ = 0;
+ }
+
+
+ /* Public API *****************************************************************/
+
+ upb_textprinter *upb_textprinter_create(upb_env *env, const upb_handlers *h,
+ upb_bytessink *output) {
+ upb_textprinter *p = upb_env_malloc(env, sizeof(upb_textprinter));
+ if (!p) return NULL;
+
+ p->output_ = output;
+ upb_sink_reset(&p->input_, h, p);
+ textprinter_reset(p, false);
+
+ return p;
+ }
+
  const upb_handlers *upb_textprinter_newhandlers(const upb_msgdef *m,
  const void *owner) {
  return upb_handlers_newfrozen(m, owner, &onmreg, NULL);
@@ -8885,11 +9364,6 @@ const upb_handlers *upb_textprinter_newhandlers(const upb_msgdef *m,
 
  upb_sink *upb_textprinter_input(upb_textprinter *p) { return &p->input_; }
 
- bool upb_textprinter_resetoutput(upb_textprinter *p, upb_bytessink *output) {
- p->output_ = output;
- return true;
- }
-
  void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line) {
  p->single_line_ = single_line;
  }
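
Taken together, these text-printer hunks replace init/uninit/resetoutput with a single env-backed constructor whose output sink is fixed at creation time. A sketch of the resulting setup, using only calls visible in this diff; msgdef and bytes_output are assumed to exist.

/* Sketch: 'msgdef' and the destination 'bytes_output' are assumed to exist. */
const upb_handlers *h = upb_textprinter_newhandlers(msgdef, &h);

upb_env env;
upb_env_init(&env);

upb_textprinter *tp = upb_textprinter_create(&env, h, bytes_output);
upb_textprinter_setsingleline(tp, true);       /* optional single-line mode */
upb_sink *input = upb_textprinter_input(tp);   /* feed message events here */

upb_env_uninit(&env);
upb_handlers_unref(h, &h);
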
@@ -9052,6 +9526,71 @@ upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) {
  #include <errno.h>
 
 
+ #define UPB_JSON_MAX_DEPTH 64
+
+ typedef struct {
+ upb_sink sink;
+
+ // The current message in which we're parsing, and the field whose value we're
+ // expecting next.
+ const upb_msgdef *m;
+ const upb_fielddef *f;
+
+ // We are in a repeated-field context, ready to emit mapentries as
+ // submessages. This flag alters the start-of-object (open-brace) behavior to
+ // begin a sequence of mapentry messages rather than a single submessage.
+ bool is_map;
+
+ // We are in a map-entry message context. This flag is set when parsing the
+ // value field of a single map entry and indicates to all value-field parsers
+ // (subobjects, strings, numbers, and bools) that the map-entry submessage
+ // should end as soon as the value is parsed.
+ bool is_mapentry;
+
+ // If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent
+ // message's map field that we're currently parsing. This differs from |f|
+ // because |f| is the field in the *current* message (i.e., the map-entry
+ // message itself), not the parent's field that leads to this map.
+ const upb_fielddef *mapfield;
+ } upb_jsonparser_frame;
+
+ struct upb_json_parser {
+ upb_env *env;
+ upb_byteshandler input_handler_;
+ upb_bytessink input_;
+
+ // Stack to track the JSON scopes we are in.
+ upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH];
+ upb_jsonparser_frame *top;
+ upb_jsonparser_frame *limit;
+
+ upb_status *status;
+
+ // Ragel's internal parsing stack for the parsing state machine.
+ int current_state;
+ int parser_stack[UPB_JSON_MAX_DEPTH];
+ int parser_top;
+
+ // The handle for the current buffer.
+ const upb_bufhandle *handle;
+
+ // Accumulate buffer. See details in parser.rl.
+ const char *accumulated;
+ size_t accumulated_len;
+ char *accumulate_buf;
+ size_t accumulate_buf_size;
+
+ // Multi-part text data. See details in parser.rl.
+ int multipart_state;
+ upb_selector_t string_selector;
+
+ // Input capture. See details in parser.rl.
+ const char *capture;
+
+ // Intermediate result of parsing a unicode escape sequence.
+ uint32_t digit;
+ };
+
  #define PARSER_CHECK_RETURN(x) if (!(x)) return false
 
  // Used to signal that a capture has been suspended.
@@ -9254,12 +9793,13 @@ static void accumulate_clear(upb_json_parser *p) {
 
  // Used internally by accumulate_append().
  static bool accumulate_realloc(upb_json_parser *p, size_t need) {
- size_t new_size = UPB_MAX(p->accumulate_buf_size, 128);
+ size_t old_size = p->accumulate_buf_size;
+ size_t new_size = UPB_MAX(old_size, 128);
  while (new_size < need) {
  new_size = saturating_multiply(new_size, 2);
  }
 
- void *mem = realloc(p->accumulate_buf, new_size);
+ void *mem = upb_env_realloc(p->env, p->accumulate_buf, old_size, new_size);
  if (!mem) {
  upb_status_seterrmsg(p->status, "Out of memory allocating buffer.");
  return false;
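
The accumulate buffer now grows through the parser's env (upb_env_realloc) instead of plain realloc, but the sizing policy is unchanged: start at 128 bytes and double with saturation until the requested size fits. A standalone sketch of that growth rule with hypothetical helper names, independent of upb:

#include <stddef.h>
#include <stdint.h>

/* Hypothetical helpers illustrating the growth rule used by
 * accumulate_realloc(): start at 128 bytes, double with saturation. */
static size_t sat_mul2(size_t v) {
  return v > SIZE_MAX / 2 ? SIZE_MAX : v * 2;
}

static size_t next_buf_size(size_t old_size, size_t need) {
  size_t new_size = old_size < 128 ? 128 : old_size;
  while (new_size < need) {
    new_size = sat_mul2(new_size);
  }
  return new_size;
}
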
@@ -10008,11 +10548,11 @@ static void end_object(upb_json_parser *p) {
  // final state once, when the closing '"' is seen.
 
 
- #line 1085 "upb/json/parser.rl"
+ #line 1151 "upb/json/parser.rl"
 
 
 
- #line 997 "upb/json/parser.c"
+ #line 1063 "upb/json/parser.c"
  static const char _json_actions[] = {
  0, 1, 0, 1, 2, 1, 3, 1,
  5, 1, 6, 1, 7, 1, 8, 1,
@@ -10154,8 +10694,6 @@ static const char _json_trans_actions[] = {
  };
 
  static const int json_start = 1;
- static const int json_first_final = 56;
- static const int json_error = 0;
 
  static const int json_en_number_machine = 10;
  static const int json_en_string_machine = 19;
@@ -10163,7 +10701,7 @@ static const int json_en_value_machine = 27;
  static const int json_en_main = 1;
 
 
- #line 1088 "upb/json/parser.rl"
+ #line 1154 "upb/json/parser.rl"
 
  size_t parse(void *closure, const void *hd, const char *buf, size_t size,
  const upb_bufhandle *handle) {
@@ -10183,7 +10721,7 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size,
  capture_resume(parser, buf);
 
 
- #line 1168 "upb/json/parser.c"
+ #line 1232 "upb/json/parser.c"
  {
  int _klen;
  unsigned int _trans;
@@ -10258,118 +10796,118 @@ _match:
  switch ( *_acts++ )
  {
  case 0:
- #line 1000 "upb/json/parser.rl"
+ #line 1066 "upb/json/parser.rl"
  { p--; {cs = stack[--top]; goto _again;} }
  break;
  case 1:
- #line 1001 "upb/json/parser.rl"
+ #line 1067 "upb/json/parser.rl"
  { p--; {stack[top++] = cs; cs = 10; goto _again;} }
  break;
  case 2:
- #line 1005 "upb/json/parser.rl"
+ #line 1071 "upb/json/parser.rl"
  { start_text(parser, p); }
  break;
  case 3:
- #line 1006 "upb/json/parser.rl"
+ #line 1072 "upb/json/parser.rl"
  { CHECK_RETURN_TOP(end_text(parser, p)); }
  break;
  case 4:
- #line 1012 "upb/json/parser.rl"
+ #line 1078 "upb/json/parser.rl"
  { start_hex(parser); }
  break;
  case 5:
- #line 1013 "upb/json/parser.rl"
+ #line 1079 "upb/json/parser.rl"
  { hexdigit(parser, p); }
  break;
  case 6:
- #line 1014 "upb/json/parser.rl"
+ #line 1080 "upb/json/parser.rl"
  { CHECK_RETURN_TOP(end_hex(parser)); }
  break;
  case 7:
- #line 1020 "upb/json/parser.rl"
+ #line 1086 "upb/json/parser.rl"
  { CHECK_RETURN_TOP(escape(parser, p)); }
  break;
  case 8:
- #line 1026 "upb/json/parser.rl"
+ #line 1092 "upb/json/parser.rl"
  { p--; {cs = stack[--top]; goto _again;} }
  break;
  case 9:
- #line 1029 "upb/json/parser.rl"
+ #line 1095 "upb/json/parser.rl"
  { {stack[top++] = cs; cs = 19; goto _again;} }
  break;
  case 10:
- #line 1031 "upb/json/parser.rl"
+ #line 1097 "upb/json/parser.rl"
  { p--; {stack[top++] = cs; cs = 27; goto _again;} }
  break;
  case 11:
- #line 1036 "upb/json/parser.rl"
+ #line 1102 "upb/json/parser.rl"
  { start_member(parser); }
  break;
  case 12:
- #line 1037 "upb/json/parser.rl"
+ #line 1103 "upb/json/parser.rl"
  { CHECK_RETURN_TOP(end_membername(parser)); }
  break;
  case 13:
- #line 1040 "upb/json/parser.rl"
+ #line 1106 "upb/json/parser.rl"
  { end_member(parser); }
  break;
  case 14:
- #line 1046 "upb/json/parser.rl"
+ #line 1112 "upb/json/parser.rl"
  { start_object(parser); }
  break;
  case 15:
- #line 1049 "upb/json/parser.rl"
+ #line 1115 "upb/json/parser.rl"
  { end_object(parser); }
  break;
  case 16:
- #line 1055 "upb/json/parser.rl"
+ #line 1121 "upb/json/parser.rl"
  { CHECK_RETURN_TOP(start_array(parser)); }
  break;
  case 17:
- #line 1059 "upb/json/parser.rl"
+ #line 1125 "upb/json/parser.rl"
  { end_array(parser); }
  break;
  case 18:
- #line 1064 "upb/json/parser.rl"
+ #line 1130 "upb/json/parser.rl"
  { start_number(parser, p); }
  break;
  case 19:
- #line 1065 "upb/json/parser.rl"
+ #line 1131 "upb/json/parser.rl"
  { CHECK_RETURN_TOP(end_number(parser, p)); }
  break;
  case 20:
- #line 1067 "upb/json/parser.rl"
+ #line 1133 "upb/json/parser.rl"
  { CHECK_RETURN_TOP(start_stringval(parser)); }
  break;
  case 21:
- #line 1068 "upb/json/parser.rl"
+ #line 1134 "upb/json/parser.rl"
  { CHECK_RETURN_TOP(end_stringval(parser)); }
  break;
  case 22:
- #line 1070 "upb/json/parser.rl"
+ #line 1136 "upb/json/parser.rl"
  { CHECK_RETURN_TOP(parser_putbool(parser, true)); }
  break;
  case 23:
- #line 1072 "upb/json/parser.rl"
+ #line 1138 "upb/json/parser.rl"
  { CHECK_RETURN_TOP(parser_putbool(parser, false)); }
  break;
  case 24:
- #line 1074 "upb/json/parser.rl"
+ #line 1140 "upb/json/parser.rl"
  { /* null value */ }
  break;
  case 25:
- #line 1076 "upb/json/parser.rl"
+ #line 1142 "upb/json/parser.rl"
  { CHECK_RETURN_TOP(start_subobject(parser)); }
  break;
  case 26:
- #line 1077 "upb/json/parser.rl"
+ #line 1143 "upb/json/parser.rl"
  { end_subobject(parser); }
  break;
  case 27:
- #line 1082 "upb/json/parser.rl"
+ #line 1148 "upb/json/parser.rl"
  { p--; {cs = stack[--top]; goto _again;} }
  break;
- #line 1354 "upb/json/parser.c"
+ #line 1418 "upb/json/parser.c"
  }
  }
 
@@ -10382,7 +10920,7 @@ _again:
  _out: {}
  }
 
- #line 1107 "upb/json/parser.rl"
+ #line 1173 "upb/json/parser.rl"
 
  if (p != pe) {
  upb_status_seterrf(parser->status, "Parse error at %s\n", p);
@@ -10401,29 +10939,17 @@ error:
  bool end(void *closure, const void *hd) {
  UPB_UNUSED(closure);
  UPB_UNUSED(hd);
- return true;
- }
 
-
- /* Public API *****************************************************************/
-
- void upb_json_parser_init(upb_json_parser *p, upb_status *status) {
- p->limit = p->stack + UPB_JSON_MAX_DEPTH;
- p->accumulate_buf = NULL;
- p->accumulate_buf_size = 0;
- upb_byteshandler_init(&p->input_handler_);
- upb_byteshandler_setstring(&p->input_handler_, parse, NULL);
- upb_byteshandler_setendstr(&p->input_handler_, end, NULL);
- upb_bytessink_reset(&p->input_, &p->input_handler_, p);
- p->status = status;
- }
-
- void upb_json_parser_uninit(upb_json_parser *p) {
- upb_byteshandler_uninit(&p->input_handler_);
- free(p->accumulate_buf);
+ // Prevent compile warning on unused static constants.
+ UPB_UNUSED(json_start);
+ UPB_UNUSED(json_en_number_machine);
+ UPB_UNUSED(json_en_string_machine);
+ UPB_UNUSED(json_en_value_machine);
+ UPB_UNUSED(json_en_main);
+ return true;
  }
 
- void upb_json_parser_reset(upb_json_parser *p) {
+ static void json_parser_reset(upb_json_parser *p) {
  p->top = p->stack;
  p->top->f = NULL;
  p->top->is_map = false;
@@ -10433,25 +10959,48 @@ void upb_json_parser_reset(upb_json_parser *p) {
  int top;
  // Emit Ragel initialization of the parser.
 
- #line 1418 "upb/json/parser.c"
+ #line 1470 "upb/json/parser.c"
  {
  cs = json_start;
  top = 0;
  }
 
- #line 1157 "upb/json/parser.rl"
+ #line 1211 "upb/json/parser.rl"
  p->current_state = cs;
  p->parser_top = top;
  accumulate_clear(p);
  p->multipart_state = MULTIPART_INACTIVE;
  p->capture = NULL;
+ p->accumulated = NULL;
  }
 
- void upb_json_parser_resetoutput(upb_json_parser *p, upb_sink *sink) {
- upb_json_parser_reset(p);
- upb_sink_reset(&p->top->sink, sink->handlers, sink->closure);
- p->top->m = upb_handlers_msgdef(sink->handlers);
- p->accumulated = NULL;
+
+ /* Public API *****************************************************************/
+
+ upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) {
+ #ifndef NDEBUG
+ const size_t size_before = upb_env_bytesallocated(env);
+ #endif
+ upb_json_parser *p = upb_env_malloc(env, sizeof(upb_json_parser));
+ if (!p) return false;
+
+ p->env = env;
+ p->limit = p->stack + UPB_JSON_MAX_DEPTH;
+ p->accumulate_buf = NULL;
+ p->accumulate_buf_size = 0;
+ upb_byteshandler_init(&p->input_handler_);
+ upb_byteshandler_setstring(&p->input_handler_, parse, NULL);
+ upb_byteshandler_setendstr(&p->input_handler_, end, NULL);
+ upb_bytessink_reset(&p->input_, &p->input_handler_, p);
+
+ json_parser_reset(p);
+ upb_sink_reset(&p->top->sink, output->handlers, output->closure);
+ p->top->m = upb_handlers_msgdef(output->handlers);
+
+ // If this fails, uncomment and increase the value in parser.h.
+ // fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before);
+ assert(upb_env_bytesallocated(env) - size_before <= UPB_JSON_PARSER_SIZE);
+ return p;
  }
 
  upb_bytessink *upb_json_parser_input(upb_json_parser *p) {
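
The JSON parser follows the same create-style API: it is allocated from a upb_env, bound to its output sink at construction, and released when the env is torn down. A minimal driving sketch built from calls that appear in this diff; the output upb_sink (already bound to the target message's handlers and closure) and the status object are assumed to be set up elsewhere.

/* Sketch: 'output' is an upb_sink bound to the target message's handlers;
 * 'json' and 'len' are the raw JSON bytes; 'status' is the caller's
 * upb_status*. */
upb_env env;
upb_env_init(&env);
upb_env_reporterrorsto(&env, status);

upb_json_parser *parser = upb_json_parser_create(&env, output);
bool ok = parser != NULL &&
          upb_bufsrc_putbuf(json, len, upb_json_parser_input(parser));

upb_env_uninit(&env);  /* releases the parser and its accumulate buffer */
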
@@ -10473,6 +11022,27 @@ upb_bytessink *upb_json_parser_input(upb_json_parser *p) {
  #include <string.h>
  #include <stdint.h>
 
+ struct upb_json_printer {
+ upb_sink input_;
+ // BytesSink closure.
+ void *subc_;
+ upb_bytessink *output_;
+
+ // We track the depth so that we know when to emit startstr/endstr on the
+ // output.
+ int depth_;
+
+ // Have we emitted the first element? This state is necessary to emit commas
+ // without leaving a trailing comma in arrays/maps. We keep this state per
+ // frame depth.
+ //
+ // Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages.
+ // We count frames (contexts in which we separate elements by commas) as both
+ // repeated fields and messages (maps), and the worst case is a
+ // message->repeated field->submessage->repeated field->... nesting.
+ bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2];
+ };
+
  // StringPiece; a pointer plus a length.
  typedef struct {
  const char *ptr;
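
The first_elem_ array added above is the standard trick for comma placement: remember, per open frame, whether anything has been emitted yet, and write the separator only before the second and later elements. A self-contained illustration of the same idea (not upb code):

#include <stdbool.h>
#include <stdio.h>

/* Print a JSON array of ints with commas only between elements, tracking
 * whether the current frame has emitted its first element yet; upb keeps one
 * such flag per frame in first_elem_. */
static void print_int_array(const int *vals, int n) {
  bool first = true;
  putchar('[');
  for (int i = 0; i < n; i++) {
    if (!first) putchar(',');
    first = false;
    printf("%d", vals[i]);
  }
  putchar(']');
}
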
@@ -10620,7 +11190,7 @@ static bool putkey(void *closure, const void *handler_data) {
  return true;
  }
 
- #define CHKFMT(val) if ((val) == -1) return false;
+ #define CHKFMT(val) if ((val) == (size_t)-1) return false;
  #define CHK(val) if (!(val)) return false;
 
  #define TYPE_HANDLERS(type, fmt_func) \
@@ -11189,25 +11759,29 @@ void printer_sethandlers(const void *closure, upb_handlers *h) {
  #undef TYPE
  }
 
- /* Public API *****************************************************************/
-
- void upb_json_printer_init(upb_json_printer *p, const upb_handlers *h) {
- p->output_ = NULL;
+ static void json_printer_reset(upb_json_printer *p) {
  p->depth_ = 0;
- upb_sink_reset(&p->input_, h, p);
  }
 
- void upb_json_printer_uninit(upb_json_printer *p) {
- UPB_UNUSED(p);
- }
 
- void upb_json_printer_reset(upb_json_printer *p) {
- p->depth_ = 0;
- }
+ /* Public API *****************************************************************/
+
+ upb_json_printer *upb_json_printer_create(upb_env *e, const upb_handlers *h,
+ upb_bytessink *output) {
+ #ifndef NDEBUG
+ size_t size_before = upb_env_bytesallocated(e);
+ #endif
+
+ upb_json_printer *p = upb_env_malloc(e, sizeof(upb_json_printer));
+ if (!p) return NULL;
 
- void upb_json_printer_resetoutput(upb_json_printer *p, upb_bytessink *output) {
- upb_json_printer_reset(p);
  p->output_ = output;
+ json_printer_reset(p);
+ upb_sink_reset(&p->input_, h, p);
+
+ // If this fails, increase the value in printer.h.
+ assert(upb_env_bytesallocated(e) - size_before <= UPB_JSON_PRINTER_SIZE);
+ return p;
  }
 
  upb_sink *upb_json_printer_input(upb_json_printer *p) {
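
Creating a JSON printer now mirrors the parser: allocate it from an env, pass the message's printer handlers and the destination bytessink, and push message events into its input sink. A hedged sketch; the handlers value h is assumed to come from the printer's handlers factory, which is not part of this diff.

/* Sketch: 'h' (JSON printer handlers for the message) and 'bytes_output'
 * are assumed to be set up elsewhere. */
upb_env env;
upb_env_init(&env);

upb_json_printer *printer = upb_json_printer_create(&env, h, bytes_output);
upb_sink *sink = upb_json_printer_input(printer);  /* push message events here */

/* ... drive 'sink' from a decoder or hand-written handler calls ... */

upb_env_uninit(&env);  /* frees the printer; there is no uninit to pair */
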