google-protobuf 3.2.0 → 3.9.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of google-protobuf might be problematic. Click here for more details.

@@ -44,30 +44,90 @@ VALUE noleak_rb_str_cat(VALUE rb_str, const char *str, long len) {
44
44
  return rb_str;
45
45
  }
46
46
 
47
+ // The code below also comes from upb's prototype Ruby binding, developed by
48
+ // haberman@.
49
+
50
+ /* stringsink *****************************************************************/
51
+
52
+ static void *stringsink_start(void *_sink, const void *hd, size_t size_hint) {
53
+ stringsink *sink = _sink;
54
+ sink->len = 0;
55
+ return sink;
56
+ }
57
+
58
+ static size_t stringsink_string(void *_sink, const void *hd, const char *ptr,
59
+ size_t len, const upb_bufhandle *handle) {
60
+ stringsink *sink = _sink;
61
+ size_t new_size = sink->size;
62
+
63
+ UPB_UNUSED(hd);
64
+ UPB_UNUSED(handle);
65
+
66
+ while (sink->len + len > new_size) {
67
+ new_size *= 2;
68
+ }
69
+
70
+ if (new_size != sink->size) {
71
+ sink->ptr = realloc(sink->ptr, new_size);
72
+ sink->size = new_size;
73
+ }
74
+
75
+ memcpy(sink->ptr + sink->len, ptr, len);
76
+ sink->len += len;
77
+
78
+ return len;
79
+ }
80
+
81
+ void stringsink_init(stringsink *sink) {
82
+ upb_byteshandler_init(&sink->handler);
83
+ upb_byteshandler_setstartstr(&sink->handler, stringsink_start, NULL);
84
+ upb_byteshandler_setstring(&sink->handler, stringsink_string, NULL);
85
+
86
+ upb_bytessink_reset(&sink->sink, &sink->handler, sink);
87
+
88
+ sink->size = 32;
89
+ sink->ptr = malloc(sink->size);
90
+ sink->len = 0;
91
+ }
92
+
93
+ void stringsink_uninit(stringsink *sink) {
94
+ free(sink->ptr);
95
+ }
96
+
47
97
  // -----------------------------------------------------------------------------
48
98
  // Parsing.
49
99
  // -----------------------------------------------------------------------------
50
100
 
51
101
  #define DEREF(msg, ofs, type) *(type*)(((uint8_t *)msg) + ofs)
52
102
 
53
- // Creates a handlerdata that simply contains the offset for this field.
54
- static const void* newhandlerdata(upb_handlers* h, uint32_t ofs) {
55
- size_t* hd_ofs = ALLOC(size_t);
56
- *hd_ofs = ofs;
57
- upb_handlers_addcleanup(h, hd_ofs, xfree);
58
- return hd_ofs;
103
+ typedef struct {
104
+ size_t ofs;
105
+ int32_t hasbit;
106
+ } field_handlerdata_t;
107
+
108
+ // Creates a handlerdata that contains the offset and the hasbit for the field
109
+ static const void* newhandlerdata(upb_handlers* h, uint32_t ofs, int32_t hasbit) {
110
+ field_handlerdata_t *hd = ALLOC(field_handlerdata_t);
111
+ hd->ofs = ofs;
112
+ hd->hasbit = hasbit;
113
+ upb_handlers_addcleanup(h, hd, xfree);
114
+ return hd;
59
115
  }
60
116
 
61
117
  typedef struct {
62
118
  size_t ofs;
119
+ int32_t hasbit;
63
120
  const upb_msgdef *md;
64
121
  } submsg_handlerdata_t;
65
122
 
66
123
  // Creates a handlerdata that contains offset and submessage type information.
67
- static const void *newsubmsghandlerdata(upb_handlers* h, uint32_t ofs,
124
+ static const void *newsubmsghandlerdata(upb_handlers* h,
125
+ uint32_t ofs,
126
+ int32_t hasbit,
68
127
  const upb_fielddef* f) {
69
128
  submsg_handlerdata_t *hd = ALLOC(submsg_handlerdata_t);
70
129
  hd->ofs = ofs;
130
+ hd->hasbit = hasbit;
71
131
  hd->md = upb_fielddef_msgsubdef(f);
72
132
  upb_handlers_addcleanup(h, hd, xfree);
73
133
  return hd;
@@ -139,6 +199,13 @@ static void* appendstr_handler(void *closure,
139
199
  return (void*)str;
140
200
  }
141
201
 
202
+ static void set_hasbit(void *closure, int32_t hasbit) {
203
+ if (hasbit > 0) {
204
+ uint8_t* storage = closure;
205
+ storage[hasbit/8] |= 1 << (hasbit % 8);
206
+ }
207
+ }
208
+
142
209
  // Appends a 'bytes' string to a repeated field.
143
210
  static void* appendbytes_handler(void *closure,
144
211
  const void *hd,
@@ -155,10 +222,12 @@ static void* str_handler(void *closure,
155
222
  const void *hd,
156
223
  size_t size_hint) {
157
224
  MessageHeader* msg = closure;
158
- const size_t *ofs = hd;
225
+ const field_handlerdata_t *fieldhandler = hd;
226
+
159
227
  VALUE str = rb_str_new2("");
160
228
  rb_enc_associate(str, kRubyStringUtf8Encoding);
161
- DEREF(msg, *ofs, VALUE) = str;
229
+ DEREF(msg, fieldhandler->ofs, VALUE) = str;
230
+ set_hasbit(closure, fieldhandler->hasbit);
162
231
  return (void*)str;
163
232
  }
164
233
 
@@ -167,10 +236,12 @@ static void* bytes_handler(void *closure,
167
236
  const void *hd,
168
237
  size_t size_hint) {
169
238
  MessageHeader* msg = closure;
170
- const size_t *ofs = hd;
239
+ const field_handlerdata_t *fieldhandler = hd;
240
+
171
241
  VALUE str = rb_str_new2("");
172
242
  rb_enc_associate(str, kRubyString8bitEncoding);
173
- DEREF(msg, *ofs, VALUE) = str;
243
+ DEREF(msg, fieldhandler->ofs, VALUE) = str;
244
+ set_hasbit(closure, fieldhandler->hasbit);
174
245
  return (void*)str;
175
246
  }
176
247
 
@@ -183,18 +254,13 @@ static size_t stringdata_handler(void* closure, const void* hd,
183
254
  }
184
255
 
185
256
  static bool stringdata_end_handler(void* closure, const void* hd) {
186
- MessageHeader* msg = closure;
187
- const size_t *ofs = hd;
188
- VALUE rb_str = DEREF(msg, *ofs, VALUE);
257
+ VALUE rb_str = closure;
189
258
  rb_obj_freeze(rb_str);
190
259
  return true;
191
260
  }
192
261
 
193
262
  static bool appendstring_end_handler(void* closure, const void* hd) {
194
- VALUE ary = (VALUE)closure;
195
- int size = RepeatedField_size(ary);
196
- VALUE* last = RepeatedField_index_native(ary, size - 1);
197
- VALUE rb_str = *last;
263
+ VALUE rb_str = closure;
198
264
  rb_obj_freeze(rb_str);
199
265
  return true;
200
266
  }
@@ -230,8 +296,11 @@ static void *submsg_handler(void *closure, const void *hd) {
230
296
  rb_class_new_instance(0, NULL, subklass);
231
297
  }
232
298
 
299
+ set_hasbit(closure, submsgdata->hasbit);
300
+
233
301
  submsg_rb = DEREF(msg, submsgdata->ofs, VALUE);
234
302
  TypedData_Get_Struct(submsg_rb, MessageHeader, &Message_type, submsg);
303
+
235
304
  return submsg;
236
305
  }
237
306
 
@@ -280,11 +349,6 @@ rb_data_type_t MapParseFrame_type = {
280
349
  { MapParseFrame_mark, MapParseFrame_free, NULL },
281
350
  };
282
351
 
283
- // Array of Ruby objects wrapping map_parse_frame_t.
284
- // We don't allow multiple concurrent decodes, so we assume that this global
285
- // variable is specific to the "current" decode.
286
- VALUE map_parse_frames;
287
-
288
352
  static map_parse_frame_t* map_push_frame(VALUE map,
289
353
  const map_handlerdata_t* handlerdata) {
290
354
  map_parse_frame_t* frame = ALLOC(map_parse_frame_t);
@@ -293,16 +357,12 @@ static map_parse_frame_t* map_push_frame(VALUE map,
293
357
  native_slot_init(handlerdata->key_field_type, &frame->key_storage);
294
358
  native_slot_init(handlerdata->value_field_type, &frame->value_storage);
295
359
 
296
- rb_ary_push(map_parse_frames,
360
+ Map_set_frame(map,
297
361
  TypedData_Wrap_Struct(rb_cObject, &MapParseFrame_type, frame));
298
362
 
299
363
  return frame;
300
364
  }
301
365
 
302
- static void map_pop_frame() {
303
- rb_ary_pop(map_parse_frames);
304
- }
305
-
306
366
  // Handler to begin a map entry: allocates a temporary frame. This is the
307
367
  // 'startsubmsg' handler on the msgdef that contains the map field.
308
368
  static void *startmapentry_handler(void *closure, const void *hd) {
@@ -329,6 +389,9 @@ static bool endmap_handler(void *closure, const void *hd, upb_status* s) {
329
389
  if (mapdata->value_field_type == UPB_TYPE_MESSAGE ||
330
390
  mapdata->value_field_type == UPB_TYPE_ENUM) {
331
391
  value_field_typeclass = get_def_obj(mapdata->value_field_subdef);
392
+ if (mapdata->value_field_type == UPB_TYPE_ENUM) {
393
+ value_field_typeclass = EnumDescriptor_enummodule(value_field_typeclass);
394
+ }
332
395
  }
333
396
 
334
397
  value = native_slot_get(
@@ -336,7 +399,7 @@ static bool endmap_handler(void *closure, const void *hd, upb_status* s) {
336
399
  &frame->value_storage);
337
400
 
338
401
  Map_index_set(frame->map, key, value);
339
- map_pop_frame();
402
+ Map_set_frame(frame->map, Qnil);
340
403
 
341
404
  return true;
342
405
  }
@@ -416,9 +479,8 @@ static void *oneofbytes_handler(void *closure,
416
479
  }
417
480
 
418
481
  static bool oneofstring_end_handler(void* closure, const void* hd) {
419
- MessageHeader* msg = closure;
420
- const oneof_handlerdata_t *oneofdata = hd;
421
- rb_obj_freeze(DEREF(msg, oneofdata->ofs, VALUE));
482
+ VALUE rb_str = rb_str_new2("");
483
+ rb_obj_freeze(rb_str);
422
484
  return true;
423
485
  }
424
486
 
@@ -459,7 +521,7 @@ static void add_handlers_for_repeated_field(upb_handlers *h,
459
521
  const upb_fielddef *f,
460
522
  size_t offset) {
461
523
  upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
462
- upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset));
524
+ upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset, -1));
463
525
  upb_handlers_setstartseq(h, f, startseq_handler, &attr);
464
526
  upb_handlerattr_uninit(&attr);
465
527
 
@@ -493,7 +555,7 @@ static void add_handlers_for_repeated_field(upb_handlers *h,
493
555
  }
494
556
  case UPB_TYPE_MESSAGE: {
495
557
  upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
496
- upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, 0, f));
558
+ upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, 0, -1, f));
497
559
  upb_handlers_setstartsubmsg(h, f, appendsubmsg_handler, &attr);
498
560
  upb_handlerattr_uninit(&attr);
499
561
  break;
@@ -504,7 +566,15 @@ static void add_handlers_for_repeated_field(upb_handlers *h,
504
566
  // Set up handlers for a singular field.
505
567
  static void add_handlers_for_singular_field(upb_handlers *h,
506
568
  const upb_fielddef *f,
507
- size_t offset) {
569
+ size_t offset,
570
+ size_t hasbit_off) {
571
+ // The offset we pass to UPB points to the start of the Message,
572
+ // rather than the start of where our data is stored.
573
+ int32_t hasbit = -1;
574
+ if (hasbit_off != MESSAGE_FIELD_NO_HASBIT) {
575
+ hasbit = hasbit_off + sizeof(MessageHeader) * 8;
576
+ }
577
+
508
578
  switch (upb_fielddef_type(f)) {
509
579
  case UPB_TYPE_BOOL:
510
580
  case UPB_TYPE_INT32:
@@ -514,13 +584,13 @@ static void add_handlers_for_singular_field(upb_handlers *h,
514
584
  case UPB_TYPE_INT64:
515
585
  case UPB_TYPE_UINT64:
516
586
  case UPB_TYPE_DOUBLE:
517
- upb_msg_setscalarhandler(h, f, offset, -1);
587
+ upb_msg_setscalarhandler(h, f, offset, hasbit);
518
588
  break;
519
589
  case UPB_TYPE_STRING:
520
590
  case UPB_TYPE_BYTES: {
521
591
  bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES;
522
592
  upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
523
- upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset));
593
+ upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset, hasbit));
524
594
  upb_handlers_setstartstr(h, f,
525
595
  is_bytes ? bytes_handler : str_handler,
526
596
  &attr);
@@ -531,7 +601,9 @@ static void add_handlers_for_singular_field(upb_handlers *h,
531
601
  }
532
602
  case UPB_TYPE_MESSAGE: {
533
603
  upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
534
- upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, offset, f));
604
+ upb_handlerattr_sethandlerdata(&attr,
605
+ newsubmsghandlerdata(h, offset,
606
+ hasbit, f));
535
607
  upb_handlers_setstartsubmsg(h, f, submsg_handler, &attr);
536
608
  upb_handlerattr_uninit(&attr);
537
609
  break;
@@ -569,10 +641,12 @@ static void add_handlers_for_mapentry(const upb_msgdef* msgdef,
569
641
 
570
642
  add_handlers_for_singular_field(
571
643
  h, key_field,
572
- offsetof(map_parse_frame_t, key_storage));
644
+ offsetof(map_parse_frame_t, key_storage),
645
+ MESSAGE_FIELD_NO_HASBIT);
573
646
  add_handlers_for_singular_field(
574
647
  h, value_field,
575
- offsetof(map_parse_frame_t, value_storage));
648
+ offsetof(map_parse_frame_t, value_storage),
649
+ MESSAGE_FIELD_NO_HASBIT);
576
650
  }
577
651
 
578
652
  // Set up handlers for a oneof field.
@@ -622,6 +696,20 @@ static void add_handlers_for_oneof_field(upb_handlers *h,
622
696
  upb_handlerattr_uninit(&attr);
623
697
  }
624
698
 
699
+ static bool unknown_field_handler(void* closure, const void* hd,
700
+ const char* buf, size_t size) {
701
+ UPB_UNUSED(hd);
702
+
703
+ MessageHeader* msg = (MessageHeader*)closure;
704
+ if (msg->unknown_fields == NULL) {
705
+ msg->unknown_fields = malloc(sizeof(stringsink));
706
+ stringsink_init(msg->unknown_fields);
707
+ }
708
+
709
+ stringsink_string(msg->unknown_fields, NULL, buf, size, NULL);
710
+
711
+ return true;
712
+ }
625
713
 
626
714
  static void add_handlers_for_message(const void *closure, upb_handlers *h) {
627
715
  const upb_msgdef* msgdef = upb_handlers_msgdef(h);
@@ -643,6 +731,9 @@ static void add_handlers_for_message(const void *closure, upb_handlers *h) {
643
731
  desc->layout = create_layout(desc->msgdef);
644
732
  }
645
733
 
734
+ upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
735
+ upb_handlers_setunknown(h, unknown_field_handler, &attr);
736
+
646
737
  for (upb_msg_field_begin(&i, desc->msgdef);
647
738
  !upb_msg_field_done(&i);
648
739
  upb_msg_field_next(&i)) {
@@ -660,7 +751,8 @@ static void add_handlers_for_message(const void *closure, upb_handlers *h) {
660
751
  } else if (upb_fielddef_isseq(f)) {
661
752
  add_handlers_for_repeated_field(h, f, offset);
662
753
  } else {
663
- add_handlers_for_singular_field(h, f, offset);
754
+ add_handlers_for_singular_field(
755
+ h, f, offset, desc->layout->fields[upb_fielddef_index(f)].hasbit);
664
756
  }
665
757
  }
666
758
  }
@@ -775,10 +867,6 @@ VALUE Message_decode(VALUE klass, VALUE data) {
775
867
  msg_rb = rb_class_new_instance(0, NULL, msgklass);
776
868
  TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);
777
869
 
778
- // We generally expect this to be clear already, but clear it in case parsing
779
- // previously got interrupted somehow.
780
- rb_ary_clear(map_parse_frames);
781
-
782
870
  {
783
871
  const upb_pbdecodermethod* method = msgdef_decodermethod(desc);
784
872
  const upb_handlers* h = upb_pbdecodermethod_desthandlers(method);
@@ -800,19 +888,38 @@ VALUE Message_decode(VALUE klass, VALUE data) {
800
888
 
801
889
  /*
802
890
  * call-seq:
803
- * MessageClass.decode_json(data) => message
891
+ * MessageClass.decode_json(data, options = {}) => message
804
892
  *
805
893
  * Decodes the given data (as a string containing bytes in protocol buffers wire
806
894
  * format) under the interpretation given by this message class's definition
807
895
  * and returns a message object with the corresponding field values.
896
+ *
897
+ * @param options [Hash] options for the decoder
898
+ * ignore_unknown_fields: set true to ignore unknown fields (default is to raise an error)
808
899
  */
809
- VALUE Message_decode_json(VALUE klass, VALUE data) {
900
+ VALUE Message_decode_json(int argc, VALUE* argv, VALUE klass) {
810
901
  VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned);
811
902
  Descriptor* desc = ruby_to_Descriptor(descriptor);
812
903
  VALUE msgklass = Descriptor_msgclass(descriptor);
813
904
  VALUE msg_rb;
905
+ VALUE data = argv[0];
906
+ VALUE ignore_unknown_fields = Qfalse;
814
907
  MessageHeader* msg;
815
908
 
909
+ if (argc < 1 || argc > 2) {
910
+ rb_raise(rb_eArgError, "Expected 1 or 2 arguments.");
911
+ }
912
+
913
+ if (argc == 2) {
914
+ VALUE hash_args = argv[1];
915
+ if (TYPE(hash_args) != T_HASH) {
916
+ rb_raise(rb_eArgError, "Expected hash arguments.");
917
+ }
918
+
919
+ ignore_unknown_fields = rb_hash_lookup2(
920
+ hash_args, ID2SYM(rb_intern("ignore_unknown_fields")), Qfalse);
921
+ }
922
+
816
923
  if (TYPE(data) != T_STRING) {
817
924
  rb_raise(rb_eArgError, "Expected string for JSON data.");
818
925
  }
@@ -823,19 +930,17 @@ VALUE Message_decode_json(VALUE klass, VALUE data) {
823
930
  msg_rb = rb_class_new_instance(0, NULL, msgklass);
824
931
  TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);
825
932
 
826
- // We generally expect this to be clear already, but clear it in case parsing
827
- // previously got interrupted somehow.
828
- rb_ary_clear(map_parse_frames);
829
-
830
933
  {
831
934
  const upb_json_parsermethod* method = msgdef_jsonparsermethod(desc);
832
935
  stackenv se;
833
936
  upb_sink sink;
834
937
  upb_json_parser* parser;
938
+ DescriptorPool* pool = ruby_to_DescriptorPool(generated_pool);
835
939
  stackenv_init(&se, "Error occurred during parsing: %s");
836
940
 
837
941
  upb_sink_reset(&sink, get_fill_handlers(desc), msg);
838
- parser = upb_json_parser_create(&se.env, method, &sink);
942
+ parser = upb_json_parser_create(&se.env, method, pool->symtab,
943
+ &sink, ignore_unknown_fields);
839
944
  upb_bufsrc_putbuf(RSTRING_PTR(data), RSTRING_LEN(data),
840
945
  upb_json_parser_input(parser));
841
946
 
@@ -848,79 +953,12 @@ VALUE Message_decode_json(VALUE klass, VALUE data) {
848
953
  // -----------------------------------------------------------------------------
849
954
  // Serializing.
850
955
  // -----------------------------------------------------------------------------
851
- //
852
- // The code below also comes from upb's prototype Ruby binding, developed by
853
- // haberman@.
854
-
855
- /* stringsink *****************************************************************/
856
-
857
- // This should probably be factored into a common upb component.
858
-
859
- typedef struct {
860
- upb_byteshandler handler;
861
- upb_bytessink sink;
862
- char *ptr;
863
- size_t len, size;
864
- } stringsink;
865
-
866
- static void *stringsink_start(void *_sink, const void *hd, size_t size_hint) {
867
- stringsink *sink = _sink;
868
- sink->len = 0;
869
- return sink;
870
- }
871
-
872
- static size_t stringsink_string(void *_sink, const void *hd, const char *ptr,
873
- size_t len, const upb_bufhandle *handle) {
874
- stringsink *sink = _sink;
875
- size_t new_size = sink->size;
876
-
877
- UPB_UNUSED(hd);
878
- UPB_UNUSED(handle);
879
-
880
- while (sink->len + len > new_size) {
881
- new_size *= 2;
882
- }
883
-
884
- if (new_size != sink->size) {
885
- sink->ptr = realloc(sink->ptr, new_size);
886
- sink->size = new_size;
887
- }
888
-
889
- memcpy(sink->ptr + sink->len, ptr, len);
890
- sink->len += len;
891
-
892
- return len;
893
- }
894
-
895
- void stringsink_init(stringsink *sink) {
896
- upb_byteshandler_init(&sink->handler);
897
- upb_byteshandler_setstartstr(&sink->handler, stringsink_start, NULL);
898
- upb_byteshandler_setstring(&sink->handler, stringsink_string, NULL);
899
-
900
- upb_bytessink_reset(&sink->sink, &sink->handler, sink);
901
-
902
- sink->size = 32;
903
- sink->ptr = malloc(sink->size);
904
- sink->len = 0;
905
- }
906
-
907
- void stringsink_uninit(stringsink *sink) {
908
- free(sink->ptr);
909
- }
910
956
 
911
957
  /* msgvisitor *****************************************************************/
912
958
 
913
- // TODO: If/when we support proto2 semantics in addition to the current proto3
914
- // semantics, which means that we have true field presence, we will want to
915
- // modify msgvisitor so that it emits all present fields rather than all
916
- // non-default-value fields.
917
- //
918
- // Likewise, when implementing JSON serialization, we may need to have a
919
- // 'verbose' mode that outputs all fields and a 'concise' mode that outputs only
920
- // those with non-default values.
921
-
922
959
  static void putmsg(VALUE msg, const Descriptor* desc,
923
- upb_sink *sink, int depth);
960
+ upb_sink *sink, int depth, bool emit_defaults,
961
+ bool is_json, bool open_msg);
924
962
 
925
963
  static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
926
964
  upb_selector_t ret;
@@ -952,7 +990,7 @@ static void putstr(VALUE str, const upb_fielddef *f, upb_sink *sink) {
952
990
  }
953
991
 
954
992
  static void putsubmsg(VALUE submsg, const upb_fielddef *f, upb_sink *sink,
955
- int depth) {
993
+ int depth, bool emit_defaults, bool is_json) {
956
994
  upb_sink subsink;
957
995
  VALUE descriptor;
958
996
  Descriptor* subdesc;
@@ -963,18 +1001,22 @@ static void putsubmsg(VALUE submsg, const upb_fielddef *f, upb_sink *sink,
963
1001
  subdesc = ruby_to_Descriptor(descriptor);
964
1002
 
965
1003
  upb_sink_startsubmsg(sink, getsel(f, UPB_HANDLER_STARTSUBMSG), &subsink);
966
- putmsg(submsg, subdesc, &subsink, depth + 1);
1004
+ putmsg(submsg, subdesc, &subsink, depth + 1, emit_defaults, is_json, true);
967
1005
  upb_sink_endsubmsg(sink, getsel(f, UPB_HANDLER_ENDSUBMSG));
968
1006
  }
969
1007
 
970
1008
  static void putary(VALUE ary, const upb_fielddef *f, upb_sink *sink,
971
- int depth) {
1009
+ int depth, bool emit_defaults, bool is_json) {
972
1010
  upb_sink subsink;
973
1011
  upb_fieldtype_t type = upb_fielddef_type(f);
974
1012
  upb_selector_t sel = 0;
975
1013
  int size;
976
1014
 
977
1015
  if (ary == Qnil) return;
1016
+ if (!emit_defaults && NUM2INT(RepeatedField_length(ary)) == 0) return;
1017
+
1018
+ size = NUM2INT(RepeatedField_length(ary));
1019
+ if (size == 0 && !emit_defaults) return;
978
1020
 
979
1021
  upb_sink_startseq(sink, getsel(f, UPB_HANDLER_STARTSEQ), &subsink);
980
1022
 
@@ -982,7 +1024,6 @@ static void putary(VALUE ary, const upb_fielddef *f, upb_sink *sink,
982
1024
  sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
983
1025
  }
984
1026
 
985
- size = NUM2INT(RepeatedField_length(ary));
986
1027
  for (int i = 0; i < size; i++) {
987
1028
  void* memory = RepeatedField_index_native(ary, i);
988
1029
  switch (type) {
@@ -1005,7 +1046,8 @@ static void putary(VALUE ary, const upb_fielddef *f, upb_sink *sink,
1005
1046
  putstr(*((VALUE *)memory), f, &subsink);
1006
1047
  break;
1007
1048
  case UPB_TYPE_MESSAGE:
1008
- putsubmsg(*((VALUE *)memory), f, &subsink, depth);
1049
+ putsubmsg(*((VALUE *)memory), f, &subsink, depth,
1050
+ emit_defaults, is_json);
1009
1051
  break;
1010
1052
 
1011
1053
  #undef T
@@ -1019,7 +1061,14 @@ static void put_ruby_value(VALUE value,
1019
1061
  const upb_fielddef *f,
1020
1062
  VALUE type_class,
1021
1063
  int depth,
1022
- upb_sink *sink) {
1064
+ upb_sink *sink,
1065
+ bool emit_defaults,
1066
+ bool is_json) {
1067
+ if (depth > ENCODE_MAX_NESTING) {
1068
+ rb_raise(rb_eRuntimeError,
1069
+ "Maximum recursion depth exceeded during encoding.");
1070
+ }
1071
+
1023
1072
  upb_selector_t sel = 0;
1024
1073
  if (upb_fielddef_isprimitive(f)) {
1025
1074
  sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
@@ -1059,12 +1108,12 @@ static void put_ruby_value(VALUE value,
1059
1108
  putstr(value, f, sink);
1060
1109
  break;
1061
1110
  case UPB_TYPE_MESSAGE:
1062
- putsubmsg(value, f, sink, depth);
1111
+ putsubmsg(value, f, sink, depth, emit_defaults, is_json);
1063
1112
  }
1064
1113
  }
1065
1114
 
1066
1115
  static void putmap(VALUE map, const upb_fielddef *f, upb_sink *sink,
1067
- int depth) {
1116
+ int depth, bool emit_defaults, bool is_json) {
1068
1117
  Map* self;
1069
1118
  upb_sink subsink;
1070
1119
  const upb_fielddef* key_field;
@@ -1072,6 +1121,8 @@ static void putmap(VALUE map, const upb_fielddef *f, upb_sink *sink,
1072
1121
  Map_iter it;
1073
1122
 
1074
1123
  if (map == Qnil) return;
1124
+ if (!emit_defaults && Map_length(map) == 0) return;
1125
+
1075
1126
  self = ruby_to_Map(map);
1076
1127
 
1077
1128
  upb_sink_startseq(sink, getsel(f, UPB_HANDLER_STARTSEQ), &subsink);
@@ -1090,9 +1141,10 @@ static void putmap(VALUE map, const upb_fielddef *f, upb_sink *sink,
1090
1141
  &entry_sink);
1091
1142
  upb_sink_startmsg(&entry_sink);
1092
1143
 
1093
- put_ruby_value(key, key_field, Qnil, depth + 1, &entry_sink);
1144
+ put_ruby_value(key, key_field, Qnil, depth + 1, &entry_sink,
1145
+ emit_defaults, is_json);
1094
1146
  put_ruby_value(value, value_field, self->value_type_class, depth + 1,
1095
- &entry_sink);
1147
+ &entry_sink, emit_defaults, is_json);
1096
1148
 
1097
1149
  upb_sink_endmsg(&entry_sink, &status);
1098
1150
  upb_sink_endsubmsg(&subsink, getsel(f, UPB_HANDLER_ENDSUBMSG));
@@ -1101,13 +1153,143 @@ static void putmap(VALUE map, const upb_fielddef *f, upb_sink *sink,
1101
1153
  upb_sink_endseq(sink, getsel(f, UPB_HANDLER_ENDSEQ));
1102
1154
  }
1103
1155
 
1156
+ static const upb_handlers* msgdef_json_serialize_handlers(
1157
+ Descriptor* desc, bool preserve_proto_fieldnames);
1158
+
1159
+ static void putjsonany(VALUE msg_rb, const Descriptor* desc,
1160
+ upb_sink* sink, int depth, bool emit_defaults) {
1161
+ upb_status status;
1162
+ MessageHeader* msg = NULL;
1163
+ const upb_fielddef* type_field = upb_msgdef_itof(desc->msgdef, UPB_ANY_TYPE);
1164
+ const upb_fielddef* value_field = upb_msgdef_itof(desc->msgdef, UPB_ANY_VALUE);
1165
+
1166
+ size_t type_url_offset;
1167
+ VALUE type_url_str_rb;
1168
+ const upb_msgdef *payload_type = NULL;
1169
+
1170
+ TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);
1171
+
1172
+ upb_sink_startmsg(sink);
1173
+
1174
+ /* Handle type url */
1175
+ type_url_offset = desc->layout->fields[upb_fielddef_index(type_field)].offset;
1176
+ type_url_str_rb = DEREF(Message_data(msg), type_url_offset, VALUE);
1177
+ if (RSTRING_LEN(type_url_str_rb) > 0) {
1178
+ putstr(type_url_str_rb, type_field, sink);
1179
+ }
1180
+
1181
+ {
1182
+ const char* type_url_str = RSTRING_PTR(type_url_str_rb);
1183
+ size_t type_url_len = RSTRING_LEN(type_url_str_rb);
1184
+ DescriptorPool* pool = ruby_to_DescriptorPool(generated_pool);
1185
+
1186
+ if (type_url_len <= 20 ||
1187
+ strncmp(type_url_str, "type.googleapis.com/", 20) != 0) {
1188
+ rb_raise(rb_eRuntimeError, "Invalid type url: %s", type_url_str);
1189
+ return;
1190
+ }
1191
+
1192
+ /* Resolve type url */
1193
+ type_url_str += 20;
1194
+ type_url_len -= 20;
1195
+
1196
+ payload_type = upb_symtab_lookupmsg2(
1197
+ pool->symtab, type_url_str, type_url_len);
1198
+ if (payload_type == NULL) {
1199
+ rb_raise(rb_eRuntimeError, "Unknown type: %s", type_url_str);
1200
+ return;
1201
+ }
1202
+ }
1203
+
1204
+ {
1205
+ uint32_t value_offset;
1206
+ VALUE value_str_rb;
1207
+ const char* value_str;
1208
+ size_t value_len;
1209
+
1210
+ value_offset = desc->layout->fields[upb_fielddef_index(value_field)].offset;
1211
+ value_str_rb = DEREF(Message_data(msg), value_offset, VALUE);
1212
+ value_str = RSTRING_PTR(value_str_rb);
1213
+ value_len = RSTRING_LEN(value_str_rb);
1214
+
1215
+ if (value_len > 0) {
1216
+ VALUE payload_desc_rb = get_def_obj(payload_type);
1217
+ Descriptor* payload_desc = ruby_to_Descriptor(payload_desc_rb);
1218
+ VALUE payload_class = Descriptor_msgclass(payload_desc_rb);
1219
+ upb_sink subsink;
1220
+ bool is_wellknown;
1221
+
1222
+ VALUE payload_msg_rb = Message_decode(payload_class, value_str_rb);
1223
+
1224
+ is_wellknown =
1225
+ upb_msgdef_wellknowntype(payload_desc->msgdef) !=
1226
+ UPB_WELLKNOWN_UNSPECIFIED;
1227
+ if (is_wellknown) {
1228
+ upb_sink_startstr(sink, getsel(value_field, UPB_HANDLER_STARTSTR), 0,
1229
+ &subsink);
1230
+ }
1231
+
1232
+ subsink.handlers =
1233
+ msgdef_json_serialize_handlers(payload_desc, true);
1234
+ subsink.closure = sink->closure;
1235
+ putmsg(payload_msg_rb, payload_desc, &subsink, depth, emit_defaults, true,
1236
+ is_wellknown);
1237
+ }
1238
+ }
1239
+
1240
+ upb_sink_endmsg(sink, &status);
1241
+ }
1242
+
1243
+ static void putjsonlistvalue(
1244
+ VALUE msg_rb, const Descriptor* desc,
1245
+ upb_sink* sink, int depth, bool emit_defaults) {
1246
+ upb_status status;
1247
+ upb_sink subsink;
1248
+ MessageHeader* msg = NULL;
1249
+ const upb_fielddef* f = upb_msgdef_itof(desc->msgdef, 1);
1250
+ uint32_t offset =
1251
+ desc->layout->fields[upb_fielddef_index(f)].offset +
1252
+ sizeof(MessageHeader);
1253
+ VALUE ary;
1254
+
1255
+ TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);
1256
+
1257
+ upb_sink_startmsg(sink);
1258
+
1259
+ ary = DEREF(msg, offset, VALUE);
1260
+
1261
+ if (ary == Qnil || RepeatedField_size(ary) == 0) {
1262
+ upb_sink_startseq(sink, getsel(f, UPB_HANDLER_STARTSEQ), &subsink);
1263
+ upb_sink_endseq(sink, getsel(f, UPB_HANDLER_ENDSEQ));
1264
+ } else {
1265
+ putary(ary, f, sink, depth, emit_defaults, true);
1266
+ }
1267
+
1268
+ upb_sink_endmsg(sink, &status);
1269
+ }
1270
+
1104
1271
  static void putmsg(VALUE msg_rb, const Descriptor* desc,
1105
- upb_sink *sink, int depth) {
1272
+ upb_sink *sink, int depth, bool emit_defaults,
1273
+ bool is_json, bool open_msg) {
1106
1274
  MessageHeader* msg;
1107
1275
  upb_msg_field_iter i;
1108
1276
  upb_status status;
1109
1277
 
1110
- upb_sink_startmsg(sink);
1278
+ if (is_json &&
1279
+ upb_msgdef_wellknowntype(desc->msgdef) == UPB_WELLKNOWN_ANY) {
1280
+ putjsonany(msg_rb, desc, sink, depth, emit_defaults);
1281
+ return;
1282
+ }
1283
+
1284
+ if (is_json &&
1285
+ upb_msgdef_wellknowntype(desc->msgdef) == UPB_WELLKNOWN_LISTVALUE) {
1286
+ putjsonlistvalue(msg_rb, desc, sink, depth, emit_defaults);
1287
+ return;
1288
+ }
1289
+
1290
+ if (open_msg) {
1291
+ upb_sink_startmsg(sink);
1292
+ }
1111
1293
 
1112
1294
  // Protect against cycles (possible because users may freely reassign message
1113
1295
  // and repeated fields) by imposing a maximum recursion depth.
@@ -1118,6 +1300,13 @@ static void putmsg(VALUE msg_rb, const Descriptor* desc,
1118
1300
 
1119
1301
  TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);
1120
1302
 
1303
+ if (desc != msg->descriptor) {
1304
+ rb_raise(rb_eArgError,
1305
+ "The type of given msg is '%s', expect '%s'.",
1306
+ upb_msgdef_fullname(msg->descriptor->msgdef),
1307
+ upb_msgdef_fullname(desc->msgdef));
1308
+ }
1309
+
1121
1310
  for (upb_msg_field_begin(&i, desc->msgdef);
1122
1311
  !upb_msg_field_done(&i);
1123
1312
  upb_msg_field_next(&i)) {
@@ -1144,31 +1333,46 @@ static void putmsg(VALUE msg_rb, const Descriptor* desc,
1144
1333
 
1145
1334
  if (is_map_field(f)) {
1146
1335
  VALUE map = DEREF(msg, offset, VALUE);
1147
- if (map != Qnil) {
1148
- putmap(map, f, sink, depth);
1336
+ if (map != Qnil || emit_defaults) {
1337
+ putmap(map, f, sink, depth, emit_defaults, is_json);
1149
1338
  }
1150
1339
  } else if (upb_fielddef_isseq(f)) {
1151
1340
  VALUE ary = DEREF(msg, offset, VALUE);
1152
1341
  if (ary != Qnil) {
1153
- putary(ary, f, sink, depth);
1342
+ putary(ary, f, sink, depth, emit_defaults, is_json);
1154
1343
  }
1155
1344
  } else if (upb_fielddef_isstring(f)) {
1156
1345
  VALUE str = DEREF(msg, offset, VALUE);
1157
- if (is_matching_oneof || RSTRING_LEN(str) > 0) {
1346
+ bool is_default = false;
1347
+
1348
+ if (upb_msgdef_syntax(desc->msgdef) == UPB_SYNTAX_PROTO2) {
1349
+ is_default = layout_has(desc->layout, Message_data(msg), f) == Qfalse;
1350
+ } else if (upb_msgdef_syntax(desc->msgdef) == UPB_SYNTAX_PROTO3) {
1351
+ is_default = RSTRING_LEN(str) == 0;
1352
+ }
1353
+
1354
+ if (is_matching_oneof || emit_defaults || !is_default) {
1158
1355
  putstr(str, f, sink);
1159
1356
  }
1160
1357
  } else if (upb_fielddef_issubmsg(f)) {
1161
- putsubmsg(DEREF(msg, offset, VALUE), f, sink, depth);
1358
+ putsubmsg(DEREF(msg, offset, VALUE), f, sink, depth,
1359
+ emit_defaults, is_json);
1162
1360
  } else {
1163
1361
  upb_selector_t sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
1164
1362
 
1165
- #define T(upbtypeconst, upbtype, ctype, default_value) \
1166
- case upbtypeconst: { \
1167
- ctype value = DEREF(msg, offset, ctype); \
1168
- if (is_matching_oneof || value != default_value) { \
1169
- upb_sink_put##upbtype(sink, sel, value); \
1170
- } \
1171
- } \
1363
+ #define T(upbtypeconst, upbtype, ctype, default_value) \
1364
+ case upbtypeconst: { \
1365
+ ctype value = DEREF(msg, offset, ctype); \
1366
+ bool is_default = false; \
1367
+ if (upb_fielddef_haspresence(f)) { \
1368
+ is_default = layout_has(desc->layout, Message_data(msg), f) == Qfalse; \
1369
+ } else if (upb_msgdef_syntax(desc->msgdef) == UPB_SYNTAX_PROTO3) { \
1370
+ is_default = default_value == value; \
1371
+ } \
1372
+ if (is_matching_oneof || emit_defaults || !is_default) { \
1373
+ upb_sink_put##upbtype(sink, sel, value); \
1374
+ } \
1375
+ } \
1172
1376
  break;
1173
1377
 
1174
1378
  switch (upb_fielddef_type(f)) {
@@ -1191,7 +1395,14 @@ static void putmsg(VALUE msg_rb, const Descriptor* desc,
1191
1395
  }
1192
1396
  }
1193
1397
 
1194
- upb_sink_endmsg(sink, &status);
1398
+ stringsink* unknown = msg->unknown_fields;
1399
+ if (unknown != NULL) {
1400
+ upb_sink_putunknown(sink, unknown->ptr, unknown->len);
1401
+ }
1402
+
1403
+ if (open_msg) {
1404
+ upb_sink_endmsg(sink, &status);
1405
+ }
1195
1406
  }
1196
1407
 
1197
1408
  static const upb_handlers* msgdef_pb_serialize_handlers(Descriptor* desc) {
@@ -1246,7 +1457,7 @@ VALUE Message_encode(VALUE klass, VALUE msg_rb) {
1246
1457
  stackenv_init(&se, "Error occurred during encoding: %s");
1247
1458
  encoder = upb_pb_encoder_create(&se.env, serialize_handlers, &sink.sink);
1248
1459
 
1249
- putmsg(msg_rb, desc, upb_pb_encoder_input(encoder), 0);
1460
+ putmsg(msg_rb, desc, upb_pb_encoder_input(encoder), 0, false, false, true);
1250
1461
 
1251
1462
  ret = rb_str_new(sink.ptr, sink.len);
1252
1463
 
@@ -1259,15 +1470,19 @@ VALUE Message_encode(VALUE klass, VALUE msg_rb) {
1259
1470
 
1260
1471
  /*
1261
1472
  * call-seq:
1262
- * MessageClass.encode_json(msg) => json_string
1473
+ * MessageClass.encode_json(msg, options = {}) => json_string
1263
1474
  *
1264
1475
  * Encodes the given message object into its serialized JSON representation.
1476
+ * @param options [Hash] options for the encoder
1477
+ * preserve_proto_fieldnames: set true to use original fieldnames (default is to camelCase)
1478
+ * emit_defaults: set true to emit 0/false values (default is to omit them)
1265
1479
  */
1266
1480
  VALUE Message_encode_json(int argc, VALUE* argv, VALUE klass) {
1267
1481
  VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned);
1268
1482
  Descriptor* desc = ruby_to_Descriptor(descriptor);
1269
1483
  VALUE msg_rb;
1270
1484
  VALUE preserve_proto_fieldnames = Qfalse;
1485
+ VALUE emit_defaults = Qfalse;
1271
1486
  stringsink sink;
1272
1487
 
1273
1488
  if (argc < 1 || argc > 2) {
@@ -1283,6 +1498,9 @@ VALUE Message_encode_json(int argc, VALUE* argv, VALUE klass) {
1283
1498
  }
1284
1499
  preserve_proto_fieldnames = rb_hash_lookup2(
1285
1500
  hash_args, ID2SYM(rb_intern("preserve_proto_fieldnames")), Qfalse);
1501
+
1502
+ emit_defaults = rb_hash_lookup2(
1503
+ hash_args, ID2SYM(rb_intern("emit_defaults")), Qfalse);
1286
1504
  }
1287
1505
 
1288
1506
  stringsink_init(&sink);
@@ -1297,7 +1515,8 @@ VALUE Message_encode_json(int argc, VALUE* argv, VALUE klass) {
1297
1515
  stackenv_init(&se, "Error occurred during encoding: %s");
1298
1516
  printer = upb_json_printer_create(&se.env, serialize_handlers, &sink.sink);
1299
1517
 
1300
- putmsg(msg_rb, desc, upb_json_printer_input(printer), 0);
1518
+ putmsg(msg_rb, desc, upb_json_printer_input(printer), 0,
1519
+ RTEST(emit_defaults), true, true);
1301
1520
 
1302
1521
  ret = rb_enc_str_new(sink.ptr, sink.len, rb_utf8_encoding());
1303
1522
 
@@ -1308,3 +1527,91 @@ VALUE Message_encode_json(int argc, VALUE* argv, VALUE klass) {
1308
1527
  }
1309
1528
  }
1310
1529
 
1530
+ static void discard_unknown(VALUE msg_rb, const Descriptor* desc) {
1531
+ MessageHeader* msg;
1532
+ upb_msg_field_iter it;
1533
+
1534
+ TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);
1535
+
1536
+ stringsink* unknown = msg->unknown_fields;
1537
+ if (unknown != NULL) {
1538
+ stringsink_uninit(unknown);
1539
+ msg->unknown_fields = NULL;
1540
+ }
1541
+
1542
+ for (upb_msg_field_begin(&it, desc->msgdef);
1543
+ !upb_msg_field_done(&it);
1544
+ upb_msg_field_next(&it)) {
1545
+ upb_fielddef *f = upb_msg_iter_field(&it);
1546
+ uint32_t offset =
1547
+ desc->layout->fields[upb_fielddef_index(f)].offset +
1548
+ sizeof(MessageHeader);
1549
+
1550
+ if (upb_fielddef_containingoneof(f)) {
1551
+ uint32_t oneof_case_offset =
1552
+ desc->layout->fields[upb_fielddef_index(f)].case_offset +
1553
+ sizeof(MessageHeader);
1554
+ // For a oneof, check that this field is actually present -- skip all the
1555
+ // below if not.
1556
+ if (DEREF(msg, oneof_case_offset, uint32_t) !=
1557
+ upb_fielddef_number(f)) {
1558
+ continue;
1559
+ }
1560
+ // Otherwise, fall through to the appropriate singular-field handler
1561
+ // below.
1562
+ }
1563
+
1564
+ if (!upb_fielddef_issubmsg(f)) {
1565
+ continue;
1566
+ }
1567
+
1568
+ if (is_map_field(f)) {
1569
+ if (!upb_fielddef_issubmsg(map_field_value(f))) continue;
1570
+ VALUE map = DEREF(msg, offset, VALUE);
1571
+ if (map == Qnil) continue;
1572
+ Map_iter map_it;
1573
+ for (Map_begin(map, &map_it); !Map_done(&map_it); Map_next(&map_it)) {
1574
+ VALUE submsg = Map_iter_value(&map_it);
1575
+ VALUE descriptor = rb_ivar_get(submsg, descriptor_instancevar_interned);
1576
+ const Descriptor* subdesc = ruby_to_Descriptor(descriptor);
1577
+ discard_unknown(submsg, subdesc);
1578
+ }
1579
+ } else if (upb_fielddef_isseq(f)) {
1580
+ VALUE ary = DEREF(msg, offset, VALUE);
1581
+ if (ary == Qnil) continue;
1582
+ int size = NUM2INT(RepeatedField_length(ary));
1583
+ for (int i = 0; i < size; i++) {
1584
+ void* memory = RepeatedField_index_native(ary, i);
1585
+ VALUE submsg = *((VALUE *)memory);
1586
+ VALUE descriptor = rb_ivar_get(submsg, descriptor_instancevar_interned);
1587
+ const Descriptor* subdesc = ruby_to_Descriptor(descriptor);
1588
+ discard_unknown(submsg, subdesc);
1589
+ }
1590
+ } else {
1591
+ VALUE submsg = DEREF(msg, offset, VALUE);
1592
+ if (submsg == Qnil) continue;
1593
+ VALUE descriptor = rb_ivar_get(submsg, descriptor_instancevar_interned);
1594
+ const Descriptor* subdesc = ruby_to_Descriptor(descriptor);
1595
+ discard_unknown(submsg, subdesc);
1596
+ }
1597
+ }
1598
+ }
1599
+
1600
+ /*
1601
+ * call-seq:
1602
+ * Google::Protobuf.discard_unknown(msg)
1603
+ *
1604
+ * Discard unknown fields in the given message object and recursively discard
1605
+ * unknown fields in submessages.
1606
+ */
1607
+ VALUE Google_Protobuf_discard_unknown(VALUE self, VALUE msg_rb) {
1608
+ VALUE klass = CLASS_OF(msg_rb);
1609
+ VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned);
1610
+ Descriptor* desc = ruby_to_Descriptor(descriptor);
1611
+ if (klass == cRepeatedField || klass == cMap) {
1612
+ rb_raise(rb_eArgError, "Expected proto msg for discard unknown.");
1613
+ } else {
1614
+ discard_unknown(msg_rb, desc);
1615
+ }
1616
+ return Qnil;
1617
+ }