google-protobuf 3.2.0 → 3.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of google-protobuf might be problematic. Click here for more details.

@@ -44,30 +44,90 @@ VALUE noleak_rb_str_cat(VALUE rb_str, const char *str, long len) {
44
44
  return rb_str;
45
45
  }
46
46
 
47
+ // The code below also comes from upb's prototype Ruby binding, developed by
48
+ // haberman@.
49
+
50
+ /* stringsink *****************************************************************/
51
+
52
+ static void *stringsink_start(void *_sink, const void *hd, size_t size_hint) {
53
+ stringsink *sink = _sink;
54
+ sink->len = 0;
55
+ return sink;
56
+ }
57
+
58
+ static size_t stringsink_string(void *_sink, const void *hd, const char *ptr,
59
+ size_t len, const upb_bufhandle *handle) {
60
+ stringsink *sink = _sink;
61
+ size_t new_size = sink->size;
62
+
63
+ UPB_UNUSED(hd);
64
+ UPB_UNUSED(handle);
65
+
66
+ while (sink->len + len > new_size) {
67
+ new_size *= 2;
68
+ }
69
+
70
+ if (new_size != sink->size) {
71
+ sink->ptr = realloc(sink->ptr, new_size);
72
+ sink->size = new_size;
73
+ }
74
+
75
+ memcpy(sink->ptr + sink->len, ptr, len);
76
+ sink->len += len;
77
+
78
+ return len;
79
+ }
80
+
81
+ void stringsink_init(stringsink *sink) {
82
+ upb_byteshandler_init(&sink->handler);
83
+ upb_byteshandler_setstartstr(&sink->handler, stringsink_start, NULL);
84
+ upb_byteshandler_setstring(&sink->handler, stringsink_string, NULL);
85
+
86
+ upb_bytessink_reset(&sink->sink, &sink->handler, sink);
87
+
88
+ sink->size = 32;
89
+ sink->ptr = malloc(sink->size);
90
+ sink->len = 0;
91
+ }
92
+
93
+ void stringsink_uninit(stringsink *sink) {
94
+ free(sink->ptr);
95
+ }
96
+
47
97
  // -----------------------------------------------------------------------------
48
98
  // Parsing.
49
99
  // -----------------------------------------------------------------------------
50
100
 
51
101
  #define DEREF(msg, ofs, type) *(type*)(((uint8_t *)msg) + ofs)
52
102
 
53
- // Creates a handlerdata that simply contains the offset for this field.
54
- static const void* newhandlerdata(upb_handlers* h, uint32_t ofs) {
55
- size_t* hd_ofs = ALLOC(size_t);
56
- *hd_ofs = ofs;
57
- upb_handlers_addcleanup(h, hd_ofs, xfree);
58
- return hd_ofs;
103
+ typedef struct {
104
+ size_t ofs;
105
+ int32_t hasbit;
106
+ } field_handlerdata_t;
107
+
108
+ // Creates a handlerdata that contains the offset and the hasbit for the field
109
+ static const void* newhandlerdata(upb_handlers* h, uint32_t ofs, int32_t hasbit) {
110
+ field_handlerdata_t *hd = ALLOC(field_handlerdata_t);
111
+ hd->ofs = ofs;
112
+ hd->hasbit = hasbit;
113
+ upb_handlers_addcleanup(h, hd, xfree);
114
+ return hd;
59
115
  }
60
116
 
61
117
  typedef struct {
62
118
  size_t ofs;
119
+ int32_t hasbit;
63
120
  const upb_msgdef *md;
64
121
  } submsg_handlerdata_t;
65
122
 
66
123
  // Creates a handlerdata that contains offset and submessage type information.
67
- static const void *newsubmsghandlerdata(upb_handlers* h, uint32_t ofs,
124
+ static const void *newsubmsghandlerdata(upb_handlers* h,
125
+ uint32_t ofs,
126
+ int32_t hasbit,
68
127
  const upb_fielddef* f) {
69
128
  submsg_handlerdata_t *hd = ALLOC(submsg_handlerdata_t);
70
129
  hd->ofs = ofs;
130
+ hd->hasbit = hasbit;
71
131
  hd->md = upb_fielddef_msgsubdef(f);
72
132
  upb_handlers_addcleanup(h, hd, xfree);
73
133
  return hd;
@@ -139,6 +199,13 @@ static void* appendstr_handler(void *closure,
139
199
  return (void*)str;
140
200
  }
141
201
 
202
+ static void set_hasbit(void *closure, int32_t hasbit) {
203
+ if (hasbit > 0) {
204
+ uint8_t* storage = closure;
205
+ storage[hasbit/8] |= 1 << (hasbit % 8);
206
+ }
207
+ }
208
+
142
209
  // Appends a 'bytes' string to a repeated field.
143
210
  static void* appendbytes_handler(void *closure,
144
211
  const void *hd,
@@ -155,10 +222,12 @@ static void* str_handler(void *closure,
155
222
  const void *hd,
156
223
  size_t size_hint) {
157
224
  MessageHeader* msg = closure;
158
- const size_t *ofs = hd;
225
+ const field_handlerdata_t *fieldhandler = hd;
226
+
159
227
  VALUE str = rb_str_new2("");
160
228
  rb_enc_associate(str, kRubyStringUtf8Encoding);
161
- DEREF(msg, *ofs, VALUE) = str;
229
+ DEREF(msg, fieldhandler->ofs, VALUE) = str;
230
+ set_hasbit(closure, fieldhandler->hasbit);
162
231
  return (void*)str;
163
232
  }
164
233
 
@@ -167,10 +236,12 @@ static void* bytes_handler(void *closure,
167
236
  const void *hd,
168
237
  size_t size_hint) {
169
238
  MessageHeader* msg = closure;
170
- const size_t *ofs = hd;
239
+ const field_handlerdata_t *fieldhandler = hd;
240
+
171
241
  VALUE str = rb_str_new2("");
172
242
  rb_enc_associate(str, kRubyString8bitEncoding);
173
- DEREF(msg, *ofs, VALUE) = str;
243
+ DEREF(msg, fieldhandler->ofs, VALUE) = str;
244
+ set_hasbit(closure, fieldhandler->hasbit);
174
245
  return (void*)str;
175
246
  }
176
247
 
@@ -183,18 +254,13 @@ static size_t stringdata_handler(void* closure, const void* hd,
183
254
  }
184
255
 
185
256
  static bool stringdata_end_handler(void* closure, const void* hd) {
186
- MessageHeader* msg = closure;
187
- const size_t *ofs = hd;
188
- VALUE rb_str = DEREF(msg, *ofs, VALUE);
257
+ VALUE rb_str = closure;
189
258
  rb_obj_freeze(rb_str);
190
259
  return true;
191
260
  }
192
261
 
193
262
  static bool appendstring_end_handler(void* closure, const void* hd) {
194
- VALUE ary = (VALUE)closure;
195
- int size = RepeatedField_size(ary);
196
- VALUE* last = RepeatedField_index_native(ary, size - 1);
197
- VALUE rb_str = *last;
263
+ VALUE rb_str = closure;
198
264
  rb_obj_freeze(rb_str);
199
265
  return true;
200
266
  }
@@ -230,8 +296,11 @@ static void *submsg_handler(void *closure, const void *hd) {
230
296
  rb_class_new_instance(0, NULL, subklass);
231
297
  }
232
298
 
299
+ set_hasbit(closure, submsgdata->hasbit);
300
+
233
301
  submsg_rb = DEREF(msg, submsgdata->ofs, VALUE);
234
302
  TypedData_Get_Struct(submsg_rb, MessageHeader, &Message_type, submsg);
303
+
235
304
  return submsg;
236
305
  }
237
306
 
@@ -280,11 +349,6 @@ rb_data_type_t MapParseFrame_type = {
280
349
  { MapParseFrame_mark, MapParseFrame_free, NULL },
281
350
  };
282
351
 
283
- // Array of Ruby objects wrapping map_parse_frame_t.
284
- // We don't allow multiple concurrent decodes, so we assume that this global
285
- // variable is specific to the "current" decode.
286
- VALUE map_parse_frames;
287
-
288
352
  static map_parse_frame_t* map_push_frame(VALUE map,
289
353
  const map_handlerdata_t* handlerdata) {
290
354
  map_parse_frame_t* frame = ALLOC(map_parse_frame_t);
@@ -293,16 +357,12 @@ static map_parse_frame_t* map_push_frame(VALUE map,
293
357
  native_slot_init(handlerdata->key_field_type, &frame->key_storage);
294
358
  native_slot_init(handlerdata->value_field_type, &frame->value_storage);
295
359
 
296
- rb_ary_push(map_parse_frames,
360
+ Map_set_frame(map,
297
361
  TypedData_Wrap_Struct(rb_cObject, &MapParseFrame_type, frame));
298
362
 
299
363
  return frame;
300
364
  }
301
365
 
302
- static void map_pop_frame() {
303
- rb_ary_pop(map_parse_frames);
304
- }
305
-
306
366
  // Handler to begin a map entry: allocates a temporary frame. This is the
307
367
  // 'startsubmsg' handler on the msgdef that contains the map field.
308
368
  static void *startmapentry_handler(void *closure, const void *hd) {
@@ -329,6 +389,9 @@ static bool endmap_handler(void *closure, const void *hd, upb_status* s) {
329
389
  if (mapdata->value_field_type == UPB_TYPE_MESSAGE ||
330
390
  mapdata->value_field_type == UPB_TYPE_ENUM) {
331
391
  value_field_typeclass = get_def_obj(mapdata->value_field_subdef);
392
+ if (mapdata->value_field_type == UPB_TYPE_ENUM) {
393
+ value_field_typeclass = EnumDescriptor_enummodule(value_field_typeclass);
394
+ }
332
395
  }
333
396
 
334
397
  value = native_slot_get(
@@ -336,7 +399,7 @@ static bool endmap_handler(void *closure, const void *hd, upb_status* s) {
336
399
  &frame->value_storage);
337
400
 
338
401
  Map_index_set(frame->map, key, value);
339
- map_pop_frame();
402
+ Map_set_frame(frame->map, Qnil);
340
403
 
341
404
  return true;
342
405
  }
@@ -416,9 +479,8 @@ static void *oneofbytes_handler(void *closure,
416
479
  }
417
480
 
418
481
  static bool oneofstring_end_handler(void* closure, const void* hd) {
419
- MessageHeader* msg = closure;
420
- const oneof_handlerdata_t *oneofdata = hd;
421
- rb_obj_freeze(DEREF(msg, oneofdata->ofs, VALUE));
482
+ VALUE rb_str = rb_str_new2("");
483
+ rb_obj_freeze(rb_str);
422
484
  return true;
423
485
  }
424
486
 
@@ -459,7 +521,7 @@ static void add_handlers_for_repeated_field(upb_handlers *h,
459
521
  const upb_fielddef *f,
460
522
  size_t offset) {
461
523
  upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
462
- upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset));
524
+ upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset, -1));
463
525
  upb_handlers_setstartseq(h, f, startseq_handler, &attr);
464
526
  upb_handlerattr_uninit(&attr);
465
527
 
@@ -493,7 +555,7 @@ static void add_handlers_for_repeated_field(upb_handlers *h,
493
555
  }
494
556
  case UPB_TYPE_MESSAGE: {
495
557
  upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
496
- upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, 0, f));
558
+ upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, 0, -1, f));
497
559
  upb_handlers_setstartsubmsg(h, f, appendsubmsg_handler, &attr);
498
560
  upb_handlerattr_uninit(&attr);
499
561
  break;
@@ -504,7 +566,15 @@ static void add_handlers_for_repeated_field(upb_handlers *h,
504
566
  // Set up handlers for a singular field.
505
567
  static void add_handlers_for_singular_field(upb_handlers *h,
506
568
  const upb_fielddef *f,
507
- size_t offset) {
569
+ size_t offset,
570
+ size_t hasbit_off) {
571
+ // The offset we pass to UPB points to the start of the Message,
572
+ // rather than the start of where our data is stored.
573
+ int32_t hasbit = -1;
574
+ if (hasbit_off != MESSAGE_FIELD_NO_HASBIT) {
575
+ hasbit = hasbit_off + sizeof(MessageHeader) * 8;
576
+ }
577
+
508
578
  switch (upb_fielddef_type(f)) {
509
579
  case UPB_TYPE_BOOL:
510
580
  case UPB_TYPE_INT32:
@@ -514,13 +584,13 @@ static void add_handlers_for_singular_field(upb_handlers *h,
514
584
  case UPB_TYPE_INT64:
515
585
  case UPB_TYPE_UINT64:
516
586
  case UPB_TYPE_DOUBLE:
517
- upb_msg_setscalarhandler(h, f, offset, -1);
587
+ upb_msg_setscalarhandler(h, f, offset, hasbit);
518
588
  break;
519
589
  case UPB_TYPE_STRING:
520
590
  case UPB_TYPE_BYTES: {
521
591
  bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES;
522
592
  upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
523
- upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset));
593
+ upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset, hasbit));
524
594
  upb_handlers_setstartstr(h, f,
525
595
  is_bytes ? bytes_handler : str_handler,
526
596
  &attr);
@@ -531,7 +601,9 @@ static void add_handlers_for_singular_field(upb_handlers *h,
531
601
  }
532
602
  case UPB_TYPE_MESSAGE: {
533
603
  upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
534
- upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, offset, f));
604
+ upb_handlerattr_sethandlerdata(&attr,
605
+ newsubmsghandlerdata(h, offset,
606
+ hasbit, f));
535
607
  upb_handlers_setstartsubmsg(h, f, submsg_handler, &attr);
536
608
  upb_handlerattr_uninit(&attr);
537
609
  break;
@@ -569,10 +641,12 @@ static void add_handlers_for_mapentry(const upb_msgdef* msgdef,
569
641
 
570
642
  add_handlers_for_singular_field(
571
643
  h, key_field,
572
- offsetof(map_parse_frame_t, key_storage));
644
+ offsetof(map_parse_frame_t, key_storage),
645
+ MESSAGE_FIELD_NO_HASBIT);
573
646
  add_handlers_for_singular_field(
574
647
  h, value_field,
575
- offsetof(map_parse_frame_t, value_storage));
648
+ offsetof(map_parse_frame_t, value_storage),
649
+ MESSAGE_FIELD_NO_HASBIT);
576
650
  }
577
651
 
578
652
  // Set up handlers for a oneof field.
@@ -622,6 +696,20 @@ static void add_handlers_for_oneof_field(upb_handlers *h,
622
696
  upb_handlerattr_uninit(&attr);
623
697
  }
624
698
 
699
+ static bool unknown_field_handler(void* closure, const void* hd,
700
+ const char* buf, size_t size) {
701
+ UPB_UNUSED(hd);
702
+
703
+ MessageHeader* msg = (MessageHeader*)closure;
704
+ if (msg->unknown_fields == NULL) {
705
+ msg->unknown_fields = malloc(sizeof(stringsink));
706
+ stringsink_init(msg->unknown_fields);
707
+ }
708
+
709
+ stringsink_string(msg->unknown_fields, NULL, buf, size, NULL);
710
+
711
+ return true;
712
+ }
625
713
 
626
714
  static void add_handlers_for_message(const void *closure, upb_handlers *h) {
627
715
  const upb_msgdef* msgdef = upb_handlers_msgdef(h);
@@ -643,6 +731,9 @@ static void add_handlers_for_message(const void *closure, upb_handlers *h) {
643
731
  desc->layout = create_layout(desc->msgdef);
644
732
  }
645
733
 
734
+ upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
735
+ upb_handlers_setunknown(h, unknown_field_handler, &attr);
736
+
646
737
  for (upb_msg_field_begin(&i, desc->msgdef);
647
738
  !upb_msg_field_done(&i);
648
739
  upb_msg_field_next(&i)) {
@@ -660,7 +751,8 @@ static void add_handlers_for_message(const void *closure, upb_handlers *h) {
660
751
  } else if (upb_fielddef_isseq(f)) {
661
752
  add_handlers_for_repeated_field(h, f, offset);
662
753
  } else {
663
- add_handlers_for_singular_field(h, f, offset);
754
+ add_handlers_for_singular_field(
755
+ h, f, offset, desc->layout->fields[upb_fielddef_index(f)].hasbit);
664
756
  }
665
757
  }
666
758
  }
@@ -775,10 +867,6 @@ VALUE Message_decode(VALUE klass, VALUE data) {
775
867
  msg_rb = rb_class_new_instance(0, NULL, msgklass);
776
868
  TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);
777
869
 
778
- // We generally expect this to be clear already, but clear it in case parsing
779
- // previously got interrupted somehow.
780
- rb_ary_clear(map_parse_frames);
781
-
782
870
  {
783
871
  const upb_pbdecodermethod* method = msgdef_decodermethod(desc);
784
872
  const upb_handlers* h = upb_pbdecodermethod_desthandlers(method);
@@ -800,19 +888,38 @@ VALUE Message_decode(VALUE klass, VALUE data) {
800
888
 
801
889
  /*
802
890
  * call-seq:
803
- * MessageClass.decode_json(data) => message
891
+ * MessageClass.decode_json(data, options = {}) => message
804
892
  *
805
893
  * Decodes the given data (as a string containing bytes in protocol buffers wire
806
894
  * format) under the interpretration given by this message class's definition
807
895
  * and returns a message object with the corresponding field values.
896
+ *
897
+ * @param options [Hash] options for the decoder
898
+ * ignore_unknown_fields: set true to ignore unknown fields (default is to raise an error)
808
899
  */
809
- VALUE Message_decode_json(VALUE klass, VALUE data) {
900
+ VALUE Message_decode_json(int argc, VALUE* argv, VALUE klass) {
810
901
  VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned);
811
902
  Descriptor* desc = ruby_to_Descriptor(descriptor);
812
903
  VALUE msgklass = Descriptor_msgclass(descriptor);
813
904
  VALUE msg_rb;
905
+ VALUE data = argv[0];
906
+ VALUE ignore_unknown_fields = Qfalse;
814
907
  MessageHeader* msg;
815
908
 
909
+ if (argc < 1 || argc > 2) {
910
+ rb_raise(rb_eArgError, "Expected 1 or 2 arguments.");
911
+ }
912
+
913
+ if (argc == 2) {
914
+ VALUE hash_args = argv[1];
915
+ if (TYPE(hash_args) != T_HASH) {
916
+ rb_raise(rb_eArgError, "Expected hash arguments.");
917
+ }
918
+
919
+ ignore_unknown_fields = rb_hash_lookup2(
920
+ hash_args, ID2SYM(rb_intern("ignore_unknown_fields")), Qfalse);
921
+ }
922
+
816
923
  if (TYPE(data) != T_STRING) {
817
924
  rb_raise(rb_eArgError, "Expected string for JSON data.");
818
925
  }
@@ -823,19 +930,17 @@ VALUE Message_decode_json(VALUE klass, VALUE data) {
823
930
  msg_rb = rb_class_new_instance(0, NULL, msgklass);
824
931
  TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);
825
932
 
826
- // We generally expect this to be clear already, but clear it in case parsing
827
- // previously got interrupted somehow.
828
- rb_ary_clear(map_parse_frames);
829
-
830
933
  {
831
934
  const upb_json_parsermethod* method = msgdef_jsonparsermethod(desc);
832
935
  stackenv se;
833
936
  upb_sink sink;
834
937
  upb_json_parser* parser;
938
+ DescriptorPool* pool = ruby_to_DescriptorPool(generated_pool);
835
939
  stackenv_init(&se, "Error occurred during parsing: %s");
836
940
 
837
941
  upb_sink_reset(&sink, get_fill_handlers(desc), msg);
838
- parser = upb_json_parser_create(&se.env, method, &sink);
942
+ parser = upb_json_parser_create(&se.env, method, pool->symtab,
943
+ &sink, ignore_unknown_fields);
839
944
  upb_bufsrc_putbuf(RSTRING_PTR(data), RSTRING_LEN(data),
840
945
  upb_json_parser_input(parser));
841
946
 
@@ -848,79 +953,12 @@ VALUE Message_decode_json(VALUE klass, VALUE data) {
848
953
  // -----------------------------------------------------------------------------
849
954
  // Serializing.
850
955
  // -----------------------------------------------------------------------------
851
- //
852
- // The code below also comes from upb's prototype Ruby binding, developed by
853
- // haberman@.
854
-
855
- /* stringsink *****************************************************************/
856
-
857
- // This should probably be factored into a common upb component.
858
-
859
- typedef struct {
860
- upb_byteshandler handler;
861
- upb_bytessink sink;
862
- char *ptr;
863
- size_t len, size;
864
- } stringsink;
865
-
866
- static void *stringsink_start(void *_sink, const void *hd, size_t size_hint) {
867
- stringsink *sink = _sink;
868
- sink->len = 0;
869
- return sink;
870
- }
871
-
872
- static size_t stringsink_string(void *_sink, const void *hd, const char *ptr,
873
- size_t len, const upb_bufhandle *handle) {
874
- stringsink *sink = _sink;
875
- size_t new_size = sink->size;
876
-
877
- UPB_UNUSED(hd);
878
- UPB_UNUSED(handle);
879
-
880
- while (sink->len + len > new_size) {
881
- new_size *= 2;
882
- }
883
-
884
- if (new_size != sink->size) {
885
- sink->ptr = realloc(sink->ptr, new_size);
886
- sink->size = new_size;
887
- }
888
-
889
- memcpy(sink->ptr + sink->len, ptr, len);
890
- sink->len += len;
891
-
892
- return len;
893
- }
894
-
895
- void stringsink_init(stringsink *sink) {
896
- upb_byteshandler_init(&sink->handler);
897
- upb_byteshandler_setstartstr(&sink->handler, stringsink_start, NULL);
898
- upb_byteshandler_setstring(&sink->handler, stringsink_string, NULL);
899
-
900
- upb_bytessink_reset(&sink->sink, &sink->handler, sink);
901
-
902
- sink->size = 32;
903
- sink->ptr = malloc(sink->size);
904
- sink->len = 0;
905
- }
906
-
907
- void stringsink_uninit(stringsink *sink) {
908
- free(sink->ptr);
909
- }
910
956
 
911
957
  /* msgvisitor *****************************************************************/
912
958
 
913
- // TODO: If/when we support proto2 semantics in addition to the current proto3
914
- // semantics, which means that we have true field presence, we will want to
915
- // modify msgvisitor so that it emits all present fields rather than all
916
- // non-default-value fields.
917
- //
918
- // Likewise, when implementing JSON serialization, we may need to have a
919
- // 'verbose' mode that outputs all fields and a 'concise' mode that outputs only
920
- // those with non-default values.
921
-
922
959
  static void putmsg(VALUE msg, const Descriptor* desc,
923
- upb_sink *sink, int depth);
960
+ upb_sink *sink, int depth, bool emit_defaults,
961
+ bool is_json, bool open_msg);
924
962
 
925
963
  static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
926
964
  upb_selector_t ret;
@@ -952,7 +990,7 @@ static void putstr(VALUE str, const upb_fielddef *f, upb_sink *sink) {
952
990
  }
953
991
 
954
992
  static void putsubmsg(VALUE submsg, const upb_fielddef *f, upb_sink *sink,
955
- int depth) {
993
+ int depth, bool emit_defaults, bool is_json) {
956
994
  upb_sink subsink;
957
995
  VALUE descriptor;
958
996
  Descriptor* subdesc;
@@ -963,18 +1001,22 @@ static void putsubmsg(VALUE submsg, const upb_fielddef *f, upb_sink *sink,
963
1001
  subdesc = ruby_to_Descriptor(descriptor);
964
1002
 
965
1003
  upb_sink_startsubmsg(sink, getsel(f, UPB_HANDLER_STARTSUBMSG), &subsink);
966
- putmsg(submsg, subdesc, &subsink, depth + 1);
1004
+ putmsg(submsg, subdesc, &subsink, depth + 1, emit_defaults, is_json, true);
967
1005
  upb_sink_endsubmsg(sink, getsel(f, UPB_HANDLER_ENDSUBMSG));
968
1006
  }
969
1007
 
970
1008
  static void putary(VALUE ary, const upb_fielddef *f, upb_sink *sink,
971
- int depth) {
1009
+ int depth, bool emit_defaults, bool is_json) {
972
1010
  upb_sink subsink;
973
1011
  upb_fieldtype_t type = upb_fielddef_type(f);
974
1012
  upb_selector_t sel = 0;
975
1013
  int size;
976
1014
 
977
1015
  if (ary == Qnil) return;
1016
+ if (!emit_defaults && NUM2INT(RepeatedField_length(ary)) == 0) return;
1017
+
1018
+ size = NUM2INT(RepeatedField_length(ary));
1019
+ if (size == 0 && !emit_defaults) return;
978
1020
 
979
1021
  upb_sink_startseq(sink, getsel(f, UPB_HANDLER_STARTSEQ), &subsink);
980
1022
 
@@ -982,7 +1024,6 @@ static void putary(VALUE ary, const upb_fielddef *f, upb_sink *sink,
982
1024
  sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
983
1025
  }
984
1026
 
985
- size = NUM2INT(RepeatedField_length(ary));
986
1027
  for (int i = 0; i < size; i++) {
987
1028
  void* memory = RepeatedField_index_native(ary, i);
988
1029
  switch (type) {
@@ -1005,7 +1046,8 @@ static void putary(VALUE ary, const upb_fielddef *f, upb_sink *sink,
1005
1046
  putstr(*((VALUE *)memory), f, &subsink);
1006
1047
  break;
1007
1048
  case UPB_TYPE_MESSAGE:
1008
- putsubmsg(*((VALUE *)memory), f, &subsink, depth);
1049
+ putsubmsg(*((VALUE *)memory), f, &subsink, depth,
1050
+ emit_defaults, is_json);
1009
1051
  break;
1010
1052
 
1011
1053
  #undef T
@@ -1019,7 +1061,14 @@ static void put_ruby_value(VALUE value,
1019
1061
  const upb_fielddef *f,
1020
1062
  VALUE type_class,
1021
1063
  int depth,
1022
- upb_sink *sink) {
1064
+ upb_sink *sink,
1065
+ bool emit_defaults,
1066
+ bool is_json) {
1067
+ if (depth > ENCODE_MAX_NESTING) {
1068
+ rb_raise(rb_eRuntimeError,
1069
+ "Maximum recursion depth exceeded during encoding.");
1070
+ }
1071
+
1023
1072
  upb_selector_t sel = 0;
1024
1073
  if (upb_fielddef_isprimitive(f)) {
1025
1074
  sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
@@ -1059,12 +1108,12 @@ static void put_ruby_value(VALUE value,
1059
1108
  putstr(value, f, sink);
1060
1109
  break;
1061
1110
  case UPB_TYPE_MESSAGE:
1062
- putsubmsg(value, f, sink, depth);
1111
+ putsubmsg(value, f, sink, depth, emit_defaults, is_json);
1063
1112
  }
1064
1113
  }
1065
1114
 
1066
1115
  static void putmap(VALUE map, const upb_fielddef *f, upb_sink *sink,
1067
- int depth) {
1116
+ int depth, bool emit_defaults, bool is_json) {
1068
1117
  Map* self;
1069
1118
  upb_sink subsink;
1070
1119
  const upb_fielddef* key_field;
@@ -1072,6 +1121,8 @@ static void putmap(VALUE map, const upb_fielddef *f, upb_sink *sink,
1072
1121
  Map_iter it;
1073
1122
 
1074
1123
  if (map == Qnil) return;
1124
+ if (!emit_defaults && Map_length(map) == 0) return;
1125
+
1075
1126
  self = ruby_to_Map(map);
1076
1127
 
1077
1128
  upb_sink_startseq(sink, getsel(f, UPB_HANDLER_STARTSEQ), &subsink);
@@ -1090,9 +1141,10 @@ static void putmap(VALUE map, const upb_fielddef *f, upb_sink *sink,
1090
1141
  &entry_sink);
1091
1142
  upb_sink_startmsg(&entry_sink);
1092
1143
 
1093
- put_ruby_value(key, key_field, Qnil, depth + 1, &entry_sink);
1144
+ put_ruby_value(key, key_field, Qnil, depth + 1, &entry_sink,
1145
+ emit_defaults, is_json);
1094
1146
  put_ruby_value(value, value_field, self->value_type_class, depth + 1,
1095
- &entry_sink);
1147
+ &entry_sink, emit_defaults, is_json);
1096
1148
 
1097
1149
  upb_sink_endmsg(&entry_sink, &status);
1098
1150
  upb_sink_endsubmsg(&subsink, getsel(f, UPB_HANDLER_ENDSUBMSG));
@@ -1101,13 +1153,143 @@ static void putmap(VALUE map, const upb_fielddef *f, upb_sink *sink,
1101
1153
  upb_sink_endseq(sink, getsel(f, UPB_HANDLER_ENDSEQ));
1102
1154
  }
1103
1155
 
1156
+ static const upb_handlers* msgdef_json_serialize_handlers(
1157
+ Descriptor* desc, bool preserve_proto_fieldnames);
1158
+
1159
+ static void putjsonany(VALUE msg_rb, const Descriptor* desc,
1160
+ upb_sink* sink, int depth, bool emit_defaults) {
1161
+ upb_status status;
1162
+ MessageHeader* msg = NULL;
1163
+ const upb_fielddef* type_field = upb_msgdef_itof(desc->msgdef, UPB_ANY_TYPE);
1164
+ const upb_fielddef* value_field = upb_msgdef_itof(desc->msgdef, UPB_ANY_VALUE);
1165
+
1166
+ size_t type_url_offset;
1167
+ VALUE type_url_str_rb;
1168
+ const upb_msgdef *payload_type = NULL;
1169
+
1170
+ TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);
1171
+
1172
+ upb_sink_startmsg(sink);
1173
+
1174
+ /* Handle type url */
1175
+ type_url_offset = desc->layout->fields[upb_fielddef_index(type_field)].offset;
1176
+ type_url_str_rb = DEREF(Message_data(msg), type_url_offset, VALUE);
1177
+ if (RSTRING_LEN(type_url_str_rb) > 0) {
1178
+ putstr(type_url_str_rb, type_field, sink);
1179
+ }
1180
+
1181
+ {
1182
+ const char* type_url_str = RSTRING_PTR(type_url_str_rb);
1183
+ size_t type_url_len = RSTRING_LEN(type_url_str_rb);
1184
+ DescriptorPool* pool = ruby_to_DescriptorPool(generated_pool);
1185
+
1186
+ if (type_url_len <= 20 ||
1187
+ strncmp(type_url_str, "type.googleapis.com/", 20) != 0) {
1188
+ rb_raise(rb_eRuntimeError, "Invalid type url: %s", type_url_str);
1189
+ return;
1190
+ }
1191
+
1192
+ /* Resolve type url */
1193
+ type_url_str += 20;
1194
+ type_url_len -= 20;
1195
+
1196
+ payload_type = upb_symtab_lookupmsg2(
1197
+ pool->symtab, type_url_str, type_url_len);
1198
+ if (payload_type == NULL) {
1199
+ rb_raise(rb_eRuntimeError, "Unknown type: %s", type_url_str);
1200
+ return;
1201
+ }
1202
+ }
1203
+
1204
+ {
1205
+ uint32_t value_offset;
1206
+ VALUE value_str_rb;
1207
+ const char* value_str;
1208
+ size_t value_len;
1209
+
1210
+ value_offset = desc->layout->fields[upb_fielddef_index(value_field)].offset;
1211
+ value_str_rb = DEREF(Message_data(msg), value_offset, VALUE);
1212
+ value_str = RSTRING_PTR(value_str_rb);
1213
+ value_len = RSTRING_LEN(value_str_rb);
1214
+
1215
+ if (value_len > 0) {
1216
+ VALUE payload_desc_rb = get_def_obj(payload_type);
1217
+ Descriptor* payload_desc = ruby_to_Descriptor(payload_desc_rb);
1218
+ VALUE payload_class = Descriptor_msgclass(payload_desc_rb);
1219
+ upb_sink subsink;
1220
+ bool is_wellknown;
1221
+
1222
+ VALUE payload_msg_rb = Message_decode(payload_class, value_str_rb);
1223
+
1224
+ is_wellknown =
1225
+ upb_msgdef_wellknowntype(payload_desc->msgdef) !=
1226
+ UPB_WELLKNOWN_UNSPECIFIED;
1227
+ if (is_wellknown) {
1228
+ upb_sink_startstr(sink, getsel(value_field, UPB_HANDLER_STARTSTR), 0,
1229
+ &subsink);
1230
+ }
1231
+
1232
+ subsink.handlers =
1233
+ msgdef_json_serialize_handlers(payload_desc, true);
1234
+ subsink.closure = sink->closure;
1235
+ putmsg(payload_msg_rb, payload_desc, &subsink, depth, emit_defaults, true,
1236
+ is_wellknown);
1237
+ }
1238
+ }
1239
+
1240
+ upb_sink_endmsg(sink, &status);
1241
+ }
1242
+
1243
+ static void putjsonlistvalue(
1244
+ VALUE msg_rb, const Descriptor* desc,
1245
+ upb_sink* sink, int depth, bool emit_defaults) {
1246
+ upb_status status;
1247
+ upb_sink subsink;
1248
+ MessageHeader* msg = NULL;
1249
+ const upb_fielddef* f = upb_msgdef_itof(desc->msgdef, 1);
1250
+ uint32_t offset =
1251
+ desc->layout->fields[upb_fielddef_index(f)].offset +
1252
+ sizeof(MessageHeader);
1253
+ VALUE ary;
1254
+
1255
+ TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);
1256
+
1257
+ upb_sink_startmsg(sink);
1258
+
1259
+ ary = DEREF(msg, offset, VALUE);
1260
+
1261
+ if (ary == Qnil || RepeatedField_size(ary) == 0) {
1262
+ upb_sink_startseq(sink, getsel(f, UPB_HANDLER_STARTSEQ), &subsink);
1263
+ upb_sink_endseq(sink, getsel(f, UPB_HANDLER_ENDSEQ));
1264
+ } else {
1265
+ putary(ary, f, sink, depth, emit_defaults, true);
1266
+ }
1267
+
1268
+ upb_sink_endmsg(sink, &status);
1269
+ }
1270
+
1104
1271
  static void putmsg(VALUE msg_rb, const Descriptor* desc,
1105
- upb_sink *sink, int depth) {
1272
+ upb_sink *sink, int depth, bool emit_defaults,
1273
+ bool is_json, bool open_msg) {
1106
1274
  MessageHeader* msg;
1107
1275
  upb_msg_field_iter i;
1108
1276
  upb_status status;
1109
1277
 
1110
- upb_sink_startmsg(sink);
1278
+ if (is_json &&
1279
+ upb_msgdef_wellknowntype(desc->msgdef) == UPB_WELLKNOWN_ANY) {
1280
+ putjsonany(msg_rb, desc, sink, depth, emit_defaults);
1281
+ return;
1282
+ }
1283
+
1284
+ if (is_json &&
1285
+ upb_msgdef_wellknowntype(desc->msgdef) == UPB_WELLKNOWN_LISTVALUE) {
1286
+ putjsonlistvalue(msg_rb, desc, sink, depth, emit_defaults);
1287
+ return;
1288
+ }
1289
+
1290
+ if (open_msg) {
1291
+ upb_sink_startmsg(sink);
1292
+ }
1111
1293
 
1112
1294
  // Protect against cycles (possible because users may freely reassign message
1113
1295
  // and repeated fields) by imposing a maximum recursion depth.
@@ -1118,6 +1300,13 @@ static void putmsg(VALUE msg_rb, const Descriptor* desc,
1118
1300
 
1119
1301
  TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);
1120
1302
 
1303
+ if (desc != msg->descriptor) {
1304
+ rb_raise(rb_eArgError,
1305
+ "The type of given msg is '%s', expect '%s'.",
1306
+ upb_msgdef_fullname(msg->descriptor->msgdef),
1307
+ upb_msgdef_fullname(desc->msgdef));
1308
+ }
1309
+
1121
1310
  for (upb_msg_field_begin(&i, desc->msgdef);
1122
1311
  !upb_msg_field_done(&i);
1123
1312
  upb_msg_field_next(&i)) {
@@ -1144,31 +1333,46 @@ static void putmsg(VALUE msg_rb, const Descriptor* desc,
1144
1333
 
1145
1334
  if (is_map_field(f)) {
1146
1335
  VALUE map = DEREF(msg, offset, VALUE);
1147
- if (map != Qnil) {
1148
- putmap(map, f, sink, depth);
1336
+ if (map != Qnil || emit_defaults) {
1337
+ putmap(map, f, sink, depth, emit_defaults, is_json);
1149
1338
  }
1150
1339
  } else if (upb_fielddef_isseq(f)) {
1151
1340
  VALUE ary = DEREF(msg, offset, VALUE);
1152
1341
  if (ary != Qnil) {
1153
- putary(ary, f, sink, depth);
1342
+ putary(ary, f, sink, depth, emit_defaults, is_json);
1154
1343
  }
1155
1344
  } else if (upb_fielddef_isstring(f)) {
1156
1345
  VALUE str = DEREF(msg, offset, VALUE);
1157
- if (is_matching_oneof || RSTRING_LEN(str) > 0) {
1346
+ bool is_default = false;
1347
+
1348
+ if (upb_msgdef_syntax(desc->msgdef) == UPB_SYNTAX_PROTO2) {
1349
+ is_default = layout_has(desc->layout, Message_data(msg), f) == Qfalse;
1350
+ } else if (upb_msgdef_syntax(desc->msgdef) == UPB_SYNTAX_PROTO3) {
1351
+ is_default = RSTRING_LEN(str) == 0;
1352
+ }
1353
+
1354
+ if (is_matching_oneof || emit_defaults || !is_default) {
1158
1355
  putstr(str, f, sink);
1159
1356
  }
1160
1357
  } else if (upb_fielddef_issubmsg(f)) {
1161
- putsubmsg(DEREF(msg, offset, VALUE), f, sink, depth);
1358
+ putsubmsg(DEREF(msg, offset, VALUE), f, sink, depth,
1359
+ emit_defaults, is_json);
1162
1360
  } else {
1163
1361
  upb_selector_t sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
1164
1362
 
1165
- #define T(upbtypeconst, upbtype, ctype, default_value) \
1166
- case upbtypeconst: { \
1167
- ctype value = DEREF(msg, offset, ctype); \
1168
- if (is_matching_oneof || value != default_value) { \
1169
- upb_sink_put##upbtype(sink, sel, value); \
1170
- } \
1171
- } \
1363
+ #define T(upbtypeconst, upbtype, ctype, default_value) \
1364
+ case upbtypeconst: { \
1365
+ ctype value = DEREF(msg, offset, ctype); \
1366
+ bool is_default = false; \
1367
+ if (upb_fielddef_haspresence(f)) { \
1368
+ is_default = layout_has(desc->layout, Message_data(msg), f) == Qfalse; \
1369
+ } else if (upb_msgdef_syntax(desc->msgdef) == UPB_SYNTAX_PROTO3) { \
1370
+ is_default = default_value == value; \
1371
+ } \
1372
+ if (is_matching_oneof || emit_defaults || !is_default) { \
1373
+ upb_sink_put##upbtype(sink, sel, value); \
1374
+ } \
1375
+ } \
1172
1376
  break;
1173
1377
 
1174
1378
  switch (upb_fielddef_type(f)) {
@@ -1191,7 +1395,14 @@ static void putmsg(VALUE msg_rb, const Descriptor* desc,
1191
1395
  }
1192
1396
  }
1193
1397
 
1194
- upb_sink_endmsg(sink, &status);
1398
+ stringsink* unknown = msg->unknown_fields;
1399
+ if (unknown != NULL) {
1400
+ upb_sink_putunknown(sink, unknown->ptr, unknown->len);
1401
+ }
1402
+
1403
+ if (open_msg) {
1404
+ upb_sink_endmsg(sink, &status);
1405
+ }
1195
1406
  }
1196
1407
 
1197
1408
  static const upb_handlers* msgdef_pb_serialize_handlers(Descriptor* desc) {
@@ -1246,7 +1457,7 @@ VALUE Message_encode(VALUE klass, VALUE msg_rb) {
1246
1457
  stackenv_init(&se, "Error occurred during encoding: %s");
1247
1458
  encoder = upb_pb_encoder_create(&se.env, serialize_handlers, &sink.sink);
1248
1459
 
1249
- putmsg(msg_rb, desc, upb_pb_encoder_input(encoder), 0);
1460
+ putmsg(msg_rb, desc, upb_pb_encoder_input(encoder), 0, false, false, true);
1250
1461
 
1251
1462
  ret = rb_str_new(sink.ptr, sink.len);
1252
1463
 
@@ -1259,15 +1470,19 @@ VALUE Message_encode(VALUE klass, VALUE msg_rb) {
1259
1470
 
1260
1471
  /*
1261
1472
  * call-seq:
1262
- * MessageClass.encode_json(msg) => json_string
1473
+ * MessageClass.encode_json(msg, options = {}) => json_string
1263
1474
  *
1264
1475
  * Encodes the given message object into its serialized JSON representation.
1476
+ * @param options [Hash] options for the encoder
1477
+ * preserve_proto_fieldnames: set true to use original fieldnames (default is to camelCase)
1478
+ * emit_defaults: set true to emit 0/false values (default is to omit them)
1265
1479
  */
1266
1480
  VALUE Message_encode_json(int argc, VALUE* argv, VALUE klass) {
1267
1481
  VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned);
1268
1482
  Descriptor* desc = ruby_to_Descriptor(descriptor);
1269
1483
  VALUE msg_rb;
1270
1484
  VALUE preserve_proto_fieldnames = Qfalse;
1485
+ VALUE emit_defaults = Qfalse;
1271
1486
  stringsink sink;
1272
1487
 
1273
1488
  if (argc < 1 || argc > 2) {
@@ -1283,6 +1498,9 @@ VALUE Message_encode_json(int argc, VALUE* argv, VALUE klass) {
1283
1498
  }
1284
1499
  preserve_proto_fieldnames = rb_hash_lookup2(
1285
1500
  hash_args, ID2SYM(rb_intern("preserve_proto_fieldnames")), Qfalse);
1501
+
1502
+ emit_defaults = rb_hash_lookup2(
1503
+ hash_args, ID2SYM(rb_intern("emit_defaults")), Qfalse);
1286
1504
  }
1287
1505
 
1288
1506
  stringsink_init(&sink);
@@ -1297,7 +1515,8 @@ VALUE Message_encode_json(int argc, VALUE* argv, VALUE klass) {
1297
1515
  stackenv_init(&se, "Error occurred during encoding: %s");
1298
1516
  printer = upb_json_printer_create(&se.env, serialize_handlers, &sink.sink);
1299
1517
 
1300
- putmsg(msg_rb, desc, upb_json_printer_input(printer), 0);
1518
+ putmsg(msg_rb, desc, upb_json_printer_input(printer), 0,
1519
+ RTEST(emit_defaults), true, true);
1301
1520
 
1302
1521
  ret = rb_enc_str_new(sink.ptr, sink.len, rb_utf8_encoding());
1303
1522
 
@@ -1308,3 +1527,91 @@ VALUE Message_encode_json(int argc, VALUE* argv, VALUE klass) {
1308
1527
  }
1309
1528
  }
1310
1529
 
1530
+ static void discard_unknown(VALUE msg_rb, const Descriptor* desc) {
1531
+ MessageHeader* msg;
1532
+ upb_msg_field_iter it;
1533
+
1534
+ TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);
1535
+
1536
+ stringsink* unknown = msg->unknown_fields;
1537
+ if (unknown != NULL) {
1538
+ stringsink_uninit(unknown);
1539
+ msg->unknown_fields = NULL;
1540
+ }
1541
+
1542
+ for (upb_msg_field_begin(&it, desc->msgdef);
1543
+ !upb_msg_field_done(&it);
1544
+ upb_msg_field_next(&it)) {
1545
+ upb_fielddef *f = upb_msg_iter_field(&it);
1546
+ uint32_t offset =
1547
+ desc->layout->fields[upb_fielddef_index(f)].offset +
1548
+ sizeof(MessageHeader);
1549
+
1550
+ if (upb_fielddef_containingoneof(f)) {
1551
+ uint32_t oneof_case_offset =
1552
+ desc->layout->fields[upb_fielddef_index(f)].case_offset +
1553
+ sizeof(MessageHeader);
1554
+ // For a oneof, check that this field is actually present -- skip all the
1555
+ // below if not.
1556
+ if (DEREF(msg, oneof_case_offset, uint32_t) !=
1557
+ upb_fielddef_number(f)) {
1558
+ continue;
1559
+ }
1560
+ // Otherwise, fall through to the appropriate singular-field handler
1561
+ // below.
1562
+ }
1563
+
1564
+ if (!upb_fielddef_issubmsg(f)) {
1565
+ continue;
1566
+ }
1567
+
1568
+ if (is_map_field(f)) {
1569
+ if (!upb_fielddef_issubmsg(map_field_value(f))) continue;
1570
+ VALUE map = DEREF(msg, offset, VALUE);
1571
+ if (map == Qnil) continue;
1572
+ Map_iter map_it;
1573
+ for (Map_begin(map, &map_it); !Map_done(&map_it); Map_next(&map_it)) {
1574
+ VALUE submsg = Map_iter_value(&map_it);
1575
+ VALUE descriptor = rb_ivar_get(submsg, descriptor_instancevar_interned);
1576
+ const Descriptor* subdesc = ruby_to_Descriptor(descriptor);
1577
+ discard_unknown(submsg, subdesc);
1578
+ }
1579
+ } else if (upb_fielddef_isseq(f)) {
1580
+ VALUE ary = DEREF(msg, offset, VALUE);
1581
+ if (ary == Qnil) continue;
1582
+ int size = NUM2INT(RepeatedField_length(ary));
1583
+ for (int i = 0; i < size; i++) {
1584
+ void* memory = RepeatedField_index_native(ary, i);
1585
+ VALUE submsg = *((VALUE *)memory);
1586
+ VALUE descriptor = rb_ivar_get(submsg, descriptor_instancevar_interned);
1587
+ const Descriptor* subdesc = ruby_to_Descriptor(descriptor);
1588
+ discard_unknown(submsg, subdesc);
1589
+ }
1590
+ } else {
1591
+ VALUE submsg = DEREF(msg, offset, VALUE);
1592
+ if (submsg == Qnil) continue;
1593
+ VALUE descriptor = rb_ivar_get(submsg, descriptor_instancevar_interned);
1594
+ const Descriptor* subdesc = ruby_to_Descriptor(descriptor);
1595
+ discard_unknown(submsg, subdesc);
1596
+ }
1597
+ }
1598
+ }
1599
+
1600
+ /*
1601
+ * call-seq:
1602
+ * Google::Protobuf.discard_unknown(msg)
1603
+ *
1604
+ * Discard unknown fields in the given message object and recursively discard
1605
+ * unknown fields in submessages.
1606
+ */
1607
+ VALUE Google_Protobuf_discard_unknown(VALUE self, VALUE msg_rb) {
1608
+ VALUE klass = CLASS_OF(msg_rb);
1609
+ VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned);
1610
+ Descriptor* desc = ruby_to_Descriptor(descriptor);
1611
+ if (klass == cRepeatedField || klass == cMap) {
1612
+ rb_raise(rb_eArgError, "Expected proto msg for discard unknown.");
1613
+ } else {
1614
+ discard_unknown(msg_rb, desc);
1615
+ }
1616
+ return Qnil;
1617
+ }