google-protobuf 3.15.6 → 3.19.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of google-protobuf might be problematic. Click here for more details.

@@ -167,7 +167,8 @@ VALUE Map_deep_copy(VALUE obj) {
167
167
  new_arena_rb);
168
168
  }
169
169
 
170
- const upb_map* Map_GetUpbMap(VALUE val, const upb_fielddef *field) {
170
+ const upb_map* Map_GetUpbMap(VALUE val, const upb_fielddef* field,
171
+ upb_arena* arena) {
171
172
  const upb_fielddef* key_field = map_field_key(field);
172
173
  const upb_fielddef* value_field = map_field_value(field);
173
174
  TypeInfo value_type_info = TypeInfo_get(value_field);
@@ -189,6 +190,7 @@ const upb_map* Map_GetUpbMap(VALUE val, const upb_fielddef *field) {
189
190
  rb_raise(cTypeError, "Map value type has wrong message/enum class");
190
191
  }
191
192
 
193
+ Arena_fuse(self->arena, arena);
192
194
  return self->map;
193
195
  }
194
196
 
@@ -236,7 +238,7 @@ static VALUE Map_merge_into_self(VALUE _self, VALUE hashmap) {
236
238
  upb_msg *self_msg = Map_GetMutable(_self);
237
239
  size_t iter = UPB_MAP_BEGIN;
238
240
 
239
- upb_arena_fuse(arena, Arena_get(other->arena));
241
+ Arena_fuse(other->arena, arena);
240
242
 
241
243
  if (self->key_type != other->key_type ||
242
244
  self->value_type_info.type != other->value_type_info.type ||
@@ -511,7 +513,7 @@ static VALUE Map_dup(VALUE _self) {
511
513
  upb_arena *arena = Arena_get(new_self->arena);
512
514
  upb_map *new_map = Map_GetMutable(new_map_rb);
513
515
 
514
- upb_arena_fuse(arena, Arena_get(self->arena));
516
+ Arena_fuse(self->arena, arena);
515
517
 
516
518
  while (upb_mapiter_next(self->map, &iter)) {
517
519
  upb_msgval key = upb_mapiter_key(self->map, iter);
@@ -678,7 +680,10 @@ void Map_register(VALUE module) {
678
680
  rb_define_method(klass, "delete", Map_delete, 1);
679
681
  rb_define_method(klass, "clear", Map_clear, 0);
680
682
  rb_define_method(klass, "length", Map_length, 0);
683
+ rb_define_method(klass, "size", Map_length, 0);
681
684
  rb_define_method(klass, "dup", Map_dup, 0);
685
+ // Also define #clone so that we don't inherit Object#clone.
686
+ rb_define_method(klass, "clone", Map_dup, 0);
682
687
  rb_define_method(klass, "==", Map_eq, 1);
683
688
  rb_define_method(klass, "freeze", Map_freeze, 0);
684
689
  rb_define_method(klass, "hash", Map_hash, 0);
@@ -44,7 +44,8 @@ VALUE Map_GetRubyWrapper(upb_map *map, upb_fieldtype_t key_type,
44
44
  // Gets the underlying upb_map for this Ruby map object, which must have
45
45
  // key/value type that match |field|. If this is not a map or the type doesn't
46
46
  // match, raises an exception.
47
- const upb_map *Map_GetUpbMap(VALUE val, const upb_fielddef *field);
47
+ const upb_map *Map_GetUpbMap(VALUE val, const upb_fielddef *field,
48
+ upb_arena *arena);
48
49
 
49
50
  // Implements #inspect for this map by appending its contents to |b|.
50
51
  void Map_Inspect(StringBuilder *b, const upb_map *map, upb_fieldtype_t key_type,
@@ -35,7 +35,6 @@
35
35
  #include "map.h"
36
36
  #include "protobuf.h"
37
37
  #include "repeated_field.h"
38
- #include "third_party/wyhash/wyhash.h"
39
38
 
40
39
  static VALUE cParseError = Qnil;
41
40
  static ID descriptor_instancevar_interned;
@@ -277,9 +276,9 @@ static void Message_setfield(upb_msg* msg, const upb_fielddef* f, VALUE val,
277
276
  upb_arena* arena) {
278
277
  upb_msgval msgval;
279
278
  if (upb_fielddef_ismap(f)) {
280
- msgval.map_val = Map_GetUpbMap(val, f);
279
+ msgval.map_val = Map_GetUpbMap(val, f, arena);
281
280
  } else if (upb_fielddef_isseq(f)) {
282
- msgval.array_val = RepeatedField_GetUpbArray(val, f);
281
+ msgval.array_val = RepeatedField_GetUpbArray(val, f, arena);
283
282
  } else {
284
283
  if (val == Qnil &&
285
284
  (upb_fielddef_issubmsg(f) || upb_fielddef_realcontainingoneof(f))) {
@@ -660,7 +659,7 @@ static VALUE Message_dup(VALUE _self) {
660
659
  // TODO(copy unknown fields?)
661
660
  // TODO(use official upb msg copy function)
662
661
  memcpy((upb_msg*)new_msg_self->msg, self->msg, size);
663
- upb_arena_fuse(Arena_get(new_msg_self->arena), Arena_get(self->arena));
662
+ Arena_fuse(self->arena, Arena_get(new_msg_self->arena));
664
663
  return new_msg;
665
664
  }
666
665
 
@@ -697,16 +696,13 @@ bool Message_Equal(const upb_msg *m1, const upb_msg *m2, const upb_msgdef *m) {
697
696
  * field is of a primitive type).
698
697
  */
699
698
  static VALUE Message_eq(VALUE _self, VALUE _other) {
700
- if (TYPE(_self) != TYPE(_other)) {
701
- return Qfalse;
702
- }
699
+ if (CLASS_OF(_self) != CLASS_OF(_other)) return Qfalse;
703
700
 
704
701
  Message* self = ruby_to_Message(_self);
705
702
  Message* other = ruby_to_Message(_other);
703
+ assert(self->msgdef == other->msgdef);
706
704
 
707
- return Message_Equal(self->msg, other->msg, self->msgdef)
708
- ? Qtrue
709
- : Qfalse;
705
+ return Message_Equal(self->msg, other->msg, self->msgdef) ? Qtrue : Qfalse;
710
706
  }
711
707
 
712
708
  uint64_t Message_Hash(const upb_msg* msg, const upb_msgdef* m, uint64_t seed) {
@@ -720,7 +716,7 @@ uint64_t Message_Hash(const upb_msg* msg, const upb_msgdef* m, uint64_t seed) {
720
716
  &size);
721
717
 
722
718
  if (data) {
723
- uint64_t ret = wyhash(data, size, seed, _wyp);
719
+ uint64_t ret = Wyhash(data, size, seed, kWyhashSalt);
724
720
  upb_arena_free(arena);
725
721
  return ret;
726
722
  } else {
@@ -737,7 +733,10 @@ uint64_t Message_Hash(const upb_msg* msg, const upb_msgdef* m, uint64_t seed) {
737
733
  */
738
734
  static VALUE Message_hash(VALUE _self) {
739
735
  Message* self = ruby_to_Message(_self);
740
- return INT2FIX(Message_Hash(self->msg, self->msgdef, 0));
736
+ uint64_t hash_value = Message_Hash(self->msg, self->msgdef, 0);
737
+ // RUBY_FIXNUM_MAX should be one less than a power of 2.
738
+ assert((RUBY_FIXNUM_MAX & (RUBY_FIXNUM_MAX + 1)) == 0);
739
+ return INT2FIX(hash_value & RUBY_FIXNUM_MAX);
741
740
  }
742
741
 
743
742
  /*
@@ -794,6 +793,14 @@ static VALUE Message_CreateHash(const upb_msg *msg, const upb_msgdef *m) {
794
793
  VALUE msg_value;
795
794
  VALUE msg_key;
796
795
 
796
+ if (!is_proto2 && upb_fielddef_issubmsg(field) &&
797
+ !upb_fielddef_isseq(field) && !upb_msg_has(msg, field)) {
798
+ // TODO: Legacy behavior, remove when we fix the is_proto2 differences.
799
+ msg_key = ID2SYM(rb_intern(upb_fielddef_name(field)));
800
+ rb_hash_aset(hash, msg_key, Qnil);
801
+ continue;
802
+ }
803
+
797
804
  // Do not include fields that are not present (oneof or optional fields).
798
805
  if (is_proto2 && upb_fielddef_haspresence(field) &&
799
806
  !upb_msg_has(msg, field)) {
@@ -1005,7 +1012,6 @@ static VALUE Message_decode_json(int argc, VALUE* argv, VALUE klass) {
1005
1012
  */
1006
1013
  static VALUE Message_encode(VALUE klass, VALUE msg_rb) {
1007
1014
  Message* msg = ruby_to_Message(msg_rb);
1008
- upb_arena *arena = upb_arena_new();
1009
1015
  const char *data;
1010
1016
  size_t size;
1011
1017
 
@@ -1013,6 +1019,8 @@ static VALUE Message_encode(VALUE klass, VALUE msg_rb) {
1013
1019
  rb_raise(rb_eArgError, "Message of wrong type.");
1014
1020
  }
1015
1021
 
1022
+ upb_arena *arena = upb_arena_new();
1023
+
1016
1024
  data = upb_encode(msg->msg, upb_msgdef_layout(msg->msgdef), arena,
1017
1025
  &size);
1018
1026
 
@@ -1306,7 +1314,7 @@ const upb_msg* Message_GetUpbMessage(VALUE value, const upb_msgdef* m,
1306
1314
  }
1307
1315
 
1308
1316
  Message* self = ruby_to_Message(value);
1309
- upb_arena_fuse(arena, Arena_get(self->arena));
1317
+ Arena_fuse(self->arena, arena);
1310
1318
 
1311
1319
  return self->msg;
1312
1320
  }
@@ -37,7 +37,7 @@
37
37
  #include "message.h"
38
38
  #include "repeated_field.h"
39
39
 
40
- VALUE cError;
40
+ VALUE cParseError;
41
41
  VALUE cTypeError;
42
42
 
43
43
  const upb_fielddef* map_field_key(const upb_fielddef* field) {
@@ -180,6 +180,7 @@ static void Arena_mark(void *data) {
180
180
  static void Arena_free(void *data) {
181
181
  Arena *arena = data;
182
182
  upb_arena_free(arena->arena);
183
+ xfree(arena);
183
184
  }
184
185
 
185
186
  static VALUE cArena;
@@ -203,6 +204,16 @@ upb_arena *Arena_get(VALUE _arena) {
203
204
  return arena->arena;
204
205
  }
205
206
 
207
+ void Arena_fuse(VALUE _arena, upb_arena *other) {
208
+ Arena *arena;
209
+ TypedData_Get_Struct(_arena, Arena, &Arena_type, arena);
210
+ if (!upb_arena_fuse(arena->arena, other)) {
211
+ rb_raise(rb_eRuntimeError,
212
+ "Unable to fuse arenas. This should never happen since Ruby does "
213
+ "not use initial blocks");
214
+ }
215
+ }
216
+
206
217
  VALUE Arena_new() {
207
218
  return Arena_alloc(cArena);
208
219
  }
@@ -237,8 +248,16 @@ void Arena_register(VALUE module) {
237
248
  // We use WeakMap for the cache. For Ruby <2.7 we also need a secondary Hash
238
249
  // to store WeakMap keys because Ruby <2.7 WeakMap doesn't allow non-finalizable
239
250
  // keys.
240
-
241
- #if RUBY_API_VERSION_CODE >= 20700
251
+ //
252
+ // We also need the secondary Hash if sizeof(long) < sizeof(VALUE), because this
253
+ // means it may not be possible to fit a pointer into a Fixnum. Keys are
254
+ // pointers, and if they fit into a Fixnum, Ruby doesn't collect them, but if
255
+ // they overflow and require allocating a Bignum, they could get collected
256
+ // prematurely, thus removing the cache entry. This happens on 64-bit Windows,
257
+ // on which pointers are 64 bits but longs are 32 bits. In this case, we enable
258
+ // the secondary Hash to hold the keys and prevent them from being collected.
259
+
260
+ #if RUBY_API_VERSION_CODE >= 20700 && SIZEOF_LONG >= SIZEOF_VALUE
242
261
  #define USE_SECONDARY_MAP 0
243
262
  #else
244
263
  #define USE_SECONDARY_MAP 1
@@ -251,15 +270,81 @@ void Arena_register(VALUE module) {
251
270
  // The object is used only for its identity; it does not contain any data.
252
271
  VALUE secondary_map = Qnil;
253
272
 
273
+ // Mutations to the map are under a mutex, because SecondaryMap_MaybeGC()
274
+ // iterates over the map which cannot happen in parallel with insertions, or
275
+ // Ruby will throw:
276
+ // can't add a new key into hash during iteration (RuntimeError)
277
+ VALUE secondary_map_mutex = Qnil;
278
+
279
+ // Lambda that will GC entries from the secondary map that are no longer present
280
+ // in the primary map.
281
+ VALUE gc_secondary_map_lambda = Qnil;
282
+ ID length;
283
+
284
+ extern VALUE weak_obj_cache;
285
+
254
286
  static void SecondaryMap_Init() {
255
287
  rb_gc_register_address(&secondary_map);
288
+ rb_gc_register_address(&gc_secondary_map_lambda);
289
+ rb_gc_register_address(&secondary_map_mutex);
256
290
  secondary_map = rb_hash_new();
291
+ gc_secondary_map_lambda = rb_eval_string(
292
+ "->(secondary, weak) {\n"
293
+ " secondary.delete_if { |k, v| !weak.key?(v) }\n"
294
+ "}\n");
295
+ secondary_map_mutex = rb_mutex_new();
296
+ length = rb_intern("length");
257
297
  }
258
298
 
259
- static VALUE SecondaryMap_Get(VALUE key) {
299
+ // The secondary map is a regular Hash, and will never shrink on its own.
300
+ // The main object cache is a WeakMap that will automatically remove entries
301
+ // when the target object is no longer reachable, but unless we manually
302
+ // remove the corresponding entries from the secondary map, it will grow
303
+ // without bound.
304
+ //
305
+ // To avoid this unbounded growth we periodically remove entries from the
306
+ // secondary map that are no longer present in the WeakMap. The logic of
307
+ // how often to perform this GC is an arbitrary tuning parameter that
308
+ // represents a straightforward CPU/memory tradeoff.
309
+ //
310
+ // Requires: secondary_map_mutex is held.
311
+ static void SecondaryMap_MaybeGC() {
312
+ PBRUBY_ASSERT(rb_mutex_locked_p(secondary_map_mutex) == Qtrue);
313
+ size_t weak_len = NUM2ULL(rb_funcall(weak_obj_cache, length, 0));
314
+ size_t secondary_len = RHASH_SIZE(secondary_map);
315
+ if (secondary_len < weak_len) {
316
+ // Logically this case should not be possible: a valid entry cannot exist in
317
+ // the weak table unless there is a corresponding entry in the secondary
318
+ // table. It should *always* be the case that secondary_len >= weak_len.
319
+ //
320
+ // However ObjectSpace::WeakMap#length (and therefore weak_len) is
321
+ // unreliable: it overreports its true length by including non-live objects.
322
+ // However these non-live objects are not yielded in iteration, so we may
323
+ // have previously deleted them from the secondary map in a previous
324
+ // invocation of SecondaryMap_MaybeGC().
325
+ //
326
+ // In this case, we can't measure any waste, so we just return.
327
+ return;
328
+ }
329
+ size_t waste = secondary_len - weak_len;
330
+ // GC if we could remove at least 2000 entries or 20% of the table size
331
+ // (whichever is greater). Since the cost of the GC pass is O(N), we
332
+ // want to make sure that we condition this on overall table size, to
333
+ // avoid O(N^2) CPU costs.
334
+ size_t threshold = PBRUBY_MAX(secondary_len * 0.2, 2000);
335
+ if (waste > threshold) {
336
+ rb_funcall(gc_secondary_map_lambda, rb_intern("call"), 2,
337
+ secondary_map, weak_obj_cache);
338
+ }
339
+ }
340
+
341
+ // Requires: secondary_map_mutex is held by this thread iff create == true.
342
+ static VALUE SecondaryMap_Get(VALUE key, bool create) {
343
+ PBRUBY_ASSERT(!create || rb_mutex_locked_p(secondary_map_mutex) == Qtrue);
260
344
  VALUE ret = rb_hash_lookup(secondary_map, key);
261
- if (ret == Qnil) {
262
- ret = rb_eval_string("Object.new");
345
+ if (ret == Qnil && create) {
346
+ SecondaryMap_MaybeGC();
347
+ ret = rb_class_new_instance(0, NULL, rb_cObject);
263
348
  rb_hash_aset(secondary_map, key, ret);
264
349
  }
265
350
  return ret;
@@ -267,14 +352,13 @@ static VALUE SecondaryMap_Get(VALUE key) {
267
352
 
268
353
  #endif
269
354
 
270
- static VALUE ObjectCache_GetKey(const void* key) {
271
- char buf[sizeof(key)];
272
- memcpy(&buf, &key, sizeof(key));
273
- intptr_t key_int = (intptr_t)key;
274
- PBRUBY_ASSERT((key_int & 3) == 0);
275
- VALUE ret = LL2NUM(key_int >> 2);
355
+ // Requires: secondary_map_mutex is held by this thread iff create == true.
356
+ static VALUE ObjectCache_GetKey(const void* key, bool create) {
357
+ VALUE key_val = (VALUE)key;
358
+ PBRUBY_ASSERT((key_val & 3) == 0);
359
+ VALUE ret = LL2NUM(key_val >> 2);
276
360
  #if USE_SECONDARY_MAP
277
- ret = SecondaryMap_Get(ret);
361
+ ret = SecondaryMap_Get(ret, create);
278
362
  #endif
279
363
  return ret;
280
364
  }
@@ -298,14 +382,20 @@ static void ObjectCache_Init() {
298
382
 
299
383
  void ObjectCache_Add(const void* key, VALUE val) {
300
384
  PBRUBY_ASSERT(ObjectCache_Get(key) == Qnil);
301
- VALUE key_rb = ObjectCache_GetKey(key);
385
+ #if USE_SECONDARY_MAP
386
+ rb_mutex_lock(secondary_map_mutex);
387
+ #endif
388
+ VALUE key_rb = ObjectCache_GetKey(key, true);
302
389
  rb_funcall(weak_obj_cache, item_set, 2, key_rb, val);
390
+ #if USE_SECONDARY_MAP
391
+ rb_mutex_unlock(secondary_map_mutex);
392
+ #endif
303
393
  PBRUBY_ASSERT(ObjectCache_Get(key) == val);
304
394
  }
305
395
 
306
396
  // Returns the cached object for this key, if any. Otherwise returns Qnil.
307
397
  VALUE ObjectCache_Get(const void* key) {
308
- VALUE key_rb = ObjectCache_GetKey(key);
398
+ VALUE key_rb = ObjectCache_GetKey(key, false);
309
399
  return rb_funcall(weak_obj_cache, item_get, 1, key_rb);
310
400
  }
311
401
 
@@ -368,8 +458,10 @@ void Init_protobuf_c() {
368
458
  Map_register(protobuf);
369
459
  Message_register(protobuf);
370
460
 
371
- cError = rb_const_get(protobuf, rb_intern("Error"));
461
+ cParseError = rb_const_get(protobuf, rb_intern("ParseError"));
462
+ rb_gc_register_mark_object(cParseError);
372
463
  cTypeError = rb_const_get(protobuf, rb_intern("TypeError"));
464
+ rb_gc_register_mark_object(cTypeError);
373
465
 
374
466
  rb_define_singleton_method(protobuf, "discard_unknown",
375
467
  Google_Protobuf_discard_unknown, 1);
@@ -55,6 +55,10 @@ const upb_fielddef* map_field_value(const upb_fielddef* field);
55
55
  VALUE Arena_new();
56
56
  upb_arena *Arena_get(VALUE arena);
57
57
 
58
+ // Fuses this arena to another, throwing a Ruby exception if this is not
59
+ // possible.
60
+ void Arena_fuse(VALUE arena, upb_arena *other);
61
+
58
62
  // Pins this Ruby object to the lifetime of this arena, so that as long as the
59
63
  // arena is alive this object will not be collected.
60
64
  //
@@ -106,6 +110,8 @@ extern VALUE cTypeError;
106
110
  #define PBRUBY_ASSERT(expr) assert(expr)
107
111
  #endif
108
112
 
113
+ #define PBRUBY_MAX(x, y) (((x) > (y)) ? (x) : (y))
114
+
109
115
  #define UPB_UNUSED(var) (void)var
110
116
 
111
117
  #endif // __GOOGLE_PROTOBUF_RUBY_PROTOBUF_H__
@@ -34,7 +34,6 @@
34
34
  #include "defs.h"
35
35
  #include "message.h"
36
36
  #include "protobuf.h"
37
- #include "third_party/wyhash/wyhash.h"
38
37
 
39
38
  // -----------------------------------------------------------------------------
40
39
  // Repeated field container type.
@@ -149,7 +148,8 @@ VALUE RepeatedField_deep_copy(VALUE _self) {
149
148
  return new_rptfield;
150
149
  }
151
150
 
152
- const upb_array* RepeatedField_GetUpbArray(VALUE val, const upb_fielddef *field) {
151
+ const upb_array* RepeatedField_GetUpbArray(VALUE val, const upb_fielddef* field,
152
+ upb_arena* arena) {
153
153
  RepeatedField* self;
154
154
  TypeInfo type_info = TypeInfo_get(field);
155
155
 
@@ -167,6 +167,7 @@ const upb_array* RepeatedField_GetUpbArray(VALUE val, const upb_fielddef *field)
167
167
  rb_raise(cTypeError, "Repeated field array has wrong message/enum class");
168
168
  }
169
169
 
170
+ Arena_fuse(self->arena, arena);
170
171
  return self->array;
171
172
  }
172
173
 
@@ -412,7 +413,7 @@ static VALUE RepeatedField_dup(VALUE _self) {
412
413
  int size = upb_array_size(self->array);
413
414
  int i;
414
415
 
415
- upb_arena_fuse(arena, Arena_get(self->arena));
416
+ Arena_fuse(self->arena, arena);
416
417
 
417
418
  for (i = 0; i < size; i++) {
418
419
  upb_msgval msgval = upb_array_get(self->array, i);
@@ -550,6 +551,7 @@ VALUE RepeatedField_plus(VALUE _self, VALUE list) {
550
551
  RepeatedField* dupped = ruby_to_RepeatedField(dupped_);
551
552
  upb_array *dupped_array = RepeatedField_GetMutable(dupped_);
552
553
  upb_arena* arena = Arena_get(dupped->arena);
554
+ Arena_fuse(list_rptfield->arena, arena);
553
555
  int size = upb_array_size(list_rptfield->array);
554
556
  int i;
555
557
 
@@ -44,7 +44,8 @@ VALUE RepeatedField_GetRubyWrapper(upb_array* msg, TypeInfo type_info,
44
44
  // Gets the underlying upb_array for this Ruby RepeatedField object, which must
45
45
  // have a type that matches |f|. If this is not a repeated field or the type
46
46
  // doesn't match, raises an exception.
47
- const upb_array* RepeatedField_GetUpbArray(VALUE value, const upb_fielddef* f);
47
+ const upb_array* RepeatedField_GetUpbArray(VALUE value, const upb_fielddef* f,
48
+ upb_arena* arena);
48
49
 
49
50
  // Implements #inspect for this repeated field by appending its contents to |b|.
50
51
  void RepeatedField_Inspect(StringBuilder* b, const upb_array* array,