grpc 1.19.0 → 1.20.0.pre1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of grpc might be problematic. Click here for more details.

Files changed (224) hide show
  1. checksums.yaml +4 -4
  2. data/Makefile +4131 -7903
  3. data/include/grpc/grpc.h +11 -6
  4. data/include/grpc/grpc_security.h +51 -9
  5. data/include/grpc/impl/codegen/byte_buffer.h +13 -0
  6. data/include/grpc/impl/codegen/grpc_types.h +4 -0
  7. data/include/grpc/impl/codegen/port_platform.h +37 -6
  8. data/include/grpc/impl/codegen/sync_posix.h +18 -0
  9. data/src/core/ext/filters/client_channel/client_channel.cc +560 -236
  10. data/src/core/ext/filters/client_channel/client_channel_channelz.h +2 -2
  11. data/src/core/ext/filters/client_channel/client_channel_factory.cc +22 -34
  12. data/src/core/ext/filters/client_channel/client_channel_factory.h +19 -38
  13. data/src/core/ext/filters/client_channel/global_subchannel_pool.cc +7 -4
  14. data/src/core/ext/filters/client_channel/http_connect_handshaker.cc +2 -2
  15. data/src/core/ext/filters/client_channel/lb_policy.cc +105 -28
  16. data/src/core/ext/filters/client_channel/lb_policy.h +259 -141
  17. data/src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc +29 -32
  18. data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc +789 -803
  19. data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_channel.h +3 -1
  20. data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_channel_secure.cc +2 -6
  21. data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.cc +1 -1
  22. data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.h +7 -1
  23. data/src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.cc +8 -8
  24. data/src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.h +2 -2
  25. data/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +127 -219
  26. data/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc +103 -282
  27. data/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h +4 -10
  28. data/src/core/ext/filters/client_channel/lb_policy/xds/xds.cc +709 -906
  29. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_channel_secure.cc +0 -43
  30. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_load_balancer_api.cc +8 -8
  31. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_load_balancer_api.h +2 -2
  32. data/src/core/ext/filters/client_channel/lb_policy_factory.h +1 -6
  33. data/src/core/ext/filters/client_channel/resolver.cc +54 -1
  34. data/src/core/ext/filters/client_channel/resolver.h +51 -22
  35. data/src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.cc +34 -86
  36. data/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.cc +29 -41
  37. data/src/core/ext/filters/client_channel/resolver/dns/native/dns_resolver.cc +32 -78
  38. data/src/core/ext/filters/client_channel/resolver/fake/fake_resolver.cc +109 -72
  39. data/src/core/ext/filters/client_channel/resolver/fake/fake_resolver.h +13 -8
  40. data/src/core/ext/filters/client_channel/resolver/sockaddr/sockaddr_resolver.cc +28 -63
  41. data/src/core/ext/filters/client_channel/resolver_factory.h +3 -1
  42. data/src/core/ext/filters/client_channel/resolver_registry.cc +5 -2
  43. data/src/core/ext/filters/client_channel/resolver_registry.h +5 -4
  44. data/src/core/ext/filters/client_channel/resolver_result_parsing.cc +69 -49
  45. data/src/core/ext/filters/client_channel/resolver_result_parsing.h +11 -8
  46. data/src/core/ext/filters/client_channel/resolving_lb_policy.cc +568 -0
  47. data/src/core/ext/filters/client_channel/resolving_lb_policy.h +141 -0
  48. data/src/core/ext/filters/client_channel/server_address.cc +0 -48
  49. data/src/core/ext/filters/client_channel/server_address.h +0 -10
  50. data/src/core/{lib/transport → ext/filters/client_channel}/service_config.cc +10 -5
  51. data/src/core/{lib/transport → ext/filters/client_channel}/service_config.h +16 -12
  52. data/src/core/ext/filters/client_channel/subchannel.cc +11 -16
  53. data/src/core/ext/filters/client_channel/subchannel.h +3 -0
  54. data/src/core/ext/filters/max_age/max_age_filter.cc +4 -1
  55. data/src/core/ext/filters/message_size/message_size_filter.cc +2 -2
  56. data/src/core/ext/transport/chttp2/client/insecure/channel_create.cc +45 -45
  57. data/src/core/ext/transport/chttp2/client/secure/secure_channel_create.cc +133 -134
  58. data/src/core/ext/transport/chttp2/transport/bin_decoder.cc +4 -4
  59. data/src/core/ext/transport/chttp2/transport/bin_decoder.h +4 -4
  60. data/src/core/ext/transport/chttp2/transport/bin_encoder.cc +7 -6
  61. data/src/core/ext/transport/chttp2/transport/bin_encoder.h +4 -3
  62. data/src/core/ext/transport/chttp2/transport/chttp2_transport.cc +37 -29
  63. data/src/core/ext/transport/chttp2/transport/flow_control.cc +1 -1
  64. data/src/core/ext/transport/chttp2/transport/frame_data.cc +2 -1
  65. data/src/core/ext/transport/chttp2/transport/frame_data.h +1 -1
  66. data/src/core/ext/transport/chttp2/transport/frame_goaway.cc +6 -5
  67. data/src/core/ext/transport/chttp2/transport/frame_goaway.h +3 -2
  68. data/src/core/ext/transport/chttp2/transport/frame_ping.cc +5 -4
  69. data/src/core/ext/transport/chttp2/transport/frame_ping.h +1 -1
  70. data/src/core/ext/transport/chttp2/transport/frame_rst_stream.cc +5 -4
  71. data/src/core/ext/transport/chttp2/transport/frame_rst_stream.h +2 -1
  72. data/src/core/ext/transport/chttp2/transport/frame_settings.cc +2 -1
  73. data/src/core/ext/transport/chttp2/transport/frame_settings.h +2 -1
  74. data/src/core/ext/transport/chttp2/transport/frame_window_update.cc +4 -4
  75. data/src/core/ext/transport/chttp2/transport/frame_window_update.h +1 -1
  76. data/src/core/ext/transport/chttp2/transport/hpack_parser.cc +7 -6
  77. data/src/core/ext/transport/chttp2/transport/hpack_parser.h +3 -2
  78. data/src/core/ext/transport/chttp2/transport/incoming_metadata.cc +9 -5
  79. data/src/core/ext/transport/chttp2/transport/incoming_metadata.h +6 -1
  80. data/src/core/ext/transport/chttp2/transport/internal.h +5 -4
  81. data/src/core/ext/transport/chttp2/transport/parsing.cc +9 -9
  82. data/src/core/ext/transport/chttp2/transport/writing.cc +1 -1
  83. data/src/core/ext/transport/inproc/inproc_transport.cc +8 -0
  84. data/src/core/lib/channel/channel_args.cc +2 -0
  85. data/src/core/lib/channel/channel_args.h +3 -0
  86. data/src/core/lib/channel/channel_stack.h +1 -1
  87. data/src/core/lib/channel/channel_trace.cc +4 -4
  88. data/src/core/lib/channel/channel_trace.h +4 -4
  89. data/src/core/lib/channel/channelz.cc +32 -19
  90. data/src/core/lib/channel/channelz.h +4 -4
  91. data/src/core/lib/channel/channelz_registry.cc +1 -1
  92. data/src/core/lib/channel/context.h +0 -3
  93. data/src/core/lib/channel/handshaker_registry.cc +7 -3
  94. data/src/core/lib/compression/algorithm_metadata.h +3 -3
  95. data/src/core/lib/compression/compression.cc +1 -1
  96. data/src/core/lib/compression/compression_internal.cc +2 -2
  97. data/src/core/lib/compression/stream_compression_gzip.cc +1 -1
  98. data/src/core/lib/debug/trace.h +2 -1
  99. data/src/core/lib/gpr/cpu_posix.cc +5 -3
  100. data/src/core/lib/gpr/sync_posix.cc +65 -4
  101. data/src/core/lib/gprpp/atomic.h +75 -5
  102. data/src/core/lib/gprpp/fork.cc +0 -2
  103. data/src/core/lib/gprpp/orphanable.h +3 -2
  104. data/src/core/lib/gprpp/ref_counted.h +9 -11
  105. data/src/core/lib/gprpp/thd.h +42 -7
  106. data/src/core/lib/gprpp/thd_posix.cc +31 -13
  107. data/src/core/lib/gprpp/thd_windows.cc +47 -34
  108. data/src/core/lib/http/httpcli.cc +3 -2
  109. data/src/core/lib/http/httpcli_security_connector.cc +0 -1
  110. data/src/core/lib/http/parser.cc +2 -1
  111. data/src/core/lib/http/parser.h +2 -1
  112. data/src/core/lib/iomgr/buffer_list.h +1 -1
  113. data/src/core/lib/iomgr/endpoint.cc +2 -2
  114. data/src/core/lib/iomgr/endpoint.h +3 -2
  115. data/src/core/lib/iomgr/error.cc +9 -9
  116. data/src/core/lib/iomgr/error.h +4 -3
  117. data/src/core/lib/iomgr/ev_epoll1_linux.cc +6 -0
  118. data/src/core/lib/iomgr/ev_epollex_linux.cc +14 -9
  119. data/src/core/lib/iomgr/ev_poll_posix.cc +7 -481
  120. data/src/core/lib/iomgr/ev_posix.cc +7 -3
  121. data/src/core/lib/iomgr/ev_posix.h +8 -0
  122. data/src/core/lib/iomgr/executor.cc +13 -0
  123. data/src/core/lib/iomgr/executor.h +2 -1
  124. data/src/core/lib/iomgr/internal_errqueue.cc +2 -4
  125. data/src/core/lib/iomgr/iomgr.cc +5 -0
  126. data/src/core/lib/iomgr/iomgr.h +7 -0
  127. data/src/core/lib/iomgr/iomgr_custom.cc +9 -2
  128. data/src/core/lib/iomgr/iomgr_internal.cc +6 -0
  129. data/src/core/lib/iomgr/iomgr_internal.h +9 -1
  130. data/src/core/lib/iomgr/iomgr_posix.cc +10 -2
  131. data/src/core/lib/iomgr/iomgr_windows.cc +10 -2
  132. data/src/core/lib/iomgr/port.h +19 -0
  133. data/src/core/lib/iomgr/tcp_client_windows.cc +6 -4
  134. data/src/core/lib/iomgr/tcp_custom.cc +1 -1
  135. data/src/core/lib/iomgr/tcp_posix.cc +158 -54
  136. data/src/core/lib/iomgr/tcp_windows.cc +1 -1
  137. data/src/core/lib/iomgr/wakeup_fd_posix.cc +1 -19
  138. data/src/core/lib/security/credentials/jwt/jwt_verifier.cc +10 -6
  139. data/src/core/lib/security/credentials/jwt/jwt_verifier.h +2 -1
  140. data/src/core/lib/security/credentials/tls/grpc_tls_credentials_options.h +3 -6
  141. data/src/core/lib/security/credentials/tls/spiffe_credentials.cc +129 -0
  142. data/src/core/lib/security/credentials/tls/spiffe_credentials.h +62 -0
  143. data/src/core/lib/security/security_connector/fake/fake_security_connector.cc +7 -2
  144. data/src/core/lib/security/security_connector/ssl/ssl_security_connector.cc +28 -17
  145. data/src/core/lib/security/security_connector/ssl_utils.cc +134 -0
  146. data/src/core/lib/security/security_connector/ssl_utils.h +32 -0
  147. data/src/core/lib/security/security_connector/tls/spiffe_security_connector.cc +426 -0
  148. data/src/core/lib/security/security_connector/tls/spiffe_security_connector.h +122 -0
  149. data/src/core/lib/security/transport/auth_filters.h +2 -2
  150. data/src/core/lib/security/transport/client_auth_filter.cc +35 -39
  151. data/src/core/lib/security/transport/secure_endpoint.cc +2 -2
  152. data/src/core/lib/security/transport/security_handshaker.cc +4 -3
  153. data/src/core/lib/slice/percent_encoding.cc +3 -3
  154. data/src/core/lib/slice/percent_encoding.h +3 -3
  155. data/src/core/lib/slice/slice.cc +27 -30
  156. data/src/core/lib/slice/slice_hash_table.h +2 -2
  157. data/src/core/lib/slice/slice_intern.cc +1 -1
  158. data/src/core/lib/slice/slice_internal.h +14 -3
  159. data/src/core/lib/slice/slice_weak_hash_table.h +4 -4
  160. data/src/core/lib/surface/byte_buffer_reader.cc +17 -0
  161. data/src/core/lib/surface/call.cc +8 -3
  162. data/src/core/lib/surface/completion_queue.cc +134 -148
  163. data/src/core/lib/surface/init.cc +78 -30
  164. data/src/core/lib/surface/init.h +1 -0
  165. data/src/core/lib/surface/lame_client.cc +4 -6
  166. data/src/core/lib/surface/version.cc +1 -1
  167. data/src/core/lib/transport/metadata.cc +66 -33
  168. data/src/core/lib/transport/metadata_batch.cc +1 -1
  169. data/src/core/lib/transport/metadata_batch.h +1 -1
  170. data/src/core/lib/transport/timeout_encoding.cc +1 -1
  171. data/src/core/lib/transport/timeout_encoding.h +1 -1
  172. data/src/core/lib/transport/transport.h +4 -3
  173. data/src/core/tsi/alts/handshaker/alts_handshaker_client.cc +3 -3
  174. data/src/core/tsi/alts/handshaker/alts_handshaker_client.h +1 -1
  175. data/src/core/tsi/alts/handshaker/transport_security_common_api.cc +4 -3
  176. data/src/core/tsi/alts/handshaker/transport_security_common_api.h +1 -1
  177. data/src/core/tsi/alts/zero_copy_frame_protector/alts_zero_copy_grpc_protector.cc +1 -1
  178. data/src/core/tsi/ssl_transport_security.cc +1 -5
  179. data/src/core/tsi/ssl_transport_security.h +24 -4
  180. data/src/ruby/bin/math_pb.rb +18 -16
  181. data/src/ruby/ext/grpc/rb_grpc_imports.generated.c +4 -0
  182. data/src/ruby/ext/grpc/rb_grpc_imports.generated.h +6 -0
  183. data/src/ruby/lib/grpc/generic/rpc_server.rb +1 -1
  184. data/src/ruby/lib/grpc/version.rb +1 -1
  185. data/src/ruby/pb/README.md +1 -1
  186. data/src/ruby/pb/grpc/health/v1/health_pb.rb +13 -10
  187. data/src/ruby/pb/grpc/health/v1/health_services_pb.rb +18 -0
  188. data/src/ruby/pb/src/proto/grpc/testing/empty_pb.rb +3 -1
  189. data/src/ruby/pb/src/proto/grpc/testing/messages_pb.rb +58 -56
  190. data/src/ruby/pb/src/proto/grpc/testing/test_pb.rb +2 -0
  191. data/third_party/cares/cares/ares.h +12 -0
  192. data/third_party/cares/cares/ares_create_query.c +5 -1
  193. data/third_party/cares/cares/ares_data.c +74 -73
  194. data/third_party/cares/cares/ares_destroy.c +6 -1
  195. data/third_party/cares/cares/ares_gethostbyaddr.c +5 -5
  196. data/third_party/cares/cares/ares_gethostbyname.c +15 -4
  197. data/third_party/cares/cares/ares_getnameinfo.c +11 -0
  198. data/third_party/cares/cares/ares_init.c +274 -173
  199. data/third_party/cares/cares/ares_library_init.c +21 -3
  200. data/third_party/cares/cares/ares_options.c +6 -2
  201. data/third_party/cares/cares/ares_parse_naptr_reply.c +7 -6
  202. data/third_party/cares/cares/ares_parse_ptr_reply.c +4 -2
  203. data/third_party/cares/cares/ares_platform.c +7 -0
  204. data/third_party/cares/cares/ares_private.h +19 -11
  205. data/third_party/cares/cares/ares_process.c +27 -2
  206. data/third_party/cares/cares/ares_rules.h +1 -1
  207. data/third_party/cares/cares/ares_search.c +7 -0
  208. data/third_party/cares/cares/ares_send.c +6 -0
  209. data/third_party/cares/cares/ares_strsplit.c +174 -0
  210. data/third_party/cares/cares/ares_strsplit.h +43 -0
  211. data/third_party/cares/cares/ares_version.h +4 -4
  212. data/third_party/cares/cares/config-win32.h +1 -1
  213. data/third_party/cares/cares/inet_ntop.c +2 -3
  214. data/third_party/cares/config_darwin/ares_config.h +3 -0
  215. data/third_party/cares/config_freebsd/ares_config.h +3 -0
  216. data/third_party/cares/config_linux/ares_config.h +3 -0
  217. data/third_party/cares/config_openbsd/ares_config.h +3 -0
  218. metadata +39 -37
  219. data/src/core/ext/filters/client_channel/request_routing.cc +0 -946
  220. data/src/core/ext/filters/client_channel/request_routing.h +0 -181
  221. data/src/core/lib/gprpp/atomic_with_atm.h +0 -57
  222. data/src/core/lib/gprpp/atomic_with_std.h +0 -35
  223. data/src/core/lib/iomgr/wakeup_fd_cv.cc +0 -107
  224. data/src/core/lib/iomgr/wakeup_fd_cv.h +0 -69
@@ -232,7 +232,7 @@ class SubchannelList : public InternallyRefCounted<SubchannelListType> {
232
232
  protected:
233
233
  SubchannelList(LoadBalancingPolicy* policy, TraceFlag* tracer,
234
234
  const ServerAddressList& addresses, grpc_combiner* combiner,
235
- grpc_client_channel_factory* client_channel_factory,
235
+ LoadBalancingPolicy::ChannelControlHelper* helper,
236
236
  const grpc_channel_args& args);
237
237
 
238
238
  virtual ~SubchannelList();
@@ -486,7 +486,7 @@ template <typename SubchannelListType, typename SubchannelDataType>
486
486
  SubchannelList<SubchannelListType, SubchannelDataType>::SubchannelList(
487
487
  LoadBalancingPolicy* policy, TraceFlag* tracer,
488
488
  const ServerAddressList& addresses, grpc_combiner* combiner,
489
- grpc_client_channel_factory* client_channel_factory,
489
+ LoadBalancingPolicy::ChannelControlHelper* helper,
490
490
  const grpc_channel_args& args)
491
491
  : InternallyRefCounted<SubchannelListType>(tracer),
492
492
  policy_(policy),
@@ -505,16 +505,11 @@ SubchannelList<SubchannelListType, SubchannelDataType>::SubchannelList(
505
505
  inhibit_health_checking_ = grpc_channel_arg_get_bool(
506
506
  grpc_channel_args_find(&args, GRPC_ARG_INHIBIT_HEALTH_CHECKING), false);
507
507
  static const char* keys_to_remove[] = {GRPC_ARG_SUBCHANNEL_ADDRESS,
508
- GRPC_ARG_SERVER_ADDRESS_LIST,
509
508
  GRPC_ARG_INHIBIT_HEALTH_CHECKING};
510
509
  // Create a subchannel for each address.
511
510
  for (size_t i = 0; i < addresses.size(); i++) {
512
- // If there were any balancer addresses, we would have chosen grpclb
513
- // policy, which does not use a SubchannelList.
514
511
  GPR_ASSERT(!addresses[i].IsBalancer());
515
- InlinedVector<grpc_arg, 4> args_to_add;
516
- args_to_add.emplace_back(
517
- SubchannelPoolInterface::CreateChannelArg(policy_->subchannel_pool()));
512
+ InlinedVector<grpc_arg, 3> args_to_add;
518
513
  const size_t subchannel_address_arg_index = args_to_add.size();
519
514
  args_to_add.emplace_back(
520
515
  Subchannel::CreateSubchannelAddressArg(&addresses[i].address()));
@@ -527,8 +522,7 @@ SubchannelList<SubchannelListType, SubchannelDataType>::SubchannelList(
527
522
  &args, keys_to_remove, GPR_ARRAY_SIZE(keys_to_remove),
528
523
  args_to_add.data(), args_to_add.size());
529
524
  gpr_free(args_to_add[subchannel_address_arg_index].value.string);
530
- Subchannel* subchannel = grpc_client_channel_factory_create_subchannel(
531
- client_channel_factory, new_args);
525
+ Subchannel* subchannel = helper->CreateSubchannel(*new_args);
532
526
  grpc_channel_args_destroy(new_args);
533
527
  if (subchannel == nullptr) {
534
528
  // Subchannel could not be created.
@@ -26,14 +26,13 @@
26
26
  /// channel that uses pick_first to select from the list of balancer
27
27
  /// addresses.
28
28
  ///
29
- /// The first time the xDS policy gets a request for a pick or to exit the idle
30
- /// state, \a StartPickingLocked() is called. This method is responsible for
31
- /// instantiating the internal *streaming* call to the LB server (whichever
32
- /// address pick_first chose). The call will be complete when either the
33
- /// balancer sends status or when we cancel the call (e.g., because we are
34
- /// shutting down). If needed, we retry the call. If we received at least one
35
- /// valid message from the server, a new call attempt will be made immediately;
36
- /// otherwise, we apply back-off delays between attempts.
29
+ /// When we get our initial update, we instantiate the internal *streaming*
30
+ /// call to the LB server (whichever address pick_first chose). The call
31
+ /// will be complete when either the balancer sends status or when we cancel
32
+ /// the call (e.g., because we are shutting down). If needed, we retry the
33
+ /// call. If we received at least one valid message from the server, a new
34
+ /// call attempt will be made immediately; otherwise, we apply back-off
35
+ /// delays between attempts.
37
36
  ///
38
37
  /// We maintain an internal child policy (round_robin) instance for distributing
39
38
  /// requests across backends. Whenever we receive a new serverlist from
@@ -70,7 +69,6 @@
70
69
  #include <grpc/support/time.h>
71
70
 
72
71
  #include "src/core/ext/filters/client_channel/client_channel.h"
73
- #include "src/core/ext/filters/client_channel/client_channel_factory.h"
74
72
  #include "src/core/ext/filters/client_channel/lb_policy/xds/xds.h"
75
73
  #include "src/core/ext/filters/client_channel/lb_policy/xds/xds_channel.h"
76
74
  #include "src/core/ext/filters/client_channel/lb_policy/xds/xds_client_stats.h"
@@ -80,6 +78,7 @@
80
78
  #include "src/core/ext/filters/client_channel/parse_address.h"
81
79
  #include "src/core/ext/filters/client_channel/resolver/fake/fake_resolver.h"
82
80
  #include "src/core/ext/filters/client_channel/server_address.h"
81
+ #include "src/core/ext/filters/client_channel/service_config.h"
83
82
  #include "src/core/lib/backoff/backoff.h"
84
83
  #include "src/core/lib/channel/channel_args.h"
85
84
  #include "src/core/lib/channel/channel_stack.h"
@@ -100,7 +99,6 @@
100
99
  #include "src/core/lib/surface/call.h"
101
100
  #include "src/core/lib/surface/channel.h"
102
101
  #include "src/core/lib/surface/channel_init.h"
103
- #include "src/core/lib/transport/service_config.h"
104
102
  #include "src/core/lib/transport/static_metadata.h"
105
103
 
106
104
  #define GRPC_XDS_INITIAL_CONNECT_BACKOFF_SECONDS 1
@@ -123,164 +121,199 @@ class XdsLb : public LoadBalancingPolicy {
123
121
 
124
122
  const char* name() const override { return kXds; }
125
123
 
126
- void UpdateLocked(const grpc_channel_args& args,
127
- grpc_json* lb_config) override;
128
- bool PickLocked(PickState* pick, grpc_error** error) override;
129
- void CancelPickLocked(PickState* pick, grpc_error* error) override;
130
- void CancelMatchingPicksLocked(uint32_t initial_metadata_flags_mask,
131
- uint32_t initial_metadata_flags_eq,
132
- grpc_error* error) override;
133
- void NotifyOnStateChangeLocked(grpc_connectivity_state* state,
134
- grpc_closure* closure) override;
135
- grpc_connectivity_state CheckConnectivityLocked(
136
- grpc_error** connectivity_error) override;
137
- void HandOffPendingPicksLocked(LoadBalancingPolicy* new_policy) override;
138
- void ExitIdleLocked() override;
124
+ void UpdateLocked(UpdateArgs args) override;
139
125
  void ResetBackoffLocked() override;
140
126
  void FillChildRefsForChannelz(
141
127
  channelz::ChildRefsList* child_subchannels,
142
128
  channelz::ChildRefsList* child_channels) override;
143
129
 
144
130
  private:
145
- /// Linked list of pending pick requests. It stores all information needed to
146
- /// eventually call pick() on them. They mainly stay pending waiting for the
147
- /// child policy to be created.
148
- ///
149
- /// Note that when a pick is sent to the child policy, we inject our own
150
- /// on_complete callback, so that we can intercept the result before
151
- /// invoking the original on_complete callback. This allows us to set the
152
- /// LB token metadata and add client_stats to the call context.
153
- /// See \a pending_pick_complete() for details.
154
- struct PendingPick {
155
- // The xds lb instance that created the wrapping. This instance is not
156
- // owned; reference counts are untouched. It's used only for logging
157
- // purposes.
158
- XdsLb* xdslb_policy;
159
- // The original pick.
160
- PickState* pick;
161
- // Our on_complete closure and the original one.
162
- grpc_closure on_complete;
163
- grpc_closure* original_on_complete;
164
- // Stats for client-side load reporting.
165
- RefCountedPtr<XdsLbClientStats> client_stats;
166
- // Next pending pick.
167
- PendingPick* next = nullptr;
168
- };
169
-
170
- /// Contains a call to the LB server and all the data related to the call.
171
- class BalancerCallState : public InternallyRefCounted<BalancerCallState> {
131
+ /// Contains a channel to the LB server and all the data related to the
132
+ /// channel.
133
+ class BalancerChannelState
134
+ : public InternallyRefCounted<BalancerChannelState> {
172
135
  public:
173
- explicit BalancerCallState(
174
- RefCountedPtr<LoadBalancingPolicy> parent_xdslb_policy);
136
+ /// Contains a call to the LB server and all the data related to the call.
137
+ class BalancerCallState : public InternallyRefCounted<BalancerCallState> {
138
+ public:
139
+ explicit BalancerCallState(RefCountedPtr<BalancerChannelState> lb_chand);
175
140
 
176
- // It's the caller's responsibility to ensure that Orphan() is called from
177
- // inside the combiner.
178
- void Orphan() override;
141
+ // It's the caller's responsibility to ensure that Orphan() is called from
142
+ // inside the combiner.
143
+ void Orphan() override;
179
144
 
180
- void StartQuery();
145
+ void StartQuery();
181
146
 
182
- XdsLbClientStats* client_stats() const { return client_stats_.get(); }
147
+ RefCountedPtr<XdsLbClientStats> client_stats() const {
148
+ return client_stats_;
149
+ }
183
150
 
184
- bool seen_initial_response() const { return seen_initial_response_; }
151
+ bool seen_initial_response() const { return seen_initial_response_; }
185
152
 
186
- private:
187
- // So Delete() can access our private dtor.
188
- template <typename T>
189
- friend void grpc_core::Delete(T*);
153
+ private:
154
+ // So Delete() can access our private dtor.
155
+ template <typename T>
156
+ friend void grpc_core::Delete(T*);
190
157
 
191
- ~BalancerCallState();
158
+ ~BalancerCallState();
192
159
 
193
- XdsLb* xdslb_policy() const {
194
- return static_cast<XdsLb*>(xdslb_policy_.get());
195
- }
160
+ XdsLb* xdslb_policy() const { return lb_chand_->xdslb_policy_.get(); }
196
161
 
197
- void ScheduleNextClientLoadReportLocked();
198
- void SendClientLoadReportLocked();
162
+ bool IsCurrentCallOnChannel() const {
163
+ return this == lb_chand_->lb_calld_.get();
164
+ }
199
165
 
200
- static bool LoadReportCountersAreZero(xds_grpclb_request* request);
166
+ void ScheduleNextClientLoadReportLocked();
167
+ void SendClientLoadReportLocked();
168
+
169
+ static bool LoadReportCountersAreZero(xds_grpclb_request* request);
170
+
171
+ static void MaybeSendClientLoadReportLocked(void* arg, grpc_error* error);
172
+ static void OnInitialRequestSentLocked(void* arg, grpc_error* error);
173
+ static void OnBalancerMessageReceivedLocked(void* arg, grpc_error* error);
174
+ static void OnBalancerStatusReceivedLocked(void* arg, grpc_error* error);
175
+
176
+ // The owning LB channel.
177
+ RefCountedPtr<BalancerChannelState> lb_chand_;
178
+
179
+ // The streaming call to the LB server. Always non-NULL.
180
+ grpc_call* lb_call_ = nullptr;
181
+
182
+ // recv_initial_metadata
183
+ grpc_metadata_array lb_initial_metadata_recv_;
184
+
185
+ // send_message
186
+ grpc_byte_buffer* send_message_payload_ = nullptr;
187
+ grpc_closure lb_on_initial_request_sent_;
188
+
189
+ // recv_message
190
+ grpc_byte_buffer* recv_message_payload_ = nullptr;
191
+ grpc_closure lb_on_balancer_message_received_;
192
+ bool seen_initial_response_ = false;
193
+
194
+ // recv_trailing_metadata
195
+ grpc_closure lb_on_balancer_status_received_;
196
+ grpc_metadata_array lb_trailing_metadata_recv_;
197
+ grpc_status_code lb_call_status_;
198
+ grpc_slice lb_call_status_details_;
199
+
200
+ // The stats for client-side load reporting associated with this LB call.
201
+ // Created after the first serverlist is received.
202
+ RefCountedPtr<XdsLbClientStats> client_stats_;
203
+ grpc_millis client_stats_report_interval_ = 0;
204
+ grpc_timer client_load_report_timer_;
205
+ bool client_load_report_timer_callback_pending_ = false;
206
+ bool last_client_load_report_counters_were_zero_ = false;
207
+ bool client_load_report_is_due_ = false;
208
+ // The closure used for either the load report timer or the callback for
209
+ // completion of sending the load report.
210
+ grpc_closure client_load_report_closure_;
211
+ };
212
+
213
+ BalancerChannelState(const char* balancer_name,
214
+ const grpc_channel_args& args,
215
+ RefCountedPtr<XdsLb> parent_xdslb_policy);
216
+ ~BalancerChannelState();
201
217
 
202
- static void MaybeSendClientLoadReportLocked(void* arg, grpc_error* error);
203
- static void OnInitialRequestSentLocked(void* arg, grpc_error* error);
204
- static void OnBalancerMessageReceivedLocked(void* arg, grpc_error* error);
205
- static void OnBalancerStatusReceivedLocked(void* arg, grpc_error* error);
218
+ void Orphan() override;
206
219
 
207
- // The owning LB policy.
208
- RefCountedPtr<LoadBalancingPolicy> xdslb_policy_;
220
+ grpc_channel* channel() const { return channel_; }
221
+ BalancerCallState* lb_calld() const { return lb_calld_.get(); }
209
222
 
210
- // The streaming call to the LB server. Always non-NULL.
211
- grpc_call* lb_call_ = nullptr;
223
+ bool IsCurrentChannel() const {
224
+ return this == xdslb_policy_->lb_chand_.get();
225
+ }
226
+ bool IsPendingChannel() const {
227
+ return this == xdslb_policy_->pending_lb_chand_.get();
228
+ }
229
+ bool HasActiveCall() const { return lb_calld_ != nullptr; }
212
230
 
213
- // recv_initial_metadata
214
- grpc_metadata_array lb_initial_metadata_recv_;
231
+ void StartCallRetryTimerLocked();
232
+ static void OnCallRetryTimerLocked(void* arg, grpc_error* error);
233
+ void StartCallLocked();
215
234
 
216
- // send_message
217
- grpc_byte_buffer* send_message_payload_ = nullptr;
218
- grpc_closure lb_on_initial_request_sent_;
235
+ private:
236
+ // The owning LB policy.
237
+ RefCountedPtr<XdsLb> xdslb_policy_;
238
+
239
+ // The channel and its status.
240
+ grpc_channel* channel_;
241
+ bool shutting_down_ = false;
242
+
243
+ // The data associated with the current LB call. It holds a ref to this LB
244
+ // channel. It's instantiated every time we query for backends. It's reset
245
+ // whenever the current LB call is no longer needed (e.g., the LB policy is
246
+ // shutting down, or the LB call has ended). A non-NULL lb_calld_ always
247
+ // contains a non-NULL lb_call_.
248
+ OrphanablePtr<BalancerCallState> lb_calld_;
249
+ BackOff lb_call_backoff_;
250
+ grpc_timer lb_call_retry_timer_;
251
+ grpc_closure lb_on_call_retry_;
252
+ bool retry_timer_callback_pending_ = false;
253
+ };
219
254
 
220
- // recv_message
221
- grpc_byte_buffer* recv_message_payload_ = nullptr;
222
- grpc_closure lb_on_balancer_message_received_;
223
- bool seen_initial_response_ = false;
255
+ class Picker : public SubchannelPicker {
256
+ public:
257
+ Picker(UniquePtr<SubchannelPicker> child_picker,
258
+ RefCountedPtr<XdsLbClientStats> client_stats)
259
+ : child_picker_(std::move(child_picker)),
260
+ client_stats_(std::move(client_stats)) {}
224
261
 
225
- // recv_trailing_metadata
226
- grpc_closure lb_on_balancer_status_received_;
227
- grpc_metadata_array lb_trailing_metadata_recv_;
228
- grpc_status_code lb_call_status_;
229
- grpc_slice lb_call_status_details_;
262
+ PickResult Pick(PickArgs* pick, grpc_error** error) override;
230
263
 
231
- // The stats for client-side load reporting associated with this LB call.
232
- // Created after the first serverlist is received.
264
+ private:
265
+ UniquePtr<SubchannelPicker> child_picker_;
233
266
  RefCountedPtr<XdsLbClientStats> client_stats_;
234
- grpc_millis client_stats_report_interval_ = 0;
235
- grpc_timer client_load_report_timer_;
236
- bool client_load_report_timer_callback_pending_ = false;
237
- bool last_client_load_report_counters_were_zero_ = false;
238
- bool client_load_report_is_due_ = false;
239
- // The closure used for either the load report timer or the callback for
240
- // completion of sending the load report.
241
- grpc_closure client_load_report_closure_;
267
+ };
268
+
269
+ class Helper : public ChannelControlHelper {
270
+ public:
271
+ explicit Helper(RefCountedPtr<XdsLb> parent) : parent_(std::move(parent)) {}
272
+
273
+ Subchannel* CreateSubchannel(const grpc_channel_args& args) override;
274
+ grpc_channel* CreateChannel(const char* target,
275
+ const grpc_channel_args& args) override;
276
+ void UpdateState(grpc_connectivity_state state, grpc_error* state_error,
277
+ UniquePtr<SubchannelPicker> picker) override;
278
+ void RequestReresolution() override;
279
+
280
+ void set_child(LoadBalancingPolicy* child) { child_ = child; }
281
+
282
+ private:
283
+ bool CalledByPendingChild() const;
284
+ bool CalledByCurrentChild() const;
285
+
286
+ RefCountedPtr<XdsLb> parent_;
287
+ LoadBalancingPolicy* child_ = nullptr;
242
288
  };
243
289
 
244
290
  ~XdsLb();
245
291
 
246
292
  void ShutdownLocked() override;
247
293
 
248
- // Helper function used in ctor and UpdateLocked().
249
- void ProcessChannelArgsLocked(const grpc_channel_args& args);
294
+ // Helper function used in UpdateLocked().
295
+ void ProcessAddressesAndChannelArgsLocked(const ServerAddressList& addresses,
296
+ const grpc_channel_args& args);
250
297
 
251
298
  // Parses the xds config given the JSON node of the first child of XdsConfig.
252
299
  // If parsing succeeds, updates \a balancer_name, and updates \a
253
- // child_policy_json_dump_ and \a fallback_policy_json_dump_ if they are also
300
+ // child_policy_config_ and \a fallback_policy_config_ if they are also
254
301
  // found. Does nothing upon failure.
255
- void ParseLbConfig(grpc_json* xds_config_json);
302
+ void ParseLbConfig(Config* xds_config);
256
303
 
257
- // Methods for dealing with the balancer channel and call.
258
- void StartPickingLocked();
259
- void StartBalancerCallLocked();
260
- static void OnFallbackTimerLocked(void* arg, grpc_error* error);
261
- void StartBalancerCallRetryTimerLocked();
262
- static void OnBalancerCallRetryTimerLocked(void* arg, grpc_error* error);
263
- static void OnBalancerChannelConnectivityChangedLocked(void* arg,
264
- grpc_error* error);
304
+ BalancerChannelState* LatestLbChannel() const {
305
+ return pending_lb_chand_ != nullptr ? pending_lb_chand_.get()
306
+ : lb_chand_.get();
307
+ }
265
308
 
266
- // Pending pick methods.
267
- static void PendingPickCleanup(PendingPick* pp);
268
- PendingPick* PendingPickCreate(PickState* pick);
269
- void AddPendingPick(PendingPick* pp);
270
- static void OnPendingPickComplete(void* arg, grpc_error* error);
309
+ // Callback to enter fallback mode.
310
+ static void OnFallbackTimerLocked(void* arg, grpc_error* error);
271
311
 
272
312
  // Methods for dealing with the child policy.
273
313
  void CreateOrUpdateChildPolicyLocked();
274
314
  grpc_channel_args* CreateChildPolicyArgsLocked();
275
- void CreateChildPolicyLocked(const char* name, Args args);
276
- bool PickFromChildPolicyLocked(bool force_async, PendingPick* pp,
277
- grpc_error** error);
278
- void UpdateConnectivityStateFromChildPolicyLocked(
279
- grpc_error* child_state_error);
280
- static void OnChildPolicyConnectivityChangedLocked(void* arg,
281
- grpc_error* error);
282
- static void OnChildPolicyRequestReresolutionLocked(void* arg,
283
- grpc_error* error);
315
+ OrphanablePtr<LoadBalancingPolicy> CreateChildPolicyLocked(
316
+ const char* name, const grpc_channel_args* args);
284
317
 
285
318
  // Who the client is trying to communicate with.
286
319
  const char* server_name_ = nullptr;
@@ -292,35 +325,18 @@ class XdsLb : public LoadBalancingPolicy {
292
325
  grpc_channel_args* args_ = nullptr;
293
326
 
294
327
  // Internal state.
295
- bool started_picking_ = false;
296
328
  bool shutting_down_ = false;
297
- grpc_connectivity_state_tracker state_tracker_;
298
329
 
299
330
  // The channel for communicating with the LB server.
300
- grpc_channel* lb_channel_ = nullptr;
331
+ OrphanablePtr<BalancerChannelState> lb_chand_;
332
+ OrphanablePtr<BalancerChannelState> pending_lb_chand_;
301
333
  // Mutex to protect the channel to the LB server. This is used when
302
334
  // processing a channelz request.
303
- gpr_mu lb_channel_mu_;
304
- grpc_connectivity_state lb_channel_connectivity_;
305
- grpc_closure lb_channel_on_connectivity_changed_;
306
- // Are we already watching the LB channel's connectivity?
307
- bool watching_lb_channel_ = false;
308
- // Response generator to inject address updates into lb_channel_.
309
- RefCountedPtr<FakeResolverResponseGenerator> response_generator_;
310
-
311
- // The data associated with the current LB call. It holds a ref to this LB
312
- // policy. It's initialized every time we query for backends. It's reset to
313
- // NULL whenever the current LB call is no longer needed (e.g., the LB policy
314
- // is shutting down, or the LB call has ended). A non-NULL lb_calld_ always
315
- // contains a non-NULL lb_call_.
316
- OrphanablePtr<BalancerCallState> lb_calld_;
335
+ // TODO(juanlishen): Replace this with atomic.
336
+ gpr_mu lb_chand_mu_;
337
+
317
338
  // Timeout in milliseconds for the LB call. 0 means no deadline.
318
339
  int lb_call_timeout_ms_ = 0;
319
- // Balancer call retry state.
320
- BackOff lb_call_backoff_;
321
- bool retry_timer_callback_pending_ = false;
322
- grpc_timer lb_call_retry_timer_;
323
- grpc_closure lb_on_call_retry_;
324
340
 
325
341
  // The deserialized response from the balancer. May be nullptr until one
326
342
  // such response has arrived.
@@ -328,7 +344,7 @@ class XdsLb : public LoadBalancingPolicy {
328
344
 
329
345
  // Timeout in milliseconds for before using fallback backend addresses.
330
346
  // 0 means not using fallback.
331
- UniquePtr<char> fallback_policy_json_string_;
347
+ RefCountedPtr<Config> fallback_policy_config_;
332
348
  int lb_fallback_timeout_ms_ = 0;
333
349
  // The backend addresses from the resolver.
334
350
  UniquePtr<ServerAddressList> fallback_backend_addresses_;
@@ -337,17 +353,130 @@ class XdsLb : public LoadBalancingPolicy {
337
353
  grpc_timer lb_fallback_timer_;
338
354
  grpc_closure lb_on_fallback_;
339
355
 
340
- // Pending picks that are waiting on the xDS policy's connectivity.
341
- PendingPick* pending_picks_ = nullptr;
342
-
343
356
  // The policy to use for the backends.
357
+ RefCountedPtr<Config> child_policy_config_;
344
358
  OrphanablePtr<LoadBalancingPolicy> child_policy_;
345
- UniquePtr<char> child_policy_json_string_;
346
- grpc_connectivity_state child_connectivity_state_;
347
- grpc_closure on_child_connectivity_changed_;
348
- grpc_closure on_child_request_reresolution_;
359
+ OrphanablePtr<LoadBalancingPolicy> pending_child_policy_;
360
+ // Lock held when modifying the value of child_policy_ or
361
+ // pending_child_policy_.
362
+ gpr_mu child_policy_mu_;
349
363
  };
350
364
 
365
+ //
366
+ // XdsLb::Picker
367
+ //
368
+
369
+ XdsLb::PickResult XdsLb::Picker::Pick(PickArgs* pick, grpc_error** error) {
370
+ // TODO(roth): Add support for drop handling.
371
+ // Forward pick to child policy.
372
+ PickResult result = child_picker_->Pick(pick, error);
373
+ // If pick succeeded, add client stats.
374
+ if (result == PickResult::PICK_COMPLETE &&
375
+ pick->connected_subchannel != nullptr && client_stats_ != nullptr) {
376
+ // TODO(roth): Add support for client stats.
377
+ }
378
+ return result;
379
+ }
380
+
381
+ //
382
+ // XdsLb::Helper
383
+ //
384
+
385
+ bool XdsLb::Helper::CalledByPendingChild() const {
386
+ GPR_ASSERT(child_ != nullptr);
387
+ return child_ == parent_->pending_child_policy_.get();
388
+ }
389
+
390
+ bool XdsLb::Helper::CalledByCurrentChild() const {
391
+ GPR_ASSERT(child_ != nullptr);
392
+ return child_ == parent_->child_policy_.get();
393
+ }
394
+
395
+ Subchannel* XdsLb::Helper::CreateSubchannel(const grpc_channel_args& args) {
396
+ if (parent_->shutting_down_ ||
397
+ (!CalledByPendingChild() && !CalledByCurrentChild())) {
398
+ return nullptr;
399
+ }
400
+ return parent_->channel_control_helper()->CreateSubchannel(args);
401
+ }
402
+
403
+ grpc_channel* XdsLb::Helper::CreateChannel(const char* target,
404
+ const grpc_channel_args& args) {
405
+ if (parent_->shutting_down_ ||
406
+ (!CalledByPendingChild() && !CalledByCurrentChild())) {
407
+ return nullptr;
408
+ }
409
+ return parent_->channel_control_helper()->CreateChannel(target, args);
410
+ }
411
+
412
+ void XdsLb::Helper::UpdateState(grpc_connectivity_state state,
413
+ grpc_error* state_error,
414
+ UniquePtr<SubchannelPicker> picker) {
415
+ if (parent_->shutting_down_) {
416
+ GRPC_ERROR_UNREF(state_error);
417
+ return;
418
+ }
419
+ // If this request is from the pending child policy, ignore it until
420
+ // it reports READY, at which point we swap it into place.
421
+ if (CalledByPendingChild()) {
422
+ if (grpc_lb_xds_trace.enabled()) {
423
+ gpr_log(GPR_INFO,
424
+ "[xdslb %p helper %p] pending child policy %p reports state=%s",
425
+ parent_.get(), this, parent_->pending_child_policy_.get(),
426
+ grpc_connectivity_state_name(state));
427
+ }
428
+ if (state != GRPC_CHANNEL_READY) {
429
+ GRPC_ERROR_UNREF(state_error);
430
+ return;
431
+ }
432
+ grpc_pollset_set_del_pollset_set(
433
+ parent_->child_policy_->interested_parties(),
434
+ parent_->interested_parties());
435
+ MutexLock lock(&parent_->child_policy_mu_);
436
+ parent_->child_policy_ = std::move(parent_->pending_child_policy_);
437
+ } else if (!CalledByCurrentChild()) {
438
+ // This request is from an outdated child, so ignore it.
439
+ GRPC_ERROR_UNREF(state_error);
440
+ return;
441
+ }
442
+ // TODO(juanlishen): When in fallback mode, pass the child picker
443
+ // through without wrapping it. (Or maybe use a different helper for
444
+ // the fallback policy?)
445
+ GPR_ASSERT(parent_->lb_chand_ != nullptr);
446
+ RefCountedPtr<XdsLbClientStats> client_stats =
447
+ parent_->lb_chand_->lb_calld() == nullptr
448
+ ? nullptr
449
+ : parent_->lb_chand_->lb_calld()->client_stats();
450
+ parent_->channel_control_helper()->UpdateState(
451
+ state, state_error,
452
+ UniquePtr<SubchannelPicker>(
453
+ New<Picker>(std::move(picker), std::move(client_stats))));
454
+ }
455
+
456
+ void XdsLb::Helper::RequestReresolution() {
457
+ if (parent_->shutting_down_) return;
458
+ // If there is a pending child policy, ignore re-resolution requests
459
+ // from the current child policy (or any outdated child).
460
+ if (parent_->pending_child_policy_ != nullptr && !CalledByPendingChild()) {
461
+ return;
462
+ }
463
+ if (grpc_lb_xds_trace.enabled()) {
464
+ gpr_log(GPR_INFO,
465
+ "[xdslb %p] Re-resolution requested from the internal RR policy "
466
+ "(%p).",
467
+ parent_.get(), parent_->child_policy_.get());
468
+ }
469
+ GPR_ASSERT(parent_->lb_chand_ != nullptr);
470
+ // If we are talking to a balancer, we expect to get updated addresses
471
+ // from the balancer, so we can ignore the re-resolution request from
472
+ // the child policy. Otherwise, pass the re-resolution request up to the
473
+ // channel.
474
+ if (parent_->lb_chand_->lb_calld() == nullptr ||
475
+ !parent_->lb_chand_->lb_calld()->seen_initial_response()) {
476
+ parent_->channel_control_helper()->RequestReresolution();
477
+ }
478
+ }
479
+
351
480
  //
352
481
  // serverlist parsing code
353
482
  //
@@ -410,28 +539,111 @@ void ParseServer(const xds_grpclb_server* server, grpc_resolved_address* addr) {
410
539
  }
411
540
 
412
541
  // Returns addresses extracted from \a serverlist.
413
- UniquePtr<ServerAddressList> ProcessServerlist(
414
- const xds_grpclb_serverlist* serverlist) {
415
- auto addresses = MakeUnique<ServerAddressList>();
542
+ ServerAddressList ProcessServerlist(const xds_grpclb_serverlist* serverlist) {
543
+ ServerAddressList addresses;
416
544
  for (size_t i = 0; i < serverlist->num_servers; ++i) {
417
545
  const xds_grpclb_server* server = serverlist->servers[i];
418
546
  if (!IsServerValid(serverlist->servers[i], i, false)) continue;
419
547
  grpc_resolved_address addr;
420
548
  ParseServer(server, &addr);
421
- addresses->emplace_back(addr, nullptr);
549
+ addresses.emplace_back(addr, nullptr);
422
550
  }
423
551
  return addresses;
424
552
  }
425
553
 
426
554
  //
427
- // XdsLb::BalancerCallState
555
+ // XdsLb::BalancerChannelState
556
+ //
557
+
558
+ XdsLb::BalancerChannelState::BalancerChannelState(
559
+ const char* balancer_name, const grpc_channel_args& args,
560
+ grpc_core::RefCountedPtr<grpc_core::XdsLb> parent_xdslb_policy)
561
+ : InternallyRefCounted<BalancerChannelState>(&grpc_lb_xds_trace),
562
+ xdslb_policy_(std::move(parent_xdslb_policy)),
563
+ lb_call_backoff_(
564
+ BackOff::Options()
565
+ .set_initial_backoff(GRPC_XDS_INITIAL_CONNECT_BACKOFF_SECONDS *
566
+ 1000)
567
+ .set_multiplier(GRPC_XDS_RECONNECT_BACKOFF_MULTIPLIER)
568
+ .set_jitter(GRPC_XDS_RECONNECT_JITTER)
569
+ .set_max_backoff(GRPC_XDS_RECONNECT_MAX_BACKOFF_SECONDS * 1000)) {
570
+ channel_ = xdslb_policy_->channel_control_helper()->CreateChannel(
571
+ balancer_name, args);
572
+ GPR_ASSERT(channel_ != nullptr);
573
+ StartCallLocked();
574
+ }
575
+
576
+ XdsLb::BalancerChannelState::~BalancerChannelState() {
577
+ grpc_channel_destroy(channel_);
578
+ }
579
+
580
+ void XdsLb::BalancerChannelState::Orphan() {
581
+ shutting_down_ = true;
582
+ lb_calld_.reset();
583
+ if (retry_timer_callback_pending_) grpc_timer_cancel(&lb_call_retry_timer_);
584
+ Unref(DEBUG_LOCATION, "lb_channel_orphaned");
585
+ }
586
+
587
+ void XdsLb::BalancerChannelState::StartCallRetryTimerLocked() {
588
+ grpc_millis next_try = lb_call_backoff_.NextAttemptTime();
589
+ if (grpc_lb_xds_trace.enabled()) {
590
+ gpr_log(GPR_INFO,
591
+ "[xdslb %p] Failed to connect to LB server (lb_chand: %p)...",
592
+ xdslb_policy_.get(), this);
593
+ grpc_millis timeout = next_try - ExecCtx::Get()->Now();
594
+ if (timeout > 0) {
595
+ gpr_log(GPR_INFO, "[xdslb %p] ... retry_timer_active in %" PRId64 "ms.",
596
+ xdslb_policy_.get(), timeout);
597
+ } else {
598
+ gpr_log(GPR_INFO, "[xdslb %p] ... retry_timer_active immediately.",
599
+ xdslb_policy_.get());
600
+ }
601
+ }
602
+ Ref(DEBUG_LOCATION, "on_balancer_call_retry_timer").release();
603
+ GRPC_CLOSURE_INIT(&lb_on_call_retry_, &OnCallRetryTimerLocked, this,
604
+ grpc_combiner_scheduler(xdslb_policy_->combiner()));
605
+ grpc_timer_init(&lb_call_retry_timer_, next_try, &lb_on_call_retry_);
606
+ retry_timer_callback_pending_ = true;
607
+ }
608
+
609
+ void XdsLb::BalancerChannelState::OnCallRetryTimerLocked(void* arg,
610
+ grpc_error* error) {
611
+ BalancerChannelState* lb_chand = static_cast<BalancerChannelState*>(arg);
612
+ lb_chand->retry_timer_callback_pending_ = false;
613
+ if (!lb_chand->shutting_down_ && error == GRPC_ERROR_NONE &&
614
+ lb_chand->lb_calld_ == nullptr) {
615
+ if (grpc_lb_xds_trace.enabled()) {
616
+ gpr_log(GPR_INFO,
617
+ "[xdslb %p] Restarting call to LB server (lb_chand: %p)",
618
+ lb_chand->xdslb_policy_.get(), lb_chand);
619
+ }
620
+ lb_chand->StartCallLocked();
621
+ }
622
+ lb_chand->Unref(DEBUG_LOCATION, "on_balancer_call_retry_timer");
623
+ }
624
+
625
+ void XdsLb::BalancerChannelState::StartCallLocked() {
626
+ if (shutting_down_) return;
627
+ GPR_ASSERT(channel_ != nullptr);
628
+ GPR_ASSERT(lb_calld_ == nullptr);
629
+ lb_calld_ = MakeOrphanable<BalancerCallState>(Ref());
630
+ if (grpc_lb_xds_trace.enabled()) {
631
+ gpr_log(GPR_INFO,
632
+ "[xdslb %p] Query for backends (lb_chand: %p, lb_calld: %p)",
633
+ xdslb_policy_.get(), this, lb_calld_.get());
634
+ }
635
+ lb_calld_->StartQuery();
636
+ }
637
+
638
+ //
639
+ // XdsLb::BalancerChannelState::BalancerCallState
428
640
  //
429
641
 
430
- XdsLb::BalancerCallState::BalancerCallState(
431
- RefCountedPtr<LoadBalancingPolicy> parent_xdslb_policy)
642
+ XdsLb::BalancerChannelState::BalancerCallState::BalancerCallState(
643
+ RefCountedPtr<BalancerChannelState> lb_chand)
432
644
  : InternallyRefCounted<BalancerCallState>(&grpc_lb_xds_trace),
433
- xdslb_policy_(std::move(parent_xdslb_policy)) {
434
- GPR_ASSERT(xdslb_policy_ != nullptr);
645
+ lb_chand_(std::move(lb_chand)) {
646
+ GPR_ASSERT(xdslb_policy() != nullptr);
435
647
  GPR_ASSERT(!xdslb_policy()->shutting_down_);
436
648
  // Init the LB call. Note that the LB call will progress every time there's
437
649
  // activity in xdslb_policy_->interested_parties(), which is comprised of
@@ -443,8 +655,8 @@ XdsLb::BalancerCallState::BalancerCallState(
443
655
  ? GRPC_MILLIS_INF_FUTURE
444
656
  : ExecCtx::Get()->Now() + xdslb_policy()->lb_call_timeout_ms_;
445
657
  lb_call_ = grpc_channel_create_pollset_set_call(
446
- xdslb_policy()->lb_channel_, nullptr, GRPC_PROPAGATE_DEFAULTS,
447
- xdslb_policy_->interested_parties(),
658
+ lb_chand_->channel_, nullptr, GRPC_PROPAGATE_DEFAULTS,
659
+ xdslb_policy()->interested_parties(),
448
660
  GRPC_MDSTR_SLASH_GRPC_DOT_LB_DOT_V1_DOT_LOADBALANCER_SLASH_BALANCELOAD,
449
661
  nullptr, deadline, nullptr);
450
662
  // Init the LB call request payload.
@@ -468,7 +680,7 @@ XdsLb::BalancerCallState::BalancerCallState(
468
680
  grpc_combiner_scheduler(xdslb_policy()->combiner()));
469
681
  }
470
682
 
471
- XdsLb::BalancerCallState::~BalancerCallState() {
683
+ XdsLb::BalancerChannelState::BalancerCallState::~BalancerCallState() {
472
684
  GPR_ASSERT(lb_call_ != nullptr);
473
685
  grpc_call_unref(lb_call_);
474
686
  grpc_metadata_array_destroy(&lb_initial_metadata_recv_);
@@ -478,7 +690,7 @@ XdsLb::BalancerCallState::~BalancerCallState() {
478
690
  grpc_slice_unref_internal(lb_call_status_details_);
479
691
  }
480
692
 
481
- void XdsLb::BalancerCallState::Orphan() {
693
+ void XdsLb::BalancerChannelState::BalancerCallState::Orphan() {
482
694
  GPR_ASSERT(lb_call_ != nullptr);
483
695
  // If we are here because xdslb_policy wants to cancel the call,
484
696
  // lb_on_balancer_status_received_ will complete the cancellation and clean
@@ -493,11 +705,11 @@ void XdsLb::BalancerCallState::Orphan() {
493
705
  // in lb_on_balancer_status_received_ instead of here.
494
706
  }
495
707
 
496
- void XdsLb::BalancerCallState::StartQuery() {
708
+ void XdsLb::BalancerChannelState::BalancerCallState::StartQuery() {
497
709
  GPR_ASSERT(lb_call_ != nullptr);
498
710
  if (grpc_lb_xds_trace.enabled()) {
499
711
  gpr_log(GPR_INFO, "[xdslb %p] Starting LB call (lb_calld: %p, lb_call: %p)",
500
- xdslb_policy_.get(), this, lb_call_);
712
+ xdslb_policy(), this, lb_call_);
501
713
  }
502
714
  // Create the ops.
503
715
  grpc_call_error call_error;
@@ -565,7 +777,8 @@ void XdsLb::BalancerCallState::StartQuery() {
565
777
  GPR_ASSERT(GRPC_CALL_OK == call_error);
566
778
  }
567
779
 
568
- void XdsLb::BalancerCallState::ScheduleNextClientLoadReportLocked() {
780
+ void XdsLb::BalancerChannelState::BalancerCallState::
781
+ ScheduleNextClientLoadReportLocked() {
569
782
  const grpc_millis next_client_load_report_time =
570
783
  ExecCtx::Get()->Now() + client_stats_report_interval_;
571
784
  GRPC_CLOSURE_INIT(&client_load_report_closure_,
@@ -576,12 +789,11 @@ void XdsLb::BalancerCallState::ScheduleNextClientLoadReportLocked() {
576
789
  client_load_report_timer_callback_pending_ = true;
577
790
  }
578
791
 
579
- void XdsLb::BalancerCallState::MaybeSendClientLoadReportLocked(
580
- void* arg, grpc_error* error) {
792
+ void XdsLb::BalancerChannelState::BalancerCallState::
793
+ MaybeSendClientLoadReportLocked(void* arg, grpc_error* error) {
581
794
  BalancerCallState* lb_calld = static_cast<BalancerCallState*>(arg);
582
- XdsLb* xdslb_policy = lb_calld->xdslb_policy();
583
795
  lb_calld->client_load_report_timer_callback_pending_ = false;
584
- if (error != GRPC_ERROR_NONE || lb_calld != xdslb_policy->lb_calld_.get()) {
796
+ if (error != GRPC_ERROR_NONE || !lb_calld->IsCurrentCallOnChannel()) {
585
797
  lb_calld->Unref(DEBUG_LOCATION, "client_load_report");
586
798
  return;
587
799
  }
@@ -595,7 +807,7 @@ void XdsLb::BalancerCallState::MaybeSendClientLoadReportLocked(
595
807
  }
596
808
  }
597
809
 
598
- bool XdsLb::BalancerCallState::LoadReportCountersAreZero(
810
+ bool XdsLb::BalancerChannelState::BalancerCallState::LoadReportCountersAreZero(
599
811
  xds_grpclb_request* request) {
600
812
  XdsLbClientStats::DroppedCallCounts* drop_entries =
601
813
  static_cast<XdsLbClientStats::DroppedCallCounts*>(
@@ -609,7 +821,8 @@ bool XdsLb::BalancerCallState::LoadReportCountersAreZero(
609
821
  }
610
822
 
611
823
  // TODO(vpowar): Use LRS to send the client Load Report.
612
- void XdsLb::BalancerCallState::SendClientLoadReportLocked() {
824
+ void XdsLb::BalancerChannelState::BalancerCallState::
825
+ SendClientLoadReportLocked() {
613
826
  // Construct message payload.
614
827
  GPR_ASSERT(send_message_payload_ == nullptr);
615
828
  xds_grpclb_request* request =
@@ -630,27 +843,27 @@ void XdsLb::BalancerCallState::SendClientLoadReportLocked() {
630
843
  xds_grpclb_request_destroy(request);
631
844
  }
632
845
 
633
- void XdsLb::BalancerCallState::OnInitialRequestSentLocked(void* arg,
634
- grpc_error* error) {
846
+ void XdsLb::BalancerChannelState::BalancerCallState::OnInitialRequestSentLocked(
847
+ void* arg, grpc_error* error) {
635
848
  BalancerCallState* lb_calld = static_cast<BalancerCallState*>(arg);
636
849
  grpc_byte_buffer_destroy(lb_calld->send_message_payload_);
637
850
  lb_calld->send_message_payload_ = nullptr;
638
851
  // If we attempted to send a client load report before the initial request was
639
852
  // sent (and this lb_calld is still in use), send the load report now.
640
853
  if (lb_calld->client_load_report_is_due_ &&
641
- lb_calld == lb_calld->xdslb_policy()->lb_calld_.get()) {
854
+ lb_calld->IsCurrentCallOnChannel()) {
642
855
  lb_calld->SendClientLoadReportLocked();
643
856
  lb_calld->client_load_report_is_due_ = false;
644
857
  }
645
858
  lb_calld->Unref(DEBUG_LOCATION, "on_initial_request_sent");
646
859
  }
647
860
 
648
- void XdsLb::BalancerCallState::OnBalancerMessageReceivedLocked(
649
- void* arg, grpc_error* error) {
861
+ void XdsLb::BalancerChannelState::BalancerCallState::
862
+ OnBalancerMessageReceivedLocked(void* arg, grpc_error* error) {
650
863
  BalancerCallState* lb_calld = static_cast<BalancerCallState*>(arg);
651
864
  XdsLb* xdslb_policy = lb_calld->xdslb_policy();
652
865
  // Empty payload means the LB call was cancelled.
653
- if (lb_calld != xdslb_policy->lb_calld_.get() ||
866
+ if (!lb_calld->IsCurrentCallOnChannel() ||
654
867
  lb_calld->recv_message_payload_ == nullptr) {
655
868
  lb_calld->Unref(DEBUG_LOCATION, "on_message_received");
656
869
  return;
@@ -668,20 +881,25 @@ void XdsLb::BalancerCallState::OnBalancerMessageReceivedLocked(
668
881
  nullptr) {
669
882
  // Have NOT seen initial response, look for initial response.
670
883
  if (initial_response->has_client_stats_report_interval) {
671
- lb_calld->client_stats_report_interval_ = GPR_MAX(
672
- GPR_MS_PER_SEC, xds_grpclb_duration_to_millis(
673
- &initial_response->client_stats_report_interval));
674
- if (grpc_lb_xds_trace.enabled()) {
884
+ const grpc_millis interval = xds_grpclb_duration_to_millis(
885
+ &initial_response->client_stats_report_interval);
886
+ if (interval > 0) {
887
+ lb_calld->client_stats_report_interval_ =
888
+ GPR_MAX(GPR_MS_PER_SEC, interval);
889
+ }
890
+ }
891
+ if (grpc_lb_xds_trace.enabled()) {
892
+ if (lb_calld->client_stats_report_interval_ != 0) {
675
893
  gpr_log(GPR_INFO,
676
894
  "[xdslb %p] Received initial LB response message; "
677
895
  "client load reporting interval = %" PRId64 " milliseconds",
678
896
  xdslb_policy, lb_calld->client_stats_report_interval_);
897
+ } else {
898
+ gpr_log(GPR_INFO,
899
+ "[xdslb %p] Received initial LB response message; client load "
900
+ "reporting NOT enabled",
901
+ xdslb_policy);
679
902
  }
680
- } else if (grpc_lb_xds_trace.enabled()) {
681
- gpr_log(GPR_INFO,
682
- "[xdslb %p] Received initial LB response message; client load "
683
- "reporting NOT enabled",
684
- xdslb_policy);
685
903
  }
686
904
  xds_grpclb_initial_response_destroy(initial_response);
687
905
  lb_calld->seen_initial_response_ = true;
@@ -704,12 +922,28 @@ void XdsLb::BalancerCallState::OnBalancerMessageReceivedLocked(
704
922
  }
705
923
  }
706
924
  /* update serverlist */
925
+ // TODO(juanlishen): Don't ingore empty serverlist.
707
926
  if (serverlist->num_servers > 0) {
927
+ // Pending LB channel receives a serverlist; promote it.
928
+ // Note that this call can't be on a discarded pending channel, because
929
+ // such channels don't have any current call but we have checked this call
930
+ // is a current call.
931
+ if (!lb_calld->lb_chand_->IsCurrentChannel()) {
932
+ if (grpc_lb_xds_trace.enabled()) {
933
+ gpr_log(GPR_INFO,
934
+ "[xdslb %p] Promoting pending LB channel %p to replace "
935
+ "current LB channel %p",
936
+ xdslb_policy, lb_calld->lb_chand_.get(),
937
+ lb_calld->xdslb_policy()->lb_chand_.get());
938
+ }
939
+ lb_calld->xdslb_policy()->lb_chand_ =
940
+ std::move(lb_calld->xdslb_policy()->pending_lb_chand_);
941
+ }
708
942
  // Start sending client load report only after we start using the
709
943
  // serverlist returned from the current LB call.
710
944
  if (lb_calld->client_stats_report_interval_ > 0 &&
711
945
  lb_calld->client_stats_ == nullptr) {
712
- lb_calld->client_stats_.reset(New<XdsLbClientStats>());
946
+ lb_calld->client_stats_ = MakeRefCounted<XdsLbClientStats>();
713
947
  // TODO(roth): We currently track this ref manually. Once the
714
948
  // ClosureRef API is ready, we should pass the RefCountedPtr<> along
715
949
  // with the callback.
@@ -777,37 +1011,53 @@ void XdsLb::BalancerCallState::OnBalancerMessageReceivedLocked(
777
1011
  }
778
1012
  }
779
1013
 
780
- void XdsLb::BalancerCallState::OnBalancerStatusReceivedLocked(
781
- void* arg, grpc_error* error) {
1014
+ void XdsLb::BalancerChannelState::BalancerCallState::
1015
+ OnBalancerStatusReceivedLocked(void* arg, grpc_error* error) {
782
1016
  BalancerCallState* lb_calld = static_cast<BalancerCallState*>(arg);
783
1017
  XdsLb* xdslb_policy = lb_calld->xdslb_policy();
1018
+ BalancerChannelState* lb_chand = lb_calld->lb_chand_.get();
784
1019
  GPR_ASSERT(lb_calld->lb_call_ != nullptr);
785
1020
  if (grpc_lb_xds_trace.enabled()) {
786
1021
  char* status_details =
787
1022
  grpc_slice_to_c_string(lb_calld->lb_call_status_details_);
788
1023
  gpr_log(GPR_INFO,
789
1024
  "[xdslb %p] Status from LB server received. Status = %d, details "
790
- "= '%s', (lb_calld: %p, lb_call: %p), error '%s'",
791
- xdslb_policy, lb_calld->lb_call_status_, status_details, lb_calld,
792
- lb_calld->lb_call_, grpc_error_string(error));
1025
+ "= '%s', (lb_chand: %p, lb_calld: %p, lb_call: %p), error '%s'",
1026
+ xdslb_policy, lb_calld->lb_call_status_, status_details, lb_chand,
1027
+ lb_calld, lb_calld->lb_call_, grpc_error_string(error));
793
1028
  gpr_free(status_details);
794
1029
  }
795
- xdslb_policy->TryReresolutionLocked(&grpc_lb_xds_trace, GRPC_ERROR_NONE);
796
- // If this lb_calld is still in use, this call ended because of a failure so
797
- // we want to retry connecting. Otherwise, we have deliberately ended this
798
- // call and no further action is required.
799
- if (lb_calld == xdslb_policy->lb_calld_.get()) {
800
- xdslb_policy->lb_calld_.reset();
1030
+ // Ignore status from a stale call.
1031
+ if (lb_calld->IsCurrentCallOnChannel()) {
1032
+ // Because this call is the current one on the channel, the channel can't
1033
+ // have been swapped out; otherwise, the call should have been reset.
1034
+ GPR_ASSERT(lb_chand->IsCurrentChannel() || lb_chand->IsPendingChannel());
801
1035
  GPR_ASSERT(!xdslb_policy->shutting_down_);
802
- if (lb_calld->seen_initial_response_) {
803
- // If we lose connection to the LB server, reset the backoff and restart
804
- // the LB call immediately.
805
- xdslb_policy->lb_call_backoff_.Reset();
806
- xdslb_policy->StartBalancerCallLocked();
1036
+ if (lb_chand != xdslb_policy->LatestLbChannel()) {
1037
+ // This channel must be the current one and there is a pending one. Swap
1038
+ // in the pending one and we are done.
1039
+ if (grpc_lb_xds_trace.enabled()) {
1040
+ gpr_log(GPR_INFO,
1041
+ "[xdslb %p] Promoting pending LB channel %p to replace "
1042
+ "current LB channel %p",
1043
+ xdslb_policy, lb_calld->lb_chand_.get(),
1044
+ lb_calld->xdslb_policy()->lb_chand_.get());
1045
+ }
1046
+ xdslb_policy->lb_chand_ = std::move(xdslb_policy->pending_lb_chand_);
807
1047
  } else {
808
- // If this LB call fails establishing any connection to the LB server,
809
- // retry later.
810
- xdslb_policy->StartBalancerCallRetryTimerLocked();
1048
+ // This channel is the most recently created one. Try to restart the call
1049
+ // and reresolve.
1050
+ lb_chand->lb_calld_.reset();
1051
+ if (lb_calld->seen_initial_response_) {
1052
+ // If we lost connection to the LB server, reset the backoff and restart
1053
+ // the LB call immediately.
1054
+ lb_chand->lb_call_backoff_.Reset();
1055
+ lb_chand->StartCallLocked();
1056
+ } else {
1057
+ // If we failed to connect to the LB server, retry later.
1058
+ lb_chand->StartCallRetryTimerLocked();
1059
+ }
1060
+ xdslb_policy->channel_control_helper()->RequestReresolution();
811
1061
  }
812
1062
  }
813
1063
  lb_calld->Unref(DEBUG_LOCATION, "lb_call_ended");
@@ -817,53 +1067,20 @@ void XdsLb::BalancerCallState::OnBalancerStatusReceivedLocked(
817
1067
  // helper code for creating balancer channel
818
1068
  //
819
1069
 
820
- UniquePtr<ServerAddressList> ExtractBalancerAddresses(
821
- const ServerAddressList& addresses) {
822
- auto balancer_addresses = MakeUnique<ServerAddressList>();
823
- for (size_t i = 0; i < addresses.size(); ++i) {
824
- if (addresses[i].IsBalancer()) {
825
- balancer_addresses->emplace_back(addresses[i]);
826
- }
827
- }
828
- return balancer_addresses;
829
- }
830
-
831
- /* Returns the channel args for the LB channel, used to create a bidirectional
832
- * stream for the reception of load balancing updates.
833
- *
834
- * Inputs:
835
- * - \a addresses: corresponding to the balancers.
836
- * - \a response_generator: in order to propagate updates from the resolver
837
- * above the grpclb policy.
838
- * - \a args: other args inherited from the xds policy. */
839
- grpc_channel_args* BuildBalancerChannelArgs(
840
- const ServerAddressList& addresses,
841
- FakeResolverResponseGenerator* response_generator,
842
- const grpc_channel_args* args) {
843
- UniquePtr<ServerAddressList> balancer_addresses =
844
- ExtractBalancerAddresses(addresses);
845
- // Channel args to remove.
1070
+ // Returns the channel args for the LB channel, used to create a bidirectional
1071
+ // stream for the reception of load balancing updates.
1072
+ grpc_channel_args* BuildBalancerChannelArgs(const grpc_channel_args* args) {
846
1073
  static const char* args_to_remove[] = {
847
1074
  // LB policy name, since we want to use the default (pick_first) in
848
1075
  // the LB channel.
849
1076
  GRPC_ARG_LB_POLICY_NAME,
1077
+ // The service config that contains the LB config. We don't want to
1078
+ // recursively use xds in the LB channel.
1079
+ GRPC_ARG_SERVICE_CONFIG,
850
1080
  // The channel arg for the server URI, since that will be different for
851
1081
  // the LB channel than for the parent channel. The client channel
852
1082
  // factory will re-add this arg with the right value.
853
1083
  GRPC_ARG_SERVER_URI,
854
- // The resolved addresses, which will be generated by the name resolver
855
- // used in the LB channel. Note that the LB channel will use the fake
856
- // resolver, so this won't actually generate a query to DNS (or some
857
- // other name service). However, the addresses returned by the fake
858
- // resolver will have is_balancer=false, whereas our own addresses have
859
- // is_balancer=true. We need the LB channel to return addresses with
860
- // is_balancer=false so that it does not wind up recursively using the
861
- // xds LB policy, as per the special case logic in client_channel.c.
862
- GRPC_ARG_SERVER_ADDRESS_LIST,
863
- // The fake resolver response generator, because we are replacing it
864
- // with the one from the xds policy, used to propagate updates to
865
- // the LB channel.
866
- GRPC_ARG_FAKE_RESOLVER_RESPONSE_GENERATOR,
867
1084
  // The LB channel should use the authority indicated by the target
868
1085
  // authority table (see \a grpc_lb_policy_xds_modify_lb_channel_args),
869
1086
  // as opposed to the authority from the parent channel.
@@ -875,14 +1092,6 @@ grpc_channel_args* BuildBalancerChannelArgs(
875
1092
  };
876
1093
  // Channel args to add.
877
1094
  const grpc_arg args_to_add[] = {
878
- // New server address list.
879
- // Note that we pass these in both when creating the LB channel
880
- // and via the fake resolver. The latter is what actually gets used.
881
- CreateServerAddressListChannelArg(balancer_addresses.get()),
882
- // The fake resolver response generator, which we use to inject
883
- // address updates into the LB channel.
884
- grpc_core::FakeResolverResponseGenerator::MakeChannelArg(
885
- response_generator),
886
1095
  // A channel arg indicating the target is a xds load balancer.
887
1096
  grpc_channel_arg_integer_create(
888
1097
  const_cast<char*>(GRPC_ARG_ADDRESS_IS_XDS_LOAD_BALANCER), 1),
@@ -903,29 +1112,9 @@ grpc_channel_args* BuildBalancerChannelArgs(
903
1112
  // ctor and dtor
904
1113
  //
905
1114
 
906
- // TODO(vishalpowar): Use lb_config in args to configure LB policy.
907
- XdsLb::XdsLb(LoadBalancingPolicy::Args args)
908
- : LoadBalancingPolicy(std::move(args)),
909
- response_generator_(MakeRefCounted<FakeResolverResponseGenerator>()),
910
- lb_call_backoff_(
911
- BackOff::Options()
912
- .set_initial_backoff(GRPC_XDS_INITIAL_CONNECT_BACKOFF_SECONDS *
913
- 1000)
914
- .set_multiplier(GRPC_XDS_RECONNECT_BACKOFF_MULTIPLIER)
915
- .set_jitter(GRPC_XDS_RECONNECT_JITTER)
916
- .set_max_backoff(GRPC_XDS_RECONNECT_MAX_BACKOFF_SECONDS * 1000)) {
917
- // Initialization.
918
- gpr_mu_init(&lb_channel_mu_);
919
- GRPC_CLOSURE_INIT(&lb_channel_on_connectivity_changed_,
920
- &XdsLb::OnBalancerChannelConnectivityChangedLocked, this,
921
- grpc_combiner_scheduler(args.combiner));
922
- GRPC_CLOSURE_INIT(&on_child_connectivity_changed_,
923
- &XdsLb::OnChildPolicyConnectivityChangedLocked, this,
924
- grpc_combiner_scheduler(args.combiner));
925
- GRPC_CLOSURE_INIT(&on_child_request_reresolution_,
926
- &XdsLb::OnChildPolicyRequestReresolutionLocked, this,
927
- grpc_combiner_scheduler(args.combiner));
928
- grpc_connectivity_state_init(&state_tracker_, GRPC_CHANNEL_IDLE, "xds");
1115
+ XdsLb::XdsLb(Args args) : LoadBalancingPolicy(std::move(args)) {
1116
+ gpr_mu_init(&lb_chand_mu_);
1117
+ gpr_mu_init(&child_policy_mu_);
929
1118
  // Record server name.
930
1119
  const grpc_arg* arg = grpc_channel_args_find(args.args, GRPC_ARG_SERVER_URI);
931
1120
  const char* server_uri = grpc_channel_arg_get_string(arg);
@@ -946,229 +1135,103 @@ XdsLb::XdsLb(LoadBalancingPolicy::Args args)
946
1135
  arg = grpc_channel_args_find(args.args, GRPC_ARG_GRPCLB_FALLBACK_TIMEOUT_MS);
947
1136
  lb_fallback_timeout_ms_ = grpc_channel_arg_get_integer(
948
1137
  arg, {GRPC_XDS_DEFAULT_FALLBACK_TIMEOUT_MS, 0, INT_MAX});
949
- // Parse the LB config.
950
- ParseLbConfig(args.lb_config);
951
- // Process channel args.
952
- ProcessChannelArgsLocked(*args.args);
953
1138
  }
954
1139
 
955
1140
  XdsLb::~XdsLb() {
956
- GPR_ASSERT(pending_picks_ == nullptr);
957
- gpr_mu_destroy(&lb_channel_mu_);
1141
+ gpr_mu_destroy(&lb_chand_mu_);
958
1142
  gpr_free((void*)server_name_);
959
1143
  grpc_channel_args_destroy(args_);
960
- grpc_connectivity_state_destroy(&state_tracker_);
961
1144
  if (serverlist_ != nullptr) {
962
1145
  xds_grpclb_destroy_serverlist(serverlist_);
963
1146
  }
1147
+ gpr_mu_destroy(&child_policy_mu_);
964
1148
  }
965
1149
 
966
1150
  void XdsLb::ShutdownLocked() {
967
- grpc_error* error = GRPC_ERROR_CREATE_FROM_STATIC_STRING("Channel shutdown");
968
1151
  shutting_down_ = true;
969
- lb_calld_.reset();
970
- if (retry_timer_callback_pending_) {
971
- grpc_timer_cancel(&lb_call_retry_timer_);
972
- }
973
1152
  if (fallback_timer_callback_pending_) {
974
1153
  grpc_timer_cancel(&lb_fallback_timer_);
975
1154
  }
976
- child_policy_.reset();
977
- TryReresolutionLocked(&grpc_lb_xds_trace, GRPC_ERROR_CANCELLED);
1155
+ if (child_policy_ != nullptr) {
1156
+ grpc_pollset_set_del_pollset_set(child_policy_->interested_parties(),
1157
+ interested_parties());
1158
+ }
1159
+ if (pending_child_policy_ != nullptr) {
1160
+ grpc_pollset_set_del_pollset_set(
1161
+ pending_child_policy_->interested_parties(), interested_parties());
1162
+ }
1163
+ {
1164
+ MutexLock lock(&child_policy_mu_);
1165
+ child_policy_.reset();
1166
+ pending_child_policy_.reset();
1167
+ }
978
1168
  // We destroy the LB channel here instead of in our destructor because
979
1169
  // destroying the channel triggers a last callback to
980
1170
  // OnBalancerChannelConnectivityChangedLocked(), and we need to be
981
1171
  // alive when that callback is invoked.
982
- if (lb_channel_ != nullptr) {
983
- gpr_mu_lock(&lb_channel_mu_);
984
- grpc_channel_destroy(lb_channel_);
985
- lb_channel_ = nullptr;
986
- gpr_mu_unlock(&lb_channel_mu_);
1172
+ {
1173
+ MutexLock lock(&lb_chand_mu_);
1174
+ lb_chand_.reset();
1175
+ pending_lb_chand_.reset();
987
1176
  }
988
- grpc_connectivity_state_set(&state_tracker_, GRPC_CHANNEL_SHUTDOWN,
989
- GRPC_ERROR_REF(error), "xds_shutdown");
990
- // Clear pending picks.
991
- PendingPick* pp;
992
- while ((pp = pending_picks_) != nullptr) {
993
- pending_picks_ = pp->next;
994
- pp->pick->connected_subchannel.reset();
995
- // Note: pp is deleted in this callback.
996
- GRPC_CLOSURE_SCHED(&pp->on_complete, GRPC_ERROR_REF(error));
997
- }
998
- GRPC_ERROR_UNREF(error);
999
1177
  }
1000
1178
 
1001
1179
  //
1002
1180
  // public methods
1003
1181
  //
1004
1182
 
1005
- void XdsLb::HandOffPendingPicksLocked(LoadBalancingPolicy* new_policy) {
1006
- PendingPick* pp;
1007
- while ((pp = pending_picks_) != nullptr) {
1008
- pending_picks_ = pp->next;
1009
- pp->pick->on_complete = pp->original_on_complete;
1010
- grpc_error* error = GRPC_ERROR_NONE;
1011
- if (new_policy->PickLocked(pp->pick, &error)) {
1012
- // Synchronous return; schedule closure.
1013
- GRPC_CLOSURE_SCHED(pp->pick->on_complete, error);
1014
- }
1015
- Delete(pp);
1016
- }
1017
- }
1018
-
1019
- // Cancel a specific pending pick.
1020
- //
1021
- // A pick progresses as follows:
1022
- // - If there's a child policy available, it'll be handed over to child policy
1023
- // (in CreateChildPolicyLocked()). From that point onwards, it'll be the
1024
- // child policy's responsibility. For cancellations, that implies the pick
1025
- // needs to be also cancelled by the child policy instance.
1026
- // - Otherwise, without a child policy instance, picks stay pending at this
1027
- // policy's level (xds), inside the pending_picks_ list. To cancel these,
1028
- // we invoke the completion closure and set the pick's connected
1029
- // subchannel to nullptr right here.
1030
- void XdsLb::CancelPickLocked(PickState* pick, grpc_error* error) {
1031
- PendingPick* pp = pending_picks_;
1032
- pending_picks_ = nullptr;
1033
- while (pp != nullptr) {
1034
- PendingPick* next = pp->next;
1035
- if (pp->pick == pick) {
1036
- pick->connected_subchannel.reset();
1037
- // Note: pp is deleted in this callback.
1038
- GRPC_CLOSURE_SCHED(&pp->on_complete,
1039
- GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING(
1040
- "Pick Cancelled", &error, 1));
1041
- } else {
1042
- pp->next = pending_picks_;
1043
- pending_picks_ = pp;
1044
- }
1045
- pp = next;
1046
- }
1047
- if (child_policy_ != nullptr) {
1048
- child_policy_->CancelPickLocked(pick, GRPC_ERROR_REF(error));
1049
- }
1050
- GRPC_ERROR_UNREF(error);
1051
- }
1052
-
1053
- // Cancel all pending picks.
1054
- //
1055
- // A pick progresses as follows:
1056
- // - If there's a child policy available, it'll be handed over to child policy
1057
- // (in CreateChildPolicyLocked()). From that point onwards, it'll be the
1058
- // child policy's responsibility. For cancellations, that implies the pick
1059
- // needs to be also cancelled by the child policy instance.
1060
- // - Otherwise, without a child policy instance, picks stay pending at this
1061
- // policy's level (xds), inside the pending_picks_ list. To cancel these,
1062
- // we invoke the completion closure and set the pick's connected
1063
- // subchannel to nullptr right here.
1064
- void XdsLb::CancelMatchingPicksLocked(uint32_t initial_metadata_flags_mask,
1065
- uint32_t initial_metadata_flags_eq,
1066
- grpc_error* error) {
1067
- PendingPick* pp = pending_picks_;
1068
- pending_picks_ = nullptr;
1069
- while (pp != nullptr) {
1070
- PendingPick* next = pp->next;
1071
- if ((*pp->pick->initial_metadata_flags & initial_metadata_flags_mask) ==
1072
- initial_metadata_flags_eq) {
1073
- // Note: pp is deleted in this callback.
1074
- GRPC_CLOSURE_SCHED(&pp->on_complete,
1075
- GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING(
1076
- "Pick Cancelled", &error, 1));
1077
- } else {
1078
- pp->next = pending_picks_;
1079
- pending_picks_ = pp;
1080
- }
1081
- pp = next;
1082
- }
1083
- if (child_policy_ != nullptr) {
1084
- child_policy_->CancelMatchingPicksLocked(initial_metadata_flags_mask,
1085
- initial_metadata_flags_eq,
1086
- GRPC_ERROR_REF(error));
1087
- }
1088
- GRPC_ERROR_UNREF(error);
1089
- }
1090
-
1091
- void XdsLb::ExitIdleLocked() {
1092
- if (!started_picking_) {
1093
- StartPickingLocked();
1094
- }
1095
- }
1096
-
1097
1183
  void XdsLb::ResetBackoffLocked() {
1098
- if (lb_channel_ != nullptr) {
1099
- grpc_channel_reset_connect_backoff(lb_channel_);
1184
+ if (lb_chand_ != nullptr) {
1185
+ grpc_channel_reset_connect_backoff(lb_chand_->channel());
1186
+ }
1187
+ if (pending_lb_chand_ != nullptr) {
1188
+ grpc_channel_reset_connect_backoff(pending_lb_chand_->channel());
1100
1189
  }
1101
1190
  if (child_policy_ != nullptr) {
1102
1191
  child_policy_->ResetBackoffLocked();
1103
1192
  }
1104
- }
1105
-
1106
- bool XdsLb::PickLocked(PickState* pick, grpc_error** error) {
1107
- PendingPick* pp = PendingPickCreate(pick);
1108
- bool pick_done = false;
1109
- if (child_policy_ != nullptr) {
1110
- if (grpc_lb_xds_trace.enabled()) {
1111
- gpr_log(GPR_INFO, "[xdslb %p] about to PICK from policy %p", this,
1112
- child_policy_.get());
1113
- }
1114
- pick_done = PickFromChildPolicyLocked(false /* force_async */, pp, error);
1115
- } else { // child_policy_ == NULL
1116
- if (pick->on_complete == nullptr) {
1117
- *error = GRPC_ERROR_CREATE_FROM_STATIC_STRING(
1118
- "No pick result available but synchronous result required.");
1119
- pick_done = true;
1120
- } else {
1121
- if (grpc_lb_xds_trace.enabled()) {
1122
- gpr_log(GPR_INFO,
1123
- "[xdslb %p] No child policy. Adding to xds's pending picks",
1124
- this);
1125
- }
1126
- AddPendingPick(pp);
1127
- if (!started_picking_) {
1128
- StartPickingLocked();
1129
- }
1130
- pick_done = false;
1131
- }
1193
+ if (pending_child_policy_ != nullptr) {
1194
+ pending_child_policy_->ResetBackoffLocked();
1132
1195
  }
1133
- return pick_done;
1134
1196
  }
1135
1197
 
1136
1198
  void XdsLb::FillChildRefsForChannelz(channelz::ChildRefsList* child_subchannels,
1137
1199
  channelz::ChildRefsList* child_channels) {
1138
- // delegate to the child_policy_ to fill the children subchannels.
1139
- child_policy_->FillChildRefsForChannelz(child_subchannels, child_channels);
1140
- MutexLock lock(&lb_channel_mu_);
1141
- if (lb_channel_ != nullptr) {
1200
+ {
1201
+ // Delegate to the child_policy_ to fill the children subchannels.
1202
+ // This must be done holding child_policy_mu_, since this method does not
1203
+ // run in the combiner.
1204
+ MutexLock lock(&child_policy_mu_);
1205
+ if (child_policy_ != nullptr) {
1206
+ child_policy_->FillChildRefsForChannelz(child_subchannels,
1207
+ child_channels);
1208
+ }
1209
+ if (pending_child_policy_ != nullptr) {
1210
+ pending_child_policy_->FillChildRefsForChannelz(child_subchannels,
1211
+ child_channels);
1212
+ }
1213
+ }
1214
+ MutexLock lock(&lb_chand_mu_);
1215
+ if (lb_chand_ != nullptr) {
1142
1216
  grpc_core::channelz::ChannelNode* channel_node =
1143
- grpc_channel_get_channelz_node(lb_channel_);
1217
+ grpc_channel_get_channelz_node(lb_chand_->channel());
1218
+ if (channel_node != nullptr) {
1219
+ child_channels->push_back(channel_node->uuid());
1220
+ }
1221
+ }
1222
+ if (pending_lb_chand_ != nullptr) {
1223
+ grpc_core::channelz::ChannelNode* channel_node =
1224
+ grpc_channel_get_channelz_node(pending_lb_chand_->channel());
1144
1225
  if (channel_node != nullptr) {
1145
1226
  child_channels->push_back(channel_node->uuid());
1146
1227
  }
1147
1228
  }
1148
1229
  }
1149
1230
 
1150
- grpc_connectivity_state XdsLb::CheckConnectivityLocked(
1151
- grpc_error** connectivity_error) {
1152
- return grpc_connectivity_state_get(&state_tracker_, connectivity_error);
1153
- }
1154
-
1155
- void XdsLb::NotifyOnStateChangeLocked(grpc_connectivity_state* current,
1156
- grpc_closure* closure) {
1157
- grpc_connectivity_state_notify_on_state_change(&state_tracker_, current,
1158
- closure);
1159
- }
1160
-
1161
- void XdsLb::ProcessChannelArgsLocked(const grpc_channel_args& args) {
1162
- const ServerAddressList* addresses = FindServerAddressListChannelArg(&args);
1163
- if (addresses == nullptr) {
1164
- // Ignore this update.
1165
- gpr_log(GPR_ERROR,
1166
- "[xdslb %p] No valid LB addresses channel arg in update, ignoring.",
1167
- this);
1168
- return;
1169
- }
1231
+ void XdsLb::ProcessAddressesAndChannelArgsLocked(
1232
+ const ServerAddressList& addresses, const grpc_channel_args& args) {
1170
1233
  // Update fallback address list.
1171
- fallback_backend_addresses_ = ExtractBackendAddresses(*addresses);
1234
+ fallback_backend_addresses_ = ExtractBackendAddresses(addresses);
1172
1235
  // Make sure that GRPC_ARG_LB_POLICY_NAME is set in channel args,
1173
1236
  // since we use this to trigger the client_load_reporting filter.
1174
1237
  static const char* args_to_remove[] = {GRPC_ARG_LB_POLICY_NAME};
@@ -1178,31 +1241,38 @@ void XdsLb::ProcessChannelArgsLocked(const grpc_channel_args& args) {
1178
1241
  args_ = grpc_channel_args_copy_and_add_and_remove(
1179
1242
  &args, args_to_remove, GPR_ARRAY_SIZE(args_to_remove), &new_arg, 1);
1180
1243
  // Construct args for balancer channel.
1181
- grpc_channel_args* lb_channel_args =
1182
- BuildBalancerChannelArgs(*addresses, response_generator_.get(), &args);
1183
- // Create balancer channel if needed.
1184
- if (lb_channel_ == nullptr) {
1185
- char* uri_str;
1186
- gpr_asprintf(&uri_str, "fake:///%s", server_name_);
1187
- gpr_mu_lock(&lb_channel_mu_);
1188
- lb_channel_ = grpc_client_channel_factory_create_channel(
1189
- client_channel_factory(), uri_str,
1190
- GRPC_CLIENT_CHANNEL_TYPE_LOAD_BALANCING, lb_channel_args);
1191
- gpr_mu_unlock(&lb_channel_mu_);
1192
- GPR_ASSERT(lb_channel_ != nullptr);
1193
- gpr_free(uri_str);
1244
+ grpc_channel_args* lb_channel_args = BuildBalancerChannelArgs(&args);
1245
+ // Create an LB channel if we don't have one yet or the balancer name has
1246
+ // changed from the last received one.
1247
+ bool create_lb_channel = lb_chand_ == nullptr;
1248
+ if (lb_chand_ != nullptr) {
1249
+ UniquePtr<char> last_balancer_name(
1250
+ grpc_channel_get_target(LatestLbChannel()->channel()));
1251
+ create_lb_channel =
1252
+ strcmp(last_balancer_name.get(), balancer_name_.get()) != 0;
1253
+ }
1254
+ if (create_lb_channel) {
1255
+ OrphanablePtr<BalancerChannelState> lb_chand =
1256
+ MakeOrphanable<BalancerChannelState>(balancer_name_.get(),
1257
+ *lb_channel_args, Ref());
1258
+ if (lb_chand_ == nullptr || !lb_chand_->HasActiveCall()) {
1259
+ GPR_ASSERT(pending_lb_chand_ == nullptr);
1260
+ // If we do not have a working LB channel yet, use the newly created one.
1261
+ lb_chand_ = std::move(lb_chand);
1262
+ } else {
1263
+ // Otherwise, wait until the new LB channel to be ready to swap it in.
1264
+ pending_lb_chand_ = std::move(lb_chand);
1265
+ }
1194
1266
  }
1195
- // Propagate updates to the LB channel (pick_first) through the fake
1196
- // resolver.
1197
- response_generator_->SetResponse(lb_channel_args);
1198
1267
  grpc_channel_args_destroy(lb_channel_args);
1199
1268
  }
1200
1269
 
1201
- void XdsLb::ParseLbConfig(grpc_json* xds_config_json) {
1270
+ void XdsLb::ParseLbConfig(Config* xds_config) {
1271
+ const grpc_json* xds_config_json = xds_config->config();
1202
1272
  const char* balancer_name = nullptr;
1203
1273
  grpc_json* child_policy = nullptr;
1204
1274
  grpc_json* fallback_policy = nullptr;
1205
- for (grpc_json* field = xds_config_json; field != nullptr;
1275
+ for (const grpc_json* field = xds_config_json; field != nullptr;
1206
1276
  field = field->next) {
1207
1277
  if (field->key == nullptr) return;
1208
1278
  if (strcmp(field->key, "balancerName") == 0) {
@@ -1218,50 +1288,47 @@ void XdsLb::ParseLbConfig(grpc_json* xds_config_json) {
1218
1288
  }
1219
1289
  }
1220
1290
  if (balancer_name == nullptr) return; // Required field.
1291
+ balancer_name_ = UniquePtr<char>(gpr_strdup(balancer_name));
1221
1292
  if (child_policy != nullptr) {
1222
- child_policy_json_string_ =
1223
- UniquePtr<char>(grpc_json_dump_to_string(child_policy, 0 /* indent */));
1293
+ child_policy_config_ =
1294
+ MakeRefCounted<Config>(child_policy, xds_config->service_config());
1224
1295
  }
1225
1296
  if (fallback_policy != nullptr) {
1226
- fallback_policy_json_string_ = UniquePtr<char>(
1227
- grpc_json_dump_to_string(fallback_policy, 0 /* indent */));
1297
+ fallback_policy_config_ =
1298
+ MakeRefCounted<Config>(fallback_policy, xds_config->service_config());
1228
1299
  }
1229
- balancer_name_ = UniquePtr<char>(gpr_strdup(balancer_name));
1230
1300
  }
1231
1301
 
1232
- void XdsLb::UpdateLocked(const grpc_channel_args& args, grpc_json* lb_config) {
1233
- ParseLbConfig(lb_config);
1302
+ void XdsLb::UpdateLocked(UpdateArgs args) {
1303
+ const bool is_initial_update = lb_chand_ == nullptr;
1304
+ ParseLbConfig(args.config.get());
1234
1305
  // TODO(juanlishen): Pass fallback policy config update after fallback policy
1235
1306
  // is added.
1236
1307
  if (balancer_name_ == nullptr) {
1237
1308
  gpr_log(GPR_ERROR, "[xdslb %p] LB config parsing fails.", this);
1309
+ return;
1238
1310
  }
1239
- ProcessChannelArgsLocked(args);
1311
+ ProcessAddressesAndChannelArgsLocked(args.addresses, *args.args);
1240
1312
  // Update the existing child policy.
1241
1313
  // Note: We have disabled fallback mode in the code, so this child policy must
1242
1314
  // have been created from a serverlist.
1243
1315
  // TODO(vpowar): Handle the fallback_address changes when we add support for
1244
1316
  // fallback in xDS.
1245
1317
  if (child_policy_ != nullptr) CreateOrUpdateChildPolicyLocked();
1246
- // Start watching the LB channel connectivity for connection, if not
1247
- // already doing so.
1248
- if (!watching_lb_channel_) {
1249
- lb_channel_connectivity_ = grpc_channel_check_connectivity_state(
1250
- lb_channel_, true /* try to connect */);
1251
- grpc_channel_element* client_channel_elem = grpc_channel_stack_last_element(
1252
- grpc_channel_get_channel_stack(lb_channel_));
1253
- GPR_ASSERT(client_channel_elem->filter == &grpc_client_channel_filter);
1254
- watching_lb_channel_ = true;
1255
- // TODO(roth): We currently track this ref manually. Once the
1256
- // ClosureRef API is ready, we should pass the RefCountedPtr<> along
1257
- // with the callback.
1258
- auto self = Ref(DEBUG_LOCATION, "watch_lb_channel_connectivity");
1259
- self.release();
1260
- grpc_client_channel_watch_connectivity_state(
1261
- client_channel_elem,
1262
- grpc_polling_entity_create_from_pollset_set(interested_parties()),
1263
- &lb_channel_connectivity_, &lb_channel_on_connectivity_changed_,
1264
- nullptr);
1318
+ // If this is the initial update, start the fallback timer.
1319
+ if (is_initial_update) {
1320
+ if (lb_fallback_timeout_ms_ > 0 && serverlist_ == nullptr &&
1321
+ !fallback_timer_callback_pending_) {
1322
+ grpc_millis deadline = ExecCtx::Get()->Now() + lb_fallback_timeout_ms_;
1323
+ Ref(DEBUG_LOCATION, "on_fallback_timer").release(); // Held by closure
1324
+ GRPC_CLOSURE_INIT(&lb_on_fallback_, &XdsLb::OnFallbackTimerLocked, this,
1325
+ grpc_combiner_scheduler(combiner()));
1326
+ fallback_timer_callback_pending_ = true;
1327
+ grpc_timer_init(&lb_fallback_timer_, deadline, &lb_on_fallback_);
1328
+ // TODO(juanlishen): Monitor the connectivity state of the balancer
1329
+ // channel. If the channel reports TRANSIENT_FAILURE before the
1330
+ // fallback timeout expires, go into fallback mode early.
1331
+ }
1265
1332
  }
1266
1333
  }
1267
1334
 
@@ -1269,39 +1336,6 @@ void XdsLb::UpdateLocked(const grpc_channel_args& args, grpc_json* lb_config) {
1269
1336
  // code for balancer channel and call
1270
1337
  //
1271
1338
 
1272
- void XdsLb::StartPickingLocked() {
1273
- // Start a timer to fall back.
1274
- if (lb_fallback_timeout_ms_ > 0 && serverlist_ == nullptr &&
1275
- !fallback_timer_callback_pending_) {
1276
- grpc_millis deadline = ExecCtx::Get()->Now() + lb_fallback_timeout_ms_;
1277
- // TODO(roth): We currently track this ref manually. Once the
1278
- // ClosureRef API is ready, we should pass the RefCountedPtr<> along
1279
- // with the callback.
1280
- auto self = Ref(DEBUG_LOCATION, "on_fallback_timer");
1281
- self.release();
1282
- GRPC_CLOSURE_INIT(&lb_on_fallback_, &XdsLb::OnFallbackTimerLocked, this,
1283
- grpc_combiner_scheduler(combiner()));
1284
- fallback_timer_callback_pending_ = true;
1285
- grpc_timer_init(&lb_fallback_timer_, deadline, &lb_on_fallback_);
1286
- }
1287
- started_picking_ = true;
1288
- StartBalancerCallLocked();
1289
- }
1290
-
1291
- void XdsLb::StartBalancerCallLocked() {
1292
- GPR_ASSERT(lb_channel_ != nullptr);
1293
- if (shutting_down_) return;
1294
- // Init the LB call data.
1295
- GPR_ASSERT(lb_calld_ == nullptr);
1296
- lb_calld_ = MakeOrphanable<BalancerCallState>(Ref());
1297
- if (grpc_lb_xds_trace.enabled()) {
1298
- gpr_log(GPR_INFO,
1299
- "[xdslb %p] Query for backends (lb_channel: %p, lb_calld: %p)",
1300
- this, lb_channel_, lb_calld_.get());
1301
- }
1302
- lb_calld_->StartQuery();
1303
- }
1304
-
1305
1339
  void XdsLb::OnFallbackTimerLocked(void* arg, grpc_error* error) {
1306
1340
  XdsLb* xdslb_policy = static_cast<XdsLb*>(arg);
1307
1341
  xdslb_policy->fallback_timer_callback_pending_ = false;
@@ -1318,383 +1352,164 @@ void XdsLb::OnFallbackTimerLocked(void* arg, grpc_error* error) {
1318
1352
  xdslb_policy->Unref(DEBUG_LOCATION, "on_fallback_timer");
1319
1353
  }
1320
1354
 
1321
- void XdsLb::StartBalancerCallRetryTimerLocked() {
1322
- grpc_millis next_try = lb_call_backoff_.NextAttemptTime();
1323
- if (grpc_lb_xds_trace.enabled()) {
1324
- gpr_log(GPR_INFO, "[xdslb %p] Connection to LB server lost...", this);
1325
- grpc_millis timeout = next_try - ExecCtx::Get()->Now();
1326
- if (timeout > 0) {
1327
- gpr_log(GPR_INFO, "[xdslb %p] ... retry_timer_active in %" PRId64 "ms.",
1328
- this, timeout);
1329
- } else {
1330
- gpr_log(GPR_INFO, "[xdslb %p] ... retry_timer_active immediately.", this);
1331
- }
1332
- }
1333
- // TODO(roth): We currently track this ref manually. Once the
1334
- // ClosureRef API is ready, we should pass the RefCountedPtr<> along
1335
- // with the callback.
1336
- auto self = Ref(DEBUG_LOCATION, "on_balancer_call_retry_timer");
1337
- self.release();
1338
- GRPC_CLOSURE_INIT(&lb_on_call_retry_, &XdsLb::OnBalancerCallRetryTimerLocked,
1339
- this, grpc_combiner_scheduler(combiner()));
1340
- retry_timer_callback_pending_ = true;
1341
- grpc_timer_init(&lb_call_retry_timer_, next_try, &lb_on_call_retry_);
1342
- }
1343
-
1344
- void XdsLb::OnBalancerCallRetryTimerLocked(void* arg, grpc_error* error) {
1345
- XdsLb* xdslb_policy = static_cast<XdsLb*>(arg);
1346
- xdslb_policy->retry_timer_callback_pending_ = false;
1347
- if (!xdslb_policy->shutting_down_ && error == GRPC_ERROR_NONE &&
1348
- xdslb_policy->lb_calld_ == nullptr) {
1349
- if (grpc_lb_xds_trace.enabled()) {
1350
- gpr_log(GPR_INFO, "[xdslb %p] Restarting call to LB server",
1351
- xdslb_policy);
1352
- }
1353
- xdslb_policy->StartBalancerCallLocked();
1354
- }
1355
- xdslb_policy->Unref(DEBUG_LOCATION, "on_balancer_call_retry_timer");
1356
- }
1357
-
1358
- // Invoked as part of the update process. It continues watching the LB channel
1359
- // until it shuts down or becomes READY. It's invoked even if the LB channel
1360
- // stayed READY throughout the update (for example if the update is identical).
1361
- void XdsLb::OnBalancerChannelConnectivityChangedLocked(void* arg,
1362
- grpc_error* error) {
1363
- XdsLb* xdslb_policy = static_cast<XdsLb*>(arg);
1364
- if (xdslb_policy->shutting_down_) goto done;
1365
- // Re-initialize the lb_call. This should also take care of updating the
1366
- // child policy. Note that the current child policy, if any, will
1367
- // stay in effect until an update from the new lb_call is received.
1368
- switch (xdslb_policy->lb_channel_connectivity_) {
1369
- case GRPC_CHANNEL_CONNECTING:
1370
- case GRPC_CHANNEL_TRANSIENT_FAILURE: {
1371
- // Keep watching the LB channel.
1372
- grpc_channel_element* client_channel_elem =
1373
- grpc_channel_stack_last_element(
1374
- grpc_channel_get_channel_stack(xdslb_policy->lb_channel_));
1375
- GPR_ASSERT(client_channel_elem->filter == &grpc_client_channel_filter);
1376
- grpc_client_channel_watch_connectivity_state(
1377
- client_channel_elem,
1378
- grpc_polling_entity_create_from_pollset_set(
1379
- xdslb_policy->interested_parties()),
1380
- &xdslb_policy->lb_channel_connectivity_,
1381
- &xdslb_policy->lb_channel_on_connectivity_changed_, nullptr);
1382
- break;
1383
- }
1384
- // The LB channel may be IDLE because it's shut down before the update.
1385
- // Restart the LB call to kick the LB channel into gear.
1386
- case GRPC_CHANNEL_IDLE:
1387
- case GRPC_CHANNEL_READY:
1388
- xdslb_policy->lb_calld_.reset();
1389
- if (xdslb_policy->started_picking_) {
1390
- if (xdslb_policy->retry_timer_callback_pending_) {
1391
- grpc_timer_cancel(&xdslb_policy->lb_call_retry_timer_);
1392
- }
1393
- xdslb_policy->lb_call_backoff_.Reset();
1394
- xdslb_policy->StartBalancerCallLocked();
1395
- }
1396
- // Fall through.
1397
- case GRPC_CHANNEL_SHUTDOWN:
1398
- done:
1399
- xdslb_policy->watching_lb_channel_ = false;
1400
- xdslb_policy->Unref(DEBUG_LOCATION,
1401
- "watch_lb_channel_connectivity_cb_shutdown");
1402
- }
1403
- }
1404
-
1405
- //
1406
- // PendingPick
1407
- //
1408
-
1409
- // Destroy function used when embedding client stats in call context.
1410
- void DestroyClientStats(void* arg) {
1411
- static_cast<XdsLbClientStats*>(arg)->Unref();
1412
- }
1413
-
1414
- void XdsLb::PendingPickCleanup(PendingPick* pp) {
1415
- // If connected_subchannel is nullptr, no pick has been made by the
1416
- // child policy (e.g., all addresses failed to connect).
1417
- if (pp->pick->connected_subchannel != nullptr) {
1418
- // Pass on client stats via context. Passes ownership of the reference.
1419
- if (pp->client_stats != nullptr) {
1420
- pp->pick->subchannel_call_context[GRPC_GRPCLB_CLIENT_STATS].value =
1421
- pp->client_stats.release();
1422
- pp->pick->subchannel_call_context[GRPC_GRPCLB_CLIENT_STATS].destroy =
1423
- DestroyClientStats;
1424
- }
1425
- } else {
1426
- pp->client_stats.reset();
1427
- }
1428
- }
1429
-
1430
- /* The \a on_complete closure passed as part of the pick requires keeping a
1431
- * reference to its associated child policy instance. We wrap this closure in
1432
- * order to unref the child policy instance upon its invocation */
1433
- void XdsLb::OnPendingPickComplete(void* arg, grpc_error* error) {
1434
- PendingPick* pp = static_cast<PendingPick*>(arg);
1435
- PendingPickCleanup(pp);
1436
- GRPC_CLOSURE_SCHED(pp->original_on_complete, GRPC_ERROR_REF(error));
1437
- Delete(pp);
1438
- }
1439
-
1440
- XdsLb::PendingPick* XdsLb::PendingPickCreate(PickState* pick) {
1441
- PendingPick* pp = New<PendingPick>();
1442
- pp->xdslb_policy = this;
1443
- pp->pick = pick;
1444
- GRPC_CLOSURE_INIT(&pp->on_complete, &XdsLb::OnPendingPickComplete, pp,
1445
- grpc_schedule_on_exec_ctx);
1446
- pp->original_on_complete = pick->on_complete;
1447
- pick->on_complete = &pp->on_complete;
1448
- return pp;
1449
- }
1450
-
1451
- void XdsLb::AddPendingPick(PendingPick* pp) {
1452
- pp->next = pending_picks_;
1453
- pending_picks_ = pp;
1454
- }
1455
-
1456
1355
  //
1457
1356
  // code for interacting with the child policy
1458
1357
  //
1459
1358
 
1460
- // Performs a pick over \a child_policy_. Given that a pick can return
1461
- // immediately (ignoring its completion callback), we need to perform the
1462
- // cleanups this callback would otherwise be responsible for.
1463
- // If \a force_async is true, then we will manually schedule the
1464
- // completion callback even if the pick is available immediately.
1465
- bool XdsLb::PickFromChildPolicyLocked(bool force_async, PendingPick* pp,
1466
- grpc_error** error) {
1467
- // Set client_stats.
1468
- if (lb_calld_ != nullptr && lb_calld_->client_stats() != nullptr) {
1469
- pp->client_stats = lb_calld_->client_stats()->Ref();
1470
- }
1471
- // Pick via the child policy.
1472
- bool pick_done = child_policy_->PickLocked(pp->pick, error);
1473
- if (pick_done) {
1474
- PendingPickCleanup(pp);
1475
- if (force_async) {
1476
- GRPC_CLOSURE_SCHED(pp->original_on_complete, *error);
1477
- *error = GRPC_ERROR_NONE;
1478
- pick_done = false;
1479
- }
1480
- Delete(pp);
1481
- }
1482
- // else, the pending pick will be registered and taken care of by the
1483
- // pending pick list inside the child policy. Eventually,
1484
- // OnPendingPickComplete() will be called, which will (among other
1485
- // things) add the LB token to the call's initial metadata.
1486
- return pick_done;
1487
- }
1488
-
1489
- void XdsLb::CreateChildPolicyLocked(const char* name, Args args) {
1490
- GPR_ASSERT(child_policy_ == nullptr);
1491
- child_policy_ = LoadBalancingPolicyRegistry::CreateLoadBalancingPolicy(
1492
- name, std::move(args));
1493
- if (GPR_UNLIKELY(child_policy_ == nullptr)) {
1494
- gpr_log(GPR_ERROR, "[xdslb %p] Failure creating a child policy", this);
1495
- return;
1496
- }
1497
- // TODO(roth): We currently track this ref manually. Once the new
1498
- // ClosureRef API is done, pass the RefCountedPtr<> along with the closure.
1499
- auto self = Ref(DEBUG_LOCATION, "on_child_reresolution_requested");
1500
- self.release();
1501
- child_policy_->SetReresolutionClosureLocked(&on_child_request_reresolution_);
1502
- grpc_error* child_state_error = nullptr;
1503
- child_connectivity_state_ =
1504
- child_policy_->CheckConnectivityLocked(&child_state_error);
1505
- // Connectivity state is a function of the child policy updated/created.
1506
- UpdateConnectivityStateFromChildPolicyLocked(child_state_error);
1507
- // Add the xDS's interested_parties pollset_set to that of the newly created
1508
- // child policy. This will make the child policy progress upon activity on
1509
- // xDS LB, which in turn is tied to the application's call.
1510
- grpc_pollset_set_add_pollset_set(child_policy_->interested_parties(),
1511
- interested_parties());
1512
- // Subscribe to changes to the connectivity of the new child policy.
1513
- // TODO(roth): We currently track this ref manually. Once the new
1514
- // ClosureRef API is done, pass the RefCountedPtr<> along with the closure.
1515
- self = Ref(DEBUG_LOCATION, "on_child_connectivity_changed");
1516
- self.release();
1517
- child_policy_->NotifyOnStateChangeLocked(&child_connectivity_state_,
1518
- &on_child_connectivity_changed_);
1519
- child_policy_->ExitIdleLocked();
1520
- // Send pending picks to child policy.
1521
- PendingPick* pp;
1522
- while ((pp = pending_picks_)) {
1523
- pending_picks_ = pp->next;
1524
- if (grpc_lb_xds_trace.enabled()) {
1525
- gpr_log(
1526
- GPR_INFO,
1527
- "[xdslb %p] Pending pick about to (async) PICK from child policy %p",
1528
- this, child_policy_.get());
1529
- }
1530
- grpc_error* error = GRPC_ERROR_NONE;
1531
- PickFromChildPolicyLocked(true /* force_async */, pp, &error);
1532
- }
1533
- }
1534
-
1535
1359
  grpc_channel_args* XdsLb::CreateChildPolicyArgsLocked() {
1536
- bool is_backend_from_grpclb_load_balancer = false;
1537
- // This should never be invoked if we do not have serverlist_, as fallback
1538
- // mode is disabled for xDS plugin.
1539
- GPR_ASSERT(serverlist_ != nullptr);
1540
- GPR_ASSERT(serverlist_->num_servers > 0);
1541
- UniquePtr<ServerAddressList> addresses = ProcessServerlist(serverlist_);
1542
- GPR_ASSERT(addresses != nullptr);
1543
- is_backend_from_grpclb_load_balancer = true;
1544
- // Replace the server address list in the channel args that we pass down to
1545
- // the subchannel.
1546
- static const char* keys_to_remove[] = {GRPC_ARG_SERVER_ADDRESS_LIST};
1547
1360
  const grpc_arg args_to_add[] = {
1548
- CreateServerAddressListChannelArg(addresses.get()),
1549
1361
  // A channel arg indicating if the target is a backend inferred from a
1550
1362
  // grpclb load balancer.
1551
1363
  grpc_channel_arg_integer_create(
1552
1364
  const_cast<char*>(GRPC_ARG_ADDRESS_IS_BACKEND_FROM_XDS_LOAD_BALANCER),
1553
- is_backend_from_grpclb_load_balancer),
1365
+ 1),
1366
+ // Inhibit client-side health checking, since the balancer does
1367
+ // this for us.
1368
+ grpc_channel_arg_integer_create(
1369
+ const_cast<char*>(GRPC_ARG_INHIBIT_HEALTH_CHECKING), 1),
1554
1370
  };
1555
- grpc_channel_args* args = grpc_channel_args_copy_and_add_and_remove(
1556
- args_, keys_to_remove, GPR_ARRAY_SIZE(keys_to_remove), args_to_add,
1557
- GPR_ARRAY_SIZE(args_to_add));
1558
- return args;
1371
+ return grpc_channel_args_copy_and_add(args_, args_to_add,
1372
+ GPR_ARRAY_SIZE(args_to_add));
1373
+ }
1374
+
1375
+ OrphanablePtr<LoadBalancingPolicy> XdsLb::CreateChildPolicyLocked(
1376
+ const char* name, const grpc_channel_args* args) {
1377
+ Helper* helper = New<Helper>(Ref());
1378
+ LoadBalancingPolicy::Args lb_policy_args;
1379
+ lb_policy_args.combiner = combiner();
1380
+ lb_policy_args.args = args;
1381
+ lb_policy_args.channel_control_helper =
1382
+ UniquePtr<ChannelControlHelper>(helper);
1383
+ OrphanablePtr<LoadBalancingPolicy> lb_policy =
1384
+ LoadBalancingPolicyRegistry::CreateLoadBalancingPolicy(
1385
+ name, std::move(lb_policy_args));
1386
+ if (GPR_UNLIKELY(lb_policy == nullptr)) {
1387
+ gpr_log(GPR_ERROR, "[xdslb %p] Failure creating child policy %s", this,
1388
+ name);
1389
+ return nullptr;
1390
+ }
1391
+ helper->set_child(lb_policy.get());
1392
+ if (grpc_lb_xds_trace.enabled()) {
1393
+ gpr_log(GPR_INFO, "[xdslb %p] Created new child policy %s (%p)", this, name,
1394
+ lb_policy.get());
1395
+ }
1396
+ // Add the xDS's interested_parties pollset_set to that of the newly created
1397
+ // child policy. This will make the child policy progress upon activity on xDS
1398
+ // LB, which in turn is tied to the application's call.
1399
+ grpc_pollset_set_add_pollset_set(lb_policy->interested_parties(),
1400
+ interested_parties());
1401
+ return lb_policy;
1559
1402
  }
1560
1403
 
1561
1404
  void XdsLb::CreateOrUpdateChildPolicyLocked() {
1562
1405
  if (shutting_down_) return;
1563
- grpc_channel_args* args = CreateChildPolicyArgsLocked();
1564
- GPR_ASSERT(args != nullptr);
1565
- const char* child_policy_name = nullptr;
1566
- grpc_json* child_policy_config = nullptr;
1567
- grpc_json* child_policy_json =
1568
- grpc_json_parse_string(child_policy_json_string_.get());
1406
+ // This should never be invoked if we do not have serverlist_, as fallback
1407
+ // mode is disabled for xDS plugin.
1408
+ // TODO(juanlishen): Change this as part of implementing fallback mode.
1409
+ GPR_ASSERT(serverlist_ != nullptr);
1410
+ GPR_ASSERT(serverlist_->num_servers > 0);
1411
+ // Construct update args.
1412
+ UpdateArgs update_args;
1413
+ update_args.addresses = ProcessServerlist(serverlist_);
1414
+ update_args.config = child_policy_config_;
1415
+ update_args.args = CreateChildPolicyArgsLocked();
1416
+ // If the child policy name changes, we need to create a new child
1417
+ // policy. When this happens, we leave child_policy_ as-is and store
1418
+ // the new child policy in pending_child_policy_. Once the new child
1419
+ // policy transitions into state READY, we swap it into child_policy_,
1420
+ // replacing the original child policy. So pending_child_policy_ is
1421
+ // non-null only between when we apply an update that changes the child
1422
+ // policy name and when the new child reports state READY.
1423
+ //
1424
+ // Updates can arrive at any point during this transition. We always
1425
+ // apply updates relative to the most recently created child policy,
1426
+ // even if the most recent one is still in pending_child_policy_. This
1427
+ // is true both when applying the updates to an existing child policy
1428
+ // and when determining whether we need to create a new policy.
1429
+ //
1430
+ // As a result of this, there are several cases to consider here:
1431
+ //
1432
+ // 1. We have no existing child policy (i.e., we have started up but
1433
+ // have not yet received a serverlist from the balancer or gone
1434
+ // into fallback mode; in this case, both child_policy_ and
1435
+ // pending_child_policy_ are null). In this case, we create a
1436
+ // new child policy and store it in child_policy_.
1437
+ //
1438
+ // 2. We have an existing child policy and have no pending child policy
1439
+ // from a previous update (i.e., either there has not been a
1440
+ // previous update that changed the policy name, or we have already
1441
+ // finished swapping in the new policy; in this case, child_policy_
1442
+ // is non-null but pending_child_policy_ is null). In this case:
1443
+ // a. If child_policy_->name() equals child_policy_name, then we
1444
+ // update the existing child policy.
1445
+ // b. If child_policy_->name() does not equal child_policy_name,
1446
+ // we create a new policy. The policy will be stored in
1447
+ // pending_child_policy_ and will later be swapped into
1448
+ // child_policy_ by the helper when the new child transitions
1449
+ // into state READY.
1450
+ //
1451
+ // 3. We have an existing child policy and have a pending child policy
1452
+ // from a previous update (i.e., a previous update set
1453
+ // pending_child_policy_ as per case 2b above and that policy has
1454
+ // not yet transitioned into state READY and been swapped into
1455
+ // child_policy_; in this case, both child_policy_ and
1456
+ // pending_child_policy_ are non-null). In this case:
1457
+ // a. If pending_child_policy_->name() equals child_policy_name,
1458
+ // then we update the existing pending child policy.
1459
+ // b. If pending_child_policy_->name() does not equal
1460
+ // child_policy_name, then we create a new policy. The new
1461
+ // policy is stored in pending_child_policy_ (replacing the one
1462
+ // that was there before, which will be immediately shut down)
1463
+ // and will later be swapped into child_policy_ by the helper
1464
+ // when the new child transitions into state READY.
1569
1465
  // TODO(juanlishen): If the child policy is not configured via service config,
1570
1466
  // use whatever algorithm is specified by the balancer.
1571
- if (child_policy_json != nullptr) {
1572
- child_policy_name = child_policy_json->key;
1573
- child_policy_config = child_policy_json->child;
1574
- } else {
1467
+ const char* child_policy_name = child_policy_config_ == nullptr
1468
+ ? "round_robin"
1469
+ : child_policy_config_->name();
1470
+ const bool create_policy =
1471
+ // case 1
1472
+ child_policy_ == nullptr ||
1473
+ // case 2b
1474
+ (pending_child_policy_ == nullptr &&
1475
+ strcmp(child_policy_->name(), child_policy_name) != 0) ||
1476
+ // case 3b
1477
+ (pending_child_policy_ != nullptr &&
1478
+ strcmp(pending_child_policy_->name(), child_policy_name) != 0);
1479
+ LoadBalancingPolicy* policy_to_update = nullptr;
1480
+ if (create_policy) {
1481
+ // Cases 1, 2b, and 3b: create a new child policy.
1482
+ // If child_policy_ is null, we set it (case 1), else we set
1483
+ // pending_child_policy_ (cases 2b and 3b).
1575
1484
  if (grpc_lb_xds_trace.enabled()) {
1576
- gpr_log(GPR_INFO, "[xdslb %p] No valid child policy LB config", this);
1485
+ gpr_log(GPR_INFO, "[xdslb %p] Creating new %schild policy %s", this,
1486
+ child_policy_ == nullptr ? "" : "pending ", child_policy_name);
1577
1487
  }
1578
- child_policy_name = "round_robin";
1579
- }
1580
- // TODO(juanlishen): Switch policy according to child_policy_config->key.
1581
- if (child_policy_ != nullptr) {
1582
- if (grpc_lb_xds_trace.enabled()) {
1583
- gpr_log(GPR_INFO, "[xdslb %p] Updating the child policy %p", this,
1584
- child_policy_.get());
1488
+ auto new_policy =
1489
+ CreateChildPolicyLocked(child_policy_name, update_args.args);
1490
+ auto& lb_policy =
1491
+ child_policy_ == nullptr ? child_policy_ : pending_child_policy_;
1492
+ {
1493
+ MutexLock lock(&child_policy_mu_);
1494
+ lb_policy = std::move(new_policy);
1585
1495
  }
1586
- child_policy_->UpdateLocked(*args, child_policy_config);
1496
+ policy_to_update = lb_policy.get();
1587
1497
  } else {
1588
- LoadBalancingPolicy::Args lb_policy_args;
1589
- lb_policy_args.combiner = combiner();
1590
- lb_policy_args.client_channel_factory = client_channel_factory();
1591
- lb_policy_args.subchannel_pool = subchannel_pool()->Ref();
1592
- lb_policy_args.args = args;
1593
- lb_policy_args.lb_config = child_policy_config;
1594
- CreateChildPolicyLocked(child_policy_name, std::move(lb_policy_args));
1595
- if (grpc_lb_xds_trace.enabled()) {
1596
- gpr_log(GPR_INFO, "[xdslb %p] Created a new child policy %p", this,
1597
- child_policy_.get());
1598
- }
1599
- }
1600
- grpc_channel_args_destroy(args);
1601
- grpc_json_destroy(child_policy_json);
1602
- }
1603
-
1604
- void XdsLb::OnChildPolicyRequestReresolutionLocked(void* arg,
1605
- grpc_error* error) {
1606
- XdsLb* xdslb_policy = static_cast<XdsLb*>(arg);
1607
- if (xdslb_policy->shutting_down_ || error != GRPC_ERROR_NONE) {
1608
- xdslb_policy->Unref(DEBUG_LOCATION, "on_child_reresolution_requested");
1609
- return;
1610
- }
1611
- if (grpc_lb_xds_trace.enabled()) {
1612
- gpr_log(GPR_INFO,
1613
- "[xdslb %p] Re-resolution requested from child policy "
1614
- "(%p).",
1615
- xdslb_policy, xdslb_policy->child_policy_.get());
1616
- }
1617
- // If we are talking to a balancer, we expect to get updated addresses from
1618
- // the balancer, so we can ignore the re-resolution request from the child
1619
- // policy.
1620
- // Otherwise, handle the re-resolution request using the xds policy's
1621
- // original re-resolution closure.
1622
- if (xdslb_policy->lb_calld_ == nullptr ||
1623
- !xdslb_policy->lb_calld_->seen_initial_response()) {
1624
- xdslb_policy->TryReresolutionLocked(&grpc_lb_xds_trace, GRPC_ERROR_NONE);
1625
- }
1626
- // Give back the wrapper closure to the child policy.
1627
- xdslb_policy->child_policy_->SetReresolutionClosureLocked(
1628
- &xdslb_policy->on_child_request_reresolution_);
1629
- }
1630
-
1631
- void XdsLb::UpdateConnectivityStateFromChildPolicyLocked(
1632
- grpc_error* child_state_error) {
1633
- const grpc_connectivity_state curr_glb_state =
1634
- grpc_connectivity_state_check(&state_tracker_);
1635
- /* The new connectivity status is a function of the previous one and the new
1636
- * input coming from the status of the child policy.
1637
- *
1638
- * current state (xds's)
1639
- * |
1640
- * v || I | C | R | TF | SD | <- new state (child policy's)
1641
- * ===++====+=====+=====+======+======+
1642
- * I || I | C | R | [I] | [I] |
1643
- * ---++----+-----+-----+------+------+
1644
- * C || I | C | R | [C] | [C] |
1645
- * ---++----+-----+-----+------+------+
1646
- * R || I | C | R | [R] | [R] |
1647
- * ---++----+-----+-----+------+------+
1648
- * TF || I | C | R | [TF] | [TF] |
1649
- * ---++----+-----+-----+------+------+
1650
- * SD || NA | NA | NA | NA | NA | (*)
1651
- * ---++----+-----+-----+------+------+
1652
- *
1653
- * A [STATE] indicates that the old child policy is kept. In those cases,
1654
- * STATE is the current state of xds, which is left untouched.
1655
- *
1656
- * In summary, if the new state is TRANSIENT_FAILURE or SHUTDOWN, stick to
1657
- * the previous child policy instance.
1658
- *
1659
- * Note that the status is never updated to SHUTDOWN as a result of calling
1660
- * this function. Only glb_shutdown() has the power to set that state.
1661
- *
1662
- * (*) This function mustn't be called during shutting down. */
1663
- GPR_ASSERT(curr_glb_state != GRPC_CHANNEL_SHUTDOWN);
1664
- switch (child_connectivity_state_) {
1665
- case GRPC_CHANNEL_TRANSIENT_FAILURE:
1666
- case GRPC_CHANNEL_SHUTDOWN:
1667
- GPR_ASSERT(child_state_error != GRPC_ERROR_NONE);
1668
- break;
1669
- case GRPC_CHANNEL_IDLE:
1670
- case GRPC_CHANNEL_CONNECTING:
1671
- case GRPC_CHANNEL_READY:
1672
- GPR_ASSERT(child_state_error == GRPC_ERROR_NONE);
1498
+ // Cases 2a and 3a: update an existing policy.
1499
+ // If we have a pending child policy, send the update to the pending
1500
+ // policy (case 3a), else send it to the current policy (case 2a).
1501
+ policy_to_update = pending_child_policy_ != nullptr
1502
+ ? pending_child_policy_.get()
1503
+ : child_policy_.get();
1673
1504
  }
1505
+ GPR_ASSERT(policy_to_update != nullptr);
1506
+ // Update the policy.
1674
1507
  if (grpc_lb_xds_trace.enabled()) {
1675
- gpr_log(GPR_INFO,
1676
- "[xdslb %p] Setting xds's state to %s from child policy %p state.",
1677
- this, grpc_connectivity_state_name(child_connectivity_state_),
1678
- child_policy_.get());
1679
- }
1680
- grpc_connectivity_state_set(&state_tracker_, child_connectivity_state_,
1681
- child_state_error,
1682
- "update_lb_connectivity_status_locked");
1683
- }
1684
-
1685
- void XdsLb::OnChildPolicyConnectivityChangedLocked(void* arg,
1686
- grpc_error* error) {
1687
- XdsLb* xdslb_policy = static_cast<XdsLb*>(arg);
1688
- if (xdslb_policy->shutting_down_) {
1689
- xdslb_policy->Unref(DEBUG_LOCATION, "on_child_connectivity_changed");
1690
- return;
1508
+ gpr_log(GPR_INFO, "[xdslb %p] Updating %schild policy %p", this,
1509
+ policy_to_update == pending_child_policy_.get() ? "pending " : "",
1510
+ policy_to_update);
1691
1511
  }
1692
- xdslb_policy->UpdateConnectivityStateFromChildPolicyLocked(
1693
- GRPC_ERROR_REF(error));
1694
- // Resubscribe. Reuse the "on_child_connectivity_changed" ref.
1695
- xdslb_policy->child_policy_->NotifyOnStateChangeLocked(
1696
- &xdslb_policy->child_connectivity_state_,
1697
- &xdslb_policy->on_child_connectivity_changed_);
1512
+ policy_to_update->UpdateLocked(std::move(update_args));
1698
1513
  }
1699
1514
 
1700
1515
  //
@@ -1705,18 +1520,6 @@ class XdsFactory : public LoadBalancingPolicyFactory {
1705
1520
  public:
1706
1521
  OrphanablePtr<LoadBalancingPolicy> CreateLoadBalancingPolicy(
1707
1522
  LoadBalancingPolicy::Args args) const override {
1708
- /* Count the number of gRPC-LB addresses. There must be at least one. */
1709
- const ServerAddressList* addresses =
1710
- FindServerAddressListChannelArg(args.args);
1711
- if (addresses == nullptr) return nullptr;
1712
- bool found_balancer_address = false;
1713
- for (size_t i = 0; i < addresses->size(); ++i) {
1714
- if ((*addresses)[i].IsBalancer()) {
1715
- found_balancer_address = true;
1716
- break;
1717
- }
1718
- }
1719
- if (!found_balancer_address) return nullptr;
1720
1523
  return OrphanablePtr<LoadBalancingPolicy>(New<XdsLb>(std::move(args)));
1721
1524
  }
1722
1525