grpc 1.19.0 → 1.20.0.pre1

Files changed (224)
  1. checksums.yaml +4 -4
  2. data/Makefile +4131 -7903
  3. data/include/grpc/grpc.h +11 -6
  4. data/include/grpc/grpc_security.h +51 -9
  5. data/include/grpc/impl/codegen/byte_buffer.h +13 -0
  6. data/include/grpc/impl/codegen/grpc_types.h +4 -0
  7. data/include/grpc/impl/codegen/port_platform.h +37 -6
  8. data/include/grpc/impl/codegen/sync_posix.h +18 -0
  9. data/src/core/ext/filters/client_channel/client_channel.cc +560 -236
  10. data/src/core/ext/filters/client_channel/client_channel_channelz.h +2 -2
  11. data/src/core/ext/filters/client_channel/client_channel_factory.cc +22 -34
  12. data/src/core/ext/filters/client_channel/client_channel_factory.h +19 -38
  13. data/src/core/ext/filters/client_channel/global_subchannel_pool.cc +7 -4
  14. data/src/core/ext/filters/client_channel/http_connect_handshaker.cc +2 -2
  15. data/src/core/ext/filters/client_channel/lb_policy.cc +105 -28
  16. data/src/core/ext/filters/client_channel/lb_policy.h +259 -141
  17. data/src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc +29 -32
  18. data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc +789 -803
  19. data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_channel.h +3 -1
  20. data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_channel_secure.cc +2 -6
  21. data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.cc +1 -1
  22. data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.h +7 -1
  23. data/src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.cc +8 -8
  24. data/src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.h +2 -2
  25. data/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +127 -219
  26. data/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc +103 -282
  27. data/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h +4 -10
  28. data/src/core/ext/filters/client_channel/lb_policy/xds/xds.cc +709 -906
  29. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_channel_secure.cc +0 -43
  30. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_load_balancer_api.cc +8 -8
  31. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_load_balancer_api.h +2 -2
  32. data/src/core/ext/filters/client_channel/lb_policy_factory.h +1 -6
  33. data/src/core/ext/filters/client_channel/resolver.cc +54 -1
  34. data/src/core/ext/filters/client_channel/resolver.h +51 -22
  35. data/src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.cc +34 -86
  36. data/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.cc +29 -41
  37. data/src/core/ext/filters/client_channel/resolver/dns/native/dns_resolver.cc +32 -78
  38. data/src/core/ext/filters/client_channel/resolver/fake/fake_resolver.cc +109 -72
  39. data/src/core/ext/filters/client_channel/resolver/fake/fake_resolver.h +13 -8
  40. data/src/core/ext/filters/client_channel/resolver/sockaddr/sockaddr_resolver.cc +28 -63
  41. data/src/core/ext/filters/client_channel/resolver_factory.h +3 -1
  42. data/src/core/ext/filters/client_channel/resolver_registry.cc +5 -2
  43. data/src/core/ext/filters/client_channel/resolver_registry.h +5 -4
  44. data/src/core/ext/filters/client_channel/resolver_result_parsing.cc +69 -49
  45. data/src/core/ext/filters/client_channel/resolver_result_parsing.h +11 -8
  46. data/src/core/ext/filters/client_channel/resolving_lb_policy.cc +568 -0
  47. data/src/core/ext/filters/client_channel/resolving_lb_policy.h +141 -0
  48. data/src/core/ext/filters/client_channel/server_address.cc +0 -48
  49. data/src/core/ext/filters/client_channel/server_address.h +0 -10
  50. data/src/core/{lib/transport → ext/filters/client_channel}/service_config.cc +10 -5
  51. data/src/core/{lib/transport → ext/filters/client_channel}/service_config.h +16 -12
  52. data/src/core/ext/filters/client_channel/subchannel.cc +11 -16
  53. data/src/core/ext/filters/client_channel/subchannel.h +3 -0
  54. data/src/core/ext/filters/max_age/max_age_filter.cc +4 -1
  55. data/src/core/ext/filters/message_size/message_size_filter.cc +2 -2
  56. data/src/core/ext/transport/chttp2/client/insecure/channel_create.cc +45 -45
  57. data/src/core/ext/transport/chttp2/client/secure/secure_channel_create.cc +133 -134
  58. data/src/core/ext/transport/chttp2/transport/bin_decoder.cc +4 -4
  59. data/src/core/ext/transport/chttp2/transport/bin_decoder.h +4 -4
  60. data/src/core/ext/transport/chttp2/transport/bin_encoder.cc +7 -6
  61. data/src/core/ext/transport/chttp2/transport/bin_encoder.h +4 -3
  62. data/src/core/ext/transport/chttp2/transport/chttp2_transport.cc +37 -29
  63. data/src/core/ext/transport/chttp2/transport/flow_control.cc +1 -1
  64. data/src/core/ext/transport/chttp2/transport/frame_data.cc +2 -1
  65. data/src/core/ext/transport/chttp2/transport/frame_data.h +1 -1
  66. data/src/core/ext/transport/chttp2/transport/frame_goaway.cc +6 -5
  67. data/src/core/ext/transport/chttp2/transport/frame_goaway.h +3 -2
  68. data/src/core/ext/transport/chttp2/transport/frame_ping.cc +5 -4
  69. data/src/core/ext/transport/chttp2/transport/frame_ping.h +1 -1
  70. data/src/core/ext/transport/chttp2/transport/frame_rst_stream.cc +5 -4
  71. data/src/core/ext/transport/chttp2/transport/frame_rst_stream.h +2 -1
  72. data/src/core/ext/transport/chttp2/transport/frame_settings.cc +2 -1
  73. data/src/core/ext/transport/chttp2/transport/frame_settings.h +2 -1
  74. data/src/core/ext/transport/chttp2/transport/frame_window_update.cc +4 -4
  75. data/src/core/ext/transport/chttp2/transport/frame_window_update.h +1 -1
  76. data/src/core/ext/transport/chttp2/transport/hpack_parser.cc +7 -6
  77. data/src/core/ext/transport/chttp2/transport/hpack_parser.h +3 -2
  78. data/src/core/ext/transport/chttp2/transport/incoming_metadata.cc +9 -5
  79. data/src/core/ext/transport/chttp2/transport/incoming_metadata.h +6 -1
  80. data/src/core/ext/transport/chttp2/transport/internal.h +5 -4
  81. data/src/core/ext/transport/chttp2/transport/parsing.cc +9 -9
  82. data/src/core/ext/transport/chttp2/transport/writing.cc +1 -1
  83. data/src/core/ext/transport/inproc/inproc_transport.cc +8 -0
  84. data/src/core/lib/channel/channel_args.cc +2 -0
  85. data/src/core/lib/channel/channel_args.h +3 -0
  86. data/src/core/lib/channel/channel_stack.h +1 -1
  87. data/src/core/lib/channel/channel_trace.cc +4 -4
  88. data/src/core/lib/channel/channel_trace.h +4 -4
  89. data/src/core/lib/channel/channelz.cc +32 -19
  90. data/src/core/lib/channel/channelz.h +4 -4
  91. data/src/core/lib/channel/channelz_registry.cc +1 -1
  92. data/src/core/lib/channel/context.h +0 -3
  93. data/src/core/lib/channel/handshaker_registry.cc +7 -3
  94. data/src/core/lib/compression/algorithm_metadata.h +3 -3
  95. data/src/core/lib/compression/compression.cc +1 -1
  96. data/src/core/lib/compression/compression_internal.cc +2 -2
  97. data/src/core/lib/compression/stream_compression_gzip.cc +1 -1
  98. data/src/core/lib/debug/trace.h +2 -1
  99. data/src/core/lib/gpr/cpu_posix.cc +5 -3
  100. data/src/core/lib/gpr/sync_posix.cc +65 -4
  101. data/src/core/lib/gprpp/atomic.h +75 -5
  102. data/src/core/lib/gprpp/fork.cc +0 -2
  103. data/src/core/lib/gprpp/orphanable.h +3 -2
  104. data/src/core/lib/gprpp/ref_counted.h +9 -11
  105. data/src/core/lib/gprpp/thd.h +42 -7
  106. data/src/core/lib/gprpp/thd_posix.cc +31 -13
  107. data/src/core/lib/gprpp/thd_windows.cc +47 -34
  108. data/src/core/lib/http/httpcli.cc +3 -2
  109. data/src/core/lib/http/httpcli_security_connector.cc +0 -1
  110. data/src/core/lib/http/parser.cc +2 -1
  111. data/src/core/lib/http/parser.h +2 -1
  112. data/src/core/lib/iomgr/buffer_list.h +1 -1
  113. data/src/core/lib/iomgr/endpoint.cc +2 -2
  114. data/src/core/lib/iomgr/endpoint.h +3 -2
  115. data/src/core/lib/iomgr/error.cc +9 -9
  116. data/src/core/lib/iomgr/error.h +4 -3
  117. data/src/core/lib/iomgr/ev_epoll1_linux.cc +6 -0
  118. data/src/core/lib/iomgr/ev_epollex_linux.cc +14 -9
  119. data/src/core/lib/iomgr/ev_poll_posix.cc +7 -481
  120. data/src/core/lib/iomgr/ev_posix.cc +7 -3
  121. data/src/core/lib/iomgr/ev_posix.h +8 -0
  122. data/src/core/lib/iomgr/executor.cc +13 -0
  123. data/src/core/lib/iomgr/executor.h +2 -1
  124. data/src/core/lib/iomgr/internal_errqueue.cc +2 -4
  125. data/src/core/lib/iomgr/iomgr.cc +5 -0
  126. data/src/core/lib/iomgr/iomgr.h +7 -0
  127. data/src/core/lib/iomgr/iomgr_custom.cc +9 -2
  128. data/src/core/lib/iomgr/iomgr_internal.cc +6 -0
  129. data/src/core/lib/iomgr/iomgr_internal.h +9 -1
  130. data/src/core/lib/iomgr/iomgr_posix.cc +10 -2
  131. data/src/core/lib/iomgr/iomgr_windows.cc +10 -2
  132. data/src/core/lib/iomgr/port.h +19 -0
  133. data/src/core/lib/iomgr/tcp_client_windows.cc +6 -4
  134. data/src/core/lib/iomgr/tcp_custom.cc +1 -1
  135. data/src/core/lib/iomgr/tcp_posix.cc +158 -54
  136. data/src/core/lib/iomgr/tcp_windows.cc +1 -1
  137. data/src/core/lib/iomgr/wakeup_fd_posix.cc +1 -19
  138. data/src/core/lib/security/credentials/jwt/jwt_verifier.cc +10 -6
  139. data/src/core/lib/security/credentials/jwt/jwt_verifier.h +2 -1
  140. data/src/core/lib/security/credentials/tls/grpc_tls_credentials_options.h +3 -6
  141. data/src/core/lib/security/credentials/tls/spiffe_credentials.cc +129 -0
  142. data/src/core/lib/security/credentials/tls/spiffe_credentials.h +62 -0
  143. data/src/core/lib/security/security_connector/fake/fake_security_connector.cc +7 -2
  144. data/src/core/lib/security/security_connector/ssl/ssl_security_connector.cc +28 -17
  145. data/src/core/lib/security/security_connector/ssl_utils.cc +134 -0
  146. data/src/core/lib/security/security_connector/ssl_utils.h +32 -0
  147. data/src/core/lib/security/security_connector/tls/spiffe_security_connector.cc +426 -0
  148. data/src/core/lib/security/security_connector/tls/spiffe_security_connector.h +122 -0
  149. data/src/core/lib/security/transport/auth_filters.h +2 -2
  150. data/src/core/lib/security/transport/client_auth_filter.cc +35 -39
  151. data/src/core/lib/security/transport/secure_endpoint.cc +2 -2
  152. data/src/core/lib/security/transport/security_handshaker.cc +4 -3
  153. data/src/core/lib/slice/percent_encoding.cc +3 -3
  154. data/src/core/lib/slice/percent_encoding.h +3 -3
  155. data/src/core/lib/slice/slice.cc +27 -30
  156. data/src/core/lib/slice/slice_hash_table.h +2 -2
  157. data/src/core/lib/slice/slice_intern.cc +1 -1
  158. data/src/core/lib/slice/slice_internal.h +14 -3
  159. data/src/core/lib/slice/slice_weak_hash_table.h +4 -4
  160. data/src/core/lib/surface/byte_buffer_reader.cc +17 -0
  161. data/src/core/lib/surface/call.cc +8 -3
  162. data/src/core/lib/surface/completion_queue.cc +134 -148
  163. data/src/core/lib/surface/init.cc +78 -30
  164. data/src/core/lib/surface/init.h +1 -0
  165. data/src/core/lib/surface/lame_client.cc +4 -6
  166. data/src/core/lib/surface/version.cc +1 -1
  167. data/src/core/lib/transport/metadata.cc +66 -33
  168. data/src/core/lib/transport/metadata_batch.cc +1 -1
  169. data/src/core/lib/transport/metadata_batch.h +1 -1
  170. data/src/core/lib/transport/timeout_encoding.cc +1 -1
  171. data/src/core/lib/transport/timeout_encoding.h +1 -1
  172. data/src/core/lib/transport/transport.h +4 -3
  173. data/src/core/tsi/alts/handshaker/alts_handshaker_client.cc +3 -3
  174. data/src/core/tsi/alts/handshaker/alts_handshaker_client.h +1 -1
  175. data/src/core/tsi/alts/handshaker/transport_security_common_api.cc +4 -3
  176. data/src/core/tsi/alts/handshaker/transport_security_common_api.h +1 -1
  177. data/src/core/tsi/alts/zero_copy_frame_protector/alts_zero_copy_grpc_protector.cc +1 -1
  178. data/src/core/tsi/ssl_transport_security.cc +1 -5
  179. data/src/core/tsi/ssl_transport_security.h +24 -4
  180. data/src/ruby/bin/math_pb.rb +18 -16
  181. data/src/ruby/ext/grpc/rb_grpc_imports.generated.c +4 -0
  182. data/src/ruby/ext/grpc/rb_grpc_imports.generated.h +6 -0
  183. data/src/ruby/lib/grpc/generic/rpc_server.rb +1 -1
  184. data/src/ruby/lib/grpc/version.rb +1 -1
  185. data/src/ruby/pb/README.md +1 -1
  186. data/src/ruby/pb/grpc/health/v1/health_pb.rb +13 -10
  187. data/src/ruby/pb/grpc/health/v1/health_services_pb.rb +18 -0
  188. data/src/ruby/pb/src/proto/grpc/testing/empty_pb.rb +3 -1
  189. data/src/ruby/pb/src/proto/grpc/testing/messages_pb.rb +58 -56
  190. data/src/ruby/pb/src/proto/grpc/testing/test_pb.rb +2 -0
  191. data/third_party/cares/cares/ares.h +12 -0
  192. data/third_party/cares/cares/ares_create_query.c +5 -1
  193. data/third_party/cares/cares/ares_data.c +74 -73
  194. data/third_party/cares/cares/ares_destroy.c +6 -1
  195. data/third_party/cares/cares/ares_gethostbyaddr.c +5 -5
  196. data/third_party/cares/cares/ares_gethostbyname.c +15 -4
  197. data/third_party/cares/cares/ares_getnameinfo.c +11 -0
  198. data/third_party/cares/cares/ares_init.c +274 -173
  199. data/third_party/cares/cares/ares_library_init.c +21 -3
  200. data/third_party/cares/cares/ares_options.c +6 -2
  201. data/third_party/cares/cares/ares_parse_naptr_reply.c +7 -6
  202. data/third_party/cares/cares/ares_parse_ptr_reply.c +4 -2
  203. data/third_party/cares/cares/ares_platform.c +7 -0
  204. data/third_party/cares/cares/ares_private.h +19 -11
  205. data/third_party/cares/cares/ares_process.c +27 -2
  206. data/third_party/cares/cares/ares_rules.h +1 -1
  207. data/third_party/cares/cares/ares_search.c +7 -0
  208. data/third_party/cares/cares/ares_send.c +6 -0
  209. data/third_party/cares/cares/ares_strsplit.c +174 -0
  210. data/third_party/cares/cares/ares_strsplit.h +43 -0
  211. data/third_party/cares/cares/ares_version.h +4 -4
  212. data/third_party/cares/cares/config-win32.h +1 -1
  213. data/third_party/cares/cares/inet_ntop.c +2 -3
  214. data/third_party/cares/config_darwin/ares_config.h +3 -0
  215. data/third_party/cares/config_freebsd/ares_config.h +3 -0
  216. data/third_party/cares/config_linux/ares_config.h +3 -0
  217. data/third_party/cares/config_openbsd/ares_config.h +3 -0
  218. metadata +39 -37
  219. data/src/core/ext/filters/client_channel/request_routing.cc +0 -946
  220. data/src/core/ext/filters/client_channel/request_routing.h +0 -181
  221. data/src/core/lib/gprpp/atomic_with_atm.h +0 -57
  222. data/src/core/lib/gprpp/atomic_with_std.h +0 -35
  223. data/src/core/lib/iomgr/wakeup_fd_cv.cc +0 -107
  224. data/src/core/lib/iomgr/wakeup_fd_cv.h +0 -69
--- a/data/src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc
+++ b/data/src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc
@@ -37,17 +37,6 @@ static void destroy_channel_elem(grpc_channel_element* elem) {}
 namespace {
 
 struct call_data {
-  call_data(const grpc_call_element_args& args) {
-    if (args.context[GRPC_GRPCLB_CLIENT_STATS].value != nullptr) {
-      // Get stats object from context and take a ref.
-      client_stats = static_cast<grpc_core::GrpcLbClientStats*>(
-                         args.context[GRPC_GRPCLB_CLIENT_STATS].value)
-                         ->Ref();
-      // Record call started.
-      client_stats->AddCallStarted();
-    }
-  }
-
   // Stats object to update.
   grpc_core::RefCountedPtr<grpc_core::GrpcLbClientStats> client_stats;
   // State for intercepting send_initial_metadata.
@@ -82,7 +71,7 @@ static void recv_initial_metadata_ready(void* arg, grpc_error* error) {
 static grpc_error* init_call_elem(grpc_call_element* elem,
                                   const grpc_call_element_args* args) {
   GPR_ASSERT(args->context != nullptr);
-  new (elem->call_data) call_data(*args);
+  new (elem->call_data) call_data();
   return GRPC_ERROR_NONE;
 }
 
@@ -96,9 +85,6 @@ static void destroy_call_elem(grpc_call_element* elem,
     calld->client_stats->AddCallFinished(
         !calld->send_initial_metadata_succeeded /* client_failed_to_send */,
         calld->recv_initial_metadata_succeeded /* known_received */);
-    // All done, so unref the stats object.
-    // TODO(roth): Eliminate this once filter stack is converted to C++.
-    calld->client_stats.reset();
   }
   calld->~call_data();
 }
@@ -107,25 +93,36 @@ static void start_transport_stream_op_batch(
     grpc_call_element* elem, grpc_transport_stream_op_batch* batch) {
   call_data* calld = static_cast<call_data*>(elem->call_data);
   GPR_TIMER_SCOPE("clr_start_transport_stream_op_batch", 0);
-  if (calld->client_stats != nullptr) {
-    // Intercept send_initial_metadata.
-    if (batch->send_initial_metadata) {
-      calld->original_on_complete_for_send = batch->on_complete;
-      GRPC_CLOSURE_INIT(&calld->on_complete_for_send, on_complete_for_send,
-                        calld, grpc_schedule_on_exec_ctx);
-      batch->on_complete = &calld->on_complete_for_send;
-    }
-    // Intercept recv_initial_metadata.
-    if (batch->recv_initial_metadata) {
-      calld->original_recv_initial_metadata_ready =
-          batch->payload->recv_initial_metadata.recv_initial_metadata_ready;
-      GRPC_CLOSURE_INIT(&calld->recv_initial_metadata_ready,
-                        recv_initial_metadata_ready, calld,
-                        grpc_schedule_on_exec_ctx);
-      batch->payload->recv_initial_metadata.recv_initial_metadata_ready =
-          &calld->recv_initial_metadata_ready;
+  // Handle send_initial_metadata.
+  if (batch->send_initial_metadata) {
+    // Grab client stats object from user_data for LB token metadata.
+    grpc_linked_mdelem* lb_token =
+        batch->payload->send_initial_metadata.send_initial_metadata->idx.named
+            .lb_token;
+    if (lb_token != nullptr) {
+      grpc_core::GrpcLbClientStats* client_stats =
+          static_cast<grpc_core::GrpcLbClientStats*>(grpc_mdelem_get_user_data(
+              lb_token->md, grpc_core::GrpcLbClientStats::Destroy));
+      if (client_stats != nullptr) {
+        calld->client_stats = client_stats->Ref();
+        // Intercept completion.
+        calld->original_on_complete_for_send = batch->on_complete;
+        GRPC_CLOSURE_INIT(&calld->on_complete_for_send, on_complete_for_send,
+                          calld, grpc_schedule_on_exec_ctx);
+        batch->on_complete = &calld->on_complete_for_send;
+      }
     }
   }
+  // Intercept completion of recv_initial_metadata.
+  if (batch->recv_initial_metadata) {
+    calld->original_recv_initial_metadata_ready =
+        batch->payload->recv_initial_metadata.recv_initial_metadata_ready;
+    GRPC_CLOSURE_INIT(&calld->recv_initial_metadata_ready,
+                      recv_initial_metadata_ready, calld,
+                      grpc_schedule_on_exec_ctx);
+    batch->payload->recv_initial_metadata.recv_initial_metadata_ready =
+        &calld->recv_initial_metadata_ready;
+  }
   // Chain to next filter.
   grpc_call_next_op(elem, batch);
 }
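
The change above reworks how the client_load_reporting filter obtains its GrpcLbClientStats: instead of reading a stats object out of the call context in the call_data constructor, the filter now recovers it from the user_data attached to the LB-token metadata element in send_initial_metadata (the grpclb picker attaches it; see the grpclb.cc hunks below). A minimal standalone model of that ownership hand-off, using std::shared_ptr in place of gRPC's internal RefCountedPtr and a plain struct in place of grpc_mdelem (all names here are illustrative, not the real core API):

    #include <cstdio>
    #include <memory>

    // Illustrative stand-ins for GrpcLbClientStats and the LB-token mdelem.
    struct ClientStats {
      int started = 0;
      int finished = 0;
    };

    struct LbTokenMd {
      const char* token;
      std::shared_ptr<ClientStats> user_data;  // attached by the picker
    };

    // Picker side: attach stats to the token and record the call start.
    void OnPick(LbTokenMd* md, std::shared_ptr<ClientStats> stats) {
      md->user_data = stats;
      ++stats->started;
    }

    // Filter side: recover the stats from the token to record completion.
    void OnCallFinished(const LbTokenMd* md) {
      if (md->user_data != nullptr) ++md->user_data->finished;
    }

    int main() {
      auto stats = std::make_shared<ClientStats>();
      LbTokenMd md{"lb-token-abc", nullptr};
      OnPick(&md, stats);
      OnCallFinished(&md);
      printf("started=%d finished=%d\n", stats->started, stats->finished);  // 1 1
    }
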
--- a/data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc
+++ b/data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc
@@ -26,30 +26,27 @@
 /// channel that uses pick_first to select from the list of balancer
 /// addresses.
 ///
-/// The first time the policy gets a request for a pick, a ping, or to exit
-/// the idle state, \a StartPickingLocked() is called. This method is
-/// responsible for instantiating the internal *streaming* call to the LB
-/// server (whichever address pick_first chose). The call will be complete
-/// when either the balancer sends status or when we cancel the call (e.g.,
-/// because we are shutting down). In needed, we retry the call. If we
-/// received at least one valid message from the server, a new call attempt
-/// will be made immediately; otherwise, we apply back-off delays between
-/// attempts.
+/// When we get our initial update, we instantiate the internal *streaming*
+/// call to the LB server (whichever address pick_first chose). The call
+/// will be complete when either the balancer sends status or when we cancel
+/// the call (e.g., because we are shutting down). If needed, we retry the
+/// call. If we received at least one valid message from the server, a new
+/// call attempt will be made immediately; otherwise, we apply back-off
+/// delays between attempts.
 ///
 /// We maintain an internal round_robin policy instance for distributing
 /// requests across backends. Whenever we receive a new serverlist from
 /// the balancer, we update the round_robin policy with the new list of
 /// addresses. If we cannot communicate with the balancer on startup,
 /// however, we may enter fallback mode, in which case we will populate
-/// the RR policy's addresses from the backend addresses returned by the
+/// the child policy's addresses from the backend addresses returned by the
 /// resolver.
 ///
-/// Once an RR policy instance is in place (and getting updated as described),
+/// Once a child policy instance is in place (and getting updated as described),
 /// calls for a pick, a ping, or a cancellation will be serviced right
-/// away by forwarding them to the RR instance. Any time there's no RR
-/// policy available (i.e., right after the creation of the gRPCLB policy),
-/// pick and ping requests are added to a list of pending picks and pings
-/// to be flushed and serviced when the RR policy instance becomes available.
+/// away by forwarding them to the child policy instance. Any time there's no
+/// child policy available (i.e., right after the creation of the gRPCLB
+/// policy), pick requests are queued.
 ///
 /// \see https://github.com/grpc/grpc/blob/master/doc/load-balancing.md for the
 /// high level design and details.
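
The rewritten comment above describes fallback mode: if the balancer cannot be reached, the child policy is fed the backend addresses that came from the resolver rather than a balancer serverlist. A minimal standalone sketch of just that selection rule (types here are illustrative stand-ins for the real ServerAddressList plumbing):

    #include <cstdio>
    #include <string>
    #include <vector>

    using AddressList = std::vector<std::string>;

    // In fallback mode, feed the child policy the resolver's backend
    // addresses; otherwise use the balancer-provided serverlist.
    const AddressList& ChooseChildPolicyAddresses(
        bool fallback_mode, const AddressList& serverlist,
        const AddressList& resolver_backends) {
      return fallback_mode ? resolver_backends : serverlist;
    }

    int main() {
      AddressList serverlist = {"10.0.0.1:443", "10.0.0.2:443"};
      AddressList resolver_backends = {"10.1.0.1:443"};
      const AddressList& chosen = ChooseChildPolicyAddresses(
          /*fallback_mode=*/true, serverlist, resolver_backends);
      printf("%s\n", chosen[0].c_str());  // prints 10.1.0.1:443
    }
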
@@ -74,7 +71,6 @@
 #include <grpc/support/time.h>
 
 #include "src/core/ext/filters/client_channel/client_channel.h"
-#include "src/core/ext/filters/client_channel/client_channel_factory.h"
 #include "src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h"
 #include "src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.h"
 #include "src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_channel.h"
@@ -129,50 +125,13 @@ class GrpcLb : public LoadBalancingPolicy {
 
   const char* name() const override { return kGrpclb; }
 
-  void UpdateLocked(const grpc_channel_args& args,
-                    grpc_json* lb_config) override;
-  bool PickLocked(PickState* pick, grpc_error** error) override;
-  void CancelPickLocked(PickState* pick, grpc_error* error) override;
-  void CancelMatchingPicksLocked(uint32_t initial_metadata_flags_mask,
-                                 uint32_t initial_metadata_flags_eq,
-                                 grpc_error* error) override;
-  void NotifyOnStateChangeLocked(grpc_connectivity_state* state,
-                                 grpc_closure* closure) override;
-  grpc_connectivity_state CheckConnectivityLocked(
-      grpc_error** connectivity_error) override;
-  void HandOffPendingPicksLocked(LoadBalancingPolicy* new_policy) override;
-  void ExitIdleLocked() override;
+  void UpdateLocked(UpdateArgs args) override;
   void ResetBackoffLocked() override;
   void FillChildRefsForChannelz(
       channelz::ChildRefsList* child_subchannels,
       channelz::ChildRefsList* child_channels) override;
 
  private:
-  /// Linked list of pending pick requests. It stores all information needed to
-  /// eventually call (Round Robin's) pick() on them. They mainly stay pending
-  /// waiting for the RR policy to be created.
-  ///
-  /// Note that when a pick is sent to the RR policy, we inject our own
-  /// on_complete callback, so that we can intercept the result before
-  /// invoking the original on_complete callback. This allows us to set the
-  /// LB token metadata and add client_stats to the call context.
-  /// See \a pending_pick_complete() for details.
-  struct PendingPick {
-    // The grpclb instance that created the wrapping. This instance is not
-    // owned; reference counts are untouched. It's used only for logging
-    // purposes.
-    GrpcLb* grpclb_policy;
-    // The original pick.
-    PickState* pick;
-    // Our on_complete closure and the original one.
-    grpc_closure on_complete;
-    grpc_closure* original_on_complete;
-    // Stats for client-side load reporting.
-    RefCountedPtr<GrpcLbClientStats> client_stats;
-    // Next pending pick.
-    PendingPick* next = nullptr;
-  };
-
   /// Contains a call to the LB server and all the data related to the call.
   class BalancerCallState : public InternallyRefCounted<BalancerCallState> {
    public:
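
This hunk shows the API narrowing that the 1.20 LB-policy rework applies: the many pick/cancel/connectivity-notification entry points are replaced by a single UpdateLocked(UpdateArgs), and pick results are instead delivered synchronously through a picker object that the policy pushes to the channel. A compilable standalone model of that shape (all types are simplified stand-ins mirroring names from this diff, not the real grpc_core API):

    #include <cstddef>
    #include <cstdio>
    #include <memory>
    #include <string>
    #include <utility>
    #include <vector>

    struct UpdateArgs { std::vector<std::string> addresses; };

    struct SubchannelPicker {
      virtual ~SubchannelPicker() = default;
      virtual std::string Pick() = 0;
    };

    struct ChannelControlHelper {
      virtual ~ChannelControlHelper() = default;
      virtual void UpdateState(std::unique_ptr<SubchannelPicker> picker) = 0;
    };

    class RoundRobinPicker : public SubchannelPicker {
     public:
      explicit RoundRobinPicker(std::vector<std::string> addrs)
          : addrs_(std::move(addrs)) {}
      std::string Pick() override {
        if (addrs_.empty()) return "";
        return addrs_[next_++ % addrs_.size()];
      }
     private:
      std::vector<std::string> addrs_;
      size_t next_ = 0;
    };

    class Policy {
     public:
      explicit Policy(ChannelControlHelper* helper) : helper_(helper) {}
      // The single entry point: each update produces a fresh picker.
      void UpdateLocked(UpdateArgs args) {
        helper_->UpdateState(std::unique_ptr<SubchannelPicker>(
            new RoundRobinPicker(std::move(args.addresses))));
      }
     private:
      ChannelControlHelper* helper_;
    };

    struct PrintingHelper : ChannelControlHelper {
      void UpdateState(std::unique_ptr<SubchannelPicker> picker) override {
        printf("pick -> %s\n", picker->Pick().c_str());
      }
    };

    int main() {
      PrintingHelper helper;
      Policy policy(&helper);
      policy.UpdateLocked({{"10.0.0.1:443", "10.0.0.2:443"}});
    }
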
@@ -188,6 +147,7 @@ class GrpcLb : public LoadBalancingPolicy {
     GrpcLbClientStats* client_stats() const { return client_stats_.get(); }
 
     bool seen_initial_response() const { return seen_initial_response_; }
+    bool seen_serverlist() const { return seen_serverlist_; }
 
    private:
     // So Delete() can access our private dtor.
@@ -228,6 +188,7 @@ class GrpcLb : public LoadBalancingPolicy {
     grpc_byte_buffer* recv_message_payload_ = nullptr;
     grpc_closure lb_on_balancer_message_received_;
     bool seen_initial_response_ = false;
+    bool seen_serverlist_ = false;
 
     // recv_trailing_metadata
     grpc_closure lb_on_balancer_status_received_;
@@ -248,40 +209,113 @@ class GrpcLb : public LoadBalancingPolicy {
     grpc_closure client_load_report_closure_;
   };
 
+  class Serverlist : public RefCounted<Serverlist> {
+   public:
+    // Takes ownership of serverlist.
+    explicit Serverlist(grpc_grpclb_serverlist* serverlist)
+        : serverlist_(serverlist) {}
+
+    ~Serverlist() { grpc_grpclb_destroy_serverlist(serverlist_); }
+
+    bool operator==(const Serverlist& other) const;
+
+    const grpc_grpclb_serverlist* serverlist() const { return serverlist_; }
+
+    // Returns a text representation suitable for logging.
+    UniquePtr<char> AsText() const;
+
+    // Extracts all non-drop entries into a ServerAddressList.
+    ServerAddressList GetServerAddressList(
+        GrpcLbClientStats* client_stats) const;
+
+    // Returns true if the serverlist contains at least one drop entry and
+    // no backend address entries.
+    bool ContainsAllDropEntries() const;
+
+    // Returns the LB token to use for a drop, or null if the call
+    // should not be dropped.
+    // Intended to be called from picker, so calls will be externally
+    // synchronized.
+    const char* ShouldDrop();
+
+   private:
+    grpc_grpclb_serverlist* serverlist_;
+    size_t drop_index_ = 0;
+  };
+
+  class Picker : public SubchannelPicker {
+   public:
+    Picker(GrpcLb* parent, RefCountedPtr<Serverlist> serverlist,
+           UniquePtr<SubchannelPicker> child_picker,
+           RefCountedPtr<GrpcLbClientStats> client_stats)
+        : parent_(parent),
+          serverlist_(std::move(serverlist)),
+          child_picker_(std::move(child_picker)),
+          client_stats_(std::move(client_stats)) {}
+
+    PickResult Pick(PickArgs* pick, grpc_error** error) override;
+
+   private:
+    // Storing the address for logging, but not holding a ref.
+    // DO NOT DEREFERENCE!
+    GrpcLb* parent_;
+
+    // Serverlist to be used for determining drops.
+    RefCountedPtr<Serverlist> serverlist_;
+
+    UniquePtr<SubchannelPicker> child_picker_;
+    RefCountedPtr<GrpcLbClientStats> client_stats_;
+  };
+
+  class Helper : public ChannelControlHelper {
+   public:
+    explicit Helper(RefCountedPtr<GrpcLb> parent)
+        : parent_(std::move(parent)) {}
+
+    Subchannel* CreateSubchannel(const grpc_channel_args& args) override;
+    grpc_channel* CreateChannel(const char* target,
+                                const grpc_channel_args& args) override;
+    void UpdateState(grpc_connectivity_state state, grpc_error* state_error,
+                     UniquePtr<SubchannelPicker> picker) override;
+    void RequestReresolution() override;
+
+    void set_child(LoadBalancingPolicy* child) { child_ = child; }
+
+   private:
+    bool CalledByPendingChild() const;
+    bool CalledByCurrentChild() const;
+
+    RefCountedPtr<GrpcLb> parent_;
+    LoadBalancingPolicy* child_ = nullptr;
+  };
+
   ~GrpcLb();
 
   void ShutdownLocked() override;
 
-  // Helper function used in ctor and UpdateLocked().
-  void ProcessChannelArgsLocked(const grpc_channel_args& args);
+  // Helper functions used in UpdateLocked().
+  void ProcessAddressesAndChannelArgsLocked(const ServerAddressList& addresses,
+                                            const grpc_channel_args& args);
+  void ParseLbConfig(Config* grpclb_config);
+  static void OnBalancerChannelConnectivityChangedLocked(void* arg,
+                                                         grpc_error* error);
+  void CancelBalancerChannelConnectivityWatchLocked();
 
-  // Methods for dealing with the balancer channel and call.
-  void StartPickingLocked();
-  void StartBalancerCallLocked();
+  // Methods for dealing with fallback state.
+  void MaybeEnterFallbackModeAfterStartup();
   static void OnFallbackTimerLocked(void* arg, grpc_error* error);
+
+  // Methods for dealing with the balancer call.
+  void StartBalancerCallLocked();
   void StartBalancerCallRetryTimerLocked();
   static void OnBalancerCallRetryTimerLocked(void* arg, grpc_error* error);
-  static void OnBalancerChannelConnectivityChangedLocked(void* arg,
-                                                         grpc_error* error);
 
-  // Pending pick methods.
-  static void PendingPickSetMetadataAndContext(PendingPick* pp);
-  PendingPick* PendingPickCreate(PickState* pick);
-  void AddPendingPick(PendingPick* pp);
-  static void OnPendingPickComplete(void* arg, grpc_error* error);
-
-  // Methods for dealing with the RR policy.
-  void CreateOrUpdateRoundRobinPolicyLocked();
-  grpc_channel_args* CreateRoundRobinPolicyArgsLocked();
-  void CreateRoundRobinPolicyLocked(Args args);
-  bool PickFromRoundRobinPolicyLocked(bool force_async, PendingPick* pp,
-                                      grpc_error** error);
-  void UpdateConnectivityStateFromRoundRobinPolicyLocked(
-      grpc_error* rr_state_error);
-  static void OnRoundRobinConnectivityChangedLocked(void* arg,
-                                                    grpc_error* error);
-  static void OnRoundRobinRequestReresolutionLocked(void* arg,
-                                                    grpc_error* error);
+  // Methods for dealing with the child policy.
+  grpc_channel_args* CreateChildPolicyArgsLocked(
+      bool is_backend_from_grpclb_load_balancer);
+  OrphanablePtr<LoadBalancingPolicy> CreateChildPolicyLocked(
+      const char* name, const grpc_channel_args* args);
+  void CreateOrUpdateChildPolicyLocked();
 
   // Who the client is trying to communicate with.
   const char* server_name_ = nullptr;
@@ -290,18 +324,12 @@ class GrpcLb : public LoadBalancingPolicy {
   grpc_channel_args* args_ = nullptr;
 
   // Internal state.
-  bool started_picking_ = false;
   bool shutting_down_ = false;
-  grpc_connectivity_state_tracker state_tracker_;
 
   // The channel for communicating with the LB server.
   grpc_channel* lb_channel_ = nullptr;
   // Uuid of the lb channel. Used for channelz.
   gpr_atm lb_channel_uuid_ = 0;
-  grpc_connectivity_state lb_channel_connectivity_;
-  grpc_closure lb_channel_on_connectivity_changed_;
-  // Are we already watching the LB channel's connectivity?
-  bool watching_lb_channel_ = false;
   // Response generator to inject address updates into lb_channel_.
   RefCountedPtr<FakeResolverResponseGenerator> response_generator_;
 
@@ -321,36 +349,91 @@ class GrpcLb : public LoadBalancingPolicy {
 
   // The deserialized response from the balancer. May be nullptr until one
   // such response has arrived.
-  grpc_grpclb_serverlist* serverlist_ = nullptr;
-  // Index into serverlist for next pick.
-  // If the server at this index is a drop, we return a drop.
-  // Otherwise, we delegate to the RR policy.
-  size_t serverlist_index_ = 0;
-
-  // Timeout in milliseconds for before using fallback backend addresses.
-  // 0 means not using fallback.
-  int lb_fallback_timeout_ms_ = 0;
+  RefCountedPtr<Serverlist> serverlist_;
+
+  // Whether we're in fallback mode.
+  bool fallback_mode_ = false;
   // The backend addresses from the resolver.
-  UniquePtr<ServerAddressList> fallback_backend_addresses_;
-  // Fallback timer.
-  bool fallback_timer_callback_pending_ = false;
+  ServerAddressList fallback_backend_addresses_;
+  // State for fallback-at-startup checks.
+  // Timeout after startup after which we will go into fallback mode if
+  // we have not received a serverlist from the balancer.
+  int fallback_at_startup_timeout_ = 0;
+  bool fallback_at_startup_checks_pending_ = false;
   grpc_timer lb_fallback_timer_;
   grpc_closure lb_on_fallback_;
+  grpc_connectivity_state lb_channel_connectivity_ = GRPC_CHANNEL_IDLE;
+  grpc_closure lb_channel_on_connectivity_changed_;
 
-  // Pending picks that are waiting on the RR policy's connectivity.
-  PendingPick* pending_picks_ = nullptr;
-
-  // The RR policy to use for the backends.
-  OrphanablePtr<LoadBalancingPolicy> rr_policy_;
-  grpc_connectivity_state rr_connectivity_state_;
-  grpc_closure on_rr_connectivity_changed_;
-  grpc_closure on_rr_request_reresolution_;
+  // Lock held when modifying the value of child_policy_ or
+  // pending_child_policy_.
+  gpr_mu child_policy_mu_;
+  // The child policy to use for the backends.
+  OrphanablePtr<LoadBalancingPolicy> child_policy_;
+  // When switching child policies, the new policy will be stored here
+  // until it reports READY, at which point it will be moved to child_policy_.
+  OrphanablePtr<LoadBalancingPolicy> pending_child_policy_;
+  // The child policy config.
+  RefCountedPtr<Config> child_policy_config_;
+  // Child policy in state READY.
+  bool child_policy_ready_ = false;
 };
 
 //
-// serverlist parsing code
+// GrpcLb::Serverlist
 //
 
+bool GrpcLb::Serverlist::operator==(const Serverlist& other) const {
+  return grpc_grpclb_serverlist_equals(serverlist_, other.serverlist_);
+}
+
+void ParseServer(const grpc_grpclb_server* server,
+                 grpc_resolved_address* addr) {
+  memset(addr, 0, sizeof(*addr));
+  if (server->drop) return;
+  const uint16_t netorder_port = grpc_htons((uint16_t)server->port);
+  /* the addresses are given in binary format (a in(6)_addr struct) in
+   * server->ip_address.bytes. */
+  const grpc_grpclb_ip_address* ip = &server->ip_address;
+  if (ip->size == 4) {
+    addr->len = static_cast<socklen_t>(sizeof(grpc_sockaddr_in));
+    grpc_sockaddr_in* addr4 = reinterpret_cast<grpc_sockaddr_in*>(&addr->addr);
+    addr4->sin_family = GRPC_AF_INET;
+    memcpy(&addr4->sin_addr, ip->bytes, ip->size);
+    addr4->sin_port = netorder_port;
+  } else if (ip->size == 16) {
+    addr->len = static_cast<socklen_t>(sizeof(grpc_sockaddr_in6));
+    grpc_sockaddr_in6* addr6 = (grpc_sockaddr_in6*)&addr->addr;
+    addr6->sin6_family = GRPC_AF_INET6;
+    memcpy(&addr6->sin6_addr, ip->bytes, ip->size);
+    addr6->sin6_port = netorder_port;
+  }
+}
+
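
ParseServer above unpacks the balancer's wire format: each server entry carries a raw 4-byte in_addr or 16-byte in6_addr in ip_address.bytes plus a host-order port. The same conversion with plain POSIX socket types, as a runnable standalone sketch (function and variable names here are illustrative, not part of gRPC):

    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <sys/socket.h>
    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Raw 4-byte (IPv4) or 16-byte (IPv6) address bytes plus a host-order
    // port are converted into a socket address, as in ParseServer.
    static void parse_binary_address(const unsigned char* bytes, size_t size,
                                     uint16_t host_port,
                                     sockaddr_storage* out) {
      memset(out, 0, sizeof(*out));
      if (size == 4) {
        sockaddr_in* a4 = reinterpret_cast<sockaddr_in*>(out);
        a4->sin_family = AF_INET;
        memcpy(&a4->sin_addr, bytes, size);
        a4->sin_port = htons(host_port);  // ports go on the wire big-endian
      } else if (size == 16) {
        sockaddr_in6* a6 = reinterpret_cast<sockaddr_in6*>(out);
        a6->sin6_family = AF_INET6;
        memcpy(&a6->sin6_addr, bytes, size);
        a6->sin6_port = htons(host_port);
      }
    }

    int main() {
      const unsigned char ip[4] = {10, 0, 0, 1};
      sockaddr_storage ss;
      parse_binary_address(ip, sizeof(ip), 443, &ss);
      const sockaddr_in* a4 = reinterpret_cast<const sockaddr_in*>(&ss);
      char buf[INET_ADDRSTRLEN];
      inet_ntop(AF_INET, &a4->sin_addr, buf, sizeof(buf));
      printf("%s:%d\n", buf, ntohs(a4->sin_port));  // prints 10.0.0.1:443
    }
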
+UniquePtr<char> GrpcLb::Serverlist::AsText() const {
+  gpr_strvec entries;
+  gpr_strvec_init(&entries);
+  for (size_t i = 0; i < serverlist_->num_servers; ++i) {
+    const auto* server = serverlist_->servers[i];
+    char* ipport;
+    if (server->drop) {
+      ipport = gpr_strdup("(drop)");
+    } else {
+      grpc_resolved_address addr;
+      ParseServer(server, &addr);
+      grpc_sockaddr_to_string(&ipport, &addr, false);
+    }
+    char* entry;
+    gpr_asprintf(&entry, "  %" PRIuPTR ": %s token=%s\n", i, ipport,
+                 server->load_balance_token);
+    gpr_free(ipport);
+    gpr_strvec_add(&entries, entry);
+  }
+  UniquePtr<char> result(gpr_strvec_flatten(&entries, nullptr));
+  gpr_strvec_destroy(&entries);
+  return result;
+}
+
 // vtable for LB token channel arg.
 void* lb_token_copy(void* token) {
   return token == nullptr
@@ -393,35 +476,13 @@ bool IsServerValid(const grpc_grpclb_server* server, size_t idx, bool log) {
   return true;
 }
 
-void ParseServer(const grpc_grpclb_server* server,
-                 grpc_resolved_address* addr) {
-  memset(addr, 0, sizeof(*addr));
-  if (server->drop) return;
-  const uint16_t netorder_port = grpc_htons((uint16_t)server->port);
-  /* the addresses are given in binary format (a in(6)_addr struct) in
-   * server->ip_address.bytes. */
-  const grpc_grpclb_ip_address* ip = &server->ip_address;
-  if (ip->size == 4) {
-    addr->len = static_cast<socklen_t>(sizeof(grpc_sockaddr_in));
-    grpc_sockaddr_in* addr4 = reinterpret_cast<grpc_sockaddr_in*>(&addr->addr);
-    addr4->sin_family = GRPC_AF_INET;
-    memcpy(&addr4->sin_addr, ip->bytes, ip->size);
-    addr4->sin_port = netorder_port;
-  } else if (ip->size == 16) {
-    addr->len = static_cast<socklen_t>(sizeof(grpc_sockaddr_in6));
-    grpc_sockaddr_in6* addr6 = (grpc_sockaddr_in6*)&addr->addr;
-    addr6->sin6_family = GRPC_AF_INET6;
-    memcpy(&addr6->sin6_addr, ip->bytes, ip->size);
-    addr6->sin6_port = netorder_port;
-  }
-}
-
-// Returns addresses extracted from \a serverlist.
-ServerAddressList ProcessServerlist(const grpc_grpclb_serverlist* serverlist) {
+// Returns addresses extracted from the serverlist.
+ServerAddressList GrpcLb::Serverlist::GetServerAddressList(
+    GrpcLbClientStats* client_stats) const {
   ServerAddressList addresses;
-  for (size_t i = 0; i < serverlist->num_servers; ++i) {
-    const grpc_grpclb_server* server = serverlist->servers[i];
-    if (!IsServerValid(serverlist->servers[i], i, false)) continue;
+  for (size_t i = 0; i < serverlist_->num_servers; ++i) {
+    const grpc_grpclb_server* server = serverlist_->servers[i];
+    if (!IsServerValid(serverlist_->servers[i], i, false)) continue;
     // Address processing.
     grpc_resolved_address addr;
     ParseServer(server, &addr);
@@ -435,6 +496,11 @@ ServerAddressList ProcessServerlist(const grpc_grpclb_serverlist* serverlist) {
       grpc_slice lb_token_mdstr = grpc_slice_from_copied_buffer(
           server->load_balance_token, lb_token_length);
       lb_token = grpc_mdelem_from_slices(GRPC_MDSTR_LB_TOKEN, lb_token_mdstr);
+      if (client_stats != nullptr) {
+        GPR_ASSERT(grpc_mdelem_set_user_data(
+                       lb_token, GrpcLbClientStats::Destroy,
+                       client_stats->Ref().release()) == client_stats);
+      }
     } else {
       char* uri = grpc_sockaddr_to_uri(&addr);
       gpr_log(GPR_INFO,
@@ -456,6 +522,204 @@ ServerAddressList ProcessServerlist(const grpc_grpclb_serverlist* serverlist) {
   return addresses;
 }
 
+bool GrpcLb::Serverlist::ContainsAllDropEntries() const {
+  if (serverlist_->num_servers == 0) return false;
+  for (size_t i = 0; i < serverlist_->num_servers; ++i) {
+    if (!serverlist_->servers[i]->drop) return false;
+  }
+  return true;
+}
+
+const char* GrpcLb::Serverlist::ShouldDrop() {
+  if (serverlist_->num_servers == 0) return nullptr;
+  grpc_grpclb_server* server = serverlist_->servers[drop_index_];
+  drop_index_ = (drop_index_ + 1) % serverlist_->num_servers;
+  return server->drop ? server->load_balance_token : nullptr;
+}
+
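
ShouldDrop() above advances a round-robin index across the whole serverlist on every pick, so over time the fraction of picks that are dropped converges to the fraction of entries flagged as drops. A standalone model demonstrating that behavior (simplified types; one drop entry in four yields a 25% drop rate):

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    struct Entry {
      bool drop;
      const char* token;
    };

    struct DropList {
      std::vector<Entry> entries;
      size_t drop_index = 0;

      // Mirrors Serverlist::ShouldDrop(): returns the drop token for this
      // pick, or nullptr if the pick should proceed to a backend.
      const char* ShouldDrop() {
        if (entries.empty()) return nullptr;
        const Entry& e = entries[drop_index];
        drop_index = (drop_index + 1) % entries.size();
        return e.drop ? e.token : nullptr;
      }
    };

    int main() {
      DropList list;
      list.entries = {
          {false, ""}, {true, "rate_limiting"}, {false, ""}, {false, ""}};
      int drops = 0;
      for (int i = 0; i < 1000; ++i) {
        if (list.ShouldDrop() != nullptr) ++drops;
      }
      printf("dropped %d of 1000 picks\n", drops);  // prints 250
    }
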
+//
+// GrpcLb::Picker
+//
+
+GrpcLb::PickResult GrpcLb::Picker::Pick(PickArgs* pick, grpc_error** error) {
+  // Check if we should drop the call.
+  const char* drop_token = serverlist_->ShouldDrop();
+  if (drop_token != nullptr) {
+    // Update client load reporting stats to indicate the number of
+    // dropped calls. Note that we have to do this here instead of in
+    // the client_load_reporting filter, because we do not create a
+    // subchannel call (and therefore no client_load_reporting filter)
+    // for dropped calls.
+    if (client_stats_ != nullptr) {
+      client_stats_->AddCallDroppedLocked(drop_token);
+    }
+    return PICK_COMPLETE;
+  }
+  // Forward pick to child policy.
+  PickResult result = child_picker_->Pick(pick, error);
+  // If pick succeeded, add LB token to initial metadata.
+  if (result == PickResult::PICK_COMPLETE &&
+      pick->connected_subchannel != nullptr) {
+    const grpc_arg* arg = grpc_channel_args_find(
+        pick->connected_subchannel->args(), GRPC_ARG_GRPCLB_ADDRESS_LB_TOKEN);
+    if (arg == nullptr) {
+      gpr_log(GPR_ERROR,
+              "[grpclb %p picker %p] No LB token for connected subchannel "
+              "pick %p",
+              parent_, this, pick);
+      abort();
+    }
+    grpc_mdelem lb_token = {reinterpret_cast<uintptr_t>(arg->value.pointer.p)};
+    GPR_ASSERT(!GRPC_MDISNULL(lb_token));
+    GPR_ASSERT(grpc_metadata_batch_add_tail(
+                   pick->initial_metadata, &pick->lb_token_mdelem_storage,
+                   GRPC_MDELEM_REF(lb_token)) == GRPC_ERROR_NONE);
+    GrpcLbClientStats* client_stats = static_cast<GrpcLbClientStats*>(
+        grpc_mdelem_get_user_data(lb_token, GrpcLbClientStats::Destroy));
+    if (client_stats != nullptr) {
+      client_stats->AddCallStarted();
+    }
+  }
+  return result;
+}
+
+//
+// GrpcLb::Helper
+//
+
+bool GrpcLb::Helper::CalledByPendingChild() const {
+  GPR_ASSERT(child_ != nullptr);
+  return child_ == parent_->pending_child_policy_.get();
+}
+
+bool GrpcLb::Helper::CalledByCurrentChild() const {
+  GPR_ASSERT(child_ != nullptr);
+  return child_ == parent_->child_policy_.get();
+}
+
+Subchannel* GrpcLb::Helper::CreateSubchannel(const grpc_channel_args& args) {
+  if (parent_->shutting_down_ ||
+      (!CalledByPendingChild() && !CalledByCurrentChild())) {
+    return nullptr;
+  }
+  return parent_->channel_control_helper()->CreateSubchannel(args);
+}
+
+grpc_channel* GrpcLb::Helper::CreateChannel(const char* target,
+                                            const grpc_channel_args& args) {
+  if (parent_->shutting_down_ ||
+      (!CalledByPendingChild() && !CalledByCurrentChild())) {
+    return nullptr;
+  }
+  return parent_->channel_control_helper()->CreateChannel(target, args);
+}
+
+void GrpcLb::Helper::UpdateState(grpc_connectivity_state state,
+                                 grpc_error* state_error,
+                                 UniquePtr<SubchannelPicker> picker) {
+  if (parent_->shutting_down_) {
+    GRPC_ERROR_UNREF(state_error);
+    return;
+  }
+  // If this request is from the pending child policy, ignore it until
+  // it reports READY, at which point we swap it into place.
+  if (CalledByPendingChild()) {
+    if (grpc_lb_glb_trace.enabled()) {
+      gpr_log(GPR_INFO,
+              "[grpclb %p helper %p] pending child policy %p reports state=%s",
+              parent_.get(), this, parent_->pending_child_policy_.get(),
+              grpc_connectivity_state_name(state));
+    }
+    if (state != GRPC_CHANNEL_READY) {
+      GRPC_ERROR_UNREF(state_error);
+      return;
+    }
+    grpc_pollset_set_del_pollset_set(
+        parent_->child_policy_->interested_parties(),
+        parent_->interested_parties());
+    MutexLock lock(&parent_->child_policy_mu_);
+    parent_->child_policy_ = std::move(parent_->pending_child_policy_);
+  } else if (!CalledByCurrentChild()) {
+    // This request is from an outdated child, so ignore it.
+    GRPC_ERROR_UNREF(state_error);
+    return;
+  }
+  // Record whether child policy reports READY.
+  parent_->child_policy_ready_ = state == GRPC_CHANNEL_READY;
+  // Enter fallback mode if needed.
+  parent_->MaybeEnterFallbackModeAfterStartup();
+  // There are three cases to consider here:
+  // 1. We're in fallback mode.  In this case, we're always going to use
+  //    the child policy's result, so we pass its picker through as-is.
+  // 2. The serverlist contains only drop entries.  In this case, we
+  //    want to use our own picker so that we can return the drops.
+  // 3. Not in fallback mode and serverlist is not all drops (i.e., it
+  //    may be empty or contain at least one backend address).  There are
+  //    two sub-cases:
+  //    a. The child policy is reporting state READY.  In this case, we wrap
+  //       the child's picker in our own, so that we can handle drops and LB
+  //       token metadata for each pick.
+  //    b. The child policy is reporting a state other than READY.  In this
+  //       case, we don't want to use our own picker, because we don't want
+  //       to process drops for picks that yield a QUEUE result; this would
+  //       result in dropping too many calls, since we will see the
+  //       queued picks multiple times, and we'd consider each one a
+  //       separate call for the drop calculation.
+  //
+  // Cases 1 and 3b: return picker from the child policy as-is.
+  if (parent_->serverlist_ == nullptr ||
+      (!parent_->serverlist_->ContainsAllDropEntries() &&
+       state != GRPC_CHANNEL_READY)) {
+    if (grpc_lb_glb_trace.enabled()) {
+      gpr_log(GPR_INFO,
+              "[grpclb %p helper %p] state=%s passing child picker %p as-is",
+              parent_.get(), this, grpc_connectivity_state_name(state),
+              picker.get());
+    }
+    parent_->channel_control_helper()->UpdateState(state, state_error,
+                                                   std::move(picker));
+    return;
+  }
+  // Cases 2 and 3a: wrap picker from the child in our own picker.
+  if (grpc_lb_glb_trace.enabled()) {
+    gpr_log(GPR_INFO, "[grpclb %p helper %p] state=%s wrapping child picker %p",
+            parent_.get(), this, grpc_connectivity_state_name(state),
+            picker.get());
+  }
+  RefCountedPtr<GrpcLbClientStats> client_stats;
+  if (parent_->lb_calld_ != nullptr &&
+      parent_->lb_calld_->client_stats() != nullptr) {
+    client_stats = parent_->lb_calld_->client_stats()->Ref();
+  }
+  parent_->channel_control_helper()->UpdateState(
+      state, state_error,
+      UniquePtr<SubchannelPicker>(
+          New<Picker>(parent_.get(), parent_->serverlist_, std::move(picker),
+                      std::move(client_stats))));
+}
+
+void GrpcLb::Helper::RequestReresolution() {
+  if (parent_->shutting_down_) return;
+  const LoadBalancingPolicy* latest_child_policy =
+      parent_->pending_child_policy_ != nullptr
+          ? parent_->pending_child_policy_.get()
+          : parent_->child_policy_.get();
+  if (child_ != latest_child_policy) return;
+  if (grpc_lb_glb_trace.enabled()) {
+    gpr_log(GPR_INFO,
+            "[grpclb %p] Re-resolution requested from %schild policy (%p).",
+            parent_.get(), CalledByPendingChild() ? "pending " : "", child_);
+  }
+  // If we are talking to a balancer, we expect to get updated addresses
+  // from the balancer, so we can ignore the re-resolution request from
+  // the child policy. Otherwise, pass the re-resolution request up to the
+  // channel.
+  if (parent_->lb_calld_ == nullptr ||
+      !parent_->lb_calld_->seen_initial_response()) {
+    parent_->channel_control_helper()->RequestReresolution();
+  }
+}
+
 //
 // GrpcLb::BalancerCallState
 //
@@ -540,7 +804,8 @@ void GrpcLb::BalancerCallState::StartQuery() {
   grpc_op* op = ops;
   op->op = GRPC_OP_SEND_INITIAL_METADATA;
   op->data.send_initial_metadata.count = 0;
-  op->flags = 0;
+  op->flags = GRPC_INITIAL_METADATA_WAIT_FOR_READY |
+              GRPC_INITIAL_METADATA_WAIT_FOR_READY_EXPLICITLY_SET;
   op->reserved = nullptr;
   op++;
   // Op: send request message.
@@ -596,7 +861,7 @@ void GrpcLb::BalancerCallState::StartQuery() {
   call_error = grpc_call_start_batch_and_execute(
       lb_call_, ops, (size_t)(op - ops), &lb_on_balancer_status_received_);
   GPR_ASSERT(GRPC_CALL_OK == call_error);
-};
+}
 
 void GrpcLb::BalancerCallState::ScheduleNextClientLoadReportLocked() {
   const grpc_millis next_client_load_report_time =
@@ -754,60 +1019,71 @@ void GrpcLb::BalancerCallState::OnBalancerMessageReceivedLocked(
           response_slice)) != nullptr) {
     // Have seen initial response, look for serverlist.
     GPR_ASSERT(lb_calld->lb_call_ != nullptr);
+    auto serverlist_wrapper = MakeRefCounted<Serverlist>(serverlist);
     if (grpc_lb_glb_trace.enabled()) {
+      UniquePtr<char> serverlist_text = serverlist_wrapper->AsText();
       gpr_log(GPR_INFO,
              "[grpclb %p] lb_calld=%p: Serverlist with %" PRIuPTR
-             " servers received",
-             grpclb_policy, lb_calld, serverlist->num_servers);
-      for (size_t i = 0; i < serverlist->num_servers; ++i) {
-        grpc_resolved_address addr;
-        ParseServer(serverlist->servers[i], &addr);
-        char* ipport;
-        grpc_sockaddr_to_string(&ipport, &addr, false);
-        gpr_log(GPR_INFO,
-                "[grpclb %p] lb_calld=%p: Serverlist[%" PRIuPTR "]: %s",
-                grpclb_policy, lb_calld, i, ipport);
-        gpr_free(ipport);
-      }
+             " servers received:\n%s",
+             grpclb_policy, lb_calld, serverlist->num_servers,
+             serverlist_text.get());
     }
+    lb_calld->seen_serverlist_ = true;
     // Start sending client load report only after we start using the
     // serverlist returned from the current LB call.
     if (lb_calld->client_stats_report_interval_ > 0 &&
         lb_calld->client_stats_ == nullptr) {
-      lb_calld->client_stats_.reset(New<GrpcLbClientStats>());
-      // TODO(roth): We currently track this ref manually. Once the
-      // ClosureRef API is ready, we should pass the RefCountedPtr<> along
-      // with the callback.
-      auto self = lb_calld->Ref(DEBUG_LOCATION, "client_load_report");
-      self.release();
+      lb_calld->client_stats_ = MakeRefCounted<GrpcLbClientStats>();
+      // Ref held by callback.
+      lb_calld->Ref(DEBUG_LOCATION, "client_load_report").release();
      lb_calld->ScheduleNextClientLoadReportLocked();
    }
    // Check if the serverlist differs from the previous one.
-    if (grpc_grpclb_serverlist_equals(grpclb_policy->serverlist_, serverlist)) {
+    if (grpclb_policy->serverlist_ != nullptr &&
+        *grpclb_policy->serverlist_ == *serverlist_wrapper) {
      if (grpc_lb_glb_trace.enabled()) {
        gpr_log(GPR_INFO,
                "[grpclb %p] lb_calld=%p: Incoming server list identical to "
                "current, ignoring.",
                grpclb_policy, lb_calld);
      }
-      grpc_grpclb_destroy_serverlist(serverlist);
    } else {  // New serverlist.
-      if (grpclb_policy->serverlist_ != nullptr) {
-        // Dispose of the old serverlist.
-        grpc_grpclb_destroy_serverlist(grpclb_policy->serverlist_);
-      } else {
-        // Dispose of the fallback.
-        grpclb_policy->fallback_backend_addresses_.reset();
-        if (grpclb_policy->fallback_timer_callback_pending_) {
-          grpc_timer_cancel(&grpclb_policy->lb_fallback_timer_);
-        }
+      // Dispose of the fallback.
+      // TODO(roth): Ideally, we should stay in fallback mode until we
+      // know that we can reach at least one of the backends in the new
+      // serverlist.  Unfortunately, we can't do that, since we need to
+      // send the new addresses to the child policy in order to determine
+      // if they are reachable, and if we don't exit fallback mode now,
+      // CreateOrUpdateChildPolicyLocked() will use the fallback
+      // addresses instead of the addresses from the new serverlist.
+      // However, if we can't reach any of the servers in the new
+      // serverlist, then the child policy will never switch away from
+      // the fallback addresses, but the grpclb policy will still think
+      // that we're not in fallback mode, which means that we won't send
+      // updates to the child policy when the fallback addresses are
+      // updated by the resolver.  This is sub-optimal, but the only way
+      // to fix it is to maintain a completely separate child policy for
+      // fallback mode, and that's more work than we want to put into
+      // the grpclb implementation at this point, since we're deprecating
+      // it in favor of the xds policy.  We will implement this the
+      // right way in the xds policy instead.
+      if (grpclb_policy->fallback_mode_) {
+        gpr_log(GPR_INFO,
+                "[grpclb %p] Received response from balancer; exiting "
+                "fallback mode",
+                grpclb_policy);
+        grpclb_policy->fallback_mode_ = false;
+      }
+      if (grpclb_policy->fallback_at_startup_checks_pending_) {
+        grpclb_policy->fallback_at_startup_checks_pending_ = false;
+        grpc_timer_cancel(&grpclb_policy->lb_fallback_timer_);
+        grpclb_policy->CancelBalancerChannelConnectivityWatchLocked();
      }
      // Update the serverlist in the GrpcLb instance. This serverlist
      // instance will be destroyed either upon the next update or when the
      // GrpcLb instance is destroyed.
-      grpclb_policy->serverlist_ = serverlist;
-      grpclb_policy->serverlist_index_ = 0;
-      grpclb_policy->CreateOrUpdateRoundRobinPolicyLocked();
+      grpclb_policy->serverlist_ = std::move(serverlist_wrapper);
+      grpclb_policy->CreateOrUpdateChildPolicyLocked();
    }
  } else {
    // No valid initial response or serverlist found.
@@ -853,13 +1129,31 @@ void GrpcLb::BalancerCallState::OnBalancerStatusReceivedLocked(
             lb_calld->lb_call_, grpc_error_string(error));
     gpr_free(status_details);
   }
-  grpclb_policy->TryReresolutionLocked(&grpc_lb_glb_trace, GRPC_ERROR_NONE);
   // If this lb_calld is still in use, this call ended because of a failure so
   // we want to retry connecting. Otherwise, we have deliberately ended this
   // call and no further action is required.
   if (lb_calld == grpclb_policy->lb_calld_.get()) {
+    // If we did not receive a serverlist and the fallback-at-startup checks
+    // are pending, go into fallback mode immediately.  This short-circuits
+    // the timeout for the fallback-at-startup case.
+    if (!lb_calld->seen_serverlist_ &&
+        grpclb_policy->fallback_at_startup_checks_pending_) {
+      gpr_log(GPR_INFO,
+              "[grpclb %p] balancer call finished without receiving "
+              "serverlist; entering fallback mode",
+              grpclb_policy);
+      grpclb_policy->fallback_at_startup_checks_pending_ = false;
+      grpc_timer_cancel(&grpclb_policy->lb_fallback_timer_);
+      grpclb_policy->CancelBalancerChannelConnectivityWatchLocked();
+      grpclb_policy->fallback_mode_ = true;
+      grpclb_policy->CreateOrUpdateChildPolicyLocked();
+    } else {
+      // This handles the fallback-after-startup case.
+      grpclb_policy->MaybeEnterFallbackModeAfterStartup();
+    }
     grpclb_policy->lb_calld_.reset();
     GPR_ASSERT(!grpclb_policy->shutting_down_);
+    grpclb_policy->channel_control_helper()->RequestReresolution();
     if (lb_calld->seen_initial_response_) {
       // If we lose connection to the LB server, reset the backoff and restart
       // the LB call immediately.
@@ -911,7 +1205,6 @@ grpc_channel_args* BuildBalancerChannelArgs(
911
1205
  const ServerAddressList& addresses,
912
1206
  FakeResolverResponseGenerator* response_generator,
913
1207
  const grpc_channel_args* args) {
914
- ServerAddressList balancer_addresses = ExtractBalancerAddresses(addresses);
915
1208
  // Channel args to remove.
916
1209
  static const char* args_to_remove[] = {
917
1210
  // LB policy name, since we want to use the default (pick_first) in
@@ -924,15 +1217,6 @@ grpc_channel_args* BuildBalancerChannelArgs(
  // the LB channel than for the parent channel. The client channel
  // factory will re-add this arg with the right value.
  GRPC_ARG_SERVER_URI,
- // The resolved addresses, which will be generated by the name resolver
- // used in the LB channel. Note that the LB channel will use the fake
- // resolver, so this won't actually generate a query to DNS (or some
- // other name service). However, the addresses returned by the fake
- // resolver will have is_balancer=false, whereas our own addresses have
- // is_balancer=true. We need the LB channel to return addresses with
- // is_balancer=false so that it does not wind up recursively using the
- // grpclb LB policy.
- GRPC_ARG_SERVER_ADDRESS_LIST,
  // The fake resolver response generator, because we are replacing it
  // with the one from the grpclb policy, used to propagate updates to
  // the LB channel.
@@ -948,10 +1232,6 @@ grpc_channel_args* BuildBalancerChannelArgs(
  };
  // Channel args to add.
  const grpc_arg args_to_add[] = {
- // New address list.
- // Note that we pass these in both when creating the LB channel
- // and via the fake resolver. The latter is what actually gets used.
- CreateServerAddressListChannelArg(&balancer_addresses),
  // The fake resolver response generator, which we use to inject
  // address updates into the LB channel.
  grpc_core::FakeResolverResponseGenerator::MakeChannelArg(
@@ -969,14 +1249,14 @@ grpc_channel_args* BuildBalancerChannelArgs(
  args, args_to_remove, GPR_ARRAY_SIZE(args_to_remove), args_to_add,
  GPR_ARRAY_SIZE(args_to_add));
  // Make any necessary modifications for security.
- return grpc_lb_policy_grpclb_modify_lb_channel_args(new_args);
+ return grpc_lb_policy_grpclb_modify_lb_channel_args(addresses, new_args);
  }

  //
  // ctor and dtor
  //

- GrpcLb::GrpcLb(LoadBalancingPolicy::Args args)
+ GrpcLb::GrpcLb(Args args)
  : LoadBalancingPolicy(std::move(args)),
  response_generator_(MakeRefCounted<FakeResolverResponseGenerator>()),
  lb_call_backoff_(
@@ -988,16 +1268,12 @@ GrpcLb::GrpcLb(LoadBalancingPolicy::Args args)
  .set_max_backoff(GRPC_GRPCLB_RECONNECT_MAX_BACKOFF_SECONDS *
  1000)) {
  // Initialization.
+ GRPC_CLOSURE_INIT(&lb_on_fallback_, &GrpcLb::OnFallbackTimerLocked, this,
+ grpc_combiner_scheduler(combiner()));
  GRPC_CLOSURE_INIT(&lb_channel_on_connectivity_changed_,
  &GrpcLb::OnBalancerChannelConnectivityChangedLocked, this,
  grpc_combiner_scheduler(args.combiner));
- GRPC_CLOSURE_INIT(&on_rr_connectivity_changed_,
- &GrpcLb::OnRoundRobinConnectivityChangedLocked, this,
- grpc_combiner_scheduler(args.combiner));
- GRPC_CLOSURE_INIT(&on_rr_request_reresolution_,
- &GrpcLb::OnRoundRobinRequestReresolutionLocked, this,
- grpc_combiner_scheduler(args.combiner));
- grpc_connectivity_state_init(&state_tracker_, GRPC_CHANNEL_IDLE, "grpclb");
+ gpr_mu_init(&child_policy_mu_);
  // Record server name.
  const grpc_arg* arg = grpc_channel_args_find(args.args, GRPC_ARG_SERVER_URI);
  const char* server_uri = grpc_channel_arg_get_string(arg);
@@ -1014,36 +1290,41 @@ GrpcLb::GrpcLb(LoadBalancingPolicy::Args args)
  // Record LB call timeout.
  arg = grpc_channel_args_find(args.args, GRPC_ARG_GRPCLB_CALL_TIMEOUT_MS);
  lb_call_timeout_ms_ = grpc_channel_arg_get_integer(arg, {0, 0, INT_MAX});
- // Record fallback timeout.
+ // Record fallback-at-startup timeout.
  arg = grpc_channel_args_find(args.args, GRPC_ARG_GRPCLB_FALLBACK_TIMEOUT_MS);
- lb_fallback_timeout_ms_ = grpc_channel_arg_get_integer(
+ fallback_at_startup_timeout_ = grpc_channel_arg_get_integer(
  arg, {GRPC_GRPCLB_DEFAULT_FALLBACK_TIMEOUT_MS, 0, INT_MAX});
- // Process channel args.
- ProcessChannelArgsLocked(*args.args);
  }

  GrpcLb::~GrpcLb() {
- GPR_ASSERT(pending_picks_ == nullptr);
  gpr_free((void*)server_name_);
  grpc_channel_args_destroy(args_);
- grpc_connectivity_state_destroy(&state_tracker_);
- if (serverlist_ != nullptr) {
- grpc_grpclb_destroy_serverlist(serverlist_);
- }
+ gpr_mu_destroy(&child_policy_mu_);
  }

  void GrpcLb::ShutdownLocked() {
- grpc_error* error = GRPC_ERROR_CREATE_FROM_STATIC_STRING("Channel shutdown");
  shutting_down_ = true;
  lb_calld_.reset();
  if (retry_timer_callback_pending_) {
  grpc_timer_cancel(&lb_call_retry_timer_);
  }
- if (fallback_timer_callback_pending_) {
+ if (fallback_at_startup_checks_pending_) {
  grpc_timer_cancel(&lb_fallback_timer_);
+ CancelBalancerChannelConnectivityWatchLocked();
+ }
+ if (child_policy_ != nullptr) {
+ grpc_pollset_set_del_pollset_set(child_policy_->interested_parties(),
+ interested_parties());
+ }
+ if (pending_child_policy_ != nullptr) {
+ grpc_pollset_set_del_pollset_set(
+ pending_child_policy_->interested_parties(), interested_parties());
+ }
+ {
+ MutexLock lock(&child_policy_mu_);
+ child_policy_.reset();
+ pending_child_policy_.reset();
  }
- rr_policy_.reset();
- TryReresolutionLocked(&grpc_lb_glb_trace, GRPC_ERROR_CANCELLED);
  // We destroy the LB channel here instead of in our destructor because
  // destroying the channel triggers a last callback to
  // OnBalancerChannelConnectivityChangedLocked(), and we need to be
@@ -1053,161 +1334,40 @@ void GrpcLb::ShutdownLocked() {
  lb_channel_ = nullptr;
  gpr_atm_no_barrier_store(&lb_channel_uuid_, 0);
  }
- grpc_connectivity_state_set(&state_tracker_, GRPC_CHANNEL_SHUTDOWN,
- GRPC_ERROR_REF(error), "grpclb_shutdown");
- // Clear pending picks.
- PendingPick* pp;
- while ((pp = pending_picks_) != nullptr) {
- pending_picks_ = pp->next;
- pp->pick->connected_subchannel.reset();
- // Note: pp is deleted in this callback.
- GRPC_CLOSURE_SCHED(&pp->on_complete, GRPC_ERROR_REF(error));
- }
- GRPC_ERROR_UNREF(error);
  }

  //
  // public methods
  //

- void GrpcLb::HandOffPendingPicksLocked(LoadBalancingPolicy* new_policy) {
- PendingPick* pp;
- while ((pp = pending_picks_) != nullptr) {
- pending_picks_ = pp->next;
- pp->pick->on_complete = pp->original_on_complete;
- grpc_error* error = GRPC_ERROR_NONE;
- if (new_policy->PickLocked(pp->pick, &error)) {
- // Synchronous return; schedule closure.
- GRPC_CLOSURE_SCHED(pp->pick->on_complete, error);
- }
- Delete(pp);
- }
- }
-
- // Cancel a specific pending pick.
- //
- // A grpclb pick progresses as follows:
- // - If there's a Round Robin policy (rr_policy_) available, it'll be
- // handed over to the RR policy (in CreateRoundRobinPolicyLocked()). From
- // that point onwards, it'll be RR's responsibility. For cancellations, that
- // implies the pick needs also be cancelled by the RR instance.
- // - Otherwise, without an RR instance, picks stay pending at this policy's
- // level (grpclb), inside the pending_picks_ list. To cancel these,
- // we invoke the completion closure and set the pick's connected
- // subchannel to nullptr right here.
- void GrpcLb::CancelPickLocked(PickState* pick, grpc_error* error) {
- PendingPick* pp = pending_picks_;
- pending_picks_ = nullptr;
- while (pp != nullptr) {
- PendingPick* next = pp->next;
- if (pp->pick == pick) {
- pick->connected_subchannel.reset();
- // Note: pp is deleted in this callback.
- GRPC_CLOSURE_SCHED(&pp->on_complete,
- GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING(
- "Pick Cancelled", &error, 1));
- } else {
- pp->next = pending_picks_;
- pending_picks_ = pp;
- }
- pp = next;
- }
- if (rr_policy_ != nullptr) {
- rr_policy_->CancelPickLocked(pick, GRPC_ERROR_REF(error));
- }
- GRPC_ERROR_UNREF(error);
- }
-
- // Cancel all pending picks.
- //
- // A grpclb pick progresses as follows:
- // - If there's a Round Robin policy (rr_policy_) available, it'll be
- // handed over to the RR policy (in CreateRoundRobinPolicyLocked()). From
- // that point onwards, it'll be RR's responsibility. For cancellations, that
- // implies the pick needs also be cancelled by the RR instance.
- // - Otherwise, without an RR instance, picks stay pending at this policy's
- // level (grpclb), inside the pending_picks_ list. To cancel these,
- // we invoke the completion closure and set the pick's connected
- // subchannel to nullptr right here.
- void GrpcLb::CancelMatchingPicksLocked(uint32_t initial_metadata_flags_mask,
- uint32_t initial_metadata_flags_eq,
- grpc_error* error) {
- PendingPick* pp = pending_picks_;
- pending_picks_ = nullptr;
- while (pp != nullptr) {
- PendingPick* next = pp->next;
- if ((*pp->pick->initial_metadata_flags & initial_metadata_flags_mask) ==
- initial_metadata_flags_eq) {
- // Note: pp is deleted in this callback.
- GRPC_CLOSURE_SCHED(&pp->on_complete,
- GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING(
- "Pick Cancelled", &error, 1));
- } else {
- pp->next = pending_picks_;
- pending_picks_ = pp;
- }
- pp = next;
- }
- if (rr_policy_ != nullptr) {
- rr_policy_->CancelMatchingPicksLocked(initial_metadata_flags_mask,
- initial_metadata_flags_eq,
- GRPC_ERROR_REF(error));
- }
- GRPC_ERROR_UNREF(error);
- }
-
- void GrpcLb::ExitIdleLocked() {
- if (!started_picking_) {
- StartPickingLocked();
- }
- }
-
  void GrpcLb::ResetBackoffLocked() {
  if (lb_channel_ != nullptr) {
  grpc_channel_reset_connect_backoff(lb_channel_);
  }
- if (rr_policy_ != nullptr) {
- rr_policy_->ResetBackoffLocked();
+ if (child_policy_ != nullptr) {
+ child_policy_->ResetBackoffLocked();
  }
- }
-
- bool GrpcLb::PickLocked(PickState* pick, grpc_error** error) {
- PendingPick* pp = PendingPickCreate(pick);
- bool pick_done = false;
- if (rr_policy_ != nullptr) {
- if (grpc_lb_glb_trace.enabled()) {
- gpr_log(GPR_INFO, "[grpclb %p] about to PICK from RR %p", this,
- rr_policy_.get());
- }
- pick_done =
- PickFromRoundRobinPolicyLocked(false /* force_async */, pp, error);
- } else { // rr_policy_ == NULL
- if (pick->on_complete == nullptr) {
- *error = GRPC_ERROR_CREATE_FROM_STATIC_STRING(
- "No pick result available but synchronous result required.");
- pick_done = true;
- } else {
- if (grpc_lb_glb_trace.enabled()) {
- gpr_log(GPR_INFO,
- "[grpclb %p] No RR policy. Adding to grpclb's pending picks",
- this);
- }
- AddPendingPick(pp);
- if (!started_picking_) {
- StartPickingLocked();
- }
- pick_done = false;
- }
+ if (pending_child_policy_ != nullptr) {
+ pending_child_policy_->ResetBackoffLocked();
  }
- return pick_done;
  }

  void GrpcLb::FillChildRefsForChannelz(
  channelz::ChildRefsList* child_subchannels,
  channelz::ChildRefsList* child_channels) {
- // delegate to the RoundRobin to fill the children subchannels.
- if (rr_policy_ != nullptr) {
- rr_policy_->FillChildRefsForChannelz(child_subchannels, child_channels);
+ {
+ // Delegate to the child policy to fill the children subchannels.
+ // This must be done holding child_policy_mu_, since this method
+ // does not run in the combiner.
+ MutexLock lock(&child_policy_mu_);
+ if (child_policy_ != nullptr) {
+ child_policy_->FillChildRefsForChannelz(child_subchannels,
+ child_channels);
+ }
+ if (pending_child_policy_ != nullptr) {
+ pending_child_policy_->FillChildRefsForChannelz(child_subchannels,
+ child_channels);
+ }
  }
  gpr_atm uuid = gpr_atm_no_barrier_load(&lb_channel_uuid_);
  if (uuid != 0) {
@@ -1215,28 +1375,52 @@ void GrpcLb::FillChildRefsForChannelz(
  }
  }

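As the new comment in the hunk above notes, FillChildRefsForChannelz() runs outside the combiner (it is driven by channelz queries), so the new child_policy_mu_ mutex protects the two policy pointers against a concurrent swap. A minimal stand-alone model of that pattern, with all types invented for illustration:

#include <memory>
#include <mutex>
#include <vector>

// Toy stand-ins for the child policy and its channelz output.
struct ChildPolicy {
  void FillChildRefs(std::vector<int>* refs) const { refs->push_back(42); }
};

class Parent {
 public:
  // Runs outside the combiner (e.g. on a channelz query thread), so it
  // must hold the mutex while touching the policy pointers.
  void FillChildRefs(std::vector<int>* refs) {
    std::lock_guard<std::mutex> lock(mu_);
    if (child_ != nullptr) child_->FillChildRefs(refs);
    if (pending_child_ != nullptr) pending_child_->FillChildRefs(refs);
  }
  // Runs in the combiner; also takes the mutex when swapping pointers.
  void SwapInPendingChild() {
    std::lock_guard<std::mutex> lock(mu_);
    child_ = std::move(pending_child_);
  }

 private:
  std::mutex mu_;
  std::unique_ptr<ChildPolicy> child_ = std::make_unique<ChildPolicy>();
  std::unique_ptr<ChildPolicy> pending_child_;
};

int main() {
  Parent p;
  std::vector<int> refs;
  p.FillChildRefs(&refs);  // safe even if another thread swaps concurrently
  return 0;
}
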
- grpc_connectivity_state GrpcLb::CheckConnectivityLocked(
- grpc_error** connectivity_error) {
- return grpc_connectivity_state_get(&state_tracker_, connectivity_error);
+ void GrpcLb::UpdateLocked(UpdateArgs args) {
+ const bool is_initial_update = lb_channel_ == nullptr;
+ ParseLbConfig(args.config.get());
+ ProcessAddressesAndChannelArgsLocked(args.addresses, *args.args);
+ // Update the existing child policy.
+ if (child_policy_ != nullptr) CreateOrUpdateChildPolicyLocked();
+ // If this is the initial update, start the fallback-at-startup checks
+ // and the balancer call.
+ if (is_initial_update) {
+ fallback_at_startup_checks_pending_ = true;
+ // Start timer.
+ grpc_millis deadline = ExecCtx::Get()->Now() + fallback_at_startup_timeout_;
+ Ref(DEBUG_LOCATION, "on_fallback_timer").release(); // Ref for callback
+ grpc_timer_init(&lb_fallback_timer_, deadline, &lb_on_fallback_);
+ // Start watching the channel's connectivity state. If the channel
+ // goes into state TRANSIENT_FAILURE before the timer fires, we go into
+ // fallback mode even if the fallback timeout has not elapsed.
+ grpc_channel_element* client_channel_elem = grpc_channel_stack_last_element(
+ grpc_channel_get_channel_stack(lb_channel_));
+ GPR_ASSERT(client_channel_elem->filter == &grpc_client_channel_filter);
+ // Ref held by callback.
+ Ref(DEBUG_LOCATION, "watch_lb_channel_connectivity").release();
+ grpc_client_channel_watch_connectivity_state(
+ client_channel_elem,
+ grpc_polling_entity_create_from_pollset_set(interested_parties()),
+ &lb_channel_connectivity_, &lb_channel_on_connectivity_changed_,
+ nullptr);
+ // Start balancer call.
+ StartBalancerCallLocked();
+ }
  }

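Both async operations started in the initial update above (the fallback timer and the connectivity watch) take an extra ref on the policy that the matching callback later releases, so the object cannot be destroyed while a callback is in flight. A loose, simplified analogy for that convention using std::shared_ptr (names illustrative; the real code uses manual Ref()/Unref() pairs):

#include <cstdio>
#include <functional>
#include <memory>

struct Policy : std::enable_shared_from_this<Policy> {
  ~Policy() { std::printf("policy destroyed\n"); }
  std::function<void()> pending_callback;

  void StartTimer() {
    // Take a ref for the callback; the real code does
    // Ref(...).release() here and a matching Unref() in the callback.
    auto self = shared_from_this();
    pending_callback = [self]() {
      std::printf("timer fired; policy still alive\n");
    };  // the ref is dropped when the callback object is destroyed
  }
};

int main() {
  auto p = std::make_shared<Policy>();
  p->StartTimer();
  auto cb = std::move(p->pending_callback);
  p.reset();  // caller drops its ref; the callback's ref keeps it alive
  cb();       // runs safely; the last ref is released when cb goes away
  return 0;
}
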
- void GrpcLb::NotifyOnStateChangeLocked(grpc_connectivity_state* current,
- grpc_closure* notify) {
- grpc_connectivity_state_notify_on_state_change(&state_tracker_, current,
- notify);
- }
+ //
+ // helpers for UpdateLocked()
+ //

  // Returns the backend addresses extracted from the given addresses.
- UniquePtr<ServerAddressList> ExtractBackendAddresses(
- const ServerAddressList& addresses) {
+ ServerAddressList ExtractBackendAddresses(const ServerAddressList& addresses) {
  void* lb_token = (void*)GRPC_MDELEM_LB_TOKEN_EMPTY.payload;
  grpc_arg arg = grpc_channel_arg_pointer_create(
  const_cast<char*>(GRPC_ARG_GRPCLB_ADDRESS_LB_TOKEN), lb_token,
  &lb_token_arg_vtable);
- auto backend_addresses = MakeUnique<ServerAddressList>();
+ ServerAddressList backend_addresses;
  for (size_t i = 0; i < addresses.size(); ++i) {
  if (!addresses[i].IsBalancer()) {
- backend_addresses->emplace_back(
+ backend_addresses.emplace_back(
  addresses[i].address(),
  grpc_channel_args_copy_and_add(addresses[i].args(), &arg, 1));
  }
@@ -1244,18 +1428,10 @@ UniquePtr<ServerAddressList> ExtractBackendAddresses(
  return backend_addresses;
  }

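The resolver hands grpclb a single address list in which each entry is flagged as either a balancer or a backend; ExtractBackendAddresses() above and ExtractBalancerAddresses() (used in a later hunk) simply partition that list. A self-contained sketch of the same split, with an invented Address type:

#include <vector>

// Invented stand-in for grpc_core::ServerAddress.
struct Address {
  int id;
  bool is_balancer;
};

// Backends feed the child policy (and the fallback list).
std::vector<Address> ExtractBackends(const std::vector<Address>& all) {
  std::vector<Address> out;
  for (const Address& a : all) {
    if (!a.is_balancer) out.push_back(a);
  }
  return out;
}

// Balancers feed the LB channel via the fake resolver.
std::vector<Address> ExtractBalancers(const std::vector<Address>& all) {
  std::vector<Address> out;
  for (const Address& a : all) {
    if (a.is_balancer) out.push_back(a);
  }
  return out;
}

int main() {
  std::vector<Address> all = {{1, true}, {2, false}, {3, false}};
  auto backends = ExtractBackends(all);    // {2, 3}
  auto balancers = ExtractBalancers(all);  // {1}
  return backends.size() + balancers.size() == all.size() ? 0 : 1;
}
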
- void GrpcLb::ProcessChannelArgsLocked(const grpc_channel_args& args) {
- const ServerAddressList* addresses = FindServerAddressListChannelArg(&args);
- if (addresses == nullptr) {
- // Ignore this update.
- gpr_log(
- GPR_ERROR,
- "[grpclb %p] No valid LB addresses channel arg in update, ignoring.",
- this);
- return;
- }
+ void GrpcLb::ProcessAddressesAndChannelArgsLocked(
+ const ServerAddressList& addresses, const grpc_channel_args& args) {
  // Update fallback address list.
- fallback_backend_addresses_ = ExtractBackendAddresses(*addresses);
+ fallback_backend_addresses_ = ExtractBackendAddresses(addresses);
  // Make sure that GRPC_ARG_LB_POLICY_NAME is set in channel args,
  // since we use this to trigger the client_load_reporting filter.
  static const char* args_to_remove[] = {GRPC_ARG_LB_POLICY_NAME};
@@ -1265,15 +1441,15 @@ void GrpcLb::ProcessChannelArgsLocked(const grpc_channel_args& args) {
  args_ = grpc_channel_args_copy_and_add_and_remove(
  &args, args_to_remove, GPR_ARRAY_SIZE(args_to_remove), &new_arg, 1);
  // Construct args for balancer channel.
- grpc_channel_args* lb_channel_args =
- BuildBalancerChannelArgs(*addresses, response_generator_.get(), &args);
+ ServerAddressList balancer_addresses = ExtractBalancerAddresses(addresses);
+ grpc_channel_args* lb_channel_args = BuildBalancerChannelArgs(
+ balancer_addresses, response_generator_.get(), &args);
  // Create balancer channel if needed.
  if (lb_channel_ == nullptr) {
  char* uri_str;
  gpr_asprintf(&uri_str, "fake:///%s", server_name_);
- lb_channel_ = grpc_client_channel_factory_create_channel(
- client_channel_factory(), uri_str,
- GRPC_CLIENT_CHANNEL_TYPE_LOAD_BALANCING, lb_channel_args);
+ lb_channel_ =
+ channel_control_helper()->CreateChannel(uri_str, *lb_channel_args);
  GPR_ASSERT(lb_channel_ != nullptr);
  grpc_core::channelz::ChannelNode* channel_node =
  grpc_channel_get_channelz_node(lb_channel_);
@@ -1284,59 +1460,80 @@ void GrpcLb::ProcessChannelArgsLocked(const grpc_channel_args& args) {
  }
  // Propagate updates to the LB channel (pick_first) through the fake
  // resolver.
- response_generator_->SetResponse(lb_channel_args);
- grpc_channel_args_destroy(lb_channel_args);
+ Resolver::Result result;
+ result.addresses = std::move(balancer_addresses);
+ result.args = lb_channel_args;
+ response_generator_->SetResponse(std::move(result));
  }

- void GrpcLb::UpdateLocked(const grpc_channel_args& args, grpc_json* lb_config) {
- ProcessChannelArgsLocked(args);
- // Update the existing RR policy.
- if (rr_policy_ != nullptr) CreateOrUpdateRoundRobinPolicyLocked();
- // Start watching the LB channel connectivity for connection, if not
- // already doing so.
- if (!watching_lb_channel_) {
- lb_channel_connectivity_ = grpc_channel_check_connectivity_state(
- lb_channel_, true /* try to connect */);
- grpc_channel_element* client_channel_elem = grpc_channel_stack_last_element(
- grpc_channel_get_channel_stack(lb_channel_));
- GPR_ASSERT(client_channel_elem->filter == &grpc_client_channel_filter);
- watching_lb_channel_ = true;
- // TODO(roth): We currently track this ref manually. Once the
- // ClosureRef API is ready, we should pass the RefCountedPtr<> along
- // with the callback.
- auto self = Ref(DEBUG_LOCATION, "watch_lb_channel_connectivity");
- self.release();
- grpc_client_channel_watch_connectivity_state(
- client_channel_elem,
- grpc_polling_entity_create_from_pollset_set(interested_parties()),
- &lb_channel_connectivity_, &lb_channel_on_connectivity_changed_,
- nullptr);
+ void GrpcLb::ParseLbConfig(Config* grpclb_config) {
+ const grpc_json* child_policy = nullptr;
+ if (grpclb_config != nullptr) {
+ const grpc_json* grpclb_config_json = grpclb_config->config();
+ for (const grpc_json* field = grpclb_config_json; field != nullptr;
+ field = field->next) {
+ if (field->key == nullptr) return;
+ if (strcmp(field->key, "childPolicy") == 0) {
+ if (child_policy != nullptr) return; // Duplicate.
+ child_policy = ParseLoadBalancingConfig(field);
+ }
+ }
+ }
+ if (child_policy != nullptr) {
+ child_policy_config_ =
+ MakeRefCounted<Config>(child_policy, grpclb_config->service_config());
+ } else {
+ child_policy_config_.reset();
  }
  }

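ParseLbConfig() above scans the grpclb policy's JSON config for a single childPolicy field. As a point of reference, a service config selecting pick_first as the child would plausibly look like the sketch below (shape shown for illustration only; the gRPC service config schema, not this diff, is authoritative):

#include <cstdio>

// Hypothetical service config fragment; "childPolicy" is the field that
// ParseLbConfig() scans for inside the grpclb config object.
const char* kExampleServiceConfig = R"({
  "loadBalancingConfig": [
    { "grpclb": {
        "childPolicy": [ { "pick_first": {} } ]
    } }
  ]
})";

int main() { std::puts(kExampleServiceConfig); return 0; }
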
+ void GrpcLb::OnBalancerChannelConnectivityChangedLocked(void* arg,
+ grpc_error* error) {
+ GrpcLb* self = static_cast<GrpcLb*>(arg);
+ if (!self->shutting_down_ && self->fallback_at_startup_checks_pending_) {
+ if (self->lb_channel_connectivity_ != GRPC_CHANNEL_TRANSIENT_FAILURE) {
+ // Not in TRANSIENT_FAILURE. Renew connectivity watch.
+ grpc_channel_element* client_channel_elem =
+ grpc_channel_stack_last_element(
+ grpc_channel_get_channel_stack(self->lb_channel_));
+ GPR_ASSERT(client_channel_elem->filter == &grpc_client_channel_filter);
+ grpc_client_channel_watch_connectivity_state(
+ client_channel_elem,
+ grpc_polling_entity_create_from_pollset_set(
+ self->interested_parties()),
+ &self->lb_channel_connectivity_,
+ &self->lb_channel_on_connectivity_changed_, nullptr);
+ return; // Early out so we don't drop the ref below.
+ }
+ // In TRANSIENT_FAILURE. Cancel the fallback timer and go into
+ // fallback mode immediately.
+ gpr_log(GPR_INFO,
+ "[grpclb %p] balancer channel in state TRANSIENT_FAILURE; "
+ "entering fallback mode",
+ self);
+ self->fallback_at_startup_checks_pending_ = false;
+ grpc_timer_cancel(&self->lb_fallback_timer_);
+ self->fallback_mode_ = true;
+ self->CreateOrUpdateChildPolicyLocked();
+ }
+ // Done watching connectivity state, so drop ref.
+ self->Unref(DEBUG_LOCATION, "watch_lb_channel_connectivity");
+ }
+
+ void GrpcLb::CancelBalancerChannelConnectivityWatchLocked() {
+ grpc_channel_element* client_channel_elem = grpc_channel_stack_last_element(
+ grpc_channel_get_channel_stack(lb_channel_));
+ GPR_ASSERT(client_channel_elem->filter == &grpc_client_channel_filter);
+ grpc_client_channel_watch_connectivity_state(
+ client_channel_elem,
+ grpc_polling_entity_create_from_pollset_set(interested_parties()),
+ nullptr, &lb_channel_on_connectivity_changed_, nullptr);
+ }
+
  //
  // code for balancer channel and call
  //

- void GrpcLb::StartPickingLocked() {
- // Start a timer to fall back.
- if (lb_fallback_timeout_ms_ > 0 && serverlist_ == nullptr &&
- !fallback_timer_callback_pending_) {
- grpc_millis deadline = ExecCtx::Get()->Now() + lb_fallback_timeout_ms_;
- // TODO(roth): We currently track this ref manually. Once the
- // ClosureRef API is ready, we should pass the RefCountedPtr<> along
- // with the callback.
- auto self = Ref(DEBUG_LOCATION, "on_fallback_timer");
- self.release();
- GRPC_CLOSURE_INIT(&lb_on_fallback_, &GrpcLb::OnFallbackTimerLocked, this,
- grpc_combiner_scheduler(combiner()));
- fallback_timer_callback_pending_ = true;
- grpc_timer_init(&lb_fallback_timer_, deadline, &lb_on_fallback_);
- }
- started_picking_ = true;
- StartBalancerCallLocked();
- }
-
  void GrpcLb::StartBalancerCallLocked() {
  GPR_ASSERT(lb_channel_ != nullptr);
  if (shutting_down_) return;
@@ -1351,24 +1548,6 @@ void GrpcLb::StartBalancerCallLocked() {
  lb_calld_->StartQuery();
  }

- void GrpcLb::OnFallbackTimerLocked(void* arg, grpc_error* error) {
- GrpcLb* grpclb_policy = static_cast<GrpcLb*>(arg);
- grpclb_policy->fallback_timer_callback_pending_ = false;
- // If we receive a serverlist after the timer fires but before this callback
- // actually runs, don't fall back.
- if (grpclb_policy->serverlist_ == nullptr && !grpclb_policy->shutting_down_ &&
- error == GRPC_ERROR_NONE) {
- if (grpc_lb_glb_trace.enabled()) {
- gpr_log(GPR_INFO,
- "[grpclb %p] Falling back to use backends from resolver",
- grpclb_policy);
- }
- GPR_ASSERT(grpclb_policy->fallback_backend_addresses_ != nullptr);
- grpclb_policy->CreateOrUpdateRoundRobinPolicyLocked();
- }
- grpclb_policy->Unref(DEBUG_LOCATION, "on_fallback_timer");
- }
-
  void GrpcLb::StartBalancerCallRetryTimerLocked() {
  grpc_millis next_try = lb_call_backoff_.NextAttemptTime();
  if (grpc_lb_glb_trace.enabled()) {
@@ -1407,395 +1586,214 @@ void GrpcLb::OnBalancerCallRetryTimerLocked(void* arg, grpc_error* error) {
  grpclb_policy->Unref(DEBUG_LOCATION, "on_balancer_call_retry_timer");
  }

- // Invoked as part of the update process. It continues watching the LB channel
- // until it shuts down or becomes READY. It's invoked even if the LB channel
- // stayed READY throughout the update (for example if the update is identical).
- void GrpcLb::OnBalancerChannelConnectivityChangedLocked(void* arg,
- grpc_error* error) {
- GrpcLb* grpclb_policy = static_cast<GrpcLb*>(arg);
- if (grpclb_policy->shutting_down_) goto done;
- // Re-initialize the lb_call. This should also take care of updating the
- // embedded RR policy. Note that the current RR policy, if any, will stay in
- // effect until an update from the new lb_call is received.
- switch (grpclb_policy->lb_channel_connectivity_) {
- case GRPC_CHANNEL_CONNECTING:
- case GRPC_CHANNEL_TRANSIENT_FAILURE: {
- // Keep watching the LB channel.
- grpc_channel_element* client_channel_elem =
- grpc_channel_stack_last_element(
- grpc_channel_get_channel_stack(grpclb_policy->lb_channel_));
- GPR_ASSERT(client_channel_elem->filter == &grpc_client_channel_filter);
- grpc_client_channel_watch_connectivity_state(
- client_channel_elem,
- grpc_polling_entity_create_from_pollset_set(
- grpclb_policy->interested_parties()),
- &grpclb_policy->lb_channel_connectivity_,
- &grpclb_policy->lb_channel_on_connectivity_changed_, nullptr);
- break;
- }
- // The LB channel may be IDLE because it's shut down before the update.
- // Restart the LB call to kick the LB channel into gear.
- case GRPC_CHANNEL_IDLE:
- case GRPC_CHANNEL_READY:
- grpclb_policy->lb_calld_.reset();
- if (grpclb_policy->started_picking_) {
- if (grpclb_policy->retry_timer_callback_pending_) {
- grpc_timer_cancel(&grpclb_policy->lb_call_retry_timer_);
- }
- grpclb_policy->lb_call_backoff_.Reset();
- grpclb_policy->StartBalancerCallLocked();
- }
- // fallthrough
- case GRPC_CHANNEL_SHUTDOWN:
- done:
- grpclb_policy->watching_lb_channel_ = false;
- grpclb_policy->Unref(DEBUG_LOCATION,
- "watch_lb_channel_connectivity_cb_shutdown");
- }
- }
-
  //
- // PendingPick
+ // code for handling fallback mode
  //

- // Adds lb_token of selected subchannel (address) to the call's initial
- // metadata.
- grpc_error* AddLbTokenToInitialMetadata(
- grpc_mdelem lb_token, grpc_linked_mdelem* lb_token_mdelem_storage,
- grpc_metadata_batch* initial_metadata) {
- GPR_ASSERT(lb_token_mdelem_storage != nullptr);
- GPR_ASSERT(!GRPC_MDISNULL(lb_token));
- return grpc_metadata_batch_add_tail(initial_metadata, lb_token_mdelem_storage,
- lb_token);
- }
-
- // Destroy function used when embedding client stats in call context.
- void DestroyClientStats(void* arg) {
- static_cast<GrpcLbClientStats*>(arg)->Unref();
- }
-
- void GrpcLb::PendingPickSetMetadataAndContext(PendingPick* pp) {
- // If connected_subchannel is nullptr, no pick has been made by the RR
- // policy (e.g., all addresses failed to connect). There won't be any
- // LB token available.
- if (pp->pick->connected_subchannel != nullptr) {
- const grpc_arg* arg =
- grpc_channel_args_find(pp->pick->connected_subchannel->args(),
- GRPC_ARG_GRPCLB_ADDRESS_LB_TOKEN);
- if (arg != nullptr) {
- grpc_mdelem lb_token = {
- reinterpret_cast<uintptr_t>(arg->value.pointer.p)};
- AddLbTokenToInitialMetadata(GRPC_MDELEM_REF(lb_token),
- &pp->pick->lb_token_mdelem_storage,
- pp->pick->initial_metadata);
- } else {
- gpr_log(GPR_ERROR,
- "[grpclb %p] No LB token for connected subchannel pick %p",
- pp->grpclb_policy, pp->pick);
- abort();
- }
- // Pass on client stats via context. Passes ownership of the reference.
- if (pp->client_stats != nullptr) {
- pp->pick->subchannel_call_context[GRPC_GRPCLB_CLIENT_STATS].value =
- pp->client_stats.release();
- pp->pick->subchannel_call_context[GRPC_GRPCLB_CLIENT_STATS].destroy =
- DestroyClientStats;
- }
- } else {
- pp->client_stats.reset();
+ void GrpcLb::MaybeEnterFallbackModeAfterStartup() {
+ // Enter fallback mode if all of the following are true:
+ // - We are not currently in fallback mode.
+ // - We are not currently waiting for the initial fallback timeout.
+ // - We are not currently in contact with the balancer.
+ // - The child policy is not in state READY.
+ if (!fallback_mode_ && !fallback_at_startup_checks_pending_ &&
+ (lb_calld_ == nullptr || !lb_calld_->seen_serverlist()) &&
+ !child_policy_ready_) {
+ gpr_log(GPR_INFO,
+ "[grpclb %p] lost contact with balancer and backends from "
+ "most recent serverlist; entering fallback mode",
+ this);
+ fallback_mode_ = true;
+ CreateOrUpdateChildPolicyLocked();
  }
  }

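MaybeEnterFallbackModeAfterStartup() is the post-startup counterpart to the short-circuit shown earlier: it only trips once the startup phase is over, contact with the balancer is lost, and the backends from the last serverlist have stopped working. A compact model of the four-way condition (invented names, illustration only):

#include <cstdio>

// Illustrative model of the state behind MaybeEnterFallbackModeAfterStartup().
struct LbState {
  bool fallback_mode;
  bool fallback_at_startup_checks_pending;
  bool in_contact_with_balancer;  // models lb_calld_ && seen_serverlist()
  bool child_policy_ready;
};

bool ShouldEnterFallbackAfterStartup(const LbState& s) {
  return !s.fallback_mode &&                      // not already in fallback
         !s.fallback_at_startup_checks_pending && // startup phase is over
         !s.in_contact_with_balancer &&           // balancer contact lost
         !s.child_policy_ready;                   // and backends are failing
}

int main() {
  std::printf("%d\n", ShouldEnterFallbackAfterStartup({false, false, false, false}));  // 1
  std::printf("%d\n", ShouldEnterFallbackAfterStartup({false, false, false, true}));   // 0
  return 0;
}
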
- /* The \a on_complete closure passed as part of the pick requires keeping a
- * reference to its associated round robin instance. We wrap this closure in
- * order to unref the round robin instance upon its invocation */
- void GrpcLb::OnPendingPickComplete(void* arg, grpc_error* error) {
- PendingPick* pp = static_cast<PendingPick*>(arg);
- PendingPickSetMetadataAndContext(pp);
- GRPC_CLOSURE_SCHED(pp->original_on_complete, GRPC_ERROR_REF(error));
- Delete(pp);
- }
-
- GrpcLb::PendingPick* GrpcLb::PendingPickCreate(PickState* pick) {
- PendingPick* pp = New<PendingPick>();
- pp->grpclb_policy = this;
- pp->pick = pick;
- GRPC_CLOSURE_INIT(&pp->on_complete, &GrpcLb::OnPendingPickComplete, pp,
- grpc_schedule_on_exec_ctx);
- pp->original_on_complete = pick->on_complete;
- pick->on_complete = &pp->on_complete;
- return pp;
- }
-
- void GrpcLb::AddPendingPick(PendingPick* pp) {
- pp->next = pending_picks_;
- pending_picks_ = pp;
+ void GrpcLb::OnFallbackTimerLocked(void* arg, grpc_error* error) {
+ GrpcLb* grpclb_policy = static_cast<GrpcLb*>(arg);
+ // If we receive a serverlist after the timer fires but before this callback
+ // actually runs, don't fall back.
+ if (grpclb_policy->fallback_at_startup_checks_pending_ &&
+ !grpclb_policy->shutting_down_ && error == GRPC_ERROR_NONE) {
+ gpr_log(GPR_INFO,
+ "[grpclb %p] No response from balancer after fallback timeout; "
+ "entering fallback mode",
+ grpclb_policy);
+ grpclb_policy->fallback_at_startup_checks_pending_ = false;
+ grpclb_policy->CancelBalancerChannelConnectivityWatchLocked();
+ grpclb_policy->fallback_mode_ = true;
+ grpclb_policy->CreateOrUpdateChildPolicyLocked();
+ }
+ grpclb_policy->Unref(DEBUG_LOCATION, "on_fallback_timer");
  }

  //
- // code for interacting with the RR policy
+ // code for interacting with the child policy
  //

- // Performs a pick over \a rr_policy_. Given that a pick can return
- // immediately (ignoring its completion callback), we need to perform the
- // cleanups this callback would otherwise be responsible for.
- // If \a force_async is true, then we will manually schedule the
- // completion callback even if the pick is available immediately.
- bool GrpcLb::PickFromRoundRobinPolicyLocked(bool force_async, PendingPick* pp,
- grpc_error** error) {
- // Check for drops if we are not using fallback backend addresses.
- if (serverlist_ != nullptr && serverlist_->num_servers > 0) {
- // Look at the index into the serverlist to see if we should drop this call.
- grpc_grpclb_server* server = serverlist_->servers[serverlist_index_++];
- if (serverlist_index_ == serverlist_->num_servers) {
- serverlist_index_ = 0; // Wrap-around.
- }
- if (server->drop) {
- // Update client load reporting stats to indicate the number of
- // dropped calls. Note that we have to do this here instead of in
- // the client_load_reporting filter, because we do not create a
- // subchannel call (and therefore no client_load_reporting filter)
- // for dropped calls.
- if (lb_calld_ != nullptr && lb_calld_->client_stats() != nullptr) {
- lb_calld_->client_stats()->AddCallDroppedLocked(
- server->load_balance_token);
- }
- if (force_async) {
- GRPC_CLOSURE_SCHED(pp->original_on_complete, GRPC_ERROR_NONE);
- Delete(pp);
- return false;
- }
- Delete(pp);
- return true;
- }
- }
- // Set client_stats.
- if (lb_calld_ != nullptr && lb_calld_->client_stats() != nullptr) {
- pp->client_stats = lb_calld_->client_stats()->Ref();
- }
- // Pick via the RR policy.
- bool pick_done = rr_policy_->PickLocked(pp->pick, error);
- if (pick_done) {
- PendingPickSetMetadataAndContext(pp);
- if (force_async) {
- GRPC_CLOSURE_SCHED(pp->original_on_complete, *error);
- *error = GRPC_ERROR_NONE;
- pick_done = false;
- }
- Delete(pp);
+ grpc_channel_args* GrpcLb::CreateChildPolicyArgsLocked(
+ bool is_backend_from_grpclb_load_balancer) {
+ grpc_arg args_to_add[2] = {
+ // A channel arg indicating if the target is a backend inferred from a
+ // grpclb load balancer.
+ grpc_channel_arg_integer_create(
+ const_cast<char*>(
+ GRPC_ARG_ADDRESS_IS_BACKEND_FROM_GRPCLB_LOAD_BALANCER),
+ is_backend_from_grpclb_load_balancer),
+ };
+ size_t num_args_to_add = 1;
+ if (is_backend_from_grpclb_load_balancer) {
+ args_to_add[num_args_to_add++] = grpc_channel_arg_integer_create(
+ const_cast<char*>(GRPC_ARG_INHIBIT_HEALTH_CHECKING), 1);
  }
- // else, the pending pick will be registered and taken care of by the
- // pending pick list inside the RR policy. Eventually,
- // OnPendingPickComplete() will be called, which will (among other
- // things) add the LB token to the call's initial metadata.
- return pick_done;
+ return grpc_channel_args_copy_and_add(args_, args_to_add, num_args_to_add);
  }

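CreateChildPolicyArgsLocked() above uses a small fixed-size array plus a count so that the second arg (health-check inhibition) is appended only for balancer-provided backends. The same idiom in a self-contained form (types invented for illustration):

#include <cstddef>
#include <cstdio>

struct Arg { const char* key; int value; };

// Builds 1 or 2 args depending on whether the addresses came from a
// grpclb balancer; mirrors the fixed-array-plus-count idiom above.
size_t BuildArgs(bool is_backend_from_balancer, Arg (&out)[2]) {
  out[0] = {"address_is_backend_from_grpclb_load_balancer",
            is_backend_from_balancer};
  size_t n = 1;
  if (is_backend_from_balancer) {
    // Balancer-provided backends skip client-side health checking.
    out[n++] = {"inhibit_health_checking", 1};
  }
  return n;
}

int main() {
  Arg args[2];
  size_t n = BuildArgs(true, args);
  for (size_t i = 0; i < n; ++i) {
    std::printf("%s=%d\n", args[i].key, args[i].value);
  }
  return 0;
}
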
- void GrpcLb::CreateRoundRobinPolicyLocked(Args args) {
- GPR_ASSERT(rr_policy_ == nullptr);
- rr_policy_ = LoadBalancingPolicyRegistry::CreateLoadBalancingPolicy(
- "round_robin", std::move(args));
- if (GPR_UNLIKELY(rr_policy_ == nullptr)) {
- gpr_log(GPR_ERROR, "[grpclb %p] Failure creating a RoundRobin policy",
- this);
- return;
- }
+ OrphanablePtr<LoadBalancingPolicy> GrpcLb::CreateChildPolicyLocked(
+ const char* name, const grpc_channel_args* args) {
+ Helper* helper = New<Helper>(Ref());
+ LoadBalancingPolicy::Args lb_policy_args;
+ lb_policy_args.combiner = combiner();
+ lb_policy_args.args = args;
+ lb_policy_args.channel_control_helper =
+ UniquePtr<ChannelControlHelper>(helper);
+ OrphanablePtr<LoadBalancingPolicy> lb_policy =
+ LoadBalancingPolicyRegistry::CreateLoadBalancingPolicy(
+ name, std::move(lb_policy_args));
+ if (GPR_UNLIKELY(lb_policy == nullptr)) {
+ gpr_log(GPR_ERROR, "[grpclb %p] Failure creating child policy %s", this,
+ name);
+ return nullptr;
+ }
+ helper->set_child(lb_policy.get());
  if (grpc_lb_glb_trace.enabled()) {
- gpr_log(GPR_INFO, "[grpclb %p] Created new RR policy %p", this,
- rr_policy_.get());
+ gpr_log(GPR_INFO, "[grpclb %p] Created new child policy %s (%p)", this,
+ name, lb_policy.get());
  }
- // TODO(roth): We currently track this ref manually. Once the new
- // ClosureRef API is done, pass the RefCountedPtr<> along with the closure.
- auto self = Ref(DEBUG_LOCATION, "on_rr_reresolution_requested");
- self.release();
- rr_policy_->SetReresolutionClosureLocked(&on_rr_request_reresolution_);
- grpc_error* rr_state_error = nullptr;
- rr_connectivity_state_ = rr_policy_->CheckConnectivityLocked(&rr_state_error);
- // Connectivity state is a function of the RR policy updated/created.
- UpdateConnectivityStateFromRoundRobinPolicyLocked(rr_state_error);
  // Add the gRPC LB's interested_parties pollset_set to that of the newly
- // created RR policy. This will make the RR policy progress upon activity on
- // gRPC LB, which in turn is tied to the application's call.
- grpc_pollset_set_add_pollset_set(rr_policy_->interested_parties(),
+ // created child policy. This will make the child policy progress upon
+ // activity on gRPC LB, which in turn is tied to the application's call.
+ grpc_pollset_set_add_pollset_set(lb_policy->interested_parties(),
  interested_parties());
- // Subscribe to changes to the connectivity of the new RR.
- // TODO(roth): We currently track this ref manually. Once the new
- // ClosureRef API is done, pass the RefCountedPtr<> along with the closure.
- self = Ref(DEBUG_LOCATION, "on_rr_connectivity_changed");
- self.release();
- rr_policy_->NotifyOnStateChangeLocked(&rr_connectivity_state_,
- &on_rr_connectivity_changed_);
- rr_policy_->ExitIdleLocked();
- // Send pending picks to RR policy.
- PendingPick* pp;
- while ((pp = pending_picks_)) {
- pending_picks_ = pp->next;
- if (grpc_lb_glb_trace.enabled()) {
- gpr_log(GPR_INFO,
- "[grpclb %p] Pending pick about to (async) PICK from RR %p", this,
- rr_policy_.get());
- }
- grpc_error* error = GRPC_ERROR_NONE;
- PickFromRoundRobinPolicyLocked(true /* force_async */, pp, &error);
- }
+ return lb_policy;
  }

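Each child policy is created with its own Helper, which holds a ref to the parent GrpcLb and is told via set_child() which policy it speaks for; that is how the parent can later distinguish events coming from child_policy_ versus pending_child_policy_. A stripped-down model of that wiring (all names invented):

#include <cstdio>
#include <memory>

struct Child;  // forward declaration

// The parent hands each child a helper that points back at the parent
// and records which child it belongs to.
struct Helper {
  struct Parent* parent = nullptr;
  Child* child = nullptr;  // set right after the child is created
  void set_child(Child* c) { child = c; }
};

struct Child {
  explicit Child(std::unique_ptr<Helper> h) : helper(std::move(h)) {}
  std::unique_ptr<Helper> helper;
};

struct Parent {
  std::unique_ptr<Child> MakeChild() {
    auto helper = std::make_unique<Helper>();
    helper->parent = this;
    Helper* raw = helper.get();
    auto child = std::make_unique<Child>(std::move(helper));
    raw->set_child(child.get());  // helper now knows its own child
    return child;
  }
};

int main() {
  Parent p;
  auto c = p.MakeChild();
  std::printf("helper bound to its child? %s\n",
              c->helper->child == c.get() ? "yes" : "no");
  return 0;
}
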
- grpc_channel_args* GrpcLb::CreateRoundRobinPolicyArgsLocked() {
- ServerAddressList tmp_addresses;
- ServerAddressList* addresses = &tmp_addresses;
+ void GrpcLb::CreateOrUpdateChildPolicyLocked() {
+ if (shutting_down_) return;
+ // Construct update args.
+ UpdateArgs update_args;
  bool is_backend_from_grpclb_load_balancer = false;
- if (serverlist_ != nullptr) {
- tmp_addresses = ProcessServerlist(serverlist_);
- is_backend_from_grpclb_load_balancer = true;
- } else {
- // If CreateOrUpdateRoundRobinPolicyLocked() is invoked when we haven't
+ if (fallback_mode_) {
+ // If CreateOrUpdateChildPolicyLocked() is invoked when we haven't
  // received any serverlist from the balancer, we use the fallback backends
  // returned by the resolver. Note that the fallback backend list may be
  // empty, in which case the new round_robin policy will keep the requested
  // picks pending.
- GPR_ASSERT(fallback_backend_addresses_ != nullptr);
- addresses = fallback_backend_addresses_.get();
- }
- GPR_ASSERT(addresses != nullptr);
- // Replace the server address list in the channel args that we pass down to
- // the subchannel.
- static const char* keys_to_remove[] = {GRPC_ARG_SERVER_ADDRESS_LIST};
- grpc_arg args_to_add[3] = {
- CreateServerAddressListChannelArg(addresses),
- // A channel arg indicating if the target is a backend inferred from a
- // grpclb load balancer.
- grpc_channel_arg_integer_create(
- const_cast<char*>(
- GRPC_ARG_ADDRESS_IS_BACKEND_FROM_GRPCLB_LOAD_BALANCER),
- is_backend_from_grpclb_load_balancer),
- };
- size_t num_args_to_add = 2;
- if (is_backend_from_grpclb_load_balancer) {
- args_to_add[2] = grpc_channel_arg_integer_create(
- const_cast<char*>(GRPC_ARG_INHIBIT_HEALTH_CHECKING), 1);
- ++num_args_to_add;
+ update_args.addresses = fallback_backend_addresses_;
+ } else {
+ update_args.addresses = serverlist_->GetServerAddressList(
+ lb_calld_ == nullptr ? nullptr : lb_calld_->client_stats());
+ is_backend_from_grpclb_load_balancer = true;
  }
- grpc_channel_args* args = grpc_channel_args_copy_and_add_and_remove(
- args_, keys_to_remove, GPR_ARRAY_SIZE(keys_to_remove), args_to_add,
- num_args_to_add);
- return args;
- }
-
- void GrpcLb::CreateOrUpdateRoundRobinPolicyLocked() {
- if (shutting_down_) return;
- grpc_channel_args* args = CreateRoundRobinPolicyArgsLocked();
- GPR_ASSERT(args != nullptr);
- if (rr_policy_ != nullptr) {
+ update_args.args =
+ CreateChildPolicyArgsLocked(is_backend_from_grpclb_load_balancer);
+ GPR_ASSERT(update_args.args != nullptr);
+ update_args.config = child_policy_config_;
+ // If the child policy name changes, we need to create a new child
+ // policy. When this happens, we leave child_policy_ as-is and store
+ // the new child policy in pending_child_policy_. Once the new child
+ // policy transitions into state READY, we swap it into child_policy_,
+ // replacing the original child policy. So pending_child_policy_ is
+ // non-null only between when we apply an update that changes the child
+ // policy name and when the new child reports state READY.
+ //
+ // Updates can arrive at any point during this transition. We always
+ // apply updates relative to the most recently created child policy,
+ // even if the most recent one is still in pending_child_policy_. This
+ // is true both when applying the updates to an existing child policy
+ // and when determining whether we need to create a new policy.
+ //
+ // As a result of this, there are several cases to consider here:
+ //
+ // 1. We have no existing child policy (i.e., we have started up but
+ // have not yet received a serverlist from the balancer or gone
+ // into fallback mode; in this case, both child_policy_ and
+ // pending_child_policy_ are null). In this case, we create a
+ // new child policy and store it in child_policy_.
+ //
+ // 2. We have an existing child policy and have no pending child policy
+ // from a previous update (i.e., either there has not been a
+ // previous update that changed the policy name, or we have already
+ // finished swapping in the new policy; in this case, child_policy_
+ // is non-null but pending_child_policy_ is null). In this case:
+ // a. If child_policy_->name() equals child_policy_name, then we
+ // update the existing child policy.
+ // b. If child_policy_->name() does not equal child_policy_name,
+ // we create a new policy. The policy will be stored in
+ // pending_child_policy_ and will later be swapped into
+ // child_policy_ by the helper when the new child transitions
+ // into state READY.
+ //
+ // 3. We have an existing child policy and have a pending child policy
+ // from a previous update (i.e., a previous update set
+ // pending_child_policy_ as per case 2b above and that policy has
+ // not yet transitioned into state READY and been swapped into
+ // child_policy_; in this case, both child_policy_ and
+ // pending_child_policy_ are non-null). In this case:
+ // a. If pending_child_policy_->name() equals child_policy_name,
+ // then we update the existing pending child policy.
+ // b. If pending_child_policy->name() does not equal
+ // child_policy_name, then we create a new policy. The new
+ // policy is stored in pending_child_policy_ (replacing the one
+ // that was there before, which will be immediately shut down)
+ // and will later be swapped into child_policy_ by the helper
+ // when the new child transitions into state READY.
+ const char* child_policy_name = child_policy_config_ == nullptr
+ ? "round_robin"
+ : child_policy_config_->name();
+ const bool create_policy =
+ // case 1
+ child_policy_ == nullptr ||
+ // case 2b
+ (pending_child_policy_ == nullptr &&
+ strcmp(child_policy_->name(), child_policy_name) != 0) ||
+ // case 3b
+ (pending_child_policy_ != nullptr &&
+ strcmp(pending_child_policy_->name(), child_policy_name) != 0);
+ LoadBalancingPolicy* policy_to_update = nullptr;
+ if (create_policy) {
+ // Cases 1, 2b, and 3b: create a new child policy.
+ // If child_policy_ is null, we set it (case 1), else we set
+ // pending_child_policy_ (cases 2b and 3b).
  if (grpc_lb_glb_trace.enabled()) {
- gpr_log(GPR_INFO, "[grpclb %p] Updating RR policy %p", this,
- rr_policy_.get());
+ gpr_log(GPR_INFO, "[grpclb %p] Creating new %schild policy %s", this,
+ child_policy_ == nullptr ? "" : "pending ", child_policy_name);
+ }
+ auto new_policy =
+ CreateChildPolicyLocked(child_policy_name, update_args.args);
+ // Swap the policy into place.
+ auto& lb_policy =
+ child_policy_ == nullptr ? child_policy_ : pending_child_policy_;
+ {
+ MutexLock lock(&child_policy_mu_);
+ lb_policy = std::move(new_policy);
  }
- rr_policy_->UpdateLocked(*args, nullptr);
+ policy_to_update = lb_policy.get();
  } else {
- LoadBalancingPolicy::Args lb_policy_args;
- lb_policy_args.combiner = combiner();
- lb_policy_args.client_channel_factory = client_channel_factory();
- lb_policy_args.args = args;
- lb_policy_args.subchannel_pool = subchannel_pool()->Ref();
- CreateRoundRobinPolicyLocked(std::move(lb_policy_args));
- }
- grpc_channel_args_destroy(args);
- }
-
- void GrpcLb::OnRoundRobinRequestReresolutionLocked(void* arg,
- grpc_error* error) {
- GrpcLb* grpclb_policy = static_cast<GrpcLb*>(arg);
- if (grpclb_policy->shutting_down_ || error != GRPC_ERROR_NONE) {
- grpclb_policy->Unref(DEBUG_LOCATION, "on_rr_reresolution_requested");
- return;
- }
- if (grpc_lb_glb_trace.enabled()) {
- gpr_log(
- GPR_INFO,
- "[grpclb %p] Re-resolution requested from the internal RR policy (%p).",
- grpclb_policy, grpclb_policy->rr_policy_.get());
- }
- // If we are talking to a balancer, we expect to get updated addresses form
- // the balancer, so we can ignore the re-resolution request from the RR
- // policy. Otherwise, handle the re-resolution request using the
- // grpclb policy's original re-resolution closure.
- if (grpclb_policy->lb_calld_ == nullptr ||
- !grpclb_policy->lb_calld_->seen_initial_response()) {
- grpclb_policy->TryReresolutionLocked(&grpc_lb_glb_trace, GRPC_ERROR_NONE);
- }
- // Give back the wrapper closure to the RR policy.
- grpclb_policy->rr_policy_->SetReresolutionClosureLocked(
- &grpclb_policy->on_rr_request_reresolution_);
- }
-
- void GrpcLb::UpdateConnectivityStateFromRoundRobinPolicyLocked(
- grpc_error* rr_state_error) {
- const grpc_connectivity_state curr_glb_state =
- grpc_connectivity_state_check(&state_tracker_);
- /* The new connectivity status is a function of the previous one and the new
- * input coming from the status of the RR policy.
- *
- * current state (grpclb's)
- * |
- * v || I | C | R | TF | SD | <- new state (RR's)
- * ===++====+=====+=====+======+======+
- * I || I | C | R | [I] | [I] |
- * ---++----+-----+-----+------+------+
- * C || I | C | R | [C] | [C] |
- * ---++----+-----+-----+------+------+
- * R || I | C | R | [R] | [R] |
- * ---++----+-----+-----+------+------+
- * TF || I | C | R | [TF] | [TF] |
- * ---++----+-----+-----+------+------+
- * SD || NA | NA | NA | NA | NA | (*)
- * ---++----+-----+-----+------+------+
- *
- * A [STATE] indicates that the old RR policy is kept. In those cases, STATE
- * is the current state of grpclb, which is left untouched.
- *
- * In summary, if the new state is TRANSIENT_FAILURE or SHUTDOWN, stick to
- * the previous RR instance.
- *
- * Note that the status is never updated to SHUTDOWN as a result of calling
- * this function. Only glb_shutdown() has the power to set that state.
- *
- * (*) This function mustn't be called during shutting down. */
- GPR_ASSERT(curr_glb_state != GRPC_CHANNEL_SHUTDOWN);
- switch (rr_connectivity_state_) {
- case GRPC_CHANNEL_TRANSIENT_FAILURE:
- case GRPC_CHANNEL_SHUTDOWN:
- GPR_ASSERT(rr_state_error != GRPC_ERROR_NONE);
- break;
- case GRPC_CHANNEL_IDLE:
- case GRPC_CHANNEL_CONNECTING:
- case GRPC_CHANNEL_READY:
- GPR_ASSERT(rr_state_error == GRPC_ERROR_NONE);
- }
+ // Cases 2a and 3a: update an existing policy.
+ // If we have a pending child policy, send the update to the pending
+ // policy (case 3a), else send it to the current policy (case 2a).
+ policy_to_update = pending_child_policy_ != nullptr
+ ? pending_child_policy_.get()
+ : child_policy_.get();
+ }
+ GPR_ASSERT(policy_to_update != nullptr);
+ // Update the policy.
  if (grpc_lb_glb_trace.enabled()) {
- gpr_log(
- GPR_INFO,
- "[grpclb %p] Setting grpclb's state to %s from new RR policy %p state.",
- this, grpc_connectivity_state_name(rr_connectivity_state_),
- rr_policy_.get());
+ gpr_log(GPR_INFO, "[grpclb %p] Updating %schild policy %p", this,
+ policy_to_update == pending_child_policy_.get() ? "pending " : "",
+ policy_to_update);
  }
- grpc_connectivity_state_set(&state_tracker_, rr_connectivity_state_,
- rr_state_error,
- "update_lb_connectivity_status_locked");
- }
-
- void GrpcLb::OnRoundRobinConnectivityChangedLocked(void* arg,
- grpc_error* error) {
- GrpcLb* grpclb_policy = static_cast<GrpcLb*>(arg);
- if (grpclb_policy->shutting_down_) {
- grpclb_policy->Unref(DEBUG_LOCATION, "on_rr_connectivity_changed");
- return;
- }
- grpclb_policy->UpdateConnectivityStateFromRoundRobinPolicyLocked(
- GRPC_ERROR_REF(error));
- // Resubscribe. Reuse the "on_rr_connectivity_changed" ref.
- grpclb_policy->rr_policy_->NotifyOnStateChangeLocked(
- &grpclb_policy->rr_connectivity_state_,
- &grpclb_policy->on_rr_connectivity_changed_);
+ policy_to_update->UpdateLocked(std::move(update_args));
  }

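The long comment in the hunk above enumerates the cases; the decision itself reduces to "create when the requested name differs from the most recently created policy, otherwise update that policy". A runnable model of just that decision logic, using std::string/std::unique_ptr stand-ins (illustrative only):

#include <cstdio>
#include <memory>
#include <string>

struct Policy { std::string name; };

struct Lb {
  std::unique_ptr<Policy> child, pending;

  // Mirrors CreateOrUpdateChildPolicyLocked(): compare the requested name
  // against the most recently created policy (pending if present).
  void CreateOrUpdate(const std::string& name) {
    Policy* newest = pending ? pending.get() : child.get();
    if (newest == nullptr) {             // case 1: first policy
      child = std::make_unique<Policy>(Policy{name});
      std::printf("created child %s\n", name.c_str());
    } else if (newest->name != name) {   // cases 2b/3b: name changed
      pending = std::make_unique<Policy>(Policy{name});
      std::printf("created pending child %s\n", name.c_str());
    } else {                             // cases 2a/3a: same name
      std::printf("updated %s %s\n", pending ? "pending" : "current",
                  newest->name.c_str());
    }
  }
  // Called by the helper when the pending child reports READY.
  void SwapPendingIntoPlace() { if (pending) child = std::move(pending); }
};

int main() {
  Lb lb;
  lb.CreateOrUpdate("round_robin");  // case 1
  lb.CreateOrUpdate("round_robin");  // case 2a
  lb.CreateOrUpdate("pick_first");   // case 2b
  lb.CreateOrUpdate("pick_first");   // case 3a
  lb.SwapPendingIntoPlace();
  lb.CreateOrUpdate("pick_first");   // case 2a again
  return 0;
}
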
  //
@@ -1806,18 +1804,6 @@ class GrpcLbFactory : public LoadBalancingPolicyFactory {
  public:
  OrphanablePtr<LoadBalancingPolicy> CreateLoadBalancingPolicy(
  LoadBalancingPolicy::Args args) const override {
- /* Count the number of gRPC-LB addresses. There must be at least one. */
- const ServerAddressList* addresses =
- FindServerAddressListChannelArg(args.args);
- if (addresses == nullptr) return nullptr;
- bool found_balancer = false;
- for (size_t i = 0; i < addresses->size(); ++i) {
- if ((*addresses)[i].IsBalancer()) {
- found_balancer = true;
- break;
- }
- }
- if (!found_balancer) return nullptr;
  return OrphanablePtr<LoadBalancingPolicy>(New<GrpcLb>(std::move(args)));
  }