grpc 1.20.0 → 1.21.0

Potentially problematic release: this version of grpc might be problematic.

Files changed (209)
  1. checksums.yaml +4 -4
  2. data/Makefile +500 -29
  3. data/etc/roots.pem +146 -0
  4. data/include/grpc/grpc_security.h +1 -1
  5. data/include/grpc/impl/codegen/grpc_types.h +10 -7
  6. data/include/grpc/impl/codegen/port_platform.h +11 -1
  7. data/include/grpc/impl/codegen/slice.h +1 -21
  8. data/include/grpc/impl/codegen/status.h +2 -1
  9. data/include/grpc/slice.h +1 -1
  10. data/src/core/ext/filters/client_channel/backup_poller.cc +19 -13
  11. data/src/core/ext/filters/client_channel/backup_poller.h +3 -0
  12. data/src/core/ext/filters/client_channel/channel_connectivity.cc +1 -1
  13. data/src/core/ext/filters/client_channel/client_channel.cc +2084 -1673
  14. data/src/core/ext/filters/client_channel/client_channel_channelz.cc +2 -3
  15. data/src/core/ext/filters/client_channel/client_channel_plugin.cc +4 -0
  16. data/src/core/ext/filters/client_channel/health/health_check_client.cc +54 -49
  17. data/src/core/ext/filters/client_channel/health/health_check_client.h +20 -9
  18. data/src/core/ext/filters/client_channel/http_connect_handshaker.cc +1 -2
  19. data/src/core/ext/filters/client_channel/http_connect_handshaker.h +1 -1
  20. data/src/core/ext/filters/client_channel/lb_policy.cc +3 -30
  21. data/src/core/ext/filters/client_channel/lb_policy.h +16 -25
  22. data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc +106 -81
  23. data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.cc +6 -2
  24. data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.h +8 -12
  25. data/src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.cc +2 -2
  26. data/src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.h +1 -1
  27. data/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +57 -49
  28. data/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc +47 -41
  29. data/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h +24 -20
  30. data/src/core/ext/filters/client_channel/lb_policy/xds/xds.cc +989 -284
  31. data/src/core/ext/filters/client_channel/lb_policy_factory.h +4 -1
  32. data/src/core/ext/filters/client_channel/lb_policy_registry.cc +105 -2
  33. data/src/core/ext/filters/client_channel/lb_policy_registry.h +9 -2
  34. data/src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.cc +79 -36
  35. data/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver.cc +84 -2
  36. data/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver.h +3 -0
  37. data/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver_libuv.cc +179 -0
  38. data/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver_windows.cc +15 -3
  39. data/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.cc +80 -4
  40. data/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.h +7 -13
  41. data/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_fallback.cc +2 -2
  42. data/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_libuv.cc +39 -0
  43. data/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_posix.cc +0 -6
  44. data/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_windows.cc +2 -64
  45. data/src/core/ext/filters/client_channel/resolver/dns/dns_resolver_selection.cc +28 -0
  46. data/src/core/ext/filters/client_channel/resolver/dns/dns_resolver_selection.h +29 -0
  47. data/src/core/ext/filters/client_channel/resolver/dns/native/dns_resolver.cc +4 -4
  48. data/src/core/ext/filters/client_channel/resolver_result_parsing.cc +367 -232
  49. data/src/core/ext/filters/client_channel/resolver_result_parsing.h +55 -76
  50. data/src/core/ext/filters/client_channel/resolving_lb_policy.cc +50 -39
  51. data/src/core/ext/filters/client_channel/resolving_lb_policy.h +18 -12
  52. data/src/core/ext/filters/client_channel/service_config.cc +247 -27
  53. data/src/core/ext/filters/client_channel/service_config.h +119 -166
  54. data/src/core/ext/filters/client_channel/subchannel.cc +46 -84
  55. data/src/core/ext/filters/client_channel/subchannel.h +7 -7
  56. data/src/core/ext/filters/deadline/deadline_filter.cc +3 -4
  57. data/src/core/ext/filters/deadline/deadline_filter.h +3 -2
  58. data/src/core/ext/filters/http/client/http_client_filter.cc +7 -5
  59. data/src/core/ext/filters/http/client/http_client_filter.h +1 -1
  60. data/src/core/ext/filters/http/client_authority_filter.cc +1 -1
  61. data/src/core/ext/filters/http/message_compress/message_compress_filter.cc +4 -3
  62. data/src/core/ext/filters/http/server/http_server_filter.cc +18 -12
  63. data/src/core/ext/filters/message_size/message_size_filter.cc +118 -76
  64. data/src/core/ext/filters/message_size/message_size_filter.h +33 -0
  65. data/src/core/ext/transport/chttp2/alpn/alpn.h +1 -1
  66. data/src/core/ext/transport/chttp2/transport/chttp2_plugin.cc +9 -7
  67. data/src/core/ext/transport/chttp2/transport/chttp2_transport.cc +93 -60
  68. data/src/core/ext/transport/chttp2/transport/flow_control.h +1 -1
  69. data/src/core/ext/transport/chttp2/transport/frame_settings.cc +4 -3
  70. data/src/core/ext/transport/chttp2/transport/hpack_encoder.cc +3 -3
  71. data/src/core/ext/transport/chttp2/transport/hpack_parser.cc +8 -2
  72. data/src/core/ext/transport/chttp2/transport/hpack_table.cc +2 -2
  73. data/src/core/ext/transport/chttp2/transport/incoming_metadata.cc +1 -1
  74. data/src/core/ext/transport/chttp2/transport/incoming_metadata.h +3 -2
  75. data/src/core/ext/transport/chttp2/transport/internal.h +35 -23
  76. data/src/core/ext/transport/chttp2/transport/parsing.cc +4 -4
  77. data/src/core/ext/transport/chttp2/transport/stream_lists.cc +3 -3
  78. data/src/core/ext/transport/chttp2/transport/writing.cc +61 -27
  79. data/src/core/ext/transport/inproc/inproc_transport.cc +18 -18
  80. data/src/core/lib/channel/channel_args.cc +0 -101
  81. data/src/core/lib/channel/channel_args.h +0 -37
  82. data/src/core/lib/channel/channel_stack.h +9 -5
  83. data/src/core/lib/channel/channelz_registry.cc +1 -1
  84. data/src/core/lib/channel/connected_channel.cc +2 -2
  85. data/src/core/lib/channel/context.h +3 -0
  86. data/src/core/lib/channel/handshaker.cc +4 -4
  87. data/src/core/lib/channel/handshaker.h +1 -1
  88. data/src/core/lib/compression/compression_args.cc +127 -0
  89. data/src/core/lib/compression/compression_args.h +55 -0
  90. data/src/core/lib/debug/trace.cc +13 -7
  91. data/src/core/lib/debug/trace.h +12 -0
  92. data/src/core/lib/gpr/arena.h +13 -9
  93. data/src/core/lib/gpr/env.h +2 -5
  94. data/src/core/lib/gpr/env_linux.cc +6 -1
  95. data/src/core/lib/gpr/env_posix.cc +5 -0
  96. data/src/core/lib/gpr/env_windows.cc +7 -5
  97. data/src/core/lib/gpr/log.cc +9 -13
  98. data/src/core/lib/gpr/string.cc +12 -6
  99. data/src/core/lib/gpr/string.h +4 -2
  100. data/src/core/lib/gpr/time_posix.cc +13 -0
  101. data/src/core/lib/gprpp/arena.cc +103 -0
  102. data/src/core/lib/gprpp/arena.h +121 -0
  103. data/src/core/lib/gprpp/fork.cc +12 -29
  104. data/src/core/lib/gprpp/global_config.h +87 -0
  105. data/src/core/lib/gprpp/global_config_custom.h +29 -0
  106. data/src/core/lib/gprpp/global_config_env.cc +135 -0
  107. data/src/core/lib/gprpp/global_config_env.h +131 -0
  108. data/src/core/lib/gprpp/global_config_generic.h +44 -0
  109. data/src/core/lib/gprpp/map.h +419 -0
  110. data/src/core/lib/gprpp/optional.h +1 -0
  111. data/src/core/lib/gprpp/orphanable.h +2 -2
  112. data/src/core/lib/gprpp/{mutex_lock.h → pair.h} +15 -19
  113. data/src/core/lib/gprpp/ref_counted.h +18 -2
  114. data/src/core/lib/gprpp/sync.h +126 -0
  115. data/src/core/lib/http/parser.cc +1 -1
  116. data/src/core/lib/iomgr/call_combiner.cc +84 -90
  117. data/src/core/lib/iomgr/call_combiner.h +75 -82
  118. data/src/core/lib/iomgr/cfstream_handle.cc +202 -0
  119. data/src/core/lib/iomgr/cfstream_handle.h +82 -0
  120. data/src/core/lib/iomgr/combiner.h +1 -1
  121. data/src/core/lib/iomgr/endpoint_cfstream.cc +375 -0
  122. data/src/core/lib/iomgr/endpoint_cfstream.h +49 -0
  123. data/src/core/lib/iomgr/endpoint_pair_windows.cc +2 -2
  124. data/src/core/lib/iomgr/error.h +23 -0
  125. data/src/core/lib/iomgr/error_cfstream.cc +52 -0
  126. data/src/core/lib/iomgr/error_cfstream.h +31 -0
  127. data/src/core/lib/iomgr/ev_epoll1_linux.cc +34 -27
  128. data/src/core/lib/iomgr/ev_epollex_linux.cc +33 -33
  129. data/src/core/lib/iomgr/ev_poll_posix.cc +7 -7
  130. data/src/core/lib/iomgr/ev_posix.cc +15 -13
  131. data/src/core/lib/iomgr/ev_posix.h +4 -1
  132. data/src/core/lib/iomgr/executor.cc +13 -9
  133. data/src/core/lib/iomgr/fork_posix.cc +0 -1
  134. data/src/core/lib/iomgr/internal_errqueue.cc +1 -1
  135. data/src/core/lib/iomgr/iomgr.cc +6 -5
  136. data/src/core/lib/iomgr/iomgr_custom.cc +3 -0
  137. data/src/core/lib/iomgr/iomgr_custom.h +2 -0
  138. data/src/core/lib/iomgr/iomgr_posix_cfstream.cc +93 -0
  139. data/src/core/lib/iomgr/iomgr_windows.cc +1 -0
  140. data/src/core/lib/iomgr/lockfree_event.cc +3 -3
  141. data/src/core/lib/iomgr/port.h +11 -0
  142. data/src/core/lib/iomgr/resource_quota.cc +40 -37
  143. data/src/core/lib/iomgr/socket_utils_common_posix.cc +6 -2
  144. data/src/core/lib/iomgr/socket_windows.cc +19 -0
  145. data/src/core/lib/iomgr/socket_windows.h +8 -0
  146. data/src/core/lib/iomgr/tcp_client_cfstream.cc +216 -0
  147. data/src/core/lib/iomgr/tcp_client_custom.cc +2 -2
  148. data/src/core/lib/iomgr/tcp_client_posix.cc +3 -3
  149. data/src/core/lib/iomgr/tcp_client_windows.cc +1 -1
  150. data/src/core/lib/iomgr/tcp_custom.cc +9 -9
  151. data/src/core/lib/iomgr/tcp_posix.cc +41 -41
  152. data/src/core/lib/iomgr/tcp_server_custom.cc +3 -3
  153. data/src/core/lib/iomgr/tcp_server_posix.cc +14 -1
  154. data/src/core/lib/iomgr/tcp_server_windows.cc +2 -2
  155. data/src/core/lib/iomgr/tcp_windows.cc +7 -9
  156. data/src/core/lib/iomgr/timer_generic.cc +16 -16
  157. data/src/core/lib/iomgr/timer_manager.cc +12 -11
  158. data/src/core/lib/profiling/basic_timers.cc +10 -4
  159. data/src/core/lib/security/context/security_context.cc +6 -7
  160. data/src/core/lib/security/context/security_context.h +3 -4
  161. data/src/core/lib/security/credentials/jwt/jwt_credentials.cc +1 -1
  162. data/src/core/lib/security/credentials/jwt/jwt_verifier.cc +2 -3
  163. data/src/core/lib/security/credentials/oauth2/oauth2_credentials.cc +1 -1
  164. data/src/core/lib/security/credentials/plugin/plugin_credentials.cc +7 -7
  165. data/src/core/lib/security/security_connector/load_system_roots_linux.cc +7 -5
  166. data/src/core/lib/security/security_connector/security_connector.cc +0 -1
  167. data/src/core/lib/security/security_connector/ssl/ssl_security_connector.cc +3 -2
  168. data/src/core/lib/security/security_connector/ssl_utils.cc +30 -26
  169. data/src/core/lib/security/security_connector/ssl_utils.h +5 -1
  170. data/src/core/lib/security/transport/client_auth_filter.cc +7 -11
  171. data/src/core/lib/security/transport/secure_endpoint.cc +4 -4
  172. data/src/core/lib/security/transport/server_auth_filter.cc +2 -3
  173. data/src/core/lib/slice/slice.cc +99 -116
  174. data/src/core/lib/slice/slice_buffer.cc +5 -0
  175. data/src/core/lib/slice/slice_intern.cc +38 -95
  176. data/src/core/lib/slice/slice_internal.h +200 -2
  177. data/src/core/lib/surface/api_trace.h +1 -1
  178. data/src/core/lib/surface/call.cc +41 -35
  179. data/src/core/lib/surface/call.h +7 -2
  180. data/src/core/lib/surface/call_details.cc +0 -1
  181. data/src/core/lib/surface/completion_queue.cc +36 -27
  182. data/src/core/lib/surface/init.cc +3 -4
  183. data/src/core/lib/surface/lame_client.cc +1 -1
  184. data/src/core/lib/surface/server.cc +18 -25
  185. data/src/core/lib/surface/version.cc +1 -1
  186. data/src/core/lib/transport/bdp_estimator.cc +3 -3
  187. data/src/core/lib/transport/bdp_estimator.h +2 -2
  188. data/src/core/lib/transport/connectivity_state.cc +10 -40
  189. data/src/core/lib/transport/connectivity_state.h +0 -8
  190. data/src/core/lib/transport/error_utils.cc +12 -0
  191. data/src/core/lib/transport/metadata.cc +206 -278
  192. data/src/core/lib/transport/metadata.h +205 -10
  193. data/src/core/lib/transport/static_metadata.cc +108 -116
  194. data/src/core/lib/transport/static_metadata.h +1 -2
  195. data/src/core/lib/transport/status_metadata.cc +3 -3
  196. data/src/core/lib/transport/transport.cc +29 -66
  197. data/src/core/lib/transport/transport.h +36 -8
  198. data/src/core/lib/transport/transport_impl.h +1 -1
  199. data/src/core/tsi/fake_transport_security.cc +4 -4
  200. data/src/core/tsi/ssl/session_cache/ssl_session_cache.cc +1 -1
  201. data/src/core/tsi/ssl_transport_security.cc +1 -1
  202. data/src/ruby/ext/grpc/rb_grpc.c +1 -1
  203. data/src/ruby/lib/grpc/errors.rb +22 -3
  204. data/src/ruby/lib/grpc/generic/bidi_call.rb +1 -1
  205. data/src/ruby/lib/grpc/generic/rpc_server.rb +1 -1
  206. data/src/ruby/lib/grpc/version.rb +1 -1
  207. data/src/ruby/spec/errors_spec.rb +141 -0
  208. metadata +57 -33
  209. data/src/core/lib/gpr/arena.cc +0 -192
data/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h

@@ -51,7 +51,7 @@ class MySubchannelData
    : public SubchannelData<MySubchannelList, MySubchannelData> {
   public:
   void ProcessConnectivityChangeLocked(
-      grpc_connectivity_state connectivity_state, grpc_error* error) override {
+      grpc_connectivity_state connectivity_state) override {
   // ...code to handle connectivity changes...
   }
   };
@@ -101,10 +101,10 @@ class SubchannelData {
   // pending (i.e., between calling StartConnectivityWatchLocked() or
   // RenewConnectivityWatchLocked() and the resulting invocation of
   // ProcessConnectivityChangeLocked()).
-  grpc_connectivity_state CheckConnectivityStateLocked(grpc_error** error) {
+  grpc_connectivity_state CheckConnectivityStateLocked() {
   GPR_ASSERT(!connectivity_notification_pending_);
   pending_connectivity_state_unsafe_ = subchannel()->CheckConnectivity(
-      error, subchannel_list_->inhibit_health_checking());
+      subchannel_list_->inhibit_health_checking());
   UpdateConnectedSubchannelLocked();
   return pending_connectivity_state_unsafe_;
   }
@@ -153,8 +153,7 @@ class SubchannelData {
   // Implementations must invoke either RenewConnectivityWatchLocked() or
   // StopConnectivityWatchLocked() before returning.
   virtual void ProcessConnectivityChangeLocked(
-      grpc_connectivity_state connectivity_state,
-      grpc_error* error) GRPC_ABSTRACT;
+      grpc_connectivity_state connectivity_state) GRPC_ABSTRACT;

   // Unrefs the subchannel.
   void UnrefSubchannelLocked(const char* reason);
@@ -299,7 +298,7 @@ template <typename SubchannelListType, typename SubchannelDataType>
   void SubchannelData<SubchannelListType, SubchannelDataType>::
   UnrefSubchannelLocked(const char* reason) {
   if (subchannel_ != nullptr) {
-    if (subchannel_list_->tracer()->enabled()) {
+    if (GRPC_TRACE_FLAG_ENABLED(*subchannel_list_->tracer())) {
   gpr_log(GPR_INFO,
   "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR
   " (subchannel %p): unreffing subchannel",
@@ -324,7 +323,7 @@ void SubchannelData<SubchannelListType,
   template <typename SubchannelListType, typename SubchannelDataType>
   void SubchannelData<SubchannelListType,
   SubchannelDataType>::StartConnectivityWatchLocked() {
-  if (subchannel_list_->tracer()->enabled()) {
+  if (GRPC_TRACE_FLAG_ENABLED(*subchannel_list_->tracer())) {
   gpr_log(GPR_INFO,
   "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR
   " (subchannel %p): starting watch: requesting connectivity change "
@@ -346,7 +345,7 @@ void SubchannelData<SubchannelListType,
   template <typename SubchannelListType, typename SubchannelDataType>
   void SubchannelData<SubchannelListType,
   SubchannelDataType>::RenewConnectivityWatchLocked() {
-  if (subchannel_list_->tracer()->enabled()) {
+  if (GRPC_TRACE_FLAG_ENABLED(*subchannel_list_->tracer())) {
   gpr_log(GPR_INFO,
   "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR
   " (subchannel %p): renewing watch: requesting connectivity change "
@@ -366,7 +365,7 @@ void SubchannelData<SubchannelListType,
   template <typename SubchannelListType, typename SubchannelDataType>
   void SubchannelData<SubchannelListType,
   SubchannelDataType>::StopConnectivityWatchLocked() {
-  if (subchannel_list_->tracer()->enabled()) {
+  if (GRPC_TRACE_FLAG_ENABLED(*subchannel_list_->tracer())) {
   gpr_log(GPR_INFO,
   "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR
   " (subchannel %p): stopping connectivity watch",
@@ -382,7 +381,7 @@ void SubchannelData<SubchannelListType,
   template <typename SubchannelListType, typename SubchannelDataType>
   void SubchannelData<SubchannelListType, SubchannelDataType>::
   CancelConnectivityWatchLocked(const char* reason) {
-  if (subchannel_list_->tracer()->enabled()) {
+  if (GRPC_TRACE_FLAG_ENABLED(*subchannel_list_->tracer())) {
   gpr_log(GPR_INFO,
   "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR
   " (subchannel %p): canceling connectivity watch (%s)",
@@ -414,7 +413,7 @@ bool SubchannelData<SubchannelListType,
   // is READY again (e.g., if the subchannel has transitioned back to
   // READY before the next watch gets requested).
   if (connected_subchannel_ == nullptr) {
-    if (subchannel_list_->tracer()->enabled()) {
+    if (GRPC_TRACE_FLAG_ENABLED(*subchannel_list_->tracer())) {
   gpr_log(GPR_INFO,
   "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR
   " (subchannel %p): state is READY but connected subchannel is "
@@ -437,7 +436,7 @@ template <typename SubchannelListType, typename SubchannelDataType>
   void SubchannelData<SubchannelListType, SubchannelDataType>::
   OnConnectivityChangedLocked(void* arg, grpc_error* error) {
   SubchannelData* sd = static_cast<SubchannelData*>(arg);
-  if (sd->subchannel_list_->tracer()->enabled()) {
+  if (GRPC_TRACE_FLAG_ENABLED(*sd->subchannel_list_->tracer())) {
   gpr_log(
   GPR_INFO,
   "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR
@@ -462,8 +461,7 @@ void SubchannelData<SubchannelListType, SubchannelDataType>::
   return;
   }
   // Call the subclass's ProcessConnectivityChangeLocked() method.
-  sd->ProcessConnectivityChangeLocked(sd->pending_connectivity_state_unsafe_,
-                                      GRPC_ERROR_REF(error));
+  sd->ProcessConnectivityChangeLocked(sd->pending_connectivity_state_unsafe_);
   }

   template <typename SubchannelListType, typename SubchannelDataType>
@@ -492,7 +490,7 @@ SubchannelList<SubchannelListType, SubchannelDataType>::SubchannelList(
   policy_(policy),
   tracer_(tracer),
   combiner_(GRPC_COMBINER_REF(combiner, "subchannel_list")) {
-  if (tracer_->enabled()) {
+  if (GRPC_TRACE_FLAG_ENABLED(*tracer_)) {
   gpr_log(GPR_INFO,
   "[%s %p] Creating subchannel list %p for %" PRIuPTR " subchannels",
   tracer_->name(), policy, this, addresses.size());
@@ -508,7 +506,13 @@ SubchannelList<SubchannelListType, SubchannelDataType>::SubchannelList(
   GRPC_ARG_INHIBIT_HEALTH_CHECKING};
   // Create a subchannel for each address.
   for (size_t i = 0; i < addresses.size(); i++) {
-    GPR_ASSERT(!addresses[i].IsBalancer());
+    // TODO(roth): we should ideally hide this from the LB policy code. In
+    // principle, if we're dealing with this special case in the client_channel
+    // code for selecting grpclb, then we should also strip out these addresses
+    // there if we're not using grpclb.
+    if (addresses[i].IsBalancer()) {
+      continue;
+    }
   InlinedVector<grpc_arg, 3> args_to_add;
   const size_t subchannel_address_arg_index = args_to_add.size();
   args_to_add.emplace_back(
@@ -526,7 +530,7 @@ SubchannelList<SubchannelListType, SubchannelDataType>::SubchannelList(
   grpc_channel_args_destroy(new_args);
   if (subchannel == nullptr) {
   // Subchannel could not be created.
-    if (tracer_->enabled()) {
+    if (GRPC_TRACE_FLAG_ENABLED(*tracer_)) {
   char* address_uri = grpc_sockaddr_to_uri(&addresses[i].address());
   gpr_log(GPR_INFO,
   "[%s %p] could not create subchannel for address uri %s, "
@@ -536,7 +540,7 @@ SubchannelList<SubchannelListType, SubchannelDataType>::SubchannelList(
   }
   continue;
   }
-  if (tracer_->enabled()) {
+  if (GRPC_TRACE_FLAG_ENABLED(*tracer_)) {
   char* address_uri = grpc_sockaddr_to_uri(&addresses[i].address());
   gpr_log(GPR_INFO,
   "[%s %p] subchannel list %p index %" PRIuPTR
@@ -551,7 +555,7 @@ SubchannelList<SubchannelListType, SubchannelDataType>::SubchannelList(

   template <typename SubchannelListType, typename SubchannelDataType>
   SubchannelList<SubchannelListType, SubchannelDataType>::~SubchannelList() {
-  if (tracer_->enabled()) {
+  if (GRPC_TRACE_FLAG_ENABLED(*tracer_)) {
   gpr_log(GPR_INFO, "[%s %p] Destroying subchannel_list %p", tracer_->name(),
   policy_, this);
   }
@@ -560,7 +564,7 @@ SubchannelList<SubchannelListType, SubchannelDataType>::~SubchannelList() {

   template <typename SubchannelListType, typename SubchannelDataType>
   void SubchannelList<SubchannelListType, SubchannelDataType>::ShutdownLocked() {
-  if (tracer_->enabled()) {
+  if (GRPC_TRACE_FLAG_ENABLED(*tracer_)) {
   gpr_log(GPR_INFO, "[%s %p] Shutting down subchannel_list %p",
   tracer_->name(), policy_, this);
   }
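
Most of the subchannel_list.h hunks above are a mechanical migration from the tracer_->enabled() method to the new GRPC_TRACE_FLAG_ENABLED() macro around gpr_log() calls, plus dropping the grpc_error* argument from the connectivity-change path. What follows is a minimal standalone sketch of the guard-before-log idea only; it is not gRPC's TraceFlag implementation, and every name in it is illustrative.

#include <atomic>
#include <cstdio>

// Illustrative stand-in for a trace flag: a named boolean that can be
// flipped at runtime (e.g. from an environment variable).
class TraceFlagSketch {
 public:
  explicit TraceFlagSketch(bool enabled) : enabled_(enabled) {}
  bool enabled() const { return enabled_.load(std::memory_order_relaxed); }
  void set_enabled(bool on) { enabled_.store(on, std::memory_order_relaxed); }

 private:
  std::atomic<bool> enabled_;
};

// Macro form of the guard: the log arguments are only evaluated when the
// flag is on.
#define TRACE_FLAG_ENABLED_SKETCH(flag) ((flag).enabled())

static TraceFlagSketch g_subchannel_trace(false);

void UnrefSubchannelSketch(const char* reason) {
  if (TRACE_FLAG_ENABLED_SKETCH(g_subchannel_trace)) {
    std::fprintf(stderr, "unreffing subchannel (%s)\n", reason);
  }
}

int main() {
  UnrefSubchannelSketch("quiet");       // no output, flag is off
  g_subchannel_trace.set_enabled(true);
  UnrefSubchannelSketch("now traced");  // logs, flag is on
  return 0;
}

Checking the flag before formatting keeps disabled tracing essentially free, which is why the diff wraps every gpr_log() call in such a guard.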
data/src/core/ext/filters/client_channel/lb_policy/xds/xds.cc

@@ -68,7 +68,9 @@
   #include <grpc/support/string_util.h>
   #include <grpc/support/time.h>

+ #include "include/grpc/support/alloc.h"
   #include "src/core/ext/filters/client_channel/client_channel.h"
+ #include "src/core/ext/filters/client_channel/lb_policy.h"
   #include "src/core/ext/filters/client_channel/lb_policy/xds/xds.h"
   #include "src/core/ext/filters/client_channel/lb_policy/xds/xds_channel.h"
   #include "src/core/ext/filters/client_channel/lb_policy/xds/xds_client_stats.h"
@@ -85,10 +87,11 @@
   #include "src/core/lib/gpr/host_port.h"
   #include "src/core/lib/gpr/string.h"
   #include "src/core/lib/gprpp/manual_constructor.h"
+ #include "src/core/lib/gprpp/map.h"
   #include "src/core/lib/gprpp/memory.h"
- #include "src/core/lib/gprpp/mutex_lock.h"
   #include "src/core/lib/gprpp/orphanable.h"
   #include "src/core/lib/gprpp/ref_counted_ptr.h"
+ #include "src/core/lib/gprpp/sync.h"
   #include "src/core/lib/iomgr/combiner.h"
   #include "src/core/lib/iomgr/sockaddr.h"
   #include "src/core/lib/iomgr/sockaddr_utils.h"
@@ -114,6 +117,35 @@ TraceFlag grpc_lb_xds_trace(false, "xds");
   namespace {

   constexpr char kXds[] = "xds_experimental";
+ constexpr char kDefaultLocalityName[] = "xds_default_locality";
+ constexpr uint32_t kDefaultLocalityWeight = 3;
+
+ class ParsedXdsConfig : public ParsedLoadBalancingConfig {
+  public:
+   ParsedXdsConfig(const char* balancer_name,
+                   RefCountedPtr<ParsedLoadBalancingConfig> child_policy,
+                   RefCountedPtr<ParsedLoadBalancingConfig> fallback_policy)
+       : balancer_name_(balancer_name),
+         child_policy_(std::move(child_policy)),
+         fallback_policy_(std::move(fallback_policy)) {}
+
+   const char* name() const override { return kXds; }
+
+   const char* balancer_name() const { return balancer_name_; };
+
+   RefCountedPtr<ParsedLoadBalancingConfig> child_policy() const {
+     return child_policy_;
+   }
+
+   RefCountedPtr<ParsedLoadBalancingConfig> fallback_policy() const {
+     return fallback_policy_;
+   }
+
+  private:
+   const char* balancer_name_ = nullptr;
+   RefCountedPtr<ParsedLoadBalancingConfig> child_policy_;
+   RefCountedPtr<ParsedLoadBalancingConfig> fallback_policy_;
+ };

   class XdsLb : public LoadBalancingPolicy {
   public:
@@ -128,6 +160,9 @@ class XdsLb : public LoadBalancingPolicy {
   channelz::ChildRefsList* child_channels) override;

   private:
+   struct LocalityServerlistEntry;
+   using LocalityList = InlinedVector<UniquePtr<LocalityServerlistEntry>, 1>;
+
   /// Contains a channel to the LB server and all the data related to the
   /// channel.
   class BalancerChannelState
@@ -232,6 +267,10 @@ class XdsLb : public LoadBalancingPolicy {
   static void OnCallRetryTimerLocked(void* arg, grpc_error* error);
   void StartCallLocked();

+   void StartConnectivityWatchLocked();
+   void CancelConnectivityWatchLocked();
+   static void OnConnectivityChangedLocked(void* arg, grpc_error* error);
+
   private:
   // The owning LB policy.
   RefCountedPtr<XdsLb> xdslb_policy_;
@@ -239,6 +278,8 @@ class XdsLb : public LoadBalancingPolicy {
   // The channel and its status.
   grpc_channel* channel_;
   bool shutting_down_ = false;
+   grpc_connectivity_state connectivity_ = GRPC_CHANNEL_IDLE;
+   grpc_closure on_connectivity_changed_;

   // The data associated with the current LB call. It holds a ref to this LB
   // channel. It's instantiated every time we query for backends. It's reset
@@ -252,41 +293,151 @@ class XdsLb : public LoadBalancingPolicy {
   bool retry_timer_callback_pending_ = false;
   };

+   // Since pickers are UniquePtrs we use this RefCounted wrapper
+   // to control references to it by the xds picker and the locality
+   // entry
+   class PickerRef : public RefCounted<PickerRef> {
+    public:
+     explicit PickerRef(UniquePtr<SubchannelPicker> picker)
+         : picker_(std::move(picker)) {}
+     PickResult Pick(PickArgs* pick, grpc_error** error) {
+       return picker_->Pick(pick, error);
+     }
+
+    private:
+     UniquePtr<SubchannelPicker> picker_;
+   };
+
+   // The picker will use a stateless weighting algorithm to pick the locality to
+   // use for each request.
   class Picker : public SubchannelPicker {
   public:
-    Picker(UniquePtr<SubchannelPicker> child_picker,
-           RefCountedPtr<XdsLbClientStats> client_stats)
-        : child_picker_(std::move(child_picker)),
-          client_stats_(std::move(client_stats)) {}
+     // Maintains a weighted list of pickers from each locality that is in ready
+     // state. The first element in the pair represents the end of a range
+     // proportional to the locality's weight. The start of the range is the
+     // previous value in the vector and is 0 for the first element.
+     using PickerList =
+         InlinedVector<Pair<uint32_t, RefCountedPtr<PickerRef>>, 1>;
+     Picker(RefCountedPtr<XdsLbClientStats> client_stats, PickerList pickers)
+         : client_stats_(std::move(client_stats)),
+           pickers_(std::move(pickers)) {}

   PickResult Pick(PickArgs* pick, grpc_error** error) override;

   private:
-    UniquePtr<SubchannelPicker> child_picker_;
+     // Calls the picker of the locality that the key falls within
+     PickResult PickFromLocality(const uint32_t key, PickArgs* pick,
+                                 grpc_error** error);
   RefCountedPtr<XdsLbClientStats> client_stats_;
+     PickerList pickers_;
   };

-  class Helper : public ChannelControlHelper {
+   class FallbackHelper : public ChannelControlHelper {
   public:
-    explicit Helper(RefCountedPtr<XdsLb> parent) : parent_(std::move(parent)) {}
+     explicit FallbackHelper(RefCountedPtr<XdsLb> parent)
+         : parent_(std::move(parent)) {}

   Subchannel* CreateSubchannel(const grpc_channel_args& args) override;
   grpc_channel* CreateChannel(const char* target,
   const grpc_channel_args& args) override;
-    void UpdateState(grpc_connectivity_state state, grpc_error* state_error,
+     void UpdateState(grpc_connectivity_state state,
   UniquePtr<SubchannelPicker> picker) override;
   void RequestReresolution() override;

   void set_child(LoadBalancingPolicy* child) { child_ = child; }

   private:
-    bool CalledByPendingChild() const;
-    bool CalledByCurrentChild() const;
+     bool CalledByPendingFallback() const;
+     bool CalledByCurrentFallback() const;

   RefCountedPtr<XdsLb> parent_;
   LoadBalancingPolicy* child_ = nullptr;
   };

+   class LocalityMap {
+    public:
+     class LocalityEntry : public InternallyRefCounted<LocalityEntry> {
+      public:
+       LocalityEntry(RefCountedPtr<XdsLb> parent, uint32_t locality_weight)
+           : parent_(std::move(parent)), locality_weight_(locality_weight) {}
+       ~LocalityEntry() = default;
+
+       void UpdateLocked(xds_grpclb_serverlist* serverlist,
+                         ParsedLoadBalancingConfig* child_policy_config,
+                         const grpc_channel_args* args);
+       void ShutdownLocked();
+       void ResetBackoffLocked();
+       void FillChildRefsForChannelz(channelz::ChildRefsList* child_subchannels,
+                                     channelz::ChildRefsList* child_channels);
+       void Orphan() override;
+
+      private:
+       class Helper : public ChannelControlHelper {
+        public:
+         explicit Helper(RefCountedPtr<LocalityEntry> entry)
+             : entry_(std::move(entry)) {}
+
+         Subchannel* CreateSubchannel(const grpc_channel_args& args) override;
+         grpc_channel* CreateChannel(const char* target,
+                                     const grpc_channel_args& args) override;
+         void UpdateState(grpc_connectivity_state state,
+                          UniquePtr<SubchannelPicker> picker) override;
+         void RequestReresolution() override;
+         void set_child(LoadBalancingPolicy* child) { child_ = child; }
+
+        private:
+         bool CalledByPendingChild() const;
+         bool CalledByCurrentChild() const;
+
+         RefCountedPtr<LocalityEntry> entry_;
+         LoadBalancingPolicy* child_ = nullptr;
+       };
+       // Methods for dealing with the child policy.
+       OrphanablePtr<LoadBalancingPolicy> CreateChildPolicyLocked(
+           const char* name, const grpc_channel_args* args);
+       grpc_channel_args* CreateChildPolicyArgsLocked(
+           const grpc_channel_args* args);
+
+       OrphanablePtr<LoadBalancingPolicy> child_policy_;
+       OrphanablePtr<LoadBalancingPolicy> pending_child_policy_;
+       // Lock held when modifying the value of child_policy_ or
+       // pending_child_policy_.
+       Mutex child_policy_mu_;
+       RefCountedPtr<XdsLb> parent_;
+       RefCountedPtr<PickerRef> picker_ref_;
+       grpc_connectivity_state connectivity_state_;
+       uint32_t locality_weight_;
+     };
+
+     void UpdateLocked(const LocalityList& locality_list,
+                       ParsedLoadBalancingConfig* child_policy_config,
+                       const grpc_channel_args* args, XdsLb* parent);
+     void ShutdownLocked();
+     void ResetBackoffLocked();
+     void FillChildRefsForChannelz(channelz::ChildRefsList* child_subchannels,
+                                   channelz::ChildRefsList* child_channels);
+
+    private:
+     void PruneLocalities(const LocalityList& locality_list);
+     Map<UniquePtr<char>, OrphanablePtr<LocalityEntry>, StringLess> map_;
+     // Lock held while filling child refs for all localities
+     // inside the map
+     Mutex child_refs_mu_;
+   };
+
+   struct LocalityServerlistEntry {
+     ~LocalityServerlistEntry() {
+       gpr_free(locality_name);
+       xds_grpclb_destroy_serverlist(serverlist);
+     }
+
+     char* locality_name;
+     uint32_t locality_weight;
+     // The deserialized response from the balancer. May be nullptr until one
+     // such response has arrived.
+     xds_grpclb_serverlist* serverlist;
+   };
+
   ~XdsLb();

   void ShutdownLocked() override;
@@ -299,21 +450,20 @@ class XdsLb : public LoadBalancingPolicy {
   // If parsing succeeds, updates \a balancer_name, and updates \a
   // child_policy_config_ and \a fallback_policy_config_ if they are also
   // found. Does nothing upon failure.
-  void ParseLbConfig(Config* xds_config);
+  void ParseLbConfig(const ParsedXdsConfig* xds_config);

   BalancerChannelState* LatestLbChannel() const {
   return pending_lb_chand_ != nullptr ? pending_lb_chand_.get()
   : lb_chand_.get();
   }

-  // Callback to enter fallback mode.
+  // Methods for dealing with fallback state.
+  void MaybeCancelFallbackAtStartupChecks();
   static void OnFallbackTimerLocked(void* arg, grpc_error* error);
-
-  // Methods for dealing with the child policy.
-  void CreateOrUpdateChildPolicyLocked();
-  grpc_channel_args* CreateChildPolicyArgsLocked();
-  OrphanablePtr<LoadBalancingPolicy> CreateChildPolicyLocked(
+  void UpdateFallbackPolicyLocked();
+  OrphanablePtr<LoadBalancingPolicy> CreateFallbackPolicyLocked(
   const char* name, const grpc_channel_args* args);
+  void MaybeExitFallbackMode();

   // Who the client is trying to communicate with.
   const char* server_name_ = nullptr;
@@ -333,33 +483,48 @@ class XdsLb : public LoadBalancingPolicy {
   // Mutex to protect the channel to the LB server. This is used when
   // processing a channelz request.
   // TODO(juanlishen): Replace this with atomic.
-  gpr_mu lb_chand_mu_;
+  Mutex lb_chand_mu_;

   // Timeout in milliseconds for the LB call. 0 means no deadline.
   int lb_call_timeout_ms_ = 0;

-  // The deserialized response from the balancer. May be nullptr until one
-  // such response has arrived.
-  xds_grpclb_serverlist* serverlist_ = nullptr;
-
+  // Whether the checks for fallback at startup are ALL pending. There are
+  // several cases where this can be reset:
+  // 1. The fallback timer fires, we enter fallback mode.
+  // 2. Before the fallback timer fires, the LB channel becomes
+  // TRANSIENT_FAILURE or the LB call fails, we enter fallback mode.
+  // 3. Before the fallback timer fires, we receive a response from the
+  // balancer, we cancel the fallback timer and use the response to update the
+  // locality map.
+  bool fallback_at_startup_checks_pending_ = false;
   // Timeout in milliseconds for before using fallback backend addresses.
   // 0 means not using fallback.
-  RefCountedPtr<Config> fallback_policy_config_;
   int lb_fallback_timeout_ms_ = 0;
   // The backend addresses from the resolver.
-  UniquePtr<ServerAddressList> fallback_backend_addresses_;
+  ServerAddressList fallback_backend_addresses_;
   // Fallback timer.
-  bool fallback_timer_callback_pending_ = false;
   grpc_timer lb_fallback_timer_;
   grpc_closure lb_on_fallback_;

+  // The policy to use for the fallback backends.
+  RefCountedPtr<ParsedLoadBalancingConfig> fallback_policy_config_;
+  // Lock held when modifying the value of fallback_policy_ or
+  // pending_fallback_policy_.
+  Mutex fallback_policy_mu_;
+  // Non-null iff we are in fallback mode.
+  OrphanablePtr<LoadBalancingPolicy> fallback_policy_;
+  OrphanablePtr<LoadBalancingPolicy> pending_fallback_policy_;
+
   // The policy to use for the backends.
-  RefCountedPtr<Config> child_policy_config_;
-  OrphanablePtr<LoadBalancingPolicy> child_policy_;
-  OrphanablePtr<LoadBalancingPolicy> pending_child_policy_;
-  // Lock held when modifying the value of child_policy_ or
-  // pending_child_policy_.
-  gpr_mu child_policy_mu_;
+  RefCountedPtr<ParsedLoadBalancingConfig> child_policy_config_;
+  // Map of policies to use in the backend
+  LocalityMap locality_map_;
+  // TODO(mhaidry) : Add support for multiple maps of localities
+  // with different priorities
+  LocalityList locality_serverlist_;
+  // TODO(mhaidry) : Add a pending locality map that may be swapped with the
+  // the current one when new localities in the pending map are ready
+  // to accept connections
   };

   //
@@ -368,8 +533,12 @@ class XdsLb : public LoadBalancingPolicy {

   XdsLb::PickResult XdsLb::Picker::Pick(PickArgs* pick, grpc_error** error) {
   // TODO(roth): Add support for drop handling.
-  // Forward pick to child policy.
-  PickResult result = child_picker_->Pick(pick, error);
+  // Generate a random number between 0 and the total weight
+  const uint32_t key =
+      (rand() * pickers_[pickers_.size() - 1].first) / RAND_MAX;
+  // Forward pick to whichever locality maps to the range in which the
+  // random number falls in.
+  PickResult result = PickFromLocality(key, pick, error);
   // If pick succeeded, add client stats.
   if (result == PickResult::PICK_COMPLETE &&
   pick->connected_subchannel != nullptr && client_stats_ != nullptr) {
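
The rewritten Picker spreads picks across localities in proportion to their weights: pickers_ stores cumulative weight boundaries, Pick() draws a random key below the total weight, and PickFromLocality() (in the next hunk) binary-searches for the first boundary greater than the key. Below is a minimal standalone sketch of that cumulative-weight lookup, using std::vector and <random> in place of gRPC's InlinedVector and rand(); all names here are illustrative, not gRPC APIs.

#include <cstdint>
#include <random>
#include <string>
#include <utility>
#include <vector>

// Each entry holds the end (exclusive upper bound) of the range owned by a
// locality; the range starts where the previous entry ended (0 for the first).
struct WeightedPickers {
  std::vector<std::pair<uint32_t, std::string>> cumulative;  // (range end, locality)

  void Add(uint32_t weight, std::string locality) {
    uint32_t prev_end = cumulative.empty() ? 0 : cumulative.back().first;
    cumulative.emplace_back(prev_end + weight, std::move(locality));
  }

  // Picks the locality whose range contains a uniformly random key in
  // [0, total_weight), mirroring the binary search in PickFromLocality().
  const std::string& Pick(std::mt19937& rng) const {
    uint32_t total = cumulative.back().first;
    uint32_t key = std::uniform_int_distribution<uint32_t>(0, total - 1)(rng);
    size_t lo = 0, hi = cumulative.size() - 1;
    while (lo < hi) {
      size_t mid = (lo + hi) / 2;
      if (cumulative[mid].first > key) {
        hi = mid;  // boundary after mid still covers key; search left half
      } else {
        lo = mid + 1;  // key lies beyond mid's range; search right half
      }
    }
    return cumulative[lo].second;
  }
};

int main() {
  std::mt19937 rng(42);
  WeightedPickers pickers;
  pickers.Add(3, "locality-a");  // owns keys [0, 3)
  pickers.Add(1, "locality-b");  // owns keys [3, 4)
  int counts[2] = {0, 0};
  for (int i = 0; i < 4000; ++i) {
    counts[pickers.Pick(rng) == "locality-a" ? 0 : 1]++;
  }
  // Expect roughly a 3:1 split between locality-a and locality-b.
  return counts[0] > counts[1] ? 0 : 1;
}

With weights 3 and 1 the boundaries are 3 and 4, so keys 0 through 2 go to the first locality and key 3 to the second, matching the end-of-range convention described in the PickerList comment above.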
@@ -378,103 +547,101 @@ XdsLb::PickResult XdsLb::Picker::Pick(PickArgs* pick, grpc_error** error) {
   return result;
   }

+ XdsLb::PickResult XdsLb::Picker::PickFromLocality(const uint32_t key,
+                                                   PickArgs* pick,
+                                                   grpc_error** error) {
+   size_t mid = 0;
+   size_t start_index = 0;
+   size_t end_index = pickers_.size() - 1;
+   size_t index = 0;
+   while (end_index > start_index) {
+     mid = (start_index + end_index) / 2;
+     if (pickers_[mid].first > key) {
+       end_index = mid;
+     } else if (pickers_[mid].first < key) {
+       start_index = mid + 1;
+     } else {
+       index = mid + 1;
+       break;
+     }
+   }
+   if (index == 0) index = start_index;
+   GPR_ASSERT(pickers_[index].first > key);
+   return pickers_[index].second->Pick(pick, error);
+ }
+
   //
- // XdsLb::Helper
+ // XdsLb::FallbackHelper
   //

- bool XdsLb::Helper::CalledByPendingChild() const {
+ bool XdsLb::FallbackHelper::CalledByPendingFallback() const {
   GPR_ASSERT(child_ != nullptr);
-  return child_ == parent_->pending_child_policy_.get();
+  return child_ == parent_->pending_fallback_policy_.get();
   }

- bool XdsLb::Helper::CalledByCurrentChild() const {
+ bool XdsLb::FallbackHelper::CalledByCurrentFallback() const {
   GPR_ASSERT(child_ != nullptr);
-  return child_ == parent_->child_policy_.get();
+  return child_ == parent_->fallback_policy_.get();
   }

- Subchannel* XdsLb::Helper::CreateSubchannel(const grpc_channel_args& args) {
+ Subchannel* XdsLb::FallbackHelper::CreateSubchannel(
+     const grpc_channel_args& args) {
   if (parent_->shutting_down_ ||
-      (!CalledByPendingChild() && !CalledByCurrentChild())) {
+      (!CalledByPendingFallback() && !CalledByCurrentFallback())) {
   return nullptr;
   }
   return parent_->channel_control_helper()->CreateSubchannel(args);
   }

- grpc_channel* XdsLb::Helper::CreateChannel(const char* target,
-                                            const grpc_channel_args& args) {
+ grpc_channel* XdsLb::FallbackHelper::CreateChannel(
+     const char* target, const grpc_channel_args& args) {
   if (parent_->shutting_down_ ||
-      (!CalledByPendingChild() && !CalledByCurrentChild())) {
+      (!CalledByPendingFallback() && !CalledByCurrentFallback())) {
   return nullptr;
   }
   return parent_->channel_control_helper()->CreateChannel(target, args);
   }

- void XdsLb::Helper::UpdateState(grpc_connectivity_state state,
-                                 grpc_error* state_error,
-                                 UniquePtr<SubchannelPicker> picker) {
-  if (parent_->shutting_down_) {
-    GRPC_ERROR_UNREF(state_error);
-    return;
-  }
-  // If this request is from the pending child policy, ignore it until
+ void XdsLb::FallbackHelper::UpdateState(grpc_connectivity_state state,
+                                         UniquePtr<SubchannelPicker> picker) {
+  if (parent_->shutting_down_) return;
+  // If this request is from the pending fallback policy, ignore it until
   // it reports READY, at which point we swap it into place.
-  if (CalledByPendingChild()) {
-    if (grpc_lb_xds_trace.enabled()) {
-      gpr_log(GPR_INFO,
-              "[xdslb %p helper %p] pending child policy %p reports state=%s",
-              parent_.get(), this, parent_->pending_child_policy_.get(),
-              grpc_connectivity_state_name(state));
-    }
-    if (state != GRPC_CHANNEL_READY) {
-      GRPC_ERROR_UNREF(state_error);
-      return;
+  if (CalledByPendingFallback()) {
+    if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
+      gpr_log(
+          GPR_INFO,
+          "[xdslb %p helper %p] pending fallback policy %p reports state=%s",
+          parent_.get(), this, parent_->pending_fallback_policy_.get(),
+          grpc_connectivity_state_name(state));
   }
+    if (state != GRPC_CHANNEL_READY) return;
   grpc_pollset_set_del_pollset_set(
-        parent_->child_policy_->interested_parties(),
+        parent_->fallback_policy_->interested_parties(),
   parent_->interested_parties());
-    MutexLock lock(&parent_->child_policy_mu_);
-    parent_->child_policy_ = std::move(parent_->pending_child_policy_);
-  } else if (!CalledByCurrentChild()) {
-    // This request is from an outdated child, so ignore it.
-    GRPC_ERROR_UNREF(state_error);
+    MutexLock lock(&parent_->fallback_policy_mu_);
+    parent_->fallback_policy_ = std::move(parent_->pending_fallback_policy_);
+  } else if (!CalledByCurrentFallback()) {
+    // This request is from an outdated fallback policy, so ignore it.
   return;
   }
-  // TODO(juanlishen): When in fallback mode, pass the child picker
-  // through without wrapping it. (Or maybe use a different helper for
-  // the fallback policy?)
-  GPR_ASSERT(parent_->lb_chand_ != nullptr);
-  RefCountedPtr<XdsLbClientStats> client_stats =
-      parent_->lb_chand_->lb_calld() == nullptr
-          ? nullptr
-          : parent_->lb_chand_->lb_calld()->client_stats();
-  parent_->channel_control_helper()->UpdateState(
-      state, state_error,
-      UniquePtr<SubchannelPicker>(
-          New<Picker>(std::move(picker), std::move(client_stats))));
+  parent_->channel_control_helper()->UpdateState(state, std::move(picker));
   }

- void XdsLb::Helper::RequestReresolution() {
+ void XdsLb::FallbackHelper::RequestReresolution() {
   if (parent_->shutting_down_) return;
-  // If there is a pending child policy, ignore re-resolution requests
-  // from the current child policy (or any outdated child).
-  if (parent_->pending_child_policy_ != nullptr && !CalledByPendingChild()) {
-    return;
-  }
-  if (grpc_lb_xds_trace.enabled()) {
+  const LoadBalancingPolicy* latest_fallback_policy =
+      parent_->pending_fallback_policy_ != nullptr
+          ? parent_->pending_fallback_policy_.get()
+          : parent_->fallback_policy_.get();
+  if (child_ != latest_fallback_policy) return;
+  if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
   gpr_log(GPR_INFO,
-            "[xdslb %p] Re-resolution requested from the internal RR policy "
-            "(%p).",
-            parent_.get(), parent_->child_policy_.get());
+            "[xdslb %p] Re-resolution requested from the fallback policy (%p).",
+            parent_.get(), child_);
   }
   GPR_ASSERT(parent_->lb_chand_ != nullptr);
-  // If we are talking to a balancer, we expect to get updated addresses
-  // from the balancer, so we can ignore the re-resolution request from
-  // the child policy. Otherwise, pass the re-resolution request up to the
-  // channel.
-  if (parent_->lb_chand_->lb_calld() == nullptr ||
-      !parent_->lb_chand_->lb_calld()->seen_initial_response()) {
-    parent_->channel_control_helper()->RequestReresolution();
-  }
+  parent_->channel_control_helper()->RequestReresolution();
   }

@@ -482,12 +649,11 @@ void XdsLb::Helper::RequestReresolution() {
   //

   // Returns the backend addresses extracted from the given addresses.
- UniquePtr<ServerAddressList> ExtractBackendAddresses(
-     const ServerAddressList& addresses) {
-  auto backend_addresses = MakeUnique<ServerAddressList>();
+ ServerAddressList ExtractBackendAddresses(const ServerAddressList& addresses) {
+  ServerAddressList backend_addresses;
   for (size_t i = 0; i < addresses.size(); ++i) {
   if (!addresses[i].IsBalancer()) {
-      backend_addresses->emplace_back(addresses[i]);
+      backend_addresses.emplace_back(addresses[i]);
   }
   }
   return backend_addresses;
@@ -567,6 +733,9 @@ XdsLb::BalancerChannelState::BalancerChannelState(
   .set_multiplier(GRPC_XDS_RECONNECT_BACKOFF_MULTIPLIER)
   .set_jitter(GRPC_XDS_RECONNECT_JITTER)
   .set_max_backoff(GRPC_XDS_RECONNECT_MAX_BACKOFF_SECONDS * 1000)) {
+  GRPC_CLOSURE_INIT(&on_connectivity_changed_,
+                    &XdsLb::BalancerChannelState::OnConnectivityChangedLocked,
+                    this, grpc_combiner_scheduler(xdslb_policy_->combiner()));
   channel_ = xdslb_policy_->channel_control_helper()->CreateChannel(
   balancer_name, args);
   GPR_ASSERT(channel_ != nullptr);
@@ -586,7 +755,7 @@ void XdsLb::BalancerChannelState::Orphan() {

   void XdsLb::BalancerChannelState::StartCallRetryTimerLocked() {
   grpc_millis next_try = lb_call_backoff_.NextAttemptTime();
-  if (grpc_lb_xds_trace.enabled()) {
+  if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
   gpr_log(GPR_INFO,
   "[xdslb %p] Failed to connect to LB server (lb_chand: %p)...",
   xdslb_policy_.get(), this);
@@ -612,7 +781,7 @@ void XdsLb::BalancerChannelState::OnCallRetryTimerLocked(void* arg,
   lb_chand->retry_timer_callback_pending_ = false;
   if (!lb_chand->shutting_down_ && error == GRPC_ERROR_NONE &&
   lb_chand->lb_calld_ == nullptr) {
-    if (grpc_lb_xds_trace.enabled()) {
+    if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
   gpr_log(GPR_INFO,
   "[xdslb %p] Restarting call to LB server (lb_chand: %p)",
   lb_chand->xdslb_policy_.get(), lb_chand);
@@ -627,7 +796,7 @@ void XdsLb::BalancerChannelState::StartCallLocked() {
   GPR_ASSERT(channel_ != nullptr);
   GPR_ASSERT(lb_calld_ == nullptr);
   lb_calld_ = MakeOrphanable<BalancerCallState>(Ref());
-  if (grpc_lb_xds_trace.enabled()) {
+  if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
   gpr_log(GPR_INFO,
   "[xdslb %p] Query for backends (lb_chand: %p, lb_calld: %p)",
   xdslb_policy_.get(), this, lb_calld_.get());
@@ -635,6 +804,62 @@ void XdsLb::BalancerChannelState::StartCallLocked() {
   lb_calld_->StartQuery();
   }

+ void XdsLb::BalancerChannelState::StartConnectivityWatchLocked() {
+   grpc_channel_element* client_channel_elem =
+       grpc_channel_stack_last_element(grpc_channel_get_channel_stack(channel_));
+   GPR_ASSERT(client_channel_elem->filter == &grpc_client_channel_filter);
+   // Ref held by callback.
+   Ref(DEBUG_LOCATION, "watch_lb_channel_connectivity").release();
+   grpc_client_channel_watch_connectivity_state(
+       client_channel_elem,
+       grpc_polling_entity_create_from_pollset_set(
+           xdslb_policy_->interested_parties()),
+       &connectivity_, &on_connectivity_changed_, nullptr);
+ }
+
+ void XdsLb::BalancerChannelState::CancelConnectivityWatchLocked() {
+   grpc_channel_element* client_channel_elem =
+       grpc_channel_stack_last_element(grpc_channel_get_channel_stack(channel_));
+   GPR_ASSERT(client_channel_elem->filter == &grpc_client_channel_filter);
+   grpc_client_channel_watch_connectivity_state(
+       client_channel_elem,
+       grpc_polling_entity_create_from_pollset_set(
+           xdslb_policy_->interested_parties()),
+       nullptr, &on_connectivity_changed_, nullptr);
+ }
+
+ void XdsLb::BalancerChannelState::OnConnectivityChangedLocked(
+     void* arg, grpc_error* error) {
+   BalancerChannelState* self = static_cast<BalancerChannelState*>(arg);
+   if (!self->shutting_down_ &&
+       self->xdslb_policy_->fallback_at_startup_checks_pending_) {
+     if (self->connectivity_ != GRPC_CHANNEL_TRANSIENT_FAILURE) {
+       // Not in TRANSIENT_FAILURE. Renew connectivity watch.
+       grpc_channel_element* client_channel_elem =
+           grpc_channel_stack_last_element(
+               grpc_channel_get_channel_stack(self->channel_));
+       GPR_ASSERT(client_channel_elem->filter == &grpc_client_channel_filter);
+       grpc_client_channel_watch_connectivity_state(
+           client_channel_elem,
+           grpc_polling_entity_create_from_pollset_set(
+               self->xdslb_policy_->interested_parties()),
+           &self->connectivity_, &self->on_connectivity_changed_, nullptr);
+       return;  // Early out so we don't drop the ref below.
+     }
+     // In TRANSIENT_FAILURE. Cancel the fallback timer and go into
+     // fallback mode immediately.
+     gpr_log(GPR_INFO,
+             "[xdslb %p] Balancer channel in state TRANSIENT_FAILURE; "
+             "entering fallback mode",
+             self);
+     self->xdslb_policy_->fallback_at_startup_checks_pending_ = false;
+     grpc_timer_cancel(&self->xdslb_policy_->lb_fallback_timer_);
+     self->xdslb_policy_->UpdateFallbackPolicyLocked();
+   }
+   // Done watching connectivity state, so drop ref.
+   self->Unref(DEBUG_LOCATION, "watch_lb_channel_connectivity");
+ }
+
   //
   // XdsLb::BalancerChannelState::BalancerCallState
   //
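
The new connectivity-watch methods above form a small startup state machine: while the fallback-at-startup checks are pending, the watch on the balancer channel is re-armed after every state change, and a transition to TRANSIENT_FAILURE cancels the fallback timer and switches to the fallback policy immediately. A condensed sketch of that decision loop follows, with the gRPC channel and closure machinery replaced by plain callbacks; every name in it is illustrative rather than a gRPC API.

#include <cstdio>
#include <functional>

enum class Connectivity { kIdle, kConnecting, kReady, kTransientFailure };

struct FallbackWatchSketch {
  bool fallback_checks_pending = true;
  bool fallback_timer_armed = true;

  // Mirrors OnConnectivityChangedLocked: renew the watch while the channel is
  // not in TRANSIENT_FAILURE and the startup checks are still pending; on
  // TRANSIENT_FAILURE cancel the fallback timer and enter fallback right away.
  void OnConnectivityChanged(Connectivity state,
                             const std::function<void()>& renew_watch,
                             const std::function<void()>& enter_fallback) {
    if (!fallback_checks_pending) return;  // nothing left to decide
    if (state != Connectivity::kTransientFailure) {
      renew_watch();
      return;
    }
    fallback_checks_pending = false;
    fallback_timer_armed = false;  // stands in for cancelling the timer
    enter_fallback();
  }
};

int main() {
  FallbackWatchSketch w;
  int renews = 0;
  bool in_fallback = false;
  auto renew = [&] { ++renews; };
  auto fallback = [&] { in_fallback = true; };
  w.OnConnectivityChanged(Connectivity::kConnecting, renew, fallback);
  w.OnConnectivityChanged(Connectivity::kTransientFailure, renew, fallback);
  // Prints renews=1 in_fallback=1: one renewed watch, then fallback entered.
  std::printf("renews=%d in_fallback=%d\n", renews, in_fallback);
  return 0;
}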
@@ -707,7 +932,7 @@ void XdsLb::BalancerChannelState::BalancerCallState::Orphan() {
707
932
 
708
933
  void XdsLb::BalancerChannelState::BalancerCallState::StartQuery() {
709
934
  GPR_ASSERT(lb_call_ != nullptr);
710
- if (grpc_lb_xds_trace.enabled()) {
935
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
711
936
  gpr_log(GPR_INFO, "[xdslb %p] Starting LB call (lb_calld: %p, lb_call: %p)",
712
937
  xdslb_policy(), this, lb_call_);
713
938
  }
@@ -880,6 +1105,14 @@ void XdsLb::BalancerChannelState::BalancerCallState::
880
1105
  (initial_response = xds_grpclb_initial_response_parse(response_slice)) !=
881
1106
  nullptr) {
882
1107
  // Have NOT seen initial response, look for initial response.
1108
+ // TODO(juanlishen): When we convert this to use the xds protocol, the
1109
+ // balancer will send us a fallback timeout such that we should go into
1110
+ // fallback mode if we have lost contact with the balancer after a certain
1111
+ // period of time. We will need to save the timeout value here, and then
1112
+ // when the balancer call ends, we will need to start a timer for the
1113
+ // specified period of time, and if the timer fires, we go into fallback
1114
+ // mode. We will also need to cancel the timer when we receive a serverlist
1115
+ // from the balancer.
883
1116
  if (initial_response->has_client_stats_report_interval) {
884
1117
  const grpc_millis interval = xds_grpclb_duration_to_millis(
885
1118
  &initial_response->client_stats_report_interval);
@@ -888,7 +1121,7 @@ void XdsLb::BalancerChannelState::BalancerCallState::
888
1121
  GPR_MAX(GPR_MS_PER_SEC, interval);
889
1122
  }
890
1123
  }
891
- if (grpc_lb_xds_trace.enabled()) {
1124
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
892
1125
  if (lb_calld->client_stats_report_interval_ != 0) {
893
1126
  gpr_log(GPR_INFO,
894
1127
  "[xdslb %p] Received initial LB response message; "
@@ -907,7 +1140,7 @@ void XdsLb::BalancerChannelState::BalancerCallState::
907
1140
  response_slice)) != nullptr) {
908
1141
  // Have seen initial response, look for serverlist.
909
1142
  GPR_ASSERT(lb_calld->lb_call_ != nullptr);
910
- if (grpc_lb_xds_trace.enabled()) {
1143
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
911
1144
  gpr_log(GPR_INFO,
912
1145
  "[xdslb %p] Serverlist with %" PRIuPTR " servers received",
913
1146
  xdslb_policy, serverlist->num_servers);
@@ -921,67 +1154,69 @@ void XdsLb::BalancerChannelState::BalancerCallState::
921
1154
  gpr_free(ipport);
922
1155
  }
923
1156
  }
924
- /* update serverlist */
925
- // TODO(juanlishen): Don't ingore empty serverlist.
926
- if (serverlist->num_servers > 0) {
927
- // Pending LB channel receives a serverlist; promote it.
928
- // Note that this call can't be on a discarded pending channel, because
929
- // such channels don't have any current call but we have checked this call
930
- // is a current call.
931
- if (!lb_calld->lb_chand_->IsCurrentChannel()) {
932
- if (grpc_lb_xds_trace.enabled()) {
933
- gpr_log(GPR_INFO,
934
- "[xdslb %p] Promoting pending LB channel %p to replace "
935
- "current LB channel %p",
936
- xdslb_policy, lb_calld->lb_chand_.get(),
937
- lb_calld->xdslb_policy()->lb_chand_.get());
938
- }
939
- lb_calld->xdslb_policy()->lb_chand_ =
940
- std::move(lb_calld->xdslb_policy()->pending_lb_chand_);
941
- }
942
- // Start sending client load report only after we start using the
943
- // serverlist returned from the current LB call.
944
- if (lb_calld->client_stats_report_interval_ > 0 &&
945
- lb_calld->client_stats_ == nullptr) {
946
- lb_calld->client_stats_ = MakeRefCounted<XdsLbClientStats>();
947
- // TODO(roth): We currently track this ref manually. Once the
948
- // ClosureRef API is ready, we should pass the RefCountedPtr<> along
949
- // with the callback.
950
- auto self = lb_calld->Ref(DEBUG_LOCATION, "client_load_report");
951
- self.release();
952
- lb_calld->ScheduleNextClientLoadReportLocked();
953
- }
954
- if (xds_grpclb_serverlist_equals(xdslb_policy->serverlist_, serverlist)) {
955
- if (grpc_lb_xds_trace.enabled()) {
956
- gpr_log(GPR_INFO,
957
- "[xdslb %p] Incoming server list identical to current, "
958
- "ignoring.",
959
- xdslb_policy);
960
- }
961
- xds_grpclb_destroy_serverlist(serverlist);
962
- } else { /* new serverlist */
963
- if (xdslb_policy->serverlist_ != nullptr) {
964
- /* dispose of the old serverlist */
965
- xds_grpclb_destroy_serverlist(xdslb_policy->serverlist_);
966
- } else {
967
- /* or dispose of the fallback */
968
- xdslb_policy->fallback_backend_addresses_.reset();
969
- if (xdslb_policy->fallback_timer_callback_pending_) {
970
- grpc_timer_cancel(&xdslb_policy->lb_fallback_timer_);
971
- }
972
- }
973
- // and update the copy in the XdsLb instance. This
974
- // serverlist instance will be destroyed either upon the next
975
- // update or when the XdsLb instance is destroyed.
976
- xdslb_policy->serverlist_ = serverlist;
977
- xdslb_policy->CreateOrUpdateChildPolicyLocked();
1157
+ // Pending LB channel receives a serverlist; promote it.
1158
+ // Note that this call can't be on a discarded pending channel, because
1159
+ // such channels don't have any current call but we have checked this call
1160
+ // is a current call.
1161
+ if (!lb_calld->lb_chand_->IsCurrentChannel()) {
1162
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
1163
+ gpr_log(GPR_INFO,
1164
+ "[xdslb %p] Promoting pending LB channel %p to replace "
1165
+ "current LB channel %p",
1166
+ xdslb_policy, lb_calld->lb_chand_.get(),
1167
+ lb_calld->xdslb_policy()->lb_chand_.get());
978
1168
  }
979
- } else {
980
- if (grpc_lb_xds_trace.enabled()) {
981
- gpr_log(GPR_INFO, "[xdslb %p] Received empty server list, ignoring.",
1169
+ lb_calld->xdslb_policy()->lb_chand_ =
1170
+ std::move(lb_calld->xdslb_policy()->pending_lb_chand_);
1171
+ }
1172
+ // Start sending client load report only after we start using the
1173
+ // serverlist returned from the current LB call.
1174
+ if (lb_calld->client_stats_report_interval_ > 0 &&
1175
+ lb_calld->client_stats_ == nullptr) {
1176
+ lb_calld->client_stats_ = MakeRefCounted<XdsLbClientStats>();
1177
+ lb_calld->Ref(DEBUG_LOCATION, "client_load_report").release();
1178
+ lb_calld->ScheduleNextClientLoadReportLocked();
1179
+ }
1180
+ if (!xdslb_policy->locality_serverlist_.empty() &&
1181
+ xds_grpclb_serverlist_equals(
1182
+ xdslb_policy->locality_serverlist_[0]->serverlist, serverlist)) {
1183
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
1184
+ gpr_log(GPR_INFO,
1185
+ "[xdslb %p] Incoming server list identical to current, "
1186
+ "ignoring.",
982
1187
  xdslb_policy);
983
1188
  }
984
1189
  xds_grpclb_destroy_serverlist(serverlist);
1190
+ } else { // New serverlist.
1191
+ // If the balancer tells us to drop all the calls, we should exit fallback
1192
+ // mode immediately.
1193
+ // TODO(juanlishen): When we add EDS drop, we should change to check
1194
+ // drop_percentage.
1195
+ if (serverlist->num_servers == 0) xdslb_policy->MaybeExitFallbackMode();
1196
+ if (!xdslb_policy->locality_serverlist_.empty()) {
1197
+ xds_grpclb_destroy_serverlist(
1198
+ xdslb_policy->locality_serverlist_[0]->serverlist);
1199
+ } else {
1200
+ // This is the first serverlist we've received, don't enter fallback
1201
+ // mode.
1202
+ xdslb_policy->MaybeCancelFallbackAtStartupChecks();
1203
+ // Initialize locality serverlist, currently the list only handles
1204
+ // one child.
1205
+ xdslb_policy->locality_serverlist_.emplace_back(
1206
+ MakeUnique<LocalityServerlistEntry>());
1207
+ xdslb_policy->locality_serverlist_[0]->locality_name =
1208
+ static_cast<char*>(gpr_strdup(kDefaultLocalityName));
1209
+ xdslb_policy->locality_serverlist_[0]->locality_weight =
1210
+ kDefaultLocalityWeight;
1211
+ }
1212
+ // Update the serverlist in the XdsLb instance. This serverlist
1213
+ // instance will be destroyed either upon the next update or when the
1214
+ // XdsLb instance is destroyed.
1215
+ xdslb_policy->locality_serverlist_[0]->serverlist = serverlist;
1216
+ xdslb_policy->locality_map_.UpdateLocked(
1217
+ xdslb_policy->locality_serverlist_,
1218
+ xdslb_policy->child_policy_config_.get(), xdslb_policy->args_,
1219
+ xdslb_policy);
985
1220
  }
986
1221
  } else {
987
1222
  // No valid initial response or serverlist found.
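Annotation: the hunk above keeps the manual ref idiom for the load-report closure, `lb_calld->Ref(DEBUG_LOCATION, "client_load_report").release()`: a ref is taken and deliberately leaked before the closure is scheduled, and the closure later releases it. A minimal sketch of that ownership pattern, using std::shared_ptr and std::thread as stand-ins rather than gRPC's RefCountedPtr/closure machinery (the names here are illustrative, not gRPC APIs):

#include <chrono>
#include <cstdio>
#include <memory>
#include <thread>

// Illustrative stand-in for an object whose lifetime must span an async callback.
struct Reporter {
  void SendLoadReport() { std::printf("report sent\n"); }
};

// Schedule an async "closure"; the captured shared_ptr plays the role of the
// manually released ref above, keeping the object alive until the callback runs.
void ScheduleNextReport(std::shared_ptr<Reporter> self) {
  std::thread([self] {                       // ref held by the closure
    std::this_thread::sleep_for(std::chrono::milliseconds(10));
    self->SendLoadReport();                  // object guaranteed alive here
  }).detach();                               // ref released when the lambda ends
}

int main() {
  auto reporter = std::make_shared<Reporter>();
  ScheduleNextReport(reporter);
  std::this_thread::sleep_for(std::chrono::milliseconds(50));
}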
@@ -1017,7 +1252,7 @@ void XdsLb::BalancerChannelState::BalancerCallState::
1017
1252
  XdsLb* xdslb_policy = lb_calld->xdslb_policy();
1018
1253
  BalancerChannelState* lb_chand = lb_calld->lb_chand_.get();
1019
1254
  GPR_ASSERT(lb_calld->lb_call_ != nullptr);
1020
- if (grpc_lb_xds_trace.enabled()) {
1255
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
1021
1256
  char* status_details =
1022
1257
  grpc_slice_to_c_string(lb_calld->lb_call_status_details_);
1023
1258
  gpr_log(GPR_INFO,
@@ -1036,7 +1271,7 @@ void XdsLb::BalancerChannelState::BalancerCallState::
1036
1271
  if (lb_chand != xdslb_policy->LatestLbChannel()) {
1037
1272
  // This channel must be the current one and there is a pending one. Swap
1038
1273
  // in the pending one and we are done.
1039
- if (grpc_lb_xds_trace.enabled()) {
1274
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
1040
1275
  gpr_log(GPR_INFO,
1041
1276
  "[xdslb %p] Promoting pending LB channel %p to replace "
1042
1277
  "current LB channel %p",
@@ -1058,6 +1293,18 @@ void XdsLb::BalancerChannelState::BalancerCallState::
1058
1293
  lb_chand->StartCallRetryTimerLocked();
1059
1294
  }
1060
1295
  xdslb_policy->channel_control_helper()->RequestReresolution();
1296
+ // If the fallback-at-startup checks are pending, go into fallback mode
1297
+ // immediately. This short-circuits the timeout for the
1298
+ // fallback-at-startup case.
1299
+ if (xdslb_policy->fallback_at_startup_checks_pending_) {
1300
+ gpr_log(GPR_INFO,
1301
+ "[xdslb %p] Balancer call finished; entering fallback mode",
1302
+ xdslb_policy);
1303
+ xdslb_policy->fallback_at_startup_checks_pending_ = false;
1304
+ grpc_timer_cancel(&xdslb_policy->lb_fallback_timer_);
1305
+ lb_chand->CancelConnectivityWatchLocked();
1306
+ xdslb_policy->UpdateFallbackPolicyLocked();
1307
+ }
1061
1308
  }
1062
1309
  }
1063
1310
  lb_calld->Unref(DEBUG_LOCATION, "lb_call_ended");
@@ -1112,9 +1359,10 @@ grpc_channel_args* BuildBalancerChannelArgs(const grpc_channel_args* args) {
1112
1359
  // ctor and dtor
1113
1360
  //
1114
1361
 
1115
- XdsLb::XdsLb(Args args) : LoadBalancingPolicy(std::move(args)) {
1116
- gpr_mu_init(&lb_chand_mu_);
1117
- gpr_mu_init(&child_policy_mu_);
1362
+ XdsLb::XdsLb(Args args)
1363
+ : LoadBalancingPolicy(std::move(args)),
1364
+ locality_map_(),
1365
+ locality_serverlist_() {
1118
1366
  // Record server name.
1119
1367
  const grpc_arg* arg = grpc_channel_args_find(args.args, GRPC_ARG_SERVER_URI);
1120
1368
  const char* server_uri = grpc_channel_arg_get_string(arg);
@@ -1122,7 +1370,7 @@ XdsLb::XdsLb(Args args) : LoadBalancingPolicy(std::move(args)) {
1122
1370
  grpc_uri* uri = grpc_uri_parse(server_uri, true);
1123
1371
  GPR_ASSERT(uri->path[0] != '\0');
1124
1372
  server_name_ = gpr_strdup(uri->path[0] == '/' ? uri->path + 1 : uri->path);
1125
- if (grpc_lb_xds_trace.enabled()) {
1373
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
1126
1374
  gpr_log(GPR_INFO,
1127
1375
  "[xdslb %p] Will use '%s' as the server name for LB request.", this,
1128
1376
  server_name_);
@@ -1132,43 +1380,38 @@ XdsLb::XdsLb(Args args) : LoadBalancingPolicy(std::move(args)) {
1132
1380
  arg = grpc_channel_args_find(args.args, GRPC_ARG_GRPCLB_CALL_TIMEOUT_MS);
1133
1381
  lb_call_timeout_ms_ = grpc_channel_arg_get_integer(arg, {0, 0, INT_MAX});
1134
1382
  // Record fallback timeout.
1135
- arg = grpc_channel_args_find(args.args, GRPC_ARG_GRPCLB_FALLBACK_TIMEOUT_MS);
1383
+ arg = grpc_channel_args_find(args.args, GRPC_ARG_XDS_FALLBACK_TIMEOUT_MS);
1136
1384
  lb_fallback_timeout_ms_ = grpc_channel_arg_get_integer(
1137
1385
  arg, {GRPC_XDS_DEFAULT_FALLBACK_TIMEOUT_MS, 0, INT_MAX});
1138
1386
  }
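Annotation: the constructor still reads the balancer call timeout from GRPC_ARG_GRPCLB_CALL_TIMEOUT_MS and now reads the fallback timeout from GRPC_ARG_XDS_FALLBACK_TIMEOUT_MS instead of the grpclb key. A hedged sketch of supplying such an integer channel arg through the public C core API; the target is a placeholder, and since it is not certain that the xds fallback key is exported to application code, the sketch uses the grpclb call-timeout key, which is defined in grpc_types.h (the xds key would be passed the same way where visible).

#include <grpc/grpc.h>

// Sketch: override the balancer call timeout (ms) on a channel.
// Call grpc_init() before using this; "target" is a placeholder URI.
grpc_channel* CreateChannelWithLbCallTimeout(const char* target) {
  grpc_arg arg;
  arg.type = GRPC_ARG_INTEGER;
  arg.key = const_cast<char*>(GRPC_ARG_GRPCLB_CALL_TIMEOUT_MS);
  arg.value.integer = 10000;  // 10s deadline for each balancer call
  grpc_channel_args args = {1, &arg};
  return grpc_insecure_channel_create(target, &args, nullptr);
}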
1139
1387
 
1140
1388
  XdsLb::~XdsLb() {
1141
- gpr_mu_destroy(&lb_chand_mu_);
1142
1389
  gpr_free((void*)server_name_);
1143
1390
  grpc_channel_args_destroy(args_);
1144
- if (serverlist_ != nullptr) {
1145
- xds_grpclb_destroy_serverlist(serverlist_);
1146
- }
1147
- gpr_mu_destroy(&child_policy_mu_);
1391
+ locality_serverlist_.clear();
1148
1392
  }
1149
1393
 
1150
1394
  void XdsLb::ShutdownLocked() {
1151
1395
  shutting_down_ = true;
1152
- if (fallback_timer_callback_pending_) {
1396
+ if (fallback_at_startup_checks_pending_) {
1153
1397
  grpc_timer_cancel(&lb_fallback_timer_);
1154
1398
  }
1155
- if (child_policy_ != nullptr) {
1156
- grpc_pollset_set_del_pollset_set(child_policy_->interested_parties(),
1399
+ locality_map_.ShutdownLocked();
1400
+ if (fallback_policy_ != nullptr) {
1401
+ grpc_pollset_set_del_pollset_set(fallback_policy_->interested_parties(),
1157
1402
  interested_parties());
1158
1403
  }
1159
- if (pending_child_policy_ != nullptr) {
1404
+ if (pending_fallback_policy_ != nullptr) {
1160
1405
  grpc_pollset_set_del_pollset_set(
1161
- pending_child_policy_->interested_parties(), interested_parties());
1406
+ pending_fallback_policy_->interested_parties(), interested_parties());
1162
1407
  }
1163
1408
  {
1164
- MutexLock lock(&child_policy_mu_);
1165
- child_policy_.reset();
1166
- pending_child_policy_.reset();
1409
+ MutexLock lock(&fallback_policy_mu_);
1410
+ fallback_policy_.reset();
1411
+ pending_fallback_policy_.reset();
1167
1412
  }
1168
- // We destroy the LB channel here instead of in our destructor because
1169
- // destroying the channel triggers a last callback to
1170
- // OnBalancerChannelConnectivityChangedLocked(), and we need to be
1171
- // alive when that callback is invoked.
1413
+ // We reset the LB channels here instead of in our destructor because they
1414
+ // hold refs to XdsLb.
1172
1415
  {
1173
1416
  MutexLock lock(&lb_chand_mu_);
1174
1417
  lb_chand_.reset();
@@ -1187,28 +1430,30 @@ void XdsLb::ResetBackoffLocked() {
1187
1430
  if (pending_lb_chand_ != nullptr) {
1188
1431
  grpc_channel_reset_connect_backoff(pending_lb_chand_->channel());
1189
1432
  }
1190
- if (child_policy_ != nullptr) {
1191
- child_policy_->ResetBackoffLocked();
1433
+ locality_map_.ResetBackoffLocked();
1434
+ if (fallback_policy_ != nullptr) {
1435
+ fallback_policy_->ResetBackoffLocked();
1192
1436
  }
1193
- if (pending_child_policy_ != nullptr) {
1194
- pending_child_policy_->ResetBackoffLocked();
1437
+ if (pending_fallback_policy_ != nullptr) {
1438
+ pending_fallback_policy_->ResetBackoffLocked();
1195
1439
  }
1196
1440
  }
1197
1441
 
1198
1442
  void XdsLb::FillChildRefsForChannelz(channelz::ChildRefsList* child_subchannels,
1199
1443
  channelz::ChildRefsList* child_channels) {
1444
+ // Delegate to the locality_map_ to fill the children subchannels.
1445
+ locality_map_.FillChildRefsForChannelz(child_subchannels, child_channels);
1200
1446
  {
1201
- // Delegate to the child_policy_ to fill the children subchannels.
1202
- // This must be done holding child_policy_mu_, since this method does not
1447
+ // This must be done holding fallback_policy_mu_, since this method does not
1203
1448
  // run in the combiner.
1204
- MutexLock lock(&child_policy_mu_);
1205
- if (child_policy_ != nullptr) {
1206
- child_policy_->FillChildRefsForChannelz(child_subchannels,
1207
- child_channels);
1449
+ MutexLock lock(&fallback_policy_mu_);
1450
+ if (fallback_policy_ != nullptr) {
1451
+ fallback_policy_->FillChildRefsForChannelz(child_subchannels,
1452
+ child_channels);
1208
1453
  }
1209
- if (pending_child_policy_ != nullptr) {
1210
- pending_child_policy_->FillChildRefsForChannelz(child_subchannels,
1211
- child_channels);
1454
+ if (pending_fallback_policy_ != nullptr) {
1455
+ pending_fallback_policy_->FillChildRefsForChannelz(child_subchannels,
1456
+ child_channels);
1212
1457
  }
1213
1458
  }
1214
1459
  MutexLock lock(&lb_chand_mu_);
@@ -1267,96 +1512,290 @@ void XdsLb::ProcessAddressesAndChannelArgsLocked(
1267
1512
  grpc_channel_args_destroy(lb_channel_args);
1268
1513
  }
1269
1514
 
1270
- void XdsLb::ParseLbConfig(Config* xds_config) {
1271
- const grpc_json* xds_config_json = xds_config->config();
1272
- const char* balancer_name = nullptr;
1273
- grpc_json* child_policy = nullptr;
1274
- grpc_json* fallback_policy = nullptr;
1275
- for (const grpc_json* field = xds_config_json; field != nullptr;
1276
- field = field->next) {
1277
- if (field->key == nullptr) return;
1278
- if (strcmp(field->key, "balancerName") == 0) {
1279
- if (balancer_name != nullptr) return; // Duplicate.
1280
- if (field->type != GRPC_JSON_STRING) return;
1281
- balancer_name = field->value;
1282
- } else if (strcmp(field->key, "childPolicy") == 0) {
1283
- if (child_policy != nullptr) return; // Duplicate.
1284
- child_policy = ParseLoadBalancingConfig(field);
1285
- } else if (strcmp(field->key, "fallbackPolicy") == 0) {
1286
- if (fallback_policy != nullptr) return; // Duplicate.
1287
- fallback_policy = ParseLoadBalancingConfig(field);
1288
- }
1289
- }
1290
- if (balancer_name == nullptr) return; // Required field.
1291
- balancer_name_ = UniquePtr<char>(gpr_strdup(balancer_name));
1292
- if (child_policy != nullptr) {
1293
- child_policy_config_ =
1294
- MakeRefCounted<Config>(child_policy, xds_config->service_config());
1295
- }
1296
- if (fallback_policy != nullptr) {
1297
- fallback_policy_config_ =
1298
- MakeRefCounted<Config>(fallback_policy, xds_config->service_config());
1299
- }
1515
+ void XdsLb::ParseLbConfig(const ParsedXdsConfig* xds_config) {
1516
+ if (xds_config == nullptr || xds_config->balancer_name() == nullptr) return;
1517
+ // TODO(yashykt) : does this need to be a gpr_strdup
1518
+ balancer_name_ = UniquePtr<char>(gpr_strdup(xds_config->balancer_name()));
1519
+ child_policy_config_ = xds_config->child_policy();
1520
+ fallback_policy_config_ = xds_config->fallback_policy();
1300
1521
  }
1301
1522
 
1302
1523
  void XdsLb::UpdateLocked(UpdateArgs args) {
1303
1524
  const bool is_initial_update = lb_chand_ == nullptr;
1304
- ParseLbConfig(args.config.get());
1305
- // TODO(juanlishen): Pass fallback policy config update after fallback policy
1306
- // is added.
1525
+ ParseLbConfig(static_cast<const ParsedXdsConfig*>(args.config.get()));
1307
1526
  if (balancer_name_ == nullptr) {
1308
1527
  gpr_log(GPR_ERROR, "[xdslb %p] LB config parsing fails.", this);
1309
1528
  return;
1310
1529
  }
1311
1530
  ProcessAddressesAndChannelArgsLocked(args.addresses, *args.args);
1312
- // Update the existing child policy.
1313
- // Note: We have disabled fallback mode in the code, so this child policy must
1314
- // have been created from a serverlist.
1315
- // TODO(vpowar): Handle the fallback_address changes when we add support for
1316
- // fallback in xDS.
1317
- if (child_policy_ != nullptr) CreateOrUpdateChildPolicyLocked();
1318
- // If this is the initial update, start the fallback timer.
1531
+ locality_map_.UpdateLocked(locality_serverlist_, child_policy_config_.get(),
1532
+ args_, this);
1533
+ // Update the existing fallback policy. The fallback policy config and/or the
1534
+ // fallback addresses may be new.
1535
+ if (fallback_policy_ != nullptr) UpdateFallbackPolicyLocked();
1536
+ // If this is the initial update, start the fallback-at-startup checks.
1319
1537
  if (is_initial_update) {
1320
- if (lb_fallback_timeout_ms_ > 0 && serverlist_ == nullptr &&
1321
- !fallback_timer_callback_pending_) {
1322
- grpc_millis deadline = ExecCtx::Get()->Now() + lb_fallback_timeout_ms_;
1323
- Ref(DEBUG_LOCATION, "on_fallback_timer").release(); // Held by closure
1324
- GRPC_CLOSURE_INIT(&lb_on_fallback_, &XdsLb::OnFallbackTimerLocked, this,
1325
- grpc_combiner_scheduler(combiner()));
1326
- fallback_timer_callback_pending_ = true;
1327
- grpc_timer_init(&lb_fallback_timer_, deadline, &lb_on_fallback_);
1328
- // TODO(juanlishen): Monitor the connectivity state of the balancer
1329
- // channel. If the channel reports TRANSIENT_FAILURE before the
1330
- // fallback timeout expires, go into fallback mode early.
1331
- }
1538
+ grpc_millis deadline = ExecCtx::Get()->Now() + lb_fallback_timeout_ms_;
1539
+ Ref(DEBUG_LOCATION, "on_fallback_timer").release(); // Held by closure
1540
+ GRPC_CLOSURE_INIT(&lb_on_fallback_, &XdsLb::OnFallbackTimerLocked, this,
1541
+ grpc_combiner_scheduler(combiner()));
1542
+ fallback_at_startup_checks_pending_ = true;
1543
+ grpc_timer_init(&lb_fallback_timer_, deadline, &lb_on_fallback_);
1544
+ // Start watching the channel's connectivity state. If the channel
1545
+ // goes into state TRANSIENT_FAILURE, we go into fallback mode even if
1546
+ // the fallback timeout has not elapsed.
1547
+ lb_chand_->StartConnectivityWatchLocked();
1332
1548
  }
1333
1549
  }
1334
1550
 
1335
1551
  //
1336
- // code for balancer channel and call
1552
+ // fallback-related methods
1337
1553
  //
1338
1554
 
1555
+ void XdsLb::MaybeCancelFallbackAtStartupChecks() {
1556
+ if (!fallback_at_startup_checks_pending_) return;
1557
+ gpr_log(GPR_INFO,
1558
+ "[xdslb %p] Cancelling fallback timer and LB channel connectivity "
1559
+ "watch",
1560
+ this);
1561
+ grpc_timer_cancel(&lb_fallback_timer_);
1562
+ lb_chand_->CancelConnectivityWatchLocked();
1563
+ fallback_at_startup_checks_pending_ = false;
1564
+ }
1565
+
1339
1566
  void XdsLb::OnFallbackTimerLocked(void* arg, grpc_error* error) {
1340
1567
  XdsLb* xdslb_policy = static_cast<XdsLb*>(arg);
1341
- xdslb_policy->fallback_timer_callback_pending_ = false;
1342
- // If we receive a serverlist after the timer fires but before this callback
1343
- // actually runs, don't fall back.
1344
- if (xdslb_policy->serverlist_ == nullptr && !xdslb_policy->shutting_down_ &&
1345
- error == GRPC_ERROR_NONE) {
1346
- if (grpc_lb_xds_trace.enabled()) {
1568
+ // If some fallback-at-startup check is done after the timer fires but before
1569
+ // this callback actually runs, don't fall back.
1570
+ if (xdslb_policy->fallback_at_startup_checks_pending_ &&
1571
+ !xdslb_policy->shutting_down_ && error == GRPC_ERROR_NONE) {
1572
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
1347
1573
  gpr_log(GPR_INFO,
1348
- "[xdslb %p] Fallback timer fired. Not using fallback backends",
1574
+ "[xdslb %p] Child policy not ready after fallback timeout; "
1575
+ "entering fallback mode",
1349
1576
  xdslb_policy);
1350
1577
  }
1578
+ xdslb_policy->fallback_at_startup_checks_pending_ = false;
1579
+ xdslb_policy->UpdateFallbackPolicyLocked();
1580
+ xdslb_policy->lb_chand_->CancelConnectivityWatchLocked();
1351
1581
  }
1352
1582
  xdslb_policy->Unref(DEBUG_LOCATION, "on_fallback_timer");
1353
1583
  }
1354
1584
 
1585
+ void XdsLb::UpdateFallbackPolicyLocked() {
1586
+ if (shutting_down_) return;
1587
+ // Construct update args.
1588
+ UpdateArgs update_args;
1589
+ update_args.addresses = fallback_backend_addresses_;
1590
+ update_args.config = fallback_policy_config_ == nullptr
1591
+ ? nullptr
1592
+ : fallback_policy_config_->Ref();
1593
+ update_args.args = grpc_channel_args_copy(args_);
1594
+ // If the child policy name changes, we need to create a new child
1595
+ // policy. When this happens, we leave child_policy_ as-is and store
1596
+ // the new child policy in pending_child_policy_. Once the new child
1597
+ // policy transitions into state READY, we swap it into child_policy_,
1598
+ // replacing the original child policy. So pending_child_policy_ is
1599
+ // non-null only between when we apply an update that changes the child
1600
+ // policy name and when the new child reports state READY.
1601
+ //
1602
+ // Updates can arrive at any point during this transition. We always
1603
+ // apply updates relative to the most recently created child policy,
1604
+ // even if the most recent one is still in pending_child_policy_. This
1605
+ // is true both when applying the updates to an existing child policy
1606
+ // and when determining whether we need to create a new policy.
1607
+ //
1608
+ // As a result of this, there are several cases to consider here:
1609
+ //
1610
+ // 1. We have no existing child policy (i.e., we have started up but
1611
+ // have not yet received a serverlist from the balancer or gone
1612
+ // into fallback mode; in this case, both child_policy_ and
1613
+ // pending_child_policy_ are null). In this case, we create a
1614
+ // new child policy and store it in child_policy_.
1615
+ //
1616
+ // 2. We have an existing child policy and have no pending child policy
1617
+ // from a previous update (i.e., either there has not been a
1618
+ // previous update that changed the policy name, or we have already
1619
+ // finished swapping in the new policy; in this case, child_policy_
1620
+ // is non-null but pending_child_policy_ is null). In this case:
1621
+ // a. If child_policy_->name() equals child_policy_name, then we
1622
+ // update the existing child policy.
1623
+ // b. If child_policy_->name() does not equal child_policy_name,
1624
+ // we create a new policy. The policy will be stored in
1625
+ // pending_child_policy_ and will later be swapped into
1626
+ // child_policy_ by the helper when the new child transitions
1627
+ // into state READY.
1628
+ //
1629
+ // 3. We have an existing child policy and have a pending child policy
1630
+ // from a previous update (i.e., a previous update set
1631
+ // pending_child_policy_ as per case 2b above and that policy has
1632
+ // not yet transitioned into state READY and been swapped into
1633
+ // child_policy_; in this case, both child_policy_ and
1634
+ // pending_child_policy_ are non-null). In this case:
1635
+ // a. If pending_child_policy_->name() equals child_policy_name,
1636
+ // then we update the existing pending child policy.
1637
+ // b. If pending_child_policy->name() does not equal
1638
+ // child_policy_name, then we create a new policy. The new
1639
+ // policy is stored in pending_child_policy_ (replacing the one
1640
+ // that was there before, which will be immediately shut down)
1641
+ // and will later be swapped into child_policy_ by the helper
1642
+ // when the new child transitions into state READY.
1643
+ const char* fallback_policy_name = fallback_policy_config_ == nullptr
1644
+ ? "round_robin"
1645
+ : fallback_policy_config_->name();
1646
+ const bool create_policy =
1647
+ // case 1
1648
+ fallback_policy_ == nullptr ||
1649
+ // case 2b
1650
+ (pending_fallback_policy_ == nullptr &&
1651
+ strcmp(fallback_policy_->name(), fallback_policy_name) != 0) ||
1652
+ // case 3b
1653
+ (pending_fallback_policy_ != nullptr &&
1654
+ strcmp(pending_fallback_policy_->name(), fallback_policy_name) != 0);
1655
+ LoadBalancingPolicy* policy_to_update = nullptr;
1656
+ if (create_policy) {
1657
+ // Cases 1, 2b, and 3b: create a new child policy.
1658
+ // If child_policy_ is null, we set it (case 1), else we set
1659
+ // pending_child_policy_ (cases 2b and 3b).
1660
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
1661
+ gpr_log(GPR_INFO, "[xdslb %p] Creating new %sfallback policy %s", this,
1662
+ fallback_policy_ == nullptr ? "" : "pending ",
1663
+ fallback_policy_name);
1664
+ }
1665
+ auto new_policy =
1666
+ CreateFallbackPolicyLocked(fallback_policy_name, update_args.args);
1667
+ auto& lb_policy = fallback_policy_ == nullptr ? fallback_policy_
1668
+ : pending_fallback_policy_;
1669
+ {
1670
+ MutexLock lock(&fallback_policy_mu_);
1671
+ lb_policy = std::move(new_policy);
1672
+ }
1673
+ policy_to_update = lb_policy.get();
1674
+ } else {
1675
+ // Cases 2a and 3a: update an existing policy.
1676
+ // If we have a pending child policy, send the update to the pending
1677
+ // policy (case 3a), else send it to the current policy (case 2a).
1678
+ policy_to_update = pending_fallback_policy_ != nullptr
1679
+ ? pending_fallback_policy_.get()
1680
+ : fallback_policy_.get();
1681
+ }
1682
+ GPR_ASSERT(policy_to_update != nullptr);
1683
+ // Update the policy.
1684
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
1685
+ gpr_log(
1686
+ GPR_INFO, "[xdslb %p] Updating %sfallback policy %p", this,
1687
+ policy_to_update == pending_fallback_policy_.get() ? "pending " : "",
1688
+ policy_to_update);
1689
+ }
1690
+ policy_to_update->UpdateLocked(std::move(update_args));
1691
+ }
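Annotation: the long case analysis above (cases 1, 2a/2b, 3a/3b) reduces to one predicate: create a new (pending) policy when there is no current policy, or when the most recently created policy (pending if present, else current) has a different name than the requested one; otherwise update the most recent policy in place. A standalone restatement of that predicate with illustrative names, not the gRPC classes:

#include <cassert>
#include <cstring>

// Decide whether an update needs a new (pending) policy or can be applied to
// the most recently created one. Mirrors cases 1/2b/3b vs 2a/3a above;
// `current` and `pending` may be nullptr.
bool ShouldCreateNewPolicy(const char* current, const char* pending,
                           const char* requested) {
  if (current == nullptr) return true;                         // case 1
  const char* latest = pending != nullptr ? pending : current;
  return std::strcmp(latest, requested) != 0;                  // cases 2b / 3b
}

int main() {
  assert(ShouldCreateNewPolicy(nullptr, nullptr, "round_robin"));              // 1
  assert(!ShouldCreateNewPolicy("round_robin", nullptr, "round_robin"));       // 2a
  assert(ShouldCreateNewPolicy("round_robin", nullptr, "pick_first"));         // 2b
  assert(!ShouldCreateNewPolicy("round_robin", "pick_first", "pick_first"));   // 3a
  assert(ShouldCreateNewPolicy("round_robin", "pick_first", "round_robin"));   // 3b
}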
1692
+
1693
+ OrphanablePtr<LoadBalancingPolicy> XdsLb::CreateFallbackPolicyLocked(
1694
+ const char* name, const grpc_channel_args* args) {
1695
+ FallbackHelper* helper = New<FallbackHelper>(Ref());
1696
+ LoadBalancingPolicy::Args lb_policy_args;
1697
+ lb_policy_args.combiner = combiner();
1698
+ lb_policy_args.args = args;
1699
+ lb_policy_args.channel_control_helper =
1700
+ UniquePtr<ChannelControlHelper>(helper);
1701
+ OrphanablePtr<LoadBalancingPolicy> lb_policy =
1702
+ LoadBalancingPolicyRegistry::CreateLoadBalancingPolicy(
1703
+ name, std::move(lb_policy_args));
1704
+ if (GPR_UNLIKELY(lb_policy == nullptr)) {
1705
+ gpr_log(GPR_ERROR, "[xdslb %p] Failure creating fallback policy %s", this,
1706
+ name);
1707
+ return nullptr;
1708
+ }
1709
+ helper->set_child(lb_policy.get());
1710
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
1711
+ gpr_log(GPR_INFO, "[xdslb %p] Created new fallback policy %s (%p)", this,
1712
+ name, lb_policy.get());
1713
+ }
1714
+ // Add the xDS's interested_parties pollset_set to that of the newly created
1715
+ // child policy. This will make the child policy progress upon activity on xDS
1716
+ // LB, which in turn is tied to the application's call.
1717
+ grpc_pollset_set_add_pollset_set(lb_policy->interested_parties(),
1718
+ interested_parties());
1719
+ return lb_policy;
1720
+ }
1721
+
1722
+ void XdsLb::MaybeExitFallbackMode() {
1723
+ if (fallback_policy_ == nullptr) return;
1724
+ gpr_log(GPR_INFO, "[xdslb %p] Exiting fallback mode", this);
1725
+ fallback_policy_.reset();
1726
+ pending_fallback_policy_.reset();
1727
+ }
1728
+
1729
+ //
1730
+ // XdsLb::LocalityMap
1731
+ //
1732
+
1733
+ void XdsLb::LocalityMap::PruneLocalities(const LocalityList& locality_list) {
1734
+ for (auto iter = map_.begin(); iter != map_.end();) {
1735
+ bool found = false;
1736
+ for (size_t i = 0; i < locality_list.size(); i++) {
1737
+ if (!gpr_stricmp(locality_list[i]->locality_name, iter->first.get())) {
1738
+ found = true;
1739
+ }
1740
+ }
1741
+ if (!found) { // Remove entries not present in the locality list
1742
+ MutexLock lock(&child_refs_mu_);
1743
+ iter = map_.erase(iter);
1744
+ } else
1745
+ iter++;
1746
+ }
1747
+ }
1748
+
1749
+ void XdsLb::LocalityMap::UpdateLocked(
1750
+ const LocalityList& locality_serverlist,
1751
+ ParsedLoadBalancingConfig* child_policy_config,
1752
+ const grpc_channel_args* args, XdsLb* parent) {
1753
+ if (parent->shutting_down_) return;
1754
+ for (size_t i = 0; i < locality_serverlist.size(); i++) {
1755
+ UniquePtr<char> locality_name(
1756
+ gpr_strdup(locality_serverlist[i]->locality_name));
1757
+ auto iter = map_.find(locality_name);
1758
+ if (iter == map_.end()) {
1759
+ OrphanablePtr<LocalityEntry> new_entry = MakeOrphanable<LocalityEntry>(
1760
+ parent->Ref(), locality_serverlist[i]->locality_weight);
1761
+ MutexLock lock(&child_refs_mu_);
1762
+ iter = map_.emplace(std::move(locality_name), std::move(new_entry)).first;
1763
+ }
1764
+ // Don't create new child policies if not directed to
1765
+ xds_grpclb_serverlist* serverlist =
1766
+ parent->locality_serverlist_[i]->serverlist;
1767
+ iter->second->UpdateLocked(serverlist, child_policy_config, args);
1768
+ }
1769
+ PruneLocalities(locality_serverlist);
1770
+ }
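Annotation: LocalityMap::UpdateLocked upserts one entry per locality in the incoming list and PruneLocalities then drops map entries whose name no longer appears. A generic illustration of that upsert-then-prune shape over std::map, with plain strings standing in for the gpr_strdup'd locality names and LocalityEntry objects used above:

#include <iostream>
#include <iterator>
#include <map>
#include <string>
#include <utility>
#include <vector>

// Upsert the entries named in `wanted`, then prune everything else.
// Worked example: a map holding {A, B} updated with {B, C} ends as {B, C}:
// C is created, B is updated in place, A is erased by the prune pass.
void UpdateLocalities(std::map<std::string, int>& localities,
                      const std::vector<std::pair<std::string, int>>& wanted) {
  for (const auto& entry : wanted) {
    localities[entry.first] = entry.second;  // create-or-update
  }
  for (auto it = localities.begin(); it != localities.end();) {
    bool keep = false;
    for (const auto& entry : wanted) {
      if (entry.first == it->first) { keep = true; break; }
    }
    it = keep ? std::next(it) : localities.erase(it);
  }
}

int main() {
  std::map<std::string, int> m{{"A", 1}, {"B", 1}};
  UpdateLocalities(m, {{"B", 2}, {"C", 3}});
  for (const auto& p : m) std::cout << p.first << ":" << p.second << " ";
  std::cout << "\n";  // prints: B:2 C:3
}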
1771
+
1772
+ void XdsLb::LocalityMap::ShutdownLocked() {
1773
+ MutexLock lock(&child_refs_mu_);
1774
+ map_.clear();
1775
+ }
1776
+
1777
+ void XdsLb::LocalityMap::ResetBackoffLocked() {
1778
+ for (auto& p : map_) {
1779
+ p.second->ResetBackoffLocked();
1780
+ }
1781
+ }
1782
+
1783
+ void XdsLb::LocalityMap::FillChildRefsForChannelz(
1784
+ channelz::ChildRefsList* child_subchannels,
1785
+ channelz::ChildRefsList* child_channels) {
1786
+ MutexLock lock(&child_refs_mu_);
1787
+ for (auto& p : map_) {
1788
+ p.second->FillChildRefsForChannelz(child_subchannels, child_channels);
1789
+ }
1790
+ }
1791
+
1355
1792
  //
1356
- // code for interacting with the child policy
1793
+ // XdsLb::LocalityMap::LocalityEntry
1357
1794
  //
1358
1795
 
1359
- grpc_channel_args* XdsLb::CreateChildPolicyArgsLocked() {
1796
+ grpc_channel_args*
1797
+ XdsLb::LocalityMap::LocalityEntry::CreateChildPolicyArgsLocked(
1798
+ const grpc_channel_args* args_in) {
1360
1799
  const grpc_arg args_to_add[] = {
1361
1800
  // A channel arg indicating if the target is a backend inferred from a
1362
1801
  // grpclb load balancer.
@@ -1368,15 +1807,16 @@ grpc_channel_args* XdsLb::CreateChildPolicyArgsLocked() {
1368
1807
  grpc_channel_arg_integer_create(
1369
1808
  const_cast<char*>(GRPC_ARG_INHIBIT_HEALTH_CHECKING), 1),
1370
1809
  };
1371
- return grpc_channel_args_copy_and_add(args_, args_to_add,
1810
+ return grpc_channel_args_copy_and_add(args_in, args_to_add,
1372
1811
  GPR_ARRAY_SIZE(args_to_add));
1373
1812
  }
1374
1813
 
1375
- OrphanablePtr<LoadBalancingPolicy> XdsLb::CreateChildPolicyLocked(
1814
+ OrphanablePtr<LoadBalancingPolicy>
1815
+ XdsLb::LocalityMap::LocalityEntry::CreateChildPolicyLocked(
1376
1816
  const char* name, const grpc_channel_args* args) {
1377
- Helper* helper = New<Helper>(Ref());
1817
+ Helper* helper = New<Helper>(this->Ref());
1378
1818
  LoadBalancingPolicy::Args lb_policy_args;
1379
- lb_policy_args.combiner = combiner();
1819
+ lb_policy_args.combiner = parent_->combiner();
1380
1820
  lb_policy_args.args = args;
1381
1821
  lb_policy_args.channel_control_helper =
1382
1822
  UniquePtr<ChannelControlHelper>(helper);
@@ -1389,7 +1829,7 @@ OrphanablePtr<LoadBalancingPolicy> XdsLb::CreateChildPolicyLocked(
1389
1829
  return nullptr;
1390
1830
  }
1391
1831
  helper->set_child(lb_policy.get());
1392
- if (grpc_lb_xds_trace.enabled()) {
1832
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
1393
1833
  gpr_log(GPR_INFO, "[xdslb %p] Created new child policy %s (%p)", this, name,
1394
1834
  lb_policy.get());
1395
1835
  }
@@ -1397,22 +1837,21 @@ OrphanablePtr<LoadBalancingPolicy> XdsLb::CreateChildPolicyLocked(
1397
1837
  // child policy. This will make the child policy progress upon activity on xDS
1398
1838
  // LB, which in turn is tied to the application's call.
1399
1839
  grpc_pollset_set_add_pollset_set(lb_policy->interested_parties(),
1400
- interested_parties());
1840
+ parent_->interested_parties());
1401
1841
  return lb_policy;
1402
1842
  }
1403
1843
 
1404
- void XdsLb::CreateOrUpdateChildPolicyLocked() {
1405
- if (shutting_down_) return;
1406
- // This should never be invoked if we do not have serverlist_, as fallback
1407
- // mode is disabled for xDS plugin.
1408
- // TODO(juanlishen): Change this as part of implementing fallback mode.
1409
- GPR_ASSERT(serverlist_ != nullptr);
1410
- GPR_ASSERT(serverlist_->num_servers > 0);
1844
+ void XdsLb::LocalityMap::LocalityEntry::UpdateLocked(
1845
+ xds_grpclb_serverlist* serverlist,
1846
+ ParsedLoadBalancingConfig* child_policy_config,
1847
+ const grpc_channel_args* args_in) {
1848
+ if (parent_->shutting_down_) return;
1411
1849
  // Construct update args.
1412
1850
  UpdateArgs update_args;
1413
- update_args.addresses = ProcessServerlist(serverlist_);
1414
- update_args.config = child_policy_config_;
1415
- update_args.args = CreateChildPolicyArgsLocked();
1851
+ update_args.addresses = ProcessServerlist(serverlist);
1852
+ update_args.config =
1853
+ child_policy_config == nullptr ? nullptr : child_policy_config->Ref();
1854
+ update_args.args = CreateChildPolicyArgsLocked(args_in);
1416
1855
  // If the child policy name changes, we need to create a new child
1417
1856
  // policy. When this happens, we leave child_policy_ as-is and store
1418
1857
  // the new child policy in pending_child_policy_. Once the new child
@@ -1464,9 +1903,9 @@ void XdsLb::CreateOrUpdateChildPolicyLocked() {
1464
1903
  // when the new child transitions into state READY.
1465
1904
  // TODO(juanlishen): If the child policy is not configured via service config,
1466
1905
  // use whatever algorithm is specified by the balancer.
1467
- const char* child_policy_name = child_policy_config_ == nullptr
1906
+ const char* child_policy_name = child_policy_config == nullptr
1468
1907
  ? "round_robin"
1469
- : child_policy_config_->name();
1908
+ : child_policy_config->name();
1470
1909
  const bool create_policy =
1471
1910
  // case 1
1472
1911
  child_policy_ == nullptr ||
@@ -1481,7 +1920,7 @@ void XdsLb::CreateOrUpdateChildPolicyLocked() {
1481
1920
  // Cases 1, 2b, and 3b: create a new child policy.
1482
1921
  // If child_policy_ is null, we set it (case 1), else we set
1483
1922
  // pending_child_policy_ (cases 2b and 3b).
1484
- if (grpc_lb_xds_trace.enabled()) {
1923
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
1485
1924
  gpr_log(GPR_INFO, "[xdslb %p] Creating new %schild policy %s", this,
1486
1925
  child_policy_ == nullptr ? "" : "pending ", child_policy_name);
1487
1926
  }
@@ -1504,7 +1943,7 @@ void XdsLb::CreateOrUpdateChildPolicyLocked() {
1504
1943
  }
1505
1944
  GPR_ASSERT(policy_to_update != nullptr);
1506
1945
  // Update the policy.
1507
- if (grpc_lb_xds_trace.enabled()) {
1946
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
1508
1947
  gpr_log(GPR_INFO, "[xdslb %p] Updating %schild policy %p", this,
1509
1948
  policy_to_update == pending_child_policy_.get() ? "pending " : "",
1510
1949
  policy_to_update);
@@ -1512,6 +1951,201 @@ void XdsLb::CreateOrUpdateChildPolicyLocked() {
1512
1951
  policy_to_update->UpdateLocked(std::move(update_args));
1513
1952
  }
1514
1953
 
1954
+ void XdsLb::LocalityMap::LocalityEntry::ShutdownLocked() {
1955
+ // Remove the child policy's interested_parties pollset_set from the
1956
+ // xDS policy.
1957
+ grpc_pollset_set_del_pollset_set(child_policy_->interested_parties(),
1958
+ parent_->interested_parties());
1959
+ if (pending_child_policy_ != nullptr) {
1960
+ grpc_pollset_set_del_pollset_set(
1961
+ pending_child_policy_->interested_parties(),
1962
+ parent_->interested_parties());
1963
+ }
1964
+ {
1965
+ MutexLock lock(&child_policy_mu_);
1966
+ child_policy_.reset();
1967
+ pending_child_policy_.reset();
1968
+ }
1969
+ }
1970
+
1971
+ void XdsLb::LocalityMap::LocalityEntry::ResetBackoffLocked() {
1972
+ child_policy_->ResetBackoffLocked();
1973
+ if (pending_child_policy_ != nullptr) {
1974
+ pending_child_policy_->ResetBackoffLocked();
1975
+ }
1976
+ }
1977
+
1978
+ void XdsLb::LocalityMap::LocalityEntry::FillChildRefsForChannelz(
1979
+ channelz::ChildRefsList* child_subchannels,
1980
+ channelz::ChildRefsList* child_channels) {
1981
+ MutexLock lock(&child_policy_mu_);
1982
+ child_policy_->FillChildRefsForChannelz(child_subchannels, child_channels);
1983
+ if (pending_child_policy_ != nullptr) {
1984
+ pending_child_policy_->FillChildRefsForChannelz(child_subchannels,
1985
+ child_channels);
1986
+ }
1987
+ }
1988
+
1989
+ void XdsLb::LocalityMap::LocalityEntry::Orphan() {
1990
+ ShutdownLocked();
1991
+ Unref();
1992
+ }
1993
+
1994
+ //
1995
+ // XdsLb::LocalityEntry::Helper
1996
+ //
1997
+
1998
+ bool XdsLb::LocalityMap::LocalityEntry::Helper::CalledByPendingChild() const {
1999
+ GPR_ASSERT(child_ != nullptr);
2000
+ return child_ == entry_->pending_child_policy_.get();
2001
+ }
2002
+
2003
+ bool XdsLb::LocalityMap::LocalityEntry::Helper::CalledByCurrentChild() const {
2004
+ GPR_ASSERT(child_ != nullptr);
2005
+ return child_ == entry_->child_policy_.get();
2006
+ }
2007
+
2008
+ Subchannel* XdsLb::LocalityMap::LocalityEntry::Helper::CreateSubchannel(
2009
+ const grpc_channel_args& args) {
2010
+ if (entry_->parent_->shutting_down_ ||
2011
+ (!CalledByPendingChild() && !CalledByCurrentChild())) {
2012
+ return nullptr;
2013
+ }
2014
+ return entry_->parent_->channel_control_helper()->CreateSubchannel(args);
2015
+ }
2016
+
2017
+ grpc_channel* XdsLb::LocalityMap::LocalityEntry::Helper::CreateChannel(
2018
+ const char* target, const grpc_channel_args& args) {
2019
+ if (entry_->parent_->shutting_down_ ||
2020
+ (!CalledByPendingChild() && !CalledByCurrentChild())) {
2021
+ return nullptr;
2022
+ }
2023
+ return entry_->parent_->channel_control_helper()->CreateChannel(target, args);
2024
+ }
2025
+
2026
+ void XdsLb::LocalityMap::LocalityEntry::Helper::UpdateState(
2027
+ grpc_connectivity_state state, UniquePtr<SubchannelPicker> picker) {
2028
+ if (entry_->parent_->shutting_down_) return;
2029
+ // If this request is from the pending child policy, ignore it until
2030
+ // it reports READY, at which point we swap it into place.
2031
+ if (CalledByPendingChild()) {
2032
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
2033
+ gpr_log(GPR_INFO,
2034
+ "[xdslb %p helper %p] pending child policy %p reports state=%s",
2035
+ entry_->parent_.get(), this, entry_->pending_child_policy_.get(),
2036
+ grpc_connectivity_state_name(state));
2037
+ }
2038
+ if (state != GRPC_CHANNEL_READY) return;
2039
+ grpc_pollset_set_del_pollset_set(
2040
+ entry_->child_policy_->interested_parties(),
2041
+ entry_->parent_->interested_parties());
2042
+ MutexLock lock(&entry_->child_policy_mu_);
2043
+ entry_->child_policy_ = std::move(entry_->pending_child_policy_);
2044
+ } else if (!CalledByCurrentChild()) {
2045
+ // This request is from an outdated child, so ignore it.
2046
+ return;
2047
+ }
2048
+ // At this point, child_ must be the current child policy.
2049
+ if (state == GRPC_CHANNEL_READY) entry_->parent_->MaybeExitFallbackMode();
2050
+ // If we are in fallback mode, ignore update request from the child policy.
2051
+ if (entry_->parent_->fallback_policy_ != nullptr) return;
2052
+ GPR_ASSERT(entry_->parent_->lb_chand_ != nullptr);
2053
+ RefCountedPtr<XdsLbClientStats> client_stats =
2054
+ entry_->parent_->lb_chand_->lb_calld() == nullptr
2055
+ ? nullptr
2056
+ : entry_->parent_->lb_chand_->lb_calld()->client_stats();
2057
+ // Cache the picker and its state in the entry
2058
+ entry_->picker_ref_ = MakeRefCounted<PickerRef>(std::move(picker));
2059
+ entry_->connectivity_state_ = state;
2060
+ // Construct a new xds picker which maintains a map of all locality pickers
2061
+ // that are ready. Each locality is represented by a portion of the range
2062
+ // proportional to its weight, such that the total range is the sum of the
2063
+ // weights of all localities
2064
+ uint32_t end = 0;
2065
+ size_t num_connecting = 0;
2066
+ size_t num_idle = 0;
2067
+ size_t num_transient_failures = 0;
2068
+ auto& locality_map = this->entry_->parent_->locality_map_.map_;
2069
+ Picker::PickerList pickers;
2070
+ for (auto& p : locality_map) {
2071
+ const LocalityEntry* entry = p.second.get();
2072
+ grpc_connectivity_state connectivity_state = entry->connectivity_state_;
2073
+ switch (connectivity_state) {
2074
+ case GRPC_CHANNEL_READY: {
2075
+ end += entry->locality_weight_;
2076
+ pickers.push_back(MakePair(end, entry->picker_ref_));
2077
+ break;
2078
+ }
2079
+ case GRPC_CHANNEL_CONNECTING: {
2080
+ num_connecting++;
2081
+ break;
2082
+ }
2083
+ case GRPC_CHANNEL_IDLE: {
2084
+ num_idle++;
2085
+ break;
2086
+ }
2087
+ case GRPC_CHANNEL_TRANSIENT_FAILURE: {
2088
+ num_transient_failures++;
2089
+ break;
2090
+ }
2091
+ default: {
2092
+ gpr_log(GPR_ERROR, "Invalid locality connectivity state - %d",
2093
+ connectivity_state);
2094
+ }
2095
+ }
2096
+ }
2097
+ // Pass on the constructed xds picker if it has any ready pickers in their map
2098
+ // otherwise pass a QueuePicker if any of the locality pickers are in a
2099
+ // connecting or idle state, finally return a transient failure picker if all
2100
+ // locality pickers are in transient failure
2101
+ if (pickers.size() > 0) {
2102
+ entry_->parent_->channel_control_helper()->UpdateState(
2103
+ GRPC_CHANNEL_READY,
2104
+ UniquePtr<LoadBalancingPolicy::SubchannelPicker>(
2105
+ New<Picker>(std::move(client_stats), std::move(pickers))));
2106
+ } else if (num_connecting > 0) {
2107
+ entry_->parent_->channel_control_helper()->UpdateState(
2108
+ GRPC_CHANNEL_CONNECTING,
2109
+ UniquePtr<SubchannelPicker>(New<QueuePicker>(this->entry_->parent_)));
2110
+ } else if (num_idle > 0) {
2111
+ entry_->parent_->channel_control_helper()->UpdateState(
2112
+ GRPC_CHANNEL_IDLE,
2113
+ UniquePtr<SubchannelPicker>(New<QueuePicker>(this->entry_->parent_)));
2114
+ } else {
2115
+ GPR_ASSERT(num_transient_failures == locality_map.size());
2116
+ grpc_error* error =
2117
+ grpc_error_set_int(GRPC_ERROR_CREATE_FROM_STATIC_STRING(
2118
+ "connections to all localities failing"),
2119
+ GRPC_ERROR_INT_GRPC_STATUS, GRPC_STATUS_UNAVAILABLE);
2120
+ entry_->parent_->channel_control_helper()->UpdateState(
2121
+ state, UniquePtr<SubchannelPicker>(New<TransientFailurePicker>(error)));
2122
+ }
2123
+ }
2124
+
2125
+ void XdsLb::LocalityMap::LocalityEntry::Helper::RequestReresolution() {
2126
+ if (entry_->parent_->shutting_down_) return;
2127
+ // If there is a pending child policy, ignore re-resolution requests
2128
+ // from the current child policy (or any outdated child).
2129
+ if (entry_->pending_child_policy_ != nullptr && !CalledByPendingChild()) {
2130
+ return;
2131
+ }
2132
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
2133
+ gpr_log(GPR_INFO,
2134
+ "[xdslb %p] Re-resolution requested from the internal RR policy "
2135
+ "(%p).",
2136
+ entry_->parent_.get(), entry_->child_policy_.get());
2137
+ }
2138
+ GPR_ASSERT(entry_->parent_->lb_chand_ != nullptr);
2139
+ // If we are talking to a balancer, we expect to get updated addresses
2140
+ // from the balancer, so we can ignore the re-resolution request from
2141
+ // the child policy. Otherwise, pass the re-resolution request up to the
2142
+ // channel.
2143
+ if (entry_->parent_->lb_chand_->lb_calld() == nullptr ||
2144
+ !entry_->parent_->lb_chand_->lb_calld()->seen_initial_response()) {
2145
+ entry_->parent_->channel_control_helper()->RequestReresolution();
2146
+ }
2147
+ }
2148
+
1515
2149
  //
1516
2150
  // factory
1517
2151
  //
@@ -1524,6 +2158,77 @@ class XdsFactory : public LoadBalancingPolicyFactory {
1524
2158
  }
1525
2159
 
1526
2160
  const char* name() const override { return kXds; }
2161
+
2162
+ RefCountedPtr<ParsedLoadBalancingConfig> ParseLoadBalancingConfig(
2163
+ const grpc_json* json, grpc_error** error) const override {
2164
+ GPR_DEBUG_ASSERT(error != nullptr && *error == GRPC_ERROR_NONE);
2165
+ if (json == nullptr) {
2166
+ // xds was mentioned as a policy in the deprecated loadBalancingPolicy
2167
+ // field or in the client API.
2168
+ *error = GRPC_ERROR_CREATE_FROM_STATIC_STRING(
2169
+ "field:loadBalancingPolicy error:Xds Parser has required field - "
2170
+ "balancerName. Please use loadBalancingConfig field of service "
2171
+ "config instead.");
2172
+ return nullptr;
2173
+ }
2174
+ GPR_DEBUG_ASSERT(strcmp(json->key, name()) == 0);
2175
+
2176
+ InlinedVector<grpc_error*, 3> error_list;
2177
+ const char* balancer_name = nullptr;
2178
+ RefCountedPtr<ParsedLoadBalancingConfig> child_policy;
2179
+ RefCountedPtr<ParsedLoadBalancingConfig> fallback_policy;
2180
+ for (const grpc_json* field = json->child; field != nullptr;
2181
+ field = field->next) {
2182
+ if (field->key == nullptr) continue;
2183
+ if (strcmp(field->key, "balancerName") == 0) {
2184
+ if (balancer_name != nullptr) {
2185
+ error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING(
2186
+ "field:balancerName error:Duplicate entry"));
2187
+ }
2188
+ if (field->type != GRPC_JSON_STRING) {
2189
+ error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING(
2190
+ "field:balancerName error:type should be string"));
2191
+ continue;
2192
+ }
2193
+ balancer_name = field->value;
2194
+ } else if (strcmp(field->key, "childPolicy") == 0) {
2195
+ if (child_policy != nullptr) {
2196
+ error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING(
2197
+ "field:childPolicy error:Duplicate entry"));
2198
+ }
2199
+ grpc_error* parse_error = GRPC_ERROR_NONE;
2200
+ child_policy = LoadBalancingPolicyRegistry::ParseLoadBalancingConfig(
2201
+ field, &parse_error);
2202
+ if (child_policy == nullptr) {
2203
+ GPR_DEBUG_ASSERT(parse_error != GRPC_ERROR_NONE);
2204
+ error_list.push_back(parse_error);
2205
+ }
2206
+ } else if (strcmp(field->key, "fallbackPolicy") == 0) {
2207
+ if (fallback_policy != nullptr) {
2208
+ error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING(
2209
+ "field:fallbackPolicy error:Duplicate entry"));
2210
+ }
2211
+ grpc_error* parse_error = GRPC_ERROR_NONE;
2212
+ fallback_policy = LoadBalancingPolicyRegistry::ParseLoadBalancingConfig(
2213
+ field, &parse_error);
2214
+ if (fallback_policy == nullptr) {
2215
+ GPR_DEBUG_ASSERT(parse_error != GRPC_ERROR_NONE);
2216
+ error_list.push_back(parse_error);
2217
+ }
2218
+ }
2219
+ }
2220
+ if (balancer_name == nullptr) {
2221
+ error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING(
2222
+ "field:balancerName error:not found"));
2223
+ }
2224
+ if (error_list.empty()) {
2225
+ return RefCountedPtr<ParsedLoadBalancingConfig>(New<ParsedXdsConfig>(
2226
+ balancer_name, std::move(child_policy), std::move(fallback_policy)));
2227
+ } else {
2228
+ *error = GRPC_ERROR_CREATE_FROM_VECTOR("Xds Parser", &error_list);
2229
+ return nullptr;
2230
+ }
2231
+ }
1527
2232
  };
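Annotation: the new ParseLoadBalancingConfig expects the xds policy entry under the service config's loadBalancingConfig list, with a required balancerName string and optional childPolicy / fallbackPolicy configs. A hedged example of a matching service config, written as a C++ string constant the way gRPC tests typically embed JSON; the outer key must match whatever this factory registers as kXds, and both the "xds_experimental" key and the balancer target below are assumptions made for the example.

// Illustrative service config accepted by the parser above; the policy key
// and balancer address are placeholders for this sketch.
const char* kExampleServiceConfig = R"json({
  "loadBalancingConfig": [
    {
      "xds_experimental": {
        "balancerName": "dns:///balancer.example.com:443",
        "childPolicy": [ { "round_robin": {} } ],
        "fallbackPolicy": [ { "pick_first": {} } ]
      }
    }
  ]
})json";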
1528
2233
 
1529
2234
  } // namespace