grpc 1.23.1 → 1.24.0.pre1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of grpc might be problematic. Click here for more details.

Files changed (245)
  1. checksums.yaml +4 -4
  2. data/Makefile +757 -477
  3. data/include/grpc/grpc.h +3 -1
  4. data/include/grpc/grpc_security.h +20 -4
  5. data/include/grpc/impl/codegen/grpc_types.h +6 -5
  6. data/include/grpc/impl/codegen/port_platform.h +25 -0
  7. data/src/core/ext/filters/client_channel/backend_metric.cc +78 -0
  8. data/src/core/ext/filters/client_channel/backend_metric.h +36 -0
  9. data/src/core/ext/filters/client_channel/channel_connectivity.cc +16 -2
  10. data/src/core/ext/filters/client_channel/client_channel.cc +325 -267
  11. data/src/core/ext/filters/client_channel/client_channel_factory.h +0 -4
  12. data/src/core/ext/filters/client_channel/health/health_check_client.cc +23 -32
  13. data/src/core/ext/filters/client_channel/http_proxy.cc +7 -3
  14. data/src/core/ext/filters/client_channel/lb_policy.cc +1 -1
  15. data/src/core/ext/filters/client_channel/lb_policy.h +58 -34
  16. data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc +46 -50
  17. data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_channel.h +9 -2
  18. data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_channel_secure.cc +35 -17
  19. data/src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.cc +130 -215
  20. data/src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.h +34 -21
  21. data/src/core/ext/filters/client_channel/lb_policy/xds/xds.cc +1120 -802
  22. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_channel.h +8 -2
  23. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_channel_secure.cc +33 -12
  24. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_client_stats.cc +151 -40
  25. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_client_stats.h +184 -26
  26. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_load_balancer_api.cc +389 -245
  27. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_load_balancer_api.h +98 -60
  28. data/src/core/ext/filters/client_channel/lb_policy_registry.cc +6 -1
  29. data/src/core/ext/filters/client_channel/resolver/xds/xds_resolver.cc +89 -0
  30. data/src/core/ext/filters/client_channel/resolving_lb_policy.cc +3 -8
  31. data/src/core/ext/filters/client_channel/server_address.cc +1 -3
  32. data/src/core/ext/filters/client_channel/server_address.h +1 -1
  33. data/src/core/ext/filters/client_channel/subchannel.h +2 -1
  34. data/src/core/ext/filters/client_idle/client_idle_filter.cc +207 -29
  35. data/src/core/ext/filters/http/client/http_client_filter.cc +10 -8
  36. data/src/core/ext/filters/http/client_authority_filter.cc +1 -1
  37. data/src/core/ext/filters/http/message_compress/message_compress_filter.cc +10 -7
  38. data/src/core/ext/filters/http/server/http_server_filter.cc +52 -26
  39. data/src/core/ext/transport/chttp2/client/insecure/channel_create.cc +23 -20
  40. data/src/core/ext/transport/chttp2/client/secure/secure_channel_create.cc +24 -21
  41. data/src/core/ext/transport/chttp2/server/chttp2_server.cc +1 -1
  42. data/src/core/ext/transport/chttp2/transport/chttp2_transport.cc +37 -24
  43. data/src/core/ext/transport/chttp2/transport/chttp2_transport.h +1 -0
  44. data/src/core/ext/transport/chttp2/transport/hpack_encoder.cc +237 -191
  45. data/src/core/ext/transport/chttp2/transport/hpack_parser.cc +29 -27
  46. data/src/core/ext/transport/chttp2/transport/hpack_parser.h +1 -1
  47. data/src/core/ext/transport/chttp2/transport/hpack_table.cc +19 -4
  48. data/src/core/ext/transport/chttp2/transport/hpack_table.h +13 -4
  49. data/src/core/ext/transport/chttp2/transport/incoming_metadata.cc +2 -1
  50. data/src/core/ext/transport/chttp2/transport/internal.h +0 -2
  51. data/src/core/ext/transport/chttp2/transport/parsing.cc +99 -71
  52. data/src/core/ext/upb-generated/envoy/api/v2/auth/cert.upb.c +222 -0
  53. data/src/core/ext/upb-generated/envoy/api/v2/auth/cert.upb.h +818 -0
  54. data/src/core/ext/upb-generated/envoy/api/v2/cds.upb.c +314 -0
  55. data/src/core/ext/upb-generated/envoy/api/v2/cds.upb.h +1142 -0
  56. data/src/core/ext/upb-generated/envoy/api/v2/cluster/circuit_breaker.upb.c +53 -0
  57. data/src/core/ext/upb-generated/envoy/api/v2/cluster/circuit_breaker.upb.h +158 -0
  58. data/src/core/ext/upb-generated/envoy/api/v2/cluster/filter.upb.c +34 -0
  59. data/src/core/ext/upb-generated/envoy/api/v2/cluster/filter.upb.h +69 -0
  60. data/src/core/ext/upb-generated/envoy/api/v2/cluster/outlier_detection.upb.c +49 -0
  61. data/src/core/ext/upb-generated/envoy/api/v2/cluster/outlier_detection.upb.h +240 -0
  62. data/src/core/ext/upb-generated/envoy/api/v2/core/address.upb.c +110 -0
  63. data/src/core/ext/upb-generated/envoy/api/v2/core/address.upb.h +324 -0
  64. data/src/core/ext/upb-generated/envoy/api/v2/core/base.upb.c +235 -0
  65. data/src/core/ext/upb-generated/envoy/api/v2/core/base.upb.h +661 -0
  66. data/src/core/ext/upb-generated/envoy/api/v2/core/config_source.upb.c +84 -0
  67. data/src/core/ext/upb-generated/envoy/api/v2/core/config_source.upb.h +274 -0
  68. data/src/core/ext/upb-generated/envoy/api/v2/core/grpc_service.upb.c +175 -0
  69. data/src/core/ext/upb-generated/envoy/api/v2/core/grpc_service.upb.h +572 -0
  70. data/src/core/ext/upb-generated/envoy/api/v2/core/health_check.upb.c +150 -0
  71. data/src/core/ext/upb-generated/envoy/api/v2/core/health_check.upb.h +596 -0
  72. data/src/core/ext/upb-generated/envoy/api/v2/core/http_uri.upb.c +35 -0
  73. data/src/core/ext/upb-generated/envoy/api/v2/core/http_uri.upb.h +80 -0
  74. data/src/core/ext/upb-generated/envoy/api/v2/core/protocol.upb.c +95 -0
  75. data/src/core/ext/upb-generated/envoy/api/v2/core/protocol.upb.h +308 -0
  76. data/src/core/ext/upb-generated/envoy/api/v2/discovery.upb.c +128 -0
  77. data/src/core/ext/upb-generated/envoy/api/v2/discovery.upb.h +392 -0
  78. data/src/core/ext/upb-generated/envoy/api/v2/eds.upb.c +91 -0
  79. data/src/core/ext/upb-generated/envoy/api/v2/eds.upb.h +236 -0
  80. data/src/core/ext/upb-generated/envoy/api/v2/endpoint/endpoint.upb.c +88 -0
  81. data/src/core/ext/upb-generated/envoy/api/v2/endpoint/endpoint.upb.h +258 -0
  82. data/src/core/ext/upb-generated/envoy/api/v2/endpoint/load_report.upb.c +111 -0
  83. data/src/core/ext/upb-generated/envoy/api/v2/endpoint/load_report.upb.h +324 -0
  84. data/src/core/ext/upb-generated/envoy/service/discovery/v2/ads.upb.c +23 -0
  85. data/src/core/ext/upb-generated/envoy/service/discovery/v2/ads.upb.h +50 -0
  86. data/src/core/ext/upb-generated/envoy/service/load_stats/v2/lrs.upb.c +52 -0
  87. data/src/core/ext/upb-generated/envoy/service/load_stats/v2/lrs.upb.h +130 -0
  88. data/src/core/ext/upb-generated/envoy/type/percent.upb.c +39 -0
  89. data/src/core/ext/upb-generated/envoy/type/percent.upb.h +87 -0
  90. data/src/core/ext/upb-generated/envoy/type/range.upb.c +39 -0
  91. data/src/core/ext/upb-generated/envoy/type/range.upb.h +85 -0
  92. data/src/core/ext/upb-generated/gogoproto/gogo.upb.c +17 -0
  93. data/src/core/ext/upb-generated/gogoproto/gogo.upb.h +30 -0
  94. data/src/core/ext/upb-generated/google/api/annotations.upb.c +18 -0
  95. data/src/core/ext/upb-generated/google/api/annotations.upb.h +30 -0
  96. data/src/core/ext/upb-generated/google/api/http.upb.c +66 -0
  97. data/src/core/ext/upb-generated/google/api/http.upb.h +190 -0
  98. data/src/core/ext/upb-generated/google/protobuf/any.upb.c +27 -0
  99. data/src/core/ext/upb-generated/google/protobuf/any.upb.h +58 -0
  100. data/src/core/ext/upb-generated/google/protobuf/descriptor.upb.c +485 -0
  101. data/src/core/ext/upb-generated/google/protobuf/descriptor.upb.h +1690 -0
  102. data/src/core/ext/upb-generated/google/protobuf/duration.upb.c +27 -0
  103. data/src/core/ext/upb-generated/google/protobuf/duration.upb.h +58 -0
  104. data/src/core/ext/upb-generated/google/protobuf/empty.upb.c +22 -0
  105. data/src/core/ext/upb-generated/google/protobuf/empty.upb.h +50 -0
  106. data/src/core/ext/upb-generated/google/protobuf/struct.upb.c +79 -0
  107. data/src/core/ext/upb-generated/google/protobuf/struct.upb.h +215 -0
  108. data/src/core/ext/upb-generated/google/protobuf/timestamp.upb.c +27 -0
  109. data/src/core/ext/upb-generated/google/protobuf/timestamp.upb.h +58 -0
  110. data/src/core/ext/upb-generated/google/protobuf/wrappers.upb.c +106 -0
  111. data/src/core/ext/upb-generated/google/protobuf/wrappers.upb.h +238 -0
  112. data/src/core/ext/upb-generated/google/rpc/status.upb.c +33 -0
  113. data/src/core/ext/upb-generated/google/rpc/status.upb.h +74 -0
  114. data/src/core/ext/upb-generated/src/proto/grpc/gcp/altscontext.upb.c +49 -0
  115. data/src/core/ext/upb-generated/src/proto/grpc/gcp/altscontext.upb.h +126 -0
  116. data/src/core/ext/upb-generated/src/proto/grpc/gcp/handshaker.upb.c +209 -0
  117. data/src/core/ext/upb-generated/src/proto/grpc/gcp/handshaker.upb.h +681 -0
  118. data/src/core/ext/upb-generated/src/proto/grpc/gcp/transport_security_common.upb.c +42 -0
  119. data/src/core/ext/upb-generated/src/proto/grpc/gcp/transport_security_common.upb.h +109 -0
  120. data/src/core/ext/upb-generated/src/proto/grpc/health/v1/health.upb.c +36 -0
  121. data/src/core/ext/upb-generated/src/proto/grpc/health/v1/health.upb.h +84 -0
  122. data/src/core/ext/upb-generated/src/proto/grpc/lb/v1/load_balancer.upb.c +133 -0
  123. data/src/core/ext/upb-generated/src/proto/grpc/lb/v1/load_balancer.upb.h +359 -0
  124. data/src/core/ext/upb-generated/udpa/data/orca/v1/orca_load_report.upb.c +58 -0
  125. data/src/core/ext/upb-generated/udpa/data/orca/v1/orca_load_report.upb.h +144 -0
  126. data/src/core/ext/upb-generated/validate/validate.upb.c +443 -0
  127. data/src/core/ext/upb-generated/validate/validate.upb.h +2037 -0
  128. data/src/core/lib/channel/channel_args.cc +21 -0
  129. data/src/core/lib/channel/channel_args.h +16 -2
  130. data/src/core/lib/channel/channel_stack.h +2 -1
  131. data/src/core/lib/channel/channelz.cc +54 -56
  132. data/src/core/lib/channel/channelz.h +29 -12
  133. data/src/core/lib/compression/compression.cc +2 -1
  134. data/src/core/lib/compression/compression_internal.h +8 -0
  135. data/src/core/lib/gpr/log_linux.cc +2 -2
  136. data/src/core/lib/gpr/log_posix.cc +2 -2
  137. data/src/core/lib/gpr/time_precise.cc +123 -36
  138. data/src/core/lib/gpr/time_precise.h +37 -0
  139. data/src/core/lib/gprpp/abstract.h +10 -0
  140. data/src/core/lib/gprpp/atomic.h +4 -0
  141. data/src/core/lib/gprpp/inlined_vector.h +20 -4
  142. data/src/core/lib/gprpp/map.h +109 -6
  143. data/src/core/lib/gprpp/memory.h +6 -0
  144. data/src/core/lib/gprpp/ref_counted_ptr.h +2 -0
  145. data/src/core/lib/iomgr/ev_epollex_linux.cc +29 -54
  146. data/src/core/lib/iomgr/exec_ctx.cc +27 -17
  147. data/src/core/lib/iomgr/exec_ctx.h +3 -0
  148. data/src/core/lib/iomgr/sockaddr_utils.cc +1 -3
  149. data/src/core/lib/iomgr/tcp_posix.cc +16 -25
  150. data/src/core/lib/iomgr/tcp_server_custom.cc +1 -1
  151. data/src/core/lib/iomgr/timer_manager.cc +8 -1
  152. data/src/core/lib/iomgr/timer_manager.h +2 -0
  153. data/src/core/lib/security/credentials/credentials.h +8 -2
  154. data/src/core/lib/security/credentials/tls/grpc_tls_credentials_options.cc +23 -0
  155. data/src/core/lib/security/credentials/tls/grpc_tls_credentials_options.h +3 -0
  156. data/src/core/lib/security/credentials/tls/spiffe_credentials.cc +3 -3
  157. data/src/core/lib/security/security_connector/ssl_utils.cc +1 -12
  158. data/src/core/lib/security/security_connector/ssl_utils.h +10 -6
  159. data/src/core/lib/security/security_connector/ssl_utils_config.cc +32 -0
  160. data/src/core/lib/security/security_connector/ssl_utils_config.h +30 -0
  161. data/src/core/lib/security/security_connector/tls/spiffe_security_connector.cc +161 -49
  162. data/src/core/lib/security/security_connector/tls/spiffe_security_connector.h +34 -1
  163. data/src/core/lib/slice/slice_intern.cc +17 -9
  164. data/src/core/lib/slice/slice_internal.h +34 -7
  165. data/src/core/lib/slice/slice_utils.h +7 -3
  166. data/src/core/lib/surface/call.cc +97 -57
  167. data/src/core/lib/surface/channel.cc +2 -2
  168. data/src/core/lib/surface/completion_queue.cc +10 -16
  169. data/src/core/lib/surface/init.cc +3 -0
  170. data/src/core/lib/surface/server.cc +11 -14
  171. data/src/core/lib/surface/validate_metadata.cc +4 -0
  172. data/src/core/lib/surface/version.cc +2 -2
  173. data/src/core/lib/transport/metadata.cc +4 -4
  174. data/src/core/lib/transport/metadata_batch.cc +72 -16
  175. data/src/core/lib/transport/metadata_batch.h +38 -0
  176. data/src/core/lib/transport/static_metadata.cc +814 -1023
  177. data/src/core/lib/transport/static_metadata.h +271 -213
  178. data/src/core/lib/transport/transport.h +12 -0
  179. data/src/core/plugin_registry/grpc_plugin_registry.cc +4 -0
  180. data/src/core/tsi/alts/handshaker/alts_handshaker_client.cc +104 -76
  181. data/src/core/tsi/alts/handshaker/alts_tsi_handshaker.cc +34 -16
  182. data/src/core/tsi/alts/handshaker/alts_tsi_handshaker.h +2 -2
  183. data/src/core/tsi/alts/handshaker/alts_tsi_utils.cc +10 -6
  184. data/src/core/tsi/alts/handshaker/alts_tsi_utils.h +4 -3
  185. data/src/core/tsi/alts/handshaker/transport_security_common_api.cc +74 -48
  186. data/src/core/tsi/alts/handshaker/transport_security_common_api.h +34 -26
  187. data/src/core/tsi/ssl_transport_security.cc +14 -6
  188. data/src/core/tsi/ssl_transport_security.h +4 -0
  189. data/src/ruby/ext/grpc/ext-export.clang +1 -0
  190. data/src/ruby/ext/grpc/ext-export.gcc +6 -0
  191. data/src/ruby/ext/grpc/extconf.rb +5 -0
  192. data/src/ruby/ext/grpc/rb_enable_cpp.cc +22 -0
  193. data/src/ruby/ext/grpc/rb_grpc.c +1 -42
  194. data/src/ruby/ext/grpc/rb_grpc_imports.generated.c +4 -0
  195. data/src/ruby/ext/grpc/rb_grpc_imports.generated.h +6 -0
  196. data/src/ruby/lib/grpc.rb +2 -0
  197. data/src/ruby/lib/grpc/core/status_codes.rb +135 -0
  198. data/src/ruby/lib/grpc/errors.rb +4 -7
  199. data/src/ruby/lib/grpc/google_rpc_status_utils.rb +9 -4
  200. data/src/ruby/lib/grpc/structs.rb +15 -0
  201. data/src/ruby/lib/grpc/version.rb +1 -1
  202. data/src/ruby/spec/errors_spec.rb +1 -0
  203. data/src/ruby/spec/pb/codegen/grpc/testing/package_options_import.proto +22 -0
  204. data/src/ruby/spec/pb/codegen/grpc/testing/package_options_ruby_style.proto +34 -0
  205. data/src/ruby/spec/pb/codegen/package_option_spec.rb +53 -29
  206. data/third_party/upb/upb/decode.c +604 -0
  207. data/third_party/upb/upb/decode.h +21 -0
  208. data/third_party/upb/upb/encode.c +378 -0
  209. data/third_party/upb/upb/encode.h +21 -0
  210. data/third_party/upb/upb/generated_util.h +105 -0
  211. data/third_party/upb/upb/msg.c +111 -0
  212. data/third_party/upb/upb/msg.h +69 -0
  213. data/third_party/upb/upb/port.c +27 -0
  214. data/third_party/upb/upb/port_def.inc +152 -0
  215. data/third_party/upb/upb/port_undef.inc +21 -0
  216. data/third_party/upb/upb/table.c +911 -0
  217. data/third_party/upb/upb/table.int.h +507 -0
  218. data/third_party/upb/upb/upb.c +261 -0
  219. data/third_party/upb/upb/upb.h +364 -0
  220. metadata +134 -55
  221. data/src/core/ext/filters/client_channel/health/health.pb.c +0 -23
  222. data/src/core/ext/filters/client_channel/health/health.pb.h +0 -73
  223. data/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/google/protobuf/duration.pb.c +0 -19
  224. data/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/google/protobuf/duration.pb.h +0 -54
  225. data/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/google/protobuf/timestamp.pb.c +0 -19
  226. data/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/google/protobuf/timestamp.pb.h +0 -54
  227. data/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/load_balancer.pb.c +0 -89
  228. data/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/load_balancer.pb.h +0 -164
  229. data/src/core/tsi/alts/handshaker/alts_handshaker_service_api.cc +0 -520
  230. data/src/core/tsi/alts/handshaker/alts_handshaker_service_api.h +0 -323
  231. data/src/core/tsi/alts/handshaker/alts_handshaker_service_api_util.cc +0 -145
  232. data/src/core/tsi/alts/handshaker/alts_handshaker_service_api_util.h +0 -149
  233. data/src/core/tsi/alts/handshaker/altscontext.pb.c +0 -47
  234. data/src/core/tsi/alts/handshaker/altscontext.pb.h +0 -63
  235. data/src/core/tsi/alts/handshaker/handshaker.pb.c +0 -122
  236. data/src/core/tsi/alts/handshaker/handshaker.pb.h +0 -254
  237. data/src/core/tsi/alts/handshaker/transport_security_common.pb.c +0 -49
  238. data/src/core/tsi/alts/handshaker/transport_security_common.pb.h +0 -78
  239. data/third_party/nanopb/pb.h +0 -579
  240. data/third_party/nanopb/pb_common.c +0 -97
  241. data/third_party/nanopb/pb_common.h +0 -42
  242. data/third_party/nanopb/pb_decode.c +0 -1347
  243. data/third_party/nanopb/pb_decode.h +0 -149
  244. data/third_party/nanopb/pb_encode.c +0 -696
  245. data/third_party/nanopb/pb_encode.h +0 -154
@@ -24,38 +24,57 @@
24
24
  #include <grpc/slice_buffer.h>
25
25
 
26
26
  #include "src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.h"
27
- #include "src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/load_balancer.pb.h"
28
27
  #include "src/core/lib/iomgr/exec_ctx.h"
28
+ #include "src/proto/grpc/lb/v1/load_balancer.upb.h"
29
29
 
30
30
  #define GRPC_GRPCLB_SERVICE_NAME_MAX_LENGTH 128
31
+ #define GRPC_GRPCLB_SERVER_IP_ADDRESS_MAX_SIZE 16
32
+ #define GRPC_GRPCLB_SERVER_LOAD_BALANCE_TOKEN_MAX_SIZE 50
33
+
34
+ namespace grpc_core {
31
35
 
32
- typedef grpc_lb_v1_Server_ip_address_t grpc_grpclb_ip_address;
33
36
  typedef grpc_lb_v1_LoadBalanceRequest grpc_grpclb_request;
37
+ typedef grpc_lb_v1_LoadBalanceResponse grpc_grpclb_response;
34
38
  typedef grpc_lb_v1_InitialLoadBalanceResponse grpc_grpclb_initial_response;
35
- typedef grpc_lb_v1_Server grpc_grpclb_server;
36
39
  typedef google_protobuf_Duration grpc_grpclb_duration;
37
40
  typedef google_protobuf_Timestamp grpc_grpclb_timestamp;
38
41
 
42
+ typedef struct {
43
+ int32_t size;
44
+ char data[GRPC_GRPCLB_SERVER_IP_ADDRESS_MAX_SIZE];
45
+ } grpc_grpclb_server_ip_address;
46
+
47
+ // Contains server information. When the drop field is not true, use the other
48
+ // fields.
49
+ typedef struct {
50
+ grpc_grpclb_server_ip_address ip_address;
51
+ int32_t port;
52
+ char load_balance_token[GRPC_GRPCLB_SERVER_LOAD_BALANCE_TOKEN_MAX_SIZE];
53
+ bool drop;
54
+ } grpc_grpclb_server;
55
+
39
56
  typedef struct {
40
57
  grpc_grpclb_server** servers;
41
58
  size_t num_servers;
42
59
  } grpc_grpclb_serverlist;
43
60
 
44
- /** Create a request for a gRPC LB service under \a lb_service_name */
45
- grpc_grpclb_request* grpc_grpclb_request_create(const char* lb_service_name);
61
+ /**
62
+ * Create a request for a gRPC LB service under \a lb_service_name.
63
+ * \a lb_service_name should be alive when returned request is being used.
64
+ */
65
+ grpc_grpclb_request* grpc_grpclb_request_create(const char* lb_service_name,
66
+ upb_arena* arena);
46
67
  grpc_grpclb_request* grpc_grpclb_load_report_request_create(
47
- grpc_core::GrpcLbClientStats* client_stats);
68
+ grpc_core::GrpcLbClientStats* client_stats, upb_arena* arena);
48
69
 
49
70
  /** Protocol Buffers v3-encode \a request */
50
- grpc_slice grpc_grpclb_request_encode(const grpc_grpclb_request* request);
51
-
52
- /** Destroy \a request */
53
- void grpc_grpclb_request_destroy(grpc_grpclb_request* request);
71
+ grpc_slice grpc_grpclb_request_encode(const grpc_grpclb_request* request,
72
+ upb_arena* arena);
54
73
 
55
74
  /** Parse (ie, decode) the bytes in \a encoded_grpc_grpclb_response as a \a
56
75
  * grpc_grpclb_initial_response */
57
- grpc_grpclb_initial_response* grpc_grpclb_initial_response_parse(
58
- const grpc_slice& encoded_grpc_grpclb_response);
76
+ const grpc_grpclb_initial_response* grpc_grpclb_initial_response_parse(
77
+ const grpc_slice& encoded_grpc_grpclb_response, upb_arena* arena);
59
78
 
60
79
  /** Parse the list of servers from an encoded \a grpc_grpclb_response */
61
80
  grpc_grpclb_serverlist* grpc_grpclb_response_parse_serverlist(
@@ -75,16 +94,10 @@ bool grpc_grpclb_server_equals(const grpc_grpclb_server* lhs,
75
94
  /** Destroy \a serverlist */
76
95
  void grpc_grpclb_destroy_serverlist(grpc_grpclb_serverlist* serverlist);
77
96
 
78
- /** Compare \a lhs against \a rhs and return 0 if \a lhs and \a rhs are equal,
79
- * < 0 if \a lhs represents a duration shorter than \a rhs and > 0 otherwise */
80
- int grpc_grpclb_duration_compare(const grpc_grpclb_duration* lhs,
81
- const grpc_grpclb_duration* rhs);
82
-
83
- grpc_millis grpc_grpclb_duration_to_millis(grpc_grpclb_duration* duration_pb);
97
+ grpc_millis grpc_grpclb_duration_to_millis(
98
+ const grpc_grpclb_duration* duration_pb);
84
99
 
85
- /** Destroy \a initial_response */
86
- void grpc_grpclb_initial_response_destroy(
87
- grpc_grpclb_initial_response* response);
100
+ } // namespace grpc_core
88
101
 
89
102
  #endif /* GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_GRPCLB_LOAD_BALANCER_API_H \
90
103
  */
@@ -108,6 +108,8 @@
108
108
  #define GRPC_XDS_RECONNECT_MAX_BACKOFF_SECONDS 120
109
109
  #define GRPC_XDS_RECONNECT_JITTER 0.2
110
110
  #define GRPC_XDS_DEFAULT_FALLBACK_TIMEOUT_MS 10000
111
+ #define GRPC_XDS_MIN_CLIENT_LOAD_REPORTING_INTERVAL_MS 1000
112
+ #define GRPC_XDS_DEFAULT_LOCALITY_RETENTION_INTERVAL_MS (15 * 60 * 1000)
111
113
 
112
114
  namespace grpc_core {
113
115
 
@@ -116,10 +118,6 @@ TraceFlag grpc_lb_xds_trace(false, "xds");
116
118
  namespace {
117
119
 
118
120
  constexpr char kXds[] = "xds_experimental";
119
- constexpr char kDefaultLocalityRegion[] = "xds_default_locality_region";
120
- constexpr char kDefaultLocalityZone[] = "xds_default_locality_zone";
121
- constexpr char kDefaultLocalitySubzone[] = "xds_default_locality_subzone";
122
- constexpr uint32_t kDefaultLocalityWeight = 3;
123
121
 
124
122
  class ParsedXdsConfig : public LoadBalancingPolicy::Config {
125
123
  public:
@@ -158,98 +156,192 @@ class XdsLb : public LoadBalancingPolicy {
158
156
  void ResetBackoffLocked() override;
159
157
 
160
158
  private:
161
- struct LocalityServerlistEntry;
162
- using LocalityList = InlinedVector<UniquePtr<LocalityServerlistEntry>, 1>;
163
-
164
- /// Contains a channel to the LB server and all the data related to the
165
- /// channel.
166
- class BalancerChannelState
167
- : public InternallyRefCounted<BalancerChannelState> {
159
+ // Contains a channel to the LB server and all the data related to the
160
+ // channel. Holds a ref to the xds policy.
161
+ class LbChannelState : public InternallyRefCounted<LbChannelState> {
168
162
  public:
169
- /// Contains a call to the LB server and all the data related to the call.
170
- class BalancerCallState : public InternallyRefCounted<BalancerCallState> {
163
+ // An LB call wrapper that can restart a call upon failure. Holds a ref to
164
+ // the LB channel. The template parameter is the kind of wrapped LB call.
165
+ template <typename T>
166
+ class RetryableLbCall : public InternallyRefCounted<RetryableLbCall<T>> {
171
167
  public:
172
- explicit BalancerCallState(RefCountedPtr<BalancerChannelState> lb_chand);
168
+ explicit RetryableLbCall(RefCountedPtr<LbChannelState> lb_chand);
173
169
 
174
- // It's the caller's responsibility to ensure that Orphan() is called from
175
- // inside the combiner.
176
170
  void Orphan() override;
177
171
 
178
- void StartQuery();
172
+ void OnCallFinishedLocked();
179
173
 
180
- RefCountedPtr<XdsLbClientStats> client_stats() const {
181
- return client_stats_;
182
- }
174
+ T* lb_calld() const { return lb_calld_.get(); }
175
+ LbChannelState* lb_chand() const { return lb_chand_.get(); }
176
+
177
+ private:
178
+ void StartNewCallLocked();
179
+ void StartRetryTimerLocked();
180
+ static void OnRetryTimerLocked(void* arg, grpc_error* error);
181
+
182
+ // The wrapped LB call that talks to the LB server. It's instantiated
183
+ // every time we start a new call. It's null during call retry backoff.
184
+ OrphanablePtr<T> lb_calld_;
185
+ // The owing LB channel.
186
+ RefCountedPtr<LbChannelState> lb_chand_;
187
+
188
+ // Retry state.
189
+ BackOff backoff_;
190
+ grpc_timer retry_timer_;
191
+ grpc_closure on_retry_timer_;
192
+ bool retry_timer_callback_pending_ = false;
193
+
194
+ bool shutting_down_ = false;
195
+ };
196
+
197
+ // Contains an EDS call to the LB server.
198
+ class EdsCallState : public InternallyRefCounted<EdsCallState> {
199
+ public:
200
+ // The ctor and dtor should not be used directly.
201
+ explicit EdsCallState(
202
+ RefCountedPtr<RetryableLbCall<EdsCallState>> parent);
203
+ ~EdsCallState() override;
204
+
205
+ void Orphan() override;
183
206
 
184
- bool seen_initial_response() const { return seen_initial_response_; }
207
+ RetryableLbCall<EdsCallState>* parent() const { return parent_.get(); }
208
+ LbChannelState* lb_chand() const { return parent_->lb_chand(); }
209
+ XdsLb* xdslb_policy() const { return lb_chand()->xdslb_policy(); }
210
+ bool seen_response() const { return seen_response_; }
185
211
 
186
212
  private:
187
- GRPC_ALLOW_CLASS_TO_USE_NON_PUBLIC_DELETE
213
+ static void OnResponseReceivedLocked(void* arg, grpc_error* error);
214
+ static void OnStatusReceivedLocked(void* arg, grpc_error* error);
188
215
 
189
- ~BalancerCallState();
216
+ bool IsCurrentCallOnChannel() const;
190
217
 
191
- XdsLb* xdslb_policy() const { return lb_chand_->xdslb_policy_.get(); }
218
+ // The owning RetryableLbCall<>.
219
+ RefCountedPtr<RetryableLbCall<EdsCallState>> parent_;
220
+ bool seen_response_ = false;
192
221
 
193
- bool IsCurrentCallOnChannel() const {
194
- return this == lb_chand_->lb_calld_.get();
195
- }
222
+ // Always non-NULL.
223
+ grpc_call* lb_call_;
224
+
225
+ // recv_initial_metadata
226
+ grpc_metadata_array initial_metadata_recv_;
227
+
228
+ // send_message
229
+ grpc_byte_buffer* send_message_payload_ = nullptr;
230
+
231
+ // recv_message
232
+ grpc_byte_buffer* recv_message_payload_ = nullptr;
233
+ grpc_closure on_response_received_;
234
+
235
+ // recv_trailing_metadata
236
+ grpc_metadata_array trailing_metadata_recv_;
237
+ grpc_status_code status_code_;
238
+ grpc_slice status_details_;
239
+ grpc_closure on_status_received_;
240
+ };
241
+
242
+ // Contains an LRS call to the LB server.
243
+ class LrsCallState : public InternallyRefCounted<LrsCallState> {
244
+ public:
245
+ // The ctor and dtor should not be used directly.
246
+ explicit LrsCallState(
247
+ RefCountedPtr<RetryableLbCall<LrsCallState>> parent);
248
+ ~LrsCallState() override;
249
+
250
+ void Orphan() override;
251
+
252
+ void MaybeStartReportingLocked();
253
+
254
+ RetryableLbCall<LrsCallState>* parent() { return parent_.get(); }
255
+ LbChannelState* lb_chand() const { return parent_->lb_chand(); }
256
+ XdsLb* xdslb_policy() const { return lb_chand()->xdslb_policy(); }
257
+ bool seen_response() const { return seen_response_; }
258
+
259
+ private:
260
+ // Reports client-side load stats according to a fixed interval.
261
+ class Reporter : public InternallyRefCounted<Reporter> {
262
+ public:
263
+ Reporter(RefCountedPtr<LrsCallState> parent,
264
+ grpc_millis report_interval)
265
+ : parent_(std::move(parent)), report_interval_(report_interval) {
266
+ GRPC_CLOSURE_INIT(
267
+ &on_next_report_timer_, OnNextReportTimerLocked, this,
268
+ grpc_combiner_scheduler(xdslb_policy()->combiner()));
269
+ GRPC_CLOSURE_INIT(
270
+ &on_report_done_, OnReportDoneLocked, this,
271
+ grpc_combiner_scheduler(xdslb_policy()->combiner()));
272
+ ScheduleNextReportLocked();
273
+ }
274
+
275
+ void Orphan() override;
196
276
 
197
- void ScheduleNextClientLoadReportLocked();
198
- void SendClientLoadReportLocked();
277
+ private:
278
+ void ScheduleNextReportLocked();
279
+ static void OnNextReportTimerLocked(void* arg, grpc_error* error);
280
+ void SendReportLocked();
281
+ static void OnReportDoneLocked(void* arg, grpc_error* error);
199
282
 
200
- static bool LoadReportCountersAreZero(xds_grpclb_request* request);
283
+ bool IsCurrentReporterOnCall() const {
284
+ return this == parent_->reporter_.get();
285
+ }
286
+ XdsLb* xdslb_policy() const { return parent_->xdslb_policy(); }
287
+
288
+ // The owning LRS call.
289
+ RefCountedPtr<LrsCallState> parent_;
290
+
291
+ // The load reporting state.
292
+ const grpc_millis report_interval_;
293
+ bool last_report_counters_were_zero_ = false;
294
+ bool next_report_timer_callback_pending_ = false;
295
+ grpc_timer next_report_timer_;
296
+ grpc_closure on_next_report_timer_;
297
+ grpc_closure on_report_done_;
298
+ };
201
299
 
202
- static void MaybeSendClientLoadReportLocked(void* arg, grpc_error* error);
203
300
  static void OnInitialRequestSentLocked(void* arg, grpc_error* error);
204
- static void OnBalancerMessageReceivedLocked(void* arg, grpc_error* error);
205
- static void OnBalancerStatusReceivedLocked(void* arg, grpc_error* error);
301
+ static void OnResponseReceivedLocked(void* arg, grpc_error* error);
302
+ static void OnStatusReceivedLocked(void* arg, grpc_error* error);
303
+
304
+ bool IsCurrentCallOnChannel() const;
206
305
 
207
- // The owning LB channel.
208
- RefCountedPtr<BalancerChannelState> lb_chand_;
306
+ // The owning RetryableLbCall<>.
307
+ RefCountedPtr<RetryableLbCall<LrsCallState>> parent_;
308
+ bool seen_response_ = false;
209
309
 
210
- // The streaming call to the LB server. Always non-NULL.
211
- grpc_call* lb_call_ = nullptr;
310
+ // Always non-NULL.
311
+ grpc_call* lb_call_;
212
312
 
213
313
  // recv_initial_metadata
214
- grpc_metadata_array lb_initial_metadata_recv_;
314
+ grpc_metadata_array initial_metadata_recv_;
215
315
 
216
316
  // send_message
217
317
  grpc_byte_buffer* send_message_payload_ = nullptr;
218
- grpc_closure lb_on_initial_request_sent_;
318
+ grpc_closure on_initial_request_sent_;
219
319
 
220
320
  // recv_message
221
321
  grpc_byte_buffer* recv_message_payload_ = nullptr;
222
- grpc_closure lb_on_balancer_message_received_;
223
- bool seen_initial_response_ = false;
322
+ grpc_closure on_response_received_;
224
323
 
225
324
  // recv_trailing_metadata
226
- grpc_closure lb_on_balancer_status_received_;
227
- grpc_metadata_array lb_trailing_metadata_recv_;
228
- grpc_status_code lb_call_status_;
229
- grpc_slice lb_call_status_details_;
230
-
231
- // The stats for client-side load reporting associated with this LB call.
232
- // Created after the first serverlist is received.
233
- RefCountedPtr<XdsLbClientStats> client_stats_;
234
- grpc_millis client_stats_report_interval_ = 0;
235
- grpc_timer client_load_report_timer_;
236
- bool client_load_report_timer_callback_pending_ = false;
237
- bool last_client_load_report_counters_were_zero_ = false;
238
- bool client_load_report_is_due_ = false;
239
- // The closure used for either the load report timer or the callback for
240
- // completion of sending the load report.
241
- grpc_closure client_load_report_closure_;
325
+ grpc_metadata_array trailing_metadata_recv_;
326
+ grpc_status_code status_code_;
327
+ grpc_slice status_details_;
328
+ grpc_closure on_status_received_;
329
+
330
+ // Load reporting state.
331
+ grpc_millis load_reporting_interval_ = 0;
332
+ OrphanablePtr<Reporter> reporter_;
242
333
  };
243
334
 
244
- BalancerChannelState(const char* balancer_name,
245
- const grpc_channel_args& args,
246
- RefCountedPtr<XdsLb> parent_xdslb_policy);
247
- ~BalancerChannelState();
335
+ LbChannelState(RefCountedPtr<XdsLb> xdslb_policy, const char* balancer_name,
336
+ const grpc_channel_args& args);
337
+ ~LbChannelState();
248
338
 
249
339
  void Orphan() override;
250
340
 
251
341
  grpc_channel* channel() const { return channel_; }
252
- BalancerCallState* lb_calld() const { return lb_calld_.get(); }
342
+ XdsLb* xdslb_policy() const { return xdslb_policy_.get(); }
343
+ EdsCallState* eds_calld() const { return eds_calld_->lb_calld(); }
344
+ LrsCallState* lrs_calld() const { return lrs_calld_->lb_calld(); }
253
345
 
254
346
  bool IsCurrentChannel() const {
255
347
  return this == xdslb_policy_->lb_chand_.get();
@@ -257,11 +349,7 @@ class XdsLb : public LoadBalancingPolicy {
257
349
  bool IsPendingChannel() const {
258
350
  return this == xdslb_policy_->pending_lb_chand_.get();
259
351
  }
260
- bool HasActiveCall() const { return lb_calld_ != nullptr; }
261
-
262
- void StartCallRetryTimerLocked();
263
- static void OnCallRetryTimerLocked(void* arg, grpc_error* error);
264
- void StartCallLocked();
352
+ bool HasActiveEdsCall() const { return eds_calld_->lb_calld() != nullptr; }
265
353
 
266
354
  void StartConnectivityWatchLocked();
267
355
  void CancelConnectivityWatchLocked();
@@ -277,29 +365,35 @@ class XdsLb : public LoadBalancingPolicy {
277
365
  grpc_connectivity_state connectivity_ = GRPC_CHANNEL_IDLE;
278
366
  grpc_closure on_connectivity_changed_;
279
367
 
280
- // The data associated with the current LB call. It holds a ref to this LB
281
- // channel. It's instantiated every time we query for backends. It's reset
282
- // whenever the current LB call is no longer needed (e.g., the LB policy is
283
- // shutting down, or the LB call has ended). A non-NULL lb_calld_ always
284
- // contains a non-NULL lb_call_.
285
- OrphanablePtr<BalancerCallState> lb_calld_;
286
- BackOff lb_call_backoff_;
287
- grpc_timer lb_call_retry_timer_;
288
- grpc_closure lb_on_call_retry_;
289
- bool retry_timer_callback_pending_ = false;
368
+ // The retryable XDS calls to the LB server.
369
+ OrphanablePtr<RetryableLbCall<EdsCallState>> eds_calld_;
370
+ OrphanablePtr<RetryableLbCall<LrsCallState>> lrs_calld_;
290
371
  };
291
372
 
292
- // Since pickers are UniquePtrs we use this RefCounted wrapper
293
- // to control references to it by the xds picker and the locality
294
- // entry
295
- class PickerRef : public RefCounted<PickerRef> {
373
+ // We need this wrapper for the following reasons:
374
+ // 1. To process per-locality load reporting.
375
+ // 2. Since pickers are UniquePtrs we use this RefCounted wrapper to control
376
+ // references to it by the xds picker and the locality entry.
377
+ class PickerWrapper : public RefCounted<PickerWrapper> {
296
378
  public:
297
- explicit PickerRef(UniquePtr<SubchannelPicker> picker)
298
- : picker_(std::move(picker)) {}
299
- PickResult Pick(PickArgs args) { return picker_->Pick(args); }
379
+ PickerWrapper(UniquePtr<SubchannelPicker> picker,
380
+ RefCountedPtr<XdsClientStats::LocalityStats> locality_stats)
381
+ : picker_(std::move(picker)),
382
+ locality_stats_(std::move(locality_stats)) {
383
+ locality_stats_->RefByPicker();
384
+ }
385
+ ~PickerWrapper() { locality_stats_->UnrefByPicker(); }
386
+
387
+ PickResult Pick(PickArgs args);
300
388
 
301
389
  private:
390
+ static void RecordCallCompletion(
391
+ void* arg, grpc_error* error,
392
+ LoadBalancingPolicy::MetadataInterface* recv_trailing_metadata,
393
+ LoadBalancingPolicy::CallState* call_state);
394
+
302
395
  UniquePtr<SubchannelPicker> picker_;
396
+ RefCountedPtr<XdsClientStats::LocalityStats> locality_stats_;
303
397
  };
304
398
 
305
399
  // The picker will use a stateless weighting algorithm to pick the locality to
@@ -311,18 +405,21 @@ class XdsLb : public LoadBalancingPolicy {
311
405
  // proportional to the locality's weight. The start of the range is the
312
406
  // previous value in the vector and is 0 for the first element.
313
407
  using PickerList =
314
- InlinedVector<Pair<uint32_t, RefCountedPtr<PickerRef>>, 1>;
315
- Picker(RefCountedPtr<XdsLbClientStats> client_stats, PickerList pickers)
316
- : client_stats_(std::move(client_stats)),
317
- pickers_(std::move(pickers)) {}
408
+ InlinedVector<Pair<uint32_t, RefCountedPtr<PickerWrapper>>, 1>;
409
+ Picker(RefCountedPtr<XdsLb> xds_policy, PickerList pickers)
410
+ : xds_policy_(std::move(xds_policy)),
411
+ pickers_(std::move(pickers)),
412
+ drop_config_(xds_policy_->drop_config_) {}
318
413
 
319
414
  PickResult Pick(PickArgs args) override;
320
415
 
321
416
  private:
322
- // Calls the picker of the locality that the key falls within
417
+ // Calls the picker of the locality that the key falls within.
323
418
  PickResult PickFromLocality(const uint32_t key, PickArgs args);
324
- RefCountedPtr<XdsLbClientStats> client_stats_;
419
+
420
+ RefCountedPtr<XdsLb> xds_policy_;
325
421
  PickerList pickers_;
422
+ RefCountedPtr<XdsDropConfig> drop_config_;
326
423
  };
327
424
 
328
425
  class FallbackHelper : public ChannelControlHelper {
@@ -334,12 +431,10 @@ class XdsLb : public LoadBalancingPolicy {
334
431
 
335
432
  RefCountedPtr<SubchannelInterface> CreateSubchannel(
336
433
  const grpc_channel_args& args) override;
337
- grpc_channel* CreateChannel(const char* target,
338
- const grpc_channel_args& args) override;
339
434
  void UpdateState(grpc_connectivity_state state,
340
435
  UniquePtr<SubchannelPicker> picker) override;
341
436
  void RequestReresolution() override;
342
- void AddTraceEvent(TraceSeverity severity, const char* message) override;
437
+ void AddTraceEvent(TraceSeverity severity, StringView message) override;
343
438
 
344
439
  void set_child(LoadBalancingPolicy* child) { child_ = child; }
345
440
 
@@ -351,63 +446,30 @@ class XdsLb : public LoadBalancingPolicy {
351
446
  LoadBalancingPolicy* child_ = nullptr;
352
447
  };
353
448
 
354
- class LocalityName : public RefCounted<LocalityName> {
355
- public:
356
- struct Less {
357
- bool operator()(const RefCountedPtr<LocalityName>& lhs,
358
- const RefCountedPtr<LocalityName>& rhs) {
359
- int cmp_result = strcmp(lhs->region_.get(), rhs->region_.get());
360
- if (cmp_result != 0) return cmp_result < 0;
361
- cmp_result = strcmp(lhs->zone_.get(), rhs->zone_.get());
362
- if (cmp_result != 0) return cmp_result < 0;
363
- return strcmp(lhs->subzone_.get(), rhs->subzone_.get()) < 0;
364
- }
365
- };
366
-
367
- LocalityName(UniquePtr<char> region, UniquePtr<char> zone,
368
- UniquePtr<char> subzone)
369
- : region_(std::move(region)),
370
- zone_(std::move(zone)),
371
- subzone_(std::move(subzone)) {}
372
-
373
- bool operator==(const LocalityName& other) const {
374
- return strcmp(region_.get(), other.region_.get()) == 0 &&
375
- strcmp(zone_.get(), other.zone_.get()) == 0 &&
376
- strcmp(subzone_.get(), other.subzone_.get()) == 0;
377
- }
378
-
379
- const char* AsHumanReadableString() {
380
- if (human_readable_string_ == nullptr) {
381
- char* tmp;
382
- gpr_asprintf(&tmp, "{region=\"%s\", zone=\"%s\", subzone=\"%s\"}",
383
- region_.get(), zone_.get(), subzone_.get());
384
- human_readable_string_.reset(tmp);
385
- }
386
- return human_readable_string_.get();
387
- }
388
-
389
- private:
390
- UniquePtr<char> region_;
391
- UniquePtr<char> zone_;
392
- UniquePtr<char> subzone_;
393
- UniquePtr<char> human_readable_string_;
394
- };
395
-
396
449
  class LocalityMap {
397
450
  public:
398
451
  class LocalityEntry : public InternallyRefCounted<LocalityEntry> {
399
452
  public:
400
453
  LocalityEntry(RefCountedPtr<XdsLb> parent,
401
- RefCountedPtr<LocalityName> name, uint32_t locality_weight);
454
+ RefCountedPtr<XdsLocalityName> name);
402
455
  ~LocalityEntry();
403
456
 
404
- void UpdateLocked(xds_grpclb_serverlist* serverlist,
457
+ void UpdateLocked(uint32_t locality_weight, ServerAddressList serverlist,
405
458
  LoadBalancingPolicy::Config* child_policy_config,
406
459
  const grpc_channel_args* args);
407
460
  void ShutdownLocked();
408
461
  void ResetBackoffLocked();
462
+ void DeactivateLocked();
409
463
  void Orphan() override;
410
464
 
465
+ grpc_connectivity_state connectivity_state() const {
466
+ return connectivity_state_;
467
+ }
468
+ uint32_t locality_weight() const { return locality_weight_; }
469
+ RefCountedPtr<PickerWrapper> picker_wrapper() const {
470
+ return picker_wrapper_;
471
+ }
472
+
411
473
  private:
412
474
  class Helper : public ChannelControlHelper {
413
475
  public:
@@ -418,13 +480,10 @@ class XdsLb : public LoadBalancingPolicy {
418
480
 
419
481
  RefCountedPtr<SubchannelInterface> CreateSubchannel(
420
482
  const grpc_channel_args& args) override;
421
- grpc_channel* CreateChannel(const char* target,
422
- const grpc_channel_args& args) override;
423
483
  void UpdateState(grpc_connectivity_state state,
424
484
  UniquePtr<SubchannelPicker> picker) override;
425
485
  void RequestReresolution() override;
426
- void AddTraceEvent(TraceSeverity severity,
427
- const char* message) override;
486
+ void AddTraceEvent(TraceSeverity severity, StringView message) override;
428
487
  void set_child(LoadBalancingPolicy* child) { child_ = child; }
429
488
 
430
489
  private:
@@ -434,50 +493,50 @@ class XdsLb : public LoadBalancingPolicy {
434
493
  RefCountedPtr<LocalityEntry> entry_;
435
494
  LoadBalancingPolicy* child_ = nullptr;
436
495
  };
496
+
437
497
  // Methods for dealing with the child policy.
438
498
  OrphanablePtr<LoadBalancingPolicy> CreateChildPolicyLocked(
439
499
  const char* name, const grpc_channel_args* args);
440
500
  grpc_channel_args* CreateChildPolicyArgsLocked(
441
501
  const grpc_channel_args* args);
442
502
 
503
+ static void OnDelayedRemovalTimerLocked(void* arg, grpc_error* error);
504
+
443
505
  RefCountedPtr<XdsLb> parent_;
444
- RefCountedPtr<LocalityName> name_;
506
+ RefCountedPtr<XdsLocalityName> name_;
445
507
  OrphanablePtr<LoadBalancingPolicy> child_policy_;
446
508
  OrphanablePtr<LoadBalancingPolicy> pending_child_policy_;
447
- RefCountedPtr<PickerRef> picker_ref_;
448
- grpc_connectivity_state connectivity_state_;
509
+ RefCountedPtr<PickerWrapper> picker_wrapper_;
510
+ grpc_connectivity_state connectivity_state_ = GRPC_CHANNEL_IDLE;
449
511
  uint32_t locality_weight_;
512
+ grpc_closure on_delayed_removal_timer_;
513
+ grpc_timer delayed_removal_timer_;
514
+ bool delayed_removal_timer_callback_pending_ = false;
450
515
  };
451
516
 
452
- void UpdateLocked(const LocalityList& locality_list,
517
+ explicit LocalityMap(XdsLb* xds_policy) : xds_policy_(xds_policy) {}
518
+
519
+ void UpdateLocked(const XdsLocalityList& locality_list,
453
520
  LoadBalancingPolicy::Config* child_policy_config,
454
- const grpc_channel_args* args, XdsLb* parent);
521
+ const grpc_channel_args* args, XdsLb* parent,
522
+ bool is_initial_update = false);
523
+ void UpdateXdsPickerLocked();
455
524
  void ShutdownLocked();
456
525
  void ResetBackoffLocked();
457
526
 
458
527
  private:
459
- void PruneLocalities(const LocalityList& locality_list);
460
- Map<RefCountedPtr<LocalityName>, OrphanablePtr<LocalityEntry>,
461
- LocalityName::Less>
528
+ XdsLb* xds_policy_;
529
+ Map<RefCountedPtr<XdsLocalityName>, OrphanablePtr<LocalityEntry>,
530
+ XdsLocalityName::Less>
462
531
  map_;
463
532
  };
464
533
 
465
- struct LocalityServerlistEntry {
466
- ~LocalityServerlistEntry() { xds_grpclb_destroy_serverlist(serverlist); }
467
-
468
- RefCountedPtr<LocalityName> locality_name;
469
- uint32_t locality_weight;
470
- // The deserialized response from the balancer. May be nullptr until one
471
- // such response has arrived.
472
- xds_grpclb_serverlist* serverlist;
473
- };
474
-
475
534
  ~XdsLb();
476
535
 
477
536
  void ShutdownLocked() override;
478
537
 
479
538
  // Helper function used in UpdateLocked().
480
- void ProcessAddressesAndChannelArgsLocked(const ServerAddressList& addresses,
539
+ void ProcessAddressesAndChannelArgsLocked(ServerAddressList addresses,
481
540
  const grpc_channel_args& args);
482
541
 
483
542
  // Parses the xds config given the JSON node of the first child of XdsConfig.
@@ -486,7 +545,7 @@ class XdsLb : public LoadBalancingPolicy {
486
545
  // found. Does nothing upon failure.
487
546
  void ParseLbConfig(const ParsedXdsConfig* xds_config);
488
547
 
489
- BalancerChannelState* LatestLbChannel() const {
548
+ LbChannelState* LatestLbChannel() const {
490
549
  return pending_lb_chand_ != nullptr ? pending_lb_chand_.get()
491
550
  : lb_chand_.get();
492
551
  }
@@ -499,7 +558,7 @@ class XdsLb : public LoadBalancingPolicy {
499
558
  const char* name, const grpc_channel_args* args);
500
559
  void MaybeExitFallbackMode();
501
560
 
502
- // Who the client is trying to communicate with.
561
+ // Name of the backend server to connect to.
503
562
  const char* server_name_ = nullptr;
504
563
 
505
564
  // Name of the balancer to connect to.
@@ -512,11 +571,11 @@ class XdsLb : public LoadBalancingPolicy {
512
571
  bool shutting_down_ = false;
513
572
 
514
573
  // The channel for communicating with the LB server.
515
- OrphanablePtr<BalancerChannelState> lb_chand_;
516
- OrphanablePtr<BalancerChannelState> pending_lb_chand_;
574
+ OrphanablePtr<LbChannelState> lb_chand_;
575
+ OrphanablePtr<LbChannelState> pending_lb_chand_;
517
576
 
518
577
  // Timeout in milliseconds for the LB call. 0 means no deadline.
519
- int lb_call_timeout_ms_ = 0;
578
+ const grpc_millis lb_call_timeout_ms_;
520
579
 
521
580
  // Whether the checks for fallback at startup are ALL pending. There are
522
581
  // several cases where this can be reset:
@@ -528,7 +587,7 @@ class XdsLb : public LoadBalancingPolicy {
528
587
  bool fallback_at_startup_checks_pending_ = false;
529
588
  // Timeout in milliseconds for before using fallback backend addresses.
530
589
  // 0 means not using fallback.
531
- int lb_fallback_timeout_ms_ = 0;
590
+ const grpc_millis lb_fallback_timeout_ms_;
532
591
  // The backend addresses from the resolver.
533
592
  ServerAddressList fallback_backend_addresses_;
534
593
  // Fallback timer.
@@ -543,34 +602,75 @@ class XdsLb : public LoadBalancingPolicy {
543
602
 
544
603
  // The policy to use for the backends.
545
604
  RefCountedPtr<LoadBalancingPolicy::Config> child_policy_config_;
605
+ const grpc_millis locality_retention_interval_ms_;
546
606
  // Map of policies to use in the backend
547
607
  LocalityMap locality_map_;
548
608
  // TODO(mhaidry) : Add support for multiple maps of localities
549
609
  // with different priorities
550
- LocalityList locality_serverlist_;
610
+ XdsLocalityList locality_list_;
551
611
  // TODO(mhaidry) : Add a pending locality map that may be swapped with the
552
612
  // the current one when new localities in the pending map are ready
553
613
  // to accept connections
614
+
615
+ // The config for dropping calls.
616
+ RefCountedPtr<XdsDropConfig> drop_config_;
617
+
618
+ // The stats for client-side load reporting.
619
+ XdsClientStats client_stats_;
554
620
  };
555
621
 
622
+ //
623
+ // XdsLb::PickerWrapper::Pick
624
+ //
625
+
626
+ LoadBalancingPolicy::PickResult XdsLb::PickerWrapper::Pick(
627
+ LoadBalancingPolicy::PickArgs args) {
628
+ // Forward the pick to the picker returned from the child policy.
629
+ PickResult result = picker_->Pick(args);
630
+ if (result.type != PickResult::PICK_COMPLETE ||
631
+ result.subchannel == nullptr || locality_stats_ == nullptr) {
632
+ return result;
633
+ }
634
+ // Record a call started.
635
+ locality_stats_->AddCallStarted();
636
+ // Intercept the recv_trailing_metadata op to record call completion.
637
+ result.recv_trailing_metadata_ready = RecordCallCompletion;
638
+ result.recv_trailing_metadata_ready_user_data =
639
+ locality_stats_->Ref(DEBUG_LOCATION, "LocalityStats+call").release();
640
+ return result;
641
+ }
642
+
643
+ // Note that the following callback does not run in either the control plane
644
+ // combiner or the data plane combiner.
645
+ void XdsLb::PickerWrapper::RecordCallCompletion(
646
+ void* arg, grpc_error* error,
647
+ LoadBalancingPolicy::MetadataInterface* recv_trailing_metadata,
648
+ LoadBalancingPolicy::CallState* call_state) {
649
+ XdsClientStats::LocalityStats* locality_stats =
650
+ static_cast<XdsClientStats::LocalityStats*>(arg);
651
+ const bool call_failed = error != GRPC_ERROR_NONE;
652
+ locality_stats->AddCallFinished(call_failed);
653
+ locality_stats->Unref(DEBUG_LOCATION, "LocalityStats+call");
654
+ }
655
+
556
656
  //
557
657
  // XdsLb::Picker
558
658
  //
559
659
 
560
660
  XdsLb::PickResult XdsLb::Picker::Pick(PickArgs args) {
561
- // TODO(roth): Add support for drop handling.
562
- // Generate a random number between 0 and the total weight
563
- const uint32_t key =
564
- (rand() * pickers_[pickers_.size() - 1].first) / RAND_MAX;
661
+ // Handle drop.
662
+ const UniquePtr<char>* drop_category;
663
+ if (drop_config_->ShouldDrop(&drop_category)) {
664
+ xds_policy_->client_stats_.AddCallDropped(*drop_category);
665
+ PickResult result;
666
+ result.type = PickResult::PICK_COMPLETE;
667
+ return result;
668
+ }
669
+ // Generate a random number in [0, total weight).
670
+ const uint32_t key = rand() % pickers_[pickers_.size() - 1].first;
565
671
  // Forward pick to whichever locality maps to the range in which the
566
672
  // random number falls in.
567
- PickResult result = PickFromLocality(key, args);
568
- // If pick succeeded, add client stats.
569
- if (result.type == PickResult::PICK_COMPLETE &&
570
- result.subchannel != nullptr && client_stats_ != nullptr) {
571
- // TODO(roth): Add support for client stats.
572
- }
573
- return result;
673
+ return PickFromLocality(key, args);
574
674
  }
575
675
 
576
676
  XdsLb::PickResult XdsLb::Picker::PickFromLocality(const uint32_t key,
@@ -618,15 +718,6 @@ RefCountedPtr<SubchannelInterface> XdsLb::FallbackHelper::CreateSubchannel(
618
718
  return parent_->channel_control_helper()->CreateSubchannel(args);
619
719
  }
620
720
 
621
- grpc_channel* XdsLb::FallbackHelper::CreateChannel(
622
- const char* target, const grpc_channel_args& args) {
623
- if (parent_->shutting_down_ ||
624
- (!CalledByPendingFallback() && !CalledByCurrentFallback())) {
625
- return nullptr;
626
- }
627
- return parent_->channel_control_helper()->CreateChannel(target, args);
628
- }
629
-
630
721
  void XdsLb::FallbackHelper::UpdateState(grpc_connectivity_state state,
631
722
  UniquePtr<SubchannelPicker> picker) {
632
723
  if (parent_->shutting_down_) return;
@@ -669,7 +760,7 @@ void XdsLb::FallbackHelper::RequestReresolution() {
669
760
  }
670
761
 
671
762
  void XdsLb::FallbackHelper::AddTraceEvent(TraceSeverity severity,
672
- const char* message) {
763
+ StringView message) {
673
764
  if (parent_->shutting_down_ ||
674
765
  (!CalledByPendingFallback() && !CalledByCurrentFallback())) {
675
766
  return;
@@ -678,172 +769,45 @@ void XdsLb::FallbackHelper::AddTraceEvent(TraceSeverity severity,
678
769
  }
679
770
 
680
771
  //
681
- // serverlist parsing code
682
- //
683
-
684
- // Returns the backend addresses extracted from the given addresses.
685
- ServerAddressList ExtractBackendAddresses(const ServerAddressList& addresses) {
686
- ServerAddressList backend_addresses;
687
- for (size_t i = 0; i < addresses.size(); ++i) {
688
- if (!addresses[i].IsBalancer()) {
689
- backend_addresses.emplace_back(addresses[i]);
690
- }
691
- }
692
- return backend_addresses;
693
- }
694
-
695
- bool IsServerValid(const xds_grpclb_server* server, size_t idx, bool log) {
696
- if (server->drop) return false;
697
- const xds_grpclb_ip_address* ip = &server->ip_address;
698
- if (GPR_UNLIKELY(server->port >> 16 != 0)) {
699
- if (log) {
700
- gpr_log(GPR_ERROR,
701
- "Invalid port '%d' at index %lu of serverlist. Ignoring.",
702
- server->port, (unsigned long)idx);
703
- }
704
- return false;
705
- }
706
- if (GPR_UNLIKELY(ip->size != 4 && ip->size != 16)) {
707
- if (log) {
708
- gpr_log(GPR_ERROR,
709
- "Expected IP to be 4 or 16 bytes, got %d at index %lu of "
710
- "serverlist. Ignoring",
711
- ip->size, (unsigned long)idx);
712
- }
713
- return false;
714
- }
715
- return true;
716
- }
717
-
718
- void ParseServer(const xds_grpclb_server* server, grpc_resolved_address* addr) {
719
- memset(addr, 0, sizeof(*addr));
720
- if (server->drop) return;
721
- const uint16_t netorder_port = grpc_htons((uint16_t)server->port);
722
- /* the addresses are given in binary format (a in(6)_addr struct) in
723
- * server->ip_address.bytes. */
724
- const xds_grpclb_ip_address* ip = &server->ip_address;
725
- if (ip->size == 4) {
726
- addr->len = static_cast<socklen_t>(sizeof(grpc_sockaddr_in));
727
- grpc_sockaddr_in* addr4 = reinterpret_cast<grpc_sockaddr_in*>(&addr->addr);
728
- addr4->sin_family = GRPC_AF_INET;
729
- memcpy(&addr4->sin_addr, ip->bytes, ip->size);
730
- addr4->sin_port = netorder_port;
731
- } else if (ip->size == 16) {
732
- addr->len = static_cast<socklen_t>(sizeof(grpc_sockaddr_in6));
733
- grpc_sockaddr_in6* addr6 = (grpc_sockaddr_in6*)&addr->addr;
734
- addr6->sin6_family = GRPC_AF_INET6;
735
- memcpy(&addr6->sin6_addr, ip->bytes, ip->size);
736
- addr6->sin6_port = netorder_port;
737
- }
738
- }
739
-
740
- // Returns addresses extracted from \a serverlist.
741
- ServerAddressList ProcessServerlist(const xds_grpclb_serverlist* serverlist) {
742
- ServerAddressList addresses;
743
- for (size_t i = 0; i < serverlist->num_servers; ++i) {
744
- const xds_grpclb_server* server = serverlist->servers[i];
745
- if (!IsServerValid(serverlist->servers[i], i, false)) continue;
746
- grpc_resolved_address addr;
747
- ParseServer(server, &addr);
748
- addresses.emplace_back(addr, nullptr);
749
- }
750
- return addresses;
751
- }
752
-
753
- //
754
- // XdsLb::BalancerChannelState
772
+ // XdsLb::LbChannelState
755
773
  //
756
774
 
757
- XdsLb::BalancerChannelState::BalancerChannelState(
758
- const char* balancer_name, const grpc_channel_args& args,
759
- RefCountedPtr<XdsLb> parent_xdslb_policy)
760
- : InternallyRefCounted<BalancerChannelState>(&grpc_lb_xds_trace),
761
- xdslb_policy_(std::move(parent_xdslb_policy)),
762
- lb_call_backoff_(
763
- BackOff::Options()
764
- .set_initial_backoff(GRPC_XDS_INITIAL_CONNECT_BACKOFF_SECONDS *
765
- 1000)
766
- .set_multiplier(GRPC_XDS_RECONNECT_BACKOFF_MULTIPLIER)
767
- .set_jitter(GRPC_XDS_RECONNECT_JITTER)
768
- .set_max_backoff(GRPC_XDS_RECONNECT_MAX_BACKOFF_SECONDS * 1000)) {
769
- GRPC_CLOSURE_INIT(&on_connectivity_changed_,
770
- &XdsLb::BalancerChannelState::OnConnectivityChangedLocked,
775
+ XdsLb::LbChannelState::LbChannelState(RefCountedPtr<XdsLb> xdslb_policy,
776
+ const char* balancer_name,
777
+ const grpc_channel_args& args)
778
+ : InternallyRefCounted<LbChannelState>(&grpc_lb_xds_trace),
779
+ xdslb_policy_(std::move(xdslb_policy)) {
780
+ GRPC_CLOSURE_INIT(&on_connectivity_changed_, OnConnectivityChangedLocked,
771
781
  this, grpc_combiner_scheduler(xdslb_policy_->combiner()));
772
- channel_ = xdslb_policy_->channel_control_helper()->CreateChannel(
773
- balancer_name, args);
782
+ channel_ = CreateXdsBalancerChannel(balancer_name, args);
774
783
  GPR_ASSERT(channel_ != nullptr);
775
- StartCallLocked();
776
- }
777
-
778
- XdsLb::BalancerChannelState::~BalancerChannelState() {
779
- xdslb_policy_.reset(DEBUG_LOCATION, "BalancerChannelState");
780
- grpc_channel_destroy(channel_);
784
+ eds_calld_.reset(New<RetryableLbCall<EdsCallState>>(
785
+ Ref(DEBUG_LOCATION, "LbChannelState+eds")));
786
+ lrs_calld_.reset(New<RetryableLbCall<LrsCallState>>(
787
+ Ref(DEBUG_LOCATION, "LbChannelState+lrs")));
781
788
  }
782
789
 
783
- void XdsLb::BalancerChannelState::Orphan() {
784
- shutting_down_ = true;
785
- lb_calld_.reset();
786
- if (retry_timer_callback_pending_) grpc_timer_cancel(&lb_call_retry_timer_);
787
- Unref(DEBUG_LOCATION, "lb_channel_orphaned");
788
- }
789
-
790
- void XdsLb::BalancerChannelState::StartCallRetryTimerLocked() {
791
- grpc_millis next_try = lb_call_backoff_.NextAttemptTime();
790
+ XdsLb::LbChannelState::~LbChannelState() {
792
791
  if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
793
- gpr_log(GPR_INFO,
794
- "[xdslb %p] Failed to connect to LB server (lb_chand: %p)...",
795
- xdslb_policy_.get(), this);
796
- grpc_millis timeout = next_try - ExecCtx::Get()->Now();
797
- if (timeout > 0) {
798
- gpr_log(GPR_INFO, "[xdslb %p] ... retry_timer_active in %" PRId64 "ms.",
799
- xdslb_policy_.get(), timeout);
800
- } else {
801
- gpr_log(GPR_INFO, "[xdslb %p] ... retry_timer_active immediately.",
802
- xdslb_policy_.get());
803
- }
804
- }
805
- Ref(DEBUG_LOCATION, "on_balancer_call_retry_timer").release();
806
- GRPC_CLOSURE_INIT(&lb_on_call_retry_, &OnCallRetryTimerLocked, this,
807
- grpc_combiner_scheduler(xdslb_policy_->combiner()));
808
- grpc_timer_init(&lb_call_retry_timer_, next_try, &lb_on_call_retry_);
809
- retry_timer_callback_pending_ = true;
810
- }
811
-
812
- void XdsLb::BalancerChannelState::OnCallRetryTimerLocked(void* arg,
813
- grpc_error* error) {
814
- BalancerChannelState* lb_chand = static_cast<BalancerChannelState*>(arg);
815
- lb_chand->retry_timer_callback_pending_ = false;
816
- if (!lb_chand->shutting_down_ && error == GRPC_ERROR_NONE &&
817
- lb_chand->lb_calld_ == nullptr) {
818
- if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
819
- gpr_log(GPR_INFO,
820
- "[xdslb %p] Restarting call to LB server (lb_chand: %p)",
821
- lb_chand->xdslb_policy_.get(), lb_chand);
822
- }
823
- lb_chand->StartCallLocked();
792
+ gpr_log(GPR_INFO, "[xdslb %p] Destroying LB channel %p", xdslb_policy(),
793
+ this);
824
794
  }
825
- lb_chand->Unref(DEBUG_LOCATION, "on_balancer_call_retry_timer");
795
+ grpc_channel_destroy(channel_);
826
796
  }
827
797
 
828
- void XdsLb::BalancerChannelState::StartCallLocked() {
829
- if (shutting_down_) return;
830
- GPR_ASSERT(channel_ != nullptr);
831
- GPR_ASSERT(lb_calld_ == nullptr);
832
- lb_calld_ = MakeOrphanable<BalancerCallState>(Ref());
833
- if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
834
- gpr_log(GPR_INFO,
835
- "[xdslb %p] Query for backends (lb_chand: %p, lb_calld: %p)",
836
- xdslb_policy_.get(), this, lb_calld_.get());
837
- }
838
- lb_calld_->StartQuery();
798
+ void XdsLb::LbChannelState::Orphan() {
799
+ shutting_down_ = true;
800
+ eds_calld_.reset();
801
+ lrs_calld_.reset();
802
+ Unref(DEBUG_LOCATION, "LbChannelState+orphaned");
839
803
  }
840
804
 
841
- void XdsLb::BalancerChannelState::StartConnectivityWatchLocked() {
805
+ void XdsLb::LbChannelState::StartConnectivityWatchLocked() {
842
806
  grpc_channel_element* client_channel_elem =
843
807
  grpc_channel_stack_last_element(grpc_channel_get_channel_stack(channel_));
844
808
  GPR_ASSERT(client_channel_elem->filter == &grpc_client_channel_filter);
845
809
  // Ref held by callback.
846
- Ref(DEBUG_LOCATION, "watch_lb_channel_connectivity").release();
810
+ Ref(DEBUG_LOCATION, "LbChannelState+start_watch").release();
847
811
  grpc_client_channel_watch_connectivity_state(
848
812
  client_channel_elem,
849
813
  grpc_polling_entity_create_from_pollset_set(
@@ -851,7 +815,7 @@ void XdsLb::BalancerChannelState::StartConnectivityWatchLocked() {
851
815
  &connectivity_, &on_connectivity_changed_, nullptr);
852
816
  }
853
817
 
854
- void XdsLb::BalancerChannelState::CancelConnectivityWatchLocked() {
818
+ void XdsLb::LbChannelState::CancelConnectivityWatchLocked() {
855
819
  grpc_channel_element* client_channel_elem =
856
820
  grpc_channel_stack_last_element(grpc_channel_get_channel_stack(channel_));
857
821
  GPR_ASSERT(client_channel_elem->filter == &grpc_client_channel_filter);
@@ -862,9 +826,9 @@ void XdsLb::BalancerChannelState::CancelConnectivityWatchLocked() {
862
826
  nullptr, &on_connectivity_changed_, nullptr);
863
827
  }
864
828
 
865
- void XdsLb::BalancerChannelState::OnConnectivityChangedLocked(
866
- void* arg, grpc_error* error) {
867
- BalancerChannelState* self = static_cast<BalancerChannelState*>(arg);
829
+ void XdsLb::LbChannelState::OnConnectivityChangedLocked(void* arg,
830
+ grpc_error* error) {
831
+ LbChannelState* self = static_cast<LbChannelState*>(arg);
868
832
  if (!self->shutting_down_ &&
869
833
  self->xdslb_policy_->fallback_at_startup_checks_pending_) {
870
834
  if (self->connectivity_ != GRPC_CHANNEL_TRANSIENT_FAILURE) {
@@ -891,84 +855,145 @@ void XdsLb::BalancerChannelState::OnConnectivityChangedLocked(
891
855
  self->xdslb_policy_->UpdateFallbackPolicyLocked();
892
856
  }
893
857
  // Done watching connectivity state, so drop ref.
894
- self->Unref(DEBUG_LOCATION, "watch_lb_channel_connectivity");
858
+ self->Unref(DEBUG_LOCATION, "LbChannelState+watch_done");
895
859
  }
896
860
 
897
861
  //
898
- // XdsLb::BalancerChannelState::BalancerCallState
862
+ // XdsLb::LbChannelState::RetryableLbCall<>
899
863
  //
900
864
 
901
- XdsLb::BalancerChannelState::BalancerCallState::BalancerCallState(
902
- RefCountedPtr<BalancerChannelState> lb_chand)
903
- : InternallyRefCounted<BalancerCallState>(&grpc_lb_xds_trace),
904
- lb_chand_(std::move(lb_chand)) {
905
- GPR_ASSERT(xdslb_policy() != nullptr);
906
- GPR_ASSERT(!xdslb_policy()->shutting_down_);
865
+ template <typename T>
866
+ XdsLb::LbChannelState::RetryableLbCall<T>::RetryableLbCall(
867
+ RefCountedPtr<LbChannelState> lb_chand)
868
+ : lb_chand_(std::move(lb_chand)),
869
+ backoff_(
870
+ BackOff::Options()
871
+ .set_initial_backoff(GRPC_XDS_INITIAL_CONNECT_BACKOFF_SECONDS *
872
+ 1000)
873
+ .set_multiplier(GRPC_XDS_RECONNECT_BACKOFF_MULTIPLIER)
874
+ .set_jitter(GRPC_XDS_RECONNECT_JITTER)
875
+ .set_max_backoff(GRPC_XDS_RECONNECT_MAX_BACKOFF_SECONDS * 1000)) {
876
+ GRPC_CLOSURE_INIT(
877
+ &on_retry_timer_, OnRetryTimerLocked, this,
878
+ grpc_combiner_scheduler(lb_chand_->xdslb_policy()->combiner()));
879
+ StartNewCallLocked();
880
+ }
881
+
882
+ template <typename T>
883
+ void XdsLb::LbChannelState::RetryableLbCall<T>::Orphan() {
884
+ shutting_down_ = true;
885
+ lb_calld_.reset();
886
+ if (retry_timer_callback_pending_) grpc_timer_cancel(&retry_timer_);
887
+ this->Unref(DEBUG_LOCATION, "RetryableLbCall+orphaned");
888
+ }
889
+
890
+ template <typename T>
891
+ void XdsLb::LbChannelState::RetryableLbCall<T>::OnCallFinishedLocked() {
892
+ const bool seen_response = lb_calld_->seen_response();
893
+ lb_calld_.reset();
894
+ if (seen_response) {
895
+ // If we lost connection to the LB server, reset backoff and restart the LB
896
+ // call immediately.
897
+ backoff_.Reset();
898
+ StartNewCallLocked();
899
+ } else {
900
+ // If we failed to connect to the LB server, retry later.
901
+ StartRetryTimerLocked();
902
+ }
903
+ }
904
+
905
+ template <typename T>
906
+ void XdsLb::LbChannelState::RetryableLbCall<T>::StartNewCallLocked() {
907
+ if (shutting_down_) return;
908
+ GPR_ASSERT(lb_chand_->channel_ != nullptr);
909
+ GPR_ASSERT(lb_calld_ == nullptr);
910
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
911
+ gpr_log(GPR_INFO,
912
+ "[xdslb %p] Start new call from retryable call (lb_chand: %p, "
913
+ "retryable call: %p)",
914
+ lb_chand()->xdslb_policy(), lb_chand(), this);
915
+ }
916
+ lb_calld_ = MakeOrphanable<T>(
917
+ this->Ref(DEBUG_LOCATION, "RetryableLbCall+start_new_call"));
918
+ }
919
+
920
+ template <typename T>
921
+ void XdsLb::LbChannelState::RetryableLbCall<T>::StartRetryTimerLocked() {
922
+ if (shutting_down_) return;
923
+ const grpc_millis next_attempt_time = backoff_.NextAttemptTime();
924
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
925
+ grpc_millis timeout = GPR_MAX(next_attempt_time - ExecCtx::Get()->Now(), 0);
926
+ gpr_log(GPR_INFO,
927
+ "[xdslb %p] Failed to connect to LB server (lb_chand: %p) "
928
+ "retry timer will fire in %" PRId64 "ms.",
929
+ lb_chand()->xdslb_policy(), lb_chand(), timeout);
930
+ }
931
+ this->Ref(DEBUG_LOCATION, "RetryableLbCall+retry_timer_start").release();
932
+ grpc_timer_init(&retry_timer_, next_attempt_time, &on_retry_timer_);
933
+ retry_timer_callback_pending_ = true;
934
+ }
935
+
936
+ template <typename T>
937
+ void XdsLb::LbChannelState::RetryableLbCall<T>::OnRetryTimerLocked(
938
+ void* arg, grpc_error* error) {
939
+ RetryableLbCall* lb_calld = static_cast<RetryableLbCall*>(arg);
940
+ lb_calld->retry_timer_callback_pending_ = false;
941
+ if (!lb_calld->shutting_down_ && error == GRPC_ERROR_NONE) {
942
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
943
+ gpr_log(GPR_INFO,
944
+ "[xdslb %p] Retry timer fires (lb_chand: %p, retryable call: %p)",
945
+ lb_calld->lb_chand()->xdslb_policy(), lb_calld->lb_chand(),
946
+ lb_calld);
947
+ }
948
+ lb_calld->StartNewCallLocked();
949
+ }
950
+ lb_calld->Unref(DEBUG_LOCATION, "RetryableLbCall+retry_timer_done");
951
+ }
952
+
953
+ //
954
+ // XdsLb::LbChannelState::EdsCallState
955
+ //
956
+
957
+ XdsLb::LbChannelState::EdsCallState::EdsCallState(
958
+ RefCountedPtr<RetryableLbCall<EdsCallState>> parent)
959
+ : InternallyRefCounted<EdsCallState>(&grpc_lb_xds_trace),
960
+ parent_(std::move(parent)) {
907
961
  // Init the LB call. Note that the LB call will progress every time there's
908
- // activity in xdslb_policy_->interested_parties(), which is comprised of
962
+ // activity in xdslb_policy()->interested_parties(), which is comprised of
909
963
  // the polling entities from client_channel.
964
+ GPR_ASSERT(xdslb_policy() != nullptr);
910
965
  GPR_ASSERT(xdslb_policy()->server_name_ != nullptr);
911
966
  GPR_ASSERT(xdslb_policy()->server_name_[0] != '\0');
912
967
  const grpc_millis deadline =
913
968
  xdslb_policy()->lb_call_timeout_ms_ == 0
914
969
  ? GRPC_MILLIS_INF_FUTURE
915
970
  : ExecCtx::Get()->Now() + xdslb_policy()->lb_call_timeout_ms_;
971
+ // Create an LB call with the specified method name.
916
972
  lb_call_ = grpc_channel_create_pollset_set_call(
917
- lb_chand_->channel_, nullptr, GRPC_PROPAGATE_DEFAULTS,
973
+ lb_chand()->channel_, nullptr, GRPC_PROPAGATE_DEFAULTS,
918
974
  xdslb_policy()->interested_parties(),
919
- GRPC_MDSTR_SLASH_GRPC_DOT_LB_DOT_V1_DOT_LOADBALANCER_SLASH_BALANCELOAD,
975
+ GRPC_MDSTR_SLASH_ENVOY_DOT_API_DOT_V2_DOT_ENDPOINTDISCOVERYSERVICE_SLASH_STREAMENDPOINTS,
920
976
  nullptr, deadline, nullptr);
977
+ GPR_ASSERT(lb_call_ != nullptr);
921
978
  // Init the LB call request payload.
922
- xds_grpclb_request* request =
923
- xds_grpclb_request_create(xdslb_policy()->server_name_);
924
- grpc_slice request_payload_slice = xds_grpclb_request_encode(request);
979
+ grpc_slice request_payload_slice =
980
+ XdsEdsRequestCreateAndEncode(xdslb_policy()->server_name_);
925
981
  send_message_payload_ =
926
982
  grpc_raw_byte_buffer_create(&request_payload_slice, 1);
927
983
  grpc_slice_unref_internal(request_payload_slice);
928
- xds_grpclb_request_destroy(request);
929
984
  // Init other data associated with the LB call.
930
- grpc_metadata_array_init(&lb_initial_metadata_recv_);
931
- grpc_metadata_array_init(&lb_trailing_metadata_recv_);
932
- GRPC_CLOSURE_INIT(&lb_on_initial_request_sent_, OnInitialRequestSentLocked,
933
- this, grpc_combiner_scheduler(xdslb_policy()->combiner()));
934
- GRPC_CLOSURE_INIT(&lb_on_balancer_message_received_,
935
- OnBalancerMessageReceivedLocked, this,
985
+ grpc_metadata_array_init(&initial_metadata_recv_);
986
+ grpc_metadata_array_init(&trailing_metadata_recv_);
987
+ GRPC_CLOSURE_INIT(&on_response_received_, OnResponseReceivedLocked, this,
936
988
  grpc_combiner_scheduler(xdslb_policy()->combiner()));
937
- GRPC_CLOSURE_INIT(&lb_on_balancer_status_received_,
938
- OnBalancerStatusReceivedLocked, this,
989
+ GRPC_CLOSURE_INIT(&on_status_received_, OnStatusReceivedLocked, this,
939
990
  grpc_combiner_scheduler(xdslb_policy()->combiner()));
940
- }
941
-
942
- XdsLb::BalancerChannelState::BalancerCallState::~BalancerCallState() {
943
- GPR_ASSERT(lb_call_ != nullptr);
944
- grpc_call_unref(lb_call_);
945
- grpc_metadata_array_destroy(&lb_initial_metadata_recv_);
946
- grpc_metadata_array_destroy(&lb_trailing_metadata_recv_);
947
- grpc_byte_buffer_destroy(send_message_payload_);
948
- grpc_byte_buffer_destroy(recv_message_payload_);
949
- grpc_slice_unref_internal(lb_call_status_details_);
950
- }
951
-
952
- void XdsLb::BalancerChannelState::BalancerCallState::Orphan() {
953
- GPR_ASSERT(lb_call_ != nullptr);
954
- // If we are here because xdslb_policy wants to cancel the call,
955
- // lb_on_balancer_status_received_ will complete the cancellation and clean
956
- // up. Otherwise, we are here because xdslb_policy has to orphan a failed
957
- // call, then the following cancellation will be a no-op.
958
- grpc_call_cancel(lb_call_, nullptr);
959
- if (client_load_report_timer_callback_pending_) {
960
- grpc_timer_cancel(&client_load_report_timer_);
961
- }
962
- // Note that the initial ref is hold by lb_on_balancer_status_received_
963
- // instead of the caller of this function. So the corresponding unref happens
964
- // in lb_on_balancer_status_received_ instead of here.
965
- }
966
-
967
- void XdsLb::BalancerChannelState::BalancerCallState::StartQuery() {
968
- GPR_ASSERT(lb_call_ != nullptr);
991
+ // Start the call.
969
992
  if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
970
- gpr_log(GPR_INFO, "[xdslb %p] Starting LB call (lb_calld: %p, lb_call: %p)",
971
- xdslb_policy(), this, lb_call_);
993
+ gpr_log(GPR_INFO,
994
+ "[xdslb %p] Starting EDS call (lb_chand: %p, lb_calld: %p, "
995
+ "lb_call: %p)",
996
+ xdslb_policy(), lb_chand(), this, lb_call_);
972
997
  }
973
998
  // Create the ops.
974
999
  grpc_call_error call_error;
@@ -988,19 +1013,14 @@ void XdsLb::BalancerChannelState::BalancerCallState::StartQuery() {
988
1013
  op->flags = 0;
989
1014
  op->reserved = nullptr;
990
1015
  op++;
991
- // TODO(roth): We currently track this ref manually. Once the
992
- // ClosureRef API is ready, we should pass the RefCountedPtr<> along
993
- // with the callback.
994
- auto self = Ref(DEBUG_LOCATION, "on_initial_request_sent");
995
- self.release();
996
- call_error = grpc_call_start_batch_and_execute(
997
- lb_call_, ops, (size_t)(op - ops), &lb_on_initial_request_sent_);
1016
+ call_error = grpc_call_start_batch_and_execute(lb_call_, ops,
1017
+ (size_t)(op - ops), nullptr);
998
1018
  GPR_ASSERT(GRPC_CALL_OK == call_error);
999
1019
  // Op: recv initial metadata.
1000
1020
  op = ops;
1001
1021
  op->op = GRPC_OP_RECV_INITIAL_METADATA;
1002
1022
  op->data.recv_initial_metadata.recv_initial_metadata =
1003
- &lb_initial_metadata_recv_;
1023
+ &initial_metadata_recv_;
1004
1024
  op->flags = 0;
1005
1025
  op->reserved = nullptr;
1006
1026
  op++;
@@ -1010,21 +1030,16 @@ void XdsLb::BalancerChannelState::BalancerCallState::StartQuery() {
1010
1030
  op->flags = 0;
1011
1031
  op->reserved = nullptr;
1012
1032
  op++;
1013
- // TODO(roth): We currently track this ref manually. Once the
1014
- // ClosureRef API is ready, we should pass the RefCountedPtr<> along
1015
- // with the callback.
1016
- self = Ref(DEBUG_LOCATION, "on_message_received");
1017
- self.release();
1033
+ Ref(DEBUG_LOCATION, "EDS+OnResponseReceivedLocked").release();
1018
1034
  call_error = grpc_call_start_batch_and_execute(
1019
- lb_call_, ops, (size_t)(op - ops), &lb_on_balancer_message_received_);
1035
+ lb_call_, ops, (size_t)(op - ops), &on_response_received_);
1020
1036
  GPR_ASSERT(GRPC_CALL_OK == call_error);
1021
1037
  // Op: recv server status.
1022
1038
  op = ops;
1023
1039
  op->op = GRPC_OP_RECV_STATUS_ON_CLIENT;
1024
- op->data.recv_status_on_client.trailing_metadata =
1025
- &lb_trailing_metadata_recv_;
1026
- op->data.recv_status_on_client.status = &lb_call_status_;
1027
- op->data.recv_status_on_client.status_details = &lb_call_status_details_;
1040
+ op->data.recv_status_on_client.trailing_metadata = &trailing_metadata_recv_;
1041
+ op->data.recv_status_on_client.status = &status_code_;
1042
+ op->data.recv_status_on_client.status_details = &status_details_;
1028
1043
  op->flags = 0;
1029
1044
  op->reserved = nullptr;
1030
1045
  op++;
@@ -1032,276 +1047,203 @@ void XdsLb::BalancerChannelState::BalancerCallState::StartQuery() {
1032
1047
  // ref instead of a new ref. When it's invoked, it's the initial ref that is
1033
1048
  // unreffed.
1034
1049
  call_error = grpc_call_start_batch_and_execute(
1035
- lb_call_, ops, (size_t)(op - ops), &lb_on_balancer_status_received_);
1050
+ lb_call_, ops, (size_t)(op - ops), &on_status_received_);
1036
1051
  GPR_ASSERT(GRPC_CALL_OK == call_error);
1037
1052
  }
1038
1053
 
1039
- void XdsLb::BalancerChannelState::BalancerCallState::
1040
- ScheduleNextClientLoadReportLocked() {
1041
- const grpc_millis next_client_load_report_time =
1042
- ExecCtx::Get()->Now() + client_stats_report_interval_;
1043
- GRPC_CLOSURE_INIT(&client_load_report_closure_,
1044
- MaybeSendClientLoadReportLocked, this,
1045
- grpc_combiner_scheduler(xdslb_policy()->combiner()));
1046
- grpc_timer_init(&client_load_report_timer_, next_client_load_report_time,
1047
- &client_load_report_closure_);
1048
- client_load_report_timer_callback_pending_ = true;
1054
+ XdsLb::LbChannelState::EdsCallState::~EdsCallState() {
1055
+ grpc_metadata_array_destroy(&initial_metadata_recv_);
1056
+ grpc_metadata_array_destroy(&trailing_metadata_recv_);
1057
+ grpc_byte_buffer_destroy(send_message_payload_);
1058
+ grpc_byte_buffer_destroy(recv_message_payload_);
1059
+ grpc_slice_unref_internal(status_details_);
1060
+ GPR_ASSERT(lb_call_ != nullptr);
1061
+ grpc_call_unref(lb_call_);
1049
1062
  }
1050
1063
 
1051
- void XdsLb::BalancerChannelState::BalancerCallState::
1052
- MaybeSendClientLoadReportLocked(void* arg, grpc_error* error) {
1053
- BalancerCallState* lb_calld = static_cast<BalancerCallState*>(arg);
1054
- lb_calld->client_load_report_timer_callback_pending_ = false;
1055
- if (error != GRPC_ERROR_NONE || !lb_calld->IsCurrentCallOnChannel()) {
1056
- lb_calld->Unref(DEBUG_LOCATION, "client_load_report");
1057
- return;
1058
- }
1059
- // If we've already sent the initial request, then we can go ahead and send
1060
- // the load report. Otherwise, we need to wait until the initial request has
1061
- // been sent to send this (see OnInitialRequestSentLocked()).
1062
- if (lb_calld->send_message_payload_ == nullptr) {
1063
- lb_calld->SendClientLoadReportLocked();
1064
- } else {
1065
- lb_calld->client_load_report_is_due_ = true;
1066
- }
1067
- }
1068
-
1069
- bool XdsLb::BalancerChannelState::BalancerCallState::LoadReportCountersAreZero(
1070
- xds_grpclb_request* request) {
1071
- XdsLbClientStats::DroppedCallCounts* drop_entries =
1072
- static_cast<XdsLbClientStats::DroppedCallCounts*>(
1073
- request->client_stats.calls_finished_with_drop.arg);
1074
- return request->client_stats.num_calls_started == 0 &&
1075
- request->client_stats.num_calls_finished == 0 &&
1076
- request->client_stats.num_calls_finished_with_client_failed_to_send ==
1077
- 0 &&
1078
- request->client_stats.num_calls_finished_known_received == 0 &&
1079
- (drop_entries == nullptr || drop_entries->empty());
1080
- }
1081
-
1082
- // TODO(vpowar): Use LRS to send the client Load Report.
1083
- void XdsLb::BalancerChannelState::BalancerCallState::
1084
- SendClientLoadReportLocked() {
1085
- // Construct message payload.
1086
- GPR_ASSERT(send_message_payload_ == nullptr);
1087
- xds_grpclb_request* request =
1088
- xds_grpclb_load_report_request_create_locked(client_stats_.get());
1089
- // Skip client load report if the counters were all zero in the last
1090
- // report and they are still zero in this one.
1091
- if (LoadReportCountersAreZero(request)) {
1092
- if (last_client_load_report_counters_were_zero_) {
1093
- xds_grpclb_request_destroy(request);
1094
- ScheduleNextClientLoadReportLocked();
1095
- return;
1096
- }
1097
- last_client_load_report_counters_were_zero_ = true;
1098
- } else {
1099
- last_client_load_report_counters_were_zero_ = false;
1100
- }
1101
- // TODO(vpowar): Send the report on LRS stream.
1102
- xds_grpclb_request_destroy(request);
1064
+ void XdsLb::LbChannelState::EdsCallState::Orphan() {
1065
+ GPR_ASSERT(lb_call_ != nullptr);
1066
+ // If we are here because xdslb_policy wants to cancel the call,
1067
+ // on_status_received_ will complete the cancellation and clean up. Otherwise,
1068
+ // we are here because xdslb_policy has to orphan a failed call, then the
1069
+ // following cancellation will be a no-op.
1070
+ grpc_call_cancel(lb_call_, nullptr);
1071
+ // Note that the initial ref is hold by on_status_received_. So the
1072
+ // corresponding unref happens in on_status_received_ instead of here.
1103
1073
  }
1104
1074
 
1105
- void XdsLb::BalancerChannelState::BalancerCallState::OnInitialRequestSentLocked(
1075
+ void XdsLb::LbChannelState::EdsCallState::OnResponseReceivedLocked(
1106
1076
  void* arg, grpc_error* error) {
1107
- BalancerCallState* lb_calld = static_cast<BalancerCallState*>(arg);
1108
- grpc_byte_buffer_destroy(lb_calld->send_message_payload_);
1109
- lb_calld->send_message_payload_ = nullptr;
1110
- // If we attempted to send a client load report before the initial request was
1111
- // sent (and this lb_calld is still in use), send the load report now.
1112
- if (lb_calld->client_load_report_is_due_ &&
1113
- lb_calld->IsCurrentCallOnChannel()) {
1114
- lb_calld->SendClientLoadReportLocked();
1115
- lb_calld->client_load_report_is_due_ = false;
1116
- }
1117
- lb_calld->Unref(DEBUG_LOCATION, "on_initial_request_sent");
1118
- }
1119
-
1120
- void XdsLb::BalancerChannelState::BalancerCallState::
1121
- OnBalancerMessageReceivedLocked(void* arg, grpc_error* error) {
1122
- BalancerCallState* lb_calld = static_cast<BalancerCallState*>(arg);
1123
- XdsLb* xdslb_policy = lb_calld->xdslb_policy();
1077
+ EdsCallState* eds_calld = static_cast<EdsCallState*>(arg);
1078
+ LbChannelState* lb_chand = eds_calld->lb_chand();
1079
+ XdsLb* xdslb_policy = eds_calld->xdslb_policy();
1124
1080
  // Empty payload means the LB call was cancelled.
1125
- if (!lb_calld->IsCurrentCallOnChannel() ||
1126
- lb_calld->recv_message_payload_ == nullptr) {
1127
- lb_calld->Unref(DEBUG_LOCATION, "on_message_received");
1081
+ if (!eds_calld->IsCurrentCallOnChannel() ||
1082
+ eds_calld->recv_message_payload_ == nullptr) {
1083
+ eds_calld->Unref(DEBUG_LOCATION, "EDS+OnResponseReceivedLocked");
1128
1084
  return;
1129
1085
  }
1086
+ // Read the response.
1130
1087
  grpc_byte_buffer_reader bbr;
1131
- grpc_byte_buffer_reader_init(&bbr, lb_calld->recv_message_payload_);
1088
+ grpc_byte_buffer_reader_init(&bbr, eds_calld->recv_message_payload_);
1132
1089
  grpc_slice response_slice = grpc_byte_buffer_reader_readall(&bbr);
1133
1090
  grpc_byte_buffer_reader_destroy(&bbr);
1134
- grpc_byte_buffer_destroy(lb_calld->recv_message_payload_);
1135
- lb_calld->recv_message_payload_ = nullptr;
1136
- xds_grpclb_initial_response* initial_response;
1137
- xds_grpclb_serverlist* serverlist;
1138
- if (!lb_calld->seen_initial_response_ &&
1139
- (initial_response = xds_grpclb_initial_response_parse(response_slice)) !=
1140
- nullptr) {
1141
- // Have NOT seen initial response, look for initial response.
1142
- // TODO(juanlishen): When we convert this to use the xds protocol, the
1143
- // balancer will send us a fallback timeout such that we should go into
1144
- // fallback mode if we have lost contact with the balancer after a certain
1145
- // period of time. We will need to save the timeout value here, and then
1146
- // when the balancer call ends, we will need to start a timer for the
1147
- // specified period of time, and if the timer fires, we go into fallback
1148
- // mode. We will also need to cancel the timer when we receive a serverlist
1149
- // from the balancer.
1150
- if (initial_response->has_client_stats_report_interval) {
1151
- const grpc_millis interval = xds_grpclb_duration_to_millis(
1152
- &initial_response->client_stats_report_interval);
1153
- if (interval > 0) {
1154
- lb_calld->client_stats_report_interval_ =
1155
- GPR_MAX(GPR_MS_PER_SEC, interval);
1156
- }
1091
+ grpc_byte_buffer_destroy(eds_calld->recv_message_payload_);
1092
+ eds_calld->recv_message_payload_ = nullptr;
1093
+ // TODO(juanlishen): When we convert this to use the xds protocol, the
1094
+ // balancer will send us a fallback timeout such that we should go into
1095
+ // fallback mode if we have lost contact with the balancer after a certain
1096
+ // period of time. We will need to save the timeout value here, and then
1097
+ // when the balancer call ends, we will need to start a timer for the
1098
+ // specified period of time, and if the timer fires, we go into fallback
1099
+ // mode. We will also need to cancel the timer when we receive a serverlist
1100
+ // from the balancer.
1101
+ // This anonymous lambda is a hack to avoid the usage of goto.
1102
+ [&]() {
1103
+ // Parse the response.
1104
+ XdsUpdate update;
1105
+ grpc_error* parse_error =
1106
+ XdsEdsResponseDecodeAndParse(response_slice, &update);
1107
+ if (parse_error != GRPC_ERROR_NONE) {
1108
+ gpr_log(GPR_ERROR, "[xdslb %p] EDS response parsing failed. error=%s",
1109
+ xdslb_policy, grpc_error_string(parse_error));
1110
+ GRPC_ERROR_UNREF(parse_error);
1111
+ return;
1157
1112
  }
1158
- if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
1159
- if (lb_calld->client_stats_report_interval_ != 0) {
1160
- gpr_log(GPR_INFO,
1161
- "[xdslb %p] Received initial LB response message; "
1162
- "client load reporting interval = %" PRId64 " milliseconds",
1163
- xdslb_policy, lb_calld->client_stats_report_interval_);
1164
- } else {
1165
- gpr_log(GPR_INFO,
1166
- "[xdslb %p] Received initial LB response message; client load "
1167
- "reporting NOT enabled",
1168
- xdslb_policy);
1169
- }
1113
+ if (update.locality_list.empty() && !update.drop_all) {
1114
+ char* response_slice_str =
1115
+ grpc_dump_slice(response_slice, GPR_DUMP_ASCII | GPR_DUMP_HEX);
1116
+ gpr_log(GPR_ERROR,
1117
+ "[xdslb %p] EDS response '%s' doesn't contain any valid locality "
1118
+ "but doesn't require to drop all calls. Ignoring.",
1119
+ xdslb_policy, response_slice_str);
1120
+ gpr_free(response_slice_str);
1121
+ return;
1170
1122
  }
1171
- xds_grpclb_initial_response_destroy(initial_response);
1172
- lb_calld->seen_initial_response_ = true;
1173
- } else if ((serverlist = xds_grpclb_response_parse_serverlist(
1174
- response_slice)) != nullptr) {
1175
- // Have seen initial response, look for serverlist.
1176
- GPR_ASSERT(lb_calld->lb_call_ != nullptr);
1123
+ eds_calld->seen_response_ = true;
1177
1124
  if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
1178
1125
  gpr_log(GPR_INFO,
1179
- "[xdslb %p] Serverlist with %" PRIuPTR " servers received",
1180
- xdslb_policy, serverlist->num_servers);
1181
- for (size_t i = 0; i < serverlist->num_servers; ++i) {
1182
- grpc_resolved_address addr;
1183
- ParseServer(serverlist->servers[i], &addr);
1184
- char* ipport;
1185
- grpc_sockaddr_to_string(&ipport, &addr, false);
1186
- gpr_log(GPR_INFO, "[xdslb %p] Serverlist[%" PRIuPTR "]: %s",
1187
- xdslb_policy, i, ipport);
1188
- gpr_free(ipport);
1126
+ "[xdslb %p] EDS response with %" PRIuPTR
1127
+ " localities and %" PRIuPTR
1128
+ " drop categories received (drop_all=%d)",
1129
+ xdslb_policy, update.locality_list.size(),
1130
+ update.drop_config->drop_category_list().size(), update.drop_all);
1131
+ for (size_t i = 0; i < update.locality_list.size(); ++i) {
1132
+ const XdsLocalityInfo& locality = update.locality_list[i];
1133
+ gpr_log(GPR_INFO,
1134
+ "[xdslb %p] Locality %" PRIuPTR " %s contains %" PRIuPTR
1135
+ " server addresses",
1136
+ xdslb_policy, i,
1137
+ locality.locality_name->AsHumanReadableString(),
1138
+ locality.serverlist.size());
1139
+ for (size_t j = 0; j < locality.serverlist.size(); ++j) {
1140
+ char* ipport;
1141
+ grpc_sockaddr_to_string(&ipport, &locality.serverlist[j].address(),
1142
+ false);
1143
+ gpr_log(GPR_INFO,
1144
+ "[xdslb %p] Locality %" PRIuPTR
1145
+ " %s, server address %" PRIuPTR ": %s",
1146
+ xdslb_policy, i,
1147
+ locality.locality_name->AsHumanReadableString(), j, ipport);
1148
+ gpr_free(ipport);
1149
+ }
1150
+ }
1151
+ for (size_t i = 0; i < update.drop_config->drop_category_list().size();
1152
+ ++i) {
1153
+ const XdsDropConfig::DropCategory& drop_category =
1154
+ update.drop_config->drop_category_list()[i];
1155
+ gpr_log(GPR_INFO,
1156
+ "[xdslb %p] Drop category %s has drop rate %d per million",
1157
+ xdslb_policy, drop_category.name.get(),
1158
+ drop_category.parts_per_million);
1189
1159
  }
1190
1160
  }
1191
- // Pending LB channel receives a serverlist; promote it.
1161
+ // Pending LB channel receives a response; promote it.
1192
1162
  // Note that this call can't be on a discarded pending channel, because
1193
1163
  // such channels don't have any current call but we have checked this call
1194
1164
  // is a current call.
1195
- if (!lb_calld->lb_chand_->IsCurrentChannel()) {
1165
+ if (!lb_chand->IsCurrentChannel()) {
1196
1166
  if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
1197
1167
  gpr_log(GPR_INFO,
1198
- "[xdslb %p] Promoting pending LB channel %p to replace "
1199
- "current LB channel %p",
1200
- xdslb_policy, lb_calld->lb_chand_.get(),
1201
- lb_calld->xdslb_policy()->lb_chand_.get());
1168
+ "[xdslb %p] Pending LB channel %p receives EDS response; "
1169
+ "promoting it to replace current LB channel %p",
1170
+ xdslb_policy, lb_chand, xdslb_policy->lb_chand_.get());
1202
1171
  }
1203
- lb_calld->xdslb_policy()->lb_chand_ =
1204
- std::move(lb_calld->xdslb_policy()->pending_lb_chand_);
1205
- }
1206
- // Start sending client load report only after we start using the
1207
- // serverlist returned from the current LB call.
1208
- if (lb_calld->client_stats_report_interval_ > 0 &&
1209
- lb_calld->client_stats_ == nullptr) {
1210
- lb_calld->client_stats_ = MakeRefCounted<XdsLbClientStats>();
1211
- lb_calld->Ref(DEBUG_LOCATION, "client_load_report").release();
1212
- lb_calld->ScheduleNextClientLoadReportLocked();
1172
+ // TODO(juanlishen): Maybe promote the pending LB channel when the
1173
+ // response results a READY locality map.
1174
+ xdslb_policy->lb_chand_ = std::move(xdslb_policy->pending_lb_chand_);
1213
1175
  }
1214
- if (!xdslb_policy->locality_serverlist_.empty() &&
1215
- xds_grpclb_serverlist_equals(
1216
- xdslb_policy->locality_serverlist_[0]->serverlist, serverlist)) {
1176
+ // At this point, lb_chand must be the current LB channel, so try to start
1177
+ // load reporting.
1178
+ LrsCallState* lrs_calld = lb_chand->lrs_calld_->lb_calld();
1179
+ if (lrs_calld != nullptr) lrs_calld->MaybeStartReportingLocked();
1180
+ // If the balancer tells us to drop all the calls, we should exit fallback
1181
+ // mode immediately.
1182
+ if (update.drop_all) xdslb_policy->MaybeExitFallbackMode();
1183
+ // Update the drop config.
1184
+ const bool drop_config_changed =
1185
+ xdslb_policy->drop_config_ == nullptr ||
1186
+ *xdslb_policy->drop_config_ != *update.drop_config;
1187
+ xdslb_policy->drop_config_ = std::move(update.drop_config);
1188
+ // Ignore identical locality update.
1189
+ if (xdslb_policy->locality_list_ == update.locality_list) {
1217
1190
  if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
1218
1191
  gpr_log(GPR_INFO,
1219
- "[xdslb %p] Incoming server list identical to current, "
1220
- "ignoring.",
1221
- xdslb_policy);
1192
+ "[xdslb %p] Incoming locality list identical to current, "
1193
+ "ignoring. (drop_config_changed=%d)",
1194
+ xdslb_policy, drop_config_changed);
1222
1195
  }
1223
- xds_grpclb_destroy_serverlist(serverlist);
1224
- } else { // New serverlist.
1225
- // If the balancer tells us to drop all the calls, we should exit fallback
1226
- // mode immediately.
1227
- // TODO(juanlishen): When we add EDS drop, we should change to check
1228
- // drop_percentage.
1229
- if (serverlist->num_servers == 0) xdslb_policy->MaybeExitFallbackMode();
1230
- if (!xdslb_policy->locality_serverlist_.empty()) {
1231
- xds_grpclb_destroy_serverlist(
1232
- xdslb_policy->locality_serverlist_[0]->serverlist);
1233
- } else {
1234
- // Initialize locality serverlist, currently the list only handles
1235
- // one child.
1236
- xdslb_policy->locality_serverlist_.emplace_back(
1237
- MakeUnique<LocalityServerlistEntry>());
1238
- xdslb_policy->locality_serverlist_[0]->locality_name =
1239
- MakeRefCounted<LocalityName>(
1240
- UniquePtr<char>(gpr_strdup(kDefaultLocalityRegion)),
1241
- UniquePtr<char>(gpr_strdup(kDefaultLocalityZone)),
1242
- UniquePtr<char>(gpr_strdup(kDefaultLocalitySubzone)));
1243
- xdslb_policy->locality_serverlist_[0]->locality_weight =
1244
- kDefaultLocalityWeight;
1196
+ if (drop_config_changed) {
1197
+ xdslb_policy->locality_map_.UpdateXdsPickerLocked();
1245
1198
  }
1246
- // Update the serverlist in the XdsLb instance. This serverlist
1247
- // instance will be destroyed either upon the next update or when the
1248
- // XdsLb instance is destroyed.
1249
- xdslb_policy->locality_serverlist_[0]->serverlist = serverlist;
1250
- xdslb_policy->locality_map_.UpdateLocked(
1251
- xdslb_policy->locality_serverlist_,
1252
- xdslb_policy->child_policy_config_.get(), xdslb_policy->args_,
1253
- xdslb_policy);
1199
+ return;
1254
1200
  }
1255
- } else {
1256
- // No valid initial response or serverlist found.
1257
- char* response_slice_str =
1258
- grpc_dump_slice(response_slice, GPR_DUMP_ASCII | GPR_DUMP_HEX);
1259
- gpr_log(GPR_ERROR,
1260
- "[xdslb %p] Invalid LB response received: '%s'. Ignoring.",
1261
- xdslb_policy, response_slice_str);
1262
- gpr_free(response_slice_str);
1263
- }
1201
+ // Update the locality list.
1202
+ xdslb_policy->locality_list_ = std::move(update.locality_list);
1203
+ // Update the locality map.
1204
+ xdslb_policy->locality_map_.UpdateLocked(
1205
+ xdslb_policy->locality_list_, xdslb_policy->child_policy_config_.get(),
1206
+ xdslb_policy->args_, xdslb_policy);
1207
+ }();
1264
1208
  grpc_slice_unref_internal(response_slice);
1265
- if (!xdslb_policy->shutting_down_) {
1266
- // Keep listening for serverlist updates.
1267
- grpc_op op;
1268
- memset(&op, 0, sizeof(op));
1269
- op.op = GRPC_OP_RECV_MESSAGE;
1270
- op.data.recv_message.recv_message = &lb_calld->recv_message_payload_;
1271
- op.flags = 0;
1272
- op.reserved = nullptr;
1273
- // Reuse the "OnBalancerMessageReceivedLocked" ref taken in StartQuery().
1274
- const grpc_call_error call_error = grpc_call_start_batch_and_execute(
1275
- lb_calld->lb_call_, &op, 1,
1276
- &lb_calld->lb_on_balancer_message_received_);
1277
- GPR_ASSERT(GRPC_CALL_OK == call_error);
1278
- } else {
1279
- lb_calld->Unref(DEBUG_LOCATION, "on_message_received+xds_shutdown");
1209
+ if (xdslb_policy->shutting_down_) {
1210
+ eds_calld->Unref(DEBUG_LOCATION,
1211
+ "EDS+OnResponseReceivedLocked+xds_shutdown");
1212
+ return;
1280
1213
  }
1214
+ // Keep listening for serverlist updates.
1215
+ grpc_op op;
1216
+ memset(&op, 0, sizeof(op));
1217
+ op.op = GRPC_OP_RECV_MESSAGE;
1218
+ op.data.recv_message.recv_message = &eds_calld->recv_message_payload_;
1219
+ op.flags = 0;
1220
+ op.reserved = nullptr;
1221
+ GPR_ASSERT(eds_calld->lb_call_ != nullptr);
1222
+ // Reuse the "EDS+OnResponseReceivedLocked" ref taken in ctor.
1223
+ const grpc_call_error call_error = grpc_call_start_batch_and_execute(
1224
+ eds_calld->lb_call_, &op, 1, &eds_calld->on_response_received_);
1225
+ GPR_ASSERT(GRPC_CALL_OK == call_error);
1281
1226
  }
1282
1227
 
1283
- void XdsLb::BalancerChannelState::BalancerCallState::
1284
- OnBalancerStatusReceivedLocked(void* arg, grpc_error* error) {
1285
- BalancerCallState* lb_calld = static_cast<BalancerCallState*>(arg);
1286
- XdsLb* xdslb_policy = lb_calld->xdslb_policy();
1287
- BalancerChannelState* lb_chand = lb_calld->lb_chand_.get();
1288
- GPR_ASSERT(lb_calld->lb_call_ != nullptr);
1228
+ void XdsLb::LbChannelState::EdsCallState::OnStatusReceivedLocked(
1229
+ void* arg, grpc_error* error) {
1230
+ EdsCallState* eds_calld = static_cast<EdsCallState*>(arg);
1231
+ LbChannelState* lb_chand = eds_calld->lb_chand();
1232
+ XdsLb* xdslb_policy = eds_calld->xdslb_policy();
1289
1233
  if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
1290
- char* status_details =
1291
- grpc_slice_to_c_string(lb_calld->lb_call_status_details_);
1234
+ char* status_details = grpc_slice_to_c_string(eds_calld->status_details_);
1292
1235
  gpr_log(GPR_INFO,
1293
- "[xdslb %p] Status from LB server received. Status = %d, details "
1294
- "= '%s', (lb_chand: %p, lb_calld: %p, lb_call: %p), error '%s'",
1295
- xdslb_policy, lb_calld->lb_call_status_, status_details, lb_chand,
1296
- lb_calld, lb_calld->lb_call_, grpc_error_string(error));
1236
+ "[xdslb %p] EDS call status received. Status = %d, details "
1237
+ "= '%s', (lb_chand: %p, eds_calld: %p, lb_call: %p), error '%s'",
1238
+ xdslb_policy, eds_calld->status_code_, status_details, lb_chand,
1239
+ eds_calld, eds_calld->lb_call_, grpc_error_string(error));
1297
1240
  gpr_free(status_details);
1298
1241
  }
1299
1242
  // Ignore status from a stale call.
1300
- if (lb_calld->IsCurrentCallOnChannel()) {
1243
+ if (eds_calld->IsCurrentCallOnChannel()) {
1301
1244
  // Because this call is the current one on the channel, the channel can't
1302
1245
  // have been swapped out; otherwise, the call should have been reset.
1303
1246
  GPR_ASSERT(lb_chand->IsCurrentChannel() || lb_chand->IsPendingChannel());
1304
- GPR_ASSERT(!xdslb_policy->shutting_down_);
1305
1247
  if (lb_chand != xdslb_policy->LatestLbChannel()) {
1306
1248
  // This channel must be the current one and there is a pending one. Swap
1307
1249
  // in the pending one and we are done.
@@ -1309,23 +1251,13 @@ void XdsLb::BalancerChannelState::BalancerCallState::
1309
1251
  gpr_log(GPR_INFO,
1310
1252
  "[xdslb %p] Promoting pending LB channel %p to replace "
1311
1253
  "current LB channel %p",
1312
- xdslb_policy, lb_calld->lb_chand_.get(),
1313
- lb_calld->xdslb_policy()->lb_chand_.get());
1254
+ xdslb_policy, lb_chand, xdslb_policy->lb_chand_.get());
1314
1255
  }
1315
1256
  xdslb_policy->lb_chand_ = std::move(xdslb_policy->pending_lb_chand_);
1316
1257
  } else {
1317
1258
  // This channel is the most recently created one. Try to restart the call
1318
1259
  // and reresolve.
1319
- lb_chand->lb_calld_.reset();
1320
- if (lb_calld->seen_initial_response_) {
1321
- // If we lost connection to the LB server, reset the backoff and restart
1322
- // the LB call immediately.
1323
- lb_chand->lb_call_backoff_.Reset();
1324
- lb_chand->StartCallLocked();
1325
- } else {
1326
- // If we failed to connect to the LB server, retry later.
1327
- lb_chand->StartCallRetryTimerLocked();
1328
- }
1260
+ eds_calld->parent_->OnCallFinishedLocked();
1329
1261
  xdslb_policy->channel_control_helper()->RequestReresolution();
1330
1262
  // If the fallback-at-startup checks are pending, go into fallback mode
1331
1263
  // immediately. This short-circuits the timeout for the
@@ -1341,7 +1273,369 @@ void XdsLb::BalancerChannelState::BalancerCallState::
1341
1273
  }
1342
1274
  }
1343
1275
  }
1344
- lb_calld->Unref(DEBUG_LOCATION, "lb_call_ended");
1276
+ eds_calld->Unref(DEBUG_LOCATION, "EDS+OnStatusReceivedLocked");
1277
+ }
1278
+
1279
+ bool XdsLb::LbChannelState::EdsCallState::IsCurrentCallOnChannel() const {
1280
+ // If the retryable EDS call is null (which only happens when the LB channel
1281
+ // is shutting down), all the EDS calls are stale.
1282
+ if (lb_chand()->eds_calld_ == nullptr) return false;
1283
+ return this == lb_chand()->eds_calld_->lb_calld();
1284
+ }
1285
+
1286
+ //
1287
+ // XdsLb::LbChannelState::LrsCallState::Reporter
1288
+ //
1289
+
1290
+ void XdsLb::LbChannelState::LrsCallState::Reporter::Orphan() {
1291
+ if (next_report_timer_callback_pending_) {
1292
+ grpc_timer_cancel(&next_report_timer_);
1293
+ }
1294
+ }
1295
+
1296
+ void XdsLb::LbChannelState::LrsCallState::Reporter::ScheduleNextReportLocked() {
1297
+ const grpc_millis next_report_time = ExecCtx::Get()->Now() + report_interval_;
1298
+ grpc_timer_init(&next_report_timer_, next_report_time,
1299
+ &on_next_report_timer_);
1300
+ next_report_timer_callback_pending_ = true;
1301
+ }
1302
+
1303
+ void XdsLb::LbChannelState::LrsCallState::Reporter::OnNextReportTimerLocked(
1304
+ void* arg, grpc_error* error) {
1305
+ Reporter* self = static_cast<Reporter*>(arg);
1306
+ self->next_report_timer_callback_pending_ = false;
1307
+ if (error != GRPC_ERROR_NONE || !self->IsCurrentReporterOnCall()) {
1308
+ self->Unref(DEBUG_LOCATION, "Reporter+timer");
1309
+ return;
1310
+ }
1311
+ self->SendReportLocked();
1312
+ }
1313
+
1314
+ void XdsLb::LbChannelState::LrsCallState::Reporter::SendReportLocked() {
1315
+ // Create a request that contains the load report.
1316
+ grpc_slice request_payload_slice = XdsLrsRequestCreateAndEncode(
1317
+ xdslb_policy()->server_name_, &xdslb_policy()->client_stats_);
1318
+ // Skip client load report if the counters were all zero in the last
1319
+ // report and they are still zero in this one.
1320
+ const bool old_val = last_report_counters_were_zero_;
1321
+ last_report_counters_were_zero_ = static_cast<bool>(
1322
+ grpc_slice_eq(request_payload_slice, grpc_empty_slice()));
1323
+ if (old_val && last_report_counters_were_zero_) {
1324
+ ScheduleNextReportLocked();
1325
+ return;
1326
+ }
1327
+ parent_->send_message_payload_ =
1328
+ grpc_raw_byte_buffer_create(&request_payload_slice, 1);
1329
+ grpc_slice_unref_internal(request_payload_slice);
1330
+ // Send the report.
1331
+ grpc_op op;
1332
+ memset(&op, 0, sizeof(op));
1333
+ op.op = GRPC_OP_SEND_MESSAGE;
1334
+ op.data.send_message.send_message = parent_->send_message_payload_;
1335
+ grpc_call_error call_error = grpc_call_start_batch_and_execute(
1336
+ parent_->lb_call_, &op, 1, &on_report_done_);
1337
+ if (GPR_UNLIKELY(call_error != GRPC_CALL_OK)) {
1338
+ gpr_log(GPR_ERROR,
1339
+ "[xdslb %p] lb_calld=%p call_error=%d sending client load report",
1340
+ xdslb_policy(), this, call_error);
1341
+ GPR_ASSERT(GRPC_CALL_OK == call_error);
1342
+ }
1343
+ }
1344
+
1345
+ void XdsLb::LbChannelState::LrsCallState::Reporter::OnReportDoneLocked(
1346
+ void* arg, grpc_error* error) {
1347
+ Reporter* self = static_cast<Reporter*>(arg);
1348
+ grpc_byte_buffer_destroy(self->parent_->send_message_payload_);
1349
+ self->parent_->send_message_payload_ = nullptr;
1350
+ if (error != GRPC_ERROR_NONE || !self->IsCurrentReporterOnCall()) {
1351
+ // If this reporter is no longer the current one on the call, the reason
1352
+ // might be that it was orphaned for a new one due to config update.
1353
+ if (!self->IsCurrentReporterOnCall()) {
1354
+ self->parent_->MaybeStartReportingLocked();
1355
+ }
1356
+ self->Unref(DEBUG_LOCATION, "Reporter+report_done");
1357
+ return;
1358
+ }
1359
+ self->ScheduleNextReportLocked();
1360
+ }
1361
+
1362
+ //
1363
+ // XdsLb::LbChannelState::LrsCallState
1364
+ //
1365
+
1366
+ XdsLb::LbChannelState::LrsCallState::LrsCallState(
1367
+ RefCountedPtr<RetryableLbCall<LrsCallState>> parent)
1368
+ : InternallyRefCounted<LrsCallState>(&grpc_lb_xds_trace),
1369
+ parent_(std::move(parent)) {
1370
+ // Init the LB call. Note that the LB call will progress every time there's
1371
+ // activity in xdslb_policy()->interested_parties(), which is comprised of
1372
+ // the polling entities from client_channel.
1373
+ GPR_ASSERT(xdslb_policy() != nullptr);
1374
+ GPR_ASSERT(xdslb_policy()->server_name_ != nullptr);
1375
+ GPR_ASSERT(xdslb_policy()->server_name_[0] != '\0');
1376
+ const grpc_millis deadline =
1377
+ xdslb_policy()->lb_call_timeout_ms_ == 0
1378
+ ? GRPC_MILLIS_INF_FUTURE
1379
+ : ExecCtx::Get()->Now() + xdslb_policy()->lb_call_timeout_ms_;
1380
+ lb_call_ = grpc_channel_create_pollset_set_call(
1381
+ lb_chand()->channel_, nullptr, GRPC_PROPAGATE_DEFAULTS,
1382
+ xdslb_policy()->interested_parties(),
1383
+ GRPC_MDSTR_SLASH_ENVOY_DOT_SERVICE_DOT_LOAD_STATS_DOT_V2_DOT_LOADREPORTINGSERVICE_SLASH_STREAMLOADSTATS,
1384
+ nullptr, deadline, nullptr);
1385
+ GPR_ASSERT(lb_call_ != nullptr);
1386
+ // Init the LB call request payload.
1387
+ grpc_slice request_payload_slice =
1388
+ XdsLrsRequestCreateAndEncode(xdslb_policy()->server_name_);
1389
+ send_message_payload_ =
1390
+ grpc_raw_byte_buffer_create(&request_payload_slice, 1);
1391
+ grpc_slice_unref_internal(request_payload_slice);
1392
+ // Init other data associated with the LRS call.
1393
+ grpc_metadata_array_init(&initial_metadata_recv_);
1394
+ grpc_metadata_array_init(&trailing_metadata_recv_);
1395
+ GRPC_CLOSURE_INIT(&on_initial_request_sent_, OnInitialRequestSentLocked, this,
1396
+ grpc_combiner_scheduler(xdslb_policy()->combiner()));
1397
+ GRPC_CLOSURE_INIT(&on_response_received_, OnResponseReceivedLocked, this,
1398
+ grpc_combiner_scheduler(xdslb_policy()->combiner()));
1399
+ GRPC_CLOSURE_INIT(&on_status_received_, OnStatusReceivedLocked, this,
1400
+ grpc_combiner_scheduler(xdslb_policy()->combiner()));
1401
+ // Start the call.
1402
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
1403
+ gpr_log(GPR_INFO,
1404
+ "[xdslb %p] Starting LRS call (lb_chand: %p, lb_calld: %p, "
1405
+ "lb_call: %p)",
1406
+ xdslb_policy(), lb_chand(), this, lb_call_);
1407
+ }
1408
+ // Create the ops.
1409
+ grpc_call_error call_error;
1410
+ grpc_op ops[3];
1411
+ memset(ops, 0, sizeof(ops));
1412
+ // Op: send initial metadata.
1413
+ grpc_op* op = ops;
1414
+ op->op = GRPC_OP_SEND_INITIAL_METADATA;
1415
+ op->data.send_initial_metadata.count = 0;
1416
+ op->flags = 0;
1417
+ op->reserved = nullptr;
1418
+ op++;
1419
+ // Op: send request message.
1420
+ GPR_ASSERT(send_message_payload_ != nullptr);
1421
+ op->op = GRPC_OP_SEND_MESSAGE;
1422
+ op->data.send_message.send_message = send_message_payload_;
1423
+ op->flags = 0;
1424
+ op->reserved = nullptr;
1425
+ op++;
1426
+ Ref(DEBUG_LOCATION, "LRS+OnInitialRequestSentLocked").release();
1427
+ call_error = grpc_call_start_batch_and_execute(
1428
+ lb_call_, ops, (size_t)(op - ops), &on_initial_request_sent_);
1429
+ GPR_ASSERT(GRPC_CALL_OK == call_error);
1430
+ // Op: recv initial metadata.
1431
+ op = ops;
1432
+ op->op = GRPC_OP_RECV_INITIAL_METADATA;
1433
+ op->data.recv_initial_metadata.recv_initial_metadata =
1434
+ &initial_metadata_recv_;
1435
+ op->flags = 0;
1436
+ op->reserved = nullptr;
1437
+ op++;
1438
+ // Op: recv response.
1439
+ op->op = GRPC_OP_RECV_MESSAGE;
1440
+ op->data.recv_message.recv_message = &recv_message_payload_;
1441
+ op->flags = 0;
1442
+ op->reserved = nullptr;
1443
+ op++;
1444
+ Ref(DEBUG_LOCATION, "LRS+OnResponseReceivedLocked").release();
1445
+ call_error = grpc_call_start_batch_and_execute(
1446
+ lb_call_, ops, (size_t)(op - ops), &on_response_received_);
1447
+ GPR_ASSERT(GRPC_CALL_OK == call_error);
1448
+ // Op: recv server status.
1449
+ op = ops;
1450
+ op->op = GRPC_OP_RECV_STATUS_ON_CLIENT;
1451
+ op->data.recv_status_on_client.trailing_metadata = &trailing_metadata_recv_;
1452
+ op->data.recv_status_on_client.status = &status_code_;
1453
+ op->data.recv_status_on_client.status_details = &status_details_;
1454
+ op->flags = 0;
1455
+ op->reserved = nullptr;
1456
+ op++;
1457
+ // This callback signals the end of the LB call, so it relies on the initial
1458
+ // ref instead of a new ref. When it's invoked, it's the initial ref that is
1459
+ // unreffed.
1460
+ call_error = grpc_call_start_batch_and_execute(
1461
+ lb_call_, ops, (size_t)(op - ops), &on_status_received_);
1462
+ GPR_ASSERT(GRPC_CALL_OK == call_error);
1463
+ }
1464
+
1465
+ XdsLb::LbChannelState::LrsCallState::~LrsCallState() {
1466
+ grpc_metadata_array_destroy(&initial_metadata_recv_);
1467
+ grpc_metadata_array_destroy(&trailing_metadata_recv_);
1468
+ grpc_byte_buffer_destroy(send_message_payload_);
1469
+ grpc_byte_buffer_destroy(recv_message_payload_);
1470
+ grpc_slice_unref_internal(status_details_);
1471
+ GPR_ASSERT(lb_call_ != nullptr);
1472
+ grpc_call_unref(lb_call_);
1473
+ }
1474
+
1475
+ void XdsLb::LbChannelState::LrsCallState::Orphan() {
1476
+ reporter_.reset();
1477
+ GPR_ASSERT(lb_call_ != nullptr);
1478
+ // If we are here because xdslb_policy wants to cancel the call,
1479
+ // on_status_received_ will complete the cancellation and clean up. Otherwise,
1480
+ // we are here because xdslb_policy has to orphan a failed call, then the
1481
+ // following cancellation will be a no-op.
1482
+ grpc_call_cancel(lb_call_, nullptr);
1483
+ // Note that the initial ref is hold by on_status_received_. So the
1484
+ // corresponding unref happens in on_status_received_ instead of here.
1485
+ }
1486
+
1487
+ void XdsLb::LbChannelState::LrsCallState::MaybeStartReportingLocked() {
1488
+ // Don't start if this is not the current call on the current channel.
1489
+ if (!IsCurrentCallOnChannel() || !lb_chand()->IsCurrentChannel()) return;
1490
+ // Don't start again if already started.
1491
+ if (reporter_ != nullptr) return;
1492
+ // Don't start if the previous send_message op (of the initial request or the
1493
+ // last report of the previous reporter) hasn't completed.
1494
+ if (send_message_payload_ != nullptr) return;
1495
+ // Don't start if no LRS response has arrived.
1496
+ if (!seen_response()) return;
1497
+ // Don't start if the EDS call hasn't received any valid response. Note that
1498
+ // this must be the first channel because it is the current channel but its
1499
+ // EDS call hasn't seen any response.
1500
+ EdsCallState* eds_calld = lb_chand()->eds_calld_->lb_calld();
1501
+ if (eds_calld == nullptr || !eds_calld->seen_response()) return;
1502
+ // Start reporting.
1503
+ lb_chand()->xdslb_policy_->client_stats_.MaybeInitLastReportTime();
1504
+ reporter_ = MakeOrphanable<Reporter>(
1505
+ Ref(DEBUG_LOCATION, "LRS+load_report+start"), load_reporting_interval_);
1506
+ }
1507
+
1508
+ void XdsLb::LbChannelState::LrsCallState::OnInitialRequestSentLocked(
1509
+ void* arg, grpc_error* error) {
1510
+ LrsCallState* lrs_calld = static_cast<LrsCallState*>(arg);
1511
+ // Clear the send_message_payload_.
1512
+ grpc_byte_buffer_destroy(lrs_calld->send_message_payload_);
1513
+ lrs_calld->send_message_payload_ = nullptr;
1514
+ lrs_calld->MaybeStartReportingLocked();
1515
+ lrs_calld->Unref(DEBUG_LOCATION, "LRS+OnInitialRequestSentLocked");
1516
+ }
1517
+
1518
+ void XdsLb::LbChannelState::LrsCallState::OnResponseReceivedLocked(
1519
+ void* arg, grpc_error* error) {
1520
+ LrsCallState* lrs_calld = static_cast<LrsCallState*>(arg);
1521
+ XdsLb* xdslb_policy = lrs_calld->xdslb_policy();
1522
+ // Empty payload means the LB call was cancelled.
1523
+ if (!lrs_calld->IsCurrentCallOnChannel() ||
1524
+ lrs_calld->recv_message_payload_ == nullptr) {
1525
+ lrs_calld->Unref(DEBUG_LOCATION, "LRS+OnResponseReceivedLocked");
1526
+ return;
1527
+ }
1528
+ // Read the response.
1529
+ grpc_byte_buffer_reader bbr;
1530
+ grpc_byte_buffer_reader_init(&bbr, lrs_calld->recv_message_payload_);
1531
+ grpc_slice response_slice = grpc_byte_buffer_reader_readall(&bbr);
1532
+ grpc_byte_buffer_reader_destroy(&bbr);
1533
+ grpc_byte_buffer_destroy(lrs_calld->recv_message_payload_);
1534
+ lrs_calld->recv_message_payload_ = nullptr;
1535
+ // This anonymous lambda is a hack to avoid the usage of goto.
1536
+ [&]() {
1537
+ // Parse the response.
1538
+ grpc_millis new_load_reporting_interval;
1539
+ grpc_error* parse_error = XdsLrsResponseDecodeAndParse(
1540
+ response_slice, &new_load_reporting_interval,
1541
+ xdslb_policy->server_name_);
1542
+ if (parse_error != GRPC_ERROR_NONE) {
1543
+ gpr_log(GPR_ERROR, "[xdslb %p] LRS response parsing failed. error=%s",
1544
+ xdslb_policy, grpc_error_string(parse_error));
1545
+ GRPC_ERROR_UNREF(parse_error);
1546
+ return;
1547
+ }
1548
+ lrs_calld->seen_response_ = true;
1549
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
1550
+ gpr_log(GPR_INFO,
1551
+ "[xdslb %p] LRS response received, load_report_interval=%" PRId64
1552
+ "ms",
1553
+ xdslb_policy, new_load_reporting_interval);
1554
+ }
1555
+ if (new_load_reporting_interval <
1556
+ GRPC_XDS_MIN_CLIENT_LOAD_REPORTING_INTERVAL_MS) {
1557
+ new_load_reporting_interval =
1558
+ GRPC_XDS_MIN_CLIENT_LOAD_REPORTING_INTERVAL_MS;
1559
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
1560
+ gpr_log(
1561
+ GPR_INFO,
1562
+ "[xdslb %p] Increased load_report_interval to minimum value %dms",
1563
+ xdslb_policy, GRPC_XDS_MIN_CLIENT_LOAD_REPORTING_INTERVAL_MS);
1564
+ }
1565
+ }
1566
+ // Ignore identical update.
1567
+ if (lrs_calld->load_reporting_interval_ == new_load_reporting_interval) {
1568
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
1569
+ gpr_log(GPR_INFO,
1570
+ "[xdslb %p] Incoming LRS response identical to current, "
1571
+ "ignoring.",
1572
+ xdslb_policy);
1573
+ }
1574
+ return;
1575
+ }
1576
+ // Stop current load reporting (if any) to adopt the new reporting interval.
1577
+ lrs_calld->reporter_.reset();
1578
+ // Record the new config.
1579
+ lrs_calld->load_reporting_interval_ = new_load_reporting_interval;
1580
+ // Try starting sending load report.
1581
+ lrs_calld->MaybeStartReportingLocked();
1582
+ }();
1583
+ grpc_slice_unref_internal(response_slice);
1584
+ if (xdslb_policy->shutting_down_) {
1585
+ lrs_calld->Unref(DEBUG_LOCATION,
1586
+ "LRS+OnResponseReceivedLocked+xds_shutdown");
1587
+ return;
1588
+ }
1589
+ // Keep listening for LRS config updates.
1590
+ grpc_op op;
1591
+ memset(&op, 0, sizeof(op));
1592
+ op.op = GRPC_OP_RECV_MESSAGE;
1593
+ op.data.recv_message.recv_message = &lrs_calld->recv_message_payload_;
1594
+ op.flags = 0;
1595
+ op.reserved = nullptr;
1596
+ GPR_ASSERT(lrs_calld->lb_call_ != nullptr);
1597
+ // Reuse the "OnResponseReceivedLocked" ref taken in ctor.
1598
+ const grpc_call_error call_error = grpc_call_start_batch_and_execute(
1599
+ lrs_calld->lb_call_, &op, 1, &lrs_calld->on_response_received_);
1600
+ GPR_ASSERT(GRPC_CALL_OK == call_error);
1601
+ }
1602
+
1603
+ void XdsLb::LbChannelState::LrsCallState::OnStatusReceivedLocked(
1604
+ void* arg, grpc_error* error) {
1605
+ LrsCallState* lrs_calld = static_cast<LrsCallState*>(arg);
1606
+ XdsLb* xdslb_policy = lrs_calld->xdslb_policy();
1607
+ LbChannelState* lb_chand = lrs_calld->lb_chand();
1608
+ GPR_ASSERT(lrs_calld->lb_call_ != nullptr);
1609
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
1610
+ char* status_details = grpc_slice_to_c_string(lrs_calld->status_details_);
1611
+ gpr_log(GPR_INFO,
1612
+ "[xdslb %p] LRS call status received. Status = %d, details "
1613
+ "= '%s', (lb_chand: %p, lb_calld: %p, lb_call: %p), error '%s'",
1614
+ xdslb_policy, lrs_calld->status_code_, status_details, lb_chand,
1615
+ lrs_calld, lrs_calld->lb_call_, grpc_error_string(error));
1616
+ gpr_free(status_details);
1617
+ }
1618
+ // Ignore status from a stale call.
1619
+ if (lrs_calld->IsCurrentCallOnChannel()) {
1620
+ // Because this call is the current one on the channel, the channel can't
1621
+ // have been swapped out; otherwise, the call should have been reset.
1622
+ GPR_ASSERT(lb_chand->IsCurrentChannel() || lb_chand->IsPendingChannel());
1623
+ GPR_ASSERT(!xdslb_policy->shutting_down_);
1624
+ if (lb_chand == xdslb_policy->LatestLbChannel()) {
1625
+ // This channel is the most recently created one. Try to restart the call
1626
+ // and reresolve.
1627
+ lrs_calld->parent_->OnCallFinishedLocked();
1628
+ xdslb_policy->channel_control_helper()->RequestReresolution();
1629
+ }
1630
+ }
1631
+ lrs_calld->Unref(DEBUG_LOCATION, "LRS+OnStatusReceivedLocked");
1632
+ }
1633
+
1634
+ bool XdsLb::LbChannelState::LrsCallState::IsCurrentCallOnChannel() const {
1635
+ // If the retryable LRS call is null (which only happens when the LB channel
1636
+ // is shutting down), all the LRS calls are stale.
1637
+ if (lb_chand()->lrs_calld_ == nullptr) return false;
1638
+ return this == lb_chand()->lrs_calld_->lb_calld();
1345
1639
  }
1346
1640
 
1347
1641
  //
@@ -1363,7 +1657,7 @@ grpc_channel_args* BuildBalancerChannelArgs(const grpc_channel_args* args) {
1363
1657
  // factory will re-add this arg with the right value.
1364
1658
  GRPC_ARG_SERVER_URI,
1365
1659
  // The LB channel should use the authority indicated by the target
1366
- // authority table (see \a grpc_lb_policy_xds_modify_lb_channel_args),
1660
+ // authority table (see \a ModifyXdsBalancerChannelArgs),
1367
1661
  // as opposed to the authority from the parent channel.
1368
1662
  GRPC_ARG_DEFAULT_AUTHORITY,
1369
1663
  // Just as for \a GRPC_ARG_DEFAULT_AUTHORITY, the LB channel should be
@@ -1394,7 +1688,7 @@ grpc_channel_args* BuildBalancerChannelArgs(const grpc_channel_args* args) {
1394
1688
  args, args_to_remove, GPR_ARRAY_SIZE(args_to_remove), args_to_add.data(),
1395
1689
  args_to_add.size());
1396
1690
  // Make any necessary modifications for security.
1397
- return grpc_lb_policy_xds_modify_lb_channel_args(new_args);
1691
+ return ModifyXdsBalancerChannelArgs(new_args);
1398
1692
  }
1399
1693
 
1400
1694
  //
@@ -1403,8 +1697,15 @@ grpc_channel_args* BuildBalancerChannelArgs(const grpc_channel_args* args) {
1403
1697
 
1404
1698
  XdsLb::XdsLb(Args args)
1405
1699
  : LoadBalancingPolicy(std::move(args)),
1406
- locality_map_(),
1407
- locality_serverlist_() {
1700
+ lb_call_timeout_ms_(grpc_channel_args_find_integer(
1701
+ args.args, GRPC_ARG_GRPCLB_CALL_TIMEOUT_MS, {0, 0, INT_MAX})),
1702
+ lb_fallback_timeout_ms_(grpc_channel_args_find_integer(
1703
+ args.args, GRPC_ARG_XDS_FALLBACK_TIMEOUT_MS,
1704
+ {GRPC_XDS_DEFAULT_FALLBACK_TIMEOUT_MS, 0, INT_MAX})),
1705
+ locality_retention_interval_ms_(grpc_channel_args_find_integer(
1706
+ args.args, GRPC_ARG_LOCALITY_RETENTION_INTERVAL_MS,
1707
+ {GRPC_XDS_DEFAULT_LOCALITY_RETENTION_INTERVAL_MS, 0, INT_MAX})),
1708
+ locality_map_(this) {
1408
1709
  // Record server name.
1409
1710
  const grpc_arg* arg = grpc_channel_args_find(args.args, GRPC_ARG_SERVER_URI);
1410
1711
  const char* server_uri = grpc_channel_arg_get_string(arg);
@@ -1418,13 +1719,6 @@ XdsLb::XdsLb(Args args)
1418
1719
  server_name_);
1419
1720
  }
1420
1721
  grpc_uri_destroy(uri);
1421
- // Record LB call timeout.
1422
- arg = grpc_channel_args_find(args.args, GRPC_ARG_GRPCLB_CALL_TIMEOUT_MS);
1423
- lb_call_timeout_ms_ = grpc_channel_arg_get_integer(arg, {0, 0, INT_MAX});
1424
- // Record fallback timeout.
1425
- arg = grpc_channel_args_find(args.args, GRPC_ARG_XDS_FALLBACK_TIMEOUT_MS);
1426
- lb_fallback_timeout_ms_ = grpc_channel_arg_get_integer(
1427
- arg, {GRPC_XDS_DEFAULT_FALLBACK_TIMEOUT_MS, 0, INT_MAX});
1428
1722
  }
1429
1723
 
1430
1724
  XdsLb::~XdsLb() {
@@ -1433,7 +1727,7 @@ XdsLb::~XdsLb() {
1433
1727
  }
1434
1728
  gpr_free((void*)server_name_);
1435
1729
  grpc_channel_args_destroy(args_);
1436
- locality_serverlist_.clear();
1730
+ locality_list_.clear();
1437
1731
  }
1438
1732
 
1439
1733
  void XdsLb::ShutdownLocked() {
@@ -1482,9 +1776,9 @@ void XdsLb::ResetBackoffLocked() {
1482
1776
  }
1483
1777
 
1484
1778
  void XdsLb::ProcessAddressesAndChannelArgsLocked(
1485
- const ServerAddressList& addresses, const grpc_channel_args& args) {
1779
+ ServerAddressList addresses, const grpc_channel_args& args) {
1486
1780
  // Update fallback address list.
1487
- fallback_backend_addresses_ = ExtractBackendAddresses(addresses);
1781
+ fallback_backend_addresses_ = std::move(addresses);
1488
1782
  // Make sure that GRPC_ARG_LB_POLICY_NAME is set in channel args,
1489
1783
  // since we use this to trigger the client_load_reporting filter.
1490
1784
  static const char* args_to_remove[] = {GRPC_ARG_LB_POLICY_NAME};
@@ -1505,11 +1799,10 @@ void XdsLb::ProcessAddressesAndChannelArgsLocked(
1505
1799
  strcmp(last_balancer_name.get(), balancer_name_.get()) != 0;
1506
1800
  }
1507
1801
  if (create_lb_channel) {
1508
- OrphanablePtr<BalancerChannelState> lb_chand =
1509
- MakeOrphanable<BalancerChannelState>(
1510
- balancer_name_.get(), *lb_channel_args,
1511
- Ref(DEBUG_LOCATION, "BalancerChannelState"));
1512
- if (lb_chand_ == nullptr || !lb_chand_->HasActiveCall()) {
1802
+ OrphanablePtr<LbChannelState> lb_chand = MakeOrphanable<LbChannelState>(
1803
+ Ref(DEBUG_LOCATION, "XdsLb+LbChannelState"), balancer_name_.get(),
1804
+ *lb_channel_args);
1805
+ if (lb_chand_ == nullptr || !lb_chand_->HasActiveEdsCall()) {
1513
1806
  GPR_ASSERT(pending_lb_chand_ == nullptr);
1514
1807
  // If we do not have a working LB channel yet, use the newly created one.
1515
1808
  lb_chand_ = std::move(lb_chand);
@@ -1524,6 +1817,7 @@ void XdsLb::ProcessAddressesAndChannelArgsLocked(
1524
1817
  void XdsLb::ParseLbConfig(const ParsedXdsConfig* xds_config) {
1525
1818
  if (xds_config == nullptr || xds_config->balancer_name() == nullptr) return;
1526
1819
  // TODO(yashykt) : does this need to be a gpr_strdup
1820
+ // TODO(juanlishen): Read balancer name from bootstrap file.
1527
1821
  balancer_name_ = UniquePtr<char>(gpr_strdup(xds_config->balancer_name()));
1528
1822
  child_policy_config_ = xds_config->child_policy();
1529
1823
  fallback_policy_config_ = xds_config->fallback_policy();
@@ -1536,9 +1830,9 @@ void XdsLb::UpdateLocked(UpdateArgs args) {
1536
1830
  gpr_log(GPR_ERROR, "[xdslb %p] LB config parsing fails.", this);
1537
1831
  return;
1538
1832
  }
1539
- ProcessAddressesAndChannelArgsLocked(args.addresses, *args.args);
1540
- locality_map_.UpdateLocked(locality_serverlist_, child_policy_config_.get(),
1541
- args_, this);
1833
+ ProcessAddressesAndChannelArgsLocked(std::move(args.addresses), *args.args);
1834
+ locality_map_.UpdateLocked(locality_list_, child_policy_config_.get(), args_,
1835
+ this, is_initial_update);
1542
1836
  // Update the existing fallback policy. The fallback policy config and/or the
1543
1837
  // fallback addresses may be new.
1544
1838
  if (fallback_policy_ != nullptr) UpdateFallbackPolicyLocked();
@@ -1736,44 +2030,111 @@ void XdsLb::MaybeExitFallbackMode() {
1736
2030
  // XdsLb::LocalityMap
1737
2031
  //
1738
2032
 
1739
- void XdsLb::LocalityMap::PruneLocalities(const LocalityList& locality_list) {
1740
- for (auto iter = map_.begin(); iter != map_.end();) {
1741
- bool found = false;
1742
- for (size_t i = 0; i < locality_list.size(); i++) {
1743
- if (*locality_list[i]->locality_name == *iter->first) {
1744
- found = true;
2033
+ void XdsLb::LocalityMap::UpdateLocked(
2034
+ const XdsLocalityList& locality_list,
2035
+ LoadBalancingPolicy::Config* child_policy_config,
2036
+ const grpc_channel_args* args, XdsLb* parent, bool is_initial_update) {
2037
+ if (parent->shutting_down_) return;
2038
+ // Add or update the localities in locality_list.
2039
+ for (size_t i = 0; i < locality_list.size(); i++) {
2040
+ auto& locality_name = locality_list[i].locality_name;
2041
+ auto iter = map_.find(locality_name);
2042
+ // Add a new entry in the locality map if a new locality is received in the
2043
+ // locality list.
2044
+ if (iter == map_.end()) {
2045
+ OrphanablePtr<LocalityEntry> new_entry = MakeOrphanable<LocalityEntry>(
2046
+ parent->Ref(DEBUG_LOCATION, "LocalityEntry"), locality_name);
2047
+ iter = map_.emplace(locality_name, std::move(new_entry)).first;
2048
+ }
2049
+ // Keep a copy of serverlist in locality_list_ so that we can compare it
2050
+ // with the future ones.
2051
+ iter->second->UpdateLocked(locality_list[i].lb_weight,
2052
+ locality_list[i].serverlist, child_policy_config,
2053
+ args);
2054
+ }
2055
+ // Remove (later) the localities not in locality_list.
2056
+ for (auto& p : map_) {
2057
+ const XdsLocalityName* locality_name = p.first.get();
2058
+ LocalityEntry* locality_entry = p.second.get();
2059
+ bool in_locality_list = false;
2060
+ for (size_t i = 0; i < locality_list.size(); ++i) {
2061
+ if (*locality_list[i].locality_name == *locality_name) {
2062
+ in_locality_list = true;
1745
2063
  break;
1746
2064
  }
1747
2065
  }
1748
- if (!found) { // Remove entries not present in the locality list
1749
- iter = map_.erase(iter);
1750
- } else
1751
- iter++;
2066
+ if (!in_locality_list) locality_entry->DeactivateLocked();
1752
2067
  }
2068
+ // Generate a new xds picker immediately.
2069
+ if (!is_initial_update) UpdateXdsPickerLocked();
1753
2070
  }
1754
2071
 
1755
- void XdsLb::LocalityMap::UpdateLocked(
1756
- const LocalityList& locality_serverlist,
1757
- LoadBalancingPolicy::Config* child_policy_config,
1758
- const grpc_channel_args* args, XdsLb* parent) {
1759
- if (parent->shutting_down_) return;
1760
- for (size_t i = 0; i < locality_serverlist.size(); i++) {
1761
- auto iter = map_.find(locality_serverlist[i]->locality_name);
1762
- if (iter == map_.end()) {
1763
- OrphanablePtr<LocalityEntry> new_entry = MakeOrphanable<LocalityEntry>(
1764
- parent->Ref(DEBUG_LOCATION, "LocalityEntry"),
1765
- locality_serverlist[i]->locality_name,
1766
- locality_serverlist[i]->locality_weight);
1767
- iter = map_.emplace(locality_serverlist[i]->locality_name,
1768
- std::move(new_entry))
1769
- .first;
2072
+ void XdsLb::LocalityMap::UpdateXdsPickerLocked() {
2073
+ // If we are in fallback mode, don't generate an xds picker from localities.
2074
+ if (xds_policy_->fallback_policy_ != nullptr) return;
2075
+ // Construct a new xds picker which maintains a map of all locality pickers
2076
+ // that are ready. Each locality is represented by a portion of the range
2077
+ // proportional to its weight, such that the total range is the sum of the
2078
+ // weights of all localities.
2079
+ uint32_t end = 0;
2080
+ size_t num_connecting = 0;
2081
+ size_t num_idle = 0;
2082
+ size_t num_transient_failures = 0;
2083
+ Picker::PickerList pickers;
2084
+ for (auto& p : map_) {
2085
+ const LocalityEntry* entry = p.second.get();
2086
+ if (entry->locality_weight() == 0) continue;
2087
+ switch (entry->connectivity_state()) {
2088
+ case GRPC_CHANNEL_READY: {
2089
+ end += entry->locality_weight();
2090
+ pickers.push_back(MakePair(end, entry->picker_wrapper()));
2091
+ break;
2092
+ }
2093
+ case GRPC_CHANNEL_CONNECTING: {
2094
+ num_connecting++;
2095
+ break;
2096
+ }
2097
+ case GRPC_CHANNEL_IDLE: {
2098
+ num_idle++;
2099
+ break;
2100
+ }
2101
+ case GRPC_CHANNEL_TRANSIENT_FAILURE: {
2102
+ num_transient_failures++;
2103
+ break;
2104
+ }
2105
+ default:
2106
+ GPR_UNREACHABLE_CODE(return );
1770
2107
  }
1771
- // Don't create new child policies if not directed to
1772
- xds_grpclb_serverlist* serverlist =
1773
- parent->locality_serverlist_[i]->serverlist;
1774
- iter->second->UpdateLocked(serverlist, child_policy_config, args);
1775
2108
  }
1776
- PruneLocalities(locality_serverlist);
2109
+ // Pass on the constructed xds picker if it has any ready pickers in their map
2110
+ // otherwise pass a QueuePicker if any of the locality pickers are in a
2111
+ // connecting or idle state, finally return a transient failure picker if all
2112
+ // locality pickers are in transient failure.
2113
+ if (!pickers.empty()) {
2114
+ xds_policy_->channel_control_helper()->UpdateState(
2115
+ GRPC_CHANNEL_READY,
2116
+ UniquePtr<LoadBalancingPolicy::SubchannelPicker>(
2117
+ New<Picker>(xds_policy_->Ref(DEBUG_LOCATION, "XdsLb+Picker"),
2118
+ std::move(pickers))));
2119
+ } else if (num_connecting > 0) {
2120
+ xds_policy_->channel_control_helper()->UpdateState(
2121
+ GRPC_CHANNEL_CONNECTING,
2122
+ UniquePtr<SubchannelPicker>(
2123
+ New<QueuePicker>(xds_policy_->Ref(DEBUG_LOCATION, "QueuePicker"))));
2124
+ } else if (num_idle > 0) {
2125
+ xds_policy_->channel_control_helper()->UpdateState(
2126
+ GRPC_CHANNEL_IDLE,
2127
+ UniquePtr<SubchannelPicker>(
2128
+ New<QueuePicker>(xds_policy_->Ref(DEBUG_LOCATION, "QueuePicker"))));
2129
+ } else {
2130
+ grpc_error* error =
2131
+ grpc_error_set_int(GRPC_ERROR_CREATE_FROM_STATIC_STRING(
2132
+ "connections to all active localities failing"),
2133
+ GRPC_ERROR_INT_GRPC_STATUS, GRPC_STATUS_UNAVAILABLE);
2134
+ xds_policy_->channel_control_helper()->UpdateState(
2135
+ GRPC_CHANNEL_TRANSIENT_FAILURE,
2136
+ UniquePtr<SubchannelPicker>(New<TransientFailurePicker>(error)));
2137
+ }
1777
2138
  }
1778
2139
 
1779
2140
  void XdsLb::LocalityMap::ShutdownLocked() { map_.clear(); }
@@ -1789,15 +2150,14 @@ void XdsLb::LocalityMap::ResetBackoffLocked() {
1789
2150
  //
1790
2151
 
1791
2152
  XdsLb::LocalityMap::LocalityEntry::LocalityEntry(
1792
- RefCountedPtr<XdsLb> parent, RefCountedPtr<LocalityName> name,
1793
- uint32_t locality_weight)
1794
- : parent_(std::move(parent)),
1795
- name_(std::move(name)),
1796
- locality_weight_(locality_weight) {
2153
+ RefCountedPtr<XdsLb> parent, RefCountedPtr<XdsLocalityName> name)
2154
+ : parent_(std::move(parent)), name_(std::move(name)) {
1797
2155
  if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_trace)) {
1798
2156
  gpr_log(GPR_INFO, "[xdslb %p] created LocalityEntry %p for %s",
1799
2157
  parent_.get(), this, name_->AsHumanReadableString());
1800
2158
  }
2159
+ GRPC_CLOSURE_INIT(&on_delayed_removal_timer_, OnDelayedRemovalTimerLocked,
2160
+ this, grpc_combiner_scheduler(parent_->combiner()));
1801
2161
  }
1802
2162
 
1803
2163
  XdsLb::LocalityMap::LocalityEntry::~LocalityEntry() {
@@ -1861,13 +2221,18 @@ XdsLb::LocalityMap::LocalityEntry::CreateChildPolicyLocked(
1861
2221
  }
1862
2222
 
1863
2223
  void XdsLb::LocalityMap::LocalityEntry::UpdateLocked(
1864
- xds_grpclb_serverlist* serverlist,
2224
+ uint32_t locality_weight, ServerAddressList serverlist,
1865
2225
  LoadBalancingPolicy::Config* child_policy_config,
1866
2226
  const grpc_channel_args* args_in) {
1867
2227
  if (parent_->shutting_down_) return;
2228
+ // Update locality weight.
2229
+ locality_weight_ = locality_weight;
2230
+ if (delayed_removal_timer_callback_pending_) {
2231
+ grpc_timer_cancel(&delayed_removal_timer_);
2232
+ }
1868
2233
  // Construct update args.
1869
2234
  UpdateArgs update_args;
1870
- update_args.addresses = ProcessServerlist(serverlist);
2235
+ update_args.addresses = std::move(serverlist);
1871
2236
  update_args.config =
1872
2237
  child_policy_config == nullptr ? nullptr : child_policy_config->Ref();
1873
2238
  update_args.args = CreateChildPolicyArgsLocked(args_in);
@@ -1988,7 +2353,10 @@ void XdsLb::LocalityMap::LocalityEntry::ShutdownLocked() {
1988
2353
  }
1989
2354
  // Drop our ref to the child's picker, in case it's holding a ref to
1990
2355
  // the child.
1991
- picker_ref_.reset();
2356
+ picker_wrapper_.reset();
2357
+ if (delayed_removal_timer_callback_pending_) {
2358
+ grpc_timer_cancel(&delayed_removal_timer_);
2359
+ }
1992
2360
  }
1993
2361
 
1994
2362
  void XdsLb::LocalityMap::LocalityEntry::ResetBackoffLocked() {
@@ -2003,6 +2371,36 @@ void XdsLb::LocalityMap::LocalityEntry::Orphan() {
2003
2371
  Unref();
2004
2372
  }
2005
2373
 
2374
+ void XdsLb::LocalityMap::LocalityEntry::DeactivateLocked() {
2375
+ // If locality retaining is disabled, delete the locality immediately.
2376
+ if (parent_->locality_retention_interval_ms_ == 0) {
2377
+ parent_->locality_map_.map_.erase(name_);
2378
+ return;
2379
+ }
2380
+ // If already deactivated, don't do that again.
2381
+ if (locality_weight_ == 0) return;
2382
+ // Set the locality weight to 0 so that future xds picker won't contain this
2383
+ // locality.
2384
+ locality_weight_ = 0;
2385
+ // Start a timer to delete the locality.
2386
+ Ref(DEBUG_LOCATION, "LocalityEntry+timer").release();
2387
+ grpc_timer_init(
2388
+ &delayed_removal_timer_,
2389
+ ExecCtx::Get()->Now() + parent_->locality_retention_interval_ms_,
2390
+ &on_delayed_removal_timer_);
2391
+ delayed_removal_timer_callback_pending_ = true;
2392
+ }
2393
+
2394
+ void XdsLb::LocalityMap::LocalityEntry::OnDelayedRemovalTimerLocked(
2395
+ void* arg, grpc_error* error) {
2396
+ LocalityEntry* self = static_cast<LocalityEntry*>(arg);
2397
+ self->delayed_removal_timer_callback_pending_ = false;
2398
+ if (error == GRPC_ERROR_NONE && self->locality_weight_ == 0) {
2399
+ self->parent_->locality_map_.map_.erase(self->name_);
2400
+ }
2401
+ self->Unref(DEBUG_LOCATION, "LocalityEntry+timer");
2402
+ }
2403
+
2006
2404
  //
2007
2405
  // XdsLb::LocalityEntry::Helper
2008
2406
  //
@@ -2027,15 +2425,6 @@ XdsLb::LocalityMap::LocalityEntry::Helper::CreateSubchannel(
2027
2425
  return entry_->parent_->channel_control_helper()->CreateSubchannel(args);
2028
2426
  }
2029
2427
 
2030
- grpc_channel* XdsLb::LocalityMap::LocalityEntry::Helper::CreateChannel(
2031
- const char* target, const grpc_channel_args& args) {
2032
- if (entry_->parent_->shutting_down_ ||
2033
- (!CalledByPendingChild() && !CalledByCurrentChild())) {
2034
- return nullptr;
2035
- }
2036
- return entry_->parent_->channel_control_helper()->CreateChannel(target, args);
2037
- }
2038
-
2039
2428
  void XdsLb::LocalityMap::LocalityEntry::Helper::UpdateState(
2040
2429
  grpc_connectivity_state state, UniquePtr<SubchannelPicker> picker) {
2041
2430
  if (entry_->parent_->shutting_down_) return;
@@ -2062,81 +2451,14 @@ void XdsLb::LocalityMap::LocalityEntry::Helper::UpdateState(
2062
2451
  entry_->parent_->MaybeCancelFallbackAtStartupChecks();
2063
2452
  entry_->parent_->MaybeExitFallbackMode();
2064
2453
  }
2065
- // If we are in fallback mode, ignore update request from the child policy.
2066
- if (entry_->parent_->fallback_policy_ != nullptr) return;
2067
2454
  GPR_ASSERT(entry_->parent_->lb_chand_ != nullptr);
2068
- RefCountedPtr<XdsLbClientStats> client_stats =
2069
- entry_->parent_->lb_chand_->lb_calld() == nullptr
2070
- ? nullptr
2071
- : entry_->parent_->lb_chand_->lb_calld()->client_stats();
2072
- // Cache the picker and its state in the entry
2073
- entry_->picker_ref_ = MakeRefCounted<PickerRef>(std::move(picker));
2455
+ // Cache the picker and its state in the entry.
2456
+ entry_->picker_wrapper_ = MakeRefCounted<PickerWrapper>(
2457
+ std::move(picker),
2458
+ entry_->parent_->client_stats_.FindLocalityStats(entry_->name_));
2074
2459
  entry_->connectivity_state_ = state;
2075
- // Construct a new xds picker which maintains a map of all locality pickers
2076
- // that are ready. Each locality is represented by a portion of the range
2077
- // proportional to its weight, such that the total range is the sum of the
2078
- // weights of all localities
2079
- uint32_t end = 0;
2080
- size_t num_connecting = 0;
2081
- size_t num_idle = 0;
2082
- size_t num_transient_failures = 0;
2083
- auto& locality_map = this->entry_->parent_->locality_map_.map_;
2084
- Picker::PickerList pickers;
2085
- for (auto& p : locality_map) {
2086
- const LocalityEntry* entry = p.second.get();
2087
- grpc_connectivity_state connectivity_state = entry->connectivity_state_;
2088
- switch (connectivity_state) {
2089
- case GRPC_CHANNEL_READY: {
2090
- end += entry->locality_weight_;
2091
- pickers.push_back(MakePair(end, entry->picker_ref_));
2092
- break;
2093
- }
2094
- case GRPC_CHANNEL_CONNECTING: {
2095
- num_connecting++;
2096
- break;
2097
- }
2098
- case GRPC_CHANNEL_IDLE: {
2099
- num_idle++;
2100
- break;
2101
- }
2102
- case GRPC_CHANNEL_TRANSIENT_FAILURE: {
2103
- num_transient_failures++;
2104
- break;
2105
- }
2106
- default: {
2107
- gpr_log(GPR_ERROR, "Invalid locality connectivity state - %d",
2108
- connectivity_state);
2109
- }
2110
- }
2111
- }
2112
- // Pass on the constructed xds picker if it has any ready pickers in their map
2113
- // otherwise pass a QueuePicker if any of the locality pickers are in a
2114
- // connecting or idle state, finally return a transient failure picker if all
2115
- // locality pickers are in transient failure
2116
- if (pickers.size() > 0) {
2117
- entry_->parent_->channel_control_helper()->UpdateState(
2118
- GRPC_CHANNEL_READY,
2119
- UniquePtr<LoadBalancingPolicy::SubchannelPicker>(
2120
- New<Picker>(std::move(client_stats), std::move(pickers))));
2121
- } else if (num_connecting > 0) {
2122
- entry_->parent_->channel_control_helper()->UpdateState(
2123
- GRPC_CHANNEL_CONNECTING,
2124
- UniquePtr<SubchannelPicker>(New<QueuePicker>(
2125
- this->entry_->parent_->Ref(DEBUG_LOCATION, "QueuePicker"))));
2126
- } else if (num_idle > 0) {
2127
- entry_->parent_->channel_control_helper()->UpdateState(
2128
- GRPC_CHANNEL_IDLE,
2129
- UniquePtr<SubchannelPicker>(New<QueuePicker>(
2130
- this->entry_->parent_->Ref(DEBUG_LOCATION, "QueuePicker"))));
2131
- } else {
2132
- GPR_ASSERT(num_transient_failures == locality_map.size());
2133
- grpc_error* error =
2134
- grpc_error_set_int(GRPC_ERROR_CREATE_FROM_STATIC_STRING(
2135
- "connections to all localities failing"),
2136
- GRPC_ERROR_INT_GRPC_STATUS, GRPC_STATUS_UNAVAILABLE);
2137
- entry_->parent_->channel_control_helper()->UpdateState(
2138
- state, UniquePtr<SubchannelPicker>(New<TransientFailurePicker>(error)));
2139
- }
2460
+ // Construct a new xds picker and pass it to the channel.
2461
+ entry_->parent_->locality_map_.UpdateXdsPickerLocked();
2140
2462
  }
2141
2463
 
2142
2464
  void XdsLb::LocalityMap::LocalityEntry::Helper::RequestReresolution() {
@@ -2157,14 +2479,14 @@ void XdsLb::LocalityMap::LocalityEntry::Helper::RequestReresolution() {
2157
2479
  // from the balancer, so we can ignore the re-resolution request from
2158
2480
  // the child policy. Otherwise, pass the re-resolution request up to the
2159
2481
  // channel.
2160
- if (entry_->parent_->lb_chand_->lb_calld() == nullptr ||
2161
- !entry_->parent_->lb_chand_->lb_calld()->seen_initial_response()) {
2482
+ if (entry_->parent_->lb_chand_->eds_calld() == nullptr ||
2483
+ !entry_->parent_->lb_chand_->eds_calld()->seen_response()) {
2162
2484
  entry_->parent_->channel_control_helper()->RequestReresolution();
2163
2485
  }
2164
2486
  }
2165
2487
 
2166
2488
  void XdsLb::LocalityMap::LocalityEntry::Helper::AddTraceEvent(
2167
- TraceSeverity severity, const char* message) {
2489
+ TraceSeverity severity, StringView message) {
2168
2490
  if (entry_->parent_->shutting_down_ ||
2169
2491
  (!CalledByPendingChild() && !CalledByCurrentChild())) {
2170
2492
  return;
@@ -2243,10 +2565,6 @@ class XdsFactory : public LoadBalancingPolicyFactory {
2243
2565
  }
2244
2566
  }
2245
2567
  }
2246
- if (balancer_name == nullptr) {
2247
- error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING(
2248
- "field:balancerName error:not found"));
2249
- }
2250
2568
  if (error_list.empty()) {
2251
2569
  return RefCountedPtr<LoadBalancingPolicy::Config>(New<ParsedXdsConfig>(
2252
2570
  balancer_name, std::move(child_policy), std::move(fallback_policy)));