grpc 1.23.0 → 1.24.0


Potentially problematic release.

Files changed (245)
  1. checksums.yaml +4 -4
  2. data/Makefile +765 -485
  3. data/include/grpc/grpc.h +3 -1
  4. data/include/grpc/grpc_security.h +20 -4
  5. data/include/grpc/impl/codegen/grpc_types.h +6 -5
  6. data/include/grpc/impl/codegen/port_platform.h +25 -0
  7. data/src/core/ext/filters/client_channel/backend_metric.cc +78 -0
  8. data/src/core/ext/filters/client_channel/backend_metric.h +36 -0
  9. data/src/core/ext/filters/client_channel/channel_connectivity.cc +16 -2
  10. data/src/core/ext/filters/client_channel/client_channel.cc +325 -267
  11. data/src/core/ext/filters/client_channel/client_channel_factory.h +0 -4
  12. data/src/core/ext/filters/client_channel/health/health_check_client.cc +23 -32
  13. data/src/core/ext/filters/client_channel/http_proxy.cc +7 -3
  14. data/src/core/ext/filters/client_channel/lb_policy.cc +1 -1
  15. data/src/core/ext/filters/client_channel/lb_policy.h +58 -34
  16. data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc +46 -50
  17. data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_channel.h +9 -2
  18. data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_channel_secure.cc +35 -17
  19. data/src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.cc +130 -215
  20. data/src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.h +34 -21
  21. data/src/core/ext/filters/client_channel/lb_policy/xds/xds.cc +1120 -802
  22. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_channel.h +8 -2
  23. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_channel_secure.cc +33 -12
  24. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_client_stats.cc +151 -40
  25. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_client_stats.h +184 -26
  26. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_load_balancer_api.cc +389 -245
  27. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_load_balancer_api.h +98 -60
  28. data/src/core/ext/filters/client_channel/lb_policy_registry.cc +6 -1
  29. data/src/core/ext/filters/client_channel/resolver/xds/xds_resolver.cc +89 -0
  30. data/src/core/ext/filters/client_channel/resolving_lb_policy.cc +3 -8
  31. data/src/core/ext/filters/client_channel/server_address.cc +1 -3
  32. data/src/core/ext/filters/client_channel/server_address.h +1 -1
  33. data/src/core/ext/filters/client_channel/subchannel.h +2 -1
  34. data/src/core/ext/filters/client_idle/client_idle_filter.cc +207 -29
  35. data/src/core/ext/filters/http/client/http_client_filter.cc +10 -8
  36. data/src/core/ext/filters/http/client_authority_filter.cc +1 -1
  37. data/src/core/ext/filters/http/message_compress/message_compress_filter.cc +10 -7
  38. data/src/core/ext/filters/http/server/http_server_filter.cc +52 -26
  39. data/src/core/ext/transport/chttp2/client/insecure/channel_create.cc +23 -20
  40. data/src/core/ext/transport/chttp2/client/secure/secure_channel_create.cc +24 -21
  41. data/src/core/ext/transport/chttp2/server/chttp2_server.cc +1 -1
  42. data/src/core/ext/transport/chttp2/transport/chttp2_transport.cc +37 -24
  43. data/src/core/ext/transport/chttp2/transport/chttp2_transport.h +1 -0
  44. data/src/core/ext/transport/chttp2/transport/hpack_encoder.cc +237 -191
  45. data/src/core/ext/transport/chttp2/transport/hpack_parser.cc +29 -27
  46. data/src/core/ext/transport/chttp2/transport/hpack_parser.h +1 -1
  47. data/src/core/ext/transport/chttp2/transport/hpack_table.cc +19 -4
  48. data/src/core/ext/transport/chttp2/transport/hpack_table.h +13 -4
  49. data/src/core/ext/transport/chttp2/transport/incoming_metadata.cc +2 -1
  50. data/src/core/ext/transport/chttp2/transport/internal.h +0 -2
  51. data/src/core/ext/transport/chttp2/transport/parsing.cc +99 -71
  52. data/src/core/ext/upb-generated/envoy/api/v2/auth/cert.upb.c +222 -0
  53. data/src/core/ext/upb-generated/envoy/api/v2/auth/cert.upb.h +818 -0
  54. data/src/core/ext/upb-generated/envoy/api/v2/cds.upb.c +314 -0
  55. data/src/core/ext/upb-generated/envoy/api/v2/cds.upb.h +1142 -0
  56. data/src/core/ext/upb-generated/envoy/api/v2/cluster/circuit_breaker.upb.c +53 -0
  57. data/src/core/ext/upb-generated/envoy/api/v2/cluster/circuit_breaker.upb.h +158 -0
  58. data/src/core/ext/upb-generated/envoy/api/v2/cluster/filter.upb.c +34 -0
  59. data/src/core/ext/upb-generated/envoy/api/v2/cluster/filter.upb.h +69 -0
  60. data/src/core/ext/upb-generated/envoy/api/v2/cluster/outlier_detection.upb.c +49 -0
  61. data/src/core/ext/upb-generated/envoy/api/v2/cluster/outlier_detection.upb.h +240 -0
  62. data/src/core/ext/upb-generated/envoy/api/v2/core/address.upb.c +110 -0
  63. data/src/core/ext/upb-generated/envoy/api/v2/core/address.upb.h +324 -0
  64. data/src/core/ext/upb-generated/envoy/api/v2/core/base.upb.c +235 -0
  65. data/src/core/ext/upb-generated/envoy/api/v2/core/base.upb.h +661 -0
  66. data/src/core/ext/upb-generated/envoy/api/v2/core/config_source.upb.c +84 -0
  67. data/src/core/ext/upb-generated/envoy/api/v2/core/config_source.upb.h +274 -0
  68. data/src/core/ext/upb-generated/envoy/api/v2/core/grpc_service.upb.c +175 -0
  69. data/src/core/ext/upb-generated/envoy/api/v2/core/grpc_service.upb.h +572 -0
  70. data/src/core/ext/upb-generated/envoy/api/v2/core/health_check.upb.c +150 -0
  71. data/src/core/ext/upb-generated/envoy/api/v2/core/health_check.upb.h +596 -0
  72. data/src/core/ext/upb-generated/envoy/api/v2/core/http_uri.upb.c +35 -0
  73. data/src/core/ext/upb-generated/envoy/api/v2/core/http_uri.upb.h +80 -0
  74. data/src/core/ext/upb-generated/envoy/api/v2/core/protocol.upb.c +95 -0
  75. data/src/core/ext/upb-generated/envoy/api/v2/core/protocol.upb.h +308 -0
  76. data/src/core/ext/upb-generated/envoy/api/v2/discovery.upb.c +128 -0
  77. data/src/core/ext/upb-generated/envoy/api/v2/discovery.upb.h +392 -0
  78. data/src/core/ext/upb-generated/envoy/api/v2/eds.upb.c +91 -0
  79. data/src/core/ext/upb-generated/envoy/api/v2/eds.upb.h +236 -0
  80. data/src/core/ext/upb-generated/envoy/api/v2/endpoint/endpoint.upb.c +88 -0
  81. data/src/core/ext/upb-generated/envoy/api/v2/endpoint/endpoint.upb.h +258 -0
  82. data/src/core/ext/upb-generated/envoy/api/v2/endpoint/load_report.upb.c +111 -0
  83. data/src/core/ext/upb-generated/envoy/api/v2/endpoint/load_report.upb.h +324 -0
  84. data/src/core/ext/upb-generated/envoy/service/discovery/v2/ads.upb.c +23 -0
  85. data/src/core/ext/upb-generated/envoy/service/discovery/v2/ads.upb.h +50 -0
  86. data/src/core/ext/upb-generated/envoy/service/load_stats/v2/lrs.upb.c +52 -0
  87. data/src/core/ext/upb-generated/envoy/service/load_stats/v2/lrs.upb.h +130 -0
  88. data/src/core/ext/upb-generated/envoy/type/percent.upb.c +39 -0
  89. data/src/core/ext/upb-generated/envoy/type/percent.upb.h +87 -0
  90. data/src/core/ext/upb-generated/envoy/type/range.upb.c +39 -0
  91. data/src/core/ext/upb-generated/envoy/type/range.upb.h +85 -0
  92. data/src/core/ext/upb-generated/gogoproto/gogo.upb.c +17 -0
  93. data/src/core/ext/upb-generated/gogoproto/gogo.upb.h +30 -0
  94. data/src/core/ext/upb-generated/google/api/annotations.upb.c +18 -0
  95. data/src/core/ext/upb-generated/google/api/annotations.upb.h +30 -0
  96. data/src/core/ext/upb-generated/google/api/http.upb.c +66 -0
  97. data/src/core/ext/upb-generated/google/api/http.upb.h +190 -0
  98. data/src/core/ext/upb-generated/google/protobuf/any.upb.c +27 -0
  99. data/src/core/ext/upb-generated/google/protobuf/any.upb.h +58 -0
  100. data/src/core/ext/upb-generated/google/protobuf/descriptor.upb.c +485 -0
  101. data/src/core/ext/upb-generated/google/protobuf/descriptor.upb.h +1690 -0
  102. data/src/core/ext/upb-generated/google/protobuf/duration.upb.c +27 -0
  103. data/src/core/ext/upb-generated/google/protobuf/duration.upb.h +58 -0
  104. data/src/core/ext/upb-generated/google/protobuf/empty.upb.c +22 -0
  105. data/src/core/ext/upb-generated/google/protobuf/empty.upb.h +50 -0
  106. data/src/core/ext/upb-generated/google/protobuf/struct.upb.c +79 -0
  107. data/src/core/ext/upb-generated/google/protobuf/struct.upb.h +215 -0
  108. data/src/core/ext/upb-generated/google/protobuf/timestamp.upb.c +27 -0
  109. data/src/core/ext/upb-generated/google/protobuf/timestamp.upb.h +58 -0
  110. data/src/core/ext/upb-generated/google/protobuf/wrappers.upb.c +106 -0
  111. data/src/core/ext/upb-generated/google/protobuf/wrappers.upb.h +238 -0
  112. data/src/core/ext/upb-generated/google/rpc/status.upb.c +33 -0
  113. data/src/core/ext/upb-generated/google/rpc/status.upb.h +74 -0
  114. data/src/core/ext/upb-generated/src/proto/grpc/gcp/altscontext.upb.c +49 -0
  115. data/src/core/ext/upb-generated/src/proto/grpc/gcp/altscontext.upb.h +126 -0
  116. data/src/core/ext/upb-generated/src/proto/grpc/gcp/handshaker.upb.c +209 -0
  117. data/src/core/ext/upb-generated/src/proto/grpc/gcp/handshaker.upb.h +681 -0
  118. data/src/core/ext/upb-generated/src/proto/grpc/gcp/transport_security_common.upb.c +42 -0
  119. data/src/core/ext/upb-generated/src/proto/grpc/gcp/transport_security_common.upb.h +109 -0
  120. data/src/core/ext/upb-generated/src/proto/grpc/health/v1/health.upb.c +36 -0
  121. data/src/core/ext/upb-generated/src/proto/grpc/health/v1/health.upb.h +84 -0
  122. data/src/core/ext/upb-generated/src/proto/grpc/lb/v1/load_balancer.upb.c +133 -0
  123. data/src/core/ext/upb-generated/src/proto/grpc/lb/v1/load_balancer.upb.h +359 -0
  124. data/src/core/ext/upb-generated/udpa/data/orca/v1/orca_load_report.upb.c +58 -0
  125. data/src/core/ext/upb-generated/udpa/data/orca/v1/orca_load_report.upb.h +144 -0
  126. data/src/core/ext/upb-generated/validate/validate.upb.c +443 -0
  127. data/src/core/ext/upb-generated/validate/validate.upb.h +2037 -0
  128. data/src/core/lib/channel/channel_args.cc +21 -0
  129. data/src/core/lib/channel/channel_args.h +16 -2
  130. data/src/core/lib/channel/channel_stack.h +2 -1
  131. data/src/core/lib/channel/channelz.cc +54 -56
  132. data/src/core/lib/channel/channelz.h +29 -12
  133. data/src/core/lib/compression/compression.cc +2 -1
  134. data/src/core/lib/compression/compression_internal.h +8 -0
  135. data/src/core/lib/gpr/log_linux.cc +2 -2
  136. data/src/core/lib/gpr/log_posix.cc +2 -2
  137. data/src/core/lib/gpr/time_precise.cc +123 -36
  138. data/src/core/lib/gpr/time_precise.h +37 -0
  139. data/src/core/lib/gprpp/abstract.h +10 -0
  140. data/src/core/lib/gprpp/atomic.h +4 -0
  141. data/src/core/lib/gprpp/inlined_vector.h +20 -4
  142. data/src/core/lib/gprpp/map.h +109 -6
  143. data/src/core/lib/gprpp/memory.h +6 -0
  144. data/src/core/lib/gprpp/ref_counted_ptr.h +2 -0
  145. data/src/core/lib/iomgr/ev_epollex_linux.cc +29 -54
  146. data/src/core/lib/iomgr/exec_ctx.cc +27 -17
  147. data/src/core/lib/iomgr/exec_ctx.h +3 -0
  148. data/src/core/lib/iomgr/sockaddr_utils.cc +1 -3
  149. data/src/core/lib/iomgr/tcp_posix.cc +16 -25
  150. data/src/core/lib/iomgr/tcp_server_custom.cc +1 -1
  151. data/src/core/lib/iomgr/timer_manager.cc +8 -1
  152. data/src/core/lib/iomgr/timer_manager.h +2 -0
  153. data/src/core/lib/security/credentials/credentials.h +8 -2
  154. data/src/core/lib/security/credentials/tls/grpc_tls_credentials_options.cc +23 -0
  155. data/src/core/lib/security/credentials/tls/grpc_tls_credentials_options.h +3 -0
  156. data/src/core/lib/security/credentials/tls/spiffe_credentials.cc +3 -3
  157. data/src/core/lib/security/security_connector/ssl_utils.cc +1 -12
  158. data/src/core/lib/security/security_connector/ssl_utils.h +10 -6
  159. data/src/core/lib/security/security_connector/ssl_utils_config.cc +32 -0
  160. data/src/core/lib/security/security_connector/ssl_utils_config.h +30 -0
  161. data/src/core/lib/security/security_connector/tls/spiffe_security_connector.cc +161 -49
  162. data/src/core/lib/security/security_connector/tls/spiffe_security_connector.h +34 -1
  163. data/src/core/lib/slice/slice_intern.cc +17 -9
  164. data/src/core/lib/slice/slice_internal.h +34 -7
  165. data/src/core/lib/slice/slice_utils.h +7 -3
  166. data/src/core/lib/surface/call.cc +97 -57
  167. data/src/core/lib/surface/channel.cc +2 -2
  168. data/src/core/lib/surface/completion_queue.cc +10 -16
  169. data/src/core/lib/surface/init.cc +3 -0
  170. data/src/core/lib/surface/server.cc +11 -14
  171. data/src/core/lib/surface/validate_metadata.cc +4 -0
  172. data/src/core/lib/surface/version.cc +2 -2
  173. data/src/core/lib/transport/metadata.cc +4 -4
  174. data/src/core/lib/transport/metadata_batch.cc +72 -16
  175. data/src/core/lib/transport/metadata_batch.h +38 -0
  176. data/src/core/lib/transport/static_metadata.cc +814 -1023
  177. data/src/core/lib/transport/static_metadata.h +271 -213
  178. data/src/core/lib/transport/transport.h +12 -0
  179. data/src/core/plugin_registry/grpc_plugin_registry.cc +4 -0
  180. data/src/core/tsi/alts/handshaker/alts_handshaker_client.cc +104 -76
  181. data/src/core/tsi/alts/handshaker/alts_tsi_handshaker.cc +34 -16
  182. data/src/core/tsi/alts/handshaker/alts_tsi_handshaker.h +2 -2
  183. data/src/core/tsi/alts/handshaker/alts_tsi_utils.cc +10 -6
  184. data/src/core/tsi/alts/handshaker/alts_tsi_utils.h +4 -3
  185. data/src/core/tsi/alts/handshaker/transport_security_common_api.cc +74 -48
  186. data/src/core/tsi/alts/handshaker/transport_security_common_api.h +34 -26
  187. data/src/core/tsi/ssl_transport_security.cc +14 -6
  188. data/src/core/tsi/ssl_transport_security.h +4 -0
  189. data/src/ruby/ext/grpc/ext-export.clang +1 -0
  190. data/src/ruby/ext/grpc/ext-export.gcc +6 -0
  191. data/src/ruby/ext/grpc/extconf.rb +5 -0
  192. data/src/ruby/ext/grpc/rb_enable_cpp.cc +22 -0
  193. data/src/ruby/ext/grpc/rb_grpc.c +1 -42
  194. data/src/ruby/ext/grpc/rb_grpc_imports.generated.c +4 -0
  195. data/src/ruby/ext/grpc/rb_grpc_imports.generated.h +6 -0
  196. data/src/ruby/lib/grpc.rb +2 -0
  197. data/src/ruby/lib/grpc/core/status_codes.rb +135 -0
  198. data/src/ruby/lib/grpc/errors.rb +4 -7
  199. data/src/ruby/lib/grpc/google_rpc_status_utils.rb +9 -4
  200. data/src/ruby/lib/grpc/structs.rb +15 -0
  201. data/src/ruby/lib/grpc/version.rb +1 -1
  202. data/src/ruby/spec/errors_spec.rb +1 -0
  203. data/src/ruby/spec/pb/codegen/grpc/testing/package_options_import.proto +22 -0
  204. data/src/ruby/spec/pb/codegen/grpc/testing/package_options_ruby_style.proto +34 -0
  205. data/src/ruby/spec/pb/codegen/package_option_spec.rb +53 -29
  206. data/third_party/upb/upb/decode.c +604 -0
  207. data/third_party/upb/upb/decode.h +21 -0
  208. data/third_party/upb/upb/encode.c +378 -0
  209. data/third_party/upb/upb/encode.h +21 -0
  210. data/third_party/upb/upb/generated_util.h +105 -0
  211. data/third_party/upb/upb/msg.c +111 -0
  212. data/third_party/upb/upb/msg.h +69 -0
  213. data/third_party/upb/upb/port.c +27 -0
  214. data/third_party/upb/upb/port_def.inc +152 -0
  215. data/third_party/upb/upb/port_undef.inc +21 -0
  216. data/third_party/upb/upb/table.c +911 -0
  217. data/third_party/upb/upb/table.int.h +507 -0
  218. data/third_party/upb/upb/upb.c +261 -0
  219. data/third_party/upb/upb/upb.h +364 -0
  220. metadata +129 -50
  221. data/src/core/ext/filters/client_channel/health/health.pb.c +0 -23
  222. data/src/core/ext/filters/client_channel/health/health.pb.h +0 -73
  223. data/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/google/protobuf/duration.pb.c +0 -19
  224. data/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/google/protobuf/duration.pb.h +0 -54
  225. data/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/google/protobuf/timestamp.pb.c +0 -19
  226. data/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/google/protobuf/timestamp.pb.h +0 -54
  227. data/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/load_balancer.pb.c +0 -89
  228. data/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/load_balancer.pb.h +0 -164
  229. data/src/core/tsi/alts/handshaker/alts_handshaker_service_api.cc +0 -520
  230. data/src/core/tsi/alts/handshaker/alts_handshaker_service_api.h +0 -323
  231. data/src/core/tsi/alts/handshaker/alts_handshaker_service_api_util.cc +0 -145
  232. data/src/core/tsi/alts/handshaker/alts_handshaker_service_api_util.h +0 -149
  233. data/src/core/tsi/alts/handshaker/altscontext.pb.c +0 -47
  234. data/src/core/tsi/alts/handshaker/altscontext.pb.h +0 -63
  235. data/src/core/tsi/alts/handshaker/handshaker.pb.c +0 -122
  236. data/src/core/tsi/alts/handshaker/handshaker.pb.h +0 -254
  237. data/src/core/tsi/alts/handshaker/transport_security_common.pb.c +0 -49
  238. data/src/core/tsi/alts/handshaker/transport_security_common.pb.h +0 -78
  239. data/third_party/nanopb/pb.h +0 -579
  240. data/third_party/nanopb/pb_common.c +0 -97
  241. data/third_party/nanopb/pb_common.h +0 -42
  242. data/third_party/nanopb/pb_decode.c +0 -1347
  243. data/third_party/nanopb/pb_decode.h +0 -149
  244. data/third_party/nanopb/pb_encode.c +0 -696
  245. data/third_party/nanopb/pb_encode.h +0 -154
@@ -224,7 +224,9 @@ GRPCAPI grpc_call* grpc_channel_create_call(
  GRPCAPI void grpc_channel_ping(grpc_channel* channel, grpc_completion_queue* cq,
                                 void* tag, void* reserved);

- /** Pre-register a method/host pair on a channel. */
+ /** Pre-register a method/host pair on a channel.
+     method and host are not owned and must remain alive while the server is
+     running. */
  GRPCAPI void* grpc_channel_register_call(grpc_channel* channel,
                                           const char* method, const char* host,
                                           void* reserved);
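The expanded comment pins down an ownership contract callers could previously only guess at: the method and host strings are borrowed, not copied, so they must outlive the channel. A hedged sketch of a conforming caller (the channel, completion queue, and deadline are assumed to exist already):

    // A string literal trivially outlives the channel, satisfying the contract.
    static const char* kMethod = "/helloworld.Greeter/SayHello";
    void* handle = grpc_channel_register_call(channel, kMethod,
                                              /*host=*/nullptr, /*reserved=*/nullptr);
    // Registered calls then skip per-call method/host processing:
    grpc_call* call = grpc_channel_create_registered_call(
        channel, /*parent_call=*/nullptr, GRPC_PROPAGATE_DEFAULTS, cq, handle,
        deadline, /*reserved=*/nullptr);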
@@ -778,6 +778,21 @@ GRPCAPI int grpc_tls_key_materials_config_set_key_materials(
      const grpc_ssl_pem_key_cert_pair** pem_key_cert_pairs,
      size_t num_key_cert_pairs);

+ /** Set grpc_tls_key_materials_config instance with a provided version number,
+     which is used to keep track of the version of key materials.
+     It returns 1 on success and 0 on failure. It is used for
+     experimental purpose for now and subject to change.
+  */
+ GRPCAPI int grpc_tls_key_materials_config_set_version(
+     grpc_tls_key_materials_config* config, int version);
+
+ /** Get the version number of a grpc_tls_key_materials_config instance.
+     It returns the version number on success and -1 on failure.
+     It is used for experimental purpose for now and subject to change.
+  */
+ GRPCAPI int grpc_tls_key_materials_config_get_version(
+     grpc_tls_key_materials_config* config);
+
  /** --- TLS credential reload config. ---
      It is used for experimental purpose for now and subject to change.*/
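A hedged sketch of the intended use: version the key materials so a reload callback can tell whether anything actually changed (config creation here assumes grpc_tls_key_materials_config_create(), declared earlier in this header; both APIs are experimental):

    grpc_tls_key_materials_config* config = grpc_tls_key_materials_config_create();
    // Producer side: bump the version whenever new key materials are installed.
    if (grpc_tls_key_materials_config_set_version(config, 2) == 0) {
      /* 0 signals failure, e.g. a null config */
    }
    // Consumer side: compare versions to skip redundant reload work.
    int version = grpc_tls_key_materials_config_get_version(config);  // -1 on failure
    if (version == last_applied_version) {
      /* leave key_materials untouched, per the reload-arg doc change below */
    }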

@@ -793,10 +808,11 @@ typedef void (*grpc_tls_on_credential_reload_done_cb)(
  /** A struct containing all information necessary to schedule/cancel
      a credential reload request. cb and cb_user_data represent a gRPC-provided
      callback and an argument passed to it. key_materials is an in/output
-     parameter containing currently used/newly reloaded credentials. status and
-     error_details are used to hold information about errors occurred when a
-     credential reload request is scheduled/cancelled. It is used for
-     experimental purpose for now and subject to change. */
+     parameter containing currently used/newly reloaded credentials. If
+     credential reload does not result in a new credential, key_materials should
+     not be modified. status and error_details are used to hold information about
+     errors occurred when a credential reload request is scheduled/cancelled. It
+     is used for experimental purpose for now and subject to change. */
  struct grpc_tls_credential_reload_arg {
    grpc_tls_on_credential_reload_done_cb cb;
    void* cb_user_data;
@@ -169,11 +169,7 @@ typedef struct {
  #define GRPC_ARG_MAX_CONNECTION_AGE_GRACE_MS "grpc.max_connection_age_grace_ms"
  /** Timeout after the last RPC finishes on the client channel at which the
   * channel goes back into IDLE state. Int valued, milliseconds. INT_MAX means
-  * unlimited. */
- /** TODO(qianchengz): Currently the default value is INT_MAX, which means the
-  * client idle filter is disabled by default. After the client idle filter
-  * proves no perfomance issue, we will change the default value to a reasonable
-  * value. */
+  * unlimited. The default value is 30 minutes and the min value is 1 second. */
  #define GRPC_ARG_CLIENT_IDLE_TIMEOUT_MS "grpc.client_idle_timeout_ms"
  /** Enable/disable support for per-message compression. Defaults to 1, unless
      GRPC_ARG_MINIMAL_STACK is enabled, in which case it defaults to 0. */
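This is a user-visible behavior change: client channels now drop to IDLE after 30 minutes without RPCs. A hedged sketch of pinning the old behavior at channel-creation time (target address is a placeholder; includes assumed):

    #include <climits>
    #include <grpc/grpc.h>

    grpc_arg idle_arg = grpc_channel_arg_integer_create(
        const_cast<char*>(GRPC_ARG_CLIENT_IDLE_TIMEOUT_MS),
        INT_MAX);  // INT_MAX = never go idle (the pre-1.24 default)
    grpc_channel_args args = {1, &idle_arg};
    grpc_channel* channel =
        grpc_insecure_channel_create("localhost:50051", &args, nullptr);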
@@ -330,6 +326,11 @@ typedef struct {
      balancer before using fallback backend addresses from the resolver.
      If 0, enter fallback mode immediately. Default value is 10000. */
  #define GRPC_ARG_XDS_FALLBACK_TIMEOUT_MS "grpc.xds_fallback_timeout_ms"
+ /* Time in milliseconds to wait before a locality is deleted after it's removed
+    from the received EDS update. If 0, delete the locality immediately. Default
+    value is 15 minutes. */
+ #define GRPC_ARG_LOCALITY_RETENTION_INTERVAL_MS \
+   "grpc.xds_locality_retention_interval_ms"
  /** If non-zero, grpc server's cronet compression workaround will be enabled */
  #define GRPC_ARG_WORKAROUND_CRONET_COMPRESSION \
    "grpc.workaround.cronet_compression"
@@ -27,6 +27,14 @@
   * - some syscalls to be made directly
   */

+ /*
+  * Defines GRPC_USE_CPP_STD_LIB to use standard C++ library instead of
+  * in-house library if possible. (e.g. std::map)
+  */
+ #ifndef GRPC_USE_CPP_STD_LIB
+ #define GRPC_USE_CPP_STD_LIB 1
+ #endif
+
  /* Get windows.h included everywhere (we need it) */
  #if defined(_WIN64) || defined(WIN64) || defined(_WIN32) || defined(WIN32)
  #ifndef WIN32_LEAN_AND_MEAN
@@ -463,6 +471,23 @@ typedef unsigned __int64 uint64_t;
  #include <stdint.h>
  #endif /* _MSC_VER */

+ /* Type of cycle clock implementation */
+ #ifdef GPR_LINUX
+ /* Disable cycle clock by default.
+    TODO(soheil): enable when we support fallback for unstable cycle clocks.
+ #if defined(__i386__)
+ #define GPR_CYCLE_COUNTER_RDTSC_32 1
+ #elif defined(__x86_64__) || defined(__amd64__)
+ #define GPR_CYCLE_COUNTER_RDTSC_64 1
+ #else
+ #define GPR_CYCLE_COUNTER_FALLBACK 1
+ #endif
+ */
+ #define GPR_CYCLE_COUNTER_FALLBACK 1
+ #else
+ #define GPR_CYCLE_COUNTER_FALLBACK 1
+ #endif /* GPR_LINUX */
+
  /* Cache line alignment */
  #ifndef GPR_CACHELINE_SIZE_LOG
  #if defined(__i386__) || defined(__x86_64__)
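Note the RDTSC branch is added but commented out: 1.24 ships only the fallback path. A hedged illustration (not gRPC source; names are ours) of what the gated technique looks like — read the x86 time-stamp counter directly, fall back to a monotonic clock elsewhere:

    #include <chrono>
    #include <cstdint>

    inline int64_t cycle_counter_now() {
    #if defined(__x86_64__) || defined(__amd64__)
      uint64_t lo, hi;
      asm volatile("rdtsc" : "=a"(lo), "=d"(hi));  // raw time-stamp counter
      return static_cast<int64_t>((hi << 32) | lo);
    #else
      // Fallback: nanoseconds from a monotonic clock stand in for cycles.
      return std::chrono::duration_cast<std::chrono::nanoseconds>(
                 std::chrono::steady_clock::now().time_since_epoch())
          .count();
    #endif
    }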
@@ -0,0 +1,78 @@
+ //
+ // Copyright 2019 gRPC authors.
+ //
+ // Licensed under the Apache License, Version 2.0 (the "License");
+ // you may not use this file except in compliance with the License.
+ // You may obtain a copy of the License at
+ //
+ //     http://www.apache.org/licenses/LICENSE-2.0
+ //
+ // Unless required by applicable law or agreed to in writing, software
+ // distributed under the License is distributed on an "AS IS" BASIS,
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ // See the License for the specific language governing permissions and
+ // limitations under the License.
+ //
+
+ #include <grpc/support/port_platform.h>
+
+ #include "src/core/ext/filters/client_channel/backend_metric.h"
+
+ #include "src/core/lib/gprpp/string_view.h"
+ #include "udpa/data/orca/v1/orca_load_report.upb.h"
+
+ namespace grpc_core {
+
+ namespace {
+
+ template <typename EntryType>
+ Map<StringView, double, StringLess> ParseMap(
+     udpa_data_orca_v1_OrcaLoadReport* msg,
+     EntryType** (*entry_func)(udpa_data_orca_v1_OrcaLoadReport*, size_t*),
+     upb_strview (*key_func)(const EntryType*),
+     double (*value_func)(const EntryType*), Arena* arena) {
+   Map<StringView, double, StringLess> result;
+   size_t size;
+   const auto* const* entries = entry_func(msg, &size);
+   for (size_t i = 0; i < size; ++i) {
+     upb_strview key_view = key_func(entries[i]);
+     char* key = static_cast<char*>(arena->Alloc(key_view.size + 1));
+     memcpy(key, key_view.data, key_view.size);
+     result[StringView(key, key_view.size)] = value_func(entries[i]);
+   }
+   return result;
+ }
+
+ }  // namespace
+
+ const LoadBalancingPolicy::BackendMetricData* ParseBackendMetricData(
+     const grpc_slice& serialized_load_report, Arena* arena) {
+   upb::Arena upb_arena;
+   udpa_data_orca_v1_OrcaLoadReport* msg =
+       udpa_data_orca_v1_OrcaLoadReport_parse(
+           reinterpret_cast<const char*>(
+               GRPC_SLICE_START_PTR(serialized_load_report)),
+           GRPC_SLICE_LENGTH(serialized_load_report), upb_arena.ptr());
+   if (msg == nullptr) return nullptr;
+   LoadBalancingPolicy::BackendMetricData* backend_metric_data =
+       arena->New<LoadBalancingPolicy::BackendMetricData>();
+   backend_metric_data->cpu_utilization =
+       udpa_data_orca_v1_OrcaLoadReport_cpu_utilization(msg);
+   backend_metric_data->mem_utilization =
+       udpa_data_orca_v1_OrcaLoadReport_mem_utilization(msg);
+   backend_metric_data->requests_per_second =
+       udpa_data_orca_v1_OrcaLoadReport_rps(msg);
+   backend_metric_data->request_cost =
+       ParseMap<udpa_data_orca_v1_OrcaLoadReport_RequestCostEntry>(
+           msg, udpa_data_orca_v1_OrcaLoadReport_mutable_request_cost,
+           udpa_data_orca_v1_OrcaLoadReport_RequestCostEntry_key,
+           udpa_data_orca_v1_OrcaLoadReport_RequestCostEntry_value, arena);
+   backend_metric_data->utilization =
+       ParseMap<udpa_data_orca_v1_OrcaLoadReport_UtilizationEntry>(
+           msg, udpa_data_orca_v1_OrcaLoadReport_mutable_utilization,
+           udpa_data_orca_v1_OrcaLoadReport_UtilizationEntry_key,
+           udpa_data_orca_v1_OrcaLoadReport_UtilizationEntry_value, arena);
+   return backend_metric_data;
+ }
+
+ }  // namespace grpc_core
@@ -0,0 +1,36 @@
+ //
+ // Copyright 2019 gRPC authors.
+ //
+ // Licensed under the Apache License, Version 2.0 (the "License");
+ // you may not use this file except in compliance with the License.
+ // You may obtain a copy of the License at
+ //
+ //     http://www.apache.org/licenses/LICENSE-2.0
+ //
+ // Unless required by applicable law or agreed to in writing, software
+ // distributed under the License is distributed on an "AS IS" BASIS,
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ // See the License for the specific language governing permissions and
+ // limitations under the License.
+ //
+
+ #ifndef GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_BACKEND_METRIC_H
+ #define GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_BACKEND_METRIC_H
+
+ #include <grpc/support/port_platform.h>
+
+ #include <grpc/slice.h>
+
+ #include "src/core/ext/filters/client_channel/lb_policy.h"
+ #include "src/core/lib/gprpp/arena.h"
+
+ namespace grpc_core {
+
+ // Parses the serialized load report and allocates a BackendMetricData
+ // object on the arena.
+ const LoadBalancingPolicy::BackendMetricData* ParseBackendMetricData(
+     const grpc_slice& serialized_load_report, Arena* arena);
+
+ }  // namespace grpc_core
+
+ #endif /* GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_BACKEND_METRIC_H */
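For orientation, a hedged sketch of the round trip this header enables, using the upb-generated accessors added in this release (the _new/_set/_serialize names follow the upb codegen pattern; the Arena::Create size and plumbing are simplified assumptions):

    // Server side (conceptually): build and serialize an ORCA load report.
    upb::Arena upb_arena;
    udpa_data_orca_v1_OrcaLoadReport* report =
        udpa_data_orca_v1_OrcaLoadReport_new(upb_arena.ptr());
    udpa_data_orca_v1_OrcaLoadReport_set_cpu_utilization(report, 0.75);
    size_t len;
    char* buf =
        udpa_data_orca_v1_OrcaLoadReport_serialize(report, upb_arena.ptr(), &len);
    grpc_slice serialized = grpc_slice_from_copied_buffer(buf, len);
    // Client side: decode into an arena-owned BackendMetricData.
    grpc_core::Arena* arena = grpc_core::Arena::Create(1024);
    const auto* data = grpc_core::ParseBackendMetricData(serialized, arena);
    // On success, data->cpu_utilization == 0.75.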
@@ -111,6 +111,12 @@ static void finished_completion(void* pw, grpc_cq_completion* ignored) {

  static void partly_done(state_watcher* w, bool due_to_completion,
                          grpc_error* error) {
+   bool end_op = false;
+   void* end_op_tag = nullptr;
+   grpc_error* end_op_error = nullptr;
+   grpc_completion_queue* end_op_cq = nullptr;
+   grpc_cq_completion* end_op_completion_storage = nullptr;
+
    if (due_to_completion) {
      grpc_timer_cancel(&w->alarm);
    } else {
@@ -152,8 +158,11 @@ static void partly_done(state_watcher* w, bool due_to_completion,
        w->error = error;
      }
      w->phase = CALLING_BACK_AND_FINISHED;
-     grpc_cq_end_op(w->cq, w->tag, w->error, finished_completion, w,
-                    &w->completion_storage);
+     end_op = true;
+     end_op_cq = w->cq;
+     end_op_tag = w->tag;
+     end_op_error = w->error;
+     end_op_completion_storage = &w->completion_storage;
      break;
    case CALLING_BACK_AND_FINISHED:
      GPR_UNREACHABLE_CODE(return );
@@ -161,6 +170,11 @@ static void partly_done(state_watcher* w, bool due_to_completion,
  }
  gpr_mu_unlock(&w->mu);

+ if (end_op) {
+   grpc_cq_end_op(end_op_cq, end_op_tag, end_op_error, finished_completion, w,
+                  end_op_completion_storage);
+ }
+
  GRPC_ERROR_UNREF(error);
  }
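The refactor records the completion while holding w->mu and fires grpc_cq_end_op only after gpr_mu_unlock, so the completion path cannot re-enter the watcher lock. A hedged, self-contained illustration of the same shape (names ours):

    #include <functional>
    #include <mutex>

    std::mutex mu;
    void notify(void* tag);  // assumed callback; may itself acquire mu

    void partly_done_shape(bool finished, void* tag) {
      std::function<void()> deferred;
      {
        std::lock_guard<std::mutex> lock(mu);
        // ... state-machine work under the lock ...
        if (finished) deferred = [tag] { notify(tag); };  // capture, don't call yet
      }
      if (deferred) deferred();  // runs with mu released: no self-deadlock
    }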

@@ -31,6 +31,7 @@
  #include <grpc/support/string_util.h>
  #include <grpc/support/sync.h>

+ #include "src/core/ext/filters/client_channel/backend_metric.h"
  #include "src/core/ext/filters/client_channel/backup_poller.h"
  #include "src/core/ext/filters/client_channel/global_subchannel_pool.h"
  #include "src/core/ext/filters/client_channel/http_connect_handshaker.h"
@@ -129,7 +130,7 @@ class ChannelData {
    return disconnect_error_.Load(MemoryOrder::ACQUIRE);
  }

- grpc_combiner* data_plane_combiner() const { return data_plane_combiner_; }
+ Mutex* data_plane_mu() const { return &data_plane_mu_; }

  LoadBalancingPolicy::SubchannelPicker* picker() const {
    return picker_.get();
@@ -165,8 +166,6 @@ class ChannelData {

  private:
  class SubchannelWrapper;
- class ConnectivityStateAndPickerSetter;
- class ServiceConfigSetter;
  class ClientChannelControlHelper;

  class ExternalConnectivityWatcher {
@@ -213,6 +212,14 @@ class ChannelData {
  ChannelData(grpc_channel_element_args* args, grpc_error** error);
  ~ChannelData();

+ void UpdateStateAndPickerLocked(
+     grpc_connectivity_state state, const char* reason,
+     UniquePtr<LoadBalancingPolicy::SubchannelPicker> picker);
+
+ void UpdateServiceConfigLocked(
+     RefCountedPtr<ServerRetryThrottleData> retry_throttle_data,
+     RefCountedPtr<ServiceConfig> service_config);
+
  void CreateResolvingLoadBalancingPolicyLocked();

  void DestroyResolvingLoadBalancingPolicyLocked();
@@ -249,9 +256,9 @@ class ChannelData {
  channelz::ChannelNode* channelz_node_;

  //
- // Fields used in the data plane. Guarded by data_plane_combiner.
+ // Fields used in the data plane. Guarded by data_plane_mu.
  //
- grpc_combiner* data_plane_combiner_;
+ mutable Mutex data_plane_mu_;
  UniquePtr<LoadBalancingPolicy::SubchannelPicker> picker_;
  QueuedPick* queued_picks_ = nullptr;  // Linked list of queued picks.
  // Data from service config.
@@ -273,15 +280,21 @@ class ChannelData {
  bool received_first_resolver_result_ = false;
  // The number of SubchannelWrapper instances referencing a given Subchannel.
  Map<Subchannel*, int> subchannel_refcount_map_;
+ // The set of SubchannelWrappers that currently exist.
+ // No need to hold a ref, since the map is updated in the control-plane
+ // combiner when the SubchannelWrappers are created and destroyed.
+ // TODO(roth): We really want to use a set here, not a map. Since we don't
+ // currently have a set implementation, we use a map and ignore the value.
+ Map<SubchannelWrapper*, bool> subchannel_wrappers_;
  // Pending ConnectedSubchannel updates for each SubchannelWrapper.
  // Updates are queued here in the control plane combiner and then applied
- // in the data plane combiner when the picker is updated.
+ // in the data plane mutex when the picker is updated.
  Map<RefCountedPtr<SubchannelWrapper>, RefCountedPtr<ConnectedSubchannel>,
      RefCountedPtrLess<SubchannelWrapper>>
      pending_subchannel_updates_;

  //
- // Fields accessed from both data plane and control plane combiners.
+ // Fields accessed from both data plane mutex and control plane combiner.
  //
  Atomic<grpc_error*> disconnect_error_;
@@ -315,7 +328,16 @@ class CallData {
  void MaybeApplyServiceConfigToCallLocked(grpc_call_element* elem);

  // Invoked by channel for queued picks when the picker is updated.
- static void StartPickLocked(void* arg, grpc_error* error);
+ static void PickSubchannel(void* arg, grpc_error* error);
+
+ // Helper function for performing a pick while holding the data plane
+ // mutex. Returns true if the pick is complete, in which case the caller
+ // must invoke PickDone() or AsyncPickDone() with the returned error.
+ bool PickSubchannelLocked(grpc_call_element* elem, grpc_error** error);
+
+ // Schedules a callback to process the completed pick. The callback
+ // will not run until after this method returns.
+ void AsyncPickDone(grpc_call_element* elem, grpc_error* error);

  private:
  class QueuedPickCanceller;
@@ -374,6 +396,19 @@

    void* Alloc(size_t size) override { return calld_->arena_->Alloc(size); }

+   const LoadBalancingPolicy::BackendMetricData* GetBackendMetricData()
+       override {
+     if (calld_->backend_metric_data_ == nullptr) {
+       grpc_linked_mdelem* md = calld_->recv_trailing_metadata_->idx.named
+                                    .x_endpoint_load_metrics_bin;
+       if (md != nullptr) {
+         calld_->backend_metric_data_ =
+             ParseBackendMetricData(GRPC_MDVALUE(md->md), calld_->arena_);
+       }
+     }
+     return calld_->backend_metric_data_;
+   }
+
   private:
    CallData* calld_;
  };
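GetBackendMetricData() is the hook through which an LB policy reads those per-call ORCA metrics. A hedged sketch of a policy-side consumer (CallState is the interface from lb_policy.h; the aggregation helper is hypothetical):

    void OnCallComplete(grpc_core::LoadBalancingPolicy::CallState* call_state) {
      const grpc_core::LoadBalancingPolicy::BackendMetricData* metrics =
          call_state->GetBackendMetricData();
      if (metrics == nullptr) return;  // no x-endpoint-load-metrics-bin trailer
      // e.g. fold CPU utilization into a per-backend moving average.
      UpdateBackendLoadEstimate(metrics->cpu_utilization);  // hypothetical helper
    }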
@@ -685,7 +720,7 @@
  grpc_deadline_state deadline_state_;

  grpc_slice path_;  // Request path.
- gpr_timespec call_start_time_;
+ gpr_cycle_counter call_start_time_;
  grpc_millis deadline_;
  Arena* arena_;
  grpc_call_stack* owning_call_;
@@ -706,9 +741,10 @@
  bool service_config_applied_ = false;
  QueuedPickCanceller* pick_canceller_ = nullptr;
  LbCallState lb_call_state_;
+ const LoadBalancingPolicy::BackendMetricData* backend_metric_data_ = nullptr;
  RefCountedPtr<ConnectedSubchannel> connected_subchannel_;
  void (*lb_recv_trailing_metadata_ready_)(
-     void* user_data,
+     void* user_data, grpc_error* error,
      LoadBalancingPolicy::MetadataInterface* recv_trailing_metadata,
      LoadBalancingPolicy::CallState* call_state) = nullptr;
  void* lb_recv_trailing_metadata_ready_user_data_ = nullptr;
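The extra grpc_error* parameter lets LB policies see the call's final status, not just its trailing metadata. A hedged sketch of a callback matching the new shape (names ours):

    static void RecvTrailingMetadataReady(
        void* user_data, grpc_error* error,
        grpc_core::LoadBalancingPolicy::MetadataInterface* recv_trailing_metadata,
        grpc_core::LoadBalancingPolicy::CallState* call_state) {
      // error carries the final call status; load metrics ride on the metadata.
      bool call_failed = (error != GRPC_ERROR_NONE);
      const auto* metrics = call_state->GetBackendMetricData();
      // ... record success/failure and backend load for the chosen subchannel ...
    }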
@@ -799,14 +835,14 @@ class ChannelData::SubchannelWrapper : public SubchannelInterface {
    GRPC_CHANNEL_STACK_REF(chand_->owning_stack_, "SubchannelWrapper");
    auto* subchannel_node = subchannel_->channelz_node();
    if (subchannel_node != nullptr) {
-     intptr_t subchannel_uuid = subchannel_node->uuid();
      auto it = chand_->subchannel_refcount_map_.find(subchannel_);
      if (it == chand_->subchannel_refcount_map_.end()) {
-       chand_->channelz_node_->AddChildSubchannel(subchannel_uuid);
+       chand_->channelz_node_->AddChildSubchannel(subchannel_node->uuid());
        it = chand_->subchannel_refcount_map_.emplace(subchannel_, 0).first;
      }
      ++it->second;
    }
+   chand_->subchannel_wrappers_[this] = true;
  }

  ~SubchannelWrapper() {
@@ -815,14 +851,14 @@ class ChannelData::SubchannelWrapper : public SubchannelInterface {
            "chand=%p: destroying subchannel wrapper %p for subchannel %p",
            chand_, this, subchannel_);
    }
+   chand_->subchannel_wrappers_.erase(this);
    auto* subchannel_node = subchannel_->channelz_node();
    if (subchannel_node != nullptr) {
-     intptr_t subchannel_uuid = subchannel_node->uuid();
      auto it = chand_->subchannel_refcount_map_.find(subchannel_);
      GPR_ASSERT(it != chand_->subchannel_refcount_map_.end());
      --it->second;
      if (it->second == 0) {
-       chand_->channelz_node_->RemoveChildSubchannel(subchannel_uuid);
+       chand_->channelz_node_->RemoveChildSubchannel(subchannel_node->uuid());
        chand_->subchannel_refcount_map_.erase(it);
      }
    }
@@ -844,8 +880,9 @@ class ChannelData::SubchannelWrapper : public SubchannelInterface {
      UniquePtr<ConnectivityStateWatcherInterface> watcher) override {
    auto& watcher_wrapper = watcher_map_[watcher.get()];
    GPR_ASSERT(watcher_wrapper == nullptr);
-   watcher_wrapper = New<WatcherWrapper>(
-       std::move(watcher), Ref(DEBUG_LOCATION, "WatcherWrapper"));
+   watcher_wrapper = New<WatcherWrapper>(std::move(watcher),
+                                         Ref(DEBUG_LOCATION, "WatcherWrapper"),
+                                         initial_state);
    subchannel_->WatchConnectivityState(
        initial_state,
        UniquePtr<char>(gpr_strdup(health_check_service_name_.get())),
@@ -870,12 +907,46 @@ class ChannelData::SubchannelWrapper : public SubchannelInterface {
    return subchannel_->channel_args();
  }

+ void UpdateHealthCheckServiceName(UniquePtr<char> health_check_service_name) {
+   if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_routing_trace)) {
+     gpr_log(GPR_INFO,
+             "chand=%p: subchannel wrapper %p: updating health check service "
+             "name from \"%s\" to \"%s\"",
+             chand_, this, health_check_service_name_.get(),
+             health_check_service_name.get());
+   }
+   for (auto& p : watcher_map_) {
+     WatcherWrapper*& watcher_wrapper = p.second;
+     // Cancel the current watcher and create a new one using the new
+     // health check service name.
+     // TODO(roth): If there is not already an existing health watch
+     // call for the new name, then the watcher will initially report
+     // state CONNECTING. If the LB policy is currently reporting
+     // state READY, this may cause it to switch to CONNECTING before
+     // switching back to READY. This could cause a small delay for
+     // RPCs being started on the channel. If/when this becomes a
+     // problem, we may be able to handle it by waiting for the new
+     // watcher to report READY before we use it to replace the old one.
+     WatcherWrapper* replacement = watcher_wrapper->MakeReplacement();
+     subchannel_->CancelConnectivityStateWatch(
+         health_check_service_name_.get(), watcher_wrapper);
+     watcher_wrapper = replacement;
+     subchannel_->WatchConnectivityState(
+         replacement->last_seen_state(),
+         UniquePtr<char>(gpr_strdup(health_check_service_name.get())),
+         OrphanablePtr<Subchannel::ConnectivityStateWatcherInterface>(
+             replacement));
+   }
+   // Save the new health check service name.
+   health_check_service_name_ = std::move(health_check_service_name);
+ }
+
  // Caller must be holding the control-plane combiner.
  ConnectedSubchannel* connected_subchannel() const {
    return connected_subchannel_.get();
  }

- // Caller must be holding the data-plane combiner.
+ // Caller must be holding the data-plane mutex.
  ConnectedSubchannel* connected_subchannel_in_data_plane() const {
    return connected_subchannel_in_data_plane_.get();
  }
@@ -904,8 +975,11 @@ class ChannelData::SubchannelWrapper : public SubchannelInterface {
    WatcherWrapper(
        UniquePtr<SubchannelInterface::ConnectivityStateWatcherInterface>
            watcher,
-       RefCountedPtr<SubchannelWrapper> parent)
-       : watcher_(std::move(watcher)), parent_(std::move(parent)) {}
+       RefCountedPtr<SubchannelWrapper> parent,
+       grpc_connectivity_state initial_state)
+       : watcher_(std::move(watcher)),
+         parent_(std::move(parent)),
+         last_seen_state_(initial_state) {}

    ~WatcherWrapper() { parent_.reset(DEBUG_LOCATION, "WatcherWrapper"); }
@@ -928,9 +1002,21 @@ class ChannelData::SubchannelWrapper : public SubchannelInterface {
    }

    grpc_pollset_set* interested_parties() override {
-     return watcher_->interested_parties();
+     SubchannelInterface::ConnectivityStateWatcherInterface* watcher =
+         watcher_.get();
+     if (watcher_ == nullptr) watcher = replacement_->watcher_.get();
+     return watcher->interested_parties();
    }

+   WatcherWrapper* MakeReplacement() {
+     auto* replacement =
+         New<WatcherWrapper>(std::move(watcher_), parent_, last_seen_state_);
+     replacement_ = replacement;
+     return replacement;
+   }
+
+   grpc_connectivity_state last_seen_state() const { return last_seen_state_; }
+
   private:
    class Updater {
     public:
@@ -954,12 +1040,17 @@ class ChannelData::SubchannelWrapper : public SubchannelInterface {
        gpr_log(GPR_INFO,
                "chand=%p: processing connectivity change in combiner "
                "for subchannel wrapper %p subchannel %p "
-               "(connected_subchannel=%p state=%s)",
+               "(connected_subchannel=%p state=%s): watcher=%p",
                self->parent_->parent_->chand_, self->parent_->parent_.get(),
                self->parent_->parent_->subchannel_,
                self->connected_subchannel_.get(),
-               grpc_connectivity_state_name(self->state_));
+               grpc_connectivity_state_name(self->state_),
+               self->parent_->watcher_.get());
      }
+     // Ignore update if the parent WatcherWrapper has been replaced
+     // since this callback was scheduled.
+     if (self->parent_->watcher_ == nullptr) return;
+     self->parent_->last_seen_state_ = self->state_;
      self->parent_->parent_->MaybeUpdateConnectedSubchannel(
          std::move(self->connected_subchannel_));
      self->parent_->watcher_->OnConnectivityStateChange(self->state_);
@@ -974,6 +1065,8 @@ class ChannelData::SubchannelWrapper : public SubchannelInterface {

    UniquePtr<SubchannelInterface::ConnectivityStateWatcherInterface> watcher_;
    RefCountedPtr<SubchannelWrapper> parent_;
+   grpc_connectivity_state last_seen_state_;
+   WatcherWrapper* replacement_ = nullptr;
  };

  void MaybeUpdateConnectedSubchannel(
@@ -981,7 +1074,7 @@ class ChannelData::SubchannelWrapper : public SubchannelInterface {
    // Update the connected subchannel only if the channel is not shutting
    // down. This is because once the channel is shutting down, we
    // ignore picker updates from the LB policy, which means that
-   // ConnectivityStateAndPickerSetter will never process the entries
+   // UpdateStateAndPickerLocked() will never process the entries
    // in chand_->pending_subchannel_updates_. So we don't want to add
    // entries there that will never be processed, since that would
    // leave dangling refs to the channel and prevent its destruction.
@@ -991,7 +1084,7 @@ class ChannelData::SubchannelWrapper : public SubchannelInterface {
    if (connected_subchannel_ != connected_subchannel) {
      connected_subchannel_ = std::move(connected_subchannel);
      // Record the new connected subchannel so that it can be updated
-     // in the data plane combiner the next time the picker is updated.
+     // in the data plane mutex the next time the picker is updated.
      chand_->pending_subchannel_updates_[Ref(
          DEBUG_LOCATION, "ConnectedSubchannelUpdate")] = connected_subchannel_;
    }
@@ -1008,159 +1101,10 @@ class ChannelData::SubchannelWrapper : public SubchannelInterface {
  Map<ConnectivityStateWatcherInterface*, WatcherWrapper*> watcher_map_;
  // To be accessed only in the control plane combiner.
  RefCountedPtr<ConnectedSubchannel> connected_subchannel_;
- // To be accessed only in the data plane combiner.
+ // To be accessed only in the data plane mutex.
  RefCountedPtr<ConnectedSubchannel> connected_subchannel_in_data_plane_;
  };

- //
- // ChannelData::ConnectivityStateAndPickerSetter
- //
-
- // A fire-and-forget class that sets the channel's connectivity state
- // and then hops into the data plane combiner to update the picker.
- // Must be instantiated while holding the control plane combiner.
- // Deletes itself when done.
- class ChannelData::ConnectivityStateAndPickerSetter {
-  public:
-   ConnectivityStateAndPickerSetter(
-       ChannelData* chand, grpc_connectivity_state state, const char* reason,
-       UniquePtr<LoadBalancingPolicy::SubchannelPicker> picker)
-       : chand_(chand), picker_(std::move(picker)) {
-     // Clean the control plane when entering IDLE, while holding control plane
-     // combiner.
-     if (picker_ == nullptr) {
-       chand->health_check_service_name_.reset();
-       chand->saved_service_config_.reset();
-       chand->received_first_resolver_result_ = false;
-     }
-     // Update connectivity state here, while holding control plane combiner.
-     grpc_connectivity_state_set(&chand->state_tracker_, state, reason);
-     if (chand->channelz_node_ != nullptr) {
-       chand->channelz_node_->SetConnectivityState(state);
-       chand->channelz_node_->AddTraceEvent(
-           channelz::ChannelTrace::Severity::Info,
-           grpc_slice_from_static_string(
-               channelz::ChannelNode::GetChannelConnectivityStateChangeString(
-                   state)));
-     }
-     // Grab any pending subchannel updates.
-     pending_subchannel_updates_ =
-         std::move(chand_->pending_subchannel_updates_);
-     // Bounce into the data plane combiner to reset the picker.
-     GRPC_CHANNEL_STACK_REF(chand->owning_stack_,
-                            "ConnectivityStateAndPickerSetter");
-     GRPC_CLOSURE_INIT(&closure_, SetPickerInDataPlane, this,
-                       grpc_combiner_scheduler(chand->data_plane_combiner_));
-     GRPC_CLOSURE_SCHED(&closure_, GRPC_ERROR_NONE);
-   }
-
-  private:
-   static void SetPickerInDataPlane(void* arg, grpc_error* ignored) {
-     auto* self = static_cast<ConnectivityStateAndPickerSetter*>(arg);
-     // Handle subchannel updates.
-     for (auto& p : self->pending_subchannel_updates_) {
-       if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_routing_trace)) {
-         gpr_log(GPR_INFO,
-                 "chand=%p: updating subchannel wrapper %p data plane "
-                 "connected_subchannel to %p",
-                 self->chand_, p.first.get(), p.second.get());
-       }
-       p.first->set_connected_subchannel_in_data_plane(std::move(p.second));
-     }
-     // Swap out the picker. We hang on to the old picker so that it can
-     // be deleted in the control-plane combiner, since that's where we need
-     // to unref the subchannel wrappers that are reffed by the picker.
-     self->picker_.swap(self->chand_->picker_);
-     // Clean the data plane if the updated picker is nullptr.
-     if (self->chand_->picker_ == nullptr) {
-       self->chand_->received_service_config_data_ = false;
-       self->chand_->retry_throttle_data_.reset();
-       self->chand_->service_config_.reset();
-     }
-     // Re-process queued picks.
-     for (QueuedPick* pick = self->chand_->queued_picks_; pick != nullptr;
-          pick = pick->next) {
-       CallData::StartPickLocked(pick->elem, GRPC_ERROR_NONE);
-     }
-     // Pop back into the control plane combiner to delete ourself, so
-     // that we make sure to unref subchannel wrappers there. This
-     // includes both the ones reffed by the old picker (now stored in
-     // self->picker_) and the ones in self->pending_subchannel_updates_.
-     GRPC_CLOSURE_INIT(&self->closure_, CleanUpInControlPlane, self,
-                       grpc_combiner_scheduler(self->chand_->combiner_));
-     GRPC_CLOSURE_SCHED(&self->closure_, GRPC_ERROR_NONE);
-   }
-
-   static void CleanUpInControlPlane(void* arg, grpc_error* ignored) {
-     auto* self = static_cast<ConnectivityStateAndPickerSetter*>(arg);
-     GRPC_CHANNEL_STACK_UNREF(self->chand_->owning_stack_,
-                              "ConnectivityStateAndPickerSetter");
-     Delete(self);
-   }
-
-   ChannelData* chand_;
-   UniquePtr<LoadBalancingPolicy::SubchannelPicker> picker_;
-   Map<RefCountedPtr<SubchannelWrapper>, RefCountedPtr<ConnectedSubchannel>,
-       RefCountedPtrLess<SubchannelWrapper>>
-       pending_subchannel_updates_;
-   grpc_closure closure_;
- };
-
- //
- // ChannelData::ServiceConfigSetter
- //
-
- // A fire-and-forget class that sets the channel's service config data
- // in the data plane combiner. Deletes itself when done.
- class ChannelData::ServiceConfigSetter {
-  public:
-   ServiceConfigSetter(
-       ChannelData* chand,
-       Optional<internal::ClientChannelGlobalParsedConfig::RetryThrottling>
-           retry_throttle_data,
-       RefCountedPtr<ServiceConfig> service_config)
-       : chand_(chand),
-         retry_throttle_data_(retry_throttle_data),
-         service_config_(std::move(service_config)) {
-     GRPC_CHANNEL_STACK_REF(chand->owning_stack_, "ServiceConfigSetter");
-     GRPC_CLOSURE_INIT(&closure_, SetServiceConfigData, this,
-                       grpc_combiner_scheduler(chand->data_plane_combiner_));
-     GRPC_CLOSURE_SCHED(&closure_, GRPC_ERROR_NONE);
-   }
-
-  private:
-   static void SetServiceConfigData(void* arg, grpc_error* ignored) {
-     ServiceConfigSetter* self = static_cast<ServiceConfigSetter*>(arg);
-     ChannelData* chand = self->chand_;
-     // Update channel state.
-     chand->received_service_config_data_ = true;
-     if (self->retry_throttle_data_.has_value()) {
-       chand->retry_throttle_data_ =
-           internal::ServerRetryThrottleMap::GetDataForServer(
-               chand->server_name_.get(),
-               self->retry_throttle_data_.value().max_milli_tokens,
-               self->retry_throttle_data_.value().milli_token_ratio);
-     }
-     chand->service_config_ = std::move(self->service_config_);
-     // Apply service config to queued picks.
-     for (QueuedPick* pick = chand->queued_picks_; pick != nullptr;
-          pick = pick->next) {
-       CallData* calld = static_cast<CallData*>(pick->elem->call_data);
-       calld->MaybeApplyServiceConfigToCallLocked(pick->elem);
-     }
-     // Clean up.
-     GRPC_CHANNEL_STACK_UNREF(self->chand_->owning_stack_,
-                              "ServiceConfigSetter");
-     Delete(self);
-   }
-
-   ChannelData* chand_;
-   Optional<internal::ClientChannelGlobalParsedConfig::RetryThrottling>
-       retry_throttle_data_;
-   RefCountedPtr<ServiceConfig> service_config_;
-   grpc_closure closure_;
- };
-
  //
  // ChannelData::ExternalConnectivityWatcher::WatcherList
  //
@@ -1318,11 +1262,6 @@ class ChannelData::ClientChannelControlHelper
              chand_, subchannel, std::move(health_check_service_name));
  }

- grpc_channel* CreateChannel(const char* target,
-                             const grpc_channel_args& args) override {
-   return chand_->client_channel_factory_->CreateChannel(target, &args);
- }
-
  void UpdateState(
      grpc_connectivity_state state,
      UniquePtr<LoadBalancingPolicy::SubchannelPicker> picker) override {
@@ -1336,20 +1275,18 @@
    }
    // Do update only if not shutting down.
    if (disconnect_error == GRPC_ERROR_NONE) {
-     // Will delete itself.
-     New<ConnectivityStateAndPickerSetter>(chand_, state, "helper",
-                                           std::move(picker));
+     chand_->UpdateStateAndPickerLocked(state, "helper", std::move(picker));
    }
  }

  // No-op -- we should never get this from ResolvingLoadBalancingPolicy.
  void RequestReresolution() override {}

- void AddTraceEvent(TraceSeverity severity, const char* message) override {
+ void AddTraceEvent(TraceSeverity severity, StringView message) override {
    if (chand_->channelz_node_ != nullptr) {
      chand_->channelz_node_->AddTraceEvent(
          ConvertSeverityEnum(severity),
-         grpc_slice_from_copied_string(message));
+         grpc_slice_from_copied_buffer(message.data(), message.size()));
    }
  }
@@ -1422,7 +1359,6 @@ ChannelData::ChannelData(grpc_channel_element_args* args, grpc_error** error)
      client_channel_factory_(
          ClientChannelFactory::GetFromChannelArgs(args->channel_args)),
      channelz_node_(GetChannelzNode(args->channel_args)),
-     data_plane_combiner_(grpc_combiner_create()),
      combiner_(grpc_combiner_create()),
      interested_parties_(grpc_pollset_set_create()),
      subchannel_pool_(GetSubchannelPool(args->channel_args)),
@@ -1495,13 +1431,108 @@ ChannelData::~ChannelData() {
1495
1431
  // Stop backup polling.
1496
1432
  grpc_client_channel_stop_backup_polling(interested_parties_);
1497
1433
  grpc_pollset_set_destroy(interested_parties_);
1498
- GRPC_COMBINER_UNREF(data_plane_combiner_, "client_channel");
1499
1434
  GRPC_COMBINER_UNREF(combiner_, "client_channel");
1500
1435
  GRPC_ERROR_UNREF(disconnect_error_.Load(MemoryOrder::RELAXED));
1501
1436
  grpc_connectivity_state_destroy(&state_tracker_);
1502
1437
  gpr_mu_destroy(&info_mu_);
1503
1438
  }
1504
1439
 
1440
+ void ChannelData::UpdateStateAndPickerLocked(
1441
+ grpc_connectivity_state state, const char* reason,
1442
+ UniquePtr<LoadBalancingPolicy::SubchannelPicker> picker) {
1443
+ // Clean the control plane when entering IDLE.
1444
+ if (picker_ == nullptr) {
1445
+ health_check_service_name_.reset();
1446
+ saved_service_config_.reset();
1447
+ received_first_resolver_result_ = false;
1448
+ }
1449
+ // Update connectivity state.
1450
+ grpc_connectivity_state_set(&state_tracker_, state, reason);
1451
+ if (channelz_node_ != nullptr) {
1452
+ channelz_node_->SetConnectivityState(state);
1453
+ channelz_node_->AddTraceEvent(
1454
+ channelz::ChannelTrace::Severity::Info,
1455
+ grpc_slice_from_static_string(
1456
+ channelz::ChannelNode::GetChannelConnectivityStateChangeString(
1457
+ state)));
1458
+ }
1459
+ // Grab data plane lock to do subchannel updates and update the picker.
1460
+ //
1461
+ // Note that we want to minimize the work done while holding the data
1462
+ // plane lock, to keep the critical section small. So, for all of the
1463
+ // objects that we might wind up unreffing here, we actually hold onto
1464
+ // the refs until after we release the lock, and then unref them at
1465
+ // that point. This includes the following:
1466
+ // - refs to subchannel wrappers in the keys of pending_subchannel_updates_
1467
+ // - ref stored in retry_throttle_data_
1468
+ // - ref stored in service_config_
1469
+ // - ownership of the existing picker in picker_
1470
+ RefCountedPtr<ServerRetryThrottleData> retry_throttle_data_to_unref;
1471
+ RefCountedPtr<ServiceConfig> service_config_to_unref;
1472
+ {
1473
+ MutexLock lock(&data_plane_mu_);
1474
+ // Handle subchannel updates.
1475
+ for (auto& p : pending_subchannel_updates_) {
1476
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_routing_trace)) {
1477
+ gpr_log(GPR_INFO,
1478
+ "chand=%p: updating subchannel wrapper %p data plane "
1479
+ "connected_subchannel to %p",
1480
+ this, p.first.get(), p.second.get());
1481
+ }
1482
+ // Note: We do not remove the entry from pending_subchannel_updates_
1483
+ // here, since this would unref the subchannel wrapper; instead,
1484
+ // we wait until we've released the lock to clear the map.
1485
+ p.first->set_connected_subchannel_in_data_plane(std::move(p.second));
1486
+ }
1487
+ // Swap out the picker.
1488
+ // Note: Original value will be destroyed after the lock is released.
1489
+ picker_.swap(picker);
1490
+ // Clean the data plane if the updated picker is nullptr.
1491
+ if (picker_ == nullptr) {
1492
+ received_service_config_data_ = false;
1493
+ // Note: We save the objects to unref until after the lock is released.
1494
+ retry_throttle_data_to_unref = std::move(retry_throttle_data_);
1495
+ service_config_to_unref = std::move(service_config_);
1496
+ }
1497
+ // Re-process queued picks.
1498
+ for (QueuedPick* pick = queued_picks_; pick != nullptr; pick = pick->next) {
1499
+ grpc_call_element* elem = pick->elem;
1500
+ CallData* calld = static_cast<CallData*>(elem->call_data);
1501
+ grpc_error* error = GRPC_ERROR_NONE;
1502
+ if (calld->PickSubchannelLocked(elem, &error)) {
1503
+ calld->AsyncPickDone(elem, error);
1504
+ }
1505
+ }
1506
+ }
1507
+ // Clear the pending update map after releasing the lock, to keep the
1508
+ // critical section small.
1509
+ pending_subchannel_updates_.clear();
1510
+ }
1511
+
1512
+ void ChannelData::UpdateServiceConfigLocked(
1513
+ RefCountedPtr<ServerRetryThrottleData> retry_throttle_data,
1514
+ RefCountedPtr<ServiceConfig> service_config) {
1515
+ // Grab data plane lock to update service config.
1516
+ //
1517
+ // We defer unreffing the old values (and deallocating memory) until
1518
+ // after releasing the lock to keep the critical section small.
1519
+ {
1520
+ MutexLock lock(&data_plane_mu_);
1521
+ // Update service config.
1522
+ received_service_config_data_ = true;
1523
+ // Old values will be unreffed after lock is released.
1524
+ retry_throttle_data_.swap(retry_throttle_data);
1525
+ service_config_.swap(service_config);
1526
+ // Apply service config to queued picks.
1527
+ for (QueuedPick* pick = queued_picks_; pick != nullptr; pick = pick->next) {
1528
+ CallData* calld = static_cast<CallData*>(pick->elem->call_data);
1529
+ calld->MaybeApplyServiceConfigToCallLocked(pick->elem);
1530
+ }
1531
+ }
1532
+ // Old values will be unreffed after lock is released when they go out
1533
+ // of scope.
1534
+ }
1535
+
1505
1536
  void ChannelData::CreateResolvingLoadBalancingPolicyLocked() {
1506
1537
  // Instantiate resolving LB policy.
1507
1538
  LoadBalancingPolicy::Args lb_args;
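Both new helpers above follow the same locking discipline spelled out in their comments: anything whose destruction might unref or free memory is moved out of the member fields while the data plane mutex is held, and actually destroyed only after the mutex is released. A minimal standalone sketch of that pattern, with `std::mutex` and `std::unique_ptr` standing in for gRPC's internal `MutexLock` and `UniquePtr`:

```cpp
#include <memory>
#include <mutex>

struct Picker {};  // stand-in for LoadBalancingPolicy::SubchannelPicker

class Channel {
 public:
  void UpdatePicker(std::unique_ptr<Picker> picker) {
    std::unique_ptr<Picker> old_picker;  // destroyed only after unlock
    {
      std::lock_guard<std::mutex> lock(mu_);
      old_picker = std::move(picker_);   // cheap pointer moves under the lock
      picker_ = std::move(picker);
    }
    // old_picker goes out of scope here, so its destructor (and whatever
    // unrefs it triggers) runs outside the critical section.
  }

 private:
  std::mutex mu_;
  std::unique_ptr<Picker> picker_;
};
```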
@@ -1660,6 +1691,11 @@ bool ChannelData::ProcessResolverResultLocked(
   } else {
     chand->health_check_service_name_.reset();
   }
+  // Update health check service name used by existing subchannel wrappers.
+  for (const auto& p : chand->subchannel_wrappers_) {
+    p.first->UpdateHealthCheckServiceName(
+        UniquePtr<char>(gpr_strdup(chand->health_check_service_name_.get())));
+  }
   // Save service config.
   chand->saved_service_config_ = std::move(service_config);
 }
@@ -1668,15 +1704,20 @@ bool ChannelData::ProcessResolverResultLocked(
   // if we feel it is unnecessary.
   if (service_config_changed || !chand->received_first_resolver_result_) {
     chand->received_first_resolver_result_ = true;
-    Optional<internal::ClientChannelGlobalParsedConfig::RetryThrottling>
-        retry_throttle_data;
+    RefCountedPtr<ServerRetryThrottleData> retry_throttle_data;
     if (parsed_service_config != nullptr) {
-      retry_throttle_data = parsed_service_config->retry_throttling();
+      Optional<internal::ClientChannelGlobalParsedConfig::RetryThrottling>
+          retry_throttle_config = parsed_service_config->retry_throttling();
+      if (retry_throttle_config.has_value()) {
+        retry_throttle_data =
+            internal::ServerRetryThrottleMap::GetDataForServer(
+                chand->server_name_.get(),
+                retry_throttle_config.value().max_milli_tokens,
+                retry_throttle_config.value().milli_token_ratio);
+      }
     }
-    // Create service config setter to update channel state in the data
-    // plane combiner. Destroys itself when done.
-    New<ServiceConfigSetter>(chand, retry_throttle_data,
-                             chand->saved_service_config_);
+    chand->UpdateServiceConfigLocked(std::move(retry_throttle_data),
+                                     chand->saved_service_config_);
   }
   UniquePtr<char> processed_lb_policy_name;
   chand->ProcessLbPolicy(result, parsed_service_config,
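`ServerRetryThrottleMap::GetDataForServer` keys the throttle state by server name, so all channels to the same server share one token bucket parameterized by `max_milli_tokens` and `milli_token_ratio`. A rough standalone sketch of the token-bucket arithmetic these parameters imply, following the gRPC retry design (gRFC A6); this is illustrative, not the library's exact implementation:

```cpp
#include <algorithm>
#include <mutex>

// Illustrative shared token bucket for retry throttling; counts are kept in
// milli-tokens so that fractional token ratios stay integral.
class RetryThrottle {
 public:
  RetryThrottle(int max_milli_tokens, int milli_token_ratio)
      : max_(max_milli_tokens),
        ratio_(milli_token_ratio),
        tokens_(max_milli_tokens) {}

  // Record a failed RPC; returns true if a subsequent retry is permitted.
  bool RecordFailure() {
    std::lock_guard<std::mutex> lock(mu_);
    tokens_ = std::max(0, tokens_ - 1000);  // each failure costs one token
    return tokens_ > max_ / 2;              // throttle once below half-full
  }

  // Record a successful RPC, replenishing the bucket.
  void RecordSuccess() {
    std::lock_guard<std::mutex> lock(mu_);
    tokens_ = std::min(max_, tokens_ + ratio_);
  }

 private:
  std::mutex mu_;
  const int max_;
  const int ratio_;
  int tokens_;
};
```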
@@ -1760,8 +1801,8 @@ void ChannelData::StartTransportOpLocked(void* arg, grpc_error* ignored) {
         static_cast<grpc_connectivity_state>(value) == GRPC_CHANNEL_IDLE) {
       if (chand->disconnect_error() == GRPC_ERROR_NONE) {
         // Enter IDLE state.
-        New<ConnectivityStateAndPickerSetter>(chand, GRPC_CHANNEL_IDLE,
-                                              "channel entering IDLE", nullptr);
+        chand->UpdateStateAndPickerLocked(GRPC_CHANNEL_IDLE,
+                                          "channel entering IDLE", nullptr);
       }
       GRPC_ERROR_UNREF(op->disconnect_with_error);
     } else {
@@ -1770,8 +1811,8 @@ void ChannelData::StartTransportOpLocked(void* arg, grpc_error* ignored) {
                  GRPC_ERROR_NONE);
       chand->disconnect_error_.Store(op->disconnect_with_error,
                                      MemoryOrder::RELEASE);
-      New<ConnectivityStateAndPickerSetter>(
-          chand, GRPC_CHANNEL_SHUTDOWN, "shutdown from API",
+      chand->UpdateStateAndPickerLocked(
+          GRPC_CHANNEL_SHUTDOWN, "shutdown from API",
           UniquePtr<LoadBalancingPolicy::SubchannelPicker>(
               New<LoadBalancingPolicy::TransientFailurePicker>(
                   GRPC_ERROR_REF(op->disconnect_with_error))));
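The shutdown path installs a `TransientFailurePicker` built from the disconnect error, so every subsequent pick fails fast with that error. A sketch of a picker with that behavior, using simplified stand-in types rather than the real gRPC interfaces:

```cpp
#include <string>
#include <utility>

// Simplified stand-ins for the picker interface; not the real gRPC types.
struct PickResult {
  enum Type { PICK_COMPLETE, PICK_QUEUE, PICK_FAILED };
  Type type;
  std::string error;
};

struct SubchannelPicker {
  virtual ~SubchannelPicker() = default;
  virtual PickResult Pick() = 0;
};

// In the spirit of TransientFailurePicker: every pick fails with the error
// captured at shutdown, so queued and future RPCs fail immediately.
class FailurePicker : public SubchannelPicker {
 public:
  explicit FailurePicker(std::string error) : error_(std::move(error)) {}
  PickResult Pick() override { return {PickResult::PICK_FAILED, error_}; }

 private:
  std::string error_;
};
```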
@@ -1932,6 +1973,10 @@ CallData::CallData(grpc_call_element* elem, const ChannelData& chand,
 CallData::~CallData() {
   grpc_slice_unref_internal(path_);
   GRPC_ERROR_UNREF(cancel_error_);
+  if (backend_metric_data_ != nullptr) {
+    backend_metric_data_
+        ->LoadBalancingPolicy::BackendMetricData::~BackendMetricData();
+  }
   // Make sure there are no remaining pending batches.
   for (size_t i = 0; i < GPR_ARRAY_SIZE(pending_batches_); ++i) {
     GPR_ASSERT(pending_batches_[i].batch == nullptr);
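The explicit destructor call on `backend_metric_data_` (rather than `Delete()`) suggests the object lives in the call's arena, whose memory is reclaimed in bulk, so only the destructor may run. A minimal sketch of that placement-new/manual-destroy pattern, with a local buffer standing in for the arena:

```cpp
#include <new>  // placement new

// Stand-in for an arena-allocated struct: memory is reclaimed in bulk with
// the arena, so `delete` must never be called on the object itself.
struct BackendMetricData {
  double cpu_utilization = 0.0;
  ~BackendMetricData() { /* release any owned resources */ }
};

int main() {
  alignas(BackendMetricData) unsigned char storage[sizeof(BackendMetricData)];
  auto* data = new (storage) BackendMetricData();  // construct in place
  data->cpu_utilization = 0.5;
  data->~BackendMetricData();  // run the destructor only; no free here
}
```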
@@ -2010,8 +2055,8 @@ void CallData::StartTransportStreamOpBatch(
   // Add the batch to the pending list.
   calld->PendingBatchesAdd(elem, batch);
   // Check if we've already gotten a subchannel call.
-  // Note that once we have completed the pick, we do not need to enter
-  // the channel combiner, which is more efficient (especially for
+  // Note that once we have picked a subchannel, we do not need to acquire
+  // the channel's data plane mutex, which is more efficient (especially for
   // streaming calls).
   if (calld->subchannel_call_ != nullptr) {
     if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) {
@@ -2023,18 +2068,15 @@ void CallData::StartTransportStreamOpBatch(
     return;
   }
   // We do not yet have a subchannel call.
-  // For batches containing a send_initial_metadata op, enter the channel
-  // combiner to start a pick.
+  // For batches containing a send_initial_metadata op, acquire the
+  // channel's data plane mutex to pick a subchannel.
   if (GPR_LIKELY(batch->send_initial_metadata)) {
     if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) {
-      gpr_log(GPR_INFO, "chand=%p calld=%p: entering client_channel combiner",
+      gpr_log(GPR_INFO,
+              "chand=%p calld=%p: grabbing data plane mutex to perform pick",
               chand, calld);
     }
-    GRPC_CLOSURE_SCHED(
-        GRPC_CLOSURE_INIT(
-            &batch->handler_private.closure, StartPickLocked, elem,
-            grpc_combiner_scheduler(chand->data_plane_combiner())),
-        GRPC_ERROR_NONE);
+    PickSubchannel(elem, GRPC_ERROR_NONE);
   } else {
     // For all other batches, release the call combiner.
     if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) {
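The comments above describe the per-call fast path this change preserves: only the first batch (the one carrying `send_initial_metadata`) pays for the channel-level lock; once a subchannel call exists, batches bypass it entirely. A standalone sketch of that shape, with hypothetical names:

```cpp
#include <mutex>

struct Batch { bool send_initial_metadata = false; };

class Call {
 public:
  void StartBatch(Batch* batch) {
    // Fast path: once a subchannel call exists, batches go straight down
    // without touching any channel-level lock.
    if (have_subchannel_call_) {
      DispatchToSubchannel(batch);
      return;
    }
    // Slow path: only the batch carrying initial metadata triggers a pick,
    // which is the one place the channel's data plane mutex is taken.
    if (batch->send_initial_metadata) {
      std::lock_guard<std::mutex> lock(data_plane_mu_);
      PickSubchannelLocked();
    }
  }

 private:
  void DispatchToSubchannel(Batch* /*batch*/) {}
  void PickSubchannelLocked() { have_subchannel_call_ = true; }
  bool have_subchannel_call_ = false;
  std::mutex data_plane_mu_;
};
```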
@@ -2160,8 +2202,8 @@ void CallData::RecvTrailingMetadataReadyForLoadBalancingPolicy(
   // Invoke callback to LB policy.
   Metadata trailing_metadata(calld, calld->recv_trailing_metadata_);
   calld->lb_recv_trailing_metadata_ready_(
-      calld->lb_recv_trailing_metadata_ready_user_data_, &trailing_metadata,
-      &calld->lb_call_state_);
+      calld->lb_recv_trailing_metadata_ready_user_data_, error,
+      &trailing_metadata, &calld->lb_call_state_);
   // Chain to original callback.
   GRPC_CLOSURE_RUN(calld->original_recv_trailing_metadata_ready_,
                    GRPC_ERROR_REF(error));
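The LB policy's `recv_trailing_metadata_ready` callback now receives the call's error alongside the trailing metadata. A hypothetical policy-side handler with a matching shape (all types here are stand-ins, not the real gRPC signatures), showing why the extra argument matters: the policy can record an outcome even when the transport failed before producing trailing metadata:

```cpp
#include <cstdio>

// Hypothetical stand-ins for the callback's parameter types.
struct TrailingMetadata {};
struct CallState {};

// Matches the invocation order above: user_data, error, trailing metadata,
// call state. A const char* stands in for grpc_error*.
void OnTrailingMetadataReady(void* /*user_data*/, const char* error,
                             TrailingMetadata* /*md*/, CallState* /*state*/) {
  if (error != nullptr) {
    std::printf("call finished with error: %s\n", error);
  }
}
```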
@@ -2462,8 +2504,8 @@ void CallData::DoRetry(grpc_call_element* elem,
             this, next_attempt_time - ExecCtx::Get()->Now());
   }
   // Schedule retry after computed delay.
-  GRPC_CLOSURE_INIT(&pick_closure_, StartPickLocked, elem,
-                    grpc_combiner_scheduler(chand->data_plane_combiner()));
+  GRPC_CLOSURE_INIT(&pick_closure_, PickSubchannel, elem,
+                    grpc_schedule_on_exec_ctx);
   grpc_timer_init(&retry_timer_, next_attempt_time, &pick_closure_);
   // Update bookkeeping.
   if (retry_state != nullptr) retry_state->retry_dispatched = true;
@@ -3168,8 +3210,8 @@ void CallData::AddRetriableSendInitialMetadataOp(
     SubchannelCallRetryState* retry_state,
     SubchannelCallBatchData* batch_data) {
   // Maps the number of retries to the corresponding metadata value slice.
-  static const grpc_slice* retry_count_strings[] = {
-      &GRPC_MDSTR_1, &GRPC_MDSTR_2, &GRPC_MDSTR_3, &GRPC_MDSTR_4};
+  const grpc_slice* retry_count_strings[] = {&GRPC_MDSTR_1, &GRPC_MDSTR_2,
+                                             &GRPC_MDSTR_3, &GRPC_MDSTR_4};
   // We need to make a copy of the metadata batch for each attempt, since
   // the filters in the subchannel stack may modify this batch, and we don't
   // want those modifications to be passed forward to subsequent attempts.
@@ -3186,8 +3228,7 @@ void CallData::AddRetriableSendInitialMetadataOp(
   if (GPR_UNLIKELY(retry_state->send_initial_metadata.idx.named
                        .grpc_previous_rpc_attempts != nullptr)) {
     grpc_metadata_batch_remove(&retry_state->send_initial_metadata,
-                               retry_state->send_initial_metadata.idx.named
-                                   .grpc_previous_rpc_attempts);
+                               GRPC_BATCH_GRPC_PREVIOUS_RPC_ATTEMPTS);
   }
   if (GPR_UNLIKELY(num_attempts_completed_ > 0)) {
     grpc_mdelem retry_md = grpc_mdelem_create(
@@ -3197,7 +3238,7 @@
         &retry_state->send_initial_metadata,
         &retry_state
             ->send_initial_metadata_storage[send_initial_metadata_.list.count],
-        retry_md);
+        retry_md, GRPC_BATCH_GRPC_PREVIOUS_RPC_ATTEMPTS);
     if (GPR_UNLIKELY(error != GRPC_ERROR_NONE)) {
       gpr_log(GPR_ERROR, "error adding retry metadata: %s",
               grpc_error_string(error));
@@ -3579,6 +3620,11 @@ void CallData::CreateSubchannelCall(grpc_call_element* elem) {
   }
 }
 
+void CallData::AsyncPickDone(grpc_call_element* elem, grpc_error* error) {
+  GRPC_CLOSURE_INIT(&pick_closure_, PickDone, elem, grpc_schedule_on_exec_ctx);
+  GRPC_CLOSURE_SCHED(&pick_closure_, error);
+}
+
 void CallData::PickDone(void* arg, grpc_error* error) {
   grpc_call_element* elem = static_cast<grpc_call_element*>(arg);
   ChannelData* chand = static_cast<ChannelData*>(elem->channel_data);
@@ -3601,10 +3647,9 @@ class CallData::QueuedPickCanceller {
  public:
   explicit QueuedPickCanceller(grpc_call_element* elem) : elem_(elem) {
     auto* calld = static_cast<CallData*>(elem->call_data);
-    auto* chand = static_cast<ChannelData*>(elem->channel_data);
     GRPC_CALL_STACK_REF(calld->owning_call_, "QueuedPickCanceller");
     GRPC_CLOSURE_INIT(&closure_, &CancelLocked, this,
-                      grpc_combiner_scheduler(chand->data_plane_combiner()));
+                      grpc_schedule_on_exec_ctx);
     calld->call_combiner_->SetNotifyOnCancel(&closure_);
   }
 
@@ -3613,6 +3658,7 @@ class CallData::QueuedPickCanceller {
     auto* self = static_cast<QueuedPickCanceller*>(arg);
     auto* chand = static_cast<ChannelData*>(self->elem_->channel_data);
     auto* calld = static_cast<CallData*>(self->elem_->call_data);
+    MutexLock lock(chand->data_plane_mu());
     if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_routing_trace)) {
       gpr_log(GPR_INFO,
               "chand=%p calld=%p: cancelling queued pick: "
@@ -3684,7 +3730,7 @@ void CallData::ApplyServiceConfigToCallLocked(grpc_call_element* elem) {
   // from the client API, reset the deadline timer.
   if (chand->deadline_checking_enabled() && method_params_->timeout() != 0) {
     const grpc_millis per_method_deadline =
-        grpc_timespec_to_millis_round_up(call_start_time_) +
+        grpc_cycle_counter_to_millis_round_up(call_start_time_) +
         method_params_->timeout();
     if (per_method_deadline < deadline_) {
       deadline_ = per_method_deadline;
@@ -3731,29 +3777,44 @@ const char* PickResultTypeName(
       return "COMPLETE";
     case LoadBalancingPolicy::PickResult::PICK_QUEUE:
       return "QUEUE";
-    case LoadBalancingPolicy::PickResult::PICK_TRANSIENT_FAILURE:
-      return "TRANSIENT_FAILURE";
+    case LoadBalancingPolicy::PickResult::PICK_FAILED:
+      return "FAILED";
   }
   GPR_UNREACHABLE_CODE(return "UNKNOWN");
 }
 
-void CallData::StartPickLocked(void* arg, grpc_error* error) {
+void CallData::PickSubchannel(void* arg, grpc_error* error) {
   grpc_call_element* elem = static_cast<grpc_call_element*>(arg);
   CallData* calld = static_cast<CallData*>(elem->call_data);
   ChannelData* chand = static_cast<ChannelData*>(elem->channel_data);
-  GPR_ASSERT(calld->connected_subchannel_ == nullptr);
-  GPR_ASSERT(calld->subchannel_call_ == nullptr);
-  // picker's being null means the channel is currently in IDLE state. The
-  // incoming call will make the channel exit IDLE and queue itself.
+  bool pick_complete;
+  {
+    MutexLock lock(chand->data_plane_mu());
+    pick_complete = calld->PickSubchannelLocked(elem, &error);
+  }
+  if (pick_complete) {
+    PickDone(elem, error);
+    GRPC_ERROR_UNREF(error);
+  }
+}
+
+bool CallData::PickSubchannelLocked(grpc_call_element* elem,
+                                    grpc_error** error) {
+  ChannelData* chand = static_cast<ChannelData*>(elem->channel_data);
+  GPR_ASSERT(connected_subchannel_ == nullptr);
+  GPR_ASSERT(subchannel_call_ == nullptr);
+  // The picker being null means that the channel is currently in IDLE state.
+  // The incoming call will make the channel exit IDLE.
   if (chand->picker() == nullptr) {
-    // We are currently in the data plane.
-    // Bounce into the control plane to exit IDLE.
-    chand->CheckConnectivityState(true);
-    calld->AddCallToQueuedPicksLocked(elem);
-    return;
+    // Bounce into the control plane combiner to exit IDLE.
+    chand->CheckConnectivityState(/*try_to_connect=*/true);
+    // Queue the pick, so that it will be attempted once the channel
+    // becomes connected.
+    AddCallToQueuedPicksLocked(elem);
+    return false;
   }
   // Apply service config to call if needed.
-  calld->MaybeApplyServiceConfigToCallLocked(elem);
+  MaybeApplyServiceConfigToCallLocked(elem);
   // If this is a retry, use the send_initial_metadata payload that
   // we've cached; otherwise, use the pending batch. The
   // send_initial_metadata batch will be the first pending batch in the
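The rename from `StartPickLocked` to `PickSubchannel`/`PickSubchannelLocked` captures the new control flow: the unlocked entry point takes the data plane mutex, runs the locked variant, and performs completion work only after releasing the lock. The same shape in a standalone sketch (names and the always-complete pick are illustrative):

```cpp
#include <mutex>
#include <string>

class Call {
 public:
  // Entry point; runs outside any lock.
  void PickSubchannel() {
    std::string error;
    bool pick_complete;
    {
      std::lock_guard<std::mutex> lock(data_plane_mu_);
      pick_complete = PickSubchannelLocked(&error);
    }
    // Completion (starting the call or failing it) happens outside the
    // critical section, keeping it small.
    if (pick_complete) PickDone(error);
  }

 private:
  // Returns true if the pick finished (successfully or not); false if it
  // was queued to wait for a new picker.
  bool PickSubchannelLocked(std::string* error) {
    error->clear();
    return true;  // illustrative: always completes
  }
  void PickDone(const std::string& /*error*/) {}
  std::mutex data_plane_mu_;
};
```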
@@ -3765,42 +3826,38 @@ void CallData::StartPickLocked(void* arg, grpc_error* error) {
   // subchannel's copy of the metadata batch (which is copied for each
   // attempt) to the LB policy instead of the one from the parent channel.
   LoadBalancingPolicy::PickArgs pick_args;
-  pick_args.call_state = &calld->lb_call_state_;
+  pick_args.call_state = &lb_call_state_;
   Metadata initial_metadata(
-      calld,
-      calld->seen_send_initial_metadata_
-          ? &calld->send_initial_metadata_
-          : calld->pending_batches_[0]
+      this,
+      seen_send_initial_metadata_
+          ? &send_initial_metadata_
+          : pending_batches_[0]
                 .batch->payload->send_initial_metadata.send_initial_metadata);
   pick_args.initial_metadata = &initial_metadata;
   // Grab initial metadata flags so that we can check later if the call has
   // wait_for_ready enabled.
   const uint32_t send_initial_metadata_flags =
-      calld->seen_send_initial_metadata_
-          ? calld->send_initial_metadata_flags_
-          : calld->pending_batches_[0]
-                .batch->payload->send_initial_metadata
-                .send_initial_metadata_flags;
-  // When done, we schedule this closure to leave the data plane combiner.
-  GRPC_CLOSURE_INIT(&calld->pick_closure_, PickDone, elem,
-                    grpc_schedule_on_exec_ctx);
+      seen_send_initial_metadata_ ? send_initial_metadata_flags_
+                                  : pending_batches_[0]
+                                        .batch->payload->send_initial_metadata
+                                        .send_initial_metadata_flags;
   // Attempt pick.
   auto result = chand->picker()->Pick(pick_args);
   if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_routing_trace)) {
     gpr_log(GPR_INFO,
             "chand=%p calld=%p: LB pick returned %s (subchannel=%p, error=%s)",
-            chand, calld, PickResultTypeName(result.type),
+            chand, this, PickResultTypeName(result.type),
             result.subchannel.get(), grpc_error_string(result.error));
   }
   switch (result.type) {
-    case LoadBalancingPolicy::PickResult::PICK_TRANSIENT_FAILURE: {
+    case LoadBalancingPolicy::PickResult::PICK_FAILED: {
       // If we're shutting down, fail all RPCs.
       grpc_error* disconnect_error = chand->disconnect_error();
       if (disconnect_error != GRPC_ERROR_NONE) {
         GRPC_ERROR_UNREF(result.error);
-        GRPC_CLOSURE_SCHED(&calld->pick_closure_,
-                           GRPC_ERROR_REF(disconnect_error));
-        break;
+        if (pick_queued_) RemoveCallFromQueuedPicksLocked(elem);
+        *error = GRPC_ERROR_REF(disconnect_error);
+        return true;
       }
       // If wait_for_ready is false, then the error indicates the RPC
       // attempt's final status.
@@ -3808,19 +3865,20 @@ void CallData::StartPickLocked(void* arg, grpc_error* error) {
           GRPC_INITIAL_METADATA_WAIT_FOR_READY) == 0) {
         // Retry if appropriate; otherwise, fail.
         grpc_status_code status = GRPC_STATUS_OK;
-        grpc_error_get_status(result.error, calld->deadline_, &status, nullptr,
+        grpc_error_get_status(result.error, deadline_, &status, nullptr,
                               nullptr, nullptr);
-        if (!calld->enable_retries_ ||
-            !calld->MaybeRetry(elem, nullptr /* batch_data */, status,
-                               nullptr /* server_pushback_md */)) {
+        const bool retried = enable_retries_ &&
+                             MaybeRetry(elem, nullptr /* batch_data */, status,
+                                        nullptr /* server_pushback_md */);
+        if (!retried) {
          grpc_error* new_error =
              GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING(
                  "Failed to pick subchannel", &result.error, 1);
          GRPC_ERROR_UNREF(result.error);
-          GRPC_CLOSURE_SCHED(&calld->pick_closure_, new_error);
+          *error = new_error;
        }
-        if (calld->pick_queued_) calld->RemoveCallFromQueuedPicksLocked(elem);
-        break;
+        if (pick_queued_) RemoveCallFromQueuedPicksLocked(elem);
+        return !retried;
      }
      // If wait_for_ready is true, then queue to retry when we get a new
      // picker.
@@ -3828,26 +3886,26 @@ void CallData::StartPickLocked(void* arg, grpc_error* error) {
    }
    // Fallthrough
    case LoadBalancingPolicy::PickResult::PICK_QUEUE:
-      if (!calld->pick_queued_) calld->AddCallToQueuedPicksLocked(elem);
-      break;
+      if (!pick_queued_) AddCallToQueuedPicksLocked(elem);
+      return false;
    default:  // PICK_COMPLETE
+      if (pick_queued_) RemoveCallFromQueuedPicksLocked(elem);
      // Handle drops.
      if (GPR_UNLIKELY(result.subchannel == nullptr)) {
        result.error = GRPC_ERROR_CREATE_FROM_STATIC_STRING(
            "Call dropped by load balancing policy");
      } else {
        // Grab a ref to the connected subchannel while we're still
-        // holding the data plane combiner.
-        calld->connected_subchannel_ =
+        // holding the data plane mutex.
+        connected_subchannel_ =
            chand->GetConnectedSubchannelInDataPlane(result.subchannel.get());
-        GPR_ASSERT(calld->connected_subchannel_ != nullptr);
+        GPR_ASSERT(connected_subchannel_ != nullptr);
      }
-      calld->lb_recv_trailing_metadata_ready_ =
-          result.recv_trailing_metadata_ready;
-      calld->lb_recv_trailing_metadata_ready_user_data_ =
+      lb_recv_trailing_metadata_ready_ = result.recv_trailing_metadata_ready;
+      lb_recv_trailing_metadata_ready_user_data_ =
          result.recv_trailing_metadata_ready_user_data;
-      GRPC_CLOSURE_SCHED(&calld->pick_closure_, result.error);
-      if (calld->pick_queued_) calld->RemoveCallFromQueuedPicksLocked(elem);
+      *error = result.error;
+      return true;
  }
 }