grpc 1.56.2 → 1.57.0.pre1

Files changed (383)
  1. checksums.yaml +4 -4
  2. data/Makefile +29 -22
  3. data/include/grpc/event_engine/event_engine.h +22 -32
  4. data/include/grpc/impl/grpc_types.h +3 -0
  5. data/include/grpc/support/port_platform.h +29 -23
  6. data/src/core/ext/filters/client_channel/client_channel.cc +44 -8
  7. data/src/core/ext/filters/client_channel/dynamic_filters.h +3 -3
  8. data/src/core/ext/filters/client_channel/http_proxy.cc +5 -0
  9. data/src/core/ext/filters/client_channel/lb_policy/address_filtering.cc +21 -52
  10. data/src/core/ext/filters/client_channel/lb_policy/address_filtering.h +19 -7
  11. data/src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc +25 -35
  12. data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc +78 -132
  13. data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc +2 -1
  14. data/src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.cc +4 -3
  15. data/src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.h +3 -1
  16. data/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc +38 -15
  17. data/src/core/ext/filters/client_channel/lb_policy/health_check_client.h +3 -5
  18. data/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h +22 -6
  19. data/src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h +2 -0
  20. data/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc +97 -71
  21. data/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h +2 -16
  22. data/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +56 -11
  23. data/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h +25 -0
  24. data/src/core/ext/filters/client_channel/lb_policy/priority/priority.cc +6 -32
  25. data/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc +4 -6
  26. data/src/core/ext/filters/client_channel/lb_policy/rls/rls.cc +20 -79
  27. data/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc +1 -1
  28. data/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h +31 -19
  29. data/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc +1 -1
  30. data/src/core/ext/filters/client_channel/lb_policy/weighted_target/weighted_target.cc +7 -41
  31. data/src/core/ext/filters/client_channel/lb_policy/xds/cds.cc +3 -67
  32. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_channel_args.h +8 -0
  33. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_impl.cc +31 -74
  34. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_manager.cc +7 -51
  35. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_resolver.cc +16 -87
  36. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc +16 -50
  37. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_wrr_locality.cc +12 -74
  38. data/src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.cc +1 -4
  39. data/src/core/ext/filters/client_channel/resolver/dns/event_engine/event_engine_client_channel_resolver.cc +69 -59
  40. data/src/core/ext/filters/client_channel/resolver/polling_resolver.cc +7 -2
  41. data/src/core/ext/filters/client_channel/resolver/polling_resolver.h +1 -0
  42. data/src/core/ext/filters/client_channel/resolver/sockaddr/sockaddr_resolver.cc +25 -13
  43. data/src/core/ext/filters/client_channel/resolver/xds/xds_resolver.cc +366 -311
  44. data/src/core/ext/filters/client_channel/resolver/xds/xds_resolver.h +17 -1
  45. data/src/core/ext/filters/client_channel/retry_filter.cc +39 -2498
  46. data/src/core/ext/filters/client_channel/retry_filter.h +91 -1
  47. data/src/core/ext/filters/client_channel/retry_filter_legacy_call_data.cc +2052 -0
  48. data/src/core/ext/filters/client_channel/retry_filter_legacy_call_data.h +442 -0
  49. data/src/core/ext/filters/client_channel/service_config_channel_arg_filter.cc +38 -58
  50. data/src/core/ext/filters/client_channel/subchannel.h +3 -3
  51. data/src/core/ext/filters/client_channel/subchannel_interface_internal.h +3 -0
  52. data/src/core/ext/filters/rbac/rbac_filter.cc +40 -111
  53. data/src/core/ext/filters/rbac/rbac_filter.h +12 -30
  54. data/src/core/ext/filters/stateful_session/stateful_session_filter.cc +162 -86
  55. data/src/core/ext/filters/stateful_session/stateful_session_filter.h +0 -6
  56. data/src/core/ext/transport/chttp2/server/chttp2_server.cc +7 -4
  57. data/src/core/ext/transport/chttp2/transport/chttp2_transport.cc +131 -186
  58. data/src/core/ext/transport/chttp2/transport/decode_huff.cc +6569 -174
  59. data/src/core/ext/transport/chttp2/transport/decode_huff.h +2278 -441
  60. data/src/core/ext/transport/chttp2/transport/frame_ping.cc +2 -3
  61. data/src/core/ext/transport/chttp2/transport/hpack_parse_result.h +4 -3
  62. data/src/core/ext/transport/chttp2/transport/hpack_parser.cc +9 -8
  63. data/src/core/ext/transport/chttp2/transport/hpack_parser_table.cc +4 -4
  64. data/src/core/ext/transport/chttp2/transport/hpack_parser_table.h +3 -2
  65. data/src/core/ext/transport/chttp2/transport/internal.h +8 -4
  66. data/src/core/ext/transport/chttp2/transport/parsing.cc +15 -3
  67. data/src/core/ext/transport/chttp2/transport/writing.cc +2 -3
  68. data/src/core/ext/upb-generated/envoy/config/accesslog/v3/accesslog.upb.c +27 -6
  69. data/src/core/ext/upb-generated/envoy/config/accesslog/v3/accesslog.upb.h +143 -0
  70. data/src/core/ext/upb-generated/envoy/config/core/v3/config_source.upb.c +2 -9
  71. data/src/core/ext/upb-generated/envoy/config/core/v3/config_source.upb.h +0 -39
  72. data/src/core/ext/upb-generated/envoy/config/core/v3/grpc_service.upb.c +13 -8
  73. data/src/core/ext/upb-generated/envoy/config/core/v3/grpc_service.upb.h +35 -6
  74. data/src/core/ext/upb-generated/envoy/config/core/v3/health_check.upb.c +17 -13
  75. data/src/core/ext/upb-generated/envoy/config/core/v3/health_check.upb.h +85 -20
  76. data/src/core/ext/upb-generated/envoy/config/core/v3/protocol.upb.c +26 -7
  77. data/src/core/ext/upb-generated/envoy/config/core/v3/protocol.upb.h +45 -3
  78. data/src/core/ext/upb-generated/envoy/config/metrics/v3/metrics_service.upb.c +4 -3
  79. data/src/core/ext/upb-generated/envoy/config/metrics/v3/metrics_service.upb.h +21 -0
  80. data/src/core/ext/upb-generated/envoy/config/overload/v3/overload.upb.c +30 -6
  81. data/src/core/ext/upb-generated/envoy/config/overload/v3/overload.upb.h +180 -0
  82. data/src/core/ext/upb-generated/envoy/data/accesslog/v3/accesslog.upb.c +558 -0
  83. data/src/core/ext/upb-generated/envoy/data/accesslog/v3/accesslog.upb.h +2710 -0
  84. data/src/core/ext/upb-generated/envoy/extensions/filters/http/fault/v3/fault.upb.c +30 -11
  85. data/src/core/ext/upb-generated/envoy/extensions/filters/http/fault/v3/fault.upb.h +53 -24
  86. data/src/core/ext/upb-generated/envoy/extensions/filters/http/router/v3/router.upb.c +30 -5
  87. data/src/core/ext/upb-generated/envoy/extensions/filters/http/router/v3/router.upb.h +110 -0
  88. data/src/core/ext/upb-generated/envoy/extensions/filters/network/http_connection_manager/v3/http_connection_manager.upb.c +41 -15
  89. data/src/core/ext/upb-generated/envoy/extensions/filters/network/http_connection_manager/v3/http_connection_manager.upb.h +150 -27
  90. data/src/core/ext/upb-generated/envoy/extensions/load_balancing_policies/client_side_weighted_round_robin/v3/client_side_weighted_round_robin.upb.c +1 -0
  91. data/src/core/ext/upb-generated/envoy/extensions/load_balancing_policies/pick_first/v3/pick_first.upb.c +47 -0
  92. data/src/core/ext/upb-generated/envoy/extensions/load_balancing_policies/pick_first/v3/pick_first.upb.h +93 -0
  93. data/src/core/ext/upbdefs-generated/envoy/config/accesslog/v3/accesslog.upbdefs.c +88 -76
  94. data/src/core/ext/upbdefs-generated/envoy/config/accesslog/v3/accesslog.upbdefs.h +5 -0
  95. data/src/core/ext/upbdefs-generated/envoy/config/core/v3/config_source.upbdefs.c +11 -12
  96. data/src/core/ext/upbdefs-generated/envoy/config/core/v3/config_source.upbdefs.h +0 -5
  97. data/src/core/ext/upbdefs-generated/envoy/config/core/v3/grpc_service.upbdefs.c +162 -160
  98. data/src/core/ext/upbdefs-generated/envoy/config/core/v3/health_check.upbdefs.c +129 -118
  99. data/src/core/ext/upbdefs-generated/envoy/config/core/v3/protocol.upbdefs.c +141 -135
  100. data/src/core/ext/upbdefs-generated/envoy/config/metrics/v3/metrics_service.upbdefs.c +19 -12
  101. data/src/core/ext/upbdefs-generated/envoy/config/overload/v3/overload.upbdefs.c +38 -30
  102. data/src/core/ext/upbdefs-generated/envoy/config/overload/v3/overload.upbdefs.h +5 -0
  103. data/src/core/ext/upbdefs-generated/envoy/data/accesslog/v3/accesslog.upbdefs.c +402 -0
  104. data/src/core/ext/upbdefs-generated/envoy/data/accesslog/v3/accesslog.upbdefs.h +111 -0
  105. data/src/core/ext/upbdefs-generated/envoy/extensions/filters/http/fault/v3/fault.upbdefs.c +80 -74
  106. data/src/core/ext/upbdefs-generated/envoy/extensions/filters/http/router/v3/router.upbdefs.c +63 -47
  107. data/src/core/ext/upbdefs-generated/envoy/extensions/filters/http/router/v3/router.upbdefs.h +5 -0
  108. data/src/core/ext/upbdefs-generated/envoy/extensions/filters/network/http_connection_manager/v3/http_connection_manager.upbdefs.c +315 -293
  109. data/src/core/ext/upbdefs-generated/envoy/extensions/filters/network/http_connection_manager/v3/http_connection_manager.upbdefs.h +5 -0
  110. data/src/core/ext/upbdefs-generated/envoy/type/matcher/v3/regex.upbdefs.c +29 -29
  111. data/src/core/ext/xds/xds_bootstrap_grpc.cc +33 -30
  112. data/src/core/ext/xds/xds_bootstrap_grpc.h +5 -13
  113. data/src/core/ext/xds/xds_client_grpc.cc +11 -6
  114. data/src/core/ext/xds/xds_client_grpc.h +16 -2
  115. data/src/core/ext/xds/xds_client_stats.h +10 -0
  116. data/src/core/ext/xds/xds_cluster.cc +26 -16
  117. data/src/core/ext/xds/xds_endpoint.cc +4 -7
  118. data/src/core/ext/xds/xds_health_status.cc +0 -17
  119. data/src/core/ext/xds/xds_health_status.h +5 -25
  120. data/src/core/ext/xds/xds_lb_policy_registry.cc +39 -0
  121. data/src/core/ext/xds/xds_route_config.cc +4 -0
  122. data/src/core/ext/xds/xds_transport_grpc.cc +1 -3
  123. data/src/core/lib/address_utils/parse_address.cc +63 -1
  124. data/src/core/lib/address_utils/parse_address.h +8 -0
  125. data/src/core/lib/address_utils/sockaddr_utils.cc +46 -1
  126. data/src/core/lib/address_utils/sockaddr_utils.h +2 -2
  127. data/src/core/lib/channel/channel_args.cc +21 -10
  128. data/src/core/lib/channel/channel_args.h +3 -0
  129. data/src/core/lib/channel/connected_channel.cc +4 -1
  130. data/src/core/lib/channel/promise_based_filter.h +1 -0
  131. data/src/core/lib/debug/trace.cc +1 -4
  132. data/src/core/lib/event_engine/cf_engine/cf_engine.cc +2 -1
  133. data/src/core/lib/event_engine/cf_engine/cf_engine.h +1 -1
  134. data/src/core/lib/event_engine/event_engine.cc +0 -12
  135. data/src/core/lib/event_engine/forkable.cc +47 -42
  136. data/src/core/lib/event_engine/handle_containers.h +0 -4
  137. data/src/core/lib/event_engine/posix_engine/ev_epoll1_linux.cc +4 -6
  138. data/src/core/lib/event_engine/posix_engine/ev_poll_posix.cc +4 -6
  139. data/src/core/lib/event_engine/posix_engine/posix_endpoint.cc +25 -11
  140. data/src/core/lib/event_engine/posix_engine/posix_endpoint.h +3 -1
  141. data/src/core/lib/event_engine/posix_engine/posix_engine.cc +2 -1
  142. data/src/core/lib/event_engine/posix_engine/posix_engine.h +8 -12
  143. data/src/core/lib/event_engine/posix_engine/posix_engine_listener.cc +37 -27
  144. data/src/core/lib/event_engine/posix_engine/posix_engine_listener.h +2 -0
  145. data/src/core/lib/event_engine/posix_engine/posix_engine_listener_utils.cc +4 -2
  146. data/src/core/lib/event_engine/posix_engine/tcp_socket_utils.cc +42 -2
  147. data/src/core/lib/event_engine/posix_engine/tcp_socket_utils.h +6 -0
  148. data/src/core/lib/event_engine/posix_engine/timer.h +10 -37
  149. data/src/core/lib/event_engine/tcp_socket_utils.cc +67 -7
  150. data/src/core/lib/event_engine/tcp_socket_utils.h +3 -0
  151. data/src/core/lib/event_engine/thread_pool/work_stealing_thread_pool.cc +90 -37
  152. data/src/core/lib/event_engine/thread_pool/work_stealing_thread_pool.h +32 -12
  153. data/src/core/lib/event_engine/thready_event_engine/thready_event_engine.cc +12 -21
  154. data/src/core/lib/event_engine/thready_event_engine/thready_event_engine.h +8 -12
  155. data/src/core/lib/event_engine/windows/windows_endpoint.cc +55 -54
  156. data/src/core/lib/event_engine/windows/windows_endpoint.h +15 -12
  157. data/src/core/lib/event_engine/windows/windows_engine.cc +2 -1
  158. data/src/core/lib/event_engine/windows/windows_engine.h +8 -12
  159. data/src/core/lib/experiments/config.cc +60 -22
  160. data/src/core/lib/experiments/config.h +20 -8
  161. data/src/core/lib/experiments/experiments.cc +278 -0
  162. data/src/core/lib/experiments/experiments.h +59 -1
  163. data/src/core/lib/gprpp/dual_ref_counted.h +9 -9
  164. data/src/core/lib/gprpp/fork.cc +8 -9
  165. data/src/core/lib/gprpp/fork.h +6 -5
  166. data/src/core/lib/gprpp/if_list.h +4530 -0
  167. data/src/core/lib/gprpp/orphanable.h +3 -3
  168. data/src/core/lib/gprpp/ref_counted.h +6 -6
  169. data/src/core/lib/gprpp/sorted_pack.h +3 -12
  170. data/src/core/lib/gprpp/status_helper.h +16 -15
  171. data/src/core/lib/gprpp/time.h +12 -0
  172. data/src/core/lib/gprpp/type_list.h +32 -0
  173. data/src/core/lib/http/httpcli.h +6 -9
  174. data/src/core/lib/iomgr/error.cc +32 -2
  175. data/src/core/lib/iomgr/error.h +9 -10
  176. data/src/core/lib/iomgr/ev_epoll1_linux.cc +5 -7
  177. data/src/core/lib/iomgr/ev_poll_posix.cc +6 -5
  178. data/src/core/lib/iomgr/exec_ctx.h +11 -0
  179. data/src/core/lib/iomgr/pollset.h +4 -5
  180. data/src/core/lib/iomgr/port.h +10 -0
  181. data/src/core/lib/iomgr/resolve_address.cc +13 -1
  182. data/src/core/lib/iomgr/resolve_address.h +17 -3
  183. data/src/core/lib/iomgr/sockaddr_posix.h +7 -0
  184. data/src/core/lib/iomgr/socket_utils_common_posix.cc +29 -0
  185. data/src/core/lib/iomgr/socket_utils_posix.cc +2 -0
  186. data/src/core/lib/iomgr/socket_utils_posix.h +6 -0
  187. data/src/core/lib/iomgr/tcp_client_posix.cc +4 -1
  188. data/src/core/lib/iomgr/tcp_posix.cc +21 -4
  189. data/src/core/lib/iomgr/tcp_server_posix.cc +3 -2
  190. data/src/core/lib/iomgr/tcp_server_utils_posix_common.cc +5 -2
  191. data/src/core/lib/iomgr/tcp_windows.cc +1 -3
  192. data/src/core/lib/iomgr/vsock.cc +59 -0
  193. data/src/core/lib/iomgr/vsock.h +38 -0
  194. data/src/core/lib/iomgr/wakeup_fd_posix.h +3 -6
  195. data/src/core/lib/load_balancing/delegating_helper.h +115 -0
  196. data/src/core/lib/load_balancing/lb_policy.h +20 -0
  197. data/src/core/lib/load_balancing/subchannel_interface.h +6 -0
  198. data/src/core/lib/promise/party.h +1 -1
  199. data/src/core/lib/resolver/resolver_factory.h +3 -2
  200. data/src/core/lib/resolver/server_address.cc +9 -94
  201. data/src/core/lib/resolver/server_address.h +10 -64
  202. data/src/core/lib/resource_quota/memory_quota.h +1 -1
  203. data/src/core/lib/security/credentials/channel_creds_registry.h +51 -27
  204. data/src/core/lib/security/credentials/channel_creds_registry_init.cc +169 -9
  205. data/src/core/lib/security/credentials/composite/composite_credentials.cc +1 -1
  206. data/src/core/lib/security/credentials/composite/composite_credentials.h +3 -1
  207. data/src/core/lib/security/credentials/external/external_account_credentials.cc +40 -1
  208. data/src/core/lib/security/credentials/external/external_account_credentials.h +6 -0
  209. data/src/core/lib/security/credentials/fake/fake_credentials.cc +30 -38
  210. data/src/core/lib/security/credentials/fake/fake_credentials.h +28 -0
  211. data/src/core/lib/security/credentials/tls/tls_credentials.cc +1 -1
  212. data/src/core/lib/security/credentials/tls/tls_credentials.h +3 -1
  213. data/src/core/lib/service_config/service_config_call_data.h +5 -0
  214. data/src/core/lib/slice/slice.h +16 -0
  215. data/src/core/lib/surface/call.cc +31 -29
  216. data/src/core/lib/surface/server.h +2 -2
  217. data/src/core/lib/surface/version.cc +2 -2
  218. data/src/core/lib/transport/metadata_batch.cc +7 -7
  219. data/src/core/lib/transport/metadata_batch.h +86 -48
  220. data/src/core/lib/transport/parsed_metadata.h +34 -20
  221. data/src/core/lib/transport/simple_slice_based_metadata.h +9 -2
  222. data/src/core/tsi/alts/zero_copy_frame_protector/alts_grpc_record_protocol_common.cc +4 -6
  223. data/src/core/tsi/alts/zero_copy_frame_protector/alts_zero_copy_grpc_protector.cc +1 -2
  224. data/src/core/tsi/ssl_transport_security.cc +28 -7
  225. data/src/ruby/bin/math_pb.rb +24 -18
  226. data/src/ruby/ext/grpc/extconf.rb +19 -18
  227. data/src/ruby/ext/grpc/rb_call.c +62 -39
  228. data/src/ruby/ext/grpc/rb_call_credentials.c +0 -1
  229. data/src/ruby/ext/grpc/rb_channel.c +126 -49
  230. data/src/ruby/ext/grpc/rb_channel.h +1 -0
  231. data/src/ruby/ext/grpc/rb_channel_args.c +16 -2
  232. data/src/ruby/ext/grpc/rb_channel_args.h +4 -0
  233. data/src/ruby/ext/grpc/rb_channel_credentials.c +0 -1
  234. data/src/ruby/ext/grpc/rb_compression_options.c +0 -1
  235. data/src/ruby/ext/grpc/rb_event_thread.c +22 -6
  236. data/src/ruby/ext/grpc/rb_event_thread.h +1 -0
  237. data/src/ruby/ext/grpc/rb_grpc.c +192 -30
  238. data/src/ruby/ext/grpc/rb_grpc.h +8 -2
  239. data/src/ruby/ext/grpc/rb_server.c +62 -45
  240. data/src/ruby/ext/grpc/rb_server_credentials.c +0 -1
  241. data/src/ruby/ext/grpc/rb_xds_channel_credentials.c +0 -1
  242. data/src/ruby/ext/grpc/rb_xds_server_credentials.c +0 -1
  243. data/src/ruby/lib/grpc/generic/bidi_call.rb +2 -0
  244. data/src/ruby/lib/grpc/version.rb +1 -1
  245. data/src/ruby/pb/grpc/health/v1/health_pb.rb +24 -13
  246. data/src/ruby/pb/src/proto/grpc/testing/empty_pb.rb +24 -3
  247. data/src/ruby/pb/src/proto/grpc/testing/messages_pb.rb +25 -111
  248. data/src/ruby/pb/src/proto/grpc/testing/test_pb.rb +25 -2
  249. data/third_party/boringssl-with-bazel/err_data.c +552 -552
  250. data/third_party/boringssl-with-bazel/src/crypto/asn1/a_strnid.c +5 -5
  251. data/third_party/boringssl-with-bazel/src/crypto/asn1/a_time.c +34 -1
  252. data/third_party/boringssl-with-bazel/src/crypto/asn1/a_utctm.c +4 -1
  253. data/third_party/boringssl-with-bazel/src/crypto/bio/bio.c +3 -3
  254. data/third_party/boringssl-with-bazel/src/crypto/bio/bio_mem.c +7 -8
  255. data/third_party/boringssl-with-bazel/src/crypto/bio/connect.c +2 -2
  256. data/third_party/boringssl-with-bazel/src/crypto/bio/fd.c +2 -2
  257. data/third_party/boringssl-with-bazel/src/crypto/bio/file.c +8 -8
  258. data/third_party/boringssl-with-bazel/src/crypto/bio/socket.c +2 -2
  259. data/third_party/boringssl-with-bazel/src/crypto/bio/socket_helper.c +2 -2
  260. data/third_party/boringssl-with-bazel/src/crypto/chacha/chacha.c +19 -1
  261. data/third_party/boringssl-with-bazel/src/crypto/chacha/internal.h +8 -1
  262. data/third_party/boringssl-with-bazel/src/crypto/conf/conf.c +28 -185
  263. data/third_party/boringssl-with-bazel/src/crypto/conf/conf_def.h +3 -7
  264. data/third_party/boringssl-with-bazel/src/crypto/conf/internal.h +8 -0
  265. data/third_party/boringssl-with-bazel/src/crypto/cpu_aarch64_apple.c +3 -0
  266. data/third_party/boringssl-with-bazel/src/crypto/curve25519/curve25519.c +49 -46
  267. data/third_party/boringssl-with-bazel/src/crypto/curve25519/curve25519_64_adx.c +18 -0
  268. data/third_party/boringssl-with-bazel/src/crypto/curve25519/curve25519_tables.h +2809 -7417
  269. data/third_party/boringssl-with-bazel/src/crypto/curve25519/internal.h +27 -5
  270. data/third_party/boringssl-with-bazel/src/crypto/dsa/internal.h +20 -0
  271. data/third_party/boringssl-with-bazel/src/crypto/ec_extra/ec_asn1.c +110 -72
  272. data/third_party/boringssl-with-bazel/src/crypto/ec_extra/ec_derive.c +4 -3
  273. data/third_party/boringssl-with-bazel/src/crypto/ec_extra/hash_to_curve.c +15 -14
  274. data/third_party/boringssl-with-bazel/src/crypto/err/err.c +13 -10
  275. data/third_party/boringssl-with-bazel/src/crypto/evp/evp.c +35 -12
  276. data/third_party/boringssl-with-bazel/src/crypto/evp/p_ec.c +2 -4
  277. data/third_party/boringssl-with-bazel/src/crypto/evp/p_ec_asn1.c +3 -7
  278. data/third_party/boringssl-with-bazel/src/crypto/evp/pbkdf.c +3 -3
  279. data/third_party/boringssl-with-bazel/src/crypto/evp/print.c +7 -6
  280. data/third_party/boringssl-with-bazel/src/crypto/ex_data.c +34 -72
  281. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/bcm.c +1 -1
  282. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/bn/div.c +12 -5
  283. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/bn/gcd.c +5 -6
  284. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/bn/internal.h +12 -6
  285. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/bn/montgomery.c +17 -18
  286. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/bn/montgomery_inv.c +51 -15
  287. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/bn/rsaz_exp.c +7 -7
  288. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/delocate.h +5 -6
  289. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/dh/internal.h +2 -0
  290. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/ec/builtin_curves.h +277 -0
  291. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/ec/ec.c +180 -404
  292. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/ec/ec_key.c +3 -3
  293. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/ec/ec_montgomery.c +24 -57
  294. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/ec/felem.c +17 -13
  295. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/ec/internal.h +33 -71
  296. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/ec/oct.c +18 -17
  297. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/ec/p224-64.c +5 -7
  298. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/ec/p256-nistz.c +15 -18
  299. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/ec/p256.c +9 -11
  300. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/ec/scalar.c +24 -24
  301. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/ec/simple.c +11 -27
  302. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/ec/simple_mul.c +8 -8
  303. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/ec/wnaf.c +4 -4
  304. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/ecdsa/ecdsa.c +9 -3
  305. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/hkdf/hkdf.c +1 -1
  306. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/rand/fork_detect.c +40 -26
  307. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/rand/internal.h +21 -7
  308. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/rand/rand.c +38 -19
  309. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/rand/urandom.c +2 -29
  310. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/rsa/internal.h +55 -0
  311. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/rsa/rsa_impl.c +33 -52
  312. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/self_check/self_check.c +3 -8
  313. data/third_party/boringssl-with-bazel/src/crypto/internal.h +198 -79
  314. data/third_party/boringssl-with-bazel/src/crypto/kyber/kyber.c +5 -4
  315. data/third_party/boringssl-with-bazel/src/crypto/mem.c +7 -8
  316. data/third_party/boringssl-with-bazel/src/crypto/obj/obj.c +19 -23
  317. data/third_party/boringssl-with-bazel/src/crypto/pkcs8/internal.h +3 -3
  318. data/third_party/boringssl-with-bazel/src/crypto/pkcs8/p5_pbev2.c +3 -3
  319. data/third_party/boringssl-with-bazel/src/crypto/pkcs8/pkcs8.c +7 -7
  320. data/third_party/boringssl-with-bazel/src/crypto/pkcs8/pkcs8_x509.c +8 -5
  321. data/third_party/boringssl-with-bazel/src/crypto/pool/internal.h +1 -0
  322. data/third_party/boringssl-with-bazel/src/crypto/rand_extra/deterministic.c +7 -6
  323. data/third_party/boringssl-with-bazel/src/crypto/rand_extra/forkunsafe.c +6 -12
  324. data/third_party/boringssl-with-bazel/src/crypto/rand_extra/getentropy.c +48 -0
  325. data/third_party/boringssl-with-bazel/src/crypto/rand_extra/{fuchsia.c → ios.c} +8 -8
  326. data/third_party/boringssl-with-bazel/src/crypto/{refcount_no_threads.c → rand_extra/trusty.c} +15 -19
  327. data/third_party/boringssl-with-bazel/src/crypto/rand_extra/windows.c +41 -19
  328. data/third_party/boringssl-with-bazel/src/crypto/{refcount_c11.c → refcount.c} +11 -17
  329. data/third_party/boringssl-with-bazel/src/crypto/stack/stack.c +147 -72
  330. data/third_party/boringssl-with-bazel/src/crypto/thread_none.c +0 -8
  331. data/third_party/boringssl-with-bazel/src/crypto/thread_pthread.c +6 -35
  332. data/third_party/boringssl-with-bazel/src/crypto/thread_win.c +5 -26
  333. data/third_party/boringssl-with-bazel/src/crypto/trust_token/pmbtoken.c +14 -18
  334. data/third_party/boringssl-with-bazel/src/crypto/trust_token/voprf.c +54 -143
  335. data/third_party/boringssl-with-bazel/src/crypto/x509/by_dir.c +7 -13
  336. data/third_party/boringssl-with-bazel/src/crypto/x509/internal.h +1 -1
  337. data/third_party/boringssl-with-bazel/src/crypto/x509/x509_att.c +1 -1
  338. data/third_party/boringssl-with-bazel/src/crypto/x509/x509_lu.c +2 -4
  339. data/third_party/boringssl-with-bazel/src/crypto/x509/x509_req.c +2 -2
  340. data/third_party/boringssl-with-bazel/src/crypto/x509/x509_trs.c +1 -1
  341. data/third_party/boringssl-with-bazel/src/crypto/x509/x509_v3.c +8 -12
  342. data/third_party/boringssl-with-bazel/src/crypto/x509/x509_vfy.c +19 -20
  343. data/third_party/boringssl-with-bazel/src/crypto/x509/x509name.c +11 -15
  344. data/third_party/boringssl-with-bazel/src/crypto/x509/x_crl.c +5 -5
  345. data/third_party/boringssl-with-bazel/src/crypto/x509/x_name.c +1 -1
  346. data/third_party/boringssl-with-bazel/src/crypto/x509/x_pubkey.c +7 -7
  347. data/third_party/boringssl-with-bazel/src/crypto/x509v3/v3_lib.c +2 -3
  348. data/third_party/boringssl-with-bazel/src/crypto/x509v3/v3_purp.c +4 -6
  349. data/third_party/boringssl-with-bazel/src/include/openssl/arm_arch.h +4 -119
  350. data/third_party/boringssl-with-bazel/src/include/openssl/asm_base.h +207 -0
  351. data/third_party/boringssl-with-bazel/src/include/openssl/asn1.h +5 -0
  352. data/third_party/boringssl-with-bazel/src/include/openssl/base.h +2 -116
  353. data/third_party/boringssl-with-bazel/src/include/openssl/bn.h +0 -2
  354. data/third_party/boringssl-with-bazel/src/include/openssl/chacha.h +6 -0
  355. data/third_party/boringssl-with-bazel/src/include/openssl/conf.h +5 -1
  356. data/third_party/boringssl-with-bazel/src/include/openssl/dsa.h +0 -21
  357. data/third_party/boringssl-with-bazel/src/include/openssl/ec.h +21 -2
  358. data/third_party/boringssl-with-bazel/src/include/openssl/ec_key.h +19 -6
  359. data/third_party/boringssl-with-bazel/src/include/openssl/evp.h +11 -7
  360. data/third_party/boringssl-with-bazel/src/include/openssl/rand.h +13 -14
  361. data/third_party/boringssl-with-bazel/src/include/openssl/rsa.h +0 -61
  362. data/third_party/boringssl-with-bazel/src/include/openssl/ssl.h +127 -81
  363. data/third_party/boringssl-with-bazel/src/include/openssl/stack.h +224 -209
  364. data/third_party/boringssl-with-bazel/src/include/openssl/target.h +154 -0
  365. data/third_party/boringssl-with-bazel/src/include/openssl/thread.h +1 -29
  366. data/third_party/boringssl-with-bazel/src/include/openssl/x509.h +4 -4
  367. data/third_party/boringssl-with-bazel/src/include/openssl/x509v3.h +3 -2
  368. data/third_party/boringssl-with-bazel/src/ssl/extensions.cc +9 -65
  369. data/third_party/boringssl-with-bazel/src/ssl/handoff.cc +20 -20
  370. data/third_party/boringssl-with-bazel/src/ssl/handshake_server.cc +1 -1
  371. data/third_party/boringssl-with-bazel/src/ssl/internal.h +4 -11
  372. data/third_party/boringssl-with-bazel/src/ssl/ssl_cipher.cc +24 -18
  373. data/third_party/boringssl-with-bazel/src/ssl/ssl_key_share.cc +37 -30
  374. data/third_party/boringssl-with-bazel/src/ssl/ssl_lib.cc +125 -26
  375. data/third_party/boringssl-with-bazel/src/ssl/tls13_client.cc +2 -3
  376. data/third_party/boringssl-with-bazel/src/third_party/fiat/curve25519_64_adx.h +691 -0
  377. data/third_party/upb/upb/collections/map.c +3 -3
  378. metadata +27 -12
  379. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_attributes.cc +0 -42
  380. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_attributes.h +0 -64
  381. data/src/core/ext/transport/chttp2/transport/stream_map.cc +0 -177
  382. data/src/core/ext/transport/chttp2/transport/stream_map.h +0 -68
  383. data/third_party/boringssl-with-bazel/src/crypto/refcount_win.c +0 -89
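
The largest single change in this release is visible in entries 45-48 above: roughly 2,500 lines of per-call retry logic move out of retry_filter.cc into the new files retry_filter_legacy_call_data.h/.cc. The hunk below is the new retry_filter_legacy_call_data.cc. Its first class, CallStackDestructionBarrier, implements a destruction barrier via ref-counting: the parent LegacyCallData holds one ref, each LB call stack holds another, and the destructor (which runs when the last ref is dropped) fires the surface's on_call_stack_destruction closure. The following is a minimal stand-alone sketch of that idiom, using std::shared_ptr in place of gRPC's RefCounted<>; all names in it are illustrative, not gRPC API:

#include <functional>
#include <iostream>
#include <memory>

// Simplified stand-in for CallStackDestructionBarrier in the diff below:
// each in-flight "call stack" holds one ref, and the callback stored by
// the parent runs only when the last ref is dropped.
class Barrier {
 public:
  explicit Barrier(std::function<void()> on_all_destroyed)
      : on_all_destroyed_(std::move(on_all_destroyed)) {}
  // Fires once the last shared_ptr to this object goes away.
  ~Barrier() { on_all_destroyed_(); }

 private:
  std::function<void()> on_all_destroyed_;
};

int main() {
  // The parent and two "LB call stacks" share ownership of the barrier.
  auto barrier = std::make_shared<Barrier>(
      [] { std::cout << "all call stacks destroyed\n"; });
  std::shared_ptr<Barrier> lb_call1 = barrier;  // ref per LB call
  std::shared_ptr<Barrier> lb_call2 = barrier;
  barrier.reset();   // parent releases its ref first...
  lb_call1.reset();  // ...but the callback does not fire yet...
  lb_call2.reset();  // ...until the last LB call's ref is dropped.
  return 0;
}

The real barrier in the diff additionally allocates each LB call's unref closure on the call arena (MakeLbCallDestructionClosure), but the ordering guarantee is the same: the surface closure cannot run while any LB call stack is still alive.
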
@@ -0,0 +1,2052 @@
1
+ // Copyright 2023 gRPC authors.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+
15
+ #include <grpc/support/port_platform.h>
16
+
17
+ #include "src/core/ext/filters/client_channel/retry_filter_legacy_call_data.h"
18
+
19
+ #include <inttypes.h>
20
+
21
+ #include <memory>
22
+ #include <new>
23
+ #include <string>
24
+
25
+ #include "absl/status/status.h"
26
+ #include "absl/strings/str_cat.h"
27
+
28
+ #include <grpc/support/log.h>
29
+
30
+ #include "src/core/ext/filters/client_channel/client_channel_internal.h"
31
+ #include "src/core/ext/filters/client_channel/retry_service_config.h"
32
+ #include "src/core/ext/filters/client_channel/retry_throttle.h"
33
+ #include "src/core/lib/backoff/backoff.h"
34
+ #include "src/core/lib/channel/channel_stack.h"
35
+ #include "src/core/lib/channel/context.h"
36
+ #include "src/core/lib/channel/status_util.h"
37
+ #include "src/core/lib/debug/trace.h"
38
+ #include "src/core/lib/gpr/useful.h"
39
+ #include "src/core/lib/gprpp/construct_destruct.h"
40
+ #include "src/core/lib/gprpp/debug_location.h"
41
+ #include "src/core/lib/gprpp/orphanable.h"
42
+ #include "src/core/lib/gprpp/ref_counted.h"
43
+ #include "src/core/lib/gprpp/ref_counted_ptr.h"
44
+ #include "src/core/lib/gprpp/status_helper.h"
45
+ #include "src/core/lib/gprpp/time.h"
46
+ #include "src/core/lib/iomgr/call_combiner.h"
47
+ #include "src/core/lib/iomgr/closure.h"
48
+ #include "src/core/lib/iomgr/error.h"
49
+ #include "src/core/lib/iomgr/exec_ctx.h"
50
+ #include "src/core/lib/iomgr/polling_entity.h"
51
+ #include "src/core/lib/resource_quota/arena.h"
52
+ #include "src/core/lib/slice/slice.h"
53
+ #include "src/core/lib/slice/slice_buffer.h"
54
+ #include "src/core/lib/transport/error_utils.h"
55
+ #include "src/core/lib/transport/metadata_batch.h"
56
+ #include "src/core/lib/transport/transport.h"
57
+
58
+ namespace grpc_core {
59
+
60
+ //
61
+ // RetryFilter::LegacyCallData::CallStackDestructionBarrier
62
+ //
63
+
64
+ // A class to track the existence of LoadBalancedCall call stacks that
65
+ // we've created. We wait until all such call stacks have been
66
+ // destroyed before we return the on_call_stack_destruction closure up
67
+ // to the surface.
68
+ //
69
+ // The parent RetryFilter::LegacyCallData object holds a ref to this object.
70
+ // When it is destroyed, it will store the on_call_stack_destruction
71
+ // closure from the surface in this object and then release its ref.
72
+ // We also take a ref to this object for each LB call we create, and
73
+ // those refs are not released until the LB call stack is destroyed.
74
+ // When this object is destroyed, it will invoke the
75
+ // on_call_stack_destruction closure from the surface.
76
+ class RetryFilter::LegacyCallData::CallStackDestructionBarrier
77
+ : public RefCounted<CallStackDestructionBarrier, PolymorphicRefCount,
78
+ UnrefCallDtor> {
79
+ public:
80
+ CallStackDestructionBarrier() {}
81
+
82
+ ~CallStackDestructionBarrier() override {
83
+ // TODO(yashkt) : This can potentially be a Closure::Run
84
+ ExecCtx::Run(DEBUG_LOCATION, on_call_stack_destruction_, absl::OkStatus());
85
+ }
86
+
87
+ // Set the closure from the surface. This closure will be invoked
88
+ // when this object is destroyed.
89
+ void set_on_call_stack_destruction(grpc_closure* on_call_stack_destruction) {
90
+ on_call_stack_destruction_ = on_call_stack_destruction;
91
+ }
92
+
93
+ // Invoked to get an on_call_stack_destruction closure for a new LB call.
94
+ grpc_closure* MakeLbCallDestructionClosure(
95
+ RetryFilter::LegacyCallData* calld) {
96
+ Ref().release(); // Ref held by callback.
97
+ grpc_closure* on_lb_call_destruction_complete =
98
+ calld->arena_->New<grpc_closure>();
99
+ GRPC_CLOSURE_INIT(on_lb_call_destruction_complete,
100
+ OnLbCallDestructionComplete, this, nullptr);
101
+ return on_lb_call_destruction_complete;
102
+ }
103
+
104
+ private:
105
+ static void OnLbCallDestructionComplete(void* arg,
106
+ grpc_error_handle /*error*/) {
107
+ auto* self = static_cast<CallStackDestructionBarrier*>(arg);
108
+ self->Unref();
109
+ }
110
+
111
+ grpc_closure* on_call_stack_destruction_ = nullptr;
112
+ };
113
+
114
+ //
115
+ // RetryFilter::LegacyCallData::CallAttempt
116
+ //
117
+
118
+ RetryFilter::LegacyCallData::CallAttempt::CallAttempt(
119
+ RetryFilter::LegacyCallData* calld, bool is_transparent_retry)
120
+ : RefCounted(GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace) ? "CallAttempt"
121
+ : nullptr),
122
+ calld_(calld),
123
+ batch_payload_(calld->call_context_),
124
+ started_send_initial_metadata_(false),
125
+ completed_send_initial_metadata_(false),
126
+ started_send_trailing_metadata_(false),
127
+ completed_send_trailing_metadata_(false),
128
+ started_recv_initial_metadata_(false),
129
+ completed_recv_initial_metadata_(false),
130
+ started_recv_trailing_metadata_(false),
131
+ completed_recv_trailing_metadata_(false),
132
+ sent_cancel_stream_(false),
133
+ seen_recv_trailing_metadata_from_surface_(false),
134
+ abandoned_(false) {
135
+ lb_call_ = calld->CreateLoadBalancedCall(
136
+ [this]() {
137
+ lb_call_committed_ = true;
138
+ if (calld_->retry_committed_) {
139
+ auto* service_config_call_data =
140
+ static_cast<ClientChannelServiceConfigCallData*>(
141
+ calld_->call_context_[GRPC_CONTEXT_SERVICE_CONFIG_CALL_DATA]
142
+ .value);
143
+ service_config_call_data->Commit();
144
+ }
145
+ },
146
+ is_transparent_retry);
147
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
148
+ gpr_log(GPR_INFO,
149
+ "chand=%p calld=%p attempt=%p: created attempt, lb_call=%p",
150
+ calld->chand_, calld, this, lb_call_.get());
151
+ }
152
+ // If per_attempt_recv_timeout is set, start a timer.
153
+ if (calld->retry_policy_ != nullptr &&
154
+ calld->retry_policy_->per_attempt_recv_timeout().has_value()) {
155
+ const Duration per_attempt_recv_timeout =
156
+ *calld->retry_policy_->per_attempt_recv_timeout();
157
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
158
+ gpr_log(GPR_INFO,
159
+ "chand=%p calld=%p attempt=%p: per-attempt timeout in %" PRId64
160
+ " ms",
161
+ calld->chand_, calld, this, per_attempt_recv_timeout.millis());
162
+ }
163
+ // Schedule retry after computed delay.
164
+ GRPC_CALL_STACK_REF(calld->owning_call_, "OnPerAttemptRecvTimer");
165
+ Ref(DEBUG_LOCATION, "OnPerAttemptRecvTimer").release();
166
+ per_attempt_recv_timer_handle_ = calld_->chand_->event_engine()->RunAfter(
167
+ per_attempt_recv_timeout, [this] {
168
+ ApplicationCallbackExecCtx callback_exec_ctx;
169
+ ExecCtx exec_ctx;
170
+ OnPerAttemptRecvTimer();
171
+ });
172
+ }
173
+ }
174
+
175
+ RetryFilter::LegacyCallData::CallAttempt::~CallAttempt() {
176
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
177
+ gpr_log(GPR_INFO, "chand=%p calld=%p attempt=%p: destroying call attempt",
178
+ calld_->chand_, calld_, this);
179
+ }
180
+ }
181
+
182
+ void RetryFilter::LegacyCallData::CallAttempt::
183
+ FreeCachedSendOpDataAfterCommit() {
184
+ // TODO(roth): When we implement hedging, this logic will need to get
185
+ // a bit more complex, because there may be other (now abandoned) call
186
+ // attempts still using this data. We may need to do some sort of
187
+ // ref-counting instead.
188
+ if (completed_send_initial_metadata_) {
189
+ calld_->FreeCachedSendInitialMetadata();
190
+ }
191
+ for (size_t i = 0; i < completed_send_message_count_; ++i) {
192
+ calld_->FreeCachedSendMessage(i);
193
+ }
194
+ if (completed_send_trailing_metadata_) {
195
+ calld_->FreeCachedSendTrailingMetadata();
196
+ }
197
+ }
198
+
199
+ bool RetryFilter::LegacyCallData::CallAttempt::
200
+ PendingBatchContainsUnstartedSendOps(PendingBatch* pending) {
201
+ if (pending->batch->on_complete == nullptr) return false;
202
+ if (pending->batch->send_initial_metadata &&
203
+ !started_send_initial_metadata_) {
204
+ return true;
205
+ }
206
+ if (pending->batch->send_message &&
207
+ started_send_message_count_ < calld_->send_messages_.size()) {
208
+ return true;
209
+ }
210
+ if (pending->batch->send_trailing_metadata &&
211
+ !started_send_trailing_metadata_) {
212
+ return true;
213
+ }
214
+ return false;
215
+ }
216
+
217
+ bool RetryFilter::LegacyCallData::CallAttempt::HaveSendOpsToReplay() {
218
+ // We don't check send_initial_metadata here, because that op will always
219
+ // be started as soon as it is received from the surface, so it will
220
+ // never need to be started at this point.
221
+ return started_send_message_count_ < calld_->send_messages_.size() ||
222
+ (calld_->seen_send_trailing_metadata_ &&
223
+ !started_send_trailing_metadata_);
224
+ }
225
+
226
+ void RetryFilter::LegacyCallData::CallAttempt::MaybeSwitchToFastPath() {
227
+ // If we're not yet committed, we can't switch yet.
228
+ // TODO(roth): As part of implementing hedging, this logic needs to
229
+ // check that *this* call attempt is the one that we've committed to.
230
+ // Might need to replace abandoned_ with an enum indicating whether we're
231
+ // in flight, abandoned, or the winning call attempt.
232
+ if (!calld_->retry_committed_) return;
233
+ // If we've already switched to fast path, there's nothing to do here.
234
+ if (calld_->committed_call_ != nullptr) return;
235
+ // If the perAttemptRecvTimeout timer is pending, we can't switch yet.
236
+ if (per_attempt_recv_timer_handle_.has_value()) return;
237
+ // If there are still send ops to replay, we can't switch yet.
238
+ if (HaveSendOpsToReplay()) return;
239
+ // If we started an internal batch for recv_trailing_metadata but have not
240
+ // yet seen that op from the surface, we can't switch yet.
241
+ if (recv_trailing_metadata_internal_batch_ != nullptr) return;
242
+ // Switch to fast path.
243
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
244
+ gpr_log(GPR_INFO,
245
+ "chand=%p calld=%p attempt=%p: retry state no longer needed; "
246
+ "moving LB call to parent and unreffing the call attempt",
247
+ calld_->chand_, calld_, this);
248
+ }
249
+ calld_->committed_call_ = std::move(lb_call_);
250
+ calld_->call_attempt_.reset(DEBUG_LOCATION, "MaybeSwitchToFastPath");
251
+ }
252
+
253
+ // If there are any cached send ops that need to be replayed on the
254
+ // current call attempt, creates and returns a new batch to replay those ops.
255
+ // Otherwise, returns nullptr.
256
+ RetryFilter::LegacyCallData::CallAttempt::BatchData*
257
+ RetryFilter::LegacyCallData::CallAttempt::MaybeCreateBatchForReplay() {
258
+ BatchData* replay_batch_data = nullptr;
259
+ // send_initial_metadata.
260
+ if (calld_->seen_send_initial_metadata_ && !started_send_initial_metadata_ &&
261
+ !calld_->pending_send_initial_metadata_) {
262
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
263
+ gpr_log(GPR_INFO,
264
+ "chand=%p calld=%p attempt=%p: replaying previously completed "
265
+ "send_initial_metadata op",
266
+ calld_->chand_, calld_, this);
267
+ }
268
+ replay_batch_data = CreateBatch(1, true /* set_on_complete */);
269
+ replay_batch_data->AddRetriableSendInitialMetadataOp();
270
+ }
271
+ // send_message.
272
+ // Note that we can only have one send_message op in flight at a time.
273
+ if (started_send_message_count_ < calld_->send_messages_.size() &&
274
+ started_send_message_count_ == completed_send_message_count_ &&
275
+ !calld_->pending_send_message_) {
276
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
277
+ gpr_log(GPR_INFO,
278
+ "chand=%p calld=%p attempt=%p: replaying previously completed "
279
+ "send_message op",
280
+ calld_->chand_, calld_, this);
281
+ }
282
+ if (replay_batch_data == nullptr) {
283
+ replay_batch_data = CreateBatch(1, true /* set_on_complete */);
284
+ }
285
+ replay_batch_data->AddRetriableSendMessageOp();
286
+ }
287
+ // send_trailing_metadata.
288
+ // Note that we only add this op if we have no more send_message ops
289
+ // to start, since we can't send down any more send_message ops after
290
+ // send_trailing_metadata.
291
+ if (calld_->seen_send_trailing_metadata_ &&
292
+ started_send_message_count_ == calld_->send_messages_.size() &&
293
+ !started_send_trailing_metadata_ &&
294
+ !calld_->pending_send_trailing_metadata_) {
295
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
296
+ gpr_log(GPR_INFO,
297
+ "chand=%p calld=%p attempt=%p: replaying previously completed "
298
+ "send_trailing_metadata op",
299
+ calld_->chand_, calld_, this);
300
+ }
301
+ if (replay_batch_data == nullptr) {
302
+ replay_batch_data = CreateBatch(1, true /* set_on_complete */);
303
+ }
304
+ replay_batch_data->AddRetriableSendTrailingMetadataOp();
305
+ }
306
+ return replay_batch_data;
307
+ }
308
+
309
+ namespace {
310
+
311
+ void StartBatchInCallCombiner(void* arg, grpc_error_handle /*ignored*/) {
312
+ grpc_transport_stream_op_batch* batch =
313
+ static_cast<grpc_transport_stream_op_batch*>(arg);
314
+ auto* lb_call = static_cast<ClientChannel::FilterBasedLoadBalancedCall*>(
315
+ batch->handler_private.extra_arg);
316
+ // Note: This will release the call combiner.
317
+ lb_call->StartTransportStreamOpBatch(batch);
318
+ }
319
+
320
+ } // namespace
321
+
322
+ void RetryFilter::LegacyCallData::CallAttempt::AddClosureForBatch(
323
+ grpc_transport_stream_op_batch* batch, const char* reason,
324
+ CallCombinerClosureList* closures) {
325
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
326
+ gpr_log(GPR_INFO, "chand=%p calld=%p attempt=%p: adding batch (%s): %s",
327
+ calld_->chand_, calld_, this, reason,
328
+ grpc_transport_stream_op_batch_string(batch, false).c_str());
329
+ }
330
+ batch->handler_private.extra_arg = lb_call_.get();
331
+ GRPC_CLOSURE_INIT(&batch->handler_private.closure, StartBatchInCallCombiner,
332
+ batch, grpc_schedule_on_exec_ctx);
333
+ closures->Add(&batch->handler_private.closure, absl::OkStatus(), reason);
334
+ }
335
+
336
+ void RetryFilter::LegacyCallData::CallAttempt::
337
+ AddBatchForInternalRecvTrailingMetadata(CallCombinerClosureList* closures) {
338
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
339
+ gpr_log(GPR_INFO,
340
+ "chand=%p calld=%p attempt=%p: call failed but "
341
+ "recv_trailing_metadata not started; starting it internally",
342
+ calld_->chand_, calld_, this);
343
+ }
344
+ // Create batch_data with 2 refs, since this batch will be unreffed twice:
345
+ // once for the recv_trailing_metadata_ready callback when the batch
346
+ // completes, and again when we actually get a recv_trailing_metadata
347
+ // op from the surface.
348
+ BatchData* batch_data = CreateBatch(2, false /* set_on_complete */);
349
+ batch_data->AddRetriableRecvTrailingMetadataOp();
350
+ recv_trailing_metadata_internal_batch_.reset(batch_data);
351
+ AddClosureForBatch(batch_data->batch(),
352
+ "starting internal recv_trailing_metadata", closures);
353
+ }
354
+
355
+ void RetryFilter::LegacyCallData::CallAttempt::MaybeAddBatchForCancelOp(
356
+ grpc_error_handle error, CallCombinerClosureList* closures) {
357
+ if (sent_cancel_stream_) {
358
+ return;
359
+ }
360
+ sent_cancel_stream_ = true;
361
+ BatchData* cancel_batch_data = CreateBatch(1, /*set_on_complete=*/true);
362
+ cancel_batch_data->AddCancelStreamOp(error);
363
+ AddClosureForBatch(cancel_batch_data->batch(),
364
+ "start cancellation batch on call attempt", closures);
365
+ }
366
+
367
+ void RetryFilter::LegacyCallData::CallAttempt::AddBatchesForPendingBatches(
368
+ CallCombinerClosureList* closures) {
369
+ for (size_t i = 0; i < GPR_ARRAY_SIZE(calld_->pending_batches_); ++i) {
370
+ PendingBatch* pending = &calld_->pending_batches_[i];
371
+ grpc_transport_stream_op_batch* batch = pending->batch;
372
+ if (batch == nullptr) continue;
373
+ bool has_send_ops = false;
374
+ // Skip any batch that either (a) has already been started on this
375
+ // call attempt or (b) we can't start yet because we're still
376
+ // replaying send ops that need to be completed first.
377
+ // TODO(roth): Note that if any one op in the batch can't be sent
378
+ // yet due to ops that we're replaying, we don't start any of the ops
379
+ // in the batch. This is probably okay, but it could conceivably
380
+ // lead to increased latency in some cases -- e.g., we could delay
381
+ // starting a recv op due to it being in the same batch with a send
382
+ // op. If/when we revamp the callback protocol in
383
+ // transport_stream_op_batch, we may be able to fix this.
384
+ if (batch->send_initial_metadata) {
385
+ if (started_send_initial_metadata_) continue;
386
+ has_send_ops = true;
387
+ }
388
+ if (batch->send_message) {
389
+ // Cases where we can't start this send_message op:
390
+ // - We are currently replaying a previous cached send_message op.
391
+ // - We have already replayed all send_message ops, including this
392
+ // one. (This can happen if a send_message op is in the same
393
+ // batch as a recv op, the send_message op has already completed
394
+ // but the recv op hasn't, and then a subsequent batch with another
395
+ // recv op is started from the surface.)
396
+ if (completed_send_message_count_ < started_send_message_count_ ||
397
+ completed_send_message_count_ ==
398
+ (calld_->send_messages_.size() + !pending->send_ops_cached)) {
399
+ continue;
400
+ }
401
+ has_send_ops = true;
402
+ }
403
+ // Note that we only start send_trailing_metadata if we have no more
404
+ // send_message ops to start, since we can't send down any more
405
+ // send_message ops after send_trailing_metadata.
406
+ if (batch->send_trailing_metadata) {
407
+ if (started_send_message_count_ + batch->send_message <
408
+ calld_->send_messages_.size() ||
409
+ started_send_trailing_metadata_) {
410
+ continue;
411
+ }
412
+ has_send_ops = true;
413
+ }
414
+ int num_callbacks = has_send_ops; // All send ops share one callback.
415
+ if (batch->recv_initial_metadata) {
416
+ if (started_recv_initial_metadata_) continue;
417
+ ++num_callbacks;
418
+ }
419
+ if (batch->recv_message) {
420
+ // Skip if the op is already in flight, or if it has already completed
421
+ // but the completion has not yet been sent to the surface.
422
+ if (completed_recv_message_count_ < started_recv_message_count_ ||
423
+ recv_message_ready_deferred_batch_ != nullptr) {
424
+ continue;
425
+ }
426
+ ++num_callbacks;
427
+ }
428
+ if (batch->recv_trailing_metadata) {
429
+ if (started_recv_trailing_metadata_) {
430
+ seen_recv_trailing_metadata_from_surface_ = true;
431
+ // If we previously completed a recv_trailing_metadata op
432
+ // initiated by AddBatchForInternalRecvTrailingMetadata(), use the
433
+ // result of that instead of trying to re-start this op.
434
+ if (GPR_UNLIKELY(recv_trailing_metadata_internal_batch_ != nullptr)) {
435
+ // If the batch completed, then trigger the completion callback
436
+ // directly, so that we return the previously returned results to
437
+ // the application. Otherwise, just unref the internally started
438
+ // batch, since we'll propagate the completion when it completes.
439
+ if (completed_recv_trailing_metadata_) {
440
+ closures->Add(
441
+ &recv_trailing_metadata_ready_, recv_trailing_metadata_error_,
442
+ "re-executing recv_trailing_metadata_ready to propagate "
443
+ "internally triggered result");
444
+ // Ref will be released by callback.
445
+ recv_trailing_metadata_internal_batch_.release();
446
+ } else {
447
+ recv_trailing_metadata_internal_batch_.reset(
448
+ DEBUG_LOCATION,
449
+ "internally started recv_trailing_metadata batch pending and "
450
+ "recv_trailing_metadata started from surface");
451
+ }
452
+ recv_trailing_metadata_error_ = absl::OkStatus();
453
+ }
454
+ // We don't want the fact that we've already started this op internally
455
+ // to prevent us from adding a batch that may contain other ops.
456
+ // Instead, we'll just skip adding this op below.
457
+ if (num_callbacks == 0) continue;
458
+ } else {
459
+ ++num_callbacks;
460
+ }
461
+ }
462
+ // If we're already committed and the following conditions are met,
463
+ // just send the batch down as-is:
464
+ // - The batch contains no cached send ops. (If it does, we need
465
+ // the logic below to use the cached payloads.)
466
+ // - The batch does not contain recv_trailing_metadata when we have
467
+ // already started an internal recv_trailing_metadata batch. (If
468
+ // we've already started an internal recv_trailing_metadata batch,
469
+ // then we need the logic below to send all ops in the batch
470
+ // *except* the recv_trailing_metadata op.)
471
+ if (calld_->retry_committed_ && !pending->send_ops_cached &&
472
+ (!batch->recv_trailing_metadata || !started_recv_trailing_metadata_)) {
473
+ AddClosureForBatch(
474
+ batch,
475
+ "start non-replayable pending batch on call attempt after commit",
476
+ closures);
477
+ calld_->PendingBatchClear(pending);
478
+ continue;
479
+ }
480
+ // Create batch with the right number of callbacks.
481
+ BatchData* batch_data =
482
+ CreateBatch(num_callbacks, has_send_ops /* set_on_complete */);
483
+ // Cache send ops if needed.
484
+ calld_->MaybeCacheSendOpsForBatch(pending);
485
+ // send_initial_metadata.
486
+ if (batch->send_initial_metadata) {
487
+ batch_data->AddRetriableSendInitialMetadataOp();
488
+ }
489
+ // send_message.
490
+ if (batch->send_message) {
491
+ batch_data->AddRetriableSendMessageOp();
492
+ }
493
+ // send_trailing_metadata.
494
+ if (batch->send_trailing_metadata) {
495
+ batch_data->AddRetriableSendTrailingMetadataOp();
496
+ }
497
+ // recv_initial_metadata.
498
+ if (batch->recv_initial_metadata) {
499
+ batch_data->AddRetriableRecvInitialMetadataOp();
500
+ }
501
+ // recv_message.
502
+ if (batch->recv_message) {
503
+ batch_data->AddRetriableRecvMessageOp();
504
+ }
505
+ // recv_trailing_metadata.
506
+ if (batch->recv_trailing_metadata && !started_recv_trailing_metadata_) {
507
+ batch_data->AddRetriableRecvTrailingMetadataOp();
508
+ }
509
+ AddClosureForBatch(batch_data->batch(),
510
+ "start replayable pending batch on call attempt",
511
+ closures);
512
+ }
513
+ }
514
+
515
+ void RetryFilter::LegacyCallData::CallAttempt::AddRetriableBatches(
516
+ CallCombinerClosureList* closures) {
517
+ // Replay previously-returned send_* ops if needed.
518
+ BatchData* replay_batch_data = MaybeCreateBatchForReplay();
519
+ if (replay_batch_data != nullptr) {
520
+ AddClosureForBatch(replay_batch_data->batch(),
521
+ "start replay batch on call attempt", closures);
522
+ }
523
+ // Now add pending batches.
524
+ AddBatchesForPendingBatches(closures);
525
+ }
526
+
527
+ void RetryFilter::LegacyCallData::CallAttempt::StartRetriableBatches() {
528
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
529
+ gpr_log(GPR_INFO,
530
+ "chand=%p calld=%p attempt=%p: constructing retriable batches",
531
+ calld_->chand_, calld_, this);
532
+ }
533
+ // Construct list of closures to execute, one for each pending batch.
534
+ CallCombinerClosureList closures;
535
+ AddRetriableBatches(&closures);
536
+ // Note: This will yield the call combiner.
537
+ // Start batches on LB call.
538
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
539
+ gpr_log(GPR_INFO,
540
+ "chand=%p calld=%p attempt=%p: starting %" PRIuPTR
541
+ " retriable batches on lb_call=%p",
542
+ calld_->chand_, calld_, this, closures.size(), lb_call_.get());
543
+ }
544
+ closures.RunClosures(calld_->call_combiner_);
545
+ }
546
+
547
+ void RetryFilter::LegacyCallData::CallAttempt::CancelFromSurface(
548
+ grpc_transport_stream_op_batch* cancel_batch) {
549
+ MaybeCancelPerAttemptRecvTimer();
550
+ Abandon();
551
+ // Propagate cancellation to LB call.
552
+ lb_call_->StartTransportStreamOpBatch(cancel_batch);
553
+ }
554
+
555
+ bool RetryFilter::LegacyCallData::CallAttempt::ShouldRetry(
556
+ absl::optional<grpc_status_code> status,
557
+ absl::optional<Duration> server_pushback) {
558
+ // If no retry policy, don't retry.
559
+ if (calld_->retry_policy_ == nullptr) return false;
560
+ // Check status.
561
+ if (status.has_value()) {
562
+ if (GPR_LIKELY(*status == GRPC_STATUS_OK)) {
563
+ if (calld_->retry_throttle_data_ != nullptr) {
564
+ calld_->retry_throttle_data_->RecordSuccess();
565
+ }
566
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
567
+ gpr_log(GPR_INFO, "chand=%p calld=%p attempt=%p: call succeeded",
568
+ calld_->chand_, calld_, this);
569
+ }
570
+ return false;
571
+ }
572
+ // Status is not OK. Check whether the status is retryable.
573
+ if (!calld_->retry_policy_->retryable_status_codes().Contains(*status)) {
574
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
575
+ gpr_log(GPR_INFO,
576
+ "chand=%p calld=%p attempt=%p: status %s not configured as "
577
+ "retryable",
578
+ calld_->chand_, calld_, this,
579
+ grpc_status_code_to_string(*status));
580
+ }
581
+ return false;
582
+ }
583
+ }
584
+ // Record the failure and check whether retries are throttled.
585
+ // Note that it's important for this check to come after the status
586
+ // code check above, since we should only record failures whose statuses
587
+ // match the configured retryable status codes, so that we don't count
588
+ // things like failures due to malformed requests (INVALID_ARGUMENT).
589
+ // Conversely, it's important for this to come before the remaining
590
+ // checks, so that we don't fail to record failures due to other factors.
591
+ if (calld_->retry_throttle_data_ != nullptr &&
592
+ !calld_->retry_throttle_data_->RecordFailure()) {
593
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
594
+ gpr_log(GPR_INFO, "chand=%p calld=%p attempt=%p: retries throttled",
595
+ calld_->chand_, calld_, this);
596
+ }
597
+ return false;
598
+ }
599
+ // Check whether the call is committed.
600
+ if (calld_->retry_committed_) {
601
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
602
+ gpr_log(GPR_INFO,
603
+ "chand=%p calld=%p attempt=%p: retries already committed",
604
+ calld_->chand_, calld_, this);
605
+ }
606
+ return false;
607
+ }
608
+ // Check whether we have retries remaining.
609
+ ++calld_->num_attempts_completed_;
610
+ if (calld_->num_attempts_completed_ >=
611
+ calld_->retry_policy_->max_attempts()) {
612
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
613
+ gpr_log(
614
+ GPR_INFO, "chand=%p calld=%p attempt=%p: exceeded %d retry attempts",
615
+ calld_->chand_, calld_, this, calld_->retry_policy_->max_attempts());
616
+ }
617
+ return false;
618
+ }
619
+ // Check server push-back.
620
+ if (server_pushback.has_value()) {
621
+ if (*server_pushback < Duration::Zero()) {
622
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
623
+ gpr_log(GPR_INFO,
624
+ "chand=%p calld=%p attempt=%p: not retrying due to server "
625
+ "push-back",
626
+ calld_->chand_, calld_, this);
627
+ }
628
+ return false;
629
+ } else {
630
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
631
+ gpr_log(
632
+ GPR_INFO,
633
+ "chand=%p calld=%p attempt=%p: server push-back: retry in %" PRIu64
634
+ " ms",
635
+ calld_->chand_, calld_, this, server_pushback->millis());
636
+ }
637
+ }
638
+ }
639
+ // We should retry.
640
+ return true;
641
+ }
642
+
643
+ void RetryFilter::LegacyCallData::CallAttempt::Abandon() {
644
+ abandoned_ = true;
645
+ // Unref batches for deferred completion callbacks that will now never
646
+ // be invoked.
647
+ if (started_recv_trailing_metadata_ &&
648
+ !seen_recv_trailing_metadata_from_surface_) {
649
+ recv_trailing_metadata_internal_batch_.reset(
650
+ DEBUG_LOCATION,
651
+ "unref internal recv_trailing_metadata_ready batch; attempt abandoned");
652
+ }
653
+ recv_trailing_metadata_error_ = absl::OkStatus();
654
+ recv_initial_metadata_ready_deferred_batch_.reset(
655
+ DEBUG_LOCATION,
656
+ "unref deferred recv_initial_metadata_ready batch; attempt abandoned");
657
+ recv_initial_metadata_error_ = absl::OkStatus();
658
+ recv_message_ready_deferred_batch_.reset(
659
+ DEBUG_LOCATION,
660
+ "unref deferred recv_message_ready batch; attempt abandoned");
661
+ recv_message_error_ = absl::OkStatus();
662
+ for (auto& on_complete_deferred_batch : on_complete_deferred_batches_) {
663
+ on_complete_deferred_batch.batch.reset(
664
+ DEBUG_LOCATION, "unref deferred on_complete batch; attempt abandoned");
665
+ }
666
+ on_complete_deferred_batches_.clear();
667
+ }
668
+
669
+ void RetryFilter::LegacyCallData::CallAttempt::OnPerAttemptRecvTimer() {
670
+ GRPC_CLOSURE_INIT(&on_per_attempt_recv_timer_, OnPerAttemptRecvTimerLocked,
671
+ this, nullptr);
672
+ GRPC_CALL_COMBINER_START(calld_->call_combiner_, &on_per_attempt_recv_timer_,
673
+ absl::OkStatus(), "per-attempt timer fired");
674
+ }
675
+
676
+ void RetryFilter::LegacyCallData::CallAttempt::OnPerAttemptRecvTimerLocked(
677
+ void* arg, grpc_error_handle error) {
678
+ auto* call_attempt = static_cast<CallAttempt*>(arg);
679
+ auto* calld = call_attempt->calld_;
680
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
681
+ gpr_log(GPR_INFO,
682
+ "chand=%p calld=%p attempt=%p: perAttemptRecvTimeout timer fired: "
683
+ "error=%s, per_attempt_recv_timer_handle_.has_value()=%d",
684
+ calld->chand_, calld, call_attempt, StatusToString(error).c_str(),
685
+ call_attempt->per_attempt_recv_timer_handle_.has_value());
686
+ }
687
+ CallCombinerClosureList closures;
688
+ call_attempt->per_attempt_recv_timer_handle_.reset();
689
+ // Cancel this attempt.
690
+ // TODO(roth): When implementing hedging, we should not cancel the
691
+ // current attempt.
692
+ call_attempt->MaybeAddBatchForCancelOp(
693
+ grpc_error_set_int(
694
+ GRPC_ERROR_CREATE("retry perAttemptRecvTimeout exceeded"),
695
+ StatusIntProperty::kRpcStatus, GRPC_STATUS_CANCELLED),
696
+ &closures);
697
+ // Check whether we should retry.
698
+ if (call_attempt->ShouldRetry(/*status=*/absl::nullopt,
699
+ /*server_pushback=*/absl::nullopt)) {
700
+ // Mark current attempt as abandoned.
701
+ call_attempt->Abandon();
702
+ // We are retrying. Start backoff timer.
703
+ calld->StartRetryTimer(/*server_pushback=*/absl::nullopt);
704
+ } else {
705
+ // Not retrying, so commit the call.
706
+ calld->RetryCommit(call_attempt);
707
+ // If retry state is no longer needed, switch to fast path for
708
+ // subsequent batches.
709
+ call_attempt->MaybeSwitchToFastPath();
710
+ }
711
+ closures.RunClosures(calld->call_combiner_);
712
+ call_attempt->Unref(DEBUG_LOCATION, "OnPerAttemptRecvTimer");
713
+ GRPC_CALL_STACK_UNREF(calld->owning_call_, "OnPerAttemptRecvTimer");
714
+ }
715
+
716
+ void RetryFilter::LegacyCallData::CallAttempt::
717
+ MaybeCancelPerAttemptRecvTimer() {
718
+ if (per_attempt_recv_timer_handle_.has_value()) {
719
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
720
+ gpr_log(GPR_INFO,
721
+ "chand=%p calld=%p attempt=%p: cancelling "
722
+ "perAttemptRecvTimeout timer",
723
+ calld_->chand_, calld_, this);
724
+ }
725
+ if (calld_->chand_->event_engine()->Cancel(
726
+ *per_attempt_recv_timer_handle_)) {
727
+ Unref(DEBUG_LOCATION, "OnPerAttemptRecvTimer");
728
+ GRPC_CALL_STACK_UNREF(calld_->owning_call_, "OnPerAttemptRecvTimer");
729
+ }
730
+ per_attempt_recv_timer_handle_.reset();
731
+ }
732
+ }
733
+
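
A detail worth calling out in MaybeCancelPerAttemptRecvTimer(): Cancel() returning true means the timer callback will never run, so the canceller must drop the refs the callback would otherwise have dropped; exactly one side releases them. A standalone sketch of that ownership handshake, where the single-threaded TimerQueue below is a hypothetical stand-in rather than the EventEngine API:

    #include <cassert>
    #include <cstdio>
    #include <functional>
    #include <map>
    #include <optional>

    class TimerQueue {
     public:
      using Handle = int;
      Handle RunAfter(std::function<void()> cb) {
        timers_[next_] = std::move(cb);
        return next_++;
      }
      // Returns true iff the callback was revoked and will never run.
      bool Cancel(Handle h) { return timers_.erase(h) == 1; }
      void FireAll() {
        auto timers = std::move(timers_);
        for (auto& [handle, cb] : timers) cb();
      }
     private:
      std::map<Handle, std::function<void()>> timers_;
      Handle next_ = 0;
    };

    int refs = 0;  // stands in for the CallAttempt ref + call stack ref

    int main() {
      TimerQueue q;
      ++refs;  // taken when the timer is scheduled, owned by the callback
      std::optional<TimerQueue::Handle> handle =
          q.RunAfter([] { --refs; std::puts("timer fired"); });
      // Drop the callback's ref only if Cancel() succeeded; otherwise the
      // callback is committed to run and will drop it itself.
      if (handle.has_value() && q.Cancel(*handle)) --refs;
      handle.reset();
      q.FireAll();  // no-op here: the timer was revoked
      assert(refs == 0);
    }
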
734
+ //
735
+ // RetryFilter::LegacyCallData::CallAttempt::BatchData
736
+ //
737
+
738
+ RetryFilter::LegacyCallData::CallAttempt::BatchData::BatchData(
739
+ RefCountedPtr<CallAttempt> attempt, int refcount, bool set_on_complete)
740
+ : RefCounted(
741
+ GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace) ? "BatchData" : nullptr,
742
+ refcount),
743
+ call_attempt_(attempt.release()) {
744
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
745
+ gpr_log(GPR_INFO, "chand=%p calld=%p attempt=%p: creating batch %p",
746
+ call_attempt_->calld_->chand_, call_attempt_->calld_, call_attempt_,
747
+ this);
748
+ }
749
+ // We hold a ref to the call stack for every batch sent on a call attempt.
750
+ // This is because some batches on the call attempt may not complete
751
+ // until after all of the batches are completed at the surface (because
752
+ // each batch that is pending at the surface holds a ref). This
753
+ // can happen for replayed send ops, and it can happen for
754
+ // recv_initial_metadata and recv_message ops on a call attempt that has
755
+ // been abandoned.
756
+ GRPC_CALL_STACK_REF(call_attempt_->calld_->owning_call_, "Retry BatchData");
757
+ batch_.payload = &call_attempt_->batch_payload_;
758
+ if (set_on_complete) {
759
+ GRPC_CLOSURE_INIT(&on_complete_, OnComplete, this, nullptr);
760
+ batch_.on_complete = &on_complete_;
761
+ }
762
+ }
763
+
764
+ RetryFilter::LegacyCallData::CallAttempt::BatchData::~BatchData() {
765
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
766
+ gpr_log(GPR_INFO, "chand=%p calld=%p attempt=%p: destroying batch %p",
767
+ call_attempt_->calld_->chand_, call_attempt_->calld_, call_attempt_,
768
+ this);
769
+ }
770
+ CallAttempt* call_attempt = std::exchange(call_attempt_, nullptr);
771
+ grpc_call_stack* owning_call = call_attempt->calld_->owning_call_;
772
+ call_attempt->Unref(DEBUG_LOCATION, "~BatchData");
773
+ GRPC_CALL_STACK_UNREF(owning_call, "Retry BatchData");
774
+ }
775
+
776
+ void RetryFilter::LegacyCallData::CallAttempt::BatchData::
777
+ FreeCachedSendOpDataForCompletedBatch() {
778
+ auto* calld = call_attempt_->calld_;
779
+ // TODO(roth): When we implement hedging, this logic will need to get
780
+ // a bit more complex, because there may be other (now abandoned) call
781
+ // attempts still using this data. We may need to do some sort of
782
+ // ref-counting instead.
783
+ if (batch_.send_initial_metadata) {
784
+ calld->FreeCachedSendInitialMetadata();
785
+ }
786
+ if (batch_.send_message) {
787
+ calld->FreeCachedSendMessage(call_attempt_->completed_send_message_count_ -
788
+ 1);
789
+ }
790
+ if (batch_.send_trailing_metadata) {
791
+ calld->FreeCachedSendTrailingMetadata();
792
+ }
793
+ }
794
+
795
+ //
796
+ // recv_initial_metadata callback handling
797
+ //
798
+
799
+ void RetryFilter::LegacyCallData::CallAttempt::BatchData::
800
+ MaybeAddClosureForRecvInitialMetadataCallback(
801
+ grpc_error_handle error, CallCombinerClosureList* closures) {
802
+ // Find pending batch.
803
+ PendingBatch* pending = call_attempt_->calld_->PendingBatchFind(
804
+ "invoking recv_initial_metadata_ready for",
805
+ [](grpc_transport_stream_op_batch* batch) {
806
+ return batch->recv_initial_metadata &&
807
+ batch->payload->recv_initial_metadata
808
+ .recv_initial_metadata_ready != nullptr;
809
+ });
810
+ if (pending == nullptr) {
811
+ return;
812
+ }
813
+ // Return metadata.
814
+ *pending->batch->payload->recv_initial_metadata.recv_initial_metadata =
815
+ std::move(call_attempt_->recv_initial_metadata_);
816
+ // Propagate trailing_metadata_available.
817
+ *pending->batch->payload->recv_initial_metadata.trailing_metadata_available =
818
+ call_attempt_->trailing_metadata_available_;
819
+ // Update bookkeeping.
820
+ // Note: Need to do this before invoking the callback, since invoking
821
+ // the callback will result in yielding the call combiner.
822
+ grpc_closure* recv_initial_metadata_ready =
823
+ pending->batch->payload->recv_initial_metadata
824
+ .recv_initial_metadata_ready;
825
+ pending->batch->payload->recv_initial_metadata.recv_initial_metadata_ready =
826
+ nullptr;
827
+ call_attempt_->calld_->MaybeClearPendingBatch(pending);
828
+ // Add callback to closures.
829
+ closures->Add(recv_initial_metadata_ready, error,
830
+ "recv_initial_metadata_ready for pending batch");
831
+ }
832
+
833
+ void RetryFilter::LegacyCallData::CallAttempt::BatchData::
834
+ RecvInitialMetadataReady(void* arg, grpc_error_handle error) {
835
+ RefCountedPtr<BatchData> batch_data(static_cast<BatchData*>(arg));
836
+ CallAttempt* call_attempt = batch_data->call_attempt_;
837
+ RetryFilter::LegacyCallData* calld = call_attempt->calld_;
838
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
839
+ gpr_log(GPR_INFO,
840
+ "chand=%p calld=%p attempt=%p batch_data=%p: "
841
+ "got recv_initial_metadata_ready, error=%s",
842
+ calld->chand_, calld, call_attempt, batch_data.get(),
843
+ StatusToString(error).c_str());
844
+ }
845
+ call_attempt->completed_recv_initial_metadata_ = true;
846
+ // If this attempt has been abandoned, then we're not going to use the
847
+ // result of this recv_initial_metadata op, so do nothing.
848
+ if (call_attempt->abandoned_) {
849
+ GRPC_CALL_COMBINER_STOP(
850
+ calld->call_combiner_,
851
+ "recv_initial_metadata_ready for abandoned attempt");
852
+ return;
853
+ }
854
+ // Cancel per-attempt recv timer, if any.
855
+ call_attempt->MaybeCancelPerAttemptRecvTimer();
856
+ // If we're not committed, check the response to see if we need to commit.
857
+ if (!calld->retry_committed_) {
858
+ // If we got an error or a Trailers-Only response and have not yet gotten
859
+ // the recv_trailing_metadata_ready callback, then defer propagating this
860
+ // callback back to the surface. We can evaluate whether to retry when
861
+ // recv_trailing_metadata comes back.
862
+ if (GPR_UNLIKELY(
863
+ (call_attempt->trailing_metadata_available_ || !error.ok()) &&
864
+ !call_attempt->completed_recv_trailing_metadata_)) {
865
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
866
+ gpr_log(GPR_INFO,
867
+ "chand=%p calld=%p attempt=%p: deferring "
868
+ "recv_initial_metadata_ready (Trailers-Only)",
869
+ calld->chand_, calld, call_attempt);
870
+ }
871
+ call_attempt->recv_initial_metadata_ready_deferred_batch_ =
872
+ std::move(batch_data);
873
+ call_attempt->recv_initial_metadata_error_ = error;
874
+ CallCombinerClosureList closures;
875
+ if (!error.ok()) {
876
+ call_attempt->MaybeAddBatchForCancelOp(error, &closures);
877
+ }
878
+ if (!call_attempt->started_recv_trailing_metadata_) {
879
+ // recv_trailing_metadata not yet started by application; start it
880
+ // ourselves to get status.
881
+ call_attempt->AddBatchForInternalRecvTrailingMetadata(&closures);
882
+ }
883
+ closures.RunClosures(calld->call_combiner_);
884
+ return;
885
+ }
886
+ // Received valid initial metadata, so commit the call.
887
+ calld->RetryCommit(call_attempt);
888
+ // If retry state is no longer needed, switch to fast path for
889
+ // subsequent batches.
890
+ call_attempt->MaybeSwitchToFastPath();
891
+ }
892
+ // Invoke the callback to return the result to the surface.
893
+ CallCombinerClosureList closures;
894
+ batch_data->MaybeAddClosureForRecvInitialMetadataCallback(error, &closures);
895
+ closures.RunClosures(calld->call_combiner_);
896
+ }
897
+
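
The commit-or-defer branch above reduces to a small predicate: an error, or a Trailers-Only response, seen before recv_trailing_metadata has completed means the retry decision must wait for the call's final status; anything else proves the server accepted the RPC, so the call commits. A sketch with illustrative names:

    // True if recv_initial_metadata_ready must be deferred until the final
    // status arrives.
    bool MustDeferRecvInitialMetadata(bool trailing_metadata_available,
                                      bool got_error,
                                      bool completed_recv_trailing_metadata) {
      return (trailing_metadata_available || got_error) &&
             !completed_recv_trailing_metadata;
    }
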
898
+ //
899
+ // recv_message callback handling
900
+ //
901
+
902
+ void RetryFilter::LegacyCallData::CallAttempt::BatchData::
903
+ MaybeAddClosureForRecvMessageCallback(grpc_error_handle error,
904
+ CallCombinerClosureList* closures) {
905
+ // Find pending op.
906
+ PendingBatch* pending = call_attempt_->calld_->PendingBatchFind(
907
+ "invoking recv_message_ready for",
908
+ [](grpc_transport_stream_op_batch* batch) {
909
+ return batch->recv_message &&
910
+ batch->payload->recv_message.recv_message_ready != nullptr;
911
+ });
912
+ if (pending == nullptr) {
913
+ return;
914
+ }
915
+ // Return payload.
916
+ *pending->batch->payload->recv_message.recv_message =
917
+ std::move(call_attempt_->recv_message_);
918
+ *pending->batch->payload->recv_message.flags =
919
+ call_attempt_->recv_message_flags_;
920
+ // Update bookkeeping.
921
+ // Note: Need to do this before invoking the callback, since invoking
922
+ // the callback will result in yielding the call combiner.
923
+ grpc_closure* recv_message_ready =
924
+ pending->batch->payload->recv_message.recv_message_ready;
925
+ pending->batch->payload->recv_message.recv_message_ready = nullptr;
926
+ call_attempt_->calld_->MaybeClearPendingBatch(pending);
927
+ // Add callback to closures.
928
+ closures->Add(recv_message_ready, error,
929
+ "recv_message_ready for pending batch");
930
+ }
931
+
932
+ void RetryFilter::LegacyCallData::CallAttempt::BatchData::RecvMessageReady(
933
+ void* arg, grpc_error_handle error) {
934
+ RefCountedPtr<BatchData> batch_data(static_cast<BatchData*>(arg));
935
+ CallAttempt* call_attempt = batch_data->call_attempt_;
936
+ RetryFilter::LegacyCallData* calld = call_attempt->calld_;
937
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
938
+ gpr_log(GPR_INFO,
939
+ "chand=%p calld=%p attempt=%p batch_data=%p: "
940
+ "got recv_message_ready, error=%s",
941
+ calld->chand_, calld, call_attempt, batch_data.get(),
942
+ StatusToString(error).c_str());
943
+ }
944
+ ++call_attempt->completed_recv_message_count_;
945
+ // If this attempt has been abandoned, then we're not going to use the
946
+ // result of this recv_message op, so do nothing.
947
+ if (call_attempt->abandoned_) {
948
+ // The transport will not invoke recv_trailing_metadata_ready until the byte
949
+ // stream for any recv_message op is orphaned, so we do that here to ensure
950
+ // that any pending recv_trailing_metadata op can complete.
951
+ call_attempt->recv_message_.reset();
952
+ GRPC_CALL_COMBINER_STOP(calld->call_combiner_,
953
+ "recv_message_ready for abandoned attempt");
954
+ return;
955
+ }
956
+ // Cancel per-attempt recv timer, if any.
957
+ call_attempt->MaybeCancelPerAttemptRecvTimer();
958
+ // If we're not committed, check the response to see if we need to commit.
959
+ if (!calld->retry_committed_) {
960
+ // If we got an error or no message payload and we have not yet gotten
961
+ // the recv_trailing_metadata_ready callback, then defer propagating this
962
+ // callback back to the surface. We can evaluate whether to retry when
963
+ // recv_trailing_metadata comes back.
964
+ if (GPR_UNLIKELY(
965
+ (!call_attempt->recv_message_.has_value() || !error.ok()) &&
966
+ !call_attempt->completed_recv_trailing_metadata_)) {
967
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
968
+ gpr_log(GPR_INFO,
969
+ "chand=%p calld=%p attempt=%p: deferring recv_message_ready "
970
+ "(nullptr message and recv_trailing_metadata pending)",
971
+ calld->chand_, calld, call_attempt);
972
+ }
973
+ call_attempt->recv_message_ready_deferred_batch_ = std::move(batch_data);
974
+ call_attempt->recv_message_error_ = error;
975
+ CallCombinerClosureList closures;
976
+ if (!error.ok()) {
977
+ call_attempt->MaybeAddBatchForCancelOp(error, &closures);
978
+ }
979
+ if (!call_attempt->started_recv_trailing_metadata_) {
980
+ // recv_trailing_metadata not yet started by application; start it
981
+ // ourselves to get status.
982
+ call_attempt->AddBatchForInternalRecvTrailingMetadata(&closures);
983
+ }
984
+ closures.RunClosures(calld->call_combiner_);
985
+ return;
986
+ }
987
+ // Received a valid message, so commit the call.
988
+ calld->RetryCommit(call_attempt);
989
+ // If retry state is no longer needed, switch to fast path for
990
+ // subsequent batches.
991
+ call_attempt->MaybeSwitchToFastPath();
992
+ }
993
+ // Invoke the callback to return the result to the surface.
994
+ CallCombinerClosureList closures;
995
+ batch_data->MaybeAddClosureForRecvMessageCallback(error, &closures);
996
+ closures.RunClosures(calld->call_combiner_);
997
+ }
998
+
999
+ //
1000
+ // recv_trailing_metadata handling
1001
+ //
1002
+
1003
+ namespace {
1004
+
1005
+ // Sets *status, *server_pushback, *is_lb_drop, and *stream_network_state
1006
+ // based on md_batch and error.
1007
+ void GetCallStatus(
1008
+ Timestamp deadline, grpc_metadata_batch* md_batch, grpc_error_handle error,
1009
+ grpc_status_code* status, absl::optional<Duration>* server_pushback,
1010
+ bool* is_lb_drop,
1011
+ absl::optional<GrpcStreamNetworkState::ValueType>* stream_network_state) {
1012
+ if (!error.ok()) {
1013
+ grpc_error_get_status(error, deadline, status, nullptr, nullptr, nullptr);
1014
+ intptr_t value = 0;
1015
+ if (grpc_error_get_int(error, StatusIntProperty::kLbPolicyDrop, &value) &&
1016
+ value != 0) {
1017
+ *is_lb_drop = true;
1018
+ }
1019
+ } else {
1020
+ *status = *md_batch->get(GrpcStatusMetadata());
1021
+ }
1022
+ *server_pushback = md_batch->get(GrpcRetryPushbackMsMetadata());
1023
+ *stream_network_state = md_batch->get(GrpcStreamNetworkState());
1024
+ }
1025
+
1026
+ } // namespace
1027
+
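
For reference, the server pushback consumed by GetCallStatus() arrives in the "grpc-retry-pushback-ms" trailer; under the retry design (gRFC A6), a malformed value is treated like a negative one, which ShouldRetry() interprets as "do not retry", while a nonnegative value replaces the client's computed backoff. A hedged sketch of that mapping; this is not the gRPC metadata API:

    #include <cctype>
    #include <cstdint>
    #include <optional>
    #include <string>

    // Absent trailer -> nullopt, malformed -> negative ("do not retry"),
    // otherwise the pushback delay in milliseconds.
    std::optional<int64_t> ParsePushbackMs(const std::string* trailer_value) {
      if (trailer_value == nullptr) return std::nullopt;
      if (trailer_value->empty()) return -1;
      int64_t ms = 0;
      for (char c : *trailer_value) {
        if (!std::isdigit(static_cast<unsigned char>(c))) return -1;
        ms = ms * 10 + (c - '0');
      }
      return ms;
    }
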
1028
+ void RetryFilter::LegacyCallData::CallAttempt::BatchData::
1029
+ MaybeAddClosureForRecvTrailingMetadataReady(
1030
+ grpc_error_handle error, CallCombinerClosureList* closures) {
1031
+ auto* calld = call_attempt_->calld_;
1032
+ // Find pending batch.
1033
+ PendingBatch* pending = calld->PendingBatchFind(
1034
+ "invoking recv_trailing_metadata_ready for",
1035
+ [](grpc_transport_stream_op_batch* batch) {
1036
+ return batch->recv_trailing_metadata &&
1037
+ batch->payload->recv_trailing_metadata
1038
+ .recv_trailing_metadata_ready != nullptr;
1039
+ });
1040
+ // If we generated the recv_trailing_metadata op internally via
1041
+ // AddBatchForInternalRecvTrailingMetadata(), then there will be no
1042
+ // pending batch.
1043
+ if (pending == nullptr) {
1044
+ call_attempt_->recv_trailing_metadata_error_ = error;
1045
+ return;
1046
+ }
1047
+ // Copy transport stats to be delivered up to the surface.
1048
+ grpc_transport_move_stats(
1049
+ &call_attempt_->collect_stats_,
1050
+ pending->batch->payload->recv_trailing_metadata.collect_stats);
1051
+ // Return metadata.
1052
+ *pending->batch->payload->recv_trailing_metadata.recv_trailing_metadata =
1053
+ std::move(call_attempt_->recv_trailing_metadata_);
1054
+ // Add closure.
1055
+ closures->Add(pending->batch->payload->recv_trailing_metadata
1056
+ .recv_trailing_metadata_ready,
1057
+ error, "recv_trailing_metadata_ready for pending batch");
1058
+ // Update bookkeeping.
1059
+ pending->batch->payload->recv_trailing_metadata.recv_trailing_metadata_ready =
1060
+ nullptr;
1061
+ calld->MaybeClearPendingBatch(pending);
1062
+ }
1063
+
1064
+ void RetryFilter::LegacyCallData::CallAttempt::BatchData::
1065
+ AddClosuresForDeferredCompletionCallbacks(
1066
+ CallCombinerClosureList* closures) {
1067
+ // Add closure for deferred recv_initial_metadata_ready.
1068
+ if (GPR_UNLIKELY(call_attempt_->recv_initial_metadata_ready_deferred_batch_ !=
1069
+ nullptr)) {
1070
+ MaybeAddClosureForRecvInitialMetadataCallback(
1071
+ call_attempt_->recv_initial_metadata_error_, closures);
1072
+ call_attempt_->recv_initial_metadata_ready_deferred_batch_.reset(
1073
+ DEBUG_LOCATION, "resuming deferred recv_initial_metadata_ready");
1074
+ call_attempt_->recv_initial_metadata_error_ = absl::OkStatus();
1075
+ }
1076
+ // Add closure for deferred recv_message_ready.
1077
+ if (GPR_UNLIKELY(call_attempt_->recv_message_ready_deferred_batch_ !=
1078
+ nullptr)) {
1079
+ MaybeAddClosureForRecvMessageCallback(call_attempt_->recv_message_error_,
1080
+ closures);
1081
+ call_attempt_->recv_message_ready_deferred_batch_.reset(
1082
+ DEBUG_LOCATION, "resuming deferred recv_message_ready");
1083
+ call_attempt_->recv_message_error_ = absl::OkStatus();
1084
+ }
1085
+ // Add closures for deferred on_complete callbacks.
1086
+ for (auto& on_complete_deferred_batch :
1087
+ call_attempt_->on_complete_deferred_batches_) {
1088
+ closures->Add(&on_complete_deferred_batch.batch->on_complete_,
1089
+ on_complete_deferred_batch.error, "resuming on_complete");
1090
+ on_complete_deferred_batch.batch.release();
1091
+ }
1092
+ call_attempt_->on_complete_deferred_batches_.clear();
1093
+ }
1094
+
1095
+ void RetryFilter::LegacyCallData::CallAttempt::BatchData::
1096
+ AddClosuresToFailUnstartedPendingBatches(
1097
+ grpc_error_handle error, CallCombinerClosureList* closures) {
1098
+ auto* calld = call_attempt_->calld_;
1099
+ for (size_t i = 0; i < GPR_ARRAY_SIZE(calld->pending_batches_); ++i) {
1100
+ PendingBatch* pending = &calld->pending_batches_[i];
1101
+ if (pending->batch == nullptr) continue;
1102
+ if (call_attempt_->PendingBatchContainsUnstartedSendOps(pending)) {
1103
+ closures->Add(pending->batch->on_complete, error,
1104
+ "failing on_complete for pending batch");
1105
+ pending->batch->on_complete = nullptr;
1106
+ calld->MaybeClearPendingBatch(pending);
1107
+ }
1108
+ }
1109
+ }
1110
+
1111
+ void RetryFilter::LegacyCallData::CallAttempt::BatchData::
1112
+ RunClosuresForCompletedCall(grpc_error_handle error) {
1113
+ // Construct list of closures to execute.
1114
+ CallCombinerClosureList closures;
1115
+ // First, add closure for recv_trailing_metadata_ready.
1116
+ MaybeAddClosureForRecvTrailingMetadataReady(error, &closures);
1117
+ // If there are deferred batch completion callbacks, add them to closures.
1118
+ AddClosuresForDeferredCompletionCallbacks(&closures);
1119
+ // Add closures to fail any pending batches that have not yet been started.
1120
+ AddClosuresToFailUnstartedPendingBatches(error, &closures);
1121
+ // Schedule all of the closures identified above.
1122
+ // Note: This will release the call combiner.
1123
+ closures.RunClosures(call_attempt_->calld_->call_combiner_);
1124
+ }
1125
+
1126
+ void RetryFilter::LegacyCallData::CallAttempt::BatchData::
1127
+ RecvTrailingMetadataReady(void* arg, grpc_error_handle error) {
1128
+ RefCountedPtr<BatchData> batch_data(static_cast<BatchData*>(arg));
1129
+ CallAttempt* call_attempt = batch_data->call_attempt_;
1130
+ RetryFilter::LegacyCallData* calld = call_attempt->calld_;
1131
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
1132
+ gpr_log(GPR_INFO,
1133
+ "chand=%p calld=%p attempt=%p batch_data=%p: "
1134
+ "got recv_trailing_metadata_ready, error=%s",
1135
+ calld->chand_, calld, call_attempt, batch_data.get(),
1136
+ StatusToString(error).c_str());
1137
+ }
1138
+ call_attempt->completed_recv_trailing_metadata_ = true;
1139
+ // If this attempt has been abandoned, then we're not going to use the
1140
+ // result of this recv_trailing_metadata op, so do nothing.
1141
+ if (call_attempt->abandoned_) {
1142
+ GRPC_CALL_COMBINER_STOP(
1143
+ calld->call_combiner_,
1144
+ "recv_trailing_metadata_ready for abandoned attempt");
1145
+ return;
1146
+ }
1147
+ // Cancel per-attempt recv timer, if any.
1148
+ call_attempt->MaybeCancelPerAttemptRecvTimer();
1149
+ // Get the call's status and check for server pushback metadata.
1150
+ grpc_status_code status = GRPC_STATUS_OK;
1151
+ absl::optional<Duration> server_pushback;
1152
+ bool is_lb_drop = false;
1153
+ absl::optional<GrpcStreamNetworkState::ValueType> stream_network_state;
1154
+ grpc_metadata_batch* md_batch =
1155
+ batch_data->batch_.payload->recv_trailing_metadata.recv_trailing_metadata;
1156
+ GetCallStatus(calld->deadline_, md_batch, error, &status, &server_pushback,
1157
+ &is_lb_drop, &stream_network_state);
1158
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
1159
+ gpr_log(GPR_INFO,
1160
+ "chand=%p calld=%p attempt=%p: call finished, status=%s "
1161
+ "server_pushback=%s is_lb_drop=%d stream_network_state=%s",
1162
+ calld->chand_, calld, call_attempt,
1163
+ grpc_status_code_to_string(status),
1164
+ server_pushback.has_value() ? server_pushback->ToString().c_str()
1165
+ : "N/A",
1166
+ is_lb_drop,
1167
+ stream_network_state.has_value()
1168
+ ? absl::StrCat(*stream_network_state).c_str()
1169
+ : "N/A");
1170
+ }
1171
+ // Check if we should retry.
1172
+ if (!is_lb_drop) { // Never retry on LB drops.
1173
+ enum { kNoRetry, kTransparentRetry, kConfigurableRetry } retry = kNoRetry;
1174
+ // Handle transparent retries.
1175
+ if (stream_network_state.has_value() && !calld->retry_committed_) {
1176
+ // If not sent on wire, then always retry.
1177
+ // If sent on wire but not seen by server, retry at most once.
1178
+ if (*stream_network_state == GrpcStreamNetworkState::kNotSentOnWire) {
1179
+ retry = kTransparentRetry;
1180
+ } else if (*stream_network_state ==
1181
+ GrpcStreamNetworkState::kNotSeenByServer &&
1182
+ !calld->sent_transparent_retry_not_seen_by_server_) {
1183
+ calld->sent_transparent_retry_not_seen_by_server_ = true;
1184
+ retry = kTransparentRetry;
1185
+ }
1186
+ }
1187
+ // If not transparently retrying, check for configurable retry.
1188
+ if (retry == kNoRetry &&
1189
+ call_attempt->ShouldRetry(status, server_pushback)) {
1190
+ retry = kConfigurableRetry;
1191
+ }
1192
+ // If we're retrying, do so.
1193
+ if (retry != kNoRetry) {
1194
+ CallCombinerClosureList closures;
1195
+ // Cancel call attempt.
1196
+ call_attempt->MaybeAddBatchForCancelOp(
1197
+ error.ok() ? grpc_error_set_int(
1198
+ GRPC_ERROR_CREATE("call attempt failed"),
1199
+ StatusIntProperty::kRpcStatus, GRPC_STATUS_CANCELLED)
1200
+ : error,
1201
+ &closures);
1202
+ // For transparent retries, add a closure to immediately start a new
1203
+ // call attempt.
1204
+ // For configurable retries, start retry timer.
1205
+ if (retry == kTransparentRetry) {
1206
+ calld->AddClosureToStartTransparentRetry(&closures);
1207
+ } else {
1208
+ calld->StartRetryTimer(server_pushback);
1209
+ }
1210
+ // Record that this attempt has been abandoned.
1211
+ call_attempt->Abandon();
1212
+ // Yields call combiner.
1213
+ closures.RunClosures(calld->call_combiner_);
1214
+ return;
1215
+ }
1216
+ }
1217
+ // Not retrying, so commit the call.
1218
+ calld->RetryCommit(call_attempt);
1219
+ // If retry state is no longer needed, switch to fast path for
1220
+ // subsequent batches.
1221
+ call_attempt->MaybeSwitchToFastPath();
1222
+ // Run any necessary closures.
1223
+ batch_data->RunClosuresForCompletedCall(error);
1224
+ }
1225
+
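
The branch structure in RecvTrailingMetadataReady() amounts to a three-way classification: LB drops are never retried, transparent retries take precedence over policy-driven ones, and the "not seen by server" flavor is attempted at most once per call. A standalone sketch; the enum and names are illustrative:

    #include <optional>

    enum class RetryKind { kNone, kTransparent, kConfigurable };
    enum class NetworkState { kNotSentOnWire, kNotSeenByServer, kSeenByServer };

    RetryKind ClassifyRetry(bool is_lb_drop, bool retry_committed,
                            std::optional<NetworkState> network_state,
                            bool* sent_not_seen_by_server_retry,
                            bool policy_allows_retry) {
      if (is_lb_drop) return RetryKind::kNone;   // never retry LB drops
      if (network_state.has_value() && !retry_committed) {
        if (*network_state == NetworkState::kNotSentOnWire) {
          return RetryKind::kTransparent;        // always safe to retry
        }
        if (*network_state == NetworkState::kNotSeenByServer &&
            !*sent_not_seen_by_server_retry) {
          *sent_not_seen_by_server_retry = true; // at most once per call
          return RetryKind::kTransparent;
        }
      }
      return policy_allows_retry ? RetryKind::kConfigurable : RetryKind::kNone;
    }
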
1226
+ //
1227
+ // on_complete callback handling
1228
+ //
1229
+
1230
+ void RetryFilter::LegacyCallData::CallAttempt::BatchData::
1231
+ AddClosuresForCompletedPendingBatch(grpc_error_handle error,
1232
+ CallCombinerClosureList* closures) {
1233
+ auto* calld = call_attempt_->calld_;
1234
+ PendingBatch* pending = calld->PendingBatchFind(
1235
+ "completed", [this](grpc_transport_stream_op_batch* batch) {
1236
+ // Match the pending batch with the same set of send ops as the
1237
+ // batch we've just completed.
1238
+ return batch->on_complete != nullptr &&
1239
+ batch_.send_initial_metadata == batch->send_initial_metadata &&
1240
+ batch_.send_message == batch->send_message &&
1241
+ batch_.send_trailing_metadata == batch->send_trailing_metadata;
1242
+ });
1243
+ // If batch_data is a replay batch, then there will be no pending
1244
+ // batch to complete.
1245
+ if (pending == nullptr) {
1246
+ return;
1247
+ }
1248
+ // Propagate payload.
1249
+ if (batch_.send_message) {
1250
+ pending->batch->payload->send_message.stream_write_closed =
1251
+ batch_.payload->send_message.stream_write_closed;
1252
+ }
1253
+ // Add closure.
1254
+ closures->Add(pending->batch->on_complete, error,
1255
+ "on_complete for pending batch");
1256
+ pending->batch->on_complete = nullptr;
1257
+ calld->MaybeClearPendingBatch(pending);
1258
+ }
1259
+
1260
+ void RetryFilter::LegacyCallData::CallAttempt::BatchData::
1261
+ AddClosuresForReplayOrPendingSendOps(CallCombinerClosureList* closures) {
1262
+ auto* calld = call_attempt_->calld_;
1263
+ bool have_pending_send_ops = call_attempt_->HaveSendOpsToReplay();
1264
+ // We don't check send_initial_metadata here, because that op will always
1265
+ // be started as soon as it is received from the surface, so it will
1266
+ // never need to be started at this point.
1267
+ if (!have_pending_send_ops) {
1268
+ for (size_t i = 0; i < GPR_ARRAY_SIZE(calld->pending_batches_); ++i) {
1269
+ PendingBatch* pending = &calld->pending_batches_[i];
1270
+ grpc_transport_stream_op_batch* batch = pending->batch;
1271
+ if (batch == nullptr || pending->send_ops_cached) continue;
1272
+ if (batch->send_message || batch->send_trailing_metadata) {
1273
+ have_pending_send_ops = true;
1274
+ break;
1275
+ }
1276
+ }
1277
+ }
1278
+ if (have_pending_send_ops) {
1279
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
1280
+ gpr_log(GPR_INFO,
1281
+ "chand=%p calld=%p attempt=%p: starting next batch for pending "
1282
+ "send op(s)",
1283
+ calld->chand_, calld, call_attempt_);
1284
+ }
1285
+ call_attempt_->AddRetriableBatches(closures);
1286
+ }
1287
+ }
1288
+
1289
+ void RetryFilter::LegacyCallData::CallAttempt::BatchData::OnComplete(
1290
+ void* arg, grpc_error_handle error) {
1291
+ RefCountedPtr<BatchData> batch_data(static_cast<BatchData*>(arg));
1292
+ CallAttempt* call_attempt = batch_data->call_attempt_;
1293
+ RetryFilter::LegacyCallData* calld = call_attempt->calld_;
1294
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
1295
+ gpr_log(GPR_INFO,
1296
+ "chand=%p calld=%p attempt=%p batch_data=%p: "
1297
+ "got on_complete, error=%s, batch=%s",
1298
+ calld->chand_, calld, call_attempt, batch_data.get(),
1299
+ StatusToString(error).c_str(),
1300
+ grpc_transport_stream_op_batch_string(&batch_data->batch_, false)
1301
+ .c_str());
1302
+ }
1303
+ // If this attempt has been abandoned, then we're not going to propagate
1304
+ // the completion of this batch, so do nothing.
1305
+ if (call_attempt->abandoned_) {
1306
+ GRPC_CALL_COMBINER_STOP(calld->call_combiner_,
1307
+ "on_complete for abandoned attempt");
1308
+ return;
1309
+ }
1310
+ // If we got an error and have not yet gotten the
1311
+ // recv_trailing_metadata_ready callback, then defer propagating this
1312
+ // callback back to the surface. We can evaluate whether to retry when
1313
+ // recv_trailing_metadata comes back.
1314
+ if (GPR_UNLIKELY(!calld->retry_committed_ && !error.ok() &&
1315
+ !call_attempt->completed_recv_trailing_metadata_)) {
1316
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
1317
+ gpr_log(GPR_INFO, "chand=%p calld=%p attempt=%p: deferring on_complete",
1318
+ calld->chand_, calld, call_attempt);
1319
+ }
1320
+ call_attempt->on_complete_deferred_batches_.emplace_back(
1321
+ std::move(batch_data), error);
1322
+ CallCombinerClosureList closures;
1323
+ call_attempt->MaybeAddBatchForCancelOp(error, &closures);
1324
+ if (!call_attempt->started_recv_trailing_metadata_) {
1325
+ // recv_trailing_metadata not yet started by application; start it
1326
+ // ourselves to get status.
1327
+ call_attempt->AddBatchForInternalRecvTrailingMetadata(&closures);
1328
+ }
1329
+ closures.RunClosures(calld->call_combiner_);
1330
+ return;
1331
+ }
1332
+ // Update bookkeeping in call_attempt.
1333
+ if (batch_data->batch_.send_initial_metadata) {
1334
+ call_attempt->completed_send_initial_metadata_ = true;
1335
+ }
1336
+ if (batch_data->batch_.send_message) {
1337
+ ++call_attempt->completed_send_message_count_;
1338
+ }
1339
+ if (batch_data->batch_.send_trailing_metadata) {
1340
+ call_attempt->completed_send_trailing_metadata_ = true;
1341
+ }
1342
+ // If the call is committed, free cached data for send ops that we've just
1343
+ // completed.
1344
+ if (calld->retry_committed_) {
1345
+ batch_data->FreeCachedSendOpDataForCompletedBatch();
1346
+ }
1347
+ // Construct list of closures to execute.
1348
+ CallCombinerClosureList closures;
1349
+ // Add closure for the completed pending batch, if any.
1350
+ batch_data->AddClosuresForCompletedPendingBatch(error, &closures);
1351
+ // If needed, add a callback to start any replay or pending send ops on
1352
+ // the LB call.
1353
+ if (!call_attempt->completed_recv_trailing_metadata_) {
1354
+ batch_data->AddClosuresForReplayOrPendingSendOps(&closures);
1355
+ }
1356
+ // If retry state is no longer needed (i.e., we're committed and there
1357
+ // are no more send ops to replay), switch to fast path for subsequent
1358
+ // batches.
1359
+ call_attempt->MaybeSwitchToFastPath();
1360
+ // Schedule all of the closures identified above.
1361
+ // Note: This yields the call combiner.
1362
+ closures.RunClosures(calld->call_combiner_);
1363
+ }
1364
+
1365
+ void RetryFilter::LegacyCallData::CallAttempt::BatchData::OnCompleteForCancelOp(
1366
+ void* arg, grpc_error_handle error) {
1367
+ RefCountedPtr<BatchData> batch_data(static_cast<BatchData*>(arg));
1368
+ CallAttempt* call_attempt = batch_data->call_attempt_;
1369
+ RetryFilter::LegacyCallData* calld = call_attempt->calld_;
1370
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
1371
+ gpr_log(GPR_INFO,
1372
+ "chand=%p calld=%p attempt=%p batch_data=%p: "
1373
+ "got on_complete for cancel_stream batch, error=%s, batch=%s",
1374
+ calld->chand_, calld, call_attempt, batch_data.get(),
1375
+ StatusToString(error).c_str(),
1376
+ grpc_transport_stream_op_batch_string(&batch_data->batch_, false)
1377
+ .c_str());
1378
+ }
1379
+ GRPC_CALL_COMBINER_STOP(
1380
+ calld->call_combiner_,
1381
+ "on_complete for internally generated cancel_stream op");
1382
+ }
1383
+
1384
+ //
1385
+ // retriable batch construction
1386
+ //
1387
+
1388
+ void RetryFilter::LegacyCallData::CallAttempt::BatchData::
1389
+ AddRetriableSendInitialMetadataOp() {
1390
+ auto* calld = call_attempt_->calld_;
1391
+ // We need to make a copy of the metadata batch for each attempt, since
1392
+ // the filters in the subchannel stack may modify this batch, and we don't
1393
+ // want those modifications to be passed forward to subsequent attempts.
1394
+ //
1395
+ // If we've already completed one or more attempts, add the
1396
+ // grpc-previous-rpc-attempts header.
1397
+ call_attempt_->send_initial_metadata_ = calld->send_initial_metadata_.Copy();
1398
+ if (GPR_UNLIKELY(calld->num_attempts_completed_ > 0)) {
1399
+ call_attempt_->send_initial_metadata_.Set(GrpcPreviousRpcAttemptsMetadata(),
1400
+ calld->num_attempts_completed_);
1401
+ } else {
1402
+ call_attempt_->send_initial_metadata_.Remove(
1403
+ GrpcPreviousRpcAttemptsMetadata());
1404
+ }
1405
+ call_attempt_->started_send_initial_metadata_ = true;
1406
+ batch_.send_initial_metadata = true;
1407
+ batch_.payload->send_initial_metadata.send_initial_metadata =
1408
+ &call_attempt_->send_initial_metadata_;
1409
+ }
1410
+
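
The header management above follows the retry spec: the attempt-count header is attached only on retry attempts, carrying the number of previously completed attempts, and is removed otherwise. A minimal sketch using a plain map in place of the metadata batch (illustrative, not the MetadataMap API):

    #include <map>
    #include <string>

    void SetPreviousRpcAttempts(std::map<std::string, std::string>* metadata,
                                int num_attempts_completed) {
      if (num_attempts_completed > 0) {
        (*metadata)["grpc-previous-rpc-attempts"] =
            std::to_string(num_attempts_completed);
      } else {
        metadata->erase("grpc-previous-rpc-attempts");
      }
    }
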
1411
+ void RetryFilter::LegacyCallData::CallAttempt::BatchData::
1412
+ AddRetriableSendMessageOp() {
1413
+ auto* calld = call_attempt_->calld_;
1414
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
1415
+ gpr_log(
1416
+ GPR_INFO,
1417
+ "chand=%p calld=%p attempt=%p: starting calld->send_messages[%" PRIuPTR
1418
+ "]",
1419
+ calld->chand_, calld, call_attempt_,
1420
+ call_attempt_->started_send_message_count_);
1421
+ }
1422
+ CachedSendMessage cache =
1423
+ calld->send_messages_[call_attempt_->started_send_message_count_];
1424
+ ++call_attempt_->started_send_message_count_;
1425
+ batch_.send_message = true;
1426
+ batch_.payload->send_message.send_message = cache.slices;
1427
+ batch_.payload->send_message.flags = cache.flags;
1428
+ }
1429
+
1430
+ void RetryFilter::LegacyCallData::CallAttempt::BatchData::
1431
+ AddRetriableSendTrailingMetadataOp() {
1432
+ auto* calld = call_attempt_->calld_;
1433
+ // We need to make a copy of the metadata batch for each attempt, since
1434
+ // the filters in the subchannel stack may modify this batch, and we don't
1435
+ // want those modifications to be passed forward to subsequent attempts.
1436
+ call_attempt_->send_trailing_metadata_ =
1437
+ calld->send_trailing_metadata_.Copy();
1438
+ call_attempt_->started_send_trailing_metadata_ = true;
1439
+ batch_.send_trailing_metadata = true;
1440
+ batch_.payload->send_trailing_metadata.send_trailing_metadata =
1441
+ &call_attempt_->send_trailing_metadata_;
1442
+ }
1443
+
1444
+ void RetryFilter::LegacyCallData::CallAttempt::BatchData::
1445
+ AddRetriableRecvInitialMetadataOp() {
1446
+ call_attempt_->started_recv_initial_metadata_ = true;
1447
+ batch_.recv_initial_metadata = true;
1448
+ call_attempt_->recv_initial_metadata_.Clear();
1449
+ batch_.payload->recv_initial_metadata.recv_initial_metadata =
1450
+ &call_attempt_->recv_initial_metadata_;
1451
+ batch_.payload->recv_initial_metadata.trailing_metadata_available =
1452
+ &call_attempt_->trailing_metadata_available_;
1453
+ GRPC_CLOSURE_INIT(&call_attempt_->recv_initial_metadata_ready_,
1454
+ RecvInitialMetadataReady, this, grpc_schedule_on_exec_ctx);
1455
+ batch_.payload->recv_initial_metadata.recv_initial_metadata_ready =
1456
+ &call_attempt_->recv_initial_metadata_ready_;
1457
+ }
1458
+
1459
+ void RetryFilter::LegacyCallData::CallAttempt::BatchData::
1460
+ AddRetriableRecvMessageOp() {
1461
+ ++call_attempt_->started_recv_message_count_;
1462
+ batch_.recv_message = true;
1463
+ batch_.payload->recv_message.recv_message = &call_attempt_->recv_message_;
1464
+ batch_.payload->recv_message.flags = &call_attempt_->recv_message_flags_;
1465
+ batch_.payload->recv_message.call_failed_before_recv_message = nullptr;
1466
+ GRPC_CLOSURE_INIT(&call_attempt_->recv_message_ready_, RecvMessageReady, this,
1467
+ grpc_schedule_on_exec_ctx);
1468
+ batch_.payload->recv_message.recv_message_ready =
1469
+ &call_attempt_->recv_message_ready_;
1470
+ }
1471
+
1472
+ void RetryFilter::LegacyCallData::CallAttempt::BatchData::
1473
+ AddRetriableRecvTrailingMetadataOp() {
1474
+ call_attempt_->started_recv_trailing_metadata_ = true;
1475
+ batch_.recv_trailing_metadata = true;
1476
+ call_attempt_->recv_trailing_metadata_.Clear();
1477
+ batch_.payload->recv_trailing_metadata.recv_trailing_metadata =
1478
+ &call_attempt_->recv_trailing_metadata_;
1479
+ batch_.payload->recv_trailing_metadata.collect_stats =
1480
+ &call_attempt_->collect_stats_;
1481
+ GRPC_CLOSURE_INIT(&call_attempt_->recv_trailing_metadata_ready_,
1482
+ RecvTrailingMetadataReady, this, grpc_schedule_on_exec_ctx);
1483
+ batch_.payload->recv_trailing_metadata.recv_trailing_metadata_ready =
1484
+ &call_attempt_->recv_trailing_metadata_ready_;
1485
+ }
1486
+
1487
+ void RetryFilter::LegacyCallData::CallAttempt::BatchData::AddCancelStreamOp(
1488
+ grpc_error_handle error) {
1489
+ batch_.cancel_stream = true;
1490
+ batch_.payload->cancel_stream.cancel_error = error;
1491
+ // Override on_complete callback.
1492
+ GRPC_CLOSURE_INIT(&on_complete_, OnCompleteForCancelOp, this, nullptr);
1493
+ }
1494
+
1495
+ //
1496
+ // RetryFilter::LegacyCallData vtable functions
1497
+ //
1498
+
1499
+ grpc_error_handle RetryFilter::LegacyCallData::Init(
1500
+ grpc_call_element* elem, const grpc_call_element_args* args) {
1501
+ auto* chand = static_cast<RetryFilter*>(elem->channel_data);
1502
+ new (elem->call_data) RetryFilter::LegacyCallData(chand, *args);
1503
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
1504
+ gpr_log(GPR_INFO, "chand=%p calld=%p: created call", chand,
1505
+ elem->call_data);
1506
+ }
1507
+ return absl::OkStatus();
1508
+ }
1509
+
1510
+ void RetryFilter::LegacyCallData::Destroy(
1511
+ grpc_call_element* elem, const grpc_call_final_info* /*final_info*/,
1512
+ grpc_closure* then_schedule_closure) {
1513
+ auto* calld = static_cast<RetryFilter::LegacyCallData*>(elem->call_data);
1514
+ // Save our ref to the CallStackDestructionBarrier until after our
1515
+ // dtor is invoked.
1516
+ RefCountedPtr<CallStackDestructionBarrier> call_stack_destruction_barrier =
1517
+ std::move(calld->call_stack_destruction_barrier_);
1518
+ calld->~LegacyCallData();
1519
+ // Now set the callback in the CallStackDestructionBarrier object,
1520
+ // right before we release our ref to it (implicitly upon returning).
1521
+ // The callback will be invoked when the CallStackDestructionBarrier
1522
+ // is destroyed.
1523
+ call_stack_destruction_barrier->set_on_call_stack_destruction(
1524
+ then_schedule_closure);
1525
+ }
1526
+
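
The barrier hand-off in Destroy() is a general pattern: a ref-counted object whose destructor fires a closure guarantees the closure runs only after every holder (the call data and each LB call it created) has released its ref. A standalone sketch under that assumption, with shared_ptr standing in for the internal RefCounted machinery:

    #include <cstdio>
    #include <functional>
    #include <memory>

    // Runs on_destroyed when the last reference goes away.
    class DestructionBarrier {
     public:
      ~DestructionBarrier() {
        if (on_destroyed_) on_destroyed_();
      }
      void set_on_destruction(std::function<void()> cb) {
        on_destroyed_ = std::move(cb);
      }
     private:
      std::function<void()> on_destroyed_;
    };

    int main() {
      auto barrier = std::make_shared<DestructionBarrier>();
      auto held_by_lb_call = barrier;  // each LB call holds a ref
      barrier->set_on_destruction([] { std::puts("call stack destroyed"); });
      barrier.reset();          // call data's ref gone; nothing fires yet
      held_by_lb_call.reset();  // last ref gone; closure fires now
    }
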
1527
+ void RetryFilter::LegacyCallData::StartTransportStreamOpBatch(
1528
+ grpc_call_element* elem, grpc_transport_stream_op_batch* batch) {
1529
+ auto* calld = static_cast<RetryFilter::LegacyCallData*>(elem->call_data);
1530
+ calld->StartTransportStreamOpBatch(batch);
1531
+ }
1532
+
1533
+ void RetryFilter::LegacyCallData::SetPollent(grpc_call_element* elem,
1534
+ grpc_polling_entity* pollent) {
1535
+ auto* calld = static_cast<RetryFilter::LegacyCallData*>(elem->call_data);
1536
+ calld->pollent_ = pollent;
1537
+ }
1538
+
1539
+ //
1540
+ // RetryFilter::LegacyCallData implementation
1541
+ //
1542
+
1543
+ RetryFilter::LegacyCallData::LegacyCallData(RetryFilter* chand,
1544
+ const grpc_call_element_args& args)
1545
+ : chand_(chand),
1546
+ retry_throttle_data_(chand->retry_throttle_data()),
1547
+ retry_policy_(chand->GetRetryPolicy(args.context)),
1548
+ retry_backoff_(
1549
+ BackOff::Options()
1550
+ .set_initial_backoff(retry_policy_ == nullptr
1551
+ ? Duration::Zero()
1552
+ : retry_policy_->initial_backoff())
1553
+ .set_multiplier(retry_policy_ == nullptr
1554
+ ? 0
1555
+ : retry_policy_->backoff_multiplier())
1556
+ .set_jitter(RetryFilter::BackoffJitter())
1557
+ .set_max_backoff(retry_policy_ == nullptr
1558
+ ? Duration::Zero()
1559
+ : retry_policy_->max_backoff())),
1560
+ path_(CSliceRef(args.path)),
1561
+ deadline_(args.deadline),
1562
+ arena_(args.arena),
1563
+ owning_call_(args.call_stack),
1564
+ call_combiner_(args.call_combiner),
1565
+ call_context_(args.context),
1566
+ call_stack_destruction_barrier_(
1567
+ arena_->New<CallStackDestructionBarrier>()),
1568
+ pending_send_initial_metadata_(false),
1569
+ pending_send_message_(false),
1570
+ pending_send_trailing_metadata_(false),
1571
+ retry_committed_(false),
1572
+ retry_codepath_started_(false),
1573
+ sent_transparent_retry_not_seen_by_server_(false) {}
1574
+
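
The BackOff options in the initializer list encode the retry schedule from the service config: delays start at initial_backoff, grow by backoff_multiplier up to max_backoff, and are jittered. A hedged sketch of one plausible schedule; the uniform multiplicative jitter below is an assumption for illustration, not the BackOff implementation:

    #include <algorithm>
    #include <random>

    // Returns the next delay in ms and advances the unjittered base delay.
    double NextRetryDelayMs(double* base_ms, double multiplier, double max_ms,
                            double jitter, std::mt19937* rng) {
      std::uniform_real_distribution<double> dist(1.0 - jitter, 1.0 + jitter);
      double delay = *base_ms * dist(*rng);
      *base_ms = std::min(*base_ms * multiplier, max_ms);
      return delay;
    }

A server pushback, as in StartRetryTimer() further below, overrides the computed delay for that attempt and resets the backoff state.
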
1575
+ RetryFilter::LegacyCallData::~LegacyCallData() {
1576
+ FreeAllCachedSendOpData();
1577
+ CSliceUnref(path_);
1578
+ // Make sure there are no remaining pending batches.
1579
+ for (size_t i = 0; i < GPR_ARRAY_SIZE(pending_batches_); ++i) {
1580
+ GPR_ASSERT(pending_batches_[i].batch == nullptr);
1581
+ }
1582
+ }
1583
+
1584
+ void RetryFilter::LegacyCallData::StartTransportStreamOpBatch(
1585
+ grpc_transport_stream_op_batch* batch) {
1586
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace) &&
1587
+ !GRPC_TRACE_FLAG_ENABLED(grpc_trace_channel)) {
1588
+ gpr_log(GPR_INFO, "chand=%p calld=%p: batch started from surface: %s",
1589
+ chand_, this,
1590
+ grpc_transport_stream_op_batch_string(batch, false).c_str());
1591
+ }
1592
+ // If we have an LB call, delegate to the LB call.
1593
+ if (committed_call_ != nullptr) {
1594
+ // Note: This will release the call combiner.
1595
+ committed_call_->StartTransportStreamOpBatch(batch);
1596
+ return;
1597
+ }
1598
+ // If we were previously cancelled from the surface, fail this
1599
+ // batch immediately.
1600
+ if (!cancelled_from_surface_.ok()) {
1601
+ // Note: This will release the call combiner.
1602
+ grpc_transport_stream_op_batch_finish_with_failure(
1603
+ batch, cancelled_from_surface_, call_combiner_);
1604
+ return;
1605
+ }
1606
+ // Handle cancellation.
1607
+ if (GPR_UNLIKELY(batch->cancel_stream)) {
1608
+ // Save cancel_error in case subsequent batches are started.
1609
+ cancelled_from_surface_ = batch->payload->cancel_stream.cancel_error;
1610
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
1611
+ gpr_log(GPR_INFO, "chand=%p calld=%p: cancelled from surface: %s", chand_,
1612
+ this, StatusToString(cancelled_from_surface_).c_str());
1613
+ }
1614
+ // Fail any pending batches.
1615
+ PendingBatchesFail(cancelled_from_surface_);
1616
+ // If we have a current call attempt, commit the call, then send
1617
+ // the cancellation down to that attempt. When the call fails, it
1618
+ // will not be retried, because we have committed it here.
1619
+ if (call_attempt_ != nullptr) {
1620
+ RetryCommit(call_attempt_.get());
1621
+ // TODO(roth): When implementing hedging, this will get more
1622
+ // complex, because instead of just passing the batch down to a
1623
+ // single call attempt, we'll need to cancel multiple call
1624
+ // attempts and wait for the cancellation on_complete from each call
1625
+ // attempt before we propagate the on_complete from this batch
1626
+ // back to the surface.
1627
+ // Note: This will release the call combiner.
1628
+ call_attempt_->CancelFromSurface(batch);
1629
+ return;
1630
+ }
1631
+ // Cancel retry timer if needed.
1632
+ if (retry_timer_handle_.has_value()) {
1633
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
1634
+ gpr_log(GPR_INFO, "chand=%p calld=%p: cancelling retry timer", chand_,
1635
+ this);
1636
+ }
1637
+ if (chand_->event_engine()->Cancel(*retry_timer_handle_)) {
1638
+ GRPC_CALL_STACK_UNREF(owning_call_, "OnRetryTimer");
1639
+ }
1640
+ retry_timer_handle_.reset();
1641
+ FreeAllCachedSendOpData();
1642
+ }
1643
+ // We have no call attempt, so there's nowhere to send the cancellation
1644
+ // batch. Return it back to the surface immediately.
1645
+ // Note: This will release the call combiner.
1646
+ grpc_transport_stream_op_batch_finish_with_failure(
1647
+ batch, cancelled_from_surface_, call_combiner_);
1648
+ return;
1649
+ }
1650
+ // Add the batch to the pending list.
1651
+ PendingBatch* pending = PendingBatchesAdd(batch);
1652
+ // If the timer is pending, yield the call combiner and wait for it to
1653
+ // run, since we don't want to start another call attempt until it does.
1654
+ if (retry_timer_handle_.has_value()) {
1655
+ GRPC_CALL_COMBINER_STOP(call_combiner_,
1656
+ "added pending batch while retry timer pending");
1657
+ return;
1658
+ }
1659
+ // If we do not yet have a call attempt, create one.
1660
+ if (call_attempt_ == nullptr) {
1661
+ // If this is the first batch and retries are already committed
1662
+ // (e.g., if this batch put the call above the buffer size limit), then
1663
+ // immediately create an LB call and delegate the batch to it. This
1664
+ // avoids the overhead of unnecessarily allocating a CallAttempt
1665
+ // object or caching any of the send op data.
1666
+ // Note that we would ideally like to do this also on subsequent
1667
+ // attempts (e.g., if a batch puts the call above the buffer size
1668
+ // limit since the last attempt was complete), but in practice that's
1669
+ // not really worthwhile, because we will almost always have cached and
1670
+ // completed at least the send_initial_metadata op on the previous
1671
+ // attempt, which means that we'd need special logic to replay the
1672
+ // batch anyway, which is exactly what the CallAttempt object provides.
1673
+ // We also skip this optimization if perAttemptRecvTimeout is set in the
1674
+ // retry policy, because we need the code in CallAttempt to handle
1675
+ // the associated timer.
1676
+ if (!retry_codepath_started_ && retry_committed_ &&
1677
+ (retry_policy_ == nullptr ||
1678
+ !retry_policy_->per_attempt_recv_timeout().has_value())) {
1679
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
1680
+ gpr_log(GPR_INFO,
1681
+ "chand=%p calld=%p: retry committed before first attempt; "
1682
+ "creating LB call",
1683
+ chand_, this);
1684
+ }
1685
+ PendingBatchClear(pending);
1686
+ auto* service_config_call_data =
1687
+ static_cast<ClientChannelServiceConfigCallData*>(
1688
+ call_context_[GRPC_CONTEXT_SERVICE_CONFIG_CALL_DATA].value);
1689
+ committed_call_ = CreateLoadBalancedCall(
1690
+ [service_config_call_data]() { service_config_call_data->Commit(); },
1691
+ /*is_transparent_retry=*/false);
1692
+ committed_call_->StartTransportStreamOpBatch(batch);
1693
+ return;
1694
+ }
1695
+ // Otherwise, create a call attempt.
1696
+ // The attempt will automatically start any necessary replays or
1697
+ // pending batches.
1698
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
1699
+ gpr_log(GPR_INFO, "chand=%p calld=%p: creating call attempt", chand_,
1700
+ this);
1701
+ }
1702
+ retry_codepath_started_ = true;
1703
+ CreateCallAttempt(/*is_transparent_retry=*/false);
1704
+ return;
1705
+ }
1706
+ // Send batches to call attempt.
1707
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
1708
+ gpr_log(GPR_INFO, "chand=%p calld=%p: starting batch on attempt=%p", chand_,
1709
+ this, call_attempt_.get());
1710
+ }
1711
+ call_attempt_->StartRetriableBatches();
1712
+ }
1713
+
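
The early-commit special case above boils down to three conditions; a sketch of the predicate, with an illustrative policy view in place of the internal retry-policy type:

    struct RetryPolicyView {
      bool has_per_attempt_recv_timeout = false;
    };

    // True when the filter can bypass CallAttempt entirely and create the
    // LB call directly on the first batch.
    bool CanBypassRetryCodepath(bool retry_codepath_started,
                                bool retry_committed,
                                const RetryPolicyView* policy) {
      return !retry_codepath_started && retry_committed &&
             (policy == nullptr || !policy->has_per_attempt_recv_timeout);
    }
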
1714
+ OrphanablePtr<ClientChannel::FilterBasedLoadBalancedCall>
1715
+ RetryFilter::LegacyCallData::CreateLoadBalancedCall(
1716
+ absl::AnyInvocable<void()> on_commit, bool is_transparent_retry) {
1717
+ grpc_call_element_args args = {owning_call_, nullptr, call_context_,
1718
+ path_, /*start_time=*/0, deadline_,
1719
+ arena_, call_combiner_};
1720
+ return chand_->client_channel()->CreateLoadBalancedCall(
1721
+ args, pollent_,
1722
+ // This callback holds a ref to the CallStackDestructionBarrier
1723
+ // object until the LB call is destroyed.
1724
+ call_stack_destruction_barrier_->MakeLbCallDestructionClosure(this),
1725
+ std::move(on_commit), is_transparent_retry);
1726
+ }
1727
+
1728
+ void RetryFilter::LegacyCallData::CreateCallAttempt(bool is_transparent_retry) {
1729
+ call_attempt_ = MakeRefCounted<CallAttempt>(this, is_transparent_retry);
1730
+ call_attempt_->StartRetriableBatches();
1731
+ }
1732
+
1733
+ //
1734
+ // send op data caching
1735
+ //
1736
+
1737
+ void RetryFilter::LegacyCallData::MaybeCacheSendOpsForBatch(
1738
+ PendingBatch* pending) {
1739
+ if (pending->send_ops_cached) return;
1740
+ pending->send_ops_cached = true;
1741
+ grpc_transport_stream_op_batch* batch = pending->batch;
1742
+ // Save a copy of metadata for send_initial_metadata ops.
1743
+ if (batch->send_initial_metadata) {
1744
+ seen_send_initial_metadata_ = true;
1745
+ grpc_metadata_batch* send_initial_metadata =
1746
+ batch->payload->send_initial_metadata.send_initial_metadata;
1747
+ send_initial_metadata_ = send_initial_metadata->Copy();
1748
+ }
1749
+ // Set up cache for send_message ops.
1750
+ if (batch->send_message) {
1751
+ SliceBuffer* cache = arena_->New<SliceBuffer>(std::move(
1752
+ *std::exchange(batch->payload->send_message.send_message, nullptr)));
1753
+ send_messages_.push_back({cache, batch->payload->send_message.flags});
1754
+ }
1755
+ // Save metadata batch for send_trailing_metadata ops.
1756
+ if (batch->send_trailing_metadata) {
1757
+ seen_send_trailing_metadata_ = true;
1758
+ grpc_metadata_batch* send_trailing_metadata =
1759
+ batch->payload->send_trailing_metadata.send_trailing_metadata;
1760
+ send_trailing_metadata_ = send_trailing_metadata->Copy();
1761
+ }
1762
+ }
1763
+
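
Caching is what gives later attempts something to replay: the two metadata batches are copied, while each message payload is moved into an arena-owned slot so the bytes outlive the originating batch. A simplified sketch of the cache shape, with std::string standing in for metadata batches and slice buffers:

    #include <cstdint>
    #include <string>
    #include <vector>

    struct CachedSendMessage {
      std::string slices;  // stands in for the arena-allocated SliceBuffer*
      uint32_t flags = 0;
    };

    struct SendOpCache {
      bool seen_send_initial_metadata = false;
      std::string send_initial_metadata;  // copied once, replayed per attempt
      std::vector<CachedSendMessage> send_messages;
      bool seen_send_trailing_metadata = false;
      std::string send_trailing_metadata;
    };
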
1764
+ void RetryFilter::LegacyCallData::FreeCachedSendInitialMetadata() {
1765
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
1766
+ gpr_log(GPR_INFO, "chand=%p calld=%p: destroying send_initial_metadata",
1767
+ chand_, this);
1768
+ }
1769
+ send_initial_metadata_.Clear();
1770
+ }
1771
+
1772
+ void RetryFilter::LegacyCallData::FreeCachedSendMessage(size_t idx) {
1773
+ if (send_messages_[idx].slices != nullptr) {
1774
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
1775
+ gpr_log(GPR_INFO,
1776
+ "chand=%p calld=%p: destroying send_messages[%" PRIuPTR "]",
1777
+ chand_, this, idx);
1778
+ }
1779
+ Destruct(std::exchange(send_messages_[idx].slices, nullptr));
1780
+ }
1781
+ }
1782
+
1783
+ void RetryFilter::LegacyCallData::FreeCachedSendTrailingMetadata() {
1784
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
1785
+ gpr_log(GPR_INFO, "chand=%p calld=%p: destroying send_trailing_metadata",
1786
+ chand_, this);
1787
+ }
1788
+ send_trailing_metadata_.Clear();
1789
+ }
1790
+
1791
+ void RetryFilter::LegacyCallData::FreeAllCachedSendOpData() {
1792
+ if (seen_send_initial_metadata_) {
1793
+ FreeCachedSendInitialMetadata();
1794
+ }
1795
+ for (size_t i = 0; i < send_messages_.size(); ++i) {
1796
+ FreeCachedSendMessage(i);
1797
+ }
1798
+ if (seen_send_trailing_metadata_) {
1799
+ FreeCachedSendTrailingMetadata();
1800
+ }
1801
+ }
1802
+
1803
+ //
1804
+ // pending_batches management
1805
+ //
1806
+
1807
+ size_t RetryFilter::LegacyCallData::GetBatchIndex(
1808
+ grpc_transport_stream_op_batch* batch) {
1809
+ if (batch->send_initial_metadata) return 0;
1810
+ if (batch->send_message) return 1;
1811
+ if (batch->send_trailing_metadata) return 2;
1812
+ if (batch->recv_initial_metadata) return 3;
1813
+ if (batch->recv_message) return 4;
1814
+ if (batch->recv_trailing_metadata) return 5;
1815
+ GPR_UNREACHABLE_CODE(return static_cast<size_t>(-1));
1816
+ }
1817
+
1818
+ // This is called via the call combiner, so access to calld is synchronized.
1819
+ RetryFilter::LegacyCallData::PendingBatch*
1820
+ RetryFilter::LegacyCallData::PendingBatchesAdd(
1821
+ grpc_transport_stream_op_batch* batch) {
1822
+ const size_t idx = GetBatchIndex(batch);
1823
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
1824
+ gpr_log(GPR_INFO,
1825
+ "chand=%p calld=%p: adding pending batch at index %" PRIuPTR,
1826
+ chand_, this, idx);
1827
+ }
1828
+ PendingBatch* pending = &pending_batches_[idx];
1829
+ GPR_ASSERT(pending->batch == nullptr);
1830
+ pending->batch = batch;
1831
+ pending->send_ops_cached = false;
1832
+ // Update state in calld about pending batches.
1833
+ // Also check if the batch takes us over the retry buffer limit.
1834
+ // Note: We don't check the size of trailing metadata here, because
1835
+ // gRPC clients do not send trailing metadata.
1836
+ if (batch->send_initial_metadata) {
1837
+ pending_send_initial_metadata_ = true;
1838
+ bytes_buffered_for_retry_ += batch->payload->send_initial_metadata
1839
+ .send_initial_metadata->TransportSize();
1840
+ }
1841
+ if (batch->send_message) {
1842
+ pending_send_message_ = true;
1843
+ bytes_buffered_for_retry_ +=
1844
+ batch->payload->send_message.send_message->Length();
1845
+ }
1846
+ if (batch->send_trailing_metadata) {
1847
+ pending_send_trailing_metadata_ = true;
1848
+ }
1849
+ // TODO(roth): When we implement hedging, if there are currently attempts
1850
+ // in flight, we will need to pick the one on which the max number of send
1851
+ // ops have already been sent, and we commit to that attempt.
1852
+ if (GPR_UNLIKELY(bytes_buffered_for_retry_ >
1853
+ chand_->per_rpc_retry_buffer_size())) {
1854
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
1855
+ gpr_log(GPR_INFO,
1856
+ "chand=%p calld=%p: exceeded retry buffer size, committing",
1857
+ chand_, this);
1858
+ }
1859
+ RetryCommit(call_attempt_.get());
1860
+ }
1861
+ return pending;
1862
+ }
1863
+
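
The commit-on-overflow logic above keeps buffered send ops bounded; once the buffered bytes exceed the per-RPC limit, the call is committed and can no longer be replayed. A sketch of the accounting, where the 256 KiB figure is an assumed default for the grpc.per_rpc_retry_buffer_size channel arg rather than a quoted constant:

    #include <cstddef>

    struct RetryBufferTracker {
      size_t limit = 256 * 1024;
      size_t buffered = 0;
      // Returns true if adding this op's bytes requires committing the call.
      bool AddAndCheckCommit(size_t op_bytes) {
        buffered += op_bytes;
        return buffered > limit;
      }
    };
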
1864
+ void RetryFilter::LegacyCallData::PendingBatchClear(PendingBatch* pending) {
1865
+ if (pending->batch->send_initial_metadata) {
1866
+ pending_send_initial_metadata_ = false;
1867
+ }
1868
+ if (pending->batch->send_message) {
1869
+ pending_send_message_ = false;
1870
+ }
1871
+ if (pending->batch->send_trailing_metadata) {
1872
+ pending_send_trailing_metadata_ = false;
1873
+ }
1874
+ pending->batch = nullptr;
1875
+ }
1876
+
1877
+ void RetryFilter::LegacyCallData::MaybeClearPendingBatch(
1878
+ PendingBatch* pending) {
1879
+ grpc_transport_stream_op_batch* batch = pending->batch;
1880
+ // We clear the pending batch if all of its callbacks have been
1881
+ // scheduled and reset to nullptr.
1882
+ if (batch->on_complete == nullptr &&
1883
+ (!batch->recv_initial_metadata ||
1884
+ batch->payload->recv_initial_metadata.recv_initial_metadata_ready ==
1885
+ nullptr) &&
1886
+ (!batch->recv_message ||
1887
+ batch->payload->recv_message.recv_message_ready == nullptr) &&
1888
+ (!batch->recv_trailing_metadata ||
1889
+ batch->payload->recv_trailing_metadata.recv_trailing_metadata_ready ==
1890
+ nullptr)) {
1891
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
1892
+ gpr_log(GPR_INFO, "chand=%p calld=%p: clearing pending batch", chand_,
1893
+ this);
1894
+ }
1895
+ PendingBatchClear(pending);
1896
+ }
1897
+ }
+
+// This is called via the call combiner, so access to calld is synchronized.
+void RetryFilter::LegacyCallData::FailPendingBatchInCallCombiner(
+    void* arg, grpc_error_handle error) {
+  grpc_transport_stream_op_batch* batch =
+      static_cast<grpc_transport_stream_op_batch*>(arg);
+  RetryFilter::LegacyCallData* call = static_cast<RetryFilter::LegacyCallData*>(
+      batch->handler_private.extra_arg);
+  // Note: This will release the call combiner.
+  grpc_transport_stream_op_batch_finish_with_failure(batch, error,
+                                                     call->call_combiner_);
+}
+
+// This is called via the call combiner, so access to calld is synchronized.
+void RetryFilter::LegacyCallData::PendingBatchesFail(grpc_error_handle error) {
+  GPR_ASSERT(!error.ok());
+  if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
+    size_t num_batches = 0;
+    for (size_t i = 0; i < GPR_ARRAY_SIZE(pending_batches_); ++i) {
+      if (pending_batches_[i].batch != nullptr) ++num_batches;
+    }
+    gpr_log(GPR_INFO,
+            "chand=%p calld=%p: failing %" PRIuPTR " pending batches: %s",
+            chand_, this, num_batches, StatusToString(error).c_str());
+  }
+  CallCombinerClosureList closures;
+  for (size_t i = 0; i < GPR_ARRAY_SIZE(pending_batches_); ++i) {
+    PendingBatch* pending = &pending_batches_[i];
+    grpc_transport_stream_op_batch* batch = pending->batch;
+    if (batch != nullptr) {
+      batch->handler_private.extra_arg = this;
+      GRPC_CLOSURE_INIT(&batch->handler_private.closure,
+                        FailPendingBatchInCallCombiner, batch,
+                        grpc_schedule_on_exec_ctx);
+      closures.Add(&batch->handler_private.closure, error,
+                   "PendingBatchesFail");
+      PendingBatchClear(pending);
+    }
+  }
+  closures.RunClosuresWithoutYielding(call_combiner_);
+}
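// [Reviewer's sketch -- not part of the upstream diff.] PendingBatchesFail
// follows a clear-then-run pattern: every pending batch is detached from the
// pending array before any failure callback executes, then the collected
// closures run back to back without yielding the call combiner. A hedged
// analogue with std::function in place of grpc_closure:

#include <functional>
#include <utility>
#include <vector>

void FailAllPending(std::vector<std::function<void()>>& pending) {
  // Detach first: re-entrant work triggered by a callback sees an already
  // emptied pending list rather than a half-failed one.
  std::vector<std::function<void()>> closures = std::move(pending);
  pending.clear();
  for (auto& closure : closures) closure();
}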
+
+template <typename Predicate>
+RetryFilter::LegacyCallData::PendingBatch*
+RetryFilter::LegacyCallData::PendingBatchFind(const char* log_message,
+                                              Predicate predicate) {
+  for (size_t i = 0; i < GPR_ARRAY_SIZE(pending_batches_); ++i) {
+    PendingBatch* pending = &pending_batches_[i];
+    grpc_transport_stream_op_batch* batch = pending->batch;
+    if (batch != nullptr && predicate(batch)) {
+      if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
+        gpr_log(GPR_INFO,
+                "chand=%p calld=%p: %s pending batch at index %" PRIuPTR,
+                chand_, this, log_message, i);
+      }
+      return pending;
+    }
+  }
+  return nullptr;
+}
+
+//
+// retry code
+//
+
+void RetryFilter::LegacyCallData::RetryCommit(CallAttempt* call_attempt) {
+  if (retry_committed_) return;
+  retry_committed_ = true;
+  if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
+    gpr_log(GPR_INFO, "chand=%p calld=%p: committing retries", chand_, this);
+  }
+  if (call_attempt != nullptr) {
+    // If the call attempt's LB call has been committed, invoke the
+    // call's on_commit callback.
+    // Note: If call_attempt is null, this is happening before the first
+    // retry attempt is started, in which case we'll just pass the real
+    // on_commit callback down into the LB call, and it won't be our
+    // problem anymore.
+    if (call_attempt->lb_call_committed()) {
+      auto* service_config_call_data =
+          static_cast<ClientChannelServiceConfigCallData*>(
+              call_context_[GRPC_CONTEXT_SERVICE_CONFIG_CALL_DATA].value);
+      service_config_call_data->Commit();
+    }
+    // Free cached send ops.
+    call_attempt->FreeCachedSendOpDataAfterCommit();
+  }
+}
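// [Reviewer's sketch -- not part of the upstream diff.] RetryCommit is
// idempotent: the retry_committed_ flag makes the first caller win, so the
// commit work (notifying the service config call data, freeing cached send
// ops) runs at most once per call. The guard pattern in isolation
// (CommitOnce is a hypothetical name):

class CommitOnce {
 public:
  // Invokes `work` on the first call only; later calls are no-ops.
  template <typename F>
  void Commit(F&& work) {
    if (committed_) return;
    committed_ = true;
    work();
  }

 private:
  bool committed_ = false;
};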
+
+void RetryFilter::LegacyCallData::StartRetryTimer(
+    absl::optional<Duration> server_pushback) {
+  // Reset call attempt.
+  call_attempt_.reset(DEBUG_LOCATION, "StartRetryTimer");
+  // Compute backoff delay.
+  Duration next_attempt_timeout;
+  if (server_pushback.has_value()) {
+    GPR_ASSERT(*server_pushback >= Duration::Zero());
+    next_attempt_timeout = *server_pushback;
+    retry_backoff_.Reset();
+  } else {
+    next_attempt_timeout = retry_backoff_.NextAttemptTime() - Timestamp::Now();
+  }
+  if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
+    gpr_log(GPR_INFO,
+            "chand=%p calld=%p: retrying failed call in %" PRId64 " ms", chand_,
+            this, next_attempt_timeout.millis());
+  }
+  // Schedule retry after computed delay.
+  GRPC_CALL_STACK_REF(owning_call_, "OnRetryTimer");
+  retry_timer_handle_ =
+      chand_->event_engine()->RunAfter(next_attempt_timeout, [this] {
+        ApplicationCallbackExecCtx callback_exec_ctx;
+        ExecCtx exec_ctx;
+        OnRetryTimer();
+      });
+}
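// [Reviewer's sketch -- not part of the upstream diff.] StartRetryTimer picks
// the delay as: honor a server pushback verbatim when one was sent (and
// reset the exponential schedule so the next local backoff starts over);
// otherwise take the next step of the exponential backoff. A standalone
// sketch with std::chrono and a deliberately simplified, jitter-free backoff
// (the real code uses grpc_core::BackOff, which also applies jitter and a
// multiplier taken from the retry policy):

#include <algorithm>
#include <chrono>
#include <optional>

using Millis = std::chrono::milliseconds;

struct SimpleBackoff {
  Millis next{1000};   // hypothetical initial backoff
  Millis max{120000};  // hypothetical cap
  Millis Step() {
    Millis current = next;
    next = std::min(next * 2, max);
    return current;
  }
  void Reset() { next = Millis{1000}; }
};

Millis NextAttemptDelay(SimpleBackoff& backoff,
                        std::optional<Millis> server_pushback) {
  if (server_pushback.has_value()) {
    backoff.Reset();          // pushback restarts the local schedule
    return *server_pushback;  // and is used as-is for this attempt
  }
  return backoff.Step();
}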
+
+void RetryFilter::LegacyCallData::OnRetryTimer() {
+  GRPC_CLOSURE_INIT(&retry_closure_, OnRetryTimerLocked, this, nullptr);
+  GRPC_CALL_COMBINER_START(call_combiner_, &retry_closure_, absl::OkStatus(),
+                           "retry timer fired");
+}
+
+void RetryFilter::LegacyCallData::OnRetryTimerLocked(
+    void* arg, grpc_error_handle /*error*/) {
+  auto* calld = static_cast<RetryFilter::LegacyCallData*>(arg);
+  calld->retry_timer_handle_.reset();
+  calld->CreateCallAttempt(/*is_transparent_retry=*/false);
+  GRPC_CALL_STACK_UNREF(calld->owning_call_, "OnRetryTimer");
+}
+
+void RetryFilter::LegacyCallData::AddClosureToStartTransparentRetry(
+    CallCombinerClosureList* closures) {
+  if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
+    gpr_log(GPR_INFO, "chand=%p calld=%p: scheduling transparent retry", chand_,
+            this);
+  }
+  GRPC_CALL_STACK_REF(owning_call_, "OnRetryTimer");
+  GRPC_CLOSURE_INIT(&retry_closure_, StartTransparentRetry, this, nullptr);
+  closures->Add(&retry_closure_, absl::OkStatus(), "start transparent retry");
+}
+
+void RetryFilter::LegacyCallData::StartTransparentRetry(
+    void* arg, grpc_error_handle /*error*/) {
+  auto* calld = static_cast<RetryFilter::LegacyCallData*>(arg);
+  if (calld->cancelled_from_surface_.ok()) {
+    calld->CreateCallAttempt(/*is_transparent_retry=*/true);
+  } else {
+    GRPC_CALL_COMBINER_STOP(calld->call_combiner_,
+                            "call cancelled before transparent retry");
+  }
+  GRPC_CALL_STACK_UNREF(calld->owning_call_, "OnRetryTimer");
+}
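// [Reviewer's sketch -- not part of the upstream diff.] The two retry paths
// above differ in timing and in the is_transparent_retry flag: the timer
// path (OnRetryTimerLocked) waits out the backoff delay, while the
// transparent path fires immediately but first re-checks that the surface
// has not cancelled the call in the meantime. That last check, in isolation
// (names are illustrative):

#include <functional>

void MaybeStartTransparentRetry(bool cancelled_from_surface,
                                const std::function<void()>& start_attempt,
                                const std::function<void()>& stop_combiner) {
  if (!cancelled_from_surface) {
    start_attempt();  // new attempt with is_transparent_retry == true
  } else {
    stop_combiner();  // release the call combiner; no new attempt
  }
}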
+
+}  // namespace grpc_core