grpc 1.38.0 → 1.40.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of grpc might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Makefile +68 -21
- data/include/grpc/event_engine/endpoint_config.h +48 -0
- data/include/grpc/event_engine/event_engine.h +23 -29
- data/include/grpc/event_engine/port.h +2 -0
- data/include/grpc/event_engine/slice_allocator.h +21 -36
- data/include/grpc/grpc.h +9 -2
- data/include/grpc/grpc_security.h +32 -0
- data/include/grpc/grpc_security_constants.h +1 -0
- data/include/grpc/impl/codegen/grpc_types.h +33 -19
- data/include/grpc/impl/codegen/port_platform.h +41 -0
- data/src/core/ext/filters/client_channel/client_channel.cc +415 -249
- data/src/core/ext/filters/client_channel/client_channel.h +42 -18
- data/src/core/ext/filters/client_channel/config_selector.h +19 -6
- data/src/core/ext/filters/client_channel/health/health_check_client.cc +2 -0
- data/src/core/ext/filters/client_channel/health/health_check_client.h +3 -3
- data/src/core/ext/filters/client_channel/http_proxy.cc +16 -1
- data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc +7 -8
- data/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +12 -21
- data/src/core/ext/filters/client_channel/lb_policy/priority/priority.cc +3 -5
- data/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc +734 -0
- data/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h +10 -0
- data/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc +8 -15
- data/src/core/ext/filters/client_channel/lb_policy/weighted_target/weighted_target.cc +3 -6
- data/src/core/ext/filters/client_channel/lb_policy/xds/cds.cc +18 -36
- data/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_impl.cc +14 -22
- data/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_manager.cc +2 -9
- data/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_resolver.cc +68 -103
- data/src/core/ext/filters/client_channel/lb_policy.cc +1 -15
- data/src/core/ext/filters/client_channel/lb_policy.h +70 -46
- data/src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.cc +1 -3
- data/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver_event_engine.cc +31 -0
- data/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_event_engine.cc +28 -0
- data/src/core/ext/filters/client_channel/resolver/dns/native/dns_resolver.cc +1 -3
- data/src/core/ext/filters/client_channel/resolver/google_c2p/google_c2p_resolver.cc +7 -2
- data/src/core/ext/filters/client_channel/resolver/xds/xds_resolver.cc +116 -76
- data/src/core/ext/filters/client_channel/retry_filter.cc +967 -544
- data/src/core/ext/filters/client_channel/retry_service_config.cc +57 -28
- data/src/core/ext/filters/client_channel/retry_service_config.h +9 -3
- data/src/core/ext/filters/client_channel/service_config_call_data.h +45 -5
- data/src/core/ext/filters/client_idle/client_idle_filter.cc +1 -1
- data/src/core/ext/filters/http/client/http_client_filter.cc +5 -2
- data/src/core/ext/transport/chttp2/client/insecure/channel_create_posix.cc +2 -1
- data/src/core/ext/transport/chttp2/server/chttp2_server.cc +5 -1
- data/src/core/ext/transport/chttp2/server/insecure/server_chttp2_posix.cc +3 -2
- data/src/core/ext/transport/chttp2/transport/bin_decoder.cc +1 -1
- data/src/core/{lib/event_engine/slice_allocator.cc → ext/transport/chttp2/transport/chttp2_slice_allocator.cc} +23 -16
- data/src/core/ext/transport/chttp2/transport/chttp2_slice_allocator.h +74 -0
- data/src/core/ext/transport/chttp2/transport/chttp2_transport.cc +12 -10
- data/src/core/ext/transport/chttp2/transport/flow_control.h +1 -1
- data/src/core/ext/transport/chttp2/transport/frame_data.cc +4 -4
- data/src/core/ext/transport/chttp2/transport/frame_goaway.cc +8 -8
- data/src/core/ext/transport/chttp2/transport/frame_settings.cc +5 -5
- data/src/core/ext/transport/chttp2/transport/hpack_parser.cc +639 -752
- data/src/core/ext/transport/chttp2/transport/hpack_parser.h +190 -69
- data/src/core/ext/transport/chttp2/transport/internal.h +2 -1
- data/src/core/ext/transport/chttp2/transport/parsing.cc +72 -56
- data/src/core/ext/transport/chttp2/transport/varint.cc +6 -4
- data/src/core/ext/transport/inproc/inproc_transport.cc +42 -31
- data/src/core/ext/upb-generated/envoy/config/bootstrap/v3/bootstrap.upb.c +56 -35
- data/src/core/ext/upb-generated/envoy/config/bootstrap/v3/bootstrap.upb.h +180 -76
- data/src/core/ext/upb-generated/envoy/config/cluster/v3/cluster.upb.c +35 -27
- data/src/core/ext/upb-generated/envoy/config/cluster/v3/cluster.upb.h +97 -48
- data/src/core/ext/upb-generated/envoy/config/core/v3/base.upb.c +45 -9
- data/src/core/ext/upb-generated/envoy/config/core/v3/base.upb.h +67 -7
- data/src/core/ext/upb-generated/envoy/config/core/v3/protocol.upb.c +66 -9
- data/src/core/ext/upb-generated/envoy/config/core/v3/protocol.upb.h +227 -0
- data/src/core/ext/upb-generated/envoy/config/core/v3/resolver.upb.c +46 -0
- data/src/core/ext/upb-generated/envoy/config/core/v3/resolver.upb.h +121 -0
- data/src/core/ext/upb-generated/envoy/config/core/v3/substitution_format_string.upb.c +1 -0
- data/src/core/ext/upb-generated/envoy/config/core/v3/udp_socket_config.upb.c +35 -0
- data/src/core/ext/upb-generated/envoy/config/core/v3/udp_socket_config.upb.h +90 -0
- data/src/core/ext/upb-generated/envoy/config/listener/v3/listener.upb.c +32 -24
- data/src/core/ext/upb-generated/envoy/config/listener/v3/listener.upb.h +120 -73
- data/src/core/ext/upb-generated/envoy/config/listener/v3/listener_components.upb.c +4 -2
- data/src/core/ext/upb-generated/envoy/config/listener/v3/listener_components.upb.h +15 -0
- data/src/core/ext/upb-generated/envoy/config/listener/v3/quic_config.upb.c +48 -0
- data/src/core/ext/upb-generated/envoy/config/listener/v3/quic_config.upb.h +171 -0
- data/src/core/ext/upb-generated/envoy/config/listener/v3/udp_listener_config.upb.c +8 -6
- data/src/core/ext/upb-generated/envoy/config/listener/v3/udp_listener_config.upb.h +27 -19
- data/src/core/ext/upb-generated/envoy/config/rbac/v3/rbac.upb.c +1 -0
- data/src/core/ext/upb-generated/envoy/config/route/v3/route.upb.c +24 -7
- data/src/core/ext/upb-generated/envoy/config/route/v3/route.upb.h +57 -0
- data/src/core/ext/upb-generated/envoy/config/route/v3/route_components.upb.c +29 -17
- data/src/core/ext/upb-generated/envoy/config/route/v3/route_components.upb.h +72 -0
- data/src/core/ext/upb-generated/envoy/extensions/filters/http/fault/v3/fault.upb.c +3 -2
- data/src/core/ext/upb-generated/envoy/extensions/filters/http/fault/v3/fault.upb.h +4 -0
- data/src/core/ext/upb-generated/envoy/extensions/filters/http/router/v3/router.upb.c +6 -5
- data/src/core/ext/upb-generated/envoy/extensions/filters/http/router/v3/router.upb.h +15 -11
- data/src/core/ext/upb-generated/envoy/extensions/filters/network/http_connection_manager/v3/http_connection_manager.upb.c +85 -43
- data/src/core/ext/upb-generated/envoy/extensions/filters/network/http_connection_manager/v3/http_connection_manager.upb.h +274 -91
- data/src/core/ext/upb-generated/envoy/extensions/transport_sockets/tls/v3/common.upb.c +11 -8
- data/src/core/ext/upb-generated/envoy/extensions/transport_sockets/tls/v3/common.upb.h +30 -13
- data/src/core/ext/upb-generated/envoy/service/status/v3/csds.upb.c +33 -5
- data/src/core/ext/upb-generated/envoy/service/status/v3/csds.upb.h +115 -0
- data/src/core/ext/upb-generated/envoy/type/http/v3/path_transformation.upb.c +60 -0
- data/src/core/ext/upb-generated/envoy/type/http/v3/path_transformation.upb.h +181 -0
- data/src/core/ext/upb-generated/envoy/type/matcher/v3/regex.upb.c +1 -0
- data/src/core/ext/upb-generated/validate/validate.upb.c +82 -66
- data/src/core/ext/upb-generated/validate/validate.upb.h +220 -124
- data/src/core/ext/upbdefs-generated/envoy/annotations/deprecation.upbdefs.c +15 -7
- data/src/core/ext/upbdefs-generated/envoy/config/accesslog/v3/accesslog.upbdefs.c +53 -52
- data/src/core/ext/upbdefs-generated/envoy/config/bootstrap/v3/bootstrap.upbdefs.c +318 -277
- data/src/core/ext/upbdefs-generated/envoy/config/bootstrap/v3/bootstrap.upbdefs.h +5 -0
- data/src/core/ext/upbdefs-generated/envoy/config/cluster/v3/cluster.upbdefs.c +437 -410
- data/src/core/ext/upbdefs-generated/envoy/config/core/v3/base.upbdefs.c +198 -170
- data/src/core/ext/upbdefs-generated/envoy/config/core/v3/base.upbdefs.h +10 -0
- data/src/core/ext/upbdefs-generated/envoy/config/core/v3/config_source.upbdefs.c +9 -8
- data/src/core/ext/upbdefs-generated/envoy/config/core/v3/protocol.upbdefs.c +219 -163
- data/src/core/ext/upbdefs-generated/envoy/config/core/v3/protocol.upbdefs.h +15 -0
- data/src/core/ext/upbdefs-generated/envoy/config/core/v3/resolver.upbdefs.c +59 -0
- data/src/core/ext/upbdefs-generated/envoy/config/core/v3/resolver.upbdefs.h +40 -0
- data/src/core/ext/upbdefs-generated/envoy/config/core/v3/substitution_format_string.upbdefs.c +29 -25
- data/src/core/ext/upbdefs-generated/envoy/config/core/v3/udp_socket_config.upbdefs.c +52 -0
- data/src/core/ext/upbdefs-generated/envoy/config/core/v3/udp_socket_config.upbdefs.h +35 -0
- data/src/core/ext/upbdefs-generated/envoy/config/listener/v3/listener.upbdefs.c +135 -125
- data/src/core/ext/upbdefs-generated/envoy/config/listener/v3/listener.upbdefs.h +5 -0
- data/src/core/ext/upbdefs-generated/envoy/config/listener/v3/listener_components.upbdefs.c +131 -123
- data/src/core/ext/upbdefs-generated/envoy/config/listener/v3/quic_config.upbdefs.c +90 -0
- data/src/core/ext/upbdefs-generated/envoy/config/listener/v3/quic_config.upbdefs.h +35 -0
- data/src/core/ext/upbdefs-generated/envoy/config/listener/v3/udp_listener_config.upbdefs.c +32 -24
- data/src/core/ext/upbdefs-generated/envoy/config/route/v3/route.upbdefs.c +69 -55
- data/src/core/ext/upbdefs-generated/envoy/config/route/v3/route.upbdefs.h +5 -0
- data/src/core/ext/upbdefs-generated/envoy/config/route/v3/route_components.upbdefs.c +684 -664
- data/src/core/ext/upbdefs-generated/envoy/config/route/v3/route_components.upbdefs.h +5 -0
- data/src/core/ext/upbdefs-generated/envoy/extensions/filters/http/fault/v3/fault.upbdefs.c +13 -10
- data/src/core/ext/upbdefs-generated/envoy/extensions/filters/http/router/v3/router.upbdefs.c +13 -10
- data/src/core/ext/upbdefs-generated/envoy/extensions/filters/network/http_connection_manager/v3/http_connection_manager.upbdefs.c +441 -375
- data/src/core/ext/upbdefs-generated/envoy/extensions/filters/network/http_connection_manager/v3/http_connection_manager.upbdefs.h +10 -0
- data/src/core/ext/upbdefs-generated/envoy/extensions/transport_sockets/tls/v3/common.upbdefs.c +122 -114
- data/src/core/ext/upbdefs-generated/envoy/extensions/transport_sockets/tls/v3/tls.upbdefs.c +1 -1
- data/src/core/ext/upbdefs-generated/envoy/service/status/v3/csds.upbdefs.c +112 -79
- data/src/core/ext/upbdefs-generated/envoy/service/status/v3/csds.upbdefs.h +5 -0
- data/src/core/ext/upbdefs-generated/envoy/type/http/v3/path_transformation.upbdefs.c +64 -0
- data/src/core/ext/upbdefs-generated/envoy/type/http/v3/path_transformation.upbdefs.h +50 -0
- data/src/core/ext/upbdefs-generated/envoy/type/matcher/v3/regex.upbdefs.c +35 -32
- data/src/core/ext/upbdefs-generated/google/rpc/status.upbdefs.c +4 -4
- data/src/core/ext/upbdefs-generated/validate/validate.upbdefs.c +182 -160
- data/src/core/ext/xds/certificate_provider_store.h +1 -1
- data/src/core/ext/xds/xds_api.cc +582 -257
- data/src/core/ext/xds/xds_api.h +46 -8
- data/src/core/ext/xds/xds_bootstrap.cc +4 -1
- data/src/core/ext/xds/xds_client.cc +66 -43
- data/src/core/ext/xds/xds_client.h +0 -4
- data/src/core/ext/xds/xds_http_filters.cc +3 -2
- data/src/core/ext/xds/xds_http_filters.h +3 -0
- data/src/core/lib/address_utils/sockaddr_utils.cc +13 -0
- data/src/core/lib/address_utils/sockaddr_utils.h +10 -0
- data/src/core/lib/channel/call_tracer.h +85 -0
- data/src/core/lib/channel/channel_stack.h +1 -1
- data/src/core/lib/channel/channelz.h +3 -0
- data/src/core/lib/channel/context.h +3 -0
- data/src/core/lib/channel/status_util.h +4 -0
- data/src/core/lib/compression/stream_compression.h +1 -1
- data/src/core/lib/compression/stream_compression_gzip.h +1 -1
- data/src/core/lib/compression/stream_compression_identity.h +1 -1
- data/src/core/lib/debug/stats.h +1 -1
- data/src/core/lib/event_engine/endpoint_config.cc +46 -0
- data/src/core/lib/event_engine/endpoint_config_internal.h +42 -0
- data/src/core/lib/event_engine/event_engine.cc +50 -0
- data/src/core/lib/event_engine/sockaddr.cc +14 -12
- data/src/core/lib/event_engine/sockaddr.h +44 -0
- data/src/core/lib/gpr/murmur_hash.cc +4 -2
- data/src/core/lib/gpr/wrap_memcpy.cc +2 -1
- data/src/core/lib/gprpp/manual_constructor.h +1 -1
- data/src/core/lib/gprpp/orphanable.h +3 -3
- data/src/core/lib/gprpp/status_helper.h +3 -0
- data/src/core/lib/gprpp/sync.h +2 -30
- data/src/core/lib/iomgr/buffer_list.cc +1 -1
- data/src/core/lib/iomgr/endpoint_pair_event_engine.cc +33 -0
- data/src/core/lib/iomgr/error.cc +5 -4
- data/src/core/lib/iomgr/error.h +1 -1
- data/src/core/lib/iomgr/ev_apple.h +1 -1
- data/src/core/lib/iomgr/event_engine/closure.cc +54 -0
- data/src/core/lib/iomgr/event_engine/closure.h +33 -0
- data/src/core/lib/iomgr/event_engine/endpoint.cc +192 -0
- data/src/core/lib/iomgr/event_engine/endpoint.h +53 -0
- data/src/core/lib/iomgr/event_engine/iomgr.cc +105 -0
- data/src/core/lib/iomgr/event_engine/iomgr.h +24 -0
- data/src/core/lib/iomgr/event_engine/pollset.cc +87 -0
- data/{include/grpc/event_engine/channel_args.h → src/core/lib/iomgr/event_engine/pollset.h} +7 -10
- data/src/core/lib/iomgr/event_engine/promise.h +51 -0
- data/src/core/lib/iomgr/event_engine/resolved_address_internal.cc +41 -0
- data/src/core/lib/iomgr/event_engine/resolved_address_internal.h +35 -0
- data/src/core/lib/iomgr/event_engine/resolver.cc +110 -0
- data/src/core/lib/iomgr/event_engine/tcp.cc +263 -0
- data/src/core/lib/iomgr/event_engine/timer.cc +57 -0
- data/src/core/lib/iomgr/exec_ctx.cc +8 -0
- data/src/core/lib/iomgr/exec_ctx.h +3 -4
- data/src/core/lib/iomgr/executor/threadpool.cc +2 -3
- data/src/core/lib/iomgr/executor/threadpool.h +2 -2
- data/src/core/lib/iomgr/iomgr.cc +1 -1
- data/src/core/lib/iomgr/iomgr_posix.cc +2 -0
- data/src/core/lib/iomgr/iomgr_posix_cfstream.cc +40 -10
- data/src/core/lib/iomgr/pollset_custom.cc +2 -2
- data/src/core/lib/iomgr/pollset_custom.h +3 -1
- data/src/core/lib/iomgr/pollset_uv.cc +3 -1
- data/src/core/lib/iomgr/pollset_uv.h +5 -1
- data/src/core/lib/iomgr/port.h +7 -5
- data/src/core/lib/iomgr/python_util.h +1 -1
- data/src/core/lib/iomgr/resolve_address.cc +5 -1
- data/src/core/lib/iomgr/resolve_address.h +6 -0
- data/src/core/lib/iomgr/resource_quota.cc +2 -0
- data/src/core/lib/iomgr/sockaddr.h +1 -0
- data/src/core/lib/iomgr/socket_mutator.cc +15 -2
- data/src/core/lib/iomgr/socket_mutator.h +26 -2
- data/src/core/lib/iomgr/socket_utils_common_posix.cc +4 -4
- data/src/core/lib/iomgr/socket_utils_posix.h +2 -2
- data/src/core/lib/iomgr/tcp_client_posix.cc +7 -2
- data/src/core/lib/iomgr/tcp_client_windows.cc +2 -0
- data/src/core/lib/iomgr/tcp_posix.cc +42 -39
- data/src/core/lib/iomgr/tcp_posix.h +8 -0
- data/src/core/lib/iomgr/tcp_server_custom.cc +3 -4
- data/src/core/lib/iomgr/tcp_server_posix.cc +7 -0
- data/src/core/lib/iomgr/tcp_server_utils_posix_common.cc +2 -1
- data/src/core/lib/iomgr/timer.h +6 -1
- data/src/core/lib/iomgr/timer_manager.cc +1 -1
- data/src/core/lib/json/json_reader.cc +1 -2
- data/src/core/lib/matchers/matchers.cc +8 -20
- data/src/core/lib/matchers/matchers.h +2 -1
- data/src/core/lib/security/authorization/authorization_engine.h +44 -0
- data/src/core/lib/security/authorization/authorization_policy_provider.h +32 -0
- data/src/core/lib/security/authorization/authorization_policy_provider_vtable.cc +46 -0
- data/src/core/lib/security/authorization/evaluate_args.cc +209 -0
- data/src/core/lib/security/authorization/evaluate_args.h +91 -0
- data/src/core/lib/security/credentials/google_default/google_default_credentials.cc +3 -1
- data/src/core/lib/security/credentials/tls/grpc_tls_certificate_provider.cc +49 -0
- data/src/core/lib/security/credentials/tls/grpc_tls_certificate_provider.h +7 -0
- data/src/core/lib/security/credentials/tls/tls_utils.cc +32 -0
- data/src/core/lib/security/credentials/tls/tls_utils.h +13 -0
- data/src/core/lib/security/security_connector/local/local_security_connector.cc +9 -6
- data/src/core/lib/security/security_connector/ssl_utils.cc +5 -0
- data/src/core/lib/security/security_connector/tls/tls_security_connector.cc +6 -18
- data/src/core/lib/security/transport/security_handshaker.cc +12 -4
- data/src/core/lib/security/transport/server_auth_filter.cc +0 -7
- data/src/core/lib/slice/slice.cc +12 -2
- data/src/core/lib/slice/slice_internal.h +1 -0
- data/src/core/lib/surface/call.cc +26 -7
- data/src/core/lib/surface/call.h +11 -0
- data/src/core/lib/surface/completion_queue.cc +22 -22
- data/src/core/lib/surface/completion_queue.h +1 -1
- data/src/core/lib/surface/completion_queue_factory.cc +1 -2
- data/src/core/lib/surface/init.cc +1 -3
- data/src/core/lib/surface/init.h +10 -1
- data/src/core/lib/surface/server.cc +3 -1
- data/src/core/lib/surface/server.h +3 -3
- data/src/core/lib/surface/version.cc +2 -4
- data/src/core/lib/transport/error_utils.cc +2 -2
- data/src/core/lib/transport/metadata_batch.cc +13 -2
- data/src/core/lib/transport/metadata_batch.h +7 -0
- data/src/core/lib/transport/transport.h +2 -0
- data/src/core/lib/transport/transport_op_string.cc +1 -1
- data/src/core/plugin_registry/grpc_plugin_registry.cc +4 -0
- data/src/core/tsi/alts/crypt/gsec.h +2 -0
- data/src/ruby/ext/grpc/extconf.rb +2 -0
- data/src/ruby/ext/grpc/rb_grpc_imports.generated.c +6 -0
- data/src/ruby/ext/grpc/rb_grpc_imports.generated.h +10 -1
- data/src/ruby/lib/grpc/version.rb +1 -1
- data/third_party/boringssl-with-bazel/err_data.c +269 -263
- data/third_party/boringssl-with-bazel/src/crypto/asn1/a_object.c +8 -6
- data/third_party/boringssl-with-bazel/src/crypto/cipher_extra/cipher_extra.c +4 -0
- data/third_party/boringssl-with-bazel/src/crypto/curve25519/curve25519.c +1 -1
- data/third_party/boringssl-with-bazel/src/crypto/curve25519/internal.h +1 -1
- data/third_party/boringssl-with-bazel/src/crypto/evp/evp.c +9 -0
- data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/bn/prime.c +0 -4
- data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/digest/digest.c +7 -0
- data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/digest/md32_common.h +87 -121
- data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/md4/md4.c +20 -30
- data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/md5/md5.c +19 -30
- data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/rand/internal.h +1 -4
- data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/rand/rand.c +0 -13
- data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/rsa/rsa.c +26 -24
- data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/rsa/rsa_impl.c +10 -7
- data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/sha/sha1.c +28 -39
- data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/sha/sha256.c +48 -66
- data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/sha/sha512.c +4 -5
- data/third_party/boringssl-with-bazel/src/crypto/hpke/hpke.c +362 -371
- data/third_party/boringssl-with-bazel/src/crypto/pkcs7/pkcs7_x509.c +4 -2
- data/third_party/boringssl-with-bazel/src/crypto/rand_extra/passive.c +2 -2
- data/third_party/boringssl-with-bazel/src/crypto/rsa_extra/rsa_asn1.c +1 -2
- data/third_party/boringssl-with-bazel/src/crypto/x509/internal.h +101 -11
- data/third_party/boringssl-with-bazel/src/crypto/x509/t_x509a.c +3 -0
- data/third_party/boringssl-with-bazel/src/crypto/x509/x509_cmp.c +2 -2
- data/third_party/boringssl-with-bazel/src/crypto/x509/x509_req.c +3 -0
- data/third_party/boringssl-with-bazel/src/crypto/x509/x509_set.c +1 -1
- data/third_party/boringssl-with-bazel/src/crypto/x509/x509_trs.c +2 -0
- data/third_party/boringssl-with-bazel/src/crypto/x509/x509_vfy.c +14 -15
- data/third_party/boringssl-with-bazel/src/crypto/x509/x509_vpm.c +53 -73
- data/third_party/boringssl-with-bazel/src/crypto/x509/x509cset.c +31 -0
- data/third_party/boringssl-with-bazel/src/crypto/x509/x509rset.c +3 -0
- data/third_party/boringssl-with-bazel/src/crypto/x509/x_all.c +3 -0
- data/third_party/boringssl-with-bazel/src/crypto/x509/x_req.c +5 -8
- data/third_party/boringssl-with-bazel/src/crypto/x509/x_sig.c +5 -0
- data/third_party/boringssl-with-bazel/src/crypto/x509/x_x509a.c +3 -0
- data/third_party/boringssl-with-bazel/src/crypto/x509v3/internal.h +7 -0
- data/third_party/boringssl-with-bazel/src/crypto/x509v3/v3_purp.c +1 -1
- data/third_party/boringssl-with-bazel/src/crypto/x509v3/v3_utl.c +5 -8
- data/third_party/boringssl-with-bazel/src/include/openssl/aead.h +1 -1
- data/third_party/boringssl-with-bazel/src/include/openssl/arm_arch.h +66 -1
- data/third_party/boringssl-with-bazel/src/include/openssl/base.h +40 -9
- data/third_party/boringssl-with-bazel/src/include/openssl/bytestring.h +1 -0
- data/third_party/boringssl-with-bazel/src/include/openssl/chacha.h +1 -1
- data/third_party/boringssl-with-bazel/src/include/openssl/digest.h +6 -2
- data/third_party/boringssl-with-bazel/src/include/openssl/ecdsa.h +14 -0
- data/third_party/boringssl-with-bazel/src/include/openssl/evp.h +19 -11
- data/third_party/boringssl-with-bazel/src/include/openssl/hpke.h +325 -0
- data/third_party/boringssl-with-bazel/src/include/openssl/pkcs7.h +23 -7
- data/third_party/boringssl-with-bazel/src/include/openssl/rsa.h +99 -63
- data/third_party/boringssl-with-bazel/src/include/openssl/ssl.h +139 -109
- data/third_party/boringssl-with-bazel/src/include/openssl/tls1.h +12 -19
- data/third_party/boringssl-with-bazel/src/include/openssl/x509.h +48 -50
- data/third_party/boringssl-with-bazel/src/include/openssl/x509_vfy.h +451 -435
- data/third_party/boringssl-with-bazel/src/include/openssl/x509v3.h +0 -1
- data/third_party/boringssl-with-bazel/src/ssl/d1_both.cc +2 -2
- data/third_party/boringssl-with-bazel/src/ssl/d1_srtp.cc +1 -1
- data/third_party/boringssl-with-bazel/src/ssl/encrypted_client_hello.cc +773 -84
- data/third_party/boringssl-with-bazel/src/ssl/handoff.cc +80 -47
- data/third_party/boringssl-with-bazel/src/ssl/handshake.cc +24 -19
- data/third_party/boringssl-with-bazel/src/ssl/handshake_client.cc +189 -86
- data/third_party/boringssl-with-bazel/src/ssl/handshake_server.cc +45 -56
- data/third_party/boringssl-with-bazel/src/ssl/internal.h +272 -167
- data/third_party/boringssl-with-bazel/src/ssl/s3_both.cc +2 -2
- data/third_party/boringssl-with-bazel/src/ssl/s3_lib.cc +2 -2
- data/third_party/boringssl-with-bazel/src/ssl/s3_pkt.cc +14 -19
- data/third_party/boringssl-with-bazel/src/ssl/ssl_lib.cc +34 -102
- data/third_party/boringssl-with-bazel/src/ssl/ssl_privkey.cc +2 -0
- data/third_party/boringssl-with-bazel/src/ssl/ssl_session.cc +8 -31
- data/third_party/boringssl-with-bazel/src/ssl/ssl_stat.cc +3 -0
- data/third_party/boringssl-with-bazel/src/ssl/ssl_transcript.cc +4 -3
- data/third_party/boringssl-with-bazel/src/ssl/ssl_versions.cc +7 -3
- data/third_party/boringssl-with-bazel/src/ssl/t1_lib.cc +576 -648
- data/third_party/boringssl-with-bazel/src/ssl/tls13_both.cc +31 -3
- data/third_party/boringssl-with-bazel/src/ssl/tls13_client.cc +98 -39
- data/third_party/boringssl-with-bazel/src/ssl/tls13_enc.cc +141 -94
- data/third_party/boringssl-with-bazel/src/ssl/tls13_server.cc +58 -68
- data/third_party/xxhash/xxhash.h +77 -195
- metadata +81 -39
- data/src/core/lib/gpr/arena.h +0 -47
- data/third_party/boringssl-with-bazel/src/crypto/hpke/internal.h +0 -267
- data/third_party/boringssl-with-bazel/src/crypto/x509/vpm_int.h +0 -71
@@ -88,9 +88,7 @@
|
|
88
88
|
|
89
89
|
// TODO(roth): In subsequent PRs:
|
90
90
|
// - add support for transparent retries (including initial metadata)
|
91
|
-
// -
|
92
|
-
// (census filter is on top of this one)
|
93
|
-
// - add census stats for retries
|
91
|
+
// - implement hedging
|
94
92
|
|
95
93
|
// By default, we buffer 256 KiB per RPC for retries.
|
96
94
|
// TODO(roth): Do we have any data to suggest a better value?
|
@@ -200,7 +198,6 @@ class RetryFilter::CallData {
|
|
200
198
|
static void SetPollent(grpc_call_element* elem, grpc_polling_entity* pollent);
|
201
199
|
|
202
200
|
private:
|
203
|
-
class Canceller;
|
204
201
|
class CallStackDestructionBarrier;
|
205
202
|
|
206
203
|
// Pending batches stored in call data.
|
@@ -212,13 +209,12 @@ class RetryFilter::CallData {
|
|
212
209
|
};
|
213
210
|
|
214
211
|
// State associated with each call attempt.
|
215
|
-
|
216
|
-
class CallAttempt
|
217
|
-
: public RefCounted<CallAttempt, PolymorphicRefCount, kUnrefCallDtor> {
|
212
|
+
class CallAttempt : public RefCounted<CallAttempt> {
|
218
213
|
public:
|
219
214
|
explicit CallAttempt(CallData* calld);
|
215
|
+
~CallAttempt() override;
|
220
216
|
|
221
|
-
|
217
|
+
bool lb_call_committed() const { return lb_call_committed_; }
|
222
218
|
|
223
219
|
// Constructs and starts whatever batches are needed on this call
|
224
220
|
// attempt.
|
@@ -228,6 +224,9 @@ class RetryFilter::CallData {
|
|
228
224
|
// committing the call.
|
229
225
|
void FreeCachedSendOpDataAfterCommit();
|
230
226
|
|
227
|
+
// Cancels the call attempt.
|
228
|
+
void CancelFromSurface(grpc_transport_stream_op_batch* cancel_batch);
|
229
|
+
|
231
230
|
private:
|
232
231
|
// State used for starting a retryable batch on the call attempt's LB call.
|
233
232
|
// This provides its own grpc_transport_stream_op_batch and other data
|
@@ -235,7 +234,7 @@ class RetryFilter::CallData {
|
|
235
234
|
// We allocate one struct on the arena for each attempt at starting a
|
236
235
|
// batch on a given LB call.
|
237
236
|
class BatchData
|
238
|
-
: public RefCounted<
|
237
|
+
: public RefCounted<BatchData, PolymorphicRefCount, kUnrefCallDtor> {
|
239
238
|
public:
|
240
239
|
BatchData(RefCountedPtr<CallAttempt> call_attempt, int refcount,
|
241
240
|
bool set_on_complete);
|
@@ -243,48 +242,50 @@ class RetryFilter::CallData {
|
|
243
242
|
|
244
243
|
grpc_transport_stream_op_batch* batch() { return &batch_; }
|
245
244
|
|
246
|
-
// Adds retriable send_initial_metadata op
|
245
|
+
// Adds retriable send_initial_metadata op.
|
247
246
|
void AddRetriableSendInitialMetadataOp();
|
248
|
-
// Adds retriable send_message op
|
247
|
+
// Adds retriable send_message op.
|
249
248
|
void AddRetriableSendMessageOp();
|
250
|
-
// Adds retriable send_trailing_metadata op
|
249
|
+
// Adds retriable send_trailing_metadata op.
|
251
250
|
void AddRetriableSendTrailingMetadataOp();
|
252
|
-
// Adds retriable recv_initial_metadata op
|
251
|
+
// Adds retriable recv_initial_metadata op.
|
253
252
|
void AddRetriableRecvInitialMetadataOp();
|
254
|
-
// Adds retriable recv_message op
|
253
|
+
// Adds retriable recv_message op.
|
255
254
|
void AddRetriableRecvMessageOp();
|
256
|
-
// Adds retriable recv_trailing_metadata op
|
255
|
+
// Adds retriable recv_trailing_metadata op.
|
257
256
|
void AddRetriableRecvTrailingMetadataOp();
|
257
|
+
// Adds cancel_stream op.
|
258
|
+
void AddCancelStreamOp(grpc_error_handle error);
|
258
259
|
|
259
260
|
private:
|
260
|
-
// Returns true if the call is being retried.
|
261
|
-
bool MaybeRetry(grpc_status_code status, grpc_mdelem* server_pushback_md,
|
262
|
-
bool is_lb_drop);
|
263
|
-
|
264
261
|
// Frees cached send ops that were completed by the completed batch in
|
265
262
|
// batch_data. Used when batches are completed after the call is
|
266
263
|
// committed.
|
267
264
|
void FreeCachedSendOpDataForCompletedBatch();
|
268
265
|
|
269
|
-
//
|
270
|
-
|
271
|
-
|
266
|
+
// If there is a pending recv_initial_metadata op, adds a closure
|
267
|
+
// to closures for recv_initial_metadata_ready.
|
268
|
+
void MaybeAddClosureForRecvInitialMetadataCallback(
|
269
|
+
grpc_error_handle error, CallCombinerClosureList* closures);
|
272
270
|
// Intercepts recv_initial_metadata_ready callback for retries.
|
273
271
|
// Commits the call and returns the initial metadata up the stack.
|
274
272
|
static void RecvInitialMetadataReady(void* arg, grpc_error_handle error);
|
275
273
|
|
276
|
-
//
|
277
|
-
|
274
|
+
// If there is a pending recv_message op, adds a closure to closures
|
275
|
+
// for recv_message_ready.
|
276
|
+
void MaybeAddClosureForRecvMessageCallback(
|
277
|
+
grpc_error_handle error, CallCombinerClosureList* closures);
|
278
278
|
// Intercepts recv_message_ready callback for retries.
|
279
279
|
// Commits the call and returns the message up the stack.
|
280
280
|
static void RecvMessageReady(void* arg, grpc_error_handle error);
|
281
281
|
|
282
|
-
//
|
283
|
-
|
282
|
+
// If there is a pending recv_trailing_metadata op, adds a closure to
|
283
|
+
// closures for recv_trailing_metadata_ready.
|
284
|
+
void MaybeAddClosureForRecvTrailingMetadataReady(
|
284
285
|
grpc_error_handle error, CallCombinerClosureList* closures);
|
285
|
-
// Adds any necessary closures for deferred
|
286
|
-
//
|
287
|
-
void
|
286
|
+
// Adds any necessary closures for deferred batch completion
|
287
|
+
// callbacks to closures.
|
288
|
+
void AddClosuresForDeferredCompletionCallbacks(
|
288
289
|
CallCombinerClosureList* closures);
|
289
290
|
// For any pending batch containing an op that has not yet been started,
|
290
291
|
// adds the pending batch's completion closures to closures.
|
@@ -309,6 +310,10 @@ class RetryFilter::CallData {
|
|
309
310
|
// Callback used to intercept on_complete from LB calls.
|
310
311
|
static void OnComplete(void* arg, grpc_error_handle error);
|
311
312
|
|
313
|
+
// Callback used to handle on_complete for internally generated
|
314
|
+
// cancel_stream op.
|
315
|
+
static void OnCompleteForCancelOp(void* arg, grpc_error_handle error);
|
316
|
+
|
312
317
|
RefCountedPtr<CallAttempt> call_attempt_;
|
313
318
|
// The batch to use in the LB call.
|
314
319
|
// Its payload field points to CallAttempt::batch_payload_.
|
@@ -317,12 +322,37 @@ class RetryFilter::CallData {
|
|
317
322
|
grpc_closure on_complete_;
|
318
323
|
};
|
319
324
|
|
325
|
+
class AttemptDispatchController
|
326
|
+
: public ConfigSelector::CallDispatchController {
|
327
|
+
public:
|
328
|
+
explicit AttemptDispatchController(CallAttempt* call_attempt)
|
329
|
+
: call_attempt_(call_attempt) {}
|
330
|
+
|
331
|
+
// Will never be called.
|
332
|
+
bool ShouldRetry() override { return false; }
|
333
|
+
|
334
|
+
void Commit() override {
|
335
|
+
call_attempt_->lb_call_committed_ = true;
|
336
|
+
auto* calld = call_attempt_->calld_;
|
337
|
+
if (calld->retry_committed_) {
|
338
|
+
auto* service_config_call_data = static_cast<ServiceConfigCallData*>(
|
339
|
+
calld->call_context_[GRPC_CONTEXT_SERVICE_CONFIG_CALL_DATA]
|
340
|
+
.value);
|
341
|
+
service_config_call_data->call_dispatch_controller()->Commit();
|
342
|
+
}
|
343
|
+
}
|
344
|
+
|
345
|
+
private:
|
346
|
+
CallAttempt* call_attempt_;
|
347
|
+
};
|
348
|
+
|
320
349
|
// Creates a BatchData object on the call's arena with the
|
321
350
|
// specified refcount. If set_on_complete is true, the batch's
|
322
351
|
// on_complete callback will be set to point to on_complete();
|
323
352
|
// otherwise, the batch's on_complete callback will be null.
|
324
353
|
BatchData* CreateBatch(int refcount, bool set_on_complete) {
|
325
|
-
return calld_->arena_->New<BatchData>(Ref(
|
354
|
+
return calld_->arena_->New<BatchData>(Ref(DEBUG_LOCATION, "CreateBatch"),
|
355
|
+
refcount, set_on_complete);
|
326
356
|
}
|
327
357
|
|
328
358
|
// If there are any cached send ops that need to be replayed on this
|
@@ -330,23 +360,61 @@ class RetryFilter::CallData {
|
|
330
360
|
// Otherwise, returns nullptr.
|
331
361
|
BatchData* MaybeCreateBatchForReplay();
|
332
362
|
|
363
|
+
// Adds a closure to closures that will execute batch in the call combiner.
|
364
|
+
void AddClosureForBatch(grpc_transport_stream_op_batch* batch,
|
365
|
+
const char* reason,
|
366
|
+
CallCombinerClosureList* closures);
|
367
|
+
|
368
|
+
// Helper function used to start a recv_trailing_metadata batch. This
|
369
|
+
// is used in the case where a recv_initial_metadata or recv_message
|
370
|
+
// op fails in a way that we know the call is over but when the application
|
371
|
+
// has not yet started its own recv_trailing_metadata op.
|
372
|
+
void AddBatchForInternalRecvTrailingMetadata(
|
373
|
+
CallCombinerClosureList* closures);
|
374
|
+
|
375
|
+
// Adds a batch to closures to cancel this call attempt.
|
376
|
+
void AddBatchForCancelOp(grpc_error_handle error,
|
377
|
+
CallCombinerClosureList* closures);
|
378
|
+
|
333
379
|
// Adds batches for pending batches to closures.
|
334
380
|
void AddBatchesForPendingBatches(CallCombinerClosureList* closures);
|
335
381
|
|
336
382
|
// Adds whatever batches are needed on this attempt to closures.
|
337
383
|
void AddRetriableBatches(CallCombinerClosureList* closures);
|
338
384
|
|
339
|
-
// Returns true if any op in the batch was not yet started on this
|
340
|
-
|
385
|
+
// Returns true if any send op in the batch was not yet started on this
|
386
|
+
// attempt.
|
387
|
+
bool PendingBatchContainsUnstartedSendOps(PendingBatch* pending);
|
341
388
|
|
342
|
-
//
|
343
|
-
|
344
|
-
|
345
|
-
//
|
346
|
-
|
389
|
+
// Returns true if there are cached send ops to replay.
|
390
|
+
bool HaveSendOpsToReplay();
|
391
|
+
|
392
|
+
// If our retry state is no longer needed, switch to fast path by moving
|
393
|
+
// our LB call into calld_->committed_call_ and having calld_ drop
|
394
|
+
// its ref to us.
|
395
|
+
void MaybeSwitchToFastPath();
|
396
|
+
|
397
|
+
// Returns true if the call should be retried.
|
398
|
+
// If server_pushback_md is non-null, sets *server_pushback_ms.
|
399
|
+
bool ShouldRetry(absl::optional<grpc_status_code> status, bool is_lb_drop,
|
400
|
+
grpc_mdelem* server_pushback_md,
|
401
|
+
grpc_millis* server_pushback_ms);
|
402
|
+
|
403
|
+
// Abandons the call attempt. Unrefs any deferred batches.
|
404
|
+
void Abandon();
|
405
|
+
|
406
|
+
static void OnPerAttemptRecvTimer(void* arg, grpc_error_handle error);
|
407
|
+
static void OnPerAttemptRecvTimerLocked(void* arg, grpc_error_handle error);
|
408
|
+
void MaybeCancelPerAttemptRecvTimer();
|
347
409
|
|
348
410
|
CallData* calld_;
|
349
|
-
|
411
|
+
AttemptDispatchController attempt_dispatch_controller_;
|
412
|
+
OrphanablePtr<ClientChannel::LoadBalancedCall> lb_call_;
|
413
|
+
bool lb_call_committed_ = false;
|
414
|
+
|
415
|
+
grpc_timer per_attempt_recv_timer_;
|
416
|
+
grpc_closure on_per_attempt_recv_timer_;
|
417
|
+
bool per_attempt_recv_timer_pending_ = false;
|
350
418
|
|
351
419
|
// BatchData.batch.payload points to this.
|
352
420
|
grpc_transport_stream_op_batch_payload batch_payload_;
|
@@ -389,16 +457,28 @@ class RetryFilter::CallData {
|
|
389
457
|
bool started_recv_trailing_metadata_ : 1;
|
390
458
|
bool completed_recv_trailing_metadata_ : 1;
|
391
459
|
// State for callback processing.
|
392
|
-
BatchData
|
460
|
+
RefCountedPtr<BatchData> recv_initial_metadata_ready_deferred_batch_;
|
393
461
|
grpc_error_handle recv_initial_metadata_error_ = GRPC_ERROR_NONE;
|
394
|
-
BatchData
|
462
|
+
RefCountedPtr<BatchData> recv_message_ready_deferred_batch_;
|
395
463
|
grpc_error_handle recv_message_error_ = GRPC_ERROR_NONE;
|
396
|
-
|
464
|
+
struct OnCompleteDeferredBatch {
|
465
|
+
OnCompleteDeferredBatch(RefCountedPtr<BatchData> batch,
|
466
|
+
grpc_error_handle error)
|
467
|
+
: batch(std::move(batch)), error(error) {}
|
468
|
+
RefCountedPtr<BatchData> batch;
|
469
|
+
grpc_error_handle error;
|
470
|
+
};
|
471
|
+
// There cannot be more than 3 pending send op batches at a time.
|
472
|
+
absl::InlinedVector<OnCompleteDeferredBatch, 3>
|
473
|
+
on_complete_deferred_batches_;
|
474
|
+
RefCountedPtr<BatchData> recv_trailing_metadata_internal_batch_;
|
475
|
+
grpc_error_handle recv_trailing_metadata_error_ = GRPC_ERROR_NONE;
|
476
|
+
bool seen_recv_trailing_metadata_from_surface_ : 1;
|
397
477
|
// NOTE: Do not move this next to the metadata bitfields above. That would
|
398
478
|
// save space but will also result in a data race because the compiler
|
399
479
|
// will generate a 2 byte store which overwrites the meta-data
|
400
480
|
// fields upon setting this field.
|
401
|
-
bool
|
481
|
+
bool abandoned_ : 1;
|
402
482
|
};
|
403
483
|
|
404
484
|
CallData(RetryFilter* chand, const grpc_call_element_args& args);
|
@@ -432,18 +512,18 @@ class RetryFilter::CallData {
|
|
432
512
|
// Commits the call so that no further retry attempts will be performed.
|
433
513
|
void RetryCommit(CallAttempt* call_attempt);
|
434
514
|
|
435
|
-
// Starts a retry after appropriate back-off.
|
436
|
-
|
515
|
+
// Starts a timer to retry after appropriate back-off.
|
516
|
+
// If server_pushback_ms is -1, retry_backoff_ is used.
|
517
|
+
void StartRetryTimer(grpc_millis server_pushback_ms);
|
518
|
+
|
437
519
|
static void OnRetryTimer(void* arg, grpc_error_handle error);
|
520
|
+
static void OnRetryTimerLocked(void* arg, grpc_error_handle error);
|
438
521
|
|
439
|
-
|
522
|
+
OrphanablePtr<ClientChannel::LoadBalancedCall> CreateLoadBalancedCall(
|
523
|
+
ConfigSelector::CallDispatchController* call_dispatch_controller);
|
440
524
|
|
441
525
|
void CreateCallAttempt();
|
442
526
|
|
443
|
-
// Adds a closure to closures that will execute batch in the call combiner.
|
444
|
-
void AddClosureForBatch(grpc_transport_stream_op_batch* batch,
|
445
|
-
CallCombinerClosureList* closures);
|
446
|
-
|
447
527
|
RetryFilter* chand_;
|
448
528
|
grpc_polling_entity* pollent_;
|
449
529
|
RefCountedPtr<ServerRetryThrottleData> retry_throttle_data_;
|
@@ -451,13 +531,14 @@ class RetryFilter::CallData {
|
|
451
531
|
BackOff retry_backoff_;
|
452
532
|
|
453
533
|
grpc_slice path_; // Request path.
|
454
|
-
gpr_cycle_counter call_start_time_;
|
455
534
|
grpc_millis deadline_;
|
456
535
|
Arena* arena_;
|
457
536
|
grpc_call_stack* owning_call_;
|
458
537
|
CallCombiner* call_combiner_;
|
459
538
|
grpc_call_context_element* call_context_;
|
460
539
|
|
540
|
+
grpc_error_handle cancelled_from_surface_ = GRPC_ERROR_NONE;
|
541
|
+
|
461
542
|
RefCountedPtr<CallStackDestructionBarrier> call_stack_destruction_barrier_;
|
462
543
|
|
463
544
|
// TODO(roth): As part of implementing hedging, we will need to maintain a
|
@@ -465,13 +546,10 @@ class RetryFilter::CallData {
|
|
465
546
|
// gets cancelled.
|
466
547
|
RefCountedPtr<CallAttempt> call_attempt_;
|
467
548
|
|
468
|
-
// LB call used when
|
469
|
-
//
|
470
|
-
//
|
471
|
-
|
472
|
-
// from the CallAttempt here, thus creating a fast path for the
|
473
|
-
// remainder of the streaming call.
|
474
|
-
RefCountedPtr<ClientChannel::LoadBalancedCall> committed_call_;
|
549
|
+
// LB call used when we've committed to a call attempt and the retry
|
550
|
+
// state for that attempt is no longer needed. This provides a fast
|
551
|
+
// path for long-running streaming calls that minimizes overhead.
|
552
|
+
OrphanablePtr<ClientChannel::LoadBalancedCall> committed_call_;
|
475
553
|
|
476
554
|
// When we are not yet fully committed to a particular call (i.e.,
|
477
555
|
// either we might still retry or we have committed to the call but
|
@@ -486,23 +564,11 @@ class RetryFilter::CallData {
|
|
486
564
|
|
487
565
|
// Retry state.
|
488
566
|
bool retry_committed_ : 1;
|
489
|
-
bool
|
567
|
+
bool retry_timer_pending_ : 1;
|
490
568
|
int num_attempts_completed_ = 0;
|
491
|
-
|
492
|
-
Canceller* canceller_ ABSL_GUARDED_BY(timer_mu_);
|
493
|
-
grpc_timer retry_timer_ ABSL_GUARDED_BY(timer_mu_);
|
569
|
+
grpc_timer retry_timer_;
|
494
570
|
grpc_closure retry_closure_;
|
495
571
|
|
496
|
-
// The number of batches containing send ops that are currently in-flight
|
497
|
-
// on any call attempt.
|
498
|
-
// We hold a ref to the call stack while this is non-zero, since replay
|
499
|
-
// batches may not complete until after all callbacks have been returned
|
500
|
-
// to the surface, and we need to make sure that the call is not destroyed
|
501
|
-
// until all of these batches have completed.
|
502
|
-
// Note that we actually only need to track replay batches, but it's
|
503
|
-
// easier to track all batches with send ops.
|
504
|
-
int num_in_flight_call_attempt_send_batches_ = 0;
|
505
|
-
|
506
572
|
// Cached data for retrying send ops.
|
507
573
|
// send_initial_metadata
|
508
574
|
bool seen_send_initial_metadata_ = false;
|
@@ -513,7 +579,10 @@ class RetryFilter::CallData {
|
|
513
579
|
// have the LB call set a value in CallAttempt and then propagate it
|
514
580
|
// from CallAttempt to the parent call when we commit. Otherwise, we
|
515
581
|
// may leave this with a value for a peer other than the one we
|
516
|
-
// actually commit to.
|
582
|
+
// actually commit to. Alternatively, maybe see if there's a way to
|
583
|
+
// change the surface API such that the peer isn't available until
|
584
|
+
// after initial metadata is received? (Could even change the
|
585
|
+
// transport API to return this with the recv_initial_metadata op.)
|
517
586
|
gpr_atm* peer_string_;
|
518
587
|
// send_message
|
519
588
|
// When we get a send_message op, we replace the original byte stream
|
@@ -522,6 +591,10 @@ class RetryFilter::CallData {
|
|
522
591
|
// Note: We inline the cache for the first 3 send_message ops and use
|
523
592
|
// dynamic allocation after that. This number was essentially picked
|
524
593
|
// at random; it could be changed in the future to tune performance.
|
594
|
+
// TODO(roth): As part of implementing hedging, we may need some
|
595
|
+
// synchronization here, since ByteStreamCache does not provide any
|
596
|
+
// synchronization, so it's not safe to have multiple
|
597
|
+
// CachingByteStreams read from the same ByteStreamCache concurrently.
|
525
598
|
absl::InlinedVector<ByteStreamCache*, 3> send_messages_;
|
526
599
|
// send_trailing_metadata
|
527
600
|
bool seen_send_trailing_metadata_ = false;
|
@@ -582,52 +655,15 @@ class RetryFilter::CallData::CallStackDestructionBarrier
|
|
582
655
|
grpc_closure* on_call_stack_destruction_ = nullptr;
|
583
656
|
};
|
584
657
|
|
585
|
-
//
|
586
|
-
// RetryFilter::CallData::Canceller
|
587
|
-
//
|
588
|
-
|
589
|
-
class RetryFilter::CallData::Canceller {
|
590
|
-
public:
|
591
|
-
explicit Canceller(CallData* calld) : calld_(calld) {
|
592
|
-
GRPC_CALL_STACK_REF(calld_->owning_call_, "RetryCanceller");
|
593
|
-
GRPC_CLOSURE_INIT(&closure_, &Cancel, this, nullptr);
|
594
|
-
calld_->call_combiner_->SetNotifyOnCancel(&closure_);
|
595
|
-
}
|
596
|
-
|
597
|
-
private:
|
598
|
-
static void Cancel(void* arg, grpc_error_handle error) {
|
599
|
-
auto* self = static_cast<Canceller*>(arg);
|
600
|
-
auto* calld = self->calld_;
|
601
|
-
{
|
602
|
-
MutexLock lock(&calld->timer_mu_);
|
603
|
-
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
604
|
-
gpr_log(GPR_INFO,
|
605
|
-
"calld=%p: cancelling retry timer: error=%s self=%p "
|
606
|
-
"calld->canceller_=%p",
|
607
|
-
calld, grpc_error_std_string(error).c_str(), self,
|
608
|
-
calld->canceller_);
|
609
|
-
}
|
610
|
-
if (calld->canceller_ == self && error != GRPC_ERROR_NONE) {
|
611
|
-
calld->canceller_ = nullptr; // Checked by OnRetryTimer().
|
612
|
-
grpc_timer_cancel(&calld->retry_timer_);
|
613
|
-
calld->FreeAllCachedSendOpData();
|
614
|
-
GRPC_CALL_COMBINER_STOP(calld->call_combiner_, "Canceller");
|
615
|
-
}
|
616
|
-
}
|
617
|
-
GRPC_CALL_STACK_UNREF(calld->owning_call_, "RetryCanceller");
|
618
|
-
delete self;
|
619
|
-
}
|
620
|
-
|
621
|
-
CallData* calld_;
|
622
|
-
grpc_closure closure_;
|
623
|
-
};
|
624
|
-
|
625
658
|
//
|
626
659
|
// RetryFilter::CallData::CallAttempt
|
627
660
|
//
|
628
661
|
|
629
662
|
RetryFilter::CallData::CallAttempt::CallAttempt(CallData* calld)
|
630
|
-
:
|
663
|
+
: RefCounted(GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace) ? "CallAttempt"
|
664
|
+
: nullptr),
|
665
|
+
calld_(calld),
|
666
|
+
attempt_dispatch_controller_(this),
|
631
667
|
batch_payload_(calld->call_context_),
|
632
668
|
started_send_initial_metadata_(false),
|
633
669
|
completed_send_initial_metadata_(false),
|
@@ -637,12 +673,42 @@ RetryFilter::CallData::CallAttempt::CallAttempt(CallData* calld)
|
|
637
673
|
completed_recv_initial_metadata_(false),
|
638
674
|
started_recv_trailing_metadata_(false),
|
639
675
|
completed_recv_trailing_metadata_(false),
|
640
|
-
|
641
|
-
|
676
|
+
seen_recv_trailing_metadata_from_surface_(false),
|
677
|
+
abandoned_(false) {
|
678
|
+
lb_call_ = calld->CreateLoadBalancedCall(&attempt_dispatch_controller_);
|
642
679
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
643
|
-
gpr_log(GPR_INFO, "chand=%p calld=%p
|
680
|
+
gpr_log(GPR_INFO, "chand=%p calld=%p attempt=%p: create lb_call=%p",
|
644
681
|
calld->chand_, calld, this, lb_call_.get());
|
645
682
|
}
|
683
|
+
// If per_attempt_recv_timeout is set, start a timer.
|
684
|
+
if (calld->retry_policy_ != nullptr &&
|
685
|
+
calld->retry_policy_->per_attempt_recv_timeout().has_value()) {
|
686
|
+
grpc_millis per_attempt_recv_deadline =
|
687
|
+
ExecCtx::Get()->Now() +
|
688
|
+
*calld->retry_policy_->per_attempt_recv_timeout();
|
689
|
+
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
690
|
+
gpr_log(GPR_INFO,
|
691
|
+
"chand=%p calld=%p attempt=%p: per-attempt timeout in %" PRId64
|
692
|
+
" ms",
|
693
|
+
calld->chand_, calld, this,
|
694
|
+
*calld->retry_policy_->per_attempt_recv_timeout());
|
695
|
+
}
|
696
|
+
// Start the per-attempt recv timer; it fires at per_attempt_recv_deadline.
|
697
|
+
GRPC_CLOSURE_INIT(&on_per_attempt_recv_timer_, OnPerAttemptRecvTimer, this,
|
698
|
+
nullptr);
|
699
|
+
GRPC_CALL_STACK_REF(calld->owning_call_, "OnPerAttemptRecvTimer");
|
700
|
+
Ref(DEBUG_LOCATION, "OnPerAttemptRecvTimer").release();
|
701
|
+
per_attempt_recv_timer_pending_ = true;
|
702
|
+
grpc_timer_init(&per_attempt_recv_timer_, per_attempt_recv_deadline,
|
703
|
+
&on_per_attempt_recv_timer_);
|
704
|
+
}
|
705
|
+
}
|
706
|
+
|
707
|
+
RetryFilter::CallData::CallAttempt::~CallAttempt() {
|
708
|
+
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
709
|
+
gpr_log(GPR_INFO, "chand=%p calld=%p attempt=%p: destroying call attempt",
|
710
|
+
calld_->chand_, calld_, this);
|
711
|
+
}
|
646
712
|
}
|
647
713
|
|
648
714
|
void RetryFilter::CallData::CallAttempt::FreeCachedSendOpDataAfterCommit() {
|
@@ -661,13 +727,9 @@ void RetryFilter::CallData::CallAttempt::FreeCachedSendOpDataAfterCommit() {
|
|
661
727
|
}
|
662
728
|
}
|
663
729
|
|
664
|
-
bool RetryFilter::CallData::CallAttempt::
|
730
|
+
bool RetryFilter::CallData::CallAttempt::PendingBatchContainsUnstartedSendOps(
|
665
731
|
PendingBatch* pending) {
|
666
|
-
|
667
|
-
// only recv ops are always started immediately.
|
668
|
-
if (pending->batch == nullptr || pending->batch->on_complete == nullptr) {
|
669
|
-
return false;
|
670
|
-
}
|
732
|
+
if (pending->batch->on_complete == nullptr) return false;
|
671
733
|
if (pending->batch->send_initial_metadata &&
|
672
734
|
!started_send_initial_metadata_) {
|
673
735
|
return true;
|
@@ -683,22 +745,40 @@ bool RetryFilter::CallData::CallAttempt::PendingBatchIsUnstarted(
|
|
683
745
|
return false;
|
684
746
|
}
|
685
747
|
|
686
|
-
|
748
|
+
bool RetryFilter::CallData::CallAttempt::HaveSendOpsToReplay() {
|
749
|
+
// We don't check send_initial_metadata here, because that op will always
|
750
|
+
// be started as soon as it is received from the surface, so it will
|
751
|
+
// never need to be started at this point.
|
752
|
+
return started_send_message_count_ < calld_->send_messages_.size() ||
|
753
|
+
(calld_->seen_send_trailing_metadata_ &&
|
754
|
+
!started_send_trailing_metadata_);
|
755
|
+
}
|
756
|
+
|
757
|
+
void RetryFilter::CallData::CallAttempt::MaybeSwitchToFastPath() {
|
758
|
+
// If we're not yet committed, we can't switch yet.
|
759
|
+
// TODO(roth): As part of implementing hedging, this logic needs to
|
760
|
+
// check that *this* call attempt is the one that we've committed to.
|
761
|
+
// Might need to replace abandoned_ with an enum indicating whether we're
|
762
|
+
// in flight, abandoned, or the winning call attempt.
|
763
|
+
if (!calld_->retry_committed_) return;
|
764
|
+
// If we've already switched to fast path, there's nothing to do here.
|
765
|
+
if (calld_->committed_call_ != nullptr) return;
|
766
|
+
// If the perAttemptRecvTimeout timer is pending, we can't switch yet.
|
767
|
+
if (per_attempt_recv_timer_pending_) return;
|
768
|
+
// If there are still send ops to replay, we can't switch yet.
|
769
|
+
if (HaveSendOpsToReplay()) return;
|
770
|
+
// If we started an internal batch for recv_trailing_metadata but have not
|
771
|
+
// yet seen that op from the surface, we can't switch yet.
|
772
|
+
if (recv_trailing_metadata_internal_batch_ != nullptr) return;
|
773
|
+
// Switch to fast path.
|
687
774
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
688
775
|
gpr_log(GPR_INFO,
|
689
|
-
"chand=%p calld=%p:
|
690
|
-
"
|
691
|
-
calld_->chand_, calld_);
|
776
|
+
"chand=%p calld=%p attempt=%p: retry state no longer needed; "
|
777
|
+
"moving LB call to parent and unreffing the call attempt",
|
778
|
+
calld_->chand_, calld_, this);
|
692
779
|
}
|
693
|
-
|
694
|
-
|
695
|
-
// completes, and again when we actually get a recv_trailing_metadata
|
696
|
-
// op from the surface.
|
697
|
-
BatchData* batch_data = CreateBatch(2, false /* set_on_complete */);
|
698
|
-
batch_data->AddRetriableRecvTrailingMetadataOp();
|
699
|
-
recv_trailing_metadata_internal_batch_ = batch_data;
|
700
|
-
// Note: This will release the call combiner.
|
701
|
-
lb_call_->StartTransportStreamOpBatch(batch_data->batch());
|
780
|
+
calld_->committed_call_ = std::move(lb_call_);
|
781
|
+
calld_->call_attempt_.reset(DEBUG_LOCATION, "MaybeSwitchToFastPath");
|
702
782
|
}
|
703
783
|
|
704
784
|
// If there are any cached send ops that need to be replayed on the
|
@@ -712,9 +792,9 @@ RetryFilter::CallData::CallAttempt::MaybeCreateBatchForReplay() {
|
|
712
792
|
!calld_->pending_send_initial_metadata_) {
|
713
793
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
714
794
|
gpr_log(GPR_INFO,
|
715
|
-
"chand=%p calld=%p: replaying previously completed "
|
795
|
+
"chand=%p calld=%p attempt=%p: replaying previously completed "
|
716
796
|
"send_initial_metadata op",
|
717
|
-
calld_->chand_, calld_);
|
797
|
+
calld_->chand_, calld_, this);
|
718
798
|
}
|
719
799
|
replay_batch_data = CreateBatch(1, true /* set_on_complete */);
|
720
800
|
replay_batch_data->AddRetriableSendInitialMetadataOp();
|
@@ -726,9 +806,9 @@ RetryFilter::CallData::CallAttempt::MaybeCreateBatchForReplay() {
|
|
726
806
|
!calld_->pending_send_message_) {
|
727
807
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
728
808
|
gpr_log(GPR_INFO,
|
729
|
-
"chand=%p calld=%p: replaying previously completed "
|
809
|
+
"chand=%p calld=%p attempt=%p: replaying previously completed "
|
730
810
|
"send_message op",
|
731
|
-
calld_->chand_, calld_);
|
811
|
+
calld_->chand_, calld_, this);
|
732
812
|
}
|
733
813
|
if (replay_batch_data == nullptr) {
|
734
814
|
replay_batch_data = CreateBatch(1, true /* set_on_complete */);
|
@@ -745,9 +825,9 @@ RetryFilter::CallData::CallAttempt::MaybeCreateBatchForReplay() {
|
|
745
825
|
!calld_->pending_send_trailing_metadata_) {
|
746
826
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
747
827
|
gpr_log(GPR_INFO,
|
748
|
-
"chand=%p calld=%p: replaying previously completed "
|
828
|
+
"chand=%p calld=%p attempt=%p: replaying previously completed "
|
749
829
|
"send_trailing_metadata op",
|
750
|
-
calld_->chand_, calld_);
|
830
|
+
calld_->chand_, calld_, this);
|
751
831
|
}
|
752
832
|
if (replay_batch_data == nullptr) {
|
753
833
|
replay_batch_data = CreateBatch(1, true /* set_on_complete */);
|
@@ -757,12 +837,67 @@ RetryFilter::CallData::CallAttempt::MaybeCreateBatchForReplay() {
|
|
757
837
|
return replay_batch_data;
|
758
838
|
}
|
759
839
|
|
840
|
+
namespace {
|
841
|
+
|
842
|
+
void StartBatchInCallCombiner(void* arg, grpc_error_handle /*ignored*/) {
|
843
|
+
grpc_transport_stream_op_batch* batch =
|
844
|
+
static_cast<grpc_transport_stream_op_batch*>(arg);
|
845
|
+
auto* lb_call = static_cast<ClientChannel::LoadBalancedCall*>(
|
846
|
+
batch->handler_private.extra_arg);
|
847
|
+
// Note: This will release the call combiner.
|
848
|
+
lb_call->StartTransportStreamOpBatch(batch);
|
849
|
+
}
|
850
|
+
|
851
|
+
} // namespace
|
852
|
+
|
853
|
+
void RetryFilter::CallData::CallAttempt::AddClosureForBatch(
|
854
|
+
grpc_transport_stream_op_batch* batch, const char* reason,
|
855
|
+
CallCombinerClosureList* closures) {
|
856
|
+
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
857
|
+
gpr_log(GPR_INFO, "chand=%p calld=%p attempt=%p: adding batch (%s): %s",
|
858
|
+
calld_->chand_, calld_, this, reason,
|
859
|
+
grpc_transport_stream_op_batch_string(batch).c_str());
|
860
|
+
}
|
861
|
+
batch->handler_private.extra_arg = lb_call_.get();
|
862
|
+
GRPC_CLOSURE_INIT(&batch->handler_private.closure, StartBatchInCallCombiner,
|
863
|
+
batch, grpc_schedule_on_exec_ctx);
|
864
|
+
closures->Add(&batch->handler_private.closure, GRPC_ERROR_NONE, reason);
|
865
|
+
}
|
866
|
+
|
867
|
+
void RetryFilter::CallData::CallAttempt::
|
868
|
+
AddBatchForInternalRecvTrailingMetadata(CallCombinerClosureList* closures) {
|
869
|
+
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
870
|
+
gpr_log(GPR_INFO,
|
871
|
+
"chand=%p calld=%p attempt=%p: call failed but "
|
872
|
+
"recv_trailing_metadata not started; starting it internally",
|
873
|
+
calld_->chand_, calld_, this);
|
874
|
+
}
|
875
|
+
// Create batch_data with 2 refs, since this batch will be unreffed twice:
|
876
|
+
// once for the recv_trailing_metadata_ready callback when the batch
|
877
|
+
// completes, and again when we actually get a recv_trailing_metadata
|
878
|
+
// op from the surface.
|
879
|
+
BatchData* batch_data = CreateBatch(2, false /* set_on_complete */);
|
880
|
+
batch_data->AddRetriableRecvTrailingMetadataOp();
|
881
|
+
recv_trailing_metadata_internal_batch_.reset(batch_data);
|
882
|
+
AddClosureForBatch(batch_data->batch(),
|
883
|
+
"starting internal recv_trailing_metadata", closures);
|
884
|
+
}
|
885
|
+
|
886
|
+
void RetryFilter::CallData::CallAttempt::AddBatchForCancelOp(
|
887
|
+
grpc_error_handle error, CallCombinerClosureList* closures) {
|
888
|
+
BatchData* cancel_batch_data = CreateBatch(1, /*set_on_complete=*/true);
|
889
|
+
cancel_batch_data->AddCancelStreamOp(error);
|
890
|
+
AddClosureForBatch(cancel_batch_data->batch(),
|
891
|
+
"start cancellation batch on call attempt", closures);
|
892
|
+
}
|
893
|
+
|
760
894
|
void RetryFilter::CallData::CallAttempt::AddBatchesForPendingBatches(
|
761
895
|
CallCombinerClosureList* closures) {
|
762
896
|
for (size_t i = 0; i < GPR_ARRAY_SIZE(calld_->pending_batches_); ++i) {
|
763
897
|
PendingBatch* pending = &calld_->pending_batches_[i];
|
764
898
|
grpc_transport_stream_op_batch* batch = pending->batch;
|
765
899
|
if (batch == nullptr) continue;
|
900
|
+
bool has_send_ops = false;
|
766
901
|
// Skip any batch that either (a) has already been started on this
|
767
902
|
// call attempt or (b) we can't start yet because we're still
|
768
903
|
// replaying send ops that need to be completed first.
|
@@ -773,65 +908,93 @@ void RetryFilter::CallData::CallAttempt::AddBatchesForPendingBatches(
|
|
773
908
|
// starting a recv op due to it being in the same batch with a send
|
774
909
|
// op. If/when we revamp the callback protocol in
|
775
910
|
// transport_stream_op_batch, we may be able to fix this.
|
776
|
-
if (batch->send_initial_metadata
|
777
|
-
continue;
|
911
|
+
if (batch->send_initial_metadata) {
|
912
|
+
if (started_send_initial_metadata_) continue;
|
913
|
+
has_send_ops = true;
|
778
914
|
}
|
779
|
-
if (batch->send_message
|
780
|
-
|
781
|
-
|
915
|
+
if (batch->send_message) {
|
916
|
+
if (completed_send_message_count_ < started_send_message_count_) {
|
917
|
+
continue;
|
918
|
+
}
|
919
|
+
has_send_ops = true;
|
782
920
|
}
|
783
921
|
// Note that we only start send_trailing_metadata if we have no more
|
784
922
|
// send_message ops to start, since we can't send down any more
|
785
923
|
// send_message ops after send_trailing_metadata.
|
786
|
-
if (batch->send_trailing_metadata
|
787
|
-
|
788
|
-
|
789
|
-
|
790
|
-
|
924
|
+
if (batch->send_trailing_metadata) {
|
925
|
+
if (started_send_message_count_ + batch->send_message <
|
926
|
+
calld_->send_messages_.size() ||
|
927
|
+
started_send_trailing_metadata_) {
|
928
|
+
continue;
|
929
|
+
}
|
930
|
+
has_send_ops = true;
|
791
931
|
}
|
792
|
-
|
793
|
-
|
932
|
+
int num_callbacks = has_send_ops; // All send ops share one callback.
|
933
|
+
if (batch->recv_initial_metadata) {
|
934
|
+
if (started_recv_initial_metadata_) continue;
|
935
|
+
++num_callbacks;
|
794
936
|
}
|
795
|
-
if (batch->recv_message
|
796
|
-
|
797
|
-
|
937
|
+
if (batch->recv_message) {
|
938
|
+
if (completed_recv_message_count_ < started_recv_message_count_) {
|
939
|
+
continue;
|
940
|
+
}
|
941
|
+
++num_callbacks;
|
798
942
|
}
|
799
|
-
if (batch->recv_trailing_metadata
|
800
|
-
|
801
|
-
|
802
|
-
|
803
|
-
|
804
|
-
//
|
805
|
-
|
806
|
-
|
807
|
-
|
808
|
-
|
809
|
-
//
|
810
|
-
|
811
|
-
|
812
|
-
|
813
|
-
|
814
|
-
|
815
|
-
|
943
|
+
if (batch->recv_trailing_metadata) {
|
944
|
+
if (started_recv_trailing_metadata_) {
|
945
|
+
seen_recv_trailing_metadata_from_surface_ = true;
|
946
|
+
// If we previously completed a recv_trailing_metadata op
|
947
|
+
// initiated by AddBatchForInternalRecvTrailingMetadata(), use the
|
948
|
+
// result of that instead of trying to re-start this op.
|
949
|
+
if (GPR_UNLIKELY(recv_trailing_metadata_internal_batch_ != nullptr)) {
|
950
|
+
// If the batch completed, then trigger the completion callback
|
951
|
+
// directly, so that we return the previously returned results to
|
952
|
+
// the application. Otherwise, just unref the internally started
|
953
|
+
// batch, since we'll propagate the completion when it completes.
|
954
|
+
if (completed_recv_trailing_metadata_) {
|
955
|
+
closures->Add(
|
956
|
+
&recv_trailing_metadata_ready_, recv_trailing_metadata_error_,
|
957
|
+
"re-executing recv_trailing_metadata_ready to propagate "
|
958
|
+
"internally triggered result");
|
959
|
+
// Ref will be released by callback.
|
960
|
+
recv_trailing_metadata_internal_batch_.release();
|
961
|
+
} else {
|
962
|
+
recv_trailing_metadata_internal_batch_.reset(
|
963
|
+
DEBUG_LOCATION,
|
964
|
+
"internally started recv_trailing_metadata batch pending and "
|
965
|
+
"recv_trailing_metadata started from surface");
|
966
|
+
GRPC_ERROR_UNREF(recv_trailing_metadata_error_);
|
967
|
+
}
|
968
|
+
recv_trailing_metadata_error_ = GRPC_ERROR_NONE;
|
816
969
|
}
|
817
|
-
|
970
|
+
// We don't want the fact that we've already started this op internally
|
971
|
+
// to prevent us from adding a batch that may contain other ops.
|
972
|
+
// Instead, we'll just skip adding this op below.
|
973
|
+
if (num_callbacks == 0) continue;
|
974
|
+
} else {
|
975
|
+
++num_callbacks;
|
818
976
|
}
|
819
|
-
continue;
|
820
977
|
}
|
821
|
-
// If we're already committed
|
822
|
-
|
823
|
-
|
978
|
+
// If we're already committed and the following conditions are met,
|
979
|
+
// just send the batch down as-is:
|
980
|
+
// - The batch contains no cached send ops. (If it does, we need
|
981
|
+
// the logic below to use the cached payloads.)
|
982
|
+
// - The batch does not contain recv_trailing_metadata when we have
|
983
|
+
// already started an internal recv_trailing_metadata batch. (If
|
984
|
+
// we've already started an internal recv_trailing_metadata batch,
|
985
|
+
// then we need the logic below to send all ops in the batch
|
986
|
+
// *except* the recv_trailing_metadata op.)
|
987
|
+
if (calld_->retry_committed_ && !pending->send_ops_cached &&
|
988
|
+
(!batch->recv_trailing_metadata || !started_recv_trailing_metadata_)) {
|
989
|
+
AddClosureForBatch(
|
990
|
+
batch,
|
991
|
+
"start non-replayable pending batch on call attempt after commit",
|
992
|
+
closures);
|
824
993
|
calld_->PendingBatchClear(pending);
|
825
994
|
continue;
|
826
995
|
}
|
827
996
|
// Create batch with the right number of callbacks.
|
828
|
-
|
829
|
-
batch->send_message ||
|
830
|
-
batch->send_trailing_metadata;
|
831
|
-
const int num_callbacks = has_send_ops + batch->recv_initial_metadata +
|
832
|
-
batch->recv_message +
|
833
|
-
batch->recv_trailing_metadata;
|
834
|
-
CallAttempt::BatchData* batch_data =
|
997
|
+
BatchData* batch_data =
|
835
998
|
CreateBatch(num_callbacks, has_send_ops /* set_on_complete */);
|
836
999
|
// Cache send ops if needed.
|
837
1000
|
calld_->MaybeCacheSendOpsForBatch(pending);
|
@@ -858,19 +1021,12 @@ void RetryFilter::CallData::CallAttempt::AddBatchesForPendingBatches(
|
|
858
1021
|
batch_data->AddRetriableRecvMessageOp();
|
859
1022
|
}
|
860
1023
|
// recv_trailing_metadata.
|
861
|
-
if (batch->recv_trailing_metadata) {
|
1024
|
+
if (batch->recv_trailing_metadata && !started_recv_trailing_metadata_) {
|
862
1025
|
batch_data->AddRetriableRecvTrailingMetadataOp();
|
863
1026
|
}
|
864
|
-
|
865
|
-
|
866
|
-
|
867
|
-
if (batch->send_initial_metadata || batch->send_message ||
|
868
|
-
batch->send_trailing_metadata) {
|
869
|
-
if (calld_->num_in_flight_call_attempt_send_batches_ == 0) {
|
870
|
-
GRPC_CALL_STACK_REF(calld_->owning_call_, "retriable_send_batches");
|
871
|
-
}
|
872
|
-
++calld_->num_in_flight_call_attempt_send_batches_;
|
873
|
-
}
|
1027
|
+
AddClosureForBatch(batch_data->batch(),
|
1028
|
+
"start replayable pending batch on call attempt",
|
1029
|
+
closures);
|
874
1030
|
}
|
875
1031
|
}
|
876
1032
|
|
@@ -879,13 +1035,8 @@ void RetryFilter::CallData::CallAttempt::AddRetriableBatches(
|
|
879
1035
|
// Replay previously-returned send_* ops if needed.
|
880
1036
|
BatchData* replay_batch_data = MaybeCreateBatchForReplay();
|
881
1037
|
if (replay_batch_data != nullptr) {
|
882
|
-
|
883
|
-
|
884
|
-
// If this is the first one, take a ref to the call stack.
|
885
|
-
if (calld_->num_in_flight_call_attempt_send_batches_ == 0) {
|
886
|
-
GRPC_CALL_STACK_REF(calld_->owning_call_, "retriable_send_batches");
|
887
|
-
}
|
888
|
-
++calld_->num_in_flight_call_attempt_send_batches_;
|
1038
|
+
AddClosureForBatch(replay_batch_data->batch(),
|
1039
|
+
"start replay batch on call attempt", closures);
|
889
1040
|
}
|
890
1041
|
// Now add pending batches.
|
891
1042
|
AddBatchesForPendingBatches(closures);
|
@@ -893,8 +1044,9 @@ void RetryFilter::CallData::CallAttempt::AddRetriableBatches(
|
|
893
1044
|
|
894
1045
|
void RetryFilter::CallData::CallAttempt::StartRetriableBatches() {
|
895
1046
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
896
|
-
gpr_log(GPR_INFO,
|
897
|
-
|
1047
|
+
gpr_log(GPR_INFO,
|
1048
|
+
"chand=%p calld=%p attempt=%p: constructing retriable batches",
|
1049
|
+
calld_->chand_, calld_, this);
|
898
1050
|
}
|
899
1051
|
// Construct list of closures to execute, one for each pending batch.
|
900
1052
|
CallCombinerClosureList closures;
|
@@ -903,107 +1055,51 @@ void RetryFilter::CallData::CallAttempt::StartRetriableBatches() {
|
|
903
1055
|
// Start batches on LB call.
|
904
1056
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
905
1057
|
gpr_log(GPR_INFO,
|
906
|
-
"chand=%p calld=%p: starting %" PRIuPTR
|
1058
|
+
"chand=%p calld=%p attempt=%p: starting %" PRIuPTR
|
907
1059
|
" retriable batches on lb_call=%p",
|
908
|
-
calld_->chand_, calld_, closures.size(),
|
1060
|
+
calld_->chand_, calld_, this, closures.size(), lb_call_.get());
|
909
1061
|
}
|
910
1062
|
closures.RunClosures(calld_->call_combiner_);
|
911
1063
|
}
|
912
1064
|
|
913
|
-
|
914
|
-
|
915
|
-
|
916
|
-
|
917
|
-
|
918
|
-
RefCountedPtr<CallAttempt> attempt, int refcount, bool set_on_complete)
|
919
|
-
: RefCounted(nullptr, refcount), call_attempt_(std::move(attempt)) {
|
920
|
-
// TODO(roth): Consider holding this ref on the call stack in
|
921
|
-
// CallAttempt instead of here in BatchData. This would eliminate the
|
922
|
-
// need for CallData::num_in_flight_call_attempt_send_batches_.
|
923
|
-
// But it would require having a way to unref CallAttempt when it is
|
924
|
-
// no longer needed (i.e., when the call is committed and all cached
|
925
|
-
// send ops have been replayed and the LB call is moved into
|
926
|
-
// CallData::committed_call_).
|
927
|
-
GRPC_CALL_STACK_REF(call_attempt_->calld_->owning_call_, "CallAttempt");
|
928
|
-
batch_.payload = &call_attempt_->batch_payload_;
|
929
|
-
if (set_on_complete) {
|
930
|
-
GRPC_CLOSURE_INIT(&on_complete_, OnComplete, this,
|
931
|
-
grpc_schedule_on_exec_ctx);
|
932
|
-
batch_.on_complete = &on_complete_;
|
933
|
-
}
|
1065
|
+
void RetryFilter::CallData::CallAttempt::CancelFromSurface(
|
1066
|
+
grpc_transport_stream_op_batch* cancel_batch) {
|
1067
|
+
MaybeCancelPerAttemptRecvTimer();
|
1068
|
+
// Propagate cancellation to LB call.
|
1069
|
+
lb_call_->StartTransportStreamOpBatch(cancel_batch);
|
934
1070
|
}
|
935
1071
|
|
936
|
-
RetryFilter::CallData::CallAttempt::
|
937
|
-
|
938
|
-
|
939
|
-
}
|
940
|
-
if (batch_.send_trailing_metadata) {
|
941
|
-
grpc_metadata_batch_destroy(&call_attempt_->send_trailing_metadata_);
|
942
|
-
}
|
943
|
-
if (batch_.recv_initial_metadata) {
|
944
|
-
grpc_metadata_batch_destroy(&call_attempt_->recv_initial_metadata_);
|
945
|
-
}
|
946
|
-
if (batch_.recv_trailing_metadata) {
|
947
|
-
grpc_metadata_batch_destroy(&call_attempt_->recv_trailing_metadata_);
|
948
|
-
}
|
949
|
-
GRPC_CALL_STACK_UNREF(call_attempt_->calld_->owning_call_, "CallAttempt");
|
950
|
-
}
|
951
|
-
|
952
|
-
void RetryFilter::CallData::CallAttempt::BatchData::
|
953
|
-
FreeCachedSendOpDataForCompletedBatch() {
|
954
|
-
auto* calld = call_attempt_->calld_;
|
955
|
-
// TODO(roth): When we implement hedging, this logic will need to get
|
956
|
-
// a bit more complex, because there may be other (now abandoned) call
|
957
|
-
// attempts still using this data. We may need to do some sort of
|
958
|
-
// ref-counting instead.
|
959
|
-
if (batch_.send_initial_metadata) {
|
960
|
-
calld->FreeCachedSendInitialMetadata();
|
961
|
-
}
|
962
|
-
if (batch_.send_message) {
|
963
|
-
calld->FreeCachedSendMessage(call_attempt_->completed_send_message_count_ -
|
964
|
-
1);
|
965
|
-
}
|
966
|
-
if (batch_.send_trailing_metadata) {
|
967
|
-
calld->FreeCachedSendTrailingMetadata();
|
968
|
-
}
|
969
|
-
}
|
970
|
-
|
971
|
-
bool RetryFilter::CallData::CallAttempt::BatchData::MaybeRetry(
|
972
|
-
grpc_status_code status, grpc_mdelem* server_pushback_md, bool is_lb_drop) {
|
973
|
-
auto* calld = call_attempt_->calld_;
|
1072
|
+
bool RetryFilter::CallData::CallAttempt::ShouldRetry(
|
1073
|
+
absl::optional<grpc_status_code> status, bool is_lb_drop,
|
1074
|
+
grpc_mdelem* server_pushback_md, grpc_millis* server_pushback_ms) {
|
974
1075
|
// LB drops always inhibit retries.
|
975
1076
|
if (is_lb_drop) return false;
|
976
|
-
//
|
977
|
-
|
978
|
-
|
979
|
-
// This catches the case where the batch has multiple callbacks
|
980
|
-
// (i.e., it includes either recv_message or recv_initial_metadata).
|
981
|
-
if (call_attempt_->retry_dispatched_) {
|
982
|
-
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
983
|
-
gpr_log(GPR_INFO, "chand=%p calld=%p: retry already dispatched",
|
984
|
-
calld->chand_, calld);
|
985
|
-
}
|
986
|
-
return true;
|
987
|
-
}
|
1077
|
+
// TODO(roth): Handle transparent retries here.
|
1078
|
+
// If no retry policy, don't retry.
|
1079
|
+
if (calld_->retry_policy_ == nullptr) return false;
|
988
1080
|
// Check status.
|
989
|
-
if (
|
990
|
-
if (
|
991
|
-
|
992
|
-
|
993
|
-
|
994
|
-
|
995
|
-
|
1081
|
+
if (status.has_value()) {
|
1082
|
+
if (GPR_LIKELY(*status == GRPC_STATUS_OK)) {
|
1083
|
+
if (calld_->retry_throttle_data_ != nullptr) {
|
1084
|
+
calld_->retry_throttle_data_->RecordSuccess();
|
1085
|
+
}
|
1086
|
+
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1087
|
+
gpr_log(GPR_INFO, "chand=%p calld=%p attempt=%p: call succeeded",
|
1088
|
+
calld_->chand_, calld_, this);
|
1089
|
+
}
|
1090
|
+
return false;
|
996
1091
|
}
|
997
|
-
|
998
|
-
|
999
|
-
|
1000
|
-
|
1001
|
-
|
1002
|
-
|
1003
|
-
|
1004
|
-
|
1092
|
+
// Status is not OK. Check whether the status is retryable.
|
1093
|
+
if (!calld_->retry_policy_->retryable_status_codes().Contains(*status)) {
|
1094
|
+
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1095
|
+
gpr_log(GPR_INFO,
|
1096
|
+
"chand=%p calld=%p attempt=%p: status %s not configured as "
|
1097
|
+
"retryable",
|
1098
|
+
calld_->chand_, calld_, this,
|
1099
|
+
grpc_status_code_to_string(*status));
|
1100
|
+
}
|
1101
|
+
return false;
|
1005
1102
|
}
|
1006
|
-
return false;
|
1007
1103
|
}
|
1008
1104
|
// Record the failure and check whether retries are throttled.
|
1009
1105
|
// Note that it's important for this check to come after the status
|
@@ -1012,78 +1108,267 @@ bool RetryFilter::CallData::CallAttempt::BatchData::MaybeRetry(
|
|
1012
1108
|
// things like failures due to malformed requests (INVALID_ARGUMENT).
|
1013
1109
|
// Conversely, it's important for this to come before the remaining
|
1014
1110
|
// checks, so that we don't fail to record failures due to other factors.
|
1015
|
-
if (
|
1016
|
-
!
|
1111
|
+
if (calld_->retry_throttle_data_ != nullptr &&
|
1112
|
+
!calld_->retry_throttle_data_->RecordFailure()) {
|
1017
1113
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1018
|
-
gpr_log(GPR_INFO, "chand=%p calld=%p: retries throttled",
|
1019
|
-
|
1114
|
+
gpr_log(GPR_INFO, "chand=%p calld=%p attempt=%p: retries throttled",
|
1115
|
+
calld_->chand_, calld_, this);
|
1020
1116
|
}
|
1021
1117
|
return false;
|
1022
1118
|
}
|
1023
1119
|
// Check whether the call is committed.
|
1024
|
-
if (
|
1120
|
+
if (calld_->retry_committed_) {
|
1025
1121
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1026
|
-
gpr_log(GPR_INFO,
|
1027
|
-
calld
|
1122
|
+
gpr_log(GPR_INFO,
|
1123
|
+
"chand=%p calld=%p attempt=%p: retries already committed",
|
1124
|
+
calld_->chand_, calld_, this);
|
1028
1125
|
}
|
1029
1126
|
return false;
|
1030
1127
|
}
|
1031
1128
|
// Check whether we have retries remaining.
|
1032
|
-
++
|
1033
|
-
if (
|
1129
|
+
++calld_->num_attempts_completed_;
|
1130
|
+
if (calld_->num_attempts_completed_ >=
|
1131
|
+
calld_->retry_policy_->max_attempts()) {
|
1034
1132
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1035
|
-
gpr_log(
|
1036
|
-
|
1133
|
+
gpr_log(
|
1134
|
+
GPR_INFO, "chand=%p calld=%p attempt=%p: exceeded %d retry attempts",
|
1135
|
+
calld_->chand_, calld_, this, calld_->retry_policy_->max_attempts());
|
1037
1136
|
}
|
1038
1137
|
return false;
|
1039
1138
|
}
|
1040
1139
|
// Check server push-back.
|
1041
|
-
grpc_millis server_pushback_ms = -1;
|
1042
1140
|
if (server_pushback_md != nullptr) {
|
1043
1141
|
// If the value is "-1" or any other unparseable string, we do not retry.
|
1044
1142
|
uint32_t ms;
|
1045
1143
|
if (!grpc_parse_slice_to_uint32(GRPC_MDVALUE(*server_pushback_md), &ms)) {
|
1046
1144
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1047
1145
|
gpr_log(GPR_INFO,
|
1048
|
-
"chand=%p calld=%p: not retrying due to server
|
1049
|
-
|
1146
|
+
"chand=%p calld=%p attempt=%p: not retrying due to server "
|
1147
|
+
"push-back",
|
1148
|
+
calld_->chand_, calld_, this);
|
1050
1149
|
}
|
1051
1150
|
return false;
|
1052
1151
|
} else {
|
1053
1152
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1054
|
-
gpr_log(
|
1055
|
-
|
1153
|
+
gpr_log(
|
1154
|
+
GPR_INFO,
|
1155
|
+
"chand=%p calld=%p attempt=%p: server push-back: retry in %u ms",
|
1156
|
+
calld_->chand_, calld_, this, ms);
|
1056
1157
|
}
|
1057
|
-
server_pushback_ms = static_cast<grpc_millis>(ms);
|
1158
|
+
*server_pushback_ms = static_cast<grpc_millis>(ms);
|
1058
1159
|
}
|
1059
1160
|
}
|
1060
|
-
//
|
1061
|
-
|
1062
|
-
|
1161
|
+
// Check with call dispatch controller.
|
1162
|
+
auto* service_config_call_data = static_cast<ServiceConfigCallData*>(
|
1163
|
+
calld_->call_context_[GRPC_CONTEXT_SERVICE_CONFIG_CALL_DATA].value);
|
1164
|
+
if (!service_config_call_data->call_dispatch_controller()->ShouldRetry()) {
|
1165
|
+
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1166
|
+
gpr_log(
|
1167
|
+
GPR_INFO,
|
1168
|
+
"chand=%p calld=%p attempt=%p: call dispatch controller denied retry",
|
1169
|
+
calld_->chand_, calld_, this);
|
1170
|
+
}
|
1171
|
+
return false;
|
1172
|
+
}
|
1173
|
+
// We should retry.
|
1063
1174
|
return true;
|
1064
1175
|
}
|
1065
1176
|
|
1177
|
+
void RetryFilter::CallData::CallAttempt::Abandon() {
|
1178
|
+
abandoned_ = true;
|
1179
|
+
// Unref batches for deferred completion callbacks that will now never
|
1180
|
+
// be invoked.
|
1181
|
+
if (started_recv_trailing_metadata_ &&
|
1182
|
+
!seen_recv_trailing_metadata_from_surface_) {
|
1183
|
+
recv_trailing_metadata_internal_batch_.reset(
|
1184
|
+
DEBUG_LOCATION,
|
1185
|
+
"internal recv_trailing_metadata completed before that op was "
|
1186
|
+
"started from the surface");
|
1187
|
+
}
|
1188
|
+
GRPC_ERROR_UNREF(recv_trailing_metadata_error_);
|
1189
|
+
recv_trailing_metadata_error_ = GRPC_ERROR_NONE;
|
1190
|
+
recv_initial_metadata_ready_deferred_batch_.reset(
|
1191
|
+
DEBUG_LOCATION,
|
1192
|
+
"unref deferred recv_initial_metadata_ready batch due to retry");
|
1193
|
+
GRPC_ERROR_UNREF(recv_initial_metadata_error_);
|
1194
|
+
recv_initial_metadata_error_ = GRPC_ERROR_NONE;
|
1195
|
+
recv_message_ready_deferred_batch_.reset(
|
1196
|
+
DEBUG_LOCATION, "unref deferred recv_message_ready batch due to retry");
|
1197
|
+
GRPC_ERROR_UNREF(recv_message_error_);
|
1198
|
+
recv_message_error_ = GRPC_ERROR_NONE;
|
1199
|
+
for (auto& on_complete_deferred_batch : on_complete_deferred_batches_) {
|
1200
|
+
on_complete_deferred_batch.batch.reset(
|
1201
|
+
DEBUG_LOCATION, "unref deferred on_complete batch due to retry");
|
1202
|
+
GRPC_ERROR_UNREF(on_complete_deferred_batch.error);
|
1203
|
+
}
|
1204
|
+
on_complete_deferred_batches_.clear();
|
1205
|
+
}
|
1206
|
+
|
1207
|
+
void RetryFilter::CallData::CallAttempt::OnPerAttemptRecvTimer(
|
1208
|
+
void* arg, grpc_error_handle error) {
|
1209
|
+
auto* call_attempt = static_cast<CallAttempt*>(arg);
|
1210
|
+
GRPC_CLOSURE_INIT(&call_attempt->on_per_attempt_recv_timer_,
|
1211
|
+
OnPerAttemptRecvTimerLocked, call_attempt, nullptr);
|
1212
|
+
GRPC_CALL_COMBINER_START(call_attempt->calld_->call_combiner_,
|
1213
|
+
&call_attempt->on_per_attempt_recv_timer_,
|
1214
|
+
GRPC_ERROR_REF(error), "per-attempt timer fired");
|
1215
|
+
}
|
1216
|
+
|
1217
|
+
void RetryFilter::CallData::CallAttempt::OnPerAttemptRecvTimerLocked(
|
1218
|
+
void* arg, grpc_error_handle error) {
|
1219
|
+
auto* call_attempt = static_cast<CallAttempt*>(arg);
|
1220
|
+
auto* calld = call_attempt->calld_;
|
1221
|
+
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1222
|
+
gpr_log(GPR_INFO,
|
1223
|
+
"chand=%p calld=%p attempt=%p: perAttemptRecvTimeout timer fired: "
|
1224
|
+
"error=%s, per_attempt_recv_timer_pending_=%d",
|
1225
|
+
calld->chand_, calld, call_attempt,
|
1226
|
+
grpc_error_std_string(error).c_str(),
|
1227
|
+
call_attempt->per_attempt_recv_timer_pending_);
|
1228
|
+
}
|
1229
|
+
CallCombinerClosureList closures;
|
1230
|
+
if (error == GRPC_ERROR_NONE &&
|
1231
|
+
call_attempt->per_attempt_recv_timer_pending_) {
|
1232
|
+
call_attempt->per_attempt_recv_timer_pending_ = false;
|
1233
|
+
// Cancel this attempt.
|
1234
|
+
// TODO(roth): When implementing hedging, we should not cancel the
|
1235
|
+
// current attempt.
|
1236
|
+
call_attempt->AddBatchForCancelOp(
|
1237
|
+
grpc_error_set_int(GRPC_ERROR_CREATE_FROM_STATIC_STRING(
|
1238
|
+
"retry perAttemptRecvTimeout exceeded"),
|
1239
|
+
GRPC_ERROR_INT_GRPC_STATUS, GRPC_STATUS_CANCELLED),
|
1240
|
+
&closures);
|
1241
|
+
// Check whether we should retry.
|
1242
|
+
if (call_attempt->ShouldRetry(
|
1243
|
+
/*status=*/absl::nullopt, /*is_lb_drop=*/false,
|
1244
|
+
/*server_pushback_md=*/nullptr, /*server_pushback_ms=*/nullptr)) {
|
1245
|
+
// Mark current attempt as abandoned.
|
1246
|
+
call_attempt->Abandon();
|
1247
|
+
// We are retrying. Start backoff timer.
|
1248
|
+
calld->StartRetryTimer(/*server_pushback_ms=*/-1);
|
1249
|
+
} else {
|
1250
|
+
// Not retrying, so commit the call.
|
1251
|
+
calld->RetryCommit(call_attempt);
|
1252
|
+
// If retry state is no longer needed, switch to fast path for
|
1253
|
+
// subsequent batches.
|
1254
|
+
call_attempt->MaybeSwitchToFastPath();
|
1255
|
+
}
|
1256
|
+
}
|
1257
|
+
closures.RunClosures(calld->call_combiner_);
|
1258
|
+
call_attempt->Unref(DEBUG_LOCATION, "OnPerAttemptRecvTimer");
|
1259
|
+
GRPC_CALL_STACK_UNREF(calld->owning_call_, "OnPerAttemptRecvTimer");
|
1260
|
+
}
|
1261
|
+
|
1262
|
+
void RetryFilter::CallData::CallAttempt::MaybeCancelPerAttemptRecvTimer() {
|
1263
|
+
if (per_attempt_recv_timer_pending_) {
|
1264
|
+
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1265
|
+
gpr_log(GPR_INFO,
|
1266
|
+
"chand=%p calld=%p attempt=%p: cancelling "
|
1267
|
+
"perAttemptRecvTimeout timer",
|
1268
|
+
calld_->chand_, calld_, this);
|
1269
|
+
}
|
1270
|
+
per_attempt_recv_timer_pending_ = false;
|
1271
|
+
grpc_timer_cancel(&per_attempt_recv_timer_);
|
1272
|
+
}
|
1273
|
+
}
|
1274
|
+
|
1275
|
+
//
|
1276
|
+
// RetryFilter::CallData::CallAttempt::BatchData
|
1277
|
+
//
|
1278
|
+
|
1279
|
+
RetryFilter::CallData::CallAttempt::BatchData::BatchData(
|
1280
|
+
RefCountedPtr<CallAttempt> attempt, int refcount, bool set_on_complete)
|
1281
|
+
: RefCounted(
|
1282
|
+
GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace) ? "BatchData" : nullptr,
|
1283
|
+
refcount),
|
1284
|
+
call_attempt_(std::move(attempt)) {
|
1285
|
+
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1286
|
+
gpr_log(GPR_INFO, "chand=%p calld=%p attempt=%p: creating batch %p",
|
1287
|
+
call_attempt_->calld_->chand_, call_attempt_->calld_,
|
1288
|
+
call_attempt_.get(), this);
|
1289
|
+
}
|
1290
|
+
// We hold a ref to the call stack for every batch sent on a call attempt.
|
1291
|
+
// This is because some batches on the call attempt may not complete
|
1292
|
+
// until after all of the batches are completed at the surface (because
|
1293
|
+
// each batch that is pending at the surface holds a ref). This
|
1294
|
+
// can happen for replayed send ops, and it can happen for
|
1295
|
+
// recv_initial_metadata and recv_message ops on a call attempt that has
|
1296
|
+
// been abandoned.
|
1297
|
+
GRPC_CALL_STACK_REF(call_attempt_->calld_->owning_call_, "Retry BatchData");
|
1298
|
+
batch_.payload = &call_attempt_->batch_payload_;
|
1299
|
+
if (set_on_complete) {
|
1300
|
+
GRPC_CLOSURE_INIT(&on_complete_, OnComplete, this, nullptr);
|
1301
|
+
batch_.on_complete = &on_complete_;
|
1302
|
+
}
|
1303
|
+
}
|
1304
|
+
|
1305
|
+
RetryFilter::CallData::CallAttempt::BatchData::~BatchData() {
|
1306
|
+
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1307
|
+
gpr_log(GPR_INFO, "chand=%p calld=%p attempt=%p: destroying batch %p",
|
1308
|
+
call_attempt_->calld_->chand_, call_attempt_->calld_,
|
1309
|
+
call_attempt_.get(), this);
|
1310
|
+
}
|
1311
|
+
if (batch_.send_initial_metadata) {
|
1312
|
+
grpc_metadata_batch_destroy(&call_attempt_->send_initial_metadata_);
|
1313
|
+
}
|
1314
|
+
if (batch_.send_trailing_metadata) {
|
1315
|
+
grpc_metadata_batch_destroy(&call_attempt_->send_trailing_metadata_);
|
1316
|
+
}
|
1317
|
+
if (batch_.recv_initial_metadata) {
|
1318
|
+
grpc_metadata_batch_destroy(&call_attempt_->recv_initial_metadata_);
|
1319
|
+
}
|
1320
|
+
if (batch_.recv_trailing_metadata) {
|
1321
|
+
grpc_metadata_batch_destroy(&call_attempt_->recv_trailing_metadata_);
|
1322
|
+
}
|
1323
|
+
GRPC_CALL_STACK_UNREF(call_attempt_->calld_->owning_call_, "Retry BatchData");
|
1324
|
+
call_attempt_.reset(DEBUG_LOCATION, "~BatchData");
|
1325
|
+
}
|
1326
|
+
|
1327
|
+
void RetryFilter::CallData::CallAttempt::BatchData::
|
1328
|
+
FreeCachedSendOpDataForCompletedBatch() {
|
1329
|
+
auto* calld = call_attempt_->calld_;
|
1330
|
+
// TODO(roth): When we implement hedging, this logic will need to get
|
1331
|
+
// a bit more complex, because there may be other (now abandoned) call
|
1332
|
+
// attempts still using this data. We may need to do some sort of
|
1333
|
+
// ref-counting instead.
|
1334
|
+
if (batch_.send_initial_metadata) {
|
1335
|
+
calld->FreeCachedSendInitialMetadata();
|
1336
|
+
}
|
1337
|
+
if (batch_.send_message) {
|
1338
|
+
calld->FreeCachedSendMessage(call_attempt_->completed_send_message_count_ -
|
1339
|
+
1);
|
1340
|
+
}
|
1341
|
+
if (batch_.send_trailing_metadata) {
|
1342
|
+
calld->FreeCachedSendTrailingMetadata();
|
1343
|
+
}
|
1344
|
+
}
|
1345
|
+
|
1066
1346
|
//
|
1067
1347
|
// recv_initial_metadata callback handling
|
1068
1348
|
//
|
1069
1349
|
|
1070
1350
|
void RetryFilter::CallData::CallAttempt::BatchData::
|
1071
|
-
|
1072
|
-
|
1073
|
-
auto* call_attempt = batch_data->call_attempt_.get();
|
1351
|
+
MaybeAddClosureForRecvInitialMetadataCallback(
|
1352
|
+
grpc_error_handle error, CallCombinerClosureList* closures) {
|
1074
1353
|
// Find pending batch.
|
1075
|
-
PendingBatch* pending =
|
1354
|
+
PendingBatch* pending = call_attempt_->calld_->PendingBatchFind(
|
1076
1355
|
"invoking recv_initial_metadata_ready for",
|
1077
1356
|
[](grpc_transport_stream_op_batch* batch) {
|
1078
1357
|
return batch->recv_initial_metadata &&
|
1079
1358
|
batch->payload->recv_initial_metadata
|
1080
1359
|
.recv_initial_metadata_ready != nullptr;
|
1081
1360
|
});
|
1082
|
-
|
1361
|
+
if (pending == nullptr) {
|
1362
|
+
GRPC_ERROR_UNREF(error);
|
1363
|
+
return;
|
1364
|
+
}
|
1083
1365
|
// Return metadata.
|
1084
1366
|
grpc_metadata_batch_move(
|
1085
|
-
&
|
1367
|
+
&call_attempt_->recv_initial_metadata_,
|
1086
1368
|
pending->batch->payload->recv_initial_metadata.recv_initial_metadata);
|
1369
|
+
// Propagate trailing_metadata_available.
|
1370
|
+
*pending->batch->payload->recv_initial_metadata.trailing_metadata_available =
|
1371
|
+
call_attempt_->trailing_metadata_available_;
|
1087
1372
|
// Update bookkeeping.
|
1088
1373
|
// Note: Need to do this before invoking the callback, since invoking
|
1089
1374
|
// the callback will result in yielding the call combiner.
|
@@ -1092,33 +1377,36 @@ void RetryFilter::CallData::CallAttempt::BatchData::
|
|
1092
1377
|
.recv_initial_metadata_ready;
|
1093
1378
|
pending->batch->payload->recv_initial_metadata.recv_initial_metadata_ready =
|
1094
1379
|
nullptr;
|
1095
|
-
|
1096
|
-
|
1097
|
-
|
1098
|
-
|
1099
|
-
GRPC_ERROR_REF(error));
|
1380
|
+
call_attempt_->calld_->MaybeClearPendingBatch(pending);
|
1381
|
+
// Add callback to closures.
|
1382
|
+
closures->Add(recv_initial_metadata_ready, error,
|
1383
|
+
"recv_initial_metadata_ready for pending batch");
|
1100
1384
|
}
|
1101
1385
|
|
1102
1386
|
void RetryFilter::CallData::CallAttempt::BatchData::RecvInitialMetadataReady(
|
1103
1387
|
void* arg, grpc_error_handle error) {
|
1104
|
-
|
1105
|
-
static_cast<CallAttempt::BatchData*>(arg);
|
1388
|
+
RefCountedPtr<BatchData> batch_data(static_cast<BatchData*>(arg));
|
1106
1389
|
CallAttempt* call_attempt = batch_data->call_attempt_.get();
|
1107
1390
|
CallData* calld = call_attempt->calld_;
|
1108
1391
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1109
1392
|
gpr_log(GPR_INFO,
|
1110
|
-
"chand=%p calld=%p
|
1111
|
-
|
1393
|
+
"chand=%p calld=%p attempt=%p batch_data=%p: "
|
1394
|
+
"got recv_initial_metadata_ready, error=%s",
|
1395
|
+
calld->chand_, calld, call_attempt, batch_data.get(),
|
1396
|
+
grpc_error_std_string(error).c_str());
|
1112
1397
|
}
|
1113
1398
|
call_attempt->completed_recv_initial_metadata_ = true;
|
1114
|
-
// If
|
1399
|
+
// If this attempt has been abandoned, then we're not going to use the
|
1115
1400
|
// result of this recv_initial_metadata op, so do nothing.
|
1116
|
-
if (call_attempt->
|
1401
|
+
if (call_attempt->abandoned_) {
|
1117
1402
|
GRPC_CALL_COMBINER_STOP(
|
1118
1403
|
calld->call_combiner_,
|
1119
|
-
"recv_initial_metadata_ready
|
1404
|
+
"recv_initial_metadata_ready for abandoned attempt");
|
1120
1405
|
return;
|
1121
1406
|
}
|
1407
|
+
// Cancel per-attempt recv timer, if any.
|
1408
|
+
call_attempt->MaybeCancelPerAttemptRecvTimer();
|
1409
|
+
// If we're not committed, check the response to see if we need to commit.
|
1122
1410
|
if (!calld->retry_committed_) {
|
1123
1411
|
// If we got an error or a Trailers-Only response and have not yet gotten
|
1124
1412
|
// the recv_trailing_metadata_ready callback, then defer propagating this
|
@@ -1129,82 +1417,94 @@ void RetryFilter::CallData::CallAttempt::BatchData::RecvInitialMetadataReady(
|
|
1129
1417
|
!call_attempt->completed_recv_trailing_metadata_)) {
|
1130
1418
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1131
1419
|
gpr_log(GPR_INFO,
|
1132
|
-
"chand=%p calld=%p: deferring
|
1133
|
-
"(Trailers-Only)",
|
1134
|
-
calld->chand_, calld);
|
1420
|
+
"chand=%p calld=%p attempt=%p: deferring "
|
1421
|
+
"recv_initial_metadata_ready (Trailers-Only)",
|
1422
|
+
calld->chand_, calld, call_attempt);
|
1135
1423
|
}
|
1136
|
-
call_attempt->recv_initial_metadata_ready_deferred_batch_ =
|
1424
|
+
call_attempt->recv_initial_metadata_ready_deferred_batch_ =
|
1425
|
+
std::move(batch_data);
|
1137
1426
|
call_attempt->recv_initial_metadata_error_ = GRPC_ERROR_REF(error);
|
1427
|
+
CallCombinerClosureList closures;
|
1428
|
+
if (error != GRPC_ERROR_NONE) {
|
1429
|
+
call_attempt->AddBatchForCancelOp(GRPC_ERROR_REF(error), &closures);
|
1430
|
+
}
|
1138
1431
|
if (!call_attempt->started_recv_trailing_metadata_) {
|
1139
1432
|
// recv_trailing_metadata not yet started by application; start it
|
1140
1433
|
// ourselves to get status.
|
1141
|
-
call_attempt->
|
1142
|
-
} else {
|
1143
|
-
GRPC_CALL_COMBINER_STOP(
|
1144
|
-
calld->call_combiner_,
|
1145
|
-
"recv_initial_metadata_ready trailers-only or error");
|
1434
|
+
call_attempt->AddBatchForInternalRecvTrailingMetadata(&closures);
|
1146
1435
|
}
|
1436
|
+
closures.RunClosures(calld->call_combiner_);
|
1147
1437
|
return;
|
1148
1438
|
}
|
1149
1439
|
// Received valid initial metadata, so commit the call.
|
1150
1440
|
calld->RetryCommit(call_attempt);
|
1441
|
+
// If retry state is no longer needed, switch to fast path for
|
1442
|
+
// subsequent batches.
|
1443
|
+
call_attempt->MaybeSwitchToFastPath();
|
1151
1444
|
}
|
1152
1445
|
// Invoke the callback to return the result to the surface.
|
1153
|
-
|
1154
|
-
|
1446
|
+
CallCombinerClosureList closures;
|
1447
|
+
batch_data->MaybeAddClosureForRecvInitialMetadataCallback(
|
1448
|
+
GRPC_ERROR_REF(error), &closures);
|
1449
|
+
closures.RunClosures(calld->call_combiner_);
|
1155
1450
|
}
|
1156
1451
|
|
1157
1452
|
//
|
1158
1453
|
// recv_message callback handling
|
1159
1454
|
//
|
1160
1455
|
|
1161
|
-
void RetryFilter::CallData::CallAttempt::BatchData::
|
1162
|
-
|
1163
|
-
|
1164
|
-
static_cast<CallAttempt::BatchData*>(arg);
|
1165
|
-
CallAttempt* call_attempt = batch_data->call_attempt_.get();
|
1166
|
-
CallData* calld = call_attempt->calld_;
|
1456
|
+
void RetryFilter::CallData::CallAttempt::BatchData::
|
1457
|
+
MaybeAddClosureForRecvMessageCallback(grpc_error_handle error,
|
1458
|
+
CallCombinerClosureList* closures) {
|
1167
1459
|
// Find pending op.
|
1168
|
-
PendingBatch* pending =
|
1460
|
+
PendingBatch* pending = call_attempt_->calld_->PendingBatchFind(
|
1169
1461
|
"invoking recv_message_ready for",
|
1170
1462
|
[](grpc_transport_stream_op_batch* batch) {
|
1171
1463
|
return batch->recv_message &&
|
1172
1464
|
batch->payload->recv_message.recv_message_ready != nullptr;
|
1173
1465
|
});
|
1174
|
-
|
1466
|
+
if (pending == nullptr) {
|
1467
|
+
GRPC_ERROR_UNREF(error);
|
1468
|
+
return;
|
1469
|
+
}
|
1175
1470
|
// Return payload.
|
1176
1471
|
*pending->batch->payload->recv_message.recv_message =
|
1177
|
-
std::move(
|
1472
|
+
std::move(call_attempt_->recv_message_);
|
1178
1473
|
// Update bookkeeping.
|
1179
1474
|
// Note: Need to do this before invoking the callback, since invoking
|
1180
1475
|
// the callback will result in yielding the call combiner.
|
1181
1476
|
grpc_closure* recv_message_ready =
|
1182
1477
|
pending->batch->payload->recv_message.recv_message_ready;
|
1183
1478
|
pending->batch->payload->recv_message.recv_message_ready = nullptr;
|
1184
|
-
|
1185
|
-
|
1186
|
-
|
1187
|
-
|
1479
|
+
call_attempt_->calld_->MaybeClearPendingBatch(pending);
|
1480
|
+
// Add callback to closures.
|
1481
|
+
closures->Add(recv_message_ready, error,
|
1482
|
+
"recv_message_ready for pending batch");
|
1188
1483
|
}
|
1189
1484
|
|
1190
1485
|
void RetryFilter::CallData::CallAttempt::BatchData::RecvMessageReady(
|
1191
1486
|
void* arg, grpc_error_handle error) {
|
1192
|
-
|
1193
|
-
static_cast<CallAttempt::BatchData*>(arg);
|
1487
|
+
RefCountedPtr<BatchData> batch_data(static_cast<BatchData*>(arg));
|
1194
1488
|
CallAttempt* call_attempt = batch_data->call_attempt_.get();
|
1195
1489
|
CallData* calld = call_attempt->calld_;
|
1196
1490
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1197
|
-
gpr_log(GPR_INFO,
|
1198
|
-
|
1491
|
+
gpr_log(GPR_INFO,
|
1492
|
+
"chand=%p calld=%p attempt=%p batch_data=%p: "
|
1493
|
+
"got recv_message_ready, error=%s",
|
1494
|
+
calld->chand_, calld, call_attempt, batch_data.get(),
|
1495
|
+
grpc_error_std_string(error).c_str());
|
1199
1496
|
}
|
1200
1497
|
++call_attempt->completed_recv_message_count_;
|
1201
|
-
// If
|
1498
|
+
// If this attempt has been abandoned, then we're not going to use the
|
1202
1499
|
// result of this recv_message op, so do nothing.
|
1203
|
-
if (call_attempt->
|
1500
|
+
if (call_attempt->abandoned_) {
|
1204
1501
|
GRPC_CALL_COMBINER_STOP(calld->call_combiner_,
|
1205
|
-
"recv_message_ready
|
1502
|
+
"recv_message_ready for abandoned attempt");
|
1206
1503
|
return;
|
1207
1504
|
}
|
1505
|
+
// Cancel per-attempt recv timer, if any.
|
1506
|
+
call_attempt->MaybeCancelPerAttemptRecvTimer();
|
1507
|
+
// If we're not committed, check the response to see if we need to commit.
|
1208
1508
|
if (!calld->retry_committed_) {
|
1209
1509
|
// If we got an error or the payload was nullptr and we have not yet gotten
|
1210
1510
|
// the recv_trailing_metadata_ready callback, then defer propagating this
|
@@ -1215,28 +1515,35 @@ void RetryFilter::CallData::CallAttempt::BatchData::RecvMessageReady(
|
|
1215
1515
|
!call_attempt->completed_recv_trailing_metadata_)) {
|
1216
1516
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1217
1517
|
gpr_log(GPR_INFO,
|
1218
|
-
"chand=%p calld=%p: deferring recv_message_ready
|
1219
|
-
"message and recv_trailing_metadata pending)",
|
1220
|
-
calld->chand_, calld);
|
1518
|
+
"chand=%p calld=%p attempt=%p: deferring recv_message_ready "
|
1519
|
+
"(nullptr message and recv_trailing_metadata pending)",
|
1520
|
+
calld->chand_, calld, call_attempt);
|
1221
1521
|
}
|
1222
|
-
call_attempt->recv_message_ready_deferred_batch_ = batch_data;
|
1522
|
+
call_attempt->recv_message_ready_deferred_batch_ = std::move(batch_data);
|
1223
1523
|
call_attempt->recv_message_error_ = GRPC_ERROR_REF(error);
|
1524
|
+
CallCombinerClosureList closures;
|
1525
|
+
if (error != GRPC_ERROR_NONE) {
|
1526
|
+
call_attempt->AddBatchForCancelOp(GRPC_ERROR_REF(error), &closures);
|
1527
|
+
}
|
1224
1528
|
if (!call_attempt->started_recv_trailing_metadata_) {
|
1225
1529
|
// recv_trailing_metadata not yet started by application; start it
|
1226
1530
|
// ourselves to get status.
|
1227
|
-
call_attempt->
|
1228
|
-
} else {
|
1229
|
-
GRPC_CALL_COMBINER_STOP(calld->call_combiner_,
|
1230
|
-
"recv_message_ready null");
|
1531
|
+
call_attempt->AddBatchForInternalRecvTrailingMetadata(&closures);
|
1231
1532
|
}
|
1533
|
+
closures.RunClosures(calld->call_combiner_);
|
1232
1534
|
return;
|
1233
1535
|
}
|
1234
1536
|
// Received a valid message, so commit the call.
|
1235
1537
|
calld->RetryCommit(call_attempt);
|
1538
|
+
// If retry state is no longer needed, switch to fast path for
|
1539
|
+
// subsequent batches.
|
1540
|
+
call_attempt->MaybeSwitchToFastPath();
|
1236
1541
|
}
|
1237
1542
|
// Invoke the callback to return the result to the surface.
|
1238
|
-
|
1239
|
-
|
1543
|
+
CallCombinerClosureList closures;
|
1544
|
+
batch_data->MaybeAddClosureForRecvMessageCallback(GRPC_ERROR_REF(error),
|
1545
|
+
&closures);
|
1546
|
+
closures.RunClosures(calld->call_combiner_);
|
1240
1547
|
}
|
1241
1548
|
|
1242
1549
|
//
|
@@ -1271,23 +1578,28 @@ void GetCallStatus(grpc_millis deadline, grpc_metadata_batch* md_batch,
|
|
1271
1578
|
} // namespace
|
1272
1579
|
|
1273
1580
|
void RetryFilter::CallData::CallAttempt::BatchData::
|
1274
|
-
|
1275
|
-
|
1581
|
+
MaybeAddClosureForRecvTrailingMetadataReady(
|
1582
|
+
grpc_error_handle error, CallCombinerClosureList* closures) {
|
1276
1583
|
auto* calld = call_attempt_->calld_;
|
1277
1584
|
// Find pending batch.
|
1278
1585
|
PendingBatch* pending = calld->PendingBatchFind(
|
1279
|
-
"invoking
|
1586
|
+
"invoking recv_trailing_metadata_ready for",
|
1280
1587
|
[](grpc_transport_stream_op_batch* batch) {
|
1281
1588
|
return batch->recv_trailing_metadata &&
|
1282
1589
|
batch->payload->recv_trailing_metadata
|
1283
1590
|
.recv_trailing_metadata_ready != nullptr;
|
1284
1591
|
});
|
1285
1592
|
// If we generated the recv_trailing_metadata op internally via
|
1286
|
-
//
|
1593
|
+
// AddBatchForInternalRecvTrailingMetadata(), then there will be no
|
1594
|
+
// pending batch.
|
1287
1595
|
if (pending == nullptr) {
|
1288
|
-
|
1596
|
+
call_attempt_->recv_trailing_metadata_error_ = error;
|
1289
1597
|
return;
|
1290
1598
|
}
|
1599
|
+
// Copy transport stats to be delivered up to the surface.
|
1600
|
+
grpc_transport_move_stats(
|
1601
|
+
&call_attempt_->collect_stats_,
|
1602
|
+
pending->batch->payload->recv_trailing_metadata.collect_stats);
|
1291
1603
|
// Return metadata.
|
1292
1604
|
grpc_metadata_batch_move(
|
1293
1605
|
&call_attempt_->recv_trailing_metadata_,
|
@@ -1303,35 +1615,34 @@ void RetryFilter::CallData::CallAttempt::BatchData::
|
|
1303
1615
|
}
|
1304
1616
|
|
1305
1617
|
void RetryFilter::CallData::CallAttempt::BatchData::
|
1306
|
-
|
1307
|
-
|
1308
|
-
|
1309
|
-
|
1310
|
-
|
1311
|
-
|
1312
|
-
|
1313
|
-
|
1314
|
-
|
1315
|
-
|
1316
|
-
|
1317
|
-
|
1318
|
-
|
1319
|
-
|
1320
|
-
|
1321
|
-
|
1322
|
-
|
1323
|
-
|
1324
|
-
|
1325
|
-
|
1326
|
-
|
1327
|
-
|
1328
|
-
|
1329
|
-
|
1330
|
-
|
1331
|
-
|
1332
|
-
|
1333
|
-
|
1334
|
-
}
|
1618
|
+
AddClosuresForDeferredCompletionCallbacks(
|
1619
|
+
CallCombinerClosureList* closures) {
|
1620
|
+
// Add closure for deferred recv_initial_metadata_ready.
|
1621
|
+
if (GPR_UNLIKELY(call_attempt_->recv_initial_metadata_ready_deferred_batch_ !=
|
1622
|
+
nullptr)) {
|
1623
|
+
MaybeAddClosureForRecvInitialMetadataCallback(
|
1624
|
+
call_attempt_->recv_initial_metadata_error_, closures);
|
1625
|
+
call_attempt_->recv_initial_metadata_ready_deferred_batch_.reset(
|
1626
|
+
DEBUG_LOCATION, "resuming deferred recv_initial_metadata_ready");
|
1627
|
+
call_attempt_->recv_initial_metadata_error_ = GRPC_ERROR_NONE;
|
1628
|
+
}
|
1629
|
+
// Add closure for deferred recv_message_ready.
|
1630
|
+
if (GPR_UNLIKELY(call_attempt_->recv_message_ready_deferred_batch_ !=
|
1631
|
+
nullptr)) {
|
1632
|
+
MaybeAddClosureForRecvMessageCallback(call_attempt_->recv_message_error_,
|
1633
|
+
closures);
|
1634
|
+
call_attempt_->recv_message_ready_deferred_batch_.reset(
|
1635
|
+
DEBUG_LOCATION, "resuming deferred recv_message_ready");
|
1636
|
+
call_attempt_->recv_message_error_ = GRPC_ERROR_NONE;
|
1637
|
+
}
|
1638
|
+
// Add closures for deferred on_complete callbacks.
|
1639
|
+
for (auto& on_complete_deferred_batch :
|
1640
|
+
call_attempt_->on_complete_deferred_batches_) {
|
1641
|
+
closures->Add(&on_complete_deferred_batch.batch->on_complete_,
|
1642
|
+
on_complete_deferred_batch.error, "resuming on_complete");
|
1643
|
+
on_complete_deferred_batch.batch.release();
|
1644
|
+
}
|
1645
|
+
call_attempt_->on_complete_deferred_batches_.clear();
|
1335
1646
|
}
|
1336
1647
|
|
1337
1648
|
void RetryFilter::CallData::CallAttempt::BatchData::
|
@@ -1340,13 +1651,8 @@ void RetryFilter::CallData::CallAttempt::BatchData::
|
|
1340
1651
|
auto* calld = call_attempt_->calld_;
|
1341
1652
|
for (size_t i = 0; i < GPR_ARRAY_SIZE(calld->pending_batches_); ++i) {
|
1342
1653
|
PendingBatch* pending = &calld->pending_batches_[i];
|
1343
|
-
if (
|
1344
|
-
|
1345
|
-
gpr_log(GPR_INFO,
|
1346
|
-
"chand=%p calld=%p: failing unstarted pending batch at "
|
1347
|
-
"index %" PRIuPTR,
|
1348
|
-
calld->chand_, calld, i);
|
1349
|
-
}
|
1654
|
+
if (pending->batch == nullptr) continue;
|
1655
|
+
if (call_attempt_->PendingBatchContainsUnstartedSendOps(pending)) {
|
1350
1656
|
closures->Add(pending->batch->on_complete, GRPC_ERROR_REF(error),
|
1351
1657
|
"failing on_complete for pending batch");
|
1352
1658
|
pending->batch->on_complete = nullptr;
|
@@ -1361,32 +1667,40 @@ void RetryFilter::CallData::CallAttempt::BatchData::RunClosuresForCompletedCall(
|
|
1361
1667
|
// Construct list of closures to execute.
|
1362
1668
|
CallCombinerClosureList closures;
|
1363
1669
|
// First, add closure for recv_trailing_metadata_ready.
|
1364
|
-
|
1365
|
-
// If there are deferred
|
1366
|
-
|
1367
|
-
AddClosuresForDeferredRecvCallbacks(&closures);
|
1670
|
+
MaybeAddClosureForRecvTrailingMetadataReady(GRPC_ERROR_REF(error), &closures);
|
1671
|
+
// If there are deferred batch completion callbacks, add them to closures.
|
1672
|
+
AddClosuresForDeferredCompletionCallbacks(&closures);
|
1368
1673
|
// Add closures to fail any pending batches that have not yet been started.
|
1369
1674
|
AddClosuresToFailUnstartedPendingBatches(GRPC_ERROR_REF(error), &closures);
|
1370
1675
|
// Schedule all of the closures identified above.
|
1371
1676
|
// Note: This will release the call combiner.
|
1372
1677
|
closures.RunClosures(call_attempt_->calld_->call_combiner_);
|
1373
|
-
// Don't need batch_data anymore.
|
1374
|
-
Unref();
|
1375
1678
|
GRPC_ERROR_UNREF(error);
|
1376
1679
|
}
|
1377
1680
|
|
1378
1681
|
void RetryFilter::CallData::CallAttempt::BatchData::RecvTrailingMetadataReady(
|
1379
1682
|
void* arg, grpc_error_handle error) {
|
1380
|
-
|
1381
|
-
static_cast<CallAttempt::BatchData*>(arg);
|
1683
|
+
RefCountedPtr<BatchData> batch_data(static_cast<BatchData*>(arg));
|
1382
1684
|
CallAttempt* call_attempt = batch_data->call_attempt_.get();
|
1383
1685
|
CallData* calld = call_attempt->calld_;
|
1384
1686
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1385
1687
|
gpr_log(GPR_INFO,
|
1386
|
-
"chand=%p calld=%p
|
1387
|
-
|
1688
|
+
"chand=%p calld=%p attempt=%p batch_data=%p: "
|
1689
|
+
"got recv_trailing_metadata_ready, error=%s",
|
1690
|
+
calld->chand_, calld, call_attempt, batch_data.get(),
|
1691
|
+
grpc_error_std_string(error).c_str());
|
1388
1692
|
}
|
1389
1693
|
call_attempt->completed_recv_trailing_metadata_ = true;
|
1694
|
+
// If this attempt has been abandoned, then we're not going to use the
|
1695
|
+
// result of this recv_trailing_metadata op, so do nothing.
|
1696
|
+
if (call_attempt->abandoned_) {
|
1697
|
+
GRPC_CALL_COMBINER_STOP(
|
1698
|
+
calld->call_combiner_,
|
1699
|
+
"recv_trailing_metadata_ready for abandoned attempt");
|
1700
|
+
return;
|
1701
|
+
}
|
1702
|
+
// Cancel per-attempt recv timer, if any.
|
1703
|
+
call_attempt->MaybeCancelPerAttemptRecvTimer();
|
1390
1704
|
// Get the call's status and check for server pushback metadata.
|
1391
1705
|
grpc_status_code status = GRPC_STATUS_OK;
|
1392
1706
|
grpc_mdelem* server_pushback_md = nullptr;
|
@@ -1397,26 +1711,37 @@ void RetryFilter::CallData::CallAttempt::BatchData::RecvTrailingMetadataReady(
|
|
1397
1711
|
&server_pushback_md, &is_lb_drop);
|
1398
1712
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1399
1713
|
gpr_log(
|
1400
|
-
GPR_INFO,
|
1401
|
-
calld
|
1714
|
+
GPR_INFO,
|
1715
|
+
"chand=%p calld=%p attempt=%p: call finished, status=%s is_lb_drop=%d",
|
1716
|
+
calld->chand_, calld, call_attempt, grpc_status_code_to_string(status),
|
1717
|
+
is_lb_drop);
|
1402
1718
|
}
|
1403
1719
|
// Check if we should retry.
|
1404
|
-
|
1405
|
-
|
1406
|
-
|
1407
|
-
|
1408
|
-
|
1409
|
-
|
1410
|
-
|
1411
|
-
|
1412
|
-
|
1413
|
-
|
1414
|
-
|
1415
|
-
|
1720
|
+
grpc_millis server_pushback_ms = -1;
|
1721
|
+
if (call_attempt->ShouldRetry(status, is_lb_drop, server_pushback_md,
|
1722
|
+
&server_pushback_ms)) {
|
1723
|
+
// Start retry timer.
|
1724
|
+
calld->StartRetryTimer(server_pushback_ms);
|
1725
|
+
// Cancel call attempt.
|
1726
|
+
CallCombinerClosureList closures;
|
1727
|
+
call_attempt->AddBatchForCancelOp(
|
1728
|
+
error == GRPC_ERROR_NONE
|
1729
|
+
? grpc_error_set_int(
|
1730
|
+
GRPC_ERROR_CREATE_FROM_STATIC_STRING("call attempt failed"),
|
1731
|
+
GRPC_ERROR_INT_GRPC_STATUS, GRPC_STATUS_CANCELLED)
|
1732
|
+
: GRPC_ERROR_REF(error),
|
1733
|
+
&closures);
|
1734
|
+
// Record that this attempt has been abandoned.
|
1735
|
+
call_attempt->Abandon();
|
1736
|
+
// Yields call combiner.
|
1737
|
+
closures.RunClosures(calld->call_combiner_);
|
1416
1738
|
return;
|
1417
1739
|
}
|
1418
1740
|
// Not retrying, so commit the call.
|
1419
1741
|
calld->RetryCommit(call_attempt);
|
1742
|
+
// If retry state is no longer needed, switch to fast path for
|
1743
|
+
// subsequent batches.
|
1744
|
+
call_attempt->MaybeSwitchToFastPath();
|
1420
1745
|
// Run any necessary closures.
|
1421
1746
|
batch_data->RunClosuresForCompletedCall(GRPC_ERROR_REF(error));
|
1422
1747
|
}
|
@@ -1444,6 +1769,11 @@ void RetryFilter::CallData::CallAttempt::BatchData::
|
|
1444
1769
|
GRPC_ERROR_UNREF(error);
|
1445
1770
|
return;
|
1446
1771
|
}
|
1772
|
+
// Propagate payload.
|
1773
|
+
if (batch_.send_message) {
|
1774
|
+
pending->batch->payload->send_message.stream_write_closed =
|
1775
|
+
batch_.payload->send_message.stream_write_closed;
|
1776
|
+
}
|
1447
1777
|
// Add closure.
|
1448
1778
|
closures->Add(pending->batch->on_complete, error,
|
1449
1779
|
"on_complete for pending batch");
|
@@ -1454,31 +1784,27 @@ void RetryFilter::CallData::CallAttempt::BatchData::
|
|
1454
1784
|
void RetryFilter::CallData::CallAttempt::BatchData::
|
1455
1785
|
AddClosuresForReplayOrPendingSendOps(CallCombinerClosureList* closures) {
|
1456
1786
|
auto* calld = call_attempt_->calld_;
|
1787
|
+
bool have_pending_send_ops = call_attempt_->HaveSendOpsToReplay();
|
1457
1788
|
// We don't check send_initial_metadata here, because that op will always
|
1458
1789
|
// be started as soon as it is received from the surface, so it will
|
1459
1790
|
// never need to be started at this point.
|
1460
|
-
|
1461
|
-
call_attempt_->started_send_message_count_ < calld->send_messages_.size();
|
1462
|
-
bool have_pending_send_trailing_metadata_op =
|
1463
|
-
calld->seen_send_trailing_metadata_ &&
|
1464
|
-
!call_attempt_->started_send_trailing_metadata_;
|
1465
|
-
if (!have_pending_send_message_ops &&
|
1466
|
-
!have_pending_send_trailing_metadata_op) {
|
1791
|
+
if (!have_pending_send_ops) {
|
1467
1792
|
for (size_t i = 0; i < GPR_ARRAY_SIZE(calld->pending_batches_); ++i) {
|
1468
1793
|
PendingBatch* pending = &calld->pending_batches_[i];
|
1469
1794
|
grpc_transport_stream_op_batch* batch = pending->batch;
|
1470
1795
|
if (batch == nullptr || pending->send_ops_cached) continue;
|
1471
|
-
if (batch->send_message
|
1472
|
-
|
1473
|
-
|
1796
|
+
if (batch->send_message || batch->send_trailing_metadata) {
|
1797
|
+
have_pending_send_ops = true;
|
1798
|
+
break;
|
1474
1799
|
}
|
1475
1800
|
}
|
1476
1801
|
}
|
1477
|
-
if (
|
1802
|
+
if (have_pending_send_ops) {
|
1478
1803
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1479
1804
|
gpr_log(GPR_INFO,
|
1480
|
-
"chand=%p calld=%p: starting next batch for pending
|
1481
|
-
|
1805
|
+
"chand=%p calld=%p attempt=%p: starting next batch for pending "
|
1806
|
+
"send op(s)",
|
1807
|
+
calld->chand_, calld, call_attempt_.get());
|
1482
1808
|
}
|
1483
1809
|
call_attempt_->AddRetriableBatches(closures);
|
1484
1810
|
}
|
@@ -1486,15 +1812,46 @@ void RetryFilter::CallData::CallAttempt::BatchData::
|
|
1486
1812
|
|
1487
1813
|
void RetryFilter::CallData::CallAttempt::BatchData::OnComplete(
|
1488
1814
|
void* arg, grpc_error_handle error) {
|
1489
|
-
|
1490
|
-
static_cast<CallAttempt::BatchData*>(arg);
|
1815
|
+
RefCountedPtr<BatchData> batch_data(static_cast<BatchData*>(arg));
|
1491
1816
|
CallAttempt* call_attempt = batch_data->call_attempt_.get();
|
1492
1817
|
CallData* calld = call_attempt->calld_;
|
1493
1818
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1494
|
-
gpr_log(GPR_INFO,
|
1495
|
-
|
1819
|
+
gpr_log(GPR_INFO,
|
1820
|
+
"chand=%p calld=%p attempt=%p batch_data=%p: "
|
1821
|
+
"got on_complete, error=%s, batch=%s",
|
1822
|
+
calld->chand_, calld, call_attempt, batch_data.get(),
|
1823
|
+
grpc_error_std_string(error).c_str(),
|
1496
1824
|
grpc_transport_stream_op_batch_string(&batch_data->batch_).c_str());
|
1497
1825
|
}
|
1826
|
+
// If this attempt has been abandoned, then we're not going to propagate
|
1827
|
+
// the completion of this batch, so do nothing.
|
1828
|
+
if (call_attempt->abandoned_) {
|
1829
|
+
GRPC_CALL_COMBINER_STOP(calld->call_combiner_,
|
1830
|
+
"on_complete for abandoned attempt");
|
1831
|
+
return;
|
1832
|
+
}
|
1833
|
+
// If we got an error and have not yet gotten the
|
1834
|
+
// recv_trailing_metadata_ready callback, then defer propagating this
|
1835
|
+
// callback back to the surface. We can evaluate whether to retry when
|
1836
|
+
// recv_trailing_metadata comes back.
|
1837
|
+
if (GPR_UNLIKELY(!calld->retry_committed_ && error != GRPC_ERROR_NONE &&
|
1838
|
+
!call_attempt->completed_recv_trailing_metadata_)) {
|
1839
|
+
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1840
|
+
gpr_log(GPR_INFO, "chand=%p calld=%p attempt=%p: deferring on_complete",
|
1841
|
+
calld->chand_, calld, call_attempt);
|
1842
|
+
}
|
1843
|
+
call_attempt->on_complete_deferred_batches_.emplace_back(
|
1844
|
+
std::move(batch_data), GRPC_ERROR_REF(error));
|
1845
|
+
CallCombinerClosureList closures;
|
1846
|
+
call_attempt->AddBatchForCancelOp(GRPC_ERROR_REF(error), &closures);
|
1847
|
+
if (!call_attempt->started_recv_trailing_metadata_) {
|
1848
|
+
// recv_trailing_metadata not yet started by application; start it
|
1849
|
+
// ourselves to get status.
|
1850
|
+
call_attempt->AddBatchForInternalRecvTrailingMetadata(&closures);
|
1851
|
+
}
|
1852
|
+
closures.RunClosures(calld->call_combiner_);
|
1853
|
+
return;
|
1854
|
+
}
|
1498
1855
|
// Update bookkeeping in call_attempt.
|
1499
1856
|
if (batch_data->batch_.send_initial_metadata) {
|
1500
1857
|
call_attempt->completed_send_initial_metadata_ = true;
|
@@ -1512,33 +1869,39 @@ void RetryFilter::CallData::CallAttempt::BatchData::OnComplete(
|
|
1512
1869
|
}
|
1513
1870
|
// Construct list of closures to execute.
|
1514
1871
|
CallCombinerClosureList closures;
|
1515
|
-
//
|
1516
|
-
|
1517
|
-
|
1518
|
-
|
1519
|
-
|
1520
|
-
|
1521
|
-
|
1522
|
-
|
1523
|
-
|
1524
|
-
|
1525
|
-
|
1526
|
-
|
1527
|
-
}
|
1528
|
-
// Track number of in-flight send batches and determine if this was the
|
1529
|
-
// last one.
|
1530
|
-
--calld->num_in_flight_call_attempt_send_batches_;
|
1531
|
-
const bool last_send_batch_complete =
|
1532
|
-
calld->num_in_flight_call_attempt_send_batches_ == 0;
|
1533
|
-
// Don't need batch_data anymore.
|
1534
|
-
batch_data->Unref();
|
1872
|
+
// Add closure for the completed pending batch, if any.
|
1873
|
+
batch_data->AddClosuresForCompletedPendingBatch(GRPC_ERROR_REF(error),
|
1874
|
+
&closures);
|
1875
|
+
// If needed, add a callback to start any replay or pending send ops on
|
1876
|
+
// the LB call.
|
1877
|
+
if (!call_attempt->completed_recv_trailing_metadata_) {
|
1878
|
+
batch_data->AddClosuresForReplayOrPendingSendOps(&closures);
|
1879
|
+
}
|
1880
|
+
// If retry state is no longer needed (i.e., we're committed and there
|
1881
|
+
// are no more send ops to replay), switch to fast path for subsequent
|
1882
|
+
// batches.
|
1883
|
+
call_attempt->MaybeSwitchToFastPath();
|
1535
1884
|
// Schedule all of the closures identified above.
|
1536
1885
|
// Note: This yields the call combiner.
|
1537
1886
|
closures.RunClosures(calld->call_combiner_);
|
1538
|
-
|
1539
|
-
|
1540
|
-
|
1887
|
+
}
|
1888
|
+
|
1889
|
+
void RetryFilter::CallData::CallAttempt::BatchData::OnCompleteForCancelOp(
|
1890
|
+
void* arg, grpc_error_handle error) {
|
1891
|
+
RefCountedPtr<BatchData> batch_data(static_cast<BatchData*>(arg));
|
1892
|
+
CallAttempt* call_attempt = batch_data->call_attempt_.get();
|
1893
|
+
CallData* calld = call_attempt->calld_;
|
1894
|
+
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1895
|
+
gpr_log(GPR_INFO,
|
1896
|
+
"chand=%p calld=%p attempt=%p batch_data=%p: "
|
1897
|
+
"got on_complete for cancel_stream batch, error=%s, batch=%s",
|
1898
|
+
calld->chand_, calld, call_attempt, batch_data.get(),
|
1899
|
+
grpc_error_std_string(error).c_str(),
|
1900
|
+
grpc_transport_stream_op_batch_string(&batch_data->batch_).c_str());
|
1541
1901
|
}
|
1902
|
+
GRPC_CALL_COMBINER_STOP(
|
1903
|
+
calld->call_combiner_,
|
1904
|
+
"on_complete for internally generated cancel_stream op");
|
1542
1905
|
}
|
1543
1906
|
|
1544
1907
|
//
|
@@ -1598,9 +1961,12 @@ void RetryFilter::CallData::CallAttempt::BatchData::
|
|
1598
1961
|
AddRetriableSendMessageOp() {
|
1599
1962
|
auto* calld = call_attempt_->calld_;
|
1600
1963
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1601
|
-
gpr_log(
|
1602
|
-
|
1603
|
-
|
1964
|
+
gpr_log(
|
1965
|
+
GPR_INFO,
|
1966
|
+
"chand=%p calld=%p attempt=%p: starting calld->send_messages[%" PRIuPTR
|
1967
|
+
"]",
|
1968
|
+
calld->chand_, calld, call_attempt_.get(),
|
1969
|
+
call_attempt_->started_send_message_count_);
|
1604
1970
|
}
|
1605
1971
|
ByteStreamCache* cache =
|
1606
1972
|
calld->send_messages_[call_attempt_->started_send_message_count_];
|
@@ -1650,6 +2016,7 @@ void RetryFilter::CallData::CallAttempt::BatchData::
|
|
1650
2016
|
++call_attempt_->started_recv_message_count_;
|
1651
2017
|
batch_.recv_message = true;
|
1652
2018
|
batch_.payload->recv_message.recv_message = &call_attempt_->recv_message_;
|
2019
|
+
batch_.payload->recv_message.call_failed_before_recv_message = nullptr;
|
1653
2020
|
GRPC_CLOSURE_INIT(&call_attempt_->recv_message_ready_, RecvMessageReady, this,
|
1654
2021
|
grpc_schedule_on_exec_ctx);
|
1655
2022
|
batch_.payload->recv_message.recv_message_ready =
|
@@ -1671,6 +2038,14 @@ void RetryFilter::CallData::CallAttempt::BatchData::
|
|
1671
2038
|
&call_attempt_->recv_trailing_metadata_ready_;
|
1672
2039
|
}
|
1673
2040
|
|
2041
|
+
void RetryFilter::CallData::CallAttempt::BatchData::AddCancelStreamOp(
|
2042
|
+
grpc_error_handle error) {
|
2043
|
+
batch_.cancel_stream = true;
|
2044
|
+
batch_.payload->cancel_stream.cancel_error = error;
|
2045
|
+
// Override on_complete callback.
|
2046
|
+
GRPC_CLOSURE_INIT(&on_complete_, OnCompleteForCancelOp, this, nullptr);
|
2047
|
+
}
|
2048
|
+
|
1674
2049
|
//
|
1675
2050
|
// CallData vtable functions
|
1676
2051
|
//
|
@@ -1680,7 +2055,8 @@ grpc_error_handle RetryFilter::CallData::Init(
|
|
1680
2055
|
auto* chand = static_cast<RetryFilter*>(elem->channel_data);
|
1681
2056
|
new (elem->call_data) CallData(chand, *args);
|
1682
2057
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1683
|
-
gpr_log(GPR_INFO, "chand=%p: created call
|
2058
|
+
gpr_log(GPR_INFO, "chand=%p calld=%p: created call", chand,
|
2059
|
+
elem->call_data);
|
1684
2060
|
}
|
1685
2061
|
return GRPC_ERROR_NONE;
|
1686
2062
|
}
|
@@ -1746,7 +2122,6 @@ RetryFilter::CallData::CallData(RetryFilter* chand,
|
|
1746
2122
|
.set_max_backoff(
|
1747
2123
|
retry_policy_ == nullptr ? 0 : retry_policy_->max_backoff())),
|
1748
2124
|
path_(grpc_slice_ref_internal(args.path)),
|
1749
|
-
call_start_time_(args.start_time),
|
1750
2125
|
deadline_(args.deadline),
|
1751
2126
|
arena_(args.arena),
|
1752
2127
|
owning_call_(args.call_stack),
|
@@ -1758,7 +2133,7 @@ RetryFilter::CallData::CallData(RetryFilter* chand,
|
|
1758
2133
|
pending_send_message_(false),
|
1759
2134
|
pending_send_trailing_metadata_(false),
|
1760
2135
|
retry_committed_(false),
|
1761
|
-
|
2136
|
+
retry_timer_pending_(false) {}
|
1762
2137
|
|
1763
2138
|
RetryFilter::CallData::~CallData() {
|
1764
2139
|
grpc_slice_unref_internal(path_);
|
@@ -1766,6 +2141,7 @@ RetryFilter::CallData::~CallData() {
|
|
1766
2141
|
for (size_t i = 0; i < GPR_ARRAY_SIZE(pending_batches_); ++i) {
|
1767
2142
|
GPR_ASSERT(pending_batches_[i].batch == nullptr);
|
1768
2143
|
}
|
2144
|
+
GRPC_ERROR_UNREF(cancelled_from_surface_);
|
1769
2145
|
}
|
1770
2146
|
|
1771
2147
|
void RetryFilter::CallData::StartTransportStreamOpBatch(
|
@@ -1788,10 +2164,29 @@ void RetryFilter::CallData::StartTransportStreamOpBatch(
|
|
1788
2164
|
// will not be retried, because we have committed it here.
|
1789
2165
|
if (call_attempt_ != nullptr) {
|
1790
2166
|
RetryCommit(call_attempt_.get());
|
2167
|
+
// TODO(roth): When implementing hedging, this will get more
|
2168
|
+
// complex, because instead of just passing the batch down to a
|
2169
|
+
// single call attempt, we'll need to cancel multiple call
|
2170
|
+
// attempts and wait for the cancellation on_complete from each call
|
2171
|
+
// attempt before we propagate the on_complete from this batch
|
2172
|
+
// back to the surface.
|
1791
2173
|
// Note: This will release the call combiner.
|
1792
|
-
call_attempt_->
|
2174
|
+
call_attempt_->CancelFromSurface(batch);
|
1793
2175
|
return;
|
1794
2176
|
}
|
2177
|
+
// Save cancel_error in case subsequent batches are started.
|
2178
|
+
GRPC_ERROR_UNREF(cancelled_from_surface_);
|
2179
|
+
cancelled_from_surface_ = GRPC_ERROR_REF(cancel_error);
|
2180
|
+
// Cancel retry timer.
|
2181
|
+
if (retry_timer_pending_) {
|
2182
|
+
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
2183
|
+
gpr_log(GPR_INFO, "chand=%p calld=%p: cancelling retry timer", chand_,
|
2184
|
+
this);
|
2185
|
+
}
|
2186
|
+
retry_timer_pending_ = false; // Lame timer callback.
|
2187
|
+
grpc_timer_cancel(&retry_timer_);
|
2188
|
+
FreeAllCachedSendOpData();
|
2189
|
+
}
|
1795
2190
|
// Fail pending batches.
|
1796
2191
|
PendingBatchesFail(GRPC_ERROR_REF(cancel_error));
|
1797
2192
|
// Note: This will release the call combiner.
|
@@ -1801,13 +2196,47 @@ void RetryFilter::CallData::StartTransportStreamOpBatch(
|
|
1801
2196
|
}
|
1802
2197
|
// Add the batch to the pending list.
|
1803
2198
|
PendingBatch* pending = PendingBatchesAdd(batch);
|
2199
|
+
// If the timer is pending, yield the call combiner and wait for it to
|
2200
|
+
// run, since we don't want to start another call attempt until it does.
|
2201
|
+
if (retry_timer_pending_) {
|
2202
|
+
GRPC_CALL_COMBINER_STOP(call_combiner_,
|
2203
|
+
"added pending batch while retry timer pending");
|
2204
|
+
return;
|
2205
|
+
}
|
2206
|
+
// If we do not yet have a call attempt, create one.
|
1804
2207
|
if (call_attempt_ == nullptr) {
|
2208
|
+
// If we were previously cancelled from the surface, cancel this
|
2209
|
+
// batch instead of creating a call attempt.
|
2210
|
+
if (cancelled_from_surface_ != GRPC_ERROR_NONE) {
|
2211
|
+
PendingBatchClear(pending);
|
2212
|
+
// Note: This will release the call combiner.
|
2213
|
+
grpc_transport_stream_op_batch_finish_with_failure(
|
2214
|
+
batch, GRPC_ERROR_REF(cancelled_from_surface_), call_combiner_);
|
2215
|
+
return;
|
2216
|
+
}
|
2217
|
+
// If there is no retry policy, then commit retries immediately.
|
2218
|
+
// This ensures that the code below will always jump to the fast path.
|
2219
|
+
// TODO(roth): Remove this special case when we implement
|
2220
|
+
// transparent retries.
|
2221
|
+
if (retry_policy_ == nullptr) retry_committed_ = true;
|
1805
2222
|
// If this is the first batch and retries are already committed
|
1806
2223
|
// (e.g., if this batch put the call above the buffer size limit), then
|
1807
2224
|
// immediately create an LB call and delegate the batch to it. This
|
1808
2225
|
// avoids the overhead of unnecessarily allocating a CallAttempt
|
1809
2226
|
// object or caching any of the send op data.
|
1810
|
-
|
2227
|
+
// Note that we would ideally like to do this also on subsequent
|
2228
|
+
// attempts (e.g., if a batch puts the call above the buffer size
|
2229
|
+
// limit since the last attempt was complete), but in practice that's
|
2230
|
+
// not really worthwhile, because we will almost always have cached and
|
2231
|
+
// completed at least the send_initial_metadata op on the previous
|
2232
|
+
// attempt, which means that we'd need special logic to replay the
|
2233
|
+
// batch anyway, which is exactly what the CallAttempt object provides.
|
2234
|
+
// We also skip this optimization if perAttemptRecvTimeout is set in the
|
2235
|
+
// retry policy, because we need the code in CallAttempt to handle
|
2236
|
+
// the associated timer.
|
2237
|
+
if (num_attempts_completed_ == 0 && retry_committed_ &&
|
2238
|
+
(retry_policy_ == nullptr ||
|
2239
|
+
!retry_policy_->per_attempt_recv_timeout().has_value())) {
|
1811
2240
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1812
2241
|
gpr_log(GPR_INFO,
|
1813
2242
|
"chand=%p calld=%p: retry committed before first attempt; "
|
@@ -1815,11 +2244,16 @@ void RetryFilter::CallData::StartTransportStreamOpBatch(
|
|
1815
2244
|
chand_, this);
|
1816
2245
|
}
|
1817
2246
|
PendingBatchClear(pending);
|
1818
|
-
|
2247
|
+
auto* service_config_call_data = static_cast<ServiceConfigCallData*>(
|
2248
|
+
call_context_[GRPC_CONTEXT_SERVICE_CONFIG_CALL_DATA].value);
|
2249
|
+
committed_call_ = CreateLoadBalancedCall(
|
2250
|
+
service_config_call_data->call_dispatch_controller());
|
1819
2251
|
committed_call_->StartTransportStreamOpBatch(batch);
|
1820
2252
|
return;
|
1821
2253
|
}
|
1822
|
-
//
|
2254
|
+
// Otherwise, create a call attempt.
|
2255
|
+
// The attempt will automatically start any necessary replays or
|
2256
|
+
// pending batches.
|
1823
2257
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1824
2258
|
gpr_log(GPR_INFO, "chand=%p calld=%p: creating call attempt", chand_,
|
1825
2259
|
this);
|
@@ -1829,56 +2263,31 @@ void RetryFilter::CallData::StartTransportStreamOpBatch(
|
|
1829
2263
|
}
|
1830
2264
|
// Send batches to call attempt.
|
1831
2265
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1832
|
-
gpr_log(GPR_INFO,
|
1833
|
-
|
1834
|
-
chand_, this, call_attempt_.get(), call_attempt_->lb_call());
|
2266
|
+
gpr_log(GPR_INFO, "chand=%p calld=%p: starting batch on attempt=%p", chand_,
|
2267
|
+
this, call_attempt_.get());
|
1835
2268
|
}
|
1836
2269
|
call_attempt_->StartRetriableBatches();
|
1837
2270
|
}
|
1838
2271
|
|
1839
|
-
|
1840
|
-
RetryFilter::CallData::CreateLoadBalancedCall(
|
2272
|
+
OrphanablePtr<ClientChannel::LoadBalancedCall>
|
2273
|
+
RetryFilter::CallData::CreateLoadBalancedCall(
|
2274
|
+
ConfigSelector::CallDispatchController* call_dispatch_controller) {
|
1841
2275
|
grpc_call_element_args args = {owning_call_, nullptr, call_context_,
|
1842
|
-
path_,
|
2276
|
+
path_, /*start_time=*/0, deadline_,
|
1843
2277
|
arena_, call_combiner_};
|
1844
2278
|
return chand_->client_channel_->CreateLoadBalancedCall(
|
1845
2279
|
args, pollent_,
|
1846
2280
|
// This callback holds a ref to the CallStackDestructionBarrier
|
1847
2281
|
// object until the LB call is destroyed.
|
1848
|
-
call_stack_destruction_barrier_->MakeLbCallDestructionClosure(this)
|
2282
|
+
call_stack_destruction_barrier_->MakeLbCallDestructionClosure(this),
|
2283
|
+
call_dispatch_controller,
|
2284
|
+
// TODO(roth): Change this when we support transparent retries.
|
2285
|
+
/*is_transparent_retry=*/false);
|
1849
2286
|
}
|
1850
2287
|
|
1851
2288
|
void RetryFilter::CallData::CreateCallAttempt() {
|
1852
|
-
call_attempt_
|
2289
|
+
call_attempt_ = MakeRefCounted<CallAttempt>(this);
|
1853
2290
|
call_attempt_->StartRetriableBatches();
|
1854
|
-
// TODO(roth): When implementing hedging, change this to start a timer
|
1855
|
-
// for the next hedging attempt.
|
1856
|
-
}
|
1857
|
-
|
1858
|
-
namespace {
|
1859
|
-
|
1860
|
-
void StartBatchInCallCombiner(void* arg, grpc_error_handle /*ignored*/) {
|
1861
|
-
grpc_transport_stream_op_batch* batch =
|
1862
|
-
static_cast<grpc_transport_stream_op_batch*>(arg);
|
1863
|
-
auto* lb_call = static_cast<ClientChannel::LoadBalancedCall*>(
|
1864
|
-
batch->handler_private.extra_arg);
|
1865
|
-
// Note: This will release the call combiner.
|
1866
|
-
lb_call->StartTransportStreamOpBatch(batch);
|
1867
|
-
}
|
1868
|
-
|
1869
|
-
} // namespace
|
1870
|
-
|
1871
|
-
void RetryFilter::CallData::AddClosureForBatch(
|
1872
|
-
grpc_transport_stream_op_batch* batch, CallCombinerClosureList* closures) {
|
1873
|
-
batch->handler_private.extra_arg = call_attempt_->lb_call();
|
1874
|
-
GRPC_CLOSURE_INIT(&batch->handler_private.closure, StartBatchInCallCombiner,
|
1875
|
-
batch, grpc_schedule_on_exec_ctx);
|
1876
|
-
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1877
|
-
gpr_log(GPR_INFO, "chand=%p calld=%p: starting batch on LB call: %s",
|
1878
|
-
chand_, this, grpc_transport_stream_op_batch_string(batch).c_str());
|
1879
|
-
}
|
1880
|
-
closures->Add(&batch->handler_private.closure, GRPC_ERROR_NONE,
|
1881
|
-
"start_batch_on_lb_call");
|
1882
2291
|
}
|
1883
2292
|
|
1884
2293
|
//
|
@@ -1943,7 +2352,7 @@ void RetryFilter::CallData::FreeCachedSendMessage(size_t idx) {
|
|
1943
2352
|
|
1944
2353
|
void RetryFilter::CallData::FreeCachedSendTrailingMetadata() {
|
1945
2354
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1946
|
-
gpr_log(GPR_INFO, "
|
2355
|
+
gpr_log(GPR_INFO, "chand=%p calld=%p: destroying send_trailing_metadata",
|
1947
2356
|
chand_, this);
|
1948
2357
|
}
|
1949
2358
|
grpc_metadata_batch_destroy(&send_trailing_metadata_);
|
@@ -1982,7 +2391,7 @@ RetryFilter::CallData::PendingBatch* RetryFilter::CallData::PendingBatchesAdd(
|
|
1982
2391
|
const size_t idx = GetBatchIndex(batch);
|
1983
2392
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1984
2393
|
gpr_log(GPR_INFO,
|
1985
|
-
"
|
2394
|
+
"chand=%p calld=%p: adding pending batch at index %" PRIuPTR,
|
1986
2395
|
chand_, this, idx);
|
1987
2396
|
}
|
1988
2397
|
PendingBatch* pending = &pending_batches_[idx];
|
@@ -2006,6 +2415,9 @@ RetryFilter::CallData::PendingBatch* RetryFilter::CallData::PendingBatchesAdd(
|
|
2006
2415
|
if (batch->send_trailing_metadata) {
|
2007
2416
|
pending_send_trailing_metadata_ = true;
|
2008
2417
|
}
|
2418
|
+
// TODO(roth): When we implement hedging, if there are currently attempts
|
2419
|
+
// in flight, we will need to pick the one on which the max number of send
|
2420
|
+
// ops have already been sent, and we commit to that attempt.
|
2009
2421
|
if (GPR_UNLIKELY(bytes_buffered_for_retry_ >
|
2010
2422
|
chand_->per_rpc_retry_buffer_size_)) {
|
2011
2423
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
@@ -2122,22 +2534,31 @@ void RetryFilter::CallData::RetryCommit(CallAttempt* call_attempt) {
|
|
2122
2534
|
gpr_log(GPR_INFO, "chand=%p calld=%p: committing retries", chand_, this);
|
2123
2535
|
}
|
2124
2536
|
if (call_attempt != nullptr) {
|
2537
|
+
// If the call attempt's LB call has been committed, inform the call
|
2538
|
+
// dispatch controller that the call has been committed.
|
2539
|
+
// Note: If call_attempt is null, this is happening before the first
|
2540
|
+
// retry attempt is started, in which case we'll just pass the real
|
2541
|
+
// call dispatch controller down into the LB call, and it won't be
|
2542
|
+
// our problem anymore.
|
2543
|
+
if (call_attempt->lb_call_committed()) {
|
2544
|
+
auto* service_config_call_data = static_cast<ServiceConfigCallData*>(
|
2545
|
+
call_context_[GRPC_CONTEXT_SERVICE_CONFIG_CALL_DATA].value);
|
2546
|
+
service_config_call_data->call_dispatch_controller()->Commit();
|
2547
|
+
}
|
2548
|
+
// Free cached send ops.
|
2125
2549
|
call_attempt->FreeCachedSendOpDataAfterCommit();
|
2126
2550
|
}
|
2127
2551
|
}
|
2128
2552
|
|
2129
|
-
void RetryFilter::CallData::
|
2553
|
+
void RetryFilter::CallData::StartRetryTimer(grpc_millis server_pushback_ms) {
|
2130
2554
|
// Reset call attempt.
|
2131
|
-
call_attempt_.reset();
|
2555
|
+
call_attempt_.reset(DEBUG_LOCATION, "StartRetryTimer");
|
2132
2556
|
// Compute backoff delay.
|
2133
2557
|
grpc_millis next_attempt_time;
|
2134
2558
|
if (server_pushback_ms >= 0) {
|
2135
2559
|
next_attempt_time = ExecCtx::Get()->Now() + server_pushback_ms;
|
2136
|
-
|
2560
|
+
retry_backoff_.Reset();
|
2137
2561
|
} else {
|
2138
|
-
if (num_attempts_completed_ == 1 || last_attempt_got_server_pushback_) {
|
2139
|
-
last_attempt_got_server_pushback_ = false;
|
2140
|
-
}
|
2141
2562
|
next_attempt_time = retry_backoff_.NextAttemptTime();
|
2142
2563
|
}
|
2143
2564
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
@@ -2148,23 +2569,25 @@ void RetryFilter::CallData::DoRetry(grpc_millis server_pushback_ms) {
|
|
2148
2569
|
// Schedule retry after computed delay.
|
2149
2570
|
GRPC_CLOSURE_INIT(&retry_closure_, OnRetryTimer, this, nullptr);
|
2150
2571
|
GRPC_CALL_STACK_REF(owning_call_, "OnRetryTimer");
|
2151
|
-
|
2152
|
-
canceller_ = new Canceller(this);
|
2572
|
+
retry_timer_pending_ = true;
|
2153
2573
|
grpc_timer_init(&retry_timer_, next_attempt_time, &retry_closure_);
|
2154
2574
|
}
|
2155
2575
|
|
2156
2576
|
void RetryFilter::CallData::OnRetryTimer(void* arg, grpc_error_handle error) {
|
2157
2577
|
auto* calld = static_cast<CallData*>(arg);
|
2158
|
-
|
2159
|
-
|
2160
|
-
|
2161
|
-
|
2162
|
-
|
2163
|
-
|
2164
|
-
|
2165
|
-
|
2166
|
-
|
2167
|
-
|
2578
|
+
GRPC_CLOSURE_INIT(&calld->retry_closure_, OnRetryTimerLocked, calld, nullptr);
|
2579
|
+
GRPC_CALL_COMBINER_START(calld->call_combiner_, &calld->retry_closure_,
|
2580
|
+
GRPC_ERROR_REF(error), "retry timer fired");
|
2581
|
+
}
|
2582
|
+
|
2583
|
+
void RetryFilter::CallData::OnRetryTimerLocked(void* arg,
|
2584
|
+
grpc_error_handle error) {
|
2585
|
+
auto* calld = static_cast<CallData*>(arg);
|
2586
|
+
if (error == GRPC_ERROR_NONE && calld->retry_timer_pending_) {
|
2587
|
+
calld->retry_timer_pending_ = false;
|
2588
|
+
calld->CreateCallAttempt();
|
2589
|
+
} else {
|
2590
|
+
GRPC_CALL_COMBINER_STOP(calld->call_combiner_, "retry timer cancelled");
|
2168
2591
|
}
|
2169
2592
|
GRPC_CALL_STACK_UNREF(calld->owning_call_, "OnRetryTimer");
|
2170
2593
|
}
|