grpc 1.38.0 → 1.39.0.pre1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of grpc might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Makefile +50 -19
- data/include/grpc/event_engine/endpoint_config.h +48 -0
- data/include/grpc/event_engine/event_engine.h +13 -15
- data/include/grpc/event_engine/port.h +2 -0
- data/include/grpc/event_engine/slice_allocator.h +17 -7
- data/include/grpc/grpc.h +9 -2
- data/include/grpc/grpc_security.h +32 -0
- data/include/grpc/grpc_security_constants.h +1 -0
- data/include/grpc/impl/codegen/grpc_types.h +17 -13
- data/include/grpc/impl/codegen/port_platform.h +17 -0
- data/src/core/ext/filters/client_channel/client_channel.cc +2 -2
- data/src/core/ext/filters/client_channel/health/health_check_client.cc +2 -0
- data/src/core/ext/filters/client_channel/health/health_check_client.h +3 -3
- data/src/core/ext/filters/client_channel/http_proxy.cc +16 -1
- data/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc +755 -0
- data/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h +10 -0
- data/src/core/ext/filters/client_channel/lb_policy/xds/cds.cc +10 -24
- data/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_resolver.cc +63 -95
- data/src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.cc +1 -3
- data/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver_event_engine.cc +31 -0
- data/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_event_engine.cc +28 -0
- data/src/core/ext/filters/client_channel/resolver/dns/native/dns_resolver.cc +1 -3
- data/src/core/ext/filters/client_channel/resolver/google_c2p/google_c2p_resolver.cc +7 -2
- data/src/core/ext/filters/client_channel/resolver/xds/xds_resolver.cc +15 -3
- data/src/core/ext/filters/client_channel/retry_filter.cc +665 -404
- data/src/core/ext/filters/client_channel/retry_service_config.cc +43 -24
- data/src/core/ext/filters/client_channel/retry_service_config.h +8 -2
- data/src/core/ext/filters/client_idle/client_idle_filter.cc +1 -1
- data/src/core/ext/filters/fault_injection/fault_injection_filter.cc +6 -0
- data/src/core/ext/transport/chttp2/client/insecure/channel_create_posix.cc +2 -1
- data/src/core/ext/transport/chttp2/server/insecure/server_chttp2_posix.cc +3 -2
- data/src/core/ext/transport/chttp2/transport/chttp2_transport.cc +10 -4
- data/src/core/ext/transport/chttp2/transport/internal.h +1 -0
- data/src/core/ext/transport/chttp2/transport/parsing.cc +2 -2
- data/src/core/ext/transport/inproc/inproc_transport.cc +42 -31
- data/src/core/ext/xds/xds_api.cc +247 -106
- data/src/core/ext/xds/xds_api.h +15 -6
- data/src/core/lib/address_utils/sockaddr_utils.cc +13 -0
- data/src/core/lib/address_utils/sockaddr_utils.h +10 -0
- data/src/core/lib/channel/channelz.h +3 -0
- data/src/core/lib/event_engine/endpoint_config.cc +46 -0
- data/src/core/lib/event_engine/endpoint_config_internal.h +42 -0
- data/src/core/lib/event_engine/event_engine.cc +50 -0
- data/src/core/lib/event_engine/slice_allocator.cc +33 -3
- data/src/core/lib/event_engine/sockaddr.cc +14 -12
- data/src/core/lib/event_engine/sockaddr.h +44 -0
- data/src/core/lib/gpr/wrap_memcpy.cc +2 -1
- data/src/core/lib/gprpp/status_helper.h +3 -0
- data/src/core/lib/iomgr/endpoint_pair_event_engine.cc +33 -0
- data/src/core/lib/iomgr/error.cc +5 -4
- data/src/core/lib/iomgr/error.h +1 -1
- data/src/core/lib/iomgr/event_engine/closure.cc +54 -0
- data/src/core/lib/iomgr/event_engine/closure.h +33 -0
- data/src/core/lib/iomgr/event_engine/endpoint.cc +194 -0
- data/src/core/lib/iomgr/event_engine/endpoint.h +53 -0
- data/src/core/lib/iomgr/event_engine/iomgr.cc +105 -0
- data/src/core/lib/iomgr/event_engine/iomgr.h +24 -0
- data/src/core/lib/iomgr/event_engine/pollset.cc +87 -0
- data/{include/grpc/event_engine/channel_args.h → src/core/lib/iomgr/event_engine/pollset.h} +7 -10
- data/src/core/lib/iomgr/event_engine/promise.h +51 -0
- data/src/core/lib/iomgr/event_engine/resolved_address_internal.cc +41 -0
- data/src/core/lib/iomgr/event_engine/resolved_address_internal.h +35 -0
- data/src/core/lib/iomgr/event_engine/resolver.cc +110 -0
- data/src/core/lib/iomgr/event_engine/tcp.cc +243 -0
- data/src/core/lib/iomgr/event_engine/timer.cc +57 -0
- data/src/core/lib/iomgr/exec_ctx.cc +8 -0
- data/src/core/lib/iomgr/exec_ctx.h +3 -4
- data/src/core/lib/iomgr/executor/threadpool.cc +2 -3
- data/src/core/lib/iomgr/executor/threadpool.h +2 -2
- data/src/core/lib/iomgr/iomgr.cc +1 -1
- data/src/core/lib/iomgr/iomgr_posix.cc +2 -0
- data/src/core/lib/iomgr/iomgr_posix_cfstream.cc +40 -10
- data/src/core/lib/iomgr/pollset_custom.cc +2 -2
- data/src/core/lib/iomgr/pollset_custom.h +3 -1
- data/src/core/lib/iomgr/pollset_uv.cc +3 -1
- data/src/core/lib/iomgr/pollset_uv.h +5 -1
- data/src/core/lib/iomgr/port.h +7 -5
- data/src/core/lib/iomgr/resolve_address.cc +5 -1
- data/src/core/lib/iomgr/resolve_address.h +6 -0
- data/src/core/lib/iomgr/sockaddr.h +1 -0
- data/src/core/lib/iomgr/socket_mutator.cc +15 -2
- data/src/core/lib/iomgr/socket_mutator.h +26 -2
- data/src/core/lib/iomgr/socket_utils_common_posix.cc +4 -4
- data/src/core/lib/iomgr/socket_utils_posix.h +2 -2
- data/src/core/lib/iomgr/tcp_client_posix.cc +7 -2
- data/src/core/lib/iomgr/tcp_posix.cc +42 -39
- data/src/core/lib/iomgr/tcp_posix.h +8 -0
- data/src/core/lib/iomgr/tcp_server_custom.cc +3 -4
- data/src/core/lib/iomgr/tcp_server_posix.cc +6 -0
- data/src/core/lib/iomgr/tcp_server_utils_posix_common.cc +2 -1
- data/src/core/lib/iomgr/timer.h +6 -1
- data/src/core/lib/security/authorization/authorization_engine.h +44 -0
- data/src/core/lib/security/authorization/authorization_policy_provider.h +32 -0
- data/src/core/lib/security/authorization/authorization_policy_provider_vtable.cc +46 -0
- data/src/core/lib/security/authorization/evaluate_args.cc +209 -0
- data/src/core/lib/security/authorization/evaluate_args.h +91 -0
- data/src/core/lib/security/credentials/google_default/google_default_credentials.cc +3 -1
- data/src/core/lib/security/credentials/tls/tls_utils.cc +32 -0
- data/src/core/lib/security/credentials/tls/tls_utils.h +13 -0
- data/src/core/lib/security/security_connector/local/local_security_connector.cc +9 -6
- data/src/core/lib/security/security_connector/ssl_utils.cc +5 -0
- data/src/core/lib/surface/call.cc +21 -1
- data/src/core/lib/surface/call.h +11 -0
- data/src/core/lib/surface/completion_queue.cc +22 -22
- data/src/core/lib/surface/completion_queue.h +1 -1
- data/src/core/lib/surface/completion_queue_factory.cc +1 -2
- data/src/core/lib/surface/init.cc +1 -3
- data/src/core/lib/surface/init.h +10 -1
- data/src/core/lib/surface/version.cc +1 -1
- data/src/core/lib/transport/error_utils.cc +2 -2
- data/src/core/lib/transport/transport.h +2 -0
- data/src/core/lib/transport/transport_op_string.cc +1 -1
- data/src/core/plugin_registry/grpc_plugin_registry.cc +4 -0
- data/src/core/tsi/alts/crypt/gsec.h +2 -0
- data/src/ruby/ext/grpc/extconf.rb +2 -0
- data/src/ruby/ext/grpc/rb_grpc_imports.generated.c +6 -0
- data/src/ruby/ext/grpc/rb_grpc_imports.generated.h +10 -1
- data/src/ruby/lib/grpc/version.rb +1 -1
- data/third_party/boringssl-with-bazel/err_data.c +269 -263
- data/third_party/boringssl-with-bazel/src/crypto/asn1/a_object.c +8 -6
- data/third_party/boringssl-with-bazel/src/crypto/cipher_extra/cipher_extra.c +4 -0
- data/third_party/boringssl-with-bazel/src/crypto/curve25519/curve25519.c +1 -1
- data/third_party/boringssl-with-bazel/src/crypto/curve25519/internal.h +1 -1
- data/third_party/boringssl-with-bazel/src/crypto/evp/evp.c +9 -0
- data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/bn/prime.c +0 -4
- data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/digest/digest.c +7 -0
- data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/digest/md32_common.h +87 -121
- data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/md4/md4.c +20 -30
- data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/md5/md5.c +19 -30
- data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/rand/internal.h +1 -4
- data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/rand/rand.c +0 -13
- data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/rsa/rsa.c +26 -24
- data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/rsa/rsa_impl.c +10 -7
- data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/sha/sha1.c +28 -39
- data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/sha/sha256.c +48 -66
- data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/sha/sha512.c +4 -5
- data/third_party/boringssl-with-bazel/src/crypto/hpke/hpke.c +362 -371
- data/third_party/boringssl-with-bazel/src/crypto/pkcs7/pkcs7_x509.c +4 -2
- data/third_party/boringssl-with-bazel/src/crypto/rand_extra/passive.c +2 -2
- data/third_party/boringssl-with-bazel/src/crypto/rsa_extra/rsa_asn1.c +1 -2
- data/third_party/boringssl-with-bazel/src/crypto/x509/internal.h +101 -11
- data/third_party/boringssl-with-bazel/src/crypto/x509/t_x509a.c +3 -0
- data/third_party/boringssl-with-bazel/src/crypto/x509/x509_cmp.c +2 -2
- data/third_party/boringssl-with-bazel/src/crypto/x509/x509_req.c +3 -0
- data/third_party/boringssl-with-bazel/src/crypto/x509/x509_set.c +1 -1
- data/third_party/boringssl-with-bazel/src/crypto/x509/x509_trs.c +2 -0
- data/third_party/boringssl-with-bazel/src/crypto/x509/x509_vfy.c +14 -15
- data/third_party/boringssl-with-bazel/src/crypto/x509/x509_vpm.c +53 -73
- data/third_party/boringssl-with-bazel/src/crypto/x509/x509cset.c +31 -0
- data/third_party/boringssl-with-bazel/src/crypto/x509/x509rset.c +3 -0
- data/third_party/boringssl-with-bazel/src/crypto/x509/x_all.c +3 -0
- data/third_party/boringssl-with-bazel/src/crypto/x509/x_req.c +5 -8
- data/third_party/boringssl-with-bazel/src/crypto/x509/x_sig.c +5 -0
- data/third_party/boringssl-with-bazel/src/crypto/x509/x_x509a.c +3 -0
- data/third_party/boringssl-with-bazel/src/crypto/x509v3/internal.h +7 -0
- data/third_party/boringssl-with-bazel/src/crypto/x509v3/v3_purp.c +1 -1
- data/third_party/boringssl-with-bazel/src/crypto/x509v3/v3_utl.c +5 -8
- data/third_party/boringssl-with-bazel/src/include/openssl/aead.h +1 -1
- data/third_party/boringssl-with-bazel/src/include/openssl/arm_arch.h +66 -1
- data/third_party/boringssl-with-bazel/src/include/openssl/base.h +40 -9
- data/third_party/boringssl-with-bazel/src/include/openssl/bytestring.h +1 -0
- data/third_party/boringssl-with-bazel/src/include/openssl/chacha.h +1 -1
- data/third_party/boringssl-with-bazel/src/include/openssl/digest.h +6 -2
- data/third_party/boringssl-with-bazel/src/include/openssl/ecdsa.h +14 -0
- data/third_party/boringssl-with-bazel/src/include/openssl/evp.h +19 -11
- data/third_party/boringssl-with-bazel/src/include/openssl/hpke.h +325 -0
- data/third_party/boringssl-with-bazel/src/include/openssl/pkcs7.h +23 -7
- data/third_party/boringssl-with-bazel/src/include/openssl/rsa.h +99 -63
- data/third_party/boringssl-with-bazel/src/include/openssl/ssl.h +139 -109
- data/third_party/boringssl-with-bazel/src/include/openssl/tls1.h +12 -19
- data/third_party/boringssl-with-bazel/src/include/openssl/x509.h +48 -50
- data/third_party/boringssl-with-bazel/src/include/openssl/x509_vfy.h +451 -435
- data/third_party/boringssl-with-bazel/src/include/openssl/x509v3.h +0 -1
- data/third_party/boringssl-with-bazel/src/ssl/d1_both.cc +2 -2
- data/third_party/boringssl-with-bazel/src/ssl/d1_srtp.cc +1 -1
- data/third_party/boringssl-with-bazel/src/ssl/encrypted_client_hello.cc +773 -84
- data/third_party/boringssl-with-bazel/src/ssl/handoff.cc +80 -47
- data/third_party/boringssl-with-bazel/src/ssl/handshake.cc +24 -19
- data/third_party/boringssl-with-bazel/src/ssl/handshake_client.cc +189 -86
- data/third_party/boringssl-with-bazel/src/ssl/handshake_server.cc +45 -56
- data/third_party/boringssl-with-bazel/src/ssl/internal.h +272 -167
- data/third_party/boringssl-with-bazel/src/ssl/s3_both.cc +2 -2
- data/third_party/boringssl-with-bazel/src/ssl/s3_lib.cc +2 -2
- data/third_party/boringssl-with-bazel/src/ssl/s3_pkt.cc +14 -19
- data/third_party/boringssl-with-bazel/src/ssl/ssl_lib.cc +34 -102
- data/third_party/boringssl-with-bazel/src/ssl/ssl_privkey.cc +2 -0
- data/third_party/boringssl-with-bazel/src/ssl/ssl_session.cc +8 -31
- data/third_party/boringssl-with-bazel/src/ssl/ssl_stat.cc +3 -0
- data/third_party/boringssl-with-bazel/src/ssl/ssl_transcript.cc +4 -3
- data/third_party/boringssl-with-bazel/src/ssl/ssl_versions.cc +7 -3
- data/third_party/boringssl-with-bazel/src/ssl/t1_lib.cc +576 -648
- data/third_party/boringssl-with-bazel/src/ssl/tls13_both.cc +31 -3
- data/third_party/boringssl-with-bazel/src/ssl/tls13_client.cc +98 -39
- data/third_party/boringssl-with-bazel/src/ssl/tls13_enc.cc +141 -94
- data/third_party/boringssl-with-bazel/src/ssl/tls13_server.cc +58 -68
- metadata +65 -40
- data/third_party/boringssl-with-bazel/src/crypto/hpke/internal.h +0 -267
- data/third_party/boringssl-with-bazel/src/crypto/x509/vpm_int.h +0 -71
@@ -16,6 +16,8 @@
|
|
16
16
|
|
17
17
|
#include <grpc/support/port_platform.h>
|
18
18
|
|
19
|
+
#include <random>
|
20
|
+
|
19
21
|
#include "src/core/ext/filters/client_channel/resolver_registry.h"
|
20
22
|
#include "src/core/ext/xds/xds_client.h"
|
21
23
|
#include "src/core/lib/gpr/env.h"
|
@@ -195,7 +197,7 @@ void GoogleCloud2ProdResolver::ZoneQuery::OnDone(
|
|
195
197
|
gpr_log(GPR_ERROR, "could not parse zone from metadata server: %s",
|
196
198
|
std::string(body).c_str());
|
197
199
|
} else {
|
198
|
-
zone = std::string(body.substr(i));
|
200
|
+
zone = std::string(body.substr(i + 1));
|
199
201
|
}
|
200
202
|
}
|
201
203
|
resolver->ZoneQueryDone(std::move(zone));
|
@@ -297,8 +299,11 @@ void GoogleCloud2ProdResolver::IPv6QueryDone(bool ipv6_supported) {
|
|
297
299
|
|
298
300
|
void GoogleCloud2ProdResolver::StartXdsResolver() {
|
299
301
|
// Construct bootstrap JSON.
|
302
|
+
std::random_device rd;
|
303
|
+
std::mt19937 mt(rd());
|
304
|
+
std::uniform_int_distribution<uint64_t> dist(1, UINT64_MAX);
|
300
305
|
Json::Object node = {
|
301
|
-
{"id", "C2P"},
|
306
|
+
{"id", absl::StrCat("C2P-", dist(mt))},
|
302
307
|
};
|
303
308
|
if (!zone_->empty()) {
|
304
309
|
node["locality"] = Json::Object{
|
@@ -568,6 +568,9 @@ absl::optional<uint64_t> HeaderHashHelper(
|
|
568
568
|
std::string value_buffer;
|
569
569
|
absl::optional<absl::string_view> header_value =
|
570
570
|
GetHeaderValue(initial_metadata, policy.header_name, &value_buffer);
|
571
|
+
if (!header_value.has_value()) {
|
572
|
+
return absl::nullopt;
|
573
|
+
}
|
571
574
|
if (policy.regex != nullptr) {
|
572
575
|
// If GetHeaderValue() did not already store the value in
|
573
576
|
// value_buffer, copy it there now, so we can modify it.
|
@@ -671,7 +674,12 @@ ConfigSelector::CallConfig XdsResolver::XdsConfigSelector::GetCallConfig(
|
|
671
674
|
}
|
672
675
|
if (!hash.has_value()) {
|
673
676
|
// If there is no hash, we just choose a random value as a default.
|
674
|
-
|
677
|
+
// We cannot directly use the result of rand() as the hash value,
|
678
|
+
// since it is a 32-bit number and not a 64-bit number and will
|
679
|
+
// therefore not be evenly distributed.
|
680
|
+
uint32_t upper = rand();
|
681
|
+
uint32_t lower = rand();
|
682
|
+
hash = (static_cast<uint64_t>(upper) << 32) | lower;
|
675
683
|
}
|
676
684
|
CallConfig call_config;
|
677
685
|
if (method_config != nullptr) {
|
@@ -680,8 +688,12 @@ ConfigSelector::CallConfig XdsResolver::XdsConfigSelector::GetCallConfig(
|
|
680
688
|
call_config.service_config = std::move(method_config);
|
681
689
|
}
|
682
690
|
call_config.call_attributes[kXdsClusterAttribute] = it->first;
|
683
|
-
|
684
|
-
|
691
|
+
std::string hash_string = absl::StrCat(hash.value());
|
692
|
+
char* hash_value =
|
693
|
+
static_cast<char*>(args.arena->Alloc(hash_string.size() + 1));
|
694
|
+
memcpy(hash_value, hash_string.c_str(), hash_string.size());
|
695
|
+
hash_value[hash_string.size()] = '\0';
|
696
|
+
call_config.call_attributes[kRequestRingHashAttribute] = hash_value;
|
685
697
|
call_config.on_call_committed = [resolver, cluster_state]() {
|
686
698
|
cluster_state->Unref();
|
687
699
|
ExecCtx::Run(
|
@@ -200,7 +200,6 @@ class RetryFilter::CallData {
|
|
200
200
|
static void SetPollent(grpc_call_element* elem, grpc_polling_entity* pollent);
|
201
201
|
|
202
202
|
private:
|
203
|
-
class Canceller;
|
204
203
|
class CallStackDestructionBarrier;
|
205
204
|
|
206
205
|
// Pending batches stored in call data.
|
@@ -212,13 +211,10 @@ class RetryFilter::CallData {
|
|
212
211
|
};
|
213
212
|
|
214
213
|
// State associated with each call attempt.
|
215
|
-
|
216
|
-
class CallAttempt
|
217
|
-
: public RefCounted<CallAttempt, PolymorphicRefCount, kUnrefCallDtor> {
|
214
|
+
class CallAttempt : public RefCounted<CallAttempt> {
|
218
215
|
public:
|
219
216
|
explicit CallAttempt(CallData* calld);
|
220
|
-
|
221
|
-
ClientChannel::LoadBalancedCall* lb_call() const { return lb_call_.get(); }
|
217
|
+
~CallAttempt() override;
|
222
218
|
|
223
219
|
// Constructs and starts whatever batches are needed on this call
|
224
220
|
// attempt.
|
@@ -228,6 +224,9 @@ class RetryFilter::CallData {
|
|
228
224
|
// committing the call.
|
229
225
|
void FreeCachedSendOpDataAfterCommit();
|
230
226
|
|
227
|
+
// Cancels the call attempt.
|
228
|
+
void CancelFromSurface(grpc_transport_stream_op_batch* cancel_batch);
|
229
|
+
|
231
230
|
private:
|
232
231
|
// State used for starting a retryable batch on the call attempt's LB call.
|
233
232
|
// This provides its own grpc_transport_stream_op_batch and other data
|
@@ -235,7 +234,7 @@ class RetryFilter::CallData {
|
|
235
234
|
// We allocate one struct on the arena for each attempt at starting a
|
236
235
|
// batch on a given LB call.
|
237
236
|
class BatchData
|
238
|
-
: public RefCounted<
|
237
|
+
: public RefCounted<BatchData, PolymorphicRefCount, kUnrefCallDtor> {
|
239
238
|
public:
|
240
239
|
BatchData(RefCountedPtr<CallAttempt> call_attempt, int refcount,
|
241
240
|
bool set_on_complete);
|
@@ -243,24 +242,22 @@ class RetryFilter::CallData {
|
|
243
242
|
|
244
243
|
grpc_transport_stream_op_batch* batch() { return &batch_; }
|
245
244
|
|
246
|
-
// Adds retriable send_initial_metadata op
|
245
|
+
// Adds retriable send_initial_metadata op.
|
247
246
|
void AddRetriableSendInitialMetadataOp();
|
248
|
-
// Adds retriable send_message op
|
247
|
+
// Adds retriable send_message op.
|
249
248
|
void AddRetriableSendMessageOp();
|
250
|
-
// Adds retriable send_trailing_metadata op
|
249
|
+
// Adds retriable send_trailing_metadata op.
|
251
250
|
void AddRetriableSendTrailingMetadataOp();
|
252
|
-
// Adds retriable recv_initial_metadata op
|
251
|
+
// Adds retriable recv_initial_metadata op.
|
253
252
|
void AddRetriableRecvInitialMetadataOp();
|
254
|
-
// Adds retriable recv_message op
|
253
|
+
// Adds retriable recv_message op.
|
255
254
|
void AddRetriableRecvMessageOp();
|
256
|
-
// Adds retriable recv_trailing_metadata op
|
255
|
+
// Adds retriable recv_trailing_metadata op.
|
257
256
|
void AddRetriableRecvTrailingMetadataOp();
|
257
|
+
// Adds cancel_stream op.
|
258
|
+
void AddCancelStreamOp();
|
258
259
|
|
259
260
|
private:
|
260
|
-
// Returns true if the call is being retried.
|
261
|
-
bool MaybeRetry(grpc_status_code status, grpc_mdelem* server_pushback_md,
|
262
|
-
bool is_lb_drop);
|
263
|
-
|
264
261
|
// Frees cached send ops that were completed by the completed batch in
|
265
262
|
// batch_data. Used when batches are completed after the call is
|
266
263
|
// committed.
|
@@ -282,9 +279,9 @@ class RetryFilter::CallData {
|
|
282
279
|
// Adds recv_trailing_metadata_ready closure to closures.
|
283
280
|
void AddClosureForRecvTrailingMetadataReady(
|
284
281
|
grpc_error_handle error, CallCombinerClosureList* closures);
|
285
|
-
// Adds any necessary closures for deferred
|
286
|
-
//
|
287
|
-
void
|
282
|
+
// Adds any necessary closures for deferred batch completion
|
283
|
+
// callbacks to closures.
|
284
|
+
void AddClosuresForDeferredCompletionCallbacks(
|
288
285
|
CallCombinerClosureList* closures);
|
289
286
|
// For any pending batch containing an op that has not yet been started,
|
290
287
|
// adds the pending batch's completion closures to closures.
|
@@ -322,7 +319,8 @@ class RetryFilter::CallData {
|
|
322
319
|
// on_complete callback will be set to point to on_complete();
|
323
320
|
// otherwise, the batch's on_complete callback will be null.
|
324
321
|
BatchData* CreateBatch(int refcount, bool set_on_complete) {
|
325
|
-
return calld_->arena_->New<BatchData>(Ref(
|
322
|
+
return calld_->arena_->New<BatchData>(Ref(DEBUG_LOCATION, "CreateBatch"),
|
323
|
+
refcount, set_on_complete);
|
326
324
|
}
|
327
325
|
|
328
326
|
// If there are any cached send ops that need to be replayed on this
|
@@ -330,6 +328,11 @@ class RetryFilter::CallData {
|
|
330
328
|
// Otherwise, returns nullptr.
|
331
329
|
BatchData* MaybeCreateBatchForReplay();
|
332
330
|
|
331
|
+
// Adds a closure to closures that will execute batch in the call combiner.
|
332
|
+
void AddClosureForBatch(grpc_transport_stream_op_batch* batch,
|
333
|
+
const char* reason,
|
334
|
+
CallCombinerClosureList* closures);
|
335
|
+
|
333
336
|
// Adds batches for pending batches to closures.
|
334
337
|
void AddBatchesForPendingBatches(CallCombinerClosureList* closures);
|
335
338
|
|
@@ -339,15 +342,41 @@ class RetryFilter::CallData {
|
|
339
342
|
// Returns true if any op in the batch was not yet started on this attempt.
|
340
343
|
bool PendingBatchIsUnstarted(PendingBatch* pending);
|
341
344
|
|
345
|
+
// Returns true if there are cached send ops to replay.
|
346
|
+
bool HaveSendOpsToReplay();
|
347
|
+
|
348
|
+
// If our retry state is no longer needed, switch to fast path by moving
|
349
|
+
// our LB call into calld_->committed_call_ and having calld_ drop
|
350
|
+
// its ref to us.
|
351
|
+
void MaybeSwitchToFastPath();
|
352
|
+
|
342
353
|
// Helper function used to start a recv_trailing_metadata batch. This
|
343
354
|
// is used in the case where a recv_initial_metadata or recv_message
|
344
355
|
// op fails in a way that we know the call is over but when the application
|
345
356
|
// has not yet started its own recv_trailing_metadata op.
|
346
357
|
void StartInternalRecvTrailingMetadata();
|
347
358
|
|
359
|
+
// Returns true if the call should be retried.
|
360
|
+
// If server_pushback_md is non-null, sets *server_pushback_ms.
|
361
|
+
bool ShouldRetry(absl::optional<grpc_status_code> status, bool is_lb_drop,
|
362
|
+
grpc_mdelem* server_pushback_md,
|
363
|
+
grpc_millis* server_pushback_ms);
|
364
|
+
|
365
|
+
// Cancels the call attempt. Unrefs any deferred batches.
|
366
|
+
// Adds a batch to closures to cancel this call attempt.
|
367
|
+
void Cancel(CallCombinerClosureList* closures);
|
368
|
+
|
369
|
+
static void OnPerAttemptRecvTimer(void* arg, grpc_error_handle error);
|
370
|
+
static void OnPerAttemptRecvTimerLocked(void* arg, grpc_error_handle error);
|
371
|
+
void MaybeCancelPerAttemptRecvTimer();
|
372
|
+
|
348
373
|
CallData* calld_;
|
349
374
|
RefCountedPtr<ClientChannel::LoadBalancedCall> lb_call_;
|
350
375
|
|
376
|
+
grpc_timer per_attempt_recv_timer_;
|
377
|
+
grpc_closure on_per_attempt_recv_timer_;
|
378
|
+
bool per_attempt_recv_timer_pending_ = false;
|
379
|
+
|
351
380
|
// BatchData.batch.payload points to this.
|
352
381
|
grpc_transport_stream_op_batch_payload batch_payload_;
|
353
382
|
// For send_initial_metadata.
|
@@ -389,16 +418,20 @@ class RetryFilter::CallData {
|
|
389
418
|
bool started_recv_trailing_metadata_ : 1;
|
390
419
|
bool completed_recv_trailing_metadata_ : 1;
|
391
420
|
// State for callback processing.
|
392
|
-
BatchData
|
421
|
+
RefCountedPtr<BatchData> recv_initial_metadata_ready_deferred_batch_;
|
393
422
|
grpc_error_handle recv_initial_metadata_error_ = GRPC_ERROR_NONE;
|
394
|
-
BatchData
|
423
|
+
RefCountedPtr<BatchData> recv_message_ready_deferred_batch_;
|
395
424
|
grpc_error_handle recv_message_error_ = GRPC_ERROR_NONE;
|
396
|
-
BatchData
|
425
|
+
RefCountedPtr<BatchData> on_complete_deferred_batch_;
|
426
|
+
grpc_error_handle on_complete_error_ = GRPC_ERROR_NONE;
|
427
|
+
RefCountedPtr<BatchData> recv_trailing_metadata_internal_batch_;
|
428
|
+
grpc_error_handle recv_trailing_metadata_error_ = GRPC_ERROR_NONE;
|
429
|
+
bool seen_recv_trailing_metadata_from_surface_ : 1;
|
397
430
|
// NOTE: Do not move this next to the metadata bitfields above. That would
|
398
431
|
// save space but will also result in a data race because compiler
|
399
432
|
// will generate a 2 byte store which overwrites the meta-data
|
400
433
|
// fields upon setting this field.
|
401
|
-
bool
|
434
|
+
bool cancelled_ : 1;
|
402
435
|
};
|
403
436
|
|
404
437
|
CallData(RetryFilter* chand, const grpc_call_element_args& args);
|
@@ -432,18 +465,17 @@ class RetryFilter::CallData {
|
|
432
465
|
// Commits the call so that no further retry attempts will be performed.
|
433
466
|
void RetryCommit(CallAttempt* call_attempt);
|
434
467
|
|
435
|
-
// Starts a retry after appropriate back-off.
|
436
|
-
|
468
|
+
// Starts a timer to retry after appropriate back-off.
|
469
|
+
// If server_pushback_ms is -1, retry_backoff_ is used.
|
470
|
+
void StartRetryTimer(grpc_millis server_pushback_ms);
|
471
|
+
|
437
472
|
static void OnRetryTimer(void* arg, grpc_error_handle error);
|
473
|
+
static void OnRetryTimerLocked(void* arg, grpc_error_handle error);
|
438
474
|
|
439
475
|
RefCountedPtr<ClientChannel::LoadBalancedCall> CreateLoadBalancedCall();
|
440
476
|
|
441
477
|
void CreateCallAttempt();
|
442
478
|
|
443
|
-
// Adds a closure to closures that will execute batch in the call combiner.
|
444
|
-
void AddClosureForBatch(grpc_transport_stream_op_batch* batch,
|
445
|
-
CallCombinerClosureList* closures);
|
446
|
-
|
447
479
|
RetryFilter* chand_;
|
448
480
|
grpc_polling_entity* pollent_;
|
449
481
|
RefCountedPtr<ServerRetryThrottleData> retry_throttle_data_;
|
@@ -465,12 +497,9 @@ class RetryFilter::CallData {
|
|
465
497
|
// gets cancelled.
|
466
498
|
RefCountedPtr<CallAttempt> call_attempt_;
|
467
499
|
|
468
|
-
// LB call used when
|
469
|
-
//
|
470
|
-
//
|
471
|
-
// and all cached send ops have been replayed, we move the LB call
|
472
|
-
// from the CallAttempt here, thus creating a fast path for the
|
473
|
-
// remainder of the streaming call.
|
500
|
+
// LB call used when we've committed to a call attempt and the retry
|
501
|
+
// state for that attempt is no longer needed. This provides a fast
|
502
|
+
// path for long-running streaming calls that minimizes overhead.
|
474
503
|
RefCountedPtr<ClientChannel::LoadBalancedCall> committed_call_;
|
475
504
|
|
476
505
|
// When we are not yet fully committed to a particular call (i.e.,
|
@@ -486,23 +515,11 @@ class RetryFilter::CallData {
|
|
486
515
|
|
487
516
|
// Retry state.
|
488
517
|
bool retry_committed_ : 1;
|
489
|
-
bool
|
518
|
+
bool retry_timer_pending_ : 1;
|
490
519
|
int num_attempts_completed_ = 0;
|
491
|
-
|
492
|
-
Canceller* canceller_ ABSL_GUARDED_BY(timer_mu_);
|
493
|
-
grpc_timer retry_timer_ ABSL_GUARDED_BY(timer_mu_);
|
520
|
+
grpc_timer retry_timer_;
|
494
521
|
grpc_closure retry_closure_;
|
495
522
|
|
496
|
-
// The number of batches containing send ops that are currently in-flight
|
497
|
-
// on any call attempt.
|
498
|
-
// We hold a ref to the call stack while this is non-zero, since replay
|
499
|
-
// batches may not complete until after all callbacks have been returned
|
500
|
-
// to the surface, and we need to make sure that the call is not destroyed
|
501
|
-
// until all of these batches have completed.
|
502
|
-
// Note that we actually only need to track replay batches, but it's
|
503
|
-
// easier to track all batches with send ops.
|
504
|
-
int num_in_flight_call_attempt_send_batches_ = 0;
|
505
|
-
|
506
523
|
// Cached data for retrying send ops.
|
507
524
|
// send_initial_metadata
|
508
525
|
bool seen_send_initial_metadata_ = false;
|
@@ -513,7 +530,10 @@ class RetryFilter::CallData {
|
|
513
530
|
// have the LB call set a value in CallAttempt and then propagate it
|
514
531
|
// from CallAttempt to the parent call when we commit. Otherwise, we
|
515
532
|
// may leave this with a value for a peer other than the one we
|
516
|
-
// actually commit to.
|
533
|
+
// actually commit to. Alternatively, maybe see if there's a way to
|
534
|
+
// change the surface API such that the peer isn't available until
|
535
|
+
// after initial metadata is received? (Could even change the
|
536
|
+
// transport API to return this with the recv_initial_metadata op.)
|
517
537
|
gpr_atm* peer_string_;
|
518
538
|
// send_message
|
519
539
|
// When we get a send_message op, we replace the original byte stream
|
@@ -522,6 +542,10 @@ class RetryFilter::CallData {
|
|
522
542
|
// Note: We inline the cache for the first 3 send_message ops and use
|
523
543
|
// dynamic allocation after that. This number was essentially picked
|
524
544
|
// at random; it could be changed in the future to tune performance.
|
545
|
+
// TODO(roth): As part of implementing hedging, we may need some
|
546
|
+
// synchronization here, since ByteStreamCache does not provide any
|
547
|
+
// synchronization, so it's not safe to have multiple
|
548
|
+
// CachingByteStreams read from the same ByteStreamCache concurrently.
|
525
549
|
absl::InlinedVector<ByteStreamCache*, 3> send_messages_;
|
526
550
|
// send_trailing_metadata
|
527
551
|
bool seen_send_trailing_metadata_ = false;
|
@@ -582,52 +606,14 @@ class RetryFilter::CallData::CallStackDestructionBarrier
|
|
582
606
|
grpc_closure* on_call_stack_destruction_ = nullptr;
|
583
607
|
};
|
584
608
|
|
585
|
-
//
|
586
|
-
// RetryFilter::CallData::Canceller
|
587
|
-
//
|
588
|
-
|
589
|
-
class RetryFilter::CallData::Canceller {
|
590
|
-
public:
|
591
|
-
explicit Canceller(CallData* calld) : calld_(calld) {
|
592
|
-
GRPC_CALL_STACK_REF(calld_->owning_call_, "RetryCanceller");
|
593
|
-
GRPC_CLOSURE_INIT(&closure_, &Cancel, this, nullptr);
|
594
|
-
calld_->call_combiner_->SetNotifyOnCancel(&closure_);
|
595
|
-
}
|
596
|
-
|
597
|
-
private:
|
598
|
-
static void Cancel(void* arg, grpc_error_handle error) {
|
599
|
-
auto* self = static_cast<Canceller*>(arg);
|
600
|
-
auto* calld = self->calld_;
|
601
|
-
{
|
602
|
-
MutexLock lock(&calld->timer_mu_);
|
603
|
-
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
604
|
-
gpr_log(GPR_INFO,
|
605
|
-
"calld=%p: cancelling retry timer: error=%s self=%p "
|
606
|
-
"calld->canceller_=%p",
|
607
|
-
calld, grpc_error_std_string(error).c_str(), self,
|
608
|
-
calld->canceller_);
|
609
|
-
}
|
610
|
-
if (calld->canceller_ == self && error != GRPC_ERROR_NONE) {
|
611
|
-
calld->canceller_ = nullptr; // Checked by OnRetryTimer().
|
612
|
-
grpc_timer_cancel(&calld->retry_timer_);
|
613
|
-
calld->FreeAllCachedSendOpData();
|
614
|
-
GRPC_CALL_COMBINER_STOP(calld->call_combiner_, "Canceller");
|
615
|
-
}
|
616
|
-
}
|
617
|
-
GRPC_CALL_STACK_UNREF(calld->owning_call_, "RetryCanceller");
|
618
|
-
delete self;
|
619
|
-
}
|
620
|
-
|
621
|
-
CallData* calld_;
|
622
|
-
grpc_closure closure_;
|
623
|
-
};
|
624
|
-
|
625
609
|
//
|
626
610
|
// RetryFilter::CallData::CallAttempt
|
627
611
|
//
|
628
612
|
|
629
613
|
RetryFilter::CallData::CallAttempt::CallAttempt(CallData* calld)
|
630
|
-
:
|
614
|
+
: RefCounted(GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace) ? "CallAttempt"
|
615
|
+
: nullptr),
|
616
|
+
calld_(calld),
|
631
617
|
batch_payload_(calld->call_context_),
|
632
618
|
started_send_initial_metadata_(false),
|
633
619
|
completed_send_initial_metadata_(false),
|
@@ -637,12 +623,42 @@ RetryFilter::CallData::CallAttempt::CallAttempt(CallData* calld)
|
|
637
623
|
completed_recv_initial_metadata_(false),
|
638
624
|
started_recv_trailing_metadata_(false),
|
639
625
|
completed_recv_trailing_metadata_(false),
|
640
|
-
|
626
|
+
seen_recv_trailing_metadata_from_surface_(false),
|
627
|
+
cancelled_(false) {
|
641
628
|
lb_call_ = calld->CreateLoadBalancedCall();
|
642
629
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
643
|
-
gpr_log(GPR_INFO, "chand=%p calld=%p
|
630
|
+
gpr_log(GPR_INFO, "chand=%p calld=%p attempt=%p: create lb_call=%p",
|
644
631
|
calld->chand_, calld, this, lb_call_.get());
|
645
632
|
}
|
633
|
+
// If per_attempt_recv_timeout is set, start a timer.
|
634
|
+
if (calld->retry_policy_ != nullptr &&
|
635
|
+
calld->retry_policy_->per_attempt_recv_timeout().has_value()) {
|
636
|
+
grpc_millis per_attempt_recv_deadline =
|
637
|
+
ExecCtx::Get()->Now() +
|
638
|
+
*calld->retry_policy_->per_attempt_recv_timeout();
|
639
|
+
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
640
|
+
gpr_log(GPR_INFO,
|
641
|
+
"chand=%p calld=%p attempt=%p: per-attempt timeout in %" PRId64
|
642
|
+
" ms",
|
643
|
+
calld->chand_, calld, this,
|
644
|
+
*calld->retry_policy_->per_attempt_recv_timeout());
|
645
|
+
}
|
646
|
+
// Schedule retry after computed delay.
|
647
|
+
GRPC_CLOSURE_INIT(&on_per_attempt_recv_timer_, OnPerAttemptRecvTimer, this,
|
648
|
+
nullptr);
|
649
|
+
GRPC_CALL_STACK_REF(calld->owning_call_, "OnPerAttemptRecvTimer");
|
650
|
+
Ref(DEBUG_LOCATION, "OnPerAttemptRecvTimer").release();
|
651
|
+
per_attempt_recv_timer_pending_ = true;
|
652
|
+
grpc_timer_init(&per_attempt_recv_timer_, per_attempt_recv_deadline,
|
653
|
+
&on_per_attempt_recv_timer_);
|
654
|
+
}
|
655
|
+
}
|
656
|
+
|
657
|
+
RetryFilter::CallData::CallAttempt::~CallAttempt() {
|
658
|
+
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
659
|
+
gpr_log(GPR_INFO, "chand=%p calld=%p attempt=%p: destroying call attempt",
|
660
|
+
calld_->chand_, calld_, this);
|
661
|
+
}
|
646
662
|
}
|
647
663
|
|
648
664
|
void RetryFilter::CallData::CallAttempt::FreeCachedSendOpDataAfterCommit() {
|
@@ -683,12 +699,48 @@ bool RetryFilter::CallData::CallAttempt::PendingBatchIsUnstarted(
|
|
683
699
|
return false;
|
684
700
|
}
|
685
701
|
|
702
|
+
bool RetryFilter::CallData::CallAttempt::HaveSendOpsToReplay() {
|
703
|
+
// We don't check send_initial_metadata here, because that op will always
|
704
|
+
// be started as soon as it is received from the surface, so it will
|
705
|
+
// never need to be started at this point.
|
706
|
+
return started_send_message_count_ < calld_->send_messages_.size() ||
|
707
|
+
(calld_->seen_send_trailing_metadata_ &&
|
708
|
+
!started_send_trailing_metadata_);
|
709
|
+
}
|
710
|
+
|
711
|
+
void RetryFilter::CallData::CallAttempt::MaybeSwitchToFastPath() {
|
712
|
+
// If we're not yet committed, we can't switch yet.
|
713
|
+
// TODO(roth): As part of implementing hedging, this logic needs to
|
714
|
+
// check that *this* call attempt is the one that we've committed to.
|
715
|
+
// Might need to replace cancelled_ with an enum indicating whether we're
|
716
|
+
// in flight, cancelled, or the winning call attempt.
|
717
|
+
if (!calld_->retry_committed_) return;
|
718
|
+
// If we've already switched to fast path, there's nothing to do here.
|
719
|
+
if (calld_->committed_call_ != nullptr) return;
|
720
|
+
// If the perAttemptRecvTimeout timer is pending, we can't switch yet.
|
721
|
+
if (per_attempt_recv_timer_pending_) return;
|
722
|
+
// If there are still send ops to replay, we can't switch yet.
|
723
|
+
if (HaveSendOpsToReplay()) return;
|
724
|
+
// If we started an internal batch for recv_trailing_metadata but have not
|
725
|
+
// yet seen that op from the surface, we can't switch yet.
|
726
|
+
if (recv_trailing_metadata_internal_batch_ != nullptr) return;
|
727
|
+
// Switch to fast path.
|
728
|
+
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
729
|
+
gpr_log(GPR_INFO,
|
730
|
+
"chand=%p calld=%p attempt=%p: retry state no longer needed; "
|
731
|
+
"moving LB call to parent and unreffing the call attempt",
|
732
|
+
calld_->chand_, calld_, this);
|
733
|
+
}
|
734
|
+
calld_->committed_call_ = std::move(lb_call_);
|
735
|
+
calld_->call_attempt_.reset(DEBUG_LOCATION, "MaybeSwitchToFastPath");
|
736
|
+
}
|
737
|
+
|
686
738
|
void RetryFilter::CallData::CallAttempt::StartInternalRecvTrailingMetadata() {
|
687
739
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
688
740
|
gpr_log(GPR_INFO,
|
689
|
-
"chand=%p calld=%p: call failed but recv_trailing_metadata not "
|
690
|
-
"started; starting it internally",
|
691
|
-
calld_->chand_, calld_);
|
741
|
+
"chand=%p calld=%p attempt=%p: call failed but "
|
742
|
+
"recv_trailing_metadata not started; starting it internally",
|
743
|
+
calld_->chand_, calld_, this);
|
692
744
|
}
|
693
745
|
// Create batch_data with 2 refs, since this batch will be unreffed twice:
|
694
746
|
// once for the recv_trailing_metadata_ready callback when the batch
|
@@ -696,7 +748,7 @@ void RetryFilter::CallData::CallAttempt::StartInternalRecvTrailingMetadata() {
|
|
696
748
|
// op from the surface.
|
697
749
|
BatchData* batch_data = CreateBatch(2, false /* set_on_complete */);
|
698
750
|
batch_data->AddRetriableRecvTrailingMetadataOp();
|
699
|
-
recv_trailing_metadata_internal_batch_ = batch_data;
|
751
|
+
recv_trailing_metadata_internal_batch_.reset(batch_data);
|
700
752
|
// Note: This will release the call combiner.
|
701
753
|
lb_call_->StartTransportStreamOpBatch(batch_data->batch());
|
702
754
|
}
|
@@ -712,9 +764,9 @@ RetryFilter::CallData::CallAttempt::MaybeCreateBatchForReplay() {
|
|
712
764
|
!calld_->pending_send_initial_metadata_) {
|
713
765
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
714
766
|
gpr_log(GPR_INFO,
|
715
|
-
"chand=%p calld=%p: replaying previously completed "
|
767
|
+
"chand=%p calld=%p attempt=%p: replaying previously completed "
|
716
768
|
"send_initial_metadata op",
|
717
|
-
calld_->chand_, calld_);
|
769
|
+
calld_->chand_, calld_, this);
|
718
770
|
}
|
719
771
|
replay_batch_data = CreateBatch(1, true /* set_on_complete */);
|
720
772
|
replay_batch_data->AddRetriableSendInitialMetadataOp();
|
@@ -726,9 +778,9 @@ RetryFilter::CallData::CallAttempt::MaybeCreateBatchForReplay() {
|
|
726
778
|
!calld_->pending_send_message_) {
|
727
779
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
728
780
|
gpr_log(GPR_INFO,
|
729
|
-
"chand=%p calld=%p: replaying previously completed "
|
781
|
+
"chand=%p calld=%p attempt=%p: replaying previously completed "
|
730
782
|
"send_message op",
|
731
|
-
calld_->chand_, calld_);
|
783
|
+
calld_->chand_, calld_, this);
|
732
784
|
}
|
733
785
|
if (replay_batch_data == nullptr) {
|
734
786
|
replay_batch_data = CreateBatch(1, true /* set_on_complete */);
|
@@ -745,9 +797,9 @@ RetryFilter::CallData::CallAttempt::MaybeCreateBatchForReplay() {
|
|
745
797
|
!calld_->pending_send_trailing_metadata_) {
|
746
798
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
747
799
|
gpr_log(GPR_INFO,
|
748
|
-
"chand=%p calld=%p: replaying previously completed "
|
800
|
+
"chand=%p calld=%p attempt=%p: replaying previously completed "
|
749
801
|
"send_trailing_metadata op",
|
750
|
-
calld_->chand_, calld_);
|
802
|
+
calld_->chand_, calld_, this);
|
751
803
|
}
|
752
804
|
if (replay_batch_data == nullptr) {
|
753
805
|
replay_batch_data = CreateBatch(1, true /* set_on_complete */);
|
@@ -757,6 +809,33 @@ RetryFilter::CallData::CallAttempt::MaybeCreateBatchForReplay() {
|
|
757
809
|
return replay_batch_data;
|
758
810
|
}
|
759
811
|
|
812
|
+
namespace {
|
813
|
+
|
814
|
+
void StartBatchInCallCombiner(void* arg, grpc_error_handle /*ignored*/) {
|
815
|
+
grpc_transport_stream_op_batch* batch =
|
816
|
+
static_cast<grpc_transport_stream_op_batch*>(arg);
|
817
|
+
auto* lb_call = static_cast<ClientChannel::LoadBalancedCall*>(
|
818
|
+
batch->handler_private.extra_arg);
|
819
|
+
// Note: This will release the call combiner.
|
820
|
+
lb_call->StartTransportStreamOpBatch(batch);
|
821
|
+
}
|
822
|
+
|
823
|
+
} // namespace
|
824
|
+
|
825
|
+
void RetryFilter::CallData::CallAttempt::AddClosureForBatch(
|
826
|
+
grpc_transport_stream_op_batch* batch, const char* reason,
|
827
|
+
CallCombinerClosureList* closures) {
|
828
|
+
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
829
|
+
gpr_log(GPR_INFO, "chand=%p calld=%p attempt=%p: adding batch (%s): %s",
|
830
|
+
calld_->chand_, calld_, this, reason,
|
831
|
+
grpc_transport_stream_op_batch_string(batch).c_str());
|
832
|
+
}
|
833
|
+
batch->handler_private.extra_arg = lb_call_.get();
|
834
|
+
GRPC_CLOSURE_INIT(&batch->handler_private.closure, StartBatchInCallCombiner,
|
835
|
+
batch, grpc_schedule_on_exec_ctx);
|
836
|
+
closures->Add(&batch->handler_private.closure, GRPC_ERROR_NONE, reason);
|
837
|
+
}
|
838
|
+
|
760
839
|
void RetryFilter::CallData::CallAttempt::AddBatchesForPendingBatches(
|
761
840
|
CallCombinerClosureList* closures) {
|
762
841
|
for (size_t i = 0; i < GPR_ARRAY_SIZE(calld_->pending_batches_); ++i) {
|
@@ -797,6 +876,7 @@ void RetryFilter::CallData::CallAttempt::AddBatchesForPendingBatches(
|
|
797
876
|
continue;
|
798
877
|
}
|
799
878
|
if (batch->recv_trailing_metadata && started_recv_trailing_metadata_) {
|
879
|
+
seen_recv_trailing_metadata_from_surface_ = true;
|
800
880
|
// If we previously completed a recv_trailing_metadata op
|
801
881
|
// initiated by StartInternalRecvTrailingMetadata(), use the
|
802
882
|
// result of that instead of trying to re-start this op.
|
@@ -806,21 +886,30 @@ void RetryFilter::CallData::CallAttempt::AddBatchesForPendingBatches(
|
|
806
886
|
// the application. Otherwise, just unref the internally started
|
807
887
|
// batch, since we'll propagate the completion when it completes.
|
808
888
|
if (completed_recv_trailing_metadata_) {
|
809
|
-
// Batches containing recv_trailing_metadata always succeed.
|
810
889
|
closures->Add(
|
811
|
-
&recv_trailing_metadata_ready_, GRPC_ERROR_NONE,
|
890
|
+
&recv_trailing_metadata_ready_, recv_trailing_metadata_error_,
|
812
891
|
"re-executing recv_trailing_metadata_ready to propagate "
|
813
892
|
"internally triggered result");
|
893
|
+
// Ref will be released by callback.
|
894
|
+
recv_trailing_metadata_internal_batch_.release();
|
814
895
|
} else {
|
815
|
-
recv_trailing_metadata_internal_batch_->Unref();
|
896
|
+
recv_trailing_metadata_internal_batch_.reset(
|
897
|
+
DEBUG_LOCATION,
|
898
|
+
"internally started recv_trailing_metadata batch pending and "
|
899
|
+
"recv_trailing_metadata started from surface");
|
900
|
+
GRPC_ERROR_UNREF(recv_trailing_metadata_error_);
|
816
901
|
}
|
817
|
-
|
902
|
+
recv_trailing_metadata_error_ = GRPC_ERROR_NONE;
|
818
903
|
}
|
819
904
|
continue;
|
820
905
|
}
|
821
|
-
// If we're already committed
|
822
|
-
|
823
|
-
|
906
|
+
// If we're already committed and these send ops aren't cached, just send
|
907
|
+
// the batch as-is.
|
908
|
+
if (calld_->retry_committed_ && !pending->send_ops_cached) {
|
909
|
+
AddClosureForBatch(
|
910
|
+
batch,
|
911
|
+
"start non-replayable pending batch on call attempt after commit",
|
912
|
+
closures);
|
824
913
|
calld_->PendingBatchClear(pending);
|
825
914
|
continue;
|
826
915
|
}
|
@@ -831,7 +920,7 @@ void RetryFilter::CallData::CallAttempt::AddBatchesForPendingBatches(
|
|
831
920
|
const int num_callbacks = has_send_ops + batch->recv_initial_metadata +
|
832
921
|
batch->recv_message +
|
833
922
|
batch->recv_trailing_metadata;
|
834
|
-
|
923
|
+
BatchData* batch_data =
|
835
924
|
CreateBatch(num_callbacks, has_send_ops /* set_on_complete */);
|
836
925
|
// Cache send ops if needed.
|
837
926
|
calld_->MaybeCacheSendOpsForBatch(pending);
|
@@ -861,16 +950,9 @@ void RetryFilter::CallData::CallAttempt::AddBatchesForPendingBatches(
|
|
861
950
|
if (batch->recv_trailing_metadata) {
|
862
951
|
batch_data->AddRetriableRecvTrailingMetadataOp();
|
863
952
|
}
|
864
|
-
|
865
|
-
|
866
|
-
|
867
|
-
if (batch->send_initial_metadata || batch->send_message ||
|
868
|
-
batch->send_trailing_metadata) {
|
869
|
-
if (calld_->num_in_flight_call_attempt_send_batches_ == 0) {
|
870
|
-
GRPC_CALL_STACK_REF(calld_->owning_call_, "retriable_send_batches");
|
871
|
-
}
|
872
|
-
++calld_->num_in_flight_call_attempt_send_batches_;
|
873
|
-
}
|
953
|
+
AddClosureForBatch(batch_data->batch(),
|
954
|
+
"start replayable pending batch on call attempt",
|
955
|
+
closures);
|
874
956
|
}
|
875
957
|
}
|
876
958
|
|
@@ -879,13 +961,8 @@ void RetryFilter::CallData::CallAttempt::AddRetriableBatches(
|
|
879
961
|
// Replay previously-returned send_* ops if needed.
|
880
962
|
BatchData* replay_batch_data = MaybeCreateBatchForReplay();
|
881
963
|
if (replay_batch_data != nullptr) {
|
882
|
-
|
883
|
-
|
884
|
-
// If this is the first one, take a ref to the call stack.
|
885
|
-
if (calld_->num_in_flight_call_attempt_send_batches_ == 0) {
|
886
|
-
GRPC_CALL_STACK_REF(calld_->owning_call_, "retriable_send_batches");
|
887
|
-
}
|
888
|
-
++calld_->num_in_flight_call_attempt_send_batches_;
|
964
|
+
AddClosureForBatch(replay_batch_data->batch(),
|
965
|
+
"start replay batch on call attempt", closures);
|
889
966
|
}
|
890
967
|
// Now add pending batches.
|
891
968
|
AddBatchesForPendingBatches(closures);
|
@@ -893,8 +970,9 @@ void RetryFilter::CallData::CallAttempt::AddRetriableBatches(
|
|
893
970
|
|
894
971
|
void RetryFilter::CallData::CallAttempt::StartRetriableBatches() {
|
895
972
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
896
|
-
gpr_log(GPR_INFO,
|
897
|
-
|
973
|
+
gpr_log(GPR_INFO,
|
974
|
+
"chand=%p calld=%p attempt=%p: constructing retriable batches",
|
975
|
+
calld_->chand_, calld_, this);
|
898
976
|
}
|
899
977
|
// Construct list of closures to execute, one for each pending batch.
|
900
978
|
CallCombinerClosureList closures;
|
@@ -903,28 +981,235 @@ void RetryFilter::CallData::CallAttempt::StartRetriableBatches() {
|
|
903
981
|
// Start batches on LB call.
|
904
982
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
905
983
|
gpr_log(GPR_INFO,
|
906
|
-
"chand=%p calld=%p: starting %" PRIuPTR
|
984
|
+
"chand=%p calld=%p attempt=%p: starting %" PRIuPTR
|
907
985
|
" retriable batches on lb_call=%p",
|
908
|
-
calld_->chand_, calld_, closures.size(), lb_call_.get());
|
986
|
+
calld_->chand_, calld_, this, closures.size(), lb_call_.get());
|
909
987
|
}
|
910
988
|
closures.RunClosures(calld_->call_combiner_);
|
911
989
|
}
|
912
990
|
|
991
|
+
void RetryFilter::CallData::CallAttempt::CancelFromSurface(
|
992
|
+
grpc_transport_stream_op_batch* cancel_batch) {
|
993
|
+
MaybeCancelPerAttemptRecvTimer();
|
994
|
+
// Propagate cancellation to LB call.
|
995
|
+
lb_call_->StartTransportStreamOpBatch(cancel_batch);
|
996
|
+
}
|
997
|
+
|
998
|
+
bool RetryFilter::CallData::CallAttempt::ShouldRetry(
|
999
|
+
absl::optional<grpc_status_code> status, bool is_lb_drop,
|
1000
|
+
grpc_mdelem* server_pushback_md, grpc_millis* server_pushback_ms) {
|
1001
|
+
// LB drops always inhibit retries.
|
1002
|
+
if (is_lb_drop) return false;
|
1003
|
+
// TODO(roth): Handle transparent retries here.
|
1004
|
+
// If no retry policy, don't retry.
|
1005
|
+
if (calld_->retry_policy_ == nullptr) return false;
|
1006
|
+
// Check status.
|
1007
|
+
if (status.has_value()) {
|
1008
|
+
if (GPR_LIKELY(*status == GRPC_STATUS_OK)) {
|
1009
|
+
if (calld_->retry_throttle_data_ != nullptr) {
|
1010
|
+
calld_->retry_throttle_data_->RecordSuccess();
|
1011
|
+
}
|
1012
|
+
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1013
|
+
gpr_log(GPR_INFO, "chand=%p calld=%p attempt=%p: call succeeded",
|
1014
|
+
calld_->chand_, calld_, this);
|
1015
|
+
}
|
1016
|
+
return false;
|
1017
|
+
}
|
1018
|
+
// Status is not OK. Check whether the status is retryable.
|
1019
|
+
if (!calld_->retry_policy_->retryable_status_codes().Contains(*status)) {
|
1020
|
+
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1021
|
+
gpr_log(GPR_INFO,
|
1022
|
+
"chand=%p calld=%p attempt=%p: status %s not configured as "
|
1023
|
+
"retryable",
|
1024
|
+
calld_->chand_, calld_, this,
|
1025
|
+
grpc_status_code_to_string(*status));
|
1026
|
+
}
|
1027
|
+
return false;
|
1028
|
+
}
|
1029
|
+
}
|
1030
|
+
// Record the failure and check whether retries are throttled.
|
1031
|
+
// Note that it's important for this check to come after the status
|
1032
|
+
// code check above, since we should only record failures whose statuses
|
1033
|
+
// match the configured retryable status codes, so that we don't count
|
1034
|
+
// things like failures due to malformed requests (INVALID_ARGUMENT).
|
1035
|
+
// Conversely, it's important for this to come before the remaining
|
1036
|
+
// checks, so that we don't fail to record failures due to other factors.
|
1037
|
+
if (calld_->retry_throttle_data_ != nullptr &&
|
1038
|
+
!calld_->retry_throttle_data_->RecordFailure()) {
|
1039
|
+
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1040
|
+
gpr_log(GPR_INFO, "chand=%p calld=%p attempt=%p: retries throttled",
|
1041
|
+
calld_->chand_, calld_, this);
|
1042
|
+
}
|
1043
|
+
return false;
|
1044
|
+
}
|
1045
|
+
// Check whether the call is committed.
|
1046
|
+
if (calld_->retry_committed_) {
|
1047
|
+
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1048
|
+
gpr_log(GPR_INFO,
|
1049
|
+
"chand=%p calld=%p attempt=%p: retries already committed",
|
1050
|
+
calld_->chand_, calld_, this);
|
1051
|
+
}
|
1052
|
+
return false;
|
1053
|
+
}
|
1054
|
+
// Check whether we have retries remaining.
|
1055
|
+
++calld_->num_attempts_completed_;
|
1056
|
+
if (calld_->num_attempts_completed_ >=
|
1057
|
+
calld_->retry_policy_->max_attempts()) {
|
1058
|
+
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1059
|
+
gpr_log(
|
1060
|
+
GPR_INFO, "chand=%p calld=%p attempt=%p: exceeded %d retry attempts",
|
1061
|
+
calld_->chand_, calld_, this, calld_->retry_policy_->max_attempts());
|
1062
|
+
}
|
1063
|
+
return false;
|
1064
|
+
}
|
1065
|
+
// Check server push-back.
|
1066
|
+
if (server_pushback_md != nullptr) {
|
1067
|
+
// If the value is "-1" or any other unparseable string, we do not retry.
|
1068
|
+
uint32_t ms;
|
1069
|
+
if (!grpc_parse_slice_to_uint32(GRPC_MDVALUE(*server_pushback_md), &ms)) {
|
1070
|
+
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1071
|
+
gpr_log(GPR_INFO,
|
1072
|
+
"chand=%p calld=%p attempt=%p: not retrying due to server "
|
1073
|
+
"push-back",
|
1074
|
+
calld_->chand_, calld_, this);
|
1075
|
+
}
|
1076
|
+
return false;
|
1077
|
+
} else {
|
1078
|
+
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1079
|
+
gpr_log(
|
1080
|
+
GPR_INFO,
|
1081
|
+
"chand=%p calld=%p attempt=%p: server push-back: retry in %u ms",
|
1082
|
+
calld_->chand_, calld_, this, ms);
|
1083
|
+
}
|
1084
|
+
*server_pushback_ms = static_cast<grpc_millis>(ms);
|
1085
|
+
}
|
1086
|
+
}
|
1087
|
+
// We should retry.
|
1088
|
+
return true;
|
1089
|
+
}
|
1090
|
+
|
1091
|
+
void RetryFilter::CallData::CallAttempt::Cancel(
|
1092
|
+
CallCombinerClosureList* closures) {
|
1093
|
+
// Record that this attempt has been cancelled.
|
1094
|
+
cancelled_ = true;
|
1095
|
+
// Unref batches for deferred completion callbacks that will now never
|
1096
|
+
// be invoked.
|
1097
|
+
if (started_recv_trailing_metadata_ &&
|
1098
|
+
!seen_recv_trailing_metadata_from_surface_) {
|
1099
|
+
recv_trailing_metadata_internal_batch_.reset(
|
1100
|
+
DEBUG_LOCATION,
|
1101
|
+
"internal recv_trailing_metadata completed before that op was "
|
1102
|
+
"started from the surface");
|
1103
|
+
}
|
1104
|
+
GRPC_ERROR_UNREF(recv_trailing_metadata_error_);
|
1105
|
+
recv_trailing_metadata_error_ = GRPC_ERROR_NONE;
|
1106
|
+
recv_initial_metadata_ready_deferred_batch_.reset(
|
1107
|
+
DEBUG_LOCATION,
|
1108
|
+
"unref deferred recv_initial_metadata_ready batch due to retry");
|
1109
|
+
GRPC_ERROR_UNREF(recv_initial_metadata_error_);
|
1110
|
+
recv_initial_metadata_error_ = GRPC_ERROR_NONE;
|
1111
|
+
recv_message_ready_deferred_batch_.reset(
|
1112
|
+
DEBUG_LOCATION, "unref deferred recv_message_ready batch due to retry");
|
1113
|
+
GRPC_ERROR_UNREF(recv_message_error_);
|
1114
|
+
recv_message_error_ = GRPC_ERROR_NONE;
|
1115
|
+
on_complete_deferred_batch_.reset(
|
1116
|
+
DEBUG_LOCATION, "unref deferred on_complete batch due to retry");
|
1117
|
+
GRPC_ERROR_UNREF(on_complete_error_);
|
1118
|
+
on_complete_error_ = GRPC_ERROR_NONE;
|
1119
|
+
// Start a cancellation op on this call attempt to make sure the
|
1120
|
+
// transport knows that this call should be cleaned up, even if it
|
1121
|
+
// hasn't received any ops.
|
1122
|
+
BatchData* cancel_batch_data = CreateBatch(1, /*set_on_complete=*/true);
|
1123
|
+
cancel_batch_data->AddCancelStreamOp();
|
1124
|
+
AddClosureForBatch(cancel_batch_data->batch(),
|
1125
|
+
"start cancellation batch on call attempt", closures);
|
1126
|
+
}
|
1127
|
+
|
1128
|
+
void RetryFilter::CallData::CallAttempt::OnPerAttemptRecvTimer(
|
1129
|
+
void* arg, grpc_error_handle error) {
|
1130
|
+
auto* call_attempt = static_cast<CallAttempt*>(arg);
|
1131
|
+
GRPC_CLOSURE_INIT(&call_attempt->on_per_attempt_recv_timer_,
|
1132
|
+
OnPerAttemptRecvTimerLocked, call_attempt, nullptr);
|
1133
|
+
GRPC_CALL_COMBINER_START(call_attempt->calld_->call_combiner_,
|
1134
|
+
&call_attempt->on_per_attempt_recv_timer_,
|
1135
|
+
GRPC_ERROR_REF(error), "per-attempt timer fired");
|
1136
|
+
}
|
1137
|
+
|
1138
|
+
void RetryFilter::CallData::CallAttempt::OnPerAttemptRecvTimerLocked(
|
1139
|
+
void* arg, grpc_error_handle error) {
|
1140
|
+
auto* call_attempt = static_cast<CallAttempt*>(arg);
|
1141
|
+
auto* calld = call_attempt->calld_;
|
1142
|
+
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1143
|
+
gpr_log(GPR_INFO,
|
1144
|
+
"chand=%p calld=%p attempt=%p: perAttemptRecvTimeout timer fired: "
|
1145
|
+
"error=%s, per_attempt_recv_timer_pending_=%d",
|
1146
|
+
calld->chand_, calld, call_attempt,
|
1147
|
+
grpc_error_std_string(error).c_str(),
|
1148
|
+
call_attempt->per_attempt_recv_timer_pending_);
|
1149
|
+
}
|
1150
|
+
CallCombinerClosureList closures;
|
1151
|
+
if (error == GRPC_ERROR_NONE &&
|
1152
|
+
call_attempt->per_attempt_recv_timer_pending_) {
|
1153
|
+
call_attempt->per_attempt_recv_timer_pending_ = false;
|
1154
|
+
// Cancel this attempt.
|
1155
|
+
// TODO(roth): When implementing hedging, we should not cancel the
|
1156
|
+
// current attempt.
|
1157
|
+
call_attempt->Cancel(&closures);
|
1158
|
+
// Check whether we should retry.
|
1159
|
+
if (call_attempt->ShouldRetry(
|
1160
|
+
/*status=*/absl::nullopt, /*is_lb_drop=*/false,
|
1161
|
+
/*server_pushback_md=*/nullptr, /*server_pushback_ms=*/nullptr)) {
|
1162
|
+
// We are retrying. Start backoff timer.
|
1163
|
+
calld->StartRetryTimer(/*server_pushback_ms=*/-1);
|
1164
|
+
} else {
|
1165
|
+
// Not retrying, so commit the call.
|
1166
|
+
calld->RetryCommit(call_attempt);
|
1167
|
+
// If retry state is no longer needed, switch to fast path for
|
1168
|
+
// subsequent batches.
|
1169
|
+
call_attempt->MaybeSwitchToFastPath();
|
1170
|
+
}
|
1171
|
+
}
|
1172
|
+
closures.RunClosures(calld->call_combiner_);
|
1173
|
+
call_attempt->Unref(DEBUG_LOCATION, "OnPerAttemptRecvTimer");
|
1174
|
+
GRPC_CALL_STACK_UNREF(calld->owning_call_, "OnPerAttemptRecvTimer");
|
1175
|
+
}
|
1176
|
+
|
1177
|
+
void RetryFilter::CallData::CallAttempt::MaybeCancelPerAttemptRecvTimer() {
|
1178
|
+
if (per_attempt_recv_timer_pending_) {
|
1179
|
+
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1180
|
+
gpr_log(GPR_INFO,
|
1181
|
+
"chand=%p calld=%p attempt=%p: cancelling "
|
1182
|
+
"perAttemptRecvTimeout timer",
|
1183
|
+
calld_->chand_, calld_, this);
|
1184
|
+
}
|
1185
|
+
per_attempt_recv_timer_pending_ = false;
|
1186
|
+
grpc_timer_cancel(&per_attempt_recv_timer_);
|
1187
|
+
}
|
1188
|
+
}
|
1189
|
+
|
913
1190
|
//
|
914
1191
|
// RetryFilter::CallData::CallAttempt::BatchData
|
915
1192
|
//
|
916
1193
|
|
917
1194
|
RetryFilter::CallData::CallAttempt::BatchData::BatchData(
|
918
1195
|
RefCountedPtr<CallAttempt> attempt, int refcount, bool set_on_complete)
|
919
|
-
: RefCounted(
|
920
|
-
|
921
|
-
|
922
|
-
|
923
|
-
|
924
|
-
|
925
|
-
|
926
|
-
|
927
|
-
|
1196
|
+
: RefCounted(
|
1197
|
+
GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace) ? "BatchData" : nullptr,
|
1198
|
+
refcount),
|
1199
|
+
call_attempt_(std::move(attempt)) {
|
1200
|
+
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1201
|
+
gpr_log(GPR_INFO, "chand=%p calld=%p attempt=%p: creating batch %p",
|
1202
|
+
call_attempt_->calld_->chand_, call_attempt_->calld_,
|
1203
|
+
call_attempt_.get(), this);
|
1204
|
+
}
|
1205
|
+
// We hold a ref to the call stack for every batch sent on a call attempt.
|
1206
|
+
// This is because some batches on the call attempt may not complete
|
1207
|
+
// until after all of the batches are completed at the surface (because
|
1208
|
+
// each batch that is pending at the surface holds a ref). This
|
1209
|
+
// can happen for replayed send ops, and it can happen for
|
1210
|
+
// recv_initial_metadata and recv_message ops on a call attempt that has
|
1211
|
+
// been abandoned.
|
1212
|
+
GRPC_CALL_STACK_REF(call_attempt_->calld_->owning_call_, "Retry BatchData");
|
928
1213
|
batch_.payload = &call_attempt_->batch_payload_;
|
929
1214
|
if (set_on_complete) {
|
930
1215
|
GRPC_CLOSURE_INIT(&on_complete_, OnComplete, this,
|
@@ -934,6 +1219,11 @@ RetryFilter::CallData::CallAttempt::BatchData::BatchData(
|
|
934
1219
|
}
|
935
1220
|
|
936
1221
|
RetryFilter::CallData::CallAttempt::BatchData::~BatchData() {
|
1222
|
+
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1223
|
+
gpr_log(GPR_INFO, "chand=%p calld=%p attempt=%p: destroying batch %p",
|
1224
|
+
call_attempt_->calld_->chand_, call_attempt_->calld_,
|
1225
|
+
call_attempt_.get(), this);
|
1226
|
+
}
|
937
1227
|
if (batch_.send_initial_metadata) {
|
938
1228
|
grpc_metadata_batch_destroy(&call_attempt_->send_initial_metadata_);
|
939
1229
|
}
|
@@ -946,7 +1236,8 @@ RetryFilter::CallData::CallAttempt::BatchData::~BatchData() {
|
|
946
1236
|
if (batch_.recv_trailing_metadata) {
|
947
1237
|
grpc_metadata_batch_destroy(&call_attempt_->recv_trailing_metadata_);
|
948
1238
|
}
|
949
|
-
GRPC_CALL_STACK_UNREF(call_attempt_->calld_->owning_call_, "
|
1239
|
+
GRPC_CALL_STACK_UNREF(call_attempt_->calld_->owning_call_, "Retry BatchData");
|
1240
|
+
call_attempt_.reset(DEBUG_LOCATION, "~BatchData");
|
950
1241
|
}
|
951
1242
|
|
952
1243
|
void RetryFilter::CallData::CallAttempt::BatchData::
|
@@ -968,108 +1259,13 @@ void RetryFilter::CallData::CallAttempt::BatchData::
|
|
968
1259
|
}
|
969
1260
|
}
|
970
1261
|
|
971
|
-
bool RetryFilter::CallData::CallAttempt::BatchData::MaybeRetry(
|
972
|
-
grpc_status_code status, grpc_mdelem* server_pushback_md, bool is_lb_drop) {
|
973
|
-
auto* calld = call_attempt_->calld_;
|
974
|
-
// LB drops always inhibit retries.
|
975
|
-
if (is_lb_drop) return false;
|
976
|
-
// Get retry policy.
|
977
|
-
if (calld->retry_policy_ == nullptr) return false;
|
978
|
-
// If we've already dispatched a retry from this call, return true.
|
979
|
-
// This catches the case where the batch has multiple callbacks
|
980
|
-
// (i.e., it includes either recv_message or recv_initial_metadata).
|
981
|
-
if (call_attempt_->retry_dispatched_) {
|
982
|
-
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
983
|
-
gpr_log(GPR_INFO, "chand=%p calld=%p: retry already dispatched",
|
984
|
-
calld->chand_, calld);
|
985
|
-
}
|
986
|
-
return true;
|
987
|
-
}
|
988
|
-
// Check status.
|
989
|
-
if (GPR_LIKELY(status == GRPC_STATUS_OK)) {
|
990
|
-
if (calld->retry_throttle_data_ != nullptr) {
|
991
|
-
calld->retry_throttle_data_->RecordSuccess();
|
992
|
-
}
|
993
|
-
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
994
|
-
gpr_log(GPR_INFO, "chand=%p calld=%p: call succeeded", calld->chand_,
|
995
|
-
calld);
|
996
|
-
}
|
997
|
-
return false;
|
998
|
-
}
|
999
|
-
// Status is not OK. Check whether the status is retryable.
|
1000
|
-
if (!calld->retry_policy_->retryable_status_codes().Contains(status)) {
|
1001
|
-
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1002
|
-
gpr_log(GPR_INFO,
|
1003
|
-
"chand=%p calld=%p: status %s not configured as retryable",
|
1004
|
-
calld->chand_, calld, grpc_status_code_to_string(status));
|
1005
|
-
}
|
1006
|
-
return false;
|
1007
|
-
}
|
1008
|
-
// Record the failure and check whether retries are throttled.
|
1009
|
-
// Note that it's important for this check to come after the status
|
1010
|
-
// code check above, since we should only record failures whose statuses
|
1011
|
-
// match the configured retryable status codes, so that we don't count
|
1012
|
-
// things like failures due to malformed requests (INVALID_ARGUMENT).
|
1013
|
-
// Conversely, it's important for this to come before the remaining
|
1014
|
-
// checks, so that we don't fail to record failures due to other factors.
|
1015
|
-
if (calld->retry_throttle_data_ != nullptr &&
|
1016
|
-
!calld->retry_throttle_data_->RecordFailure()) {
|
1017
|
-
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1018
|
-
gpr_log(GPR_INFO, "chand=%p calld=%p: retries throttled", calld->chand_,
|
1019
|
-
calld);
|
1020
|
-
}
|
1021
|
-
return false;
|
1022
|
-
}
|
1023
|
-
// Check whether the call is committed.
|
1024
|
-
if (calld->retry_committed_) {
|
1025
|
-
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1026
|
-
gpr_log(GPR_INFO, "chand=%p calld=%p: retries already committed",
|
1027
|
-
calld->chand_, calld);
|
1028
|
-
}
|
1029
|
-
return false;
|
1030
|
-
}
|
1031
|
-
// Check whether we have retries remaining.
|
1032
|
-
++calld->num_attempts_completed_;
|
1033
|
-
if (calld->num_attempts_completed_ >= calld->retry_policy_->max_attempts()) {
|
1034
|
-
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1035
|
-
gpr_log(GPR_INFO, "chand=%p calld=%p: exceeded %d retry attempts",
|
1036
|
-
calld->chand_, calld, calld->retry_policy_->max_attempts());
|
1037
|
-
}
|
1038
|
-
return false;
|
1039
|
-
}
|
1040
|
-
// Check server push-back.
|
1041
|
-
grpc_millis server_pushback_ms = -1;
|
1042
|
-
if (server_pushback_md != nullptr) {
|
1043
|
-
// If the value is "-1" or any other unparseable string, we do not retry.
|
1044
|
-
uint32_t ms;
|
1045
|
-
if (!grpc_parse_slice_to_uint32(GRPC_MDVALUE(*server_pushback_md), &ms)) {
|
1046
|
-
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1047
|
-
gpr_log(GPR_INFO,
|
1048
|
-
"chand=%p calld=%p: not retrying due to server push-back",
|
1049
|
-
calld->chand_, calld);
|
1050
|
-
}
|
1051
|
-
return false;
|
1052
|
-
} else {
|
1053
|
-
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1054
|
-
gpr_log(GPR_INFO, "chand=%p calld=%p: server push-back: retry in %u ms",
|
1055
|
-
calld->chand_, calld, ms);
|
1056
|
-
}
|
1057
|
-
server_pushback_ms = static_cast<grpc_millis>(ms);
|
1058
|
-
}
|
1059
|
-
}
|
1060
|
-
// Do retry.
|
1061
|
-
call_attempt_->retry_dispatched_ = true;
|
1062
|
-
calld->DoRetry(server_pushback_ms);
|
1063
|
-
return true;
|
1064
|
-
}
|
1065
|
-
|
1066
1262
|
//
|
1067
1263
|
// recv_initial_metadata callback handling
|
1068
1264
|
//
|
1069
1265
|
|
1070
1266
|
void RetryFilter::CallData::CallAttempt::BatchData::
|
1071
1267
|
InvokeRecvInitialMetadataCallback(void* arg, grpc_error_handle error) {
|
1072
|
-
auto* batch_data = static_cast<CallAttempt::BatchData*>(arg);
|
1268
|
+
auto* batch_data = static_cast<BatchData*>(arg);
|
1073
1269
|
auto* call_attempt = batch_data->call_attempt_.get();
|
1074
1270
|
// Find pending batch.
|
1075
1271
|
PendingBatch* pending = call_attempt->calld_->PendingBatchFind(
|
@@ -1101,24 +1297,27 @@ void RetryFilter::CallData::CallAttempt::BatchData::
|
|
1101
1297
|
|
1102
1298
|
void RetryFilter::CallData::CallAttempt::BatchData::RecvInitialMetadataReady(
|
1103
1299
|
void* arg, grpc_error_handle error) {
|
1104
|
-
|
1105
|
-
static_cast<CallAttempt::BatchData*>(arg);
|
1300
|
+
RefCountedPtr<BatchData> batch_data(static_cast<BatchData*>(arg));
|
1106
1301
|
CallAttempt* call_attempt = batch_data->call_attempt_.get();
|
1107
1302
|
CallData* calld = call_attempt->calld_;
|
1108
1303
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1109
1304
|
gpr_log(GPR_INFO,
|
1110
|
-
"chand=%p calld=%p: got recv_initial_metadata_ready,
|
1111
|
-
|
1305
|
+
"chand=%p calld=%p attempt=%p: got recv_initial_metadata_ready, "
|
1306
|
+
"error=%s",
|
1307
|
+
calld->chand_, calld, call_attempt,
|
1308
|
+
grpc_error_std_string(error).c_str());
|
1112
1309
|
}
|
1113
1310
|
call_attempt->completed_recv_initial_metadata_ = true;
|
1114
|
-
// If
|
1311
|
+
// If this attempt has been cancelled, then we're not going to use the
|
1115
1312
|
// result of this recv_initial_metadata op, so do nothing.
|
1116
|
-
if (call_attempt->
|
1117
|
-
GRPC_CALL_COMBINER_STOP(
|
1118
|
-
|
1119
|
-
"recv_initial_metadata_ready after retry dispatched");
|
1313
|
+
if (call_attempt->cancelled_) {
|
1314
|
+
GRPC_CALL_COMBINER_STOP(calld->call_combiner_,
|
1315
|
+
"recv_initial_metadata_ready after cancellation");
|
1120
1316
|
return;
|
1121
1317
|
}
|
1318
|
+
// Cancel per-attempt recv timer, if any.
|
1319
|
+
call_attempt->MaybeCancelPerAttemptRecvTimer();
|
1320
|
+
// If we're not committed, check the response to see if we need to commit.
|
1122
1321
|
if (!calld->retry_committed_) {
|
1123
1322
|
// If we got an error or a Trailers-Only response and have not yet gotten
|
1124
1323
|
// the recv_trailing_metadata_ready callback, then defer propagating this
|
@@ -1129,11 +1328,12 @@ void RetryFilter::CallData::CallAttempt::BatchData::RecvInitialMetadataReady(
|
|
1129
1328
|
!call_attempt->completed_recv_trailing_metadata_)) {
|
1130
1329
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1131
1330
|
gpr_log(GPR_INFO,
|
1132
|
-
"chand=%p calld=%p: deferring
|
1133
|
-
"(Trailers-Only)",
|
1134
|
-
calld->chand_, calld);
|
1331
|
+
"chand=%p calld=%p attempt=%p: deferring "
|
1332
|
+
"recv_initial_metadata_ready (Trailers-Only)",
|
1333
|
+
calld->chand_, calld, call_attempt);
|
1135
1334
|
}
|
1136
|
-
call_attempt->recv_initial_metadata_ready_deferred_batch_ =
|
1335
|
+
call_attempt->recv_initial_metadata_ready_deferred_batch_ =
|
1336
|
+
std::move(batch_data);
|
1137
1337
|
call_attempt->recv_initial_metadata_error_ = GRPC_ERROR_REF(error);
|
1138
1338
|
if (!call_attempt->started_recv_trailing_metadata_) {
|
1139
1339
|
// recv_trailing_metadata not yet started by application; start it
|
@@ -1148,10 +1348,13 @@ void RetryFilter::CallData::CallAttempt::BatchData::RecvInitialMetadataReady(
|
|
1148
1348
|
}
|
1149
1349
|
// Received valid initial metadata, so commit the call.
|
1150
1350
|
calld->RetryCommit(call_attempt);
|
1351
|
+
// If retry state is no longer needed, switch to fast path for
|
1352
|
+
// subsequent batches.
|
1353
|
+
call_attempt->MaybeSwitchToFastPath();
|
1151
1354
|
}
|
1152
1355
|
// Invoke the callback to return the result to the surface.
|
1153
1356
|
// Manually invoking a callback function; it does not take ownership of error.
|
1154
|
-
InvokeRecvInitialMetadataCallback(batch_data, error);
|
1357
|
+
InvokeRecvInitialMetadataCallback(batch_data.release(), error);
|
1155
1358
|
}
|
1156
1359
|
|
1157
1360
|
//
|
@@ -1160,8 +1363,7 @@ void RetryFilter::CallData::CallAttempt::BatchData::RecvInitialMetadataReady(
|
|
1160
1363
|
|
1161
1364
|
void RetryFilter::CallData::CallAttempt::BatchData::InvokeRecvMessageCallback(
|
1162
1365
|
void* arg, grpc_error_handle error) {
|
1163
|
-
|
1164
|
-
static_cast<CallAttempt::BatchData*>(arg);
|
1366
|
+
auto* batch_data = static_cast<BatchData*>(arg);
|
1165
1367
|
CallAttempt* call_attempt = batch_data->call_attempt_.get();
|
1166
1368
|
CallData* calld = call_attempt->calld_;
|
1167
1369
|
// Find pending op.
|
@@ -1189,22 +1391,26 @@ void RetryFilter::CallData::CallAttempt::BatchData::InvokeRecvMessageCallback(
|
|
1189
1391
|
|
1190
1392
|
void RetryFilter::CallData::CallAttempt::BatchData::RecvMessageReady(
|
1191
1393
|
void* arg, grpc_error_handle error) {
|
1192
|
-
|
1193
|
-
static_cast<CallAttempt::BatchData*>(arg);
|
1394
|
+
RefCountedPtr<BatchData> batch_data(static_cast<BatchData*>(arg));
|
1194
1395
|
CallAttempt* call_attempt = batch_data->call_attempt_.get();
|
1195
1396
|
CallData* calld = call_attempt->calld_;
|
1196
1397
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1197
|
-
gpr_log(GPR_INFO,
|
1198
|
-
calld
|
1398
|
+
gpr_log(GPR_INFO,
|
1399
|
+
"chand=%p calld=%p attempt=%p: got recv_message_ready, error=%s",
|
1400
|
+
calld->chand_, calld, call_attempt,
|
1401
|
+
grpc_error_std_string(error).c_str());
|
1199
1402
|
}
|
1200
1403
|
++call_attempt->completed_recv_message_count_;
|
1201
|
-
// If
|
1404
|
+
// If this attempt has been cancelled, then we're not going to use the
|
1202
1405
|
// result of this recv_message op, so do nothing.
|
1203
|
-
if (call_attempt->
|
1406
|
+
if (call_attempt->cancelled_) {
|
1204
1407
|
GRPC_CALL_COMBINER_STOP(calld->call_combiner_,
|
1205
|
-
"recv_message_ready after
|
1408
|
+
"recv_message_ready after cancellation");
|
1206
1409
|
return;
|
1207
1410
|
}
|
1411
|
+
// Cancel per-attempt recv timer, if any.
|
1412
|
+
call_attempt->MaybeCancelPerAttemptRecvTimer();
|
1413
|
+
// If we're not committed, check the response to see if we need to commit.
|
1208
1414
|
if (!calld->retry_committed_) {
|
1209
1415
|
// If we got an error or the payload was nullptr and we have not yet gotten
|
1210
1416
|
// the recv_trailing_metadata_ready callback, then defer propagating this
|
@@ -1215,11 +1421,11 @@ void RetryFilter::CallData::CallAttempt::BatchData::RecvMessageReady(
|
|
1215
1421
|
!call_attempt->completed_recv_trailing_metadata_)) {
|
1216
1422
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1217
1423
|
gpr_log(GPR_INFO,
|
1218
|
-
"chand=%p calld=%p: deferring recv_message_ready
|
1219
|
-
"message and recv_trailing_metadata pending)",
|
1220
|
-
calld->chand_, calld);
|
1424
|
+
"chand=%p calld=%p attempt=%p: deferring recv_message_ready "
|
1425
|
+
"(nullptr message and recv_trailing_metadata pending)",
|
1426
|
+
calld->chand_, calld, call_attempt);
|
1221
1427
|
}
|
1222
|
-
call_attempt->recv_message_ready_deferred_batch_ = batch_data;
|
1428
|
+
call_attempt->recv_message_ready_deferred_batch_ = std::move(batch_data);
|
1223
1429
|
call_attempt->recv_message_error_ = GRPC_ERROR_REF(error);
|
1224
1430
|
if (!call_attempt->started_recv_trailing_metadata_) {
|
1225
1431
|
// recv_trailing_metadata not yet started by application; start it
|
@@ -1233,10 +1439,13 @@ void RetryFilter::CallData::CallAttempt::BatchData::RecvMessageReady(
|
|
1233
1439
|
}
|
1234
1440
|
// Received a valid message, so commit the call.
|
1235
1441
|
calld->RetryCommit(call_attempt);
|
1442
|
+
// If retry state is no longer needed, switch to fast path for
|
1443
|
+
// subsequent batches.
|
1444
|
+
call_attempt->MaybeSwitchToFastPath();
|
1236
1445
|
}
|
1237
1446
|
// Invoke the callback to return the result to the surface.
|
1238
1447
|
// Manually invoking a callback function; it does not take ownership of error.
|
1239
|
-
InvokeRecvMessageCallback(batch_data, error);
|
1448
|
+
InvokeRecvMessageCallback(batch_data.release(), error);
|
1240
1449
|
}
|
1241
1450
|
|
1242
1451
|
//
|
@@ -1285,7 +1494,7 @@ void RetryFilter::CallData::CallAttempt::BatchData::
|
|
1285
1494
|
// If we generated the recv_trailing_metadata op internally via
|
1286
1495
|
// StartInternalRecvTrailingMetadata(), then there will be no pending batch.
|
1287
1496
|
if (pending == nullptr) {
|
1288
|
-
|
1497
|
+
call_attempt_->recv_trailing_metadata_error_ = error;
|
1289
1498
|
return;
|
1290
1499
|
}
|
1291
1500
|
// Return metadata.
|
@@ -1303,7 +1512,8 @@ void RetryFilter::CallData::CallAttempt::BatchData::
|
|
1303
1512
|
}
|
1304
1513
|
|
1305
1514
|
void RetryFilter::CallData::CallAttempt::BatchData::
|
1306
|
-
|
1515
|
+
AddClosuresForDeferredCompletionCallbacks(
|
1516
|
+
CallCombinerClosureList* closures) {
|
1307
1517
|
if (batch_.recv_trailing_metadata) {
|
1308
1518
|
// Add closure for deferred recv_initial_metadata_ready.
|
1309
1519
|
if (GPR_UNLIKELY(
|
@@ -1312,24 +1522,28 @@ void RetryFilter::CallData::CallAttempt::BatchData::
|
|
1312
1522
|
GRPC_CLOSURE_INIT(
|
1313
1523
|
&call_attempt_->recv_initial_metadata_ready_,
|
1314
1524
|
InvokeRecvInitialMetadataCallback,
|
1315
|
-
call_attempt_->recv_initial_metadata_ready_deferred_batch_,
|
1525
|
+
call_attempt_->recv_initial_metadata_ready_deferred_batch_.release(),
|
1316
1526
|
grpc_schedule_on_exec_ctx);
|
1317
1527
|
closures->Add(&call_attempt_->recv_initial_metadata_ready_,
|
1318
1528
|
call_attempt_->recv_initial_metadata_error_,
|
1319
1529
|
"resuming recv_initial_metadata_ready");
|
1320
|
-
call_attempt_->recv_initial_metadata_ready_deferred_batch_ = nullptr;
|
1321
1530
|
}
|
1322
1531
|
// Add closure for deferred recv_message_ready.
|
1323
1532
|
if (GPR_UNLIKELY(call_attempt_->recv_message_ready_deferred_batch_ !=
|
1324
1533
|
nullptr)) {
|
1325
|
-
GRPC_CLOSURE_INIT(
|
1326
|
-
|
1327
|
-
|
1328
|
-
|
1534
|
+
GRPC_CLOSURE_INIT(
|
1535
|
+
&call_attempt_->recv_message_ready_, InvokeRecvMessageCallback,
|
1536
|
+
call_attempt_->recv_message_ready_deferred_batch_.release(),
|
1537
|
+
grpc_schedule_on_exec_ctx);
|
1329
1538
|
closures->Add(&call_attempt_->recv_message_ready_,
|
1330
1539
|
call_attempt_->recv_message_error_,
|
1331
1540
|
"resuming recv_message_ready");
|
1332
|
-
|
1541
|
+
}
|
1542
|
+
// Add closure for deferred on_complete.
|
1543
|
+
if (GPR_UNLIKELY(call_attempt_->on_complete_deferred_batch_ != nullptr)) {
|
1544
|
+
closures->Add(&call_attempt_->on_complete_deferred_batch_->on_complete_,
|
1545
|
+
call_attempt_->on_complete_error_, "resuming on_complete");
|
1546
|
+
call_attempt_->on_complete_deferred_batch_.release();
|
1333
1547
|
}
|
1334
1548
|
}
|
1335
1549
|
}
|
@@ -1343,9 +1557,9 @@ void RetryFilter::CallData::CallAttempt::BatchData::
|
|
1343
1557
|
if (call_attempt_->PendingBatchIsUnstarted(pending)) {
|
1344
1558
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1345
1559
|
gpr_log(GPR_INFO,
|
1346
|
-
"chand=%p calld=%p: failing unstarted pending
|
1347
|
-
"index %" PRIuPTR,
|
1348
|
-
calld->chand_, calld, i);
|
1560
|
+
"chand=%p calld=%p attempt=%p: failing unstarted pending "
|
1561
|
+
"batch at index %" PRIuPTR,
|
1562
|
+
calld->chand_, calld, call_attempt_.get(), i);
|
1349
1563
|
}
|
1350
1564
|
closures->Add(pending->batch->on_complete, GRPC_ERROR_REF(error),
|
1351
1565
|
"failing on_complete for pending batch");
|
@@ -1362,31 +1576,38 @@ void RetryFilter::CallData::CallAttempt::BatchData::RunClosuresForCompletedCall(
|
|
1362
1576
|
CallCombinerClosureList closures;
|
1363
1577
|
// First, add closure for recv_trailing_metadata_ready.
|
1364
1578
|
AddClosureForRecvTrailingMetadataReady(GRPC_ERROR_REF(error), &closures);
|
1365
|
-
// If there are deferred
|
1366
|
-
|
1367
|
-
AddClosuresForDeferredRecvCallbacks(&closures);
|
1579
|
+
// If there are deferred batch completion callbacks, add them to closures.
|
1580
|
+
AddClosuresForDeferredCompletionCallbacks(&closures);
|
1368
1581
|
// Add closures to fail any pending batches that have not yet been started.
|
1369
1582
|
AddClosuresToFailUnstartedPendingBatches(GRPC_ERROR_REF(error), &closures);
|
1370
1583
|
// Schedule all of the closures identified above.
|
1371
1584
|
// Note: This will release the call combiner.
|
1372
1585
|
closures.RunClosures(call_attempt_->calld_->call_combiner_);
|
1373
|
-
// Don't need batch_data anymore.
|
1374
|
-
Unref();
|
1375
1586
|
GRPC_ERROR_UNREF(error);
|
1376
1587
|
}
|
1377
1588
|
|
1378
1589
|
void RetryFilter::CallData::CallAttempt::BatchData::RecvTrailingMetadataReady(
|
1379
1590
|
void* arg, grpc_error_handle error) {
|
1380
|
-
|
1381
|
-
static_cast<CallAttempt::BatchData*>(arg);
|
1591
|
+
RefCountedPtr<BatchData> batch_data(static_cast<BatchData*>(arg));
|
1382
1592
|
CallAttempt* call_attempt = batch_data->call_attempt_.get();
|
1383
1593
|
CallData* calld = call_attempt->calld_;
|
1384
1594
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1385
1595
|
gpr_log(GPR_INFO,
|
1386
|
-
"chand=%p calld=%p: got recv_trailing_metadata_ready,
|
1387
|
-
|
1596
|
+
"chand=%p calld=%p attempt=%p: got recv_trailing_metadata_ready, "
|
1597
|
+
"error=%s",
|
1598
|
+
calld->chand_, calld, call_attempt,
|
1599
|
+
grpc_error_std_string(error).c_str());
|
1388
1600
|
}
|
1389
1601
|
call_attempt->completed_recv_trailing_metadata_ = true;
|
1602
|
+
// If this attempt has been cancelled, then we're not going to use the
|
1603
|
+
// result of this recv_trailing_metadata op, so do nothing.
|
1604
|
+
if (call_attempt->cancelled_) {
|
1605
|
+
GRPC_CALL_COMBINER_STOP(calld->call_combiner_,
|
1606
|
+
"recv_trailing_metadata_ready after cancellation");
|
1607
|
+
return;
|
1608
|
+
}
|
1609
|
+
// Cancel per-attempt recv timer, if any.
|
1610
|
+
call_attempt->MaybeCancelPerAttemptRecvTimer();
|
1390
1611
|
// Get the call's status and check for server pushback metadata.
|
1391
1612
|
grpc_status_code status = GRPC_STATUS_OK;
|
1392
1613
|
grpc_mdelem* server_pushback_md = nullptr;
|
@@ -1397,26 +1618,29 @@ void RetryFilter::CallData::CallAttempt::BatchData::RecvTrailingMetadataReady(
|
|
1397
1618
|
&server_pushback_md, &is_lb_drop);
|
1398
1619
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1399
1620
|
gpr_log(
|
1400
|
-
GPR_INFO,
|
1401
|
-
calld
|
1621
|
+
GPR_INFO,
|
1622
|
+
"chand=%p calld=%p attempt=%p: call finished, status=%s is_lb_drop=%d",
|
1623
|
+
calld->chand_, calld, call_attempt, grpc_status_code_to_string(status),
|
1624
|
+
is_lb_drop);
|
1402
1625
|
}
|
1403
1626
|
// Check if we should retry.
|
1404
|
-
|
1405
|
-
|
1406
|
-
|
1407
|
-
|
1408
|
-
|
1409
|
-
|
1410
|
-
|
1411
|
-
|
1412
|
-
|
1413
|
-
|
1414
|
-
}
|
1415
|
-
batch_data->Unref();
|
1627
|
+
grpc_millis server_pushback_ms = -1;
|
1628
|
+
if (call_attempt->ShouldRetry(status, is_lb_drop, server_pushback_md,
|
1629
|
+
&server_pushback_ms)) {
|
1630
|
+
// Start retry timer.
|
1631
|
+
calld->StartRetryTimer(server_pushback_ms);
|
1632
|
+
// Cancel call attempt.
|
1633
|
+
CallCombinerClosureList closures;
|
1634
|
+
call_attempt->Cancel(&closures);
|
1635
|
+
// Yields call combiner.
|
1636
|
+
closures.RunClosures(calld->call_combiner_);
|
1416
1637
|
return;
|
1417
1638
|
}
|
1418
1639
|
// Not retrying, so commit the call.
|
1419
1640
|
calld->RetryCommit(call_attempt);
|
1641
|
+
// If retry state is no longer needed, switch to fast path for
|
1642
|
+
// subsequent batches.
|
1643
|
+
call_attempt->MaybeSwitchToFastPath();
|
1420
1644
|
// Run any necessary closures.
|
1421
1645
|
batch_data->RunClosuresForCompletedCall(GRPC_ERROR_REF(error));
|
1422
1646
|
}
|
@@ -1454,31 +1678,27 @@ void RetryFilter::CallData::CallAttempt::BatchData::
|
|
1454
1678
|
void RetryFilter::CallData::CallAttempt::BatchData::
|
1455
1679
|
AddClosuresForReplayOrPendingSendOps(CallCombinerClosureList* closures) {
|
1456
1680
|
auto* calld = call_attempt_->calld_;
|
1681
|
+
bool have_pending_send_ops = call_attempt_->HaveSendOpsToReplay();
|
1457
1682
|
// We don't check send_initial_metadata here, because that op will always
|
1458
1683
|
// be started as soon as it is received from the surface, so it will
|
1459
1684
|
// never need to be started at this point.
|
1460
|
-
|
1461
|
-
call_attempt_->started_send_message_count_ < calld->send_messages_.size();
|
1462
|
-
bool have_pending_send_trailing_metadata_op =
|
1463
|
-
calld->seen_send_trailing_metadata_ &&
|
1464
|
-
!call_attempt_->started_send_trailing_metadata_;
|
1465
|
-
if (!have_pending_send_message_ops &&
|
1466
|
-
!have_pending_send_trailing_metadata_op) {
|
1685
|
+
if (!have_pending_send_ops) {
|
1467
1686
|
for (size_t i = 0; i < GPR_ARRAY_SIZE(calld->pending_batches_); ++i) {
|
1468
1687
|
PendingBatch* pending = &calld->pending_batches_[i];
|
1469
1688
|
grpc_transport_stream_op_batch* batch = pending->batch;
|
1470
1689
|
if (batch == nullptr || pending->send_ops_cached) continue;
|
1471
|
-
if (batch->send_message
|
1472
|
-
|
1473
|
-
|
1690
|
+
if (batch->send_message || batch->send_trailing_metadata) {
|
1691
|
+
have_pending_send_ops = true;
|
1692
|
+
break;
|
1474
1693
|
}
|
1475
1694
|
}
|
1476
1695
|
}
|
1477
|
-
if (
|
1696
|
+
if (have_pending_send_ops) {
|
1478
1697
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1479
1698
|
gpr_log(GPR_INFO,
|
1480
|
-
"chand=%p calld=%p: starting next batch for pending
|
1481
|
-
|
1699
|
+
"chand=%p calld=%p attempt=%p: starting next batch for pending "
|
1700
|
+
"send op(s)",
|
1701
|
+
calld->chand_, calld, call_attempt_.get());
|
1482
1702
|
}
|
1483
1703
|
call_attempt_->AddRetriableBatches(closures);
|
1484
1704
|
}
|
@@ -1486,15 +1706,46 @@ void RetryFilter::CallData::CallAttempt::BatchData::
|
|
1486
1706
|
|
1487
1707
|
void RetryFilter::CallData::CallAttempt::BatchData::OnComplete(
|
1488
1708
|
void* arg, grpc_error_handle error) {
|
1489
|
-
|
1490
|
-
static_cast<CallAttempt::BatchData*>(arg);
|
1709
|
+
RefCountedPtr<BatchData> batch_data(static_cast<BatchData*>(arg));
|
1491
1710
|
CallAttempt* call_attempt = batch_data->call_attempt_.get();
|
1492
1711
|
CallData* calld = call_attempt->calld_;
|
1493
1712
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1494
|
-
gpr_log(GPR_INFO,
|
1495
|
-
calld
|
1713
|
+
gpr_log(GPR_INFO,
|
1714
|
+
"chand=%p calld=%p attempt=%p: got on_complete, error=%s, batch=%s",
|
1715
|
+
calld->chand_, calld, call_attempt,
|
1716
|
+
grpc_error_std_string(error).c_str(),
|
1496
1717
|
grpc_transport_stream_op_batch_string(&batch_data->batch_).c_str());
|
1497
1718
|
}
|
1719
|
+
// If this attempt has been cancelled, then we're not going to propagate
|
1720
|
+
// the completion of this batch, so do nothing.
|
1721
|
+
if (call_attempt->cancelled_) {
|
1722
|
+
GRPC_CALL_COMBINER_STOP(calld->call_combiner_,
|
1723
|
+
"on_complete after cancellation");
|
1724
|
+
return;
|
1725
|
+
}
|
1726
|
+
// If we got an error and have not yet gotten the
|
1727
|
+
// recv_trailing_metadata_ready callback, then defer propagating this
|
1728
|
+
// callback back to the surface. We can evaluate whether to retry when
|
1729
|
+
// recv_trailing_metadata comes back.
|
1730
|
+
if (GPR_UNLIKELY(!calld->retry_committed_ && error != GRPC_ERROR_NONE &&
|
1731
|
+
!call_attempt->completed_recv_trailing_metadata_)) {
|
1732
|
+
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1733
|
+
gpr_log(GPR_INFO, "chand=%p calld=%p attempt=%p: deferring on_complete",
|
1734
|
+
calld->chand_, calld, call_attempt);
|
1735
|
+
}
|
1736
|
+
call_attempt->on_complete_deferred_batch_ = std::move(batch_data);
|
1737
|
+
call_attempt->on_complete_error_ = GRPC_ERROR_REF(error);
|
1738
|
+
if (!call_attempt->started_recv_trailing_metadata_) {
|
1739
|
+
// recv_trailing_metadata not yet started by application; start it
|
1740
|
+
// ourselves to get status.
|
1741
|
+
call_attempt->StartInternalRecvTrailingMetadata();
|
1742
|
+
} else {
|
1743
|
+
GRPC_CALL_COMBINER_STOP(
|
1744
|
+
calld->call_combiner_,
|
1745
|
+
"on_complete failure before recv_trailing_metadata_ready");
|
1746
|
+
}
|
1747
|
+
return;
|
1748
|
+
}
|
1498
1749
|
// Update bookkeeping in call_attempt.
|
1499
1750
|
if (batch_data->batch_.send_initial_metadata) {
|
1500
1751
|
call_attempt->completed_send_initial_metadata_ = true;
|
@@ -1512,33 +1763,21 @@ void RetryFilter::CallData::CallAttempt::BatchData::OnComplete(
|
|
1512
1763
|
}
|
1513
1764
|
// Construct list of closures to execute.
|
1514
1765
|
CallCombinerClosureList closures;
|
1515
|
-
//
|
1516
|
-
|
1517
|
-
|
1518
|
-
|
1519
|
-
|
1520
|
-
|
1521
|
-
|
1522
|
-
|
1523
|
-
|
1524
|
-
|
1525
|
-
|
1526
|
-
|
1527
|
-
}
|
1528
|
-
// Track number of in-flight send batches and determine if this was the
|
1529
|
-
// last one.
|
1530
|
-
--calld->num_in_flight_call_attempt_send_batches_;
|
1531
|
-
const bool last_send_batch_complete =
|
1532
|
-
calld->num_in_flight_call_attempt_send_batches_ == 0;
|
1533
|
-
// Don't need batch_data anymore.
|
1534
|
-
batch_data->Unref();
|
1766
|
+
// Add closure for the completed pending batch, if any.
|
1767
|
+
batch_data->AddClosuresForCompletedPendingBatch(GRPC_ERROR_REF(error),
|
1768
|
+
&closures);
|
1769
|
+
// If needed, add a callback to start any replay or pending send ops on
|
1770
|
+
// the LB call.
|
1771
|
+
if (!call_attempt->completed_recv_trailing_metadata_) {
|
1772
|
+
batch_data->AddClosuresForReplayOrPendingSendOps(&closures);
|
1773
|
+
}
|
1774
|
+
// If retry state is no longer needed (i.e., we're committed and there
|
1775
|
+
// are no more send ops to replay), switch to fast path for subsequent
|
1776
|
+
// batches.
|
1777
|
+
call_attempt->MaybeSwitchToFastPath();
|
1535
1778
|
// Schedule all of the closures identified above.
|
1536
1779
|
// Note: This yields the call combiner.
|
1537
1780
|
closures.RunClosures(calld->call_combiner_);
|
1538
|
-
// If this was the last in-flight send batch, unref the call stack.
|
1539
|
-
if (last_send_batch_complete) {
|
1540
|
-
GRPC_CALL_STACK_UNREF(calld->owning_call_, "retriable_send_batches");
|
1541
|
-
}
|
1542
1781
|
}
|
1543
1782
|
|
1544
1783
|
//
|
@@ -1598,9 +1837,12 @@ void RetryFilter::CallData::CallAttempt::BatchData::
|
|
1598
1837
|
AddRetriableSendMessageOp() {
|
1599
1838
|
auto* calld = call_attempt_->calld_;
|
1600
1839
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1601
|
-
gpr_log(
|
1602
|
-
|
1603
|
-
|
1840
|
+
gpr_log(
|
1841
|
+
GPR_INFO,
|
1842
|
+
"chand=%p calld=%p attempt=%p: starting calld->send_messages[%" PRIuPTR
|
1843
|
+
"]",
|
1844
|
+
calld->chand_, calld, call_attempt_.get(),
|
1845
|
+
call_attempt_->started_send_message_count_);
|
1604
1846
|
}
|
1605
1847
|
ByteStreamCache* cache =
|
1606
1848
|
calld->send_messages_[call_attempt_->started_send_message_count_];
|
@@ -1650,6 +1892,7 @@ void RetryFilter::CallData::CallAttempt::BatchData::
|
|
1650
1892
|
++call_attempt_->started_recv_message_count_;
|
1651
1893
|
batch_.recv_message = true;
|
1652
1894
|
batch_.payload->recv_message.recv_message = &call_attempt_->recv_message_;
|
1895
|
+
batch_.payload->recv_message.call_failed_before_recv_message = nullptr;
|
1653
1896
|
GRPC_CLOSURE_INIT(&call_attempt_->recv_message_ready_, RecvMessageReady, this,
|
1654
1897
|
grpc_schedule_on_exec_ctx);
|
1655
1898
|
batch_.payload->recv_message.recv_message_ready =
|
@@ -1671,6 +1914,12 @@ void RetryFilter::CallData::CallAttempt::BatchData::
|
|
1671
1914
|
&call_attempt_->recv_trailing_metadata_ready_;
|
1672
1915
|
}
|
1673
1916
|
|
1917
|
+
void RetryFilter::CallData::CallAttempt::BatchData::AddCancelStreamOp() {
|
1918
|
+
batch_.cancel_stream = true;
|
1919
|
+
batch_.payload->cancel_stream.cancel_error =
|
1920
|
+
GRPC_ERROR_CREATE_FROM_STATIC_STRING("retry attempt abandoned");
|
1921
|
+
}
|
1922
|
+
|
1674
1923
|
//
|
1675
1924
|
// CallData vtable functions
|
1676
1925
|
//
|
@@ -1680,7 +1929,8 @@ grpc_error_handle RetryFilter::CallData::Init(
|
|
1680
1929
|
auto* chand = static_cast<RetryFilter*>(elem->channel_data);
|
1681
1930
|
new (elem->call_data) CallData(chand, *args);
|
1682
1931
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1683
|
-
gpr_log(GPR_INFO, "chand=%p: created call
|
1932
|
+
gpr_log(GPR_INFO, "chand=%p calld=%p: created call", chand,
|
1933
|
+
elem->call_data);
|
1684
1934
|
}
|
1685
1935
|
return GRPC_ERROR_NONE;
|
1686
1936
|
}
|
@@ -1758,7 +2008,7 @@ RetryFilter::CallData::CallData(RetryFilter* chand,
|
|
1758
2008
|
pending_send_message_(false),
|
1759
2009
|
pending_send_trailing_metadata_(false),
|
1760
2010
|
retry_committed_(false),
|
1761
|
-
|
2011
|
+
retry_timer_pending_(false) {}
|
1762
2012
|
|
1763
2013
|
RetryFilter::CallData::~CallData() {
|
1764
2014
|
grpc_slice_unref_internal(path_);
|
@@ -1788,10 +2038,26 @@ void RetryFilter::CallData::StartTransportStreamOpBatch(
|
|
1788
2038
|
// will not be retried, because we have committed it here.
|
1789
2039
|
if (call_attempt_ != nullptr) {
|
1790
2040
|
RetryCommit(call_attempt_.get());
|
2041
|
+
// TODO(roth): When implementing hedging, this will get more
|
2042
|
+
// complex, because instead of just passing the batch down to a
|
2043
|
+
// single call attempt, we'll need to cancel multiple call
|
2044
|
+
// attempts and wait for the cancellation on_complete from each call
|
2045
|
+
// attempt before we propagate the on_complete from this batch
|
2046
|
+
// back to the surface.
|
1791
2047
|
// Note: This will release the call combiner.
|
1792
|
-
call_attempt_->
|
2048
|
+
call_attempt_->CancelFromSurface(batch);
|
1793
2049
|
return;
|
1794
2050
|
}
|
2051
|
+
// Cancel retry timer.
|
2052
|
+
if (retry_timer_pending_) {
|
2053
|
+
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
2054
|
+
gpr_log(GPR_INFO, "chand=%p calld=%p: cancelling retry timer", chand_,
|
2055
|
+
this);
|
2056
|
+
}
|
2057
|
+
retry_timer_pending_ = false; // Lame timer callback.
|
2058
|
+
grpc_timer_cancel(&retry_timer_);
|
2059
|
+
FreeAllCachedSendOpData();
|
2060
|
+
}
|
1795
2061
|
// Fail pending batches.
|
1796
2062
|
PendingBatchesFail(GRPC_ERROR_REF(cancel_error));
|
1797
2063
|
// Note: This will release the call combiner.
|
@@ -1801,13 +2067,33 @@ void RetryFilter::CallData::StartTransportStreamOpBatch(
|
|
1801
2067
|
}
|
1802
2068
|
// Add the batch to the pending list.
|
1803
2069
|
PendingBatch* pending = PendingBatchesAdd(batch);
|
2070
|
+
// If the timer is pending, yield the call combiner and wait for it to
|
2071
|
+
// run, since we don't want to start another call attempt until it does.
|
2072
|
+
if (retry_timer_pending_) {
|
2073
|
+
GRPC_CALL_COMBINER_STOP(call_combiner_,
|
2074
|
+
"added pending batch while retry timer pending");
|
2075
|
+
return;
|
2076
|
+
}
|
2077
|
+
// If we do not yet have a call attempt, create one.
|
1804
2078
|
if (call_attempt_ == nullptr) {
|
1805
2079
|
// If this is the first batch and retries are already committed
|
1806
2080
|
// (e.g., if this batch put the call above the buffer size limit), then
|
1807
2081
|
// immediately create an LB call and delegate the batch to it. This
|
1808
2082
|
// avoids the overhead of unnecessarily allocating a CallAttempt
|
1809
2083
|
// object or caching any of the send op data.
|
1810
|
-
|
2084
|
+
// Note that we would ideally like to do this also on subsequent
|
2085
|
+
// attempts (e.g., if a batch puts the call above the buffer size
|
2086
|
+
// limit since the last attempt was complete), but in practice that's
|
2087
|
+
// not really worthwhile, because we will almost always have cached and
|
2088
|
+
// completed at least the send_initial_metadata op on the previous
|
2089
|
+
// attempt, which means that we'd need special logic to replay the
|
2090
|
+
// batch anyway, which is exactly what the CallAttempt object provides.
|
2091
|
+
// We also skip this optimization if perAttemptRecvTimeout is set in the
|
2092
|
+
// retry policy, because we need the code in CallAttempt to handle
|
2093
|
+
// the associated timer.
|
2094
|
+
if (num_attempts_completed_ == 0 && retry_committed_ &&
|
2095
|
+
(retry_policy_ == nullptr ||
|
2096
|
+
!retry_policy_->per_attempt_recv_timeout().has_value())) {
|
1811
2097
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1812
2098
|
gpr_log(GPR_INFO,
|
1813
2099
|
"chand=%p calld=%p: retry committed before first attempt; "
|
@@ -1819,7 +2105,9 @@ void RetryFilter::CallData::StartTransportStreamOpBatch(
|
|
1819
2105
|
committed_call_->StartTransportStreamOpBatch(batch);
|
1820
2106
|
return;
|
1821
2107
|
}
|
1822
|
-
//
|
2108
|
+
// Otherwise, create a call attempt.
|
2109
|
+
// The attempt will automatically start any necessary replays or
|
2110
|
+
// pending batches.
|
1823
2111
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1824
2112
|
gpr_log(GPR_INFO, "chand=%p calld=%p: creating call attempt", chand_,
|
1825
2113
|
this);
|
@@ -1829,9 +2117,8 @@ void RetryFilter::CallData::StartTransportStreamOpBatch(
|
|
1829
2117
|
}
|
1830
2118
|
// Send batches to call attempt.
|
1831
2119
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1832
|
-
gpr_log(GPR_INFO,
|
1833
|
-
|
1834
|
-
chand_, this, call_attempt_.get(), call_attempt_->lb_call());
|
2120
|
+
gpr_log(GPR_INFO, "chand=%p calld=%p: starting batch on attempt=%p", chand_,
|
2121
|
+
this, call_attempt_.get());
|
1835
2122
|
}
|
1836
2123
|
call_attempt_->StartRetriableBatches();
|
1837
2124
|
}
|
@@ -1849,36 +2136,8 @@ RetryFilter::CallData::CreateLoadBalancedCall() {
|
|
1849
2136
|
}
|
1850
2137
|
|
1851
2138
|
void RetryFilter::CallData::CreateCallAttempt() {
|
1852
|
-
call_attempt_
|
2139
|
+
call_attempt_ = MakeRefCounted<CallAttempt>(this);
|
1853
2140
|
call_attempt_->StartRetriableBatches();
|
1854
|
-
// TODO(roth): When implementing hedging, change this to start a timer
|
1855
|
-
// for the next hedging attempt.
|
1856
|
-
}
|
1857
|
-
|
1858
|
-
namespace {
|
1859
|
-
|
1860
|
-
void StartBatchInCallCombiner(void* arg, grpc_error_handle /*ignored*/) {
|
1861
|
-
grpc_transport_stream_op_batch* batch =
|
1862
|
-
static_cast<grpc_transport_stream_op_batch*>(arg);
|
1863
|
-
auto* lb_call = static_cast<ClientChannel::LoadBalancedCall*>(
|
1864
|
-
batch->handler_private.extra_arg);
|
1865
|
-
// Note: This will release the call combiner.
|
1866
|
-
lb_call->StartTransportStreamOpBatch(batch);
|
1867
|
-
}
|
1868
|
-
|
1869
|
-
} // namespace
|
1870
|
-
|
1871
|
-
void RetryFilter::CallData::AddClosureForBatch(
|
1872
|
-
grpc_transport_stream_op_batch* batch, CallCombinerClosureList* closures) {
|
1873
|
-
batch->handler_private.extra_arg = call_attempt_->lb_call();
|
1874
|
-
GRPC_CLOSURE_INIT(&batch->handler_private.closure, StartBatchInCallCombiner,
|
1875
|
-
batch, grpc_schedule_on_exec_ctx);
|
1876
|
-
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1877
|
-
gpr_log(GPR_INFO, "chand=%p calld=%p: starting batch on LB call: %s",
|
1878
|
-
chand_, this, grpc_transport_stream_op_batch_string(batch).c_str());
|
1879
|
-
}
|
1880
|
-
closures->Add(&batch->handler_private.closure, GRPC_ERROR_NONE,
|
1881
|
-
"start_batch_on_lb_call");
|
1882
2141
|
}
|
1883
2142
|
|
1884
2143
|
//
|
@@ -1943,7 +2202,7 @@ void RetryFilter::CallData::FreeCachedSendMessage(size_t idx) {
|
|
1943
2202
|
|
1944
2203
|
void RetryFilter::CallData::FreeCachedSendTrailingMetadata() {
|
1945
2204
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1946
|
-
gpr_log(GPR_INFO, "
|
2205
|
+
gpr_log(GPR_INFO, "chand=%p calld=%p: destroying send_trailing_metadata",
|
1947
2206
|
chand_, this);
|
1948
2207
|
}
|
1949
2208
|
grpc_metadata_batch_destroy(&send_trailing_metadata_);
|
@@ -1982,7 +2241,7 @@ RetryFilter::CallData::PendingBatch* RetryFilter::CallData::PendingBatchesAdd(
|
|
1982
2241
|
const size_t idx = GetBatchIndex(batch);
|
1983
2242
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
1984
2243
|
gpr_log(GPR_INFO,
|
1985
|
-
"
|
2244
|
+
"chand=%p calld=%p: adding pending batch at index %" PRIuPTR,
|
1986
2245
|
chand_, this, idx);
|
1987
2246
|
}
|
1988
2247
|
PendingBatch* pending = &pending_batches_[idx];
|
@@ -2006,6 +2265,9 @@ RetryFilter::CallData::PendingBatch* RetryFilter::CallData::PendingBatchesAdd(
|
|
2006
2265
|
if (batch->send_trailing_metadata) {
|
2007
2266
|
pending_send_trailing_metadata_ = true;
|
2008
2267
|
}
|
2268
|
+
// TODO(roth): When we implement hedging, if there are currently attempts
|
2269
|
+
// in flight, we will need to pick the one on which the max number of send
|
2270
|
+
// ops have already been sent, and we commit to that attempt.
|
2009
2271
|
if (GPR_UNLIKELY(bytes_buffered_for_retry_ >
|
2010
2272
|
chand_->per_rpc_retry_buffer_size_)) {
|
2011
2273
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
@@ -2126,18 +2388,15 @@ void RetryFilter::CallData::RetryCommit(CallAttempt* call_attempt) {
|
|
2126
2388
|
}
|
2127
2389
|
}
|
2128
2390
|
|
2129
|
-
void RetryFilter::CallData::
|
2391
|
+
void RetryFilter::CallData::StartRetryTimer(grpc_millis server_pushback_ms) {
|
2130
2392
|
// Reset call attempt.
|
2131
|
-
call_attempt_.reset();
|
2393
|
+
call_attempt_.reset(DEBUG_LOCATION, "StartRetryTimer");
|
2132
2394
|
// Compute backoff delay.
|
2133
2395
|
grpc_millis next_attempt_time;
|
2134
2396
|
if (server_pushback_ms >= 0) {
|
2135
2397
|
next_attempt_time = ExecCtx::Get()->Now() + server_pushback_ms;
|
2136
|
-
|
2398
|
+
retry_backoff_.Reset();
|
2137
2399
|
} else {
|
2138
|
-
if (num_attempts_completed_ == 1 || last_attempt_got_server_pushback_) {
|
2139
|
-
last_attempt_got_server_pushback_ = false;
|
2140
|
-
}
|
2141
2400
|
next_attempt_time = retry_backoff_.NextAttemptTime();
|
2142
2401
|
}
|
2143
2402
|
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
|
@@ -2148,23 +2407,25 @@ void RetryFilter::CallData::DoRetry(grpc_millis server_pushback_ms) {
|
|
2148
2407
|
// Schedule retry after computed delay.
|
2149
2408
|
GRPC_CLOSURE_INIT(&retry_closure_, OnRetryTimer, this, nullptr);
|
2150
2409
|
GRPC_CALL_STACK_REF(owning_call_, "OnRetryTimer");
|
2151
|
-
|
2152
|
-
canceller_ = new Canceller(this);
|
2410
|
+
retry_timer_pending_ = true;
|
2153
2411
|
grpc_timer_init(&retry_timer_, next_attempt_time, &retry_closure_);
|
2154
2412
|
}
|
2155
2413
|
|
2156
2414
|
void RetryFilter::CallData::OnRetryTimer(void* arg, grpc_error_handle error) {
|
2157
2415
|
auto* calld = static_cast<CallData*>(arg);
|
2158
|
-
|
2159
|
-
|
2160
|
-
|
2161
|
-
|
2162
|
-
|
2163
|
-
|
2164
|
-
|
2165
|
-
|
2166
|
-
|
2167
|
-
|
2416
|
+
GRPC_CLOSURE_INIT(&calld->retry_closure_, OnRetryTimerLocked, calld, nullptr);
|
2417
|
+
GRPC_CALL_COMBINER_START(calld->call_combiner_, &calld->retry_closure_,
|
2418
|
+
GRPC_ERROR_REF(error), "retry timer fired");
|
2419
|
+
}
|
2420
|
+
|
2421
|
+
void RetryFilter::CallData::OnRetryTimerLocked(void* arg,
|
2422
|
+
grpc_error_handle error) {
|
2423
|
+
auto* calld = static_cast<CallData*>(arg);
|
2424
|
+
if (error == GRPC_ERROR_NONE && calld->retry_timer_pending_) {
|
2425
|
+
calld->retry_timer_pending_ = false;
|
2426
|
+
calld->CreateCallAttempt();
|
2427
|
+
} else {
|
2428
|
+
GRPC_CALL_COMBINER_STOP(calld->call_combiner_, "retry timer cancelled");
|
2168
2429
|
}
|
2169
2430
|
GRPC_CALL_STACK_UNREF(calld->owning_call_, "OnRetryTimer");
|
2170
2431
|
}
|