grpc 1.26.0 → 1.27.0.pre1
Potentially problematic release: this version of grpc might be problematic.
- checksums.yaml +4 -4
- data/Makefile +1654 -1519
- data/etc/roots.pem +44 -0
- data/include/grpc/grpc_security.h +37 -15
- data/include/grpc/grpc_security_constants.h +27 -0
- data/include/grpc/impl/codegen/grpc_types.h +14 -0
- data/include/grpc/impl/codegen/port_platform.h +1 -1
- data/src/core/ext/filters/client_channel/client_channel.cc +0 -20
- data/src/core/ext/filters/client_channel/http_proxy.cc +4 -4
- data/src/core/ext/filters/client_channel/lb_policy.cc +4 -3
- data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc +191 -201
- data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc +89 -0
- data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.h +40 -0
- data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_channel_secure.cc +3 -2
- data/src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.cc +88 -121
- data/src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.h +28 -57
- data/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h +0 -7
- data/src/core/ext/filters/client_channel/lb_policy/xds/cds.cc +8 -9
- data/src/core/ext/filters/client_channel/lb_policy/xds/xds.cc +53 -34
- data/src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.cc +18 -5
- data/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.cc +24 -19
- data/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.h +2 -1
- data/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_fallback.cc +4 -2
- data/src/core/ext/filters/client_channel/server_address.cc +6 -9
- data/src/core/ext/filters/client_channel/server_address.h +3 -10
- data/src/core/ext/filters/client_channel/xds/xds_api.cc +394 -150
- data/src/core/ext/filters/client_channel/xds/xds_api.h +75 -35
- data/src/core/ext/filters/client_channel/xds/xds_bootstrap.cc +59 -22
- data/src/core/ext/filters/client_channel/xds/xds_bootstrap.h +13 -9
- data/src/core/ext/filters/client_channel/xds/xds_channel_secure.cc +8 -6
- data/src/core/ext/filters/client_channel/xds/xds_client.cc +456 -175
- data/src/core/ext/filters/client_channel/xds/xds_client.h +33 -21
- data/src/core/ext/filters/client_channel/xds/xds_client_stats.cc +5 -8
- data/src/core/ext/filters/client_channel/xds/xds_client_stats.h +18 -24
- data/src/core/ext/transport/chttp2/transport/chttp2_transport.cc +2 -2
- data/src/core/ext/upb-generated/src/proto/grpc/lb/v1/load_balancer.upb.c +13 -5
- data/src/core/ext/upb-generated/src/proto/grpc/lb/v1/load_balancer.upb.h +34 -0
- data/src/core/lib/channel/channelz.h +11 -1
- data/src/core/lib/gpr/time_precise.cc +1 -1
- data/src/core/lib/gprpp/optional.h +26 -0
- data/src/core/lib/gprpp/string_view.h +14 -10
- data/src/core/lib/iomgr/executor.cc +1 -1
- data/src/core/lib/iomgr/fork_posix.cc +4 -0
- data/src/core/lib/iomgr/poller/eventmanager_libuv.cc +87 -0
- data/src/core/lib/iomgr/poller/eventmanager_libuv.h +88 -0
- data/src/core/lib/iomgr/socket_utils_common_posix.cc +14 -0
- data/src/core/lib/iomgr/socket_utils_posix.h +12 -0
- data/src/core/lib/iomgr/tcp_custom.h +3 -0
- data/src/core/lib/iomgr/tcp_posix.cc +607 -56
- data/src/core/lib/iomgr/tcp_server_custom.cc +15 -2
- data/src/core/lib/iomgr/tcp_server_utils_posix_common.cc +8 -0
- data/src/core/lib/json/json.h +11 -1
- data/src/core/lib/json/json_reader.cc +206 -28
- data/src/core/lib/json/json_writer.cc +111 -24
- data/src/core/lib/security/credentials/composite/composite_credentials.cc +7 -0
- data/src/core/lib/security/credentials/composite/composite_credentials.h +5 -1
- data/src/core/lib/security/credentials/credentials.h +10 -1
- data/src/core/lib/security/credentials/fake/fake_credentials.h +2 -1
- data/src/core/lib/security/credentials/oauth2/oauth2_credentials.cc +1 -1
- data/src/core/lib/security/credentials/plugin/plugin_credentials.cc +6 -4
- data/src/core/lib/security/credentials/plugin/plugin_credentials.h +2 -1
- data/src/core/lib/security/credentials/tls/grpc_tls_credentials_options.cc +20 -0
- data/src/core/lib/security/credentials/tls/grpc_tls_credentials_options.h +8 -0
- data/src/core/lib/security/credentials/tls/{spiffe_credentials.cc → tls_credentials.cc} +23 -24
- data/src/core/lib/security/credentials/tls/{spiffe_credentials.h → tls_credentials.h} +9 -9
- data/src/core/lib/security/security_connector/alts/alts_security_connector.cc +13 -0
- data/src/core/lib/security/security_connector/fake/fake_security_connector.cc +22 -2
- data/src/core/lib/security/security_connector/load_system_roots_fallback.cc +2 -2
- data/src/core/lib/security/security_connector/load_system_roots_linux.cc +2 -2
- data/src/core/lib/security/security_connector/local/local_security_connector.cc +30 -3
- data/src/core/lib/security/security_connector/ssl_utils.cc +45 -3
- data/src/core/lib/security/security_connector/ssl_utils.h +12 -0
- data/src/core/lib/security/security_connector/tls/{spiffe_security_connector.cc → tls_security_connector.cc} +82 -69
- data/src/core/lib/security/security_connector/tls/{spiffe_security_connector.h → tls_security_connector.h} +17 -18
- data/src/core/lib/security/transport/client_auth_filter.cc +33 -0
- data/src/core/lib/surface/completion_queue.cc +22 -1
- data/src/core/lib/surface/version.cc +1 -1
- data/src/core/tsi/alts/handshaker/alts_tsi_handshaker.cc +11 -1
- data/src/core/tsi/alts/handshaker/alts_tsi_handshaker.h +1 -1
- data/src/core/tsi/alts/zero_copy_frame_protector/alts_zero_copy_grpc_protector.cc +3 -3
- data/src/core/tsi/fake_transport_security.cc +7 -3
- data/src/core/tsi/fake_transport_security.h +2 -0
- data/src/core/tsi/ssl_transport_security.cc +144 -8
- data/src/core/tsi/ssl_transport_security.h +15 -1
- data/src/core/tsi/transport_security.cc +13 -0
- data/src/core/tsi/transport_security_grpc.cc +2 -2
- data/src/core/tsi/transport_security_grpc.h +2 -2
- data/src/core/tsi/transport_security_interface.h +12 -0
- data/src/ruby/bin/math_pb.rb +5 -5
- data/src/ruby/ext/grpc/rb_call_credentials.c +4 -1
- data/src/ruby/ext/grpc/rb_grpc_imports.generated.c +2 -0
- data/src/ruby/ext/grpc/rb_grpc_imports.generated.h +4 -1
- data/src/ruby/lib/grpc/version.rb +1 -1
- data/src/ruby/pb/grpc/health/v1/health_pb.rb +3 -3
- data/src/ruby/pb/src/proto/grpc/testing/empty_pb.rb +1 -1
- data/src/ruby/pb/src/proto/grpc/testing/messages_pb.rb +23 -13
- data/third_party/abseil-cpp/absl/algorithm/algorithm.h +159 -0
- data/third_party/abseil-cpp/absl/base/attributes.h +609 -0
- data/third_party/abseil-cpp/absl/base/call_once.h +226 -0
- data/third_party/abseil-cpp/absl/base/casts.h +184 -0
- data/third_party/abseil-cpp/absl/base/config.h +622 -0
- data/third_party/abseil-cpp/absl/base/const_init.h +76 -0
- data/third_party/abseil-cpp/absl/base/dynamic_annotations.cc +129 -0
- data/third_party/abseil-cpp/absl/base/dynamic_annotations.h +389 -0
- data/third_party/abseil-cpp/absl/base/internal/atomic_hook.h +179 -0
- data/third_party/abseil-cpp/absl/base/internal/bits.h +218 -0
- data/third_party/abseil-cpp/absl/base/internal/cycleclock.cc +107 -0
- data/third_party/abseil-cpp/absl/base/internal/cycleclock.h +94 -0
- data/third_party/abseil-cpp/absl/base/internal/endian.h +266 -0
- data/third_party/abseil-cpp/absl/base/internal/hide_ptr.h +51 -0
- data/third_party/abseil-cpp/absl/base/internal/identity.h +37 -0
- data/third_party/abseil-cpp/absl/base/internal/inline_variable.h +107 -0
- data/third_party/abseil-cpp/absl/base/internal/invoke.h +187 -0
- data/third_party/abseil-cpp/absl/base/internal/low_level_scheduling.h +107 -0
- data/third_party/abseil-cpp/absl/base/internal/per_thread_tls.h +52 -0
- data/third_party/abseil-cpp/absl/base/internal/raw_logging.cc +237 -0
- data/third_party/abseil-cpp/absl/base/internal/raw_logging.h +179 -0
- data/third_party/abseil-cpp/absl/base/internal/scheduling_mode.h +58 -0
- data/third_party/abseil-cpp/absl/base/internal/spinlock.cc +233 -0
- data/third_party/abseil-cpp/absl/base/internal/spinlock.h +243 -0
- data/third_party/abseil-cpp/absl/base/internal/spinlock_akaros.inc +35 -0
- data/third_party/abseil-cpp/absl/base/internal/spinlock_linux.inc +67 -0
- data/third_party/abseil-cpp/absl/base/internal/spinlock_posix.inc +46 -0
- data/third_party/abseil-cpp/absl/base/internal/spinlock_wait.cc +81 -0
- data/third_party/abseil-cpp/absl/base/internal/spinlock_wait.h +93 -0
- data/third_party/abseil-cpp/absl/base/internal/spinlock_win32.inc +37 -0
- data/third_party/abseil-cpp/absl/base/internal/sysinfo.cc +414 -0
- data/third_party/abseil-cpp/absl/base/internal/sysinfo.h +66 -0
- data/third_party/abseil-cpp/absl/base/internal/thread_annotations.h +271 -0
- data/third_party/abseil-cpp/absl/base/internal/thread_identity.cc +140 -0
- data/third_party/abseil-cpp/absl/base/internal/thread_identity.h +250 -0
- data/third_party/abseil-cpp/absl/base/internal/throw_delegate.cc +108 -0
- data/third_party/abseil-cpp/absl/base/internal/throw_delegate.h +75 -0
- data/third_party/abseil-cpp/absl/base/internal/tsan_mutex_interface.h +66 -0
- data/third_party/abseil-cpp/absl/base/internal/unaligned_access.h +158 -0
- data/third_party/abseil-cpp/absl/base/internal/unscaledcycleclock.cc +103 -0
- data/third_party/abseil-cpp/absl/base/internal/unscaledcycleclock.h +124 -0
- data/third_party/abseil-cpp/absl/base/log_severity.cc +27 -0
- data/third_party/abseil-cpp/absl/base/log_severity.h +121 -0
- data/third_party/abseil-cpp/absl/base/macros.h +220 -0
- data/third_party/abseil-cpp/absl/base/optimization.h +181 -0
- data/third_party/abseil-cpp/absl/base/options.h +214 -0
- data/third_party/abseil-cpp/absl/base/policy_checks.h +111 -0
- data/third_party/abseil-cpp/absl/base/port.h +26 -0
- data/third_party/abseil-cpp/absl/base/thread_annotations.h +280 -0
- data/third_party/abseil-cpp/absl/container/inlined_vector.h +848 -0
- data/third_party/abseil-cpp/absl/container/internal/compressed_tuple.h +265 -0
- data/third_party/abseil-cpp/absl/container/internal/inlined_vector.h +892 -0
- data/third_party/abseil-cpp/absl/memory/memory.h +695 -0
- data/third_party/abseil-cpp/absl/meta/type_traits.h +759 -0
- data/third_party/abseil-cpp/absl/numeric/int128.cc +404 -0
- data/third_party/abseil-cpp/absl/numeric/int128.h +1091 -0
- data/third_party/abseil-cpp/absl/numeric/int128_have_intrinsic.inc +302 -0
- data/third_party/abseil-cpp/absl/numeric/int128_no_intrinsic.inc +308 -0
- data/third_party/abseil-cpp/absl/strings/ascii.cc +200 -0
- data/third_party/abseil-cpp/absl/strings/ascii.h +241 -0
- data/third_party/abseil-cpp/absl/strings/charconv.cc +985 -0
- data/third_party/abseil-cpp/absl/strings/charconv.h +119 -0
- data/third_party/abseil-cpp/absl/strings/escaping.cc +949 -0
- data/third_party/abseil-cpp/absl/strings/escaping.h +164 -0
- data/third_party/abseil-cpp/absl/strings/internal/char_map.h +156 -0
- data/third_party/abseil-cpp/absl/strings/internal/charconv_bigint.cc +359 -0
- data/third_party/abseil-cpp/absl/strings/internal/charconv_bigint.h +421 -0
- data/third_party/abseil-cpp/absl/strings/internal/charconv_parse.cc +504 -0
- data/third_party/abseil-cpp/absl/strings/internal/charconv_parse.h +99 -0
- data/third_party/abseil-cpp/absl/strings/internal/escaping.cc +180 -0
- data/third_party/abseil-cpp/absl/strings/internal/escaping.h +58 -0
- data/third_party/abseil-cpp/absl/strings/internal/memutil.cc +112 -0
- data/third_party/abseil-cpp/absl/strings/internal/memutil.h +148 -0
- data/third_party/abseil-cpp/absl/strings/internal/ostringstream.cc +36 -0
- data/third_party/abseil-cpp/absl/strings/internal/ostringstream.h +89 -0
- data/third_party/abseil-cpp/absl/strings/internal/resize_uninitialized.h +73 -0
- data/third_party/abseil-cpp/absl/strings/internal/stl_type_traits.h +248 -0
- data/third_party/abseil-cpp/absl/strings/internal/str_join_internal.h +314 -0
- data/third_party/abseil-cpp/absl/strings/internal/str_split_internal.h +455 -0
- data/third_party/abseil-cpp/absl/strings/internal/utf8.cc +53 -0
- data/third_party/abseil-cpp/absl/strings/internal/utf8.h +50 -0
- data/third_party/abseil-cpp/absl/strings/match.cc +40 -0
- data/third_party/abseil-cpp/absl/strings/match.h +90 -0
- data/third_party/abseil-cpp/absl/strings/numbers.cc +916 -0
- data/third_party/abseil-cpp/absl/strings/numbers.h +263 -0
- data/third_party/abseil-cpp/absl/strings/str_cat.cc +246 -0
- data/third_party/abseil-cpp/absl/strings/str_cat.h +408 -0
- data/third_party/abseil-cpp/absl/strings/str_join.h +293 -0
- data/third_party/abseil-cpp/absl/strings/str_replace.cc +82 -0
- data/third_party/abseil-cpp/absl/strings/str_replace.h +219 -0
- data/third_party/abseil-cpp/absl/strings/str_split.cc +139 -0
- data/third_party/abseil-cpp/absl/strings/str_split.h +513 -0
- data/third_party/abseil-cpp/absl/strings/string_view.cc +235 -0
- data/third_party/abseil-cpp/absl/strings/string_view.h +615 -0
- data/third_party/abseil-cpp/absl/strings/strip.h +91 -0
- data/third_party/abseil-cpp/absl/strings/substitute.cc +171 -0
- data/third_party/abseil-cpp/absl/strings/substitute.h +693 -0
- data/third_party/abseil-cpp/absl/types/bad_optional_access.cc +48 -0
- data/third_party/abseil-cpp/absl/types/bad_optional_access.h +78 -0
- data/third_party/abseil-cpp/absl/types/internal/optional.h +396 -0
- data/third_party/abseil-cpp/absl/types/internal/span.h +128 -0
- data/third_party/abseil-cpp/absl/types/optional.h +776 -0
- data/third_party/abseil-cpp/absl/types/span.h +713 -0
- data/third_party/abseil-cpp/absl/utility/utility.h +350 -0
- data/third_party/upb/upb/decode.c +4 -0
- data/third_party/upb/upb/port.c +0 -1
- data/third_party/upb/upb/port_def.inc +1 -3
- data/third_party/upb/upb/table.c +2 -1
- metadata +147 -43
- data/src/core/lib/json/json_common.h +0 -34
- data/src/core/lib/json/json_reader.h +0 -146
- data/src/core/lib/json/json_string.cc +0 -367
- data/src/core/lib/json/json_writer.h +0 -84
data/src/core/lib/gprpp/string_view.h

@@ -121,6 +121,16 @@ class StringView final {
                size());
   }
 
+  // Compares with other.
+  inline int compare(StringView other) {
+    const size_t len = GPR_MIN(size(), other.size());
+    const int ret = strncmp(data(), other.data(), len);
+    if (ret != 0) return ret;
+    if (size() == other.size()) return 0;
+    if (size() < other.size()) return -1;
+    return 1;
+  }
+
  private:
   const char* ptr_;
   size_t size_;

@@ -133,6 +143,10 @@ inline bool operator==(StringView lhs, StringView rhs) {
 
 inline bool operator!=(StringView lhs, StringView rhs) { return !(lhs == rhs); }
 
+inline bool operator<(StringView lhs, StringView rhs) {
+  return lhs.compare(rhs) < 0;
+}
+
 #endif  // GRPC_USE_ABSL
 
 // Converts grpc_slice to StringView.

@@ -150,16 +164,6 @@ inline grpc_core::UniquePtr<char> StringViewToCString(const StringView sv) {
   return grpc_core::UniquePtr<char>(str);
 }
 
-// Compares lhs and rhs.
-inline int StringViewCmp(const StringView lhs, const StringView rhs) {
-  const size_t len = GPR_MIN(lhs.size(), rhs.size());
-  const int ret = strncmp(lhs.data(), rhs.data(), len);
-  if (ret != 0) return ret;
-  if (lhs.size() == rhs.size()) return 0;
-  if (lhs.size() < rhs.size()) return -1;
-  return 1;
-}
-
 }  // namespace grpc_core
 
 #endif /* GRPC_CORE_LIB_GPRPP_STRING_VIEW_H */
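A quick usage sketch of the API change above (not part of the gem diff): callers that previously used the free function StringViewCmp() would now go through the member compare() or the new operator<. The snippet assumes only what the hunks show, plus the existing StringView(const char*) constructor.

#include <cstdio>
#include "src/core/lib/gprpp/string_view.h"

int main() {
  grpc_core::StringView a("alpha");
  grpc_core::StringView b("alphabet");
  // compare() mirrors the removed StringViewCmp(): strncmp over the common
  // prefix, then the shorter string orders first.
  printf("a.compare(b) = %d\n", a.compare(b));      // negative
  printf("a < b = %d\n", static_cast<int>(a < b));  // 1, via the new operator<
  return 0;
}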
data/src/core/lib/iomgr/executor.cc

@@ -143,7 +143,7 @@ void Executor::SetThreading(bool threading) {
 
   if (threading) {
     if (curr_num_threads > 0) {
-      EXECUTOR_TRACE("(%s) SetThreading(true). curr_num_threads
+      EXECUTOR_TRACE("(%s) SetThreading(true). curr_num_threads > 0", name_);
       return;
     }
 
data/src/core/lib/iomgr/poller/eventmanager_libuv.cc (new file)

@@ -0,0 +1,87 @@
+/*
+ *
+ * Copyright 2019 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include <grpc/support/port_platform.h>
+
+#include "src/core/lib/iomgr/poller/eventmanager_libuv.h"
+
+#include <grpc/support/time.h>
+
+grpc::experimental::LibuvEventManager::Options::Options() : num_workers_(-1) {}
+grpc::experimental::LibuvEventManager::Options::Options(int num_workers)
+    : num_workers_(num_workers) {}
+
+grpc::experimental::LibuvEventManager::LibuvEventManager(const Options& options)
+    : options_(options) {
+  int num_workers = options_.num_workers();
+  // Number of workers can't be 0 if we do not accept thread donation.
+  // TODO(guantaol): replaces the hard-coded number with a flag.
+  if (num_workers <= 0) num_workers = 32;
+
+  for (int i = 0; i < num_workers; i++) {
+    workers_.emplace_back(
+        options_.thread_name_prefix().c_str(),
+        [](void* em) { static_cast<LibuvEventManager*>(em)->RunWorkerLoop(); },
+        this);
+    workers_.back().Start();
+  }
+}
+
+grpc::experimental::LibuvEventManager::~LibuvEventManager() {
+  Shutdown();
+  for (auto& th : workers_) {
+    th.Join();
+  }
+}
+
+void grpc::experimental::LibuvEventManager::RunWorkerLoop() {
+  while (true) {
+    // TODO(guantaol): extend the worker loop with real work.
+    if (ShouldStop()) return;
+    gpr_sleep_until(gpr_time_add(gpr_now(GPR_CLOCK_MONOTONIC),
+                                 gpr_time_from_micros(10, GPR_TIMESPAN)));
+  }
+}
+
+bool grpc::experimental::LibuvEventManager::ShouldStop() {
+  return should_stop_.Load(grpc_core::MemoryOrder::ACQUIRE) != 0;
+}
+
+void grpc::experimental::LibuvEventManager::Shutdown() {
+  if (should_stop_.Load(grpc_core::MemoryOrder::ACQUIRE))
+    return;  // Already shut down.
+
+  {
+    grpc_core::MutexLock lock(&shutdown_mu_);
+    while (shutdown_refcount_.Load(grpc_core::MemoryOrder::ACQUIRE) > 0) {
+      shutdown_cv_.Wait(&shutdown_mu_);
+    }
+  }
+  should_stop_.Store(true, grpc_core::MemoryOrder::RELEASE);
+}
+
+void grpc::experimental::LibuvEventManager::ShutdownRef() {
+  shutdown_refcount_.FetchAdd(1, grpc_core::MemoryOrder::RELAXED);
+}
+
+void grpc::experimental::LibuvEventManager::ShutdownUnref() {
+  if (shutdown_refcount_.FetchSub(1, grpc_core::MemoryOrder::ACQ_REL) == 1) {
+    grpc_core::MutexLock lock(&shutdown_mu_);
+    shutdown_cv_.Signal();
+  }
+}
data/src/core/lib/iomgr/poller/eventmanager_libuv.h (new file)

@@ -0,0 +1,88 @@
+/*
+ *
+ * Copyright 2019 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#ifndef GRPC_CORE_LIB_IOMGR_POLLER_EVENTMANAGER_LIBUV_H
+#define GRPC_CORE_LIB_IOMGR_POLLER_EVENTMANAGER_LIBUV_H
+
+#include <grpc/support/port_platform.h>
+
+#include <string>
+#include <vector>
+
+#include "src/core/lib/gprpp/atomic.h"
+#include "src/core/lib/gprpp/sync.h"
+#include "src/core/lib/gprpp/thd.h"
+
+namespace grpc {
+namespace experimental {
+
+class LibuvEventManager {
+ public:
+  class Options {
+   public:
+    Options();
+    Options(int num_workers);
+
+    int num_workers() const { return num_workers_; }
+    void set_num_workers(int num) { num_workers_ = num; }
+
+    const std::string& thread_name_prefix() const {
+      return thread_name_prefix_;
+    }
+    void set_thread_name_prefix(const std::string& name) {
+      thread_name_prefix_ = name;
+    }
+
+   private:
+    // Number of worker threads to create at startup. If less than 0, uses the
+    // default value of 32.
+    int num_workers_;
+    // Name prefix used for worker.
+    std::string thread_name_prefix_;
+  };
+
+  explicit LibuvEventManager(const Options& options);
+  virtual ~LibuvEventManager();
+
+  void Shutdown();
+  void ShutdownRef();
+  void ShutdownUnref();
+
+ private:
+  // Function run by the worker threads.
+  void RunWorkerLoop();
+
+  // Whether the EventManager has been shut down.
+  bool ShouldStop();
+
+  const Options options_;
+  // Whether the EventManager workers should be stopped.
+  grpc_core::Atomic<bool> should_stop_{false};
+  // A refcount preventing the EventManager from shutdown.
+  grpc_core::Atomic<int> shutdown_refcount_{0};
+  // Worker threads of the EventManager.
+  std::vector<grpc_core::Thread> workers_;
+  // Mutex and condition variable used for shutdown.
+  grpc_core::Mutex shutdown_mu_;
+  grpc_core::CondVar shutdown_cv_;
+};
+
+}  // namespace experimental
+}  // namespace grpc
+
+#endif /* GRPC_CORE_LIB_IOMGR_POLLER_EVENTMANAGER_LIBUV_H */
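A minimal, hedged sketch of how the new experimental event manager could be driven from the public surface declared above; in this release the worker loop is still a stub, so the example only starts the workers, holds a shutdown reference around some work, and lets the destructor join the threads.

#include "src/core/lib/iomgr/poller/eventmanager_libuv.h"

void RunLibuvEventManagerDemo() {
  grpc::experimental::LibuvEventManager::Options options(/*num_workers=*/4);
  options.set_thread_name_prefix("libuv-em");
  grpc::experimental::LibuvEventManager em(options);

  // ShutdownRef()/ShutdownUnref() bracket work that must complete before
  // Shutdown() is allowed to finish.
  em.ShutdownRef();
  // ... work that relies on the event manager staying alive ...
  em.ShutdownUnref();
  // ~LibuvEventManager() calls Shutdown() and joins the worker threads.
}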
data/src/core/lib/iomgr/socket_utils_common_posix.cc

@@ -50,6 +50,20 @@
 #include "src/core/lib/iomgr/sockaddr.h"
 #include "src/core/lib/iomgr/sockaddr_utils.h"
 
+/* set a socket to use zerocopy */
+grpc_error* grpc_set_socket_zerocopy(int fd) {
+#ifdef GRPC_LINUX_ERRQUEUE
+  const int enable = 1;
+  auto err = setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &enable, sizeof(enable));
+  if (err != 0) {
+    return GRPC_OS_ERROR(errno, "setsockopt(SO_ZEROCOPY)");
+  }
+  return GRPC_ERROR_NONE;
+#else
+  return GRPC_OS_ERROR(ENOSYS, "setsockopt(SO_ZEROCOPY)");
+#endif
+}
+
 /* set a socket to non blocking mode */
 grpc_error* grpc_set_socket_nonblocking(int fd, int non_blocking) {
   int oldflags = fcntl(fd, F_GETFL, 0);
data/src/core/lib/iomgr/socket_utils_posix.h

@@ -31,10 +31,22 @@
 #include "src/core/lib/iomgr/socket_factory_posix.h"
 #include "src/core/lib/iomgr/socket_mutator.h"
 
+#ifdef GRPC_LINUX_ERRQUEUE
+#ifndef SO_ZEROCOPY
+#define SO_ZEROCOPY 60
+#endif
+#ifndef SO_EE_ORIGIN_ZEROCOPY
+#define SO_EE_ORIGIN_ZEROCOPY 5
+#endif
+#endif /* ifdef GRPC_LINUX_ERRQUEUE */
+
 /* a wrapper for accept or accept4 */
 int grpc_accept4(int sockfd, grpc_resolved_address* resolved_addr, int nonblock,
                  int cloexec);
 
+/* set a socket to use zerocopy */
+grpc_error* grpc_set_socket_zerocopy(int fd);
+
 /* set a socket to non blocking mode */
 grpc_error* grpc_set_socket_nonblocking(int fd, int non_blocking);
 
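A hedged sketch of calling the new helper when preparing a socket; the wrapper function and fd here are illustrative only, but the grpc_set_socket_zerocopy() signature matches the declaration above and the error handling uses the existing grpc_error helpers.

#include <grpc/support/log.h>
#include "src/core/lib/iomgr/error.h"
#include "src/core/lib/iomgr/socket_utils_posix.h"

// Try to enable SO_ZEROCOPY on a freshly created TCP socket; on platforms or
// kernels without support the call returns an error that we simply log.
static void maybe_enable_zerocopy(int fd) {
  grpc_error* err = grpc_set_socket_zerocopy(fd);
  if (err != GRPC_ERROR_NONE) {
    gpr_log(GPR_INFO, "TX zerocopy unavailable: %s", grpc_error_string(err));
    GRPC_ERROR_UNREF(err);
  }
}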
data/src/core/lib/iomgr/tcp_custom.h

@@ -24,6 +24,9 @@
 #include "src/core/lib/iomgr/endpoint.h"
 #include "src/core/lib/iomgr/sockaddr.h"
 
+// Same number as the micro of SO_REUSEPORT in kernel
+#define GRPC_CUSTOM_SOCKET_OPT_SO_REUSEPORT (0x00000200u)
+
 typedef struct grpc_tcp_listener grpc_tcp_listener;
 typedef struct grpc_custom_tcp_connect grpc_custom_tcp_connect;
 
data/src/core/lib/iomgr/tcp_posix.cc

@@ -36,6 +36,7 @@
 #include <sys/types.h>
 #include <unistd.h>
 #include <algorithm>
+#include <unordered_map>
 
 #include <grpc/slice.h>
 #include <grpc/support/alloc.h>

@@ -49,9 +50,11 @@
 #include "src/core/lib/debug/trace.h"
 #include "src/core/lib/gpr/string.h"
 #include "src/core/lib/gpr/useful.h"
+#include "src/core/lib/gprpp/sync.h"
 #include "src/core/lib/iomgr/buffer_list.h"
 #include "src/core/lib/iomgr/ev_posix.h"
 #include "src/core/lib/iomgr/executor.h"
+#include "src/core/lib/iomgr/socket_utils_posix.h"
 #include "src/core/lib/profiling/timers.h"
 #include "src/core/lib/slice/slice_internal.h"
 #include "src/core/lib/slice/slice_string_helpers.h"

@@ -71,6 +74,15 @@
 #define SENDMSG_FLAGS 0
 #endif
 
+// TCP zero copy sendmsg flag.
+// NB: We define this here as a fallback in case we're using an older set of
+// library headers that has not defined MSG_ZEROCOPY. Since this constant is
+// part of the kernel, we are guaranteed it will never change/disagree so
+// defining it here is safe.
+#ifndef MSG_ZEROCOPY
+#define MSG_ZEROCOPY 0x4000000
+#endif
+
 #ifdef GRPC_MSG_IOVLEN_TYPE
 typedef GRPC_MSG_IOVLEN_TYPE msg_iovlen_type;
 #else
@@ -79,6 +91,264 @@ typedef size_t msg_iovlen_type;
 
 extern grpc_core::TraceFlag grpc_tcp_trace;
 
+namespace grpc_core {
+
+class TcpZerocopySendRecord {
+ public:
+  TcpZerocopySendRecord() { grpc_slice_buffer_init(&buf_); }
+
+  ~TcpZerocopySendRecord() {
+    AssertEmpty();
+    grpc_slice_buffer_destroy_internal(&buf_);
+  }
+
+  // Given the slices that we wish to send, and the current offset into the
+  // slice buffer (indicating which have already been sent), populate an iovec
+  // array that will be used for a zerocopy enabled sendmsg().
+  msg_iovlen_type PopulateIovs(size_t* unwind_slice_idx,
+                               size_t* unwind_byte_idx, size_t* sending_length,
+                               iovec* iov);
+
+  // A sendmsg() may not be able to send the bytes that we requested at this
+  // time, returning EAGAIN (possibly due to backpressure). In this case,
+  // unwind the offset into the slice buffer so we retry sending these bytes.
+  void UnwindIfThrottled(size_t unwind_slice_idx, size_t unwind_byte_idx) {
+    out_offset_.byte_idx = unwind_byte_idx;
+    out_offset_.slice_idx = unwind_slice_idx;
+  }
+
+  // Update the offset into the slice buffer based on how much we wanted to sent
+  // vs. what sendmsg() actually sent (which may be lower, possibly due to
+  // backpressure).
+  void UpdateOffsetForBytesSent(size_t sending_length, size_t actually_sent);
+
+  // Indicates whether all underlying data has been sent or not.
+  bool AllSlicesSent() { return out_offset_.slice_idx == buf_.count; }
+
+  // Reset this structure for a new tcp_write() with zerocopy.
+  void PrepareForSends(grpc_slice_buffer* slices_to_send) {
+    AssertEmpty();
+    out_offset_.slice_idx = 0;
+    out_offset_.byte_idx = 0;
+    grpc_slice_buffer_swap(slices_to_send, &buf_);
+    Ref();
+  }
+
+  // References: 1 reference per sendmsg(), and 1 for the tcp_write().
+  void Ref() { ref_.FetchAdd(1, MemoryOrder::RELAXED); }
+
+  // Unref: called when we get an error queue notification for a sendmsg(), if a
+  // sendmsg() failed or when tcp_write() is done.
+  bool Unref() {
+    const intptr_t prior = ref_.FetchSub(1, MemoryOrder::ACQ_REL);
+    GPR_DEBUG_ASSERT(prior > 0);
+    if (prior == 1) {
+      AllSendsComplete();
+      return true;
+    }
+    return false;
+  }
+
+ private:
+  struct OutgoingOffset {
+    size_t slice_idx = 0;
+    size_t byte_idx = 0;
+  };
+
+  void AssertEmpty() {
+    GPR_DEBUG_ASSERT(buf_.count == 0);
+    GPR_DEBUG_ASSERT(buf_.length == 0);
+    GPR_DEBUG_ASSERT(ref_.Load(MemoryOrder::RELAXED) == 0);
+  }
+
+  // When all sendmsg() calls associated with this tcp_write() have been
+  // completed (ie. we have received the notifications for each sequence number
+  // for each sendmsg()) and all reference counts have been dropped, drop our
+  // reference to the underlying data since we no longer need it.
+  void AllSendsComplete() {
+    GPR_DEBUG_ASSERT(ref_.Load(MemoryOrder::RELAXED) == 0);
+    grpc_slice_buffer_reset_and_unref_internal(&buf_);
+  }
+
+  grpc_slice_buffer buf_;
+  Atomic<intptr_t> ref_;
+  OutgoingOffset out_offset_;
+};
+
+class TcpZerocopySendCtx {
+ public:
+  static constexpr int kDefaultMaxSends = 4;
+  static constexpr size_t kDefaultSendBytesThreshold = 16 * 1024;  // 16KB
+
+  TcpZerocopySendCtx(int max_sends = kDefaultMaxSends,
+                     size_t send_bytes_threshold = kDefaultSendBytesThreshold)
+      : max_sends_(max_sends),
+        free_send_records_size_(max_sends),
+        threshold_bytes_(send_bytes_threshold) {
+    send_records_ = static_cast<TcpZerocopySendRecord*>(
+        gpr_malloc(max_sends * sizeof(*send_records_)));
+    free_send_records_ = static_cast<TcpZerocopySendRecord**>(
+        gpr_malloc(max_sends * sizeof(*free_send_records_)));
+    if (send_records_ == nullptr || free_send_records_ == nullptr) {
+      gpr_free(send_records_);
+      gpr_free(free_send_records_);
+      gpr_log(GPR_INFO, "Disabling TCP TX zerocopy due to memory pressure.\n");
+      memory_limited_ = true;
+    } else {
+      for (int idx = 0; idx < max_sends_; ++idx) {
+        new (send_records_ + idx) TcpZerocopySendRecord();
+        free_send_records_[idx] = send_records_ + idx;
+      }
+    }
+  }
+
+  ~TcpZerocopySendCtx() {
+    if (send_records_ != nullptr) {
+      for (int idx = 0; idx < max_sends_; ++idx) {
+        send_records_[idx].~TcpZerocopySendRecord();
+      }
+    }
+    gpr_free(send_records_);
+    gpr_free(free_send_records_);
+  }
+
+  // True if we were unable to allocate the various bookkeeping structures at
+  // transport initialization time. If memory limited, we do not zerocopy.
+  bool memory_limited() const { return memory_limited_; }
+
+  // TCP send zerocopy maintains an implicit sequence number for every
+  // successful sendmsg() with zerocopy enabled; the kernel later gives us an
+  // error queue notification with this sequence number indicating that the
+  // underlying data buffers that we sent can now be released. Once that
+  // notification is received, we can release the buffers associated with this
+  // zerocopy send record. Here, we associate the sequence number with the data
+  // buffers that were sent with the corresponding call to sendmsg().
+  void NoteSend(TcpZerocopySendRecord* record) {
+    record->Ref();
+    AssociateSeqWithSendRecord(last_send_, record);
+    ++last_send_;
+  }
+
+  // If sendmsg() actually failed, though, we need to revert the sequence number
+  // that we speculatively bumped before calling sendmsg(). Note that we bump
+  // this sequence number and perform relevant bookkeeping (see: NoteSend())
+  // *before* calling sendmsg() since, if we called it *after* sendmsg(), then
+  // there is a possible race with the release notification which could occur on
+  // another thread before we do the necessary bookkeeping. Hence, calling
+  // NoteSend() *before* sendmsg() and implementing an undo function is needed.
+  void UndoSend() {
+    --last_send_;
+    if (ReleaseSendRecord(last_send_)->Unref()) {
+      // We should still be holding the ref taken by tcp_write().
+      GPR_DEBUG_ASSERT(0);
+    }
+  }
+
+  // Simply associate this send record (and the underlying sent data buffers)
+  // with the implicit sequence number for this zerocopy sendmsg().
+  void AssociateSeqWithSendRecord(uint32_t seq, TcpZerocopySendRecord* record) {
+    MutexLock guard(&lock_);
+    ctx_lookup_.emplace(seq, record);
+  }
+
+  // Get a send record for a send that we wish to do with zerocopy.
+  TcpZerocopySendRecord* GetSendRecord() {
+    MutexLock guard(&lock_);
+    return TryGetSendRecordLocked();
+  }
+
+  // A given send record corresponds to a single tcp_write() with zerocopy
+  // enabled. This can result in several sendmsg() calls to flush all of the
+  // data to wire. Each sendmsg() takes a reference on the
+  // TcpZerocopySendRecord, and corresponds to a single sequence number.
+  // ReleaseSendRecord releases a reference on TcpZerocopySendRecord for a
+  // single sequence number. This is called either when we receive the relevant
+  // error queue notification (saying that we can discard the underlying
+  // buffers for this sendmsg()) is received from the kernel - or, in case
+  // sendmsg() was unsuccessful to begin with.
+  TcpZerocopySendRecord* ReleaseSendRecord(uint32_t seq) {
+    MutexLock guard(&lock_);
+    return ReleaseSendRecordLocked(seq);
+  }
+
+  // After all the references to a TcpZerocopySendRecord are released, we can
+  // add it back to the pool (of size max_sends_). Note that we can only have
+  // max_sends_ tcp_write() instances with zerocopy enabled in flight at the
+  // same time.
+  void PutSendRecord(TcpZerocopySendRecord* record) {
+    GPR_DEBUG_ASSERT(record >= send_records_ &&
+                     record < send_records_ + max_sends_);
+    MutexLock guard(&lock_);
+    PutSendRecordLocked(record);
+  }
+
+  // Indicate that we are disposing of this zerocopy context. This indicator
+  // will prevent new zerocopy writes from being issued.
+  void Shutdown() { shutdown_.Store(true, MemoryOrder::RELEASE); }
+
+  // Indicates that there are no inflight tcp_write() instances with zerocopy
+  // enabled.
+  bool AllSendRecordsEmpty() {
+    MutexLock guard(&lock_);
+    return free_send_records_size_ == max_sends_;
+  }
+
+  bool enabled() const { return enabled_; }
+
+  void set_enabled(bool enabled) {
+    GPR_DEBUG_ASSERT(!enabled || !memory_limited());
+    enabled_ = enabled;
+  }
+
+  // Only use zerocopy if we are sending at least this many bytes. The
+  // additional overhead of reading the error queue for notifications means that
+  // zerocopy is not useful for small transfers.
+  size_t threshold_bytes() const { return threshold_bytes_; }
+
+ private:
+  TcpZerocopySendRecord* ReleaseSendRecordLocked(uint32_t seq) {
+    auto iter = ctx_lookup_.find(seq);
+    GPR_DEBUG_ASSERT(iter != ctx_lookup_.end());
+    TcpZerocopySendRecord* record = iter->second;
+    ctx_lookup_.erase(iter);
+    return record;
+  }
+
+  TcpZerocopySendRecord* TryGetSendRecordLocked() {
+    if (shutdown_.Load(MemoryOrder::ACQUIRE)) {
+      return nullptr;
+    }
+    if (free_send_records_size_ == 0) {
+      return nullptr;
+    }
+    free_send_records_size_--;
+    return free_send_records_[free_send_records_size_];
+  }
+
+  void PutSendRecordLocked(TcpZerocopySendRecord* record) {
+    GPR_DEBUG_ASSERT(free_send_records_size_ < max_sends_);
+    free_send_records_[free_send_records_size_] = record;
+    free_send_records_size_++;
+  }
+
+  TcpZerocopySendRecord* send_records_;
+  TcpZerocopySendRecord** free_send_records_;
+  int max_sends_;
+  int free_send_records_size_;
+  Mutex lock_;
+  uint32_t last_send_ = 0;
+  Atomic<bool> shutdown_;
+  bool enabled_ = false;
+  size_t threshold_bytes_ = kDefaultSendBytesThreshold;
+  std::unordered_map<uint32_t, TcpZerocopySendRecord*> ctx_lookup_;
+  bool memory_limited_ = false;
+};
+
+}  // namespace grpc_core
+
+using grpc_core::TcpZerocopySendCtx;
+using grpc_core::TcpZerocopySendRecord;
+
 namespace {
 struct grpc_tcp {
   grpc_endpoint base;
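The sequence-number bookkeeping above is the subtle part of this change. The following standalone model (plain C++, not gRPC code, no gRPC types) reproduces the protocol in miniature: take a reference and bind the next sequence number before sendmsg(), undo on failure, and release when the kernel's error-queue notification for that sequence number arrives.

#include <cassert>
#include <cstdint>
#include <unordered_map>

struct Record { int refs = 0; };

struct SendCtx {
  uint32_t last_send = 0;
  std::unordered_map<uint32_t, Record*> lookup;

  // Called *before* sendmsg(): take a ref and bind the next sequence number.
  void NoteSend(Record* r) { r->refs++; lookup.emplace(last_send++, r); }
  // Called when sendmsg() fails: roll the sequence number back, drop the ref.
  void UndoSend() { Release(--last_send)->refs--; }
  // Called when the error-queue notification for `seq` arrives.
  Record* Release(uint32_t seq) {
    auto it = lookup.find(seq);
    assert(it != lookup.end());
    Record* r = it->second;
    lookup.erase(it);
    return r;
  }
};

int main() {
  Record rec;
  SendCtx ctx;
  rec.refs = 1;            // reference held by the in-flight tcp_write()
  ctx.NoteSend(&rec);      // seq 0 bound to rec; refs == 2
  // ... sendmsg(..., MSG_ZEROCOPY) succeeds; later the notification arrives:
  ctx.Release(0)->refs--;  // refs back to 1; buffers for seq 0 can be reused
  assert(rec.refs == 1);   // tcp_write() still owns its reference
  return 0;
}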
@@ -142,6 +412,8 @@ struct grpc_tcp {
   bool ts_capable; /* Cache whether we can set timestamping options */
   gpr_atm stop_error_notification; /* Set to 1 if we do not want to be notified
                                       on errors anymore */
+  TcpZerocopySendCtx tcp_zerocopy_send_ctx;
+  TcpZerocopySendRecord* current_zerocopy_send = nullptr;
 };
 
 struct backup_poller {

@@ -151,6 +423,8 @@ struct backup_poller {
 
 }  // namespace
 
+static void ZerocopyDisableAndWaitForRemaining(grpc_tcp* tcp);
+
 #define BACKUP_POLLER_POLLSET(b) ((grpc_pollset*)((b) + 1))
 
 static gpr_atm g_uncovered_notifications_pending;

@@ -339,6 +613,7 @@ static void tcp_handle_write(void* arg /* grpc_tcp */, grpc_error* error);
 
 static void tcp_shutdown(grpc_endpoint* ep, grpc_error* why) {
   grpc_tcp* tcp = reinterpret_cast<grpc_tcp*>(ep);
+  ZerocopyDisableAndWaitForRemaining(tcp);
   grpc_fd_shutdown(tcp->em_fd, why);
   grpc_resource_user_shutdown(tcp->resource_user);
 }

@@ -357,6 +632,7 @@ static void tcp_free(grpc_tcp* tcp) {
   gpr_mu_unlock(&tcp->tb_mu);
   tcp->outgoing_buffer_arg = nullptr;
   gpr_mu_destroy(&tcp->tb_mu);
+  tcp->tcp_zerocopy_send_ctx.~TcpZerocopySendCtx();
   gpr_free(tcp);
 }
 

@@ -390,6 +666,7 @@ static void tcp_destroy(grpc_endpoint* ep) {
   grpc_tcp* tcp = reinterpret_cast<grpc_tcp*>(ep);
   grpc_slice_buffer_reset_and_unref_internal(&tcp->last_read_buffer);
   if (grpc_event_engine_can_track_errors()) {
+    ZerocopyDisableAndWaitForRemaining(tcp);
     gpr_atm_no_barrier_store(&tcp->stop_error_notification, true);
     grpc_fd_set_error(tcp->em_fd);
   }
@@ -652,13 +929,13 @@ static void tcp_read(grpc_endpoint* ep, grpc_slice_buffer* incoming_buffer,
 
 /* A wrapper around sendmsg. It sends \a msg over \a fd and returns the number
  * of bytes sent. */
-ssize_t tcp_send(int fd, const struct msghdr* msg) {
+ssize_t tcp_send(int fd, const struct msghdr* msg, int additional_flags = 0) {
   GPR_TIMER_SCOPE("sendmsg", 1);
   ssize_t sent_length;
   do {
     /* TODO(klempner): Cork if this is a partial write */
     GRPC_STATS_INC_SYSCALL_WRITE();
-    sent_length = sendmsg(fd, msg, SENDMSG_FLAGS);
+    sent_length = sendmsg(fd, msg, SENDMSG_FLAGS | additional_flags);
   } while (sent_length < 0 && errno == EINTR);
   return sent_length;
 }

@@ -671,16 +948,52 @@ ssize_t tcp_send(int fd, const struct msghdr* msg) {
  */
 static bool tcp_write_with_timestamps(grpc_tcp* tcp, struct msghdr* msg,
                                       size_t sending_length,
-                                      ssize_t* sent_length
+                                      ssize_t* sent_length,
+                                      int additional_flags = 0);
 
 /** The callback function to be invoked when we get an error on the socket. */
 static void tcp_handle_error(void* arg /* grpc_tcp */, grpc_error* error);
 
+static TcpZerocopySendRecord* tcp_get_send_zerocopy_record(
+    grpc_tcp* tcp, grpc_slice_buffer* buf);
+
 #ifdef GRPC_LINUX_ERRQUEUE
+static bool process_errors(grpc_tcp* tcp);
+
+static TcpZerocopySendRecord* tcp_get_send_zerocopy_record(
+    grpc_tcp* tcp, grpc_slice_buffer* buf) {
+  TcpZerocopySendRecord* zerocopy_send_record = nullptr;
+  const bool use_zerocopy =
+      tcp->tcp_zerocopy_send_ctx.enabled() &&
+      tcp->tcp_zerocopy_send_ctx.threshold_bytes() < buf->length;
+  if (use_zerocopy) {
+    zerocopy_send_record = tcp->tcp_zerocopy_send_ctx.GetSendRecord();
+    if (zerocopy_send_record == nullptr) {
+      process_errors(tcp);
+      zerocopy_send_record = tcp->tcp_zerocopy_send_ctx.GetSendRecord();
+    }
+    if (zerocopy_send_record != nullptr) {
+      zerocopy_send_record->PrepareForSends(buf);
+      GPR_DEBUG_ASSERT(buf->count == 0);
+      GPR_DEBUG_ASSERT(buf->length == 0);
+      tcp->outgoing_byte_idx = 0;
+      tcp->outgoing_buffer = nullptr;
+    }
+  }
+  return zerocopy_send_record;
+}
+
+static void ZerocopyDisableAndWaitForRemaining(grpc_tcp* tcp) {
+  tcp->tcp_zerocopy_send_ctx.Shutdown();
+  while (!tcp->tcp_zerocopy_send_ctx.AllSendRecordsEmpty()) {
+    process_errors(tcp);
+  }
+}
 
 static bool tcp_write_with_timestamps(grpc_tcp* tcp, struct msghdr* msg,
                                       size_t sending_length,
-                                      ssize_t* sent_length
+                                      ssize_t* sent_length,
+                                      int additional_flags) {
   if (!tcp->socket_ts_enabled) {
     uint32_t opt = grpc_core::kTimestampingSocketOptions;
     if (setsockopt(tcp->fd, SOL_SOCKET, SO_TIMESTAMPING,

@@ -708,7 +1021,7 @@ static bool tcp_write_with_timestamps(grpc_tcp* tcp, struct msghdr* msg,
   msg->msg_controllen = CMSG_SPACE(sizeof(uint32_t));
 
   /* If there was an error on sendmsg the logic in tcp_flush will handle it. */
-  ssize_t length = tcp_send(tcp->fd, msg);
+  ssize_t length = tcp_send(tcp->fd, msg, additional_flags);
   *sent_length = length;
   /* Only save timestamps if all the bytes were taken by sendmsg. */
   if (sending_length == static_cast<size_t>(length)) {
@@ -722,6 +1035,43 @@ static bool tcp_write_with_timestamps(grpc_tcp* tcp, struct msghdr* msg,
   return true;
 }
 
+static void UnrefMaybePutZerocopySendRecord(grpc_tcp* tcp,
+                                            TcpZerocopySendRecord* record,
+                                            uint32_t seq, const char* tag);
+// Reads \a cmsg to process zerocopy control messages.
+static void process_zerocopy(grpc_tcp* tcp, struct cmsghdr* cmsg) {
+  GPR_DEBUG_ASSERT(cmsg);
+  auto serr = reinterpret_cast<struct sock_extended_err*>(CMSG_DATA(cmsg));
+  GPR_DEBUG_ASSERT(serr->ee_errno == 0);
+  GPR_DEBUG_ASSERT(serr->ee_origin == SO_EE_ORIGIN_ZEROCOPY);
+  const uint32_t lo = serr->ee_info;
+  const uint32_t hi = serr->ee_data;
+  for (uint32_t seq = lo; seq <= hi; ++seq) {
+    // TODO(arjunroy): It's likely that lo and hi refer to zerocopy sequence
+    // numbers that are generated by a single call to grpc_endpoint_write; ie.
+    // we can batch the unref operation. So, check if record is the same for
+    // both; if so, batch the unref/put.
+    TcpZerocopySendRecord* record =
+        tcp->tcp_zerocopy_send_ctx.ReleaseSendRecord(seq);
+    GPR_DEBUG_ASSERT(record);
+    UnrefMaybePutZerocopySendRecord(tcp, record, seq, "CALLBACK RCVD");
+  }
+}
+
+// Whether the cmsg received from error queue is of the IPv4 or IPv6 levels.
+static bool CmsgIsIpLevel(const cmsghdr& cmsg) {
+  return (cmsg.cmsg_level == SOL_IPV6 && cmsg.cmsg_type == IPV6_RECVERR) ||
+         (cmsg.cmsg_level == SOL_IP && cmsg.cmsg_type == IP_RECVERR);
+}
+
+static bool CmsgIsZeroCopy(const cmsghdr& cmsg) {
+  if (!CmsgIsIpLevel(cmsg)) {
+    return false;
+  }
+  auto serr = reinterpret_cast<const sock_extended_err*> CMSG_DATA(&cmsg);
+  return serr->ee_errno == 0 && serr->ee_origin == SO_EE_ORIGIN_ZEROCOPY;
+}
+
 /** Reads \a cmsg to derive timestamps from the control messages. If a valid
  * timestamp is found, the traced buffer list is updated with this timestamp.
  * The caller of this function should be looping on the control messages found
@@ -783,73 +1133,76 @@ struct cmsghdr* process_timestamp(grpc_tcp* tcp, msghdr* msg,
 /** For linux platforms, reads the socket's error queue and processes error
  * messages from the queue.
  */
-static
+static bool process_errors(grpc_tcp* tcp) {
+  bool processed_err = false;
+  struct iovec iov;
+  iov.iov_base = nullptr;
+  iov.iov_len = 0;
+  struct msghdr msg;
+  msg.msg_name = nullptr;
+  msg.msg_namelen = 0;
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 0;
+  msg.msg_flags = 0;
+  /* Allocate enough space so we don't need to keep increasing this as size
+   * of OPT_STATS increase */
+  constexpr size_t cmsg_alloc_space =
+      CMSG_SPACE(sizeof(grpc_core::scm_timestamping)) +
+      CMSG_SPACE(sizeof(sock_extended_err) + sizeof(sockaddr_in)) +
+      CMSG_SPACE(32 * NLA_ALIGN(NLA_HDRLEN + sizeof(uint64_t)));
+  /* Allocate aligned space for cmsgs received along with timestamps */
+  union {
+    char rbuf[cmsg_alloc_space];
+    struct cmsghdr align;
+  } aligned_buf;
+  msg.msg_control = aligned_buf.rbuf;
+  msg.msg_controllen = sizeof(aligned_buf.rbuf);
+  int r, saved_errno;
   while (true) {
-    struct iovec iov;
-    iov.iov_base = nullptr;
-    iov.iov_len = 0;
-    struct msghdr msg;
-    msg.msg_name = nullptr;
-    msg.msg_namelen = 0;
-    msg.msg_iov = &iov;
-    msg.msg_iovlen = 0;
-    msg.msg_flags = 0;
-
-    /* Allocate enough space so we don't need to keep increasing this as size
-     * of OPT_STATS increase */
-    constexpr size_t cmsg_alloc_space =
-        CMSG_SPACE(sizeof(grpc_core::scm_timestamping)) +
-        CMSG_SPACE(sizeof(sock_extended_err) + sizeof(sockaddr_in)) +
-        CMSG_SPACE(32 * NLA_ALIGN(NLA_HDRLEN + sizeof(uint64_t)));
-    /* Allocate aligned space for cmsgs received along with timestamps */
-    union {
-      char rbuf[cmsg_alloc_space];
-      struct cmsghdr align;
-    } aligned_buf;
-    memset(&aligned_buf, 0, sizeof(aligned_buf));
-
-    msg.msg_control = aligned_buf.rbuf;
-    msg.msg_controllen = sizeof(aligned_buf.rbuf);
-
-    int r, saved_errno;
     do {
       r = recvmsg(tcp->fd, &msg, MSG_ERRQUEUE);
       saved_errno = errno;
     } while (r < 0 && saved_errno == EINTR);
 
     if (r == -1 && saved_errno == EAGAIN) {
-      return; /* No more errors to process */
+      return processed_err; /* No more errors to process */
     }
     if (r == -1) {
-      return;
+      return processed_err;
     }
-    if ((msg.msg_flags & MSG_CTRUNC) != 0) {
+    if (GPR_UNLIKELY((msg.msg_flags & MSG_CTRUNC) != 0)) {
       gpr_log(GPR_ERROR, "Error message was truncated.");
     }
 
     if (msg.msg_controllen == 0) {
       /* There was no control message found. It was probably spurious. */
-      return;
+      return processed_err;
    }
     bool seen = false;
     for (auto cmsg = CMSG_FIRSTHDR(&msg); cmsg && cmsg->cmsg_len;
          cmsg = CMSG_NXTHDR(&msg, cmsg)) {
-      if (cmsg
-
-
-
+      if (CmsgIsZeroCopy(*cmsg)) {
+        process_zerocopy(tcp, cmsg);
+        seen = true;
+        processed_err = true;
+      } else if (cmsg->cmsg_level == SOL_SOCKET &&
+                 cmsg->cmsg_type == SCM_TIMESTAMPING) {
+        cmsg = process_timestamp(tcp, &msg, cmsg);
+        seen = true;
+        processed_err = true;
+      } else {
+        /* Got a control message that is not a timestamp or zerocopy. Don't know
+         * how to handle this. */
         if (GRPC_TRACE_FLAG_ENABLED(grpc_tcp_trace)) {
           gpr_log(GPR_INFO,
                   "unknown control message cmsg_level:%d cmsg_type:%d",
                   cmsg->cmsg_level, cmsg->cmsg_type);
         }
-        return;
+        return processed_err;
       }
-      cmsg = process_timestamp(tcp, &msg, cmsg);
-      seen = true;
     }
     if (!seen) {
-      return;
+      return processed_err;
     }
   }
 }
@@ -870,18 +1223,28 @@ static void tcp_handle_error(void* arg /* grpc_tcp */, grpc_error* error) {
 
   /* We are still interested in collecting timestamps, so let's try reading
    * them. */
-  process_errors(tcp);
+  bool processed = process_errors(tcp);
   /* This might not a timestamps error. Set the read and write closures to be
    * ready. */
-
-
+  if (!processed) {
+    grpc_fd_set_readable(tcp->em_fd);
+    grpc_fd_set_writable(tcp->em_fd);
+  }
   grpc_fd_notify_on_error(tcp->em_fd, &tcp->error_closure);
 }
 
 #else /* GRPC_LINUX_ERRQUEUE */
+static TcpZerocopySendRecord* tcp_get_send_zerocopy_record(
+    grpc_tcp* tcp, grpc_slice_buffer* buf) {
+  return nullptr;
+}
+
+static void ZerocopyDisableAndWaitForRemaining(grpc_tcp* tcp) {}
+
 static bool tcp_write_with_timestamps(grpc_tcp* /*tcp*/, struct msghdr* /*msg*/,
                                       size_t /*sending_length*/,
-                                      ssize_t* /*sent_length
+                                      ssize_t* /*sent_length*/,
+                                      int /*additional_flags*/) {
   gpr_log(GPR_ERROR, "Write with timestamps not supported for this platform");
   GPR_ASSERT(0);
   return false;
@@ -907,12 +1270,138 @@ void tcp_shutdown_buffer_list(grpc_tcp* tcp) {
|
|
907
1270
|
}
|
908
1271
|
}
|
909
1272
|
|
910
|
-
/* returns true if done, false if pending; if returning true, *error is set */
|
911
1273
|
#if defined(IOV_MAX) && IOV_MAX < 1000
|
912
1274
|
#define MAX_WRITE_IOVEC IOV_MAX
|
913
1275
|
#else
|
914
1276
|
#define MAX_WRITE_IOVEC 1000
|
915
1277
|
#endif
|
1278
|
+
msg_iovlen_type TcpZerocopySendRecord::PopulateIovs(size_t* unwind_slice_idx,
|
1279
|
+
size_t* unwind_byte_idx,
|
1280
|
+
size_t* sending_length,
|
1281
|
+
iovec* iov) {
|
1282
|
+
msg_iovlen_type iov_size;
|
1283
|
+
*unwind_slice_idx = out_offset_.slice_idx;
|
1284
|
+
*unwind_byte_idx = out_offset_.byte_idx;
|
1285
|
+
for (iov_size = 0;
|
1286
|
+
out_offset_.slice_idx != buf_.count && iov_size != MAX_WRITE_IOVEC;
|
1287
|
+
iov_size++) {
|
1288
|
+
iov[iov_size].iov_base =
|
1289
|
+
GRPC_SLICE_START_PTR(buf_.slices[out_offset_.slice_idx]) +
|
1290
|
+
out_offset_.byte_idx;
|
1291
|
+
iov[iov_size].iov_len =
|
1292
|
+
GRPC_SLICE_LENGTH(buf_.slices[out_offset_.slice_idx]) -
|
1293
|
+
out_offset_.byte_idx;
|
1294
|
+
*sending_length += iov[iov_size].iov_len;
|
1295
|
+
++(out_offset_.slice_idx);
|
1296
|
+
out_offset_.byte_idx = 0;
|
1297
|
+
}
|
1298
|
+
GPR_DEBUG_ASSERT(iov_size > 0);
|
1299
|
+
return iov_size;
|
1300
|
+
}
|
1301
|
+
|
1302
|
+
void TcpZerocopySendRecord::UpdateOffsetForBytesSent(size_t sending_length,
|
1303
|
+
size_t actually_sent) {
|
1304
|
+
size_t trailing = sending_length - actually_sent;
|
1305
|
+
while (trailing > 0) {
|
1306
|
+    size_t slice_length;
+    out_offset_.slice_idx--;
+    slice_length = GRPC_SLICE_LENGTH(buf_.slices[out_offset_.slice_idx]);
+    if (slice_length > trailing) {
+      out_offset_.byte_idx = slice_length - trailing;
+      break;
+    } else {
+      trailing -= slice_length;
+    }
+  }
+}
+
+// returns true if done, false if pending; if returning true, *error is set
+static bool do_tcp_flush_zerocopy(grpc_tcp* tcp, TcpZerocopySendRecord* record,
+                                  grpc_error** error) {
+  struct msghdr msg;
+  struct iovec iov[MAX_WRITE_IOVEC];
+  msg_iovlen_type iov_size;
+  ssize_t sent_length = 0;
+  size_t sending_length;
+  size_t unwind_slice_idx;
+  size_t unwind_byte_idx;
+  while (true) {
+    sending_length = 0;
+    iov_size = record->PopulateIovs(&unwind_slice_idx, &unwind_byte_idx,
+                                    &sending_length, iov);
+    msg.msg_name = nullptr;
+    msg.msg_namelen = 0;
+    msg.msg_iov = iov;
+    msg.msg_iovlen = iov_size;
+    msg.msg_flags = 0;
+    bool tried_sending_message = false;
+    // Before calling sendmsg (with or without timestamps): we
+    // take a single ref on the zerocopy send record.
+    tcp->tcp_zerocopy_send_ctx.NoteSend(record);
+    if (tcp->outgoing_buffer_arg != nullptr) {
+      if (!tcp->ts_capable ||
+          !tcp_write_with_timestamps(tcp, &msg, sending_length, &sent_length,
+                                     MSG_ZEROCOPY)) {
+        /* We could not set socket options to collect Fathom timestamps.
+         * Fallback on writing without timestamps. */
+        tcp->ts_capable = false;
+        tcp_shutdown_buffer_list(tcp);
+      } else {
+        tried_sending_message = true;
+      }
+    }
+    if (!tried_sending_message) {
+      msg.msg_control = nullptr;
+      msg.msg_controllen = 0;
+      GRPC_STATS_INC_TCP_WRITE_SIZE(sending_length);
+      GRPC_STATS_INC_TCP_WRITE_IOV_SIZE(iov_size);
+      sent_length = tcp_send(tcp->fd, &msg, MSG_ZEROCOPY);
+    }
+    if (sent_length < 0) {
+      // If this particular send failed, drop ref taken earlier in this method.
+      tcp->tcp_zerocopy_send_ctx.UndoSend();
+      if (errno == EAGAIN) {
+        record->UnwindIfThrottled(unwind_slice_idx, unwind_byte_idx);
+        return false;
+      } else if (errno == EPIPE) {
+        *error = tcp_annotate_error(GRPC_OS_ERROR(errno, "sendmsg"), tcp);
+        tcp_shutdown_buffer_list(tcp);
+        return true;
+      } else {
+        *error = tcp_annotate_error(GRPC_OS_ERROR(errno, "sendmsg"), tcp);
+        tcp_shutdown_buffer_list(tcp);
+        return true;
+      }
+    }
+    tcp->bytes_counter += sent_length;
+    record->UpdateOffsetForBytesSent(sending_length,
+                                     static_cast<size_t>(sent_length));
+    if (record->AllSlicesSent()) {
+      *error = GRPC_ERROR_NONE;
+      return true;
+    }
+  }
+}
+
+static void UnrefMaybePutZerocopySendRecord(grpc_tcp* tcp,
+                                            TcpZerocopySendRecord* record,
+                                            uint32_t seq, const char* tag) {
+  if (record->Unref()) {
+    tcp->tcp_zerocopy_send_ctx.PutSendRecord(record);
+  }
+}
+
+static bool tcp_flush_zerocopy(grpc_tcp* tcp, TcpZerocopySendRecord* record,
+                               grpc_error** error) {
+  bool done = do_tcp_flush_zerocopy(tcp, record, error);
+  if (done) {
+    // Either we encountered an error, or we successfully sent all the bytes.
+    // In either case, we're done with this record.
+    UnrefMaybePutZerocopySendRecord(tcp, record, 0, "flush_done");
+  }
+  return done;
+}
+
 static bool tcp_flush(grpc_tcp* tcp, grpc_error** error) {
   struct msghdr msg;
   struct iovec iov[MAX_WRITE_IOVEC];
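The do_tcp_flush_zerocopy() path added above builds on the Linux MSG_ZEROCOPY facility: the socket is opted in once with SO_ZEROCOPY, each sendmsg() then carries the MSG_ZEROCOPY flag, and the kernel reports completion asynchronously on the socket error queue; that deferred completion is why NoteSend()/UndoSend() keep a reference on the send record. The standalone sketch below is not part of this diff and only illustrates the underlying kernel calls; the helper names and the fallback constant values are illustrative, and error-queue draining is omitted.

#include <cstring>
#include <sys/socket.h>
#include <sys/uio.h>

#ifndef SO_ZEROCOPY
#define SO_ZEROCOPY 60  // value from <asm-generic/socket.h>, Linux >= 4.14
#endif
#ifndef MSG_ZEROCOPY
#define MSG_ZEROCOPY 0x4000000  // value from <linux/socket.h>
#endif

// Illustrative helper: opt a connected TCP socket into zerocopy transmission.
static bool enable_zerocopy(int fd) {
  const int enable = 1;
  return setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &enable, sizeof(enable)) == 0;
}

// Illustrative helper: send one buffer with MSG_ZEROCOPY. The pages backing
// `data` must not be reused until the kernel signals completion via a
// MSG_ERRQUEUE notification; that is the lifetime the TcpZerocopySendRecord
// refcounting above is tracking.
static ssize_t send_zerocopy(int fd, const void* data, size_t len) {
  iovec iov;
  iov.iov_base = const_cast<void*>(data);
  iov.iov_len = len;
  msghdr msg;
  std::memset(&msg, 0, sizeof(msg));
  msg.msg_iov = &iov;
  msg.msg_iovlen = 1;
  return sendmsg(fd, &msg, MSG_ZEROCOPY);
}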
@@ -927,7 +1416,7 @@ static bool tcp_flush(grpc_tcp* tcp, grpc_error** error) {
   // buffer as we write
   size_t outgoing_slice_idx = 0;
 
-  for (;;) {
+  while (true) {
     sending_length = 0;
     unwind_slice_idx = outgoing_slice_idx;
     unwind_byte_idx = tcp->outgoing_byte_idx;
@@ -1027,12 +1516,21 @@ static void tcp_handle_write(void* arg /* grpc_tcp */, grpc_error* error) {
   if (error != GRPC_ERROR_NONE) {
     cb = tcp->write_cb;
     tcp->write_cb = nullptr;
+    if (tcp->current_zerocopy_send != nullptr) {
+      UnrefMaybePutZerocopySendRecord(tcp, tcp->current_zerocopy_send, 0,
+                                      "handle_write_err");
+      tcp->current_zerocopy_send = nullptr;
+    }
     grpc_core::Closure::Run(DEBUG_LOCATION, cb, GRPC_ERROR_REF(error));
     TCP_UNREF(tcp, "write");
     return;
   }
 
-  if (!tcp_flush(tcp, &error)) {
+  bool flush_result =
+      tcp->current_zerocopy_send != nullptr
+          ? tcp_flush_zerocopy(tcp, tcp->current_zerocopy_send, &error)
+          : tcp_flush(tcp, &error);
+  if (!flush_result) {
     if (GRPC_TRACE_FLAG_ENABLED(grpc_tcp_trace)) {
       gpr_log(GPR_INFO, "write: delayed");
     }
@@ -1042,6 +1540,7 @@ static void tcp_handle_write(void* arg /* grpc_tcp */, grpc_error* error) {
   } else {
     cb = tcp->write_cb;
     tcp->write_cb = nullptr;
+    tcp->current_zerocopy_send = nullptr;
     if (GRPC_TRACE_FLAG_ENABLED(grpc_tcp_trace)) {
       const char* str = grpc_error_string(error);
       gpr_log(GPR_INFO, "write: %s", str);
@@ -1057,6 +1556,7 @@ static void tcp_write(grpc_endpoint* ep, grpc_slice_buffer* buf,
   GPR_TIMER_SCOPE("tcp_write", 0);
   grpc_tcp* tcp = reinterpret_cast<grpc_tcp*>(ep);
   grpc_error* error = GRPC_ERROR_NONE;
+  TcpZerocopySendRecord* zerocopy_send_record = nullptr;
 
   if (GRPC_TRACE_FLAG_ENABLED(grpc_tcp_trace)) {
     size_t i;
@@ -1073,8 +1573,8 @@ static void tcp_write(grpc_endpoint* ep, grpc_slice_buffer* buf,
   }
 
   GPR_ASSERT(tcp->write_cb == nullptr);
+  GPR_DEBUG_ASSERT(tcp->current_zerocopy_send == nullptr);
 
-  tcp->outgoing_buffer_arg = arg;
   if (buf->length == 0) {
     grpc_core::Closure::Run(
         DEBUG_LOCATION, cb,
@@ -1085,15 +1585,26 @@ static void tcp_write(grpc_endpoint* ep, grpc_slice_buffer* buf,
     tcp_shutdown_buffer_list(tcp);
     return;
   }
-  tcp->outgoing_buffer = buf;
-  tcp->outgoing_byte_idx = 0;
+
+  zerocopy_send_record = tcp_get_send_zerocopy_record(tcp, buf);
+  if (zerocopy_send_record == nullptr) {
+    // Either not enough bytes, or couldn't allocate a zerocopy context.
+    tcp->outgoing_buffer = buf;
+    tcp->outgoing_byte_idx = 0;
+  }
+  tcp->outgoing_buffer_arg = arg;
   if (arg) {
     GPR_ASSERT(grpc_event_engine_can_track_errors());
   }
 
-  if (!tcp_flush(tcp, &error)) {
+  bool flush_result =
+      zerocopy_send_record != nullptr
+          ? tcp_flush_zerocopy(tcp, zerocopy_send_record, &error)
+          : tcp_flush(tcp, &error);
+  if (!flush_result) {
     TCP_REF(tcp, "write");
     tcp->write_cb = cb;
+    tcp->current_zerocopy_send = zerocopy_send_record;
     if (GRPC_TRACE_FLAG_ENABLED(grpc_tcp_trace)) {
       gpr_log(GPR_INFO, "write: delayed");
     }
@@ -1121,6 +1632,7 @@ static void tcp_add_to_pollset_set(grpc_endpoint* ep,
 static void tcp_delete_from_pollset_set(grpc_endpoint* ep,
                                         grpc_pollset_set* pollset_set) {
   grpc_tcp* tcp = reinterpret_cast<grpc_tcp*>(ep);
+  ZerocopyDisableAndWaitForRemaining(tcp);
   grpc_pollset_set_del_fd(pollset_set, tcp->em_fd);
 }
 
@@ -1172,9 +1684,15 @@ static const grpc_endpoint_vtable vtable = {tcp_read,
 grpc_endpoint* grpc_tcp_create(grpc_fd* em_fd,
                                const grpc_channel_args* channel_args,
                                const char* peer_string) {
+  static constexpr bool kZerocpTxEnabledDefault = false;
   int tcp_read_chunk_size = GRPC_TCP_DEFAULT_READ_SLICE_SIZE;
   int tcp_max_read_chunk_size = 4 * 1024 * 1024;
   int tcp_min_read_chunk_size = 256;
+  bool tcp_tx_zerocopy_enabled = kZerocpTxEnabledDefault;
+  int tcp_tx_zerocopy_send_bytes_thresh =
+      grpc_core::TcpZerocopySendCtx::kDefaultSendBytesThreshold;
+  int tcp_tx_zerocopy_max_simult_sends =
+      grpc_core::TcpZerocopySendCtx::kDefaultMaxSends;
   grpc_resource_quota* resource_quota = grpc_resource_quota_create(nullptr);
   if (channel_args != nullptr) {
     for (size_t i = 0; i < channel_args->num_args; i++) {
@@ -1199,6 +1717,23 @@ grpc_endpoint* grpc_tcp_create(grpc_fd* em_fd,
         resource_quota =
             grpc_resource_quota_ref_internal(static_cast<grpc_resource_quota*>(
                 channel_args->args[i].value.pointer.p));
+      } else if (0 == strcmp(channel_args->args[i].key,
+                             GRPC_ARG_TCP_TX_ZEROCOPY_ENABLED)) {
+        tcp_tx_zerocopy_enabled = grpc_channel_arg_get_bool(
+            &channel_args->args[i], kZerocpTxEnabledDefault);
+      } else if (0 == strcmp(channel_args->args[i].key,
+                             GRPC_ARG_TCP_TX_ZEROCOPY_SEND_BYTES_THRESHOLD)) {
+        grpc_integer_options options = {
+            grpc_core::TcpZerocopySendCtx::kDefaultSendBytesThreshold, 0,
+            INT_MAX};
+        tcp_tx_zerocopy_send_bytes_thresh =
+            grpc_channel_arg_get_integer(&channel_args->args[i], options);
+      } else if (0 == strcmp(channel_args->args[i].key,
+                             GRPC_ARG_TCP_TX_ZEROCOPY_MAX_SIMULT_SENDS)) {
+        grpc_integer_options options = {
+            grpc_core::TcpZerocopySendCtx::kDefaultMaxSends, 0, INT_MAX};
+        tcp_tx_zerocopy_max_simult_sends =
+            grpc_channel_arg_get_integer(&channel_args->args[i], options);
       }
     }
   }
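The three channel arguments handled above (GRPC_ARG_TCP_TX_ZEROCOPY_ENABLED, GRPC_ARG_TCP_TX_ZEROCOPY_SEND_BYTES_THRESHOLD, GRPC_ARG_TCP_TX_ZEROCOPY_MAX_SIMULT_SENDS) are how an application opts in, since the feature defaults to off (kZerocpTxEnabledDefault = false). A minimal sketch of passing them through the C-core API follows; the helper name, the target string, and the numeric values are placeholders, and it assumes grpc_init() has already been called.

#include <grpc/grpc.h>

// Illustrative helper: create a channel with TCP TX zerocopy enabled and its
// thresholds tuned, mirroring the argument keys parsed in grpc_tcp_create().
static grpc_channel* create_zerocopy_channel(const char* target) {
  grpc_arg args[3];
  args[0].type = GRPC_ARG_INTEGER;
  args[0].key = const_cast<char*>(GRPC_ARG_TCP_TX_ZEROCOPY_ENABLED);
  args[0].value.integer = 1;  // read via grpc_channel_arg_get_bool()
  args[1].type = GRPC_ARG_INTEGER;
  args[1].key = const_cast<char*>(GRPC_ARG_TCP_TX_ZEROCOPY_SEND_BYTES_THRESHOLD);
  args[1].value.integer = 32 * 1024;  // placeholder: zerocopy only for writes this large
  args[2].type = GRPC_ARG_INTEGER;
  args[2].key = const_cast<char*>(GRPC_ARG_TCP_TX_ZEROCOPY_MAX_SIMULT_SENDS);
  args[2].value.integer = 4;  // placeholder cap on in-flight zerocopy sends
  grpc_channel_args channel_args = {3, args};
  return grpc_insecure_channel_create(target, &channel_args, nullptr);
}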
@@ -1215,6 +1750,7 @@ grpc_endpoint* grpc_tcp_create(grpc_fd* em_fd,
   tcp->fd = grpc_fd_wrapped_fd(em_fd);
   tcp->read_cb = nullptr;
   tcp->write_cb = nullptr;
+  tcp->current_zerocopy_send = nullptr;
   tcp->release_fd_cb = nullptr;
   tcp->release_fd = nullptr;
   tcp->incoming_buffer = nullptr;
@@ -1228,6 +1764,20 @@ grpc_endpoint* grpc_tcp_create(grpc_fd* em_fd,
   tcp->socket_ts_enabled = false;
   tcp->ts_capable = true;
   tcp->outgoing_buffer_arg = nullptr;
+  new (&tcp->tcp_zerocopy_send_ctx) TcpZerocopySendCtx(
+      tcp_tx_zerocopy_max_simult_sends, tcp_tx_zerocopy_send_bytes_thresh);
+  if (tcp_tx_zerocopy_enabled && !tcp->tcp_zerocopy_send_ctx.memory_limited()) {
+#ifdef GRPC_LINUX_ERRQUEUE
+    const int enable = 1;
+    auto err =
+        setsockopt(tcp->fd, SOL_SOCKET, SO_ZEROCOPY, &enable, sizeof(enable));
+    if (err == 0) {
+      tcp->tcp_zerocopy_send_ctx.set_enabled(true);
+    } else {
+      gpr_log(GPR_ERROR, "Failed to set zerocopy options on the socket.");
+    }
+#endif
+  }
   /* paired with unref in grpc_tcp_destroy */
   new (&tcp->refcount) grpc_core::RefCount(1, &grpc_tcp_trace);
   gpr_atm_no_barrier_store(&tcp->shutdown_count, 0);
@@ -1294,6 +1844,7 @@ void grpc_tcp_destroy_and_release_fd(grpc_endpoint* ep, int* fd,
   grpc_slice_buffer_reset_and_unref_internal(&tcp->last_read_buffer);
   if (grpc_event_engine_can_track_errors()) {
     /* Stop errors notification. */
+    ZerocopyDisableAndWaitForRemaining(tcp);
     gpr_atm_no_barrier_store(&tcp->stop_error_notification, true);
     grpc_fd_set_error(tcp->em_fd);
   }