wearefair-grpc 1.3.1.pre.c → 1.4.0.fair
- checksums.yaml +4 -4
- data/Makefile +418 -126
- data/include/grpc/grpc.h +15 -69
- data/include/grpc/grpc_security.h +1 -1
- data/include/grpc/impl/codegen/compression_types.h +3 -4
- data/include/grpc/impl/codegen/gpr_types.h +0 -1
- data/include/grpc/impl/codegen/grpc_types.h +69 -3
- data/include/grpc/impl/codegen/port_platform.h +6 -0
- data/include/grpc/impl/codegen/slice.h +2 -1
- data/include/grpc/load_reporting.h +6 -6
- data/include/grpc/slice.h +25 -3
- data/include/grpc/slice_buffer.h +4 -0
- data/src/core/ext/census/context.c +1 -1
- data/src/core/ext/census/resource.c +3 -1
- data/src/core/ext/filters/client_channel/channel_connectivity.c +1 -1
- data/src/core/ext/filters/client_channel/client_channel.c +158 -100
- data/src/core/ext/filters/client_channel/client_channel_plugin.c +3 -2
- data/src/core/ext/filters/client_channel/lb_policy.c +2 -1
- data/src/core/ext/filters/client_channel/lb_policy.h +5 -6
- data/src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.c +153 -0
- data/src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h +42 -0
- data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.c +344 -88
- data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.c +133 -0
- data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.h +65 -0
- data/src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.c +47 -5
- data/src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.h +6 -0
- data/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/load_balancer.pb.c +19 -8
- data/src/core/ext/filters/client_channel/lb_policy/grpclb/proto/grpc/lb/v1/load_balancer.pb.h +63 -34
- data/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.c +2 -1
- data/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.c +13 -12
- data/src/core/ext/filters/client_channel/lb_policy_factory.c +28 -5
- data/src/core/ext/filters/client_channel/lb_policy_factory.h +18 -4
- data/src/core/ext/filters/client_channel/parse_address.c +37 -7
- data/src/core/ext/filters/client_channel/parse_address.h +11 -8
- data/src/core/ext/filters/client_channel/resolver/sockaddr/sockaddr_resolver.c +3 -3
- data/src/core/ext/filters/client_channel/subchannel.c +19 -16
- data/src/core/ext/filters/client_channel/subchannel.h +1 -0
- data/src/core/ext/filters/client_channel/uri_parser.c +36 -22
- data/src/core/ext/filters/client_channel/uri_parser.h +1 -1
- data/src/core/{lib/channel → ext/filters/deadline}/deadline_filter.c +42 -17
- data/src/core/{lib/channel → ext/filters/deadline}/deadline_filter.h +8 -9
- data/src/core/{lib/channel → ext/filters/http/client}/http_client_filter.c +19 -11
- data/src/core/{lib/channel → ext/filters/http/client}/http_client_filter.h +3 -6
- data/src/core/ext/filters/http/http_filters_plugin.c +104 -0
- data/src/core/{lib/channel/compress_filter.c → ext/filters/http/message_compress/message_compress_filter.c} +124 -23
- data/src/core/{lib/channel/compress_filter.h → ext/filters/http/message_compress/message_compress_filter.h} +5 -6
- data/src/core/{lib/channel → ext/filters/http/server}/http_server_filter.c +4 -6
- data/src/core/{lib/channel → ext/filters/http/server}/http_server_filter.h +3 -3
- data/src/core/ext/filters/load_reporting/load_reporting.c +2 -25
- data/src/core/ext/filters/load_reporting/load_reporting_filter.c +26 -1
- data/src/core/ext/filters/max_age/max_age_filter.c +14 -14
- data/src/core/{lib/channel → ext/filters/message_size}/message_size_filter.c +91 -47
- data/src/core/{lib/channel → ext/filters/message_size}/message_size_filter.h +3 -3
- data/src/core/ext/transport/chttp2/client/insecure/channel_create.c +1 -1
- data/src/core/ext/transport/chttp2/server/chttp2_server.c +2 -2
- data/src/core/ext/transport/chttp2/transport/bin_decoder.c +2 -2
- data/src/core/ext/transport/chttp2/transport/bin_encoder.c +3 -3
- data/src/core/ext/transport/chttp2/transport/chttp2_transport.c +296 -172
- data/src/core/ext/transport/chttp2/transport/chttp2_transport.h +3 -2
- data/src/core/ext/transport/chttp2/transport/frame_data.c +203 -164
- data/src/core/ext/transport/chttp2/transport/frame_data.h +8 -14
- data/src/core/ext/transport/chttp2/transport/frame_goaway.c +1 -1
- data/src/core/ext/transport/chttp2/transport/frame_ping.c +1 -1
- data/src/core/ext/transport/chttp2/transport/frame_rst_stream.c +1 -1
- data/src/core/ext/transport/chttp2/transport/frame_settings.c +5 -5
- data/src/core/ext/transport/chttp2/transport/frame_window_update.c +1 -1
- data/src/core/ext/transport/chttp2/transport/hpack_encoder.c +4 -4
- data/src/core/ext/transport/chttp2/transport/hpack_parser.c +2 -4
- data/src/core/ext/transport/chttp2/transport/hpack_table.c +4 -3
- data/src/core/ext/transport/chttp2/transport/internal.h +50 -33
- data/src/core/ext/transport/chttp2/transport/parsing.c +10 -11
- data/src/core/ext/transport/chttp2/transport/writing.c +32 -13
- data/src/core/lib/channel/channel_args.c +28 -9
- data/src/core/lib/channel/channel_args.h +5 -1
- data/src/core/lib/channel/channel_stack.c +1 -1
- data/src/core/lib/channel/channel_stack.h +2 -2
- data/src/core/lib/channel/channel_stack_builder.c +13 -1
- data/src/core/lib/channel/channel_stack_builder.h +5 -1
- data/src/core/lib/channel/connected_channel.c +3 -1
- data/src/core/lib/channel/context.h +2 -2
- data/src/core/lib/compression/message_compress.c +2 -2
- data/src/core/lib/debug/trace.c +13 -6
- data/src/core/lib/debug/trace.h +27 -1
- data/src/core/lib/http/httpcli.c +1 -1
- data/src/core/lib/http/httpcli_security_connector.c +6 -10
- data/src/core/lib/http/parser.c +2 -2
- data/src/core/lib/http/parser.h +2 -1
- data/src/core/lib/iomgr/combiner.c +6 -6
- data/src/core/lib/iomgr/combiner.h +2 -1
- data/src/core/lib/iomgr/error.c +12 -5
- data/src/core/lib/iomgr/error.h +13 -13
- data/src/core/lib/iomgr/ev_epoll1_linux.c +984 -0
- data/src/core/lib/iomgr/ev_epoll1_linux.h +44 -0
- data/src/core/lib/iomgr/ev_epoll_limited_pollers_linux.c +2146 -0
- data/src/core/lib/iomgr/ev_epoll_limited_pollers_linux.h +43 -0
- data/src/core/lib/iomgr/ev_epoll_thread_pool_linux.c +1337 -0
- data/src/core/lib/iomgr/ev_epoll_thread_pool_linux.h +43 -0
- data/src/core/lib/iomgr/ev_epollex_linux.c +1511 -0
- data/src/core/lib/iomgr/ev_epollex_linux.h +43 -0
- data/src/core/lib/iomgr/{ev_epoll_linux.c → ev_epollsig_linux.c} +24 -31
- data/src/core/lib/iomgr/{ev_epoll_linux.h → ev_epollsig_linux.h} +4 -4
- data/src/core/lib/iomgr/ev_poll_posix.c +12 -27
- data/src/core/lib/iomgr/ev_poll_posix.h +2 -2
- data/src/core/lib/iomgr/ev_posix.c +22 -8
- data/src/core/lib/iomgr/ev_posix.h +4 -3
- data/src/core/lib/iomgr/exec_ctx.c +5 -0
- data/src/core/lib/iomgr/exec_ctx.h +2 -0
- data/src/core/lib/iomgr/iomgr.c +4 -0
- data/src/core/lib/iomgr/iomgr.h +3 -0
- data/src/core/lib/iomgr/is_epollexclusive_available.c +116 -0
- data/src/core/lib/iomgr/is_epollexclusive_available.h +41 -0
- data/src/core/lib/iomgr/lockfree_event.c +16 -0
- data/src/core/lib/iomgr/pollset.h +2 -5
- data/src/core/lib/iomgr/pollset_uv.c +1 -1
- data/src/core/lib/iomgr/pollset_windows.c +3 -3
- data/src/core/lib/iomgr/resource_quota.c +9 -8
- data/src/core/lib/iomgr/resource_quota.h +2 -1
- data/src/core/lib/iomgr/sockaddr_utils.h +1 -1
- data/src/core/lib/iomgr/socket_mutator.h +2 -0
- data/src/core/lib/iomgr/sys_epoll_wrapper.h +43 -0
- data/src/core/lib/iomgr/tcp_client_posix.c +6 -6
- data/src/core/lib/iomgr/tcp_client_uv.c +3 -3
- data/src/core/lib/iomgr/tcp_posix.c +7 -7
- data/src/core/lib/iomgr/tcp_posix.h +2 -1
- data/src/core/lib/iomgr/tcp_server_posix.c +1 -1
- data/src/core/lib/iomgr/tcp_uv.c +6 -6
- data/src/core/lib/iomgr/tcp_uv.h +2 -1
- data/src/core/lib/iomgr/tcp_windows.c +1 -1
- data/src/core/lib/iomgr/timer_generic.c +24 -25
- data/src/core/lib/iomgr/timer_manager.c +276 -0
- data/src/core/lib/iomgr/timer_manager.h +52 -0
- data/src/core/lib/iomgr/timer_uv.c +6 -0
- data/src/core/lib/iomgr/udp_server.c +42 -9
- data/src/core/lib/iomgr/udp_server.h +3 -1
- data/src/core/lib/security/credentials/credentials.c +0 -1
- data/src/core/lib/security/credentials/fake/fake_credentials.c +23 -0
- data/src/core/lib/security/credentials/fake/fake_credentials.h +12 -9
- data/src/core/lib/security/credentials/google_default/google_default_credentials.c +1 -1
- data/src/core/lib/security/credentials/jwt/jwt_credentials.c +1 -1
- data/src/core/lib/security/credentials/oauth2/oauth2_credentials.c +1 -1
- data/src/core/lib/security/credentials/ssl/ssl_credentials.c +24 -53
- data/src/core/lib/security/transport/client_auth_filter.c +9 -3
- data/src/core/lib/security/transport/secure_endpoint.c +7 -7
- data/src/core/lib/security/transport/secure_endpoint.h +1 -1
- data/src/core/lib/security/transport/security_connector.c +32 -51
- data/src/core/lib/security/transport/security_connector.h +10 -14
- data/src/core/lib/slice/b64.c +1 -1
- data/src/core/lib/slice/percent_encoding.c +3 -3
- data/src/core/lib/slice/slice.c +66 -33
- data/src/core/lib/slice/slice_buffer.c +25 -6
- data/src/core/lib/slice/slice_hash_table.c +33 -35
- data/src/core/lib/slice/slice_hash_table.h +7 -12
- data/src/core/lib/support/atomic.h +45 -0
- data/src/core/lib/support/atomic_with_atm.h +70 -0
- data/src/core/lib/support/atomic_with_std.h +48 -0
- data/src/core/lib/support/avl.c +14 -14
- data/src/core/lib/support/memory.h +74 -0
- data/src/core/lib/support/mpscq.c +12 -1
- data/src/core/lib/support/mpscq.h +4 -0
- data/src/core/lib/support/stack_lockfree.c +3 -36
- data/src/core/lib/support/time_posix.c +8 -0
- data/src/core/lib/support/tmpfile_posix.c +10 -10
- data/src/core/lib/surface/alarm.c +3 -1
- data/src/core/lib/surface/api_trace.c +2 -1
- data/src/core/lib/surface/api_trace.h +2 -2
- data/src/core/lib/surface/byte_buffer_reader.c +1 -1
- data/src/core/lib/surface/call.c +65 -22
- data/src/core/lib/surface/call.h +4 -2
- data/src/core/lib/surface/channel_init.c +2 -19
- data/src/core/lib/surface/channel_stack_type.c +18 -0
- data/src/core/lib/surface/channel_stack_type.h +2 -0
- data/src/core/lib/surface/completion_queue.c +249 -83
- data/src/core/lib/surface/completion_queue.h +18 -13
- data/src/core/lib/surface/completion_queue_factory.c +24 -9
- data/src/core/lib/surface/init.c +1 -52
- data/src/core/lib/surface/{lame_client.c → lame_client.cc} +37 -26
- data/src/core/lib/surface/server.c +50 -27
- data/src/core/lib/surface/server.h +2 -1
- data/src/core/lib/surface/version.c +2 -2
- data/src/core/lib/transport/bdp_estimator.c +20 -9
- data/src/core/lib/transport/bdp_estimator.h +5 -1
- data/src/core/lib/transport/byte_stream.c +23 -9
- data/src/core/lib/transport/byte_stream.h +15 -6
- data/src/core/lib/transport/connectivity_state.c +6 -6
- data/src/core/lib/transport/connectivity_state.h +2 -1
- data/src/core/lib/transport/service_config.c +6 -13
- data/src/core/lib/transport/service_config.h +2 -2
- data/src/core/lib/transport/static_metadata.c +403 -389
- data/src/core/lib/transport/static_metadata.h +127 -114
- data/src/core/plugin_registry/grpc_plugin_registry.c +12 -0
- data/src/core/tsi/fake_transport_security.c +5 -4
- data/src/core/tsi/ssl_transport_security.c +71 -82
- data/src/core/tsi/ssl_transport_security.h +39 -61
- data/src/core/tsi/transport_security.c +83 -2
- data/src/core/tsi/transport_security.h +27 -2
- data/src/core/tsi/transport_security_adapter.c +236 -0
- data/src/core/tsi/transport_security_adapter.h +62 -0
- data/src/core/tsi/transport_security_interface.h +179 -66
- data/src/ruby/ext/grpc/extconf.rb +2 -1
- data/src/ruby/ext/grpc/rb_byte_buffer.c +8 -6
- data/src/ruby/ext/grpc/rb_call.c +56 -48
- data/src/ruby/ext/grpc/rb_call.h +3 -4
- data/src/ruby/ext/grpc/rb_call_credentials.c +23 -22
- data/src/ruby/ext/grpc/rb_channel.c +45 -29
- data/src/ruby/ext/grpc/rb_channel_args.c +11 -9
- data/src/ruby/ext/grpc/rb_channel_credentials.c +16 -12
- data/src/ruby/ext/grpc/rb_completion_queue.c +7 -9
- data/src/ruby/ext/grpc/rb_compression_options.c +7 -6
- data/src/ruby/ext/grpc/rb_event_thread.c +10 -12
- data/src/ruby/ext/grpc/rb_event_thread.h +1 -2
- data/src/ruby/ext/grpc/rb_grpc.c +11 -15
- data/src/ruby/ext/grpc/rb_grpc.h +2 -2
- data/src/ruby/ext/grpc/rb_grpc_imports.generated.c +14 -6
- data/src/ruby/ext/grpc/rb_grpc_imports.generated.h +22 -10
- data/src/ruby/ext/grpc/rb_server.c +26 -28
- data/src/ruby/lib/grpc/version.rb +1 -1
- metadata +40 -18
- data/src/ruby/lib/grpc/grpc_c.bundle +0 -0
- data/src/ruby/lib/grpc/grpc_c.so +0 -0
data/src/core/lib/iomgr/ev_epoll1_linux.h
@@ -0,0 +1,44 @@
+/*
+ *
+ * Copyright 2017, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef GRPC_CORE_LIB_IOMGR_EV_EPOLL1_LINUX_H
+#define GRPC_CORE_LIB_IOMGR_EV_EPOLL1_LINUX_H
+
+#include "src/core/lib/iomgr/ev_posix.h"
+#include "src/core/lib/iomgr/port.h"
+
+// a polling engine that utilizes a singleton epoll set and turnstile polling
+
+const grpc_event_engine_vtable *grpc_init_epoll1_linux(bool explicit_request);
+
+#endif /* GRPC_CORE_LIB_IOMGR_EV_EPOLL1_LINUX_H */
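
For context, the only entry point this new header exposes is an engine initializer that yields an event-engine vtable. Below is a minimal sketch of how such an initializer is typically consumed: candidate engines are probed in order and the first one that reports itself usable wins. Every name here (event_engine_vtable, init_epoll1, init_poll) is illustrative and assumes, rather than quotes, the surrounding grpc machinery.

```c
/* Hypothetical sketch, not from this diff: consuming init functions shaped
 * like grpc_init_epoll1_linux(). Assumption: an engine's init returns its
 * vtable, or NULL if it cannot run on this kernel. */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

typedef struct event_engine_vtable {
  const char *name; /* engine identifier, e.g. "epoll1" */
} event_engine_vtable;

/* Every candidate engine exposes a probe with this shape. */
typedef const event_engine_vtable *(*engine_init_fn)(bool explicit_request);

static const event_engine_vtable poll_vtable = {"poll"};

/* Simulate epoll1 being unavailable (e.g. an old kernel): decline. */
static const event_engine_vtable *init_epoll1(bool explicit_request) {
  (void)explicit_request;
  return NULL;
}

/* poll() always works, so it serves as the fallback engine. */
static const event_engine_vtable *init_poll(bool explicit_request) {
  (void)explicit_request;
  return &poll_vtable;
}

int main(void) {
  engine_init_fn candidates[] = {init_epoll1, init_poll};
  const event_engine_vtable *chosen = NULL;
  for (size_t i = 0; i < sizeof(candidates) / sizeof(candidates[0]); i++) {
    chosen = candidates[i](false /* not explicitly requested */);
    if (chosen != NULL) break; /* first engine that probes OK wins */
  }
  printf("selected engine: %s\n", chosen ? chosen->name : "none");
  return 0;
}
```

The real selection logic lives in ev_posix.c (also touched in this release); the sketch only shows the probe-in-order shape that a NULL-or-vtable initializer implies.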
data/src/core/lib/iomgr/ev_epoll_limited_pollers_linux.c
@@ -0,0 +1,2146 @@
+/*
+ *
+ * Copyright 2017, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "src/core/lib/iomgr/port.h"
+
+/* This polling engine is only relevant on linux kernels supporting epoll() */
+#ifdef GRPC_LINUX_EPOLL
+
+#include "src/core/lib/iomgr/ev_epoll_limited_pollers_linux.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <poll.h>
+#include <pthread.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#include <grpc/support/alloc.h>
+#include <grpc/support/log.h>
+#include <grpc/support/string_util.h>
+#include <grpc/support/tls.h>
+#include <grpc/support/useful.h>
+
+#include "src/core/lib/debug/trace.h"
+#include "src/core/lib/iomgr/ev_posix.h"
+#include "src/core/lib/iomgr/iomgr_internal.h"
+#include "src/core/lib/iomgr/lockfree_event.h"
+#include "src/core/lib/iomgr/timer.h"
+#include "src/core/lib/iomgr/wakeup_fd_posix.h"
+#include "src/core/lib/iomgr/workqueue.h"
+#include "src/core/lib/profiling/timers.h"
+#include "src/core/lib/support/block_annotate.h"
+#include "src/core/lib/support/env.h"
+
+#define GRPC_POLLING_TRACE(fmt, ...)        \
+  if (GRPC_TRACER_ON(grpc_polling_trace)) { \
+    gpr_log(GPR_INFO, (fmt), __VA_ARGS__);  \
+  }
+
+#define GRPC_POLLSET_KICK_BROADCAST ((grpc_pollset_worker *)1)
+
+/* Uncomment the following to enable extra checks on poll_object operations */
+/* #define PO_DEBUG */
+
+/* The maximum number of polling threads per polling island. By default no
+   limit */
+static int g_max_pollers_per_pi = INT_MAX;
+
+static int grpc_wakeup_signal = -1;
+static bool is_grpc_wakeup_signal_initialized = false;
+
+/* Implements the function defined in grpc_posix.h. This function might be
+ * called before even calling grpc_init() to set a different signal to
+ * use. If signum == -1, then the use of signals is disabled */
+static void grpc_use_signal(int signum) {
+  grpc_wakeup_signal = signum;
+  is_grpc_wakeup_signal_initialized = true;
+
+  if (grpc_wakeup_signal < 0) {
+    gpr_log(GPR_INFO,
+            "Use of signals is disabled. Epoll engine will not be used");
+  } else {
+    gpr_log(GPR_INFO, "epoll engine will be using signal: %d",
+            grpc_wakeup_signal);
+  }
+}
+
+struct polling_island;
+
+typedef enum {
+  POLL_OBJ_FD,
+  POLL_OBJ_POLLSET,
+  POLL_OBJ_POLLSET_SET
+} poll_obj_type;
+
+typedef struct poll_obj {
+#ifdef PO_DEBUG
+  poll_obj_type obj_type;
+#endif
+  gpr_mu mu;
+  struct polling_island *pi;
+} poll_obj;
+
+static const char *poll_obj_string(poll_obj_type po_type) {
+  switch (po_type) {
+    case POLL_OBJ_FD:
+      return "fd";
+    case POLL_OBJ_POLLSET:
+      return "pollset";
+    case POLL_OBJ_POLLSET_SET:
+      return "pollset_set";
+  }
+
+  GPR_UNREACHABLE_CODE(return "UNKNOWN");
+}
+
+/*******************************************************************************
+ * Fd Declarations
+ */
+
+#define FD_FROM_PO(po) ((grpc_fd *)(po))
+
+struct grpc_fd {
+  poll_obj po;
+
+  int fd;
+  /* refst format:
+       bit 0    : 1=Active / 0=Orphaned
+       bits 1-n : refcount
+     Ref/Unref by two to avoid altering the orphaned bit */
+  gpr_atm refst;
+
+  /* The fd is either closed or we relinquished control of it. In either
+     case, this indicates that the 'fd' on this structure is no longer
+     valid */
+  bool orphaned;
+
+  gpr_atm read_closure;
+  gpr_atm write_closure;
+
+  struct grpc_fd *freelist_next;
+  grpc_closure *on_done_closure;
+
+  /* The pollset that last noticed that the fd is readable. The actual type
+   * stored in this is (grpc_pollset *) */
+  gpr_atm read_notifier_pollset;
+
+  grpc_iomgr_object iomgr_object;
+};
+
+/* Reference counting for fds */
+// #define GRPC_FD_REF_COUNT_DEBUG
+#ifdef GRPC_FD_REF_COUNT_DEBUG
+static void fd_ref(grpc_fd *fd, const char *reason, const char *file, int line);
+static void fd_unref(grpc_fd *fd, const char *reason, const char *file,
+                     int line);
+#define GRPC_FD_REF(fd, reason) fd_ref(fd, reason, __FILE__, __LINE__)
+#define GRPC_FD_UNREF(fd, reason) fd_unref(fd, reason, __FILE__, __LINE__)
+#else
+static void fd_ref(grpc_fd *fd);
+static void fd_unref(grpc_fd *fd);
+#define GRPC_FD_REF(fd, reason) fd_ref(fd)
+#define GRPC_FD_UNREF(fd, reason) fd_unref(fd)
+#endif
+
+static void fd_global_init(void);
+static void fd_global_shutdown(void);
+
+/*******************************************************************************
+ * Polling island Declarations
+ */
+
+#ifdef GRPC_WORKQUEUE_REFCOUNT_DEBUG
+
+#define PI_ADD_REF(p, r) pi_add_ref_dbg((p), (r), __FILE__, __LINE__)
+#define PI_UNREF(exec_ctx, p, r) \
+  pi_unref_dbg((exec_ctx), (p), (r), __FILE__, __LINE__)
+
+#else /* defined(GRPC_WORKQUEUE_REFCOUNT_DEBUG) */
+
+#define PI_ADD_REF(p, r) pi_add_ref((p))
+#define PI_UNREF(exec_ctx, p, r) pi_unref((exec_ctx), (p))
+
+#endif /* !defined(GRPC_PI_REF_COUNT_DEBUG) */
+
+typedef struct worker_node {
+  struct worker_node *next;
+  struct worker_node *prev;
+} worker_node;
+
+/* This is also used as grpc_workqueue (by directly casting it) */
+typedef struct polling_island {
+  grpc_closure_scheduler workqueue_scheduler;
+
+  gpr_mu mu;
+  /* Ref count. Use PI_ADD_REF() and PI_UNREF() macros to increment/decrement
+     the refcount.
+     Once the ref count becomes zero, this structure is destroyed which means
+     we should ensure that there is never a scenario where a PI_ADD_REF() is
+     racing with a PI_UNREF() that just made the ref_count zero. */
+  gpr_atm ref_count;
+
+  /* Pointer to the polling_island this merged into.
+   * merged_to value is only set once in polling_island's lifetime (and that too
+   * only if the island is merged with another island). Because of this, we can
+   * use gpr_atm type here so that we can do atomic access on this and reduce
+   * lock contention on 'mu' mutex.
+   *
+   * Note that if this field is not NULL (i.e not 0), all the remaining fields
+   * (except mu and ref_count) are invalid and must be ignored. */
+  gpr_atm merged_to;
+
+  /* Number of threads currently polling on this island */
+  gpr_atm poller_count;
+  /* Mutex guarding the read end of the workqueue (must be held to pop from
+   * workqueue_items) */
+  gpr_mu workqueue_read_mu;
+  /* Queue of closures to be executed */
+  gpr_mpscq workqueue_items;
+  /* Count of items in workqueue_items */
+  gpr_atm workqueue_item_count;
+  /* Wakeup fd used to wake pollers to check the contents of workqueue_items */
+  grpc_wakeup_fd workqueue_wakeup_fd;
+
+  /* The list of workers waiting to do polling on this polling island */
+  gpr_mu worker_list_mu;
+  worker_node worker_list_head;
+
+  /* The fd of the underlying epoll set */
+  int epoll_fd;
+
+  /* The file descriptors in the epoll set */
+  size_t fd_cnt;
+  size_t fd_capacity;
+  grpc_fd **fds;
+} polling_island;
+
+/*******************************************************************************
+ * Pollset Declarations
+ */
+#define WORKER_FROM_WORKER_LIST_NODE(p)          \
+  (struct grpc_pollset_worker *)(((char *)(p)) - \
+                                 offsetof(grpc_pollset_worker, pi_list_link))
+struct grpc_pollset_worker {
+  /* Thread id of this worker */
+  pthread_t pt_id;
+
+  /* Used to prevent a worker from getting kicked multiple times */
+  gpr_atm is_kicked;
+
+  struct grpc_pollset_worker *next;
+  struct grpc_pollset_worker *prev;
+
+  /* Indicates if it is this worker's turn to do epoll */
+  gpr_atm is_polling_turn;
+
+  /* Node in the polling island's worker list. */
+  worker_node pi_list_link;
+};
+
+struct grpc_pollset {
+  poll_obj po;
+
+  grpc_pollset_worker root_worker;
+  bool kicked_without_pollers;
+
+  bool shutting_down;          /* Is the pollset shutting down ? */
+  bool finish_shutdown_called; /* Is the 'finish_shutdown_locked()' called ? */
+  grpc_closure *shutdown_done; /* Called after shutdown is complete */
+};
+
+/*******************************************************************************
+ * Pollset-set Declarations
+ */
+struct grpc_pollset_set {
+  poll_obj po;
+};
+
+/*******************************************************************************
+ * Common helpers
+ */
+
+static bool append_error(grpc_error **composite, grpc_error *error,
+                         const char *desc) {
+  if (error == GRPC_ERROR_NONE) return true;
+  if (*composite == GRPC_ERROR_NONE) {
+    *composite = GRPC_ERROR_CREATE_FROM_COPIED_STRING(desc);
+  }
+  *composite = grpc_error_add_child(*composite, error);
+  return false;
+}
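
The append_error() helper above folds any number of individual failures into one composite grpc_error: the composite is created lazily on the first failure, and the false return lets callers record that something went wrong while still continuing (polling_island_add_fds_locked() below uses it exactly this way). Here is a self-contained analogue of that accumulate-and-continue pattern, with an int return code and a plain struct standing in for grpc_error; all names are illustrative.

```c
/* Minimal analogue of the append_error() pattern: keep going after
 * individual failures, folding each into a single composite that is
 * reported once at the end. grpc_error itself is not reproduced here. */
#include <stdbool.h>
#include <stdio.h>

typedef struct {
  int n_children; /* how many failures were folded in */
  char desc[64];  /* description of the overall operation */
} composite_error;

/* Returns true on success, mirroring append_error's return convention. */
static bool append_error(composite_error *composite, int rc,
                         const char *desc) {
  if (rc == 0) return true;         /* no error: nothing to record */
  if (composite->n_children == 0) { /* first failure names the operation */
    snprintf(composite->desc, sizeof(composite->desc), "%s", desc);
  }
  composite->n_children++; /* fold this failure in */
  return false;
}

int main(void) {
  composite_error err = {0, ""};
  int fds[] = {3, -1, 5, -1}; /* pretend the two negative fds fail */
  for (int i = 0; i < 4; i++) {
    /* rc != 0 stands in for an epoll_ctl-style failure on fds[i] */
    append_error(&err, fds[i] < 0 ? -1 : 0, "polling_island_add_fds");
  }
  if (err.n_children > 0) {
    printf("%s: %d sub-errors\n", err.desc, err.n_children);
  }
  return 0;
}
```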
+
+/*******************************************************************************
+ * Polling island Definitions
+ */
+
+/* The wakeup fd that is used to wake up all threads in a Polling island. This
+   is useful in the polling island merge operation where we need to wake up all
+   the threads currently polling the smaller polling island (so that they can
+   start polling the new/merged polling island)
+
+   NOTE: This fd is initialized to be readable and MUST NOT be consumed i.e the
+   threads that woke up MUST NOT call grpc_wakeup_fd_consume_wakeup() */
+static grpc_wakeup_fd polling_island_wakeup_fd;
+
+/* The polling island being polled right now.
+   See comments in workqueue_maybe_wakeup for why this is tracked. */
+static __thread polling_island *g_current_thread_polling_island;
+
+/* Forward declaration */
+static void polling_island_delete(grpc_exec_ctx *exec_ctx, polling_island *pi);
+static void workqueue_enqueue(grpc_exec_ctx *exec_ctx, grpc_closure *closure,
+                              grpc_error *error);
+
+#ifdef GRPC_TSAN
+/* Currently TSAN may incorrectly flag data races between epoll_ctl and
+   epoll_wait for any grpc_fd structs that are added to the epoll set via
+   epoll_ctl and are returned (within a very short window) via epoll_wait().
+
+   To work-around this race, we establish a happens-before relation between
+   the code just-before epoll_ctl() and the code after epoll_wait() by using
+   this atomic */
+gpr_atm g_epoll_sync;
+#endif /* defined(GRPC_TSAN) */
+
+static const grpc_closure_scheduler_vtable workqueue_scheduler_vtable = {
+    workqueue_enqueue, workqueue_enqueue, "workqueue"};
+
+static void pi_add_ref(polling_island *pi);
+static void pi_unref(grpc_exec_ctx *exec_ctx, polling_island *pi);
+
+#ifdef GRPC_WORKQUEUE_REFCOUNT_DEBUG
+static void pi_add_ref_dbg(polling_island *pi, const char *reason,
+                           const char *file, int line) {
+  long old_cnt = gpr_atm_acq_load(&pi->ref_count);
+  pi_add_ref(pi);
+  gpr_log(GPR_DEBUG, "Add ref pi: %p, old: %ld -> new:%ld (%s) - (%s, %d)",
+          (void *)pi, old_cnt, old_cnt + 1, reason, file, line);
+}
+
+static void pi_unref_dbg(grpc_exec_ctx *exec_ctx, polling_island *pi,
+                         const char *reason, const char *file, int line) {
+  long old_cnt = gpr_atm_acq_load(&pi->ref_count);
+  pi_unref(exec_ctx, pi);
+  gpr_log(GPR_DEBUG, "Unref pi: %p, old:%ld -> new:%ld (%s) - (%s, %d)",
+          (void *)pi, old_cnt, (old_cnt - 1), reason, file, line);
+}
+
+static grpc_workqueue *workqueue_ref(grpc_workqueue *workqueue,
+                                     const char *file, int line,
+                                     const char *reason) {
+  if (workqueue != NULL) {
+    pi_add_ref_dbg((polling_island *)workqueue, reason, file, line);
+  }
+  return workqueue;
+}
+
+static void workqueue_unref(grpc_exec_ctx *exec_ctx, grpc_workqueue *workqueue,
+                            const char *file, int line, const char *reason) {
+  if (workqueue != NULL) {
+    pi_unref_dbg(exec_ctx, (polling_island *)workqueue, reason, file, line);
+  }
+}
+#else
+static grpc_workqueue *workqueue_ref(grpc_workqueue *workqueue) {
+  if (workqueue != NULL) {
+    pi_add_ref((polling_island *)workqueue);
+  }
+  return workqueue;
+}
+
+static void workqueue_unref(grpc_exec_ctx *exec_ctx,
+                            grpc_workqueue *workqueue) {
+  if (workqueue != NULL) {
+    pi_unref(exec_ctx, (polling_island *)workqueue);
+  }
+}
+#endif
+
+static void pi_add_ref(polling_island *pi) {
+  gpr_atm_no_barrier_fetch_add(&pi->ref_count, 1);
+}
+
+static void pi_unref(grpc_exec_ctx *exec_ctx, polling_island *pi) {
+  /* If ref count went to zero, delete the polling island.
+     Note that this deletion need not be done under a lock. Once the ref count
+     goes to zero, we are guaranteed that no one else holds a reference to the
+     polling island (and that there is no racing pi_add_ref() call either).
+
+     Also, if we are deleting the polling island and the merged_to field is
+     non-empty, we should remove a ref to the merged_to polling island
+   */
+  if (1 == gpr_atm_full_fetch_add(&pi->ref_count, -1)) {
+    polling_island *next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
+    polling_island_delete(exec_ctx, pi);
+    if (next != NULL) {
+      PI_UNREF(exec_ctx, next, "pi_delete"); /* Recursive call */
+    }
+  }
+}
+
+static void worker_node_init(worker_node *node) {
+  node->next = node->prev = node;
+}
+
+/* Not thread safe. Do under a list-level lock */
+static void push_back_worker_node(worker_node *head, worker_node *node) {
+  node->next = head;
+  node->prev = head->prev;
+  head->prev->next = node;
+  head->prev = node;
+}
+
+/* Not thread safe. Do under a list-level lock */
+static void remove_worker_node(worker_node *node) {
+  node->next->prev = node->prev;
+  node->prev->next = node->next;
+  /* If node's next and prev point to itself, the node is considered detached
+   * from the list */
+  node->next = node->prev = node;
+}
+
+/* Not thread safe. Do under a list-level lock */
+static worker_node *pop_front_worker_node(worker_node *head) {
+  worker_node *node = head->next;
+  if (node != head) {
+    remove_worker_node(node);
+  } else {
+    node = NULL;
+  }
+
+  return node;
+}
+
+/* Returns true if the node's next and prev are pointing to itself (which
+   indicates that the node is not in the list) */
+static bool is_worker_node_detached(worker_node *node) {
+  return (node->next == node->prev && node->next == node);
+}
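
The worker_node helpers above form a circular intrusive doubly-linked list in which the sentinel head, and every detached node, points at itself, so emptiness and detachment checks need no NULL tests and no length counter. A standalone sketch of the same structure under generic names (not the grpc types):

```c
/* Sentinel-based intrusive list: an empty list, and a detached node, is one
 * whose next/prev point at itself. */
#include <assert.h>
#include <stddef.h>

typedef struct node { struct node *next, *prev; } node;

static void node_init(node *n) { n->next = n->prev = n; }

static void push_back(node *head, node *n) {
  n->next = head;
  n->prev = head->prev;
  head->prev->next = n;
  head->prev = n;
}

static node *pop_front(node *head) {
  node *n = head->next;
  if (n == head) return NULL; /* self-linked head == empty list */
  n->next->prev = n->prev;
  n->prev->next = n->next;
  n->next = n->prev = n;      /* re-detach: node points at itself again */
  return n;
}

static int is_detached(node *n) { return n->next == n && n->prev == n; }

int main(void) {
  node head, a, b;
  node_init(&head);
  node_init(&a);
  node_init(&b);
  assert(is_detached(&a));           /* freshly initialized == detached */
  push_back(&head, &a);
  push_back(&head, &b);
  assert(!is_detached(&a));
  assert(pop_front(&head) == &a);    /* FIFO order */
  assert(is_detached(&a));           /* detached invariant restored */
  assert(pop_front(&head) == &b);
  assert(pop_front(&head) == NULL);  /* empty again */
  return 0;
}
```

This is why polling_island_delete() can assert is_worker_node_detached(&pi->worker_list_head): an island with no waiting workers has a self-linked sentinel.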
+
+/* The caller is expected to hold pi->mu lock before calling this function
+ */
+static void polling_island_add_fds_locked(polling_island *pi, grpc_fd **fds,
+                                          size_t fd_count, bool add_fd_refs,
+                                          grpc_error **error) {
+  int err;
+  size_t i;
+  struct epoll_event ev;
+  char *err_msg;
+  const char *err_desc = "polling_island_add_fds";
+
+#ifdef GRPC_TSAN
+  /* See the definition of g_epoll_sync for more context */
+  gpr_atm_rel_store(&g_epoll_sync, (gpr_atm)0);
+#endif /* defined(GRPC_TSAN) */
+
+  for (i = 0; i < fd_count; i++) {
+    ev.events = (uint32_t)(EPOLLIN | EPOLLOUT | EPOLLET);
+    ev.data.ptr = fds[i];
+    err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD, fds[i]->fd, &ev);
+
+    if (err < 0) {
+      if (errno != EEXIST) {
+        gpr_asprintf(
+            &err_msg,
+            "epoll_ctl (epoll_fd: %d) add fd: %d failed with error: %d (%s)",
+            pi->epoll_fd, fds[i]->fd, errno, strerror(errno));
+        append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
+        gpr_free(err_msg);
+      }
+
+      continue;
+    }
+
+    if (pi->fd_cnt == pi->fd_capacity) {
+      pi->fd_capacity = GPR_MAX(pi->fd_capacity + 8, pi->fd_cnt * 3 / 2);
+      pi->fds = gpr_realloc(pi->fds, sizeof(grpc_fd *) * pi->fd_capacity);
+    }
+
+    pi->fds[pi->fd_cnt++] = fds[i];
+    if (add_fd_refs) {
+      GRPC_FD_REF(fds[i], "polling_island");
+    }
+  }
+}
+
+/* The caller is expected to hold pi->mu before calling this */
+static void polling_island_add_wakeup_fd_locked(polling_island *pi,
+                                                grpc_wakeup_fd *wakeup_fd,
+                                                grpc_error **error) {
+  struct epoll_event ev;
+  int err;
+  char *err_msg;
+  const char *err_desc = "polling_island_add_wakeup_fd";
+
+  ev.events = (uint32_t)(EPOLLIN | EPOLLET);
+  ev.data.ptr = wakeup_fd;
+  err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_ADD,
+                  GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), &ev);
+  if (err < 0 && errno != EEXIST) {
+    gpr_asprintf(&err_msg,
+                 "epoll_ctl (epoll_fd: %d) add wakeup fd: %d failed with "
+                 "error: %d (%s)",
+                 pi->epoll_fd, GRPC_WAKEUP_FD_GET_READ_FD(wakeup_fd), errno,
+                 strerror(errno));
+    append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
+    gpr_free(err_msg);
+  }
+}
+
+/* The caller is expected to hold pi->mu lock before calling this function */
+static void polling_island_remove_all_fds_locked(polling_island *pi,
+                                                 bool remove_fd_refs,
+                                                 grpc_error **error) {
+  int err;
+  size_t i;
+  char *err_msg;
+  const char *err_desc = "polling_island_remove_fds";
+
+  for (i = 0; i < pi->fd_cnt; i++) {
+    err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, pi->fds[i]->fd, NULL);
+    if (err < 0 && errno != ENOENT) {
+      gpr_asprintf(&err_msg,
+                   "epoll_ctl (epoll_fd: %d) delete fds[%zu]: %d failed with "
+                   "error: %d (%s)",
+                   pi->epoll_fd, i, pi->fds[i]->fd, errno, strerror(errno));
+      append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
+      gpr_free(err_msg);
+    }
+
+    if (remove_fd_refs) {
+      GRPC_FD_UNREF(pi->fds[i], "polling_island");
+    }
+  }
+
+  pi->fd_cnt = 0;
+}
+
+/* The caller is expected to hold pi->mu lock before calling this function */
+static void polling_island_remove_fd_locked(polling_island *pi, grpc_fd *fd,
+                                            bool is_fd_closed,
+                                            grpc_error **error) {
+  int err;
+  size_t i;
+  char *err_msg;
+  const char *err_desc = "polling_island_remove_fd";
+
+  /* If fd is already closed, then it would have automatically been removed
+     from the epoll set */
+  if (!is_fd_closed) {
+    err = epoll_ctl(pi->epoll_fd, EPOLL_CTL_DEL, fd->fd, NULL);
+    if (err < 0 && errno != ENOENT) {
+      gpr_asprintf(
+          &err_msg,
+          "epoll_ctl (epoll_fd: %d) del fd: %d failed with error: %d (%s)",
+          pi->epoll_fd, fd->fd, errno, strerror(errno));
+      append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
+      gpr_free(err_msg);
+    }
+  }
+
+  for (i = 0; i < pi->fd_cnt; i++) {
+    if (pi->fds[i] == fd) {
+      pi->fds[i] = pi->fds[--pi->fd_cnt];
+      GRPC_FD_UNREF(fd, "polling_island");
+      break;
+    }
+  }
+}
+
+/* Might return NULL in case of an error */
+static polling_island *polling_island_create(grpc_exec_ctx *exec_ctx,
+                                             grpc_fd *initial_fd,
+                                             grpc_error **error) {
+  polling_island *pi = NULL;
+  const char *err_desc = "polling_island_create";
+
+  *error = GRPC_ERROR_NONE;
+
+  pi = gpr_malloc(sizeof(*pi));
+  pi->workqueue_scheduler.vtable = &workqueue_scheduler_vtable;
+  gpr_mu_init(&pi->mu);
+  pi->fd_cnt = 0;
+  pi->fd_capacity = 0;
+  pi->fds = NULL;
+  pi->epoll_fd = -1;
+
+  gpr_mu_init(&pi->workqueue_read_mu);
+  gpr_mpscq_init(&pi->workqueue_items);
+  gpr_atm_rel_store(&pi->workqueue_item_count, 0);
+
+  gpr_atm_rel_store(&pi->ref_count, 0);
+  gpr_atm_rel_store(&pi->poller_count, 0);
+  gpr_atm_rel_store(&pi->merged_to, (gpr_atm)NULL);
+
+  gpr_mu_init(&pi->worker_list_mu);
+  worker_node_init(&pi->worker_list_head);
+
+  if (!append_error(error, grpc_wakeup_fd_init(&pi->workqueue_wakeup_fd),
+                    err_desc)) {
+    goto done;
+  }
+
+  pi->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+
+  if (pi->epoll_fd < 0) {
+    append_error(error, GRPC_OS_ERROR(errno, "epoll_create1"), err_desc);
+    goto done;
+  }
+
+  polling_island_add_wakeup_fd_locked(pi, &pi->workqueue_wakeup_fd, error);
+
+  if (initial_fd != NULL) {
+    polling_island_add_fds_locked(pi, &initial_fd, 1, true, error);
+  }
+
+done:
+  if (*error != GRPC_ERROR_NONE) {
+    polling_island_delete(exec_ctx, pi);
+    pi = NULL;
+  }
+  return pi;
+}
+
+static void polling_island_delete(grpc_exec_ctx *exec_ctx, polling_island *pi) {
+  GPR_ASSERT(pi->fd_cnt == 0);
+
+  if (pi->epoll_fd >= 0) {
+    close(pi->epoll_fd);
+  }
+  GPR_ASSERT(gpr_atm_no_barrier_load(&pi->workqueue_item_count) == 0);
+  gpr_mu_destroy(&pi->workqueue_read_mu);
+  gpr_mpscq_destroy(&pi->workqueue_items);
+  gpr_mu_destroy(&pi->mu);
+  grpc_wakeup_fd_destroy(&pi->workqueue_wakeup_fd);
+  gpr_mu_destroy(&pi->worker_list_mu);
+  GPR_ASSERT(is_worker_node_detached(&pi->worker_list_head));
+
+  gpr_free(pi->fds);
+  gpr_free(pi);
+}
+
+/* Attempts to get the last polling island in the linked list (linked by the
+ * 'merged_to' field). Since this does not lock the polling island, there are no
+ * guarantees that the island returned is the last island */
+static polling_island *polling_island_maybe_get_latest(polling_island *pi) {
+  polling_island *next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
+  while (next != NULL) {
+    pi = next;
+    next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
+  }
+
+  return pi;
+}
+
+/* Gets the lock on the *latest* polling island i.e the last polling island in
+   the linked list (linked by the 'merged_to' field). Call gpr_mu_unlock on the
+   returned polling island's mu.
+   Usage: To lock/unlock polling island "pi", do the following:
+     polling_island *pi_latest = polling_island_lock(pi);
+     ...
+     ... critical section ..
+     ...
+     gpr_mu_unlock(&pi_latest->mu); // NOTE: use pi_latest->mu. NOT pi->mu */
+static polling_island *polling_island_lock(polling_island *pi) {
+  polling_island *next = NULL;
+
+  while (true) {
+    next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
+    if (next == NULL) {
+      /* Looks like 'pi' is the last node in the linked list but unless we check
+         this by holding the pi->mu lock, we cannot be sure (i.e without the
+         pi->mu lock, we don't prevent island merges).
+         To be absolutely sure, check once more by holding the pi->mu lock */
+      gpr_mu_lock(&pi->mu);
+      next = (polling_island *)gpr_atm_acq_load(&pi->merged_to);
+      if (next == NULL) {
+        /* pi is in fact the last node and we have the pi->mu lock. we're done */
+        break;
+      }
+
+      /* pi->merged_to is not NULL i.e pi isn't the last node anymore. pi->mu
+       * isn't the lock we are interested in. Continue traversing the list */
+      gpr_mu_unlock(&pi->mu);
+    }
+
+    pi = next;
+  }
+
+  return pi;
+}
+
+/* Gets the lock on the *latest* polling islands in the linked lists pointed by
+   *p and *q (and also updates *p and *q to point to the latest polling islands)
+
+   This function is needed because calling the following block of code to obtain
+   locks on polling islands (*p and *q) is prone to deadlocks.
+     {
+       polling_island_lock(*p, true);
+       polling_island_lock(*q, true);
+     }
+
+   Usage/example:
+     polling_island *p1;
+     polling_island *p2;
+     ..
+     polling_island_lock_pair(&p1, &p2);
+     ..
+     .. Critical section with both p1 and p2 locked
+     ..
+     // Release locks: Always call polling_island_unlock_pair() to release locks
+     polling_island_unlock_pair(p1, p2);
+*/
+static void polling_island_lock_pair(polling_island **p, polling_island **q) {
+  polling_island *pi_1 = *p;
+  polling_island *pi_2 = *q;
+  polling_island *next_1 = NULL;
+  polling_island *next_2 = NULL;
+
+  /* The algorithm is simple:
+     - Go to the last polling islands in the linked lists *pi_1 and *pi_2 (and
+       keep updating pi_1 and pi_2)
+     - Then obtain locks on the islands by following a lock order rule of
+       locking polling_island with lower address first
+       Special case: Before obtaining the locks, check if pi_1 and pi_2 are
+       pointing to the same island. If that is the case, we can just call
+       polling_island_lock()
+     - After obtaining both the locks, double check that the polling islands
+       are still the last polling islands in their respective linked lists
+       (this is because there might have been polling island merges before
+       we got the lock)
+     - If the polling islands are the last islands, we are done. If not,
+       release the locks and continue the process from the first step */
+  while (true) {
+    next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to);
+    while (next_1 != NULL) {
+      pi_1 = next_1;
+      next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to);
+    }
+
+    next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to);
+    while (next_2 != NULL) {
+      pi_2 = next_2;
+      next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to);
+    }
+
+    if (pi_1 == pi_2) {
+      pi_1 = pi_2 = polling_island_lock(pi_1);
+      break;
+    }
+
+    if (pi_1 < pi_2) {
+      gpr_mu_lock(&pi_1->mu);
+      gpr_mu_lock(&pi_2->mu);
+    } else {
+      gpr_mu_lock(&pi_2->mu);
+      gpr_mu_lock(&pi_1->mu);
+    }
+
+    next_1 = (polling_island *)gpr_atm_acq_load(&pi_1->merged_to);
+    next_2 = (polling_island *)gpr_atm_acq_load(&pi_2->merged_to);
+    if (next_1 == NULL && next_2 == NULL) {
+      break;
+    }
+
+    gpr_mu_unlock(&pi_1->mu);
+    gpr_mu_unlock(&pi_2->mu);
+  }
+
+  *p = pi_1;
+  *q = pi_2;
+}
+
+static void polling_island_unlock_pair(polling_island *p, polling_island *q) {
+  if (p == q) {
+    gpr_mu_unlock(&p->mu);
+  } else {
+    gpr_mu_unlock(&p->mu);
+    gpr_mu_unlock(&q->mu);
+  }
+}
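
polling_island_lock_pair() sidesteps the classic two-lock deadlock by acquiring the pair in one globally consistent order: the island at the lower address is always locked first, and a single lock suffices when both pointers resolve to the same island. The sketch below isolates just that address-ordering rule with plain pthread mutexes; the merge-retry loop above is intentionally omitted (illustrative code, not part of the gem):

```c
/* Address-ordered two-lock acquisition: because every thread takes the
 * locks in the same global order, no two threads can each hold one lock
 * while waiting on the other. */
#include <pthread.h>
#include <stdio.h>

static void lock_pair(pthread_mutex_t *a, pthread_mutex_t *b) {
  if (a == b) {       /* same lock: take it once */
    pthread_mutex_lock(a);
  } else if (a < b) { /* lower address first, always (as the source does) */
    pthread_mutex_lock(a);
    pthread_mutex_lock(b);
  } else {
    pthread_mutex_lock(b);
    pthread_mutex_lock(a);
  }
}

static void unlock_pair(pthread_mutex_t *a, pthread_mutex_t *b) {
  pthread_mutex_unlock(a);
  if (a != b) pthread_mutex_unlock(b);
}

static pthread_mutex_t m1 = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t m2 = PTHREAD_MUTEX_INITIALIZER;

static void *worker(void *arg) {
  /* The two threads pass the locks in opposite order; lock_pair() still
   * acquires them in the same (address) order, so this cannot deadlock. */
  for (int i = 0; i < 100000; i++) {
    if (arg == NULL) {
      lock_pair(&m1, &m2);
    } else {
      lock_pair(&m2, &m1);
    }
    unlock_pair(&m1, &m2);
  }
  return NULL;
}

int main(void) {
  pthread_t t1, t2;
  pthread_create(&t1, NULL, worker, NULL);
  pthread_create(&t2, NULL, worker, (void *)1);
  pthread_join(t1, NULL);
  pthread_join(t2, NULL);
  puts("no deadlock");
  return 0;
}
```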
+
+static void workqueue_maybe_wakeup(polling_island *pi) {
+  /* If this thread is the current poller, then it may be that it's about to
+     decrement the current poller count, so we need to look past this thread */
+  bool is_current_poller = (g_current_thread_polling_island == pi);
+  gpr_atm min_current_pollers_for_wakeup = is_current_poller ? 1 : 0;
+  gpr_atm current_pollers = gpr_atm_no_barrier_load(&pi->poller_count);
+  /* Only issue a wakeup if it's likely that some poller could come in and take
+     it right now. Note that since we do an anticipatory mpscq_pop every poll
+     loop, it's ok if we miss the wakeup here, as we'll get the work item when
+     the next poller enters anyway. */
+  if (current_pollers > min_current_pollers_for_wakeup) {
+    GRPC_LOG_IF_ERROR("workqueue_wakeup_fd",
+                      grpc_wakeup_fd_wakeup(&pi->workqueue_wakeup_fd));
+  }
+}
+
+static void workqueue_move_items_to_parent(polling_island *q) {
+  polling_island *p = (polling_island *)gpr_atm_no_barrier_load(&q->merged_to);
+  if (p == NULL) {
+    return;
+  }
+  gpr_mu_lock(&q->workqueue_read_mu);
+  int num_added = 0;
+  while (gpr_atm_no_barrier_load(&q->workqueue_item_count) > 0) {
+    gpr_mpscq_node *n = gpr_mpscq_pop(&q->workqueue_items);
+    if (n != NULL) {
+      gpr_atm_no_barrier_fetch_add(&q->workqueue_item_count, -1);
+      gpr_atm_no_barrier_fetch_add(&p->workqueue_item_count, 1);
+      gpr_mpscq_push(&p->workqueue_items, n);
+      num_added++;
+    }
+  }
+  gpr_mu_unlock(&q->workqueue_read_mu);
+  if (num_added > 0) {
+    workqueue_maybe_wakeup(p);
+  }
+  workqueue_move_items_to_parent(p);
+}
+
+static polling_island *polling_island_merge(polling_island *p,
+                                            polling_island *q,
+                                            grpc_error **error) {
+  /* Get locks on both the polling islands */
+  polling_island_lock_pair(&p, &q);
+
+  if (p != q) {
+    /* Make sure that p points to the polling island with fewer fds than q */
+    if (p->fd_cnt > q->fd_cnt) {
+      GPR_SWAP(polling_island *, p, q);
+    }
+
+    /* Merge p with q i.e move all the fds from p (The one with fewer fds) to q
+       Note that the refcounts on the fds being moved will not change here.
+       This is why the last param in the following two functions is 'false') */
+    polling_island_add_fds_locked(q, p->fds, p->fd_cnt, false, error);
+    polling_island_remove_all_fds_locked(p, false, error);
+
+    /* Wakeup all the pollers (if any) on p so that they pick up this change */
+    polling_island_add_wakeup_fd_locked(p, &polling_island_wakeup_fd, error);
+
+    /* Add the 'merged_to' link from p --> q */
+    gpr_atm_rel_store(&p->merged_to, (gpr_atm)q);
+    PI_ADD_REF(q, "pi_merge"); /* To account for the new incoming ref from p */
+
+    workqueue_move_items_to_parent(p);
+  }
+  /* else if p == q, nothing needs to be done */
+
+  polling_island_unlock_pair(p, q);
+
+  /* Return the merged polling island (Note that no merge would have happened
+     if p == q which is ok) */
+  return q;
+}
+
+static void workqueue_enqueue(grpc_exec_ctx *exec_ctx, grpc_closure *closure,
+                              grpc_error *error) {
+  GPR_TIMER_BEGIN("workqueue.enqueue", 0);
+  grpc_workqueue *workqueue = (grpc_workqueue *)closure->scheduler;
+  /* take a ref to the workqueue: otherwise it can happen that whatever events
+   * this kicks off ends up destroying the workqueue before this function
+   * completes */
+  GRPC_WORKQUEUE_REF(workqueue, "enqueue");
+  polling_island *pi = (polling_island *)workqueue;
+  gpr_atm last = gpr_atm_no_barrier_fetch_add(&pi->workqueue_item_count, 1);
+  closure->error_data.error = error;
+  gpr_mpscq_push(&pi->workqueue_items, &closure->next_data.atm_next);
+  if (last == 0) {
+    workqueue_maybe_wakeup(pi);
+  }
+  workqueue_move_items_to_parent(pi);
+  GRPC_WORKQUEUE_UNREF(exec_ctx, workqueue, "enqueue");
+  GPR_TIMER_END("workqueue.enqueue", 0);
+}
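
workqueue_enqueue() above signals a poller only when its fetch-add reports that the item count was previously zero, i.e. on the empty-to-non-empty transition; pollers drain the queue opportunistically, so a wakeup for an already non-empty queue would be redundant, and a missed one is harmless. Here is that wakeup-elision pattern in isolation, with C11 atomics standing in for the gpr_atm API (illustrative names):

```c
/* Wake a poller only on the empty -> non-empty edge of an MPSC queue.
 * The producer increments the counter before pushing; only the producer
 * that observed a prior count of zero issues the (relatively expensive)
 * wakeup, e.g. a write to a wakeup fd. */
#include <stdatomic.h>
#include <stdio.h>

static atomic_long item_count;   /* zero-initialized at file scope */
static long wakeups_issued;

static void wakeup_poller(void) {
  wakeups_issued++; /* stands in for grpc_wakeup_fd_wakeup() */
}

static void enqueue_one(void) {
  long last = atomic_fetch_add(&item_count, 1);
  /* ... the item would be pushed onto the MPSC queue here ... */
  if (last == 0) {
    wakeup_poller(); /* only the transition out of empty needs a signal */
  }
}

int main(void) {
  for (int i = 0; i < 5; i++) enqueue_one(); /* queue never drains here */
  printf("5 enqueues, %ld wakeup(s)\n", wakeups_issued); /* prints 1 */
  return 0;
}
```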
+
+static grpc_closure_scheduler *workqueue_scheduler(grpc_workqueue *workqueue) {
+  polling_island *pi = (polling_island *)workqueue;
+  return workqueue == NULL ? grpc_schedule_on_exec_ctx
+                           : &pi->workqueue_scheduler;
+}
+
+static grpc_error *polling_island_global_init() {
+  grpc_error *error = GRPC_ERROR_NONE;
+
+  error = grpc_wakeup_fd_init(&polling_island_wakeup_fd);
+  if (error == GRPC_ERROR_NONE) {
+    error = grpc_wakeup_fd_wakeup(&polling_island_wakeup_fd);
+  }
+
+  return error;
+}
+
+static void polling_island_global_shutdown() {
+  grpc_wakeup_fd_destroy(&polling_island_wakeup_fd);
+}
+
+/*******************************************************************************
+ * Fd Definitions
+ */
+
+/* We need to keep a freelist not because of any concerns of malloc performance
+ * but instead so that implementations with multiple threads in (for example)
+ * epoll_wait deal with the race between pollset removal and incoming poll
+ * notifications.
+ *
+ * The problem is that the poller ultimately holds a reference to this
+ * object, so it is very difficult to know when it is safe to free it, at least
+ * without some expensive synchronization.
+ *
+ * If we keep the object freelisted, in the worst case losing this race just
+ * becomes a spurious read notification on a reused fd.
+ */
+
+/* The alarm system needs to be able to wakeup 'some poller' sometimes
+ * (specifically when a new alarm needs to be triggered earlier than the next
+ * alarm 'epoch'). This wakeup_fd gives us something to alert on when such a
+ * case occurs. */
+
+static grpc_fd *fd_freelist = NULL;
+static gpr_mu fd_freelist_mu;
+
+#ifdef GRPC_FD_REF_COUNT_DEBUG
+#define REF_BY(fd, n, reason) ref_by(fd, n, reason, __FILE__, __LINE__)
+#define UNREF_BY(fd, n, reason) unref_by(fd, n, reason, __FILE__, __LINE__)
+static void ref_by(grpc_fd *fd, int n, const char *reason, const char *file,
+                   int line) {
+  gpr_log(GPR_DEBUG, "FD %d %p ref %d %ld -> %ld [%s; %s:%d]", fd->fd,
+          (void *)fd, n, gpr_atm_no_barrier_load(&fd->refst),
+          gpr_atm_no_barrier_load(&fd->refst) + n, reason, file, line);
+#else
+#define REF_BY(fd, n, reason) ref_by(fd, n)
+#define UNREF_BY(fd, n, reason) unref_by(fd, n)
+static void ref_by(grpc_fd *fd, int n) {
+#endif
+  GPR_ASSERT(gpr_atm_no_barrier_fetch_add(&fd->refst, n) > 0);
+}
+
+#ifdef GRPC_FD_REF_COUNT_DEBUG
+static void unref_by(grpc_fd *fd, int n, const char *reason, const char *file,
+                     int line) {
+  gpr_atm old;
+  gpr_log(GPR_DEBUG, "FD %d %p unref %d %ld -> %ld [%s; %s:%d]", fd->fd,
+          (void *)fd, n, gpr_atm_no_barrier_load(&fd->refst),
+          gpr_atm_no_barrier_load(&fd->refst) - n, reason, file, line);
+#else
+static void unref_by(grpc_fd *fd, int n) {
+  gpr_atm old;
+#endif
+  old = gpr_atm_full_fetch_add(&fd->refst, -n);
+  if (old == n) {
+    /* Add the fd to the freelist */
+    gpr_mu_lock(&fd_freelist_mu);
+    fd->freelist_next = fd_freelist;
+    fd_freelist = fd;
+    grpc_iomgr_unregister_object(&fd->iomgr_object);
+
+    grpc_lfev_destroy(&fd->read_closure);
+    grpc_lfev_destroy(&fd->write_closure);
+
+    gpr_mu_unlock(&fd_freelist_mu);
+  } else {
+    GPR_ASSERT(old > n);
+  }
+}
+
+/* Increment refcount by two to avoid changing the orphan bit */
+#ifdef GRPC_FD_REF_COUNT_DEBUG
+static void fd_ref(grpc_fd *fd, const char *reason, const char *file,
+                   int line) {
+  ref_by(fd, 2, reason, file, line);
+}
+
+static void fd_unref(grpc_fd *fd, const char *reason, const char *file,
+                     int line) {
+  unref_by(fd, 2, reason, file, line);
+}
+#else
+static void fd_ref(grpc_fd *fd) { ref_by(fd, 2); }
+static void fd_unref(grpc_fd *fd) { unref_by(fd, 2); }
+#endif
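
fd_ref()/fd_unref() move the count by two because refst packs a state flag into bit 0 (1 = active, 0 = orphaned) and keeps the reference count in the higher bits; even-sized updates can never flip the flag, while fd_orphan()'s net change of minus one clears it exactly once, and unref_by() frees the struct when the pre-decrement value equals the amount being released. The encoding on its own, with C11 atomics standing in for gpr_atm (illustrative sketch):

```c
/* refst encoding: value = 2 * refcount + active_bit. Normal refs add or
 * subtract 2; orphaning nets out to -1, clearing the active bit once. */
#include <assert.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_long refst;

static void ref_by(long n) { atomic_fetch_add(&refst, n); }
/* Returns the value *before* the subtraction, like gpr_atm_full_fetch_add. */
static long unref_by(long n) { return atomic_fetch_sub(&refst, n); }

int main(void) {
  atomic_store(&refst, 1);         /* created: active bit set, no extra refs */
  ref_by(2);                       /* a poller takes a ref: 1 -> 3 */
  assert(atomic_load(&refst) & 1); /* active bit untouched by the even add */
  unref_by(2);                     /* poller drops its ref: 3 -> 1 */
  /* orphan: +1 keeps the struct alive through teardown, then -2 at the end */
  ref_by(1);                       /* 1 -> 2 */
  long old = unref_by(2);          /* old == 2: this was the last reference */
  assert(old == 2 && atomic_load(&refst) == 0);
  printf("freed: active bit cleared, refcount zero\n");
  return 0;
}
```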
997
|
+
static void fd_global_init(void) { gpr_mu_init(&fd_freelist_mu); }

static void fd_global_shutdown(void) {
  gpr_mu_lock(&fd_freelist_mu);
  gpr_mu_unlock(&fd_freelist_mu);
  while (fd_freelist != NULL) {
    grpc_fd *fd = fd_freelist;
    fd_freelist = fd_freelist->freelist_next;
    gpr_mu_destroy(&fd->po.mu);
    gpr_free(fd);
  }
  gpr_mu_destroy(&fd_freelist_mu);
}

static grpc_fd *fd_create(int fd, const char *name) {
  grpc_fd *new_fd = NULL;

  gpr_mu_lock(&fd_freelist_mu);
  if (fd_freelist != NULL) {
    new_fd = fd_freelist;
    fd_freelist = fd_freelist->freelist_next;
  }
  gpr_mu_unlock(&fd_freelist_mu);

  if (new_fd == NULL) {
    new_fd = gpr_malloc(sizeof(grpc_fd));
    gpr_mu_init(&new_fd->po.mu);
  }

  /* Note: It is not really needed to get the new_fd->po.mu lock here. If this
   * is a newly created fd (or an fd we got from the freelist), no one else
   * would be holding a lock to it anyway. */
  gpr_mu_lock(&new_fd->po.mu);
  new_fd->po.pi = NULL;
#ifdef PO_DEBUG
  new_fd->po.obj_type = POLL_OBJ_FD;
#endif

  gpr_atm_rel_store(&new_fd->refst, (gpr_atm)1);
  new_fd->fd = fd;
  new_fd->orphaned = false;
  grpc_lfev_init(&new_fd->read_closure);
  grpc_lfev_init(&new_fd->write_closure);
  gpr_atm_no_barrier_store(&new_fd->read_notifier_pollset, (gpr_atm)NULL);

  new_fd->freelist_next = NULL;
  new_fd->on_done_closure = NULL;

  gpr_mu_unlock(&new_fd->po.mu);

  char *fd_name;
  gpr_asprintf(&fd_name, "%s fd=%d", name, fd);
  grpc_iomgr_register_object(&new_fd->iomgr_object, fd_name);
#ifdef GRPC_FD_REF_COUNT_DEBUG
  gpr_log(GPR_DEBUG, "FD %d %p create %s", fd, (void *)new_fd, fd_name);
#endif
  gpr_free(fd_name);
  return new_fd;
}
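
fd_create() above is a mutex-guarded intrusive freelist: a retired grpc_fd is
chained through its freelist_next pointer and handed back out before the
allocator is consulted, so the mutex inside the struct survives across uses.
The same pattern in isolation, with hypothetical names and plain POSIX
primitives:

    #include <pthread.h>
    #include <stdlib.h>

    typedef struct node { struct node *freelist_next; /* payload fields... */ } node;

    static pthread_mutex_t freelist_mu = PTHREAD_MUTEX_INITIALIZER;
    static node *freelist = NULL;

    static node *node_create(void) {
      node *n = NULL;
      pthread_mutex_lock(&freelist_mu);
      if (freelist != NULL) { /* reuse a retired node if one is cached */
        n = freelist;
        freelist = freelist->freelist_next;
      }
      pthread_mutex_unlock(&freelist_mu);
      if (n == NULL) n = malloc(sizeof(*n)); /* fall back to the allocator */
      return n;
    }

    static void node_retire(node *n) {
      pthread_mutex_lock(&freelist_mu);
      n->freelist_next = freelist; /* LIFO push keeps hot nodes cache-warm */
      freelist = n;
      pthread_mutex_unlock(&freelist_mu);
    }
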
static int fd_wrapped_fd(grpc_fd *fd) {
  int ret_fd = -1;
  gpr_mu_lock(&fd->po.mu);
  if (!fd->orphaned) {
    ret_fd = fd->fd;
  }
  gpr_mu_unlock(&fd->po.mu);

  return ret_fd;
}

static void fd_orphan(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
                      grpc_closure *on_done, int *release_fd,
                      const char *reason) {
  bool is_fd_closed = false;
  grpc_error *error = GRPC_ERROR_NONE;
  polling_island *unref_pi = NULL;

  gpr_mu_lock(&fd->po.mu);
  fd->on_done_closure = on_done;

  /* If release_fd is not NULL, we should be relinquishing control of the file
     descriptor fd->fd (but we still own the grpc_fd structure). */
  if (release_fd != NULL) {
    *release_fd = fd->fd;
  } else {
    close(fd->fd);
    is_fd_closed = true;
  }

  fd->orphaned = true;

  /* Remove the active status but keep referenced. We want this grpc_fd struct
     to be alive (and not added to freelist) until the end of this function */
  REF_BY(fd, 1, reason);

  /* Remove the fd from the polling island:
     - Get a lock on the latest polling island (i.e the last island in the
       linked list pointed by fd->po.pi). This is the island that
       would actually contain the fd
     - Remove the fd from the latest polling island
     - Unlock the latest polling island
     - Set fd->po.pi to NULL (but remove the ref on the polling island
       before doing this.) */
  if (fd->po.pi != NULL) {
    polling_island *pi_latest = polling_island_lock(fd->po.pi);
    polling_island_remove_fd_locked(pi_latest, fd, is_fd_closed, &error);
    gpr_mu_unlock(&pi_latest->mu);

    unref_pi = fd->po.pi;
    fd->po.pi = NULL;
  }

  grpc_closure_sched(exec_ctx, fd->on_done_closure, GRPC_ERROR_REF(error));

  gpr_mu_unlock(&fd->po.mu);
  UNREF_BY(fd, 2, reason); /* Drop the reference */
  if (unref_pi != NULL) {
    /* Unref stale polling island here, outside the fd lock above.
       The polling island owns a workqueue which owns an fd, and unreffing
       inside the lock can cause an eventual lock loop that makes TSAN very
       unhappy. */
    PI_UNREF(exec_ctx, unref_pi, "fd_orphan");
  }
  GRPC_LOG_IF_ERROR("fd_orphan", GRPC_ERROR_REF(error));
  GRPC_ERROR_UNREF(error);
}
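
A detail worth noting in fd_orphan(): passing a non-NULL release_fd transfers
ownership of the OS-level descriptor back to the caller instead of close()ing
it, which is how a connected socket can be handed off to code outside the
polling engine. A hypothetical caller-side sketch using the types from this
file (take_back_fd is not part of the source):

    /* Reclaim the raw fd; the grpc_fd wrapper is still torn down normally. */
    static int take_back_fd(grpc_exec_ctx *exec_ctx, grpc_fd *gfd,
                            grpc_closure *on_done) {
      int raw_fd = -1;
      fd_orphan(exec_ctx, gfd, on_done, &raw_fd, "handoff");
      return raw_fd; /* caller now owns raw_fd and must close it itself */
    }
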
static grpc_pollset *fd_get_read_notifier_pollset(grpc_exec_ctx *exec_ctx,
                                                  grpc_fd *fd) {
  gpr_atm notifier = gpr_atm_acq_load(&fd->read_notifier_pollset);
  return (grpc_pollset *)notifier;
}

static bool fd_is_shutdown(grpc_fd *fd) {
  return grpc_lfev_is_shutdown(&fd->read_closure);
}

/* Might be called multiple times */
static void fd_shutdown(grpc_exec_ctx *exec_ctx, grpc_fd *fd, grpc_error *why) {
  if (grpc_lfev_set_shutdown(exec_ctx, &fd->read_closure,
                             GRPC_ERROR_REF(why))) {
    shutdown(fd->fd, SHUT_RDWR);
    grpc_lfev_set_shutdown(exec_ctx, &fd->write_closure, GRPC_ERROR_REF(why));
  }
  GRPC_ERROR_UNREF(why);
}

static void fd_notify_on_read(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
                              grpc_closure *closure) {
  grpc_lfev_notify_on(exec_ctx, &fd->read_closure, closure);
}

static void fd_notify_on_write(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
                               grpc_closure *closure) {
  grpc_lfev_notify_on(exec_ctx, &fd->write_closure, closure);
}

static grpc_workqueue *fd_get_workqueue(grpc_fd *fd) {
  gpr_mu_lock(&fd->po.mu);
  grpc_workqueue *workqueue =
      GRPC_WORKQUEUE_REF((grpc_workqueue *)fd->po.pi, "fd_get_workqueue");
  gpr_mu_unlock(&fd->po.mu);
  return workqueue;
}

/*******************************************************************************
 * Pollset Definitions
 */
GPR_TLS_DECL(g_current_thread_pollset);
GPR_TLS_DECL(g_current_thread_worker);
static __thread bool g_initialized_sigmask;
static __thread sigset_t g_orig_sigmask;
static __thread sigset_t g_wakeup_sig_set;

static void sig_handler(int sig_num) {
#ifdef GRPC_EPOLL_DEBUG
  gpr_log(GPR_INFO, "Received signal %d", sig_num);
#endif
}

static void pollset_worker_init(grpc_pollset_worker *worker) {
  worker->pt_id = pthread_self();
  worker->next = worker->prev = NULL;
  gpr_atm_no_barrier_store(&worker->is_kicked, (gpr_atm)0);
  gpr_atm_no_barrier_store(&worker->is_polling_turn, (gpr_atm)0);
  worker_node_init(&worker->pi_list_link);
}

static void poller_kick_init() { signal(grpc_wakeup_signal, sig_handler); }

/* Global state management */
static grpc_error *pollset_global_init(void) {
  gpr_tls_init(&g_current_thread_pollset);
  gpr_tls_init(&g_current_thread_worker);
  poller_kick_init();
  return GRPC_ERROR_NONE;
}

static void pollset_global_shutdown(void) {
  gpr_tls_destroy(&g_current_thread_pollset);
  gpr_tls_destroy(&g_current_thread_worker);
}

static grpc_error *worker_kick(grpc_pollset_worker *worker,
                               gpr_atm *is_kicked) {
  grpc_error *err = GRPC_ERROR_NONE;

  /* Kick the worker only if it was not already kicked */
  if (gpr_atm_no_barrier_cas(is_kicked, (gpr_atm)0, (gpr_atm)1)) {
    GRPC_POLLING_TRACE(
        "pollset_worker_kick: Kicking worker: %p (thread id: %ld)",
        (void *)worker, (long int)worker->pt_id);
    int err_num = pthread_kill(worker->pt_id, grpc_wakeup_signal);
    if (err_num != 0) {
      err = GRPC_OS_ERROR(err_num, "pthread_kill");
    }
  }
  return err;
}
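
worker_kick() uses the compare-and-swap from 0 to 1 as a one-shot latch: only
the first kicker in a cycle pays for a pthread_kill(), and later kicks become
no-ops until the worker resets the flag. The same idiom with C11 atomics
instead of gpr_atm (sketch; kick_once is an illustrative name):

    #include <pthread.h>
    #include <signal.h>
    #include <stdatomic.h>

    /* Returns 0 on success or a pthread_kill() error number. */
    static int kick_once(pthread_t tid, atomic_int *latched, int wakeup_sig) {
      int expected = 0;
      /* Only the transition 0 -> 1 sends the signal; repeats are free. */
      if (atomic_compare_exchange_strong(latched, &expected, 1)) {
        return pthread_kill(tid, wakeup_sig);
      }
      return 0;
    }
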
static grpc_error *pollset_worker_kick(grpc_pollset_worker *worker) {
  return worker_kick(worker, &worker->is_kicked);
}

static grpc_error *poller_kick(grpc_pollset_worker *worker) {
  return worker_kick(worker, &worker->is_polling_turn);
}

/* Return 1 if the pollset has active threads in pollset_work (pollset must
 * be locked) */
static int pollset_has_workers(grpc_pollset *p) {
  return p->root_worker.next != &p->root_worker;
}

static void remove_worker(grpc_pollset *p, grpc_pollset_worker *worker) {
  worker->prev->next = worker->next;
  worker->next->prev = worker->prev;
}

static grpc_pollset_worker *pop_front_worker(grpc_pollset *p) {
  if (pollset_has_workers(p)) {
    grpc_pollset_worker *w = p->root_worker.next;
    remove_worker(p, w);
    return w;
  } else {
    return NULL;
  }
}

static void push_back_worker(grpc_pollset *p, grpc_pollset_worker *worker) {
  worker->next = &p->root_worker;
  worker->prev = worker->next->prev;
  worker->prev->next = worker->next->prev = worker;
}

static void push_front_worker(grpc_pollset *p, grpc_pollset_worker *worker) {
  worker->prev = &p->root_worker;
  worker->next = worker->prev->next;
  worker->prev->next = worker->next->prev = worker;
}

/* p->mu must be held before calling this function */
static grpc_error *pollset_kick(grpc_pollset *p,
                                grpc_pollset_worker *specific_worker) {
  GPR_TIMER_BEGIN("pollset_kick", 0);
  grpc_error *error = GRPC_ERROR_NONE;
  const char *err_desc = "Kick Failure";
  grpc_pollset_worker *worker = specific_worker;
  if (worker != NULL) {
    if (worker == GRPC_POLLSET_KICK_BROADCAST) {
      if (pollset_has_workers(p)) {
        GPR_TIMER_BEGIN("pollset_kick.broadcast", 0);
        for (worker = p->root_worker.next; worker != &p->root_worker;
             worker = worker->next) {
          if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) {
            append_error(&error, pollset_worker_kick(worker), err_desc);
          }
        }
        GPR_TIMER_END("pollset_kick.broadcast", 0);
      } else {
        p->kicked_without_pollers = true;
      }
    } else {
      GPR_TIMER_MARK("kicked_specifically", 0);
      if (gpr_tls_get(&g_current_thread_worker) != (intptr_t)worker) {
        append_error(&error, pollset_worker_kick(worker), err_desc);
      }
    }
  } else if (gpr_tls_get(&g_current_thread_pollset) != (intptr_t)p) {
    /* Since worker == NULL, it means that we can kick "any" worker on this
       pollset 'p'. If 'p' happens to be the same pollset this thread is
       currently polling (i.e in the pollset_work() function), then there is no
       need to kick any other worker since the current thread can just absorb
       the kick. This is the reason why we enter this case only when
       g_current_thread_pollset is != p */

    GPR_TIMER_MARK("kick_anonymous", 0);
    worker = pop_front_worker(p);
    if (worker != NULL) {
      GPR_TIMER_MARK("finally_kick", 0);
      push_back_worker(p, worker);
      append_error(&error, pollset_worker_kick(worker), err_desc);
    } else {
      GPR_TIMER_MARK("kicked_no_pollers", 0);
      p->kicked_without_pollers = true;
    }
  }

  GPR_TIMER_END("pollset_kick", 0);
  GRPC_LOG_IF_ERROR("pollset_kick", GRPC_ERROR_REF(error));
  return error;
}

static void pollset_init(grpc_pollset *pollset, gpr_mu **mu) {
  gpr_mu_init(&pollset->po.mu);
  *mu = &pollset->po.mu;
  pollset->po.pi = NULL;
#ifdef PO_DEBUG
  pollset->po.obj_type = POLL_OBJ_POLLSET;
#endif

  pollset->root_worker.next = pollset->root_worker.prev = &pollset->root_worker;
  pollset->kicked_without_pollers = false;

  pollset->shutting_down = false;
  pollset->finish_shutdown_called = false;
  pollset->shutdown_done = NULL;
}

/* Convert millis to timespec (clock-type is assumed to be GPR_TIMESPAN) */
static struct timespec millis_to_timespec(int millis) {
  struct timespec linux_ts;
  gpr_timespec gpr_ts;

  if (millis == -1) {
    gpr_ts = gpr_inf_future(GPR_TIMESPAN);
  } else {
    gpr_ts = gpr_time_from_millis(millis, GPR_TIMESPAN);
  }

  linux_ts.tv_sec = (time_t)gpr_ts.tv_sec;
  linux_ts.tv_nsec = gpr_ts.tv_nsec;
  return linux_ts;
}

/* Convert a timespec to milliseconds:
   - Very small or negative poll times are clamped to zero to do a non-blocking
     poll (which becomes spin polling)
   - Other small values are rounded up to one millisecond
   - Polls longer than a millisecond are rounded up to the next nearest
     millisecond to avoid spinning
   - Infinite timeouts are converted to -1 */
static int poll_deadline_to_millis_timeout(gpr_timespec deadline,
                                           gpr_timespec now) {
  gpr_timespec timeout;
  static const int64_t max_spin_polling_us = 10;
  if (gpr_time_cmp(deadline, gpr_inf_future(deadline.clock_type)) == 0) {
    return -1;
  }

  if (gpr_time_cmp(deadline, gpr_time_add(now, gpr_time_from_micros(
                                                   max_spin_polling_us,
                                                   GPR_TIMESPAN))) <= 0) {
    return 0;
  }
  timeout = gpr_time_sub(deadline, now);
  int millis = gpr_time_to_millis(gpr_time_add(
      timeout, gpr_time_from_nanos(GPR_NS_PER_MS - 1, GPR_TIMESPAN)));
  return millis >= 1 ? millis : 1;
}
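
The rounding rules above are easiest to see with concrete values (a
hypothetical check, using the public gpr time helpers):

    gpr_timespec now = gpr_now(GPR_CLOCK_MONOTONIC);
    gpr_timespec soon = gpr_time_add(now, gpr_time_from_micros(1200, GPR_TIMESPAN));

    poll_deadline_to_millis_timeout(soon, now); /* 1.2ms -> 2 (rounded up) */
    poll_deadline_to_millis_timeout(now, now);  /* expired -> 0 (non-blocking) */
    poll_deadline_to_millis_timeout(
        gpr_inf_future(GPR_CLOCK_MONOTONIC), now); /* -> -1 (block forever) */
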
static void fd_become_readable(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
                               grpc_pollset *notifier) {
  grpc_lfev_set_ready(exec_ctx, &fd->read_closure);

  /* Note, it is possible that fd_become_readable might be called twice with
     different 'notifier's when an fd becomes readable and it is in two epoll
     sets (This can happen briefly during polling island merges). In such cases
     it does not really matter which notifier is set as the
     read_notifier_pollset (They would both point to the same polling island
     anyway) */
  /* Use release store to match with acquire load in fd_get_read_notifier */
  gpr_atm_rel_store(&fd->read_notifier_pollset, (gpr_atm)notifier);
}

static void fd_become_writable(grpc_exec_ctx *exec_ctx, grpc_fd *fd) {
  grpc_lfev_set_ready(exec_ctx, &fd->write_closure);
}

static void pollset_release_polling_island(grpc_exec_ctx *exec_ctx,
                                           grpc_pollset *ps, char *reason) {
  if (ps->po.pi != NULL) {
    PI_UNREF(exec_ctx, ps->po.pi, reason);
  }
  ps->po.pi = NULL;
}

static void finish_shutdown_locked(grpc_exec_ctx *exec_ctx,
                                   grpc_pollset *pollset) {
  /* The pollset cannot have any workers if we are at this stage */
  GPR_ASSERT(!pollset_has_workers(pollset));

  pollset->finish_shutdown_called = true;

  /* Release the ref and set pollset->po.pi to NULL */
  pollset_release_polling_island(exec_ctx, pollset, "ps_shutdown");
  grpc_closure_sched(exec_ctx, pollset->shutdown_done, GRPC_ERROR_NONE);
}

/* pollset->po.mu lock must be held by the caller before calling this */
static void pollset_shutdown(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
                             grpc_closure *closure) {
  GPR_TIMER_BEGIN("pollset_shutdown", 0);
  GPR_ASSERT(!pollset->shutting_down);
  pollset->shutting_down = true;
  pollset->shutdown_done = closure;
  pollset_kick(pollset, GRPC_POLLSET_KICK_BROADCAST);

  /* If the pollset has any workers, we cannot call finish_shutdown_locked()
     because it would release the underlying polling island. In such a case, we
     let the last worker call finish_shutdown_locked() from pollset_work() */
  if (!pollset_has_workers(pollset)) {
    GPR_ASSERT(!pollset->finish_shutdown_called);
    GPR_TIMER_MARK("pollset_shutdown.finish_shutdown_locked", 0);
    finish_shutdown_locked(exec_ctx, pollset);
  }
  GPR_TIMER_END("pollset_shutdown", 0);
}

/* pollset_shutdown is guaranteed to be called before pollset_destroy. So other
 * than destroying the mutexes, there is nothing special that needs to be done
 * here */
static void pollset_destroy(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset) {
  GPR_ASSERT(!pollset_has_workers(pollset));
  gpr_mu_destroy(&pollset->po.mu);
}

static bool maybe_do_workqueue_work(grpc_exec_ctx *exec_ctx,
                                    polling_island *pi) {
  if (gpr_mu_trylock(&pi->workqueue_read_mu)) {
    gpr_mpscq_node *n = gpr_mpscq_pop(&pi->workqueue_items);
    gpr_mu_unlock(&pi->workqueue_read_mu);
    if (n != NULL) {
      if (gpr_atm_full_fetch_add(&pi->workqueue_item_count, -1) > 1) {
        workqueue_maybe_wakeup(pi);
      }
      grpc_closure *c = (grpc_closure *)n;
      grpc_error *error = c->error_data.error;
#ifndef NDEBUG
      c->scheduled = false;
#endif
      c->cb(exec_ctx, c->cb_arg, error);
      GRPC_ERROR_UNREF(error);
      return true;
    } else if (gpr_atm_no_barrier_load(&pi->workqueue_item_count) > 0) {
      /* n == NULL might mean there's work but it's not available to be popped
       * yet - try to ensure another workqueue wakes up to check shortly if so
       */
      workqueue_maybe_wakeup(pi);
    }
  }
  return false;
}
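
Note that maybe_do_workqueue_work() runs at most one closure per call: it pops
a single item and, if the pre-decrement counter shows more work remaining,
wakes a peer poller instead of draining the queue itself, keeping poll latency
bounded. The accounting reduces to the following (sketch; wq_counter and the
helper name are illustrative, not gRPC API):

    #include <stdatomic.h>
    #include <stdbool.h>

    typedef struct { atomic_long item_count; } wq_counter;

    /* Call after popping one item. fetch_sub returns the pre-decrement
       value, so > 1 means at least one item is left for someone else. */
    static bool should_wake_peer_after_pop(wq_counter *wq) {
      return atomic_fetch_sub(&wq->item_count, 1) > 1;
    }
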
/* NOTE: This function may modify 'now' */
static bool acquire_polling_lease(grpc_pollset_worker *worker,
                                  polling_island *pi, gpr_timespec deadline,
                                  gpr_timespec *now) {
  bool is_lease_acquired = false;

  gpr_mu_lock(&pi->worker_list_mu);  // LOCK
  long num_pollers = gpr_atm_no_barrier_load(&pi->poller_count);

  if (num_pollers >= g_max_pollers_per_pi) {
    push_back_worker_node(&pi->worker_list_head, &worker->pi_list_link);
    gpr_mu_unlock(&pi->worker_list_mu);  // UNLOCK

    bool is_timeout = false;
    int ret;
    int timeout_ms = poll_deadline_to_millis_timeout(deadline, *now);
    if (timeout_ms == -1) {
      ret = sigwaitinfo(&g_wakeup_sig_set, NULL);
    } else {
      struct timespec sigwait_timeout = millis_to_timespec(timeout_ms);
      GRPC_SCHEDULING_START_BLOCKING_REGION;
      ret = sigtimedwait(&g_wakeup_sig_set, NULL, &sigwait_timeout);
      GRPC_SCHEDULING_END_BLOCKING_REGION;
    }

    if (ret == -1) {
      if (errno == EAGAIN) {
        is_timeout = true;
      } else {
        /* NOTE: This should not happen. If we see these log messages, it means
           we are most likely doing something incorrect in the setup needed for
           sigwaitinfo/sigtimedwait */
        gpr_log(GPR_ERROR,
                "sigtimedwait failed with retcode: %d (timeout_ms: %d)", errno,
                timeout_ms);
      }
    }

    /* Did the worker come out of sigtimedwait because a thread that just
       exited epoll kicked it (see the release_polling_lease function)? */
    bool is_polling_turn = gpr_atm_acq_load(&worker->is_polling_turn);

    /* Did the worker come out of sigtimedwait due to a thread alerting it that
       some completion event was (likely) available in the completion queue? */
    bool is_kicked = gpr_atm_no_barrier_load(&worker->is_kicked);

    if (is_kicked || is_timeout) {
      *now = deadline; /* Essentially make the epoll timeout = 0 */
    } else if (is_polling_turn) {
      *now = gpr_now(GPR_CLOCK_MONOTONIC); /* Reduce the epoll timeout */
    }

    gpr_mu_lock(&pi->worker_list_mu);  // LOCK
    /* The node might have already been removed from the list by the poller
       that kicked this. However it is safe to call 'remove_worker_node' on
       an already detached node */
    remove_worker_node(&worker->pi_list_link);
    /* It is important to read num_pollers again under the lock so that we
     * have the latest value, which cannot change while we are doing the
     * "(num_pollers < g_max_pollers_per_pi)" check a few lines below */
    num_pollers = gpr_atm_no_barrier_load(&pi->poller_count);
  }

  if (num_pollers < g_max_pollers_per_pi) {
    gpr_atm_no_barrier_fetch_add(&pi->poller_count, 1);
    is_lease_acquired = true;
  }

  gpr_mu_unlock(&pi->worker_list_mu);  // UNLOCK
  return is_lease_acquired;
}

static void release_polling_lease(polling_island *pi, grpc_error **error) {
  gpr_mu_lock(&pi->worker_list_mu);

  gpr_atm_no_barrier_fetch_add(&pi->poller_count, -1);
  worker_node *node = pop_front_worker_node(&pi->worker_list_head);
  if (node != NULL) {
    grpc_pollset_worker *next_worker = WORKER_FROM_WORKER_LIST_NODE(node);
    append_error(error, poller_kick(next_worker), "poller kick error");
  }

  gpr_mu_unlock(&pi->worker_list_mu);
}
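
The acquire/release pair above is, at its core, a counting semaphore with an
explicit FIFO of parked workers: at most g_max_pollers_per_pi threads hold a
polling "lease" at a time, and a poller leaving epoll hands its slot to the
worker at the head of the list. Ignoring the deadline adjustment and the
signal plumbing, the control flow can be sketched with a plain POSIX
semaphore (much coarser than the real code):

    #include <semaphore.h>
    #include <sys/epoll.h>

    static sem_t poll_slots; /* initialized elsewhere to g_max_pollers_per_pi */

    static int poll_with_lease(int epfd, struct epoll_event *evs, int n,
                               int timeout_ms) {
      sem_wait(&poll_slots);                         /* park until a slot frees */
      int rv = epoll_wait(epfd, evs, n, timeout_ms); /* bounded concurrency */
      sem_post(&poll_slots);                         /* wake the next waiter */
      return rv;
    }
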
#define GRPC_EPOLL_MAX_EVENTS 100
static void pollset_do_epoll_pwait(grpc_exec_ctx *exec_ctx, int epoll_fd,
                                   grpc_pollset *pollset, polling_island *pi,
                                   grpc_pollset_worker *worker,
                                   gpr_timespec now, gpr_timespec deadline,
                                   sigset_t *sig_mask, grpc_error **error) {
  /* Only g_max_pollers_per_pi threads can be doing polling in parallel.
     If we cannot get a lease, we cannot continue to do epoll_pwait() */
  if (!acquire_polling_lease(worker, pi, deadline, &now)) {
    return;
  }

  struct epoll_event ep_ev[GRPC_EPOLL_MAX_EVENTS];
  int ep_rv;
  char *err_msg;
  const char *err_desc = "pollset_work_and_unlock";

  /* timeout_ms is the time between 'now' and 'deadline' */
  int timeout_ms = poll_deadline_to_millis_timeout(deadline, now);

  GRPC_SCHEDULING_START_BLOCKING_REGION;
  ep_rv =
      epoll_pwait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, timeout_ms, sig_mask);
  GRPC_SCHEDULING_END_BLOCKING_REGION;

  /* Give back the lease right away so that some other thread can enter */
  release_polling_lease(pi, error);

  if (ep_rv < 0) {
    if (errno != EINTR) {
      gpr_asprintf(&err_msg,
                   "epoll_wait() epoll fd: %d failed with error: %d (%s)",
                   epoll_fd, errno, strerror(errno));
      append_error(error, GRPC_OS_ERROR(errno, err_msg), err_desc);
    } else {
      /* We were interrupted. Save an iteration by doing a zero-timeout
         epoll_wait to see if there are any other events of interest */
      GRPC_POLLING_TRACE("pollset_work: pollset: %p, worker: %p received kick",
                         (void *)pollset, (void *)worker);
      ep_rv = epoll_wait(epoll_fd, ep_ev, GRPC_EPOLL_MAX_EVENTS, 0);
    }
  }

#ifdef GRPC_TSAN
  /* See the definition of g_poll_sync for more details */
  gpr_atm_acq_load(&g_epoll_sync);
#endif /* defined(GRPC_TSAN) */

  for (int i = 0; i < ep_rv; ++i) {
    void *data_ptr = ep_ev[i].data.ptr;
    if (data_ptr == &pi->workqueue_wakeup_fd) {
      append_error(error,
                   grpc_wakeup_fd_consume_wakeup(&pi->workqueue_wakeup_fd),
                   err_desc);
      maybe_do_workqueue_work(exec_ctx, pi);
    } else if (data_ptr == &polling_island_wakeup_fd) {
      GRPC_POLLING_TRACE(
          "pollset_work: pollset: %p, worker: %p polling island (epoll_fd: "
          "%d) got merged",
          (void *)pollset, (void *)worker, epoll_fd);
      /* This means that our polling island is merged with a different
         island. We do not have to do anything here since the subsequent call
         to the function pollset_work_and_unlock() will pick up the correct
         epoll_fd */
    } else {
      grpc_fd *fd = data_ptr;
      int cancel = ep_ev[i].events & (EPOLLERR | EPOLLHUP);
      int read_ev = ep_ev[i].events & (EPOLLIN | EPOLLPRI);
      int write_ev = ep_ev[i].events & EPOLLOUT;
      if (read_ev || cancel) {
        fd_become_readable(exec_ctx, fd, pollset);
      }
      if (write_ev || cancel) {
        fd_become_writable(exec_ctx, fd);
      }
    }
  }
}

/* Note: sig_mask contains the signal mask to use *during* epoll_wait() */
static void pollset_work_and_unlock(grpc_exec_ctx *exec_ctx,
                                    grpc_pollset *pollset,
                                    grpc_pollset_worker *worker,
                                    gpr_timespec now, gpr_timespec deadline,
                                    sigset_t *sig_mask, grpc_error **error) {
  int epoll_fd = -1;
  polling_island *pi = NULL;
  GPR_TIMER_BEGIN("pollset_work_and_unlock", 0);

  /* We need to get the epoll_fd to wait on. The epoll_fd is inside the
     latest polling island pointed to by pollset->po.pi

     Since epoll_fd is immutable, it is safe to read it without a lock on the
     polling island. There is however a possibility that the polling island
     from which we got the epoll_fd got merged with another island in the
     meantime. This is okay because in such a case, we will wake up right away
     from epoll_pwait() (because any merge will poison the old polling island's
     epoll set 'polling_island_wakeup_fd') and then pick up the latest
     polling_island the next time this function - pollset_work_and_unlock() -
     is called */

  if (pollset->po.pi == NULL) {
    pollset->po.pi = polling_island_create(exec_ctx, NULL, error);
    if (pollset->po.pi == NULL) {
      GPR_TIMER_END("pollset_work_and_unlock", 0);
      return; /* Fatal error. Cannot continue */
    }

    PI_ADD_REF(pollset->po.pi, "ps");
    GRPC_POLLING_TRACE("pollset_work: pollset: %p created new pi: %p",
                       (void *)pollset, (void *)pollset->po.pi);
  }

  pi = polling_island_maybe_get_latest(pollset->po.pi);
  epoll_fd = pi->epoll_fd;

  /* Update pollset->po.pi, since the island it points to may be older than
     the one pointed to by pi */
  if (pollset->po.pi != pi) {
    /* Always do PI_ADD_REF before PI_UNREF because PI_UNREF may cause the
       polling island to be deleted */
    PI_ADD_REF(pi, "ps");
    PI_UNREF(exec_ctx, pollset->po.pi, "ps");
    pollset->po.pi = pi;
  }

  /* Add an extra ref so that the island does not get destroyed (which means
     the epoll_fd won't be closed) while we are doing an epoll_wait() on the
     epoll_fd */
  PI_ADD_REF(pi, "ps_work");
  gpr_mu_unlock(&pollset->po.mu);

  /* If we get some workqueue work to do, it might end up completing an item on
     the completion queue, so there's no need to poll... so we skip that and
     redo the complete loop to verify */
  if (!maybe_do_workqueue_work(exec_ctx, pi)) {
    g_current_thread_polling_island = pi;
    pollset_do_epoll_pwait(exec_ctx, epoll_fd, pollset, pi, worker, now,
                           deadline, sig_mask, error);
    g_current_thread_polling_island = NULL;
  }

  GPR_ASSERT(pi != NULL);

  /* Before leaving, release the extra ref we added to the polling island. It
     is important to use "pi" here (i.e our old copy of pollset->po.pi
     that we got before releasing the polling island lock). This is because the
     pollset->po.pi pointer might get updated in other parts of the
     code when there is an island merge while we are doing epoll_wait() above */
  PI_UNREF(exec_ctx, pi, "ps_work");

  GPR_TIMER_END("pollset_work_and_unlock", 0);
}

/* pollset->po.mu lock must be held by the caller before calling this.
   The function pollset_work() may temporarily release the lock (pollset->po.mu)
   during the course of its execution but it will always re-acquire the lock and
   ensure that it is held by the time the function returns */
static grpc_error *pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
                                grpc_pollset_worker **worker_hdl,
                                gpr_timespec now, gpr_timespec deadline) {
  GPR_TIMER_BEGIN("pollset_work", 0);
  grpc_error *error = GRPC_ERROR_NONE;

  grpc_pollset_worker worker;
  pollset_worker_init(&worker);

  if (worker_hdl) *worker_hdl = &worker;

  gpr_tls_set(&g_current_thread_pollset, (intptr_t)pollset);
  gpr_tls_set(&g_current_thread_worker, (intptr_t)&worker);

  if (pollset->kicked_without_pollers) {
    /* If the pollset was kicked without pollers, pretend that the current
       worker got the kick and skip polling. A kick indicates that there is some
       work that needs attention like an event on the completion queue or an
       alarm */
    GPR_TIMER_MARK("pollset_work.kicked_without_pollers", 0);
    pollset->kicked_without_pollers = 0;
  } else if (!pollset->shutting_down) {
    /* We use the posix signal with number 'grpc_wakeup_signal' for waking up
       (i.e 'kicking') a worker in the pollset. A 'kick' is a way to inform the
       worker that there is some pending work that needs immediate attention
       (like an event on the completion queue, or a polling island merge that
       results in a new epoll-fd to wait on) and that the worker should not
       spend time waiting in epoll_pwait().

       A worker can be kicked anytime from the point it is added to the pollset
       via push_front_worker() (or push_back_worker()) to the point it is
       removed via remove_worker().
       If the worker is kicked before/during it calls epoll_pwait(), it should
       immediately exit from epoll_wait(). If the worker is kicked after it
       returns from epoll_wait(), then nothing really needs to be done.

       To accomplish this, we mask 'grpc_wakeup_signal' on this thread at all
       times *except* when it is in epoll_pwait(). This way, the worker never
       misses acting on a kick */

    if (!g_initialized_sigmask) {
      sigemptyset(&g_wakeup_sig_set);
      sigaddset(&g_wakeup_sig_set, grpc_wakeup_signal);
      pthread_sigmask(SIG_BLOCK, &g_wakeup_sig_set, &g_orig_sigmask);
      sigdelset(&g_orig_sigmask, grpc_wakeup_signal);
      g_initialized_sigmask = true;
      /* new_mask:       The new thread mask which blocks 'grpc_wakeup_signal'.
                         This is the mask used at all times *except during
                         epoll_wait()*
         g_orig_sigmask: The thread mask which allows 'grpc_wakeup_signal' and
                         this is the mask to use *during epoll_wait()*

         The new_mask is set on the worker before it is added to the pollset
         (i.e before it can be kicked) */
    }

    push_front_worker(pollset, &worker); /* Add worker to pollset */

    pollset_work_and_unlock(exec_ctx, pollset, &worker, now, deadline,
                            &g_orig_sigmask, &error);
    grpc_exec_ctx_flush(exec_ctx);

    gpr_mu_lock(&pollset->po.mu);

    /* Note: There is no need to reset worker.is_kicked to 0 since we are no
       longer going to use this worker */
    remove_worker(pollset, &worker);
  }

  /* If we are the last worker on the pollset (i.e pollset_has_workers() is
     false at this point) and the pollset is shutting down, we may have to
     finish the shutdown process by calling finish_shutdown_locked().
     See pollset_shutdown() for more details.

     Note: Continuing to access pollset here is safe; it is the caller's
     responsibility to not destroy a pollset when it has outstanding calls to
     pollset_work() */
  if (pollset->shutting_down && !pollset_has_workers(pollset) &&
      !pollset->finish_shutdown_called) {
    GPR_TIMER_MARK("pollset_work.finish_shutdown_locked", 0);
    finish_shutdown_locked(exec_ctx, pollset);

    gpr_mu_unlock(&pollset->po.mu);
    grpc_exec_ctx_flush(exec_ctx);
    gpr_mu_lock(&pollset->po.mu);
  }

  if (worker_hdl) *worker_hdl = NULL;

  gpr_tls_set(&g_current_thread_pollset, (intptr_t)0);
  gpr_tls_set(&g_current_thread_worker, (intptr_t)0);

  GPR_TIMER_END("pollset_work", 0);

  GRPC_LOG_IF_ERROR("pollset_work", GRPC_ERROR_REF(error));
  return error;
}
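
The sigmask setup in pollset_work() is the classic epoll_pwait()/pselect()
race-avoidance pattern: the wakeup signal stays blocked during normal
execution, and the syscall atomically installs a mask that unblocks it only
for the duration of the wait, so a kick can never fall into the gap between
"check for kick" and "start waiting". Reduced to its essentials (sketch;
wait_kickably is an illustrative name):

    #include <pthread.h>
    #include <signal.h>
    #include <sys/epoll.h>

    static int wait_kickably(int epfd, struct epoll_event *evs, int n,
                             int timeout_ms, int wakeup_sig) {
      sigset_t block, during_wait;
      sigemptyset(&block);
      sigaddset(&block, wakeup_sig);
      /* Block the wakeup signal for normal execution; the previous mask is
         saved into during_wait... */
      pthread_sigmask(SIG_BLOCK, &block, &during_wait);
      /* ...and becomes the mask that allows the signal only inside the wait. */
      sigdelset(&during_wait, wakeup_sig);
      /* Mask swap and wait happen atomically, closing the race window. */
      return epoll_pwait(epfd, evs, n, timeout_ms, &during_wait);
    }
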
static void add_poll_object(grpc_exec_ctx *exec_ctx, poll_obj *bag,
                            poll_obj_type bag_type, poll_obj *item,
                            poll_obj_type item_type) {
  GPR_TIMER_BEGIN("add_poll_object", 0);

#ifdef PO_DEBUG
  GPR_ASSERT(item->obj_type == item_type);
  GPR_ASSERT(bag->obj_type == bag_type);
#endif

  grpc_error *error = GRPC_ERROR_NONE;
  polling_island *pi_new = NULL;

  gpr_mu_lock(&bag->mu);
  gpr_mu_lock(&item->mu);

retry:
  /*
   * 1) If item->pi and bag->pi are both non-NULL and equal, do nothing
   * 2) If item->pi and bag->pi are both NULL, create a new polling island
   *    (with a refcount of 2) and point item->pi and bag->pi to the new island
   * 3) If exactly one of item->pi or bag->pi is NULL, update it to point to
   *    the other's non-NULL pi
   * 4) Finally, if item->pi and bag->pi are non-NULL and not equal, merge the
   *    polling islands and update item->pi and bag->pi to point to the new
   *    island
   */

  /* Early out if we are trying to add an 'fd' to a 'bag' but the fd is already
   * orphaned */
  if (item_type == POLL_OBJ_FD && (FD_FROM_PO(item))->orphaned) {
    gpr_mu_unlock(&item->mu);
    gpr_mu_unlock(&bag->mu);
    return;
  }

  if (item->pi == bag->pi) {
    pi_new = item->pi;
    if (pi_new == NULL) {
      /* GPR_ASSERT(item->pi == bag->pi == NULL) */

      /* If we are adding an fd to a bag (i.e pollset or pollset_set), then
       * we need to do some extra work to make TSAN happy */
      if (item_type == POLL_OBJ_FD) {
        /* Unlock before creating a new polling island: the polling island will
           create a workqueue which creates a file descriptor, and holding an fd
           lock here can eventually cause a loop to appear to TSAN (making it
           unhappy). We don't think it's a real loop (there's an epoch point
           where that loop possibility disappears), but the advantages of
           keeping TSAN happy outweigh any performance advantage we might have
           by keeping the lock held. */
        gpr_mu_unlock(&item->mu);
        pi_new = polling_island_create(exec_ctx, FD_FROM_PO(item), &error);
        gpr_mu_lock(&item->mu);

        /* Need to reverify any assumptions made between the initial lock and
           getting to this branch: if they've changed, we need to throw away our
           work and figure things out again. */
        if (item->pi != NULL) {
          GRPC_POLLING_TRACE(
              "add_poll_object: Raced creating new polling island. pi_new: %p "
              "(fd: %d, %s: %p)",
              (void *)pi_new, FD_FROM_PO(item)->fd, poll_obj_string(bag_type),
              (void *)bag);
          /* No need to lock 'pi_new' here since this is a new polling island
             and no one has a reference to it yet */
          polling_island_remove_all_fds_locked(pi_new, true, &error);

          /* Ref and unref so that the polling island gets deleted during unref
           */
          PI_ADD_REF(pi_new, "dance_of_destruction");
          PI_UNREF(exec_ctx, pi_new, "dance_of_destruction");
          goto retry;
        }
      } else {
        pi_new = polling_island_create(exec_ctx, NULL, &error);
      }

      GRPC_POLLING_TRACE(
          "add_poll_object: Created new polling island. pi_new: %p (%s: %p, "
          "%s: %p)",
          (void *)pi_new, poll_obj_string(item_type), (void *)item,
          poll_obj_string(bag_type), (void *)bag);
    } else {
      GRPC_POLLING_TRACE(
          "add_poll_object: Same polling island. pi: %p (%s, %s)",
          (void *)pi_new, poll_obj_string(item_type),
          poll_obj_string(bag_type));
    }
  } else if (item->pi == NULL) {
    /* GPR_ASSERT(bag->pi != NULL) */
    /* Make pi_new point to latest pi */
    pi_new = polling_island_lock(bag->pi);

    if (item_type == POLL_OBJ_FD) {
      grpc_fd *fd = FD_FROM_PO(item);
      polling_island_add_fds_locked(pi_new, &fd, 1, true, &error);
    }

    gpr_mu_unlock(&pi_new->mu);
    GRPC_POLLING_TRACE(
        "add_poll_obj: item->pi was NULL. pi_new: %p (item(%s): %p, "
        "bag(%s): %p)",
        (void *)pi_new, poll_obj_string(item_type), (void *)item,
        poll_obj_string(bag_type), (void *)bag);
  } else if (bag->pi == NULL) {
    /* GPR_ASSERT(item->pi != NULL) */
    /* Make pi_new point to latest pi */
    pi_new = polling_island_lock(item->pi);
    gpr_mu_unlock(&pi_new->mu);
    GRPC_POLLING_TRACE(
        "add_poll_obj: bag->pi was NULL. pi_new: %p (item(%s): %p, "
        "bag(%s): %p)",
        (void *)pi_new, poll_obj_string(item_type), (void *)item,
        poll_obj_string(bag_type), (void *)bag);
  } else {
    pi_new = polling_island_merge(item->pi, bag->pi, &error);
    GRPC_POLLING_TRACE(
        "add_poll_obj: polling islands merged. pi_new: %p (item(%s): %p, "
        "bag(%s): %p)",
        (void *)pi_new, poll_obj_string(item_type), (void *)item,
        poll_obj_string(bag_type), (void *)bag);
  }

  /* At this point, pi_new is the polling island that both item->pi and bag->pi
     MUST be pointing to */

  if (item->pi != pi_new) {
    PI_ADD_REF(pi_new, poll_obj_string(item_type));
    if (item->pi != NULL) {
      PI_UNREF(exec_ctx, item->pi, poll_obj_string(item_type));
    }
    item->pi = pi_new;
  }

  if (bag->pi != pi_new) {
    PI_ADD_REF(pi_new, poll_obj_string(bag_type));
    if (bag->pi != NULL) {
      PI_UNREF(exec_ctx, bag->pi, poll_obj_string(bag_type));
    }
    bag->pi = pi_new;
  }

  gpr_mu_unlock(&item->mu);
  gpr_mu_unlock(&bag->mu);

  GRPC_LOG_IF_ERROR("add_poll_object", error);
  GPR_TIMER_END("add_poll_object", 0);
}
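
The retry loop in add_poll_object() reduces to a four-way decision on the pair
(item->pi, bag->pi). A compact reference table mirroring the comment at the
top of the function:

    /* (item->pi, bag->pi)        resulting pi_new
     *  (NULL,     NULL)       -> polling_island_create(); both now point at it
     *  (NULL,     P)          -> P (item's fd is added to P if item is an fd)
     *  (P,        NULL)       -> P
     *  (P,        Q), P != Q  -> polling_island_merge(P, Q)
     *  (P,        P)          -> P (nothing to do)
     */
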
static void pollset_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
                           grpc_fd *fd) {
  add_poll_object(exec_ctx, &pollset->po, POLL_OBJ_POLLSET, &fd->po,
                  POLL_OBJ_FD);
}

/*******************************************************************************
 * Pollset-set Definitions
 */

static grpc_pollset_set *pollset_set_create(void) {
  grpc_pollset_set *pss = gpr_malloc(sizeof(*pss));
  gpr_mu_init(&pss->po.mu);
  pss->po.pi = NULL;
#ifdef PO_DEBUG
  pss->po.obj_type = POLL_OBJ_POLLSET_SET;
#endif
  return pss;
}

static void pollset_set_destroy(grpc_exec_ctx *exec_ctx,
                                grpc_pollset_set *pss) {
  gpr_mu_destroy(&pss->po.mu);

  if (pss->po.pi != NULL) {
    PI_UNREF(exec_ctx, pss->po.pi, "pss_destroy");
  }

  gpr_free(pss);
}

static void pollset_set_add_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss,
                               grpc_fd *fd) {
  add_poll_object(exec_ctx, &pss->po, POLL_OBJ_POLLSET_SET, &fd->po,
                  POLL_OBJ_FD);
}

static void pollset_set_del_fd(grpc_exec_ctx *exec_ctx, grpc_pollset_set *pss,
                               grpc_fd *fd) {
  /* Nothing to do */
}

static void pollset_set_add_pollset(grpc_exec_ctx *exec_ctx,
                                    grpc_pollset_set *pss, grpc_pollset *ps) {
  add_poll_object(exec_ctx, &pss->po, POLL_OBJ_POLLSET_SET, &ps->po,
                  POLL_OBJ_POLLSET);
}

static void pollset_set_del_pollset(grpc_exec_ctx *exec_ctx,
                                    grpc_pollset_set *pss, grpc_pollset *ps) {
  /* Nothing to do */
}

static void pollset_set_add_pollset_set(grpc_exec_ctx *exec_ctx,
                                        grpc_pollset_set *bag,
                                        grpc_pollset_set *item) {
  add_poll_object(exec_ctx, &bag->po, POLL_OBJ_POLLSET_SET, &item->po,
                  POLL_OBJ_POLLSET_SET);
}

static void pollset_set_del_pollset_set(grpc_exec_ctx *exec_ctx,
                                        grpc_pollset_set *bag,
                                        grpc_pollset_set *item) {
  /* Nothing to do */
}

/*******************************************************************************
 * Event engine binding
 */

static void shutdown_engine(void) {
  fd_global_shutdown();
  pollset_global_shutdown();
  polling_island_global_shutdown();
}

static const grpc_event_engine_vtable vtable = {
    .pollset_size = sizeof(grpc_pollset),

    .fd_create = fd_create,
    .fd_wrapped_fd = fd_wrapped_fd,
    .fd_orphan = fd_orphan,
    .fd_shutdown = fd_shutdown,
    .fd_is_shutdown = fd_is_shutdown,
    .fd_notify_on_read = fd_notify_on_read,
    .fd_notify_on_write = fd_notify_on_write,
    .fd_get_read_notifier_pollset = fd_get_read_notifier_pollset,
    .fd_get_workqueue = fd_get_workqueue,

    .pollset_init = pollset_init,
    .pollset_shutdown = pollset_shutdown,
    .pollset_destroy = pollset_destroy,
    .pollset_work = pollset_work,
    .pollset_kick = pollset_kick,
    .pollset_add_fd = pollset_add_fd,

    .pollset_set_create = pollset_set_create,
    .pollset_set_destroy = pollset_set_destroy,
    .pollset_set_add_pollset = pollset_set_add_pollset,
    .pollset_set_del_pollset = pollset_set_del_pollset,
    .pollset_set_add_pollset_set = pollset_set_add_pollset_set,
    .pollset_set_del_pollset_set = pollset_set_del_pollset_set,
    .pollset_set_add_fd = pollset_set_add_fd,
    .pollset_set_del_fd = pollset_set_del_fd,

    .workqueue_ref = workqueue_ref,
    .workqueue_unref = workqueue_unref,
    .workqueue_scheduler = workqueue_scheduler,

    .shutdown_engine = shutdown_engine,
};

/* It is possible that GLIBC has epoll but the underlying kernel doesn't.
 * Create a dummy epoll_fd to make sure epoll support is available */
static bool is_epoll_available() {
  int fd = epoll_create1(EPOLL_CLOEXEC);
  if (fd < 0) {
    gpr_log(
        GPR_ERROR,
        "epoll_create1 failed with error: %d. Not using epoll polling engine",
        fd);
    return false;
  }
  close(fd);
  return true;
}

/* This is mainly for testing purposes. Checks to see if environment variable
 * GRPC_MAX_POLLERS_PER_PI is set and if so, assigns that value to
 * g_max_pollers_per_pi (any negative value is considered INT_MAX) */
static void set_max_pollers_per_island() {
  char *s = gpr_getenv("GRPC_MAX_POLLERS_PER_PI");
  if (s) {
    g_max_pollers_per_pi = (int)strtol(s, NULL, 10);
    if (g_max_pollers_per_pi < 0) {
      g_max_pollers_per_pi = INT_MAX;
    }
  } else {
    g_max_pollers_per_pi = INT_MAX;
  }

  gpr_log(GPR_INFO, "Max number of pollers per polling island: %d",
          g_max_pollers_per_pi);
}
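
Since set_max_pollers_per_island() reads the cap from the environment, it can
be tuned per process for testing. A hypothetical setup helper (the variable
must be set before gRPC initialization reads it):

    #include <stdlib.h>

    /* Cap concurrent epoll_pwait() callers per polling island at 2. */
    static void cap_pollers_for_test(void) {
      setenv("GRPC_MAX_POLLERS_PER_PI", "2", 1 /* overwrite */);
    }
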
const grpc_event_engine_vtable *grpc_init_epoll_limited_pollers_linux(
    bool explicitly_requested) {
  if (!explicitly_requested) {
    return NULL;
  }

  /* If use of signals is disabled, we cannot use the epoll engine */
  if (is_grpc_wakeup_signal_initialized && grpc_wakeup_signal < 0) {
    return NULL;
  }

  if (!grpc_has_wakeup_fd()) {
    return NULL;
  }

  if (!is_epoll_available()) {
    return NULL;
  }

  if (!is_grpc_wakeup_signal_initialized) {
    grpc_use_signal(SIGRTMIN + 6);
  }

  set_max_pollers_per_island();

  fd_global_init();

  if (!GRPC_LOG_IF_ERROR("pollset_global_init", pollset_global_init())) {
    return NULL;
  }

  if (!GRPC_LOG_IF_ERROR("polling_island_global_init",
                         polling_island_global_init())) {
    return NULL;
  }

  return &vtable;
}

#else /* defined(GRPC_LINUX_EPOLL) */
#if defined(GRPC_POSIX_SOCKET)
#include "src/core/lib/iomgr/ev_posix.h"
/* If GRPC_LINUX_EPOLL is not defined, it means epoll is not available. Return
 * NULL */
const grpc_event_engine_vtable *grpc_init_epoll_limited_pollers_linux(
    bool explicitly_requested) {
  return NULL;
}
#endif /* defined(GRPC_POSIX_SOCKET) */
#endif /* !defined(GRPC_LINUX_EPOLL) */