grpc 1.50.0 → 1.51.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of grpc might be problematic. Click here for more details.

Files changed (459)
  1. checksums.yaml +4 -4
  2. data/Makefile +131 -42
  3. data/include/grpc/event_engine/event_engine.h +10 -3
  4. data/include/grpc/event_engine/slice_buffer.h +17 -0
  5. data/include/grpc/grpc.h +0 -10
  6. data/include/grpc/impl/codegen/grpc_types.h +1 -5
  7. data/include/grpc/impl/codegen/port_platform.h +0 -3
  8. data/src/core/ext/filters/channel_idle/channel_idle_filter.cc +19 -13
  9. data/src/core/ext/filters/channel_idle/channel_idle_filter.h +1 -0
  10. data/src/core/ext/filters/client_channel/backup_poller.cc +3 -3
  11. data/src/core/ext/filters/client_channel/channel_connectivity.cc +7 -5
  12. data/src/core/ext/filters/client_channel/client_channel.cc +120 -140
  13. data/src/core/ext/filters/client_channel/client_channel.h +3 -4
  14. data/src/core/ext/filters/client_channel/client_channel_channelz.cc +0 -2
  15. data/src/core/ext/filters/client_channel/client_channel_plugin.cc +1 -1
  16. data/src/core/ext/filters/client_channel/client_channel_service_config.cc +153 -0
  17. data/src/core/ext/filters/client_channel/{resolver_result_parsing.h → client_channel_service_config.h} +26 -23
  18. data/src/core/ext/filters/client_channel/connector.h +1 -1
  19. data/src/core/ext/filters/client_channel/dynamic_filters.cc +20 -47
  20. data/src/core/ext/filters/client_channel/dynamic_filters.h +7 -8
  21. data/src/core/ext/filters/client_channel/health/health_check_client.cc +3 -4
  22. data/src/core/ext/filters/client_channel/http_proxy.cc +0 -1
  23. data/src/core/ext/filters/client_channel/lb_policy/address_filtering.cc +3 -4
  24. data/src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc +5 -0
  25. data/src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc +8 -7
  26. data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc +35 -44
  27. data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc +0 -1
  28. data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.cc +1 -3
  29. data/src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.cc +3 -4
  30. data/src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.h +1 -1
  31. data/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc +41 -29
  32. data/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h +2 -2
  33. data/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +9 -11
  34. data/src/core/ext/filters/client_channel/lb_policy/priority/priority.cc +15 -12
  35. data/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc +8 -10
  36. data/src/core/ext/filters/client_channel/lb_policy/rls/rls.cc +26 -27
  37. data/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc +7 -9
  38. data/src/core/ext/filters/client_channel/lb_policy/weighted_target/weighted_target.cc +44 -26
  39. data/src/core/ext/filters/client_channel/lb_policy/xds/cds.cc +17 -27
  40. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_attributes.cc +42 -0
  41. data/src/core/ext/filters/client_channel/lb_policy/xds/{xds.h → xds_attributes.h} +15 -17
  42. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_impl.cc +13 -7
  43. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_manager.cc +48 -47
  44. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_resolver.cc +40 -126
  45. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_wrr_locality.cc +364 -0
  46. data/src/core/ext/filters/client_channel/resolver/binder/binder_resolver.cc +9 -9
  47. data/src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.cc +23 -32
  48. data/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver_posix.cc +1 -2
  49. data/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver_windows.cc +22 -23
  50. data/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.cc +50 -52
  51. data/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.h +1 -1
  52. data/src/core/ext/filters/client_channel/resolver/dns/native/dns_resolver.cc +2 -4
  53. data/src/core/ext/filters/client_channel/resolver/fake/fake_resolver.cc +1 -3
  54. data/src/core/ext/filters/client_channel/resolver/google_c2p/google_c2p_resolver.cc +34 -26
  55. data/src/core/ext/filters/client_channel/resolver/polling_resolver.cc +3 -4
  56. data/src/core/ext/filters/client_channel/resolver/sockaddr/sockaddr_resolver.cc +4 -7
  57. data/src/core/ext/filters/client_channel/resolver/xds/xds_resolver.cc +63 -46
  58. data/src/core/ext/filters/client_channel/retry_filter.cc +80 -102
  59. data/src/core/ext/filters/client_channel/retry_service_config.cc +192 -234
  60. data/src/core/ext/filters/client_channel/retry_service_config.h +20 -23
  61. data/src/core/ext/filters/client_channel/retry_throttle.cc +8 -8
  62. data/src/core/ext/filters/client_channel/retry_throttle.h +8 -7
  63. data/src/core/ext/filters/client_channel/service_config_channel_arg_filter.cc +2 -2
  64. data/src/core/ext/filters/client_channel/subchannel.cc +21 -25
  65. data/src/core/ext/filters/client_channel/subchannel.h +2 -2
  66. data/src/core/ext/filters/client_channel/subchannel_stream_client.cc +11 -12
  67. data/src/core/ext/filters/deadline/deadline_filter.cc +13 -14
  68. data/src/core/ext/filters/fault_injection/fault_injection_filter.cc +1 -1
  69. data/src/core/ext/filters/fault_injection/fault_injection_filter.h +0 -4
  70. data/src/core/ext/filters/fault_injection/fault_injection_service_config_parser.cc +118 -0
  71. data/src/core/ext/filters/fault_injection/{service_config_parser.h → fault_injection_service_config_parser.h} +20 -12
  72. data/src/core/ext/filters/http/client/http_client_filter.cc +16 -16
  73. data/src/core/ext/filters/http/client_authority_filter.cc +1 -1
  74. data/src/core/ext/filters/http/message_compress/message_compress_filter.cc +13 -13
  75. data/src/core/ext/filters/http/message_compress/message_decompress_filter.cc +34 -34
  76. data/src/core/ext/filters/http/server/http_server_filter.cc +26 -25
  77. data/src/core/ext/filters/message_size/message_size_filter.cc +86 -117
  78. data/src/core/ext/filters/message_size/message_size_filter.h +22 -15
  79. data/src/core/ext/filters/rbac/rbac_filter.cc +12 -12
  80. data/src/core/ext/filters/rbac/rbac_service_config_parser.cc +728 -530
  81. data/src/core/ext/filters/rbac/rbac_service_config_parser.h +4 -3
  82. data/src/core/ext/filters/server_config_selector/server_config_selector.h +1 -1
  83. data/src/core/ext/filters/server_config_selector/server_config_selector_filter.cc +6 -7
  84. data/src/core/ext/transport/chttp2/client/chttp2_connector.cc +17 -21
  85. data/src/core/ext/transport/chttp2/server/chttp2_server.cc +57 -72
  86. data/src/core/ext/transport/chttp2/transport/bin_decoder.cc +5 -5
  87. data/src/core/ext/transport/chttp2/transport/bin_encoder.h +1 -1
  88. data/src/core/ext/transport/chttp2/transport/chttp2_transport.cc +212 -253
  89. data/src/core/ext/transport/chttp2/transport/flow_control.cc +42 -11
  90. data/src/core/ext/transport/chttp2/transport/flow_control.h +4 -3
  91. data/src/core/ext/transport/chttp2/transport/frame_data.cc +16 -15
  92. data/src/core/ext/transport/chttp2/transport/frame_data.h +1 -1
  93. data/src/core/ext/transport/chttp2/transport/frame_goaway.cc +13 -13
  94. data/src/core/ext/transport/chttp2/transport/frame_ping.cc +4 -3
  95. data/src/core/ext/transport/chttp2/transport/frame_rst_stream.cc +10 -7
  96. data/src/core/ext/transport/chttp2/transport/frame_settings.cc +15 -17
  97. data/src/core/ext/transport/chttp2/transport/frame_window_update.cc +5 -4
  98. data/src/core/ext/transport/chttp2/transport/hpack_encoder.cc +5 -6
  99. data/src/core/ext/transport/chttp2/transport/hpack_encoder.h +1 -1
  100. data/src/core/ext/transport/chttp2/transport/hpack_encoder_table.cc +2 -1
  101. data/src/core/ext/transport/chttp2/transport/hpack_parser.cc +31 -39
  102. data/src/core/ext/transport/chttp2/transport/hpack_parser_table.cc +7 -6
  103. data/src/core/ext/transport/chttp2/transport/internal.h +24 -8
  104. data/src/core/ext/transport/chttp2/transport/parsing.cc +51 -52
  105. data/src/core/ext/transport/chttp2/transport/varint.cc +2 -3
  106. data/src/core/ext/transport/chttp2/transport/varint.h +11 -8
  107. data/src/core/ext/transport/chttp2/transport/writing.cc +16 -16
  108. data/src/core/ext/transport/inproc/inproc_transport.cc +97 -115
  109. data/src/core/ext/xds/certificate_provider_store.cc +4 -4
  110. data/src/core/ext/xds/file_watcher_certificate_provider_factory.cc +4 -7
  111. data/src/core/ext/xds/xds_api.cc +15 -68
  112. data/src/core/ext/xds/xds_api.h +3 -7
  113. data/src/core/ext/xds/xds_bootstrap.h +0 -1
  114. data/src/core/ext/xds/xds_bootstrap_grpc.cc +3 -12
  115. data/src/core/ext/xds/xds_bootstrap_grpc.h +16 -1
  116. data/src/core/ext/xds/xds_certificate_provider.cc +22 -25
  117. data/src/core/ext/xds/xds_channel_stack_modifier.cc +0 -1
  118. data/src/core/ext/xds/xds_client.cc +122 -90
  119. data/src/core/ext/xds/xds_client.h +7 -2
  120. data/src/core/ext/xds/xds_client_grpc.cc +5 -24
  121. data/src/core/ext/xds/xds_cluster.cc +291 -183
  122. data/src/core/ext/xds/xds_cluster.h +11 -15
  123. data/src/core/ext/xds/xds_cluster_specifier_plugin.cc +32 -29
  124. data/src/core/ext/xds/xds_cluster_specifier_plugin.h +35 -16
  125. data/src/core/ext/xds/xds_common_types.cc +208 -141
  126. data/src/core/ext/xds/xds_common_types.h +19 -13
  127. data/src/core/ext/xds/xds_endpoint.cc +214 -129
  128. data/src/core/ext/xds/xds_endpoint.h +4 -7
  129. data/src/core/ext/xds/xds_http_fault_filter.cc +56 -43
  130. data/src/core/ext/xds/xds_http_fault_filter.h +13 -21
  131. data/src/core/ext/xds/xds_http_filters.cc +60 -73
  132. data/src/core/ext/xds/xds_http_filters.h +67 -19
  133. data/src/core/ext/xds/xds_http_rbac_filter.cc +152 -207
  134. data/src/core/ext/xds/xds_http_rbac_filter.h +12 -15
  135. data/src/core/ext/xds/xds_lb_policy_registry.cc +122 -169
  136. data/src/core/ext/xds/xds_lb_policy_registry.h +10 -11
  137. data/src/core/ext/xds/xds_listener.cc +459 -417
  138. data/src/core/ext/xds/xds_listener.h +43 -47
  139. data/src/core/ext/xds/xds_resource_type.h +3 -11
  140. data/src/core/ext/xds/xds_resource_type_impl.h +8 -13
  141. data/src/core/ext/xds/xds_route_config.cc +94 -80
  142. data/src/core/ext/xds/xds_route_config.h +10 -10
  143. data/src/core/ext/xds/xds_routing.cc +2 -1
  144. data/src/core/ext/xds/xds_routing.h +2 -0
  145. data/src/core/ext/xds/xds_server_config_fetcher.cc +109 -94
  146. data/src/core/ext/xds/xds_transport_grpc.cc +4 -5
  147. data/src/core/lib/address_utils/parse_address.cc +11 -10
  148. data/src/core/lib/channel/channel_args.h +16 -1
  149. data/src/core/lib/channel/channel_stack.cc +23 -20
  150. data/src/core/lib/channel/channel_stack.h +17 -4
  151. data/src/core/lib/channel/channel_stack_builder.cc +4 -7
  152. data/src/core/lib/channel/channel_stack_builder.h +14 -6
  153. data/src/core/lib/channel/channel_stack_builder_impl.cc +25 -7
  154. data/src/core/lib/channel/channel_stack_builder_impl.h +2 -0
  155. data/src/core/lib/channel/channel_trace.cc +4 -5
  156. data/src/core/lib/channel/channelz.cc +1 -1
  157. data/src/core/lib/channel/connected_channel.cc +695 -35
  158. data/src/core/lib/channel/connected_channel.h +0 -4
  159. data/src/core/lib/channel/promise_based_filter.cc +1004 -140
  160. data/src/core/lib/channel/promise_based_filter.h +364 -87
  161. data/src/core/lib/compression/message_compress.cc +5 -5
  162. data/src/core/lib/debug/event_log.cc +88 -0
  163. data/src/core/lib/debug/event_log.h +81 -0
  164. data/src/core/lib/debug/histogram_view.cc +69 -0
  165. data/src/core/lib/{slice/slice_refcount.cc → debug/histogram_view.h} +15 -13
  166. data/src/core/lib/debug/stats.cc +22 -119
  167. data/src/core/lib/debug/stats.h +29 -35
  168. data/src/core/lib/debug/stats_data.cc +224 -73
  169. data/src/core/lib/debug/stats_data.h +263 -122
  170. data/src/core/lib/event_engine/common_closures.h +71 -0
  171. data/src/core/lib/event_engine/default_event_engine.cc +38 -15
  172. data/src/core/lib/event_engine/default_event_engine.h +15 -3
  173. data/src/core/lib/event_engine/default_event_engine_factory.cc +2 -4
  174. data/src/core/lib/event_engine/memory_allocator.cc +1 -1
  175. data/src/core/lib/event_engine/poller.h +10 -4
  176. data/src/core/lib/event_engine/posix_engine/ev_epoll1_linux.cc +618 -0
  177. data/src/core/lib/event_engine/posix_engine/ev_epoll1_linux.h +129 -0
  178. data/src/core/lib/event_engine/posix_engine/ev_poll_posix.cc +901 -0
  179. data/src/core/lib/event_engine/posix_engine/ev_poll_posix.h +97 -0
  180. data/src/core/lib/event_engine/posix_engine/event_poller.h +111 -0
  181. data/src/core/lib/event_engine/posix_engine/event_poller_posix_default.cc +74 -0
  182. data/src/core/lib/event_engine/{executor/threaded_executor.cc → posix_engine/event_poller_posix_default.h} +13 -16
  183. data/src/core/lib/event_engine/posix_engine/internal_errqueue.cc +77 -0
  184. data/src/core/lib/event_engine/posix_engine/internal_errqueue.h +179 -0
  185. data/src/core/lib/event_engine/posix_engine/lockfree_event.cc +267 -0
  186. data/src/core/lib/event_engine/posix_engine/lockfree_event.h +73 -0
  187. data/src/core/lib/event_engine/posix_engine/posix_endpoint.cc +1270 -0
  188. data/src/core/lib/event_engine/posix_engine/posix_endpoint.h +682 -0
  189. data/src/core/lib/event_engine/posix_engine/posix_engine.cc +453 -18
  190. data/src/core/lib/event_engine/posix_engine/posix_engine.h +148 -24
  191. data/src/core/lib/event_engine/posix_engine/posix_engine_closure.h +80 -0
  192. data/src/core/lib/event_engine/posix_engine/tcp_socket_utils.cc +1081 -0
  193. data/src/core/lib/event_engine/posix_engine/tcp_socket_utils.h +361 -0
  194. data/src/core/lib/event_engine/posix_engine/timer.h +9 -8
  195. data/src/core/lib/event_engine/posix_engine/timer_manager.cc +57 -194
  196. data/src/core/lib/event_engine/posix_engine/timer_manager.h +21 -49
  197. data/src/core/lib/event_engine/posix_engine/traced_buffer_list.cc +301 -0
  198. data/src/core/lib/event_engine/posix_engine/traced_buffer_list.h +179 -0
  199. data/src/core/lib/event_engine/posix_engine/wakeup_fd_eventfd.cc +126 -0
  200. data/src/core/lib/event_engine/posix_engine/wakeup_fd_eventfd.h +45 -0
  201. data/src/core/lib/event_engine/posix_engine/wakeup_fd_pipe.cc +151 -0
  202. data/src/core/lib/event_engine/posix_engine/wakeup_fd_pipe.h +45 -0
  203. data/src/core/lib/event_engine/posix_engine/wakeup_fd_posix.h +76 -0
  204. data/src/core/lib/event_engine/posix_engine/wakeup_fd_posix_default.cc +67 -0
  205. data/src/core/lib/event_engine/posix_engine/wakeup_fd_posix_default.h +37 -0
  206. data/src/core/lib/event_engine/slice.cc +7 -6
  207. data/src/core/lib/event_engine/slice_buffer.cc +2 -2
  208. data/src/core/lib/event_engine/thread_pool.cc +106 -25
  209. data/src/core/lib/event_engine/thread_pool.h +32 -9
  210. data/src/core/lib/event_engine/windows/win_socket.cc +7 -7
  211. data/src/core/lib/event_engine/windows/windows_engine.cc +18 -12
  212. data/src/core/lib/event_engine/windows/windows_engine.h +8 -4
  213. data/src/core/lib/experiments/config.cc +1 -1
  214. data/src/core/lib/experiments/experiments.cc +13 -2
  215. data/src/core/lib/experiments/experiments.h +8 -1
  216. data/src/core/lib/gpr/cpu_linux.cc +6 -2
  217. data/src/core/lib/gpr/log_linux.cc +3 -4
  218. data/src/core/lib/gpr/string.h +1 -1
  219. data/src/core/lib/gpr/tmpfile_posix.cc +3 -2
  220. data/src/core/lib/gprpp/load_file.cc +75 -0
  221. data/src/core/lib/gprpp/load_file.h +33 -0
  222. data/src/core/lib/gprpp/per_cpu.h +46 -0
  223. data/src/core/lib/gprpp/stat_posix.cc +5 -4
  224. data/src/core/lib/gprpp/stat_windows.cc +3 -2
  225. data/src/core/lib/gprpp/status_helper.h +1 -3
  226. data/src/core/lib/gprpp/strerror.cc +41 -0
  227. data/src/core/{ext/xds/xds_resource_type.cc → lib/gprpp/strerror.h} +9 -13
  228. data/src/core/lib/gprpp/thd_windows.cc +1 -2
  229. data/src/core/lib/gprpp/time.cc +3 -4
  230. data/src/core/lib/gprpp/time.h +13 -2
  231. data/src/core/lib/gprpp/validation_errors.h +18 -1
  232. data/src/core/lib/http/httpcli.cc +40 -44
  233. data/src/core/lib/http/httpcli.h +6 -5
  234. data/src/core/lib/http/httpcli_security_connector.cc +4 -6
  235. data/src/core/lib/http/parser.cc +54 -65
  236. data/src/core/lib/iomgr/buffer_list.cc +105 -116
  237. data/src/core/lib/iomgr/buffer_list.h +60 -44
  238. data/src/core/lib/iomgr/call_combiner.cc +11 -10
  239. data/src/core/lib/iomgr/call_combiner.h +3 -4
  240. data/src/core/lib/iomgr/cfstream_handle.cc +13 -16
  241. data/src/core/lib/iomgr/closure.h +49 -5
  242. data/src/core/lib/iomgr/combiner.cc +2 -2
  243. data/src/core/lib/iomgr/endpoint.h +1 -1
  244. data/src/core/lib/iomgr/endpoint_cfstream.cc +26 -25
  245. data/src/core/lib/iomgr/endpoint_pair_posix.cc +2 -2
  246. data/src/core/lib/iomgr/error.cc +27 -42
  247. data/src/core/lib/iomgr/error.h +22 -152
  248. data/src/core/lib/iomgr/ev_apple.cc +4 -4
  249. data/src/core/lib/iomgr/ev_epoll1_linux.cc +26 -25
  250. data/src/core/lib/iomgr/ev_poll_posix.cc +27 -31
  251. data/src/core/lib/iomgr/exec_ctx.cc +3 -4
  252. data/src/core/lib/iomgr/exec_ctx.h +2 -3
  253. data/src/core/lib/iomgr/executor.cc +1 -2
  254. data/src/core/lib/iomgr/internal_errqueue.cc +3 -1
  255. data/src/core/lib/iomgr/iocp_windows.cc +1 -0
  256. data/src/core/lib/iomgr/iomgr_posix.cc +2 -2
  257. data/src/core/lib/iomgr/iomgr_posix_cfstream.cc +2 -1
  258. data/src/core/lib/iomgr/iomgr_windows.cc +2 -1
  259. data/src/core/lib/iomgr/load_file.cc +5 -9
  260. data/src/core/lib/iomgr/lockfree_event.cc +10 -10
  261. data/src/core/lib/iomgr/pollset_windows.cc +4 -4
  262. data/src/core/lib/iomgr/python_util.h +2 -2
  263. data/src/core/lib/iomgr/resolve_address.cc +8 -3
  264. data/src/core/lib/iomgr/resolve_address.h +3 -4
  265. data/src/core/lib/iomgr/resolve_address_impl.h +1 -1
  266. data/src/core/lib/iomgr/resolve_address_posix.cc +14 -25
  267. data/src/core/lib/iomgr/resolve_address_posix.h +1 -2
  268. data/src/core/lib/iomgr/resolve_address_windows.cc +14 -17
  269. data/src/core/lib/iomgr/resolve_address_windows.h +1 -2
  270. data/src/core/lib/iomgr/socket_utils_common_posix.cc +30 -29
  271. data/src/core/lib/iomgr/socket_utils_posix.cc +1 -0
  272. data/src/core/lib/iomgr/socket_utils_posix.h +2 -2
  273. data/src/core/lib/iomgr/socket_windows.cc +2 -2
  274. data/src/core/lib/iomgr/tcp_client_cfstream.cc +6 -10
  275. data/src/core/lib/iomgr/tcp_client_posix.cc +31 -35
  276. data/src/core/lib/iomgr/tcp_client_windows.cc +8 -12
  277. data/src/core/lib/iomgr/tcp_posix.cc +92 -108
  278. data/src/core/lib/iomgr/tcp_server_posix.cc +34 -34
  279. data/src/core/lib/iomgr/tcp_server_utils_posix.h +1 -1
  280. data/src/core/lib/iomgr/tcp_server_utils_posix_common.cc +18 -21
  281. data/src/core/lib/iomgr/tcp_server_utils_posix_ifaddrs.cc +12 -13
  282. data/src/core/lib/iomgr/tcp_server_utils_posix_noifaddrs.cc +1 -1
  283. data/src/core/lib/iomgr/tcp_server_windows.cc +26 -29
  284. data/src/core/lib/iomgr/tcp_windows.cc +27 -34
  285. data/src/core/lib/iomgr/timer.h +8 -8
  286. data/src/core/lib/iomgr/timer_generic.cc +9 -15
  287. data/src/core/lib/iomgr/unix_sockets_posix.cc +2 -4
  288. data/src/core/lib/iomgr/wakeup_fd_eventfd.cc +4 -3
  289. data/src/core/lib/iomgr/wakeup_fd_pipe.cc +10 -8
  290. data/src/core/lib/json/json_channel_args.h +42 -0
  291. data/src/core/lib/json/json_object_loader.cc +7 -2
  292. data/src/core/lib/json/json_object_loader.h +22 -0
  293. data/src/core/lib/json/json_util.cc +5 -5
  294. data/src/core/lib/json/json_util.h +4 -4
  295. data/src/core/lib/load_balancing/lb_policy.cc +1 -1
  296. data/src/core/lib/load_balancing/lb_policy.h +4 -0
  297. data/src/core/lib/load_balancing/subchannel_interface.h +0 -7
  298. data/src/core/lib/matchers/matchers.cc +3 -4
  299. data/src/core/lib/promise/activity.cc +16 -2
  300. data/src/core/lib/promise/activity.h +38 -15
  301. data/src/core/lib/promise/arena_promise.h +80 -51
  302. data/src/core/lib/promise/context.h +13 -6
  303. data/src/core/lib/promise/detail/basic_seq.h +9 -28
  304. data/src/core/lib/promise/detail/promise_factory.h +58 -10
  305. data/src/core/lib/promise/detail/status.h +28 -0
  306. data/src/core/lib/promise/detail/switch.h +1455 -0
  307. data/src/core/lib/promise/exec_ctx_wakeup_scheduler.h +3 -1
  308. data/src/core/lib/promise/for_each.h +129 -0
  309. data/src/core/lib/promise/loop.h +7 -5
  310. data/src/core/lib/promise/map_pipe.h +87 -0
  311. data/src/core/lib/promise/pipe.cc +19 -0
  312. data/src/core/lib/promise/pipe.h +505 -0
  313. data/src/core/lib/promise/poll.h +13 -0
  314. data/src/core/lib/promise/seq.h +3 -5
  315. data/src/core/lib/promise/sleep.cc +5 -4
  316. data/src/core/lib/promise/sleep.h +1 -2
  317. data/src/core/lib/promise/try_concurrently.h +341 -0
  318. data/src/core/lib/promise/try_seq.h +10 -13
  319. data/src/core/lib/resolver/server_address.cc +1 -0
  320. data/src/core/lib/resolver/server_address.h +1 -3
  321. data/src/core/lib/resource_quota/api.cc +0 -1
  322. data/src/core/lib/resource_quota/arena.cc +19 -0
  323. data/src/core/lib/resource_quota/arena.h +89 -0
  324. data/src/core/lib/resource_quota/memory_quota.cc +1 -0
  325. data/src/core/lib/security/authorization/grpc_authorization_engine.cc +1 -3
  326. data/src/core/lib/security/authorization/grpc_server_authz_filter.cc +4 -2
  327. data/src/core/lib/security/authorization/matchers.cc +25 -22
  328. data/src/core/lib/security/authorization/rbac_policy.cc +2 -3
  329. data/src/core/lib/security/context/security_context.h +10 -0
  330. data/src/core/lib/security/credentials/channel_creds_registry_init.cc +3 -4
  331. data/src/core/lib/security/credentials/composite/composite_credentials.cc +1 -1
  332. data/src/core/lib/security/credentials/external/aws_external_account_credentials.cc +77 -55
  333. data/src/core/lib/security/credentials/external/aws_request_signer.cc +4 -3
  334. data/src/core/lib/security/credentials/external/external_account_credentials.cc +40 -51
  335. data/src/core/lib/security/credentials/external/file_external_account_credentials.cc +17 -21
  336. data/src/core/lib/security/credentials/external/url_external_account_credentials.cc +21 -25
  337. data/src/core/lib/security/credentials/fake/fake_credentials.cc +1 -0
  338. data/src/core/lib/security/credentials/google_default/google_default_credentials.cc +27 -24
  339. data/src/core/lib/security/credentials/iam/iam_credentials.cc +1 -0
  340. data/src/core/lib/security/credentials/jwt/json_token.cc +1 -2
  341. data/src/core/lib/security/credentials/jwt/jwt_credentials.cc +1 -1
  342. data/src/core/lib/security/credentials/jwt/jwt_verifier.cc +5 -5
  343. data/src/core/lib/security/credentials/oauth2/oauth2_credentials.cc +24 -30
  344. data/src/core/lib/security/credentials/plugin/plugin_credentials.cc +6 -5
  345. data/src/core/lib/security/credentials/plugin/plugin_credentials.h +3 -3
  346. data/src/core/lib/security/credentials/tls/grpc_tls_certificate_distributor.cc +19 -27
  347. data/src/core/lib/security/credentials/tls/grpc_tls_certificate_distributor.h +4 -11
  348. data/src/core/lib/security/credentials/tls/grpc_tls_certificate_provider.cc +29 -41
  349. data/src/core/lib/security/credentials/tls/grpc_tls_certificate_verifier.cc +1 -1
  350. data/src/core/lib/security/security_connector/alts/alts_security_connector.cc +6 -11
  351. data/src/core/lib/security/security_connector/fake/fake_security_connector.cc +8 -15
  352. data/src/core/lib/security/security_connector/insecure/insecure_security_connector.cc +2 -2
  353. data/src/core/lib/security/security_connector/insecure/insecure_security_connector.h +2 -6
  354. data/src/core/lib/security/security_connector/load_system_roots_supported.cc +1 -4
  355. data/src/core/lib/security/security_connector/local/local_security_connector.cc +7 -11
  356. data/src/core/lib/security/security_connector/ssl/ssl_security_connector.cc +9 -14
  357. data/src/core/lib/security/security_connector/ssl_utils.cc +5 -7
  358. data/src/core/lib/security/security_connector/tls/tls_security_connector.cc +21 -27
  359. data/src/core/lib/security/transport/client_auth_filter.cc +1 -1
  360. data/src/core/lib/security/transport/secure_endpoint.cc +26 -28
  361. data/src/core/lib/security/transport/security_handshaker.cc +53 -53
  362. data/src/core/lib/security/transport/server_auth_filter.cc +21 -21
  363. data/src/core/lib/security/transport/tsi_error.cc +6 -3
  364. data/src/core/lib/security/util/json_util.cc +4 -5
  365. data/src/core/lib/service_config/service_config.h +1 -1
  366. data/src/core/lib/service_config/service_config_impl.cc +111 -158
  367. data/src/core/lib/service_config/service_config_impl.h +14 -17
  368. data/src/core/lib/service_config/service_config_parser.cc +14 -31
  369. data/src/core/lib/service_config/service_config_parser.h +14 -10
  370. data/src/core/lib/slice/b64.cc +2 -2
  371. data/src/core/lib/slice/slice.cc +7 -1
  372. data/src/core/lib/slice/slice.h +19 -6
  373. data/src/core/lib/slice/slice_buffer.cc +13 -14
  374. data/src/core/lib/slice/slice_internal.h +13 -21
  375. data/src/core/lib/slice/slice_refcount.h +34 -19
  376. data/src/core/lib/surface/byte_buffer.cc +3 -4
  377. data/src/core/lib/surface/byte_buffer_reader.cc +4 -4
  378. data/src/core/lib/surface/call.cc +1366 -239
  379. data/src/core/lib/surface/call.h +44 -0
  380. data/src/core/lib/surface/call_details.cc +3 -3
  381. data/src/core/lib/surface/call_trace.cc +113 -0
  382. data/src/core/lib/surface/call_trace.h +30 -0
  383. data/src/core/lib/surface/channel.cc +44 -49
  384. data/src/core/lib/surface/channel.h +9 -1
  385. data/src/core/lib/surface/channel_ping.cc +1 -1
  386. data/src/core/lib/surface/channel_stack_type.cc +4 -0
  387. data/src/core/lib/surface/channel_stack_type.h +2 -0
  388. data/src/core/lib/surface/completion_queue.cc +38 -52
  389. data/src/core/lib/surface/init.cc +8 -39
  390. data/src/core/lib/surface/init_internally.h +8 -0
  391. data/src/core/lib/surface/lame_client.cc +10 -8
  392. data/src/core/lib/surface/server.cc +48 -70
  393. data/src/core/lib/surface/server.h +3 -4
  394. data/src/core/lib/surface/validate_metadata.cc +11 -12
  395. data/src/core/lib/surface/version.cc +2 -2
  396. data/src/core/lib/transport/connectivity_state.cc +2 -2
  397. data/src/core/lib/transport/error_utils.cc +34 -28
  398. data/src/core/lib/transport/error_utils.h +3 -3
  399. data/src/core/lib/transport/handshaker.cc +14 -14
  400. data/src/core/lib/transport/handshaker.h +1 -1
  401. data/src/core/lib/transport/handshaker_factory.h +26 -0
  402. data/src/core/lib/transport/handshaker_registry.cc +8 -2
  403. data/src/core/lib/transport/handshaker_registry.h +3 -4
  404. data/src/core/lib/transport/http_connect_handshaker.cc +23 -24
  405. data/src/core/lib/transport/metadata_batch.h +17 -1
  406. data/src/core/lib/transport/parsed_metadata.cc +2 -6
  407. data/src/core/lib/transport/tcp_connect_handshaker.cc +15 -20
  408. data/src/core/lib/transport/transport.cc +63 -17
  409. data/src/core/lib/transport/transport.h +64 -68
  410. data/src/core/lib/transport/transport_impl.h +1 -1
  411. data/src/core/lib/transport/transport_op_string.cc +7 -6
  412. data/src/core/plugin_registry/grpc_plugin_registry.cc +6 -10
  413. data/src/core/plugin_registry/grpc_plugin_registry_extra.cc +2 -14
  414. data/src/core/tsi/alts/handshaker/alts_handshaker_client.cc +10 -10
  415. data/src/core/tsi/alts/handshaker/alts_tsi_handshaker.cc +8 -8
  416. data/src/core/tsi/alts/handshaker/alts_tsi_utils.cc +2 -1
  417. data/src/core/tsi/alts/zero_copy_frame_protector/alts_grpc_integrity_only_record_protocol.cc +7 -7
  418. data/src/core/tsi/alts/zero_copy_frame_protector/alts_grpc_privacy_integrity_record_protocol.cc +7 -6
  419. data/src/core/tsi/alts/zero_copy_frame_protector/alts_grpc_record_protocol_common.cc +1 -1
  420. data/src/core/tsi/alts/zero_copy_frame_protector/alts_zero_copy_grpc_protector.cc +5 -5
  421. data/src/core/tsi/fake_transport_security.cc +3 -3
  422. data/src/core/tsi/ssl/key_logging/ssl_key_logging.cc +7 -3
  423. data/src/core/tsi/ssl/session_cache/ssl_session_boringssl.cc +1 -1
  424. data/src/core/tsi/ssl/session_cache/ssl_session_openssl.cc +6 -2
  425. data/src/ruby/ext/grpc/rb_grpc_imports.generated.c +0 -2
  426. data/src/ruby/ext/grpc/rb_grpc_imports.generated.h +0 -3
  427. data/src/ruby/lib/grpc/version.rb +1 -1
  428. data/src/ruby/spec/channel_spec.rb +0 -43
  429. data/src/ruby/spec/generic/active_call_spec.rb +12 -3
  430. data/third_party/abseil-cpp/absl/cleanup/cleanup.h +140 -0
  431. data/third_party/abseil-cpp/absl/cleanup/internal/cleanup.h +100 -0
  432. data/third_party/zlib/compress.c +3 -3
  433. data/third_party/zlib/crc32.c +21 -12
  434. data/third_party/zlib/deflate.c +112 -106
  435. data/third_party/zlib/deflate.h +2 -2
  436. data/third_party/zlib/gzlib.c +1 -1
  437. data/third_party/zlib/gzread.c +3 -5
  438. data/third_party/zlib/gzwrite.c +1 -1
  439. data/third_party/zlib/infback.c +10 -7
  440. data/third_party/zlib/inflate.c +5 -2
  441. data/third_party/zlib/inftrees.c +2 -2
  442. data/third_party/zlib/inftrees.h +1 -1
  443. data/third_party/zlib/trees.c +61 -62
  444. data/third_party/zlib/uncompr.c +2 -2
  445. data/third_party/zlib/zconf.h +16 -3
  446. data/third_party/zlib/zlib.h +10 -10
  447. data/third_party/zlib/zutil.c +9 -7
  448. data/third_party/zlib/zutil.h +1 -0
  449. metadata +55 -18
  450. data/src/core/ext/filters/client_channel/resolver_result_parsing.cc +0 -188
  451. data/src/core/ext/filters/fault_injection/service_config_parser.cc +0 -187
  452. data/src/core/lib/event_engine/executor/threaded_executor.h +0 -44
  453. data/src/core/lib/gpr/murmur_hash.cc +0 -82
  454. data/src/core/lib/gpr/murmur_hash.h +0 -29
  455. data/src/core/lib/gpr/tls.h +0 -156
  456. data/src/core/lib/promise/call_push_pull.h +0 -148
  457. data/src/core/lib/slice/slice_api.cc +0 -39
  458. data/src/core/lib/slice/slice_buffer_api.cc +0 -35
  459. data/src/core/lib/slice/slice_refcount_base.h +0 -60
@@ -0,0 +1,1270 @@
1
+ // Copyright 2022 gRPC Authors
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ #include <grpc/support/port_platform.h>
15
+
16
+ #include "src/core/lib/event_engine/posix_engine/posix_endpoint.h"
17
+
18
+ #include <errno.h>
19
+ #include <inttypes.h>
20
+ #include <limits.h>
21
+
22
+ #include <algorithm>
23
+ #include <cctype>
24
+ #include <cstdint>
25
+ #include <cstdlib>
26
+ #include <cstring>
27
+ #include <memory>
28
+ #include <string>
29
+
30
+ #include "absl/functional/any_invocable.h"
31
+ #include "absl/status/status.h"
32
+ #include "absl/status/statusor.h"
33
+ #include "absl/strings/str_cat.h"
34
+ #include "absl/types/optional.h"
35
+
36
+ #include <grpc/event_engine/memory_request.h>
37
+ #include <grpc/event_engine/slice.h>
38
+ #include <grpc/event_engine/slice_buffer.h>
39
+ #include <grpc/support/log.h>
40
+
41
+ #include "src/core/lib/event_engine/posix_engine/event_poller.h"
42
+ #include "src/core/lib/event_engine/posix_engine/internal_errqueue.h"
43
+ #include "src/core/lib/event_engine/posix_engine/tcp_socket_utils.h"
44
+ #include "src/core/lib/experiments/experiments.h"
45
+ #include "src/core/lib/gpr/useful.h"
46
+ #include "src/core/lib/gprpp/load_file.h"
47
+ #include "src/core/lib/gprpp/ref_counted_ptr.h"
48
+ #include "src/core/lib/gprpp/strerror.h"
49
+ #include "src/core/lib/gprpp/time.h"
50
+ #include "src/core/lib/resource_quota/resource_quota.h"
51
+ #include "src/core/lib/slice/slice.h"
52
+
53
+ #ifdef GRPC_POSIX_SOCKET_TCP
54
+ #ifdef GRPC_LINUX_ERRQUEUE
55
+ #include <dirent.h> // IWYU pragma: keep
56
+ #include <linux/capability.h> // IWYU pragma: keep
57
+ #include <linux/errqueue.h> // IWYU pragma: keep
58
+ #include <linux/netlink.h> // IWYU pragma: keep
59
+ #include <sys/prctl.h> // IWYU pragma: keep
60
+ #include <sys/resource.h> // IWYU pragma: keep
61
+ #endif
62
+ #include <netinet/in.h> // IWYU pragma: keep
63
+
64
+ #ifndef SOL_TCP
65
+ #define SOL_TCP IPPROTO_TCP
66
+ #endif
67
+
68
+ #ifndef TCP_INQ
69
+ #define TCP_INQ 36
70
+ #define TCP_CM_INQ TCP_INQ
71
+ #endif
72
+
73
+ #ifdef GRPC_HAVE_MSG_NOSIGNAL
74
+ #define SENDMSG_FLAGS MSG_NOSIGNAL
75
+ #else
76
+ #define SENDMSG_FLAGS 0
77
+ #endif
78
+
79
+ // TCP zero copy sendmsg flag.
80
+ // NB: We define this here as a fallback in case we're using an older set of
81
+ // library headers that has not defined MSG_ZEROCOPY. Since this constant is
82
+ // part of the kernel, we are guaranteed it will never change/disagree so
83
+ // defining it here is safe.
84
+ #ifndef MSG_ZEROCOPY
85
+ #define MSG_ZEROCOPY 0x4000000
86
+ #endif
87
+
88
+ #define MAX_READ_IOVEC 64
89
+
90
+ namespace grpc_event_engine {
91
+ namespace posix_engine {
92
+
93
+ namespace {
94
+
95
+ using ::grpc_event_engine::experimental::EventEngine;
96
+ using ::grpc_event_engine::experimental::MemoryAllocator;
97
+ using ::grpc_event_engine::experimental::Slice;
98
+ using ::grpc_event_engine::experimental::SliceBuffer;
99
+
100
+ // A wrapper around sendmsg. It sends \a msg over \a fd and returns the number
101
+ // of bytes sent.
102
+ ssize_t TcpSend(int fd, const struct msghdr* msg, int* saved_errno,
103
+ int additional_flags = 0) {
104
+ ssize_t sent_length;
105
+ do {
106
+ sent_length = sendmsg(fd, msg, SENDMSG_FLAGS | additional_flags);
107
+ } while (sent_length < 0 && (*saved_errno = errno) == EINTR);
108
+ return sent_length;
109
+ }
110
+
111
+ #ifdef GRPC_LINUX_ERRQUEUE
112
+
113
+ #define CAP_IS_SUPPORTED(cap) (prctl(PR_CAPBSET_READ, (cap), 0) > 0)
114
+
115
// Strips trailing whitespace (anything std::isspace() accepts, which includes
// spaces and newlines) from \a s in place.
void rtrim(std::string& s) {
  while (!s.empty() && std::isspace(static_cast<unsigned char>(s.back()))) {
    s.pop_back();
  }
}
122
+
123
+ uint64_t ParseUlimitMemLockFromFile(std::string file_name) {
124
+ static std::string kHardMemlockPrefix = "* hard memlock";
125
+ auto result = grpc_core::LoadFile(file_name, false);
126
+ if (!result.ok()) {
127
+ return 0;
128
+ }
129
+ std::string file_contents(reinterpret_cast<const char*>((*result).begin()),
130
+ (*result).length());
131
+ // Find start position containing prefix.
132
+ size_t start = file_contents.find(kHardMemlockPrefix);
133
+ if (start == std::string::npos) {
134
+ return 0;
135
+ }
136
+ // Find position of next newline after prefix.
137
+ size_t end = file_contents.find(start, '\n');
138
+ // Extract substring between prefix and next newline.
139
+ auto memlock_value_string = file_contents.substr(
140
+ start + kHardMemlockPrefix.length() + 1, end - start);
141
+ rtrim(memlock_value_string);
142
+ if (memlock_value_string == "unlimited" ||
143
+ memlock_value_string == "infinity") {
144
+ return UINT64_MAX;
145
+ } else {
146
+ return std::atoi(memlock_value_string.c_str());
147
+ }
148
+ }
149
+
150
+ // Ulimit hard memlock controls per socket limit for maximum locked memory in
151
+ // RAM. Parses all files under /etc/security/limits.d/ and
152
+ // /etc/security/limits.conf file for a line of the following format:
153
+ // * hard memlock <value>
154
+ // It extracts the first valid <value> and returns it. A value of UINT64_MAX
155
+ // represents unlimited or infinity. Hard memlock value should be set to
156
+ // allow zerocopy sendmsgs to succeed. It controls the maximum amount of
157
+ // memory that can be locked by a socket in RAM.
158
+ uint64_t GetUlimitHardMemLock() {
159
+ static const uint64_t kUlimitHardMemLock = []() -> uint64_t {
160
+ if (CAP_IS_SUPPORTED(CAP_SYS_RESOURCE)) {
161
+ // hard memlock ulimit is ignored for privileged user.
162
+ return UINT64_MAX;
163
+ }
164
+ if (auto dir = opendir("/etc/security/limits.d")) {
165
+ while (auto f = readdir(dir)) {
166
+ if (f->d_name[0] == '.') {
167
+ continue; // Skip everything that starts with a dot
168
+ }
169
+ uint64_t hard_memlock = ParseUlimitMemLockFromFile(
170
+ absl::StrCat("/etc/security/limits.d/", std::string(f->d_name)));
171
+ if (hard_memlock != 0) {
172
+ return hard_memlock;
173
+ }
174
+ }
175
+ closedir(dir);
176
+ }
177
+ return ParseUlimitMemLockFromFile("/etc/security/limits.conf");
178
+ }();
179
+ return kUlimitHardMemLock;
180
+ }
181
+
182
+ // RLIMIT_MEMLOCK controls per process limit for maximum locked memory in RAM.
183
+ uint64_t GetRLimitMemLockMax() {
184
+ static const uint64_t kRlimitMemLock = []() -> uint64_t {
185
+ if (CAP_IS_SUPPORTED(CAP_SYS_RESOURCE)) {
186
+ // RLIMIT_MEMLOCK is ignored for privileged user.
187
+ return UINT64_MAX;
188
+ }
189
+ struct rlimit limit;
190
+ if (getrlimit(RLIMIT_MEMLOCK, &limit) != 0) {
191
+ return 0;
192
+ }
193
+ return static_cast<uint64_t>(limit.rlim_max);
194
+ }();
195
+ return kRlimitMemLock;
196
+ }
197
+
198
// True when \a cmsg is an IP_RECVERR message at the SOL_IP level or an
// IPV6_RECVERR message at the SOL_IPV6 level.
bool CmsgIsIpLevel(const cmsghdr& cmsg) {
  const bool is_ipv6_recverr =
      cmsg.cmsg_level == SOL_IPV6 && cmsg.cmsg_type == IPV6_RECVERR;
  const bool is_ipv4_recverr =
      cmsg.cmsg_level == SOL_IP && cmsg.cmsg_type == IP_RECVERR;
  return is_ipv6_recverr || is_ipv4_recverr;
}
203
+
204
+ bool CmsgIsZeroCopy(const cmsghdr& cmsg) {
205
+ if (!CmsgIsIpLevel(cmsg)) {
206
+ return false;
207
+ }
208
+ auto serr = reinterpret_cast<const sock_extended_err*> CMSG_DATA(&cmsg);
209
+ return serr->ee_errno == 0 && serr->ee_origin == SO_EE_ORIGIN_ZEROCOPY;
210
+ }
211
+ #endif // GRPC_LINUX_ERRQUEUE
212
+
213
+ } // namespace
214
+
215
+ #if defined(IOV_MAX) && IOV_MAX < 260
216
+ #define MAX_WRITE_IOVEC IOV_MAX
217
+ #else
218
+ #define MAX_WRITE_IOVEC 260
219
+ #endif
220
+ msg_iovlen_type TcpZerocopySendRecord::PopulateIovs(size_t* unwind_slice_idx,
221
+ size_t* unwind_byte_idx,
222
+ size_t* sending_length,
223
+ iovec* iov) {
224
+ msg_iovlen_type iov_size;
225
+ *unwind_slice_idx = out_offset_.slice_idx;
226
+ *unwind_byte_idx = out_offset_.byte_idx;
227
+ for (iov_size = 0;
228
+ out_offset_.slice_idx != buf_.Count() && iov_size != MAX_WRITE_IOVEC;
229
+ iov_size++) {
230
+ auto slice = buf_.RefSlice(out_offset_.slice_idx);
231
+ iov[iov_size].iov_base =
232
+ const_cast<uint8_t*>(slice.begin()) + out_offset_.byte_idx;
233
+ iov[iov_size].iov_len = slice.length() - out_offset_.byte_idx;
234
+ *sending_length += iov[iov_size].iov_len;
235
+ ++(out_offset_.slice_idx);
236
+ out_offset_.byte_idx = 0;
237
+ }
238
+ GPR_DEBUG_ASSERT(iov_size > 0);
239
+ return iov_size;
240
+ }
241
+
242
+ void TcpZerocopySendRecord::UpdateOffsetForBytesSent(size_t sending_length,
243
+ size_t actually_sent) {
244
+ size_t trailing = sending_length - actually_sent;
245
+ while (trailing > 0) {
246
+ size_t slice_length;
247
+ out_offset_.slice_idx--;
248
+ slice_length = buf_.RefSlice(out_offset_.slice_idx).length();
249
+ if (slice_length > trailing) {
250
+ out_offset_.byte_idx = slice_length - trailing;
251
+ break;
252
+ } else {
253
+ trailing -= slice_length;
254
+ }
255
+ }
256
+ }
257
+
258
+ void PosixEndpointImpl::AddToEstimate(size_t bytes) {
259
+ bytes_read_this_round_ += static_cast<double>(bytes);
260
+ }
261
+
262
+ void PosixEndpointImpl::FinishEstimate() {
263
+ // If we read >80% of the target buffer in one read loop, increase the size of
264
+ // the target buffer to either the amount read, or twice its previous value.
265
+ if (bytes_read_this_round_ > target_length_ * 0.8) {
266
+ target_length_ = std::max(2 * target_length_, bytes_read_this_round_);
267
+ } else {
268
+ target_length_ = 0.99 * target_length_ + 0.01 * bytes_read_this_round_;
269
+ }
270
+ bytes_read_this_round_ = 0;
271
+ }
272
+
273
+ // Returns true if data available to read or error other than EAGAIN.
274
+ bool PosixEndpointImpl::TcpDoRead(absl::Status& status) {
275
+ struct msghdr msg;
276
+ struct iovec iov[MAX_READ_IOVEC];
277
+ ssize_t read_bytes;
278
+ size_t total_read_bytes = 0;
279
+ size_t iov_len = std::min<size_t>(MAX_READ_IOVEC, incoming_buffer_->Count());
280
+ #ifdef GRPC_LINUX_ERRQUEUE
281
+ constexpr size_t cmsg_alloc_space =
282
+ CMSG_SPACE(sizeof(scm_timestamping)) + CMSG_SPACE(sizeof(int));
283
+ #else
284
+ constexpr size_t cmsg_alloc_space = 24; // CMSG_SPACE(sizeof(int))
285
+ #endif // GRPC_LINUX_ERRQUEUE
286
+ char cmsgbuf[cmsg_alloc_space];
287
+ for (size_t i = 0; i < iov_len; i++) {
288
+ Slice slice = incoming_buffer_->RefSlice(i);
289
+ iov[i].iov_base = const_cast<uint8_t*>(slice.begin());
290
+ iov[i].iov_len = slice.length();
291
+ }
292
+
293
+ GPR_ASSERT(incoming_buffer_->Length() != 0);
294
+ GPR_DEBUG_ASSERT(min_progress_size_ > 0);
295
+
296
+ do {
297
+ // Assume there is something on the queue. If we receive TCP_INQ from
298
+ // kernel, we will update this value, otherwise, we have to assume there is
299
+ // always something to read until we get EAGAIN.
300
+ inq_ = 1;
301
+
302
+ msg.msg_name = nullptr;
303
+ msg.msg_namelen = 0;
304
+ msg.msg_iov = iov;
305
+ msg.msg_iovlen = static_cast<msg_iovlen_type>(iov_len);
306
+ if (inq_capable_) {
307
+ msg.msg_control = cmsgbuf;
308
+ msg.msg_controllen = sizeof(cmsgbuf);
309
+ } else {
310
+ msg.msg_control = nullptr;
311
+ msg.msg_controllen = 0;
312
+ }
313
+ msg.msg_flags = 0;
314
+
315
+ do {
316
+ read_bytes = recvmsg(fd_, &msg, 0);
317
+ } while (read_bytes < 0 && errno == EINTR);
318
+
319
+ if (read_bytes < 0 && errno == EAGAIN) {
320
+ // NB: After calling call_read_cb a parallel call of the read handler may
321
+ // be running.
322
+ if (total_read_bytes > 0) {
323
+ break;
324
+ }
325
+ FinishEstimate();
326
+ inq_ = 0;
327
+ return false;
328
+ }
329
+
330
+ // We have read something in previous reads. We need to deliver those bytes
331
+ // to the upper layer.
332
+ if (read_bytes <= 0 && total_read_bytes >= 1) {
333
+ inq_ = 1;
334
+ break;
335
+ }
336
+
337
+ if (read_bytes <= 0) {
338
+ // 0 read size ==> end of stream
339
+ incoming_buffer_->Clear();
340
+ if (read_bytes == 0) {
341
+ status = absl::InternalError("Socket closed");
342
+ } else {
343
+ status = absl::InternalError(
344
+ absl::StrCat("recvmsg:", grpc_core::StrError(errno)));
345
+ }
346
+ return true;
347
+ }
348
+
349
+ AddToEstimate(static_cast<size_t>(read_bytes));
350
+ GPR_DEBUG_ASSERT((size_t)read_bytes <=
351
+ incoming_buffer_->Length() - total_read_bytes);
352
+
353
+ #ifdef GRPC_HAVE_TCP_INQ
354
+ if (inq_capable_) {
355
+ GPR_DEBUG_ASSERT(!(msg.msg_flags & MSG_CTRUNC));
356
+ struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
357
+ for (; cmsg != nullptr; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
358
+ if (cmsg->cmsg_level == SOL_TCP && cmsg->cmsg_type == TCP_CM_INQ &&
359
+ cmsg->cmsg_len == CMSG_LEN(sizeof(int))) {
360
+ inq_ = *reinterpret_cast<int*>(CMSG_DATA(cmsg));
361
+ break;
362
+ }
363
+ }
364
+ }
365
+ #endif // GRPC_HAVE_TCP_INQ
366
+
367
+ total_read_bytes += read_bytes;
368
+ if (inq_ == 0 || total_read_bytes == incoming_buffer_->Length()) {
369
+ break;
370
+ }
371
+
372
+ // We had a partial read, and still have space to read more data. So, adjust
373
+ // IOVs and try to read more.
374
+ size_t remaining = read_bytes;
375
+ size_t j = 0;
376
+ for (size_t i = 0; i < iov_len; i++) {
377
+ if (remaining >= iov[i].iov_len) {
378
+ remaining -= iov[i].iov_len;
379
+ continue;
380
+ }
381
+ if (remaining > 0) {
382
+ iov[j].iov_base = static_cast<char*>(iov[i].iov_base) + remaining;
383
+ iov[j].iov_len = iov[i].iov_len - remaining;
384
+ remaining = 0;
385
+ } else {
386
+ iov[j].iov_base = iov[i].iov_base;
387
+ iov[j].iov_len = iov[i].iov_len;
388
+ }
389
+ ++j;
390
+ }
391
+ iov_len = j;
392
+ } while (true);
393
+
394
+ if (inq_ == 0) {
395
+ FinishEstimate();
396
+ }
397
+
398
+ GPR_DEBUG_ASSERT(total_read_bytes > 0);
399
+ status = absl::OkStatus();
400
+ if (grpc_core::IsTcpFrameSizeTuningEnabled()) {
401
+ // Update min progress size based on the total number of bytes read in
402
+ // this round.
403
+ min_progress_size_ -= total_read_bytes;
404
+ if (min_progress_size_ > 0) {
405
+ // There is still some bytes left to be read before we can signal
406
+ // the read as complete. Append the bytes read so far into
407
+ // last_read_buffer which serves as a staging buffer. Return false
408
+ // to indicate tcp_handle_read needs to be scheduled again.
409
+ incoming_buffer_->MoveFirstNBytesIntoSliceBuffer(total_read_bytes,
410
+ last_read_buffer_);
411
+ return false;
412
+ } else {
413
+ // The required number of bytes have been read. Append the bytes
414
+ // read in this round into last_read_buffer. Then swap last_read_buffer
415
+ // and incoming_buffer. Now incoming buffer contains all the bytes
416
+ // read since the start of the last tcp_read operation. last_read_buffer
417
+ // would contain any spare space left in the incoming buffer. This
418
+ // space will be used in the next tcp_read operation.
419
+ min_progress_size_ = 1;
420
+ incoming_buffer_->MoveFirstNBytesIntoSliceBuffer(total_read_bytes,
421
+ last_read_buffer_);
422
+ incoming_buffer_->Swap(last_read_buffer_);
423
+ return true;
424
+ }
425
+ }
426
+ if (total_read_bytes < incoming_buffer_->Length()) {
427
+ incoming_buffer_->MoveLastNBytesIntoSliceBuffer(
428
+ incoming_buffer_->Length() - total_read_bytes, last_read_buffer_);
429
+ // last_read_buffer_.Clear();
430
+ }
431
+ return true;
432
+ }
433
+
434
+ void PosixEndpointImpl::PerformReclamation() {
435
+ read_mu_.Lock();
436
+ if (incoming_buffer_ != nullptr) {
437
+ incoming_buffer_->Clear();
438
+ }
439
+ has_posted_reclaimer_ = false;
440
+ read_mu_.Unlock();
441
+ }
442
+
443
+ void PosixEndpointImpl::MaybePostReclaimer() {
444
+ if (!has_posted_reclaimer_) {
445
+ has_posted_reclaimer_ = true;
446
+ memory_owner_.PostReclaimer(
447
+ grpc_core::ReclamationPass::kBenign,
448
+ [this](absl::optional<grpc_core::ReclamationSweep> sweep) {
449
+ if (!sweep.has_value()) return;
450
+ PerformReclamation();
451
+ });
452
+ }
453
+ }
454
+
455
+ void PosixEndpointImpl::UpdateRcvLowat() {
456
+ if (!grpc_core::IsTcpRcvLowatEnabled()) return;
457
+
458
+ // TODO(ctiller): Check if supported by OS.
459
+ // TODO(ctiller): Allow some adjustments instead of hardcoding things.
460
+
461
+ static constexpr int kRcvLowatMax = 16 * 1024 * 1024;
462
+ static constexpr int kRcvLowatThreshold = 16 * 1024;
463
+
464
+ int remaining = std::min({static_cast<int>(incoming_buffer_->Length()),
465
+ kRcvLowatMax, min_progress_size_});
466
+
467
+ // Setting SO_RCVLOWAT for small quantities does not save on CPU.
468
+ if (remaining < kRcvLowatThreshold) {
469
+ remaining = 0;
470
+ }
471
+
472
+ // If zerocopy is off, wake shortly before the full RPC is here. More can
473
+ // show up partway through recvmsg() since it takes a while to copy data.
474
+ // So an early wakeup aids latency.
475
+ if (!tcp_zerocopy_send_ctx_->Enabled() && remaining > 0) {
476
+ remaining -= kRcvLowatThreshold;
477
+ }
478
+
479
+ // We still do not know the RPC size. Do not set SO_RCVLOWAT.
480
+ if (set_rcvlowat_ <= 1 && remaining <= 1) return;
481
+
482
+ // Previous value is still valid. No change needed in SO_RCVLOWAT.
483
+ if (set_rcvlowat_ == remaining) {
484
+ return;
485
+ }
486
+ auto result = sock_.SetSocketRcvLowat(remaining);
487
+ if (result.ok()) {
488
+ set_rcvlowat_ = *result;
489
+ } else {
490
+ gpr_log(GPR_ERROR, "%s",
491
+ absl::StrCat("ERROR in SO_RCVLOWAT: ", result.status().message())
492
+ .c_str());
493
+ }
494
+ }
495
+
496
+ void PosixEndpointImpl::MaybeMakeReadSlices() {
497
+ if (grpc_core::IsTcpReadChunksEnabled()) {
498
+ static const int kBigAlloc = 64 * 1024;
499
+ static const int kSmallAlloc = 8 * 1024;
500
+ if (incoming_buffer_->Length() < static_cast<size_t>(min_progress_size_)) {
501
+ size_t allocate_length = min_progress_size_;
502
+ const size_t target_length = static_cast<size_t>(target_length_);
503
+ // If memory pressure is low and we think there will be more than
504
+ // min_progress_size bytes to read, allocate a bit more.
505
+ const bool low_memory_pressure =
506
+ memory_owner_.GetPressureInfo().pressure_control_value < 0.8;
507
+ if (low_memory_pressure && target_length > allocate_length) {
508
+ allocate_length = target_length;
509
+ }
510
+ int extra_wanted =
511
+ allocate_length - static_cast<int>(incoming_buffer_->Length());
512
+ if (extra_wanted >=
513
+ (low_memory_pressure ? kSmallAlloc * 3 / 2 : kBigAlloc)) {
514
+ while (extra_wanted > 0) {
515
+ extra_wanted -= kBigAlloc;
516
+ incoming_buffer_->AppendIndexed(
517
+ Slice(memory_owner_.MakeSlice(kBigAlloc)));
518
+ }
519
+ } else {
520
+ while (extra_wanted > 0) {
521
+ extra_wanted -= kSmallAlloc;
522
+ incoming_buffer_->AppendIndexed(
523
+ Slice(memory_owner_.MakeSlice(kSmallAlloc)));
524
+ }
525
+ }
526
+ MaybePostReclaimer();
527
+ }
528
+ } else {
529
+ if (incoming_buffer_->Length() < static_cast<size_t>(min_progress_size_) &&
530
+ incoming_buffer_->Count() < MAX_READ_IOVEC) {
531
+ int target_length =
532
+ std::max(static_cast<int>(target_length_), min_progress_size_);
533
+ int extra_wanted =
534
+ target_length - static_cast<int>(incoming_buffer_->Length());
535
+ int min_read_chunk_size =
536
+ std::max(min_read_chunk_size_, min_progress_size_);
537
+ int max_read_chunk_size =
538
+ std::max(max_read_chunk_size_, min_progress_size_);
539
+ incoming_buffer_->AppendIndexed(
540
+ Slice(memory_owner_.MakeSlice(grpc_core::MemoryRequest(
541
+ min_read_chunk_size,
542
+ grpc_core::Clamp(extra_wanted, min_read_chunk_size,
543
+ max_read_chunk_size)))));
544
+ MaybePostReclaimer();
545
+ }
546
+ }
547
+ }
548
+
549
+ void PosixEndpointImpl::HandleRead(absl::Status status) {
550
+ read_mu_.Lock();
551
+ if (status.ok()) {
552
+ MaybeMakeReadSlices();
553
+ if (!TcpDoRead(status)) {
554
+ // We've consumed the edge, request a new one.
555
+ read_mu_.Unlock();
556
+ handle_->NotifyOnRead(on_read_);
557
+ return;
558
+ }
559
+ } else {
560
+ incoming_buffer_->Clear();
561
+ last_read_buffer_.Clear();
562
+ }
563
+ absl::AnyInvocable<void(absl::Status)> cb = std::move(read_cb_);
564
+ read_cb_ = nullptr;
565
+ incoming_buffer_ = nullptr;
566
+ read_mu_.Unlock();
567
+ cb(status);
568
+ Unref();
569
+ }
570
+
571
+ void PosixEndpointImpl::Read(absl::AnyInvocable<void(absl::Status)> on_read,
572
+ SliceBuffer* buffer,
573
+ const EventEngine::Endpoint::ReadArgs* args) {
574
+ read_mu_.Lock();
575
+ GPR_ASSERT(read_cb_ == nullptr);
576
+ read_cb_ = std::move(on_read);
577
+ incoming_buffer_ = buffer;
578
+ incoming_buffer_->Clear();
579
+ incoming_buffer_->Swap(last_read_buffer_);
580
+ read_mu_.Unlock();
581
+ if (args != nullptr && grpc_core::IsTcpFrameSizeTuningEnabled()) {
582
+ min_progress_size_ = args->read_hint_bytes;
583
+ } else {
584
+ min_progress_size_ = 1;
585
+ }
586
+ Ref().release();
587
+ if (is_first_read_) {
588
+ // Endpoint read called for the very first time. Register read callback
589
+ // with the polling engine.
590
+ is_first_read_ = false;
591
+ handle_->NotifyOnRead(on_read_);
592
+ } else if (inq_ == 0) {
593
+ // Upper layer asked to read more but we know there is no pending data to
594
+ // read from previous reads. So, wait for POLLIN.
595
+ handle_->NotifyOnRead(on_read_);
596
+ } else {
597
+ on_read_->SetStatus(absl::OkStatus());
598
+ engine_->Run(on_read_);
599
+ }
600
+ }
601
+
602
+ #ifdef GRPC_LINUX_ERRQUEUE
603
+ TcpZerocopySendRecord* PosixEndpointImpl::TcpGetSendZerocopyRecord(
604
+ SliceBuffer& buf) {
605
+ TcpZerocopySendRecord* zerocopy_send_record = nullptr;
606
+ const bool use_zerocopy =
607
+ tcp_zerocopy_send_ctx_->Enabled() &&
608
+ tcp_zerocopy_send_ctx_->ThresholdBytes() < buf.Length();
609
+ if (use_zerocopy) {
610
+ zerocopy_send_record = tcp_zerocopy_send_ctx_->GetSendRecord();
611
+ if (zerocopy_send_record == nullptr) {
612
+ ProcessErrors();
613
+ zerocopy_send_record = tcp_zerocopy_send_ctx_->GetSendRecord();
614
+ }
615
+ if (zerocopy_send_record != nullptr) {
616
+ zerocopy_send_record->PrepareForSends(buf);
617
+ GPR_DEBUG_ASSERT(buf.Count() == 0);
618
+ GPR_DEBUG_ASSERT(buf.Length() == 0);
619
+ outgoing_byte_idx_ = 0;
620
+ outgoing_buffer_ = nullptr;
621
+ }
622
+ }
623
+ return zerocopy_send_record;
624
+ }
625
+
626
+ // For linux platforms, reads the socket's error queue and processes error
627
+ // messages from the queue.
628
+ bool PosixEndpointImpl::ProcessErrors() {
629
+ bool processed_err = false;
630
+ struct iovec iov;
631
+ iov.iov_base = nullptr;
632
+ iov.iov_len = 0;
633
+ struct msghdr msg;
634
+ msg.msg_name = nullptr;
635
+ msg.msg_namelen = 0;
636
+ msg.msg_iov = &iov;
637
+ msg.msg_iovlen = 0;
638
+ msg.msg_flags = 0;
639
+ // Allocate enough space so we don't need to keep increasing this as size of
640
+ // OPT_STATS increase.
641
+ constexpr size_t cmsg_alloc_space =
642
+ CMSG_SPACE(sizeof(scm_timestamping)) +
643
+ CMSG_SPACE(sizeof(sock_extended_err) + sizeof(sockaddr_in)) +
644
+ CMSG_SPACE(32 * NLA_ALIGN(NLA_HDRLEN + sizeof(uint64_t)));
645
+ // Allocate aligned space for cmsgs received along with timestamps.
646
+ union {
647
+ char rbuf[cmsg_alloc_space];
648
+ struct cmsghdr align;
649
+ } aligned_buf;
650
+ msg.msg_control = aligned_buf.rbuf;
651
+ int r, saved_errno;
652
+ while (true) {
653
+ msg.msg_controllen = sizeof(aligned_buf.rbuf);
654
+ do {
655
+ r = recvmsg(fd_, &msg, MSG_ERRQUEUE);
656
+ saved_errno = errno;
657
+ } while (r < 0 && saved_errno == EINTR);
658
+
659
+ if (r < 0 && saved_errno == EAGAIN) {
660
+ return processed_err; // No more errors to process
661
+ } else if (r < 0) {
662
+ return processed_err;
663
+ }
664
+ if (GPR_UNLIKELY((msg.msg_flags & MSG_CTRUNC) != 0)) {
665
+ gpr_log(GPR_ERROR, "Error message was truncated.");
666
+ }
667
+
668
+ if (msg.msg_controllen == 0) {
669
+ // There was no control message found. It was probably spurious.
670
+ return processed_err;
671
+ }
672
+ bool seen = false;
673
+ for (auto cmsg = CMSG_FIRSTHDR(&msg); cmsg && cmsg->cmsg_len;
674
+ cmsg = CMSG_NXTHDR(&msg, cmsg)) {
675
+ if (CmsgIsZeroCopy(*cmsg)) {
676
+ ProcessZerocopy(cmsg);
677
+ seen = true;
678
+ processed_err = true;
679
+ } else if (cmsg->cmsg_level == SOL_SOCKET &&
680
+ cmsg->cmsg_type == SCM_TIMESTAMPING) {
681
+ cmsg = ProcessTimestamp(&msg, cmsg);
682
+ seen = true;
683
+ processed_err = true;
684
+ } else {
685
+ // Got a control message that is not a timestamp or zerocopy. Don't know
686
+ // how to handle this.
687
+ return processed_err;
688
+ }
689
+ }
690
+ if (!seen) {
691
+ return processed_err;
692
+ }
693
+ }
694
+ }
695
+
696
+ void PosixEndpointImpl::ZerocopyDisableAndWaitForRemaining() {
697
+ tcp_zerocopy_send_ctx_->Shutdown();
698
+ while (!tcp_zerocopy_send_ctx_->AllSendRecordsEmpty()) {
699
+ ProcessErrors();
700
+ }
701
+ }
702
+
703
+ // Reads \a cmsg to process zerocopy control messages.
704
+ void PosixEndpointImpl::ProcessZerocopy(struct cmsghdr* cmsg) {
705
+ GPR_DEBUG_ASSERT(cmsg);
706
+ auto serr = reinterpret_cast<struct sock_extended_err*>(CMSG_DATA(cmsg));
707
+ GPR_DEBUG_ASSERT(serr->ee_errno == 0);
708
+ GPR_DEBUG_ASSERT(serr->ee_origin == SO_EE_ORIGIN_ZEROCOPY);
709
+ const uint32_t lo = serr->ee_info;
710
+ const uint32_t hi = serr->ee_data;
711
+ for (uint32_t seq = lo; seq <= hi; ++seq) {
712
+ // TODO(arjunroy): It's likely that lo and hi refer to zerocopy sequence
713
+ // numbers that are generated by a single call to grpc_endpoint_write; ie.
714
+ // we can batch the unref operation. So, check if record is the same for
715
+ // both; if so, batch the unref/put.
716
+ TcpZerocopySendRecord* record =
717
+ tcp_zerocopy_send_ctx_->ReleaseSendRecord(seq);
718
+ GPR_DEBUG_ASSERT(record);
719
+ UnrefMaybePutZerocopySendRecord(record);
720
+ }
721
+ if (tcp_zerocopy_send_ctx_->UpdateZeroCopyOptMemStateAfterFree()) {
722
+ handle_->SetWritable();
723
+ }
724
+ }
725
+
726
+ // Reads \a cmsg to derive timestamps from the control messages. If a valid
727
+ // timestamp is found, the traced buffer list is updated with this timestamp.
728
+ // The caller of this function should be looping on the control messages found
729
+ // in \a msg. \a cmsg should point to the control message that the caller wants
730
+ // processed. On return, a pointer to a control message is returned. On the next
731
+ // iteration, CMSG_NXTHDR(msg, ret_val) should be passed as \a cmsg.
732
+ struct cmsghdr* PosixEndpointImpl::ProcessTimestamp(msghdr* msg,
733
+ struct cmsghdr* cmsg) {
734
+ auto next_cmsg = CMSG_NXTHDR(msg, cmsg);
735
+ cmsghdr* opt_stats = nullptr;
736
+ if (next_cmsg == nullptr) {
737
+ return cmsg;
738
+ }
739
+
740
+ // Check if next_cmsg is an OPT_STATS msg.
741
+ if (next_cmsg->cmsg_level == SOL_SOCKET &&
742
+ next_cmsg->cmsg_type == SCM_TIMESTAMPING_OPT_STATS) {
743
+ opt_stats = next_cmsg;
744
+ next_cmsg = CMSG_NXTHDR(msg, opt_stats);
745
+ if (next_cmsg == nullptr) {
746
+ return opt_stats;
747
+ }
748
+ }
749
+
750
+ if (!(next_cmsg->cmsg_level == SOL_IP || next_cmsg->cmsg_level == SOL_IPV6) ||
751
+ !(next_cmsg->cmsg_type == IP_RECVERR ||
752
+ next_cmsg->cmsg_type == IPV6_RECVERR)) {
753
+ return cmsg;
754
+ }
755
+
756
+ auto tss = reinterpret_cast<scm_timestamping*>(CMSG_DATA(cmsg));
757
+ auto serr = reinterpret_cast<struct sock_extended_err*>(CMSG_DATA(next_cmsg));
758
+ if (serr->ee_errno != ENOMSG ||
759
+ serr->ee_origin != SO_EE_ORIGIN_TIMESTAMPING) {
760
+ gpr_log(GPR_ERROR, "Unexpected control message");
761
+ return cmsg;
762
+ }
763
+ traced_buffers_.ProcessTimestamp(serr, opt_stats, tss);
764
+ return next_cmsg;
765
+ }
766
+
767
+ void PosixEndpointImpl::HandleError(absl::Status status) {
768
+ if (!status.ok() ||
769
+ stop_error_notification_.load(std::memory_order_relaxed)) {
770
+ // We aren't going to register to hear on error anymore, so it is safe to
771
+ // unref.
772
+ Unref();
773
+ return;
774
+ }
775
+ // We are still interested in collecting timestamps, so let's try reading
776
+ // them.
777
+ if (!ProcessErrors()) {
778
+ // This might not a timestamps error. Set the read and write closures to be
779
+ // ready.
780
+ handle_->SetReadable();
781
+ handle_->SetWritable();
782
+ }
783
+ handle_->NotifyOnError(on_error_);
784
+ }
785
+
786
+ bool PosixEndpointImpl::WriteWithTimestamps(struct msghdr* msg,
787
+ size_t sending_length,
788
+ ssize_t* sent_length,
789
+ int* saved_errno,
790
+ int additional_flags) {
791
+ if (!socket_ts_enabled_) {
792
+ uint32_t opt = kTimestampingSocketOptions;
793
+ if (setsockopt(fd_, SOL_SOCKET, SO_TIMESTAMPING, static_cast<void*>(&opt),
794
+ sizeof(opt)) != 0) {
795
+ return false;
796
+ }
797
+ bytes_counter_ = -1;
798
+ socket_ts_enabled_ = true;
799
+ }
800
+ // Set control message to indicate that you want timestamps.
801
+ union {
802
+ char cmsg_buf[CMSG_SPACE(sizeof(uint32_t))];
803
+ struct cmsghdr align;
804
+ } u;
805
+ cmsghdr* cmsg = reinterpret_cast<cmsghdr*>(u.cmsg_buf);
806
+ cmsg->cmsg_level = SOL_SOCKET;
807
+ cmsg->cmsg_type = SO_TIMESTAMPING;
808
+ cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t));
809
+ *reinterpret_cast<int*>(CMSG_DATA(cmsg)) = kTimestampingRecordingOptions;
810
+ msg->msg_control = u.cmsg_buf;
811
+ msg->msg_controllen = CMSG_SPACE(sizeof(uint32_t));
812
+
813
+ // If there was an error on sendmsg the logic in tcp_flush will handle it.
814
+ ssize_t length = TcpSend(fd_, msg, saved_errno, additional_flags);
815
+ *sent_length = length;
816
+ // Only save timestamps if all the bytes were taken by sendmsg.
817
+ if (sending_length == static_cast<size_t>(length)) {
818
+ traced_buffers_.AddNewEntry(static_cast<uint32_t>(bytes_counter_ + length),
819
+ fd_, outgoing_buffer_arg_);
820
+ outgoing_buffer_arg_ = nullptr;
821
+ }
822
+ return true;
823
+ }
824
+
825
+ #else // GRPC_LINUX_ERRQUEUE
826
+ TcpZerocopySendRecord* PosixEndpointImpl::TcpGetSendZerocopyRecord(
827
+ SliceBuffer& /*buf*/) {
828
+ return nullptr;
829
+ }
830
+
831
+ void PosixEndpointImpl::HandleError(absl::Status /*status*/) {
832
+ GPR_ASSERT(false && "Error handling not supported on this platform");
833
+ }
834
+
835
+ void PosixEndpointImpl::ZerocopyDisableAndWaitForRemaining() {}
836
+
837
+ bool PosixEndpointImpl::WriteWithTimestamps(struct msghdr* /*msg*/,
838
+ size_t /*sending_length*/,
839
+ ssize_t* /*sent_length*/,
840
+ int* /*saved_errno*/,
841
+ int /*additional_flags*/) {
842
+ GPR_ASSERT(false && "Write with timestamps not supported for this platform");
843
+ }
844
+ #endif // GRPC_LINUX_ERRQUEUE
845
+
846
+ void PosixEndpointImpl::UnrefMaybePutZerocopySendRecord(
847
+ TcpZerocopySendRecord* record) {
848
+ if (record->Unref()) {
849
+ tcp_zerocopy_send_ctx_->PutSendRecord(record);
850
+ }
851
+ }
852
+
853
+ // If outgoing_buffer_arg is filled, shuts down the list early, so that any
854
+ // release operations needed can be performed on the arg.
855
+ void PosixEndpointImpl::TcpShutdownTracedBufferList() {
856
+ if (outgoing_buffer_arg_ != nullptr) {
857
+ traced_buffers_.Shutdown(outgoing_buffer_arg_,
858
+ absl::InternalError("TracedBuffer list shutdown"));
859
+ outgoing_buffer_arg_ = nullptr;
860
+ }
861
+ }
862
+
863
+ // returns true if done, false if pending; if returning true, *error is set
864
+ bool PosixEndpointImpl::DoFlushZerocopy(TcpZerocopySendRecord* record,
865
+ absl::Status& status) {
866
+ msg_iovlen_type iov_size;
867
+ ssize_t sent_length = 0;
868
+ size_t sending_length;
869
+ size_t unwind_slice_idx;
870
+ size_t unwind_byte_idx;
871
+ bool tried_sending_message;
872
+ int saved_errno;
873
+ msghdr msg;
874
+ bool constrained;
875
+ status = absl::OkStatus();
876
+ // iov consumes a large space. Keep it as the last item on the stack to
877
+ // improve locality. After all, we expect only the first elements of it
878
+ // being populated in most cases.
879
+ iovec iov[MAX_WRITE_IOVEC];
880
+ while (true) {
881
+ sending_length = 0;
882
+ iov_size = record->PopulateIovs(&unwind_slice_idx, &unwind_byte_idx,
883
+ &sending_length, iov);
884
+ msg.msg_name = nullptr;
885
+ msg.msg_namelen = 0;
886
+ msg.msg_iov = iov;
887
+ msg.msg_iovlen = iov_size;
888
+ msg.msg_flags = 0;
889
+ tried_sending_message = false;
890
+ constrained = false;
891
+ // Before calling sendmsg (with or without timestamps): we
892
+ // take a single ref on the zerocopy send record.
893
+ tcp_zerocopy_send_ctx_->NoteSend(record);
894
+ saved_errno = 0;
895
+ if (outgoing_buffer_arg_ != nullptr) {
896
+ if (!ts_capable_ ||
897
+ !WriteWithTimestamps(&msg, sending_length, &sent_length, &saved_errno,
898
+ MSG_ZEROCOPY)) {
899
+ // We could not set socket options to collect Fathom timestamps.
900
+ // Fallback on writing without timestamps.
901
+ ts_capable_ = false;
902
+ TcpShutdownTracedBufferList();
903
+ } else {
904
+ tried_sending_message = true;
905
+ }
906
+ }
907
+ if (!tried_sending_message) {
908
+ msg.msg_control = nullptr;
909
+ msg.msg_controllen = 0;
910
+ sent_length = TcpSend(fd_, &msg, &saved_errno, MSG_ZEROCOPY);
911
+ }
912
+ if (tcp_zerocopy_send_ctx_->UpdateZeroCopyOptMemStateAfterSend(
913
+ saved_errno == ENOBUFS, constrained) ||
914
+ constrained) {
915
+ // If constrained, is true it implies that we received an ENOBUFS error
916
+ // but there are no un-acked z-copy records. This situation may arise
917
+ // because the per-process RLIMIT_MEMLOCK limit or the per-socket hard
918
+ // memlock ulimit on the machine may be very small. These limits control
919
+ // the max number of bytes a process/socket can respectively pin to RAM.
920
+ // Tx0cp respects these limits and if a sendmsg tries to send more than
921
+ // this limit, the kernel may return ENOBUFS error. Print a warning
922
+ // message here to allow help with debugging. Grpc should not attempt to
923
+ // raise the limit values.
924
+ if (!constrained) {
925
+ handle_->SetWritable();
926
+ } else {
927
+ #ifdef GRPC_LINUX_ERRQUEUE
928
+ GRPC_LOG_EVERY_N_SEC(
929
+ 1,
930
+ "Tx0cp encountered an ENOBUFS error possibly because one or "
931
+ "both of RLIMIT_MEMLOCK or hard memlock ulimit values are too "
932
+ "small for the intended user. Current system value of "
933
+ "RLIMIT_MEMLOCK is %" PRIu64 " and hard memlock ulimit is %" PRIu64
934
+ ".Consider increasing these values appropriately for the intended "
935
+ "user.",
936
+ GetRLimitMemLockMax(), GetUlimitHardMemLock());
937
+ #endif
938
+ }
939
+ }
940
+ if (sent_length < 0) {
941
+ // If this particular send failed, drop ref taken earlier in this method.
942
+ tcp_zerocopy_send_ctx_->UndoSend();
943
+ if (saved_errno == EAGAIN || saved_errno == ENOBUFS) {
944
+ record->UnwindIfThrottled(unwind_slice_idx, unwind_byte_idx);
945
+ return false;
946
+ } else {
947
+ status = absl::InternalError(
948
+ absl::StrCat("sendmsg", std::strerror(saved_errno)));
949
+ TcpShutdownTracedBufferList();
950
+ return true;
951
+ }
952
+ }
953
+ bytes_counter_ += sent_length;
954
+ record->UpdateOffsetForBytesSent(sending_length,
955
+ static_cast<size_t>(sent_length));
956
+ if (record->AllSlicesSent()) {
957
+ return true;
958
+ }
959
+ }
960
+ }
961
+
962
+ bool PosixEndpointImpl::TcpFlushZerocopy(TcpZerocopySendRecord* record,
963
+ absl::Status& status) {
964
+ bool done = DoFlushZerocopy(record, status);
965
+ if (done) {
966
+ // Either we encountered an error, or we successfully sent all the bytes.
967
+ // In either case, we're done with this record.
968
+ UnrefMaybePutZerocopySendRecord(record);
969
+ }
970
+ return done;
971
+ }
972
+
973
+ bool PosixEndpointImpl::TcpFlush(absl::Status& status) {
974
+ struct msghdr msg;
975
+ struct iovec iov[MAX_WRITE_IOVEC];
976
+ msg_iovlen_type iov_size;
977
+ ssize_t sent_length = 0;
978
+ size_t sending_length;
979
+ size_t trailing;
980
+ size_t unwind_slice_idx;
981
+ size_t unwind_byte_idx;
982
+ int saved_errno;
983
+ status = absl::OkStatus();
984
+
985
+ // We always start at zero, because we eagerly unref and trim the slice
986
+ // buffer as we write
987
+ size_t outgoing_slice_idx = 0;
988
+
989
+ while (true) {
990
+ sending_length = 0;
991
+ unwind_slice_idx = outgoing_slice_idx;
992
+ unwind_byte_idx = outgoing_byte_idx_;
993
+ for (iov_size = 0; outgoing_slice_idx != outgoing_buffer_->Count() &&
994
+ iov_size != MAX_WRITE_IOVEC;
995
+ iov_size++) {
996
+ auto slice = outgoing_buffer_->RefSlice(outgoing_slice_idx);
997
+ iov[iov_size].iov_base =
998
+ const_cast<uint8_t*>(slice.begin()) + outgoing_byte_idx_;
999
+ iov[iov_size].iov_len = slice.length() - outgoing_byte_idx_;
1000
+ sending_length += iov[iov_size].iov_len;
1001
+ outgoing_slice_idx++;
1002
+ outgoing_byte_idx_ = 0;
1003
+ }
1004
+ GPR_ASSERT(iov_size > 0);
1005
+
1006
+ msg.msg_name = nullptr;
1007
+ msg.msg_namelen = 0;
1008
+ msg.msg_iov = iov;
1009
+ msg.msg_iovlen = iov_size;
1010
+ msg.msg_flags = 0;
1011
+ bool tried_sending_message = false;
1012
+ saved_errno = 0;
1013
+ if (outgoing_buffer_arg_ != nullptr) {
1014
+ if (!ts_capable_ || !WriteWithTimestamps(&msg, sending_length,
1015
+ &sent_length, &saved_errno, 0)) {
1016
+ // We could not set socket options to collect Fathom timestamps.
1017
+ // Fallback on writing without timestamps.
1018
+ ts_capable_ = false;
1019
+ TcpShutdownTracedBufferList();
1020
+ } else {
1021
+ tried_sending_message = true;
1022
+ }
1023
+ }
1024
+ if (!tried_sending_message) {
1025
+ msg.msg_control = nullptr;
1026
+ msg.msg_controllen = 0;
1027
+ sent_length = TcpSend(fd_, &msg, &saved_errno);
1028
+ }
1029
+
1030
+ if (sent_length < 0) {
1031
+ if (saved_errno == EAGAIN || saved_errno == ENOBUFS) {
1032
+ outgoing_byte_idx_ = unwind_byte_idx;
1033
+ // unref all and forget about all slices that have been written to this
1034
+ // point
1035
+ for (size_t idx = 0; idx < unwind_slice_idx; ++idx) {
1036
+ outgoing_buffer_->TakeFirst();
1037
+ }
1038
+ return false;
1039
+ } else {
1040
+ status = absl::InternalError(
1041
+ absl::StrCat("sendmsg", std::strerror(saved_errno)));
1042
+ outgoing_buffer_->Clear();
1043
+ TcpShutdownTracedBufferList();
1044
+ return true;
1045
+ }
1046
+ }
1047
+
1048
+ GPR_ASSERT(outgoing_byte_idx_ == 0);
1049
+ bytes_counter_ += sent_length;
1050
+ trailing = sending_length - static_cast<size_t>(sent_length);
1051
+ while (trailing > 0) {
1052
+ size_t slice_length;
1053
+ outgoing_slice_idx--;
1054
+ slice_length = outgoing_buffer_->RefSlice(outgoing_slice_idx).length();
1055
+ if (slice_length > trailing) {
1056
+ outgoing_byte_idx_ = slice_length - trailing;
1057
+ break;
1058
+ } else {
1059
+ trailing -= slice_length;
1060
+ }
1061
+ }
1062
+ if (outgoing_slice_idx == outgoing_buffer_->Count()) {
1063
+ outgoing_buffer_->Clear();
1064
+ return true;
1065
+ }
1066
+ }
1067
+ }
1068
+
1069
+ void PosixEndpointImpl::HandleWrite(absl::Status status) {
1070
+ if (!status.ok()) {
1071
+ absl::AnyInvocable<void(absl::Status)> cb_ = std::move(write_cb_);
1072
+ write_cb_ = nullptr;
1073
+ if (current_zerocopy_send_ != nullptr) {
1074
+ UnrefMaybePutZerocopySendRecord(current_zerocopy_send_);
1075
+ current_zerocopy_send_ = nullptr;
1076
+ }
1077
+ cb_(status);
1078
+ Unref();
1079
+ return;
1080
+ }
1081
+ bool flush_result = current_zerocopy_send_ != nullptr
1082
+ ? TcpFlushZerocopy(current_zerocopy_send_, status)
1083
+ : TcpFlush(status);
1084
+ if (!flush_result) {
1085
+ GPR_DEBUG_ASSERT(status.ok());
1086
+ handle_->NotifyOnWrite(on_write_);
1087
+ } else {
1088
+ absl::AnyInvocable<void(absl::Status)> cb_ = std::move(write_cb_);
1089
+ write_cb_ = nullptr;
1090
+ current_zerocopy_send_ = nullptr;
1091
+ cb_(status);
1092
+ Unref();
1093
+ }
1094
+ }
1095
+
1096
+ void PosixEndpointImpl::Write(
1097
+ absl::AnyInvocable<void(absl::Status)> on_writable, SliceBuffer* data,
1098
+ const EventEngine::Endpoint::WriteArgs* args) {
1099
+ absl::Status status = absl::OkStatus();
1100
+ TcpZerocopySendRecord* zerocopy_send_record = nullptr;
1101
+
1102
+ GPR_ASSERT(write_cb_ == nullptr);
1103
+ GPR_DEBUG_ASSERT(current_zerocopy_send_ == nullptr);
1104
+ GPR_DEBUG_ASSERT(data != nullptr);
1105
+
1106
+ if (data->Length() == 0) {
1107
+ on_writable(handle_->IsHandleShutdown() ? absl::InternalError("EOF")
1108
+ : status);
1109
+ TcpShutdownTracedBufferList();
1110
+ return;
1111
+ }
1112
+
1113
+ zerocopy_send_record = TcpGetSendZerocopyRecord(*data);
1114
+ if (zerocopy_send_record == nullptr) {
1115
+ // Either not enough bytes, or couldn't allocate a zerocopy context.
1116
+ outgoing_buffer_ = data;
1117
+ outgoing_byte_idx_ = 0;
1118
+ }
1119
+ if (args != nullptr) {
1120
+ outgoing_buffer_arg_ = args->google_specific;
1121
+ }
1122
+ if (outgoing_buffer_arg_) {
1123
+ GPR_ASSERT(poller_->CanTrackErrors());
1124
+ }
1125
+
1126
+ bool flush_result = zerocopy_send_record != nullptr
1127
+ ? TcpFlushZerocopy(zerocopy_send_record, status)
1128
+ : TcpFlush(status);
1129
+ if (!flush_result) {
1130
+ Ref().release();
1131
+ write_cb_ = std::move(on_writable);
1132
+ current_zerocopy_send_ = zerocopy_send_record;
1133
+ handle_->NotifyOnWrite(on_write_);
1134
+ } else {
1135
+ on_writable(status);
1136
+ }
1137
+ }
1138
+
1139
+ void PosixEndpointImpl::MaybeShutdown(absl::Status why) {
1140
+ if (poller_->CanTrackErrors()) {
1141
+ ZerocopyDisableAndWaitForRemaining();
1142
+ stop_error_notification_.store(true, std::memory_order_release);
1143
+ handle_->SetHasError();
1144
+ }
1145
+ handle_->ShutdownHandle(why);
1146
+ Unref();
1147
+ }
1148
+
1149
+ PosixEndpointImpl ::~PosixEndpointImpl() {
1150
+ handle_->OrphanHandle(on_done_, nullptr, "");
1151
+ delete on_read_;
1152
+ delete on_write_;
1153
+ delete on_error_;
1154
+ }
1155
+
1156
+ PosixEndpointImpl::PosixEndpointImpl(EventHandle* handle,
1157
+ PosixEngineClosure* on_done,
1158
+ std::shared_ptr<EventEngine> engine,
1159
+ MemoryAllocator&& /*allocator*/,
1160
+ const PosixTcpOptions& options)
1161
+ : sock_(PosixSocketWrapper(handle->WrappedFd())),
1162
+ on_done_(on_done),
1163
+ traced_buffers_(),
1164
+ handle_(handle),
1165
+ poller_(handle->Poller()),
1166
+ engine_(engine) {
1167
+ PosixSocketWrapper sock(handle->WrappedFd());
1168
+ fd_ = handle_->WrappedFd();
1169
+ GPR_ASSERT(options.resource_quota != nullptr);
1170
+ memory_owner_ = options.resource_quota->memory_quota()->CreateMemoryOwner(
1171
+ *sock.PeerAddressString());
1172
+ self_reservation_ = memory_owner_.MakeReservation(sizeof(PosixEndpointImpl));
1173
+ local_address_ = *sock.LocalAddress();
1174
+ peer_address_ = *sock.PeerAddress();
1175
+ target_length_ = static_cast<double>(options.tcp_read_chunk_size);
1176
+ bytes_read_this_round_ = 0;
1177
+ min_read_chunk_size_ = options.tcp_min_read_chunk_size;
1178
+ max_read_chunk_size_ = options.tcp_max_read_chunk_size;
1179
+ bool zerocopy_enabled =
1180
+ options.tcp_tx_zero_copy_enabled && poller_->CanTrackErrors();
1181
+ #ifdef GRPC_LINUX_ERRQUEUE
1182
+ if (zerocopy_enabled) {
1183
+ if (GetRLimitMemLockMax() == 0) {
1184
+ zerocopy_enabled = false;
1185
+ gpr_log(
1186
+ GPR_ERROR,
1187
+ "Tx zero-copy will not be used by gRPC since RLIMIT_MEMLOCK value is "
1188
+ "not set. Consider raising its value with setrlimit().");
1189
+ } else if (GetUlimitHardMemLock() == 0) {
1190
+ zerocopy_enabled = false;
1191
+ gpr_log(GPR_ERROR,
1192
+ "Tx zero-copy will not be used by gRPC since hard memlock ulimit "
1193
+ "value is not set. Use ulimit -l <value> to set its value.");
1194
+ } else {
1195
+ const int enable = 1;
1196
+ if (setsockopt(fd_, SOL_SOCKET, SO_ZEROCOPY, &enable, sizeof(enable)) !=
1197
+ 0) {
1198
+ zerocopy_enabled = false;
1199
+ gpr_log(GPR_ERROR, "Failed to set zerocopy options on the socket.");
1200
+ }
1201
+ }
1202
+
1203
+ if (zerocopy_enabled) {
1204
+ gpr_log(GPR_INFO,
1205
+ "Tx-zero copy enabled for gRPC sends. RLIMIT_MEMLOCK value = "
1206
+ "%" PRIu64 ",ulimit hard memlock value = %" PRIu64,
1207
+ GetRLimitMemLockMax(), GetUlimitHardMemLock());
1208
+ }
1209
+ }
1210
+ #endif // GRPC_LINUX_ERRQUEUE
1211
+ tcp_zerocopy_send_ctx_ = std::make_unique<TcpZerocopySendCtx>(
1212
+ zerocopy_enabled, options.tcp_tx_zerocopy_max_simultaneous_sends,
1213
+ options.tcp_tx_zerocopy_send_bytes_threshold);
1214
+ #ifdef GRPC_HAVE_TCP_INQ
1215
+ int one = 1;
1216
+ if (setsockopt(fd_, SOL_TCP, TCP_INQ, &one, sizeof(one)) == 0) {
1217
+ inq_capable_ = true;
1218
+ } else {
1219
+ gpr_log(GPR_DEBUG, "cannot set inq fd=%d errno=%d", fd_, errno);
1220
+ inq_capable_ = false;
1221
+ }
1222
+ #else
1223
+ inq_capable_ = false;
1224
+ #endif // GRPC_HAVE_TCP_INQ
1225
+
1226
+ on_read_ = PosixEngineClosure::ToPermanentClosure(
1227
+ [this](absl::Status status) { HandleRead(std::move(status)); });
1228
+ on_write_ = PosixEngineClosure::ToPermanentClosure(
1229
+ [this](absl::Status status) { HandleWrite(std::move(status)); });
1230
+ on_error_ = PosixEngineClosure::ToPermanentClosure(
1231
+ [this](absl::Status status) { HandleError(std::move(status)); });
1232
+
1233
+ // Start being notified on errors if poller can track errors.
1234
+ if (poller_->CanTrackErrors()) {
1235
+ Ref().release();
1236
+ handle_->NotifyOnError(on_error_);
1237
+ }
1238
+ }
1239
+
1240
+ std::unique_ptr<PosixEndpoint> CreatePosixEndpoint(
1241
+ EventHandle* handle, PosixEngineClosure* on_shutdown,
1242
+ std::shared_ptr<EventEngine> engine, MemoryAllocator&& allocator,
1243
+ const PosixTcpOptions& options) {
1244
+ GPR_DEBUG_ASSERT(handle != nullptr);
1245
+ return std::make_unique<PosixEndpoint>(handle, on_shutdown, std::move(engine),
1246
+ std::move(allocator), options);
1247
+ }
1248
+
1249
+ } // namespace posix_engine
1250
+ } // namespace grpc_event_engine
1251
+
1252
+ #else // GRPC_POSIX_SOCKET_TCP
1253
+
1254
+ namespace grpc_event_engine {
1255
+ namespace posix_engine {
1256
+
1257
+ using ::grpc_event_engine::experimental::EndpointConfig;
1258
+ using ::grpc_event_engine::experimental::EventEngine;
1259
+
1260
+ std::unique_ptr<PosixEndpoint> CreatePosixEndpoint(
1261
+ EventHandle* /*handle*/, PosixEngineClosure* /*on_shutdown*/,
1262
+ std::shared_ptr<EventEngine> /*engine*/,
1263
+ const PosixTcpOptions& /*options*/) {
1264
+ GPR_ASSERT(false && "Cannot create PosixEndpoint on this platform");
1265
+ }
1266
+
1267
+ } // namespace posix_engine
1268
+ } // namespace grpc_event_engine
1269
+
1270
+ #endif // GRPC_POSIX_SOCKET_TCP