grpc 1.30.0 → 1.31.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of grpc might be problematic. Click here for more details.

Files changed (383) hide show
  1. checksums.yaml +4 -4
  2. data/Makefile +560 -619
  3. data/include/grpc/grpc_security.h +8 -0
  4. data/include/grpc/grpc_security_constants.h +3 -0
  5. data/include/grpc/impl/codegen/grpc_types.h +7 -5
  6. data/include/grpc/impl/codegen/port_platform.h +0 -32
  7. data/src/core/ext/filters/client_channel/backend_metric.cc +12 -9
  8. data/src/core/ext/filters/client_channel/client_channel.cc +406 -261
  9. data/src/core/ext/filters/client_channel/config_selector.cc +62 -0
  10. data/src/core/ext/filters/client_channel/config_selector.h +93 -0
  11. data/src/core/ext/filters/client_channel/global_subchannel_pool.cc +24 -2
  12. data/src/core/ext/filters/client_channel/health/health_check_client.cc +2 -0
  13. data/src/core/ext/filters/client_channel/http_connect_handshaker.cc +6 -5
  14. data/src/core/ext/filters/client_channel/http_proxy.cc +6 -4
  15. data/src/core/ext/filters/client_channel/lb_policy.h +2 -0
  16. data/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc +39 -23
  17. data/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h +4 -6
  18. data/src/core/ext/filters/client_channel/lb_policy/weighted_target/weighted_target.cc +3 -4
  19. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_routing.cc +381 -72
  20. data/src/core/ext/filters/client_channel/lb_policy_registry.cc +4 -5
  21. data/src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.cc +5 -2
  22. data/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver.cc +6 -5
  23. data/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver_libuv.cc +8 -6
  24. data/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver_posix.cc +9 -7
  25. data/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_ev_driver_windows.cc +7 -5
  26. data/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.cc +33 -48
  27. data/src/core/ext/filters/client_channel/resolver/dns/native/dns_resolver.cc +6 -2
  28. data/src/core/ext/filters/client_channel/resolver/xds/xds_resolver.cc +21 -18
  29. data/src/core/ext/filters/client_channel/resolver_registry.cc +13 -14
  30. data/src/core/ext/filters/client_channel/resolver_result_parsing.cc +6 -7
  31. data/src/core/ext/filters/client_channel/resolving_lb_policy.cc +33 -28
  32. data/src/core/ext/filters/client_channel/resolving_lb_policy.h +39 -20
  33. data/src/core/ext/filters/client_channel/service_config_channel_arg_filter.cc +142 -0
  34. data/src/core/ext/filters/client_channel/subchannel.cc +1 -1
  35. data/src/core/ext/filters/client_channel/xds/xds_api.cc +327 -123
  36. data/src/core/ext/filters/client_channel/xds/xds_api.h +72 -7
  37. data/src/core/ext/filters/client_channel/xds/xds_bootstrap.cc +12 -23
  38. data/src/core/ext/filters/client_channel/xds/xds_client.cc +112 -33
  39. data/src/core/ext/filters/client_channel/xds/xds_client_stats.h +10 -10
  40. data/src/core/ext/filters/http/client/http_client_filter.cc +5 -5
  41. data/src/core/ext/filters/http/http_filters_plugin.cc +2 -1
  42. data/src/core/ext/filters/http/message_compress/message_decompress_filter.cc +74 -33
  43. data/src/core/ext/filters/http/message_compress/message_decompress_filter.h +3 -1
  44. data/src/core/ext/filters/message_size/message_size_filter.cc +56 -80
  45. data/src/core/ext/filters/message_size/message_size_filter.h +6 -0
  46. data/src/core/ext/transport/chttp2/server/chttp2_server.cc +383 -347
  47. data/src/core/ext/transport/chttp2/server/chttp2_server.h +6 -2
  48. data/src/core/ext/transport/chttp2/server/insecure/server_chttp2.cc +1 -1
  49. data/src/core/ext/transport/chttp2/server/insecure/server_chttp2_posix.cc +7 -13
  50. data/src/core/ext/transport/chttp2/server/secure/server_secure_chttp2.cc +7 -8
  51. data/src/core/ext/transport/chttp2/transport/chttp2_transport.cc +19 -4
  52. data/src/core/ext/transport/chttp2/transport/flow_control.cc +22 -27
  53. data/src/core/ext/transport/chttp2/transport/flow_control.h +14 -16
  54. data/src/core/ext/transport/chttp2/transport/frame_data.cc +9 -12
  55. data/src/core/ext/transport/chttp2/transport/frame_goaway.cc +4 -6
  56. data/src/core/ext/transport/chttp2/transport/frame_ping.cc +5 -6
  57. data/src/core/ext/transport/chttp2/transport/frame_rst_stream.cc +12 -13
  58. data/src/core/ext/transport/chttp2/transport/frame_settings.cc +6 -7
  59. data/src/core/ext/transport/chttp2/transport/frame_window_update.cc +9 -12
  60. data/src/core/ext/transport/chttp2/transport/hpack_parser.cc +25 -29
  61. data/src/core/ext/transport/chttp2/transport/hpack_table.cc +13 -17
  62. data/src/core/ext/transport/chttp2/transport/internal.h +13 -0
  63. data/src/core/ext/transport/chttp2/transport/parsing.cc +33 -43
  64. data/src/core/ext/transport/chttp2/transport/writing.cc +9 -14
  65. data/src/core/ext/transport/inproc/inproc_transport.cc +35 -15
  66. data/src/core/ext/upb-generated/envoy/annotations/deprecation.upb.h +0 -1
  67. data/src/core/ext/upb-generated/envoy/annotations/resource.upb.h +3 -4
  68. data/src/core/ext/upb-generated/envoy/api/v2/auth/cert.upb.h +0 -1
  69. data/src/core/ext/upb-generated/envoy/api/v2/auth/common.upb.h +80 -69
  70. data/src/core/ext/upb-generated/envoy/api/v2/auth/secret.upb.h +24 -23
  71. data/src/core/ext/upb-generated/envoy/api/v2/auth/tls.upb.h +66 -56
  72. data/src/core/ext/upb-generated/envoy/api/v2/cds.upb.h +1 -2
  73. data/src/core/ext/upb-generated/envoy/api/v2/cluster.upb.c +2 -2
  74. data/src/core/ext/upb-generated/envoy/api/v2/cluster.upb.h +317 -311
  75. data/src/core/ext/upb-generated/envoy/api/v2/cluster/circuit_breaker.upb.h +42 -34
  76. data/src/core/ext/upb-generated/envoy/api/v2/cluster/filter.upb.h +7 -7
  77. data/src/core/ext/upb-generated/envoy/api/v2/cluster/outlier_detection.upb.h +79 -61
  78. data/src/core/ext/upb-generated/envoy/api/v2/core/address.upb.h +55 -49
  79. data/src/core/ext/upb-generated/envoy/api/v2/core/backoff.upb.h +9 -8
  80. data/src/core/ext/upb-generated/envoy/api/v2/core/base.upb.c +1 -1
  81. data/src/core/ext/upb-generated/envoy/api/v2/core/base.upb.h +163 -169
  82. data/src/core/ext/upb-generated/envoy/api/v2/core/config_source.upb.h +51 -45
  83. data/src/core/ext/upb-generated/envoy/api/v2/core/event_service_config.upb.h +4 -5
  84. data/src/core/ext/upb-generated/envoy/api/v2/core/grpc_service.upb.h +107 -100
  85. data/src/core/ext/upb-generated/envoy/api/v2/core/health_check.upb.h +137 -117
  86. data/src/core/ext/upb-generated/envoy/api/v2/core/http_uri.upb.h +9 -9
  87. data/src/core/ext/upb-generated/envoy/api/v2/core/protocol.upb.h +105 -87
  88. data/src/core/ext/upb-generated/envoy/api/v2/core/socket_option.upb.h +12 -13
  89. data/src/core/ext/upb-generated/envoy/api/v2/discovery.upb.c +1 -1
  90. data/src/core/ext/upb-generated/envoy/api/v2/discovery.upb.h +95 -101
  91. data/src/core/ext/upb-generated/envoy/api/v2/eds.upb.h +1 -2
  92. data/src/core/ext/upb-generated/envoy/api/v2/endpoint.upb.c +1 -1
  93. data/src/core/ext/upb-generated/envoy/api/v2/endpoint.upb.h +49 -65
  94. data/src/core/ext/upb-generated/envoy/api/v2/endpoint/endpoint.upb.h +0 -1
  95. data/src/core/ext/upb-generated/envoy/api/v2/endpoint/endpoint_components.upb.h +49 -42
  96. data/src/core/ext/upb-generated/envoy/api/v2/endpoint/load_report.upb.h +70 -62
  97. data/src/core/ext/upb-generated/envoy/api/v2/lds.upb.h +1 -2
  98. data/src/core/ext/upb-generated/envoy/api/v2/listener.upb.h +81 -65
  99. data/src/core/ext/upb-generated/envoy/api/v2/listener/listener.upb.h +0 -1
  100. data/src/core/ext/upb-generated/envoy/api/v2/listener/listener_components.upb.c +1 -1
  101. data/src/core/ext/upb-generated/envoy/api/v2/listener/listener_components.upb.h +91 -80
  102. data/src/core/ext/upb-generated/envoy/api/v2/listener/udp_listener_config.upb.h +9 -10
  103. data/src/core/ext/upb-generated/envoy/api/v2/rds.upb.h +1 -2
  104. data/src/core/ext/upb-generated/envoy/api/v2/route.upb.h +36 -31
  105. data/src/core/ext/upb-generated/envoy/api/v2/route/route.upb.h +0 -1
  106. data/src/core/ext/upb-generated/envoy/api/v2/route/route_components.upb.c +7 -7
  107. data/src/core/ext/upb-generated/envoy/api/v2/route/route_components.upb.h +648 -696
  108. data/src/core/ext/upb-generated/envoy/api/v2/scoped_route.upb.h +16 -15
  109. data/src/core/ext/upb-generated/envoy/api/v2/srds.upb.h +1 -2
  110. data/src/core/ext/upb-generated/envoy/config/filter/accesslog/v2/accesslog.upb.c +1 -1
  111. data/src/core/ext/upb-generated/envoy/config/filter/accesslog/v2/accesslog.upb.h +95 -88
  112. data/src/core/ext/upb-generated/envoy/config/filter/network/http_connection_manager/v2/http_connection_manager.upb.h +234 -199
  113. data/src/core/ext/upb-generated/envoy/config/listener/v2/api_listener.upb.h +5 -5
  114. data/src/core/ext/upb-generated/envoy/config/trace/v2/http_tracer.upb.h +13 -13
  115. data/src/core/ext/upb-generated/envoy/service/discovery/v2/ads.upb.h +1 -2
  116. data/src/core/ext/upb-generated/envoy/service/load_stats/v2/lrs.upb.h +20 -18
  117. data/src/core/ext/upb-generated/envoy/type/http.upb.h +0 -1
  118. data/src/core/ext/upb-generated/envoy/type/matcher/regex.upb.h +18 -17
  119. data/src/core/ext/upb-generated/envoy/type/matcher/string.upb.h +14 -14
  120. data/src/core/ext/upb-generated/envoy/type/metadata/v2/metadata.upb.h +23 -23
  121. data/src/core/ext/upb-generated/envoy/type/percent.upb.h +8 -9
  122. data/src/core/ext/upb-generated/envoy/type/range.upb.h +15 -16
  123. data/src/core/ext/upb-generated/envoy/type/semantic_version.upb.h +7 -8
  124. data/src/core/ext/upb-generated/envoy/type/tracing/v2/custom_tag.upb.h +36 -35
  125. data/src/core/ext/upb-generated/gogoproto/gogo.upb.h +0 -1
  126. data/src/core/ext/upb-generated/google/api/annotations.upb.h +0 -1
  127. data/src/core/ext/upb-generated/google/api/http.upb.h +29 -28
  128. data/src/core/ext/upb-generated/google/protobuf/any.upb.h +5 -6
  129. data/src/core/ext/upb-generated/google/protobuf/descriptor.upb.c +3 -3
  130. data/src/core/ext/upb-generated/google/protobuf/descriptor.upb.h +412 -386
  131. data/src/core/ext/upb-generated/google/protobuf/duration.upb.h +5 -6
  132. data/src/core/ext/upb-generated/google/protobuf/empty.upb.h +1 -2
  133. data/src/core/ext/upb-generated/google/protobuf/struct.upb.c +1 -1
  134. data/src/core/ext/upb-generated/google/protobuf/struct.upb.h +33 -54
  135. data/src/core/ext/upb-generated/google/protobuf/timestamp.upb.h +5 -6
  136. data/src/core/ext/upb-generated/google/protobuf/wrappers.upb.h +27 -28
  137. data/src/core/ext/upb-generated/google/rpc/status.upb.h +8 -8
  138. data/src/core/ext/upb-generated/src/proto/grpc/gcp/altscontext.upb.c +1 -1
  139. data/src/core/ext/upb-generated/src/proto/grpc/gcp/altscontext.upb.h +32 -45
  140. data/src/core/ext/upb-generated/src/proto/grpc/gcp/handshaker.upb.c +4 -4
  141. data/src/core/ext/upb-generated/src/proto/grpc/gcp/handshaker.upb.h +157 -178
  142. data/src/core/ext/upb-generated/src/proto/grpc/gcp/transport_security_common.upb.h +14 -13
  143. data/src/core/ext/upb-generated/src/proto/grpc/health/v1/health.upb.h +6 -7
  144. data/src/core/ext/upb-generated/src/proto/grpc/lb/v1/load_balancer.upb.h +59 -56
  145. data/src/core/ext/upb-generated/udpa/annotations/migrate.upb.h +11 -12
  146. data/src/core/ext/upb-generated/udpa/annotations/sensitive.upb.h +0 -1
  147. data/src/core/ext/upb-generated/udpa/annotations/status.upb.h +5 -6
  148. data/src/core/ext/upb-generated/udpa/data/orca/v1/orca_load_report.upb.c +6 -6
  149. data/src/core/ext/upb-generated/udpa/data/orca/v1/orca_load_report.upb.h +41 -68
  150. data/src/core/ext/upb-generated/validate/validate.upb.h +536 -535
  151. data/src/core/lib/channel/channel_trace.cc +2 -6
  152. data/src/core/lib/channel/channelz.cc +5 -15
  153. data/src/core/lib/gpr/log_linux.cc +6 -8
  154. data/src/core/lib/gpr/log_posix.cc +6 -8
  155. data/src/core/lib/gpr/string.cc +10 -9
  156. data/src/core/lib/gpr/string.h +4 -2
  157. data/src/core/lib/gprpp/global_config_env.cc +8 -6
  158. data/src/core/lib/http/httpcli.cc +13 -10
  159. data/src/core/lib/http/httpcli_security_connector.cc +5 -5
  160. data/src/core/lib/iomgr/cfstream_handle.cc +1 -0
  161. data/src/core/lib/iomgr/endpoint_pair_posix.cc +10 -10
  162. data/src/core/lib/iomgr/error_cfstream.cc +9 -8
  163. data/src/core/lib/iomgr/ev_epoll1_linux.cc +5 -6
  164. data/src/core/lib/iomgr/ev_epollex_linux.cc +15 -21
  165. data/src/core/lib/iomgr/ev_poll_posix.cc +6 -5
  166. data/src/core/lib/iomgr/ev_posix.cc +2 -0
  167. data/src/core/lib/iomgr/iomgr.cc +10 -0
  168. data/src/core/lib/iomgr/iomgr.h +10 -0
  169. data/src/core/lib/iomgr/is_epollexclusive_available.cc +14 -0
  170. data/src/core/lib/iomgr/port.h +1 -21
  171. data/src/core/lib/iomgr/resolve_address_custom.cc +13 -18
  172. data/src/core/lib/iomgr/resolve_address_windows.cc +8 -8
  173. data/src/core/lib/iomgr/resource_quota.cc +34 -31
  174. data/src/core/lib/iomgr/sockaddr_utils.cc +7 -5
  175. data/src/core/lib/iomgr/sockaddr_utils.h +1 -1
  176. data/src/core/lib/iomgr/socket_utils_common_posix.cc +95 -55
  177. data/src/core/lib/iomgr/socket_windows.cc +4 -5
  178. data/src/core/lib/iomgr/tcp_client_cfstream.cc +9 -11
  179. data/src/core/lib/iomgr/tcp_client_custom.cc +6 -9
  180. data/src/core/lib/iomgr/tcp_client_posix.cc +27 -36
  181. data/src/core/lib/iomgr/tcp_client_windows.cc +9 -9
  182. data/src/core/lib/iomgr/tcp_custom.cc +1 -1
  183. data/src/core/lib/iomgr/tcp_custom.h +1 -1
  184. data/src/core/lib/iomgr/tcp_server.cc +3 -4
  185. data/src/core/lib/iomgr/tcp_server.h +7 -5
  186. data/src/core/lib/iomgr/tcp_server_custom.cc +6 -14
  187. data/src/core/lib/iomgr/tcp_server_posix.cc +34 -41
  188. data/src/core/lib/iomgr/tcp_server_utils_posix.h +3 -4
  189. data/src/core/lib/iomgr/tcp_server_utils_posix_common.cc +5 -7
  190. data/src/core/lib/iomgr/tcp_server_utils_posix_ifaddrs.cc +4 -9
  191. data/src/core/lib/iomgr/tcp_server_windows.cc +16 -16
  192. data/src/core/lib/iomgr/timer_generic.cc +13 -12
  193. data/src/core/lib/iomgr/udp_server.cc +24 -23
  194. data/src/core/lib/iomgr/udp_server.h +5 -2
  195. data/src/core/lib/iomgr/unix_sockets_posix.cc +9 -14
  196. data/src/core/lib/iomgr/unix_sockets_posix.h +3 -1
  197. data/src/core/lib/iomgr/unix_sockets_posix_noop.cc +5 -2
  198. data/src/core/lib/json/json_reader.cc +20 -21
  199. data/src/core/lib/security/credentials/credentials.h +5 -3
  200. data/src/core/lib/security/credentials/google_default/credentials_generic.cc +8 -6
  201. data/src/core/lib/security/credentials/google_default/google_default_credentials.cc +12 -9
  202. data/src/core/lib/security/credentials/jwt/jwt_credentials.cc +7 -4
  203. data/src/core/lib/security/credentials/oauth2/oauth2_credentials.cc +19 -28
  204. data/src/core/lib/security/credentials/plugin/plugin_credentials.cc +6 -6
  205. data/src/core/lib/security/credentials/ssl/ssl_credentials.cc +20 -0
  206. data/src/core/lib/security/credentials/ssl/ssl_credentials.h +10 -0
  207. data/src/core/lib/security/credentials/tls/grpc_tls_credentials_options.h +10 -0
  208. data/src/core/lib/security/security_connector/fake/fake_security_connector.cc +10 -10
  209. data/src/core/lib/security/security_connector/security_connector.cc +2 -0
  210. data/src/core/lib/security/security_connector/security_connector.h +1 -1
  211. data/src/core/lib/security/security_connector/ssl/ssl_security_connector.cc +18 -11
  212. data/src/core/lib/security/security_connector/ssl/ssl_security_connector.h +5 -0
  213. data/src/core/lib/security/security_connector/ssl_utils.cc +44 -23
  214. data/src/core/lib/security/security_connector/ssl_utils.h +6 -2
  215. data/src/core/lib/security/security_connector/tls/tls_security_connector.cc +27 -24
  216. data/src/core/lib/security/transport/auth_filters.h +0 -5
  217. data/src/core/lib/security/transport/client_auth_filter.cc +10 -9
  218. data/src/core/lib/security/util/json_util.cc +12 -13
  219. data/src/core/lib/slice/slice.cc +38 -1
  220. data/src/core/lib/slice/slice_internal.h +1 -0
  221. data/src/core/lib/surface/call.cc +40 -41
  222. data/src/core/lib/surface/completion_queue.cc +271 -14
  223. data/src/core/lib/surface/completion_queue.h +8 -0
  224. data/src/core/lib/surface/init.cc +2 -0
  225. data/src/core/lib/surface/server.cc +565 -632
  226. data/src/core/lib/surface/server.h +34 -12
  227. data/src/core/lib/surface/version.cc +2 -2
  228. data/src/core/lib/transport/transport.h +6 -0
  229. data/src/core/lib/uri/uri_parser.cc +8 -15
  230. data/src/core/plugin_registry/grpc_plugin_registry.cc +4 -0
  231. data/src/core/tsi/alts/handshaker/alts_handshaker_client.cc +23 -13
  232. data/src/core/tsi/alts/handshaker/alts_tsi_handshaker.cc +2 -0
  233. data/src/core/tsi/alts/handshaker/transport_security_common_api.cc +2 -0
  234. data/src/core/tsi/ssl_transport_security.cc +108 -11
  235. data/src/core/tsi/ssl_transport_security.h +14 -2
  236. data/src/core/tsi/transport_security_interface.h +5 -0
  237. data/src/ruby/bin/math_services_pb.rb +4 -4
  238. data/src/ruby/ext/grpc/extconf.rb +5 -2
  239. data/src/ruby/ext/grpc/rb_call.c +3 -2
  240. data/src/ruby/ext/grpc/rb_call.h +4 -0
  241. data/src/ruby/ext/grpc/rb_call_credentials.c +57 -12
  242. data/src/ruby/ext/grpc/rb_grpc_imports.generated.c +4 -0
  243. data/src/ruby/ext/grpc/rb_grpc_imports.generated.h +6 -0
  244. data/src/ruby/lib/grpc/generic/client_stub.rb +1 -1
  245. data/src/ruby/lib/grpc/generic/interceptors.rb +1 -1
  246. data/src/ruby/lib/grpc/version.rb +1 -1
  247. data/src/ruby/pb/grpc/health/v1/health_services_pb.rb +2 -2
  248. data/src/ruby/pb/src/proto/grpc/testing/messages_pb.rb +5 -0
  249. data/src/ruby/pb/src/proto/grpc/testing/test_services_pb.rb +28 -12
  250. data/src/ruby/spec/pb/codegen/grpc/testing/package_options_import2.proto +23 -0
  251. data/src/ruby/spec/pb/codegen/grpc/testing/package_options_ruby_style.proto +2 -0
  252. data/src/ruby/spec/pb/codegen/grpc/testing/same_package_service_name.proto +27 -0
  253. data/src/ruby/spec/pb/codegen/grpc/testing/same_ruby_package_service_name.proto +29 -0
  254. data/src/ruby/spec/pb/codegen/package_option_spec.rb +25 -1
  255. data/src/ruby/spec/support/services.rb +10 -4
  256. data/src/ruby/spec/user_agent_spec.rb +74 -0
  257. data/third_party/boringssl-with-bazel/err_data.c +89 -83
  258. data/third_party/boringssl-with-bazel/src/crypto/asn1/a_bitstr.c +3 -3
  259. data/third_party/boringssl-with-bazel/src/crypto/asn1/a_enum.c +3 -3
  260. data/third_party/boringssl-with-bazel/src/crypto/asn1/a_int.c +1 -1
  261. data/third_party/boringssl-with-bazel/src/crypto/asn1/a_object.c +3 -3
  262. data/third_party/boringssl-with-bazel/src/crypto/asn1/a_time.c +2 -2
  263. data/third_party/boringssl-with-bazel/src/crypto/asn1/a_type.c +1 -1
  264. data/third_party/boringssl-with-bazel/src/crypto/asn1/asn1_lib.c +1 -1
  265. data/third_party/boringssl-with-bazel/src/crypto/asn1/asn_pack.c +1 -1
  266. data/third_party/boringssl-with-bazel/src/crypto/asn1/f_enum.c +1 -1
  267. data/third_party/boringssl-with-bazel/src/crypto/asn1/f_int.c +1 -1
  268. data/third_party/boringssl-with-bazel/src/crypto/asn1/f_string.c +1 -1
  269. data/third_party/boringssl-with-bazel/src/crypto/cipher_extra/tls_cbc.c +1 -0
  270. data/third_party/boringssl-with-bazel/src/crypto/ec_extra/hash_to_curve.c +12 -52
  271. data/third_party/boringssl-with-bazel/src/crypto/ec_extra/internal.h +0 -22
  272. data/third_party/boringssl-with-bazel/src/crypto/evp/evp_asn1.c +143 -0
  273. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/aes/mode_wrappers.c +17 -1
  274. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/bn/internal.h +11 -1
  275. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/ec/internal.h +2 -1
  276. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/ec/p224-64.c +13 -11
  277. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/ec/p256-x86_64.c +24 -23
  278. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/ec/p256.c +20 -16
  279. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/ec/simple_mul.c +2 -2
  280. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/ec/util.c +3 -3
  281. data/third_party/boringssl-with-bazel/src/crypto/fipsmodule/self_check/self_check.c +62 -0
  282. data/third_party/boringssl-with-bazel/src/crypto/mem.c +29 -15
  283. data/third_party/boringssl-with-bazel/src/crypto/pkcs8/internal.h +7 -0
  284. data/third_party/boringssl-with-bazel/src/crypto/pkcs8/pkcs8_x509.c +36 -5
  285. data/third_party/boringssl-with-bazel/src/crypto/trust_token/internal.h +0 -29
  286. data/third_party/boringssl-with-bazel/src/crypto/trust_token/pmbtoken.c +116 -363
  287. data/third_party/boringssl-with-bazel/src/crypto/trust_token/trust_token.c +7 -45
  288. data/third_party/boringssl-with-bazel/src/crypto/x509/a_strex.c +4 -4
  289. data/third_party/boringssl-with-bazel/src/crypto/x509/algorithm.c +8 -0
  290. data/third_party/boringssl-with-bazel/src/crypto/x509/asn1_gen.c +4 -4
  291. data/third_party/boringssl-with-bazel/src/crypto/x509/x509.c +0 -67
  292. data/third_party/boringssl-with-bazel/src/crypto/x509/x509_cmp.c +13 -6
  293. data/third_party/boringssl-with-bazel/src/crypto/x509/x509_req.c +10 -0
  294. data/third_party/boringssl-with-bazel/src/crypto/x509/x509_set.c +41 -0
  295. data/third_party/boringssl-with-bazel/src/crypto/x509/x509_trs.c +4 -1
  296. data/third_party/boringssl-with-bazel/src/crypto/x509/x509_vfy.c +28 -9
  297. data/third_party/boringssl-with-bazel/src/crypto/x509/x509cset.c +25 -0
  298. data/third_party/boringssl-with-bazel/src/crypto/x509/x_crl.c +35 -13
  299. data/third_party/boringssl-with-bazel/src/crypto/x509/x_pubkey.c +0 -154
  300. data/third_party/boringssl-with-bazel/src/crypto/x509/x_x509.c +28 -6
  301. data/third_party/boringssl-with-bazel/src/crypto/x509v3/internal.h +5 -0
  302. data/third_party/boringssl-with-bazel/src/crypto/x509v3/v3_purp.c +74 -35
  303. data/third_party/boringssl-with-bazel/src/include/openssl/aes.h +16 -4
  304. data/third_party/boringssl-with-bazel/src/include/openssl/asn1.h +22 -22
  305. data/third_party/boringssl-with-bazel/src/include/openssl/base.h +1 -1
  306. data/third_party/boringssl-with-bazel/src/include/openssl/evp.h +69 -0
  307. data/third_party/boringssl-with-bazel/src/include/openssl/ssl.h +33 -16
  308. data/third_party/boringssl-with-bazel/src/include/openssl/trust_token.h +1 -10
  309. data/third_party/boringssl-with-bazel/src/include/openssl/x509.h +789 -715
  310. data/third_party/boringssl-with-bazel/src/ssl/handoff.cc +3 -3
  311. data/third_party/boringssl-with-bazel/src/ssl/handshake.cc +9 -2
  312. data/third_party/boringssl-with-bazel/src/ssl/handshake_client.cc +2 -2
  313. data/third_party/boringssl-with-bazel/src/ssl/handshake_server.cc +9 -0
  314. data/third_party/boringssl-with-bazel/src/ssl/internal.h +17 -14
  315. data/third_party/boringssl-with-bazel/src/ssl/ssl_asn1.cc +7 -7
  316. data/third_party/boringssl-with-bazel/src/ssl/ssl_lib.cc +28 -0
  317. data/third_party/boringssl-with-bazel/src/ssl/ssl_session.cc +4 -24
  318. data/third_party/boringssl-with-bazel/src/ssl/ssl_versions.cc +5 -5
  319. data/third_party/boringssl-with-bazel/src/ssl/t1_enc.cc +45 -24
  320. data/third_party/boringssl-with-bazel/src/ssl/tls13_client.cc +31 -21
  321. data/third_party/boringssl-with-bazel/src/ssl/tls13_server.cc +12 -9
  322. data/third_party/re2/re2/bitmap256.h +117 -0
  323. data/third_party/re2/re2/bitstate.cc +385 -0
  324. data/third_party/re2/re2/compile.cc +1279 -0
  325. data/third_party/re2/re2/dfa.cc +2130 -0
  326. data/third_party/re2/re2/filtered_re2.cc +121 -0
  327. data/third_party/re2/re2/filtered_re2.h +109 -0
  328. data/third_party/re2/re2/mimics_pcre.cc +197 -0
  329. data/third_party/re2/re2/nfa.cc +713 -0
  330. data/third_party/re2/re2/onepass.cc +623 -0
  331. data/third_party/re2/re2/parse.cc +2464 -0
  332. data/third_party/re2/re2/perl_groups.cc +119 -0
  333. data/third_party/re2/re2/pod_array.h +55 -0
  334. data/third_party/re2/re2/prefilter.cc +710 -0
  335. data/third_party/re2/re2/prefilter.h +108 -0
  336. data/third_party/re2/re2/prefilter_tree.cc +407 -0
  337. data/third_party/re2/re2/prefilter_tree.h +139 -0
  338. data/third_party/re2/re2/prog.cc +988 -0
  339. data/third_party/re2/re2/prog.h +436 -0
  340. data/third_party/re2/re2/re2.cc +1362 -0
  341. data/third_party/re2/re2/re2.h +1002 -0
  342. data/third_party/re2/re2/regexp.cc +980 -0
  343. data/third_party/re2/re2/regexp.h +659 -0
  344. data/third_party/re2/re2/set.cc +154 -0
  345. data/third_party/re2/re2/set.h +80 -0
  346. data/third_party/re2/re2/simplify.cc +657 -0
  347. data/third_party/re2/re2/sparse_array.h +392 -0
  348. data/third_party/re2/re2/sparse_set.h +264 -0
  349. data/third_party/re2/re2/stringpiece.cc +65 -0
  350. data/third_party/re2/re2/stringpiece.h +210 -0
  351. data/third_party/re2/re2/tostring.cc +351 -0
  352. data/third_party/re2/re2/unicode_casefold.cc +582 -0
  353. data/third_party/re2/re2/unicode_casefold.h +78 -0
  354. data/third_party/re2/re2/unicode_groups.cc +6269 -0
  355. data/third_party/re2/re2/unicode_groups.h +67 -0
  356. data/third_party/re2/re2/walker-inl.h +246 -0
  357. data/third_party/re2/util/benchmark.h +156 -0
  358. data/third_party/re2/util/flags.h +26 -0
  359. data/third_party/re2/util/logging.h +109 -0
  360. data/third_party/re2/util/malloc_counter.h +19 -0
  361. data/third_party/re2/util/mix.h +41 -0
  362. data/third_party/re2/util/mutex.h +148 -0
  363. data/third_party/re2/util/pcre.cc +1025 -0
  364. data/third_party/re2/util/pcre.h +681 -0
  365. data/third_party/re2/util/rune.cc +260 -0
  366. data/third_party/re2/util/strutil.cc +149 -0
  367. data/third_party/re2/util/strutil.h +21 -0
  368. data/third_party/re2/util/test.h +50 -0
  369. data/third_party/re2/util/utf.h +44 -0
  370. data/third_party/re2/util/util.h +42 -0
  371. data/third_party/upb/upb/decode.c +467 -504
  372. data/third_party/upb/upb/encode.c +163 -121
  373. data/third_party/upb/upb/msg.c +130 -64
  374. data/third_party/upb/upb/msg.h +418 -14
  375. data/third_party/upb/upb/port_def.inc +35 -6
  376. data/third_party/upb/upb/port_undef.inc +8 -1
  377. data/third_party/upb/upb/table.c +53 -75
  378. data/third_party/upb/upb/table.int.h +11 -43
  379. data/third_party/upb/upb/upb.c +148 -124
  380. data/third_party/upb/upb/upb.h +65 -147
  381. data/third_party/upb/upb/upb.hpp +86 -0
  382. metadata +90 -30
  383. data/third_party/upb/upb/generated_util.h +0 -105
@@ -0,0 +1,436 @@
1
+ // Copyright 2007 The RE2 Authors. All Rights Reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ #ifndef RE2_PROG_H_
6
+ #define RE2_PROG_H_
7
+
8
+ // Compiled representation of regular expressions.
9
+ // See regexp.h for the Regexp class, which represents a regular
10
+ // expression symbolically.
11
+
12
+ #include <stdint.h>
13
+ #include <functional>
14
+ #include <mutex>
15
+ #include <string>
16
+ #include <vector>
17
+ #include <type_traits>
18
+
19
+ #include "util/util.h"
20
+ #include "util/logging.h"
21
+ #include "re2/pod_array.h"
22
+ #include "re2/re2.h"
23
+ #include "re2/sparse_array.h"
24
+ #include "re2/sparse_set.h"
25
+
26
+ namespace re2 {
27
+
28
// Opcodes for Inst.  Inst::opcode() reads the value back from the low three
// bits of out_opcode_, so every real opcode must stay within [0, 7].
enum InstOp {
  kInstAlt = 0,         // choose between out_ and out1_
  kInstAltMatch = 1,    // Alt: out_ is [00-FF] and back, out1_ is match; or vice versa.
  kInstByteRange = 2,   // next (possible case-folded) byte must be in [lo_, hi_]
  kInstCapture = 3,     // capturing parenthesis number cap_
  kInstEmptyWidth = 4,  // empty-width special (^ $ ...); bit(s) set in empty_
  kInstMatch = 5,       // found a match!
  kInstNop = 6,         // no-op; occasionally unavoidable
  kInstFail = 7,        // never match; occasionally unavoidable
  kNumInst = 8,         // count of the opcodes above; not an opcode itself
};
40
+
41
// Bit flags for empty-width specials.  Each flag occupies its own bit so
// that several of them can be OR-ed together into one mask.
enum EmptyOp {
  kEmptyBeginLine       = 0x01,  // ^ - beginning of line
  kEmptyEndLine         = 0x02,  // $ - end of line
  kEmptyBeginText       = 0x04,  // \A - beginning of text
  kEmptyEndText         = 0x08,  // \z - end of text
  kEmptyWordBoundary    = 0x10,  // \b - word boundary
  kEmptyNonWordBoundary = 0x20,  // \B - not \b
  kEmptyAllFlags        = 0x3F,  // all six flags above combined
};
51
+
52
+ class DFA;
53
+ class Regexp;
54
+
55
+ // Compiled form of regexp program.
56
+ class Prog {
57
+ public:
58
+ Prog();
59
+ ~Prog();
60
+
61
+ // Single instruction in regexp program.
62
+ class Inst {
63
+ public:
64
+ // See the assertion below for why this is so.
65
+ Inst() = default;
66
+
67
+ // Copyable.
68
+ Inst(const Inst&) = default;
69
+ Inst& operator=(const Inst&) = default;
70
+
71
+ // Constructors per opcode
72
+ void InitAlt(uint32_t out, uint32_t out1);
73
+ void InitByteRange(int lo, int hi, int foldcase, uint32_t out);
74
+ void InitCapture(int cap, uint32_t out);
75
+ void InitEmptyWidth(EmptyOp empty, uint32_t out);
76
+ void InitMatch(int id);
77
+ void InitNop(uint32_t out);
78
+ void InitFail();
79
+
80
+ // Getters
81
+ int id(Prog* p) { return static_cast<int>(this - p->inst_.data()); }
82
+ InstOp opcode() { return static_cast<InstOp>(out_opcode_&7); }
83
+ int last() { return (out_opcode_>>3)&1; }
84
+ int out() { return out_opcode_>>4; }
85
+ int out1() { DCHECK(opcode() == kInstAlt || opcode() == kInstAltMatch); return out1_; }
86
+ int cap() { DCHECK_EQ(opcode(), kInstCapture); return cap_; }
87
+ int lo() { DCHECK_EQ(opcode(), kInstByteRange); return lo_; }
88
+ int hi() { DCHECK_EQ(opcode(), kInstByteRange); return hi_; }
89
+ int foldcase() { DCHECK_EQ(opcode(), kInstByteRange); return hint_foldcase_&1; }
90
+ int hint() { DCHECK_EQ(opcode(), kInstByteRange); return hint_foldcase_>>1; }
91
+ int match_id() { DCHECK_EQ(opcode(), kInstMatch); return match_id_; }
92
+ EmptyOp empty() { DCHECK_EQ(opcode(), kInstEmptyWidth); return empty_; }
93
+
94
+ bool greedy(Prog* p) {
95
+ DCHECK_EQ(opcode(), kInstAltMatch);
96
+ return p->inst(out())->opcode() == kInstByteRange ||
97
+ (p->inst(out())->opcode() == kInstNop &&
98
+ p->inst(p->inst(out())->out())->opcode() == kInstByteRange);
99
+ }
100
+
101
+ // Does this inst (an kInstByteRange) match c?
102
+ inline bool Matches(int c) {
103
+ DCHECK_EQ(opcode(), kInstByteRange);
104
+ if (foldcase() && 'A' <= c && c <= 'Z')
105
+ c += 'a' - 'A';
106
+ return lo_ <= c && c <= hi_;
107
+ }
108
+
109
+ // Returns string representation for debugging.
110
+ std::string Dump();
111
+
112
+ // Maximum instruction id.
113
+ // (Must fit in out_opcode_. PatchList/last steal another bit.)
114
+ static const int kMaxInst = (1<<28) - 1;
115
+
116
+ private:
117
+ void set_opcode(InstOp opcode) {
118
+ out_opcode_ = (out()<<4) | (last()<<3) | opcode;
119
+ }
120
+
121
+ void set_last() {
122
+ out_opcode_ = (out()<<4) | (1<<3) | opcode();
123
+ }
124
+
125
+ void set_out(int out) {
126
+ out_opcode_ = (out<<4) | (last()<<3) | opcode();
127
+ }
128
+
129
+ void set_out_opcode(int out, InstOp opcode) {
130
+ out_opcode_ = (out<<4) | (last()<<3) | opcode;
131
+ }
132
+
133
+ uint32_t out_opcode_; // 28 bits: out, 1 bit: last, 3 (low) bits: opcode
134
+ union { // additional instruction arguments:
135
+ uint32_t out1_; // opcode == kInstAlt
136
+ // alternate next instruction
137
+
138
+ int32_t cap_; // opcode == kInstCapture
139
+ // Index of capture register (holds text
140
+ // position recorded by capturing parentheses).
141
+ // For \n (the submatch for the nth parentheses),
142
+ // the left parenthesis captures into register 2*n
143
+ // and the right one captures into register 2*n+1.
144
+
145
+ int32_t match_id_; // opcode == kInstMatch
146
+ // Match ID to identify this match (for re2::Set).
147
+
148
+ struct { // opcode == kInstByteRange
149
+ uint8_t lo_; // byte range is lo_-hi_ inclusive
150
+ uint8_t hi_; //
151
+ uint16_t hint_foldcase_; // 15 bits: hint, 1 (low) bit: foldcase
152
+ // hint to execution engines: the delta to the
153
+ // next instruction (in the current list) worth
154
+ // exploring iff this instruction matched; 0
155
+ // means there are no remaining possibilities,
156
+ // which is most likely for character classes.
157
+ // foldcase: A-Z -> a-z before checking range.
158
+ };
159
+
160
+ EmptyOp empty_; // opcode == kInstEmptyWidth
161
+ // empty_ is bitwise OR of kEmpty* flags above.
162
+ };
163
+
164
+ friend class Compiler;
165
+ friend struct PatchList;
166
+ friend class Prog;
167
+ };
168
+
169
+ // Inst must be trivial so that we can freely clear it with memset(3).
170
+ // Arrays of Inst are initialised by copying the initial elements with
171
+ // memmove(3) and then clearing any remaining elements with memset(3).
172
+ static_assert(std::is_trivial<Inst>::value, "Inst must be trivial");
173
+
174
+ // Whether to anchor the search.
175
+ enum Anchor {
176
+ kUnanchored, // match anywhere
177
+ kAnchored, // match only starting at beginning of text
178
+ };
179
+
180
+ // Kind of match to look for (for anchor != kFullMatch)
181
+ //
182
+ // kLongestMatch mode finds the overall longest
183
+ // match but still makes its submatch choices the way
184
+ // Perl would, not in the way prescribed by POSIX.
185
+ // The POSIX rules are much more expensive to implement,
186
+ // and no one has needed them.
187
+ //
188
+ // kFullMatch is not strictly necessary -- we could use
189
+ // kLongestMatch and then check the length of the match -- but
190
+ // the matching code can run faster if it knows to consider only
191
+ // full matches.
192
+ enum MatchKind {
193
+ kFirstMatch, // like Perl, PCRE
194
+ kLongestMatch, // like egrep or POSIX
195
+ kFullMatch, // match only entire text; implies anchor==kAnchored
196
+ kManyMatch // for SearchDFA, records set of matches
197
+ };
198
+
199
+ Inst *inst(int id) { return &inst_[id]; }
200
+ int start() { return start_; }
201
+ void set_start(int start) { start_ = start; }
202
+ int start_unanchored() { return start_unanchored_; }
203
+ void set_start_unanchored(int start) { start_unanchored_ = start; }
204
+ int size() { return size_; }
205
+ bool reversed() { return reversed_; }
206
+ void set_reversed(bool reversed) { reversed_ = reversed; }
207
+ int list_count() { return list_count_; }
208
+ int inst_count(InstOp op) { return inst_count_[op]; }
209
+ uint16_t* list_heads() { return list_heads_.data(); }
210
+ int64_t dfa_mem() { return dfa_mem_; }
211
+ void set_dfa_mem(int64_t dfa_mem) { dfa_mem_ = dfa_mem; }
212
+ bool anchor_start() { return anchor_start_; }
213
+ void set_anchor_start(bool b) { anchor_start_ = b; }
214
+ bool anchor_end() { return anchor_end_; }
215
+ void set_anchor_end(bool b) { anchor_end_ = b; }
216
+ int bytemap_range() { return bytemap_range_; }
217
+ const uint8_t* bytemap() { return bytemap_; }
218
+ bool can_prefix_accel() { return prefix_size_ != 0; }
219
+
220
+ // Accelerates to the first likely occurrence of the prefix.
221
+ // Returns a pointer to the first byte or NULL if not found.
222
+ const void* PrefixAccel(const void* data, size_t size) {
223
+ DCHECK_GE(prefix_size_, 1);
224
+ return prefix_size_ == 1 ? memchr(data, prefix_front_, size)
225
+ : PrefixAccel_FrontAndBack(data, size);
226
+ }
227
+
228
+ // An implementation of prefix accel that looks for prefix_front_ and
229
+ // prefix_back_ to return fewer false positives than memchr(3) alone.
230
+ const void* PrefixAccel_FrontAndBack(const void* data, size_t size);
231
+
232
+ // Returns string representation of program for debugging.
233
+ std::string Dump();
234
+ std::string DumpUnanchored();
235
+ std::string DumpByteMap();
236
+
237
+ // Returns the set of kEmpty flags that are in effect at
238
+ // position p within context.
239
+ static uint32_t EmptyFlags(const StringPiece& context, const char* p);
240
+
241
// Returns whether byte c is a word character, i.e. one of [A-Za-z0-9_].
// ASCII only.  Used by the implementation of \b and \B.
// This is not right for Unicode, but:
//   - it's hard to get right in a byte-at-a-time matching world
//     (the DFA has only one-byte lookahead).
//   - even if the lookahead were possible, the Progs would be huge.
// This crude approximation is the same one PCRE uses.
static bool IsWordChar(uint8_t c) {
  if (c == '_')
    return true;
  if ('0' <= c && c <= '9')
    return true;
  if ('a' <= c && c <= 'z')
    return true;
  return 'A' <= c && c <= 'Z';
}
254
+
255
+ // Execution engines. They all search for the regexp (run the prog)
256
+ // in text, which is in the larger context (used for ^ $ \b etc).
257
+ // Anchor and kind control the kind of search.
258
+ // Returns true if match found, false if not.
259
+ // If match found, fills match[0..nmatch-1] with submatch info.
260
+ // match[0] is overall match, match[1] is first set of parens, etc.
261
+ // If a particular submatch is not matched during the regexp match,
262
+ // it is set to NULL.
263
+ //
264
+ // Matching text == StringPiece(NULL, 0) is treated as any other empty
265
+ // string, but note that on return, it will not be possible to distinguish
266
+ // submatches that matched that empty string from submatches that didn't
267
+ // match anything. Either way, match[i] == NULL.
268
+
269
+ // Search using NFA: can find submatches but kind of slow.
270
+ bool SearchNFA(const StringPiece& text, const StringPiece& context,
271
+ Anchor anchor, MatchKind kind,
272
+ StringPiece* match, int nmatch);
273
+
274
+ // Search using DFA: much faster than NFA but only finds
275
+ // end of match and can use a lot more memory.
276
+ // Returns whether a match was found.
277
+ // If the DFA runs out of memory, sets *failed to true and returns false.
278
+ // If matches != NULL and kind == kManyMatch and there is a match,
279
+ // SearchDFA fills matches with the match IDs of the final matching state.
280
+ bool SearchDFA(const StringPiece& text, const StringPiece& context,
281
+ Anchor anchor, MatchKind kind, StringPiece* match0,
282
+ bool* failed, SparseSet* matches);
283
+
284
+ // The callback issued after building each DFA state with BuildEntireDFA().
285
+ // If next is null, then the memory budget has been exhausted and building
286
+ // will halt. Otherwise, the state has been built and next points to an array
287
+ // of bytemap_range()+1 slots holding the next states as per the bytemap and
288
+ // kByteEndText. The number of the state is implied by the callback sequence:
289
+ // the first callback is for state 0, the second callback is for state 1, ...
290
+ // match indicates whether the state is a matching state.
291
+ using DFAStateCallback = std::function<void(const int* next, bool match)>;
292
+
293
+ // Build the entire DFA for the given match kind.
294
+ // Usually the DFA is built out incrementally, as needed, which
295
+ // avoids lots of unnecessary work.
296
+ // If cb is not empty, it receives one callback per state built.
297
+ // Returns the number of states built.
298
+ // FOR TESTING OR EXPERIMENTAL PURPOSES ONLY.
299
+ int BuildEntireDFA(MatchKind kind, const DFAStateCallback& cb);
300
+
301
+ // Controls whether the DFA should bail out early if the NFA would be faster.
302
+ // FOR TESTING ONLY.
303
+ static void TEST_dfa_should_bail_when_slow(bool b);
304
+
305
+ // Compute bytemap.
306
+ void ComputeByteMap();
307
+
308
+ // Run peep-hole optimizer on program.
309
+ void Optimize();
310
+
311
+ // One-pass NFA: only correct if IsOnePass() is true,
312
+ // but much faster than NFA (competitive with PCRE)
313
+ // for those expressions.
314
+ bool IsOnePass();
315
+ bool SearchOnePass(const StringPiece& text, const StringPiece& context,
316
+ Anchor anchor, MatchKind kind,
317
+ StringPiece* match, int nmatch);
318
+
319
+ // Bit-state backtracking. Fast on small cases but uses memory
320
+ // proportional to the product of the list count and the text size.
321
+ bool CanBitState() { return list_heads_.data() != NULL; }
322
+ bool SearchBitState(const StringPiece& text, const StringPiece& context,
323
+ Anchor anchor, MatchKind kind,
324
+ StringPiece* match, int nmatch);
325
+
326
+ static const int kMaxOnePassCapture = 5; // $0 through $4
327
+
328
+ // Backtracking search: the gold standard against which the other
329
+ // implementations are checked. FOR TESTING ONLY.
330
+ // It allocates a ton of memory to avoid running forever.
331
+ // It is also recursive, so can't use in production (will overflow stacks).
332
+ // The name "Unsafe" here is supposed to be a flag that
333
+ // you should not be using this function.
334
+ bool UnsafeSearchBacktrack(const StringPiece& text,
335
+ const StringPiece& context,
336
+ Anchor anchor, MatchKind kind,
337
+ StringPiece* match, int nmatch);
338
+
339
+ // Computes range for any strings matching regexp. The min and max can in
340
+ // some cases be arbitrarily precise, so the caller gets to specify the
341
+ // maximum desired length of string returned.
342
+ //
343
+ // Assuming PossibleMatchRange(&min, &max, N) returns successfully, any
344
+ // string s that is an anchored match for this regexp satisfies
345
+ // min <= s && s <= max.
346
+ //
347
+ // Note that PossibleMatchRange() will only consider the first copy of an
348
+ // infinitely repeated element (i.e., any regexp element followed by a '*' or
349
+ // '+' operator). Regexps with "{N}" constructions are not affected, as those
350
+ // do not compile down to infinite repetitions.
351
+ //
352
+ // Returns true on success, false on error.
353
+ bool PossibleMatchRange(std::string* min, std::string* max, int maxlen);
354
+
355
+ // EXPERIMENTAL! SUBJECT TO CHANGE!
356
+ // Outputs the program fanout into the given sparse array.
357
+ void Fanout(SparseArray<int>* fanout);
358
+
359
+ // Compiles a collection of regexps to Prog. Each regexp will have
360
+ // its own Match instruction recording the index in the output vector.
361
+ static Prog* CompileSet(Regexp* re, RE2::Anchor anchor, int64_t max_mem);
362
+
363
+ // Flattens the Prog from "tree" form to "list" form. This is an in-place
364
+ // operation in the sense that the old instructions are lost.
365
+ void Flatten();
366
+
367
+ // Walks the Prog; the "successor roots" or predecessors of the reachable
368
+ // instructions are marked in rootmap or predmap/predvec, respectively.
369
+ // reachable and stk are preallocated scratch structures.
370
+ void MarkSuccessors(SparseArray<int>* rootmap,
371
+ SparseArray<int>* predmap,
372
+ std::vector<std::vector<int>>* predvec,
373
+ SparseSet* reachable, std::vector<int>* stk);
374
+
375
+ // Walks the Prog from the given "root" instruction; the "dominator root"
376
+ // of the reachable instructions (if such exists) is marked in rootmap.
377
+ // reachable and stk are preallocated scratch structures.
378
+ void MarkDominator(int root, SparseArray<int>* rootmap,
379
+ SparseArray<int>* predmap,
380
+ std::vector<std::vector<int>>* predvec,
381
+ SparseSet* reachable, std::vector<int>* stk);
382
+
383
+ // Walks the Prog from the given "root" instruction; the reachable
384
+ // instructions are emitted in "list" form and appended to flat.
385
+ // reachable and stk are preallocated scratch structures.
386
+ void EmitList(int root, SparseArray<int>* rootmap,
387
+ std::vector<Inst>* flat,
388
+ SparseSet* reachable, std::vector<int>* stk);
389
+
390
+ // Computes hints for ByteRange instructions in [begin, end).
391
+ void ComputeHints(std::vector<Inst>* flat, int begin, int end);
392
+
393
+ private:
394
+ friend class Compiler;
395
+
396
+ DFA* GetDFA(MatchKind kind);
397
+ void DeleteDFA(DFA* dfa);
398
+
399
+ bool anchor_start_; // regexp has explicit start anchor
400
+ bool anchor_end_; // regexp has explicit end anchor
401
+ bool reversed_; // whether program runs backward over input
402
+ bool did_flatten_; // has Flatten been called?
403
+ bool did_onepass_; // has IsOnePass been called?
404
+
405
+ int start_; // entry point for program
406
+ int start_unanchored_; // unanchored entry point for program
407
+ int size_; // number of instructions
408
+ int bytemap_range_; // bytemap_[x] < bytemap_range_
409
+ size_t prefix_size_; // size of prefix (0 if no prefix)
410
+ int prefix_front_; // first byte of prefix (-1 if no prefix)
411
+ int prefix_back_; // last byte of prefix (-1 if no prefix)
412
+
413
+ int list_count_; // count of lists (see above)
414
+ int inst_count_[kNumInst]; // count of instructions by opcode
415
+ PODArray<uint16_t> list_heads_; // sparse array enumerating list heads
416
+ // not populated if size_ is overly large
417
+
418
+ PODArray<Inst> inst_; // pointer to instruction array
419
+ PODArray<uint8_t> onepass_nodes_; // data for OnePass nodes
420
+
421
+ int64_t dfa_mem_; // Maximum memory for DFAs.
422
+ DFA* dfa_first_; // DFA cached for kFirstMatch/kManyMatch
423
+ DFA* dfa_longest_; // DFA cached for kLongestMatch/kFullMatch
424
+
425
+ uint8_t bytemap_[256]; // map from input bytes to byte classes
426
+
427
+ std::once_flag dfa_first_once_;
428
+ std::once_flag dfa_longest_once_;
429
+
430
+ Prog(const Prog&) = delete;
431
+ Prog& operator=(const Prog&) = delete;
432
+ };
433
+
434
+ } // namespace re2
435
+
436
+ #endif // RE2_PROG_H_
@@ -0,0 +1,1362 @@
1
+ // Copyright 2003-2009 The RE2 Authors. All Rights Reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ // Regular expression interface RE2.
6
+ //
7
+ // Originally the PCRE C++ wrapper, but adapted to use
8
+ // the new automata-based regular expression engines.
9
+
10
+ #include "re2/re2.h"
11
+
12
+ #include <assert.h>
13
+ #include <ctype.h>
14
+ #include <errno.h>
15
+ #ifdef _MSC_VER
16
+ #include <intrin.h>
17
+ #endif
18
+ #include <stdint.h>
19
+ #include <stdlib.h>
20
+ #include <string.h>
21
+ #include <algorithm>
22
+ #include <atomic>
23
+ #include <iterator>
24
+ #include <mutex>
25
+ #include <string>
26
+ #include <utility>
27
+ #include <vector>
28
+
29
+ #include "util/util.h"
30
+ #include "util/logging.h"
31
+ #include "util/strutil.h"
32
+ #include "util/utf.h"
33
+ #include "re2/prog.h"
34
+ #include "re2/regexp.h"
35
+ #include "re2/sparse_array.h"
36
+
37
+ namespace re2 {
38
+
39
+ // Maximum number of args we can set
40
+ static const int kMaxArgs = 16;
41
+ static const int kVecSize = 1+kMaxArgs;
42
+
43
+ const int RE2::Options::kDefaultMaxMem; // initialized in re2.h
44
+
45
+ RE2::Options::Options(RE2::CannedOptions opt)
46
+ : encoding_(opt == RE2::Latin1 ? EncodingLatin1 : EncodingUTF8),
47
+ posix_syntax_(opt == RE2::POSIX),
48
+ longest_match_(opt == RE2::POSIX),
49
+ log_errors_(opt != RE2::Quiet),
50
+ max_mem_(kDefaultMaxMem),
51
+ literal_(false),
52
+ never_nl_(false),
53
+ dot_nl_(false),
54
+ never_capture_(false),
55
+ case_sensitive_(true),
56
+ perl_classes_(false),
57
+ word_boundary_(false),
58
+ one_line_(false) {
59
+ }
60
+
61
+ // static empty objects for use as const references.
62
+ // To avoid global constructors, allocated in RE2::Init().
63
+ static const std::string* empty_string;
64
+ static const std::map<std::string, int>* empty_named_groups;
65
+ static const std::map<int, std::string>* empty_group_names;
66
+
67
+ // Converts from Regexp error code to RE2 error code.
68
+ // Maybe some day they will diverge. In any event, this
69
+ // hides the existence of Regexp from RE2 users.
70
+ static RE2::ErrorCode RegexpErrorToRE2(re2::RegexpStatusCode code) {
71
+ switch (code) {
72
+ case re2::kRegexpSuccess:
73
+ return RE2::NoError;
74
+ case re2::kRegexpInternalError:
75
+ return RE2::ErrorInternal;
76
+ case re2::kRegexpBadEscape:
77
+ return RE2::ErrorBadEscape;
78
+ case re2::kRegexpBadCharClass:
79
+ return RE2::ErrorBadCharClass;
80
+ case re2::kRegexpBadCharRange:
81
+ return RE2::ErrorBadCharRange;
82
+ case re2::kRegexpMissingBracket:
83
+ return RE2::ErrorMissingBracket;
84
+ case re2::kRegexpMissingParen:
85
+ return RE2::ErrorMissingParen;
86
+ case re2::kRegexpTrailingBackslash:
87
+ return RE2::ErrorTrailingBackslash;
88
+ case re2::kRegexpRepeatArgument:
89
+ return RE2::ErrorRepeatArgument;
90
+ case re2::kRegexpRepeatSize:
91
+ return RE2::ErrorRepeatSize;
92
+ case re2::kRegexpRepeatOp:
93
+ return RE2::ErrorRepeatOp;
94
+ case re2::kRegexpBadPerlOp:
95
+ return RE2::ErrorBadPerlOp;
96
+ case re2::kRegexpBadUTF8:
97
+ return RE2::ErrorBadUTF8;
98
+ case re2::kRegexpBadNamedCapture:
99
+ return RE2::ErrorBadNamedCapture;
100
+ }
101
+ return RE2::ErrorInternal;
102
+ }
103
+
104
+ static std::string trunc(const StringPiece& pattern) {
105
+ if (pattern.size() < 100)
106
+ return std::string(pattern);
107
+ return std::string(pattern.substr(0, 100)) + "...";
108
+ }
109
+
110
+
111
// Constructors.  Each one simply forwards to Init(); the first three use
// DefaultOptions, the last one uses the options supplied by the caller.

// Builds an RE2 from a C string pattern, using DefaultOptions.
RE2::RE2(const char* pattern) {
  Init(pattern, DefaultOptions);
}

// Builds an RE2 from a std::string pattern, using DefaultOptions.
RE2::RE2(const std::string& pattern) {
  Init(pattern, DefaultOptions);
}

// Builds an RE2 from a StringPiece pattern, using DefaultOptions.
RE2::RE2(const StringPiece& pattern) {
  Init(pattern, DefaultOptions);
}

// Builds an RE2 from a StringPiece pattern with caller-supplied options.
RE2::RE2(const StringPiece& pattern, const Options& options) {
  Init(pattern, options);
}
126
+
127
+ int RE2::Options::ParseFlags() const {
128
+ int flags = Regexp::ClassNL;
129
+ switch (encoding()) {
130
+ default:
131
+ if (log_errors())
132
+ LOG(ERROR) << "Unknown encoding " << encoding();
133
+ break;
134
+ case RE2::Options::EncodingUTF8:
135
+ break;
136
+ case RE2::Options::EncodingLatin1:
137
+ flags |= Regexp::Latin1;
138
+ break;
139
+ }
140
+
141
+ if (!posix_syntax())
142
+ flags |= Regexp::LikePerl;
143
+
144
+ if (literal())
145
+ flags |= Regexp::Literal;
146
+
147
+ if (never_nl())
148
+ flags |= Regexp::NeverNL;
149
+
150
+ if (dot_nl())
151
+ flags |= Regexp::DotNL;
152
+
153
+ if (never_capture())
154
+ flags |= Regexp::NeverCapture;
155
+
156
+ if (!case_sensitive())
157
+ flags |= Regexp::FoldCase;
158
+
159
+ if (perl_classes())
160
+ flags |= Regexp::PerlClasses;
161
+
162
+ if (word_boundary())
163
+ flags |= Regexp::PerlB;
164
+
165
+ if (one_line())
166
+ flags |= Regexp::OneLine;
167
+
168
+ return flags;
169
+ }
170
+
171
+ void RE2::Init(const StringPiece& pattern, const Options& options) {
172
+ static std::once_flag empty_once;
173
+ std::call_once(empty_once, []() {
174
+ empty_string = new std::string;
175
+ empty_named_groups = new std::map<std::string, int>;
176
+ empty_group_names = new std::map<int, std::string>;
177
+ });
178
+
179
+ pattern_.assign(pattern.data(), pattern.size());
180
+ options_.Copy(options);
181
+ entire_regexp_ = NULL;
182
+ error_ = empty_string;
183
+ error_code_ = NoError;
184
+ error_arg_.clear();
185
+ prefix_.clear();
186
+ prefix_foldcase_ = false;
187
+ suffix_regexp_ = NULL;
188
+ prog_ = NULL;
189
+ num_captures_ = -1;
190
+ is_one_pass_ = false;
191
+
192
+ rprog_ = NULL;
193
+ named_groups_ = NULL;
194
+ group_names_ = NULL;
195
+
196
+ RegexpStatus status;
197
+ entire_regexp_ = Regexp::Parse(
198
+ pattern_,
199
+ static_cast<Regexp::ParseFlags>(options_.ParseFlags()),
200
+ &status);
201
+ if (entire_regexp_ == NULL) {
202
+ if (options_.log_errors()) {
203
+ LOG(ERROR) << "Error parsing '" << trunc(pattern_) << "': "
204
+ << status.Text();
205
+ }
206
+ error_ = new std::string(status.Text());
207
+ error_code_ = RegexpErrorToRE2(status.code());
208
+ error_arg_ = std::string(status.error_arg());
209
+ return;
210
+ }
211
+
212
+ re2::Regexp* suffix;
213
+ if (entire_regexp_->RequiredPrefix(&prefix_, &prefix_foldcase_, &suffix))
214
+ suffix_regexp_ = suffix;
215
+ else
216
+ suffix_regexp_ = entire_regexp_->Incref();
217
+
218
+ // Two thirds of the memory goes to the forward Prog,
219
+ // one third to the reverse prog, because the forward
220
+ // Prog has two DFAs but the reverse prog has one.
221
+ prog_ = suffix_regexp_->CompileToProg(options_.max_mem()*2/3);
222
+ if (prog_ == NULL) {
223
+ if (options_.log_errors())
224
+ LOG(ERROR) << "Error compiling '" << trunc(pattern_) << "'";
225
+ error_ = new std::string("pattern too large - compile failed");
226
+ error_code_ = RE2::ErrorPatternTooLarge;
227
+ return;
228
+ }
229
+
230
+ // We used to compute this lazily, but it's used during the
231
+ // typical control flow for a match call, so we now compute
232
+ // it eagerly, which avoids the overhead of std::once_flag.
233
+ num_captures_ = suffix_regexp_->NumCaptures();
234
+
235
+ // Could delay this until the first match call that
236
+ // cares about submatch information, but the one-pass
237
+ // machine's memory gets cut from the DFA memory budget,
238
+ // and that is harder to do if the DFA has already
239
+ // been built.
240
+ is_one_pass_ = prog_->IsOnePass();
241
+ }
242
+
243
+ // Returns rprog_, computing it if needed.
244
+ re2::Prog* RE2::ReverseProg() const {
245
+ std::call_once(rprog_once_, [](const RE2* re) {
246
+ re->rprog_ =
247
+ re->suffix_regexp_->CompileToReverseProg(re->options_.max_mem() / 3);
248
+ if (re->rprog_ == NULL) {
249
+ if (re->options_.log_errors())
250
+ LOG(ERROR) << "Error reverse compiling '" << trunc(re->pattern_) << "'";
251
+ // We no longer touch error_ and error_code_ because failing to compile
252
+ // the reverse Prog is not a showstopper: falling back to NFA execution
253
+ // is fine. More importantly, an RE2 object is supposed to be logically
254
+ // immutable: whatever ok() would have returned after Init() completed,
255
+ // it should continue to return that no matter what ReverseProg() does.
256
+ }
257
+ }, this);
258
+ return rprog_;
259
+ }
260
+
261
+ RE2::~RE2() {
262
+ if (suffix_regexp_)
263
+ suffix_regexp_->Decref();
264
+ if (entire_regexp_)
265
+ entire_regexp_->Decref();
266
+ delete prog_;
267
+ delete rprog_;
268
+ if (error_ != empty_string)
269
+ delete error_;
270
+ if (named_groups_ != NULL && named_groups_ != empty_named_groups)
271
+ delete named_groups_;
272
+ if (group_names_ != NULL && group_names_ != empty_group_names)
273
+ delete group_names_;
274
+ }
275
+
276
+ int RE2::ProgramSize() const {
277
+ if (prog_ == NULL)
278
+ return -1;
279
+ return prog_->size();
280
+ }
281
+
282
+ int RE2::ReverseProgramSize() const {
283
+ if (prog_ == NULL)
284
+ return -1;
285
+ Prog* prog = ReverseProg();
286
+ if (prog == NULL)
287
+ return -1;
288
+ return prog->size();
289
+ }
290
+
291
+ // Finds the most significant non-zero bit in n.
292
+ static int FindMSBSet(uint32_t n) {
293
+ DCHECK_NE(n, 0);
294
+ #if defined(__GNUC__)
295
+ return 31 ^ __builtin_clz(n);
296
+ #elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
297
+ unsigned long c;
298
+ _BitScanReverse(&c, n);
299
+ return static_cast<int>(c);
300
+ #else
301
+ int c = 0;
302
+ for (int shift = 1 << 4; shift != 0; shift >>= 1) {
303
+ uint32_t word = n >> shift;
304
+ if (word != 0) {
305
+ n = word;
306
+ c += shift;
307
+ }
308
+ }
309
+ return c;
310
+ #endif
311
+ }
312
+
313
+ static int Fanout(Prog* prog, std::vector<int>* histogram) {
314
+ SparseArray<int> fanout(prog->size());
315
+ prog->Fanout(&fanout);
316
+ int data[32] = {};
317
+ int size = 0;
318
+ for (SparseArray<int>::iterator i = fanout.begin(); i != fanout.end(); ++i) {
319
+ if (i->value() == 0)
320
+ continue;
321
+ uint32_t value = i->value();
322
+ int bucket = FindMSBSet(value);
323
+ bucket += value & (value-1) ? 1 : 0;
324
+ ++data[bucket];
325
+ size = std::max(size, bucket+1);
326
+ }
327
+ if (histogram != NULL)
328
+ histogram->assign(data, data+size);
329
+ return size-1;
330
+ }
331
+
332
+ int RE2::ProgramFanout(std::vector<int>* histogram) const {
333
+ if (prog_ == NULL)
334
+ return -1;
335
+ return Fanout(prog_, histogram);
336
+ }
337
+
338
+ int RE2::ReverseProgramFanout(std::vector<int>* histogram) const {
339
+ if (prog_ == NULL)
340
+ return -1;
341
+ Prog* prog = ReverseProg();
342
+ if (prog == NULL)
343
+ return -1;
344
+ return Fanout(prog, histogram);
345
+ }
346
+
347
+ // Returns named_groups_, computing it if needed.
348
+ const std::map<std::string, int>& RE2::NamedCapturingGroups() const {
349
+ std::call_once(named_groups_once_, [](const RE2* re) {
350
+ if (re->suffix_regexp_ != NULL)
351
+ re->named_groups_ = re->suffix_regexp_->NamedCaptures();
352
+ if (re->named_groups_ == NULL)
353
+ re->named_groups_ = empty_named_groups;
354
+ }, this);
355
+ return *named_groups_;
356
+ }
357
+
358
+ // Returns group_names_, computing it if needed.
359
+ const std::map<int, std::string>& RE2::CapturingGroupNames() const {
360
+ std::call_once(group_names_once_, [](const RE2* re) {
361
+ if (re->suffix_regexp_ != NULL)
362
+ re->group_names_ = re->suffix_regexp_->CaptureNames();
363
+ if (re->group_names_ == NULL)
364
+ re->group_names_ = empty_group_names;
365
+ }, this);
366
+ return *group_names_;
367
+ }
368
+
369
+ /***** Convenience interfaces *****/
370
+
371
+ bool RE2::FullMatchN(const StringPiece& text, const RE2& re,
372
+ const Arg* const args[], int n) {
373
+ return re.DoMatch(text, ANCHOR_BOTH, NULL, args, n);
374
+ }
375
+
376
+ bool RE2::PartialMatchN(const StringPiece& text, const RE2& re,
377
+ const Arg* const args[], int n) {
378
+ return re.DoMatch(text, UNANCHORED, NULL, args, n);
379
+ }
380
+
381
+ bool RE2::ConsumeN(StringPiece* input, const RE2& re,
382
+ const Arg* const args[], int n) {
383
+ size_t consumed;
384
+ if (re.DoMatch(*input, ANCHOR_START, &consumed, args, n)) {
385
+ input->remove_prefix(consumed);
386
+ return true;
387
+ } else {
388
+ return false;
389
+ }
390
+ }
391
+
392
+ bool RE2::FindAndConsumeN(StringPiece* input, const RE2& re,
393
+ const Arg* const args[], int n) {
394
+ size_t consumed;
395
+ if (re.DoMatch(*input, UNANCHORED, &consumed, args, n)) {
396
+ input->remove_prefix(consumed);
397
+ return true;
398
+ } else {
399
+ return false;
400
+ }
401
+ }
402
+
403
+ bool RE2::Replace(std::string* str,
404
+ const RE2& re,
405
+ const StringPiece& rewrite) {
406
+ StringPiece vec[kVecSize];
407
+ int nvec = 1 + MaxSubmatch(rewrite);
408
+ if (nvec > 1 + re.NumberOfCapturingGroups())
409
+ return false;
410
+ if (nvec > static_cast<int>(arraysize(vec)))
411
+ return false;
412
+ if (!re.Match(*str, 0, str->size(), UNANCHORED, vec, nvec))
413
+ return false;
414
+
415
+ std::string s;
416
+ if (!re.Rewrite(&s, rewrite, vec, nvec))
417
+ return false;
418
+
419
+ assert(vec[0].data() >= str->data());
420
+ assert(vec[0].data() + vec[0].size() <= str->data() + str->size());
421
+ str->replace(vec[0].data() - str->data(), vec[0].size(), s);
422
+ return true;
423
+ }
424
+
425
+ int RE2::GlobalReplace(std::string* str,
426
+ const RE2& re,
427
+ const StringPiece& rewrite) {
428
+ StringPiece vec[kVecSize];
429
+ int nvec = 1 + MaxSubmatch(rewrite);
430
+ if (nvec > 1 + re.NumberOfCapturingGroups())
431
+ return false;
432
+ if (nvec > static_cast<int>(arraysize(vec)))
433
+ return false;
434
+
435
+ const char* p = str->data();
436
+ const char* ep = p + str->size();
437
+ const char* lastend = NULL;
438
+ std::string out;
439
+ int count = 0;
440
+ #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
441
+ // Iterate just once when fuzzing. Otherwise, we easily get bogged down
442
+ // and coverage is unlikely to improve despite significant expense.
443
+ while (p == str->data()) {
444
+ #else
445
+ while (p <= ep) {
446
+ #endif
447
+ if (!re.Match(*str, static_cast<size_t>(p - str->data()),
448
+ str->size(), UNANCHORED, vec, nvec))
449
+ break;
450
+ if (p < vec[0].data())
451
+ out.append(p, vec[0].data() - p);
452
+ if (vec[0].data() == lastend && vec[0].empty()) {
453
+ // Disallow empty match at end of last match: skip ahead.
454
+ //
455
+ // fullrune() takes int, not ptrdiff_t. However, it just looks
456
+ // at the leading byte and treats any length >= 4 the same.
457
+ if (re.options().encoding() == RE2::Options::EncodingUTF8 &&
458
+ fullrune(p, static_cast<int>(std::min(ptrdiff_t{4}, ep - p)))) {
459
+ // re is in UTF-8 mode and there is enough left of str
460
+ // to allow us to advance by up to UTFmax bytes.
461
+ Rune r;
462
+ int n = chartorune(&r, p);
463
+ // Some copies of chartorune have a bug that accepts
464
+ // encodings of values in (10FFFF, 1FFFFF] as valid.
465
+ if (r > Runemax) {
466
+ n = 1;
467
+ r = Runeerror;
468
+ }
469
+ if (!(n == 1 && r == Runeerror)) { // no decoding error
470
+ out.append(p, n);
471
+ p += n;
472
+ continue;
473
+ }
474
+ }
475
+ // Most likely, re is in Latin-1 mode. If it is in UTF-8 mode,
476
+ // we fell through from above and the GIGO principle applies.
477
+ if (p < ep)
478
+ out.append(p, 1);
479
+ p++;
480
+ continue;
481
+ }
482
+ re.Rewrite(&out, rewrite, vec, nvec);
483
+ p = vec[0].data() + vec[0].size();
484
+ lastend = p;
485
+ count++;
486
+ }
487
+
488
+ if (count == 0)
489
+ return 0;
490
+
491
+ if (p < ep)
492
+ out.append(p, ep - p);
493
+ using std::swap;
494
+ swap(out, *str);
495
+ return count;
496
+ }
497
+
498
+ bool RE2::Extract(const StringPiece& text,
499
+ const RE2& re,
500
+ const StringPiece& rewrite,
501
+ std::string* out) {
502
+ StringPiece vec[kVecSize];
503
+ int nvec = 1 + MaxSubmatch(rewrite);
504
+ if (nvec > 1 + re.NumberOfCapturingGroups())
505
+ return false;
506
+ if (nvec > static_cast<int>(arraysize(vec)))
507
+ return false;
508
+ if (!re.Match(text, 0, text.size(), UNANCHORED, vec, nvec))
509
+ return false;
510
+
511
+ out->clear();
512
+ return re.Rewrite(out, rewrite, vec, nvec);
513
+ }
514
+
515
+ std::string RE2::QuoteMeta(const StringPiece& unquoted) {
516
+ std::string result;
517
+ result.reserve(unquoted.size() << 1);
518
+
519
+ // Escape any ascii character not in [A-Za-z_0-9].
520
+ //
521
+ // Note that it's legal to escape a character even if it has no
522
+ // special meaning in a regular expression -- so this function does
523
+ // that. (This also makes it identical to the perl function of the
524
+ // same name except for the null-character special case;
525
+ // see `perldoc -f quotemeta`.)
526
+ for (size_t ii = 0; ii < unquoted.size(); ++ii) {
527
+ // Note that using 'isalnum' here raises the benchmark time from
528
+ // 32ns to 58ns:
529
+ if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
530
+ (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
531
+ (unquoted[ii] < '0' || unquoted[ii] > '9') &&
532
+ unquoted[ii] != '_' &&
533
+ // If this is the part of a UTF8 or Latin1 character, we need
534
+ // to copy this byte without escaping. Experimentally this is
535
+ // what works correctly with the regexp library.
536
+ !(unquoted[ii] & 128)) {
537
+ if (unquoted[ii] == '\0') { // Special handling for null chars.
538
+ // Note that this special handling is not strictly required for RE2,
539
+ // but this quoting is required for other regexp libraries such as
540
+ // PCRE.
541
+ // Can't use "\\0" since the next character might be a digit.
542
+ result += "\\x00";
543
+ continue;
544
+ }
545
+ result += '\\';
546
+ }
547
+ result += unquoted[ii];
548
+ }
549
+
550
+ return result;
551
+ }
552
+
553
+ bool RE2::PossibleMatchRange(std::string* min, std::string* max,
554
+ int maxlen) const {
555
+ if (prog_ == NULL)
556
+ return false;
557
+
558
+ int n = static_cast<int>(prefix_.size());
559
+ if (n > maxlen)
560
+ n = maxlen;
561
+
562
+ // Determine initial min max from prefix_ literal.
563
+ *min = prefix_.substr(0, n);
564
+ *max = prefix_.substr(0, n);
565
+ if (prefix_foldcase_) {
566
+ // prefix is ASCII lowercase; change *min to uppercase.
567
+ for (int i = 0; i < n; i++) {
568
+ char& c = (*min)[i];
569
+ if ('a' <= c && c <= 'z')
570
+ c += 'A' - 'a';
571
+ }
572
+ }
573
+
574
+ // Add to prefix min max using PossibleMatchRange on regexp.
575
+ std::string dmin, dmax;
576
+ maxlen -= n;
577
+ if (maxlen > 0 && prog_->PossibleMatchRange(&dmin, &dmax, maxlen)) {
578
+ min->append(dmin);
579
+ max->append(dmax);
580
+ } else if (!max->empty()) {
581
+ // prog_->PossibleMatchRange has failed us,
582
+ // but we still have useful information from prefix_.
583
+ // Round up *max to allow any possible suffix.
584
+ PrefixSuccessor(max);
585
+ } else {
586
+ // Nothing useful.
587
+ *min = "";
588
+ *max = "";
589
+ return false;
590
+ }
591
+
592
+ return true;
593
+ }
594
+
595
// Locale-independent, ASCII-only case-insensitive comparison of the first
// `len` bytes of `a` and `b`.  The string `a` is known to be all lowercase,
// so only bytes of `b` are folded ('A'..'Z' -> 'a'..'z').
// Returns 0 when all `len` bytes agree, otherwise the difference between
// the first pair of differing bytes (compared as unsigned values).
static int ascii_strcasecmp(const char* a, const char* b, size_t len) {
  for (size_t i = 0; i < len; ++i) {
    const uint8_t lhs = static_cast<uint8_t>(a[i]);
    uint8_t rhs = static_cast<uint8_t>(b[i]);
    const bool rhs_is_upper = (rhs >= 'A' && rhs <= 'Z');
    if (rhs_is_upper)
      rhs = static_cast<uint8_t>(rhs + ('a' - 'A'));
    if (lhs != rhs)
      return static_cast<int>(lhs) - static_cast<int>(rhs);
  }
  return 0;
}
610
+
611
+
612
+ /***** Actual matching and rewriting code *****/
613
+
614
// Searches text[startpos, endpos) for a match of this regexp under the
// anchoring requested by re_anchor.
//
// Returns false when this RE2 is not ok(), when the startpos/endpos pair is
// invalid (startpos > endpos or endpos > text.size()), when an explicit
// anchor or the required literal prefix rules a match out, or when the
// search engines find no match.
//
// On success the first min(nsubmatch, 1 + NumberOfCapturingGroups()) entries
// of submatch are filled in (entry 0 receives the overall match) and any
// remaining entries up to nsubmatch are reset to empty StringPieces.
//
// Strategy: a DFA search decides whether and where there is a match; when
// more than the overall match is requested, or the DFA was skipped or ran
// out of memory, OnePass, BitState or the NFA is run to fill in submatch.
+ bool RE2::Match(const StringPiece& text,
615
+ size_t startpos,
616
+ size_t endpos,
617
+ Anchor re_anchor,
618
+ StringPiece* submatch,
619
+ int nsubmatch) const {
620
+ if (!ok()) {
621
+ if (options_.log_errors())
622
+ LOG(ERROR) << "Invalid RE2: " << *error_;
623
+ return false;
624
+ }
625
+
626
+ if (startpos > endpos || endpos > text.size()) {
627
+ if (options_.log_errors())
628
+ LOG(ERROR) << "RE2: invalid startpos, endpos pair. ["
629
+ << "startpos: " << startpos << ", "
630
+ << "endpos: " << endpos << ", "
631
+ << "text size: " << text.size() << "]";
632
+ return false;
633
+ }
634
+
// subtext is the window text[startpos, endpos) that is actually searched;
// text itself is still passed to the engines as the surrounding context.
635
+ StringPiece subtext = text;
636
+ subtext.remove_prefix(startpos);
637
+ subtext.remove_suffix(text.size() - endpos);
638
+
639
+ // Use DFAs to find exact location of match, filter out non-matches.
640
+
641
+ // Don't ask for the location if we won't use it.
642
+ // SearchDFA can do extra optimizations in that case.
643
+ StringPiece match;
644
+ StringPiece* matchp = &match;
645
+ if (nsubmatch == 0)
646
+ matchp = NULL;
647
+
// ncap is the number of submatch entries that will really be filled in:
// the overall match plus the capturing groups, capped at nsubmatch.
648
+ int ncap = 1 + NumberOfCapturingGroups();
649
+ if (ncap > nsubmatch)
650
+ ncap = nsubmatch;
651
+
652
+ // If the regexp is anchored explicitly, must not be in middle of text.
653
+ if (prog_->anchor_start() && startpos != 0)
654
+ return false;
655
+ if (prog_->anchor_end() && endpos != text.size())
656
+ return false;
657
+
658
+ // If the regexp is anchored explicitly, update re_anchor
659
+ // so that we can potentially fall into a faster case below.
660
+ if (prog_->anchor_start() && prog_->anchor_end())
661
+ re_anchor = ANCHOR_BOTH;
662
+ else if (prog_->anchor_start() && re_anchor != ANCHOR_BOTH)
663
+ re_anchor = ANCHOR_START;
664
+
665
+ // Check for the required prefix, if any.
666
+ size_t prefixlen = 0;
667
+ if (!prefix_.empty()) {
668
+ if (startpos != 0)
669
+ return false;
670
+ prefixlen = prefix_.size();
671
+ if (prefixlen > subtext.size())
672
+ return false;
673
+ if (prefix_foldcase_) {
674
+ if (ascii_strcasecmp(&prefix_[0], subtext.data(), prefixlen) != 0)
675
+ return false;
676
+ } else {
677
+ if (memcmp(&prefix_[0], subtext.data(), prefixlen) != 0)
678
+ return false;
679
+ }
680
+ subtext.remove_prefix(prefixlen);
681
+ // If there is a required prefix, the anchor must be at least ANCHOR_START.
682
+ if (re_anchor != ANCHOR_BOTH)
683
+ re_anchor = ANCHOR_START;
684
+ }
685
+
686
+ Prog::Anchor anchor = Prog::kUnanchored;
687
+ Prog::MatchKind kind = Prog::kFirstMatch;
688
+ if (options_.longest_match())
689
+ kind = Prog::kLongestMatch;
690
+
691
+ bool can_one_pass = (is_one_pass_ && ncap <= Prog::kMaxOnePassCapture);
692
+
693
+ // BitState allocates a bitmap of size prog_->list_count() * text.size().
694
+ // It also allocates a stack of 3-word structures which could potentially
695
+ // grow as large as prog_->list_count() * text.size(), but in practice is
696
+ // much smaller.
697
+ const int kMaxBitStateBitmapSize = 256*1024; // bitmap size <= max (bits)
698
+ bool can_bit_state = prog_->CanBitState();
699
+ size_t bit_state_text_max = kMaxBitStateBitmapSize / prog_->list_count();
700
+
701
+ #ifdef RE2_HAVE_THREAD_LOCAL
702
+ hooks::context = this;
703
+ #endif
// dfa_failed is set by SearchDFA when it gives up (logged below as out of
// memory); skipped_test records that the DFA result is unavailable, so the
// second stage must search all of subtext instead of a known match range.
704
+ bool dfa_failed = false;
705
+ bool skipped_test = false;
706
+ switch (re_anchor) {
707
+ default:
708
+ LOG(DFATAL) << "Unexpected re_anchor value: " << re_anchor;
709
+ return false;
710
+
711
+ case UNANCHORED: {
712
+ if (prog_->anchor_end()) {
713
+ // This is a very special case: we don't need the forward DFA because
714
+ // we already know where the match must end! Instead, the reverse DFA
715
+ // can say whether there is a match and (optionally) where it starts.
716
+ Prog* prog = ReverseProg();
717
+ if (prog == NULL) {
718
+ // Fall back to NFA below.
719
+ skipped_test = true;
720
+ break;
721
+ }
722
+ if (!prog->SearchDFA(subtext, text, Prog::kAnchored,
723
+ Prog::kLongestMatch, matchp, &dfa_failed, NULL)) {
724
+ if (dfa_failed) {
725
+ if (options_.log_errors())
726
+ LOG(ERROR) << "DFA out of memory: "
727
+ << "pattern length " << pattern_.size() << ", "
728
+ << "program size " << prog->size() << ", "
729
+ << "list count " << prog->list_count() << ", "
730
+ << "bytemap range " << prog->bytemap_range();
731
+ // Fall back to NFA below.
732
+ skipped_test = true;
733
+ break;
734
+ }
735
+ return false;
736
+ }
737
+ if (matchp == NULL) // Matched. Don't care where.
738
+ return true;
739
+ break;
740
+ }
741
+
742
+ if (!prog_->SearchDFA(subtext, text, anchor, kind,
743
+ matchp, &dfa_failed, NULL)) {
744
+ if (dfa_failed) {
745
+ if (options_.log_errors())
746
+ LOG(ERROR) << "DFA out of memory: "
747
+ << "pattern length " << pattern_.size() << ", "
748
+ << "program size " << prog_->size() << ", "
749
+ << "list count " << prog_->list_count() << ", "
750
+ << "bytemap range " << prog_->bytemap_range();
751
+ // Fall back to NFA below.
752
+ skipped_test = true;
753
+ break;
754
+ }
755
+ return false;
756
+ }
757
+ if (matchp == NULL) // Matched. Don't care where.
758
+ return true;
759
+ // SearchDFA set match.end() but didn't know where the
760
+ // match started. Run the regexp backward from match.end()
761
+ // to find the longest possible match -- that's where it started.
762
+ Prog* prog = ReverseProg();
763
+ if (prog == NULL) {
764
+ // Fall back to NFA below.
765
+ skipped_test = true;
766
+ break;
767
+ }
768
+ if (!prog->SearchDFA(match, text, Prog::kAnchored,
769
+ Prog::kLongestMatch, &match, &dfa_failed, NULL)) {
770
+ if (dfa_failed) {
771
+ if (options_.log_errors())
772
+ LOG(ERROR) << "DFA out of memory: "
773
+ << "pattern length " << pattern_.size() << ", "
774
+ << "program size " << prog->size() << ", "
775
+ << "list count " << prog->list_count() << ", "
776
+ << "bytemap range " << prog->bytemap_range();
777
+ // Fall back to NFA below.
778
+ skipped_test = true;
779
+ break;
780
+ }
781
+ if (options_.log_errors())
782
+ LOG(ERROR) << "SearchDFA inconsistency";
783
+ return false;
784
+ }
785
+ break;
786
+ }
787
+
788
+ case ANCHOR_BOTH:
789
+ case ANCHOR_START:
790
+ if (re_anchor == ANCHOR_BOTH)
791
+ kind = Prog::kFullMatch;
792
+ anchor = Prog::kAnchored;
793
+
794
+ // If only a small amount of text and need submatch
795
+ // information anyway and we're going to use OnePass or BitState
796
+ // to get it, we might as well not even bother with the DFA:
797
+ // OnePass or BitState will be fast enough.
798
+ // On tiny texts, OnePass outruns even the DFA, and
799
+ // it doesn't have the shared state and occasional mutex that
800
+ // the DFA does.
801
+ if (can_one_pass && text.size() <= 4096 &&
802
+ (ncap > 1 || text.size() <= 8)) {
803
+ skipped_test = true;
804
+ break;
805
+ }
806
+ if (can_bit_state && text.size() <= bit_state_text_max && ncap > 1) {
807
+ skipped_test = true;
808
+ break;
809
+ }
810
+ if (!prog_->SearchDFA(subtext, text, anchor, kind,
811
+ &match, &dfa_failed, NULL)) {
812
+ if (dfa_failed) {
813
+ if (options_.log_errors())
814
+ LOG(ERROR) << "DFA out of memory: "
815
+ << "pattern length " << pattern_.size() << ", "
816
+ << "program size " << prog_->size() << ", "
817
+ << "list count " << prog_->list_count() << ", "
818
+ << "bytemap range " << prog_->bytemap_range();
819
+ // Fall back to NFA below.
820
+ skipped_test = true;
821
+ break;
822
+ }
823
+ return false;
824
+ }
825
+ break;
826
+ }
827
+
// Second stage: either the DFA result is already sufficient (at most the
// overall match was requested), or one of OnePass / BitState / NFA is run
// to fill in the submatch array.
828
+ if (!skipped_test && ncap <= 1) {
829
+ // We know exactly where it matches. That's enough.
830
+ if (ncap == 1)
831
+ submatch[0] = match;
832
+ } else {
833
+ StringPiece subtext1;
834
+ if (skipped_test) {
835
+ // DFA ran out of memory or was skipped:
836
+ // need to search in entire original text.
837
+ subtext1 = subtext;
838
+ } else {
839
+ // DFA found the exact match location:
840
+ // let NFA run an anchored, full match search
841
+ // to find submatch locations.
842
+ subtext1 = match;
843
+ anchor = Prog::kAnchored;
844
+ kind = Prog::kFullMatch;
845
+ }
846
+
847
+ if (can_one_pass && anchor != Prog::kUnanchored) {
848
+ if (!prog_->SearchOnePass(subtext1, text, anchor, kind, submatch, ncap)) {
849
+ if (!skipped_test && options_.log_errors())
850
+ LOG(ERROR) << "SearchOnePass inconsistency";
851
+ return false;
852
+ }
853
+ } else if (can_bit_state && subtext1.size() <= bit_state_text_max) {
854
+ if (!prog_->SearchBitState(subtext1, text, anchor,
855
+ kind, submatch, ncap)) {
856
+ if (!skipped_test && options_.log_errors())
857
+ LOG(ERROR) << "SearchBitState inconsistency";
858
+ return false;
859
+ }
860
+ } else {
861
+ if (!prog_->SearchNFA(subtext1, text, anchor, kind, submatch, ncap)) {
862
+ if (!skipped_test && options_.log_errors())
863
+ LOG(ERROR) << "SearchNFA inconsistency";
864
+ return false;
865
+ }
866
+ }
867
+ }
868
+
869
+ // Adjust overall match for required prefix that we stripped off.
870
+ if (prefixlen > 0 && nsubmatch > 0)
871
+ submatch[0] = StringPiece(submatch[0].data() - prefixlen,
872
+ submatch[0].size() + prefixlen);
873
+
874
+ // Zero submatches that don't exist in the regexp.
875
+ for (int i = ncap; i < nsubmatch; i++)
876
+ submatch[i] = StringPiece();
877
+ return true;
878
+ }
879
+
880
+ // Internal matcher - like Match() but takes Args not StringPieces.
881
+ bool RE2::DoMatch(const StringPiece& text,
882
+ Anchor re_anchor,
883
+ size_t* consumed,
884
+ const Arg* const* args,
885
+ int n) const {
886
+ if (!ok()) {
887
+ if (options_.log_errors())
888
+ LOG(ERROR) << "Invalid RE2: " << *error_;
889
+ return false;
890
+ }
891
+
892
+ if (NumberOfCapturingGroups() < n) {
893
+ // RE has fewer capturing groups than number of Arg pointers passed in.
894
+ return false;
895
+ }
896
+
897
+ // Count number of capture groups needed.
898
+ int nvec;
899
+ if (n == 0 && consumed == NULL)
900
+ nvec = 0;
901
+ else
902
+ nvec = n+1;
903
+
904
+ StringPiece* vec;
905
+ StringPiece stkvec[kVecSize];
906
+ StringPiece* heapvec = NULL;
907
+
908
+ if (nvec <= static_cast<int>(arraysize(stkvec))) {
909
+ vec = stkvec;
910
+ } else {
911
+ vec = new StringPiece[nvec];
912
+ heapvec = vec;
913
+ }
914
+
915
+ if (!Match(text, 0, text.size(), re_anchor, vec, nvec)) {
916
+ delete[] heapvec;
917
+ return false;
918
+ }
919
+
920
+ if (consumed != NULL)
921
+ *consumed = static_cast<size_t>(vec[0].end() - text.begin());
922
+
923
+ if (n == 0 || args == NULL) {
924
+ // We are not interested in results
925
+ delete[] heapvec;
926
+ return true;
927
+ }
928
+
929
+ // If we got here, we must have matched the whole pattern.
930
+ for (int i = 0; i < n; i++) {
931
+ const StringPiece& s = vec[i+1];
932
+ if (!args[i]->Parse(s.data(), s.size())) {
933
+ // TODO: Should we indicate what the error was?
934
+ delete[] heapvec;
935
+ return false;
936
+ }
937
+ }
938
+
939
+ delete[] heapvec;
940
+ return true;
941
+ }
942
+
943
+ // Checks that the rewrite string is well-formed with respect to this
944
+ // regular expression.
945
+ bool RE2::CheckRewriteString(const StringPiece& rewrite,
946
+ std::string* error) const {
947
+ int max_token = -1;
948
+ for (const char *s = rewrite.data(), *end = s + rewrite.size();
949
+ s < end; s++) {
950
+ int c = *s;
951
+ if (c != '\\') {
952
+ continue;
953
+ }
954
+ if (++s == end) {
955
+ *error = "Rewrite schema error: '\\' not allowed at end.";
956
+ return false;
957
+ }
958
+ c = *s;
959
+ if (c == '\\') {
960
+ continue;
961
+ }
962
+ if (!isdigit(c)) {
963
+ *error = "Rewrite schema error: "
964
+ "'\\' must be followed by a digit or '\\'.";
965
+ return false;
966
+ }
967
+ int n = (c - '0');
968
+ if (max_token < n) {
969
+ max_token = n;
970
+ }
971
+ }
972
+
973
+ if (max_token > NumberOfCapturingGroups()) {
974
+ *error = StringPrintf(
975
+ "Rewrite schema requests %d matches, but the regexp only has %d "
976
+ "parenthesized subexpressions.",
977
+ max_token, NumberOfCapturingGroups());
978
+ return false;
979
+ }
980
+ return true;
981
+ }
982
+
983
+ // Returns the maximum submatch needed for the rewrite to be done by Replace().
984
+ // E.g. if rewrite == "foo \\2,\\1", returns 2.
985
+ int RE2::MaxSubmatch(const StringPiece& rewrite) {
986
+ int max = 0;
987
+ for (const char *s = rewrite.data(), *end = s + rewrite.size();
988
+ s < end; s++) {
989
+ if (*s == '\\') {
990
+ s++;
991
+ int c = (s < end) ? *s : -1;
992
+ if (isdigit(c)) {
993
+ int n = (c - '0');
994
+ if (n > max)
995
+ max = n;
996
+ }
997
+ }
998
+ }
999
+ return max;
1000
+ }
1001
+
1002
+ // Append the "rewrite" string, with backslash subsitutions from "vec",
1003
+ // to string "out".
1004
+ bool RE2::Rewrite(std::string* out,
1005
+ const StringPiece& rewrite,
1006
+ const StringPiece* vec,
1007
+ int veclen) const {
1008
+ for (const char *s = rewrite.data(), *end = s + rewrite.size();
1009
+ s < end; s++) {
1010
+ if (*s != '\\') {
1011
+ out->push_back(*s);
1012
+ continue;
1013
+ }
1014
+ s++;
1015
+ int c = (s < end) ? *s : -1;
1016
+ if (isdigit(c)) {
1017
+ int n = (c - '0');
1018
+ if (n >= veclen) {
1019
+ if (options_.log_errors()) {
1020
+ LOG(ERROR) << "invalid substitution \\" << n
1021
+ << " from " << veclen << " groups";
1022
+ }
1023
+ return false;
1024
+ }
1025
+ StringPiece snip = vec[n];
1026
+ if (!snip.empty())
1027
+ out->append(snip.data(), snip.size());
1028
+ } else if (c == '\\') {
1029
+ out->push_back('\\');
1030
+ } else {
1031
+ if (options_.log_errors())
1032
+ LOG(ERROR) << "invalid rewrite pattern: " << rewrite.data();
1033
+ return false;
1034
+ }
1035
+ }
1036
+ return true;
1037
+ }
1038
+
1039
+ /***** Parsers for various types *****/
1040
+
1041
+ bool RE2::Arg::parse_null(const char* str, size_t n, void* dest) {
1042
+ // We fail if somebody asked us to store into a non-NULL void* pointer
1043
+ return (dest == NULL);
1044
+ }
1045
+
1046
+ bool RE2::Arg::parse_string(const char* str, size_t n, void* dest) {
1047
+ if (dest == NULL) return true;
1048
+ reinterpret_cast<std::string*>(dest)->assign(str, n);
1049
+ return true;
1050
+ }
1051
+
1052
+ bool RE2::Arg::parse_stringpiece(const char* str, size_t n, void* dest) {
1053
+ if (dest == NULL) return true;
1054
+ *(reinterpret_cast<StringPiece*>(dest)) = StringPiece(str, n);
1055
+ return true;
1056
+ }
1057
+
1058
+ bool RE2::Arg::parse_char(const char* str, size_t n, void* dest) {
1059
+ if (n != 1) return false;
1060
+ if (dest == NULL) return true;
1061
+ *(reinterpret_cast<char*>(dest)) = str[0];
1062
+ return true;
1063
+ }
1064
+
1065
+ bool RE2::Arg::parse_schar(const char* str, size_t n, void* dest) {
1066
+ if (n != 1) return false;
1067
+ if (dest == NULL) return true;
1068
+ *(reinterpret_cast<signed char*>(dest)) = str[0];
1069
+ return true;
1070
+ }
1071
+
1072
+ bool RE2::Arg::parse_uchar(const char* str, size_t n, void* dest) {
1073
+ if (n != 1) return false;
1074
+ if (dest == NULL) return true;
1075
+ *(reinterpret_cast<unsigned char*>(dest)) = str[0];
1076
+ return true;
1077
+ }
1078
+
1079
+ // Largest number spec (in characters) that we are willing to parse.
// The integer parsers below size their scratch buffers as
// kMaxNumberLength+1 bytes; TerminateNumber() uses the last byte for the
// terminating NUL.
1080
+ static const int kMaxNumberLength = 32;
1081
+
1082
// REQUIRES "buf" must have length at least nbuf.
// Copies "str" into "buf" and null-terminates.
// Overwrites *np with the new length.
//
// Returns "buf" on success.  Returns "" and leaves *np untouched when the
// text cannot be a number this module accepts: empty input, leading
// whitespace when accept_spaces is false, nothing but whitespace, or text
// that does not fit in "buf" after redundant leading zeros were dropped.
// Because *np keeps its original non-zero value in those cases, the
// callers' "end != str + n" check rejects the input.
static const char* TerminateNumber(char* buf, size_t nbuf, const char* str,
                                   size_t* np, bool accept_spaces) {
  size_t n = *np;
  if (n == 0) return "";
  // isspace() is only defined for values representable as unsigned char
  // (or EOF), so bytes >= 0x80 must not be passed as a negative char.
  if (isspace(static_cast<unsigned char>(*str))) {
    // We are less forgiving than the strtoxxx() routines and do not
    // allow leading spaces. We do allow leading spaces for floats.
    if (!accept_spaces) {
      return "";
    }
    while (n > 0 && isspace(static_cast<unsigned char>(*str))) {
      n--;
      str++;
    }
    // Only whitespace was supplied.  Reporting a zero-length number would
    // let the callers' strtoxxx() "parse" the empty string as 0.
    if (n == 0) return "";
  }

  // Although buf has a fixed maximum size, we can still handle
  // arbitrarily large integers correctly by omitting leading zeros.
  // (Numbers that are still too long will be out of range.)
  // Before deciding whether str is too long,
  // remove leading zeros with s/000+/00/.
  // Leaving the leading two zeros in place means that
  // we don't change 0000x123 (invalid) into 0x123 (valid).
  // Skip over leading - before replacing.
  bool neg = false;
  if (n >= 1 && str[0] == '-') {
    neg = true;
    n--;
    str++;
  }

  if (n >= 3 && str[0] == '0' && str[1] == '0') {
    while (n >= 3 && str[2] == '0') {
      n--;
      str++;
    }
  }

  if (neg) {  // make room in buf for -
    n++;
    str--;
  }

  // Need n bytes plus the terminating NUL; also guards nbuf - 1 against
  // wrapping around when nbuf is 0.
  if (nbuf == 0 || n > nbuf - 1) return "";

  memmove(buf, str, n);
  if (neg) {
    // The byte before the kept digits may be a dropped zero rather than
    // the sign, so write the sign explicitly.
    buf[0] = '-';
  }
  buf[n] = '\0';
  *np = n;
  return buf;
}
1138
+
1139
+ bool RE2::Arg::parse_long_radix(const char* str,
1140
+ size_t n,
1141
+ void* dest,
1142
+ int radix) {
1143
+ if (n == 0) return false;
1144
+ char buf[kMaxNumberLength+1];
1145
+ str = TerminateNumber(buf, sizeof buf, str, &n, false);
1146
+ char* end;
1147
+ errno = 0;
1148
+ long r = strtol(str, &end, radix);
1149
+ if (end != str + n) return false; // Leftover junk
1150
+ if (errno) return false;
1151
+ if (dest == NULL) return true;
1152
+ *(reinterpret_cast<long*>(dest)) = r;
1153
+ return true;
1154
+ }
1155
+
1156
+ bool RE2::Arg::parse_ulong_radix(const char* str,
1157
+ size_t n,
1158
+ void* dest,
1159
+ int radix) {
1160
+ if (n == 0) return false;
1161
+ char buf[kMaxNumberLength+1];
1162
+ str = TerminateNumber(buf, sizeof buf, str, &n, false);
1163
+ if (str[0] == '-') {
1164
+ // strtoul() will silently accept negative numbers and parse
1165
+ // them. This module is more strict and treats them as errors.
1166
+ return false;
1167
+ }
1168
+
1169
+ char* end;
1170
+ errno = 0;
1171
+ unsigned long r = strtoul(str, &end, radix);
1172
+ if (end != str + n) return false; // Leftover junk
1173
+ if (errno) return false;
1174
+ if (dest == NULL) return true;
1175
+ *(reinterpret_cast<unsigned long*>(dest)) = r;
1176
+ return true;
1177
+ }
1178
+
1179
+ bool RE2::Arg::parse_short_radix(const char* str,
1180
+ size_t n,
1181
+ void* dest,
1182
+ int radix) {
1183
+ long r;
1184
+ if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
1185
+ if ((short)r != r) return false; // Out of range
1186
+ if (dest == NULL) return true;
1187
+ *(reinterpret_cast<short*>(dest)) = (short)r;
1188
+ return true;
1189
+ }
1190
+
1191
+ bool RE2::Arg::parse_ushort_radix(const char* str,
1192
+ size_t n,
1193
+ void* dest,
1194
+ int radix) {
1195
+ unsigned long r;
1196
+ if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
1197
+ if ((unsigned short)r != r) return false; // Out of range
1198
+ if (dest == NULL) return true;
1199
+ *(reinterpret_cast<unsigned short*>(dest)) = (unsigned short)r;
1200
+ return true;
1201
+ }
1202
+
1203
+ bool RE2::Arg::parse_int_radix(const char* str,
1204
+ size_t n,
1205
+ void* dest,
1206
+ int radix) {
1207
+ long r;
1208
+ if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
1209
+ if ((int)r != r) return false; // Out of range
1210
+ if (dest == NULL) return true;
1211
+ *(reinterpret_cast<int*>(dest)) = (int)r;
1212
+ return true;
1213
+ }
1214
+
1215
+ bool RE2::Arg::parse_uint_radix(const char* str,
1216
+ size_t n,
1217
+ void* dest,
1218
+ int radix) {
1219
+ unsigned long r;
1220
+ if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
1221
+ if ((unsigned int)r != r) return false; // Out of range
1222
+ if (dest == NULL) return true;
1223
+ *(reinterpret_cast<unsigned int*>(dest)) = (unsigned int)r;
1224
+ return true;
1225
+ }
1226
+
1227
+ bool RE2::Arg::parse_longlong_radix(const char* str,
1228
+ size_t n,
1229
+ void* dest,
1230
+ int radix) {
1231
+ if (n == 0) return false;
1232
+ char buf[kMaxNumberLength+1];
1233
+ str = TerminateNumber(buf, sizeof buf, str, &n, false);
1234
+ char* end;
1235
+ errno = 0;
1236
+ long long r = strtoll(str, &end, radix);
1237
+ if (end != str + n) return false; // Leftover junk
1238
+ if (errno) return false;
1239
+ if (dest == NULL) return true;
1240
+ *(reinterpret_cast<long long*>(dest)) = r;
1241
+ return true;
1242
+ }
1243
+
1244
+ bool RE2::Arg::parse_ulonglong_radix(const char* str,
1245
+ size_t n,
1246
+ void* dest,
1247
+ int radix) {
1248
+ if (n == 0) return false;
1249
+ char buf[kMaxNumberLength+1];
1250
+ str = TerminateNumber(buf, sizeof buf, str, &n, false);
1251
+ if (str[0] == '-') {
1252
+ // strtoull() will silently accept negative numbers and parse
1253
+ // them. This module is more strict and treats them as errors.
1254
+ return false;
1255
+ }
1256
+ char* end;
1257
+ errno = 0;
1258
+ unsigned long long r = strtoull(str, &end, radix);
1259
+ if (end != str + n) return false; // Leftover junk
1260
+ if (errno) return false;
1261
+ if (dest == NULL) return true;
1262
+ *(reinterpret_cast<unsigned long long*>(dest)) = r;
1263
+ return true;
1264
+ }
1265
+
1266
+ static bool parse_double_float(const char* str, size_t n, bool isfloat,
1267
+ void* dest) {
1268
+ if (n == 0) return false;
1269
+ static const int kMaxLength = 200;
1270
+ char buf[kMaxLength+1];
1271
+ str = TerminateNumber(buf, sizeof buf, str, &n, true);
1272
+ char* end;
1273
+ errno = 0;
1274
+ double r;
1275
+ if (isfloat) {
1276
+ r = strtof(str, &end);
1277
+ } else {
1278
+ r = strtod(str, &end);
1279
+ }
1280
+ if (end != str + n) return false; // Leftover junk
1281
+ if (errno) return false;
1282
+ if (dest == NULL) return true;
1283
+ if (isfloat) {
1284
+ *(reinterpret_cast<float*>(dest)) = (float)r;
1285
+ } else {
1286
+ *(reinterpret_cast<double*>(dest)) = r;
1287
+ }
1288
+ return true;
1289
+ }
1290
+
1291
+ bool RE2::Arg::parse_double(const char* str, size_t n, void* dest) {
1292
+ return parse_double_float(str, n, false, dest);
1293
+ }
1294
+
1295
+ bool RE2::Arg::parse_float(const char* str, size_t n, void* dest) {
1296
+ return parse_double_float(str, n, true, dest);
1297
+ }
1298
+
1299
// DEFINE_INTEGER_PARSER(name) defines four RE2::Arg parsers that forward to
// parse_<name>_radix() with a fixed radix argument:
//   parse_<name>         -> radix 10
//   parse_<name>_hex     -> radix 16
//   parse_<name>_octal   -> radix 8
//   parse_<name>_cradix  -> radix 0 (passed through to the strtoxxx() call
//                           made by the _radix parser)
// It is instantiated for every integer type handled above and then
// undefined again.
+ #define DEFINE_INTEGER_PARSER(name) \
1300
+ bool RE2::Arg::parse_##name(const char* str, size_t n, void* dest) { \
1301
+ return parse_##name##_radix(str, n, dest, 10); \
1302
+ } \
1303
+ bool RE2::Arg::parse_##name##_hex(const char* str, size_t n, void* dest) { \
1304
+ return parse_##name##_radix(str, n, dest, 16); \
1305
+ } \
1306
+ bool RE2::Arg::parse_##name##_octal(const char* str, size_t n, void* dest) { \
1307
+ return parse_##name##_radix(str, n, dest, 8); \
1308
+ } \
1309
+ bool RE2::Arg::parse_##name##_cradix(const char* str, size_t n, \
1310
+ void* dest) { \
1311
+ return parse_##name##_radix(str, n, dest, 0); \
1312
+ }
1313
+
1314
+ DEFINE_INTEGER_PARSER(short)
1315
+ DEFINE_INTEGER_PARSER(ushort)
1316
+ DEFINE_INTEGER_PARSER(int)
1317
+ DEFINE_INTEGER_PARSER(uint)
1318
+ DEFINE_INTEGER_PARSER(long)
1319
+ DEFINE_INTEGER_PARSER(ulong)
1320
+ DEFINE_INTEGER_PARSER(longlong)
1321
+ DEFINE_INTEGER_PARSER(ulonglong)
1322
+
1323
+ #undef DEFINE_INTEGER_PARSER
1324
+
1325
// Callback hooks.  Each hook is a process-wide, atomically updated function
// pointer with a Set...Hook() / Get...Hook() accessor pair generated by
// DEFINE_HOOK below.
+ namespace hooks {
1326
+
1327
+ #ifdef RE2_HAVE_THREAD_LOCAL
// Per-thread pointer to an RE2; RE2::Match() stores `this` here before it
// starts searching.
1328
+ thread_local const RE2* context = NULL;
1329
+ #endif
1330
+
// Holder for one callback pointer of type T*.  Store() publishes a new
// callback with release ordering and Load() reads it with acquire ordering.
// It is a union so that, on MSVC, a plain pointer member can be listed
// first and used for brace initialization (see the note inside).
1331
+ template <typename T>
1332
+ union Hook {
1333
+ void Store(T* cb) { cb_.store(cb, std::memory_order_release); }
1334
+ T* Load() const { return cb_.load(std::memory_order_acquire); }
1335
+
1336
+ #if !defined(__clang__) && defined(_MSC_VER)
1337
+ // Citing https://github.com/protocolbuffers/protobuf/pull/4777 as precedent,
1338
+ // this is a gross hack to make std::atomic<T*> constant-initialized on MSVC.
1339
+ static_assert(ATOMIC_POINTER_LOCK_FREE == 2,
1340
+ "std::atomic<T*> must be always lock-free");
1341
+ T* cb_for_constinit_;
1342
+ #endif
1343
+
1344
+ std::atomic<T*> cb_;
1345
+ };
1346
+
// Default callback installed in every hook: accepts its argument and does
// nothing with it.
1347
+ template <typename T>
1348
+ static void DoNothing(const T&) {}
1349
+
// DEFINE_HOOK(type, name) defines the file-local hook object name##_hook,
// initialized to DoNothing<type>, together with Set<type>Hook() (stores a
// new callback) and Get<type>Hook() (returns the current callback).
1350
+ #define DEFINE_HOOK(type, name) \
1351
+ static Hook<type##Callback> name##_hook = {{&DoNothing<type>}}; \
1352
+ void Set##type##Hook(type##Callback* cb) { name##_hook.Store(cb); } \
1353
+ type##Callback* Get##type##Hook() { return name##_hook.Load(); }
1354
+
1355
+ DEFINE_HOOK(DFAStateCacheReset, dfa_state_cache_reset)
1356
+ DEFINE_HOOK(DFASearchFailure, dfa_search_failure)
1357
+
1358
+ #undef DEFINE_HOOK
1359
+
1360
+ } // namespace hooks
1361
+
1362
+ } // namespace re2