rbs 2.8.4 → 3.8.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (434) hide show
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +12 -4
  3. data/.github/workflows/comments.yml +11 -11
  4. data/.github/workflows/dependabot.yml +30 -0
  5. data/.github/workflows/ruby.yml +40 -49
  6. data/.github/workflows/typecheck.yml +36 -0
  7. data/.github/workflows/windows.yml +28 -0
  8. data/.gitignore +1 -0
  9. data/.rubocop.yml +42 -2
  10. data/CHANGELOG.md +845 -1
  11. data/README.md +64 -4
  12. data/Rakefile +198 -18
  13. data/Steepfile +11 -11
  14. data/config.yml +311 -0
  15. data/core/array.rbs +2189 -1914
  16. data/core/basic_object.rbs +59 -84
  17. data/core/binding.rbs +7 -69
  18. data/core/builtin.rbs +210 -11
  19. data/core/class.rbs +37 -0
  20. data/core/comparable.rbs +23 -25
  21. data/core/complex.rbs +449 -227
  22. data/core/constants.rbs +29 -21
  23. data/core/data.rbs +415 -0
  24. data/core/dir.rbs +698 -415
  25. data/core/encoding.rbs +468 -843
  26. data/core/enumerable.rbs +495 -455
  27. data/core/enumerator/product.rbs +92 -0
  28. data/core/enumerator.rbs +106 -9
  29. data/core/env.rbs +1 -1
  30. data/core/errno.rbs +506 -605
  31. data/core/errors.rbs +15 -17
  32. data/core/exception.rbs +361 -145
  33. data/core/false_class.rbs +39 -26
  34. data/core/fiber.rbs +121 -14
  35. data/core/file.rbs +1262 -320
  36. data/core/file_test.rbs +62 -45
  37. data/core/float.rbs +187 -208
  38. data/core/gc.rbs +446 -196
  39. data/core/global_variables.rbs +29 -29
  40. data/core/hash.rbs +242 -349
  41. data/core/integer.rbs +246 -308
  42. data/core/io/buffer.rbs +373 -122
  43. data/core/io/wait.rbs +29 -17
  44. data/core/io.rbs +1881 -1518
  45. data/core/kernel.rbs +2116 -1538
  46. data/core/marshal.rbs +24 -14
  47. data/core/match_data.rbs +413 -166
  48. data/core/math.rbs +531 -291
  49. data/core/method.rbs +101 -32
  50. data/core/module.rbs +228 -64
  51. data/core/nil_class.rbs +106 -47
  52. data/core/numeric.rbs +206 -292
  53. data/core/object.rbs +73 -1168
  54. data/core/object_space/weak_key_map.rbs +166 -0
  55. data/core/object_space.rbs +5 -3
  56. data/core/proc.rbs +280 -39
  57. data/core/process.rbs +1318 -658
  58. data/core/ractor.rbs +200 -134
  59. data/core/random.rbs +21 -4
  60. data/core/range.rbs +309 -153
  61. data/core/rational.rbs +4 -12
  62. data/core/rb_config.rbs +64 -43
  63. data/core/rbs/unnamed/argf.rbs +411 -147
  64. data/core/rbs/unnamed/env_class.rbs +137 -253
  65. data/core/rbs/unnamed/random.rbs +49 -26
  66. data/core/refinement.rbs +16 -1
  67. data/core/regexp.rbs +1568 -862
  68. data/core/ruby_vm.rbs +719 -7
  69. data/core/rubygems/config_file.rbs +3 -0
  70. data/core/rubygems/errors.rbs +69 -6
  71. data/core/rubygems/rubygems.rbs +71 -17
  72. data/core/rubygems/version.rbs +11 -7
  73. data/{stdlib/set/0 → core}/set.rbs +80 -91
  74. data/core/signal.rbs +14 -8
  75. data/core/string.rbs +1732 -1607
  76. data/core/struct.rbs +467 -95
  77. data/core/symbol.rbs +215 -245
  78. data/core/thread.rbs +133 -89
  79. data/core/thread_group.rbs +9 -9
  80. data/core/time.rbs +1141 -841
  81. data/core/trace_point.rbs +181 -121
  82. data/core/true_class.rbs +58 -32
  83. data/core/unbound_method.rbs +103 -30
  84. data/core/warning.rbs +50 -5
  85. data/docs/CONTRIBUTING.md +1 -1
  86. data/docs/architecture.md +110 -0
  87. data/docs/collection.md +59 -5
  88. data/docs/data_and_struct.md +86 -0
  89. data/docs/gem.md +57 -0
  90. data/docs/rbs_by_example.md +16 -35
  91. data/docs/repo.md +1 -1
  92. data/docs/sigs.md +7 -7
  93. data/docs/stdlib.md +63 -5
  94. data/docs/syntax.md +255 -61
  95. data/docs/tools.md +1 -0
  96. data/ext/rbs_extension/extconf.rb +10 -0
  97. data/ext/rbs_extension/lexer.c +1741 -1548
  98. data/ext/rbs_extension/lexer.h +11 -1
  99. data/ext/rbs_extension/lexer.re +12 -6
  100. data/ext/rbs_extension/lexstate.c +26 -3
  101. data/ext/rbs_extension/location.c +119 -111
  102. data/ext/rbs_extension/location.h +32 -7
  103. data/ext/rbs_extension/main.c +3 -0
  104. data/ext/rbs_extension/parser.c +883 -481
  105. data/ext/rbs_extension/parserstate.c +65 -25
  106. data/ext/rbs_extension/parserstate.h +13 -3
  107. data/ext/rbs_extension/rbs_extension.h +1 -10
  108. data/ext/rbs_extension/unescape.c +7 -47
  109. data/goodcheck.yml +2 -2
  110. data/{ext/rbs_extension → include/rbs}/constants.h +26 -15
  111. data/include/rbs/ruby_objs.h +72 -0
  112. data/include/rbs.h +7 -0
  113. data/lib/rbs/annotate/annotations.rb +3 -3
  114. data/lib/rbs/annotate/formatter.rb +13 -3
  115. data/lib/rbs/annotate/rdoc_annotator.rb +1 -1
  116. data/lib/rbs/annotate/rdoc_source.rb +12 -3
  117. data/lib/rbs/ast/declarations.rb +85 -2
  118. data/lib/rbs/ast/directives.rb +39 -0
  119. data/lib/rbs/ast/members.rb +49 -15
  120. data/lib/rbs/ast/type_param.rb +104 -15
  121. data/lib/rbs/ast/visitor.rb +137 -0
  122. data/lib/rbs/buffer.rb +5 -0
  123. data/lib/rbs/cli/colored_io.rb +48 -0
  124. data/lib/rbs/cli/diff.rb +83 -0
  125. data/lib/rbs/cli/validate.rb +356 -0
  126. data/lib/rbs/cli.rb +253 -143
  127. data/lib/rbs/collection/cleaner.rb +8 -1
  128. data/lib/rbs/collection/config/lockfile.rb +92 -0
  129. data/lib/rbs/collection/config/lockfile_generator.rb +154 -65
  130. data/lib/rbs/collection/config.rb +19 -46
  131. data/lib/rbs/collection/installer.rb +12 -13
  132. data/lib/rbs/collection/sources/base.rb +2 -2
  133. data/lib/rbs/collection/sources/git.rb +146 -69
  134. data/lib/rbs/collection/sources/local.rb +81 -0
  135. data/lib/rbs/collection/sources/rubygems.rb +10 -12
  136. data/lib/rbs/collection/sources/stdlib.rb +14 -13
  137. data/lib/rbs/collection/sources.rb +15 -2
  138. data/lib/rbs/collection.rb +2 -1
  139. data/lib/rbs/definition.rb +13 -16
  140. data/lib/rbs/definition_builder/ancestor_builder.rb +100 -24
  141. data/lib/rbs/definition_builder/method_builder.rb +4 -4
  142. data/lib/rbs/definition_builder.rb +489 -584
  143. data/lib/rbs/diff.rb +125 -0
  144. data/lib/rbs/environment/use_map.rb +77 -0
  145. data/lib/rbs/environment.rb +406 -105
  146. data/lib/rbs/environment_loader.rb +48 -44
  147. data/lib/rbs/environment_walker.rb +1 -1
  148. data/lib/rbs/errors.rb +175 -56
  149. data/lib/rbs/file_finder.rb +28 -0
  150. data/lib/rbs/location_aux.rb +8 -7
  151. data/lib/rbs/locator.rb +37 -15
  152. data/lib/rbs/method_type.rb +23 -0
  153. data/lib/rbs/namespace.rb +1 -0
  154. data/lib/rbs/parser/lex_result.rb +15 -0
  155. data/lib/rbs/parser/token.rb +23 -0
  156. data/lib/rbs/parser_aux.rb +22 -13
  157. data/lib/rbs/prototype/helpers.rb +48 -22
  158. data/lib/rbs/prototype/node_usage.rb +99 -0
  159. data/lib/rbs/prototype/rb.rb +125 -31
  160. data/lib/rbs/prototype/rbi.rb +49 -36
  161. data/lib/rbs/prototype/runtime/helpers.rb +59 -0
  162. data/lib/rbs/prototype/runtime/reflection.rb +19 -0
  163. data/lib/rbs/prototype/runtime/value_object_generator.rb +279 -0
  164. data/lib/rbs/prototype/runtime.rb +273 -159
  165. data/lib/rbs/resolver/constant_resolver.rb +24 -8
  166. data/lib/rbs/resolver/type_name_resolver.rb +41 -7
  167. data/lib/rbs/sorter.rb +153 -123
  168. data/lib/rbs/substitution.rb +19 -0
  169. data/lib/rbs/subtractor.rb +201 -0
  170. data/lib/rbs/test/errors.rb +24 -11
  171. data/lib/rbs/test/guaranteed.rb +30 -0
  172. data/lib/rbs/test/hook.rb +45 -40
  173. data/lib/rbs/test/setup.rb +1 -1
  174. data/lib/rbs/test/tester.rb +1 -1
  175. data/lib/rbs/test/type_check.rb +120 -23
  176. data/lib/rbs/test.rb +6 -3
  177. data/lib/rbs/type_alias_dependency.rb +13 -3
  178. data/lib/rbs/type_alias_regularity.rb +21 -14
  179. data/lib/rbs/type_name.rb +18 -13
  180. data/lib/rbs/types.rb +352 -18
  181. data/lib/rbs/unit_test/convertibles.rb +176 -0
  182. data/lib/rbs/unit_test/spy.rb +136 -0
  183. data/lib/rbs/unit_test/type_assertions.rb +341 -0
  184. data/lib/rbs/unit_test/with_aliases.rb +143 -0
  185. data/lib/rbs/unit_test.rb +6 -0
  186. data/lib/rbs/validator.rb +55 -30
  187. data/lib/rbs/variance_calculator.rb +26 -23
  188. data/lib/rbs/vendorer.rb +3 -3
  189. data/lib/rbs/version.rb +1 -1
  190. data/lib/rbs/writer.rb +69 -22
  191. data/lib/rbs.rb +7 -2
  192. data/lib/rdoc/discover.rb +1 -1
  193. data/lib/rdoc_plugin/parser.rb +5 -5
  194. data/rbs.gemspec +12 -2
  195. data/schema/decls.json +1 -1
  196. data/schema/members.json +15 -10
  197. data/sig/ancestor_builder.rbs +4 -0
  198. data/sig/ancestor_graph.rbs +22 -2
  199. data/sig/annotate/formatter.rbs +2 -2
  200. data/sig/annotate/rdoc_annotater.rbs +1 -1
  201. data/sig/cli/colored_io.rbs +15 -0
  202. data/sig/cli/diff.rbs +21 -0
  203. data/sig/cli/validate.rbs +43 -0
  204. data/sig/cli.rbs +4 -0
  205. data/sig/collection/config/lockfile.rbs +74 -0
  206. data/sig/collection/config/lockfile_generator.rbs +66 -0
  207. data/sig/collection/config.rbs +5 -48
  208. data/sig/collection/installer.rbs +1 -1
  209. data/sig/collection/sources.rbs +105 -33
  210. data/sig/constant.rbs +1 -1
  211. data/sig/declarations.rbs +42 -3
  212. data/sig/definition.rbs +26 -10
  213. data/sig/definition_builder.rbs +103 -81
  214. data/sig/diff.rbs +28 -0
  215. data/sig/directives.rbs +61 -0
  216. data/sig/environment.rbs +175 -29
  217. data/sig/environment_loader.rbs +20 -18
  218. data/sig/errors.rbs +123 -2
  219. data/sig/file_finder.rbs +28 -0
  220. data/sig/location.rbs +0 -3
  221. data/sig/locator.rbs +14 -2
  222. data/sig/manifest.yaml +0 -1
  223. data/sig/members.rbs +32 -9
  224. data/sig/method_types.rbs +10 -4
  225. data/sig/namespace.rbs +2 -3
  226. data/sig/parser.rbs +55 -16
  227. data/sig/prototype/helpers.rbs +4 -0
  228. data/sig/prototype/node_usage.rbs +20 -0
  229. data/sig/prototype/rb.rbs +10 -2
  230. data/sig/prototype/rbi.rbs +2 -0
  231. data/sig/prototype/runtime.rbs +182 -0
  232. data/sig/rbs.rbs +1 -1
  233. data/sig/rdoc/rbs.rbs +4 -0
  234. data/sig/repository.rbs +7 -5
  235. data/sig/resolver/constant_resolver.rbs +3 -4
  236. data/sig/resolver/context.rbs +1 -1
  237. data/sig/resolver/type_name_resolver.rbs +5 -1
  238. data/sig/shims/bundler.rbs +38 -0
  239. data/sig/shims/rubygems.rbs +19 -0
  240. data/sig/sorter.rbs +23 -5
  241. data/sig/substitution.rbs +6 -0
  242. data/sig/subtractor.rbs +37 -0
  243. data/sig/test/errors.rbs +52 -0
  244. data/sig/test/guranteed.rbs +9 -0
  245. data/sig/test/type_check.rbs +19 -0
  246. data/sig/test.rbs +82 -0
  247. data/sig/type_alias_dependency.rbs +31 -0
  248. data/sig/type_alias_regularity.rbs +12 -6
  249. data/sig/type_param.rbs +45 -9
  250. data/sig/typename.rbs +8 -5
  251. data/sig/types.rbs +119 -12
  252. data/sig/unit_test/convertibles.rbs +154 -0
  253. data/sig/unit_test/spy.rbs +28 -0
  254. data/sig/unit_test/type_assertions.rbs +194 -0
  255. data/sig/unit_test/with_aliases.rbs +136 -0
  256. data/sig/use_map.rbs +35 -0
  257. data/sig/validator.rbs +12 -5
  258. data/sig/variance_calculator.rbs +3 -1
  259. data/sig/vendorer.rbs +1 -1
  260. data/sig/visitor.rbs +47 -0
  261. data/sig/writer.rbs +6 -2
  262. data/src/constants.c +153 -0
  263. data/src/ruby_objs.c +793 -0
  264. data/stdlib/base64/0/base64.rbs +298 -45
  265. data/stdlib/benchmark/0/benchmark.rbs +12 -3
  266. data/stdlib/bigdecimal/0/big_decimal.rbs +62 -198
  267. data/stdlib/cgi/0/core.rbs +68 -15
  268. data/stdlib/cgi/0/manifest.yaml +1 -0
  269. data/stdlib/coverage/0/coverage.rbs +50 -11
  270. data/stdlib/csv/0/csv.rbs +90 -119
  271. data/stdlib/csv/0/manifest.yaml +1 -0
  272. data/stdlib/date/0/date.rbs +806 -735
  273. data/stdlib/date/0/date_time.rbs +70 -211
  274. data/stdlib/dbm/0/dbm.rbs +0 -2
  275. data/stdlib/delegate/0/delegator.rbs +184 -0
  276. data/stdlib/delegate/0/kernel.rbs +47 -0
  277. data/stdlib/delegate/0/simple_delegator.rbs +96 -0
  278. data/stdlib/did_you_mean/0/did_you_mean.rbs +3 -8
  279. data/stdlib/digest/0/digest.rbs +48 -35
  280. data/stdlib/erb/0/erb.rbs +15 -39
  281. data/stdlib/etc/0/etc.rbs +174 -54
  282. data/stdlib/fileutils/0/fileutils.rbs +1234 -385
  283. data/stdlib/forwardable/0/forwardable.rbs +4 -4
  284. data/stdlib/io-console/0/io-console.rbs +82 -17
  285. data/stdlib/ipaddr/0/ipaddr.rbs +11 -6
  286. data/stdlib/json/0/json.rbs +434 -151
  287. data/stdlib/kconv/0/kconv.rbs +166 -0
  288. data/stdlib/logger/0/formatter.rbs +0 -2
  289. data/stdlib/logger/0/log_device.rbs +1 -3
  290. data/stdlib/logger/0/logger.rbs +465 -328
  291. data/stdlib/minitest/0/kernel.rbs +2 -2
  292. data/stdlib/minitest/0/minitest/abstract_reporter.rbs +4 -1
  293. data/stdlib/minitest/0/minitest/assertion.rbs +1 -0
  294. data/stdlib/minitest/0/minitest/assertions.rbs +58 -13
  295. data/stdlib/minitest/0/minitest/backtrace_filter.rbs +7 -0
  296. data/stdlib/minitest/0/minitest/bench_spec.rbs +8 -8
  297. data/stdlib/minitest/0/minitest/benchmark.rbs +17 -16
  298. data/stdlib/minitest/0/minitest/compress.rbs +13 -0
  299. data/stdlib/minitest/0/minitest/error_on_warning.rbs +3 -0
  300. data/stdlib/minitest/0/minitest/mock.rbs +9 -5
  301. data/stdlib/minitest/0/minitest/parallel/executor.rbs +4 -0
  302. data/stdlib/minitest/0/minitest/parallel/test/class_methods.rbs +0 -1
  303. data/stdlib/minitest/0/minitest/pride_io.rbs +8 -0
  304. data/stdlib/minitest/0/minitest/pride_lol.rbs +2 -0
  305. data/stdlib/minitest/0/minitest/progress_reporter.rbs +1 -1
  306. data/stdlib/minitest/0/minitest/reportable.rbs +2 -0
  307. data/stdlib/minitest/0/minitest/runnable.rbs +33 -1
  308. data/stdlib/minitest/0/minitest/spec/dsl/instance_methods.rbs +1 -1
  309. data/stdlib/minitest/0/minitest/spec/dsl.rbs +10 -6
  310. data/stdlib/minitest/0/minitest/spec.rbs +1 -1
  311. data/stdlib/minitest/0/minitest/statistics_reporter.rbs +5 -0
  312. data/stdlib/minitest/0/minitest/summary_reporter.rbs +0 -7
  313. data/stdlib/minitest/0/minitest/test/lifecycle_hooks.rbs +7 -7
  314. data/stdlib/minitest/0/minitest/test.rbs +7 -14
  315. data/stdlib/minitest/0/minitest/unexpected_error.rbs +2 -0
  316. data/stdlib/minitest/0/minitest/unexpected_warning.rbs +6 -0
  317. data/stdlib/minitest/0/minitest/unit.rbs +1 -2
  318. data/stdlib/minitest/0/minitest.rbs +41 -892
  319. data/stdlib/monitor/0/monitor.rbs +91 -10
  320. data/stdlib/mutex_m/0/mutex_m.rbs +0 -2
  321. data/stdlib/net-http/0/manifest.yaml +1 -1
  322. data/stdlib/net-http/0/net-http.rbs +3858 -964
  323. data/stdlib/net-protocol/0/manifest.yaml +2 -0
  324. data/stdlib/net-protocol/0/net-protocol.rbs +56 -0
  325. data/stdlib/net-smtp/0/manifest.yaml +2 -0
  326. data/stdlib/net-smtp/0/net-smtp.rbs +55 -0
  327. data/stdlib/nkf/0/nkf.rbs +35 -5
  328. data/stdlib/objspace/0/objspace.rbs +40 -18
  329. data/stdlib/observable/0/observable.rbs +217 -0
  330. data/stdlib/open-uri/0/manifest.yaml +4 -0
  331. data/stdlib/open-uri/0/open-uri.rbs +393 -0
  332. data/stdlib/open3/0/open3.rbs +147 -0
  333. data/stdlib/openssl/0/manifest.yaml +1 -0
  334. data/stdlib/openssl/0/openssl.rbs +681 -316
  335. data/stdlib/optparse/0/optparse.rbs +100 -65
  336. data/stdlib/pathname/0/pathname.rbs +24 -15
  337. data/stdlib/pp/0/manifest.yaml +2 -0
  338. data/stdlib/pp/0/pp.rbs +300 -0
  339. data/stdlib/prettyprint/0/prettyprint.rbs +2 -6
  340. data/stdlib/pstore/0/pstore.rbs +370 -156
  341. data/stdlib/psych/0/core_ext.rbs +12 -0
  342. data/stdlib/{yaml → psych}/0/dbm.rbs +3 -3
  343. data/stdlib/psych/0/manifest.yaml +3 -0
  344. data/stdlib/psych/0/psych.rbs +402 -0
  345. data/stdlib/{yaml → psych}/0/store.rbs +2 -2
  346. data/stdlib/pty/0/pty.rbs +63 -11
  347. data/stdlib/rdoc/0/code_object.rbs +51 -0
  348. data/stdlib/rdoc/0/comment.rbs +59 -0
  349. data/stdlib/rdoc/0/context.rbs +153 -0
  350. data/stdlib/rdoc/0/markup.rbs +117 -0
  351. data/stdlib/rdoc/0/parser.rbs +56 -0
  352. data/stdlib/rdoc/0/rdoc.rbs +13 -380
  353. data/stdlib/rdoc/0/ri.rbs +17 -0
  354. data/stdlib/rdoc/0/store.rbs +48 -0
  355. data/stdlib/rdoc/0/top_level.rbs +97 -0
  356. data/stdlib/resolv/0/resolv.rbs +16 -79
  357. data/stdlib/ripper/0/ripper.rbs +1648 -0
  358. data/stdlib/securerandom/0/securerandom.rbs +7 -2
  359. data/stdlib/shellwords/0/shellwords.rbs +11 -12
  360. data/stdlib/singleton/0/singleton.rbs +0 -3
  361. data/stdlib/socket/0/addrinfo.rbs +13 -18
  362. data/stdlib/socket/0/basic_socket.rbs +5 -10
  363. data/stdlib/socket/0/ip_socket.rbs +0 -2
  364. data/stdlib/socket/0/socket.rbs +77 -46
  365. data/stdlib/socket/0/tcp_server.rbs +0 -5
  366. data/stdlib/socket/0/tcp_socket.rbs +36 -3
  367. data/stdlib/socket/0/udp_socket.rbs +4 -5
  368. data/stdlib/socket/0/unix_server.rbs +0 -5
  369. data/stdlib/socket/0/unix_socket.rbs +2 -4
  370. data/{core/string_io.rbs → stdlib/stringio/0/stringio.rbs} +188 -107
  371. data/stdlib/strscan/0/string_scanner.rbs +1269 -425
  372. data/stdlib/tempfile/0/tempfile.rbs +224 -61
  373. data/stdlib/time/0/time.rbs +48 -35
  374. data/stdlib/timeout/0/timeout.rbs +17 -8
  375. data/stdlib/tmpdir/0/tmpdir.rbs +10 -3
  376. data/stdlib/tsort/0/tsort.rbs +0 -4
  377. data/stdlib/uri/0/common.rbs +271 -144
  378. data/stdlib/uri/0/file.rbs +5 -0
  379. data/stdlib/uri/0/ftp.rbs +1 -1
  380. data/stdlib/uri/0/generic.rbs +26 -22
  381. data/stdlib/uri/0/http.rbs +4 -4
  382. data/stdlib/uri/0/ldap.rbs +1 -1
  383. data/stdlib/uri/0/mailto.rbs +84 -0
  384. data/stdlib/uri/0/rfc2396_parser.rbs +3 -0
  385. data/stdlib/yaml/0/manifest.yaml +1 -2
  386. data/stdlib/yaml/0/yaml.rbs +1 -199
  387. data/stdlib/zlib/0/buf_error.rbs +10 -0
  388. data/stdlib/zlib/0/data_error.rbs +10 -0
  389. data/stdlib/zlib/0/deflate.rbs +210 -0
  390. data/stdlib/zlib/0/error.rbs +20 -0
  391. data/stdlib/zlib/0/gzip_file/crc_error.rbs +12 -0
  392. data/stdlib/zlib/0/gzip_file/error.rbs +23 -0
  393. data/stdlib/zlib/0/gzip_file/length_error.rbs +12 -0
  394. data/stdlib/zlib/0/gzip_file/no_footer.rbs +11 -0
  395. data/stdlib/zlib/0/gzip_file.rbs +156 -0
  396. data/stdlib/zlib/0/gzip_reader.rbs +293 -0
  397. data/stdlib/zlib/0/gzip_writer.rbs +166 -0
  398. data/stdlib/zlib/0/inflate.rbs +180 -0
  399. data/stdlib/zlib/0/mem_error.rbs +10 -0
  400. data/stdlib/zlib/0/need_dict.rbs +13 -0
  401. data/stdlib/zlib/0/stream_end.rbs +11 -0
  402. data/stdlib/zlib/0/stream_error.rbs +11 -0
  403. data/stdlib/zlib/0/version_error.rbs +11 -0
  404. data/stdlib/zlib/0/zlib.rbs +1 -3
  405. data/stdlib/zlib/0/zstream.rbs +200 -0
  406. data/templates/include/rbs/constants.h.erb +20 -0
  407. data/templates/include/rbs/ruby_objs.h.erb +10 -0
  408. data/templates/src/constants.c.erb +36 -0
  409. data/templates/src/ruby_objs.c.erb +27 -0
  410. data/templates/template.rb +122 -0
  411. metadata +136 -36
  412. data/Gemfile +0 -33
  413. data/Gemfile.lock +0 -118
  414. data/core/deprecated.rbs +0 -9
  415. data/ext/rbs_extension/constants.c +0 -135
  416. data/ext/rbs_extension/ruby_objs.c +0 -525
  417. data/ext/rbs_extension/ruby_objs.h +0 -43
  418. data/lib/rbs/constant_table.rb +0 -167
  419. data/lib/rbs/parser_compat/lexer_error.rb +0 -6
  420. data/lib/rbs/parser_compat/located_value.rb +0 -7
  421. data/lib/rbs/parser_compat/semantics_error.rb +0 -6
  422. data/lib/rbs/parser_compat/syntax_error.rb +0 -6
  423. data/lib/rbs/test/spy.rb +0 -6
  424. data/lib/rbs/type_name_resolver.rb +0 -67
  425. data/sig/constant_table.rbs +0 -30
  426. data/sig/shims/abstract_syntax_tree.rbs +0 -25
  427. data/sig/shims/pp.rbs +0 -3
  428. data/sig/shims/ripper.rbs +0 -8
  429. data/sig/shims.rbs +0 -69
  430. data/sig/type_name_resolver.rbs +0 -26
  431. data/stdlib/minitest/0/manifest.yaml +0 -2
  432. data/stdlib/prime/0/integer-extension.rbs +0 -41
  433. data/stdlib/prime/0/manifest.yaml +0 -2
  434. data/stdlib/prime/0/prime.rbs +0 -372
data/core/regexp.rbs CHANGED
@@ -1,115 +1,265 @@
1
1
  # <!-- rdoc-file=re.c -->
2
- # A Regexp holds a regular expression, used to match a pattern against strings.
3
- # Regexps are created using the `/.../` and `%r{...}` literals, and by the
4
- # Regexp::new constructor.
2
+ # A [regular expression](https://en.wikipedia.org/wiki/Regular_expression) (also
3
+ # called a *regexp*) is a *match pattern* (also simply called a *pattern*).
5
4
  #
6
- # You can create a Regexp object explicitly with:
5
+ # A common notation for a regexp uses enclosing slash characters:
7
6
  #
8
- # * A [regexp literal](doc/syntax/literals_rdoc.html#label-Regexp+Literals).
7
+ # /foo/
9
8
  #
9
+ # A regexp may be applied to a *target string*; the part of the string (if any)
10
+ # that matches the pattern is called a *match*, and may be said *to match*:
10
11
  #
11
- # Regular expressions (*regexp*s) are patterns which describe the contents of a
12
- # string. They're used for testing whether a string contains a given pattern, or
13
- # extracting the portions that match. They are created with the `/`*pat*`/` and
14
- # `%r{`*pat*`}` literals or the `Regexp.new` constructor.
12
+ # re = /red/
13
+ # re.match?('redirect') # => true # Match at beginning of target.
14
+ # re.match?('bored') # => true # Match at end of target.
15
+ # re.match?('credit') # => true # Match within target.
16
+ # re.match?('foo') # => false # No match.
15
17
  #
16
- # A regexp is usually delimited with forward slashes (`/`). For example:
18
+ # ## Regexp Uses
17
19
  #
18
- # /hay/ =~ 'haystack' #=> 0
19
- # /y/.match('haystack') #=> #<MatchData "y">
20
+ # A regexp may be used:
20
21
  #
21
- # If a string contains the pattern it is said to *match*. A literal string
22
- # matches itself.
22
+ # * To extract substrings based on a given pattern:
23
23
  #
24
- # Here 'haystack' does not contain the pattern 'needle', so it doesn't match:
24
+ # re = /foo/ # => /foo/
25
+ # re.match('food') # => #<MatchData "foo">
26
+ # re.match('good') # => nil
25
27
  #
26
- # /needle/.match('haystack') #=> nil
28
+ # See sections [Method match](rdoc-ref:Regexp@Method+match) and [Operator
29
+ # =~](rdoc-ref:Regexp@Operator+-3D~).
27
30
  #
28
- # Here 'haystack' contains the pattern 'hay', so it matches:
31
+ # * To determine whether a string matches a given pattern:
29
32
  #
30
- # /hay/.match('haystack') #=> #<MatchData "hay">
33
+ # re.match?('food') # => true
34
+ # re.match?('good') # => false
31
35
  #
32
- # Specifically, `/st/` requires that the string contains the letter *s* followed
33
- # by the letter *t*, so it matches *haystack*, also.
36
+ # See section [Method match?](rdoc-ref:Regexp@Method+match-3F).
34
37
  #
35
- # ## `=~` and Regexp#match
38
+ # * As an argument for calls to certain methods in other classes and modules;
39
+ # most such methods accept an argument that may be either a string or the
40
+ # (much more powerful) regexp.
36
41
  #
37
- # Pattern matching may be achieved by using `=~` operator or Regexp#match
38
- # method.
42
+ # See [Regexp Methods](rdoc-ref:regexp/methods.rdoc).
39
43
  #
40
- # ### `=~` operator
44
+ # ## Regexp Objects
41
45
  #
42
- # `=~` is Ruby's basic pattern-matching operator. When one operand is a regular
43
- # expression and the other is a string then the regular expression is used as a
44
- # pattern to match against the string. (This operator is equivalently defined
45
- # by Regexp and String so the order of String and Regexp do not matter. Other
46
- # classes may have different implementations of `=~`.) If a match is found, the
47
- # operator returns index of first match in string, otherwise it returns `nil`.
46
+ # A regexp object has:
48
47
  #
49
- # /hay/ =~ 'haystack' #=> 0
50
- # 'haystack' =~ /hay/ #=> 0
51
- # /a/ =~ 'haystack' #=> 1
52
- # /u/ =~ 'haystack' #=> nil
48
+ # * A source; see [Sources](rdoc-ref:Regexp@Sources).
53
49
  #
54
- # Using `=~` operator with a String and Regexp the `$~` global variable is set
55
- # after a successful match. `$~` holds a MatchData object. Regexp.last_match is
56
- # equivalent to `$~`.
50
+ # * Several modes; see [Modes](rdoc-ref:Regexp@Modes).
57
51
  #
58
- # ### Regexp#match method
52
+ # * A timeout; see [Timeouts](rdoc-ref:Regexp@Timeouts).
59
53
  #
60
- # The #match method returns a MatchData object:
54
+ # * An encoding; see [Encodings](rdoc-ref:Regexp@Encodings).
61
55
  #
62
- # /st/.match('haystack') #=> #<MatchData "st">
56
+ # ## Creating a Regexp
63
57
  #
64
- # ## Metacharacters and Escapes
58
+ # A regular expression may be created with:
65
59
  #
66
- # The following are *metacharacters* `(`, `)`, `[`, `]`, `{`, `}`, `.`, `?`,
67
- # `+`, `*`. They have a specific meaning when appearing in a pattern. To match
68
- # them literally they must be backslash-escaped. To match a backslash literally,
69
- # backslash-escape it: `\\\`.
60
+ # * A regexp literal using slash characters (see [Regexp
61
+ # Literals](rdoc-ref:syntax/literals.rdoc@Regexp+Literals)):
70
62
  #
71
- # /1 \+ 2 = 3\?/.match('Does 1 + 2 = 3?') #=> #<MatchData "1 + 2 = 3?">
72
- # /a\\\\b/.match('a\\\\b') #=> #<MatchData "a\\b">
63
+ # # This is a very common usage.
64
+ # /foo/ # => /foo/
73
65
  #
74
- # Patterns behave like double-quoted strings and can contain the same backslash
75
- # escapes (the meaning of `\s` is different, however, see
76
- # [below](#label-Character+Classes)).
66
+ # * A `%r` regexp literal (see [%r: Regexp
67
+ # Literals](rdoc-ref:syntax/literals.rdoc@25r-3A+Regexp+Literals)):
77
68
  #
78
- # /\s\u{6771 4eac 90fd}/.match("Go to 東京都")
79
- # #=> #<MatchData " 東京都">
69
+ # # Same delimiter character at beginning and end;
70
+ # # useful for avoiding escaping characters
71
+ # %r/name\/value pair/ # => /name\/value pair/
72
+ # %r:name/value pair: # => /name\/value pair/
73
+ # %r|name/value pair| # => /name\/value pair/
80
74
  #
81
- # Arbitrary Ruby expressions can be embedded into patterns with the `#{...}`
82
- # construct.
75
+ # # Certain "paired" characters can be delimiters.
76
+ # %r[foo] # => /foo/
77
+ # %r{foo} # => /foo/
78
+ # %r(foo) # => /foo/
79
+ # %r<foo> # => /foo/
83
80
  #
84
- # place = "東京都"
85
- # /#{place}/.match("Go to 東京都")
86
- # #=> #<MatchData "東京都">
81
+ # * Method Regexp.new.
87
82
  #
88
- # ## Character Classes
83
+ # ## Method `match`
89
84
  #
90
- # A *character class* is delimited with square brackets (`[`, `]`) and lists
91
- # characters that may appear at that point in the match. `/[ab]/` means *a* or
92
- # *b*, as opposed to `/ab/` which means *a* followed by *b*.
85
+ # Each of the methods Regexp#match, String#match, and Symbol#match returns a
86
+ # MatchData object if a match was found, `nil` otherwise; each also sets [global
87
+ # variables](rdoc-ref:Regexp@Global+Variables):
93
88
  #
94
- # /W[aeiou]rd/.match("Word") #=> #<MatchData "Word">
89
+ # 'food'.match(/foo/) # => #<MatchData "foo">
90
+ # 'food'.match(/bar/) # => nil
95
91
  #
96
- # Within a character class the hyphen (`-`) is a metacharacter denoting an
97
- # inclusive range of characters. `[abcd]` is equivalent to `[a-d]`. A range can
98
- # be followed by another range, so `[abcdwxyz]` is equivalent to `[a-dw-z]`. The
99
- # order in which ranges or individual characters appear inside a character class
100
- # is irrelevant.
92
+ # ## Operator `=~`
101
93
  #
102
- # /[0-9a-f]/.match('9f') #=> #<MatchData "9">
103
- # /[9f]/.match('9f') #=> #<MatchData "9">
94
+ # Each of the operators Regexp#=~, String#=~, and Symbol#=~ returns an integer
95
+ # offset if a match was found, `nil` otherwise; each also sets [global
96
+ # variables](rdoc-ref:Regexp@Global+Variables):
104
97
  #
105
- # If the first character of a character class is a caret (`^`) the class is
106
- # inverted: it matches any character *except* those named.
98
+ # /bar/ =~ 'foo bar' # => 4
99
+ # 'foo bar' =~ /bar/ # => 4
100
+ # /baz/ =~ 'foo bar' # => nil
107
101
  #
108
- # /[^a-eg-z]/.match('f') #=> #<MatchData "f">
102
+ # ## Method `match?`
103
+ #
104
+ # Each of the methods Regexp#match?, String#match?, and Symbol#match? returns
105
+ # `true` if a match was found, `false` otherwise; none sets [global
106
+ # variables](rdoc-ref:Regexp@Global+Variables):
107
+ #
108
+ # 'food'.match?(/foo/) # => true
109
+ # 'food'.match?(/bar/) # => false
110
+ #
111
+ # ## Global Variables
112
+ #
113
+ # Certain regexp-oriented methods assign values to global variables:
114
+ #
115
+ # * `#match`: see [Method match](rdoc-ref:Regexp@Method+match).
116
+ # * `#=~`: see [Operator =~](rdoc-ref:Regexp@Operator+-3D~).
117
+ #
118
+ # The affected global variables are:
119
+ #
120
+ # * `$~`: Returns a MatchData object, or `nil`.
121
+ # * `$&`: Returns the matched part of the string, or `nil`.
122
+ # * `` $` ``: Returns the part of the string to the left of the match, or `nil`.
123
+ # * `$'`: Returns the part of the string to the right of the match, or `nil`.
124
+ # * `$+`: Returns the last group matched, or `nil`.
125
+ # * `$1`, `$2`, etc.: Returns the first, second, etc., matched group, or
126
+ # `nil`. Note that `$0` is quite different; it returns the name of the
127
+ # currently executing program.
128
+ #
129
+ # Examples:
130
+ #
131
+ # # Matched string, but no matched groups.
132
+ # 'foo bar bar baz'.match('bar')
133
+ # $~ # => #<MatchData "bar">
134
+ # $& # => "bar"
135
+ # $` # => "foo "
136
+ # $' # => " bar baz"
137
+ # $+ # => nil
138
+ # $1 # => nil
139
+ #
140
+ # # Matched groups.
141
+ # /s(\w{2}).*(c)/.match('haystack')
142
+ # $~ # => #<MatchData "stac" 1:"ta" 2:"c">
143
+ # $& # => "stac"
144
+ # $` # => "hay"
145
+ # $' # => "k"
146
+ # $+ # => "c"
147
+ # $1 # => "ta"
148
+ # $2 # => "c"
149
+ # $3 # => nil
150
+ #
151
+ # # No match.
152
+ # 'foo'.match('bar')
153
+ # $~ # => nil
154
+ # $& # => nil
155
+ # $` # => nil
156
+ # $' # => nil
157
+ # $+ # => nil
158
+ # $1 # => nil
159
+ #
160
+ # Note that Regexp#match?, String#match?, and Symbol#match? do not set global
161
+ # variables.
162
+ #
163
+ # ## Sources
164
+ #
165
+ # As seen above, the simplest regexp uses a literal expression as its source:
166
+ #
167
+ # re = /foo/ # => /foo/
168
+ # re.match('food') # => #<MatchData "foo">
169
+ # re.match('good') # => nil
170
+ #
171
+ # A rich collection of available *subexpressions* gives the regexp great power
172
+ # and flexibility:
173
+ #
174
+ # * [Special characters](rdoc-ref:Regexp@Special+Characters)
175
+ # * [Source literals](rdoc-ref:Regexp@Source+Literals)
176
+ # * [Character classes](rdoc-ref:Regexp@Character+Classes)
177
+ # * [Shorthand character classes](rdoc-ref:Regexp@Shorthand+Character+Classes)
178
+ # * [Anchors](rdoc-ref:Regexp@Anchors)
179
+ # * [Alternation](rdoc-ref:Regexp@Alternation)
180
+ # * [Quantifiers](rdoc-ref:Regexp@Quantifiers)
181
+ # * [Groups and captures](rdoc-ref:Regexp@Groups+and+Captures)
182
+ # * [Unicode](rdoc-ref:Regexp@Unicode)
183
+ # * [POSIX Bracket Expressions](rdoc-ref:Regexp@POSIX+Bracket+Expressions)
184
+ # * [Comments](rdoc-ref:Regexp@Comments)
185
+ #
186
+ # ### Special Characters
187
+ #
188
+ # Regexp special characters, called *metacharacters*, have special meanings in
189
+ # certain contexts; depending on the context, these are sometimes
190
+ # metacharacters:
191
+ #
192
+ # . ? - + * ^ \ | $ ( ) [ ] { }
193
+ #
194
+ # To match a metacharacter literally, backslash-escape it:
195
+ #
196
+ # # Matches one or more 'o' characters.
197
+ # /o+/.match('foo') # => #<MatchData "oo">
198
+ # # Would match 'o+'.
199
+ # /o\+/.match('foo') # => nil
200
+ #
201
+ # To match a backslash literally, backslash-escape it:
202
+ #
203
+ # /\./.match('\.') # => #<MatchData ".">
204
+ # /\\./.match('\.') # => #<MatchData "\\.">
205
+ #
206
+ # Method Regexp.escape returns an escaped string:
207
+ #
208
+ # Regexp.escape('.?-+*^\|$()[]{}')
209
+ # # => "\\.\\?\\-\\+\\*\\^\\\\\\|\\$\\(\\)\\[\\]\\{\\}"
210
+ #
211
+ # ### Source Literals
212
+ #
213
+ # The source literal largely behaves like a double-quoted string; see
214
+ # [Double-Quoted String
215
+ # Literals](rdoc-ref:syntax/literals.rdoc@Double-Quoted+String+Literals).
216
+ #
217
+ # In particular, a source literal may contain interpolated expressions:
218
+ #
219
+ # s = 'foo' # => "foo"
220
+ # /#{s}/ # => /foo/
221
+ # /#{s.capitalize}/ # => /Foo/
222
+ # /#{2 + 2}/ # => /4/
223
+ #
224
+ # There are differences between an ordinary string literal and a source literal;
225
+ # see [Shorthand Character
226
+ # Classes](rdoc-ref:Regexp@Shorthand+Character+Classes).
227
+ #
228
+ # * `\s` in an ordinary string literal is equivalent to a space character; in
229
+ # a source literal, it's shorthand for matching a whitespace character.
230
+ # * In an ordinary string literal, these are (needlessly) escaped characters;
231
+ # in a source literal, they are shorthands for various matching characters:
232
+ #
233
+ # \w \W \d \D \h \H \S \R
234
+ #
235
+ # ### Character Classes
236
+ #
237
+ # A *character class* is delimited by square brackets; it specifies that certain
238
+ # characters match at a given point in the target string:
239
+ #
240
+ # # This character class will match any vowel.
241
+ # re = /B[aeiou]rd/
242
+ # re.match('Bird') # => #<MatchData "Bird">
243
+ # re.match('Bard') # => #<MatchData "Bard">
244
+ # re.match('Byrd') # => nil
245
+ #
246
+ # A character class may contain hyphen characters to specify ranges of
247
+ # characters:
248
+ #
249
+ # # These regexps have the same effect.
250
+ # /[abcdef]/.match('foo') # => #<MatchData "f">
251
+ # /[a-f]/.match('foo') # => #<MatchData "f">
252
+ # /[a-cd-f]/.match('foo') # => #<MatchData "f">
253
+ #
254
+ # When the first character of a character class is a caret (`^`), the sense of
255
+ # the class is inverted: it matches any character *except* those specified.
256
+ #
257
+ # /[^a-eg-z]/.match('f') # => #<MatchData "f">
109
258
  #
110
259
  # A character class may contain another character class. By itself this isn't
111
- # useful because `[a-z[0-9]]` describes the same set as `[a-z0-9]`. However,
112
- # character classes also support the `&&` operator which performs set
260
+ # useful because `[a-z[0-9]]` describes the same set as `[a-z0-9]`.
261
+ #
262
+ # However, character classes also support the `&&` operator, which performs set
113
263
  # intersection on its arguments. The two can be combined as follows:
114
264
  #
115
265
  # /[a-w&&[^c-g]z]/ # ([a-w] AND ([^c-g] OR z))
@@ -118,238 +268,470 @@
118
268
  #
119
269
  # /[abh-w]/
120
270
  #
121
- # The following metacharacters also behave like character classes:
122
- #
123
- # * `/./` - Any character except a newline.
124
- # * `/./m` - Any character (the `m` modifier enables multiline mode)
125
- # * `/\w/` - A word character (`[a-zA-Z0-9_]`)
126
- # * `/\W/` - A non-word character (`[^a-zA-Z0-9_]`). Please take a look at
127
- # [Bug #4044](https://bugs.ruby-lang.org/issues/4044) if using `/\W/` with
128
- # the `/i` modifier.
129
- # * `/\d/` - A digit character (`[0-9]`)
130
- # * `/\D/` - A non-digit character (`[^0-9]`)
131
- # * `/\h/` - A hexdigit character (`[0-9a-fA-F]`)
132
- # * `/\H/` - A non-hexdigit character (`[^0-9a-fA-F]`)
133
- # * `/\s/` - A whitespace character: `/[ \t\r\n\f\v]/`
134
- # * `/\S/` - A non-whitespace character: `/[^ \t\r\n\f\v]/`
135
- # * `/\R/` - A linebreak: `\n`, `\v`, `\f`, `\r` `\u0085` (NEXT LINE),
136
- # `\u2028` (LINE SEPARATOR), `\u2029` (PARAGRAPH SEPARATOR) or `\r\n`.
137
- #
138
- #
139
- # POSIX *bracket expressions* are also similar to character classes. They
140
- # provide a portable alternative to the above, with the added benefit that they
141
- # encompass non-ASCII characters. For instance, `/\d/` matches only the ASCII
142
- # decimal digits (0-9); whereas `/[[:digit:]]/` matches any character in the
143
- # Unicode *Nd* category.
144
- #
145
- # * `/[[:alnum:]]/` - Alphabetic and numeric character
146
- # * `/[[:alpha:]]/` - Alphabetic character
147
- # * `/[[:blank:]]/` - Space or tab
148
- # * `/[[:cntrl:]]/` - Control character
149
- # * `/[[:digit:]]/` - Digit
150
- # * `/[[:graph:]]/` - Non-blank character (excludes spaces, control
151
- # characters, and similar)
152
- # * `/[[:lower:]]/` - Lowercase alphabetical character
153
- # * `/[[:print:]]/` - Like [:graph:], but includes the space character
154
- # * `/[[:punct:]]/` - Punctuation character
155
- # * `/[[:space:]]/` - Whitespace character (`[:blank:]`, newline, carriage
156
- # return, etc.)
157
- # * `/[[:upper:]]/` - Uppercase alphabetical
158
- # * `/[[:xdigit:]]/` - Digit allowed in a hexadecimal number (i.e., 0-9a-fA-F)
271
+ # ### Shorthand Character Classes
159
272
  #
273
+ # Each of the following metacharacters serves as a shorthand for a character
274
+ # class:
160
275
  #
161
- # Ruby also supports the following non-POSIX character classes:
276
+ # * `/./`: Matches any character except a newline:
162
277
  #
163
- # * `/[[:word:]]/` - A character in one of the following Unicode general
164
- # categories *Letter*, *Mark*, *Number*, *Connector_Punctuation*
165
- # * `/[[:ascii:]]/` - A character in the ASCII character set
278
+ # /./.match('foo') # => #<MatchData "f">
279
+ # /./.match("\n") # => nil
166
280
  #
167
- # # U+06F2 is "EXTENDED ARABIC-INDIC DIGIT TWO"
168
- # /[[:digit:]]/.match("\u06F2") #=> #<MatchData "\u{06F2}">
169
- # /[[:upper:]][[:lower:]]/.match("Hello") #=> #<MatchData "He">
170
- # /[[:xdigit:]][[:xdigit:]]/.match("A6") #=> #<MatchData "A6">
281
+ # * `/./m`: Matches any character, including a newline; see [Multiline
282
+ # Mode](rdoc-ref:Regexp@Multiline+Mode):
171
283
  #
284
+ # /./m.match("\n") # => #<MatchData "\n">
172
285
  #
173
- # ## Repetition
286
+ # * `/\w/`: Matches a word character: equivalent to `[a-zA-Z0-9_]`:
174
287
  #
175
- # The constructs described so far match a single character. They can be followed
176
- # by a repetition metacharacter to specify how many times they need to occur.
177
- # Such metacharacters are called *quantifiers*.
288
+ # /\w/.match(' foo') # => #<MatchData "f">
289
+ # /\w/.match(' _') # => #<MatchData "_">
290
+ # /\w/.match(' ') # => nil
178
291
  #
179
- # * `*` - Zero or more times
180
- # * `+` - One or more times
181
- # * `?` - Zero or one times (optional)
182
- # * `{`*n*`}` - Exactly *n* times
183
- # * `{`*n*`,}` - *n* or more times
184
- # * `{,`*m*`}` - *m* or less times
185
- # * `{`*n*`,`*m*`}` - At least *n* and at most *m* times
292
+ # * `/\W/`: Matches a non-word character: equivalent to `[^a-zA-Z0-9_]`:
186
293
  #
294
+ # /\W/.match(' ') # => #<MatchData " ">
295
+ # /\W/.match('_') # => nil
187
296
  #
188
- # At least one uppercase character ('H'), at least one lowercase character
189
- # ('e'), two 'l' characters, then one 'o':
297
+ # * `/\d/`: Matches a digit character: equivalent to `[0-9]`:
190
298
  #
191
- # "Hello".match(/[[:upper:]]+[[:lower:]]+l{2}o/) #=> #<MatchData "Hello">
299
+ # /\d/.match('THX1138') # => #<MatchData "1">
300
+ # /\d/.match('foo') # => nil
192
301
  #
193
- # ### Greedy match
302
+ # * `/\D/`: Matches a non-digit character: equivalent to `[^0-9]`:
194
303
  #
195
- # Repetition is *greedy* by default: as many occurrences as possible are matched
196
- # while still allowing the overall match to succeed. By contrast, *lazy*
197
- # matching makes the minimal amount of matches necessary for overall success.
198
- # Most greedy metacharacters can be made lazy by following them with `?`. For
199
- # the `{n}` pattern, because it specifies an exact number of characters to match
200
- # and not a variable number of characters, the `?` metacharacter instead makes
201
- # the repeated pattern optional.
304
+ # /\D/.match('123Jump!') # => #<MatchData "J">
305
+ # /\D/.match('123') # => nil
202
306
  #
203
- # Both patterns below match the string. The first uses a greedy quantifier so
204
- # '.+' matches '<a><b>'; the second uses a lazy quantifier so '.+?' matches
205
- # '<a>':
307
+ # * `/\h/`: Matches a hexdigit character: equivalent to `[0-9a-fA-F]`:
206
308
  #
207
- # /<.+>/.match("<a><b>") #=> #<MatchData "<a><b>">
208
- # /<.+?>/.match("<a><b>") #=> #<MatchData "<a>">
309
+ # /\h/.match('xyz fedcba9876543210') # => #<MatchData "f">
310
+ # /\h/.match('xyz') # => nil
209
311
  #
210
- # ### Possessive match
312
+ # * `/\H/`: Matches a non-hexdigit character: equivalent to `[^0-9a-fA-F]`:
211
313
  #
212
- # A quantifier followed by `+` matches *possessively*: once it has matched it
213
- # does not backtrack. They behave like greedy quantifiers, but having matched
214
- # they refuse to "give up" their match even if this jeopardises the overall
215
- # match.
314
+ # /\H/.match('fedcba9876543210xyz') # => #<MatchData "x">
315
+ # /\H/.match('fedcba9876543210') # => nil
216
316
  #
217
- # /<.*><.+>/.match("<a><b>") #=> #<MatchData "<a><b>">
218
- # /<.*+><.+>/.match("<a><b>") #=> nil
219
- # /<.*><.++>/.match("<a><b>") #=> nil
317
+ # * `/\s/`: Matches a whitespace character: equivalent to `/[ \t\r\n\f\v]/`:
220
318
  #
221
- # ## Capturing
319
+ # /\s/.match('foo bar') # => #<MatchData " ">
320
+ # /\s/.match('foo') # => nil
222
321
  #
223
- # Parentheses can be used for *capturing*. The text enclosed by the *n*th group
224
- # of parentheses can be subsequently referred to with *n*. Within a pattern use
225
- # the *backreference* `\n` (e.g. `\1`); outside of the pattern use
226
- # `MatchData[n]` (e.g. `MatchData[1]`).
322
+ # * `/\S/`: Matches a non-whitespace character: equivalent to `/[^
323
+ # \t\r\n\f\v]/`:
227
324
  #
228
- # In this example, `'at'` is captured by the first group of parentheses, then
229
- # referred to later with `\1`:
325
+ # /\S/.match(" \t\r\n\f\v foo") # => #<MatchData "f">
326
+ # /\S/.match(" \t\r\n\f\v") # => nil
230
327
  #
231
- # /[csh](..) [csh]\1 in/.match("The cat sat in the hat")
232
- # #=> #<MatchData "cat sat in" 1:"at">
328
+ # * `/\R/`: Matches a linebreak, platform-independently:
233
329
  #
234
- # Regexp#match returns a MatchData object which makes the captured text
235
- # available with its #[] method:
330
+ # /\R/.match("\r") # => #<MatchData "\r"> # Carriage return (CR)
331
+ # /\R/.match("\n") # => #<MatchData "\n"> # Newline (LF)
332
+ # /\R/.match("\f") # => #<MatchData "\f"> # Formfeed (FF)
333
+ # /\R/.match("\v") # => #<MatchData "\v"> # Vertical tab (VT)
334
+ # /\R/.match("\r\n") # => #<MatchData "\r\n"> # CRLF
335
+ # /\R/.match("\u0085") # => #<MatchData "\u0085"> # Next line (NEL)
336
+ # /\R/.match("\u2028") # => #<MatchData "\u2028"> # Line separator (LSEP)
337
+ # /\R/.match("\u2029") # => #<MatchData "\u2029"> # Paragraph separator (PSEP)
236
338
  #
237
- # /[csh](..) [csh]\1 in/.match("The cat sat in the hat")[1] #=> 'at'
339
+ # ### Anchors
238
340
  #
239
- # While Ruby supports an arbitrary number of numbered captured groups, only
240
- # groups 1-9 are supported using the `\n` backreference syntax.
341
+ # An anchor is a metasequence that matches a zero-width position between
342
+ # characters in the target string.
241
343
  #
242
- # Ruby also supports `\0` as a special backreference, which references the
243
- # entire matched string. This is also available at `MatchData[0]`. Note that
244
- # the `\0` backreference cannot be used inside the regexp, as backreferences can
245
- # only be used after the end of the capture group, and the `\0` backreference
246
- # uses the implicit capture group of the entire match. However, you can use
247
- # this backreference when doing substitution:
344
+ # For a subexpression with no anchor, matching may begin anywhere in the target
345
+ # string:
248
346
  #
249
- # "The cat sat in the hat".gsub(/[csh]at/, '\0s')
250
- # # => "The cats sats in the hats"
347
+ # /real/.match('surrealist') # => #<MatchData "real">
251
348
  #
252
- # ### Named captures
349
+ # For a subexpression with an anchor, matching must begin at the matched anchor.
253
350
  #
254
- # Capture groups can be referred to by name when defined with the
255
- # `(?<`*name*`>)` or `(?'`*name*`')` constructs.
351
+ # #### Boundary Anchors
256
352
  #
257
- # /\$(?<dollars>\d+)\.(?<cents>\d+)/.match("$3.67")
258
- # #=> #<MatchData "$3.67" dollars:"3" cents:"67">
259
- # /\$(?<dollars>\d+)\.(?<cents>\d+)/.match("$3.67")[:dollars] #=> "3"
353
+ # Each of these anchors matches a boundary:
260
354
  #
261
- # Named groups can be backreferenced with `\k<`*name*`>`, where *name* is the
262
- # group name.
355
+ # * `^`: Matches the beginning of a line:
263
356
  #
264
- # /(?<vowel>[aeiou]).\k<vowel>.\k<vowel>/.match('ototomy')
265
- # #=> #<MatchData "ototo" vowel:"o">
357
+ # /^bar/.match("foo\nbar") # => #<MatchData "bar">
358
+ # /^ar/.match("foo\nbar") # => nil
359
+ #
360
+ # * `$`: Matches the end of a line:
361
+ #
362
+ # /bar$/.match("foo\nbar") # => #<MatchData "bar">
363
+ # /ba$/.match("foo\nbar") # => nil
364
+ #
365
+ # * `\A`: Matches the beginning of the string:
366
+ #
367
+ # /\Afoo/.match('foo bar') # => #<MatchData "foo">
368
+ # /\Afoo/.match(' foo bar') # => nil
369
+ #
370
+ # * `\Z`: Matches the end of the string; if the string ends with a single newline,
371
+ # it matches just before the ending newline:
372
+ #
373
+ # /foo\Z/.match('bar foo') # => #<MatchData "foo">
374
+ # /foo\Z/.match('foo bar') # => nil
375
+ # /foo\Z/.match("bar foo\n") # => #<MatchData "foo">
376
+ # /foo\Z/.match("bar foo\n\n") # => nil
377
+ #
378
+ # * `\z`: Matches the end of the string:
379
+ #
380
+ # /foo\z/.match('bar foo') # => #<MatchData "foo">
381
+ # /foo\z/.match('foo bar') # => nil
382
+ # /foo\z/.match("bar foo\n") # => nil
383
+ #
384
+ # * `\b`: Matches word boundary when not inside brackets; matches backspace
385
+ # (`"0x08"`) when inside brackets:
386
+ #
387
+ # /foo\b/.match('foo bar') # => #<MatchData "foo">
388
+ # /foo\b/.match('foobar') # => nil
389
+ #
390
+ # * `\B`: Matches non-word boundary:
391
+ #
392
+ # /foo\B/.match('foobar') # => #<MatchData "foo">
393
+ # /foo\B/.match('foo bar') # => nil
394
+ #
395
+ # * `\G`: Matches first matching position:
396
+ #
397
+ # In methods like String#gsub and String#scan, it changes on each iteration.
398
+ # It initially matches the beginning of subject, and in each following
399
+ # iteration it matches where the last match finished.
400
+ #
401
+ # " a b c".gsub(/ /, '_') # => "____a_b_c"
402
+ # " a b c".gsub(/\G /, '_') # => "____a b c"
403
+ #
404
+ # In methods like Regexp#match and String#match that take an optional
405
+ # offset, it matches where the search begins.
406
+ #
407
+ # "hello, world".match(/,/, 3) # => #<MatchData ",">
408
+ # "hello, world".match(/\G,/, 3) # => nil
409
+ #
410
+ # #### Lookaround Anchors
411
+ #
412
+ # Lookahead anchors:
413
+ #
414
+ # * `(?=*pat*)`: Positive lookahead assertion: ensures that the following
415
+ # characters match *pat*, but doesn't include those characters in the
416
+ # matched substring.
417
+ #
418
+ # * `(?!*pat*)`: Negative lookahead assertion: ensures that the following
419
+ # characters *do not* match *pat*, but doesn't include those characters in
420
+ # the matched substring.
421
+ #
422
+ # Lookbehind anchors:
423
+ #
424
+ # * `(?<=*pat*)`: Positive lookbehind assertion: ensures that the preceding
425
+ # characters match *pat*, but doesn't include those characters in the
426
+ # matched substring.
427
+ #
428
+ # * `(?<!*pat*)`: Negative lookbehind assertion: ensures that the preceding
429
+ # characters do not match *pat*, but doesn't include those characters in the
430
+ # matched substring.
431
+ #
432
+ # The pattern below uses positive lookahead and positive lookbehind to match
433
+ # text appearing in `<b>...</b>` tags without including the tags in the match:
434
+ #
435
+ # /(?<=<b>)\w+(?=<\/b>)/.match("Fortune favors the <b>bold</b>.")
436
+ # # => #<MatchData "bold">
437
+ #
438
+ # #### Match-Reset Anchor
439
+ #
440
+ # * `\K`: Match reset: the matched content preceding `\K` in the regexp is
441
+ # excluded from the result. For example, the following two regexps are
442
+ # almost equivalent:
443
+ #
444
+ # /ab\Kc/.match('abc') # => #<MatchData "c">
445
+ # /(?<=ab)c/.match('abc') # => #<MatchData "c">
446
+ #
447
+ # These match the same string and `$&` equals `'c'`, while the matched position
448
+ # is different.
449
+ #
450
+ # As are the following two regexps:
451
+ #
452
+ # /(a)\K(b)\Kc/
453
+ # /(?<=(?<=(a))(b))c/
454
+ #
455
+ # ### Alternation
456
+ #
457
+ # The vertical bar metacharacter (`|`) may be used within parentheses to express
458
+ # alternation: two or more subexpressions any of which may match the target
459
+ # string.
460
+ #
461
+ # Two alternatives:
462
+ #
463
+ # re = /(a|b)/
464
+ # re.match('foo') # => nil
465
+ # re.match('bar') # => #<MatchData "b" 1:"b">
466
+ #
467
+ # Four alternatives:
468
+ #
469
+ # re = /(a|b|c|d)/
470
+ # re.match('shazam') # => #<MatchData "a" 1:"a">
471
+ # re.match('cold') # => #<MatchData "c" 1:"c">
472
+ #
473
+ # Each alternative is a subexpression, and may be composed of other
474
+ # subexpressions:
475
+ #
476
+ # re = /([a-c]|[x-z])/
477
+ # re.match('bar') # => #<MatchData "b" 1:"b">
478
+ # re.match('ooz') # => #<MatchData "z" 1:"z">
479
+ #
480
+ # Method Regexp.union provides a convenient way to construct a regexp with
481
+ # alternatives.
482
+ #
483
+ # ### Quantifiers
266
484
  #
267
- # **Note**: A regexp can't use named backreferences and numbered backreferences
268
- # simultaneously. Also, if a named capture is used in a regexp, then parentheses
269
- # used for grouping which would otherwise result in a unnamed capture are
270
- # treated as non-capturing.
485
+ # A simple regexp matches one character:
271
486
  #
272
- # /(\w)(\w)/.match("ab").captures # => ["a", "b"]
273
- # /(\w)(\w)/.match("ab").named_captures # => {}
487
+ # /\w/.match('Hello') # => #<MatchData "H">
274
488
  #
275
- # /(?<c>\w)(\w)/.match("ab").captures # => ["a"]
276
- # /(?<c>\w)(\w)/.match("ab").named_captures # => {"c"=>"a"}
489
+ # An added *quantifier* specifies how many matches are required or allowed:
277
490
  #
278
- # When named capture groups are used with a literal regexp on the left-hand side
279
- # of an expression and the `=~` operator, the captured text is also assigned to
280
- # local variables with corresponding names.
491
+ # * `*` - Matches zero or more times:
281
492
  #
282
- # /\$(?<dollars>\d+)\.(?<cents>\d+)/ =~ "$3.67" #=> 0
283
- # dollars #=> "3"
493
+ # /\w*/.match('')
494
+ # # => #<MatchData "">
495
+ # /\w*/.match('x')
496
+ # # => #<MatchData "x">
497
+ # /\w*/.match('xyz')
498
+ # # => #<MatchData "xyz">
284
499
  #
285
- # ## Grouping
500
+ # * `+` - Matches one or more times:
286
501
  #
287
- # Parentheses also *group* the terms they enclose, allowing them to be
288
- # quantified as one *atomic* whole.
502
+ # /\w+/.match('') # => nil
503
+ # /\w+/.match('x') # => #<MatchData "x">
504
+ # /\w+/.match('xyz') # => #<MatchData "xyz">
289
505
  #
290
- # The pattern below matches a vowel followed by 2 word characters:
506
+ # * `?` - Matches zero or one times:
291
507
  #
292
- # /[aeiou]\w{2}/.match("Caenorhabditis elegans") #=> #<MatchData "aen">
508
+ # /\w?/.match('') # => #<MatchData "">
509
+ # /\w?/.match('x') # => #<MatchData "x">
510
+ # /\w?/.match('xyz') # => #<MatchData "x">
293
511
  #
294
- # Whereas the following pattern matches a vowel followed by a word character,
295
- # twice, i.e. `[aeiou]\w[aeiou]\w`: 'enor'.
512
+ # * `{`*n*`}` - Matches exactly *n* times:
296
513
  #
297
- # /([aeiou]\w){2}/.match("Caenorhabditis elegans")
298
- # #=> #<MatchData "enor" 1:"or">
514
+ # /\w{2}/.match('') # => nil
515
+ # /\w{2}/.match('x') # => nil
516
+ # /\w{2}/.match('xyz') # => #<MatchData "xy">
299
517
  #
300
- # The `(?:`...`)` construct provides grouping without capturing. That is, it
301
- # combines the terms it contains into an atomic whole without creating a
302
- # backreference. This benefits performance at the slight expense of readability.
518
+ # * `{`*min*`,}` - Matches *min* or more times:
303
519
  #
304
- # The first group of parentheses captures 'n' and the second 'ti'. The second
305
- # group is referred to later with the backreference `\2`:
520
+ # /\w{2,}/.match('') # => nil
521
+ # /\w{2,}/.match('x') # => nil
522
+ # /\w{2,}/.match('xy') # => #<MatchData "xy">
523
+ # /\w{2,}/.match('xyz') # => #<MatchData "xyz">
306
524
  #
307
- # /I(n)ves(ti)ga\2ons/.match("Investigations")
308
- # #=> #<MatchData "Investigations" 1:"n" 2:"ti">
525
+ # * `{,`*max*`}` - Matches *max* or fewer times:
309
526
  #
310
- # The first group of parentheses is now made non-capturing with '?:', so it
311
- # still matches 'n', but doesn't create the backreference. Thus, the
312
- # backreference `\1` now refers to 'ti'.
527
+ # /\w{,2}/.match('') # => #<MatchData "">
528
+ # /\w{,2}/.match('x') # => #<MatchData "x">
529
+ # /\w{,2}/.match('xyz') # => #<MatchData "xy">
313
530
  #
314
- # /I(?:n)ves(ti)ga\1ons/.match("Investigations")
315
- # #=> #<MatchData "Investigations" 1:"ti">
531
+ # * `{`*min*`,`*max*`}` - Matches at least *min* times and at most *max*
532
+ # times:
316
533
  #
317
- # ### Atomic Grouping
534
+ # /\w{1,2}/.match('') # => nil
535
+ # /\w{1,2}/.match('x') # => #<MatchData "x">
536
+ # /\w{1,2}/.match('xyz') # => #<MatchData "xy">
318
537
  #
319
- # Grouping can be made *atomic* with `(?>`*pat*`)`. This causes the
320
- # subexpression *pat* to be matched independently of the rest of the expression
321
- # such that what it matches becomes fixed for the remainder of the match, unless
322
- # the entire subexpression must be abandoned and subsequently revisited. In this
323
- # way *pat* is treated as a non-divisible whole. Atomic grouping is typically
324
- # used to optimise patterns so as to prevent the regular expression engine from
325
- # backtracking needlessly.
538
+ # #### Greedy, Lazy, or Possessive Matching
326
539
  #
327
- # The `"` in the pattern below matches the first character of the string, then
328
- # `.*` matches *Quote"*. This causes the overall match to fail, so the text
329
- # matched by `.*` is backtracked by one position, which leaves the final
330
- # character of the string available to match `"`
540
+ # Quantifier matching may be greedy, lazy, or possessive:
331
541
  #
332
- # /".*"/.match('"Quote"') #=> #<MatchData "\"Quote\"">
542
+ # * In *greedy* matching, as many occurrences as possible are matched while
543
+ # still allowing the overall match to succeed. Greedy quantifiers: `*`, `+`,
544
+ # `?`, `{min, max}` and its variants.
545
+ # * In *lazy* matching, the minimum number of occurrences are matched. Lazy
546
+ # quantifiers: `*?`, `+?`, `??`, `{min, max}?` and its variants.
547
+ # * In *possessive* matching, once a match is found, there is no backtracking;
548
+ # that match is retained, even if it jeopardises the overall match.
549
+ # Possessive quantifiers: `*+`, `++`, `?+`. Note that `{min, max}` and its
550
+ # variants do *not* support possessive matching.
333
551
  #
334
- # If `.*` is grouped atomically, it refuses to backtrack *Quote"*, even though
335
- # this means that the overall match fails
552
+ # More:
336
553
  #
337
- # /"(?>.*)"/.match('"Quote"') #=> nil
554
+ # * About greedy and lazy matching, see [Choosing Minimal or Maximal
555
+ # Repetition](https://doc.lagout.org/programmation/Regular%20Expressions/Reg
556
+ # ular%20Expressions%20Cookbook_%20Detailed%20Solutions%20in%20Eight%20Progr
557
+ # amming%20Languages%20%282nd%20ed.%29%20%5BGoyvaerts%20%26%20Levithan%20201
558
+ # 2-09-06%5D.pdf#tutorial-backtrack).
559
+ # * About possessive matching, see [Eliminate Needless
560
+ # Backtracking](https://doc.lagout.org/programmation/Regular%20Expressions/R
561
+ # egular%20Expressions%20Cookbook_%20Detailed%20Solutions%20in%20Eight%20Pro
562
+ # gramming%20Languages%20%282nd%20ed.%29%20%5BGoyvaerts%20%26%20Levithan%202
563
+ # 012-09-06%5D.pdf#tutorial-backtrack).
338
564
  #
339
- # ## Subexpression Calls
565
+ # ### Groups and Captures
340
566
  #
341
- # The `\g<`*name*`>` syntax matches the previous subexpression named *name*,
342
- # which can be a group name or number, again. This differs from backreferences
343
- # in that it re-executes the group rather than simply trying to re-match the
344
- # same text.
567
+ # A simple regexp has (at most) one match:
345
568
  #
346
- # This pattern matches a *(* character and assigns it to the `paren` group,
347
- # tries to call that the `paren` sub-expression again but fails, then matches a
348
- # literal *)*:
569
+ # re = /\d\d\d\d-\d\d-\d\d/
570
+ # re.match('1943-02-04') # => #<MatchData "1943-02-04">
571
+ # re.match('1943-02-04').size # => 1
572
+ # re.match('foo') # => nil
349
573
  #
350
- # /\A(?<paren>\(\g<paren>*\))*\z/ =~ '()'
574
+ # Adding one or more pairs of parentheses, `(*subexpression*)`, defines
575
+ # *groups*, which may result in multiple matched substrings, called *captures*:
351
576
  #
352
- # /\A(?<paren>\(\g<paren>*\))*\z/ =~ '(())' #=> 0
577
+ # re = /(\d\d\d\d)-(\d\d)-(\d\d)/
578
+ # re.match('1943-02-04') # => #<MatchData "1943-02-04" 1:"1943" 2:"02" 3:"04">
579
+ # re.match('1943-02-04').size # => 4
580
+ #
581
+ # The first capture is the entire matched string; the other captures are the
582
+ # matched substrings from the groups.
583
+ #
584
+ # A group may have a [quantifier](rdoc-ref:Regexp@Quantifiers):
585
+ #
586
+ # re = /July 4(th)?/
587
+ # re.match('July 4') # => #<MatchData "July 4" 1:nil>
588
+ # re.match('July 4th') # => #<MatchData "July 4th" 1:"th">
589
+ #
590
+ # re = /(foo)*/
591
+ # re.match('') # => #<MatchData "" 1:nil>
592
+ # re.match('foo') # => #<MatchData "foo" 1:"foo">
593
+ # re.match('foofoo') # => #<MatchData "foofoo" 1:"foo">
594
+ #
595
+ # re = /(foo)+/
596
+ # re.match('') # => nil
597
+ # re.match('foo') # => #<MatchData "foo" 1:"foo">
598
+ # re.match('foofoo') # => #<MatchData "foofoo" 1:"foo">
599
+ #
600
+ # The returned MatchData object gives access to the matched substrings:
601
+ #
602
+ # re = /(\d\d\d\d)-(\d\d)-(\d\d)/
603
+ # md = re.match('1943-02-04')
604
+ # # => #<MatchData "1943-02-04" 1:"1943" 2:"02" 3:"04">
605
+ # md[0] # => "1943-02-04"
606
+ # md[1] # => "1943"
607
+ # md[2] # => "02"
608
+ # md[3] # => "04"
609
+ #
610
+ # #### Non-Capturing Groups
611
+ #
612
+ # A group may be made non-capturing; it is still a group (and, for example, can
613
+ # have a quantifier), but its matching substring is not included among the
614
+ # captures.
615
+ #
616
+ # A non-capturing group begins with `?:` (inside the parentheses):
617
+ #
618
+ # # Don't capture the year.
619
+ # re = /(?:\d\d\d\d)-(\d\d)-(\d\d)/
620
+ # md = re.match('1943-02-04') # => #<MatchData "1943-02-04" 1:"02" 2:"04">
621
+ #
622
+ # #### Backreferences
623
+ #
624
+ # A group match may also be referenced within the regexp itself; such a
625
+ # reference is called a `backreference`:
626
+ #
627
+ # /[csh](..) [csh]\1 in/.match('The cat sat in the hat')
628
+ # # => #<MatchData "cat sat in" 1:"at">
629
+ #
630
+ # This table shows how each subexpression in the regexp above matches a
631
+ # substring in the target string:
632
+ #
633
+ # | Subexpression in Regexp | Matching Substring in Target String |
634
+ # |---------------------------|-------------------------------------|
635
+ # | First '[csh]' | Character 'c' |
636
+ # | '(..)' | First substring 'at' |
637
+ # | First space ' ' | First space character ' ' |
638
+ # | Second '[csh]' | Character 's' |
639
+ # | '\1' (backreference 'at') | Second substring 'at' |
640
+ # | ' in' | Substring ' in' |
641
+ #
642
+ # A regexp may contain any number of groups:
643
+ #
644
+ # * For a large number of groups:
645
+ #
646
+ # * The ordinary `\`*n* notation applies only for *n* in range (1..9).
647
+ # * The `MatchData[*n*]` notation applies for any non-negative *n*.
648
+ #
649
+ # * `\0` is a special backreference, referring to the entire matched string;
650
+ # it may not be used within the regexp itself, but may be used outside it
651
+ # (for example, in a substitution method call):
652
+ #
653
+ # 'The cat sat in the hat'.gsub(/[csh]at/, '\0s')
654
+ # # => "The cats sats in the hats"
655
+ #
656
+ # #### Named Captures
657
+ #
658
+ # As seen above, a capture can be referred to by its number. A capture can also
659
+ # have a name, prefixed as `?<*name*>` or `?'*name*'`, and the name (symbolized)
660
+ # may be used as an index in `MatchData[]`:
661
+ #
662
+ # md = /\$(?<dollars>\d+)\.(?'cents'\d+)/.match("$3.67")
663
+ # # => #<MatchData "$3.67" dollars:"3" cents:"67">
664
+ # md[:dollars] # => "3"
665
+ # md[:cents] # => "67"
666
+ # # The capture numbers are still valid.
667
+ # md[2] # => "67"
668
+ #
669
+ # When a regexp contains a named capture, there are no unnamed captures:
670
+ #
671
+ # /\$(?<dollars>\d+)\.(\d+)/.match("$3.67")
672
+ # # => #<MatchData "$3.67" dollars:"3">
673
+ #
674
+ # A named group may be backreferenced as `\k<*name*>`:
675
+ #
676
+ # /(?<vowel>[aeiou]).\k<vowel>.\k<vowel>/.match('ototomy')
677
+ # # => #<MatchData "ototo" vowel:"o">
678
+ #
679
+ # When (and only when) a regexp contains named capture groups and appears before
680
+ # the `=~` operator, the captured substrings are assigned to local variables
681
+ # with corresponding names:
682
+ #
683
+ # /\$(?<dollars>\d+)\.(?<cents>\d+)/ =~ '$3.67'
684
+ # dollars # => "3"
685
+ # cents # => "67"
686
+ #
687
+ # Method MatchData#named_captures returns a hash of the capture names and
688
+ # substrings; method Regexp#names returns an array of the capture names.
689
+ #
690
+ # #### Atomic Grouping
691
+ #
692
+ # A group may be made *atomic* with `(?>`*subexpression*`)`.
693
+ #
694
+ # This causes the subexpression to be matched independently of the rest of the
695
+ # expression, so that the matched substring becomes fixed for the remainder of
696
+ # the match, unless the entire subexpression must be abandoned and subsequently
697
+ # revisited.
698
+ #
699
+ # In this way *subexpression* is treated as a non-divisible whole. Atomic
700
+ # grouping is typically used to optimise patterns to prevent needless
701
+ # backtracking.
702
+ #
703
+ # Example (without atomic grouping):
704
+ #
705
+ # /".*"/.match('"Quote"') # => #<MatchData "\"Quote\"">
706
+ #
707
+ # Analysis:
708
+ #
709
+ # 1. The leading subexpression `"` in the pattern matches the first character
710
+ # `"` in the target string.
711
+ # 2. The next subexpression `.*` matches the next substring `Quote"` (including
712
+ # the trailing double-quote).
713
+ # 3. Now there is nothing left in the target string to match the trailing
714
+ # subexpression `"` in the pattern; this would cause the overall match to
715
+ # fail.
716
+ # 4. The matched substring is backtracked by one position: `Quote`.
717
+ # 5. The final subexpression `"` now matches the final substring `"`, and the
718
+ # overall match succeeds.
719
+ #
720
+ # If subexpression `.*` is grouped atomically, the backtracking is disabled, and
721
+ # the overall match fails:
722
+ #
723
+ # /"(?>.*)"/.match('"Quote"') # => nil
724
+ #
725
+ # Atomic grouping can affect performance; see [Atomic
726
+ # Group](https://www.regular-expressions.info/atomic.html).
727
+ #
728
+ # #### Subexpression Calls
729
+ #
730
+ # As seen above, a backreference number (`\`*n*) or name (`\k<*name*>`) gives
731
+ # access to a captured *substring*; the corresponding regexp *subexpression* may
732
+ # also be accessed, via the number (`\g<`*n*`>`) or name (`\g<*name*>`):
733
+ #
734
+ # /\A(?<paren>\(\g<paren>*\))*\z/.match('(())')
353
735
  # # ^1
354
736
  # # ^2
355
737
  # # ^3
@@ -361,407 +743,587 @@
361
743
  # # ^9
362
744
  # # ^10
363
745
  #
746
+ # The pattern:
747
+ #
364
748
  # 1. Matches at the beginning of the string, i.e. before the first character.
365
- # 2. Enters a named capture group called `paren`
366
- # 3. Matches a literal *(*, the first character in the string
367
- # 4. Calls the `paren` group again, i.e. recurses back to the second step
368
- # 5. Re-enters the `paren` group
369
- # 6. Matches a literal *(*, the second character in the string
370
- # 7. Try to call `paren` a third time, but fail because doing so would prevent
371
- # an overall successful match
372
- # 8. Match a literal *)*, the third character in the string. Marks the end of
373
- # the second recursive call
374
- # 9. Match a literal *)*, the fourth character in the string
375
- # 10. Match the end of the string
376
- #
377
- #
378
- # ## Alternation
379
- #
380
- # The vertical bar metacharacter (`|`) combines several expressions into a
381
- # single one that matches any of the expressions. Each expression is an
382
- # *alternative*.
383
- #
384
- # /\w(and|or)\w/.match("Feliformia") #=> #<MatchData "form" 1:"or">
385
- # /\w(and|or)\w/.match("furandi") #=> #<MatchData "randi" 1:"and">
386
- # /\w(and|or)\w/.match("dissemblance") #=> nil
387
- #
388
- # ## Character Properties
389
- #
390
- # The `\p{}` construct matches characters with the named property, much like
391
- # POSIX bracket classes.
392
- #
393
- # * `/\p{Alnum}/` - Alphabetic and numeric character
394
- # * `/\p{Alpha}/` - Alphabetic character
395
- # * `/\p{Blank}/` - Space or tab
396
- # * `/\p{Cntrl}/` - Control character
397
- # * `/\p{Digit}/` - Digit
398
- # * `/\p{Graph}/` - Non-blank character (excludes spaces, control characters,
399
- # and similar)
400
- # * `/\p{Lower}/` - Lowercase alphabetical character
401
- # * `/\p{Print}/` - Like `\p{Graph}`, but includes the space character
402
- # * `/\p{Punct}/` - Punctuation character
403
- # * `/\p{Space}/` - Whitespace character (`[:blank:]`, newline, carriage
749
+ # 2. Enters a named group `paren`.
750
+ # 3. Matches the first character in the string, `'('`.
751
+ # 4. Calls the `paren` group again, i.e. recurses back to the second step.
752
+ # 5. Re-enters the `paren` group.
753
+ # 6. Matches the second character in the string, `'('`.
754
+ # 7. Attempts to call `paren` a third time, but fails because doing so would
755
+ # prevent an overall successful match.
756
+ # 8. Matches the third character in the string, `')'`; marks the end of the
757
+ # second recursive call.
758
+ # 9. Matches the fourth character in the string, `')'`.
759
+ # 10. Matches the end of the string.
760
+ #
761
+ # See [Subexpression
762
+ # calls](https://learnbyexample.github.io/Ruby_Regexp/groupings-and-backreferenc
763
+ # es.html?highlight=subexpression#subexpression-calls).
764
+ #
765
+ # #### Conditionals
766
+ #
767
+ # The conditional construct takes the form `(?(*cond*)*yes*|*no*)`, where:
768
+ #
769
+ # * *cond* may be a capture number or name.
770
+ # * The match to be applied is *yes* if *cond* is captured; otherwise the
771
+ # match to be applied is *no*.
772
+ # * If not needed, `|*no*` may be omitted.
773
+ #
774
+ # Examples:
775
+ #
776
+ # re = /\A(foo)?(?(1)(T)|(F))\z/
777
+ # re.match('fooT') # => #<MatchData "fooT" 1:"foo" 2:"T" 3:nil>
778
+ # re.match('F') # => #<MatchData "F" 1:nil 2:nil 3:"F">
779
+ # re.match('fooF') # => nil
780
+ # re.match('T') # => nil
781
+ #
782
+ # re = /\A(?<xyzzy>foo)?(?(<xyzzy>)(T)|(F))\z/
783
+ # re.match('fooT') # => #<MatchData "fooT" xyzzy:"foo">
784
+ # re.match('F') # => #<MatchData "F" xyzzy:nil>
785
+ # re.match('fooF') # => nil
786
+ # re.match('T') # => nil
787
+ #
788
+ # #### Absence Operator
789
+ #
790
+ # The absence operator is a special group that matches anything which does *not*
791
+ # match the contained subexpressions.
792
+ #
793
+ # /(?~real)/.match('surrealist') # => #<MatchData "surrea">
794
+ # /(?~real)ist/.match('surrealist') # => #<MatchData "ealist">
795
+ # /sur(?~real)ist/.match('surrealist') # => nil
796
+ #
797
+ # ### Unicode
798
+ #
799
+ # #### Unicode Properties
800
+ #
801
+ # The `/\p{*property_name*}/` construct (with lowercase `p`) matches characters
802
+ # using a Unicode property name, much like a character class; property `Alpha`
803
+ # specifies alphabetic characters:
804
+ #
805
+ # /\p{Alpha}/.match('a') # => #<MatchData "a">
806
+ # /\p{Alpha}/.match('1') # => nil
807
+ #
808
+ # A property can be inverted by prefixing the name with a caret character (`^`):
809
+ #
810
+ # /\p{^Alpha}/.match('1') # => #<MatchData "1">
811
+ # /\p{^Alpha}/.match('a') # => nil
812
+ #
813
+ # Or by using `\P` (uppercase `P`):
814
+ #
815
+ # /\P{Alpha}/.match('1') # => #<MatchData "1">
816
+ # /\P{Alpha}/.match('a') # => nil
817
+ #
818
+ # See [Unicode Properties](rdoc-ref:regexp/unicode_properties.rdoc) for regexps
819
+ # based on the numerous properties.
820
+ #
821
+ # Some commonly-used properties correspond to POSIX bracket expressions:
822
+ #
823
+ # * `/\p{Alnum}/`: Alphabetic and numeric character
824
+ # * `/\p{Alpha}/`: Alphabetic character
825
+ # * `/\p{Blank}/`: Space or tab
826
+ # * `/\p{Cntrl}/`: Control character
827
+ # * `/\p{Digit}/`: Digit
828
+ # * `/\p{Lower}/`: Lowercase alphabetical character
829
+ # * `/\p{Print}/`: Like `\p{Graph}`, but includes the space character
830
+ # * `/\p{Punct}/`: Punctuation character
831
+ # * `/\p{Space}/`: Whitespace character (`[:blank:]`, newline, carriage
404
832
  # return, etc.)
405
- # * `/\p{Upper}/` - Uppercase alphabetical
406
- # * `/\p{XDigit}/` - Digit allowed in a hexadecimal number (i.e., 0-9a-fA-F)
407
- # * `/\p{Word}/` - A member of one of the following Unicode general category
408
- # *Letter*, *Mark*, *Number*, *Connector_Punctuation*
409
- # * `/\p{ASCII}/` - A character in the ASCII character set
410
- # * `/\p{Any}/` - Any Unicode character (including unassigned characters)
411
- # * `/\p{Assigned}/` - An assigned character
412
- #
413
- #
414
- # A Unicode character's *General Category* value can also be matched with
415
- # `\p{`*Ab*`}` where *Ab* is the category's abbreviation as described below:
416
- #
417
- # * `/\p{L}/` - 'Letter'
418
- # * `/\p{Ll}/` - 'Letter: Lowercase'
419
- # * `/\p{Lm}/` - 'Letter: Mark'
420
- # * `/\p{Lo}/` - 'Letter: Other'
421
- # * `/\p{Lt}/` - 'Letter: Titlecase'
422
- # * `/\p{Lu}/` - 'Letter: Uppercase
423
- # * `/\p{Lo}/` - 'Letter: Other'
424
- # * `/\p{M}/` - 'Mark'
425
- # * `/\p{Mn}/` - 'Mark: Nonspacing'
426
- # * `/\p{Mc}/` - 'Mark: Spacing Combining'
427
- # * `/\p{Me}/` - 'Mark: Enclosing'
428
- # * `/\p{N}/` - 'Number'
429
- # * `/\p{Nd}/` - 'Number: Decimal Digit'
430
- # * `/\p{Nl}/` - 'Number: Letter'
431
- # * `/\p{No}/` - 'Number: Other'
432
- # * `/\p{P}/` - 'Punctuation'
433
- # * `/\p{Pc}/` - 'Punctuation: Connector'
434
- # * `/\p{Pd}/` - 'Punctuation: Dash'
435
- # * `/\p{Ps}/` - 'Punctuation: Open'
436
- # * `/\p{Pe}/` - 'Punctuation: Close'
437
- # * `/\p{Pi}/` - 'Punctuation: Initial Quote'
438
- # * `/\p{Pf}/` - 'Punctuation: Final Quote'
439
- # * `/\p{Po}/` - 'Punctuation: Other'
440
- # * `/\p{S}/` - 'Symbol'
441
- # * `/\p{Sm}/` - 'Symbol: Math'
442
- # * `/\p{Sc}/` - 'Symbol: Currency'
443
- # * `/\p{Sc}/` - 'Symbol: Currency'
444
- # * `/\p{Sk}/` - 'Symbol: Modifier'
445
- # * `/\p{So}/` - 'Symbol: Other'
446
- # * `/\p{Z}/` - 'Separator'
447
- # * `/\p{Zs}/` - 'Separator: Space'
448
- # * `/\p{Zl}/` - 'Separator: Line'
449
- # * `/\p{Zp}/` - 'Separator: Paragraph'
450
- # * `/\p{C}/` - 'Other'
451
- # * `/\p{Cc}/` - 'Other: Control'
452
- # * `/\p{Cf}/` - 'Other: Format'
453
- # * `/\p{Cn}/` - 'Other: Not Assigned'
454
- # * `/\p{Co}/` - 'Other: Private Use'
455
- # * `/\p{Cs}/` - 'Other: Surrogate'
456
- #
457
- #
458
- # Lastly, `\p{}` matches a character's Unicode *script*. The following scripts
459
- # are supported: *Arabic*, *Armenian*, *Balinese*, *Bengali*, *Bopomofo*,
460
- # *Braille*, *Buginese*, *Buhid*, *Canadian_Aboriginal*, *Carian*, *Cham*,
461
- # *Cherokee*, *Common*, *Coptic*, *Cuneiform*, *Cypriot*, *Cyrillic*, *Deseret*,
462
- # *Devanagari*, *Ethiopic*, *Georgian*, *Glagolitic*, *Gothic*, *Greek*,
463
- # *Gujarati*, *Gurmukhi*, *Han*, *Hangul*, *Hanunoo*, *Hebrew*, *Hiragana*,
464
- # *Inherited*, *Kannada*, *Katakana*, *Kayah_Li*, *Kharoshthi*, *Khmer*, *Lao*,
465
- # *Latin*, *Lepcha*, *Limbu*, *Linear_B*, *Lycian*, *Lydian*, *Malayalam*,
466
- # *Mongolian*, *Myanmar*, *New_Tai_Lue*, *Nko*, *Ogham*, *Ol_Chiki*,
467
- # *Old_Italic*, *Old_Persian*, *Oriya*, *Osmanya*, *Phags_Pa*, *Phoenician*,
468
- # *Rejang*, *Runic*, *Saurashtra*, *Shavian*, *Sinhala*, *Sundanese*,
469
- # *Syloti_Nagri*, *Syriac*, *Tagalog*, *Tagbanwa*, *Tai_Le*, *Tamil*, *Telugu*,
470
- # *Thaana*, *Thai*, *Tibetan*, *Tifinagh*, *Ugaritic*, *Vai*, and *Yi*.
471
- #
472
- # Unicode codepoint U+06E9 is named "ARABIC PLACE OF SAJDAH" and belongs to the
473
- # Arabic script:
474
- #
475
- # /\p{Arabic}/.match("\u06E9") #=> #<MatchData "\u06E9">
476
- #
477
- # All character properties can be inverted by prefixing their name with a caret
478
- # (`^`).
479
- #
480
- # Letter 'A' is not in the Unicode Ll (Letter; Lowercase) category, so this
481
- # match succeeds:
482
- #
483
- # /\p{^Ll}/.match("A") #=> #<MatchData "A">
484
- #
485
- # ## Anchors
486
- #
487
- # Anchors are metacharacter that match the zero-width positions between
488
- # characters, *anchoring* the match to a specific position.
489
- #
490
- # * `^` - Matches beginning of line
491
- # * `$` - Matches end of line
492
- # * `\A` - Matches beginning of string.
493
- # * `\Z` - Matches end of string. If string ends with a newline, it matches
494
- # just before newline
495
- # * `\z` - Matches end of string
496
- # * `\G` - Matches first matching position:
497
- #
498
- # In methods like `String#gsub` and `String#scan`, it changes on each
499
- # iteration. It initially matches the beginning of subject, and in each
500
- # following iteration it matches where the last match finished.
501
- #
502
- # " a b c".gsub(/ /, '_') #=> "____a_b_c"
503
- # " a b c".gsub(/\G /, '_') #=> "____a b c"
504
- #
505
- # In methods like `Regexp#match` and `String#match` that take an (optional)
506
- # offset, it matches where the search begins.
833
+ # * `/\p{Upper}/`: Uppercase alphabetical
834
+ # * `/\p{XDigit}/`: Digit allowed in a hexadecimal number (i.e., 0-9a-fA-F)
507
835
  #
508
- # "hello, world".match(/,/, 3) #=> #<MatchData ",">
509
- # "hello, world".match(/\G,/, 3) #=> nil
836
+ # These are also commonly used:
837
+ #
838
+ # * `/\p{Emoji}/`: Unicode emoji.
839
+ # * `/\p{Graph}/`: Characters excluding `/\p{Cntrl}/` and `/\p{Space}/`. Note
840
+ # that invisible characters under the Unicode
841
+ # ["Format"](https://www.compart.com/en/unicode/category/Cf) category are
842
+ # included.
843
+ # * `/\p{Word}/`: A member in one of these Unicode character categories (see
844
+ # below) or having one of these Unicode properties:
845
+ #
846
+ # * Unicode categories:
847
+ # * `Mark` (`M`).
848
+ # * `Decimal Number` (`Nd`)
849
+ # * `Connector Punctuation` (`Pc`).
510
850
  #
511
- # * `\b` - Matches word boundaries when outside brackets; backspace (0x08)
512
- # when inside brackets
513
- # * `\B` - Matches non-word boundaries
514
- # * `(?=`*pat*`)` - *Positive lookahead* assertion: ensures that the following
515
- # characters match *pat*, but doesn't include those characters in the
516
- # matched text
517
- # * `(?!`*pat*`)` - *Negative lookahead* assertion: ensures that the following
518
- # characters do not match *pat*, but doesn't include those characters in the
519
- # matched text
520
- # * `(?<=`*pat*`)` - *Positive lookbehind* assertion: ensures that the
521
- # preceding characters match *pat*, but doesn't include those characters in
522
- # the matched text
523
- # * `(?<!`*pat*`)` - *Negative lookbehind* assertion: ensures that the
524
- # preceding characters do not match *pat*, but doesn't include those
525
- # characters in the matched text
526
- # * `\K` - Uses an positive lookbehind of the content preceding `\K` in the
527
- # regexp. For example, the following two regexps are almost equivalent:
528
- #
529
- # /ab\Kc/
530
- # /(?<=ab)c/
851
+ # * Unicode properties:
852
+ # * `Alpha`
853
+ # * `Join_Control`
531
854
  #
532
- # As are the following two regexps:
855
+ # * `/\p{ASCII}/`: A character in the ASCII character set.
856
+ # * `/\p{Any}/`: Any Unicode character (including unassigned characters).
857
+ # * `/\p{Assigned}/`: An assigned character.
533
858
  #
534
- # /(a)\K(b)\Kc/
535
- # /(?<=(?<=(a))(b))c/
859
+ # #### Unicode Character Categories
536
860
  #
861
+ # A Unicode character category name:
537
862
  #
538
- # If a pattern isn't anchored it can begin at any point in the string:
863
+ # * May be either its full name or its abbreviated name.
864
+ # * Is case-insensitive.
865
+ # * Treats a space, a hyphen, and an underscore as equivalent.
539
866
  #
540
- # /real/.match("surrealist") #=> #<MatchData "real">
867
+ # Examples:
541
868
  #
542
- # Anchoring the pattern to the beginning of the string forces the match to start
543
- # there. 'real' doesn't occur at the beginning of the string, so now the match
544
- # fails:
869
+ # /\p{lu}/ # => /\p{lu}/
870
+ # /\p{LU}/ # => /\p{LU}/
871
+ # /\p{Uppercase Letter}/ # => /\p{Uppercase Letter}/
872
+ # /\p{Uppercase_Letter}/ # => /\p{Uppercase_Letter}/
873
+ # /\p{UPPERCASE-LETTER}/ # => /\p{UPPERCASE-LETTER}/
545
874
  #
546
- # /\Areal/.match("surrealist") #=> nil
875
+ # Below are the Unicode character category abbreviations and names. Enumerations
876
+ # of characters in each category are at the links.
547
877
  #
548
- # The match below fails because although 'Demand' contains 'and', the pattern
549
- # does not occur at a word boundary.
878
+ # Letters:
550
879
  #
551
- # /\band/.match("Demand")
880
+ # * `L`, `Letter`: `LC`, `Lm`, or `Lo`.
881
+ # * `LC`, `Cased_Letter`: `Ll`, `Lt`, or `Lu`.
882
+ # * [Ll, Lowercase_Letter](https://www.compart.com/en/unicode/category/Ll).
883
+ # * [Lm, Modifier_Letter](https://www.compart.com/en/unicode/category/Lm).
884
+ # * [Lo, Other_Letter](https://www.compart.com/en/unicode/category/Lo).
885
+ # * [Lt, Titlecase_Letter](https://www.compart.com/en/unicode/category/Lt).
886
+ # * [Lu, Uppercase_Letter](https://www.compart.com/en/unicode/category/Lu).
552
887
  #
553
- # Whereas in the following example 'and' has been anchored to a non-word
554
- # boundary so instead of matching the first 'and' it matches from the fourth
555
- # letter of 'demand' instead:
888
+ # Marks:
556
889
  #
557
- # /\Band.+/.match("Supply and demand curve") #=> #<MatchData "and curve">
890
+ # * `M`, `Mark`: `Mc`, `Me`, or `Mn`.
891
+ # * [Mc, Spacing_Mark](https://www.compart.com/en/unicode/category/Mc).
892
+ # * [Me, Enclosing_Mark](https://www.compart.com/en/unicode/category/Me).
893
+ # * [Mn, Nonspacing_Mark](https://www.compart.com/en/unicode/category/Mn).
558
894
  #
559
- # The pattern below uses positive lookahead and positive lookbehind to match
560
- # text appearing in tags without including the tags in the match:
895
+ # Numbers:
896
+ #
897
+ # * `N`, `Number`: `Nd`, `Nl`, or `No`.
898
+ # * [Nd, Decimal_Number](https://www.compart.com/en/unicode/category/Nd).
899
+ # * [Nl, Letter_Number](https://www.compart.com/en/unicode/category/Nl).
900
+ # * [No, Other_Number](https://www.compart.com/en/unicode/category/No).
901
+ #
902
+ # Punctuation:
903
+ #
904
+ # * `P`, `Punctuation`: `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or `Ps`.
905
+ # * [Pc,
906
+ # Connector_Punctuation](https://www.compart.com/en/unicode/category/Pc).
907
+ # * [Pd, Dash_Punctuation](https://www.compart.com/en/unicode/category/Pd).
908
+ # * [Pe, Close_Punctuation](https://www.compart.com/en/unicode/category/Pe).
909
+ # * [Pf, Final_Punctuation](https://www.compart.com/en/unicode/category/Pf).
910
+ # * [Pi, Initial_Punctuation](https://www.compart.com/en/unicode/category/Pi).
911
+ # * [Po, Other_Punctuation](https://www.compart.com/en/unicode/category/Po).
912
+ # * [Ps, Open_Punctuation](https://www.compart.com/en/unicode/category/Ps).
913
+ #
914
+ # * `S`, `Symbol`: `Sc`, `Sk`, `Sm`, or `So`.
915
+ # * [Sc, Currency_Symbol](https://www.compart.com/en/unicode/category/Sc).
916
+ # * [Sk, Modifier_Symbol](https://www.compart.com/en/unicode/category/Sk).
917
+ # * [Sm, Math_Symbol](https://www.compart.com/en/unicode/category/Sm).
918
+ # * [So, Other_Symbol](https://www.compart.com/en/unicode/category/So).
919
+ #
920
+ # * `Z`, `Separator`: `Zl`, `Zp`, or `Zs`.
921
+ # * [Zl, Line_Separator](https://www.compart.com/en/unicode/category/Zl).
922
+ # * [Zp, Paragraph_Separator](https://www.compart.com/en/unicode/category/Zp).
923
+ # * [Zs, Space_Separator](https://www.compart.com/en/unicode/category/Zs).
924
+ #
925
+ # * `C`, `Other`: `Cc`, `Cf`, `Cn`, `Co`, or `Cs`.
926
+ # * [Cc, Control](https://www.compart.com/en/unicode/category/Cc).
927
+ # * [Cf, Format](https://www.compart.com/en/unicode/category/Cf).
928
+ # * [Cn, Unassigned](https://www.compart.com/en/unicode/category/Cn).
929
+ # * [Co, Private_Use](https://www.compart.com/en/unicode/category/Co).
930
+ # * [Cs, Surrogate](https://www.compart.com/en/unicode/category/Cs).
931
+ #
932
+ # #### Unicode Scripts and Blocks
933
+ #
934
+ # Among the Unicode properties are:
935
+ #
936
+ # * [Unicode scripts](https://en.wikipedia.org/wiki/Script_(Unicode)); see
937
+ # [supported scripts](https://www.unicode.org/standard/supported.html).
938
+ # * [Unicode blocks](https://en.wikipedia.org/wiki/Unicode_block); see
939
+ # [supported blocks](http://www.unicode.org/Public/UNIDATA/Blocks.txt).
940
+ #
941
+ # ### POSIX Bracket Expressions
942
+ #
943
+ # A POSIX *bracket expression* is also similar to a character class. These
944
+ # expressions provide a portable alternative to the above, with the added
945
+ # benefit of encompassing non-ASCII characters:
946
+ #
947
+ # * `/\d/` matches only ASCII decimal digits `0` through `9`.
948
+ # * `/[[:digit:]]/` matches any character in the Unicode `Decimal Number`
949
+ # (`Nd`) category; see below.
950
+ #
951
+ # The POSIX bracket expressions:
952
+ #
953
+ # * `/[[:digit:]]/`: Matches a [Unicode
954
+ # digit](https://www.compart.com/en/unicode/category/Nd):
955
+ #
956
+ # /[[:digit:]]/.match('9') # => #<MatchData "9">
957
+ # /[[:digit:]]/.match("\u1fbf9") # => #<MatchData "9">
958
+ #
959
+ # * `/[[:xdigit:]]/`: Matches a digit allowed in a hexadecimal number;
960
+ # equivalent to `[0-9a-fA-F]`.
961
+ #
962
+ # * `/[[:upper:]]/`: Matches a [Unicode uppercase
963
+ # letter](https://www.compart.com/en/unicode/category/Lu):
561
964
  #
562
- # /(?<=<b>)\w+(?=<\/b>)/.match("Fortune favours the <b>bold</b>")
563
- # #=> #<MatchData "bold">
965
+ # /[[:upper:]]/.match('A') # => #<MatchData "A">
966
+ # /[[:upper:]]/.match("\u00c6") # => #<MatchData "Æ">
564
967
  #
565
- # ## Options
968
+ # * `/[[:lower:]]/`: Matches a [Unicode lowercase
969
+ # letter](https://www.compart.com/en/unicode/category/Ll):
566
970
  #
567
- # The end delimiter for a regexp can be followed by one or more single-letter
568
- # options which control how the pattern can match.
971
+ # /[[:lower:]]/.match('a') # => #<MatchData "a">
972
+ # /[[:lower:]]/.match("\u01fd") # => #<MatchData "ǽ">
569
973
  #
570
- # * `/pat/i` - Ignore case
571
- # * `/pat/m` - Treat a newline as a character matched by `.`
572
- # * `/pat/x` - Ignore whitespace and comments in the pattern
573
- # * `/pat/o` - Perform `#{}` interpolation only once
974
+ # * `/[[:alpha:]]/`: Matches `/[[:upper:]]/` or `/[[:lower:]]/`.
574
975
  #
976
+ # * `/[[:alnum:]]/`: Matches `/[[:alpha:]]/` or `/[[:digit:]]/`.
575
977
  #
576
- # `i`, `m`, and `x` can also be applied on the subexpression level with the
577
- # `(?`*on*`-`*off*`)` construct, which enables options *on*, and disables
578
- # options *off* for the expression enclosed by the parentheses:
978
+ # * `/[[:space:]]/`: Matches a [Unicode space
979
+ # character](https://www.compart.com/en/unicode/category/Zs):
579
980
  #
580
- # /a(?i:b)c/.match('aBc') #=> #<MatchData "aBc">
581
- # /a(?-i:b)c/i.match('ABC') #=> nil
981
+ # /[[:space:]]/.match(' ') # => #<MatchData " ">
982
+ # /[[:space:]]/.match("\u2005") # => #<MatchData " ">
582
983
  #
583
- # Additionally, these options can also be toggled for the remainder of the
584
- # pattern:
984
+ # * `/[[:blank:]]/`: Matches `/[[:space:]]/` or tab character:
585
985
  #
586
- # /a(?i)bc/.match('abC') #=> #<MatchData "abC">
986
+ # /[[:blank:]]/.match(' ') # => #<MatchData " ">
987
+ # /[[:blank:]]/.match("\u2005") # => #<MatchData " ">
988
+ # /[[:blank:]]/.match("\t") # => #<MatchData "\t">
587
989
  #
588
- # Options may also be used with `Regexp.new`:
990
+ # * `/[[:cntrl:]]/`: Matches [Unicode control
991
+ # character](https://www.compart.com/en/unicode/category/Cc):
589
992
  #
590
- # Regexp.new("abc", Regexp::IGNORECASE) #=> /abc/i
591
- # Regexp.new("abc", Regexp::MULTILINE) #=> /abc/m
592
- # Regexp.new("abc # Comment", Regexp::EXTENDED) #=> /abc # Comment/x
593
- # Regexp.new("abc", Regexp::IGNORECASE | Regexp::MULTILINE) #=> /abc/mi
993
+ # /[[:cntrl:]]/.match("\u0000") # => #<MatchData "\u0000">
994
+ # /[[:cntrl:]]/.match("\u009f") # => #<MatchData "\u009F">
594
995
  #
595
- # ## Free-Spacing Mode and Comments
996
+ # * `/[[:graph:]]/`: Matches any character except `/[[:space:]]/` or
997
+ # `/[[:cntrl:]]/`.
596
998
  #
597
- # As mentioned above, the `x` option enables *free-spacing* mode. Literal white
598
- # space inside the pattern is ignored, and the octothorpe (`#`) character
599
- # introduces a comment until the end of the line. This allows the components of
600
- # the pattern to be organized in a potentially more readable fashion.
999
+ # * `/[[:print:]]/`: Matches `/[[:graph:]]/` or space character.
601
1000
  #
602
- # A contrived pattern to match a number with optional decimal places:
1001
+ # * `/[[:punct:]]/`: Matches any [Unicode punctuation
1002
+ # character](https://www.compart.com/en/unicode/category/Po):
603
1003
  #
604
- # float_pat = /\A
605
- # [[:digit:]]+ # 1 or more digits before the decimal point
606
- # (\. # Decimal point
607
- # [[:digit:]]+ # 1 or more digits after the decimal point
608
- # )? # The decimal point and following digits are optional
609
- # \Z/x
610
- # float_pat.match('3.14') #=> #<MatchData "3.14" 1:".14">
1004
+ # Ruby also supports these (non-POSIX) bracket expressions:
611
1005
  #
612
- # There are a number of strategies for matching whitespace:
1006
+ # * `/[[:ascii:]]/`: Matches a character in the ASCII character set.
1007
+ # * `/[[:word:]]/`: Matches a character in one of these Unicode character
1008
+ # categories or having one of these Unicode properties:
613
1009
  #
614
- # * Use a pattern such as `\s` or `\p{Space}`.
615
- # * Use escaped whitespace such as `\ `, i.e. a space preceded by a backslash.
616
- # * Use a character class such as `[ ]`.
1010
+ # * Unicode categories:
1011
+ # * `Mark` (`M`).
1012
+ # * `Decimal Number` (`Nd`)
1013
+ # * `Connector Punctuation` (`Pc`).
617
1014
  #
1015
+ # * Unicode properties:
1016
+ # * `Alpha`
1017
+ # * `Join_Control`
618
1018
  #
619
- # Comments can be included in a non-`x` pattern with the `(?#`*comment*`)`
620
- # construct, where *comment* is arbitrary text ignored by the regexp engine.
1019
+ # ### Comments
621
1020
  #
622
- # Comments in regexp literals cannot include unescaped terminator characters.
1021
+ # A comment may be included in a regexp pattern using the `(?#`*comment*`)`
1022
+ # construct, where *comment* is arbitrary text that is to be ignored by the
1023
+ # regexp engine:
623
1024
  #
624
- # ## Encoding
1025
+ # /foo(?#Ignore me)bar/.match('foobar') # => #<MatchData "foobar">
625
1026
  #
626
- # Regular expressions are assumed to use the source encoding. This can be
627
- # overridden with one of the following modifiers.
1027
+ # The comment may not include an unescaped terminator character.
628
1028
  #
629
- # * `/`*pat*`/u` - UTF-8
630
- # * `/`*pat*`/e` - EUC-JP
631
- # * `/`*pat*`/s` - Windows-31J
632
- # * `/`*pat*`/n` - ASCII-8BIT
1029
+ # See also [Extended Mode](rdoc-ref:Regexp@Extended+Mode).
633
1030
  #
1031
+ # ## Modes
634
1032
  #
635
- # A regexp can be matched against a string when they either share an encoding,
636
- # or the regexp's encoding is *US-ASCII* and the string's encoding is
637
- # ASCII-compatible.
1033
+ # Each of these modifiers sets a mode for the regexp:
1034
+ #
1035
+ # * `i`: `/*pattern*/i` sets [Case-Insensitive
1036
+ # Mode](rdoc-ref:Regexp@Case-Insensitive+Mode).
1037
+ # * `m`: `/*pattern*/m` sets [Multiline Mode](rdoc-ref:Regexp@Multiline+Mode).
1038
+ # * `x`: `/*pattern*/x` sets [Extended Mode](rdoc-ref:Regexp@Extended+Mode).
1039
+ # * `o`: `/*pattern*/o` sets [Interpolation
1040
+ # Mode](rdoc-ref:Regexp@Interpolation+Mode).
1041
+ #
1042
+ # Any, all, or none of these may be applied.
1043
+ #
1044
+ # Modifiers `i`, `m`, and `x` may be applied to subexpressions:
1045
+ #
1046
+ # * `(?*modifier*)` turns the mode "on" for ensuing subexpressions
1047
+ # * `(?-*modifier*)` turns the mode "off" for ensuing subexpressions
1048
+ # * `(?*modifier*:*subexp*)` turns the mode "on" for *subexp* within the group
1049
+ # * `(?-*modifier*:*subexp*)` turns the mode "off" for *subexp* within the
1050
+ # group
1051
+ #
1052
+ # Example:
1053
+ #
1054
+ # re = /(?i)te(?-i)st/
1055
+ # re.match('test') # => #<MatchData "test">
1056
+ # re.match('TEst') # => #<MatchData "TEst">
1057
+ # re.match('TEST') # => nil
1058
+ # re.match('teST') # => nil
1059
+ #
1060
+ # re = /t(?i:e)st/
1061
+ # re.match('test') # => #<MatchData "test">
1062
+ # re.match('tEst') # => #<MatchData "tEst">
1063
+ # re.match('tEST') # => nil
1064
+ #
1065
+ # Method Regexp#options returns an integer whose value shows the settings for
1066
+ # case-insensitivity mode, multiline mode, and extended mode.
1067
+ #
1068
+ # ### Case-Insensitive Mode
1069
+ #
1070
+ # By default, a regexp is case-sensitive:
1071
+ #
1072
+ # /foo/.match('FOO') # => nil
1073
+ #
1074
+ # Modifier `i` enables case-insensitive mode:
1075
+ #
1076
+ # /foo/i.match('FOO')
1077
+ # # => #<MatchData "FOO">
1078
+ #
1079
+ # Method Regexp#casefold? returns whether the mode is case-insensitive.
1080
+ #
1081
+ # ### Multiline Mode
1082
+ #
1083
+ # The multiline-mode in Ruby is what is commonly called a "dot-all mode":
1084
+ #
1085
+ # * Without the `m` modifier, the subexpression `.` does not match newlines:
1086
+ #
1087
+ # /a.c/.match("a\nc") # => nil
1088
+ #
1089
+ # * With the modifier, it does match:
1090
+ #
1091
+ # /a.c/m.match("a\nc") # => #<MatchData "a\nc">
1092
+ #
1093
+ # Unlike other languages, the modifier `m` does not affect the anchors `^` and
1094
+ # `$`. These anchors always match at line-boundaries in Ruby.
1095
+ #
1096
+ # ### Extended Mode
1097
+ #
1098
+ # Modifier `x` enables extended mode, which means that:
1099
+ #
1100
+ # * Literal white space in the pattern is to be ignored.
1101
+ # * Character `#` marks the remainder of its containing line as a comment,
1102
+ # which is also to be ignored for matching purposes.
1103
+ #
1104
+ # In extended mode, whitespace and comments may be used to form a
1105
+ # self-documented regexp.
1106
+ #
1107
+ # Regexp not in extended mode (matches some Roman numerals):
1108
+ #
1109
+ # pattern = '^M{0,3}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$'
1110
+ # re = /#{pattern}/
1111
+ # re.match('MCMXLIII') # => #<MatchData "MCMXLIII" 1:"CM" 2:"XL" 3:"III">
1112
+ #
1113
+ # Regexp in extended mode:
1114
+ #
1115
+ # pattern = <<-EOT
1116
+ # ^ # beginning of string
1117
+ # M{0,3} # thousands - 0 to 3 Ms
1118
+ # (CM|CD|D?C{0,3}) # hundreds - 900 (CM), 400 (CD), 0-300 (0 to 3 Cs),
1119
+ # # or 500-800 (D, followed by 0 to 3 Cs)
1120
+ # (XC|XL|L?X{0,3}) # tens - 90 (XC), 40 (XL), 0-30 (0 to 3 Xs),
1121
+ # # or 50-80 (L, followed by 0 to 3 Xs)
1122
+ # (IX|IV|V?I{0,3}) # ones - 9 (IX), 4 (IV), 0-3 (0 to 3 Is),
1123
+ # # or 5-8 (V, followed by 0 to 3 Is)
1124
+ # $ # end of string
1125
+ # EOT
1126
+ # re = /#{pattern}/x
1127
+ # re.match('MCMXLIII') # => #<MatchData "MCMXLIII" 1:"CM" 2:"XL" 3:"III">
1128
+ #
1129
+ # ### Interpolation Mode
1130
+ #
1131
+ # Modifier `o` means that the first time a literal regexp with interpolations is
1132
+ # encountered, the generated Regexp object is saved and used for all future
1133
+ # evaluations of that literal regexp. Without modifier `o`, the generated Regexp
1134
+ # is not saved, so each evaluation of the literal regexp generates a new Regexp
1135
+ # object.
1136
+ #
1137
+ # Without modifier `o`:
1138
+ #
1139
+ # def letters; sleep 5; /[A-Z][a-z]/; end
1140
+ # words = %w[abc def xyz]
1141
+ # start = Time.now
1142
+ # words.each {|word| word.match(/\A[#{letters}]+\z/) }
1143
+ # Time.now - start # => 15.0174892
1144
+ #
1145
+ # With modifier `o`:
1146
+ #
1147
+ # start = Time.now
1148
+ # words.each {|word| word.match(/\A[#{letters}]+\z/o) }
1149
+ # Time.now - start # => 5.0010866
1150
+ #
1151
+ # Note that if the literal regexp does not have interpolations, the `o` behavior
1152
+ # is the default.
1153
+ #
1154
+ # ## Encodings
1155
+ #
1156
+ # By default, a regexp with only US-ASCII characters has US-ASCII encoding:
1157
+ #
1158
+ # re = /foo/
1159
+ # re.source.encoding # => #<Encoding:US-ASCII>
1160
+ # re.encoding # => #<Encoding:US-ASCII>
1161
+ #
1162
+ # A regular expression containing non-US-ASCII characters is assumed to use the
1163
+ # source encoding. This can be overridden with one of the following modifiers.
1164
+ #
1165
+ # * `/*pat*/n`: US-ASCII if only containing US-ASCII characters, otherwise
1166
+ # ASCII-8BIT:
1167
+ #
1168
+ # /foo/n.encoding # => #<Encoding:US-ASCII>
1169
+ # /foo\xff/n.encoding # => #<Encoding:ASCII-8BIT>
1170
+ # /foo\x7f/n.encoding # => #<Encoding:US-ASCII>
1171
+ #
1172
+ # * `/*pat*/u`: UTF-8
1173
+ #
1174
+ # /foo/u.encoding # => #<Encoding:UTF-8>
1175
+ #
1176
+ # * `/*pat*/e`: EUC-JP
1177
+ #
1178
+ # /foo/e.encoding # => #<Encoding:EUC-JP>
1179
+ #
1180
+ # * `/*pat*/s`: Windows-31J
1181
+ #
1182
+ # /foo/s.encoding # => #<Encoding:Windows-31J>
1183
+ #
1184
+ # A regexp can be matched against a target string when either:
1185
+ #
1186
+ # * They have the same encoding.
1187
+ # * The regexp's encoding is a fixed encoding and the string contains only
1188
+ # ASCII characters. Method Regexp#fixed_encoding? returns whether the regexp
1189
+ # has a *fixed* encoding.
638
1190
  #
639
1191
  # If a match between incompatible encodings is attempted an
640
1192
  # `Encoding::CompatibilityError` exception is raised.
641
1193
  #
642
- # The `Regexp#fixed_encoding?` predicate indicates whether the regexp has a
643
- # *fixed* encoding, that is one incompatible with ASCII. A regexp's encoding can
644
- # be explicitly fixed by supplying `Regexp::FIXEDENCODING` as the second
645
- # argument of `Regexp.new`:
1194
+ # Example:
1195
+ #
1196
+ # re = eval("# encoding: ISO-8859-1\n/foo\\xff?/")
1197
+ # re.encoding # => #<Encoding:ISO-8859-1>
1198
+ # re =~ "foo".encode("UTF-8") # => 0
1199
+ # re =~ "foo\u0100" # Raises Encoding::CompatibilityError
646
1200
  #
647
- # r = Regexp.new("a".force_encoding("iso-8859-1"),Regexp::FIXEDENCODING)
648
- # r =~ "a\u3042"
649
- # # raises Encoding::CompatibilityError: incompatible encoding regexp match
650
- # # (ISO-8859-1 regexp with UTF-8 string)
1201
+ # The encoding may be explicitly fixed by including Regexp::FIXEDENCODING in the
1202
+ # second argument for Regexp.new:
651
1203
  #
652
- # ## Special global variables
1204
+ # # Regexp with encoding ISO-8859-1.
1205
+ # re = Regexp.new("a".force_encoding('iso-8859-1'), Regexp::FIXEDENCODING)
1206
+ # re.encoding # => #<Encoding:ISO-8859-1>
1207
+ # # Target string with encoding UTF-8.
1208
+ # s = "a\u3042"
1209
+ # s.encoding # => #<Encoding:UTF-8>
1210
+ # re.match(s) # Raises Encoding::CompatibilityError.
653
1211
  #
654
- # Pattern matching sets some global variables :
655
- # * `$~` is equivalent to Regexp.last_match;
656
- # * `$&` contains the complete matched text;
657
- # * `$`` contains string before match;
658
- # * `$'` contains string after match;
659
- # * `$1`, `$2` and so on contain text matching first, second, etc capture
660
- # group;
661
- # * `$+` contains last capture group.
1212
+ # ## Timeouts
662
1213
  #
1214
+ # When either a regexp source or a target string comes from untrusted input,
1215
+ # malicious values could become a denial-of-service attack; to prevent such an
1216
+ # attack, it is wise to set a timeout.
663
1217
  #
664
- # Example:
1218
+ # Regexp has two timeout values:
665
1219
  #
666
- # m = /s(\w{2}).*(c)/.match('haystack') #=> #<MatchData "stac" 1:"ta" 2:"c">
667
- # $~ #=> #<MatchData "stac" 1:"ta" 2:"c">
668
- # Regexp.last_match #=> #<MatchData "stac" 1:"ta" 2:"c">
1220
+ # * A class default timeout, used for a regexp whose instance timeout is
1221
+ # `nil`; this default is initially `nil`, and may be set by method
1222
+ # Regexp.timeout=:
669
1223
  #
670
- # $& #=> "stac"
671
- # # same as m[0]
672
- # $` #=> "hay"
673
- # # same as m.pre_match
674
- # $' #=> "k"
675
- # # same as m.post_match
676
- # $1 #=> "ta"
677
- # # same as m[1]
678
- # $2 #=> "c"
679
- # # same as m[2]
680
- # $3 #=> nil
681
- # # no third group in pattern
682
- # $+ #=> "c"
683
- # # same as m[-1]
1224
+ # Regexp.timeout # => nil
1225
+ # Regexp.timeout = 3.0
1226
+ # Regexp.timeout # => 3.0
684
1227
  #
685
- # These global variables are thread-local and method-local variables.
1228
+ # * An instance timeout, which defaults to `nil` and may be set in Regexp.new:
686
1229
  #
687
- # ## Performance
1230
+ # re = Regexp.new('foo', timeout: 5.0)
1231
+ # re.timeout # => 5.0
688
1232
  #
689
- # Certain pathological combinations of constructs can lead to abysmally bad
690
- # performance.
1233
+ # When regexp.timeout is `nil`, the timeout "falls through" to Regexp.timeout;
1234
+ # when regexp.timeout is non-`nil`, that value controls timing out:
691
1235
  #
692
- # Consider a string of 25 *a*s, a *d*, 4 *a*s, and a *c*.
1236
+ # | regexp.timeout Value | Regexp.timeout Value | Result |
1237
+ # |----------------------|----------------------|-----------------------------|
1238
+ # | nil | nil | Never times out. |
1239
+ # | nil | Float | Times out in Float seconds. |
1240
+ # | Float | Any | Times out in Float seconds. |
693
1241
  #
694
- # s = 'a' * 25 + 'd' + 'a' * 4 + 'c'
695
- # #=> "aaaaaaaaaaaaaaaaaaaaaaaaadaaaac"
1242
+ # ## Optimization
696
1243
  #
697
- # The following patterns match instantly as you would expect:
1244
+ # For certain values of the pattern and target string, matching time can grow
1245
+ # polynomially or exponentially in relation to the input size; the potential
1246
+ # vulnerability arising from this is the [regular expression
1247
+ # denial-of-service](https://en.wikipedia.org/wiki/ReDoS) (ReDoS) attack.
698
1248
  #
699
- # /(b|a)/ =~ s #=> 0
700
- # /(b|a+)/ =~ s #=> 0
701
- # /(b|a+)*/ =~ s #=> 0
1249
+ # Regexp matching can apply an optimization to prevent ReDoS attacks. When the
1250
+ # optimization is applied, matching time increases linearly (not polynomially or
1251
+ # exponentially) in relation to the input size, and a ReDoS attack is not
1252
+ # possible.
702
1253
  #
703
- # However, the following pattern takes appreciably longer:
1254
+ # This optimization is applied if the pattern meets these criteria:
704
1255
  #
705
- # /(b|a+)*c/ =~ s #=> 26
1256
+ # * No backreferences.
1257
+ # * No subexpression calls.
1258
+ # * No nested lookaround anchors or atomic groups.
1259
+ # * No nested quantifiers with counting (i.e. no nested `{n}`, `{min,}`,
1260
+ # `{,max}`, or `{min,max}` style quantifiers)
706
1261
  #
707
- # This happens because an atom in the regexp is quantified by both an immediate
708
- # `+` and an enclosing `*` with nothing to differentiate which is in control of
709
- # any particular character. The nondeterminism that results produces
710
- # super-linear performance. (Consult *Mastering Regular Expressions* (3rd ed.),
711
- # pp 222, by *Jeffery Friedl*, for an in-depth analysis). This particular case
712
- # can be fixed by use of atomic grouping, which prevents the unnecessary
713
- # backtracking:
1262
+ # You can use method Regexp.linear_time? to determine whether a pattern meets
1263
+ # these criteria:
714
1264
  #
715
- # (start = Time.now) && /(b|a+)*c/ =~ s && (Time.now - start)
716
- # #=> 24.702736882
717
- # (start = Time.now) && /(?>b|a+)*c/ =~ s && (Time.now - start)
718
- # #=> 0.000166571
1265
+ # Regexp.linear_time?(/a*/) # => true
1266
+ # Regexp.linear_time?('a*') # => true
1267
+ # Regexp.linear_time?(/(a*)\1/) # => false
719
1268
  #
720
- # A similar case is typified by the following example, which takes approximately
721
- # 60 seconds to execute for me:
1269
+ # However, an untrusted source may not be safe even if the method returns
1270
+ # `true`, because the optimization uses memoization (which may invoke large
1271
+ # memory consumption).
722
1272
  #
723
- # Match a string of 29 *a*s against a pattern of 29 optional *a*s followed by 29
724
- # mandatory *a*s:
1273
+ # ## References
725
1274
  #
726
- # Regexp.new('a?' * 29 + 'a' * 29) =~ 'a' * 29
1275
+ # Read (online PDF books):
727
1276
  #
728
- # The 29 optional *a*s match the string, but this prevents the 29 mandatory *a*s
729
- # that follow from matching. Ruby must then backtrack repeatedly so as to
730
- # satisfy as many of the optional matches as it can while still matching the
731
- # mandatory 29. It is plain to us that none of the optional matches can succeed,
732
- # but this fact unfortunately eludes Ruby.
1277
+ # * [Mastering Regular
1278
+ # Expressions](https://ia902508.us.archive.org/10/items/allitebooks-02/Maste
1279
+ # ring%20Regular%20Expressions%2C%203rd%20Edition.pdf) by Jeffrey E.F.
1280
+ # Friedl.
1281
+ # * [Regular Expressions
1282
+ # Cookbook](https://doc.lagout.org/programmation/Regular%20Expressions/Regul
1283
+ # ar%20Expressions%20Cookbook_%20Detailed%20Solutions%20in%20Eight%20Program
1284
+ # ming%20Languages%20%282nd%20ed.%29%20%5BGoyvaerts%20%26%20Levithan%202012-
1285
+ # 09-06%5D.pdf) by Jan Goyvaerts & Steven Levithan.
733
1286
  #
734
- # The best way to improve performance is to significantly reduce the amount of
735
- # backtracking needed. For this case, instead of individually matching 29
736
- # optional *a*s, a range of optional *a*s can be matched all at once with
737
- # *a{0,29}*:
1287
+ # Explore, test (interactive online editor):
738
1288
  #
739
- # Regexp.new('a{0,29}' + 'a' * 29) =~ 'a' * 29
1289
+ # * [Rubular](https://rubular.com/).
740
1290
  #
741
1291
  class Regexp
742
- # <!--
743
- # rdoc-file=re.c
744
- # - Regexp.new(string, [options]) -> regexp
745
- # - Regexp.new(regexp) -> regexp
746
- # - Regexp.compile(string, [options]) -> regexp
747
- # - Regexp.compile(regexp) -> regexp
748
- # -->
749
- # Constructs a new regular expression from `pattern`, which can be either a
750
- # String or a Regexp (in which case that regexp's options are propagated), and
751
- # new options may not be specified (a change as of Ruby 1.8).
752
- #
753
- # If `options` is an Integer, it should be one or more of the constants
754
- # Regexp::EXTENDED, Regexp::IGNORECASE, and Regexp::MULTILINE, *or*-ed together.
755
- # Otherwise, if `options` is not `nil` or `false`, the regexp will be case
756
- # insensitive.
757
- #
758
- # r1 = Regexp.new('^a-z+:\\s+\w+') #=> /^a-z+:\s+\w+/
759
- # r2 = Regexp.new('cat', true) #=> /cat/i
760
- # r3 = Regexp.new(r2) #=> /cat/i
761
- # r4 = Regexp.new('dog', Regexp::EXTENDED | Regexp::IGNORECASE) #=> /dog/ix
762
- #
763
- def initialize: (String string, ?Integer | nil | false | top options, ?String kcode) -> Object
764
- | (Regexp regexp) -> void
1292
+ # Represents an object's ability to be converted to a `Regexp`.
1293
+ #
1294
+ # This is only used in `Regexp.try_convert` and `Regexp.union` within the standard library.
1295
+ interface _ToRegexp
1296
+ # Converts `self` to a `Regexp`.
1297
+ def to_regexp: () -> Regexp
1298
+ end
1299
+
1300
+ class TimeoutError < RegexpError
1301
+ end
1302
+
1303
+ # <!-- rdoc-file=re.c -->
1304
+ # see Regexp.options and Regexp.new
1305
+ #
1306
+ EXTENDED: Integer
1307
+
1308
+ # <!-- rdoc-file=re.c -->
1309
+ # see Regexp.options and Regexp.new
1310
+ #
1311
+ FIXEDENCODING: Integer
1312
+
1313
+ # <!-- rdoc-file=re.c -->
1314
+ # see Regexp.options and Regexp.new
1315
+ #
1316
+ IGNORECASE: Integer
1317
+
1318
+ # <!-- rdoc-file=re.c -->
1319
+ # see Regexp.options and Regexp.new
1320
+ #
1321
+ MULTILINE: Integer
1322
+
1323
+ # <!-- rdoc-file=re.c -->
1324
+ # see Regexp.options and Regexp.new
1325
+ #
1326
+ NOENCODING: Integer
765
1327
 
766
1328
  # <!--
767
1329
  # rdoc-file=re.c
@@ -773,192 +1335,324 @@ class Regexp
773
1335
 
774
1336
  # <!--
775
1337
  # rdoc-file=re.c
776
- # - Regexp.escape(str) -> string
777
- # - Regexp.quote(str) -> string
1338
+ # - Regexp.escape(string) -> new_string
778
1339
  # -->
779
- # Escapes any characters that would have special meaning in a regular
780
- # expression. Returns a new escaped string with the same or compatible encoding.
781
- # For any string, `Regexp.new(Regexp.escape(*str*))=~*str`* will be true.
1340
+ # Returns a new string that escapes any characters that have special meaning in
1341
+ # a regular expression:
1342
+ #
1343
+ # s = Regexp.escape('\*?{}.') # => "\\\\\\*\\?\\{\\}\\."
782
1344
  #
783
- # Regexp.escape('\*?{}.') #=> \\\*\?\{\}\.
1345
+ # For any string `s`, this call returns a MatchData object:
784
1346
  #
785
- def self.escape: (String | Symbol str) -> String
1347
+ # r = Regexp.new(Regexp.escape(s)) # => /\\\\\\\*\\\?\\\{\\\}\\\./
1348
+ # r.match(s) # => #<MatchData "\\\\\\*\\?\\{\\}\\.">
1349
+ #
1350
+ def self.escape: (interned str) -> String
786
1351
 
787
1352
  # <!--
788
1353
  # rdoc-file=re.c
789
- # - Regexp.last_match -> matchdata
790
- # - Regexp.last_match(n) -> str
1354
+ # - Regexp.last_match -> matchdata or nil
1355
+ # - Regexp.last_match(n) -> string or nil
1356
+ # - Regexp.last_match(name) -> string or nil
791
1357
  # -->
792
- # The first form returns the MatchData object generated by the last successful
793
- # pattern match. Equivalent to reading the special global variable `$~` (see
794
- # Special global variables in Regexp for details).
1358
+ # With no argument, returns the value of `$~`, which is the result of the most
1359
+ # recent pattern match (see [Regexp global
1360
+ # variables](rdoc-ref:Regexp@Global+Variables)):
1361
+ #
1362
+ # /c(.)t/ =~ 'cat' # => 0
1363
+ # Regexp.last_match # => #<MatchData "cat" 1:"a">
1364
+ # /a/ =~ 'foo' # => nil
1365
+ # Regexp.last_match # => nil
1366
+ #
1367
+ # With non-negative integer argument `n`, returns the _n_th field in the
1368
+ # matchdata, if any, or nil if none:
795
1369
  #
796
- # The second form returns the *n*th field in this MatchData object. *n* can be a
797
- # string or symbol to reference a named capture.
1370
+ # /c(.)t/ =~ 'cat' # => 0
1371
+ # Regexp.last_match(0) # => "cat"
1372
+ # Regexp.last_match(1) # => "a"
1373
+ # Regexp.last_match(2) # => nil
798
1374
  #
799
- # Note that the last_match is local to the thread and method scope of the method
800
- # that did the pattern match.
1375
+ # With negative integer argument `n`, counts backwards from the last field:
801
1376
  #
802
- # /c(.)t/ =~ 'cat' #=> 0
803
- # Regexp.last_match #=> #<MatchData "cat" 1:"a">
804
- # Regexp.last_match(0) #=> "cat"
805
- # Regexp.last_match(1) #=> "a"
806
- # Regexp.last_match(2) #=> nil
1377
+ # Regexp.last_match(-1) # => "a"
807
1378
  #
808
- # /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ "var = val"
809
- # Regexp.last_match #=> #<MatchData "var = val" lhs:"var" rhs:"val">
810
- # Regexp.last_match(:lhs) #=> "var"
811
- # Regexp.last_match(:rhs) #=> "val"
1379
+ # With string or symbol argument `name`, returns the string value for the named
1380
+ # capture, if any:
1381
+ #
1382
+ # /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ 'var = val'
1383
+ # Regexp.last_match # => #<MatchData "var = val" lhs:"var" rhs:"val">
1384
+ # Regexp.last_match(:lhs) # => "var"
1385
+ # Regexp.last_match('rhs') # => "val"
1386
+ # Regexp.last_match('foo') # Raises IndexError.
812
1387
  #
813
1388
  def self.last_match: () -> MatchData?
814
- | (Integer n) -> String?
815
- | (Symbol | String n) -> String?
1389
+ | (MatchData::capture capture) -> String?
1390
+
1391
+ # <!--
1392
+ # rdoc-file=re.c
1393
+ # - Regexp.linear_time?(re)
1394
+ # - Regexp.linear_time?(string, options = 0)
1395
+ # -->
1396
+ # Returns `true` if matching against `re` can be done in linear time to the
1397
+ # input string.
1398
+ #
1399
+ # Regexp.linear_time?(/re/) # => true
1400
+ #
1401
+ # Note that this is a property of the ruby interpreter, not of the argument
1402
+ # regular expression. Identical regexp can or cannot run in linear time
1403
+ # depending on your ruby binary. Neither forward nor backward compatibility is
1404
+ # guaranteed about the return value of this method. Our current algorithm is
1405
+ # (*1) but this is subject to change in the future. Alternative implementations
1406
+ # can also behave differently. They might always return false for everything.
1407
+ #
1408
+ # (*1): https://doi.org/10.1109/SP40001.2021.00032
1409
+ #
1410
+ def self.linear_time?: (Regexp regex, ?nil, ?timeout: untyped) -> bool
1411
+ | (string regex, ?int | string | bool | nil options, ?timeout: untyped) -> bool
1412
+
1413
+ # <!--
1414
+ # rdoc-file=re.c
1415
+ # - Regexp.escape(string) -> new_string
1416
+ # -->
1417
+ # Returns a new string that escapes any characters that have special meaning in
1418
+ # a regular expression:
1419
+ #
1420
+ # s = Regexp.escape('\*?{}.') # => "\\\\\\*\\?\\{\\}\\."
1421
+ #
1422
+ # For any string `s`, this call returns a MatchData object:
1423
+ #
1424
+ # r = Regexp.new(Regexp.escape(s)) # => /\\\\\\\*\\\?\\\{\\\}\\\./
1425
+ # r.match(s) # => #<MatchData "\\\\\\*\\?\\{\\}\\.">
1426
+ #
1427
+ alias self.quote self.escape
816
1428
 
817
1429
  # <!--
818
1430
  # rdoc-file=re.c
819
- # - Regexp.escape(str) -> string
820
- # - Regexp.quote(str) -> string
1431
+ # - Regexp.try_convert(object) -> regexp or nil
821
1432
  # -->
822
- # Escapes any characters that would have special meaning in a regular
823
- # expression. Returns a new escaped string with the same or compatible encoding.
824
- # For any string, `Regexp.new(Regexp.escape(*str*))=~*str`* will be true.
1433
+ # Returns `object` if it is a regexp:
1434
+ #
1435
+ # Regexp.try_convert(/re/) # => /re/
1436
+ #
1437
+ # Otherwise if `object` responds to `:to_regexp`, calls `object.to_regexp` and
1438
+ # returns the result.
1439
+ #
1440
+ # Returns `nil` if `object` does not respond to `:to_regexp`.
825
1441
  #
826
- # Regexp.escape('\*?{}.') #=> \\\*\?\{\}\.
1442
+ # Regexp.try_convert('re') # => nil
1443
+ #
1444
+ # Raises an exception unless `object.to_regexp` returns a regexp.
1445
+ #
1446
+ def self.try_convert: (Regexp | _ToRegexp regexp_like) -> Regexp
1447
+ | (untyped other) -> Regexp?
1448
+
1449
+ # <!--
1450
+ # rdoc-file=re.c
1451
+ # - Regexp.timeout -> float or nil
1452
+ # -->
1453
+ # It returns the current default timeout interval for Regexp matching in seconds.
1454
+ # `nil` means no default timeout configuration.
827
1455
  #
828
- def self.quote: (String | Symbol str) -> String
1456
+ def self.timeout: () -> Float?
829
1457
 
830
1458
  # <!--
831
1459
  # rdoc-file=re.c
832
- # - Regexp.try_convert(obj) -> re or nil
1460
+ # - Regexp.timeout = float or nil
833
1461
  # -->
834
- # Try to convert *obj* into a Regexp, using to_regexp method. Returns converted
835
- # regexp or nil if *obj* cannot be converted for any reason.
1462
+ # It sets the default timeout interval for Regexp matching in seconds. `nil`
1463
+ # means no default timeout configuration. This configuration is process-global.
1464
+ # If you want to set timeout for each Regexp, use `timeout` keyword for
1465
+ # `Regexp.new`.
1466
+ #
1467
+ # Regexp.timeout = 1
1468
+ # /^a*b?a*$/ =~ "a" * 100000 + "x" #=> regexp match timeout (Regexp::TimeoutError)
1469
+ #
1470
+ def self.timeout=: [T < _ToF] (T timeout) -> T
1471
+
1472
+ # <!--
1473
+ # rdoc-file=re.c
1474
+ # - Regexp.union(*patterns) -> regexp
1475
+ # - Regexp.union(array_of_patterns) -> regexp
1476
+ # -->
1477
+ # Returns a new regexp that is the union of the given patterns:
1478
+ #
1479
+ # r = Regexp.union(%w[cat dog]) # => /cat|dog/
1480
+ # r.match('cat') # => #<MatchData "cat">
1481
+ # r.match('dog') # => #<MatchData "dog">
1482
+ # r.match('cog') # => nil
1483
+ #
1484
+ # For each pattern that is a string, `Regexp.new(pattern)` is used:
1485
+ #
1486
+ # Regexp.union('penzance') # => /penzance/
1487
+ # Regexp.union('a+b*c') # => /a\+b\*c/
1488
+ # Regexp.union('skiing', 'sledding') # => /skiing|sledding/
1489
+ # Regexp.union(['skiing', 'sledding']) # => /skiing|sledding/
1490
+ #
1491
+ # For each pattern that is a regexp, it is used as is, including its flags:
1492
+ #
1493
+ # Regexp.union(/foo/i, /bar/m, /baz/x)
1494
+ # # => /(?i-mx:foo)|(?m-ix:bar)|(?x-mi:baz)/
1495
+ # Regexp.union([/foo/i, /bar/m, /baz/x])
1496
+ # # => /(?i-mx:foo)|(?m-ix:bar)|(?x-mi:baz)/
836
1497
  #
837
- # Regexp.try_convert(/re/) #=> /re/
838
- # Regexp.try_convert("re") #=> nil
1498
+ # With no arguments, returns `/(?!)/`:
839
1499
  #
840
- # o = Object.new
841
- # Regexp.try_convert(o) #=> nil
842
- # def o.to_regexp() /foo/ end
843
- # Regexp.try_convert(o) #=> /foo/
1500
+ # Regexp.union # => /(?!)/
844
1501
  #
845
- def self.try_convert: (untyped obj) -> Regexp?
1502
+ # If any regexp pattern contains captures, the behavior is unspecified.
1503
+ #
1504
+ def self.union: (*Regexp | _ToRegexp | string patterns) -> Regexp
1505
+ | (array[Regexp | _ToRegexp | string] patterns) -> Regexp
1506
+ | (Symbol | [Symbol] symbol_pattern) -> Regexp
846
1507
 
847
1508
  # <!--
848
1509
  # rdoc-file=re.c
849
- # - Regexp.union(pat1, pat2, ...) -> new_regexp
850
- # - Regexp.union(pats_ary) -> new_regexp
1510
+ # - Regexp.new(string, options = 0, timeout: nil) -> regexp
1511
+ # - Regexp.new(regexp, timeout: nil) -> regexp
851
1512
  # -->
852
- # Return a Regexp object that is the union of the given *pattern*s, i.e., will
853
- # match any of its parts. The *pattern*s can be Regexp objects, in which case
854
- # their options will be preserved, or Strings. If no patterns are given, returns
855
- # `/(?!)/`. The behavior is unspecified if any given *pattern* contains
856
- # capture.
857
- #
858
- # Regexp.union #=> /(?!)/
859
- # Regexp.union("penzance") #=> /penzance/
860
- # Regexp.union("a+b*c") #=> /a\+b\*c/
861
- # Regexp.union("skiing", "sledding") #=> /skiing|sledding/
862
- # Regexp.union(["skiing", "sledding"]) #=> /skiing|sledding/
863
- # Regexp.union(/dogs/, /cats/i) #=> /(?-mix:dogs)|(?i-mx:cats)/
864
- #
865
- # Note: the arguments for ::union will try to be converted into a regular
866
- # expression literal via #to_regexp.
867
- #
868
- def self.union: () -> Regexp
869
- | (String | Regexp pat1, *String | Regexp pat2) -> Regexp
870
- | (::Array[String | Regexp]) -> Regexp
1513
+ # With argument `string` given, returns a new regexp with the given string and
1514
+ # options:
1515
+ #
1516
+ # r = Regexp.new('foo') # => /foo/
1517
+ # r.source # => "foo"
1518
+ # r.options # => 0
1519
+ #
1520
+ # Optional argument `options` is one of the following:
1521
+ #
1522
+ # * A String of options:
1523
+ #
1524
+ # Regexp.new('foo', 'i') # => /foo/i
1525
+ # Regexp.new('foo', 'im') # => /foo/im
1526
+ #
1527
+ # * The bit-wise OR of one or more of the constants Regexp::EXTENDED,
1528
+ # Regexp::IGNORECASE, Regexp::MULTILINE, and Regexp::NOENCODING:
1529
+ #
1530
+ # Regexp.new('foo', Regexp::IGNORECASE) # => /foo/i
1531
+ # Regexp.new('foo', Regexp::EXTENDED) # => /foo/x
1532
+ # Regexp.new('foo', Regexp::MULTILINE) # => /foo/m
1533
+ # Regexp.new('foo', Regexp::NOENCODING) # => /foo/n
1534
+ # flags = Regexp::IGNORECASE | Regexp::EXTENDED | Regexp::MULTILINE
1535
+ # Regexp.new('foo', flags) # => /foo/mix
1536
+ #
1537
+ # * `nil` or `false`, which is ignored.
1538
+ # * Any other truthy value, in which case the regexp will be case-insensitive.
1539
+ #
1540
+ # If optional keyword argument `timeout` is given, its float value overrides the
1541
+ # timeout interval for the class, Regexp.timeout. If `nil` is passed as
1542
+ # +timeout, it uses the timeout interval for the class, Regexp.timeout.
1543
+ #
1544
+ # With argument `regexp` given, returns a new regexp. The source, options,
1545
+ # timeout are the same as `regexp`. `options` and `n_flag` arguments are
1546
+ # ineffective. The timeout can be overridden by `timeout` keyword.
1547
+ #
1548
+ # options = Regexp::MULTILINE
1549
+ # r = Regexp.new('foo', options, timeout: 1.1) # => /foo/m
1550
+ # r2 = Regexp.new(r) # => /foo/m
1551
+ # r2.timeout # => 1.1
1552
+ # r3 = Regexp.new(r, timeout: 3.14) # => /foo/m
1553
+ # r3.timeout # => 3.14
1554
+ #
1555
+ def initialize: (Regexp regexp, ?timeout: _ToF?) -> void
1556
+ | (string pattern, ?int | string | bool | nil options, ?timeout: _ToF?) -> void
871
1557
 
872
- public
1558
+ def initialize_copy: (self object) -> self
873
1559
 
874
1560
  # <!-- rdoc-file=re.c -->
875
- # Equality---Two regexps are equal if their patterns are identical, they have
876
- # the same character set code, and their `casefold?` values are the same.
1561
+ # Returns `true` if `object` is another Regexp whose pattern, flags, and
1562
+ # encoding are the same as `self`, `false` otherwise:
877
1563
  #
878
- # /abc/ == /abc/x #=> false
879
- # /abc/ == /abc/i #=> false
880
- # /abc/ == /abc/u #=> false
881
- # /abc/u == /abc/n #=> false
1564
+ # /foo/ == Regexp.new('foo') # => true
1565
+ # /foo/ == /foo/i # => false
1566
+ # /foo/ == Regexp.new('food') # => false
1567
+ # /foo/ == Regexp.new("abc".force_encoding("euc-jp")) # => false
882
1568
  #
883
1569
  def ==: (untyped other) -> bool
884
1570
 
885
1571
  # <!--
886
1572
  # rdoc-file=re.c
887
- # - rxp === str -> true or false
1573
+ # - regexp === string -> true or false
888
1574
  # -->
889
- # Case Equality---Used in case statements.
1575
+ # Returns `true` if `self` finds a match in `string`:
890
1576
  #
891
- # a = "HELLO"
892
- # case a
893
- # when /\A[a-z]*\z/; print "Lower case\n"
894
- # when /\A[A-Z]*\z/; print "Upper case\n"
895
- # else; print "Mixed case\n"
896
- # end
897
- # #=> "Upper case"
1577
+ # /^[a-z]*$/ === 'HELLO' # => false
1578
+ # /^[A-Z]*$/ === 'HELLO' # => true
898
1579
  #
899
- # Following a regular expression literal with the #=== operator allows you to
900
- # compare against a String.
1580
+ # This method is called in case statements:
901
1581
  #
902
- # /^[a-z]*$/ === "HELLO" #=> false
903
- # /^[A-Z]*$/ === "HELLO" #=> true
1582
+ # s = 'HELLO'
1583
+ # case s
1584
+ # when /\A[a-z]*\z/; print "Lower case\n"
1585
+ # when /\A[A-Z]*\z/; print "Upper case\n"
1586
+ # else print "Mixed case\n"
1587
+ # end # => "Upper case"
904
1588
  #
905
1589
  def ===: (untyped other) -> bool
906
1590
 
907
1591
  # <!--
908
1592
  # rdoc-file=re.c
909
- # - rxp =~ str -> integer or nil
1593
+ # - regexp =~ string -> integer or nil
910
1594
  # -->
911
- # Match---Matches *rxp* against *str*.
1595
+ # Returns the integer index (in characters) of the first match for `self` and
1596
+ # `string`, or `nil` if none; also sets the [Regexp global
1597
+ # variables](rdoc-ref:Regexp@Global+Variables):
1598
+ #
1599
+ # /at/ =~ 'input data' # => 7
1600
+ # $~ # => #<MatchData "at">
1601
+ # /ax/ =~ 'input data' # => nil
1602
+ # $~ # => nil
912
1603
  #
913
- # /at/ =~ "input data" #=> 7
914
- # /ax/ =~ "input data" #=> nil
1604
+ # Assigns named captures to local variables of the same names if and only if
1605
+ # `self`:
915
1606
  #
916
- # If `=~` is used with a regexp literal with named captures, captured strings
917
- # (or nil) is assigned to local variables named by the capture names.
1607
+ # * Is a regexp literal; see [Regexp
1608
+ # Literals](rdoc-ref:syntax/literals.rdoc@Regexp+Literals).
1609
+ # * Does not contain interpolations; see [Regexp
1610
+ # interpolation](rdoc-ref:Regexp@Interpolation+Mode).
1611
+ # * Is at the left of the expression.
918
1612
  #
919
- # /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ " x = y "
920
- # p lhs #=> "x"
921
- # p rhs #=> "y"
1613
+ # Example:
922
1614
  #
923
- # If it is not matched, nil is assigned for the variables.
1615
+ # /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ ' x = y '
1616
+ # p lhs # => "x"
1617
+ # p rhs # => "y"
924
1618
  #
925
- # /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ " x = "
926
- # p lhs #=> nil
927
- # p rhs #=> nil
1619
+ # Assigns `nil` if not matched:
928
1620
  #
929
- # This assignment is implemented in the Ruby parser. The parser detects
930
- # 'regexp-literal =~ expression' for the assignment. The regexp must be a
931
- # literal without interpolation and placed at left hand side.
1621
+ # /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ ' x = '
1622
+ # p lhs # => nil
1623
+ # p rhs # => nil
932
1624
  #
933
- # The assignment does not occur if the regexp is not a literal.
1625
+ # Does not make local variable assignments if `self` is not a regexp literal:
934
1626
  #
935
- # re = /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/
936
- # re =~ " x = y "
937
- # p lhs # undefined local variable
938
- # p rhs # undefined local variable
1627
+ # r = /(?<foo>\w+)\s*=\s*(?<bar>\w+)/
1628
+ # r =~ ' x = y '
1629
+ # p foo # Undefined local variable
1630
+ # p bar # Undefined local variable
939
1631
  #
940
- # A regexp interpolation, `#{}`, also disables the assignment.
1632
+ # The assignment does not occur if the regexp is not at the left:
941
1633
  #
942
- # rhs_pat = /(?<rhs>\w+)/
943
- # /(?<lhs>\w+)\s*=\s*#{rhs_pat}/ =~ "x = y"
944
- # p lhs # undefined local variable
1634
+ # ' x = y ' =~ /(?<foo>\w+)\s*=\s*(?<bar>\w+)/
1635
+ # p foo, bar # Undefined local variables
945
1636
  #
946
- # The assignment does not occur if the regexp is placed at the right hand side.
1637
+ # A regexp interpolation, `#{}`, also disables the assignment:
947
1638
  #
948
- # " x = y " =~ /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/
949
- # p lhs, rhs # undefined local variable
1639
+ # r = /(?<foo>\w+)/
1640
+ # /(?<foo>\w+)\s*=\s*#{r}/ =~ 'x = y'
1641
+ # p foo # Undefined local variable
950
1642
  #
951
- def =~: (String? str) -> Integer?
1643
+ def =~: (interned? string) -> Integer?
1644
+ | (nil) -> nil
952
1645
 
953
1646
  # <!--
954
1647
  # rdoc-file=re.c
955
- # - rxp.casefold? -> true or false
1648
+ # - casefold? -> true or false
956
1649
  # -->
957
- # Returns the value of the case-insensitive flag.
1650
+ # Returns `true` if the case-insensitivity flag in `self` is set, `false`
1651
+ # otherwise:
958
1652
  #
959
- # /a/.casefold? #=> false
960
- # /a/i.casefold? #=> true
961
- # /(?i:a)/.casefold? #=> false
1653
+ # /a/.casefold? # => false
1654
+ # /a/i.casefold? # => true
1655
+ # /(?i:a)/.casefold? # => false
962
1656
  #
963
1657
  def casefold?: () -> bool
964
1658
 
@@ -972,253 +1666,265 @@ class Regexp
972
1666
 
973
1667
  # <!--
974
1668
  # rdoc-file=re.c
975
- # - rxp == other_rxp -> true or false
976
- # - rxp.eql?(other_rxp) -> true or false
1669
+ # - regexp == object -> true or false
977
1670
  # -->
978
- # Equality---Two regexps are equal if their patterns are identical, they have
979
- # the same character set code, and their `casefold?` values are the same.
1671
+ # Returns `true` if `object` is another Regexp whose pattern, flags, and
1672
+ # encoding are the same as `self`, `false` otherwise:
980
1673
  #
981
- # /abc/ == /abc/x #=> false
982
- # /abc/ == /abc/i #=> false
983
- # /abc/ == /abc/u #=> false
984
- # /abc/u == /abc/n #=> false
1674
+ # /foo/ == Regexp.new('foo') # => true
1675
+ # /foo/ == /foo/i # => false
1676
+ # /foo/ == Regexp.new('food') # => false
1677
+ # /foo/ == Regexp.new("abc".force_encoding("euc-jp")) # => false
985
1678
  #
986
- def eql?: (untyped other) -> bool
1679
+ alias eql? ==
987
1680
 
988
1681
  # <!--
989
1682
  # rdoc-file=re.c
990
- # - rxp.fixed_encoding? -> true or false
1683
+ # - fixed_encoding? -> true or false
991
1684
  # -->
992
- # Returns false if rxp is applicable to a string with any ASCII compatible
993
- # encoding. Returns true otherwise.
994
- #
995
- # r = /a/
996
- # r.fixed_encoding? #=> false
997
- # r =~ "\u{6666} a" #=> 2
998
- # r =~ "\xa1\xa2 a".force_encoding("euc-jp") #=> 2
999
- # r =~ "abc".force_encoding("euc-jp") #=> 0
1000
- #
1001
- # r = /a/u
1002
- # r.fixed_encoding? #=> true
1003
- # r.encoding #=> #<Encoding:UTF-8>
1004
- # r =~ "\u{6666} a" #=> 2
1005
- # r =~ "\xa1\xa2".force_encoding("euc-jp") #=> Encoding::CompatibilityError
1006
- # r =~ "abc".force_encoding("euc-jp") #=> 0
1007
- #
1008
- # r = /\u{6666}/
1009
- # r.fixed_encoding? #=> true
1010
- # r.encoding #=> #<Encoding:UTF-8>
1011
- # r =~ "\u{6666} a" #=> 0
1012
- # r =~ "\xa1\xa2".force_encoding("euc-jp") #=> Encoding::CompatibilityError
1013
- # r =~ "abc".force_encoding("euc-jp") #=> nil
1685
+ # Returns `false` if `self` is applicable to a string with any ASCII-compatible
1686
+ # encoding; otherwise returns `true`:
1687
+ #
1688
+ # r = /a/ # => /a/
1689
+ # r.fixed_encoding? # => false
1690
+ # r.match?("\u{6666} a") # => true
1691
+ # r.match?("\xa1\xa2 a".force_encoding("euc-jp")) # => true
1692
+ # r.match?("abc".force_encoding("euc-jp")) # => true
1693
+ #
1694
+ # r = /a/u # => /a/
1695
+ # r.fixed_encoding? # => true
1696
+ # r.match?("\u{6666} a") # => true
1697
+ # r.match?("\xa1\xa2".force_encoding("euc-jp")) # Raises exception.
1698
+ # r.match?("abc".force_encoding("euc-jp")) # => true
1699
+ #
1700
+ # r = /\u{6666}/ # => /\u{6666}/
1701
+ # r.fixed_encoding? # => true
1702
+ # r.encoding # => #<Encoding:UTF-8>
1703
+ # r.match?("\u{6666} a") # => true
1704
+ # r.match?("\xa1\xa2".force_encoding("euc-jp")) # Raises exception.
1705
+ # r.match?("abc".force_encoding("euc-jp")) # => false
1014
1706
  #
1015
1707
  def fixed_encoding?: () -> bool
1016
1708
 
1017
1709
  # <!--
1018
1710
  # rdoc-file=re.c
1019
- # - rxp.hash -> integer
1711
+ # - hash -> integer
1020
1712
  # -->
1021
- # Produce a hash based on the text and options of this regular expression.
1713
+ # Returns the integer hash value for `self`.
1022
1714
  #
1023
- # See also Object#hash.
1715
+ # Related: Object#hash.
1024
1716
  #
1025
1717
  def hash: () -> Integer
1026
1718
 
1027
1719
  # <!--
1028
1720
  # rdoc-file=re.c
1029
- # - rxp.inspect -> string
1721
+ # - inspect -> string
1030
1722
  # -->
1031
- # Produce a nicely formatted string-version of *rxp*. Perhaps surprisingly,
1032
- # `#inspect` actually produces the more natural version of the string than
1033
- # `#to_s`.
1723
+ # Returns a nicely-formatted string representation of `self`:
1034
1724
  #
1035
- # /ab+c/ix.inspect #=> "/ab+c/ix"
1725
+ # /ab+c/ix.inspect # => "/ab+c/ix"
1726
+ #
1727
+ # Related: Regexp#to_s.
1036
1728
  #
1037
1729
  def inspect: () -> String
1038
1730
 
1039
1731
  # <!--
1040
1732
  # rdoc-file=re.c
1041
- # - rxp.match(str, pos=0) -> matchdata or nil
1042
- # - rxp.match(str, pos=0) {|match| block } -> obj
1733
+ # - match(string, offset = 0) -> matchdata or nil
1734
+ # - match(string, offset = 0) {|matchdata| ... } -> object
1043
1735
  # -->
1044
- # Returns a MatchData object describing the match, or `nil` if there was no
1045
- # match. This is equivalent to retrieving the value of the special variable `$~`
1046
- # following a normal match. If the second parameter is present, it specifies
1047
- # the position in the string to begin the search.
1048
- #
1049
- # /(.)(.)(.)/.match("abc")[2] #=> "b"
1050
- # /(.)(.)/.match("abc", 1)[2] #=> "c"
1051
- #
1052
- # If a block is given, invoke the block with MatchData if match succeed, so that
1053
- # you can write
1054
- #
1055
- # /M(.*)/.match("Matz") do |m|
1056
- # puts m[0]
1057
- # puts m[1]
1058
- # end
1059
- #
1060
- # instead of
1061
- #
1062
- # if m = /M(.*)/.match("Matz")
1063
- # puts m[0]
1064
- # puts m[1]
1065
- # end
1066
- #
1067
- # The return value is a value from block execution in this case.
1068
- #
1069
- def match: (String? | Symbol | _ToStr str, ?Integer pos) -> MatchData?
1070
- | [T] (String? | Symbol | _ToStr str, ?Integer pos) { (MatchData) -> T } -> T?
1736
+ # With no block given, returns the MatchData object that describes the match, if
1737
+ # any, or `nil` if none; the search begins at the given character `offset` in
1738
+ # `string`:
1739
+ #
1740
+ # /abra/.match('abracadabra') # => #<MatchData "abra">
1741
+ # /abra/.match('abracadabra', 4) # => #<MatchData "abra">
1742
+ # /abra/.match('abracadabra', 8) # => nil
1743
+ # /abra/.match('abracadabra', 800) # => nil
1744
+ #
1745
+ # string = "\u{5d0 5d1 5e8 5d0}cadabra"
1746
+ # /abra/.match(string, 7) #=> #<MatchData "abra">
1747
+ # /abra/.match(string, 8) #=> nil
1748
+ # /abra/.match(string.b, 8) #=> #<MatchData "abra">
1749
+ #
1750
+ # With a block given, calls the block if and only if a match is found; returns
1751
+ # the block's value:
1752
+ #
1753
+ # /abra/.match('abracadabra') {|matchdata| p matchdata }
1754
+ # # => #<MatchData "abra">
1755
+ # /abra/.match('abracadabra', 4) {|matchdata| p matchdata }
1756
+ # # => #<MatchData "abra">
1757
+ # /abra/.match('abracadabra', 8) {|matchdata| p matchdata }
1758
+ # # => nil
1759
+ # /abra/.match('abracadabra', 8) {|matchdata| fail 'Cannot happen' }
1760
+ # # => nil
1761
+ #
1762
+ # Output (from the first two blocks above):
1763
+ #
1764
+ # #<MatchData "abra">
1765
+ # #<MatchData "abra">
1766
+ #
1767
+ # /(.)(.)(.)/.match("abc")[2] # => "b"
1768
+ # /(.)(.)/.match("abc", 1)[2] # => "c"
1769
+ #
1770
+ def match: (interned? str, ?int offset) -> MatchData?
1771
+ | [T] (interned? str, ?int offset) { (MatchData matchdata) -> T } -> T?
1772
+ | (nil, ?int offset) ?{ (MatchData matchdata) -> void } -> nil
1071
1773
 
1072
1774
  # <!--
1073
1775
  # rdoc-file=re.c
1074
- # - rxp.match?(str) -> true or false
1075
- # - rxp.match?(str, pos=0) -> true or false
1776
+ # - match?(string) -> true or false
1777
+ # - match?(string, offset = 0) -> true or false
1076
1778
  # -->
1077
1779
  # Returns `true` or `false` to indicate whether the regexp is matched or not
1078
1780
  # without updating $~ and other related variables. If the second parameter is
1079
1781
  # present, it specifies the position in the string to begin the search.
1080
1782
  #
1081
- # /R.../.match?("Ruby") #=> true
1082
- # /R.../.match?("Ruby", 1) #=> false
1083
- # /P.../.match?("Ruby") #=> false
1084
- # $& #=> nil
1783
+ # /R.../.match?("Ruby") # => true
1784
+ # /R.../.match?("Ruby", 1) # => false
1785
+ # /P.../.match?("Ruby") # => false
1786
+ # $& # => nil
1085
1787
  #
1086
- def match?: (String? | Symbol | _ToStr str, ?Integer pos) -> bool
1788
+ def match?: (interned str, ?int offset) -> bool
1789
+ | (nil, ?int offset) -> false
1087
1790
 
1088
1791
  # <!--
1089
1792
  # rdoc-file=re.c
1090
- # - rxp.named_captures -> hash
1793
+ # - named_captures -> hash
1091
1794
  # -->
1092
- # Returns a hash representing information about named captures of *rxp*.
1795
+ # Returns a hash representing named captures of `self` (see [Named
1796
+ # Captures](rdoc-ref:Regexp@Named+Captures)):
1093
1797
  #
1094
- # A key of the hash is a name of the named captures. A value of the hash is an
1095
- # array which is list of indexes of corresponding named captures.
1798
+ # * Each key is the name of a named capture.
1799
+ # * Each value is an array of integer indexes for that named capture.
1096
1800
  #
1097
- # /(?<foo>.)(?<bar>.)/.named_captures
1098
- # #=> {"foo"=>[1], "bar"=>[2]}
1801
+ # Examples:
1099
1802
  #
1100
- # /(?<foo>.)(?<foo>.)/.named_captures
1101
- # #=> {"foo"=>[1, 2]}
1803
+ # /(?<foo>.)(?<bar>.)/.named_captures # => {"foo"=>[1], "bar"=>[2]}
1804
+ # /(?<foo>.)(?<foo>.)/.named_captures # => {"foo"=>[1, 2]}
1805
+ # /(.)(.)/.named_captures # => {}
1102
1806
  #
1103
- # If there are no named captures, an empty hash is returned.
1807
+ def named_captures: () -> Hash[String, Array[Integer]]
1808
+
1809
+ # <!--
1810
+ # rdoc-file=re.c
1811
+ # - names -> array_of_names
1812
+ # -->
1813
+ # Returns an array of names of captures (see [Named
1814
+ # Captures](rdoc-ref:Regexp@Named+Captures)):
1104
1815
  #
1105
- # /(.)(.)/.named_captures
1106
- # #=> {}
1816
+ # /(?<foo>.)(?<bar>.)(?<baz>.)/.names # => ["foo", "bar", "baz"]
1817
+ # /(?<foo>.)(?<foo>.)/.names # => ["foo"]
1818
+ # /(.)(.)/.names # => []
1107
1819
  #
1108
- def named_captures: () -> ::Hash[String, ::Array[Integer]]
1820
+ def names: () -> Array[String]
1109
1821
 
1110
1822
  # <!--
1111
1823
  # rdoc-file=re.c
1112
- # - rxp.names -> [name1, name2, ...]
1824
+ # - options -> integer
1113
1825
  # -->
1114
- # Returns a list of names of captures as an array of strings.
1826
+ # Returns an integer whose bits show the options set in `self`.
1827
+ #
1828
+ # The option bits are:
1829
+ #
1830
+ # Regexp::IGNORECASE # => 1
1831
+ # Regexp::EXTENDED # => 2
1832
+ # Regexp::MULTILINE # => 4
1115
1833
  #
1116
- # /(?<foo>.)(?<bar>.)(?<baz>.)/.names
1117
- # #=> ["foo", "bar", "baz"]
1834
+ # Examples:
1118
1835
  #
1119
- # /(?<foo>.)(?<foo>.)/.names
1120
- # #=> ["foo"]
1836
+ # /foo/.options # => 0
1837
+ # /foo/i.options # => 1
1838
+ # /foo/x.options # => 2
1839
+ # /foo/m.options # => 4
1840
+ # /foo/mix.options # => 7
1121
1841
  #
1122
- # /(.)(.)/.names
1123
- # #=> []
1842
+ # Note that additional bits may be set in the returned integer; these are
1843
+ # maintained internally in `self`, are ignored if passed to Regexp.new, and may
1844
+ # be ignored by the caller:
1845
+ #
1846
+ # Returns the set of bits corresponding to the options used when creating this
1847
+ # regexp (see Regexp::new for details). Note that additional bits may be set in
1848
+ # the returned options: these are used internally by the regular expression
1849
+ # code. These extra bits are ignored if the options are passed to Regexp::new:
1124
1850
  #
1125
- def names: () -> ::Array[String]
1851
+ # r = /\xa1\xa2/e # => /\xa1\xa2/
1852
+ # r.source # => "\\xa1\\xa2"
1853
+ # r.options # => 16
1854
+ # Regexp.new(r.source, r.options) # => /\xa1\xa2/
1855
+ #
1856
+ def options: () -> Integer
1126
1857
 
1127
1858
  # <!--
1128
1859
  # rdoc-file=re.c
1129
- # - rxp.options -> integer
1860
+ # - source -> string
1130
1861
  # -->
1131
- # Returns the set of bits corresponding to the options used when creating this
1132
- # Regexp (see Regexp::new for details. Note that additional bits may be set in
1133
- # the returned options: these are used internally by the regular expression
1134
- # code. These extra bits are ignored if the options are passed to Regexp::new.
1862
+ # Returns the original string of `self`:
1135
1863
  #
1136
- # Regexp::IGNORECASE #=> 1
1137
- # Regexp::EXTENDED #=> 2
1138
- # Regexp::MULTILINE #=> 4
1864
+ # /ab+c/ix.source # => "ab+c"
1139
1865
  #
1140
- # /cat/.options #=> 0
1141
- # /cat/ix.options #=> 3
1142
- # Regexp.new('cat', true).options #=> 1
1143
- # /\xa1\xa2/e.options #=> 16
1866
+ # Regexp escape sequences are retained:
1144
1867
  #
1145
- # r = /cat/ix
1146
- # Regexp.new(r.source, r.options) #=> /cat/ix
1868
+ # /\x20\+/.source # => "\\x20\\+"
1147
1869
  #
1148
- def options: () -> Integer
1870
+ # Lexer escape characters are not retained:
1871
+ #
1872
+ # /\//.source # => "/"
1873
+ #
1874
+ def source: () -> String
1149
1875
 
1150
1876
  # <!--
1151
1877
  # rdoc-file=re.c
1152
- # - rxp.source -> str
1878
+ # - to_s -> string
1153
1879
  # -->
1154
- # Returns the original string of the pattern.
1880
+ # Returns a string showing the options and string of `self`:
1155
1881
  #
1156
- # /ab+c/ix.source #=> "ab+c"
1882
+ # r0 = /ab+c/ix
1883
+ # s0 = r0.to_s # => "(?ix-m:ab+c)"
1157
1884
  #
1158
- # Note that escape sequences are retained as is.
1885
+ # The returned string may be used as an argument to Regexp.new, or as
1886
+ # interpolated text for a [Regexp
1887
+ # interpolation](rdoc-ref:Regexp@Interpolation+Mode):
1159
1888
  #
1160
- # /\x20\+/.source #=> "\\x20\\+"
1889
+ # r1 = Regexp.new(s0) # => /(?ix-m:ab+c)/
1890
+ # r2 = /#{s0}/ # => /(?ix-m:ab+c)/
1161
1891
  #
1162
- def source: () -> String
1892
+ # Note that `r1` and `r2` are not equal to `r0` because their original strings
1893
+ # are different:
1894
+ #
1895
+ # r0 == r1 # => false
1896
+ # r0.source # => "ab+c"
1897
+ # r1.source # => "(?ix-m:ab+c)"
1898
+ #
1899
+ # Related: Regexp#inspect.
1900
+ #
1901
+ def to_s: () -> String
1163
1902
 
1164
1903
  # <!--
1165
1904
  # rdoc-file=re.c
1166
- # - rxp.to_s -> str
1905
+ # - rxp.timeout -> float or nil
1167
1906
  # -->
1168
- # Returns a string containing the regular expression and its options (using the
1169
- # `(?opts:source)` notation. This string can be fed back in to Regexp::new to a
1170
- # regular expression with the same semantics as the original. (However,
1171
- # `Regexp#==` may not return true when comparing the two, as the source of the
1172
- # regular expression itself may differ, as the example shows). Regexp#inspect
1173
- # produces a generally more readable version of *rxp*.
1174
- #
1175
- # r1 = /ab+c/ix #=> /ab+c/ix
1176
- # s1 = r1.to_s #=> "(?ix-m:ab+c)"
1177
- # r2 = Regexp.new(s1) #=> /(?ix-m:ab+c)/
1178
- # r1 == r2 #=> false
1179
- # r1.source #=> "ab+c"
1180
- # r2.source #=> "(?ix-m:ab+c)"
1907
+ # It returns the timeout interval for Regexp matching in seconds. `nil` means no
1908
+ # default timeout configuration.
1181
1909
  #
1182
- def to_s: () -> String
1910
+ # This configuration is per-object. The global configuration set by
1911
+ # Regexp.timeout= is ignored if per-object configuration is set.
1912
+ #
1913
+ # re = Regexp.new("^a*b?a*$", timeout: 1)
1914
+ # re.timeout #=> 1.0
1915
+ # re =~ "a" * 100000 + "x" #=> regexp match timeout (RuntimeError)
1916
+ #
1917
+ %a{pure}
1918
+ def timeout: () -> Float?
1183
1919
 
1184
1920
  # <!--
1185
1921
  # rdoc-file=re.c
1186
- # - ~ rxp -> integer or nil
1922
+ # - ~ rxp -> integer or nil
1187
1923
  # -->
1188
- # Match---Matches *rxp* against the contents of `$_`. Equivalent to *`rxp* =~
1189
- # $_`.
1924
+ # Equivalent to `rxp =~ $_`:
1190
1925
  #
1191
1926
  # $_ = "input data"
1192
- # ~ /at/ #=> 7
1927
+ # ~ /at/ # => 7
1193
1928
  #
1194
1929
  def ~: () -> Integer?
1195
-
1196
- private
1197
-
1198
- def initialize_copy: (self object) -> self
1199
1930
  end
1200
-
1201
- # <!-- rdoc-file=re.c -->
1202
- # see Regexp.options and Regexp.new
1203
- #
1204
- Regexp::EXTENDED: Integer
1205
-
1206
- # <!-- rdoc-file=re.c -->
1207
- # see Regexp.options and Regexp.new
1208
- #
1209
- Regexp::FIXEDENCODING: Integer
1210
-
1211
- # <!-- rdoc-file=re.c -->
1212
- # see Regexp.options and Regexp.new
1213
- #
1214
- Regexp::IGNORECASE: Integer
1215
-
1216
- # <!-- rdoc-file=re.c -->
1217
- # see Regexp.options and Regexp.new
1218
- #
1219
- Regexp::MULTILINE: Integer
1220
-
1221
- # <!-- rdoc-file=re.c -->
1222
- # see Regexp.options and Regexp.new
1223
- #
1224
- Regexp::NOENCODING: Integer