rbs 2.8.4 → 3.8.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +12 -4
- data/.github/workflows/comments.yml +11 -11
- data/.github/workflows/dependabot.yml +30 -0
- data/.github/workflows/ruby.yml +40 -49
- data/.github/workflows/typecheck.yml +36 -0
- data/.github/workflows/windows.yml +28 -0
- data/.gitignore +1 -0
- data/.rubocop.yml +42 -2
- data/CHANGELOG.md +845 -1
- data/README.md +64 -4
- data/Rakefile +198 -18
- data/Steepfile +11 -11
- data/config.yml +311 -0
- data/core/array.rbs +2189 -1914
- data/core/basic_object.rbs +59 -84
- data/core/binding.rbs +7 -69
- data/core/builtin.rbs +210 -11
- data/core/class.rbs +37 -0
- data/core/comparable.rbs +23 -25
- data/core/complex.rbs +449 -227
- data/core/constants.rbs +29 -21
- data/core/data.rbs +415 -0
- data/core/dir.rbs +698 -415
- data/core/encoding.rbs +468 -843
- data/core/enumerable.rbs +495 -455
- data/core/enumerator/product.rbs +92 -0
- data/core/enumerator.rbs +106 -9
- data/core/env.rbs +1 -1
- data/core/errno.rbs +506 -605
- data/core/errors.rbs +15 -17
- data/core/exception.rbs +361 -145
- data/core/false_class.rbs +39 -26
- data/core/fiber.rbs +121 -14
- data/core/file.rbs +1262 -320
- data/core/file_test.rbs +62 -45
- data/core/float.rbs +187 -208
- data/core/gc.rbs +446 -196
- data/core/global_variables.rbs +29 -29
- data/core/hash.rbs +242 -349
- data/core/integer.rbs +246 -308
- data/core/io/buffer.rbs +373 -122
- data/core/io/wait.rbs +29 -17
- data/core/io.rbs +1881 -1518
- data/core/kernel.rbs +2116 -1538
- data/core/marshal.rbs +24 -14
- data/core/match_data.rbs +413 -166
- data/core/math.rbs +531 -291
- data/core/method.rbs +101 -32
- data/core/module.rbs +228 -64
- data/core/nil_class.rbs +106 -47
- data/core/numeric.rbs +206 -292
- data/core/object.rbs +73 -1168
- data/core/object_space/weak_key_map.rbs +166 -0
- data/core/object_space.rbs +5 -3
- data/core/proc.rbs +280 -39
- data/core/process.rbs +1318 -658
- data/core/ractor.rbs +200 -134
- data/core/random.rbs +21 -4
- data/core/range.rbs +309 -153
- data/core/rational.rbs +4 -12
- data/core/rb_config.rbs +64 -43
- data/core/rbs/unnamed/argf.rbs +411 -147
- data/core/rbs/unnamed/env_class.rbs +137 -253
- data/core/rbs/unnamed/random.rbs +49 -26
- data/core/refinement.rbs +16 -1
- data/core/regexp.rbs +1568 -862
- data/core/ruby_vm.rbs +719 -7
- data/core/rubygems/config_file.rbs +3 -0
- data/core/rubygems/errors.rbs +69 -6
- data/core/rubygems/rubygems.rbs +71 -17
- data/core/rubygems/version.rbs +11 -7
- data/{stdlib/set/0 → core}/set.rbs +80 -91
- data/core/signal.rbs +14 -8
- data/core/string.rbs +1732 -1607
- data/core/struct.rbs +467 -95
- data/core/symbol.rbs +215 -245
- data/core/thread.rbs +133 -89
- data/core/thread_group.rbs +9 -9
- data/core/time.rbs +1141 -841
- data/core/trace_point.rbs +181 -121
- data/core/true_class.rbs +58 -32
- data/core/unbound_method.rbs +103 -30
- data/core/warning.rbs +50 -5
- data/docs/CONTRIBUTING.md +1 -1
- data/docs/architecture.md +110 -0
- data/docs/collection.md +59 -5
- data/docs/data_and_struct.md +86 -0
- data/docs/gem.md +57 -0
- data/docs/rbs_by_example.md +16 -35
- data/docs/repo.md +1 -1
- data/docs/sigs.md +7 -7
- data/docs/stdlib.md +63 -5
- data/docs/syntax.md +255 -61
- data/docs/tools.md +1 -0
- data/ext/rbs_extension/extconf.rb +10 -0
- data/ext/rbs_extension/lexer.c +1741 -1548
- data/ext/rbs_extension/lexer.h +11 -1
- data/ext/rbs_extension/lexer.re +12 -6
- data/ext/rbs_extension/lexstate.c +26 -3
- data/ext/rbs_extension/location.c +119 -111
- data/ext/rbs_extension/location.h +32 -7
- data/ext/rbs_extension/main.c +3 -0
- data/ext/rbs_extension/parser.c +883 -481
- data/ext/rbs_extension/parserstate.c +65 -25
- data/ext/rbs_extension/parserstate.h +13 -3
- data/ext/rbs_extension/rbs_extension.h +1 -10
- data/ext/rbs_extension/unescape.c +7 -47
- data/goodcheck.yml +2 -2
- data/{ext/rbs_extension → include/rbs}/constants.h +26 -15
- data/include/rbs/ruby_objs.h +72 -0
- data/include/rbs.h +7 -0
- data/lib/rbs/annotate/annotations.rb +3 -3
- data/lib/rbs/annotate/formatter.rb +13 -3
- data/lib/rbs/annotate/rdoc_annotator.rb +1 -1
- data/lib/rbs/annotate/rdoc_source.rb +12 -3
- data/lib/rbs/ast/declarations.rb +85 -2
- data/lib/rbs/ast/directives.rb +39 -0
- data/lib/rbs/ast/members.rb +49 -15
- data/lib/rbs/ast/type_param.rb +104 -15
- data/lib/rbs/ast/visitor.rb +137 -0
- data/lib/rbs/buffer.rb +5 -0
- data/lib/rbs/cli/colored_io.rb +48 -0
- data/lib/rbs/cli/diff.rb +83 -0
- data/lib/rbs/cli/validate.rb +356 -0
- data/lib/rbs/cli.rb +253 -143
- data/lib/rbs/collection/cleaner.rb +8 -1
- data/lib/rbs/collection/config/lockfile.rb +92 -0
- data/lib/rbs/collection/config/lockfile_generator.rb +154 -65
- data/lib/rbs/collection/config.rb +19 -46
- data/lib/rbs/collection/installer.rb +12 -13
- data/lib/rbs/collection/sources/base.rb +2 -2
- data/lib/rbs/collection/sources/git.rb +146 -69
- data/lib/rbs/collection/sources/local.rb +81 -0
- data/lib/rbs/collection/sources/rubygems.rb +10 -12
- data/lib/rbs/collection/sources/stdlib.rb +14 -13
- data/lib/rbs/collection/sources.rb +15 -2
- data/lib/rbs/collection.rb +2 -1
- data/lib/rbs/definition.rb +13 -16
- data/lib/rbs/definition_builder/ancestor_builder.rb +100 -24
- data/lib/rbs/definition_builder/method_builder.rb +4 -4
- data/lib/rbs/definition_builder.rb +489 -584
- data/lib/rbs/diff.rb +125 -0
- data/lib/rbs/environment/use_map.rb +77 -0
- data/lib/rbs/environment.rb +406 -105
- data/lib/rbs/environment_loader.rb +48 -44
- data/lib/rbs/environment_walker.rb +1 -1
- data/lib/rbs/errors.rb +175 -56
- data/lib/rbs/file_finder.rb +28 -0
- data/lib/rbs/location_aux.rb +8 -7
- data/lib/rbs/locator.rb +37 -15
- data/lib/rbs/method_type.rb +23 -0
- data/lib/rbs/namespace.rb +1 -0
- data/lib/rbs/parser/lex_result.rb +15 -0
- data/lib/rbs/parser/token.rb +23 -0
- data/lib/rbs/parser_aux.rb +22 -13
- data/lib/rbs/prototype/helpers.rb +48 -22
- data/lib/rbs/prototype/node_usage.rb +99 -0
- data/lib/rbs/prototype/rb.rb +125 -31
- data/lib/rbs/prototype/rbi.rb +49 -36
- data/lib/rbs/prototype/runtime/helpers.rb +59 -0
- data/lib/rbs/prototype/runtime/reflection.rb +19 -0
- data/lib/rbs/prototype/runtime/value_object_generator.rb +279 -0
- data/lib/rbs/prototype/runtime.rb +273 -159
- data/lib/rbs/resolver/constant_resolver.rb +24 -8
- data/lib/rbs/resolver/type_name_resolver.rb +41 -7
- data/lib/rbs/sorter.rb +153 -123
- data/lib/rbs/substitution.rb +19 -0
- data/lib/rbs/subtractor.rb +201 -0
- data/lib/rbs/test/errors.rb +24 -11
- data/lib/rbs/test/guaranteed.rb +30 -0
- data/lib/rbs/test/hook.rb +45 -40
- data/lib/rbs/test/setup.rb +1 -1
- data/lib/rbs/test/tester.rb +1 -1
- data/lib/rbs/test/type_check.rb +120 -23
- data/lib/rbs/test.rb +6 -3
- data/lib/rbs/type_alias_dependency.rb +13 -3
- data/lib/rbs/type_alias_regularity.rb +21 -14
- data/lib/rbs/type_name.rb +18 -13
- data/lib/rbs/types.rb +352 -18
- data/lib/rbs/unit_test/convertibles.rb +176 -0
- data/lib/rbs/unit_test/spy.rb +136 -0
- data/lib/rbs/unit_test/type_assertions.rb +341 -0
- data/lib/rbs/unit_test/with_aliases.rb +143 -0
- data/lib/rbs/unit_test.rb +6 -0
- data/lib/rbs/validator.rb +55 -30
- data/lib/rbs/variance_calculator.rb +26 -23
- data/lib/rbs/vendorer.rb +3 -3
- data/lib/rbs/version.rb +1 -1
- data/lib/rbs/writer.rb +69 -22
- data/lib/rbs.rb +7 -2
- data/lib/rdoc/discover.rb +1 -1
- data/lib/rdoc_plugin/parser.rb +5 -5
- data/rbs.gemspec +12 -2
- data/schema/decls.json +1 -1
- data/schema/members.json +15 -10
- data/sig/ancestor_builder.rbs +4 -0
- data/sig/ancestor_graph.rbs +22 -2
- data/sig/annotate/formatter.rbs +2 -2
- data/sig/annotate/rdoc_annotater.rbs +1 -1
- data/sig/cli/colored_io.rbs +15 -0
- data/sig/cli/diff.rbs +21 -0
- data/sig/cli/validate.rbs +43 -0
- data/sig/cli.rbs +4 -0
- data/sig/collection/config/lockfile.rbs +74 -0
- data/sig/collection/config/lockfile_generator.rbs +66 -0
- data/sig/collection/config.rbs +5 -48
- data/sig/collection/installer.rbs +1 -1
- data/sig/collection/sources.rbs +105 -33
- data/sig/constant.rbs +1 -1
- data/sig/declarations.rbs +42 -3
- data/sig/definition.rbs +26 -10
- data/sig/definition_builder.rbs +103 -81
- data/sig/diff.rbs +28 -0
- data/sig/directives.rbs +61 -0
- data/sig/environment.rbs +175 -29
- data/sig/environment_loader.rbs +20 -18
- data/sig/errors.rbs +123 -2
- data/sig/file_finder.rbs +28 -0
- data/sig/location.rbs +0 -3
- data/sig/locator.rbs +14 -2
- data/sig/manifest.yaml +0 -1
- data/sig/members.rbs +32 -9
- data/sig/method_types.rbs +10 -4
- data/sig/namespace.rbs +2 -3
- data/sig/parser.rbs +55 -16
- data/sig/prototype/helpers.rbs +4 -0
- data/sig/prototype/node_usage.rbs +20 -0
- data/sig/prototype/rb.rbs +10 -2
- data/sig/prototype/rbi.rbs +2 -0
- data/sig/prototype/runtime.rbs +182 -0
- data/sig/rbs.rbs +1 -1
- data/sig/rdoc/rbs.rbs +4 -0
- data/sig/repository.rbs +7 -5
- data/sig/resolver/constant_resolver.rbs +3 -4
- data/sig/resolver/context.rbs +1 -1
- data/sig/resolver/type_name_resolver.rbs +5 -1
- data/sig/shims/bundler.rbs +38 -0
- data/sig/shims/rubygems.rbs +19 -0
- data/sig/sorter.rbs +23 -5
- data/sig/substitution.rbs +6 -0
- data/sig/subtractor.rbs +37 -0
- data/sig/test/errors.rbs +52 -0
- data/sig/test/guranteed.rbs +9 -0
- data/sig/test/type_check.rbs +19 -0
- data/sig/test.rbs +82 -0
- data/sig/type_alias_dependency.rbs +31 -0
- data/sig/type_alias_regularity.rbs +12 -6
- data/sig/type_param.rbs +45 -9
- data/sig/typename.rbs +8 -5
- data/sig/types.rbs +119 -12
- data/sig/unit_test/convertibles.rbs +154 -0
- data/sig/unit_test/spy.rbs +28 -0
- data/sig/unit_test/type_assertions.rbs +194 -0
- data/sig/unit_test/with_aliases.rbs +136 -0
- data/sig/use_map.rbs +35 -0
- data/sig/validator.rbs +12 -5
- data/sig/variance_calculator.rbs +3 -1
- data/sig/vendorer.rbs +1 -1
- data/sig/visitor.rbs +47 -0
- data/sig/writer.rbs +6 -2
- data/src/constants.c +153 -0
- data/src/ruby_objs.c +793 -0
- data/stdlib/base64/0/base64.rbs +298 -45
- data/stdlib/benchmark/0/benchmark.rbs +12 -3
- data/stdlib/bigdecimal/0/big_decimal.rbs +62 -198
- data/stdlib/cgi/0/core.rbs +68 -15
- data/stdlib/cgi/0/manifest.yaml +1 -0
- data/stdlib/coverage/0/coverage.rbs +50 -11
- data/stdlib/csv/0/csv.rbs +90 -119
- data/stdlib/csv/0/manifest.yaml +1 -0
- data/stdlib/date/0/date.rbs +806 -735
- data/stdlib/date/0/date_time.rbs +70 -211
- data/stdlib/dbm/0/dbm.rbs +0 -2
- data/stdlib/delegate/0/delegator.rbs +184 -0
- data/stdlib/delegate/0/kernel.rbs +47 -0
- data/stdlib/delegate/0/simple_delegator.rbs +96 -0
- data/stdlib/did_you_mean/0/did_you_mean.rbs +3 -8
- data/stdlib/digest/0/digest.rbs +48 -35
- data/stdlib/erb/0/erb.rbs +15 -39
- data/stdlib/etc/0/etc.rbs +174 -54
- data/stdlib/fileutils/0/fileutils.rbs +1234 -385
- data/stdlib/forwardable/0/forwardable.rbs +4 -4
- data/stdlib/io-console/0/io-console.rbs +82 -17
- data/stdlib/ipaddr/0/ipaddr.rbs +11 -6
- data/stdlib/json/0/json.rbs +434 -151
- data/stdlib/kconv/0/kconv.rbs +166 -0
- data/stdlib/logger/0/formatter.rbs +0 -2
- data/stdlib/logger/0/log_device.rbs +1 -3
- data/stdlib/logger/0/logger.rbs +465 -328
- data/stdlib/minitest/0/kernel.rbs +2 -2
- data/stdlib/minitest/0/minitest/abstract_reporter.rbs +4 -1
- data/stdlib/minitest/0/minitest/assertion.rbs +1 -0
- data/stdlib/minitest/0/minitest/assertions.rbs +58 -13
- data/stdlib/minitest/0/minitest/backtrace_filter.rbs +7 -0
- data/stdlib/minitest/0/minitest/bench_spec.rbs +8 -8
- data/stdlib/minitest/0/minitest/benchmark.rbs +17 -16
- data/stdlib/minitest/0/minitest/compress.rbs +13 -0
- data/stdlib/minitest/0/minitest/error_on_warning.rbs +3 -0
- data/stdlib/minitest/0/minitest/mock.rbs +9 -5
- data/stdlib/minitest/0/minitest/parallel/executor.rbs +4 -0
- data/stdlib/minitest/0/minitest/parallel/test/class_methods.rbs +0 -1
- data/stdlib/minitest/0/minitest/pride_io.rbs +8 -0
- data/stdlib/minitest/0/minitest/pride_lol.rbs +2 -0
- data/stdlib/minitest/0/minitest/progress_reporter.rbs +1 -1
- data/stdlib/minitest/0/minitest/reportable.rbs +2 -0
- data/stdlib/minitest/0/minitest/runnable.rbs +33 -1
- data/stdlib/minitest/0/minitest/spec/dsl/instance_methods.rbs +1 -1
- data/stdlib/minitest/0/minitest/spec/dsl.rbs +10 -6
- data/stdlib/minitest/0/minitest/spec.rbs +1 -1
- data/stdlib/minitest/0/minitest/statistics_reporter.rbs +5 -0
- data/stdlib/minitest/0/minitest/summary_reporter.rbs +0 -7
- data/stdlib/minitest/0/minitest/test/lifecycle_hooks.rbs +7 -7
- data/stdlib/minitest/0/minitest/test.rbs +7 -14
- data/stdlib/minitest/0/minitest/unexpected_error.rbs +2 -0
- data/stdlib/minitest/0/minitest/unexpected_warning.rbs +6 -0
- data/stdlib/minitest/0/minitest/unit.rbs +1 -2
- data/stdlib/minitest/0/minitest.rbs +41 -892
- data/stdlib/monitor/0/monitor.rbs +91 -10
- data/stdlib/mutex_m/0/mutex_m.rbs +0 -2
- data/stdlib/net-http/0/manifest.yaml +1 -1
- data/stdlib/net-http/0/net-http.rbs +3858 -964
- data/stdlib/net-protocol/0/manifest.yaml +2 -0
- data/stdlib/net-protocol/0/net-protocol.rbs +56 -0
- data/stdlib/net-smtp/0/manifest.yaml +2 -0
- data/stdlib/net-smtp/0/net-smtp.rbs +55 -0
- data/stdlib/nkf/0/nkf.rbs +35 -5
- data/stdlib/objspace/0/objspace.rbs +40 -18
- data/stdlib/observable/0/observable.rbs +217 -0
- data/stdlib/open-uri/0/manifest.yaml +4 -0
- data/stdlib/open-uri/0/open-uri.rbs +393 -0
- data/stdlib/open3/0/open3.rbs +147 -0
- data/stdlib/openssl/0/manifest.yaml +1 -0
- data/stdlib/openssl/0/openssl.rbs +681 -316
- data/stdlib/optparse/0/optparse.rbs +100 -65
- data/stdlib/pathname/0/pathname.rbs +24 -15
- data/stdlib/pp/0/manifest.yaml +2 -0
- data/stdlib/pp/0/pp.rbs +300 -0
- data/stdlib/prettyprint/0/prettyprint.rbs +2 -6
- data/stdlib/pstore/0/pstore.rbs +370 -156
- data/stdlib/psych/0/core_ext.rbs +12 -0
- data/stdlib/{yaml → psych}/0/dbm.rbs +3 -3
- data/stdlib/psych/0/manifest.yaml +3 -0
- data/stdlib/psych/0/psych.rbs +402 -0
- data/stdlib/{yaml → psych}/0/store.rbs +2 -2
- data/stdlib/pty/0/pty.rbs +63 -11
- data/stdlib/rdoc/0/code_object.rbs +51 -0
- data/stdlib/rdoc/0/comment.rbs +59 -0
- data/stdlib/rdoc/0/context.rbs +153 -0
- data/stdlib/rdoc/0/markup.rbs +117 -0
- data/stdlib/rdoc/0/parser.rbs +56 -0
- data/stdlib/rdoc/0/rdoc.rbs +13 -380
- data/stdlib/rdoc/0/ri.rbs +17 -0
- data/stdlib/rdoc/0/store.rbs +48 -0
- data/stdlib/rdoc/0/top_level.rbs +97 -0
- data/stdlib/resolv/0/resolv.rbs +16 -79
- data/stdlib/ripper/0/ripper.rbs +1648 -0
- data/stdlib/securerandom/0/securerandom.rbs +7 -2
- data/stdlib/shellwords/0/shellwords.rbs +11 -12
- data/stdlib/singleton/0/singleton.rbs +0 -3
- data/stdlib/socket/0/addrinfo.rbs +13 -18
- data/stdlib/socket/0/basic_socket.rbs +5 -10
- data/stdlib/socket/0/ip_socket.rbs +0 -2
- data/stdlib/socket/0/socket.rbs +77 -46
- data/stdlib/socket/0/tcp_server.rbs +0 -5
- data/stdlib/socket/0/tcp_socket.rbs +36 -3
- data/stdlib/socket/0/udp_socket.rbs +4 -5
- data/stdlib/socket/0/unix_server.rbs +0 -5
- data/stdlib/socket/0/unix_socket.rbs +2 -4
- data/{core/string_io.rbs → stdlib/stringio/0/stringio.rbs} +188 -107
- data/stdlib/strscan/0/string_scanner.rbs +1269 -425
- data/stdlib/tempfile/0/tempfile.rbs +224 -61
- data/stdlib/time/0/time.rbs +48 -35
- data/stdlib/timeout/0/timeout.rbs +17 -8
- data/stdlib/tmpdir/0/tmpdir.rbs +10 -3
- data/stdlib/tsort/0/tsort.rbs +0 -4
- data/stdlib/uri/0/common.rbs +271 -144
- data/stdlib/uri/0/file.rbs +5 -0
- data/stdlib/uri/0/ftp.rbs +1 -1
- data/stdlib/uri/0/generic.rbs +26 -22
- data/stdlib/uri/0/http.rbs +4 -4
- data/stdlib/uri/0/ldap.rbs +1 -1
- data/stdlib/uri/0/mailto.rbs +84 -0
- data/stdlib/uri/0/rfc2396_parser.rbs +3 -0
- data/stdlib/yaml/0/manifest.yaml +1 -2
- data/stdlib/yaml/0/yaml.rbs +1 -199
- data/stdlib/zlib/0/buf_error.rbs +10 -0
- data/stdlib/zlib/0/data_error.rbs +10 -0
- data/stdlib/zlib/0/deflate.rbs +210 -0
- data/stdlib/zlib/0/error.rbs +20 -0
- data/stdlib/zlib/0/gzip_file/crc_error.rbs +12 -0
- data/stdlib/zlib/0/gzip_file/error.rbs +23 -0
- data/stdlib/zlib/0/gzip_file/length_error.rbs +12 -0
- data/stdlib/zlib/0/gzip_file/no_footer.rbs +11 -0
- data/stdlib/zlib/0/gzip_file.rbs +156 -0
- data/stdlib/zlib/0/gzip_reader.rbs +293 -0
- data/stdlib/zlib/0/gzip_writer.rbs +166 -0
- data/stdlib/zlib/0/inflate.rbs +180 -0
- data/stdlib/zlib/0/mem_error.rbs +10 -0
- data/stdlib/zlib/0/need_dict.rbs +13 -0
- data/stdlib/zlib/0/stream_end.rbs +11 -0
- data/stdlib/zlib/0/stream_error.rbs +11 -0
- data/stdlib/zlib/0/version_error.rbs +11 -0
- data/stdlib/zlib/0/zlib.rbs +1 -3
- data/stdlib/zlib/0/zstream.rbs +200 -0
- data/templates/include/rbs/constants.h.erb +20 -0
- data/templates/include/rbs/ruby_objs.h.erb +10 -0
- data/templates/src/constants.c.erb +36 -0
- data/templates/src/ruby_objs.c.erb +27 -0
- data/templates/template.rb +122 -0
- metadata +136 -36
- data/Gemfile +0 -33
- data/Gemfile.lock +0 -118
- data/core/deprecated.rbs +0 -9
- data/ext/rbs_extension/constants.c +0 -135
- data/ext/rbs_extension/ruby_objs.c +0 -525
- data/ext/rbs_extension/ruby_objs.h +0 -43
- data/lib/rbs/constant_table.rb +0 -167
- data/lib/rbs/parser_compat/lexer_error.rb +0 -6
- data/lib/rbs/parser_compat/located_value.rb +0 -7
- data/lib/rbs/parser_compat/semantics_error.rb +0 -6
- data/lib/rbs/parser_compat/syntax_error.rb +0 -6
- data/lib/rbs/test/spy.rb +0 -6
- data/lib/rbs/type_name_resolver.rb +0 -67
- data/sig/constant_table.rbs +0 -30
- data/sig/shims/abstract_syntax_tree.rbs +0 -25
- data/sig/shims/pp.rbs +0 -3
- data/sig/shims/ripper.rbs +0 -8
- data/sig/shims.rbs +0 -69
- data/sig/type_name_resolver.rbs +0 -26
- data/stdlib/minitest/0/manifest.yaml +0 -2
- data/stdlib/prime/0/integer-extension.rbs +0 -41
- data/stdlib/prime/0/manifest.yaml +0 -2
- data/stdlib/prime/0/prime.rbs +0 -372
data/core/regexp.rbs
CHANGED
@@ -1,115 +1,265 @@
|
|
1
1
|
# <!-- rdoc-file=re.c -->
|
2
|
-
# A
|
3
|
-
#
|
4
|
-
# Regexp::new constructor.
|
2
|
+
# A [regular expression](https://en.wikipedia.org/wiki/Regular_expression) (also
|
3
|
+
# called a *regexp*) is a *match pattern* (also simply called a *pattern*).
|
5
4
|
#
|
6
|
-
#
|
5
|
+
# A common notation for a regexp uses enclosing slash characters:
|
7
6
|
#
|
8
|
-
#
|
7
|
+
# /foo/
|
9
8
|
#
|
9
|
+
# A regexp may be applied to a *target string*; the part of the string (if any)
|
10
|
+
# that matches the pattern is called a *match*, and may be said *to match*:
|
10
11
|
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
#
|
14
|
-
#
|
12
|
+
# re = /red/
|
13
|
+
# re.match?('redirect') # => true # Match at beginning of target.
|
14
|
+
# re.match?('bored') # => true # Match at end of target.
|
15
|
+
# re.match?('credit') # => true # Match within target.
|
16
|
+
# re.match?('foo') # => false # No match.
|
15
17
|
#
|
16
|
-
#
|
18
|
+
# ## Regexp Uses
|
17
19
|
#
|
18
|
-
#
|
19
|
-
# /y/.match('haystack') #=> #<MatchData "y">
|
20
|
+
# A regexp may be used:
|
20
21
|
#
|
21
|
-
#
|
22
|
-
# matches itself.
|
22
|
+
# * To extract substrings based on a given pattern:
|
23
23
|
#
|
24
|
-
#
|
24
|
+
# re = /foo/ # => /foo/
|
25
|
+
# re.match('food') # => #<MatchData "foo">
|
26
|
+
# re.match('good') # => nil
|
25
27
|
#
|
26
|
-
#
|
28
|
+
# See sections [Method match](rdoc-ref:Regexp@Method+match) and [Operator
|
29
|
+
# =~](rdoc-ref:Regexp@Operator+-3D~).
|
27
30
|
#
|
28
|
-
#
|
31
|
+
# * To determine whether a string matches a given pattern:
|
29
32
|
#
|
30
|
-
#
|
33
|
+
# re.match?('food') # => true
|
34
|
+
# re.match?('good') # => false
|
31
35
|
#
|
32
|
-
#
|
33
|
-
# by the letter *t*, so it matches *haystack*, also.
|
36
|
+
# See section [Method match?](rdoc-ref:Regexp@Method+match-3F).
|
34
37
|
#
|
35
|
-
#
|
38
|
+
# * As an argument for calls to certain methods in other classes and modules;
|
39
|
+
# most such methods accept an argument that may be either a string or the
|
40
|
+
# (much more powerful) regexp.
|
36
41
|
#
|
37
|
-
#
|
38
|
-
# method.
|
42
|
+
# See [Regexp Methods](rdoc-ref:regexp/methods.rdoc).
|
39
43
|
#
|
40
|
-
#
|
44
|
+
# ## Regexp Objects
|
41
45
|
#
|
42
|
-
#
|
43
|
-
# expression and the other is a string then the regular expression is used as a
|
44
|
-
# pattern to match against the string. (This operator is equivalently defined
|
45
|
-
# by Regexp and String so the order of String and Regexp do not matter. Other
|
46
|
-
# classes may have different implementations of `=~`.) If a match is found, the
|
47
|
-
# operator returns index of first match in string, otherwise it returns `nil`.
|
46
|
+
# A regexp object has:
|
48
47
|
#
|
49
|
-
#
|
50
|
-
# 'haystack' =~ /hay/ #=> 0
|
51
|
-
# /a/ =~ 'haystack' #=> 1
|
52
|
-
# /u/ =~ 'haystack' #=> nil
|
48
|
+
# * A source; see [Sources](rdoc-ref:Regexp@Sources).
|
53
49
|
#
|
54
|
-
#
|
55
|
-
# after a successful match. `$~` holds a MatchData object. Regexp.last_match is
|
56
|
-
# equivalent to `$~`.
|
50
|
+
# * Several modes; see [Modes](rdoc-ref:Regexp@Modes).
|
57
51
|
#
|
58
|
-
#
|
52
|
+
# * A timeout; see [Timeouts](rdoc-ref:Regexp@Timeouts).
|
59
53
|
#
|
60
|
-
#
|
54
|
+
# * An encoding; see [Encodings](rdoc-ref:Regexp@Encodings).
|
61
55
|
#
|
62
|
-
#
|
56
|
+
# ## Creating a Regexp
|
63
57
|
#
|
64
|
-
#
|
58
|
+
# A regular expression may be created with:
|
65
59
|
#
|
66
|
-
#
|
67
|
-
#
|
68
|
-
# them literally they must be backslash-escaped. To match a backslash literally,
|
69
|
-
# backslash-escape it: `\\\`.
|
60
|
+
# * A regexp literal using slash characters (see [Regexp
|
61
|
+
# Literals](rdoc-ref:syntax/literals.rdoc@Regexp+Literals)):
|
70
62
|
#
|
71
|
-
#
|
72
|
-
#
|
63
|
+
# # This is a very common usage.
|
64
|
+
# /foo/ # => /foo/
|
73
65
|
#
|
74
|
-
#
|
75
|
-
#
|
76
|
-
# [below](#label-Character+Classes)).
|
66
|
+
# * A `%r` regexp literal (see [%r: Regexp
|
67
|
+
# Literals](rdoc-ref:syntax/literals.rdoc@25r-3A+Regexp+Literals)):
|
77
68
|
#
|
78
|
-
#
|
79
|
-
#
|
69
|
+
# # Same delimiter character at beginning and end;
|
70
|
+
# # useful for avoiding escaping characters
|
71
|
+
# %r/name\/value pair/ # => /name\/value pair/
|
72
|
+
# %r:name/value pair: # => /name\/value pair/
|
73
|
+
# %r|name/value pair| # => /name\/value pair/
|
80
74
|
#
|
81
|
-
#
|
82
|
-
#
|
75
|
+
# # Certain "paired" characters can be delimiters.
|
76
|
+
# %r[foo] # => /foo/
|
77
|
+
# %r{foo} # => /foo/
|
78
|
+
# %r(foo) # => /foo/
|
79
|
+
# %r<foo> # => /foo/
|
83
80
|
#
|
84
|
-
#
|
85
|
-
# /#{place}/.match("Go to 東京都")
|
86
|
-
# #=> #<MatchData "東京都">
|
81
|
+
# * Method Regexp.new.
|
87
82
|
#
|
88
|
-
# ##
|
83
|
+
# ## Method `match`
|
89
84
|
#
|
90
|
-
#
|
91
|
-
#
|
92
|
-
#
|
85
|
+
# Each of the methods Regexp#match, String#match, and Symbol#match returns a
|
86
|
+
# MatchData object if a match was found, `nil` otherwise; each also sets [global
|
87
|
+
# variables](rdoc-ref:Regexp@Global+Variables):
|
93
88
|
#
|
94
|
-
#
|
89
|
+
# 'food'.match(/foo/) # => #<MatchData "foo">
|
90
|
+
# 'food'.match(/bar/) # => nil
|
95
91
|
#
|
96
|
-
#
|
97
|
-
# inclusive range of characters. `[abcd]` is equivalent to `[a-d]`. A range can
|
98
|
-
# be followed by another range, so `[abcdwxyz]` is equivalent to `[a-dw-z]`. The
|
99
|
-
# order in which ranges or individual characters appear inside a character class
|
100
|
-
# is irrelevant.
|
92
|
+
# ## Operator `=~`
|
101
93
|
#
|
102
|
-
#
|
103
|
-
#
|
94
|
+
# Each of the operators Regexp#=~, String#=~, and Symbol#=~ returns an integer
|
95
|
+
# offset if a match was found, `nil` otherwise; each also sets [global
|
96
|
+
# variables](rdoc-ref:Regexp@Global+Variables):
|
104
97
|
#
|
105
|
-
#
|
106
|
-
#
|
98
|
+
# /bar/ =~ 'foo bar' # => 4
|
99
|
+
# 'foo bar' =~ /bar/ # => 4
|
100
|
+
# /baz/ =~ 'foo bar' # => nil
|
107
101
|
#
|
108
|
-
#
|
102
|
+
# ## Method `match?`
|
103
|
+
#
|
104
|
+
# Each of the methods Regexp#match?, String#match?, and Symbol#match? returns
|
105
|
+
# `true` if a match was found, `false` otherwise; none sets [global
|
106
|
+
# variables](rdoc-ref:Regexp@Global+Variables):
|
107
|
+
#
|
108
|
+
# 'food'.match?(/foo/) # => true
|
109
|
+
# 'food'.match?(/bar/) # => false
|
110
|
+
#
|
111
|
+
# ## Global Variables
|
112
|
+
#
|
113
|
+
# Certain regexp-oriented methods assign values to global variables:
|
114
|
+
#
|
115
|
+
# * `#match`: see [Method match](rdoc-ref:Regexp@Method+match).
|
116
|
+
# * `#=~`: see [Operator =~](rdoc-ref:Regexp@Operator+-3D~).
|
117
|
+
#
|
118
|
+
# The affected global variables are:
|
119
|
+
#
|
120
|
+
# * `$~`: Returns a MatchData object, or `nil`.
|
121
|
+
# * `$&`: Returns the matched part of the string, or `nil`.
|
122
|
+
# * `` $` ``: Returns the part of the string to the left of the match, or `nil`.
|
123
|
+
# * `$'`: Returns the part of the string to the right of the match, or `nil`.
|
124
|
+
# * `$+`: Returns the last group matched, or `nil`.
|
125
|
+
# * `$1`, `$2`, etc.: Returns the first, second, etc., matched group, or
|
126
|
+
# `nil`. Note that `$0` is quite different; it returns the name of the
|
127
|
+
# currently executing program.
|
128
|
+
#
|
129
|
+
# Examples:
|
130
|
+
#
|
131
|
+
# # Matched string, but no matched groups.
|
132
|
+
# 'foo bar bar baz'.match('bar')
|
133
|
+
# $~ # => #<MatchData "bar">
|
134
|
+
# $& # => "bar"
|
135
|
+
# $` # => "foo "
|
136
|
+
# $' # => " bar baz"
|
137
|
+
# $+ # => nil
|
138
|
+
# $1 # => nil
|
139
|
+
#
|
140
|
+
# # Matched groups.
|
141
|
+
# /s(\w{2}).*(c)/.match('haystack')
|
142
|
+
# $~ # => #<MatchData "stac" 1:"ta" 2:"c">
|
143
|
+
# $& # => "stac"
|
144
|
+
# $` # => "hay"
|
145
|
+
# $' # => "k"
|
146
|
+
# $+ # => "c"
|
147
|
+
# $1 # => "ta"
|
148
|
+
# $2 # => "c"
|
149
|
+
# $3 # => nil
|
150
|
+
#
|
151
|
+
# # No match.
|
152
|
+
# 'foo'.match('bar')
|
153
|
+
# $~ # => nil
|
154
|
+
# $& # => nil
|
155
|
+
# $` # => nil
|
156
|
+
# $' # => nil
|
157
|
+
# $+ # => nil
|
158
|
+
# $1 # => nil
|
159
|
+
#
|
160
|
+
# Note that Regexp#match?, String#match?, and Symbol#match? do not set global
|
161
|
+
# variables.
|
162
|
+
#
|
163
|
+
# ## Sources
|
164
|
+
#
|
165
|
+
# As seen above, the simplest regexp uses a literal expression as its source:
|
166
|
+
#
|
167
|
+
# re = /foo/ # => /foo/
|
168
|
+
# re.match('food') # => #<MatchData "foo">
|
169
|
+
# re.match('good') # => nil
|
170
|
+
#
|
171
|
+
# A rich collection of available *subexpressions* gives the regexp great power
|
172
|
+
# and flexibility:
|
173
|
+
#
|
174
|
+
# * [Special characters](rdoc-ref:Regexp@Special+Characters)
|
175
|
+
# * [Source literals](rdoc-ref:Regexp@Source+Literals)
|
176
|
+
# * [Character classes](rdoc-ref:Regexp@Character+Classes)
|
177
|
+
# * [Shorthand character classes](rdoc-ref:Regexp@Shorthand+Character+Classes)
|
178
|
+
# * [Anchors](rdoc-ref:Regexp@Anchors)
|
179
|
+
# * [Alternation](rdoc-ref:Regexp@Alternation)
|
180
|
+
# * [Quantifiers](rdoc-ref:Regexp@Quantifiers)
|
181
|
+
# * [Groups and captures](rdoc-ref:Regexp@Groups+and+Captures)
|
182
|
+
# * [Unicode](rdoc-ref:Regexp@Unicode)
|
183
|
+
# * [POSIX Bracket Expressions](rdoc-ref:Regexp@POSIX+Bracket+Expressions)
|
184
|
+
# * [Comments](rdoc-ref:Regexp@Comments)
|
185
|
+
#
|
186
|
+
# ### Special Characters
|
187
|
+
#
|
188
|
+
# Regexp special characters, called *metacharacters*, have special meanings in
|
189
|
+
# certain contexts; depending on the context, these are sometimes
|
190
|
+
# metacharacters:
|
191
|
+
#
|
192
|
+
# . ? - + * ^ \ | $ ( ) [ ] { }
|
193
|
+
#
|
194
|
+
# To match a metacharacter literally, backslash-escape it:
|
195
|
+
#
|
196
|
+
# # Matches one or more 'o' characters.
|
197
|
+
# /o+/.match('foo') # => #<MatchData "oo">
|
198
|
+
# # Would match 'o+'.
|
199
|
+
# /o\+/.match('foo') # => nil
|
200
|
+
#
|
201
|
+
# To match a backslash literally, backslash-escape it:
|
202
|
+
#
|
203
|
+
# /\./.match('\.') # => #<MatchData ".">
|
204
|
+
# /\\./.match('\.') # => #<MatchData "\\.">
|
205
|
+
#
|
206
|
+
# Method Regexp.escape returns an escaped string:
|
207
|
+
#
|
208
|
+
# Regexp.escape('.?-+*^\|$()[]{}')
|
209
|
+
# # => "\\.\\?\\-\\+\\*\\^\\\\\\|\\$\\(\\)\\[\\]\\{\\}"
|
210
|
+
#
|
211
|
+
# ### Source Literals
|
212
|
+
#
|
213
|
+
# The source literal largely behaves like a double-quoted string; see
|
214
|
+
# [Double-Quoted String
|
215
|
+
# Literals](rdoc-ref:syntax/literals.rdoc@Double-Quoted+String+Literals).
|
216
|
+
#
|
217
|
+
# In particular, a source literal may contain interpolated expressions:
|
218
|
+
#
|
219
|
+
# s = 'foo' # => "foo"
|
220
|
+
# /#{s}/ # => /foo/
|
221
|
+
# /#{s.capitalize}/ # => /Foo/
|
222
|
+
# /#{2 + 2}/ # => /4/
|
223
|
+
#
|
224
|
+
# There are differences between an ordinary string literal and a source literal;
|
225
|
+
# see [Shorthand Character
|
226
|
+
# Classes](rdoc-ref:Regexp@Shorthand+Character+Classes).
|
227
|
+
#
|
228
|
+
# * `\s` in an ordinary string literal is equivalent to a space character; in
|
229
|
+
# a source literal, it's shorthand for matching a whitespace character.
|
230
|
+
# * In an ordinary string literal, these are (needlessly) escaped characters;
|
231
|
+
# in a source literal, they are shorthands for various matching characters:
|
232
|
+
#
|
233
|
+
# \w \W \d \D \h \H \S \R
|
234
|
+
#
|
235
|
+
# ### Character Classes
|
236
|
+
#
|
237
|
+
# A *character class* is delimited by square brackets; it specifies that certain
|
238
|
+
# characters match at a given point in the target string:
|
239
|
+
#
|
240
|
+
# # This character class will match any vowel.
|
241
|
+
# re = /B[aeiou]rd/
|
242
|
+
# re.match('Bird') # => #<MatchData "Bird">
|
243
|
+
# re.match('Bard') # => #<MatchData "Bard">
|
244
|
+
# re.match('Byrd') # => nil
|
245
|
+
#
|
246
|
+
# A character class may contain hyphen characters to specify ranges of
|
247
|
+
# characters:
|
248
|
+
#
|
249
|
+
# # These regexps have the same effect.
|
250
|
+
# /[abcdef]/.match('foo') # => #<MatchData "f">
|
251
|
+
# /[a-f]/.match('foo') # => #<MatchData "f">
|
252
|
+
# /[a-cd-f]/.match('foo') # => #<MatchData "f">
|
253
|
+
#
|
254
|
+
# When the first character of a character class is a caret (`^`), the sense of
|
255
|
+
# the class is inverted: it matches any character *except* those specified.
|
256
|
+
#
|
257
|
+
# /[^a-eg-z]/.match('f') # => #<MatchData "f">
|
109
258
|
#
|
110
259
|
# A character class may contain another character class. By itself this isn't
|
111
|
-
# useful because `[a-z[0-9]]` describes the same set as `[a-z0-9]`.
|
112
|
-
#
|
260
|
+
# useful because `[a-z[0-9]]` describes the same set as `[a-z0-9]`.
|
261
|
+
#
|
262
|
+
# However, character classes also support the `&&` operator, which performs set
|
113
263
|
# intersection on its arguments. The two can be combined as follows:
|
114
264
|
#
|
115
265
|
# /[a-w&&[^c-g]z]/ # ([a-w] AND ([^c-g] OR z))
|
@@ -118,238 +268,470 @@
|
|
118
268
|
#
|
119
269
|
# /[abh-w]/
|
120
270
|
#
|
121
|
-
#
|
122
|
-
#
|
123
|
-
# * `/./` - Any character except a newline.
|
124
|
-
# * `/./m` - Any character (the `m` modifier enables multiline mode)
|
125
|
-
# * `/\w/` - A word character (`[a-zA-Z0-9_]`)
|
126
|
-
# * `/\W/` - A non-word character (`[^a-zA-Z0-9_]`). Please take a look at
|
127
|
-
# [Bug #4044](https://bugs.ruby-lang.org/issues/4044) if using `/\W/` with
|
128
|
-
# the `/i` modifier.
|
129
|
-
# * `/\d/` - A digit character (`[0-9]`)
|
130
|
-
# * `/\D/` - A non-digit character (`[^0-9]`)
|
131
|
-
# * `/\h/` - A hexdigit character (`[0-9a-fA-F]`)
|
132
|
-
# * `/\H/` - A non-hexdigit character (`[^0-9a-fA-F]`)
|
133
|
-
# * `/\s/` - A whitespace character: `/[ \t\r\n\f\v]/`
|
134
|
-
# * `/\S/` - A non-whitespace character: `/[^ \t\r\n\f\v]/`
|
135
|
-
# * `/\R/` - A linebreak: `\n`, `\v`, `\f`, `\r` `\u0085` (NEXT LINE),
|
136
|
-
# `\u2028` (LINE SEPARATOR), `\u2029` (PARAGRAPH SEPARATOR) or `\r\n`.
|
137
|
-
#
|
138
|
-
#
|
139
|
-
# POSIX *bracket expressions* are also similar to character classes. They
|
140
|
-
# provide a portable alternative to the above, with the added benefit that they
|
141
|
-
# encompass non-ASCII characters. For instance, `/\d/` matches only the ASCII
|
142
|
-
# decimal digits (0-9); whereas `/[[:digit:]]/` matches any character in the
|
143
|
-
# Unicode *Nd* category.
|
144
|
-
#
|
145
|
-
# * `/[[:alnum:]]/` - Alphabetic and numeric character
|
146
|
-
# * `/[[:alpha:]]/` - Alphabetic character
|
147
|
-
# * `/[[:blank:]]/` - Space or tab
|
148
|
-
# * `/[[:cntrl:]]/` - Control character
|
149
|
-
# * `/[[:digit:]]/` - Digit
|
150
|
-
# * `/[[:graph:]]/` - Non-blank character (excludes spaces, control
|
151
|
-
# characters, and similar)
|
152
|
-
# * `/[[:lower:]]/` - Lowercase alphabetical character
|
153
|
-
# * `/[[:print:]]/` - Like [:graph:], but includes the space character
|
154
|
-
# * `/[[:punct:]]/` - Punctuation character
|
155
|
-
# * `/[[:space:]]/` - Whitespace character (`[:blank:]`, newline, carriage
|
156
|
-
# return, etc.)
|
157
|
-
# * `/[[:upper:]]/` - Uppercase alphabetical
|
158
|
-
# * `/[[:xdigit:]]/` - Digit allowed in a hexadecimal number (i.e., 0-9a-fA-F)
|
271
|
+
# ### Shorthand Character Classes
|
159
272
|
#
|
273
|
+
# Each of the following metacharacters serves as a shorthand for a character
|
274
|
+
# class:
|
160
275
|
#
|
161
|
-
#
|
276
|
+
# * `/./`: Matches any character except a newline:
|
162
277
|
#
|
163
|
-
#
|
164
|
-
#
|
165
|
-
# * `/[[:ascii:]]/` - A character in the ASCII character set
|
278
|
+
# /./.match('foo') # => #<MatchData "f">
|
279
|
+
# /./.match("\n") # => nil
|
166
280
|
#
|
167
|
-
#
|
168
|
-
#
|
169
|
-
# /[[:upper:]][[:lower:]]/.match("Hello") #=> #<MatchData "He">
|
170
|
-
# /[[:xdigit:]][[:xdigit:]]/.match("A6") #=> #<MatchData "A6">
|
281
|
+
# * `/./m`: Matches any character, including a newline; see [Multiline
|
282
|
+
# Mode](rdoc-ref:Regexp@Multiline+Mode):
|
171
283
|
#
|
284
|
+
# /./m.match("\n") # => #<MatchData "\n">
|
172
285
|
#
|
173
|
-
#
|
286
|
+
# * `/\w/`: Matches a word character: equivalent to `[a-zA-Z0-9_]`:
|
174
287
|
#
|
175
|
-
#
|
176
|
-
#
|
177
|
-
#
|
288
|
+
# /\w/.match(' foo') # => #<MatchData "f">
|
289
|
+
# /\w/.match(' _') # => #<MatchData "_">
|
290
|
+
# /\w/.match(' ') # => nil
|
178
291
|
#
|
179
|
-
# *
|
180
|
-
# * `+` - One or more times
|
181
|
-
# * `?` - Zero or one times (optional)
|
182
|
-
# * `{`*n*`}` - Exactly *n* times
|
183
|
-
# * `{`*n*`,}` - *n* or more times
|
184
|
-
# * `{,`*m*`}` - *m* or less times
|
185
|
-
# * `{`*n*`,`*m*`}` - At least *n* and at most *m* times
|
292
|
+
# * `/\W/`: Matches a non-word character: equivalent to `[^a-zA-Z0-9_]`:
|
186
293
|
#
|
294
|
+
# /\W/.match(' ') # => #<MatchData " ">
|
295
|
+
# /\W/.match('_') # => nil
|
187
296
|
#
|
188
|
-
#
|
189
|
-
# ('e'), two 'l' characters, then one 'o':
|
297
|
+
# * `/\d/`: Matches a digit character: equivalent to `[0-9]`:
|
190
298
|
#
|
191
|
-
#
|
299
|
+
# /\d/.match('THX1138') # => #<MatchData "1">
|
300
|
+
# /\d/.match('foo') # => nil
|
192
301
|
#
|
193
|
-
#
|
302
|
+
# * `/\D/`: Matches a non-digit character: equivalent to `[^0-9]`:
|
194
303
|
#
|
195
|
-
#
|
196
|
-
#
|
197
|
-
# matching makes the minimal amount of matches necessary for overall success.
|
198
|
-
# Most greedy metacharacters can be made lazy by following them with `?`. For
|
199
|
-
# the `{n}` pattern, because it specifies an exact number of characters to match
|
200
|
-
# and not a variable number of characters, the `?` metacharacter instead makes
|
201
|
-
# the repeated pattern optional.
|
304
|
+
# /\D/.match('123Jump!') # => #<MatchData "J">
|
305
|
+
# /\D/.match('123') # => nil
|
202
306
|
#
|
203
|
-
#
|
204
|
-
# '.+' matches '<a><b>'; the second uses a lazy quantifier so '.+?' matches
|
205
|
-
# '<a>':
|
307
|
+
# * `/\h/`: Matches a hexdigit character: equivalent to `[0-9a-fA-F]`:
|
206
308
|
#
|
207
|
-
#
|
208
|
-
#
|
309
|
+
# /\h/.match('xyz fedcba9876543210') # => #<MatchData "f">
|
310
|
+
# /\h/.match('xyz') # => nil
|
209
311
|
#
|
210
|
-
#
|
312
|
+
# * `/\H/`: Matches a non-hexdigit character: equivalent to `[^0-9a-fA-F]`:
|
211
313
|
#
|
212
|
-
#
|
213
|
-
#
|
214
|
-
# they refuse to "give up" their match even if this jeopardises the overall
|
215
|
-
# match.
|
314
|
+
# /\H/.match('fedcba9876543210xyz') # => #<MatchData "x">
|
315
|
+
# /\H/.match('fedcba9876543210') # => nil
|
216
316
|
#
|
217
|
-
#
|
218
|
-
# /<.*+><.+>/.match("<a><b>") #=> nil
|
219
|
-
# /<.*><.++>/.match("<a><b>") #=> nil
|
317
|
+
# * `/\s/`: Matches a whitespace character: equivalent to `/[ \t\r\n\f\v]/`:
|
220
318
|
#
|
221
|
-
#
|
319
|
+
# /\s/.match('foo bar') # => #<MatchData " ">
|
320
|
+
# /\s/.match('foo') # => nil
|
222
321
|
#
|
223
|
-
#
|
224
|
-
#
|
225
|
-
# the *backreference* `\n` (e.g. `\1`); outside of the pattern use
|
226
|
-
# `MatchData[n]` (e.g. `MatchData[1]`).
|
322
|
+
# * `/\S/`: Matches a non-whitespace character: equivalent to `/[^
|
323
|
+
# \t\r\n\f\v]/`:
|
227
324
|
#
|
228
|
-
#
|
229
|
-
#
|
325
|
+
# /\S/.match(" \t\r\n\f\v foo") # => #<MatchData "f">
|
326
|
+
# /\S/.match(" \t\r\n\f\v") # => nil
|
230
327
|
#
|
231
|
-
#
|
232
|
-
# #=> #<MatchData "cat sat in" 1:"at">
|
328
|
+
# * `/\R/`: Matches a linebreak, platform-independently:
|
233
329
|
#
|
234
|
-
#
|
235
|
-
#
|
330
|
+
# /\R/.match("\r") # => #<MatchData "\r"> # Carriage return (CR)
|
331
|
+
# /\R/.match("\n") # => #<MatchData "\n"> # Newline (LF)
|
332
|
+
# /\R/.match("\f") # => #<MatchData "\f"> # Formfeed (FF)
|
333
|
+
# /\R/.match("\v") # => #<MatchData "\v"> # Vertical tab (VT)
|
334
|
+
# /\R/.match("\r\n") # => #<MatchData "\r\n"> # CRLF
|
335
|
+
# /\R/.match("\u0085") # => #<MatchData "\u0085"> # Next line (NEL)
|
336
|
+
# /\R/.match("\u2028") # => #<MatchData "\u2028"> # Line separator (LSEP)
|
337
|
+
# /\R/.match("\u2029") # => #<MatchData "\u2029"> # Paragraph separator (PSEP)
|
236
338
|
#
|
237
|
-
#
|
339
|
+
# ### Anchors
|
238
340
|
#
|
239
|
-
#
|
240
|
-
#
|
341
|
+
# An anchor is a metasequence that matches a zero-width position between
|
342
|
+
# characters in the target string.
|
241
343
|
#
|
242
|
-
#
|
243
|
-
#
|
244
|
-
# the `\0` backreference cannot be used inside the regexp, as backreferences can
|
245
|
-
# only be used after the end of the capture group, and the `\0` backreference
|
246
|
-
# uses the implicit capture group of the entire match. However, you can use
|
247
|
-
# this backreference when doing substitution:
|
344
|
+
# For a subexpression with no anchor, matching may begin anywhere in the target
|
345
|
+
# string:
|
248
346
|
#
|
249
|
-
#
|
250
|
-
# # => "The cats sats in the hats"
|
347
|
+
# /real/.match('surrealist') # => #<MatchData "real">
|
251
348
|
#
|
252
|
-
#
|
349
|
+
# For a subexpression with an anchor, matching must begin at the matched anchor.
|
253
350
|
#
|
254
|
-
#
|
255
|
-
# `(?<`*name*`>)` or `(?'`*name*`')` constructs.
|
351
|
+
# #### Boundary Anchors
|
256
352
|
#
|
257
|
-
#
|
258
|
-
# #=> #<MatchData "$3.67" dollars:"3" cents:"67">
|
259
|
-
# /\$(?<dollars>\d+)\.(?<cents>\d+)/.match("$3.67")[:dollars] #=> "3"
|
353
|
+
# Each of these anchors matches a boundary:
|
260
354
|
#
|
261
|
-
#
|
262
|
-
# group name.
|
355
|
+
# * `^`: Matches the beginning of a line:
|
263
356
|
#
|
264
|
-
#
|
265
|
-
#
|
357
|
+
# /^bar/.match("foo\nbar") # => #<MatchData "bar">
|
358
|
+
# /^ar/.match("foo\nbar") # => nil
|
359
|
+
#
|
360
|
+
# * `$`: Matches the end of a line:
|
361
|
+
#
|
362
|
+
# /bar$/.match("foo\nbar") # => #<MatchData "bar">
|
363
|
+
# /ba$/.match("foo\nbar") # => nil
|
364
|
+
#
|
365
|
+
# * `\A`: Matches the beginning of the string:
|
366
|
+
#
|
367
|
+
# /\Afoo/.match('foo bar') # => #<MatchData "foo">
|
368
|
+
# /\Afoo/.match(' foo bar') # => nil
|
369
|
+
#
|
370
|
+
# * `\Z`: Matches the end of the string; if the string ends with a single newline,
|
371
|
+
# it matches just before the ending newline:
|
372
|
+
#
|
373
|
+
# /foo\Z/.match('bar foo') # => #<MatchData "foo">
|
374
|
+
# /foo\Z/.match('foo bar') # => nil
|
375
|
+
# /foo\Z/.match("bar foo\n") # => #<MatchData "foo">
|
376
|
+
# /foo\Z/.match("bar foo\n\n") # => nil
|
377
|
+
#
|
378
|
+
# * `\z`: Matches the end of the string:
|
379
|
+
#
|
380
|
+
# /foo\z/.match('bar foo') # => #<MatchData "foo">
|
381
|
+
# /foo\z/.match('foo bar') # => nil
|
382
|
+
# /foo\z/.match("bar foo\n") # => nil
|
383
|
+
#
|
384
|
+
# * `\b`: Matches a word boundary when not inside brackets; matches backspace
|
385
|
+
# (`"\x08"`) when inside brackets:
|
386
|
+
#
|
387
|
+
# /foo\b/.match('foo bar') # => #<MatchData "foo">
|
388
|
+
# /foo\b/.match('foobar') # => nil
|
389
|
+
#
|
390
|
+
# * `\B`: Matches a non-word boundary:
|
391
|
+
#
|
392
|
+
# /foo\B/.match('foobar') # => #<MatchData "foo">
|
393
|
+
# /foo\B/.match('foo bar') # => nil
|
394
|
+
#
|
395
|
+
# * `\G`: Matches first matching position:
|
396
|
+
#
|
397
|
+
# In methods like String#gsub and String#scan, it changes on each iteration.
|
398
|
+
# It initially matches the beginning of the subject string, and in each following
|
399
|
+
# iteration it matches where the last match finished.
|
400
|
+
#
|
401
|
+
# " a b c".gsub(/ /, '_') # => "____a_b_c"
|
402
|
+
# " a b c".gsub(/\G /, '_') # => "____a b c"
|
403
|
+
#
|
404
|
+
# In methods like Regexp#match and String#match that take an optional
|
405
|
+
# offset, it matches where the search begins.
|
406
|
+
#
|
407
|
+
# "hello, world".match(/,/, 3) # => #<MatchData ",">
|
408
|
+
# "hello, world".match(/\G,/, 3) # => nil
|
409
|
+
#
|
410
|
+
# #### Lookaround Anchors
|
411
|
+
#
|
412
|
+
# Lookahead anchors:
|
413
|
+
#
|
414
|
+
# * `(?=*pat*)`: Positive lookahead assertion: ensures that the following
|
415
|
+
# characters match *pat*, but doesn't include those characters in the
|
416
|
+
# matched substring.
|
417
|
+
#
|
418
|
+
# * `(?!*pat*)`: Negative lookahead assertion: ensures that the following
|
419
|
+
# characters *do not* match *pat*, but doesn't include those characters in
|
420
|
+
# the matched substring.
|
421
|
+
#
|
422
|
+
# Lookbehind anchors:
|
423
|
+
#
|
424
|
+
# * `(?<=*pat*)`: Positive lookbehind assertion: ensures that the preceding
|
425
|
+
# characters match *pat*, but doesn't include those characters in the
|
426
|
+
# matched substring.
|
427
|
+
#
|
428
|
+
# * `(?<!*pat*)`: Negative lookbehind assertion: ensures that the preceding
|
429
|
+
# characters do not match *pat*, but doesn't include those characters in the
|
430
|
+
# matched substring.
|
431
|
+
#
|
432
|
+
# The pattern below uses positive lookahead and positive lookbehind to match
|
433
|
+
# text appearing in `<b>...</b>` tags without including the tags in the match:
|
434
|
+
#
|
435
|
+
# /(?<=<b>)\w+(?=<\/b>)/.match("Fortune favors the <b>bold</b>.")
|
436
|
+
# # => #<MatchData "bold">
|
437
|
+
#
|
438
|
+
# #### Match-Reset Anchor
|
439
|
+
#
|
440
|
+
# * `\K`: Match reset: the matched content preceding `\K` in the regexp is
|
441
|
+
# excluded from the result. For example, the following two regexps are
|
442
|
+
# almost equivalent:
|
443
|
+
#
|
444
|
+
# /ab\Kc/.match('abc') # => #<MatchData "c">
|
445
|
+
# /(?<=ab)c/.match('abc') # => #<MatchData "c">
|
446
|
+
#
|
447
|
+
# These match the same string and `$&` equals `'c'`, while the matched position
|
448
|
+
# is different.
|
449
|
+
#
|
450
|
+
# As are the following two regexps:
|
451
|
+
#
|
452
|
+
# /(a)\K(b)\Kc/
|
453
|
+
# /(?<=(?<=(a))(b))c/
|
454
|
+
#
|
455
|
+
# ### Alternation
|
456
|
+
#
|
457
|
+
# The vertical bar metacharacter (`|`) may be used within parentheses to express
|
458
|
+
# alternation: two or more subexpressions any of which may match the target
|
459
|
+
# string.
|
460
|
+
#
|
461
|
+
# Two alternatives:
|
462
|
+
#
|
463
|
+
# re = /(a|b)/
|
464
|
+
# re.match('foo') # => nil
|
465
|
+
# re.match('bar') # => #<MatchData "b" 1:"b">
|
466
|
+
#
|
467
|
+
# Four alternatives:
|
468
|
+
#
|
469
|
+
# re = /(a|b|c|d)/
|
470
|
+
# re.match('shazam') # => #<MatchData "a" 1:"a">
|
471
|
+
# re.match('cold') # => #<MatchData "c" 1:"c">
|
472
|
+
#
|
473
|
+
# Each alternative is a subexpression, and may be composed of other
|
474
|
+
# subexpressions:
|
475
|
+
#
|
476
|
+
# re = /([a-c]|[x-z])/
|
477
|
+
# re.match('bar') # => #<MatchData "b" 1:"b">
|
478
|
+
# re.match('ooz') # => #<MatchData "z" 1:"z">
|
479
|
+
#
|
480
|
+
# Method Regexp.union provides a convenient way to construct a regexp with
|
481
|
+
# alternatives.
|
482
|
+
#
|
483
|
+
# ### Quantifiers
|
266
484
|
#
|
267
|
-
#
|
268
|
-
# simultaneously. Also, if a named capture is used in a regexp, then parentheses
|
269
|
-
# used for grouping which would otherwise result in a unnamed capture are
|
270
|
-
# treated as non-capturing.
|
485
|
+
# A simple regexp matches one character:
|
271
486
|
#
|
272
|
-
#
|
273
|
-
# /(\w)(\w)/.match("ab").named_captures # => {}
|
487
|
+
# /\w/.match('Hello') # => #<MatchData "H">
|
274
488
|
#
|
275
|
-
#
|
276
|
-
# /(?<c>\w)(\w)/.match("ab").named_captures # => {"c"=>"a"}
|
489
|
+
# An added *quantifier* specifies how many matches are required or allowed:
|
277
490
|
#
|
278
|
-
#
|
279
|
-
# of an expression and the `=~` operator, the captured text is also assigned to
|
280
|
-
# local variables with corresponding names.
|
491
|
+
# * `*` - Matches zero or more times:
|
281
492
|
#
|
282
|
-
#
|
283
|
-
#
|
493
|
+
# /\w*/.match('')
|
494
|
+
# # => #<MatchData "">
|
495
|
+
# /\w*/.match('x')
|
496
|
+
# # => #<MatchData "x">
|
497
|
+
# /\w*/.match('xyz')
|
498
|
+
# # => #<MatchData "xyz">
|
284
499
|
#
|
285
|
-
#
|
500
|
+
# * `+` - Matches one or more times:
|
286
501
|
#
|
287
|
-
#
|
288
|
-
#
|
502
|
+
# /\w+/.match('') # => nil
|
503
|
+
# /\w+/.match('x') # => #<MatchData "x">
|
504
|
+
# /\w+/.match('xyz') # => #<MatchData "xyz">
|
289
505
|
#
|
290
|
-
#
|
506
|
+
# * `?` - Matches zero or one times:
|
291
507
|
#
|
292
|
-
#
|
508
|
+
# /\w?/.match('') # => #<MatchData "">
|
509
|
+
# /\w?/.match('x') # => #<MatchData "x">
|
510
|
+
# /\w?/.match('xyz') # => #<MatchData "x">
|
293
511
|
#
|
294
|
-
#
|
295
|
-
# twice, i.e. `[aeiou]\w[aeiou]\w`: 'enor'.
|
512
|
+
# * `{`*n*`}` - Matches exactly *n* times:
|
296
513
|
#
|
297
|
-
#
|
298
|
-
#
|
514
|
+
# /\w{2}/.match('') # => nil
|
515
|
+
# /\w{2}/.match('x') # => nil
|
516
|
+
# /\w{2}/.match('xyz') # => #<MatchData "xy">
|
299
517
|
#
|
300
|
-
#
|
301
|
-
# combines the terms it contains into an atomic whole without creating a
|
302
|
-
# backreference. This benefits performance at the slight expense of readability.
|
518
|
+
# * `{`*min*`,}` - Matches *min* or more times:
|
303
519
|
#
|
304
|
-
#
|
305
|
-
#
|
520
|
+
# /\w{2,}/.match('') # => nil
|
521
|
+
# /\w{2,}/.match('x') # => nil
|
522
|
+
# /\w{2,}/.match('xy') # => #<MatchData "xy">
|
523
|
+
# /\w{2,}/.match('xyz') # => #<MatchData "xyz">
|
306
524
|
#
|
307
|
-
#
|
308
|
-
# #=> #<MatchData "Investigations" 1:"n" 2:"ti">
|
525
|
+
# * `{,`*max*`}` - Matches *max* or fewer times:
|
309
526
|
#
|
310
|
-
#
|
311
|
-
#
|
312
|
-
#
|
527
|
+
# /\w{,2}/.match('') # => #<MatchData "">
|
528
|
+
# /\w{,2}/.match('x') # => #<MatchData "x">
|
529
|
+
# /\w{,2}/.match('xyz') # => #<MatchData "xy">
|
313
530
|
#
|
314
|
-
#
|
315
|
-
#
|
531
|
+
# * `{`*min*`,`*max*`}` - Matches at least *min* times and at most *max*
|
532
|
+
# times:
|
316
533
|
#
|
317
|
-
#
|
534
|
+
# /\w{1,2}/.match('') # => nil
|
535
|
+
# /\w{1,2}/.match('x') # => #<MatchData "x">
|
536
|
+
# /\w{1,2}/.match('xyz') # => #<MatchData "xy">
|
318
537
|
#
|
319
|
-
#
|
320
|
-
# subexpression *pat* to be matched independently of the rest of the expression
|
321
|
-
# such that what it matches becomes fixed for the remainder of the match, unless
|
322
|
-
# the entire subexpression must be abandoned and subsequently revisited. In this
|
323
|
-
# way *pat* is treated as a non-divisible whole. Atomic grouping is typically
|
324
|
-
# used to optimise patterns so as to prevent the regular expression engine from
|
325
|
-
# backtracking needlessly.
|
538
|
+
# #### Greedy, Lazy, or Possessive Matching
|
326
539
|
#
|
327
|
-
#
|
328
|
-
# `.*` matches *Quote"*. This causes the overall match to fail, so the text
|
329
|
-
# matched by `.*` is backtracked by one position, which leaves the final
|
330
|
-
# character of the string available to match `"`
|
540
|
+
# Quantifier matching may be greedy, lazy, or possessive:
|
331
541
|
#
|
332
|
-
#
|
542
|
+
# * In *greedy* matching, as many occurrences as possible are matched while
|
543
|
+
# still allowing the overall match to succeed. Greedy quantifiers: `*`, `+`,
|
544
|
+
# `?`, `{min, max}` and its variants.
|
545
|
+
# * In *lazy* matching, the minimum number of occurrences are matched. Lazy
|
546
|
+
# quantifiers: `*?`, `+?`, `??`, `{min, max}?` and its variants.
|
547
|
+
# * In *possessive* matching, once a match is found, there is no backtracking;
|
548
|
+
# that match is retained, even if it jeopardises the overall match.
|
549
|
+
# Possessive quantifiers: `*+`, `++`, `?+`. Note that `{min, max}` and its
|
550
|
+
# variants do *not* support possessive matching.
|
333
551
|
#
|
334
|
-
#
|
335
|
-
# this means that the overall match fails
|
552
|
+
# More:
|
336
553
|
#
|
337
|
-
#
|
554
|
+
# * About greedy and lazy matching, see [Choosing Minimal or Maximal
|
555
|
+
# Repetition](https://doc.lagout.org/programmation/Regular%20Expressions/Reg
|
556
|
+
# ular%20Expressions%20Cookbook_%20Detailed%20Solutions%20in%20Eight%20Progr
|
557
|
+
# amming%20Languages%20%282nd%20ed.%29%20%5BGoyvaerts%20%26%20Levithan%20201
|
558
|
+
# 2-09-06%5D.pdf#tutorial-backtrack).
|
559
|
+
# * About possessive matching, see [Eliminate Needless
|
560
|
+
# Backtracking](https://doc.lagout.org/programmation/Regular%20Expressions/R
|
561
|
+
# egular%20Expressions%20Cookbook_%20Detailed%20Solutions%20in%20Eight%20Pro
|
562
|
+
# gramming%20Languages%20%282nd%20ed.%29%20%5BGoyvaerts%20%26%20Levithan%202
|
563
|
+
# 012-09-06%5D.pdf#tutorial-backtrack).
|
338
564
|
#
|
339
|
-
#
|
565
|
+
# ### Groups and Captures
|
340
566
|
#
|
341
|
-
#
|
342
|
-
# which can be a group name or number, again. This differs from backreferences
|
343
|
-
# in that it re-executes the group rather than simply trying to re-match the
|
344
|
-
# same text.
|
567
|
+
# A simple regexp has (at most) one match:
|
345
568
|
#
|
346
|
-
#
|
347
|
-
#
|
348
|
-
#
|
569
|
+
# re = /\d\d\d\d-\d\d-\d\d/
|
570
|
+
# re.match('1943-02-04') # => #<MatchData "1943-02-04">
|
571
|
+
# re.match('1943-02-04').size # => 1
|
572
|
+
# re.match('foo') # => nil
|
349
573
|
#
|
350
|
-
#
|
574
|
+
# Adding one or more pairs of parentheses, `(*subexpression*)`, defines
|
575
|
+
# *groups*, which may result in multiple matched substrings, called *captures*:
|
351
576
|
#
|
352
|
-
#
|
577
|
+
# re = /(\d\d\d\d)-(\d\d)-(\d\d)/
|
578
|
+
# re.match('1943-02-04') # => #<MatchData "1943-02-04" 1:"1943" 2:"02" 3:"04">
|
579
|
+
# re.match('1943-02-04').size # => 4
|
580
|
+
#
|
581
|
+
# The first capture is the entire matched string; the other captures are the
|
582
|
+
# matched substrings from the groups.
|
583
|
+
#
|
584
|
+
# A group may have a [quantifier](rdoc-ref:Regexp@Quantifiers):
|
585
|
+
#
|
586
|
+
# re = /July 4(th)?/
|
587
|
+
# re.match('July 4') # => #<MatchData "July 4" 1:nil>
|
588
|
+
# re.match('July 4th') # => #<MatchData "July 4th" 1:"th">
|
589
|
+
#
|
590
|
+
# re = /(foo)*/
|
591
|
+
# re.match('') # => #<MatchData "" 1:nil>
|
592
|
+
# re.match('foo') # => #<MatchData "foo" 1:"foo">
|
593
|
+
# re.match('foofoo') # => #<MatchData "foofoo" 1:"foo">
|
594
|
+
#
|
595
|
+
# re = /(foo)+/
|
596
|
+
# re.match('') # => nil
|
597
|
+
# re.match('foo') # => #<MatchData "foo" 1:"foo">
|
598
|
+
# re.match('foofoo') # => #<MatchData "foofoo" 1:"foo">
|
599
|
+
#
|
600
|
+
# The returned MatchData object gives access to the matched substrings:
|
601
|
+
#
|
602
|
+
# re = /(\d\d\d\d)-(\d\d)-(\d\d)/
|
603
|
+
# md = re.match('1943-02-04')
|
604
|
+
# # => #<MatchData "1943-02-04" 1:"1943" 2:"02" 3:"04">
|
605
|
+
# md[0] # => "1943-02-04"
|
606
|
+
# md[1] # => "1943"
|
607
|
+
# md[2] # => "02"
|
608
|
+
# md[3] # => "04"
|
609
|
+
#
|
610
|
+
# #### Non-Capturing Groups
|
611
|
+
#
|
612
|
+
# A group may be made non-capturing; it is still a group (and, for example, can
|
613
|
+
# have a quantifier), but its matching substring is not included among the
|
614
|
+
# captures.
|
615
|
+
#
|
616
|
+
# A non-capturing group begins with `?:` (inside the parentheses):
|
617
|
+
#
|
618
|
+
# # Don't capture the year.
|
619
|
+
# re = /(?:\d\d\d\d)-(\d\d)-(\d\d)/
|
620
|
+
# md = re.match('1943-02-04') # => #<MatchData "1943-02-04" 1:"02" 2:"04">
|
621
|
+
#
|
622
|
+
# #### Backreferences
|
623
|
+
#
|
624
|
+
# A group match may also be referenced within the regexp itself; such a
|
625
|
+
# reference is called a `backreference`:
|
626
|
+
#
|
627
|
+
# /[csh](..) [csh]\1 in/.match('The cat sat in the hat')
|
628
|
+
# # => #<MatchData "cat sat in" 1:"at">
|
629
|
+
#
|
630
|
+
# This table shows how each subexpression in the regexp above matches a
|
631
|
+
# substring in the target string:
|
632
|
+
#
|
633
|
+
# | Subexpression in Regexp | Matching Substring in Target String |
|
634
|
+
# |---------------------------|-------------------------------------|
|
635
|
+
# | First '[csh]' | Character 'c' |
|
636
|
+
# | '(..)' | First substring 'at' |
|
637
|
+
# | First space ' ' | First space character ' ' |
|
638
|
+
# | Second '[csh]' | Character 's' |
|
639
|
+
# | '\1' (backreference 'at') | Second substring 'at' |
|
640
|
+
# | ' in' | Substring ' in' |
|
641
|
+
#
|
642
|
+
# A regexp may contain any number of groups:
|
643
|
+
#
|
644
|
+
# * For a large number of groups:
|
645
|
+
#
|
646
|
+
# * The ordinary `\`*n* notation applies only for *n* in range (1..9).
|
647
|
+
# * The `MatchData[*n*]` notation applies for any non-negative *n*.
|
648
|
+
#
|
649
|
+
# * `\0` is a special backreference, referring to the entire matched string;
|
650
|
+
# it may not be used within the regexp itself, but may be used outside it
|
651
|
+
# (for example, in a substitution method call):
|
652
|
+
#
|
653
|
+
# 'The cat sat in the hat'.gsub(/[csh]at/, '\0s')
|
654
|
+
# # => "The cats sats in the hats"
|
655
|
+
#
|
656
|
+
# #### Named Captures
|
657
|
+
#
|
658
|
+
# As seen above, a capture can be referred to by its number. A capture can also
|
659
|
+
# have a name, prefixed as `?<*name*>` or `?'*name*'`, and the name (symbolized)
|
660
|
+
# may be used as an index in `MatchData[]`:
|
661
|
+
#
|
662
|
+
# md = /\$(?<dollars>\d+)\.(?'cents'\d+)/.match("$3.67")
|
663
|
+
# # => #<MatchData "$3.67" dollars:"3" cents:"67">
|
664
|
+
# md[:dollars] # => "3"
|
665
|
+
# md[:cents] # => "67"
|
666
|
+
# # The capture numbers are still valid.
|
667
|
+
# md[2] # => "67"
|
668
|
+
#
|
669
|
+
# When a regexp contains a named capture, there are no unnamed captures:
|
670
|
+
#
|
671
|
+
# /\$(?<dollars>\d+)\.(\d+)/.match("$3.67")
|
672
|
+
# # => #<MatchData "$3.67" dollars:"3">
|
673
|
+
#
|
674
|
+
# A named group may be backreferenced as `\k<*name*>`:
|
675
|
+
#
|
676
|
+
# /(?<vowel>[aeiou]).\k<vowel>.\k<vowel>/.match('ototomy')
|
677
|
+
# # => #<MatchData "ototo" vowel:"o">
|
678
|
+
#
|
679
|
+
# When (and only when) a regexp contains named capture groups and appears before
|
680
|
+
# the `=~` operator, the captured substrings are assigned to local variables
|
681
|
+
# with corresponding names:
|
682
|
+
#
|
683
|
+
# /\$(?<dollars>\d+)\.(?<cents>\d+)/ =~ '$3.67'
|
684
|
+
# dollars # => "3"
|
685
|
+
# cents # => "67"
|
686
|
+
#
|
687
|
+
# Method MatchData#named_captures returns a hash of the capture names and
|
688
|
+
# substrings; method Regexp#names returns an array of the capture names.
|
689
|
+
#
|
690
|
+
# #### Atomic Grouping
|
691
|
+
#
|
692
|
+
# A group may be made *atomic* with `(?>`*subexpression*`)`.
|
693
|
+
#
|
694
|
+
# This causes the subexpression to be matched independently of the rest of the
|
695
|
+
# expression, so that the matched substring becomes fixed for the remainder of
|
696
|
+
# the match, unless the entire subexpression must be abandoned and subsequently
|
697
|
+
# revisited.
|
698
|
+
#
|
699
|
+
# In this way *subexpression* is treated as a non-divisible whole. Atomic
|
700
|
+
# grouping is typically used to optimise patterns to prevent needless
|
701
|
+
# backtracking.
|
702
|
+
#
|
703
|
+
# Example (without atomic grouping):
|
704
|
+
#
|
705
|
+
# /".*"/.match('"Quote"') # => #<MatchData "\"Quote\"">
|
706
|
+
#
|
707
|
+
# Analysis:
|
708
|
+
#
|
709
|
+
# 1. The leading subexpression `"` in the pattern matches the first character
|
710
|
+
# `"` in the target string.
|
711
|
+
# 2. The next subexpression `.*` matches the next substring `Quote"` (including
|
712
|
+
# the trailing double-quote).
|
713
|
+
# 3. Now there is nothing left in the target string to match the trailing
|
714
|
+
# subexpression `"` in the pattern; this would cause the overall match to
|
715
|
+
# fail.
|
716
|
+
# 4. The matched substring is backtracked by one position: `Quote`.
|
717
|
+
# 5. The final subexpression `"` now matches the final substring `"`, and the
|
718
|
+
# overall match succeeds.
|
719
|
+
#
|
720
|
+
# If subexpression `.*` is grouped atomically, the backtracking is disabled, and
|
721
|
+
# the overall match fails:
|
722
|
+
#
|
723
|
+
# /"(?>.*)"/.match('"Quote"') # => nil
|
724
|
+
#
|
725
|
+
# Atomic grouping can affect performance; see [Atomic
|
726
|
+
# Group](https://www.regular-expressions.info/atomic.html).
|
727
|
+
#
|
728
|
+
# #### Subexpression Calls
|
729
|
+
#
|
730
|
+
# As seen above, a backreference number (`\`*n*) or name (`\k<*name*>`) gives
|
731
|
+
# access to a captured *substring*; the corresponding regexp *subexpression* may
|
732
|
+
# also be accessed, via the number (`\g<*n*>`) or name (`\g<*name*>`):
|
733
|
+
#
|
734
|
+
# /\A(?<paren>\(\g<paren>*\))*\z/.match('(())')
|
353
735
|
# # ^1
|
354
736
|
# # ^2
|
355
737
|
# # ^3
|
@@ -361,407 +743,587 @@
|
|
361
743
|
# # ^9
|
362
744
|
# # ^10
|
363
745
|
#
|
746
|
+
# The pattern:
|
747
|
+
#
|
364
748
|
# 1. Matches at the beginning of the string, i.e. before the first character.
|
365
|
-
# 2. Enters a named
|
366
|
-
# 3. Matches
|
367
|
-
# 4. Calls the `paren` group again, i.e. recurses back to the
|
368
|
-
# 5. Re-enters the `paren` group
|
369
|
-
# 6. Matches
|
370
|
-
# 7.
|
371
|
-
# an overall successful match
|
372
|
-
# 8.
|
373
|
-
#
|
374
|
-
# 9.
|
375
|
-
# 10.
|
376
|
-
#
|
377
|
-
#
|
378
|
-
#
|
379
|
-
#
|
380
|
-
#
|
381
|
-
#
|
382
|
-
#
|
383
|
-
#
|
384
|
-
#
|
385
|
-
#
|
386
|
-
#
|
387
|
-
#
|
388
|
-
#
|
389
|
-
#
|
390
|
-
#
|
391
|
-
#
|
392
|
-
#
|
393
|
-
#
|
394
|
-
#
|
395
|
-
#
|
396
|
-
#
|
397
|
-
#
|
398
|
-
#
|
399
|
-
#
|
400
|
-
#
|
401
|
-
#
|
402
|
-
#
|
403
|
-
#
|
749
|
+
# 2. Enters a named group `paren`.
|
750
|
+
# 3. Matches the first character in the string, `'('`.
|
751
|
+
# 4. Calls the `paren` group again, i.e. recurses back to the second step.
|
752
|
+
# 5. Re-enters the `paren` group.
|
753
|
+
# 6. Matches the second character in the string, `'('`.
|
754
|
+
# 7. Attempts to call `paren` a third time, but fails because doing so would
|
755
|
+
# prevent an overall successful match.
|
756
|
+
# 8. Matches the third character in the string, `')'`; marks the end of the
|
757
|
+
# second recursive call.
|
758
|
+
# 9. Matches the fourth character in the string, `')'`.
|
759
|
+
# 10. Matches the end of the string.
|
760
|
+
#
|
761
|
+
# See [Subexpression
|
762
|
+
# calls](https://learnbyexample.github.io/Ruby_Regexp/groupings-and-backreferenc
|
763
|
+
# es.html?highlight=subexpression#subexpression-calls).
|
764
|
+
#
|
765
|
+
# #### Conditionals
|
766
|
+
#
|
767
|
+
# The conditional construct takes the form `(?(*cond*)*yes*|*no*)`, where:
|
768
|
+
#
|
769
|
+
# * *cond* may be a capture number or name.
|
770
|
+
# * The match to be applied is *yes* if *cond* is captured; otherwise the
|
771
|
+
# match to be applied is *no*.
|
772
|
+
# * If not needed, `|*no*` may be omitted.
|
773
|
+
#
|
774
|
+
# Examples:
|
775
|
+
#
|
776
|
+
# re = /\A(foo)?(?(1)(T)|(F))\z/
|
777
|
+
# re.match('fooT') # => #<MatchData "fooT" 1:"foo" 2:"T" 3:nil>
|
778
|
+
# re.match('F') # => #<MatchData "F" 1:nil 2:nil 3:"F">
|
779
|
+
# re.match('fooF') # => nil
|
780
|
+
# re.match('T') # => nil
|
781
|
+
#
|
782
|
+
# re = /\A(?<xyzzy>foo)?(?(<xyzzy>)(T)|(F))\z/
|
783
|
+
# re.match('fooT') # => #<MatchData "fooT" xyzzy:"foo">
|
784
|
+
# re.match('F') # => #<MatchData "F" xyzzy:nil>
|
785
|
+
# re.match('fooF') # => nil
|
786
|
+
# re.match('T') # => nil
|
787
|
+
#
|
788
|
+
# #### Absence Operator
|
789
|
+
#
|
790
|
+
# The absence operator is a special group that matches anything which does *not*
|
791
|
+
# match the contained subexpressions.
|
792
|
+
#
|
793
|
+
# /(?~real)/.match('surrealist') # => #<MatchData "surrea">
|
794
|
+
# /(?~real)ist/.match('surrealist') # => #<MatchData "ealist">
|
795
|
+
# /sur(?~real)ist/.match('surrealist') # => nil
|
796
|
+
#
|
797
|
+
# ### Unicode
|
798
|
+
#
|
799
|
+
# #### Unicode Properties
|
800
|
+
#
|
801
|
+
# The `/\p{*property_name*}/` construct (with lowercase `p`) matches characters
|
802
|
+
# using a Unicode property name, much like a character class; property `Alpha`
|
803
|
+
# specifies alphabetic characters:
|
804
|
+
#
|
805
|
+
# /\p{Alpha}/.match('a') # => #<MatchData "a">
|
806
|
+
# /\p{Alpha}/.match('1') # => nil
|
807
|
+
#
|
808
|
+
# A property can be inverted by prefixing the name with a caret character (`^`):
|
809
|
+
#
|
810
|
+
# /\p{^Alpha}/.match('1') # => #<MatchData "1">
|
811
|
+
# /\p{^Alpha}/.match('a') # => nil
|
812
|
+
#
|
813
|
+
# Or by using `\P` (uppercase `P`):
|
814
|
+
#
|
815
|
+
# /\P{Alpha}/.match('1') # => #<MatchData "1">
|
816
|
+
# /\P{Alpha}/.match('a') # => nil
|
817
|
+
#
|
818
|
+
# See [Unicode Properties](rdoc-ref:regexp/unicode_properties.rdoc) for regexps
|
819
|
+
# based on the numerous properties.
|
820
|
+
#
|
821
|
+
# Some commonly-used properties correspond to POSIX bracket expressions:
|
822
|
+
#
|
823
|
+
# * `/\p{Alnum}/`: Alphabetic and numeric character
|
824
|
+
# * `/\p{Alpha}/`: Alphabetic character
|
825
|
+
# * `/\p{Blank}/`: Space or tab
|
826
|
+
# * `/\p{Cntrl}/`: Control character
|
827
|
+
# * `/\p{Digit}/`: Digit characters, and similar
|
828
|
+
# * `/\p{Lower}/`: Lowercase alphabetical character
|
829
|
+
# * `/\p{Print}/`: Like `\p{Graph}`, but includes the space character
|
830
|
+
# * `/\p{Punct}/`: Punctuation character
|
831
|
+
# * `/\p{Space}/`: Whitespace character (`[:blank:]`, newline, carriage
|
404
832
|
# return, etc.)
|
405
|
-
# * `/\p{Upper}
|
406
|
-
# * `/\p{XDigit}
|
407
|
-
# * `/\p{Word}/` - A member of one of the following Unicode general category
|
408
|
-
# *Letter*, *Mark*, *Number*, *Connector_Punctuation*
|
409
|
-
# * `/\p{ASCII}/` - A character in the ASCII character set
|
410
|
-
# * `/\p{Any}/` - Any Unicode character (including unassigned characters)
|
411
|
-
# * `/\p{Assigned}/` - An assigned character
|
412
|
-
#
|
413
|
-
#
|
414
|
-
# A Unicode character's *General Category* value can also be matched with
|
415
|
-
# `\p{`*Ab*`}` where *Ab* is the category's abbreviation as described below:
|
416
|
-
#
|
417
|
-
# * `/\p{L}/` - 'Letter'
|
418
|
-
# * `/\p{Ll}/` - 'Letter: Lowercase'
|
419
|
-
# * `/\p{Lm}/` - 'Letter: Mark'
|
420
|
-
# * `/\p{Lo}/` - 'Letter: Other'
|
421
|
-
# * `/\p{Lt}/` - 'Letter: Titlecase'
|
422
|
-
# * `/\p{Lu}/` - 'Letter: Uppercase
|
423
|
-
# * `/\p{Lo}/` - 'Letter: Other'
|
424
|
-
# * `/\p{M}/` - 'Mark'
|
425
|
-
# * `/\p{Mn}/` - 'Mark: Nonspacing'
|
426
|
-
# * `/\p{Mc}/` - 'Mark: Spacing Combining'
|
427
|
-
# * `/\p{Me}/` - 'Mark: Enclosing'
|
428
|
-
# * `/\p{N}/` - 'Number'
|
429
|
-
# * `/\p{Nd}/` - 'Number: Decimal Digit'
|
430
|
-
# * `/\p{Nl}/` - 'Number: Letter'
|
431
|
-
# * `/\p{No}/` - 'Number: Other'
|
432
|
-
# * `/\p{P}/` - 'Punctuation'
|
433
|
-
# * `/\p{Pc}/` - 'Punctuation: Connector'
|
434
|
-
# * `/\p{Pd}/` - 'Punctuation: Dash'
|
435
|
-
# * `/\p{Ps}/` - 'Punctuation: Open'
|
436
|
-
# * `/\p{Pe}/` - 'Punctuation: Close'
|
437
|
-
# * `/\p{Pi}/` - 'Punctuation: Initial Quote'
|
438
|
-
# * `/\p{Pf}/` - 'Punctuation: Final Quote'
|
439
|
-
# * `/\p{Po}/` - 'Punctuation: Other'
|
440
|
-
# * `/\p{S}/` - 'Symbol'
|
441
|
-
# * `/\p{Sm}/` - 'Symbol: Math'
|
442
|
-
# * `/\p{Sc}/` - 'Symbol: Currency'
|
443
|
-
# * `/\p{Sc}/` - 'Symbol: Currency'
|
444
|
-
# * `/\p{Sk}/` - 'Symbol: Modifier'
|
445
|
-
# * `/\p{So}/` - 'Symbol: Other'
|
446
|
-
# * `/\p{Z}/` - 'Separator'
|
447
|
-
# * `/\p{Zs}/` - 'Separator: Space'
|
448
|
-
# * `/\p{Zl}/` - 'Separator: Line'
|
449
|
-
# * `/\p{Zp}/` - 'Separator: Paragraph'
|
450
|
-
# * `/\p{C}/` - 'Other'
|
451
|
-
# * `/\p{Cc}/` - 'Other: Control'
|
452
|
-
# * `/\p{Cf}/` - 'Other: Format'
|
453
|
-
# * `/\p{Cn}/` - 'Other: Not Assigned'
|
454
|
-
# * `/\p{Co}/` - 'Other: Private Use'
|
455
|
-
# * `/\p{Cs}/` - 'Other: Surrogate'
|
456
|
-
#
|
457
|
-
#
|
458
|
-
# Lastly, `\p{}` matches a character's Unicode *script*. The following scripts
|
459
|
-
# are supported: *Arabic*, *Armenian*, *Balinese*, *Bengali*, *Bopomofo*,
|
460
|
-
# *Braille*, *Buginese*, *Buhid*, *Canadian_Aboriginal*, *Carian*, *Cham*,
|
461
|
-
# *Cherokee*, *Common*, *Coptic*, *Cuneiform*, *Cypriot*, *Cyrillic*, *Deseret*,
|
462
|
-
# *Devanagari*, *Ethiopic*, *Georgian*, *Glagolitic*, *Gothic*, *Greek*,
|
463
|
-
# *Gujarati*, *Gurmukhi*, *Han*, *Hangul*, *Hanunoo*, *Hebrew*, *Hiragana*,
|
464
|
-
# *Inherited*, *Kannada*, *Katakana*, *Kayah_Li*, *Kharoshthi*, *Khmer*, *Lao*,
|
465
|
-
# *Latin*, *Lepcha*, *Limbu*, *Linear_B*, *Lycian*, *Lydian*, *Malayalam*,
|
466
|
-
# *Mongolian*, *Myanmar*, *New_Tai_Lue*, *Nko*, *Ogham*, *Ol_Chiki*,
|
467
|
-
# *Old_Italic*, *Old_Persian*, *Oriya*, *Osmanya*, *Phags_Pa*, *Phoenician*,
|
468
|
-
# *Rejang*, *Runic*, *Saurashtra*, *Shavian*, *Sinhala*, *Sundanese*,
|
469
|
-
# *Syloti_Nagri*, *Syriac*, *Tagalog*, *Tagbanwa*, *Tai_Le*, *Tamil*, *Telugu*,
|
470
|
-
# *Thaana*, *Thai*, *Tibetan*, *Tifinagh*, *Ugaritic*, *Vai*, and *Yi*.
|
471
|
-
#
|
472
|
-
# Unicode codepoint U+06E9 is named "ARABIC PLACE OF SAJDAH" and belongs to the
|
473
|
-
# Arabic script:
|
474
|
-
#
|
475
|
-
# /\p{Arabic}/.match("\u06E9") #=> #<MatchData "\u06E9">
|
476
|
-
#
|
477
|
-
# All character properties can be inverted by prefixing their name with a caret
|
478
|
-
# (`^`).
|
479
|
-
#
|
480
|
-
# Letter 'A' is not in the Unicode Ll (Letter; Lowercase) category, so this
|
481
|
-
# match succeeds:
|
482
|
-
#
|
483
|
-
# /\p{^Ll}/.match("A") #=> #<MatchData "A">
|
484
|
-
#
|
485
|
-
# ## Anchors
|
486
|
-
#
|
487
|
-
# Anchors are metacharacter that match the zero-width positions between
|
488
|
-
# characters, *anchoring* the match to a specific position.
|
489
|
-
#
|
490
|
-
# * `^` - Matches beginning of line
|
491
|
-
# * `$` - Matches end of line
|
492
|
-
# * `\A` - Matches beginning of string.
|
493
|
-
# * `\Z` - Matches end of string. If string ends with a newline, it matches
|
494
|
-
# just before newline
|
495
|
-
# * `\z` - Matches end of string
|
496
|
-
# * `\G` - Matches first matching position:
|
497
|
-
#
|
498
|
-
# In methods like `String#gsub` and `String#scan`, it changes on each
|
499
|
-
# iteration. It initially matches the beginning of subject, and in each
|
500
|
-
# following iteration it matches where the last match finished.
|
501
|
-
#
|
502
|
-
# " a b c".gsub(/ /, '_') #=> "____a_b_c"
|
503
|
-
# " a b c".gsub(/\G /, '_') #=> "____a b c"
|
504
|
-
#
|
505
|
-
# In methods like `Regexp#match` and `String#match` that take an (optional)
|
506
|
-
# offset, it matches where the search begins.
|
833
|
+
# * `/\p{Upper}/`: Uppercase alphabetical character
|
834
|
+
# * `/\p{XDigit}/`: Digit allowed in a hexadecimal number (i.e., 0-9a-fA-F)
|
507
835
|
#
|
508
|
-
#
|
509
|
-
#
|
836
|
+
# These are also commonly used:
|
837
|
+
#
|
838
|
+
# * `/\p{Emoji}/`: Unicode emoji.
|
839
|
+
# * `/\p{Graph}/`: Characters excluding `/\p{Cntrl}/` and `/\p{Space}/`. Note
|
840
|
+
# that invisible characters under the Unicode
|
841
|
+
# ["Format"](https://www.compart.com/en/unicode/category/Cf) category are
|
842
|
+
# included.
|
843
|
+
# * `/\p{Word}/`: A member in one of these Unicode character categories (see
|
844
|
+
# below) or having one of these Unicode properties:
|
845
|
+
#
|
846
|
+
# * Unicode categories:
|
847
|
+
# * `Mark` (`M`).
|
848
|
+
# * `Decimal Number` (`Nd`)
|
849
|
+
# * `Connector Punctuation` (`Pc`).
|
510
850
|
#
|
511
|
-
#
|
512
|
-
#
|
513
|
-
#
|
514
|
-
# * `(?=`*pat*`)` - *Positive lookahead* assertion: ensures that the following
|
515
|
-
# characters match *pat*, but doesn't include those characters in the
|
516
|
-
# matched text
|
517
|
-
# * `(?!`*pat*`)` - *Negative lookahead* assertion: ensures that the following
|
518
|
-
# characters do not match *pat*, but doesn't include those characters in the
|
519
|
-
# matched text
|
520
|
-
# * `(?<=`*pat*`)` - *Positive lookbehind* assertion: ensures that the
|
521
|
-
# preceding characters match *pat*, but doesn't include those characters in
|
522
|
-
# the matched text
|
523
|
-
# * `(?<!`*pat*`)` - *Negative lookbehind* assertion: ensures that the
|
524
|
-
# preceding characters do not match *pat*, but doesn't include those
|
525
|
-
# characters in the matched text
|
526
|
-
# * `\K` - Uses an positive lookbehind of the content preceding `\K` in the
|
527
|
-
# regexp. For example, the following two regexps are almost equivalent:
|
528
|
-
#
|
529
|
-
# /ab\Kc/
|
530
|
-
# /(?<=ab)c/
|
851
|
+
# * Unicode properties:
|
852
|
+
# * `Alpha`
|
853
|
+
# * `Join_Control`
|
531
854
|
#
|
532
|
-
#
|
855
|
+
# * `/\p{ASCII}/`: A character in the ASCII character set.
|
856
|
+
# * `/\p{Any}/`: Any Unicode character (including unassigned characters).
|
857
|
+
# * `/\p{Assigned}/`: An assigned character.
|
533
858
|
#
|
534
|
-
#
|
535
|
-
# /(?<=(?<=(a))(b))c/
|
859
|
+
# #### Unicode Character Categories
|
536
860
|
#
|
861
|
+
# A Unicode character category name:
|
537
862
|
#
|
538
|
-
#
|
863
|
+
# * May be either its full name or its abbreviated name.
|
864
|
+
# * Is case-insensitive.
|
865
|
+
# * Treats a space, a hyphen, and an underscore as equivalent.
|
539
866
|
#
|
540
|
-
#
|
867
|
+
# Examples:
|
541
868
|
#
|
542
|
-
#
|
543
|
-
#
|
544
|
-
#
|
869
|
+
# /\p{lu}/ # => /\p{lu}/
|
870
|
+
# /\p{LU}/ # => /\p{LU}/
|
871
|
+
# /\p{Uppercase Letter}/ # => /\p{Uppercase Letter}/
|
872
|
+
# /\p{Uppercase_Letter}/ # => /\p{Uppercase_Letter}/
|
873
|
+
# /\p{UPPERCASE-LETTER}/ # => /\p{UPPERCASE-LETTER}/
|
545
874
|
#
|
546
|
-
#
|
875
|
+
# Below are the Unicode character category abbreviations and names. Enumerations
|
876
|
+
# of characters in each category are at the links.
|
547
877
|
#
|
548
|
-
#
|
549
|
-
# does not occur at a word boundary.
|
878
|
+
# Letters:
|
550
879
|
#
|
551
|
-
#
|
880
|
+
# * `L`, `Letter`: `LC`, `Lm`, or `Lo`.
|
881
|
+
# * `LC`, `Cased_Letter`: `Ll`, `Lt`, or `Lu`.
|
882
|
+
# * [Ll, Lowercase_Letter](https://www.compart.com/en/unicode/category/Ll).
|
883
|
+
# * [Lm, Modifier_Letter](https://www.compart.com/en/unicode/category/Lm).
|
884
|
+
# * [Lo, Other_Letter](https://www.compart.com/en/unicode/category/Lo).
|
885
|
+
# * [Lt, Titlecase_Letter](https://www.compart.com/en/unicode/category/Lt).
|
886
|
+
# * [Lu, Uppercase_Letter](https://www.compart.com/en/unicode/category/Lu).
|
552
887
|
#
|
553
|
-
#
|
554
|
-
# boundary so instead of matching the first 'and' it matches from the fourth
|
555
|
-
# letter of 'demand' instead:
|
888
|
+
# Marks:
|
556
889
|
#
|
557
|
-
#
|
890
|
+
# * `M`, `Mark`: `Mc`, `Me`, or `Mn`.
|
891
|
+
# * [Mc, Spacing_Mark](https://www.compart.com/en/unicode/category/Mc).
|
892
|
+
# * [Me, Enclosing_Mark](https://www.compart.com/en/unicode/category/Me).
|
893
|
+
# * [Mn, Nonspacing_Mark](https://www.compart.com/en/unicode/category/Mn).
|
558
894
|
#
|
559
|
-
#
|
560
|
-
#
|
895
|
+
# Numbers:
|
896
|
+
#
|
897
|
+
# * `N`, `Number`: `Nd`, `Nl`, or `No`.
|
898
|
+
# * [Nd, Decimal_Number](https://www.compart.com/en/unicode/category/Nd).
|
899
|
+
# * [Nl, Letter_Number](https://www.compart.com/en/unicode/category/Nl).
|
900
|
+
# * [No, Other_Number](https://www.compart.com/en/unicode/category/No).
|
901
|
+
#
|
902
|
+
# Punctuation:
|
903
|
+
#
|
904
|
+
# * `P`, `Punctuation`: `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or `Ps`.
|
905
|
+
# * [Pc,
|
906
|
+
# Connector_Punctuation](https://www.compart.com/en/unicode/category/Pc).
|
907
|
+
# * [Pd, Dash_Punctuation](https://www.compart.com/en/unicode/category/Pd).
|
908
|
+
# * [Pe, Close_Punctuation](https://www.compart.com/en/unicode/category/Pe).
|
909
|
+
# * [Pf, Final_Punctuation](https://www.compart.com/en/unicode/category/Pf).
|
910
|
+
# * [Pi, Initial_Punctuation](https://www.compart.com/en/unicode/category/Pi).
|
911
|
+
# * [Po, Other_Punctuation](https://www.compart.com/en/unicode/category/Po).
|
912
|
+
# * [Ps, Open_Punctuation](https://www.compart.com/en/unicode/category/Ps).
|
913
|
+
#
|
914
|
+
# * `S`, `Symbol`: `Sc`, `Sk`, `Sm`, or `So`.
|
915
|
+
# * [Sc, Currency_Symbol](https://www.compart.com/en/unicode/category/Sc).
|
916
|
+
# * [Sk, Modifier_Symbol](https://www.compart.com/en/unicode/category/Sk).
|
917
|
+
# * [Sm, Math_Symbol](https://www.compart.com/en/unicode/category/Sm).
|
918
|
+
# * [So, Other_Symbol](https://www.compart.com/en/unicode/category/So).
|
919
|
+
#
|
920
|
+
# * `Z`, `Separator`: `Zl`, `Zp`, or `Zs`.
|
921
|
+
# * [Zl, Line_Separator](https://www.compart.com/en/unicode/category/Zl).
|
922
|
+
# * [Zp, Paragraph_Separator](https://www.compart.com/en/unicode/category/Zp).
|
923
|
+
# * [Zs, Space_Separator](https://www.compart.com/en/unicode/category/Zs).
|
924
|
+
#
|
925
|
+
# * `C`, `Other`: `Cc`, `Cf`, `Cn`, `Co`, or `Cs`.
|
926
|
+
# * [Cc, Control](https://www.compart.com/en/unicode/category/Cc).
|
927
|
+
# * [Cf, Format](https://www.compart.com/en/unicode/category/Cf).
|
928
|
+
# * [Cn, Unassigned](https://www.compart.com/en/unicode/category/Cn).
|
929
|
+
# * [Co, Private_Use](https://www.compart.com/en/unicode/category/Co).
|
930
|
+
# * [Cs, Surrogate](https://www.compart.com/en/unicode/category/Cs).
|
931
|
+
#
|
932
|
+
# #### Unicode Scripts and Blocks
|
933
|
+
#
|
934
|
+
# Among the Unicode properties are:
|
935
|
+
#
|
936
|
+
# * [Unicode scripts](https://en.wikipedia.org/wiki/Script_(Unicode)); see
|
937
|
+
# [supported scripts](https://www.unicode.org/standard/supported.html).
|
938
|
+
# * [Unicode blocks](https://en.wikipedia.org/wiki/Unicode_block); see
|
939
|
+
# [supported blocks](http://www.unicode.org/Public/UNIDATA/Blocks.txt).
|
940
|
+
#
|
941
|
+
# ### POSIX Bracket Expressions
|
942
|
+
#
|
943
|
+
# A POSIX *bracket expression* is also similar to a character class. These
|
944
|
+
# expressions provide a portable alternative to the above, with the added
|
945
|
+
# benefit of encompassing non-ASCII characters:
|
946
|
+
#
|
947
|
+
# * `/\d/` matches only ASCII decimal digits `0` through `9`.
|
948
|
+
# * `/[[:digit:]]/` matches any character in the Unicode `Decimal Number`
|
949
|
+
# (`Nd`) category; see below.
|
950
|
+
#
|
951
|
+
# The POSIX bracket expressions:
|
952
|
+
#
|
953
|
+
# * `/[[:digit:]]/`: Matches a [Unicode
|
954
|
+
# digit](https://www.compart.com/en/unicode/category/Nd):
|
955
|
+
#
|
956
|
+
# /[[:digit:]]/.match('9') # => #<MatchData "9">
|
957
|
+
# /[[:digit:]]/.match("\u{1fbf9}") # => #<MatchData "🯹">
|
958
|
+
#
|
959
|
+
# * `/[[:xdigit:]]/`: Matches a digit allowed in a hexadecimal number;
|
960
|
+
# equivalent to `[0-9a-fA-F]`.
|
961
|
+
#
|
962
|
+
# * `/[[:upper:]]/`: Matches a [Unicode uppercase
|
963
|
+
# letter](https://www.compart.com/en/unicode/category/Lu):
|
561
964
|
#
|
562
|
-
#
|
563
|
-
#
|
965
|
+
# /[[:upper:]]/.match('A') # => #<MatchData "A">
|
966
|
+
# /[[:upper:]]/.match("\u00c6") # => #<MatchData "Æ">
|
564
967
|
#
|
565
|
-
#
|
968
|
+
# * `/[[:lower:]]/`: Matches a [Unicode lowercase
|
969
|
+
# letter](https://www.compart.com/en/unicode/category/Ll):
|
566
970
|
#
|
567
|
-
#
|
568
|
-
#
|
971
|
+
# /[[:lower:]]/.match('a') # => #<MatchData "a">
|
972
|
+
# /[[:lower:]]/.match("\u01fd") # => #<MatchData "ǽ">
|
569
973
|
#
|
570
|
-
# * `/
|
571
|
-
# * `/pat/m` - Treat a newline as a character matched by `.`
|
572
|
-
# * `/pat/x` - Ignore whitespace and comments in the pattern
|
573
|
-
# * `/pat/o` - Perform `#{}` interpolation only once
|
974
|
+
# * `/[[:alpha:]]/`: Matches `/[[:upper:]]/` or `/[[:lower:]]/`.
|
574
975
|
#
|
976
|
+
# * `/[[:alnum:]]/`: Matches `/[[:alpha:]]/` or `/[[:digit:]]/`.
|
575
977
|
#
|
576
|
-
#
|
577
|
-
#
|
578
|
-
# options *off* for the expression enclosed by the parentheses:
|
978
|
+
# * `/[[:space:]]/`: Matches [Unicode space
|
979
|
+
# character](https://www.compart.com/en/unicode/category/Zs):
|
579
980
|
#
|
580
|
-
#
|
581
|
-
#
|
981
|
+
# /[[:space:]]/.match(' ') # => #<MatchData " ">
|
982
|
+
# /[[:space:]]/.match("\u2005") # => #<MatchData " ">
|
582
983
|
#
|
583
|
-
#
|
584
|
-
# pattern:
|
984
|
+
# * `/[[:blank:]]/`: Matches `/[[:space:]]/` or tab character:
|
585
985
|
#
|
586
|
-
#
|
986
|
+
# /[[:blank:]]/.match(' ') # => #<MatchData " ">
|
987
|
+
# /[[:blank:]]/.match("\u2005") # => #<MatchData " ">
|
988
|
+
# /[[:blank:]]/.match("\t") # => #<MatchData "\t">
|
587
989
|
#
|
588
|
-
#
|
990
|
+
# * `/[[:cntrl:]]/`: Matches [Unicode control
|
991
|
+
# character](https://www.compart.com/en/unicode/category/Cc):
|
589
992
|
#
|
590
|
-
#
|
591
|
-
#
|
592
|
-
# Regexp.new("abc # Comment", Regexp::EXTENDED) #=> /abc # Comment/x
|
593
|
-
# Regexp.new("abc", Regexp::IGNORECASE | Regexp::MULTILINE) #=> /abc/mi
|
993
|
+
# /[[:cntrl:]]/.match("\u0000") # => #<MatchData "\u0000">
|
994
|
+
# /[[:cntrl:]]/.match("\u009f") # => #<MatchData "\u009F">
|
594
995
|
#
|
595
|
-
#
|
996
|
+
# * `/[[:graph:]]/`: Matches any character except `/[[:space:]]/` or
|
997
|
+
# `/[[:cntrl:]]/`.
|
596
998
|
#
|
597
|
-
#
|
598
|
-
# space inside the pattern is ignored, and the octothorpe (`#`) character
|
599
|
-
# introduces a comment until the end of the line. This allows the components of
|
600
|
-
# the pattern to be organized in a potentially more readable fashion.
|
999
|
+
# * `/[[:print:]]/`: Matches `/[[:graph:]]/` or space character.
|
601
1000
|
#
|
602
|
-
#
|
1001
|
+
# * `/[[:punct:]]/`: Matches any [Unicode punctuation
|
1002
|
+
# character](https://www.compart.com/en/unicode/category/Po):
|
603
1003
|
#
|
604
|
-
#
|
605
|
-
# [[:digit:]]+ # 1 or more digits before the decimal point
|
606
|
-
# (\. # Decimal point
|
607
|
-
# [[:digit:]]+ # 1 or more digits after the decimal point
|
608
|
-
# )? # The decimal point and following digits are optional
|
609
|
-
# \Z/x
|
610
|
-
# float_pat.match('3.14') #=> #<MatchData "3.14" 1:".14">
|
1004
|
+
# Ruby also supports these (non-POSIX) bracket expressions:
|
611
1005
|
#
|
612
|
-
#
|
1006
|
+
# * `/[[:ascii:]]/`: Matches a character in the ASCII character set.
|
1007
|
+
# * `/[[:word:]]/`: Matches a character in one of these Unicode character
|
1008
|
+
# categories or having one of these Unicode properties:
|
613
1009
|
#
|
614
|
-
#
|
615
|
-
#
|
616
|
-
#
|
1010
|
+
# * Unicode categories:
|
1011
|
+
# * `Mark` (`M`).
|
1012
|
+
# * `Decimal Number` (`Nd`)
|
1013
|
+
# * `Connector Punctuation` (`Pc`).
|
617
1014
|
#
|
1015
|
+
# * Unicode properties:
|
1016
|
+
# * `Alpha`
|
1017
|
+
# * `Join_Control`
|
618
1018
|
#
|
619
|
-
# Comments
|
620
|
-
# construct, where *comment* is arbitrary text ignored by the regexp engine.
|
1019
|
+
# ### Comments
|
621
1020
|
#
|
622
|
-
#
|
1021
|
+
# A comment may be included in a regexp pattern using the `(?#`*comment*`)`
|
1022
|
+
# construct, where *comment* is arbitrary text that is ignored by the regexp
|
1023
|
+
# engine:
|
623
1024
|
#
|
624
|
-
#
|
1025
|
+
# /foo(?#Ignore me)bar/.match('foobar') # => #<MatchData "foobar">
|
625
1026
|
#
|
626
|
-
#
|
627
|
-
# overridden with one of the following modifiers.
|
1027
|
+
# The comment may not include an unescaped terminator character.
|
628
1028
|
#
|
629
|
-
#
|
630
|
-
# * `/`*pat*`/e` - EUC-JP
|
631
|
-
# * `/`*pat*`/s` - Windows-31J
|
632
|
-
# * `/`*pat*`/n` - ASCII-8BIT
|
1029
|
+
# See also [Extended Mode](rdoc-ref:Regexp@Extended+Mode).
|
633
1030
|
#
|
1031
|
+
# ## Modes
|
634
1032
|
#
|
635
|
-
#
|
636
|
-
#
|
637
|
-
#
|
1033
|
+
# Each of these modifiers sets a mode for the regexp:
|
1034
|
+
#
|
1035
|
+
# * `i`: `/*pattern*/i` sets [Case-Insensitive
|
1036
|
+
# Mode](rdoc-ref:Regexp@Case-Insensitive+Mode).
|
1037
|
+
# * `m`: `/*pattern*/m` sets [Multiline Mode](rdoc-ref:Regexp@Multiline+Mode).
|
1038
|
+
# * `x`: `/*pattern*/x` sets [Extended Mode](rdoc-ref:Regexp@Extended+Mode).
|
1039
|
+
# * `o`: `/*pattern*/o` sets [Interpolation
|
1040
|
+
# Mode](rdoc-ref:Regexp@Interpolation+Mode).
|
1041
|
+
#
|
1042
|
+
# Any, all, or none of these may be applied.
|
1043
|
+
#
|
1044
|
+
# Modifiers `i`, `m`, and `x` may be applied to subexpressions:
|
1045
|
+
#
|
1046
|
+
# * `(?*modifier*)` turns the mode "on" for ensuing subexpressions
|
1047
|
+
# * `(?-*modifier*)` turns the mode "off" for ensuing subexpressions
|
1048
|
+
# * `(?*modifier*:*subexp*)` turns the mode "on" for *subexp* within the group
|
1049
|
+
# * `(?-*modifier*:*subexp*)` turns the mode "off" for *subexp* within the
|
1050
|
+
# group
|
1051
|
+
#
|
1052
|
+
# Example:
|
1053
|
+
#
|
1054
|
+
# re = /(?i)te(?-i)st/
|
1055
|
+
# re.match('test') # => #<MatchData "test">
|
1056
|
+
# re.match('TEst') # => #<MatchData "TEst">
|
1057
|
+
# re.match('TEST') # => nil
|
1058
|
+
# re.match('teST') # => nil
|
1059
|
+
#
|
1060
|
+
# re = /t(?i:e)st/
|
1061
|
+
# re.match('test') # => #<MatchData "test">
|
1062
|
+
# re.match('tEst') # => #<MatchData "tEst">
|
1063
|
+
# re.match('tEST') # => nil
|
1064
|
+
#
|
1065
|
+
# Method Regexp#options returns an integer whose value shows the settings for
|
1066
|
+
# case-insensitivity mode, multiline mode, and extended mode.
|
1067
|
+
#
|
1068
|
+
# ### Case-Insensitive Mode
|
1069
|
+
#
|
1070
|
+
# By default, a regexp is case-sensitive:
|
1071
|
+
#
|
1072
|
+
# /foo/.match('FOO') # => nil
|
1073
|
+
#
|
1074
|
+
# Modifier `i` enables case-insensitive mode:
|
1075
|
+
#
|
1076
|
+
# /foo/i.match('FOO')
|
1077
|
+
# # => #<MatchData "FOO">
|
1078
|
+
#
|
1079
|
+
# Method Regexp#casefold? returns whether the mode is case-insensitive.
|
1080
|
+
#
|
1081
|
+
# ### Multiline Mode
|
1082
|
+
#
|
1083
|
+
# The multiline-mode in Ruby is what is commonly called a "dot-all mode":
|
1084
|
+
#
|
1085
|
+
# * Without the `m` modifier, the subexpression `.` does not match newlines:
|
1086
|
+
#
|
1087
|
+
# /a.c/.match("a\nc") # => nil
|
1088
|
+
#
|
1089
|
+
# * With the modifier, it does match:
|
1090
|
+
#
|
1091
|
+
# /a.c/m.match("a\nc") # => #<MatchData "a\nc">
|
1092
|
+
#
|
1093
|
+
# Unlike other languages, the modifier `m` does not affect the anchors `^` and
|
1094
|
+
# `$`. These anchors always match at line-boundaries in Ruby.
|
1095
|
+
#
|
1096
|
+
# ### Extended Mode
|
1097
|
+
#
|
1098
|
+
# Modifier `x` enables extended mode, which means that:
|
1099
|
+
#
|
1100
|
+
# * Literal white space in the pattern is to be ignored.
|
1101
|
+
# * Character `#` marks the remainder of its containing line as a comment,
|
1102
|
+
# which is also to be ignored for matching purposes.
|
1103
|
+
#
|
1104
|
+
# In extended mode, whitespace and comments may be used to form a
|
1105
|
+
# self-documented regexp.
|
1106
|
+
#
|
1107
|
+
# Regexp not in extended mode (matches some Roman numerals):
|
1108
|
+
#
|
1109
|
+
# pattern = '^M{0,3}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$'
|
1110
|
+
# re = /#{pattern}/
|
1111
|
+
# re.match('MCMXLIII') # => #<MatchData "MCMXLIII" 1:"CM" 2:"XL" 3:"III">
|
1112
|
+
#
|
1113
|
+
# Regexp in extended mode:
|
1114
|
+
#
|
1115
|
+
# pattern = <<-EOT
|
1116
|
+
# ^ # beginning of string
|
1117
|
+
# M{0,3} # thousands - 0 to 3 Ms
|
1118
|
+
# (CM|CD|D?C{0,3}) # hundreds - 900 (CM), 400 (CD), 0-300 (0 to 3 Cs),
|
1119
|
+
# # or 500-800 (D, followed by 0 to 3 Cs)
|
1120
|
+
# (XC|XL|L?X{0,3}) # tens - 90 (XC), 40 (XL), 0-30 (0 to 3 Xs),
|
1121
|
+
# # or 50-80 (L, followed by 0 to 3 Xs)
|
1122
|
+
# (IX|IV|V?I{0,3}) # ones - 9 (IX), 4 (IV), 0-3 (0 to 3 Is),
|
1123
|
+
# # or 5-8 (V, followed by 0 to 3 Is)
|
1124
|
+
# $ # end of string
|
1125
|
+
# EOT
|
1126
|
+
# re = /#{pattern}/x
|
1127
|
+
# re.match('MCMXLIII') # => #<MatchData "MCMXLIII" 1:"CM" 2:"XL" 3:"III">
|
1128
|
+
#
|
1129
|
+
# ### Interpolation Mode
|
1130
|
+
#
|
1131
|
+
# Modifier `o` means that the first time a literal regexp with interpolations is
|
1132
|
+
# encountered, the generated Regexp object is saved and used for all future
|
1133
|
+
# evaluations of that literal regexp. Without modifier `o`, the generated Regexp
|
1134
|
+
# is not saved, so each evaluation of the literal regexp generates a new Regexp
|
1135
|
+
# object.
|
1136
|
+
#
|
1137
|
+
# Without modifier `o`:
|
1138
|
+
#
|
1139
|
+
# def letters; sleep 5; /[A-Z][a-z]/; end
|
1140
|
+
# words = %w[abc def xyz]
|
1141
|
+
# start = Time.now
|
1142
|
+
# words.each {|word| word.match(/\A[#{letters}]+\z/) }
|
1143
|
+
# Time.now - start # => 15.0174892
|
1144
|
+
#
|
1145
|
+
# With modifier `o`:
|
1146
|
+
#
|
1147
|
+
# start = Time.now
|
1148
|
+
# words.each {|word| word.match(/\A[#{letters}]+\z/o) }
|
1149
|
+
# Time.now - start # => 5.0010866
|
1150
|
+
#
|
1151
|
+
# Note that if the literal regexp does not have interpolations, the `o` behavior
|
1152
|
+
# is the default.
|
1153
|
+
#
|
1154
|
+
# ## Encodings
|
1155
|
+
#
|
1156
|
+
# By default, a regexp with only US-ASCII characters has US-ASCII encoding:
|
1157
|
+
#
|
1158
|
+
# re = /foo/
|
1159
|
+
# re.source.encoding # => #<Encoding:US-ASCII>
|
1160
|
+
# re.encoding # => #<Encoding:US-ASCII>
|
1161
|
+
#
|
1162
|
+
# A regular expression containing non-US-ASCII characters is assumed to use the
|
1163
|
+
# source encoding. This can be overridden with one of the following modifiers.
|
1164
|
+
#
|
1165
|
+
# * `/*pat*/n`: US-ASCII if only containing US-ASCII characters, otherwise
|
1166
|
+
# ASCII-8BIT:
|
1167
|
+
#
|
1168
|
+
# /foo/n.encoding # => #<Encoding:US-ASCII>
|
1169
|
+
# /foo\xff/n.encoding # => #<Encoding:ASCII-8BIT>
|
1170
|
+
# /foo\x7f/n.encoding # => #<Encoding:US-ASCII>
|
1171
|
+
#
|
1172
|
+
# * `/*pat*/u`: UTF-8
|
1173
|
+
#
|
1174
|
+
# /foo/u.encoding # => #<Encoding:UTF-8>
|
1175
|
+
#
|
1176
|
+
# * `/*pat*/e`: EUC-JP
|
1177
|
+
#
|
1178
|
+
# /foo/e.encoding # => #<Encoding:EUC-JP>
|
1179
|
+
#
|
1180
|
+
# * `/*pat*/s`: Windows-31J
|
1181
|
+
#
|
1182
|
+
# /foo/s.encoding # => #<Encoding:Windows-31J>
|
1183
|
+
#
|
1184
|
+
# A regexp can be matched against a target string when either:
|
1185
|
+
#
|
1186
|
+
# * They have the same encoding.
|
1187
|
+
# * The regexp's encoding is a fixed encoding and the string contains only
|
1188
|
+
# ASCII characters. Method Regexp#fixed_encoding? returns whether the regexp
|
1189
|
+
# has a *fixed* encoding.
|
638
1190
|
#
|
639
1191
|
# If a match between incompatible encodings is attempted an
|
640
1192
|
# `Encoding::CompatibilityError` exception is raised.
|
641
1193
|
#
|
642
|
-
#
|
643
|
-
#
|
644
|
-
#
|
645
|
-
#
|
1194
|
+
# Example:
|
1195
|
+
#
|
1196
|
+
# re = eval("# encoding: ISO-8859-1\n/foo\\xff?/")
|
1197
|
+
# re.encoding # => #<Encoding:ISO-8859-1>
|
1198
|
+
# re =~ "foo".encode("UTF-8") # => 0
|
1199
|
+
# re =~ "foo\u0100" # Raises Encoding::CompatibilityError
|
646
1200
|
#
|
647
|
-
#
|
648
|
-
#
|
649
|
-
# # raises Encoding::CompatibilityError: incompatible encoding regexp match
|
650
|
-
# # (ISO-8859-1 regexp with UTF-8 string)
|
1201
|
+
# The encoding may be explicitly fixed by including Regexp::FIXEDENCODING in the
|
1202
|
+
# second argument for Regexp.new:
|
651
1203
|
#
|
652
|
-
#
|
1204
|
+
# # Regexp with encoding ISO-8859-1.
|
1205
|
+
# re = Regexp.new("a".force_encoding('iso-8859-1'), Regexp::FIXEDENCODING)
|
1206
|
+
# re.encoding # => #<Encoding:ISO-8859-1>
|
1207
|
+
# # Target string with encoding UTF-8.
|
1208
|
+
# s = "a\u3042"
|
1209
|
+
# s.encoding # => #<Encoding:UTF-8>
|
1210
|
+
# re.match(s) # Raises Encoding::CompatibilityError.
|
653
1211
|
#
|
654
|
-
#
|
655
|
-
# * `$~` is equivalent to Regexp.last_match;
|
656
|
-
# * `$&` contains the complete matched text;
|
657
|
-
# * `$`` contains string before match;
|
658
|
-
# * `$'` contains string after match;
|
659
|
-
# * `$1`, `$2` and so on contain text matching first, second, etc capture
|
660
|
-
# group;
|
661
|
-
# * `$+` contains last capture group.
|
1212
|
+
# ## Timeouts
|
662
1213
|
#
|
1214
|
+
# When either a regexp source or a target string comes from untrusted input,
|
1215
|
+
# malicious values could become a denial-of-service attack; to prevent such an
|
1216
|
+
# attack, it is wise to set a timeout.
|
663
1217
|
#
|
664
|
-
#
|
1218
|
+
# Regexp has two timeout values:
|
665
1219
|
#
|
666
|
-
#
|
667
|
-
#
|
668
|
-
# Regexp.
|
1220
|
+
# * A class default timeout, used for a regexp whose instance timeout is
|
1221
|
+
# `nil`; this default is initially `nil`, and may be set by method
|
1222
|
+
# Regexp.timeout=:
|
669
1223
|
#
|
670
|
-
#
|
671
|
-
#
|
672
|
-
#
|
673
|
-
# # same as m.pre_match
|
674
|
-
# $' #=> "k"
|
675
|
-
# # same as m.post_match
|
676
|
-
# $1 #=> "ta"
|
677
|
-
# # same as m[1]
|
678
|
-
# $2 #=> "c"
|
679
|
-
# # same as m[2]
|
680
|
-
# $3 #=> nil
|
681
|
-
# # no third group in pattern
|
682
|
-
# $+ #=> "c"
|
683
|
-
# # same as m[-1]
|
1224
|
+
# Regexp.timeout # => nil
|
1225
|
+
# Regexp.timeout = 3.0
|
1226
|
+
# Regexp.timeout # => 3.0
|
684
1227
|
#
|
685
|
-
#
|
1228
|
+
# * An instance timeout, which defaults to `nil` and may be set in Regexp.new:
|
686
1229
|
#
|
687
|
-
#
|
1230
|
+
# re = Regexp.new('foo', timeout: 5.0)
|
1231
|
+
# re.timeout # => 5.0
|
688
1232
|
#
|
689
|
-
#
|
690
|
-
#
|
1233
|
+
# When regexp.timeout is `nil`, the timeout "falls through" to Regexp.timeout;
|
1234
|
+
# when regexp.timeout is non-`nil`, that value controls timing out:
|
691
1235
|
#
|
692
|
-
#
|
1236
|
+
# | regexp.timeout Value | Regexp.timeout Value | Result |
|
1237
|
+
# |----------------------|----------------------|-----------------------------|
|
1238
|
+
# | nil | nil | Never times out. |
|
1239
|
+
# | nil | Float | Times out in Float seconds. |
|
1240
|
+
# | Float | Any | Times out in Float seconds. |
|
693
1241
|
#
|
694
|
-
#
|
695
|
-
# #=> "aaaaaaaaaaaaaaaaaaaaaaaaadaaaac"
|
1242
|
+
# ## Optimization
|
696
1243
|
#
|
697
|
-
#
|
1244
|
+
# For certain values of the pattern and target string, matching time can grow
|
1245
|
+
# polynomially or exponentially in relation to the input size; the potential
|
1246
|
+
# vulnerability arising from this is the [regular expression
|
1247
|
+
# denial-of-service](https://en.wikipedia.org/wiki/ReDoS) (ReDoS) attack.
|
698
1248
|
#
|
699
|
-
#
|
700
|
-
#
|
701
|
-
#
|
1249
|
+
# Regexp matching can apply an optimization to prevent ReDoS attacks. When the
|
1250
|
+
# optimization is applied, matching time increases linearly (not polynomially or
|
1251
|
+
# exponentially) in relation to the input size, and a ReDoS attack is not
|
1252
|
+
# possible.
|
702
1253
|
#
|
703
|
-
#
|
1254
|
+
# This optimization is applied if the pattern meets these criteria:
|
704
1255
|
#
|
705
|
-
#
|
1256
|
+
# * No backreferences.
|
1257
|
+
# * No subexpression calls.
|
1258
|
+
# * No nested lookaround anchors or atomic groups.
|
1259
|
+
# * No nested quantifiers with counting (i.e. no nested `{n}`, `{min,}`,
|
1260
|
+
# `{,max}`, or `{min,max}` style quantifiers)
|
706
1261
|
#
|
707
|
-
#
|
708
|
-
#
|
709
|
-
# any particular character. The nondeterminism that results produces
|
710
|
-
# super-linear performance. (Consult *Mastering Regular Expressions* (3rd ed.),
|
711
|
-
# pp 222, by *Jeffery Friedl*, for an in-depth analysis). This particular case
|
712
|
-
# can be fixed by use of atomic grouping, which prevents the unnecessary
|
713
|
-
# backtracking:
|
1262
|
+
# You can use method Regexp.linear_time? to determine whether a pattern meets
|
1263
|
+
# these criteria:
|
714
1264
|
#
|
715
|
-
# (
|
716
|
-
#
|
717
|
-
# (
|
718
|
-
# #=> 0.000166571
|
1265
|
+
# Regexp.linear_time?(/a*/) # => true
|
1266
|
+
# Regexp.linear_time?('a*') # => true
|
1267
|
+
# Regexp.linear_time?(/(a*)\1/) # => false
|
719
1268
|
#
|
720
|
-
#
|
721
|
-
#
|
1269
|
+
# However, an untrusted source may not be safe even if the method returns
|
1270
|
+
# `true`, because the optimization uses memoization (which may incur large
|
1271
|
+
# memory consumption).
|
722
1272
|
#
|
723
|
-
#
|
724
|
-
# mandatory *a*s:
|
1273
|
+
# ## References
|
725
1274
|
#
|
726
|
-
#
|
1275
|
+
# Read (online PDF books):
|
727
1276
|
#
|
728
|
-
#
|
729
|
-
#
|
730
|
-
#
|
731
|
-
#
|
732
|
-
#
|
1277
|
+
# * [Mastering Regular
|
1278
|
+
# Expressions](https://ia902508.us.archive.org/10/items/allitebooks-02/Maste
|
1279
|
+
# ring%20Regular%20Expressions%2C%203rd%20Edition.pdf) by Jeffrey E.F.
|
1280
|
+
# Friedl.
|
1281
|
+
# * [Regular Expressions
|
1282
|
+
# Cookbook](https://doc.lagout.org/programmation/Regular%20Expressions/Regul
|
1283
|
+
# ar%20Expressions%20Cookbook_%20Detailed%20Solutions%20in%20Eight%20Program
|
1284
|
+
# ming%20Languages%20%282nd%20ed.%29%20%5BGoyvaerts%20%26%20Levithan%202012-
|
1285
|
+
# 09-06%5D.pdf) by Jan Goyvaerts & Steven Levithan.
|
733
1286
|
#
|
734
|
-
#
|
735
|
-
# backtracking needed. For this case, instead of individually matching 29
|
736
|
-
# optional *a*s, a range of optional *a*s can be matched all at once with
|
737
|
-
# *a{0,29}*:
|
1287
|
+
# Explore, test (interactive online editor):
|
738
1288
|
#
|
739
|
-
#
|
1289
|
+
# * [Rubular](https://rubular.com/).
|
740
1290
|
#
|
741
1291
|
class Regexp
|
742
|
-
#
|
743
|
-
#
|
744
|
-
#
|
745
|
-
|
746
|
-
|
747
|
-
|
748
|
-
|
749
|
-
|
750
|
-
|
751
|
-
|
752
|
-
|
753
|
-
#
|
754
|
-
#
|
755
|
-
#
|
756
|
-
|
757
|
-
|
758
|
-
#
|
759
|
-
#
|
760
|
-
#
|
761
|
-
|
762
|
-
|
763
|
-
|
764
|
-
|
1292
|
+
# Represents an object's ability to be converted to a `Regexp`.
|
1293
|
+
#
|
1294
|
+
# This is only used in `Regexp.try_convert` and `Regexp.union` within the standard library.
|
1295
|
+
interface _ToRegexp
|
1296
|
+
# Converts `self` to a `Regexp`.
|
1297
|
+
def to_regexp: () -> Regexp
|
1298
|
+
end
|
1299
|
+
|
1300
|
+
class TimeoutError < RegexpError
|
1301
|
+
end
|
1302
|
+
|
1303
|
+
# <!-- rdoc-file=re.c -->
|
1304
|
+
# see Regexp.options and Regexp.new
|
1305
|
+
#
|
1306
|
+
EXTENDED: Integer
|
1307
|
+
|
1308
|
+
# <!-- rdoc-file=re.c -->
|
1309
|
+
# see Regexp.options and Regexp.new
|
1310
|
+
#
|
1311
|
+
FIXEDENCODING: Integer
|
1312
|
+
|
1313
|
+
# <!-- rdoc-file=re.c -->
|
1314
|
+
# see Regexp.options and Regexp.new
|
1315
|
+
#
|
1316
|
+
IGNORECASE: Integer
|
1317
|
+
|
1318
|
+
# <!-- rdoc-file=re.c -->
|
1319
|
+
# see Regexp.options and Regexp.new
|
1320
|
+
#
|
1321
|
+
MULTILINE: Integer
|
1322
|
+
|
1323
|
+
# <!-- rdoc-file=re.c -->
|
1324
|
+
# see Regexp.options and Regexp.new
|
1325
|
+
#
|
1326
|
+
NOENCODING: Integer
|
765
1327
|
|
766
1328
|
# <!--
|
767
1329
|
# rdoc-file=re.c
|
@@ -773,192 +1335,324 @@ class Regexp
|
|
773
1335
|
|
774
1336
|
# <!--
|
775
1337
|
# rdoc-file=re.c
|
776
|
-
# - Regexp.escape(
|
777
|
-
# - Regexp.quote(str) -> string
|
1338
|
+
# - Regexp.escape(string) -> new_string
|
778
1339
|
# -->
|
779
|
-
#
|
780
|
-
#
|
781
|
-
#
|
1340
|
+
# Returns a new string that escapes any characters that have special meaning in
|
1341
|
+
# a regular expression:
|
1342
|
+
#
|
1343
|
+
# s = Regexp.escape('\*?{}.') # => "\\\\\\*\\?\\{\\}\\."
|
782
1344
|
#
|
783
|
-
#
|
1345
|
+
# For any string `s`, this call returns a MatchData object:
|
784
1346
|
#
|
785
|
-
|
1347
|
+
# r = Regexp.new(Regexp.escape(s)) # => /\\\\\\\*\\\?\\\{\\\}\\\./
|
1348
|
+
# r.match(s) # => #<MatchData "\\\\\\*\\?\\{\\}\\.">
|
1349
|
+
#
|
1350
|
+
def self.escape: (interned str) -> String
|
786
1351
|
|
787
1352
|
# <!--
|
788
1353
|
# rdoc-file=re.c
|
789
|
-
# - Regexp.last_match
|
790
|
-
# - Regexp.last_match(n)
|
1354
|
+
# - Regexp.last_match -> matchdata or nil
|
1355
|
+
# - Regexp.last_match(n) -> string or nil
|
1356
|
+
# - Regexp.last_match(name) -> string or nil
|
791
1357
|
# -->
|
792
|
-
#
|
793
|
-
# pattern match
|
794
|
-
#
|
1358
|
+
# With no argument, returns the value of `$~`, which is the result of the most
|
1359
|
+
# recent pattern match (see [Regexp global
|
1360
|
+
# variables](rdoc-ref:Regexp@Global+Variables)):
|
1361
|
+
#
|
1362
|
+
# /c(.)t/ =~ 'cat' # => 0
|
1363
|
+
# Regexp.last_match # => #<MatchData "cat" 1:"a">
|
1364
|
+
# /a/ =~ 'foo' # => nil
|
1365
|
+
# Regexp.last_match # => nil
|
1366
|
+
#
|
1367
|
+
# With non-negative integer argument `n`, returns the _n_th field in the
|
1368
|
+
# matchdata, if any, or nil if none:
|
795
1369
|
#
|
796
|
-
#
|
797
|
-
#
|
1370
|
+
# /c(.)t/ =~ 'cat' # => 0
|
1371
|
+
# Regexp.last_match(0) # => "cat"
|
1372
|
+
# Regexp.last_match(1) # => "a"
|
1373
|
+
# Regexp.last_match(2) # => nil
|
798
1374
|
#
|
799
|
-
#
|
800
|
-
# that did the pattern match.
|
1375
|
+
# With negative integer argument `n`, counts backwards from the last field:
|
801
1376
|
#
|
802
|
-
#
|
803
|
-
# Regexp.last_match #=> #<MatchData "cat" 1:"a">
|
804
|
-
# Regexp.last_match(0) #=> "cat"
|
805
|
-
# Regexp.last_match(1) #=> "a"
|
806
|
-
# Regexp.last_match(2) #=> nil
|
1377
|
+
# Regexp.last_match(-1) # => "a"
|
807
1378
|
#
|
808
|
-
#
|
809
|
-
#
|
810
|
-
#
|
811
|
-
#
|
1379
|
+
# With string or symbol argument `name`, returns the string value for the named
|
1380
|
+
# capture, if any:
|
1381
|
+
#
|
1382
|
+
# /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ 'var = val'
|
1383
|
+
# Regexp.last_match # => #<MatchData "var = val" lhs:"var" rhs:"val">
|
1384
|
+
# Regexp.last_match(:lhs) # => "var"
|
1385
|
+
# Regexp.last_match('rhs') # => "val"
|
1386
|
+
# Regexp.last_match('foo') # Raises IndexError.
|
812
1387
|
#
|
813
1388
|
def self.last_match: () -> MatchData?
|
814
|
-
| (
|
815
|
-
|
1389
|
+
| (MatchData::capture capture) -> String?
|
1390
|
+
|
1391
|
+
# <!--
|
1392
|
+
# rdoc-file=re.c
|
1393
|
+
# - Regexp.linear_time?(re)
|
1394
|
+
# - Regexp.linear_time?(string, options = 0)
|
1395
|
+
# -->
|
1396
|
+
# Returns `true` if matching against `re` can be done in linear time to the
|
1397
|
+
# input string.
|
1398
|
+
#
|
1399
|
+
# Regexp.linear_time?(/re/) # => true
|
1400
|
+
#
|
1401
|
+
# Note that this is a property of the ruby interpreter, not of the argument
|
1402
|
+
# regular expression. Identical regexp can or cannot run in linear time
|
1403
|
+
# depending on your ruby binary. Neither forward nor backward compatibility is
|
1404
|
+
# guaranteed about the return value of this method. Our current algorithm is
|
1405
|
+
# (*1) but this is subject to change in the future. Alternative implementations
|
1406
|
+
# can also behave differently. They might always return false for everything.
|
1407
|
+
#
|
1408
|
+
# (*1): https://doi.org/10.1109/SP40001.2021.00032
|
1409
|
+
#
|
1410
|
+
def self.linear_time?: (Regexp regex, ?nil, ?timeout: untyped) -> bool
|
1411
|
+
| (string regex, ?int | string | bool | nil options, ?timeout: untyped) -> bool
|
1412
|
+
|
1413
|
+
# <!--
|
1414
|
+
# rdoc-file=re.c
|
1415
|
+
# - Regexp.escape(string) -> new_string
|
1416
|
+
# -->
|
1417
|
+
# Returns a new string that escapes any characters that have special meaning in
|
1418
|
+
# a regular expression:
|
1419
|
+
#
|
1420
|
+
# s = Regexp.escape('\*?{}.') # => "\\\\\\*\\?\\{\\}\\."
|
1421
|
+
#
|
1422
|
+
# For any string `s`, this call returns a MatchData object:
|
1423
|
+
#
|
1424
|
+
# r = Regexp.new(Regexp.escape(s)) # => /\\\\\\\*\\\?\\\{\\\}\\\./
|
1425
|
+
# r.match(s) # => #<MatchData "\\\\\\*\\?\\{\\}\\.">
|
1426
|
+
#
|
1427
|
+
alias self.quote self.escape
|
816
1428
|
|
817
1429
|
# <!--
|
818
1430
|
# rdoc-file=re.c
|
819
|
-
# - Regexp.
|
820
|
-
# - Regexp.quote(str) -> string
|
1431
|
+
# - Regexp.try_convert(object) -> regexp or nil
|
821
1432
|
# -->
|
822
|
-
#
|
823
|
-
#
|
824
|
-
#
|
1433
|
+
# Returns `object` if it is a regexp:
|
1434
|
+
#
|
1435
|
+
# Regexp.try_convert(/re/) # => /re/
|
1436
|
+
#
|
1437
|
+
# Otherwise if `object` responds to `:to_regexp`, calls `object.to_regexp` and
|
1438
|
+
# returns the result.
|
1439
|
+
#
|
1440
|
+
# Returns `nil` if `object` does not respond to `:to_regexp`.
|
825
1441
|
#
|
826
|
-
# Regexp.
|
1442
|
+
# Regexp.try_convert('re') # => nil
|
1443
|
+
#
|
1444
|
+
# Raises an exception unless `object.to_regexp` returns a regexp.
|
1445
|
+
#
|
1446
|
+
def self.try_convert: (Regexp | _ToRegexp regexp_like) -> Regexp
|
1447
|
+
| (untyped other) -> Regexp?
|
1448
|
+
|
1449
|
+
# <!--
|
1450
|
+
# rdoc-file=re.c
|
1451
|
+
# - Regexp.timeout -> float or nil
|
1452
|
+
# -->
|
1453
|
+
# It returns the current default timeout interval for Regexp matching in seconds.
|
1454
|
+
# `nil` means no default timeout configuration.
|
827
1455
|
#
|
828
|
-
def self.
|
1456
|
+
def self.timeout: () -> Float?
|
829
1457
|
|
830
1458
|
# <!--
|
831
1459
|
# rdoc-file=re.c
|
832
|
-
# - Regexp.
|
1460
|
+
# - Regexp.timeout = float or nil
|
833
1461
|
# -->
|
834
|
-
#
|
835
|
-
#
|
1462
|
+
# It sets the default timeout interval for Regexp matching in seconds. `nil`
|
1463
|
+
# means no default timeout configuration. This configuration is process-global.
|
1464
|
+
# If you want to set timeout for each Regexp, use `timeout` keyword for
|
1465
|
+
# `Regexp.new`.
|
1466
|
+
#
|
1467
|
+
# Regexp.timeout = 1
|
1468
|
+
# /^a*b?a*$/ =~ "a" * 100000 + "x" #=> regexp match timeout (Regexp::TimeoutError)
|
1469
|
+
#
|
1470
|
+
def self.timeout=: [T < _ToF] (T timeout) -> T
|
1471
|
+
|
1472
|
+
# <!--
|
1473
|
+
# rdoc-file=re.c
|
1474
|
+
# - Regexp.union(*patterns) -> regexp
|
1475
|
+
# - Regexp.union(array_of_patterns) -> regexp
|
1476
|
+
# -->
|
1477
|
+
# Returns a new regexp that is the union of the given patterns:
|
1478
|
+
#
|
1479
|
+
# r = Regexp.union(%w[cat dog]) # => /cat|dog/
|
1480
|
+
# r.match('cat') # => #<MatchData "cat">
|
1481
|
+
# r.match('dog') # => #<MatchData "dog">
|
1482
|
+
# r.match('cog') # => nil
|
1483
|
+
#
|
1484
|
+
# For each pattern that is a string, `Regexp.new(pattern)` is used:
|
1485
|
+
#
|
1486
|
+
# Regexp.union('penzance') # => /penzance/
|
1487
|
+
# Regexp.union('a+b*c') # => /a\+b\*c/
|
1488
|
+
# Regexp.union('skiing', 'sledding') # => /skiing|sledding/
|
1489
|
+
# Regexp.union(['skiing', 'sledding']) # => /skiing|sledding/
|
1490
|
+
#
|
1491
|
+
# For each pattern that is a regexp, it is used as is, including its flags:
|
1492
|
+
#
|
1493
|
+
# Regexp.union(/foo/i, /bar/m, /baz/x)
|
1494
|
+
# # => /(?i-mx:foo)|(?m-ix:bar)|(?x-mi:baz)/
|
1495
|
+
# Regexp.union([/foo/i, /bar/m, /baz/x])
|
1496
|
+
# # => /(?i-mx:foo)|(?m-ix:bar)|(?x-mi:baz)/
|
836
1497
|
#
|
837
|
-
#
|
838
|
-
# Regexp.try_convert("re") #=> nil
|
1498
|
+
# With no arguments, returns `/(?!)/`:
|
839
1499
|
#
|
840
|
-
#
|
841
|
-
# Regexp.try_convert(o) #=> nil
|
842
|
-
# def o.to_regexp() /foo/ end
|
843
|
-
# Regexp.try_convert(o) #=> /foo/
|
1500
|
+
# Regexp.union # => /(?!)/
|
844
1501
|
#
|
845
|
-
|
1502
|
+
# If any regexp pattern contains captures, the behavior is unspecified.
|
1503
|
+
#
|
1504
|
+
def self.union: (*Regexp | _ToRegexp | string patterns) -> Regexp
|
1505
|
+
| (array[Regexp | _ToRegexp | string] patterns) -> Regexp
|
1506
|
+
| (Symbol | [Symbol] symbol_pattern) -> Regexp
|
846
1507
|
|
847
1508
|
# <!--
|
848
1509
|
# rdoc-file=re.c
|
849
|
-
# - Regexp.
|
850
|
-
# - Regexp.
|
1510
|
+
# - Regexp.new(string, options = 0, timeout: nil) -> regexp
|
1511
|
+
# - Regexp.new(regexp, timeout: nil) -> regexp
|
851
1512
|
# -->
|
852
|
-
#
|
853
|
-
#
|
854
|
-
#
|
855
|
-
#
|
856
|
-
#
|
857
|
-
#
|
858
|
-
#
|
859
|
-
#
|
860
|
-
#
|
861
|
-
#
|
862
|
-
#
|
863
|
-
#
|
864
|
-
#
|
865
|
-
#
|
866
|
-
#
|
867
|
-
#
|
868
|
-
|
869
|
-
|
870
|
-
|
1513
|
+
# With argument `string` given, returns a new regexp with the given string and
|
1514
|
+
# options:
|
1515
|
+
#
|
1516
|
+
# r = Regexp.new('foo') # => /foo/
|
1517
|
+
# r.source # => "foo"
|
1518
|
+
# r.options # => 0
|
1519
|
+
#
|
1520
|
+
# Optional argument `options` is one of the following:
|
1521
|
+
#
|
1522
|
+
# * A String of options:
|
1523
|
+
#
|
1524
|
+
# Regexp.new('foo', 'i') # => /foo/i
|
1525
|
+
# Regexp.new('foo', 'im') # => /foo/im
|
1526
|
+
#
|
1527
|
+
# * The bit-wise OR of one or more of the constants Regexp::EXTENDED,
|
1528
|
+
# Regexp::IGNORECASE, Regexp::MULTILINE, and Regexp::NOENCODING:
|
1529
|
+
#
|
1530
|
+
# Regexp.new('foo', Regexp::IGNORECASE) # => /foo/i
|
1531
|
+
# Regexp.new('foo', Regexp::EXTENDED) # => /foo/x
|
1532
|
+
# Regexp.new('foo', Regexp::MULTILINE) # => /foo/m
|
1533
|
+
# Regexp.new('foo', Regexp::NOENCODING) # => /foo/n
|
1534
|
+
# flags = Regexp::IGNORECASE | Regexp::EXTENDED | Regexp::MULTILINE
|
1535
|
+
# Regexp.new('foo', flags) # => /foo/mix
|
1536
|
+
#
|
1537
|
+
# * `nil` or `false`, which is ignored.
|
1538
|
+
# * Any other truthy value, in which case the regexp will be case-insensitive.
|
1539
|
+
#
|
1540
|
+
# If optional keyword argument `timeout` is given, its float value overrides the
|
1541
|
+
# timeout interval for the class, Regexp.timeout. If `nil` is passed as
|
1542
|
+
# `timeout`, it uses the timeout interval for the class, Regexp.timeout.
|
1543
|
+
#
|
1544
|
+
# With argument `regexp` given, returns a new regexp. The source, options,
|
1545
|
+
# timeout are the same as `regexp`. `options` and `n_flag` arguments are
|
1546
|
+
# ineffective. The timeout can be overridden by `timeout` keyword.
|
1547
|
+
#
|
1548
|
+
# options = Regexp::MULTILINE
|
1549
|
+
# r = Regexp.new('foo', options, timeout: 1.1) # => /foo/m
|
1550
|
+
# r2 = Regexp.new(r) # => /foo/m
|
1551
|
+
# r2.timeout # => 1.1
|
1552
|
+
# r3 = Regexp.new(r, timeout: 3.14) # => /foo/m
|
1553
|
+
# r3.timeout # => 3.14
|
1554
|
+
#
|
1555
|
+
def initialize: (Regexp regexp, ?timeout: _ToF?) -> void
|
1556
|
+
| (string pattern, ?int | string | bool | nil options, ?timeout: _ToF?) -> void
|
871
1557
|
|
872
|
-
|
1558
|
+
def initialize_copy: (self object) -> self
|
873
1559
|
|
874
1560
|
# <!-- rdoc-file=re.c -->
|
875
|
-
#
|
876
|
-
# the same
|
1561
|
+
# Returns `true` if `object` is another Regexp whose pattern, flags, and
|
1562
|
+
# encoding are the same as `self`, `false` otherwise:
|
877
1563
|
#
|
878
|
-
# /
|
879
|
-
# /
|
880
|
-
# /
|
881
|
-
# /
|
1564
|
+
# /foo/ == Regexp.new('foo') # => true
|
1565
|
+
# /foo/ == /foo/i # => false
|
1566
|
+
# /foo/ == Regexp.new('food') # => false
|
1567
|
+
# /foo/ == Regexp.new("abc".force_encoding("euc-jp")) # => false
|
882
1568
|
#
|
883
1569
|
def ==: (untyped other) -> bool
|
884
1570
|
|
885
1571
|
# <!--
|
886
1572
|
# rdoc-file=re.c
|
887
|
-
# -
|
1573
|
+
# - regexp === string -> true or false
|
888
1574
|
# -->
|
889
|
-
#
|
1575
|
+
# Returns `true` if `self` finds a match in `string`:
|
890
1576
|
#
|
891
|
-
# a
|
892
|
-
#
|
893
|
-
# when /\A[a-z]*\z/; print "Lower case\n"
|
894
|
-
# when /\A[A-Z]*\z/; print "Upper case\n"
|
895
|
-
# else; print "Mixed case\n"
|
896
|
-
# end
|
897
|
-
# #=> "Upper case"
|
1577
|
+
# /^[a-z]*$/ === 'HELLO' # => false
|
1578
|
+
# /^[A-Z]*$/ === 'HELLO' # => true
|
898
1579
|
#
|
899
|
-
#
|
900
|
-
# compare against a String.
|
1580
|
+
# This method is called in case statements:
|
901
1581
|
#
|
902
|
-
#
|
903
|
-
#
|
1582
|
+
# s = 'HELLO'
|
1583
|
+
# case s
|
1584
|
+
# when /\A[a-z]*\z/; print "Lower case\n"
|
1585
|
+
# when /\A[A-Z]*\z/; print "Upper case\n"
|
1586
|
+
# else print "Mixed case\n"
|
1587
|
+
# end # => "Upper case"
|
904
1588
|
#
|
905
1589
|
def ===: (untyped other) -> bool
|
906
1590
|
|
907
1591
|
# <!--
|
908
1592
|
# rdoc-file=re.c
|
909
|
-
# -
|
1593
|
+
# - regexp =~ string -> integer or nil
|
910
1594
|
# -->
|
911
|
-
#
|
1595
|
+
# Returns the integer index (in characters) of the first match for `self` and
|
1596
|
+
# `string`, or `nil` if none; also sets the [Regexp global
|
1597
|
+
# variables](rdoc-ref:Regexp@Global+Variables):
|
1598
|
+
#
|
1599
|
+
# /at/ =~ 'input data' # => 7
|
1600
|
+
# $~ # => #<MatchData "at">
|
1601
|
+
# /ax/ =~ 'input data' # => nil
|
1602
|
+
# $~ # => nil
|
912
1603
|
#
|
913
|
-
#
|
914
|
-
#
|
1604
|
+
# Assigns named captures to local variables of the same names if and only if
|
1605
|
+
# `self`:
|
915
1606
|
#
|
916
|
-
#
|
917
|
-
#
|
1607
|
+
# * Is a regexp literal; see [Regexp
|
1608
|
+
# Literals](rdoc-ref:syntax/literals.rdoc@Regexp+Literals).
|
1609
|
+
# * Does not contain interpolations; see [Regexp
|
1610
|
+
# interpolation](rdoc-ref:Regexp@Interpolation+Mode).
|
1611
|
+
# * Is at the left of the expression.
|
918
1612
|
#
|
919
|
-
#
|
920
|
-
# p lhs #=> "x"
|
921
|
-
# p rhs #=> "y"
|
1613
|
+
# Example:
|
922
1614
|
#
|
923
|
-
#
|
1615
|
+
# /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ ' x = y '
|
1616
|
+
# p lhs # => "x"
|
1617
|
+
# p rhs # => "y"
|
924
1618
|
#
|
925
|
-
#
|
926
|
-
# p lhs #=> nil
|
927
|
-
# p rhs #=> nil
|
1619
|
+
# Assigns `nil` if not matched:
|
928
1620
|
#
|
929
|
-
#
|
930
|
-
#
|
931
|
-
#
|
1621
|
+
# /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ ' x = '
|
1622
|
+
# p lhs # => nil
|
1623
|
+
# p rhs # => nil
|
932
1624
|
#
|
933
|
-
#
|
1625
|
+
# Does not make local variable assignments if `self` is not a regexp literal:
|
934
1626
|
#
|
935
|
-
#
|
936
|
-
#
|
937
|
-
# p
|
938
|
-
# p
|
1627
|
+
# r = /(?<foo>\w+)\s*=\s*(?<bar>\w+)/
|
1628
|
+
# r =~ ' x = y '
|
1629
|
+
# p foo # Undefined local variable
|
1630
|
+
# p bar # Undefined local variable
|
939
1631
|
#
|
940
|
-
#
|
1632
|
+
# The assignment does not occur if the regexp is not at the left:
|
941
1633
|
#
|
942
|
-
#
|
943
|
-
#
|
944
|
-
# p lhs # undefined local variable
|
1634
|
+
# ' x = y ' =~ /(?<foo>\w+)\s*=\s*(?<foo>\w+)/
|
1635
|
+
# p foo, foo # Undefined local variables
|
945
1636
|
#
|
946
|
-
#
|
1637
|
+
# A regexp interpolation, `#{}`, also disables the assignment:
|
947
1638
|
#
|
948
|
-
#
|
949
|
-
#
|
1639
|
+
# r = /(?<foo>\w+)/
|
1640
|
+
# /(?<foo>\w+)\s*=\s*#{r}/ =~ 'x = y'
|
1641
|
+
# p foo # Undefined local variable
|
950
1642
|
#
|
951
|
-
def =~: (
|
1643
|
+
def =~: (interned? string) -> Integer?
|
1644
|
+
| (nil) -> nil
|
952
1645
|
|
953
1646
|
# <!--
|
954
1647
|
# rdoc-file=re.c
|
955
|
-
# -
|
1648
|
+
# - casefold? -> true or false
|
956
1649
|
# -->
|
957
|
-
# Returns
|
1650
|
+
# Returns `true` if the case-insensitivity flag in `self` is set, `false`
|
1651
|
+
# otherwise:
|
958
1652
|
#
|
959
|
-
# /a/.casefold?
|
960
|
-
# /a/i.casefold?
|
961
|
-
# /(?i:a)/.casefold?
|
1653
|
+
# /a/.casefold? # => false
|
1654
|
+
# /a/i.casefold? # => true
|
1655
|
+
# /(?i:a)/.casefold? # => false
|
962
1656
|
#
|
963
1657
|
def casefold?: () -> bool
|
964
1658
|
|
@@ -972,253 +1666,265 @@ class Regexp
|
|
972
1666
|
|
973
1667
|
# <!--
|
974
1668
|
# rdoc-file=re.c
|
975
|
-
# -
|
976
|
-
# - rxp.eql?(other_rxp) -> true or false
|
1669
|
+
# - regexp == object -> true or false
|
977
1670
|
# -->
|
978
|
-
#
|
979
|
-
# the same
|
1671
|
+
# Returns `true` if `object` is another Regexp whose pattern, flags, and
|
1672
|
+
# encoding are the same as `self`, `false` otherwise:
|
980
1673
|
#
|
981
|
-
# /
|
982
|
-
# /
|
983
|
-
# /
|
984
|
-
# /
|
1674
|
+
# /foo/ == Regexp.new('foo') # => true
|
1675
|
+
# /foo/ == /foo/i # => false
|
1676
|
+
# /foo/ == Regexp.new('food') # => false
|
1677
|
+
# /foo/ == Regexp.new("abc".force_encoding("euc-jp")) # => false
|
985
1678
|
#
|
986
|
-
|
1679
|
+
alias eql? ==
|
987
1680
|
|
988
1681
|
# <!--
|
989
1682
|
# rdoc-file=re.c
|
990
|
-
# -
|
1683
|
+
# - fixed_encoding? -> true or false
|
991
1684
|
# -->
|
992
|
-
# Returns false if
|
993
|
-
# encoding
|
994
|
-
#
|
995
|
-
# r = /a/
|
996
|
-
# r.fixed_encoding?
|
997
|
-
# r
|
998
|
-
# r
|
999
|
-
# r
|
1000
|
-
#
|
1001
|
-
# r = /a/u
|
1002
|
-
# r.fixed_encoding?
|
1003
|
-
# r.
|
1004
|
-
# r
|
1005
|
-
# r
|
1006
|
-
#
|
1007
|
-
#
|
1008
|
-
# r
|
1009
|
-
# r.
|
1010
|
-
# r.
|
1011
|
-
# r
|
1012
|
-
# r
|
1013
|
-
# r =~ "abc".force_encoding("euc-jp") #=> nil
|
1685
|
+
# Returns `false` if `self` is applicable to a string with any ASCII-compatible
|
1686
|
+
# encoding; otherwise returns `true`:
|
1687
|
+
#
|
1688
|
+
# r = /a/ # => /a/
|
1689
|
+
# r.fixed_encoding? # => false
|
1690
|
+
# r.match?("\u{6666} a") # => true
|
1691
|
+
# r.match?("\xa1\xa2 a".force_encoding("euc-jp")) # => true
|
1692
|
+
# r.match?("abc".force_encoding("euc-jp")) # => true
|
1693
|
+
#
|
1694
|
+
# r = /a/u # => /a/
|
1695
|
+
# r.fixed_encoding? # => true
|
1696
|
+
# r.match?("\u{6666} a") # => true
|
1697
|
+
# r.match?("\xa1\xa2".force_encoding("euc-jp")) # Raises exception.
|
1698
|
+
# r.match?("abc".force_encoding("euc-jp")) # => true
|
1699
|
+
#
|
1700
|
+
# r = /\u{6666}/ # => /\u{6666}/
|
1701
|
+
# r.fixed_encoding? # => true
|
1702
|
+
# r.encoding # => #<Encoding:UTF-8>
|
1703
|
+
# r.match?("\u{6666} a") # => true
|
1704
|
+
# r.match?("\xa1\xa2".force_encoding("euc-jp")) # Raises exception.
|
1705
|
+
# r.match?("abc".force_encoding("euc-jp")) # => false
|
1014
1706
|
#
|
1015
1707
|
def fixed_encoding?: () -> bool
|
1016
1708
|
|
1017
1709
|
# <!--
|
1018
1710
|
# rdoc-file=re.c
|
1019
|
-
# -
|
1711
|
+
# - hash -> integer
|
1020
1712
|
# -->
|
1021
|
-
#
|
1713
|
+
# Returns the integer hash value for `self`.
|
1022
1714
|
#
|
1023
|
-
#
|
1715
|
+
# Related: Object#hash.
|
1024
1716
|
#
|
1025
1717
|
def hash: () -> Integer
|
1026
1718
|
|
1027
1719
|
# <!--
|
1028
1720
|
# rdoc-file=re.c
|
1029
|
-
# -
|
1721
|
+
# - inspect -> string
|
1030
1722
|
# -->
|
1031
|
-
#
|
1032
|
-
# `#inspect` actually produces the more natural version of the string than
|
1033
|
-
# `#to_s`.
|
1723
|
+
# Returns a nicely-formatted string representation of `self`:
|
1034
1724
|
#
|
1035
|
-
# /ab+c/ix.inspect
|
1725
|
+
# /ab+c/ix.inspect # => "/ab+c/ix"
|
1726
|
+
#
|
1727
|
+
# Related: Regexp#to_s.
|
1036
1728
|
#
|
1037
1729
|
def inspect: () -> String
|
1038
1730
|
|
1039
1731
|
# <!--
|
1040
1732
|
# rdoc-file=re.c
|
1041
|
-
# -
|
1042
|
-
# -
|
1733
|
+
# - match(string, offset = 0) -> matchdata or nil
|
1734
|
+
# - match(string, offset = 0) {|matchdata| ... } -> object
|
1043
1735
|
# -->
|
1044
|
-
#
|
1045
|
-
#
|
1046
|
-
#
|
1047
|
-
#
|
1048
|
-
#
|
1049
|
-
# /
|
1050
|
-
# /
|
1051
|
-
#
|
1052
|
-
#
|
1053
|
-
#
|
1054
|
-
#
|
1055
|
-
# /
|
1056
|
-
#
|
1057
|
-
#
|
1058
|
-
#
|
1059
|
-
#
|
1060
|
-
#
|
1061
|
-
#
|
1062
|
-
#
|
1063
|
-
#
|
1064
|
-
#
|
1065
|
-
#
|
1066
|
-
#
|
1067
|
-
#
|
1068
|
-
#
|
1069
|
-
|
1070
|
-
|
1736
|
+
# With no block given, returns the MatchData object that describes the match, if
|
1737
|
+
# any, or `nil` if none; the search begins at the given character `offset` in
|
1738
|
+
# `string`:
|
1739
|
+
#
|
1740
|
+
# /abra/.match('abracadabra') # => #<MatchData "abra">
|
1741
|
+
# /abra/.match('abracadabra', 4) # => #<MatchData "abra">
|
1742
|
+
# /abra/.match('abracadabra', 8) # => nil
|
1743
|
+
# /abra/.match('abracadabra', 800) # => nil
|
1744
|
+
#
|
1745
|
+
# string = "\u{5d0 5d1 5e8 5d0}cadabra"
|
1746
|
+
# /abra/.match(string, 7) #=> #<MatchData "abra">
|
1747
|
+
# /abra/.match(string, 8) #=> nil
|
1748
|
+
# /abra/.match(string.b, 8) #=> #<MatchData "abra">
|
1749
|
+
#
|
1750
|
+
# With a block given, calls the block if and only if a match is found; returns
|
1751
|
+
# the block's value:
|
1752
|
+
#
|
1753
|
+
# /abra/.match('abracadabra') {|matchdata| p matchdata }
|
1754
|
+
# # => #<MatchData "abra">
|
1755
|
+
# /abra/.match('abracadabra', 4) {|matchdata| p matchdata }
|
1756
|
+
# # => #<MatchData "abra">
|
1757
|
+
# /abra/.match('abracadabra', 8) {|matchdata| p matchdata }
|
1758
|
+
# # => nil
|
1759
|
+
# /abra/.match('abracadabra', 8) {|matchdata| fail 'Cannot happen' }
|
1760
|
+
# # => nil
|
1761
|
+
#
|
1762
|
+
# Output (from the first two blocks above):
|
1763
|
+
#
|
1764
|
+
# #<MatchData "abra">
|
1765
|
+
# #<MatchData "abra">
|
1766
|
+
#
|
1767
|
+
# /(.)(.)(.)/.match("abc")[2] # => "b"
|
1768
|
+
# /(.)(.)/.match("abc", 1)[2] # => "c"
|
1769
|
+
#
|
1770
|
+
def match: (interned? str, ?int offset) -> MatchData?
|
1771
|
+
| [T] (interned? str, ?int offset) { (MatchData matchdata) -> T } -> T?
|
1772
|
+
| (nil, ?int offset) ?{ (MatchData matchdata) -> void } -> nil
|
1071
1773
|
|
1072
1774
|
# <!--
|
1073
1775
|
# rdoc-file=re.c
|
1074
|
-
# -
|
1075
|
-
# -
|
1776
|
+
# - match?(string) -> true or false
|
1777
|
+
# - match?(string, offset = 0) -> true or false
|
1076
1778
|
# -->
|
1077
1779
|
# Returns `true` or `false` to indicate whether the regexp is matched or not
|
1078
1780
|
# without updating $~ and other related variables. If the second parameter is
|
1079
1781
|
# present, it specifies the position in the string to begin the search.
|
1080
1782
|
#
|
1081
|
-
# /R.../.match?("Ruby")
|
1082
|
-
# /R.../.match?("Ruby", 1)
|
1083
|
-
# /P.../.match?("Ruby")
|
1084
|
-
# $&
|
1783
|
+
# /R.../.match?("Ruby") # => true
|
1784
|
+
# /R.../.match?("Ruby", 1) # => false
|
1785
|
+
# /P.../.match?("Ruby") # => false
|
1786
|
+
# $& # => nil
|
1085
1787
|
#
|
1086
|
-
def match?: (
|
1788
|
+
def match?: (interned str, ?int offset) -> bool
|
1789
|
+
| (nil, ?int offset) -> false
|
1087
1790
|
|
1088
1791
|
# <!--
|
1089
1792
|
# rdoc-file=re.c
|
1090
|
-
# -
|
1793
|
+
# - named_captures -> hash
|
1091
1794
|
# -->
|
1092
|
-
# Returns a hash representing
|
1795
|
+
# Returns a hash representing named captures of `self` (see [Named
|
1796
|
+
# Captures](rdoc-ref:Regexp@Named+Captures)):
|
1093
1797
|
#
|
1094
|
-
#
|
1095
|
-
#
|
1798
|
+
# * Each key is the name of a named capture.
|
1799
|
+
# * Each value is an array of integer indexes for that named capture.
|
1096
1800
|
#
|
1097
|
-
#
|
1098
|
-
# #=> {"foo"=>[1], "bar"=>[2]}
|
1801
|
+
# Examples:
|
1099
1802
|
#
|
1100
|
-
# /(?<foo>.)(?<
|
1101
|
-
#
|
1803
|
+
# /(?<foo>.)(?<bar>.)/.named_captures # => {"foo"=>[1], "bar"=>[2]}
|
1804
|
+
# /(?<foo>.)(?<foo>.)/.named_captures # => {"foo"=>[1, 2]}
|
1805
|
+
# /(.)(.)/.named_captures # => {}
|
1102
1806
|
#
|
1103
|
-
|
1807
|
+
def named_captures: () -> Hash[String, Array[Integer]]
|
1808
|
+
|
1809
|
+
# <!--
|
1810
|
+
# rdoc-file=re.c
|
1811
|
+
# - names -> array_of_names
|
1812
|
+
# -->
|
1813
|
+
# Returns an array of names of captures (see [Named
|
1814
|
+
# Captures](rdoc-ref:Regexp@Named+Captures)):
|
1104
1815
|
#
|
1105
|
-
# /(
|
1106
|
-
#
|
1816
|
+
# /(?<foo>.)(?<bar>.)(?<baz>.)/.names # => ["foo", "bar", "baz"]
|
1817
|
+
# /(?<foo>.)(?<foo>.)/.names # => ["foo"]
|
1818
|
+
# /(.)(.)/.names # => []
|
1107
1819
|
#
|
1108
|
-
def
|
1820
|
+
def names: () -> Array[String]
|
1109
1821
|
|
1110
1822
|
# <!--
|
1111
1823
|
# rdoc-file=re.c
|
1112
|
-
# -
|
1824
|
+
# - options -> integer
|
1113
1825
|
# -->
|
1114
|
-
# Returns
|
1826
|
+
# Returns an integer whose bits show the options set in `self`.
|
1827
|
+
#
|
1828
|
+
# The option bits are:
|
1829
|
+
#
|
1830
|
+
# Regexp::IGNORECASE # => 1
|
1831
|
+
# Regexp::EXTENDED # => 2
|
1832
|
+
# Regexp::MULTILINE # => 4
|
1115
1833
|
#
|
1116
|
-
#
|
1117
|
-
# #=> ["foo", "bar", "baz"]
|
1834
|
+
# Examples:
|
1118
1835
|
#
|
1119
|
-
# /
|
1120
|
-
#
|
1836
|
+
# /foo/.options # => 0
|
1837
|
+
# /foo/i.options # => 1
|
1838
|
+
# /foo/x.options # => 2
|
1839
|
+
# /foo/m.options # => 4
|
1840
|
+
# /foo/mix.options # => 7
|
1121
1841
|
#
|
1122
|
-
#
|
1123
|
-
#
|
1842
|
+
# Note that additional bits may be set in the returned integer; these are
|
1843
|
+
# maintained internally in `self`, are ignored if passed to Regexp.new, and may
|
1844
|
+
# be ignored by the caller:
|
1845
|
+
#
|
1846
|
+
# Returns the set of bits corresponding to the options used when creating this
|
1847
|
+
# regexp (see Regexp::new for details). Note that additional bits may be set in
|
1848
|
+
# the returned options: these are used internally by the regular expression
|
1849
|
+
# code. These extra bits are ignored if the options are passed to Regexp::new:
|
1124
1850
|
#
|
1125
|
-
|
1851
|
+
# r = /\xa1\xa2/e # => /\xa1\xa2/
|
1852
|
+
# r.source # => "\\xa1\\xa2"
|
1853
|
+
# r.options # => 16
|
1854
|
+
# Regexp.new(r.source, r.options) # => /\xa1\xa2/
|
1855
|
+
#
|
1856
|
+
def options: () -> Integer
|
1126
1857
|
|
1127
1858
|
# <!--
|
1128
1859
|
# rdoc-file=re.c
|
1129
|
-
# -
|
1860
|
+
# - source -> string
|
1130
1861
|
# -->
|
1131
|
-
# Returns the
|
1132
|
-
# Regexp (see Regexp::new for details. Note that additional bits may be set in
|
1133
|
-
# the returned options: these are used internally by the regular expression
|
1134
|
-
# code. These extra bits are ignored if the options are passed to Regexp::new.
|
1862
|
+
# Returns the original string of `self`:
|
1135
1863
|
#
|
1136
|
-
#
|
1137
|
-
# Regexp::EXTENDED #=> 2
|
1138
|
-
# Regexp::MULTILINE #=> 4
|
1864
|
+
# /ab+c/ix.source # => "ab+c"
|
1139
1865
|
#
|
1140
|
-
#
|
1141
|
-
# /cat/ix.options #=> 3
|
1142
|
-
# Regexp.new('cat', true).options #=> 1
|
1143
|
-
# /\xa1\xa2/e.options #=> 16
|
1866
|
+
# Regexp escape sequences are retained:
|
1144
1867
|
#
|
1145
|
-
#
|
1146
|
-
# Regexp.new(r.source, r.options) #=> /cat/ix
|
1868
|
+
# /\x20\+/.source # => "\\x20\\+"
|
1147
1869
|
#
|
1148
|
-
|
1870
|
+
# Lexer escape characters are not retained:
|
1871
|
+
#
|
1872
|
+
# /\//.source # => "/"
|
1873
|
+
#
|
1874
|
+
def source: () -> String
|
1149
1875
|
|
1150
1876
|
# <!--
|
1151
1877
|
# rdoc-file=re.c
|
1152
|
-
# -
|
1878
|
+
# - to_s -> string
|
1153
1879
|
# -->
|
1154
|
-
# Returns the
|
1880
|
+
# Returns a string showing the options and string of `self`:
|
1155
1881
|
#
|
1156
|
-
# /ab+c/ix
|
1882
|
+
# r0 = /ab+c/ix
|
1883
|
+
# s0 = r0.to_s # => "(?ix-m:ab+c)"
|
1157
1884
|
#
|
1158
|
-
#
|
1885
|
+
# The returned string may be used as an argument to Regexp.new, or as
|
1886
|
+
# interpolated text for a [Regexp
|
1887
|
+
# interpolation](rdoc-ref:Regexp@Interpolation+Mode):
|
1159
1888
|
#
|
1160
|
-
#
|
1889
|
+
# r1 = Regexp.new(s0) # => /(?ix-m:ab+c)/
|
1890
|
+
# r2 = /#{s0}/ # => /(?ix-m:ab+c)/
|
1161
1891
|
#
|
1162
|
-
|
1892
|
+
# Note that `r1` and `r2` are not equal to `r0` because their original strings
|
1893
|
+
# are different:
|
1894
|
+
#
|
1895
|
+
# r0 == r1 # => false
|
1896
|
+
# r0.source # => "ab+c"
|
1897
|
+
# r1.source # => "(?ix-m:ab+c)"
|
1898
|
+
#
|
1899
|
+
# Related: Regexp#inspect.
|
1900
|
+
#
|
1901
|
+
def to_s: () -> String
|
1163
1902
|
|
1164
1903
|
# <!--
|
1165
1904
|
# rdoc-file=re.c
|
1166
|
-
# - rxp.
|
1905
|
+
# - rxp.timeout -> float or nil
|
1167
1906
|
# -->
|
1168
|
-
#
|
1169
|
-
#
|
1170
|
-
# regular expression with the same semantics as the original. (However,
|
1171
|
-
# `Regexp#==` may not return true when comparing the two, as the source of the
|
1172
|
-
# regular expression itself may differ, as the example shows). Regexp#inspect
|
1173
|
-
# produces a generally more readable version of *rxp*.
|
1174
|
-
#
|
1175
|
-
# r1 = /ab+c/ix #=> /ab+c/ix
|
1176
|
-
# s1 = r1.to_s #=> "(?ix-m:ab+c)"
|
1177
|
-
# r2 = Regexp.new(s1) #=> /(?ix-m:ab+c)/
|
1178
|
-
# r1 == r2 #=> false
|
1179
|
-
# r1.source #=> "ab+c"
|
1180
|
-
# r2.source #=> "(?ix-m:ab+c)"
|
1907
|
+
# It returns the timeout interval for Regexp matching in seconds. `nil` means no
|
1908
|
+
# default timeout configuration.
|
1181
1909
|
#
|
1182
|
-
|
1910
|
+
# This configuration is per-object. The global configuration set by
|
1911
|
+
# Regexp.timeout= is ignored if per-object configuration is set.
|
1912
|
+
#
|
1913
|
+
# re = Regexp.new("^a*b?a*$", timeout: 1)
|
1914
|
+
# re.timeout #=> 1.0
|
1915
|
+
# re =~ "a" * 100000 + "x" #=> regexp match timeout (RuntimeError)
|
1916
|
+
#
|
1917
|
+
%a{pure}
|
1918
|
+
def timeout: () -> Float?
|
1183
1919
|
|
1184
1920
|
# <!--
|
1185
1921
|
# rdoc-file=re.c
|
1186
|
-
# - ~ rxp
|
1922
|
+
# - ~ rxp -> integer or nil
|
1187
1923
|
# -->
|
1188
|
-
#
|
1189
|
-
# $_`.
|
1924
|
+
# Equivalent to `rxp =~ $_`:
|
1190
1925
|
#
|
1191
1926
|
# $_ = "input data"
|
1192
|
-
# ~ /at/
|
1927
|
+
# ~ /at/ # => 7
|
1193
1928
|
#
|
1194
1929
|
def ~: () -> Integer?
|
1195
|
-
|
1196
|
-
private
|
1197
|
-
|
1198
|
-
def initialize_copy: (self object) -> self
|
1199
1930
|
end
|
1200
|
-
|
1201
|
-
# <!-- rdoc-file=re.c -->
|
1202
|
-
# see Regexp.options and Regexp.new
|
1203
|
-
#
|
1204
|
-
Regexp::EXTENDED: Integer
|
1205
|
-
|
1206
|
-
# <!-- rdoc-file=re.c -->
|
1207
|
-
# see Regexp.options and Regexp.new
|
1208
|
-
#
|
1209
|
-
Regexp::FIXEDENCODING: Integer
|
1210
|
-
|
1211
|
-
# <!-- rdoc-file=re.c -->
|
1212
|
-
# see Regexp.options and Regexp.new
|
1213
|
-
#
|
1214
|
-
Regexp::IGNORECASE: Integer
|
1215
|
-
|
1216
|
-
# <!-- rdoc-file=re.c -->
|
1217
|
-
# see Regexp.options and Regexp.new
|
1218
|
-
#
|
1219
|
-
Regexp::MULTILINE: Integer
|
1220
|
-
|
1221
|
-
# <!-- rdoc-file=re.c -->
|
1222
|
-
# see Regexp.options and Regexp.new
|
1223
|
-
#
|
1224
|
-
Regexp::NOENCODING: Integer
|