rbs 4.0.0.dev.4 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +14 -14
- data/.github/workflows/bundle-update.yml +60 -0
- data/.github/workflows/c-check.yml +18 -11
- data/.github/workflows/comments.yml +5 -3
- data/.github/workflows/dependabot.yml +2 -2
- data/.github/workflows/ruby.yml +27 -34
- data/.github/workflows/rust.yml +95 -0
- data/.github/workflows/typecheck.yml +2 -2
- data/.github/workflows/windows.yml +2 -2
- data/.rubocop.yml +1 -1
- data/CHANGELOG.md +323 -0
- data/README.md +1 -1
- data/Rakefile +43 -33
- data/Steepfile +1 -0
- data/config.yml +426 -24
- data/core/array.rbs +307 -227
- data/core/basic_object.rbs +9 -8
- data/core/binding.rbs +0 -2
- data/core/builtin.rbs +2 -2
- data/core/class.rbs +6 -5
- data/core/comparable.rbs +55 -34
- data/core/complex.rbs +104 -78
- data/core/dir.rbs +61 -49
- data/core/encoding.rbs +12 -15
- data/core/enumerable.rbs +179 -87
- data/core/enumerator/arithmetic_sequence.rbs +70 -0
- data/core/enumerator.rbs +65 -2
- data/core/errno.rbs +11 -2
- data/core/errors.rbs +58 -29
- data/core/exception.rbs +13 -13
- data/core/fiber.rbs +74 -54
- data/core/file.rbs +280 -177
- data/core/file_test.rbs +3 -3
- data/core/float.rbs +257 -92
- data/core/gc.rbs +425 -281
- data/core/hash.rbs +1045 -739
- data/core/integer.rbs +135 -137
- data/core/io/buffer.rbs +53 -42
- data/core/io/wait.rbs +13 -35
- data/core/io.rbs +192 -144
- data/core/kernel.rbs +216 -155
- data/core/marshal.rbs +4 -4
- data/core/match_data.rbs +15 -13
- data/core/math.rbs +107 -66
- data/core/method.rbs +69 -33
- data/core/module.rbs +244 -106
- data/core/nil_class.rbs +7 -6
- data/core/numeric.rbs +74 -63
- data/core/object.rbs +9 -11
- data/core/object_space.rbs +30 -23
- data/core/pathname.rbs +1322 -0
- data/core/proc.rbs +95 -58
- data/core/process.rbs +222 -202
- data/core/ractor.rbs +371 -515
- data/core/random.rbs +21 -3
- data/core/range.rbs +159 -57
- data/core/rational.rbs +60 -89
- data/core/rbs/unnamed/argf.rbs +60 -53
- data/core/rbs/unnamed/env_class.rbs +19 -14
- data/core/rbs/unnamed/main_class.rbs +123 -0
- data/core/rbs/unnamed/random.rbs +11 -118
- data/core/regexp.rbs +258 -214
- data/core/ruby.rbs +53 -0
- data/core/ruby_vm.rbs +38 -34
- data/core/rubygems/config_file.rbs +5 -5
- data/core/rubygems/errors.rbs +4 -71
- data/core/rubygems/requirement.rbs +5 -5
- data/core/rubygems/rubygems.rbs +16 -82
- data/core/rubygems/version.rbs +2 -3
- data/core/set.rbs +490 -360
- data/core/signal.rbs +26 -16
- data/core/string.rbs +3234 -1285
- data/core/struct.rbs +27 -26
- data/core/symbol.rbs +41 -34
- data/core/thread.rbs +135 -67
- data/core/time.rbs +81 -50
- data/core/trace_point.rbs +41 -35
- data/core/true_class.rbs +2 -2
- data/core/unbound_method.rbs +24 -16
- data/core/warning.rbs +7 -7
- data/docs/aliases.md +79 -0
- data/docs/collection.md +3 -3
- data/docs/config.md +171 -0
- data/docs/encoding.md +56 -0
- data/docs/gem.md +0 -1
- data/docs/inline.md +576 -0
- data/docs/sigs.md +3 -3
- data/docs/syntax.md +46 -16
- data/docs/type_fingerprint.md +21 -0
- data/exe/rbs +1 -1
- data/ext/rbs_extension/ast_translation.c +544 -116
- data/ext/rbs_extension/ast_translation.h +3 -0
- data/ext/rbs_extension/class_constants.c +16 -2
- data/ext/rbs_extension/class_constants.h +8 -0
- data/ext/rbs_extension/extconf.rb +5 -1
- data/ext/rbs_extension/legacy_location.c +33 -56
- data/ext/rbs_extension/legacy_location.h +37 -0
- data/ext/rbs_extension/main.c +44 -35
- data/include/rbs/ast.h +448 -173
- data/include/rbs/defines.h +27 -0
- data/include/rbs/lexer.h +30 -11
- data/include/rbs/location.h +25 -44
- data/include/rbs/parser.h +6 -6
- data/include/rbs/string.h +0 -2
- data/include/rbs/util/rbs_allocator.h +34 -13
- data/include/rbs/util/rbs_assert.h +12 -1
- data/include/rbs/util/rbs_constant_pool.h +0 -3
- data/include/rbs/util/rbs_encoding.h +2 -0
- data/include/rbs/util/rbs_unescape.h +2 -1
- data/include/rbs.h +8 -0
- data/lib/rbs/ast/annotation.rb +1 -1
- data/lib/rbs/ast/comment.rb +1 -1
- data/lib/rbs/ast/declarations.rb +10 -10
- data/lib/rbs/ast/members.rb +14 -14
- data/lib/rbs/ast/ruby/annotations.rb +293 -3
- data/lib/rbs/ast/ruby/comment_block.rb +24 -0
- data/lib/rbs/ast/ruby/declarations.rb +198 -3
- data/lib/rbs/ast/ruby/helpers/constant_helper.rb +4 -0
- data/lib/rbs/ast/ruby/members.rb +532 -22
- data/lib/rbs/ast/type_param.rb +24 -4
- data/lib/rbs/buffer.rb +20 -15
- data/lib/rbs/cli/diff.rb +16 -15
- data/lib/rbs/cli/validate.rb +38 -106
- data/lib/rbs/cli.rb +52 -19
- data/lib/rbs/collection/config/lockfile_generator.rb +14 -2
- data/lib/rbs/collection/sources/git.rb +1 -0
- data/lib/rbs/definition.rb +1 -1
- data/lib/rbs/definition_builder/ancestor_builder.rb +62 -9
- data/lib/rbs/definition_builder/method_builder.rb +20 -0
- data/lib/rbs/definition_builder.rb +147 -25
- data/lib/rbs/diff.rb +7 -1
- data/lib/rbs/environment.rb +227 -74
- data/lib/rbs/environment_loader.rb +0 -6
- data/lib/rbs/errors.rb +27 -18
- data/lib/rbs/inline_parser.rb +342 -6
- data/lib/rbs/location_aux.rb +1 -1
- data/lib/rbs/locator.rb +5 -1
- data/lib/rbs/method_type.rb +5 -3
- data/lib/rbs/parser_aux.rb +20 -7
- data/lib/rbs/prototype/helpers.rb +57 -0
- data/lib/rbs/prototype/rb.rb +3 -28
- data/lib/rbs/prototype/rbi.rb +3 -20
- data/lib/rbs/prototype/runtime.rb +8 -0
- data/lib/rbs/resolver/constant_resolver.rb +2 -2
- data/lib/rbs/resolver/type_name_resolver.rb +116 -38
- data/lib/rbs/subtractor.rb +3 -1
- data/lib/rbs/test/type_check.rb +19 -2
- data/lib/rbs/type_name.rb +1 -1
- data/lib/rbs/types.rb +88 -78
- data/lib/rbs/unit_test/type_assertions.rb +35 -8
- data/lib/rbs/validator.rb +2 -2
- data/lib/rbs/version.rb +1 -1
- data/lib/rbs.rb +1 -2
- data/lib/rdoc/discover.rb +1 -1
- data/lib/rdoc_plugin/parser.rb +1 -1
- data/rbs.gemspec +4 -3
- data/rust/.gitignore +1 -0
- data/rust/Cargo.lock +378 -0
- data/rust/Cargo.toml +7 -0
- data/rust/ruby-rbs/Cargo.toml +22 -0
- data/rust/ruby-rbs/build.rs +764 -0
- data/rust/ruby-rbs/examples/locations.rs +60 -0
- data/rust/ruby-rbs/src/lib.rs +1 -0
- data/rust/ruby-rbs/src/node/mod.rs +742 -0
- data/rust/ruby-rbs/tests/sanity.rs +47 -0
- data/rust/ruby-rbs/vendor/rbs/config.yml +1 -0
- data/rust/ruby-rbs-sys/Cargo.toml +23 -0
- data/rust/ruby-rbs-sys/build.rs +204 -0
- data/rust/ruby-rbs-sys/src/lib.rs +50 -0
- data/rust/ruby-rbs-sys/vendor/rbs/include +1 -0
- data/rust/ruby-rbs-sys/vendor/rbs/src +1 -0
- data/rust/ruby-rbs-sys/wrapper.h +1 -0
- data/schema/typeParam.json +17 -1
- data/sig/ast/ruby/annotations.rbs +315 -4
- data/sig/ast/ruby/comment_block.rbs +8 -0
- data/sig/ast/ruby/declarations.rbs +102 -4
- data/sig/ast/ruby/members.rbs +108 -2
- data/sig/cli/diff.rbs +5 -11
- data/sig/cli/validate.rbs +12 -8
- data/sig/cli.rbs +18 -18
- data/sig/definition.rbs +6 -1
- data/sig/definition_builder.rbs +2 -0
- data/sig/environment.rbs +70 -12
- data/sig/errors.rbs +13 -14
- data/sig/inline_parser.rbs +39 -2
- data/sig/locator.rbs +0 -2
- data/sig/manifest.yaml +0 -1
- data/sig/method_builder.rbs +3 -1
- data/sig/parser.rbs +31 -13
- data/sig/prototype/helpers.rbs +2 -0
- data/sig/resolver/type_name_resolver.rbs +35 -7
- data/sig/source.rbs +3 -3
- data/sig/type_param.rbs +13 -8
- data/sig/types.rbs +6 -7
- data/sig/unit_test/spy.rbs +0 -8
- data/sig/unit_test/type_assertions.rbs +11 -0
- data/src/ast.c +410 -153
- data/src/lexer.c +1392 -1313
- data/src/lexer.re +3 -0
- data/src/lexstate.c +58 -37
- data/src/location.c +8 -48
- data/src/parser.c +977 -516
- data/src/string.c +0 -48
- data/src/util/rbs_allocator.c +89 -71
- data/src/util/rbs_assert.c +1 -1
- data/src/util/rbs_buffer.c +2 -2
- data/src/util/rbs_constant_pool.c +10 -14
- data/src/util/rbs_encoding.c +4 -8
- data/src/util/rbs_unescape.c +56 -20
- data/stdlib/bigdecimal/0/big_decimal.rbs +116 -98
- data/stdlib/bigdecimal-math/0/big_math.rbs +169 -8
- data/stdlib/cgi/0/core.rbs +9 -393
- data/stdlib/cgi/0/manifest.yaml +1 -0
- data/stdlib/cgi-escape/0/escape.rbs +171 -0
- data/stdlib/coverage/0/coverage.rbs +7 -4
- data/stdlib/date/0/date.rbs +92 -79
- data/stdlib/date/0/date_time.rbs +25 -24
- data/stdlib/delegate/0/delegator.rbs +10 -7
- data/stdlib/did_you_mean/0/did_you_mean.rbs +17 -16
- data/stdlib/digest/0/digest.rbs +110 -0
- data/stdlib/erb/0/erb.rbs +748 -347
- data/stdlib/etc/0/etc.rbs +55 -50
- data/stdlib/fileutils/0/fileutils.rbs +158 -139
- data/stdlib/forwardable/0/forwardable.rbs +13 -10
- data/stdlib/io-console/0/io-console.rbs +2 -2
- data/stdlib/json/0/json.rbs +217 -136
- data/stdlib/monitor/0/monitor.rbs +3 -3
- data/stdlib/net-http/0/net-http.rbs +162 -134
- data/stdlib/objspace/0/objspace.rbs +17 -34
- data/stdlib/open-uri/0/open-uri.rbs +48 -8
- data/stdlib/open3/0/open3.rbs +469 -10
- data/stdlib/openssl/0/openssl.rbs +475 -357
- data/stdlib/optparse/0/optparse.rbs +26 -17
- data/stdlib/pathname/0/pathname.rbs +11 -1381
- data/stdlib/pp/0/pp.rbs +9 -8
- data/stdlib/prettyprint/0/prettyprint.rbs +7 -7
- data/stdlib/pstore/0/pstore.rbs +35 -30
- data/stdlib/psych/0/psych.rbs +65 -12
- data/stdlib/psych/0/store.rbs +2 -4
- data/stdlib/pty/0/pty.rbs +9 -6
- data/stdlib/random-formatter/0/random-formatter.rbs +277 -0
- data/stdlib/rdoc/0/code_object.rbs +2 -1
- data/stdlib/rdoc/0/parser.rbs +1 -1
- data/stdlib/rdoc/0/rdoc.rbs +1 -1
- data/stdlib/rdoc/0/store.rbs +1 -1
- data/stdlib/resolv/0/resolv.rbs +25 -68
- data/stdlib/ripper/0/ripper.rbs +22 -19
- data/stdlib/securerandom/0/manifest.yaml +2 -0
- data/stdlib/securerandom/0/securerandom.rbs +7 -20
- data/stdlib/shellwords/0/shellwords.rbs +2 -2
- data/stdlib/singleton/0/singleton.rbs +3 -0
- data/stdlib/socket/0/addrinfo.rbs +7 -7
- data/stdlib/socket/0/basic_socket.rbs +3 -3
- data/stdlib/socket/0/ip_socket.rbs +10 -8
- data/stdlib/socket/0/socket.rbs +23 -10
- data/stdlib/socket/0/tcp_server.rbs +1 -1
- data/stdlib/socket/0/tcp_socket.rbs +11 -3
- data/stdlib/socket/0/udp_socket.rbs +1 -1
- data/stdlib/socket/0/unix_server.rbs +1 -1
- data/stdlib/stringio/0/stringio.rbs +1177 -85
- data/stdlib/strscan/0/string_scanner.rbs +27 -25
- data/stdlib/tempfile/0/tempfile.rbs +25 -21
- data/stdlib/time/0/time.rbs +8 -6
- data/stdlib/timeout/0/timeout.rbs +63 -7
- data/stdlib/tsort/0/cyclic.rbs +3 -0
- data/stdlib/tsort/0/tsort.rbs +7 -6
- data/stdlib/uri/0/common.rbs +42 -20
- data/stdlib/uri/0/file.rbs +3 -3
- data/stdlib/uri/0/generic.rbs +26 -18
- data/stdlib/uri/0/http.rbs +2 -2
- data/stdlib/uri/0/ldap.rbs +2 -2
- data/stdlib/uri/0/mailto.rbs +3 -3
- data/stdlib/uri/0/rfc2396_parser.rbs +12 -12
- data/stdlib/zlib/0/deflate.rbs +4 -3
- data/stdlib/zlib/0/gzip_reader.rbs +6 -6
- data/stdlib/zlib/0/gzip_writer.rbs +14 -12
- data/stdlib/zlib/0/inflate.rbs +1 -1
- data/stdlib/zlib/0/need_dict.rbs +1 -1
- data/stdlib/zlib/0/zstream.rbs +1 -0
- metadata +50 -6
data/core/regexp.rbs
CHANGED
|
@@ -39,7 +39,7 @@
|
|
|
39
39
|
# most such methods accept an argument that may be either a string or the
|
|
40
40
|
# (much more powerful) regexp.
|
|
41
41
|
#
|
|
42
|
-
# See [Regexp Methods](rdoc-ref:regexp/methods.rdoc).
|
|
42
|
+
# See [Regexp Methods](rdoc-ref:language/regexp/methods.rdoc).
|
|
43
43
|
#
|
|
44
44
|
# ## Regexp Objects
|
|
45
45
|
#
|
|
@@ -63,7 +63,7 @@
|
|
|
63
63
|
# # This is a very common usage.
|
|
64
64
|
# /foo/ # => /foo/
|
|
65
65
|
#
|
|
66
|
-
# * A `%r` regexp literal (see [%r: Regexp
|
|
66
|
+
# * A <code>%r</code> regexp literal (see [%r: Regexp
|
|
67
67
|
# Literals](rdoc-ref:syntax/literals.rdoc@25r-3A+Regexp+Literals)):
|
|
68
68
|
#
|
|
69
69
|
# # Same delimiter character at beginning and end;
|
|
@@ -89,7 +89,7 @@
|
|
|
89
89
|
# 'food'.match(/foo/) # => #<MatchData "foo">
|
|
90
90
|
# 'food'.match(/bar/) # => nil
|
|
91
91
|
#
|
|
92
|
-
# ## Operator `=~`
|
|
92
|
+
# ## Operator <code>=~</code>
|
|
93
93
|
#
|
|
94
94
|
# Each of the operators Regexp#=~, String#=~, and Symbol#=~ returns an integer
|
|
95
95
|
# offset if a match was found, `nil` otherwise; each also sets [global
|
|
@@ -99,7 +99,7 @@
|
|
|
99
99
|
# 'foo bar' =~ /bar/ # => 4
|
|
100
100
|
# /baz/ =~ 'foo bar' # => nil
|
|
101
101
|
#
|
|
102
|
-
# ## Method `match?`
|
|
102
|
+
# ## Method <code>match?</code>
|
|
103
103
|
#
|
|
104
104
|
# Each of the methods Regexp#match?, String#match?, and Symbol#match? returns
|
|
105
105
|
# `true` if a match was found, `false` otherwise; none sets [global
|
|
@@ -112,19 +112,24 @@
|
|
|
112
112
|
#
|
|
113
113
|
# Certain regexp-oriented methods assign values to global variables:
|
|
114
114
|
#
|
|
115
|
-
# *
|
|
116
|
-
# *
|
|
115
|
+
# * <code>#match</code>: see [Method match](rdoc-ref:Regexp@Method+match).
|
|
116
|
+
# * <code>#=~</code>: see [Operator =~](rdoc-ref:Regexp@Operator+-3D~).
|
|
117
117
|
#
|
|
118
118
|
# The affected global variables are:
|
|
119
119
|
#
|
|
120
|
-
# *
|
|
121
|
-
# *
|
|
122
|
-
# *
|
|
123
|
-
#
|
|
124
|
-
# *
|
|
125
|
-
#
|
|
126
|
-
#
|
|
127
|
-
#
|
|
120
|
+
# * <code>$~</code>: Returns a MatchData object, or `nil`.
|
|
121
|
+
# * <code>$&</code>: Returns the matched part of the string, or `nil`.
|
|
122
|
+
# * <code>$`</code>: Returns the part of the string to the left of the match,
|
|
123
|
+
# or `nil`.
|
|
124
|
+
# * <code>$'</code>: Returns the part of the string to the right of the match,
|
|
125
|
+
# or `nil`.
|
|
126
|
+
# * <code>$+</code>: Returns the last group matched, or `nil`.
|
|
127
|
+
# * <code>$1</code>, <code>$2</code>, etc.: Returns the first, second, etc.,
|
|
128
|
+
# matched group, or `nil`. Note that <code>$0</code> is quite different; it
|
|
129
|
+
# returns the name of the currently executing program.
|
|
130
|
+
#
|
|
131
|
+
# These variables, except for <code>$~</code>, are shorthands for methods of
|
|
132
|
+
# <code>$~</code>. See MatchData@Global+variables+equivalence.
|
|
128
133
|
#
|
|
129
134
|
# Examples:
|
|
130
135
|
#
|
|
@@ -225,8 +230,9 @@
|
|
|
225
230
|
# see [Shorthand Character
|
|
226
231
|
# Classes](rdoc-ref:Regexp@Shorthand+Character+Classes).
|
|
227
232
|
#
|
|
228
|
-
# *
|
|
229
|
-
# a source literal, it's shorthand for matching a whitespace
|
|
233
|
+
# * <code>\s</code> in an ordinary string literal is equivalent to a space
|
|
234
|
+
# character; in a source literal, it's shorthand for matching a whitespace
|
|
235
|
+
# character.
|
|
230
236
|
# * In an ordinary string literal, these are (needlessly) escaped characters;
|
|
231
237
|
# in a source literal, they are shorthands for various matching characters:
|
|
232
238
|
#
|
|
@@ -251,16 +257,19 @@
|
|
|
251
257
|
# /[a-f]/.match('foo') # => #<MatchData "f">
|
|
252
258
|
# /[a-cd-f]/.match('foo') # => #<MatchData "f">
|
|
253
259
|
#
|
|
254
|
-
# When the first character of a character class is a caret (
|
|
255
|
-
# the class is inverted: it matches any character *except* those
|
|
260
|
+
# When the first character of a character class is a caret (<code>^</code>), the
|
|
261
|
+
# sense of the class is inverted: it matches any character *except* those
|
|
262
|
+
# specified.
|
|
256
263
|
#
|
|
257
264
|
# /[^a-eg-z]/.match('f') # => #<MatchData "f">
|
|
258
265
|
#
|
|
259
266
|
# A character class may contain another character class. By itself this isn't
|
|
260
|
-
# useful because
|
|
267
|
+
# useful because <code>[a-z[0-9]]</code> describes the same set as
|
|
268
|
+
# <code>[a-z0-9]</code>.
|
|
261
269
|
#
|
|
262
|
-
# However, character classes also support the
|
|
263
|
-
# intersection on its arguments. The two can be combined as
|
|
270
|
+
# However, character classes also support the <code>&&</code> operator, which
|
|
271
|
+
# performs set intersection on its arguments. The two can be combined as
|
|
272
|
+
# follows:
|
|
264
273
|
#
|
|
265
274
|
# /[a-w&&[^c-g]z]/ # ([a-w] AND ([^c-g] OR z))
|
|
266
275
|
#
|
|
@@ -273,59 +282,66 @@
|
|
|
273
282
|
# Each of the following metacharacters serves as a shorthand for a character
|
|
274
283
|
# class:
|
|
275
284
|
#
|
|
276
|
-
# *
|
|
285
|
+
# * <code>/./</code>: Matches any character except a newline:
|
|
277
286
|
#
|
|
278
287
|
# /./.match('foo') # => #<MatchData "f">
|
|
279
288
|
# /./.match("\n") # => nil
|
|
280
289
|
#
|
|
281
|
-
# *
|
|
282
|
-
# Mode](rdoc-ref:Regexp@Multiline+Mode):
|
|
290
|
+
# * <code>/./m</code>: Matches any character, including a newline; see
|
|
291
|
+
# [Multiline Mode](rdoc-ref:Regexp@Multiline+Mode):
|
|
283
292
|
#
|
|
284
293
|
# /./m.match("\n") # => #<MatchData "\n">
|
|
285
294
|
#
|
|
286
|
-
# *
|
|
295
|
+
# * <code>/\w/</code>: Matches a word character: equivalent to
|
|
296
|
+
# <code>[a-zA-Z0-9_]</code>:
|
|
287
297
|
#
|
|
288
298
|
# /\w/.match(' foo') # => #<MatchData "f">
|
|
289
299
|
# /\w/.match(' _') # => #<MatchData "_">
|
|
290
300
|
# /\w/.match(' ') # => nil
|
|
291
301
|
#
|
|
292
|
-
# *
|
|
302
|
+
# * <code>/\W/</code>: Matches a non-word character: equivalent to
|
|
303
|
+
# <code>[^a-zA-Z0-9_]</code>:
|
|
293
304
|
#
|
|
294
305
|
# /\W/.match(' ') # => #<MatchData " ">
|
|
295
306
|
# /\W/.match('_') # => nil
|
|
296
307
|
#
|
|
297
|
-
# *
|
|
308
|
+
# * <code>/\d/</code>: Matches a digit character: equivalent to
|
|
309
|
+
# <code>[0-9]</code>:
|
|
298
310
|
#
|
|
299
311
|
# /\d/.match('THX1138') # => #<MatchData "1">
|
|
300
312
|
# /\d/.match('foo') # => nil
|
|
301
313
|
#
|
|
302
|
-
# *
|
|
314
|
+
# * <code>/\D/</code>: Matches a non-digit character: equivalent to
|
|
315
|
+
# <code>[^0-9]</code>:
|
|
303
316
|
#
|
|
304
317
|
# /\D/.match('123Jump!') # => #<MatchData "J">
|
|
305
318
|
# /\D/.match('123') # => nil
|
|
306
319
|
#
|
|
307
|
-
# *
|
|
320
|
+
# * <code>/\h/</code>: Matches a hexdigit character: equivalent to
|
|
321
|
+
# <code>[0-9a-fA-F]</code>:
|
|
308
322
|
#
|
|
309
323
|
# /\h/.match('xyz fedcba9876543210') # => #<MatchData "f">
|
|
310
324
|
# /\h/.match('xyz') # => nil
|
|
311
325
|
#
|
|
312
|
-
# *
|
|
326
|
+
# * <code>/\H/</code>: Matches a non-hexdigit character: equivalent to
|
|
327
|
+
# <code>[^0-9a-fA-F]</code>:
|
|
313
328
|
#
|
|
314
329
|
# /\H/.match('fedcba9876543210xyz') # => #<MatchData "x">
|
|
315
330
|
# /\H/.match('fedcba9876543210') # => nil
|
|
316
331
|
#
|
|
317
|
-
# *
|
|
332
|
+
# * <code>/\s/</code>: Matches a whitespace character: equivalent to <code>/[
|
|
333
|
+
# \t\r\n\f\v]/</code>:
|
|
318
334
|
#
|
|
319
335
|
# /\s/.match('foo bar') # => #<MatchData " ">
|
|
320
336
|
# /\s/.match('foo') # => nil
|
|
321
337
|
#
|
|
322
|
-
# *
|
|
323
|
-
# \t\r\n\f\v]
|
|
338
|
+
# * <code>/\S/</code>: Matches a non-whitespace character: equivalent to
|
|
339
|
+
# <code>/[^ \t\r\n\f\v]/</code>:
|
|
324
340
|
#
|
|
325
341
|
# /\S/.match(" \t\r\n\f\v foo") # => #<MatchData "f">
|
|
326
342
|
# /\S/.match(" \t\r\n\f\v") # => nil
|
|
327
343
|
#
|
|
328
|
-
# *
|
|
344
|
+
# * <code>/\R/</code>: Matches a linebreak, platform-independently:
|
|
329
345
|
#
|
|
330
346
|
# /\R/.match("\r") # => #<MatchData "\r"> # Carriage return (CR)
|
|
331
347
|
# /\R/.match("\n") # => #<MatchData "\n"> # Newline (LF)
|
|
@@ -352,47 +368,47 @@
|
|
|
352
368
|
#
|
|
353
369
|
# Each of these anchors matches a boundary:
|
|
354
370
|
#
|
|
355
|
-
# *
|
|
371
|
+
# * <code>^</code>: Matches the beginning of a line:
|
|
356
372
|
#
|
|
357
373
|
# /^bar/.match("foo\nbar") # => #<MatchData "bar">
|
|
358
374
|
# /^ar/.match("foo\nbar") # => nil
|
|
359
375
|
#
|
|
360
|
-
# *
|
|
376
|
+
# * <code>$</code>: Matches the end of a line:
|
|
361
377
|
#
|
|
362
378
|
# /bar$/.match("foo\nbar") # => #<MatchData "bar">
|
|
363
379
|
# /ba$/.match("foo\nbar") # => nil
|
|
364
380
|
#
|
|
365
|
-
# *
|
|
381
|
+
# * <code>\A</code>: Matches the beginning of the string:
|
|
366
382
|
#
|
|
367
383
|
# /\Afoo/.match('foo bar') # => #<MatchData "foo">
|
|
368
384
|
# /\Afoo/.match(' foo bar') # => nil
|
|
369
385
|
#
|
|
370
|
-
# *
|
|
371
|
-
# it matches just before the ending newline:
|
|
386
|
+
# * <code>\Z</code>: Matches the end of the string; if the string ends with a
|
|
387
|
+
# single newline, it matches just before the ending newline:
|
|
372
388
|
#
|
|
373
389
|
# /foo\Z/.match('bar foo') # => #<MatchData "foo">
|
|
374
390
|
# /foo\Z/.match('foo bar') # => nil
|
|
375
391
|
# /foo\Z/.match("bar foo\n") # => #<MatchData "foo">
|
|
376
392
|
# /foo\Z/.match("bar foo\n\n") # => nil
|
|
377
393
|
#
|
|
378
|
-
# *
|
|
394
|
+
# * <code>\z</code>: Matches the end of the string:
|
|
379
395
|
#
|
|
380
396
|
# /foo\z/.match('bar foo') # => #<MatchData "foo">
|
|
381
397
|
# /foo\z/.match('foo bar') # => nil
|
|
382
398
|
# /foo\z/.match("bar foo\n") # => nil
|
|
383
399
|
#
|
|
384
|
-
# *
|
|
385
|
-
# (
|
|
400
|
+
# * <code>\b</code>: Matches a word boundary when not inside brackets; matches
|
|
401
|
+
# backspace (<code>"0x08"</code>) when inside brackets:
|
|
386
402
|
#
|
|
387
403
|
# /foo\b/.match('foo bar') # => #<MatchData "foo">
|
|
388
404
|
# /foo\b/.match('foobar') # => nil
|
|
389
405
|
#
|
|
390
|
-
# *
|
|
406
|
+
# * <code>\B</code>: Matches a non-word boundary:
|
|
391
407
|
#
|
|
392
408
|
# /foo\B/.match('foobar') # => #<MatchData "foo">
|
|
393
409
|
# /foo\B/.match('foo bar') # => nil
|
|
394
410
|
#
|
|
395
|
-
# *
|
|
411
|
+
# * <code>\G</code>: Matches the first matching position:
|
|
396
412
|
#
|
|
397
413
|
# In methods like String#gsub and String#scan, it changes on each iteration.
|
|
398
414
|
# It initially matches the beginning of subject, and in each following
|
|
@@ -411,41 +427,45 @@
|
|
|
411
427
|
#
|
|
412
428
|
# Lookahead anchors:
|
|
413
429
|
#
|
|
414
|
-
# *
|
|
415
|
-
# characters match *pat*, but doesn't include those characters in
|
|
416
|
-
# matched substring.
|
|
417
|
-
#
|
|
418
|
-
# * `(?!*pat*)`: Negative lookahead assertion: ensures that the following
|
|
419
|
-
# characters *do not* match *pat*, but doesn't include those characters in
|
|
430
|
+
# * <code>(?=_pat_)</code>: Positive lookahead assertion: ensures that the
|
|
431
|
+
# following characters match *pat*, but doesn't include those characters in
|
|
420
432
|
# the matched substring.
|
|
421
433
|
#
|
|
434
|
+
# * <code>(?!_pat_)</code>: Negative lookahead assertion: ensures that the
|
|
435
|
+
# following characters *do not* match *pat*, but doesn't include those
|
|
436
|
+
# characters in the matched substring.
|
|
437
|
+
#
|
|
422
438
|
# Lookbehind anchors:
|
|
423
439
|
#
|
|
424
|
-
# *
|
|
425
|
-
# characters match *pat*, but doesn't include those characters in
|
|
426
|
-
# matched substring.
|
|
440
|
+
# * <code>(?<=_pat_)</code>: Positive lookbehind assertion: ensures that the
|
|
441
|
+
# preceding characters match *pat*, but doesn't include those characters in
|
|
442
|
+
# the matched substring.
|
|
427
443
|
#
|
|
428
|
-
# *
|
|
429
|
-
# characters do not match *pat*, but doesn't include those
|
|
430
|
-
# matched substring.
|
|
444
|
+
# * <code>(?<!_pat_)</code>: Negative lookbehind assertion: ensures that the
|
|
445
|
+
# preceding characters do not match *pat*, but doesn't include those
|
|
446
|
+
# characters in the matched substring.
|
|
431
447
|
#
|
|
432
448
|
# The pattern below uses positive lookahead and positive lookbehind to match
|
|
433
|
-
# text appearing in
|
|
449
|
+
# text appearing in <code><b></code>...<code></b></code> tags without including
|
|
450
|
+
# the tags in the match:
|
|
434
451
|
#
|
|
435
452
|
# /(?<=<b>)\w+(?=<\/b>)/.match("Fortune favors the <b>bold</b>.")
|
|
436
453
|
# # => #<MatchData "bold">
|
|
437
454
|
#
|
|
455
|
+
# The pattern in lookbehind must be fixed-width. But top-level alternatives can
|
|
456
|
+
# be of various lengths; e.g., (?<=a|bc) is OK, but (?<=aaa(?:b|cd)) is not.
|
|
457
|
+
#
|
|
438
458
|
# #### Match-Reset Anchor
|
|
439
459
|
#
|
|
440
|
-
# *
|
|
441
|
-
# excluded from the result. For example,
|
|
442
|
-
# almost equivalent:
|
|
460
|
+
# * <code>\K</code>: Match reset: the matched content preceding
|
|
461
|
+
# <code>\K</code> in the regexp is excluded from the result. For example,
|
|
462
|
+
# the following two regexps are almost equivalent:
|
|
443
463
|
#
|
|
444
464
|
# /ab\Kc/.match('abc') # => #<MatchData "c">
|
|
445
465
|
# /(?<=ab)c/.match('abc') # => #<MatchData "c">
|
|
446
466
|
#
|
|
447
|
-
# These match same string and
|
|
448
|
-
# is different.
|
|
467
|
+
# These match the same string and <code>$&</code> equals <code>'c'</code>, while
|
|
468
|
+
# the matched position is different.
|
|
449
469
|
#
|
|
450
470
|
# As are the following two regexps:
|
|
451
471
|
#
|
|
@@ -454,9 +474,9 @@
|
|
|
454
474
|
#
|
|
455
475
|
# ### Alternation
|
|
456
476
|
#
|
|
457
|
-
# The vertical bar metacharacter (
|
|
458
|
-
# alternation: two or more subexpressions any of which may match the
|
|
459
|
-
# string.
|
|
477
|
+
# The vertical bar metacharacter (<code>|</code>) may be used within parentheses
|
|
478
|
+
# to express alternation: two or more subexpressions any of which may match the
|
|
479
|
+
# target string.
|
|
460
480
|
#
|
|
461
481
|
# Two alternatives:
|
|
462
482
|
#
|
|
@@ -488,48 +508,48 @@
|
|
|
488
508
|
#
|
|
489
509
|
# An added *quantifier* specifies how many matches are required or allowed:
|
|
490
510
|
#
|
|
491
|
-
# *
|
|
511
|
+
# * <code>*</code> - Matches zero or more times:
|
|
492
512
|
#
|
|
493
513
|
# /\w*/.match('')
|
|
494
514
|
# # => #<MatchData "">
|
|
495
515
|
# /\w*/.match('x')
|
|
496
516
|
# # => #<MatchData "x">
|
|
497
517
|
# /\w*/.match('xyz')
|
|
498
|
-
# # => #<MatchData "
|
|
518
|
+
# # => #<MatchData "xyz">
|
|
499
519
|
#
|
|
500
|
-
# *
|
|
520
|
+
# * <code>+</code> - Matches one or more times:
|
|
501
521
|
#
|
|
502
522
|
# /\w+/.match('') # => nil
|
|
503
523
|
# /\w+/.match('x') # => #<MatchData "x">
|
|
504
524
|
# /\w+/.match('xyz') # => #<MatchData "xyz">
|
|
505
525
|
#
|
|
506
|
-
# *
|
|
526
|
+
# * <code>?</code> - Matches zero or one times:
|
|
507
527
|
#
|
|
508
528
|
# /\w?/.match('') # => #<MatchData "">
|
|
509
529
|
# /\w?/.match('x') # => #<MatchData "x">
|
|
510
530
|
# /\w?/.match('xyz') # => #<MatchData "x">
|
|
511
531
|
#
|
|
512
|
-
# *
|
|
532
|
+
# * <code>{</code>*n*<code>}</code> - Matches exactly *n* times:
|
|
513
533
|
#
|
|
514
534
|
# /\w{2}/.match('') # => nil
|
|
515
535
|
# /\w{2}/.match('x') # => nil
|
|
516
536
|
# /\w{2}/.match('xyz') # => #<MatchData "xy">
|
|
517
537
|
#
|
|
518
|
-
# *
|
|
538
|
+
# * <code>{</code>*min*<code>,}</code> - Matches *min* or more times:
|
|
519
539
|
#
|
|
520
540
|
# /\w{2,}/.match('') # => nil
|
|
521
541
|
# /\w{2,}/.match('x') # => nil
|
|
522
542
|
# /\w{2,}/.match('xy') # => #<MatchData "xy">
|
|
523
543
|
# /\w{2,}/.match('xyz') # => #<MatchData "xyz">
|
|
524
544
|
#
|
|
525
|
-
# *
|
|
545
|
+
# * <code>{,</code>*max*<code>}</code> - Matches *max* or fewer times:
|
|
526
546
|
#
|
|
527
547
|
# /\w{,2}/.match('') # => #<MatchData "">
|
|
528
548
|
# /\w{,2}/.match('x') # => #<MatchData "x">
|
|
529
549
|
# /\w{,2}/.match('xyz') # => #<MatchData "xy">
|
|
530
550
|
#
|
|
531
|
-
# *
|
|
532
|
-
# times:
|
|
551
|
+
# * <code>{</code>*min*<code>,</code>*max*<code>}</code> - Matches at least
|
|
552
|
+
# *min* times and at most *max* times:
|
|
533
553
|
#
|
|
534
554
|
# /\w{1,2}/.match('') # => nil
|
|
535
555
|
# /\w{1,2}/.match('x') # => #<MatchData "x">
|
|
@@ -540,14 +560,17 @@
|
|
|
540
560
|
# Quantifier matching may be greedy, lazy, or possessive:
|
|
541
561
|
#
|
|
542
562
|
# * In *greedy* matching, as many occurrences as possible are matched while
|
|
543
|
-
# still allowing the overall match to succeed. Greedy quantifiers:
|
|
544
|
-
#
|
|
563
|
+
# still allowing the overall match to succeed. Greedy quantifiers:
|
|
564
|
+
# <code>*</code>, <code>+</code>, <code>?</code>, <code>{min, max}</code>
|
|
565
|
+
# and its variants.
|
|
545
566
|
# * In *lazy* matching, the minimum number of occurrences are matched. Lazy
|
|
546
|
-
# quantifiers:
|
|
567
|
+
# quantifiers: <code>*?</code>, <code>+?</code>, <code>??</code>,
|
|
568
|
+
# <code>{min, max}?</code> and its variants.
|
|
547
569
|
# * In *possessive* matching, once a match is found, there is no backtracking;
|
|
548
570
|
# that match is retained, even if it jeopardises the overall match.
|
|
549
|
-
# Possessive quantifiers:
|
|
550
|
-
# variants do *not* support
|
|
571
|
+
# Possessive quantifiers: <code>*+</code>, <code>++</code>, <code>?+</code>.
|
|
572
|
+
# Note that <code>{min, max}</code> and its variants do *not* support
|
|
573
|
+
# possessive matching.
|
|
551
574
|
#
|
|
552
575
|
# More:
|
|
553
576
|
#
|
|
@@ -571,8 +594,9 @@
|
|
|
571
594
|
# re.match('1943-02-04').size # => 1
|
|
572
595
|
# re.match('foo') # => nil
|
|
573
596
|
#
|
|
574
|
-
# Adding one or more pairs of parentheses,
|
|
575
|
-
# *groups*, which may result in multiple matched substrings, called
|
|
597
|
+
# Adding one or more pairs of parentheses, <code>(_subexpression_)</code>,
|
|
598
|
+
# defines *groups*, which may result in multiple matched substrings, called
|
|
599
|
+
# *captures*:
|
|
576
600
|
#
|
|
577
601
|
# re = /(\d\d\d\d)-(\d\d)-(\d\d)/
|
|
578
602
|
# re.match('1943-02-04') # => #<MatchData "1943-02-04" 1:"1943" 2:"02" 3:"04">
|
|
@@ -613,7 +637,7 @@
|
|
|
613
637
|
# have a quantifier), but its matching substring is not included among the
|
|
614
638
|
# captures.
|
|
615
639
|
#
|
|
616
|
-
# A non-capturing group begins with
|
|
640
|
+
# A non-capturing group begins with <code>?:</code> (inside the parentheses):
|
|
617
641
|
#
|
|
618
642
|
# # Don't capture the year.
|
|
619
643
|
# re = /(?:\d\d\d\d)-(\d\d)-(\d\d)/
|
|
@@ -643,12 +667,14 @@
|
|
|
643
667
|
#
|
|
644
668
|
# * For a large number of groups:
|
|
645
669
|
#
|
|
646
|
-
# * The ordinary
|
|
647
|
-
#
|
|
670
|
+
# * The ordinary <code>\_n_</code> notation applies only for *n* in range
|
|
671
|
+
# (1..9).
|
|
672
|
+
# * The <code>MatchData[_n_]</code> notation applies for any non-negative
|
|
673
|
+
# *n*.
|
|
648
674
|
#
|
|
649
|
-
# *
|
|
650
|
-
# it may not be used within the regexp itself, but may be
|
|
651
|
-
# (for example, in a substitution method call):
|
|
675
|
+
# * <code>\0</code> is a special backreference, referring to the entire
|
|
676
|
+
# matched string; it may not be used within the regexp itself, but may be
|
|
677
|
+
# used outside it (for example, in a substitution method call):
|
|
652
678
|
#
|
|
653
679
|
# 'The cat sat in the hat'.gsub(/[csh]at/, '\0s')
|
|
654
680
|
# # => "The cats sats in the hats"
|
|
@@ -656,8 +682,8 @@
|
|
|
656
682
|
# #### Named Captures
|
|
657
683
|
#
|
|
658
684
|
# As seen above, a capture can be referred to by its number. A capture can also
|
|
659
|
-
# have a name, prefixed as
|
|
660
|
-
# may be used as an index in
|
|
685
|
+
# have a name, prefixed as <code>?<_name_></code> or <code>?'_name_'</code>, and
|
|
686
|
+
# the name (symbolized) may be used as an index in <code>MatchData[]</code>:
|
|
661
687
|
#
|
|
662
688
|
# md = /\$(?<dollars>\d+)\.(?'cents'\d+)/.match("$3.67")
|
|
663
689
|
# # => #<MatchData "$3.67" dollars:"3" cents:"67">
|
|
@@ -671,14 +697,14 @@
|
|
|
671
697
|
# /\$(?<dollars>\d+)\.(\d+)/.match("$3.67")
|
|
672
698
|
# # => #<MatchData "$3.67" dollars:"3">
|
|
673
699
|
#
|
|
674
|
-
# A named group may be backreferenced as
|
|
700
|
+
# A named group may be backreferenced as <code>\k<_name_></code>:
|
|
675
701
|
#
|
|
676
702
|
# /(?<vowel>[aeiou]).\k<vowel>.\k<vowel>/.match('ototomy')
|
|
677
703
|
# # => #<MatchData "ototo" vowel:"o">
|
|
678
704
|
#
|
|
679
705
|
# When (and only when) a regexp contains named capture groups and appears before
|
|
680
|
-
# the
|
|
681
|
-
# with corresponding names:
|
|
706
|
+
# the <code>=~</code> operator, the captured substrings are assigned to local
|
|
707
|
+
# variables with corresponding names:
|
|
682
708
|
#
|
|
683
709
|
# /\$(?<dollars>\d+)\.(?<cents>\d+)/ =~ '$3.67'
|
|
684
710
|
# dollars # => "3"
|
|
@@ -689,7 +715,8 @@
|
|
|
689
715
|
#
|
|
690
716
|
# #### Atomic Grouping
|
|
691
717
|
#
|
|
692
|
-
# A group may be made *atomic* with
|
|
718
|
+
# A group may be made *atomic* with
|
|
719
|
+
# <code>(?></code>*subexpression*<code>)</code>.
|
|
693
720
|
#
|
|
694
721
|
# This causes the subexpression to be matched independently of the rest of the
|
|
695
722
|
# expression, so that the matched substring becomes fixed for the remainder of
|
|
@@ -706,19 +733,19 @@
|
|
|
706
733
|
#
|
|
707
734
|
# Analysis:
|
|
708
735
|
#
|
|
709
|
-
# 1. The leading subexpression
|
|
710
|
-
#
|
|
711
|
-
# 2. The next subexpression
|
|
712
|
-
# the trailing double-quote).
|
|
736
|
+
# 1. The leading subexpression <code>"</code> in the pattern matches the first
|
|
737
|
+
# character <code>"</code> in the target string.
|
|
738
|
+
# 2. The next subexpression <code>.*</code> matches the next substring
|
|
739
|
+
# <code>Quote"</code> (including the trailing double-quote).
|
|
713
740
|
# 3. Now there is nothing left in the target string to match the trailing
|
|
714
|
-
# subexpression
|
|
715
|
-
# fail.
|
|
741
|
+
# subexpression <code>"</code> in the pattern; this would cause the overall
|
|
742
|
+
# match to fail.
|
|
716
743
|
# 4. The matched substring is backtracked by one position: `Quote`.
|
|
717
|
-
# 5. The final subexpression
|
|
718
|
-
# overall match succeeds.
|
|
744
|
+
# 5. The final subexpression <code>"</code> now matches the final substring
|
|
745
|
+
# <code>"</code>, and the overall match succeeds.
|
|
719
746
|
#
|
|
720
|
-
# If subexpression
|
|
721
|
-
# the overall match fails:
|
|
747
|
+
# If subexpression <code>.*</code> is grouped atomically, the backtracking is
|
|
748
|
+
# disabled, and the overall match fails:
|
|
722
749
|
#
|
|
723
750
|
# /"(?>.*)"/.match('"Quote"') # => nil
|
|
724
751
|
#
|
|
@@ -727,9 +754,10 @@
|
|
|
727
754
|
#
|
|
728
755
|
# #### Subexpression Calls
|
|
729
756
|
#
|
|
730
|
-
# As seen above, a backreference number (
|
|
731
|
-
# access to a captured *substring*; the
|
|
732
|
-
# also be accessed, via the number
|
|
757
|
+
# As seen above, a backreference number (<code>\_n_</code>) or name
|
|
758
|
+
# (<code>\k<_name_></code>) gives access to a captured *substring*; the
|
|
759
|
+
# corresponding regexp *subexpression* may also be accessed, via the number
|
|
760
|
+
# (<code>\g<i>n</i></code>) or name (<code>\g<_name_></code>):
|
|
733
761
|
#
|
|
734
762
|
# /\A(?<paren>\(\g<paren>*\))*\z/.match('(())')
|
|
735
763
|
# # ^1
|
|
@@ -747,15 +775,15 @@
|
|
|
747
775
|
#
|
|
748
776
|
# 1. Matches at the beginning of the string, i.e. before the first character.
|
|
749
777
|
# 2. Enters a named group `paren`.
|
|
750
|
-
# 3. Matches the first character in the string,
|
|
778
|
+
# 3. Matches the first character in the string, <code>'('</code>.
|
|
751
779
|
# 4. Calls the `paren` group again, i.e. recurses back to the second step.
|
|
752
780
|
# 5. Re-enters the `paren` group.
|
|
753
|
-
# 6. Matches the second character in the string,
|
|
781
|
+
# 6. Matches the second character in the string, <code>'('</code>.
|
|
754
782
|
# 7. Attempts to call `paren` a third time, but fails because doing so would
|
|
755
783
|
# prevent an overall successful match.
|
|
756
|
-
# 8. Matches the third character in the string,
|
|
757
|
-
# second recursive call
|
|
758
|
-
# 9. Matches the fourth character in the string,
|
|
784
|
+
# 8. Matches the third character in the string, <code>')'</code>; marks the end
|
|
785
|
+
# of the second recursive call.
|
|
786
|
+
# 9. Matches the fourth character in the string, <code>')'</code>.
|
|
759
787
|
# 10. Matches the end of the string.
|
|
760
788
|
#
|
|
761
789
|
# See [Subexpression
|
|
@@ -764,12 +792,13 @@
|
|
|
764
792
|
#
|
|
765
793
|
# #### Conditionals
|
|
766
794
|
#
|
|
767
|
-
# The conditional construct takes the form
|
|
795
|
+
# The conditional construct takes the form <code>(?(_cond_)_yes_|_no_)</code>,
|
|
796
|
+
# where:
|
|
768
797
|
#
|
|
769
798
|
# * *cond* may be a capture number or name.
|
|
770
799
|
# * The match to be applied is *yes* if *cond* is captured; otherwise the
|
|
771
800
|
# match to be applied is *no*.
|
|
772
|
-
# * If not needed,
|
|
801
|
+
# * If not needed, <code>|_no_</code> may be omitted.
|
|
773
802
|
#
|
|
774
803
|
# Examples:
|
|
775
804
|
#
|
|
@@ -798,50 +827,53 @@
|
|
|
798
827
|
#
|
|
799
828
|
# #### Unicode Properties
|
|
800
829
|
#
|
|
801
|
-
# The
|
|
802
|
-
# using a Unicode property name, much like a character class;
|
|
803
|
-
# specifies alphabetic characters:
|
|
830
|
+
# The <code>/\p{_property_name_}/</code> construct (with lowercase `p`) matches
|
|
831
|
+
# characters using a Unicode property name, much like a character class;
|
|
832
|
+
# property `Alpha` specifies alphabetic characters:
|
|
804
833
|
#
|
|
805
834
|
# /\p{Alpha}/.match('a') # => #<MatchData "a">
|
|
806
835
|
# /\p{Alpha}/.match('1') # => nil
|
|
807
836
|
#
|
|
808
|
-
# A property can be inverted by prefixing the name with a caret character
|
|
837
|
+
# A property can be inverted by prefixing the name with a caret character
|
|
838
|
+
# (<code>^</code>):
|
|
809
839
|
#
|
|
810
840
|
# /\p{^Alpha}/.match('1') # => #<MatchData "1">
|
|
811
841
|
# /\p{^Alpha}/.match('a') # => nil
|
|
812
842
|
#
|
|
813
|
-
# Or by using
|
|
843
|
+
# Or by using <code>\P</code> (uppercase `P`):
|
|
814
844
|
#
|
|
815
845
|
# /\P{Alpha}/.match('1') # => #<MatchData "1">
|
|
816
846
|
# /\P{Alpha}/.match('a') # => nil
|
|
817
847
|
#
|
|
818
|
-
# See [Unicode Properties](rdoc-ref:regexp/unicode_properties.rdoc) for
|
|
819
|
-
# based on the numerous properties.
|
|
848
|
+
# See [Unicode Properties](rdoc-ref:language/regexp/unicode_properties.rdoc) for
|
|
849
|
+
# regexps based on the numerous properties.
|
|
820
850
|
#
|
|
821
851
|
# Some commonly-used properties correspond to POSIX bracket expressions:
|
|
822
852
|
#
|
|
823
|
-
# *
|
|
824
|
-
# *
|
|
825
|
-
# *
|
|
826
|
-
# *
|
|
827
|
-
# *
|
|
828
|
-
# *
|
|
829
|
-
# *
|
|
830
|
-
#
|
|
831
|
-
# *
|
|
832
|
-
#
|
|
833
|
-
#
|
|
834
|
-
# *
|
|
853
|
+
# * <code>/\p{Alnum}/</code>: Alphabetic and numeric character
|
|
854
|
+
# * <code>/\p{Alpha}/</code>: Alphabetic character
|
|
855
|
+
# * <code>/\p{Blank}/</code>: Space or tab
|
|
856
|
+
# * <code>/\p{Cntrl}/</code>: Control character
|
|
857
|
+
# * <code>/\p{Digit}/</code>: Digit character (and similar)
|
|
858
|
+
# * <code>/\p{Lower}/</code>: Lowercase alphabetical character
|
|
859
|
+
# * <code>/\p{Print}/</code>: Like <code>\p{Graph}</code>, but includes the
|
|
860
|
+
# space character
|
|
861
|
+
# * <code>/\p{Punct}/</code>: Punctuation character
|
|
862
|
+
# * <code>/\p{Space}/</code>: Whitespace character (<code>[:blank:]</code>,
|
|
863
|
+
# newline, carriage return, etc.)
|
|
864
|
+
# * <code>/\p{Upper}/</code>: Uppercase alphabetical character
|
|
865
|
+
# * <code>/\p{XDigit}/</code>: Digit allowed in a hexadecimal number (i.e.,
|
|
866
|
+
# 0-9a-fA-F)
|
|
835
867
|
#
|
|
836
868
|
# These are also commonly used:
|
|
837
869
|
#
|
|
838
|
-
# *
|
|
839
|
-
# *
|
|
840
|
-
# that invisible characters under the
|
|
841
|
-
# ["Format"](https://www.compart.com/en/unicode/category/Cf)
|
|
842
|
-
# included.
|
|
843
|
-
# *
|
|
844
|
-
# below) or having one of these Unicode properties:
|
|
870
|
+
# * <code>/\p{Emoji}/</code>: Unicode emoji.
|
|
871
|
+
# * <code>/\p{Graph}/</code>: Characters excluding <code>/\p{Cntrl}/</code>
|
|
872
|
+
# and <code>/\p{Space}/</code>. Note that invisible characters under the
|
|
873
|
+
# Unicode ["Format"](https://www.compart.com/en/unicode/category/Cf)
|
|
874
|
+
# category are included.
|
|
875
|
+
# * <code>/\p{Word}/</code>: A member in one of these Unicode character
|
|
876
|
+
# categories (see below) or having one of these Unicode properties:
|
|
845
877
|
#
|
|
846
878
|
# * Unicode categories:
|
|
847
879
|
# * `Mark` (`M`).
|
|
@@ -852,9 +884,10 @@
|
|
|
852
884
|
# * `Alpha`
|
|
853
885
|
# * `Join_Control`
|
|
854
886
|
#
|
|
855
|
-
# *
|
|
856
|
-
# *
|
|
857
|
-
#
|
|
887
|
+
# * <code>/\p{ASCII}/</code>: A character in the ASCII character set.
|
|
888
|
+
# * <code>/\p{Any}/</code>: Any Unicode character (including unassigned
|
|
889
|
+
# characters).
|
|
890
|
+
# * <code>/\p{Assigned}/</code>: An assigned character.
|
|
858
891
|
#
|
|
859
892
|
# #### Unicode Character Categories
|
|
860
893
|
#
|
|
@@ -944,68 +977,73 @@
|
|
|
944
977
|
# expressions provide a portable alternative to the above, with the added
|
|
945
978
|
# benefit of encompassing non-ASCII characters:
|
|
946
979
|
#
|
|
947
|
-
# *
|
|
948
|
-
# *
|
|
949
|
-
# (`Nd`) category; see below.
|
|
980
|
+
# * <code>/\d/</code> matches only ASCII decimal digits `0` through `9`.
|
|
981
|
+
# * <code>/[[:digit:]]/</code> matches any character in the Unicode `Decimal
|
|
982
|
+
# Number` (`Nd`) category; see below.
|
|
950
983
|
#
|
|
951
984
|
# The POSIX bracket expressions:
|
|
952
985
|
#
|
|
953
|
-
# *
|
|
986
|
+
# * <code>/[[:digit:]]/</code>: Matches a [Unicode
|
|
954
987
|
# digit](https://www.compart.com/en/unicode/category/Nd):
|
|
955
988
|
#
|
|
956
989
|
# /[[:digit:]]/.match('9') # => #<MatchData "9">
|
|
957
990
|
# /[[:digit:]]/.match("\u1fbf9") # => #<MatchData "9">
|
|
958
991
|
#
|
|
959
|
-
# *
|
|
960
|
-
# equivalent to
|
|
992
|
+
# * <code>/[[:xdigit:]]/</code>: Matches a digit allowed in a hexadecimal
|
|
993
|
+
# number; equivalent to <code>[0-9a-fA-F]</code>.
|
|
961
994
|
#
|
|
962
|
-
# *
|
|
995
|
+
# * <code>/[[:upper:]]/</code>: Matches a [Unicode uppercase
|
|
963
996
|
# letter](https://www.compart.com/en/unicode/category/Lu):
|
|
964
997
|
#
|
|
965
998
|
# /[[:upper:]]/.match('A') # => #<MatchData "A">
|
|
966
999
|
# /[[:upper:]]/.match("\u00c6") # => #<MatchData "Æ">
|
|
967
1000
|
#
|
|
968
|
-
# *
|
|
1001
|
+
# * <code>/[[:lower:]]/</code>: Matches a [Unicode lowercase
|
|
969
1002
|
# letter](https://www.compart.com/en/unicode/category/Ll):
|
|
970
1003
|
#
|
|
971
1004
|
# /[[:lower:]]/.match('a') # => #<MatchData "a">
|
|
972
1005
|
# /[[:lower:]]/.match("\u01fd") # => #<MatchData "ǽ">
|
|
973
1006
|
#
|
|
974
|
-
# *
|
|
1007
|
+
# * <code>/[[:alpha:]]/</code>: Matches <code>/[[:upper:]]/</code> or
|
|
1008
|
+
# <code>/[[:lower:]]/</code>.
|
|
975
1009
|
#
|
|
976
|
-
# *
|
|
1010
|
+
# * <code>/[[:alnum:]]/</code>: Matches <code>/[[:alpha:]]/</code> or
|
|
1011
|
+
# <code>/[[:digit:]]/</code>.
|
|
977
1012
|
#
|
|
978
|
-
# *
|
|
1013
|
+
# * <code>/[[:space:]]/</code>: Matches [Unicode space
|
|
979
1014
|
# character](https://www.compart.com/en/unicode/category/Zs):
|
|
980
1015
|
#
|
|
981
1016
|
# /[[:space:]]/.match(' ') # => #<MatchData " ">
|
|
982
1017
|
# /[[:space:]]/.match("\u2005") # => #<MatchData " ">
|
|
983
1018
|
#
|
|
984
|
-
# *
|
|
1019
|
+
# * <code>/[[:blank:]]/</code>: Matches <code>/[[:space:]]/</code> or tab
|
|
1020
|
+
# character:
|
|
985
1021
|
#
|
|
986
1022
|
# /[[:blank:]]/.match(' ') # => #<MatchData " ">
|
|
987
1023
|
# /[[:blank:]]/.match("\u2005") # => #<MatchData " ">
|
|
988
1024
|
# /[[:blank:]]/.match("\t") # => #<MatchData "\t">
|
|
989
1025
|
#
|
|
990
|
-
# *
|
|
1026
|
+
# * <code>/[[:cntrl:]]/</code>: Matches [Unicode control
|
|
991
1027
|
# character](https://www.compart.com/en/unicode/category/Cc):
|
|
992
1028
|
#
|
|
993
1029
|
# /[[:cntrl:]]/.match("\u0000") # => #<MatchData "\u0000">
|
|
994
1030
|
# /[[:cntrl:]]/.match("\u009f") # => #<MatchData "\u009F">
|
|
995
1031
|
#
|
|
996
|
-
# *
|
|
997
|
-
#
|
|
1032
|
+
# * <code>/[[:graph:]]/</code>: Matches any character except
|
|
1033
|
+
# <code>/[[:space:]]/</code> or <code>/[[:cntrl:]]/</code>.
|
|
998
1034
|
#
|
|
999
|
-
# *
|
|
1035
|
+
# * <code>/[[:print:]]/</code>: Matches <code>/[[:graph:]]/</code> or space
|
|
1036
|
+
# character.
|
|
1000
1037
|
#
|
|
1001
|
-
# *
|
|
1038
|
+
# * <code>/[[:punct:]]/</code>: Matches any [Unicode punctuation
|
|
1002
1039
|
# character](https://www.compart.com/en/unicode/category/Po).
|
|
1003
1040
|
#
|
|
1004
1041
|
# Ruby also supports these (non-POSIX) bracket expressions:
|
|
1005
1042
|
#
|
|
1006
|
-
# *
|
|
1007
|
-
#
|
|
1008
|
-
#
|
|
1043
|
+
# * <code>/[[:ascii:]]/</code>: Matches a character in the ASCII character
|
|
1044
|
+
# set.
|
|
1045
|
+
# * <code>/[[:word:]]/</code>: Matches a character in one of these Unicode
|
|
1046
|
+
# character categories or having one of these Unicode properties:
|
|
1009
1047
|
#
|
|
1010
1048
|
# * Unicode categories:
|
|
1011
1049
|
# * `Mark` (`M`).
|
|
@@ -1018,9 +1056,9 @@
|
|
|
1018
1056
|
#
|
|
1019
1057
|
# ### Comments
|
|
1020
1058
|
#
|
|
1021
|
-
# A comment may be included in a regexp pattern using the
|
|
1022
|
-
# construct, where *comment* is a
|
|
1023
|
-
# text ignored by the regexp engine:
|
|
1059
|
+
# A comment may be included in a regexp pattern using the
|
|
1060
|
+
# <code>(?#</code>*comment*<code>)</code> construct, where *comment* is a
|
|
1061
|
+
# substring of arbitrary text that is ignored by the regexp engine:
|
|
1024
1062
|
#
|
|
1025
1063
|
# /foo(?#Ignore me)bar/.match('foobar') # => #<MatchData "foobar">
|
|
1026
1064
|
#
|
|
@@ -1032,22 +1070,26 @@
|
|
|
1032
1070
|
#
|
|
1033
1071
|
# Each of these modifiers sets a mode for the regexp:
|
|
1034
1072
|
#
|
|
1035
|
-
# * `i`:
|
|
1073
|
+
# * `i`: <code>/_pattern_/i</code> sets [Case-Insensitive
|
|
1036
1074
|
# Mode](rdoc-ref:Regexp@Case-Insensitive+Mode).
|
|
1037
|
-
# * `m`:
|
|
1038
|
-
#
|
|
1039
|
-
# * `
|
|
1075
|
+
# * `m`: <code>/_pattern_/m</code> sets [Multiline
|
|
1076
|
+
# Mode](rdoc-ref:Regexp@Multiline+Mode).
|
|
1077
|
+
# * `x`: <code>/_pattern_/x</code> sets [Extended
|
|
1078
|
+
# Mode](rdoc-ref:Regexp@Extended+Mode).
|
|
1079
|
+
# * `o`: <code>/_pattern_/o</code> sets [Interpolation
|
|
1040
1080
|
# Mode](rdoc-ref:Regexp@Interpolation+Mode).
|
|
1041
1081
|
#
|
|
1042
1082
|
# Any, all, or none of these may be applied.
|
|
1043
1083
|
#
|
|
1044
1084
|
# Modifiers `i`, `m`, and `x` may be applied to subexpressions:
|
|
1045
1085
|
#
|
|
1046
|
-
# *
|
|
1047
|
-
# *
|
|
1048
|
-
#
|
|
1049
|
-
# *
|
|
1050
|
-
# group
|
|
1086
|
+
# * <code>(?_modifier_)</code> turns the mode "on" for ensuing subexpressions
|
|
1087
|
+
# * <code>(?-_modifier_)</code> turns the mode "off" for ensuing
|
|
1088
|
+
# subexpressions
|
|
1089
|
+
# * <code>(?_modifier_:_subexp_)</code> turns the mode "on" for *subexp*
|
|
1090
|
+
# within the group
|
|
1091
|
+
# * <code>(?-_modifier_:_subexp_)</code> turns the mode "off" for *subexp*
|
|
1092
|
+
# within the group
|
|
1051
1093
|
#
|
|
1052
1094
|
# Example:
|
|
1053
1095
|
#
|
|
@@ -1082,7 +1124,8 @@
|
|
|
1082
1124
|
#
|
|
1083
1125
|
# The multiline-mode in Ruby is what is commonly called a "dot-all mode":
|
|
1084
1126
|
#
|
|
1085
|
-
# * Without the `m` modifier, the subexpression
|
|
1127
|
+
# * Without the `m` modifier, the subexpression <code>.</code> does not match
|
|
1128
|
+
# newlines:
|
|
1086
1129
|
#
|
|
1087
1130
|
# /a.c/.match("a\nc") # => nil
|
|
1088
1131
|
#
|
|
@@ -1090,16 +1133,17 @@
|
|
|
1090
1133
|
#
|
|
1091
1134
|
# /a.c/m.match("a\nc") # => #<MatchData "a\nc">
|
|
1092
1135
|
#
|
|
1093
|
-
# Unlike other languages, the modifier `m` does not affect the anchors
|
|
1094
|
-
#
|
|
1136
|
+
# Unlike other languages, the modifier `m` does not affect the anchors
|
|
1137
|
+
# <code>^</code> and <code>$</code>. These anchors always match at
|
|
1138
|
+
# line-boundaries in Ruby.
|
|
1095
1139
|
#
|
|
1096
1140
|
# ### Extended Mode
|
|
1097
1141
|
#
|
|
1098
1142
|
# Modifier `x` enables extended mode, which means that:
|
|
1099
1143
|
#
|
|
1100
1144
|
# * Literal white space in the pattern is to be ignored.
|
|
1101
|
-
# * Character
|
|
1102
|
-
# which is also to be ignored for matching purposes.
|
|
1145
|
+
# * Character <code>#</code> marks the remainder of its containing line as a
|
|
1146
|
+
# comment, which is also to be ignored for matching purposes.
|
|
1103
1147
|
#
|
|
1104
1148
|
# In extended mode, whitespace and comments may be used to form a
|
|
1105
1149
|
# self-documented regexp.
|
|
@@ -1162,22 +1206,22 @@
|
|
|
1162
1206
|
# A regular expression containing non-US-ASCII characters is assumed to use the
|
|
1163
1207
|
# source encoding. This can be overridden with one of the following modifiers.
|
|
1164
1208
|
#
|
|
1165
|
-
# *
|
|
1166
|
-
# ASCII-8BIT:
|
|
1209
|
+
# * <code>/_pat_/n</code>: US-ASCII if only containing US-ASCII characters,
|
|
1210
|
+
# otherwise ASCII-8BIT:
|
|
1167
1211
|
#
|
|
1168
1212
|
# /foo/n.encoding # => #<Encoding:US-ASCII>
|
|
1169
1213
|
# /foo\xff/n.encoding # => #<Encoding:ASCII-8BIT>
|
|
1170
1214
|
# /foo\x7f/n.encoding # => #<Encoding:US-ASCII>
|
|
1171
1215
|
#
|
|
1172
|
-
# *
|
|
1216
|
+
# * <code>/_pat_/u</code>: UTF-8
|
|
1173
1217
|
#
|
|
1174
1218
|
# /foo/u.encoding # => #<Encoding:UTF-8>
|
|
1175
1219
|
#
|
|
1176
|
-
# *
|
|
1220
|
+
# * <code>/_pat_/e</code>: EUC-JP
|
|
1177
1221
|
#
|
|
1178
1222
|
# /foo/e.encoding # => #<Encoding:EUC-JP>
|
|
1179
1223
|
#
|
|
1180
|
-
# *
|
|
1224
|
+
# * <code>/_pat_/s</code>: Windows-31J
|
|
1181
1225
|
#
|
|
1182
1226
|
# /foo/s.encoding # => #<Encoding:Windows-31J>
|
|
1183
1227
|
#
|
|
@@ -1189,7 +1233,7 @@
|
|
|
1189
1233
|
# has a *fixed* encoding.
|
|
1190
1234
|
#
|
|
1191
1235
|
# If a match between incompatible encodings is attempted an
|
|
1192
|
-
#
|
|
1236
|
+
# <code>Encoding::CompatibilityError</code> exception is raised.
|
|
1193
1237
|
#
|
|
1194
1238
|
# Example:
|
|
1195
1239
|
#
|
|
@@ -1248,7 +1292,7 @@
|
|
|
1248
1292
|
#
|
|
1249
1293
|
# Regexp matching can apply an optimization to prevent ReDoS attacks. When the
|
|
1250
1294
|
# optimization is applied, matching time increases linearly (not polynomially or
|
|
1251
|
-
# exponentially) in relation to the input size, and a ReDoS
|
|
1295
|
+
# exponentially) in relation to the input size, and a ReDoS attack is not
|
|
1252
1296
|
# possible.
|
|
1253
1297
|
#
|
|
1254
1298
|
# This optimization is applied if the pattern meets these criteria:
|
|
@@ -1256,8 +1300,9 @@
|
|
|
1256
1300
|
# * No backreferences.
|
|
1257
1301
|
# * No subexpression calls.
|
|
1258
1302
|
# * No nested lookaround anchors or atomic groups.
|
|
1259
|
-
# * No nested quantifiers with counting (i.e. no nested
|
|
1260
|
-
#
|
|
1303
|
+
# * No nested quantifiers with counting (i.e. no nested <code>{n}</code>,
|
|
1304
|
+
# <code>{min,}</code>, <code>{,max}</code>, or <code>{min,max}</code> style
|
|
1305
|
+
# quantifiers).
|
|
1261
1306
|
#
|
|
1262
1307
|
# You can use method Regexp.linear_time? to determine whether a pattern meets
|
|
1263
1308
|
# these criteria:
|
|
@@ -1272,21 +1317,14 @@
|
|
|
1272
1317
|
#
|
|
1273
1318
|
# ## References
|
|
1274
1319
|
#
|
|
1275
|
-
# Read
|
|
1320
|
+
# Read:
|
|
1276
1321
|
#
|
|
1277
|
-
# *
|
|
1278
|
-
#
|
|
1279
|
-
# ring%20Regular%20Expressions%2C%203rd%20Edition.pdf) by Jeffrey E.F.
|
|
1280
|
-
# Friedl.
|
|
1281
|
-
# * [Regular Expressions
|
|
1282
|
-
# Cookbook](https://doc.lagout.org/programmation/Regular%20Expressions/Regul
|
|
1283
|
-
# ar%20Expressions%20Cookbook_%20Detailed%20Solutions%20in%20Eight%20Program
|
|
1284
|
-
# ming%20Languages%20%282nd%20ed.%29%20%5BGoyvaerts%20%26%20Levithan%202012-
|
|
1285
|
-
# 09-06%5D.pdf) by Jan Goyvaerts & Steven Levithan.
|
|
1322
|
+
# * *Mastering Regular Expressions* by Jeffrey E.F. Friedl.
|
|
1323
|
+
# * *Regular Expressions Cookbook* by Jan Goyvaerts & Steven Levithan.
|
|
1286
1324
|
#
|
|
1287
|
-
# Explore, test
|
|
1325
|
+
# Explore, test:
|
|
1288
1326
|
#
|
|
1289
|
-
# * [Rubular](https://rubular.com/).
|
|
1327
|
+
# * [Rubular](https://rubular.com/): interactive online editor.
|
|
1290
1328
|
#
|
|
1291
1329
|
class Regexp
|
|
1292
1330
|
# Represents an object's ability to be converted to a `Regexp`.
|
|
@@ -1297,6 +1335,9 @@ class Regexp
|
|
|
1297
1335
|
def to_regexp: () -> Regexp
|
|
1298
1336
|
end
|
|
1299
1337
|
|
|
1338
|
+
# <!-- rdoc-file=re.c -->
|
|
1339
|
+
# Raised when regexp matching timed out.
|
|
1340
|
+
#
|
|
1300
1341
|
class TimeoutError < RegexpError
|
|
1301
1342
|
end
|
|
1302
1343
|
|
|
@@ -1355,8 +1396,8 @@ class Regexp
|
|
|
1355
1396
|
# - Regexp.last_match(n) -> string or nil
|
|
1356
1397
|
# - Regexp.last_match(name) -> string or nil
|
|
1357
1398
|
# -->
|
|
1358
|
-
# With no argument, returns the value of
|
|
1359
|
-
# recent pattern match (see [Regexp global
|
|
1399
|
+
# With no argument, returns the value of <code>$~</code>, which is the result of
|
|
1400
|
+
# the most recent pattern match (see [Regexp global
|
|
1360
1401
|
# variables](rdoc-ref:Regexp@Global+Variables)):
|
|
1361
1402
|
#
|
|
1362
1403
|
# /c(.)t/ =~ 'cat' # => 0
|
|
@@ -1434,14 +1475,14 @@ class Regexp
|
|
|
1434
1475
|
#
|
|
1435
1476
|
# Regexp.try_convert(/re/) # => /re/
|
|
1436
1477
|
#
|
|
1437
|
-
# Otherwise if `object` responds to
|
|
1438
|
-
# returns the result.
|
|
1478
|
+
# Otherwise if `object` responds to <code>:to_regexp</code>, calls
|
|
1479
|
+
# <code>object.to_regexp</code> and returns the result.
|
|
1439
1480
|
#
|
|
1440
|
-
# Returns `nil` if `object` does not respond to
|
|
1481
|
+
# Returns `nil` if `object` does not respond to <code>:to_regexp</code>.
|
|
1441
1482
|
#
|
|
1442
1483
|
# Regexp.try_convert('re') # => nil
|
|
1443
1484
|
#
|
|
1444
|
-
# Raises an exception unless
|
|
1485
|
+
# Raises an exception unless <code>object.to_regexp</code> returns a regexp.
|
|
1445
1486
|
#
|
|
1446
1487
|
def self.try_convert: (Regexp | _ToRegexp regexp_like) -> Regexp
|
|
1447
1488
|
| (untyped other) -> Regexp?
|
|
@@ -1462,7 +1503,7 @@ class Regexp
|
|
|
1462
1503
|
# It sets the default timeout interval for Regexp matching in seconds. `nil`
|
|
1463
1504
|
# means no default timeout configuration. This configuration is process-global.
|
|
1464
1505
|
# If you want to set timeout for each Regexp, use `timeout` keyword for
|
|
1465
|
-
#
|
|
1506
|
+
# <code>Regexp.new</code>.
|
|
1466
1507
|
#
|
|
1467
1508
|
# Regexp.timeout = 1
|
|
1468
1509
|
# /^a*b?a*$/ =~ "a" * 100000 + "x" #=> regexp match timeout (Regexp::TimeoutError)
|
|
@@ -1481,7 +1522,7 @@ class Regexp
|
|
|
1481
1522
|
# r.match('dog') # => #<MatchData "dog">
|
|
1482
1523
|
# r.match('cog') # => nil
|
|
1483
1524
|
#
|
|
1484
|
-
# For each pattern that is a string,
|
|
1525
|
+
# For each pattern that is a string, <code>Regexp.new(pattern)</code> is used:
|
|
1485
1526
|
#
|
|
1486
1527
|
# Regexp.union('penzance') # => /penzance/
|
|
1487
1528
|
# Regexp.union('a+b*c') # => /a\+b\*c/
|
|
@@ -1495,7 +1536,7 @@ class Regexp
|
|
|
1495
1536
|
# Regexp.union([/foo/i, /bar/m, /baz/x])
|
|
1496
1537
|
# # => /(?i-mx:foo)|(?m-ix:bar)|(?x-mi:baz)/
|
|
1497
1538
|
#
|
|
1498
|
-
# With no arguments, returns
|
|
1539
|
+
# With no arguments, returns <code>/(?!)/</code>:
|
|
1499
1540
|
#
|
|
1500
1541
|
# Regexp.union # => /(?!)/
|
|
1501
1542
|
#
|
|
@@ -1593,7 +1634,7 @@ class Regexp
|
|
|
1593
1634
|
# - regexp =~ string -> integer or nil
|
|
1594
1635
|
# -->
|
|
1595
1636
|
# Returns the integer index (in characters) of the first match for `self` and
|
|
1596
|
-
# `string`, or `nil` if none; also sets the [
|
|
1637
|
+
# `string`, or `nil` if none; also sets the [Regexp global
|
|
1597
1638
|
# variables](rdoc-ref:Regexp@Global+Variables):
|
|
1598
1639
|
#
|
|
1599
1640
|
# /at/ =~ 'input data' # => 7
|
|
@@ -1634,7 +1675,7 @@ class Regexp
|
|
|
1634
1675
|
# ' x = y ' =~ /(?<foo>\w+)\s*=\s*(?<foo>\w+)/
|
|
1635
1676
|
# p foo, foo # Undefined local variables
|
|
1636
1677
|
#
|
|
1637
|
-
# A regexp interpolation,
|
|
1678
|
+
# A regexp interpolation, <code>#{}</code>, also disables the assignment:
|
|
1638
1679
|
#
|
|
1639
1680
|
# r = /(?<foo>\w+)/
|
|
1640
1681
|
# /(?<foo>\w+)\s*=\s*#{r}/ =~ 'x = y'
|
|
@@ -1658,9 +1699,12 @@ class Regexp
|
|
|
1658
1699
|
|
|
1659
1700
|
# <!--
|
|
1660
1701
|
# rdoc-file=re.c
|
|
1661
|
-
# -
|
|
1702
|
+
# - encoding -> encoding
|
|
1662
1703
|
# -->
|
|
1663
|
-
# Returns
|
|
1704
|
+
# Returns an Encoding object that represents the encoding of `self`; see
|
|
1705
|
+
# [Encodings](rdoc-ref:encodings.rdoc).
|
|
1706
|
+
#
|
|
1707
|
+
# Related: see [Querying](rdoc-ref:String@Querying).
|
|
1664
1708
|
#
|
|
1665
1709
|
def encoding: () -> Encoding
|
|
1666
1710
|
|
|
@@ -1921,7 +1965,7 @@ class Regexp
|
|
|
1921
1965
|
# rdoc-file=re.c
|
|
1922
1966
|
# - ~ rxp -> integer or nil
|
|
1923
1967
|
# -->
|
|
1924
|
-
# Equivalent to
|
|
1968
|
+
# Equivalent to <code><i>rxp</i> =~ $_</code>:
|
|
1925
1969
|
#
|
|
1926
1970
|
# $_ = "input data"
|
|
1927
1971
|
# ~ /at/ # => 7
|