rbs 4.0.0.dev.5 → 4.0.1.dev.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.clang-format +1 -0
- data/.github/workflows/c-check.yml +8 -4
- data/.github/workflows/comments.yml +3 -1
- data/.github/workflows/dependabot.yml +1 -1
- data/.github/workflows/ruby.yml +10 -0
- data/.github/workflows/rust.yml +95 -0
- data/CHANGELOG.md +323 -0
- data/Rakefile +12 -29
- data/Steepfile +1 -0
- data/config.yml +393 -37
- data/core/array.rbs +218 -188
- data/core/basic_object.rbs +9 -8
- data/core/class.rbs +6 -5
- data/core/comparable.rbs +45 -31
- data/core/complex.rbs +52 -40
- data/core/dir.rbs +57 -45
- data/core/encoding.rbs +5 -5
- data/core/enumerable.rbs +96 -91
- data/core/enumerator.rbs +4 -3
- data/core/errno.rbs +3 -2
- data/core/errors.rbs +31 -29
- data/core/exception.rbs +12 -12
- data/core/fiber.rbs +36 -36
- data/core/file.rbs +186 -113
- data/core/file_test.rbs +2 -2
- data/core/float.rbs +41 -32
- data/core/gc.rbs +78 -70
- data/core/hash.rbs +70 -60
- data/core/integer.rbs +32 -28
- data/core/io/buffer.rbs +36 -36
- data/core/io/wait.rbs +7 -7
- data/core/io.rbs +120 -135
- data/core/kernel.rbs +189 -139
- data/core/marshal.rbs +3 -3
- data/core/match_data.rbs +14 -12
- data/core/math.rbs +69 -67
- data/core/method.rbs +6 -6
- data/core/module.rbs +146 -85
- data/core/nil_class.rbs +4 -3
- data/core/numeric.rbs +35 -32
- data/core/object.rbs +6 -8
- data/core/object_space.rbs +11 -10
- data/core/pathname.rbs +131 -81
- data/core/proc.rbs +65 -33
- data/core/process.rbs +219 -201
- data/core/ractor.rbs +15 -11
- data/core/random.rbs +4 -3
- data/core/range.rbs +52 -47
- data/core/rational.rbs +5 -5
- data/core/rbs/unnamed/argf.rbs +58 -51
- data/core/rbs/unnamed/env_class.rbs +18 -13
- data/core/rbs/unnamed/main_class.rbs +123 -0
- data/core/rbs/unnamed/random.rbs +7 -5
- data/core/regexp.rbs +236 -197
- data/core/ruby.rbs +1 -1
- data/core/ruby_vm.rbs +32 -30
- data/core/rubygems/config_file.rbs +5 -5
- data/core/rubygems/errors.rbs +1 -1
- data/core/rubygems/requirement.rbs +5 -5
- data/core/rubygems/rubygems.rbs +5 -3
- data/core/set.rbs +17 -16
- data/core/signal.rbs +2 -2
- data/core/string.rbs +311 -292
- data/core/struct.rbs +26 -25
- data/core/symbol.rbs +25 -24
- data/core/thread.rbs +40 -34
- data/core/time.rbs +47 -42
- data/core/trace_point.rbs +34 -31
- data/core/true_class.rbs +2 -2
- data/core/unbound_method.rbs +10 -10
- data/core/warning.rbs +7 -7
- data/docs/collection.md +1 -1
- data/docs/config.md +171 -0
- data/docs/inline.md +110 -4
- data/docs/syntax.md +13 -12
- data/ext/rbs_extension/ast_translation.c +489 -135
- data/ext/rbs_extension/class_constants.c +8 -0
- data/ext/rbs_extension/class_constants.h +4 -0
- data/ext/rbs_extension/legacy_location.c +28 -51
- data/ext/rbs_extension/legacy_location.h +37 -0
- data/ext/rbs_extension/main.c +12 -20
- data/include/rbs/ast.h +423 -195
- data/include/rbs/lexer.h +2 -2
- data/include/rbs/location.h +25 -44
- data/include/rbs/parser.h +2 -2
- data/include/rbs/util/rbs_constant_pool.h +0 -3
- data/include/rbs.h +8 -0
- data/lib/rbs/ast/ruby/annotations.rb +157 -4
- data/lib/rbs/ast/ruby/members.rb +374 -22
- data/lib/rbs/cli/validate.rb +5 -60
- data/lib/rbs/collection/config/lockfile_generator.rb +6 -2
- data/lib/rbs/definition_builder.rb +60 -27
- data/lib/rbs/errors.rb +0 -11
- data/lib/rbs/inline_parser.rb +1 -1
- data/lib/rbs/parser_aux.rb +20 -7
- data/lib/rbs/prototype/helpers.rb +57 -0
- data/lib/rbs/prototype/rb.rb +1 -26
- data/lib/rbs/prototype/rbi.rb +1 -20
- data/lib/rbs/test/type_check.rb +3 -0
- data/lib/rbs/types.rb +62 -52
- data/lib/rbs/unit_test/type_assertions.rb +35 -8
- data/lib/rbs/version.rb +1 -1
- data/lib/rbs.rb +0 -1
- data/rbs.gemspec +1 -1
- data/rust/.gitignore +1 -0
- data/rust/Cargo.lock +378 -0
- data/rust/Cargo.toml +7 -0
- data/rust/ruby-rbs/Cargo.toml +22 -0
- data/rust/ruby-rbs/build.rs +764 -0
- data/rust/ruby-rbs/examples/locations.rs +60 -0
- data/rust/ruby-rbs/src/lib.rs +1 -0
- data/rust/ruby-rbs/src/node/mod.rs +742 -0
- data/rust/ruby-rbs/tests/sanity.rs +47 -0
- data/rust/ruby-rbs/vendor/rbs/config.yml +1 -0
- data/rust/ruby-rbs-sys/Cargo.toml +23 -0
- data/rust/ruby-rbs-sys/build.rs +204 -0
- data/rust/ruby-rbs-sys/src/lib.rs +50 -0
- data/rust/ruby-rbs-sys/vendor/rbs/include +1 -0
- data/rust/ruby-rbs-sys/vendor/rbs/src +1 -0
- data/rust/ruby-rbs-sys/wrapper.h +1 -0
- data/sig/ast/ruby/annotations.rbs +191 -4
- data/sig/ast/ruby/members.rbs +21 -1
- data/sig/cli/validate.rbs +1 -6
- data/sig/definition_builder.rbs +2 -0
- data/sig/errors.rbs +0 -8
- data/sig/method_types.rbs +1 -1
- data/sig/parser.rbs +17 -13
- data/sig/prototype/helpers.rbs +2 -0
- data/sig/types.rbs +10 -11
- data/sig/unit_test/spy.rbs +0 -8
- data/sig/unit_test/type_assertions.rbs +11 -0
- data/src/ast.c +339 -161
- data/src/lexstate.c +1 -1
- data/src/location.c +7 -47
- data/src/parser.c +674 -480
- data/src/util/rbs_constant_pool.c +0 -4
- data/stdlib/bigdecimal/0/big_decimal.rbs +16 -16
- data/stdlib/cgi-escape/0/escape.rbs +4 -4
- data/stdlib/coverage/0/coverage.rbs +4 -3
- data/stdlib/date/0/date.rbs +33 -28
- data/stdlib/date/0/date_time.rbs +24 -23
- data/stdlib/did_you_mean/0/did_you_mean.rbs +17 -16
- data/stdlib/erb/0/erb.rbs +64 -53
- data/stdlib/etc/0/etc.rbs +55 -50
- data/stdlib/fileutils/0/fileutils.rbs +138 -125
- data/stdlib/forwardable/0/forwardable.rbs +10 -10
- data/stdlib/io-console/0/io-console.rbs +2 -2
- data/stdlib/json/0/json.rbs +135 -108
- data/stdlib/monitor/0/monitor.rbs +3 -3
- data/stdlib/net-http/0/net-http.rbs +159 -134
- data/stdlib/objspace/0/objspace.rbs +8 -7
- data/stdlib/open-uri/0/open-uri.rbs +8 -8
- data/stdlib/open3/0/open3.rbs +36 -35
- data/stdlib/openssl/0/openssl.rbs +144 -129
- data/stdlib/optparse/0/optparse.rbs +18 -14
- data/stdlib/pathname/0/pathname.rbs +2 -2
- data/stdlib/pp/0/pp.rbs +9 -8
- data/stdlib/prettyprint/0/prettyprint.rbs +7 -7
- data/stdlib/pstore/0/pstore.rbs +35 -30
- data/stdlib/psych/0/psych.rbs +61 -8
- data/stdlib/psych/0/store.rbs +2 -4
- data/stdlib/pty/0/pty.rbs +9 -6
- data/stdlib/random-formatter/0/random-formatter.rbs +2 -2
- data/stdlib/ripper/0/ripper.rbs +20 -17
- data/stdlib/securerandom/0/securerandom.rbs +1 -1
- data/stdlib/shellwords/0/shellwords.rbs +2 -2
- data/stdlib/socket/0/addrinfo.rbs +7 -7
- data/stdlib/socket/0/basic_socket.rbs +3 -3
- data/stdlib/socket/0/ip_socket.rbs +10 -8
- data/stdlib/socket/0/socket.rbs +10 -9
- data/stdlib/socket/0/tcp_server.rbs +1 -1
- data/stdlib/socket/0/tcp_socket.rbs +1 -1
- data/stdlib/socket/0/udp_socket.rbs +1 -1
- data/stdlib/socket/0/unix_server.rbs +1 -1
- data/stdlib/stringio/0/stringio.rbs +55 -54
- data/stdlib/strscan/0/string_scanner.rbs +46 -44
- data/stdlib/tempfile/0/tempfile.rbs +24 -20
- data/stdlib/time/0/time.rbs +7 -5
- data/stdlib/tsort/0/tsort.rbs +7 -6
- data/stdlib/uri/0/common.rbs +26 -18
- data/stdlib/uri/0/file.rbs +2 -2
- data/stdlib/uri/0/generic.rbs +2 -2
- data/stdlib/uri/0/http.rbs +2 -2
- data/stdlib/uri/0/ldap.rbs +2 -2
- data/stdlib/uri/0/mailto.rbs +3 -3
- data/stdlib/uri/0/rfc2396_parser.rbs +6 -5
- data/stdlib/zlib/0/deflate.rbs +4 -3
- data/stdlib/zlib/0/gzip_reader.rbs +4 -4
- data/stdlib/zlib/0/gzip_writer.rbs +14 -12
- data/stdlib/zlib/0/inflate.rbs +1 -1
- data/stdlib/zlib/0/need_dict.rbs +1 -1
- metadata +23 -5
- data/.github/workflows/valgrind.yml +0 -42
data/core/regexp.rbs
CHANGED
|
@@ -63,7 +63,7 @@
|
|
|
63
63
|
# # This is a very common usage.
|
|
64
64
|
# /foo/ # => /foo/
|
|
65
65
|
#
|
|
66
|
-
# * A
|
|
66
|
+
# * A <code>%r</code> regexp literal (see [%r: Regexp
|
|
67
67
|
# Literals](rdoc-ref:syntax/literals.rdoc@25r-3A+Regexp+Literals)):
|
|
68
68
|
#
|
|
69
69
|
# # Same delimiter character at beginning and end;
|
|
@@ -89,7 +89,7 @@
|
|
|
89
89
|
# 'food'.match(/foo/) # => #<MatchData "foo">
|
|
90
90
|
# 'food'.match(/bar/) # => nil
|
|
91
91
|
#
|
|
92
|
-
# ## Operator
|
|
92
|
+
# ## Operator <code>=~</code>
|
|
93
93
|
#
|
|
94
94
|
# Each of the operators Regexp#=~, String#=~, and Symbol#=~ returns an integer
|
|
95
95
|
# offset if a match was found, `nil` otherwise; each also sets [global
|
|
@@ -99,7 +99,7 @@
|
|
|
99
99
|
# 'foo bar' =~ /bar/ # => 4
|
|
100
100
|
# /baz/ =~ 'foo bar' # => nil
|
|
101
101
|
#
|
|
102
|
-
# ## Method
|
|
102
|
+
# ## Method <code>match?</code>
|
|
103
103
|
#
|
|
104
104
|
# Each of the methods Regexp#match?, String#match?, and Symbol#match? returns
|
|
105
105
|
# `true` if a match was found, `false` otherwise; none sets [global
|
|
@@ -112,22 +112,24 @@
|
|
|
112
112
|
#
|
|
113
113
|
# Certain regexp-oriented methods assign values to global variables:
|
|
114
114
|
#
|
|
115
|
-
# *
|
|
116
|
-
# *
|
|
115
|
+
# * <code>#match</code>: see [Method match](rdoc-ref:Regexp@Method+match).
|
|
116
|
+
# * <code>#=~</code>: see [Operator =~](rdoc-ref:Regexp@Operator+-3D~).
|
|
117
117
|
#
|
|
118
118
|
# The affected global variables are:
|
|
119
119
|
#
|
|
120
|
-
# *
|
|
121
|
-
# *
|
|
122
|
-
# *
|
|
123
|
-
#
|
|
124
|
-
# *
|
|
125
|
-
#
|
|
126
|
-
#
|
|
127
|
-
#
|
|
120
|
+
# * <code>$~</code>: Returns a MatchData object, or `nil`.
|
|
121
|
+
# * <code>$&</code>: Returns the matched part of the string, or `nil`.
|
|
122
|
+
# * <code>$`</code>: Returns the part of the string to the left of the match,
|
|
123
|
+
# or `nil`.
|
|
124
|
+
# * <code>$'</code>: Returns the part of the string to the right of the match,
|
|
125
|
+
# or `nil`.
|
|
126
|
+
# * <code>$+</code>: Returns the last group matched, or `nil`.
|
|
127
|
+
# * <code>$1</code>, <code>$2</code>, etc.: Returns the first, second, etc.,
|
|
128
|
+
# matched group, or `nil`. Note that <code>$0</code> is quite different; it
|
|
129
|
+
# returns the name of the currently executing program.
|
|
128
130
|
#
|
|
129
|
-
# These variables, except for
|
|
130
|
-
# MatchData@Global+variables+equivalence.
|
|
131
|
+
# These variables, except for <code>$~</code>, are shorthands for methods of
|
|
132
|
+
# <code>$~</code>. See MatchData@Global+variables+equivalence.
|
|
131
133
|
#
|
|
132
134
|
# Examples:
|
|
133
135
|
#
|
|
@@ -228,8 +230,9 @@
|
|
|
228
230
|
# see [Shorthand Character
|
|
229
231
|
# Classes](rdoc-ref:Regexp@Shorthand+Character+Classes).
|
|
230
232
|
#
|
|
231
|
-
# *
|
|
232
|
-
# a source literal, it's shorthand for matching a whitespace
|
|
233
|
+
# * <code>\s</code> in an ordinary string literal is equivalent to a space
|
|
234
|
+
# character; in a source literal, it's shorthand for matching a whitespace
|
|
235
|
+
# character.
|
|
233
236
|
# * In an ordinary string literal, these are (needlessly) escaped characters;
|
|
234
237
|
# in a source literal, they are shorthands for various matching characters:
|
|
235
238
|
#
|
|
@@ -254,16 +257,19 @@
|
|
|
254
257
|
# /[a-f]/.match('foo') # => #<MatchData "f">
|
|
255
258
|
# /[a-cd-f]/.match('foo') # => #<MatchData "f">
|
|
256
259
|
#
|
|
257
|
-
# When the first character of a character class is a caret (
|
|
258
|
-
# the class is inverted: it matches any character *except* those
|
|
260
|
+
# When the first character of a character class is a caret (<code>^</code>), the
|
|
261
|
+
# sense of the class is inverted: it matches any character *except* those
|
|
262
|
+
# specified.
|
|
259
263
|
#
|
|
260
264
|
# /[^a-eg-z]/.match('f') # => #<MatchData "f">
|
|
261
265
|
#
|
|
262
266
|
# A character class may contain another character class. By itself this isn't
|
|
263
|
-
# useful because
|
|
267
|
+
# useful because <code>[a-z[0-9]]</code> describes the same set as
|
|
268
|
+
# <code>[a-z0-9]</code>.
|
|
264
269
|
#
|
|
265
|
-
# However, character classes also support the
|
|
266
|
-
# intersection on its arguments. The two can be combined as
|
|
270
|
+
# However, character classes also support the <code>&&</code> operator, which
|
|
271
|
+
# performs set intersection on its arguments. The two can be combined as
|
|
272
|
+
# follows:
|
|
267
273
|
#
|
|
268
274
|
# /[a-w&&[^c-g]z]/ # ([a-w] AND ([^c-g] OR z))
|
|
269
275
|
#
|
|
@@ -276,59 +282,66 @@
|
|
|
276
282
|
# Each of the following metacharacters serves as a shorthand for a character
|
|
277
283
|
# class:
|
|
278
284
|
#
|
|
279
|
-
# *
|
|
285
|
+
# * <code>/./</code>: Matches any character except a newline:
|
|
280
286
|
#
|
|
281
287
|
# /./.match('foo') # => #<MatchData "f">
|
|
282
288
|
# /./.match("\n") # => nil
|
|
283
289
|
#
|
|
284
|
-
# *
|
|
285
|
-
# Mode](rdoc-ref:Regexp@Multiline+Mode):
|
|
290
|
+
# * <code>/./m</code>: Matches any character, including a newline; see
|
|
291
|
+
# [Multiline Mode](rdoc-ref:Regexp@Multiline+Mode):
|
|
286
292
|
#
|
|
287
293
|
# /./m.match("\n") # => #<MatchData "\n">
|
|
288
294
|
#
|
|
289
|
-
# *
|
|
295
|
+
# * <code>/\w/</code>: Matches a word character: equivalent to
|
|
296
|
+
# <code>[a-zA-Z0-9_]</code>:
|
|
290
297
|
#
|
|
291
298
|
# /\w/.match(' foo') # => #<MatchData "f">
|
|
292
299
|
# /\w/.match(' _') # => #<MatchData "_">
|
|
293
300
|
# /\w/.match(' ') # => nil
|
|
294
301
|
#
|
|
295
|
-
# *
|
|
302
|
+
# * <code>/\W/</code>: Matches a non-word character: equivalent to
|
|
303
|
+
# <code>[^a-zA-Z0-9_]</code>:
|
|
296
304
|
#
|
|
297
305
|
# /\W/.match(' ') # => #<MatchData " ">
|
|
298
306
|
# /\W/.match('_') # => nil
|
|
299
307
|
#
|
|
300
|
-
# *
|
|
308
|
+
# * <code>/\d/</code>: Matches a digit character: equivalent to
|
|
309
|
+
# <code>[0-9]</code>:
|
|
301
310
|
#
|
|
302
311
|
# /\d/.match('THX1138') # => #<MatchData "1">
|
|
303
312
|
# /\d/.match('foo') # => nil
|
|
304
313
|
#
|
|
305
|
-
# *
|
|
314
|
+
# * <code>/\D/</code>: Matches a non-digit character: equivalent to
|
|
315
|
+
# <code>[^0-9]</code>:
|
|
306
316
|
#
|
|
307
317
|
# /\D/.match('123Jump!') # => #<MatchData "J">
|
|
308
318
|
# /\D/.match('123') # => nil
|
|
309
319
|
#
|
|
310
|
-
# *
|
|
320
|
+
# * <code>/\h/</code>: Matches a hexdigit character: equivalent to
|
|
321
|
+
# <code>[0-9a-fA-F]</code>:
|
|
311
322
|
#
|
|
312
323
|
# /\h/.match('xyz fedcba9876543210') # => #<MatchData "f">
|
|
313
324
|
# /\h/.match('xyz') # => nil
|
|
314
325
|
#
|
|
315
|
-
# *
|
|
326
|
+
# * <code>/\H/</code>: Matches a non-hexdigit character: equivalent to
|
|
327
|
+
# <code>[^0-9a-fA-F]</code>:
|
|
316
328
|
#
|
|
317
329
|
# /\H/.match('fedcba9876543210xyz') # => #<MatchData "x">
|
|
318
330
|
# /\H/.match('fedcba9876543210') # => nil
|
|
319
331
|
#
|
|
320
|
-
# *
|
|
332
|
+
# * <code>/\s/</code>: Matches a whitespace character: equivalent to <code>/[
|
|
333
|
+
# \t\r\n\f\v]/</code>:
|
|
321
334
|
#
|
|
322
335
|
# /\s/.match('foo bar') # => #<MatchData " ">
|
|
323
336
|
# /\s/.match('foo') # => nil
|
|
324
337
|
#
|
|
325
|
-
# *
|
|
326
|
-
# \t\r\n\f\v]
|
|
338
|
+
# * <code>/\S/</code>: Matches a non-whitespace character: equivalent to
|
|
339
|
+
# <code>/[^ \t\r\n\f\v]/</code>:
|
|
327
340
|
#
|
|
328
341
|
# /\S/.match(" \t\r\n\f\v foo") # => #<MatchData "f">
|
|
329
342
|
# /\S/.match(" \t\r\n\f\v") # => nil
|
|
330
343
|
#
|
|
331
|
-
# *
|
|
344
|
+
# * <code>/\R/</code>: Matches a linebreak, platform-independently:
|
|
332
345
|
#
|
|
333
346
|
# /\R/.match("\r") # => #<MatchData "\r"> # Carriage return (CR)
|
|
334
347
|
# /\R/.match("\n") # => #<MatchData "\n"> # Newline (LF)
|
|
@@ -355,47 +368,47 @@
|
|
|
355
368
|
#
|
|
356
369
|
# Each of these anchors matches a boundary:
|
|
357
370
|
#
|
|
358
|
-
# *
|
|
371
|
+
# * <code>^</code>: Matches the beginning of a line:
|
|
359
372
|
#
|
|
360
373
|
# /^bar/.match("foo\nbar") # => #<MatchData "bar">
|
|
361
374
|
# /^ar/.match("foo\nbar") # => nil
|
|
362
375
|
#
|
|
363
|
-
# *
|
|
376
|
+
# * <code>$</code>: Matches the end of a line:
|
|
364
377
|
#
|
|
365
378
|
# /bar$/.match("foo\nbar") # => #<MatchData "bar">
|
|
366
379
|
# /ba$/.match("foo\nbar") # => nil
|
|
367
380
|
#
|
|
368
|
-
# *
|
|
381
|
+
# * <code>\A</code>: Matches the beginning of the string:
|
|
369
382
|
#
|
|
370
383
|
# /\Afoo/.match('foo bar') # => #<MatchData "foo">
|
|
371
384
|
# /\Afoo/.match(' foo bar') # => nil
|
|
372
385
|
#
|
|
373
|
-
# *
|
|
374
|
-
# it matches just before the ending newline:
|
|
386
|
+
# * <code>\Z</code>: Matches the end of the string; if the string ends with a
|
|
387
|
+
# single newline, it matches just before the ending newline:
|
|
375
388
|
#
|
|
376
389
|
# /foo\Z/.match('bar foo') # => #<MatchData "foo">
|
|
377
390
|
# /foo\Z/.match('foo bar') # => nil
|
|
378
391
|
# /foo\Z/.match("bar foo\n") # => #<MatchData "foo">
|
|
379
392
|
# /foo\Z/.match("bar foo\n\n") # => nil
|
|
380
393
|
#
|
|
381
|
-
# *
|
|
394
|
+
# * <code>\z</code>: Matches the end of the string:
|
|
382
395
|
#
|
|
383
396
|
# /foo\z/.match('bar foo') # => #<MatchData "foo">
|
|
384
397
|
# /foo\z/.match('foo bar') # => nil
|
|
385
398
|
# /foo\z/.match("bar foo\n") # => nil
|
|
386
399
|
#
|
|
387
|
-
# *
|
|
388
|
-
# (
|
|
400
|
+
# * <code>\b</code>: Matches a word boundary when not inside brackets; matches
|
|
401
|
+
# backspace (<code>"0x08"</code>) when inside brackets:
|
|
389
402
|
#
|
|
390
403
|
# /foo\b/.match('foo bar') # => #<MatchData "foo">
|
|
391
404
|
# /foo\b/.match('foobar') # => nil
|
|
392
405
|
#
|
|
393
|
-
# *
|
|
406
|
+
# * <code>\B</code>: Matches a non-word boundary:
|
|
394
407
|
#
|
|
395
408
|
# /foo\B/.match('foobar') # => #<MatchData "foo">
|
|
396
409
|
# /foo\B/.match('foo bar') # => nil
|
|
397
410
|
#
|
|
398
|
-
# *
|
|
411
|
+
# * <code>\G</code>: Matches the first matching position:
|
|
399
412
|
#
|
|
400
413
|
# In methods like String#gsub and String#scan, it changes on each iteration.
|
|
401
414
|
# It initially matches the beginning of subject, and in each following
|
|
@@ -414,26 +427,27 @@
|
|
|
414
427
|
#
|
|
415
428
|
# Lookahead anchors:
|
|
416
429
|
#
|
|
417
|
-
# *
|
|
418
|
-
# characters match *pat*, but doesn't include those characters in
|
|
419
|
-
# matched substring.
|
|
420
|
-
#
|
|
421
|
-
# * `(?!*pat*)`: Negative lookahead assertion: ensures that the following
|
|
422
|
-
# characters *do not* match *pat*, but doesn't include those characters in
|
|
430
|
+
# * <code>(?=_pat_)</code>: Positive lookahead assertion: ensures that the
|
|
431
|
+
# following characters match *pat*, but doesn't include those characters in
|
|
423
432
|
# the matched substring.
|
|
424
433
|
#
|
|
434
|
+
# * <code>(?!_pat_)</code>: Negative lookahead assertion: ensures that the
|
|
435
|
+
# following characters *do not* match *pat*, but doesn't include those
|
|
436
|
+
# characters in the matched substring.
|
|
437
|
+
#
|
|
425
438
|
# Lookbehind anchors:
|
|
426
439
|
#
|
|
427
|
-
# *
|
|
428
|
-
# characters match *pat*, but doesn't include those characters in
|
|
429
|
-
# matched substring.
|
|
440
|
+
# * <code>(?<=_pat_)</code>: Positive lookbehind assertion: ensures that the
|
|
441
|
+
# preceding characters match *pat*, but doesn't include those characters in
|
|
442
|
+
# the matched substring.
|
|
430
443
|
#
|
|
431
|
-
# *
|
|
432
|
-
# characters do not match *pat*, but doesn't include those
|
|
433
|
-
# matched substring.
|
|
444
|
+
# * <code>(?<!_pat_)</code>: Negative lookbehind assertion: ensures that the
|
|
445
|
+
# preceding characters do not match *pat*, but doesn't include those
|
|
446
|
+
# characters in the matched substring.
|
|
434
447
|
#
|
|
435
448
|
# The pattern below uses positive lookahead and positive lookbehind to match
|
|
436
|
-
# text appearing in
|
|
449
|
+
# text appearing in <code><b></code>...<code></b></code> tags without including
|
|
450
|
+
# the tags in the match:
|
|
437
451
|
#
|
|
438
452
|
# /(?<=<b>)\w+(?=<\/b>)/.match("Fortune favors the <b>bold</b>.")
|
|
439
453
|
# # => #<MatchData "bold">
|
|
@@ -443,15 +457,15 @@
|
|
|
443
457
|
#
|
|
444
458
|
# #### Match-Reset Anchor
|
|
445
459
|
#
|
|
446
|
-
# *
|
|
447
|
-
# excluded from the result. For example,
|
|
448
|
-
# almost equivalent:
|
|
460
|
+
# * <code>\K</code>: Match reset: the matched content preceding
|
|
461
|
+
# <code>\K</code> in the regexp is excluded from the result. For example,
|
|
462
|
+
# the following two regexps are almost equivalent:
|
|
449
463
|
#
|
|
450
464
|
# /ab\Kc/.match('abc') # => #<MatchData "c">
|
|
451
465
|
# /(?<=ab)c/.match('abc') # => #<MatchData "c">
|
|
452
466
|
#
|
|
453
|
-
# These match same string and
|
|
454
|
-
# is different.
|
|
467
|
+
# These match the same string and <code>$&</code> equals <code>'c'</code>, while
|
|
468
|
+
# the matched position is different.
|
|
455
469
|
#
|
|
456
470
|
# As are the following two regexps:
|
|
457
471
|
#
|
|
@@ -460,9 +474,9 @@
|
|
|
460
474
|
#
|
|
461
475
|
# ### Alternation
|
|
462
476
|
#
|
|
463
|
-
# The vertical bar metacharacter (
|
|
464
|
-
# alternation: two or more subexpressions any of which may match the
|
|
465
|
-
# string.
|
|
477
|
+
# The vertical bar metacharacter (<code>|</code>) may be used within parentheses
|
|
478
|
+
# to express alternation: two or more subexpressions any of which may match the
|
|
479
|
+
# target string.
|
|
466
480
|
#
|
|
467
481
|
# Two alternatives:
|
|
468
482
|
#
|
|
@@ -494,7 +508,7 @@
|
|
|
494
508
|
#
|
|
495
509
|
# An added *quantifier* specifies how many matches are required or allowed:
|
|
496
510
|
#
|
|
497
|
-
# *
|
|
511
|
+
# * <code>*</code> - Matches zero or more times:
|
|
498
512
|
#
|
|
499
513
|
# /\w*/.match('')
|
|
500
514
|
# # => #<MatchData "">
|
|
@@ -503,39 +517,39 @@
|
|
|
503
517
|
# /\w*/.match('xyz')
|
|
504
518
|
# # => #<MatchData "xyz">
|
|
505
519
|
#
|
|
506
|
-
# *
|
|
520
|
+
# * <code>+</code> - Matches one or more times:
|
|
507
521
|
#
|
|
508
522
|
# /\w+/.match('') # => nil
|
|
509
523
|
# /\w+/.match('x') # => #<MatchData "x">
|
|
510
524
|
# /\w+/.match('xyz') # => #<MatchData "xyz">
|
|
511
525
|
#
|
|
512
|
-
# *
|
|
526
|
+
# * <code>?</code> - Matches zero or one times:
|
|
513
527
|
#
|
|
514
528
|
# /\w?/.match('') # => #<MatchData "">
|
|
515
529
|
# /\w?/.match('x') # => #<MatchData "x">
|
|
516
530
|
# /\w?/.match('xyz') # => #<MatchData "x">
|
|
517
531
|
#
|
|
518
|
-
# *
|
|
532
|
+
# * <code>{</code>*n*<code>}</code> - Matches exactly *n* times:
|
|
519
533
|
#
|
|
520
534
|
# /\w{2}/.match('') # => nil
|
|
521
535
|
# /\w{2}/.match('x') # => nil
|
|
522
536
|
# /\w{2}/.match('xyz') # => #<MatchData "xy">
|
|
523
537
|
#
|
|
524
|
-
# *
|
|
538
|
+
# * <code>{</code>*min*<code>,}</code> - Matches *min* or more times:
|
|
525
539
|
#
|
|
526
540
|
# /\w{2,}/.match('') # => nil
|
|
527
541
|
# /\w{2,}/.match('x') # => nil
|
|
528
542
|
# /\w{2,}/.match('xy') # => #<MatchData "xy">
|
|
529
543
|
# /\w{2,}/.match('xyz') # => #<MatchData "xyz">
|
|
530
544
|
#
|
|
531
|
-
# *
|
|
545
|
+
# * <code>{,</code>*max*<code>}</code> - Matches *max* or fewer times:
|
|
532
546
|
#
|
|
533
547
|
# /\w{,2}/.match('') # => #<MatchData "">
|
|
534
548
|
# /\w{,2}/.match('x') # => #<MatchData "x">
|
|
535
549
|
# /\w{,2}/.match('xyz') # => #<MatchData "xy">
|
|
536
550
|
#
|
|
537
|
-
# *
|
|
538
|
-
# times:
|
|
551
|
+
# * <code>{</code>*min*<code>,</code>*max*<code>}</code> - Matches at least
|
|
552
|
+
# *min* times and at most *max* times:
|
|
539
553
|
#
|
|
540
554
|
# /\w{1,2}/.match('') # => nil
|
|
541
555
|
# /\w{1,2}/.match('x') # => #<MatchData "x">
|
|
@@ -546,14 +560,17 @@
|
|
|
546
560
|
# Quantifier matching may be greedy, lazy, or possessive:
|
|
547
561
|
#
|
|
548
562
|
# * In *greedy* matching, as many occurrences as possible are matched while
|
|
549
|
-
# still allowing the overall match to succeed. Greedy quantifiers:
|
|
550
|
-
#
|
|
563
|
+
# still allowing the overall match to succeed. Greedy quantifiers:
|
|
564
|
+
# <code>*</code>, <code>+</code>, <code>?</code>, <code>{min, max}</code>
|
|
565
|
+
# and its variants.
|
|
551
566
|
# * In *lazy* matching, the minimum number of occurrences are matched. Lazy
|
|
552
|
-
# quantifiers:
|
|
567
|
+
# quantifiers: <code>*?</code>, <code>+?</code>, <code>??</code>,
|
|
568
|
+
# <code>{min, max}?</code> and its variants.
|
|
553
569
|
# * In *possessive* matching, once a match is found, there is no backtracking;
|
|
554
570
|
# that match is retained, even if it jeopardises the overall match.
|
|
555
|
-
# Possessive quantifiers:
|
|
556
|
-
# variants do *not* support
|
|
571
|
+
# Possessive quantifiers: <code>*+</code>, <code>++</code>, <code>?+</code>.
|
|
572
|
+
# Note that <code>{min, max}</code> and its variants do *not* support
|
|
573
|
+
# possessive matching.
|
|
557
574
|
#
|
|
558
575
|
# More:
|
|
559
576
|
#
|
|
@@ -577,8 +594,9 @@
|
|
|
577
594
|
# re.match('1943-02-04').size # => 1
|
|
578
595
|
# re.match('foo') # => nil
|
|
579
596
|
#
|
|
580
|
-
# Adding one or more pairs of parentheses,
|
|
581
|
-
# *groups*, which may result in multiple matched substrings, called
|
|
597
|
+
# Adding one or more pairs of parentheses, <code>(_subexpression_)</code>,
|
|
598
|
+
# defines *groups*, which may result in multiple matched substrings, called
|
|
599
|
+
# *captures*:
|
|
582
600
|
#
|
|
583
601
|
# re = /(\d\d\d\d)-(\d\d)-(\d\d)/
|
|
584
602
|
# re.match('1943-02-04') # => #<MatchData "1943-02-04" 1:"1943" 2:"02" 3:"04">
|
|
@@ -619,7 +637,7 @@
|
|
|
619
637
|
# have a quantifier), but its matching substring is not included among the
|
|
620
638
|
# captures.
|
|
621
639
|
#
|
|
622
|
-
# A non-capturing group begins with
|
|
640
|
+
# A non-capturing group begins with <code>?:</code> (inside the parentheses):
|
|
623
641
|
#
|
|
624
642
|
# # Don't capture the year.
|
|
625
643
|
# re = /(?:\d\d\d\d)-(\d\d)-(\d\d)/
|
|
@@ -649,12 +667,14 @@
|
|
|
649
667
|
#
|
|
650
668
|
# * For a large number of groups:
|
|
651
669
|
#
|
|
652
|
-
# * The ordinary
|
|
653
|
-
#
|
|
670
|
+
# * The ordinary <code>\_n_</code> notation applies only for *n* in range
|
|
671
|
+
# (1..9).
|
|
672
|
+
# * The <code>MatchData[_n_]</code> notation applies for any non-negative
|
|
673
|
+
# *n*.
|
|
654
674
|
#
|
|
655
|
-
# *
|
|
656
|
-
# it may not be used within the regexp itself, but may be
|
|
657
|
-
# (for example, in a substitution method call):
|
|
675
|
+
# * <code>\0</code> is a special backreference, referring to the entire
|
|
676
|
+
# matched string; it may not be used within the regexp itself, but may be
|
|
677
|
+
# used outside it (for example, in a substitution method call):
|
|
658
678
|
#
|
|
659
679
|
# 'The cat sat in the hat'.gsub(/[csh]at/, '\0s')
|
|
660
680
|
# # => "The cats sats in the hats"
|
|
@@ -662,8 +682,8 @@
|
|
|
662
682
|
# #### Named Captures
|
|
663
683
|
#
|
|
664
684
|
# As seen above, a capture can be referred to by its number. A capture can also
|
|
665
|
-
# have a name, prefixed as
|
|
666
|
-
# may be used as an index in
|
|
685
|
+
# have a name, prefixed as <code>?<_name_></code> or <code>?'_name_'</code>, and
|
|
686
|
+
# the name (symbolized) may be used as an index in <code>MatchData[]</code>:
|
|
667
687
|
#
|
|
668
688
|
# md = /\$(?<dollars>\d+)\.(?'cents'\d+)/.match("$3.67")
|
|
669
689
|
# # => #<MatchData "$3.67" dollars:"3" cents:"67">
|
|
@@ -677,14 +697,14 @@
|
|
|
677
697
|
# /\$(?<dollars>\d+)\.(\d+)/.match("$3.67")
|
|
678
698
|
# # => #<MatchData "$3.67" dollars:"3">
|
|
679
699
|
#
|
|
680
|
-
# A named group may be backreferenced as
|
|
700
|
+
# A named group may be backreferenced as <code>\k<_name_></code>:
|
|
681
701
|
#
|
|
682
702
|
# /(?<vowel>[aeiou]).\k<vowel>.\k<vowel>/.match('ototomy')
|
|
683
703
|
# # => #<MatchData "ototo" vowel:"o">
|
|
684
704
|
#
|
|
685
705
|
# When (and only when) a regexp contains named capture groups and appears before
|
|
686
|
-
# the
|
|
687
|
-
# with corresponding names:
|
|
706
|
+
# the <code>=~</code> operator, the captured substrings are assigned to local
|
|
707
|
+
# variables with corresponding names:
|
|
688
708
|
#
|
|
689
709
|
# /\$(?<dollars>\d+)\.(?<cents>\d+)/ =~ '$3.67'
|
|
690
710
|
# dollars # => "3"
|
|
@@ -695,7 +715,8 @@
|
|
|
695
715
|
#
|
|
696
716
|
# #### Atomic Grouping
|
|
697
717
|
#
|
|
698
|
-
# A group may be made *atomic* with
|
|
718
|
+
# A group may be made *atomic* with
|
|
719
|
+
# <code>(?></code>*subexpression*<code>)</code>.
|
|
699
720
|
#
|
|
700
721
|
# This causes the subexpression to be matched independently of the rest of the
|
|
701
722
|
# expression, so that the matched substring becomes fixed for the remainder of
|
|
@@ -712,19 +733,19 @@
|
|
|
712
733
|
#
|
|
713
734
|
# Analysis:
|
|
714
735
|
#
|
|
715
|
-
# 1. The leading subexpression
|
|
716
|
-
#
|
|
717
|
-
# 2. The next subexpression
|
|
718
|
-
# the trailing double-quote).
|
|
736
|
+
# 1. The leading subexpression <code>"</code> in the pattern matches the first
|
|
737
|
+
# character <code>"</code> in the target string.
|
|
738
|
+
# 2. The next subexpression <code>.*</code> matches the next substring
|
|
739
|
+
# <code>Quote"</code> (including the trailing double-quote).
|
|
719
740
|
# 3. Now there is nothing left in the target string to match the trailing
|
|
720
|
-
# subexpression
|
|
721
|
-
# fail.
|
|
741
|
+
# subexpression <code>"</code> in the pattern; this would cause the overall
|
|
742
|
+
# match to fail.
|
|
722
743
|
# 4. The matched substring is backtracked by one position: `Quote`.
|
|
723
|
-
# 5. The final subexpression
|
|
724
|
-
# overall match succeeds.
|
|
744
|
+
# 5. The final subexpression <code>"</code> now matches the final substring
|
|
745
|
+
# <code>"</code>, and the overall match succeeds.
|
|
725
746
|
#
|
|
726
|
-
# If subexpression
|
|
727
|
-
# the overall match fails:
|
|
747
|
+
# If subexpression <code>.*</code> is grouped atomically, the backtracking is
|
|
748
|
+
# disabled, and the overall match fails:
|
|
728
749
|
#
|
|
729
750
|
# /"(?>.*)"/.match('"Quote"') # => nil
|
|
730
751
|
#
|
|
@@ -733,9 +754,10 @@
|
|
|
733
754
|
#
|
|
734
755
|
# #### Subexpression Calls
|
|
735
756
|
#
|
|
736
|
-
# As seen above, a backreference number (
|
|
737
|
-
# access to a captured *substring*; the
|
|
738
|
-
# also be accessed, via the number
|
|
757
|
+
# As seen above, a backreference number (<code>\_n_</code>) or name
|
|
758
|
+
# (<code>\k<_name_></code>) gives access to a captured *substring*; the
|
|
759
|
+
# corresponding regexp *subexpression* may also be accessed, via the number
|
|
760
|
+
# (<code>\g<_n_></code>) or name (<code>\g<_name_></code>):
|
|
739
761
|
#
|
|
740
762
|
# /\A(?<paren>\(\g<paren>*\))*\z/.match('(())')
|
|
741
763
|
# # ^1
|
|
@@ -753,15 +775,15 @@
|
|
|
753
775
|
#
|
|
754
776
|
# 1. Matches at the beginning of the string, i.e. before the first character.
|
|
755
777
|
# 2. Enters a named group `paren`.
|
|
756
|
-
# 3. Matches the first character in the string,
|
|
778
|
+
# 3. Matches the first character in the string, <code>'('</code>.
|
|
757
779
|
# 4. Calls the `paren` group again, i.e. recurses back to the second step.
|
|
758
780
|
# 5. Re-enters the `paren` group.
|
|
759
|
-
# 6. Matches the second character in the string,
|
|
781
|
+
# 6. Matches the second character in the string, <code>'('</code>.
|
|
760
782
|
# 7. Attempts to call `paren` a third time, but fails because doing so would
|
|
761
783
|
# prevent an overall successful match.
|
|
762
|
-
# 8. Matches the third character in the string,
|
|
763
|
-
# second recursive call
|
|
764
|
-
# 9. Matches the fourth character in the string,
|
|
784
|
+
# 8. Matches the third character in the string, <code>')'</code>; marks the end
|
|
785
|
+
# of the second recursive call.
|
|
786
|
+
# 9. Matches the fourth character in the string, <code>')'</code>.
|
|
765
787
|
# 10. Matches the end of the string.
|
|
766
788
|
#
|
|
767
789
|
# See [Subexpression
|
|
@@ -770,12 +792,13 @@
|
|
|
770
792
|
#
|
|
771
793
|
# #### Conditionals
|
|
772
794
|
#
|
|
773
|
-
# The conditional construct takes the form
|
|
795
|
+
# The conditional construct takes the form <code>(?(_cond_)_yes_|_no_)</code>,
|
|
796
|
+
# where:
|
|
774
797
|
#
|
|
775
798
|
# * *cond* may be a capture number or name.
|
|
776
799
|
# * The match to be applied is *yes* if *cond* is captured; otherwise the
|
|
777
800
|
# match to be applied is *no*.
|
|
778
|
-
# * If not needed,
|
|
801
|
+
# * If not needed, <code>|_no_</code> may be omitted.
|
|
779
802
|
#
|
|
780
803
|
# Examples:
|
|
781
804
|
#
|
|
@@ -804,19 +827,20 @@
|
|
|
804
827
|
#
|
|
805
828
|
# #### Unicode Properties
|
|
806
829
|
#
|
|
807
|
-
# The
|
|
808
|
-
# using a Unicode property name, much like a character class;
|
|
809
|
-
# specifies alphabetic characters:
|
|
830
|
+
# The <code>/\p{_property_name_}/</code> construct (with lowercase `p`) matches
|
|
831
|
+
# characters using a Unicode property name, much like a character class;
|
|
832
|
+
# property `Alpha` specifies alphabetic characters:
|
|
810
833
|
#
|
|
811
834
|
# /\p{Alpha}/.match('a') # => #<MatchData "a">
|
|
812
835
|
# /\p{Alpha}/.match('1') # => nil
|
|
813
836
|
#
|
|
814
|
-
# A property can be inverted by prefixing the name with a caret character
|
|
837
|
+
# A property can be inverted by prefixing the name with a caret character
|
|
838
|
+
# (<code>^</code>):
|
|
815
839
|
#
|
|
816
840
|
# /\p{^Alpha}/.match('1') # => #<MatchData "1">
|
|
817
841
|
# /\p{^Alpha}/.match('a') # => nil
|
|
818
842
|
#
|
|
819
|
-
# Or by using
|
|
843
|
+
# Or by using <code>\P</code> (uppercase `P`):
|
|
820
844
|
#
|
|
821
845
|
# /\P{Alpha}/.match('1') # => #<MatchData "1">
|
|
822
846
|
# /\P{Alpha}/.match('a') # => nil
|
|
@@ -826,28 +850,30 @@
|
|
|
826
850
|
#
|
|
827
851
|
# Some commonly-used properties correspond to POSIX bracket expressions:
|
|
828
852
|
#
|
|
829
|
-
# *
|
|
830
|
-
# *
|
|
831
|
-
# *
|
|
832
|
-
# *
|
|
833
|
-
# *
|
|
834
|
-
# *
|
|
835
|
-
# *
|
|
836
|
-
#
|
|
837
|
-
# *
|
|
838
|
-
#
|
|
839
|
-
#
|
|
840
|
-
# *
|
|
853
|
+
# * <code>/\p{Alnum}/</code>: Alphabetic and numeric character
|
|
854
|
+
# * <code>/\p{Alpha}/</code>: Alphabetic character
|
|
855
|
+
# * <code>/\p{Blank}/</code>: Space or tab
|
|
856
|
+
# * <code>/\p{Cntrl}/</code>: Control character
|
|
857
|
+
# * <code>/\p{Digit}/</code>: Digit character (and similar)
|
|
858
|
+
# * <code>/\p{Lower}/</code>: Lowercase alphabetical character
|
|
859
|
+
# * <code>/\p{Print}/</code>: Like <code>\p{Graph}</code>, but includes the
|
|
860
|
+
# space character
|
|
861
|
+
# * <code>/\p{Punct}/</code>: Punctuation character
|
|
862
|
+
# * <code>/\p{Space}/</code>: Whitespace character (<code>[:blank:]</code>,
|
|
863
|
+
# newline, carriage return, etc.)
|
|
864
|
+
# * <code>/\p{Upper}/</code>: Uppercase alphabetical character
|
|
865
|
+
# * <code>/\p{XDigit}/</code>: Digit allowed in a hexadecimal number (i.e.,
|
|
866
|
+
# 0-9a-fA-F)
|
|
841
867
|
#
|
|
842
868
|
# These are also commonly used:
|
|
843
869
|
#
|
|
844
|
-
# *
|
|
845
|
-
# *
|
|
846
|
-
# that invisible characters under the
|
|
847
|
-
# ["Format"](https://www.compart.com/en/unicode/category/Cf)
|
|
848
|
-
# included.
|
|
849
|
-
# *
|
|
850
|
-
# below) or having one of these Unicode properties:
|
|
870
|
+
# * <code>/\p{Emoji}/</code>: Unicode emoji.
|
|
871
|
+
# * <code>/\p{Graph}/</code>: Characters excluding <code>/\p{Cntrl}/</code>
|
|
872
|
+
# and <code>/\p{Space}/</code>. Note that invisible characters under the
|
|
873
|
+
# Unicode ["Format"](https://www.compart.com/en/unicode/category/Cf)
|
|
874
|
+
# category are included.
|
|
875
|
+
# * <code>/\p{Word}/</code>: A member in one of these Unicode character
|
|
876
|
+
# categories (see below) or having one of these Unicode properties:
|
|
851
877
|
#
|
|
852
878
|
# * Unicode categories:
|
|
853
879
|
# * `Mark` (`M`).
|
|
@@ -858,9 +884,10 @@
|
|
|
858
884
|
# * `Alpha`
|
|
859
885
|
# * `Join_Control`
|
|
860
886
|
#
|
|
861
|
-
# *
|
|
862
|
-
# *
|
|
863
|
-
#
|
|
887
|
+
# * <code>/\p{ASCII}/</code>: A character in the ASCII character set.
|
|
888
|
+
# * <code>/\p{Any}/</code>: Any Unicode character (including unassigned
|
|
889
|
+
# characters).
|
|
890
|
+
# * <code>/\p{Assigned}/</code>: An assigned character.
|
|
864
891
|
#
|
|
865
892
|
# #### Unicode Character Categories
|
|
866
893
|
#
|
|
@@ -950,68 +977,73 @@
|
|
|
950
977
|
# expressions provide a portable alternative to the above, with the added
|
|
951
978
|
# benefit of encompassing non-ASCII characters:
|
|
952
979
|
#
|
|
953
|
-
# *
|
|
954
|
-
# *
|
|
955
|
-
# (`Nd`) category; see below.
|
|
980
|
+
# * <code>/\d/</code> matches only ASCII decimal digits `0` through `9`.
|
|
981
|
+
# * <code>/[[:digit:]]/</code> matches any character in the Unicode `Decimal
|
|
982
|
+
# Number` (`Nd`) category; see below.
|
|
956
983
|
#
|
|
957
984
|
# The POSIX bracket expressions:
|
|
958
985
|
#
|
|
959
|
-
# *
|
|
986
|
+
# * <code>/[[:digit:]]/</code>: Matches a [Unicode
|
|
960
987
|
# digit](https://www.compart.com/en/unicode/category/Nd):
|
|
961
988
|
#
|
|
962
989
|
# /[[:digit:]]/.match('9') # => #<MatchData "9">
|
|
963
990
|
# /[[:digit:]]/.match("\u1fbf9") # => #<MatchData "9">
|
|
964
991
|
#
|
|
965
|
-
# *
|
|
966
|
-
# equivalent to
|
|
992
|
+
# * <code>/[[:xdigit:]]/</code>: Matches a digit allowed in a hexadecimal
|
|
993
|
+
# number; equivalent to <code>[0-9a-fA-F]</code>.
|
|
967
994
|
#
|
|
968
|
-
# *
|
|
995
|
+
# * <code>/[[:upper:]]/</code>: Matches a [Unicode uppercase
|
|
969
996
|
# letter](https://www.compart.com/en/unicode/category/Lu):
|
|
970
997
|
#
|
|
971
998
|
# /[[:upper:]]/.match('A') # => #<MatchData "A">
|
|
972
999
|
# /[[:upper:]]/.match("\u00c6") # => #<MatchData "Æ">
|
|
973
1000
|
#
|
|
974
|
-
# *
|
|
1001
|
+
# * <code>/[[:lower:]]/</code>: Matches a [Unicode lowercase
|
|
975
1002
|
# letter](https://www.compart.com/en/unicode/category/Ll):
|
|
976
1003
|
#
|
|
977
1004
|
# /[[:lower:]]/.match('a') # => #<MatchData "a">
|
|
978
1005
|
# /[[:lower:]]/.match("\u01fd") # => #<MatchData "ǽ">
|
|
979
1006
|
#
|
|
980
|
-
# *
|
|
1007
|
+
# * <code>/[[:alpha:]]/</code>: Matches <code>/[[:upper:]]/</code> or
|
|
1008
|
+
# <code>/[[:lower:]]/</code>.
|
|
981
1009
|
#
|
|
982
|
-
# *
|
|
1010
|
+
# * <code>/[[:alnum:]]/</code>: Matches <code>/[[:alpha:]]/</code> or
|
|
1011
|
+
# <code>/[[:digit:]]/</code>.
|
|
983
1012
|
#
|
|
984
|
-
# *
|
|
1013
|
+
# * <code>/[[:space:]]/</code>: Matches [Unicode space
|
|
985
1014
|
# character](https://www.compart.com/en/unicode/category/Zs):
|
|
986
1015
|
#
|
|
987
1016
|
# /[[:space:]]/.match(' ') # => #<MatchData " ">
|
|
988
1017
|
# /[[:space:]]/.match("\u2005") # => #<MatchData " ">
|
|
989
1018
|
#
|
|
990
|
-
# *
|
|
1019
|
+
# * <code>/[[:blank:]]/</code>: Matches <code>/[[:space:]]/</code> or tab
|
|
1020
|
+
# character:
|
|
991
1021
|
#
|
|
992
1022
|
# /[[:blank:]]/.match(' ') # => #<MatchData " ">
|
|
993
1023
|
# /[[:blank:]]/.match("\u2005") # => #<MatchData " ">
|
|
994
1024
|
# /[[:blank:]]/.match("\t") # => #<MatchData "\t">
|
|
995
1025
|
#
|
|
996
|
-
# *
|
|
1026
|
+
# * <code>/[[:cntrl:]]/</code>: Matches [Unicode control
|
|
997
1027
|
# character](https://www.compart.com/en/unicode/category/Cc):
|
|
998
1028
|
#
|
|
999
1029
|
# /[[:cntrl:]]/.match("\u0000") # => #<MatchData "\u0000">
|
|
1000
1030
|
# /[[:cntrl:]]/.match("\u009f") # => #<MatchData "\u009F">
|
|
1001
1031
|
#
|
|
1002
|
-
# *
|
|
1003
|
-
#
|
|
1032
|
+
# * <code>/[[:graph:]]/</code>: Matches any character except
|
|
1033
|
+
# <code>/[[:space:]]/</code> or <code>/[[:cntrl:]]/</code>.
|
|
1004
1034
|
#
|
|
1005
|
-
# *
|
|
1035
|
+
# * <code>/[[:print:]]/</code>: Matches <code>/[[:graph:]]/</code> or space
|
|
1036
|
+
# character.
|
|
1006
1037
|
#
|
|
1007
|
-
# *
|
|
1038
|
+
# * <code>/[[:punct:]]/</code>: Matches any [Unicode punctuation
|
|
1008
1039
|
# character](https://www.compart.com/en/unicode/category/Po).
|
|
1009
1040
|
#
|
|
1010
1041
|
# Ruby also supports these (non-POSIX) bracket expressions:
|
|
1011
1042
|
#
|
|
1012
|
-
# *
|
|
1013
|
-
#
|
|
1014
|
-
#
|
|
1043
|
+
# * <code>/[[:ascii:]]/</code>: Matches a character in the ASCII character
|
|
1044
|
+
# set.
|
|
1045
|
+
# * <code>/[[:word:]]/</code>: Matches a character in one of these Unicode
|
|
1046
|
+
# character categories or having one of these Unicode properties:
|
|
1015
1047
|
#
|
|
1016
1048
|
# * Unicode categories:
|
|
1017
1049
|
# * `Mark` (`M`).
|
|
@@ -1024,9 +1056,9 @@
|
|
|
1024
1056
|
#
|
|
1025
1057
|
# ### Comments
|
|
1026
1058
|
#
|
|
1027
|
-
# A comment may be included in a regexp pattern using the
|
|
1028
|
-
# construct, where *comment* is a
|
|
1029
|
-
# text ignored by the regexp engine:
|
|
1059
|
+
# A comment may be included in a regexp pattern using the
|
|
1060
|
+
# <code>(?#</code>*comment*<code>)</code> construct, where *comment* is a
|
|
1061
|
+
# substring of arbitrary text that is ignored by the regexp engine:
|
|
1030
1062
|
#
|
|
1031
1063
|
# /foo(?#Ignore me)bar/.match('foobar') # => #<MatchData "foobar">
|
|
1032
1064
|
#
|
|
@@ -1038,22 +1070,26 @@
|
|
|
1038
1070
|
#
|
|
1039
1071
|
# Each of these modifiers sets a mode for the regexp:
|
|
1040
1072
|
#
|
|
1041
|
-
# * `i`:
|
|
1073
|
+
# * `i`: <code>/_pattern_/i</code> sets [Case-Insensitive
|
|
1042
1074
|
# Mode](rdoc-ref:Regexp@Case-Insensitive+Mode).
|
|
1043
|
-
# * `m`:
|
|
1044
|
-
#
|
|
1045
|
-
# * `
|
|
1075
|
+
# * `m`: <code>/_pattern_/m</code> sets [Multiline
|
|
1076
|
+
# Mode](rdoc-ref:Regexp@Multiline+Mode).
|
|
1077
|
+
# * `x`: <code>/_pattern_/x</code> sets [Extended
|
|
1078
|
+
# Mode](rdoc-ref:Regexp@Extended+Mode).
|
|
1079
|
+
# * `o`: <code>/_pattern_/o</code> sets [Interpolation
|
|
1046
1080
|
# Mode](rdoc-ref:Regexp@Interpolation+Mode).
|
|
1047
1081
|
#
|
|
1048
1082
|
# Any, all, or none of these may be applied.
|
|
1049
1083
|
#
|
|
1050
1084
|
# Modifiers `i`, `m`, and `x` may be applied to subexpressions:
|
|
1051
1085
|
#
|
|
1052
|
-
# *
|
|
1053
|
-
# *
|
|
1054
|
-
#
|
|
1055
|
-
# *
|
|
1056
|
-
# group
|
|
1086
|
+
# * <code>(?_modifier_)</code> turns the mode "on" for ensuing subexpressions
|
|
1087
|
+
# * <code>(?-_modifier_)</code> turns the mode "off" for ensuing
|
|
1088
|
+
# subexpressions
|
|
1089
|
+
# * <code>(?_modifier_:_subexp_)</code> turns the mode "on" for *subexp*
|
|
1090
|
+
# within the group
|
|
1091
|
+
# * <code>(?-_modifier_:_subexp_)</code> turns the mode "off" for *subexp*
|
|
1092
|
+
# within the group
|
|
1057
1093
|
#
|
|
1058
1094
|
# Example:
|
|
1059
1095
|
#
|
|
@@ -1088,7 +1124,8 @@
|
|
|
1088
1124
|
#
|
|
1089
1125
|
# The multiline-mode in Ruby is what is commonly called a "dot-all mode":
|
|
1090
1126
|
#
|
|
1091
|
-
# * Without the `m` modifier, the subexpression
|
|
1127
|
+
# * Without the `m` modifier, the subexpression <code>.</code> does not match
|
|
1128
|
+
# newlines:
|
|
1092
1129
|
#
|
|
1093
1130
|
# /a.c/.match("a\nc") # => nil
|
|
1094
1131
|
#
|
|
@@ -1096,16 +1133,17 @@
|
|
|
1096
1133
|
#
|
|
1097
1134
|
# /a.c/m.match("a\nc") # => #<MatchData "a\nc">
|
|
1098
1135
|
#
|
|
1099
|
-
# Unlike other languages, the modifier `m` does not affect the anchors
|
|
1100
|
-
#
|
|
1136
|
+
# Unlike other languages, the modifier `m` does not affect the anchors
|
|
1137
|
+
# <code>^</code> and <code>$</code>. These anchors always match at
|
|
1138
|
+
# line-boundaries in Ruby.
|
|
1101
1139
|
#
|
|
1102
1140
|
# ### Extended Mode
|
|
1103
1141
|
#
|
|
1104
1142
|
# Modifier `x` enables extended mode, which means that:
|
|
1105
1143
|
#
|
|
1106
1144
|
# * Literal white space in the pattern is to be ignored.
|
|
1107
|
-
# * Character
|
|
1108
|
-
# which is also to be ignored for matching purposes.
|
|
1145
|
+
# * Character <code>#</code> marks the remainder of its containing line as a
|
|
1146
|
+
# comment, which is also to be ignored for matching purposes.
|
|
1109
1147
|
#
|
|
1110
1148
|
# In extended mode, whitespace and comments may be used to form a
|
|
1111
1149
|
# self-documented regexp.
|
|
@@ -1168,22 +1206,22 @@
|
|
|
1168
1206
|
# A regular expression containing non-US-ASCII characters is assumed to use the
|
|
1169
1207
|
# source encoding. This can be overridden with one of the following modifiers.
|
|
1170
1208
|
#
|
|
1171
|
-
# *
|
|
1172
|
-
# ASCII-8BIT:
|
|
1209
|
+
# * <code>/_pat_/n</code>: US-ASCII if only containing US-ASCII characters,
|
|
1210
|
+
# otherwise ASCII-8BIT:
|
|
1173
1211
|
#
|
|
1174
1212
|
# /foo/n.encoding # => #<Encoding:US-ASCII>
|
|
1175
1213
|
# /foo\xff/n.encoding # => #<Encoding:ASCII-8BIT>
|
|
1176
1214
|
# /foo\x7f/n.encoding # => #<Encoding:US-ASCII>
|
|
1177
1215
|
#
|
|
1178
|
-
# *
|
|
1216
|
+
# * <code>/_pat_/u</code>: UTF-8
|
|
1179
1217
|
#
|
|
1180
1218
|
# /foo/u.encoding # => #<Encoding:UTF-8>
|
|
1181
1219
|
#
|
|
1182
|
-
# *
|
|
1220
|
+
# * <code>/_pat_/e</code>: EUC-JP
|
|
1183
1221
|
#
|
|
1184
1222
|
# /foo/e.encoding # => #<Encoding:EUC-JP>
|
|
1185
1223
|
#
|
|
1186
|
-
# *
|
|
1224
|
+
# * <code>/_pat_/s</code>: Windows-31J
|
|
1187
1225
|
#
|
|
1188
1226
|
# /foo/s.encoding # => #<Encoding:Windows-31J>
|
|
1189
1227
|
#
|
|
@@ -1195,7 +1233,7 @@
|
|
|
1195
1233
|
# has a *fixed* encoding.
|
|
1196
1234
|
#
|
|
1197
1235
|
# If a match between incompatible encodings is attempted an
|
|
1198
|
-
#
|
|
1236
|
+
# <code>Encoding::CompatibilityError</code> exception is raised.
|
|
1199
1237
|
#
|
|
1200
1238
|
# Example:
|
|
1201
1239
|
#
|
|
@@ -1262,8 +1300,9 @@
|
|
|
1262
1300
|
# * No backreferences.
|
|
1263
1301
|
# * No subexpression calls.
|
|
1264
1302
|
# * No nested lookaround anchors or atomic groups.
|
|
1265
|
-
# * No nested quantifiers with counting (i.e. no nested
|
|
1266
|
-
#
|
|
1303
|
+
# * No nested quantifiers with counting (i.e. no nested <code>{n}</code>,
|
|
1304
|
+
# <code>{min,}</code>, <code>{,max}</code>, or <code>{min,max}</code> style
|
|
1305
|
+
# quantifiers)
|
|
1267
1306
|
#
|
|
1268
1307
|
# You can use method Regexp.linear_time? to determine whether a pattern meets
|
|
1269
1308
|
# these criteria:
|
|
@@ -1357,8 +1396,8 @@ class Regexp
|
|
|
1357
1396
|
# - Regexp.last_match(n) -> string or nil
|
|
1358
1397
|
# - Regexp.last_match(name) -> string or nil
|
|
1359
1398
|
# -->
|
|
1360
|
-
# With no argument, returns the value of
|
|
1361
|
-
# recent pattern match (see [Regexp global
|
|
1399
|
+
# With no argument, returns the value of <code>$~</code>, which is the result of
|
|
1400
|
+
# the most recent pattern match (see [Regexp global
|
|
1362
1401
|
# variables](rdoc-ref:Regexp@Global+Variables)):
|
|
1363
1402
|
#
|
|
1364
1403
|
# /c(.)t/ =~ 'cat' # => 0
|
|
@@ -1436,14 +1475,14 @@ class Regexp
|
|
|
1436
1475
|
#
|
|
1437
1476
|
# Regexp.try_convert(/re/) # => /re/
|
|
1438
1477
|
#
|
|
1439
|
-
# Otherwise if `object` responds to
|
|
1440
|
-
# returns the result.
|
|
1478
|
+
# Otherwise if `object` responds to <code>:to_regexp</code>, calls
|
|
1479
|
+
# <code>object.to_regexp</code> and returns the result.
|
|
1441
1480
|
#
|
|
1442
|
-
# Returns `nil` if `object` does not respond to
|
|
1481
|
+
# Returns `nil` if `object` does not respond to <code>:to_regexp</code>.
|
|
1443
1482
|
#
|
|
1444
1483
|
# Regexp.try_convert('re') # => nil
|
|
1445
1484
|
#
|
|
1446
|
-
# Raises an exception unless
|
|
1485
|
+
# Raises an exception unless <code>object.to_regexp</code> returns a regexp.
|
|
1447
1486
|
#
|
|
1448
1487
|
def self.try_convert: (Regexp | _ToRegexp regexp_like) -> Regexp
|
|
1449
1488
|
| (untyped other) -> Regexp?
|
|
@@ -1464,7 +1503,7 @@ class Regexp
|
|
|
1464
1503
|
# It sets the default timeout interval for Regexp matching in seconds. `nil`
|
|
1465
1504
|
# means no default timeout configuration. This configuration is process-global.
|
|
1466
1505
|
# If you want to set timeout for each Regexp, use `timeout` keyword for
|
|
1467
|
-
#
|
|
1506
|
+
# <code>Regexp.new</code>.
|
|
1468
1507
|
#
|
|
1469
1508
|
# Regexp.timeout = 1
|
|
1470
1509
|
# /^a*b?a*$/ =~ "a" * 100000 + "x" #=> regexp match timeout (RuntimeError)
|
|
@@ -1483,7 +1522,7 @@ class Regexp
|
|
|
1483
1522
|
# r.match('dog') # => #<MatchData "dog">
|
|
1484
1523
|
# r.match('cog') # => nil
|
|
1485
1524
|
#
|
|
1486
|
-
# For each pattern that is a string,
|
|
1525
|
+
# For each pattern that is a string, <code>Regexp.new(pattern)</code> is used:
|
|
1487
1526
|
#
|
|
1488
1527
|
# Regexp.union('penzance') # => /penzance/
|
|
1489
1528
|
# Regexp.union('a+b*c') # => /a\+b\*c/
|
|
@@ -1497,7 +1536,7 @@ class Regexp
|
|
|
1497
1536
|
# Regexp.union([/foo/i, /bar/m, /baz/x])
|
|
1498
1537
|
# # => /(?i-mx:foo)|(?m-ix:bar)|(?x-mi:baz)/
|
|
1499
1538
|
#
|
|
1500
|
-
# With no arguments, returns
|
|
1539
|
+
# With no arguments, returns <code>/(?!)/</code>:
|
|
1501
1540
|
#
|
|
1502
1541
|
# Regexp.union # => /(?!)/
|
|
1503
1542
|
#
|
|
@@ -1595,7 +1634,7 @@ class Regexp
|
|
|
1595
1634
|
# - regexp =~ string -> integer or nil
|
|
1596
1635
|
# -->
|
|
1597
1636
|
# Returns the integer index (in characters) of the first match for `self` and
|
|
1598
|
-
# `string`, or `nil` if none; also sets the [
|
|
1637
|
+
# `string`, or `nil` if none; also sets the [Regexp global
|
|
1599
1638
|
# variables](rdoc-ref:Regexp@Global+Variables):
|
|
1600
1639
|
#
|
|
1601
1640
|
# /at/ =~ 'input data' # => 7
|
|
@@ -1636,7 +1675,7 @@ class Regexp
|
|
|
1636
1675
|
# ' x = y ' =~ /(?<foo>\w+)\s*=\s*(?<foo>\w+)/
|
|
1637
1676
|
# p foo, foo # Undefined local variables
|
|
1638
1677
|
#
|
|
1639
|
-
# A regexp interpolation,
|
|
1678
|
+
# A regexp interpolation, <code>#{}</code>, also disables the assignment:
|
|
1640
1679
|
#
|
|
1641
1680
|
# r = /(?<foo>\w+)/
|
|
1642
1681
|
# /(?<foo>\w+)\s*=\s*#{r}/ =~ 'x = y'
|
|
@@ -1926,7 +1965,7 @@ class Regexp
|
|
|
1926
1965
|
# rdoc-file=re.c
|
|
1927
1966
|
# - ~ rxp -> integer or nil
|
|
1928
1967
|
# -->
|
|
1929
|
-
# Equivalent to
|
|
1968
|
+
# Equivalent to <code><i>rxp</i> =~ $_</code>:
|
|
1930
1969
|
#
|
|
1931
1970
|
# $_ = "input data"
|
|
1932
1971
|
# ~ /at/ # => 7
|