re2 2.23.0 → 2.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +107 -4
- data/Rakefile +0 -4
- data/dependencies.yml +2 -2
- data/ext/re2/extconf.rb +4 -5
- data/ext/re2/re2.cc +962 -275
- data/lib/re2/string.rb +6 -6
- data/lib/re2/version.rb +1 -1
- data/ports/archives/20260107.1.tar.gz +0 -0
- data/spec/re2/match_data_spec.rb +495 -2
- data/spec/re2/regexp_spec.rb +324 -1
- data/spec/re2/scanner_spec.rb +134 -13
- data/spec/re2/set_spec.rb +75 -4
- data/spec/re2_spec.rb +217 -43
- metadata +3 -3
- data/ports/archives/20250814.1.tar.gz +0 -0
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ac8a9eeb58a93e3f4241d2677a51a15016d543fd55a003e358f1d8442717c517
|
|
4
|
+
data.tar.gz: d505664cefe09a08d4c14f174791bc3f213cec515897943f12adc2867a91df6f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 31affec3aeee1a47021d3e96360b52c7dc5065d2abcbd748c35d305e6fffdd3b7a521f92bb6c97aaca67b265bb9df34867f18c5e00edf647bd8e609e093c29aa
|
|
7
|
+
data.tar.gz: 76654d86eea56b7e66102fc6780d16f94eaba95f0d2c605a46a1eece51c2158414d26edfdcfc3732b2301fe35a6fdbe8860336ce8efc1bd416d0830ee0a64e93
|
data/README.md
CHANGED
|
@@ -6,7 +6,7 @@ Python".
|
|
|
6
6
|
|
|
7
7
|
[Build status](https://github.com/mudge/re2/actions)
|
|
8
8
|
|
|
9
|
-
**Current version:** 2.23.0
|
|
9
|
+
**Current version:** 2.27.0
|
|
10
10
|
**Bundled RE2 version:** libre2.11 (2025-11-05)
|
|
11
11
|
|
|
12
12
|
```ruby
|
|
@@ -27,6 +27,8 @@ RE2('(\w+):(\d+)').full_match("ruby:1234")
|
|
|
27
27
|
* [Submatch extraction](#submatch-extraction)
|
|
28
28
|
* [Scanning text incrementally](#scanning-text-incrementally)
|
|
29
29
|
* [Searching simultaneously](#searching-simultaneously)
|
|
30
|
+
* [Replacing and extracting](#replacing-and-extracting)
|
|
31
|
+
* [Escaping](#escaping)
|
|
30
32
|
* [Encoding](#encoding)
|
|
31
33
|
* [Requirements](#requirements)
|
|
32
34
|
* [Native gems](#native-gems)
|
|
@@ -165,7 +167,72 @@ m["word"] #=> "ruby"
|
|
|
165
167
|
m["number"] #=> "1234"
|
|
166
168
|
```
|
|
167
169
|
|
|
168
|
-
|
|
170
|
+
Multiple submatches can be retrieved at the same time by numeric index or name with [`values_at`](https://mudge.name/re2/RE2/MatchData.html#values_at-instance_method):
|
|
171
|
+
|
|
172
|
+
```ruby
|
|
173
|
+
m = RE2('(?P<word>\w+):(?P<number>\d+):(\d+)').full_match("ruby:1234:5678")
|
|
174
|
+
#=> #<RE2::MatchData "ruby:1234:5678" 1:"ruby" 2:"1234" 3:"5678">
|
|
175
|
+
|
|
176
|
+
m.values_at("word", :number, 3)
|
|
177
|
+
#=> ["ruby", "1234", "5678"]
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
All captures can be returned as an array with [`captures`](https://mudge.name/re2/RE2/MatchData.html#captures-instance_method):
|
|
181
|
+
|
|
182
|
+
```ruby
|
|
183
|
+
m = RE2('(?P<word>\w+):(?P<number>\d+):(\d+)').full_match("ruby:1234:5678")
|
|
184
|
+
#=> #<RE2::MatchData "ruby:1234:5678" 1:"ruby" 2:"1234" 3:"5678">
|
|
185
|
+
|
|
186
|
+
m.captures #=> ["ruby", "1234", "5678"]
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
Capturing group names are available on both `RE2::Regexp` and `RE2::MatchData`:
|
|
190
|
+
|
|
191
|
+
```ruby
|
|
192
|
+
re = RE2('(?P<word>\w+):(?P<number>\d+):(\d+)')
|
|
193
|
+
re.names #=> ["number", "word"]
|
|
194
|
+
|
|
195
|
+
m = re.full_match("ruby:1234:5678")
|
|
196
|
+
m.names #=> ["number", "word"]
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
Named captures can be returned as a hash with [`named_captures`](https://mudge.name/re2/RE2/MatchData.html#named_captures-instance_method):
|
|
200
|
+
|
|
201
|
+
```ruby
|
|
202
|
+
m = RE2('(?P<word>\w+):(?P<number>\d+):(\d+)').full_match("ruby:1234:5678")
|
|
203
|
+
#=> #<RE2::MatchData "ruby:1234:5678" 1:"ruby" 2:"1234" 3:"5678">
|
|
204
|
+
|
|
205
|
+
m.named_captures
|
|
206
|
+
#=> {"number" => "1234", "word" => "ruby"}
|
|
207
|
+
m.named_captures(symbolize_names: true)
|
|
208
|
+
#=> {number: "1234", word: "ruby"}
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
This is [also available](https://mudge.name/re2/RE2/Regexp.html#named_captures-instance_method) on the original `RE2::Regexp` but will return the corresponding numerical index for each group:
|
|
212
|
+
|
|
213
|
+
```ruby
|
|
214
|
+
re = RE2('(?P<word>\w+):(?P<number>\d+):(\d+)')
|
|
215
|
+
re.named_captures
|
|
216
|
+
#=> {"number" => 2, "word" => 1}
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
The strings before and after a match can be returned with [`pre_match`](https://mudge.name/re2/RE2/MatchData.html#pre_match-instance_method) and [`post_match`](https://mudge.name/re2/RE2/MatchData.html#post_match-instance_method):
|
|
220
|
+
|
|
221
|
+
```ruby
|
|
222
|
+
m = RE2::Regexp.new('(\d+)').partial_match("bob 123 456")
|
|
223
|
+
m.pre_match #=> "bob "
|
|
224
|
+
m.post_match #=> " 456"
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
The [`offset`](https://mudge.name/re2/RE2/MatchData.html#offset-instance_method) and [`match_length`](https://mudge.name/re2/RE2/MatchData.html#match_length-instance_method) of a match can be retrieved by index or name:
|
|
228
|
+
|
|
229
|
+
```ruby
|
|
230
|
+
m = RE2::Regexp.new('(\d+)').partial_match("bob 123 456")
|
|
231
|
+
m.offset(1) #=> [4, 7]
|
|
232
|
+
m.match_length(1) #=> 3
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
`RE2::MatchData` objects can also be used with Ruby's [pattern matching](https://docs.ruby-lang.org/en/3.2/syntax/pattern_matching_rdoc.html):
|
|
169
236
|
|
|
170
237
|
```ruby
|
|
171
238
|
case RE2('(\w+):(\d+)').full_match("ruby:1234")
|
|
@@ -238,6 +305,42 @@ set.match("abcdefghi") #=> [0, 1, 2]
|
|
|
238
305
|
set.match("ghidefabc") #=> [2, 1, 0]
|
|
239
306
|
```
|
|
240
307
|
|
|
308
|
+
### Replacing and extracting
|
|
309
|
+
|
|
310
|
+
[`RE2.replace`](https://mudge.name/re2/RE2.html#replace-class_method) returns a copy of a given string with the first occurrence of a pattern replaced with a given rewrite string:
|
|
311
|
+
|
|
312
|
+
```ruby
|
|
313
|
+
RE2.replace("hello there", "hello", "howdy") #=> "howdy there"
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
The pattern can be given as either a string or an `RE2::Regexp`:
|
|
317
|
+
|
|
318
|
+
```ruby
|
|
319
|
+
re = RE2('hel+o')
|
|
320
|
+
RE2.replace("hello there", re, "yo") #=> "yo there"
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
To replace _all_ matches and not just the first, use [`RE2.global_replace`](https://mudge.name/re2/RE2.html#global_replace-class_method):
|
|
324
|
+
|
|
325
|
+
```ruby
|
|
326
|
+
RE2.global_replace("hallo thare", "a", "e") #=> "hello there"
|
|
327
|
+
```
|
|
328
|
+
|
|
329
|
+
To extract matches with a given rewrite string including substitutions, use [`RE2.extract`](https://mudge.name/re2/RE2.html#extract-class_method):
|
|
330
|
+
|
|
331
|
+
```ruby
|
|
332
|
+
RE2.extract("alice@example.com", '(\w+)@(\w+)', '\2-\1')
|
|
333
|
+
#=> "example-alice"
|
|
334
|
+
```
|
|
335
|
+
|
|
336
|
+
### Escaping
|
|
337
|
+
|
|
338
|
+
To escape all potentially meaningful regexp characters in a string, use [`RE2.escape`](https://mudge.name/re2/RE2.html#escape-class_method):
|
|
339
|
+
|
|
340
|
+
```ruby
|
|
341
|
+
RE2.escape("1.5-2.0?") #=> "1\\.5\\-2\\.0\\?"
|
|
342
|
+
```
|
|
343
|
+
|
|
241
344
|
### Encoding
|
|
242
345
|
|
|
243
346
|
> [!WARNING]
|
|
@@ -250,10 +353,10 @@ the right encoding so this is the responsibility of the caller, e.g.
|
|
|
250
353
|
|
|
251
354
|
```ruby
|
|
252
355
|
# By default, RE2 will process patterns and text as UTF-8
|
|
253
|
-
RE2(non_utf8_pattern.encode("UTF-8")).
|
|
356
|
+
RE2(non_utf8_pattern.encode("UTF-8")).partial_match(non_utf8_text.encode("UTF-8"))
|
|
254
357
|
|
|
255
358
|
# If the :utf8 option is false, RE2 will process patterns and text as ISO-8859-1
|
|
256
|
-
RE2(non_latin1_pattern.encode("ISO-8859-1"), utf8: false).
|
|
359
|
+
RE2(non_latin1_pattern.encode("ISO-8859-1"), utf8: false).partial_match(non_latin1_text.encode("ISO-8859-1"))
|
|
257
360
|
```
|
|
258
361
|
|
|
259
362
|
## Requirements
|
data/Rakefile
CHANGED
data/dependencies.yml
CHANGED
|
@@ -3,5 +3,5 @@ libre2:
|
|
|
3
3
|
version: '2025-11-05'
|
|
4
4
|
sha256: 87f6029d2f6de8aa023654240a03ada90e876ce9a4676e258dd01ea4c26ffd67
|
|
5
5
|
abseil:
|
|
6
|
-
version: '20250814.1'
|
|
7
|
-
sha256:
|
|
6
|
+
version: '20260107.1'
|
|
7
|
+
sha256: 4314e2a7cbac89cac25a2f2322870f343d81579756ceff7f431803c2c9090195
|
data/ext/re2/extconf.rb
CHANGED
|
@@ -110,7 +110,7 @@ module RE2
|
|
|
110
110
|
process_recipe(abseil_recipe) do |recipe|
|
|
111
111
|
recipe.configure_options << '-DABSL_PROPAGATE_CXX_STD=ON'
|
|
112
112
|
# Workaround for https://github.com/abseil/abseil-cpp/issues/1510
|
|
113
|
-
recipe.configure_options << '-DCMAKE_CXX_FLAGS=-DABSL_FORCE_WAITER_MODE=4' if MiniPortile.windows?
|
|
113
|
+
recipe.configure_options << '-DCMAKE_CXX_FLAGS=-DABSL_FORCE_WAITER_MODE=4 -D_WIN32_WINNT=0x0601' if MiniPortile.windows?
|
|
114
114
|
end
|
|
115
115
|
|
|
116
116
|
process_recipe(re2_recipe) do |recipe|
|
|
@@ -139,7 +139,6 @@ module RE2
|
|
|
139
139
|
compile_options = +"-x c++"
|
|
140
140
|
|
|
141
141
|
have_library("stdc++")
|
|
142
|
-
have_header("stdint.h")
|
|
143
142
|
|
|
144
143
|
minimal_program = <<~SRC
|
|
145
144
|
#include <re2/re2.h>
|
|
@@ -153,7 +152,7 @@ module RE2
|
|
|
153
152
|
if re2_requires_version_flag
|
|
154
153
|
# Recent versions of RE2 depend directly on Abseil, which requires a
|
|
155
154
|
# compiler with C++17 support.
|
|
156
|
-
abort "Cannot compile re2 with your compiler: recent versions require C++17 support." unless %w[c++20 c++17 c++11
|
|
155
|
+
abort "Cannot compile re2 with your compiler: recent versions require C++17 support." unless %w[c++20 c++17 c++11].any? do |std|
|
|
157
156
|
checking_for("re2 that compiles with #{std} standard") do
|
|
158
157
|
if try_compile(minimal_program, compile_options + " -std=#{std}")
|
|
159
158
|
compile_options << " -std=#{std}"
|
|
@@ -268,8 +267,8 @@ module RE2
|
|
|
268
267
|
message "Cross build is #{cross_build_p ? "enabled" : "disabled"}.\n"
|
|
269
268
|
|
|
270
269
|
recipe.host = target_host
|
|
271
|
-
# Ensure x64-mingw-ucrt
|
|
272
|
-
#
|
|
270
|
+
# Ensure x64-mingw-ucrt uses different library paths since the host is
|
|
271
|
+
# the same (x86_64-w64-mingw32).
|
|
273
272
|
recipe.target = File.join(recipe.target, target_arch) if cross_build_p
|
|
274
273
|
|
|
275
274
|
yield recipe
|