re2 2.4.3 → 2.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec +1 -0
- data/Gemfile +2 -0
- data/README.md +281 -192
- data/Rakefile +1 -1
- data/dependencies.yml +4 -4
- data/ext/re2/extconf.rb +250 -358
- data/ext/re2/re2.cc +505 -284
- data/ext/re2/recipes.rb +31 -20
- data/lib/re2/regexp.rb +72 -0
- data/lib/re2/scanner.rb +11 -0
- data/lib/re2/string.rb +12 -59
- data/lib/re2/version.rb +10 -1
- data/lib/re2.rb +9 -3
- data/ports/archives/20240116.1.tar.gz +0 -0
- data/ports/archives/re2-2024-04-01.tar.gz +0 -0
- data/re2.gemspec +5 -2
- data/spec/kernel_spec.rb +10 -2
- data/spec/re2/match_data_spec.rb +98 -28
- data/spec/re2/regexp_spec.rb +546 -113
- data/spec/re2/scanner_spec.rb +26 -9
- data/spec/re2/set_spec.rb +28 -18
- data/spec/re2/string_spec.rb +2 -0
- data/spec/re2_spec.rb +34 -4
- data/spec/spec_helper.rb +2 -0
- metadata +10 -9
- data/ports/archives/20230802.1.tar.gz +0 -0
- data/ports/archives/re2-2023-11-01.tar.gz +0 -0
data/spec/re2/regexp_spec.rb
CHANGED
@@ -1,21 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
RSpec.describe RE2::Regexp do
|
2
4
|
describe "#initialize" do
|
3
5
|
it "returns an instance given only a pattern" do
|
4
6
|
re = RE2::Regexp.new('woo')
|
7
|
+
|
5
8
|
expect(re).to be_a(RE2::Regexp)
|
6
9
|
end
|
7
10
|
|
8
11
|
it "returns an instance given a pattern and options" do
|
9
|
-
re = RE2::Regexp.new('woo', :
|
12
|
+
re = RE2::Regexp.new('woo', case_sensitive: false)
|
13
|
+
|
10
14
|
expect(re).to be_a(RE2::Regexp)
|
11
15
|
end
|
12
16
|
|
17
|
+
it "accepts patterns containing null bytes" do
|
18
|
+
re = RE2::Regexp.new("a\0b")
|
19
|
+
|
20
|
+
expect(re.pattern).to eq("a\0b")
|
21
|
+
end
|
22
|
+
|
13
23
|
it "raises an error if given an inappropriate type" do
|
14
24
|
expect { RE2::Regexp.new(nil) }.to raise_error(TypeError)
|
15
25
|
end
|
16
26
|
|
17
27
|
it "allows invalid patterns to be created" do
|
18
|
-
re = RE2::Regexp.new('???', :
|
28
|
+
re = RE2::Regexp.new('???', log_errors: false)
|
29
|
+
|
19
30
|
expect(re).to be_a(RE2::Regexp)
|
20
31
|
end
|
21
32
|
|
@@ -29,20 +40,28 @@ RSpec.describe RE2::Regexp do
|
|
29
40
|
describe ".compile" do
|
30
41
|
it "returns an instance given only a pattern" do
|
31
42
|
re = RE2::Regexp.compile('woo')
|
43
|
+
|
32
44
|
expect(re).to be_a(RE2::Regexp)
|
33
45
|
end
|
34
46
|
|
35
47
|
it "returns an instance given a pattern and options" do
|
36
|
-
re = RE2::Regexp.compile('woo', :
|
48
|
+
re = RE2::Regexp.compile('woo', case_sensitive: false)
|
37
49
|
expect(re).to be_a(RE2::Regexp)
|
38
50
|
end
|
39
51
|
|
52
|
+
it "accepts patterns containing null bytes" do
|
53
|
+
re = RE2::Regexp.compile("a\0b")
|
54
|
+
|
55
|
+
expect(re.pattern).to eq("a\0b")
|
56
|
+
end
|
57
|
+
|
40
58
|
it "raises an error if given an inappropriate type" do
|
41
59
|
expect { RE2::Regexp.compile(nil) }.to raise_error(TypeError)
|
42
60
|
end
|
43
61
|
|
44
62
|
it "allows invalid patterns to be created" do
|
45
|
-
re = RE2::Regexp.compile('???', :
|
63
|
+
re = RE2::Regexp.compile('???', log_errors: false)
|
64
|
+
|
46
65
|
expect(re).to be_a(RE2::Regexp)
|
47
66
|
end
|
48
67
|
|
@@ -60,34 +79,38 @@ RSpec.describe RE2::Regexp do
|
|
60
79
|
end
|
61
80
|
|
62
81
|
it "is populated with default options when nothing has been set" do
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
82
|
+
expect(RE2::Regexp.new('woo').options).to include(
|
83
|
+
utf8: true,
|
84
|
+
posix_syntax: false,
|
85
|
+
longest_match: false,
|
86
|
+
log_errors: true,
|
87
|
+
literal: false,
|
88
|
+
never_nl: false,
|
89
|
+
case_sensitive: true,
|
90
|
+
perl_classes: false,
|
91
|
+
word_boundary: false,
|
92
|
+
one_line: false
|
93
|
+
)
|
74
94
|
end
|
75
95
|
|
76
96
|
it "is populated with overridden options when specified" do
|
77
|
-
options = RE2::Regexp.new('woo', :
|
78
|
-
|
97
|
+
options = RE2::Regexp.new('woo', case_sensitive: false).options
|
98
|
+
|
99
|
+
expect(options).to include(case_sensitive: false)
|
79
100
|
end
|
80
101
|
end
|
81
102
|
|
82
103
|
describe "#error" do
|
83
104
|
it "returns nil if there is no error" do
|
84
105
|
error = RE2::Regexp.new('woo').error
|
106
|
+
|
85
107
|
expect(error).to be_nil
|
86
108
|
end
|
87
109
|
|
88
|
-
# Use log_errors
|
110
|
+
# Use log_errors: false to suppress RE2's logging to STDERR.
|
89
111
|
it "contains the error string if there is an error" do
|
90
|
-
error = RE2::Regexp.new('wo(o', :
|
112
|
+
error = RE2::Regexp.new('wo(o', log_errors: false).error
|
113
|
+
|
91
114
|
expect(error).to eq("missing ): wo(o")
|
92
115
|
end
|
93
116
|
end
|
@@ -95,11 +118,13 @@ RSpec.describe RE2::Regexp do
|
|
95
118
|
describe "#error_arg" do
|
96
119
|
it "returns nil if there is no error" do
|
97
120
|
error_arg = RE2::Regexp.new('woo').error_arg
|
121
|
+
|
98
122
|
expect(error_arg).to be_nil
|
99
123
|
end
|
100
124
|
|
101
|
-
it "returns the offending portion of the
|
102
|
-
error_arg = RE2::Regexp.new('wo(o', :
|
125
|
+
it "returns the offending portion of the pattern if there is an error" do
|
126
|
+
error_arg = RE2::Regexp.new('wo(o', log_errors: false).error_arg
|
127
|
+
|
103
128
|
expect(error_arg).to eq("wo(o")
|
104
129
|
end
|
105
130
|
end
|
@@ -112,7 +137,8 @@ RSpec.describe RE2::Regexp do
|
|
112
137
|
end
|
113
138
|
|
114
139
|
it "returns -1 for an invalid pattern" do
|
115
|
-
program_size = RE2::Regexp.new('???', :
|
140
|
+
program_size = RE2::Regexp.new('???', log_errors: false).program_size
|
141
|
+
|
116
142
|
expect(program_size).to eq(-1)
|
117
143
|
end
|
118
144
|
end
|
@@ -120,18 +146,27 @@ RSpec.describe RE2::Regexp do
|
|
120
146
|
describe "#to_str" do
|
121
147
|
it "returns the original pattern" do
|
122
148
|
string = RE2::Regexp.new('w(o)(o)').to_str
|
149
|
+
|
123
150
|
expect(string).to eq("w(o)(o)")
|
124
151
|
end
|
152
|
+
|
153
|
+
it "returns the pattern even if invalid" do
|
154
|
+
string = RE2::Regexp.new('???', log_errors: false).to_str
|
155
|
+
|
156
|
+
expect(string).to eq("???")
|
157
|
+
end
|
125
158
|
end
|
126
159
|
|
127
160
|
describe "#pattern" do
|
128
161
|
it "returns the original pattern" do
|
129
162
|
pattern = RE2::Regexp.new('w(o)(o)').pattern
|
163
|
+
|
130
164
|
expect(pattern).to eq("w(o)(o)")
|
131
165
|
end
|
132
166
|
|
133
167
|
it "returns the pattern even if invalid" do
|
134
|
-
pattern = RE2::Regexp.new('???', :
|
168
|
+
pattern = RE2::Regexp.new('???', log_errors: false).pattern
|
169
|
+
|
135
170
|
expect(pattern).to eq("???")
|
136
171
|
end
|
137
172
|
end
|
@@ -139,8 +174,15 @@ RSpec.describe RE2::Regexp do
|
|
139
174
|
describe "#inspect" do
|
140
175
|
it "shows the class name and original pattern" do
|
141
176
|
string = RE2::Regexp.new('w(o)(o)').inspect
|
177
|
+
|
142
178
|
expect(string).to eq("#<RE2::Regexp /w(o)(o)/>")
|
143
179
|
end
|
180
|
+
|
181
|
+
it "respects the pattern's original encoding" do
|
182
|
+
string = RE2::Regexp.new('w(o)(o)', utf8: false).inspect
|
183
|
+
|
184
|
+
expect(string.encoding).to eq(Encoding::ISO_8859_1)
|
185
|
+
end
|
144
186
|
end
|
145
187
|
|
146
188
|
describe "#utf8?" do
|
@@ -149,7 +191,8 @@ RSpec.describe RE2::Regexp do
|
|
149
191
|
end
|
150
192
|
|
151
193
|
it "can be overridden on initialization" do
|
152
|
-
re = RE2::Regexp.new('woo', :
|
194
|
+
re = RE2::Regexp.new('woo', utf8: false)
|
195
|
+
|
153
196
|
expect(re).to_not be_utf8
|
154
197
|
end
|
155
198
|
end
|
@@ -160,7 +203,8 @@ RSpec.describe RE2::Regexp do
|
|
160
203
|
end
|
161
204
|
|
162
205
|
it "can be overridden on initialization" do
|
163
|
-
re = RE2::Regexp.new('woo', :
|
206
|
+
re = RE2::Regexp.new('woo', posix_syntax: true)
|
207
|
+
|
164
208
|
expect(re).to be_posix_syntax
|
165
209
|
end
|
166
210
|
end
|
@@ -171,7 +215,8 @@ RSpec.describe RE2::Regexp do
|
|
171
215
|
end
|
172
216
|
|
173
217
|
it "can be overridden on initialization" do
|
174
|
-
re = RE2::Regexp.new('woo', :
|
218
|
+
re = RE2::Regexp.new('woo', literal: true)
|
219
|
+
|
175
220
|
expect(re).to be_literal
|
176
221
|
end
|
177
222
|
end
|
@@ -182,7 +227,8 @@ RSpec.describe RE2::Regexp do
|
|
182
227
|
end
|
183
228
|
|
184
229
|
it "can be overridden on initialization" do
|
185
|
-
re = RE2::Regexp.new('woo', :
|
230
|
+
re = RE2::Regexp.new('woo', never_nl: true)
|
231
|
+
|
186
232
|
expect(re).to be_never_nl
|
187
233
|
end
|
188
234
|
end
|
@@ -193,7 +239,7 @@ RSpec.describe RE2::Regexp do
|
|
193
239
|
end
|
194
240
|
|
195
241
|
it "can be overridden on initialization" do
|
196
|
-
re = RE2::Regexp.new('woo', :
|
242
|
+
re = RE2::Regexp.new('woo', case_sensitive: false)
|
197
243
|
expect(re).to_not be_case_sensitive
|
198
244
|
end
|
199
245
|
end
|
@@ -204,7 +250,8 @@ RSpec.describe RE2::Regexp do
|
|
204
250
|
end
|
205
251
|
|
206
252
|
it "can be overridden on initialization" do
|
207
|
-
re = RE2::Regexp.new('woo', :
|
253
|
+
re = RE2::Regexp.new('woo', case_sensitive: false)
|
254
|
+
|
208
255
|
expect(re).to be_case_insensitive
|
209
256
|
end
|
210
257
|
end
|
@@ -215,7 +262,8 @@ RSpec.describe RE2::Regexp do
|
|
215
262
|
end
|
216
263
|
|
217
264
|
it "can be overridden on initialization" do
|
218
|
-
re = RE2::Regexp.new('woo', :
|
265
|
+
re = RE2::Regexp.new('woo', case_sensitive: false)
|
266
|
+
|
219
267
|
expect(re).to be_casefold
|
220
268
|
end
|
221
269
|
end
|
@@ -226,7 +274,8 @@ RSpec.describe RE2::Regexp do
|
|
226
274
|
end
|
227
275
|
|
228
276
|
it "can be overridden on initialization" do
|
229
|
-
re = RE2::Regexp.new('woo', :
|
277
|
+
re = RE2::Regexp.new('woo', longest_match: true)
|
278
|
+
|
230
279
|
expect(re).to be_longest_match
|
231
280
|
end
|
232
281
|
end
|
@@ -237,7 +286,8 @@ RSpec.describe RE2::Regexp do
|
|
237
286
|
end
|
238
287
|
|
239
288
|
it "can be overridden on initialization" do
|
240
|
-
re = RE2::Regexp.new('woo', :
|
289
|
+
re = RE2::Regexp.new('woo', log_errors: false)
|
290
|
+
|
241
291
|
expect(re).to_not be_log_errors
|
242
292
|
end
|
243
293
|
end
|
@@ -248,7 +298,8 @@ RSpec.describe RE2::Regexp do
|
|
248
298
|
end
|
249
299
|
|
250
300
|
it "can be overridden on initialization" do
|
251
|
-
re = RE2::Regexp.new('woo', :
|
301
|
+
re = RE2::Regexp.new('woo', perl_classes: true)
|
302
|
+
|
252
303
|
expect(re).to be_perl_classes
|
253
304
|
end
|
254
305
|
end
|
@@ -259,7 +310,8 @@ RSpec.describe RE2::Regexp do
|
|
259
310
|
end
|
260
311
|
|
261
312
|
it "can be overridden on initialization" do
|
262
|
-
re = RE2::Regexp.new('woo', :
|
313
|
+
re = RE2::Regexp.new('woo', word_boundary: true)
|
314
|
+
|
263
315
|
expect(re).to be_word_boundary
|
264
316
|
end
|
265
317
|
end
|
@@ -270,7 +322,8 @@ RSpec.describe RE2::Regexp do
|
|
270
322
|
end
|
271
323
|
|
272
324
|
it "can be overridden on initialization" do
|
273
|
-
re = RE2::Regexp.new('woo', :
|
325
|
+
re = RE2::Regexp.new('woo', one_line: true)
|
326
|
+
|
274
327
|
expect(re).to be_one_line
|
275
328
|
end
|
276
329
|
end
|
@@ -281,144 +334,406 @@ RSpec.describe RE2::Regexp do
|
|
281
334
|
end
|
282
335
|
|
283
336
|
it "can be overridden on initialization" do
|
284
|
-
re = RE2::Regexp.new('woo', :
|
337
|
+
re = RE2::Regexp.new('woo', max_mem: 1024)
|
338
|
+
|
285
339
|
expect(re.max_mem).to eq(1024)
|
286
340
|
end
|
287
341
|
end
|
288
342
|
|
289
343
|
describe "#match" do
|
290
|
-
|
344
|
+
it "returns match data given only text if the pattern has capturing groups" do
|
345
|
+
re = RE2::Regexp.new('My name is (\w+) (\w+)')
|
291
346
|
|
292
|
-
|
293
|
-
md = re.match("My name is Robert Paulson")
|
294
|
-
expect(md).to be_a(RE2::MatchData)
|
347
|
+
expect(re.match("My name is Alice Bloggs")).to be_a(RE2::MatchData)
|
295
348
|
end
|
296
349
|
|
297
|
-
it "returns
|
298
|
-
|
350
|
+
it "returns only true or false given only text if the pattern has no capturing groups" do
|
351
|
+
re = RE2::Regexp.new('My name is \w+ \w+')
|
352
|
+
|
353
|
+
expect(re.match("My name is Alice Bloggs")).to eq(true)
|
299
354
|
end
|
300
355
|
|
301
|
-
it "
|
302
|
-
|
303
|
-
|
356
|
+
it "supports matching against text containing null bytes" do
|
357
|
+
re = RE2::Regexp.new("a\0b")
|
358
|
+
|
359
|
+
expect(re.match("a\0b")).to eq(true)
|
304
360
|
end
|
305
361
|
|
306
|
-
it "returns
|
307
|
-
re = RE2::Regexp.new('My name is')
|
362
|
+
it "returns nil if the text does not match the pattern" do
|
363
|
+
re = RE2::Regexp.new('My name is (\w+) (\w+)')
|
308
364
|
|
309
|
-
expect(re.match(
|
365
|
+
expect(re.match("My age is 99")).to be_nil
|
310
366
|
end
|
311
367
|
|
312
|
-
it "
|
368
|
+
it "accepts text that can be coerced to a string" do
|
369
|
+
re = RE2::Regexp.new('My name is (\w+) (\w+)')
|
370
|
+
|
371
|
+
expect(re.match(StringLike.new("My name is Alice Bloggs"))).to be_a(RE2::MatchData)
|
372
|
+
end
|
373
|
+
|
374
|
+
it "raises an exception when given text that cannot be coerced to a string" do
|
375
|
+
re = RE2::Regexp.new('My name is (\w+) (\w+)')
|
376
|
+
|
313
377
|
expect { re.match(nil) }.to raise_error(TypeError)
|
314
378
|
end
|
315
379
|
|
316
|
-
it "
|
317
|
-
|
380
|
+
it "returns nil with an invalid pattern" do
|
381
|
+
re = RE2::Regexp.new('???', log_errors: false)
|
382
|
+
|
383
|
+
expect(re.match("My name is Alice Bloggs")).to be_nil
|
318
384
|
end
|
319
385
|
|
320
|
-
it "
|
321
|
-
|
386
|
+
it "returns nil with an invalid pattern and options" do
|
387
|
+
re = RE2::Regexp.new('???', log_errors: false)
|
388
|
+
|
389
|
+
expect(re.match('foo bar', startpos: 1)).to be_nil
|
322
390
|
end
|
323
391
|
|
324
|
-
it "
|
325
|
-
re = RE2::Regexp.new('
|
326
|
-
|
392
|
+
it "accepts an offset at which to start matching", :aggregate_failures do
|
393
|
+
re = RE2::Regexp.new('(\w+) (\w+)')
|
394
|
+
md = re.match("one two three", startpos: 4)
|
395
|
+
|
396
|
+
expect(md[1]).to eq("two")
|
397
|
+
expect(md[2]).to eq("three")
|
398
|
+
end
|
399
|
+
|
400
|
+
it "returns nil if using a starting offset past the end of the text" do
|
401
|
+
skip "Underlying RE2::Match does not have endpos argument" unless RE2::Regexp.match_has_endpos_argument?
|
402
|
+
|
403
|
+
re = RE2::Regexp.new('(\w+) (\w+)', log_errors: false)
|
404
|
+
|
405
|
+
expect(re.match("one two three", startpos: 20, endpos: 21)).to be_nil
|
406
|
+
end
|
407
|
+
|
408
|
+
it "raises an exception when given a negative starting offset" do
|
409
|
+
re = RE2::Regexp.new('(\w+) (\w+)')
|
410
|
+
|
411
|
+
expect { re.match("one two three", startpos: -1) }.to raise_error(ArgumentError, "startpos should be >= 0")
|
327
412
|
end
|
328
413
|
|
329
|
-
|
330
|
-
|
414
|
+
it "raises an exception when given a starting offset past the default ending offset" do
|
415
|
+
re = RE2::Regexp.new('(\w+) (\w+)')
|
331
416
|
|
332
|
-
|
333
|
-
|
334
|
-
|
417
|
+
expect { re.match("one two three", startpos: 30) }.to raise_error(ArgumentError, "startpos should be <= endpos")
|
418
|
+
end
|
419
|
+
|
420
|
+
it "accepts an offset at which to end matching", :aggregate_failures do
|
421
|
+
skip "Underlying RE2::Match does not have endpos argument" unless RE2::Regexp.match_has_endpos_argument?
|
422
|
+
|
423
|
+
re = RE2::Regexp.new('(\w+) (\w+)')
|
424
|
+
md = re.match("one two three", endpos: 6)
|
425
|
+
|
426
|
+
expect(md[1]).to eq("one")
|
427
|
+
expect(md[2]).to eq("tw")
|
428
|
+
end
|
429
|
+
|
430
|
+
it "returns nil if using a ending offset at the start of the text" do
|
431
|
+
skip "Underlying RE2::Match does not have endpos argument" unless RE2::Regexp.match_has_endpos_argument?
|
432
|
+
|
433
|
+
re = RE2::Regexp.new('(\w+) (\w+)')
|
434
|
+
|
435
|
+
expect(re.match("one two three", endpos: 0)).to be_nil
|
436
|
+
end
|
437
|
+
|
438
|
+
it "raises an exception when given a negative ending offset" do
|
439
|
+
skip "Underlying RE2::Match does not have endpos argument" unless RE2::Regexp.match_has_endpos_argument?
|
440
|
+
|
441
|
+
re = RE2::Regexp.new('(\w+) (\w+)')
|
442
|
+
|
443
|
+
expect { re.match("one two three", endpos: -1) }.to raise_error(ArgumentError, "endpos should be >= 0")
|
444
|
+
end
|
445
|
+
|
446
|
+
it "raises an exception when given an ending offset before the starting offset" do
|
447
|
+
skip "Underlying RE2::Match does not have endpos argument" unless RE2::Regexp.match_has_endpos_argument?
|
448
|
+
|
449
|
+
re = RE2::Regexp.new('(\w+) (\w+)')
|
450
|
+
|
451
|
+
expect { re.match("one two three", startpos: 3, endpos: 0) }.to raise_error(ArgumentError, "startpos should be <= endpos")
|
452
|
+
end
|
453
|
+
|
454
|
+
it "raises an error if given an ending offset and RE2 does not support it" do
|
455
|
+
skip "Underlying RE2::Match has endpos argument" if RE2::Regexp.match_has_endpos_argument?
|
456
|
+
|
457
|
+
re = RE2::Regexp.new('(\w+) (\w+)')
|
458
|
+
|
459
|
+
expect { re.match("one two three", endpos: 3) }.to raise_error(RE2::Regexp::UnsupportedError)
|
460
|
+
end
|
461
|
+
|
462
|
+
it "does not anchor matches by default when extracting submatches" do
|
463
|
+
re = RE2::Regexp.new('(two)')
|
464
|
+
|
465
|
+
expect(re.match("one two three")).to be_a(RE2::MatchData)
|
466
|
+
end
|
467
|
+
|
468
|
+
it "does not anchor matches by default without extracting submatches" do
|
469
|
+
re = RE2::Regexp.new('(two)')
|
470
|
+
|
471
|
+
expect(re.match("one two three", submatches: 0)).to eq(true)
|
472
|
+
end
|
473
|
+
|
474
|
+
it "can explicitly match without anchoring when extracting submatches" do
|
475
|
+
re = RE2::Regexp.new('(two)')
|
476
|
+
|
477
|
+
expect(re.match("one two three", anchor: :unanchored)).to be_a(RE2::MatchData)
|
478
|
+
end
|
479
|
+
|
480
|
+
it "can explicitly match with neither anchoring nor extracting submatches" do
|
481
|
+
re = RE2::Regexp.new('(two)')
|
335
482
|
|
336
|
-
|
337
|
-
|
338
|
-
|
483
|
+
expect(re.match("one two three", anchor: :unanchored, submatches: 0)).to eq(true)
|
484
|
+
end
|
485
|
+
|
486
|
+
it "can anchor matches at the start when extracting submatches", :aggregate_failures do
|
487
|
+
re = RE2::Regexp.new('(two)')
|
488
|
+
|
489
|
+
expect(re.match("two three", anchor: :anchor_start)).to be_a(RE2::MatchData)
|
490
|
+
expect(re.match("one two three", anchor: :anchor_start)).to be_nil
|
491
|
+
end
|
492
|
+
|
493
|
+
it "can anchor matches at the start without extracting submatches", :aggregate_failures do
|
494
|
+
re = RE2::Regexp.new('(two)')
|
495
|
+
|
496
|
+
expect(re.match("two three", anchor: :anchor_start, submatches: 0)).to eq(true)
|
497
|
+
expect(re.match("one two three", anchor: :anchor_start, submatches: 0)).to eq(false)
|
498
|
+
end
|
499
|
+
|
500
|
+
it "can anchor matches at both ends when extracting submatches", :aggregate_failures do
|
501
|
+
re = RE2::Regexp.new('(two)')
|
339
502
|
|
340
|
-
|
341
|
-
|
342
|
-
|
503
|
+
expect(re.match("two three", anchor: :anchor_both)).to be_nil
|
504
|
+
expect(re.match("two", anchor: :anchor_both)).to be_a(RE2::MatchData)
|
505
|
+
end
|
506
|
+
|
507
|
+
it "does not anchor matches when given a nil anchor" do
|
508
|
+
re = RE2::Regexp.new('(two)')
|
509
|
+
|
510
|
+
expect(re.match("one two three", anchor: nil)).to be_a(RE2::MatchData)
|
511
|
+
end
|
343
512
|
|
344
|
-
|
345
|
-
|
346
|
-
|
513
|
+
it "raises an exception when given an invalid anchor" do
|
514
|
+
re = RE2::Regexp.new('(two)')
|
515
|
+
|
516
|
+
expect { re.match("one two three", anchor: :invalid) }.to raise_error(ArgumentError, "anchor should be one of: :unanchored, :anchor_start, :anchor_both")
|
347
517
|
end
|
348
518
|
|
349
|
-
|
350
|
-
|
519
|
+
it "raises an exception when given a non-symbol anchor" do
|
520
|
+
re = RE2::Regexp.new('(two)')
|
521
|
+
|
522
|
+
expect { re.match("one two three", anchor: 0) }.to raise_error(TypeError)
|
523
|
+
end
|
351
524
|
|
352
|
-
|
353
|
-
|
354
|
-
|
525
|
+
it "extracts all submatches by default", :aggregate_failures do
|
526
|
+
re = RE2::Regexp.new('(\w+) (\w+) (\w+)')
|
527
|
+
md = re.match("one two three")
|
355
528
|
|
356
|
-
|
357
|
-
|
358
|
-
|
529
|
+
expect(md[1]).to eq("one")
|
530
|
+
expect(md[2]).to eq("two")
|
531
|
+
expect(md[3]).to eq("three")
|
532
|
+
end
|
533
|
+
|
534
|
+
it "supports extracting submatches containing null bytes" do
|
535
|
+
re = RE2::Regexp.new("(a\0b)")
|
536
|
+
md = re.match("a\0bc")
|
537
|
+
|
538
|
+
expect(md[1]).to eq("a\0b")
|
539
|
+
end
|
540
|
+
|
541
|
+
it "extracts a specific number of submatches", :aggregate_failures do
|
542
|
+
re = RE2::Regexp.new('(\w+) (\w+) (\w+)')
|
543
|
+
md = re.match("one two three", submatches: 2)
|
544
|
+
|
545
|
+
expect(md[1]).to eq("one")
|
546
|
+
expect(md[2]).to eq("two")
|
547
|
+
expect(md[3]).to be_nil
|
548
|
+
end
|
549
|
+
|
550
|
+
it "pads submatches with nil when requesting more than the number of capturing groups" do
|
551
|
+
re = RE2::Regexp.new('(\w+) (\w+) (\w+)')
|
552
|
+
md = re.match("one two three", submatches: 5)
|
553
|
+
|
554
|
+
expect(md.to_a).to eq(["one two three", "one", "two", "three", nil, nil])
|
555
|
+
end
|
556
|
+
|
557
|
+
it "raises an exception when given a negative number of submatches" do
|
558
|
+
re = RE2::Regexp.new('(\w+) (\w+) (\w+)')
|
559
|
+
|
560
|
+
expect { re.match("one two three", submatches: -1) }.to raise_error(ArgumentError, "number of matches should be >= 0")
|
561
|
+
end
|
562
|
+
|
563
|
+
it "raises an exception when given a non-numeric number of submatches" do
|
564
|
+
re = RE2::Regexp.new('(\w+) (\w+) (\w+)')
|
565
|
+
|
566
|
+
expect { re.match("one two three", submatches: :invalid) }.to raise_error(TypeError)
|
567
|
+
end
|
359
568
|
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
end
|
569
|
+
it "defaults to extracting all submatches when given nil", :aggregate_failures do
|
570
|
+
re = RE2::Regexp.new('(\w+) (\w+) (\w+)')
|
571
|
+
md = re.match("one two three", submatches: nil)
|
364
572
|
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
573
|
+
expect(md[1]).to eq("one")
|
574
|
+
expect(md[2]).to eq("two")
|
575
|
+
expect(md[3]).to eq("three")
|
576
|
+
end
|
577
|
+
|
578
|
+
it "accepts passing the number of submatches instead of options for backward compatibility", :aggregate_failures do
|
579
|
+
re = RE2::Regexp.new('(\w+) (\w+) (\w+)')
|
580
|
+
md = re.match("one two three", 2)
|
581
|
+
|
582
|
+
expect(md[1]).to eq("one")
|
583
|
+
expect(md[2]).to eq("two")
|
584
|
+
expect(md[3]).to be_nil
|
585
|
+
end
|
586
|
+
|
587
|
+
it "raises an exception when given invalid options" do
|
588
|
+
re = RE2::Regexp.new('(\w+) (\w+) (\w+)')
|
589
|
+
|
590
|
+
expect { re.match("one two three", :invalid) }.to raise_error(TypeError)
|
591
|
+
end
|
592
|
+
|
593
|
+
it "accepts anything that can be coerced to a hash as options", :aggregate_failures do
|
594
|
+
re = RE2::Regexp.new('(\w+) (\w+) (\w+)')
|
595
|
+
|
596
|
+
expect(re.match("one two three", nil)).to be_a(RE2::MatchData)
|
371
597
|
end
|
372
598
|
end
|
373
599
|
|
374
600
|
describe "#match?" do
|
375
|
-
it "returns only true or false if
|
601
|
+
it "returns only true or false even if there are capturing groups", :aggregate_failures do
|
376
602
|
re = RE2::Regexp.new('My name is (\S+) (\S+)')
|
377
|
-
|
603
|
+
|
604
|
+
expect(re.match?("My name is Alice Bloggs")).to eq(true)
|
378
605
|
expect(re.match?("My age is 99")).to eq(false)
|
379
606
|
end
|
380
607
|
|
381
608
|
it "returns false if the pattern is invalid" do
|
382
|
-
re = RE2::Regexp.new('???', :
|
383
|
-
|
609
|
+
re = RE2::Regexp.new('???', log_errors: false)
|
610
|
+
|
611
|
+
expect(re.match?("My name is Alice Bloggs")).to eq(false)
|
612
|
+
end
|
613
|
+
|
614
|
+
it "raises an exception if text cannot be coerced to a string" do
|
615
|
+
re = RE2::Regexp.new('My name is (\S+) (\S+)')
|
616
|
+
|
617
|
+
expect { re.match?(0) }.to raise_error(TypeError)
|
618
|
+
end
|
619
|
+
end
|
620
|
+
|
621
|
+
describe "#partial_match?" do
|
622
|
+
it "returns only true or false even if there are capturing groups", :aggregate_failures do
|
623
|
+
re = RE2::Regexp.new('My name is (\S+) (\S+)')
|
624
|
+
|
625
|
+
expect(re.partial_match?("My name is Alice Bloggs")).to eq(true)
|
626
|
+
expect(re.partial_match?("My age is 99")).to eq(false)
|
627
|
+
end
|
628
|
+
|
629
|
+
it "supports matching against text containing null bytes", :aggregate_failures do
|
630
|
+
re = RE2::Regexp.new("a\0b")
|
631
|
+
|
632
|
+
expect(re.partial_match?("a\0b")).to eq(true)
|
633
|
+
expect(re.partial_match?("ab")).to eq(false)
|
634
|
+
end
|
635
|
+
|
636
|
+
it "returns false if the pattern is invalid" do
|
637
|
+
re = RE2::Regexp.new('???', log_errors: false)
|
638
|
+
|
639
|
+
expect(re.partial_match?("My name is Alice Bloggs")).to eq(false)
|
640
|
+
end
|
641
|
+
|
642
|
+
it "raises an exception if text cannot be coerced to a string" do
|
643
|
+
re = RE2::Regexp.new('My name is (\S+) (\S+)')
|
644
|
+
|
645
|
+
expect { re.partial_match?(0) }.to raise_error(TypeError)
|
384
646
|
end
|
385
647
|
end
|
386
648
|
|
387
649
|
describe "#=~" do
|
388
|
-
it "returns only true or false if
|
650
|
+
it "returns only true or false even if there are capturing groups", :aggregate_failures do
|
389
651
|
re = RE2::Regexp.new('My name is (\S+) (\S+)')
|
390
|
-
|
652
|
+
|
653
|
+
expect(re =~ "My name is Alice Bloggs").to eq(true)
|
391
654
|
expect(re =~ "My age is 99").to eq(false)
|
392
655
|
end
|
393
|
-
end
|
394
656
|
|
395
|
-
|
396
|
-
|
657
|
+
it "supports matching against text containing null bytes", :aggregate_failures do
|
658
|
+
re = RE2::Regexp.new("a\0b")
|
659
|
+
|
660
|
+
expect(re =~ "a\0b").to eq(true)
|
661
|
+
expect(re =~ "ab").to eq(false)
|
662
|
+
end
|
663
|
+
|
664
|
+
it "returns false if the pattern is invalid" do
|
665
|
+
re = RE2::Regexp.new('???', log_errors: false)
|
666
|
+
|
667
|
+
expect(re =~ "My name is Alice Bloggs").to eq(false)
|
668
|
+
end
|
669
|
+
|
670
|
+
it "raises an exception if text cannot be coerced to a string" do
|
397
671
|
re = RE2::Regexp.new('My name is (\S+) (\S+)')
|
398
|
-
|
399
|
-
expect
|
672
|
+
|
673
|
+
expect { re =~ 0 }.to raise_error(TypeError)
|
400
674
|
end
|
401
675
|
end
|
402
676
|
|
403
677
|
describe "#===" do
|
404
|
-
it "returns only true or false if
|
678
|
+
it "returns only true or false even if there are capturing groups", :aggregate_failures do
|
405
679
|
re = RE2::Regexp.new('My name is (\S+) (\S+)')
|
406
|
-
|
680
|
+
|
681
|
+
expect(re === "My name is Alice Bloggs").to eq(true)
|
407
682
|
expect(re === "My age is 99").to eq(false)
|
408
683
|
end
|
684
|
+
|
685
|
+
it "returns false if the pattern is invalid" do
|
686
|
+
re = RE2::Regexp.new('???', log_errors: false)
|
687
|
+
|
688
|
+
expect(re === "My name is Alice Bloggs").to eq(false)
|
689
|
+
end
|
690
|
+
|
691
|
+
it "raises an exception if text cannot be coerced to a string" do
|
692
|
+
re = RE2::Regexp.new('My name is (\S+) (\S+)')
|
693
|
+
|
694
|
+
expect { re === 0 }.to raise_error(TypeError)
|
695
|
+
end
|
696
|
+
end
|
697
|
+
|
698
|
+
describe "#full_match?" do
|
699
|
+
it "returns only true or false even if there are capturing groups", :aggregate_failures do
|
700
|
+
re = RE2::Regexp.new('My name is (\S+) (\S+)')
|
701
|
+
|
702
|
+
expect(re.full_match?("My name is Alice Bloggs")).to eq(true)
|
703
|
+
expect(re.full_match?("My name is Alice Bloggs and I am 99")).to eq(false)
|
704
|
+
end
|
705
|
+
|
706
|
+
it "supports matching against text containing null bytes", :aggregate_failures do
|
707
|
+
re = RE2::Regexp.new("a\0b")
|
708
|
+
|
709
|
+
expect(re.full_match?("a\0b")).to eq(true)
|
710
|
+
expect(re.full_match?("a\0bc")).to eq(false)
|
711
|
+
end
|
712
|
+
|
713
|
+
it "returns false if the pattern is invalid" do
|
714
|
+
re = RE2::Regexp.new('???', log_errors: false)
|
715
|
+
|
716
|
+
expect(re.full_match?("My name is Alice Bloggs")).to eq(false)
|
717
|
+
end
|
718
|
+
|
719
|
+
it "raises an exception if text cannot be coerced to a string" do
|
720
|
+
re = RE2::Regexp.new('My name is (\S+) (\S+)')
|
721
|
+
|
722
|
+
expect { re.full_match?(0) }.to raise_error(TypeError)
|
723
|
+
end
|
409
724
|
end
|
410
725
|
|
411
726
|
describe "#ok?" do
|
412
|
-
it "returns true for valid
|
727
|
+
it "returns true for valid patterns", :aggregate_failures do
|
413
728
|
expect(RE2::Regexp.new('woo')).to be_ok
|
414
729
|
expect(RE2::Regexp.new('wo(o)')).to be_ok
|
415
730
|
expect(RE2::Regexp.new('((\d)\w+){3,}')).to be_ok
|
416
731
|
end
|
417
732
|
|
418
|
-
it "returns false for invalid
|
419
|
-
expect(RE2::Regexp.new('wo(o', :
|
420
|
-
expect(RE2::Regexp.new('wo[o', :
|
421
|
-
expect(RE2::Regexp.new('*', :
|
733
|
+
it "returns false for invalid patterns", :aggregate_failures do
|
734
|
+
expect(RE2::Regexp.new('wo(o', log_errors: false)).to_not be_ok
|
735
|
+
expect(RE2::Regexp.new('wo[o', log_errors: false)).to_not be_ok
|
736
|
+
expect(RE2::Regexp.new('*', log_errors: false)).to_not be_ok
|
422
737
|
end
|
423
738
|
end
|
424
739
|
|
@@ -435,14 +750,14 @@ RSpec.describe RE2::Regexp do
|
|
435
750
|
end
|
436
751
|
|
437
752
|
describe "#number_of_capturing_groups" do
|
438
|
-
it "returns the number of groups in a
|
753
|
+
it "returns the number of groups in a pattern", :aggregate_failures do
|
439
754
|
expect(RE2::Regexp.new('(a)(b)(c)').number_of_capturing_groups).to eq(3)
|
440
755
|
expect(RE2::Regexp.new('abc').number_of_capturing_groups).to eq(0)
|
441
756
|
expect(RE2::Regexp.new('a((b)c)').number_of_capturing_groups).to eq(2)
|
442
757
|
end
|
443
758
|
|
444
|
-
it "returns -1 for an invalid
|
445
|
-
expect(RE2::Regexp.new('???', :
|
759
|
+
it "returns -1 for an invalid pattern" do
|
760
|
+
expect(RE2::Regexp.new('???', log_errors: false).number_of_capturing_groups).to eq(-1)
|
446
761
|
end
|
447
762
|
end
|
448
763
|
|
@@ -453,17 +768,18 @@ RSpec.describe RE2::Regexp do
|
|
453
768
|
|
454
769
|
it "maps names to indices with only one group" do
|
455
770
|
groups = RE2::Regexp.new('(?P<bob>a)').named_capturing_groups
|
456
|
-
|
771
|
+
|
772
|
+
expect(groups).to eq("bob" => 1)
|
457
773
|
end
|
458
774
|
|
459
775
|
it "maps names to indices with several groups" do
|
460
776
|
groups = RE2::Regexp.new('(?P<bob>a)(o)(?P<rob>e)').named_capturing_groups
|
461
|
-
|
462
|
-
expect(groups
|
777
|
+
|
778
|
+
expect(groups).to eq("bob" => 1, "rob" => 3)
|
463
779
|
end
|
464
780
|
|
465
781
|
it "returns an empty hash for an invalid regexp" do
|
466
|
-
expect(RE2::Regexp.new('???', :
|
782
|
+
expect(RE2::Regexp.new('???', log_errors: false).named_capturing_groups).to be_empty
|
467
783
|
end
|
468
784
|
end
|
469
785
|
|
@@ -474,5 +790,122 @@ RSpec.describe RE2::Regexp do
|
|
474
790
|
|
475
791
|
expect(scanner).to be_a(RE2::Scanner)
|
476
792
|
end
|
793
|
+
|
794
|
+
it "raises a type error if given invalid input" do
|
795
|
+
r = RE2::Regexp.new('(\w+)')
|
796
|
+
|
797
|
+
expect { r.scan(nil) }.to raise_error(TypeError)
|
798
|
+
end
|
799
|
+
end
|
800
|
+
|
801
|
+
describe "#partial_match" do
|
802
|
+
it "matches the pattern anywhere within the given text" do
|
803
|
+
r = RE2::Regexp.new('f(o+)')
|
804
|
+
|
805
|
+
expect(r.partial_match("foo bar")).to be_a(RE2::MatchData)
|
806
|
+
end
|
807
|
+
|
808
|
+
it "returns true or false if there are no capturing groups" do
|
809
|
+
r = RE2::Regexp.new('fo+')
|
810
|
+
|
811
|
+
expect(r.partial_match("foo bar")).to eq(true)
|
812
|
+
end
|
813
|
+
|
814
|
+
it "can set the number of submatches to extract", :aggregate_failures do
|
815
|
+
r = RE2::Regexp.new('f(o+)(a+)')
|
816
|
+
m = r.partial_match("fooaa bar", submatches: 1)
|
817
|
+
|
818
|
+
expect(m[1]).to eq("oo")
|
819
|
+
expect(m[2]).to be_nil
|
820
|
+
|
821
|
+
m = r.partial_match("fooaa bar", submatches: 2)
|
822
|
+
|
823
|
+
expect(m[1]).to eq("oo")
|
824
|
+
expect(m[2]).to eq("aa")
|
825
|
+
end
|
826
|
+
|
827
|
+
it "raises an error if given non-hash options" do
|
828
|
+
r = RE2::Regexp.new('f(o+)(a+)')
|
829
|
+
|
830
|
+
expect { r.partial_match("fooaa bar", "not a hash") }.to raise_error(TypeError)
|
831
|
+
end
|
832
|
+
|
833
|
+
it "accepts options that can be coerced to a hash", :aggregate_failures do
|
834
|
+
r = RE2::Regexp.new('f(o+)(a+)')
|
835
|
+
|
836
|
+
m = r.partial_match("fooaa bar", nil)
|
837
|
+
expect(m[1]).to eq('oo')
|
838
|
+
|
839
|
+
m = r.partial_match("fooaa bar", [])
|
840
|
+
expect(m[1]).to eq('oo')
|
841
|
+
end
|
842
|
+
|
843
|
+
it "accepts anything that can be coerced to a string" do
|
844
|
+
r = RE2::Regexp.new('f(o+)(a+)')
|
845
|
+
|
846
|
+
expect(r.partial_match(StringLike.new("fooaa bar"))).to be_a(RE2::MatchData)
|
847
|
+
end
|
848
|
+
|
849
|
+
it "does not allow the anchor to be overridden" do
|
850
|
+
r = RE2::Regexp.new('(\d+)')
|
851
|
+
|
852
|
+
expect(r.partial_match('ruby:1234', anchor: :anchor_both)).to be_a(RE2::MatchData)
|
853
|
+
end
|
854
|
+
end
|
855
|
+
|
856
|
+
describe "#full_match" do
|
857
|
+
it "only matches the pattern if all of the given text matches", :aggregate_failures do
|
858
|
+
r = RE2::Regexp.new('f(o+)')
|
859
|
+
|
860
|
+
expect(r.full_match("foo")).to be_a(RE2::MatchData)
|
861
|
+
expect(r.full_match("foo bar")).to be_nil
|
862
|
+
end
|
863
|
+
|
864
|
+
it "returns true or false if there are no capturing groups" do
|
865
|
+
r = RE2::Regexp.new('fo+')
|
866
|
+
|
867
|
+
expect(r.full_match("foo")).to eq(true)
|
868
|
+
end
|
869
|
+
|
870
|
+
it "can set the number of submatches to extract", :aggregate_failures do
|
871
|
+
r = RE2::Regexp.new('f(o+)(a+)')
|
872
|
+
m = r.full_match("fooaa", submatches: 1)
|
873
|
+
|
874
|
+
expect(m[1]).to eq("oo")
|
875
|
+
expect(m[2]).to be_nil
|
876
|
+
|
877
|
+
m = r.full_match("fooaa", submatches: 2)
|
878
|
+
|
879
|
+
expect(m[1]).to eq("oo")
|
880
|
+
expect(m[2]).to eq("aa")
|
881
|
+
end
|
882
|
+
|
883
|
+
it "raises an error if given non-hash options" do
|
884
|
+
r = RE2::Regexp.new('f(o+)(a+)')
|
885
|
+
|
886
|
+
expect { r.full_match("fooaa", "not a hash") }.to raise_error(TypeError)
|
887
|
+
end
|
888
|
+
|
889
|
+
it "accepts options that can be coerced to a hash", :aggregate_failures do
|
890
|
+
r = RE2::Regexp.new('f(o+)(a+)')
|
891
|
+
|
892
|
+
m = r.full_match("fooaa", nil)
|
893
|
+
expect(m[1]).to eq("oo")
|
894
|
+
|
895
|
+
m = r.full_match("fooaa", [])
|
896
|
+
expect(m[1]).to eq("oo")
|
897
|
+
end
|
898
|
+
|
899
|
+
it "accepts anything that can be coerced to a string" do
|
900
|
+
r = RE2::Regexp.new('f(o+)(a+)')
|
901
|
+
|
902
|
+
expect(r.full_match(StringLike.new("fooaa"), submatches: 0)).to eq(true)
|
903
|
+
end
|
904
|
+
|
905
|
+
it "does not allow the anchor to be overridden" do
|
906
|
+
r = RE2::Regexp.new('(\d+)')
|
907
|
+
|
908
|
+
expect(r.full_match('ruby:1234', anchor: :unanchored)).to be_nil
|
909
|
+
end
|
477
910
|
end
|
478
911
|
end
|