re2 2.15.0.rc1-x86-linux-musl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/Gemfile +11 -0
- data/LICENSE-DEPENDENCIES.txt +237 -0
- data/LICENSE.txt +28 -0
- data/README.md +396 -0
- data/Rakefile +94 -0
- data/dependencies.yml +7 -0
- data/ext/re2/extconf.rb +332 -0
- data/ext/re2/re2.cc +2254 -0
- data/ext/re2/recipes.rb +54 -0
- data/lib/3.1/re2.so +0 -0
- data/lib/3.2/re2.so +0 -0
- data/lib/3.3/re2.so +0 -0
- data/lib/3.4/re2.so +0 -0
- data/lib/re2/regexp.rb +72 -0
- data/lib/re2/scanner.rb +26 -0
- data/lib/re2/string.rb +38 -0
- data/lib/re2/version.rb +14 -0
- data/lib/re2.rb +20 -0
- data/re2.gemspec +47 -0
- data/spec/kernel_spec.rb +37 -0
- data/spec/re2/match_data_spec.rb +411 -0
- data/spec/re2/regexp_spec.rb +911 -0
- data/spec/re2/scanner_spec.rb +275 -0
- data/spec/re2/set_spec.rb +231 -0
- data/spec/re2/string_spec.rb +62 -0
- data/spec/re2_spec.rb +201 -0
- data/spec/spec_helper.rb +31 -0
- metadata +129 -0
@@ -0,0 +1,911 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
RSpec.describe RE2::Regexp do
|
4
|
+
describe "#initialize" do
|
5
|
+
it "returns an instance given only a pattern" do
|
6
|
+
re = RE2::Regexp.new('woo')
|
7
|
+
|
8
|
+
expect(re).to be_a(RE2::Regexp)
|
9
|
+
end
|
10
|
+
|
11
|
+
it "returns an instance given a pattern and options" do
|
12
|
+
re = RE2::Regexp.new('woo', case_sensitive: false)
|
13
|
+
|
14
|
+
expect(re).to be_a(RE2::Regexp)
|
15
|
+
end
|
16
|
+
|
17
|
+
it "accepts patterns containing null bytes" do
|
18
|
+
re = RE2::Regexp.new("a\0b")
|
19
|
+
|
20
|
+
expect(re.pattern).to eq("a\0b")
|
21
|
+
end
|
22
|
+
|
23
|
+
it "raises an error if given an inappropriate type" do
|
24
|
+
expect { RE2::Regexp.new(nil) }.to raise_error(TypeError)
|
25
|
+
end
|
26
|
+
|
27
|
+
it "allows invalid patterns to be created" do
|
28
|
+
re = RE2::Regexp.new('???', log_errors: false)
|
29
|
+
|
30
|
+
expect(re).to be_a(RE2::Regexp)
|
31
|
+
end
|
32
|
+
|
33
|
+
it "supports passing something that can be coerced to a String as input" do
|
34
|
+
re = RE2::Regexp.new(StringLike.new('w(o)(o)'))
|
35
|
+
|
36
|
+
expect(re).to be_a(RE2::Regexp)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
describe ".compile" do
|
41
|
+
it "returns an instance given only a pattern" do
|
42
|
+
re = RE2::Regexp.compile('woo')
|
43
|
+
|
44
|
+
expect(re).to be_a(RE2::Regexp)
|
45
|
+
end
|
46
|
+
|
47
|
+
it "returns an instance given a pattern and options" do
|
48
|
+
re = RE2::Regexp.compile('woo', case_sensitive: false)
|
49
|
+
expect(re).to be_a(RE2::Regexp)
|
50
|
+
end
|
51
|
+
|
52
|
+
it "accepts patterns containing null bytes" do
|
53
|
+
re = RE2::Regexp.compile("a\0b")
|
54
|
+
|
55
|
+
expect(re.pattern).to eq("a\0b")
|
56
|
+
end
|
57
|
+
|
58
|
+
it "raises an error if given an inappropriate type" do
|
59
|
+
expect { RE2::Regexp.compile(nil) }.to raise_error(TypeError)
|
60
|
+
end
|
61
|
+
|
62
|
+
it "allows invalid patterns to be created" do
|
63
|
+
re = RE2::Regexp.compile('???', log_errors: false)
|
64
|
+
|
65
|
+
expect(re).to be_a(RE2::Regexp)
|
66
|
+
end
|
67
|
+
|
68
|
+
it "supports passing something that can be coerced to a String as input" do
|
69
|
+
re = RE2::Regexp.compile(StringLike.new('w(o)(o)'))
|
70
|
+
|
71
|
+
expect(re).to be_a(RE2::Regexp)
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
describe "#options" do
|
76
|
+
it "returns a hash of options" do
|
77
|
+
options = RE2::Regexp.new('woo').options
|
78
|
+
expect(options).to be_a(Hash)
|
79
|
+
end
|
80
|
+
|
81
|
+
it "is populated with default options when nothing has been set" do
|
82
|
+
expect(RE2::Regexp.new('woo').options).to include(
|
83
|
+
utf8: true,
|
84
|
+
posix_syntax: false,
|
85
|
+
longest_match: false,
|
86
|
+
log_errors: true,
|
87
|
+
literal: false,
|
88
|
+
never_nl: false,
|
89
|
+
case_sensitive: true,
|
90
|
+
perl_classes: false,
|
91
|
+
word_boundary: false,
|
92
|
+
one_line: false
|
93
|
+
)
|
94
|
+
end
|
95
|
+
|
96
|
+
it "is populated with overridden options when specified" do
|
97
|
+
options = RE2::Regexp.new('woo', case_sensitive: false).options
|
98
|
+
|
99
|
+
expect(options).to include(case_sensitive: false)
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
describe "#error" do
|
104
|
+
it "returns nil if there is no error" do
|
105
|
+
error = RE2::Regexp.new('woo').error
|
106
|
+
|
107
|
+
expect(error).to be_nil
|
108
|
+
end
|
109
|
+
|
110
|
+
# Use log_errors: false to suppress RE2's logging to STDERR.
|
111
|
+
it "contains the error string if there is an error" do
|
112
|
+
error = RE2::Regexp.new('wo(o', log_errors: false).error
|
113
|
+
|
114
|
+
expect(error).to eq("missing ): wo(o")
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
describe "#error_arg" do
|
119
|
+
it "returns nil if there is no error" do
|
120
|
+
error_arg = RE2::Regexp.new('woo').error_arg
|
121
|
+
|
122
|
+
expect(error_arg).to be_nil
|
123
|
+
end
|
124
|
+
|
125
|
+
it "returns the offending portion of the pattern if there is an error" do
|
126
|
+
error_arg = RE2::Regexp.new('wo(o', log_errors: false).error_arg
|
127
|
+
|
128
|
+
expect(error_arg).to eq("wo(o")
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
describe "#program_size" do
|
133
|
+
it "returns a numeric value" do
|
134
|
+
program_size = RE2::Regexp.new('w(o)(o)').program_size
|
135
|
+
|
136
|
+
expect(program_size).to be_an(Integer)
|
137
|
+
end
|
138
|
+
|
139
|
+
it "returns -1 for an invalid pattern" do
|
140
|
+
program_size = RE2::Regexp.new('???', log_errors: false).program_size
|
141
|
+
|
142
|
+
expect(program_size).to eq(-1)
|
143
|
+
end
|
144
|
+
end
|
145
|
+
|
146
|
+
describe "#to_str" do
|
147
|
+
it "returns the original pattern" do
|
148
|
+
string = RE2::Regexp.new('w(o)(o)').to_str
|
149
|
+
|
150
|
+
expect(string).to eq("w(o)(o)")
|
151
|
+
end
|
152
|
+
|
153
|
+
it "returns the pattern even if invalid" do
|
154
|
+
string = RE2::Regexp.new('???', log_errors: false).to_str
|
155
|
+
|
156
|
+
expect(string).to eq("???")
|
157
|
+
end
|
158
|
+
end
|
159
|
+
|
160
|
+
describe "#pattern" do
|
161
|
+
it "returns the original pattern" do
|
162
|
+
pattern = RE2::Regexp.new('w(o)(o)').pattern
|
163
|
+
|
164
|
+
expect(pattern).to eq("w(o)(o)")
|
165
|
+
end
|
166
|
+
|
167
|
+
it "returns the pattern even if invalid" do
|
168
|
+
pattern = RE2::Regexp.new('???', log_errors: false).pattern
|
169
|
+
|
170
|
+
expect(pattern).to eq("???")
|
171
|
+
end
|
172
|
+
end
|
173
|
+
|
174
|
+
describe "#inspect" do
|
175
|
+
it "shows the class name and original pattern" do
|
176
|
+
string = RE2::Regexp.new('w(o)(o)').inspect
|
177
|
+
|
178
|
+
expect(string).to eq("#<RE2::Regexp /w(o)(o)/>")
|
179
|
+
end
|
180
|
+
|
181
|
+
it "respects the pattern's original encoding" do
|
182
|
+
string = RE2::Regexp.new('w(o)(o)', utf8: false).inspect
|
183
|
+
|
184
|
+
expect(string.encoding).to eq(Encoding::ISO_8859_1)
|
185
|
+
end
|
186
|
+
end
|
187
|
+
|
188
|
+
describe "#utf8?" do
|
189
|
+
it "returns true by default" do
|
190
|
+
expect(RE2::Regexp.new('woo')).to be_utf8
|
191
|
+
end
|
192
|
+
|
193
|
+
it "can be overridden on initialization" do
|
194
|
+
re = RE2::Regexp.new('woo', utf8: false)
|
195
|
+
|
196
|
+
expect(re).to_not be_utf8
|
197
|
+
end
|
198
|
+
end
|
199
|
+
|
200
|
+
describe "#posix_syntax?" do
|
201
|
+
it "returns false by default" do
|
202
|
+
expect(RE2::Regexp.new('woo')).to_not be_posix_syntax
|
203
|
+
end
|
204
|
+
|
205
|
+
it "can be overridden on initialization" do
|
206
|
+
re = RE2::Regexp.new('woo', posix_syntax: true)
|
207
|
+
|
208
|
+
expect(re).to be_posix_syntax
|
209
|
+
end
|
210
|
+
end
|
211
|
+
|
212
|
+
describe "#literal?" do
|
213
|
+
it "returns false by default" do
|
214
|
+
expect(RE2::Regexp.new('woo')).to_not be_literal
|
215
|
+
end
|
216
|
+
|
217
|
+
it "can be overridden on initialization" do
|
218
|
+
re = RE2::Regexp.new('woo', literal: true)
|
219
|
+
|
220
|
+
expect(re).to be_literal
|
221
|
+
end
|
222
|
+
end
|
223
|
+
|
224
|
+
describe "#never_nl?" do
|
225
|
+
it "returns false by default" do
|
226
|
+
expect(RE2::Regexp.new('woo')).to_not be_never_nl
|
227
|
+
end
|
228
|
+
|
229
|
+
it "can be overridden on initialization" do
|
230
|
+
re = RE2::Regexp.new('woo', never_nl: true)
|
231
|
+
|
232
|
+
expect(re).to be_never_nl
|
233
|
+
end
|
234
|
+
end
|
235
|
+
|
236
|
+
describe "#case_sensitive?" do
|
237
|
+
it "returns true by default" do
|
238
|
+
expect(RE2::Regexp.new('woo')).to be_case_sensitive
|
239
|
+
end
|
240
|
+
|
241
|
+
it "can be overridden on initialization" do
|
242
|
+
re = RE2::Regexp.new('woo', case_sensitive: false)
|
243
|
+
expect(re).to_not be_case_sensitive
|
244
|
+
end
|
245
|
+
end
|
246
|
+
|
247
|
+
describe "#case_insensitive?" do
|
248
|
+
it "returns false by default" do
|
249
|
+
expect(RE2::Regexp.new('woo')).to_not be_case_insensitive
|
250
|
+
end
|
251
|
+
|
252
|
+
it "can be overridden on initialization" do
|
253
|
+
re = RE2::Regexp.new('woo', case_sensitive: false)
|
254
|
+
|
255
|
+
expect(re).to be_case_insensitive
|
256
|
+
end
|
257
|
+
end
|
258
|
+
|
259
|
+
describe "#casefold?" do
|
260
|
+
# A pattern built without options is case sensitive (see the
# #case_sensitive? examples), so casefold? is expected to be false.
# The example description states that expectation accurately.
it "returns false by default" do
  expect(RE2::Regexp.new('woo')).to_not be_casefold
end
|
263
|
+
|
264
|
+
it "can be overridden on initialization" do
|
265
|
+
re = RE2::Regexp.new('woo', case_sensitive: false)
|
266
|
+
|
267
|
+
expect(re).to be_casefold
|
268
|
+
end
|
269
|
+
end
|
270
|
+
|
271
|
+
describe "#longest_match?" do
|
272
|
+
# Checks the longest_match? predicate itself (not casefold?) so this group
# covers its own default; the default options hash lists longest_match: false.
it "returns false by default" do
  expect(RE2::Regexp.new('woo')).to_not be_longest_match
end
|
275
|
+
|
276
|
+
it "can be overridden on initialization" do
|
277
|
+
re = RE2::Regexp.new('woo', longest_match: true)
|
278
|
+
|
279
|
+
expect(re).to be_longest_match
|
280
|
+
end
|
281
|
+
end
|
282
|
+
|
283
|
+
describe "#log_errors?" do
|
284
|
+
it "returns true by default" do
|
285
|
+
expect(RE2::Regexp.new('woo')).to be_log_errors
|
286
|
+
end
|
287
|
+
|
288
|
+
it "can be overridden on initialization" do
|
289
|
+
re = RE2::Regexp.new('woo', log_errors: false)
|
290
|
+
|
291
|
+
expect(re).to_not be_log_errors
|
292
|
+
end
|
293
|
+
end
|
294
|
+
|
295
|
+
describe "#perl_classes?" do
|
296
|
+
it "returns false by default" do
|
297
|
+
expect(RE2::Regexp.new('woo')).to_not be_perl_classes
|
298
|
+
end
|
299
|
+
|
300
|
+
it "can be overridden on initialization" do
|
301
|
+
re = RE2::Regexp.new('woo', perl_classes: true)
|
302
|
+
|
303
|
+
expect(re).to be_perl_classes
|
304
|
+
end
|
305
|
+
end
|
306
|
+
|
307
|
+
describe "#word_boundary?" do
|
308
|
+
it "returns false by default" do
|
309
|
+
expect(RE2::Regexp.new('woo')).to_not be_word_boundary
|
310
|
+
end
|
311
|
+
|
312
|
+
it "can be overridden on initialization" do
|
313
|
+
re = RE2::Regexp.new('woo', word_boundary: true)
|
314
|
+
|
315
|
+
expect(re).to be_word_boundary
|
316
|
+
end
|
317
|
+
end
|
318
|
+
|
319
|
+
describe "#one_line?" do
|
320
|
+
it "returns false by default" do
|
321
|
+
expect(RE2::Regexp.new('woo')).to_not be_one_line
|
322
|
+
end
|
323
|
+
|
324
|
+
it "can be overridden on initialization" do
|
325
|
+
re = RE2::Regexp.new('woo', one_line: true)
|
326
|
+
|
327
|
+
expect(re).to be_one_line
|
328
|
+
end
|
329
|
+
end
|
330
|
+
|
331
|
+
describe "#max_mem" do
|
332
|
+
it "returns the default max memory" do
|
333
|
+
expect(RE2::Regexp.new('woo').max_mem).to eq(8388608)
|
334
|
+
end
|
335
|
+
|
336
|
+
it "can be overridden on initialization" do
|
337
|
+
re = RE2::Regexp.new('woo', max_mem: 1024)
|
338
|
+
|
339
|
+
expect(re.max_mem).to eq(1024)
|
340
|
+
end
|
341
|
+
end
|
342
|
+
|
343
|
+
describe "#match" do
|
344
|
+
it "returns match data given only text if the pattern has capturing groups" do
|
345
|
+
re = RE2::Regexp.new('My name is (\w+) (\w+)')
|
346
|
+
|
347
|
+
expect(re.match("My name is Alice Bloggs")).to be_a(RE2::MatchData)
|
348
|
+
end
|
349
|
+
|
350
|
+
it "returns only true or false given only text if the pattern has no capturing groups" do
|
351
|
+
re = RE2::Regexp.new('My name is \w+ \w+')
|
352
|
+
|
353
|
+
expect(re.match("My name is Alice Bloggs")).to eq(true)
|
354
|
+
end
|
355
|
+
|
356
|
+
it "supports matching against text containing null bytes" do
|
357
|
+
re = RE2::Regexp.new("a\0b")
|
358
|
+
|
359
|
+
expect(re.match("a\0b")).to eq(true)
|
360
|
+
end
|
361
|
+
|
362
|
+
it "returns nil if the text does not match the pattern" do
|
363
|
+
re = RE2::Regexp.new('My name is (\w+) (\w+)')
|
364
|
+
|
365
|
+
expect(re.match("My age is 99")).to be_nil
|
366
|
+
end
|
367
|
+
|
368
|
+
it "accepts text that can be coerced to a string" do
|
369
|
+
re = RE2::Regexp.new('My name is (\w+) (\w+)')
|
370
|
+
|
371
|
+
expect(re.match(StringLike.new("My name is Alice Bloggs"))).to be_a(RE2::MatchData)
|
372
|
+
end
|
373
|
+
|
374
|
+
it "raises an exception when given text that cannot be coerced to a string" do
|
375
|
+
re = RE2::Regexp.new('My name is (\w+) (\w+)')
|
376
|
+
|
377
|
+
expect { re.match(nil) }.to raise_error(TypeError)
|
378
|
+
end
|
379
|
+
|
380
|
+
it "returns nil with an invalid pattern" do
|
381
|
+
re = RE2::Regexp.new('???', log_errors: false)
|
382
|
+
|
383
|
+
expect(re.match("My name is Alice Bloggs")).to be_nil
|
384
|
+
end
|
385
|
+
|
386
|
+
it "returns nil with an invalid pattern and options" do
|
387
|
+
re = RE2::Regexp.new('???', log_errors: false)
|
388
|
+
|
389
|
+
expect(re.match('foo bar', startpos: 1)).to be_nil
|
390
|
+
end
|
391
|
+
|
392
|
+
it "accepts an offset at which to start matching", :aggregate_failures do
|
393
|
+
re = RE2::Regexp.new('(\w+) (\w+)')
|
394
|
+
md = re.match("one two three", startpos: 4)
|
395
|
+
|
396
|
+
expect(md[1]).to eq("two")
|
397
|
+
expect(md[2]).to eq("three")
|
398
|
+
end
|
399
|
+
|
400
|
+
it "returns nil if using a starting offset past the end of the text" do
|
401
|
+
skip "Underlying RE2::Match does not have endpos argument" unless RE2::Regexp.match_has_endpos_argument?
|
402
|
+
|
403
|
+
re = RE2::Regexp.new('(\w+) (\w+)', log_errors: false)
|
404
|
+
|
405
|
+
expect(re.match("one two three", startpos: 20, endpos: 21)).to be_nil
|
406
|
+
end
|
407
|
+
|
408
|
+
it "raises an exception when given a negative starting offset" do
|
409
|
+
re = RE2::Regexp.new('(\w+) (\w+)')
|
410
|
+
|
411
|
+
expect { re.match("one two three", startpos: -1) }.to raise_error(ArgumentError, "startpos should be >= 0")
|
412
|
+
end
|
413
|
+
|
414
|
+
it "raises an exception when given a starting offset past the default ending offset" do
|
415
|
+
re = RE2::Regexp.new('(\w+) (\w+)')
|
416
|
+
|
417
|
+
expect { re.match("one two three", startpos: 30) }.to raise_error(ArgumentError, "startpos should be <= endpos")
|
418
|
+
end
|
419
|
+
|
420
|
+
it "accepts an offset at which to end matching", :aggregate_failures do
|
421
|
+
skip "Underlying RE2::Match does not have endpos argument" unless RE2::Regexp.match_has_endpos_argument?
|
422
|
+
|
423
|
+
re = RE2::Regexp.new('(\w+) (\w+)')
|
424
|
+
md = re.match("one two three", endpos: 6)
|
425
|
+
|
426
|
+
expect(md[1]).to eq("one")
|
427
|
+
expect(md[2]).to eq("tw")
|
428
|
+
end
|
429
|
+
|
430
|
+
# endpos: 0 ends matching at the very start of the text; the example expects
# nil. Skipped when the underlying RE2::Match lacks an endpos argument.
it "returns nil if using an ending offset at the start of the text" do
  skip "Underlying RE2::Match does not have endpos argument" unless RE2::Regexp.match_has_endpos_argument?

  re = RE2::Regexp.new('(\w+) (\w+)')

  expect(re.match("one two three", endpos: 0)).to be_nil
end
|
437
|
+
|
438
|
+
it "raises an exception when given a negative ending offset" do
|
439
|
+
skip "Underlying RE2::Match does not have endpos argument" unless RE2::Regexp.match_has_endpos_argument?
|
440
|
+
|
441
|
+
re = RE2::Regexp.new('(\w+) (\w+)')
|
442
|
+
|
443
|
+
expect { re.match("one two three", endpos: -1) }.to raise_error(ArgumentError, "endpos should be >= 0")
|
444
|
+
end
|
445
|
+
|
446
|
+
it "raises an exception when given an ending offset before the starting offset" do
|
447
|
+
skip "Underlying RE2::Match does not have endpos argument" unless RE2::Regexp.match_has_endpos_argument?
|
448
|
+
|
449
|
+
re = RE2::Regexp.new('(\w+) (\w+)')
|
450
|
+
|
451
|
+
expect { re.match("one two three", startpos: 3, endpos: 0) }.to raise_error(ArgumentError, "startpos should be <= endpos")
|
452
|
+
end
|
453
|
+
|
454
|
+
it "raises an error if given an ending offset and RE2 does not support it" do
|
455
|
+
skip "Underlying RE2::Match has endpos argument" if RE2::Regexp.match_has_endpos_argument?
|
456
|
+
|
457
|
+
re = RE2::Regexp.new('(\w+) (\w+)')
|
458
|
+
|
459
|
+
expect { re.match("one two three", endpos: 3) }.to raise_error(RE2::Regexp::UnsupportedError)
|
460
|
+
end
|
461
|
+
|
462
|
+
it "does not anchor matches by default when extracting submatches" do
|
463
|
+
re = RE2::Regexp.new('(two)')
|
464
|
+
|
465
|
+
expect(re.match("one two three")).to be_a(RE2::MatchData)
|
466
|
+
end
|
467
|
+
|
468
|
+
it "does not anchor matches by default without extracting submatches" do
|
469
|
+
re = RE2::Regexp.new('(two)')
|
470
|
+
|
471
|
+
expect(re.match("one two three", submatches: 0)).to eq(true)
|
472
|
+
end
|
473
|
+
|
474
|
+
it "can explicitly match without anchoring when extracting submatches" do
|
475
|
+
re = RE2::Regexp.new('(two)')
|
476
|
+
|
477
|
+
expect(re.match("one two three", anchor: :unanchored)).to be_a(RE2::MatchData)
|
478
|
+
end
|
479
|
+
|
480
|
+
it "can explicitly match with neither anchoring nor extracting submatches" do
|
481
|
+
re = RE2::Regexp.new('(two)')
|
482
|
+
|
483
|
+
expect(re.match("one two three", anchor: :unanchored, submatches: 0)).to eq(true)
|
484
|
+
end
|
485
|
+
|
486
|
+
it "can anchor matches at the start when extracting submatches", :aggregate_failures do
|
487
|
+
re = RE2::Regexp.new('(two)')
|
488
|
+
|
489
|
+
expect(re.match("two three", anchor: :anchor_start)).to be_a(RE2::MatchData)
|
490
|
+
expect(re.match("one two three", anchor: :anchor_start)).to be_nil
|
491
|
+
end
|
492
|
+
|
493
|
+
it "can anchor matches at the start without extracting submatches", :aggregate_failures do
|
494
|
+
re = RE2::Regexp.new('(two)')
|
495
|
+
|
496
|
+
expect(re.match("two three", anchor: :anchor_start, submatches: 0)).to eq(true)
|
497
|
+
expect(re.match("one two three", anchor: :anchor_start, submatches: 0)).to eq(false)
|
498
|
+
end
|
499
|
+
|
500
|
+
it "can anchor matches at both ends when extracting submatches", :aggregate_failures do
|
501
|
+
re = RE2::Regexp.new('(two)')
|
502
|
+
|
503
|
+
expect(re.match("two three", anchor: :anchor_both)).to be_nil
|
504
|
+
expect(re.match("two", anchor: :anchor_both)).to be_a(RE2::MatchData)
|
505
|
+
end
|
506
|
+
|
507
|
+
it "does not anchor matches when given a nil anchor" do
|
508
|
+
re = RE2::Regexp.new('(two)')
|
509
|
+
|
510
|
+
expect(re.match("one two three", anchor: nil)).to be_a(RE2::MatchData)
|
511
|
+
end
|
512
|
+
|
513
|
+
it "raises an exception when given an invalid anchor" do
|
514
|
+
re = RE2::Regexp.new('(two)')
|
515
|
+
|
516
|
+
expect { re.match("one two three", anchor: :invalid) }.to raise_error(ArgumentError, "anchor should be one of: :unanchored, :anchor_start, :anchor_both")
|
517
|
+
end
|
518
|
+
|
519
|
+
it "raises an exception when given a non-symbol anchor" do
|
520
|
+
re = RE2::Regexp.new('(two)')
|
521
|
+
|
522
|
+
expect { re.match("one two three", anchor: 0) }.to raise_error(TypeError)
|
523
|
+
end
|
524
|
+
|
525
|
+
it "extracts all submatches by default", :aggregate_failures do
|
526
|
+
re = RE2::Regexp.new('(\w+) (\w+) (\w+)')
|
527
|
+
md = re.match("one two three")
|
528
|
+
|
529
|
+
expect(md[1]).to eq("one")
|
530
|
+
expect(md[2]).to eq("two")
|
531
|
+
expect(md[3]).to eq("three")
|
532
|
+
end
|
533
|
+
|
534
|
+
it "supports extracting submatches containing null bytes" do
|
535
|
+
re = RE2::Regexp.new("(a\0b)")
|
536
|
+
md = re.match("a\0bc")
|
537
|
+
|
538
|
+
expect(md[1]).to eq("a\0b")
|
539
|
+
end
|
540
|
+
|
541
|
+
it "extracts a specific number of submatches", :aggregate_failures do
|
542
|
+
re = RE2::Regexp.new('(\w+) (\w+) (\w+)')
|
543
|
+
md = re.match("one two three", submatches: 2)
|
544
|
+
|
545
|
+
expect(md[1]).to eq("one")
|
546
|
+
expect(md[2]).to eq("two")
|
547
|
+
expect(md[3]).to be_nil
|
548
|
+
end
|
549
|
+
|
550
|
+
it "pads submatches with nil when requesting more than the number of capturing groups" do
|
551
|
+
re = RE2::Regexp.new('(\w+) (\w+) (\w+)')
|
552
|
+
md = re.match("one two three", submatches: 5)
|
553
|
+
|
554
|
+
expect(md.to_a).to eq(["one two three", "one", "two", "three", nil, nil])
|
555
|
+
end
|
556
|
+
|
557
|
+
it "raises an exception when given a negative number of submatches" do
|
558
|
+
re = RE2::Regexp.new('(\w+) (\w+) (\w+)')
|
559
|
+
|
560
|
+
expect { re.match("one two three", submatches: -1) }.to raise_error(ArgumentError, "number of matches should be >= 0")
|
561
|
+
end
|
562
|
+
|
563
|
+
it "raises an exception when given a non-numeric number of submatches" do
|
564
|
+
re = RE2::Regexp.new('(\w+) (\w+) (\w+)')
|
565
|
+
|
566
|
+
expect { re.match("one two three", submatches: :invalid) }.to raise_error(TypeError)
|
567
|
+
end
|
568
|
+
|
569
|
+
it "defaults to extracting all submatches when given nil", :aggregate_failures do
|
570
|
+
re = RE2::Regexp.new('(\w+) (\w+) (\w+)')
|
571
|
+
md = re.match("one two three", submatches: nil)
|
572
|
+
|
573
|
+
expect(md[1]).to eq("one")
|
574
|
+
expect(md[2]).to eq("two")
|
575
|
+
expect(md[3]).to eq("three")
|
576
|
+
end
|
577
|
+
|
578
|
+
it "accepts passing the number of submatches instead of options for backward compatibility", :aggregate_failures do
|
579
|
+
re = RE2::Regexp.new('(\w+) (\w+) (\w+)')
|
580
|
+
md = re.match("one two three", 2)
|
581
|
+
|
582
|
+
expect(md[1]).to eq("one")
|
583
|
+
expect(md[2]).to eq("two")
|
584
|
+
expect(md[3]).to be_nil
|
585
|
+
end
|
586
|
+
|
587
|
+
it "raises an exception when given invalid options" do
|
588
|
+
re = RE2::Regexp.new('(\w+) (\w+) (\w+)')
|
589
|
+
|
590
|
+
expect { re.match("one two three", :invalid) }.to raise_error(TypeError)
|
591
|
+
end
|
592
|
+
|
593
|
+
it "accepts anything that can be coerced to a hash as options", :aggregate_failures do
|
594
|
+
re = RE2::Regexp.new('(\w+) (\w+) (\w+)')
|
595
|
+
|
596
|
+
expect(re.match("one two three", nil)).to be_a(RE2::MatchData)
|
597
|
+
end
|
598
|
+
end
|
599
|
+
|
600
|
+
describe "#match?" do
|
601
|
+
it "returns only true or false even if there are capturing groups", :aggregate_failures do
|
602
|
+
re = RE2::Regexp.new('My name is (\S+) (\S+)')
|
603
|
+
|
604
|
+
expect(re.match?("My name is Alice Bloggs")).to eq(true)
|
605
|
+
expect(re.match?("My age is 99")).to eq(false)
|
606
|
+
end
|
607
|
+
|
608
|
+
it "returns false if the pattern is invalid" do
|
609
|
+
re = RE2::Regexp.new('???', log_errors: false)
|
610
|
+
|
611
|
+
expect(re.match?("My name is Alice Bloggs")).to eq(false)
|
612
|
+
end
|
613
|
+
|
614
|
+
it "raises an exception if text cannot be coerced to a string" do
|
615
|
+
re = RE2::Regexp.new('My name is (\S+) (\S+)')
|
616
|
+
|
617
|
+
expect { re.match?(0) }.to raise_error(TypeError)
|
618
|
+
end
|
619
|
+
end
|
620
|
+
|
621
|
+
describe "#partial_match?" do
|
622
|
+
it "returns only true or false even if there are capturing groups", :aggregate_failures do
|
623
|
+
re = RE2::Regexp.new('My name is (\S+) (\S+)')
|
624
|
+
|
625
|
+
expect(re.partial_match?("My name is Alice Bloggs")).to eq(true)
|
626
|
+
expect(re.partial_match?("My age is 99")).to eq(false)
|
627
|
+
end
|
628
|
+
|
629
|
+
it "supports matching against text containing null bytes", :aggregate_failures do
|
630
|
+
re = RE2::Regexp.new("a\0b")
|
631
|
+
|
632
|
+
expect(re.partial_match?("a\0b")).to eq(true)
|
633
|
+
expect(re.partial_match?("ab")).to eq(false)
|
634
|
+
end
|
635
|
+
|
636
|
+
it "returns false if the pattern is invalid" do
|
637
|
+
re = RE2::Regexp.new('???', log_errors: false)
|
638
|
+
|
639
|
+
expect(re.partial_match?("My name is Alice Bloggs")).to eq(false)
|
640
|
+
end
|
641
|
+
|
642
|
+
it "raises an exception if text cannot be coerced to a string" do
|
643
|
+
re = RE2::Regexp.new('My name is (\S+) (\S+)')
|
644
|
+
|
645
|
+
expect { re.partial_match?(0) }.to raise_error(TypeError)
|
646
|
+
end
|
647
|
+
end
|
648
|
+
|
649
|
+
describe "#=~" do
|
650
|
+
it "returns only true or false even if there are capturing groups", :aggregate_failures do
|
651
|
+
re = RE2::Regexp.new('My name is (\S+) (\S+)')
|
652
|
+
|
653
|
+
expect(re =~ "My name is Alice Bloggs").to eq(true)
|
654
|
+
expect(re =~ "My age is 99").to eq(false)
|
655
|
+
end
|
656
|
+
|
657
|
+
it "supports matching against text containing null bytes", :aggregate_failures do
|
658
|
+
re = RE2::Regexp.new("a\0b")
|
659
|
+
|
660
|
+
expect(re =~ "a\0b").to eq(true)
|
661
|
+
expect(re =~ "ab").to eq(false)
|
662
|
+
end
|
663
|
+
|
664
|
+
it "returns false if the pattern is invalid" do
|
665
|
+
re = RE2::Regexp.new('???', log_errors: false)
|
666
|
+
|
667
|
+
expect(re =~ "My name is Alice Bloggs").to eq(false)
|
668
|
+
end
|
669
|
+
|
670
|
+
it "raises an exception if text cannot be coerced to a string" do
|
671
|
+
re = RE2::Regexp.new('My name is (\S+) (\S+)')
|
672
|
+
|
673
|
+
expect { re =~ 0 }.to raise_error(TypeError)
|
674
|
+
end
|
675
|
+
end
|
676
|
+
|
677
|
+
describe "#===" do
|
678
|
+
it "returns only true or false even if there are capturing groups", :aggregate_failures do
|
679
|
+
re = RE2::Regexp.new('My name is (\S+) (\S+)')
|
680
|
+
|
681
|
+
expect(re === "My name is Alice Bloggs").to eq(true)
|
682
|
+
expect(re === "My age is 99").to eq(false)
|
683
|
+
end
|
684
|
+
|
685
|
+
it "returns false if the pattern is invalid" do
|
686
|
+
re = RE2::Regexp.new('???', log_errors: false)
|
687
|
+
|
688
|
+
expect(re === "My name is Alice Bloggs").to eq(false)
|
689
|
+
end
|
690
|
+
|
691
|
+
it "raises an exception if text cannot be coerced to a string" do
|
692
|
+
re = RE2::Regexp.new('My name is (\S+) (\S+)')
|
693
|
+
|
694
|
+
expect { re === 0 }.to raise_error(TypeError)
|
695
|
+
end
|
696
|
+
end
|
697
|
+
|
698
|
+
describe "#full_match?" do
|
699
|
+
it "returns only true or false even if there are capturing groups", :aggregate_failures do
|
700
|
+
re = RE2::Regexp.new('My name is (\S+) (\S+)')
|
701
|
+
|
702
|
+
expect(re.full_match?("My name is Alice Bloggs")).to eq(true)
|
703
|
+
expect(re.full_match?("My name is Alice Bloggs and I am 99")).to eq(false)
|
704
|
+
end
|
705
|
+
|
706
|
+
it "supports matching against text containing null bytes", :aggregate_failures do
|
707
|
+
re = RE2::Regexp.new("a\0b")
|
708
|
+
|
709
|
+
expect(re.full_match?("a\0b")).to eq(true)
|
710
|
+
expect(re.full_match?("a\0bc")).to eq(false)
|
711
|
+
end
|
712
|
+
|
713
|
+
it "returns false if the pattern is invalid" do
|
714
|
+
re = RE2::Regexp.new('???', log_errors: false)
|
715
|
+
|
716
|
+
expect(re.full_match?("My name is Alice Bloggs")).to eq(false)
|
717
|
+
end
|
718
|
+
|
719
|
+
it "raises an exception if text cannot be coerced to a string" do
|
720
|
+
re = RE2::Regexp.new('My name is (\S+) (\S+)')
|
721
|
+
|
722
|
+
expect { re.full_match?(0) }.to raise_error(TypeError)
|
723
|
+
end
|
724
|
+
end
|
725
|
+
|
726
|
+
describe "#ok?" do
|
727
|
+
it "returns true for valid patterns", :aggregate_failures do
|
728
|
+
expect(RE2::Regexp.new('woo')).to be_ok
|
729
|
+
expect(RE2::Regexp.new('wo(o)')).to be_ok
|
730
|
+
expect(RE2::Regexp.new('((\d)\w+){3,}')).to be_ok
|
731
|
+
end
|
732
|
+
|
733
|
+
it "returns false for invalid patterns", :aggregate_failures do
|
734
|
+
expect(RE2::Regexp.new('wo(o', log_errors: false)).to_not be_ok
|
735
|
+
expect(RE2::Regexp.new('wo[o', log_errors: false)).to_not be_ok
|
736
|
+
expect(RE2::Regexp.new('*', log_errors: false)).to_not be_ok
|
737
|
+
end
|
738
|
+
end
|
739
|
+
|
740
|
+
describe ".escape" do
|
741
|
+
it "transforms a string into a regexp" do
|
742
|
+
expect(RE2::Regexp.escape("1.5-2.0?")).to eq('1\.5\-2\.0\?')
|
743
|
+
end
|
744
|
+
end
|
745
|
+
|
746
|
+
describe ".quote" do
|
747
|
+
it "transforms a string into a regexp" do
|
748
|
+
expect(RE2::Regexp.quote("1.5-2.0?")).to eq('1\.5\-2\.0\?')
|
749
|
+
end
|
750
|
+
end
|
751
|
+
|
752
|
+
describe "#number_of_capturing_groups" do
|
753
|
+
it "returns the number of groups in a pattern", :aggregate_failures do
|
754
|
+
expect(RE2::Regexp.new('(a)(b)(c)').number_of_capturing_groups).to eq(3)
|
755
|
+
expect(RE2::Regexp.new('abc').number_of_capturing_groups).to eq(0)
|
756
|
+
expect(RE2::Regexp.new('a((b)c)').number_of_capturing_groups).to eq(2)
|
757
|
+
end
|
758
|
+
|
759
|
+
it "returns -1 for an invalid pattern" do
|
760
|
+
expect(RE2::Regexp.new('???', log_errors: false).number_of_capturing_groups).to eq(-1)
|
761
|
+
end
|
762
|
+
end
|
763
|
+
|
764
|
+
describe "#named_capturing_groups" do
|
765
|
+
it "returns a hash of names to indices" do
|
766
|
+
expect(RE2::Regexp.new('(?P<bob>a)').named_capturing_groups).to be_a(Hash)
|
767
|
+
end
|
768
|
+
|
769
|
+
it "maps names to indices with only one group" do
|
770
|
+
groups = RE2::Regexp.new('(?P<bob>a)').named_capturing_groups
|
771
|
+
|
772
|
+
expect(groups).to eq("bob" => 1)
|
773
|
+
end
|
774
|
+
|
775
|
+
it "maps names to indices with several groups" do
|
776
|
+
groups = RE2::Regexp.new('(?P<bob>a)(o)(?P<rob>e)').named_capturing_groups
|
777
|
+
|
778
|
+
expect(groups).to eq("bob" => 1, "rob" => 3)
|
779
|
+
end
|
780
|
+
|
781
|
+
it "returns an empty hash for an invalid regexp" do
|
782
|
+
expect(RE2::Regexp.new('???', log_errors: false).named_capturing_groups).to be_empty
|
783
|
+
end
|
784
|
+
end
|
785
|
+
|
786
|
+
describe "#scan" do
|
787
|
+
it "returns a scanner" do
|
788
|
+
r = RE2::Regexp.new('(\w+)')
|
789
|
+
scanner = r.scan("It is a truth universally acknowledged")
|
790
|
+
|
791
|
+
expect(scanner).to be_a(RE2::Scanner)
|
792
|
+
end
|
793
|
+
|
794
|
+
it "raises a type error if given invalid input" do
|
795
|
+
r = RE2::Regexp.new('(\w+)')
|
796
|
+
|
797
|
+
expect { r.scan(nil) }.to raise_error(TypeError)
|
798
|
+
end
|
799
|
+
end
|
800
|
+
|
801
|
+
describe "#partial_match" do
|
802
|
+
it "matches the pattern anywhere within the given text" do
|
803
|
+
r = RE2::Regexp.new('f(o+)')
|
804
|
+
|
805
|
+
expect(r.partial_match("foo bar")).to be_a(RE2::MatchData)
|
806
|
+
end
|
807
|
+
|
808
|
+
it "returns true or false if there are no capturing groups" do
|
809
|
+
r = RE2::Regexp.new('fo+')
|
810
|
+
|
811
|
+
expect(r.partial_match("foo bar")).to eq(true)
|
812
|
+
end
|
813
|
+
|
814
|
+
it "can set the number of submatches to extract", :aggregate_failures do
|
815
|
+
r = RE2::Regexp.new('f(o+)(a+)')
|
816
|
+
m = r.partial_match("fooaa bar", submatches: 1)
|
817
|
+
|
818
|
+
expect(m[1]).to eq("oo")
|
819
|
+
expect(m[2]).to be_nil
|
820
|
+
|
821
|
+
m = r.partial_match("fooaa bar", submatches: 2)
|
822
|
+
|
823
|
+
expect(m[1]).to eq("oo")
|
824
|
+
expect(m[2]).to eq("aa")
|
825
|
+
end
|
826
|
+
|
827
|
+
it "raises an error if given non-hash options" do
|
828
|
+
r = RE2::Regexp.new('f(o+)(a+)')
|
829
|
+
|
830
|
+
expect { r.partial_match("fooaa bar", "not a hash") }.to raise_error(TypeError)
|
831
|
+
end
|
832
|
+
|
833
|
+
it "accepts options that can be coerced to a hash", :aggregate_failures do
|
834
|
+
r = RE2::Regexp.new('f(o+)(a+)')
|
835
|
+
|
836
|
+
m = r.partial_match("fooaa bar", nil)
|
837
|
+
expect(m[1]).to eq('oo')
|
838
|
+
|
839
|
+
m = r.partial_match("fooaa bar", [])
|
840
|
+
expect(m[1]).to eq('oo')
|
841
|
+
end
|
842
|
+
|
843
|
+
it "accepts anything that can be coerced to a string" do
|
844
|
+
r = RE2::Regexp.new('f(o+)(a+)')
|
845
|
+
|
846
|
+
expect(r.partial_match(StringLike.new("fooaa bar"))).to be_a(RE2::MatchData)
|
847
|
+
end
|
848
|
+
|
849
|
+
it "does not allow the anchor to be overridden" do
|
850
|
+
r = RE2::Regexp.new('(\d+)')
|
851
|
+
|
852
|
+
expect(r.partial_match('ruby:1234', anchor: :anchor_both)).to be_a(RE2::MatchData)
|
853
|
+
end
|
854
|
+
end
|
855
|
+
|
856
|
+
describe "#full_match" do
|
857
|
+
it "only matches the pattern if all of the given text matches", :aggregate_failures do
|
858
|
+
r = RE2::Regexp.new('f(o+)')
|
859
|
+
|
860
|
+
expect(r.full_match("foo")).to be_a(RE2::MatchData)
|
861
|
+
expect(r.full_match("foo bar")).to be_nil
|
862
|
+
end
|
863
|
+
|
864
|
+
it "returns true or false if there are no capturing groups" do
|
865
|
+
r = RE2::Regexp.new('fo+')
|
866
|
+
|
867
|
+
expect(r.full_match("foo")).to eq(true)
|
868
|
+
end
|
869
|
+
|
870
|
+
it "can set the number of submatches to extract", :aggregate_failures do
|
871
|
+
r = RE2::Regexp.new('f(o+)(a+)')
|
872
|
+
m = r.full_match("fooaa", submatches: 1)
|
873
|
+
|
874
|
+
expect(m[1]).to eq("oo")
|
875
|
+
expect(m[2]).to be_nil
|
876
|
+
|
877
|
+
m = r.full_match("fooaa", submatches: 2)
|
878
|
+
|
879
|
+
expect(m[1]).to eq("oo")
|
880
|
+
expect(m[2]).to eq("aa")
|
881
|
+
end
|
882
|
+
|
883
|
+
it "raises an error if given non-hash options" do
|
884
|
+
r = RE2::Regexp.new('f(o+)(a+)')
|
885
|
+
|
886
|
+
expect { r.full_match("fooaa", "not a hash") }.to raise_error(TypeError)
|
887
|
+
end
|
888
|
+
|
889
|
+
it "accepts options that can be coerced to a hash", :aggregate_failures do
|
890
|
+
r = RE2::Regexp.new('f(o+)(a+)')
|
891
|
+
|
892
|
+
m = r.full_match("fooaa", nil)
|
893
|
+
expect(m[1]).to eq("oo")
|
894
|
+
|
895
|
+
m = r.full_match("fooaa", [])
|
896
|
+
expect(m[1]).to eq("oo")
|
897
|
+
end
|
898
|
+
|
899
|
+
it "accepts anything that can be coerced to a string" do
|
900
|
+
r = RE2::Regexp.new('f(o+)(a+)')
|
901
|
+
|
902
|
+
expect(r.full_match(StringLike.new("fooaa"), submatches: 0)).to eq(true)
|
903
|
+
end
|
904
|
+
|
905
|
+
it "does not allow the anchor to be overridden" do
|
906
|
+
r = RE2::Regexp.new('(\d+)')
|
907
|
+
|
908
|
+
expect(r.full_match('ruby:1234', anchor: :unanchored)).to be_nil
|
909
|
+
end
|
910
|
+
end
|
911
|
+
end
|