re2 2.4.3 → 2.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rspec +1 -0
- data/Gemfile +2 -0
- data/README.md +281 -192
- data/Rakefile +1 -1
- data/dependencies.yml +4 -4
- data/ext/re2/extconf.rb +250 -358
- data/ext/re2/re2.cc +505 -284
- data/ext/re2/recipes.rb +31 -20
- data/lib/re2/regexp.rb +72 -0
- data/lib/re2/scanner.rb +11 -0
- data/lib/re2/string.rb +12 -59
- data/lib/re2/version.rb +10 -1
- data/lib/re2.rb +9 -3
- data/ports/archives/20240116.1.tar.gz +0 -0
- data/ports/archives/re2-2024-04-01.tar.gz +0 -0
- data/re2.gemspec +5 -2
- data/spec/kernel_spec.rb +10 -2
- data/spec/re2/match_data_spec.rb +98 -28
- data/spec/re2/regexp_spec.rb +546 -113
- data/spec/re2/scanner_spec.rb +26 -9
- data/spec/re2/set_spec.rb +28 -18
- data/spec/re2/string_spec.rb +2 -0
- data/spec/re2_spec.rb +34 -4
- data/spec/spec_helper.rb +2 -0
- metadata +10 -9
- data/ports/archives/20230802.1.tar.gz +0 -0
- data/ports/archives/re2-2023-11-01.tar.gz +0 -0
data/spec/re2/match_data_spec.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
2
3
|
require 'objspace'
|
3
4
|
|
4
5
|
RSpec.describe RE2::MatchData do
|
@@ -12,11 +13,13 @@ RSpec.describe RE2::MatchData do
|
|
12
13
|
describe "#to_a" do
|
13
14
|
it "is populated with the match and capturing groups" do
|
14
15
|
a = RE2::Regexp.new('w(o)(o)').match('woo').to_a
|
16
|
+
|
15
17
|
expect(a).to eq(["woo", "o", "o"])
|
16
18
|
end
|
17
19
|
|
18
20
|
it "populates optional capturing groups with nil if they are missing" do
|
19
21
|
a = RE2::Regexp.new('(\d?)(a)(b)').match('ab').to_a
|
22
|
+
|
20
23
|
expect(a).to eq(["ab", nil, "a", "b"])
|
21
24
|
end
|
22
25
|
|
@@ -27,15 +30,16 @@ RSpec.describe RE2::MatchData do
|
|
27
30
|
end
|
28
31
|
|
29
32
|
it "returns ISO-8859-1 strings if the pattern is not UTF-8" do
|
30
|
-
a = RE2::Regexp.new('w(o)(o)', :utf8 => false).match('woo').to_a
|
33
|
+
a = RE2::Regexp.new('w(o)(o)', utf8: false).match('woo').to_a
|
31
34
|
|
32
35
|
expect(a.map(&:encoding)).to all eq(Encoding::ISO_8859_1)
|
33
36
|
end
|
34
37
|
end
|
35
38
|
|
36
39
|
describe "#[]" do
|
37
|
-
it "accesses capturing groups by numerical index" do
|
40
|
+
it "accesses capturing groups by numerical index", :aggregate_failures do
|
38
41
|
md = RE2::Regexp.new('(\d)(\d{2})').match("123")
|
42
|
+
|
39
43
|
expect(md[1]).to eq("1")
|
40
44
|
expect(md[2]).to eq("23")
|
41
45
|
end
|
@@ -47,46 +51,53 @@ RSpec.describe RE2::MatchData do
|
|
47
51
|
end
|
48
52
|
|
49
53
|
it "returns a ISO-8859-1 string by numerical index if the pattern is not UTF-8" do
|
50
|
-
md = RE2::Regexp.new('(\d)(\d{2})', :utf8 => false).match("123")
|
54
|
+
md = RE2::Regexp.new('(\d)(\d{2})', utf8: false).match("123")
|
51
55
|
|
52
56
|
expect(md[1].encoding).to eq(Encoding::ISO_8859_1)
|
53
57
|
end
|
54
58
|
|
55
59
|
it "has the whole match as the 0th item" do
|
56
60
|
md = RE2::Regexp.new('(\d)(\d{2})').match("123")
|
61
|
+
|
57
62
|
expect(md[0]).to eq("123")
|
58
63
|
end
|
59
64
|
|
60
|
-
it "supports access by numerical ranges" do
|
65
|
+
it "supports access by numerical ranges", :aggregate_failures do
|
61
66
|
md = RE2::Regexp.new('(\d+) (\d+) (\d+)').match("123 456 789")
|
67
|
+
|
62
68
|
expect(md[1..3]).to eq(["123", "456", "789"])
|
63
69
|
expect(md[1...3]).to eq(["123", "456"])
|
64
70
|
end
|
65
71
|
|
66
|
-
it "supports slicing" do
|
72
|
+
it "supports slicing", :aggregate_failures do
|
67
73
|
md = RE2::Regexp.new('(\d+) (\d+) (\d+)').match("123 456 789")
|
74
|
+
|
68
75
|
expect(md[1, 3]).to eq(["123", "456", "789"])
|
69
76
|
expect(md[1, 2]).to eq(["123", "456"])
|
70
77
|
end
|
71
78
|
|
72
|
-
it "returns nil if attempting to access non-existent capturing groups by index" do
|
79
|
+
it "returns nil if attempting to access non-existent capturing groups by index", :aggregate_failures do
|
73
80
|
md = RE2::Regexp.new('(\d+)').match('bob 123')
|
81
|
+
|
74
82
|
expect(md[2]).to be_nil
|
75
83
|
expect(md[3]).to be_nil
|
76
84
|
end
|
77
85
|
|
78
86
|
it "allows access by string names when there are named groups" do
|
79
87
|
md = RE2::Regexp.new('(?P<numbers>\d+)').match('bob 123')
|
88
|
+
|
80
89
|
expect(md["numbers"]).to eq("123")
|
81
90
|
end
|
82
91
|
|
83
92
|
it "allows access by symbol names when there are named groups" do
|
84
93
|
md = RE2::Regexp.new('(?P<numbers>\d+)').match('bob 123')
|
94
|
+
|
85
95
|
expect(md[:numbers]).to eq("123")
|
86
96
|
end
|
87
97
|
|
88
|
-
it "allows access by names and indices with mixed groups" do
|
98
|
+
it "allows access by names and indices with mixed groups", :aggregate_failures do
|
89
99
|
md = RE2::Regexp.new('(?P<name>\w+)(\s*)(?P<numbers>\d+)').match("bob 123")
|
100
|
+
|
90
101
|
expect(md["name"]).to eq("bob")
|
91
102
|
expect(md[:name]).to eq("bob")
|
92
103
|
expect(md[2]).to eq(" ")
|
@@ -94,54 +105,60 @@ RSpec.describe RE2::MatchData do
|
|
94
105
|
expect(md[:numbers]).to eq("123")
|
95
106
|
end
|
96
107
|
|
97
|
-
it "returns nil if no such named group exists" do
|
108
|
+
it "returns nil if no such named group exists", :aggregate_failures do
|
98
109
|
md = RE2::Regexp.new('(\d+)').match("bob 123")
|
110
|
+
|
99
111
|
expect(md["missing"]).to be_nil
|
100
112
|
expect(md[:missing]).to be_nil
|
101
113
|
end
|
102
114
|
|
103
115
|
it "raises an error if given an inappropriate index" do
|
104
116
|
md = RE2::Regexp.new('(\d+)').match("bob 123")
|
117
|
+
|
105
118
|
expect { md[nil] }.to raise_error(TypeError)
|
106
119
|
end
|
107
120
|
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
121
|
+
it "returns UTF-8 encoded strings by default", :aggregate_failures do
|
122
|
+
md = RE2::Regexp.new('(?P<name>\S+)').match("bob")
|
123
|
+
|
124
|
+
expect(md[0].encoding.name).to eq("UTF-8")
|
125
|
+
expect(md["name"].encoding.name).to eq("UTF-8")
|
126
|
+
expect(md[:name].encoding.name).to eq("UTF-8")
|
127
|
+
end
|
128
|
+
|
129
|
+
it "returns Latin 1 strings encoding when utf-8 is false", :aggregate_failures do
|
130
|
+
md = RE2::Regexp.new('(?P<name>\S+)', utf8: false).match('bob')
|
115
131
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
expect(md["name"].encoding.name).to eq("ISO-8859-1")
|
120
|
-
expect(md[:name].encoding.name).to eq("ISO-8859-1")
|
121
|
-
end
|
132
|
+
expect(md[0].encoding.name).to eq("ISO-8859-1")
|
133
|
+
expect(md["name"].encoding.name).to eq("ISO-8859-1")
|
134
|
+
expect(md[:name].encoding.name).to eq("ISO-8859-1")
|
122
135
|
end
|
123
136
|
end
|
124
137
|
|
125
138
|
describe "#string" do
|
126
139
|
it "returns the original string to match against" do
|
127
140
|
re = RE2::Regexp.new('(\D+)').match("bob")
|
141
|
+
|
128
142
|
expect(re.string).to eq("bob")
|
129
143
|
end
|
130
144
|
|
131
145
|
it "returns a copy, not the actual original" do
|
132
|
-
string = "bob"
|
146
|
+
string = +"bob"
|
133
147
|
re = RE2::Regexp.new('(\D+)').match(string)
|
148
|
+
|
134
149
|
expect(re.string).to_not equal(string)
|
135
150
|
end
|
136
151
|
|
137
152
|
it "returns a frozen string" do
|
138
153
|
re = RE2::Regexp.new('(\D+)').match("bob")
|
154
|
+
|
139
155
|
expect(re.string).to be_frozen
|
140
156
|
end
|
141
157
|
|
142
158
|
it "does not copy the string if it was already frozen" do
|
143
|
-
string = "bob".freeze
|
159
|
+
string = "bob"
|
144
160
|
re = RE2::Regexp.new('(\D+)').match(string)
|
161
|
+
|
145
162
|
expect(re.string).to equal(string)
|
146
163
|
end
|
147
164
|
end
|
@@ -149,6 +166,7 @@ RSpec.describe RE2::MatchData do
|
|
149
166
|
describe "#size" do
|
150
167
|
it "returns the number of capturing groups plus the matching string" do
|
151
168
|
md = RE2::Regexp.new('(\d+) (\d+)').match("1234 56")
|
169
|
+
|
152
170
|
expect(md.size).to eq(3)
|
153
171
|
end
|
154
172
|
end
|
@@ -156,6 +174,7 @@ RSpec.describe RE2::MatchData do
|
|
156
174
|
describe "#length" do
|
157
175
|
it "returns the number of capturing groups plus the matching string" do
|
158
176
|
md = RE2::Regexp.new('(\d+) (\d+)').match("1234 56")
|
177
|
+
|
159
178
|
expect(md.length).to eq(3)
|
160
179
|
end
|
161
180
|
end
|
@@ -164,6 +183,7 @@ RSpec.describe RE2::MatchData do
|
|
164
183
|
it "returns the original RE2::Regexp used" do
|
165
184
|
re = RE2::Regexp.new('(\d+)')
|
166
185
|
md = re.match("123")
|
186
|
+
|
167
187
|
expect(md.regexp).to equal(re)
|
168
188
|
end
|
169
189
|
end
|
@@ -171,26 +191,36 @@ RSpec.describe RE2::MatchData do
|
|
171
191
|
describe "#inspect" do
|
172
192
|
it "returns a text representation of the object and indices" do
|
173
193
|
md = RE2::Regexp.new('(\d+) (\d+)').match("1234 56")
|
194
|
+
|
174
195
|
expect(md.inspect).to eq('#<RE2::MatchData "1234 56" 1:"1234" 2:"56">')
|
175
196
|
end
|
176
197
|
|
177
198
|
it "represents missing matches as nil" do
|
178
199
|
md = RE2::Regexp.new('(\d+) (\d+)?').match("1234 ")
|
200
|
+
|
179
201
|
expect(md.inspect).to eq('#<RE2::MatchData "1234 " 1:"1234" 2:nil>')
|
180
202
|
end
|
203
|
+
|
204
|
+
it "supports matches with null bytes" do
|
205
|
+
md = RE2::Regexp.new("(\\w\0\\w) (\\w\0\\w)").match("a\0b c\0d")
|
206
|
+
|
207
|
+
expect(md.inspect).to eq("#<RE2::MatchData \"a\0b c\0d\" 1:\"a\0b\" 2:\"c\0d\">")
|
208
|
+
end
|
181
209
|
end
|
182
210
|
|
183
211
|
describe "#to_s" do
|
184
212
|
it "returns the matching part of the original string" do
|
185
213
|
md = RE2::Regexp.new('(\d{2,5})').match("one two 23456")
|
214
|
+
|
186
215
|
expect(md.to_s).to eq("23456")
|
187
216
|
end
|
188
217
|
end
|
189
218
|
|
190
219
|
describe "#to_ary" do
|
191
|
-
it "allows the object to be expanded with an asterisk" do
|
220
|
+
it "allows the object to be expanded with an asterisk", :aggregate_failures do
|
192
221
|
md = RE2::Regexp.new('(\d+) (\d+)').match("1234 56")
|
193
222
|
m1, m2, m3 = *md
|
223
|
+
|
194
224
|
expect(m1).to eq("1234 56")
|
195
225
|
expect(m2).to eq("1234")
|
196
226
|
expect(m3).to eq("56")
|
@@ -200,85 +230,125 @@ RSpec.describe RE2::MatchData do
|
|
200
230
|
describe "#begin" do
|
201
231
|
it "returns the offset of the start of a match by index" do
|
202
232
|
md = RE2::Regexp.new('(wo{2})').match('a woohoo')
|
233
|
+
|
203
234
|
expect(md.string[md.begin(0)..-1]).to eq('woohoo')
|
204
235
|
end
|
205
236
|
|
206
237
|
it "returns the offset of the start of a match by string name" do
|
207
238
|
md = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
|
239
|
+
|
208
240
|
expect(md.string[md.begin('foo')..-1]).to eq('foobar')
|
209
241
|
end
|
210
242
|
|
211
243
|
it "returns the offset of the start of a match by symbol name" do
|
212
244
|
md = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
|
245
|
+
|
213
246
|
expect(md.string[md.begin(:foo)..-1]).to eq('foobar')
|
214
247
|
end
|
215
248
|
|
249
|
+
it "returns the offset of the start of a match by something that can be coerced to a String" do
|
250
|
+
md = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
|
251
|
+
|
252
|
+
expect(md.string[md.begin(StringLike.new("foo"))..-1]).to eq('foobar')
|
253
|
+
end
|
254
|
+
|
216
255
|
it "returns the offset despite multibyte characters" do
|
217
256
|
md = RE2::Regexp.new('(Ruby)').match('I ♥ Ruby')
|
257
|
+
|
218
258
|
expect(md.string[md.begin(0)..-1]).to eq('Ruby')
|
219
259
|
end
|
220
260
|
|
221
261
|
it "returns nil for non-existent numerical matches" do
|
222
262
|
md = RE2::Regexp.new('(\d)').match('123')
|
263
|
+
|
223
264
|
expect(md.begin(10)).to be_nil
|
224
265
|
end
|
225
266
|
|
226
267
|
it "returns nil for negative numerical matches" do
|
227
268
|
md = RE2::Regexp.new('(\d)').match('123')
|
269
|
+
|
228
270
|
expect(md.begin(-4)).to be_nil
|
229
271
|
end
|
230
272
|
|
231
273
|
it "returns nil for non-existent named matches" do
|
232
274
|
md = RE2::Regexp.new('(\d)').match('123')
|
275
|
+
|
233
276
|
expect(md.begin('foo')).to be_nil
|
234
277
|
end
|
235
278
|
|
236
279
|
it "returns nil for non-existent symbol named matches" do
|
237
280
|
md = RE2::Regexp.new('(\d)').match('123')
|
281
|
+
|
238
282
|
expect(md.begin(:foo)).to be_nil
|
239
283
|
end
|
284
|
+
|
285
|
+
it "raises a type error if given an invalid name or number" do
|
286
|
+
md = RE2::Regexp.new('(\d)').match('123')
|
287
|
+
|
288
|
+
expect { md.begin(nil) }.to raise_error(TypeError)
|
289
|
+
end
|
240
290
|
end
|
241
291
|
|
242
292
|
describe "#end" do
|
243
293
|
it "returns the offset of the character following the end of a match" do
|
244
294
|
md = RE2::Regexp.new('(wo{2})').match('a woohoo')
|
295
|
+
|
245
296
|
expect(md.string[0...md.end(0)]).to eq('a woo')
|
246
297
|
end
|
247
298
|
|
248
299
|
it "returns the offset of a match by string name" do
|
249
300
|
md = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
|
301
|
+
|
250
302
|
expect(md.string[0...md.end('foo')]).to eq('a foo')
|
251
303
|
end
|
252
304
|
|
253
305
|
it "returns the offset of a match by symbol name" do
|
254
306
|
md = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
|
307
|
+
|
255
308
|
expect(md.string[0...md.end(:foo)]).to eq('a foo')
|
256
309
|
end
|
257
310
|
|
311
|
+
it "returns the offset of a match by something that can be coerced to a String" do
|
312
|
+
md = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
|
313
|
+
|
314
|
+
expect(md.string[0...md.end(StringLike.new("foo"))]).to eq('a foo')
|
315
|
+
end
|
316
|
+
|
258
317
|
it "returns the offset despite multibyte characters" do
|
259
318
|
md = RE2::Regexp.new('(Ruby)').match('I ♥ Ruby')
|
319
|
+
|
260
320
|
expect(md.string[0...md.end(0)]).to eq('I ♥ Ruby')
|
261
321
|
end
|
262
322
|
|
263
323
|
it "returns nil for non-existent numerical matches" do
|
264
324
|
md = RE2::Regexp.new('(\d)').match('123')
|
325
|
+
|
265
326
|
expect(md.end(10)).to be_nil
|
266
327
|
end
|
267
328
|
|
268
329
|
it "returns nil for negative numerical matches" do
|
269
330
|
md = RE2::Regexp.new('(\d)').match('123')
|
331
|
+
|
270
332
|
expect(md.end(-4)).to be_nil
|
271
333
|
end
|
272
334
|
|
273
335
|
it "returns nil for non-existent named matches" do
|
274
336
|
md = RE2::Regexp.new('(\d)').match('123')
|
337
|
+
|
275
338
|
expect(md.end('foo')).to be_nil
|
276
339
|
end
|
277
340
|
|
278
341
|
it "returns nil for non-existent symbol named matches" do
|
279
342
|
md = RE2::Regexp.new('(\d)').match('123')
|
343
|
+
|
280
344
|
expect(md.end(:foo)).to be_nil
|
281
345
|
end
|
346
|
+
|
347
|
+
it "raises a type error if given an invalid name or number" do
|
348
|
+
md = RE2::Regexp.new('(\d)').match('123')
|
349
|
+
|
350
|
+
expect { md.end(nil) }.to raise_error(TypeError)
|
351
|
+
end
|
282
352
|
end
|
283
353
|
|
284
354
|
describe "#deconstruct" do
|
@@ -299,19 +369,19 @@ RSpec.describe RE2::MatchData do
|
|
299
369
|
it "returns all named captures if given nil" do
|
300
370
|
md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
301
371
|
|
302
|
-
expect(md.deconstruct_keys(nil)).to eq(:numbers => '123', :letters => 'abc')
|
372
|
+
expect(md.deconstruct_keys(nil)).to eq(numbers: '123', letters: 'abc')
|
303
373
|
end
|
304
374
|
|
305
375
|
it "returns only named captures if given names" do
|
306
376
|
md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
307
377
|
|
308
|
-
expect(md.deconstruct_keys([:numbers])).to eq(:numbers => '123')
|
378
|
+
expect(md.deconstruct_keys([:numbers])).to eq(numbers: '123')
|
309
379
|
end
|
310
380
|
|
311
381
|
it "returns named captures up until an invalid name is given" do
|
312
382
|
md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
313
383
|
|
314
|
-
expect(md.deconstruct_keys([:numbers, :punctuation])).to eq(:numbers => '123')
|
384
|
+
expect(md.deconstruct_keys([:numbers, :punctuation])).to eq(numbers: '123')
|
315
385
|
end
|
316
386
|
|
317
387
|
it "returns an empty hash if given more capture names than exist" do
|