re2 2.4.3 → 2.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec +1 -0
- data/Gemfile +2 -0
- data/README.md +281 -192
- data/Rakefile +1 -1
- data/dependencies.yml +4 -4
- data/ext/re2/extconf.rb +250 -358
- data/ext/re2/re2.cc +505 -284
- data/ext/re2/recipes.rb +31 -20
- data/lib/re2/regexp.rb +72 -0
- data/lib/re2/scanner.rb +11 -0
- data/lib/re2/string.rb +12 -59
- data/lib/re2/version.rb +10 -1
- data/lib/re2.rb +9 -3
- data/ports/archives/20240116.1.tar.gz +0 -0
- data/ports/archives/re2-2024-04-01.tar.gz +0 -0
- data/re2.gemspec +5 -2
- data/spec/kernel_spec.rb +10 -2
- data/spec/re2/match_data_spec.rb +98 -28
- data/spec/re2/regexp_spec.rb +546 -113
- data/spec/re2/scanner_spec.rb +26 -9
- data/spec/re2/set_spec.rb +28 -18
- data/spec/re2/string_spec.rb +2 -0
- data/spec/re2_spec.rb +34 -4
- data/spec/spec_helper.rb +2 -0
- metadata +10 -9
- data/ports/archives/20230802.1.tar.gz +0 -0
- data/ports/archives/re2-2023-11-01.tar.gz +0 -0
data/spec/re2/match_data_spec.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
2
3
|
require 'objspace'
|
3
4
|
|
4
5
|
RSpec.describe RE2::MatchData do
|
@@ -12,11 +13,13 @@ RSpec.describe RE2::MatchData do
|
|
12
13
|
describe "#to_a" do
|
13
14
|
it "is populated with the match and capturing groups" do
|
14
15
|
a = RE2::Regexp.new('w(o)(o)').match('woo').to_a
|
16
|
+
|
15
17
|
expect(a).to eq(["woo", "o", "o"])
|
16
18
|
end
|
17
19
|
|
18
20
|
it "populates optional capturing groups with nil if they are missing" do
|
19
21
|
a = RE2::Regexp.new('(\d?)(a)(b)').match('ab').to_a
|
22
|
+
|
20
23
|
expect(a).to eq(["ab", nil, "a", "b"])
|
21
24
|
end
|
22
25
|
|
@@ -27,15 +30,16 @@ RSpec.describe RE2::MatchData do
|
|
27
30
|
end
|
28
31
|
|
29
32
|
it "returns ISO-8859-1 strings if the pattern is not UTF-8" do
|
30
|
-
a = RE2::Regexp.new('w(o)(o)', :utf8 => false).match('woo').to_a
|
33
|
+
a = RE2::Regexp.new('w(o)(o)', utf8: false).match('woo').to_a
|
31
34
|
|
32
35
|
expect(a.map(&:encoding)).to all eq(Encoding::ISO_8859_1)
|
33
36
|
end
|
34
37
|
end
|
35
38
|
|
36
39
|
describe "#[]" do
|
37
|
-
it "accesses capturing groups by numerical index" do
|
40
|
+
it "accesses capturing groups by numerical index", :aggregate_failures do
|
38
41
|
md = RE2::Regexp.new('(\d)(\d{2})').match("123")
|
42
|
+
|
39
43
|
expect(md[1]).to eq("1")
|
40
44
|
expect(md[2]).to eq("23")
|
41
45
|
end
|
@@ -47,46 +51,53 @@ RSpec.describe RE2::MatchData do
|
|
47
51
|
end
|
48
52
|
|
49
53
|
it "returns a ISO-8859-1 string by numerical index if the pattern is not UTF-8" do
|
50
|
-
md = RE2::Regexp.new('(\d)(\d{2})', :utf8 => false).match("123")
|
54
|
+
md = RE2::Regexp.new('(\d)(\d{2})', utf8: false).match("123")
|
51
55
|
|
52
56
|
expect(md[1].encoding).to eq(Encoding::ISO_8859_1)
|
53
57
|
end
|
54
58
|
|
55
59
|
it "has the whole match as the 0th item" do
|
56
60
|
md = RE2::Regexp.new('(\d)(\d{2})').match("123")
|
61
|
+
|
57
62
|
expect(md[0]).to eq("123")
|
58
63
|
end
|
59
64
|
|
60
|
-
it "supports access by numerical ranges" do
|
65
|
+
it "supports access by numerical ranges", :aggregate_failures do
|
61
66
|
md = RE2::Regexp.new('(\d+) (\d+) (\d+)').match("123 456 789")
|
67
|
+
|
62
68
|
expect(md[1..3]).to eq(["123", "456", "789"])
|
63
69
|
expect(md[1...3]).to eq(["123", "456"])
|
64
70
|
end
|
65
71
|
|
66
|
-
it "supports slicing" do
|
72
|
+
it "supports slicing", :aggregate_failures do
|
67
73
|
md = RE2::Regexp.new('(\d+) (\d+) (\d+)').match("123 456 789")
|
74
|
+
|
68
75
|
expect(md[1, 3]).to eq(["123", "456", "789"])
|
69
76
|
expect(md[1, 2]).to eq(["123", "456"])
|
70
77
|
end
|
71
78
|
|
72
|
-
it "returns nil if attempting to access non-existent capturing groups by index" do
|
79
|
+
it "returns nil if attempting to access non-existent capturing groups by index", :aggregate_failures do
|
73
80
|
md = RE2::Regexp.new('(\d+)').match('bob 123')
|
81
|
+
|
74
82
|
expect(md[2]).to be_nil
|
75
83
|
expect(md[3]).to be_nil
|
76
84
|
end
|
77
85
|
|
78
86
|
it "allows access by string names when there are named groups" do
|
79
87
|
md = RE2::Regexp.new('(?P<numbers>\d+)').match('bob 123')
|
88
|
+
|
80
89
|
expect(md["numbers"]).to eq("123")
|
81
90
|
end
|
82
91
|
|
83
92
|
it "allows access by symbol names when there are named groups" do
|
84
93
|
md = RE2::Regexp.new('(?P<numbers>\d+)').match('bob 123')
|
94
|
+
|
85
95
|
expect(md[:numbers]).to eq("123")
|
86
96
|
end
|
87
97
|
|
88
|
-
it "allows access by names and indices with mixed groups" do
|
98
|
+
it "allows access by names and indices with mixed groups", :aggregate_failures do
|
89
99
|
md = RE2::Regexp.new('(?P<name>\w+)(\s*)(?P<numbers>\d+)').match("bob 123")
|
100
|
+
|
90
101
|
expect(md["name"]).to eq("bob")
|
91
102
|
expect(md[:name]).to eq("bob")
|
92
103
|
expect(md[2]).to eq(" ")
|
@@ -94,54 +105,60 @@ RSpec.describe RE2::MatchData do
|
|
94
105
|
expect(md[:numbers]).to eq("123")
|
95
106
|
end
|
96
107
|
|
97
|
-
it "returns nil if no such named group exists" do
|
108
|
+
it "returns nil if no such named group exists", :aggregate_failures do
|
98
109
|
md = RE2::Regexp.new('(\d+)').match("bob 123")
|
110
|
+
|
99
111
|
expect(md["missing"]).to be_nil
|
100
112
|
expect(md[:missing]).to be_nil
|
101
113
|
end
|
102
114
|
|
103
115
|
it "raises an error if given an inappropriate index" do
|
104
116
|
md = RE2::Regexp.new('(\d+)').match("bob 123")
|
117
|
+
|
105
118
|
expect { md[nil] }.to raise_error(TypeError)
|
106
119
|
end
|
107
120
|
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
121
|
+
it "returns UTF-8 encoded strings by default", :aggregate_failures do
|
122
|
+
md = RE2::Regexp.new('(?P<name>\S+)').match("bob")
|
123
|
+
|
124
|
+
expect(md[0].encoding.name).to eq("UTF-8")
|
125
|
+
expect(md["name"].encoding.name).to eq("UTF-8")
|
126
|
+
expect(md[:name].encoding.name).to eq("UTF-8")
|
127
|
+
end
|
128
|
+
|
129
|
+
it "returns Latin 1 strings encoding when utf-8 is false", :aggregate_failures do
|
130
|
+
md = RE2::Regexp.new('(?P<name>\S+)', utf8: false).match('bob')
|
115
131
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
expect(md["name"].encoding.name).to eq("ISO-8859-1")
|
120
|
-
expect(md[:name].encoding.name).to eq("ISO-8859-1")
|
121
|
-
end
|
132
|
+
expect(md[0].encoding.name).to eq("ISO-8859-1")
|
133
|
+
expect(md["name"].encoding.name).to eq("ISO-8859-1")
|
134
|
+
expect(md[:name].encoding.name).to eq("ISO-8859-1")
|
122
135
|
end
|
123
136
|
end
|
124
137
|
|
125
138
|
describe "#string" do
|
126
139
|
it "returns the original string to match against" do
|
127
140
|
re = RE2::Regexp.new('(\D+)').match("bob")
|
141
|
+
|
128
142
|
expect(re.string).to eq("bob")
|
129
143
|
end
|
130
144
|
|
131
145
|
it "returns a copy, not the actual original" do
|
132
|
-
string = "bob"
|
146
|
+
string = +"bob"
|
133
147
|
re = RE2::Regexp.new('(\D+)').match(string)
|
148
|
+
|
134
149
|
expect(re.string).to_not equal(string)
|
135
150
|
end
|
136
151
|
|
137
152
|
it "returns a frozen string" do
|
138
153
|
re = RE2::Regexp.new('(\D+)').match("bob")
|
154
|
+
|
139
155
|
expect(re.string).to be_frozen
|
140
156
|
end
|
141
157
|
|
142
158
|
it "does not copy the string if it was already frozen" do
|
143
|
-
string = "bob".freeze
|
159
|
+
string = "bob"
|
144
160
|
re = RE2::Regexp.new('(\D+)').match(string)
|
161
|
+
|
145
162
|
expect(re.string).to equal(string)
|
146
163
|
end
|
147
164
|
end
|
@@ -149,6 +166,7 @@ RSpec.describe RE2::MatchData do
|
|
149
166
|
describe "#size" do
|
150
167
|
it "returns the number of capturing groups plus the matching string" do
|
151
168
|
md = RE2::Regexp.new('(\d+) (\d+)').match("1234 56")
|
169
|
+
|
152
170
|
expect(md.size).to eq(3)
|
153
171
|
end
|
154
172
|
end
|
@@ -156,6 +174,7 @@ RSpec.describe RE2::MatchData do
|
|
156
174
|
describe "#length" do
|
157
175
|
it "returns the number of capturing groups plus the matching string" do
|
158
176
|
md = RE2::Regexp.new('(\d+) (\d+)').match("1234 56")
|
177
|
+
|
159
178
|
expect(md.length).to eq(3)
|
160
179
|
end
|
161
180
|
end
|
@@ -164,6 +183,7 @@ RSpec.describe RE2::MatchData do
|
|
164
183
|
it "returns the original RE2::Regexp used" do
|
165
184
|
re = RE2::Regexp.new('(\d+)')
|
166
185
|
md = re.match("123")
|
186
|
+
|
167
187
|
expect(md.regexp).to equal(re)
|
168
188
|
end
|
169
189
|
end
|
@@ -171,26 +191,36 @@ RSpec.describe RE2::MatchData do
|
|
171
191
|
describe "#inspect" do
|
172
192
|
it "returns a text representation of the object and indices" do
|
173
193
|
md = RE2::Regexp.new('(\d+) (\d+)').match("1234 56")
|
194
|
+
|
174
195
|
expect(md.inspect).to eq('#<RE2::MatchData "1234 56" 1:"1234" 2:"56">')
|
175
196
|
end
|
176
197
|
|
177
198
|
it "represents missing matches as nil" do
|
178
199
|
md = RE2::Regexp.new('(\d+) (\d+)?').match("1234 ")
|
200
|
+
|
179
201
|
expect(md.inspect).to eq('#<RE2::MatchData "1234 " 1:"1234" 2:nil>')
|
180
202
|
end
|
203
|
+
|
204
|
+
it "supports matches with null bytes" do
|
205
|
+
md = RE2::Regexp.new("(\\w\0\\w) (\\w\0\\w)").match("a\0b c\0d")
|
206
|
+
|
207
|
+
expect(md.inspect).to eq("#<RE2::MatchData \"a\0b c\0d\" 1:\"a\0b\" 2:\"c\0d\">")
|
208
|
+
end
|
181
209
|
end
|
182
210
|
|
183
211
|
describe "#to_s" do
|
184
212
|
it "returns the matching part of the original string" do
|
185
213
|
md = RE2::Regexp.new('(\d{2,5})').match("one two 23456")
|
214
|
+
|
186
215
|
expect(md.to_s).to eq("23456")
|
187
216
|
end
|
188
217
|
end
|
189
218
|
|
190
219
|
describe "#to_ary" do
|
191
|
-
it "allows the object to be expanded with an asterisk" do
|
220
|
+
it "allows the object to be expanded with an asterisk", :aggregate_failures do
|
192
221
|
md = RE2::Regexp.new('(\d+) (\d+)').match("1234 56")
|
193
222
|
m1, m2, m3 = *md
|
223
|
+
|
194
224
|
expect(m1).to eq("1234 56")
|
195
225
|
expect(m2).to eq("1234")
|
196
226
|
expect(m3).to eq("56")
|
@@ -200,85 +230,125 @@ RSpec.describe RE2::MatchData do
|
|
200
230
|
describe "#begin" do
|
201
231
|
it "returns the offset of the start of a match by index" do
|
202
232
|
md = RE2::Regexp.new('(wo{2})').match('a woohoo')
|
233
|
+
|
203
234
|
expect(md.string[md.begin(0)..-1]).to eq('woohoo')
|
204
235
|
end
|
205
236
|
|
206
237
|
it "returns the offset of the start of a match by string name" do
|
207
238
|
md = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
|
239
|
+
|
208
240
|
expect(md.string[md.begin('foo')..-1]).to eq('foobar')
|
209
241
|
end
|
210
242
|
|
211
243
|
it "returns the offset of the start of a match by symbol name" do
|
212
244
|
md = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
|
245
|
+
|
213
246
|
expect(md.string[md.begin(:foo)..-1]).to eq('foobar')
|
214
247
|
end
|
215
248
|
|
249
|
+
it "returns the offset of the start of a match by something that can be coerced to a String" do
|
250
|
+
md = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
|
251
|
+
|
252
|
+
expect(md.string[md.begin(StringLike.new("foo"))..-1]).to eq('foobar')
|
253
|
+
end
|
254
|
+
|
216
255
|
it "returns the offset despite multibyte characters" do
|
217
256
|
md = RE2::Regexp.new('(Ruby)').match('I ♥ Ruby')
|
257
|
+
|
218
258
|
expect(md.string[md.begin(0)..-1]).to eq('Ruby')
|
219
259
|
end
|
220
260
|
|
221
261
|
it "returns nil for non-existent numerical matches" do
|
222
262
|
md = RE2::Regexp.new('(\d)').match('123')
|
263
|
+
|
223
264
|
expect(md.begin(10)).to be_nil
|
224
265
|
end
|
225
266
|
|
226
267
|
it "returns nil for negative numerical matches" do
|
227
268
|
md = RE2::Regexp.new('(\d)').match('123')
|
269
|
+
|
228
270
|
expect(md.begin(-4)).to be_nil
|
229
271
|
end
|
230
272
|
|
231
273
|
it "returns nil for non-existent named matches" do
|
232
274
|
md = RE2::Regexp.new('(\d)').match('123')
|
275
|
+
|
233
276
|
expect(md.begin('foo')).to be_nil
|
234
277
|
end
|
235
278
|
|
236
279
|
it "returns nil for non-existent symbol named matches" do
|
237
280
|
md = RE2::Regexp.new('(\d)').match('123')
|
281
|
+
|
238
282
|
expect(md.begin(:foo)).to be_nil
|
239
283
|
end
|
284
|
+
|
285
|
+
it "raises a type error if given an invalid name or number" do
|
286
|
+
md = RE2::Regexp.new('(\d)').match('123')
|
287
|
+
|
288
|
+
expect { md.begin(nil) }.to raise_error(TypeError)
|
289
|
+
end
|
240
290
|
end
|
241
291
|
|
242
292
|
describe "#end" do
|
243
293
|
it "returns the offset of the character following the end of a match" do
|
244
294
|
md = RE2::Regexp.new('(wo{2})').match('a woohoo')
|
295
|
+
|
245
296
|
expect(md.string[0...md.end(0)]).to eq('a woo')
|
246
297
|
end
|
247
298
|
|
248
299
|
it "returns the offset of a match by string name" do
|
249
300
|
md = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
|
301
|
+
|
250
302
|
expect(md.string[0...md.end('foo')]).to eq('a foo')
|
251
303
|
end
|
252
304
|
|
253
305
|
it "returns the offset of a match by symbol name" do
|
254
306
|
md = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
|
307
|
+
|
255
308
|
expect(md.string[0...md.end(:foo)]).to eq('a foo')
|
256
309
|
end
|
257
310
|
|
311
|
+
it "returns the offset of a match by something that can be coerced to a String" do
|
312
|
+
md = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
|
313
|
+
|
314
|
+
expect(md.string[0...md.end(StringLike.new("foo"))]).to eq('a foo')
|
315
|
+
end
|
316
|
+
|
258
317
|
it "returns the offset despite multibyte characters" do
|
259
318
|
md = RE2::Regexp.new('(Ruby)').match('I ♥ Ruby')
|
319
|
+
|
260
320
|
expect(md.string[0...md.end(0)]).to eq('I ♥ Ruby')
|
261
321
|
end
|
262
322
|
|
263
323
|
it "returns nil for non-existent numerical matches" do
|
264
324
|
md = RE2::Regexp.new('(\d)').match('123')
|
325
|
+
|
265
326
|
expect(md.end(10)).to be_nil
|
266
327
|
end
|
267
328
|
|
268
329
|
it "returns nil for negative numerical matches" do
|
269
330
|
md = RE2::Regexp.new('(\d)').match('123')
|
331
|
+
|
270
332
|
expect(md.end(-4)).to be_nil
|
271
333
|
end
|
272
334
|
|
273
335
|
it "returns nil for non-existent named matches" do
|
274
336
|
md = RE2::Regexp.new('(\d)').match('123')
|
337
|
+
|
275
338
|
expect(md.end('foo')).to be_nil
|
276
339
|
end
|
277
340
|
|
278
341
|
it "returns nil for non-existent symbol named matches" do
|
279
342
|
md = RE2::Regexp.new('(\d)').match('123')
|
343
|
+
|
280
344
|
expect(md.end(:foo)).to be_nil
|
281
345
|
end
|
346
|
+
|
347
|
+
it "raises a type error if given an invalid name or number" do
|
348
|
+
md = RE2::Regexp.new('(\d)').match('123')
|
349
|
+
|
350
|
+
expect { md.end(nil) }.to raise_error(TypeError)
|
351
|
+
end
|
282
352
|
end
|
283
353
|
|
284
354
|
describe "#deconstruct" do
|
@@ -299,19 +369,19 @@ RSpec.describe RE2::MatchData do
|
|
299
369
|
it "returns all named captures if given nil" do
|
300
370
|
md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
301
371
|
|
302
|
-
expect(md.deconstruct_keys(nil)).to eq(:numbers => '123', :letters => 'abc')
|
372
|
+
expect(md.deconstruct_keys(nil)).to eq(numbers: '123', letters: 'abc')
|
303
373
|
end
|
304
374
|
|
305
375
|
it "returns only named captures if given names" do
|
306
376
|
md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
307
377
|
|
308
|
-
expect(md.deconstruct_keys([:numbers])).to eq(:numbers => '123')
|
378
|
+
expect(md.deconstruct_keys([:numbers])).to eq(numbers: '123')
|
309
379
|
end
|
310
380
|
|
311
381
|
it "returns named captures up until an invalid name is given" do
|
312
382
|
md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
313
383
|
|
314
|
-
expect(md.deconstruct_keys([:numbers, :punctuation])).to eq(:numbers => '123')
|
384
|
+
expect(md.deconstruct_keys([:numbers, :punctuation])).to eq(numbers: '123')
|
315
385
|
end
|
316
386
|
|
317
387
|
it "returns an empty hash if given more capture names than exist" do
|