re2 2.15.0.rc1-x86_64-linux-musl
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/Gemfile +11 -0
- data/LICENSE-DEPENDENCIES.txt +237 -0
- data/LICENSE.txt +28 -0
- data/README.md +396 -0
- data/Rakefile +94 -0
- data/dependencies.yml +7 -0
- data/ext/re2/extconf.rb +332 -0
- data/ext/re2/re2.cc +2254 -0
- data/ext/re2/recipes.rb +54 -0
- data/lib/3.1/re2.so +0 -0
- data/lib/3.2/re2.so +0 -0
- data/lib/3.3/re2.so +0 -0
- data/lib/3.4/re2.so +0 -0
- data/lib/re2/regexp.rb +72 -0
- data/lib/re2/scanner.rb +26 -0
- data/lib/re2/string.rb +38 -0
- data/lib/re2/version.rb +14 -0
- data/lib/re2.rb +20 -0
- data/re2.gemspec +47 -0
- data/spec/kernel_spec.rb +37 -0
- data/spec/re2/match_data_spec.rb +411 -0
- data/spec/re2/regexp_spec.rb +911 -0
- data/spec/re2/scanner_spec.rb +275 -0
- data/spec/re2/set_spec.rb +231 -0
- data/spec/re2/string_spec.rb +62 -0
- data/spec/re2_spec.rb +201 -0
- data/spec/spec_helper.rb +31 -0
- metadata +129 -0
@@ -0,0 +1,411 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'objspace'
|
4
|
+
|
5
|
+
# Specification for RE2::MatchData, the object returned by RE2::Regexp#match
# in the examples below. Each nested group exercises one public method.
#
# NOTE(review): StringLike is not defined in this file; presumably it comes
# from spec_helper and responds to an implicit String conversion -- confirm.
RSpec.describe RE2::MatchData do
  # ObjectSpace.memsize_of is provided by the 'objspace' library, which this
  # file requires at the top.
  it "reports a larger consuming memory size when it has more matches" do
    one_group = RE2::Regexp.new('w(o)').match('woo')
    two_groups = RE2::Regexp.new('w(o)(o)').match('woo')

    smaller = ObjectSpace.memsize_of(one_group)
    larger = ObjectSpace.memsize_of(two_groups)

    expect(smaller).to be < larger
  end

  # The array form: whole match first, then one entry per capturing group.
  describe "#to_a" do
    it "is populated with the match and capturing groups" do
      result = RE2::Regexp.new('w(o)(o)').match('woo')

      expect(result.to_a).to eq(%w[woo o o])
    end

    it "populates optional capturing groups with nil if they are missing" do
      result = RE2::Regexp.new('(\d?)(a)(b)').match('ab')

      expect(result.to_a).to eq(["ab", nil, "a", "b"])
    end

    it "returns UTF-8 strings if the pattern is UTF-8" do
      encodings = RE2::Regexp.new('w(o)(o)').match('woo').to_a.map(&:encoding)

      expect(encodings).to all(eq(Encoding::UTF_8))
    end

    it "returns ISO-8859-1 strings if the pattern is not UTF-8" do
      encodings = RE2::Regexp.new('w(o)(o)', utf8: false).match('woo').to_a.map(&:encoding)

      expect(encodings).to all(eq(Encoding::ISO_8859_1))
    end
  end

  # Element access by integer, range, (start, length) pair, String or Symbol.
  describe "#[]" do
    it "accesses capturing groups by numerical index", :aggregate_failures do
      result = RE2::Regexp.new('(\d)(\d{2})').match("123")

      expect(result[1]).to eq("1")
      expect(result[2]).to eq("23")
    end

    it "returns a UTF-8 string by numerical index if the pattern is UTF-8" do
      first_group = RE2::Regexp.new('(\d)(\d{2})').match("123")[1]

      expect(first_group.encoding).to eq(Encoding::UTF_8)
    end

    it "returns a ISO-8859-1 string by numerical index if the pattern is not UTF-8" do
      first_group = RE2::Regexp.new('(\d)(\d{2})', utf8: false).match("123")[1]

      expect(first_group.encoding).to eq(Encoding::ISO_8859_1)
    end

    it "has the whole match as the 0th item" do
      result = RE2::Regexp.new('(\d)(\d{2})').match("123")

      expect(result[0]).to eq("123")
    end

    it "supports access by numerical ranges", :aggregate_failures do
      result = RE2::Regexp.new('(\d+) (\d+) (\d+)').match("123 456 789")

      expect(result[1..3]).to eq(%w[123 456 789])
      expect(result[1...3]).to eq(%w[123 456])
    end

    it "supports slicing", :aggregate_failures do
      result = RE2::Regexp.new('(\d+) (\d+) (\d+)').match("123 456 789")

      expect(result[1, 3]).to eq(%w[123 456 789])
      expect(result[1, 2]).to eq(%w[123 456])
    end

    it "returns nil if attempting to access non-existent capturing groups by index", :aggregate_failures do
      result = RE2::Regexp.new('(\d+)').match('bob 123')

      expect(result[2]).to be_nil
      expect(result[3]).to be_nil
    end

    it "allows access by string names when there are named groups" do
      result = RE2::Regexp.new('(?P<numbers>\d+)').match('bob 123')

      expect(result["numbers"]).to eq("123")
    end

    it "allows access by symbol names when there are named groups" do
      result = RE2::Regexp.new('(?P<numbers>\d+)').match('bob 123')

      expect(result[:numbers]).to eq("123")
    end

    it "allows access by names and indices with mixed groups", :aggregate_failures do
      result = RE2::Regexp.new('(?P<name>\w+)(\s*)(?P<numbers>\d+)').match("bob 123")

      expect(result["name"]).to eq("bob")
      expect(result[:name]).to eq("bob")
      expect(result[2]).to eq(" ")
      expect(result["numbers"]).to eq("123")
      expect(result[:numbers]).to eq("123")
    end

    it "returns nil if no such named group exists", :aggregate_failures do
      result = RE2::Regexp.new('(\d+)').match("bob 123")

      expect(result["missing"]).to be_nil
      expect(result[:missing]).to be_nil
    end

    it "raises an error if given an inappropriate index" do
      result = RE2::Regexp.new('(\d+)').match("bob 123")

      expect { result[nil] }.to raise_error(TypeError)
    end

    it "returns UTF-8 encoded strings by default", :aggregate_failures do
      result = RE2::Regexp.new('(?P<name>\S+)').match("bob")

      expect(result[0].encoding.name).to eq("UTF-8")
      expect(result["name"].encoding.name).to eq("UTF-8")
      expect(result[:name].encoding.name).to eq("UTF-8")
    end

    it "returns Latin 1 strings encoding when utf-8 is false", :aggregate_failures do
      result = RE2::Regexp.new('(?P<name>\S+)', utf8: false).match('bob')

      expect(result[0].encoding.name).to eq("ISO-8859-1")
      expect(result["name"].encoding.name).to eq("ISO-8859-1")
      expect(result[:name].encoding.name).to eq("ISO-8859-1")
    end
  end

  # The subject text: always frozen, and only copied when the caller's string
  # was not already frozen.
  describe "#string" do
    it "returns the original string to match against" do
      result = RE2::Regexp.new('(\D+)').match("bob")

      expect(result.string).to eq("bob")
    end

    it "returns a copy, not the actual original" do
      # The file's frozen_string_literal magic comment freezes "bob", so an
      # explicit dup is needed to hand over a mutable string.
      mutable = "bob".dup
      result = RE2::Regexp.new('(\D+)').match(mutable)

      expect(result.string).not_to equal(mutable)
    end

    it "returns a frozen string" do
      result = RE2::Regexp.new('(\D+)').match("bob")

      expect(result.string).to be_frozen
    end

    it "does not copy the string if it was already frozen" do
      # Frozen by the file's frozen_string_literal magic comment.
      frozen = "bob"
      result = RE2::Regexp.new('(\D+)').match(frozen)

      expect(result.string).to equal(frozen)
    end
  end

  describe "#size" do
    it "returns the number of capturing groups plus the matching string" do
      result = RE2::Regexp.new('(\d+) (\d+)').match("1234 56")

      expect(result.size).to eq(3)
    end
  end

  describe "#length" do
    it "returns the number of capturing groups plus the matching string" do
      result = RE2::Regexp.new('(\d+) (\d+)').match("1234 56")

      expect(result.length).to eq(3)
    end
  end

  describe "#regexp" do
    it "returns the original RE2::Regexp used" do
      pattern = RE2::Regexp.new('(\d+)')
      result = pattern.match("123")

      # Identity, not just equality: the very same Regexp object comes back.
      expect(result.regexp).to equal(pattern)
    end
  end

  describe "#inspect" do
    it "returns a text representation of the object and indices" do
      result = RE2::Regexp.new('(\d+) (\d+)').match("1234 56")

      expect(result.inspect).to eq('#<RE2::MatchData "1234 56" 1:"1234" 2:"56">')
    end

    it "represents missing matches as nil" do
      result = RE2::Regexp.new('(\d+) (\d+)?').match("1234 ")

      expect(result.inspect).to eq('#<RE2::MatchData "1234 " 1:"1234" 2:nil>')
    end

    it "supports matches with null bytes" do
      result = RE2::Regexp.new("(\\w\0\\w) (\\w\0\\w)").match("a\0b c\0d")

      expect(result.inspect).to eq("#<RE2::MatchData \"a\0b c\0d\" 1:\"a\0b\" 2:\"c\0d\">")
    end
  end

  describe "#to_s" do
    it "returns the matching part of the original string" do
      result = RE2::Regexp.new('(\d{2,5})').match("one two 23456")

      expect(result.to_s).to eq("23456")
    end
  end

  describe "#to_ary" do
    it "allows the object to be expanded with an asterisk", :aggregate_failures do
      result = RE2::Regexp.new('(\d+) (\d+)').match("1234 56")
      whole, first_group, second_group = *result

      expect(whole).to eq("1234 56")
      expect(first_group).to eq("1234")
      expect(second_group).to eq("56")
    end
  end

  # Start offsets. The multibyte example shows the offset can be used directly
  # as a character index into the subject string.
  describe "#begin" do
    it "returns the offset of the start of a match by index" do
      result = RE2::Regexp.new('(wo{2})').match('a woohoo')
      start = result.begin(0)

      expect(result.string[start..-1]).to eq('woohoo')
    end

    it "returns the offset of the start of a match by string name" do
      result = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
      start = result.begin('foo')

      expect(result.string[start..-1]).to eq('foobar')
    end

    it "returns the offset of the start of a match by symbol name" do
      result = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
      start = result.begin(:foo)

      expect(result.string[start..-1]).to eq('foobar')
    end

    it "returns the offset of the start of a match by something that can be coerced to a String" do
      result = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
      start = result.begin(StringLike.new("foo"))

      expect(result.string[start..-1]).to eq('foobar')
    end

    it "returns the offset despite multibyte characters" do
      result = RE2::Regexp.new('(Ruby)').match('I ♥ Ruby')
      start = result.begin(0)

      expect(result.string[start..-1]).to eq('Ruby')
    end

    it "returns nil for non-existent numerical matches" do
      result = RE2::Regexp.new('(\d)').match('123')

      expect(result.begin(10)).to be_nil
    end

    it "returns nil for negative numerical matches" do
      result = RE2::Regexp.new('(\d)').match('123')

      expect(result.begin(-4)).to be_nil
    end

    it "returns nil for non-existent named matches" do
      result = RE2::Regexp.new('(\d)').match('123')

      expect(result.begin('foo')).to be_nil
    end

    it "returns nil for non-existent symbol named matches" do
      result = RE2::Regexp.new('(\d)').match('123')

      expect(result.begin(:foo)).to be_nil
    end

    it "raises a type error if given an invalid name or number" do
      result = RE2::Regexp.new('(\d)').match('123')

      expect { result.begin(nil) }.to raise_error(TypeError)
    end
  end

  # End offsets: the position just past the match, so an exclusive range from
  # zero up to the offset ends exactly where the match ends.
  describe "#end" do
    it "returns the offset of the character following the end of a match" do
      result = RE2::Regexp.new('(wo{2})').match('a woohoo')
      finish = result.end(0)

      expect(result.string[0...finish]).to eq('a woo')
    end

    it "returns the offset of a match by string name" do
      result = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
      finish = result.end('foo')

      expect(result.string[0...finish]).to eq('a foo')
    end

    it "returns the offset of a match by symbol name" do
      result = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
      finish = result.end(:foo)

      expect(result.string[0...finish]).to eq('a foo')
    end

    it "returns the offset of a match by something that can be coerced to a String" do
      result = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
      finish = result.end(StringLike.new("foo"))

      expect(result.string[0...finish]).to eq('a foo')
    end

    it "returns the offset despite multibyte characters" do
      result = RE2::Regexp.new('(Ruby)').match('I ♥ Ruby')
      finish = result.end(0)

      expect(result.string[0...finish]).to eq('I ♥ Ruby')
    end

    it "returns nil for non-existent numerical matches" do
      result = RE2::Regexp.new('(\d)').match('123')

      expect(result.end(10)).to be_nil
    end

    it "returns nil for negative numerical matches" do
      result = RE2::Regexp.new('(\d)').match('123')

      expect(result.end(-4)).to be_nil
    end

    it "returns nil for non-existent named matches" do
      result = RE2::Regexp.new('(\d)').match('123')

      expect(result.end('foo')).to be_nil
    end

    it "returns nil for non-existent symbol named matches" do
      result = RE2::Regexp.new('(\d)').match('123')

      expect(result.end(:foo)).to be_nil
    end

    it "raises a type error if given an invalid name or number" do
      result = RE2::Regexp.new('(\d)').match('123')

      expect { result.end(nil) }.to raise_error(TypeError)
    end
  end

  # Array-pattern support: capturing groups only, without the whole match.
  describe "#deconstruct" do
    it "returns all capturing groups" do
      result = RE2::Regexp.new('w(o)(o)').match('woo')

      expect(result.deconstruct).to eq(%w[o o])
    end

    it "includes optional capturing groups as nil" do
      result = RE2::Regexp.new('w(.)(.)(.)?').match('woo')

      expect(result.deconstruct).to eq(['o', 'o', nil])
    end
  end

  # Hash-pattern support: named captures keyed by Symbol.
  describe "#deconstruct_keys" do
    it "returns all named captures if given nil" do
      result = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')

      expect(result.deconstruct_keys(nil)).to eq({ numbers: '123', letters: 'abc' })
    end

    it "returns only named captures if given names" do
      result = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')

      expect(result.deconstruct_keys(%i[numbers])).to eq({ numbers: '123' })
    end

    it "returns named captures up until an invalid name is given" do
      result = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')

      expect(result.deconstruct_keys(%i[numbers punctuation])).to eq({ numbers: '123' })
    end

    it "returns an empty hash if given more capture names than exist" do
      result = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')

      # Three names requested against a pattern with two named groups.
      expect(result.deconstruct_keys(%i[numbers letters punctuation])).to eq({})
    end

    it "returns an empty hash if there are no named capturing groups" do
      result = RE2::Regexp.new('(\d+) ([a-zA-Z]+)').match('123 abc')

      expect(result.deconstruct_keys(nil)).to eq({})
    end

    it "raises an error if given a non-array of keys" do
      result = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')

      expect { result.deconstruct_keys(0) }.to raise_error(TypeError)
    end

    it "raises an error if given keys as non-symbols" do
      result = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')

      expect { result.deconstruct_keys([0]) }.to raise_error(TypeError)
    end
  end
end
|