re2 2.15.0.rc1-x86-linux-gnu
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/Gemfile +11 -0
- data/LICENSE-DEPENDENCIES.txt +237 -0
- data/LICENSE.txt +28 -0
- data/README.md +396 -0
- data/Rakefile +94 -0
- data/dependencies.yml +7 -0
- data/ext/re2/extconf.rb +332 -0
- data/ext/re2/re2.cc +2254 -0
- data/ext/re2/recipes.rb +54 -0
- data/lib/3.1/re2.so +0 -0
- data/lib/3.2/re2.so +0 -0
- data/lib/3.3/re2.so +0 -0
- data/lib/3.4/re2.so +0 -0
- data/lib/re2/regexp.rb +72 -0
- data/lib/re2/scanner.rb +26 -0
- data/lib/re2/string.rb +38 -0
- data/lib/re2/version.rb +14 -0
- data/lib/re2.rb +20 -0
- data/re2.gemspec +47 -0
- data/spec/kernel_spec.rb +37 -0
- data/spec/re2/match_data_spec.rb +411 -0
- data/spec/re2/regexp_spec.rb +911 -0
- data/spec/re2/scanner_spec.rb +275 -0
- data/spec/re2/set_spec.rb +231 -0
- data/spec/re2/string_spec.rb +62 -0
- data/spec/re2_spec.rb +201 -0
- data/spec/spec_helper.rb +31 -0
- metadata +129 -0
@@ -0,0 +1,411 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'objspace'
|
4
|
+
|
5
|
+
RSpec.describe RE2::MatchData do
|
6
|
+
# Compares ObjectSpace.memsize_of for a one-group match and a two-group match.
it "reports a larger consuming memory size when it has more matches" do
  one_group = RE2::Regexp.new('w(o)').match('woo')
  two_groups = RE2::Regexp.new('w(o)(o)').match('woo')

  smaller = ObjectSpace.memsize_of(one_group)
  larger = ObjectSpace.memsize_of(two_groups)

  expect(smaller).to be < larger
end
|
12
|
+
|
13
|
+
# Array conversion: whole match first, then each capturing group in order.
describe "#to_a" do
  it "is populated with the match and capturing groups" do
    match = RE2::Regexp.new('w(o)(o)').match('woo')

    expect(match.to_a).to eq(["woo", "o", "o"])
  end

  it "populates optional capturing groups with nil if they are missing" do
    match = RE2::Regexp.new('(\d?)(a)(b)').match('ab')

    expect(match.to_a).to eq(["ab", nil, "a", "b"])
  end

  it "returns UTF-8 strings if the pattern is UTF-8" do
    encodings = RE2::Regexp.new('w(o)(o)').match('woo').to_a.map(&:encoding)

    expect(encodings).to all(eq(Encoding::UTF_8))
  end

  it "returns ISO-8859-1 strings if the pattern is not UTF-8" do
    latin1_pattern = RE2::Regexp.new('w(o)(o)', utf8: false)
    encodings = latin1_pattern.match('woo').to_a.map(&:encoding)

    expect(encodings).to all(eq(Encoding::ISO_8859_1))
  end
end
|
38
|
+
|
39
|
+
# Element access by integer index, range, start/length pair, String name
# and Symbol name.
describe "#[]" do
  it "accesses capturing groups by numerical index", :aggregate_failures do
    match = RE2::Regexp.new('(\d)(\d{2})').match("123")

    expect(match[1]).to eq("1")
    expect(match[2]).to eq("23")
  end

  it "returns a UTF-8 string by numerical index if the pattern is UTF-8" do
    first_group = RE2::Regexp.new('(\d)(\d{2})').match("123")[1]

    expect(first_group.encoding).to eq(Encoding::UTF_8)
  end

  it "returns a ISO-8859-1 string by numerical index if the pattern is not UTF-8" do
    first_group = RE2::Regexp.new('(\d)(\d{2})', utf8: false).match("123")[1]

    expect(first_group.encoding).to eq(Encoding::ISO_8859_1)
  end

  it "has the whole match as the 0th item" do
    match = RE2::Regexp.new('(\d)(\d{2})').match("123")

    expect(match[0]).to eq("123")
  end

  it "supports access by numerical ranges", :aggregate_failures do
    match = RE2::Regexp.new('(\d+) (\d+) (\d+)').match("123 456 789")
    inclusive = match[1..3]
    exclusive = match[1...3]

    expect(inclusive).to eq(["123", "456", "789"])
    expect(exclusive).to eq(["123", "456"])
  end

  it "supports slicing", :aggregate_failures do
    match = RE2::Regexp.new('(\d+) (\d+) (\d+)').match("123 456 789")
    three_from_one = match[1, 3]
    two_from_one = match[1, 2]

    expect(three_from_one).to eq(["123", "456", "789"])
    expect(two_from_one).to eq(["123", "456"])
  end

  it "returns nil if attempting to access non-existent capturing groups by index", :aggregate_failures do
    match = RE2::Regexp.new('(\d+)').match('bob 123')

    # The pattern has a single group, so indices 2 and 3 are out of range.
    [2, 3].each do |index|
      expect(match[index]).to be_nil
    end
  end

  it "allows access by string names when there are named groups" do
    match = RE2::Regexp.new('(?P<numbers>\d+)').match('bob 123')

    expect(match["numbers"]).to eq("123")
  end

  it "allows access by symbol names when there are named groups" do
    match = RE2::Regexp.new('(?P<numbers>\d+)').match('bob 123')

    expect(match[:numbers]).to eq("123")
  end

  it "allows access by names and indices with mixed groups", :aggregate_failures do
    mixed = RE2::Regexp.new('(?P<name>\w+)(\s*)(?P<numbers>\d+)')
    match = mixed.match("bob 123")

    expect(match["name"]).to eq("bob")
    expect(match[:name]).to eq("bob")
    expect(match[2]).to eq(" ")
    expect(match["numbers"]).to eq("123")
    expect(match[:numbers]).to eq("123")
  end

  it "returns nil if no such named group exists", :aggregate_failures do
    match = RE2::Regexp.new('(\d+)').match("bob 123")

    ["missing", :missing].each do |name|
      expect(match[name]).to be_nil
    end
  end

  it "raises an error if given an inappropriate index" do
    match = RE2::Regexp.new('(\d+)').match("bob 123")

    expect { match[nil] }.to raise_error(TypeError)
  end

  it "returns UTF-8 encoded strings by default", :aggregate_failures do
    match = RE2::Regexp.new('(?P<name>\S+)').match("bob")

    # Same check for index, String name and Symbol name access.
    [0, "name", :name].each do |key|
      expect(match[key].encoding.name).to eq("UTF-8")
    end
  end

  it "returns Latin 1 strings encoding when utf-8 is false", :aggregate_failures do
    match = RE2::Regexp.new('(?P<name>\S+)', utf8: false).match('bob')

    # Same check for index, String name and Symbol name access.
    [0, "name", :name].each do |key|
      expect(match[key].encoding.name).to eq("ISO-8859-1")
    end
  end
end
|
137
|
+
|
138
|
+
# The text that was matched against, as exposed by the match data.
describe "#string" do
  it "returns the original string to match against" do
    match = RE2::Regexp.new('(\D+)').match("bob")

    expect(match.string).to eq("bob")
  end

  it "returns a copy, not the actual original" do
    # Unary plus yields an unfrozen String despite the frozen-literal pragma.
    mutable_input = +"bob"
    match = RE2::Regexp.new('(\D+)').match(mutable_input)

    expect(match.string).not_to equal(mutable_input)
  end

  it "returns a frozen string" do
    match = RE2::Regexp.new('(\D+)').match("bob")

    expect(match.string).to be_frozen
  end

  it "does not copy the string if it was already frozen" do
    # A plain literal is frozen here because of frozen_string_literal.
    frozen_input = "bob"
    match = RE2::Regexp.new('(\D+)').match(frozen_input)

    expect(match.string).to equal(frozen_input)
  end
end
|
165
|
+
|
166
|
+
# Two groups plus the overall match gives a size of three.
describe "#size" do
  it "returns the number of capturing groups plus the matching string" do
    match = RE2::Regexp.new('(\d+) (\d+)').match("1234 56")
    element_count = match.size

    expect(element_count).to eq(3)
  end
end
|
173
|
+
|
174
|
+
# Two groups plus the overall match gives a length of three.
describe "#length" do
  it "returns the number of capturing groups plus the matching string" do
    match = RE2::Regexp.new('(\d+) (\d+)').match("1234 56")
    element_count = match.length

    expect(element_count).to eq(3)
  end
end
|
181
|
+
|
182
|
+
# The match data must hand back the very same pattern object (identity check).
describe "#regexp" do
  it "returns the original RE2::Regexp used" do
    pattern = RE2::Regexp.new('(\d+)')
    match = pattern.match("123")

    expect(match.regexp).to equal(pattern)
  end
end
|
190
|
+
|
191
|
+
# Textual representation: the whole match followed by index:value pairs.
describe "#inspect" do
  it "returns a text representation of the object and indices" do
    match = RE2::Regexp.new('(\d+) (\d+)').match("1234 56")
    rendered = match.inspect

    expect(rendered).to eq('#<RE2::MatchData "1234 56" 1:"1234" 2:"56">')
  end

  it "represents missing matches as nil" do
    match = RE2::Regexp.new('(\d+) (\d+)?').match("1234 ")
    rendered = match.inspect

    expect(rendered).to eq('#<RE2::MatchData "1234 " 1:"1234" 2:nil>')
  end

  it "supports matches with null bytes" do
    # Pattern, input and expected output all embed literal NUL bytes.
    match = RE2::Regexp.new("(\\w\0\\w) (\\w\0\\w)").match("a\0b c\0d")
    rendered = match.inspect

    expect(rendered).to eq("#<RE2::MatchData \"a\0b c\0d\" 1:\"a\0b\" 2:\"c\0d\">")
  end
end
|
210
|
+
|
211
|
+
# String conversion yields only the matched portion of the input.
describe "#to_s" do
  it "returns the matching part of the original string" do
    match = RE2::Regexp.new('(\d{2,5})').match("one two 23456")
    matched_text = match.to_s

    expect(matched_text).to eq("23456")
  end
end
|
218
|
+
|
219
|
+
# Splatting the match data into a multiple assignment.
describe "#to_ary" do
  it "allows the object to be expanded with an asterisk", :aggregate_failures do
    match = RE2::Regexp.new('(\d+) (\d+)').match("1234 56")
    whole, first_group, second_group = *match

    expect(whole).to eq("1234 56")
    expect(first_group).to eq("1234")
    expect(second_group).to eq("56")
  end
end
|
229
|
+
|
230
|
+
# Start offsets, checked by slicing the original string from the offset on.
describe "#begin" do
  it "returns the offset of the start of a match by index" do
    match = RE2::Regexp.new('(wo{2})').match('a woohoo')
    start = match.begin(0)

    expect(match.string[start..-1]).to eq('woohoo')
  end

  it "returns the offset of the start of a match by string name" do
    match = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
    start = match.begin('foo')

    expect(match.string[start..-1]).to eq('foobar')
  end

  it "returns the offset of the start of a match by symbol name" do
    match = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
    start = match.begin(:foo)

    expect(match.string[start..-1]).to eq('foobar')
  end

  it "returns the offset of the start of a match by something that can be coerced to a String" do
    match = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
    start = match.begin(StringLike.new("foo"))

    expect(match.string[start..-1]).to eq('foobar')
  end

  it "returns the offset despite multibyte characters" do
    match = RE2::Regexp.new('(Ruby)').match('I ♥ Ruby')
    start = match.begin(0)

    expect(match.string[start..-1]).to eq('Ruby')
  end

  it "returns nil for non-existent numerical matches" do
    match = RE2::Regexp.new('(\d)').match('123')
    start = match.begin(10)

    expect(start).to be_nil
  end

  it "returns nil for negative numerical matches" do
    match = RE2::Regexp.new('(\d)').match('123')
    start = match.begin(-4)

    expect(start).to be_nil
  end

  it "returns nil for non-existent named matches" do
    match = RE2::Regexp.new('(\d)').match('123')
    start = match.begin('foo')

    expect(start).to be_nil
  end

  it "returns nil for non-existent symbol named matches" do
    match = RE2::Regexp.new('(\d)').match('123')
    start = match.begin(:foo)

    expect(start).to be_nil
  end

  it "raises a type error if given an invalid name or number" do
    match = RE2::Regexp.new('(\d)').match('123')

    expect { match.begin(nil) }.to raise_error(TypeError)
  end
end
|
291
|
+
|
292
|
+
# End offsets, checked by slicing the original string up to the offset.
describe "#end" do
  it "returns the offset of the character following the end of a match" do
    match = RE2::Regexp.new('(wo{2})').match('a woohoo')
    finish = match.end(0)

    expect(match.string[0...finish]).to eq('a woo')
  end

  it "returns the offset of a match by string name" do
    match = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
    finish = match.end('foo')

    expect(match.string[0...finish]).to eq('a foo')
  end

  it "returns the offset of a match by symbol name" do
    match = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
    finish = match.end(:foo)

    expect(match.string[0...finish]).to eq('a foo')
  end

  it "returns the offset of a match by something that can be coerced to a String" do
    match = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
    finish = match.end(StringLike.new("foo"))

    expect(match.string[0...finish]).to eq('a foo')
  end

  it "returns the offset despite multibyte characters" do
    match = RE2::Regexp.new('(Ruby)').match('I ♥ Ruby')
    finish = match.end(0)

    expect(match.string[0...finish]).to eq('I ♥ Ruby')
  end

  it "returns nil for non-existent numerical matches" do
    match = RE2::Regexp.new('(\d)').match('123')
    finish = match.end(10)

    expect(finish).to be_nil
  end

  it "returns nil for negative numerical matches" do
    match = RE2::Regexp.new('(\d)').match('123')
    finish = match.end(-4)

    expect(finish).to be_nil
  end

  it "returns nil for non-existent named matches" do
    match = RE2::Regexp.new('(\d)').match('123')
    finish = match.end('foo')

    expect(finish).to be_nil
  end

  it "returns nil for non-existent symbol named matches" do
    match = RE2::Regexp.new('(\d)').match('123')
    finish = match.end(:foo)

    expect(finish).to be_nil
  end

  it "raises a type error if given an invalid name or number" do
    match = RE2::Regexp.new('(\d)').match('123')

    expect { match.end(nil) }.to raise_error(TypeError)
  end
end
|
353
|
+
|
354
|
+
# Array pattern-matching hook: the expected arrays hold the groups only,
# without the whole match.
describe "#deconstruct" do
  it "returns all capturing groups" do
    match = RE2::Regexp.new('w(o)(o)').match('woo')
    groups = match.deconstruct

    expect(groups).to eq(['o', 'o'])
  end

  it "includes optional capturing groups as nil" do
    match = RE2::Regexp.new('w(.)(.)(.)?').match('woo')
    groups = match.deconstruct

    expect(groups).to eq(['o', 'o', nil])
  end
end
|
367
|
+
|
368
|
+
# Hash pattern-matching hook: named captures keyed by Symbol.
describe "#deconstruct_keys" do
  it "returns all named captures if given nil" do
    named = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)')
    captures = named.match('123 abc').deconstruct_keys(nil)

    expect(captures).to eq(numbers: '123', letters: 'abc')
  end

  it "returns only named captures if given names" do
    named = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)')
    captures = named.match('123 abc').deconstruct_keys([:numbers])

    expect(captures).to eq(numbers: '123')
  end

  it "returns named captures up until an invalid name is given" do
    named = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)')
    captures = named.match('123 abc').deconstruct_keys([:numbers, :punctuation])

    expect(captures).to eq(numbers: '123')
  end

  it "returns an empty hash if given more capture names than exist" do
    named = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)')
    requested = [:numbers, :letters, :punctuation]
    captures = named.match('123 abc').deconstruct_keys(requested)

    expect(captures).to eq({})
  end

  it "returns an empty hash if there are no named capturing groups" do
    unnamed = RE2::Regexp.new('(\d+) ([a-zA-Z]+)')
    captures = unnamed.match('123 abc').deconstruct_keys(nil)

    expect(captures).to eq({})
  end

  it "raises an error if given a non-array of keys" do
    named = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)')
    match = named.match('123 abc')

    expect { match.deconstruct_keys(0) }.to raise_error(TypeError)
  end

  it "raises an error if given keys as non-symbols" do
    named = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)')
    match = named.match('123 abc')

    expect { match.deconstruct_keys([0]) }.to raise_error(TypeError)
  end
end
|
411
|
+
end
|