re2 2.15.0.rc1-aarch64-linux-gnu

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,411 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'objspace'
4
+
5
+ RSpec.describe RE2::MatchData do
6
# Compares ObjectSpace.memsize_of for a one-group match against a
# two-group match over the same input.
it "reports a larger consuming memory size when it has more matches" do
  one_group = RE2::Regexp.new('w(o)').match('woo')
  two_groups = RE2::Regexp.new('w(o)(o)').match('woo')

  smaller = ObjectSpace.memsize_of(one_group)
  larger = ObjectSpace.memsize_of(two_groups)

  expect(smaller).to be < larger
end
12
+
13
# RE2::MatchData#to_a: the whole match followed by every capturing group.
describe "#to_a" do
  it "is populated with the match and capturing groups" do
    pattern = RE2::Regexp.new('w(o)(o)')
    groups = pattern.match('woo').to_a

    expect(groups).to eq(["woo", "o", "o"])
  end

  it "populates optional capturing groups with nil if they are missing" do
    # 'ab' contains no digit for the leading (\d?) group to consume.
    pattern = RE2::Regexp.new('(\d?)(a)(b)')
    groups = pattern.match('ab').to_a

    expect(groups).to eq(["ab", nil, "a", "b"])
  end

  it "returns UTF-8 strings if the pattern is UTF-8" do
    pattern = RE2::Regexp.new('w(o)(o)')
    encodings = pattern.match('woo').to_a.map(&:encoding)

    expect(encodings).to all eq(Encoding::UTF_8)
  end

  it "returns ISO-8859-1 strings if the pattern is not UTF-8" do
    pattern = RE2::Regexp.new('w(o)(o)', utf8: false)
    encodings = pattern.match('woo').to_a.map(&:encoding)

    expect(encodings).to all eq(Encoding::ISO_8859_1)
  end
end
38
+
39
# RE2::MatchData#[]: lookup by integer index, range, (start, length) pair,
# or group name given as a String or a Symbol.
describe "#[]" do
  it "accesses capturing groups by numerical index", :aggregate_failures do
    match_data = RE2::Regexp.new('(\d)(\d{2})').match("123")
    first_group = match_data[1]
    second_group = match_data[2]

    expect(first_group).to eq("1")
    expect(second_group).to eq("23")
  end

  it "returns a UTF-8 string by numerical index if the pattern is UTF-8" do
    pattern = RE2::Regexp.new('(\d)(\d{2})')
    encoding = pattern.match("123")[1].encoding

    expect(encoding).to eq(Encoding::UTF_8)
  end

  it "returns a ISO-8859-1 string by numerical index if the pattern is not UTF-8" do
    pattern = RE2::Regexp.new('(\d)(\d{2})', utf8: false)
    encoding = pattern.match("123")[1].encoding

    expect(encoding).to eq(Encoding::ISO_8859_1)
  end

  it "has the whole match as the 0th item" do
    match_data = RE2::Regexp.new('(\d)(\d{2})').match("123")
    whole = match_data[0]

    expect(whole).to eq("123")
  end

  it "supports access by numerical ranges", :aggregate_failures do
    match_data = RE2::Regexp.new('(\d+) (\d+) (\d+)').match("123 456 789")
    inclusive = match_data[1..3]
    exclusive = match_data[1...3]

    expect(inclusive).to eq(["123", "456", "789"])
    expect(exclusive).to eq(["123", "456"])
  end

  it "supports slicing", :aggregate_failures do
    match_data = RE2::Regexp.new('(\d+) (\d+) (\d+)').match("123 456 789")
    three_from_one = match_data[1, 3]
    two_from_one = match_data[1, 2]

    expect(three_from_one).to eq(["123", "456", "789"])
    expect(two_from_one).to eq(["123", "456"])
  end

  it "returns nil if attempting to access non-existent capturing groups by index", :aggregate_failures do
    match_data = RE2::Regexp.new('(\d+)').match('bob 123')

    # The pattern has a single group, so indices 2 and 3 are out of range.
    [2, 3].each do |index|
      expect(match_data[index]).to be_nil
    end
  end

  it "allows access by string names when there are named groups" do
    pattern = RE2::Regexp.new('(?P<numbers>\d+)')
    match_data = pattern.match('bob 123')

    expect(match_data["numbers"]).to eq("123")
  end

  it "allows access by symbol names when there are named groups" do
    pattern = RE2::Regexp.new('(?P<numbers>\d+)')
    match_data = pattern.match('bob 123')

    expect(match_data[:numbers]).to eq("123")
  end

  it "allows access by names and indices with mixed groups", :aggregate_failures do
    match_data = RE2::Regexp.new('(?P<name>\w+)(\s*)(?P<numbers>\d+)').match("bob 123")

    # Hash literals keep insertion order, so lookups run in the listed order.
    expected_by_key = {
      "name" => "bob",
      :name => "bob",
      2 => " ",
      "numbers" => "123",
      :numbers => "123"
    }

    expected_by_key.each do |key, value|
      expect(match_data[key]).to eq(value)
    end
  end

  it "returns nil if no such named group exists", :aggregate_failures do
    match_data = RE2::Regexp.new('(\d+)').match("bob 123")

    ["missing", :missing].each do |name|
      expect(match_data[name]).to be_nil
    end
  end

  it "raises an error if given an inappropriate index" do
    match_data = RE2::Regexp.new('(\d+)').match("bob 123")
    lookup = -> { match_data[nil] }

    expect(&lookup).to raise_error(TypeError)
  end

  it "returns UTF-8 encoded strings by default", :aggregate_failures do
    match_data = RE2::Regexp.new('(?P<name>\S+)').match("bob")

    [0, "name", :name].each do |key|
      expect(match_data[key].encoding.name).to eq("UTF-8")
    end
  end

  it "returns Latin 1 strings encoding when utf-8 is false", :aggregate_failures do
    match_data = RE2::Regexp.new('(?P<name>\S+)', utf8: false).match('bob')

    [0, "name", :name].each do |key|
      expect(match_data[key].encoding.name).to eq("ISO-8859-1")
    end
  end
end
137
+
138
# RE2::MatchData#string: the text the match was made against.
describe "#string" do
  it "returns the original string to match against" do
    match_data = RE2::Regexp.new('(\D+)').match("bob")

    expect(match_data.string).to eq("bob")
  end

  it "returns a copy, not the actual original" do
    # Unary plus yields an unfrozen String despite frozen_string_literal.
    input = +"bob"
    match_data = RE2::Regexp.new('(\D+)').match(input)

    expect(match_data.string).not_to equal(input)
  end

  it "returns a frozen string" do
    match_data = RE2::Regexp.new('(\D+)').match("bob")

    expect(match_data.string).to be_frozen
  end

  it "does not copy the string if it was already frozen" do
    # This literal is frozen by the file's frozen_string_literal magic comment.
    input = "bob"
    match_data = RE2::Regexp.new('(\D+)').match(input)

    expect(match_data.string).to equal(input)
  end
end
165
+
166
# RE2::MatchData#size: two capturing groups plus the whole match gives 3.
describe "#size" do
  it "returns the number of capturing groups plus the matching string" do
    pattern = RE2::Regexp.new('(\d+) (\d+)')
    match_data = pattern.match("1234 56")

    expect(match_data.size).to eq(3)
  end
end
173
+
174
# RE2::MatchData#length: checked against the same input and count as #size.
describe "#length" do
  it "returns the number of capturing groups plus the matching string" do
    pattern = RE2::Regexp.new('(\d+) (\d+)')
    match_data = pattern.match("1234 56")

    expect(match_data.length).to eq(3)
  end
end
181
+
182
# RE2::MatchData#regexp: must be the very same object that produced the match.
describe "#regexp" do
  it "returns the original RE2::Regexp used" do
    pattern = RE2::Regexp.new('(\d+)')
    match_data = pattern.match("123")

    # `equal` asserts object identity rather than value equality.
    expect(match_data.regexp).to equal(pattern)
  end
end
190
+
191
# RE2::MatchData#inspect: a textual dump of the whole match and each group.
describe "#inspect" do
  it "returns a text representation of the object and indices" do
    match_data = RE2::Regexp.new('(\d+) (\d+)').match("1234 56")
    rendered = match_data.inspect

    expect(rendered).to eq('#<RE2::MatchData "1234 56" 1:"1234" 2:"56">')
  end

  it "represents missing matches as nil" do
    # The trailing optional group has no digits to capture in "1234 ".
    match_data = RE2::Regexp.new('(\d+) (\d+)?').match("1234 ")
    rendered = match_data.inspect

    expect(rendered).to eq('#<RE2::MatchData "1234 " 1:"1234" 2:nil>')
  end

  it "supports matches with null bytes" do
    # Double-quoted literals so that \0 is a real NUL byte in both the
    # pattern and the input.
    pattern = RE2::Regexp.new("(\\w\0\\w) (\\w\0\\w)")
    rendered = pattern.match("a\0b c\0d").inspect

    expect(rendered).to eq("#<RE2::MatchData \"a\0b c\0d\" 1:\"a\0b\" 2:\"c\0d\">")
  end
end
210
+
211
# RE2::MatchData#to_s: only the matched portion of the input.
describe "#to_s" do
  it "returns the matching part of the original string" do
    pattern = RE2::Regexp.new('(\d{2,5})')
    matched_text = pattern.match("one two 23456").to_s

    expect(matched_text).to eq("23456")
  end
end
218
+
219
# RE2::MatchData#to_ary: exercised through splat destructuring.
describe "#to_ary" do
  it "allows the object to be expanded with an asterisk", :aggregate_failures do
    match_data = RE2::Regexp.new('(\d+) (\d+)').match("1234 56")
    whole, first_group, second_group = *match_data

    expect(whole).to eq("1234 56")
    expect(first_group).to eq("1234")
    expect(second_group).to eq("56")
  end
end
229
+
230
# RE2::MatchData#begin: start offset of a match, looked up by index or name.
# Offsets are checked indirectly by slicing the matched string from them.
describe "#begin" do
  it "returns the offset of the start of a match by index" do
    match_data = RE2::Regexp.new('(wo{2})').match('a woohoo')
    tail = match_data.string[match_data.begin(0)..-1]

    expect(tail).to eq('woohoo')
  end

  it "returns the offset of the start of a match by string name" do
    match_data = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
    tail = match_data.string[match_data.begin('foo')..-1]

    expect(tail).to eq('foobar')
  end

  it "returns the offset of the start of a match by symbol name" do
    match_data = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
    tail = match_data.string[match_data.begin(:foo)..-1]

    expect(tail).to eq('foobar')
  end

  it "returns the offset of the start of a match by something that can be coerced to a String" do
    match_data = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
    # StringLike is not defined in this file; presumably a spec helper.
    name = StringLike.new("foo")
    tail = match_data.string[match_data.begin(name)..-1]

    expect(tail).to eq('foobar')
  end

  it "returns the offset despite multibyte characters" do
    # The input contains a non-ASCII character before the match; the slice
    # below indexes by character.
    match_data = RE2::Regexp.new('(Ruby)').match('I ♥ Ruby')
    tail = match_data.string[match_data.begin(0)..-1]

    expect(tail).to eq('Ruby')
  end

  it "returns nil for non-existent numerical matches" do
    match_data = RE2::Regexp.new('(\d)').match('123')
    offset = match_data.begin(10)

    expect(offset).to be_nil
  end

  it "returns nil for negative numerical matches" do
    match_data = RE2::Regexp.new('(\d)').match('123')
    offset = match_data.begin(-4)

    expect(offset).to be_nil
  end

  it "returns nil for non-existent named matches" do
    match_data = RE2::Regexp.new('(\d)').match('123')
    offset = match_data.begin('foo')

    expect(offset).to be_nil
  end

  it "returns nil for non-existent symbol named matches" do
    match_data = RE2::Regexp.new('(\d)').match('123')
    offset = match_data.begin(:foo)

    expect(offset).to be_nil
  end

  it "raises a type error if given an invalid name or number" do
    match_data = RE2::Regexp.new('(\d)').match('123')
    lookup = -> { match_data.begin(nil) }

    expect(&lookup).to raise_error(TypeError)
  end
end
291
+
292
# RE2::MatchData#end: offset just past a match, looked up by index or name.
# Offsets are checked indirectly by slicing the matched string up to them.
describe "#end" do
  it "returns the offset of the character following the end of a match" do
    match_data = RE2::Regexp.new('(wo{2})').match('a woohoo')
    head = match_data.string[0...match_data.end(0)]

    expect(head).to eq('a woo')
  end

  it "returns the offset of a match by string name" do
    match_data = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
    head = match_data.string[0...match_data.end('foo')]

    expect(head).to eq('a foo')
  end

  it "returns the offset of a match by symbol name" do
    match_data = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
    head = match_data.string[0...match_data.end(:foo)]

    expect(head).to eq('a foo')
  end

  it "returns the offset of a match by something that can be coerced to a String" do
    match_data = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
    # StringLike is not defined in this file; presumably a spec helper.
    name = StringLike.new("foo")
    head = match_data.string[0...match_data.end(name)]

    expect(head).to eq('a foo')
  end

  it "returns the offset despite multibyte characters" do
    # The input contains a non-ASCII character before the match; the slice
    # below indexes by character.
    match_data = RE2::Regexp.new('(Ruby)').match('I ♥ Ruby')
    head = match_data.string[0...match_data.end(0)]

    expect(head).to eq('I ♥ Ruby')
  end

  it "returns nil for non-existent numerical matches" do
    match_data = RE2::Regexp.new('(\d)').match('123')
    offset = match_data.end(10)

    expect(offset).to be_nil
  end

  it "returns nil for negative numerical matches" do
    match_data = RE2::Regexp.new('(\d)').match('123')
    offset = match_data.end(-4)

    expect(offset).to be_nil
  end

  it "returns nil for non-existent named matches" do
    match_data = RE2::Regexp.new('(\d)').match('123')
    offset = match_data.end('foo')

    expect(offset).to be_nil
  end

  it "returns nil for non-existent symbol named matches" do
    match_data = RE2::Regexp.new('(\d)').match('123')
    offset = match_data.end(:foo)

    expect(offset).to be_nil
  end

  it "raises a type error if given an invalid name or number" do
    match_data = RE2::Regexp.new('(\d)').match('123')
    lookup = -> { match_data.end(nil) }

    expect(&lookup).to raise_error(TypeError)
  end
end
353
+
354
# RE2::MatchData#deconstruct: the capturing groups only, without the
# whole match.
describe "#deconstruct" do
  it "returns all capturing groups" do
    pattern = RE2::Regexp.new('w(o)(o)')
    captures = pattern.match('woo').deconstruct

    expect(captures).to eq(['o', 'o'])
  end

  it "includes optional capturing groups as nil" do
    # 'woo' is fully consumed before the third, optional group.
    pattern = RE2::Regexp.new('w(.)(.)(.)?')
    captures = pattern.match('woo').deconstruct

    expect(captures).to eq(['o', 'o', nil])
  end
end
367
+
368
# RE2::MatchData#deconstruct_keys: named captures as a Symbol-keyed Hash,
# optionally restricted to a requested list of names.
describe "#deconstruct_keys" do
  it "returns all named captures if given nil" do
    pattern = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)')
    captures = pattern.match('123 abc').deconstruct_keys(nil)

    expect(captures).to eq({ numbers: '123', letters: 'abc' })
  end

  it "returns only named captures if given names" do
    pattern = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)')
    captures = pattern.match('123 abc').deconstruct_keys([:numbers])

    expect(captures).to eq({ numbers: '123' })
  end

  it "returns named captures up until an invalid name is given" do
    pattern = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)')
    # :punctuation is not a group in the pattern.
    requested = [:numbers, :punctuation]
    captures = pattern.match('123 abc').deconstruct_keys(requested)

    expect(captures).to eq({ numbers: '123' })
  end

  it "returns an empty hash if given more capture names than exist" do
    pattern = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)')
    # Three names are requested but the pattern defines only two groups.
    requested = [:numbers, :letters, :punctuation]
    captures = pattern.match('123 abc').deconstruct_keys(requested)

    expect(captures).to eq({})
  end

  it "returns an empty hash if there are no named capturing groups" do
    pattern = RE2::Regexp.new('(\d+) ([a-zA-Z]+)')
    captures = pattern.match('123 abc').deconstruct_keys(nil)

    expect(captures).to eq({})
  end

  it "raises an error if given a non-array of keys" do
    match_data = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
    call = -> { match_data.deconstruct_keys(0) }

    expect(&call).to raise_error(TypeError)
  end

  it "raises an error if given keys as non-symbols" do
    match_data = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
    call = -> { match_data.deconstruct_keys([0]) }

    expect(&call).to raise_error(TypeError)
  end
end
411
+ end