re2 2.15.0.rc1-x86-linux-gnu

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,411 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'objspace'
4
+
5
+ RSpec.describe RE2::MatchData do
6
+ it "reports a larger consuming memory size when it has more matches" do
7
+ matches1 = RE2::Regexp.new('w(o)').match('woo')
8
+ matches2 = RE2::Regexp.new('w(o)(o)').match('woo')
9
+
10
+ expect(ObjectSpace.memsize_of(matches1)).to be < ObjectSpace.memsize_of(matches2)
11
+ end
12
+
13
+ describe "#to_a" do
14
+ it "is populated with the match and capturing groups" do
15
+ a = RE2::Regexp.new('w(o)(o)').match('woo').to_a
16
+
17
+ expect(a).to eq(["woo", "o", "o"])
18
+ end
19
+
20
+ it "populates optional capturing groups with nil if they are missing" do
21
+ a = RE2::Regexp.new('(\d?)(a)(b)').match('ab').to_a
22
+
23
+ expect(a).to eq(["ab", nil, "a", "b"])
24
+ end
25
+
26
+ it "returns UTF-8 strings if the pattern is UTF-8" do
27
+ a = RE2::Regexp.new('w(o)(o)').match('woo').to_a
28
+
29
+ expect(a.map(&:encoding)).to all eq(Encoding::UTF_8)
30
+ end
31
+
32
+ it "returns ISO-8859-1 strings if the pattern is not UTF-8" do
33
+ a = RE2::Regexp.new('w(o)(o)', utf8: false).match('woo').to_a
34
+
35
+ expect(a.map(&:encoding)).to all eq(Encoding::ISO_8859_1)
36
+ end
37
+ end
38
+
39
+ describe "#[]" do
40
+ it "accesses capturing groups by numerical index", :aggregate_failures do
41
+ md = RE2::Regexp.new('(\d)(\d{2})').match("123")
42
+
43
+ expect(md[1]).to eq("1")
44
+ expect(md[2]).to eq("23")
45
+ end
46
+
47
+ it "returns a UTF-8 string by numerical index if the pattern is UTF-8" do
48
+ md = RE2::Regexp.new('(\d)(\d{2})').match("123")
49
+
50
+ expect(md[1].encoding).to eq(Encoding::UTF_8)
51
+ end
52
+
53
+ it "returns a ISO-8859-1 string by numerical index if the pattern is not UTF-8" do
54
+ md = RE2::Regexp.new('(\d)(\d{2})', utf8: false).match("123")
55
+
56
+ expect(md[1].encoding).to eq(Encoding::ISO_8859_1)
57
+ end
58
+
59
+ it "has the whole match as the 0th item" do
60
+ md = RE2::Regexp.new('(\d)(\d{2})').match("123")
61
+
62
+ expect(md[0]).to eq("123")
63
+ end
64
+
65
+ it "supports access by numerical ranges", :aggregate_failures do
66
+ md = RE2::Regexp.new('(\d+) (\d+) (\d+)').match("123 456 789")
67
+
68
+ expect(md[1..3]).to eq(["123", "456", "789"])
69
+ expect(md[1...3]).to eq(["123", "456"])
70
+ end
71
+
72
+ it "supports slicing", :aggregate_failures do
73
+ md = RE2::Regexp.new('(\d+) (\d+) (\d+)').match("123 456 789")
74
+
75
+ expect(md[1, 3]).to eq(["123", "456", "789"])
76
+ expect(md[1, 2]).to eq(["123", "456"])
77
+ end
78
+
79
+ it "returns nil if attempting to access non-existent capturing groups by index", :aggregate_failures do
80
+ md = RE2::Regexp.new('(\d+)').match('bob 123')
81
+
82
+ expect(md[2]).to be_nil
83
+ expect(md[3]).to be_nil
84
+ end
85
+
86
+ it "allows access by string names when there are named groups" do
87
+ md = RE2::Regexp.new('(?P<numbers>\d+)').match('bob 123')
88
+
89
+ expect(md["numbers"]).to eq("123")
90
+ end
91
+
92
+ it "allows access by symbol names when there are named groups" do
93
+ md = RE2::Regexp.new('(?P<numbers>\d+)').match('bob 123')
94
+
95
+ expect(md[:numbers]).to eq("123")
96
+ end
97
+
98
+ it "allows access by names and indices with mixed groups", :aggregate_failures do
99
+ md = RE2::Regexp.new('(?P<name>\w+)(\s*)(?P<numbers>\d+)').match("bob 123")
100
+
101
+ expect(md["name"]).to eq("bob")
102
+ expect(md[:name]).to eq("bob")
103
+ expect(md[2]).to eq(" ")
104
+ expect(md["numbers"]).to eq("123")
105
+ expect(md[:numbers]).to eq("123")
106
+ end
107
+
108
+ it "returns nil if no such named group exists", :aggregate_failures do
109
+ md = RE2::Regexp.new('(\d+)').match("bob 123")
110
+
111
+ expect(md["missing"]).to be_nil
112
+ expect(md[:missing]).to be_nil
113
+ end
114
+
115
+ it "raises an error if given an inappropriate index" do
116
+ md = RE2::Regexp.new('(\d+)').match("bob 123")
117
+
118
+ expect { md[nil] }.to raise_error(TypeError)
119
+ end
120
+
121
+ it "returns UTF-8 encoded strings by default", :aggregate_failures do
122
+ md = RE2::Regexp.new('(?P<name>\S+)').match("bob")
123
+
124
+ expect(md[0].encoding.name).to eq("UTF-8")
125
+ expect(md["name"].encoding.name).to eq("UTF-8")
126
+ expect(md[:name].encoding.name).to eq("UTF-8")
127
+ end
128
+
129
+ it "returns Latin 1 strings encoding when utf-8 is false", :aggregate_failures do
130
+ md = RE2::Regexp.new('(?P<name>\S+)', utf8: false).match('bob')
131
+
132
+ expect(md[0].encoding.name).to eq("ISO-8859-1")
133
+ expect(md["name"].encoding.name).to eq("ISO-8859-1")
134
+ expect(md[:name].encoding.name).to eq("ISO-8859-1")
135
+ end
136
+ end
137
+
138
+ describe "#string" do
139
+ it "returns the original string to match against" do
140
+ re = RE2::Regexp.new('(\D+)').match("bob")
141
+
142
+ expect(re.string).to eq("bob")
143
+ end
144
+
145
+ it "returns a copy, not the actual original" do
146
+ string = +"bob"
147
+ re = RE2::Regexp.new('(\D+)').match(string)
148
+
149
+ expect(re.string).to_not equal(string)
150
+ end
151
+
152
+ it "returns a frozen string" do
153
+ re = RE2::Regexp.new('(\D+)').match("bob")
154
+
155
+ expect(re.string).to be_frozen
156
+ end
157
+
158
+ it "does not copy the string if it was already frozen" do
159
+ string = "bob"
160
+ re = RE2::Regexp.new('(\D+)').match(string)
161
+
162
+ expect(re.string).to equal(string)
163
+ end
164
+ end
165
+
166
+ describe "#size" do
167
+ it "returns the number of capturing groups plus the matching string" do
168
+ md = RE2::Regexp.new('(\d+) (\d+)').match("1234 56")
169
+
170
+ expect(md.size).to eq(3)
171
+ end
172
+ end
173
+
174
+ describe "#length" do
175
+ it "returns the number of capturing groups plus the matching string" do
176
+ md = RE2::Regexp.new('(\d+) (\d+)').match("1234 56")
177
+
178
+ expect(md.length).to eq(3)
179
+ end
180
+ end
181
+
182
+ describe "#regexp" do
183
+ it "returns the original RE2::Regexp used" do
184
+ re = RE2::Regexp.new('(\d+)')
185
+ md = re.match("123")
186
+
187
+ expect(md.regexp).to equal(re)
188
+ end
189
+ end
190
+
191
+ describe "#inspect" do
192
+ it "returns a text representation of the object and indices" do
193
+ md = RE2::Regexp.new('(\d+) (\d+)').match("1234 56")
194
+
195
+ expect(md.inspect).to eq('#<RE2::MatchData "1234 56" 1:"1234" 2:"56">')
196
+ end
197
+
198
+ it "represents missing matches as nil" do
199
+ md = RE2::Regexp.new('(\d+) (\d+)?').match("1234 ")
200
+
201
+ expect(md.inspect).to eq('#<RE2::MatchData "1234 " 1:"1234" 2:nil>')
202
+ end
203
+
204
+ it "supports matches with null bytes" do
205
+ md = RE2::Regexp.new("(\\w\0\\w) (\\w\0\\w)").match("a\0b c\0d")
206
+
207
+ expect(md.inspect).to eq("#<RE2::MatchData \"a\0b c\0d\" 1:\"a\0b\" 2:\"c\0d\">")
208
+ end
209
+ end
210
+
211
+ describe "#to_s" do
212
+ it "returns the matching part of the original string" do
213
+ md = RE2::Regexp.new('(\d{2,5})').match("one two 23456")
214
+
215
+ expect(md.to_s).to eq("23456")
216
+ end
217
+ end
218
+
219
+ describe "#to_ary" do
220
+ it "allows the object to be expanded with an asterisk", :aggregate_failures do
221
+ md = RE2::Regexp.new('(\d+) (\d+)').match("1234 56")
222
+ m1, m2, m3 = *md
223
+
224
+ expect(m1).to eq("1234 56")
225
+ expect(m2).to eq("1234")
226
+ expect(m3).to eq("56")
227
+ end
228
+ end
229
+
230
+ describe "#begin" do
231
+ it "returns the offset of the start of a match by index" do
232
+ md = RE2::Regexp.new('(wo{2})').match('a woohoo')
233
+
234
+ expect(md.string[md.begin(0)..-1]).to eq('woohoo')
235
+ end
236
+
237
+ it "returns the offset of the start of a match by string name" do
238
+ md = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
239
+
240
+ expect(md.string[md.begin('foo')..-1]).to eq('foobar')
241
+ end
242
+
243
+ it "returns the offset of the start of a match by symbol name" do
244
+ md = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
245
+
246
+ expect(md.string[md.begin(:foo)..-1]).to eq('foobar')
247
+ end
248
+
249
+ it "returns the offset of the start of a match by something that can be coerced to a String" do
250
+ md = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
251
+
252
+ expect(md.string[md.begin(StringLike.new("foo"))..-1]).to eq('foobar')
253
+ end
254
+
255
+ it "returns the offset despite multibyte characters" do
256
+ md = RE2::Regexp.new('(Ruby)').match('I ♥ Ruby')
257
+
258
+ expect(md.string[md.begin(0)..-1]).to eq('Ruby')
259
+ end
260
+
261
+ it "returns nil for non-existent numerical matches" do
262
+ md = RE2::Regexp.new('(\d)').match('123')
263
+
264
+ expect(md.begin(10)).to be_nil
265
+ end
266
+
267
+ it "returns nil for negative numerical matches" do
268
+ md = RE2::Regexp.new('(\d)').match('123')
269
+
270
+ expect(md.begin(-4)).to be_nil
271
+ end
272
+
273
+ it "returns nil for non-existent named matches" do
274
+ md = RE2::Regexp.new('(\d)').match('123')
275
+
276
+ expect(md.begin('foo')).to be_nil
277
+ end
278
+
279
+ it "returns nil for non-existent symbol named matches" do
280
+ md = RE2::Regexp.new('(\d)').match('123')
281
+
282
+ expect(md.begin(:foo)).to be_nil
283
+ end
284
+
285
+ it "raises a type error if given an invalid name or number" do
286
+ md = RE2::Regexp.new('(\d)').match('123')
287
+
288
+ expect { md.begin(nil) }.to raise_error(TypeError)
289
+ end
290
+ end
291
+
292
+ describe "#end" do
293
+ it "returns the offset of the character following the end of a match" do
294
+ md = RE2::Regexp.new('(wo{2})').match('a woohoo')
295
+
296
+ expect(md.string[0...md.end(0)]).to eq('a woo')
297
+ end
298
+
299
+ it "returns the offset of a match by string name" do
300
+ md = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
301
+
302
+ expect(md.string[0...md.end('foo')]).to eq('a foo')
303
+ end
304
+
305
+ it "returns the offset of a match by symbol name" do
306
+ md = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
307
+
308
+ expect(md.string[0...md.end(:foo)]).to eq('a foo')
309
+ end
310
+
311
+ it "returns the offset of a match by something that can be coerced to a String" do
312
+ md = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
313
+
314
+ expect(md.string[0...md.end(StringLike.new("foo"))]).to eq('a foo')
315
+ end
316
+
317
+ it "returns the offset despite multibyte characters" do
318
+ md = RE2::Regexp.new('(Ruby)').match('I ♥ Ruby')
319
+
320
+ expect(md.string[0...md.end(0)]).to eq('I ♥ Ruby')
321
+ end
322
+
323
+ it "returns nil for non-existent numerical matches" do
324
+ md = RE2::Regexp.new('(\d)').match('123')
325
+
326
+ expect(md.end(10)).to be_nil
327
+ end
328
+
329
+ it "returns nil for negative numerical matches" do
330
+ md = RE2::Regexp.new('(\d)').match('123')
331
+
332
+ expect(md.end(-4)).to be_nil
333
+ end
334
+
335
+ it "returns nil for non-existent named matches" do
336
+ md = RE2::Regexp.new('(\d)').match('123')
337
+
338
+ expect(md.end('foo')).to be_nil
339
+ end
340
+
341
+ it "returns nil for non-existent symbol named matches" do
342
+ md = RE2::Regexp.new('(\d)').match('123')
343
+
344
+ expect(md.end(:foo)).to be_nil
345
+ end
346
+
347
+ it "raises a type error if given an invalid name or number" do
348
+ md = RE2::Regexp.new('(\d)').match('123')
349
+
350
+ expect { md.end(nil) }.to raise_error(TypeError)
351
+ end
352
+ end
353
+
354
+ describe "#deconstruct" do
355
+ it "returns all capturing groups" do
356
+ md = RE2::Regexp.new('w(o)(o)').match('woo')
357
+
358
+ expect(md.deconstruct).to eq(['o', 'o'])
359
+ end
360
+
361
+ it "includes optional capturing groups as nil" do
362
+ md = RE2::Regexp.new('w(.)(.)(.)?').match('woo')
363
+
364
+ expect(md.deconstruct).to eq(['o', 'o', nil])
365
+ end
366
+ end
367
+
368
+ describe "#deconstruct_keys" do
369
+ it "returns all named captures if given nil" do
370
+ md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
371
+
372
+ expect(md.deconstruct_keys(nil)).to eq(numbers: '123', letters: 'abc')
373
+ end
374
+
375
+ it "returns only named captures if given names" do
376
+ md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
377
+
378
+ expect(md.deconstruct_keys([:numbers])).to eq(numbers: '123')
379
+ end
380
+
381
+ it "returns named captures up until an invalid name is given" do
382
+ md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
383
+
384
+ expect(md.deconstruct_keys([:numbers, :punctuation])).to eq(numbers: '123')
385
+ end
386
+
387
+ it "returns an empty hash if given more capture names than exist" do
388
+ md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
389
+
390
+ expect(md.deconstruct_keys([:numbers, :letters, :punctuation])).to eq({})
391
+ end
392
+
393
+ it "returns an empty hash if there are no named capturing groups" do
394
+ md = RE2::Regexp.new('(\d+) ([a-zA-Z]+)').match('123 abc')
395
+
396
+ expect(md.deconstruct_keys(nil)).to eq({})
397
+ end
398
+
399
+ it "raises an error if given a non-array of keys" do
400
+ md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
401
+
402
+ expect { md.deconstruct_keys(0) }.to raise_error(TypeError)
403
+ end
404
+
405
+ it "raises an error if given keys as non-symbols" do
406
+ md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
407
+
408
+ expect { md.deconstruct_keys([0]) }.to raise_error(TypeError)
409
+ end
410
+ end
411
+ end