re2 1.5.0 → 1.6.0
This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +26 -3
- data/ext/re2/re2.cc +149 -16
- data/spec/re2/match_data_spec.rb +58 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2151621115d04b197403c0f67276347c205928136405c05aac1fc70bf3a00dec
|
4
|
+
data.tar.gz: 8ba805a95c535ab7d30296a830448dea51da4f5f699a6aa3f858296b2590d188
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 010e20dae629df302c35c6d5cc412c5a0add1cc3f0b6357114ce64c5d77e6c3bbca7401e57c0b86be5fe87acae17162cd7c860c079dc2c57898761f9fd8d4ce4
|
7
|
+
data.tar.gz: 6bfa2db432e91b87ab5d6fa21babd07a675df3b22904e20f1491322b1d34c8fe1c5814783049c33f84ab8f22e0b3c2ef9c2308161f8694dd6bcbd8ec4da278a4
|
data/README.md
CHANGED
@@ -4,8 +4,8 @@ re2 [, libre2.1 (2020-03-02), libre2.6 (2020-03-03), libre2.7 (2020-05-01), libre2.8 (2020-07-06), libre2.9 (2020-11-01)
|
10
10
|
|
11
11
|
Installation
|
@@ -137,7 +137,7 @@ the pattern. After all patterns have been added, the set can be compiled using
|
|
137
137
|
`RE2::Set#compile`, and then `RE2::Set#match` will return an `Array<Integer>`
|
138
138
|
containing the indices of all the patterns that matched.
|
139
139
|
|
140
|
-
```
|
140
|
+
```ruby
|
141
141
|
set = RE2::Set.new
|
142
142
|
set.add("abc") #=> 0
|
143
143
|
set.add("def") #=> 1
|
@@ -147,6 +147,27 @@ set.match("abcdefghi") #=> [0, 1, 2]
|
|
147
147
|
set.match("ghidefabc") #=> [2, 1, 0]
|
148
148
|
```
|
149
149
|
|
150
|
+
As of 1.6.0, you can use [Ruby's pattern matching](https://docs.ruby-lang.org/en/3.0/syntax/pattern_matching_rdoc.html) against `RE2::MatchData` with both array patterns and hash patterns:
|
151
|
+
|
152
|
+
```ruby
|
153
|
+
case RE2('(\w+) (\d+)').match("Alice 42")
|
154
|
+
in [name, age]
|
155
|
+
puts "My name is #{name} and I am #{age} years old"
|
156
|
+
else
|
157
|
+
puts "No match!"
|
158
|
+
end
|
159
|
+
# My name is Alice and I am 42 years old
|
160
|
+
|
161
|
+
|
162
|
+
case RE2('(?P<name>\w+) (?P<age>\d+)').match("Alice 42")
|
163
|
+
in {name:, age:}
|
164
|
+
puts "My name is #{name} and I am #{age} years old"
|
165
|
+
else
|
166
|
+
puts "No match!"
|
167
|
+
end
|
168
|
+
# My name is Alice and I am 42 years old
|
169
|
+
```
|
170
|
+
|
150
171
|
Features
|
151
172
|
--------
|
152
173
|
|
@@ -185,6 +206,8 @@ Features
|
|
185
206
|
[`RE2.escape(unquoted)`](https://github.com/google/re2/blob/2016-02-01/re2/re2.h#L418) and
|
186
207
|
`RE2.quote(unquoted)`
|
187
208
|
|
209
|
+
* Pattern matching with `RE2::MatchData`
|
210
|
+
|
188
211
|
Contributions
|
189
212
|
-------------
|
190
213
|
|
data/ext/re2/re2.cc
CHANGED
@@ -687,6 +687,112 @@ static VALUE re2_matchdata_inspect(VALUE self) {
|
|
687
687
|
return result;
|
688
688
|
}
|
689
689
|
|
690
|
+
/*
|
691
|
+
* Returns the array of submatches for pattern matching.
|
692
|
+
*
|
693
|
+
* @return [Array<String, nil>] the array of submatches
|
694
|
+
* @example
|
695
|
+
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
696
|
+
* m.deconstruct #=> ["123"]
|
697
|
+
*
|
698
|
+
* @example pattern matching
|
699
|
+
* case RE2::Regexp.new('(\d+) (\d+)').match("bob 123 456")
|
700
|
+
* in x, y
|
701
|
+
* puts "Matched #{x} #{y}"
|
702
|
+
* else
|
703
|
+
* puts "Unrecognised match"
|
704
|
+
* end
|
705
|
+
*/
|
706
|
+
static VALUE re2_matchdata_deconstruct(VALUE self) {
|
707
|
+
int i;
|
708
|
+
re2_matchdata *m;
|
709
|
+
re2_pattern *p;
|
710
|
+
re2::StringPiece *match;
|
711
|
+
VALUE array;
|
712
|
+
|
713
|
+
Data_Get_Struct(self, re2_matchdata, m);
|
714
|
+
Data_Get_Struct(m->regexp, re2_pattern, p);
|
715
|
+
|
716
|
+
array = rb_ary_new2(m->number_of_matches - 1);
|
717
|
+
for (i = 1; i < m->number_of_matches; i++) {
|
718
|
+
match = &m->matches[i];
|
719
|
+
|
720
|
+
if (match->empty()) {
|
721
|
+
rb_ary_push(array, Qnil);
|
722
|
+
} else {
|
723
|
+
rb_ary_push(array, ENCODED_STR_NEW(match->data(), match->size(),
|
724
|
+
p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"));
|
725
|
+
}
|
726
|
+
}
|
727
|
+
|
728
|
+
return array;
|
729
|
+
}
|
730
|
+
|
731
|
+
/*
|
732
|
+
* Returns a hash of capturing group names to submatches for pattern matching.
|
733
|
+
*
|
734
|
+
* As this is used by Ruby's pattern matching, it will return an empty hash if given
|
735
|
+
* more keys than there are capturing groups. Given keys will populate the hash in
|
736
|
+
* order, but an invalid name will cause the hash built so far to be returned immediately.
|
737
|
+
*
|
738
|
+
* @return [Hash] a hash of capturing group names to submatches
|
739
|
+
* @param [Array<Symbol>, nil] keys an array of Symbol capturing group names or nil to return all names
|
740
|
+
* @example
|
741
|
+
* m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
742
|
+
* m.deconstruct_keys(nil) #=> {:numbers => "123", :letters => "abc"}
|
743
|
+
* m.deconstruct_keys([:numbers]) #=> {:numbers => "123"}
|
744
|
+
* m.deconstruct_keys([:fruit]) #=> {}
|
745
|
+
* m.deconstruct_keys([:letters, :fruit]) #=> {:letters => "abc"}
|
746
|
+
*
|
747
|
+
* @example pattern matching
|
748
|
+
* case RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
749
|
+
* in numbers:, letters:
|
750
|
+
* puts "Numbers: #{numbers}, letters: #{letters}"
|
751
|
+
* else
|
752
|
+
* puts "Unrecognised match"
|
753
|
+
* end
|
754
|
+
*/
|
755
|
+
static VALUE re2_matchdata_deconstruct_keys(VALUE self, VALUE keys) {
|
756
|
+
int i;
|
757
|
+
VALUE capturing_groups, key;
|
758
|
+
re2_matchdata *m;
|
759
|
+
re2_pattern *p;
|
760
|
+
map<string, int> groups;
|
761
|
+
map<string, int>::iterator iterator;
|
762
|
+
|
763
|
+
Data_Get_Struct(self, re2_matchdata, m);
|
764
|
+
Data_Get_Struct(m->regexp, re2_pattern, p);
|
765
|
+
|
766
|
+
groups = p->pattern->NamedCapturingGroups();
|
767
|
+
capturing_groups = rb_hash_new();
|
768
|
+
|
769
|
+
if (NIL_P(keys)) {
|
770
|
+
for (iterator = groups.begin(); iterator != groups.end(); iterator++) {
|
771
|
+
rb_hash_aset(capturing_groups,
|
772
|
+
ID2SYM(rb_intern(iterator->first.data())),
|
773
|
+
re2_matchdata_nth_match(iterator->second, self));
|
774
|
+
}
|
775
|
+
} else {
|
776
|
+
Check_Type(keys, T_ARRAY);
|
777
|
+
|
778
|
+
if (p->pattern->NumberOfCapturingGroups() >= RARRAY_LEN(keys)) {
|
779
|
+
for (i = 0; i < RARRAY_LEN(keys); i++) {
|
780
|
+
key = rb_ary_entry(keys, i);
|
781
|
+
Check_Type(key, T_SYMBOL);
|
782
|
+
string name(rb_id2name(SYM2ID(key)));
|
783
|
+
|
784
|
+
if (groups.count(name) == 0) {
|
785
|
+
break;
|
786
|
+
}
|
787
|
+
|
788
|
+
rb_hash_aset(capturing_groups, key, re2_matchdata_nth_match(groups[name], self));
|
789
|
+
}
|
790
|
+
}
|
791
|
+
}
|
792
|
+
|
793
|
+
return capturing_groups;
|
794
|
+
}
|
795
|
+
|
690
796
|
/*
|
691
797
|
* Returns a new RE2 object with a compiled version of
|
692
798
|
* +pattern+ stored inside. Equivalent to +RE2.new+.
|
@@ -1245,7 +1351,7 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
|
|
1245
1351
|
*
|
1246
1352
|
* @return [Boolean] whether the match was successful
|
1247
1353
|
*/
|
1248
|
-
static VALUE
|
1354
|
+
static VALUE re2_regexp_match_p(VALUE self, VALUE text) {
|
1249
1355
|
VALUE argv[2];
|
1250
1356
|
argv[0] = text;
|
1251
1357
|
argv[1] = INT2FIX(0);
|
@@ -1514,6 +1620,7 @@ static VALUE re2_set_compile(VALUE self) {
|
|
1514
1620
|
* @return [Boolean] whether the underlying re2 outputs error information from Set matches
|
1515
1621
|
*/
|
1516
1622
|
static VALUE re2_set_match_raises_errors_p(VALUE self) {
|
1623
|
+
UNUSED(self);
|
1517
1624
|
#ifdef HAVE_ERROR_INFO_ARGUMENT
|
1518
1625
|
return Qtrue;
|
1519
1626
|
#else
|
@@ -1526,18 +1633,40 @@ static VALUE re2_set_match_raises_errors_p(VALUE self) {
|
|
1526
1633
|
* integer indices of the matching patterns if matched or an empty array if
|
1527
1634
|
* there are no matches.
|
1528
1635
|
*
|
1529
|
-
* @
|
1530
|
-
*
|
1531
|
-
* @
|
1532
|
-
*
|
1533
|
-
*
|
1534
|
-
*
|
1535
|
-
*
|
1536
|
-
*
|
1537
|
-
*
|
1538
|
-
*
|
1539
|
-
*
|
1540
|
-
*
|
1636
|
+
* @return [Array<Integer>]
|
1637
|
+
*
|
1638
|
+
* @overload match(str)
|
1639
|
+
* Returns an array of integer indices of patterns matching the given string
|
1640
|
+
* (if any). Raises exceptions if there are any errors while matching.
|
1641
|
+
*
|
1642
|
+
* @param [String] str the text to match against
|
1643
|
+
* @return [Array<Integer>] the indices of matching regexps
|
1644
|
+
* @raise [MatchError] if an error occurs while matching
|
1645
|
+
* @raise [UnsupportedError] if the underlying version of re2 does not output error information
|
1646
|
+
* @example
|
1647
|
+
* set = RE2::Set.new
|
1648
|
+
* set.add("abc")
|
1649
|
+
* set.add("def")
|
1650
|
+
* set.compile
|
1651
|
+
* set.match("abcdef") # => [0, 1]
|
1652
|
+
*
|
1653
|
+
* @overload match(str, options)
|
1654
|
+
* Returns an array of integer indices of patterns matching the given string
|
1655
|
+
* (if any). Raises exceptions if there are any errors while matching and the
|
1656
|
+
* :exception option is set to true.
|
1657
|
+
*
|
1658
|
+
* @param [String] str the text to match against
|
1659
|
+
* @param [Hash] options the options with which to match
|
1660
|
+
* @option options [Boolean] :exception (true) whether to raise exceptions with re2's error information (not supported on ABI version 0 of re2)
|
1661
|
+
* @return [Array<Integer>] the indices of matching regexps
|
1662
|
+
* @raise [MatchError] if an error occurs while matching
|
1663
|
+
* @raise [UnsupportedError] if the underlying version of re2 does not output error information
|
1664
|
+
* @example
|
1665
|
+
* set = RE2::Set.new
|
1666
|
+
* set.add("abc")
|
1667
|
+
* set.add("def")
|
1668
|
+
* set.compile
|
1669
|
+
* set.match("abcdef", :exception => true) # => [0, 1]
|
1541
1670
|
*/
|
1542
1671
|
static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
|
1543
1672
|
VALUE str, options, exception_option;
|
@@ -1644,6 +1773,10 @@ void Init_re2(void) {
|
|
1644
1773
|
RUBY_METHOD_FUNC(re2_matchdata_to_s), 0);
|
1645
1774
|
rb_define_method(re2_cMatchData, "inspect",
|
1646
1775
|
RUBY_METHOD_FUNC(re2_matchdata_inspect), 0);
|
1776
|
+
rb_define_method(re2_cMatchData, "deconstruct",
|
1777
|
+
RUBY_METHOD_FUNC(re2_matchdata_deconstruct), 0);
|
1778
|
+
rb_define_method(re2_cMatchData, "deconstruct_keys",
|
1779
|
+
RUBY_METHOD_FUNC(re2_matchdata_deconstruct_keys), 1);
|
1647
1780
|
|
1648
1781
|
rb_define_method(re2_cScanner, "string",
|
1649
1782
|
RUBY_METHOD_FUNC(re2_scanner_string), 0);
|
@@ -1674,11 +1807,11 @@ void Init_re2(void) {
|
|
1674
1807
|
rb_define_method(re2_cRegexp, "match", RUBY_METHOD_FUNC(re2_regexp_match),
|
1675
1808
|
-1);
|
1676
1809
|
rb_define_method(re2_cRegexp, "match?",
|
1677
|
-
RUBY_METHOD_FUNC(
|
1810
|
+
RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
|
1678
1811
|
rb_define_method(re2_cRegexp, "=~",
|
1679
|
-
RUBY_METHOD_FUNC(
|
1812
|
+
RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
|
1680
1813
|
rb_define_method(re2_cRegexp, "===",
|
1681
|
-
RUBY_METHOD_FUNC(
|
1814
|
+
RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
|
1682
1815
|
rb_define_method(re2_cRegexp, "scan",
|
1683
1816
|
RUBY_METHOD_FUNC(re2_regexp_scan), 1);
|
1684
1817
|
rb_define_method(re2_cRegexp, "to_s", RUBY_METHOD_FUNC(re2_regexp_to_s), 0);
|
data/spec/re2/match_data_spec.rb
CHANGED
@@ -241,4 +241,62 @@ RSpec.describe RE2::MatchData do
|
|
241
241
|
expect(md.end(:foo)).to be_nil
|
242
242
|
end
|
243
243
|
end
|
244
|
+
|
245
|
+
describe "#deconstruct" do
|
246
|
+
it "returns all capturing groups" do
|
247
|
+
md = RE2::Regexp.new('w(o)(o)').match('woo')
|
248
|
+
|
249
|
+
expect(md.deconstruct).to eq(['o', 'o'])
|
250
|
+
end
|
251
|
+
|
252
|
+
it "includes optional capturing groups as nil" do
|
253
|
+
md = RE2::Regexp.new('w(.)(.)(.)?').match('woo')
|
254
|
+
|
255
|
+
expect(md.deconstruct).to eq(['o', 'o', nil])
|
256
|
+
end
|
257
|
+
end
|
258
|
+
|
259
|
+
describe "#deconstruct_keys" do
|
260
|
+
it "returns all named captures if given nil" do
|
261
|
+
md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
262
|
+
|
263
|
+
expect(md.deconstruct_keys(nil)).to eq(:numbers => '123', :letters => 'abc')
|
264
|
+
end
|
265
|
+
|
266
|
+
it "returns only named captures if given names" do
|
267
|
+
md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
268
|
+
|
269
|
+
expect(md.deconstruct_keys([:numbers])).to eq(:numbers => '123')
|
270
|
+
end
|
271
|
+
|
272
|
+
it "returns named captures up until an invalid name is given" do
|
273
|
+
md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
274
|
+
|
275
|
+
expect(md.deconstruct_keys([:numbers, :punctuation])).to eq(:numbers => '123')
|
276
|
+
end
|
277
|
+
|
278
|
+
it "returns an empty hash if given more capture names than exist" do
|
279
|
+
md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
280
|
+
|
281
|
+
expect(md.deconstruct_keys([:numbers, :letters, :punctuation])).to eq({})
|
282
|
+
end
|
283
|
+
|
284
|
+
it "returns an empty hash if there are no named capturing groups" do
|
285
|
+
md = RE2::Regexp.new('(\d+) ([a-zA-Z]+)').match('123 abc')
|
286
|
+
|
287
|
+
expect(md.deconstruct_keys(nil)).to eq({})
|
288
|
+
end
|
289
|
+
|
290
|
+
it "raises an error if given a non-array of keys" do
|
291
|
+
md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
292
|
+
|
293
|
+
expect { md.deconstruct_keys(0) }.to raise_error(TypeError)
|
294
|
+
end
|
295
|
+
|
296
|
+
it "raises an error if given keys as non-symbols" do
|
297
|
+
md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
298
|
+
|
299
|
+
expect { md.deconstruct_keys([0]) }.to raise_error(TypeError)
|
300
|
+
end
|
301
|
+
end
|
244
302
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: re2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Paul Mucur
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-10-
|
11
|
+
date: 2022-10-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|