re2 1.5.0 → 1.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +26 -3
- data/ext/re2/re2.cc +149 -16
- data/spec/re2/match_data_spec.rb +58 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2151621115d04b197403c0f67276347c205928136405c05aac1fc70bf3a00dec
|
4
|
+
data.tar.gz: 8ba805a95c535ab7d30296a830448dea51da4f5f699a6aa3f858296b2590d188
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 010e20dae629df302c35c6d5cc412c5a0add1cc3f0b6357114ce64c5d77e6c3bbca7401e57c0b86be5fe87acae17162cd7c860c079dc2c57898761f9fd8d4ce4
|
7
|
+
data.tar.gz: 6bfa2db432e91b87ab5d6fa21babd07a675df3b22904e20f1491322b1d34c8fe1c5814783049c33f84ab8f22e0b3c2ef9c2308161f8694dd6bcbd8ec4da278a4
|
data/README.md
CHANGED
@@ -4,8 +4,8 @@ re2 [![Build Status](https://github.com/mudge/re2/actions/workflows/tests.yml/ba
|
|
4
4
|
A Ruby binding to [re2][], an "efficient, principled regular expression
|
5
5
|
library".
|
6
6
|
|
7
|
-
**Current version:** 1.5.0
|
8
|
-
**Supported Ruby versions:** 1.8.7, 1.9.3, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 3.0
|
7
|
+
**Current version:** 1.6.0
|
8
|
+
**Supported Ruby versions:** 1.8.7, 1.9.3, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 3.0, 3.1
|
9
9
|
**Supported re2 versions:** libre2.0 (< 2020-03-02), libre2.1 (2020-03-02), libre2.6 (2020-03-03), libre2.7 (2020-05-01), libre2.8 (2020-07-06), libre2.9 (2020-11-01)
|
10
10
|
|
11
11
|
Installation
|
@@ -137,7 +137,7 @@ the pattern. After all patterns have been added, the set can be compiled using
|
|
137
137
|
`RE2::Set#compile`, and then `RE2::Set#match` will return an `Array<Integer>`
|
138
138
|
containing the indices of all the patterns that matched.
|
139
139
|
|
140
|
-
```
|
140
|
+
```ruby
|
141
141
|
set = RE2::Set.new
|
142
142
|
set.add("abc") #=> 0
|
143
143
|
set.add("def") #=> 1
|
@@ -147,6 +147,27 @@ set.match("abcdefghi") #=> [0, 1, 2]
|
|
147
147
|
set.match("ghidefabc") #=> [2, 1, 0]
|
148
148
|
```
|
149
149
|
|
150
|
+
As of 1.6.0, you can use [Ruby's pattern matching](https://docs.ruby-lang.org/en/3.0/syntax/pattern_matching_rdoc.html) against `RE2::MatchData` with both array patterns and hash patterns:
|
151
|
+
|
152
|
+
```ruby
|
153
|
+
case RE2('(\w+) (\d+)').match("Alice 42")
|
154
|
+
in [name, age]
|
155
|
+
puts "My name is #{name} and I am #{age} years old"
|
156
|
+
else
|
157
|
+
puts "No match!"
|
158
|
+
end
|
159
|
+
# My name is Alice and I am 42 years old
|
160
|
+
|
161
|
+
|
162
|
+
case RE2('(?P<name>\w+) (?P<age>\d+)').match("Alice 42")
|
163
|
+
in {name:, age:}
|
164
|
+
puts "My name is #{name} and I am #{age} years old"
|
165
|
+
else
|
166
|
+
puts "No match!"
|
167
|
+
end
|
168
|
+
# My name is Alice and I am 42 years old
|
169
|
+
```
|
170
|
+
|
150
171
|
Features
|
151
172
|
--------
|
152
173
|
|
@@ -185,6 +206,8 @@ Features
|
|
185
206
|
[`RE2.escape(unquoted)`](https://github.com/google/re2/blob/2016-02-01/re2/re2.h#L418) and
|
186
207
|
`RE2.quote(unquoted)`
|
187
208
|
|
209
|
+
* Pattern matching with `RE2::MatchData`
|
210
|
+
|
188
211
|
Contributions
|
189
212
|
-------------
|
190
213
|
|
data/ext/re2/re2.cc
CHANGED
@@ -687,6 +687,112 @@ static VALUE re2_matchdata_inspect(VALUE self) {
|
|
687
687
|
return result;
|
688
688
|
}
|
689
689
|
|
690
|
+
/*
|
691
|
+
* Returns the array of submatches for pattern matching.
|
692
|
+
*
|
693
|
+
* @return [Array<String, nil>] the array of submatches
|
694
|
+
* @example
|
695
|
+
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
696
|
+
* m.deconstruct #=> ["123"]
|
697
|
+
*
|
698
|
+
* @example pattern matching
|
699
|
+
* case RE2::Regexp.new('(\d+) (\d+)').match("bob 123 456")
|
700
|
+
* in x, y
|
701
|
+
* puts "Matched #{x} #{y}"
|
702
|
+
* else
|
703
|
+
* puts "Unrecognised match"
|
704
|
+
* end
|
705
|
+
*/
|
706
|
+
static VALUE re2_matchdata_deconstruct(VALUE self) {
|
707
|
+
int i;
|
708
|
+
re2_matchdata *m;
|
709
|
+
re2_pattern *p;
|
710
|
+
re2::StringPiece *match;
|
711
|
+
VALUE array;
|
712
|
+
|
713
|
+
Data_Get_Struct(self, re2_matchdata, m);
|
714
|
+
Data_Get_Struct(m->regexp, re2_pattern, p);
|
715
|
+
|
716
|
+
array = rb_ary_new2(m->number_of_matches - 1);
|
717
|
+
for (i = 1; i < m->number_of_matches; i++) {
|
718
|
+
match = &m->matches[i];
|
719
|
+
|
720
|
+
if (match->empty()) {
|
721
|
+
rb_ary_push(array, Qnil);
|
722
|
+
} else {
|
723
|
+
rb_ary_push(array, ENCODED_STR_NEW(match->data(), match->size(),
|
724
|
+
p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"));
|
725
|
+
}
|
726
|
+
}
|
727
|
+
|
728
|
+
return array;
|
729
|
+
}
|
730
|
+
|
731
|
+
/*
|
732
|
+
* Returns a hash of capturing group names to submatches for pattern matching.
|
733
|
+
*
|
734
|
+
* As this is used by Ruby's pattern matching, it will return an empty hash if given
|
735
|
+
* more keys than there are capturing groups. Given keys will populate the hash in
|
736
|
+
* order but an invalid name will cause the hash to be immediately returned.
|
737
|
+
*
|
738
|
+
* @return [Hash] a hash of capturing group names to submatches
|
739
|
+
* @param [Array<Symbol>, nil] keys an array of Symbol capturing group names or nil to return all names
|
740
|
+
* @example
|
741
|
+
* m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
742
|
+
* m.deconstruct_keys(nil) #=> {:numbers => "123", :letters => "abc"}
|
743
|
+
* m.deconstruct_keys([:numbers]) #=> {:numbers => "123"}
|
744
|
+
* m.deconstruct_keys([:fruit]) #=> {}
|
745
|
+
* m.deconstruct_keys([:letters, :fruit]) #=> {:letters => "abc"}
|
746
|
+
*
|
747
|
+
* @example pattern matching
|
748
|
+
* case RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
749
|
+
* in numbers:, letters:
|
750
|
+
* puts "Numbers: #{numbers}, letters: #{letters}"
|
751
|
+
* else
|
752
|
+
* puts "Unrecognised match"
|
753
|
+
* end
|
754
|
+
*/
|
755
|
+
static VALUE re2_matchdata_deconstruct_keys(VALUE self, VALUE keys) {
|
756
|
+
int i;
|
757
|
+
VALUE capturing_groups, key;
|
758
|
+
re2_matchdata *m;
|
759
|
+
re2_pattern *p;
|
760
|
+
map<string, int> groups;
|
761
|
+
map<string, int>::iterator iterator;
|
762
|
+
|
763
|
+
Data_Get_Struct(self, re2_matchdata, m);
|
764
|
+
Data_Get_Struct(m->regexp, re2_pattern, p);
|
765
|
+
|
766
|
+
groups = p->pattern->NamedCapturingGroups();
|
767
|
+
capturing_groups = rb_hash_new();
|
768
|
+
|
769
|
+
if (NIL_P(keys)) {
|
770
|
+
for (iterator = groups.begin(); iterator != groups.end(); iterator++) {
|
771
|
+
rb_hash_aset(capturing_groups,
|
772
|
+
ID2SYM(rb_intern(iterator->first.data())),
|
773
|
+
re2_matchdata_nth_match(iterator->second, self));
|
774
|
+
}
|
775
|
+
} else {
|
776
|
+
Check_Type(keys, T_ARRAY);
|
777
|
+
|
778
|
+
if (p->pattern->NumberOfCapturingGroups() >= RARRAY_LEN(keys)) {
|
779
|
+
for (i = 0; i < RARRAY_LEN(keys); i++) {
|
780
|
+
key = rb_ary_entry(keys, i);
|
781
|
+
Check_Type(key, T_SYMBOL);
|
782
|
+
string name(rb_id2name(SYM2ID(key)));
|
783
|
+
|
784
|
+
if (groups.count(name) == 0) {
|
785
|
+
break;
|
786
|
+
}
|
787
|
+
|
788
|
+
rb_hash_aset(capturing_groups, key, re2_matchdata_nth_match(groups[name], self));
|
789
|
+
}
|
790
|
+
}
|
791
|
+
}
|
792
|
+
|
793
|
+
return capturing_groups;
|
794
|
+
}
|
795
|
+
|
690
796
|
/*
|
691
797
|
* Returns a new RE2 object with a compiled version of
|
692
798
|
* +pattern+ stored inside. Equivalent to +RE2.new+.
|
@@ -1245,7 +1351,7 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
|
|
1245
1351
|
*
|
1246
1352
|
* @return [Boolean] whether the match was successful
|
1247
1353
|
*/
|
1248
|
-
static VALUE
|
1354
|
+
static VALUE re2_regexp_match_p(VALUE self, VALUE text) {
|
1249
1355
|
VALUE argv[2];
|
1250
1356
|
argv[0] = text;
|
1251
1357
|
argv[1] = INT2FIX(0);
|
@@ -1514,6 +1620,7 @@ static VALUE re2_set_compile(VALUE self) {
|
|
1514
1620
|
* @return [Bool] whether the underlying re2 outputs error information from Set matches
|
1515
1621
|
*/
|
1516
1622
|
static VALUE re2_set_match_raises_errors_p(VALUE self) {
|
1623
|
+
UNUSED(self);
|
1517
1624
|
#ifdef HAVE_ERROR_INFO_ARGUMENT
|
1518
1625
|
return Qtrue;
|
1519
1626
|
#else
|
@@ -1526,18 +1633,40 @@ static VALUE re2_set_match_raises_errors_p(VALUE self) {
|
|
1526
1633
|
* integer indices of the matching patterns if matched or an empty array if
|
1527
1634
|
* there are no matches.
|
1528
1635
|
*
|
1529
|
-
* @
|
1530
|
-
*
|
1531
|
-
* @
|
1532
|
-
*
|
1533
|
-
*
|
1534
|
-
*
|
1535
|
-
*
|
1536
|
-
*
|
1537
|
-
*
|
1538
|
-
*
|
1539
|
-
*
|
1540
|
-
*
|
1636
|
+
* @return [Array<Integer>]
|
1637
|
+
*
|
1638
|
+
* @overload match(str)
|
1639
|
+
* Returns an array of integer indices of patterns matching the given string
|
1640
|
+
* (if any). Raises exceptions if there are any errors while matching.
|
1641
|
+
*
|
1642
|
+
* @param [String] str the text to match against
|
1643
|
+
* @return [Array<Integer>] the indices of matching regexps
|
1644
|
+
* @raise [MatchError] if an error occurs while matching
|
1645
|
+
* @raise [UnsupportedError] if the underlying version of re2 does not output error information
|
1646
|
+
* @example
|
1647
|
+
* set = RE2::Set.new
|
1648
|
+
* set.add("abc")
|
1649
|
+
* set.add("def")
|
1650
|
+
* set.compile
|
1651
|
+
* set.match("abcdef") # => [0, 1]
|
1652
|
+
*
|
1653
|
+
* @overload match(str, options)
|
1654
|
+
* Returns an array of integer indices of patterns matching the given string
|
1655
|
+
* (if any). Raises exceptions if there are any errors while matching and the
|
1656
|
+
* :exception option is set to true.
|
1657
|
+
*
|
1658
|
+
* @param [String] str the text to match against
|
1659
|
+
* @param [Hash] options the options with which to match
|
1660
|
+
* @option options [Boolean] :exception (true) whether to raise exceptions with re2's error information (not supported on ABI version 0 of re2)
|
1661
|
+
* @return [Array<Integer>] the indices of matching regexps
|
1662
|
+
* @raise [MatchError] if an error occurs while matching
|
1663
|
+
* @raise [UnsupportedError] if the underlying version of re2 does not output error information
|
1664
|
+
* @example
|
1665
|
+
* set = RE2::Set.new
|
1666
|
+
* set.add("abc")
|
1667
|
+
* set.add("def")
|
1668
|
+
* set.compile
|
1669
|
+
* set.match("abcdef", :exception => true) # => [0, 1]
|
1541
1670
|
*/
|
1542
1671
|
static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
|
1543
1672
|
VALUE str, options, exception_option;
|
@@ -1644,6 +1773,10 @@ void Init_re2(void) {
|
|
1644
1773
|
RUBY_METHOD_FUNC(re2_matchdata_to_s), 0);
|
1645
1774
|
rb_define_method(re2_cMatchData, "inspect",
|
1646
1775
|
RUBY_METHOD_FUNC(re2_matchdata_inspect), 0);
|
1776
|
+
rb_define_method(re2_cMatchData, "deconstruct",
|
1777
|
+
RUBY_METHOD_FUNC(re2_matchdata_deconstruct), 0);
|
1778
|
+
rb_define_method(re2_cMatchData, "deconstruct_keys",
|
1779
|
+
RUBY_METHOD_FUNC(re2_matchdata_deconstruct_keys), 1);
|
1647
1780
|
|
1648
1781
|
rb_define_method(re2_cScanner, "string",
|
1649
1782
|
RUBY_METHOD_FUNC(re2_scanner_string), 0);
|
@@ -1674,11 +1807,11 @@ void Init_re2(void) {
|
|
1674
1807
|
rb_define_method(re2_cRegexp, "match", RUBY_METHOD_FUNC(re2_regexp_match),
|
1675
1808
|
-1);
|
1676
1809
|
rb_define_method(re2_cRegexp, "match?",
|
1677
|
-
RUBY_METHOD_FUNC(
|
1810
|
+
RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
|
1678
1811
|
rb_define_method(re2_cRegexp, "=~",
|
1679
|
-
RUBY_METHOD_FUNC(
|
1812
|
+
RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
|
1680
1813
|
rb_define_method(re2_cRegexp, "===",
|
1681
|
-
RUBY_METHOD_FUNC(
|
1814
|
+
RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
|
1682
1815
|
rb_define_method(re2_cRegexp, "scan",
|
1683
1816
|
RUBY_METHOD_FUNC(re2_regexp_scan), 1);
|
1684
1817
|
rb_define_method(re2_cRegexp, "to_s", RUBY_METHOD_FUNC(re2_regexp_to_s), 0);
|
data/spec/re2/match_data_spec.rb
CHANGED
@@ -241,4 +241,62 @@ RSpec.describe RE2::MatchData do
|
|
241
241
|
expect(md.end(:foo)).to be_nil
|
242
242
|
end
|
243
243
|
end
|
244
|
+
|
245
|
+
describe "#deconstruct" do
|
246
|
+
it "returns all capturing groups" do
|
247
|
+
md = RE2::Regexp.new('w(o)(o)').match('woo')
|
248
|
+
|
249
|
+
expect(md.deconstruct).to eq(['o', 'o'])
|
250
|
+
end
|
251
|
+
|
252
|
+
it "includes optional capturing groups as nil" do
|
253
|
+
md = RE2::Regexp.new('w(.)(.)(.)?').match('woo')
|
254
|
+
|
255
|
+
expect(md.deconstruct).to eq(['o', 'o', nil])
|
256
|
+
end
|
257
|
+
end
|
258
|
+
|
259
|
+
describe "#deconstruct_keys" do
|
260
|
+
it "returns all named captures if given nil" do
|
261
|
+
md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
262
|
+
|
263
|
+
expect(md.deconstruct_keys(nil)).to eq(:numbers => '123', :letters => 'abc')
|
264
|
+
end
|
265
|
+
|
266
|
+
it "returns only named captures if given names" do
|
267
|
+
md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
268
|
+
|
269
|
+
expect(md.deconstruct_keys([:numbers])).to eq(:numbers => '123')
|
270
|
+
end
|
271
|
+
|
272
|
+
it "returns named captures up until an invalid name is given" do
|
273
|
+
md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
274
|
+
|
275
|
+
expect(md.deconstruct_keys([:numbers, :punctuation])).to eq(:numbers => '123')
|
276
|
+
end
|
277
|
+
|
278
|
+
it "returns an empty hash if given more capture names than exist" do
|
279
|
+
md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
280
|
+
|
281
|
+
expect(md.deconstruct_keys([:numbers, :letters, :punctuation])).to eq({})
|
282
|
+
end
|
283
|
+
|
284
|
+
it "returns an empty hash if there are no named capturing groups" do
|
285
|
+
md = RE2::Regexp.new('(\d+) ([a-zA-Z]+)').match('123 abc')
|
286
|
+
|
287
|
+
expect(md.deconstruct_keys(nil)).to eq({})
|
288
|
+
end
|
289
|
+
|
290
|
+
it "raises an error if given a non-array of keys" do
|
291
|
+
md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
292
|
+
|
293
|
+
expect { md.deconstruct_keys(0) }.to raise_error(TypeError)
|
294
|
+
end
|
295
|
+
|
296
|
+
it "raises an error if given keys as non-symbols" do
|
297
|
+
md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
298
|
+
|
299
|
+
expect { md.deconstruct_keys([0]) }.to raise_error(TypeError)
|
300
|
+
end
|
301
|
+
end
|
244
302
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: re2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.5.0
|
4
|
+
version: 1.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Paul Mucur
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-10-
|
11
|
+
date: 2022-10-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|