re2 1.5.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8588a481a52f07a6c965094bb69c1aee177add80d071aee3a1771f97e19dc4f6
4
- data.tar.gz: bb123e3a2d5352379548bf68f57fd195c8aa458e2da06f0864b7135682c19b1d
3
+ metadata.gz: 2151621115d04b197403c0f67276347c205928136405c05aac1fc70bf3a00dec
4
+ data.tar.gz: 8ba805a95c535ab7d30296a830448dea51da4f5f699a6aa3f858296b2590d188
5
5
  SHA512:
6
- metadata.gz: b421f15ace868de905dece66db9e607636624cf2548eabe6be9979c397ac825035c0b24a5d8e8d6a351aa5809f6265d07f7b67a4c720a81e0b4e78f8d4599c67
7
- data.tar.gz: fb7314faaf44a8847eadf207b0226ea71071ddb02ae41b7644ade69b69c06ff86f7c5d1a34042b4ef90633b98b6185bc07a6e2241bfa570089525e73eca6a61a
6
+ metadata.gz: 010e20dae629df302c35c6d5cc412c5a0add1cc3f0b6357114ce64c5d77e6c3bbca7401e57c0b86be5fe87acae17162cd7c860c079dc2c57898761f9fd8d4ce4
7
+ data.tar.gz: 6bfa2db432e91b87ab5d6fa21babd07a675df3b22904e20f1491322b1d34c8fe1c5814783049c33f84ab8f22e0b3c2ef9c2308161f8694dd6bcbd8ec4da278a4
data/README.md CHANGED
@@ -4,8 +4,8 @@ re2 [![Build Status](https://github.com/mudge/re2/actions/workflows/tests.yml/ba
4
4
  A Ruby binding to [re2][], an "efficient, principled regular expression
5
5
  library".
6
6
 
7
- **Current version:** 1.5.0
8
- **Supported Ruby versions:** 1.8.7, 1.9.3, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 3.0
7
+ **Current version:** 1.6.0
8
+ **Supported Ruby versions:** 1.8.7, 1.9.3, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 3.0, 3.1
9
9
  **Supported re2 versions:** libre2.0 (< 2020-03-02), libre2.1 (2020-03-02), libre2.6 (2020-03-03), libre2.7 (2020-05-01), libre2.8 (2020-07-06), libre2.9 (2020-11-01)
10
10
 
11
11
  Installation
@@ -137,7 +137,7 @@ the pattern. After all patterns have been added, the set can be compiled using
137
137
  `RE2::Set#compile`, and then `RE2::Set#match` will return an `Array<Integer>`
138
138
  containing the indices of all the patterns that matched.
139
139
 
140
- ``` ruby
140
+ ```ruby
141
141
  set = RE2::Set.new
142
142
  set.add("abc") #=> 0
143
143
  set.add("def") #=> 1
@@ -147,6 +147,27 @@ set.match("abcdefghi") #=> [0, 1, 2]
147
147
  set.match("ghidefabc") #=> [2, 1, 0]
148
148
  ```
149
149
 
150
+ As of 1.6.0, you can use [Ruby's pattern matching](https://docs.ruby-lang.org/en/3.0/syntax/pattern_matching_rdoc.html) against `RE2::MatchData` with both array patterns and hash patterns:
151
+
152
+ ```ruby
153
+ case RE2('(\w+) (\d+)').match("Alice 42")
154
+ in [name, age]
155
+ puts "My name is #{name} and I am #{age} years old"
156
+ else
157
+ puts "No match!"
158
+ end
159
+ # My name is Alice and I am 42 years old
160
+
161
+
162
+ case RE2('(?P<name>\w+) (?P<age>\d+)').match("Alice 42")
163
+ in {name:, age:}
164
+ puts "My name is #{name} and I am #{age} years old"
165
+ else
166
+ puts "No match!"
167
+ end
168
+ # My name is Alice and I am 42 years old
169
+ ```
170
+
150
171
  Features
151
172
  --------
152
173
 
@@ -185,6 +206,8 @@ Features
185
206
  [`RE2.escape(unquoted)`](https://github.com/google/re2/blob/2016-02-01/re2/re2.h#L418) and
186
207
  `RE2.quote(unquoted)`
187
208
 
209
+ * Pattern matching with `RE2::MatchData`
210
+
188
211
  Contributions
189
212
  -------------
190
213
 
data/ext/re2/re2.cc CHANGED
@@ -687,6 +687,112 @@ static VALUE re2_matchdata_inspect(VALUE self) {
687
687
  return result;
688
688
  }
689
689
 
690
+ /*
691
+ * Returns the array of submatches for pattern matching.
692
+ *
693
+ * @return [Array<String, nil>] the array of submatches
694
+ * @example
695
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
696
+ * m.deconstruct #=> ["123"]
697
+ *
698
+ * @example pattern matching
699
+ * case RE2::Regexp.new('(\d+) (\d+)').match("bob 123 456")
700
+ * in x, y
701
+ * puts "Matched #{x} #{y}"
702
+ * else
703
+ * puts "Unrecognised match"
704
+ * end
705
+ */
706
+ static VALUE re2_matchdata_deconstruct(VALUE self) {
707
+ int i;
708
+ re2_matchdata *m;
709
+ re2_pattern *p;
710
+ re2::StringPiece *match;
711
+ VALUE array;
712
+
713
+ Data_Get_Struct(self, re2_matchdata, m);
714
+ Data_Get_Struct(m->regexp, re2_pattern, p);
715
+
716
+ array = rb_ary_new2(m->number_of_matches - 1);
717
+ for (i = 1; i < m->number_of_matches; i++) {
718
+ match = &m->matches[i];
719
+
720
+ if (match->empty()) {
721
+ rb_ary_push(array, Qnil);
722
+ } else {
723
+ rb_ary_push(array, ENCODED_STR_NEW(match->data(), match->size(),
724
+ p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"));
725
+ }
726
+ }
727
+
728
+ return array;
729
+ }
730
+
731
+ /*
732
+ * Returns a hash of capturing group names to submatches for pattern matching.
733
+ *
734
+ * As this is used by Ruby's pattern matching, it will return an empty hash if given
735
+ * more keys than there are capturing groups. Given keys will populate the hash in
736
+ * order but an invalid name will cause the hash to be immediately returned.
737
+ *
738
+ * @return [Hash] a hash of capturing group names to submatches
739
+ * @param [Array<Symbol>, nil] keys an array of Symbol capturing group names or nil to return all names
740
+ * @example
741
+ * m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
742
+ * m.deconstruct_keys(nil) #=> {:numbers => "123", :letters => "abc"}
743
+ * m.deconstruct_keys([:numbers]) #=> {:numbers => "123"}
744
+ * m.deconstruct_keys([:fruit]) #=> {}
745
+ * m.deconstruct_keys([:letters, :fruit]) #=> {:letters => "abc"}
746
+ *
747
+ * @example pattern matching
748
+ * case RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
749
+ * in numbers:, letters:
750
+ * puts "Numbers: #{numbers}, letters: #{letters}"
751
+ * else
752
+ * puts "Unrecognised match"
753
+ * end
754
+ */
755
+ static VALUE re2_matchdata_deconstruct_keys(VALUE self, VALUE keys) {
756
+ int i;
757
+ VALUE capturing_groups, key;
758
+ re2_matchdata *m;
759
+ re2_pattern *p;
760
+ map<string, int> groups;
761
+ map<string, int>::iterator iterator;
762
+
763
+ Data_Get_Struct(self, re2_matchdata, m);
764
+ Data_Get_Struct(m->regexp, re2_pattern, p);
765
+
766
+ groups = p->pattern->NamedCapturingGroups();
767
+ capturing_groups = rb_hash_new();
768
+
769
+ if (NIL_P(keys)) {
770
+ for (iterator = groups.begin(); iterator != groups.end(); iterator++) {
771
+ rb_hash_aset(capturing_groups,
772
+ ID2SYM(rb_intern(iterator->first.data())),
773
+ re2_matchdata_nth_match(iterator->second, self));
774
+ }
775
+ } else {
776
+ Check_Type(keys, T_ARRAY);
777
+
778
+ if (p->pattern->NumberOfCapturingGroups() >= RARRAY_LEN(keys)) {
779
+ for (i = 0; i < RARRAY_LEN(keys); i++) {
780
+ key = rb_ary_entry(keys, i);
781
+ Check_Type(key, T_SYMBOL);
782
+ string name(rb_id2name(SYM2ID(key)));
783
+
784
+ if (groups.count(name) == 0) {
785
+ break;
786
+ }
787
+
788
+ rb_hash_aset(capturing_groups, key, re2_matchdata_nth_match(groups[name], self));
789
+ }
790
+ }
791
+ }
792
+
793
+ return capturing_groups;
794
+ }
795
+
690
796
  /*
691
797
  * Returns a new RE2 object with a compiled version of
692
798
  * +pattern+ stored inside. Equivalent to +RE2.new+.
@@ -1245,7 +1351,7 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
1245
1351
  *
1246
1352
  * @return [Boolean] whether the match was successful
1247
1353
  */
1248
- static VALUE re2_regexp_match_query(VALUE self, VALUE text) {
1354
+ static VALUE re2_regexp_match_p(VALUE self, VALUE text) {
1249
1355
  VALUE argv[2];
1250
1356
  argv[0] = text;
1251
1357
  argv[1] = INT2FIX(0);
@@ -1514,6 +1620,7 @@ static VALUE re2_set_compile(VALUE self) {
1514
1620
  * @return [Boolean] whether the underlying re2 outputs error information from Set matches
1515
1621
  */
1516
1622
  static VALUE re2_set_match_raises_errors_p(VALUE self) {
1623
+ UNUSED(self);
1517
1624
  #ifdef HAVE_ERROR_INFO_ARGUMENT
1518
1625
  return Qtrue;
1519
1626
  #else
@@ -1526,18 +1633,40 @@ static VALUE re2_set_match_raises_errors_p(VALUE self) {
1526
1633
  * integer indices of the matching patterns if matched or an empty array if
1527
1634
  * there are no matches.
1528
1635
  *
1529
- * @param [String] str the text to match against
1530
- * @param [Hash] options the options with which to match
1531
- * @option options [Boolean] :exception (true) whether to raise exceptions with re2's error information (not supported on ABI version 0 of re2)
1532
- * @return [Array<Integer>] the indices of matching regexps
1533
- * @raise [MatchError] if an error occurs while matching
1534
- * @raise [UnsupportedError] if using the :exception option against a version of re2 that does not support it
1535
- * @example
1536
- * set = RE2::Set.new
1537
- * set.add("abc")
1538
- * set.add("def")
1539
- * set.compile
1540
- * set.match("abcdef") # => [0, 1]
1636
+ * @return [Array<Integer>]
1637
+ *
1638
+ * @overload match(str)
1639
+ * Returns an array of integer indices of patterns matching the given string
1640
+ * (if any). Raises exceptions if there are any errors while matching.
1641
+ *
1642
+ * @param [String] str the text to match against
1643
+ * @return [Array<Integer>] the indices of matching regexps
1644
+ * @raise [MatchError] if an error occurs while matching
1645
+ * @raise [UnsupportedError] if the underlying version of re2 does not output error information
1646
+ * @example
1647
+ * set = RE2::Set.new
1648
+ * set.add("abc")
1649
+ * set.add("def")
1650
+ * set.compile
1651
+ * set.match("abcdef") # => [0, 1]
1652
+ *
1653
+ * @overload match(str, options)
1654
+ * Returns an array of integer indices of patterns matching the given string
1655
+ * (if any). Raises exceptions if there are any errors while matching and the
1656
+ * :exception option is set to true.
1657
+ *
1658
+ * @param [String] str the text to match against
1659
+ * @param [Hash] options the options with which to match
1660
+ * @option options [Boolean] :exception (true) whether to raise exceptions with re2's error information (not supported on ABI version 0 of re2)
1661
+ * @return [Array<Integer>] the indices of matching regexps
1662
+ * @raise [MatchError] if an error occurs while matching
1663
+ * @raise [UnsupportedError] if the underlying version of re2 does not output error information
1664
+ * @example
1665
+ * set = RE2::Set.new
1666
+ * set.add("abc")
1667
+ * set.add("def")
1668
+ * set.compile
1669
+ * set.match("abcdef", :exception => true) # => [0, 1]
1541
1670
  */
1542
1671
  static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
1543
1672
  VALUE str, options, exception_option;
@@ -1644,6 +1773,10 @@ void Init_re2(void) {
1644
1773
  RUBY_METHOD_FUNC(re2_matchdata_to_s), 0);
1645
1774
  rb_define_method(re2_cMatchData, "inspect",
1646
1775
  RUBY_METHOD_FUNC(re2_matchdata_inspect), 0);
1776
+ rb_define_method(re2_cMatchData, "deconstruct",
1777
+ RUBY_METHOD_FUNC(re2_matchdata_deconstruct), 0);
1778
+ rb_define_method(re2_cMatchData, "deconstruct_keys",
1779
+ RUBY_METHOD_FUNC(re2_matchdata_deconstruct_keys), 1);
1647
1780
 
1648
1781
  rb_define_method(re2_cScanner, "string",
1649
1782
  RUBY_METHOD_FUNC(re2_scanner_string), 0);
@@ -1674,11 +1807,11 @@ void Init_re2(void) {
1674
1807
  rb_define_method(re2_cRegexp, "match", RUBY_METHOD_FUNC(re2_regexp_match),
1675
1808
  -1);
1676
1809
  rb_define_method(re2_cRegexp, "match?",
1677
- RUBY_METHOD_FUNC(re2_regexp_match_query), 1);
1810
+ RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
1678
1811
  rb_define_method(re2_cRegexp, "=~",
1679
- RUBY_METHOD_FUNC(re2_regexp_match_query), 1);
1812
+ RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
1680
1813
  rb_define_method(re2_cRegexp, "===",
1681
- RUBY_METHOD_FUNC(re2_regexp_match_query), 1);
1814
+ RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
1682
1815
  rb_define_method(re2_cRegexp, "scan",
1683
1816
  RUBY_METHOD_FUNC(re2_regexp_scan), 1);
1684
1817
  rb_define_method(re2_cRegexp, "to_s", RUBY_METHOD_FUNC(re2_regexp_to_s), 0);
@@ -241,4 +241,62 @@ RSpec.describe RE2::MatchData do
241
241
  expect(md.end(:foo)).to be_nil
242
242
  end
243
243
  end
244
+
245
+ describe "#deconstruct" do
246
+ it "returns all capturing groups" do
247
+ md = RE2::Regexp.new('w(o)(o)').match('woo')
248
+
249
+ expect(md.deconstruct).to eq(['o', 'o'])
250
+ end
251
+
252
+ it "includes optional capturing groups as nil" do
253
+ md = RE2::Regexp.new('w(.)(.)(.)?').match('woo')
254
+
255
+ expect(md.deconstruct).to eq(['o', 'o', nil])
256
+ end
257
+ end
258
+
259
+ describe "#deconstruct_keys" do
260
+ it "returns all named captures if given nil" do
261
+ md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
262
+
263
+ expect(md.deconstruct_keys(nil)).to eq(:numbers => '123', :letters => 'abc')
264
+ end
265
+
266
+ it "returns only named captures if given names" do
267
+ md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
268
+
269
+ expect(md.deconstruct_keys([:numbers])).to eq(:numbers => '123')
270
+ end
271
+
272
+ it "returns named captures up until an invalid name is given" do
273
+ md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
274
+
275
+ expect(md.deconstruct_keys([:numbers, :punctuation])).to eq(:numbers => '123')
276
+ end
277
+
278
+ it "returns an empty hash if given more capture names than exist" do
279
+ md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
280
+
281
+ expect(md.deconstruct_keys([:numbers, :letters, :punctuation])).to eq({})
282
+ end
283
+
284
+ it "returns an empty hash if there are no named capturing groups" do
285
+ md = RE2::Regexp.new('(\d+) ([a-zA-Z]+)').match('123 abc')
286
+
287
+ expect(md.deconstruct_keys(nil)).to eq({})
288
+ end
289
+
290
+ it "raises an error if given a non-array of keys" do
291
+ md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
292
+
293
+ expect { md.deconstruct_keys(0) }.to raise_error(TypeError)
294
+ end
295
+
296
+ it "raises an error if given keys as non-symbols" do
297
+ md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
298
+
299
+ expect { md.deconstruct_keys([0]) }.to raise_error(TypeError)
300
+ end
301
+ end
244
302
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: re2
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.0
4
+ version: 1.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paul Mucur
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-10-16 00:00:00.000000000 Z
11
+ date: 2022-10-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake-compiler