re2 1.5.0 → 1.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8588a481a52f07a6c965094bb69c1aee177add80d071aee3a1771f97e19dc4f6
4
- data.tar.gz: bb123e3a2d5352379548bf68f57fd195c8aa458e2da06f0864b7135682c19b1d
3
+ metadata.gz: 2151621115d04b197403c0f67276347c205928136405c05aac1fc70bf3a00dec
4
+ data.tar.gz: 8ba805a95c535ab7d30296a830448dea51da4f5f699a6aa3f858296b2590d188
5
5
  SHA512:
6
- metadata.gz: b421f15ace868de905dece66db9e607636624cf2548eabe6be9979c397ac825035c0b24a5d8e8d6a351aa5809f6265d07f7b67a4c720a81e0b4e78f8d4599c67
7
- data.tar.gz: fb7314faaf44a8847eadf207b0226ea71071ddb02ae41b7644ade69b69c06ff86f7c5d1a34042b4ef90633b98b6185bc07a6e2241bfa570089525e73eca6a61a
6
+ metadata.gz: 010e20dae629df302c35c6d5cc412c5a0add1cc3f0b6357114ce64c5d77e6c3bbca7401e57c0b86be5fe87acae17162cd7c860c079dc2c57898761f9fd8d4ce4
7
+ data.tar.gz: 6bfa2db432e91b87ab5d6fa21babd07a675df3b22904e20f1491322b1d34c8fe1c5814783049c33f84ab8f22e0b3c2ef9c2308161f8694dd6bcbd8ec4da278a4
data/README.md CHANGED
@@ -4,8 +4,8 @@ re2 [![Build Status](https://github.com/mudge/re2/actions/workflows/tests.yml/ba
4
4
  A Ruby binding to [re2][], an "efficient, principled regular expression
5
5
  library".
6
6
 
7
- **Current version:** 1.5.0
8
- **Supported Ruby versions:** 1.8.7, 1.9.3, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 3.0
7
+ **Current version:** 1.6.0
8
+ **Supported Ruby versions:** 1.8.7, 1.9.3, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 3.0, 3.1
9
9
  **Supported re2 versions:** libre2.0 (< 2020-03-02), libre2.1 (2020-03-02), libre2.6 (2020-03-03), libre2.7 (2020-05-01), libre2.8 (2020-07-06), libre2.9 (2020-11-01)
10
10
 
11
11
  Installation
@@ -137,7 +137,7 @@ the pattern. After all patterns have been added, the set can be compiled using
137
137
  `RE2::Set#compile`, and then `RE2::Set#match` will return an `Array<Integer>`
138
138
  containing the indices of all the patterns that matched.
139
139
 
140
- ``` ruby
140
+ ```ruby
141
141
  set = RE2::Set.new
142
142
  set.add("abc") #=> 0
143
143
  set.add("def") #=> 1
@@ -147,6 +147,27 @@ set.match("abcdefghi") #=> [0, 1, 2]
147
147
  set.match("ghidefabc") #=> [2, 1, 0]
148
148
  ```
149
149
 
150
+ As of 1.6.0, you can use [Ruby's pattern matching](https://docs.ruby-lang.org/en/3.0/syntax/pattern_matching_rdoc.html) against `RE2::MatchData` with both array patterns and hash patterns:
151
+
152
+ ```ruby
153
+ case RE2('(\w+) (\d+)').match("Alice 42")
154
+ in [name, age]
155
+ puts "My name is #{name} and I am #{age} years old"
156
+ else
157
+ puts "No match!"
158
+ end
159
+ # My name is Alice and I am 42 years old
160
+
161
+
162
+ case RE2('(?P<name>\w+) (?P<age>\d+)').match("Alice 42")
163
+ in {name:, age:}
164
+ puts "My name is #{name} and I am #{age} years old"
165
+ else
166
+ puts "No match!"
167
+ end
168
+ # My name is Alice and I am 42 years old
169
+ ```
170
+
150
171
  Features
151
172
  --------
152
173
 
@@ -185,6 +206,8 @@ Features
185
206
  [`RE2.escape(unquoted)`](https://github.com/google/re2/blob/2016-02-01/re2/re2.h#L418) and
186
207
  `RE2.quote(unquoted)`
187
208
 
209
+ * Pattern matching with `RE2::MatchData`
210
+
188
211
  Contributions
189
212
  -------------
190
213
 
data/ext/re2/re2.cc CHANGED
@@ -687,6 +687,112 @@ static VALUE re2_matchdata_inspect(VALUE self) {
687
687
  return result;
688
688
  }
689
689
 
690
+ /*
691
+ * Returns the array of submatches for pattern matching.
692
+ *
693
+ * @return [Array<String, nil>] the array of submatches
694
+ * @example
695
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
696
+ * m.deconstruct #=> ["123"]
697
+ *
698
+ * @example pattern matching
699
+ * case RE2::Regexp.new('(\d+) (\d+)').match("bob 123 456")
700
+ * in x, y
701
+ * puts "Matched #{x} #{y}"
702
+ * else
703
+ * puts "Unrecognised match"
704
+ * end
705
+ */
706
+ static VALUE re2_matchdata_deconstruct(VALUE self) {
707
+ int i;
708
+ re2_matchdata *m;
709
+ re2_pattern *p;
710
+ re2::StringPiece *match;
711
+ VALUE array;
712
+
713
+ Data_Get_Struct(self, re2_matchdata, m);
714
+ Data_Get_Struct(m->regexp, re2_pattern, p);
715
+
716
+ array = rb_ary_new2(m->number_of_matches - 1);
717
+ for (i = 1; i < m->number_of_matches; i++) {
718
+ match = &m->matches[i];
719
+
720
+ if (match->empty()) {
721
+ rb_ary_push(array, Qnil);
722
+ } else {
723
+ rb_ary_push(array, ENCODED_STR_NEW(match->data(), match->size(),
724
+ p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"));
725
+ }
726
+ }
727
+
728
+ return array;
729
+ }
730
+
731
+ /*
732
+ * Returns a hash of capturing group names to submatches for pattern matching.
733
+ *
734
+ * As this is used by Ruby's pattern matching, it will return an empty hash if given
735
+ * more keys than there are capturing groups. Given keys will populate the hash in
736
+ * order but an invalid name will cause the hash to be immediately returned.
737
+ *
738
+ * @return [Hash] a hash of capturing group names to submatches
739
+ * @param [Array<Symbol>, nil] keys an array of Symbol capturing group names or nil to return all names
740
+ * @example
741
+ * m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
742
+ * m.deconstruct_keys(nil) #=> {:numbers => "123", :letters => "abc"}
743
+ * m.deconstruct_keys([:numbers]) #=> {:numbers => "123"}
744
+ * m.deconstruct_keys([:fruit]) #=> {}
745
+ * m.deconstruct_keys([:letters, :fruit]) #=> {:letters => "abc"}
746
+ *
747
+ * @example pattern matching
748
+ * case RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
749
+ * in numbers:, letters:
750
+ * puts "Numbers: #{numbers}, letters: #{letters}"
751
+ * else
752
+ * puts "Unrecognised match"
753
+ * end
754
+ */
755
+ static VALUE re2_matchdata_deconstruct_keys(VALUE self, VALUE keys) {
756
+ int i;
757
+ VALUE capturing_groups, key;
758
+ re2_matchdata *m;
759
+ re2_pattern *p;
760
+ map<string, int> groups;
761
+ map<string, int>::iterator iterator;
762
+
763
+ Data_Get_Struct(self, re2_matchdata, m);
764
+ Data_Get_Struct(m->regexp, re2_pattern, p);
765
+
766
+ groups = p->pattern->NamedCapturingGroups();
767
+ capturing_groups = rb_hash_new();
768
+
769
+ if (NIL_P(keys)) {
770
+ for (iterator = groups.begin(); iterator != groups.end(); iterator++) {
771
+ rb_hash_aset(capturing_groups,
772
+ ID2SYM(rb_intern(iterator->first.data())),
773
+ re2_matchdata_nth_match(iterator->second, self));
774
+ }
775
+ } else {
776
+ Check_Type(keys, T_ARRAY);
777
+
778
+ if (p->pattern->NumberOfCapturingGroups() >= RARRAY_LEN(keys)) {
779
+ for (i = 0; i < RARRAY_LEN(keys); i++) {
780
+ key = rb_ary_entry(keys, i);
781
+ Check_Type(key, T_SYMBOL);
782
+ string name(rb_id2name(SYM2ID(key)));
783
+
784
+ if (groups.count(name) == 0) {
785
+ break;
786
+ }
787
+
788
+ rb_hash_aset(capturing_groups, key, re2_matchdata_nth_match(groups[name], self));
789
+ }
790
+ }
791
+ }
792
+
793
+ return capturing_groups;
794
+ }
795
+
690
796
  /*
691
797
  * Returns a new RE2 object with a compiled version of
692
798
  * +pattern+ stored inside. Equivalent to +RE2.new+.
@@ -1245,7 +1351,7 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
1245
1351
  *
1246
1352
  * @return [Boolean] whether the match was successful
1247
1353
  */
1248
- static VALUE re2_regexp_match_query(VALUE self, VALUE text) {
1354
+ static VALUE re2_regexp_match_p(VALUE self, VALUE text) {
1249
1355
  VALUE argv[2];
1250
1356
  argv[0] = text;
1251
1357
  argv[1] = INT2FIX(0);
@@ -1514,6 +1620,7 @@ static VALUE re2_set_compile(VALUE self) {
1514
1620
  * @return [Boolean] whether the underlying re2 outputs error information from Set matches
1515
1621
  */
1516
1622
  static VALUE re2_set_match_raises_errors_p(VALUE self) {
1623
+ UNUSED(self);
1517
1624
  #ifdef HAVE_ERROR_INFO_ARGUMENT
1518
1625
  return Qtrue;
1519
1626
  #else
@@ -1526,18 +1633,40 @@ static VALUE re2_set_match_raises_errors_p(VALUE self) {
1526
1633
  * integer indices of the matching patterns if matched or an empty array if
1527
1634
  * there are no matches.
1528
1635
  *
1529
- * @param [String] str the text to match against
1530
- * @param [Hash] options the options with which to match
1531
- * @option options [Boolean] :exception (true) whether to raise exceptions with re2's error information (not supported on ABI version 0 of re2)
1532
- * @return [Array<Integer>] the indices of matching regexps
1533
- * @raise [MatchError] if an error occurs while matching
1534
- * @raise [UnsupportedError] if using the :exception option against a version of re2 that does not support it
1535
- * @example
1536
- * set = RE2::Set.new
1537
- * set.add("abc")
1538
- * set.add("def")
1539
- * set.compile
1540
- * set.match("abcdef") # => [0, 1]
1636
+ * @return [Array<Integer>]
1637
+ *
1638
+ * @overload match(str)
1639
+ * Returns an array of integer indices of patterns matching the given string
1640
+ * (if any). Raises exceptions if there are any errors while matching.
1641
+ *
1642
+ * @param [String] str the text to match against
1643
+ * @return [Array<Integer>] the indices of matching regexps
1644
+ * @raise [MatchError] if an error occurs while matching
1645
+ * @raise [UnsupportedError] if the underlying version of re2 does not output error information
1646
+ * @example
1647
+ * set = RE2::Set.new
1648
+ * set.add("abc")
1649
+ * set.add("def")
1650
+ * set.compile
1651
+ * set.match("abcdef") # => [0, 1]
1652
+ *
1653
+ * @overload match(str, options)
1654
+ * Returns an array of integer indices of patterns matching the given string
1655
+ * (if any). Raises exceptions if there are any errors while matching and the
1656
+ * :exception option is set to true.
1657
+ *
1658
+ * @param [String] str the text to match against
1659
+ * @param [Hash] options the options with which to match
1660
+ * @option options [Boolean] :exception (true) whether to raise exceptions with re2's error information (not supported on ABI version 0 of re2)
1661
+ * @return [Array<Integer>] the indices of matching regexps
1662
+ * @raise [MatchError] if an error occurs while matching
1663
+ * @raise [UnsupportedError] if the underlying version of re2 does not output error information
1664
+ * @example
1665
+ * set = RE2::Set.new
1666
+ * set.add("abc")
1667
+ * set.add("def")
1668
+ * set.compile
1669
+ * set.match("abcdef", :exception => true) # => [0, 1]
1541
1670
  */
1542
1671
  static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
1543
1672
  VALUE str, options, exception_option;
@@ -1644,6 +1773,10 @@ void Init_re2(void) {
1644
1773
  RUBY_METHOD_FUNC(re2_matchdata_to_s), 0);
1645
1774
  rb_define_method(re2_cMatchData, "inspect",
1646
1775
  RUBY_METHOD_FUNC(re2_matchdata_inspect), 0);
1776
+ rb_define_method(re2_cMatchData, "deconstruct",
1777
+ RUBY_METHOD_FUNC(re2_matchdata_deconstruct), 0);
1778
+ rb_define_method(re2_cMatchData, "deconstruct_keys",
1779
+ RUBY_METHOD_FUNC(re2_matchdata_deconstruct_keys), 1);
1647
1780
 
1648
1781
  rb_define_method(re2_cScanner, "string",
1649
1782
  RUBY_METHOD_FUNC(re2_scanner_string), 0);
@@ -1674,11 +1807,11 @@ void Init_re2(void) {
1674
1807
  rb_define_method(re2_cRegexp, "match", RUBY_METHOD_FUNC(re2_regexp_match),
1675
1808
  -1);
1676
1809
  rb_define_method(re2_cRegexp, "match?",
1677
- RUBY_METHOD_FUNC(re2_regexp_match_query), 1);
1810
+ RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
1678
1811
  rb_define_method(re2_cRegexp, "=~",
1679
- RUBY_METHOD_FUNC(re2_regexp_match_query), 1);
1812
+ RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
1680
1813
  rb_define_method(re2_cRegexp, "===",
1681
- RUBY_METHOD_FUNC(re2_regexp_match_query), 1);
1814
+ RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
1682
1815
  rb_define_method(re2_cRegexp, "scan",
1683
1816
  RUBY_METHOD_FUNC(re2_regexp_scan), 1);
1684
1817
  rb_define_method(re2_cRegexp, "to_s", RUBY_METHOD_FUNC(re2_regexp_to_s), 0);
@@ -241,4 +241,62 @@ RSpec.describe RE2::MatchData do
241
241
  expect(md.end(:foo)).to be_nil
242
242
  end
243
243
  end
244
+
245
# Array-pattern support: #deconstruct exposes the capturing groups as an Array.
describe "#deconstruct" do
  it "returns all capturing groups" do
    pattern = RE2::Regexp.new('w(o)(o)')
    submatches = pattern.match('woo').deconstruct

    expect(submatches).to eq(%w[o o])
  end

  it "includes optional capturing groups as nil" do
    pattern = RE2::Regexp.new('w(.)(.)(.)?')
    submatches = pattern.match('woo').deconstruct

    # The third group is optional and does not take part in the match.
    expect(submatches).to eq(['o', 'o', nil])
  end
end
258
+
259
# Hash-pattern support: #deconstruct_keys maps capturing group names to submatches.
describe "#deconstruct_keys" do
  # Match with two named groups, shared by most examples below.
  let(:named_match) do
    RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
  end

  it "returns all named captures if given nil" do
    captures = named_match.deconstruct_keys(nil)

    expect(captures).to eq(:numbers => '123', :letters => 'abc')
  end

  it "returns only named captures if given names" do
    captures = named_match.deconstruct_keys([:numbers])

    expect(captures).to eq(:numbers => '123')
  end

  it "returns named captures up until an invalid name is given" do
    captures = named_match.deconstruct_keys([:numbers, :punctuation])

    expect(captures).to eq(:numbers => '123')
  end

  it "returns an empty hash if given more capture names than exist" do
    captures = named_match.deconstruct_keys([:numbers, :letters, :punctuation])

    expect(captures).to eq({})
  end

  it "returns an empty hash if there are no named capturing groups" do
    unnamed_match = RE2::Regexp.new('(\d+) ([a-zA-Z]+)').match('123 abc')

    expect(unnamed_match.deconstruct_keys(nil)).to eq({})
  end

  it "raises an error if given a non-array of keys" do
    expect { named_match.deconstruct_keys(0) }.to raise_error(TypeError)
  end

  it "raises an error if given keys as non-symbols" do
    expect { named_match.deconstruct_keys([0]) }.to raise_error(TypeError)
  end
end
244
302
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: re2
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.0
4
+ version: 1.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paul Mucur
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-10-16 00:00:00.000000000 Z
11
+ date: 2022-10-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake-compiler