re2 2.24.0-aarch64-linux-gnu → 2.25.0-aarch64-linux-gnu

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/re2/re2.cc CHANGED
@@ -51,7 +51,7 @@ static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors,
51
51
  id_max_mem, id_literal, id_never_nl, id_case_sensitive,
52
52
  id_perl_classes, id_word_boundary, id_one_line, id_unanchored,
53
53
  id_anchor, id_anchor_start, id_anchor_both, id_exception,
54
- id_submatches, id_startpos, id_endpos;
54
+ id_submatches, id_startpos, id_endpos, id_symbolize_names;
55
55
 
56
56
  inline VALUE encoded_str_new(const char *str, long length, RE2::Options::Encoding encoding) {
57
57
  if (encoding == RE2::Options::EncodingUTF8) {
@@ -128,6 +128,10 @@ static void parse_re2_options(RE2::Options* re2_options, const VALUE options) {
128
128
  static void re2_matchdata_mark(void *ptr) { /* Marks the Ruby objects referenced by a re2_matchdata (its regexp and text) for the GC. */
129
129
  re2_matchdata *m = reinterpret_cast<re2_matchdata *>(ptr);
130
130
  rb_gc_mark_movable(m->regexp); /* The regexp reference is marked as movable, unlike the text below. */
131
+
132
+ /* Text must not be movable because StringPiece matches hold pointers into
133
+ * its underlying buffer; moving the string would invalidate them.
134
+ */
131
135
  rb_gc_mark(m->text);
132
136
  }
133
137
 
@@ -172,6 +176,10 @@ static const rb_data_type_t re2_matchdata_data_type = {
172
176
  static void re2_scanner_mark(void *ptr) { /* Marks the Ruby objects referenced by a re2_scanner (its regexp and text) for the GC. */
173
177
  re2_scanner *s = reinterpret_cast<re2_scanner *>(ptr);
174
178
  rb_gc_mark_movable(s->regexp); /* The regexp reference is marked as movable, unlike the text below. */
179
+
180
+ /* Text must not be movable because the StringPiece input holds a pointer
181
+ * into its underlying buffer; moving the string would invalidate it.
182
+ */
175
183
  rb_gc_mark(s->text);
176
184
  }
177
185
 
@@ -272,6 +280,34 @@ static re2_scanner *unwrap_re2_scanner(VALUE self) {
272
280
  return c;
273
281
  }
274
282
 
283
+ /*
284
+ * Returns an array of names of all named capturing groups. Names are returned
285
+ * in alphabetical order rather than definition order, as RE2 stores named
286
+ * groups internally in a sorted map.
287
+ *
288
+ * Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
289
+ * returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
290
+ * {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
291
+ *
292
+ * @return [Array<String>] an array of names of named capturing groups (empty if the pattern has none)
293
+ * @example
294
+ * RE2::Regexp.new('(?P<a>\d+) (?P<b>\w+)').names #=> ["a", "b"]
295
+ */
296
+ static VALUE re2_regexp_names(const VALUE self) {
297
+ re2_pattern *p = unwrap_re2_regexp(self);
298
+
299
+ const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
300
+ VALUE names = rb_ary_new2(groups.size()); /* Pre-size the array to the number of named groups. */
301
+
302
+ for (std::map<std::string, int>::const_iterator it = groups.begin(); it != groups.end(); ++it) { /* std::map iterates in key order, hence the alphabetical result. */
303
+ rb_ary_push(names,
304
+ encoded_str_new(it->first.data(), it->first.size(),
305
+ p->pattern->options().encoding())); /* Each name is tagged with the pattern's configured encoding. */
306
+ }
307
+
308
+ return names;
309
+ }
310
+
275
311
  static VALUE re2_matchdata_allocate(VALUE klass) {
276
312
  re2_matchdata *m;
277
313
 
@@ -503,7 +539,7 @@ static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
503
539
  *
504
540
  * @return [Integer] the number of elements
505
541
  * @example
506
- * m = RE2::Regexp.new('(\d+)').match("bob 123")
542
+ * m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
507
543
  * m.size #=> 2
508
544
  * m.length #=> 2
509
545
  */
@@ -520,7 +556,7 @@ static VALUE re2_matchdata_size(const VALUE self) {
520
556
  * @return [Integer, nil] the offset of the start of the match or `nil` if
521
557
  * there is no such submatch
522
558
  * @example
523
- * m = RE2::Regexp.new('ob (\d+)').match("bob 123")
559
+ * m = RE2::Regexp.new('ob (\d+)').partial_match("bob 123")
524
560
  * m.begin(0) #=> 1
525
561
  * m.begin(1) #=> 4
526
562
  */
@@ -545,7 +581,7 @@ static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
545
581
  * @return [Integer, nil] the offset of the character following the end of the
546
582
  * match or `nil` if there is no such match
547
583
  * @example
548
- * m = RE2::Regexp.new('ob (\d+) b').match("bob 123 bob")
584
+ * m = RE2::Regexp.new('ob (\d+) b').partial_match("bob 123 bob")
549
585
  * m.end(0) #=> 9
550
586
  * m.end(1) #=> 7
551
587
  */
@@ -562,12 +598,125 @@ static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
562
598
  }
563
599
  }
564
600
 
601
+ /*
602
+ * Returns the portion of the original string before the match.
603
+ *
604
+ * Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
605
+ * returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
606
+ * {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
607
+ *
608
+ * @return [String, nil] the portion of the original string before the match, or `nil` if the overall match is empty
609
+ * @example
610
+ * m = RE2::Regexp.new('(\d+)').partial_match("bob 123 456")
611
+ * m.pre_match #=> "bob "
612
+ */
613
+ static VALUE re2_matchdata_pre_match(const VALUE self) {
614
+ re2_matchdata *m = unwrap_re2_matchdata(self);
615
+ re2_pattern *p = unwrap_re2_regexp(m->regexp);
616
+
617
+ re2::StringPiece *match = &m->matches[0]; /* Index 0 is the overall match. */
618
+ if (match->empty()) { /* NOTE(review): a zero-length overall match also takes this branch and yields nil; confirm that is intended. */
619
+ return Qnil;
620
+ }
621
+
622
+ long offset = match->data() - RSTRING_PTR(m->text); /* Byte distance from the start of the text buffer to the start of the match. */
623
+
624
+ return encoded_str_new(RSTRING_PTR(m->text), offset,
625
+ p->pattern->options().encoding()); /* Copies the first `offset` bytes of the text. */
626
+ }
627
+
628
+ /*
629
+ * Returns the portion of the original string after the match.
630
+ *
631
+ * Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
632
+ * returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
633
+ * {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
634
+ *
635
+ * @return [String, nil] the portion of the original string after the match, or `nil` if the overall match is empty
636
+ * @example
637
+ * m = RE2::Regexp.new('(\d+)').partial_match("bob 123 456")
638
+ * m.post_match #=> " 456"
639
+ */
640
+ static VALUE re2_matchdata_post_match(const VALUE self) {
641
+ re2_matchdata *m = unwrap_re2_matchdata(self);
642
+ re2_pattern *p = unwrap_re2_regexp(m->regexp);
643
+
644
+ re2::StringPiece *match = &m->matches[0]; /* Index 0 is the overall match. */
645
+ if (match->empty()) { /* NOTE(review): a zero-length overall match also takes this branch and yields nil; confirm that is intended. */
646
+ return Qnil;
647
+ }
648
+
649
+ long start = (match->data() - RSTRING_PTR(m->text)) + match->size(); /* Byte offset just past the end of the match. */
650
+ long remaining = RSTRING_LEN(m->text) - start; /* Number of bytes left in the text after the match. */
651
+
652
+ return encoded_str_new(RSTRING_PTR(m->text) + start, remaining,
653
+ p->pattern->options().encoding());
654
+ }
655
+
656
+ /*
657
+ * Returns a two-element array containing the beginning and ending offsets of
658
+ * the nth match, measured in characters rather than bytes.
659
+ *
660
+ * @param [Integer, String, Symbol] n the name or number of the match
661
+ * @return [Array<Integer>, nil] a two-element array with the beginning and
662
+ * ending offsets of the match or `nil` if there is no such match
663
+ * @example
664
+ * m = RE2::Regexp.new('ob (\d+)').partial_match("bob 123")
665
+ * m.offset(0) #=> [1, 7]
666
+ * m.offset(1) #=> [4, 7]
667
+ */
668
+ static VALUE re2_matchdata_offset(const VALUE self, VALUE n) {
669
+ re2_matchdata *m = unwrap_re2_matchdata(self);
670
+
671
+ re2::StringPiece *match = re2_matchdata_find_match(n, self);
672
+ if (match == NULL) { /* No match could be resolved for n. */
673
+ return Qnil;
674
+ }
675
+
676
+ long start = match->data() - RSTRING_PTR(m->text); /* Byte offset of the match start within the text buffer. */
677
+ long end_pos = start + match->size(); /* Byte offset just past the end of the match. */
678
+
679
+ VALUE array = rb_ary_new2(2);
680
+ rb_ary_push(array, LONG2NUM(rb_str_sublen(m->text, start))); /* rb_str_sublen converts a byte offset into a character offset. */
681
+ rb_ary_push(array, LONG2NUM(rb_str_sublen(m->text, end_pos)));
682
+
683
+ return array;
684
+ }
685
+
686
+ /*
687
+ * Returns the length of the nth match in characters. This is equivalent to
688
+ * `m[n].length` but without allocating a new string.
689
+ *
690
+ * @param [Integer, String, Symbol] n the name or number of the match
691
+ * @return [Integer, nil] the length of the match or `nil` if there is no such
692
+ * match
693
+ * @example
694
+ * m = RE2::Regexp.new('(?P<word>\w+) (?P<number>\d+)').partial_match("alice 123")
695
+ * m.match_length(0) #=> 9
696
+ * m.match_length(1) #=> 5
697
+ * m.match_length(:number) #=> 3
698
+ */
699
+ static VALUE re2_matchdata_match_length(const VALUE self, VALUE n) {
700
+ re2_matchdata *m = unwrap_re2_matchdata(self);
701
+
702
+ re2::StringPiece *match = re2_matchdata_find_match(n, self);
703
+ if (match == NULL) { /* No match could be resolved for n. */
704
+ return Qnil;
705
+ }
706
+
707
+ long start = match->data() - RSTRING_PTR(m->text); /* Byte offset of the match start within the text buffer. */
708
+ long end_pos = start + match->size(); /* Byte offset just past the end of the match. */
709
+ long char_len = rb_str_sublen(m->text, end_pos) - rb_str_sublen(m->text, start); /* Difference of the two character offsets gives the length in characters. */
710
+
711
+ return LONG2NUM(char_len);
712
+ }
713
+
565
714
  /*
566
715
  * Returns the {RE2::Regexp} used in the match.
567
716
  *
568
717
  * @return [RE2::Regexp] the regular expression used in the match
569
718
  * @example
570
- * m = RE2::Regexp.new('(\d+)').match("bob 123")
719
+ * m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
571
720
  * m.regexp #=> #<RE2::Regexp /(\d+)/>
572
721
  */
573
722
  static VALUE re2_matchdata_regexp(const VALUE self) {
@@ -606,7 +755,7 @@ static VALUE re2_regexp_allocate(VALUE klass) {
606
755
  *
607
756
  * @return [Array<String, nil>] the array of matches
608
757
  * @example
609
- * m = RE2::Regexp.new('(\d+)').match("bob 123")
758
+ * m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
610
759
  * m.to_a #=> ["123", "123"]
611
760
  */
612
761
  static VALUE re2_matchdata_to_a(const VALUE self) {
@@ -673,7 +822,7 @@ static VALUE re2_matchdata_named_match(const std::string &name, const VALUE self
673
822
  * @param [Integer] index the index of the match to fetch
674
823
  * @return [String, nil] the specified match or `nil` if it isn't present
675
824
  * @example
676
- * m = RE2::Regexp.new('(\d+)').match("bob 123")
825
+ * m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
677
826
  * m[0] #=> "123"
678
827
  *
679
828
  * @overload [](start, length)
@@ -683,7 +832,7 @@ static VALUE re2_matchdata_named_match(const std::string &name, const VALUE self
683
832
  * @param [Integer] length the number of elements to fetch
684
833
  * @return [Array<String, nil>] the specified matches
685
834
  * @example
686
- * m = RE2::Regexp.new('(\d+)').match("bob 123")
835
+ * m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
687
836
  * m[0, 1] #=> ["123"]
688
837
  *
689
838
  * @overload [](range)
@@ -692,8 +841,8 @@ static VALUE re2_matchdata_named_match(const std::string &name, const VALUE self
692
841
  * @param [Range] range the range of match indexes to fetch
693
842
  * @return [Array<String, nil>] the specified matches
694
843
  * @example
695
- * m = RE2::Regexp.new('(\d+)').match("bob 123")
696
- * m[0..1] #=> "[123", "123"]
844
+ * m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
845
+ * m[0..1] #=> ["123", "123"]
697
846
  *
698
847
  * @overload [](name)
699
848
  * Access a particular match by name.
@@ -701,7 +850,7 @@ static VALUE re2_matchdata_named_match(const std::string &name, const VALUE self
701
850
  * @param [String, Symbol] name the name of the match to fetch
702
851
  * @return [String, nil] the specific match or `nil` if it isn't present
703
852
  * @example
704
- * m = RE2::Regexp.new('(?P<number>\d+)').match("bob 123")
853
+ * m = RE2::Regexp.new('(?P<number>\d+)').partial_match("bob 123")
705
854
  * m["number"] #=> "123"
706
855
  * m[:number] #=> "123"
707
856
  */
@@ -725,6 +874,9 @@ static VALUE re2_matchdata_aref(int argc, VALUE *argv, const VALUE self) {
725
874
  * Returns the entire matched string.
726
875
  *
727
876
  * @return [String] the entire matched string
877
+ * @example
878
+ * m = RE2::Regexp.new('(?P<number>\d+)').partial_match("bob 123")
879
+ * m.to_s #=> "123"
728
880
  */
729
881
  static VALUE re2_matchdata_to_s(const VALUE self) {
730
882
  return re2_matchdata_nth_match(0, self);
@@ -739,7 +891,7 @@ static VALUE re2_matchdata_to_s(const VALUE self) {
739
891
  *
740
892
  * @return [String] a printable version of the match
741
893
  * @example
742
- * m = RE2::Regexp.new('(\d+)').match("bob 123")
894
+ * m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
743
895
  * m.inspect #=> "#<RE2::MatchData \"123\" 1:\"123\">"
744
896
  */
745
897
  static VALUE re2_matchdata_inspect(const VALUE self) {
@@ -774,7 +926,7 @@ static VALUE re2_matchdata_inspect(const VALUE self) {
774
926
  }
775
927
 
776
928
  /*
777
- * Returns the array of submatches for pattern matching.
929
+ * Returns the array of submatches.
778
930
  *
779
931
  * Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
780
932
  * returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
@@ -783,11 +935,12 @@ static VALUE re2_matchdata_inspect(const VALUE self) {
783
935
  *
784
936
  * @return [Array<String, nil>] the array of submatches
785
937
  * @example
786
- * m = RE2::Regexp.new('(\d+)').match("bob 123")
938
+ * m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
939
+ * m.captures #=> ["123"]
787
940
  * m.deconstruct #=> ["123"]
788
941
  *
789
942
  * @example pattern matching
790
- * case RE2::Regexp.new('(\d+) (\d+)').match("bob 123 456")
943
+ * case RE2::Regexp.new('(\d+) (\d+)').partial_match("bob 123 456")
791
944
  * in x, y
792
945
  * puts "Matched #{x} #{y}"
793
946
  * else
@@ -828,14 +981,14 @@ static VALUE re2_matchdata_deconstruct(const VALUE self) {
828
981
  * @param [Array<Symbol>, nil] keys an array of `Symbol` capturing group names
829
982
  * or `nil` to return all names
830
983
  * @example
831
- * m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
984
+ * m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').partial_match('123 abc')
832
985
  * m.deconstruct_keys(nil) #=> {numbers: "123", letters: "abc"}
833
986
  * m.deconstruct_keys([:numbers]) #=> {numbers: "123"}
834
987
  * m.deconstruct_keys([:fruit]) #=> {}
835
988
  * m.deconstruct_keys([:letters, :fruit]) #=> {letters: "abc"}
836
989
  *
837
990
  * @example pattern matching
838
- * case RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
991
+ * case RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').partial_match('123 abc')
839
992
  * in numbers:, letters:
840
993
  * puts "Numbers: #{numbers}, letters: #{letters}"
841
994
  * else
@@ -852,7 +1005,7 @@ static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys)
852
1005
  if (NIL_P(keys)) {
853
1006
  for (std::map<std::string, int>::const_iterator it = groups.begin(); it != groups.end(); ++it) {
854
1007
  rb_hash_aset(capturing_groups,
855
- ID2SYM(rb_intern(it->first.data())),
1008
+ ID2SYM(rb_intern2(it->first.data(), it->first.size())),
856
1009
  re2_matchdata_nth_match(it->second, self));
857
1010
  }
858
1011
  } else {
@@ -877,6 +1030,120 @@ static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys)
877
1030
  return capturing_groups;
878
1031
  }
879
1032
 
1033
+ /*
1034
+ * Returns a hash of capturing group names to matched strings.
1035
+ *
1036
+ * Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
1037
+ * returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
1038
+ * {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
1039
+ *
1040
+ * @overload named_captures
1041
+ * Returns a hash with string keys.
1042
+ *
1043
+ * @return [Hash] a hash of capturing group names to matching strings
1044
+ * @example
1045
+ * m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').partial_match('123 abc')
1046
+ * m.named_captures #=> {"numbers" => "123", "letters" => "abc"}
1047
+ *
1048
+ * @overload named_captures(symbolize_names:)
1049
+ * Returns a hash with string or symbol keys.
1050
+ *
1051
+ * @param [Boolean] symbolize_names whether to return group names as symbols
1052
+ * @return [Hash] a hash of capturing group names to matching strings
1053
+ * @example
1054
+ * m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').partial_match('123 abc')
1055
+ * m.named_captures
1056
+ * #=> {"numbers" => "123", "letters" => "abc"}
1057
+ * m.named_captures(symbolize_names: true) #=> {numbers: "123", letters: "abc"}
1058
+ */
1059
+ static VALUE re2_matchdata_named_captures(int argc, VALUE *argv, const VALUE self) {
1060
+ VALUE opts;
1061
+ rb_scan_args(argc, argv, "0:", &opts); /* "0:" accepts no positional arguments, only an optional keyword hash. */
1062
+
1063
+ bool symbolize = false; /* String keys unless symbolize_names is given and truthy. */
1064
+ if (!NIL_P(opts)) {
1065
+ VALUE sym = rb_hash_aref(opts, ID2SYM(id_symbolize_names));
1066
+ symbolize = RTEST(sym); /* Any truthy value enables symbol keys. */
1067
+ }
1068
+
1069
+ re2_matchdata *m = unwrap_re2_matchdata(self);
1070
+ re2_pattern *p = unwrap_re2_regexp(m->regexp);
1071
+
1072
+ const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
1073
+ VALUE result = rb_hash_new();
1074
+
1075
+ for (std::map<std::string, int>::const_iterator it = groups.begin(); it != groups.end(); ++it) { /* One hash entry per named group: name => value of the group at that index. */
1076
+ VALUE key;
1077
+ if (symbolize) {
1078
+ key = ID2SYM(rb_intern2(it->first.data(), it->first.size())); /* rb_intern2 takes an explicit length rather than relying on NUL termination. */
1079
+ } else {
1080
+ key = encoded_str_new(it->first.data(), it->first.size(),
1081
+ p->pattern->options().encoding());
1082
+ }
1083
+ rb_hash_aset(result, key, re2_matchdata_nth_match(it->second, self)); /* it->second is the group's numeric index. */
1084
+ }
1085
+
1086
+ return result;
1087
+ }
1088
+
1089
+ /*
1090
+ * Returns an array of names of named capturing groups. Names are returned in
1091
+ * alphabetical order rather than definition order, as RE2 stores named groups
1092
+ * internally in a sorted map.
1093
+ *
1094
+ * Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
1095
+ * returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
1096
+ * {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
1097
+ *
1098
+ * @return [Array<String>] an array of names of named capturing groups
1099
+ * @example
1100
+ * m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').partial_match('123 abc')
1101
+ * m.names #=> ["letters", "numbers"]
1102
+ */
1103
+ static VALUE re2_matchdata_names(const VALUE self) {
1104
+ re2_matchdata *m = unwrap_re2_matchdata(self);
1105
+
1106
+ return re2_regexp_names(m->regexp); /* Delegates to the names method of the regexp that produced this match. */
1107
+ }
1108
+
1109
+ /*
1110
+ * Returns an array of match values at the given indices or names.
1111
+ *
1112
+ * Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
1113
+ * returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
1114
+ * {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
1115
+ *
1116
+ * @param [Integer, String, Symbol] indexes the indices or names of
1117
+ * the matches to fetch (any number may be given; the result has one element per argument, in order)
1118
+ * @return [Array<String, nil>] the values at the given indices or names
1119
+ * @example
1120
+ * m = RE2::Regexp.new('(?P<a>\d+) (?P<b>\d+)').partial_match("123 456")
1121
+ * m.values_at(1, 2) #=> ["123", "456"]
1122
+ * m.values_at(:a, :b) #=> ["123", "456"]
1123
+ * m.values_at(1, :b) #=> ["123", "456"]
1124
+ */
1125
+ static VALUE re2_matchdata_values_at(int argc, VALUE *argv, const VALUE self) {
1126
+ unwrap_re2_matchdata(self); /* Return value unused. NOTE(review): presumably called only to validate self; confirm. */
1127
+
1128
+ VALUE result = rb_ary_new2(argc); /* One slot per requested index or name. */
1129
+
1130
+ for (int i = 0; i < argc; ++i) {
1131
+ VALUE idx = argv[i];
1132
+
1133
+ if (TYPE(idx) == T_STRING) { /* String argument: look the group up by name. */
1134
+ rb_ary_push(result, re2_matchdata_named_match(
1135
+ std::string(RSTRING_PTR(idx), RSTRING_LEN(idx)), self));
1136
+ } else if (SYMBOL_P(idx)) { /* Symbol argument: look the group up by the symbol's name. */
1137
+ rb_ary_push(result, re2_matchdata_named_match(
1138
+ rb_id2name(SYM2ID(idx)), self));
1139
+ } else { /* Anything else is converted with NUM2INT and used as a numeric index. */
1140
+ rb_ary_push(result, re2_matchdata_nth_match(NUM2INT(idx), self));
1141
+ }
1142
+ }
1143
+
1144
+ return result;
1145
+ }
1146
+
880
1147
  static VALUE re2_matchdata_initialize_copy(VALUE self, VALUE other) {
881
1148
  re2_matchdata *self_m;
882
1149
  re2_matchdata *other_m = unwrap_re2_matchdata(other);
@@ -1610,6 +1877,7 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
1610
1877
  * {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L413-L427
1611
1878
  * `PartialMatch`}.
1612
1879
  *
1880
+ * @param [String] text the text to search
1613
1881
  * @return [Boolean] whether the match was successful
1614
1882
  * @raise [TypeError] if text cannot be coerced to a `String`
1615
1883
  */
@@ -1628,6 +1896,7 @@ static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
1628
1896
  * {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L376-L411
1629
1897
  * `FullMatch`}.
1630
1898
  *
1899
+ * @param [String] text the text to search
1631
1900
  * @return [Boolean] whether the match was successful
1632
1901
  * @raise [TypeError] if text cannot be coerced to a `String`
1633
1902
  */
@@ -1718,11 +1987,11 @@ static VALUE re2_regexp_match_has_endpos_argument_p(VALUE) {
1718
1987
  * @raise [TypeError] if the given rewrite or pattern (if not provided as a
1719
1988
  * {RE2::Regexp}) cannot be coerced to `String`s
1720
1989
  * @example
1721
- * RE2.Replace("hello there", "hello", "howdy") #=> "howdy there"
1990
+ * RE2.replace("hello there", "hello", "howdy") #=> "howdy there"
1722
1991
  * re2 = RE2::Regexp.new("hel+o")
1723
- * RE2.Replace("hello there", re2, "yo") #=> "yo there"
1992
+ * RE2.replace("hello there", re2, "yo") #=> "yo there"
1724
1993
  */
1725
- static VALUE re2_Replace(VALUE, VALUE str, VALUE pattern,
1994
+ static VALUE re2_replace(VALUE, VALUE str, VALUE pattern,
1726
1995
  VALUE rewrite) {
1727
1996
  /* Ensure rewrite is a string. */
1728
1997
  StringValue(rewrite);
@@ -1772,10 +2041,10 @@ static VALUE re2_Replace(VALUE, VALUE str, VALUE pattern,
1772
2041
  * @return [String] the resulting string
1773
2042
  * @example
1774
2043
  * re2 = RE2::Regexp.new("oo?")
1775
- * RE2.GlobalReplace("whoops-doops", re2, "e") #=> "wheps-deps"
1776
- * RE2.GlobalReplace("hello there", "e", "i") #=> "hillo thiri"
2044
+ * RE2.global_replace("whoops-doops", re2, "e") #=> "wheps-deps"
2045
+ * RE2.global_replace("hello there", "e", "i") #=> "hillo thiri"
1777
2046
  */
1778
- static VALUE re2_GlobalReplace(VALUE, VALUE str, VALUE pattern,
2047
+ static VALUE re2_global_replace(VALUE, VALUE str, VALUE pattern,
1779
2048
  VALUE rewrite) {
1780
2049
  /* Ensure rewrite is a string. */
1781
2050
  StringValue(rewrite);
@@ -1807,6 +2076,71 @@ static VALUE re2_GlobalReplace(VALUE, VALUE str, VALUE pattern,
1807
2076
  }
1808
2077
  }
1809
2078
 
2079
+ /*
2080
+ * If `pattern` matches `text`, returns a copy of `rewrite` with substitutions
2081
+ * using
2082
+ * {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L499-L510
2083
+ * `Extract`}. Non-matching portions of `text` are ignored.
2084
+ *
2085
+ * Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
2086
+ * returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
2087
+ * {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
2088
+ *
2089
+ * @param [String] text the string from which to extract
2090
+ * @param [String, RE2::Regexp] pattern a regexp matching the text
2091
+ * @param [String] rewrite the rewrite string with `\1`-style substitutions
2092
+ * @return [String, nil] the extracted string on a successful match or nil if
2093
+ * there is no match
2094
+ * @raise [TypeError] if the given text, rewrite or pattern (if not provided as a
2095
+ * {RE2::Regexp}) cannot be coerced to `String`s
2096
+ * @example
2097
+ * RE2.extract("alice@example.com", '(\w+)@(\w+)', '\2-\1')
2098
+ * #=> "example-alice"
2099
+ * RE2.extract("no match", '(\d+)', '\1') #=> nil
2100
+ */
2101
+ static VALUE re2_extract(VALUE, VALUE text, VALUE pattern,
2102
+ VALUE rewrite) {
2103
+ /* Ensure rewrite and text are strings. */
2104
+ StringValue(rewrite);
2105
+ StringValue(text);
2106
+
2107
+ re2_pattern *p;
2108
+ std::string out; /* Receives the rewritten output from RE2::Extract. */
2109
+ bool extracted;
2110
+
2111
+ if (rb_obj_is_kind_of(pattern, re2_cRegexp)) { /* An RE2::Regexp was given: use its compiled pattern directly. */
2112
+ p = unwrap_re2_regexp(pattern);
2113
+ extracted = RE2::Extract(
2114
+ re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
2115
+ *p->pattern,
2116
+ re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)),
2117
+ &out);
2118
+
2119
+ if (extracted) {
2120
+ return encoded_str_new(out.data(), out.size(),
2121
+ p->pattern->options().encoding()); /* Result uses the encoding configured on the given regexp. */
2122
+ } else {
2123
+ return Qnil;
2124
+ }
2125
+ } else {
2126
+ /* Ensure pattern is a string. */
2127
+ StringValue(pattern);
2128
+
2129
+ extracted = RE2::Extract(
2130
+ re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
2131
+ RE2(re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern))), /* Temporary RE2 built from the pattern string for this call only. */
2132
+ re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)),
2133
+ &out);
2134
+
2135
+ if (extracted) {
2136
+ return encoded_str_new(out.data(), out.size(),
2137
+ RE2::Options::EncodingUTF8); /* String patterns always produce a UTF-8 result. */
2138
+ } else {
2139
+ return Qnil;
2140
+ }
2141
+ }
2142
+ }
2143
+
1810
2144
  /*
1811
2145
  * Returns a version of `str` with all potentially meaningful regexp characters
1812
2146
  * escaped using
@@ -1818,9 +2152,12 @@ static VALUE re2_GlobalReplace(VALUE, VALUE str, VALUE pattern,
1818
2152
  * @raise [TypeError] if the given unquoted string cannot be coerced to a `String`
1819
2153
  * @return [String] the escaped string
1820
2154
  * @example
1821
- * RE2::Regexp.escape("1.5-2.0?") #=> "1\.5\-2\.0\?"
2155
+ * RE2.escape("1.5-2.0?") #=> "1\\.5\\-2\\.0\\?"
2156
+ * RE2.quote("1.5-2.0?") #=> "1\\.5\\-2\\.0\\?"
2157
+ * RE2::Regexp.escape("1.5-2.0?") #=> "1\\.5\\-2\\.0\\?"
2158
+ * RE2::Regexp.quote("1.5-2.0?") #=> "1\\.5\\-2\\.0\\?"
1822
2159
  */
1823
- static VALUE re2_QuoteMeta(VALUE, VALUE unquoted) {
2160
+ static VALUE re2_escape(VALUE, VALUE unquoted) {
1824
2161
  StringValue(unquoted);
1825
2162
 
1826
2163
  std::string quoted_string = RE2::QuoteMeta(
@@ -2203,6 +2540,14 @@ extern "C" void Init_re2(void) {
2203
2540
  RUBY_METHOD_FUNC(re2_matchdata_begin), 1);
2204
2541
  rb_define_method(re2_cMatchData, "end",
2205
2542
  RUBY_METHOD_FUNC(re2_matchdata_end), 1);
2543
+ rb_define_method(re2_cMatchData, "pre_match",
2544
+ RUBY_METHOD_FUNC(re2_matchdata_pre_match), 0);
2545
+ rb_define_method(re2_cMatchData, "post_match",
2546
+ RUBY_METHOD_FUNC(re2_matchdata_post_match), 0);
2547
+ rb_define_method(re2_cMatchData, "offset",
2548
+ RUBY_METHOD_FUNC(re2_matchdata_offset), 1);
2549
+ rb_define_method(re2_cMatchData, "match_length",
2550
+ RUBY_METHOD_FUNC(re2_matchdata_match_length), 1);
2206
2551
  rb_define_method(re2_cMatchData, "[]", RUBY_METHOD_FUNC(re2_matchdata_aref),
2207
2552
  -1);
2208
2553
  rb_define_method(re2_cMatchData, "to_s",
@@ -2211,6 +2556,14 @@ extern "C" void Init_re2(void) {
2211
2556
  RUBY_METHOD_FUNC(re2_matchdata_inspect), 0);
2212
2557
  rb_define_method(re2_cMatchData, "deconstruct",
2213
2558
  RUBY_METHOD_FUNC(re2_matchdata_deconstruct), 0);
2559
+ rb_define_method(re2_cMatchData, "captures",
2560
+ RUBY_METHOD_FUNC(re2_matchdata_deconstruct), 0);
2561
+ rb_define_method(re2_cMatchData, "named_captures",
2562
+ RUBY_METHOD_FUNC(re2_matchdata_named_captures), -1);
2563
+ rb_define_method(re2_cMatchData, "names",
2564
+ RUBY_METHOD_FUNC(re2_matchdata_names), 0);
2565
+ rb_define_method(re2_cMatchData, "values_at",
2566
+ RUBY_METHOD_FUNC(re2_matchdata_values_at), -1);
2214
2567
  rb_define_method(re2_cMatchData, "deconstruct_keys",
2215
2568
  RUBY_METHOD_FUNC(re2_matchdata_deconstruct_keys), 1);
2216
2569
  rb_define_method(re2_cMatchData, "initialize_copy",
@@ -2248,6 +2601,10 @@ extern "C" void Init_re2(void) {
2248
2601
  RUBY_METHOD_FUNC(re2_regexp_number_of_capturing_groups), 0);
2249
2602
  rb_define_method(re2_cRegexp, "named_capturing_groups",
2250
2603
  RUBY_METHOD_FUNC(re2_regexp_named_capturing_groups), 0);
2604
+ rb_define_method(re2_cRegexp, "named_captures",
2605
+ RUBY_METHOD_FUNC(re2_regexp_named_capturing_groups), 0);
2606
+ rb_define_method(re2_cRegexp, "names",
2607
+ RUBY_METHOD_FUNC(re2_regexp_names), 0);
2251
2608
  rb_define_method(re2_cRegexp, "match", RUBY_METHOD_FUNC(re2_regexp_match),
2252
2609
  -1);
2253
2610
  rb_define_method(re2_cRegexp, "match?", RUBY_METHOD_FUNC(re2_regexp_match_p),
@@ -2310,16 +2667,26 @@ extern "C" void Init_re2(void) {
2310
2667
  rb_define_method(re2_cSet, "size", RUBY_METHOD_FUNC(re2_set_size), 0);
2311
2668
  rb_define_method(re2_cSet, "length", RUBY_METHOD_FUNC(re2_set_size), 0);
2312
2669
 
2670
+ rb_define_module_function(re2_mRE2, "replace",
2671
+ RUBY_METHOD_FUNC(re2_replace), 3);
2313
2672
  rb_define_module_function(re2_mRE2, "Replace",
2314
- RUBY_METHOD_FUNC(re2_Replace), 3);
2673
+ RUBY_METHOD_FUNC(re2_replace), 3);
2674
+ rb_define_module_function(re2_mRE2, "global_replace",
2675
+ RUBY_METHOD_FUNC(re2_global_replace), 3);
2315
2676
  rb_define_module_function(re2_mRE2, "GlobalReplace",
2316
- RUBY_METHOD_FUNC(re2_GlobalReplace), 3);
2677
+ RUBY_METHOD_FUNC(re2_global_replace), 3);
2678
+ rb_define_module_function(re2_mRE2, "extract",
2679
+ RUBY_METHOD_FUNC(re2_extract), 3);
2317
2680
  rb_define_module_function(re2_mRE2, "QuoteMeta",
2318
- RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
2681
+ RUBY_METHOD_FUNC(re2_escape), 1);
2682
+ rb_define_module_function(re2_mRE2, "escape",
2683
+ RUBY_METHOD_FUNC(re2_escape), 1);
2684
+ rb_define_module_function(re2_mRE2, "quote",
2685
+ RUBY_METHOD_FUNC(re2_escape), 1);
2319
2686
  rb_define_singleton_method(re2_cRegexp, "escape",
2320
- RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
2687
+ RUBY_METHOD_FUNC(re2_escape), 1);
2321
2688
  rb_define_singleton_method(re2_cRegexp, "quote",
2322
- RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
2689
+ RUBY_METHOD_FUNC(re2_escape), 1);
2323
2690
 
2324
2691
  // (see RE2::Regexp#initialize)
2325
2692
  rb_define_singleton_method(re2_cRegexp, "compile",
@@ -2347,4 +2714,5 @@ extern "C" void Init_re2(void) {
2347
2714
  id_submatches = rb_intern("submatches");
2348
2715
  id_startpos = rb_intern("startpos");
2349
2716
  id_endpos = rb_intern("endpos");
2717
+ id_symbolize_names = rb_intern("symbolize_names");
2350
2718
  }
data/lib/3.1/re2.so CHANGED
Binary file
data/lib/3.2/re2.so CHANGED
Binary file
data/lib/3.3/re2.so CHANGED
Binary file
data/lib/3.4/re2.so CHANGED
Binary file
data/lib/4.0/re2.so CHANGED
Binary file