re2 2.24.0-aarch64-linux-gnu → 2.25.0-aarch64-linux-gnu
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +107 -4
- data/ext/re2/re2.cc +399 -31
- data/lib/3.1/re2.so +0 -0
- data/lib/3.2/re2.so +0 -0
- data/lib/3.3/re2.so +0 -0
- data/lib/3.4/re2.so +0 -0
- data/lib/4.0/re2.so +0 -0
- data/lib/re2/string.rb +6 -6
- data/lib/re2/version.rb +1 -1
- data/spec/re2/match_data_spec.rb +312 -0
- data/spec/re2/regexp_spec.rb +58 -1
- data/spec/re2_spec.rb +145 -43
- metadata +1 -1
data/ext/re2/re2.cc
CHANGED
|
@@ -51,7 +51,7 @@ static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors,
|
|
|
51
51
|
id_max_mem, id_literal, id_never_nl, id_case_sensitive,
|
|
52
52
|
id_perl_classes, id_word_boundary, id_one_line, id_unanchored,
|
|
53
53
|
id_anchor, id_anchor_start, id_anchor_both, id_exception,
|
|
54
|
-
id_submatches, id_startpos, id_endpos;
|
|
54
|
+
id_submatches, id_startpos, id_endpos, id_symbolize_names;
|
|
55
55
|
|
|
56
56
|
inline VALUE encoded_str_new(const char *str, long length, RE2::Options::Encoding encoding) {
|
|
57
57
|
if (encoding == RE2::Options::EncodingUTF8) {
|
|
@@ -128,6 +128,10 @@ static void parse_re2_options(RE2::Options* re2_options, const VALUE options) {
|
|
|
128
128
|
static void re2_matchdata_mark(void *ptr) {
|
|
129
129
|
re2_matchdata *m = reinterpret_cast<re2_matchdata *>(ptr);
|
|
130
130
|
rb_gc_mark_movable(m->regexp);
|
|
131
|
+
|
|
132
|
+
/* Text must not be movable because StringPiece matches hold pointers into
|
|
133
|
+
* its underlying buffer; moving the string would invalidate them.
|
|
134
|
+
*/
|
|
131
135
|
rb_gc_mark(m->text);
|
|
132
136
|
}
|
|
133
137
|
|
|
@@ -172,6 +176,10 @@ static const rb_data_type_t re2_matchdata_data_type = {
|
|
|
172
176
|
static void re2_scanner_mark(void *ptr) {
|
|
173
177
|
re2_scanner *s = reinterpret_cast<re2_scanner *>(ptr);
|
|
174
178
|
rb_gc_mark_movable(s->regexp);
|
|
179
|
+
|
|
180
|
+
/* Text must not be movable because the StringPiece input holds a pointer
|
|
181
|
+
* into its underlying buffer; moving the string would invalidate it.
|
|
182
|
+
*/
|
|
175
183
|
rb_gc_mark(s->text);
|
|
176
184
|
}
|
|
177
185
|
|
|
@@ -272,6 +280,34 @@ static re2_scanner *unwrap_re2_scanner(VALUE self) {
|
|
|
272
280
|
return c;
|
|
273
281
|
}
|
|
274
282
|
|
|
283
|
+
/*
|
|
284
|
+
* Returns an array of names of all named capturing groups. Names are returned
|
|
285
|
+
* in alphabetical order rather than definition order, as RE2 stores named
|
|
286
|
+
* groups internally in a sorted map.
|
|
287
|
+
*
|
|
288
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
|
289
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
|
290
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
|
291
|
+
*
|
|
292
|
+
* @return [Array<String>] an array of names of named capturing groups
|
|
293
|
+
* @example
|
|
294
|
+
* RE2::Regexp.new('(?P<a>\d+) (?P<b>\w+)').names #=> ["a", "b"]
|
|
295
|
+
*/
|
|
296
|
+
static VALUE re2_regexp_names(const VALUE self) {
|
|
297
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
298
|
+
|
|
299
|
+
const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
|
|
300
|
+
VALUE names = rb_ary_new2(groups.size());
|
|
301
|
+
|
|
302
|
+
for (std::map<std::string, int>::const_iterator it = groups.begin(); it != groups.end(); ++it) {
|
|
303
|
+
rb_ary_push(names,
|
|
304
|
+
encoded_str_new(it->first.data(), it->first.size(),
|
|
305
|
+
p->pattern->options().encoding()));
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
return names;
|
|
309
|
+
}
|
|
310
|
+
|
|
275
311
|
static VALUE re2_matchdata_allocate(VALUE klass) {
|
|
276
312
|
re2_matchdata *m;
|
|
277
313
|
|
|
@@ -503,7 +539,7 @@ static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
|
|
|
503
539
|
*
|
|
504
540
|
* @return [Integer] the number of elements
|
|
505
541
|
* @example
|
|
506
|
-
* m = RE2::Regexp.new('(\d+)').
|
|
542
|
+
* m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
|
|
507
543
|
* m.size #=> 2
|
|
508
544
|
* m.length #=> 2
|
|
509
545
|
*/
|
|
@@ -520,7 +556,7 @@ static VALUE re2_matchdata_size(const VALUE self) {
|
|
|
520
556
|
* @return [Integer, nil] the offset of the start of the match or `nil` if
|
|
521
557
|
* there is no such submatch
|
|
522
558
|
* @example
|
|
523
|
-
* m = RE2::Regexp.new('ob (\d+)').
|
|
559
|
+
* m = RE2::Regexp.new('ob (\d+)').partial_match("bob 123")
|
|
524
560
|
* m.begin(0) #=> 1
|
|
525
561
|
* m.begin(1) #=> 4
|
|
526
562
|
*/
|
|
@@ -545,7 +581,7 @@ static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
|
|
|
545
581
|
* @return [Integer, nil] the offset of the character following the end of the
|
|
546
582
|
* match or `nil` if there is no such match
|
|
547
583
|
* @example
|
|
548
|
-
* m = RE2::Regexp.new('ob (\d+) b').
|
|
584
|
+
* m = RE2::Regexp.new('ob (\d+) b').partial_match("bob 123 bob")
|
|
549
585
|
* m.end(0) #=> 9
|
|
550
586
|
* m.end(1) #=> 7
|
|
551
587
|
*/
|
|
@@ -562,12 +598,125 @@ static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
|
|
|
562
598
|
}
|
|
563
599
|
}
|
|
564
600
|
|
|
601
|
+
/*
|
|
602
|
+
* Returns the portion of the original string before the match.
|
|
603
|
+
*
|
|
604
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
|
605
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
|
606
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
|
607
|
+
*
|
|
608
|
+
* @return [String] the portion of the original string before the match
|
|
609
|
+
* @example
|
|
610
|
+
* m = RE2::Regexp.new('(\d+)').partial_match("bob 123 456")
|
|
611
|
+
* m.pre_match #=> "bob "
|
|
612
|
+
*/
|
|
613
|
+
static VALUE re2_matchdata_pre_match(const VALUE self) {
|
|
614
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
615
|
+
re2_pattern *p = unwrap_re2_regexp(m->regexp);
|
|
616
|
+
|
|
617
|
+
re2::StringPiece *match = &m->matches[0];
|
|
618
|
+
if (match->empty()) {
|
|
619
|
+
return Qnil;
|
|
620
|
+
}
|
|
621
|
+
|
|
622
|
+
long offset = match->data() - RSTRING_PTR(m->text);
|
|
623
|
+
|
|
624
|
+
return encoded_str_new(RSTRING_PTR(m->text), offset,
|
|
625
|
+
p->pattern->options().encoding());
|
|
626
|
+
}
|
|
627
|
+
|
|
628
|
+
/*
|
|
629
|
+
* Returns the portion of the original string after the match.
|
|
630
|
+
*
|
|
631
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
|
632
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
|
633
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
|
634
|
+
*
|
|
635
|
+
* @return [String] the portion of the original string after the match
|
|
636
|
+
* @example
|
|
637
|
+
* m = RE2::Regexp.new('(\d+)').partial_match("bob 123 456")
|
|
638
|
+
* m.post_match #=> " 456"
|
|
639
|
+
*/
|
|
640
|
+
static VALUE re2_matchdata_post_match(const VALUE self) {
|
|
641
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
642
|
+
re2_pattern *p = unwrap_re2_regexp(m->regexp);
|
|
643
|
+
|
|
644
|
+
re2::StringPiece *match = &m->matches[0];
|
|
645
|
+
if (match->empty()) {
|
|
646
|
+
return Qnil;
|
|
647
|
+
}
|
|
648
|
+
|
|
649
|
+
long start = (match->data() - RSTRING_PTR(m->text)) + match->size();
|
|
650
|
+
long remaining = RSTRING_LEN(m->text) - start;
|
|
651
|
+
|
|
652
|
+
return encoded_str_new(RSTRING_PTR(m->text) + start, remaining,
|
|
653
|
+
p->pattern->options().encoding());
|
|
654
|
+
}
|
|
655
|
+
|
|
656
|
+
/*
|
|
657
|
+
* Returns a two-element array containing the beginning and ending offsets of
|
|
658
|
+
* the nth match.
|
|
659
|
+
*
|
|
660
|
+
* @param [Integer, String, Symbol] n the name or number of the match
|
|
661
|
+
* @return [Array<Integer>, nil] a two-element array with the beginning and
|
|
662
|
+
* ending offsets of the match or `nil` if there is no such match
|
|
663
|
+
* @example
|
|
664
|
+
* m = RE2::Regexp.new('ob (\d+)').partial_match("bob 123")
|
|
665
|
+
* m.offset(0) #=> [1, 7]
|
|
666
|
+
* m.offset(1) #=> [4, 7]
|
|
667
|
+
*/
|
|
668
|
+
static VALUE re2_matchdata_offset(const VALUE self, VALUE n) {
|
|
669
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
670
|
+
|
|
671
|
+
re2::StringPiece *match = re2_matchdata_find_match(n, self);
|
|
672
|
+
if (match == NULL) {
|
|
673
|
+
return Qnil;
|
|
674
|
+
}
|
|
675
|
+
|
|
676
|
+
long start = match->data() - RSTRING_PTR(m->text);
|
|
677
|
+
long end_pos = start + match->size();
|
|
678
|
+
|
|
679
|
+
VALUE array = rb_ary_new2(2);
|
|
680
|
+
rb_ary_push(array, LONG2NUM(rb_str_sublen(m->text, start)));
|
|
681
|
+
rb_ary_push(array, LONG2NUM(rb_str_sublen(m->text, end_pos)));
|
|
682
|
+
|
|
683
|
+
return array;
|
|
684
|
+
}
|
|
685
|
+
|
|
686
|
+
/*
|
|
687
|
+
* Returns the length of the nth match in characters. This is equivalent to
|
|
688
|
+
* `m[n].length` but without allocating a new string.
|
|
689
|
+
*
|
|
690
|
+
* @param [Integer, String, Symbol] n the name or number of the match
|
|
691
|
+
* @return [Integer, nil] the length of the match or `nil` if there is no such
|
|
692
|
+
* match
|
|
693
|
+
* @example
|
|
694
|
+
* m = RE2::Regexp.new('(?P<word>\w+) (?P<number>\d+)').partial_match("alice 123")
|
|
695
|
+
* m.match_length(0) #=> 9
|
|
696
|
+
* m.match_length(1) #=> 5
|
|
697
|
+
* m.match_length(:number) #=> 3
|
|
698
|
+
*/
|
|
699
|
+
static VALUE re2_matchdata_match_length(const VALUE self, VALUE n) {
|
|
700
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
701
|
+
|
|
702
|
+
re2::StringPiece *match = re2_matchdata_find_match(n, self);
|
|
703
|
+
if (match == NULL) {
|
|
704
|
+
return Qnil;
|
|
705
|
+
}
|
|
706
|
+
|
|
707
|
+
long start = match->data() - RSTRING_PTR(m->text);
|
|
708
|
+
long end_pos = start + match->size();
|
|
709
|
+
long char_len = rb_str_sublen(m->text, end_pos) - rb_str_sublen(m->text, start);
|
|
710
|
+
|
|
711
|
+
return LONG2NUM(char_len);
|
|
712
|
+
}
|
|
713
|
+
|
|
565
714
|
/*
|
|
566
715
|
* Returns the {RE2::Regexp} used in the match.
|
|
567
716
|
*
|
|
568
717
|
* @return [RE2::Regexp] the regular expression used in the match
|
|
569
718
|
* @example
|
|
570
|
-
* m = RE2::Regexp.new('(\d+)').
|
|
719
|
+
* m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
|
|
571
720
|
* m.regexp #=> #<RE2::Regexp /(\d+)/>
|
|
572
721
|
*/
|
|
573
722
|
static VALUE re2_matchdata_regexp(const VALUE self) {
|
|
@@ -606,7 +755,7 @@ static VALUE re2_regexp_allocate(VALUE klass) {
|
|
|
606
755
|
*
|
|
607
756
|
* @return [Array<String, nil>] the array of matches
|
|
608
757
|
* @example
|
|
609
|
-
* m = RE2::Regexp.new('(\d+)').
|
|
758
|
+
* m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
|
|
610
759
|
* m.to_a #=> ["123", "123"]
|
|
611
760
|
*/
|
|
612
761
|
static VALUE re2_matchdata_to_a(const VALUE self) {
|
|
@@ -673,7 +822,7 @@ static VALUE re2_matchdata_named_match(const std::string &name, const VALUE self
|
|
|
673
822
|
* @param [Integer] index the index of the match to fetch
|
|
674
823
|
* @return [String, nil] the specified match or `nil` if it isn't present
|
|
675
824
|
* @example
|
|
676
|
-
* m = RE2::Regexp.new('(\d+)').
|
|
825
|
+
* m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
|
|
677
826
|
* m[0] #=> "123"
|
|
678
827
|
*
|
|
679
828
|
* @overload [](start, length)
|
|
@@ -683,7 +832,7 @@ static VALUE re2_matchdata_named_match(const std::string &name, const VALUE self
|
|
|
683
832
|
* @param [Integer] length the number of elements to fetch
|
|
684
833
|
* @return [Array<String, nil>] the specified matches
|
|
685
834
|
* @example
|
|
686
|
-
* m = RE2::Regexp.new('(\d+)').
|
|
835
|
+
* m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
|
|
687
836
|
* m[0, 1] #=> ["123"]
|
|
688
837
|
*
|
|
689
838
|
* @overload [](range)
|
|
@@ -692,8 +841,8 @@ static VALUE re2_matchdata_named_match(const std::string &name, const VALUE self
|
|
|
692
841
|
* @param [Range] range the range of match indexes to fetch
|
|
693
842
|
* @return [Array<String, nil>] the specified matches
|
|
694
843
|
* @example
|
|
695
|
-
* m = RE2::Regexp.new('(\d+)').
|
|
696
|
-
* m[0..1] #=> "
|
|
844
|
+
* m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
|
|
845
|
+
* m[0..1] #=> ["123", "123"]
|
|
697
846
|
*
|
|
698
847
|
* @overload [](name)
|
|
699
848
|
* Access a particular match by name.
|
|
@@ -701,7 +850,7 @@ static VALUE re2_matchdata_named_match(const std::string &name, const VALUE self
|
|
|
701
850
|
* @param [String, Symbol] name the name of the match to fetch
|
|
702
851
|
* @return [String, nil] the specific match or `nil` if it isn't present
|
|
703
852
|
* @example
|
|
704
|
-
* m = RE2::Regexp.new('(?P<number>\d+)').
|
|
853
|
+
* m = RE2::Regexp.new('(?P<number>\d+)').partial_match("bob 123")
|
|
705
854
|
* m["number"] #=> "123"
|
|
706
855
|
* m[:number] #=> "123"
|
|
707
856
|
*/
|
|
@@ -725,6 +874,9 @@ static VALUE re2_matchdata_aref(int argc, VALUE *argv, const VALUE self) {
|
|
|
725
874
|
* Returns the entire matched string.
|
|
726
875
|
*
|
|
727
876
|
* @return [String] the entire matched string
|
|
877
|
+
* @example
|
|
878
|
+
* m = RE2::Regexp.new('(?P<number>\d+)').partial_match("bob 123")
|
|
879
|
+
* m.to_s #=> "123"
|
|
728
880
|
*/
|
|
729
881
|
static VALUE re2_matchdata_to_s(const VALUE self) {
|
|
730
882
|
return re2_matchdata_nth_match(0, self);
|
|
@@ -739,7 +891,7 @@ static VALUE re2_matchdata_to_s(const VALUE self) {
|
|
|
739
891
|
*
|
|
740
892
|
* @return [String] a printable version of the match
|
|
741
893
|
* @example
|
|
742
|
-
* m = RE2::Regexp.new('(\d+)').
|
|
894
|
+
* m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
|
|
743
895
|
* m.inspect #=> "#<RE2::MatchData \"123\" 1:\"123\">"
|
|
744
896
|
*/
|
|
745
897
|
static VALUE re2_matchdata_inspect(const VALUE self) {
|
|
@@ -774,7 +926,7 @@ static VALUE re2_matchdata_inspect(const VALUE self) {
|
|
|
774
926
|
}
|
|
775
927
|
|
|
776
928
|
/*
|
|
777
|
-
* Returns the array of submatches
|
|
929
|
+
* Returns the array of submatches.
|
|
778
930
|
*
|
|
779
931
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
|
780
932
|
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
|
@@ -783,11 +935,12 @@ static VALUE re2_matchdata_inspect(const VALUE self) {
|
|
|
783
935
|
*
|
|
784
936
|
* @return [Array<String, nil>] the array of submatches
|
|
785
937
|
* @example
|
|
786
|
-
* m = RE2::Regexp.new('(\d+)').
|
|
938
|
+
* m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
|
|
939
|
+
* m.captures #=> ["123"]
|
|
787
940
|
* m.deconstruct #=> ["123"]
|
|
788
941
|
*
|
|
789
942
|
* @example pattern matching
|
|
790
|
-
* case RE2::Regexp.new('(\d+) (\d+)').
|
|
943
|
+
* case RE2::Regexp.new('(\d+) (\d+)').partial_match("bob 123 456")
|
|
791
944
|
* in x, y
|
|
792
945
|
* puts "Matched #{x} #{y}"
|
|
793
946
|
* else
|
|
@@ -828,14 +981,14 @@ static VALUE re2_matchdata_deconstruct(const VALUE self) {
|
|
|
828
981
|
* @param [Array<Symbol>, nil] keys an array of `Symbol` capturing group names
|
|
829
982
|
* or `nil` to return all names
|
|
830
983
|
* @example
|
|
831
|
-
* m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').
|
|
984
|
+
* m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').partial_match('123 abc')
|
|
832
985
|
* m.deconstruct_keys(nil) #=> {numbers: "123", letters: "abc"}
|
|
833
986
|
* m.deconstruct_keys([:numbers]) #=> {numbers: "123"}
|
|
834
987
|
* m.deconstruct_keys([:fruit]) #=> {}
|
|
835
988
|
* m.deconstruct_keys([:letters, :fruit]) #=> {letters: "abc"}
|
|
836
989
|
*
|
|
837
990
|
* @example pattern matching
|
|
838
|
-
* case RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').
|
|
991
|
+
* case RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').partial_match('123 abc')
|
|
839
992
|
* in numbers:, letters:
|
|
840
993
|
* puts "Numbers: #{numbers}, letters: #{letters}"
|
|
841
994
|
* else
|
|
@@ -852,7 +1005,7 @@ static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys)
|
|
|
852
1005
|
if (NIL_P(keys)) {
|
|
853
1006
|
for (std::map<std::string, int>::const_iterator it = groups.begin(); it != groups.end(); ++it) {
|
|
854
1007
|
rb_hash_aset(capturing_groups,
|
|
855
|
-
ID2SYM(
|
|
1008
|
+
ID2SYM(rb_intern2(it->first.data(), it->first.size())),
|
|
856
1009
|
re2_matchdata_nth_match(it->second, self));
|
|
857
1010
|
}
|
|
858
1011
|
} else {
|
|
@@ -877,6 +1030,120 @@ static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys)
|
|
|
877
1030
|
return capturing_groups;
|
|
878
1031
|
}
|
|
879
1032
|
|
|
1033
|
+
/*
|
|
1034
|
+
* Returns a hash of capturing group names to matched strings.
|
|
1035
|
+
*
|
|
1036
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
|
1037
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
|
1038
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
|
1039
|
+
*
|
|
1040
|
+
* @overload named_captures
|
|
1041
|
+
* Returns a hash with string keys.
|
|
1042
|
+
*
|
|
1043
|
+
* @return [Hash] a hash of capturing group names to matching strings
|
|
1044
|
+
* @example
|
|
1045
|
+
* m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').partial_match('123 abc')
|
|
1046
|
+
* m.named_captures #=> {"numbers" => "123", "letters" => "abc"}
|
|
1047
|
+
*
|
|
1048
|
+
* @overload named_captures(symbolize_names:)
|
|
1049
|
+
* Returns a hash with string or symbol keys.
|
|
1050
|
+
*
|
|
1051
|
+
* @param [Boolean] symbolize_names whether to return group names as symbols
|
|
1052
|
+
* @return [Hash] a hash of capturing group names to matching strings
|
|
1053
|
+
* @example
|
|
1054
|
+
* m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').partial_match('123 abc')
|
|
1055
|
+
* m.named_captures
|
|
1056
|
+
* #=> {"numbers" => "123", "letters" => "abc"}
|
|
1057
|
+
* m.named_captures(symbolize_names: true) #=> {numbers: "123", letters: "abc"}
|
|
1058
|
+
*/
|
|
1059
|
+
static VALUE re2_matchdata_named_captures(int argc, VALUE *argv, const VALUE self) {
|
|
1060
|
+
VALUE opts;
|
|
1061
|
+
rb_scan_args(argc, argv, "0:", &opts);
|
|
1062
|
+
|
|
1063
|
+
bool symbolize = false;
|
|
1064
|
+
if (!NIL_P(opts)) {
|
|
1065
|
+
VALUE sym = rb_hash_aref(opts, ID2SYM(id_symbolize_names));
|
|
1066
|
+
symbolize = RTEST(sym);
|
|
1067
|
+
}
|
|
1068
|
+
|
|
1069
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
1070
|
+
re2_pattern *p = unwrap_re2_regexp(m->regexp);
|
|
1071
|
+
|
|
1072
|
+
const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
|
|
1073
|
+
VALUE result = rb_hash_new();
|
|
1074
|
+
|
|
1075
|
+
for (std::map<std::string, int>::const_iterator it = groups.begin(); it != groups.end(); ++it) {
|
|
1076
|
+
VALUE key;
|
|
1077
|
+
if (symbolize) {
|
|
1078
|
+
key = ID2SYM(rb_intern2(it->first.data(), it->first.size()));
|
|
1079
|
+
} else {
|
|
1080
|
+
key = encoded_str_new(it->first.data(), it->first.size(),
|
|
1081
|
+
p->pattern->options().encoding());
|
|
1082
|
+
}
|
|
1083
|
+
rb_hash_aset(result, key, re2_matchdata_nth_match(it->second, self));
|
|
1084
|
+
}
|
|
1085
|
+
|
|
1086
|
+
return result;
|
|
1087
|
+
}
|
|
1088
|
+
|
|
1089
|
+
/*
|
|
1090
|
+
* Returns an array of names of named capturing groups. Names are returned in
|
|
1091
|
+
* alphabetical order rather than definition order, as RE2 stores named groups
|
|
1092
|
+
* internally in a sorted map.
|
|
1093
|
+
*
|
|
1094
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
|
1095
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
|
1096
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
|
1097
|
+
*
|
|
1098
|
+
* @return [Array<String>] an array of names of named capturing groups
|
|
1099
|
+
* @example
|
|
1100
|
+
* m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').partial_match('123 abc')
|
|
1101
|
+
* m.names #=> ["letters", "numbers"]
|
|
1102
|
+
*/
|
|
1103
|
+
static VALUE re2_matchdata_names(const VALUE self) {
|
|
1104
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
1105
|
+
|
|
1106
|
+
return re2_regexp_names(m->regexp);
|
|
1107
|
+
}
|
|
1108
|
+
|
|
1109
|
+
/*
|
|
1110
|
+
* Returns an array of match values at the given indices or names.
|
|
1111
|
+
*
|
|
1112
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
|
1113
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
|
1114
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
|
1115
|
+
*
|
|
1116
|
+
* @param [Integer, String, Symbol] indexes the indices or names of
|
|
1117
|
+
* the matches to fetch
|
|
1118
|
+
* @return [Array<String, nil>] the values at the given indices or names
|
|
1119
|
+
* @example
|
|
1120
|
+
* m = RE2::Regexp.new('(?P<a>\d+) (?P<b>\d+)').partial_match("123 456")
|
|
1121
|
+
* m.values_at(1, 2) #=> ["123", "456"]
|
|
1122
|
+
* m.values_at(:a, :b) #=> ["123", "456"]
|
|
1123
|
+
* m.values_at(1, :b) #=> ["123", "456"]
|
|
1124
|
+
*/
|
|
1125
|
+
static VALUE re2_matchdata_values_at(int argc, VALUE *argv, const VALUE self) {
|
|
1126
|
+
unwrap_re2_matchdata(self);
|
|
1127
|
+
|
|
1128
|
+
VALUE result = rb_ary_new2(argc);
|
|
1129
|
+
|
|
1130
|
+
for (int i = 0; i < argc; ++i) {
|
|
1131
|
+
VALUE idx = argv[i];
|
|
1132
|
+
|
|
1133
|
+
if (TYPE(idx) == T_STRING) {
|
|
1134
|
+
rb_ary_push(result, re2_matchdata_named_match(
|
|
1135
|
+
std::string(RSTRING_PTR(idx), RSTRING_LEN(idx)), self));
|
|
1136
|
+
} else if (SYMBOL_P(idx)) {
|
|
1137
|
+
rb_ary_push(result, re2_matchdata_named_match(
|
|
1138
|
+
rb_id2name(SYM2ID(idx)), self));
|
|
1139
|
+
} else {
|
|
1140
|
+
rb_ary_push(result, re2_matchdata_nth_match(NUM2INT(idx), self));
|
|
1141
|
+
}
|
|
1142
|
+
}
|
|
1143
|
+
|
|
1144
|
+
return result;
|
|
1145
|
+
}
|
|
1146
|
+
|
|
880
1147
|
static VALUE re2_matchdata_initialize_copy(VALUE self, VALUE other) {
|
|
881
1148
|
re2_matchdata *self_m;
|
|
882
1149
|
re2_matchdata *other_m = unwrap_re2_matchdata(other);
|
|
@@ -1610,6 +1877,7 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
1610
1877
|
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L413-L427
|
|
1611
1878
|
* `PartialMatch`}.
|
|
1612
1879
|
*
|
|
1880
|
+
* @param [String] text the text to search
|
|
1613
1881
|
* @return [Boolean] whether the match was successful
|
|
1614
1882
|
* @raise [TypeError] if text cannot be coerced to a `String`
|
|
1615
1883
|
*/
|
|
@@ -1628,6 +1896,7 @@ static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
|
|
|
1628
1896
|
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L376-L411
|
|
1629
1897
|
* `FullMatch`}.
|
|
1630
1898
|
*
|
|
1899
|
+
* @param [String] text the text to search
|
|
1631
1900
|
* @return [Boolean] whether the match was successful
|
|
1632
1901
|
* @raise [TypeError] if text cannot be coerced to a `String`
|
|
1633
1902
|
*/
|
|
@@ -1718,11 +1987,11 @@ static VALUE re2_regexp_match_has_endpos_argument_p(VALUE) {
|
|
|
1718
1987
|
* @raise [TypeError] if the given rewrite or pattern (if not provided as a
|
|
1719
1988
|
* {RE2::Regexp}) cannot be coerced to `String`s
|
|
1720
1989
|
* @example
|
|
1721
|
-
* RE2.
|
|
1990
|
+
* RE2.replace("hello there", "hello", "howdy") #=> "howdy there"
|
|
1722
1991
|
* re2 = RE2::Regexp.new("hel+o")
|
|
1723
|
-
* RE2.
|
|
1992
|
+
* RE2.replace("hello there", re2, "yo") #=> "yo there"
|
|
1724
1993
|
*/
|
|
1725
|
-
static VALUE
|
|
1994
|
+
static VALUE re2_replace(VALUE, VALUE str, VALUE pattern,
|
|
1726
1995
|
VALUE rewrite) {
|
|
1727
1996
|
/* Ensure rewrite is a string. */
|
|
1728
1997
|
StringValue(rewrite);
|
|
@@ -1772,10 +2041,10 @@ static VALUE re2_Replace(VALUE, VALUE str, VALUE pattern,
|
|
|
1772
2041
|
* @return [String] the resulting string
|
|
1773
2042
|
* @example
|
|
1774
2043
|
* re2 = RE2::Regexp.new("oo?")
|
|
1775
|
-
* RE2.
|
|
1776
|
-
* RE2.
|
|
2044
|
+
* RE2.global_replace("whoops-doops", re2, "e") #=> "wheps-deps"
|
|
2045
|
+
* RE2.global_replace("hello there", "e", "i") #=> "hillo thiri"
|
|
1777
2046
|
*/
|
|
1778
|
-
static VALUE
|
|
2047
|
+
static VALUE re2_global_replace(VALUE, VALUE str, VALUE pattern,
|
|
1779
2048
|
VALUE rewrite) {
|
|
1780
2049
|
/* Ensure rewrite is a string. */
|
|
1781
2050
|
StringValue(rewrite);
|
|
@@ -1807,6 +2076,71 @@ static VALUE re2_GlobalReplace(VALUE, VALUE str, VALUE pattern,
|
|
|
1807
2076
|
}
|
|
1808
2077
|
}
|
|
1809
2078
|
|
|
2079
|
+
/*
|
|
2080
|
+
* If `pattern` matches `text`, returns a copy of `rewrite` with substitutions
|
|
2081
|
+
* using
|
|
2082
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L499-L510
|
|
2083
|
+
* `Extract`}. Non-matching portions of `text` are ignored.
|
|
2084
|
+
*
|
|
2085
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
|
2086
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
|
2087
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
|
2088
|
+
*
|
|
2089
|
+
* @param [String] text the string from which to extract
|
|
2090
|
+
* @param [String, RE2::Regexp] pattern a regexp matching the text
|
|
2091
|
+
* @param [String] rewrite the rewrite string with `\1`-style substitutions
|
|
2092
|
+
* @return [String, nil] the extracted string on a successful match or nil if
|
|
2093
|
+
* there is no match
|
|
2094
|
+
* @raise [TypeError] if the given rewrite or pattern (if not provided as a
|
|
2095
|
+
* {RE2::Regexp}) cannot be coerced to `String`s
|
|
2096
|
+
* @example
|
|
2097
|
+
* RE2.extract("alice@example.com", '(\w+)@(\w+)', '\2-\1')
|
|
2098
|
+
* #=> "example-alice"
|
|
2099
|
+
* RE2.extract("no match", '(\d+)', '\1') #=> nil
|
|
2100
|
+
*/
|
|
2101
|
+
static VALUE re2_extract(VALUE, VALUE text, VALUE pattern,
|
|
2102
|
+
VALUE rewrite) {
|
|
2103
|
+
/* Ensure rewrite and text are strings. */
|
|
2104
|
+
StringValue(rewrite);
|
|
2105
|
+
StringValue(text);
|
|
2106
|
+
|
|
2107
|
+
re2_pattern *p;
|
|
2108
|
+
std::string out;
|
|
2109
|
+
bool extracted;
|
|
2110
|
+
|
|
2111
|
+
if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
|
|
2112
|
+
p = unwrap_re2_regexp(pattern);
|
|
2113
|
+
extracted = RE2::Extract(
|
|
2114
|
+
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
|
|
2115
|
+
*p->pattern,
|
|
2116
|
+
re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)),
|
|
2117
|
+
&out);
|
|
2118
|
+
|
|
2119
|
+
if (extracted) {
|
|
2120
|
+
return encoded_str_new(out.data(), out.size(),
|
|
2121
|
+
p->pattern->options().encoding());
|
|
2122
|
+
} else {
|
|
2123
|
+
return Qnil;
|
|
2124
|
+
}
|
|
2125
|
+
} else {
|
|
2126
|
+
/* Ensure pattern is a string. */
|
|
2127
|
+
StringValue(pattern);
|
|
2128
|
+
|
|
2129
|
+
extracted = RE2::Extract(
|
|
2130
|
+
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
|
|
2131
|
+
RE2(re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern))),
|
|
2132
|
+
re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)),
|
|
2133
|
+
&out);
|
|
2134
|
+
|
|
2135
|
+
if (extracted) {
|
|
2136
|
+
return encoded_str_new(out.data(), out.size(),
|
|
2137
|
+
RE2::Options::EncodingUTF8);
|
|
2138
|
+
} else {
|
|
2139
|
+
return Qnil;
|
|
2140
|
+
}
|
|
2141
|
+
}
|
|
2142
|
+
}
|
|
2143
|
+
|
|
1810
2144
|
/*
|
|
1811
2145
|
* Returns a version of `str` with all potentially meaningful regexp characters
|
|
1812
2146
|
* escaped using
|
|
@@ -1818,9 +2152,12 @@ static VALUE re2_GlobalReplace(VALUE, VALUE str, VALUE pattern,
|
|
|
1818
2152
|
* @raise [TypeError] if the given unquoted string cannot be coerced to a `String`
|
|
1819
2153
|
* @return [String] the escaped string
|
|
1820
2154
|
* @example
|
|
1821
|
-
* RE2
|
|
2155
|
+
* RE2.escape("1.5-2.0?") #=> "1\\.5\\-2\\.0\\?"
|
|
2156
|
+
* RE2.quote("1.5-2.0?") #=> "1\\.5\\-2\\.0\\?"
|
|
2157
|
+
* RE2::Regexp.escape("1.5-2.0?") #=> "1\\.5\\-2\\.0\\?"
|
|
2158
|
+
* RE2::Regexp.quote("1.5-2.0?") #=> "1\\.5\\-2\\.0\\?"
|
|
1822
2159
|
*/
|
|
1823
|
-
static VALUE
|
|
2160
|
+
static VALUE re2_escape(VALUE, VALUE unquoted) {
|
|
1824
2161
|
StringValue(unquoted);
|
|
1825
2162
|
|
|
1826
2163
|
std::string quoted_string = RE2::QuoteMeta(
|
|
@@ -2203,6 +2540,14 @@ extern "C" void Init_re2(void) {
|
|
|
2203
2540
|
RUBY_METHOD_FUNC(re2_matchdata_begin), 1);
|
|
2204
2541
|
rb_define_method(re2_cMatchData, "end",
|
|
2205
2542
|
RUBY_METHOD_FUNC(re2_matchdata_end), 1);
|
|
2543
|
+
rb_define_method(re2_cMatchData, "pre_match",
|
|
2544
|
+
RUBY_METHOD_FUNC(re2_matchdata_pre_match), 0);
|
|
2545
|
+
rb_define_method(re2_cMatchData, "post_match",
|
|
2546
|
+
RUBY_METHOD_FUNC(re2_matchdata_post_match), 0);
|
|
2547
|
+
rb_define_method(re2_cMatchData, "offset",
|
|
2548
|
+
RUBY_METHOD_FUNC(re2_matchdata_offset), 1);
|
|
2549
|
+
rb_define_method(re2_cMatchData, "match_length",
|
|
2550
|
+
RUBY_METHOD_FUNC(re2_matchdata_match_length), 1);
|
|
2206
2551
|
rb_define_method(re2_cMatchData, "[]", RUBY_METHOD_FUNC(re2_matchdata_aref),
|
|
2207
2552
|
-1);
|
|
2208
2553
|
rb_define_method(re2_cMatchData, "to_s",
|
|
@@ -2211,6 +2556,14 @@ extern "C" void Init_re2(void) {
|
|
|
2211
2556
|
RUBY_METHOD_FUNC(re2_matchdata_inspect), 0);
|
|
2212
2557
|
rb_define_method(re2_cMatchData, "deconstruct",
|
|
2213
2558
|
RUBY_METHOD_FUNC(re2_matchdata_deconstruct), 0);
|
|
2559
|
+
rb_define_method(re2_cMatchData, "captures",
|
|
2560
|
+
RUBY_METHOD_FUNC(re2_matchdata_deconstruct), 0);
|
|
2561
|
+
rb_define_method(re2_cMatchData, "named_captures",
|
|
2562
|
+
RUBY_METHOD_FUNC(re2_matchdata_named_captures), -1);
|
|
2563
|
+
rb_define_method(re2_cMatchData, "names",
|
|
2564
|
+
RUBY_METHOD_FUNC(re2_matchdata_names), 0);
|
|
2565
|
+
rb_define_method(re2_cMatchData, "values_at",
|
|
2566
|
+
RUBY_METHOD_FUNC(re2_matchdata_values_at), -1);
|
|
2214
2567
|
rb_define_method(re2_cMatchData, "deconstruct_keys",
|
|
2215
2568
|
RUBY_METHOD_FUNC(re2_matchdata_deconstruct_keys), 1);
|
|
2216
2569
|
rb_define_method(re2_cMatchData, "initialize_copy",
|
|
@@ -2248,6 +2601,10 @@ extern "C" void Init_re2(void) {
|
|
|
2248
2601
|
RUBY_METHOD_FUNC(re2_regexp_number_of_capturing_groups), 0);
|
|
2249
2602
|
rb_define_method(re2_cRegexp, "named_capturing_groups",
|
|
2250
2603
|
RUBY_METHOD_FUNC(re2_regexp_named_capturing_groups), 0);
|
|
2604
|
+
rb_define_method(re2_cRegexp, "named_captures",
|
|
2605
|
+
RUBY_METHOD_FUNC(re2_regexp_named_capturing_groups), 0);
|
|
2606
|
+
rb_define_method(re2_cRegexp, "names",
|
|
2607
|
+
RUBY_METHOD_FUNC(re2_regexp_names), 0);
|
|
2251
2608
|
rb_define_method(re2_cRegexp, "match", RUBY_METHOD_FUNC(re2_regexp_match),
|
|
2252
2609
|
-1);
|
|
2253
2610
|
rb_define_method(re2_cRegexp, "match?", RUBY_METHOD_FUNC(re2_regexp_match_p),
|
|
@@ -2310,16 +2667,26 @@ extern "C" void Init_re2(void) {
|
|
|
2310
2667
|
rb_define_method(re2_cSet, "size", RUBY_METHOD_FUNC(re2_set_size), 0);
|
|
2311
2668
|
rb_define_method(re2_cSet, "length", RUBY_METHOD_FUNC(re2_set_size), 0);
|
|
2312
2669
|
|
|
2670
|
+
rb_define_module_function(re2_mRE2, "replace",
|
|
2671
|
+
RUBY_METHOD_FUNC(re2_replace), 3);
|
|
2313
2672
|
rb_define_module_function(re2_mRE2, "Replace",
|
|
2314
|
-
RUBY_METHOD_FUNC(
|
|
2673
|
+
RUBY_METHOD_FUNC(re2_replace), 3);
|
|
2674
|
+
rb_define_module_function(re2_mRE2, "global_replace",
|
|
2675
|
+
RUBY_METHOD_FUNC(re2_global_replace), 3);
|
|
2315
2676
|
rb_define_module_function(re2_mRE2, "GlobalReplace",
|
|
2316
|
-
RUBY_METHOD_FUNC(
|
|
2677
|
+
RUBY_METHOD_FUNC(re2_global_replace), 3);
|
|
2678
|
+
rb_define_module_function(re2_mRE2, "extract",
|
|
2679
|
+
RUBY_METHOD_FUNC(re2_extract), 3);
|
|
2317
2680
|
rb_define_module_function(re2_mRE2, "QuoteMeta",
|
|
2318
|
-
RUBY_METHOD_FUNC(
|
|
2681
|
+
RUBY_METHOD_FUNC(re2_escape), 1);
|
|
2682
|
+
rb_define_module_function(re2_mRE2, "escape",
|
|
2683
|
+
RUBY_METHOD_FUNC(re2_escape), 1);
|
|
2684
|
+
rb_define_module_function(re2_mRE2, "quote",
|
|
2685
|
+
RUBY_METHOD_FUNC(re2_escape), 1);
|
|
2319
2686
|
rb_define_singleton_method(re2_cRegexp, "escape",
|
|
2320
|
-
RUBY_METHOD_FUNC(
|
|
2687
|
+
RUBY_METHOD_FUNC(re2_escape), 1);
|
|
2321
2688
|
rb_define_singleton_method(re2_cRegexp, "quote",
|
|
2322
|
-
RUBY_METHOD_FUNC(
|
|
2689
|
+
RUBY_METHOD_FUNC(re2_escape), 1);
|
|
2323
2690
|
|
|
2324
2691
|
// (see RE2::Regexp#initialize)
|
|
2325
2692
|
rb_define_singleton_method(re2_cRegexp, "compile",
|
|
@@ -2347,4 +2714,5 @@ extern "C" void Init_re2(void) {
|
|
|
2347
2714
|
id_submatches = rb_intern("submatches");
|
|
2348
2715
|
id_startpos = rb_intern("startpos");
|
|
2349
2716
|
id_endpos = rb_intern("endpos");
|
|
2717
|
+
id_symbolize_names = rb_intern("symbolize_names");
|
|
2350
2718
|
}
|
data/lib/3.1/re2.so
CHANGED
|
Binary file
|
data/lib/3.2/re2.so
CHANGED
|
Binary file
|
data/lib/3.3/re2.so
CHANGED
|
Binary file
|
data/lib/3.4/re2.so
CHANGED
|
Binary file
|
data/lib/4.0/re2.so
CHANGED
|
Binary file
|