re2 1.5.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +29 -6
- data/ext/re2/extconf.rb +20 -12
- data/ext/re2/re2.cc +167 -42
- data/lib/re2/string.rb +3 -19
- data/spec/re2/match_data_spec.rb +58 -0
- data/spec/re2/regexp_spec.rb +2 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ddba6fe8bb8c7e51bcb0b8f4346b39ca903978ff6697a4c14d06702baae10598
|
4
|
+
data.tar.gz: f8905eeeeff7e4bebc532d23fa19b29f1c3e2a88ea303a5aa879bafb4af4e1ed
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 44184ad41a55746903fcd350fbf473d26266c710e324eaac879b46ff45e3b77120d5100a25f5188e2d938cafbc48caf909506d79cf07c31522ccfb9e789c2ce0
|
7
|
+
data.tar.gz: eb282abd472e00ebcbb10e38f84c62bbac785bd139c69140647a29a157475600b6845472edb41ed42234caf5729188052cc1459b1ac2775c544ed0e905c6039d
|
data/README.md
CHANGED
@@ -4,9 +4,9 @@ re2 [, libre2.1 (2020-03-02), libre2.6 (2020-03-03), libre2.7 (2020-05-01), libre2.8 (2020-07-06), libre2.9 (2020-11-01)
|
7
|
+
**Current version:** 1.7.0
|
8
|
+
**Supported Ruby versions:** 1.8.7, 1.9.3, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 3.0, 3.1, 3.2
|
9
|
+
**Supported re2 versions:** libre2.0 (< 2020-03-02), libre2.1 (2020-03-02), libre2.6 (2020-03-03), libre2.7 (2020-05-01), libre2.8 (2020-07-06), libre2.9 (2020-11-01), libre2.10 (2022-12-01), libre2.11 (2023-07-01)
|
10
10
|
|
11
11
|
Installation
|
12
12
|
------------
|
@@ -22,7 +22,7 @@ If you are using Debian, you can install the [libre2-dev][] package like so:
|
|
22
22
|
|
23
23
|
$ sudo apt-get install libre2-dev
|
24
24
|
|
25
|
-
Recent versions of re2 require a compiler with C++
|
25
|
+
Recent versions of re2 require a compiler with C++14 support such as [clang](http://clang.llvm.org/) 3.4 or [gcc](https://gcc.gnu.org/) 5.
|
26
26
|
|
27
27
|
If you are using a packaged Ruby distribution, make sure you also have the
|
28
28
|
Ruby header files installed such as those provided by the [ruby-dev][] package
|
@@ -137,7 +137,7 @@ the pattern. After all patterns have been added, the set can be compiled using
|
|
137
137
|
`RE2::Set#compile`, and then `RE2::Set#match` will return an `Array<Integer>`
|
138
138
|
containing the indices of all the patterns that matched.
|
139
139
|
|
140
|
-
```
|
140
|
+
```ruby
|
141
141
|
set = RE2::Set.new
|
142
142
|
set.add("abc") #=> 0
|
143
143
|
set.add("def") #=> 1
|
@@ -147,6 +147,27 @@ set.match("abcdefghi") #=> [0, 1, 2]
|
|
147
147
|
set.match("ghidefabc") #=> [2, 1, 0]
|
148
148
|
```
|
149
149
|
|
150
|
+
As of 1.6.0, you can use [Ruby's pattern matching](https://docs.ruby-lang.org/en/3.0/syntax/pattern_matching_rdoc.html) against `RE2::MatchData` with both array patterns and hash patterns:
|
151
|
+
|
152
|
+
```ruby
|
153
|
+
case RE2('(\w+) (\d+)').match("Alice 42")
|
154
|
+
in [name, age]
|
155
|
+
puts "My name is #{name} and I am #{age} years old"
|
156
|
+
else
|
157
|
+
puts "No match!"
|
158
|
+
end
|
159
|
+
# My name is Alice and I am 42 years old
|
160
|
+
|
161
|
+
|
162
|
+
case RE2('(?P<name>\w+) (?P<age>\d+)').match("Alice 42")
|
163
|
+
in {name:, age:}
|
164
|
+
puts "My name is #{name} and I am #{age} years old"
|
165
|
+
else
|
166
|
+
puts "No match!"
|
167
|
+
end
|
168
|
+
# My name is Alice and I am 42 years old
|
169
|
+
```
|
170
|
+
|
150
171
|
Features
|
151
172
|
--------
|
152
173
|
|
@@ -185,13 +206,15 @@ Features
|
|
185
206
|
[`RE2.escape(unquoted)`](https://github.com/google/re2/blob/2016-02-01/re2/re2.h#L418) and
|
186
207
|
`RE2.quote(unquoted)`
|
187
208
|
|
209
|
+
* Pattern matching with `RE2::MatchData`
|
210
|
+
|
188
211
|
Contributions
|
189
212
|
-------------
|
190
213
|
|
191
214
|
* Thanks to [Jason Woods](https://github.com/driskell) who contributed the
|
192
215
|
original implementations of `RE2::MatchData#begin` and `RE2::MatchData#end`;
|
193
216
|
* Thanks to [Stefano Rivera](https://github.com/stefanor) who first contributed C++11 support;
|
194
|
-
* Thanks to [Stan Hu](https://github.com/stanhu) for reporting a bug with empty patterns and `RE2::Regexp#scan`;
|
217
|
+
* Thanks to [Stan Hu](https://github.com/stanhu) for reporting a bug with empty patterns and `RE2::Regexp#scan` and for contributing support for libre2.11 (2023-07-01);
|
195
218
|
* Thanks to [Sebastian Reitenbach](https://github.com/buzzdeee) for reporting
|
196
219
|
the deprecation and removal of the `utf8` encoding option in re2;
|
197
220
|
* Thanks to [Sergio Medina](https://github.com/serch) for reporting a bug when
|
data/ext/re2/extconf.rb
CHANGED
@@ -44,22 +44,30 @@ unless have_library("re2")
|
|
44
44
|
abort "You must have re2 installed and specified with --with-re2-dir, please see https://github.com/google/re2/wiki/Install"
|
45
45
|
end
|
46
46
|
|
47
|
-
|
48
|
-
checking_for("re2 requires C++11 compiler") do
|
49
|
-
minimal_program = <<SRC
|
47
|
+
minimal_program = <<SRC
|
50
48
|
#include <re2/re2.h>
|
51
49
|
int main() { return 0; }
|
52
50
|
SRC
|
53
51
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
52
|
+
re2_requires_version_flag = checking_for("re2 that requires explicit C++ version flag") do
|
53
|
+
!try_compile(minimal_program, compile_options)
|
54
|
+
end
|
55
|
+
|
56
|
+
if re2_requires_version_flag
|
57
|
+
# Recent versions of re2 depend directly on abseil, which requires a
|
58
|
+
# compiler with C++14 support (see
|
59
|
+
# https://github.com/abseil/abseil-cpp/issues/1127 and
|
60
|
+
# https://github.com/abseil/abseil-cpp/issues/1431). However, the
|
61
|
+
# `std=c++14` flag doesn't appear to suffice; we need at least
|
62
|
+
# `std=c++17`.
|
63
|
+
abort "Cannot compile re2 with your compiler: recent versions require C++14 support." unless %w[c++20 c++17 c++11 c++0x].any? do |std|
|
64
|
+
checking_for("re2 that compiles with #{std} standard") do
|
65
|
+
if try_compile(minimal_program, compile_options + " -std=#{std}")
|
66
|
+
compile_options << " -std=#{std}"
|
67
|
+
$CPPFLAGS << " -std=#{std}"
|
68
|
+
|
69
|
+
true
|
70
|
+
end
|
63
71
|
end
|
64
72
|
end
|
65
73
|
end
|
data/ext/re2/re2.cc
CHANGED
@@ -393,7 +393,7 @@ re2::StringPiece *re2_matchdata_find_match(VALUE idx, VALUE self) {
|
|
393
393
|
/*
|
394
394
|
* Returns the number of elements in the match array (including nils).
|
395
395
|
*
|
396
|
-
* @return [
|
396
|
+
* @return [Integer] the number of elements
|
397
397
|
* @example
|
398
398
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
399
399
|
* m.size #=> 2
|
@@ -409,8 +409,8 @@ static VALUE re2_matchdata_size(VALUE self) {
|
|
409
409
|
/*
|
410
410
|
* Returns the offset of the start of the nth element of the matchdata.
|
411
411
|
*
|
412
|
-
* @param [
|
413
|
-
* @return [
|
412
|
+
* @param [Integer, String, Symbol] n the name or number of the match
|
413
|
+
* @return [Integer] the offset of the start of the match
|
414
414
|
* @example
|
415
415
|
* m = RE2::Regexp.new('ob (\d+)').match("bob 123")
|
416
416
|
* m.begin(0) #=> 1
|
@@ -439,8 +439,8 @@ static VALUE re2_matchdata_begin(VALUE self, VALUE n) {
|
|
439
439
|
/*
|
440
440
|
* Returns the offset of the character following the end of the nth element of the matchdata.
|
441
441
|
*
|
442
|
-
* @param [
|
443
|
-
* @return [
|
442
|
+
* @param [Integer, String, Symbol] n the name or number of the match
|
443
|
+
* @return [Integer] the offset of the character following the end of the match
|
444
444
|
* @example
|
445
445
|
* m = RE2::Regexp.new('ob (\d+) b').match("bob 123 bob")
|
446
446
|
* m.end(0) #=> 9
|
@@ -584,7 +584,7 @@ static VALUE re2_matchdata_named_match(const char* name, VALUE self) {
|
|
584
584
|
* @overload [](index)
|
585
585
|
* Access a particular match by index.
|
586
586
|
*
|
587
|
-
* @param [
|
587
|
+
* @param [Integer] index the index of the match to fetch
|
588
588
|
* @return [String, nil] the specified match
|
589
589
|
* @example
|
590
590
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
@@ -593,8 +593,8 @@ static VALUE re2_matchdata_named_match(const char* name, VALUE self) {
|
|
593
593
|
* @overload [](start, length)
|
594
594
|
* Access a range of matches by starting index and length.
|
595
595
|
*
|
596
|
-
* @param [
|
597
|
-
* @param [
|
596
|
+
* @param [Integer] start the index from which to start
|
597
|
+
* @param [Integer] length the number of elements to fetch
|
598
598
|
* @return [Array<String, nil>] the specified matches
|
599
599
|
* @example
|
600
600
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
@@ -687,14 +687,117 @@ static VALUE re2_matchdata_inspect(VALUE self) {
|
|
687
687
|
return result;
|
688
688
|
}
|
689
689
|
|
690
|
+
/*
|
691
|
+
* Returns the array of submatches for pattern matching.
|
692
|
+
*
|
693
|
+
* @return [Array<String, nil>] the array of submatches
|
694
|
+
* @example
|
695
|
+
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
696
|
+
* m.deconstruct #=> ["123"]
|
697
|
+
*
|
698
|
+
* @example pattern matching
|
699
|
+
* case RE2::Regexp.new('(\d+) (\d+)').match("bob 123 456")
|
700
|
+
* in x, y
|
701
|
+
* puts "Matched #{x} #{y}"
|
702
|
+
* else
|
703
|
+
* puts "Unrecognised match"
|
704
|
+
* end
|
705
|
+
*/
|
706
|
+
static VALUE re2_matchdata_deconstruct(VALUE self) {
|
707
|
+
int i;
|
708
|
+
re2_matchdata *m;
|
709
|
+
re2_pattern *p;
|
710
|
+
re2::StringPiece *match;
|
711
|
+
VALUE array;
|
712
|
+
|
713
|
+
Data_Get_Struct(self, re2_matchdata, m);
|
714
|
+
Data_Get_Struct(m->regexp, re2_pattern, p);
|
715
|
+
|
716
|
+
array = rb_ary_new2(m->number_of_matches - 1);
|
717
|
+
for (i = 1; i < m->number_of_matches; i++) {
|
718
|
+
match = &m->matches[i];
|
719
|
+
|
720
|
+
if (match->empty()) {
|
721
|
+
rb_ary_push(array, Qnil);
|
722
|
+
} else {
|
723
|
+
rb_ary_push(array, ENCODED_STR_NEW(match->data(), match->size(),
|
724
|
+
p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"));
|
725
|
+
}
|
726
|
+
}
|
727
|
+
|
728
|
+
return array;
|
729
|
+
}
|
730
|
+
|
731
|
+
/*
|
732
|
+
* Returns a hash of capturing group names to submatches for pattern matching.
|
733
|
+
*
|
734
|
+
* As this is used by Ruby's pattern matching, it will return an empty hash if given
|
735
|
+
* more keys than there are capturing groups. Given keys will populate the hash in
|
736
|
+
* order but an invalid name will cause the hash to be immediately returned.
|
737
|
+
*
|
738
|
+
* @return [Hash] a hash of capturing group names to submatches
|
739
|
+
* @param [Array<Symbol>, nil] keys an array of Symbol capturing group names or nil to return all names
|
740
|
+
* @example
|
741
|
+
* m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
742
|
+
* m.deconstruct_keys(nil) #=> {:numbers => "123", :letters => "abc"}
|
743
|
+
* m.deconstruct_keys([:numbers]) #=> {:numbers => "123"}
|
744
|
+
* m.deconstruct_keys([:fruit]) #=> {}
|
745
|
+
* m.deconstruct_keys([:letters, :fruit]) #=> {:letters => "abc"}
|
746
|
+
*
|
747
|
+
* @example pattern matching
|
748
|
+
* case RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
749
|
+
* in numbers:, letters:
|
750
|
+
* puts "Numbers: #{numbers}, letters: #{letters}"
|
751
|
+
* else
|
752
|
+
* puts "Unrecognised match"
|
753
|
+
* end
|
754
|
+
*/
|
755
|
+
static VALUE re2_matchdata_deconstruct_keys(VALUE self, VALUE keys) {
|
756
|
+
int i;
|
757
|
+
VALUE capturing_groups, key;
|
758
|
+
re2_matchdata *m;
|
759
|
+
re2_pattern *p;
|
760
|
+
map<string, int> groups;
|
761
|
+
map<string, int>::iterator iterator;
|
762
|
+
|
763
|
+
Data_Get_Struct(self, re2_matchdata, m);
|
764
|
+
Data_Get_Struct(m->regexp, re2_pattern, p);
|
765
|
+
|
766
|
+
groups = p->pattern->NamedCapturingGroups();
|
767
|
+
capturing_groups = rb_hash_new();
|
768
|
+
|
769
|
+
if (NIL_P(keys)) {
|
770
|
+
for (iterator = groups.begin(); iterator != groups.end(); iterator++) {
|
771
|
+
rb_hash_aset(capturing_groups,
|
772
|
+
ID2SYM(rb_intern(iterator->first.data())),
|
773
|
+
re2_matchdata_nth_match(iterator->second, self));
|
774
|
+
}
|
775
|
+
} else {
|
776
|
+
Check_Type(keys, T_ARRAY);
|
777
|
+
|
778
|
+
if (p->pattern->NumberOfCapturingGroups() >= RARRAY_LEN(keys)) {
|
779
|
+
for (i = 0; i < RARRAY_LEN(keys); i++) {
|
780
|
+
key = rb_ary_entry(keys, i);
|
781
|
+
Check_Type(key, T_SYMBOL);
|
782
|
+
string name(rb_id2name(SYM2ID(key)));
|
783
|
+
|
784
|
+
if (groups.count(name) == 0) {
|
785
|
+
break;
|
786
|
+
}
|
787
|
+
|
788
|
+
rb_hash_aset(capturing_groups, key, re2_matchdata_nth_match(groups[name], self));
|
789
|
+
}
|
790
|
+
}
|
791
|
+
}
|
792
|
+
|
793
|
+
return capturing_groups;
|
794
|
+
}
|
795
|
+
|
690
796
|
/*
|
691
797
|
* Returns a new RE2 object with a compiled version of
|
692
|
-
* +pattern+ stored inside. Equivalent to +RE2.new+.
|
798
|
+
* +pattern+ stored inside. Equivalent to +RE2::Regexp.new+.
|
693
799
|
*
|
694
|
-
* @
|
695
|
-
* @param [String] pattern the pattern to compile
|
696
|
-
* @param [Hash] options the options to compile a regexp with
|
697
|
-
* @see RE2::Regexp.new
|
800
|
+
* @see RE2::Regexp#initialize
|
698
801
|
*
|
699
802
|
*/
|
700
803
|
static VALUE re2_re2(int argc, VALUE *argv, VALUE self) {
|
@@ -727,7 +830,7 @@ static VALUE re2_re2(int argc, VALUE *argv, VALUE self) {
|
|
727
830
|
* @option options [Boolean] :posix_syntax (false) restrict regexps to POSIX egrep syntax
|
728
831
|
* @option options [Boolean] :longest_match (false) search for longest match, not first match
|
729
832
|
* @option options [Boolean] :log_errors (true) log syntax and execution errors to ERROR
|
730
|
-
* @option options [
|
833
|
+
* @option options [Integer] :max_mem approx. max memory footprint of RE2
|
731
834
|
* @option options [Boolean] :literal (false) interpret string as literal, not regexp
|
732
835
|
* @option options [Boolean] :never_nl (false) never match \n, even if it is in regexp
|
733
836
|
* @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with (?i) unless in posix_syntax mode)
|
@@ -878,7 +981,7 @@ static VALUE re2_regexp_log_errors(VALUE self) {
|
|
878
981
|
* Returns the max_mem setting for the regular expression
|
879
982
|
* +re2+.
|
880
983
|
*
|
881
|
-
* @return [
|
984
|
+
* @return [Integer] the max_mem option
|
882
985
|
* @example
|
883
986
|
* re2 = RE2::Regexp.new("woo?", :max_mem => 1024)
|
884
987
|
* re2.max_mem #=> 1024
|
@@ -1032,7 +1135,7 @@ static VALUE re2_regexp_error_arg(VALUE self) {
|
|
1032
1135
|
* of a regexp's "cost". Larger numbers are more expensive
|
1033
1136
|
* than smaller numbers.
|
1034
1137
|
*
|
1035
|
-
* @return [
|
1138
|
+
* @return [Integer] the regexp "cost"
|
1036
1139
|
*/
|
1037
1140
|
static VALUE re2_regexp_program_size(VALUE self) {
|
1038
1141
|
re2_pattern *p;
|
@@ -1097,7 +1200,7 @@ static VALUE re2_regexp_options(VALUE self) {
|
|
1097
1200
|
* wasn't valid on construction. The overall match ($0) does not
|
1098
1201
|
* count: if the regexp is "(a)(b)", returns 2.
|
1099
1202
|
*
|
1100
|
-
* @return [
|
1203
|
+
* @return [Integer] the number of capturing subpatterns
|
1101
1204
|
*/
|
1102
1205
|
static VALUE re2_regexp_number_of_capturing_groups(VALUE self) {
|
1103
1206
|
re2_pattern *p;
|
@@ -1167,7 +1270,7 @@ static VALUE re2_regexp_named_capturing_groups(VALUE self) {
|
|
1167
1270
|
* matches returned (padded with nils if necessary).
|
1168
1271
|
*
|
1169
1272
|
* @param [String] text the text to search
|
1170
|
-
* @param [
|
1273
|
+
* @param [Integer] number_of_matches the number of matches to return
|
1171
1274
|
* @return [RE2::MatchData] the matches
|
1172
1275
|
* @raise [ArgumentError] if given a negative number of matches
|
1173
1276
|
* @raise [NoMemoryError] if there was not enough memory to allocate the matches
|
@@ -1245,7 +1348,7 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
|
|
1245
1348
|
*
|
1246
1349
|
* @return [Boolean] whether the match was successful
|
1247
1350
|
*/
|
1248
|
-
static VALUE
|
1351
|
+
static VALUE re2_regexp_match_p(VALUE self, VALUE text) {
|
1249
1352
|
VALUE argv[2];
|
1250
1353
|
argv[0] = text;
|
1251
1354
|
argv[1] = INT2FIX(0);
|
@@ -1293,7 +1396,7 @@ static VALUE re2_regexp_scan(VALUE self, VALUE text) {
|
|
1293
1396
|
* @return [String] the resulting string
|
1294
1397
|
* @example
|
1295
1398
|
* RE2.Replace("hello there", "hello", "howdy") #=> "howdy there"
|
1296
|
-
* re2 = RE2.new("hel+o")
|
1399
|
+
* re2 = RE2::Regexp.new("hel+o")
|
1297
1400
|
* RE2.Replace("hello there", re2, "yo") #=> "yo there"
|
1298
1401
|
*/
|
1299
1402
|
static VALUE re2_Replace(VALUE self, VALUE str, VALUE pattern,
|
@@ -1329,7 +1432,7 @@ static VALUE re2_Replace(VALUE self, VALUE str, VALUE pattern,
|
|
1329
1432
|
* @param [String] rewrite the string to replace with
|
1330
1433
|
* @return [String] the resulting string
|
1331
1434
|
* @example
|
1332
|
-
* re2 = RE2.new("oo?")
|
1435
|
+
* re2 = RE2::Regexp.new("oo?")
|
1333
1436
|
* RE2.GlobalReplace("whoops-doops", re2, "e") #=> "wheps-deps"
|
1334
1437
|
* RE2.GlobalReplace("hello there", "e", "i") #=> "hillo thiri"
|
1335
1438
|
*/
|
@@ -1416,7 +1519,7 @@ static VALUE re2_set_allocate(VALUE klass) {
|
|
1416
1519
|
* @option options [Boolean] :posix_syntax (false) restrict regexps to POSIX egrep syntax
|
1417
1520
|
* @option options [Boolean] :longest_match (false) search for longest match, not first match
|
1418
1521
|
* @option options [Boolean] :log_errors (true) log syntax and execution errors to ERROR
|
1419
|
-
* @option options [
|
1522
|
+
* @option options [Integer] :max_mem approx. max memory footprint of RE2
|
1420
1523
|
* @option options [Boolean] :literal (false) interpret string as literal, not regexp
|
1421
1524
|
* @option options [Boolean] :never_nl (false) never match \n, even if it is in regexp
|
1422
1525
|
* @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with (?i) unless in posix_syntax mode)
|
@@ -1514,6 +1617,7 @@ static VALUE re2_set_compile(VALUE self) {
|
|
1514
1617
|
* @return [Boolean] whether the underlying re2 outputs error information from Set matches
|
1515
1618
|
*/
|
1516
1619
|
static VALUE re2_set_match_raises_errors_p(VALUE self) {
|
1620
|
+
UNUSED(self);
|
1517
1621
|
#ifdef HAVE_ERROR_INFO_ARGUMENT
|
1518
1622
|
return Qtrue;
|
1519
1623
|
#else
|
@@ -1526,18 +1630,40 @@ static VALUE re2_set_match_raises_errors_p(VALUE self) {
|
|
1526
1630
|
* integer indices of the matching patterns if matched or an empty array if
|
1527
1631
|
* there are no matches.
|
1528
1632
|
*
|
1529
|
-
* @
|
1530
|
-
*
|
1531
|
-
* @
|
1532
|
-
*
|
1533
|
-
*
|
1534
|
-
*
|
1535
|
-
*
|
1536
|
-
*
|
1537
|
-
*
|
1538
|
-
*
|
1539
|
-
*
|
1540
|
-
*
|
1633
|
+
* @return [Array<Integer>]
|
1634
|
+
*
|
1635
|
+
* @overload match(str)
|
1636
|
+
* Returns an array of integer indices of patterns matching the given string
|
1637
|
+
* (if any). Raises exceptions if there are any errors while matching.
|
1638
|
+
*
|
1639
|
+
* @param [String] str the text to match against
|
1640
|
+
* @return [Array<Integer>] the indices of matching regexps
|
1641
|
+
* @raise [MatchError] if an error occurs while matching
|
1642
|
+
* @raise [UnsupportedError] if the underlying version of re2 does not output error information
|
1643
|
+
* @example
|
1644
|
+
* set = RE2::Set.new
|
1645
|
+
* set.add("abc")
|
1646
|
+
* set.add("def")
|
1647
|
+
* set.compile
|
1648
|
+
* set.match("abcdef") # => [0, 1]
|
1649
|
+
*
|
1650
|
+
* @overload match(str, options)
|
1651
|
+
* Returns an array of integer indices of patterns matching the given string
|
1652
|
+
* (if any). Raises exceptions if there are any errors while matching and the
|
1653
|
+
* :exception option is set to true.
|
1654
|
+
*
|
1655
|
+
* @param [String] str the text to match against
|
1656
|
+
* @param [Hash] options the options with which to match
|
1657
|
+
* @option options [Boolean] :exception (true) whether to raise exceptions with re2's error information (not supported on ABI version 0 of re2)
|
1658
|
+
* @return [Array<Integer>] the indices of matching regexps
|
1659
|
+
* @raise [MatchError] if an error occurs while matching
|
1660
|
+
* @raise [UnsupportedError] if the underlying version of re2 does not output error information
|
1661
|
+
* @example
|
1662
|
+
* set = RE2::Set.new
|
1663
|
+
* set.add("abc")
|
1664
|
+
* set.add("def")
|
1665
|
+
* set.compile
|
1666
|
+
* set.match("abcdef", :exception => true) # => [0, 1]
|
1541
1667
|
*/
|
1542
1668
|
static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
|
1543
1669
|
VALUE str, options, exception_option;
|
@@ -1644,6 +1770,10 @@ void Init_re2(void) {
|
|
1644
1770
|
RUBY_METHOD_FUNC(re2_matchdata_to_s), 0);
|
1645
1771
|
rb_define_method(re2_cMatchData, "inspect",
|
1646
1772
|
RUBY_METHOD_FUNC(re2_matchdata_inspect), 0);
|
1773
|
+
rb_define_method(re2_cMatchData, "deconstruct",
|
1774
|
+
RUBY_METHOD_FUNC(re2_matchdata_deconstruct), 0);
|
1775
|
+
rb_define_method(re2_cMatchData, "deconstruct_keys",
|
1776
|
+
RUBY_METHOD_FUNC(re2_matchdata_deconstruct_keys), 1);
|
1647
1777
|
|
1648
1778
|
rb_define_method(re2_cScanner, "string",
|
1649
1779
|
RUBY_METHOD_FUNC(re2_scanner_string), 0);
|
@@ -1674,11 +1804,11 @@ void Init_re2(void) {
|
|
1674
1804
|
rb_define_method(re2_cRegexp, "match", RUBY_METHOD_FUNC(re2_regexp_match),
|
1675
1805
|
-1);
|
1676
1806
|
rb_define_method(re2_cRegexp, "match?",
|
1677
|
-
RUBY_METHOD_FUNC(
|
1807
|
+
RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
|
1678
1808
|
rb_define_method(re2_cRegexp, "=~",
|
1679
|
-
RUBY_METHOD_FUNC(
|
1809
|
+
RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
|
1680
1810
|
rb_define_method(re2_cRegexp, "===",
|
1681
|
-
RUBY_METHOD_FUNC(
|
1811
|
+
RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
|
1682
1812
|
rb_define_method(re2_cRegexp, "scan",
|
1683
1813
|
RUBY_METHOD_FUNC(re2_regexp_scan), 1);
|
1684
1814
|
rb_define_method(re2_cRegexp, "to_s", RUBY_METHOD_FUNC(re2_regexp_to_s), 0);
|
@@ -1756,9 +1886,4 @@ void Init_re2(void) {
|
|
1756
1886
|
id_anchor_start = rb_intern("anchor_start");
|
1757
1887
|
id_anchor_both = rb_intern("anchor_both");
|
1758
1888
|
id_exception = rb_intern("exception");
|
1759
|
-
|
1760
|
-
#if 0
|
1761
|
-
/* Fake so YARD generates the file. */
|
1762
|
-
rb_mKernel = rb_define_module("Kernel");
|
1763
|
-
#endif
|
1764
1889
|
}
|
data/lib/re2/string.rb
CHANGED
@@ -12,30 +12,14 @@ module RE2
|
|
12
12
|
# Replaces the first occurrence +pattern+ with +rewrite+ and returns a new
|
13
13
|
# string.
|
14
14
|
#
|
15
|
-
# @
|
16
|
-
# @param [String] rewrite the string to replace with
|
17
|
-
# @example
|
18
|
-
# "hello there".re2_sub("hello", "howdy") #=> "howdy there"
|
19
|
-
# re2 = RE2.new("hel+o")
|
20
|
-
# "hello there".re2_sub(re2, "yo") #=> "yo there"
|
21
|
-
# text = "Good morning"
|
22
|
-
# text.re2_sub("morn", "even") #=> "Good evening"
|
23
|
-
# text #=> "Good morning"
|
15
|
+
# @see RE2.Replace
|
24
16
|
def re2_sub(*args)
|
25
17
|
RE2.Replace(self, *args)
|
26
18
|
end
|
27
19
|
|
28
20
|
# Replaces every occurrence of +pattern+ with +rewrite+ and return a new string.
|
29
21
|
#
|
30
|
-
# @
|
31
|
-
# @param [String] rewrite the string to replace with
|
32
|
-
# @example
|
33
|
-
# "hello there".re2_gsub("e", "i") #=> "hillo thiri"
|
34
|
-
# re2 = RE2.new("oo?")
|
35
|
-
# "whoops-doops".re2_gsub(re2, "e") #=> "wheps-deps"
|
36
|
-
# text = "Good morning"
|
37
|
-
# text.re2_gsub("o", "ee") #=> "Geeeed meerning"
|
38
|
-
# text #=> "Good morning"
|
22
|
+
# @see RE2.GlobalReplace
|
39
23
|
def re2_gsub(*args)
|
40
24
|
RE2.GlobalReplace(self, *args)
|
41
25
|
end
|
@@ -74,7 +58,7 @@ module RE2
|
|
74
58
|
# matches returned (padded with nils if necessary).
|
75
59
|
#
|
76
60
|
# @param [String, RE2::Regexp] pattern the regular expression to match
|
77
|
-
# @param [
|
61
|
+
# @param [Integer] number_of_matches the number of matches to return
|
78
62
|
# @return [RE2::MatchData] the matches
|
79
63
|
# @raise [NoMemoryError] if there was not enough memory to allocate the matches
|
80
64
|
# @example
|
data/spec/re2/match_data_spec.rb
CHANGED
@@ -241,4 +241,62 @@ RSpec.describe RE2::MatchData do
|
|
241
241
|
expect(md.end(:foo)).to be_nil
|
242
242
|
end
|
243
243
|
end
|
244
|
+
|
245
|
+
describe "#deconstruct" do
|
246
|
+
it "returns all capturing groups" do
|
247
|
+
md = RE2::Regexp.new('w(o)(o)').match('woo')
|
248
|
+
|
249
|
+
expect(md.deconstruct).to eq(['o', 'o'])
|
250
|
+
end
|
251
|
+
|
252
|
+
it "includes optional capturing groups as nil" do
|
253
|
+
md = RE2::Regexp.new('w(.)(.)(.)?').match('woo')
|
254
|
+
|
255
|
+
expect(md.deconstruct).to eq(['o', 'o', nil])
|
256
|
+
end
|
257
|
+
end
|
258
|
+
|
259
|
+
describe "#deconstruct_keys" do
|
260
|
+
it "returns all named captures if given nil" do
|
261
|
+
md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
262
|
+
|
263
|
+
expect(md.deconstruct_keys(nil)).to eq(:numbers => '123', :letters => 'abc')
|
264
|
+
end
|
265
|
+
|
266
|
+
it "returns only named captures if given names" do
|
267
|
+
md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
268
|
+
|
269
|
+
expect(md.deconstruct_keys([:numbers])).to eq(:numbers => '123')
|
270
|
+
end
|
271
|
+
|
272
|
+
it "returns named captures up until an invalid name is given" do
|
273
|
+
md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
274
|
+
|
275
|
+
expect(md.deconstruct_keys([:numbers, :punctuation])).to eq(:numbers => '123')
|
276
|
+
end
|
277
|
+
|
278
|
+
it "returns an empty hash if given more capture names than exist" do
|
279
|
+
md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
280
|
+
|
281
|
+
expect(md.deconstruct_keys([:numbers, :letters, :punctuation])).to eq({})
|
282
|
+
end
|
283
|
+
|
284
|
+
it "returns an empty hash if there are no named capturing groups" do
|
285
|
+
md = RE2::Regexp.new('(\d+) ([a-zA-Z]+)').match('123 abc')
|
286
|
+
|
287
|
+
expect(md.deconstruct_keys(nil)).to eq({})
|
288
|
+
end
|
289
|
+
|
290
|
+
it "raises an error if given a non-array of keys" do
|
291
|
+
md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
292
|
+
|
293
|
+
expect { md.deconstruct_keys(0) }.to raise_error(TypeError)
|
294
|
+
end
|
295
|
+
|
296
|
+
it "raises an error if given keys as non-symbols" do
|
297
|
+
md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
298
|
+
|
299
|
+
expect { md.deconstruct_keys([0]) }.to raise_error(TypeError)
|
300
|
+
end
|
301
|
+
end
|
244
302
|
end
|
data/spec/re2/regexp_spec.rb
CHANGED
@@ -91,7 +91,8 @@ RSpec.describe RE2::Regexp do
|
|
91
91
|
describe "#program_size" do
|
92
92
|
it "returns a numeric value" do
|
93
93
|
program_size = RE2::Regexp.new('w(o)(o)').program_size
|
94
|
-
|
94
|
+
|
95
|
+
expect(program_size).to be_an(Integer)
|
95
96
|
end
|
96
97
|
|
97
98
|
it "returns -1 for an invalid pattern" do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: re2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Paul Mucur
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-07-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|
@@ -80,7 +80,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
80
80
|
- !ruby/object:Gem::Version
|
81
81
|
version: '0'
|
82
82
|
requirements: []
|
83
|
-
rubygems_version: 3.
|
83
|
+
rubygems_version: 3.4.10
|
84
84
|
signing_key:
|
85
85
|
specification_version: 4
|
86
86
|
summary: Ruby bindings to re2.
|