re2 1.4.0 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +46 -3
- data/ext/re2/extconf.rb +24 -0
- data/ext/re2/re2.cc +460 -69
- data/spec/re2/match_data_spec.rb +58 -0
- data/spec/re2/set_spec.rb +168 -0
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2151621115d04b197403c0f67276347c205928136405c05aac1fc70bf3a00dec
|
4
|
+
data.tar.gz: 8ba805a95c535ab7d30296a830448dea51da4f5f699a6aa3f858296b2590d188
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 010e20dae629df302c35c6d5cc412c5a0add1cc3f0b6357114ce64c5d77e6c3bbca7401e57c0b86be5fe87acae17162cd7c860c079dc2c57898761f9fd8d4ce4
|
7
|
+
data.tar.gz: 6bfa2db432e91b87ab5d6fa21babd07a675df3b22904e20f1491322b1d34c8fe1c5814783049c33f84ab8f22e0b3c2ef9c2308161f8694dd6bcbd8ec4da278a4
|
data/README.md
CHANGED
@@ -4,8 +4,8 @@ re2 [, libre2.1 (2020-03-02), libre2.6 (2020-03-03), libre2.7 (2020-05-01), libre2.8 (2020-07-06), libre2.9 (2020-11-01)
|
10
10
|
|
11
11
|
Installation
|
@@ -131,6 +131,43 @@ enum.next #=> ["It"]
|
|
131
131
|
enum.next #=> ["is"]
|
132
132
|
```
|
133
133
|
|
134
|
+
As of 1.5.0, you can use `RE2::Set` to match multiple patterns against a
|
135
|
+
string. Calling `RE2::Set#add` with a pattern will return an integer index of
|
136
|
+
the pattern. After all patterns have been added, the set can be compiled using
|
137
|
+
`RE2::Set#compile`, and then `RE2::Set#match` will return an `Array<Integer>`
|
138
|
+
containing the indices of all the patterns that matched.
|
139
|
+
|
140
|
+
```ruby
|
141
|
+
set = RE2::Set.new
|
142
|
+
set.add("abc") #=> 0
|
143
|
+
set.add("def") #=> 1
|
144
|
+
set.add("ghi") #=> 2
|
145
|
+
set.compile #=> true
|
146
|
+
set.match("abcdefghi") #=> [0, 1, 2]
|
147
|
+
set.match("ghidefabc") #=> [2, 1, 0]
|
148
|
+
```
|
149
|
+
|
150
|
+
As of 1.6.0, you can use [Ruby's pattern matching](https://docs.ruby-lang.org/en/3.0/syntax/pattern_matching_rdoc.html) against `RE2::MatchData` with both array patterns and hash patterns:
|
151
|
+
|
152
|
+
```ruby
|
153
|
+
case RE2('(\w+) (\d+)').match("Alice 42")
|
154
|
+
in [name, age]
|
155
|
+
puts "My name is #{name} and I am #{age} years old"
|
156
|
+
else
|
157
|
+
puts "No match!"
|
158
|
+
end
|
159
|
+
# My name is Alice and I am 42 years old
|
160
|
+
|
161
|
+
|
162
|
+
case RE2('(?P<name>\w+) (?P<age>\d+)').match("Alice 42")
|
163
|
+
in {name:, age:}
|
164
|
+
puts "My name is #{name} and I am #{age} years old"
|
165
|
+
else
|
166
|
+
puts "No match!"
|
167
|
+
end
|
168
|
+
# My name is Alice and I am 42 years old
|
169
|
+
```
|
170
|
+
|
134
171
|
Features
|
135
172
|
--------
|
136
173
|
|
@@ -149,6 +186,8 @@ Features
|
|
149
186
|
|
150
187
|
* Incrementally scanning text with `re2.scan(text)`
|
151
188
|
|
189
|
+
* Search a collection of patterns simultaneously with `RE2::Set`
|
190
|
+
|
152
191
|
* Checking regular expression compilation with `re2.ok?`, `re2.error` and
|
153
192
|
`re2.error_arg`
|
154
193
|
|
@@ -167,6 +206,8 @@ Features
|
|
167
206
|
[`RE2.escape(unquoted)`](https://github.com/google/re2/blob/2016-02-01/re2/re2.h#L418) and
|
168
207
|
`RE2.quote(unquoted)`
|
169
208
|
|
209
|
+
* Pattern matching with `RE2::MatchData`
|
210
|
+
|
170
211
|
Contributions
|
171
212
|
-------------
|
172
213
|
|
@@ -177,7 +218,9 @@ Contributions
|
|
177
218
|
* Thanks to [Sebastian Reitenbach](https://github.com/buzzdeee) for reporting
|
178
219
|
the deprecation and removal of the `utf8` encoding option in re2;
|
179
220
|
* Thanks to [Sergio Medina](https://github.com/serch) for reporting a bug when
|
180
|
-
using `RE2::Scanner#scan` with an invalid regular expression
|
221
|
+
using `RE2::Scanner#scan` with an invalid regular expression;
|
222
|
+
* Thanks to [Pritam Baral](https://github.com/pritambaral) for contributing the
|
223
|
+
initial support for `RE2::Set`.
|
181
224
|
|
182
225
|
Contact
|
183
226
|
-------
|
data/ext/re2/extconf.rb
CHANGED
@@ -88,4 +88,28 @@ SRC
|
|
88
88
|
end
|
89
89
|
end
|
90
90
|
|
91
|
+
checking_for("RE2::Set::Match() with error information") do
|
92
|
+
test_re2_set_match_signature = <<SRC
|
93
|
+
#include <vector>
|
94
|
+
#include <re2/re2.h>
|
95
|
+
#include <re2/set.h>
|
96
|
+
|
97
|
+
int main() {
|
98
|
+
RE2::Set s(RE2::DefaultOptions, RE2::UNANCHORED);
|
99
|
+
s.Add("foo", NULL);
|
100
|
+
s.Compile();
|
101
|
+
|
102
|
+
std::vector<int> v;
|
103
|
+
RE2::Set::ErrorInfo ei;
|
104
|
+
s.Match("foo", &v, &ei);
|
105
|
+
|
106
|
+
return 0;
|
107
|
+
}
|
108
|
+
SRC
|
109
|
+
|
110
|
+
if try_compile(test_re2_set_match_signature, compile_options)
|
111
|
+
$defs.push("-DHAVE_ERROR_INFO_ARGUMENT")
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
91
115
|
create_makefile("re2")
|
data/ext/re2/re2.cc
CHANGED
@@ -8,6 +8,7 @@
|
|
8
8
|
|
9
9
|
#include <ruby.h>
|
10
10
|
#include <re2/re2.h>
|
11
|
+
#include <re2/set.h>
|
11
12
|
#include <stdint.h>
|
12
13
|
#include <string>
|
13
14
|
#include <sstream>
|
@@ -93,12 +94,82 @@ typedef struct {
|
|
93
94
|
VALUE regexp, text;
|
94
95
|
} re2_scanner;
|
95
96
|
|
96
|
-
|
97
|
+
typedef struct {
|
98
|
+
RE2::Set *set;
|
99
|
+
} re2_set;
|
100
|
+
|
101
|
+
VALUE re2_mRE2, re2_cRegexp, re2_cMatchData, re2_cScanner, re2_cSet,
|
102
|
+
re2_eSetMatchError, re2_eSetUnsupportedError;
|
97
103
|
|
98
104
|
/* Symbols used in RE2 options. */
|
99
105
|
static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors,
|
100
106
|
id_max_mem, id_literal, id_never_nl, id_case_sensitive,
|
101
|
-
id_perl_classes, id_word_boundary, id_one_line
|
107
|
+
id_perl_classes, id_word_boundary, id_one_line,
|
108
|
+
id_unanchored, id_anchor_start, id_anchor_both, id_exception;
|
109
|
+
|
110
|
+
void parse_re2_options(RE2::Options& re2_options, VALUE options) {
|
111
|
+
if (TYPE(options) != T_HASH) {
|
112
|
+
rb_raise(rb_eArgError, "options should be a hash");
|
113
|
+
}
|
114
|
+
VALUE utf8, posix_syntax, longest_match, log_errors,
|
115
|
+
max_mem, literal, never_nl, case_sensitive, perl_classes,
|
116
|
+
word_boundary, one_line;
|
117
|
+
|
118
|
+
utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
|
119
|
+
if (!NIL_P(utf8)) {
|
120
|
+
re2_options.set_encoding(RTEST(utf8) ? RE2::Options::EncodingUTF8 : RE2::Options::EncodingLatin1);
|
121
|
+
}
|
122
|
+
|
123
|
+
posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
|
124
|
+
if (!NIL_P(posix_syntax)) {
|
125
|
+
re2_options.set_posix_syntax(RTEST(posix_syntax));
|
126
|
+
}
|
127
|
+
|
128
|
+
longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
|
129
|
+
if (!NIL_P(longest_match)) {
|
130
|
+
re2_options.set_longest_match(RTEST(longest_match));
|
131
|
+
}
|
132
|
+
|
133
|
+
log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
|
134
|
+
if (!NIL_P(log_errors)) {
|
135
|
+
re2_options.set_log_errors(RTEST(log_errors));
|
136
|
+
}
|
137
|
+
|
138
|
+
max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
|
139
|
+
if (!NIL_P(max_mem)) {
|
140
|
+
re2_options.set_max_mem(NUM2INT(max_mem));
|
141
|
+
}
|
142
|
+
|
143
|
+
literal = rb_hash_aref(options, ID2SYM(id_literal));
|
144
|
+
if (!NIL_P(literal)) {
|
145
|
+
re2_options.set_literal(RTEST(literal));
|
146
|
+
}
|
147
|
+
|
148
|
+
never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
|
149
|
+
if (!NIL_P(never_nl)) {
|
150
|
+
re2_options.set_never_nl(RTEST(never_nl));
|
151
|
+
}
|
152
|
+
|
153
|
+
case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
|
154
|
+
if (!NIL_P(case_sensitive)) {
|
155
|
+
re2_options.set_case_sensitive(RTEST(case_sensitive));
|
156
|
+
}
|
157
|
+
|
158
|
+
perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
|
159
|
+
if (!NIL_P(perl_classes)) {
|
160
|
+
re2_options.set_perl_classes(RTEST(perl_classes));
|
161
|
+
}
|
162
|
+
|
163
|
+
word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
|
164
|
+
if (!NIL_P(word_boundary)) {
|
165
|
+
re2_options.set_word_boundary(RTEST(word_boundary));
|
166
|
+
}
|
167
|
+
|
168
|
+
one_line = rb_hash_aref(options, ID2SYM(id_one_line));
|
169
|
+
if (!NIL_P(one_line)) {
|
170
|
+
re2_options.set_one_line(RTEST(one_line));
|
171
|
+
}
|
172
|
+
}
|
102
173
|
|
103
174
|
void re2_matchdata_mark(re2_matchdata* self) {
|
104
175
|
rb_gc_mark(self->regexp);
|
@@ -616,6 +687,112 @@ static VALUE re2_matchdata_inspect(VALUE self) {
|
|
616
687
|
return result;
|
617
688
|
}
|
618
689
|
|
690
|
+
/*
|
691
|
+
* Returns the array of submatches for pattern matching.
|
692
|
+
*
|
693
|
+
* @return [Array<String, nil>] the array of submatches
|
694
|
+
* @example
|
695
|
+
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
696
|
+
* m.deconstruct #=> ["123"]
|
697
|
+
*
|
698
|
+
* @example pattern matching
|
699
|
+
* case RE2::Regexp.new('(\d+) (\d+)').match("bob 123 456")
|
700
|
+
* in x, y
|
701
|
+
* puts "Matched #{x} #{y}"
|
702
|
+
* else
|
703
|
+
* puts "Unrecognised match"
|
704
|
+
* end
|
705
|
+
*/
|
706
|
+
static VALUE re2_matchdata_deconstruct(VALUE self) {
|
707
|
+
int i;
|
708
|
+
re2_matchdata *m;
|
709
|
+
re2_pattern *p;
|
710
|
+
re2::StringPiece *match;
|
711
|
+
VALUE array;
|
712
|
+
|
713
|
+
Data_Get_Struct(self, re2_matchdata, m);
|
714
|
+
Data_Get_Struct(m->regexp, re2_pattern, p);
|
715
|
+
|
716
|
+
array = rb_ary_new2(m->number_of_matches - 1);
|
717
|
+
for (i = 1; i < m->number_of_matches; i++) {
|
718
|
+
match = &m->matches[i];
|
719
|
+
|
720
|
+
if (match->empty()) {
|
721
|
+
rb_ary_push(array, Qnil);
|
722
|
+
} else {
|
723
|
+
rb_ary_push(array, ENCODED_STR_NEW(match->data(), match->size(),
|
724
|
+
p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"));
|
725
|
+
}
|
726
|
+
}
|
727
|
+
|
728
|
+
return array;
|
729
|
+
}
|
730
|
+
|
731
|
+
/*
|
732
|
+
* Returns a hash of capturing group names to submatches for pattern matching.
|
733
|
+
*
|
734
|
+
* As this is used by Ruby's pattern matching, it will return an empty hash if given
|
735
|
+
* more keys than there are capturing groups. Given keys will populate the hash in
|
736
|
+
* order but an invalid name will cause the hash to be immediately returned.
|
737
|
+
*
|
738
|
+
* @return [Hash] a hash of capturing group names to submatches
|
739
|
+
* @param [Array<Symbol>, nil] keys an array of Symbol capturing group names or nil to return all names
|
740
|
+
* @example
|
741
|
+
* m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
742
|
+
* m.deconstruct_keys(nil) #=> {:numbers => "123", :letters => "abc"}
|
743
|
+
* m.deconstruct_keys([:numbers]) #=> {:numbers => "123"}
|
744
|
+
* m.deconstruct_keys([:fruit]) #=> {}
|
745
|
+
* m.deconstruct_keys([:letters, :fruit]) #=> {:letters => "abc"}
|
746
|
+
*
|
747
|
+
* @example pattern matching
|
748
|
+
* case RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
749
|
+
* in numbers:, letters:
|
750
|
+
* puts "Numbers: #{numbers}, letters: #{letters}"
|
751
|
+
* else
|
752
|
+
* puts "Unrecognised match"
|
753
|
+
* end
|
754
|
+
*/
|
755
|
+
static VALUE re2_matchdata_deconstruct_keys(VALUE self, VALUE keys) {
|
756
|
+
int i;
|
757
|
+
VALUE capturing_groups, key;
|
758
|
+
re2_matchdata *m;
|
759
|
+
re2_pattern *p;
|
760
|
+
map<string, int> groups;
|
761
|
+
map<string, int>::iterator iterator;
|
762
|
+
|
763
|
+
Data_Get_Struct(self, re2_matchdata, m);
|
764
|
+
Data_Get_Struct(m->regexp, re2_pattern, p);
|
765
|
+
|
766
|
+
groups = p->pattern->NamedCapturingGroups();
|
767
|
+
capturing_groups = rb_hash_new();
|
768
|
+
|
769
|
+
if (NIL_P(keys)) {
|
770
|
+
for (iterator = groups.begin(); iterator != groups.end(); iterator++) {
|
771
|
+
rb_hash_aset(capturing_groups,
|
772
|
+
ID2SYM(rb_intern(iterator->first.data())),
|
773
|
+
re2_matchdata_nth_match(iterator->second, self));
|
774
|
+
}
|
775
|
+
} else {
|
776
|
+
Check_Type(keys, T_ARRAY);
|
777
|
+
|
778
|
+
if (p->pattern->NumberOfCapturingGroups() >= RARRAY_LEN(keys)) {
|
779
|
+
for (i = 0; i < RARRAY_LEN(keys); i++) {
|
780
|
+
key = rb_ary_entry(keys, i);
|
781
|
+
Check_Type(key, T_SYMBOL);
|
782
|
+
string name(rb_id2name(SYM2ID(key)));
|
783
|
+
|
784
|
+
if (groups.count(name) == 0) {
|
785
|
+
break;
|
786
|
+
}
|
787
|
+
|
788
|
+
rb_hash_aset(capturing_groups, key, re2_matchdata_nth_match(groups[name], self));
|
789
|
+
}
|
790
|
+
}
|
791
|
+
}
|
792
|
+
|
793
|
+
return capturing_groups;
|
794
|
+
}
|
795
|
+
|
619
796
|
/*
|
620
797
|
* Returns a new RE2 object with a compiled version of
|
621
798
|
* +pattern+ stored inside. Equivalent to +RE2.new+.
|
@@ -667,75 +844,15 @@ static VALUE re2_re2(int argc, VALUE *argv, VALUE self) {
|
|
667
844
|
* @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
|
668
845
|
*/
|
669
846
|
static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
670
|
-
VALUE pattern, options
|
671
|
-
max_mem, literal, never_nl, case_sensitive, perl_classes,
|
672
|
-
word_boundary, one_line;
|
847
|
+
VALUE pattern, options;
|
673
848
|
re2_pattern *p;
|
674
849
|
|
675
850
|
rb_scan_args(argc, argv, "11", &pattern, &options);
|
676
851
|
Data_Get_Struct(self, re2_pattern, p);
|
677
852
|
|
678
853
|
if (RTEST(options)) {
|
679
|
-
if (TYPE(options) != T_HASH) {
|
680
|
-
rb_raise(rb_eArgError, "options should be a hash");
|
681
|
-
}
|
682
|
-
|
683
854
|
RE2::Options re2_options;
|
684
|
-
|
685
|
-
utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
|
686
|
-
if (!NIL_P(utf8)) {
|
687
|
-
re2_options.set_encoding(RTEST(utf8) ? RE2::Options::EncodingUTF8 : RE2::Options::EncodingLatin1);
|
688
|
-
}
|
689
|
-
|
690
|
-
posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
|
691
|
-
if (!NIL_P(posix_syntax)) {
|
692
|
-
re2_options.set_posix_syntax(RTEST(posix_syntax));
|
693
|
-
}
|
694
|
-
|
695
|
-
longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
|
696
|
-
if (!NIL_P(longest_match)) {
|
697
|
-
re2_options.set_longest_match(RTEST(longest_match));
|
698
|
-
}
|
699
|
-
|
700
|
-
log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
|
701
|
-
if (!NIL_P(log_errors)) {
|
702
|
-
re2_options.set_log_errors(RTEST(log_errors));
|
703
|
-
}
|
704
|
-
|
705
|
-
max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
|
706
|
-
if (!NIL_P(max_mem)) {
|
707
|
-
re2_options.set_max_mem(NUM2INT(max_mem));
|
708
|
-
}
|
709
|
-
|
710
|
-
literal = rb_hash_aref(options, ID2SYM(id_literal));
|
711
|
-
if (!NIL_P(literal)) {
|
712
|
-
re2_options.set_literal(RTEST(literal));
|
713
|
-
}
|
714
|
-
|
715
|
-
never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
|
716
|
-
if (!NIL_P(never_nl)) {
|
717
|
-
re2_options.set_never_nl(RTEST(never_nl));
|
718
|
-
}
|
719
|
-
|
720
|
-
case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
|
721
|
-
if (!NIL_P(case_sensitive)) {
|
722
|
-
re2_options.set_case_sensitive(RTEST(case_sensitive));
|
723
|
-
}
|
724
|
-
|
725
|
-
perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
|
726
|
-
if (!NIL_P(perl_classes)) {
|
727
|
-
re2_options.set_perl_classes(RTEST(perl_classes));
|
728
|
-
}
|
729
|
-
|
730
|
-
word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
|
731
|
-
if (!NIL_P(word_boundary)) {
|
732
|
-
re2_options.set_word_boundary(RTEST(word_boundary));
|
733
|
-
}
|
734
|
-
|
735
|
-
one_line = rb_hash_aref(options, ID2SYM(id_one_line));
|
736
|
-
if (!NIL_P(one_line)) {
|
737
|
-
re2_options.set_one_line(RTEST(one_line));
|
738
|
-
}
|
855
|
+
parse_re2_options(re2_options, options);
|
739
856
|
|
740
857
|
p->pattern = new(nothrow) RE2(StringValuePtr(pattern), re2_options);
|
741
858
|
} else {
|
@@ -1234,7 +1351,7 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
|
|
1234
1351
|
*
|
1235
1352
|
* @return [Boolean] whether the match was successful
|
1236
1353
|
*/
|
1237
|
-
static VALUE
|
1354
|
+
static VALUE re2_regexp_match_p(VALUE self, VALUE text) {
|
1238
1355
|
VALUE argv[2];
|
1239
1356
|
argv[0] = text;
|
1240
1357
|
argv[1] = INT2FIX(0);
|
@@ -1362,6 +1479,257 @@ static VALUE re2_QuoteMeta(VALUE self, VALUE unquoted) {
|
|
1362
1479
|
return rb_str_new(quoted_string.data(), quoted_string.size());
|
1363
1480
|
}
|
1364
1481
|
|
1482
|
+
void re2_set_free(re2_set *self) {
|
1483
|
+
if (self->set) {
|
1484
|
+
delete self->set;
|
1485
|
+
}
|
1486
|
+
free(self);
|
1487
|
+
}
|
1488
|
+
|
1489
|
+
static VALUE re2_set_allocate(VALUE klass) {
|
1490
|
+
re2_set *s;
|
1491
|
+
VALUE result = Data_Make_Struct(klass, re2_set, 0, re2_set_free, s);
|
1492
|
+
return result;
|
1493
|
+
}
|
1494
|
+
|
1495
|
+
/*
|
1496
|
+
* Returns a new {RE2::Set} object, a collection of patterns that can be
|
1497
|
+
* searched for simultaneously.
|
1498
|
+
*
|
1499
|
+
* @return [RE2::Set]
|
1500
|
+
*
|
1501
|
+
* @overload initialize
|
1502
|
+
* Returns a new {RE2::Set} object for unanchored patterns with the default
|
1503
|
+
* options.
|
1504
|
+
*
|
1505
|
+
* @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
|
1506
|
+
* @return [RE2::Set]
|
1507
|
+
*
|
1508
|
+
* @overload initialize(anchor)
|
1509
|
+
* Returns a new {RE2::Set} object for the specified anchor with the default
|
1510
|
+
* options.
|
1511
|
+
*
|
1512
|
+
* @param [Symbol] anchor One of :unanchored, :anchor_start, :anchor_both
|
1513
|
+
* @raise [ArgumentError] if anchor is not :unanchored, :anchor_start or :anchor_both
|
1514
|
+
* @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
|
1515
|
+
*
|
1516
|
+
* @overload initialize(anchor, options)
|
1517
|
+
* Returns a new {RE2::Set} object with the specified options.
|
1518
|
+
*
|
1519
|
+
* @param [Symbol] anchor One of :unanchored, :anchor_start, :anchor_both
|
1520
|
+
* @param [Hash] options the options with which to compile the pattern
|
1521
|
+
* @option options [Boolean] :utf8 (true) text and pattern are UTF-8; otherwise Latin-1
|
1522
|
+
* @option options [Boolean] :posix_syntax (false) restrict regexps to POSIX egrep syntax
|
1523
|
+
* @option options [Boolean] :longest_match (false) search for longest match, not first match
|
1524
|
+
* @option options [Boolean] :log_errors (true) log syntax and execution errors to ERROR
|
1525
|
+
* @option options [Fixnum] :max_mem approx. max memory footprint of RE2
|
1526
|
+
* @option options [Boolean] :literal (false) interpret string as literal, not regexp
|
1527
|
+
* @option options [Boolean] :never_nl (false) never match \n, even if it is in regexp
|
1528
|
+
* @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with (?i) unless in posix_syntax mode)
|
1529
|
+
* @option options [Boolean] :perl_classes (false) allow Perl's \d \s \w \D \S \W when in posix_syntax mode
|
1530
|
+
* @option options [Boolean] :word_boundary (false) allow \b \B (word boundary and not) when in posix_syntax mode
|
1531
|
+
* @option options [Boolean] :one_line (false) ^ and $ only match beginning and end of text when in posix_syntax mode
|
1532
|
+
* @return [RE2::Set] an RE2::Set with the specified anchor and options
|
1533
|
+
* @raise [ArgumentError] if anchor is not one of the accepted choices
|
1534
|
+
* @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
|
1535
|
+
*/
|
1536
|
+
static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
|
1537
|
+
VALUE anchor, options;
|
1538
|
+
re2_set *s;
|
1539
|
+
RE2::Anchor re2_anchor;
|
1540
|
+
RE2::Options re2_options;
|
1541
|
+
|
1542
|
+
rb_scan_args(argc, argv, "02", &anchor, &options);
|
1543
|
+
Data_Get_Struct(self, re2_set, s);
|
1544
|
+
|
1545
|
+
if (RTEST(options)) {
|
1546
|
+
parse_re2_options(re2_options, options);
|
1547
|
+
}
|
1548
|
+
if (NIL_P(anchor)) {
|
1549
|
+
re2_anchor = RE2::UNANCHORED;
|
1550
|
+
} else {
|
1551
|
+
Check_Type(anchor, T_SYMBOL);
|
1552
|
+
ID id_anchor = SYM2ID(anchor);
|
1553
|
+
if (id_anchor == id_unanchored) {
|
1554
|
+
re2_anchor = RE2::UNANCHORED;
|
1555
|
+
} else if (id_anchor == id_anchor_start) {
|
1556
|
+
re2_anchor = RE2::ANCHOR_START;
|
1557
|
+
} else if (id_anchor == id_anchor_both) {
|
1558
|
+
re2_anchor = RE2::ANCHOR_BOTH;
|
1559
|
+
} else {
|
1560
|
+
rb_raise(rb_eArgError, "anchor should be one of: :unanchored, :anchor_start, :anchor_both");
|
1561
|
+
}
|
1562
|
+
}
|
1563
|
+
|
1564
|
+
s->set = new(nothrow) RE2::Set(re2_options, re2_anchor);
|
1565
|
+
if (s->set == 0) {
|
1566
|
+
rb_raise(rb_eNoMemError, "not enough memory to allocate RE2::Set object");
|
1567
|
+
}
|
1568
|
+
|
1569
|
+
return self;
|
1570
|
+
}
|
1571
|
+
|
1572
|
+
/*
|
1573
|
+
* Adds a pattern to the set. Returns the index that will identify the pattern
|
1574
|
+
* in the output of #match. Cannot be called after #compile has been called.
|
1575
|
+
*
|
1576
|
+
* @param [String] pattern the regex pattern
|
1577
|
+
* @return [Integer] the index of the pattern in the set
|
1578
|
+
* @raise [ArgumentError] if called after compile or the pattern is rejected
|
1579
|
+
* @example
|
1580
|
+
* set = RE2::Set.new
|
1581
|
+
* set.add("abc") #=> 0
|
1582
|
+
* set.add("def") #=> 1
|
1583
|
+
*/
|
1584
|
+
static VALUE re2_set_add(VALUE self, VALUE pattern) {
|
1585
|
+
Check_Type(pattern, T_STRING);
|
1586
|
+
re2::StringPiece regex(RSTRING_PTR(pattern), RSTRING_LEN(pattern));
|
1587
|
+
std::string err;
|
1588
|
+
re2_set *s;
|
1589
|
+
Data_Get_Struct(self, re2_set, s);
|
1590
|
+
int index = s->set->Add(regex, &err);
|
1591
|
+
if (index < 0) {
|
1592
|
+
rb_raise(rb_eArgError, "str rejected by RE2::Set->Add(): %s", err.c_str());
|
1593
|
+
}
|
1594
|
+
|
1595
|
+
return INT2FIX(index);
|
1596
|
+
}
|
1597
|
+
|
1598
|
+
/*
|
1599
|
+
* Compiles a Set so it can be used to match against. Must be called after #add
|
1600
|
+
* and before #match.
|
1601
|
+
*
|
1602
|
+
* @return [Boolean] whether compilation was a success
|
1603
|
+
* @example
|
1604
|
+
* set = RE2::Set.new
|
1605
|
+
* set.add("abc")
|
1606
|
+
* set.compile # => true
|
1607
|
+
*/
|
1608
|
+
static VALUE re2_set_compile(VALUE self) {
|
1609
|
+
re2_set *s;
|
1610
|
+
Data_Get_Struct(self, re2_set, s);
|
1611
|
+
|
1612
|
+
return BOOL2RUBY(s->set->Compile());
|
1613
|
+
}
|
1614
|
+
|
1615
|
+
/*
|
1616
|
+
* Returns whether the underlying re2 version outputs error information from
|
1617
|
+
* RE2::Set::Match. If not, #match will raise an error if attempting to set its
|
1618
|
+
* :exception option to true.
|
1619
|
+
*
|
1620
|
+
* @return [Boolean] whether the underlying re2 outputs error information from Set matches
|
1621
|
+
*/
|
1622
|
+
static VALUE re2_set_match_raises_errors_p(VALUE self) {
|
1623
|
+
UNUSED(self);
|
1624
|
+
#ifdef HAVE_ERROR_INFO_ARGUMENT
|
1625
|
+
return Qtrue;
|
1626
|
+
#else
|
1627
|
+
return Qfalse;
|
1628
|
+
#endif
|
1629
|
+
}
|
1630
|
+
|
1631
|
+
/*
|
1632
|
+
* Matches the given text against patterns in the set, returning an array of
|
1633
|
+
* integer indices of the matching patterns if matched or an empty array if
|
1634
|
+
* there are no matches.
|
1635
|
+
*
|
1636
|
+
* @return [Array<Integer>]
|
1637
|
+
*
|
1638
|
+
* @overload match(str)
|
1639
|
+
* Returns an array of integer indices of patterns matching the given string
|
1640
|
+
* (if any). Raises exceptions if there are any errors while matching.
|
1641
|
+
*
|
1642
|
+
* @param [String] str the text to match against
|
1643
|
+
* @return [Array<Integer>] the indices of matching regexps
|
1644
|
+
* @raise [MatchError] if an error occurs while matching
|
1645
|
+
* @raise [UnsupportedError] if the underlying version of re2 does not output error information
|
1646
|
+
* @example
|
1647
|
+
* set = RE2::Set.new
|
1648
|
+
* set.add("abc")
|
1649
|
+
* set.add("def")
|
1650
|
+
* set.compile
|
1651
|
+
* set.match("abcdef") # => [0, 1]
|
1652
|
+
*
|
1653
|
+
* @overload match(str, options)
|
1654
|
+
* Returns an array of integer indices of patterns matching the given string
|
1655
|
+
* (if any). Raises exceptions if there are any errors while matching and the
|
1656
|
+
* :exception option is set to true.
|
1657
|
+
*
|
1658
|
+
* @param [String] str the text to match against
|
1659
|
+
* @param [Hash] options the options with which to match
|
1660
|
+
* @option options [Boolean] :exception (true) whether to raise exceptions with re2's error information (not supported on ABI version 0 of re2)
|
1661
|
+
* @return [Array<Integer>] the indices of matching regexps
|
1662
|
+
* @raise [MatchError] if an error occurs while matching
|
1663
|
+
* @raise [UnsupportedError] if the underlying version of re2 does not output error information
|
1664
|
+
* @example
|
1665
|
+
* set = RE2::Set.new
|
1666
|
+
* set.add("abc")
|
1667
|
+
* set.add("def")
|
1668
|
+
* set.compile
|
1669
|
+
* set.match("abcdef", :exception => true) # => [0, 1]
|
1670
|
+
*/
|
1671
|
+
static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
|
1672
|
+
VALUE str, options, exception_option;
|
1673
|
+
bool raise_exception = true;
|
1674
|
+
rb_scan_args(argc, argv, "11", &str, &options);
|
1675
|
+
Check_Type(str, T_STRING);
|
1676
|
+
re2::StringPiece data(RSTRING_PTR(str), RSTRING_LEN(str));
|
1677
|
+
std::vector<int> v;
|
1678
|
+
re2_set *s;
|
1679
|
+
Data_Get_Struct(self, re2_set, s);
|
1680
|
+
|
1681
|
+
if (RTEST(options)) {
|
1682
|
+
Check_Type(options, T_HASH);
|
1683
|
+
|
1684
|
+
exception_option = rb_hash_aref(options, ID2SYM(id_exception));
|
1685
|
+
if (!NIL_P(exception_option)) {
|
1686
|
+
raise_exception = RTEST(exception_option);
|
1687
|
+
}
|
1688
|
+
}
|
1689
|
+
|
1690
|
+
if (raise_exception) {
|
1691
|
+
#ifdef HAVE_ERROR_INFO_ARGUMENT
|
1692
|
+
RE2::Set::ErrorInfo e;
|
1693
|
+
bool match_failed = !s->set->Match(data, &v, &e);
|
1694
|
+
VALUE result = rb_ary_new2(v.size());
|
1695
|
+
|
1696
|
+
if (match_failed) {
|
1697
|
+
switch (e.kind) {
|
1698
|
+
case RE2::Set::kNoError:
|
1699
|
+
break;
|
1700
|
+
case RE2::Set::kNotCompiled:
|
1701
|
+
rb_raise(re2_eSetMatchError, "#match must not be called before #compile");
|
1702
|
+
case RE2::Set::kOutOfMemory:
|
1703
|
+
rb_raise(re2_eSetMatchError, "The DFA ran out of memory");
|
1704
|
+
case RE2::Set::kInconsistent:
|
1705
|
+
rb_raise(re2_eSetMatchError, "RE2::Prog internal error");
|
1706
|
+
default: // Just in case a future version of libre2 adds new ErrorKinds
|
1707
|
+
rb_raise(re2_eSetMatchError, "Unknown RE2::Set::ErrorKind: %d", e.kind);
|
1708
|
+
}
|
1709
|
+
} else {
|
1710
|
+
for (size_t i = 0; i < v.size(); i++) {
|
1711
|
+
rb_ary_push(result, INT2FIX(v[i]));
|
1712
|
+
}
|
1713
|
+
}
|
1714
|
+
|
1715
|
+
return result;
|
1716
|
+
#else
|
1717
|
+
rb_raise(re2_eSetUnsupportedError, "current version of RE2::Set::Match() does not output error information, :exception option can only be set to false");
|
1718
|
+
#endif
|
1719
|
+
} else {
|
1720
|
+
bool matched = s->set->Match(data, &v);
|
1721
|
+
VALUE result = rb_ary_new2(v.size());
|
1722
|
+
|
1723
|
+
if (matched) {
|
1724
|
+
for (size_t i = 0; i < v.size(); i++) {
|
1725
|
+
rb_ary_push(result, INT2FIX(v[i]));
|
1726
|
+
}
|
1727
|
+
}
|
1728
|
+
|
1729
|
+
return result;
|
1730
|
+
}
|
1731
|
+
}
|
1732
|
+
|
1365
1733
|
/* Forward declare Init_re2 to be called by C code but define it separately so
|
1366
1734
|
* that YARD can parse it.
|
1367
1735
|
*/
|
@@ -1372,12 +1740,18 @@ void Init_re2(void) {
|
|
1372
1740
|
re2_cRegexp = rb_define_class_under(re2_mRE2, "Regexp", rb_cObject);
|
1373
1741
|
re2_cMatchData = rb_define_class_under(re2_mRE2, "MatchData", rb_cObject);
|
1374
1742
|
re2_cScanner = rb_define_class_under(re2_mRE2, "Scanner", rb_cObject);
|
1743
|
+
re2_cSet = rb_define_class_under(re2_mRE2, "Set", rb_cObject);
|
1744
|
+
re2_eSetMatchError = rb_define_class_under(re2_cSet, "MatchError",
|
1745
|
+
rb_const_get(rb_cObject, rb_intern("StandardError")));
|
1746
|
+
re2_eSetUnsupportedError = rb_define_class_under(re2_cSet, "UnsupportedError",
|
1747
|
+
rb_const_get(rb_cObject, rb_intern("StandardError")));
|
1375
1748
|
|
1376
1749
|
rb_define_alloc_func(re2_cRegexp, (VALUE (*)(VALUE))re2_regexp_allocate);
|
1377
1750
|
rb_define_alloc_func(re2_cMatchData,
|
1378
1751
|
(VALUE (*)(VALUE))re2_matchdata_allocate);
|
1379
1752
|
rb_define_alloc_func(re2_cScanner,
|
1380
1753
|
(VALUE (*)(VALUE))re2_scanner_allocate);
|
1754
|
+
rb_define_alloc_func(re2_cSet, (VALUE (*)(VALUE))re2_set_allocate);
|
1381
1755
|
|
1382
1756
|
rb_define_method(re2_cMatchData, "string",
|
1383
1757
|
RUBY_METHOD_FUNC(re2_matchdata_string), 0);
|
@@ -1394,10 +1768,15 @@ void Init_re2(void) {
|
|
1394
1768
|
rb_define_method(re2_cMatchData, "end",
|
1395
1769
|
RUBY_METHOD_FUNC(re2_matchdata_end), 1);
|
1396
1770
|
rb_define_method(re2_cMatchData, "[]", RUBY_METHOD_FUNC(re2_matchdata_aref),
|
1397
|
-
-1);
|
1771
|
+
-1);
|
1772
|
+
rb_define_method(re2_cMatchData, "to_s",
|
1398
1773
|
RUBY_METHOD_FUNC(re2_matchdata_to_s), 0);
|
1399
1774
|
rb_define_method(re2_cMatchData, "inspect",
|
1400
1775
|
RUBY_METHOD_FUNC(re2_matchdata_inspect), 0);
|
1776
|
+
rb_define_method(re2_cMatchData, "deconstruct",
|
1777
|
+
RUBY_METHOD_FUNC(re2_matchdata_deconstruct), 0);
|
1778
|
+
rb_define_method(re2_cMatchData, "deconstruct_keys",
|
1779
|
+
RUBY_METHOD_FUNC(re2_matchdata_deconstruct_keys), 1);
|
1401
1780
|
|
1402
1781
|
rb_define_method(re2_cScanner, "string",
|
1403
1782
|
RUBY_METHOD_FUNC(re2_scanner_string), 0);
|
@@ -1428,11 +1807,11 @@ void Init_re2(void) {
|
|
1428
1807
|
rb_define_method(re2_cRegexp, "match", RUBY_METHOD_FUNC(re2_regexp_match),
|
1429
1808
|
-1);
|
1430
1809
|
rb_define_method(re2_cRegexp, "match?",
|
1431
|
-
RUBY_METHOD_FUNC(
|
1810
|
+
RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
|
1432
1811
|
rb_define_method(re2_cRegexp, "=~",
|
1433
|
-
RUBY_METHOD_FUNC(
|
1812
|
+
RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
|
1434
1813
|
rb_define_method(re2_cRegexp, "===",
|
1435
|
-
RUBY_METHOD_FUNC(
|
1814
|
+
RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
|
1436
1815
|
rb_define_method(re2_cRegexp, "scan",
|
1437
1816
|
RUBY_METHOD_FUNC(re2_regexp_scan), 1);
|
1438
1817
|
rb_define_method(re2_cRegexp, "to_s", RUBY_METHOD_FUNC(re2_regexp_to_s), 0);
|
@@ -1471,6 +1850,14 @@ void Init_re2(void) {
|
|
1471
1850
|
rb_define_method(re2_cRegexp, "one_line?",
|
1472
1851
|
RUBY_METHOD_FUNC(re2_regexp_one_line), 0);
|
1473
1852
|
|
1853
|
+
rb_define_singleton_method(re2_cSet, "match_raises_errors?",
|
1854
|
+
RUBY_METHOD_FUNC(re2_set_match_raises_errors_p), 0);
|
1855
|
+
rb_define_method(re2_cSet, "initialize",
|
1856
|
+
RUBY_METHOD_FUNC(re2_set_initialize), -1);
|
1857
|
+
rb_define_method(re2_cSet, "add", RUBY_METHOD_FUNC(re2_set_add), 1);
|
1858
|
+
rb_define_method(re2_cSet, "compile", RUBY_METHOD_FUNC(re2_set_compile), 0);
|
1859
|
+
rb_define_method(re2_cSet, "match", RUBY_METHOD_FUNC(re2_set_match), -1);
|
1860
|
+
|
1474
1861
|
rb_define_module_function(re2_mRE2, "Replace",
|
1475
1862
|
RUBY_METHOD_FUNC(re2_Replace), 3);
|
1476
1863
|
rb_define_module_function(re2_mRE2, "GlobalReplace",
|
@@ -1498,6 +1885,10 @@ void Init_re2(void) {
|
|
1498
1885
|
id_perl_classes = rb_intern("perl_classes");
|
1499
1886
|
id_word_boundary = rb_intern("word_boundary");
|
1500
1887
|
id_one_line = rb_intern("one_line");
|
1888
|
+
id_unanchored = rb_intern("unanchored");
|
1889
|
+
id_anchor_start = rb_intern("anchor_start");
|
1890
|
+
id_anchor_both = rb_intern("anchor_both");
|
1891
|
+
id_exception = rb_intern("exception");
|
1501
1892
|
|
1502
1893
|
#if 0
|
1503
1894
|
/* Fake so YARD generates the file. */
|
data/spec/re2/match_data_spec.rb
CHANGED
@@ -241,4 +241,62 @@ RSpec.describe RE2::MatchData do
|
|
241
241
|
expect(md.end(:foo)).to be_nil
|
242
242
|
end
|
243
243
|
end
|
244
|
+
|
245
|
+
describe "#deconstruct" do
|
246
|
+
it "returns all capturing groups" do
|
247
|
+
md = RE2::Regexp.new('w(o)(o)').match('woo')
|
248
|
+
|
249
|
+
expect(md.deconstruct).to eq(['o', 'o'])
|
250
|
+
end
|
251
|
+
|
252
|
+
it "includes optional capturing groups as nil" do
|
253
|
+
md = RE2::Regexp.new('w(.)(.)(.)?').match('woo')
|
254
|
+
|
255
|
+
expect(md.deconstruct).to eq(['o', 'o', nil])
|
256
|
+
end
|
257
|
+
end
|
258
|
+
|
259
|
+
describe "#deconstruct_keys" do
|
260
|
+
it "returns all named captures if given nil" do
|
261
|
+
md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
262
|
+
|
263
|
+
expect(md.deconstruct_keys(nil)).to eq(:numbers => '123', :letters => 'abc')
|
264
|
+
end
|
265
|
+
|
266
|
+
it "returns only named captures if given names" do
|
267
|
+
md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
268
|
+
|
269
|
+
expect(md.deconstruct_keys([:numbers])).to eq(:numbers => '123')
|
270
|
+
end
|
271
|
+
|
272
|
+
it "returns named captures up until an invalid name is given" do
|
273
|
+
md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
274
|
+
|
275
|
+
expect(md.deconstruct_keys([:numbers, :punctuation])).to eq(:numbers => '123')
|
276
|
+
end
|
277
|
+
|
278
|
+
it "returns an empty hash if given more capture names than exist" do
|
279
|
+
md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
280
|
+
|
281
|
+
expect(md.deconstruct_keys([:numbers, :letters, :punctuation])).to eq({})
|
282
|
+
end
|
283
|
+
|
284
|
+
it "returns an empty hash if there are no named capturing groups" do
|
285
|
+
md = RE2::Regexp.new('(\d+) ([a-zA-Z]+)').match('123 abc')
|
286
|
+
|
287
|
+
expect(md.deconstruct_keys(nil)).to eq({})
|
288
|
+
end
|
289
|
+
|
290
|
+
it "raises an error if given a non-array of keys" do
|
291
|
+
md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
292
|
+
|
293
|
+
expect { md.deconstruct_keys(0) }.to raise_error(TypeError)
|
294
|
+
end
|
295
|
+
|
296
|
+
it "raises an error if given keys as non-symbols" do
|
297
|
+
md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
298
|
+
|
299
|
+
expect { md.deconstruct_keys([0]) }.to raise_error(TypeError)
|
300
|
+
end
|
301
|
+
end
|
244
302
|
end
|
@@ -0,0 +1,168 @@
|
|
1
|
+
RSpec.describe RE2::Set do
|
2
|
+
describe "#initialize" do
|
3
|
+
it "returns an instance given no args" do
|
4
|
+
set = RE2::Set.new
|
5
|
+
|
6
|
+
expect(set).to be_a(RE2::Set)
|
7
|
+
end
|
8
|
+
|
9
|
+
it "returns an instance given only an anchor of :unanchored" do
|
10
|
+
set = RE2::Set.new(:unanchored)
|
11
|
+
|
12
|
+
expect(set).to be_a(RE2::Set)
|
13
|
+
end
|
14
|
+
|
15
|
+
it "returns an instance given only an anchor of :anchor_start" do
|
16
|
+
set = RE2::Set.new(:anchor_start)
|
17
|
+
|
18
|
+
expect(set).to be_a(RE2::Set)
|
19
|
+
end
|
20
|
+
|
21
|
+
it "returns an instance given only an anchor of :anchor_both" do
|
22
|
+
set = RE2::Set.new(:anchor_both)
|
23
|
+
|
24
|
+
expect(set).to be_a(RE2::Set)
|
25
|
+
end
|
26
|
+
|
27
|
+
it "returns an instance given an anchor and options" do
|
28
|
+
set = RE2::Set.new(:unanchored, :case_sensitive => false)
|
29
|
+
|
30
|
+
expect(set).to be_a(RE2::Set)
|
31
|
+
end
|
32
|
+
|
33
|
+
it "raises an error if given an inappropriate type" do
|
34
|
+
expect { RE2::Set.new(0) }.to raise_error(TypeError)
|
35
|
+
end
|
36
|
+
|
37
|
+
it "raises an error if given an invalid anchor" do
|
38
|
+
expect { RE2::Set.new(:not_a_valid_anchor) }.to raise_error(
|
39
|
+
ArgumentError,
|
40
|
+
"anchor should be one of: :unanchored, :anchor_start, :anchor_both"
|
41
|
+
)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
describe "#add" do
|
46
|
+
it "allows multiple patterns to be added", :aggregate_failures do
|
47
|
+
set = RE2::Set.new
|
48
|
+
|
49
|
+
expect(set.add("abc")).to eq(0)
|
50
|
+
expect(set.add("def")).to eq(1)
|
51
|
+
expect(set.add("ghi")).to eq(2)
|
52
|
+
end
|
53
|
+
|
54
|
+
it "rejects invalid patterns when added" do
|
55
|
+
set = RE2::Set.new(:unanchored, :log_errors => false)
|
56
|
+
|
57
|
+
expect { set.add("???") }.to raise_error(ArgumentError, /str rejected by RE2::Set->Add()/)
|
58
|
+
end
|
59
|
+
|
60
|
+
it "raises an error if called after #compile" do
|
61
|
+
set = RE2::Set.new(:unanchored, :log_errors => false)
|
62
|
+
set.add("abc")
|
63
|
+
set.compile
|
64
|
+
|
65
|
+
silence_stderr do
|
66
|
+
expect { set.add("def") }.to raise_error(ArgumentError)
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
it "raises an error if given a non-string pattern" do
|
71
|
+
set = RE2::Set.new(:unanchored, :log_errors => false)
|
72
|
+
|
73
|
+
expect { set.add(0) }.to raise_error(TypeError)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
describe "#compile" do
|
78
|
+
it "compiles the set without error" do
|
79
|
+
set = RE2::Set.new
|
80
|
+
set.add("abc")
|
81
|
+
set.add("def")
|
82
|
+
set.add("ghi")
|
83
|
+
|
84
|
+
expect(set.compile).to be_truthy
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
describe "#match" do
|
89
|
+
it "matches against multiple patterns" do
|
90
|
+
set = RE2::Set.new
|
91
|
+
set.add("abc")
|
92
|
+
set.add("def")
|
93
|
+
set.add("ghi")
|
94
|
+
set.compile
|
95
|
+
|
96
|
+
expect(set.match("abcdefghi", :exception => false)).to eq([0, 1, 2])
|
97
|
+
end
|
98
|
+
|
99
|
+
it "raises an error if called before #compile by default" do
|
100
|
+
skip "Underlying RE2::Set::Match does not output error information" unless RE2::Set.match_raises_errors?
|
101
|
+
|
102
|
+
set = RE2::Set.new(:unanchored, :log_errors => false)
|
103
|
+
|
104
|
+
silence_stderr do
|
105
|
+
expect { set.match("") }.to raise_error(RE2::Set::MatchError)
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
it "raises an error if called before #compile when :exception is true" do
|
110
|
+
skip "Underlying RE2::Set::Match does not output error information" unless RE2::Set.match_raises_errors?
|
111
|
+
|
112
|
+
set = RE2::Set.new(:unanchored, :log_errors => false)
|
113
|
+
|
114
|
+
silence_stderr do
|
115
|
+
expect { set.match("", :exception => true) }.to raise_error(RE2::Set::MatchError)
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
it "returns an empty array if called before #compile when :exception is false" do
|
120
|
+
set = RE2::Set.new(:unanchored, :log_errors => false)
|
121
|
+
|
122
|
+
silence_stderr do
|
123
|
+
expect(set.match("", :exception => false)).to be_empty
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
it "raises an error if :exception is true and re2 does not support it" do
|
128
|
+
skip "Underlying RE2::Set::Match outputs error information" if RE2::Set.match_raises_errors?
|
129
|
+
|
130
|
+
set = RE2::Set.new(:unanchored, :log_errors => false)
|
131
|
+
|
132
|
+
silence_stderr do
|
133
|
+
expect { set.match("", :exception => true) }.to raise_error(RE2::Set::UnsupportedError)
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
it "raises an error if given non-hash options" do
|
138
|
+
set = RE2::Set.new
|
139
|
+
|
140
|
+
expect { set.match("", 0) }.to raise_error(TypeError)
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
144
|
+
def silence_stderr
|
145
|
+
original_stream = STDERR
|
146
|
+
|
147
|
+
if File.const_defined?(:NULL)
|
148
|
+
STDERR.reopen(File::NULL)
|
149
|
+
else
|
150
|
+
platform = RUBY_PLATFORM == 'java' ? RbConfig::CONFIG['host_os'] : RUBY_PLATFORM
|
151
|
+
|
152
|
+
case platform
|
153
|
+
when /mswin|mingw/i
|
154
|
+
STDERR.reopen('NUL')
|
155
|
+
when /amiga/i
|
156
|
+
STDERR.reopen('NIL')
|
157
|
+
when /openvms/i
|
158
|
+
STDERR.reopen('NL:')
|
159
|
+
else
|
160
|
+
STDERR.reopen('/dev/null')
|
161
|
+
end
|
162
|
+
end
|
163
|
+
|
164
|
+
yield
|
165
|
+
ensure
|
166
|
+
STDERR.reopen(original_stream)
|
167
|
+
end
|
168
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: re2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Paul Mucur
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-10-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|
@@ -57,6 +57,7 @@ files:
|
|
57
57
|
- spec/re2/match_data_spec.rb
|
58
58
|
- spec/re2/regexp_spec.rb
|
59
59
|
- spec/re2/scanner_spec.rb
|
60
|
+
- spec/re2/set_spec.rb
|
60
61
|
- spec/re2/string_spec.rb
|
61
62
|
- spec/re2_spec.rb
|
62
63
|
- spec/spec_helper.rb
|
@@ -79,7 +80,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
79
80
|
- !ruby/object:Gem::Version
|
80
81
|
version: '0'
|
81
82
|
requirements: []
|
82
|
-
rubygems_version: 3.
|
83
|
+
rubygems_version: 3.3.7
|
83
84
|
signing_key:
|
84
85
|
specification_version: 4
|
85
86
|
summary: Ruby bindings to re2.
|
@@ -90,4 +91,5 @@ test_files:
|
|
90
91
|
- spec/re2/regexp_spec.rb
|
91
92
|
- spec/re2/match_data_spec.rb
|
92
93
|
- spec/re2/string_spec.rb
|
94
|
+
- spec/re2/set_spec.rb
|
93
95
|
- spec/re2/scanner_spec.rb
|