re2 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +22 -2
- data/ext/re2/extconf.rb +24 -0
- data/ext/re2/re2.cc +323 -65
- data/spec/re2/set_spec.rb +168 -0
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8588a481a52f07a6c965094bb69c1aee177add80d071aee3a1771f97e19dc4f6
|
4
|
+
data.tar.gz: bb123e3a2d5352379548bf68f57fd195c8aa458e2da06f0864b7135682c19b1d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b421f15ace868de905dece66db9e607636624cf2548eabe6be9979c397ac825035c0b24a5d8e8d6a351aa5809f6265d07f7b67a4c720a81e0b4e78f8d4599c67
|
7
|
+
data.tar.gz: fb7314faaf44a8847eadf207b0226ea71071ddb02ae41b7644ade69b69c06ff86f7c5d1a34042b4ef90633b98b6185bc07a6e2241bfa570089525e73eca6a61a
|
data/README.md
CHANGED
@@ -4,7 +4,7 @@ re2 [, libre2.1 (2020-03-02), libre2.6 (2020-03-03), libre2.7 (2020-05-01), libre2.8 (2020-07-06), libre2.9 (2020-11-01)
|
10
10
|
|
@@ -131,6 +131,22 @@ enum.next #=> ["It"]
|
|
131
131
|
enum.next #=> ["is"]
|
132
132
|
```
|
133
133
|
|
134
|
+
As of 1.5.0, you can use `RE2::Set` to match multiple patterns against a
|
135
|
+
string. Calling `RE2::Set#add` with a pattern will return an integer index of
|
136
|
+
the pattern. After all patterns have been added, the set can be compiled using
|
137
|
+
`RE2::Set#compile`, and then `RE2::Set#match` will return an `Array<Integer>`
|
138
|
+
containing the indices of all the patterns that matched.
|
139
|
+
|
140
|
+
``` ruby
|
141
|
+
set = RE2::Set.new
|
142
|
+
set.add("abc") #=> 0
|
143
|
+
set.add("def") #=> 1
|
144
|
+
set.add("ghi") #=> 2
|
145
|
+
set.compile #=> true
|
146
|
+
set.match("abcdefghi") #=> [0, 1, 2]
|
147
|
+
set.match("ghidefabc") #=> [2, 1, 0]
|
148
|
+
```
|
149
|
+
|
134
150
|
Features
|
135
151
|
--------
|
136
152
|
|
@@ -149,6 +165,8 @@ Features
|
|
149
165
|
|
150
166
|
* Incrementally scanning text with `re2.scan(text)`
|
151
167
|
|
168
|
+
* Search a collection of patterns simultaneously with `RE2::Set`
|
169
|
+
|
152
170
|
* Checking regular expression compilation with `re2.ok?`, `re2.error` and
|
153
171
|
`re2.error_arg`
|
154
172
|
|
@@ -177,7 +195,9 @@ Contributions
|
|
177
195
|
* Thanks to [Sebastian Reitenbach](https://github.com/buzzdeee) for reporting
|
178
196
|
the deprecation and removal of the `utf8` encoding option in re2;
|
179
197
|
* Thanks to [Sergio Medina](https://github.com/serch) for reporting a bug when
|
180
|
-
using `RE2::Scanner#scan` with an invalid regular expression
|
198
|
+
using `RE2::Scanner#scan` with an invalid regular expression;
|
199
|
+
* Thanks to [Pritam Baral](https://github.com/pritambaral) for contributing the
|
200
|
+
initial support for `RE2::Set`.
|
181
201
|
|
182
202
|
Contact
|
183
203
|
-------
|
data/ext/re2/extconf.rb
CHANGED
@@ -88,4 +88,28 @@ SRC
|
|
88
88
|
end
|
89
89
|
end
|
90
90
|
|
91
|
+
checking_for("RE2::Set::Match() with error information") do
|
92
|
+
test_re2_set_match_signature = <<SRC
|
93
|
+
#include <vector>
|
94
|
+
#include <re2/re2.h>
|
95
|
+
#include <re2/set.h>
|
96
|
+
|
97
|
+
int main() {
|
98
|
+
RE2::Set s(RE2::DefaultOptions, RE2::UNANCHORED);
|
99
|
+
s.Add("foo", NULL);
|
100
|
+
s.Compile();
|
101
|
+
|
102
|
+
std::vector<int> v;
|
103
|
+
RE2::Set::ErrorInfo ei;
|
104
|
+
s.Match("foo", &v, &ei);
|
105
|
+
|
106
|
+
return 0;
|
107
|
+
}
|
108
|
+
SRC
|
109
|
+
|
110
|
+
if try_compile(test_re2_set_match_signature, compile_options)
|
111
|
+
$defs.push("-DHAVE_ERROR_INFO_ARGUMENT")
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
91
115
|
create_makefile("re2")
|
data/ext/re2/re2.cc
CHANGED
@@ -8,6 +8,7 @@
|
|
8
8
|
|
9
9
|
#include <ruby.h>
|
10
10
|
#include <re2/re2.h>
|
11
|
+
#include <re2/set.h>
|
11
12
|
#include <stdint.h>
|
12
13
|
#include <string>
|
13
14
|
#include <sstream>
|
@@ -93,12 +94,82 @@ typedef struct {
|
|
93
94
|
VALUE regexp, text;
|
94
95
|
} re2_scanner;
|
95
96
|
|
96
|
-
|
97
|
+
typedef struct {
|
98
|
+
RE2::Set *set;
|
99
|
+
} re2_set;
|
100
|
+
|
101
|
+
VALUE re2_mRE2, re2_cRegexp, re2_cMatchData, re2_cScanner, re2_cSet,
|
102
|
+
re2_eSetMatchError, re2_eSetUnsupportedError;
|
97
103
|
|
98
104
|
/* Symbols used in RE2 options. */
|
99
105
|
static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors,
|
100
106
|
id_max_mem, id_literal, id_never_nl, id_case_sensitive,
|
101
|
-
id_perl_classes, id_word_boundary, id_one_line
|
107
|
+
id_perl_classes, id_word_boundary, id_one_line,
|
108
|
+
id_unanchored, id_anchor_start, id_anchor_both, id_exception;
|
109
|
+
|
110
|
+
void parse_re2_options(RE2::Options& re2_options, VALUE options) {
|
111
|
+
if (TYPE(options) != T_HASH) {
|
112
|
+
rb_raise(rb_eArgError, "options should be a hash");
|
113
|
+
}
|
114
|
+
VALUE utf8, posix_syntax, longest_match, log_errors,
|
115
|
+
max_mem, literal, never_nl, case_sensitive, perl_classes,
|
116
|
+
word_boundary, one_line;
|
117
|
+
|
118
|
+
utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
|
119
|
+
if (!NIL_P(utf8)) {
|
120
|
+
re2_options.set_encoding(RTEST(utf8) ? RE2::Options::EncodingUTF8 : RE2::Options::EncodingLatin1);
|
121
|
+
}
|
122
|
+
|
123
|
+
posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
|
124
|
+
if (!NIL_P(posix_syntax)) {
|
125
|
+
re2_options.set_posix_syntax(RTEST(posix_syntax));
|
126
|
+
}
|
127
|
+
|
128
|
+
longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
|
129
|
+
if (!NIL_P(longest_match)) {
|
130
|
+
re2_options.set_longest_match(RTEST(longest_match));
|
131
|
+
}
|
132
|
+
|
133
|
+
log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
|
134
|
+
if (!NIL_P(log_errors)) {
|
135
|
+
re2_options.set_log_errors(RTEST(log_errors));
|
136
|
+
}
|
137
|
+
|
138
|
+
max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
|
139
|
+
if (!NIL_P(max_mem)) {
|
140
|
+
re2_options.set_max_mem(NUM2INT(max_mem));
|
141
|
+
}
|
142
|
+
|
143
|
+
literal = rb_hash_aref(options, ID2SYM(id_literal));
|
144
|
+
if (!NIL_P(literal)) {
|
145
|
+
re2_options.set_literal(RTEST(literal));
|
146
|
+
}
|
147
|
+
|
148
|
+
never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
|
149
|
+
if (!NIL_P(never_nl)) {
|
150
|
+
re2_options.set_never_nl(RTEST(never_nl));
|
151
|
+
}
|
152
|
+
|
153
|
+
case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
|
154
|
+
if (!NIL_P(case_sensitive)) {
|
155
|
+
re2_options.set_case_sensitive(RTEST(case_sensitive));
|
156
|
+
}
|
157
|
+
|
158
|
+
perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
|
159
|
+
if (!NIL_P(perl_classes)) {
|
160
|
+
re2_options.set_perl_classes(RTEST(perl_classes));
|
161
|
+
}
|
162
|
+
|
163
|
+
word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
|
164
|
+
if (!NIL_P(word_boundary)) {
|
165
|
+
re2_options.set_word_boundary(RTEST(word_boundary));
|
166
|
+
}
|
167
|
+
|
168
|
+
one_line = rb_hash_aref(options, ID2SYM(id_one_line));
|
169
|
+
if (!NIL_P(one_line)) {
|
170
|
+
re2_options.set_one_line(RTEST(one_line));
|
171
|
+
}
|
172
|
+
}
|
102
173
|
|
103
174
|
void re2_matchdata_mark(re2_matchdata* self) {
|
104
175
|
rb_gc_mark(self->regexp);
|
@@ -667,75 +738,15 @@ static VALUE re2_re2(int argc, VALUE *argv, VALUE self) {
|
|
667
738
|
* @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
|
668
739
|
*/
|
669
740
|
static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
670
|
-
VALUE pattern, options
|
671
|
-
max_mem, literal, never_nl, case_sensitive, perl_classes,
|
672
|
-
word_boundary, one_line;
|
741
|
+
VALUE pattern, options;
|
673
742
|
re2_pattern *p;
|
674
743
|
|
675
744
|
rb_scan_args(argc, argv, "11", &pattern, &options);
|
676
745
|
Data_Get_Struct(self, re2_pattern, p);
|
677
746
|
|
678
747
|
if (RTEST(options)) {
|
679
|
-
if (TYPE(options) != T_HASH) {
|
680
|
-
rb_raise(rb_eArgError, "options should be a hash");
|
681
|
-
}
|
682
|
-
|
683
748
|
RE2::Options re2_options;
|
684
|
-
|
685
|
-
utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
|
686
|
-
if (!NIL_P(utf8)) {
|
687
|
-
re2_options.set_encoding(RTEST(utf8) ? RE2::Options::EncodingUTF8 : RE2::Options::EncodingLatin1);
|
688
|
-
}
|
689
|
-
|
690
|
-
posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
|
691
|
-
if (!NIL_P(posix_syntax)) {
|
692
|
-
re2_options.set_posix_syntax(RTEST(posix_syntax));
|
693
|
-
}
|
694
|
-
|
695
|
-
longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
|
696
|
-
if (!NIL_P(longest_match)) {
|
697
|
-
re2_options.set_longest_match(RTEST(longest_match));
|
698
|
-
}
|
699
|
-
|
700
|
-
log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
|
701
|
-
if (!NIL_P(log_errors)) {
|
702
|
-
re2_options.set_log_errors(RTEST(log_errors));
|
703
|
-
}
|
704
|
-
|
705
|
-
max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
|
706
|
-
if (!NIL_P(max_mem)) {
|
707
|
-
re2_options.set_max_mem(NUM2INT(max_mem));
|
708
|
-
}
|
709
|
-
|
710
|
-
literal = rb_hash_aref(options, ID2SYM(id_literal));
|
711
|
-
if (!NIL_P(literal)) {
|
712
|
-
re2_options.set_literal(RTEST(literal));
|
713
|
-
}
|
714
|
-
|
715
|
-
never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
|
716
|
-
if (!NIL_P(never_nl)) {
|
717
|
-
re2_options.set_never_nl(RTEST(never_nl));
|
718
|
-
}
|
719
|
-
|
720
|
-
case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
|
721
|
-
if (!NIL_P(case_sensitive)) {
|
722
|
-
re2_options.set_case_sensitive(RTEST(case_sensitive));
|
723
|
-
}
|
724
|
-
|
725
|
-
perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
|
726
|
-
if (!NIL_P(perl_classes)) {
|
727
|
-
re2_options.set_perl_classes(RTEST(perl_classes));
|
728
|
-
}
|
729
|
-
|
730
|
-
word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
|
731
|
-
if (!NIL_P(word_boundary)) {
|
732
|
-
re2_options.set_word_boundary(RTEST(word_boundary));
|
733
|
-
}
|
734
|
-
|
735
|
-
one_line = rb_hash_aref(options, ID2SYM(id_one_line));
|
736
|
-
if (!NIL_P(one_line)) {
|
737
|
-
re2_options.set_one_line(RTEST(one_line));
|
738
|
-
}
|
749
|
+
parse_re2_options(re2_options, options);
|
739
750
|
|
740
751
|
p->pattern = new(nothrow) RE2(StringValuePtr(pattern), re2_options);
|
741
752
|
} else {
|
@@ -1362,6 +1373,234 @@ static VALUE re2_QuoteMeta(VALUE self, VALUE unquoted) {
|
|
1362
1373
|
return rb_str_new(quoted_string.data(), quoted_string.size());
|
1363
1374
|
}
|
1364
1375
|
|
1376
|
+
void re2_set_free(re2_set *self) {
|
1377
|
+
if (self->set) {
|
1378
|
+
delete self->set;
|
1379
|
+
}
|
1380
|
+
free(self);
|
1381
|
+
}
|
1382
|
+
|
1383
|
+
static VALUE re2_set_allocate(VALUE klass) {
|
1384
|
+
re2_set *s;
|
1385
|
+
VALUE result = Data_Make_Struct(klass, re2_set, 0, re2_set_free, s);
|
1386
|
+
return result;
|
1387
|
+
}
|
1388
|
+
|
1389
|
+
/*
|
1390
|
+
* Returns a new {RE2::Set} object, a collection of patterns that can be
|
1391
|
+
* searched for simultaneously.
|
1392
|
+
*
|
1393
|
+
* @return [RE2::Set]
|
1394
|
+
*
|
1395
|
+
* @overload initialize
|
1396
|
+
* Returns a new {RE2::Set} object for unanchored patterns with the default
|
1397
|
+
* options.
|
1398
|
+
*
|
1399
|
+
* @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
|
1400
|
+
* @return [RE2::Set]
|
1401
|
+
*
|
1402
|
+
* @overload initialize(anchor)
|
1403
|
+
* Returns a new {RE2::Set} object for the specified anchor with the default
|
1404
|
+
* options.
|
1405
|
+
*
|
1406
|
+
* @param [Symbol] anchor One of :unanchored, :anchor_start, :anchor_both
|
1407
|
+
* @raise [ArgumentError] if anchor is not :unanchored, :anchor_start or :anchor_both
|
1408
|
+
* @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
|
1409
|
+
*
|
1410
|
+
* @overload initialize(anchor, options)
|
1411
|
+
* Returns a new {RE2::Set} object with the specified options.
|
1412
|
+
*
|
1413
|
+
* @param [Symbol] anchor One of :unanchored, :anchor_start, :anchor_both
|
1414
|
+
* @param [Hash] options the options with which to compile the pattern
|
1415
|
+
* @option options [Boolean] :utf8 (true) text and pattern are UTF-8; otherwise Latin-1
|
1416
|
+
* @option options [Boolean] :posix_syntax (false) restrict regexps to POSIX egrep syntax
|
1417
|
+
* @option options [Boolean] :longest_match (false) search for longest match, not first match
|
1418
|
+
* @option options [Boolean] :log_errors (true) log syntax and execution errors to ERROR
|
1419
|
+
* @option options [Fixnum] :max_mem approx. max memory footprint of RE2
|
1420
|
+
* @option options [Boolean] :literal (false) interpret string as literal, not regexp
|
1421
|
+
* @option options [Boolean] :never_nl (false) never match \n, even if it is in regexp
|
1422
|
+
* @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with (?i) unless in posix_syntax mode)
|
1423
|
+
* @option options [Boolean] :perl_classes (false) allow Perl's \d \s \w \D \S \W when in posix_syntax mode
|
1424
|
+
* @option options [Boolean] :word_boundary (false) allow \b \B (word boundary and not) when in posix_syntax mode
|
1425
|
+
* @option options [Boolean] :one_line (false) ^ and $ only match beginning and end of text when in posix_syntax mode
|
1426
|
+
* @return [RE2::Set] an RE2::Set with the specified anchor and options
|
1427
|
+
* @raise [ArgumentError] if anchor is not one of the accepted choices
|
1428
|
+
* @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
|
1429
|
+
*/
|
1430
|
+
static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
|
1431
|
+
VALUE anchor, options;
|
1432
|
+
re2_set *s;
|
1433
|
+
RE2::Anchor re2_anchor;
|
1434
|
+
RE2::Options re2_options;
|
1435
|
+
|
1436
|
+
rb_scan_args(argc, argv, "02", &anchor, &options);
|
1437
|
+
Data_Get_Struct(self, re2_set, s);
|
1438
|
+
|
1439
|
+
if (RTEST(options)) {
|
1440
|
+
parse_re2_options(re2_options, options);
|
1441
|
+
}
|
1442
|
+
if (NIL_P(anchor)) {
|
1443
|
+
re2_anchor = RE2::UNANCHORED;
|
1444
|
+
} else {
|
1445
|
+
Check_Type(anchor, T_SYMBOL);
|
1446
|
+
ID id_anchor = SYM2ID(anchor);
|
1447
|
+
if (id_anchor == id_unanchored) {
|
1448
|
+
re2_anchor = RE2::UNANCHORED;
|
1449
|
+
} else if (id_anchor == id_anchor_start) {
|
1450
|
+
re2_anchor = RE2::ANCHOR_START;
|
1451
|
+
} else if (id_anchor == id_anchor_both) {
|
1452
|
+
re2_anchor = RE2::ANCHOR_BOTH;
|
1453
|
+
} else {
|
1454
|
+
rb_raise(rb_eArgError, "anchor should be one of: :unanchored, :anchor_start, :anchor_both");
|
1455
|
+
}
|
1456
|
+
}
|
1457
|
+
|
1458
|
+
s->set = new(nothrow) RE2::Set(re2_options, re2_anchor);
|
1459
|
+
if (s->set == 0) {
|
1460
|
+
rb_raise(rb_eNoMemError, "not enough memory to allocate RE2::Set object");
|
1461
|
+
}
|
1462
|
+
|
1463
|
+
return self;
|
1464
|
+
}
|
1465
|
+
|
1466
|
+
/*
|
1467
|
+
* Adds a pattern to the set. Returns the index that will identify the pattern
|
1468
|
+
* in the output of #match. Cannot be called after #compile has been called.
|
1469
|
+
*
|
1470
|
+
* @param [String] pattern the regex pattern
|
1471
|
+
* @return [Integer] the index of the pattern in the set
|
1472
|
+
* @raise [ArgumentError] if called after compile or the pattern is rejected
|
1473
|
+
* @example
|
1474
|
+
* set = RE2::Set.new
|
1475
|
+
* set.add("abc") #=> 0
|
1476
|
+
* set.add("def") #=> 1
|
1477
|
+
*/
|
1478
|
+
static VALUE re2_set_add(VALUE self, VALUE pattern) {
|
1479
|
+
Check_Type(pattern, T_STRING);
|
1480
|
+
re2::StringPiece regex(RSTRING_PTR(pattern), RSTRING_LEN(pattern));
|
1481
|
+
std::string err;
|
1482
|
+
re2_set *s;
|
1483
|
+
Data_Get_Struct(self, re2_set, s);
|
1484
|
+
int index = s->set->Add(regex, &err);
|
1485
|
+
if (index < 0) {
|
1486
|
+
rb_raise(rb_eArgError, "str rejected by RE2::Set->Add(): %s", err.c_str());
|
1487
|
+
}
|
1488
|
+
|
1489
|
+
return INT2FIX(index);
|
1490
|
+
}
|
1491
|
+
|
1492
|
+
/*
|
1493
|
+
* Compiles a Set so it can be used to match against. Must be called after #add
|
1494
|
+
* and before #match.
|
1495
|
+
*
|
1496
|
+
* @return [Bool] whether compilation was a success
|
1497
|
+
* @example
|
1498
|
+
* set = RE2::Set.new
|
1499
|
+
* set.add("abc")
|
1500
|
+
* set.compile # => true
|
1501
|
+
*/
|
1502
|
+
static VALUE re2_set_compile(VALUE self) {
|
1503
|
+
re2_set *s;
|
1504
|
+
Data_Get_Struct(self, re2_set, s);
|
1505
|
+
|
1506
|
+
return BOOL2RUBY(s->set->Compile());
|
1507
|
+
}
|
1508
|
+
|
1509
|
+
/*
|
1510
|
+
* Returns whether the underlying re2 version outputs error information from
|
1511
|
+
* RE2::Set::Match. If not, #match will raise an error if attempting to set its
|
1512
|
+
* :exception option to true.
|
1513
|
+
*
|
1514
|
+
* @return [Bool] whether the underlying re2 outputs error information from Set matches
|
1515
|
+
*/
|
1516
|
+
static VALUE re2_set_match_raises_errors_p(VALUE self) {
|
1517
|
+
#ifdef HAVE_ERROR_INFO_ARGUMENT
|
1518
|
+
return Qtrue;
|
1519
|
+
#else
|
1520
|
+
return Qfalse;
|
1521
|
+
#endif
|
1522
|
+
}
|
1523
|
+
|
1524
|
+
/*
|
1525
|
+
* Matches the given text against patterns in the set, returning an array of
|
1526
|
+
* integer indices of the matching patterns if matched or an empty array if
|
1527
|
+
* there are no matches.
|
1528
|
+
*
|
1529
|
+
* @param [String] str the text to match against
|
1530
|
+
* @param [Hash] options the options with which to match
|
1531
|
+
* @option options [Boolean] :exception (true) whether to raise exceptions with re2's error information (not supported on ABI version 0 of re2)
|
1532
|
+
* @return [Array<Integer>] the indices of matching regexps
|
1533
|
+
* @raise [MatchError] if an error occurs while matching
|
1534
|
+
* @raise [UnsupportedError] if using the :exception option against a version of re2 that does not support it
|
1535
|
+
* @example
|
1536
|
+
* set = RE2::Set.new
|
1537
|
+
* set.add("abc")
|
1538
|
+
* set.add("def")
|
1539
|
+
* set.compile
|
1540
|
+
* set.match("abcdef") # => [0, 1]
|
1541
|
+
*/
|
1542
|
+
static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
|
1543
|
+
VALUE str, options, exception_option;
|
1544
|
+
bool raise_exception = true;
|
1545
|
+
rb_scan_args(argc, argv, "11", &str, &options);
|
1546
|
+
Check_Type(str, T_STRING);
|
1547
|
+
re2::StringPiece data(RSTRING_PTR(str), RSTRING_LEN(str));
|
1548
|
+
std::vector<int> v;
|
1549
|
+
re2_set *s;
|
1550
|
+
Data_Get_Struct(self, re2_set, s);
|
1551
|
+
|
1552
|
+
if (RTEST(options)) {
|
1553
|
+
Check_Type(options, T_HASH);
|
1554
|
+
|
1555
|
+
exception_option = rb_hash_aref(options, ID2SYM(id_exception));
|
1556
|
+
if (!NIL_P(exception_option)) {
|
1557
|
+
raise_exception = RTEST(exception_option);
|
1558
|
+
}
|
1559
|
+
}
|
1560
|
+
|
1561
|
+
if (raise_exception) {
|
1562
|
+
#ifdef HAVE_ERROR_INFO_ARGUMENT
|
1563
|
+
RE2::Set::ErrorInfo e;
|
1564
|
+
bool match_failed = !s->set->Match(data, &v, &e);
|
1565
|
+
VALUE result = rb_ary_new2(v.size());
|
1566
|
+
|
1567
|
+
if (match_failed) {
|
1568
|
+
switch (e.kind) {
|
1569
|
+
case RE2::Set::kNoError:
|
1570
|
+
break;
|
1571
|
+
case RE2::Set::kNotCompiled:
|
1572
|
+
rb_raise(re2_eSetMatchError, "#match must not be called before #compile");
|
1573
|
+
case RE2::Set::kOutOfMemory:
|
1574
|
+
rb_raise(re2_eSetMatchError, "The DFA ran out of memory");
|
1575
|
+
case RE2::Set::kInconsistent:
|
1576
|
+
rb_raise(re2_eSetMatchError, "RE2::Prog internal error");
|
1577
|
+
default: // Just in case a future version of libre2 adds new ErrorKinds
|
1578
|
+
rb_raise(re2_eSetMatchError, "Unknown RE2::Set::ErrorKind: %d", e.kind);
|
1579
|
+
}
|
1580
|
+
} else {
|
1581
|
+
for (size_t i = 0; i < v.size(); i++) {
|
1582
|
+
rb_ary_push(result, INT2FIX(v[i]));
|
1583
|
+
}
|
1584
|
+
}
|
1585
|
+
|
1586
|
+
return result;
|
1587
|
+
#else
|
1588
|
+
rb_raise(re2_eSetUnsupportedError, "current version of RE2::Set::Match() does not output error information, :exception option can only be set to false");
|
1589
|
+
#endif
|
1590
|
+
} else {
|
1591
|
+
bool matched = s->set->Match(data, &v);
|
1592
|
+
VALUE result = rb_ary_new2(v.size());
|
1593
|
+
|
1594
|
+
if (matched) {
|
1595
|
+
for (size_t i = 0; i < v.size(); i++) {
|
1596
|
+
rb_ary_push(result, INT2FIX(v[i]));
|
1597
|
+
}
|
1598
|
+
}
|
1599
|
+
|
1600
|
+
return result;
|
1601
|
+
}
|
1602
|
+
}
|
1603
|
+
|
1365
1604
|
/* Forward declare Init_re2 to be called by C code but define it separately so
|
1366
1605
|
* that YARD can parse it.
|
1367
1606
|
*/
|
@@ -1372,12 +1611,18 @@ void Init_re2(void) {
|
|
1372
1611
|
re2_cRegexp = rb_define_class_under(re2_mRE2, "Regexp", rb_cObject);
|
1373
1612
|
re2_cMatchData = rb_define_class_under(re2_mRE2, "MatchData", rb_cObject);
|
1374
1613
|
re2_cScanner = rb_define_class_under(re2_mRE2, "Scanner", rb_cObject);
|
1614
|
+
re2_cSet = rb_define_class_under(re2_mRE2, "Set", rb_cObject);
|
1615
|
+
re2_eSetMatchError = rb_define_class_under(re2_cSet, "MatchError",
|
1616
|
+
rb_const_get(rb_cObject, rb_intern("StandardError")));
|
1617
|
+
re2_eSetUnsupportedError = rb_define_class_under(re2_cSet, "UnsupportedError",
|
1618
|
+
rb_const_get(rb_cObject, rb_intern("StandardError")));
|
1375
1619
|
|
1376
1620
|
rb_define_alloc_func(re2_cRegexp, (VALUE (*)(VALUE))re2_regexp_allocate);
|
1377
1621
|
rb_define_alloc_func(re2_cMatchData,
|
1378
1622
|
(VALUE (*)(VALUE))re2_matchdata_allocate);
|
1379
1623
|
rb_define_alloc_func(re2_cScanner,
|
1380
1624
|
(VALUE (*)(VALUE))re2_scanner_allocate);
|
1625
|
+
rb_define_alloc_func(re2_cSet, (VALUE (*)(VALUE))re2_set_allocate);
|
1381
1626
|
|
1382
1627
|
rb_define_method(re2_cMatchData, "string",
|
1383
1628
|
RUBY_METHOD_FUNC(re2_matchdata_string), 0);
|
@@ -1394,7 +1639,8 @@ void Init_re2(void) {
|
|
1394
1639
|
rb_define_method(re2_cMatchData, "end",
|
1395
1640
|
RUBY_METHOD_FUNC(re2_matchdata_end), 1);
|
1396
1641
|
rb_define_method(re2_cMatchData, "[]", RUBY_METHOD_FUNC(re2_matchdata_aref),
|
1397
|
-
-1);
|
1642
|
+
-1);
|
1643
|
+
rb_define_method(re2_cMatchData, "to_s",
|
1398
1644
|
RUBY_METHOD_FUNC(re2_matchdata_to_s), 0);
|
1399
1645
|
rb_define_method(re2_cMatchData, "inspect",
|
1400
1646
|
RUBY_METHOD_FUNC(re2_matchdata_inspect), 0);
|
@@ -1471,6 +1717,14 @@ void Init_re2(void) {
|
|
1471
1717
|
rb_define_method(re2_cRegexp, "one_line?",
|
1472
1718
|
RUBY_METHOD_FUNC(re2_regexp_one_line), 0);
|
1473
1719
|
|
1720
|
+
rb_define_singleton_method(re2_cSet, "match_raises_errors?",
|
1721
|
+
RUBY_METHOD_FUNC(re2_set_match_raises_errors_p), 0);
|
1722
|
+
rb_define_method(re2_cSet, "initialize",
|
1723
|
+
RUBY_METHOD_FUNC(re2_set_initialize), -1);
|
1724
|
+
rb_define_method(re2_cSet, "add", RUBY_METHOD_FUNC(re2_set_add), 1);
|
1725
|
+
rb_define_method(re2_cSet, "compile", RUBY_METHOD_FUNC(re2_set_compile), 0);
|
1726
|
+
rb_define_method(re2_cSet, "match", RUBY_METHOD_FUNC(re2_set_match), -1);
|
1727
|
+
|
1474
1728
|
rb_define_module_function(re2_mRE2, "Replace",
|
1475
1729
|
RUBY_METHOD_FUNC(re2_Replace), 3);
|
1476
1730
|
rb_define_module_function(re2_mRE2, "GlobalReplace",
|
@@ -1498,6 +1752,10 @@ void Init_re2(void) {
|
|
1498
1752
|
id_perl_classes = rb_intern("perl_classes");
|
1499
1753
|
id_word_boundary = rb_intern("word_boundary");
|
1500
1754
|
id_one_line = rb_intern("one_line");
|
1755
|
+
id_unanchored = rb_intern("unanchored");
|
1756
|
+
id_anchor_start = rb_intern("anchor_start");
|
1757
|
+
id_anchor_both = rb_intern("anchor_both");
|
1758
|
+
id_exception = rb_intern("exception");
|
1501
1759
|
|
1502
1760
|
#if 0
|
1503
1761
|
/* Fake so YARD generates the file. */
|
@@ -0,0 +1,168 @@
|
|
1
|
+
RSpec.describe RE2::Set do
|
2
|
+
describe "#initialize" do
|
3
|
+
it "returns an instance given no args" do
|
4
|
+
set = RE2::Set.new
|
5
|
+
|
6
|
+
expect(set).to be_a(RE2::Set)
|
7
|
+
end
|
8
|
+
|
9
|
+
it "returns an instance given only an anchor of :unanchored" do
|
10
|
+
set = RE2::Set.new(:unanchored)
|
11
|
+
|
12
|
+
expect(set).to be_a(RE2::Set)
|
13
|
+
end
|
14
|
+
|
15
|
+
it "returns an instance given only an anchor of :anchor_start" do
|
16
|
+
set = RE2::Set.new(:anchor_start)
|
17
|
+
|
18
|
+
expect(set).to be_a(RE2::Set)
|
19
|
+
end
|
20
|
+
|
21
|
+
it "returns an instance given only an anchor of :anchor_both" do
|
22
|
+
set = RE2::Set.new(:anchor_both)
|
23
|
+
|
24
|
+
expect(set).to be_a(RE2::Set)
|
25
|
+
end
|
26
|
+
|
27
|
+
it "returns an instance given an anchor and options" do
|
28
|
+
set = RE2::Set.new(:unanchored, :case_sensitive => false)
|
29
|
+
|
30
|
+
expect(set).to be_a(RE2::Set)
|
31
|
+
end
|
32
|
+
|
33
|
+
it "raises an error if given an inappropriate type" do
|
34
|
+
expect { RE2::Set.new(0) }.to raise_error(TypeError)
|
35
|
+
end
|
36
|
+
|
37
|
+
it "raises an error if given an invalid anchor" do
|
38
|
+
expect { RE2::Set.new(:not_a_valid_anchor) }.to raise_error(
|
39
|
+
ArgumentError,
|
40
|
+
"anchor should be one of: :unanchored, :anchor_start, :anchor_both"
|
41
|
+
)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
describe "#add" do
|
46
|
+
it "allows multiple patterns to be added", :aggregate_failures do
|
47
|
+
set = RE2::Set.new
|
48
|
+
|
49
|
+
expect(set.add("abc")).to eq(0)
|
50
|
+
expect(set.add("def")).to eq(1)
|
51
|
+
expect(set.add("ghi")).to eq(2)
|
52
|
+
end
|
53
|
+
|
54
|
+
it "rejects invalid patterns when added" do
|
55
|
+
set = RE2::Set.new(:unanchored, :log_errors => false)
|
56
|
+
|
57
|
+
expect { set.add("???") }.to raise_error(ArgumentError, /str rejected by RE2::Set->Add()/)
|
58
|
+
end
|
59
|
+
|
60
|
+
it "raises an error if called after #compile" do
|
61
|
+
set = RE2::Set.new(:unanchored, :log_errors => false)
|
62
|
+
set.add("abc")
|
63
|
+
set.compile
|
64
|
+
|
65
|
+
silence_stderr do
|
66
|
+
expect { set.add("def") }.to raise_error(ArgumentError)
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
it "raises an error if given a non-string pattern" do
|
71
|
+
set = RE2::Set.new(:unanchored, :log_errors => false)
|
72
|
+
|
73
|
+
expect { set.add(0) }.to raise_error(TypeError)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
describe "#compile" do
|
78
|
+
it "compiles the set without error" do
|
79
|
+
set = RE2::Set.new
|
80
|
+
set.add("abc")
|
81
|
+
set.add("def")
|
82
|
+
set.add("ghi")
|
83
|
+
|
84
|
+
expect(set.compile).to be_truthy
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
describe "#match" do
|
89
|
+
it "matches against multiple patterns" do
|
90
|
+
set = RE2::Set.new
|
91
|
+
set.add("abc")
|
92
|
+
set.add("def")
|
93
|
+
set.add("ghi")
|
94
|
+
set.compile
|
95
|
+
|
96
|
+
expect(set.match("abcdefghi", :exception => false)).to eq([0, 1, 2])
|
97
|
+
end
|
98
|
+
|
99
|
+
it "raises an error if called before #compile by default" do
|
100
|
+
skip "Underlying RE2::Set::Match does not output error information" unless RE2::Set.match_raises_errors?
|
101
|
+
|
102
|
+
set = RE2::Set.new(:unanchored, :log_errors => false)
|
103
|
+
|
104
|
+
silence_stderr do
|
105
|
+
expect { set.match("") }.to raise_error(RE2::Set::MatchError)
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
it "raises an error if called before #compile when :exception is true" do
|
110
|
+
skip "Underlying RE2::Set::Match does not output error information" unless RE2::Set.match_raises_errors?
|
111
|
+
|
112
|
+
set = RE2::Set.new(:unanchored, :log_errors => false)
|
113
|
+
|
114
|
+
silence_stderr do
|
115
|
+
expect { set.match("", :exception => true) }.to raise_error(RE2::Set::MatchError)
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
it "returns an empty array if called before #compile when :exception is false" do
|
120
|
+
set = RE2::Set.new(:unanchored, :log_errors => false)
|
121
|
+
|
122
|
+
silence_stderr do
|
123
|
+
expect(set.match("", :exception => false)).to be_empty
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
it "raises an error if :exception is true and re2 does not support it" do
|
128
|
+
skip "Underlying RE2::Set::Match outputs error information" if RE2::Set.match_raises_errors?
|
129
|
+
|
130
|
+
set = RE2::Set.new(:unanchored, :log_errors => false)
|
131
|
+
|
132
|
+
silence_stderr do
|
133
|
+
expect { set.match("", :exception => true) }.to raise_error(RE2::Set::UnsupportedError)
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
it "raises an error if given non-hash options" do
|
138
|
+
set = RE2::Set.new
|
139
|
+
|
140
|
+
expect { set.match("", 0) }.to raise_error(TypeError)
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
144
|
+
def silence_stderr
|
145
|
+
original_stream = STDERR
|
146
|
+
|
147
|
+
if File.const_defined?(:NULL)
|
148
|
+
STDERR.reopen(File::NULL)
|
149
|
+
else
|
150
|
+
platform = RUBY_PLATFORM == 'java' ? RbConfig::CONFIG['host_os'] : RUBY_PLATFORM
|
151
|
+
|
152
|
+
case platform
|
153
|
+
when /mswin|mingw/i
|
154
|
+
STDERR.reopen('NUL')
|
155
|
+
when /amiga/i
|
156
|
+
STDERR.reopen('NIL')
|
157
|
+
when /openvms/i
|
158
|
+
STDERR.reopen('NL:')
|
159
|
+
else
|
160
|
+
STDERR.reopen('/dev/null')
|
161
|
+
end
|
162
|
+
end
|
163
|
+
|
164
|
+
yield
|
165
|
+
ensure
|
166
|
+
STDERR.reopen(original_stream)
|
167
|
+
end
|
168
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: re2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Paul Mucur
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-10-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|
@@ -57,6 +57,7 @@ files:
|
|
57
57
|
- spec/re2/match_data_spec.rb
|
58
58
|
- spec/re2/regexp_spec.rb
|
59
59
|
- spec/re2/scanner_spec.rb
|
60
|
+
- spec/re2/set_spec.rb
|
60
61
|
- spec/re2/string_spec.rb
|
61
62
|
- spec/re2_spec.rb
|
62
63
|
- spec/spec_helper.rb
|
@@ -79,7 +80,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
79
80
|
- !ruby/object:Gem::Version
|
80
81
|
version: '0'
|
81
82
|
requirements: []
|
82
|
-
rubygems_version: 3.
|
83
|
+
rubygems_version: 3.3.7
|
83
84
|
signing_key:
|
84
85
|
specification_version: 4
|
85
86
|
summary: Ruby bindings to re2.
|
@@ -90,4 +91,5 @@ test_files:
|
|
90
91
|
- spec/re2/regexp_spec.rb
|
91
92
|
- spec/re2/match_data_spec.rb
|
92
93
|
- spec/re2/string_spec.rb
|
94
|
+
- spec/re2/set_spec.rb
|
93
95
|
- spec/re2/scanner_spec.rb
|