re2 1.4.0 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +22 -2
- data/ext/re2/extconf.rb +24 -0
- data/ext/re2/re2.cc +323 -65
- data/spec/re2/set_spec.rb +168 -0
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8588a481a52f07a6c965094bb69c1aee177add80d071aee3a1771f97e19dc4f6
|
4
|
+
data.tar.gz: bb123e3a2d5352379548bf68f57fd195c8aa458e2da06f0864b7135682c19b1d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b421f15ace868de905dece66db9e607636624cf2548eabe6be9979c397ac825035c0b24a5d8e8d6a351aa5809f6265d07f7b67a4c720a81e0b4e78f8d4599c67
|
7
|
+
data.tar.gz: fb7314faaf44a8847eadf207b0226ea71071ddb02ae41b7644ade69b69c06ff86f7c5d1a34042b4ef90633b98b6185bc07a6e2241bfa570089525e73eca6a61a
|
data/README.md
CHANGED
@@ -4,7 +4,7 @@ re2 [![Build Status](https://github.com/mudge/re2/actions/workflows/tests.yml/ba
|
|
4
4
|
A Ruby binding to [re2][], an "efficient, principled regular expression
|
5
5
|
library".
|
6
6
|
|
7
|
-
**Current version:** 1.4.0
|
7
|
+
**Current version:** 1.5.0
|
8
8
|
**Supported Ruby versions:** 1.8.7, 1.9.3, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 3.0
|
9
9
|
**Supported re2 versions:** libre2.0 (< 2020-03-02), libre2.1 (2020-03-02), libre2.6 (2020-03-03), libre2.7 (2020-05-01), libre2.8 (2020-07-06), libre2.9 (2020-11-01)
|
10
10
|
|
@@ -131,6 +131,22 @@ enum.next #=> ["It"]
|
|
131
131
|
enum.next #=> ["is"]
|
132
132
|
```
|
133
133
|
|
134
|
+
As of 1.5.0, you can use `RE2::Set` to match multiple patterns against a
|
135
|
+
string. Calling `RE2::Set#add` with a pattern will return an integer index of
|
136
|
+
the pattern. After all patterns have been added, the set can be compiled using
|
137
|
+
`RE2::Set#compile`, and then `RE2::Set#match` will return an `Array<Integer>`
|
138
|
+
containing the indices of all the patterns that matched.
|
139
|
+
|
140
|
+
``` ruby
|
141
|
+
set = RE2::Set.new
|
142
|
+
set.add("abc") #=> 0
|
143
|
+
set.add("def") #=> 1
|
144
|
+
set.add("ghi") #=> 2
|
145
|
+
set.compile #=> true
|
146
|
+
set.match("abcdefghi") #=> [0, 1, 2]
|
147
|
+
set.match("ghidefabc") #=> [2, 1, 0]
|
148
|
+
```
|
149
|
+
|
134
150
|
Features
|
135
151
|
--------
|
136
152
|
|
@@ -149,6 +165,8 @@ Features
|
|
149
165
|
|
150
166
|
* Incrementally scanning text with `re2.scan(text)`
|
151
167
|
|
168
|
+
* Search a collection of patterns simultaneously with `RE2::Set`
|
169
|
+
|
152
170
|
* Checking regular expression compilation with `re2.ok?`, `re2.error` and
|
153
171
|
`re2.error_arg`
|
154
172
|
|
@@ -177,7 +195,9 @@ Contributions
|
|
177
195
|
* Thanks to [Sebastian Reitenbach](https://github.com/buzzdeee) for reporting
|
178
196
|
the deprecation and removal of the `utf8` encoding option in re2;
|
179
197
|
* Thanks to [Sergio Medina](https://github.com/serch) for reporting a bug when
|
180
|
-
using `RE2::Scanner#scan` with an invalid regular expression
|
198
|
+
using `RE2::Scanner#scan` with an invalid regular expression;
|
199
|
+
* Thanks to [Pritam Baral](https://github.com/pritambaral) for contributing the
|
200
|
+
initial support for `RE2::Set`.
|
181
201
|
|
182
202
|
Contact
|
183
203
|
-------
|
data/ext/re2/extconf.rb
CHANGED
@@ -88,4 +88,28 @@ SRC
|
|
88
88
|
end
|
89
89
|
end
|
90
90
|
|
91
|
+
checking_for("RE2::Set::Match() with error information") do
|
92
|
+
test_re2_set_match_signature = <<SRC
|
93
|
+
#include <vector>
|
94
|
+
#include <re2/re2.h>
|
95
|
+
#include <re2/set.h>
|
96
|
+
|
97
|
+
int main() {
|
98
|
+
RE2::Set s(RE2::DefaultOptions, RE2::UNANCHORED);
|
99
|
+
s.Add("foo", NULL);
|
100
|
+
s.Compile();
|
101
|
+
|
102
|
+
std::vector<int> v;
|
103
|
+
RE2::Set::ErrorInfo ei;
|
104
|
+
s.Match("foo", &v, &ei);
|
105
|
+
|
106
|
+
return 0;
|
107
|
+
}
|
108
|
+
SRC
|
109
|
+
|
110
|
+
if try_compile(test_re2_set_match_signature, compile_options)
|
111
|
+
$defs.push("-DHAVE_ERROR_INFO_ARGUMENT")
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
91
115
|
create_makefile("re2")
|
data/ext/re2/re2.cc
CHANGED
@@ -8,6 +8,7 @@
|
|
8
8
|
|
9
9
|
#include <ruby.h>
|
10
10
|
#include <re2/re2.h>
|
11
|
+
#include <re2/set.h>
|
11
12
|
#include <stdint.h>
|
12
13
|
#include <string>
|
13
14
|
#include <sstream>
|
@@ -93,12 +94,82 @@ typedef struct {
|
|
93
94
|
VALUE regexp, text;
|
94
95
|
} re2_scanner;
|
95
96
|
|
96
|
-
|
97
|
+
typedef struct {
|
98
|
+
RE2::Set *set;
|
99
|
+
} re2_set;
|
100
|
+
|
101
|
+
VALUE re2_mRE2, re2_cRegexp, re2_cMatchData, re2_cScanner, re2_cSet,
|
102
|
+
re2_eSetMatchError, re2_eSetUnsupportedError;
|
97
103
|
|
98
104
|
/* Symbols used in RE2 options. */
|
99
105
|
static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors,
|
100
106
|
id_max_mem, id_literal, id_never_nl, id_case_sensitive,
|
101
|
-
id_perl_classes, id_word_boundary, id_one_line
|
107
|
+
id_perl_classes, id_word_boundary, id_one_line,
|
108
|
+
id_unanchored, id_anchor_start, id_anchor_both, id_exception;
|
109
|
+
|
110
|
+
void parse_re2_options(RE2::Options& re2_options, VALUE options) {
|
111
|
+
if (TYPE(options) != T_HASH) {
|
112
|
+
rb_raise(rb_eArgError, "options should be a hash");
|
113
|
+
}
|
114
|
+
VALUE utf8, posix_syntax, longest_match, log_errors,
|
115
|
+
max_mem, literal, never_nl, case_sensitive, perl_classes,
|
116
|
+
word_boundary, one_line;
|
117
|
+
|
118
|
+
utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
|
119
|
+
if (!NIL_P(utf8)) {
|
120
|
+
re2_options.set_encoding(RTEST(utf8) ? RE2::Options::EncodingUTF8 : RE2::Options::EncodingLatin1);
|
121
|
+
}
|
122
|
+
|
123
|
+
posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
|
124
|
+
if (!NIL_P(posix_syntax)) {
|
125
|
+
re2_options.set_posix_syntax(RTEST(posix_syntax));
|
126
|
+
}
|
127
|
+
|
128
|
+
longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
|
129
|
+
if (!NIL_P(longest_match)) {
|
130
|
+
re2_options.set_longest_match(RTEST(longest_match));
|
131
|
+
}
|
132
|
+
|
133
|
+
log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
|
134
|
+
if (!NIL_P(log_errors)) {
|
135
|
+
re2_options.set_log_errors(RTEST(log_errors));
|
136
|
+
}
|
137
|
+
|
138
|
+
max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
|
139
|
+
if (!NIL_P(max_mem)) {
|
140
|
+
re2_options.set_max_mem(NUM2INT(max_mem));
|
141
|
+
}
|
142
|
+
|
143
|
+
literal = rb_hash_aref(options, ID2SYM(id_literal));
|
144
|
+
if (!NIL_P(literal)) {
|
145
|
+
re2_options.set_literal(RTEST(literal));
|
146
|
+
}
|
147
|
+
|
148
|
+
never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
|
149
|
+
if (!NIL_P(never_nl)) {
|
150
|
+
re2_options.set_never_nl(RTEST(never_nl));
|
151
|
+
}
|
152
|
+
|
153
|
+
case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
|
154
|
+
if (!NIL_P(case_sensitive)) {
|
155
|
+
re2_options.set_case_sensitive(RTEST(case_sensitive));
|
156
|
+
}
|
157
|
+
|
158
|
+
perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
|
159
|
+
if (!NIL_P(perl_classes)) {
|
160
|
+
re2_options.set_perl_classes(RTEST(perl_classes));
|
161
|
+
}
|
162
|
+
|
163
|
+
word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
|
164
|
+
if (!NIL_P(word_boundary)) {
|
165
|
+
re2_options.set_word_boundary(RTEST(word_boundary));
|
166
|
+
}
|
167
|
+
|
168
|
+
one_line = rb_hash_aref(options, ID2SYM(id_one_line));
|
169
|
+
if (!NIL_P(one_line)) {
|
170
|
+
re2_options.set_one_line(RTEST(one_line));
|
171
|
+
}
|
172
|
+
}
|
102
173
|
|
103
174
|
void re2_matchdata_mark(re2_matchdata* self) {
|
104
175
|
rb_gc_mark(self->regexp);
|
@@ -667,75 +738,15 @@ static VALUE re2_re2(int argc, VALUE *argv, VALUE self) {
|
|
667
738
|
* @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
|
668
739
|
*/
|
669
740
|
static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
670
|
-
VALUE pattern, options, utf8, posix_syntax, longest_match, log_errors,
|
671
|
-
max_mem, literal, never_nl, case_sensitive, perl_classes,
|
672
|
-
word_boundary, one_line;
|
741
|
+
VALUE pattern, options;
|
673
742
|
re2_pattern *p;
|
674
743
|
|
675
744
|
rb_scan_args(argc, argv, "11", &pattern, &options);
|
676
745
|
Data_Get_Struct(self, re2_pattern, p);
|
677
746
|
|
678
747
|
if (RTEST(options)) {
|
679
|
-
if (TYPE(options) != T_HASH) {
|
680
|
-
rb_raise(rb_eArgError, "options should be a hash");
|
681
|
-
}
|
682
|
-
|
683
748
|
RE2::Options re2_options;
|
684
|
-
|
685
|
-
utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
|
686
|
-
if (!NIL_P(utf8)) {
|
687
|
-
re2_options.set_encoding(RTEST(utf8) ? RE2::Options::EncodingUTF8 : RE2::Options::EncodingLatin1);
|
688
|
-
}
|
689
|
-
|
690
|
-
posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
|
691
|
-
if (!NIL_P(posix_syntax)) {
|
692
|
-
re2_options.set_posix_syntax(RTEST(posix_syntax));
|
693
|
-
}
|
694
|
-
|
695
|
-
longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
|
696
|
-
if (!NIL_P(longest_match)) {
|
697
|
-
re2_options.set_longest_match(RTEST(longest_match));
|
698
|
-
}
|
699
|
-
|
700
|
-
log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
|
701
|
-
if (!NIL_P(log_errors)) {
|
702
|
-
re2_options.set_log_errors(RTEST(log_errors));
|
703
|
-
}
|
704
|
-
|
705
|
-
max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
|
706
|
-
if (!NIL_P(max_mem)) {
|
707
|
-
re2_options.set_max_mem(NUM2INT(max_mem));
|
708
|
-
}
|
709
|
-
|
710
|
-
literal = rb_hash_aref(options, ID2SYM(id_literal));
|
711
|
-
if (!NIL_P(literal)) {
|
712
|
-
re2_options.set_literal(RTEST(literal));
|
713
|
-
}
|
714
|
-
|
715
|
-
never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
|
716
|
-
if (!NIL_P(never_nl)) {
|
717
|
-
re2_options.set_never_nl(RTEST(never_nl));
|
718
|
-
}
|
719
|
-
|
720
|
-
case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
|
721
|
-
if (!NIL_P(case_sensitive)) {
|
722
|
-
re2_options.set_case_sensitive(RTEST(case_sensitive));
|
723
|
-
}
|
724
|
-
|
725
|
-
perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
|
726
|
-
if (!NIL_P(perl_classes)) {
|
727
|
-
re2_options.set_perl_classes(RTEST(perl_classes));
|
728
|
-
}
|
729
|
-
|
730
|
-
word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
|
731
|
-
if (!NIL_P(word_boundary)) {
|
732
|
-
re2_options.set_word_boundary(RTEST(word_boundary));
|
733
|
-
}
|
734
|
-
|
735
|
-
one_line = rb_hash_aref(options, ID2SYM(id_one_line));
|
736
|
-
if (!NIL_P(one_line)) {
|
737
|
-
re2_options.set_one_line(RTEST(one_line));
|
738
|
-
}
|
749
|
+
parse_re2_options(re2_options, options);
|
739
750
|
|
740
751
|
p->pattern = new(nothrow) RE2(StringValuePtr(pattern), re2_options);
|
741
752
|
} else {
|
@@ -1362,6 +1373,234 @@ static VALUE re2_QuoteMeta(VALUE self, VALUE unquoted) {
|
|
1362
1373
|
return rb_str_new(quoted_string.data(), quoted_string.size());
|
1363
1374
|
}
|
1364
1375
|
|
1376
|
+
void re2_set_free(re2_set *self) {
|
1377
|
+
if (self->set) {
|
1378
|
+
delete self->set;
|
1379
|
+
}
|
1380
|
+
free(self);
|
1381
|
+
}
|
1382
|
+
|
1383
|
+
static VALUE re2_set_allocate(VALUE klass) {
|
1384
|
+
re2_set *s;
|
1385
|
+
VALUE result = Data_Make_Struct(klass, re2_set, 0, re2_set_free, s);
|
1386
|
+
return result;
|
1387
|
+
}
|
1388
|
+
|
1389
|
+
/*
|
1390
|
+
* Returns a new {RE2::Set} object, a collection of patterns that can be
|
1391
|
+
* searched for simultaneously.
|
1392
|
+
*
|
1393
|
+
* @return [RE2::Set]
|
1394
|
+
*
|
1395
|
+
* @overload initialize
|
1396
|
+
* Returns a new {RE2::Set} object for unanchored patterns with the default
|
1397
|
+
* options.
|
1398
|
+
*
|
1399
|
+
* @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
|
1400
|
+
* @return [RE2::Set]
|
1401
|
+
*
|
1402
|
+
* @overload initialize(anchor)
|
1403
|
+
* Returns a new {RE2::Set} object for the specified anchor with the default
|
1404
|
+
* options.
|
1405
|
+
*
|
1406
|
+
* @param [Symbol] anchor One of :unanchored, :anchor_start, :anchor_both
|
1407
|
+
* @raise [ArgumentError] if anchor is not :unanchored, :anchor_start or :anchor_both
|
1408
|
+
* @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
|
1409
|
+
*
|
1410
|
+
* @overload initialize(anchor, options)
|
1411
|
+
* Returns a new {RE2::Set} object with the specified options.
|
1412
|
+
*
|
1413
|
+
* @param [Symbol] anchor One of :unanchored, :anchor_start, :anchor_both
|
1414
|
+
* @param [Hash] options the options with which to compile the pattern
|
1415
|
+
* @option options [Boolean] :utf8 (true) text and pattern are UTF-8; otherwise Latin-1
|
1416
|
+
* @option options [Boolean] :posix_syntax (false) restrict regexps to POSIX egrep syntax
|
1417
|
+
* @option options [Boolean] :longest_match (false) search for longest match, not first match
|
1418
|
+
* @option options [Boolean] :log_errors (true) log syntax and execution errors to ERROR
|
1419
|
+
* @option options [Fixnum] :max_mem approx. max memory footprint of RE2
|
1420
|
+
* @option options [Boolean] :literal (false) interpret string as literal, not regexp
|
1421
|
+
* @option options [Boolean] :never_nl (false) never match \n, even if it is in regexp
|
1422
|
+
* @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with (?i) unless in posix_syntax mode)
|
1423
|
+
* @option options [Boolean] :perl_classes (false) allow Perl's \d \s \w \D \S \W when in posix_syntax mode
|
1424
|
+
* @option options [Boolean] :word_boundary (false) allow \b \B (word boundary and not) when in posix_syntax mode
|
1425
|
+
* @option options [Boolean] :one_line (false) ^ and $ only match beginning and end of text when in posix_syntax mode
|
1426
|
+
* @return [RE2::Set] an RE2::Set with the specified anchor and options
|
1427
|
+
* @raise [ArgumentError] if anchor is not one of the accepted choices
|
1428
|
+
* @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
|
1429
|
+
*/
|
1430
|
+
static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
|
1431
|
+
VALUE anchor, options;
|
1432
|
+
re2_set *s;
|
1433
|
+
RE2::Anchor re2_anchor;
|
1434
|
+
RE2::Options re2_options;
|
1435
|
+
|
1436
|
+
rb_scan_args(argc, argv, "02", &anchor, &options);
|
1437
|
+
Data_Get_Struct(self, re2_set, s);
|
1438
|
+
|
1439
|
+
if (RTEST(options)) {
|
1440
|
+
parse_re2_options(re2_options, options);
|
1441
|
+
}
|
1442
|
+
if (NIL_P(anchor)) {
|
1443
|
+
re2_anchor = RE2::UNANCHORED;
|
1444
|
+
} else {
|
1445
|
+
Check_Type(anchor, T_SYMBOL);
|
1446
|
+
ID id_anchor = SYM2ID(anchor);
|
1447
|
+
if (id_anchor == id_unanchored) {
|
1448
|
+
re2_anchor = RE2::UNANCHORED;
|
1449
|
+
} else if (id_anchor == id_anchor_start) {
|
1450
|
+
re2_anchor = RE2::ANCHOR_START;
|
1451
|
+
} else if (id_anchor == id_anchor_both) {
|
1452
|
+
re2_anchor = RE2::ANCHOR_BOTH;
|
1453
|
+
} else {
|
1454
|
+
rb_raise(rb_eArgError, "anchor should be one of: :unanchored, :anchor_start, :anchor_both");
|
1455
|
+
}
|
1456
|
+
}
|
1457
|
+
|
1458
|
+
s->set = new(nothrow) RE2::Set(re2_options, re2_anchor);
|
1459
|
+
if (s->set == 0) {
|
1460
|
+
rb_raise(rb_eNoMemError, "not enough memory to allocate RE2::Set object");
|
1461
|
+
}
|
1462
|
+
|
1463
|
+
return self;
|
1464
|
+
}
|
1465
|
+
|
1466
|
+
/*
|
1467
|
+
* Adds a pattern to the set. Returns the index that will identify the pattern
|
1468
|
+
* in the output of #match. Cannot be called after #compile has been called.
|
1469
|
+
*
|
1470
|
+
* @param [String] pattern the regex pattern
|
1471
|
+
* @return [Integer] the index of the pattern in the set
|
1472
|
+
* @raise [ArgumentError] if called after compile or the pattern is rejected
|
1473
|
+
* @example
|
1474
|
+
* set = RE2::Set.new
|
1475
|
+
* set.add("abc") #=> 0
|
1476
|
+
* set.add("def") #=> 1
|
1477
|
+
*/
|
1478
|
+
static VALUE re2_set_add(VALUE self, VALUE pattern) {
|
1479
|
+
Check_Type(pattern, T_STRING);
|
1480
|
+
re2::StringPiece regex(RSTRING_PTR(pattern), RSTRING_LEN(pattern));
|
1481
|
+
std::string err;
|
1482
|
+
re2_set *s;
|
1483
|
+
Data_Get_Struct(self, re2_set, s);
|
1484
|
+
int index = s->set->Add(regex, &err);
|
1485
|
+
if (index < 0) {
|
1486
|
+
rb_raise(rb_eArgError, "str rejected by RE2::Set->Add(): %s", err.c_str());
|
1487
|
+
}
|
1488
|
+
|
1489
|
+
return INT2FIX(index);
|
1490
|
+
}
|
1491
|
+
|
1492
|
+
/*
|
1493
|
+
* Compiles a Set so it can be used to match against. Must be called after #add
|
1494
|
+
* and before #match.
|
1495
|
+
*
|
1496
|
+
* @return [Boolean] whether compilation was a success
|
1497
|
+
* @example
|
1498
|
+
* set = RE2::Set.new
|
1499
|
+
* set.add("abc")
|
1500
|
+
* set.compile # => true
|
1501
|
+
*/
|
1502
|
+
static VALUE re2_set_compile(VALUE self) {
|
1503
|
+
re2_set *s;
|
1504
|
+
Data_Get_Struct(self, re2_set, s);
|
1505
|
+
|
1506
|
+
return BOOL2RUBY(s->set->Compile());
|
1507
|
+
}
|
1508
|
+
|
1509
|
+
/*
|
1510
|
+
* Returns whether the underlying re2 version outputs error information from
|
1511
|
+
* RE2::Set::Match. If not, #match will raise an error if attempting to set its
|
1512
|
+
* :exception option to true.
|
1513
|
+
*
|
1514
|
+
* @return [Boolean] whether the underlying re2 outputs error information from Set matches
|
1515
|
+
*/
|
1516
|
+
static VALUE re2_set_match_raises_errors_p(VALUE self) {
|
1517
|
+
#ifdef HAVE_ERROR_INFO_ARGUMENT
|
1518
|
+
return Qtrue;
|
1519
|
+
#else
|
1520
|
+
return Qfalse;
|
1521
|
+
#endif
|
1522
|
+
}
|
1523
|
+
|
1524
|
+
/*
|
1525
|
+
* Matches the given text against patterns in the set, returning an array of
|
1526
|
+
* integer indices of the matching patterns if matched or an empty array if
|
1527
|
+
* there are no matches.
|
1528
|
+
*
|
1529
|
+
* @param [String] str the text to match against
|
1530
|
+
* @param [Hash] options the options with which to match
|
1531
|
+
* @option options [Boolean] :exception (true) whether to raise exceptions with re2's error information (not supported on ABI version 0 of re2)
|
1532
|
+
* @return [Array<Integer>] the indices of matching regexps
|
1533
|
+
* @raise [MatchError] if an error occurs while matching
|
1534
|
+
* @raise [UnsupportedError] if using the :exception option against a version of re2 that does not support it
|
1535
|
+
* @example
|
1536
|
+
* set = RE2::Set.new
|
1537
|
+
* set.add("abc")
|
1538
|
+
* set.add("def")
|
1539
|
+
* set.compile
|
1540
|
+
* set.match("abcdef") # => [0, 1]
|
1541
|
+
*/
|
1542
|
+
static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
|
1543
|
+
VALUE str, options, exception_option;
|
1544
|
+
bool raise_exception = true;
|
1545
|
+
rb_scan_args(argc, argv, "11", &str, &options);
|
1546
|
+
Check_Type(str, T_STRING);
|
1547
|
+
re2::StringPiece data(RSTRING_PTR(str), RSTRING_LEN(str));
|
1548
|
+
std::vector<int> v;
|
1549
|
+
re2_set *s;
|
1550
|
+
Data_Get_Struct(self, re2_set, s);
|
1551
|
+
|
1552
|
+
if (RTEST(options)) {
|
1553
|
+
Check_Type(options, T_HASH);
|
1554
|
+
|
1555
|
+
exception_option = rb_hash_aref(options, ID2SYM(id_exception));
|
1556
|
+
if (!NIL_P(exception_option)) {
|
1557
|
+
raise_exception = RTEST(exception_option);
|
1558
|
+
}
|
1559
|
+
}
|
1560
|
+
|
1561
|
+
if (raise_exception) {
|
1562
|
+
#ifdef HAVE_ERROR_INFO_ARGUMENT
|
1563
|
+
RE2::Set::ErrorInfo e;
|
1564
|
+
bool match_failed = !s->set->Match(data, &v, &e);
|
1565
|
+
VALUE result = rb_ary_new2(v.size());
|
1566
|
+
|
1567
|
+
if (match_failed) {
|
1568
|
+
switch (e.kind) {
|
1569
|
+
case RE2::Set::kNoError:
|
1570
|
+
break;
|
1571
|
+
case RE2::Set::kNotCompiled:
|
1572
|
+
rb_raise(re2_eSetMatchError, "#match must not be called before #compile");
|
1573
|
+
case RE2::Set::kOutOfMemory:
|
1574
|
+
rb_raise(re2_eSetMatchError, "The DFA ran out of memory");
|
1575
|
+
case RE2::Set::kInconsistent:
|
1576
|
+
rb_raise(re2_eSetMatchError, "RE2::Prog internal error");
|
1577
|
+
default: // Just in case a future version of libre2 adds new ErrorKinds
|
1578
|
+
rb_raise(re2_eSetMatchError, "Unknown RE2::Set::ErrorKind: %d", e.kind);
|
1579
|
+
}
|
1580
|
+
} else {
|
1581
|
+
for (size_t i = 0; i < v.size(); i++) {
|
1582
|
+
rb_ary_push(result, INT2FIX(v[i]));
|
1583
|
+
}
|
1584
|
+
}
|
1585
|
+
|
1586
|
+
return result;
|
1587
|
+
#else
|
1588
|
+
rb_raise(re2_eSetUnsupportedError, "current version of RE2::Set::Match() does not output error information, :exception option can only be set to false");
|
1589
|
+
#endif
|
1590
|
+
} else {
|
1591
|
+
bool matched = s->set->Match(data, &v);
|
1592
|
+
VALUE result = rb_ary_new2(v.size());
|
1593
|
+
|
1594
|
+
if (matched) {
|
1595
|
+
for (size_t i = 0; i < v.size(); i++) {
|
1596
|
+
rb_ary_push(result, INT2FIX(v[i]));
|
1597
|
+
}
|
1598
|
+
}
|
1599
|
+
|
1600
|
+
return result;
|
1601
|
+
}
|
1602
|
+
}
|
1603
|
+
|
1365
1604
|
/* Forward declare Init_re2 to be called by C code but define it separately so
|
1366
1605
|
* that YARD can parse it.
|
1367
1606
|
*/
|
@@ -1372,12 +1611,18 @@ void Init_re2(void) {
|
|
1372
1611
|
re2_cRegexp = rb_define_class_under(re2_mRE2, "Regexp", rb_cObject);
|
1373
1612
|
re2_cMatchData = rb_define_class_under(re2_mRE2, "MatchData", rb_cObject);
|
1374
1613
|
re2_cScanner = rb_define_class_under(re2_mRE2, "Scanner", rb_cObject);
|
1614
|
+
re2_cSet = rb_define_class_under(re2_mRE2, "Set", rb_cObject);
|
1615
|
+
re2_eSetMatchError = rb_define_class_under(re2_cSet, "MatchError",
|
1616
|
+
rb_const_get(rb_cObject, rb_intern("StandardError")));
|
1617
|
+
re2_eSetUnsupportedError = rb_define_class_under(re2_cSet, "UnsupportedError",
|
1618
|
+
rb_const_get(rb_cObject, rb_intern("StandardError")));
|
1375
1619
|
|
1376
1620
|
rb_define_alloc_func(re2_cRegexp, (VALUE (*)(VALUE))re2_regexp_allocate);
|
1377
1621
|
rb_define_alloc_func(re2_cMatchData,
|
1378
1622
|
(VALUE (*)(VALUE))re2_matchdata_allocate);
|
1379
1623
|
rb_define_alloc_func(re2_cScanner,
|
1380
1624
|
(VALUE (*)(VALUE))re2_scanner_allocate);
|
1625
|
+
rb_define_alloc_func(re2_cSet, (VALUE (*)(VALUE))re2_set_allocate);
|
1381
1626
|
|
1382
1627
|
rb_define_method(re2_cMatchData, "string",
|
1383
1628
|
RUBY_METHOD_FUNC(re2_matchdata_string), 0);
|
@@ -1394,7 +1639,8 @@ void Init_re2(void) {
|
|
1394
1639
|
rb_define_method(re2_cMatchData, "end",
|
1395
1640
|
RUBY_METHOD_FUNC(re2_matchdata_end), 1);
|
1396
1641
|
rb_define_method(re2_cMatchData, "[]", RUBY_METHOD_FUNC(re2_matchdata_aref),
|
1397
|
-
-1);
|
1642
|
+
-1);
|
1643
|
+
rb_define_method(re2_cMatchData, "to_s",
|
1398
1644
|
RUBY_METHOD_FUNC(re2_matchdata_to_s), 0);
|
1399
1645
|
rb_define_method(re2_cMatchData, "inspect",
|
1400
1646
|
RUBY_METHOD_FUNC(re2_matchdata_inspect), 0);
|
@@ -1471,6 +1717,14 @@ void Init_re2(void) {
|
|
1471
1717
|
rb_define_method(re2_cRegexp, "one_line?",
|
1472
1718
|
RUBY_METHOD_FUNC(re2_regexp_one_line), 0);
|
1473
1719
|
|
1720
|
+
rb_define_singleton_method(re2_cSet, "match_raises_errors?",
|
1721
|
+
RUBY_METHOD_FUNC(re2_set_match_raises_errors_p), 0);
|
1722
|
+
rb_define_method(re2_cSet, "initialize",
|
1723
|
+
RUBY_METHOD_FUNC(re2_set_initialize), -1);
|
1724
|
+
rb_define_method(re2_cSet, "add", RUBY_METHOD_FUNC(re2_set_add), 1);
|
1725
|
+
rb_define_method(re2_cSet, "compile", RUBY_METHOD_FUNC(re2_set_compile), 0);
|
1726
|
+
rb_define_method(re2_cSet, "match", RUBY_METHOD_FUNC(re2_set_match), -1);
|
1727
|
+
|
1474
1728
|
rb_define_module_function(re2_mRE2, "Replace",
|
1475
1729
|
RUBY_METHOD_FUNC(re2_Replace), 3);
|
1476
1730
|
rb_define_module_function(re2_mRE2, "GlobalReplace",
|
@@ -1498,6 +1752,10 @@ void Init_re2(void) {
|
|
1498
1752
|
id_perl_classes = rb_intern("perl_classes");
|
1499
1753
|
id_word_boundary = rb_intern("word_boundary");
|
1500
1754
|
id_one_line = rb_intern("one_line");
|
1755
|
+
id_unanchored = rb_intern("unanchored");
|
1756
|
+
id_anchor_start = rb_intern("anchor_start");
|
1757
|
+
id_anchor_both = rb_intern("anchor_both");
|
1758
|
+
id_exception = rb_intern("exception");
|
1501
1759
|
|
1502
1760
|
#if 0
|
1503
1761
|
/* Fake so YARD generates the file. */
|
@@ -0,0 +1,168 @@
|
|
1
|
+
RSpec.describe RE2::Set do
|
2
|
+
describe "#initialize" do
|
3
|
+
it "returns an instance given no args" do
|
4
|
+
set = RE2::Set.new
|
5
|
+
|
6
|
+
expect(set).to be_a(RE2::Set)
|
7
|
+
end
|
8
|
+
|
9
|
+
it "returns an instance given only an anchor of :unanchored" do
|
10
|
+
set = RE2::Set.new(:unanchored)
|
11
|
+
|
12
|
+
expect(set).to be_a(RE2::Set)
|
13
|
+
end
|
14
|
+
|
15
|
+
it "returns an instance given only an anchor of :anchor_start" do
|
16
|
+
set = RE2::Set.new(:anchor_start)
|
17
|
+
|
18
|
+
expect(set).to be_a(RE2::Set)
|
19
|
+
end
|
20
|
+
|
21
|
+
it "returns an instance given only an anchor of :anchor_both" do
|
22
|
+
set = RE2::Set.new(:anchor_both)
|
23
|
+
|
24
|
+
expect(set).to be_a(RE2::Set)
|
25
|
+
end
|
26
|
+
|
27
|
+
it "returns an instance given an anchor and options" do
|
28
|
+
set = RE2::Set.new(:unanchored, :case_sensitive => false)
|
29
|
+
|
30
|
+
expect(set).to be_a(RE2::Set)
|
31
|
+
end
|
32
|
+
|
33
|
+
it "raises an error if given an inappropriate type" do
|
34
|
+
expect { RE2::Set.new(0) }.to raise_error(TypeError)
|
35
|
+
end
|
36
|
+
|
37
|
+
it "raises an error if given an invalid anchor" do
|
38
|
+
expect { RE2::Set.new(:not_a_valid_anchor) }.to raise_error(
|
39
|
+
ArgumentError,
|
40
|
+
"anchor should be one of: :unanchored, :anchor_start, :anchor_both"
|
41
|
+
)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
describe "#add" do
|
46
|
+
it "allows multiple patterns to be added", :aggregate_failures do
|
47
|
+
set = RE2::Set.new
|
48
|
+
|
49
|
+
expect(set.add("abc")).to eq(0)
|
50
|
+
expect(set.add("def")).to eq(1)
|
51
|
+
expect(set.add("ghi")).to eq(2)
|
52
|
+
end
|
53
|
+
|
54
|
+
it "rejects invalid patterns when added" do
|
55
|
+
set = RE2::Set.new(:unanchored, :log_errors => false)
|
56
|
+
|
57
|
+
expect { set.add("???") }.to raise_error(ArgumentError, /str rejected by RE2::Set->Add()/)
|
58
|
+
end
|
59
|
+
|
60
|
+
it "raises an error if called after #compile" do
|
61
|
+
set = RE2::Set.new(:unanchored, :log_errors => false)
|
62
|
+
set.add("abc")
|
63
|
+
set.compile
|
64
|
+
|
65
|
+
silence_stderr do
|
66
|
+
expect { set.add("def") }.to raise_error(ArgumentError)
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
it "raises an error if given a non-string pattern" do
|
71
|
+
set = RE2::Set.new(:unanchored, :log_errors => false)
|
72
|
+
|
73
|
+
expect { set.add(0) }.to raise_error(TypeError)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
describe "#compile" do
|
78
|
+
it "compiles the set without error" do
|
79
|
+
set = RE2::Set.new
|
80
|
+
set.add("abc")
|
81
|
+
set.add("def")
|
82
|
+
set.add("ghi")
|
83
|
+
|
84
|
+
expect(set.compile).to be_truthy
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
describe "#match" do
|
89
|
+
it "matches against multiple patterns" do
|
90
|
+
set = RE2::Set.new
|
91
|
+
set.add("abc")
|
92
|
+
set.add("def")
|
93
|
+
set.add("ghi")
|
94
|
+
set.compile
|
95
|
+
|
96
|
+
expect(set.match("abcdefghi", :exception => false)).to eq([0, 1, 2])
|
97
|
+
end
|
98
|
+
|
99
|
+
it "raises an error if called before #compile by default" do
|
100
|
+
skip "Underlying RE2::Set::Match does not output error information" unless RE2::Set.match_raises_errors?
|
101
|
+
|
102
|
+
set = RE2::Set.new(:unanchored, :log_errors => false)
|
103
|
+
|
104
|
+
silence_stderr do
|
105
|
+
expect { set.match("") }.to raise_error(RE2::Set::MatchError)
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
it "raises an error if called before #compile when :exception is true" do
|
110
|
+
skip "Underlying RE2::Set::Match does not output error information" unless RE2::Set.match_raises_errors?
|
111
|
+
|
112
|
+
set = RE2::Set.new(:unanchored, :log_errors => false)
|
113
|
+
|
114
|
+
silence_stderr do
|
115
|
+
expect { set.match("", :exception => true) }.to raise_error(RE2::Set::MatchError)
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
it "returns an empty array if called before #compile when :exception is false" do
|
120
|
+
set = RE2::Set.new(:unanchored, :log_errors => false)
|
121
|
+
|
122
|
+
silence_stderr do
|
123
|
+
expect(set.match("", :exception => false)).to be_empty
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
it "raises an error if :exception is true and re2 does not support it" do
|
128
|
+
skip "Underlying RE2::Set::Match outputs error information" if RE2::Set.match_raises_errors?
|
129
|
+
|
130
|
+
set = RE2::Set.new(:unanchored, :log_errors => false)
|
131
|
+
|
132
|
+
silence_stderr do
|
133
|
+
expect { set.match("", :exception => true) }.to raise_error(RE2::Set::UnsupportedError)
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
it "raises an error if given non-hash options" do
|
138
|
+
set = RE2::Set.new
|
139
|
+
|
140
|
+
expect { set.match("", 0) }.to raise_error(TypeError)
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
144
|
+
def silence_stderr
|
145
|
+
original_stream = STDERR
|
146
|
+
|
147
|
+
if File.const_defined?(:NULL)
|
148
|
+
STDERR.reopen(File::NULL)
|
149
|
+
else
|
150
|
+
platform = RUBY_PLATFORM == 'java' ? RbConfig::CONFIG['host_os'] : RUBY_PLATFORM
|
151
|
+
|
152
|
+
case platform
|
153
|
+
when /mswin|mingw/i
|
154
|
+
STDERR.reopen('NUL')
|
155
|
+
when /amiga/i
|
156
|
+
STDERR.reopen('NIL')
|
157
|
+
when /openvms/i
|
158
|
+
STDERR.reopen('NL:')
|
159
|
+
else
|
160
|
+
STDERR.reopen('/dev/null')
|
161
|
+
end
|
162
|
+
end
|
163
|
+
|
164
|
+
yield
|
165
|
+
ensure
|
166
|
+
STDERR.reopen(original_stream)
|
167
|
+
end
|
168
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: re2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.0
|
4
|
+
version: 1.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Paul Mucur
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-10-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|
@@ -57,6 +57,7 @@ files:
|
|
57
57
|
- spec/re2/match_data_spec.rb
|
58
58
|
- spec/re2/regexp_spec.rb
|
59
59
|
- spec/re2/scanner_spec.rb
|
60
|
+
- spec/re2/set_spec.rb
|
60
61
|
- spec/re2/string_spec.rb
|
61
62
|
- spec/re2_spec.rb
|
62
63
|
- spec/spec_helper.rb
|
@@ -79,7 +80,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
79
80
|
- !ruby/object:Gem::Version
|
80
81
|
version: '0'
|
81
82
|
requirements: []
|
82
|
-
rubygems_version: 3.
|
83
|
+
rubygems_version: 3.3.7
|
83
84
|
signing_key:
|
84
85
|
specification_version: 4
|
85
86
|
summary: Ruby bindings to re2.
|
@@ -90,4 +91,5 @@ test_files:
|
|
90
91
|
- spec/re2/regexp_spec.rb
|
91
92
|
- spec/re2/match_data_spec.rb
|
92
93
|
- spec/re2/string_spec.rb
|
94
|
+
- spec/re2/set_spec.rb
|
93
95
|
- spec/re2/scanner_spec.rb
|