re2 1.3.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b7a33f3c27224496ec131c261c7c66172f21474bce66cdf1cc1631c83040b3ca
4
- data.tar.gz: 4d4a5448aa271bbce4cf37ffadfaadfa0797691fad5cc13f2f722e281116d66f
3
+ metadata.gz: 8588a481a52f07a6c965094bb69c1aee177add80d071aee3a1771f97e19dc4f6
4
+ data.tar.gz: bb123e3a2d5352379548bf68f57fd195c8aa458e2da06f0864b7135682c19b1d
5
5
  SHA512:
6
- metadata.gz: f080eccb20bb599b374485981f4c23fd8a49e84903ebe32e99e00b458c8b6334430cef41cbb1227fcba057f81d2fed4ead67648e506348fa6c2f975b7e6fea7f
7
- data.tar.gz: bbd0ca07b287ca7e212175b4f5eaafd2698b7be94aefa432e81c4ff108b4b4c660e47d03d48d6affb82c0cee5f98fcb37bdcd97651bf309cebb148ac5e654518
6
+ metadata.gz: b421f15ace868de905dece66db9e607636624cf2548eabe6be9979c397ac825035c0b24a5d8e8d6a351aa5809f6265d07f7b67a4c720a81e0b4e78f8d4599c67
7
+ data.tar.gz: fb7314faaf44a8847eadf207b0226ea71071ddb02ae41b7644ade69b69c06ff86f7c5d1a34042b4ef90633b98b6185bc07a6e2241bfa570089525e73eca6a61a
data/README.md CHANGED
@@ -4,7 +4,7 @@ re2 [![Build Status](https://github.com/mudge/re2/actions/workflows/tests.yml/ba
4
4
  A Ruby binding to [re2][], an "efficient, principled regular expression
5
5
  library".
6
6
 
7
- **Current version:** 1.3.0
7
+ **Current version:** 1.5.0
8
8
  **Supported Ruby versions:** 1.8.7, 1.9.3, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 3.0
9
9
  **Supported re2 versions:** libre2.0 (< 2020-03-02), libre2.1 (2020-03-02), libre2.6 (2020-03-03), libre2.7 (2020-05-01), libre2.8 (2020-07-06), libre2.9 (2020-11-01)
10
10
 
@@ -131,6 +131,22 @@ enum.next #=> ["It"]
131
131
  enum.next #=> ["is"]
132
132
  ```
133
133
 
134
+ As of 1.5.0, you can use `RE2::Set` to match multiple patterns against a
135
+ string. Calling `RE2::Set#add` with a pattern will return an integer index of
136
+ the pattern. After all patterns have been added, the set can be compiled using
137
+ `RE2::Set#compile`, and then `RE2::Set#match` will return an `Array<Integer>`
138
+ containing the indices of all the patterns that matched.
139
+
140
+ ``` ruby
141
+ set = RE2::Set.new
142
+ set.add("abc") #=> 0
143
+ set.add("def") #=> 1
144
+ set.add("ghi") #=> 2
145
+ set.compile #=> true
146
+ set.match("abcdefghi") #=> [0, 1, 2]
147
+ set.match("ghidefabc") #=> [2, 1, 0]
148
+ ```
149
+
134
150
  Features
135
151
  --------
136
152
 
@@ -149,6 +165,8 @@ Features
149
165
 
150
166
  * Incrementally scanning text with `re2.scan(text)`
151
167
 
168
+ * Searching a collection of patterns simultaneously with `RE2::Set`
169
+
152
170
  * Checking regular expression compilation with `re2.ok?`, `re2.error` and
153
171
  `re2.error_arg`
154
172
 
@@ -175,7 +193,11 @@ Contributions
175
193
  * Thanks to [Stefano Rivera](https://github.com/stefanor) who first contributed C++11 support;
176
194
  * Thanks to [Stan Hu](https://github.com/stanhu) for reporting a bug with empty patterns and `RE2::Regexp#scan`;
177
195
  * Thanks to [Sebastian Reitenbach](https://github.com/buzzdeee) for reporting
178
- the deprecation and removal of the `utf8` encoding option in re2.
196
+ the deprecation and removal of the `utf8` encoding option in re2;
197
+ * Thanks to [Sergio Medina](https://github.com/serch) for reporting a bug when
198
+ using `RE2::Scanner#scan` with an invalid regular expression;
199
+ * Thanks to [Pritam Baral](https://github.com/pritambaral) for contributing the
200
+ initial support for `RE2::Set`.
179
201
 
180
202
  Contact
181
203
  -------
data/ext/re2/extconf.rb CHANGED
@@ -88,4 +88,28 @@ SRC
88
88
  end
89
89
  end
90
90
 
91
+ checking_for("RE2::Set::Match() with error information") do
92
+ test_re2_set_match_signature = <<SRC
93
+ #include <vector>
94
+ #include <re2/re2.h>
95
+ #include <re2/set.h>
96
+
97
+ int main() {
98
+ RE2::Set s(RE2::DefaultOptions, RE2::UNANCHORED);
99
+ s.Add("foo", NULL);
100
+ s.Compile();
101
+
102
+ std::vector<int> v;
103
+ RE2::Set::ErrorInfo ei;
104
+ s.Match("foo", &v, &ei);
105
+
106
+ return 0;
107
+ }
108
+ SRC
109
+
110
+ if try_compile(test_re2_set_match_signature, compile_options)
111
+ $defs.push("-DHAVE_ERROR_INFO_ARGUMENT")
112
+ end
113
+ end
114
+
91
115
  create_makefile("re2")
data/ext/re2/re2.cc CHANGED
@@ -8,6 +8,7 @@
8
8
 
9
9
  #include <ruby.h>
10
10
  #include <re2/re2.h>
11
+ #include <re2/set.h>
11
12
  #include <stdint.h>
12
13
  #include <string>
13
14
  #include <sstream>
@@ -93,12 +94,82 @@ typedef struct {
93
94
  VALUE regexp, text;
94
95
  } re2_scanner;
95
96
 
96
- VALUE re2_mRE2, re2_cRegexp, re2_cMatchData, re2_cScanner;
97
+ typedef struct {
98
+ RE2::Set *set;
99
+ } re2_set;
100
+
101
+ VALUE re2_mRE2, re2_cRegexp, re2_cMatchData, re2_cScanner, re2_cSet,
102
+ re2_eSetMatchError, re2_eSetUnsupportedError;
97
103
 
98
104
  /* Symbols used in RE2 options. */
99
105
  static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors,
100
106
  id_max_mem, id_literal, id_never_nl, id_case_sensitive,
101
- id_perl_classes, id_word_boundary, id_one_line;
107
+ id_perl_classes, id_word_boundary, id_one_line,
108
+ id_unanchored, id_anchor_start, id_anchor_both, id_exception;
109
+
110
+ void parse_re2_options(RE2::Options& re2_options, VALUE options) {
111
+ if (TYPE(options) != T_HASH) {
112
+ rb_raise(rb_eArgError, "options should be a hash");
113
+ }
114
+ VALUE utf8, posix_syntax, longest_match, log_errors,
115
+ max_mem, literal, never_nl, case_sensitive, perl_classes,
116
+ word_boundary, one_line;
117
+
118
+ utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
119
+ if (!NIL_P(utf8)) {
120
+ re2_options.set_encoding(RTEST(utf8) ? RE2::Options::EncodingUTF8 : RE2::Options::EncodingLatin1);
121
+ }
122
+
123
+ posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
124
+ if (!NIL_P(posix_syntax)) {
125
+ re2_options.set_posix_syntax(RTEST(posix_syntax));
126
+ }
127
+
128
+ longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
129
+ if (!NIL_P(longest_match)) {
130
+ re2_options.set_longest_match(RTEST(longest_match));
131
+ }
132
+
133
+ log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
134
+ if (!NIL_P(log_errors)) {
135
+ re2_options.set_log_errors(RTEST(log_errors));
136
+ }
137
+
138
+ max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
139
+ if (!NIL_P(max_mem)) {
140
+ re2_options.set_max_mem(NUM2INT(max_mem));
141
+ }
142
+
143
+ literal = rb_hash_aref(options, ID2SYM(id_literal));
144
+ if (!NIL_P(literal)) {
145
+ re2_options.set_literal(RTEST(literal));
146
+ }
147
+
148
+ never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
149
+ if (!NIL_P(never_nl)) {
150
+ re2_options.set_never_nl(RTEST(never_nl));
151
+ }
152
+
153
+ case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
154
+ if (!NIL_P(case_sensitive)) {
155
+ re2_options.set_case_sensitive(RTEST(case_sensitive));
156
+ }
157
+
158
+ perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
159
+ if (!NIL_P(perl_classes)) {
160
+ re2_options.set_perl_classes(RTEST(perl_classes));
161
+ }
162
+
163
+ word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
164
+ if (!NIL_P(word_boundary)) {
165
+ re2_options.set_word_boundary(RTEST(word_boundary));
166
+ }
167
+
168
+ one_line = rb_hash_aref(options, ID2SYM(id_one_line));
169
+ if (!NIL_P(one_line)) {
170
+ re2_options.set_one_line(RTEST(one_line));
171
+ }
172
+ }
102
173
 
103
174
  void re2_matchdata_mark(re2_matchdata* self) {
104
175
  rb_gc_mark(self->regexp);
@@ -667,75 +738,15 @@ static VALUE re2_re2(int argc, VALUE *argv, VALUE self) {
667
738
  * @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
668
739
  */
669
740
  static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
670
- VALUE pattern, options, utf8, posix_syntax, longest_match, log_errors,
671
- max_mem, literal, never_nl, case_sensitive, perl_classes,
672
- word_boundary, one_line;
741
+ VALUE pattern, options;
673
742
  re2_pattern *p;
674
743
 
675
744
  rb_scan_args(argc, argv, "11", &pattern, &options);
676
745
  Data_Get_Struct(self, re2_pattern, p);
677
746
 
678
747
  if (RTEST(options)) {
679
- if (TYPE(options) != T_HASH) {
680
- rb_raise(rb_eArgError, "options should be a hash");
681
- }
682
-
683
748
  RE2::Options re2_options;
684
-
685
- utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
686
- if (!NIL_P(utf8)) {
687
- re2_options.set_encoding(RTEST(utf8) ? RE2::Options::EncodingUTF8 : RE2::Options::EncodingLatin1);
688
- }
689
-
690
- posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
691
- if (!NIL_P(posix_syntax)) {
692
- re2_options.set_posix_syntax(RTEST(posix_syntax));
693
- }
694
-
695
- longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
696
- if (!NIL_P(longest_match)) {
697
- re2_options.set_longest_match(RTEST(longest_match));
698
- }
699
-
700
- log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
701
- if (!NIL_P(log_errors)) {
702
- re2_options.set_log_errors(RTEST(log_errors));
703
- }
704
-
705
- max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
706
- if (!NIL_P(max_mem)) {
707
- re2_options.set_max_mem(NUM2INT(max_mem));
708
- }
709
-
710
- literal = rb_hash_aref(options, ID2SYM(id_literal));
711
- if (!NIL_P(literal)) {
712
- re2_options.set_literal(RTEST(literal));
713
- }
714
-
715
- never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
716
- if (!NIL_P(never_nl)) {
717
- re2_options.set_never_nl(RTEST(never_nl));
718
- }
719
-
720
- case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
721
- if (!NIL_P(case_sensitive)) {
722
- re2_options.set_case_sensitive(RTEST(case_sensitive));
723
- }
724
-
725
- perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
726
- if (!NIL_P(perl_classes)) {
727
- re2_options.set_perl_classes(RTEST(perl_classes));
728
- }
729
-
730
- word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
731
- if (!NIL_P(word_boundary)) {
732
- re2_options.set_word_boundary(RTEST(word_boundary));
733
- }
734
-
735
- one_line = rb_hash_aref(options, ID2SYM(id_one_line));
736
- if (!NIL_P(one_line)) {
737
- re2_options.set_one_line(RTEST(one_line));
738
- }
749
+ parse_re2_options(re2_options, options);
739
750
 
740
751
  p->pattern = new(nothrow) RE2(StringValuePtr(pattern), re2_options);
741
752
  } else {
@@ -1158,6 +1169,7 @@ static VALUE re2_regexp_named_capturing_groups(VALUE self) {
1158
1169
  * @param [String] text the text to search
1159
1170
  * @param [Fixnum] number_of_matches the number of matches to return
1160
1171
  * @return [RE2::MatchData] the matches
1172
+ * @raise [ArgumentError] if given a negative number of matches
1161
1173
  * @raise [NoMemoryError] if there was not enough memory to allocate the matches
1162
1174
  * @example
1163
1175
  * r = RE2::Regexp.new('w(o)(o)')
@@ -1180,7 +1192,15 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
1180
1192
 
1181
1193
  if (RTEST(number_of_matches)) {
1182
1194
  n = NUM2INT(number_of_matches);
1195
+
1196
+ if (n < 0) {
1197
+ rb_raise(rb_eArgError, "number of matches should be >= 0");
1198
+ }
1183
1199
  } else {
1200
+ if (!p->pattern->ok()) {
1201
+ return Qnil;
1202
+ }
1203
+
1184
1204
  n = p->pattern->NumberOfCapturingGroups();
1185
1205
  }
1186
1206
 
@@ -1251,7 +1271,13 @@ static VALUE re2_regexp_scan(VALUE self, VALUE text) {
1251
1271
  c->input = new(nothrow) re2::StringPiece(StringValuePtr(text));
1252
1272
  c->regexp = self;
1253
1273
  c->text = text;
1254
- c->number_of_capturing_groups = p->pattern->NumberOfCapturingGroups();
1274
+
1275
+ if (p->pattern->ok()) {
1276
+ c->number_of_capturing_groups = p->pattern->NumberOfCapturingGroups();
1277
+ } else {
1278
+ c->number_of_capturing_groups = 0;
1279
+ }
1280
+
1255
1281
  c->eof = false;
1256
1282
 
1257
1283
  return scanner;
@@ -1347,6 +1373,234 @@ static VALUE re2_QuoteMeta(VALUE self, VALUE unquoted) {
1347
1373
  return rb_str_new(quoted_string.data(), quoted_string.size());
1348
1374
  }
1349
1375
 
1376
+ void re2_set_free(re2_set *self) {
1377
+ if (self->set) {
1378
+ delete self->set;
1379
+ }
1380
+ free(self);
1381
+ }
1382
+
1383
+ static VALUE re2_set_allocate(VALUE klass) {
1384
+ re2_set *s;
1385
+ VALUE result = Data_Make_Struct(klass, re2_set, 0, re2_set_free, s);
1386
+ return result;
1387
+ }
1388
+
1389
+ /*
1390
+ * Returns a new {RE2::Set} object, a collection of patterns that can be
1391
+ * searched for simultaneously.
1392
+ *
1393
+ * @return [RE2::Set]
1394
+ *
1395
+ * @overload initialize
1396
+ * Returns a new {RE2::Set} object for unanchored patterns with the default
1397
+ * options.
1398
+ *
1399
+ * @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
1400
+ * @return [RE2::Set]
1401
+ *
1402
+ * @overload initialize(anchor)
1403
+ * Returns a new {RE2::Set} object for the specified anchor with the default
1404
+ * options.
1405
+ *
1406
+ * @param [Symbol] anchor One of :unanchored, :anchor_start, :anchor_both
1407
+ * @raise [ArgumentError] if anchor is not :unanchored, :anchor_start or :anchor_both
1408
+ * @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
1409
+ *
1410
+ * @overload initialize(anchor, options)
1411
+ * Returns a new {RE2::Set} object with the specified options.
1412
+ *
1413
+ * @param [Symbol] anchor One of :unanchored, :anchor_start, :anchor_both
1414
+ * @param [Hash] options the options with which to compile the pattern
1415
+ * @option options [Boolean] :utf8 (true) text and pattern are UTF-8; otherwise Latin-1
1416
+ * @option options [Boolean] :posix_syntax (false) restrict regexps to POSIX egrep syntax
1417
+ * @option options [Boolean] :longest_match (false) search for longest match, not first match
1418
+ * @option options [Boolean] :log_errors (true) log syntax and execution errors to ERROR
1419
+ * @option options [Fixnum] :max_mem approx. max memory footprint of RE2
1420
+ * @option options [Boolean] :literal (false) interpret string as literal, not regexp
1421
+ * @option options [Boolean] :never_nl (false) never match \n, even if it is in regexp
1422
+ * @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with (?i) unless in posix_syntax mode)
1423
+ * @option options [Boolean] :perl_classes (false) allow Perl's \d \s \w \D \S \W when in posix_syntax mode
1424
+ * @option options [Boolean] :word_boundary (false) allow \b \B (word boundary and not) when in posix_syntax mode
1425
+ * @option options [Boolean] :one_line (false) ^ and $ only match beginning and end of text when in posix_syntax mode
1426
+ * @return [RE2::Set] an RE2::Set with the specified anchor and options
1427
+ * @raise [ArgumentError] if anchor is not one of the accepted choices
1428
+ * @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
1429
+ */
1430
+ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
1431
+ VALUE anchor, options;
1432
+ re2_set *s;
1433
+ RE2::Anchor re2_anchor;
1434
+ RE2::Options re2_options;
1435
+
1436
+ rb_scan_args(argc, argv, "02", &anchor, &options);
1437
+ Data_Get_Struct(self, re2_set, s);
1438
+
1439
+ if (RTEST(options)) {
1440
+ parse_re2_options(re2_options, options);
1441
+ }
1442
+ if (NIL_P(anchor)) {
1443
+ re2_anchor = RE2::UNANCHORED;
1444
+ } else {
1445
+ Check_Type(anchor, T_SYMBOL);
1446
+ ID id_anchor = SYM2ID(anchor);
1447
+ if (id_anchor == id_unanchored) {
1448
+ re2_anchor = RE2::UNANCHORED;
1449
+ } else if (id_anchor == id_anchor_start) {
1450
+ re2_anchor = RE2::ANCHOR_START;
1451
+ } else if (id_anchor == id_anchor_both) {
1452
+ re2_anchor = RE2::ANCHOR_BOTH;
1453
+ } else {
1454
+ rb_raise(rb_eArgError, "anchor should be one of: :unanchored, :anchor_start, :anchor_both");
1455
+ }
1456
+ }
1457
+
1458
+ s->set = new(nothrow) RE2::Set(re2_options, re2_anchor);
1459
+ if (s->set == 0) {
1460
+ rb_raise(rb_eNoMemError, "not enough memory to allocate RE2::Set object");
1461
+ }
1462
+
1463
+ return self;
1464
+ }
1465
+
1466
+ /*
1467
+ * Adds a pattern to the set. Returns the index that will identify the pattern
1468
+ * in the output of #match. Cannot be called after #compile has been called.
1469
+ *
1470
+ * @param [String] pattern the regex pattern
1471
+ * @return [Integer] the index of the pattern in the set
1472
+ * @raise [ArgumentError] if called after compile or the pattern is rejected
1473
+ * @example
1474
+ * set = RE2::Set.new
1475
+ * set.add("abc") #=> 0
1476
+ * set.add("def") #=> 1
1477
+ */
1478
+ static VALUE re2_set_add(VALUE self, VALUE pattern) {
1479
+ Check_Type(pattern, T_STRING);
1480
+ re2::StringPiece regex(RSTRING_PTR(pattern), RSTRING_LEN(pattern));
1481
+ std::string err;
1482
+ re2_set *s;
1483
+ Data_Get_Struct(self, re2_set, s);
1484
+ int index = s->set->Add(regex, &err);
1485
+ if (index < 0) {
1486
+ rb_raise(rb_eArgError, "str rejected by RE2::Set->Add(): %s", err.c_str());
1487
+ }
1488
+
1489
+ return INT2FIX(index);
1490
+ }
1491
+
1492
+ /*
1493
+ * Compiles a Set so it can be used to match against. Must be called after #add
1494
+ * and before #match.
1495
+ *
1496
+ * @return [Boolean] whether compilation was a success
1497
+ * @example
1498
+ * set = RE2::Set.new
1499
+ * set.add("abc")
1500
+ * set.compile # => true
1501
+ */
1502
+ static VALUE re2_set_compile(VALUE self) {
1503
+ re2_set *s;
1504
+ Data_Get_Struct(self, re2_set, s);
1505
+
1506
+ return BOOL2RUBY(s->set->Compile());
1507
+ }
1508
+
1509
+ /*
1510
+ * Returns whether the underlying re2 version outputs error information from
1511
+ * RE2::Set::Match. If not, #match will raise an error if attempting to set its
1512
+ * :exception option to true.
1513
+ *
1514
+ * @return [Boolean] whether the underlying re2 outputs error information from Set matches
1515
+ */
1516
+ static VALUE re2_set_match_raises_errors_p(VALUE self) {
1517
+ #ifdef HAVE_ERROR_INFO_ARGUMENT
1518
+ return Qtrue;
1519
+ #else
1520
+ return Qfalse;
1521
+ #endif
1522
+ }
1523
+
1524
+ /*
1525
+ * Matches the given text against patterns in the set, returning an array of
1526
+ * integer indices of the matching patterns if matched or an empty array if
1527
+ * there are no matches.
1528
+ *
1529
+ * @param [String] str the text to match against
1530
+ * @param [Hash] options the options with which to match
1531
+ * @option options [Boolean] :exception (true) whether to raise exceptions with re2's error information (not supported on ABI version 0 of re2)
1532
+ * @return [Array<Integer>] the indices of matching regexps
1533
+ * @raise [MatchError] if an error occurs while matching
1534
+ * @raise [UnsupportedError] if using the :exception option against a version of re2 that does not support it
1535
+ * @example
1536
+ * set = RE2::Set.new
1537
+ * set.add("abc")
1538
+ * set.add("def")
1539
+ * set.compile
1540
+ * set.match("abcdef") # => [0, 1]
1541
+ */
1542
+ static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
1543
+ VALUE str, options, exception_option;
1544
+ bool raise_exception = true;
1545
+ rb_scan_args(argc, argv, "11", &str, &options);
1546
+ Check_Type(str, T_STRING);
1547
+ re2::StringPiece data(RSTRING_PTR(str), RSTRING_LEN(str));
1548
+ std::vector<int> v;
1549
+ re2_set *s;
1550
+ Data_Get_Struct(self, re2_set, s);
1551
+
1552
+ if (RTEST(options)) {
1553
+ Check_Type(options, T_HASH);
1554
+
1555
+ exception_option = rb_hash_aref(options, ID2SYM(id_exception));
1556
+ if (!NIL_P(exception_option)) {
1557
+ raise_exception = RTEST(exception_option);
1558
+ }
1559
+ }
1560
+
1561
+ if (raise_exception) {
1562
+ #ifdef HAVE_ERROR_INFO_ARGUMENT
1563
+ RE2::Set::ErrorInfo e;
1564
+ bool match_failed = !s->set->Match(data, &v, &e);
1565
+ VALUE result = rb_ary_new2(v.size());
1566
+
1567
+ if (match_failed) {
1568
+ switch (e.kind) {
1569
+ case RE2::Set::kNoError:
1570
+ break;
1571
+ case RE2::Set::kNotCompiled:
1572
+ rb_raise(re2_eSetMatchError, "#match must not be called before #compile");
1573
+ case RE2::Set::kOutOfMemory:
1574
+ rb_raise(re2_eSetMatchError, "The DFA ran out of memory");
1575
+ case RE2::Set::kInconsistent:
1576
+ rb_raise(re2_eSetMatchError, "RE2::Prog internal error");
1577
+ default: // Just in case a future version of libre2 adds new ErrorKinds
1578
+ rb_raise(re2_eSetMatchError, "Unknown RE2::Set::ErrorKind: %d", e.kind);
1579
+ }
1580
+ } else {
1581
+ for (size_t i = 0; i < v.size(); i++) {
1582
+ rb_ary_push(result, INT2FIX(v[i]));
1583
+ }
1584
+ }
1585
+
1586
+ return result;
1587
+ #else
1588
+ rb_raise(re2_eSetUnsupportedError, "current version of RE2::Set::Match() does not output error information, :exception option can only be set to false");
1589
+ #endif
1590
+ } else {
1591
+ bool matched = s->set->Match(data, &v);
1592
+ VALUE result = rb_ary_new2(v.size());
1593
+
1594
+ if (matched) {
1595
+ for (size_t i = 0; i < v.size(); i++) {
1596
+ rb_ary_push(result, INT2FIX(v[i]));
1597
+ }
1598
+ }
1599
+
1600
+ return result;
1601
+ }
1602
+ }
1603
+
1350
1604
  /* Forward declare Init_re2 to be called by C code but define it separately so
1351
1605
  * that YARD can parse it.
1352
1606
  */
@@ -1357,12 +1611,18 @@ void Init_re2(void) {
1357
1611
  re2_cRegexp = rb_define_class_under(re2_mRE2, "Regexp", rb_cObject);
1358
1612
  re2_cMatchData = rb_define_class_under(re2_mRE2, "MatchData", rb_cObject);
1359
1613
  re2_cScanner = rb_define_class_under(re2_mRE2, "Scanner", rb_cObject);
1614
+ re2_cSet = rb_define_class_under(re2_mRE2, "Set", rb_cObject);
1615
+ re2_eSetMatchError = rb_define_class_under(re2_cSet, "MatchError",
1616
+ rb_const_get(rb_cObject, rb_intern("StandardError")));
1617
+ re2_eSetUnsupportedError = rb_define_class_under(re2_cSet, "UnsupportedError",
1618
+ rb_const_get(rb_cObject, rb_intern("StandardError")));
1360
1619
 
1361
1620
  rb_define_alloc_func(re2_cRegexp, (VALUE (*)(VALUE))re2_regexp_allocate);
1362
1621
  rb_define_alloc_func(re2_cMatchData,
1363
1622
  (VALUE (*)(VALUE))re2_matchdata_allocate);
1364
1623
  rb_define_alloc_func(re2_cScanner,
1365
1624
  (VALUE (*)(VALUE))re2_scanner_allocate);
1625
+ rb_define_alloc_func(re2_cSet, (VALUE (*)(VALUE))re2_set_allocate);
1366
1626
 
1367
1627
  rb_define_method(re2_cMatchData, "string",
1368
1628
  RUBY_METHOD_FUNC(re2_matchdata_string), 0);
@@ -1379,7 +1639,8 @@ void Init_re2(void) {
1379
1639
  rb_define_method(re2_cMatchData, "end",
1380
1640
  RUBY_METHOD_FUNC(re2_matchdata_end), 1);
1381
1641
  rb_define_method(re2_cMatchData, "[]", RUBY_METHOD_FUNC(re2_matchdata_aref),
1382
- -1); rb_define_method(re2_cMatchData, "to_s",
1642
+ -1);
1643
+ rb_define_method(re2_cMatchData, "to_s",
1383
1644
  RUBY_METHOD_FUNC(re2_matchdata_to_s), 0);
1384
1645
  rb_define_method(re2_cMatchData, "inspect",
1385
1646
  RUBY_METHOD_FUNC(re2_matchdata_inspect), 0);
@@ -1456,6 +1717,14 @@ void Init_re2(void) {
1456
1717
  rb_define_method(re2_cRegexp, "one_line?",
1457
1718
  RUBY_METHOD_FUNC(re2_regexp_one_line), 0);
1458
1719
 
1720
+ rb_define_singleton_method(re2_cSet, "match_raises_errors?",
1721
+ RUBY_METHOD_FUNC(re2_set_match_raises_errors_p), 0);
1722
+ rb_define_method(re2_cSet, "initialize",
1723
+ RUBY_METHOD_FUNC(re2_set_initialize), -1);
1724
+ rb_define_method(re2_cSet, "add", RUBY_METHOD_FUNC(re2_set_add), 1);
1725
+ rb_define_method(re2_cSet, "compile", RUBY_METHOD_FUNC(re2_set_compile), 0);
1726
+ rb_define_method(re2_cSet, "match", RUBY_METHOD_FUNC(re2_set_match), -1);
1727
+
1459
1728
  rb_define_module_function(re2_mRE2, "Replace",
1460
1729
  RUBY_METHOD_FUNC(re2_Replace), 3);
1461
1730
  rb_define_module_function(re2_mRE2, "GlobalReplace",
@@ -1483,6 +1752,10 @@ void Init_re2(void) {
1483
1752
  id_perl_classes = rb_intern("perl_classes");
1484
1753
  id_word_boundary = rb_intern("word_boundary");
1485
1754
  id_one_line = rb_intern("one_line");
1755
+ id_unanchored = rb_intern("unanchored");
1756
+ id_anchor_start = rb_intern("anchor_start");
1757
+ id_anchor_both = rb_intern("anchor_both");
1758
+ id_exception = rb_intern("exception");
1486
1759
 
1487
1760
  #if 0
1488
1761
  /* Fake so YARD generates the file. */
@@ -13,6 +13,11 @@ RSpec.describe RE2::Regexp do
13
13
  it "raises an error if given an inappropriate type" do
14
14
  expect { RE2::Regexp.new(nil) }.to raise_error(TypeError)
15
15
  end
16
+
17
+ it "allows invalid patterns to be created" do
18
+ re = RE2::Regexp.new('???', :log_errors => false)
19
+ expect(re).to be_a(RE2::Regexp)
20
+ end
16
21
  end
17
22
 
18
23
  describe "#compile" do
@@ -25,6 +30,11 @@ RSpec.describe RE2::Regexp do
25
30
  re = RE2::Regexp.compile('woo', :case_sensitive => false)
26
31
  expect(re).to be_a(RE2::Regexp)
27
32
  end
33
+
34
+ it "allows invalid patterns to be created" do
35
+ re = RE2::Regexp.compile('???', :log_errors => false)
36
+ expect(re).to be_a(RE2::Regexp)
37
+ end
28
38
  end
29
39
 
30
40
  describe "#options" do
@@ -83,6 +93,11 @@ RSpec.describe RE2::Regexp do
83
93
  program_size = RE2::Regexp.new('w(o)(o)').program_size
84
94
  expect(program_size).to be_a(Fixnum)
85
95
  end
96
+
97
+ it "returns -1 for an invalid pattern" do
98
+ program_size = RE2::Regexp.new('???', :log_errors => false).program_size
99
+ expect(program_size).to eq(-1)
100
+ end
86
101
  end
87
102
 
88
103
  describe "#to_str" do
@@ -97,6 +112,11 @@ RSpec.describe RE2::Regexp do
97
112
  pattern = RE2::Regexp.new('w(o)(o)').pattern
98
113
  expect(pattern).to eq("w(o)(o)")
99
114
  end
115
+
116
+ it "returns the pattern even if invalid" do
117
+ pattern = RE2::Regexp.new('???', :log_errors => false).pattern
118
+ expect(pattern).to eq("???")
119
+ end
100
120
  end
101
121
 
102
122
  describe "#inspect" do
@@ -274,6 +294,15 @@ RSpec.describe RE2::Regexp do
274
294
  expect { re.match("My name is Robert Paulson", {}) }.to raise_error(TypeError)
275
295
  end
276
296
 
297
+ it "raises an exception when given a negative number of matches" do
298
+ expect { re.match("My name is Robert Paulson", -1) }.to raise_error(ArgumentError, "number of matches should be >= 0")
299
+ end
300
+
301
+ it "returns nil with an invalid pattern" do
302
+ re = RE2::Regexp.new('???', :log_errors => false)
303
+ expect(re.match('My name is Robert Paulson')).to be_nil
304
+ end
305
+
277
306
  describe "with a specific number of matches under the total in the pattern" do
278
307
  subject { re.match("My name is Robert Paulson", 1) }
279
308
 
@@ -325,6 +354,11 @@ RSpec.describe RE2::Regexp do
325
354
  expect(re.match?("My name is Robert Paulson")).to eq(true)
326
355
  expect(re.match?("My age is 99")).to eq(false)
327
356
  end
357
+
358
+ it "returns false if the pattern is invalid" do
359
+ re = RE2::Regexp.new('???', :log_errors => false)
360
+ expect(re.match?("My name is Robert Paulson")).to eq(false)
361
+ end
328
362
  end
329
363
 
330
364
  describe "#=~" do
@@ -365,13 +399,13 @@ RSpec.describe RE2::Regexp do
365
399
  end
366
400
  end
367
401
 
368
- describe "#escape" do
402
+ describe ".escape" do
369
403
  it "transforms a string into a regexp" do
370
404
  expect(RE2::Regexp.escape("1.5-2.0?")).to eq('1\.5\-2\.0\?')
371
405
  end
372
406
  end
373
407
 
374
- describe "#quote" do
408
+ describe ".quote" do
375
409
  it "transforms a string into a regexp" do
376
410
  expect(RE2::Regexp.quote("1.5-2.0?")).to eq('1\.5\-2\.0\?')
377
411
  end
@@ -383,6 +417,10 @@ RSpec.describe RE2::Regexp do
383
417
  expect(RE2::Regexp.new('abc').number_of_capturing_groups).to eq(0)
384
418
  expect(RE2::Regexp.new('a((b)c)').number_of_capturing_groups).to eq(2)
385
419
  end
420
+
421
+ it "returns -1 for an invalid regexp" do
422
+ expect(RE2::Regexp.new('???', :log_errors => false).number_of_capturing_groups).to eq(-1)
423
+ end
386
424
  end
387
425
 
388
426
  describe "#named_capturing_groups" do
@@ -400,6 +438,10 @@ RSpec.describe RE2::Regexp do
400
438
  expect(groups["bob"]).to eq(1)
401
439
  expect(groups["rob"]).to eq(3)
402
440
  end
441
+
442
+ it "returns an empty hash for an invalid regexp" do
443
+ expect(RE2::Regexp.new('???', :log_errors => false).named_capturing_groups).to be_empty
444
+ end
403
445
  end
404
446
 
405
447
  describe "#scan" do
@@ -45,6 +45,12 @@ RSpec.describe RE2::Scanner do
45
45
  expect(scanner.scan).to be_nil
46
46
  end
47
47
 
48
+ it "returns nil if the regexp is invalid" do
49
+ r = RE2::Regexp.new('???', :log_errors => false)
50
+ scanner = r.scan("Foo bar")
51
+ expect(scanner.scan).to be_nil
52
+ end
53
+
48
54
  it "returns an empty array if the input is empty" do
49
55
  r = RE2::Regexp.new("")
50
56
  scanner = r.scan("")
@@ -0,0 +1,168 @@
1
+ RSpec.describe RE2::Set do
2
+ describe "#initialize" do
3
+ it "returns an instance given no args" do
4
+ set = RE2::Set.new
5
+
6
+ expect(set).to be_a(RE2::Set)
7
+ end
8
+
9
+ it "returns an instance given only an anchor of :unanchored" do
10
+ set = RE2::Set.new(:unanchored)
11
+
12
+ expect(set).to be_a(RE2::Set)
13
+ end
14
+
15
+ it "returns an instance given only an anchor of :anchor_start" do
16
+ set = RE2::Set.new(:anchor_start)
17
+
18
+ expect(set).to be_a(RE2::Set)
19
+ end
20
+
21
+ it "returns an instance given only an anchor of :anchor_both" do
22
+ set = RE2::Set.new(:anchor_both)
23
+
24
+ expect(set).to be_a(RE2::Set)
25
+ end
26
+
27
+ it "returns an instance given an anchor and options" do
28
+ set = RE2::Set.new(:unanchored, :case_sensitive => false)
29
+
30
+ expect(set).to be_a(RE2::Set)
31
+ end
32
+
33
+ it "raises an error if given an inappropriate type" do
34
+ expect { RE2::Set.new(0) }.to raise_error(TypeError)
35
+ end
36
+
37
+ it "raises an error if given an invalid anchor" do
38
+ expect { RE2::Set.new(:not_a_valid_anchor) }.to raise_error(
39
+ ArgumentError,
40
+ "anchor should be one of: :unanchored, :anchor_start, :anchor_both"
41
+ )
42
+ end
43
+ end
44
+
45
+ describe "#add" do
46
+ it "allows multiple patterns to be added", :aggregate_failures do
47
+ set = RE2::Set.new
48
+
49
+ expect(set.add("abc")).to eq(0)
50
+ expect(set.add("def")).to eq(1)
51
+ expect(set.add("ghi")).to eq(2)
52
+ end
53
+
54
+ it "rejects invalid patterns when added" do
55
+ set = RE2::Set.new(:unanchored, :log_errors => false)
56
+
57
+ expect { set.add("???") }.to raise_error(ArgumentError, /str rejected by RE2::Set->Add()/)
58
+ end
59
+
60
+ it "raises an error if called after #compile" do
61
+ set = RE2::Set.new(:unanchored, :log_errors => false)
62
+ set.add("abc")
63
+ set.compile
64
+
65
+ silence_stderr do
66
+ expect { set.add("def") }.to raise_error(ArgumentError)
67
+ end
68
+ end
69
+
70
+ it "raises an error if given a non-string pattern" do
71
+ set = RE2::Set.new(:unanchored, :log_errors => false)
72
+
73
+ expect { set.add(0) }.to raise_error(TypeError)
74
+ end
75
+ end
76
+
77
+ describe "#compile" do
78
+ it "compiles the set without error" do
79
+ set = RE2::Set.new
80
+ set.add("abc")
81
+ set.add("def")
82
+ set.add("ghi")
83
+
84
+ expect(set.compile).to be_truthy
85
+ end
86
+ end
87
+
88
+ describe "#match" do
89
+ it "matches against multiple patterns" do
90
+ set = RE2::Set.new
91
+ set.add("abc")
92
+ set.add("def")
93
+ set.add("ghi")
94
+ set.compile
95
+
96
+ expect(set.match("abcdefghi", :exception => false)).to eq([0, 1, 2])
97
+ end
98
+
99
+ it "raises an error if called before #compile by default" do
100
+ skip "Underlying RE2::Set::Match does not output error information" unless RE2::Set.match_raises_errors?
101
+
102
+ set = RE2::Set.new(:unanchored, :log_errors => false)
103
+
104
+ silence_stderr do
105
+ expect { set.match("") }.to raise_error(RE2::Set::MatchError)
106
+ end
107
+ end
108
+
109
+ it "raises an error if called before #compile when :exception is true" do
110
+ skip "Underlying RE2::Set::Match does not output error information" unless RE2::Set.match_raises_errors?
111
+
112
+ set = RE2::Set.new(:unanchored, :log_errors => false)
113
+
114
+ silence_stderr do
115
+ expect { set.match("", :exception => true) }.to raise_error(RE2::Set::MatchError)
116
+ end
117
+ end
118
+
119
+ it "returns an empty array if called before #compile when :exception is false" do
120
+ set = RE2::Set.new(:unanchored, :log_errors => false)
121
+
122
+ silence_stderr do
123
+ expect(set.match("", :exception => false)).to be_empty
124
+ end
125
+ end
126
+
127
+ it "raises an error if :exception is true and re2 does not support it" do
128
+ skip "Underlying RE2::Set::Match outputs error information" if RE2::Set.match_raises_errors?
129
+
130
+ set = RE2::Set.new(:unanchored, :log_errors => false)
131
+
132
+ silence_stderr do
133
+ expect { set.match("", :exception => true) }.to raise_error(RE2::Set::UnsupportedError)
134
+ end
135
+ end
136
+
137
+ it "raises an error if given non-hash options" do
138
+ set = RE2::Set.new
139
+
140
+ expect { set.match("", 0) }.to raise_error(TypeError)
141
+ end
142
+ end
143
+
144
+ def silence_stderr
145
+ original_stream = STDERR
146
+
147
+ if File.const_defined?(:NULL)
148
+ STDERR.reopen(File::NULL)
149
+ else
150
+ platform = RUBY_PLATFORM == 'java' ? RbConfig::CONFIG['host_os'] : RUBY_PLATFORM
151
+
152
+ case platform
153
+ when /mswin|mingw/i
154
+ STDERR.reopen('NUL')
155
+ when /amiga/i
156
+ STDERR.reopen('NIL')
157
+ when /openvms/i
158
+ STDERR.reopen('NL:')
159
+ else
160
+ STDERR.reopen('/dev/null')
161
+ end
162
+ end
163
+
164
+ yield
165
+ ensure
166
+ STDERR.reopen(original_stream)
167
+ end
168
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: re2
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.0
4
+ version: 1.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paul Mucur
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-03-12 00:00:00.000000000 Z
11
+ date: 2022-10-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake-compiler
@@ -57,6 +57,7 @@ files:
57
57
  - spec/re2/match_data_spec.rb
58
58
  - spec/re2/regexp_spec.rb
59
59
  - spec/re2/scanner_spec.rb
60
+ - spec/re2/set_spec.rb
60
61
  - spec/re2/string_spec.rb
61
62
  - spec/re2_spec.rb
62
63
  - spec/spec_helper.rb
@@ -79,7 +80,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
79
80
  - !ruby/object:Gem::Version
80
81
  version: '0'
81
82
  requirements: []
82
- rubygems_version: 3.1.4
83
+ rubygems_version: 3.3.7
83
84
  signing_key:
84
85
  specification_version: 4
85
86
  summary: Ruby bindings to re2.
@@ -90,4 +91,5 @@ test_files:
90
91
  - spec/re2/regexp_spec.rb
91
92
  - spec/re2/match_data_spec.rb
92
93
  - spec/re2/string_spec.rb
94
+ - spec/re2/set_spec.rb
93
95
  - spec/re2/scanner_spec.rb