re2 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ce0e303b87738a767776165216cbbd8cf0f63ec9dba3ef4389f657cbb5da8dc3
4
- data.tar.gz: 61983a9e93dc64334d43a41f3f978ff71de020bd9d6f27bc409431a313ea58e0
3
+ metadata.gz: 8588a481a52f07a6c965094bb69c1aee177add80d071aee3a1771f97e19dc4f6
4
+ data.tar.gz: bb123e3a2d5352379548bf68f57fd195c8aa458e2da06f0864b7135682c19b1d
5
5
  SHA512:
6
- metadata.gz: 229f667e12094ae2d42ae3d72a08aa0567cf17e1d666d9677b6b0bdfccc549f241870085ac918789fbbcd0fcb538a942a31f6922f32d88cc7b54043b553f35e3
7
- data.tar.gz: 850d6dc79bcfbfe96a913ac93ddc2c8d4e1b79ddd8aa50eb951bfaae99e3a1688ff31fd2e55c2e497946b9ea4913bcca4938070b6fa37f2ca77d88721652ca66
6
+ metadata.gz: b421f15ace868de905dece66db9e607636624cf2548eabe6be9979c397ac825035c0b24a5d8e8d6a351aa5809f6265d07f7b67a4c720a81e0b4e78f8d4599c67
7
+ data.tar.gz: fb7314faaf44a8847eadf207b0226ea71071ddb02ae41b7644ade69b69c06ff86f7c5d1a34042b4ef90633b98b6185bc07a6e2241bfa570089525e73eca6a61a
data/README.md CHANGED
@@ -4,7 +4,7 @@ re2 [![Build Status](https://github.com/mudge/re2/actions/workflows/tests.yml/ba
4
4
  A Ruby binding to [re2][], an "efficient, principled regular expression
5
5
  library".
6
6
 
7
- **Current version:** 1.4.0
7
+ **Current version:** 1.5.0
8
8
  **Supported Ruby versions:** 1.8.7, 1.9.3, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 3.0
9
9
  **Supported re2 versions:** libre2.0 (< 2020-03-02), libre2.1 (2020-03-02), libre2.6 (2020-03-03), libre2.7 (2020-05-01), libre2.8 (2020-07-06), libre2.9 (2020-11-01)
10
10
 
@@ -131,6 +131,22 @@ enum.next #=> ["It"]
131
131
  enum.next #=> ["is"]
132
132
  ```
133
133
 
134
+ As of 1.5.0, you can use `RE2::Set` to match multiple patterns against a
135
+ string. Calling `RE2::Set#add` with a pattern will return an integer index of
136
+ the pattern. After all patterns have been added, the set can be compiled using
137
+ `RE2::Set#compile`, and then `RE2::Set#match` will return an `Array<Integer>`
138
+ containing the indices of all the patterns that matched.
139
+
140
+ ``` ruby
141
+ set = RE2::Set.new
142
+ set.add("abc") #=> 0
143
+ set.add("def") #=> 1
144
+ set.add("ghi") #=> 2
145
+ set.compile #=> true
146
+ set.match("abcdefghi") #=> [0, 1, 2]
147
+ set.match("ghidefabc") #=> [2, 1, 0]
148
+ ```
149
+
134
150
  Features
135
151
  --------
136
152
 
@@ -149,6 +165,8 @@ Features
149
165
 
150
166
  * Incrementally scanning text with `re2.scan(text)`
151
167
 
168
+ * Searching a collection of patterns simultaneously with `RE2::Set`
169
+
152
170
  * Checking regular expression compilation with `re2.ok?`, `re2.error` and
153
171
  `re2.error_arg`
154
172
 
@@ -177,7 +195,9 @@ Contributions
177
195
  * Thanks to [Sebastian Reitenbach](https://github.com/buzzdeee) for reporting
178
196
  the deprecation and removal of the `utf8` encoding option in re2;
179
197
  * Thanks to [Sergio Medina](https://github.com/serch) for reporting a bug when
180
- using `RE2::Scanner#scan` with an invalid regular expression.
198
+ using `RE2::Scanner#scan` with an invalid regular expression;
199
+ * Thanks to [Pritam Baral](https://github.com/pritambaral) for contributing the
200
+ initial support for `RE2::Set`.
181
201
 
182
202
  Contact
183
203
  -------
data/ext/re2/extconf.rb CHANGED
@@ -88,4 +88,28 @@ SRC
88
88
  end
89
89
  end
90
90
 
91
+ checking_for("RE2::Set::Match() with error information") do
92
+ test_re2_set_match_signature = <<SRC
93
+ #include <vector>
94
+ #include <re2/re2.h>
95
+ #include <re2/set.h>
96
+
97
+ int main() {
98
+ RE2::Set s(RE2::DefaultOptions, RE2::UNANCHORED);
99
+ s.Add("foo", NULL);
100
+ s.Compile();
101
+
102
+ std::vector<int> v;
103
+ RE2::Set::ErrorInfo ei;
104
+ s.Match("foo", &v, &ei);
105
+
106
+ return 0;
107
+ }
108
+ SRC
109
+
110
+ if try_compile(test_re2_set_match_signature, compile_options)
111
+ $defs.push("-DHAVE_ERROR_INFO_ARGUMENT")
112
+ end
113
+ end
114
+
91
115
  create_makefile("re2")
data/ext/re2/re2.cc CHANGED
@@ -8,6 +8,7 @@
8
8
 
9
9
  #include <ruby.h>
10
10
  #include <re2/re2.h>
11
+ #include <re2/set.h>
11
12
  #include <stdint.h>
12
13
  #include <string>
13
14
  #include <sstream>
@@ -93,12 +94,82 @@ typedef struct {
93
94
  VALUE regexp, text;
94
95
  } re2_scanner;
95
96
 
96
- VALUE re2_mRE2, re2_cRegexp, re2_cMatchData, re2_cScanner;
97
+ typedef struct {
98
+ RE2::Set *set;
99
+ } re2_set;
100
+
101
+ VALUE re2_mRE2, re2_cRegexp, re2_cMatchData, re2_cScanner, re2_cSet,
102
+ re2_eSetMatchError, re2_eSetUnsupportedError;
97
103
 
98
104
  /* Symbols used in RE2 options. */
99
105
  static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors,
100
106
  id_max_mem, id_literal, id_never_nl, id_case_sensitive,
101
- id_perl_classes, id_word_boundary, id_one_line;
107
+ id_perl_classes, id_word_boundary, id_one_line,
108
+ id_unanchored, id_anchor_start, id_anchor_both, id_exception;
109
+
110
+ void parse_re2_options(RE2::Options& re2_options, VALUE options) {
111
+ if (TYPE(options) != T_HASH) {
112
+ rb_raise(rb_eArgError, "options should be a hash");
113
+ }
114
+ VALUE utf8, posix_syntax, longest_match, log_errors,
115
+ max_mem, literal, never_nl, case_sensitive, perl_classes,
116
+ word_boundary, one_line;
117
+
118
+ utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
119
+ if (!NIL_P(utf8)) {
120
+ re2_options.set_encoding(RTEST(utf8) ? RE2::Options::EncodingUTF8 : RE2::Options::EncodingLatin1);
121
+ }
122
+
123
+ posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
124
+ if (!NIL_P(posix_syntax)) {
125
+ re2_options.set_posix_syntax(RTEST(posix_syntax));
126
+ }
127
+
128
+ longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
129
+ if (!NIL_P(longest_match)) {
130
+ re2_options.set_longest_match(RTEST(longest_match));
131
+ }
132
+
133
+ log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
134
+ if (!NIL_P(log_errors)) {
135
+ re2_options.set_log_errors(RTEST(log_errors));
136
+ }
137
+
138
+ max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
139
+ if (!NIL_P(max_mem)) {
140
+ re2_options.set_max_mem(NUM2INT(max_mem));
141
+ }
142
+
143
+ literal = rb_hash_aref(options, ID2SYM(id_literal));
144
+ if (!NIL_P(literal)) {
145
+ re2_options.set_literal(RTEST(literal));
146
+ }
147
+
148
+ never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
149
+ if (!NIL_P(never_nl)) {
150
+ re2_options.set_never_nl(RTEST(never_nl));
151
+ }
152
+
153
+ case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
154
+ if (!NIL_P(case_sensitive)) {
155
+ re2_options.set_case_sensitive(RTEST(case_sensitive));
156
+ }
157
+
158
+ perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
159
+ if (!NIL_P(perl_classes)) {
160
+ re2_options.set_perl_classes(RTEST(perl_classes));
161
+ }
162
+
163
+ word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
164
+ if (!NIL_P(word_boundary)) {
165
+ re2_options.set_word_boundary(RTEST(word_boundary));
166
+ }
167
+
168
+ one_line = rb_hash_aref(options, ID2SYM(id_one_line));
169
+ if (!NIL_P(one_line)) {
170
+ re2_options.set_one_line(RTEST(one_line));
171
+ }
172
+ }
102
173
 
103
174
  void re2_matchdata_mark(re2_matchdata* self) {
104
175
  rb_gc_mark(self->regexp);
@@ -667,75 +738,15 @@ static VALUE re2_re2(int argc, VALUE *argv, VALUE self) {
667
738
  * @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
668
739
  */
669
740
  static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
670
- VALUE pattern, options, utf8, posix_syntax, longest_match, log_errors,
671
- max_mem, literal, never_nl, case_sensitive, perl_classes,
672
- word_boundary, one_line;
741
+ VALUE pattern, options;
673
742
  re2_pattern *p;
674
743
 
675
744
  rb_scan_args(argc, argv, "11", &pattern, &options);
676
745
  Data_Get_Struct(self, re2_pattern, p);
677
746
 
678
747
  if (RTEST(options)) {
679
- if (TYPE(options) != T_HASH) {
680
- rb_raise(rb_eArgError, "options should be a hash");
681
- }
682
-
683
748
  RE2::Options re2_options;
684
-
685
- utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
686
- if (!NIL_P(utf8)) {
687
- re2_options.set_encoding(RTEST(utf8) ? RE2::Options::EncodingUTF8 : RE2::Options::EncodingLatin1);
688
- }
689
-
690
- posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
691
- if (!NIL_P(posix_syntax)) {
692
- re2_options.set_posix_syntax(RTEST(posix_syntax));
693
- }
694
-
695
- longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
696
- if (!NIL_P(longest_match)) {
697
- re2_options.set_longest_match(RTEST(longest_match));
698
- }
699
-
700
- log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
701
- if (!NIL_P(log_errors)) {
702
- re2_options.set_log_errors(RTEST(log_errors));
703
- }
704
-
705
- max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
706
- if (!NIL_P(max_mem)) {
707
- re2_options.set_max_mem(NUM2INT(max_mem));
708
- }
709
-
710
- literal = rb_hash_aref(options, ID2SYM(id_literal));
711
- if (!NIL_P(literal)) {
712
- re2_options.set_literal(RTEST(literal));
713
- }
714
-
715
- never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
716
- if (!NIL_P(never_nl)) {
717
- re2_options.set_never_nl(RTEST(never_nl));
718
- }
719
-
720
- case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
721
- if (!NIL_P(case_sensitive)) {
722
- re2_options.set_case_sensitive(RTEST(case_sensitive));
723
- }
724
-
725
- perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
726
- if (!NIL_P(perl_classes)) {
727
- re2_options.set_perl_classes(RTEST(perl_classes));
728
- }
729
-
730
- word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
731
- if (!NIL_P(word_boundary)) {
732
- re2_options.set_word_boundary(RTEST(word_boundary));
733
- }
734
-
735
- one_line = rb_hash_aref(options, ID2SYM(id_one_line));
736
- if (!NIL_P(one_line)) {
737
- re2_options.set_one_line(RTEST(one_line));
738
- }
749
+ parse_re2_options(re2_options, options);
739
750
 
740
751
  p->pattern = new(nothrow) RE2(StringValuePtr(pattern), re2_options);
741
752
  } else {
@@ -1362,6 +1373,234 @@ static VALUE re2_QuoteMeta(VALUE self, VALUE unquoted) {
1362
1373
  return rb_str_new(quoted_string.data(), quoted_string.size());
1363
1374
  }
1364
1375
 
1376
+ void re2_set_free(re2_set *self) {
1377
+ if (self->set) {
1378
+ delete self->set;
1379
+ }
1380
+ free(self);
1381
+ }
1382
+
1383
+ static VALUE re2_set_allocate(VALUE klass) {
1384
+ re2_set *s;
1385
+ VALUE result = Data_Make_Struct(klass, re2_set, 0, re2_set_free, s);
1386
+ return result;
1387
+ }
1388
+
1389
+ /*
1390
+ * Returns a new {RE2::Set} object, a collection of patterns that can be
1391
+ * searched for simultaneously.
1392
+ *
1393
+ * @return [RE2::Set]
1394
+ *
1395
+ * @overload initialize
1396
+ * Returns a new {RE2::Set} object for unanchored patterns with the default
1397
+ * options.
1398
+ *
1399
+ * @raise [NoMemoryError] if memory could not be allocated for the set
1400
+ * @return [RE2::Set]
1401
+ *
1402
+ * @overload initialize(anchor)
1403
+ * Returns a new {RE2::Set} object for the specified anchor with the default
1404
+ * options.
1405
+ *
1406
+ * @param [Symbol] anchor One of :unanchored, :anchor_start, :anchor_both
1407
+ * @raise [ArgumentError] if anchor is not :unanchored, :anchor_start or :anchor_both
1408
+ * @raise [NoMemoryError] if memory could not be allocated for the set
1409
+ *
1410
+ * @overload initialize(anchor, options)
1411
+ * Returns a new {RE2::Set} object with the specified options.
1412
+ *
1413
+ * @param [Symbol] anchor One of :unanchored, :anchor_start, :anchor_both
1414
+ * @param [Hash] options the options with which to compile the pattern
1415
+ * @option options [Boolean] :utf8 (true) text and pattern are UTF-8; otherwise Latin-1
1416
+ * @option options [Boolean] :posix_syntax (false) restrict regexps to POSIX egrep syntax
1417
+ * @option options [Boolean] :longest_match (false) search for longest match, not first match
1418
+ * @option options [Boolean] :log_errors (true) log syntax and execution errors to ERROR
1419
+ * @option options [Fixnum] :max_mem approx. max memory footprint of RE2
1420
+ * @option options [Boolean] :literal (false) interpret string as literal, not regexp
1421
+ * @option options [Boolean] :never_nl (false) never match \n, even if it is in regexp
1422
+ * @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with (?i) unless in posix_syntax mode)
1423
+ * @option options [Boolean] :perl_classes (false) allow Perl's \d \s \w \D \S \W when in posix_syntax mode
1424
+ * @option options [Boolean] :word_boundary (false) allow \b \B (word boundary and not) when in posix_syntax mode
1425
+ * @option options [Boolean] :one_line (false) ^ and $ only match beginning and end of text when in posix_syntax mode
1426
+ * @return [RE2::Set] an RE2::Set with the specified anchor and options
1427
+ * @raise [ArgumentError] if anchor is not one of the accepted choices
1428
+ * @raise [NoMemoryError] if memory could not be allocated for the set
1429
+ */
1430
+ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
1431
+ VALUE anchor, options;
1432
+ re2_set *s;
1433
+ RE2::Anchor re2_anchor;
1434
+ RE2::Options re2_options;
1435
+
1436
+ rb_scan_args(argc, argv, "02", &anchor, &options);
1437
+ Data_Get_Struct(self, re2_set, s);
1438
+
1439
+ if (RTEST(options)) {
1440
+ parse_re2_options(re2_options, options);
1441
+ }
1442
+ if (NIL_P(anchor)) {
1443
+ re2_anchor = RE2::UNANCHORED;
1444
+ } else {
1445
+ Check_Type(anchor, T_SYMBOL);
1446
+ ID id_anchor = SYM2ID(anchor);
1447
+ if (id_anchor == id_unanchored) {
1448
+ re2_anchor = RE2::UNANCHORED;
1449
+ } else if (id_anchor == id_anchor_start) {
1450
+ re2_anchor = RE2::ANCHOR_START;
1451
+ } else if (id_anchor == id_anchor_both) {
1452
+ re2_anchor = RE2::ANCHOR_BOTH;
1453
+ } else {
1454
+ rb_raise(rb_eArgError, "anchor should be one of: :unanchored, :anchor_start, :anchor_both");
1455
+ }
1456
+ }
1457
+
1458
+ s->set = new(nothrow) RE2::Set(re2_options, re2_anchor);
1459
+ if (s->set == 0) {
1460
+ rb_raise(rb_eNoMemError, "not enough memory to allocate RE2::Set object");
1461
+ }
1462
+
1463
+ return self;
1464
+ }
1465
+
1466
+ /*
1467
+ * Adds a pattern to the set. Returns the index that will identify the pattern
1468
+ * in the output of #match. Cannot be called after #compile has been called.
1469
+ *
1470
+ * @param [String] pattern the regex pattern
1471
+ * @return [Integer] the index of the pattern in the set
1472
+ * @raise [ArgumentError] if called after compile or the pattern is rejected
1473
+ * @example
1474
+ * set = RE2::Set.new
1475
+ * set.add("abc") #=> 0
1476
+ * set.add("def") #=> 1
1477
+ */
1478
+ static VALUE re2_set_add(VALUE self, VALUE pattern) {
1479
+ Check_Type(pattern, T_STRING);
1480
+ re2::StringPiece regex(RSTRING_PTR(pattern), RSTRING_LEN(pattern));
1481
+ std::string err;
1482
+ re2_set *s;
1483
+ Data_Get_Struct(self, re2_set, s);
1484
+ int index = s->set->Add(regex, &err);
1485
+ if (index < 0) {
1486
+ rb_raise(rb_eArgError, "str rejected by RE2::Set->Add(): %s", err.c_str());
1487
+ }
1488
+
1489
+ return INT2FIX(index);
1490
+ }
1491
+
1492
+ /*
1493
+ * Compiles a Set so it can be used to match against. Must be called after #add
1494
+ * and before #match.
1495
+ *
1496
+ * @return [Boolean] whether compilation was a success
1497
+ * @example
1498
+ * set = RE2::Set.new
1499
+ * set.add("abc")
1500
+ * set.compile # => true
1501
+ */
1502
+ static VALUE re2_set_compile(VALUE self) {
1503
+ re2_set *s;
1504
+ Data_Get_Struct(self, re2_set, s);
1505
+
1506
+ return BOOL2RUBY(s->set->Compile());
1507
+ }
1508
+
1509
+ /*
1510
+ * Returns whether the underlying re2 version outputs error information from
1511
+ * RE2::Set::Match. If not, #match will raise an error unless its :exception
1512
+ * option is set to false.
1513
+ *
1514
+ * @return [Boolean] whether the underlying re2 outputs error information from Set matches
1515
+ */
1516
+ static VALUE re2_set_match_raises_errors_p(VALUE self) {
1517
+ #ifdef HAVE_ERROR_INFO_ARGUMENT
1518
+ return Qtrue;
1519
+ #else
1520
+ return Qfalse;
1521
+ #endif
1522
+ }
1523
+
1524
+ /*
1525
+ * Matches the given text against patterns in the set, returning an array of
1526
+ * integer indices of the matching patterns if matched or an empty array if
1527
+ * there are no matches.
1528
+ *
1529
+ * @param [String] str the text to match against
1530
+ * @param [Hash] options the options with which to match
1531
+ * @option options [Boolean] :exception (true) whether to raise exceptions with re2's error information (not supported on ABI version 0 of re2)
1532
+ * @return [Array<Integer>] the indices of matching regexps
1533
+ * @raise [MatchError] if an error occurs while matching
1534
+ * @raise [UnsupportedError] if using the :exception option against a version of re2 that does not support it
1535
+ * @example
1536
+ * set = RE2::Set.new
1537
+ * set.add("abc")
1538
+ * set.add("def")
1539
+ * set.compile
1540
+ * set.match("abcdef") # => [0, 1]
1541
+ */
1542
+ static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
1543
+ VALUE str, options, exception_option;
1544
+ bool raise_exception = true;
1545
+ rb_scan_args(argc, argv, "11", &str, &options);
1546
+ Check_Type(str, T_STRING);
1547
+ re2::StringPiece data(RSTRING_PTR(str), RSTRING_LEN(str));
1548
+ std::vector<int> v;
1549
+ re2_set *s;
1550
+ Data_Get_Struct(self, re2_set, s);
1551
+
1552
+ if (RTEST(options)) {
1553
+ Check_Type(options, T_HASH);
1554
+
1555
+ exception_option = rb_hash_aref(options, ID2SYM(id_exception));
1556
+ if (!NIL_P(exception_option)) {
1557
+ raise_exception = RTEST(exception_option);
1558
+ }
1559
+ }
1560
+
1561
+ if (raise_exception) {
1562
+ #ifdef HAVE_ERROR_INFO_ARGUMENT
1563
+ RE2::Set::ErrorInfo e;
1564
+ bool match_failed = !s->set->Match(data, &v, &e);
1565
+ VALUE result = rb_ary_new2(v.size());
1566
+
1567
+ if (match_failed) {
1568
+ switch (e.kind) {
1569
+ case RE2::Set::kNoError:
1570
+ break;
1571
+ case RE2::Set::kNotCompiled:
1572
+ rb_raise(re2_eSetMatchError, "#match must not be called before #compile");
1573
+ case RE2::Set::kOutOfMemory:
1574
+ rb_raise(re2_eSetMatchError, "The DFA ran out of memory");
1575
+ case RE2::Set::kInconsistent:
1576
+ rb_raise(re2_eSetMatchError, "RE2::Prog internal error");
1577
+ default: // Just in case a future version of libre2 adds new ErrorKinds
1578
+ rb_raise(re2_eSetMatchError, "Unknown RE2::Set::ErrorKind: %d", e.kind);
1579
+ }
1580
+ } else {
1581
+ for (size_t i = 0; i < v.size(); i++) {
1582
+ rb_ary_push(result, INT2FIX(v[i]));
1583
+ }
1584
+ }
1585
+
1586
+ return result;
1587
+ #else
1588
+ rb_raise(re2_eSetUnsupportedError, "current version of RE2::Set::Match() does not output error information, :exception option can only be set to false");
1589
+ #endif
1590
+ } else {
1591
+ bool matched = s->set->Match(data, &v);
1592
+ VALUE result = rb_ary_new2(v.size());
1593
+
1594
+ if (matched) {
1595
+ for (size_t i = 0; i < v.size(); i++) {
1596
+ rb_ary_push(result, INT2FIX(v[i]));
1597
+ }
1598
+ }
1599
+
1600
+ return result;
1601
+ }
1602
+ }
1603
+
1365
1604
  /* Forward declare Init_re2 to be called by C code but define it separately so
1366
1605
  * that YARD can parse it.
1367
1606
  */
@@ -1372,12 +1611,18 @@ void Init_re2(void) {
1372
1611
  re2_cRegexp = rb_define_class_under(re2_mRE2, "Regexp", rb_cObject);
1373
1612
  re2_cMatchData = rb_define_class_under(re2_mRE2, "MatchData", rb_cObject);
1374
1613
  re2_cScanner = rb_define_class_under(re2_mRE2, "Scanner", rb_cObject);
1614
+ re2_cSet = rb_define_class_under(re2_mRE2, "Set", rb_cObject);
1615
+ re2_eSetMatchError = rb_define_class_under(re2_cSet, "MatchError",
1616
+ rb_const_get(rb_cObject, rb_intern("StandardError")));
1617
+ re2_eSetUnsupportedError = rb_define_class_under(re2_cSet, "UnsupportedError",
1618
+ rb_const_get(rb_cObject, rb_intern("StandardError")));
1375
1619
 
1376
1620
  rb_define_alloc_func(re2_cRegexp, (VALUE (*)(VALUE))re2_regexp_allocate);
1377
1621
  rb_define_alloc_func(re2_cMatchData,
1378
1622
  (VALUE (*)(VALUE))re2_matchdata_allocate);
1379
1623
  rb_define_alloc_func(re2_cScanner,
1380
1624
  (VALUE (*)(VALUE))re2_scanner_allocate);
1625
+ rb_define_alloc_func(re2_cSet, (VALUE (*)(VALUE))re2_set_allocate);
1381
1626
 
1382
1627
  rb_define_method(re2_cMatchData, "string",
1383
1628
  RUBY_METHOD_FUNC(re2_matchdata_string), 0);
@@ -1394,7 +1639,8 @@ void Init_re2(void) {
1394
1639
  rb_define_method(re2_cMatchData, "end",
1395
1640
  RUBY_METHOD_FUNC(re2_matchdata_end), 1);
1396
1641
  rb_define_method(re2_cMatchData, "[]", RUBY_METHOD_FUNC(re2_matchdata_aref),
1397
- -1); rb_define_method(re2_cMatchData, "to_s",
1642
+ -1);
1643
+ rb_define_method(re2_cMatchData, "to_s",
1398
1644
  RUBY_METHOD_FUNC(re2_matchdata_to_s), 0);
1399
1645
  rb_define_method(re2_cMatchData, "inspect",
1400
1646
  RUBY_METHOD_FUNC(re2_matchdata_inspect), 0);
@@ -1471,6 +1717,14 @@ void Init_re2(void) {
1471
1717
  rb_define_method(re2_cRegexp, "one_line?",
1472
1718
  RUBY_METHOD_FUNC(re2_regexp_one_line), 0);
1473
1719
 
1720
+ rb_define_singleton_method(re2_cSet, "match_raises_errors?",
1721
+ RUBY_METHOD_FUNC(re2_set_match_raises_errors_p), 0);
1722
+ rb_define_method(re2_cSet, "initialize",
1723
+ RUBY_METHOD_FUNC(re2_set_initialize), -1);
1724
+ rb_define_method(re2_cSet, "add", RUBY_METHOD_FUNC(re2_set_add), 1);
1725
+ rb_define_method(re2_cSet, "compile", RUBY_METHOD_FUNC(re2_set_compile), 0);
1726
+ rb_define_method(re2_cSet, "match", RUBY_METHOD_FUNC(re2_set_match), -1);
1727
+
1474
1728
  rb_define_module_function(re2_mRE2, "Replace",
1475
1729
  RUBY_METHOD_FUNC(re2_Replace), 3);
1476
1730
  rb_define_module_function(re2_mRE2, "GlobalReplace",
@@ -1498,6 +1752,10 @@ void Init_re2(void) {
1498
1752
  id_perl_classes = rb_intern("perl_classes");
1499
1753
  id_word_boundary = rb_intern("word_boundary");
1500
1754
  id_one_line = rb_intern("one_line");
1755
+ id_unanchored = rb_intern("unanchored");
1756
+ id_anchor_start = rb_intern("anchor_start");
1757
+ id_anchor_both = rb_intern("anchor_both");
1758
+ id_exception = rb_intern("exception");
1501
1759
 
1502
1760
  #if 0
1503
1761
  /* Fake so YARD generates the file. */
@@ -0,0 +1,168 @@
1
+ RSpec.describe RE2::Set do
2
+ describe "#initialize" do
3
+ it "returns an instance given no args" do
4
+ set = RE2::Set.new
5
+
6
+ expect(set).to be_a(RE2::Set)
7
+ end
8
+
9
+ it "returns an instance given only an anchor of :unanchored" do
10
+ set = RE2::Set.new(:unanchored)
11
+
12
+ expect(set).to be_a(RE2::Set)
13
+ end
14
+
15
+ it "returns an instance given only an anchor of :anchor_start" do
16
+ set = RE2::Set.new(:anchor_start)
17
+
18
+ expect(set).to be_a(RE2::Set)
19
+ end
20
+
21
+ it "returns an instance given only an anchor of :anchor_both" do
22
+ set = RE2::Set.new(:anchor_both)
23
+
24
+ expect(set).to be_a(RE2::Set)
25
+ end
26
+
27
+ it "returns an instance given an anchor and options" do
28
+ set = RE2::Set.new(:unanchored, :case_sensitive => false)
29
+
30
+ expect(set).to be_a(RE2::Set)
31
+ end
32
+
33
+ it "raises an error if given an inappropriate type" do
34
+ expect { RE2::Set.new(0) }.to raise_error(TypeError)
35
+ end
36
+
37
+ it "raises an error if given an invalid anchor" do
38
+ expect { RE2::Set.new(:not_a_valid_anchor) }.to raise_error(
39
+ ArgumentError,
40
+ "anchor should be one of: :unanchored, :anchor_start, :anchor_both"
41
+ )
42
+ end
43
+ end
44
+
45
+ describe "#add" do
46
+ it "allows multiple patterns to be added", :aggregate_failures do
47
+ set = RE2::Set.new
48
+
49
+ expect(set.add("abc")).to eq(0)
50
+ expect(set.add("def")).to eq(1)
51
+ expect(set.add("ghi")).to eq(2)
52
+ end
53
+
54
+ it "rejects invalid patterns when added" do
55
+ set = RE2::Set.new(:unanchored, :log_errors => false)
56
+
57
+ expect { set.add("???") }.to raise_error(ArgumentError, /str rejected by RE2::Set->Add()/)
58
+ end
59
+
60
+ it "raises an error if called after #compile" do
61
+ set = RE2::Set.new(:unanchored, :log_errors => false)
62
+ set.add("abc")
63
+ set.compile
64
+
65
+ silence_stderr do
66
+ expect { set.add("def") }.to raise_error(ArgumentError)
67
+ end
68
+ end
69
+
70
+ it "raises an error if given a non-string pattern" do
71
+ set = RE2::Set.new(:unanchored, :log_errors => false)
72
+
73
+ expect { set.add(0) }.to raise_error(TypeError)
74
+ end
75
+ end
76
+
77
+ describe "#compile" do
78
+ it "compiles the set without error" do
79
+ set = RE2::Set.new
80
+ set.add("abc")
81
+ set.add("def")
82
+ set.add("ghi")
83
+
84
+ expect(set.compile).to be_truthy
85
+ end
86
+ end
87
+
88
+ describe "#match" do
89
+ it "matches against multiple patterns" do
90
+ set = RE2::Set.new
91
+ set.add("abc")
92
+ set.add("def")
93
+ set.add("ghi")
94
+ set.compile
95
+
96
+ expect(set.match("abcdefghi", :exception => false)).to eq([0, 1, 2])
97
+ end
98
+
99
+ it "raises an error if called before #compile by default" do
100
+ skip "Underlying RE2::Set::Match does not output error information" unless RE2::Set.match_raises_errors?
101
+
102
+ set = RE2::Set.new(:unanchored, :log_errors => false)
103
+
104
+ silence_stderr do
105
+ expect { set.match("") }.to raise_error(RE2::Set::MatchError)
106
+ end
107
+ end
108
+
109
+ it "raises an error if called before #compile when :exception is true" do
110
+ skip "Underlying RE2::Set::Match does not output error information" unless RE2::Set.match_raises_errors?
111
+
112
+ set = RE2::Set.new(:unanchored, :log_errors => false)
113
+
114
+ silence_stderr do
115
+ expect { set.match("", :exception => true) }.to raise_error(RE2::Set::MatchError)
116
+ end
117
+ end
118
+
119
+ it "returns an empty array if called before #compile when :exception is false" do
120
+ set = RE2::Set.new(:unanchored, :log_errors => false)
121
+
122
+ silence_stderr do
123
+ expect(set.match("", :exception => false)).to be_empty
124
+ end
125
+ end
126
+
127
+ it "raises an error if :exception is true and re2 does not support it" do
128
+ skip "Underlying RE2::Set::Match outputs error information" if RE2::Set.match_raises_errors?
129
+
130
+ set = RE2::Set.new(:unanchored, :log_errors => false)
131
+
132
+ silence_stderr do
133
+ expect { set.match("", :exception => true) }.to raise_error(RE2::Set::UnsupportedError)
134
+ end
135
+ end
136
+
137
+ it "raises an error if given non-hash options" do
138
+ set = RE2::Set.new
139
+
140
+ expect { set.match("", 0) }.to raise_error(TypeError)
141
+ end
142
+ end
143
+
144
+ def silence_stderr
145
+ original_stream = STDERR
146
+
147
+ if File.const_defined?(:NULL)
148
+ STDERR.reopen(File::NULL)
149
+ else
150
+ platform = RUBY_PLATFORM == 'java' ? RbConfig::CONFIG['host_os'] : RUBY_PLATFORM
151
+
152
+ case platform
153
+ when /mswin|mingw/i
154
+ STDERR.reopen('NUL')
155
+ when /amiga/i
156
+ STDERR.reopen('NIL')
157
+ when /openvms/i
158
+ STDERR.reopen('NL:')
159
+ else
160
+ STDERR.reopen('/dev/null')
161
+ end
162
+ end
163
+
164
+ yield
165
+ ensure
166
+ STDERR.reopen(original_stream)
167
+ end
168
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: re2
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.0
4
+ version: 1.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paul Mucur
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-03-29 00:00:00.000000000 Z
11
+ date: 2022-10-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake-compiler
@@ -57,6 +57,7 @@ files:
57
57
  - spec/re2/match_data_spec.rb
58
58
  - spec/re2/regexp_spec.rb
59
59
  - spec/re2/scanner_spec.rb
60
+ - spec/re2/set_spec.rb
60
61
  - spec/re2/string_spec.rb
61
62
  - spec/re2_spec.rb
62
63
  - spec/spec_helper.rb
@@ -79,7 +80,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
79
80
  - !ruby/object:Gem::Version
80
81
  version: '0'
81
82
  requirements: []
82
- rubygems_version: 3.2.3
83
+ rubygems_version: 3.3.7
83
84
  signing_key:
84
85
  specification_version: 4
85
86
  summary: Ruby bindings to re2.
@@ -90,4 +91,5 @@ test_files:
90
91
  - spec/re2/regexp_spec.rb
91
92
  - spec/re2/match_data_spec.rb
92
93
  - spec/re2/string_spec.rb
94
+ - spec/re2/set_spec.rb
93
95
  - spec/re2/scanner_spec.rb