re2 1.4.0 → 1.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ce0e303b87738a767776165216cbbd8cf0f63ec9dba3ef4389f657cbb5da8dc3
4
- data.tar.gz: 61983a9e93dc64334d43a41f3f978ff71de020bd9d6f27bc409431a313ea58e0
3
+ metadata.gz: 8588a481a52f07a6c965094bb69c1aee177add80d071aee3a1771f97e19dc4f6
4
+ data.tar.gz: bb123e3a2d5352379548bf68f57fd195c8aa458e2da06f0864b7135682c19b1d
5
5
  SHA512:
6
- metadata.gz: 229f667e12094ae2d42ae3d72a08aa0567cf17e1d666d9677b6b0bdfccc549f241870085ac918789fbbcd0fcb538a942a31f6922f32d88cc7b54043b553f35e3
7
- data.tar.gz: 850d6dc79bcfbfe96a913ac93ddc2c8d4e1b79ddd8aa50eb951bfaae99e3a1688ff31fd2e55c2e497946b9ea4913bcca4938070b6fa37f2ca77d88721652ca66
6
+ metadata.gz: b421f15ace868de905dece66db9e607636624cf2548eabe6be9979c397ac825035c0b24a5d8e8d6a351aa5809f6265d07f7b67a4c720a81e0b4e78f8d4599c67
7
+ data.tar.gz: fb7314faaf44a8847eadf207b0226ea71071ddb02ae41b7644ade69b69c06ff86f7c5d1a34042b4ef90633b98b6185bc07a6e2241bfa570089525e73eca6a61a
data/README.md CHANGED
@@ -4,7 +4,7 @@ re2 [![Build Status](https://github.com/mudge/re2/actions/workflows/tests.yml/ba
4
4
  A Ruby binding to [re2][], an "efficient, principled regular expression
5
5
  library".
6
6
 
7
- **Current version:** 1.4.0
7
+ **Current version:** 1.5.0
8
8
  **Supported Ruby versions:** 1.8.7, 1.9.3, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 3.0
9
9
  **Supported re2 versions:** libre2.0 (< 2020-03-02), libre2.1 (2020-03-02), libre2.6 (2020-03-03), libre2.7 (2020-05-01), libre2.8 (2020-07-06), libre2.9 (2020-11-01)
10
10
 
@@ -131,6 +131,22 @@ enum.next #=> ["It"]
131
131
  enum.next #=> ["is"]
132
132
  ```
133
133
 
134
+ As of 1.5.0, you can use `RE2::Set` to match multiple patterns against a
135
+ string. Calling `RE2::Set#add` with a pattern will return an integer index of
136
+ the pattern. After all patterns have been added, the set can be compiled using
137
+ `RE2::Set#compile`, and then `RE2::Set#match` will return an `Array<Integer>`
138
+ containing the indices of all the patterns that matched.
139
+
140
+ ``` ruby
141
+ set = RE2::Set.new
142
+ set.add("abc") #=> 0
143
+ set.add("def") #=> 1
144
+ set.add("ghi") #=> 2
145
+ set.compile #=> true
146
+ set.match("abcdefghi") #=> [0, 1, 2]
147
+ set.match("ghidefabc") #=> [2, 1, 0]
148
+ ```
149
+
134
150
  Features
135
151
  --------
136
152
 
@@ -149,6 +165,8 @@ Features
149
165
 
150
166
  * Incrementally scanning text with `re2.scan(text)`
151
167
 
168
+ * Searching a collection of patterns simultaneously with `RE2::Set`
169
+
152
170
  * Checking regular expression compilation with `re2.ok?`, `re2.error` and
153
171
  `re2.error_arg`
154
172
 
@@ -177,7 +195,9 @@ Contributions
177
195
  * Thanks to [Sebastian Reitenbach](https://github.com/buzzdeee) for reporting
178
196
  the deprecation and removal of the `utf8` encoding option in re2;
179
197
  * Thanks to [Sergio Medina](https://github.com/serch) for reporting a bug when
180
- using `RE2::Scanner#scan` with an invalid regular expression.
198
+ using `RE2::Scanner#scan` with an invalid regular expression;
199
+ * Thanks to [Pritam Baral](https://github.com/pritambaral) for contributing the
200
+ initial support for `RE2::Set`.
181
201
 
182
202
  Contact
183
203
  -------
data/ext/re2/extconf.rb CHANGED
@@ -88,4 +88,28 @@ SRC
88
88
  end
89
89
  end
90
90
 
91
+ checking_for("RE2::Set::Match() with error information") do
92
+ test_re2_set_match_signature = <<SRC
93
+ #include <vector>
94
+ #include <re2/re2.h>
95
+ #include <re2/set.h>
96
+
97
+ int main() {
98
+ RE2::Set s(RE2::DefaultOptions, RE2::UNANCHORED);
99
+ s.Add("foo", NULL);
100
+ s.Compile();
101
+
102
+ std::vector<int> v;
103
+ RE2::Set::ErrorInfo ei;
104
+ s.Match("foo", &v, &ei);
105
+
106
+ return 0;
107
+ }
108
+ SRC
109
+
110
+ if try_compile(test_re2_set_match_signature, compile_options)
111
+ $defs.push("-DHAVE_ERROR_INFO_ARGUMENT")
112
+ end
113
+ end
114
+
91
115
  create_makefile("re2")
data/ext/re2/re2.cc CHANGED
@@ -8,6 +8,7 @@
8
8
 
9
9
  #include <ruby.h>
10
10
  #include <re2/re2.h>
11
+ #include <re2/set.h>
11
12
  #include <stdint.h>
12
13
  #include <string>
13
14
  #include <sstream>
@@ -93,12 +94,82 @@ typedef struct {
93
94
  VALUE regexp, text;
94
95
  } re2_scanner;
95
96
 
96
- VALUE re2_mRE2, re2_cRegexp, re2_cMatchData, re2_cScanner;
97
+ typedef struct {
98
+ RE2::Set *set;
99
+ } re2_set;
100
+
101
+ VALUE re2_mRE2, re2_cRegexp, re2_cMatchData, re2_cScanner, re2_cSet,
102
+ re2_eSetMatchError, re2_eSetUnsupportedError;
97
103
 
98
104
  /* Symbols used in RE2 options. */
99
105
  static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors,
100
106
  id_max_mem, id_literal, id_never_nl, id_case_sensitive,
101
- id_perl_classes, id_word_boundary, id_one_line;
107
+ id_perl_classes, id_word_boundary, id_one_line,
108
+ id_unanchored, id_anchor_start, id_anchor_both, id_exception;
109
+
110
+ void parse_re2_options(RE2::Options& re2_options, VALUE options) {
111
+ if (TYPE(options) != T_HASH) {
112
+ rb_raise(rb_eArgError, "options should be a hash");
113
+ }
114
+ VALUE utf8, posix_syntax, longest_match, log_errors,
115
+ max_mem, literal, never_nl, case_sensitive, perl_classes,
116
+ word_boundary, one_line;
117
+
118
+ utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
119
+ if (!NIL_P(utf8)) {
120
+ re2_options.set_encoding(RTEST(utf8) ? RE2::Options::EncodingUTF8 : RE2::Options::EncodingLatin1);
121
+ }
122
+
123
+ posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
124
+ if (!NIL_P(posix_syntax)) {
125
+ re2_options.set_posix_syntax(RTEST(posix_syntax));
126
+ }
127
+
128
+ longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
129
+ if (!NIL_P(longest_match)) {
130
+ re2_options.set_longest_match(RTEST(longest_match));
131
+ }
132
+
133
+ log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
134
+ if (!NIL_P(log_errors)) {
135
+ re2_options.set_log_errors(RTEST(log_errors));
136
+ }
137
+
138
+ max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
139
+ if (!NIL_P(max_mem)) {
140
+ re2_options.set_max_mem(NUM2INT(max_mem));
141
+ }
142
+
143
+ literal = rb_hash_aref(options, ID2SYM(id_literal));
144
+ if (!NIL_P(literal)) {
145
+ re2_options.set_literal(RTEST(literal));
146
+ }
147
+
148
+ never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
149
+ if (!NIL_P(never_nl)) {
150
+ re2_options.set_never_nl(RTEST(never_nl));
151
+ }
152
+
153
+ case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
154
+ if (!NIL_P(case_sensitive)) {
155
+ re2_options.set_case_sensitive(RTEST(case_sensitive));
156
+ }
157
+
158
+ perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
159
+ if (!NIL_P(perl_classes)) {
160
+ re2_options.set_perl_classes(RTEST(perl_classes));
161
+ }
162
+
163
+ word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
164
+ if (!NIL_P(word_boundary)) {
165
+ re2_options.set_word_boundary(RTEST(word_boundary));
166
+ }
167
+
168
+ one_line = rb_hash_aref(options, ID2SYM(id_one_line));
169
+ if (!NIL_P(one_line)) {
170
+ re2_options.set_one_line(RTEST(one_line));
171
+ }
172
+ }
102
173
 
103
174
  void re2_matchdata_mark(re2_matchdata* self) {
104
175
  rb_gc_mark(self->regexp);
@@ -667,75 +738,15 @@ static VALUE re2_re2(int argc, VALUE *argv, VALUE self) {
667
738
  * @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
668
739
  */
669
740
  static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
670
- VALUE pattern, options, utf8, posix_syntax, longest_match, log_errors,
671
- max_mem, literal, never_nl, case_sensitive, perl_classes,
672
- word_boundary, one_line;
741
+ VALUE pattern, options;
673
742
  re2_pattern *p;
674
743
 
675
744
  rb_scan_args(argc, argv, "11", &pattern, &options);
676
745
  Data_Get_Struct(self, re2_pattern, p);
677
746
 
678
747
  if (RTEST(options)) {
679
- if (TYPE(options) != T_HASH) {
680
- rb_raise(rb_eArgError, "options should be a hash");
681
- }
682
-
683
748
  RE2::Options re2_options;
684
-
685
- utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
686
- if (!NIL_P(utf8)) {
687
- re2_options.set_encoding(RTEST(utf8) ? RE2::Options::EncodingUTF8 : RE2::Options::EncodingLatin1);
688
- }
689
-
690
- posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
691
- if (!NIL_P(posix_syntax)) {
692
- re2_options.set_posix_syntax(RTEST(posix_syntax));
693
- }
694
-
695
- longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
696
- if (!NIL_P(longest_match)) {
697
- re2_options.set_longest_match(RTEST(longest_match));
698
- }
699
-
700
- log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
701
- if (!NIL_P(log_errors)) {
702
- re2_options.set_log_errors(RTEST(log_errors));
703
- }
704
-
705
- max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
706
- if (!NIL_P(max_mem)) {
707
- re2_options.set_max_mem(NUM2INT(max_mem));
708
- }
709
-
710
- literal = rb_hash_aref(options, ID2SYM(id_literal));
711
- if (!NIL_P(literal)) {
712
- re2_options.set_literal(RTEST(literal));
713
- }
714
-
715
- never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
716
- if (!NIL_P(never_nl)) {
717
- re2_options.set_never_nl(RTEST(never_nl));
718
- }
719
-
720
- case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
721
- if (!NIL_P(case_sensitive)) {
722
- re2_options.set_case_sensitive(RTEST(case_sensitive));
723
- }
724
-
725
- perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
726
- if (!NIL_P(perl_classes)) {
727
- re2_options.set_perl_classes(RTEST(perl_classes));
728
- }
729
-
730
- word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
731
- if (!NIL_P(word_boundary)) {
732
- re2_options.set_word_boundary(RTEST(word_boundary));
733
- }
734
-
735
- one_line = rb_hash_aref(options, ID2SYM(id_one_line));
736
- if (!NIL_P(one_line)) {
737
- re2_options.set_one_line(RTEST(one_line));
738
- }
749
+ parse_re2_options(re2_options, options);
739
750
 
740
751
  p->pattern = new(nothrow) RE2(StringValuePtr(pattern), re2_options);
741
752
  } else {
@@ -1362,6 +1373,234 @@ static VALUE re2_QuoteMeta(VALUE self, VALUE unquoted) {
1362
1373
  return rb_str_new(quoted_string.data(), quoted_string.size());
1363
1374
  }
1364
1375
 
1376
+ void re2_set_free(re2_set *self) {
1377
+ if (self->set) {
1378
+ delete self->set;
1379
+ }
1380
+ free(self);
1381
+ }
1382
+
1383
+ static VALUE re2_set_allocate(VALUE klass) {
1384
+ re2_set *s;
1385
+ VALUE result = Data_Make_Struct(klass, re2_set, 0, re2_set_free, s);
1386
+ return result;
1387
+ }
1388
+
1389
+ /*
1390
+ * Returns a new {RE2::Set} object, a collection of patterns that can be
1391
+ * searched for simultaneously.
1392
+ *
1393
+ * @return [RE2::Set]
1394
+ *
1395
+ * @overload initialize
1396
+ * Returns a new {RE2::Set} object for unanchored patterns with the default
1397
+ * options.
1398
+ *
1399
+ * @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
1400
+ * @return [RE2::Set]
1401
+ *
1402
+ * @overload initialize(anchor)
1403
+ * Returns a new {RE2::Set} object for the specified anchor with the default
1404
+ * options.
1405
+ *
1406
+ * @param [Symbol] anchor One of :unanchored, :anchor_start, :anchor_both
1407
+ * @raise [ArgumentError] if anchor is not :unanchored, :anchor_start or :anchor_both
1408
+ * @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
1409
+ *
1410
+ * @overload initialize(anchor, options)
1411
+ * Returns a new {RE2::Set} object with the specified options.
1412
+ *
1413
+ * @param [Symbol] anchor One of :unanchored, :anchor_start, :anchor_both
1414
+ * @param [Hash] options the options with which to compile the pattern
1415
+ * @option options [Boolean] :utf8 (true) text and pattern are UTF-8; otherwise Latin-1
1416
+ * @option options [Boolean] :posix_syntax (false) restrict regexps to POSIX egrep syntax
1417
+ * @option options [Boolean] :longest_match (false) search for longest match, not first match
1418
+ * @option options [Boolean] :log_errors (true) log syntax and execution errors to ERROR
1419
+ * @option options [Fixnum] :max_mem approx. max memory footprint of RE2
1420
+ * @option options [Boolean] :literal (false) interpret string as literal, not regexp
1421
+ * @option options [Boolean] :never_nl (false) never match \n, even if it is in regexp
1422
+ * @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with (?i) unless in posix_syntax mode)
1423
+ * @option options [Boolean] :perl_classes (false) allow Perl's \d \s \w \D \S \W when in posix_syntax mode
1424
+ * @option options [Boolean] :word_boundary (false) allow \b \B (word boundary and not) when in posix_syntax mode
1425
+ * @option options [Boolean] :one_line (false) ^ and $ only match beginning and end of text when in posix_syntax mode
1426
+ * @return [RE2::Set] an RE2::Set with the specified anchor and options
1427
+ * @raise [ArgumentError] if anchor is not one of the accepted choices
1428
+ * @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
1429
+ */
1430
+ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
1431
+ VALUE anchor, options;
1432
+ re2_set *s;
1433
+ RE2::Anchor re2_anchor;
1434
+ RE2::Options re2_options;
1435
+
1436
+ rb_scan_args(argc, argv, "02", &anchor, &options);
1437
+ Data_Get_Struct(self, re2_set, s);
1438
+
1439
+ if (RTEST(options)) {
1440
+ parse_re2_options(re2_options, options);
1441
+ }
1442
+ if (NIL_P(anchor)) {
1443
+ re2_anchor = RE2::UNANCHORED;
1444
+ } else {
1445
+ Check_Type(anchor, T_SYMBOL);
1446
+ ID id_anchor = SYM2ID(anchor);
1447
+ if (id_anchor == id_unanchored) {
1448
+ re2_anchor = RE2::UNANCHORED;
1449
+ } else if (id_anchor == id_anchor_start) {
1450
+ re2_anchor = RE2::ANCHOR_START;
1451
+ } else if (id_anchor == id_anchor_both) {
1452
+ re2_anchor = RE2::ANCHOR_BOTH;
1453
+ } else {
1454
+ rb_raise(rb_eArgError, "anchor should be one of: :unanchored, :anchor_start, :anchor_both");
1455
+ }
1456
+ }
1457
+
1458
+ s->set = new(nothrow) RE2::Set(re2_options, re2_anchor);
1459
+ if (s->set == 0) {
1460
+ rb_raise(rb_eNoMemError, "not enough memory to allocate RE2::Set object");
1461
+ }
1462
+
1463
+ return self;
1464
+ }
1465
+
1466
+ /*
1467
+ * Adds a pattern to the set. Returns the index that will identify the pattern
1468
+ * in the output of #match. Cannot be called after #compile has been called.
1469
+ *
1470
+ * @param [String] pattern the regex pattern
1471
+ * @return [Integer] the index of the pattern in the set
1472
+ * @raise [ArgumentError] if called after compile or the pattern is rejected
1473
+ * @example
1474
+ * set = RE2::Set.new
1475
+ * set.add("abc") #=> 0
1476
+ * set.add("def") #=> 1
1477
+ */
1478
+ static VALUE re2_set_add(VALUE self, VALUE pattern) {
1479
+ Check_Type(pattern, T_STRING);
1480
+ re2::StringPiece regex(RSTRING_PTR(pattern), RSTRING_LEN(pattern));
1481
+ std::string err;
1482
+ re2_set *s;
1483
+ Data_Get_Struct(self, re2_set, s);
1484
+ int index = s->set->Add(regex, &err);
1485
+ if (index < 0) {
1486
+ rb_raise(rb_eArgError, "str rejected by RE2::Set->Add(): %s", err.c_str());
1487
+ }
1488
+
1489
+ return INT2FIX(index);
1490
+ }
1491
+
1492
+ /*
1493
+ * Compiles a Set so it can be used to match against. Must be called after #add
1494
+ * and before #match.
1495
+ *
1496
+ * @return [Boolean] whether compilation succeeded
1497
+ * @example
1498
+ * set = RE2::Set.new
1499
+ * set.add("abc")
1500
+ * set.compile # => true
1501
+ */
1502
+ static VALUE re2_set_compile(VALUE self) {
1503
+ re2_set *s;
1504
+ Data_Get_Struct(self, re2_set, s);
1505
+
1506
+ return BOOL2RUBY(s->set->Compile());
1507
+ }
1508
+
1509
+ /*
1510
+ * Returns whether the underlying re2 version outputs error information from
1511
+ * RE2::Set::Match. If not, #match raises RE2::Set::UnsupportedError unless its
1512
+ * :exception option is set to false (the option defaults to true).
1513
+ *
1514
+ * @return [Boolean] whether the underlying re2 outputs error information from Set matches
1515
+ */
1516
+ static VALUE re2_set_match_raises_errors_p(VALUE self) {
1517
+ #ifdef HAVE_ERROR_INFO_ARGUMENT
1518
+ return Qtrue;
1519
+ #else
1520
+ return Qfalse;
1521
+ #endif
1522
+ }
1523
+
1524
+ /*
1525
+ * Matches the given text against patterns in the set, returning an array of
1526
+ * integer indices of the matching patterns if matched or an empty array if
1527
+ * there are no matches.
1528
+ *
1529
+ * @param [String] str the text to match against
1530
+ * @param [Hash] options the options with which to match
1531
+ * @option options [Boolean] :exception (true) whether to raise exceptions with re2's error information (not supported on ABI version 0 of re2)
1532
+ * @return [Array<Integer>] the indices of matching regexps
1533
+ * @raise [MatchError] if an error occurs while matching
1534
+ * @raise [UnsupportedError] if using the :exception option against a version of re2 that does not support it
1535
+ * @example
1536
+ * set = RE2::Set.new
1537
+ * set.add("abc")
1538
+ * set.add("def")
1539
+ * set.compile
1540
+ * set.match("abcdef") # => [0, 1]
1541
+ */
1542
+ static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
1543
+ VALUE str, options, exception_option;
1544
+ bool raise_exception = true;
1545
+ rb_scan_args(argc, argv, "11", &str, &options);
1546
+ Check_Type(str, T_STRING);
1547
+ re2::StringPiece data(RSTRING_PTR(str), RSTRING_LEN(str));
1548
+ std::vector<int> v;
1549
+ re2_set *s;
1550
+ Data_Get_Struct(self, re2_set, s);
1551
+
1552
+ if (RTEST(options)) {
1553
+ Check_Type(options, T_HASH);
1554
+
1555
+ exception_option = rb_hash_aref(options, ID2SYM(id_exception));
1556
+ if (!NIL_P(exception_option)) {
1557
+ raise_exception = RTEST(exception_option);
1558
+ }
1559
+ }
1560
+
1561
+ if (raise_exception) {
1562
+ #ifdef HAVE_ERROR_INFO_ARGUMENT
1563
+ RE2::Set::ErrorInfo e;
1564
+ bool match_failed = !s->set->Match(data, &v, &e);
1565
+ VALUE result = rb_ary_new2(v.size());
1566
+
1567
+ if (match_failed) {
1568
+ switch (e.kind) {
1569
+ case RE2::Set::kNoError:
1570
+ break;
1571
+ case RE2::Set::kNotCompiled:
1572
+ rb_raise(re2_eSetMatchError, "#match must not be called before #compile");
1573
+ case RE2::Set::kOutOfMemory:
1574
+ rb_raise(re2_eSetMatchError, "The DFA ran out of memory");
1575
+ case RE2::Set::kInconsistent:
1576
+ rb_raise(re2_eSetMatchError, "RE2::Prog internal error");
1577
+ default: // Just in case a future version of libre2 adds new ErrorKinds
1578
+ rb_raise(re2_eSetMatchError, "Unknown RE2::Set::ErrorKind: %d", e.kind);
1579
+ }
1580
+ } else {
1581
+ for (size_t i = 0; i < v.size(); i++) {
1582
+ rb_ary_push(result, INT2FIX(v[i]));
1583
+ }
1584
+ }
1585
+
1586
+ return result;
1587
+ #else
1588
+ rb_raise(re2_eSetUnsupportedError, "current version of RE2::Set::Match() does not output error information, :exception option can only be set to false");
1589
+ #endif
1590
+ } else {
1591
+ bool matched = s->set->Match(data, &v);
1592
+ VALUE result = rb_ary_new2(v.size());
1593
+
1594
+ if (matched) {
1595
+ for (size_t i = 0; i < v.size(); i++) {
1596
+ rb_ary_push(result, INT2FIX(v[i]));
1597
+ }
1598
+ }
1599
+
1600
+ return result;
1601
+ }
1602
+ }
1603
+
1365
1604
  /* Forward declare Init_re2 to be called by C code but define it separately so
1366
1605
  * that YARD can parse it.
1367
1606
  */
@@ -1372,12 +1611,18 @@ void Init_re2(void) {
1372
1611
  re2_cRegexp = rb_define_class_under(re2_mRE2, "Regexp", rb_cObject);
1373
1612
  re2_cMatchData = rb_define_class_under(re2_mRE2, "MatchData", rb_cObject);
1374
1613
  re2_cScanner = rb_define_class_under(re2_mRE2, "Scanner", rb_cObject);
1614
+ re2_cSet = rb_define_class_under(re2_mRE2, "Set", rb_cObject);
1615
+ re2_eSetMatchError = rb_define_class_under(re2_cSet, "MatchError",
1616
+ rb_const_get(rb_cObject, rb_intern("StandardError")));
1617
+ re2_eSetUnsupportedError = rb_define_class_under(re2_cSet, "UnsupportedError",
1618
+ rb_const_get(rb_cObject, rb_intern("StandardError")));
1375
1619
 
1376
1620
  rb_define_alloc_func(re2_cRegexp, (VALUE (*)(VALUE))re2_regexp_allocate);
1377
1621
  rb_define_alloc_func(re2_cMatchData,
1378
1622
  (VALUE (*)(VALUE))re2_matchdata_allocate);
1379
1623
  rb_define_alloc_func(re2_cScanner,
1380
1624
  (VALUE (*)(VALUE))re2_scanner_allocate);
1625
+ rb_define_alloc_func(re2_cSet, (VALUE (*)(VALUE))re2_set_allocate);
1381
1626
 
1382
1627
  rb_define_method(re2_cMatchData, "string",
1383
1628
  RUBY_METHOD_FUNC(re2_matchdata_string), 0);
@@ -1394,7 +1639,8 @@ void Init_re2(void) {
1394
1639
  rb_define_method(re2_cMatchData, "end",
1395
1640
  RUBY_METHOD_FUNC(re2_matchdata_end), 1);
1396
1641
  rb_define_method(re2_cMatchData, "[]", RUBY_METHOD_FUNC(re2_matchdata_aref),
1397
- -1); rb_define_method(re2_cMatchData, "to_s",
1642
+ -1);
1643
+ rb_define_method(re2_cMatchData, "to_s",
1398
1644
  RUBY_METHOD_FUNC(re2_matchdata_to_s), 0);
1399
1645
  rb_define_method(re2_cMatchData, "inspect",
1400
1646
  RUBY_METHOD_FUNC(re2_matchdata_inspect), 0);
@@ -1471,6 +1717,14 @@ void Init_re2(void) {
1471
1717
  rb_define_method(re2_cRegexp, "one_line?",
1472
1718
  RUBY_METHOD_FUNC(re2_regexp_one_line), 0);
1473
1719
 
1720
+ rb_define_singleton_method(re2_cSet, "match_raises_errors?",
1721
+ RUBY_METHOD_FUNC(re2_set_match_raises_errors_p), 0);
1722
+ rb_define_method(re2_cSet, "initialize",
1723
+ RUBY_METHOD_FUNC(re2_set_initialize), -1);
1724
+ rb_define_method(re2_cSet, "add", RUBY_METHOD_FUNC(re2_set_add), 1);
1725
+ rb_define_method(re2_cSet, "compile", RUBY_METHOD_FUNC(re2_set_compile), 0);
1726
+ rb_define_method(re2_cSet, "match", RUBY_METHOD_FUNC(re2_set_match), -1);
1727
+
1474
1728
  rb_define_module_function(re2_mRE2, "Replace",
1475
1729
  RUBY_METHOD_FUNC(re2_Replace), 3);
1476
1730
  rb_define_module_function(re2_mRE2, "GlobalReplace",
@@ -1498,6 +1752,10 @@ void Init_re2(void) {
1498
1752
  id_perl_classes = rb_intern("perl_classes");
1499
1753
  id_word_boundary = rb_intern("word_boundary");
1500
1754
  id_one_line = rb_intern("one_line");
1755
+ id_unanchored = rb_intern("unanchored");
1756
+ id_anchor_start = rb_intern("anchor_start");
1757
+ id_anchor_both = rb_intern("anchor_both");
1758
+ id_exception = rb_intern("exception");
1501
1759
 
1502
1760
  #if 0
1503
1761
  /* Fake so YARD generates the file. */
@@ -0,0 +1,168 @@
1
+ RSpec.describe RE2::Set do
2
+ describe "#initialize" do
3
+ it "returns an instance given no args" do
4
+ set = RE2::Set.new
5
+
6
+ expect(set).to be_a(RE2::Set)
7
+ end
8
+
9
+ it "returns an instance given only an anchor of :unanchored" do
10
+ set = RE2::Set.new(:unanchored)
11
+
12
+ expect(set).to be_a(RE2::Set)
13
+ end
14
+
15
+ it "returns an instance given only an anchor of :anchor_start" do
16
+ set = RE2::Set.new(:anchor_start)
17
+
18
+ expect(set).to be_a(RE2::Set)
19
+ end
20
+
21
+ it "returns an instance given only an anchor of :anchor_both" do
22
+ set = RE2::Set.new(:anchor_both)
23
+
24
+ expect(set).to be_a(RE2::Set)
25
+ end
26
+
27
+ it "returns an instance given an anchor and options" do
28
+ set = RE2::Set.new(:unanchored, :case_sensitive => false)
29
+
30
+ expect(set).to be_a(RE2::Set)
31
+ end
32
+
33
+ it "raises an error if given an inappropriate type" do
34
+ expect { RE2::Set.new(0) }.to raise_error(TypeError)
35
+ end
36
+
37
+ it "raises an error if given an invalid anchor" do
38
+ expect { RE2::Set.new(:not_a_valid_anchor) }.to raise_error(
39
+ ArgumentError,
40
+ "anchor should be one of: :unanchored, :anchor_start, :anchor_both"
41
+ )
42
+ end
43
+ end
44
+
45
+ describe "#add" do
46
+ it "allows multiple patterns to be added", :aggregate_failures do
47
+ set = RE2::Set.new
48
+
49
+ expect(set.add("abc")).to eq(0)
50
+ expect(set.add("def")).to eq(1)
51
+ expect(set.add("ghi")).to eq(2)
52
+ end
53
+
54
+ it "rejects invalid patterns when added" do
55
+ set = RE2::Set.new(:unanchored, :log_errors => false)
56
+
57
+ expect { set.add("???") }.to raise_error(ArgumentError, /str rejected by RE2::Set->Add()/)
58
+ end
59
+
60
+ it "raises an error if called after #compile" do
61
+ set = RE2::Set.new(:unanchored, :log_errors => false)
62
+ set.add("abc")
63
+ set.compile
64
+
65
+ silence_stderr do
66
+ expect { set.add("def") }.to raise_error(ArgumentError)
67
+ end
68
+ end
69
+
70
+ it "raises an error if given a non-string pattern" do
71
+ set = RE2::Set.new(:unanchored, :log_errors => false)
72
+
73
+ expect { set.add(0) }.to raise_error(TypeError)
74
+ end
75
+ end
76
+
77
+ describe "#compile" do
78
+ it "compiles the set without error" do
79
+ set = RE2::Set.new
80
+ set.add("abc")
81
+ set.add("def")
82
+ set.add("ghi")
83
+
84
+ expect(set.compile).to be_truthy
85
+ end
86
+ end
87
+
88
+ describe "#match" do
89
+ it "matches against multiple patterns" do
90
+ set = RE2::Set.new
91
+ set.add("abc")
92
+ set.add("def")
93
+ set.add("ghi")
94
+ set.compile
95
+
96
+ expect(set.match("abcdefghi", :exception => false)).to eq([0, 1, 2])
97
+ end
98
+
99
+ it "raises an error if called before #compile by default" do
100
+ skip "Underlying RE2::Set::Match does not output error information" unless RE2::Set.match_raises_errors?
101
+
102
+ set = RE2::Set.new(:unanchored, :log_errors => false)
103
+
104
+ silence_stderr do
105
+ expect { set.match("") }.to raise_error(RE2::Set::MatchError)
106
+ end
107
+ end
108
+
109
+ it "raises an error if called before #compile when :exception is true" do
110
+ skip "Underlying RE2::Set::Match does not output error information" unless RE2::Set.match_raises_errors?
111
+
112
+ set = RE2::Set.new(:unanchored, :log_errors => false)
113
+
114
+ silence_stderr do
115
+ expect { set.match("", :exception => true) }.to raise_error(RE2::Set::MatchError)
116
+ end
117
+ end
118
+
119
+ it "returns an empty array if called before #compile when :exception is false" do
120
+ set = RE2::Set.new(:unanchored, :log_errors => false)
121
+
122
+ silence_stderr do
123
+ expect(set.match("", :exception => false)).to be_empty
124
+ end
125
+ end
126
+
127
+ it "raises an error if :exception is true and re2 does not support it" do
128
+ skip "Underlying RE2::Set::Match outputs error information" if RE2::Set.match_raises_errors?
129
+
130
+ set = RE2::Set.new(:unanchored, :log_errors => false)
131
+
132
+ silence_stderr do
133
+ expect { set.match("", :exception => true) }.to raise_error(RE2::Set::UnsupportedError)
134
+ end
135
+ end
136
+
137
+ it "raises an error if given non-hash options" do
138
+ set = RE2::Set.new
139
+
140
+ expect { set.match("", 0) }.to raise_error(TypeError)
141
+ end
142
+ end
143
+
144
+ def silence_stderr
145
+ original_stream = STDERR
146
+
147
+ if File.const_defined?(:NULL)
148
+ STDERR.reopen(File::NULL)
149
+ else
150
+ platform = RUBY_PLATFORM == 'java' ? RbConfig::CONFIG['host_os'] : RUBY_PLATFORM
151
+
152
+ case platform
153
+ when /mswin|mingw/i
154
+ STDERR.reopen('NUL')
155
+ when /amiga/i
156
+ STDERR.reopen('NIL')
157
+ when /openvms/i
158
+ STDERR.reopen('NL:')
159
+ else
160
+ STDERR.reopen('/dev/null')
161
+ end
162
+ end
163
+
164
+ yield
165
+ ensure
166
+ STDERR.reopen(original_stream)
167
+ end
168
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: re2
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.0
4
+ version: 1.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paul Mucur
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-03-29 00:00:00.000000000 Z
11
+ date: 2022-10-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake-compiler
@@ -57,6 +57,7 @@ files:
57
57
  - spec/re2/match_data_spec.rb
58
58
  - spec/re2/regexp_spec.rb
59
59
  - spec/re2/scanner_spec.rb
60
+ - spec/re2/set_spec.rb
60
61
  - spec/re2/string_spec.rb
61
62
  - spec/re2_spec.rb
62
63
  - spec/spec_helper.rb
@@ -79,7 +80,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
79
80
  - !ruby/object:Gem::Version
80
81
  version: '0'
81
82
  requirements: []
82
- rubygems_version: 3.2.3
83
+ rubygems_version: 3.3.7
83
84
  signing_key:
84
85
  specification_version: 4
85
86
  summary: Ruby bindings to re2.
@@ -90,4 +91,5 @@ test_files:
90
91
  - spec/re2/regexp_spec.rb
91
92
  - spec/re2/match_data_spec.rb
92
93
  - spec/re2/string_spec.rb
94
+ - spec/re2/set_spec.rb
93
95
  - spec/re2/scanner_spec.rb