re2 2.26.1-x86_64-linux-gnu → 2.27.0-x86_64-linux-gnu

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f26a7c50adf99ee96e8af8cf72c2807df885f3aea35a56ac56962327b6151cce
4
- data.tar.gz: 8823d9b232988ec6026e30b4930284a0deed495cbf7fd7c8c2fab09c0244004c
3
+ metadata.gz: b86c49eb21074e4ef2eaa58dc1234224074621dffff2ac3d7fd2d8c6b4214daf
4
+ data.tar.gz: bf270c0bc858ee0e63d2d3514f7a35c99539455b0dbac7fef145b33a1eb6251a
5
5
  SHA512:
6
- metadata.gz: 8423cd1e0bfb54c2e7d211c5f9b667cbd3025163adbbb42575e545ef5bbef4e437c955f107bdeb30a0a988e65374c3b601bcaa542716b9a649f0fdd803fc78c3
7
- data.tar.gz: 55b2b7aa364e94aa1a0e25a344707643974b435d82c8a04a73ac261aa99f3c2b350cd9dd01627e5a21c0b81c5412d5a833ecb7e138bc08a085a5ed87a574e352
6
+ metadata.gz: 0fcf3b3b7d9f47754f70996160a26be706afd222f75c181a114e4014c088642cb3b2d185fedc2dc58fc54f2211ac834ac5f7526ff748dd894c4ed8d4eca5ebbf
7
+ data.tar.gz: 2e4d7a22246ea69ebc4d00428f347033d5e2d911e3f46735b662eb1be4be541d31dbe33efbad9165067a284e1c21c2d152a18c824ebb6ecec9f66e5eeb7ab631
data/README.md CHANGED
@@ -6,7 +6,7 @@ Python".
6
6
 
7
7
  [![Build Status](https://github.com/mudge/re2/actions/workflows/tests.yml/badge.svg?branch=main)](https://github.com/mudge/re2/actions)
8
8
 
9
- **Current version:** 2.26.1
9
+ **Current version:** 2.27.0
10
10
  **Bundled RE2 version:** libre2.11 (2025-11-05)
11
11
 
12
12
  ```ruby
data/ext/re2/re2.cc CHANGED
@@ -19,6 +19,7 @@
19
19
  #include <re2/set.h>
20
20
  #include <ruby.h>
21
21
  #include <ruby/encoding.h>
22
+ #include <ruby/thread.h>
22
23
 
23
24
  #define BOOL2RUBY(v) (v ? Qtrue : Qfalse)
24
25
 
@@ -43,6 +44,132 @@ typedef struct {
43
44
  RE2::Set *set;
44
45
  } re2_set;
45
46
 
47
+ struct nogvl_match_arg {
48
+ const RE2 *pattern;
49
+ re2::StringPiece text;
50
+ size_t startpos;
51
+ size_t endpos;
52
+ RE2::Anchor anchor;
53
+ re2::StringPiece *matches;
54
+ int n;
55
+ bool matched;
56
+ };
57
+
58
+ static void *nogvl_match(void *ptr) {
59
+ auto *arg = static_cast<nogvl_match_arg *>(ptr);
60
+ #ifdef HAVE_ENDPOS_ARGUMENT
61
+ arg->matched = arg->pattern->Match(
62
+ arg->text, arg->startpos, arg->endpos,
63
+ arg->anchor, arg->matches, arg->n);
64
+ #else
65
+ arg->matched = arg->pattern->Match(
66
+ arg->text, arg->startpos,
67
+ arg->anchor, arg->matches, arg->n);
68
+ #endif
69
+ return nullptr;
70
+ }
71
+
72
+ static bool re2_match_without_gvl(
73
+ const RE2 *pattern, VALUE text, size_t startpos, size_t endpos,
74
+ RE2::Anchor anchor, re2::StringPiece *matches, int n) {
75
+ nogvl_match_arg arg;
76
+ arg.pattern = pattern;
77
+ arg.text = re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text));
78
+ arg.startpos = startpos;
79
+ arg.endpos = endpos;
80
+ arg.anchor = anchor;
81
+ arg.matches = matches;
82
+ arg.n = n;
83
+ arg.matched = false;
84
+
85
+ /* Abseil's synchronization primitives (SRWLOCK, SleepConditionVariableSRW)
86
+ * are incompatible with Ruby's Win32 Mutex-based GVL, causing
87
+ * WAIT_ABANDONED crashes when multiple threads match concurrently.
88
+ */
89
+ #ifdef _WIN32
90
+ nogvl_match(&arg);
91
+ #else
92
+ /* No unblocking function is needed: RE2 matching is CPU-bound computation,
93
+ * not a blocking system call, so a signal cannot safely interrupt it.
94
+ */
95
+ rb_thread_call_without_gvl(nogvl_match, &arg, NULL, NULL);
96
+ #endif
97
+
98
+ return arg.matched;
99
+ }
100
+
101
+ struct nogvl_set_match_arg {
102
+ const RE2::Set *set;
103
+ re2::StringPiece text;
104
+ std::vector<int> *v;
105
+ #ifdef HAVE_ERROR_INFO_ARGUMENT
106
+ RE2::Set::ErrorInfo *error_info;
107
+ #endif
108
+ bool matched;
109
+ };
110
+
111
+ static void *nogvl_set_match(void *ptr) {
112
+ auto *arg = static_cast<nogvl_set_match_arg *>(ptr);
113
+ #ifdef HAVE_ERROR_INFO_ARGUMENT
114
+ if (arg->error_info) {
115
+ arg->matched = arg->set->Match(arg->text, arg->v, arg->error_info);
116
+ } else {
117
+ arg->matched = arg->set->Match(arg->text, arg->v);
118
+ }
119
+ #else
120
+ arg->matched = arg->set->Match(arg->text, arg->v);
121
+ #endif
122
+ return nullptr;
123
+ }
124
+
125
+ struct nogvl_replace_arg {
126
+ std::string *str;
127
+ const RE2 *pattern;
128
+ re2::StringPiece string_pattern;
129
+ re2::StringPiece rewrite;
130
+ };
131
+
132
+ static void *nogvl_replace(void *ptr) {
133
+ auto *arg = static_cast<nogvl_replace_arg *>(ptr);
134
+ if (arg->pattern) {
135
+ RE2::Replace(arg->str, *arg->pattern, arg->rewrite);
136
+ } else {
137
+ RE2::Replace(arg->str, arg->string_pattern, arg->rewrite);
138
+ }
139
+ return nullptr;
140
+ }
141
+
142
+ static void *nogvl_global_replace(void *ptr) {
143
+ auto *arg = static_cast<nogvl_replace_arg *>(ptr);
144
+ if (arg->pattern) {
145
+ RE2::GlobalReplace(arg->str, *arg->pattern, arg->rewrite);
146
+ } else {
147
+ RE2::GlobalReplace(arg->str, arg->string_pattern, arg->rewrite);
148
+ }
149
+ return nullptr;
150
+ }
151
+
152
+ struct nogvl_extract_arg {
153
+ re2::StringPiece text;
154
+ const RE2 *pattern;
155
+ re2::StringPiece string_pattern;
156
+ re2::StringPiece rewrite;
157
+ std::string *out;
158
+ bool extracted;
159
+ };
160
+
161
+ static void *nogvl_extract(void *ptr) {
162
+ auto *arg = static_cast<nogvl_extract_arg *>(ptr);
163
+ if (arg->pattern) {
164
+ arg->extracted = RE2::Extract(arg->text, *arg->pattern,
165
+ arg->rewrite, arg->out);
166
+ } else {
167
+ arg->extracted = RE2::Extract(arg->text, RE2(arg->string_pattern),
168
+ arg->rewrite, arg->out);
169
+ }
170
+ return nullptr;
171
+ }
172
+
46
173
  VALUE re2_mRE2, re2_cRegexp, re2_cMatchData, re2_cScanner, re2_cSet,
47
174
  re2_eSetMatchError, re2_eSetUnsupportedError, re2_eRegexpUnsupportedError;
48
175
 
@@ -126,7 +253,7 @@ static void parse_re2_options(RE2::Options* re2_options, const VALUE options) {
126
253
  }
127
254
 
128
255
  static void re2_matchdata_mark(void *ptr) {
129
- re2_matchdata *m = reinterpret_cast<re2_matchdata *>(ptr);
256
+ re2_matchdata *m = static_cast<re2_matchdata *>(ptr);
130
257
  rb_gc_mark_movable(m->regexp);
131
258
 
132
259
  /* Text must not be movable because StringPiece matches hold pointers into
@@ -136,12 +263,12 @@ static void re2_matchdata_mark(void *ptr) {
136
263
  }
137
264
 
138
265
  static void re2_matchdata_compact(void *ptr) {
139
- re2_matchdata *m = reinterpret_cast<re2_matchdata *>(ptr);
266
+ re2_matchdata *m = static_cast<re2_matchdata *>(ptr);
140
267
  m->regexp = rb_gc_location(m->regexp);
141
268
  }
142
269
 
143
270
  static void re2_matchdata_free(void *ptr) {
144
- re2_matchdata *m = reinterpret_cast<re2_matchdata *>(ptr);
271
+ re2_matchdata *m = static_cast<re2_matchdata *>(ptr);
145
272
  if (m->matches) {
146
273
  delete[] m->matches;
147
274
  }
@@ -149,7 +276,7 @@ static void re2_matchdata_free(void *ptr) {
149
276
  }
150
277
 
151
278
  static size_t re2_matchdata_memsize(const void *ptr) {
152
- const re2_matchdata *m = reinterpret_cast<const re2_matchdata *>(ptr);
279
+ const re2_matchdata *m = static_cast<const re2_matchdata *>(ptr);
153
280
  size_t size = sizeof(*m);
154
281
  if (m->matches) {
155
282
  size += sizeof(*m->matches) * m->number_of_matches;
@@ -174,7 +301,7 @@ static const rb_data_type_t re2_matchdata_data_type = {
174
301
  };
175
302
 
176
303
  static void re2_scanner_mark(void *ptr) {
177
- re2_scanner *s = reinterpret_cast<re2_scanner *>(ptr);
304
+ re2_scanner *s = static_cast<re2_scanner *>(ptr);
178
305
  rb_gc_mark_movable(s->regexp);
179
306
 
180
307
  /* Text must not be movable because the StringPiece input holds a pointer
@@ -184,12 +311,12 @@ static void re2_scanner_mark(void *ptr) {
184
311
  }
185
312
 
186
313
  static void re2_scanner_compact(void *ptr) {
187
- re2_scanner *s = reinterpret_cast<re2_scanner *>(ptr);
314
+ re2_scanner *s = static_cast<re2_scanner *>(ptr);
188
315
  s->regexp = rb_gc_location(s->regexp);
189
316
  }
190
317
 
191
318
  static void re2_scanner_free(void *ptr) {
192
- re2_scanner *s = reinterpret_cast<re2_scanner *>(ptr);
319
+ re2_scanner *s = static_cast<re2_scanner *>(ptr);
193
320
  if (s->input) {
194
321
  delete s->input;
195
322
  }
@@ -197,7 +324,7 @@ static void re2_scanner_free(void *ptr) {
197
324
  }
198
325
 
199
326
  static size_t re2_scanner_memsize(const void *ptr) {
200
- const re2_scanner *s = reinterpret_cast<const re2_scanner *>(ptr);
327
+ const re2_scanner *s = static_cast<const re2_scanner *>(ptr);
201
328
  size_t size = sizeof(*s);
202
329
  if (s->input) {
203
330
  size += sizeof(*s->input);
@@ -222,7 +349,7 @@ static const rb_data_type_t re2_scanner_data_type = {
222
349
  };
223
350
 
224
351
  static void re2_regexp_free(void *ptr) {
225
- re2_pattern *p = reinterpret_cast<re2_pattern *>(ptr);
352
+ re2_pattern *p = static_cast<re2_pattern *>(ptr);
226
353
  if (p->pattern) {
227
354
  delete p->pattern;
228
355
  }
@@ -230,7 +357,7 @@ static void re2_regexp_free(void *ptr) {
230
357
  }
231
358
 
232
359
  static size_t re2_regexp_memsize(const void *ptr) {
233
- const re2_pattern *p = reinterpret_cast<const re2_pattern *>(ptr);
360
+ const re2_pattern *p = static_cast<const re2_pattern *>(ptr);
234
361
  size_t size = sizeof(*p);
235
362
  if (p->pattern) {
236
363
  size += sizeof(*p->pattern);
@@ -250,7 +377,7 @@ static const rb_data_type_t re2_regexp_data_type = {
250
377
  0,
251
378
  // IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
252
379
  // macro to update VALUE references, as to trigger write barriers.
253
- RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
380
+ RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE
254
381
  };
255
382
 
256
383
  static re2_pattern *unwrap_re2_regexp(VALUE self) {
@@ -403,6 +530,7 @@ static VALUE re2_scanner_initialize_copy(VALUE self, VALUE other) {
403
530
 
404
531
  if (self_c->input) {
405
532
  delete self_c->input;
533
+ self_c->input = nullptr;
406
534
  }
407
535
 
408
536
  RB_OBJ_WRITE(self, &self_c->regexp, other_c->regexp);
@@ -481,9 +609,27 @@ static VALUE re2_scanner_scan(VALUE self) {
481
609
  /* Check whether we've exhausted the input yet. */
482
610
  c->eof = new_input_size == 0;
483
611
 
484
- /* If the match didn't advance the input, we need to do this ourselves. */
612
+ /* If the match didn't advance the input, we need to do this ourselves,
613
+ * advancing by a whole character to avoid splitting multi-byte characters.
614
+ *
615
+ * The lookup table approach is taken from RE2's own Python extension: the
616
+ * high 4 bits of a UTF-8 lead byte determine the character's byte length.
617
+ *
618
+ * See https://github.com/google/re2/blob/972a15cedd008d846f1a39b2e88ce48d7f166cbd/python/_re2.cc#L46-L48
619
+ */
485
620
  if (!input_advanced && new_input_size > 0) {
486
- c->input->remove_prefix(1);
621
+ size_t char_size = 1;
622
+
623
+ if (p->pattern->options().encoding() == RE2::Options::EncodingUTF8) {
624
+ char_size = "\1\1\1\1\1\1\1\1\1\1\1\1\2\2\3\4"
625
+ [((*c->input)[0] & 0xFF) >> 4];
626
+
627
+ if (char_size > new_input_size) {
628
+ char_size = new_input_size;
629
+ }
630
+ }
631
+
632
+ c->input->remove_prefix(char_size);
487
633
  }
488
634
 
489
635
  return result;
@@ -1152,6 +1298,7 @@ static VALUE re2_matchdata_initialize_copy(VALUE self, VALUE other) {
1152
1298
 
1153
1299
  if (self_m->matches) {
1154
1300
  delete[] self_m->matches;
1301
+ self_m->matches = nullptr;
1155
1302
  }
1156
1303
 
1157
1304
  self_m->number_of_matches = other_m->number_of_matches;
@@ -1229,8 +1376,11 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
1229
1376
 
1230
1377
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1231
1378
 
1379
+ rb_check_frozen(self);
1380
+
1232
1381
  if (p->pattern) {
1233
1382
  delete p->pattern;
1383
+ p->pattern = nullptr;
1234
1384
  }
1235
1385
 
1236
1386
  if (RTEST(options)) {
@@ -1248,6 +1398,8 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
1248
1398
  rb_raise(rb_eNoMemError, "not enough memory to allocate RE2 object");
1249
1399
  }
1250
1400
 
1401
+ rb_obj_freeze(self);
1402
+
1251
1403
  return self;
1252
1404
  }
1253
1405
 
@@ -1257,8 +1409,11 @@ static VALUE re2_regexp_initialize_copy(VALUE self, VALUE other) {
1257
1409
 
1258
1410
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, self_p);
1259
1411
 
1412
+ rb_check_frozen(self);
1413
+
1260
1414
  if (self_p->pattern) {
1261
1415
  delete self_p->pattern;
1416
+ self_p->pattern = nullptr;
1262
1417
  }
1263
1418
 
1264
1419
  self_p->pattern = new(std::nothrow) RE2(other_p->pattern->pattern(),
@@ -1267,6 +1422,8 @@ static VALUE re2_regexp_initialize_copy(VALUE self, VALUE other) {
1267
1422
  rb_raise(rb_eNoMemError, "not enough memory to allocate RE2 object");
1268
1423
  }
1269
1424
 
1425
+ rb_obj_freeze(self);
1426
+
1270
1427
  return self;
1271
1428
  }
1272
1429
 
@@ -1731,8 +1888,9 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
1731
1888
 
1732
1889
  rb_scan_args(argc, argv, "11", &text, &options);
1733
1890
 
1734
- /* Ensure text is a string. */
1891
+ /* Coerce and freeze text to prevent mutation. */
1735
1892
  StringValue(text);
1893
+ text = rb_str_new_frozen(text);
1736
1894
 
1737
1895
  p = unwrap_re2_regexp(self);
1738
1896
 
@@ -1822,16 +1980,18 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
1822
1980
  rb_raise(rb_eArgError, "startpos should be <= endpos");
1823
1981
  }
1824
1982
 
1825
- if (n == 0) {
1826
- #ifdef HAVE_ENDPOS_ARGUMENT
1827
- bool matched = p->pattern->Match(
1828
- re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
1829
- startpos, endpos, anchor, 0, 0);
1830
- #else
1831
- bool matched = p->pattern->Match(
1832
- re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
1833
- startpos, anchor, 0, 0);
1983
+ #ifndef HAVE_ENDPOS_ARGUMENT
1984
+ /* Old RE2's Match() takes int startpos. Reject values that would overflow. */
1985
+ if (startpos > INT_MAX) {
1986
+ rb_raise(rb_eRangeError, "startpos should be <= %d", INT_MAX);
1987
+ }
1834
1988
  #endif
1989
+
1990
+ if (n == 0) {
1991
+ bool matched = re2_match_without_gvl(
1992
+ p->pattern, text, startpos, endpos, anchor, 0, 0);
1993
+ RB_GC_GUARD(text);
1994
+
1835
1995
  return BOOL2RUBY(matched);
1836
1996
  } else {
1837
1997
  if (n == INT_MAX) {
@@ -1847,17 +2007,10 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
1847
2007
  "not enough memory to allocate StringPieces for matches");
1848
2008
  }
1849
2009
 
1850
- text = rb_str_new_frozen(text);
2010
+ bool matched = re2_match_without_gvl(
2011
+ p->pattern, text, startpos, endpos, anchor, matches, n);
2012
+ RB_GC_GUARD(text);
1851
2013
 
1852
- #ifdef HAVE_ENDPOS_ARGUMENT
1853
- bool matched = p->pattern->Match(
1854
- re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
1855
- startpos, endpos, anchor, matches, n);
1856
- #else
1857
- bool matched = p->pattern->Match(
1858
- re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
1859
- startpos, anchor, matches, n);
1860
- #endif
1861
2014
  if (matched) {
1862
2015
  VALUE matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
1863
2016
  TypedData_Get_Struct(matchdata, re2_matchdata, &re2_matchdata_data_type, m);
@@ -1886,13 +2039,15 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
1886
2039
  * @raise [TypeError] if text cannot be coerced to a `String`
1887
2040
  */
1888
2041
  static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
1889
- /* Ensure text is a string. */
1890
2042
  StringValue(text);
2043
+ text = rb_str_new_frozen(text);
1891
2044
 
1892
2045
  re2_pattern *p = unwrap_re2_regexp(self);
2046
+ bool matched = re2_match_without_gvl(
2047
+ p->pattern, text, 0, RSTRING_LEN(text), RE2::UNANCHORED, 0, 0);
2048
+ RB_GC_GUARD(text);
1893
2049
 
1894
- return BOOL2RUBY(RE2::PartialMatch(
1895
- re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)), *p->pattern));
2050
+ return BOOL2RUBY(matched);
1896
2051
  }
1897
2052
 
1898
2053
  /*
@@ -1905,13 +2060,15 @@ static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
1905
2060
  * @raise [TypeError] if text cannot be coerced to a `String`
1906
2061
  */
1907
2062
  static VALUE re2_regexp_full_match_p(const VALUE self, VALUE text) {
1908
- /* Ensure text is a string. */
1909
2063
  StringValue(text);
2064
+ text = rb_str_new_frozen(text);
1910
2065
 
1911
2066
  re2_pattern *p = unwrap_re2_regexp(self);
2067
+ bool matched = re2_match_without_gvl(
2068
+ p->pattern, text, 0, RSTRING_LEN(text), RE2::ANCHOR_BOTH, 0, 0);
2069
+ RB_GC_GUARD(text);
1912
2070
 
1913
- return BOOL2RUBY(RE2::FullMatch(
1914
- re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)), *p->pattern));
2071
+ return BOOL2RUBY(matched);
1915
2072
  }
1916
2073
 
1917
2074
  /*
@@ -1927,8 +2084,8 @@ static VALUE re2_regexp_full_match_p(const VALUE self, VALUE text) {
1927
2084
  * #=> #<RE2::Scanner:0x0000000000000001>
1928
2085
  */
1929
2086
  static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
1930
- /* Ensure text is a string. */
1931
2087
  StringValue(text);
2088
+ text = rb_str_new_frozen(text);
1932
2089
 
1933
2090
  re2_pattern *p = unwrap_re2_regexp(self);
1934
2091
  re2_scanner *c;
@@ -1936,7 +2093,7 @@ static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
1936
2093
  TypedData_Get_Struct(scanner, re2_scanner, &re2_scanner_data_type, c);
1937
2094
 
1938
2095
  RB_OBJ_WRITE(scanner, &c->regexp, self);
1939
- RB_OBJ_WRITE(scanner, &c->text, rb_str_new_frozen(text));
2096
+ RB_OBJ_WRITE(scanner, &c->text, text);
1940
2097
  c->input = new(std::nothrow) re2::StringPiece(
1941
2098
  RSTRING_PTR(c->text), RSTRING_LEN(c->text));
1942
2099
  if (c->input == nullptr) {
@@ -1997,34 +2154,53 @@ static VALUE re2_regexp_match_has_endpos_argument_p(VALUE) {
1997
2154
  */
1998
2155
  static VALUE re2_replace(VALUE, VALUE str, VALUE pattern,
1999
2156
  VALUE rewrite) {
2000
- /* Ensure rewrite is a string. */
2001
- StringValue(rewrite);
2157
+ re2_pattern *p = nullptr;
2002
2158
 
2003
- re2_pattern *p;
2004
-
2005
- /* Take a copy of str so it can be modified in-place by
2006
- * RE2::Replace.
2159
+ /* Coerce and freeze all arguments before any C++ allocations so that any
2160
+ * Ruby exceptions (via longjmp) cannot bypass C++ destructors and leak
2161
+ * memory, and later coercions cannot mutate earlier strings.
2007
2162
  */
2008
2163
  StringValue(str);
2009
- std::string str_as_string(RSTRING_PTR(str), RSTRING_LEN(str));
2010
-
2011
- /* Do the replacement. */
2164
+ str = rb_str_new_frozen(str);
2012
2165
  if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
2013
2166
  p = unwrap_re2_regexp(pattern);
2014
- RE2::Replace(&str_as_string, *p->pattern,
2015
- re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
2016
-
2017
- return encoded_str_new(str_as_string.data(), str_as_string.size(),
2018
- p->pattern->options().encoding());
2019
2167
  } else {
2020
- /* Ensure pattern is a string. */
2021
2168
  StringValue(pattern);
2169
+ pattern = rb_str_new_frozen(pattern);
2170
+ }
2171
+ StringValue(rewrite);
2172
+ rewrite = rb_str_new_frozen(rewrite);
2173
+
2174
+ /* Take a copy of str so it can be modified in-place by RE2::Replace. */
2175
+ std::string str_as_string(RSTRING_PTR(str), RSTRING_LEN(str));
2176
+
2177
+ nogvl_replace_arg arg;
2178
+ arg.str = &str_as_string;
2179
+ if (p) {
2180
+ arg.pattern = p->pattern;
2181
+ } else {
2182
+ arg.pattern = nullptr;
2183
+ arg.string_pattern = re2::StringPiece(
2184
+ RSTRING_PTR(pattern), RSTRING_LEN(pattern));
2185
+ }
2186
+ arg.rewrite = re2::StringPiece(
2187
+ RSTRING_PTR(rewrite), RSTRING_LEN(rewrite));
2188
+
2189
+ #ifdef _WIN32
2190
+ nogvl_replace(&arg);
2191
+ #else
2192
+ rb_thread_call_without_gvl(nogvl_replace, &arg, NULL, NULL);
2193
+ #endif
2022
2194
 
2023
- RE2::Replace(&str_as_string,
2024
- re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)),
2025
- re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
2195
+ RB_GC_GUARD(rewrite);
2196
+ RB_GC_GUARD(pattern);
2026
2197
 
2027
- return encoded_str_new(str_as_string.data(), str_as_string.size(), RE2::Options::EncodingUTF8);
2198
+ if (p) {
2199
+ return encoded_str_new(str_as_string.data(), str_as_string.size(),
2200
+ p->pattern->options().encoding());
2201
+ } else {
2202
+ return encoded_str_new(str_as_string.data(), str_as_string.size(),
2203
+ RE2::Options::EncodingUTF8);
2028
2204
  }
2029
2205
  }
2030
2206
 
@@ -2050,33 +2226,55 @@ static VALUE re2_replace(VALUE, VALUE str, VALUE pattern,
2050
2226
  */
2051
2227
  static VALUE re2_global_replace(VALUE, VALUE str, VALUE pattern,
2052
2228
  VALUE rewrite) {
2053
- /* Ensure rewrite is a string. */
2229
+ re2_pattern *p = nullptr;
2230
+
2231
+ /* Coerce and freeze all arguments before any C++ allocations so that any
2232
+ * Ruby exceptions (via longjmp) cannot bypass C++ destructors and leak
2233
+ * memory, and later coercions cannot mutate earlier strings.
2234
+ */
2235
+ StringValue(str);
2236
+ str = rb_str_new_frozen(str);
2237
+ if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
2238
+ p = unwrap_re2_regexp(pattern);
2239
+ } else {
2240
+ StringValue(pattern);
2241
+ pattern = rb_str_new_frozen(pattern);
2242
+ }
2054
2243
  StringValue(rewrite);
2244
+ rewrite = rb_str_new_frozen(rewrite);
2055
2245
 
2056
2246
  /* Take a copy of str so it can be modified in-place by
2057
2247
  * RE2::GlobalReplace.
2058
2248
  */
2059
- re2_pattern *p;
2060
- StringValue(str);
2061
2249
  std::string str_as_string(RSTRING_PTR(str), RSTRING_LEN(str));
2062
2250
 
2063
- /* Do the replacement. */
2064
- if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
2065
- p = unwrap_re2_regexp(pattern);
2066
- RE2::GlobalReplace(&str_as_string, *p->pattern,
2067
- re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
2251
+ nogvl_replace_arg arg;
2252
+ arg.str = &str_as_string;
2253
+ if (p) {
2254
+ arg.pattern = p->pattern;
2255
+ } else {
2256
+ arg.pattern = nullptr;
2257
+ arg.string_pattern = re2::StringPiece(
2258
+ RSTRING_PTR(pattern), RSTRING_LEN(pattern));
2259
+ }
2260
+ arg.rewrite = re2::StringPiece(
2261
+ RSTRING_PTR(rewrite), RSTRING_LEN(rewrite));
2262
+
2263
+ #ifdef _WIN32
2264
+ nogvl_global_replace(&arg);
2265
+ #else
2266
+ rb_thread_call_without_gvl(nogvl_global_replace, &arg, NULL, NULL);
2267
+ #endif
2268
+
2269
+ RB_GC_GUARD(rewrite);
2270
+ RB_GC_GUARD(pattern);
2068
2271
 
2272
+ if (p) {
2069
2273
  return encoded_str_new(str_as_string.data(), str_as_string.size(),
2070
2274
  p->pattern->options().encoding());
2071
2275
  } else {
2072
- /* Ensure pattern is a string. */
2073
- StringValue(pattern);
2074
-
2075
- RE2::GlobalReplace(&str_as_string,
2076
- re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)),
2077
- re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
2078
-
2079
- return encoded_str_new(str_as_string.data(), str_as_string.size(), RE2::Options::EncodingUTF8);
2276
+ return encoded_str_new(str_as_string.data(), str_as_string.size(),
2277
+ RE2::Options::EncodingUTF8);
2080
2278
  }
2081
2279
  }
2082
2280
 
@@ -2104,44 +2302,55 @@ static VALUE re2_global_replace(VALUE, VALUE str, VALUE pattern,
2104
2302
  */
2105
2303
  static VALUE re2_extract(VALUE, VALUE text, VALUE pattern,
2106
2304
  VALUE rewrite) {
2107
- /* Ensure rewrite and text are strings. */
2108
- StringValue(rewrite);
2109
- StringValue(text);
2110
-
2111
- re2_pattern *p;
2112
- std::string out;
2113
- bool extracted;
2305
+ re2_pattern *p = nullptr;
2114
2306
 
2307
+ /* Coerce and freeze all arguments before any C++ allocations so that any
2308
+ * Ruby exceptions (via longjmp) cannot bypass C++ destructors and leak
2309
+ * memory, and later coercions cannot mutate earlier strings.
2310
+ */
2311
+ StringValue(text);
2312
+ text = rb_str_new_frozen(text);
2115
2313
  if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
2116
2314
  p = unwrap_re2_regexp(pattern);
2117
- extracted = RE2::Extract(
2118
- re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
2119
- *p->pattern,
2120
- re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)),
2121
- &out);
2122
-
2123
- if (extracted) {
2124
- return encoded_str_new(out.data(), out.size(),
2125
- p->pattern->options().encoding());
2126
- } else {
2127
- return Qnil;
2128
- }
2129
2315
  } else {
2130
- /* Ensure pattern is a string. */
2131
2316
  StringValue(pattern);
2317
+ pattern = rb_str_new_frozen(pattern);
2318
+ }
2319
+ StringValue(rewrite);
2320
+ rewrite = rb_str_new_frozen(rewrite);
2132
2321
 
2133
- extracted = RE2::Extract(
2134
- re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
2135
- RE2(re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern))),
2136
- re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)),
2137
- &out);
2322
+ std::string out;
2138
2323
 
2139
- if (extracted) {
2140
- return encoded_str_new(out.data(), out.size(),
2141
- RE2::Options::EncodingUTF8);
2142
- } else {
2143
- return Qnil;
2144
- }
2324
+ nogvl_extract_arg arg;
2325
+ arg.text = re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text));
2326
+ if (p) {
2327
+ arg.pattern = p->pattern;
2328
+ } else {
2329
+ arg.pattern = nullptr;
2330
+ arg.string_pattern = re2::StringPiece(
2331
+ RSTRING_PTR(pattern), RSTRING_LEN(pattern));
2332
+ }
2333
+ arg.rewrite = re2::StringPiece(
2334
+ RSTRING_PTR(rewrite), RSTRING_LEN(rewrite));
2335
+ arg.out = &out;
2336
+ arg.extracted = false;
2337
+
2338
+ #ifdef _WIN32
2339
+ nogvl_extract(&arg);
2340
+ #else
2341
+ rb_thread_call_without_gvl(nogvl_extract, &arg, NULL, NULL);
2342
+ #endif
2343
+
2344
+ RB_GC_GUARD(text);
2345
+ RB_GC_GUARD(rewrite);
2346
+ RB_GC_GUARD(pattern);
2347
+
2348
+ if (arg.extracted) {
2349
+ return encoded_str_new(out.data(), out.size(),
2350
+ p ? p->pattern->options().encoding()
2351
+ : RE2::Options::EncodingUTF8);
2352
+ } else {
2353
+ return Qnil;
2145
2354
  }
2146
2355
  }
2147
2356
 
@@ -2171,7 +2380,7 @@ static VALUE re2_escape(VALUE, VALUE unquoted) {
2171
2380
  }
2172
2381
 
2173
2382
  static void re2_set_free(void *ptr) {
2174
- re2_set *s = reinterpret_cast<re2_set *>(ptr);
2383
+ re2_set *s = static_cast<re2_set *>(ptr);
2175
2384
  if (s->set) {
2176
2385
  delete s->set;
2177
2386
  }
@@ -2179,7 +2388,7 @@ static void re2_set_free(void *ptr) {
2179
2388
  }
2180
2389
 
2181
2390
  static size_t re2_set_memsize(const void *ptr) {
2182
- const re2_set *s = reinterpret_cast<const re2_set *>(ptr);
2391
+ const re2_set *s = static_cast<const re2_set *>(ptr);
2183
2392
  size_t size = sizeof(*s);
2184
2393
  if (s->set) {
2185
2394
  size += sizeof(*s->set);
@@ -2199,7 +2408,7 @@ static const rb_data_type_t re2_set_data_type = {
2199
2408
  0,
2200
2409
  // IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
2201
2410
  // macro to update VALUE references, as to trigger write barriers.
2202
- RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
2411
+ RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE
2203
2412
  };
2204
2413
 
2205
2414
  static re2_set *unwrap_re2_set(VALUE self) {
@@ -2292,8 +2501,11 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
2292
2501
  parse_re2_options(&re2_options, options);
2293
2502
  }
2294
2503
 
2504
+ rb_check_frozen(self);
2505
+
2295
2506
  if (s->set) {
2296
2507
  delete s->set;
2508
+ s->set = nullptr;
2297
2509
  }
2298
2510
 
2299
2511
  s->set = new(std::nothrow) RE2::Set(re2_options, re2_anchor);
@@ -2321,6 +2533,7 @@ static VALUE re2_set_add(VALUE self, VALUE pattern) {
2321
2533
  StringValue(pattern);
2322
2534
 
2323
2535
  re2_set *s = unwrap_re2_set(self);
2536
+ rb_check_frozen(self);
2324
2537
 
2325
2538
  int index;
2326
2539
  VALUE msg;
@@ -2352,8 +2565,15 @@ static VALUE re2_set_add(VALUE self, VALUE pattern) {
2352
2565
  */
2353
2566
  static VALUE re2_set_compile(VALUE self) {
2354
2567
  re2_set *s = unwrap_re2_set(self);
2568
+ rb_check_frozen(self);
2355
2569
 
2356
- return BOOL2RUBY(s->set->Compile());
2570
+ bool compiled = s->set->Compile();
2571
+
2572
+ if (compiled) {
2573
+ rb_obj_freeze(self);
2574
+ }
2575
+
2576
+ return BOOL2RUBY(compiled);
2357
2577
  }
2358
2578
 
2359
2579
  /*
@@ -2450,6 +2670,8 @@ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
2450
2670
  rb_scan_args(argc, argv, "11", &str, &options);
2451
2671
 
2452
2672
  StringValue(str);
2673
+ str = rb_str_new_frozen(str);
2674
+
2453
2675
  re2_set *s = unwrap_re2_set(self);
2454
2676
 
2455
2677
  if (RTEST(options)) {
@@ -2466,8 +2688,21 @@ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
2466
2688
  if (raise_exception) {
2467
2689
  #ifdef HAVE_ERROR_INFO_ARGUMENT
2468
2690
  RE2::Set::ErrorInfo e;
2469
- bool match_failed = !s->set->Match(
2470
- re2::StringPiece(RSTRING_PTR(str), RSTRING_LEN(str)), &v, &e);
2691
+ nogvl_set_match_arg arg;
2692
+ arg.set = s->set;
2693
+ arg.text = re2::StringPiece(RSTRING_PTR(str), RSTRING_LEN(str));
2694
+ arg.v = &v;
2695
+ arg.error_info = &e;
2696
+ arg.matched = false;
2697
+
2698
+ #ifdef _WIN32
2699
+ nogvl_set_match(&arg);
2700
+ #else
2701
+ rb_thread_call_without_gvl(nogvl_set_match, &arg, NULL, NULL);
2702
+ #endif
2703
+ RB_GC_GUARD(str);
2704
+
2705
+ bool match_failed = !arg.matched;
2471
2706
  VALUE result = rb_ary_new2(v.size());
2472
2707
 
2473
2708
  if (match_failed) {
@@ -2494,11 +2729,25 @@ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
2494
2729
  rb_raise(re2_eSetUnsupportedError, "current version of RE2::Set::Match() does not output error information, :exception option can only be set to false");
2495
2730
  #endif
2496
2731
  } else {
2497
- bool matched = s->set->Match(
2498
- re2::StringPiece(RSTRING_PTR(str), RSTRING_LEN(str)), &v);
2732
+ nogvl_set_match_arg arg;
2733
+ arg.set = s->set;
2734
+ arg.text = re2::StringPiece(RSTRING_PTR(str), RSTRING_LEN(str));
2735
+ arg.v = &v;
2736
+ #ifdef HAVE_ERROR_INFO_ARGUMENT
2737
+ arg.error_info = nullptr;
2738
+ #endif
2739
+ arg.matched = false;
2740
+
2741
+ #ifdef _WIN32
2742
+ nogvl_set_match(&arg);
2743
+ #else
2744
+ rb_thread_call_without_gvl(nogvl_set_match, &arg, NULL, NULL);
2745
+ #endif
2746
+ RB_GC_GUARD(str);
2747
+
2499
2748
  VALUE result = rb_ary_new2(v.size());
2500
2749
 
2501
- if (matched) {
2750
+ if (arg.matched) {
2502
2751
  for (int index : v) {
2503
2752
  rb_ary_push(result, INT2FIX(index));
2504
2753
  }
@@ -2509,6 +2758,8 @@ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
2509
2758
  }
2510
2759
 
2511
2760
  extern "C" void Init_re2(void) {
2761
+ rb_ext_ractor_safe(true);
2762
+
2512
2763
  re2_mRE2 = rb_define_module("RE2");
2513
2764
  re2_cRegexp = rb_define_class_under(re2_mRE2, "Regexp", rb_cObject);
2514
2765
  re2_eRegexpUnsupportedError = rb_define_class_under(re2_cRegexp,
data/lib/3.1/re2.so CHANGED
Binary file
data/lib/3.2/re2.so CHANGED
Binary file
data/lib/3.3/re2.so CHANGED
Binary file
data/lib/3.4/re2.so CHANGED
Binary file
data/lib/4.0/re2.so CHANGED
Binary file
data/lib/re2/version.rb CHANGED
@@ -10,5 +10,5 @@
10
10
 
11
11
 
12
12
  module RE2
13
- VERSION = "2.26.1"
13
+ VERSION = "2.27.0"
14
14
  end
@@ -39,6 +39,16 @@ RSpec.describe RE2::Regexp do
39
39
 
40
40
  expect(re).to be_a(RE2::Regexp)
41
41
  end
42
+
43
+ it "returns a frozen object" do
44
+ expect(RE2::Regexp.new('woo')).to be_frozen
45
+ end
46
+
47
+ it "cannot be re-initialized" do
48
+ re = RE2::Regexp.new('woo')
49
+
50
+ expect { re.send(:initialize, 'bar') }.to raise_error(FrozenError)
51
+ end
42
52
  end
43
53
 
44
54
  describe "#dup" do
@@ -70,6 +80,13 @@ RSpec.describe RE2::Regexp do
70
80
  expect(copy).to_not be_case_sensitive
71
81
  end
72
82
 
83
+ it "returns a frozen copy" do
84
+ re = described_class.new('(\d+)')
85
+ copy = re.dup
86
+
87
+ expect(copy).to be_frozen
88
+ end
89
+
73
90
  it "raises an error when called on an uninitialized object" do
74
91
  expect { described_class.allocate.dup }.to raise_error(TypeError, /uninitialized RE2::Regexp/)
75
92
  end
@@ -83,6 +100,13 @@ RSpec.describe RE2::Regexp do
83
100
  expect(copy.to_s).to eq('woo')
84
101
  end
85
102
 
103
+ it "returns a frozen copy" do
104
+ re = described_class.new('woo')
105
+ copy = re.clone
106
+
107
+ expect(copy).to be_frozen
108
+ end
109
+
86
110
  it "raises an error when called on an uninitialized object" do
87
111
  expect { described_class.allocate.clone }.to raise_error(TypeError, /uninitialized RE2::Regexp/)
88
112
  end
@@ -757,9 +781,28 @@ RSpec.describe RE2::Regexp do
757
781
  expect(re.match("one two three", nil)).to be_a(RE2::MatchData)
758
782
  end
759
783
 
784
+ it "raises an error when startpos exceeds INT_MAX on old RE2 ABI" do
785
+ skip "Underlying RE2::Match does not take int startpos" if RE2::Regexp.match_has_endpos_argument?
786
+ skip "size_t is not larger than a 32-bit int" if RbConfig::SIZEOF.fetch("size_t") <= (32 / 8)
787
+
788
+ re = RE2::Regexp.new('(\w+)', log_errors: false)
789
+
790
+ expect { re.match("test", start_pos: 2_147_483_648) }.to raise_error(RangeError, /startpos should be <=/)
791
+ end
792
+
760
793
  it "raises an error when called on an uninitialized object" do
761
794
  expect { described_class.allocate.match("test") }.to raise_error(TypeError, /uninitialized RE2::Regexp/)
762
795
  end
796
+
797
+ it "can be run concurrently" do
798
+ re = RE2::Regexp.new('(\w+)\s(\w+)')
799
+
800
+ threads = 10.times.map do
801
+ Thread.new { re.match("one two").values_at(1, 2) }
802
+ end
803
+
804
+ expect(threads.map(&:value)).to all(eq(["one", "two"]))
805
+ end
763
806
  end
764
807
 
765
808
  describe "#match?" do
@@ -785,6 +828,16 @@ RSpec.describe RE2::Regexp do
785
828
  it "raises an error when called on an uninitialized object" do
786
829
  expect { described_class.allocate.match?("test") }.to raise_error(TypeError, /uninitialized RE2::Regexp/)
787
830
  end
831
+
832
+ it "can be run concurrently" do
833
+ re = RE2::Regexp.new('(\w+)\s(\w+)')
834
+
835
+ threads = 10.times.map do
836
+ Thread.new { re.match?("one two") }
837
+ end
838
+
839
+ expect(threads.map(&:value)).to all(eq(true))
840
+ end
788
841
  end
789
842
 
790
843
  describe "#partial_match?" do
@@ -817,6 +870,16 @@ RSpec.describe RE2::Regexp do
817
870
  it "raises an error when called on an uninitialized object" do
818
871
  expect { described_class.allocate.partial_match?("test") }.to raise_error(TypeError, /uninitialized RE2::Regexp/)
819
872
  end
873
+
874
+ it "can be run concurrently" do
875
+ re = RE2::Regexp.new('(\d+)')
876
+
877
+ threads = 10.times.map do
878
+ Thread.new { re.partial_match?("alice 123") }
879
+ end
880
+
881
+ expect(threads.map(&:value)).to all(eq(true))
882
+ end
820
883
  end
821
884
 
822
885
  describe "#=~" do
@@ -906,6 +969,16 @@ RSpec.describe RE2::Regexp do
906
969
  it "raises an error when called on an uninitialized object" do
907
970
  expect { described_class.allocate.full_match?("test") }.to raise_error(TypeError, /uninitialized RE2::Regexp/)
908
971
  end
972
+
973
+ it "can be run concurrently" do
974
+ re = RE2::Regexp.new('(\w+) (\d+)')
975
+
976
+ threads = 10.times.map do
977
+ Thread.new { re.full_match?("alice 123") }
978
+ end
979
+
980
+ expect(threads.map(&:value)).to all(eq(true))
981
+ end
909
982
  end
910
983
 
911
984
  describe "#ok?" do
@@ -243,6 +243,52 @@ RSpec.describe RE2::Scanner do
243
243
  expect(scanner.scan).to be_nil
244
244
  end
245
245
 
246
+ it "advances by whole characters with zero-width matches on 2-byte UTF-8 input", :aggregate_failures do
247
+ r = RE2::Regexp.new("")
248
+ scanner = r.scan("à")
249
+
250
+ expect(scanner.scan).to eq([])
251
+ expect(scanner.scan).to eq([])
252
+ expect(scanner.scan).to be_nil
253
+ end
254
+
255
+ it "advances by whole characters with zero-width matches on 3-byte UTF-8 input", :aggregate_failures do
256
+ r = RE2::Regexp.new("")
257
+ scanner = r.scan("\u20AC")
258
+
259
+ expect(scanner.scan).to eq([])
260
+ expect(scanner.scan).to eq([])
261
+ expect(scanner.scan).to be_nil
262
+ end
263
+
264
+ it "advances by whole characters with zero-width matches on 4-byte UTF-8 input", :aggregate_failures do
265
+ r = RE2::Regexp.new("")
266
+ scanner = r.scan("\u{1F600}")
267
+
268
+ expect(scanner.scan).to eq([])
269
+ expect(scanner.scan).to eq([])
270
+ expect(scanner.scan).to be_nil
271
+ end
272
+
273
+ it "advances by single bytes with zero-width matches on Latin-1 input", :aggregate_failures do
274
+ r = RE2::Regexp.new("", utf8: false)
275
+ scanner = r.scan("\xC3\xA0")
276
+
277
+ expect(scanner.scan).to eq([])
278
+ expect(scanner.scan).to eq([])
279
+ expect(scanner.scan).to eq([])
280
+ expect(scanner.scan).to be_nil
281
+ end
282
+
283
+ it "handles truncated multi-byte sequences at the end of input", :aggregate_failures do
284
+ r = RE2::Regexp.new("")
285
+ scanner = r.scan("\xC3")
286
+
287
+ expect(scanner.scan).to eq([])
288
+ expect(scanner.scan).to eq([])
289
+ expect(scanner.scan).to be_nil
290
+ end
291
+
246
292
  it "raises a Type Error if given input that can't be coerced to a String" do
247
293
  r = RE2::Regexp.new('(\w+)')
248
294
 
data/spec/re2/set_spec.rb CHANGED
@@ -84,14 +84,12 @@ RSpec.describe RE2::Set do
84
84
  expect { set.add("(?P<#{'o' * 200}") }.to raise_error(ArgumentError, "str rejected by RE2::Set->Add(): invalid named capture group: (?P<oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo")
85
85
  end
86
86
 
87
- it "raises an error if called after #compile" do
87
+ it "raises a FrozenError if called after #compile" do
88
88
  set = RE2::Set.new(:unanchored, log_errors: false)
89
89
  set.add("abc")
90
90
  set.compile
91
91
 
92
- silence_stderr do
93
- expect { set.add("def") }.to raise_error(ArgumentError)
94
- end
92
+ expect { set.add("def") }.to raise_error(FrozenError)
95
93
  end
96
94
 
97
95
  it "raises an error if given a pattern that can't be coerced to a String" do
@@ -121,6 +119,29 @@ RSpec.describe RE2::Set do
121
119
  expect(set.compile).to be_truthy
122
120
  end
123
121
 
122
+ it "freezes the set on successful compilation" do
123
+ set = RE2::Set.new
124
+ set.add("abc")
125
+ set.compile
126
+
127
+ expect(set).to be_frozen
128
+ end
129
+
130
+ it "is not frozen before compilation" do
131
+ set = RE2::Set.new
132
+ set.add("abc")
133
+
134
+ expect(set).to_not be_frozen
135
+ end
136
+
137
+ it "cannot be re-initialized after compilation" do
138
+ set = RE2::Set.new
139
+ set.add("abc")
140
+ set.compile
141
+
142
+ expect { set.send(:initialize) }.to raise_error(FrozenError)
143
+ end
144
+
124
145
  it "raises an error when called on an uninitialized object" do
125
146
  expect { described_class.allocate.compile }.to raise_error(TypeError, /uninitialized RE2::Set/)
126
147
  end
@@ -226,6 +247,20 @@ RSpec.describe RE2::Set do
226
247
  it "raises an error when called on an uninitialized object" do
227
248
  expect { described_class.allocate.match("foo") }.to raise_error(TypeError, /uninitialized RE2::Set/)
228
249
  end
250
+
251
+ it "can be run concurrently" do
252
+ set = RE2::Set.new
253
+ set.add("abc")
254
+ set.add("def")
255
+ set.add("ghi")
256
+ set.compile
257
+
258
+ threads = 10.times.map do
259
+ Thread.new { set.match("abcdefghi", exception: false) }
260
+ end
261
+
262
+ expect(threads.map(&:value)).to all(eq([0, 1, 2]))
263
+ end
229
264
  end
230
265
 
231
266
  describe "#size" do
data/spec/re2_spec.rb CHANGED
@@ -86,9 +86,33 @@ RSpec.describe RE2 do
86
86
  expect { RE2.replace("woo", 0, "ah") }.to raise_error(TypeError)
87
87
  end
88
88
 
89
+ it "does not leak memory when given a non-String, non-RE2::Regexp pattern" do
90
+ expect { RE2.replace("a" * 128, 0, "ah") }.to raise_error(TypeError)
91
+ end
92
+
89
93
  it "raises a Type Error for a replacement that can't be converted to String" do
90
94
  expect { RE2.replace("woo", "oo", 0) }.to raise_error(TypeError)
91
95
  end
96
+
97
+ it "can be run concurrently with the same RE2::Regexp pattern" do
98
+ re = RE2::Regexp.new('(\w+)\s(\w+)')
99
+
100
+ threads = 10.times.map do
101
+ Thread.new { RE2.replace("one two", re, '\2 \1') }
102
+ end
103
+
104
+ expect(threads.map(&:value)).to all(eq("two one"))
105
+ end
106
+
107
+ it "can be run concurrently with the same string pattern" do
108
+ re = '(\w+)\s(\w+)'
109
+
110
+ threads = 10.times.map do
111
+ Thread.new { RE2.replace("one two", re, '\2 \1') }
112
+ end
113
+
114
+ expect(threads.map(&:value)).to all(eq("two one"))
115
+ end
92
116
  end
93
117
 
94
118
  describe ".Replace" do
@@ -182,9 +206,33 @@ RSpec.describe RE2 do
182
206
  expect { RE2.global_replace("woo", 0, "a") }.to raise_error(TypeError)
183
207
  end
184
208
 
209
+ it "does not leak memory when given a non-String, non-RE2::Regexp pattern" do
210
+ expect { RE2.global_replace("a" * 128, 0, "a") }.to raise_error(TypeError)
211
+ end
212
+
185
213
  it "raises a Type Error for a replacement that can't be converted to String" do
186
214
  expect { RE2.global_replace("woo", "o", 0) }.to raise_error(TypeError)
187
215
  end
216
+
217
+ it "can be run concurrently with the same RE2::Regexp pattern" do
218
+ re = RE2::Regexp.new('(\w+)\s(\w+)')
219
+
220
+ threads = 10.times.map do
221
+ Thread.new { RE2.global_replace("one two three four", re, '\2 \1') }
222
+ end
223
+
224
+ expect(threads.map(&:value)).to all(eq("two one four three"))
225
+ end
226
+
227
+ it "can be run concurrently with the same string pattern" do
228
+ re = '(\w+)\s(\w+)'
229
+
230
+ threads = 10.times.map do
231
+ Thread.new { RE2.global_replace("one two three four", re, '\2 \1') }
232
+ end
233
+
234
+ expect(threads.map(&:value)).to all(eq("two one four three"))
235
+ end
188
236
  end
189
237
 
190
238
  describe ".GlobalReplace" do
@@ -266,9 +314,33 @@ RSpec.describe RE2 do
266
314
  expect { RE2.extract("woo", 0, '\1') }.to raise_error(TypeError)
267
315
  end
268
316
 
317
+ it "does not leak memory when given a non-String, non-RE2::Regexp pattern" do
318
+ expect { RE2.extract("a" * 128, 0, '\1') }.to raise_error(TypeError)
319
+ end
320
+
269
321
  it "raises a Type Error for a rewrite that can't be converted to String" do
270
322
  expect { RE2.extract("woo", '(\w+)', 0) }.to raise_error(TypeError)
271
323
  end
324
+
325
+ it "can be run concurrently with the same RE2::Regexp pattern" do
326
+ re = RE2::Regexp.new('(\w+)@(\w+)')
327
+
328
+ threads = 10.times.map do
329
+ Thread.new { RE2.extract("alice@example", re, '\2-\1') }
330
+ end
331
+
332
+ expect(threads.map(&:value)).to all(eq("example-alice"))
333
+ end
334
+
335
+ it "can be run concurrently with the same string pattern" do
336
+ re = '(\w+)@(\w+)'
337
+
338
+ threads = 10.times.map do
339
+ Thread.new { RE2.extract("alice@example", re, '\2-\1') }
340
+ end
341
+
342
+ expect(threads.map(&:value)).to all(eq("example-alice"))
343
+ end
272
344
  end
273
345
 
274
346
  describe "#escape" do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: re2
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.26.1
4
+ version: 2.27.0
5
5
  platform: x86_64-linux-gnu
6
6
  authors:
7
7
  - Paul Mucur