re2 2.26.2-x86_64-linux-gnu → 2.27.0-x86_64-linux-gnu
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/re2/re2.cc +308 -98
- data/lib/3.1/re2.so +0 -0
- data/lib/3.2/re2.so +0 -0
- data/lib/3.3/re2.so +0 -0
- data/lib/3.4/re2.so +0 -0
- data/lib/4.0/re2.so +0 -0
- data/lib/re2/version.rb +1 -1
- data/spec/re2/regexp_spec.rb +64 -0
- data/spec/re2/set_spec.rb +39 -4
- data/spec/re2_spec.rb +60 -0
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: b86c49eb21074e4ef2eaa58dc1234224074621dffff2ac3d7fd2d8c6b4214daf
|
|
4
|
+
data.tar.gz: bf270c0bc858ee0e63d2d3514f7a35c99539455b0dbac7fef145b33a1eb6251a
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 0fcf3b3b7d9f47754f70996160a26be706afd222f75c181a114e4014c088642cb3b2d185fedc2dc58fc54f2211ac834ac5f7526ff748dd894c4ed8d4eca5ebbf
|
|
7
|
+
data.tar.gz: 2e4d7a22246ea69ebc4d00428f347033d5e2d911e3f46735b662eb1be4be541d31dbe33efbad9165067a284e1c21c2d152a18c824ebb6ecec9f66e5eeb7ab631
|
data/README.md
CHANGED
|
@@ -6,7 +6,7 @@ Python".
|
|
|
6
6
|
|
|
7
7
|
[](https://github.com/mudge/re2/actions)
|
|
8
8
|
|
|
9
|
-
**Current version:** 2.26.2
|
|
9
|
+
**Current version:** 2.27.0
|
|
10
10
|
**Bundled RE2 version:** libre2.11 (2025-11-05)
|
|
11
11
|
|
|
12
12
|
```ruby
|
data/ext/re2/re2.cc
CHANGED
|
@@ -19,6 +19,7 @@
|
|
|
19
19
|
#include <re2/set.h>
|
|
20
20
|
#include <ruby.h>
|
|
21
21
|
#include <ruby/encoding.h>
|
|
22
|
+
#include <ruby/thread.h>
|
|
22
23
|
|
|
23
24
|
#define BOOL2RUBY(v) (v ? Qtrue : Qfalse)
|
|
24
25
|
|
|
@@ -43,6 +44,132 @@ typedef struct {
|
|
|
43
44
|
RE2::Set *set;
|
|
44
45
|
} re2_set;
|
|
45
46
|
|
|
47
|
+
struct nogvl_match_arg {
|
|
48
|
+
const RE2 *pattern;
|
|
49
|
+
re2::StringPiece text;
|
|
50
|
+
size_t startpos;
|
|
51
|
+
size_t endpos;
|
|
52
|
+
RE2::Anchor anchor;
|
|
53
|
+
re2::StringPiece *matches;
|
|
54
|
+
int n;
|
|
55
|
+
bool matched;
|
|
56
|
+
};
|
|
57
|
+
|
|
58
|
+
static void *nogvl_match(void *ptr) {
|
|
59
|
+
auto *arg = static_cast<nogvl_match_arg *>(ptr);
|
|
60
|
+
#ifdef HAVE_ENDPOS_ARGUMENT
|
|
61
|
+
arg->matched = arg->pattern->Match(
|
|
62
|
+
arg->text, arg->startpos, arg->endpos,
|
|
63
|
+
arg->anchor, arg->matches, arg->n);
|
|
64
|
+
#else
|
|
65
|
+
arg->matched = arg->pattern->Match(
|
|
66
|
+
arg->text, arg->startpos,
|
|
67
|
+
arg->anchor, arg->matches, arg->n);
|
|
68
|
+
#endif
|
|
69
|
+
return nullptr;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
static bool re2_match_without_gvl(
|
|
73
|
+
const RE2 *pattern, VALUE text, size_t startpos, size_t endpos,
|
|
74
|
+
RE2::Anchor anchor, re2::StringPiece *matches, int n) {
|
|
75
|
+
nogvl_match_arg arg;
|
|
76
|
+
arg.pattern = pattern;
|
|
77
|
+
arg.text = re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text));
|
|
78
|
+
arg.startpos = startpos;
|
|
79
|
+
arg.endpos = endpos;
|
|
80
|
+
arg.anchor = anchor;
|
|
81
|
+
arg.matches = matches;
|
|
82
|
+
arg.n = n;
|
|
83
|
+
arg.matched = false;
|
|
84
|
+
|
|
85
|
+
/* Abseil's synchronization primitives (SRWLOCK, SleepConditionVariableSRW)
|
|
86
|
+
* are incompatible with Ruby's Win32 Mutex-based GVL, causing
|
|
87
|
+
* WAIT_ABANDONED crashes when multiple threads match concurrently.
|
|
88
|
+
*/
|
|
89
|
+
#ifdef _WIN32
|
|
90
|
+
nogvl_match(&arg);
|
|
91
|
+
#else
|
|
92
|
+
/* No unblocking function is needed: RE2 matching is CPU-bound computation,
|
|
93
|
+
* not a blocking system call, so a signal cannot safely interrupt it.
|
|
94
|
+
*/
|
|
95
|
+
rb_thread_call_without_gvl(nogvl_match, &arg, NULL, NULL);
|
|
96
|
+
#endif
|
|
97
|
+
|
|
98
|
+
return arg.matched;
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
struct nogvl_set_match_arg {
|
|
102
|
+
const RE2::Set *set;
|
|
103
|
+
re2::StringPiece text;
|
|
104
|
+
std::vector<int> *v;
|
|
105
|
+
#ifdef HAVE_ERROR_INFO_ARGUMENT
|
|
106
|
+
RE2::Set::ErrorInfo *error_info;
|
|
107
|
+
#endif
|
|
108
|
+
bool matched;
|
|
109
|
+
};
|
|
110
|
+
|
|
111
|
+
static void *nogvl_set_match(void *ptr) {
|
|
112
|
+
auto *arg = static_cast<nogvl_set_match_arg *>(ptr);
|
|
113
|
+
#ifdef HAVE_ERROR_INFO_ARGUMENT
|
|
114
|
+
if (arg->error_info) {
|
|
115
|
+
arg->matched = arg->set->Match(arg->text, arg->v, arg->error_info);
|
|
116
|
+
} else {
|
|
117
|
+
arg->matched = arg->set->Match(arg->text, arg->v);
|
|
118
|
+
}
|
|
119
|
+
#else
|
|
120
|
+
arg->matched = arg->set->Match(arg->text, arg->v);
|
|
121
|
+
#endif
|
|
122
|
+
return nullptr;
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
struct nogvl_replace_arg {
|
|
126
|
+
std::string *str;
|
|
127
|
+
const RE2 *pattern;
|
|
128
|
+
re2::StringPiece string_pattern;
|
|
129
|
+
re2::StringPiece rewrite;
|
|
130
|
+
};
|
|
131
|
+
|
|
132
|
+
static void *nogvl_replace(void *ptr) {
|
|
133
|
+
auto *arg = static_cast<nogvl_replace_arg *>(ptr);
|
|
134
|
+
if (arg->pattern) {
|
|
135
|
+
RE2::Replace(arg->str, *arg->pattern, arg->rewrite);
|
|
136
|
+
} else {
|
|
137
|
+
RE2::Replace(arg->str, arg->string_pattern, arg->rewrite);
|
|
138
|
+
}
|
|
139
|
+
return nullptr;
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
static void *nogvl_global_replace(void *ptr) {
|
|
143
|
+
auto *arg = static_cast<nogvl_replace_arg *>(ptr);
|
|
144
|
+
if (arg->pattern) {
|
|
145
|
+
RE2::GlobalReplace(arg->str, *arg->pattern, arg->rewrite);
|
|
146
|
+
} else {
|
|
147
|
+
RE2::GlobalReplace(arg->str, arg->string_pattern, arg->rewrite);
|
|
148
|
+
}
|
|
149
|
+
return nullptr;
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
struct nogvl_extract_arg {
|
|
153
|
+
re2::StringPiece text;
|
|
154
|
+
const RE2 *pattern;
|
|
155
|
+
re2::StringPiece string_pattern;
|
|
156
|
+
re2::StringPiece rewrite;
|
|
157
|
+
std::string *out;
|
|
158
|
+
bool extracted;
|
|
159
|
+
};
|
|
160
|
+
|
|
161
|
+
static void *nogvl_extract(void *ptr) {
|
|
162
|
+
auto *arg = static_cast<nogvl_extract_arg *>(ptr);
|
|
163
|
+
if (arg->pattern) {
|
|
164
|
+
arg->extracted = RE2::Extract(arg->text, *arg->pattern,
|
|
165
|
+
arg->rewrite, arg->out);
|
|
166
|
+
} else {
|
|
167
|
+
arg->extracted = RE2::Extract(arg->text, RE2(arg->string_pattern),
|
|
168
|
+
arg->rewrite, arg->out);
|
|
169
|
+
}
|
|
170
|
+
return nullptr;
|
|
171
|
+
}
|
|
172
|
+
|
|
46
173
|
VALUE re2_mRE2, re2_cRegexp, re2_cMatchData, re2_cScanner, re2_cSet,
|
|
47
174
|
re2_eSetMatchError, re2_eSetUnsupportedError, re2_eRegexpUnsupportedError;
|
|
48
175
|
|
|
@@ -126,7 +253,7 @@ static void parse_re2_options(RE2::Options* re2_options, const VALUE options) {
|
|
|
126
253
|
}
|
|
127
254
|
|
|
128
255
|
static void re2_matchdata_mark(void *ptr) {
|
|
129
|
-
re2_matchdata *m =
|
|
256
|
+
re2_matchdata *m = static_cast<re2_matchdata *>(ptr);
|
|
130
257
|
rb_gc_mark_movable(m->regexp);
|
|
131
258
|
|
|
132
259
|
/* Text must not be movable because StringPiece matches hold pointers into
|
|
@@ -136,12 +263,12 @@ static void re2_matchdata_mark(void *ptr) {
|
|
|
136
263
|
}
|
|
137
264
|
|
|
138
265
|
static void re2_matchdata_compact(void *ptr) {
|
|
139
|
-
re2_matchdata *m =
|
|
266
|
+
re2_matchdata *m = static_cast<re2_matchdata *>(ptr);
|
|
140
267
|
m->regexp = rb_gc_location(m->regexp);
|
|
141
268
|
}
|
|
142
269
|
|
|
143
270
|
static void re2_matchdata_free(void *ptr) {
|
|
144
|
-
re2_matchdata *m =
|
|
271
|
+
re2_matchdata *m = static_cast<re2_matchdata *>(ptr);
|
|
145
272
|
if (m->matches) {
|
|
146
273
|
delete[] m->matches;
|
|
147
274
|
}
|
|
@@ -149,7 +276,7 @@ static void re2_matchdata_free(void *ptr) {
|
|
|
149
276
|
}
|
|
150
277
|
|
|
151
278
|
static size_t re2_matchdata_memsize(const void *ptr) {
|
|
152
|
-
const re2_matchdata *m =
|
|
279
|
+
const re2_matchdata *m = static_cast<const re2_matchdata *>(ptr);
|
|
153
280
|
size_t size = sizeof(*m);
|
|
154
281
|
if (m->matches) {
|
|
155
282
|
size += sizeof(*m->matches) * m->number_of_matches;
|
|
@@ -174,7 +301,7 @@ static const rb_data_type_t re2_matchdata_data_type = {
|
|
|
174
301
|
};
|
|
175
302
|
|
|
176
303
|
static void re2_scanner_mark(void *ptr) {
|
|
177
|
-
re2_scanner *s =
|
|
304
|
+
re2_scanner *s = static_cast<re2_scanner *>(ptr);
|
|
178
305
|
rb_gc_mark_movable(s->regexp);
|
|
179
306
|
|
|
180
307
|
/* Text must not be movable because the StringPiece input holds a pointer
|
|
@@ -184,12 +311,12 @@ static void re2_scanner_mark(void *ptr) {
|
|
|
184
311
|
}
|
|
185
312
|
|
|
186
313
|
static void re2_scanner_compact(void *ptr) {
|
|
187
|
-
re2_scanner *s =
|
|
314
|
+
re2_scanner *s = static_cast<re2_scanner *>(ptr);
|
|
188
315
|
s->regexp = rb_gc_location(s->regexp);
|
|
189
316
|
}
|
|
190
317
|
|
|
191
318
|
static void re2_scanner_free(void *ptr) {
|
|
192
|
-
re2_scanner *s =
|
|
319
|
+
re2_scanner *s = static_cast<re2_scanner *>(ptr);
|
|
193
320
|
if (s->input) {
|
|
194
321
|
delete s->input;
|
|
195
322
|
}
|
|
@@ -197,7 +324,7 @@ static void re2_scanner_free(void *ptr) {
|
|
|
197
324
|
}
|
|
198
325
|
|
|
199
326
|
static size_t re2_scanner_memsize(const void *ptr) {
|
|
200
|
-
const re2_scanner *s =
|
|
327
|
+
const re2_scanner *s = static_cast<const re2_scanner *>(ptr);
|
|
201
328
|
size_t size = sizeof(*s);
|
|
202
329
|
if (s->input) {
|
|
203
330
|
size += sizeof(*s->input);
|
|
@@ -222,7 +349,7 @@ static const rb_data_type_t re2_scanner_data_type = {
|
|
|
222
349
|
};
|
|
223
350
|
|
|
224
351
|
static void re2_regexp_free(void *ptr) {
|
|
225
|
-
re2_pattern *p =
|
|
352
|
+
re2_pattern *p = static_cast<re2_pattern *>(ptr);
|
|
226
353
|
if (p->pattern) {
|
|
227
354
|
delete p->pattern;
|
|
228
355
|
}
|
|
@@ -230,7 +357,7 @@ static void re2_regexp_free(void *ptr) {
|
|
|
230
357
|
}
|
|
231
358
|
|
|
232
359
|
static size_t re2_regexp_memsize(const void *ptr) {
|
|
233
|
-
const re2_pattern *p =
|
|
360
|
+
const re2_pattern *p = static_cast<const re2_pattern *>(ptr);
|
|
234
361
|
size_t size = sizeof(*p);
|
|
235
362
|
if (p->pattern) {
|
|
236
363
|
size += sizeof(*p->pattern);
|
|
@@ -250,7 +377,7 @@ static const rb_data_type_t re2_regexp_data_type = {
|
|
|
250
377
|
0,
|
|
251
378
|
// IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
|
|
252
379
|
// macro to update VALUE references, as to trigger write barriers.
|
|
253
|
-
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
|
|
380
|
+
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE
|
|
254
381
|
};
|
|
255
382
|
|
|
256
383
|
static re2_pattern *unwrap_re2_regexp(VALUE self) {
|
|
@@ -1249,6 +1376,8 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
|
|
1249
1376
|
|
|
1250
1377
|
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1251
1378
|
|
|
1379
|
+
rb_check_frozen(self);
|
|
1380
|
+
|
|
1252
1381
|
if (p->pattern) {
|
|
1253
1382
|
delete p->pattern;
|
|
1254
1383
|
p->pattern = nullptr;
|
|
@@ -1269,6 +1398,8 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
|
|
1269
1398
|
rb_raise(rb_eNoMemError, "not enough memory to allocate RE2 object");
|
|
1270
1399
|
}
|
|
1271
1400
|
|
|
1401
|
+
rb_obj_freeze(self);
|
|
1402
|
+
|
|
1272
1403
|
return self;
|
|
1273
1404
|
}
|
|
1274
1405
|
|
|
@@ -1278,6 +1409,8 @@ static VALUE re2_regexp_initialize_copy(VALUE self, VALUE other) {
|
|
|
1278
1409
|
|
|
1279
1410
|
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, self_p);
|
|
1280
1411
|
|
|
1412
|
+
rb_check_frozen(self);
|
|
1413
|
+
|
|
1281
1414
|
if (self_p->pattern) {
|
|
1282
1415
|
delete self_p->pattern;
|
|
1283
1416
|
self_p->pattern = nullptr;
|
|
@@ -1289,6 +1422,8 @@ static VALUE re2_regexp_initialize_copy(VALUE self, VALUE other) {
|
|
|
1289
1422
|
rb_raise(rb_eNoMemError, "not enough memory to allocate RE2 object");
|
|
1290
1423
|
}
|
|
1291
1424
|
|
|
1425
|
+
rb_obj_freeze(self);
|
|
1426
|
+
|
|
1292
1427
|
return self;
|
|
1293
1428
|
}
|
|
1294
1429
|
|
|
@@ -1753,8 +1888,9 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
1753
1888
|
|
|
1754
1889
|
rb_scan_args(argc, argv, "11", &text, &options);
|
|
1755
1890
|
|
|
1756
|
-
/*
|
|
1891
|
+
/* Coerce and freeze text to prevent mutation. */
|
|
1757
1892
|
StringValue(text);
|
|
1893
|
+
text = rb_str_new_frozen(text);
|
|
1758
1894
|
|
|
1759
1895
|
p = unwrap_re2_regexp(self);
|
|
1760
1896
|
|
|
@@ -1852,15 +1988,10 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
1852
1988
|
#endif
|
|
1853
1989
|
|
|
1854
1990
|
if (n == 0) {
|
|
1855
|
-
|
|
1856
|
-
|
|
1857
|
-
|
|
1858
|
-
|
|
1859
|
-
#else
|
|
1860
|
-
bool matched = p->pattern->Match(
|
|
1861
|
-
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
|
|
1862
|
-
startpos, anchor, 0, 0);
|
|
1863
|
-
#endif
|
|
1991
|
+
bool matched = re2_match_without_gvl(
|
|
1992
|
+
p->pattern, text, startpos, endpos, anchor, 0, 0);
|
|
1993
|
+
RB_GC_GUARD(text);
|
|
1994
|
+
|
|
1864
1995
|
return BOOL2RUBY(matched);
|
|
1865
1996
|
} else {
|
|
1866
1997
|
if (n == INT_MAX) {
|
|
@@ -1876,17 +2007,10 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
1876
2007
|
"not enough memory to allocate StringPieces for matches");
|
|
1877
2008
|
}
|
|
1878
2009
|
|
|
1879
|
-
|
|
2010
|
+
bool matched = re2_match_without_gvl(
|
|
2011
|
+
p->pattern, text, startpos, endpos, anchor, matches, n);
|
|
2012
|
+
RB_GC_GUARD(text);
|
|
1880
2013
|
|
|
1881
|
-
#ifdef HAVE_ENDPOS_ARGUMENT
|
|
1882
|
-
bool matched = p->pattern->Match(
|
|
1883
|
-
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
|
|
1884
|
-
startpos, endpos, anchor, matches, n);
|
|
1885
|
-
#else
|
|
1886
|
-
bool matched = p->pattern->Match(
|
|
1887
|
-
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
|
|
1888
|
-
startpos, anchor, matches, n);
|
|
1889
|
-
#endif
|
|
1890
2014
|
if (matched) {
|
|
1891
2015
|
VALUE matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
|
|
1892
2016
|
TypedData_Get_Struct(matchdata, re2_matchdata, &re2_matchdata_data_type, m);
|
|
@@ -1915,13 +2039,15 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
1915
2039
|
* @raise [TypeError] if text cannot be coerced to a `String`
|
|
1916
2040
|
*/
|
|
1917
2041
|
static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
|
|
1918
|
-
/* Ensure text is a string. */
|
|
1919
2042
|
StringValue(text);
|
|
2043
|
+
text = rb_str_new_frozen(text);
|
|
1920
2044
|
|
|
1921
2045
|
re2_pattern *p = unwrap_re2_regexp(self);
|
|
2046
|
+
bool matched = re2_match_without_gvl(
|
|
2047
|
+
p->pattern, text, 0, RSTRING_LEN(text), RE2::UNANCHORED, 0, 0);
|
|
2048
|
+
RB_GC_GUARD(text);
|
|
1922
2049
|
|
|
1923
|
-
return BOOL2RUBY(
|
|
1924
|
-
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)), *p->pattern));
|
|
2050
|
+
return BOOL2RUBY(matched);
|
|
1925
2051
|
}
|
|
1926
2052
|
|
|
1927
2053
|
/*
|
|
@@ -1934,13 +2060,15 @@ static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
|
|
|
1934
2060
|
* @raise [TypeError] if text cannot be coerced to a `String`
|
|
1935
2061
|
*/
|
|
1936
2062
|
static VALUE re2_regexp_full_match_p(const VALUE self, VALUE text) {
|
|
1937
|
-
/* Ensure text is a string. */
|
|
1938
2063
|
StringValue(text);
|
|
2064
|
+
text = rb_str_new_frozen(text);
|
|
1939
2065
|
|
|
1940
2066
|
re2_pattern *p = unwrap_re2_regexp(self);
|
|
2067
|
+
bool matched = re2_match_without_gvl(
|
|
2068
|
+
p->pattern, text, 0, RSTRING_LEN(text), RE2::ANCHOR_BOTH, 0, 0);
|
|
2069
|
+
RB_GC_GUARD(text);
|
|
1941
2070
|
|
|
1942
|
-
return BOOL2RUBY(
|
|
1943
|
-
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)), *p->pattern));
|
|
2071
|
+
return BOOL2RUBY(matched);
|
|
1944
2072
|
}
|
|
1945
2073
|
|
|
1946
2074
|
/*
|
|
@@ -1956,8 +2084,8 @@ static VALUE re2_regexp_full_match_p(const VALUE self, VALUE text) {
|
|
|
1956
2084
|
* #=> #<RE2::Scanner:0x0000000000000001>
|
|
1957
2085
|
*/
|
|
1958
2086
|
static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
|
|
1959
|
-
/* Ensure text is a string. */
|
|
1960
2087
|
StringValue(text);
|
|
2088
|
+
text = rb_str_new_frozen(text);
|
|
1961
2089
|
|
|
1962
2090
|
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1963
2091
|
re2_scanner *c;
|
|
@@ -1965,7 +2093,7 @@ static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
|
|
|
1965
2093
|
TypedData_Get_Struct(scanner, re2_scanner, &re2_scanner_data_type, c);
|
|
1966
2094
|
|
|
1967
2095
|
RB_OBJ_WRITE(scanner, &c->regexp, self);
|
|
1968
|
-
RB_OBJ_WRITE(scanner, &c->text,
|
|
2096
|
+
RB_OBJ_WRITE(scanner, &c->text, text);
|
|
1969
2097
|
c->input = new(std::nothrow) re2::StringPiece(
|
|
1970
2098
|
RSTRING_PTR(c->text), RSTRING_LEN(c->text));
|
|
1971
2099
|
if (c->input == nullptr) {
|
|
@@ -2028,35 +2156,51 @@ static VALUE re2_replace(VALUE, VALUE str, VALUE pattern,
|
|
|
2028
2156
|
VALUE rewrite) {
|
|
2029
2157
|
re2_pattern *p = nullptr;
|
|
2030
2158
|
|
|
2031
|
-
/* Coerce all arguments before any C++ allocations so that any
|
|
2032
|
-
* exceptions (via longjmp) cannot bypass C++ destructors and leak
|
|
2159
|
+
/* Coerce and freeze all arguments before any C++ allocations so that any
|
|
2160
|
+
* Ruby exceptions (via longjmp) cannot bypass C++ destructors and leak
|
|
2161
|
+
* memory, and later coercions cannot mutate earlier strings.
|
|
2033
2162
|
*/
|
|
2034
2163
|
StringValue(str);
|
|
2164
|
+
str = rb_str_new_frozen(str);
|
|
2035
2165
|
if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
|
|
2036
2166
|
p = unwrap_re2_regexp(pattern);
|
|
2037
2167
|
} else {
|
|
2038
2168
|
StringValue(pattern);
|
|
2169
|
+
pattern = rb_str_new_frozen(pattern);
|
|
2039
2170
|
}
|
|
2040
2171
|
StringValue(rewrite);
|
|
2172
|
+
rewrite = rb_str_new_frozen(rewrite);
|
|
2041
2173
|
|
|
2042
|
-
/* Take a copy of str so it can be modified in-place by
|
|
2043
|
-
* RE2::Replace.
|
|
2044
|
-
*/
|
|
2174
|
+
/* Take a copy of str so it can be modified in-place by RE2::Replace. */
|
|
2045
2175
|
std::string str_as_string(RSTRING_PTR(str), RSTRING_LEN(str));
|
|
2046
2176
|
|
|
2047
|
-
|
|
2177
|
+
nogvl_replace_arg arg;
|
|
2178
|
+
arg.str = &str_as_string;
|
|
2048
2179
|
if (p) {
|
|
2049
|
-
|
|
2050
|
-
|
|
2180
|
+
arg.pattern = p->pattern;
|
|
2181
|
+
} else {
|
|
2182
|
+
arg.pattern = nullptr;
|
|
2183
|
+
arg.string_pattern = re2::StringPiece(
|
|
2184
|
+
RSTRING_PTR(pattern), RSTRING_LEN(pattern));
|
|
2185
|
+
}
|
|
2186
|
+
arg.rewrite = re2::StringPiece(
|
|
2187
|
+
RSTRING_PTR(rewrite), RSTRING_LEN(rewrite));
|
|
2051
2188
|
|
|
2189
|
+
#ifdef _WIN32
|
|
2190
|
+
nogvl_replace(&arg);
|
|
2191
|
+
#else
|
|
2192
|
+
rb_thread_call_without_gvl(nogvl_replace, &arg, NULL, NULL);
|
|
2193
|
+
#endif
|
|
2194
|
+
|
|
2195
|
+
RB_GC_GUARD(rewrite);
|
|
2196
|
+
RB_GC_GUARD(pattern);
|
|
2197
|
+
|
|
2198
|
+
if (p) {
|
|
2052
2199
|
return encoded_str_new(str_as_string.data(), str_as_string.size(),
|
|
2053
2200
|
p->pattern->options().encoding());
|
|
2054
2201
|
} else {
|
|
2055
|
-
|
|
2056
|
-
|
|
2057
|
-
re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
|
|
2058
|
-
|
|
2059
|
-
return encoded_str_new(str_as_string.data(), str_as_string.size(), RE2::Options::EncodingUTF8);
|
|
2202
|
+
return encoded_str_new(str_as_string.data(), str_as_string.size(),
|
|
2203
|
+
RE2::Options::EncodingUTF8);
|
|
2060
2204
|
}
|
|
2061
2205
|
}
|
|
2062
2206
|
|
|
@@ -2084,35 +2228,53 @@ static VALUE re2_global_replace(VALUE, VALUE str, VALUE pattern,
|
|
|
2084
2228
|
VALUE rewrite) {
|
|
2085
2229
|
re2_pattern *p = nullptr;
|
|
2086
2230
|
|
|
2087
|
-
/* Coerce all arguments before any C++ allocations so that any
|
|
2088
|
-
* exceptions (via longjmp) cannot bypass C++ destructors and leak
|
|
2231
|
+
/* Coerce and freeze all arguments before any C++ allocations so that any
|
|
2232
|
+
* Ruby exceptions (via longjmp) cannot bypass C++ destructors and leak
|
|
2233
|
+
* memory, and later coercions cannot mutate earlier strings.
|
|
2089
2234
|
*/
|
|
2090
2235
|
StringValue(str);
|
|
2236
|
+
str = rb_str_new_frozen(str);
|
|
2091
2237
|
if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
|
|
2092
2238
|
p = unwrap_re2_regexp(pattern);
|
|
2093
2239
|
} else {
|
|
2094
2240
|
StringValue(pattern);
|
|
2241
|
+
pattern = rb_str_new_frozen(pattern);
|
|
2095
2242
|
}
|
|
2096
2243
|
StringValue(rewrite);
|
|
2244
|
+
rewrite = rb_str_new_frozen(rewrite);
|
|
2097
2245
|
|
|
2098
2246
|
/* Take a copy of str so it can be modified in-place by
|
|
2099
2247
|
* RE2::GlobalReplace.
|
|
2100
2248
|
*/
|
|
2101
2249
|
std::string str_as_string(RSTRING_PTR(str), RSTRING_LEN(str));
|
|
2102
2250
|
|
|
2103
|
-
|
|
2251
|
+
nogvl_replace_arg arg;
|
|
2252
|
+
arg.str = &str_as_string;
|
|
2104
2253
|
if (p) {
|
|
2105
|
-
|
|
2106
|
-
|
|
2254
|
+
arg.pattern = p->pattern;
|
|
2255
|
+
} else {
|
|
2256
|
+
arg.pattern = nullptr;
|
|
2257
|
+
arg.string_pattern = re2::StringPiece(
|
|
2258
|
+
RSTRING_PTR(pattern), RSTRING_LEN(pattern));
|
|
2259
|
+
}
|
|
2260
|
+
arg.rewrite = re2::StringPiece(
|
|
2261
|
+
RSTRING_PTR(rewrite), RSTRING_LEN(rewrite));
|
|
2107
2262
|
|
|
2263
|
+
#ifdef _WIN32
|
|
2264
|
+
nogvl_global_replace(&arg);
|
|
2265
|
+
#else
|
|
2266
|
+
rb_thread_call_without_gvl(nogvl_global_replace, &arg, NULL, NULL);
|
|
2267
|
+
#endif
|
|
2268
|
+
|
|
2269
|
+
RB_GC_GUARD(rewrite);
|
|
2270
|
+
RB_GC_GUARD(pattern);
|
|
2271
|
+
|
|
2272
|
+
if (p) {
|
|
2108
2273
|
return encoded_str_new(str_as_string.data(), str_as_string.size(),
|
|
2109
2274
|
p->pattern->options().encoding());
|
|
2110
2275
|
} else {
|
|
2111
|
-
|
|
2112
|
-
|
|
2113
|
-
re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
|
|
2114
|
-
|
|
2115
|
-
return encoded_str_new(str_as_string.data(), str_as_string.size(), RE2::Options::EncodingUTF8);
|
|
2276
|
+
return encoded_str_new(str_as_string.data(), str_as_string.size(),
|
|
2277
|
+
RE2::Options::EncodingUTF8);
|
|
2116
2278
|
}
|
|
2117
2279
|
}
|
|
2118
2280
|
|
|
@@ -2142,46 +2304,53 @@ static VALUE re2_extract(VALUE, VALUE text, VALUE pattern,
|
|
|
2142
2304
|
VALUE rewrite) {
|
|
2143
2305
|
re2_pattern *p = nullptr;
|
|
2144
2306
|
|
|
2145
|
-
/* Coerce all arguments before any C++ allocations so that any
|
|
2146
|
-
* exceptions (via longjmp) cannot bypass C++ destructors and leak
|
|
2307
|
+
/* Coerce and freeze all arguments before any C++ allocations so that any
|
|
2308
|
+
* Ruby exceptions (via longjmp) cannot bypass C++ destructors and leak
|
|
2309
|
+
* memory, and later coercions cannot mutate earlier strings.
|
|
2147
2310
|
*/
|
|
2148
2311
|
StringValue(text);
|
|
2312
|
+
text = rb_str_new_frozen(text);
|
|
2149
2313
|
if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
|
|
2150
2314
|
p = unwrap_re2_regexp(pattern);
|
|
2151
2315
|
} else {
|
|
2152
2316
|
StringValue(pattern);
|
|
2317
|
+
pattern = rb_str_new_frozen(pattern);
|
|
2153
2318
|
}
|
|
2154
2319
|
StringValue(rewrite);
|
|
2320
|
+
rewrite = rb_str_new_frozen(rewrite);
|
|
2155
2321
|
|
|
2156
2322
|
std::string out;
|
|
2157
|
-
bool extracted;
|
|
2158
2323
|
|
|
2324
|
+
nogvl_extract_arg arg;
|
|
2325
|
+
arg.text = re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text));
|
|
2159
2326
|
if (p) {
|
|
2160
|
-
|
|
2161
|
-
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
|
|
2162
|
-
*p->pattern,
|
|
2163
|
-
re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)),
|
|
2164
|
-
&out);
|
|
2165
|
-
|
|
2166
|
-
if (extracted) {
|
|
2167
|
-
return encoded_str_new(out.data(), out.size(),
|
|
2168
|
-
p->pattern->options().encoding());
|
|
2169
|
-
} else {
|
|
2170
|
-
return Qnil;
|
|
2171
|
-
}
|
|
2327
|
+
arg.pattern = p->pattern;
|
|
2172
2328
|
} else {
|
|
2173
|
-
|
|
2174
|
-
|
|
2175
|
-
|
|
2176
|
-
|
|
2177
|
-
|
|
2178
|
-
|
|
2179
|
-
|
|
2180
|
-
|
|
2181
|
-
|
|
2182
|
-
|
|
2183
|
-
|
|
2184
|
-
|
|
2329
|
+
arg.pattern = nullptr;
|
|
2330
|
+
arg.string_pattern = re2::StringPiece(
|
|
2331
|
+
RSTRING_PTR(pattern), RSTRING_LEN(pattern));
|
|
2332
|
+
}
|
|
2333
|
+
arg.rewrite = re2::StringPiece(
|
|
2334
|
+
RSTRING_PTR(rewrite), RSTRING_LEN(rewrite));
|
|
2335
|
+
arg.out = &out;
|
|
2336
|
+
arg.extracted = false;
|
|
2337
|
+
|
|
2338
|
+
#ifdef _WIN32
|
|
2339
|
+
nogvl_extract(&arg);
|
|
2340
|
+
#else
|
|
2341
|
+
rb_thread_call_without_gvl(nogvl_extract, &arg, NULL, NULL);
|
|
2342
|
+
#endif
|
|
2343
|
+
|
|
2344
|
+
RB_GC_GUARD(text);
|
|
2345
|
+
RB_GC_GUARD(rewrite);
|
|
2346
|
+
RB_GC_GUARD(pattern);
|
|
2347
|
+
|
|
2348
|
+
if (arg.extracted) {
|
|
2349
|
+
return encoded_str_new(out.data(), out.size(),
|
|
2350
|
+
p ? p->pattern->options().encoding()
|
|
2351
|
+
: RE2::Options::EncodingUTF8);
|
|
2352
|
+
} else {
|
|
2353
|
+
return Qnil;
|
|
2185
2354
|
}
|
|
2186
2355
|
}
|
|
2187
2356
|
|
|
@@ -2211,7 +2380,7 @@ static VALUE re2_escape(VALUE, VALUE unquoted) {
|
|
|
2211
2380
|
}
|
|
2212
2381
|
|
|
2213
2382
|
static void re2_set_free(void *ptr) {
|
|
2214
|
-
re2_set *s =
|
|
2383
|
+
re2_set *s = static_cast<re2_set *>(ptr);
|
|
2215
2384
|
if (s->set) {
|
|
2216
2385
|
delete s->set;
|
|
2217
2386
|
}
|
|
@@ -2219,7 +2388,7 @@ static void re2_set_free(void *ptr) {
|
|
|
2219
2388
|
}
|
|
2220
2389
|
|
|
2221
2390
|
static size_t re2_set_memsize(const void *ptr) {
|
|
2222
|
-
const re2_set *s =
|
|
2391
|
+
const re2_set *s = static_cast<const re2_set *>(ptr);
|
|
2223
2392
|
size_t size = sizeof(*s);
|
|
2224
2393
|
if (s->set) {
|
|
2225
2394
|
size += sizeof(*s->set);
|
|
@@ -2239,7 +2408,7 @@ static const rb_data_type_t re2_set_data_type = {
|
|
|
2239
2408
|
0,
|
|
2240
2409
|
// IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
|
|
2241
2410
|
// macro to update VALUE references, as to trigger write barriers.
|
|
2242
|
-
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
|
|
2411
|
+
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE
|
|
2243
2412
|
};
|
|
2244
2413
|
|
|
2245
2414
|
static re2_set *unwrap_re2_set(VALUE self) {
|
|
@@ -2332,6 +2501,8 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
|
|
|
2332
2501
|
parse_re2_options(&re2_options, options);
|
|
2333
2502
|
}
|
|
2334
2503
|
|
|
2504
|
+
rb_check_frozen(self);
|
|
2505
|
+
|
|
2335
2506
|
if (s->set) {
|
|
2336
2507
|
delete s->set;
|
|
2337
2508
|
s->set = nullptr;
|
|
@@ -2362,6 +2533,7 @@ static VALUE re2_set_add(VALUE self, VALUE pattern) {
|
|
|
2362
2533
|
StringValue(pattern);
|
|
2363
2534
|
|
|
2364
2535
|
re2_set *s = unwrap_re2_set(self);
|
|
2536
|
+
rb_check_frozen(self);
|
|
2365
2537
|
|
|
2366
2538
|
int index;
|
|
2367
2539
|
VALUE msg;
|
|
@@ -2393,8 +2565,15 @@ static VALUE re2_set_add(VALUE self, VALUE pattern) {
|
|
|
2393
2565
|
*/
|
|
2394
2566
|
static VALUE re2_set_compile(VALUE self) {
|
|
2395
2567
|
re2_set *s = unwrap_re2_set(self);
|
|
2568
|
+
rb_check_frozen(self);
|
|
2569
|
+
|
|
2570
|
+
bool compiled = s->set->Compile();
|
|
2396
2571
|
|
|
2397
|
-
|
|
2572
|
+
if (compiled) {
|
|
2573
|
+
rb_obj_freeze(self);
|
|
2574
|
+
}
|
|
2575
|
+
|
|
2576
|
+
return BOOL2RUBY(compiled);
|
|
2398
2577
|
}
|
|
2399
2578
|
|
|
2400
2579
|
/*
|
|
@@ -2491,6 +2670,8 @@ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
2491
2670
|
rb_scan_args(argc, argv, "11", &str, &options);
|
|
2492
2671
|
|
|
2493
2672
|
StringValue(str);
|
|
2673
|
+
str = rb_str_new_frozen(str);
|
|
2674
|
+
|
|
2494
2675
|
re2_set *s = unwrap_re2_set(self);
|
|
2495
2676
|
|
|
2496
2677
|
if (RTEST(options)) {
|
|
@@ -2507,8 +2688,21 @@ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
2507
2688
|
if (raise_exception) {
|
|
2508
2689
|
#ifdef HAVE_ERROR_INFO_ARGUMENT
|
|
2509
2690
|
RE2::Set::ErrorInfo e;
|
|
2510
|
-
|
|
2511
|
-
|
|
2691
|
+
nogvl_set_match_arg arg;
|
|
2692
|
+
arg.set = s->set;
|
|
2693
|
+
arg.text = re2::StringPiece(RSTRING_PTR(str), RSTRING_LEN(str));
|
|
2694
|
+
arg.v = &v;
|
|
2695
|
+
arg.error_info = &e;
|
|
2696
|
+
arg.matched = false;
|
|
2697
|
+
|
|
2698
|
+
#ifdef _WIN32
|
|
2699
|
+
nogvl_set_match(&arg);
|
|
2700
|
+
#else
|
|
2701
|
+
rb_thread_call_without_gvl(nogvl_set_match, &arg, NULL, NULL);
|
|
2702
|
+
#endif
|
|
2703
|
+
RB_GC_GUARD(str);
|
|
2704
|
+
|
|
2705
|
+
bool match_failed = !arg.matched;
|
|
2512
2706
|
VALUE result = rb_ary_new2(v.size());
|
|
2513
2707
|
|
|
2514
2708
|
if (match_failed) {
|
|
@@ -2535,11 +2729,25 @@ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
2535
2729
|
rb_raise(re2_eSetUnsupportedError, "current version of RE2::Set::Match() does not output error information, :exception option can only be set to false");
|
|
2536
2730
|
#endif
|
|
2537
2731
|
} else {
|
|
2538
|
-
|
|
2539
|
-
|
|
2732
|
+
nogvl_set_match_arg arg;
|
|
2733
|
+
arg.set = s->set;
|
|
2734
|
+
arg.text = re2::StringPiece(RSTRING_PTR(str), RSTRING_LEN(str));
|
|
2735
|
+
arg.v = &v;
|
|
2736
|
+
#ifdef HAVE_ERROR_INFO_ARGUMENT
|
|
2737
|
+
arg.error_info = nullptr;
|
|
2738
|
+
#endif
|
|
2739
|
+
arg.matched = false;
|
|
2740
|
+
|
|
2741
|
+
#ifdef _WIN32
|
|
2742
|
+
nogvl_set_match(&arg);
|
|
2743
|
+
#else
|
|
2744
|
+
rb_thread_call_without_gvl(nogvl_set_match, &arg, NULL, NULL);
|
|
2745
|
+
#endif
|
|
2746
|
+
RB_GC_GUARD(str);
|
|
2747
|
+
|
|
2540
2748
|
VALUE result = rb_ary_new2(v.size());
|
|
2541
2749
|
|
|
2542
|
-
if (matched) {
|
|
2750
|
+
if (arg.matched) {
|
|
2543
2751
|
for (int index : v) {
|
|
2544
2752
|
rb_ary_push(result, INT2FIX(index));
|
|
2545
2753
|
}
|
|
@@ -2550,6 +2758,8 @@ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
2550
2758
|
}
|
|
2551
2759
|
|
|
2552
2760
|
extern "C" void Init_re2(void) {
|
|
2761
|
+
rb_ext_ractor_safe(true);
|
|
2762
|
+
|
|
2553
2763
|
re2_mRE2 = rb_define_module("RE2");
|
|
2554
2764
|
re2_cRegexp = rb_define_class_under(re2_mRE2, "Regexp", rb_cObject);
|
|
2555
2765
|
re2_eRegexpUnsupportedError = rb_define_class_under(re2_cRegexp,
|
data/lib/3.1/re2.so
CHANGED
|
Binary file
|
data/lib/3.2/re2.so
CHANGED
|
Binary file
|
data/lib/3.3/re2.so
CHANGED
|
Binary file
|
data/lib/3.4/re2.so
CHANGED
|
Binary file
|
data/lib/4.0/re2.so
CHANGED
|
Binary file
|
data/lib/re2/version.rb
CHANGED
data/spec/re2/regexp_spec.rb
CHANGED
|
@@ -39,6 +39,16 @@ RSpec.describe RE2::Regexp do
|
|
|
39
39
|
|
|
40
40
|
expect(re).to be_a(RE2::Regexp)
|
|
41
41
|
end
|
|
42
|
+
|
|
43
|
+
it "returns a frozen object" do
|
|
44
|
+
expect(RE2::Regexp.new('woo')).to be_frozen
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
it "cannot be re-initialized" do
|
|
48
|
+
re = RE2::Regexp.new('woo')
|
|
49
|
+
|
|
50
|
+
expect { re.send(:initialize, 'bar') }.to raise_error(FrozenError)
|
|
51
|
+
end
|
|
42
52
|
end
|
|
43
53
|
|
|
44
54
|
describe "#dup" do
|
|
@@ -70,6 +80,13 @@ RSpec.describe RE2::Regexp do
|
|
|
70
80
|
expect(copy).to_not be_case_sensitive
|
|
71
81
|
end
|
|
72
82
|
|
|
83
|
+
it "returns a frozen copy" do
|
|
84
|
+
re = described_class.new('(\d+)')
|
|
85
|
+
copy = re.dup
|
|
86
|
+
|
|
87
|
+
expect(copy).to be_frozen
|
|
88
|
+
end
|
|
89
|
+
|
|
73
90
|
it "raises an error when called on an uninitialized object" do
|
|
74
91
|
expect { described_class.allocate.dup }.to raise_error(TypeError, /uninitialized RE2::Regexp/)
|
|
75
92
|
end
|
|
@@ -83,6 +100,13 @@ RSpec.describe RE2::Regexp do
|
|
|
83
100
|
expect(copy.to_s).to eq('woo')
|
|
84
101
|
end
|
|
85
102
|
|
|
103
|
+
it "returns a frozen copy" do
|
|
104
|
+
re = described_class.new('woo')
|
|
105
|
+
copy = re.clone
|
|
106
|
+
|
|
107
|
+
expect(copy).to be_frozen
|
|
108
|
+
end
|
|
109
|
+
|
|
86
110
|
it "raises an error when called on an uninitialized object" do
|
|
87
111
|
expect { described_class.allocate.clone }.to raise_error(TypeError, /uninitialized RE2::Regexp/)
|
|
88
112
|
end
|
|
@@ -769,6 +793,16 @@ RSpec.describe RE2::Regexp do
|
|
|
769
793
|
it "raises an error when called on an uninitialized object" do
|
|
770
794
|
expect { described_class.allocate.match("test") }.to raise_error(TypeError, /uninitialized RE2::Regexp/)
|
|
771
795
|
end
|
|
796
|
+
|
|
797
|
+
it "can be run concurrently" do
|
|
798
|
+
re = RE2::Regexp.new('(\w+)\s(\w+)')
|
|
799
|
+
|
|
800
|
+
threads = 10.times.map do
|
|
801
|
+
Thread.new { re.match("one two").values_at(1, 2) }
|
|
802
|
+
end
|
|
803
|
+
|
|
804
|
+
expect(threads.map(&:value)).to all(eq(["one", "two"]))
|
|
805
|
+
end
|
|
772
806
|
end
|
|
773
807
|
|
|
774
808
|
describe "#match?" do
|
|
@@ -794,6 +828,16 @@ RSpec.describe RE2::Regexp do
|
|
|
794
828
|
it "raises an error when called on an uninitialized object" do
|
|
795
829
|
expect { described_class.allocate.match?("test") }.to raise_error(TypeError, /uninitialized RE2::Regexp/)
|
|
796
830
|
end
|
|
831
|
+
|
|
832
|
+
it "can be run concurrently" do
|
|
833
|
+
re = RE2::Regexp.new('(\w+)\s(\w+)')
|
|
834
|
+
|
|
835
|
+
threads = 10.times.map do
|
|
836
|
+
Thread.new { re.match?("one two") }
|
|
837
|
+
end
|
|
838
|
+
|
|
839
|
+
expect(threads.map(&:value)).to all(eq(true))
|
|
840
|
+
end
|
|
797
841
|
end
|
|
798
842
|
|
|
799
843
|
describe "#partial_match?" do
|
|
@@ -826,6 +870,16 @@ RSpec.describe RE2::Regexp do
|
|
|
826
870
|
it "raises an error when called on an uninitialized object" do
|
|
827
871
|
expect { described_class.allocate.partial_match?("test") }.to raise_error(TypeError, /uninitialized RE2::Regexp/)
|
|
828
872
|
end
|
|
873
|
+
|
|
874
|
+
it "can be run concurrently" do
|
|
875
|
+
re = RE2::Regexp.new('(\d+)')
|
|
876
|
+
|
|
877
|
+
threads = 10.times.map do
|
|
878
|
+
Thread.new { re.partial_match?("alice 123") }
|
|
879
|
+
end
|
|
880
|
+
|
|
881
|
+
expect(threads.map(&:value)).to all(eq(true))
|
|
882
|
+
end
|
|
829
883
|
end
|
|
830
884
|
|
|
831
885
|
describe "#=~" do
|
|
@@ -915,6 +969,16 @@ RSpec.describe RE2::Regexp do
|
|
|
915
969
|
it "raises an error when called on an uninitialized object" do
|
|
916
970
|
expect { described_class.allocate.full_match?("test") }.to raise_error(TypeError, /uninitialized RE2::Regexp/)
|
|
917
971
|
end
|
|
972
|
+
|
|
973
|
+
it "can be run concurrently" do
|
|
974
|
+
re = RE2::Regexp.new('(\w+) (\d+)')
|
|
975
|
+
|
|
976
|
+
threads = 10.times.map do
|
|
977
|
+
Thread.new { re.full_match?("alice 123") }
|
|
978
|
+
end
|
|
979
|
+
|
|
980
|
+
expect(threads.map(&:value)).to all(eq(true))
|
|
981
|
+
end
|
|
918
982
|
end
|
|
919
983
|
|
|
920
984
|
describe "#ok?" do
|
data/spec/re2/set_spec.rb
CHANGED
|
@@ -84,14 +84,12 @@ RSpec.describe RE2::Set do
|
|
|
84
84
|
expect { set.add("(?P<#{'o' * 200}") }.to raise_error(ArgumentError, "str rejected by RE2::Set->Add(): invalid named capture group: (?P<oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo")
|
|
85
85
|
end
|
|
86
86
|
|
|
87
|
-
it "raises
|
|
87
|
+
it "raises a FrozenError if called after #compile" do
|
|
88
88
|
set = RE2::Set.new(:unanchored, log_errors: false)
|
|
89
89
|
set.add("abc")
|
|
90
90
|
set.compile
|
|
91
91
|
|
|
92
|
-
|
|
93
|
-
expect { set.add("def") }.to raise_error(ArgumentError)
|
|
94
|
-
end
|
|
92
|
+
expect { set.add("def") }.to raise_error(FrozenError)
|
|
95
93
|
end
|
|
96
94
|
|
|
97
95
|
it "raises an error if given a pattern that can't be coerced to a String" do
|
|
@@ -121,6 +119,29 @@ RSpec.describe RE2::Set do
|
|
|
121
119
|
expect(set.compile).to be_truthy
|
|
122
120
|
end
|
|
123
121
|
|
|
122
|
+
it "freezes the set on successful compilation" do
|
|
123
|
+
set = RE2::Set.new
|
|
124
|
+
set.add("abc")
|
|
125
|
+
set.compile
|
|
126
|
+
|
|
127
|
+
expect(set).to be_frozen
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
it "is not frozen before compilation" do
|
|
131
|
+
set = RE2::Set.new
|
|
132
|
+
set.add("abc")
|
|
133
|
+
|
|
134
|
+
expect(set).to_not be_frozen
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
it "cannot be re-initialized after compilation" do
|
|
138
|
+
set = RE2::Set.new
|
|
139
|
+
set.add("abc")
|
|
140
|
+
set.compile
|
|
141
|
+
|
|
142
|
+
expect { set.send(:initialize) }.to raise_error(FrozenError)
|
|
143
|
+
end
|
|
144
|
+
|
|
124
145
|
it "raises an error when called on an uninitialized object" do
|
|
125
146
|
expect { described_class.allocate.compile }.to raise_error(TypeError, /uninitialized RE2::Set/)
|
|
126
147
|
end
|
|
@@ -226,6 +247,20 @@ RSpec.describe RE2::Set do
|
|
|
226
247
|
it "raises an error when called on an uninitialized object" do
|
|
227
248
|
expect { described_class.allocate.match("foo") }.to raise_error(TypeError, /uninitialized RE2::Set/)
|
|
228
249
|
end
|
|
250
|
+
|
|
251
|
+
it "can be run concurrently" do
|
|
252
|
+
set = RE2::Set.new
|
|
253
|
+
set.add("abc")
|
|
254
|
+
set.add("def")
|
|
255
|
+
set.add("ghi")
|
|
256
|
+
set.compile
|
|
257
|
+
|
|
258
|
+
threads = 10.times.map do
|
|
259
|
+
Thread.new { set.match("abcdefghi", exception: false) }
|
|
260
|
+
end
|
|
261
|
+
|
|
262
|
+
expect(threads.map(&:value)).to all(eq([0, 1, 2]))
|
|
263
|
+
end
|
|
229
264
|
end
|
|
230
265
|
|
|
231
266
|
describe "#size" do
|
data/spec/re2_spec.rb
CHANGED
|
@@ -93,6 +93,26 @@ RSpec.describe RE2 do
|
|
|
93
93
|
it "raises a Type Error for a replacement that can't be converted to String" do
|
|
94
94
|
expect { RE2.replace("woo", "oo", 0) }.to raise_error(TypeError)
|
|
95
95
|
end
|
|
96
|
+
|
|
97
|
+
it "can be run concurrently with the same RE2::Regexp pattern" do
|
|
98
|
+
re = RE2::Regexp.new('(\w+)\s(\w+)')
|
|
99
|
+
|
|
100
|
+
threads = 10.times.map do
|
|
101
|
+
Thread.new { RE2.replace("one two", re, '\2 \1') }
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
expect(threads.map(&:value)).to all(eq("two one"))
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
it "can be run concurrently with the same string pattern" do
|
|
108
|
+
re = '(\w+)\s(\w+)'
|
|
109
|
+
|
|
110
|
+
threads = 10.times.map do
|
|
111
|
+
Thread.new { RE2.replace("one two", re, '\2 \1') }
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
expect(threads.map(&:value)).to all(eq("two one"))
|
|
115
|
+
end
|
|
96
116
|
end
|
|
97
117
|
|
|
98
118
|
describe ".Replace" do
|
|
@@ -193,6 +213,26 @@ RSpec.describe RE2 do
|
|
|
193
213
|
it "raises a Type Error for a replacement that can't be converted to String" do
|
|
194
214
|
expect { RE2.global_replace("woo", "o", 0) }.to raise_error(TypeError)
|
|
195
215
|
end
|
|
216
|
+
|
|
217
|
+
it "can be run concurrently with the same RE2::Regexp pattern" do
|
|
218
|
+
re = RE2::Regexp.new('(\w+)\s(\w+)')
|
|
219
|
+
|
|
220
|
+
threads = 10.times.map do
|
|
221
|
+
Thread.new { RE2.global_replace("one two three four", re, '\2 \1') }
|
|
222
|
+
end
|
|
223
|
+
|
|
224
|
+
expect(threads.map(&:value)).to all(eq("two one four three"))
|
|
225
|
+
end
|
|
226
|
+
|
|
227
|
+
it "can be run concurrently with the same string pattern" do
|
|
228
|
+
re = '(\w+)\s(\w+)'
|
|
229
|
+
|
|
230
|
+
threads = 10.times.map do
|
|
231
|
+
Thread.new { RE2.global_replace("one two three four", re, '\2 \1') }
|
|
232
|
+
end
|
|
233
|
+
|
|
234
|
+
expect(threads.map(&:value)).to all(eq("two one four three"))
|
|
235
|
+
end
|
|
196
236
|
end
|
|
197
237
|
|
|
198
238
|
describe ".GlobalReplace" do
|
|
@@ -281,6 +321,26 @@ RSpec.describe RE2 do
|
|
|
281
321
|
it "raises a Type Error for a rewrite that can't be converted to String" do
|
|
282
322
|
expect { RE2.extract("woo", '(\w+)', 0) }.to raise_error(TypeError)
|
|
283
323
|
end
|
|
324
|
+
|
|
325
|
+
it "can be run concurrently with the same RE2::Regexp pattern" do
|
|
326
|
+
re = RE2::Regexp.new('(\w+)@(\w+)')
|
|
327
|
+
|
|
328
|
+
threads = 10.times.map do
|
|
329
|
+
Thread.new { RE2.extract("alice@example", re, '\2-\1') }
|
|
330
|
+
end
|
|
331
|
+
|
|
332
|
+
expect(threads.map(&:value)).to all(eq("example-alice"))
|
|
333
|
+
end
|
|
334
|
+
|
|
335
|
+
it "can be run concurrently with the same string pattern" do
|
|
336
|
+
re = '(\w+)@(\w+)'
|
|
337
|
+
|
|
338
|
+
threads = 10.times.map do
|
|
339
|
+
Thread.new { RE2.extract("alice@example", re, '\2-\1') }
|
|
340
|
+
end
|
|
341
|
+
|
|
342
|
+
expect(threads.map(&:value)).to all(eq("example-alice"))
|
|
343
|
+
end
|
|
284
344
|
end
|
|
285
345
|
|
|
286
346
|
describe "#escape" do
|