re2 2.26.1-arm-linux-gnu → 2.27.0-arm-linux-gnu
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/re2/re2.cc +367 -116
- data/lib/3.1/re2.so +0 -0
- data/lib/3.2/re2.so +0 -0
- data/lib/3.3/re2.so +0 -0
- data/lib/3.4/re2.so +0 -0
- data/lib/4.0/re2.so +0 -0
- data/lib/re2/version.rb +1 -1
- data/spec/re2/regexp_spec.rb +73 -0
- data/spec/re2/scanner_spec.rb +46 -0
- data/spec/re2/set_spec.rb +39 -4
- data/spec/re2_spec.rb +72 -0
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 645eab3b4e65e9324ce7c21fb194003d47e49218bc14ea9a33a971d128b8426d
|
|
4
|
+
data.tar.gz: f1459959f76efa60581508fd0b1f8360afb4ccfaf8a651d076a4b67cbd334659
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 55cc7f7cf8a3ac43c7dfc64040a1f9f137bf04d040b99bcbc4039be358c6b6d6176239164747d8b8ad7be1a1af2a38ba82870bf00965caf2a76af7609a78aefb
|
|
7
|
+
data.tar.gz: 753a84156ac0cea64cf30e579baf613d691dd04bfd1f092ecbc5e5ad5308ecba601c360c97c4f42b5ddf0f5983b9555d09393f46564dc2e1cb5a0b81f2d401fa
|
data/README.md
CHANGED
|
@@ -6,7 +6,7 @@ Python".
|
|
|
6
6
|
|
|
7
7
|
[](https://github.com/mudge/re2/actions)
|
|
8
8
|
|
|
9
|
-
**Current version:** 2.26.1
|
|
9
|
+
**Current version:** 2.27.0
|
|
10
10
|
**Bundled RE2 version:** libre2.11 (2025-11-05)
|
|
11
11
|
|
|
12
12
|
```ruby
|
data/ext/re2/re2.cc
CHANGED
|
@@ -19,6 +19,7 @@
|
|
|
19
19
|
#include <re2/set.h>
|
|
20
20
|
#include <ruby.h>
|
|
21
21
|
#include <ruby/encoding.h>
|
|
22
|
+
#include <ruby/thread.h>
|
|
22
23
|
|
|
23
24
|
#define BOOL2RUBY(v) (v ? Qtrue : Qfalse)
|
|
24
25
|
|
|
@@ -43,6 +44,132 @@ typedef struct {
|
|
|
43
44
|
RE2::Set *set;
|
|
44
45
|
} re2_set;
|
|
45
46
|
|
|
47
|
+
struct nogvl_match_arg {
|
|
48
|
+
const RE2 *pattern;
|
|
49
|
+
re2::StringPiece text;
|
|
50
|
+
size_t startpos;
|
|
51
|
+
size_t endpos;
|
|
52
|
+
RE2::Anchor anchor;
|
|
53
|
+
re2::StringPiece *matches;
|
|
54
|
+
int n;
|
|
55
|
+
bool matched;
|
|
56
|
+
};
|
|
57
|
+
|
|
58
|
+
static void *nogvl_match(void *ptr) {
|
|
59
|
+
auto *arg = static_cast<nogvl_match_arg *>(ptr);
|
|
60
|
+
#ifdef HAVE_ENDPOS_ARGUMENT
|
|
61
|
+
arg->matched = arg->pattern->Match(
|
|
62
|
+
arg->text, arg->startpos, arg->endpos,
|
|
63
|
+
arg->anchor, arg->matches, arg->n);
|
|
64
|
+
#else
|
|
65
|
+
arg->matched = arg->pattern->Match(
|
|
66
|
+
arg->text, arg->startpos,
|
|
67
|
+
arg->anchor, arg->matches, arg->n);
|
|
68
|
+
#endif
|
|
69
|
+
return nullptr;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
static bool re2_match_without_gvl(
|
|
73
|
+
const RE2 *pattern, VALUE text, size_t startpos, size_t endpos,
|
|
74
|
+
RE2::Anchor anchor, re2::StringPiece *matches, int n) {
|
|
75
|
+
nogvl_match_arg arg;
|
|
76
|
+
arg.pattern = pattern;
|
|
77
|
+
arg.text = re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text));
|
|
78
|
+
arg.startpos = startpos;
|
|
79
|
+
arg.endpos = endpos;
|
|
80
|
+
arg.anchor = anchor;
|
|
81
|
+
arg.matches = matches;
|
|
82
|
+
arg.n = n;
|
|
83
|
+
arg.matched = false;
|
|
84
|
+
|
|
85
|
+
/* Abseil's synchronization primitives (SRWLOCK, SleepConditionVariableSRW)
|
|
86
|
+
* are incompatible with Ruby's Win32 Mutex-based GVL, causing
|
|
87
|
+
* WAIT_ABANDONED crashes when multiple threads match concurrently.
|
|
88
|
+
*/
|
|
89
|
+
#ifdef _WIN32
|
|
90
|
+
nogvl_match(&arg);
|
|
91
|
+
#else
|
|
92
|
+
/* No unblocking function is needed: RE2 matching is CPU-bound computation,
|
|
93
|
+
* not a blocking system call, so a signal cannot safely interrupt it.
|
|
94
|
+
*/
|
|
95
|
+
rb_thread_call_without_gvl(nogvl_match, &arg, NULL, NULL);
|
|
96
|
+
#endif
|
|
97
|
+
|
|
98
|
+
return arg.matched;
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
struct nogvl_set_match_arg {
|
|
102
|
+
const RE2::Set *set;
|
|
103
|
+
re2::StringPiece text;
|
|
104
|
+
std::vector<int> *v;
|
|
105
|
+
#ifdef HAVE_ERROR_INFO_ARGUMENT
|
|
106
|
+
RE2::Set::ErrorInfo *error_info;
|
|
107
|
+
#endif
|
|
108
|
+
bool matched;
|
|
109
|
+
};
|
|
110
|
+
|
|
111
|
+
static void *nogvl_set_match(void *ptr) {
|
|
112
|
+
auto *arg = static_cast<nogvl_set_match_arg *>(ptr);
|
|
113
|
+
#ifdef HAVE_ERROR_INFO_ARGUMENT
|
|
114
|
+
if (arg->error_info) {
|
|
115
|
+
arg->matched = arg->set->Match(arg->text, arg->v, arg->error_info);
|
|
116
|
+
} else {
|
|
117
|
+
arg->matched = arg->set->Match(arg->text, arg->v);
|
|
118
|
+
}
|
|
119
|
+
#else
|
|
120
|
+
arg->matched = arg->set->Match(arg->text, arg->v);
|
|
121
|
+
#endif
|
|
122
|
+
return nullptr;
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
struct nogvl_replace_arg {
|
|
126
|
+
std::string *str;
|
|
127
|
+
const RE2 *pattern;
|
|
128
|
+
re2::StringPiece string_pattern;
|
|
129
|
+
re2::StringPiece rewrite;
|
|
130
|
+
};
|
|
131
|
+
|
|
132
|
+
static void *nogvl_replace(void *ptr) {
|
|
133
|
+
auto *arg = static_cast<nogvl_replace_arg *>(ptr);
|
|
134
|
+
if (arg->pattern) {
|
|
135
|
+
RE2::Replace(arg->str, *arg->pattern, arg->rewrite);
|
|
136
|
+
} else {
|
|
137
|
+
RE2::Replace(arg->str, arg->string_pattern, arg->rewrite);
|
|
138
|
+
}
|
|
139
|
+
return nullptr;
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
static void *nogvl_global_replace(void *ptr) {
|
|
143
|
+
auto *arg = static_cast<nogvl_replace_arg *>(ptr);
|
|
144
|
+
if (arg->pattern) {
|
|
145
|
+
RE2::GlobalReplace(arg->str, *arg->pattern, arg->rewrite);
|
|
146
|
+
} else {
|
|
147
|
+
RE2::GlobalReplace(arg->str, arg->string_pattern, arg->rewrite);
|
|
148
|
+
}
|
|
149
|
+
return nullptr;
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
struct nogvl_extract_arg {
|
|
153
|
+
re2::StringPiece text;
|
|
154
|
+
const RE2 *pattern;
|
|
155
|
+
re2::StringPiece string_pattern;
|
|
156
|
+
re2::StringPiece rewrite;
|
|
157
|
+
std::string *out;
|
|
158
|
+
bool extracted;
|
|
159
|
+
};
|
|
160
|
+
|
|
161
|
+
static void *nogvl_extract(void *ptr) {
|
|
162
|
+
auto *arg = static_cast<nogvl_extract_arg *>(ptr);
|
|
163
|
+
if (arg->pattern) {
|
|
164
|
+
arg->extracted = RE2::Extract(arg->text, *arg->pattern,
|
|
165
|
+
arg->rewrite, arg->out);
|
|
166
|
+
} else {
|
|
167
|
+
arg->extracted = RE2::Extract(arg->text, RE2(arg->string_pattern),
|
|
168
|
+
arg->rewrite, arg->out);
|
|
169
|
+
}
|
|
170
|
+
return nullptr;
|
|
171
|
+
}
|
|
172
|
+
|
|
46
173
|
VALUE re2_mRE2, re2_cRegexp, re2_cMatchData, re2_cScanner, re2_cSet,
|
|
47
174
|
re2_eSetMatchError, re2_eSetUnsupportedError, re2_eRegexpUnsupportedError;
|
|
48
175
|
|
|
@@ -126,7 +253,7 @@ static void parse_re2_options(RE2::Options* re2_options, const VALUE options) {
|
|
|
126
253
|
}
|
|
127
254
|
|
|
128
255
|
static void re2_matchdata_mark(void *ptr) {
|
|
129
|
-
re2_matchdata *m =
|
|
256
|
+
re2_matchdata *m = static_cast<re2_matchdata *>(ptr);
|
|
130
257
|
rb_gc_mark_movable(m->regexp);
|
|
131
258
|
|
|
132
259
|
/* Text must not be movable because StringPiece matches hold pointers into
|
|
@@ -136,12 +263,12 @@ static void re2_matchdata_mark(void *ptr) {
|
|
|
136
263
|
}
|
|
137
264
|
|
|
138
265
|
static void re2_matchdata_compact(void *ptr) {
|
|
139
|
-
re2_matchdata *m =
|
|
266
|
+
re2_matchdata *m = static_cast<re2_matchdata *>(ptr);
|
|
140
267
|
m->regexp = rb_gc_location(m->regexp);
|
|
141
268
|
}
|
|
142
269
|
|
|
143
270
|
static void re2_matchdata_free(void *ptr) {
|
|
144
|
-
re2_matchdata *m =
|
|
271
|
+
re2_matchdata *m = static_cast<re2_matchdata *>(ptr);
|
|
145
272
|
if (m->matches) {
|
|
146
273
|
delete[] m->matches;
|
|
147
274
|
}
|
|
@@ -149,7 +276,7 @@ static void re2_matchdata_free(void *ptr) {
|
|
|
149
276
|
}
|
|
150
277
|
|
|
151
278
|
static size_t re2_matchdata_memsize(const void *ptr) {
|
|
152
|
-
const re2_matchdata *m =
|
|
279
|
+
const re2_matchdata *m = static_cast<const re2_matchdata *>(ptr);
|
|
153
280
|
size_t size = sizeof(*m);
|
|
154
281
|
if (m->matches) {
|
|
155
282
|
size += sizeof(*m->matches) * m->number_of_matches;
|
|
@@ -174,7 +301,7 @@ static const rb_data_type_t re2_matchdata_data_type = {
|
|
|
174
301
|
};
|
|
175
302
|
|
|
176
303
|
static void re2_scanner_mark(void *ptr) {
|
|
177
|
-
re2_scanner *s =
|
|
304
|
+
re2_scanner *s = static_cast<re2_scanner *>(ptr);
|
|
178
305
|
rb_gc_mark_movable(s->regexp);
|
|
179
306
|
|
|
180
307
|
/* Text must not be movable because the StringPiece input holds a pointer
|
|
@@ -184,12 +311,12 @@ static void re2_scanner_mark(void *ptr) {
|
|
|
184
311
|
}
|
|
185
312
|
|
|
186
313
|
static void re2_scanner_compact(void *ptr) {
|
|
187
|
-
re2_scanner *s =
|
|
314
|
+
re2_scanner *s = static_cast<re2_scanner *>(ptr);
|
|
188
315
|
s->regexp = rb_gc_location(s->regexp);
|
|
189
316
|
}
|
|
190
317
|
|
|
191
318
|
static void re2_scanner_free(void *ptr) {
|
|
192
|
-
re2_scanner *s =
|
|
319
|
+
re2_scanner *s = static_cast<re2_scanner *>(ptr);
|
|
193
320
|
if (s->input) {
|
|
194
321
|
delete s->input;
|
|
195
322
|
}
|
|
@@ -197,7 +324,7 @@ static void re2_scanner_free(void *ptr) {
|
|
|
197
324
|
}
|
|
198
325
|
|
|
199
326
|
static size_t re2_scanner_memsize(const void *ptr) {
|
|
200
|
-
const re2_scanner *s =
|
|
327
|
+
const re2_scanner *s = static_cast<const re2_scanner *>(ptr);
|
|
201
328
|
size_t size = sizeof(*s);
|
|
202
329
|
if (s->input) {
|
|
203
330
|
size += sizeof(*s->input);
|
|
@@ -222,7 +349,7 @@ static const rb_data_type_t re2_scanner_data_type = {
|
|
|
222
349
|
};
|
|
223
350
|
|
|
224
351
|
static void re2_regexp_free(void *ptr) {
|
|
225
|
-
re2_pattern *p =
|
|
352
|
+
re2_pattern *p = static_cast<re2_pattern *>(ptr);
|
|
226
353
|
if (p->pattern) {
|
|
227
354
|
delete p->pattern;
|
|
228
355
|
}
|
|
@@ -230,7 +357,7 @@ static void re2_regexp_free(void *ptr) {
|
|
|
230
357
|
}
|
|
231
358
|
|
|
232
359
|
static size_t re2_regexp_memsize(const void *ptr) {
|
|
233
|
-
const re2_pattern *p =
|
|
360
|
+
const re2_pattern *p = static_cast<const re2_pattern *>(ptr);
|
|
234
361
|
size_t size = sizeof(*p);
|
|
235
362
|
if (p->pattern) {
|
|
236
363
|
size += sizeof(*p->pattern);
|
|
@@ -250,7 +377,7 @@ static const rb_data_type_t re2_regexp_data_type = {
|
|
|
250
377
|
0,
|
|
251
378
|
// IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
|
|
252
379
|
// macro to update VALUE references, as to trigger write barriers.
|
|
253
|
-
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
|
|
380
|
+
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE
|
|
254
381
|
};
|
|
255
382
|
|
|
256
383
|
static re2_pattern *unwrap_re2_regexp(VALUE self) {
|
|
@@ -403,6 +530,7 @@ static VALUE re2_scanner_initialize_copy(VALUE self, VALUE other) {
|
|
|
403
530
|
|
|
404
531
|
if (self_c->input) {
|
|
405
532
|
delete self_c->input;
|
|
533
|
+
self_c->input = nullptr;
|
|
406
534
|
}
|
|
407
535
|
|
|
408
536
|
RB_OBJ_WRITE(self, &self_c->regexp, other_c->regexp);
|
|
@@ -481,9 +609,27 @@ static VALUE re2_scanner_scan(VALUE self) {
|
|
|
481
609
|
/* Check whether we've exhausted the input yet. */
|
|
482
610
|
c->eof = new_input_size == 0;
|
|
483
611
|
|
|
484
|
-
/* If the match didn't advance the input, we need to do this ourselves
|
|
612
|
+
/* If the match didn't advance the input, we need to do this ourselves,
|
|
613
|
+
* advancing by a whole character to avoid splitting multi-byte characters.
|
|
614
|
+
*
|
|
615
|
+
* The lookup table approach is taken from RE2's own Python extension: the
|
|
616
|
+
* high 4 bits of a UTF-8 lead byte determine the character's byte length.
|
|
617
|
+
*
|
|
618
|
+
* See https://github.com/google/re2/blob/972a15cedd008d846f1a39b2e88ce48d7f166cbd/python/_re2.cc#L46-L48
|
|
619
|
+
*/
|
|
485
620
|
if (!input_advanced && new_input_size > 0) {
|
|
486
|
-
|
|
621
|
+
size_t char_size = 1;
|
|
622
|
+
|
|
623
|
+
if (p->pattern->options().encoding() == RE2::Options::EncodingUTF8) {
|
|
624
|
+
char_size = "\1\1\1\1\1\1\1\1\1\1\1\1\2\2\3\4"
|
|
625
|
+
[((*c->input)[0] & 0xFF) >> 4];
|
|
626
|
+
|
|
627
|
+
if (char_size > new_input_size) {
|
|
628
|
+
char_size = new_input_size;
|
|
629
|
+
}
|
|
630
|
+
}
|
|
631
|
+
|
|
632
|
+
c->input->remove_prefix(char_size);
|
|
487
633
|
}
|
|
488
634
|
|
|
489
635
|
return result;
|
|
@@ -1152,6 +1298,7 @@ static VALUE re2_matchdata_initialize_copy(VALUE self, VALUE other) {
|
|
|
1152
1298
|
|
|
1153
1299
|
if (self_m->matches) {
|
|
1154
1300
|
delete[] self_m->matches;
|
|
1301
|
+
self_m->matches = nullptr;
|
|
1155
1302
|
}
|
|
1156
1303
|
|
|
1157
1304
|
self_m->number_of_matches = other_m->number_of_matches;
|
|
@@ -1229,8 +1376,11 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
|
|
1229
1376
|
|
|
1230
1377
|
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1231
1378
|
|
|
1379
|
+
rb_check_frozen(self);
|
|
1380
|
+
|
|
1232
1381
|
if (p->pattern) {
|
|
1233
1382
|
delete p->pattern;
|
|
1383
|
+
p->pattern = nullptr;
|
|
1234
1384
|
}
|
|
1235
1385
|
|
|
1236
1386
|
if (RTEST(options)) {
|
|
@@ -1248,6 +1398,8 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
|
|
1248
1398
|
rb_raise(rb_eNoMemError, "not enough memory to allocate RE2 object");
|
|
1249
1399
|
}
|
|
1250
1400
|
|
|
1401
|
+
rb_obj_freeze(self);
|
|
1402
|
+
|
|
1251
1403
|
return self;
|
|
1252
1404
|
}
|
|
1253
1405
|
|
|
@@ -1257,8 +1409,11 @@ static VALUE re2_regexp_initialize_copy(VALUE self, VALUE other) {
|
|
|
1257
1409
|
|
|
1258
1410
|
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, self_p);
|
|
1259
1411
|
|
|
1412
|
+
rb_check_frozen(self);
|
|
1413
|
+
|
|
1260
1414
|
if (self_p->pattern) {
|
|
1261
1415
|
delete self_p->pattern;
|
|
1416
|
+
self_p->pattern = nullptr;
|
|
1262
1417
|
}
|
|
1263
1418
|
|
|
1264
1419
|
self_p->pattern = new(std::nothrow) RE2(other_p->pattern->pattern(),
|
|
@@ -1267,6 +1422,8 @@ static VALUE re2_regexp_initialize_copy(VALUE self, VALUE other) {
|
|
|
1267
1422
|
rb_raise(rb_eNoMemError, "not enough memory to allocate RE2 object");
|
|
1268
1423
|
}
|
|
1269
1424
|
|
|
1425
|
+
rb_obj_freeze(self);
|
|
1426
|
+
|
|
1270
1427
|
return self;
|
|
1271
1428
|
}
|
|
1272
1429
|
|
|
@@ -1731,8 +1888,9 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
1731
1888
|
|
|
1732
1889
|
rb_scan_args(argc, argv, "11", &text, &options);
|
|
1733
1890
|
|
|
1734
|
-
/*
|
|
1891
|
+
/* Coerce and freeze text to prevent mutation. */
|
|
1735
1892
|
StringValue(text);
|
|
1893
|
+
text = rb_str_new_frozen(text);
|
|
1736
1894
|
|
|
1737
1895
|
p = unwrap_re2_regexp(self);
|
|
1738
1896
|
|
|
@@ -1822,16 +1980,18 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
1822
1980
|
rb_raise(rb_eArgError, "startpos should be <= endpos");
|
|
1823
1981
|
}
|
|
1824
1982
|
|
|
1825
|
-
|
|
1826
|
-
|
|
1827
|
-
|
|
1828
|
-
|
|
1829
|
-
|
|
1830
|
-
#else
|
|
1831
|
-
bool matched = p->pattern->Match(
|
|
1832
|
-
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
|
|
1833
|
-
startpos, anchor, 0, 0);
|
|
1983
|
+
#ifndef HAVE_ENDPOS_ARGUMENT
|
|
1984
|
+
/* Old RE2's Match() takes int startpos. Reject values that would overflow. */
|
|
1985
|
+
if (startpos > INT_MAX) {
|
|
1986
|
+
rb_raise(rb_eRangeError, "startpos should be <= %d", INT_MAX);
|
|
1987
|
+
}
|
|
1834
1988
|
#endif
|
|
1989
|
+
|
|
1990
|
+
if (n == 0) {
|
|
1991
|
+
bool matched = re2_match_without_gvl(
|
|
1992
|
+
p->pattern, text, startpos, endpos, anchor, 0, 0);
|
|
1993
|
+
RB_GC_GUARD(text);
|
|
1994
|
+
|
|
1835
1995
|
return BOOL2RUBY(matched);
|
|
1836
1996
|
} else {
|
|
1837
1997
|
if (n == INT_MAX) {
|
|
@@ -1847,17 +2007,10 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
1847
2007
|
"not enough memory to allocate StringPieces for matches");
|
|
1848
2008
|
}
|
|
1849
2009
|
|
|
1850
|
-
|
|
2010
|
+
bool matched = re2_match_without_gvl(
|
|
2011
|
+
p->pattern, text, startpos, endpos, anchor, matches, n);
|
|
2012
|
+
RB_GC_GUARD(text);
|
|
1851
2013
|
|
|
1852
|
-
#ifdef HAVE_ENDPOS_ARGUMENT
|
|
1853
|
-
bool matched = p->pattern->Match(
|
|
1854
|
-
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
|
|
1855
|
-
startpos, endpos, anchor, matches, n);
|
|
1856
|
-
#else
|
|
1857
|
-
bool matched = p->pattern->Match(
|
|
1858
|
-
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
|
|
1859
|
-
startpos, anchor, matches, n);
|
|
1860
|
-
#endif
|
|
1861
2014
|
if (matched) {
|
|
1862
2015
|
VALUE matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
|
|
1863
2016
|
TypedData_Get_Struct(matchdata, re2_matchdata, &re2_matchdata_data_type, m);
|
|
@@ -1886,13 +2039,15 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
1886
2039
|
* @raise [TypeError] if text cannot be coerced to a `String`
|
|
1887
2040
|
*/
|
|
1888
2041
|
static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
|
|
1889
|
-
/* Ensure text is a string. */
|
|
1890
2042
|
StringValue(text);
|
|
2043
|
+
text = rb_str_new_frozen(text);
|
|
1891
2044
|
|
|
1892
2045
|
re2_pattern *p = unwrap_re2_regexp(self);
|
|
2046
|
+
bool matched = re2_match_without_gvl(
|
|
2047
|
+
p->pattern, text, 0, RSTRING_LEN(text), RE2::UNANCHORED, 0, 0);
|
|
2048
|
+
RB_GC_GUARD(text);
|
|
1893
2049
|
|
|
1894
|
-
return BOOL2RUBY(
|
|
1895
|
-
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)), *p->pattern));
|
|
2050
|
+
return BOOL2RUBY(matched);
|
|
1896
2051
|
}
|
|
1897
2052
|
|
|
1898
2053
|
/*
|
|
@@ -1905,13 +2060,15 @@ static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
|
|
|
1905
2060
|
* @raise [TypeError] if text cannot be coerced to a `String`
|
|
1906
2061
|
*/
|
|
1907
2062
|
static VALUE re2_regexp_full_match_p(const VALUE self, VALUE text) {
|
|
1908
|
-
/* Ensure text is a string. */
|
|
1909
2063
|
StringValue(text);
|
|
2064
|
+
text = rb_str_new_frozen(text);
|
|
1910
2065
|
|
|
1911
2066
|
re2_pattern *p = unwrap_re2_regexp(self);
|
|
2067
|
+
bool matched = re2_match_without_gvl(
|
|
2068
|
+
p->pattern, text, 0, RSTRING_LEN(text), RE2::ANCHOR_BOTH, 0, 0);
|
|
2069
|
+
RB_GC_GUARD(text);
|
|
1912
2070
|
|
|
1913
|
-
return BOOL2RUBY(
|
|
1914
|
-
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)), *p->pattern));
|
|
2071
|
+
return BOOL2RUBY(matched);
|
|
1915
2072
|
}
|
|
1916
2073
|
|
|
1917
2074
|
/*
|
|
@@ -1927,8 +2084,8 @@ static VALUE re2_regexp_full_match_p(const VALUE self, VALUE text) {
|
|
|
1927
2084
|
* #=> #<RE2::Scanner:0x0000000000000001>
|
|
1928
2085
|
*/
|
|
1929
2086
|
static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
|
|
1930
|
-
/* Ensure text is a string. */
|
|
1931
2087
|
StringValue(text);
|
|
2088
|
+
text = rb_str_new_frozen(text);
|
|
1932
2089
|
|
|
1933
2090
|
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1934
2091
|
re2_scanner *c;
|
|
@@ -1936,7 +2093,7 @@ static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
|
|
|
1936
2093
|
TypedData_Get_Struct(scanner, re2_scanner, &re2_scanner_data_type, c);
|
|
1937
2094
|
|
|
1938
2095
|
RB_OBJ_WRITE(scanner, &c->regexp, self);
|
|
1939
|
-
RB_OBJ_WRITE(scanner, &c->text,
|
|
2096
|
+
RB_OBJ_WRITE(scanner, &c->text, text);
|
|
1940
2097
|
c->input = new(std::nothrow) re2::StringPiece(
|
|
1941
2098
|
RSTRING_PTR(c->text), RSTRING_LEN(c->text));
|
|
1942
2099
|
if (c->input == nullptr) {
|
|
@@ -1997,34 +2154,53 @@ static VALUE re2_regexp_match_has_endpos_argument_p(VALUE) {
|
|
|
1997
2154
|
*/
|
|
1998
2155
|
static VALUE re2_replace(VALUE, VALUE str, VALUE pattern,
|
|
1999
2156
|
VALUE rewrite) {
|
|
2000
|
-
|
|
2001
|
-
StringValue(rewrite);
|
|
2157
|
+
re2_pattern *p = nullptr;
|
|
2002
2158
|
|
|
2003
|
-
|
|
2004
|
-
|
|
2005
|
-
|
|
2006
|
-
* RE2::Replace.
|
|
2159
|
+
/* Coerce and freeze all arguments before any C++ allocations so that any
|
|
2160
|
+
* Ruby exceptions (via longjmp) cannot bypass C++ destructors and leak
|
|
2161
|
+
* memory, and later coercions cannot mutate earlier strings.
|
|
2007
2162
|
*/
|
|
2008
2163
|
StringValue(str);
|
|
2009
|
-
|
|
2010
|
-
|
|
2011
|
-
/* Do the replacement. */
|
|
2164
|
+
str = rb_str_new_frozen(str);
|
|
2012
2165
|
if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
|
|
2013
2166
|
p = unwrap_re2_regexp(pattern);
|
|
2014
|
-
RE2::Replace(&str_as_string, *p->pattern,
|
|
2015
|
-
re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
|
|
2016
|
-
|
|
2017
|
-
return encoded_str_new(str_as_string.data(), str_as_string.size(),
|
|
2018
|
-
p->pattern->options().encoding());
|
|
2019
2167
|
} else {
|
|
2020
|
-
/* Ensure pattern is a string. */
|
|
2021
2168
|
StringValue(pattern);
|
|
2169
|
+
pattern = rb_str_new_frozen(pattern);
|
|
2170
|
+
}
|
|
2171
|
+
StringValue(rewrite);
|
|
2172
|
+
rewrite = rb_str_new_frozen(rewrite);
|
|
2173
|
+
|
|
2174
|
+
/* Take a copy of str so it can be modified in-place by RE2::Replace. */
|
|
2175
|
+
std::string str_as_string(RSTRING_PTR(str), RSTRING_LEN(str));
|
|
2176
|
+
|
|
2177
|
+
nogvl_replace_arg arg;
|
|
2178
|
+
arg.str = &str_as_string;
|
|
2179
|
+
if (p) {
|
|
2180
|
+
arg.pattern = p->pattern;
|
|
2181
|
+
} else {
|
|
2182
|
+
arg.pattern = nullptr;
|
|
2183
|
+
arg.string_pattern = re2::StringPiece(
|
|
2184
|
+
RSTRING_PTR(pattern), RSTRING_LEN(pattern));
|
|
2185
|
+
}
|
|
2186
|
+
arg.rewrite = re2::StringPiece(
|
|
2187
|
+
RSTRING_PTR(rewrite), RSTRING_LEN(rewrite));
|
|
2188
|
+
|
|
2189
|
+
#ifdef _WIN32
|
|
2190
|
+
nogvl_replace(&arg);
|
|
2191
|
+
#else
|
|
2192
|
+
rb_thread_call_without_gvl(nogvl_replace, &arg, NULL, NULL);
|
|
2193
|
+
#endif
|
|
2022
2194
|
|
|
2023
|
-
|
|
2024
|
-
|
|
2025
|
-
re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
|
|
2195
|
+
RB_GC_GUARD(rewrite);
|
|
2196
|
+
RB_GC_GUARD(pattern);
|
|
2026
2197
|
|
|
2027
|
-
|
|
2198
|
+
if (p) {
|
|
2199
|
+
return encoded_str_new(str_as_string.data(), str_as_string.size(),
|
|
2200
|
+
p->pattern->options().encoding());
|
|
2201
|
+
} else {
|
|
2202
|
+
return encoded_str_new(str_as_string.data(), str_as_string.size(),
|
|
2203
|
+
RE2::Options::EncodingUTF8);
|
|
2028
2204
|
}
|
|
2029
2205
|
}
|
|
2030
2206
|
|
|
@@ -2050,33 +2226,55 @@ static VALUE re2_replace(VALUE, VALUE str, VALUE pattern,
|
|
|
2050
2226
|
*/
|
|
2051
2227
|
static VALUE re2_global_replace(VALUE, VALUE str, VALUE pattern,
|
|
2052
2228
|
VALUE rewrite) {
|
|
2053
|
-
|
|
2229
|
+
re2_pattern *p = nullptr;
|
|
2230
|
+
|
|
2231
|
+
/* Coerce and freeze all arguments before any C++ allocations so that any
|
|
2232
|
+
* Ruby exceptions (via longjmp) cannot bypass C++ destructors and leak
|
|
2233
|
+
* memory, and later coercions cannot mutate earlier strings.
|
|
2234
|
+
*/
|
|
2235
|
+
StringValue(str);
|
|
2236
|
+
str = rb_str_new_frozen(str);
|
|
2237
|
+
if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
|
|
2238
|
+
p = unwrap_re2_regexp(pattern);
|
|
2239
|
+
} else {
|
|
2240
|
+
StringValue(pattern);
|
|
2241
|
+
pattern = rb_str_new_frozen(pattern);
|
|
2242
|
+
}
|
|
2054
2243
|
StringValue(rewrite);
|
|
2244
|
+
rewrite = rb_str_new_frozen(rewrite);
|
|
2055
2245
|
|
|
2056
2246
|
/* Take a copy of str so it can be modified in-place by
|
|
2057
2247
|
* RE2::GlobalReplace.
|
|
2058
2248
|
*/
|
|
2059
|
-
re2_pattern *p;
|
|
2060
|
-
StringValue(str);
|
|
2061
2249
|
std::string str_as_string(RSTRING_PTR(str), RSTRING_LEN(str));
|
|
2062
2250
|
|
|
2063
|
-
|
|
2064
|
-
|
|
2065
|
-
|
|
2066
|
-
|
|
2067
|
-
|
|
2251
|
+
nogvl_replace_arg arg;
|
|
2252
|
+
arg.str = &str_as_string;
|
|
2253
|
+
if (p) {
|
|
2254
|
+
arg.pattern = p->pattern;
|
|
2255
|
+
} else {
|
|
2256
|
+
arg.pattern = nullptr;
|
|
2257
|
+
arg.string_pattern = re2::StringPiece(
|
|
2258
|
+
RSTRING_PTR(pattern), RSTRING_LEN(pattern));
|
|
2259
|
+
}
|
|
2260
|
+
arg.rewrite = re2::StringPiece(
|
|
2261
|
+
RSTRING_PTR(rewrite), RSTRING_LEN(rewrite));
|
|
2262
|
+
|
|
2263
|
+
#ifdef _WIN32
|
|
2264
|
+
nogvl_global_replace(&arg);
|
|
2265
|
+
#else
|
|
2266
|
+
rb_thread_call_without_gvl(nogvl_global_replace, &arg, NULL, NULL);
|
|
2267
|
+
#endif
|
|
2268
|
+
|
|
2269
|
+
RB_GC_GUARD(rewrite);
|
|
2270
|
+
RB_GC_GUARD(pattern);
|
|
2068
2271
|
|
|
2272
|
+
if (p) {
|
|
2069
2273
|
return encoded_str_new(str_as_string.data(), str_as_string.size(),
|
|
2070
2274
|
p->pattern->options().encoding());
|
|
2071
2275
|
} else {
|
|
2072
|
-
|
|
2073
|
-
|
|
2074
|
-
|
|
2075
|
-
RE2::GlobalReplace(&str_as_string,
|
|
2076
|
-
re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)),
|
|
2077
|
-
re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
|
|
2078
|
-
|
|
2079
|
-
return encoded_str_new(str_as_string.data(), str_as_string.size(), RE2::Options::EncodingUTF8);
|
|
2276
|
+
return encoded_str_new(str_as_string.data(), str_as_string.size(),
|
|
2277
|
+
RE2::Options::EncodingUTF8);
|
|
2080
2278
|
}
|
|
2081
2279
|
}
|
|
2082
2280
|
|
|
@@ -2104,44 +2302,55 @@ static VALUE re2_global_replace(VALUE, VALUE str, VALUE pattern,
|
|
|
2104
2302
|
*/
|
|
2105
2303
|
static VALUE re2_extract(VALUE, VALUE text, VALUE pattern,
|
|
2106
2304
|
VALUE rewrite) {
|
|
2107
|
-
|
|
2108
|
-
StringValue(rewrite);
|
|
2109
|
-
StringValue(text);
|
|
2110
|
-
|
|
2111
|
-
re2_pattern *p;
|
|
2112
|
-
std::string out;
|
|
2113
|
-
bool extracted;
|
|
2305
|
+
re2_pattern *p = nullptr;
|
|
2114
2306
|
|
|
2307
|
+
/* Coerce and freeze all arguments before any C++ allocations so that any
|
|
2308
|
+
* Ruby exceptions (via longjmp) cannot bypass C++ destructors and leak
|
|
2309
|
+
* memory, and later coercions cannot mutate earlier strings.
|
|
2310
|
+
*/
|
|
2311
|
+
StringValue(text);
|
|
2312
|
+
text = rb_str_new_frozen(text);
|
|
2115
2313
|
if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
|
|
2116
2314
|
p = unwrap_re2_regexp(pattern);
|
|
2117
|
-
extracted = RE2::Extract(
|
|
2118
|
-
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
|
|
2119
|
-
*p->pattern,
|
|
2120
|
-
re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)),
|
|
2121
|
-
&out);
|
|
2122
|
-
|
|
2123
|
-
if (extracted) {
|
|
2124
|
-
return encoded_str_new(out.data(), out.size(),
|
|
2125
|
-
p->pattern->options().encoding());
|
|
2126
|
-
} else {
|
|
2127
|
-
return Qnil;
|
|
2128
|
-
}
|
|
2129
2315
|
} else {
|
|
2130
|
-
/* Ensure pattern is a string. */
|
|
2131
2316
|
StringValue(pattern);
|
|
2317
|
+
pattern = rb_str_new_frozen(pattern);
|
|
2318
|
+
}
|
|
2319
|
+
StringValue(rewrite);
|
|
2320
|
+
rewrite = rb_str_new_frozen(rewrite);
|
|
2132
2321
|
|
|
2133
|
-
|
|
2134
|
-
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
|
|
2135
|
-
RE2(re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern))),
|
|
2136
|
-
re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)),
|
|
2137
|
-
&out);
|
|
2322
|
+
std::string out;
|
|
2138
2323
|
|
|
2139
|
-
|
|
2140
|
-
|
|
2141
|
-
|
|
2142
|
-
|
|
2143
|
-
|
|
2144
|
-
|
|
2324
|
+
nogvl_extract_arg arg;
|
|
2325
|
+
arg.text = re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text));
|
|
2326
|
+
if (p) {
|
|
2327
|
+
arg.pattern = p->pattern;
|
|
2328
|
+
} else {
|
|
2329
|
+
arg.pattern = nullptr;
|
|
2330
|
+
arg.string_pattern = re2::StringPiece(
|
|
2331
|
+
RSTRING_PTR(pattern), RSTRING_LEN(pattern));
|
|
2332
|
+
}
|
|
2333
|
+
arg.rewrite = re2::StringPiece(
|
|
2334
|
+
RSTRING_PTR(rewrite), RSTRING_LEN(rewrite));
|
|
2335
|
+
arg.out = &out;
|
|
2336
|
+
arg.extracted = false;
|
|
2337
|
+
|
|
2338
|
+
#ifdef _WIN32
|
|
2339
|
+
nogvl_extract(&arg);
|
|
2340
|
+
#else
|
|
2341
|
+
rb_thread_call_without_gvl(nogvl_extract, &arg, NULL, NULL);
|
|
2342
|
+
#endif
|
|
2343
|
+
|
|
2344
|
+
RB_GC_GUARD(text);
|
|
2345
|
+
RB_GC_GUARD(rewrite);
|
|
2346
|
+
RB_GC_GUARD(pattern);
|
|
2347
|
+
|
|
2348
|
+
if (arg.extracted) {
|
|
2349
|
+
return encoded_str_new(out.data(), out.size(),
|
|
2350
|
+
p ? p->pattern->options().encoding()
|
|
2351
|
+
: RE2::Options::EncodingUTF8);
|
|
2352
|
+
} else {
|
|
2353
|
+
return Qnil;
|
|
2145
2354
|
}
|
|
2146
2355
|
}
|
|
2147
2356
|
|
|
@@ -2171,7 +2380,7 @@ static VALUE re2_escape(VALUE, VALUE unquoted) {
|
|
|
2171
2380
|
}
|
|
2172
2381
|
|
|
2173
2382
|
static void re2_set_free(void *ptr) {
|
|
2174
|
-
re2_set *s =
|
|
2383
|
+
re2_set *s = static_cast<re2_set *>(ptr);
|
|
2175
2384
|
if (s->set) {
|
|
2176
2385
|
delete s->set;
|
|
2177
2386
|
}
|
|
@@ -2179,7 +2388,7 @@ static void re2_set_free(void *ptr) {
|
|
|
2179
2388
|
}
|
|
2180
2389
|
|
|
2181
2390
|
static size_t re2_set_memsize(const void *ptr) {
|
|
2182
|
-
const re2_set *s =
|
|
2391
|
+
const re2_set *s = static_cast<const re2_set *>(ptr);
|
|
2183
2392
|
size_t size = sizeof(*s);
|
|
2184
2393
|
if (s->set) {
|
|
2185
2394
|
size += sizeof(*s->set);
|
|
@@ -2199,7 +2408,7 @@ static const rb_data_type_t re2_set_data_type = {
|
|
|
2199
2408
|
0,
|
|
2200
2409
|
// IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
|
|
2201
2410
|
// macro to update VALUE references, as to trigger write barriers.
|
|
2202
|
-
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
|
|
2411
|
+
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE
|
|
2203
2412
|
};
|
|
2204
2413
|
|
|
2205
2414
|
static re2_set *unwrap_re2_set(VALUE self) {
|
|
@@ -2292,8 +2501,11 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
|
|
|
2292
2501
|
parse_re2_options(&re2_options, options);
|
|
2293
2502
|
}
|
|
2294
2503
|
|
|
2504
|
+
rb_check_frozen(self);
|
|
2505
|
+
|
|
2295
2506
|
if (s->set) {
|
|
2296
2507
|
delete s->set;
|
|
2508
|
+
s->set = nullptr;
|
|
2297
2509
|
}
|
|
2298
2510
|
|
|
2299
2511
|
s->set = new(std::nothrow) RE2::Set(re2_options, re2_anchor);
|
|
@@ -2321,6 +2533,7 @@ static VALUE re2_set_add(VALUE self, VALUE pattern) {
|
|
|
2321
2533
|
StringValue(pattern);
|
|
2322
2534
|
|
|
2323
2535
|
re2_set *s = unwrap_re2_set(self);
|
|
2536
|
+
rb_check_frozen(self);
|
|
2324
2537
|
|
|
2325
2538
|
int index;
|
|
2326
2539
|
VALUE msg;
|
|
@@ -2352,8 +2565,15 @@ static VALUE re2_set_add(VALUE self, VALUE pattern) {
|
|
|
2352
2565
|
*/
|
|
2353
2566
|
static VALUE re2_set_compile(VALUE self) {
|
|
2354
2567
|
re2_set *s = unwrap_re2_set(self);
|
|
2568
|
+
rb_check_frozen(self);
|
|
2355
2569
|
|
|
2356
|
-
|
|
2570
|
+
bool compiled = s->set->Compile();
|
|
2571
|
+
|
|
2572
|
+
if (compiled) {
|
|
2573
|
+
rb_obj_freeze(self);
|
|
2574
|
+
}
|
|
2575
|
+
|
|
2576
|
+
return BOOL2RUBY(compiled);
|
|
2357
2577
|
}
|
|
2358
2578
|
|
|
2359
2579
|
/*
|
|
@@ -2450,6 +2670,8 @@ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
2450
2670
|
rb_scan_args(argc, argv, "11", &str, &options);
|
|
2451
2671
|
|
|
2452
2672
|
StringValue(str);
|
|
2673
|
+
str = rb_str_new_frozen(str);
|
|
2674
|
+
|
|
2453
2675
|
re2_set *s = unwrap_re2_set(self);
|
|
2454
2676
|
|
|
2455
2677
|
if (RTEST(options)) {
|
|
@@ -2466,8 +2688,21 @@ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
2466
2688
|
if (raise_exception) {
|
|
2467
2689
|
#ifdef HAVE_ERROR_INFO_ARGUMENT
|
|
2468
2690
|
RE2::Set::ErrorInfo e;
|
|
2469
|
-
|
|
2470
|
-
|
|
2691
|
+
nogvl_set_match_arg arg;
|
|
2692
|
+
arg.set = s->set;
|
|
2693
|
+
arg.text = re2::StringPiece(RSTRING_PTR(str), RSTRING_LEN(str));
|
|
2694
|
+
arg.v = &v;
|
|
2695
|
+
arg.error_info = &e;
|
|
2696
|
+
arg.matched = false;
|
|
2697
|
+
|
|
2698
|
+
#ifdef _WIN32
|
|
2699
|
+
nogvl_set_match(&arg);
|
|
2700
|
+
#else
|
|
2701
|
+
rb_thread_call_without_gvl(nogvl_set_match, &arg, NULL, NULL);
|
|
2702
|
+
#endif
|
|
2703
|
+
RB_GC_GUARD(str);
|
|
2704
|
+
|
|
2705
|
+
bool match_failed = !arg.matched;
|
|
2471
2706
|
VALUE result = rb_ary_new2(v.size());
|
|
2472
2707
|
|
|
2473
2708
|
if (match_failed) {
|
|
@@ -2494,11 +2729,25 @@ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
2494
2729
|
rb_raise(re2_eSetUnsupportedError, "current version of RE2::Set::Match() does not output error information, :exception option can only be set to false");
|
|
2495
2730
|
#endif
|
|
2496
2731
|
} else {
|
|
2497
|
-
|
|
2498
|
-
|
|
2732
|
+
nogvl_set_match_arg arg;
|
|
2733
|
+
arg.set = s->set;
|
|
2734
|
+
arg.text = re2::StringPiece(RSTRING_PTR(str), RSTRING_LEN(str));
|
|
2735
|
+
arg.v = &v;
|
|
2736
|
+
#ifdef HAVE_ERROR_INFO_ARGUMENT
|
|
2737
|
+
arg.error_info = nullptr;
|
|
2738
|
+
#endif
|
|
2739
|
+
arg.matched = false;
|
|
2740
|
+
|
|
2741
|
+
#ifdef _WIN32
|
|
2742
|
+
nogvl_set_match(&arg);
|
|
2743
|
+
#else
|
|
2744
|
+
rb_thread_call_without_gvl(nogvl_set_match, &arg, NULL, NULL);
|
|
2745
|
+
#endif
|
|
2746
|
+
RB_GC_GUARD(str);
|
|
2747
|
+
|
|
2499
2748
|
VALUE result = rb_ary_new2(v.size());
|
|
2500
2749
|
|
|
2501
|
-
if (matched) {
|
|
2750
|
+
if (arg.matched) {
|
|
2502
2751
|
for (int index : v) {
|
|
2503
2752
|
rb_ary_push(result, INT2FIX(index));
|
|
2504
2753
|
}
|
|
@@ -2509,6 +2758,8 @@ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
2509
2758
|
}
|
|
2510
2759
|
|
|
2511
2760
|
extern "C" void Init_re2(void) {
|
|
2761
|
+
rb_ext_ractor_safe(true);
|
|
2762
|
+
|
|
2512
2763
|
re2_mRE2 = rb_define_module("RE2");
|
|
2513
2764
|
re2_cRegexp = rb_define_class_under(re2_mRE2, "Regexp", rb_cObject);
|
|
2514
2765
|
re2_eRegexpUnsupportedError = rb_define_class_under(re2_cRegexp,
|
data/lib/3.1/re2.so
CHANGED
|
Binary file
|
data/lib/3.2/re2.so
CHANGED
|
Binary file
|
data/lib/3.3/re2.so
CHANGED
|
Binary file
|
data/lib/3.4/re2.so
CHANGED
|
Binary file
|
data/lib/4.0/re2.so
CHANGED
|
Binary file
|
data/lib/re2/version.rb
CHANGED
data/spec/re2/regexp_spec.rb
CHANGED
|
@@ -39,6 +39,16 @@ RSpec.describe RE2::Regexp do
|
|
|
39
39
|
|
|
40
40
|
expect(re).to be_a(RE2::Regexp)
|
|
41
41
|
end
|
|
42
|
+
|
|
43
|
+
it "returns a frozen object" do
|
|
44
|
+
expect(RE2::Regexp.new('woo')).to be_frozen
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
it "cannot be re-initialized" do
|
|
48
|
+
re = RE2::Regexp.new('woo')
|
|
49
|
+
|
|
50
|
+
expect { re.send(:initialize, 'bar') }.to raise_error(FrozenError)
|
|
51
|
+
end
|
|
42
52
|
end
|
|
43
53
|
|
|
44
54
|
describe "#dup" do
|
|
@@ -70,6 +80,13 @@ RSpec.describe RE2::Regexp do
|
|
|
70
80
|
expect(copy).to_not be_case_sensitive
|
|
71
81
|
end
|
|
72
82
|
|
|
83
|
+
it "returns a frozen copy" do
|
|
84
|
+
re = described_class.new('(\d+)')
|
|
85
|
+
copy = re.dup
|
|
86
|
+
|
|
87
|
+
expect(copy).to be_frozen
|
|
88
|
+
end
|
|
89
|
+
|
|
73
90
|
it "raises an error when called on an uninitialized object" do
|
|
74
91
|
expect { described_class.allocate.dup }.to raise_error(TypeError, /uninitialized RE2::Regexp/)
|
|
75
92
|
end
|
|
@@ -83,6 +100,13 @@ RSpec.describe RE2::Regexp do
|
|
|
83
100
|
expect(copy.to_s).to eq('woo')
|
|
84
101
|
end
|
|
85
102
|
|
|
103
|
+
it "returns a frozen copy" do
|
|
104
|
+
re = described_class.new('woo')
|
|
105
|
+
copy = re.clone
|
|
106
|
+
|
|
107
|
+
expect(copy).to be_frozen
|
|
108
|
+
end
|
|
109
|
+
|
|
86
110
|
it "raises an error when called on an uninitialized object" do
|
|
87
111
|
expect { described_class.allocate.clone }.to raise_error(TypeError, /uninitialized RE2::Regexp/)
|
|
88
112
|
end
|
|
@@ -757,9 +781,28 @@ RSpec.describe RE2::Regexp do
|
|
|
757
781
|
expect(re.match("one two three", nil)).to be_a(RE2::MatchData)
|
|
758
782
|
end
|
|
759
783
|
|
|
784
|
+
it "raises an error when startpos exceeds INT_MAX on old RE2 ABI" do
|
|
785
|
+
skip "Underlying RE2::Match does not take int startpos" if RE2::Regexp.match_has_endpos_argument?
|
|
786
|
+
skip "size_t is not larger than a 32-bit int" if RbConfig::SIZEOF.fetch("size_t") <= (32 / 8)
|
|
787
|
+
|
|
788
|
+
re = RE2::Regexp.new('(\w+)', log_errors: false)
|
|
789
|
+
|
|
790
|
+
expect { re.match("test", start_pos: 2_147_483_648) }.to raise_error(RangeError, /startpos should be <=/)
|
|
791
|
+
end
|
|
792
|
+
|
|
760
793
|
it "raises an error when called on an uninitialized object" do
|
|
761
794
|
expect { described_class.allocate.match("test") }.to raise_error(TypeError, /uninitialized RE2::Regexp/)
|
|
762
795
|
end
|
|
796
|
+
|
|
797
|
+
it "can be run concurrently" do
|
|
798
|
+
re = RE2::Regexp.new('(\w+)\s(\w+)')
|
|
799
|
+
|
|
800
|
+
threads = 10.times.map do
|
|
801
|
+
Thread.new { re.match("one two").values_at(1, 2) }
|
|
802
|
+
end
|
|
803
|
+
|
|
804
|
+
expect(threads.map(&:value)).to all(eq(["one", "two"]))
|
|
805
|
+
end
|
|
763
806
|
end
|
|
764
807
|
|
|
765
808
|
describe "#match?" do
|
|
@@ -785,6 +828,16 @@ RSpec.describe RE2::Regexp do
|
|
|
785
828
|
it "raises an error when called on an uninitialized object" do
|
|
786
829
|
expect { described_class.allocate.match?("test") }.to raise_error(TypeError, /uninitialized RE2::Regexp/)
|
|
787
830
|
end
|
|
831
|
+
|
|
832
|
+
it "can be run concurrently" do
|
|
833
|
+
re = RE2::Regexp.new('(\w+)\s(\w+)')
|
|
834
|
+
|
|
835
|
+
threads = 10.times.map do
|
|
836
|
+
Thread.new { re.match?("one two") }
|
|
837
|
+
end
|
|
838
|
+
|
|
839
|
+
expect(threads.map(&:value)).to all(eq(true))
|
|
840
|
+
end
|
|
788
841
|
end
|
|
789
842
|
|
|
790
843
|
describe "#partial_match?" do
|
|
@@ -817,6 +870,16 @@ RSpec.describe RE2::Regexp do
|
|
|
817
870
|
it "raises an error when called on an uninitialized object" do
|
|
818
871
|
expect { described_class.allocate.partial_match?("test") }.to raise_error(TypeError, /uninitialized RE2::Regexp/)
|
|
819
872
|
end
|
|
873
|
+
|
|
874
|
+
it "can be run concurrently" do
|
|
875
|
+
re = RE2::Regexp.new('(\d+)')
|
|
876
|
+
|
|
877
|
+
threads = 10.times.map do
|
|
878
|
+
Thread.new { re.partial_match?("alice 123") }
|
|
879
|
+
end
|
|
880
|
+
|
|
881
|
+
expect(threads.map(&:value)).to all(eq(true))
|
|
882
|
+
end
|
|
820
883
|
end
|
|
821
884
|
|
|
822
885
|
describe "#=~" do
|
|
@@ -906,6 +969,16 @@ RSpec.describe RE2::Regexp do
|
|
|
906
969
|
it "raises an error when called on an uninitialized object" do
|
|
907
970
|
expect { described_class.allocate.full_match?("test") }.to raise_error(TypeError, /uninitialized RE2::Regexp/)
|
|
908
971
|
end
|
|
972
|
+
|
|
973
|
+
it "can be run concurrently" do
|
|
974
|
+
re = RE2::Regexp.new('(\w+) (\d+)')
|
|
975
|
+
|
|
976
|
+
threads = 10.times.map do
|
|
977
|
+
Thread.new { re.full_match?("alice 123") }
|
|
978
|
+
end
|
|
979
|
+
|
|
980
|
+
expect(threads.map(&:value)).to all(eq(true))
|
|
981
|
+
end
|
|
909
982
|
end
|
|
910
983
|
|
|
911
984
|
describe "#ok?" do
|
data/spec/re2/scanner_spec.rb
CHANGED
|
@@ -243,6 +243,52 @@ RSpec.describe RE2::Scanner do
|
|
|
243
243
|
expect(scanner.scan).to be_nil
|
|
244
244
|
end
|
|
245
245
|
|
|
246
|
+
it "advances by whole characters with zero-width matches on 2-byte UTF-8 input", :aggregate_failures do
|
|
247
|
+
r = RE2::Regexp.new("")
|
|
248
|
+
scanner = r.scan("à")
|
|
249
|
+
|
|
250
|
+
expect(scanner.scan).to eq([])
|
|
251
|
+
expect(scanner.scan).to eq([])
|
|
252
|
+
expect(scanner.scan).to be_nil
|
|
253
|
+
end
|
|
254
|
+
|
|
255
|
+
it "advances by whole characters with zero-width matches on 3-byte UTF-8 input", :aggregate_failures do
|
|
256
|
+
r = RE2::Regexp.new("")
|
|
257
|
+
scanner = r.scan("\u20AC")
|
|
258
|
+
|
|
259
|
+
expect(scanner.scan).to eq([])
|
|
260
|
+
expect(scanner.scan).to eq([])
|
|
261
|
+
expect(scanner.scan).to be_nil
|
|
262
|
+
end
|
|
263
|
+
|
|
264
|
+
it "advances by whole characters with zero-width matches on 4-byte UTF-8 input", :aggregate_failures do
|
|
265
|
+
r = RE2::Regexp.new("")
|
|
266
|
+
scanner = r.scan("\u{1F600}")
|
|
267
|
+
|
|
268
|
+
expect(scanner.scan).to eq([])
|
|
269
|
+
expect(scanner.scan).to eq([])
|
|
270
|
+
expect(scanner.scan).to be_nil
|
|
271
|
+
end
|
|
272
|
+
|
|
273
|
+
it "advances by single bytes with zero-width matches on Latin-1 input", :aggregate_failures do
|
|
274
|
+
r = RE2::Regexp.new("", utf8: false)
|
|
275
|
+
scanner = r.scan("\xC3\xA0")
|
|
276
|
+
|
|
277
|
+
expect(scanner.scan).to eq([])
|
|
278
|
+
expect(scanner.scan).to eq([])
|
|
279
|
+
expect(scanner.scan).to eq([])
|
|
280
|
+
expect(scanner.scan).to be_nil
|
|
281
|
+
end
|
|
282
|
+
|
|
283
|
+
it "handles truncated multi-byte sequences at the end of input", :aggregate_failures do
|
|
284
|
+
r = RE2::Regexp.new("")
|
|
285
|
+
scanner = r.scan("\xC3")
|
|
286
|
+
|
|
287
|
+
expect(scanner.scan).to eq([])
|
|
288
|
+
expect(scanner.scan).to eq([])
|
|
289
|
+
expect(scanner.scan).to be_nil
|
|
290
|
+
end
|
|
291
|
+
|
|
246
292
|
it "raises a Type Error if given input that can't be coerced to a String" do
|
|
247
293
|
r = RE2::Regexp.new('(\w+)')
|
|
248
294
|
|
data/spec/re2/set_spec.rb
CHANGED
|
@@ -84,14 +84,12 @@ RSpec.describe RE2::Set do
|
|
|
84
84
|
expect { set.add("(?P<#{'o' * 200}") }.to raise_error(ArgumentError, "str rejected by RE2::Set->Add(): invalid named capture group: (?P<oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo")
|
|
85
85
|
end
|
|
86
86
|
|
|
87
|
-
it "raises
|
|
87
|
+
it "raises a FrozenError if called after #compile" do
|
|
88
88
|
set = RE2::Set.new(:unanchored, log_errors: false)
|
|
89
89
|
set.add("abc")
|
|
90
90
|
set.compile
|
|
91
91
|
|
|
92
|
-
|
|
93
|
-
expect { set.add("def") }.to raise_error(ArgumentError)
|
|
94
|
-
end
|
|
92
|
+
expect { set.add("def") }.to raise_error(FrozenError)
|
|
95
93
|
end
|
|
96
94
|
|
|
97
95
|
it "raises an error if given a pattern that can't be coerced to a String" do
|
|
@@ -121,6 +119,29 @@ RSpec.describe RE2::Set do
|
|
|
121
119
|
expect(set.compile).to be_truthy
|
|
122
120
|
end
|
|
123
121
|
|
|
122
|
+
it "freezes the set on successful compilation" do
|
|
123
|
+
set = RE2::Set.new
|
|
124
|
+
set.add("abc")
|
|
125
|
+
set.compile
|
|
126
|
+
|
|
127
|
+
expect(set).to be_frozen
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
it "is not frozen before compilation" do
|
|
131
|
+
set = RE2::Set.new
|
|
132
|
+
set.add("abc")
|
|
133
|
+
|
|
134
|
+
expect(set).to_not be_frozen
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
it "cannot be re-initialized after compilation" do
|
|
138
|
+
set = RE2::Set.new
|
|
139
|
+
set.add("abc")
|
|
140
|
+
set.compile
|
|
141
|
+
|
|
142
|
+
expect { set.send(:initialize) }.to raise_error(FrozenError)
|
|
143
|
+
end
|
|
144
|
+
|
|
124
145
|
it "raises an error when called on an uninitialized object" do
|
|
125
146
|
expect { described_class.allocate.compile }.to raise_error(TypeError, /uninitialized RE2::Set/)
|
|
126
147
|
end
|
|
@@ -226,6 +247,20 @@ RSpec.describe RE2::Set do
|
|
|
226
247
|
it "raises an error when called on an uninitialized object" do
|
|
227
248
|
expect { described_class.allocate.match("foo") }.to raise_error(TypeError, /uninitialized RE2::Set/)
|
|
228
249
|
end
|
|
250
|
+
|
|
251
|
+
it "can be run concurrently" do
|
|
252
|
+
set = RE2::Set.new
|
|
253
|
+
set.add("abc")
|
|
254
|
+
set.add("def")
|
|
255
|
+
set.add("ghi")
|
|
256
|
+
set.compile
|
|
257
|
+
|
|
258
|
+
threads = 10.times.map do
|
|
259
|
+
Thread.new { set.match("abcdefghi", exception: false) }
|
|
260
|
+
end
|
|
261
|
+
|
|
262
|
+
expect(threads.map(&:value)).to all(eq([0, 1, 2]))
|
|
263
|
+
end
|
|
229
264
|
end
|
|
230
265
|
|
|
231
266
|
describe "#size" do
|
data/spec/re2_spec.rb
CHANGED
|
@@ -86,9 +86,33 @@ RSpec.describe RE2 do
|
|
|
86
86
|
expect { RE2.replace("woo", 0, "ah") }.to raise_error(TypeError)
|
|
87
87
|
end
|
|
88
88
|
|
|
89
|
+
it "does not leak memory when given a non-String, non-RE2::Regexp pattern" do
|
|
90
|
+
expect { RE2.replace("a" * 128, 0, "ah") }.to raise_error(TypeError)
|
|
91
|
+
end
|
|
92
|
+
|
|
89
93
|
it "raises a Type Error for a replacement that can't be converted to String" do
|
|
90
94
|
expect { RE2.replace("woo", "oo", 0) }.to raise_error(TypeError)
|
|
91
95
|
end
|
|
96
|
+
|
|
97
|
+
it "can be run concurrently with the same RE2::Regexp pattern" do
|
|
98
|
+
re = RE2::Regexp.new('(\w+)\s(\w+)')
|
|
99
|
+
|
|
100
|
+
threads = 10.times.map do
|
|
101
|
+
Thread.new { RE2.replace("one two", re, '\2 \1') }
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
expect(threads.map(&:value)).to all(eq("two one"))
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
it "can be run concurrently with the same string pattern" do
|
|
108
|
+
re = '(\w+)\s(\w+)'
|
|
109
|
+
|
|
110
|
+
threads = 10.times.map do
|
|
111
|
+
Thread.new { RE2.replace("one two", re, '\2 \1') }
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
expect(threads.map(&:value)).to all(eq("two one"))
|
|
115
|
+
end
|
|
92
116
|
end
|
|
93
117
|
|
|
94
118
|
describe ".Replace" do
|
|
@@ -182,9 +206,33 @@ RSpec.describe RE2 do
|
|
|
182
206
|
expect { RE2.global_replace("woo", 0, "a") }.to raise_error(TypeError)
|
|
183
207
|
end
|
|
184
208
|
|
|
209
|
+
it "does not leak memory when given a non-String, non-RE2::Regexp pattern" do
|
|
210
|
+
expect { RE2.global_replace("a" * 128, 0, "a") }.to raise_error(TypeError)
|
|
211
|
+
end
|
|
212
|
+
|
|
185
213
|
it "raises a Type Error for a replacement that can't be converted to String" do
|
|
186
214
|
expect { RE2.global_replace("woo", "o", 0) }.to raise_error(TypeError)
|
|
187
215
|
end
|
|
216
|
+
|
|
217
|
+
it "can be run concurrently with the same RE2::Regexp pattern" do
|
|
218
|
+
re = RE2::Regexp.new('(\w+)\s(\w+)')
|
|
219
|
+
|
|
220
|
+
threads = 10.times.map do
|
|
221
|
+
Thread.new { RE2.global_replace("one two three four", re, '\2 \1') }
|
|
222
|
+
end
|
|
223
|
+
|
|
224
|
+
expect(threads.map(&:value)).to all(eq("two one four three"))
|
|
225
|
+
end
|
|
226
|
+
|
|
227
|
+
it "can be run concurrently with the same string pattern" do
|
|
228
|
+
re = '(\w+)\s(\w+)'
|
|
229
|
+
|
|
230
|
+
threads = 10.times.map do
|
|
231
|
+
Thread.new { RE2.global_replace("one two three four", re, '\2 \1') }
|
|
232
|
+
end
|
|
233
|
+
|
|
234
|
+
expect(threads.map(&:value)).to all(eq("two one four three"))
|
|
235
|
+
end
|
|
188
236
|
end
|
|
189
237
|
|
|
190
238
|
describe ".GlobalReplace" do
|
|
@@ -266,9 +314,33 @@ RSpec.describe RE2 do
|
|
|
266
314
|
expect { RE2.extract("woo", 0, '\1') }.to raise_error(TypeError)
|
|
267
315
|
end
|
|
268
316
|
|
|
317
|
+
it "does not leak memory when given a non-String, non-RE2::Regexp pattern" do
|
|
318
|
+
expect { RE2.extract("a" * 128, 0, '\1') }.to raise_error(TypeError)
|
|
319
|
+
end
|
|
320
|
+
|
|
269
321
|
it "raises a Type Error for a rewrite that can't be converted to String" do
|
|
270
322
|
expect { RE2.extract("woo", '(\w+)', 0) }.to raise_error(TypeError)
|
|
271
323
|
end
|
|
324
|
+
|
|
325
|
+
it "can be run concurrently with the same RE2::Regexp pattern" do
|
|
326
|
+
re = RE2::Regexp.new('(\w+)@(\w+)')
|
|
327
|
+
|
|
328
|
+
threads = 10.times.map do
|
|
329
|
+
Thread.new { RE2.extract("alice@example", re, '\2-\1') }
|
|
330
|
+
end
|
|
331
|
+
|
|
332
|
+
expect(threads.map(&:value)).to all(eq("example-alice"))
|
|
333
|
+
end
|
|
334
|
+
|
|
335
|
+
it "can be run concurrently with the same string pattern" do
|
|
336
|
+
re = '(\w+)@(\w+)'
|
|
337
|
+
|
|
338
|
+
threads = 10.times.map do
|
|
339
|
+
Thread.new { RE2.extract("alice@example", re, '\2-\1') }
|
|
340
|
+
end
|
|
341
|
+
|
|
342
|
+
expect(threads.map(&:value)).to all(eq("example-alice"))
|
|
343
|
+
end
|
|
272
344
|
end
|
|
273
345
|
|
|
274
346
|
describe "#escape" do
|