re2 2.26.1-x86_64-linux-gnu → 2.26.2-x86_64-linux-gnu
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/re2/re2.cc +68 -27
- data/lib/3.1/re2.so +0 -0
- data/lib/3.2/re2.so +0 -0
- data/lib/3.3/re2.so +0 -0
- data/lib/3.4/re2.so +0 -0
- data/lib/4.0/re2.so +0 -0
- data/lib/re2/version.rb +1 -1
- data/spec/re2/regexp_spec.rb +9 -0
- data/spec/re2/scanner_spec.rb +46 -0
- data/spec/re2_spec.rb +12 -0
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: cebd541e73b7d04ed10265c89a3c0ef8f52989a11ff8065d7bded9caba0a887f
|
|
4
|
+
data.tar.gz: c023f43cf0c7782b496f22811cd079f4a72da624b98c48f1049a62ca8345781e
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 0e75a3f78b09dbff07b53b9566382bd03227efecb246065451c9b30063c053c2d48cfd6a42ac44746a5a182aafc563cd2879094193acdb467f4400e7c12af308
|
|
7
|
+
data.tar.gz: 17755302c984c920b3510463a91e2d7d7280d7f49e5180662a52e46400751a0f3aa4a67625c7e5147df2f6d0b1f8fea69c3d795f6d70d13b5e6d88e15ce44d03
|
data/README.md
CHANGED
|
@@ -6,7 +6,7 @@ Python".
|
|
|
6
6
|
|
|
7
7
|
[Build Status](https://github.com/mudge/re2/actions)
|
|
8
8
|
|
|
9
|
-
**Current version:** 2.26.1
|
|
9
|
+
**Current version:** 2.26.2
|
|
10
10
|
**Bundled RE2 version:** libre2.11 (2025-11-05)
|
|
11
11
|
|
|
12
12
|
```ruby
|
data/ext/re2/re2.cc
CHANGED
|
@@ -403,6 +403,7 @@ static VALUE re2_scanner_initialize_copy(VALUE self, VALUE other) {
|
|
|
403
403
|
|
|
404
404
|
if (self_c->input) {
|
|
405
405
|
delete self_c->input;
|
|
406
|
+
self_c->input = nullptr;
|
|
406
407
|
}
|
|
407
408
|
|
|
408
409
|
RB_OBJ_WRITE(self, &self_c->regexp, other_c->regexp);
|
|
@@ -481,9 +482,27 @@ static VALUE re2_scanner_scan(VALUE self) {
|
|
|
481
482
|
/* Check whether we've exhausted the input yet. */
|
|
482
483
|
c->eof = new_input_size == 0;
|
|
483
484
|
|
|
484
|
-
/* If the match didn't advance the input, we need to do this ourselves
|
|
485
|
+
/* If the match didn't advance the input, we need to do this ourselves,
|
|
486
|
+
* advancing by a whole character to avoid splitting multi-byte characters.
|
|
487
|
+
*
|
|
488
|
+
* The lookup table approach is taken from RE2's own Python extension: the
|
|
489
|
+
* high 4 bits of a UTF-8 lead byte determine the character's byte length.
|
|
490
|
+
*
|
|
491
|
+
* See https://github.com/google/re2/blob/972a15cedd008d846f1a39b2e88ce48d7f166cbd/python/_re2.cc#L46-L48
|
|
492
|
+
*/
|
|
485
493
|
if (!input_advanced && new_input_size > 0) {
|
|
486
|
-
|
|
494
|
+
size_t char_size = 1;
|
|
495
|
+
|
|
496
|
+
if (p->pattern->options().encoding() == RE2::Options::EncodingUTF8) {
|
|
497
|
+
char_size = "\1\1\1\1\1\1\1\1\1\1\1\1\2\2\3\4"
|
|
498
|
+
[((*c->input)[0] & 0xFF) >> 4];
|
|
499
|
+
|
|
500
|
+
if (char_size > new_input_size) {
|
|
501
|
+
char_size = new_input_size;
|
|
502
|
+
}
|
|
503
|
+
}
|
|
504
|
+
|
|
505
|
+
c->input->remove_prefix(char_size);
|
|
487
506
|
}
|
|
488
507
|
|
|
489
508
|
return result;
|
|
@@ -1152,6 +1171,7 @@ static VALUE re2_matchdata_initialize_copy(VALUE self, VALUE other) {
|
|
|
1152
1171
|
|
|
1153
1172
|
if (self_m->matches) {
|
|
1154
1173
|
delete[] self_m->matches;
|
|
1174
|
+
self_m->matches = nullptr;
|
|
1155
1175
|
}
|
|
1156
1176
|
|
|
1157
1177
|
self_m->number_of_matches = other_m->number_of_matches;
|
|
@@ -1231,6 +1251,7 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
|
|
1231
1251
|
|
|
1232
1252
|
if (p->pattern) {
|
|
1233
1253
|
delete p->pattern;
|
|
1254
|
+
p->pattern = nullptr;
|
|
1234
1255
|
}
|
|
1235
1256
|
|
|
1236
1257
|
if (RTEST(options)) {
|
|
@@ -1259,6 +1280,7 @@ static VALUE re2_regexp_initialize_copy(VALUE self, VALUE other) {
|
|
|
1259
1280
|
|
|
1260
1281
|
if (self_p->pattern) {
|
|
1261
1282
|
delete self_p->pattern;
|
|
1283
|
+
self_p->pattern = nullptr;
|
|
1262
1284
|
}
|
|
1263
1285
|
|
|
1264
1286
|
self_p->pattern = new(std::nothrow) RE2(other_p->pattern->pattern(),
|
|
@@ -1822,6 +1844,13 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
1822
1844
|
rb_raise(rb_eArgError, "startpos should be <= endpos");
|
|
1823
1845
|
}
|
|
1824
1846
|
|
|
1847
|
+
#ifndef HAVE_ENDPOS_ARGUMENT
|
|
1848
|
+
/* Old RE2's Match() takes int startpos. Reject values that would overflow. */
|
|
1849
|
+
if (startpos > INT_MAX) {
|
|
1850
|
+
rb_raise(rb_eRangeError, "startpos should be <= %d", INT_MAX);
|
|
1851
|
+
}
|
|
1852
|
+
#endif
|
|
1853
|
+
|
|
1825
1854
|
if (n == 0) {
|
|
1826
1855
|
#ifdef HAVE_ENDPOS_ARGUMENT
|
|
1827
1856
|
bool matched = p->pattern->Match(
|
|
@@ -1997,29 +2026,32 @@ static VALUE re2_regexp_match_has_endpos_argument_p(VALUE) {
|
|
|
1997
2026
|
*/
|
|
1998
2027
|
static VALUE re2_replace(VALUE, VALUE str, VALUE pattern,
|
|
1999
2028
|
VALUE rewrite) {
|
|
2000
|
-
|
|
2001
|
-
StringValue(rewrite);
|
|
2029
|
+
re2_pattern *p = nullptr;
|
|
2002
2030
|
|
|
2003
|
-
|
|
2031
|
+
/* Coerce all arguments before any C++ allocations so that any Ruby
|
|
2032
|
+
* exceptions (via longjmp) cannot bypass C++ destructors and leak memory.
|
|
2033
|
+
*/
|
|
2034
|
+
StringValue(str);
|
|
2035
|
+
if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
|
|
2036
|
+
p = unwrap_re2_regexp(pattern);
|
|
2037
|
+
} else {
|
|
2038
|
+
StringValue(pattern);
|
|
2039
|
+
}
|
|
2040
|
+
StringValue(rewrite);
|
|
2004
2041
|
|
|
2005
2042
|
/* Take a copy of str so it can be modified in-place by
|
|
2006
2043
|
* RE2::Replace.
|
|
2007
2044
|
*/
|
|
2008
|
-
StringValue(str);
|
|
2009
2045
|
std::string str_as_string(RSTRING_PTR(str), RSTRING_LEN(str));
|
|
2010
2046
|
|
|
2011
2047
|
/* Do the replacement. */
|
|
2012
|
-
if (
|
|
2013
|
-
p = unwrap_re2_regexp(pattern);
|
|
2048
|
+
if (p) {
|
|
2014
2049
|
RE2::Replace(&str_as_string, *p->pattern,
|
|
2015
2050
|
re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
|
|
2016
2051
|
|
|
2017
2052
|
return encoded_str_new(str_as_string.data(), str_as_string.size(),
|
|
2018
2053
|
p->pattern->options().encoding());
|
|
2019
2054
|
} else {
|
|
2020
|
-
/* Ensure pattern is a string. */
|
|
2021
|
-
StringValue(pattern);
|
|
2022
|
-
|
|
2023
2055
|
RE2::Replace(&str_as_string,
|
|
2024
2056
|
re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)),
|
|
2025
2057
|
re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
|
|
@@ -2050,28 +2082,32 @@ static VALUE re2_replace(VALUE, VALUE str, VALUE pattern,
|
|
|
2050
2082
|
*/
|
|
2051
2083
|
static VALUE re2_global_replace(VALUE, VALUE str, VALUE pattern,
|
|
2052
2084
|
VALUE rewrite) {
|
|
2053
|
-
|
|
2085
|
+
re2_pattern *p = nullptr;
|
|
2086
|
+
|
|
2087
|
+
/* Coerce all arguments before any C++ allocations so that any Ruby
|
|
2088
|
+
* exceptions (via longjmp) cannot bypass C++ destructors and leak memory.
|
|
2089
|
+
*/
|
|
2090
|
+
StringValue(str);
|
|
2091
|
+
if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
|
|
2092
|
+
p = unwrap_re2_regexp(pattern);
|
|
2093
|
+
} else {
|
|
2094
|
+
StringValue(pattern);
|
|
2095
|
+
}
|
|
2054
2096
|
StringValue(rewrite);
|
|
2055
2097
|
|
|
2056
2098
|
/* Take a copy of str so it can be modified in-place by
|
|
2057
2099
|
* RE2::GlobalReplace.
|
|
2058
2100
|
*/
|
|
2059
|
-
re2_pattern *p;
|
|
2060
|
-
StringValue(str);
|
|
2061
2101
|
std::string str_as_string(RSTRING_PTR(str), RSTRING_LEN(str));
|
|
2062
2102
|
|
|
2063
2103
|
/* Do the replacement. */
|
|
2064
|
-
if (
|
|
2065
|
-
p = unwrap_re2_regexp(pattern);
|
|
2104
|
+
if (p) {
|
|
2066
2105
|
RE2::GlobalReplace(&str_as_string, *p->pattern,
|
|
2067
2106
|
re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
|
|
2068
2107
|
|
|
2069
2108
|
return encoded_str_new(str_as_string.data(), str_as_string.size(),
|
|
2070
2109
|
p->pattern->options().encoding());
|
|
2071
2110
|
} else {
|
|
2072
|
-
/* Ensure pattern is a string. */
|
|
2073
|
-
StringValue(pattern);
|
|
2074
|
-
|
|
2075
2111
|
RE2::GlobalReplace(&str_as_string,
|
|
2076
2112
|
re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)),
|
|
2077
2113
|
re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
|
|
@@ -2104,16 +2140,23 @@ static VALUE re2_global_replace(VALUE, VALUE str, VALUE pattern,
|
|
|
2104
2140
|
*/
|
|
2105
2141
|
static VALUE re2_extract(VALUE, VALUE text, VALUE pattern,
|
|
2106
2142
|
VALUE rewrite) {
|
|
2107
|
-
|
|
2108
|
-
|
|
2143
|
+
re2_pattern *p = nullptr;
|
|
2144
|
+
|
|
2145
|
+
/* Coerce all arguments before any C++ allocations so that any Ruby
|
|
2146
|
+
* exceptions (via longjmp) cannot bypass C++ destructors and leak memory.
|
|
2147
|
+
*/
|
|
2109
2148
|
StringValue(text);
|
|
2149
|
+
if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
|
|
2150
|
+
p = unwrap_re2_regexp(pattern);
|
|
2151
|
+
} else {
|
|
2152
|
+
StringValue(pattern);
|
|
2153
|
+
}
|
|
2154
|
+
StringValue(rewrite);
|
|
2110
2155
|
|
|
2111
|
-
re2_pattern *p;
|
|
2112
2156
|
std::string out;
|
|
2113
2157
|
bool extracted;
|
|
2114
2158
|
|
|
2115
|
-
if (
|
|
2116
|
-
p = unwrap_re2_regexp(pattern);
|
|
2159
|
+
if (p) {
|
|
2117
2160
|
extracted = RE2::Extract(
|
|
2118
2161
|
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
|
|
2119
2162
|
*p->pattern,
|
|
@@ -2127,9 +2170,6 @@ static VALUE re2_extract(VALUE, VALUE text, VALUE pattern,
|
|
|
2127
2170
|
return Qnil;
|
|
2128
2171
|
}
|
|
2129
2172
|
} else {
|
|
2130
|
-
/* Ensure pattern is a string. */
|
|
2131
|
-
StringValue(pattern);
|
|
2132
|
-
|
|
2133
2173
|
extracted = RE2::Extract(
|
|
2134
2174
|
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
|
|
2135
2175
|
RE2(re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern))),
|
|
@@ -2294,6 +2334,7 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
|
|
|
2294
2334
|
|
|
2295
2335
|
if (s->set) {
|
|
2296
2336
|
delete s->set;
|
|
2337
|
+
s->set = nullptr;
|
|
2297
2338
|
}
|
|
2298
2339
|
|
|
2299
2340
|
s->set = new(std::nothrow) RE2::Set(re2_options, re2_anchor);
|
data/lib/3.1/re2.so
CHANGED
|
Binary file
|
data/lib/3.2/re2.so
CHANGED
|
Binary file
|
data/lib/3.3/re2.so
CHANGED
|
Binary file
|
data/lib/3.4/re2.so
CHANGED
|
Binary file
|
data/lib/4.0/re2.so
CHANGED
|
Binary file
|
data/lib/re2/version.rb
CHANGED
data/spec/re2/regexp_spec.rb
CHANGED
|
@@ -757,6 +757,15 @@ RSpec.describe RE2::Regexp do
|
|
|
757
757
|
expect(re.match("one two three", nil)).to be_a(RE2::MatchData)
|
|
758
758
|
end
|
|
759
759
|
|
|
760
|
+
it "raises an error when startpos exceeds INT_MAX on old RE2 ABI" do
|
|
761
|
+
skip "Underlying RE2::Match does not take int startpos" if RE2::Regexp.match_has_endpos_argument?
|
|
762
|
+
skip "size_t is not larger than a 32-bit int" if RbConfig::SIZEOF.fetch("size_t") <= (32 / 8)
|
|
763
|
+
|
|
764
|
+
re = RE2::Regexp.new('(\w+)', log_errors: false)
|
|
765
|
+
|
|
766
|
+
expect { re.match("test", start_pos: 2_147_483_648) }.to raise_error(RangeError, /startpos should be <=/)
|
|
767
|
+
end
|
|
768
|
+
|
|
760
769
|
it "raises an error when called on an uninitialized object" do
|
|
761
770
|
expect { described_class.allocate.match("test") }.to raise_error(TypeError, /uninitialized RE2::Regexp/)
|
|
762
771
|
end
|
data/spec/re2/scanner_spec.rb
CHANGED
|
@@ -243,6 +243,52 @@ RSpec.describe RE2::Scanner do
|
|
|
243
243
|
expect(scanner.scan).to be_nil
|
|
244
244
|
end
|
|
245
245
|
|
|
246
|
+
it "advances by whole characters with zero-width matches on 2-byte UTF-8 input", :aggregate_failures do
|
|
247
|
+
r = RE2::Regexp.new("")
|
|
248
|
+
scanner = r.scan("à")
|
|
249
|
+
|
|
250
|
+
expect(scanner.scan).to eq([])
|
|
251
|
+
expect(scanner.scan).to eq([])
|
|
252
|
+
expect(scanner.scan).to be_nil
|
|
253
|
+
end
|
|
254
|
+
|
|
255
|
+
it "advances by whole characters with zero-width matches on 3-byte UTF-8 input", :aggregate_failures do
|
|
256
|
+
r = RE2::Regexp.new("")
|
|
257
|
+
scanner = r.scan("\u20AC")
|
|
258
|
+
|
|
259
|
+
expect(scanner.scan).to eq([])
|
|
260
|
+
expect(scanner.scan).to eq([])
|
|
261
|
+
expect(scanner.scan).to be_nil
|
|
262
|
+
end
|
|
263
|
+
|
|
264
|
+
it "advances by whole characters with zero-width matches on 4-byte UTF-8 input", :aggregate_failures do
|
|
265
|
+
r = RE2::Regexp.new("")
|
|
266
|
+
scanner = r.scan("\u{1F600}")
|
|
267
|
+
|
|
268
|
+
expect(scanner.scan).to eq([])
|
|
269
|
+
expect(scanner.scan).to eq([])
|
|
270
|
+
expect(scanner.scan).to be_nil
|
|
271
|
+
end
|
|
272
|
+
|
|
273
|
+
it "advances by single bytes with zero-width matches on Latin-1 input", :aggregate_failures do
|
|
274
|
+
r = RE2::Regexp.new("", utf8: false)
|
|
275
|
+
scanner = r.scan("\xC3\xA0")
|
|
276
|
+
|
|
277
|
+
expect(scanner.scan).to eq([])
|
|
278
|
+
expect(scanner.scan).to eq([])
|
|
279
|
+
expect(scanner.scan).to eq([])
|
|
280
|
+
expect(scanner.scan).to be_nil
|
|
281
|
+
end
|
|
282
|
+
|
|
283
|
+
it "handles truncated multi-byte sequences at the end of input", :aggregate_failures do
|
|
284
|
+
r = RE2::Regexp.new("")
|
|
285
|
+
scanner = r.scan("\xC3")
|
|
286
|
+
|
|
287
|
+
expect(scanner.scan).to eq([])
|
|
288
|
+
expect(scanner.scan).to eq([])
|
|
289
|
+
expect(scanner.scan).to be_nil
|
|
290
|
+
end
|
|
291
|
+
|
|
246
292
|
it "raises a Type Error if given input that can't be coerced to a String" do
|
|
247
293
|
r = RE2::Regexp.new('(\w+)')
|
|
248
294
|
|
data/spec/re2_spec.rb
CHANGED
|
@@ -86,6 +86,10 @@ RSpec.describe RE2 do
|
|
|
86
86
|
expect { RE2.replace("woo", 0, "ah") }.to raise_error(TypeError)
|
|
87
87
|
end
|
|
88
88
|
|
|
89
|
+
it "does not leak memory when given a non-String, non-RE2::Regexp pattern" do
|
|
90
|
+
expect { RE2.replace("a" * 128, 0, "ah") }.to raise_error(TypeError)
|
|
91
|
+
end
|
|
92
|
+
|
|
89
93
|
it "raises a Type Error for a replacement that can't be converted to String" do
|
|
90
94
|
expect { RE2.replace("woo", "oo", 0) }.to raise_error(TypeError)
|
|
91
95
|
end
|
|
@@ -182,6 +186,10 @@ RSpec.describe RE2 do
|
|
|
182
186
|
expect { RE2.global_replace("woo", 0, "a") }.to raise_error(TypeError)
|
|
183
187
|
end
|
|
184
188
|
|
|
189
|
+
it "does not leak memory when given a non-String, non-RE2::Regexp pattern" do
|
|
190
|
+
expect { RE2.global_replace("a" * 128, 0, "a") }.to raise_error(TypeError)
|
|
191
|
+
end
|
|
192
|
+
|
|
185
193
|
it "raises a Type Error for a replacement that can't be converted to String" do
|
|
186
194
|
expect { RE2.global_replace("woo", "o", 0) }.to raise_error(TypeError)
|
|
187
195
|
end
|
|
@@ -266,6 +274,10 @@ RSpec.describe RE2 do
|
|
|
266
274
|
expect { RE2.extract("woo", 0, '\1') }.to raise_error(TypeError)
|
|
267
275
|
end
|
|
268
276
|
|
|
277
|
+
it "does not leak memory when given a non-String, non-RE2::Regexp pattern" do
|
|
278
|
+
expect { RE2.extract("a" * 128, 0, '\1') }.to raise_error(TypeError)
|
|
279
|
+
end
|
|
280
|
+
|
|
269
281
|
it "raises a Type Error for a rewrite that can't be converted to String" do
|
|
270
282
|
expect { RE2.extract("woo", '(\w+)', 0) }.to raise_error(TypeError)
|
|
271
283
|
end
|