re2 2.26.1-x86_64-linux-gnu → 2.26.2-x86_64-linux-gnu

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f26a7c50adf99ee96e8af8cf72c2807df885f3aea35a56ac56962327b6151cce
4
- data.tar.gz: 8823d9b232988ec6026e30b4930284a0deed495cbf7fd7c8c2fab09c0244004c
3
+ metadata.gz: cebd541e73b7d04ed10265c89a3c0ef8f52989a11ff8065d7bded9caba0a887f
4
+ data.tar.gz: c023f43cf0c7782b496f22811cd079f4a72da624b98c48f1049a62ca8345781e
5
5
  SHA512:
6
- metadata.gz: 8423cd1e0bfb54c2e7d211c5f9b667cbd3025163adbbb42575e545ef5bbef4e437c955f107bdeb30a0a988e65374c3b601bcaa542716b9a649f0fdd803fc78c3
7
- data.tar.gz: 55b2b7aa364e94aa1a0e25a344707643974b435d82c8a04a73ac261aa99f3c2b350cd9dd01627e5a21c0b81c5412d5a833ecb7e138bc08a085a5ed87a574e352
6
+ metadata.gz: 0e75a3f78b09dbff07b53b9566382bd03227efecb246065451c9b30063c053c2d48cfd6a42ac44746a5a182aafc563cd2879094193acdb467f4400e7c12af308
7
+ data.tar.gz: 17755302c984c920b3510463a91e2d7d7280d7f49e5180662a52e46400751a0f3aa4a67625c7e5147df2f6d0b1f8fea69c3d795f6d70d13b5e6d88e15ce44d03
data/README.md CHANGED
@@ -6,7 +6,7 @@ Python".
6
6
 
7
7
  [![Build Status](https://github.com/mudge/re2/actions/workflows/tests.yml/badge.svg?branch=main)](https://github.com/mudge/re2/actions)
8
8
 
9
- **Current version:** 2.26.1
9
+ **Current version:** 2.26.2
10
10
  **Bundled RE2 version:** libre2.11 (2025-11-05)
11
11
 
12
12
  ```ruby
data/ext/re2/re2.cc CHANGED
@@ -403,6 +403,7 @@ static VALUE re2_scanner_initialize_copy(VALUE self, VALUE other) {
403
403
 
404
404
  if (self_c->input) {
405
405
  delete self_c->input;
406
+ self_c->input = nullptr;
406
407
  }
407
408
 
408
409
  RB_OBJ_WRITE(self, &self_c->regexp, other_c->regexp);
@@ -481,9 +482,27 @@ static VALUE re2_scanner_scan(VALUE self) {
481
482
  /* Check whether we've exhausted the input yet. */
482
483
  c->eof = new_input_size == 0;
483
484
 
484
- /* If the match didn't advance the input, we need to do this ourselves. */
485
+ /* If the match didn't advance the input, we need to do this ourselves,
486
+ * advancing by a whole character to avoid splitting multi-byte characters.
487
+ *
488
+ * The lookup table approach is taken from RE2's own Python extension: the
489
+ * high 4 bits of a UTF-8 lead byte determine the character's byte length.
490
+ *
491
+ * See https://github.com/google/re2/blob/972a15cedd008d846f1a39b2e88ce48d7f166cbd/python/_re2.cc#L46-L48
492
+ */
485
493
  if (!input_advanced && new_input_size > 0) {
486
- c->input->remove_prefix(1);
494
+ size_t char_size = 1;
495
+
496
+ if (p->pattern->options().encoding() == RE2::Options::EncodingUTF8) {
497
+ char_size = "\1\1\1\1\1\1\1\1\1\1\1\1\2\2\3\4"
498
+ [((*c->input)[0] & 0xFF) >> 4];
499
+
500
+ if (char_size > new_input_size) {
501
+ char_size = new_input_size;
502
+ }
503
+ }
504
+
505
+ c->input->remove_prefix(char_size);
487
506
  }
488
507
 
489
508
  return result;
@@ -1152,6 +1171,7 @@ static VALUE re2_matchdata_initialize_copy(VALUE self, VALUE other) {
1152
1171
 
1153
1172
  if (self_m->matches) {
1154
1173
  delete[] self_m->matches;
1174
+ self_m->matches = nullptr;
1155
1175
  }
1156
1176
 
1157
1177
  self_m->number_of_matches = other_m->number_of_matches;
@@ -1231,6 +1251,7 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
1231
1251
 
1232
1252
  if (p->pattern) {
1233
1253
  delete p->pattern;
1254
+ p->pattern = nullptr;
1234
1255
  }
1235
1256
 
1236
1257
  if (RTEST(options)) {
@@ -1259,6 +1280,7 @@ static VALUE re2_regexp_initialize_copy(VALUE self, VALUE other) {
1259
1280
 
1260
1281
  if (self_p->pattern) {
1261
1282
  delete self_p->pattern;
1283
+ self_p->pattern = nullptr;
1262
1284
  }
1263
1285
 
1264
1286
  self_p->pattern = new(std::nothrow) RE2(other_p->pattern->pattern(),
@@ -1822,6 +1844,13 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
1822
1844
  rb_raise(rb_eArgError, "startpos should be <= endpos");
1823
1845
  }
1824
1846
 
1847
+ #ifndef HAVE_ENDPOS_ARGUMENT
1848
+ /* Old RE2's Match() takes int startpos. Reject values that would overflow. */
1849
+ if (startpos > INT_MAX) {
1850
+ rb_raise(rb_eRangeError, "startpos should be <= %d", INT_MAX);
1851
+ }
1852
+ #endif
1853
+
1825
1854
  if (n == 0) {
1826
1855
  #ifdef HAVE_ENDPOS_ARGUMENT
1827
1856
  bool matched = p->pattern->Match(
@@ -1997,29 +2026,32 @@ static VALUE re2_regexp_match_has_endpos_argument_p(VALUE) {
1997
2026
  */
1998
2027
  static VALUE re2_replace(VALUE, VALUE str, VALUE pattern,
1999
2028
  VALUE rewrite) {
2000
- /* Ensure rewrite is a string. */
2001
- StringValue(rewrite);
2029
+ re2_pattern *p = nullptr;
2002
2030
 
2003
- re2_pattern *p;
2031
+ /* Coerce all arguments before any C++ allocations so that any Ruby
2032
+ * exceptions (via longjmp) cannot bypass C++ destructors and leak memory.
2033
+ */
2034
+ StringValue(str);
2035
+ if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
2036
+ p = unwrap_re2_regexp(pattern);
2037
+ } else {
2038
+ StringValue(pattern);
2039
+ }
2040
+ StringValue(rewrite);
2004
2041
 
2005
2042
  /* Take a copy of str so it can be modified in-place by
2006
2043
  * RE2::Replace.
2007
2044
  */
2008
- StringValue(str);
2009
2045
  std::string str_as_string(RSTRING_PTR(str), RSTRING_LEN(str));
2010
2046
 
2011
2047
  /* Do the replacement. */
2012
- if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
2013
- p = unwrap_re2_regexp(pattern);
2048
+ if (p) {
2014
2049
  RE2::Replace(&str_as_string, *p->pattern,
2015
2050
  re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
2016
2051
 
2017
2052
  return encoded_str_new(str_as_string.data(), str_as_string.size(),
2018
2053
  p->pattern->options().encoding());
2019
2054
  } else {
2020
- /* Ensure pattern is a string. */
2021
- StringValue(pattern);
2022
-
2023
2055
  RE2::Replace(&str_as_string,
2024
2056
  re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)),
2025
2057
  re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
@@ -2050,28 +2082,32 @@ static VALUE re2_replace(VALUE, VALUE str, VALUE pattern,
2050
2082
  */
2051
2083
  static VALUE re2_global_replace(VALUE, VALUE str, VALUE pattern,
2052
2084
  VALUE rewrite) {
2053
- /* Ensure rewrite is a string. */
2085
+ re2_pattern *p = nullptr;
2086
+
2087
+ /* Coerce all arguments before any C++ allocations so that any Ruby
2088
+ * exceptions (via longjmp) cannot bypass C++ destructors and leak memory.
2089
+ */
2090
+ StringValue(str);
2091
+ if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
2092
+ p = unwrap_re2_regexp(pattern);
2093
+ } else {
2094
+ StringValue(pattern);
2095
+ }
2054
2096
  StringValue(rewrite);
2055
2097
 
2056
2098
  /* Take a copy of str so it can be modified in-place by
2057
2099
  * RE2::GlobalReplace.
2058
2100
  */
2059
- re2_pattern *p;
2060
- StringValue(str);
2061
2101
  std::string str_as_string(RSTRING_PTR(str), RSTRING_LEN(str));
2062
2102
 
2063
2103
  /* Do the replacement. */
2064
- if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
2065
- p = unwrap_re2_regexp(pattern);
2104
+ if (p) {
2066
2105
  RE2::GlobalReplace(&str_as_string, *p->pattern,
2067
2106
  re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
2068
2107
 
2069
2108
  return encoded_str_new(str_as_string.data(), str_as_string.size(),
2070
2109
  p->pattern->options().encoding());
2071
2110
  } else {
2072
- /* Ensure pattern is a string. */
2073
- StringValue(pattern);
2074
-
2075
2111
  RE2::GlobalReplace(&str_as_string,
2076
2112
  re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)),
2077
2113
  re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
@@ -2104,16 +2140,23 @@ static VALUE re2_global_replace(VALUE, VALUE str, VALUE pattern,
2104
2140
  */
2105
2141
  static VALUE re2_extract(VALUE, VALUE text, VALUE pattern,
2106
2142
  VALUE rewrite) {
2107
- /* Ensure rewrite and text are strings. */
2108
- StringValue(rewrite);
2143
+ re2_pattern *p = nullptr;
2144
+
2145
+ /* Coerce all arguments before any C++ allocations so that any Ruby
2146
+ * exceptions (via longjmp) cannot bypass C++ destructors and leak memory.
2147
+ */
2109
2148
  StringValue(text);
2149
+ if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
2150
+ p = unwrap_re2_regexp(pattern);
2151
+ } else {
2152
+ StringValue(pattern);
2153
+ }
2154
+ StringValue(rewrite);
2110
2155
 
2111
- re2_pattern *p;
2112
2156
  std::string out;
2113
2157
  bool extracted;
2114
2158
 
2115
- if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
2116
- p = unwrap_re2_regexp(pattern);
2159
+ if (p) {
2117
2160
  extracted = RE2::Extract(
2118
2161
  re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
2119
2162
  *p->pattern,
@@ -2127,9 +2170,6 @@ static VALUE re2_extract(VALUE, VALUE text, VALUE pattern,
2127
2170
  return Qnil;
2128
2171
  }
2129
2172
  } else {
2130
- /* Ensure pattern is a string. */
2131
- StringValue(pattern);
2132
-
2133
2173
  extracted = RE2::Extract(
2134
2174
  re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
2135
2175
  RE2(re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern))),
@@ -2294,6 +2334,7 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
2294
2334
 
2295
2335
  if (s->set) {
2296
2336
  delete s->set;
2337
+ s->set = nullptr;
2297
2338
  }
2298
2339
 
2299
2340
  s->set = new(std::nothrow) RE2::Set(re2_options, re2_anchor);
data/lib/3.1/re2.so CHANGED
Binary file
data/lib/3.2/re2.so CHANGED
Binary file
data/lib/3.3/re2.so CHANGED
Binary file
data/lib/3.4/re2.so CHANGED
Binary file
data/lib/4.0/re2.so CHANGED
Binary file
data/lib/re2/version.rb CHANGED
@@ -10,5 +10,5 @@
10
10
 
11
11
 
12
12
  module RE2
13
- VERSION = "2.26.1"
13
+ VERSION = "2.26.2"
14
14
  end
@@ -757,6 +757,15 @@ RSpec.describe RE2::Regexp do
757
757
  expect(re.match("one two three", nil)).to be_a(RE2::MatchData)
758
758
  end
759
759
 
760
+ it "raises an error when startpos exceeds INT_MAX on old RE2 ABI" do
761
+ skip "Underlying RE2::Match does not take int startpos" if RE2::Regexp.match_has_endpos_argument?
762
+ skip "size_t is not larger than a 32-bit int" if RbConfig::SIZEOF.fetch("size_t") <= (32 / 8)
763
+
764
+ re = RE2::Regexp.new('(\w+)', log_errors: false)
765
+
766
+ expect { re.match("test", start_pos: 2_147_483_648) }.to raise_error(RangeError, /startpos should be <=/)
767
+ end
768
+
760
769
  it "raises an error when called on an uninitialized object" do
761
770
  expect { described_class.allocate.match("test") }.to raise_error(TypeError, /uninitialized RE2::Regexp/)
762
771
  end
@@ -243,6 +243,52 @@ RSpec.describe RE2::Scanner do
243
243
  expect(scanner.scan).to be_nil
244
244
  end
245
245
 
246
+ it "advances by whole characters with zero-width matches on 2-byte UTF-8 input", :aggregate_failures do
247
+ r = RE2::Regexp.new("")
248
+ scanner = r.scan("à")
249
+
250
+ expect(scanner.scan).to eq([])
251
+ expect(scanner.scan).to eq([])
252
+ expect(scanner.scan).to be_nil
253
+ end
254
+
255
+ it "advances by whole characters with zero-width matches on 3-byte UTF-8 input", :aggregate_failures do
256
+ r = RE2::Regexp.new("")
257
+ scanner = r.scan("\u20AC")
258
+
259
+ expect(scanner.scan).to eq([])
260
+ expect(scanner.scan).to eq([])
261
+ expect(scanner.scan).to be_nil
262
+ end
263
+
264
+ it "advances by whole characters with zero-width matches on 4-byte UTF-8 input", :aggregate_failures do
265
+ r = RE2::Regexp.new("")
266
+ scanner = r.scan("\u{1F600}")
267
+
268
+ expect(scanner.scan).to eq([])
269
+ expect(scanner.scan).to eq([])
270
+ expect(scanner.scan).to be_nil
271
+ end
272
+
273
+ it "advances by single bytes with zero-width matches on Latin-1 input", :aggregate_failures do
274
+ r = RE2::Regexp.new("", utf8: false)
275
+ scanner = r.scan("\xC3\xA0")
276
+
277
+ expect(scanner.scan).to eq([])
278
+ expect(scanner.scan).to eq([])
279
+ expect(scanner.scan).to eq([])
280
+ expect(scanner.scan).to be_nil
281
+ end
282
+
283
+ it "handles truncated multi-byte sequences at the end of input", :aggregate_failures do
284
+ r = RE2::Regexp.new("")
285
+ scanner = r.scan("\xC3")
286
+
287
+ expect(scanner.scan).to eq([])
288
+ expect(scanner.scan).to eq([])
289
+ expect(scanner.scan).to be_nil
290
+ end
291
+
246
292
  it "raises a Type Error if given input that can't be coerced to a String" do
247
293
  r = RE2::Regexp.new('(\w+)')
248
294
 
data/spec/re2_spec.rb CHANGED
@@ -86,6 +86,10 @@ RSpec.describe RE2 do
86
86
  expect { RE2.replace("woo", 0, "ah") }.to raise_error(TypeError)
87
87
  end
88
88
 
89
+ it "does not leak memory when given a non-String, non-RE2::Regexp pattern" do
90
+ expect { RE2.replace("a" * 128, 0, "ah") }.to raise_error(TypeError)
91
+ end
92
+
89
93
  it "raises a Type Error for a replacement that can't be converted to String" do
90
94
  expect { RE2.replace("woo", "oo", 0) }.to raise_error(TypeError)
91
95
  end
@@ -182,6 +186,10 @@ RSpec.describe RE2 do
182
186
  expect { RE2.global_replace("woo", 0, "a") }.to raise_error(TypeError)
183
187
  end
184
188
 
189
+ it "does not leak memory when given a non-String, non-RE2::Regexp pattern" do
190
+ expect { RE2.global_replace("a" * 128, 0, "a") }.to raise_error(TypeError)
191
+ end
192
+
185
193
  it "raises a Type Error for a replacement that can't be converted to String" do
186
194
  expect { RE2.global_replace("woo", "o", 0) }.to raise_error(TypeError)
187
195
  end
@@ -266,6 +274,10 @@ RSpec.describe RE2 do
266
274
  expect { RE2.extract("woo", 0, '\1') }.to raise_error(TypeError)
267
275
  end
268
276
 
277
+ it "does not leak memory when given a non-String, non-RE2::Regexp pattern" do
278
+ expect { RE2.extract("a" * 128, 0, '\1') }.to raise_error(TypeError)
279
+ end
280
+
269
281
  it "raises a Type Error for a rewrite that can't be converted to String" do
270
282
  expect { RE2.extract("woo", '(\w+)', 0) }.to raise_error(TypeError)
271
283
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: re2
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.26.1
4
+ version: 2.26.2
5
5
  platform: x86_64-linux-gnu
6
6
  authors:
7
7
  - Paul Mucur