re2 2.26.0-arm-linux-gnu → 2.26.2-arm-linux-gnu

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5b6ed5f6991cec617f1612aef8ea1cf9b801ca4b2480cca40da5922a8ea08d3d
4
- data.tar.gz: b2c1a13f2e66a33072325c741de4879c52c91b7954b41a336c8c42b504a81522
3
+ metadata.gz: 744848933fe9dcbb23dabbc2167aa08137fa5cc539ef836ace4bb4f1fdbfb786
4
+ data.tar.gz: 812ac2e83384239d1fe96f5e7db938f999da640d508b532b72458cd2aecaa5fe
5
5
  SHA512:
6
- metadata.gz: 9a08acbffab88f7f131a6d90d4a93aab7aac73672bbbe0bf313cab046d4a08ecfb51a0f46c00a6a73e1923082449435b0294e51f4361ca1e360f04e19fc555cb
7
- data.tar.gz: ee9df3f7625596cdd0fa8091ce23451d2611401a5b793157c056d147207ea6e6968356b922d6ad5d22ab0e40fc1e1af636adddae0fcba872fe28e7c385fd4dd9
6
+ metadata.gz: 9f262c5dc8f11908a5d0523662f0fa31dd9d599e0823c2e3f1ad1134bfe76981616e8f9083053a4bb16885df95c7b0b636f34efd977f113a0dae5c2bdd73622b
7
+ data.tar.gz: 0e8981d9ef0cb3b05315b00300ade2d4991f1de1b030a39790e90a9f5f7224d41b1f972d1d1be618839220668a8cd29fa93fa0976700a92bcd00e14100493065
data/README.md CHANGED
@@ -6,7 +6,7 @@ Python".
6
6
 
7
7
  [![Build Status](https://github.com/mudge/re2/actions/workflows/tests.yml/badge.svg?branch=main)](https://github.com/mudge/re2/actions)
8
8
 
9
- **Current version:** 2.26.0
9
+ **Current version:** 2.26.2
10
10
  **Bundled RE2 version:** libre2.11 (2025-11-05)
11
11
 
12
12
  ```ruby
data/ext/re2/re2.cc CHANGED
@@ -8,7 +8,7 @@
8
8
  * Released under the BSD Licence, please see LICENSE.txt
9
9
  */
10
10
 
11
- #include <stdint.h>
11
+ #include <cstdint>
12
12
 
13
13
  #include <map>
14
14
  #include <sstream>
@@ -296,12 +296,12 @@ static re2_scanner *unwrap_re2_scanner(VALUE self) {
296
296
  static VALUE re2_regexp_names(const VALUE self) {
297
297
  re2_pattern *p = unwrap_re2_regexp(self);
298
298
 
299
- const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
299
+ const auto& groups = p->pattern->NamedCapturingGroups();
300
300
  VALUE names = rb_ary_new2(groups.size());
301
301
 
302
- for (std::map<std::string, int>::const_iterator it = groups.begin(); it != groups.end(); ++it) {
302
+ for (const auto& group : groups) {
303
303
  rb_ary_push(names,
304
- encoded_str_new(it->first.data(), it->first.size(),
304
+ encoded_str_new(group.first.data(), group.first.size(),
305
305
  p->pattern->options().encoding()));
306
306
  }
307
307
 
@@ -385,7 +385,7 @@ static VALUE re2_scanner_rewind(VALUE self) {
385
385
  delete c->input;
386
386
  c->input = new(std::nothrow) re2::StringPiece(
387
387
  RSTRING_PTR(c->text), RSTRING_LEN(c->text));
388
- if (c->input == 0) {
388
+ if (c->input == nullptr) {
389
389
  rb_raise(rb_eNoMemError,
390
390
  "not enough memory to allocate StringPiece for input");
391
391
  }
@@ -403,6 +403,7 @@ static VALUE re2_scanner_initialize_copy(VALUE self, VALUE other) {
403
403
 
404
404
  if (self_c->input) {
405
405
  delete self_c->input;
406
+ self_c->input = nullptr;
406
407
  }
407
408
 
408
409
  RB_OBJ_WRITE(self, &self_c->regexp, other_c->regexp);
@@ -412,12 +413,12 @@ static VALUE re2_scanner_initialize_copy(VALUE self, VALUE other) {
412
413
 
413
414
  if (other_c->input) {
414
415
  self_c->input = new(std::nothrow) re2::StringPiece(*other_c->input);
415
- if (self_c->input == 0) {
416
+ if (self_c->input == nullptr) {
416
417
  rb_raise(rb_eNoMemError,
417
418
  "not enough memory to allocate StringPiece for input");
418
419
  }
419
420
  } else {
420
- self_c->input = NULL;
421
+ self_c->input = nullptr;
421
422
  }
422
423
 
423
424
  return self;
@@ -469,7 +470,7 @@ static VALUE re2_scanner_scan(VALUE self) {
469
470
  VALUE result = rb_ary_new2(c->number_of_capturing_groups);
470
471
 
471
472
  for (int i = 0; i < c->number_of_capturing_groups; ++i) {
472
- if (matches[i].data() == NULL) {
473
+ if (matches[i].data() == nullptr) {
473
474
  rb_ary_push(result, Qnil);
474
475
  } else {
475
476
  rb_ary_push(result, encoded_str_new(matches[i].data(),
@@ -481,9 +482,27 @@ static VALUE re2_scanner_scan(VALUE self) {
481
482
  /* Check whether we've exhausted the input yet. */
482
483
  c->eof = new_input_size == 0;
483
484
 
484
- /* If the match didn't advance the input, we need to do this ourselves. */
485
+ /* If the match didn't advance the input, we need to do this ourselves,
486
+ * advancing by a whole character to avoid splitting multi-byte characters.
487
+ *
488
+ * The lookup table approach is taken from RE2's own Python extension: the
489
+ * high 4 bits of a UTF-8 lead byte determine the character's byte length.
490
+ *
491
+ * See https://github.com/google/re2/blob/972a15cedd008d846f1a39b2e88ce48d7f166cbd/python/_re2.cc#L46-L48
492
+ */
485
493
  if (!input_advanced && new_input_size > 0) {
486
- c->input->remove_prefix(1);
494
+ size_t char_size = 1;
495
+
496
+ if (p->pattern->options().encoding() == RE2::Options::EncodingUTF8) {
497
+ char_size = "\1\1\1\1\1\1\1\1\1\1\1\1\2\2\3\4"
498
+ [((*c->input)[0] & 0xFF) >> 4];
499
+
500
+ if (char_size > new_input_size) {
501
+ char_size = new_input_size;
502
+ }
503
+ }
504
+
505
+ c->input->remove_prefix(char_size);
487
506
  }
488
507
 
489
508
  return result;
@@ -501,36 +520,36 @@ static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
501
520
  if (RB_INTEGER_TYPE_P(idx)) {
502
521
  id = NUM2INT(idx);
503
522
  } else if (SYMBOL_P(idx)) {
504
- const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
505
- std::map<std::string, int>::const_iterator search = groups.find(rb_id2name(SYM2ID(idx)));
523
+ const auto& groups = p->pattern->NamedCapturingGroups();
524
+ auto search = groups.find(rb_id2name(SYM2ID(idx)));
506
525
 
507
526
  if (search != groups.end()) {
508
527
  id = search->second;
509
528
  } else {
510
- return NULL;
529
+ return nullptr;
511
530
  }
512
531
  } else {
513
532
  StringValue(idx);
514
533
 
515
- const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
516
- std::map<std::string, int>::const_iterator search = groups.find(std::string(RSTRING_PTR(idx), RSTRING_LEN(idx)));
534
+ const auto& groups = p->pattern->NamedCapturingGroups();
535
+ auto search = groups.find(std::string(RSTRING_PTR(idx), RSTRING_LEN(idx)));
517
536
 
518
537
  if (search != groups.end()) {
519
538
  id = search->second;
520
539
  } else {
521
- return NULL;
540
+ return nullptr;
522
541
  }
523
542
  }
524
543
 
525
544
  if (id >= 0 && id < m->number_of_matches) {
526
545
  re2::StringPiece *match = &m->matches[id];
527
546
 
528
- if (match->data() != NULL) {
547
+ if (match->data() != nullptr) {
529
548
  return match;
530
549
  }
531
550
  }
532
551
 
533
- return NULL;
552
+ return nullptr;
534
553
  }
535
554
 
536
555
  /*
@@ -564,7 +583,7 @@ static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
564
583
  re2_matchdata *m = unwrap_re2_matchdata(self);
565
584
 
566
585
  re2::StringPiece *match = re2_matchdata_find_match(n, self);
567
- if (match == NULL) {
586
+ if (match == nullptr) {
568
587
  return Qnil;
569
588
  } else {
570
589
  long offset = match->data() - RSTRING_PTR(m->text);
@@ -589,7 +608,7 @@ static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
589
608
  re2_matchdata *m = unwrap_re2_matchdata(self);
590
609
 
591
610
  re2::StringPiece *match = re2_matchdata_find_match(n, self);
592
- if (match == NULL) {
611
+ if (match == nullptr) {
593
612
  return Qnil;
594
613
  } else {
595
614
  long offset = (match->data() - RSTRING_PTR(m->text)) + match->size();
@@ -615,7 +634,7 @@ static VALUE re2_matchdata_pre_match(const VALUE self) {
615
634
  re2_pattern *p = unwrap_re2_regexp(m->regexp);
616
635
 
617
636
  re2::StringPiece *match = &m->matches[0];
618
- if (match->data() == NULL) {
637
+ if (match->data() == nullptr) {
619
638
  return Qnil;
620
639
  }
621
640
 
@@ -642,7 +661,7 @@ static VALUE re2_matchdata_post_match(const VALUE self) {
642
661
  re2_pattern *p = unwrap_re2_regexp(m->regexp);
643
662
 
644
663
  re2::StringPiece *match = &m->matches[0];
645
- if (match->data() == NULL) {
664
+ if (match->data() == nullptr) {
646
665
  return Qnil;
647
666
  }
648
667
 
@@ -669,7 +688,7 @@ static VALUE re2_matchdata_offset(const VALUE self, VALUE n) {
669
688
  re2_matchdata *m = unwrap_re2_matchdata(self);
670
689
 
671
690
  re2::StringPiece *match = re2_matchdata_find_match(n, self);
672
- if (match == NULL) {
691
+ if (match == nullptr) {
673
692
  return Qnil;
674
693
  }
675
694
 
@@ -700,7 +719,7 @@ static VALUE re2_matchdata_match_length(const VALUE self, VALUE n) {
700
719
  re2_matchdata *m = unwrap_re2_matchdata(self);
701
720
 
702
721
  re2::StringPiece *match = re2_matchdata_find_match(n, self);
703
- if (match == NULL) {
722
+ if (match == nullptr) {
704
723
  return Qnil;
705
724
  }
706
725
 
@@ -766,7 +785,7 @@ static VALUE re2_matchdata_to_a(const VALUE self) {
766
785
  for (int i = 0; i < m->number_of_matches; ++i) {
767
786
  re2::StringPiece *match = &m->matches[i];
768
787
 
769
- if (match->data() == NULL) {
788
+ if (match->data() == nullptr) {
770
789
  rb_ary_push(array, Qnil);
771
790
  } else {
772
791
  rb_ary_push(array, encoded_str_new(match->data(), match->size(),
@@ -786,7 +805,7 @@ static VALUE re2_matchdata_nth_match(int nth, const VALUE self) {
786
805
  } else {
787
806
  re2::StringPiece *match = &m->matches[nth];
788
807
 
789
- if (match->data() == NULL) {
808
+ if (match->data() == nullptr) {
790
809
  return Qnil;
791
810
  } else {
792
811
  return encoded_str_new(match->data(), match->size(),
@@ -799,8 +818,8 @@ static VALUE re2_matchdata_named_match(const std::string &name, const VALUE self
799
818
  re2_matchdata *m = unwrap_re2_matchdata(self);
800
819
  re2_pattern *p = unwrap_re2_regexp(m->regexp);
801
820
 
802
- const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
803
- std::map<std::string, int>::const_iterator search = groups.find(name);
821
+ const auto& groups = p->pattern->NamedCapturingGroups();
822
+ auto search = groups.find(name);
804
823
 
805
824
  if (search != groups.end()) {
806
825
  return re2_matchdata_nth_match(search->second, self);
@@ -955,7 +974,7 @@ static VALUE re2_matchdata_deconstruct(const VALUE self) {
955
974
  for (int i = 1; i < m->number_of_matches; ++i) {
956
975
  re2::StringPiece *match = &m->matches[i];
957
976
 
958
- if (match->data() == NULL) {
977
+ if (match->data() == nullptr) {
959
978
  rb_ary_push(array, Qnil);
960
979
  } else {
961
980
  rb_ary_push(array, encoded_str_new(match->data(), match->size(),
@@ -999,14 +1018,14 @@ static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys)
999
1018
  re2_matchdata *m = unwrap_re2_matchdata(self);
1000
1019
  re2_pattern *p = unwrap_re2_regexp(m->regexp);
1001
1020
 
1002
- const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
1021
+ const auto& groups = p->pattern->NamedCapturingGroups();
1003
1022
  VALUE capturing_groups = rb_hash_new();
1004
1023
 
1005
1024
  if (NIL_P(keys)) {
1006
- for (std::map<std::string, int>::const_iterator it = groups.begin(); it != groups.end(); ++it) {
1025
+ for (const auto& group : groups) {
1007
1026
  rb_hash_aset(capturing_groups,
1008
- ID2SYM(rb_intern2(it->first.data(), it->first.size())),
1009
- re2_matchdata_nth_match(it->second, self));
1027
+ ID2SYM(rb_intern2(group.first.data(), group.first.size())),
1028
+ re2_matchdata_nth_match(group.second, self));
1010
1029
  }
1011
1030
  } else {
1012
1031
  Check_Type(keys, T_ARRAY);
@@ -1016,7 +1035,7 @@ static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys)
1016
1035
  VALUE key = rb_ary_entry(keys, i);
1017
1036
  Check_Type(key, T_SYMBOL);
1018
1037
  const char *name = rb_id2name(SYM2ID(key));
1019
- std::map<std::string, int>::const_iterator search = groups.find(name);
1038
+ auto search = groups.find(name);
1020
1039
 
1021
1040
  if (search != groups.end()) {
1022
1041
  rb_hash_aset(capturing_groups, key, re2_matchdata_nth_match(search->second, self));
@@ -1069,18 +1088,18 @@ static VALUE re2_matchdata_named_captures(int argc, VALUE *argv, const VALUE sel
1069
1088
  re2_matchdata *m = unwrap_re2_matchdata(self);
1070
1089
  re2_pattern *p = unwrap_re2_regexp(m->regexp);
1071
1090
 
1072
- const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
1091
+ const auto& groups = p->pattern->NamedCapturingGroups();
1073
1092
  VALUE result = rb_hash_new();
1074
1093
 
1075
- for (std::map<std::string, int>::const_iterator it = groups.begin(); it != groups.end(); ++it) {
1094
+ for (const auto& group : groups) {
1076
1095
  VALUE key;
1077
1096
  if (symbolize) {
1078
- key = ID2SYM(rb_intern2(it->first.data(), it->first.size()));
1097
+ key = ID2SYM(rb_intern2(group.first.data(), group.first.size()));
1079
1098
  } else {
1080
- key = encoded_str_new(it->first.data(), it->first.size(),
1099
+ key = encoded_str_new(group.first.data(), group.first.size(),
1081
1100
  p->pattern->options().encoding());
1082
1101
  }
1083
- rb_hash_aset(result, key, re2_matchdata_nth_match(it->second, self));
1102
+ rb_hash_aset(result, key, re2_matchdata_nth_match(group.second, self));
1084
1103
  }
1085
1104
 
1086
1105
  return result;
@@ -1152,6 +1171,7 @@ static VALUE re2_matchdata_initialize_copy(VALUE self, VALUE other) {
1152
1171
 
1153
1172
  if (self_m->matches) {
1154
1173
  delete[] self_m->matches;
1174
+ self_m->matches = nullptr;
1155
1175
  }
1156
1176
 
1157
1177
  self_m->number_of_matches = other_m->number_of_matches;
@@ -1160,7 +1180,7 @@ static VALUE re2_matchdata_initialize_copy(VALUE self, VALUE other) {
1160
1180
 
1161
1181
  if (other_m->matches) {
1162
1182
  self_m->matches = new(std::nothrow) re2::StringPiece[other_m->number_of_matches];
1163
- if (self_m->matches == 0) {
1183
+ if (self_m->matches == nullptr) {
1164
1184
  rb_raise(rb_eNoMemError,
1165
1185
  "not enough memory to allocate StringPiece for matches");
1166
1186
  }
@@ -1168,7 +1188,7 @@ static VALUE re2_matchdata_initialize_copy(VALUE self, VALUE other) {
1168
1188
  self_m->matches[i] = other_m->matches[i];
1169
1189
  }
1170
1190
  } else {
1171
- self_m->matches = NULL;
1191
+ self_m->matches = nullptr;
1172
1192
  }
1173
1193
 
1174
1194
  return self;
@@ -1231,6 +1251,7 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
1231
1251
 
1232
1252
  if (p->pattern) {
1233
1253
  delete p->pattern;
1254
+ p->pattern = nullptr;
1234
1255
  }
1235
1256
 
1236
1257
  if (RTEST(options)) {
@@ -1244,7 +1265,7 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
1244
1265
  re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)));
1245
1266
  }
1246
1267
 
1247
- if (p->pattern == 0) {
1268
+ if (p->pattern == nullptr) {
1248
1269
  rb_raise(rb_eNoMemError, "not enough memory to allocate RE2 object");
1249
1270
  }
1250
1271
 
@@ -1259,11 +1280,12 @@ static VALUE re2_regexp_initialize_copy(VALUE self, VALUE other) {
1259
1280
 
1260
1281
  if (self_p->pattern) {
1261
1282
  delete self_p->pattern;
1283
+ self_p->pattern = nullptr;
1262
1284
  }
1263
1285
 
1264
1286
  self_p->pattern = new(std::nothrow) RE2(other_p->pattern->pattern(),
1265
1287
  other_p->pattern->options());
1266
- if (self_p->pattern == 0) {
1288
+ if (self_p->pattern == nullptr) {
1267
1289
  rb_raise(rb_eNoMemError, "not enough memory to allocate RE2 object");
1268
1290
  }
1269
1291
 
@@ -1629,14 +1651,14 @@ static VALUE re2_regexp_number_of_capturing_groups(const VALUE self) {
1629
1651
  */
1630
1652
  static VALUE re2_regexp_named_capturing_groups(const VALUE self) {
1631
1653
  re2_pattern *p = unwrap_re2_regexp(self);
1632
- const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
1654
+ const auto& groups = p->pattern->NamedCapturingGroups();
1633
1655
  VALUE capturing_groups = rb_hash_new();
1634
1656
 
1635
- for (std::map<std::string, int>::const_iterator it = groups.begin(); it != groups.end(); ++it) {
1657
+ for (const auto& group : groups) {
1636
1658
  rb_hash_aset(capturing_groups,
1637
- encoded_str_new(it->first.data(), it->first.size(),
1659
+ encoded_str_new(group.first.data(), group.first.size(),
1638
1660
  p->pattern->options().encoding()),
1639
- INT2FIX(it->second));
1661
+ INT2FIX(group.second));
1640
1662
  }
1641
1663
 
1642
1664
  return capturing_groups;
@@ -1737,8 +1759,8 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
1737
1759
  p = unwrap_re2_regexp(self);
1738
1760
 
1739
1761
  int n;
1740
- int startpos = 0;
1741
- int endpos = RSTRING_LEN(text);
1762
+ size_t startpos = 0;
1763
+ size_t endpos = RSTRING_LEN(text);
1742
1764
  RE2::Anchor anchor = RE2::UNANCHORED;
1743
1765
 
1744
1766
  if (RTEST(options)) {
@@ -1756,11 +1778,13 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
1756
1778
  VALUE endpos_option = rb_hash_aref(options, ID2SYM(id_endpos));
1757
1779
  if (!NIL_P(endpos_option)) {
1758
1780
  #ifdef HAVE_ENDPOS_ARGUMENT
1759
- endpos = NUM2INT(endpos_option);
1781
+ ssize_t endpos_value = NUM2SSIZET(endpos_option);
1760
1782
 
1761
- if (endpos < 0) {
1783
+ if (endpos_value < 0) {
1762
1784
  rb_raise(rb_eArgError, "endpos should be >= 0");
1763
1785
  }
1786
+
1787
+ endpos = static_cast<size_t>(endpos_value);
1764
1788
  #else
1765
1789
  rb_raise(re2_eRegexpUnsupportedError, "current version of RE2::Match() does not support endpos argument");
1766
1790
  #endif
@@ -1799,11 +1823,13 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
1799
1823
 
1800
1824
  VALUE startpos_option = rb_hash_aref(options, ID2SYM(id_startpos));
1801
1825
  if (!NIL_P(startpos_option)) {
1802
- startpos = NUM2INT(startpos_option);
1826
+ ssize_t startpos_value = NUM2SSIZET(startpos_option);
1803
1827
 
1804
- if (startpos < 0) {
1828
+ if (startpos_value < 0) {
1805
1829
  rb_raise(rb_eArgError, "startpos should be >= 0");
1806
1830
  }
1831
+
1832
+ startpos = static_cast<size_t>(startpos_value);
1807
1833
  }
1808
1834
  }
1809
1835
  } else {
@@ -1818,6 +1844,13 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
1818
1844
  rb_raise(rb_eArgError, "startpos should be <= endpos");
1819
1845
  }
1820
1846
 
1847
+ #ifndef HAVE_ENDPOS_ARGUMENT
1848
+ /* Old RE2's Match() takes int startpos. Reject values that would overflow. */
1849
+ if (startpos > INT_MAX) {
1850
+ rb_raise(rb_eRangeError, "startpos should be <= %d", INT_MAX);
1851
+ }
1852
+ #endif
1853
+
1821
1854
  if (n == 0) {
1822
1855
  #ifdef HAVE_ENDPOS_ARGUMENT
1823
1856
  bool matched = p->pattern->Match(
@@ -1838,7 +1871,7 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
1838
1871
  n += 1;
1839
1872
 
1840
1873
  re2::StringPiece *matches = new(std::nothrow) re2::StringPiece[n];
1841
- if (matches == 0) {
1874
+ if (matches == nullptr) {
1842
1875
  rb_raise(rb_eNoMemError,
1843
1876
  "not enough memory to allocate StringPieces for matches");
1844
1877
  }
@@ -1935,7 +1968,7 @@ static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
1935
1968
  RB_OBJ_WRITE(scanner, &c->text, rb_str_new_frozen(text));
1936
1969
  c->input = new(std::nothrow) re2::StringPiece(
1937
1970
  RSTRING_PTR(c->text), RSTRING_LEN(c->text));
1938
- if (c->input == 0) {
1971
+ if (c->input == nullptr) {
1939
1972
  rb_raise(rb_eNoMemError,
1940
1973
  "not enough memory to allocate StringPiece for input");
1941
1974
  }
@@ -1993,29 +2026,32 @@ static VALUE re2_regexp_match_has_endpos_argument_p(VALUE) {
1993
2026
  */
1994
2027
  static VALUE re2_replace(VALUE, VALUE str, VALUE pattern,
1995
2028
  VALUE rewrite) {
1996
- /* Ensure rewrite is a string. */
1997
- StringValue(rewrite);
2029
+ re2_pattern *p = nullptr;
1998
2030
 
1999
- re2_pattern *p;
2031
+ /* Coerce all arguments before any C++ allocations so that any Ruby
2032
+ * exceptions (via longjmp) cannot bypass C++ destructors and leak memory.
2033
+ */
2034
+ StringValue(str);
2035
+ if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
2036
+ p = unwrap_re2_regexp(pattern);
2037
+ } else {
2038
+ StringValue(pattern);
2039
+ }
2040
+ StringValue(rewrite);
2000
2041
 
2001
2042
  /* Take a copy of str so it can be modified in-place by
2002
2043
  * RE2::Replace.
2003
2044
  */
2004
- StringValue(str);
2005
2045
  std::string str_as_string(RSTRING_PTR(str), RSTRING_LEN(str));
2006
2046
 
2007
2047
  /* Do the replacement. */
2008
- if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
2009
- p = unwrap_re2_regexp(pattern);
2048
+ if (p) {
2010
2049
  RE2::Replace(&str_as_string, *p->pattern,
2011
2050
  re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
2012
2051
 
2013
2052
  return encoded_str_new(str_as_string.data(), str_as_string.size(),
2014
2053
  p->pattern->options().encoding());
2015
2054
  } else {
2016
- /* Ensure pattern is a string. */
2017
- StringValue(pattern);
2018
-
2019
2055
  RE2::Replace(&str_as_string,
2020
2056
  re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)),
2021
2057
  re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
@@ -2046,28 +2082,32 @@ static VALUE re2_replace(VALUE, VALUE str, VALUE pattern,
2046
2082
  */
2047
2083
  static VALUE re2_global_replace(VALUE, VALUE str, VALUE pattern,
2048
2084
  VALUE rewrite) {
2049
- /* Ensure rewrite is a string. */
2085
+ re2_pattern *p = nullptr;
2086
+
2087
+ /* Coerce all arguments before any C++ allocations so that any Ruby
2088
+ * exceptions (via longjmp) cannot bypass C++ destructors and leak memory.
2089
+ */
2090
+ StringValue(str);
2091
+ if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
2092
+ p = unwrap_re2_regexp(pattern);
2093
+ } else {
2094
+ StringValue(pattern);
2095
+ }
2050
2096
  StringValue(rewrite);
2051
2097
 
2052
2098
  /* Take a copy of str so it can be modified in-place by
2053
2099
  * RE2::GlobalReplace.
2054
2100
  */
2055
- re2_pattern *p;
2056
- StringValue(str);
2057
2101
  std::string str_as_string(RSTRING_PTR(str), RSTRING_LEN(str));
2058
2102
 
2059
2103
  /* Do the replacement. */
2060
- if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
2061
- p = unwrap_re2_regexp(pattern);
2104
+ if (p) {
2062
2105
  RE2::GlobalReplace(&str_as_string, *p->pattern,
2063
2106
  re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
2064
2107
 
2065
2108
  return encoded_str_new(str_as_string.data(), str_as_string.size(),
2066
2109
  p->pattern->options().encoding());
2067
2110
  } else {
2068
- /* Ensure pattern is a string. */
2069
- StringValue(pattern);
2070
-
2071
2111
  RE2::GlobalReplace(&str_as_string,
2072
2112
  re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)),
2073
2113
  re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
@@ -2100,16 +2140,23 @@ static VALUE re2_global_replace(VALUE, VALUE str, VALUE pattern,
2100
2140
  */
2101
2141
  static VALUE re2_extract(VALUE, VALUE text, VALUE pattern,
2102
2142
  VALUE rewrite) {
2103
- /* Ensure rewrite and text are strings. */
2104
- StringValue(rewrite);
2143
+ re2_pattern *p = nullptr;
2144
+
2145
+ /* Coerce all arguments before any C++ allocations so that any Ruby
2146
+ * exceptions (via longjmp) cannot bypass C++ destructors and leak memory.
2147
+ */
2105
2148
  StringValue(text);
2149
+ if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
2150
+ p = unwrap_re2_regexp(pattern);
2151
+ } else {
2152
+ StringValue(pattern);
2153
+ }
2154
+ StringValue(rewrite);
2106
2155
 
2107
- re2_pattern *p;
2108
2156
  std::string out;
2109
2157
  bool extracted;
2110
2158
 
2111
- if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
2112
- p = unwrap_re2_regexp(pattern);
2159
+ if (p) {
2113
2160
  extracted = RE2::Extract(
2114
2161
  re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
2115
2162
  *p->pattern,
@@ -2123,9 +2170,6 @@ static VALUE re2_extract(VALUE, VALUE text, VALUE pattern,
2123
2170
  return Qnil;
2124
2171
  }
2125
2172
  } else {
2126
- /* Ensure pattern is a string. */
2127
- StringValue(pattern);
2128
-
2129
2173
  extracted = RE2::Extract(
2130
2174
  re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
2131
2175
  RE2(re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern))),
@@ -2290,10 +2334,11 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
2290
2334
 
2291
2335
  if (s->set) {
2292
2336
  delete s->set;
2337
+ s->set = nullptr;
2293
2338
  }
2294
2339
 
2295
2340
  s->set = new(std::nothrow) RE2::Set(re2_options, re2_anchor);
2296
- if (s->set == 0) {
2341
+ if (s->set == nullptr) {
2297
2342
  rb_raise(rb_eNoMemError, "not enough memory to allocate RE2::Set object");
2298
2343
  }
2299
2344
 
@@ -2480,8 +2525,8 @@ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
2480
2525
  rb_raise(re2_eSetMatchError, "Unknown RE2::Set::ErrorKind: %d", e.kind);
2481
2526
  }
2482
2527
  } else {
2483
- for (std::vector<int>::size_type i = 0; i < v.size(); ++i) {
2484
- rb_ary_push(result, INT2FIX(v[i]));
2528
+ for (int index : v) {
2529
+ rb_ary_push(result, INT2FIX(index));
2485
2530
  }
2486
2531
  }
2487
2532
 
@@ -2495,8 +2540,8 @@ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
2495
2540
  VALUE result = rb_ary_new2(v.size());
2496
2541
 
2497
2542
  if (matched) {
2498
- for (std::vector<int>::size_type i = 0; i < v.size(); ++i) {
2499
- rb_ary_push(result, INT2FIX(v[i]));
2543
+ for (int index : v) {
2544
+ rb_ary_push(result, INT2FIX(index));
2500
2545
  }
2501
2546
  }
2502
2547
 
data/lib/3.1/re2.so CHANGED
Binary file
data/lib/3.2/re2.so CHANGED
Binary file
data/lib/3.3/re2.so CHANGED
Binary file
data/lib/3.4/re2.so CHANGED
Binary file
data/lib/4.0/re2.so CHANGED
Binary file
data/lib/re2/version.rb CHANGED
@@ -10,5 +10,5 @@
10
10
 
11
11
 
12
12
  module RE2
13
- VERSION = "2.26.0"
13
+ VERSION = "2.26.2"
14
14
  end
data/spec/re2/regexp_spec.rb CHANGED
@@ -590,6 +590,24 @@ RSpec.describe RE2::Regexp do
590
590
  expect { re.match("one two three", endpos: 3) }.to raise_error(RE2::Regexp::UnsupportedError)
591
591
  end
592
592
 
593
+ it "does not truncate startpos to 32 bits" do
594
+ skip "Underlying RE2::Match does not have endpos argument" unless RE2::Regexp.match_has_endpos_argument?
595
+ skip "size_t is not larger than a 32-bit int" if RbConfig::SIZEOF.fetch("size_t") <= (32 / 8)
596
+
597
+ re = RE2::Regexp.new('(\w+)', log_errors: false)
598
+
599
+ expect(re.match("one two three", startpos: 2_147_483_648, endpos: 2_147_483_649)).to be_nil
600
+ end
601
+
602
+ it "does not truncate endpos to 32 bits" do
603
+ skip "Underlying RE2::Match does not have endpos argument" unless RE2::Regexp.match_has_endpos_argument?
604
+ skip "size_t is not larger than a 32-bit int" if RbConfig::SIZEOF.fetch("size_t") <= (32 / 8)
605
+
606
+ re = RE2::Regexp.new('(\w+)', log_errors: false)
607
+
608
+ expect(re.match("one two three", endpos: 2_147_483_648)).to be_nil
609
+ end
610
+
593
611
  it "does not anchor matches by default when extracting submatches" do
594
612
  re = RE2::Regexp.new('(two)')
595
613
 
@@ -739,6 +757,15 @@ RSpec.describe RE2::Regexp do
739
757
  expect(re.match("one two three", nil)).to be_a(RE2::MatchData)
740
758
  end
741
759
 
760
+ it "raises an error when startpos exceeds INT_MAX on old RE2 ABI" do
761
+ skip "Underlying RE2::Match does not take int startpos" if RE2::Regexp.match_has_endpos_argument?
762
+ skip "size_t is not larger than a 32-bit int" if RbConfig::SIZEOF.fetch("size_t") <= (32 / 8)
763
+
764
+ re = RE2::Regexp.new('(\w+)', log_errors: false)
765
+
766
+ expect { re.match("test", startpos: 2_147_483_648) }.to raise_error(RangeError, /startpos should be <=/)
767
+ end
768
+
742
769
  it "raises an error when called on an uninitialized object" do
743
770
  expect { described_class.allocate.match("test") }.to raise_error(TypeError, /uninitialized RE2::Regexp/)
744
771
  end
data/spec/re2/scanner_spec.rb CHANGED
@@ -243,6 +243,52 @@ RSpec.describe RE2::Scanner do
243
243
  expect(scanner.scan).to be_nil
244
244
  end
245
245
 
246
+ it "advances by whole characters with zero-width matches on 2-byte UTF-8 input", :aggregate_failures do
247
+ r = RE2::Regexp.new("")
248
+ scanner = r.scan("à")
249
+
250
+ expect(scanner.scan).to eq([])
251
+ expect(scanner.scan).to eq([])
252
+ expect(scanner.scan).to be_nil
253
+ end
254
+
255
+ it "advances by whole characters with zero-width matches on 3-byte UTF-8 input", :aggregate_failures do
256
+ r = RE2::Regexp.new("")
257
+ scanner = r.scan("\u20AC")
258
+
259
+ expect(scanner.scan).to eq([])
260
+ expect(scanner.scan).to eq([])
261
+ expect(scanner.scan).to be_nil
262
+ end
263
+
264
+ it "advances by whole characters with zero-width matches on 4-byte UTF-8 input", :aggregate_failures do
265
+ r = RE2::Regexp.new("")
266
+ scanner = r.scan("\u{1F600}")
267
+
268
+ expect(scanner.scan).to eq([])
269
+ expect(scanner.scan).to eq([])
270
+ expect(scanner.scan).to be_nil
271
+ end
272
+
273
+ it "advances by single bytes with zero-width matches on Latin-1 input", :aggregate_failures do
274
+ r = RE2::Regexp.new("", utf8: false)
275
+ scanner = r.scan("\xC3\xA0")
276
+
277
+ expect(scanner.scan).to eq([])
278
+ expect(scanner.scan).to eq([])
279
+ expect(scanner.scan).to eq([])
280
+ expect(scanner.scan).to be_nil
281
+ end
282
+
283
+ it "handles truncated multi-byte sequences at the end of input", :aggregate_failures do
284
+ r = RE2::Regexp.new("")
285
+ scanner = r.scan("\xC3")
286
+
287
+ expect(scanner.scan).to eq([])
288
+ expect(scanner.scan).to eq([])
289
+ expect(scanner.scan).to be_nil
290
+ end
291
+
246
292
  it "raises a Type Error if given input that can't be coerced to a String" do
247
293
  r = RE2::Regexp.new('(\w+)')
248
294
 
data/spec/re2_spec.rb CHANGED
@@ -86,6 +86,10 @@ RSpec.describe RE2 do
86
86
  expect { RE2.replace("woo", 0, "ah") }.to raise_error(TypeError)
87
87
  end
88
88
 
89
+ it "does not leak memory when given a non-String, non-RE2::Regexp pattern" do
90
+ expect { RE2.replace("a" * 128, 0, "ah") }.to raise_error(TypeError)
91
+ end
92
+
89
93
  it "raises a Type Error for a replacement that can't be converted to String" do
90
94
  expect { RE2.replace("woo", "oo", 0) }.to raise_error(TypeError)
91
95
  end
@@ -182,6 +186,10 @@ RSpec.describe RE2 do
182
186
  expect { RE2.global_replace("woo", 0, "a") }.to raise_error(TypeError)
183
187
  end
184
188
 
189
+ it "does not leak memory when given a non-String, non-RE2::Regexp pattern" do
190
+ expect { RE2.global_replace("a" * 128, 0, "a") }.to raise_error(TypeError)
191
+ end
192
+
185
193
  it "raises a Type Error for a replacement that can't be converted to String" do
186
194
  expect { RE2.global_replace("woo", "o", 0) }.to raise_error(TypeError)
187
195
  end
@@ -266,6 +274,10 @@ RSpec.describe RE2 do
266
274
  expect { RE2.extract("woo", 0, '\1') }.to raise_error(TypeError)
267
275
  end
268
276
 
277
+ it "does not leak memory when given a non-String, non-RE2::Regexp pattern" do
278
+ expect { RE2.extract("a" * 128, 0, '\1') }.to raise_error(TypeError)
279
+ end
280
+
269
281
  it "raises a Type Error for a rewrite that can't be converted to String" do
270
282
  expect { RE2.extract("woo", '(\w+)', 0) }.to raise_error(TypeError)
271
283
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: re2
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.26.0
4
+ version: 2.26.2
5
5
  platform: arm-linux-gnu
6
6
  authors:
7
7
  - Paul Mucur