re2 2.26.0-aarch64-linux-gnu → 2.26.2-aarch64-linux-gnu
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/re2/re2.cc +130 -85
- data/lib/3.1/re2.so +0 -0
- data/lib/3.2/re2.so +0 -0
- data/lib/3.3/re2.so +0 -0
- data/lib/3.4/re2.so +0 -0
- data/lib/4.0/re2.so +0 -0
- data/lib/re2/version.rb +1 -1
- data/spec/re2/regexp_spec.rb +27 -0
- data/spec/re2/scanner_spec.rb +46 -0
- data/spec/re2_spec.rb +12 -0
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: b40f4cfd1c562498a0249b43c03cffb2a22525184f4ed51f1c498c2f8ab17954
|
|
4
|
+
data.tar.gz: ed9df3b2b2085afb91e3cd418ce38f7e7513c96c6381e05a40089c975f0256ea
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: b6205ed6870d1459dbb2384cd2e563ed59d0176ce74c3c7ad8c68d33c16f8a1e637de69eb571069de7a47dadd6d095a52e23508455b89f259438792536a0b513
|
|
7
|
+
data.tar.gz: 26b825f7a5d4026f40dfc6909838ca5d32ed3750009508bf7eb131659db1599bdc6cdb7564f08d54caeb0d98a7e0acb610840cdd8491e94d44b4fec18634a80c
|
data/README.md
CHANGED
|
@@ -6,7 +6,7 @@ Python".
|
|
|
6
6
|
|
|
7
7
|
[Build status](https://github.com/mudge/re2/actions)
|
|
8
8
|
|
|
9
|
-
**Current version:** 2.26.0
|
|
9
|
+
**Current version:** 2.26.2
|
|
10
10
|
**Bundled RE2 version:** libre2.11 (2025-11-05)
|
|
11
11
|
|
|
12
12
|
```ruby
|
data/ext/re2/re2.cc
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
* Released under the BSD Licence, please see LICENSE.txt
|
|
9
9
|
*/
|
|
10
10
|
|
|
11
|
-
#include <
|
|
11
|
+
#include <cstdint>
|
|
12
12
|
|
|
13
13
|
#include <map>
|
|
14
14
|
#include <sstream>
|
|
@@ -296,12 +296,12 @@ static re2_scanner *unwrap_re2_scanner(VALUE self) {
|
|
|
296
296
|
static VALUE re2_regexp_names(const VALUE self) {
|
|
297
297
|
re2_pattern *p = unwrap_re2_regexp(self);
|
|
298
298
|
|
|
299
|
-
const
|
|
299
|
+
const auto& groups = p->pattern->NamedCapturingGroups();
|
|
300
300
|
VALUE names = rb_ary_new2(groups.size());
|
|
301
301
|
|
|
302
|
-
for (
|
|
302
|
+
for (const auto& group : groups) {
|
|
303
303
|
rb_ary_push(names,
|
|
304
|
-
encoded_str_new(
|
|
304
|
+
encoded_str_new(group.first.data(), group.first.size(),
|
|
305
305
|
p->pattern->options().encoding()));
|
|
306
306
|
}
|
|
307
307
|
|
|
@@ -385,7 +385,7 @@ static VALUE re2_scanner_rewind(VALUE self) {
|
|
|
385
385
|
delete c->input;
|
|
386
386
|
c->input = new(std::nothrow) re2::StringPiece(
|
|
387
387
|
RSTRING_PTR(c->text), RSTRING_LEN(c->text));
|
|
388
|
-
if (c->input ==
|
|
388
|
+
if (c->input == nullptr) {
|
|
389
389
|
rb_raise(rb_eNoMemError,
|
|
390
390
|
"not enough memory to allocate StringPiece for input");
|
|
391
391
|
}
|
|
@@ -403,6 +403,7 @@ static VALUE re2_scanner_initialize_copy(VALUE self, VALUE other) {
|
|
|
403
403
|
|
|
404
404
|
if (self_c->input) {
|
|
405
405
|
delete self_c->input;
|
|
406
|
+
self_c->input = nullptr;
|
|
406
407
|
}
|
|
407
408
|
|
|
408
409
|
RB_OBJ_WRITE(self, &self_c->regexp, other_c->regexp);
|
|
@@ -412,12 +413,12 @@ static VALUE re2_scanner_initialize_copy(VALUE self, VALUE other) {
|
|
|
412
413
|
|
|
413
414
|
if (other_c->input) {
|
|
414
415
|
self_c->input = new(std::nothrow) re2::StringPiece(*other_c->input);
|
|
415
|
-
if (self_c->input ==
|
|
416
|
+
if (self_c->input == nullptr) {
|
|
416
417
|
rb_raise(rb_eNoMemError,
|
|
417
418
|
"not enough memory to allocate StringPiece for input");
|
|
418
419
|
}
|
|
419
420
|
} else {
|
|
420
|
-
self_c->input =
|
|
421
|
+
self_c->input = nullptr;
|
|
421
422
|
}
|
|
422
423
|
|
|
423
424
|
return self;
|
|
@@ -469,7 +470,7 @@ static VALUE re2_scanner_scan(VALUE self) {
|
|
|
469
470
|
VALUE result = rb_ary_new2(c->number_of_capturing_groups);
|
|
470
471
|
|
|
471
472
|
for (int i = 0; i < c->number_of_capturing_groups; ++i) {
|
|
472
|
-
if (matches[i].data() ==
|
|
473
|
+
if (matches[i].data() == nullptr) {
|
|
473
474
|
rb_ary_push(result, Qnil);
|
|
474
475
|
} else {
|
|
475
476
|
rb_ary_push(result, encoded_str_new(matches[i].data(),
|
|
@@ -481,9 +482,27 @@ static VALUE re2_scanner_scan(VALUE self) {
|
|
|
481
482
|
/* Check whether we've exhausted the input yet. */
|
|
482
483
|
c->eof = new_input_size == 0;
|
|
483
484
|
|
|
484
|
-
/* If the match didn't advance the input, we need to do this ourselves
|
|
485
|
+
/* If the match didn't advance the input, we need to do this ourselves,
|
|
486
|
+
* advancing by a whole character to avoid splitting multi-byte characters.
|
|
487
|
+
*
|
|
488
|
+
* The lookup table approach is taken from RE2's own Python extension: the
|
|
489
|
+
* high 4 bits of a UTF-8 lead byte determine the character's byte length.
|
|
490
|
+
*
|
|
491
|
+
* See https://github.com/google/re2/blob/972a15cedd008d846f1a39b2e88ce48d7f166cbd/python/_re2.cc#L46-L48
|
|
492
|
+
*/
|
|
485
493
|
if (!input_advanced && new_input_size > 0) {
|
|
486
|
-
|
|
494
|
+
size_t char_size = 1;
|
|
495
|
+
|
|
496
|
+
if (p->pattern->options().encoding() == RE2::Options::EncodingUTF8) {
|
|
497
|
+
char_size = "\1\1\1\1\1\1\1\1\1\1\1\1\2\2\3\4"
|
|
498
|
+
[((*c->input)[0] & 0xFF) >> 4];
|
|
499
|
+
|
|
500
|
+
if (char_size > new_input_size) {
|
|
501
|
+
char_size = new_input_size;
|
|
502
|
+
}
|
|
503
|
+
}
|
|
504
|
+
|
|
505
|
+
c->input->remove_prefix(char_size);
|
|
487
506
|
}
|
|
488
507
|
|
|
489
508
|
return result;
|
|
@@ -501,36 +520,36 @@ static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
|
|
|
501
520
|
if (RB_INTEGER_TYPE_P(idx)) {
|
|
502
521
|
id = NUM2INT(idx);
|
|
503
522
|
} else if (SYMBOL_P(idx)) {
|
|
504
|
-
const
|
|
505
|
-
|
|
523
|
+
const auto& groups = p->pattern->NamedCapturingGroups();
|
|
524
|
+
auto search = groups.find(rb_id2name(SYM2ID(idx)));
|
|
506
525
|
|
|
507
526
|
if (search != groups.end()) {
|
|
508
527
|
id = search->second;
|
|
509
528
|
} else {
|
|
510
|
-
return
|
|
529
|
+
return nullptr;
|
|
511
530
|
}
|
|
512
531
|
} else {
|
|
513
532
|
StringValue(idx);
|
|
514
533
|
|
|
515
|
-
const
|
|
516
|
-
|
|
534
|
+
const auto& groups = p->pattern->NamedCapturingGroups();
|
|
535
|
+
auto search = groups.find(std::string(RSTRING_PTR(idx), RSTRING_LEN(idx)));
|
|
517
536
|
|
|
518
537
|
if (search != groups.end()) {
|
|
519
538
|
id = search->second;
|
|
520
539
|
} else {
|
|
521
|
-
return
|
|
540
|
+
return nullptr;
|
|
522
541
|
}
|
|
523
542
|
}
|
|
524
543
|
|
|
525
544
|
if (id >= 0 && id < m->number_of_matches) {
|
|
526
545
|
re2::StringPiece *match = &m->matches[id];
|
|
527
546
|
|
|
528
|
-
if (match->data() !=
|
|
547
|
+
if (match->data() != nullptr) {
|
|
529
548
|
return match;
|
|
530
549
|
}
|
|
531
550
|
}
|
|
532
551
|
|
|
533
|
-
return
|
|
552
|
+
return nullptr;
|
|
534
553
|
}
|
|
535
554
|
|
|
536
555
|
/*
|
|
@@ -564,7 +583,7 @@ static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
|
|
|
564
583
|
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
565
584
|
|
|
566
585
|
re2::StringPiece *match = re2_matchdata_find_match(n, self);
|
|
567
|
-
if (match ==
|
|
586
|
+
if (match == nullptr) {
|
|
568
587
|
return Qnil;
|
|
569
588
|
} else {
|
|
570
589
|
long offset = match->data() - RSTRING_PTR(m->text);
|
|
@@ -589,7 +608,7 @@ static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
|
|
|
589
608
|
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
590
609
|
|
|
591
610
|
re2::StringPiece *match = re2_matchdata_find_match(n, self);
|
|
592
|
-
if (match ==
|
|
611
|
+
if (match == nullptr) {
|
|
593
612
|
return Qnil;
|
|
594
613
|
} else {
|
|
595
614
|
long offset = (match->data() - RSTRING_PTR(m->text)) + match->size();
|
|
@@ -615,7 +634,7 @@ static VALUE re2_matchdata_pre_match(const VALUE self) {
|
|
|
615
634
|
re2_pattern *p = unwrap_re2_regexp(m->regexp);
|
|
616
635
|
|
|
617
636
|
re2::StringPiece *match = &m->matches[0];
|
|
618
|
-
if (match->data() ==
|
|
637
|
+
if (match->data() == nullptr) {
|
|
619
638
|
return Qnil;
|
|
620
639
|
}
|
|
621
640
|
|
|
@@ -642,7 +661,7 @@ static VALUE re2_matchdata_post_match(const VALUE self) {
|
|
|
642
661
|
re2_pattern *p = unwrap_re2_regexp(m->regexp);
|
|
643
662
|
|
|
644
663
|
re2::StringPiece *match = &m->matches[0];
|
|
645
|
-
if (match->data() ==
|
|
664
|
+
if (match->data() == nullptr) {
|
|
646
665
|
return Qnil;
|
|
647
666
|
}
|
|
648
667
|
|
|
@@ -669,7 +688,7 @@ static VALUE re2_matchdata_offset(const VALUE self, VALUE n) {
|
|
|
669
688
|
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
670
689
|
|
|
671
690
|
re2::StringPiece *match = re2_matchdata_find_match(n, self);
|
|
672
|
-
if (match ==
|
|
691
|
+
if (match == nullptr) {
|
|
673
692
|
return Qnil;
|
|
674
693
|
}
|
|
675
694
|
|
|
@@ -700,7 +719,7 @@ static VALUE re2_matchdata_match_length(const VALUE self, VALUE n) {
|
|
|
700
719
|
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
701
720
|
|
|
702
721
|
re2::StringPiece *match = re2_matchdata_find_match(n, self);
|
|
703
|
-
if (match ==
|
|
722
|
+
if (match == nullptr) {
|
|
704
723
|
return Qnil;
|
|
705
724
|
}
|
|
706
725
|
|
|
@@ -766,7 +785,7 @@ static VALUE re2_matchdata_to_a(const VALUE self) {
|
|
|
766
785
|
for (int i = 0; i < m->number_of_matches; ++i) {
|
|
767
786
|
re2::StringPiece *match = &m->matches[i];
|
|
768
787
|
|
|
769
|
-
if (match->data() ==
|
|
788
|
+
if (match->data() == nullptr) {
|
|
770
789
|
rb_ary_push(array, Qnil);
|
|
771
790
|
} else {
|
|
772
791
|
rb_ary_push(array, encoded_str_new(match->data(), match->size(),
|
|
@@ -786,7 +805,7 @@ static VALUE re2_matchdata_nth_match(int nth, const VALUE self) {
|
|
|
786
805
|
} else {
|
|
787
806
|
re2::StringPiece *match = &m->matches[nth];
|
|
788
807
|
|
|
789
|
-
if (match->data() ==
|
|
808
|
+
if (match->data() == nullptr) {
|
|
790
809
|
return Qnil;
|
|
791
810
|
} else {
|
|
792
811
|
return encoded_str_new(match->data(), match->size(),
|
|
@@ -799,8 +818,8 @@ static VALUE re2_matchdata_named_match(const std::string &name, const VALUE self
|
|
|
799
818
|
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
800
819
|
re2_pattern *p = unwrap_re2_regexp(m->regexp);
|
|
801
820
|
|
|
802
|
-
const
|
|
803
|
-
|
|
821
|
+
const auto& groups = p->pattern->NamedCapturingGroups();
|
|
822
|
+
auto search = groups.find(name);
|
|
804
823
|
|
|
805
824
|
if (search != groups.end()) {
|
|
806
825
|
return re2_matchdata_nth_match(search->second, self);
|
|
@@ -955,7 +974,7 @@ static VALUE re2_matchdata_deconstruct(const VALUE self) {
|
|
|
955
974
|
for (int i = 1; i < m->number_of_matches; ++i) {
|
|
956
975
|
re2::StringPiece *match = &m->matches[i];
|
|
957
976
|
|
|
958
|
-
if (match->data() ==
|
|
977
|
+
if (match->data() == nullptr) {
|
|
959
978
|
rb_ary_push(array, Qnil);
|
|
960
979
|
} else {
|
|
961
980
|
rb_ary_push(array, encoded_str_new(match->data(), match->size(),
|
|
@@ -999,14 +1018,14 @@ static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys)
|
|
|
999
1018
|
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
1000
1019
|
re2_pattern *p = unwrap_re2_regexp(m->regexp);
|
|
1001
1020
|
|
|
1002
|
-
const
|
|
1021
|
+
const auto& groups = p->pattern->NamedCapturingGroups();
|
|
1003
1022
|
VALUE capturing_groups = rb_hash_new();
|
|
1004
1023
|
|
|
1005
1024
|
if (NIL_P(keys)) {
|
|
1006
|
-
for (
|
|
1025
|
+
for (const auto& group : groups) {
|
|
1007
1026
|
rb_hash_aset(capturing_groups,
|
|
1008
|
-
ID2SYM(rb_intern2(
|
|
1009
|
-
re2_matchdata_nth_match(
|
|
1027
|
+
ID2SYM(rb_intern2(group.first.data(), group.first.size())),
|
|
1028
|
+
re2_matchdata_nth_match(group.second, self));
|
|
1010
1029
|
}
|
|
1011
1030
|
} else {
|
|
1012
1031
|
Check_Type(keys, T_ARRAY);
|
|
@@ -1016,7 +1035,7 @@ static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys)
|
|
|
1016
1035
|
VALUE key = rb_ary_entry(keys, i);
|
|
1017
1036
|
Check_Type(key, T_SYMBOL);
|
|
1018
1037
|
const char *name = rb_id2name(SYM2ID(key));
|
|
1019
|
-
|
|
1038
|
+
auto search = groups.find(name);
|
|
1020
1039
|
|
|
1021
1040
|
if (search != groups.end()) {
|
|
1022
1041
|
rb_hash_aset(capturing_groups, key, re2_matchdata_nth_match(search->second, self));
|
|
@@ -1069,18 +1088,18 @@ static VALUE re2_matchdata_named_captures(int argc, VALUE *argv, const VALUE sel
|
|
|
1069
1088
|
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
1070
1089
|
re2_pattern *p = unwrap_re2_regexp(m->regexp);
|
|
1071
1090
|
|
|
1072
|
-
const
|
|
1091
|
+
const auto& groups = p->pattern->NamedCapturingGroups();
|
|
1073
1092
|
VALUE result = rb_hash_new();
|
|
1074
1093
|
|
|
1075
|
-
for (
|
|
1094
|
+
for (const auto& group : groups) {
|
|
1076
1095
|
VALUE key;
|
|
1077
1096
|
if (symbolize) {
|
|
1078
|
-
key = ID2SYM(rb_intern2(
|
|
1097
|
+
key = ID2SYM(rb_intern2(group.first.data(), group.first.size()));
|
|
1079
1098
|
} else {
|
|
1080
|
-
key = encoded_str_new(
|
|
1099
|
+
key = encoded_str_new(group.first.data(), group.first.size(),
|
|
1081
1100
|
p->pattern->options().encoding());
|
|
1082
1101
|
}
|
|
1083
|
-
rb_hash_aset(result, key, re2_matchdata_nth_match(
|
|
1102
|
+
rb_hash_aset(result, key, re2_matchdata_nth_match(group.second, self));
|
|
1084
1103
|
}
|
|
1085
1104
|
|
|
1086
1105
|
return result;
|
|
@@ -1152,6 +1171,7 @@ static VALUE re2_matchdata_initialize_copy(VALUE self, VALUE other) {
|
|
|
1152
1171
|
|
|
1153
1172
|
if (self_m->matches) {
|
|
1154
1173
|
delete[] self_m->matches;
|
|
1174
|
+
self_m->matches = nullptr;
|
|
1155
1175
|
}
|
|
1156
1176
|
|
|
1157
1177
|
self_m->number_of_matches = other_m->number_of_matches;
|
|
@@ -1160,7 +1180,7 @@ static VALUE re2_matchdata_initialize_copy(VALUE self, VALUE other) {
|
|
|
1160
1180
|
|
|
1161
1181
|
if (other_m->matches) {
|
|
1162
1182
|
self_m->matches = new(std::nothrow) re2::StringPiece[other_m->number_of_matches];
|
|
1163
|
-
if (self_m->matches ==
|
|
1183
|
+
if (self_m->matches == nullptr) {
|
|
1164
1184
|
rb_raise(rb_eNoMemError,
|
|
1165
1185
|
"not enough memory to allocate StringPiece for matches");
|
|
1166
1186
|
}
|
|
@@ -1168,7 +1188,7 @@ static VALUE re2_matchdata_initialize_copy(VALUE self, VALUE other) {
|
|
|
1168
1188
|
self_m->matches[i] = other_m->matches[i];
|
|
1169
1189
|
}
|
|
1170
1190
|
} else {
|
|
1171
|
-
self_m->matches =
|
|
1191
|
+
self_m->matches = nullptr;
|
|
1172
1192
|
}
|
|
1173
1193
|
|
|
1174
1194
|
return self;
|
|
@@ -1231,6 +1251,7 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
|
|
1231
1251
|
|
|
1232
1252
|
if (p->pattern) {
|
|
1233
1253
|
delete p->pattern;
|
|
1254
|
+
p->pattern = nullptr;
|
|
1234
1255
|
}
|
|
1235
1256
|
|
|
1236
1257
|
if (RTEST(options)) {
|
|
@@ -1244,7 +1265,7 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
|
|
1244
1265
|
re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)));
|
|
1245
1266
|
}
|
|
1246
1267
|
|
|
1247
|
-
if (p->pattern ==
|
|
1268
|
+
if (p->pattern == nullptr) {
|
|
1248
1269
|
rb_raise(rb_eNoMemError, "not enough memory to allocate RE2 object");
|
|
1249
1270
|
}
|
|
1250
1271
|
|
|
@@ -1259,11 +1280,12 @@ static VALUE re2_regexp_initialize_copy(VALUE self, VALUE other) {
|
|
|
1259
1280
|
|
|
1260
1281
|
if (self_p->pattern) {
|
|
1261
1282
|
delete self_p->pattern;
|
|
1283
|
+
self_p->pattern = nullptr;
|
|
1262
1284
|
}
|
|
1263
1285
|
|
|
1264
1286
|
self_p->pattern = new(std::nothrow) RE2(other_p->pattern->pattern(),
|
|
1265
1287
|
other_p->pattern->options());
|
|
1266
|
-
if (self_p->pattern ==
|
|
1288
|
+
if (self_p->pattern == nullptr) {
|
|
1267
1289
|
rb_raise(rb_eNoMemError, "not enough memory to allocate RE2 object");
|
|
1268
1290
|
}
|
|
1269
1291
|
|
|
@@ -1629,14 +1651,14 @@ static VALUE re2_regexp_number_of_capturing_groups(const VALUE self) {
|
|
|
1629
1651
|
*/
|
|
1630
1652
|
static VALUE re2_regexp_named_capturing_groups(const VALUE self) {
|
|
1631
1653
|
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1632
|
-
const
|
|
1654
|
+
const auto& groups = p->pattern->NamedCapturingGroups();
|
|
1633
1655
|
VALUE capturing_groups = rb_hash_new();
|
|
1634
1656
|
|
|
1635
|
-
for (
|
|
1657
|
+
for (const auto& group : groups) {
|
|
1636
1658
|
rb_hash_aset(capturing_groups,
|
|
1637
|
-
encoded_str_new(
|
|
1659
|
+
encoded_str_new(group.first.data(), group.first.size(),
|
|
1638
1660
|
p->pattern->options().encoding()),
|
|
1639
|
-
INT2FIX(
|
|
1661
|
+
INT2FIX(group.second));
|
|
1640
1662
|
}
|
|
1641
1663
|
|
|
1642
1664
|
return capturing_groups;
|
|
@@ -1737,8 +1759,8 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
1737
1759
|
p = unwrap_re2_regexp(self);
|
|
1738
1760
|
|
|
1739
1761
|
int n;
|
|
1740
|
-
|
|
1741
|
-
|
|
1762
|
+
size_t startpos = 0;
|
|
1763
|
+
size_t endpos = RSTRING_LEN(text);
|
|
1742
1764
|
RE2::Anchor anchor = RE2::UNANCHORED;
|
|
1743
1765
|
|
|
1744
1766
|
if (RTEST(options)) {
|
|
@@ -1756,11 +1778,13 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
1756
1778
|
VALUE endpos_option = rb_hash_aref(options, ID2SYM(id_endpos));
|
|
1757
1779
|
if (!NIL_P(endpos_option)) {
|
|
1758
1780
|
#ifdef HAVE_ENDPOS_ARGUMENT
|
|
1759
|
-
|
|
1781
|
+
ssize_t endpos_value = NUM2SSIZET(endpos_option);
|
|
1760
1782
|
|
|
1761
|
-
if (
|
|
1783
|
+
if (endpos_value < 0) {
|
|
1762
1784
|
rb_raise(rb_eArgError, "endpos should be >= 0");
|
|
1763
1785
|
}
|
|
1786
|
+
|
|
1787
|
+
endpos = static_cast<size_t>(endpos_value);
|
|
1764
1788
|
#else
|
|
1765
1789
|
rb_raise(re2_eRegexpUnsupportedError, "current version of RE2::Match() does not support endpos argument");
|
|
1766
1790
|
#endif
|
|
@@ -1799,11 +1823,13 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
1799
1823
|
|
|
1800
1824
|
VALUE startpos_option = rb_hash_aref(options, ID2SYM(id_startpos));
|
|
1801
1825
|
if (!NIL_P(startpos_option)) {
|
|
1802
|
-
|
|
1826
|
+
ssize_t startpos_value = NUM2SSIZET(startpos_option);
|
|
1803
1827
|
|
|
1804
|
-
if (
|
|
1828
|
+
if (startpos_value < 0) {
|
|
1805
1829
|
rb_raise(rb_eArgError, "startpos should be >= 0");
|
|
1806
1830
|
}
|
|
1831
|
+
|
|
1832
|
+
startpos = static_cast<size_t>(startpos_value);
|
|
1807
1833
|
}
|
|
1808
1834
|
}
|
|
1809
1835
|
} else {
|
|
@@ -1818,6 +1844,13 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
1818
1844
|
rb_raise(rb_eArgError, "startpos should be <= endpos");
|
|
1819
1845
|
}
|
|
1820
1846
|
|
|
1847
|
+
#ifndef HAVE_ENDPOS_ARGUMENT
|
|
1848
|
+
/* Old RE2's Match() takes int startpos. Reject values that would overflow. */
|
|
1849
|
+
if (startpos > INT_MAX) {
|
|
1850
|
+
rb_raise(rb_eRangeError, "startpos should be <= %d", INT_MAX);
|
|
1851
|
+
}
|
|
1852
|
+
#endif
|
|
1853
|
+
|
|
1821
1854
|
if (n == 0) {
|
|
1822
1855
|
#ifdef HAVE_ENDPOS_ARGUMENT
|
|
1823
1856
|
bool matched = p->pattern->Match(
|
|
@@ -1838,7 +1871,7 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
1838
1871
|
n += 1;
|
|
1839
1872
|
|
|
1840
1873
|
re2::StringPiece *matches = new(std::nothrow) re2::StringPiece[n];
|
|
1841
|
-
if (matches ==
|
|
1874
|
+
if (matches == nullptr) {
|
|
1842
1875
|
rb_raise(rb_eNoMemError,
|
|
1843
1876
|
"not enough memory to allocate StringPieces for matches");
|
|
1844
1877
|
}
|
|
@@ -1935,7 +1968,7 @@ static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
|
|
|
1935
1968
|
RB_OBJ_WRITE(scanner, &c->text, rb_str_new_frozen(text));
|
|
1936
1969
|
c->input = new(std::nothrow) re2::StringPiece(
|
|
1937
1970
|
RSTRING_PTR(c->text), RSTRING_LEN(c->text));
|
|
1938
|
-
if (c->input ==
|
|
1971
|
+
if (c->input == nullptr) {
|
|
1939
1972
|
rb_raise(rb_eNoMemError,
|
|
1940
1973
|
"not enough memory to allocate StringPiece for input");
|
|
1941
1974
|
}
|
|
@@ -1993,29 +2026,32 @@ static VALUE re2_regexp_match_has_endpos_argument_p(VALUE) {
|
|
|
1993
2026
|
*/
|
|
1994
2027
|
static VALUE re2_replace(VALUE, VALUE str, VALUE pattern,
|
|
1995
2028
|
VALUE rewrite) {
|
|
1996
|
-
|
|
1997
|
-
StringValue(rewrite);
|
|
2029
|
+
re2_pattern *p = nullptr;
|
|
1998
2030
|
|
|
1999
|
-
|
|
2031
|
+
/* Coerce all arguments before any C++ allocations so that any Ruby
|
|
2032
|
+
* exceptions (via longjmp) cannot bypass C++ destructors and leak memory.
|
|
2033
|
+
*/
|
|
2034
|
+
StringValue(str);
|
|
2035
|
+
if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
|
|
2036
|
+
p = unwrap_re2_regexp(pattern);
|
|
2037
|
+
} else {
|
|
2038
|
+
StringValue(pattern);
|
|
2039
|
+
}
|
|
2040
|
+
StringValue(rewrite);
|
|
2000
2041
|
|
|
2001
2042
|
/* Take a copy of str so it can be modified in-place by
|
|
2002
2043
|
* RE2::Replace.
|
|
2003
2044
|
*/
|
|
2004
|
-
StringValue(str);
|
|
2005
2045
|
std::string str_as_string(RSTRING_PTR(str), RSTRING_LEN(str));
|
|
2006
2046
|
|
|
2007
2047
|
/* Do the replacement. */
|
|
2008
|
-
if (
|
|
2009
|
-
p = unwrap_re2_regexp(pattern);
|
|
2048
|
+
if (p) {
|
|
2010
2049
|
RE2::Replace(&str_as_string, *p->pattern,
|
|
2011
2050
|
re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
|
|
2012
2051
|
|
|
2013
2052
|
return encoded_str_new(str_as_string.data(), str_as_string.size(),
|
|
2014
2053
|
p->pattern->options().encoding());
|
|
2015
2054
|
} else {
|
|
2016
|
-
/* Ensure pattern is a string. */
|
|
2017
|
-
StringValue(pattern);
|
|
2018
|
-
|
|
2019
2055
|
RE2::Replace(&str_as_string,
|
|
2020
2056
|
re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)),
|
|
2021
2057
|
re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
|
|
@@ -2046,28 +2082,32 @@ static VALUE re2_replace(VALUE, VALUE str, VALUE pattern,
|
|
|
2046
2082
|
*/
|
|
2047
2083
|
static VALUE re2_global_replace(VALUE, VALUE str, VALUE pattern,
|
|
2048
2084
|
VALUE rewrite) {
|
|
2049
|
-
|
|
2085
|
+
re2_pattern *p = nullptr;
|
|
2086
|
+
|
|
2087
|
+
/* Coerce all arguments before any C++ allocations so that any Ruby
|
|
2088
|
+
* exceptions (via longjmp) cannot bypass C++ destructors and leak memory.
|
|
2089
|
+
*/
|
|
2090
|
+
StringValue(str);
|
|
2091
|
+
if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
|
|
2092
|
+
p = unwrap_re2_regexp(pattern);
|
|
2093
|
+
} else {
|
|
2094
|
+
StringValue(pattern);
|
|
2095
|
+
}
|
|
2050
2096
|
StringValue(rewrite);
|
|
2051
2097
|
|
|
2052
2098
|
/* Take a copy of str so it can be modified in-place by
|
|
2053
2099
|
* RE2::GlobalReplace.
|
|
2054
2100
|
*/
|
|
2055
|
-
re2_pattern *p;
|
|
2056
|
-
StringValue(str);
|
|
2057
2101
|
std::string str_as_string(RSTRING_PTR(str), RSTRING_LEN(str));
|
|
2058
2102
|
|
|
2059
2103
|
/* Do the replacement. */
|
|
2060
|
-
if (
|
|
2061
|
-
p = unwrap_re2_regexp(pattern);
|
|
2104
|
+
if (p) {
|
|
2062
2105
|
RE2::GlobalReplace(&str_as_string, *p->pattern,
|
|
2063
2106
|
re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
|
|
2064
2107
|
|
|
2065
2108
|
return encoded_str_new(str_as_string.data(), str_as_string.size(),
|
|
2066
2109
|
p->pattern->options().encoding());
|
|
2067
2110
|
} else {
|
|
2068
|
-
/* Ensure pattern is a string. */
|
|
2069
|
-
StringValue(pattern);
|
|
2070
|
-
|
|
2071
2111
|
RE2::GlobalReplace(&str_as_string,
|
|
2072
2112
|
re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)),
|
|
2073
2113
|
re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
|
|
@@ -2100,16 +2140,23 @@ static VALUE re2_global_replace(VALUE, VALUE str, VALUE pattern,
|
|
|
2100
2140
|
*/
|
|
2101
2141
|
static VALUE re2_extract(VALUE, VALUE text, VALUE pattern,
|
|
2102
2142
|
VALUE rewrite) {
|
|
2103
|
-
|
|
2104
|
-
|
|
2143
|
+
re2_pattern *p = nullptr;
|
|
2144
|
+
|
|
2145
|
+
/* Coerce all arguments before any C++ allocations so that any Ruby
|
|
2146
|
+
* exceptions (via longjmp) cannot bypass C++ destructors and leak memory.
|
|
2147
|
+
*/
|
|
2105
2148
|
StringValue(text);
|
|
2149
|
+
if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
|
|
2150
|
+
p = unwrap_re2_regexp(pattern);
|
|
2151
|
+
} else {
|
|
2152
|
+
StringValue(pattern);
|
|
2153
|
+
}
|
|
2154
|
+
StringValue(rewrite);
|
|
2106
2155
|
|
|
2107
|
-
re2_pattern *p;
|
|
2108
2156
|
std::string out;
|
|
2109
2157
|
bool extracted;
|
|
2110
2158
|
|
|
2111
|
-
if (
|
|
2112
|
-
p = unwrap_re2_regexp(pattern);
|
|
2159
|
+
if (p) {
|
|
2113
2160
|
extracted = RE2::Extract(
|
|
2114
2161
|
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
|
|
2115
2162
|
*p->pattern,
|
|
@@ -2123,9 +2170,6 @@ static VALUE re2_extract(VALUE, VALUE text, VALUE pattern,
|
|
|
2123
2170
|
return Qnil;
|
|
2124
2171
|
}
|
|
2125
2172
|
} else {
|
|
2126
|
-
/* Ensure pattern is a string. */
|
|
2127
|
-
StringValue(pattern);
|
|
2128
|
-
|
|
2129
2173
|
extracted = RE2::Extract(
|
|
2130
2174
|
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
|
|
2131
2175
|
RE2(re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern))),
|
|
@@ -2290,10 +2334,11 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
|
|
|
2290
2334
|
|
|
2291
2335
|
if (s->set) {
|
|
2292
2336
|
delete s->set;
|
|
2337
|
+
s->set = nullptr;
|
|
2293
2338
|
}
|
|
2294
2339
|
|
|
2295
2340
|
s->set = new(std::nothrow) RE2::Set(re2_options, re2_anchor);
|
|
2296
|
-
if (s->set ==
|
|
2341
|
+
if (s->set == nullptr) {
|
|
2297
2342
|
rb_raise(rb_eNoMemError, "not enough memory to allocate RE2::Set object");
|
|
2298
2343
|
}
|
|
2299
2344
|
|
|
@@ -2480,8 +2525,8 @@ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
2480
2525
|
rb_raise(re2_eSetMatchError, "Unknown RE2::Set::ErrorKind: %d", e.kind);
|
|
2481
2526
|
}
|
|
2482
2527
|
} else {
|
|
2483
|
-
for (
|
|
2484
|
-
rb_ary_push(result, INT2FIX(
|
|
2528
|
+
for (int index : v) {
|
|
2529
|
+
rb_ary_push(result, INT2FIX(index));
|
|
2485
2530
|
}
|
|
2486
2531
|
}
|
|
2487
2532
|
|
|
@@ -2495,8 +2540,8 @@ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
2495
2540
|
VALUE result = rb_ary_new2(v.size());
|
|
2496
2541
|
|
|
2497
2542
|
if (matched) {
|
|
2498
|
-
for (
|
|
2499
|
-
rb_ary_push(result, INT2FIX(
|
|
2543
|
+
for (int index : v) {
|
|
2544
|
+
rb_ary_push(result, INT2FIX(index));
|
|
2500
2545
|
}
|
|
2501
2546
|
}
|
|
2502
2547
|
|
data/lib/3.1/re2.so
CHANGED
|
Binary file
|
data/lib/3.2/re2.so
CHANGED
|
Binary file
|
data/lib/3.3/re2.so
CHANGED
|
Binary file
|
data/lib/3.4/re2.so
CHANGED
|
Binary file
|
data/lib/4.0/re2.so
CHANGED
|
Binary file
|
data/lib/re2/version.rb
CHANGED
data/spec/re2/regexp_spec.rb
CHANGED
|
@@ -590,6 +590,24 @@ RSpec.describe RE2::Regexp do
|
|
|
590
590
|
expect { re.match("one two three", endpos: 3) }.to raise_error(RE2::Regexp::UnsupportedError)
|
|
591
591
|
end
|
|
592
592
|
|
|
593
|
+
it "does not truncate startpos to 32 bits" do
|
|
594
|
+
skip "Underlying RE2::Match does not have endpos argument" unless RE2::Regexp.match_has_endpos_argument?
|
|
595
|
+
skip "size_t is not larger than a 32-bit int" if RbConfig::SIZEOF.fetch("size_t") <= (32 / 8)
|
|
596
|
+
|
|
597
|
+
re = RE2::Regexp.new('(\w+)', log_errors: false)
|
|
598
|
+
|
|
599
|
+
expect(re.match("one two three", startpos: 2_147_483_648, endpos: 2_147_483_649)).to be_nil
|
|
600
|
+
end
|
|
601
|
+
|
|
602
|
+
it "does not truncate endpos to 32 bits" do
|
|
603
|
+
skip "Underlying RE2::Match does not have endpos argument" unless RE2::Regexp.match_has_endpos_argument?
|
|
604
|
+
skip "size_t is not larger than a 32-bit int" if RbConfig::SIZEOF.fetch("size_t") <= (32 / 8)
|
|
605
|
+
|
|
606
|
+
re = RE2::Regexp.new('(\w+)', log_errors: false)
|
|
607
|
+
|
|
608
|
+
expect(re.match("one two three", endpos: 2_147_483_648)).to be_nil
|
|
609
|
+
end
|
|
610
|
+
|
|
593
611
|
it "does not anchor matches by default when extracting submatches" do
|
|
594
612
|
re = RE2::Regexp.new('(two)')
|
|
595
613
|
|
|
@@ -739,6 +757,15 @@ RSpec.describe RE2::Regexp do
|
|
|
739
757
|
expect(re.match("one two three", nil)).to be_a(RE2::MatchData)
|
|
740
758
|
end
|
|
741
759
|
|
|
760
|
+
it "raises an error when startpos exceeds INT_MAX on old RE2 ABI" do
|
|
761
|
+
skip "Underlying RE2::Match does not take int startpos" if RE2::Regexp.match_has_endpos_argument?
|
|
762
|
+
skip "size_t is not larger than a 32-bit int" if RbConfig::SIZEOF.fetch("size_t") <= (32 / 8)
|
|
763
|
+
|
|
764
|
+
re = RE2::Regexp.new('(\w+)', log_errors: false)
|
|
765
|
+
|
|
766
|
+
expect { re.match("test", start_pos: 2_147_483_648) }.to raise_error(RangeError, /startpos should be <=/)
|
|
767
|
+
end
|
|
768
|
+
|
|
742
769
|
it "raises an error when called on an uninitialized object" do
|
|
743
770
|
expect { described_class.allocate.match("test") }.to raise_error(TypeError, /uninitialized RE2::Regexp/)
|
|
744
771
|
end
|
data/spec/re2/scanner_spec.rb
CHANGED
|
@@ -243,6 +243,52 @@ RSpec.describe RE2::Scanner do
|
|
|
243
243
|
expect(scanner.scan).to be_nil
|
|
244
244
|
end
|
|
245
245
|
|
|
246
|
+
it "advances by whole characters with zero-width matches on 2-byte UTF-8 input", :aggregate_failures do
|
|
247
|
+
r = RE2::Regexp.new("")
|
|
248
|
+
scanner = r.scan("à")
|
|
249
|
+
|
|
250
|
+
expect(scanner.scan).to eq([])
|
|
251
|
+
expect(scanner.scan).to eq([])
|
|
252
|
+
expect(scanner.scan).to be_nil
|
|
253
|
+
end
|
|
254
|
+
|
|
255
|
+
it "advances by whole characters with zero-width matches on 3-byte UTF-8 input", :aggregate_failures do
|
|
256
|
+
r = RE2::Regexp.new("")
|
|
257
|
+
scanner = r.scan("\u20AC")
|
|
258
|
+
|
|
259
|
+
expect(scanner.scan).to eq([])
|
|
260
|
+
expect(scanner.scan).to eq([])
|
|
261
|
+
expect(scanner.scan).to be_nil
|
|
262
|
+
end
|
|
263
|
+
|
|
264
|
+
it "advances by whole characters with zero-width matches on 4-byte UTF-8 input", :aggregate_failures do
|
|
265
|
+
r = RE2::Regexp.new("")
|
|
266
|
+
scanner = r.scan("\u{1F600}")
|
|
267
|
+
|
|
268
|
+
expect(scanner.scan).to eq([])
|
|
269
|
+
expect(scanner.scan).to eq([])
|
|
270
|
+
expect(scanner.scan).to be_nil
|
|
271
|
+
end
|
|
272
|
+
|
|
273
|
+
it "advances by single bytes with zero-width matches on Latin-1 input", :aggregate_failures do
|
|
274
|
+
r = RE2::Regexp.new("", utf8: false)
|
|
275
|
+
scanner = r.scan("\xC3\xA0")
|
|
276
|
+
|
|
277
|
+
expect(scanner.scan).to eq([])
|
|
278
|
+
expect(scanner.scan).to eq([])
|
|
279
|
+
expect(scanner.scan).to eq([])
|
|
280
|
+
expect(scanner.scan).to be_nil
|
|
281
|
+
end
|
|
282
|
+
|
|
283
|
+
it "handles truncated multi-byte sequences at the end of input", :aggregate_failures do
|
|
284
|
+
r = RE2::Regexp.new("")
|
|
285
|
+
scanner = r.scan("\xC3")
|
|
286
|
+
|
|
287
|
+
expect(scanner.scan).to eq([])
|
|
288
|
+
expect(scanner.scan).to eq([])
|
|
289
|
+
expect(scanner.scan).to be_nil
|
|
290
|
+
end
|
|
291
|
+
|
|
246
292
|
it "raises a Type Error if given input that can't be coerced to a String" do
|
|
247
293
|
r = RE2::Regexp.new('(\w+)')
|
|
248
294
|
|
data/spec/re2_spec.rb
CHANGED
|
@@ -86,6 +86,10 @@ RSpec.describe RE2 do
|
|
|
86
86
|
expect { RE2.replace("woo", 0, "ah") }.to raise_error(TypeError)
|
|
87
87
|
end
|
|
88
88
|
|
|
89
|
+
it "does not leak memory when given a non-String, non-RE2::Regexp pattern" do
|
|
90
|
+
expect { RE2.replace("a" * 128, 0, "ah") }.to raise_error(TypeError)
|
|
91
|
+
end
|
|
92
|
+
|
|
89
93
|
it "raises a Type Error for a replacement that can't be converted to String" do
|
|
90
94
|
expect { RE2.replace("woo", "oo", 0) }.to raise_error(TypeError)
|
|
91
95
|
end
|
|
@@ -182,6 +186,10 @@ RSpec.describe RE2 do
|
|
|
182
186
|
expect { RE2.global_replace("woo", 0, "a") }.to raise_error(TypeError)
|
|
183
187
|
end
|
|
184
188
|
|
|
189
|
+
it "does not leak memory when given a non-String, non-RE2::Regexp pattern" do
|
|
190
|
+
expect { RE2.global_replace("a" * 128, 0, "a") }.to raise_error(TypeError)
|
|
191
|
+
end
|
|
192
|
+
|
|
185
193
|
it "raises a Type Error for a replacement that can't be converted to String" do
|
|
186
194
|
expect { RE2.global_replace("woo", "o", 0) }.to raise_error(TypeError)
|
|
187
195
|
end
|
|
@@ -266,6 +274,10 @@ RSpec.describe RE2 do
|
|
|
266
274
|
expect { RE2.extract("woo", 0, '\1') }.to raise_error(TypeError)
|
|
267
275
|
end
|
|
268
276
|
|
|
277
|
+
it "does not leak memory when given a non-String, non-RE2::Regexp pattern" do
|
|
278
|
+
expect { RE2.extract("a" * 128, 0, '\1') }.to raise_error(TypeError)
|
|
279
|
+
end
|
|
280
|
+
|
|
269
281
|
it "raises a Type Error for a rewrite that can't be converted to String" do
|
|
270
282
|
expect { RE2.extract("woo", '(\w+)', 0) }.to raise_error(TypeError)
|
|
271
283
|
end
|