re2 2.23.0-arm-linux-gnu → 2.24.0-arm-linux-gnu

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/re2/re2.cc CHANGED
@@ -245,6 +245,33 @@ static const rb_data_type_t re2_regexp_data_type = {
245
245
  RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
246
246
  };
247
247
 
248
+ static re2_pattern *unwrap_re2_regexp(VALUE self) {
249
+ re2_pattern *p;
250
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
251
+ if (!p->pattern) {
252
+ rb_raise(rb_eTypeError, "uninitialized RE2::Regexp");
253
+ }
254
+ return p;
255
+ }
256
+
257
+ static re2_matchdata *unwrap_re2_matchdata(VALUE self) {
258
+ re2_matchdata *m;
259
+ TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
260
+ if (!RTEST(m->regexp)) {
261
+ rb_raise(rb_eTypeError, "uninitialized RE2::MatchData");
262
+ }
263
+ return m;
264
+ }
265
+
266
+ static re2_scanner *unwrap_re2_scanner(VALUE self) {
267
+ re2_scanner *c;
268
+ TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
269
+ if (!RTEST(c->regexp)) {
270
+ rb_raise(rb_eTypeError, "uninitialized RE2::Scanner");
271
+ }
272
+ return c;
273
+ }
274
+
248
275
  static VALUE re2_matchdata_allocate(VALUE klass) {
249
276
  re2_matchdata *m;
250
277
 
@@ -269,8 +296,7 @@ static VALUE re2_scanner_allocate(VALUE klass) {
269
296
  * m.string #=> "bob 123"
270
297
  */
271
298
  static VALUE re2_matchdata_string(const VALUE self) {
272
- re2_matchdata *m;
273
- TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
299
+ re2_matchdata *m = unwrap_re2_matchdata(self);
274
300
 
275
301
  return m->text;
276
302
  }
@@ -287,8 +313,7 @@ static VALUE re2_matchdata_string(const VALUE self) {
287
313
  * c.string #=> "foo"
288
314
  */
289
315
  static VALUE re2_scanner_string(const VALUE self) {
290
- re2_scanner *c;
291
- TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
316
+ re2_scanner *c = unwrap_re2_scanner(self);
292
317
 
293
318
  return c->text;
294
319
  }
@@ -302,8 +327,7 @@ static VALUE re2_scanner_string(const VALUE self) {
302
327
  * c.eof? #=> true
303
328
  */
304
329
  static VALUE re2_scanner_eof(const VALUE self) {
305
- re2_scanner *c;
306
- TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
330
+ re2_scanner *c = unwrap_re2_scanner(self);
307
331
 
308
332
  return BOOL2RUBY(c->eof);
309
333
  }
@@ -320,8 +344,7 @@ static VALUE re2_scanner_eof(const VALUE self) {
320
344
  * e.scan #=> ["1"]
321
345
  */
322
346
  static VALUE re2_scanner_rewind(VALUE self) {
323
- re2_scanner *c;
324
- TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
347
+ re2_scanner *c = unwrap_re2_scanner(self);
325
348
 
326
349
  delete c->input;
327
350
  c->input = new(std::nothrow) re2::StringPiece(
@@ -336,6 +359,34 @@ static VALUE re2_scanner_rewind(VALUE self) {
336
359
  return self;
337
360
  }
338
361
 
362
+ static VALUE re2_scanner_initialize_copy(VALUE self, VALUE other) {
363
+ re2_scanner *self_c;
364
+ re2_scanner *other_c = unwrap_re2_scanner(other);
365
+
366
+ TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, self_c);
367
+
368
+ if (self_c->input) {
369
+ delete self_c->input;
370
+ }
371
+
372
+ RB_OBJ_WRITE(self, &self_c->regexp, other_c->regexp);
373
+ RB_OBJ_WRITE(self, &self_c->text, other_c->text);
374
+ self_c->number_of_capturing_groups = other_c->number_of_capturing_groups;
375
+ self_c->eof = other_c->eof;
376
+
377
+ if (other_c->input) {
378
+ self_c->input = new(std::nothrow) re2::StringPiece(*other_c->input);
379
+ if (self_c->input == 0) {
380
+ rb_raise(rb_eNoMemError,
381
+ "not enough memory to allocate StringPiece for input");
382
+ }
383
+ } else {
384
+ self_c->input = NULL;
385
+ }
386
+
387
+ return self;
388
+ }
389
+
339
390
  /*
340
391
  * Scan the given text incrementally for matches using
341
392
  * {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L447-L463
@@ -356,11 +407,8 @@ static VALUE re2_scanner_rewind(VALUE self) {
356
407
  * s.scan #=> ["bar"]
357
408
  */
358
409
  static VALUE re2_scanner_scan(VALUE self) {
359
- re2_pattern *p;
360
- re2_scanner *c;
361
-
362
- TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
363
- TypedData_Get_Struct(c->regexp, re2_pattern, &re2_regexp_data_type, p);
410
+ re2_scanner *c = unwrap_re2_scanner(self);
411
+ re2_pattern *p = unwrap_re2_regexp(c->regexp);
364
412
 
365
413
  std::vector<RE2::Arg> argv(c->number_of_capturing_groups);
366
414
  std::vector<RE2::Arg*> args(c->number_of_capturing_groups);
@@ -409,11 +457,8 @@ static VALUE re2_scanner_scan(VALUE self) {
409
457
  }
410
458
 
411
459
  static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
412
- re2_matchdata *m;
413
- re2_pattern *p;
414
-
415
- TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
416
- TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
460
+ re2_matchdata *m = unwrap_re2_matchdata(self);
461
+ re2_pattern *p = unwrap_re2_regexp(m->regexp);
417
462
 
418
463
  int id;
419
464
 
@@ -463,9 +508,7 @@ static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
463
508
  * m.length #=> 2
464
509
  */
465
510
  static VALUE re2_matchdata_size(const VALUE self) {
466
- re2_matchdata *m;
467
-
468
- TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
511
+ re2_matchdata *m = unwrap_re2_matchdata(self);
469
512
 
470
513
  return INT2FIX(m->number_of_matches);
471
514
  }
@@ -482,9 +525,7 @@ static VALUE re2_matchdata_size(const VALUE self) {
482
525
  * m.begin(1) #=> 4
483
526
  */
484
527
  static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
485
- re2_matchdata *m;
486
-
487
- TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
528
+ re2_matchdata *m = unwrap_re2_matchdata(self);
488
529
 
489
530
  re2::StringPiece *match = re2_matchdata_find_match(n, self);
490
531
  if (match == NULL) {
@@ -509,9 +550,7 @@ static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
509
550
  * m.end(1) #=> 7
510
551
  */
511
552
  static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
512
- re2_matchdata *m;
513
-
514
- TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
553
+ re2_matchdata *m = unwrap_re2_matchdata(self);
515
554
 
516
555
  re2::StringPiece *match = re2_matchdata_find_match(n, self);
517
556
  if (match == NULL) {
@@ -532,8 +571,7 @@ static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
532
571
  * m.regexp #=> #<RE2::Regexp /(\d+)/>
533
572
  */
534
573
  static VALUE re2_matchdata_regexp(const VALUE self) {
535
- re2_matchdata *m;
536
- TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
574
+ re2_matchdata *m = unwrap_re2_matchdata(self);
537
575
 
538
576
  return m->regexp;
539
577
  }
@@ -547,8 +585,7 @@ static VALUE re2_matchdata_regexp(const VALUE self) {
547
585
  * c.regexp #=> #<RE2::Regexp /(\d+)/>
548
586
  */
549
587
  static VALUE re2_scanner_regexp(const VALUE self) {
550
- re2_scanner *c;
551
- TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
588
+ re2_scanner *c = unwrap_re2_scanner(self);
552
589
 
553
590
  return c->regexp;
554
591
  }
@@ -573,11 +610,8 @@ static VALUE re2_regexp_allocate(VALUE klass) {
573
610
  * m.to_a #=> ["123", "123"]
574
611
  */
575
612
  static VALUE re2_matchdata_to_a(const VALUE self) {
576
- re2_matchdata *m;
577
- re2_pattern *p;
578
-
579
- TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
580
- TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
613
+ re2_matchdata *m = unwrap_re2_matchdata(self);
614
+ re2_pattern *p = unwrap_re2_regexp(m->regexp);
581
615
 
582
616
  VALUE array = rb_ary_new2(m->number_of_matches);
583
617
  for (int i = 0; i < m->number_of_matches; ++i) {
@@ -595,11 +629,8 @@ static VALUE re2_matchdata_to_a(const VALUE self) {
595
629
  }
596
630
 
597
631
  static VALUE re2_matchdata_nth_match(int nth, const VALUE self) {
598
- re2_matchdata *m;
599
- re2_pattern *p;
600
-
601
- TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
602
- TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
632
+ re2_matchdata *m = unwrap_re2_matchdata(self);
633
+ re2_pattern *p = unwrap_re2_regexp(m->regexp);
603
634
 
604
635
  if (nth < 0 || nth >= m->number_of_matches) {
605
636
  return Qnil;
@@ -616,11 +647,8 @@ static VALUE re2_matchdata_nth_match(int nth, const VALUE self) {
616
647
  }
617
648
 
618
649
  static VALUE re2_matchdata_named_match(const std::string &name, const VALUE self) {
619
- re2_matchdata *m;
620
- re2_pattern *p;
621
-
622
- TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
623
- TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
650
+ re2_matchdata *m = unwrap_re2_matchdata(self);
651
+ re2_pattern *p = unwrap_re2_regexp(m->regexp);
624
652
 
625
653
  const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
626
654
  std::map<std::string, int>::const_iterator search = groups.find(name);
@@ -715,11 +743,8 @@ static VALUE re2_matchdata_to_s(const VALUE self) {
715
743
  * m.inspect #=> "#<RE2::MatchData \"123\" 1:\"123\">"
716
744
  */
717
745
  static VALUE re2_matchdata_inspect(const VALUE self) {
718
- re2_matchdata *m;
719
- re2_pattern *p;
720
-
721
- TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
722
- TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
746
+ re2_matchdata *m = unwrap_re2_matchdata(self);
747
+ re2_pattern *p = unwrap_re2_regexp(m->regexp);
723
748
 
724
749
  std::ostringstream output;
725
750
  output << "#<RE2::MatchData";
@@ -770,11 +795,8 @@ static VALUE re2_matchdata_inspect(const VALUE self) {
770
795
  * end
771
796
  */
772
797
  static VALUE re2_matchdata_deconstruct(const VALUE self) {
773
- re2_matchdata *m;
774
- re2_pattern *p;
775
-
776
- TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
777
- TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
798
+ re2_matchdata *m = unwrap_re2_matchdata(self);
799
+ re2_pattern *p = unwrap_re2_regexp(m->regexp);
778
800
 
779
801
  VALUE array = rb_ary_new2(m->number_of_matches - 1);
780
802
  for (int i = 1; i < m->number_of_matches; ++i) {
@@ -821,11 +843,8 @@ static VALUE re2_matchdata_deconstruct(const VALUE self) {
821
843
  * end
822
844
  */
823
845
  static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys) {
824
- re2_matchdata *m;
825
- re2_pattern *p;
826
-
827
- TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
828
- TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
846
+ re2_matchdata *m = unwrap_re2_matchdata(self);
847
+ re2_pattern *p = unwrap_re2_regexp(m->regexp);
829
848
 
830
849
  const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
831
850
  VALUE capturing_groups = rb_hash_new();
@@ -858,6 +877,36 @@ static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys)
858
877
  return capturing_groups;
859
878
  }
860
879
 
880
+ static VALUE re2_matchdata_initialize_copy(VALUE self, VALUE other) {
881
+ re2_matchdata *self_m;
882
+ re2_matchdata *other_m = unwrap_re2_matchdata(other);
883
+
884
+ TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, self_m);
885
+
886
+ if (self_m->matches) {
887
+ delete[] self_m->matches;
888
+ }
889
+
890
+ self_m->number_of_matches = other_m->number_of_matches;
891
+ RB_OBJ_WRITE(self, &self_m->regexp, other_m->regexp);
892
+ RB_OBJ_WRITE(self, &self_m->text, other_m->text);
893
+
894
+ if (other_m->matches) {
895
+ self_m->matches = new(std::nothrow) re2::StringPiece[other_m->number_of_matches];
896
+ if (self_m->matches == 0) {
897
+ rb_raise(rb_eNoMemError,
898
+ "not enough memory to allocate StringPiece for matches");
899
+ }
900
+ for (int i = 0; i < other_m->number_of_matches; ++i) {
901
+ self_m->matches[i] = other_m->matches[i];
902
+ }
903
+ } else {
904
+ self_m->matches = NULL;
905
+ }
906
+
907
+ return self;
908
+ }
909
+
861
910
  /*
862
911
  * Shorthand to compile a new {RE2::Regexp}.
863
912
  *
@@ -913,6 +962,10 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
913
962
 
914
963
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
915
964
 
965
+ if (p->pattern) {
966
+ delete p->pattern;
967
+ }
968
+
916
969
  if (RTEST(options)) {
917
970
  RE2::Options re2_options;
918
971
  parse_re2_options(&re2_options, options);
@@ -931,6 +984,25 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
931
984
  return self;
932
985
  }
933
986
 
987
+ static VALUE re2_regexp_initialize_copy(VALUE self, VALUE other) {
988
+ re2_pattern *self_p;
989
+ re2_pattern *other_p = unwrap_re2_regexp(other);
990
+
991
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, self_p);
992
+
993
+ if (self_p->pattern) {
994
+ delete self_p->pattern;
995
+ }
996
+
997
+ self_p->pattern = new(std::nothrow) RE2(other_p->pattern->pattern(),
998
+ other_p->pattern->options());
999
+ if (self_p->pattern == 0) {
1000
+ rb_raise(rb_eNoMemError, "not enough memory to allocate RE2 object");
1001
+ }
1002
+
1003
+ return self;
1004
+ }
1005
+
934
1006
  /*
935
1007
  * Returns a printable version of the regular expression.
936
1008
  *
@@ -945,9 +1017,7 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
945
1017
  * re2.inspect #=> "#<RE2::Regexp /woo?/>"
946
1018
  */
947
1019
  static VALUE re2_regexp_inspect(const VALUE self) {
948
- re2_pattern *p;
949
-
950
- TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1020
+ re2_pattern *p = unwrap_re2_regexp(self);
951
1021
 
952
1022
  std::ostringstream output;
953
1023
 
@@ -970,8 +1040,7 @@ static VALUE re2_regexp_inspect(const VALUE self) {
970
1040
  * re2.to_s #=> "woo?"
971
1041
  */
972
1042
  static VALUE re2_regexp_to_s(const VALUE self) {
973
- re2_pattern *p;
974
- TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1043
+ re2_pattern *p = unwrap_re2_regexp(self);
975
1044
 
976
1045
  return encoded_str_new(p->pattern->pattern().data(),
977
1046
  p->pattern->pattern().size(),
@@ -987,8 +1056,7 @@ static VALUE re2_regexp_to_s(const VALUE self) {
987
1056
  * re2.ok? #=> true
988
1057
  */
989
1058
  static VALUE re2_regexp_ok(const VALUE self) {
990
- re2_pattern *p;
991
- TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1059
+ re2_pattern *p = unwrap_re2_regexp(self);
992
1060
 
993
1061
  return BOOL2RUBY(p->pattern->ok());
994
1062
  }
@@ -1003,8 +1071,7 @@ static VALUE re2_regexp_ok(const VALUE self) {
1003
1071
  * re2.utf8? #=> true
1004
1072
  */
1005
1073
  static VALUE re2_regexp_utf8(const VALUE self) {
1006
- re2_pattern *p;
1007
- TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1074
+ re2_pattern *p = unwrap_re2_regexp(self);
1008
1075
 
1009
1076
  return BOOL2RUBY(p->pattern->options().encoding() == RE2::Options::EncodingUTF8);
1010
1077
  }
@@ -1019,8 +1086,7 @@ static VALUE re2_regexp_utf8(const VALUE self) {
1019
1086
  * re2.posix_syntax? #=> true
1020
1087
  */
1021
1088
  static VALUE re2_regexp_posix_syntax(const VALUE self) {
1022
- re2_pattern *p;
1023
- TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1089
+ re2_pattern *p = unwrap_re2_regexp(self);
1024
1090
 
1025
1091
  return BOOL2RUBY(p->pattern->options().posix_syntax());
1026
1092
  }
@@ -1035,8 +1101,7 @@ static VALUE re2_regexp_posix_syntax(const VALUE self) {
1035
1101
  * re2.longest_match? #=> true
1036
1102
  */
1037
1103
  static VALUE re2_regexp_longest_match(const VALUE self) {
1038
- re2_pattern *p;
1039
- TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1104
+ re2_pattern *p = unwrap_re2_regexp(self);
1040
1105
 
1041
1106
  return BOOL2RUBY(p->pattern->options().longest_match());
1042
1107
  }
@@ -1051,8 +1116,7 @@ static VALUE re2_regexp_longest_match(const VALUE self) {
1051
1116
  * re2.log_errors? #=> true
1052
1117
  */
1053
1118
  static VALUE re2_regexp_log_errors(const VALUE self) {
1054
- re2_pattern *p;
1055
- TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1119
+ re2_pattern *p = unwrap_re2_regexp(self);
1056
1120
 
1057
1121
  return BOOL2RUBY(p->pattern->options().log_errors());
1058
1122
  }
@@ -1066,8 +1130,7 @@ static VALUE re2_regexp_log_errors(const VALUE self) {
1066
1130
  * re2.max_mem #=> 1024
1067
1131
  */
1068
1132
  static VALUE re2_regexp_max_mem(const VALUE self) {
1069
- re2_pattern *p;
1070
- TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1133
+ re2_pattern *p = unwrap_re2_regexp(self);
1071
1134
 
1072
1135
  return INT2FIX(p->pattern->options().max_mem());
1073
1136
  }
@@ -1082,8 +1145,7 @@ static VALUE re2_regexp_max_mem(const VALUE self) {
1082
1145
  * re2.literal? #=> true
1083
1146
  */
1084
1147
  static VALUE re2_regexp_literal(const VALUE self) {
1085
- re2_pattern *p;
1086
- TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1148
+ re2_pattern *p = unwrap_re2_regexp(self);
1087
1149
 
1088
1150
  return BOOL2RUBY(p->pattern->options().literal());
1089
1151
  }
@@ -1098,8 +1160,7 @@ static VALUE re2_regexp_literal(const VALUE self) {
1098
1160
  * re2.never_nl? #=> true
1099
1161
  */
1100
1162
  static VALUE re2_regexp_never_nl(const VALUE self) {
1101
- re2_pattern *p;
1102
- TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1163
+ re2_pattern *p = unwrap_re2_regexp(self);
1103
1164
 
1104
1165
  return BOOL2RUBY(p->pattern->options().never_nl());
1105
1166
  }
@@ -1114,8 +1175,7 @@ static VALUE re2_regexp_never_nl(const VALUE self) {
1114
1175
  * re2.case_sensitive? #=> true
1115
1176
  */
1116
1177
  static VALUE re2_regexp_case_sensitive(const VALUE self) {
1117
- re2_pattern *p;
1118
- TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1178
+ re2_pattern *p = unwrap_re2_regexp(self);
1119
1179
 
1120
1180
  return BOOL2RUBY(p->pattern->options().case_sensitive());
1121
1181
  }
@@ -1144,8 +1204,7 @@ static VALUE re2_regexp_case_insensitive(const VALUE self) {
1144
1204
  * re2.perl_classes? #=> true
1145
1205
  */
1146
1206
  static VALUE re2_regexp_perl_classes(const VALUE self) {
1147
- re2_pattern *p;
1148
- TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1207
+ re2_pattern *p = unwrap_re2_regexp(self);
1149
1208
 
1150
1209
  return BOOL2RUBY(p->pattern->options().perl_classes());
1151
1210
  }
@@ -1160,8 +1219,7 @@ static VALUE re2_regexp_perl_classes(const VALUE self) {
1160
1219
  * re2.word_boundary? #=> true
1161
1220
  */
1162
1221
  static VALUE re2_regexp_word_boundary(const VALUE self) {
1163
- re2_pattern *p;
1164
- TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1222
+ re2_pattern *p = unwrap_re2_regexp(self);
1165
1223
 
1166
1224
  return BOOL2RUBY(p->pattern->options().word_boundary());
1167
1225
  }
@@ -1176,8 +1234,7 @@ static VALUE re2_regexp_word_boundary(const VALUE self) {
1176
1234
  * re2.one_line? #=> true
1177
1235
  */
1178
1236
  static VALUE re2_regexp_one_line(const VALUE self) {
1179
- re2_pattern *p;
1180
- TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1237
+ re2_pattern *p = unwrap_re2_regexp(self);
1181
1238
 
1182
1239
  return BOOL2RUBY(p->pattern->options().one_line());
1183
1240
  }
@@ -1189,8 +1246,7 @@ static VALUE re2_regexp_one_line(const VALUE self) {
1189
1246
  * @return [String, nil] the error string or `nil`
1190
1247
  */
1191
1248
  static VALUE re2_regexp_error(const VALUE self) {
1192
- re2_pattern *p;
1193
- TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1249
+ re2_pattern *p = unwrap_re2_regexp(self);
1194
1250
 
1195
1251
  if (p->pattern->ok()) {
1196
1252
  return Qnil;
@@ -1210,8 +1266,7 @@ static VALUE re2_regexp_error(const VALUE self) {
1210
1266
  * @return [String, nil] the offending portion of the regexp or `nil`
1211
1267
  */
1212
1268
  static VALUE re2_regexp_error_arg(const VALUE self) {
1213
- re2_pattern *p;
1214
- TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1269
+ re2_pattern *p = unwrap_re2_regexp(self);
1215
1270
 
1216
1271
  if (p->pattern->ok()) {
1217
1272
  return Qnil;
@@ -1230,8 +1285,7 @@ static VALUE re2_regexp_error_arg(const VALUE self) {
1230
1285
  * @return [Integer] the regexp "cost"
1231
1286
  */
1232
1287
  static VALUE re2_regexp_program_size(const VALUE self) {
1233
- re2_pattern *p;
1234
- TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1288
+ re2_pattern *p = unwrap_re2_regexp(self);
1235
1289
 
1236
1290
  return INT2FIX(p->pattern->ProgramSize());
1237
1291
  }
@@ -1242,9 +1296,7 @@ static VALUE re2_regexp_program_size(const VALUE self) {
1242
1296
  * @return [Hash] the options
1243
1297
  */
1244
1298
  static VALUE re2_regexp_options(const VALUE self) {
1245
- re2_pattern *p;
1246
-
1247
- TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1299
+ re2_pattern *p = unwrap_re2_regexp(self);
1248
1300
  VALUE options = rb_hash_new();
1249
1301
 
1250
1302
  rb_hash_aset(options, ID2SYM(id_utf8),
@@ -1294,8 +1346,7 @@ static VALUE re2_regexp_options(const VALUE self) {
1294
1346
  * @return [Integer] the number of capturing subpatterns
1295
1347
  */
1296
1348
  static VALUE re2_regexp_number_of_capturing_groups(const VALUE self) {
1297
- re2_pattern *p;
1298
- TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1349
+ re2_pattern *p = unwrap_re2_regexp(self);
1299
1350
 
1300
1351
  return INT2FIX(p->pattern->NumberOfCapturingGroups());
1301
1352
  }
@@ -1310,9 +1361,7 @@ static VALUE re2_regexp_number_of_capturing_groups(const VALUE self) {
1310
1361
  * @return [Hash] a hash of names to capturing indices
1311
1362
  */
1312
1363
  static VALUE re2_regexp_named_capturing_groups(const VALUE self) {
1313
- re2_pattern *p;
1314
-
1315
- TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1364
+ re2_pattern *p = unwrap_re2_regexp(self);
1316
1365
  const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
1317
1366
  VALUE capturing_groups = rb_hash_new();
1318
1367
 
@@ -1418,7 +1467,7 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
1418
1467
  /* Ensure text is a string. */
1419
1468
  StringValue(text);
1420
1469
 
1421
- TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1470
+ p = unwrap_re2_regexp(self);
1422
1471
 
1423
1472
  int n;
1424
1473
  int startpos = 0;
@@ -1565,12 +1614,10 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
1565
1614
  * @raise [TypeError] if text cannot be coerced to a `String`
1566
1615
  */
1567
1616
  static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
1568
- re2_pattern *p;
1569
-
1570
1617
  /* Ensure text is a string. */
1571
1618
  StringValue(text);
1572
1619
 
1573
- TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1620
+ re2_pattern *p = unwrap_re2_regexp(self);
1574
1621
 
1575
1622
  return BOOL2RUBY(RE2::PartialMatch(
1576
1623
  re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)), *p->pattern));
@@ -1585,12 +1632,10 @@ static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
1585
1632
  * @raise [TypeError] if text cannot be coerced to a `String`
1586
1633
  */
1587
1634
  static VALUE re2_regexp_full_match_p(const VALUE self, VALUE text) {
1588
- re2_pattern *p;
1589
-
1590
1635
  /* Ensure text is a string. */
1591
1636
  StringValue(text);
1592
1637
 
1593
- TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1638
+ re2_pattern *p = unwrap_re2_regexp(self);
1594
1639
 
1595
1640
  return BOOL2RUBY(RE2::FullMatch(
1596
1641
  re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)), *p->pattern));
@@ -1612,10 +1657,8 @@ static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
1612
1657
  /* Ensure text is a string. */
1613
1658
  StringValue(text);
1614
1659
 
1615
- re2_pattern *p;
1660
+ re2_pattern *p = unwrap_re2_regexp(self);
1616
1661
  re2_scanner *c;
1617
-
1618
- TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1619
1662
  VALUE scanner = rb_class_new_instance(0, 0, re2_cScanner);
1620
1663
  TypedData_Get_Struct(scanner, re2_scanner, &re2_scanner_data_type, c);
1621
1664
 
@@ -1694,7 +1737,7 @@ static VALUE re2_Replace(VALUE, VALUE str, VALUE pattern,
1694
1737
 
1695
1738
  /* Do the replacement. */
1696
1739
  if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
1697
- TypedData_Get_Struct(pattern, re2_pattern, &re2_regexp_data_type, p);
1740
+ p = unwrap_re2_regexp(pattern);
1698
1741
  RE2::Replace(&str_as_string, *p->pattern,
1699
1742
  re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
1700
1743
 
@@ -1746,7 +1789,7 @@ static VALUE re2_GlobalReplace(VALUE, VALUE str, VALUE pattern,
1746
1789
 
1747
1790
  /* Do the replacement. */
1748
1791
  if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
1749
- TypedData_Get_Struct(pattern, re2_pattern, &re2_regexp_data_type, p);
1792
+ p = unwrap_re2_regexp(pattern);
1750
1793
  RE2::GlobalReplace(&str_as_string, *p->pattern,
1751
1794
  re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
1752
1795
 
@@ -1818,6 +1861,15 @@ static const rb_data_type_t re2_set_data_type = {
1818
1861
  RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
1819
1862
  };
1820
1863
 
1864
+ static re2_set *unwrap_re2_set(VALUE self) {
1865
+ re2_set *s;
1866
+ TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);
1867
+ if (!s->set) {
1868
+ rb_raise(rb_eTypeError, "uninitialized RE2::Set");
1869
+ }
1870
+ return s;
1871
+ }
1872
+
1821
1873
  static VALUE re2_set_allocate(VALUE klass) {
1822
1874
  re2_set *s;
1823
1875
  VALUE result = TypedData_Make_Struct(klass, re2_set, &re2_set_data_type, s);
@@ -1825,6 +1877,10 @@ static VALUE re2_set_allocate(VALUE klass) {
1825
1877
  return result;
1826
1878
  }
1827
1879
 
1880
+ static VALUE re2_set_initialize_copy(VALUE, VALUE) {
1881
+ rb_raise(rb_eTypeError, "cannot copy RE2::Set");
1882
+ }
1883
+
1828
1884
  /*
1829
1885
  * Returns a new {RE2::Set} object, a collection of patterns that can be
1830
1886
  * searched for simultaneously.
@@ -1895,6 +1951,10 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
1895
1951
  parse_re2_options(&re2_options, options);
1896
1952
  }
1897
1953
 
1954
+ if (s->set) {
1955
+ delete s->set;
1956
+ }
1957
+
1898
1958
  s->set = new(std::nothrow) RE2::Set(re2_options, re2_anchor);
1899
1959
  if (s->set == 0) {
1900
1960
  rb_raise(rb_eNoMemError, "not enough memory to allocate RE2::Set object");
@@ -1919,8 +1979,7 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
1919
1979
  static VALUE re2_set_add(VALUE self, VALUE pattern) {
1920
1980
  StringValue(pattern);
1921
1981
 
1922
- re2_set *s;
1923
- TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);
1982
+ re2_set *s = unwrap_re2_set(self);
1924
1983
 
1925
1984
  int index;
1926
1985
  VALUE msg;
@@ -1951,8 +2010,7 @@ static VALUE re2_set_add(VALUE self, VALUE pattern) {
1951
2010
  * set.compile #=> true
1952
2011
  */
1953
2012
  static VALUE re2_set_compile(VALUE self) {
1954
- re2_set *s;
1955
- TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);
2013
+ re2_set *s = unwrap_re2_set(self);
1956
2014
 
1957
2015
  return BOOL2RUBY(s->set->Compile());
1958
2016
  }
@@ -1968,8 +2026,7 @@ static VALUE re2_set_compile(VALUE self) {
1968
2026
  */
1969
2027
  static VALUE re2_set_size(VALUE self) {
1970
2028
  #ifdef HAVE_SET_SIZE
1971
- re2_set *s;
1972
- TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);
2029
+ re2_set *s = unwrap_re2_set(self);
1973
2030
 
1974
2031
  return INT2FIX(s->set->Size());
1975
2032
  #else
@@ -2052,8 +2109,7 @@ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
2052
2109
  rb_scan_args(argc, argv, "11", &str, &options);
2053
2110
 
2054
2111
  StringValue(str);
2055
- re2_set *s;
2056
- TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);
2112
+ re2_set *s = unwrap_re2_set(self);
2057
2113
 
2058
2114
  if (RTEST(options)) {
2059
2115
  Check_Type(options, T_HASH);
@@ -2157,6 +2213,8 @@ extern "C" void Init_re2(void) {
2157
2213
  RUBY_METHOD_FUNC(re2_matchdata_deconstruct), 0);
2158
2214
  rb_define_method(re2_cMatchData, "deconstruct_keys",
2159
2215
  RUBY_METHOD_FUNC(re2_matchdata_deconstruct_keys), 1);
2216
+ rb_define_method(re2_cMatchData, "initialize_copy",
2217
+ RUBY_METHOD_FUNC(re2_matchdata_initialize_copy), 1);
2160
2218
 
2161
2219
  rb_define_method(re2_cScanner, "string",
2162
2220
  RUBY_METHOD_FUNC(re2_scanner_string), 0);
@@ -2168,11 +2226,15 @@ extern "C" void Init_re2(void) {
2168
2226
  RUBY_METHOD_FUNC(re2_scanner_scan), 0);
2169
2227
  rb_define_method(re2_cScanner, "rewind",
2170
2228
  RUBY_METHOD_FUNC(re2_scanner_rewind), 0);
2229
+ rb_define_method(re2_cScanner, "initialize_copy",
2230
+ RUBY_METHOD_FUNC(re2_scanner_initialize_copy), 1);
2171
2231
 
2172
2232
  rb_define_singleton_method(re2_cRegexp, "match_has_endpos_argument?",
2173
2233
  RUBY_METHOD_FUNC(re2_regexp_match_has_endpos_argument_p), 0);
2174
2234
  rb_define_method(re2_cRegexp, "initialize",
2175
2235
  RUBY_METHOD_FUNC(re2_regexp_initialize), -1);
2236
+ rb_define_method(re2_cRegexp, "initialize_copy",
2237
+ RUBY_METHOD_FUNC(re2_regexp_initialize_copy), 1);
2176
2238
  rb_define_method(re2_cRegexp, "ok?", RUBY_METHOD_FUNC(re2_regexp_ok), 0);
2177
2239
  rb_define_method(re2_cRegexp, "error", RUBY_METHOD_FUNC(re2_regexp_error),
2178
2240
  0);
@@ -2240,6 +2302,8 @@ extern "C" void Init_re2(void) {
2240
2302
  RUBY_METHOD_FUNC(re2_set_size_p), 0);
2241
2303
  rb_define_method(re2_cSet, "initialize",
2242
2304
  RUBY_METHOD_FUNC(re2_set_initialize), -1);
2305
+ rb_define_method(re2_cSet, "initialize_copy",
2306
+ RUBY_METHOD_FUNC(re2_set_initialize_copy), 1);
2243
2307
  rb_define_method(re2_cSet, "add", RUBY_METHOD_FUNC(re2_set_add), 1);
2244
2308
  rb_define_method(re2_cSet, "compile", RUBY_METHOD_FUNC(re2_set_compile), 0);
2245
2309
  rb_define_method(re2_cSet, "match", RUBY_METHOD_FUNC(re2_set_match), -1);