re2 2.3.0 → 2.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5a13971802fb657d7882e4a7d104673ede16481ec279ca791b173278e69e2f16
4
- data.tar.gz: c911db8b2c8a10d6c2ead9ee6f6242cb77cccf832ba685927fef9191ba64cb30
3
+ metadata.gz: 9e4047a4b94dbb690ecd3eda0b27faa13f7f2d5a0fb207d4a4c87e97c6eb6b3c
4
+ data.tar.gz: 181cfd173f05e28d9366976c114a5dccb71d36cba89b0dd596742e09b552049a
5
5
  SHA512:
6
- metadata.gz: '0516976272847272023485d8830e1ab25e3a30c066ab6268a3d7ce5594a543f5450639fc24f4fdd83952c66b5a20302f06f06d53bd50eb388304763b77cbf364'
7
- data.tar.gz: 73917e5ac5721e4dfbd9e4163ff738686ac0da1dda15c2c1bce1919a19627927f61e5e3b1dcdb0926f0fe46dbb84d202a94dba85d926bc8e48893bf50f50f503
6
+ metadata.gz: 26691960166af79c291d0c30ddab0208cc5448ec8d53a06a7761da8fe4b7a49a35e5e02786f9fd8c88545e18f950ca04bf91b27544ddcc6f7aecc037a6a8026f
7
+ data.tar.gz: 6f0af5f515daf0a49c302f5de4c5e27daf46a62d17cf1d008beb73417eea92220192e077aa74e5e2ba12b7899308e14d2bc3d3e5f7803fa0433aedd6e795722c
data/README.md CHANGED
@@ -5,7 +5,7 @@ Ruby bindings to [RE2][], a "fast, safe, thread-friendly alternative to
5
5
  backtracking regular expression engines like those used in PCRE, Perl, and
6
6
  Python".
7
7
 
8
- **Current version:** 2.3.0
8
+ **Current version:** 2.4.0
9
9
  **Supported Ruby versions:** 2.6, 2.7, 3.0, 3.1, 3.2
10
10
  **Bundled RE2 version:** libre2.11 (2023-11-01)
11
11
  **Supported RE2 versions:** libre2.0 (< 2020-03-02), libre2.1 (2020-03-02), libre2.6 (2020-03-03), libre2.7 (2020-05-01), libre2.8 (2020-07-06), libre2.9 (2020-11-01), libre2.10 (2022-12-01), libre2.11 (2023-07-01)
@@ -39,8 +39,9 @@ If you are using Debian, you can install the [libre2-dev][] package like so:
39
39
 
40
40
  $ sudo apt-get install libre2-dev
41
41
 
42
- Recent versions of RE2 require a compiler with C++14 support such as
43
- [clang](http://clang.llvm.org/) 3.4 or [gcc](https://gcc.gnu.org/) 5.
42
+ Recent versions of RE2 require [CMake](https://cmake.org) and a compiler with
43
+ C++14 support such as [clang](http://clang.llvm.org/) 3.4 or
44
+ [gcc](https://gcc.gnu.org/) 5.
44
45
 
45
46
  If you are using a packaged Ruby distribution, make sure you also have the
46
47
  Ruby header files installed such as those provided by the [ruby-dev][] package
@@ -269,6 +270,9 @@ Contributions
269
270
  * Thanks to [Peter Zhu](https://github.com/peterzhu2118) for
270
271
  [ruby_memcheck](https://github.com/Shopify/ruby_memcheck) and helping find
271
272
  the memory leaks fixed in 2.1.3.
273
+ * Thanks to [Jean Boussier](https://github.com/byroot) for contributing the
274
+ switch to Ruby's `TypedData` API and the resulting garbage collection
275
+ improvements in 2.4.0.
272
276
 
273
277
  Contact
274
278
  -------
data/ext/re2/extconf.rb CHANGED
@@ -128,6 +128,7 @@ def build_extension(static_p = false)
128
128
 
129
129
  have_library("stdc++")
130
130
  have_header("stdint.h")
131
+ have_func("rb_gc_mark_movable") # introduced in Ruby 2.7
131
132
 
132
133
  if !static_p and !have_library("re2")
133
134
  abort "You must have re2 installed and specified with --with-re2-dir, please see https://github.com/google/re2/wiki/Install"
data/ext/re2/re2.cc CHANGED
@@ -122,49 +122,145 @@ static void parse_re2_options(RE2::Options* re2_options, const VALUE options) {
122
122
  }
123
123
  }
124
124
 
125
- static void re2_matchdata_mark(re2_matchdata* self) {
126
- rb_gc_mark(self->regexp);
127
- rb_gc_mark(self->text);
125
+ /* For compatibility with ruby < 2.7 */
126
+ #ifdef HAVE_RB_GC_MARK_MOVABLE
127
+ #define re2_compact_callback(x) .dcompact = (x),
128
+ #else
129
+ #define rb_gc_mark_movable(x) rb_gc_mark(x)
130
+ #define re2_compact_callback(x)
131
+ #endif
132
+
133
+ static void re2_matchdata_mark(void *data) {
134
+ re2_matchdata *self = (re2_matchdata *)data;
135
+ rb_gc_mark_movable(self->regexp);
136
+ rb_gc_mark_movable(self->text);
128
137
  }
129
138
 
130
- static void re2_matchdata_free(re2_matchdata* self) {
139
+ #ifdef HAVE_RB_GC_MARK_MOVABLE
140
+ static void re2_matchdata_update_references(void *data) {
141
+ re2_matchdata *self = (re2_matchdata *)data;
142
+ self->regexp = rb_gc_location(self->regexp);
143
+ self->text = rb_gc_location(self->text);
144
+ }
145
+ #endif
146
+
147
+ static void re2_matchdata_free(void *data) {
148
+ re2_matchdata *self = (re2_matchdata *)data;
131
149
  if (self->matches) {
132
150
  delete[] self->matches;
133
151
  }
134
- free(self);
152
+ xfree(self);
153
+ }
154
+
155
+ static size_t re2_matchdata_memsize(const void *data) {
156
+ const re2_matchdata *self = (const re2_matchdata *)data;
157
+ size_t size = sizeof(re2_matchdata);
158
+ if (self->matches) {
159
+ size += sizeof(self->matches) * self->number_of_matches;
160
+ }
161
+
162
+ return size;
135
163
  }
136
164
 
137
- static void re2_scanner_mark(re2_scanner* self) {
138
- rb_gc_mark(self->regexp);
139
- rb_gc_mark(self->text);
165
+ static const rb_data_type_t re2_matchdata_data_type = {
166
+ .wrap_struct_name = "RE2::MatchData",
167
+ .function = {
168
+ .dmark = re2_matchdata_mark,
169
+ .dfree = re2_matchdata_free,
170
+ .dsize = re2_matchdata_memsize,
171
+ re2_compact_callback(re2_matchdata_update_references)
172
+ },
173
+ // IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
174
+ // macro to update VALUE references, as to trigger write barriers.
175
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
176
+ };
177
+
178
+ static void re2_scanner_mark(void *data) {
179
+ re2_scanner *self = (re2_scanner *)data;
180
+ rb_gc_mark_movable(self->regexp);
181
+ rb_gc_mark_movable(self->text);
140
182
  }
141
183
 
142
- static void re2_scanner_free(re2_scanner* self) {
184
+ #ifdef HAVE_RB_GC_MARK_MOVABLE
185
+ static void re2_scanner_update_references(void *data) {
186
+ re2_scanner *self = (re2_scanner *)data;
187
+ self->regexp = rb_gc_location(self->regexp);
188
+ self->text = rb_gc_location(self->text);
189
+ }
190
+ #endif
191
+
192
+ static void re2_scanner_free(void *data) {
193
+ re2_scanner *self = (re2_scanner *)data;
143
194
  if (self->input) {
144
195
  delete self->input;
145
196
  }
146
- free(self);
197
+ xfree(self);
147
198
  }
148
199
 
149
- static void re2_regexp_free(re2_pattern* self) {
200
+ static size_t re2_scanner_memsize(const void *data) {
201
+ const re2_scanner *self = (const re2_scanner *)data;
202
+ size_t size = sizeof(re2_scanner);
203
+ if (self->input) {
204
+ size += sizeof(self->input);
205
+ }
206
+
207
+ return size;
208
+ }
209
+
210
+ static const rb_data_type_t re2_scanner_data_type = {
211
+ .wrap_struct_name = "RE2::Scanner",
212
+ .function = {
213
+ .dmark = re2_scanner_mark,
214
+ .dfree = re2_scanner_free,
215
+ .dsize = re2_scanner_memsize,
216
+ re2_compact_callback(re2_scanner_update_references)
217
+ },
218
+ // IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
219
+ // macro to update VALUE references, as to trigger write barriers.
220
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
221
+ };
222
+
223
+ static void re2_regexp_free(void *data) {
224
+ re2_pattern *self = (re2_pattern *)data;
150
225
  if (self->pattern) {
151
226
  delete self->pattern;
152
227
  }
153
- free(self);
228
+ xfree(self);
154
229
  }
155
230
 
231
+ static size_t re2_regexp_memsize(const void *data) {
232
+ const re2_pattern *self = (const re2_pattern *)data;
233
+ size_t size = sizeof(re2_pattern);
234
+ if (self->pattern) {
235
+ size += sizeof(self->pattern);
236
+ }
237
+
238
+ return size;
239
+ }
240
+
241
+ static const rb_data_type_t re2_regexp_data_type = {
242
+ .wrap_struct_name = "RE2::Regexp",
243
+ .function = {
244
+ .dmark = NULL,
245
+ .dfree = re2_regexp_free,
246
+ .dsize = re2_regexp_memsize,
247
+ },
248
+ // IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
249
+ // macro to update VALUE references, as to trigger write barriers.
250
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
251
+ };
252
+
156
253
  static VALUE re2_matchdata_allocate(VALUE klass) {
157
254
  re2_matchdata *m;
158
255
 
159
- return Data_Make_Struct(klass, re2_matchdata, re2_matchdata_mark,
160
- re2_matchdata_free, m);
256
+ return TypedData_Make_Struct(klass, re2_matchdata, &re2_matchdata_data_type,
257
+ m);
161
258
  }
162
259
 
163
260
  static VALUE re2_scanner_allocate(VALUE klass) {
164
261
  re2_scanner *c;
165
262
 
166
- return Data_Make_Struct(klass, re2_scanner, re2_scanner_mark,
167
- re2_scanner_free, c);
263
+ return TypedData_Make_Struct(klass, re2_scanner, &re2_scanner_data_type, c);
168
264
  }
169
265
 
170
266
  /*
@@ -177,7 +273,7 @@ static VALUE re2_scanner_allocate(VALUE klass) {
177
273
  */
178
274
  static VALUE re2_matchdata_string(const VALUE self) {
179
275
  re2_matchdata *m;
180
- Data_Get_Struct(self, re2_matchdata, m);
276
+ TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
181
277
 
182
278
  return m->text;
183
279
  }
@@ -192,7 +288,7 @@ static VALUE re2_matchdata_string(const VALUE self) {
192
288
  */
193
289
  static VALUE re2_scanner_string(const VALUE self) {
194
290
  re2_scanner *c;
195
- Data_Get_Struct(self, re2_scanner, c);
291
+ TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
196
292
 
197
293
  return c->text;
198
294
  }
@@ -207,7 +303,7 @@ static VALUE re2_scanner_string(const VALUE self) {
207
303
  */
208
304
  static VALUE re2_scanner_eof(const VALUE self) {
209
305
  re2_scanner *c;
210
- Data_Get_Struct(self, re2_scanner, c);
306
+ TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
211
307
 
212
308
  return BOOL2RUBY(c->eof);
213
309
  }
@@ -225,7 +321,7 @@ static VALUE re2_scanner_eof(const VALUE self) {
225
321
  */
226
322
  static VALUE re2_scanner_rewind(VALUE self) {
227
323
  re2_scanner *c;
228
- Data_Get_Struct(self, re2_scanner, c);
324
+ TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
229
325
 
230
326
  delete c->input;
231
327
  c->input = new(std::nothrow) re2::StringPiece(RSTRING_PTR(c->text));
@@ -252,8 +348,8 @@ static VALUE re2_scanner_scan(VALUE self) {
252
348
  re2_pattern *p;
253
349
  re2_scanner *c;
254
350
 
255
- Data_Get_Struct(self, re2_scanner, c);
256
- Data_Get_Struct(c->regexp, re2_pattern, p);
351
+ TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
352
+ TypedData_Get_Struct(c->regexp, re2_pattern, &re2_regexp_data_type, p);
257
353
 
258
354
  std::vector<RE2::Arg> argv(c->number_of_capturing_groups);
259
355
  std::vector<RE2::Arg*> args(c->number_of_capturing_groups);
@@ -308,8 +404,8 @@ static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
308
404
  re2_matchdata *m;
309
405
  re2_pattern *p;
310
406
 
311
- Data_Get_Struct(self, re2_matchdata, m);
312
- Data_Get_Struct(m->regexp, re2_pattern, p);
407
+ TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
408
+ TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
313
409
 
314
410
  int id;
315
411
 
@@ -349,7 +445,8 @@ static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
349
445
  */
350
446
  static VALUE re2_matchdata_size(const VALUE self) {
351
447
  re2_matchdata *m;
352
- Data_Get_Struct(self, re2_matchdata, m);
448
+
449
+ TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
353
450
 
354
451
  return INT2FIX(m->number_of_matches);
355
452
  }
@@ -367,7 +464,7 @@ static VALUE re2_matchdata_size(const VALUE self) {
367
464
  static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
368
465
  re2_matchdata *m;
369
466
 
370
- Data_Get_Struct(self, re2_matchdata, m);
467
+ TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
371
468
 
372
469
  re2::StringPiece *match = re2_matchdata_find_match(n, self);
373
470
  if (match == NULL) {
@@ -392,7 +489,7 @@ static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
392
489
  static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
393
490
  re2_matchdata *m;
394
491
 
395
- Data_Get_Struct(self, re2_matchdata, m);
492
+ TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
396
493
 
397
494
  re2::StringPiece *match = re2_matchdata_find_match(n, self);
398
495
  if (match == NULL) {
@@ -414,7 +511,7 @@ static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
414
511
  */
415
512
  static VALUE re2_matchdata_regexp(const VALUE self) {
416
513
  re2_matchdata *m;
417
- Data_Get_Struct(self, re2_matchdata, m);
514
+ TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
418
515
 
419
516
  return m->regexp;
420
517
  }
@@ -429,7 +526,7 @@ static VALUE re2_matchdata_regexp(const VALUE self) {
429
526
  */
430
527
  static VALUE re2_scanner_regexp(const VALUE self) {
431
528
  re2_scanner *c;
432
- Data_Get_Struct(self, re2_scanner, c);
529
+ TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
433
530
 
434
531
  return c->regexp;
435
532
  }
@@ -437,7 +534,7 @@ static VALUE re2_scanner_regexp(const VALUE self) {
437
534
  static VALUE re2_regexp_allocate(VALUE klass) {
438
535
  re2_pattern *p;
439
536
 
440
- return Data_Make_Struct(klass, re2_pattern, 0, re2_regexp_free, p);
537
+ return TypedData_Make_Struct(klass, re2_pattern, &re2_regexp_data_type, p);
441
538
  }
442
539
 
443
540
  /*
@@ -456,8 +553,8 @@ static VALUE re2_matchdata_to_a(const VALUE self) {
456
553
  re2_matchdata *m;
457
554
  re2_pattern *p;
458
555
 
459
- Data_Get_Struct(self, re2_matchdata, m);
460
- Data_Get_Struct(m->regexp, re2_pattern, p);
556
+ TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
557
+ TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
461
558
 
462
559
  VALUE array = rb_ary_new2(m->number_of_matches);
463
560
  for (int i = 0; i < m->number_of_matches; ++i) {
@@ -478,8 +575,8 @@ static VALUE re2_matchdata_nth_match(int nth, const VALUE self) {
478
575
  re2_matchdata *m;
479
576
  re2_pattern *p;
480
577
 
481
- Data_Get_Struct(self, re2_matchdata, m);
482
- Data_Get_Struct(m->regexp, re2_pattern, p);
578
+ TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
579
+ TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
483
580
 
484
581
  if (nth < 0 || nth >= m->number_of_matches) {
485
582
  return Qnil;
@@ -499,8 +596,8 @@ static VALUE re2_matchdata_named_match(const char* name, const VALUE self) {
499
596
  re2_matchdata *m;
500
597
  re2_pattern *p;
501
598
 
502
- Data_Get_Struct(self, re2_matchdata, m);
503
- Data_Get_Struct(m->regexp, re2_pattern, p);
599
+ TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
600
+ TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
504
601
 
505
602
  const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
506
603
  std::map<std::string, int>::const_iterator search = groups.find(name);
@@ -599,8 +696,8 @@ static VALUE re2_matchdata_inspect(const VALUE self) {
599
696
  re2_matchdata *m;
600
697
  re2_pattern *p;
601
698
 
602
- Data_Get_Struct(self, re2_matchdata, m);
603
- Data_Get_Struct(m->regexp, re2_pattern, p);
699
+ TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
700
+ TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
604
701
 
605
702
  std::ostringstream output;
606
703
  output << "#<RE2::MatchData";
@@ -651,8 +748,8 @@ static VALUE re2_matchdata_deconstruct(const VALUE self) {
651
748
  re2_matchdata *m;
652
749
  re2_pattern *p;
653
750
 
654
- Data_Get_Struct(self, re2_matchdata, m);
655
- Data_Get_Struct(m->regexp, re2_pattern, p);
751
+ TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
752
+ TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
656
753
 
657
754
  VALUE array = rb_ary_new2(m->number_of_matches - 1);
658
755
  for (int i = 1; i < m->number_of_matches; ++i) {
@@ -701,8 +798,8 @@ static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys)
701
798
  re2_matchdata *m;
702
799
  re2_pattern *p;
703
800
 
704
- Data_Get_Struct(self, re2_matchdata, m);
705
- Data_Get_Struct(m->regexp, re2_pattern, p);
801
+ TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
802
+ TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
706
803
 
707
804
  const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
708
805
  VALUE capturing_groups = rb_hash_new();
@@ -790,7 +887,7 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
790
887
  /* Ensure pattern is a string. */
791
888
  StringValue(pattern);
792
889
 
793
- Data_Get_Struct(self, re2_pattern, p);
890
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
794
891
 
795
892
  if (RTEST(options)) {
796
893
  RE2::Options re2_options;
@@ -823,7 +920,7 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
823
920
  static VALUE re2_regexp_inspect(const VALUE self) {
824
921
  re2_pattern *p;
825
922
 
826
- Data_Get_Struct(self, re2_pattern, p);
923
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
827
924
 
828
925
  std::ostringstream output;
829
926
 
@@ -847,7 +944,7 @@ static VALUE re2_regexp_inspect(const VALUE self) {
847
944
  */
848
945
  static VALUE re2_regexp_to_s(const VALUE self) {
849
946
  re2_pattern *p;
850
- Data_Get_Struct(self, re2_pattern, p);
947
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
851
948
 
852
949
  return encoded_str_new(p->pattern->pattern().data(),
853
950
  p->pattern->pattern().size(),
@@ -865,7 +962,7 @@ static VALUE re2_regexp_to_s(const VALUE self) {
865
962
  */
866
963
  static VALUE re2_regexp_ok(const VALUE self) {
867
964
  re2_pattern *p;
868
- Data_Get_Struct(self, re2_pattern, p);
965
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
869
966
 
870
967
  return BOOL2RUBY(p->pattern->ok());
871
968
  }
@@ -881,7 +978,7 @@ static VALUE re2_regexp_ok(const VALUE self) {
881
978
  */
882
979
  static VALUE re2_regexp_utf8(const VALUE self) {
883
980
  re2_pattern *p;
884
- Data_Get_Struct(self, re2_pattern, p);
981
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
885
982
 
886
983
  return BOOL2RUBY(p->pattern->options().encoding() == RE2::Options::EncodingUTF8);
887
984
  }
@@ -897,7 +994,7 @@ static VALUE re2_regexp_utf8(const VALUE self) {
897
994
  */
898
995
  static VALUE re2_regexp_posix_syntax(const VALUE self) {
899
996
  re2_pattern *p;
900
- Data_Get_Struct(self, re2_pattern, p);
997
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
901
998
 
902
999
  return BOOL2RUBY(p->pattern->options().posix_syntax());
903
1000
  }
@@ -913,7 +1010,7 @@ static VALUE re2_regexp_posix_syntax(const VALUE self) {
913
1010
  */
914
1011
  static VALUE re2_regexp_longest_match(const VALUE self) {
915
1012
  re2_pattern *p;
916
- Data_Get_Struct(self, re2_pattern, p);
1013
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
917
1014
 
918
1015
  return BOOL2RUBY(p->pattern->options().longest_match());
919
1016
  }
@@ -929,7 +1026,7 @@ static VALUE re2_regexp_longest_match(const VALUE self) {
929
1026
  */
930
1027
  static VALUE re2_regexp_log_errors(const VALUE self) {
931
1028
  re2_pattern *p;
932
- Data_Get_Struct(self, re2_pattern, p);
1029
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
933
1030
 
934
1031
  return BOOL2RUBY(p->pattern->options().log_errors());
935
1032
  }
@@ -945,7 +1042,7 @@ static VALUE re2_regexp_log_errors(const VALUE self) {
945
1042
  */
946
1043
  static VALUE re2_regexp_max_mem(const VALUE self) {
947
1044
  re2_pattern *p;
948
- Data_Get_Struct(self, re2_pattern, p);
1045
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
949
1046
 
950
1047
  return INT2FIX(p->pattern->options().max_mem());
951
1048
  }
@@ -961,7 +1058,7 @@ static VALUE re2_regexp_max_mem(const VALUE self) {
961
1058
  */
962
1059
  static VALUE re2_regexp_literal(const VALUE self) {
963
1060
  re2_pattern *p;
964
- Data_Get_Struct(self, re2_pattern, p);
1061
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
965
1062
 
966
1063
  return BOOL2RUBY(p->pattern->options().literal());
967
1064
  }
@@ -977,7 +1074,7 @@ static VALUE re2_regexp_literal(const VALUE self) {
977
1074
  */
978
1075
  static VALUE re2_regexp_never_nl(const VALUE self) {
979
1076
  re2_pattern *p;
980
- Data_Get_Struct(self, re2_pattern, p);
1077
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
981
1078
 
982
1079
  return BOOL2RUBY(p->pattern->options().never_nl());
983
1080
  }
@@ -993,7 +1090,7 @@ static VALUE re2_regexp_never_nl(const VALUE self) {
993
1090
  */
994
1091
  static VALUE re2_regexp_case_sensitive(const VALUE self) {
995
1092
  re2_pattern *p;
996
- Data_Get_Struct(self, re2_pattern, p);
1093
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
997
1094
 
998
1095
  return BOOL2RUBY(p->pattern->options().case_sensitive());
999
1096
  }
@@ -1023,7 +1120,7 @@ static VALUE re2_regexp_case_insensitive(const VALUE self) {
1023
1120
  */
1024
1121
  static VALUE re2_regexp_perl_classes(const VALUE self) {
1025
1122
  re2_pattern *p;
1026
- Data_Get_Struct(self, re2_pattern, p);
1123
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1027
1124
 
1028
1125
  return BOOL2RUBY(p->pattern->options().perl_classes());
1029
1126
  }
@@ -1039,7 +1136,7 @@ static VALUE re2_regexp_perl_classes(const VALUE self) {
1039
1136
  */
1040
1137
  static VALUE re2_regexp_word_boundary(const VALUE self) {
1041
1138
  re2_pattern *p;
1042
- Data_Get_Struct(self, re2_pattern, p);
1139
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1043
1140
 
1044
1141
  return BOOL2RUBY(p->pattern->options().word_boundary());
1045
1142
  }
@@ -1055,7 +1152,7 @@ static VALUE re2_regexp_word_boundary(const VALUE self) {
1055
1152
  */
1056
1153
  static VALUE re2_regexp_one_line(const VALUE self) {
1057
1154
  re2_pattern *p;
1058
- Data_Get_Struct(self, re2_pattern, p);
1155
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1059
1156
 
1060
1157
  return BOOL2RUBY(p->pattern->options().one_line());
1061
1158
  }
@@ -1068,7 +1165,7 @@ static VALUE re2_regexp_one_line(const VALUE self) {
1068
1165
  */
1069
1166
  static VALUE re2_regexp_error(const VALUE self) {
1070
1167
  re2_pattern *p;
1071
- Data_Get_Struct(self, re2_pattern, p);
1168
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1072
1169
 
1073
1170
  if (p->pattern->ok()) {
1074
1171
  return Qnil;
@@ -1089,7 +1186,7 @@ static VALUE re2_regexp_error(const VALUE self) {
1089
1186
  */
1090
1187
  static VALUE re2_regexp_error_arg(const VALUE self) {
1091
1188
  re2_pattern *p;
1092
- Data_Get_Struct(self, re2_pattern, p);
1189
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1093
1190
 
1094
1191
  if (p->pattern->ok()) {
1095
1192
  return Qnil;
@@ -1109,7 +1206,7 @@ static VALUE re2_regexp_error_arg(const VALUE self) {
1109
1206
  */
1110
1207
  static VALUE re2_regexp_program_size(const VALUE self) {
1111
1208
  re2_pattern *p;
1112
- Data_Get_Struct(self, re2_pattern, p);
1209
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1113
1210
 
1114
1211
  return INT2FIX(p->pattern->ProgramSize());
1115
1212
  }
@@ -1123,7 +1220,7 @@ static VALUE re2_regexp_program_size(const VALUE self) {
1123
1220
  static VALUE re2_regexp_options(const VALUE self) {
1124
1221
  re2_pattern *p;
1125
1222
 
1126
- Data_Get_Struct(self, re2_pattern, p);
1223
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1127
1224
  VALUE options = rb_hash_new();
1128
1225
 
1129
1226
  rb_hash_aset(options, ID2SYM(id_utf8),
@@ -1174,7 +1271,7 @@ static VALUE re2_regexp_options(const VALUE self) {
1174
1271
  */
1175
1272
  static VALUE re2_regexp_number_of_capturing_groups(const VALUE self) {
1176
1273
  re2_pattern *p;
1177
- Data_Get_Struct(self, re2_pattern, p);
1274
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1178
1275
 
1179
1276
  return INT2FIX(p->pattern->NumberOfCapturingGroups());
1180
1277
  }
@@ -1191,7 +1288,7 @@ static VALUE re2_regexp_number_of_capturing_groups(const VALUE self) {
1191
1288
  static VALUE re2_regexp_named_capturing_groups(const VALUE self) {
1192
1289
  re2_pattern *p;
1193
1290
 
1194
- Data_Get_Struct(self, re2_pattern, p);
1291
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1195
1292
  const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
1196
1293
  VALUE capturing_groups = rb_hash_new();
1197
1294
 
@@ -1267,7 +1364,7 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
1267
1364
  /* Ensure text is a string. */
1268
1365
  StringValue(text);
1269
1366
 
1270
- Data_Get_Struct(self, re2_pattern, p);
1367
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1271
1368
 
1272
1369
  int n;
1273
1370
 
@@ -1299,11 +1396,13 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
1299
1396
  n += 1;
1300
1397
 
1301
1398
  VALUE matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
1302
- Data_Get_Struct(matchdata, re2_matchdata, m);
1399
+ TypedData_Get_Struct(matchdata, re2_matchdata, &re2_matchdata_data_type, m);
1303
1400
  m->matches = new(std::nothrow) re2::StringPiece[n];
1304
- m->regexp = self;
1305
- m->text = rb_str_dup(text);
1306
- rb_str_freeze(m->text);
1401
+ RB_OBJ_WRITE(matchdata, &m->regexp, self);
1402
+ if (!RTEST(rb_obj_frozen_p(text))) {
1403
+ text = rb_str_freeze(rb_str_dup(text));
1404
+ }
1405
+ RB_OBJ_WRITE(matchdata, &m->text, text);
1307
1406
 
1308
1407
  if (m->matches == 0) {
1309
1408
  rb_raise(rb_eNoMemError,
@@ -1352,13 +1451,13 @@ static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
1352
1451
  re2_pattern *p;
1353
1452
  re2_scanner *c;
1354
1453
 
1355
- Data_Get_Struct(self, re2_pattern, p);
1454
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1356
1455
  VALUE scanner = rb_class_new_instance(0, 0, re2_cScanner);
1357
- Data_Get_Struct(scanner, re2_scanner, c);
1456
+ TypedData_Get_Struct(scanner, re2_scanner, &re2_scanner_data_type, c);
1358
1457
 
1359
1458
  c->input = new(std::nothrow) re2::StringPiece(RSTRING_PTR(text));
1360
- c->regexp = self;
1361
- c->text = text;
1459
+ RB_OBJ_WRITE(scanner, &c->regexp, self);
1460
+ RB_OBJ_WRITE(scanner, &c->text, text);
1362
1461
 
1363
1462
  if (p->pattern->ok()) {
1364
1463
  c->number_of_capturing_groups = p->pattern->NumberOfCapturingGroups();
@@ -1402,7 +1501,7 @@ static VALUE re2_Replace(VALUE, VALUE str, VALUE pattern,
1402
1501
 
1403
1502
  /* Do the replacement. */
1404
1503
  if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
1405
- Data_Get_Struct(pattern, re2_pattern, p);
1504
+ TypedData_Get_Struct(pattern, re2_pattern, &re2_regexp_data_type, p);
1406
1505
  RE2::Replace(&str_as_string, *p->pattern, RSTRING_PTR(rewrite));
1407
1506
 
1408
1507
  return encoded_str_new(str_as_string.data(), str_as_string.size(),
@@ -1446,7 +1545,7 @@ static VALUE re2_GlobalReplace(VALUE, VALUE str, VALUE pattern,
1446
1545
 
1447
1546
  /* Do the replacement. */
1448
1547
  if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
1449
- Data_Get_Struct(pattern, re2_pattern, p);
1548
+ TypedData_Get_Struct(pattern, re2_pattern, &re2_regexp_data_type, p);
1450
1549
  RE2::GlobalReplace(&str_as_string, *p->pattern, RSTRING_PTR(rewrite));
1451
1550
 
1452
1551
  return encoded_str_new(str_as_string.data(), str_as_string.size(),
@@ -1480,16 +1579,39 @@ static VALUE re2_QuoteMeta(VALUE, VALUE unquoted) {
1480
1579
  return rb_str_new(quoted_string.data(), quoted_string.size());
1481
1580
  }
1482
1581
 
1483
- static void re2_set_free(re2_set *self) {
1582
+ static void re2_set_free(void *data) {
1583
+ re2_set *self = (re2_set *)data;
1484
1584
  if (self->set) {
1485
1585
  delete self->set;
1486
1586
  }
1487
- free(self);
1587
+ xfree(self);
1588
+ }
1589
+
1590
+ static size_t re2_set_memsize(const void *data) {
1591
+ const re2_set *self = (const re2_set *)data;
1592
+ size_t size = sizeof(re2_set);
1593
+ if (self->set) {
1594
+ size += sizeof(self->set);
1595
+ }
1596
+
1597
+ return size;
1488
1598
  }
1489
1599
 
1600
+ static const rb_data_type_t re2_set_data_type = {
1601
+ .wrap_struct_name = "RE2::Set",
1602
+ .function = {
1603
+ .dmark = NULL,
1604
+ .dfree = re2_set_free,
1605
+ .dsize = re2_set_memsize,
1606
+ },
1607
+ // IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
1608
+ // macro to update VALUE references, as to trigger write barriers.
1609
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
1610
+ };
1611
+
1490
1612
  static VALUE re2_set_allocate(VALUE klass) {
1491
1613
  re2_set *s;
1492
- VALUE result = Data_Make_Struct(klass, re2_set, 0, re2_set_free, s);
1614
+ VALUE result = TypedData_Make_Struct(klass, re2_set, &re2_set_data_type, s);
1493
1615
 
1494
1616
  return result;
1495
1617
  }
@@ -1540,7 +1662,7 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
1540
1662
  re2_set *s;
1541
1663
 
1542
1664
  rb_scan_args(argc, argv, "02", &anchor, &options);
1543
- Data_Get_Struct(self, re2_set, s);
1665
+ TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);
1544
1666
 
1545
1667
  RE2::Anchor re2_anchor = RE2::UNANCHORED;
1546
1668
 
@@ -1588,7 +1710,7 @@ static VALUE re2_set_add(VALUE self, VALUE pattern) {
1588
1710
  StringValue(pattern);
1589
1711
 
1590
1712
  re2_set *s;
1591
- Data_Get_Struct(self, re2_set, s);
1713
+ TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);
1592
1714
 
1593
1715
  /* To prevent the memory of the err string leaking when we call rb_raise,
1594
1716
  * take a copy of it and let it go out of scope.
@@ -1621,7 +1743,7 @@ static VALUE re2_set_add(VALUE self, VALUE pattern) {
1621
1743
  */
1622
1744
  static VALUE re2_set_compile(VALUE self) {
1623
1745
  re2_set *s;
1624
- Data_Get_Struct(self, re2_set, s);
1746
+ TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);
1625
1747
 
1626
1748
  return BOOL2RUBY(s->set->Compile());
1627
1749
  }
@@ -1688,7 +1810,7 @@ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
1688
1810
 
1689
1811
  StringValue(str);
1690
1812
  re2_set *s;
1691
- Data_Get_Struct(self, re2_set, s);
1813
+ TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);
1692
1814
 
1693
1815
  if (RTEST(options)) {
1694
1816
  Check_Type(options, T_HASH);
data/lib/re2/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RE2
4
- VERSION = "2.3.0"
4
+ VERSION = "2.4.0"
5
5
  end
@@ -129,6 +129,12 @@ RSpec.describe RE2::MatchData do
129
129
  re = RE2::Regexp.new('(\D+)').match("bob")
130
130
  expect(re.string).to be_frozen
131
131
  end
132
+
133
+ it "does not copy the string if it was already frozen" do
134
+ string = "bob".freeze
135
+ re = RE2::Regexp.new('(\D+)').match(string)
136
+ expect(re.string).to equal(string)
137
+ end
132
138
  end
133
139
 
134
140
  describe "#size" do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: re2
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.3.0
4
+ version: 2.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paul Mucur
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2023-10-31 00:00:00.000000000 Z
12
+ date: 2023-11-11 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake-compiler