re2 2.3.0 → 2.4.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5a13971802fb657d7882e4a7d104673ede16481ec279ca791b173278e69e2f16
4
- data.tar.gz: c911db8b2c8a10d6c2ead9ee6f6242cb77cccf832ba685927fef9191ba64cb30
3
+ metadata.gz: ef5426f8bbd4fab32b2e41296fff7e523e571b486b90e71120cf31830eadaee9
4
+ data.tar.gz: 8a81a4a26fd7d315ba62eea762438b7b63a4a6cf5373d94b9be6af2c66a7346b
5
5
  SHA512:
6
- metadata.gz: '0516976272847272023485d8830e1ab25e3a30c066ab6268a3d7ce5594a543f5450639fc24f4fdd83952c66b5a20302f06f06d53bd50eb388304763b77cbf364'
7
- data.tar.gz: 73917e5ac5721e4dfbd9e4163ff738686ac0da1dda15c2c1bce1919a19627927f61e5e3b1dcdb0926f0fe46dbb84d202a94dba85d926bc8e48893bf50f50f503
6
+ metadata.gz: a7790c334317a0b2e94d0f6a4aed0bc4644385e7ae89de506ac5cea2ae7b1df85059796a586a191439728cbebcdc64bfd11b6edfa65cd4303c5e35497e342b1b
7
+ data.tar.gz: 5fbcc85713cf53c0b1cee1ecda0206df28f486f0b8436672db8ecf9635ccff41039da0cff1754035be33c951d45d79a9598445368e5206013cad8b425efc16c3
data/README.md CHANGED
@@ -5,7 +5,7 @@ Ruby bindings to [RE2][], a "fast, safe, thread-friendly alternative to
5
5
  backtracking regular expression engines like those used in PCRE, Perl, and
6
6
  Python".
7
7
 
8
- **Current version:** 2.3.0
8
+ **Current version:** 2.4.2
9
9
  **Supported Ruby versions:** 2.6, 2.7, 3.0, 3.1, 3.2
10
10
  **Bundled RE2 version:** libre2.11 (2023-11-01)
11
11
  **Supported RE2 versions:** libre2.0 (< 2020-03-02), libre2.1 (2020-03-02), libre2.6 (2020-03-03), libre2.7 (2020-05-01), libre2.8 (2020-07-06), libre2.9 (2020-11-01), libre2.10 (2022-12-01), libre2.11 (2023-07-01)
@@ -39,8 +39,9 @@ If you are using Debian, you can install the [libre2-dev][] package like so:
39
39
 
40
40
  $ sudo apt-get install libre2-dev
41
41
 
42
- Recent versions of RE2 require a compiler with C++14 support such as
43
- [clang](http://clang.llvm.org/) 3.4 or [gcc](https://gcc.gnu.org/) 5.
42
+ Recent versions of RE2 require [CMake](https://cmake.org) and a compiler with
43
+ C++14 support such as [clang](http://clang.llvm.org/) 3.4 or
44
+ [gcc](https://gcc.gnu.org/) 5.
44
45
 
45
46
  If you are using a packaged Ruby distribution, make sure you also have the
46
47
  Ruby header files installed such as those provided by the [ruby-dev][] package
@@ -255,13 +256,17 @@ Contributions
255
256
  -------------
256
257
 
257
258
  * Thanks to [Jason Woods](https://github.com/driskell) who contributed the
258
- original implementations of `RE2::MatchData#begin` and `RE2::MatchData#end`;
259
- * Thanks to [Stefano Rivera](https://github.com/stefanor) who first contributed C++11 support;
260
- * Thanks to [Stan Hu](https://github.com/stanhu) for reporting a bug with empty patterns and `RE2::Regexp#scan`, contributing support for libre2.11 (2023-07-01) and for vendoring RE2 and abseil and compiling native gems in 2.0;
259
+ original implementations of `RE2::MatchData#begin` and `RE2::MatchData#end`.
260
+ * Thanks to [Stefano Rivera](https://github.com/stefanor) who first contributed
261
+ C++11 support.
262
+ * Thanks to [Stan Hu](https://github.com/stanhu) for reporting a bug with empty
263
+ patterns and `RE2::Regexp#scan`, contributing support for libre2.11
264
+ (2023-07-01) and for vendoring RE2 and abseil and compiling native gems in
265
+ 2.0.
261
266
  * Thanks to [Sebastian Reitenbach](https://github.com/buzzdeee) for reporting
262
- the deprecation and removal of the `utf8` encoding option in RE2;
267
+ the deprecation and removal of the `utf8` encoding option in RE2.
263
268
  * Thanks to [Sergio Medina](https://github.com/serch) for reporting a bug when
264
- using `RE2::Scanner#scan` with an invalid regular expression;
269
+ using `RE2::Scanner#scan` with an invalid regular expression.
265
270
  * Thanks to [Pritam Baral](https://github.com/pritambaral) for contributing the
266
271
  initial support for `RE2::Set`.
267
272
  * Thanks to [Mike Dalessio](https://github.com/flavorjones) for reviewing the
@@ -269,6 +274,9 @@ Contributions
269
274
  * Thanks to [Peter Zhu](https://github.com/peterzhu2118) for
270
275
  [ruby_memcheck](https://github.com/Shopify/ruby_memcheck) and helping find
271
276
  the memory leaks fixed in 2.1.3.
277
+ * Thanks to [Jean Boussier](https://github.com/byroot) for contributing the
278
+ switch to Ruby's `TypedData` API and the resulting garbage collection
279
+ improvements in 2.4.0.
272
280
 
273
281
  Contact
274
282
  -------
data/ext/re2/extconf.rb CHANGED
@@ -128,6 +128,7 @@ def build_extension(static_p = false)
128
128
 
129
129
  have_library("stdc++")
130
130
  have_header("stdint.h")
131
+ have_func("rb_gc_mark_movable") # introduced in Ruby 2.7
131
132
 
132
133
  if !static_p and !have_library("re2")
133
134
  abort "You must have re2 installed and specified with --with-re2-dir, please see https://github.com/google/re2/wiki/Install"
data/ext/re2/re2.cc CHANGED
@@ -122,49 +122,145 @@ static void parse_re2_options(RE2::Options* re2_options, const VALUE options) {
122
122
  }
123
123
  }
124
124
 
125
- static void re2_matchdata_mark(re2_matchdata* self) {
126
- rb_gc_mark(self->regexp);
127
- rb_gc_mark(self->text);
125
+ /* For compatibility with ruby < 2.7 */
126
+ #ifdef HAVE_RB_GC_MARK_MOVABLE
127
+ #define re2_compact_callback(x) .dcompact = (x),
128
+ #else
129
+ #define rb_gc_mark_movable(x) rb_gc_mark(x)
130
+ #define re2_compact_callback(x)
131
+ #endif
132
+
133
+ static void re2_matchdata_mark(void *ptr) {
134
+ re2_matchdata *m = reinterpret_cast<re2_matchdata *>(ptr);
135
+ rb_gc_mark_movable(m->regexp);
136
+ rb_gc_mark_movable(m->text);
137
+ }
138
+
139
+ #ifdef HAVE_RB_GC_MARK_MOVABLE
140
+ static void re2_matchdata_compact(void *ptr) {
141
+ re2_matchdata *m = reinterpret_cast<re2_matchdata *>(ptr);
142
+ m->regexp = rb_gc_location(m->regexp);
143
+ m->text = rb_gc_location(m->text);
144
+ }
145
+ #endif
146
+
147
+ static void re2_matchdata_free(void *ptr) {
148
+ re2_matchdata *m = reinterpret_cast<re2_matchdata *>(ptr);
149
+ if (m->matches) {
150
+ delete[] m->matches;
151
+ }
152
+ xfree(m);
128
153
  }
129
154
 
130
- static void re2_matchdata_free(re2_matchdata* self) {
131
- if (self->matches) {
132
- delete[] self->matches;
155
+ static size_t re2_matchdata_memsize(const void *ptr) {
156
+ const re2_matchdata *m = reinterpret_cast<const re2_matchdata *>(ptr);
157
+ size_t size = sizeof(*m);
158
+ if (m->matches) {
159
+ size += sizeof(*m->matches) * m->number_of_matches;
133
160
  }
134
- free(self);
161
+
162
+ return size;
163
+ }
164
+
165
+ static const rb_data_type_t re2_matchdata_data_type = {
166
+ .wrap_struct_name = "RE2::MatchData",
167
+ .function = {
168
+ .dmark = re2_matchdata_mark,
169
+ .dfree = re2_matchdata_free,
170
+ .dsize = re2_matchdata_memsize,
171
+ re2_compact_callback(re2_matchdata_compact)
172
+ },
173
+ // IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
174
+ // macro to update VALUE references, so as to trigger write barriers.
175
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
176
+ };
177
+
178
+ static void re2_scanner_mark(void *ptr) {
179
+ re2_scanner *s = reinterpret_cast<re2_scanner *>(ptr);
180
+ rb_gc_mark_movable(s->regexp);
181
+ rb_gc_mark_movable(s->text);
182
+ }
183
+
184
+ #ifdef HAVE_RB_GC_MARK_MOVABLE
185
+ static void re2_scanner_compact(void *ptr) {
186
+ re2_scanner *s = reinterpret_cast<re2_scanner *>(ptr);
187
+ s->regexp = rb_gc_location(s->regexp);
188
+ s->text = rb_gc_location(s->text);
135
189
  }
190
+ #endif
136
191
 
137
- static void re2_scanner_mark(re2_scanner* self) {
138
- rb_gc_mark(self->regexp);
139
- rb_gc_mark(self->text);
192
+ static void re2_scanner_free(void *ptr) {
193
+ re2_scanner *s = reinterpret_cast<re2_scanner *>(ptr);
194
+ if (s->input) {
195
+ delete s->input;
196
+ }
197
+ xfree(s);
140
198
  }
141
199
 
142
- static void re2_scanner_free(re2_scanner* self) {
143
- if (self->input) {
144
- delete self->input;
200
+ static size_t re2_scanner_memsize(const void *ptr) {
201
+ const re2_scanner *s = reinterpret_cast<const re2_scanner *>(ptr);
202
+ size_t size = sizeof(*s);
203
+ if (s->input) {
204
+ size += sizeof(*s->input);
145
205
  }
146
- free(self);
206
+
207
+ return size;
208
+ }
209
+
210
+ static const rb_data_type_t re2_scanner_data_type = {
211
+ .wrap_struct_name = "RE2::Scanner",
212
+ .function = {
213
+ .dmark = re2_scanner_mark,
214
+ .dfree = re2_scanner_free,
215
+ .dsize = re2_scanner_memsize,
216
+ re2_compact_callback(re2_scanner_compact)
217
+ },
218
+ // IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
219
+ // macro to update VALUE references, so as to trigger write barriers.
220
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
221
+ };
222
+
223
+ static void re2_regexp_free(void *ptr) {
224
+ re2_pattern *p = reinterpret_cast<re2_pattern *>(ptr);
225
+ if (p->pattern) {
226
+ delete p->pattern;
227
+ }
228
+ xfree(p);
147
229
  }
148
230
 
149
- static void re2_regexp_free(re2_pattern* self) {
150
- if (self->pattern) {
151
- delete self->pattern;
231
+ static size_t re2_regexp_memsize(const void *ptr) {
232
+ const re2_pattern *p = reinterpret_cast<const re2_pattern *>(ptr);
233
+ size_t size = sizeof(*p);
234
+ if (p->pattern) {
235
+ size += sizeof(*p->pattern);
152
236
  }
153
- free(self);
237
+
238
+ return size;
154
239
  }
155
240
 
241
+ static const rb_data_type_t re2_regexp_data_type = {
242
+ .wrap_struct_name = "RE2::Regexp",
243
+ .function = {
244
+ .dmark = NULL,
245
+ .dfree = re2_regexp_free,
246
+ .dsize = re2_regexp_memsize,
247
+ },
248
+ // IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
249
+ // macro to update VALUE references, so as to trigger write barriers.
250
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
251
+ };
252
+
156
253
  static VALUE re2_matchdata_allocate(VALUE klass) {
157
254
  re2_matchdata *m;
158
255
 
159
- return Data_Make_Struct(klass, re2_matchdata, re2_matchdata_mark,
160
- re2_matchdata_free, m);
256
+ return TypedData_Make_Struct(klass, re2_matchdata, &re2_matchdata_data_type,
257
+ m);
161
258
  }
162
259
 
163
260
  static VALUE re2_scanner_allocate(VALUE klass) {
164
261
  re2_scanner *c;
165
262
 
166
- return Data_Make_Struct(klass, re2_scanner, re2_scanner_mark,
167
- re2_scanner_free, c);
263
+ return TypedData_Make_Struct(klass, re2_scanner, &re2_scanner_data_type, c);
168
264
  }
169
265
 
170
266
  /*
@@ -177,7 +273,7 @@ static VALUE re2_scanner_allocate(VALUE klass) {
177
273
  */
178
274
  static VALUE re2_matchdata_string(const VALUE self) {
179
275
  re2_matchdata *m;
180
- Data_Get_Struct(self, re2_matchdata, m);
276
+ TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
181
277
 
182
278
  return m->text;
183
279
  }
@@ -192,7 +288,7 @@ static VALUE re2_matchdata_string(const VALUE self) {
192
288
  */
193
289
  static VALUE re2_scanner_string(const VALUE self) {
194
290
  re2_scanner *c;
195
- Data_Get_Struct(self, re2_scanner, c);
291
+ TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
196
292
 
197
293
  return c->text;
198
294
  }
@@ -207,7 +303,7 @@ static VALUE re2_scanner_string(const VALUE self) {
207
303
  */
208
304
  static VALUE re2_scanner_eof(const VALUE self) {
209
305
  re2_scanner *c;
210
- Data_Get_Struct(self, re2_scanner, c);
306
+ TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
211
307
 
212
308
  return BOOL2RUBY(c->eof);
213
309
  }
@@ -225,7 +321,7 @@ static VALUE re2_scanner_eof(const VALUE self) {
225
321
  */
226
322
  static VALUE re2_scanner_rewind(VALUE self) {
227
323
  re2_scanner *c;
228
- Data_Get_Struct(self, re2_scanner, c);
324
+ TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
229
325
 
230
326
  delete c->input;
231
327
  c->input = new(std::nothrow) re2::StringPiece(RSTRING_PTR(c->text));
@@ -252,8 +348,8 @@ static VALUE re2_scanner_scan(VALUE self) {
252
348
  re2_pattern *p;
253
349
  re2_scanner *c;
254
350
 
255
- Data_Get_Struct(self, re2_scanner, c);
256
- Data_Get_Struct(c->regexp, re2_pattern, p);
351
+ TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
352
+ TypedData_Get_Struct(c->regexp, re2_pattern, &re2_regexp_data_type, p);
257
353
 
258
354
  std::vector<RE2::Arg> argv(c->number_of_capturing_groups);
259
355
  std::vector<RE2::Arg*> args(c->number_of_capturing_groups);
@@ -308,8 +404,8 @@ static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
308
404
  re2_matchdata *m;
309
405
  re2_pattern *p;
310
406
 
311
- Data_Get_Struct(self, re2_matchdata, m);
312
- Data_Get_Struct(m->regexp, re2_pattern, p);
407
+ TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
408
+ TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
313
409
 
314
410
  int id;
315
411
 
@@ -349,7 +445,8 @@ static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
349
445
  */
350
446
  static VALUE re2_matchdata_size(const VALUE self) {
351
447
  re2_matchdata *m;
352
- Data_Get_Struct(self, re2_matchdata, m);
448
+
449
+ TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
353
450
 
354
451
  return INT2FIX(m->number_of_matches);
355
452
  }
@@ -367,7 +464,7 @@ static VALUE re2_matchdata_size(const VALUE self) {
367
464
  static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
368
465
  re2_matchdata *m;
369
466
 
370
- Data_Get_Struct(self, re2_matchdata, m);
467
+ TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
371
468
 
372
469
  re2::StringPiece *match = re2_matchdata_find_match(n, self);
373
470
  if (match == NULL) {
@@ -392,7 +489,7 @@ static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
392
489
  static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
393
490
  re2_matchdata *m;
394
491
 
395
- Data_Get_Struct(self, re2_matchdata, m);
492
+ TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
396
493
 
397
494
  re2::StringPiece *match = re2_matchdata_find_match(n, self);
398
495
  if (match == NULL) {
@@ -414,7 +511,7 @@ static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
414
511
  */
415
512
  static VALUE re2_matchdata_regexp(const VALUE self) {
416
513
  re2_matchdata *m;
417
- Data_Get_Struct(self, re2_matchdata, m);
514
+ TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
418
515
 
419
516
  return m->regexp;
420
517
  }
@@ -429,7 +526,7 @@ static VALUE re2_matchdata_regexp(const VALUE self) {
429
526
  */
430
527
  static VALUE re2_scanner_regexp(const VALUE self) {
431
528
  re2_scanner *c;
432
- Data_Get_Struct(self, re2_scanner, c);
529
+ TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
433
530
 
434
531
  return c->regexp;
435
532
  }
@@ -437,7 +534,7 @@ static VALUE re2_scanner_regexp(const VALUE self) {
437
534
  static VALUE re2_regexp_allocate(VALUE klass) {
438
535
  re2_pattern *p;
439
536
 
440
- return Data_Make_Struct(klass, re2_pattern, 0, re2_regexp_free, p);
537
+ return TypedData_Make_Struct(klass, re2_pattern, &re2_regexp_data_type, p);
441
538
  }
442
539
 
443
540
  /*
@@ -456,8 +553,8 @@ static VALUE re2_matchdata_to_a(const VALUE self) {
456
553
  re2_matchdata *m;
457
554
  re2_pattern *p;
458
555
 
459
- Data_Get_Struct(self, re2_matchdata, m);
460
- Data_Get_Struct(m->regexp, re2_pattern, p);
556
+ TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
557
+ TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
461
558
 
462
559
  VALUE array = rb_ary_new2(m->number_of_matches);
463
560
  for (int i = 0; i < m->number_of_matches; ++i) {
@@ -478,8 +575,8 @@ static VALUE re2_matchdata_nth_match(int nth, const VALUE self) {
478
575
  re2_matchdata *m;
479
576
  re2_pattern *p;
480
577
 
481
- Data_Get_Struct(self, re2_matchdata, m);
482
- Data_Get_Struct(m->regexp, re2_pattern, p);
578
+ TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
579
+ TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
483
580
 
484
581
  if (nth < 0 || nth >= m->number_of_matches) {
485
582
  return Qnil;
@@ -499,8 +596,8 @@ static VALUE re2_matchdata_named_match(const char* name, const VALUE self) {
499
596
  re2_matchdata *m;
500
597
  re2_pattern *p;
501
598
 
502
- Data_Get_Struct(self, re2_matchdata, m);
503
- Data_Get_Struct(m->regexp, re2_pattern, p);
599
+ TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
600
+ TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
504
601
 
505
602
  const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
506
603
  std::map<std::string, int>::const_iterator search = groups.find(name);
@@ -599,8 +696,8 @@ static VALUE re2_matchdata_inspect(const VALUE self) {
599
696
  re2_matchdata *m;
600
697
  re2_pattern *p;
601
698
 
602
- Data_Get_Struct(self, re2_matchdata, m);
603
- Data_Get_Struct(m->regexp, re2_pattern, p);
699
+ TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
700
+ TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
604
701
 
605
702
  std::ostringstream output;
606
703
  output << "#<RE2::MatchData";
@@ -651,8 +748,8 @@ static VALUE re2_matchdata_deconstruct(const VALUE self) {
651
748
  re2_matchdata *m;
652
749
  re2_pattern *p;
653
750
 
654
- Data_Get_Struct(self, re2_matchdata, m);
655
- Data_Get_Struct(m->regexp, re2_pattern, p);
751
+ TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
752
+ TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
656
753
 
657
754
  VALUE array = rb_ary_new2(m->number_of_matches - 1);
658
755
  for (int i = 1; i < m->number_of_matches; ++i) {
@@ -701,8 +798,8 @@ static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys)
701
798
  re2_matchdata *m;
702
799
  re2_pattern *p;
703
800
 
704
- Data_Get_Struct(self, re2_matchdata, m);
705
- Data_Get_Struct(m->regexp, re2_pattern, p);
801
+ TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
802
+ TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
706
803
 
707
804
  const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
708
805
  VALUE capturing_groups = rb_hash_new();
@@ -790,7 +887,7 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
790
887
  /* Ensure pattern is a string. */
791
888
  StringValue(pattern);
792
889
 
793
- Data_Get_Struct(self, re2_pattern, p);
890
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
794
891
 
795
892
  if (RTEST(options)) {
796
893
  RE2::Options re2_options;
@@ -823,7 +920,7 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
823
920
  static VALUE re2_regexp_inspect(const VALUE self) {
824
921
  re2_pattern *p;
825
922
 
826
- Data_Get_Struct(self, re2_pattern, p);
923
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
827
924
 
828
925
  std::ostringstream output;
829
926
 
@@ -847,7 +944,7 @@ static VALUE re2_regexp_inspect(const VALUE self) {
847
944
  */
848
945
  static VALUE re2_regexp_to_s(const VALUE self) {
849
946
  re2_pattern *p;
850
- Data_Get_Struct(self, re2_pattern, p);
947
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
851
948
 
852
949
  return encoded_str_new(p->pattern->pattern().data(),
853
950
  p->pattern->pattern().size(),
@@ -865,7 +962,7 @@ static VALUE re2_regexp_to_s(const VALUE self) {
865
962
  */
866
963
  static VALUE re2_regexp_ok(const VALUE self) {
867
964
  re2_pattern *p;
868
- Data_Get_Struct(self, re2_pattern, p);
965
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
869
966
 
870
967
  return BOOL2RUBY(p->pattern->ok());
871
968
  }
@@ -881,7 +978,7 @@ static VALUE re2_regexp_ok(const VALUE self) {
881
978
  */
882
979
  static VALUE re2_regexp_utf8(const VALUE self) {
883
980
  re2_pattern *p;
884
- Data_Get_Struct(self, re2_pattern, p);
981
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
885
982
 
886
983
  return BOOL2RUBY(p->pattern->options().encoding() == RE2::Options::EncodingUTF8);
887
984
  }
@@ -897,7 +994,7 @@ static VALUE re2_regexp_utf8(const VALUE self) {
897
994
  */
898
995
  static VALUE re2_regexp_posix_syntax(const VALUE self) {
899
996
  re2_pattern *p;
900
- Data_Get_Struct(self, re2_pattern, p);
997
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
901
998
 
902
999
  return BOOL2RUBY(p->pattern->options().posix_syntax());
903
1000
  }
@@ -913,7 +1010,7 @@ static VALUE re2_regexp_posix_syntax(const VALUE self) {
913
1010
  */
914
1011
  static VALUE re2_regexp_longest_match(const VALUE self) {
915
1012
  re2_pattern *p;
916
- Data_Get_Struct(self, re2_pattern, p);
1013
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
917
1014
 
918
1015
  return BOOL2RUBY(p->pattern->options().longest_match());
919
1016
  }
@@ -929,7 +1026,7 @@ static VALUE re2_regexp_longest_match(const VALUE self) {
929
1026
  */
930
1027
  static VALUE re2_regexp_log_errors(const VALUE self) {
931
1028
  re2_pattern *p;
932
- Data_Get_Struct(self, re2_pattern, p);
1029
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
933
1030
 
934
1031
  return BOOL2RUBY(p->pattern->options().log_errors());
935
1032
  }
@@ -945,7 +1042,7 @@ static VALUE re2_regexp_log_errors(const VALUE self) {
945
1042
  */
946
1043
  static VALUE re2_regexp_max_mem(const VALUE self) {
947
1044
  re2_pattern *p;
948
- Data_Get_Struct(self, re2_pattern, p);
1045
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
949
1046
 
950
1047
  return INT2FIX(p->pattern->options().max_mem());
951
1048
  }
@@ -961,7 +1058,7 @@ static VALUE re2_regexp_max_mem(const VALUE self) {
961
1058
  */
962
1059
  static VALUE re2_regexp_literal(const VALUE self) {
963
1060
  re2_pattern *p;
964
- Data_Get_Struct(self, re2_pattern, p);
1061
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
965
1062
 
966
1063
  return BOOL2RUBY(p->pattern->options().literal());
967
1064
  }
@@ -977,7 +1074,7 @@ static VALUE re2_regexp_literal(const VALUE self) {
977
1074
  */
978
1075
  static VALUE re2_regexp_never_nl(const VALUE self) {
979
1076
  re2_pattern *p;
980
- Data_Get_Struct(self, re2_pattern, p);
1077
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
981
1078
 
982
1079
  return BOOL2RUBY(p->pattern->options().never_nl());
983
1080
  }
@@ -993,7 +1090,7 @@ static VALUE re2_regexp_never_nl(const VALUE self) {
993
1090
  */
994
1091
  static VALUE re2_regexp_case_sensitive(const VALUE self) {
995
1092
  re2_pattern *p;
996
- Data_Get_Struct(self, re2_pattern, p);
1093
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
997
1094
 
998
1095
  return BOOL2RUBY(p->pattern->options().case_sensitive());
999
1096
  }
@@ -1023,7 +1120,7 @@ static VALUE re2_regexp_case_insensitive(const VALUE self) {
1023
1120
  */
1024
1121
  static VALUE re2_regexp_perl_classes(const VALUE self) {
1025
1122
  re2_pattern *p;
1026
- Data_Get_Struct(self, re2_pattern, p);
1123
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1027
1124
 
1028
1125
  return BOOL2RUBY(p->pattern->options().perl_classes());
1029
1126
  }
@@ -1039,7 +1136,7 @@ static VALUE re2_regexp_perl_classes(const VALUE self) {
1039
1136
  */
1040
1137
  static VALUE re2_regexp_word_boundary(const VALUE self) {
1041
1138
  re2_pattern *p;
1042
- Data_Get_Struct(self, re2_pattern, p);
1139
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1043
1140
 
1044
1141
  return BOOL2RUBY(p->pattern->options().word_boundary());
1045
1142
  }
@@ -1055,7 +1152,7 @@ static VALUE re2_regexp_word_boundary(const VALUE self) {
1055
1152
  */
1056
1153
  static VALUE re2_regexp_one_line(const VALUE self) {
1057
1154
  re2_pattern *p;
1058
- Data_Get_Struct(self, re2_pattern, p);
1155
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1059
1156
 
1060
1157
  return BOOL2RUBY(p->pattern->options().one_line());
1061
1158
  }
@@ -1068,7 +1165,7 @@ static VALUE re2_regexp_one_line(const VALUE self) {
1068
1165
  */
1069
1166
  static VALUE re2_regexp_error(const VALUE self) {
1070
1167
  re2_pattern *p;
1071
- Data_Get_Struct(self, re2_pattern, p);
1168
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1072
1169
 
1073
1170
  if (p->pattern->ok()) {
1074
1171
  return Qnil;
@@ -1089,7 +1186,7 @@ static VALUE re2_regexp_error(const VALUE self) {
1089
1186
  */
1090
1187
  static VALUE re2_regexp_error_arg(const VALUE self) {
1091
1188
  re2_pattern *p;
1092
- Data_Get_Struct(self, re2_pattern, p);
1189
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1093
1190
 
1094
1191
  if (p->pattern->ok()) {
1095
1192
  return Qnil;
@@ -1109,7 +1206,7 @@ static VALUE re2_regexp_error_arg(const VALUE self) {
1109
1206
  */
1110
1207
  static VALUE re2_regexp_program_size(const VALUE self) {
1111
1208
  re2_pattern *p;
1112
- Data_Get_Struct(self, re2_pattern, p);
1209
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1113
1210
 
1114
1211
  return INT2FIX(p->pattern->ProgramSize());
1115
1212
  }
@@ -1123,7 +1220,7 @@ static VALUE re2_regexp_program_size(const VALUE self) {
1123
1220
  static VALUE re2_regexp_options(const VALUE self) {
1124
1221
  re2_pattern *p;
1125
1222
 
1126
- Data_Get_Struct(self, re2_pattern, p);
1223
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1127
1224
  VALUE options = rb_hash_new();
1128
1225
 
1129
1226
  rb_hash_aset(options, ID2SYM(id_utf8),
@@ -1174,7 +1271,7 @@ static VALUE re2_regexp_options(const VALUE self) {
1174
1271
  */
1175
1272
  static VALUE re2_regexp_number_of_capturing_groups(const VALUE self) {
1176
1273
  re2_pattern *p;
1177
- Data_Get_Struct(self, re2_pattern, p);
1274
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1178
1275
 
1179
1276
  return INT2FIX(p->pattern->NumberOfCapturingGroups());
1180
1277
  }
@@ -1191,7 +1288,7 @@ static VALUE re2_regexp_number_of_capturing_groups(const VALUE self) {
1191
1288
  static VALUE re2_regexp_named_capturing_groups(const VALUE self) {
1192
1289
  re2_pattern *p;
1193
1290
 
1194
- Data_Get_Struct(self, re2_pattern, p);
1291
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1195
1292
  const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
1196
1293
  VALUE capturing_groups = rb_hash_new();
1197
1294
 
@@ -1267,7 +1364,7 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
1267
1364
  /* Ensure text is a string. */
1268
1365
  StringValue(text);
1269
1366
 
1270
- Data_Get_Struct(self, re2_pattern, p);
1367
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1271
1368
 
1272
1369
  int n;
1273
1370
 
@@ -1299,11 +1396,13 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
1299
1396
  n += 1;
1300
1397
 
1301
1398
  VALUE matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
1302
- Data_Get_Struct(matchdata, re2_matchdata, m);
1399
+ TypedData_Get_Struct(matchdata, re2_matchdata, &re2_matchdata_data_type, m);
1303
1400
  m->matches = new(std::nothrow) re2::StringPiece[n];
1304
- m->regexp = self;
1305
- m->text = rb_str_dup(text);
1306
- rb_str_freeze(m->text);
1401
+ RB_OBJ_WRITE(matchdata, &m->regexp, self);
1402
+ if (!RTEST(rb_obj_frozen_p(text))) {
1403
+ text = rb_str_freeze(rb_str_dup(text));
1404
+ }
1405
+ RB_OBJ_WRITE(matchdata, &m->text, text);
1307
1406
 
1308
1407
  if (m->matches == 0) {
1309
1408
  rb_raise(rb_eNoMemError,
@@ -1352,13 +1451,13 @@ static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
1352
1451
  re2_pattern *p;
1353
1452
  re2_scanner *c;
1354
1453
 
1355
- Data_Get_Struct(self, re2_pattern, p);
1454
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1356
1455
  VALUE scanner = rb_class_new_instance(0, 0, re2_cScanner);
1357
- Data_Get_Struct(scanner, re2_scanner, c);
1456
+ TypedData_Get_Struct(scanner, re2_scanner, &re2_scanner_data_type, c);
1358
1457
 
1359
1458
  c->input = new(std::nothrow) re2::StringPiece(RSTRING_PTR(text));
1360
- c->regexp = self;
1361
- c->text = text;
1459
+ RB_OBJ_WRITE(scanner, &c->regexp, self);
1460
+ RB_OBJ_WRITE(scanner, &c->text, text);
1362
1461
 
1363
1462
  if (p->pattern->ok()) {
1364
1463
  c->number_of_capturing_groups = p->pattern->NumberOfCapturingGroups();
@@ -1402,7 +1501,7 @@ static VALUE re2_Replace(VALUE, VALUE str, VALUE pattern,
1402
1501
 
1403
1502
  /* Do the replacement. */
1404
1503
  if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
1405
- Data_Get_Struct(pattern, re2_pattern, p);
1504
+ TypedData_Get_Struct(pattern, re2_pattern, &re2_regexp_data_type, p);
1406
1505
  RE2::Replace(&str_as_string, *p->pattern, RSTRING_PTR(rewrite));
1407
1506
 
1408
1507
  return encoded_str_new(str_as_string.data(), str_as_string.size(),
@@ -1446,7 +1545,7 @@ static VALUE re2_GlobalReplace(VALUE, VALUE str, VALUE pattern,
1446
1545
 
1447
1546
  /* Do the replacement. */
1448
1547
  if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
1449
- Data_Get_Struct(pattern, re2_pattern, p);
1548
+ TypedData_Get_Struct(pattern, re2_pattern, &re2_regexp_data_type, p);
1450
1549
  RE2::GlobalReplace(&str_as_string, *p->pattern, RSTRING_PTR(rewrite));
1451
1550
 
1452
1551
  return encoded_str_new(str_as_string.data(), str_as_string.size(),
@@ -1480,16 +1579,39 @@ static VALUE re2_QuoteMeta(VALUE, VALUE unquoted) {
1480
1579
  return rb_str_new(quoted_string.data(), quoted_string.size());
1481
1580
  }
1482
1581
 
1483
- static void re2_set_free(re2_set *self) {
1484
- if (self->set) {
1485
- delete self->set;
1582
+ static void re2_set_free(void *ptr) {
1583
+ re2_set *s = reinterpret_cast<re2_set *>(ptr);
1584
+ if (s->set) {
1585
+ delete s->set;
1486
1586
  }
1487
- free(self);
1587
+ xfree(s);
1488
1588
  }
1489
1589
 
1590
+ static size_t re2_set_memsize(const void *ptr) {
1591
+ const re2_set *s = reinterpret_cast<const re2_set *>(ptr);
1592
+ size_t size = sizeof(*s);
1593
+ if (s->set) {
1594
+ size += sizeof(*s->set);
1595
+ }
1596
+
1597
+ return size;
1598
+ }
1599
+
1600
+ static const rb_data_type_t re2_set_data_type = {
1601
+ .wrap_struct_name = "RE2::Set",
1602
+ .function = {
1603
+ .dmark = NULL,
1604
+ .dfree = re2_set_free,
1605
+ .dsize = re2_set_memsize,
1606
+ },
1607
+ // IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
1608
+ // macro to update VALUE references, so as to trigger write barriers.
1609
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
1610
+ };
1611
+
1490
1612
  static VALUE re2_set_allocate(VALUE klass) {
1491
1613
  re2_set *s;
1492
- VALUE result = Data_Make_Struct(klass, re2_set, 0, re2_set_free, s);
1614
+ VALUE result = TypedData_Make_Struct(klass, re2_set, &re2_set_data_type, s);
1493
1615
 
1494
1616
  return result;
1495
1617
  }
@@ -1540,7 +1662,7 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
1540
1662
  re2_set *s;
1541
1663
 
1542
1664
  rb_scan_args(argc, argv, "02", &anchor, &options);
1543
- Data_Get_Struct(self, re2_set, s);
1665
+ TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);
1544
1666
 
1545
1667
  RE2::Anchor re2_anchor = RE2::UNANCHORED;
1546
1668
 
@@ -1588,7 +1710,7 @@ static VALUE re2_set_add(VALUE self, VALUE pattern) {
1588
1710
  StringValue(pattern);
1589
1711
 
1590
1712
  re2_set *s;
1591
- Data_Get_Struct(self, re2_set, s);
1713
+ TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);
1592
1714
 
1593
1715
  /* To prevent the memory of the err string leaking when we call rb_raise,
1594
1716
  * take a copy of it and let it go out of scope.
@@ -1621,7 +1743,7 @@ static VALUE re2_set_add(VALUE self, VALUE pattern) {
1621
1743
  */
1622
1744
  static VALUE re2_set_compile(VALUE self) {
1623
1745
  re2_set *s;
1624
- Data_Get_Struct(self, re2_set, s);
1746
+ TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);
1625
1747
 
1626
1748
  return BOOL2RUBY(s->set->Compile());
1627
1749
  }
@@ -1688,7 +1810,7 @@ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
1688
1810
 
1689
1811
  StringValue(str);
1690
1812
  re2_set *s;
1691
- Data_Get_Struct(self, re2_set, s);
1813
+ TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);
1692
1814
 
1693
1815
  if (RTEST(options)) {
1694
1816
  Check_Type(options, T_HASH);
@@ -1755,12 +1877,14 @@ extern "C" void Init_re2(void) {
1755
1877
  re2_eSetUnsupportedError = rb_define_class_under(re2_cSet, "UnsupportedError",
1756
1878
  rb_const_get(rb_cObject, rb_intern("StandardError")));
1757
1879
 
1758
- rb_define_alloc_func(re2_cRegexp, (VALUE (*)(VALUE))re2_regexp_allocate);
1880
+ rb_define_alloc_func(re2_cRegexp,
1881
+ reinterpret_cast<VALUE (*)(VALUE)>(re2_regexp_allocate));
1759
1882
  rb_define_alloc_func(re2_cMatchData,
1760
- (VALUE (*)(VALUE))re2_matchdata_allocate);
1883
+ reinterpret_cast<VALUE (*)(VALUE)>(re2_matchdata_allocate));
1761
1884
  rb_define_alloc_func(re2_cScanner,
1762
- (VALUE (*)(VALUE))re2_scanner_allocate);
1763
- rb_define_alloc_func(re2_cSet, (VALUE (*)(VALUE))re2_set_allocate);
1885
+ reinterpret_cast<VALUE (*)(VALUE)>(re2_scanner_allocate));
1886
+ rb_define_alloc_func(re2_cSet,
1887
+ reinterpret_cast<VALUE (*)(VALUE)>(re2_set_allocate));
1764
1888
 
1765
1889
  rb_define_method(re2_cMatchData, "string",
1766
1890
  RUBY_METHOD_FUNC(re2_matchdata_string), 0);
data/lib/re2/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RE2
4
- VERSION = "2.3.0"
4
+ VERSION = "2.4.2"
5
5
  end
data/re2.gemspec CHANGED
@@ -11,7 +11,6 @@ Gem::Specification.new do |s|
11
11
  s.license = "BSD-3-Clause"
12
12
  s.required_ruby_version = ">= 2.6.0"
13
13
  s.files = [
14
- ".rspec",
15
14
  "dependencies.yml",
16
15
  "ext/re2/extconf.rb",
17
16
  "ext/re2/re2.cc",
@@ -28,6 +27,7 @@ Gem::Specification.new do |s|
28
27
  "re2.gemspec"
29
28
  ]
30
29
  s.test_files = [
30
+ ".rspec",
31
31
  "spec/spec_helper.rb",
32
32
  "spec/re2_spec.rb",
33
33
  "spec/kernel_spec.rb",
@@ -1,5 +1,14 @@
1
1
  # encoding: utf-8
2
+ require 'objspace'
3
+
2
4
  RSpec.describe RE2::MatchData do
5
+ it "reports a larger consuming memory size when it has more matches" do
6
+ matches1 = RE2::Regexp.new('w(o)').match('woo')
7
+ matches2 = RE2::Regexp.new('w(o)(o)').match('woo')
8
+
9
+ expect(ObjectSpace.memsize_of(matches1)).to be < ObjectSpace.memsize_of(matches2)
10
+ end
11
+
3
12
  describe "#to_a" do
4
13
  it "is populated with the match and capturing groups" do
5
14
  a = RE2::Regexp.new('w(o)(o)').match('woo').to_a
@@ -129,6 +138,12 @@ RSpec.describe RE2::MatchData do
129
138
  re = RE2::Regexp.new('(\D+)').match("bob")
130
139
  expect(re.string).to be_frozen
131
140
  end
141
+
142
+ it "does not copy the string if it was already frozen" do
143
+ string = "bob".freeze
144
+ re = RE2::Regexp.new('(\D+)').match(string)
145
+ expect(re.string).to equal(string)
146
+ end
132
147
  end
133
148
 
134
149
  describe "#size" do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: re2
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.3.0
4
+ version: 2.4.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paul Mucur
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2023-10-31 00:00:00.000000000 Z
12
+ date: 2023-11-17 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake-compiler
@@ -124,6 +124,7 @@ signing_key:
124
124
  specification_version: 4
125
125
  summary: Ruby bindings to RE2.
126
126
  test_files:
127
+ - ".rspec"
127
128
  - spec/spec_helper.rb
128
129
  - spec/re2_spec.rb
129
130
  - spec/kernel_spec.rb