re2 2.3.0-arm64-darwin → 2.4.0-arm64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +7 -3
- data/ext/re2/extconf.rb +1 -0
- data/ext/re2/re2.cc +203 -81
- data/lib/2.6/re2.bundle +0 -0
- data/lib/2.7/re2.bundle +0 -0
- data/lib/3.0/re2.bundle +0 -0
- data/lib/3.1/re2.bundle +0 -0
- data/lib/3.2/re2.bundle +0 -0
- data/lib/re2/version.rb +1 -1
- data/spec/re2/match_data_spec.rb +6 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9f857ab018afa6ffc22097316b2ab22221f3cfdb08b7760c5f26a412867d4ad8
|
4
|
+
data.tar.gz: 4cd31eb62dbf285b9ed9ad4e51fa4544cf3e92ad83660824608e951232936083
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: afd8e34c9addd078ccb1c722fb0d08d94ea894762fbacb07d17d4f7ab1bc7e299ee5c935b14219e56701c0cfe7ab3a1158b0a0c2f657e11b52e1049332156745
|
7
|
+
data.tar.gz: ca4dd56e046d16271df5f3419e33899a4edf736d68e4caeccf10384df46e2a8b300a4792ea5fea9f13f557cebc9f6a7252708e5e2cb5e4dc429601ff99245d27
|
data/README.md
CHANGED
@@ -5,7 +5,7 @@ Ruby bindings to [RE2][], a "fast, safe, thread-friendly alternative to
|
|
5
5
|
backtracking regular expression engines like those used in PCRE, Perl, and
|
6
6
|
Python".
|
7
7
|
|
8
|
-
**Current version:** 2.3.0
|
8
|
+
**Current version:** 2.4.0
|
9
9
|
**Supported Ruby versions:** 2.6, 2.7, 3.0, 3.1, 3.2
|
10
10
|
**Bundled RE2 version:** libre2.11 (2023-11-01)
|
11
11
|
**Supported RE2 versions:** libre2.0 (< 2020-03-02), libre2.1 (2020-03-02), libre2.6 (2020-03-03), libre2.7 (2020-05-01), libre2.8 (2020-07-06), libre2.9 (2020-11-01), libre2.10 (2022-12-01), libre2.11 (2023-07-01)
|
@@ -39,8 +39,9 @@ If you are using Debian, you can install the [libre2-dev][] package like so:
|
|
39
39
|
|
40
40
|
$ sudo apt-get install libre2-dev
|
41
41
|
|
42
|
-
Recent versions of RE2 require a compiler with
|
43
|
-
[clang](http://clang.llvm.org/) 3.4 or
|
42
|
+
Recent versions of RE2 require [CMake](https://cmake.org) and a compiler with
|
43
|
+
C++14 support such as [clang](http://clang.llvm.org/) 3.4 or
|
44
|
+
[gcc](https://gcc.gnu.org/) 5.
|
44
45
|
|
45
46
|
If you are using a packaged Ruby distribution, make sure you also have the
|
46
47
|
Ruby header files installed such as those provided by the [ruby-dev][] package
|
@@ -269,6 +270,9 @@ Contributions
|
|
269
270
|
* Thanks to [Peter Zhu](https://github.com/peterzhu2118) for
|
270
271
|
[ruby_memcheck](https://github.com/Shopify/ruby_memcheck) and helping find
|
271
272
|
the memory leaks fixed in 2.1.3.
|
273
|
+
* Thanks to [Jean Boussier](https://github.com/byroot) for contributing the
|
274
|
+
switch to Ruby's `TypedData` API and the resulting garbage collection
|
275
|
+
improvements in 2.4.0.
|
272
276
|
|
273
277
|
Contact
|
274
278
|
-------
|
data/ext/re2/extconf.rb
CHANGED
@@ -128,6 +128,7 @@ def build_extension(static_p = false)
|
|
128
128
|
|
129
129
|
have_library("stdc++")
|
130
130
|
have_header("stdint.h")
|
131
|
+
have_func("rb_gc_mark_movable") # introduced in Ruby 2.7
|
131
132
|
|
132
133
|
if !static_p and !have_library("re2")
|
133
134
|
abort "You must have re2 installed and specified with --with-re2-dir, please see https://github.com/google/re2/wiki/Install"
|
data/ext/re2/re2.cc
CHANGED
@@ -122,49 +122,145 @@ static void parse_re2_options(RE2::Options* re2_options, const VALUE options) {
|
|
122
122
|
}
|
123
123
|
}
|
124
124
|
|
125
|
-
|
126
|
-
|
127
|
-
|
125
|
+
/* For compatibility with ruby < 2.7 */
|
126
|
+
#ifdef HAVE_RB_GC_MARK_MOVABLE
|
127
|
+
#define re2_compact_callback(x) .dcompact = (x),
|
128
|
+
#else
|
129
|
+
#define rb_gc_mark_movable(x) rb_gc_mark(x)
|
130
|
+
#define re2_compact_callback(x)
|
131
|
+
#endif
|
132
|
+
|
133
|
+
static void re2_matchdata_mark(void *data) {
|
134
|
+
re2_matchdata *self = (re2_matchdata *)data;
|
135
|
+
rb_gc_mark_movable(self->regexp);
|
136
|
+
rb_gc_mark_movable(self->text);
|
128
137
|
}
|
129
138
|
|
130
|
-
|
139
|
+
#ifdef HAVE_RB_GC_MARK_MOVABLE
|
140
|
+
static void re2_matchdata_update_references(void *data) {
|
141
|
+
re2_matchdata *self = (re2_matchdata *)data;
|
142
|
+
self->regexp = rb_gc_location(self->regexp);
|
143
|
+
self->text = rb_gc_location(self->text);
|
144
|
+
}
|
145
|
+
#endif
|
146
|
+
|
147
|
+
static void re2_matchdata_free(void *data) {
|
148
|
+
re2_matchdata *self = (re2_matchdata *)data;
|
131
149
|
if (self->matches) {
|
132
150
|
delete[] self->matches;
|
133
151
|
}
|
134
|
-
|
152
|
+
xfree(self);
|
153
|
+
}
|
154
|
+
|
155
|
+
static size_t re2_matchdata_memsize(const void *data) {
|
156
|
+
const re2_matchdata *self = (const re2_matchdata *)data;
|
157
|
+
size_t size = sizeof(re2_matchdata);
|
158
|
+
if (self->matches) {
|
159
|
+
size += sizeof(self->matches) * self->number_of_matches;
|
160
|
+
}
|
161
|
+
|
162
|
+
return size;
|
135
163
|
}
|
136
164
|
|
137
|
-
static
|
138
|
-
|
139
|
-
|
165
|
+
static const rb_data_type_t re2_matchdata_data_type = {
|
166
|
+
.wrap_struct_name = "RE2::MatchData",
|
167
|
+
.function = {
|
168
|
+
.dmark = re2_matchdata_mark,
|
169
|
+
.dfree = re2_matchdata_free,
|
170
|
+
.dsize = re2_matchdata_memsize,
|
171
|
+
re2_compact_callback(re2_matchdata_update_references)
|
172
|
+
},
|
173
|
+
// IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
|
174
|
+
// macro to update VALUE references, as to trigger write barriers.
|
175
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
|
176
|
+
};
|
177
|
+
|
178
|
+
static void re2_scanner_mark(void *data) {
|
179
|
+
re2_scanner *self = (re2_scanner *)data;
|
180
|
+
rb_gc_mark_movable(self->regexp);
|
181
|
+
rb_gc_mark_movable(self->text);
|
140
182
|
}
|
141
183
|
|
142
|
-
|
184
|
+
#ifdef HAVE_RB_GC_MARK_MOVABLE
|
185
|
+
static void re2_scanner_update_references(void *data) {
|
186
|
+
re2_scanner *self = (re2_scanner *)data;
|
187
|
+
self->regexp = rb_gc_location(self->regexp);
|
188
|
+
self->text = rb_gc_location(self->text);
|
189
|
+
}
|
190
|
+
#endif
|
191
|
+
|
192
|
+
static void re2_scanner_free(void *data) {
|
193
|
+
re2_scanner *self = (re2_scanner *)data;
|
143
194
|
if (self->input) {
|
144
195
|
delete self->input;
|
145
196
|
}
|
146
|
-
|
197
|
+
xfree(self);
|
147
198
|
}
|
148
199
|
|
149
|
-
static void
|
200
|
+
static size_t re2_scanner_memsize(const void *data) {
|
201
|
+
const re2_scanner *self = (const re2_scanner *)data;
|
202
|
+
size_t size = sizeof(re2_scanner);
|
203
|
+
if (self->input) {
|
204
|
+
size += sizeof(self->input);
|
205
|
+
}
|
206
|
+
|
207
|
+
return size;
|
208
|
+
}
|
209
|
+
|
210
|
+
static const rb_data_type_t re2_scanner_data_type = {
|
211
|
+
.wrap_struct_name = "RE2::Scanner",
|
212
|
+
.function = {
|
213
|
+
.dmark = re2_scanner_mark,
|
214
|
+
.dfree = re2_scanner_free,
|
215
|
+
.dsize = re2_scanner_memsize,
|
216
|
+
re2_compact_callback(re2_scanner_update_references)
|
217
|
+
},
|
218
|
+
// IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
|
219
|
+
// macro to update VALUE references, as to trigger write barriers.
|
220
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
|
221
|
+
};
|
222
|
+
|
223
|
+
static void re2_regexp_free(void *data) {
|
224
|
+
re2_pattern *self = (re2_pattern *)data;
|
150
225
|
if (self->pattern) {
|
151
226
|
delete self->pattern;
|
152
227
|
}
|
153
|
-
|
228
|
+
xfree(self);
|
154
229
|
}
|
155
230
|
|
231
|
+
static size_t re2_regexp_memsize(const void *data) {
|
232
|
+
const re2_pattern *self = (const re2_pattern *)data;
|
233
|
+
size_t size = sizeof(re2_pattern);
|
234
|
+
if (self->pattern) {
|
235
|
+
size += sizeof(self->pattern);
|
236
|
+
}
|
237
|
+
|
238
|
+
return size;
|
239
|
+
}
|
240
|
+
|
241
|
+
static const rb_data_type_t re2_regexp_data_type = {
|
242
|
+
.wrap_struct_name = "RE2::Regexp",
|
243
|
+
.function = {
|
244
|
+
.dmark = NULL,
|
245
|
+
.dfree = re2_regexp_free,
|
246
|
+
.dsize = re2_regexp_memsize,
|
247
|
+
},
|
248
|
+
// IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
|
249
|
+
// macro to update VALUE references, as to trigger write barriers.
|
250
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
|
251
|
+
};
|
252
|
+
|
156
253
|
static VALUE re2_matchdata_allocate(VALUE klass) {
|
157
254
|
re2_matchdata *m;
|
158
255
|
|
159
|
-
return
|
160
|
-
|
256
|
+
return TypedData_Make_Struct(klass, re2_matchdata, &re2_matchdata_data_type,
|
257
|
+
m);
|
161
258
|
}
|
162
259
|
|
163
260
|
static VALUE re2_scanner_allocate(VALUE klass) {
|
164
261
|
re2_scanner *c;
|
165
262
|
|
166
|
-
return
|
167
|
-
re2_scanner_free, c);
|
263
|
+
return TypedData_Make_Struct(klass, re2_scanner, &re2_scanner_data_type, c);
|
168
264
|
}
|
169
265
|
|
170
266
|
/*
|
@@ -177,7 +273,7 @@ static VALUE re2_scanner_allocate(VALUE klass) {
|
|
177
273
|
*/
|
178
274
|
static VALUE re2_matchdata_string(const VALUE self) {
|
179
275
|
re2_matchdata *m;
|
180
|
-
|
276
|
+
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
181
277
|
|
182
278
|
return m->text;
|
183
279
|
}
|
@@ -192,7 +288,7 @@ static VALUE re2_matchdata_string(const VALUE self) {
|
|
192
288
|
*/
|
193
289
|
static VALUE re2_scanner_string(const VALUE self) {
|
194
290
|
re2_scanner *c;
|
195
|
-
|
291
|
+
TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
|
196
292
|
|
197
293
|
return c->text;
|
198
294
|
}
|
@@ -207,7 +303,7 @@ static VALUE re2_scanner_string(const VALUE self) {
|
|
207
303
|
*/
|
208
304
|
static VALUE re2_scanner_eof(const VALUE self) {
|
209
305
|
re2_scanner *c;
|
210
|
-
|
306
|
+
TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
|
211
307
|
|
212
308
|
return BOOL2RUBY(c->eof);
|
213
309
|
}
|
@@ -225,7 +321,7 @@ static VALUE re2_scanner_eof(const VALUE self) {
|
|
225
321
|
*/
|
226
322
|
static VALUE re2_scanner_rewind(VALUE self) {
|
227
323
|
re2_scanner *c;
|
228
|
-
|
324
|
+
TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
|
229
325
|
|
230
326
|
delete c->input;
|
231
327
|
c->input = new(std::nothrow) re2::StringPiece(RSTRING_PTR(c->text));
|
@@ -252,8 +348,8 @@ static VALUE re2_scanner_scan(VALUE self) {
|
|
252
348
|
re2_pattern *p;
|
253
349
|
re2_scanner *c;
|
254
350
|
|
255
|
-
|
256
|
-
|
351
|
+
TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
|
352
|
+
TypedData_Get_Struct(c->regexp, re2_pattern, &re2_regexp_data_type, p);
|
257
353
|
|
258
354
|
std::vector<RE2::Arg> argv(c->number_of_capturing_groups);
|
259
355
|
std::vector<RE2::Arg*> args(c->number_of_capturing_groups);
|
@@ -308,8 +404,8 @@ static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
|
|
308
404
|
re2_matchdata *m;
|
309
405
|
re2_pattern *p;
|
310
406
|
|
311
|
-
|
312
|
-
|
407
|
+
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
408
|
+
TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
|
313
409
|
|
314
410
|
int id;
|
315
411
|
|
@@ -349,7 +445,8 @@ static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
|
|
349
445
|
*/
|
350
446
|
static VALUE re2_matchdata_size(const VALUE self) {
|
351
447
|
re2_matchdata *m;
|
352
|
-
|
448
|
+
|
449
|
+
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
353
450
|
|
354
451
|
return INT2FIX(m->number_of_matches);
|
355
452
|
}
|
@@ -367,7 +464,7 @@ static VALUE re2_matchdata_size(const VALUE self) {
|
|
367
464
|
static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
|
368
465
|
re2_matchdata *m;
|
369
466
|
|
370
|
-
|
467
|
+
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
371
468
|
|
372
469
|
re2::StringPiece *match = re2_matchdata_find_match(n, self);
|
373
470
|
if (match == NULL) {
|
@@ -392,7 +489,7 @@ static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
|
|
392
489
|
static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
|
393
490
|
re2_matchdata *m;
|
394
491
|
|
395
|
-
|
492
|
+
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
396
493
|
|
397
494
|
re2::StringPiece *match = re2_matchdata_find_match(n, self);
|
398
495
|
if (match == NULL) {
|
@@ -414,7 +511,7 @@ static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
|
|
414
511
|
*/
|
415
512
|
static VALUE re2_matchdata_regexp(const VALUE self) {
|
416
513
|
re2_matchdata *m;
|
417
|
-
|
514
|
+
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
418
515
|
|
419
516
|
return m->regexp;
|
420
517
|
}
|
@@ -429,7 +526,7 @@ static VALUE re2_matchdata_regexp(const VALUE self) {
|
|
429
526
|
*/
|
430
527
|
static VALUE re2_scanner_regexp(const VALUE self) {
|
431
528
|
re2_scanner *c;
|
432
|
-
|
529
|
+
TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
|
433
530
|
|
434
531
|
return c->regexp;
|
435
532
|
}
|
@@ -437,7 +534,7 @@ static VALUE re2_scanner_regexp(const VALUE self) {
|
|
437
534
|
static VALUE re2_regexp_allocate(VALUE klass) {
|
438
535
|
re2_pattern *p;
|
439
536
|
|
440
|
-
return
|
537
|
+
return TypedData_Make_Struct(klass, re2_pattern, &re2_regexp_data_type, p);
|
441
538
|
}
|
442
539
|
|
443
540
|
/*
|
@@ -456,8 +553,8 @@ static VALUE re2_matchdata_to_a(const VALUE self) {
|
|
456
553
|
re2_matchdata *m;
|
457
554
|
re2_pattern *p;
|
458
555
|
|
459
|
-
|
460
|
-
|
556
|
+
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
557
|
+
TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
|
461
558
|
|
462
559
|
VALUE array = rb_ary_new2(m->number_of_matches);
|
463
560
|
for (int i = 0; i < m->number_of_matches; ++i) {
|
@@ -478,8 +575,8 @@ static VALUE re2_matchdata_nth_match(int nth, const VALUE self) {
|
|
478
575
|
re2_matchdata *m;
|
479
576
|
re2_pattern *p;
|
480
577
|
|
481
|
-
|
482
|
-
|
578
|
+
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
579
|
+
TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
|
483
580
|
|
484
581
|
if (nth < 0 || nth >= m->number_of_matches) {
|
485
582
|
return Qnil;
|
@@ -499,8 +596,8 @@ static VALUE re2_matchdata_named_match(const char* name, const VALUE self) {
|
|
499
596
|
re2_matchdata *m;
|
500
597
|
re2_pattern *p;
|
501
598
|
|
502
|
-
|
503
|
-
|
599
|
+
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
600
|
+
TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
|
504
601
|
|
505
602
|
const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
|
506
603
|
std::map<std::string, int>::const_iterator search = groups.find(name);
|
@@ -599,8 +696,8 @@ static VALUE re2_matchdata_inspect(const VALUE self) {
|
|
599
696
|
re2_matchdata *m;
|
600
697
|
re2_pattern *p;
|
601
698
|
|
602
|
-
|
603
|
-
|
699
|
+
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
700
|
+
TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
|
604
701
|
|
605
702
|
std::ostringstream output;
|
606
703
|
output << "#<RE2::MatchData";
|
@@ -651,8 +748,8 @@ static VALUE re2_matchdata_deconstruct(const VALUE self) {
|
|
651
748
|
re2_matchdata *m;
|
652
749
|
re2_pattern *p;
|
653
750
|
|
654
|
-
|
655
|
-
|
751
|
+
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
752
|
+
TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
|
656
753
|
|
657
754
|
VALUE array = rb_ary_new2(m->number_of_matches - 1);
|
658
755
|
for (int i = 1; i < m->number_of_matches; ++i) {
|
@@ -701,8 +798,8 @@ static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys)
|
|
701
798
|
re2_matchdata *m;
|
702
799
|
re2_pattern *p;
|
703
800
|
|
704
|
-
|
705
|
-
|
801
|
+
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
802
|
+
TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
|
706
803
|
|
707
804
|
const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
|
708
805
|
VALUE capturing_groups = rb_hash_new();
|
@@ -790,7 +887,7 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
|
790
887
|
/* Ensure pattern is a string. */
|
791
888
|
StringValue(pattern);
|
792
889
|
|
793
|
-
|
890
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
794
891
|
|
795
892
|
if (RTEST(options)) {
|
796
893
|
RE2::Options re2_options;
|
@@ -823,7 +920,7 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
|
823
920
|
static VALUE re2_regexp_inspect(const VALUE self) {
|
824
921
|
re2_pattern *p;
|
825
922
|
|
826
|
-
|
923
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
827
924
|
|
828
925
|
std::ostringstream output;
|
829
926
|
|
@@ -847,7 +944,7 @@ static VALUE re2_regexp_inspect(const VALUE self) {
|
|
847
944
|
*/
|
848
945
|
static VALUE re2_regexp_to_s(const VALUE self) {
|
849
946
|
re2_pattern *p;
|
850
|
-
|
947
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
851
948
|
|
852
949
|
return encoded_str_new(p->pattern->pattern().data(),
|
853
950
|
p->pattern->pattern().size(),
|
@@ -865,7 +962,7 @@ static VALUE re2_regexp_to_s(const VALUE self) {
|
|
865
962
|
*/
|
866
963
|
static VALUE re2_regexp_ok(const VALUE self) {
|
867
964
|
re2_pattern *p;
|
868
|
-
|
965
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
869
966
|
|
870
967
|
return BOOL2RUBY(p->pattern->ok());
|
871
968
|
}
|
@@ -881,7 +978,7 @@ static VALUE re2_regexp_ok(const VALUE self) {
|
|
881
978
|
*/
|
882
979
|
static VALUE re2_regexp_utf8(const VALUE self) {
|
883
980
|
re2_pattern *p;
|
884
|
-
|
981
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
885
982
|
|
886
983
|
return BOOL2RUBY(p->pattern->options().encoding() == RE2::Options::EncodingUTF8);
|
887
984
|
}
|
@@ -897,7 +994,7 @@ static VALUE re2_regexp_utf8(const VALUE self) {
|
|
897
994
|
*/
|
898
995
|
static VALUE re2_regexp_posix_syntax(const VALUE self) {
|
899
996
|
re2_pattern *p;
|
900
|
-
|
997
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
901
998
|
|
902
999
|
return BOOL2RUBY(p->pattern->options().posix_syntax());
|
903
1000
|
}
|
@@ -913,7 +1010,7 @@ static VALUE re2_regexp_posix_syntax(const VALUE self) {
|
|
913
1010
|
*/
|
914
1011
|
static VALUE re2_regexp_longest_match(const VALUE self) {
|
915
1012
|
re2_pattern *p;
|
916
|
-
|
1013
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
917
1014
|
|
918
1015
|
return BOOL2RUBY(p->pattern->options().longest_match());
|
919
1016
|
}
|
@@ -929,7 +1026,7 @@ static VALUE re2_regexp_longest_match(const VALUE self) {
|
|
929
1026
|
*/
|
930
1027
|
static VALUE re2_regexp_log_errors(const VALUE self) {
|
931
1028
|
re2_pattern *p;
|
932
|
-
|
1029
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
933
1030
|
|
934
1031
|
return BOOL2RUBY(p->pattern->options().log_errors());
|
935
1032
|
}
|
@@ -945,7 +1042,7 @@ static VALUE re2_regexp_log_errors(const VALUE self) {
|
|
945
1042
|
*/
|
946
1043
|
static VALUE re2_regexp_max_mem(const VALUE self) {
|
947
1044
|
re2_pattern *p;
|
948
|
-
|
1045
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
949
1046
|
|
950
1047
|
return INT2FIX(p->pattern->options().max_mem());
|
951
1048
|
}
|
@@ -961,7 +1058,7 @@ static VALUE re2_regexp_max_mem(const VALUE self) {
|
|
961
1058
|
*/
|
962
1059
|
static VALUE re2_regexp_literal(const VALUE self) {
|
963
1060
|
re2_pattern *p;
|
964
|
-
|
1061
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
965
1062
|
|
966
1063
|
return BOOL2RUBY(p->pattern->options().literal());
|
967
1064
|
}
|
@@ -977,7 +1074,7 @@ static VALUE re2_regexp_literal(const VALUE self) {
|
|
977
1074
|
*/
|
978
1075
|
static VALUE re2_regexp_never_nl(const VALUE self) {
|
979
1076
|
re2_pattern *p;
|
980
|
-
|
1077
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
981
1078
|
|
982
1079
|
return BOOL2RUBY(p->pattern->options().never_nl());
|
983
1080
|
}
|
@@ -993,7 +1090,7 @@ static VALUE re2_regexp_never_nl(const VALUE self) {
|
|
993
1090
|
*/
|
994
1091
|
static VALUE re2_regexp_case_sensitive(const VALUE self) {
|
995
1092
|
re2_pattern *p;
|
996
|
-
|
1093
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
997
1094
|
|
998
1095
|
return BOOL2RUBY(p->pattern->options().case_sensitive());
|
999
1096
|
}
|
@@ -1023,7 +1120,7 @@ static VALUE re2_regexp_case_insensitive(const VALUE self) {
|
|
1023
1120
|
*/
|
1024
1121
|
static VALUE re2_regexp_perl_classes(const VALUE self) {
|
1025
1122
|
re2_pattern *p;
|
1026
|
-
|
1123
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
1027
1124
|
|
1028
1125
|
return BOOL2RUBY(p->pattern->options().perl_classes());
|
1029
1126
|
}
|
@@ -1039,7 +1136,7 @@ static VALUE re2_regexp_perl_classes(const VALUE self) {
|
|
1039
1136
|
*/
|
1040
1137
|
static VALUE re2_regexp_word_boundary(const VALUE self) {
|
1041
1138
|
re2_pattern *p;
|
1042
|
-
|
1139
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
1043
1140
|
|
1044
1141
|
return BOOL2RUBY(p->pattern->options().word_boundary());
|
1045
1142
|
}
|
@@ -1055,7 +1152,7 @@ static VALUE re2_regexp_word_boundary(const VALUE self) {
|
|
1055
1152
|
*/
|
1056
1153
|
static VALUE re2_regexp_one_line(const VALUE self) {
|
1057
1154
|
re2_pattern *p;
|
1058
|
-
|
1155
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
1059
1156
|
|
1060
1157
|
return BOOL2RUBY(p->pattern->options().one_line());
|
1061
1158
|
}
|
@@ -1068,7 +1165,7 @@ static VALUE re2_regexp_one_line(const VALUE self) {
|
|
1068
1165
|
*/
|
1069
1166
|
static VALUE re2_regexp_error(const VALUE self) {
|
1070
1167
|
re2_pattern *p;
|
1071
|
-
|
1168
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
1072
1169
|
|
1073
1170
|
if (p->pattern->ok()) {
|
1074
1171
|
return Qnil;
|
@@ -1089,7 +1186,7 @@ static VALUE re2_regexp_error(const VALUE self) {
|
|
1089
1186
|
*/
|
1090
1187
|
static VALUE re2_regexp_error_arg(const VALUE self) {
|
1091
1188
|
re2_pattern *p;
|
1092
|
-
|
1189
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
1093
1190
|
|
1094
1191
|
if (p->pattern->ok()) {
|
1095
1192
|
return Qnil;
|
@@ -1109,7 +1206,7 @@ static VALUE re2_regexp_error_arg(const VALUE self) {
|
|
1109
1206
|
*/
|
1110
1207
|
static VALUE re2_regexp_program_size(const VALUE self) {
|
1111
1208
|
re2_pattern *p;
|
1112
|
-
|
1209
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
1113
1210
|
|
1114
1211
|
return INT2FIX(p->pattern->ProgramSize());
|
1115
1212
|
}
|
@@ -1123,7 +1220,7 @@ static VALUE re2_regexp_program_size(const VALUE self) {
|
|
1123
1220
|
static VALUE re2_regexp_options(const VALUE self) {
|
1124
1221
|
re2_pattern *p;
|
1125
1222
|
|
1126
|
-
|
1223
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
1127
1224
|
VALUE options = rb_hash_new();
|
1128
1225
|
|
1129
1226
|
rb_hash_aset(options, ID2SYM(id_utf8),
|
@@ -1174,7 +1271,7 @@ static VALUE re2_regexp_options(const VALUE self) {
|
|
1174
1271
|
*/
|
1175
1272
|
static VALUE re2_regexp_number_of_capturing_groups(const VALUE self) {
|
1176
1273
|
re2_pattern *p;
|
1177
|
-
|
1274
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
1178
1275
|
|
1179
1276
|
return INT2FIX(p->pattern->NumberOfCapturingGroups());
|
1180
1277
|
}
|
@@ -1191,7 +1288,7 @@ static VALUE re2_regexp_number_of_capturing_groups(const VALUE self) {
|
|
1191
1288
|
static VALUE re2_regexp_named_capturing_groups(const VALUE self) {
|
1192
1289
|
re2_pattern *p;
|
1193
1290
|
|
1194
|
-
|
1291
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
1195
1292
|
const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
|
1196
1293
|
VALUE capturing_groups = rb_hash_new();
|
1197
1294
|
|
@@ -1267,7 +1364,7 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
1267
1364
|
/* Ensure text is a string. */
|
1268
1365
|
StringValue(text);
|
1269
1366
|
|
1270
|
-
|
1367
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
1271
1368
|
|
1272
1369
|
int n;
|
1273
1370
|
|
@@ -1299,11 +1396,13 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
1299
1396
|
n += 1;
|
1300
1397
|
|
1301
1398
|
VALUE matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
|
1302
|
-
|
1399
|
+
TypedData_Get_Struct(matchdata, re2_matchdata, &re2_matchdata_data_type, m);
|
1303
1400
|
m->matches = new(std::nothrow) re2::StringPiece[n];
|
1304
|
-
m->regexp
|
1305
|
-
|
1306
|
-
|
1401
|
+
RB_OBJ_WRITE(matchdata, &m->regexp, self);
|
1402
|
+
if (!RTEST(rb_obj_frozen_p(text))) {
|
1403
|
+
text = rb_str_freeze(rb_str_dup(text));
|
1404
|
+
}
|
1405
|
+
RB_OBJ_WRITE(matchdata, &m->text, text);
|
1307
1406
|
|
1308
1407
|
if (m->matches == 0) {
|
1309
1408
|
rb_raise(rb_eNoMemError,
|
@@ -1352,13 +1451,13 @@ static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
|
|
1352
1451
|
re2_pattern *p;
|
1353
1452
|
re2_scanner *c;
|
1354
1453
|
|
1355
|
-
|
1454
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
1356
1455
|
VALUE scanner = rb_class_new_instance(0, 0, re2_cScanner);
|
1357
|
-
|
1456
|
+
TypedData_Get_Struct(scanner, re2_scanner, &re2_scanner_data_type, c);
|
1358
1457
|
|
1359
1458
|
c->input = new(std::nothrow) re2::StringPiece(RSTRING_PTR(text));
|
1360
|
-
c->regexp
|
1361
|
-
c->text
|
1459
|
+
RB_OBJ_WRITE(scanner, &c->regexp, self);
|
1460
|
+
RB_OBJ_WRITE(scanner, &c->text, text);
|
1362
1461
|
|
1363
1462
|
if (p->pattern->ok()) {
|
1364
1463
|
c->number_of_capturing_groups = p->pattern->NumberOfCapturingGroups();
|
@@ -1402,7 +1501,7 @@ static VALUE re2_Replace(VALUE, VALUE str, VALUE pattern,
|
|
1402
1501
|
|
1403
1502
|
/* Do the replacement. */
|
1404
1503
|
if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
|
1405
|
-
|
1504
|
+
TypedData_Get_Struct(pattern, re2_pattern, &re2_regexp_data_type, p);
|
1406
1505
|
RE2::Replace(&str_as_string, *p->pattern, RSTRING_PTR(rewrite));
|
1407
1506
|
|
1408
1507
|
return encoded_str_new(str_as_string.data(), str_as_string.size(),
|
@@ -1446,7 +1545,7 @@ static VALUE re2_GlobalReplace(VALUE, VALUE str, VALUE pattern,
|
|
1446
1545
|
|
1447
1546
|
/* Do the replacement. */
|
1448
1547
|
if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
|
1449
|
-
|
1548
|
+
TypedData_Get_Struct(pattern, re2_pattern, &re2_regexp_data_type, p);
|
1450
1549
|
RE2::GlobalReplace(&str_as_string, *p->pattern, RSTRING_PTR(rewrite));
|
1451
1550
|
|
1452
1551
|
return encoded_str_new(str_as_string.data(), str_as_string.size(),
|
@@ -1480,16 +1579,39 @@ static VALUE re2_QuoteMeta(VALUE, VALUE unquoted) {
|
|
1480
1579
|
return rb_str_new(quoted_string.data(), quoted_string.size());
|
1481
1580
|
}
|
1482
1581
|
|
1483
|
-
static void re2_set_free(
|
1582
|
+
static void re2_set_free(void *data) {
|
1583
|
+
re2_set *self = (re2_set *)data;
|
1484
1584
|
if (self->set) {
|
1485
1585
|
delete self->set;
|
1486
1586
|
}
|
1487
|
-
|
1587
|
+
xfree(self);
|
1588
|
+
}
|
1589
|
+
|
1590
|
+
static size_t re2_set_memsize(const void *data) {
|
1591
|
+
const re2_set *self = (const re2_set *)data;
|
1592
|
+
size_t size = sizeof(re2_set);
|
1593
|
+
if (self->set) {
|
1594
|
+
size += sizeof(self->set);
|
1595
|
+
}
|
1596
|
+
|
1597
|
+
return size;
|
1488
1598
|
}
|
1489
1599
|
|
1600
|
+
static const rb_data_type_t re2_set_data_type = {
|
1601
|
+
.wrap_struct_name = "RE2::Set",
|
1602
|
+
.function = {
|
1603
|
+
.dmark = NULL,
|
1604
|
+
.dfree = re2_set_free,
|
1605
|
+
.dsize = re2_set_memsize,
|
1606
|
+
},
|
1607
|
+
// IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
|
1608
|
+
// macro to update VALUE references, as to trigger write barriers.
|
1609
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
|
1610
|
+
};
|
1611
|
+
|
1490
1612
|
static VALUE re2_set_allocate(VALUE klass) {
|
1491
1613
|
re2_set *s;
|
1492
|
-
VALUE result =
|
1614
|
+
VALUE result = TypedData_Make_Struct(klass, re2_set, &re2_set_data_type, s);
|
1493
1615
|
|
1494
1616
|
return result;
|
1495
1617
|
}
|
@@ -1540,7 +1662,7 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
|
|
1540
1662
|
re2_set *s;
|
1541
1663
|
|
1542
1664
|
rb_scan_args(argc, argv, "02", &anchor, &options);
|
1543
|
-
|
1665
|
+
TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);
|
1544
1666
|
|
1545
1667
|
RE2::Anchor re2_anchor = RE2::UNANCHORED;
|
1546
1668
|
|
@@ -1588,7 +1710,7 @@ static VALUE re2_set_add(VALUE self, VALUE pattern) {
|
|
1588
1710
|
StringValue(pattern);
|
1589
1711
|
|
1590
1712
|
re2_set *s;
|
1591
|
-
|
1713
|
+
TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);
|
1592
1714
|
|
1593
1715
|
/* To prevent the memory of the err string leaking when we call rb_raise,
|
1594
1716
|
* take a copy of it and let it go out of scope.
|
@@ -1621,7 +1743,7 @@ static VALUE re2_set_add(VALUE self, VALUE pattern) {
|
|
1621
1743
|
*/
|
1622
1744
|
static VALUE re2_set_compile(VALUE self) {
|
1623
1745
|
re2_set *s;
|
1624
|
-
|
1746
|
+
TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);
|
1625
1747
|
|
1626
1748
|
return BOOL2RUBY(s->set->Compile());
|
1627
1749
|
}
|
@@ -1688,7 +1810,7 @@ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
|
|
1688
1810
|
|
1689
1811
|
StringValue(str);
|
1690
1812
|
re2_set *s;
|
1691
|
-
|
1813
|
+
TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);
|
1692
1814
|
|
1693
1815
|
if (RTEST(options)) {
|
1694
1816
|
Check_Type(options, T_HASH);
|
data/lib/2.6/re2.bundle
CHANGED
Binary file
|
data/lib/2.7/re2.bundle
CHANGED
Binary file
|
data/lib/3.0/re2.bundle
CHANGED
Binary file
|
data/lib/3.1/re2.bundle
CHANGED
Binary file
|
data/lib/3.2/re2.bundle
CHANGED
Binary file
|
data/lib/re2/version.rb
CHANGED
data/spec/re2/match_data_spec.rb
CHANGED
@@ -129,6 +129,12 @@ RSpec.describe RE2::MatchData do
|
|
129
129
|
re = RE2::Regexp.new('(\D+)').match("bob")
|
130
130
|
expect(re.string).to be_frozen
|
131
131
|
end
|
132
|
+
|
133
|
+
it "does not copy the string if it was already frozen" do
|
134
|
+
string = "bob".freeze
|
135
|
+
re = RE2::Regexp.new('(\D+)').match(string)
|
136
|
+
expect(re.string).to equal(string)
|
137
|
+
end
|
132
138
|
end
|
133
139
|
|
134
140
|
describe "#size" do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: re2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.3.0
|
4
|
+
version: 2.4.0
|
5
5
|
platform: arm64-darwin
|
6
6
|
authors:
|
7
7
|
- Paul Mucur
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2023-
|
12
|
+
date: 2023-11-11 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake-compiler
|