re2 2.3.0 → 2.4.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +16 -8
- data/ext/re2/extconf.rb +1 -0
- data/ext/re2/re2.cc +217 -93
- data/lib/re2/version.rb +1 -1
- data/re2.gemspec +1 -1
- data/spec/re2/match_data_spec.rb +15 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ef5426f8bbd4fab32b2e41296fff7e523e571b486b90e71120cf31830eadaee9
|
4
|
+
data.tar.gz: 8a81a4a26fd7d315ba62eea762438b7b63a4a6cf5373d94b9be6af2c66a7346b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a7790c334317a0b2e94d0f6a4aed0bc4644385e7ae89de506ac5cea2ae7b1df85059796a586a191439728cbebcdc64bfd11b6edfa65cd4303c5e35497e342b1b
|
7
|
+
data.tar.gz: 5fbcc85713cf53c0b1cee1ecda0206df28f486f0b8436672db8ecf9635ccff41039da0cff1754035be33c951d45d79a9598445368e5206013cad8b425efc16c3
|
data/README.md
CHANGED
@@ -5,7 +5,7 @@ Ruby bindings to [RE2][], a "fast, safe, thread-friendly alternative to
|
|
5
5
|
backtracking regular expression engines like those used in PCRE, Perl, and
|
6
6
|
Python".
|
7
7
|
|
8
|
-
**Current version:** 2.3.0
|
8
|
+
**Current version:** 2.4.2
|
9
9
|
**Supported Ruby versions:** 2.6, 2.7, 3.0, 3.1, 3.2
|
10
10
|
**Bundled RE2 version:** libre2.11 (2023-11-01)
|
11
11
|
**Supported RE2 versions:** libre2.0 (< 2020-03-02), libre2.1 (2020-03-02), libre2.6 (2020-03-03), libre2.7 (2020-05-01), libre2.8 (2020-07-06), libre2.9 (2020-11-01), libre2.10 (2022-12-01), libre2.11 (2023-07-01)
|
@@ -39,8 +39,9 @@ If you are using Debian, you can install the [libre2-dev][] package like so:
|
|
39
39
|
|
40
40
|
$ sudo apt-get install libre2-dev
|
41
41
|
|
42
|
-
Recent versions of RE2 require a compiler with
|
43
|
-
[clang](http://clang.llvm.org/) 3.4 or
|
42
|
+
Recent versions of RE2 require [CMake](https://cmake.org) and a compiler with
|
43
|
+
C++14 support such as [clang](http://clang.llvm.org/) 3.4 or
|
44
|
+
[gcc](https://gcc.gnu.org/) 5.
|
44
45
|
|
45
46
|
If you are using a packaged Ruby distribution, make sure you also have the
|
46
47
|
Ruby header files installed such as those provided by the [ruby-dev][] package
|
@@ -255,13 +256,17 @@ Contributions
|
|
255
256
|
-------------
|
256
257
|
|
257
258
|
* Thanks to [Jason Woods](https://github.com/driskell) who contributed the
|
258
|
-
original implementations of `RE2::MatchData#begin` and `RE2::MatchData#end`
|
259
|
-
* Thanks to [Stefano Rivera](https://github.com/stefanor) who first contributed
|
260
|
-
|
259
|
+
original implementations of `RE2::MatchData#begin` and `RE2::MatchData#end`.
|
260
|
+
* Thanks to [Stefano Rivera](https://github.com/stefanor) who first contributed
|
261
|
+
C++11 support.
|
262
|
+
* Thanks to [Stan Hu](https://github.com/stanhu) for reporting a bug with empty
|
263
|
+
patterns and `RE2::Regexp#scan`, contributing support for libre2.11
|
264
|
+
(2023-07-01) and for vendoring RE2 and abseil and compiling native gems in
|
265
|
+
2.0.
|
261
266
|
* Thanks to [Sebastian Reitenbach](https://github.com/buzzdeee) for reporting
|
262
|
-
the deprecation and removal of the `utf8` encoding option in RE2
|
267
|
+
the deprecation and removal of the `utf8` encoding option in RE2.
|
263
268
|
* Thanks to [Sergio Medina](https://github.com/serch) for reporting a bug when
|
264
|
-
using `RE2::Scanner#scan` with an invalid regular expression
|
269
|
+
using `RE2::Scanner#scan` with an invalid regular expression.
|
265
270
|
* Thanks to [Pritam Baral](https://github.com/pritambaral) for contributing the
|
266
271
|
initial support for `RE2::Set`.
|
267
272
|
* Thanks to [Mike Dalessio](https://github.com/flavorjones) for reviewing the
|
@@ -269,6 +274,9 @@ Contributions
|
|
269
274
|
* Thanks to [Peter Zhu](https://github.com/peterzhu2118) for
|
270
275
|
[ruby_memcheck](https://github.com/Shopify/ruby_memcheck) and helping find
|
271
276
|
the memory leaks fixed in 2.1.3.
|
277
|
+
* Thanks to [Jean Boussier](https://github.com/byroot) for contributing the
|
278
|
+
switch to Ruby's `TypedData` API and the resulting garbage collection
|
279
|
+
improvements in 2.4.0.
|
272
280
|
|
273
281
|
Contact
|
274
282
|
-------
|
data/ext/re2/extconf.rb
CHANGED
@@ -128,6 +128,7 @@ def build_extension(static_p = false)
|
|
128
128
|
|
129
129
|
have_library("stdc++")
|
130
130
|
have_header("stdint.h")
|
131
|
+
have_func("rb_gc_mark_movable") # introduced in Ruby 2.7
|
131
132
|
|
132
133
|
if !static_p and !have_library("re2")
|
133
134
|
abort "You must have re2 installed and specified with --with-re2-dir, please see https://github.com/google/re2/wiki/Install"
|
data/ext/re2/re2.cc
CHANGED
@@ -122,49 +122,145 @@ static void parse_re2_options(RE2::Options* re2_options, const VALUE options) {
|
|
122
122
|
}
|
123
123
|
}
|
124
124
|
|
125
|
-
|
126
|
-
|
127
|
-
|
125
|
+
/* For compatibility with ruby < 2.7 */
|
126
|
+
#ifdef HAVE_RB_GC_MARK_MOVABLE
|
127
|
+
#define re2_compact_callback(x) .dcompact = (x),
|
128
|
+
#else
|
129
|
+
#define rb_gc_mark_movable(x) rb_gc_mark(x)
|
130
|
+
#define re2_compact_callback(x)
|
131
|
+
#endif
|
132
|
+
|
133
|
+
static void re2_matchdata_mark(void *ptr) {
|
134
|
+
re2_matchdata *m = reinterpret_cast<re2_matchdata *>(ptr);
|
135
|
+
rb_gc_mark_movable(m->regexp);
|
136
|
+
rb_gc_mark_movable(m->text);
|
137
|
+
}
|
138
|
+
|
139
|
+
#ifdef HAVE_RB_GC_MARK_MOVABLE
|
140
|
+
static void re2_matchdata_compact(void *ptr) {
|
141
|
+
re2_matchdata *m = reinterpret_cast<re2_matchdata *>(ptr);
|
142
|
+
m->regexp = rb_gc_location(m->regexp);
|
143
|
+
m->text = rb_gc_location(m->text);
|
144
|
+
}
|
145
|
+
#endif
|
146
|
+
|
147
|
+
static void re2_matchdata_free(void *ptr) {
|
148
|
+
re2_matchdata *m = reinterpret_cast<re2_matchdata *>(ptr);
|
149
|
+
if (m->matches) {
|
150
|
+
delete[] m->matches;
|
151
|
+
}
|
152
|
+
xfree(m);
|
128
153
|
}
|
129
154
|
|
130
|
-
static void
|
131
|
-
|
132
|
-
|
155
|
+
static size_t re2_matchdata_memsize(const void *ptr) {
|
156
|
+
const re2_matchdata *m = reinterpret_cast<const re2_matchdata *>(ptr);
|
157
|
+
size_t size = sizeof(*m);
|
158
|
+
if (m->matches) {
|
159
|
+
size += sizeof(*m->matches) * m->number_of_matches;
|
133
160
|
}
|
134
|
-
|
161
|
+
|
162
|
+
return size;
|
163
|
+
}
|
164
|
+
|
165
|
+
static const rb_data_type_t re2_matchdata_data_type = {
|
166
|
+
.wrap_struct_name = "RE2::MatchData",
|
167
|
+
.function = {
|
168
|
+
.dmark = re2_matchdata_mark,
|
169
|
+
.dfree = re2_matchdata_free,
|
170
|
+
.dsize = re2_matchdata_memsize,
|
171
|
+
re2_compact_callback(re2_matchdata_compact)
|
172
|
+
},
|
173
|
+
// IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
|
174
|
+
// macro to update VALUE references, as to trigger write barriers.
|
175
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
|
176
|
+
};
|
177
|
+
|
178
|
+
static void re2_scanner_mark(void *ptr) {
|
179
|
+
re2_scanner *s = reinterpret_cast<re2_scanner *>(ptr);
|
180
|
+
rb_gc_mark_movable(s->regexp);
|
181
|
+
rb_gc_mark_movable(s->text);
|
182
|
+
}
|
183
|
+
|
184
|
+
#ifdef HAVE_RB_GC_MARK_MOVABLE
|
185
|
+
static void re2_scanner_compact(void *ptr) {
|
186
|
+
re2_scanner *s = reinterpret_cast<re2_scanner *>(ptr);
|
187
|
+
s->regexp = rb_gc_location(s->regexp);
|
188
|
+
s->text = rb_gc_location(s->text);
|
135
189
|
}
|
190
|
+
#endif
|
136
191
|
|
137
|
-
static void
|
138
|
-
|
139
|
-
|
192
|
+
static void re2_scanner_free(void *ptr) {
|
193
|
+
re2_scanner *s = reinterpret_cast<re2_scanner *>(ptr);
|
194
|
+
if (s->input) {
|
195
|
+
delete s->input;
|
196
|
+
}
|
197
|
+
xfree(s);
|
140
198
|
}
|
141
199
|
|
142
|
-
static void
|
143
|
-
|
144
|
-
|
200
|
+
static size_t re2_scanner_memsize(const void *ptr) {
|
201
|
+
const re2_scanner *s = reinterpret_cast<const re2_scanner *>(ptr);
|
202
|
+
size_t size = sizeof(*s);
|
203
|
+
if (s->input) {
|
204
|
+
size += sizeof(*s->input);
|
145
205
|
}
|
146
|
-
|
206
|
+
|
207
|
+
return size;
|
208
|
+
}
|
209
|
+
|
210
|
+
static const rb_data_type_t re2_scanner_data_type = {
|
211
|
+
.wrap_struct_name = "RE2::Scanner",
|
212
|
+
.function = {
|
213
|
+
.dmark = re2_scanner_mark,
|
214
|
+
.dfree = re2_scanner_free,
|
215
|
+
.dsize = re2_scanner_memsize,
|
216
|
+
re2_compact_callback(re2_scanner_compact)
|
217
|
+
},
|
218
|
+
// IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
|
219
|
+
// macro to update VALUE references, as to trigger write barriers.
|
220
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
|
221
|
+
};
|
222
|
+
|
223
|
+
static void re2_regexp_free(void *ptr) {
|
224
|
+
re2_pattern *p = reinterpret_cast<re2_pattern *>(ptr);
|
225
|
+
if (p->pattern) {
|
226
|
+
delete p->pattern;
|
227
|
+
}
|
228
|
+
xfree(p);
|
147
229
|
}
|
148
230
|
|
149
|
-
static void
|
150
|
-
|
151
|
-
|
231
|
+
static size_t re2_regexp_memsize(const void *ptr) {
|
232
|
+
const re2_pattern *p = reinterpret_cast<const re2_pattern *>(ptr);
|
233
|
+
size_t size = sizeof(*p);
|
234
|
+
if (p->pattern) {
|
235
|
+
size += sizeof(*p->pattern);
|
152
236
|
}
|
153
|
-
|
237
|
+
|
238
|
+
return size;
|
154
239
|
}
|
155
240
|
|
241
|
+
static const rb_data_type_t re2_regexp_data_type = {
|
242
|
+
.wrap_struct_name = "RE2::Regexp",
|
243
|
+
.function = {
|
244
|
+
.dmark = NULL,
|
245
|
+
.dfree = re2_regexp_free,
|
246
|
+
.dsize = re2_regexp_memsize,
|
247
|
+
},
|
248
|
+
// IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
|
249
|
+
// macro to update VALUE references, as to trigger write barriers.
|
250
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
|
251
|
+
};
|
252
|
+
|
156
253
|
static VALUE re2_matchdata_allocate(VALUE klass) {
|
157
254
|
re2_matchdata *m;
|
158
255
|
|
159
|
-
return
|
160
|
-
|
256
|
+
return TypedData_Make_Struct(klass, re2_matchdata, &re2_matchdata_data_type,
|
257
|
+
m);
|
161
258
|
}
|
162
259
|
|
163
260
|
static VALUE re2_scanner_allocate(VALUE klass) {
|
164
261
|
re2_scanner *c;
|
165
262
|
|
166
|
-
return
|
167
|
-
re2_scanner_free, c);
|
263
|
+
return TypedData_Make_Struct(klass, re2_scanner, &re2_scanner_data_type, c);
|
168
264
|
}
|
169
265
|
|
170
266
|
/*
|
@@ -177,7 +273,7 @@ static VALUE re2_scanner_allocate(VALUE klass) {
|
|
177
273
|
*/
|
178
274
|
static VALUE re2_matchdata_string(const VALUE self) {
|
179
275
|
re2_matchdata *m;
|
180
|
-
|
276
|
+
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
181
277
|
|
182
278
|
return m->text;
|
183
279
|
}
|
@@ -192,7 +288,7 @@ static VALUE re2_matchdata_string(const VALUE self) {
|
|
192
288
|
*/
|
193
289
|
static VALUE re2_scanner_string(const VALUE self) {
|
194
290
|
re2_scanner *c;
|
195
|
-
|
291
|
+
TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
|
196
292
|
|
197
293
|
return c->text;
|
198
294
|
}
|
@@ -207,7 +303,7 @@ static VALUE re2_scanner_string(const VALUE self) {
|
|
207
303
|
*/
|
208
304
|
static VALUE re2_scanner_eof(const VALUE self) {
|
209
305
|
re2_scanner *c;
|
210
|
-
|
306
|
+
TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
|
211
307
|
|
212
308
|
return BOOL2RUBY(c->eof);
|
213
309
|
}
|
@@ -225,7 +321,7 @@ static VALUE re2_scanner_eof(const VALUE self) {
|
|
225
321
|
*/
|
226
322
|
static VALUE re2_scanner_rewind(VALUE self) {
|
227
323
|
re2_scanner *c;
|
228
|
-
|
324
|
+
TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
|
229
325
|
|
230
326
|
delete c->input;
|
231
327
|
c->input = new(std::nothrow) re2::StringPiece(RSTRING_PTR(c->text));
|
@@ -252,8 +348,8 @@ static VALUE re2_scanner_scan(VALUE self) {
|
|
252
348
|
re2_pattern *p;
|
253
349
|
re2_scanner *c;
|
254
350
|
|
255
|
-
|
256
|
-
|
351
|
+
TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
|
352
|
+
TypedData_Get_Struct(c->regexp, re2_pattern, &re2_regexp_data_type, p);
|
257
353
|
|
258
354
|
std::vector<RE2::Arg> argv(c->number_of_capturing_groups);
|
259
355
|
std::vector<RE2::Arg*> args(c->number_of_capturing_groups);
|
@@ -308,8 +404,8 @@ static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
|
|
308
404
|
re2_matchdata *m;
|
309
405
|
re2_pattern *p;
|
310
406
|
|
311
|
-
|
312
|
-
|
407
|
+
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
408
|
+
TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
|
313
409
|
|
314
410
|
int id;
|
315
411
|
|
@@ -349,7 +445,8 @@ static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
|
|
349
445
|
*/
|
350
446
|
static VALUE re2_matchdata_size(const VALUE self) {
|
351
447
|
re2_matchdata *m;
|
352
|
-
|
448
|
+
|
449
|
+
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
353
450
|
|
354
451
|
return INT2FIX(m->number_of_matches);
|
355
452
|
}
|
@@ -367,7 +464,7 @@ static VALUE re2_matchdata_size(const VALUE self) {
|
|
367
464
|
static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
|
368
465
|
re2_matchdata *m;
|
369
466
|
|
370
|
-
|
467
|
+
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
371
468
|
|
372
469
|
re2::StringPiece *match = re2_matchdata_find_match(n, self);
|
373
470
|
if (match == NULL) {
|
@@ -392,7 +489,7 @@ static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
|
|
392
489
|
static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
|
393
490
|
re2_matchdata *m;
|
394
491
|
|
395
|
-
|
492
|
+
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
396
493
|
|
397
494
|
re2::StringPiece *match = re2_matchdata_find_match(n, self);
|
398
495
|
if (match == NULL) {
|
@@ -414,7 +511,7 @@ static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
|
|
414
511
|
*/
|
415
512
|
static VALUE re2_matchdata_regexp(const VALUE self) {
|
416
513
|
re2_matchdata *m;
|
417
|
-
|
514
|
+
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
418
515
|
|
419
516
|
return m->regexp;
|
420
517
|
}
|
@@ -429,7 +526,7 @@ static VALUE re2_matchdata_regexp(const VALUE self) {
|
|
429
526
|
*/
|
430
527
|
static VALUE re2_scanner_regexp(const VALUE self) {
|
431
528
|
re2_scanner *c;
|
432
|
-
|
529
|
+
TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
|
433
530
|
|
434
531
|
return c->regexp;
|
435
532
|
}
|
@@ -437,7 +534,7 @@ static VALUE re2_scanner_regexp(const VALUE self) {
|
|
437
534
|
static VALUE re2_regexp_allocate(VALUE klass) {
|
438
535
|
re2_pattern *p;
|
439
536
|
|
440
|
-
return
|
537
|
+
return TypedData_Make_Struct(klass, re2_pattern, &re2_regexp_data_type, p);
|
441
538
|
}
|
442
539
|
|
443
540
|
/*
|
@@ -456,8 +553,8 @@ static VALUE re2_matchdata_to_a(const VALUE self) {
|
|
456
553
|
re2_matchdata *m;
|
457
554
|
re2_pattern *p;
|
458
555
|
|
459
|
-
|
460
|
-
|
556
|
+
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
557
|
+
TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
|
461
558
|
|
462
559
|
VALUE array = rb_ary_new2(m->number_of_matches);
|
463
560
|
for (int i = 0; i < m->number_of_matches; ++i) {
|
@@ -478,8 +575,8 @@ static VALUE re2_matchdata_nth_match(int nth, const VALUE self) {
|
|
478
575
|
re2_matchdata *m;
|
479
576
|
re2_pattern *p;
|
480
577
|
|
481
|
-
|
482
|
-
|
578
|
+
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
579
|
+
TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
|
483
580
|
|
484
581
|
if (nth < 0 || nth >= m->number_of_matches) {
|
485
582
|
return Qnil;
|
@@ -499,8 +596,8 @@ static VALUE re2_matchdata_named_match(const char* name, const VALUE self) {
|
|
499
596
|
re2_matchdata *m;
|
500
597
|
re2_pattern *p;
|
501
598
|
|
502
|
-
|
503
|
-
|
599
|
+
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
600
|
+
TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
|
504
601
|
|
505
602
|
const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
|
506
603
|
std::map<std::string, int>::const_iterator search = groups.find(name);
|
@@ -599,8 +696,8 @@ static VALUE re2_matchdata_inspect(const VALUE self) {
|
|
599
696
|
re2_matchdata *m;
|
600
697
|
re2_pattern *p;
|
601
698
|
|
602
|
-
|
603
|
-
|
699
|
+
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
700
|
+
TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
|
604
701
|
|
605
702
|
std::ostringstream output;
|
606
703
|
output << "#<RE2::MatchData";
|
@@ -651,8 +748,8 @@ static VALUE re2_matchdata_deconstruct(const VALUE self) {
|
|
651
748
|
re2_matchdata *m;
|
652
749
|
re2_pattern *p;
|
653
750
|
|
654
|
-
|
655
|
-
|
751
|
+
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
752
|
+
TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
|
656
753
|
|
657
754
|
VALUE array = rb_ary_new2(m->number_of_matches - 1);
|
658
755
|
for (int i = 1; i < m->number_of_matches; ++i) {
|
@@ -701,8 +798,8 @@ static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys)
|
|
701
798
|
re2_matchdata *m;
|
702
799
|
re2_pattern *p;
|
703
800
|
|
704
|
-
|
705
|
-
|
801
|
+
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
802
|
+
TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
|
706
803
|
|
707
804
|
const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
|
708
805
|
VALUE capturing_groups = rb_hash_new();
|
@@ -790,7 +887,7 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
|
790
887
|
/* Ensure pattern is a string. */
|
791
888
|
StringValue(pattern);
|
792
889
|
|
793
|
-
|
890
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
794
891
|
|
795
892
|
if (RTEST(options)) {
|
796
893
|
RE2::Options re2_options;
|
@@ -823,7 +920,7 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
|
823
920
|
static VALUE re2_regexp_inspect(const VALUE self) {
|
824
921
|
re2_pattern *p;
|
825
922
|
|
826
|
-
|
923
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
827
924
|
|
828
925
|
std::ostringstream output;
|
829
926
|
|
@@ -847,7 +944,7 @@ static VALUE re2_regexp_inspect(const VALUE self) {
|
|
847
944
|
*/
|
848
945
|
static VALUE re2_regexp_to_s(const VALUE self) {
|
849
946
|
re2_pattern *p;
|
850
|
-
|
947
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
851
948
|
|
852
949
|
return encoded_str_new(p->pattern->pattern().data(),
|
853
950
|
p->pattern->pattern().size(),
|
@@ -865,7 +962,7 @@ static VALUE re2_regexp_to_s(const VALUE self) {
|
|
865
962
|
*/
|
866
963
|
static VALUE re2_regexp_ok(const VALUE self) {
|
867
964
|
re2_pattern *p;
|
868
|
-
|
965
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
869
966
|
|
870
967
|
return BOOL2RUBY(p->pattern->ok());
|
871
968
|
}
|
@@ -881,7 +978,7 @@ static VALUE re2_regexp_ok(const VALUE self) {
|
|
881
978
|
*/
|
882
979
|
static VALUE re2_regexp_utf8(const VALUE self) {
|
883
980
|
re2_pattern *p;
|
884
|
-
|
981
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
885
982
|
|
886
983
|
return BOOL2RUBY(p->pattern->options().encoding() == RE2::Options::EncodingUTF8);
|
887
984
|
}
|
@@ -897,7 +994,7 @@ static VALUE re2_regexp_utf8(const VALUE self) {
|
|
897
994
|
*/
|
898
995
|
static VALUE re2_regexp_posix_syntax(const VALUE self) {
|
899
996
|
re2_pattern *p;
|
900
|
-
|
997
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
901
998
|
|
902
999
|
return BOOL2RUBY(p->pattern->options().posix_syntax());
|
903
1000
|
}
|
@@ -913,7 +1010,7 @@ static VALUE re2_regexp_posix_syntax(const VALUE self) {
|
|
913
1010
|
*/
|
914
1011
|
static VALUE re2_regexp_longest_match(const VALUE self) {
|
915
1012
|
re2_pattern *p;
|
916
|
-
|
1013
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
917
1014
|
|
918
1015
|
return BOOL2RUBY(p->pattern->options().longest_match());
|
919
1016
|
}
|
@@ -929,7 +1026,7 @@ static VALUE re2_regexp_longest_match(const VALUE self) {
|
|
929
1026
|
*/
|
930
1027
|
static VALUE re2_regexp_log_errors(const VALUE self) {
|
931
1028
|
re2_pattern *p;
|
932
|
-
|
1029
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
933
1030
|
|
934
1031
|
return BOOL2RUBY(p->pattern->options().log_errors());
|
935
1032
|
}
|
@@ -945,7 +1042,7 @@ static VALUE re2_regexp_log_errors(const VALUE self) {
|
|
945
1042
|
*/
|
946
1043
|
static VALUE re2_regexp_max_mem(const VALUE self) {
|
947
1044
|
re2_pattern *p;
|
948
|
-
|
1045
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
949
1046
|
|
950
1047
|
return INT2FIX(p->pattern->options().max_mem());
|
951
1048
|
}
|
@@ -961,7 +1058,7 @@ static VALUE re2_regexp_max_mem(const VALUE self) {
|
|
961
1058
|
*/
|
962
1059
|
static VALUE re2_regexp_literal(const VALUE self) {
|
963
1060
|
re2_pattern *p;
|
964
|
-
|
1061
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
965
1062
|
|
966
1063
|
return BOOL2RUBY(p->pattern->options().literal());
|
967
1064
|
}
|
@@ -977,7 +1074,7 @@ static VALUE re2_regexp_literal(const VALUE self) {
|
|
977
1074
|
*/
|
978
1075
|
static VALUE re2_regexp_never_nl(const VALUE self) {
|
979
1076
|
re2_pattern *p;
|
980
|
-
|
1077
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
981
1078
|
|
982
1079
|
return BOOL2RUBY(p->pattern->options().never_nl());
|
983
1080
|
}
|
@@ -993,7 +1090,7 @@ static VALUE re2_regexp_never_nl(const VALUE self) {
|
|
993
1090
|
*/
|
994
1091
|
static VALUE re2_regexp_case_sensitive(const VALUE self) {
|
995
1092
|
re2_pattern *p;
|
996
|
-
|
1093
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
997
1094
|
|
998
1095
|
return BOOL2RUBY(p->pattern->options().case_sensitive());
|
999
1096
|
}
|
@@ -1023,7 +1120,7 @@ static VALUE re2_regexp_case_insensitive(const VALUE self) {
|
|
1023
1120
|
*/
|
1024
1121
|
static VALUE re2_regexp_perl_classes(const VALUE self) {
|
1025
1122
|
re2_pattern *p;
|
1026
|
-
|
1123
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
1027
1124
|
|
1028
1125
|
return BOOL2RUBY(p->pattern->options().perl_classes());
|
1029
1126
|
}
|
@@ -1039,7 +1136,7 @@ static VALUE re2_regexp_perl_classes(const VALUE self) {
|
|
1039
1136
|
*/
|
1040
1137
|
static VALUE re2_regexp_word_boundary(const VALUE self) {
|
1041
1138
|
re2_pattern *p;
|
1042
|
-
|
1139
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
1043
1140
|
|
1044
1141
|
return BOOL2RUBY(p->pattern->options().word_boundary());
|
1045
1142
|
}
|
@@ -1055,7 +1152,7 @@ static VALUE re2_regexp_word_boundary(const VALUE self) {
|
|
1055
1152
|
*/
|
1056
1153
|
static VALUE re2_regexp_one_line(const VALUE self) {
|
1057
1154
|
re2_pattern *p;
|
1058
|
-
|
1155
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
1059
1156
|
|
1060
1157
|
return BOOL2RUBY(p->pattern->options().one_line());
|
1061
1158
|
}
|
@@ -1068,7 +1165,7 @@ static VALUE re2_regexp_one_line(const VALUE self) {
|
|
1068
1165
|
*/
|
1069
1166
|
static VALUE re2_regexp_error(const VALUE self) {
|
1070
1167
|
re2_pattern *p;
|
1071
|
-
|
1168
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
1072
1169
|
|
1073
1170
|
if (p->pattern->ok()) {
|
1074
1171
|
return Qnil;
|
@@ -1089,7 +1186,7 @@ static VALUE re2_regexp_error(const VALUE self) {
|
|
1089
1186
|
*/
|
1090
1187
|
static VALUE re2_regexp_error_arg(const VALUE self) {
|
1091
1188
|
re2_pattern *p;
|
1092
|
-
|
1189
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
1093
1190
|
|
1094
1191
|
if (p->pattern->ok()) {
|
1095
1192
|
return Qnil;
|
@@ -1109,7 +1206,7 @@ static VALUE re2_regexp_error_arg(const VALUE self) {
|
|
1109
1206
|
*/
|
1110
1207
|
static VALUE re2_regexp_program_size(const VALUE self) {
|
1111
1208
|
re2_pattern *p;
|
1112
|
-
|
1209
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
1113
1210
|
|
1114
1211
|
return INT2FIX(p->pattern->ProgramSize());
|
1115
1212
|
}
|
@@ -1123,7 +1220,7 @@ static VALUE re2_regexp_program_size(const VALUE self) {
|
|
1123
1220
|
static VALUE re2_regexp_options(const VALUE self) {
|
1124
1221
|
re2_pattern *p;
|
1125
1222
|
|
1126
|
-
|
1223
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
1127
1224
|
VALUE options = rb_hash_new();
|
1128
1225
|
|
1129
1226
|
rb_hash_aset(options, ID2SYM(id_utf8),
|
@@ -1174,7 +1271,7 @@ static VALUE re2_regexp_options(const VALUE self) {
|
|
1174
1271
|
*/
|
1175
1272
|
static VALUE re2_regexp_number_of_capturing_groups(const VALUE self) {
|
1176
1273
|
re2_pattern *p;
|
1177
|
-
|
1274
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
1178
1275
|
|
1179
1276
|
return INT2FIX(p->pattern->NumberOfCapturingGroups());
|
1180
1277
|
}
|
@@ -1191,7 +1288,7 @@ static VALUE re2_regexp_number_of_capturing_groups(const VALUE self) {
|
|
1191
1288
|
static VALUE re2_regexp_named_capturing_groups(const VALUE self) {
|
1192
1289
|
re2_pattern *p;
|
1193
1290
|
|
1194
|
-
|
1291
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
1195
1292
|
const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
|
1196
1293
|
VALUE capturing_groups = rb_hash_new();
|
1197
1294
|
|
@@ -1267,7 +1364,7 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
1267
1364
|
/* Ensure text is a string. */
|
1268
1365
|
StringValue(text);
|
1269
1366
|
|
1270
|
-
|
1367
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
1271
1368
|
|
1272
1369
|
int n;
|
1273
1370
|
|
@@ -1299,11 +1396,13 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
1299
1396
|
n += 1;
|
1300
1397
|
|
1301
1398
|
VALUE matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
|
1302
|
-
|
1399
|
+
TypedData_Get_Struct(matchdata, re2_matchdata, &re2_matchdata_data_type, m);
|
1303
1400
|
m->matches = new(std::nothrow) re2::StringPiece[n];
|
1304
|
-
m->regexp
|
1305
|
-
|
1306
|
-
|
1401
|
+
RB_OBJ_WRITE(matchdata, &m->regexp, self);
|
1402
|
+
if (!RTEST(rb_obj_frozen_p(text))) {
|
1403
|
+
text = rb_str_freeze(rb_str_dup(text));
|
1404
|
+
}
|
1405
|
+
RB_OBJ_WRITE(matchdata, &m->text, text);
|
1307
1406
|
|
1308
1407
|
if (m->matches == 0) {
|
1309
1408
|
rb_raise(rb_eNoMemError,
|
@@ -1352,13 +1451,13 @@ static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
|
|
1352
1451
|
re2_pattern *p;
|
1353
1452
|
re2_scanner *c;
|
1354
1453
|
|
1355
|
-
|
1454
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
1356
1455
|
VALUE scanner = rb_class_new_instance(0, 0, re2_cScanner);
|
1357
|
-
|
1456
|
+
TypedData_Get_Struct(scanner, re2_scanner, &re2_scanner_data_type, c);
|
1358
1457
|
|
1359
1458
|
c->input = new(std::nothrow) re2::StringPiece(RSTRING_PTR(text));
|
1360
|
-
c->regexp
|
1361
|
-
c->text
|
1459
|
+
RB_OBJ_WRITE(scanner, &c->regexp, self);
|
1460
|
+
RB_OBJ_WRITE(scanner, &c->text, text);
|
1362
1461
|
|
1363
1462
|
if (p->pattern->ok()) {
|
1364
1463
|
c->number_of_capturing_groups = p->pattern->NumberOfCapturingGroups();
|
@@ -1402,7 +1501,7 @@ static VALUE re2_Replace(VALUE, VALUE str, VALUE pattern,
|
|
1402
1501
|
|
1403
1502
|
/* Do the replacement. */
|
1404
1503
|
if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
|
1405
|
-
|
1504
|
+
TypedData_Get_Struct(pattern, re2_pattern, &re2_regexp_data_type, p);
|
1406
1505
|
RE2::Replace(&str_as_string, *p->pattern, RSTRING_PTR(rewrite));
|
1407
1506
|
|
1408
1507
|
return encoded_str_new(str_as_string.data(), str_as_string.size(),
|
@@ -1446,7 +1545,7 @@ static VALUE re2_GlobalReplace(VALUE, VALUE str, VALUE pattern,
|
|
1446
1545
|
|
1447
1546
|
/* Do the replacement. */
|
1448
1547
|
if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
|
1449
|
-
|
1548
|
+
TypedData_Get_Struct(pattern, re2_pattern, &re2_regexp_data_type, p);
|
1450
1549
|
RE2::GlobalReplace(&str_as_string, *p->pattern, RSTRING_PTR(rewrite));
|
1451
1550
|
|
1452
1551
|
return encoded_str_new(str_as_string.data(), str_as_string.size(),
|
@@ -1480,16 +1579,39 @@ static VALUE re2_QuoteMeta(VALUE, VALUE unquoted) {
|
|
1480
1579
|
return rb_str_new(quoted_string.data(), quoted_string.size());
|
1481
1580
|
}
|
1482
1581
|
|
1483
|
-
static void re2_set_free(
|
1484
|
-
|
1485
|
-
|
1582
|
+
static void re2_set_free(void *ptr) {
|
1583
|
+
re2_set *s = reinterpret_cast<re2_set *>(ptr);
|
1584
|
+
if (s->set) {
|
1585
|
+
delete s->set;
|
1486
1586
|
}
|
1487
|
-
|
1587
|
+
xfree(s);
|
1488
1588
|
}
|
1489
1589
|
|
1590
|
+
static size_t re2_set_memsize(const void *ptr) {
|
1591
|
+
const re2_set *s = reinterpret_cast<const re2_set *>(ptr);
|
1592
|
+
size_t size = sizeof(*s);
|
1593
|
+
if (s->set) {
|
1594
|
+
size += sizeof(*s->set);
|
1595
|
+
}
|
1596
|
+
|
1597
|
+
return size;
|
1598
|
+
}
|
1599
|
+
|
1600
|
+
static const rb_data_type_t re2_set_data_type = {
|
1601
|
+
.wrap_struct_name = "RE2::Set",
|
1602
|
+
.function = {
|
1603
|
+
.dmark = NULL,
|
1604
|
+
.dfree = re2_set_free,
|
1605
|
+
.dsize = re2_set_memsize,
|
1606
|
+
},
|
1607
|
+
// IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
|
1608
|
+
// macro to update VALUE references, as to trigger write barriers.
|
1609
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
|
1610
|
+
};
|
1611
|
+
|
1490
1612
|
static VALUE re2_set_allocate(VALUE klass) {
|
1491
1613
|
re2_set *s;
|
1492
|
-
VALUE result =
|
1614
|
+
VALUE result = TypedData_Make_Struct(klass, re2_set, &re2_set_data_type, s);
|
1493
1615
|
|
1494
1616
|
return result;
|
1495
1617
|
}
|
@@ -1540,7 +1662,7 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
|
|
1540
1662
|
re2_set *s;
|
1541
1663
|
|
1542
1664
|
rb_scan_args(argc, argv, "02", &anchor, &options);
|
1543
|
-
|
1665
|
+
TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);
|
1544
1666
|
|
1545
1667
|
RE2::Anchor re2_anchor = RE2::UNANCHORED;
|
1546
1668
|
|
@@ -1588,7 +1710,7 @@ static VALUE re2_set_add(VALUE self, VALUE pattern) {
|
|
1588
1710
|
StringValue(pattern);
|
1589
1711
|
|
1590
1712
|
re2_set *s;
|
1591
|
-
|
1713
|
+
TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);
|
1592
1714
|
|
1593
1715
|
/* To prevent the memory of the err string leaking when we call rb_raise,
|
1594
1716
|
* take a copy of it and let it go out of scope.
|
@@ -1621,7 +1743,7 @@ static VALUE re2_set_add(VALUE self, VALUE pattern) {
|
|
1621
1743
|
*/
|
1622
1744
|
static VALUE re2_set_compile(VALUE self) {
|
1623
1745
|
re2_set *s;
|
1624
|
-
|
1746
|
+
TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);
|
1625
1747
|
|
1626
1748
|
return BOOL2RUBY(s->set->Compile());
|
1627
1749
|
}
|
@@ -1688,7 +1810,7 @@ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
|
|
1688
1810
|
|
1689
1811
|
StringValue(str);
|
1690
1812
|
re2_set *s;
|
1691
|
-
|
1813
|
+
TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);
|
1692
1814
|
|
1693
1815
|
if (RTEST(options)) {
|
1694
1816
|
Check_Type(options, T_HASH);
|
@@ -1755,12 +1877,14 @@ extern "C" void Init_re2(void) {
|
|
1755
1877
|
re2_eSetUnsupportedError = rb_define_class_under(re2_cSet, "UnsupportedError",
|
1756
1878
|
rb_const_get(rb_cObject, rb_intern("StandardError")));
|
1757
1879
|
|
1758
|
-
rb_define_alloc_func(re2_cRegexp,
|
1880
|
+
rb_define_alloc_func(re2_cRegexp,
|
1881
|
+
reinterpret_cast<VALUE (*)(VALUE)>(re2_regexp_allocate));
|
1759
1882
|
rb_define_alloc_func(re2_cMatchData,
|
1760
|
-
|
1883
|
+
reinterpret_cast<VALUE (*)(VALUE)>(re2_matchdata_allocate));
|
1761
1884
|
rb_define_alloc_func(re2_cScanner,
|
1762
|
-
|
1763
|
-
rb_define_alloc_func(re2_cSet,
|
1885
|
+
reinterpret_cast<VALUE (*)(VALUE)>(re2_scanner_allocate));
|
1886
|
+
rb_define_alloc_func(re2_cSet,
|
1887
|
+
reinterpret_cast<VALUE (*)(VALUE)>(re2_set_allocate));
|
1764
1888
|
|
1765
1889
|
rb_define_method(re2_cMatchData, "string",
|
1766
1890
|
RUBY_METHOD_FUNC(re2_matchdata_string), 0);
|
data/lib/re2/version.rb
CHANGED
data/re2.gemspec
CHANGED
@@ -11,7 +11,6 @@ Gem::Specification.new do |s|
|
|
11
11
|
s.license = "BSD-3-Clause"
|
12
12
|
s.required_ruby_version = ">= 2.6.0"
|
13
13
|
s.files = [
|
14
|
-
".rspec",
|
15
14
|
"dependencies.yml",
|
16
15
|
"ext/re2/extconf.rb",
|
17
16
|
"ext/re2/re2.cc",
|
@@ -28,6 +27,7 @@ Gem::Specification.new do |s|
|
|
28
27
|
"re2.gemspec"
|
29
28
|
]
|
30
29
|
s.test_files = [
|
30
|
+
".rspec",
|
31
31
|
"spec/spec_helper.rb",
|
32
32
|
"spec/re2_spec.rb",
|
33
33
|
"spec/kernel_spec.rb",
|
data/spec/re2/match_data_spec.rb
CHANGED
@@ -1,5 +1,14 @@
|
|
1
1
|
# encoding: utf-8
|
2
|
+
require 'objspace'
|
3
|
+
|
2
4
|
RSpec.describe RE2::MatchData do
|
5
|
+
it "reports a larger consuming memory size when it has more matches" do
|
6
|
+
matches1 = RE2::Regexp.new('w(o)').match('woo')
|
7
|
+
matches2 = RE2::Regexp.new('w(o)(o)').match('woo')
|
8
|
+
|
9
|
+
expect(ObjectSpace.memsize_of(matches1)).to be < ObjectSpace.memsize_of(matches2)
|
10
|
+
end
|
11
|
+
|
3
12
|
describe "#to_a" do
|
4
13
|
it "is populated with the match and capturing groups" do
|
5
14
|
a = RE2::Regexp.new('w(o)(o)').match('woo').to_a
|
@@ -129,6 +138,12 @@ RSpec.describe RE2::MatchData do
|
|
129
138
|
re = RE2::Regexp.new('(\D+)').match("bob")
|
130
139
|
expect(re.string).to be_frozen
|
131
140
|
end
|
141
|
+
|
142
|
+
it "does not copy the string if it was already frozen" do
|
143
|
+
string = "bob".freeze
|
144
|
+
re = RE2::Regexp.new('(\D+)').match(string)
|
145
|
+
expect(re.string).to equal(string)
|
146
|
+
end
|
132
147
|
end
|
133
148
|
|
134
149
|
describe "#size" do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: re2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.3.0
|
4
|
+
version: 2.4.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Paul Mucur
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2023-
|
12
|
+
date: 2023-11-17 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake-compiler
|
@@ -124,6 +124,7 @@ signing_key:
|
|
124
124
|
specification_version: 4
|
125
125
|
summary: Ruby bindings to RE2.
|
126
126
|
test_files:
|
127
|
+
- ".rspec"
|
127
128
|
- spec/spec_helper.rb
|
128
129
|
- spec/re2_spec.rb
|
129
130
|
- spec/kernel_spec.rb
|