re2 2.23.0 → 2.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +107 -4
- data/Rakefile +0 -4
- data/dependencies.yml +2 -2
- data/ext/re2/extconf.rb +4 -5
- data/ext/re2/re2.cc +962 -275
- data/lib/re2/string.rb +6 -6
- data/lib/re2/version.rb +1 -1
- data/ports/archives/20260107.1.tar.gz +0 -0
- data/spec/re2/match_data_spec.rb +495 -2
- data/spec/re2/regexp_spec.rb +324 -1
- data/spec/re2/scanner_spec.rb +134 -13
- data/spec/re2/set_spec.rb +75 -4
- data/spec/re2_spec.rb +217 -43
- metadata +3 -3
- data/ports/archives/20250814.1.tar.gz +0 -0
data/ext/re2/re2.cc
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
* Released under the BSD Licence, please see LICENSE.txt
|
|
9
9
|
*/
|
|
10
10
|
|
|
11
|
-
#include <
|
|
11
|
+
#include <cstdint>
|
|
12
12
|
|
|
13
13
|
#include <map>
|
|
14
14
|
#include <sstream>
|
|
@@ -19,6 +19,7 @@
|
|
|
19
19
|
#include <re2/set.h>
|
|
20
20
|
#include <ruby.h>
|
|
21
21
|
#include <ruby/encoding.h>
|
|
22
|
+
#include <ruby/thread.h>
|
|
22
23
|
|
|
23
24
|
#define BOOL2RUBY(v) (v ? Qtrue : Qfalse)
|
|
24
25
|
|
|
@@ -43,6 +44,132 @@ typedef struct {
|
|
|
43
44
|
RE2::Set *set;
|
|
44
45
|
} re2_set;
|
|
45
46
|
|
|
47
|
+
struct nogvl_match_arg {
|
|
48
|
+
const RE2 *pattern;
|
|
49
|
+
re2::StringPiece text;
|
|
50
|
+
size_t startpos;
|
|
51
|
+
size_t endpos;
|
|
52
|
+
RE2::Anchor anchor;
|
|
53
|
+
re2::StringPiece *matches;
|
|
54
|
+
int n;
|
|
55
|
+
bool matched;
|
|
56
|
+
};
|
|
57
|
+
|
|
58
|
+
static void *nogvl_match(void *ptr) {
|
|
59
|
+
auto *arg = static_cast<nogvl_match_arg *>(ptr);
|
|
60
|
+
#ifdef HAVE_ENDPOS_ARGUMENT
|
|
61
|
+
arg->matched = arg->pattern->Match(
|
|
62
|
+
arg->text, arg->startpos, arg->endpos,
|
|
63
|
+
arg->anchor, arg->matches, arg->n);
|
|
64
|
+
#else
|
|
65
|
+
arg->matched = arg->pattern->Match(
|
|
66
|
+
arg->text, arg->startpos,
|
|
67
|
+
arg->anchor, arg->matches, arg->n);
|
|
68
|
+
#endif
|
|
69
|
+
return nullptr;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
static bool re2_match_without_gvl(
|
|
73
|
+
const RE2 *pattern, VALUE text, size_t startpos, size_t endpos,
|
|
74
|
+
RE2::Anchor anchor, re2::StringPiece *matches, int n) {
|
|
75
|
+
nogvl_match_arg arg;
|
|
76
|
+
arg.pattern = pattern;
|
|
77
|
+
arg.text = re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text));
|
|
78
|
+
arg.startpos = startpos;
|
|
79
|
+
arg.endpos = endpos;
|
|
80
|
+
arg.anchor = anchor;
|
|
81
|
+
arg.matches = matches;
|
|
82
|
+
arg.n = n;
|
|
83
|
+
arg.matched = false;
|
|
84
|
+
|
|
85
|
+
/* Abseil's synchronization primitives (SRWLOCK, SleepConditionVariableSRW)
|
|
86
|
+
* are incompatible with Ruby's Win32 Mutex-based GVL, causing
|
|
87
|
+
* WAIT_ABANDONED crashes when multiple threads match concurrently.
|
|
88
|
+
*/
|
|
89
|
+
#ifdef _WIN32
|
|
90
|
+
nogvl_match(&arg);
|
|
91
|
+
#else
|
|
92
|
+
/* No unblocking function is needed: RE2 matching is CPU-bound computation,
|
|
93
|
+
* not a blocking system call, so a signal cannot safely interrupt it.
|
|
94
|
+
*/
|
|
95
|
+
rb_thread_call_without_gvl(nogvl_match, &arg, NULL, NULL);
|
|
96
|
+
#endif
|
|
97
|
+
|
|
98
|
+
return arg.matched;
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
struct nogvl_set_match_arg {
|
|
102
|
+
const RE2::Set *set;
|
|
103
|
+
re2::StringPiece text;
|
|
104
|
+
std::vector<int> *v;
|
|
105
|
+
#ifdef HAVE_ERROR_INFO_ARGUMENT
|
|
106
|
+
RE2::Set::ErrorInfo *error_info;
|
|
107
|
+
#endif
|
|
108
|
+
bool matched;
|
|
109
|
+
};
|
|
110
|
+
|
|
111
|
+
static void *nogvl_set_match(void *ptr) {
|
|
112
|
+
auto *arg = static_cast<nogvl_set_match_arg *>(ptr);
|
|
113
|
+
#ifdef HAVE_ERROR_INFO_ARGUMENT
|
|
114
|
+
if (arg->error_info) {
|
|
115
|
+
arg->matched = arg->set->Match(arg->text, arg->v, arg->error_info);
|
|
116
|
+
} else {
|
|
117
|
+
arg->matched = arg->set->Match(arg->text, arg->v);
|
|
118
|
+
}
|
|
119
|
+
#else
|
|
120
|
+
arg->matched = arg->set->Match(arg->text, arg->v);
|
|
121
|
+
#endif
|
|
122
|
+
return nullptr;
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
struct nogvl_replace_arg {
|
|
126
|
+
std::string *str;
|
|
127
|
+
const RE2 *pattern;
|
|
128
|
+
re2::StringPiece string_pattern;
|
|
129
|
+
re2::StringPiece rewrite;
|
|
130
|
+
};
|
|
131
|
+
|
|
132
|
+
static void *nogvl_replace(void *ptr) {
|
|
133
|
+
auto *arg = static_cast<nogvl_replace_arg *>(ptr);
|
|
134
|
+
if (arg->pattern) {
|
|
135
|
+
RE2::Replace(arg->str, *arg->pattern, arg->rewrite);
|
|
136
|
+
} else {
|
|
137
|
+
RE2::Replace(arg->str, arg->string_pattern, arg->rewrite);
|
|
138
|
+
}
|
|
139
|
+
return nullptr;
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
static void *nogvl_global_replace(void *ptr) {
|
|
143
|
+
auto *arg = static_cast<nogvl_replace_arg *>(ptr);
|
|
144
|
+
if (arg->pattern) {
|
|
145
|
+
RE2::GlobalReplace(arg->str, *arg->pattern, arg->rewrite);
|
|
146
|
+
} else {
|
|
147
|
+
RE2::GlobalReplace(arg->str, arg->string_pattern, arg->rewrite);
|
|
148
|
+
}
|
|
149
|
+
return nullptr;
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
struct nogvl_extract_arg {
|
|
153
|
+
re2::StringPiece text;
|
|
154
|
+
const RE2 *pattern;
|
|
155
|
+
re2::StringPiece string_pattern;
|
|
156
|
+
re2::StringPiece rewrite;
|
|
157
|
+
std::string *out;
|
|
158
|
+
bool extracted;
|
|
159
|
+
};
|
|
160
|
+
|
|
161
|
+
static void *nogvl_extract(void *ptr) {
|
|
162
|
+
auto *arg = static_cast<nogvl_extract_arg *>(ptr);
|
|
163
|
+
if (arg->pattern) {
|
|
164
|
+
arg->extracted = RE2::Extract(arg->text, *arg->pattern,
|
|
165
|
+
arg->rewrite, arg->out);
|
|
166
|
+
} else {
|
|
167
|
+
arg->extracted = RE2::Extract(arg->text, RE2(arg->string_pattern),
|
|
168
|
+
arg->rewrite, arg->out);
|
|
169
|
+
}
|
|
170
|
+
return nullptr;
|
|
171
|
+
}
|
|
172
|
+
|
|
46
173
|
VALUE re2_mRE2, re2_cRegexp, re2_cMatchData, re2_cScanner, re2_cSet,
|
|
47
174
|
re2_eSetMatchError, re2_eSetUnsupportedError, re2_eRegexpUnsupportedError;
|
|
48
175
|
|
|
@@ -51,7 +178,7 @@ static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors,
|
|
|
51
178
|
id_max_mem, id_literal, id_never_nl, id_case_sensitive,
|
|
52
179
|
id_perl_classes, id_word_boundary, id_one_line, id_unanchored,
|
|
53
180
|
id_anchor, id_anchor_start, id_anchor_both, id_exception,
|
|
54
|
-
id_submatches, id_startpos, id_endpos;
|
|
181
|
+
id_submatches, id_startpos, id_endpos, id_symbolize_names;
|
|
55
182
|
|
|
56
183
|
inline VALUE encoded_str_new(const char *str, long length, RE2::Options::Encoding encoding) {
|
|
57
184
|
if (encoding == RE2::Options::EncodingUTF8) {
|
|
@@ -126,18 +253,22 @@ static void parse_re2_options(RE2::Options* re2_options, const VALUE options) {
|
|
|
126
253
|
}
|
|
127
254
|
|
|
128
255
|
static void re2_matchdata_mark(void *ptr) {
|
|
129
|
-
re2_matchdata *m =
|
|
256
|
+
re2_matchdata *m = static_cast<re2_matchdata *>(ptr);
|
|
130
257
|
rb_gc_mark_movable(m->regexp);
|
|
258
|
+
|
|
259
|
+
/* Text must not be movable because StringPiece matches hold pointers into
|
|
260
|
+
* its underlying buffer; moving the string would invalidate them.
|
|
261
|
+
*/
|
|
131
262
|
rb_gc_mark(m->text);
|
|
132
263
|
}
|
|
133
264
|
|
|
134
265
|
static void re2_matchdata_compact(void *ptr) {
|
|
135
|
-
re2_matchdata *m =
|
|
266
|
+
re2_matchdata *m = static_cast<re2_matchdata *>(ptr);
|
|
136
267
|
m->regexp = rb_gc_location(m->regexp);
|
|
137
268
|
}
|
|
138
269
|
|
|
139
270
|
static void re2_matchdata_free(void *ptr) {
|
|
140
|
-
re2_matchdata *m =
|
|
271
|
+
re2_matchdata *m = static_cast<re2_matchdata *>(ptr);
|
|
141
272
|
if (m->matches) {
|
|
142
273
|
delete[] m->matches;
|
|
143
274
|
}
|
|
@@ -145,7 +276,7 @@ static void re2_matchdata_free(void *ptr) {
|
|
|
145
276
|
}
|
|
146
277
|
|
|
147
278
|
static size_t re2_matchdata_memsize(const void *ptr) {
|
|
148
|
-
const re2_matchdata *m =
|
|
279
|
+
const re2_matchdata *m = static_cast<const re2_matchdata *>(ptr);
|
|
149
280
|
size_t size = sizeof(*m);
|
|
150
281
|
if (m->matches) {
|
|
151
282
|
size += sizeof(*m->matches) * m->number_of_matches;
|
|
@@ -170,18 +301,22 @@ static const rb_data_type_t re2_matchdata_data_type = {
|
|
|
170
301
|
};
|
|
171
302
|
|
|
172
303
|
static void re2_scanner_mark(void *ptr) {
|
|
173
|
-
re2_scanner *s =
|
|
304
|
+
re2_scanner *s = static_cast<re2_scanner *>(ptr);
|
|
174
305
|
rb_gc_mark_movable(s->regexp);
|
|
306
|
+
|
|
307
|
+
/* Text must not be movable because the StringPiece input holds a pointer
|
|
308
|
+
* into its underlying buffer; moving the string would invalidate it.
|
|
309
|
+
*/
|
|
175
310
|
rb_gc_mark(s->text);
|
|
176
311
|
}
|
|
177
312
|
|
|
178
313
|
static void re2_scanner_compact(void *ptr) {
|
|
179
|
-
re2_scanner *s =
|
|
314
|
+
re2_scanner *s = static_cast<re2_scanner *>(ptr);
|
|
180
315
|
s->regexp = rb_gc_location(s->regexp);
|
|
181
316
|
}
|
|
182
317
|
|
|
183
318
|
static void re2_scanner_free(void *ptr) {
|
|
184
|
-
re2_scanner *s =
|
|
319
|
+
re2_scanner *s = static_cast<re2_scanner *>(ptr);
|
|
185
320
|
if (s->input) {
|
|
186
321
|
delete s->input;
|
|
187
322
|
}
|
|
@@ -189,7 +324,7 @@ static void re2_scanner_free(void *ptr) {
|
|
|
189
324
|
}
|
|
190
325
|
|
|
191
326
|
static size_t re2_scanner_memsize(const void *ptr) {
|
|
192
|
-
const re2_scanner *s =
|
|
327
|
+
const re2_scanner *s = static_cast<const re2_scanner *>(ptr);
|
|
193
328
|
size_t size = sizeof(*s);
|
|
194
329
|
if (s->input) {
|
|
195
330
|
size += sizeof(*s->input);
|
|
@@ -214,7 +349,7 @@ static const rb_data_type_t re2_scanner_data_type = {
|
|
|
214
349
|
};
|
|
215
350
|
|
|
216
351
|
static void re2_regexp_free(void *ptr) {
|
|
217
|
-
re2_pattern *p =
|
|
352
|
+
re2_pattern *p = static_cast<re2_pattern *>(ptr);
|
|
218
353
|
if (p->pattern) {
|
|
219
354
|
delete p->pattern;
|
|
220
355
|
}
|
|
@@ -222,7 +357,7 @@ static void re2_regexp_free(void *ptr) {
|
|
|
222
357
|
}
|
|
223
358
|
|
|
224
359
|
static size_t re2_regexp_memsize(const void *ptr) {
|
|
225
|
-
const re2_pattern *p =
|
|
360
|
+
const re2_pattern *p = static_cast<const re2_pattern *>(ptr);
|
|
226
361
|
size_t size = sizeof(*p);
|
|
227
362
|
if (p->pattern) {
|
|
228
363
|
size += sizeof(*p->pattern);
|
|
@@ -242,9 +377,64 @@ static const rb_data_type_t re2_regexp_data_type = {
|
|
|
242
377
|
0,
|
|
243
378
|
// IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
|
|
244
379
|
// macro to update VALUE references, as to trigger write barriers.
|
|
245
|
-
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
|
|
380
|
+
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE
|
|
246
381
|
};
|
|
247
382
|
|
|
383
|
+
static re2_pattern *unwrap_re2_regexp(VALUE self) {
|
|
384
|
+
re2_pattern *p;
|
|
385
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
386
|
+
if (!p->pattern) {
|
|
387
|
+
rb_raise(rb_eTypeError, "uninitialized RE2::Regexp");
|
|
388
|
+
}
|
|
389
|
+
return p;
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
static re2_matchdata *unwrap_re2_matchdata(VALUE self) {
|
|
393
|
+
re2_matchdata *m;
|
|
394
|
+
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
|
395
|
+
if (!RTEST(m->regexp)) {
|
|
396
|
+
rb_raise(rb_eTypeError, "uninitialized RE2::MatchData");
|
|
397
|
+
}
|
|
398
|
+
return m;
|
|
399
|
+
}
|
|
400
|
+
|
|
401
|
+
static re2_scanner *unwrap_re2_scanner(VALUE self) {
|
|
402
|
+
re2_scanner *c;
|
|
403
|
+
TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
|
|
404
|
+
if (!RTEST(c->regexp)) {
|
|
405
|
+
rb_raise(rb_eTypeError, "uninitialized RE2::Scanner");
|
|
406
|
+
}
|
|
407
|
+
return c;
|
|
408
|
+
}
|
|
409
|
+
|
|
410
|
+
/*
|
|
411
|
+
* Returns an array of names of all named capturing groups. Names are returned
|
|
412
|
+
* in alphabetical order rather than definition order, as RE2 stores named
|
|
413
|
+
* groups internally in a sorted map.
|
|
414
|
+
*
|
|
415
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
|
416
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
|
417
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
|
418
|
+
*
|
|
419
|
+
* @return [Array<String>] an array of names of named capturing groups
|
|
420
|
+
* @example
|
|
421
|
+
* RE2::Regexp.new('(?P<a>\d+) (?P<b>\w+)').names #=> ["a", "b"]
|
|
422
|
+
*/
|
|
423
|
+
static VALUE re2_regexp_names(const VALUE self) {
|
|
424
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
425
|
+
|
|
426
|
+
const auto& groups = p->pattern->NamedCapturingGroups();
|
|
427
|
+
VALUE names = rb_ary_new2(groups.size());
|
|
428
|
+
|
|
429
|
+
for (const auto& group : groups) {
|
|
430
|
+
rb_ary_push(names,
|
|
431
|
+
encoded_str_new(group.first.data(), group.first.size(),
|
|
432
|
+
p->pattern->options().encoding()));
|
|
433
|
+
}
|
|
434
|
+
|
|
435
|
+
return names;
|
|
436
|
+
}
|
|
437
|
+
|
|
248
438
|
static VALUE re2_matchdata_allocate(VALUE klass) {
|
|
249
439
|
re2_matchdata *m;
|
|
250
440
|
|
|
@@ -269,8 +459,7 @@ static VALUE re2_scanner_allocate(VALUE klass) {
|
|
|
269
459
|
* m.string #=> "bob 123"
|
|
270
460
|
*/
|
|
271
461
|
static VALUE re2_matchdata_string(const VALUE self) {
|
|
272
|
-
re2_matchdata *m;
|
|
273
|
-
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
|
462
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
274
463
|
|
|
275
464
|
return m->text;
|
|
276
465
|
}
|
|
@@ -287,8 +476,7 @@ static VALUE re2_matchdata_string(const VALUE self) {
|
|
|
287
476
|
* c.string #=> "foo"
|
|
288
477
|
*/
|
|
289
478
|
static VALUE re2_scanner_string(const VALUE self) {
|
|
290
|
-
re2_scanner *c;
|
|
291
|
-
TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
|
|
479
|
+
re2_scanner *c = unwrap_re2_scanner(self);
|
|
292
480
|
|
|
293
481
|
return c->text;
|
|
294
482
|
}
|
|
@@ -302,8 +490,7 @@ static VALUE re2_scanner_string(const VALUE self) {
|
|
|
302
490
|
* c.eof? #=> true
|
|
303
491
|
*/
|
|
304
492
|
static VALUE re2_scanner_eof(const VALUE self) {
|
|
305
|
-
re2_scanner *c;
|
|
306
|
-
TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
|
|
493
|
+
re2_scanner *c = unwrap_re2_scanner(self);
|
|
307
494
|
|
|
308
495
|
return BOOL2RUBY(c->eof);
|
|
309
496
|
}
|
|
@@ -320,13 +507,12 @@ static VALUE re2_scanner_eof(const VALUE self) {
|
|
|
320
507
|
* e.scan #=> ["1"]
|
|
321
508
|
*/
|
|
322
509
|
static VALUE re2_scanner_rewind(VALUE self) {
|
|
323
|
-
re2_scanner *c;
|
|
324
|
-
TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
|
|
510
|
+
re2_scanner *c = unwrap_re2_scanner(self);
|
|
325
511
|
|
|
326
512
|
delete c->input;
|
|
327
513
|
c->input = new(std::nothrow) re2::StringPiece(
|
|
328
514
|
RSTRING_PTR(c->text), RSTRING_LEN(c->text));
|
|
329
|
-
if (c->input ==
|
|
515
|
+
if (c->input == nullptr) {
|
|
330
516
|
rb_raise(rb_eNoMemError,
|
|
331
517
|
"not enough memory to allocate StringPiece for input");
|
|
332
518
|
}
|
|
@@ -336,6 +522,35 @@ static VALUE re2_scanner_rewind(VALUE self) {
|
|
|
336
522
|
return self;
|
|
337
523
|
}
|
|
338
524
|
|
|
525
|
+
static VALUE re2_scanner_initialize_copy(VALUE self, VALUE other) {
|
|
526
|
+
re2_scanner *self_c;
|
|
527
|
+
re2_scanner *other_c = unwrap_re2_scanner(other);
|
|
528
|
+
|
|
529
|
+
TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, self_c);
|
|
530
|
+
|
|
531
|
+
if (self_c->input) {
|
|
532
|
+
delete self_c->input;
|
|
533
|
+
self_c->input = nullptr;
|
|
534
|
+
}
|
|
535
|
+
|
|
536
|
+
RB_OBJ_WRITE(self, &self_c->regexp, other_c->regexp);
|
|
537
|
+
RB_OBJ_WRITE(self, &self_c->text, other_c->text);
|
|
538
|
+
self_c->number_of_capturing_groups = other_c->number_of_capturing_groups;
|
|
539
|
+
self_c->eof = other_c->eof;
|
|
540
|
+
|
|
541
|
+
if (other_c->input) {
|
|
542
|
+
self_c->input = new(std::nothrow) re2::StringPiece(*other_c->input);
|
|
543
|
+
if (self_c->input == nullptr) {
|
|
544
|
+
rb_raise(rb_eNoMemError,
|
|
545
|
+
"not enough memory to allocate StringPiece for input");
|
|
546
|
+
}
|
|
547
|
+
} else {
|
|
548
|
+
self_c->input = nullptr;
|
|
549
|
+
}
|
|
550
|
+
|
|
551
|
+
return self;
|
|
552
|
+
}
|
|
553
|
+
|
|
339
554
|
/*
|
|
340
555
|
* Scan the given text incrementally for matches using
|
|
341
556
|
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L447-L463
|
|
@@ -356,11 +571,8 @@ static VALUE re2_scanner_rewind(VALUE self) {
|
|
|
356
571
|
* s.scan #=> ["bar"]
|
|
357
572
|
*/
|
|
358
573
|
static VALUE re2_scanner_scan(VALUE self) {
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
|
|
363
|
-
TypedData_Get_Struct(c->regexp, re2_pattern, &re2_regexp_data_type, p);
|
|
574
|
+
re2_scanner *c = unwrap_re2_scanner(self);
|
|
575
|
+
re2_pattern *p = unwrap_re2_regexp(c->regexp);
|
|
364
576
|
|
|
365
577
|
std::vector<RE2::Arg> argv(c->number_of_capturing_groups);
|
|
366
578
|
std::vector<RE2::Arg*> args(c->number_of_capturing_groups);
|
|
@@ -385,7 +597,7 @@ static VALUE re2_scanner_scan(VALUE self) {
|
|
|
385
597
|
VALUE result = rb_ary_new2(c->number_of_capturing_groups);
|
|
386
598
|
|
|
387
599
|
for (int i = 0; i < c->number_of_capturing_groups; ++i) {
|
|
388
|
-
if (matches[i].
|
|
600
|
+
if (matches[i].data() == nullptr) {
|
|
389
601
|
rb_ary_push(result, Qnil);
|
|
390
602
|
} else {
|
|
391
603
|
rb_ary_push(result, encoded_str_new(matches[i].data(),
|
|
@@ -397,9 +609,27 @@ static VALUE re2_scanner_scan(VALUE self) {
|
|
|
397
609
|
/* Check whether we've exhausted the input yet. */
|
|
398
610
|
c->eof = new_input_size == 0;
|
|
399
611
|
|
|
400
|
-
/* If the match didn't advance the input, we need to do this ourselves
|
|
612
|
+
/* If the match didn't advance the input, we need to do this ourselves,
|
|
613
|
+
* advancing by a whole character to avoid splitting multi-byte characters.
|
|
614
|
+
*
|
|
615
|
+
* The lookup table approach is taken from RE2's own Python extension: the
|
|
616
|
+
* high 4 bits of a UTF-8 lead byte determine the character's byte length.
|
|
617
|
+
*
|
|
618
|
+
* See https://github.com/google/re2/blob/972a15cedd008d846f1a39b2e88ce48d7f166cbd/python/_re2.cc#L46-L48
|
|
619
|
+
*/
|
|
401
620
|
if (!input_advanced && new_input_size > 0) {
|
|
402
|
-
|
|
621
|
+
size_t char_size = 1;
|
|
622
|
+
|
|
623
|
+
if (p->pattern->options().encoding() == RE2::Options::EncodingUTF8) {
|
|
624
|
+
char_size = "\1\1\1\1\1\1\1\1\1\1\1\1\2\2\3\4"
|
|
625
|
+
[((*c->input)[0] & 0xFF) >> 4];
|
|
626
|
+
|
|
627
|
+
if (char_size > new_input_size) {
|
|
628
|
+
char_size = new_input_size;
|
|
629
|
+
}
|
|
630
|
+
}
|
|
631
|
+
|
|
632
|
+
c->input->remove_prefix(char_size);
|
|
403
633
|
}
|
|
404
634
|
|
|
405
635
|
return result;
|
|
@@ -409,47 +639,44 @@ static VALUE re2_scanner_scan(VALUE self) {
|
|
|
409
639
|
}
|
|
410
640
|
|
|
411
641
|
static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
|
|
412
|
-
re2_matchdata *m;
|
|
413
|
-
re2_pattern *p;
|
|
414
|
-
|
|
415
|
-
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
|
416
|
-
TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
|
|
642
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
643
|
+
re2_pattern *p = unwrap_re2_regexp(m->regexp);
|
|
417
644
|
|
|
418
645
|
int id;
|
|
419
646
|
|
|
420
647
|
if (RB_INTEGER_TYPE_P(idx)) {
|
|
421
648
|
id = NUM2INT(idx);
|
|
422
649
|
} else if (SYMBOL_P(idx)) {
|
|
423
|
-
const
|
|
424
|
-
|
|
650
|
+
const auto& groups = p->pattern->NamedCapturingGroups();
|
|
651
|
+
auto search = groups.find(rb_id2name(SYM2ID(idx)));
|
|
425
652
|
|
|
426
653
|
if (search != groups.end()) {
|
|
427
654
|
id = search->second;
|
|
428
655
|
} else {
|
|
429
|
-
return
|
|
656
|
+
return nullptr;
|
|
430
657
|
}
|
|
431
658
|
} else {
|
|
432
659
|
StringValue(idx);
|
|
433
660
|
|
|
434
|
-
const
|
|
435
|
-
|
|
661
|
+
const auto& groups = p->pattern->NamedCapturingGroups();
|
|
662
|
+
auto search = groups.find(std::string(RSTRING_PTR(idx), RSTRING_LEN(idx)));
|
|
436
663
|
|
|
437
664
|
if (search != groups.end()) {
|
|
438
665
|
id = search->second;
|
|
439
666
|
} else {
|
|
440
|
-
return
|
|
667
|
+
return nullptr;
|
|
441
668
|
}
|
|
442
669
|
}
|
|
443
670
|
|
|
444
671
|
if (id >= 0 && id < m->number_of_matches) {
|
|
445
672
|
re2::StringPiece *match = &m->matches[id];
|
|
446
673
|
|
|
447
|
-
if (
|
|
674
|
+
if (match->data() != nullptr) {
|
|
448
675
|
return match;
|
|
449
676
|
}
|
|
450
677
|
}
|
|
451
678
|
|
|
452
|
-
return
|
|
679
|
+
return nullptr;
|
|
453
680
|
}
|
|
454
681
|
|
|
455
682
|
/*
|
|
@@ -458,14 +685,12 @@ static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
|
|
|
458
685
|
*
|
|
459
686
|
* @return [Integer] the number of elements
|
|
460
687
|
* @example
|
|
461
|
-
* m = RE2::Regexp.new('(\d+)').
|
|
688
|
+
* m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
|
|
462
689
|
* m.size #=> 2
|
|
463
690
|
* m.length #=> 2
|
|
464
691
|
*/
|
|
465
692
|
static VALUE re2_matchdata_size(const VALUE self) {
|
|
466
|
-
re2_matchdata *m;
|
|
467
|
-
|
|
468
|
-
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
|
693
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
469
694
|
|
|
470
695
|
return INT2FIX(m->number_of_matches);
|
|
471
696
|
}
|
|
@@ -477,17 +702,15 @@ static VALUE re2_matchdata_size(const VALUE self) {
|
|
|
477
702
|
* @return [Integer, nil] the offset of the start of the match or `nil` if
|
|
478
703
|
* there is no such submatch
|
|
479
704
|
* @example
|
|
480
|
-
* m = RE2::Regexp.new('ob (\d+)').
|
|
705
|
+
* m = RE2::Regexp.new('ob (\d+)').partial_match("bob 123")
|
|
481
706
|
* m.begin(0) #=> 1
|
|
482
707
|
* m.begin(1) #=> 4
|
|
483
708
|
*/
|
|
484
709
|
static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
|
|
485
|
-
re2_matchdata *m;
|
|
486
|
-
|
|
487
|
-
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
|
710
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
488
711
|
|
|
489
712
|
re2::StringPiece *match = re2_matchdata_find_match(n, self);
|
|
490
|
-
if (match ==
|
|
713
|
+
if (match == nullptr) {
|
|
491
714
|
return Qnil;
|
|
492
715
|
} else {
|
|
493
716
|
long offset = match->data() - RSTRING_PTR(m->text);
|
|
@@ -504,17 +727,15 @@ static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
|
|
|
504
727
|
* @return [Integer, nil] the offset of the character following the end of the
|
|
505
728
|
* match or `nil` if there is no such match
|
|
506
729
|
* @example
|
|
507
|
-
* m = RE2::Regexp.new('ob (\d+) b').
|
|
730
|
+
* m = RE2::Regexp.new('ob (\d+) b').partial_match("bob 123 bob")
|
|
508
731
|
* m.end(0) #=> 9
|
|
509
732
|
* m.end(1) #=> 7
|
|
510
733
|
*/
|
|
511
734
|
static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
|
|
512
|
-
re2_matchdata *m;
|
|
513
|
-
|
|
514
|
-
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
|
735
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
515
736
|
|
|
516
737
|
re2::StringPiece *match = re2_matchdata_find_match(n, self);
|
|
517
|
-
if (match ==
|
|
738
|
+
if (match == nullptr) {
|
|
518
739
|
return Qnil;
|
|
519
740
|
} else {
|
|
520
741
|
long offset = (match->data() - RSTRING_PTR(m->text)) + match->size();
|
|
@@ -523,17 +744,129 @@ static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
|
|
|
523
744
|
}
|
|
524
745
|
}
|
|
525
746
|
|
|
747
|
+
/*
|
|
748
|
+
* Returns the portion of the original string before the match.
|
|
749
|
+
*
|
|
750
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
|
751
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
|
752
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
|
753
|
+
*
|
|
754
|
+
* @return [String] the portion of the original string before the match
|
|
755
|
+
* @example
|
|
756
|
+
* m = RE2::Regexp.new('(\d+)').partial_match("bob 123 456")
|
|
757
|
+
* m.pre_match #=> "bob "
|
|
758
|
+
*/
|
|
759
|
+
static VALUE re2_matchdata_pre_match(const VALUE self) {
|
|
760
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
761
|
+
re2_pattern *p = unwrap_re2_regexp(m->regexp);
|
|
762
|
+
|
|
763
|
+
re2::StringPiece *match = &m->matches[0];
|
|
764
|
+
if (match->data() == nullptr) {
|
|
765
|
+
return Qnil;
|
|
766
|
+
}
|
|
767
|
+
|
|
768
|
+
long offset = match->data() - RSTRING_PTR(m->text);
|
|
769
|
+
|
|
770
|
+
return encoded_str_new(RSTRING_PTR(m->text), offset,
|
|
771
|
+
p->pattern->options().encoding());
|
|
772
|
+
}
|
|
773
|
+
|
|
774
|
+
/*
|
|
775
|
+
* Returns the portion of the original string after the match.
|
|
776
|
+
*
|
|
777
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
|
778
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
|
779
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
|
780
|
+
*
|
|
781
|
+
* @return [String] the portion of the original string after the match
|
|
782
|
+
* @example
|
|
783
|
+
* m = RE2::Regexp.new('(\d+)').partial_match("bob 123 456")
|
|
784
|
+
* m.post_match #=> " 456"
|
|
785
|
+
*/
|
|
786
|
+
static VALUE re2_matchdata_post_match(const VALUE self) {
|
|
787
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
788
|
+
re2_pattern *p = unwrap_re2_regexp(m->regexp);
|
|
789
|
+
|
|
790
|
+
re2::StringPiece *match = &m->matches[0];
|
|
791
|
+
if (match->data() == nullptr) {
|
|
792
|
+
return Qnil;
|
|
793
|
+
}
|
|
794
|
+
|
|
795
|
+
long start = (match->data() - RSTRING_PTR(m->text)) + match->size();
|
|
796
|
+
long remaining = RSTRING_LEN(m->text) - start;
|
|
797
|
+
|
|
798
|
+
return encoded_str_new(RSTRING_PTR(m->text) + start, remaining,
|
|
799
|
+
p->pattern->options().encoding());
|
|
800
|
+
}
|
|
801
|
+
|
|
802
|
+
/*
|
|
803
|
+
* Returns a two-element array containing the beginning and ending offsets of
|
|
804
|
+
* the nth match.
|
|
805
|
+
*
|
|
806
|
+
* @param [Integer, String, Symbol] n the name or number of the match
|
|
807
|
+
* @return [Array<Integer>, nil] a two-element array with the beginning and
|
|
808
|
+
* ending offsets of the match or `nil` if there is no such match
|
|
809
|
+
* @example
|
|
810
|
+
* m = RE2::Regexp.new('ob (\d+)').partial_match("bob 123")
|
|
811
|
+
* m.offset(0) #=> [1, 7]
|
|
812
|
+
* m.offset(1) #=> [4, 7]
|
|
813
|
+
*/
|
|
814
|
+
static VALUE re2_matchdata_offset(const VALUE self, VALUE n) {
|
|
815
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
816
|
+
|
|
817
|
+
re2::StringPiece *match = re2_matchdata_find_match(n, self);
|
|
818
|
+
if (match == nullptr) {
|
|
819
|
+
return Qnil;
|
|
820
|
+
}
|
|
821
|
+
|
|
822
|
+
long start = match->data() - RSTRING_PTR(m->text);
|
|
823
|
+
long end_pos = start + match->size();
|
|
824
|
+
|
|
825
|
+
VALUE array = rb_ary_new2(2);
|
|
826
|
+
rb_ary_push(array, LONG2NUM(rb_str_sublen(m->text, start)));
|
|
827
|
+
rb_ary_push(array, LONG2NUM(rb_str_sublen(m->text, end_pos)));
|
|
828
|
+
|
|
829
|
+
return array;
|
|
830
|
+
}
|
|
831
|
+
|
|
832
|
+
/*
|
|
833
|
+
* Returns the length of the nth match in characters. This is equivalent to
|
|
834
|
+
* `m[n].length` but without allocating a new string.
|
|
835
|
+
*
|
|
836
|
+
* @param [Integer, String, Symbol] n the name or number of the match
|
|
837
|
+
* @return [Integer, nil] the length of the match or `nil` if there is no such
|
|
838
|
+
* match
|
|
839
|
+
* @example
|
|
840
|
+
* m = RE2::Regexp.new('(?P<word>\w+) (?P<number>\d+)').partial_match("alice 123")
|
|
841
|
+
* m.match_length(0) #=> 9
|
|
842
|
+
* m.match_length(1) #=> 5
|
|
843
|
+
* m.match_length(:number) #=> 3
|
|
844
|
+
*/
|
|
845
|
+
static VALUE re2_matchdata_match_length(const VALUE self, VALUE n) {
|
|
846
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
847
|
+
|
|
848
|
+
re2::StringPiece *match = re2_matchdata_find_match(n, self);
|
|
849
|
+
if (match == nullptr) {
|
|
850
|
+
return Qnil;
|
|
851
|
+
}
|
|
852
|
+
|
|
853
|
+
long start = match->data() - RSTRING_PTR(m->text);
|
|
854
|
+
long end_pos = start + match->size();
|
|
855
|
+
long char_len = rb_str_sublen(m->text, end_pos) - rb_str_sublen(m->text, start);
|
|
856
|
+
|
|
857
|
+
return LONG2NUM(char_len);
|
|
858
|
+
}
|
|
859
|
+
|
|
526
860
|
/*
|
|
527
861
|
* Returns the {RE2::Regexp} used in the match.
|
|
528
862
|
*
|
|
529
863
|
* @return [RE2::Regexp] the regular expression used in the match
|
|
530
864
|
* @example
|
|
531
|
-
* m = RE2::Regexp.new('(\d+)').
|
|
865
|
+
* m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
|
|
532
866
|
* m.regexp #=> #<RE2::Regexp /(\d+)/>
|
|
533
867
|
*/
|
|
534
868
|
static VALUE re2_matchdata_regexp(const VALUE self) {
|
|
535
|
-
re2_matchdata *m;
|
|
536
|
-
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
|
869
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
537
870
|
|
|
538
871
|
return m->regexp;
|
|
539
872
|
}
|
|
@@ -547,8 +880,7 @@ static VALUE re2_matchdata_regexp(const VALUE self) {
|
|
|
547
880
|
* c.regexp #=> #<RE2::Regexp /(\d+)/>
|
|
548
881
|
*/
|
|
549
882
|
static VALUE re2_scanner_regexp(const VALUE self) {
|
|
550
|
-
re2_scanner *c;
|
|
551
|
-
TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
|
|
883
|
+
re2_scanner *c = unwrap_re2_scanner(self);
|
|
552
884
|
|
|
553
885
|
return c->regexp;
|
|
554
886
|
}
|
|
@@ -569,21 +901,18 @@ static VALUE re2_regexp_allocate(VALUE klass) {
|
|
|
569
901
|
*
|
|
570
902
|
* @return [Array<String, nil>] the array of matches
|
|
571
903
|
* @example
|
|
572
|
-
* m = RE2::Regexp.new('(\d+)').
|
|
904
|
+
* m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
|
|
573
905
|
* m.to_a #=> ["123", "123"]
|
|
574
906
|
*/
|
|
575
907
|
static VALUE re2_matchdata_to_a(const VALUE self) {
|
|
576
|
-
re2_matchdata *m;
|
|
577
|
-
re2_pattern *p;
|
|
578
|
-
|
|
579
|
-
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
|
580
|
-
TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
|
|
908
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
909
|
+
re2_pattern *p = unwrap_re2_regexp(m->regexp);
|
|
581
910
|
|
|
582
911
|
VALUE array = rb_ary_new2(m->number_of_matches);
|
|
583
912
|
for (int i = 0; i < m->number_of_matches; ++i) {
|
|
584
913
|
re2::StringPiece *match = &m->matches[i];
|
|
585
914
|
|
|
586
|
-
if (match->
|
|
915
|
+
if (match->data() == nullptr) {
|
|
587
916
|
rb_ary_push(array, Qnil);
|
|
588
917
|
} else {
|
|
589
918
|
rb_ary_push(array, encoded_str_new(match->data(), match->size(),
|
|
@@ -595,18 +924,15 @@ static VALUE re2_matchdata_to_a(const VALUE self) {
|
|
|
595
924
|
}
|
|
596
925
|
|
|
597
926
|
static VALUE re2_matchdata_nth_match(int nth, const VALUE self) {
|
|
598
|
-
re2_matchdata *m;
|
|
599
|
-
re2_pattern *p;
|
|
600
|
-
|
|
601
|
-
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
|
602
|
-
TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
|
|
927
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
928
|
+
re2_pattern *p = unwrap_re2_regexp(m->regexp);
|
|
603
929
|
|
|
604
930
|
if (nth < 0 || nth >= m->number_of_matches) {
|
|
605
931
|
return Qnil;
|
|
606
932
|
} else {
|
|
607
933
|
re2::StringPiece *match = &m->matches[nth];
|
|
608
934
|
|
|
609
|
-
if (match->
|
|
935
|
+
if (match->data() == nullptr) {
|
|
610
936
|
return Qnil;
|
|
611
937
|
} else {
|
|
612
938
|
return encoded_str_new(match->data(), match->size(),
|
|
@@ -616,14 +942,11 @@ static VALUE re2_matchdata_nth_match(int nth, const VALUE self) {
|
|
|
616
942
|
}
|
|
617
943
|
|
|
618
944
|
static VALUE re2_matchdata_named_match(const std::string &name, const VALUE self) {
|
|
619
|
-
re2_matchdata *m;
|
|
620
|
-
re2_pattern *p;
|
|
945
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
946
|
+
re2_pattern *p = unwrap_re2_regexp(m->regexp);
|
|
621
947
|
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
|
|
626
|
-
std::map<std::string, int>::const_iterator search = groups.find(name);
|
|
948
|
+
const auto& groups = p->pattern->NamedCapturingGroups();
|
|
949
|
+
auto search = groups.find(name);
|
|
627
950
|
|
|
628
951
|
if (search != groups.end()) {
|
|
629
952
|
return re2_matchdata_nth_match(search->second, self);
|
|
@@ -645,7 +968,7 @@ static VALUE re2_matchdata_named_match(const std::string &name, const VALUE self
|
|
|
645
968
|
* @param [Integer] index the index of the match to fetch
|
|
646
969
|
* @return [String, nil] the specified match or `nil` if it isn't present
|
|
647
970
|
* @example
|
|
648
|
-
* m = RE2::Regexp.new('(\d+)').
|
|
971
|
+
* m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
|
|
649
972
|
* m[0] #=> "123"
|
|
650
973
|
*
|
|
651
974
|
* @overload [](start, length)
|
|
@@ -655,7 +978,7 @@ static VALUE re2_matchdata_named_match(const std::string &name, const VALUE self
|
|
|
655
978
|
* @param [Integer] length the number of elements to fetch
|
|
656
979
|
* @return [Array<String, nil>] the specified matches
|
|
657
980
|
* @example
|
|
658
|
-
* m = RE2::Regexp.new('(\d+)').
|
|
981
|
+
* m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
|
|
659
982
|
* m[0, 1] #=> ["123"]
|
|
660
983
|
*
|
|
661
984
|
* @overload [](range)
|
|
@@ -664,8 +987,8 @@ static VALUE re2_matchdata_named_match(const std::string &name, const VALUE self
|
|
|
664
987
|
* @param [Range] range the range of match indexes to fetch
|
|
665
988
|
* @return [Array<String, nil>] the specified matches
|
|
666
989
|
* @example
|
|
667
|
-
* m = RE2::Regexp.new('(\d+)').
|
|
668
|
-
* m[0..1] #=> "
|
|
990
|
+
* m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
|
|
991
|
+
* m[0..1] #=> ["123", "123"]
|
|
669
992
|
*
|
|
670
993
|
* @overload [](name)
|
|
671
994
|
* Access a particular match by name.
|
|
@@ -673,7 +996,7 @@ static VALUE re2_matchdata_named_match(const std::string &name, const VALUE self
|
|
|
673
996
|
* @param [String, Symbol] name the name of the match to fetch
|
|
674
997
|
* @return [String, nil] the specific match or `nil` if it isn't present
|
|
675
998
|
* @example
|
|
676
|
-
* m = RE2::Regexp.new('(?P<number>\d+)').
|
|
999
|
+
* m = RE2::Regexp.new('(?P<number>\d+)').partial_match("bob 123")
|
|
677
1000
|
* m["number"] #=> "123"
|
|
678
1001
|
* m[:number] #=> "123"
|
|
679
1002
|
*/
|
|
@@ -697,6 +1020,9 @@ static VALUE re2_matchdata_aref(int argc, VALUE *argv, const VALUE self) {
|
|
|
697
1020
|
* Returns the entire matched string.
|
|
698
1021
|
*
|
|
699
1022
|
* @return [String] the entire matched string
|
|
1023
|
+
* @example
|
|
1024
|
+
* m = RE2::Regexp.new('(?P<number>\d+)').partial_match("bob 123")
|
|
1025
|
+
* m.to_s #=> "123"
|
|
700
1026
|
*/
|
|
701
1027
|
static VALUE re2_matchdata_to_s(const VALUE self) {
|
|
702
1028
|
return re2_matchdata_nth_match(0, self);
|
|
@@ -711,15 +1037,12 @@ static VALUE re2_matchdata_to_s(const VALUE self) {
|
|
|
711
1037
|
*
|
|
712
1038
|
* @return [String] a printable version of the match
|
|
713
1039
|
* @example
|
|
714
|
-
* m = RE2::Regexp.new('(\d+)').
|
|
1040
|
+
* m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
|
|
715
1041
|
* m.inspect #=> "#<RE2::MatchData \"123\" 1:\"123\">"
|
|
716
1042
|
*/
|
|
717
1043
|
static VALUE re2_matchdata_inspect(const VALUE self) {
|
|
718
|
-
re2_matchdata *m;
|
|
719
|
-
re2_pattern *p;
|
|
720
|
-
|
|
721
|
-
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
|
722
|
-
TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
|
|
1044
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
1045
|
+
re2_pattern *p = unwrap_re2_regexp(m->regexp);
|
|
723
1046
|
|
|
724
1047
|
std::ostringstream output;
|
|
725
1048
|
output << "#<RE2::MatchData";
|
|
@@ -749,7 +1072,7 @@ static VALUE re2_matchdata_inspect(const VALUE self) {
|
|
|
749
1072
|
}
|
|
750
1073
|
|
|
751
1074
|
/*
|
|
752
|
-
* Returns the array of submatches
|
|
1075
|
+
* Returns the array of submatches.
|
|
753
1076
|
*
|
|
754
1077
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
|
755
1078
|
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
|
@@ -758,11 +1081,12 @@ static VALUE re2_matchdata_inspect(const VALUE self) {
|
|
|
758
1081
|
*
|
|
759
1082
|
* @return [Array<String, nil>] the array of submatches
|
|
760
1083
|
* @example
|
|
761
|
-
* m = RE2::Regexp.new('(\d+)').
|
|
1084
|
+
* m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
|
|
1085
|
+
* m.captures #=> ["123"]
|
|
762
1086
|
* m.deconstruct #=> ["123"]
|
|
763
1087
|
*
|
|
764
1088
|
* @example pattern matching
|
|
765
|
-
* case RE2::Regexp.new('(\d+) (\d+)').
|
|
1089
|
+
* case RE2::Regexp.new('(\d+) (\d+)').partial_match("bob 123 456")
|
|
766
1090
|
* in x, y
|
|
767
1091
|
* puts "Matched #{x} #{y}"
|
|
768
1092
|
* else
|
|
@@ -770,17 +1094,14 @@ static VALUE re2_matchdata_inspect(const VALUE self) {
|
|
|
770
1094
|
* end
|
|
771
1095
|
*/
|
|
772
1096
|
static VALUE re2_matchdata_deconstruct(const VALUE self) {
|
|
773
|
-
re2_matchdata *m;
|
|
774
|
-
re2_pattern *p;
|
|
775
|
-
|
|
776
|
-
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
|
777
|
-
TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
|
|
1097
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
1098
|
+
re2_pattern *p = unwrap_re2_regexp(m->regexp);
|
|
778
1099
|
|
|
779
1100
|
VALUE array = rb_ary_new2(m->number_of_matches - 1);
|
|
780
1101
|
for (int i = 1; i < m->number_of_matches; ++i) {
|
|
781
1102
|
re2::StringPiece *match = &m->matches[i];
|
|
782
1103
|
|
|
783
|
-
if (match->
|
|
1104
|
+
if (match->data() == nullptr) {
|
|
784
1105
|
rb_ary_push(array, Qnil);
|
|
785
1106
|
} else {
|
|
786
1107
|
rb_ary_push(array, encoded_str_new(match->data(), match->size(),
|
|
@@ -806,14 +1127,14 @@ static VALUE re2_matchdata_deconstruct(const VALUE self) {
|
|
|
806
1127
|
* @param [Array<Symbol>, nil] keys an array of `Symbol` capturing group names
|
|
807
1128
|
* or `nil` to return all names
|
|
808
1129
|
* @example
|
|
809
|
-
* m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').
|
|
1130
|
+
* m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').partial_match('123 abc')
|
|
810
1131
|
* m.deconstruct_keys(nil) #=> {numbers: "123", letters: "abc"}
|
|
811
1132
|
* m.deconstruct_keys([:numbers]) #=> {numbers: "123"}
|
|
812
1133
|
* m.deconstruct_keys([:fruit]) #=> {}
|
|
813
1134
|
* m.deconstruct_keys([:letters, :fruit]) #=> {letters: "abc"}
|
|
814
1135
|
*
|
|
815
1136
|
* @example pattern matching
|
|
816
|
-
* case RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').
|
|
1137
|
+
* case RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').partial_match('123 abc')
|
|
817
1138
|
* in numbers:, letters:
|
|
818
1139
|
* puts "Numbers: #{numbers}, letters: #{letters}"
|
|
819
1140
|
* else
|
|
@@ -821,20 +1142,17 @@ static VALUE re2_matchdata_deconstruct(const VALUE self) {
|
|
|
821
1142
|
* end
|
|
822
1143
|
*/
|
|
823
1144
|
static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys) {
|
|
824
|
-
re2_matchdata *m;
|
|
825
|
-
re2_pattern *p;
|
|
826
|
-
|
|
827
|
-
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
|
828
|
-
TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
|
|
1145
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
1146
|
+
re2_pattern *p = unwrap_re2_regexp(m->regexp);
|
|
829
1147
|
|
|
830
|
-
const
|
|
1148
|
+
const auto& groups = p->pattern->NamedCapturingGroups();
|
|
831
1149
|
VALUE capturing_groups = rb_hash_new();
|
|
832
1150
|
|
|
833
1151
|
if (NIL_P(keys)) {
|
|
834
|
-
for (
|
|
1152
|
+
for (const auto& group : groups) {
|
|
835
1153
|
rb_hash_aset(capturing_groups,
|
|
836
|
-
ID2SYM(
|
|
837
|
-
re2_matchdata_nth_match(
|
|
1154
|
+
ID2SYM(rb_intern2(group.first.data(), group.first.size())),
|
|
1155
|
+
re2_matchdata_nth_match(group.second, self));
|
|
838
1156
|
}
|
|
839
1157
|
} else {
|
|
840
1158
|
Check_Type(keys, T_ARRAY);
|
|
@@ -844,7 +1162,7 @@ static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys)
|
|
|
844
1162
|
VALUE key = rb_ary_entry(keys, i);
|
|
845
1163
|
Check_Type(key, T_SYMBOL);
|
|
846
1164
|
const char *name = rb_id2name(SYM2ID(key));
|
|
847
|
-
|
|
1165
|
+
auto search = groups.find(name);
|
|
848
1166
|
|
|
849
1167
|
if (search != groups.end()) {
|
|
850
1168
|
rb_hash_aset(capturing_groups, key, re2_matchdata_nth_match(search->second, self));
|
|
@@ -858,6 +1176,151 @@ static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys)
|
|
|
858
1176
|
return capturing_groups;
|
|
859
1177
|
}
|
|
860
1178
|
|
|
1179
|
+
/*
|
|
1180
|
+
* Returns a hash of capturing group names to matched strings.
|
|
1181
|
+
*
|
|
1182
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
|
1183
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
|
1184
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
|
1185
|
+
*
|
|
1186
|
+
* @overload named_captures
|
|
1187
|
+
* Returns a hash with string keys.
|
|
1188
|
+
*
|
|
1189
|
+
* @return [Hash] a hash of capturing group names to matching strings
|
|
1190
|
+
* @example
|
|
1191
|
+
* m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').partial_match('123 abc')
|
|
1192
|
+
* m.named_captures #=> {"numbers" => "123", "letters" => "abc"}
|
|
1193
|
+
*
|
|
1194
|
+
* @overload named_captures(symbolize_names:)
|
|
1195
|
+
* Returns a hash with string or symbol keys.
|
|
1196
|
+
*
|
|
1197
|
+
* @param [Boolean] symbolize_names whether to return group names as symbols
|
|
1198
|
+
* @return [Hash] a hash of capturing group names to matching strings
|
|
1199
|
+
* @example
|
|
1200
|
+
* m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').partial_match('123 abc')
|
|
1201
|
+
* m.named_captures
|
|
1202
|
+
* #=> {"numbers" => "123", "letters" => "abc"}
|
|
1203
|
+
* m.named_captures(symbolize_names: true) #=> {numbers: "123", letters: "abc"}
|
|
1204
|
+
*/
|
|
1205
|
+
static VALUE re2_matchdata_named_captures(int argc, VALUE *argv, const VALUE self) {
|
|
1206
|
+
VALUE opts;
|
|
1207
|
+
rb_scan_args(argc, argv, "0:", &opts);
|
|
1208
|
+
|
|
1209
|
+
bool symbolize = false;
|
|
1210
|
+
if (!NIL_P(opts)) {
|
|
1211
|
+
VALUE sym = rb_hash_aref(opts, ID2SYM(id_symbolize_names));
|
|
1212
|
+
symbolize = RTEST(sym);
|
|
1213
|
+
}
|
|
1214
|
+
|
|
1215
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
1216
|
+
re2_pattern *p = unwrap_re2_regexp(m->regexp);
|
|
1217
|
+
|
|
1218
|
+
const auto& groups = p->pattern->NamedCapturingGroups();
|
|
1219
|
+
VALUE result = rb_hash_new();
|
|
1220
|
+
|
|
1221
|
+
for (const auto& group : groups) {
|
|
1222
|
+
VALUE key;
|
|
1223
|
+
if (symbolize) {
|
|
1224
|
+
key = ID2SYM(rb_intern2(group.first.data(), group.first.size()));
|
|
1225
|
+
} else {
|
|
1226
|
+
key = encoded_str_new(group.first.data(), group.first.size(),
|
|
1227
|
+
p->pattern->options().encoding());
|
|
1228
|
+
}
|
|
1229
|
+
rb_hash_aset(result, key, re2_matchdata_nth_match(group.second, self));
|
|
1230
|
+
}
|
|
1231
|
+
|
|
1232
|
+
return result;
|
|
1233
|
+
}
|
|
1234
|
+
|
|
1235
|
+
/*
|
|
1236
|
+
* Returns an array of names of named capturing groups. Names are returned in
|
|
1237
|
+
* alphabetical order rather than definition order, as RE2 stores named groups
|
|
1238
|
+
* internally in a sorted map.
|
|
1239
|
+
*
|
|
1240
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
|
1241
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
|
1242
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
|
1243
|
+
*
|
|
1244
|
+
* @return [Array<String>] an array of names of named capturing groups
|
|
1245
|
+
* @example
|
|
1246
|
+
* m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').partial_match('123 abc')
|
|
1247
|
+
* m.names #=> ["letters", "numbers"]
|
|
1248
|
+
*/
|
|
1249
|
+
static VALUE re2_matchdata_names(const VALUE self) {
|
|
1250
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
1251
|
+
|
|
1252
|
+
return re2_regexp_names(m->regexp);
|
|
1253
|
+
}
|
|
1254
|
+
|
|
1255
|
+
/*
|
|
1256
|
+
* Returns an array of match values at the given indices or names.
|
|
1257
|
+
*
|
|
1258
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
|
1259
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
|
1260
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
|
1261
|
+
*
|
|
1262
|
+
* @param [Integer, String, Symbol] indexes the indices or names of
|
|
1263
|
+
* the matches to fetch
|
|
1264
|
+
* @return [Array<String, nil>] the values at the given indices or names
|
|
1265
|
+
* @example
|
|
1266
|
+
* m = RE2::Regexp.new('(?P<a>\d+) (?P<b>\d+)').partial_match("123 456")
|
|
1267
|
+
* m.values_at(1, 2) #=> ["123", "456"]
|
|
1268
|
+
* m.values_at(:a, :b) #=> ["123", "456"]
|
|
1269
|
+
* m.values_at(1, :b) #=> ["123", "456"]
|
|
1270
|
+
*/
|
|
1271
|
+
static VALUE re2_matchdata_values_at(int argc, VALUE *argv, const VALUE self) {
|
|
1272
|
+
unwrap_re2_matchdata(self);
|
|
1273
|
+
|
|
1274
|
+
VALUE result = rb_ary_new2(argc);
|
|
1275
|
+
|
|
1276
|
+
for (int i = 0; i < argc; ++i) {
|
|
1277
|
+
VALUE idx = argv[i];
|
|
1278
|
+
|
|
1279
|
+
if (TYPE(idx) == T_STRING) {
|
|
1280
|
+
rb_ary_push(result, re2_matchdata_named_match(
|
|
1281
|
+
std::string(RSTRING_PTR(idx), RSTRING_LEN(idx)), self));
|
|
1282
|
+
} else if (SYMBOL_P(idx)) {
|
|
1283
|
+
rb_ary_push(result, re2_matchdata_named_match(
|
|
1284
|
+
rb_id2name(SYM2ID(idx)), self));
|
|
1285
|
+
} else {
|
|
1286
|
+
rb_ary_push(result, re2_matchdata_nth_match(NUM2INT(idx), self));
|
|
1287
|
+
}
|
|
1288
|
+
}
|
|
1289
|
+
|
|
1290
|
+
return result;
|
|
1291
|
+
}
|
|
1292
|
+
|
|
1293
|
+
static VALUE re2_matchdata_initialize_copy(VALUE self, VALUE other) {
|
|
1294
|
+
re2_matchdata *self_m;
|
|
1295
|
+
re2_matchdata *other_m = unwrap_re2_matchdata(other);
|
|
1296
|
+
|
|
1297
|
+
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, self_m);
|
|
1298
|
+
|
|
1299
|
+
if (self_m->matches) {
|
|
1300
|
+
delete[] self_m->matches;
|
|
1301
|
+
self_m->matches = nullptr;
|
|
1302
|
+
}
|
|
1303
|
+
|
|
1304
|
+
self_m->number_of_matches = other_m->number_of_matches;
|
|
1305
|
+
RB_OBJ_WRITE(self, &self_m->regexp, other_m->regexp);
|
|
1306
|
+
RB_OBJ_WRITE(self, &self_m->text, other_m->text);
|
|
1307
|
+
|
|
1308
|
+
if (other_m->matches) {
|
|
1309
|
+
self_m->matches = new(std::nothrow) re2::StringPiece[other_m->number_of_matches];
|
|
1310
|
+
if (self_m->matches == nullptr) {
|
|
1311
|
+
rb_raise(rb_eNoMemError,
|
|
1312
|
+
"not enough memory to allocate StringPiece for matches");
|
|
1313
|
+
}
|
|
1314
|
+
for (int i = 0; i < other_m->number_of_matches; ++i) {
|
|
1315
|
+
self_m->matches[i] = other_m->matches[i];
|
|
1316
|
+
}
|
|
1317
|
+
} else {
|
|
1318
|
+
self_m->matches = nullptr;
|
|
1319
|
+
}
|
|
1320
|
+
|
|
1321
|
+
return self;
|
|
1322
|
+
}
|
|
1323
|
+
|
|
861
1324
|
/*
|
|
862
1325
|
* Shorthand to compile a new {RE2::Regexp}.
|
|
863
1326
|
*
|
|
@@ -913,6 +1376,13 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
|
|
913
1376
|
|
|
914
1377
|
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
915
1378
|
|
|
1379
|
+
rb_check_frozen(self);
|
|
1380
|
+
|
|
1381
|
+
if (p->pattern) {
|
|
1382
|
+
delete p->pattern;
|
|
1383
|
+
p->pattern = nullptr;
|
|
1384
|
+
}
|
|
1385
|
+
|
|
916
1386
|
if (RTEST(options)) {
|
|
917
1387
|
RE2::Options re2_options;
|
|
918
1388
|
parse_re2_options(&re2_options, options);
|
|
@@ -924,10 +1394,36 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
|
|
924
1394
|
re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)));
|
|
925
1395
|
}
|
|
926
1396
|
|
|
927
|
-
if (p->pattern ==
|
|
1397
|
+
if (p->pattern == nullptr) {
|
|
928
1398
|
rb_raise(rb_eNoMemError, "not enough memory to allocate RE2 object");
|
|
929
1399
|
}
|
|
930
1400
|
|
|
1401
|
+
rb_obj_freeze(self);
|
|
1402
|
+
|
|
1403
|
+
return self;
|
|
1404
|
+
}
|
|
1405
|
+
|
|
1406
|
+
static VALUE re2_regexp_initialize_copy(VALUE self, VALUE other) {
|
|
1407
|
+
re2_pattern *self_p;
|
|
1408
|
+
re2_pattern *other_p = unwrap_re2_regexp(other);
|
|
1409
|
+
|
|
1410
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, self_p);
|
|
1411
|
+
|
|
1412
|
+
rb_check_frozen(self);
|
|
1413
|
+
|
|
1414
|
+
if (self_p->pattern) {
|
|
1415
|
+
delete self_p->pattern;
|
|
1416
|
+
self_p->pattern = nullptr;
|
|
1417
|
+
}
|
|
1418
|
+
|
|
1419
|
+
self_p->pattern = new(std::nothrow) RE2(other_p->pattern->pattern(),
|
|
1420
|
+
other_p->pattern->options());
|
|
1421
|
+
if (self_p->pattern == nullptr) {
|
|
1422
|
+
rb_raise(rb_eNoMemError, "not enough memory to allocate RE2 object");
|
|
1423
|
+
}
|
|
1424
|
+
|
|
1425
|
+
rb_obj_freeze(self);
|
|
1426
|
+
|
|
931
1427
|
return self;
|
|
932
1428
|
}
|
|
933
1429
|
|
|
@@ -945,9 +1441,7 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
|
|
945
1441
|
* re2.inspect #=> "#<RE2::Regexp /woo?/>"
|
|
946
1442
|
*/
|
|
947
1443
|
static VALUE re2_regexp_inspect(const VALUE self) {
|
|
948
|
-
re2_pattern *p;
|
|
949
|
-
|
|
950
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1444
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
951
1445
|
|
|
952
1446
|
std::ostringstream output;
|
|
953
1447
|
|
|
@@ -970,8 +1464,7 @@ static VALUE re2_regexp_inspect(const VALUE self) {
|
|
|
970
1464
|
* re2.to_s #=> "woo?"
|
|
971
1465
|
*/
|
|
972
1466
|
static VALUE re2_regexp_to_s(const VALUE self) {
|
|
973
|
-
re2_pattern *p;
|
|
974
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1467
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
975
1468
|
|
|
976
1469
|
return encoded_str_new(p->pattern->pattern().data(),
|
|
977
1470
|
p->pattern->pattern().size(),
|
|
@@ -987,8 +1480,7 @@ static VALUE re2_regexp_to_s(const VALUE self) {
|
|
|
987
1480
|
* re2.ok? #=> true
|
|
988
1481
|
*/
|
|
989
1482
|
static VALUE re2_regexp_ok(const VALUE self) {
|
|
990
|
-
re2_pattern *p;
|
|
991
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1483
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
992
1484
|
|
|
993
1485
|
return BOOL2RUBY(p->pattern->ok());
|
|
994
1486
|
}
|
|
@@ -1003,8 +1495,7 @@ static VALUE re2_regexp_ok(const VALUE self) {
|
|
|
1003
1495
|
* re2.utf8? #=> true
|
|
1004
1496
|
*/
|
|
1005
1497
|
static VALUE re2_regexp_utf8(const VALUE self) {
|
|
1006
|
-
re2_pattern *p;
|
|
1007
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1498
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1008
1499
|
|
|
1009
1500
|
return BOOL2RUBY(p->pattern->options().encoding() == RE2::Options::EncodingUTF8);
|
|
1010
1501
|
}
|
|
@@ -1019,8 +1510,7 @@ static VALUE re2_regexp_utf8(const VALUE self) {
|
|
|
1019
1510
|
* re2.posix_syntax? #=> true
|
|
1020
1511
|
*/
|
|
1021
1512
|
static VALUE re2_regexp_posix_syntax(const VALUE self) {
|
|
1022
|
-
re2_pattern *p;
|
|
1023
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1513
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1024
1514
|
|
|
1025
1515
|
return BOOL2RUBY(p->pattern->options().posix_syntax());
|
|
1026
1516
|
}
|
|
@@ -1035,8 +1525,7 @@ static VALUE re2_regexp_posix_syntax(const VALUE self) {
|
|
|
1035
1525
|
* re2.longest_match? #=> true
|
|
1036
1526
|
*/
|
|
1037
1527
|
static VALUE re2_regexp_longest_match(const VALUE self) {
|
|
1038
|
-
re2_pattern *p;
|
|
1039
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1528
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1040
1529
|
|
|
1041
1530
|
return BOOL2RUBY(p->pattern->options().longest_match());
|
|
1042
1531
|
}
|
|
@@ -1051,8 +1540,7 @@ static VALUE re2_regexp_longest_match(const VALUE self) {
|
|
|
1051
1540
|
* re2.log_errors? #=> true
|
|
1052
1541
|
*/
|
|
1053
1542
|
static VALUE re2_regexp_log_errors(const VALUE self) {
|
|
1054
|
-
re2_pattern *p;
|
|
1055
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1543
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1056
1544
|
|
|
1057
1545
|
return BOOL2RUBY(p->pattern->options().log_errors());
|
|
1058
1546
|
}
|
|
@@ -1066,8 +1554,7 @@ static VALUE re2_regexp_log_errors(const VALUE self) {
|
|
|
1066
1554
|
* re2.max_mem #=> 1024
|
|
1067
1555
|
*/
|
|
1068
1556
|
static VALUE re2_regexp_max_mem(const VALUE self) {
|
|
1069
|
-
re2_pattern *p;
|
|
1070
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1557
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1071
1558
|
|
|
1072
1559
|
return INT2FIX(p->pattern->options().max_mem());
|
|
1073
1560
|
}
|
|
@@ -1082,8 +1569,7 @@ static VALUE re2_regexp_max_mem(const VALUE self) {
|
|
|
1082
1569
|
* re2.literal? #=> true
|
|
1083
1570
|
*/
|
|
1084
1571
|
static VALUE re2_regexp_literal(const VALUE self) {
|
|
1085
|
-
re2_pattern *p;
|
|
1086
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1572
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1087
1573
|
|
|
1088
1574
|
return BOOL2RUBY(p->pattern->options().literal());
|
|
1089
1575
|
}
|
|
@@ -1098,8 +1584,7 @@ static VALUE re2_regexp_literal(const VALUE self) {
|
|
|
1098
1584
|
* re2.never_nl? #=> true
|
|
1099
1585
|
*/
|
|
1100
1586
|
static VALUE re2_regexp_never_nl(const VALUE self) {
|
|
1101
|
-
re2_pattern *p;
|
|
1102
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1587
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1103
1588
|
|
|
1104
1589
|
return BOOL2RUBY(p->pattern->options().never_nl());
|
|
1105
1590
|
}
|
|
@@ -1114,8 +1599,7 @@ static VALUE re2_regexp_never_nl(const VALUE self) {
|
|
|
1114
1599
|
* re2.case_sensitive? #=> true
|
|
1115
1600
|
*/
|
|
1116
1601
|
static VALUE re2_regexp_case_sensitive(const VALUE self) {
|
|
1117
|
-
re2_pattern *p;
|
|
1118
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1602
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1119
1603
|
|
|
1120
1604
|
return BOOL2RUBY(p->pattern->options().case_sensitive());
|
|
1121
1605
|
}
|
|
@@ -1144,8 +1628,7 @@ static VALUE re2_regexp_case_insensitive(const VALUE self) {
|
|
|
1144
1628
|
* re2.perl_classes? #=> true
|
|
1145
1629
|
*/
|
|
1146
1630
|
static VALUE re2_regexp_perl_classes(const VALUE self) {
|
|
1147
|
-
re2_pattern *p;
|
|
1148
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1631
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1149
1632
|
|
|
1150
1633
|
return BOOL2RUBY(p->pattern->options().perl_classes());
|
|
1151
1634
|
}
|
|
@@ -1160,8 +1643,7 @@ static VALUE re2_regexp_perl_classes(const VALUE self) {
|
|
|
1160
1643
|
* re2.word_boundary? #=> true
|
|
1161
1644
|
*/
|
|
1162
1645
|
static VALUE re2_regexp_word_boundary(const VALUE self) {
|
|
1163
|
-
re2_pattern *p;
|
|
1164
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1646
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1165
1647
|
|
|
1166
1648
|
return BOOL2RUBY(p->pattern->options().word_boundary());
|
|
1167
1649
|
}
|
|
@@ -1176,8 +1658,7 @@ static VALUE re2_regexp_word_boundary(const VALUE self) {
|
|
|
1176
1658
|
* re2.one_line? #=> true
|
|
1177
1659
|
*/
|
|
1178
1660
|
static VALUE re2_regexp_one_line(const VALUE self) {
|
|
1179
|
-
re2_pattern *p;
|
|
1180
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1661
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1181
1662
|
|
|
1182
1663
|
return BOOL2RUBY(p->pattern->options().one_line());
|
|
1183
1664
|
}
|
|
@@ -1189,8 +1670,7 @@ static VALUE re2_regexp_one_line(const VALUE self) {
|
|
|
1189
1670
|
* @return [String, nil] the error string or `nil`
|
|
1190
1671
|
*/
|
|
1191
1672
|
static VALUE re2_regexp_error(const VALUE self) {
|
|
1192
|
-
re2_pattern *p;
|
|
1193
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1673
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1194
1674
|
|
|
1195
1675
|
if (p->pattern->ok()) {
|
|
1196
1676
|
return Qnil;
|
|
@@ -1210,8 +1690,7 @@ static VALUE re2_regexp_error(const VALUE self) {
|
|
|
1210
1690
|
* @return [String, nil] the offending portion of the regexp or `nil`
|
|
1211
1691
|
*/
|
|
1212
1692
|
static VALUE re2_regexp_error_arg(const VALUE self) {
|
|
1213
|
-
re2_pattern *p;
|
|
1214
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1693
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1215
1694
|
|
|
1216
1695
|
if (p->pattern->ok()) {
|
|
1217
1696
|
return Qnil;
|
|
@@ -1230,8 +1709,7 @@ static VALUE re2_regexp_error_arg(const VALUE self) {
|
|
|
1230
1709
|
* @return [Integer] the regexp "cost"
|
|
1231
1710
|
*/
|
|
1232
1711
|
static VALUE re2_regexp_program_size(const VALUE self) {
|
|
1233
|
-
re2_pattern *p;
|
|
1234
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1712
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1235
1713
|
|
|
1236
1714
|
return INT2FIX(p->pattern->ProgramSize());
|
|
1237
1715
|
}
|
|
@@ -1242,9 +1720,7 @@ static VALUE re2_regexp_program_size(const VALUE self) {
|
|
|
1242
1720
|
* @return [Hash] the options
|
|
1243
1721
|
*/
|
|
1244
1722
|
static VALUE re2_regexp_options(const VALUE self) {
|
|
1245
|
-
re2_pattern *p;
|
|
1246
|
-
|
|
1247
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1723
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1248
1724
|
VALUE options = rb_hash_new();
|
|
1249
1725
|
|
|
1250
1726
|
rb_hash_aset(options, ID2SYM(id_utf8),
|
|
@@ -1294,8 +1770,7 @@ static VALUE re2_regexp_options(const VALUE self) {
|
|
|
1294
1770
|
* @return [Integer] the number of capturing subpatterns
|
|
1295
1771
|
*/
|
|
1296
1772
|
static VALUE re2_regexp_number_of_capturing_groups(const VALUE self) {
|
|
1297
|
-
re2_pattern *p;
|
|
1298
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1773
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1299
1774
|
|
|
1300
1775
|
return INT2FIX(p->pattern->NumberOfCapturingGroups());
|
|
1301
1776
|
}
|
|
@@ -1310,17 +1785,15 @@ static VALUE re2_regexp_number_of_capturing_groups(const VALUE self) {
|
|
|
1310
1785
|
* @return [Hash] a hash of names to capturing indices
|
|
1311
1786
|
*/
|
|
1312
1787
|
static VALUE re2_regexp_named_capturing_groups(const VALUE self) {
|
|
1313
|
-
re2_pattern *p;
|
|
1314
|
-
|
|
1315
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1316
|
-
const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
|
|
1788
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1789
|
+
const auto& groups = p->pattern->NamedCapturingGroups();
|
|
1317
1790
|
VALUE capturing_groups = rb_hash_new();
|
|
1318
1791
|
|
|
1319
|
-
for (
|
|
1792
|
+
for (const auto& group : groups) {
|
|
1320
1793
|
rb_hash_aset(capturing_groups,
|
|
1321
|
-
encoded_str_new(
|
|
1794
|
+
encoded_str_new(group.first.data(), group.first.size(),
|
|
1322
1795
|
p->pattern->options().encoding()),
|
|
1323
|
-
INT2FIX(
|
|
1796
|
+
INT2FIX(group.second));
|
|
1324
1797
|
}
|
|
1325
1798
|
|
|
1326
1799
|
return capturing_groups;
|
|
@@ -1415,14 +1888,15 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
1415
1888
|
|
|
1416
1889
|
rb_scan_args(argc, argv, "11", &text, &options);
|
|
1417
1890
|
|
|
1418
|
-
/*
|
|
1891
|
+
/* Coerce and freeze text to prevent mutation. */
|
|
1419
1892
|
StringValue(text);
|
|
1893
|
+
text = rb_str_new_frozen(text);
|
|
1420
1894
|
|
|
1421
|
-
|
|
1895
|
+
p = unwrap_re2_regexp(self);
|
|
1422
1896
|
|
|
1423
1897
|
int n;
|
|
1424
|
-
|
|
1425
|
-
|
|
1898
|
+
size_t startpos = 0;
|
|
1899
|
+
size_t endpos = RSTRING_LEN(text);
|
|
1426
1900
|
RE2::Anchor anchor = RE2::UNANCHORED;
|
|
1427
1901
|
|
|
1428
1902
|
if (RTEST(options)) {
|
|
@@ -1440,11 +1914,13 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
1440
1914
|
VALUE endpos_option = rb_hash_aref(options, ID2SYM(id_endpos));
|
|
1441
1915
|
if (!NIL_P(endpos_option)) {
|
|
1442
1916
|
#ifdef HAVE_ENDPOS_ARGUMENT
|
|
1443
|
-
|
|
1917
|
+
ssize_t endpos_value = NUM2SSIZET(endpos_option);
|
|
1444
1918
|
|
|
1445
|
-
if (
|
|
1919
|
+
if (endpos_value < 0) {
|
|
1446
1920
|
rb_raise(rb_eArgError, "endpos should be >= 0");
|
|
1447
1921
|
}
|
|
1922
|
+
|
|
1923
|
+
endpos = static_cast<size_t>(endpos_value);
|
|
1448
1924
|
#else
|
|
1449
1925
|
rb_raise(re2_eRegexpUnsupportedError, "current version of RE2::Match() does not support endpos argument");
|
|
1450
1926
|
#endif
|
|
@@ -1483,11 +1959,13 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
1483
1959
|
|
|
1484
1960
|
VALUE startpos_option = rb_hash_aref(options, ID2SYM(id_startpos));
|
|
1485
1961
|
if (!NIL_P(startpos_option)) {
|
|
1486
|
-
|
|
1962
|
+
ssize_t startpos_value = NUM2SSIZET(startpos_option);
|
|
1487
1963
|
|
|
1488
|
-
if (
|
|
1964
|
+
if (startpos_value < 0) {
|
|
1489
1965
|
rb_raise(rb_eArgError, "startpos should be >= 0");
|
|
1490
1966
|
}
|
|
1967
|
+
|
|
1968
|
+
startpos = static_cast<size_t>(startpos_value);
|
|
1491
1969
|
}
|
|
1492
1970
|
}
|
|
1493
1971
|
} else {
|
|
@@ -1502,16 +1980,18 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
1502
1980
|
rb_raise(rb_eArgError, "startpos should be <= endpos");
|
|
1503
1981
|
}
|
|
1504
1982
|
|
|
1505
|
-
|
|
1506
|
-
|
|
1507
|
-
|
|
1508
|
-
|
|
1509
|
-
|
|
1510
|
-
#else
|
|
1511
|
-
bool matched = p->pattern->Match(
|
|
1512
|
-
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
|
|
1513
|
-
startpos, anchor, 0, 0);
|
|
1983
|
+
#ifndef HAVE_ENDPOS_ARGUMENT
|
|
1984
|
+
/* Old RE2's Match() takes int startpos. Reject values that would overflow. */
|
|
1985
|
+
if (startpos > INT_MAX) {
|
|
1986
|
+
rb_raise(rb_eRangeError, "startpos should be <= %d", INT_MAX);
|
|
1987
|
+
}
|
|
1514
1988
|
#endif
|
|
1989
|
+
|
|
1990
|
+
if (n == 0) {
|
|
1991
|
+
bool matched = re2_match_without_gvl(
|
|
1992
|
+
p->pattern, text, startpos, endpos, anchor, 0, 0);
|
|
1993
|
+
RB_GC_GUARD(text);
|
|
1994
|
+
|
|
1515
1995
|
return BOOL2RUBY(matched);
|
|
1516
1996
|
} else {
|
|
1517
1997
|
if (n == INT_MAX) {
|
|
@@ -1522,22 +2002,15 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
1522
2002
|
n += 1;
|
|
1523
2003
|
|
|
1524
2004
|
re2::StringPiece *matches = new(std::nothrow) re2::StringPiece[n];
|
|
1525
|
-
if (matches ==
|
|
2005
|
+
if (matches == nullptr) {
|
|
1526
2006
|
rb_raise(rb_eNoMemError,
|
|
1527
2007
|
"not enough memory to allocate StringPieces for matches");
|
|
1528
2008
|
}
|
|
1529
2009
|
|
|
1530
|
-
|
|
2010
|
+
bool matched = re2_match_without_gvl(
|
|
2011
|
+
p->pattern, text, startpos, endpos, anchor, matches, n);
|
|
2012
|
+
RB_GC_GUARD(text);
|
|
1531
2013
|
|
|
1532
|
-
#ifdef HAVE_ENDPOS_ARGUMENT
|
|
1533
|
-
bool matched = p->pattern->Match(
|
|
1534
|
-
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
|
|
1535
|
-
startpos, endpos, anchor, matches, n);
|
|
1536
|
-
#else
|
|
1537
|
-
bool matched = p->pattern->Match(
|
|
1538
|
-
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
|
|
1539
|
-
startpos, anchor, matches, n);
|
|
1540
|
-
#endif
|
|
1541
2014
|
if (matched) {
|
|
1542
2015
|
VALUE matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
|
|
1543
2016
|
TypedData_Get_Struct(matchdata, re2_matchdata, &re2_matchdata_data_type, m);
|
|
@@ -1561,19 +2034,20 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
1561
2034
|
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L413-L427
|
|
1562
2035
|
* `PartialMatch`}.
|
|
1563
2036
|
*
|
|
2037
|
+
* @param [String] text the text to search
|
|
1564
2038
|
* @return [Boolean] whether the match was successful
|
|
1565
2039
|
* @raise [TypeError] if text cannot be coerced to a `String`
|
|
1566
2040
|
*/
|
|
1567
2041
|
static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
|
|
1568
|
-
re2_pattern *p;
|
|
1569
|
-
|
|
1570
|
-
/* Ensure text is a string. */
|
|
1571
2042
|
StringValue(text);
|
|
2043
|
+
text = rb_str_new_frozen(text);
|
|
1572
2044
|
|
|
1573
|
-
|
|
2045
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
2046
|
+
bool matched = re2_match_without_gvl(
|
|
2047
|
+
p->pattern, text, 0, RSTRING_LEN(text), RE2::UNANCHORED, 0, 0);
|
|
2048
|
+
RB_GC_GUARD(text);
|
|
1574
2049
|
|
|
1575
|
-
return BOOL2RUBY(
|
|
1576
|
-
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)), *p->pattern));
|
|
2050
|
+
return BOOL2RUBY(matched);
|
|
1577
2051
|
}
|
|
1578
2052
|
|
|
1579
2053
|
/*
|
|
@@ -1581,19 +2055,20 @@ static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
|
|
|
1581
2055
|
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L376-L411
|
|
1582
2056
|
* `FullMatch`}.
|
|
1583
2057
|
*
|
|
2058
|
+
* @param [String] text the text to search
|
|
1584
2059
|
* @return [Boolean] whether the match was successful
|
|
1585
2060
|
* @raise [TypeError] if text cannot be coerced to a `String`
|
|
1586
2061
|
*/
|
|
1587
2062
|
static VALUE re2_regexp_full_match_p(const VALUE self, VALUE text) {
|
|
1588
|
-
re2_pattern *p;
|
|
1589
|
-
|
|
1590
|
-
/* Ensure text is a string. */
|
|
1591
2063
|
StringValue(text);
|
|
2064
|
+
text = rb_str_new_frozen(text);
|
|
1592
2065
|
|
|
1593
|
-
|
|
2066
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
2067
|
+
bool matched = re2_match_without_gvl(
|
|
2068
|
+
p->pattern, text, 0, RSTRING_LEN(text), RE2::ANCHOR_BOTH, 0, 0);
|
|
2069
|
+
RB_GC_GUARD(text);
|
|
1594
2070
|
|
|
1595
|
-
return BOOL2RUBY(
|
|
1596
|
-
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)), *p->pattern));
|
|
2071
|
+
return BOOL2RUBY(matched);
|
|
1597
2072
|
}
|
|
1598
2073
|
|
|
1599
2074
|
/*
|
|
@@ -1609,21 +2084,19 @@ static VALUE re2_regexp_full_match_p(const VALUE self, VALUE text) {
|
|
|
1609
2084
|
* #=> #<RE2::Scanner:0x0000000000000001>
|
|
1610
2085
|
*/
|
|
1611
2086
|
static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
|
|
1612
|
-
/* Ensure text is a string. */
|
|
1613
2087
|
StringValue(text);
|
|
2088
|
+
text = rb_str_new_frozen(text);
|
|
1614
2089
|
|
|
1615
|
-
re2_pattern *p;
|
|
2090
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1616
2091
|
re2_scanner *c;
|
|
1617
|
-
|
|
1618
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1619
2092
|
VALUE scanner = rb_class_new_instance(0, 0, re2_cScanner);
|
|
1620
2093
|
TypedData_Get_Struct(scanner, re2_scanner, &re2_scanner_data_type, c);
|
|
1621
2094
|
|
|
1622
2095
|
RB_OBJ_WRITE(scanner, &c->regexp, self);
|
|
1623
|
-
RB_OBJ_WRITE(scanner, &c->text,
|
|
2096
|
+
RB_OBJ_WRITE(scanner, &c->text, text);
|
|
1624
2097
|
c->input = new(std::nothrow) re2::StringPiece(
|
|
1625
2098
|
RSTRING_PTR(c->text), RSTRING_LEN(c->text));
|
|
1626
|
-
if (c->input ==
|
|
2099
|
+
if (c->input == nullptr) {
|
|
1627
2100
|
rb_raise(rb_eNoMemError,
|
|
1628
2101
|
"not enough memory to allocate StringPiece for input");
|
|
1629
2102
|
}
|
|
@@ -1675,40 +2148,59 @@ static VALUE re2_regexp_match_has_endpos_argument_p(VALUE) {
|
|
|
1675
2148
|
* @raise [TypeError] if the given rewrite or pattern (if not provided as a
|
|
1676
2149
|
* {RE2::Regexp}) cannot be coerced to `String`s
|
|
1677
2150
|
* @example
|
|
1678
|
-
* RE2.
|
|
2151
|
+
* RE2.replace("hello there", "hello", "howdy") #=> "howdy there"
|
|
1679
2152
|
* re2 = RE2::Regexp.new("hel+o")
|
|
1680
|
-
* RE2.
|
|
2153
|
+
* RE2.replace("hello there", re2, "yo") #=> "yo there"
|
|
1681
2154
|
*/
|
|
1682
|
-
static VALUE
|
|
2155
|
+
static VALUE re2_replace(VALUE, VALUE str, VALUE pattern,
|
|
1683
2156
|
VALUE rewrite) {
|
|
1684
|
-
|
|
1685
|
-
StringValue(rewrite);
|
|
2157
|
+
re2_pattern *p = nullptr;
|
|
1686
2158
|
|
|
1687
|
-
|
|
1688
|
-
|
|
1689
|
-
|
|
1690
|
-
* RE2::Replace.
|
|
2159
|
+
/* Coerce and freeze all arguments before any C++ allocations so that any
|
|
2160
|
+
* Ruby exceptions (via longjmp) cannot bypass C++ destructors and leak
|
|
2161
|
+
* memory, and later coercions cannot mutate earlier strings.
|
|
1691
2162
|
*/
|
|
1692
2163
|
StringValue(str);
|
|
2164
|
+
str = rb_str_new_frozen(str);
|
|
2165
|
+
if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
|
|
2166
|
+
p = unwrap_re2_regexp(pattern);
|
|
2167
|
+
} else {
|
|
2168
|
+
StringValue(pattern);
|
|
2169
|
+
pattern = rb_str_new_frozen(pattern);
|
|
2170
|
+
}
|
|
2171
|
+
StringValue(rewrite);
|
|
2172
|
+
rewrite = rb_str_new_frozen(rewrite);
|
|
2173
|
+
|
|
2174
|
+
/* Take a copy of str so it can be modified in-place by RE2::Replace. */
|
|
1693
2175
|
std::string str_as_string(RSTRING_PTR(str), RSTRING_LEN(str));
|
|
1694
2176
|
|
|
1695
|
-
|
|
1696
|
-
|
|
1697
|
-
|
|
1698
|
-
|
|
1699
|
-
|
|
2177
|
+
nogvl_replace_arg arg;
|
|
2178
|
+
arg.str = &str_as_string;
|
|
2179
|
+
if (p) {
|
|
2180
|
+
arg.pattern = p->pattern;
|
|
2181
|
+
} else {
|
|
2182
|
+
arg.pattern = nullptr;
|
|
2183
|
+
arg.string_pattern = re2::StringPiece(
|
|
2184
|
+
RSTRING_PTR(pattern), RSTRING_LEN(pattern));
|
|
2185
|
+
}
|
|
2186
|
+
arg.rewrite = re2::StringPiece(
|
|
2187
|
+
RSTRING_PTR(rewrite), RSTRING_LEN(rewrite));
|
|
2188
|
+
|
|
2189
|
+
#ifdef _WIN32
|
|
2190
|
+
nogvl_replace(&arg);
|
|
2191
|
+
#else
|
|
2192
|
+
rb_thread_call_without_gvl(nogvl_replace, &arg, NULL, NULL);
|
|
2193
|
+
#endif
|
|
1700
2194
|
|
|
2195
|
+
RB_GC_GUARD(rewrite);
|
|
2196
|
+
RB_GC_GUARD(pattern);
|
|
2197
|
+
|
|
2198
|
+
if (p) {
|
|
1701
2199
|
return encoded_str_new(str_as_string.data(), str_as_string.size(),
|
|
1702
2200
|
p->pattern->options().encoding());
|
|
1703
2201
|
} else {
|
|
1704
|
-
|
|
1705
|
-
|
|
1706
|
-
|
|
1707
|
-
RE2::Replace(&str_as_string,
|
|
1708
|
-
re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)),
|
|
1709
|
-
re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
|
|
1710
|
-
|
|
1711
|
-
return encoded_str_new(str_as_string.data(), str_as_string.size(), RE2::Options::EncodingUTF8);
|
|
2202
|
+
return encoded_str_new(str_as_string.data(), str_as_string.size(),
|
|
2203
|
+
RE2::Options::EncodingUTF8);
|
|
1712
2204
|
}
|
|
1713
2205
|
}
|
|
1714
2206
|
|
|
@@ -1729,38 +2221,136 @@ static VALUE re2_Replace(VALUE, VALUE str, VALUE pattern,
|
|
|
1729
2221
|
* @return [String] the resulting string
|
|
1730
2222
|
* @example
|
|
1731
2223
|
* re2 = RE2::Regexp.new("oo?")
|
|
1732
|
-
* RE2.
|
|
1733
|
-
* RE2.
|
|
2224
|
+
* RE2.global_replace("whoops-doops", re2, "e") #=> "wheps-deps"
|
|
2225
|
+
* RE2.global_replace("hello there", "e", "i") #=> "hillo thiri"
|
|
1734
2226
|
*/
|
|
1735
|
-
static VALUE
|
|
2227
|
+
static VALUE re2_global_replace(VALUE, VALUE str, VALUE pattern,
|
|
1736
2228
|
VALUE rewrite) {
|
|
1737
|
-
|
|
2229
|
+
re2_pattern *p = nullptr;
|
|
2230
|
+
|
|
2231
|
+
/* Coerce and freeze all arguments before any C++ allocations so that any
|
|
2232
|
+
* Ruby exceptions (via longjmp) cannot bypass C++ destructors and leak
|
|
2233
|
+
* memory, and later coercions cannot mutate earlier strings.
|
|
2234
|
+
*/
|
|
2235
|
+
StringValue(str);
|
|
2236
|
+
str = rb_str_new_frozen(str);
|
|
2237
|
+
if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
|
|
2238
|
+
p = unwrap_re2_regexp(pattern);
|
|
2239
|
+
} else {
|
|
2240
|
+
StringValue(pattern);
|
|
2241
|
+
pattern = rb_str_new_frozen(pattern);
|
|
2242
|
+
}
|
|
1738
2243
|
StringValue(rewrite);
|
|
2244
|
+
rewrite = rb_str_new_frozen(rewrite);
|
|
1739
2245
|
|
|
1740
2246
|
/* Take a copy of str so it can be modified in-place by
|
|
1741
2247
|
* RE2::GlobalReplace.
|
|
1742
2248
|
*/
|
|
1743
|
-
re2_pattern *p;
|
|
1744
|
-
StringValue(str);
|
|
1745
2249
|
std::string str_as_string(RSTRING_PTR(str), RSTRING_LEN(str));
|
|
1746
2250
|
|
|
1747
|
-
|
|
1748
|
-
|
|
1749
|
-
|
|
1750
|
-
|
|
1751
|
-
|
|
2251
|
+
nogvl_replace_arg arg;
|
|
2252
|
+
arg.str = &str_as_string;
|
|
2253
|
+
if (p) {
|
|
2254
|
+
arg.pattern = p->pattern;
|
|
2255
|
+
} else {
|
|
2256
|
+
arg.pattern = nullptr;
|
|
2257
|
+
arg.string_pattern = re2::StringPiece(
|
|
2258
|
+
RSTRING_PTR(pattern), RSTRING_LEN(pattern));
|
|
2259
|
+
}
|
|
2260
|
+
arg.rewrite = re2::StringPiece(
|
|
2261
|
+
RSTRING_PTR(rewrite), RSTRING_LEN(rewrite));
|
|
1752
2262
|
|
|
2263
|
+
#ifdef _WIN32
|
|
2264
|
+
nogvl_global_replace(&arg);
|
|
2265
|
+
#else
|
|
2266
|
+
rb_thread_call_without_gvl(nogvl_global_replace, &arg, NULL, NULL);
|
|
2267
|
+
#endif
|
|
2268
|
+
|
|
2269
|
+
RB_GC_GUARD(rewrite);
|
|
2270
|
+
RB_GC_GUARD(pattern);
|
|
2271
|
+
|
|
2272
|
+
if (p) {
|
|
1753
2273
|
return encoded_str_new(str_as_string.data(), str_as_string.size(),
|
|
1754
2274
|
p->pattern->options().encoding());
|
|
1755
2275
|
} else {
|
|
1756
|
-
|
|
2276
|
+
return encoded_str_new(str_as_string.data(), str_as_string.size(),
|
|
2277
|
+
RE2::Options::EncodingUTF8);
|
|
2278
|
+
}
|
|
2279
|
+
}
|
|
2280
|
+
|
|
2281
|
+
/*
|
|
2282
|
+
* If `pattern` matches `text`, returns a copy of `rewrite` with substitutions
|
|
2283
|
+
* using
|
|
2284
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L499-L510
|
|
2285
|
+
* `Extract`}. Non-matching portions of `text` are ignored.
|
|
2286
|
+
*
|
|
2287
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
|
2288
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
|
2289
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
|
2290
|
+
*
|
|
2291
|
+
* @param [String] text the string from which to extract
|
|
2292
|
+
* @param [String, RE2::Regexp] pattern a regexp matching the text
|
|
2293
|
+
* @param [String] rewrite the rewrite string with `\1`-style substitutions
|
|
2294
|
+
* @return [String, nil] the extracted string on a successful match or nil if
|
|
2295
|
+
* there is no match
|
|
2296
|
+
* @raise [TypeError] if the given rewrite or pattern (if not provided as a
|
|
2297
|
+
* {RE2::Regexp}) cannot be coerced to `String`s
|
|
2298
|
+
* @example
|
|
2299
|
+
* RE2.extract("alice@example.com", '(\w+)@(\w+)', '\2-\1')
|
|
2300
|
+
* #=> "example-alice"
|
|
2301
|
+
* RE2.extract("no match", '(\d+)', '\1') #=> nil
|
|
2302
|
+
*/
|
|
2303
|
+
static VALUE re2_extract(VALUE, VALUE text, VALUE pattern,
|
|
2304
|
+
VALUE rewrite) {
|
|
2305
|
+
re2_pattern *p = nullptr;
|
|
2306
|
+
|
|
2307
|
+
/* Coerce and freeze all arguments before any C++ allocations so that any
|
|
2308
|
+
* Ruby exceptions (via longjmp) cannot bypass C++ destructors and leak
|
|
2309
|
+
* memory, and later coercions cannot mutate earlier strings.
|
|
2310
|
+
*/
|
|
2311
|
+
StringValue(text);
|
|
2312
|
+
text = rb_str_new_frozen(text);
|
|
2313
|
+
if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
|
|
2314
|
+
p = unwrap_re2_regexp(pattern);
|
|
2315
|
+
} else {
|
|
1757
2316
|
StringValue(pattern);
|
|
2317
|
+
pattern = rb_str_new_frozen(pattern);
|
|
2318
|
+
}
|
|
2319
|
+
StringValue(rewrite);
|
|
2320
|
+
rewrite = rb_str_new_frozen(rewrite);
|
|
1758
2321
|
|
|
1759
|
-
|
|
1760
|
-
re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)),
|
|
1761
|
-
re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
|
|
2322
|
+
std::string out;
|
|
1762
2323
|
|
|
1763
|
-
|
|
2324
|
+
nogvl_extract_arg arg;
|
|
2325
|
+
arg.text = re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text));
|
|
2326
|
+
if (p) {
|
|
2327
|
+
arg.pattern = p->pattern;
|
|
2328
|
+
} else {
|
|
2329
|
+
arg.pattern = nullptr;
|
|
2330
|
+
arg.string_pattern = re2::StringPiece(
|
|
2331
|
+
RSTRING_PTR(pattern), RSTRING_LEN(pattern));
|
|
2332
|
+
}
|
|
2333
|
+
arg.rewrite = re2::StringPiece(
|
|
2334
|
+
RSTRING_PTR(rewrite), RSTRING_LEN(rewrite));
|
|
2335
|
+
arg.out = &out;
|
|
2336
|
+
arg.extracted = false;
|
|
2337
|
+
|
|
2338
|
+
#ifdef _WIN32
|
|
2339
|
+
nogvl_extract(&arg);
|
|
2340
|
+
#else
|
|
2341
|
+
rb_thread_call_without_gvl(nogvl_extract, &arg, NULL, NULL);
|
|
2342
|
+
#endif
|
|
2343
|
+
|
|
2344
|
+
RB_GC_GUARD(text);
|
|
2345
|
+
RB_GC_GUARD(rewrite);
|
|
2346
|
+
RB_GC_GUARD(pattern);
|
|
2347
|
+
|
|
2348
|
+
if (arg.extracted) {
|
|
2349
|
+
return encoded_str_new(out.data(), out.size(),
|
|
2350
|
+
p ? p->pattern->options().encoding()
|
|
2351
|
+
: RE2::Options::EncodingUTF8);
|
|
2352
|
+
} else {
|
|
2353
|
+
return Qnil;
|
|
1764
2354
|
}
|
|
1765
2355
|
}
|
|
1766
2356
|
|
|
@@ -1775,9 +2365,12 @@ static VALUE re2_GlobalReplace(VALUE, VALUE str, VALUE pattern,
|
|
|
1775
2365
|
* @raise [TypeError] if the given unquoted string cannot be coerced to a `String`
|
|
1776
2366
|
* @return [String] the escaped string
|
|
1777
2367
|
* @example
|
|
1778
|
-
* RE2
|
|
2368
|
+
* RE2.escape("1.5-2.0?") #=> "1\\.5\\-2\\.0\\?"
|
|
2369
|
+
* RE2.quote("1.5-2.0?") #=> "1\\.5\\-2\\.0\\?"
|
|
2370
|
+
* RE2::Regexp.escape("1.5-2.0?") #=> "1\\.5\\-2\\.0\\?"
|
|
2371
|
+
* RE2::Regexp.quote("1.5-2.0?") #=> "1\\.5\\-2\\.0\\?"
|
|
1779
2372
|
*/
|
|
1780
|
-
static VALUE
|
|
2373
|
+
static VALUE re2_escape(VALUE, VALUE unquoted) {
|
|
1781
2374
|
StringValue(unquoted);
|
|
1782
2375
|
|
|
1783
2376
|
std::string quoted_string = RE2::QuoteMeta(
|
|
@@ -1787,7 +2380,7 @@ static VALUE re2_QuoteMeta(VALUE, VALUE unquoted) {
|
|
|
1787
2380
|
}
|
|
1788
2381
|
|
|
1789
2382
|
static void re2_set_free(void *ptr) {
|
|
1790
|
-
re2_set *s =
|
|
2383
|
+
re2_set *s = static_cast<re2_set *>(ptr);
|
|
1791
2384
|
if (s->set) {
|
|
1792
2385
|
delete s->set;
|
|
1793
2386
|
}
|
|
@@ -1795,7 +2388,7 @@ static void re2_set_free(void *ptr) {
|
|
|
1795
2388
|
}
|
|
1796
2389
|
|
|
1797
2390
|
static size_t re2_set_memsize(const void *ptr) {
|
|
1798
|
-
const re2_set *s =
|
|
2391
|
+
const re2_set *s = static_cast<const re2_set *>(ptr);
|
|
1799
2392
|
size_t size = sizeof(*s);
|
|
1800
2393
|
if (s->set) {
|
|
1801
2394
|
size += sizeof(*s->set);
|
|
@@ -1815,9 +2408,18 @@ static const rb_data_type_t re2_set_data_type = {
|
|
|
1815
2408
|
0,
|
|
1816
2409
|
// IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
|
|
1817
2410
|
// macro to update VALUE references, as to trigger write barriers.
|
|
1818
|
-
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
|
|
2411
|
+
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE
|
|
1819
2412
|
};
|
|
1820
2413
|
|
|
2414
|
+
static re2_set *unwrap_re2_set(VALUE self) {
|
|
2415
|
+
re2_set *s;
|
|
2416
|
+
TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);
|
|
2417
|
+
if (!s->set) {
|
|
2418
|
+
rb_raise(rb_eTypeError, "uninitialized RE2::Set");
|
|
2419
|
+
}
|
|
2420
|
+
return s;
|
|
2421
|
+
}
|
|
2422
|
+
|
|
1821
2423
|
static VALUE re2_set_allocate(VALUE klass) {
|
|
1822
2424
|
re2_set *s;
|
|
1823
2425
|
VALUE result = TypedData_Make_Struct(klass, re2_set, &re2_set_data_type, s);
|
|
@@ -1825,6 +2427,10 @@ static VALUE re2_set_allocate(VALUE klass) {
|
|
|
1825
2427
|
return result;
|
|
1826
2428
|
}
|
|
1827
2429
|
|
|
2430
|
+
static VALUE re2_set_initialize_copy(VALUE, VALUE) {
|
|
2431
|
+
rb_raise(rb_eTypeError, "cannot copy RE2::Set");
|
|
2432
|
+
}
|
|
2433
|
+
|
|
1828
2434
|
/*
|
|
1829
2435
|
* Returns a new {RE2::Set} object, a collection of patterns that can be
|
|
1830
2436
|
* searched for simultaneously.
|
|
@@ -1895,8 +2501,15 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
|
|
|
1895
2501
|
parse_re2_options(&re2_options, options);
|
|
1896
2502
|
}
|
|
1897
2503
|
|
|
2504
|
+
rb_check_frozen(self);
|
|
2505
|
+
|
|
2506
|
+
if (s->set) {
|
|
2507
|
+
delete s->set;
|
|
2508
|
+
s->set = nullptr;
|
|
2509
|
+
}
|
|
2510
|
+
|
|
1898
2511
|
s->set = new(std::nothrow) RE2::Set(re2_options, re2_anchor);
|
|
1899
|
-
if (s->set ==
|
|
2512
|
+
if (s->set == nullptr) {
|
|
1900
2513
|
rb_raise(rb_eNoMemError, "not enough memory to allocate RE2::Set object");
|
|
1901
2514
|
}
|
|
1902
2515
|
|
|
@@ -1919,8 +2532,8 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
|
|
|
1919
2532
|
static VALUE re2_set_add(VALUE self, VALUE pattern) {
|
|
1920
2533
|
StringValue(pattern);
|
|
1921
2534
|
|
|
1922
|
-
re2_set *s;
|
|
1923
|
-
|
|
2535
|
+
re2_set *s = unwrap_re2_set(self);
|
|
2536
|
+
rb_check_frozen(self);
|
|
1924
2537
|
|
|
1925
2538
|
int index;
|
|
1926
2539
|
VALUE msg;
|
|
@@ -1951,10 +2564,16 @@ static VALUE re2_set_add(VALUE self, VALUE pattern) {
|
|
|
1951
2564
|
* set.compile #=> true
|
|
1952
2565
|
*/
|
|
1953
2566
|
static VALUE re2_set_compile(VALUE self) {
|
|
1954
|
-
re2_set *s;
|
|
1955
|
-
|
|
2567
|
+
re2_set *s = unwrap_re2_set(self);
|
|
2568
|
+
rb_check_frozen(self);
|
|
1956
2569
|
|
|
1957
|
-
|
|
2570
|
+
bool compiled = s->set->Compile();
|
|
2571
|
+
|
|
2572
|
+
if (compiled) {
|
|
2573
|
+
rb_obj_freeze(self);
|
|
2574
|
+
}
|
|
2575
|
+
|
|
2576
|
+
return BOOL2RUBY(compiled);
|
|
1958
2577
|
}
|
|
1959
2578
|
|
|
1960
2579
|
/*
|
|
@@ -1968,8 +2587,7 @@ static VALUE re2_set_compile(VALUE self) {
|
|
|
1968
2587
|
*/
|
|
1969
2588
|
static VALUE re2_set_size(VALUE self) {
|
|
1970
2589
|
#ifdef HAVE_SET_SIZE
|
|
1971
|
-
re2_set *s;
|
|
1972
|
-
TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);
|
|
2590
|
+
re2_set *s = unwrap_re2_set(self);
|
|
1973
2591
|
|
|
1974
2592
|
return INT2FIX(s->set->Size());
|
|
1975
2593
|
#else
|
|
@@ -2052,8 +2670,9 @@ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
2052
2670
|
rb_scan_args(argc, argv, "11", &str, &options);
|
|
2053
2671
|
|
|
2054
2672
|
StringValue(str);
|
|
2055
|
-
|
|
2056
|
-
|
|
2673
|
+
str = rb_str_new_frozen(str);
|
|
2674
|
+
|
|
2675
|
+
re2_set *s = unwrap_re2_set(self);
|
|
2057
2676
|
|
|
2058
2677
|
if (RTEST(options)) {
|
|
2059
2678
|
Check_Type(options, T_HASH);
|
|
@@ -2069,8 +2688,21 @@ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
2069
2688
|
if (raise_exception) {
|
|
2070
2689
|
#ifdef HAVE_ERROR_INFO_ARGUMENT
|
|
2071
2690
|
RE2::Set::ErrorInfo e;
|
|
2072
|
-
|
|
2073
|
-
|
|
2691
|
+
nogvl_set_match_arg arg;
|
|
2692
|
+
arg.set = s->set;
|
|
2693
|
+
arg.text = re2::StringPiece(RSTRING_PTR(str), RSTRING_LEN(str));
|
|
2694
|
+
arg.v = &v;
|
|
2695
|
+
arg.error_info = &e;
|
|
2696
|
+
arg.matched = false;
|
|
2697
|
+
|
|
2698
|
+
#ifdef _WIN32
|
|
2699
|
+
nogvl_set_match(&arg);
|
|
2700
|
+
#else
|
|
2701
|
+
rb_thread_call_without_gvl(nogvl_set_match, &arg, NULL, NULL);
|
|
2702
|
+
#endif
|
|
2703
|
+
RB_GC_GUARD(str);
|
|
2704
|
+
|
|
2705
|
+
bool match_failed = !arg.matched;
|
|
2074
2706
|
VALUE result = rb_ary_new2(v.size());
|
|
2075
2707
|
|
|
2076
2708
|
if (match_failed) {
|
|
@@ -2087,8 +2719,8 @@ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
2087
2719
|
rb_raise(re2_eSetMatchError, "Unknown RE2::Set::ErrorKind: %d", e.kind);
|
|
2088
2720
|
}
|
|
2089
2721
|
} else {
|
|
2090
|
-
for (
|
|
2091
|
-
rb_ary_push(result, INT2FIX(
|
|
2722
|
+
for (int index : v) {
|
|
2723
|
+
rb_ary_push(result, INT2FIX(index));
|
|
2092
2724
|
}
|
|
2093
2725
|
}
|
|
2094
2726
|
|
|
@@ -2097,13 +2729,27 @@ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
2097
2729
|
rb_raise(re2_eSetUnsupportedError, "current version of RE2::Set::Match() does not output error information, :exception option can only be set to false");
|
|
2098
2730
|
#endif
|
|
2099
2731
|
} else {
|
|
2100
|
-
|
|
2101
|
-
|
|
2732
|
+
nogvl_set_match_arg arg;
|
|
2733
|
+
arg.set = s->set;
|
|
2734
|
+
arg.text = re2::StringPiece(RSTRING_PTR(str), RSTRING_LEN(str));
|
|
2735
|
+
arg.v = &v;
|
|
2736
|
+
#ifdef HAVE_ERROR_INFO_ARGUMENT
|
|
2737
|
+
arg.error_info = nullptr;
|
|
2738
|
+
#endif
|
|
2739
|
+
arg.matched = false;
|
|
2740
|
+
|
|
2741
|
+
#ifdef _WIN32
|
|
2742
|
+
nogvl_set_match(&arg);
|
|
2743
|
+
#else
|
|
2744
|
+
rb_thread_call_without_gvl(nogvl_set_match, &arg, NULL, NULL);
|
|
2745
|
+
#endif
|
|
2746
|
+
RB_GC_GUARD(str);
|
|
2747
|
+
|
|
2102
2748
|
VALUE result = rb_ary_new2(v.size());
|
|
2103
2749
|
|
|
2104
|
-
if (matched) {
|
|
2105
|
-
for (
|
|
2106
|
-
rb_ary_push(result, INT2FIX(
|
|
2750
|
+
if (arg.matched) {
|
|
2751
|
+
for (int index : v) {
|
|
2752
|
+
rb_ary_push(result, INT2FIX(index));
|
|
2107
2753
|
}
|
|
2108
2754
|
}
|
|
2109
2755
|
|
|
@@ -2112,6 +2758,8 @@ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
2112
2758
|
}
|
|
2113
2759
|
|
|
2114
2760
|
extern "C" void Init_re2(void) {
|
|
2761
|
+
rb_ext_ractor_safe(true);
|
|
2762
|
+
|
|
2115
2763
|
re2_mRE2 = rb_define_module("RE2");
|
|
2116
2764
|
re2_cRegexp = rb_define_class_under(re2_mRE2, "Regexp", rb_cObject);
|
|
2117
2765
|
re2_eRegexpUnsupportedError = rb_define_class_under(re2_cRegexp,
|
|
@@ -2147,6 +2795,14 @@ extern "C" void Init_re2(void) {
|
|
|
2147
2795
|
RUBY_METHOD_FUNC(re2_matchdata_begin), 1);
|
|
2148
2796
|
rb_define_method(re2_cMatchData, "end",
|
|
2149
2797
|
RUBY_METHOD_FUNC(re2_matchdata_end), 1);
|
|
2798
|
+
rb_define_method(re2_cMatchData, "pre_match",
|
|
2799
|
+
RUBY_METHOD_FUNC(re2_matchdata_pre_match), 0);
|
|
2800
|
+
rb_define_method(re2_cMatchData, "post_match",
|
|
2801
|
+
RUBY_METHOD_FUNC(re2_matchdata_post_match), 0);
|
|
2802
|
+
rb_define_method(re2_cMatchData, "offset",
|
|
2803
|
+
RUBY_METHOD_FUNC(re2_matchdata_offset), 1);
|
|
2804
|
+
rb_define_method(re2_cMatchData, "match_length",
|
|
2805
|
+
RUBY_METHOD_FUNC(re2_matchdata_match_length), 1);
|
|
2150
2806
|
rb_define_method(re2_cMatchData, "[]", RUBY_METHOD_FUNC(re2_matchdata_aref),
|
|
2151
2807
|
-1);
|
|
2152
2808
|
rb_define_method(re2_cMatchData, "to_s",
|
|
@@ -2155,8 +2811,18 @@ extern "C" void Init_re2(void) {
|
|
|
2155
2811
|
RUBY_METHOD_FUNC(re2_matchdata_inspect), 0);
|
|
2156
2812
|
rb_define_method(re2_cMatchData, "deconstruct",
|
|
2157
2813
|
RUBY_METHOD_FUNC(re2_matchdata_deconstruct), 0);
|
|
2814
|
+
rb_define_method(re2_cMatchData, "captures",
|
|
2815
|
+
RUBY_METHOD_FUNC(re2_matchdata_deconstruct), 0);
|
|
2816
|
+
rb_define_method(re2_cMatchData, "named_captures",
|
|
2817
|
+
RUBY_METHOD_FUNC(re2_matchdata_named_captures), -1);
|
|
2818
|
+
rb_define_method(re2_cMatchData, "names",
|
|
2819
|
+
RUBY_METHOD_FUNC(re2_matchdata_names), 0);
|
|
2820
|
+
rb_define_method(re2_cMatchData, "values_at",
|
|
2821
|
+
RUBY_METHOD_FUNC(re2_matchdata_values_at), -1);
|
|
2158
2822
|
rb_define_method(re2_cMatchData, "deconstruct_keys",
|
|
2159
2823
|
RUBY_METHOD_FUNC(re2_matchdata_deconstruct_keys), 1);
|
|
2824
|
+
rb_define_method(re2_cMatchData, "initialize_copy",
|
|
2825
|
+
RUBY_METHOD_FUNC(re2_matchdata_initialize_copy), 1);
|
|
2160
2826
|
|
|
2161
2827
|
rb_define_method(re2_cScanner, "string",
|
|
2162
2828
|
RUBY_METHOD_FUNC(re2_scanner_string), 0);
|
|
@@ -2168,11 +2834,15 @@ extern "C" void Init_re2(void) {
|
|
|
2168
2834
|
RUBY_METHOD_FUNC(re2_scanner_scan), 0);
|
|
2169
2835
|
rb_define_method(re2_cScanner, "rewind",
|
|
2170
2836
|
RUBY_METHOD_FUNC(re2_scanner_rewind), 0);
|
|
2837
|
+
rb_define_method(re2_cScanner, "initialize_copy",
|
|
2838
|
+
RUBY_METHOD_FUNC(re2_scanner_initialize_copy), 1);
|
|
2171
2839
|
|
|
2172
2840
|
rb_define_singleton_method(re2_cRegexp, "match_has_endpos_argument?",
|
|
2173
2841
|
RUBY_METHOD_FUNC(re2_regexp_match_has_endpos_argument_p), 0);
|
|
2174
2842
|
rb_define_method(re2_cRegexp, "initialize",
|
|
2175
2843
|
RUBY_METHOD_FUNC(re2_regexp_initialize), -1);
|
|
2844
|
+
rb_define_method(re2_cRegexp, "initialize_copy",
|
|
2845
|
+
RUBY_METHOD_FUNC(re2_regexp_initialize_copy), 1);
|
|
2176
2846
|
rb_define_method(re2_cRegexp, "ok?", RUBY_METHOD_FUNC(re2_regexp_ok), 0);
|
|
2177
2847
|
rb_define_method(re2_cRegexp, "error", RUBY_METHOD_FUNC(re2_regexp_error),
|
|
2178
2848
|
0);
|
|
@@ -2186,6 +2856,10 @@ extern "C" void Init_re2(void) {
|
|
|
2186
2856
|
RUBY_METHOD_FUNC(re2_regexp_number_of_capturing_groups), 0);
|
|
2187
2857
|
rb_define_method(re2_cRegexp, "named_capturing_groups",
|
|
2188
2858
|
RUBY_METHOD_FUNC(re2_regexp_named_capturing_groups), 0);
|
|
2859
|
+
rb_define_method(re2_cRegexp, "named_captures",
|
|
2860
|
+
RUBY_METHOD_FUNC(re2_regexp_named_capturing_groups), 0);
|
|
2861
|
+
rb_define_method(re2_cRegexp, "names",
|
|
2862
|
+
RUBY_METHOD_FUNC(re2_regexp_names), 0);
|
|
2189
2863
|
rb_define_method(re2_cRegexp, "match", RUBY_METHOD_FUNC(re2_regexp_match),
|
|
2190
2864
|
-1);
|
|
2191
2865
|
rb_define_method(re2_cRegexp, "match?", RUBY_METHOD_FUNC(re2_regexp_match_p),
|
|
@@ -2240,22 +2914,34 @@ extern "C" void Init_re2(void) {
|
|
|
2240
2914
|
RUBY_METHOD_FUNC(re2_set_size_p), 0);
|
|
2241
2915
|
rb_define_method(re2_cSet, "initialize",
|
|
2242
2916
|
RUBY_METHOD_FUNC(re2_set_initialize), -1);
|
|
2917
|
+
rb_define_method(re2_cSet, "initialize_copy",
|
|
2918
|
+
RUBY_METHOD_FUNC(re2_set_initialize_copy), 1);
|
|
2243
2919
|
rb_define_method(re2_cSet, "add", RUBY_METHOD_FUNC(re2_set_add), 1);
|
|
2244
2920
|
rb_define_method(re2_cSet, "compile", RUBY_METHOD_FUNC(re2_set_compile), 0);
|
|
2245
2921
|
rb_define_method(re2_cSet, "match", RUBY_METHOD_FUNC(re2_set_match), -1);
|
|
2246
2922
|
rb_define_method(re2_cSet, "size", RUBY_METHOD_FUNC(re2_set_size), 0);
|
|
2247
2923
|
rb_define_method(re2_cSet, "length", RUBY_METHOD_FUNC(re2_set_size), 0);
|
|
2248
2924
|
|
|
2925
|
+
rb_define_module_function(re2_mRE2, "replace",
|
|
2926
|
+
RUBY_METHOD_FUNC(re2_replace), 3);
|
|
2249
2927
|
rb_define_module_function(re2_mRE2, "Replace",
|
|
2250
|
-
RUBY_METHOD_FUNC(
|
|
2928
|
+
RUBY_METHOD_FUNC(re2_replace), 3);
|
|
2929
|
+
rb_define_module_function(re2_mRE2, "global_replace",
|
|
2930
|
+
RUBY_METHOD_FUNC(re2_global_replace), 3);
|
|
2251
2931
|
rb_define_module_function(re2_mRE2, "GlobalReplace",
|
|
2252
|
-
RUBY_METHOD_FUNC(
|
|
2932
|
+
RUBY_METHOD_FUNC(re2_global_replace), 3);
|
|
2933
|
+
rb_define_module_function(re2_mRE2, "extract",
|
|
2934
|
+
RUBY_METHOD_FUNC(re2_extract), 3);
|
|
2253
2935
|
rb_define_module_function(re2_mRE2, "QuoteMeta",
|
|
2254
|
-
RUBY_METHOD_FUNC(
|
|
2936
|
+
RUBY_METHOD_FUNC(re2_escape), 1);
|
|
2937
|
+
rb_define_module_function(re2_mRE2, "escape",
|
|
2938
|
+
RUBY_METHOD_FUNC(re2_escape), 1);
|
|
2939
|
+
rb_define_module_function(re2_mRE2, "quote",
|
|
2940
|
+
RUBY_METHOD_FUNC(re2_escape), 1);
|
|
2255
2941
|
rb_define_singleton_method(re2_cRegexp, "escape",
|
|
2256
|
-
RUBY_METHOD_FUNC(
|
|
2942
|
+
RUBY_METHOD_FUNC(re2_escape), 1);
|
|
2257
2943
|
rb_define_singleton_method(re2_cRegexp, "quote",
|
|
2258
|
-
RUBY_METHOD_FUNC(
|
|
2944
|
+
RUBY_METHOD_FUNC(re2_escape), 1);
|
|
2259
2945
|
|
|
2260
2946
|
// (see RE2::Regexp#initialize)
|
|
2261
2947
|
rb_define_singleton_method(re2_cRegexp, "compile",
|
|
@@ -2283,4 +2969,5 @@ extern "C" void Init_re2(void) {
|
|
|
2283
2969
|
id_submatches = rb_intern("submatches");
|
|
2284
2970
|
id_startpos = rb_intern("startpos");
|
|
2285
2971
|
id_endpos = rb_intern("endpos");
|
|
2972
|
+
id_symbolize_names = rb_intern("symbolize_names");
|
|
2286
2973
|
}
|