re2 2.23.0-arm-linux-gnu → 2.25.0-arm-linux-gnu
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +107 -4
- data/dependencies.yml +2 -2
- data/ext/re2/extconf.rb +1 -1
- data/ext/re2/re2.cc +587 -155
- data/lib/3.1/re2.so +0 -0
- data/lib/3.2/re2.so +0 -0
- data/lib/3.3/re2.so +0 -0
- data/lib/3.4/re2.so +0 -0
- data/lib/4.0/re2.so +0 -0
- data/lib/re2/string.rb +6 -6
- data/lib/re2/version.rb +1 -1
- data/spec/re2/match_data_spec.rb +409 -0
- data/spec/re2/regexp_spec.rb +233 -1
- data/spec/re2/scanner_spec.rb +66 -0
- data/spec/re2/set_spec.rb +36 -0
- data/spec/re2_spec.rb +145 -43
- metadata +1 -1
data/ext/re2/re2.cc
CHANGED
|
@@ -51,7 +51,7 @@ static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors,
|
|
|
51
51
|
id_max_mem, id_literal, id_never_nl, id_case_sensitive,
|
|
52
52
|
id_perl_classes, id_word_boundary, id_one_line, id_unanchored,
|
|
53
53
|
id_anchor, id_anchor_start, id_anchor_both, id_exception,
|
|
54
|
-
id_submatches, id_startpos, id_endpos;
|
|
54
|
+
id_submatches, id_startpos, id_endpos, id_symbolize_names;
|
|
55
55
|
|
|
56
56
|
inline VALUE encoded_str_new(const char *str, long length, RE2::Options::Encoding encoding) {
|
|
57
57
|
if (encoding == RE2::Options::EncodingUTF8) {
|
|
@@ -128,6 +128,10 @@ static void parse_re2_options(RE2::Options* re2_options, const VALUE options) {
|
|
|
128
128
|
static void re2_matchdata_mark(void *ptr) {
|
|
129
129
|
re2_matchdata *m = reinterpret_cast<re2_matchdata *>(ptr);
|
|
130
130
|
rb_gc_mark_movable(m->regexp);
|
|
131
|
+
|
|
132
|
+
/* Text must not be movable because StringPiece matches hold pointers into
|
|
133
|
+
* its underlying buffer; moving the string would invalidate them.
|
|
134
|
+
*/
|
|
131
135
|
rb_gc_mark(m->text);
|
|
132
136
|
}
|
|
133
137
|
|
|
@@ -172,6 +176,10 @@ static const rb_data_type_t re2_matchdata_data_type = {
|
|
|
172
176
|
static void re2_scanner_mark(void *ptr) {
|
|
173
177
|
re2_scanner *s = reinterpret_cast<re2_scanner *>(ptr);
|
|
174
178
|
rb_gc_mark_movable(s->regexp);
|
|
179
|
+
|
|
180
|
+
/* Text must not be movable because the StringPiece input holds a pointer
|
|
181
|
+
* into its underlying buffer; moving the string would invalidate it.
|
|
182
|
+
*/
|
|
175
183
|
rb_gc_mark(s->text);
|
|
176
184
|
}
|
|
177
185
|
|
|
@@ -245,6 +253,61 @@ static const rb_data_type_t re2_regexp_data_type = {
|
|
|
245
253
|
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
|
|
246
254
|
};
|
|
247
255
|
|
|
256
|
+
static re2_pattern *unwrap_re2_regexp(VALUE self) {
|
|
257
|
+
re2_pattern *p;
|
|
258
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
259
|
+
if (!p->pattern) {
|
|
260
|
+
rb_raise(rb_eTypeError, "uninitialized RE2::Regexp");
|
|
261
|
+
}
|
|
262
|
+
return p;
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
static re2_matchdata *unwrap_re2_matchdata(VALUE self) {
|
|
266
|
+
re2_matchdata *m;
|
|
267
|
+
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
|
268
|
+
if (!RTEST(m->regexp)) {
|
|
269
|
+
rb_raise(rb_eTypeError, "uninitialized RE2::MatchData");
|
|
270
|
+
}
|
|
271
|
+
return m;
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
static re2_scanner *unwrap_re2_scanner(VALUE self) {
|
|
275
|
+
re2_scanner *c;
|
|
276
|
+
TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
|
|
277
|
+
if (!RTEST(c->regexp)) {
|
|
278
|
+
rb_raise(rb_eTypeError, "uninitialized RE2::Scanner");
|
|
279
|
+
}
|
|
280
|
+
return c;
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
/*
|
|
284
|
+
* Returns an array of names of all named capturing groups. Names are returned
|
|
285
|
+
* in alphabetical order rather than definition order, as RE2 stores named
|
|
286
|
+
* groups internally in a sorted map.
|
|
287
|
+
*
|
|
288
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
|
289
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
|
290
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
|
291
|
+
*
|
|
292
|
+
* @return [Array<String>] an array of names of named capturing groups
|
|
293
|
+
* @example
|
|
294
|
+
* RE2::Regexp.new('(?P<a>\d+) (?P<b>\w+)').names #=> ["a", "b"]
|
|
295
|
+
*/
|
|
296
|
+
static VALUE re2_regexp_names(const VALUE self) {
|
|
297
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
298
|
+
|
|
299
|
+
const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
|
|
300
|
+
VALUE names = rb_ary_new2(groups.size());
|
|
301
|
+
|
|
302
|
+
for (std::map<std::string, int>::const_iterator it = groups.begin(); it != groups.end(); ++it) {
|
|
303
|
+
rb_ary_push(names,
|
|
304
|
+
encoded_str_new(it->first.data(), it->first.size(),
|
|
305
|
+
p->pattern->options().encoding()));
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
return names;
|
|
309
|
+
}
|
|
310
|
+
|
|
248
311
|
static VALUE re2_matchdata_allocate(VALUE klass) {
|
|
249
312
|
re2_matchdata *m;
|
|
250
313
|
|
|
@@ -269,8 +332,7 @@ static VALUE re2_scanner_allocate(VALUE klass) {
|
|
|
269
332
|
* m.string #=> "bob 123"
|
|
270
333
|
*/
|
|
271
334
|
static VALUE re2_matchdata_string(const VALUE self) {
|
|
272
|
-
re2_matchdata *m;
|
|
273
|
-
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
|
335
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
274
336
|
|
|
275
337
|
return m->text;
|
|
276
338
|
}
|
|
@@ -287,8 +349,7 @@ static VALUE re2_matchdata_string(const VALUE self) {
|
|
|
287
349
|
* c.string #=> "foo"
|
|
288
350
|
*/
|
|
289
351
|
static VALUE re2_scanner_string(const VALUE self) {
|
|
290
|
-
re2_scanner *c;
|
|
291
|
-
TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
|
|
352
|
+
re2_scanner *c = unwrap_re2_scanner(self);
|
|
292
353
|
|
|
293
354
|
return c->text;
|
|
294
355
|
}
|
|
@@ -302,8 +363,7 @@ static VALUE re2_scanner_string(const VALUE self) {
|
|
|
302
363
|
* c.eof? #=> true
|
|
303
364
|
*/
|
|
304
365
|
static VALUE re2_scanner_eof(const VALUE self) {
|
|
305
|
-
re2_scanner *c;
|
|
306
|
-
TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
|
|
366
|
+
re2_scanner *c = unwrap_re2_scanner(self);
|
|
307
367
|
|
|
308
368
|
return BOOL2RUBY(c->eof);
|
|
309
369
|
}
|
|
@@ -320,8 +380,7 @@ static VALUE re2_scanner_eof(const VALUE self) {
|
|
|
320
380
|
* e.scan #=> ["1"]
|
|
321
381
|
*/
|
|
322
382
|
static VALUE re2_scanner_rewind(VALUE self) {
|
|
323
|
-
re2_scanner *c;
|
|
324
|
-
TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
|
|
383
|
+
re2_scanner *c = unwrap_re2_scanner(self);
|
|
325
384
|
|
|
326
385
|
delete c->input;
|
|
327
386
|
c->input = new(std::nothrow) re2::StringPiece(
|
|
@@ -336,6 +395,34 @@ static VALUE re2_scanner_rewind(VALUE self) {
|
|
|
336
395
|
return self;
|
|
337
396
|
}
|
|
338
397
|
|
|
398
|
+
static VALUE re2_scanner_initialize_copy(VALUE self, VALUE other) {
|
|
399
|
+
re2_scanner *self_c;
|
|
400
|
+
re2_scanner *other_c = unwrap_re2_scanner(other);
|
|
401
|
+
|
|
402
|
+
TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, self_c);
|
|
403
|
+
|
|
404
|
+
if (self_c->input) {
|
|
405
|
+
delete self_c->input;
|
|
406
|
+
}
|
|
407
|
+
|
|
408
|
+
RB_OBJ_WRITE(self, &self_c->regexp, other_c->regexp);
|
|
409
|
+
RB_OBJ_WRITE(self, &self_c->text, other_c->text);
|
|
410
|
+
self_c->number_of_capturing_groups = other_c->number_of_capturing_groups;
|
|
411
|
+
self_c->eof = other_c->eof;
|
|
412
|
+
|
|
413
|
+
if (other_c->input) {
|
|
414
|
+
self_c->input = new(std::nothrow) re2::StringPiece(*other_c->input);
|
|
415
|
+
if (self_c->input == 0) {
|
|
416
|
+
rb_raise(rb_eNoMemError,
|
|
417
|
+
"not enough memory to allocate StringPiece for input");
|
|
418
|
+
}
|
|
419
|
+
} else {
|
|
420
|
+
self_c->input = NULL;
|
|
421
|
+
}
|
|
422
|
+
|
|
423
|
+
return self;
|
|
424
|
+
}
|
|
425
|
+
|
|
339
426
|
/*
|
|
340
427
|
* Scan the given text incrementally for matches using
|
|
341
428
|
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L447-L463
|
|
@@ -356,11 +443,8 @@ static VALUE re2_scanner_rewind(VALUE self) {
|
|
|
356
443
|
* s.scan #=> ["bar"]
|
|
357
444
|
*/
|
|
358
445
|
static VALUE re2_scanner_scan(VALUE self) {
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
|
|
363
|
-
TypedData_Get_Struct(c->regexp, re2_pattern, &re2_regexp_data_type, p);
|
|
446
|
+
re2_scanner *c = unwrap_re2_scanner(self);
|
|
447
|
+
re2_pattern *p = unwrap_re2_regexp(c->regexp);
|
|
364
448
|
|
|
365
449
|
std::vector<RE2::Arg> argv(c->number_of_capturing_groups);
|
|
366
450
|
std::vector<RE2::Arg*> args(c->number_of_capturing_groups);
|
|
@@ -409,11 +493,8 @@ static VALUE re2_scanner_scan(VALUE self) {
|
|
|
409
493
|
}
|
|
410
494
|
|
|
411
495
|
static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
|
|
412
|
-
re2_matchdata *m;
|
|
413
|
-
re2_pattern *p;
|
|
414
|
-
|
|
415
|
-
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
|
416
|
-
TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
|
|
496
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
497
|
+
re2_pattern *p = unwrap_re2_regexp(m->regexp);
|
|
417
498
|
|
|
418
499
|
int id;
|
|
419
500
|
|
|
@@ -458,14 +539,12 @@ static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
|
|
|
458
539
|
*
|
|
459
540
|
* @return [Integer] the number of elements
|
|
460
541
|
* @example
|
|
461
|
-
* m = RE2::Regexp.new('(\d+)').
|
|
542
|
+
* m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
|
|
462
543
|
* m.size #=> 2
|
|
463
544
|
* m.length #=> 2
|
|
464
545
|
*/
|
|
465
546
|
static VALUE re2_matchdata_size(const VALUE self) {
|
|
466
|
-
re2_matchdata *m;
|
|
467
|
-
|
|
468
|
-
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
|
547
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
469
548
|
|
|
470
549
|
return INT2FIX(m->number_of_matches);
|
|
471
550
|
}
|
|
@@ -477,14 +556,12 @@ static VALUE re2_matchdata_size(const VALUE self) {
|
|
|
477
556
|
* @return [Integer, nil] the offset of the start of the match or `nil` if
|
|
478
557
|
* there is no such submatch
|
|
479
558
|
* @example
|
|
480
|
-
* m = RE2::Regexp.new('ob (\d+)').
|
|
559
|
+
* m = RE2::Regexp.new('ob (\d+)').partial_match("bob 123")
|
|
481
560
|
* m.begin(0) #=> 1
|
|
482
561
|
* m.begin(1) #=> 4
|
|
483
562
|
*/
|
|
484
563
|
static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
|
|
485
|
-
re2_matchdata *m;
|
|
486
|
-
|
|
487
|
-
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
|
564
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
488
565
|
|
|
489
566
|
re2::StringPiece *match = re2_matchdata_find_match(n, self);
|
|
490
567
|
if (match == NULL) {
|
|
@@ -504,14 +581,12 @@ static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
|
|
|
504
581
|
* @return [Integer, nil] the offset of the character following the end of the
|
|
505
582
|
* match or `nil` if there is no such match
|
|
506
583
|
* @example
|
|
507
|
-
* m = RE2::Regexp.new('ob (\d+) b').
|
|
584
|
+
* m = RE2::Regexp.new('ob (\d+) b').partial_match("bob 123 bob")
|
|
508
585
|
* m.end(0) #=> 9
|
|
509
586
|
* m.end(1) #=> 7
|
|
510
587
|
*/
|
|
511
588
|
static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
|
|
512
|
-
re2_matchdata *m;
|
|
513
|
-
|
|
514
|
-
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
|
589
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
515
590
|
|
|
516
591
|
re2::StringPiece *match = re2_matchdata_find_match(n, self);
|
|
517
592
|
if (match == NULL) {
|
|
@@ -523,17 +598,129 @@ static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
|
|
|
523
598
|
}
|
|
524
599
|
}
|
|
525
600
|
|
|
601
|
+
/*
|
|
602
|
+
* Returns the portion of the original string before the match.
|
|
603
|
+
*
|
|
604
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
|
605
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
|
606
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
|
607
|
+
*
|
|
608
|
+
* @return [String] the portion of the original string before the match
|
|
609
|
+
* @example
|
|
610
|
+
* m = RE2::Regexp.new('(\d+)').partial_match("bob 123 456")
|
|
611
|
+
* m.pre_match #=> "bob "
|
|
612
|
+
*/
|
|
613
|
+
static VALUE re2_matchdata_pre_match(const VALUE self) {
|
|
614
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
615
|
+
re2_pattern *p = unwrap_re2_regexp(m->regexp);
|
|
616
|
+
|
|
617
|
+
re2::StringPiece *match = &m->matches[0];
|
|
618
|
+
if (match->empty()) {
|
|
619
|
+
return Qnil;
|
|
620
|
+
}
|
|
621
|
+
|
|
622
|
+
long offset = match->data() - RSTRING_PTR(m->text);
|
|
623
|
+
|
|
624
|
+
return encoded_str_new(RSTRING_PTR(m->text), offset,
|
|
625
|
+
p->pattern->options().encoding());
|
|
626
|
+
}
|
|
627
|
+
|
|
628
|
+
/*
|
|
629
|
+
* Returns the portion of the original string after the match.
|
|
630
|
+
*
|
|
631
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
|
632
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
|
633
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
|
634
|
+
*
|
|
635
|
+
* @return [String] the portion of the original string after the match
|
|
636
|
+
* @example
|
|
637
|
+
* m = RE2::Regexp.new('(\d+)').partial_match("bob 123 456")
|
|
638
|
+
* m.post_match #=> " 456"
|
|
639
|
+
*/
|
|
640
|
+
static VALUE re2_matchdata_post_match(const VALUE self) {
|
|
641
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
642
|
+
re2_pattern *p = unwrap_re2_regexp(m->regexp);
|
|
643
|
+
|
|
644
|
+
re2::StringPiece *match = &m->matches[0];
|
|
645
|
+
if (match->empty()) {
|
|
646
|
+
return Qnil;
|
|
647
|
+
}
|
|
648
|
+
|
|
649
|
+
long start = (match->data() - RSTRING_PTR(m->text)) + match->size();
|
|
650
|
+
long remaining = RSTRING_LEN(m->text) - start;
|
|
651
|
+
|
|
652
|
+
return encoded_str_new(RSTRING_PTR(m->text) + start, remaining,
|
|
653
|
+
p->pattern->options().encoding());
|
|
654
|
+
}
|
|
655
|
+
|
|
656
|
+
/*
|
|
657
|
+
* Returns a two-element array containing the beginning and ending offsets of
|
|
658
|
+
* the nth match.
|
|
659
|
+
*
|
|
660
|
+
* @param [Integer, String, Symbol] n the name or number of the match
|
|
661
|
+
* @return [Array<Integer>, nil] a two-element array with the beginning and
|
|
662
|
+
* ending offsets of the match or `nil` if there is no such match
|
|
663
|
+
* @example
|
|
664
|
+
* m = RE2::Regexp.new('ob (\d+)').partial_match("bob 123")
|
|
665
|
+
* m.offset(0) #=> [1, 7]
|
|
666
|
+
* m.offset(1) #=> [4, 7]
|
|
667
|
+
*/
|
|
668
|
+
static VALUE re2_matchdata_offset(const VALUE self, VALUE n) {
|
|
669
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
670
|
+
|
|
671
|
+
re2::StringPiece *match = re2_matchdata_find_match(n, self);
|
|
672
|
+
if (match == NULL) {
|
|
673
|
+
return Qnil;
|
|
674
|
+
}
|
|
675
|
+
|
|
676
|
+
long start = match->data() - RSTRING_PTR(m->text);
|
|
677
|
+
long end_pos = start + match->size();
|
|
678
|
+
|
|
679
|
+
VALUE array = rb_ary_new2(2);
|
|
680
|
+
rb_ary_push(array, LONG2NUM(rb_str_sublen(m->text, start)));
|
|
681
|
+
rb_ary_push(array, LONG2NUM(rb_str_sublen(m->text, end_pos)));
|
|
682
|
+
|
|
683
|
+
return array;
|
|
684
|
+
}
|
|
685
|
+
|
|
686
|
+
/*
|
|
687
|
+
* Returns the length of the nth match in characters. This is equivalent to
|
|
688
|
+
* `m[n].length` but without allocating a new string.
|
|
689
|
+
*
|
|
690
|
+
* @param [Integer, String, Symbol] n the name or number of the match
|
|
691
|
+
* @return [Integer, nil] the length of the match or `nil` if there is no such
|
|
692
|
+
* match
|
|
693
|
+
* @example
|
|
694
|
+
* m = RE2::Regexp.new('(?P<word>\w+) (?P<number>\d+)').partial_match("alice 123")
|
|
695
|
+
* m.match_length(0) #=> 9
|
|
696
|
+
* m.match_length(1) #=> 5
|
|
697
|
+
* m.match_length(:number) #=> 3
|
|
698
|
+
*/
|
|
699
|
+
static VALUE re2_matchdata_match_length(const VALUE self, VALUE n) {
|
|
700
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
701
|
+
|
|
702
|
+
re2::StringPiece *match = re2_matchdata_find_match(n, self);
|
|
703
|
+
if (match == NULL) {
|
|
704
|
+
return Qnil;
|
|
705
|
+
}
|
|
706
|
+
|
|
707
|
+
long start = match->data() - RSTRING_PTR(m->text);
|
|
708
|
+
long end_pos = start + match->size();
|
|
709
|
+
long char_len = rb_str_sublen(m->text, end_pos) - rb_str_sublen(m->text, start);
|
|
710
|
+
|
|
711
|
+
return LONG2NUM(char_len);
|
|
712
|
+
}
|
|
713
|
+
|
|
526
714
|
/*
|
|
527
715
|
* Returns the {RE2::Regexp} used in the match.
|
|
528
716
|
*
|
|
529
717
|
* @return [RE2::Regexp] the regular expression used in the match
|
|
530
718
|
* @example
|
|
531
|
-
* m = RE2::Regexp.new('(\d+)').
|
|
719
|
+
* m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
|
|
532
720
|
* m.regexp #=> #<RE2::Regexp /(\d+)/>
|
|
533
721
|
*/
|
|
534
722
|
static VALUE re2_matchdata_regexp(const VALUE self) {
|
|
535
|
-
re2_matchdata *m;
|
|
536
|
-
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
|
723
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
537
724
|
|
|
538
725
|
return m->regexp;
|
|
539
726
|
}
|
|
@@ -547,8 +734,7 @@ static VALUE re2_matchdata_regexp(const VALUE self) {
|
|
|
547
734
|
* c.regexp #=> #<RE2::Regexp /(\d+)/>
|
|
548
735
|
*/
|
|
549
736
|
static VALUE re2_scanner_regexp(const VALUE self) {
|
|
550
|
-
re2_scanner *c;
|
|
551
|
-
TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
|
|
737
|
+
re2_scanner *c = unwrap_re2_scanner(self);
|
|
552
738
|
|
|
553
739
|
return c->regexp;
|
|
554
740
|
}
|
|
@@ -569,15 +755,12 @@ static VALUE re2_regexp_allocate(VALUE klass) {
|
|
|
569
755
|
*
|
|
570
756
|
* @return [Array<String, nil>] the array of matches
|
|
571
757
|
* @example
|
|
572
|
-
* m = RE2::Regexp.new('(\d+)').
|
|
758
|
+
* m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
|
|
573
759
|
* m.to_a #=> ["123", "123"]
|
|
574
760
|
*/
|
|
575
761
|
static VALUE re2_matchdata_to_a(const VALUE self) {
|
|
576
|
-
re2_matchdata *m;
|
|
577
|
-
re2_pattern *p;
|
|
578
|
-
|
|
579
|
-
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
|
580
|
-
TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
|
|
762
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
763
|
+
re2_pattern *p = unwrap_re2_regexp(m->regexp);
|
|
581
764
|
|
|
582
765
|
VALUE array = rb_ary_new2(m->number_of_matches);
|
|
583
766
|
for (int i = 0; i < m->number_of_matches; ++i) {
|
|
@@ -595,11 +778,8 @@ static VALUE re2_matchdata_to_a(const VALUE self) {
|
|
|
595
778
|
}
|
|
596
779
|
|
|
597
780
|
static VALUE re2_matchdata_nth_match(int nth, const VALUE self) {
|
|
598
|
-
re2_matchdata *m;
|
|
599
|
-
re2_pattern *p;
|
|
600
|
-
|
|
601
|
-
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
|
602
|
-
TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
|
|
781
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
782
|
+
re2_pattern *p = unwrap_re2_regexp(m->regexp);
|
|
603
783
|
|
|
604
784
|
if (nth < 0 || nth >= m->number_of_matches) {
|
|
605
785
|
return Qnil;
|
|
@@ -616,11 +796,8 @@ static VALUE re2_matchdata_nth_match(int nth, const VALUE self) {
|
|
|
616
796
|
}
|
|
617
797
|
|
|
618
798
|
static VALUE re2_matchdata_named_match(const std::string &name, const VALUE self) {
|
|
619
|
-
re2_matchdata *m;
|
|
620
|
-
re2_pattern *p;
|
|
621
|
-
|
|
622
|
-
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
|
623
|
-
TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
|
|
799
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
800
|
+
re2_pattern *p = unwrap_re2_regexp(m->regexp);
|
|
624
801
|
|
|
625
802
|
const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
|
|
626
803
|
std::map<std::string, int>::const_iterator search = groups.find(name);
|
|
@@ -645,7 +822,7 @@ static VALUE re2_matchdata_named_match(const std::string &name, const VALUE self
|
|
|
645
822
|
* @param [Integer] index the index of the match to fetch
|
|
646
823
|
* @return [String, nil] the specified match or `nil` if it isn't present
|
|
647
824
|
* @example
|
|
648
|
-
* m = RE2::Regexp.new('(\d+)').
|
|
825
|
+
* m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
|
|
649
826
|
* m[0] #=> "123"
|
|
650
827
|
*
|
|
651
828
|
* @overload [](start, length)
|
|
@@ -655,7 +832,7 @@ static VALUE re2_matchdata_named_match(const std::string &name, const VALUE self
|
|
|
655
832
|
* @param [Integer] length the number of elements to fetch
|
|
656
833
|
* @return [Array<String, nil>] the specified matches
|
|
657
834
|
* @example
|
|
658
|
-
* m = RE2::Regexp.new('(\d+)').
|
|
835
|
+
* m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
|
|
659
836
|
* m[0, 1] #=> ["123"]
|
|
660
837
|
*
|
|
661
838
|
* @overload [](range)
|
|
@@ -664,8 +841,8 @@ static VALUE re2_matchdata_named_match(const std::string &name, const VALUE self
|
|
|
664
841
|
* @param [Range] range the range of match indexes to fetch
|
|
665
842
|
* @return [Array<String, nil>] the specified matches
|
|
666
843
|
* @example
|
|
667
|
-
* m = RE2::Regexp.new('(\d+)').
|
|
668
|
-
* m[0..1] #=> "
|
|
844
|
+
* m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
|
|
845
|
+
* m[0..1] #=> ["123", "123"]
|
|
669
846
|
*
|
|
670
847
|
* @overload [](name)
|
|
671
848
|
* Access a particular match by name.
|
|
@@ -673,7 +850,7 @@ static VALUE re2_matchdata_named_match(const std::string &name, const VALUE self
|
|
|
673
850
|
* @param [String, Symbol] name the name of the match to fetch
|
|
674
851
|
* @return [String, nil] the specific match or `nil` if it isn't present
|
|
675
852
|
* @example
|
|
676
|
-
* m = RE2::Regexp.new('(?P<number>\d+)').
|
|
853
|
+
* m = RE2::Regexp.new('(?P<number>\d+)').partial_match("bob 123")
|
|
677
854
|
* m["number"] #=> "123"
|
|
678
855
|
* m[:number] #=> "123"
|
|
679
856
|
*/
|
|
@@ -697,6 +874,9 @@ static VALUE re2_matchdata_aref(int argc, VALUE *argv, const VALUE self) {
|
|
|
697
874
|
* Returns the entire matched string.
|
|
698
875
|
*
|
|
699
876
|
* @return [String] the entire matched string
|
|
877
|
+
* @example
|
|
878
|
+
* m = RE2::Regexp.new('(?P<number>\d+)').partial_match("bob 123")
|
|
879
|
+
* m.to_s #=> "123"
|
|
700
880
|
*/
|
|
701
881
|
static VALUE re2_matchdata_to_s(const VALUE self) {
|
|
702
882
|
return re2_matchdata_nth_match(0, self);
|
|
@@ -711,15 +891,12 @@ static VALUE re2_matchdata_to_s(const VALUE self) {
|
|
|
711
891
|
*
|
|
712
892
|
* @return [String] a printable version of the match
|
|
713
893
|
* @example
|
|
714
|
-
* m = RE2::Regexp.new('(\d+)').
|
|
894
|
+
* m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
|
|
715
895
|
* m.inspect #=> "#<RE2::MatchData \"123\" 1:\"123\">"
|
|
716
896
|
*/
|
|
717
897
|
static VALUE re2_matchdata_inspect(const VALUE self) {
|
|
718
|
-
re2_matchdata *m;
|
|
719
|
-
re2_pattern *p;
|
|
720
|
-
|
|
721
|
-
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
|
722
|
-
TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
|
|
898
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
899
|
+
re2_pattern *p = unwrap_re2_regexp(m->regexp);
|
|
723
900
|
|
|
724
901
|
std::ostringstream output;
|
|
725
902
|
output << "#<RE2::MatchData";
|
|
@@ -749,7 +926,7 @@ static VALUE re2_matchdata_inspect(const VALUE self) {
|
|
|
749
926
|
}
|
|
750
927
|
|
|
751
928
|
/*
|
|
752
|
-
* Returns the array of submatches
|
|
929
|
+
* Returns the array of submatches.
|
|
753
930
|
*
|
|
754
931
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
|
755
932
|
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
|
@@ -758,11 +935,12 @@ static VALUE re2_matchdata_inspect(const VALUE self) {
|
|
|
758
935
|
*
|
|
759
936
|
* @return [Array<String, nil>] the array of submatches
|
|
760
937
|
* @example
|
|
761
|
-
* m = RE2::Regexp.new('(\d+)').
|
|
938
|
+
* m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
|
|
939
|
+
* m.captures #=> ["123"]
|
|
762
940
|
* m.deconstruct #=> ["123"]
|
|
763
941
|
*
|
|
764
942
|
* @example pattern matching
|
|
765
|
-
* case RE2::Regexp.new('(\d+) (\d+)').
|
|
943
|
+
* case RE2::Regexp.new('(\d+) (\d+)').partial_match("bob 123 456")
|
|
766
944
|
* in x, y
|
|
767
945
|
* puts "Matched #{x} #{y}"
|
|
768
946
|
* else
|
|
@@ -770,11 +948,8 @@ static VALUE re2_matchdata_inspect(const VALUE self) {
|
|
|
770
948
|
* end
|
|
771
949
|
*/
|
|
772
950
|
static VALUE re2_matchdata_deconstruct(const VALUE self) {
|
|
773
|
-
re2_matchdata *m;
|
|
774
|
-
re2_pattern *p;
|
|
775
|
-
|
|
776
|
-
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
|
777
|
-
TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
|
|
951
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
952
|
+
re2_pattern *p = unwrap_re2_regexp(m->regexp);
|
|
778
953
|
|
|
779
954
|
VALUE array = rb_ary_new2(m->number_of_matches - 1);
|
|
780
955
|
for (int i = 1; i < m->number_of_matches; ++i) {
|
|
@@ -806,14 +981,14 @@ static VALUE re2_matchdata_deconstruct(const VALUE self) {
|
|
|
806
981
|
* @param [Array<Symbol>, nil] keys an array of `Symbol` capturing group names
|
|
807
982
|
* or `nil` to return all names
|
|
808
983
|
* @example
|
|
809
|
-
* m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').
|
|
984
|
+
* m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').partial_match('123 abc')
|
|
810
985
|
* m.deconstruct_keys(nil) #=> {numbers: "123", letters: "abc"}
|
|
811
986
|
* m.deconstruct_keys([:numbers]) #=> {numbers: "123"}
|
|
812
987
|
* m.deconstruct_keys([:fruit]) #=> {}
|
|
813
988
|
* m.deconstruct_keys([:letters, :fruit]) #=> {letters: "abc"}
|
|
814
989
|
*
|
|
815
990
|
* @example pattern matching
|
|
816
|
-
* case RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').
|
|
991
|
+
* case RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').partial_match('123 abc')
|
|
817
992
|
* in numbers:, letters:
|
|
818
993
|
* puts "Numbers: #{numbers}, letters: #{letters}"
|
|
819
994
|
* else
|
|
@@ -821,11 +996,8 @@ static VALUE re2_matchdata_deconstruct(const VALUE self) {
|
|
|
821
996
|
* end
|
|
822
997
|
*/
|
|
823
998
|
static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys) {
|
|
824
|
-
re2_matchdata *m;
|
|
825
|
-
re2_pattern *p;
|
|
826
|
-
|
|
827
|
-
TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
|
|
828
|
-
TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);
|
|
999
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
1000
|
+
re2_pattern *p = unwrap_re2_regexp(m->regexp);
|
|
829
1001
|
|
|
830
1002
|
const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
|
|
831
1003
|
VALUE capturing_groups = rb_hash_new();
|
|
@@ -833,7 +1005,7 @@ static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys)
|
|
|
833
1005
|
if (NIL_P(keys)) {
|
|
834
1006
|
for (std::map<std::string, int>::const_iterator it = groups.begin(); it != groups.end(); ++it) {
|
|
835
1007
|
rb_hash_aset(capturing_groups,
|
|
836
|
-
ID2SYM(
|
|
1008
|
+
ID2SYM(rb_intern2(it->first.data(), it->first.size())),
|
|
837
1009
|
re2_matchdata_nth_match(it->second, self));
|
|
838
1010
|
}
|
|
839
1011
|
} else {
|
|
@@ -858,6 +1030,150 @@ static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys)
|
|
|
858
1030
|
return capturing_groups;
|
|
859
1031
|
}
|
|
860
1032
|
|
|
1033
|
+
/*
|
|
1034
|
+
* Returns a hash of capturing group names to matched strings.
|
|
1035
|
+
*
|
|
1036
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
|
1037
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
|
1038
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
|
1039
|
+
*
|
|
1040
|
+
* @overload named_captures
|
|
1041
|
+
* Returns a hash with string keys.
|
|
1042
|
+
*
|
|
1043
|
+
* @return [Hash] a hash of capturing group names to matching strings
|
|
1044
|
+
* @example
|
|
1045
|
+
* m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').partial_match('123 abc')
|
|
1046
|
+
* m.named_captures #=> {"numbers" => "123", "letters" => "abc"}
|
|
1047
|
+
*
|
|
1048
|
+
* @overload named_captures(symbolize_names:)
|
|
1049
|
+
* Returns a hash with string or symbol keys.
|
|
1050
|
+
*
|
|
1051
|
+
* @param [Boolean] symbolize_names whether to return group names as symbols
|
|
1052
|
+
* @return [Hash] a hash of capturing group names to matching strings
|
|
1053
|
+
* @example
|
|
1054
|
+
* m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').partial_match('123 abc')
|
|
1055
|
+
* m.named_captures
|
|
1056
|
+
* #=> {"numbers" => "123", "letters" => "abc"}
|
|
1057
|
+
* m.named_captures(symbolize_names: true) #=> {numbers: "123", letters: "abc"}
|
|
1058
|
+
*/
|
|
1059
|
+
static VALUE re2_matchdata_named_captures(int argc, VALUE *argv, const VALUE self) {
|
|
1060
|
+
VALUE opts;
|
|
1061
|
+
rb_scan_args(argc, argv, "0:", &opts);
|
|
1062
|
+
|
|
1063
|
+
bool symbolize = false;
|
|
1064
|
+
if (!NIL_P(opts)) {
|
|
1065
|
+
VALUE sym = rb_hash_aref(opts, ID2SYM(id_symbolize_names));
|
|
1066
|
+
symbolize = RTEST(sym);
|
|
1067
|
+
}
|
|
1068
|
+
|
|
1069
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
1070
|
+
re2_pattern *p = unwrap_re2_regexp(m->regexp);
|
|
1071
|
+
|
|
1072
|
+
const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
|
|
1073
|
+
VALUE result = rb_hash_new();
|
|
1074
|
+
|
|
1075
|
+
for (std::map<std::string, int>::const_iterator it = groups.begin(); it != groups.end(); ++it) {
|
|
1076
|
+
VALUE key;
|
|
1077
|
+
if (symbolize) {
|
|
1078
|
+
key = ID2SYM(rb_intern2(it->first.data(), it->first.size()));
|
|
1079
|
+
} else {
|
|
1080
|
+
key = encoded_str_new(it->first.data(), it->first.size(),
|
|
1081
|
+
p->pattern->options().encoding());
|
|
1082
|
+
}
|
|
1083
|
+
rb_hash_aset(result, key, re2_matchdata_nth_match(it->second, self));
|
|
1084
|
+
}
|
|
1085
|
+
|
|
1086
|
+
return result;
|
|
1087
|
+
}
|
|
1088
|
+
|
|
1089
|
+
/*
|
|
1090
|
+
* Returns an array of names of named capturing groups. Names are returned in
|
|
1091
|
+
* alphabetical order rather than definition order, as RE2 stores named groups
|
|
1092
|
+
* internally in a sorted map.
|
|
1093
|
+
*
|
|
1094
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
|
1095
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
|
1096
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
|
1097
|
+
*
|
|
1098
|
+
* @return [Array<String>] an array of names of named capturing groups
|
|
1099
|
+
* @example
|
|
1100
|
+
* m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').partial_match('123 abc')
|
|
1101
|
+
* m.names #=> ["letters", "numbers"]
|
|
1102
|
+
*/
|
|
1103
|
+
static VALUE re2_matchdata_names(const VALUE self) {
|
|
1104
|
+
re2_matchdata *m = unwrap_re2_matchdata(self);
|
|
1105
|
+
|
|
1106
|
+
return re2_regexp_names(m->regexp);
|
|
1107
|
+
}
|
|
1108
|
+
|
|
1109
|
+
/*
|
|
1110
|
+
* Returns an array of match values at the given indices or names.
|
|
1111
|
+
*
|
|
1112
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
|
1113
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
|
1114
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
|
1115
|
+
*
|
|
1116
|
+
* @param [Integer, String, Symbol] indexes the indices or names of
|
|
1117
|
+
* the matches to fetch
|
|
1118
|
+
* @return [Array<String, nil>] the values at the given indices or names
|
|
1119
|
+
* @example
|
|
1120
|
+
* m = RE2::Regexp.new('(?P<a>\d+) (?P<b>\d+)').partial_match("123 456")
|
|
1121
|
+
* m.values_at(1, 2) #=> ["123", "456"]
|
|
1122
|
+
* m.values_at(:a, :b) #=> ["123", "456"]
|
|
1123
|
+
* m.values_at(1, :b) #=> ["123", "456"]
|
|
1124
|
+
*/
|
|
1125
|
+
static VALUE re2_matchdata_values_at(int argc, VALUE *argv, const VALUE self) {
|
|
1126
|
+
unwrap_re2_matchdata(self);
|
|
1127
|
+
|
|
1128
|
+
VALUE result = rb_ary_new2(argc);
|
|
1129
|
+
|
|
1130
|
+
for (int i = 0; i < argc; ++i) {
|
|
1131
|
+
VALUE idx = argv[i];
|
|
1132
|
+
|
|
1133
|
+
if (TYPE(idx) == T_STRING) {
|
|
1134
|
+
rb_ary_push(result, re2_matchdata_named_match(
|
|
1135
|
+
std::string(RSTRING_PTR(idx), RSTRING_LEN(idx)), self));
|
|
1136
|
+
} else if (SYMBOL_P(idx)) {
|
|
1137
|
+
rb_ary_push(result, re2_matchdata_named_match(
|
|
1138
|
+
rb_id2name(SYM2ID(idx)), self));
|
|
1139
|
+
} else {
|
|
1140
|
+
rb_ary_push(result, re2_matchdata_nth_match(NUM2INT(idx), self));
|
|
1141
|
+
}
|
|
1142
|
+
}
|
|
1143
|
+
|
|
1144
|
+
return result;
|
|
1145
|
+
}
|
|
1146
|
+
|
|
1147
|
+
/*
 * Copy hook used by `dup`/`clone`: makes `self` an independent copy of
 * `other`, sharing the same regexp and text objects but owning its own array
 * of matches.
 */
static VALUE re2_matchdata_initialize_copy(VALUE self, VALUE other) {
  re2_matchdata *self_m;
  re2_matchdata *other_m = unwrap_re2_matchdata(other);

  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, self_m);

  /* Copying an object onto itself is a no-op. Without this guard the old
   * matches would be freed and then read back through other_m. */
  if (self_m == other_m) {
    return self;
  }

  /* Build the new array before releasing the old one so that a failed
   * allocation leaves self untouched. */
  re2::StringPiece *matches = NULL;
  if (other_m->matches) {
    matches = new(std::nothrow) re2::StringPiece[other_m->number_of_matches];
    if (matches == 0) {
      rb_raise(rb_eNoMemError,
               "not enough memory to allocate StringPiece for matches");
    }
    for (int i = 0; i < other_m->number_of_matches; ++i) {
      matches[i] = other_m->matches[i];
    }
  }

  /* delete[] on a null pointer is a no-op, so no check is needed. */
  delete[] self_m->matches;
  self_m->matches = matches;
  self_m->number_of_matches = other_m->number_of_matches;

  /* The copied StringPieces point into the text held by other, so self must
   * reference the same text (and regexp) objects. */
  RB_OBJ_WRITE(self, &self_m->regexp, other_m->regexp);
  RB_OBJ_WRITE(self, &self_m->text, other_m->text);

  return self;
}
|
|
1176
|
+
|
|
861
1177
|
/*
|
|
862
1178
|
* Shorthand to compile a new {RE2::Regexp}.
|
|
863
1179
|
*
|
|
@@ -913,6 +1229,10 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
|
|
913
1229
|
|
|
914
1230
|
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
915
1231
|
|
|
1232
|
+
if (p->pattern) {
|
|
1233
|
+
delete p->pattern;
|
|
1234
|
+
}
|
|
1235
|
+
|
|
916
1236
|
if (RTEST(options)) {
|
|
917
1237
|
RE2::Options re2_options;
|
|
918
1238
|
parse_re2_options(&re2_options, options);
|
|
@@ -931,6 +1251,25 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
|
|
931
1251
|
return self;
|
|
932
1252
|
}
|
|
933
1253
|
|
|
1254
|
+
/*
 * Copy hook used by `dup`/`clone`: gives `self` its own RE2 object compiled
 * from the pattern string and options of `other`.
 */
static VALUE re2_regexp_initialize_copy(VALUE self, VALUE other) {
  re2_pattern *self_p;
  re2_pattern *other_p = unwrap_re2_regexp(other);

  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, self_p);

  /* Copying an object onto itself is a no-op. Without this guard the pattern
   * would be deleted and then read back through other_p. */
  if (self_p == other_p) {
    return self;
  }

  /* Compile the replacement before releasing the old pattern so that a
   * failed allocation leaves self untouched. */
  RE2 *copy = new(std::nothrow) RE2(other_p->pattern->pattern(),
                                    other_p->pattern->options());
  if (copy == 0) {
    rb_raise(rb_eNoMemError, "not enough memory to allocate RE2 object");
  }

  /* delete on a null pointer is a no-op, so no check is needed. */
  delete self_p->pattern;
  self_p->pattern = copy;

  return self;
}
|
|
1272
|
+
|
|
934
1273
|
/*
|
|
935
1274
|
* Returns a printable version of the regular expression.
|
|
936
1275
|
*
|
|
@@ -945,9 +1284,7 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
|
|
945
1284
|
* re2.inspect #=> "#<RE2::Regexp /woo?/>"
|
|
946
1285
|
*/
|
|
947
1286
|
static VALUE re2_regexp_inspect(const VALUE self) {
|
|
948
|
-
re2_pattern *p;
|
|
949
|
-
|
|
950
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1287
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
951
1288
|
|
|
952
1289
|
std::ostringstream output;
|
|
953
1290
|
|
|
@@ -970,8 +1307,7 @@ static VALUE re2_regexp_inspect(const VALUE self) {
|
|
|
970
1307
|
* re2.to_s #=> "woo?"
|
|
971
1308
|
*/
|
|
972
1309
|
static VALUE re2_regexp_to_s(const VALUE self) {
|
|
973
|
-
re2_pattern *p;
|
|
974
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1310
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
975
1311
|
|
|
976
1312
|
return encoded_str_new(p->pattern->pattern().data(),
|
|
977
1313
|
p->pattern->pattern().size(),
|
|
@@ -987,8 +1323,7 @@ static VALUE re2_regexp_to_s(const VALUE self) {
|
|
|
987
1323
|
* re2.ok? #=> true
|
|
988
1324
|
*/
|
|
989
1325
|
static VALUE re2_regexp_ok(const VALUE self) {
|
|
990
|
-
re2_pattern *p;
|
|
991
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1326
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
992
1327
|
|
|
993
1328
|
return BOOL2RUBY(p->pattern->ok());
|
|
994
1329
|
}
|
|
@@ -1003,8 +1338,7 @@ static VALUE re2_regexp_ok(const VALUE self) {
|
|
|
1003
1338
|
* re2.utf8? #=> true
|
|
1004
1339
|
*/
|
|
1005
1340
|
static VALUE re2_regexp_utf8(const VALUE self) {
|
|
1006
|
-
re2_pattern *p;
|
|
1007
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1341
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1008
1342
|
|
|
1009
1343
|
return BOOL2RUBY(p->pattern->options().encoding() == RE2::Options::EncodingUTF8);
|
|
1010
1344
|
}
|
|
@@ -1019,8 +1353,7 @@ static VALUE re2_regexp_utf8(const VALUE self) {
|
|
|
1019
1353
|
* re2.posix_syntax? #=> true
|
|
1020
1354
|
*/
|
|
1021
1355
|
static VALUE re2_regexp_posix_syntax(const VALUE self) {
|
|
1022
|
-
re2_pattern *p;
|
|
1023
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1356
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1024
1357
|
|
|
1025
1358
|
return BOOL2RUBY(p->pattern->options().posix_syntax());
|
|
1026
1359
|
}
|
|
@@ -1035,8 +1368,7 @@ static VALUE re2_regexp_posix_syntax(const VALUE self) {
|
|
|
1035
1368
|
* re2.longest_match? #=> true
|
|
1036
1369
|
*/
|
|
1037
1370
|
static VALUE re2_regexp_longest_match(const VALUE self) {
|
|
1038
|
-
re2_pattern *p;
|
|
1039
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1371
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1040
1372
|
|
|
1041
1373
|
return BOOL2RUBY(p->pattern->options().longest_match());
|
|
1042
1374
|
}
|
|
@@ -1051,8 +1383,7 @@ static VALUE re2_regexp_longest_match(const VALUE self) {
|
|
|
1051
1383
|
* re2.log_errors? #=> true
|
|
1052
1384
|
*/
|
|
1053
1385
|
static VALUE re2_regexp_log_errors(const VALUE self) {
|
|
1054
|
-
re2_pattern *p;
|
|
1055
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1386
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1056
1387
|
|
|
1057
1388
|
return BOOL2RUBY(p->pattern->options().log_errors());
|
|
1058
1389
|
}
|
|
@@ -1066,8 +1397,7 @@ static VALUE re2_regexp_log_errors(const VALUE self) {
|
|
|
1066
1397
|
* re2.max_mem #=> 1024
|
|
1067
1398
|
*/
|
|
1068
1399
|
static VALUE re2_regexp_max_mem(const VALUE self) {
|
|
1069
|
-
re2_pattern *p;
|
|
1070
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1400
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1071
1401
|
|
|
1072
1402
|
return INT2FIX(p->pattern->options().max_mem());
|
|
1073
1403
|
}
|
|
@@ -1082,8 +1412,7 @@ static VALUE re2_regexp_max_mem(const VALUE self) {
|
|
|
1082
1412
|
* re2.literal? #=> true
|
|
1083
1413
|
*/
|
|
1084
1414
|
static VALUE re2_regexp_literal(const VALUE self) {
|
|
1085
|
-
re2_pattern *p;
|
|
1086
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1415
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1087
1416
|
|
|
1088
1417
|
return BOOL2RUBY(p->pattern->options().literal());
|
|
1089
1418
|
}
|
|
@@ -1098,8 +1427,7 @@ static VALUE re2_regexp_literal(const VALUE self) {
|
|
|
1098
1427
|
* re2.never_nl? #=> true
|
|
1099
1428
|
*/
|
|
1100
1429
|
static VALUE re2_regexp_never_nl(const VALUE self) {
|
|
1101
|
-
re2_pattern *p;
|
|
1102
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1430
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1103
1431
|
|
|
1104
1432
|
return BOOL2RUBY(p->pattern->options().never_nl());
|
|
1105
1433
|
}
|
|
@@ -1114,8 +1442,7 @@ static VALUE re2_regexp_never_nl(const VALUE self) {
|
|
|
1114
1442
|
* re2.case_sensitive? #=> true
|
|
1115
1443
|
*/
|
|
1116
1444
|
static VALUE re2_regexp_case_sensitive(const VALUE self) {
|
|
1117
|
-
re2_pattern *p;
|
|
1118
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1445
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1119
1446
|
|
|
1120
1447
|
return BOOL2RUBY(p->pattern->options().case_sensitive());
|
|
1121
1448
|
}
|
|
@@ -1144,8 +1471,7 @@ static VALUE re2_regexp_case_insensitive(const VALUE self) {
|
|
|
1144
1471
|
* re2.perl_classes? #=> true
|
|
1145
1472
|
*/
|
|
1146
1473
|
static VALUE re2_regexp_perl_classes(const VALUE self) {
|
|
1147
|
-
re2_pattern *p;
|
|
1148
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1474
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1149
1475
|
|
|
1150
1476
|
return BOOL2RUBY(p->pattern->options().perl_classes());
|
|
1151
1477
|
}
|
|
@@ -1160,8 +1486,7 @@ static VALUE re2_regexp_perl_classes(const VALUE self) {
|
|
|
1160
1486
|
* re2.word_boundary? #=> true
|
|
1161
1487
|
*/
|
|
1162
1488
|
static VALUE re2_regexp_word_boundary(const VALUE self) {
|
|
1163
|
-
re2_pattern *p;
|
|
1164
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1489
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1165
1490
|
|
|
1166
1491
|
return BOOL2RUBY(p->pattern->options().word_boundary());
|
|
1167
1492
|
}
|
|
@@ -1176,8 +1501,7 @@ static VALUE re2_regexp_word_boundary(const VALUE self) {
|
|
|
1176
1501
|
* re2.one_line? #=> true
|
|
1177
1502
|
*/
|
|
1178
1503
|
static VALUE re2_regexp_one_line(const VALUE self) {
|
|
1179
|
-
re2_pattern *p;
|
|
1180
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1504
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1181
1505
|
|
|
1182
1506
|
return BOOL2RUBY(p->pattern->options().one_line());
|
|
1183
1507
|
}
|
|
@@ -1189,8 +1513,7 @@ static VALUE re2_regexp_one_line(const VALUE self) {
|
|
|
1189
1513
|
* @return [String, nil] the error string or `nil`
|
|
1190
1514
|
*/
|
|
1191
1515
|
static VALUE re2_regexp_error(const VALUE self) {
|
|
1192
|
-
re2_pattern *p;
|
|
1193
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1516
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1194
1517
|
|
|
1195
1518
|
if (p->pattern->ok()) {
|
|
1196
1519
|
return Qnil;
|
|
@@ -1210,8 +1533,7 @@ static VALUE re2_regexp_error(const VALUE self) {
|
|
|
1210
1533
|
* @return [String, nil] the offending portion of the regexp or `nil`
|
|
1211
1534
|
*/
|
|
1212
1535
|
static VALUE re2_regexp_error_arg(const VALUE self) {
|
|
1213
|
-
re2_pattern *p;
|
|
1214
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1536
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1215
1537
|
|
|
1216
1538
|
if (p->pattern->ok()) {
|
|
1217
1539
|
return Qnil;
|
|
@@ -1230,8 +1552,7 @@ static VALUE re2_regexp_error_arg(const VALUE self) {
|
|
|
1230
1552
|
* @return [Integer] the regexp "cost"
|
|
1231
1553
|
*/
|
|
1232
1554
|
static VALUE re2_regexp_program_size(const VALUE self) {
|
|
1233
|
-
re2_pattern *p;
|
|
1234
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1555
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1235
1556
|
|
|
1236
1557
|
return INT2FIX(p->pattern->ProgramSize());
|
|
1237
1558
|
}
|
|
@@ -1242,9 +1563,7 @@ static VALUE re2_regexp_program_size(const VALUE self) {
|
|
|
1242
1563
|
* @return [Hash] the options
|
|
1243
1564
|
*/
|
|
1244
1565
|
static VALUE re2_regexp_options(const VALUE self) {
|
|
1245
|
-
re2_pattern *p;
|
|
1246
|
-
|
|
1247
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1566
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1248
1567
|
VALUE options = rb_hash_new();
|
|
1249
1568
|
|
|
1250
1569
|
rb_hash_aset(options, ID2SYM(id_utf8),
|
|
@@ -1294,8 +1613,7 @@ static VALUE re2_regexp_options(const VALUE self) {
|
|
|
1294
1613
|
* @return [Integer] the number of capturing subpatterns
|
|
1295
1614
|
*/
|
|
1296
1615
|
static VALUE re2_regexp_number_of_capturing_groups(const VALUE self) {
|
|
1297
|
-
re2_pattern *p;
|
|
1298
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1616
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1299
1617
|
|
|
1300
1618
|
return INT2FIX(p->pattern->NumberOfCapturingGroups());
|
|
1301
1619
|
}
|
|
@@ -1310,9 +1628,7 @@ static VALUE re2_regexp_number_of_capturing_groups(const VALUE self) {
|
|
|
1310
1628
|
* @return [Hash] a hash of names to capturing indices
|
|
1311
1629
|
*/
|
|
1312
1630
|
static VALUE re2_regexp_named_capturing_groups(const VALUE self) {
|
|
1313
|
-
re2_pattern *p;
|
|
1314
|
-
|
|
1315
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1631
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1316
1632
|
const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
|
|
1317
1633
|
VALUE capturing_groups = rb_hash_new();
|
|
1318
1634
|
|
|
@@ -1418,7 +1734,7 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
1418
1734
|
/* Ensure text is a string. */
|
|
1419
1735
|
StringValue(text);
|
|
1420
1736
|
|
|
1421
|
-
|
|
1737
|
+
p = unwrap_re2_regexp(self);
|
|
1422
1738
|
|
|
1423
1739
|
int n;
|
|
1424
1740
|
int startpos = 0;
|
|
@@ -1561,16 +1877,15 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
1561
1877
|
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L413-L427
|
|
1562
1878
|
* `PartialMatch`}.
|
|
1563
1879
|
*
|
|
1880
|
+
* @param [String] text the text to search
|
|
1564
1881
|
* @return [Boolean] whether the match was successful
|
|
1565
1882
|
* @raise [TypeError] if text cannot be coerced to a `String`
|
|
1566
1883
|
*/
|
|
1567
1884
|
static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
|
|
1568
|
-
re2_pattern *p;
|
|
1569
|
-
|
|
1570
1885
|
/* Ensure text is a string. */
|
|
1571
1886
|
StringValue(text);
|
|
1572
1887
|
|
|
1573
|
-
|
|
1888
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1574
1889
|
|
|
1575
1890
|
return BOOL2RUBY(RE2::PartialMatch(
|
|
1576
1891
|
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)), *p->pattern));
|
|
@@ -1581,16 +1896,15 @@ static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
|
|
|
1581
1896
|
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L376-L411
|
|
1582
1897
|
* `FullMatch`}.
|
|
1583
1898
|
*
|
|
1899
|
+
* @param [String] text the text to search
|
|
1584
1900
|
* @return [Boolean] whether the match was successful
|
|
1585
1901
|
* @raise [TypeError] if text cannot be coerced to a `String`
|
|
1586
1902
|
*/
|
|
1587
1903
|
static VALUE re2_regexp_full_match_p(const VALUE self, VALUE text) {
|
|
1588
|
-
re2_pattern *p;
|
|
1589
|
-
|
|
1590
1904
|
/* Ensure text is a string. */
|
|
1591
1905
|
StringValue(text);
|
|
1592
1906
|
|
|
1593
|
-
|
|
1907
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1594
1908
|
|
|
1595
1909
|
return BOOL2RUBY(RE2::FullMatch(
|
|
1596
1910
|
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)), *p->pattern));
|
|
@@ -1612,10 +1926,8 @@ static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
|
|
|
1612
1926
|
/* Ensure text is a string. */
|
|
1613
1927
|
StringValue(text);
|
|
1614
1928
|
|
|
1615
|
-
re2_pattern *p;
|
|
1929
|
+
re2_pattern *p = unwrap_re2_regexp(self);
|
|
1616
1930
|
re2_scanner *c;
|
|
1617
|
-
|
|
1618
|
-
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
|
1619
1931
|
VALUE scanner = rb_class_new_instance(0, 0, re2_cScanner);
|
|
1620
1932
|
TypedData_Get_Struct(scanner, re2_scanner, &re2_scanner_data_type, c);
|
|
1621
1933
|
|
|
@@ -1675,11 +1987,11 @@ static VALUE re2_regexp_match_has_endpos_argument_p(VALUE) {
|
|
|
1675
1987
|
* @raise [TypeError] if the given rewrite or pattern (if not provided as a
|
|
1676
1988
|
* {RE2::Regexp}) cannot be coerced to `String`s
|
|
1677
1989
|
* @example
|
|
1678
|
-
* RE2.
|
|
1990
|
+
* RE2.replace("hello there", "hello", "howdy") #=> "howdy there"
|
|
1679
1991
|
* re2 = RE2::Regexp.new("hel+o")
|
|
1680
|
-
* RE2.
|
|
1992
|
+
* RE2.replace("hello there", re2, "yo") #=> "yo there"
|
|
1681
1993
|
*/
|
|
1682
|
-
static VALUE
|
|
1994
|
+
static VALUE re2_replace(VALUE, VALUE str, VALUE pattern,
|
|
1683
1995
|
VALUE rewrite) {
|
|
1684
1996
|
/* Ensure rewrite is a string. */
|
|
1685
1997
|
StringValue(rewrite);
|
|
@@ -1694,7 +2006,7 @@ static VALUE re2_Replace(VALUE, VALUE str, VALUE pattern,
|
|
|
1694
2006
|
|
|
1695
2007
|
/* Do the replacement. */
|
|
1696
2008
|
if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
|
|
1697
|
-
|
|
2009
|
+
p = unwrap_re2_regexp(pattern);
|
|
1698
2010
|
RE2::Replace(&str_as_string, *p->pattern,
|
|
1699
2011
|
re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
|
|
1700
2012
|
|
|
@@ -1729,10 +2041,10 @@ static VALUE re2_Replace(VALUE, VALUE str, VALUE pattern,
|
|
|
1729
2041
|
* @return [String] the resulting string
|
|
1730
2042
|
* @example
|
|
1731
2043
|
* re2 = RE2::Regexp.new("oo?")
|
|
1732
|
-
* RE2.
|
|
1733
|
-
* RE2.
|
|
2044
|
+
* RE2.global_replace("whoops-doops", re2, "e") #=> "wheps-deps"
|
|
2045
|
+
* RE2.global_replace("hello there", "e", "i") #=> "hillo thiri"
|
|
1734
2046
|
*/
|
|
1735
|
-
static VALUE
|
|
2047
|
+
static VALUE re2_global_replace(VALUE, VALUE str, VALUE pattern,
|
|
1736
2048
|
VALUE rewrite) {
|
|
1737
2049
|
/* Ensure rewrite is a string. */
|
|
1738
2050
|
StringValue(rewrite);
|
|
@@ -1746,7 +2058,7 @@ static VALUE re2_GlobalReplace(VALUE, VALUE str, VALUE pattern,
|
|
|
1746
2058
|
|
|
1747
2059
|
/* Do the replacement. */
|
|
1748
2060
|
if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
|
|
1749
|
-
|
|
2061
|
+
p = unwrap_re2_regexp(pattern);
|
|
1750
2062
|
RE2::GlobalReplace(&str_as_string, *p->pattern,
|
|
1751
2063
|
re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
|
|
1752
2064
|
|
|
@@ -1764,6 +2076,71 @@ static VALUE re2_GlobalReplace(VALUE, VALUE str, VALUE pattern,
|
|
|
1764
2076
|
}
|
|
1765
2077
|
}
|
|
1766
2078
|
|
|
2079
|
+
/*
|
|
2080
|
+
* If `pattern` matches `text`, returns a copy of `rewrite` with substitutions
|
|
2081
|
+
* using
|
|
2082
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L499-L510
|
|
2083
|
+
* `Extract`}. Non-matching portions of `text` are ignored.
|
|
2084
|
+
*
|
|
2085
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
|
2086
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
|
2087
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
|
2088
|
+
*
|
|
2089
|
+
* @param [String] text the string from which to extract
|
|
2090
|
+
* @param [String, RE2::Regexp] pattern a regexp matching the text
|
|
2091
|
+
* @param [String] rewrite the rewrite string with `\1`-style substitutions
|
|
2092
|
+
* @return [String, nil] the extracted string on a successful match or nil if
|
|
2093
|
+
* there is no match
|
|
2094
|
+
* @raise [TypeError] if the given rewrite or pattern (if not provided as a
|
|
2095
|
+
* {RE2::Regexp}) cannot be coerced to `String`s
|
|
2096
|
+
* @example
|
|
2097
|
+
* RE2.extract("alice@example.com", '(\w+)@(\w+)', '\2-\1')
|
|
2098
|
+
* #=> "example-alice"
|
|
2099
|
+
* RE2.extract("no match", '(\d+)', '\1') #=> nil
|
|
2100
|
+
*/
|
|
2101
|
+
static VALUE re2_extract(VALUE, VALUE text, VALUE pattern,
|
|
2102
|
+
VALUE rewrite) {
|
|
2103
|
+
/* Ensure rewrite and text are strings. */
|
|
2104
|
+
StringValue(rewrite);
|
|
2105
|
+
StringValue(text);
|
|
2106
|
+
|
|
2107
|
+
re2_pattern *p;
|
|
2108
|
+
std::string out;
|
|
2109
|
+
bool extracted;
|
|
2110
|
+
|
|
2111
|
+
if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
|
|
2112
|
+
p = unwrap_re2_regexp(pattern);
|
|
2113
|
+
extracted = RE2::Extract(
|
|
2114
|
+
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
|
|
2115
|
+
*p->pattern,
|
|
2116
|
+
re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)),
|
|
2117
|
+
&out);
|
|
2118
|
+
|
|
2119
|
+
if (extracted) {
|
|
2120
|
+
return encoded_str_new(out.data(), out.size(),
|
|
2121
|
+
p->pattern->options().encoding());
|
|
2122
|
+
} else {
|
|
2123
|
+
return Qnil;
|
|
2124
|
+
}
|
|
2125
|
+
} else {
|
|
2126
|
+
/* Ensure pattern is a string. */
|
|
2127
|
+
StringValue(pattern);
|
|
2128
|
+
|
|
2129
|
+
extracted = RE2::Extract(
|
|
2130
|
+
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
|
|
2131
|
+
RE2(re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern))),
|
|
2132
|
+
re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)),
|
|
2133
|
+
&out);
|
|
2134
|
+
|
|
2135
|
+
if (extracted) {
|
|
2136
|
+
return encoded_str_new(out.data(), out.size(),
|
|
2137
|
+
RE2::Options::EncodingUTF8);
|
|
2138
|
+
} else {
|
|
2139
|
+
return Qnil;
|
|
2140
|
+
}
|
|
2141
|
+
}
|
|
2142
|
+
}
|
|
2143
|
+
|
|
1767
2144
|
/*
|
|
1768
2145
|
* Returns a version of `str` with all potentially meaningful regexp characters
|
|
1769
2146
|
* escaped using
|
|
@@ -1775,9 +2152,12 @@ static VALUE re2_GlobalReplace(VALUE, VALUE str, VALUE pattern,
|
|
|
1775
2152
|
* @raise [TypeError] if the given unquoted string cannot be coerced to a `String`
|
|
1776
2153
|
* @return [String] the escaped string
|
|
1777
2154
|
* @example
|
|
1778
|
-
* RE2
|
|
2155
|
+
* RE2.escape("1.5-2.0?") #=> "1\\.5\\-2\\.0\\?"
|
|
2156
|
+
* RE2.quote("1.5-2.0?") #=> "1\\.5\\-2\\.0\\?"
|
|
2157
|
+
* RE2::Regexp.escape("1.5-2.0?") #=> "1\\.5\\-2\\.0\\?"
|
|
2158
|
+
* RE2::Regexp.quote("1.5-2.0?") #=> "1\\.5\\-2\\.0\\?"
|
|
1779
2159
|
*/
|
|
1780
|
-
static VALUE
|
|
2160
|
+
static VALUE re2_escape(VALUE, VALUE unquoted) {
|
|
1781
2161
|
StringValue(unquoted);
|
|
1782
2162
|
|
|
1783
2163
|
std::string quoted_string = RE2::QuoteMeta(
|
|
@@ -1818,6 +2198,15 @@ static const rb_data_type_t re2_set_data_type = {
|
|
|
1818
2198
|
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
|
|
1819
2199
|
};
|
|
1820
2200
|
|
|
2201
|
+
/*
 * Fetches the re2_set struct wrapped by the given Ruby object, raising a
 * TypeError if its underlying set pointer has not been assigned yet.
 */
static re2_set *unwrap_re2_set(VALUE self) {
  re2_set *wrapped;
  TypedData_Get_Struct(self, re2_set, &re2_set_data_type, wrapped);

  if (wrapped->set == NULL) {
    rb_raise(rb_eTypeError, "uninitialized RE2::Set");
  }

  return wrapped;
}
|
|
2209
|
+
|
|
1821
2210
|
static VALUE re2_set_allocate(VALUE klass) {
|
|
1822
2211
|
re2_set *s;
|
|
1823
2212
|
VALUE result = TypedData_Make_Struct(klass, re2_set, &re2_set_data_type, s);
|
|
@@ -1825,6 +2214,10 @@ static VALUE re2_set_allocate(VALUE klass) {
|
|
|
1825
2214
|
return result;
|
|
1826
2215
|
}
|
|
1827
2216
|
|
|
2217
|
+
/*
 * Copy hook registered as `initialize_copy` for {RE2::Set}. It refuses every
 * copy by raising; neither argument is inspected.
 *
 * @raise [TypeError] always
 */
static VALUE re2_set_initialize_copy(VALUE /* self */, VALUE /* other */) {
  rb_raise(rb_eTypeError, "cannot copy RE2::Set");
}
|
|
2220
|
+
|
|
1828
2221
|
/*
|
|
1829
2222
|
* Returns a new {RE2::Set} object, a collection of patterns that can be
|
|
1830
2223
|
* searched for simultaneously.
|
|
@@ -1895,6 +2288,10 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
|
|
|
1895
2288
|
parse_re2_options(&re2_options, options);
|
|
1896
2289
|
}
|
|
1897
2290
|
|
|
2291
|
+
if (s->set) {
|
|
2292
|
+
delete s->set;
|
|
2293
|
+
}
|
|
2294
|
+
|
|
1898
2295
|
s->set = new(std::nothrow) RE2::Set(re2_options, re2_anchor);
|
|
1899
2296
|
if (s->set == 0) {
|
|
1900
2297
|
rb_raise(rb_eNoMemError, "not enough memory to allocate RE2::Set object");
|
|
@@ -1919,8 +2316,7 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
|
|
|
1919
2316
|
static VALUE re2_set_add(VALUE self, VALUE pattern) {
|
|
1920
2317
|
StringValue(pattern);
|
|
1921
2318
|
|
|
1922
|
-
re2_set *s;
|
|
1923
|
-
TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);
|
|
2319
|
+
re2_set *s = unwrap_re2_set(self);
|
|
1924
2320
|
|
|
1925
2321
|
int index;
|
|
1926
2322
|
VALUE msg;
|
|
@@ -1951,8 +2347,7 @@ static VALUE re2_set_add(VALUE self, VALUE pattern) {
|
|
|
1951
2347
|
* set.compile #=> true
|
|
1952
2348
|
*/
|
|
1953
2349
|
static VALUE re2_set_compile(VALUE self) {
|
|
1954
|
-
re2_set *s;
|
|
1955
|
-
TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);
|
|
2350
|
+
re2_set *s = unwrap_re2_set(self);
|
|
1956
2351
|
|
|
1957
2352
|
return BOOL2RUBY(s->set->Compile());
|
|
1958
2353
|
}
|
|
@@ -1968,8 +2363,7 @@ static VALUE re2_set_compile(VALUE self) {
|
|
|
1968
2363
|
*/
|
|
1969
2364
|
static VALUE re2_set_size(VALUE self) {
|
|
1970
2365
|
#ifdef HAVE_SET_SIZE
|
|
1971
|
-
re2_set *s;
|
|
1972
|
-
TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);
|
|
2366
|
+
re2_set *s = unwrap_re2_set(self);
|
|
1973
2367
|
|
|
1974
2368
|
return INT2FIX(s->set->Size());
|
|
1975
2369
|
#else
|
|
@@ -2052,8 +2446,7 @@ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
|
|
|
2052
2446
|
rb_scan_args(argc, argv, "11", &str, &options);
|
|
2053
2447
|
|
|
2054
2448
|
StringValue(str);
|
|
2055
|
-
re2_set *s;
|
|
2056
|
-
TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);
|
|
2449
|
+
re2_set *s = unwrap_re2_set(self);
|
|
2057
2450
|
|
|
2058
2451
|
if (RTEST(options)) {
|
|
2059
2452
|
Check_Type(options, T_HASH);
|
|
@@ -2147,6 +2540,14 @@ extern "C" void Init_re2(void) {
|
|
|
2147
2540
|
RUBY_METHOD_FUNC(re2_matchdata_begin), 1);
|
|
2148
2541
|
rb_define_method(re2_cMatchData, "end",
|
|
2149
2542
|
RUBY_METHOD_FUNC(re2_matchdata_end), 1);
|
|
2543
|
+
rb_define_method(re2_cMatchData, "pre_match",
|
|
2544
|
+
RUBY_METHOD_FUNC(re2_matchdata_pre_match), 0);
|
|
2545
|
+
rb_define_method(re2_cMatchData, "post_match",
|
|
2546
|
+
RUBY_METHOD_FUNC(re2_matchdata_post_match), 0);
|
|
2547
|
+
rb_define_method(re2_cMatchData, "offset",
|
|
2548
|
+
RUBY_METHOD_FUNC(re2_matchdata_offset), 1);
|
|
2549
|
+
rb_define_method(re2_cMatchData, "match_length",
|
|
2550
|
+
RUBY_METHOD_FUNC(re2_matchdata_match_length), 1);
|
|
2150
2551
|
rb_define_method(re2_cMatchData, "[]", RUBY_METHOD_FUNC(re2_matchdata_aref),
|
|
2151
2552
|
-1);
|
|
2152
2553
|
rb_define_method(re2_cMatchData, "to_s",
|
|
@@ -2155,8 +2556,18 @@ extern "C" void Init_re2(void) {
|
|
|
2155
2556
|
RUBY_METHOD_FUNC(re2_matchdata_inspect), 0);
|
|
2156
2557
|
rb_define_method(re2_cMatchData, "deconstruct",
|
|
2157
2558
|
RUBY_METHOD_FUNC(re2_matchdata_deconstruct), 0);
|
|
2559
|
+
rb_define_method(re2_cMatchData, "captures",
|
|
2560
|
+
RUBY_METHOD_FUNC(re2_matchdata_deconstruct), 0);
|
|
2561
|
+
rb_define_method(re2_cMatchData, "named_captures",
|
|
2562
|
+
RUBY_METHOD_FUNC(re2_matchdata_named_captures), -1);
|
|
2563
|
+
rb_define_method(re2_cMatchData, "names",
|
|
2564
|
+
RUBY_METHOD_FUNC(re2_matchdata_names), 0);
|
|
2565
|
+
rb_define_method(re2_cMatchData, "values_at",
|
|
2566
|
+
RUBY_METHOD_FUNC(re2_matchdata_values_at), -1);
|
|
2158
2567
|
rb_define_method(re2_cMatchData, "deconstruct_keys",
|
|
2159
2568
|
RUBY_METHOD_FUNC(re2_matchdata_deconstruct_keys), 1);
|
|
2569
|
+
rb_define_method(re2_cMatchData, "initialize_copy",
|
|
2570
|
+
RUBY_METHOD_FUNC(re2_matchdata_initialize_copy), 1);
|
|
2160
2571
|
|
|
2161
2572
|
rb_define_method(re2_cScanner, "string",
|
|
2162
2573
|
RUBY_METHOD_FUNC(re2_scanner_string), 0);
|
|
@@ -2168,11 +2579,15 @@ extern "C" void Init_re2(void) {
|
|
|
2168
2579
|
RUBY_METHOD_FUNC(re2_scanner_scan), 0);
|
|
2169
2580
|
rb_define_method(re2_cScanner, "rewind",
|
|
2170
2581
|
RUBY_METHOD_FUNC(re2_scanner_rewind), 0);
|
|
2582
|
+
rb_define_method(re2_cScanner, "initialize_copy",
|
|
2583
|
+
RUBY_METHOD_FUNC(re2_scanner_initialize_copy), 1);
|
|
2171
2584
|
|
|
2172
2585
|
rb_define_singleton_method(re2_cRegexp, "match_has_endpos_argument?",
|
|
2173
2586
|
RUBY_METHOD_FUNC(re2_regexp_match_has_endpos_argument_p), 0);
|
|
2174
2587
|
rb_define_method(re2_cRegexp, "initialize",
|
|
2175
2588
|
RUBY_METHOD_FUNC(re2_regexp_initialize), -1);
|
|
2589
|
+
rb_define_method(re2_cRegexp, "initialize_copy",
|
|
2590
|
+
RUBY_METHOD_FUNC(re2_regexp_initialize_copy), 1);
|
|
2176
2591
|
rb_define_method(re2_cRegexp, "ok?", RUBY_METHOD_FUNC(re2_regexp_ok), 0);
|
|
2177
2592
|
rb_define_method(re2_cRegexp, "error", RUBY_METHOD_FUNC(re2_regexp_error),
|
|
2178
2593
|
0);
|
|
@@ -2186,6 +2601,10 @@ extern "C" void Init_re2(void) {
|
|
|
2186
2601
|
RUBY_METHOD_FUNC(re2_regexp_number_of_capturing_groups), 0);
|
|
2187
2602
|
rb_define_method(re2_cRegexp, "named_capturing_groups",
|
|
2188
2603
|
RUBY_METHOD_FUNC(re2_regexp_named_capturing_groups), 0);
|
|
2604
|
+
rb_define_method(re2_cRegexp, "named_captures",
|
|
2605
|
+
RUBY_METHOD_FUNC(re2_regexp_named_capturing_groups), 0);
|
|
2606
|
+
rb_define_method(re2_cRegexp, "names",
|
|
2607
|
+
RUBY_METHOD_FUNC(re2_regexp_names), 0);
|
|
2189
2608
|
rb_define_method(re2_cRegexp, "match", RUBY_METHOD_FUNC(re2_regexp_match),
|
|
2190
2609
|
-1);
|
|
2191
2610
|
rb_define_method(re2_cRegexp, "match?", RUBY_METHOD_FUNC(re2_regexp_match_p),
|
|
@@ -2240,22 +2659,34 @@ extern "C" void Init_re2(void) {
|
|
|
2240
2659
|
RUBY_METHOD_FUNC(re2_set_size_p), 0);
|
|
2241
2660
|
rb_define_method(re2_cSet, "initialize",
|
|
2242
2661
|
RUBY_METHOD_FUNC(re2_set_initialize), -1);
|
|
2662
|
+
rb_define_method(re2_cSet, "initialize_copy",
|
|
2663
|
+
RUBY_METHOD_FUNC(re2_set_initialize_copy), 1);
|
|
2243
2664
|
rb_define_method(re2_cSet, "add", RUBY_METHOD_FUNC(re2_set_add), 1);
|
|
2244
2665
|
rb_define_method(re2_cSet, "compile", RUBY_METHOD_FUNC(re2_set_compile), 0);
|
|
2245
2666
|
rb_define_method(re2_cSet, "match", RUBY_METHOD_FUNC(re2_set_match), -1);
|
|
2246
2667
|
rb_define_method(re2_cSet, "size", RUBY_METHOD_FUNC(re2_set_size), 0);
|
|
2247
2668
|
rb_define_method(re2_cSet, "length", RUBY_METHOD_FUNC(re2_set_size), 0);
|
|
2248
2669
|
|
|
2670
|
+
rb_define_module_function(re2_mRE2, "replace",
|
|
2671
|
+
RUBY_METHOD_FUNC(re2_replace), 3);
|
|
2249
2672
|
rb_define_module_function(re2_mRE2, "Replace",
|
|
2250
|
-
RUBY_METHOD_FUNC(
|
|
2673
|
+
RUBY_METHOD_FUNC(re2_replace), 3);
|
|
2674
|
+
rb_define_module_function(re2_mRE2, "global_replace",
|
|
2675
|
+
RUBY_METHOD_FUNC(re2_global_replace), 3);
|
|
2251
2676
|
rb_define_module_function(re2_mRE2, "GlobalReplace",
|
|
2252
|
-
RUBY_METHOD_FUNC(
|
|
2677
|
+
RUBY_METHOD_FUNC(re2_global_replace), 3);
|
|
2678
|
+
rb_define_module_function(re2_mRE2, "extract",
|
|
2679
|
+
RUBY_METHOD_FUNC(re2_extract), 3);
|
|
2253
2680
|
rb_define_module_function(re2_mRE2, "QuoteMeta",
|
|
2254
|
-
RUBY_METHOD_FUNC(
|
|
2681
|
+
RUBY_METHOD_FUNC(re2_escape), 1);
|
|
2682
|
+
rb_define_module_function(re2_mRE2, "escape",
|
|
2683
|
+
RUBY_METHOD_FUNC(re2_escape), 1);
|
|
2684
|
+
rb_define_module_function(re2_mRE2, "quote",
|
|
2685
|
+
RUBY_METHOD_FUNC(re2_escape), 1);
|
|
2255
2686
|
rb_define_singleton_method(re2_cRegexp, "escape",
|
|
2256
|
-
RUBY_METHOD_FUNC(
|
|
2687
|
+
RUBY_METHOD_FUNC(re2_escape), 1);
|
|
2257
2688
|
rb_define_singleton_method(re2_cRegexp, "quote",
|
|
2258
|
-
RUBY_METHOD_FUNC(
|
|
2689
|
+
RUBY_METHOD_FUNC(re2_escape), 1);
|
|
2259
2690
|
|
|
2260
2691
|
// (see RE2::Regexp#initialize)
|
|
2261
2692
|
rb_define_singleton_method(re2_cRegexp, "compile",
|
|
@@ -2283,4 +2714,5 @@ extern "C" void Init_re2(void) {
|
|
|
2283
2714
|
id_submatches = rb_intern("submatches");
|
|
2284
2715
|
id_startpos = rb_intern("startpos");
|
|
2285
2716
|
id_endpos = rb_intern("endpos");
|
|
2717
|
+
id_symbolize_names = rb_intern("symbolize_names");
|
|
2286
2718
|
}
|