re2 2.4.3 → 2.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec +1 -0
- data/Gemfile +2 -0
- data/README.md +281 -192
- data/Rakefile +1 -1
- data/dependencies.yml +4 -4
- data/ext/re2/extconf.rb +250 -358
- data/ext/re2/re2.cc +505 -284
- data/ext/re2/recipes.rb +31 -20
- data/lib/re2/regexp.rb +72 -0
- data/lib/re2/scanner.rb +11 -0
- data/lib/re2/string.rb +12 -59
- data/lib/re2/version.rb +10 -1
- data/lib/re2.rb +9 -3
- data/ports/archives/20240116.1.tar.gz +0 -0
- data/ports/archives/re2-2024-04-01.tar.gz +0 -0
- data/re2.gemspec +5 -2
- data/spec/kernel_spec.rb +10 -2
- data/spec/re2/match_data_spec.rb +98 -28
- data/spec/re2/regexp_spec.rb +546 -113
- data/spec/re2/scanner_spec.rb +26 -9
- data/spec/re2/set_spec.rb +28 -18
- data/spec/re2/string_spec.rb +2 -0
- data/spec/re2_spec.rb +34 -4
- data/spec/spec_helper.rb +2 -0
- metadata +10 -9
- data/ports/archives/20230802.1.tar.gz +0 -0
- data/ports/archives/re2-2023-11-01.tar.gz +0 -0
data/ext/re2/re2.cc
CHANGED
@@ -1,8 +1,10 @@
|
|
1
1
|
/*
|
2
|
-
* re2 (
|
3
|
-
* Ruby bindings to
|
2
|
+
* re2 (https://github.com/mudge/re2)
|
3
|
+
* Ruby bindings to RE2, a "fast, safe, thread-friendly alternative to
|
4
|
+
* backtracking regular expression engines like those used in PCRE, Perl, and
|
5
|
+
* Python".
|
4
6
|
*
|
5
|
-
* Copyright (c) 2010
|
7
|
+
* Copyright (c) 2010, Paul Mucur (https://mudge.name)
|
6
8
|
* Released under the BSD Licence, please see LICENSE.txt
|
7
9
|
*/
|
8
10
|
|
@@ -42,13 +44,14 @@ typedef struct {
|
|
42
44
|
} re2_set;
|
43
45
|
|
44
46
|
VALUE re2_mRE2, re2_cRegexp, re2_cMatchData, re2_cScanner, re2_cSet,
|
45
|
-
re2_eSetMatchError, re2_eSetUnsupportedError;
|
47
|
+
re2_eSetMatchError, re2_eSetUnsupportedError, re2_eRegexpUnsupportedError;
|
46
48
|
|
47
49
|
/* Symbols used in RE2 options. */
|
48
50
|
static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors,
|
49
51
|
id_max_mem, id_literal, id_never_nl, id_case_sensitive,
|
50
|
-
id_perl_classes, id_word_boundary, id_one_line,
|
51
|
-
|
52
|
+
id_perl_classes, id_word_boundary, id_one_line, id_unanchored,
|
53
|
+
id_anchor, id_anchor_start, id_anchor_both, id_exception,
|
54
|
+
id_submatches, id_startpos, id_endpos;
|
52
55
|
|
53
56
|
inline VALUE encoded_str_new(const char *str, long length, RE2::Options::Encoding encoding) {
|
54
57
|
if (encoding == RE2::Options::EncodingUTF8) {
|
@@ -122,7 +125,7 @@ static void parse_re2_options(RE2::Options* re2_options, const VALUE options) {
|
|
122
125
|
}
|
123
126
|
}
|
124
127
|
|
125
|
-
/* For compatibility with
|
128
|
+
/* For compatibility with Ruby < 2.7 */
|
126
129
|
#ifdef HAVE_RB_GC_MARK_MOVABLE
|
127
130
|
#define re2_compact_callback(x) (x),
|
128
131
|
#else
|
@@ -270,12 +273,14 @@ static VALUE re2_scanner_allocate(VALUE klass) {
|
|
270
273
|
}
|
271
274
|
|
272
275
|
/*
|
273
|
-
* Returns a frozen copy of the
|
276
|
+
* Returns a frozen copy of the text supplied when matching.
|
274
277
|
*
|
275
|
-
*
|
278
|
+
* If the text was already a frozen string, returns the original.
|
279
|
+
*
|
280
|
+
* @return [String] a frozen string with the text supplied when matching
|
276
281
|
* @example
|
277
|
-
* m = RE2::Regexp.new('(\d+)').
|
278
|
-
* m.string
|
282
|
+
* m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
|
283
|
+
* m.string #=> "bob 123"
|
279
284
|
*/
|
280
285
|
static VALUE re2_matchdata_string(const VALUE self) {
|
281
286
|
re2_matchdata *m;
|
@@ -285,9 +290,10 @@ static VALUE re2_matchdata_string(const VALUE self) {
|
|
285
290
|
}
|
286
291
|
|
287
292
|
/*
|
288
|
-
* Returns the
|
293
|
+
* Returns the text supplied when incrementally matching with
|
294
|
+
* {RE2::Regexp#scan}.
|
289
295
|
*
|
290
|
-
* @return [String] the original string
|
296
|
+
* @return [String] the original string passed to {RE2::Regexp#scan}
|
291
297
|
* @example
|
292
298
|
* c = RE2::Regexp.new('(\d+)').scan("foo")
|
293
299
|
* c.string #=> "foo"
|
@@ -300,9 +306,9 @@ static VALUE re2_scanner_string(const VALUE self) {
|
|
300
306
|
}
|
301
307
|
|
302
308
|
/*
|
303
|
-
* Returns whether the
|
309
|
+
* Returns whether the {RE2::Scanner} has consumed all input or not.
|
304
310
|
*
|
305
|
-
* @return [Boolean] whether the
|
311
|
+
* @return [Boolean] whether the {RE2::Scanner} has consumed all input or not
|
306
312
|
* @example
|
307
313
|
* c = RE2::Regexp.new('(\d+)').scan("foo")
|
308
314
|
* c.eof? #=> true
|
@@ -315,7 +321,7 @@ static VALUE re2_scanner_eof(const VALUE self) {
|
|
315
321
|
}
|
316
322
|
|
317
323
|
/*
|
318
|
-
* Rewind the
|
324
|
+
* Rewind the {RE2::Scanner} to the start of the string.
|
319
325
|
*
|
320
326
|
* @example
|
321
327
|
* s = RE2::Regexp.new('(\d+)').scan("1 2 3")
|
@@ -330,21 +336,27 @@ static VALUE re2_scanner_rewind(VALUE self) {
|
|
330
336
|
TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);
|
331
337
|
|
332
338
|
delete c->input;
|
333
|
-
c->input = new(std::nothrow) re2::StringPiece(
|
339
|
+
c->input = new(std::nothrow) re2::StringPiece(
|
340
|
+
RSTRING_PTR(c->text), RSTRING_LEN(c->text));
|
334
341
|
c->eof = false;
|
335
342
|
|
336
343
|
return self;
|
337
344
|
}
|
338
345
|
|
339
346
|
/*
|
340
|
-
* Scan the given text incrementally for matches
|
341
|
-
*
|
347
|
+
* Scan the given text incrementally for matches using
|
348
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L447-L463
|
349
|
+
* `FindAndConsume`}, returning an array of submatches on each subsequent
|
350
|
+
* call. Returns `nil` if no matches are found or an empty array for every
|
351
|
+
* match if the pattern has no capturing groups.
|
342
352
|
*
|
343
353
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
344
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
345
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
354
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
355
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
346
356
|
*
|
347
|
-
* @return [Array<String>] the
|
357
|
+
* @return [Array<String>] if the pattern has capturing groups
|
358
|
+
* @return [[]] if the pattern does not have capturing groups
|
359
|
+
* @return [nil] if no matches are found
|
348
360
|
* @example
|
349
361
|
* s = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
|
350
362
|
* s.scan #=> ["Foo"]
|
@@ -359,7 +371,7 @@ static VALUE re2_scanner_scan(VALUE self) {
|
|
359
371
|
|
360
372
|
std::vector<RE2::Arg> argv(c->number_of_capturing_groups);
|
361
373
|
std::vector<RE2::Arg*> args(c->number_of_capturing_groups);
|
362
|
-
std::vector<
|
374
|
+
std::vector<re2::StringPiece> matches(c->number_of_capturing_groups);
|
363
375
|
|
364
376
|
if (c->eof) {
|
365
377
|
return Qnil;
|
@@ -403,9 +415,6 @@ static VALUE re2_scanner_scan(VALUE self) {
|
|
403
415
|
}
|
404
416
|
}
|
405
417
|
|
406
|
-
/*
|
407
|
-
* Retrieve a matchdata by index or name.
|
408
|
-
*/
|
409
418
|
static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
|
410
419
|
re2_matchdata *m;
|
411
420
|
re2_pattern *p;
|
@@ -417,10 +426,20 @@ static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
|
|
417
426
|
|
418
427
|
if (FIXNUM_P(idx)) {
|
419
428
|
id = FIX2INT(idx);
|
429
|
+
} else if (SYMBOL_P(idx)) {
|
430
|
+
const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
|
431
|
+
std::map<std::string, int>::const_iterator search = groups.find(rb_id2name(SYM2ID(idx)));
|
432
|
+
|
433
|
+
if (search != groups.end()) {
|
434
|
+
id = search->second;
|
435
|
+
} else {
|
436
|
+
return NULL;
|
437
|
+
}
|
420
438
|
} else {
|
421
|
-
|
439
|
+
StringValue(idx);
|
440
|
+
|
422
441
|
const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
|
423
|
-
std::map<std::string, int>::const_iterator search = groups.find(
|
442
|
+
std::map<std::string, int>::const_iterator search = groups.find(std::string(RSTRING_PTR(idx), RSTRING_LEN(idx)));
|
424
443
|
|
425
444
|
if (search != groups.end()) {
|
426
445
|
id = search->second;
|
@@ -441,13 +460,14 @@ static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
|
|
441
460
|
}
|
442
461
|
|
443
462
|
/*
|
444
|
-
* Returns the number of elements in the
|
463
|
+
* Returns the number of elements in the {RE2::MatchData} (including the
|
464
|
+
* overall match, submatches and any `nil`s).
|
445
465
|
*
|
446
466
|
* @return [Integer] the number of elements
|
447
467
|
* @example
|
448
468
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
449
|
-
* m.size
|
450
|
-
* m.length
|
469
|
+
* m.size #=> 2
|
470
|
+
* m.length #=> 2
|
451
471
|
*/
|
452
472
|
static VALUE re2_matchdata_size(const VALUE self) {
|
453
473
|
re2_matchdata *m;
|
@@ -458,14 +478,15 @@ static VALUE re2_matchdata_size(const VALUE self) {
|
|
458
478
|
}
|
459
479
|
|
460
480
|
/*
|
461
|
-
* Returns the offset of the start of the nth element of the
|
481
|
+
* Returns the offset of the start of the nth element of the {RE2::MatchData}.
|
462
482
|
*
|
463
|
-
* @param [Integer, String, Symbol] n the name or number of the
|
464
|
-
* @return [Integer] the offset of the start of the match
|
483
|
+
* @param [Integer, String, Symbol] n the name or number of the submatch
|
484
|
+
* @return [Integer, nil] the offset of the start of the match or `nil` if
|
485
|
+
* there is no such submatch
|
465
486
|
* @example
|
466
487
|
* m = RE2::Regexp.new('ob (\d+)').match("bob 123")
|
467
|
-
* m.begin(0)
|
468
|
-
* m.begin(1)
|
488
|
+
* m.begin(0) #=> 1
|
489
|
+
* m.begin(1) #=> 4
|
469
490
|
*/
|
470
491
|
static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
|
471
492
|
re2_matchdata *m;
|
@@ -483,14 +504,16 @@ static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
|
|
483
504
|
}
|
484
505
|
|
485
506
|
/*
|
486
|
-
* Returns the offset of the character following the end of the nth element of
|
507
|
+
* Returns the offset of the character following the end of the nth element of
|
508
|
+
* the {RE2::MatchData}.
|
487
509
|
*
|
488
510
|
* @param [Integer, String, Symbol] n the name or number of the match
|
489
|
-
* @return [Integer] the offset of the character following the end of the
|
511
|
+
* @return [Integer, nil] the offset of the character following the end of the
|
512
|
+
* match or `nil` if there is no such match
|
490
513
|
* @example
|
491
514
|
* m = RE2::Regexp.new('ob (\d+) b').match("bob 123 bob")
|
492
|
-
* m.end(0)
|
493
|
-
* m.end(1)
|
515
|
+
* m.end(0) #=> 9
|
516
|
+
* m.end(1) #=> 7
|
494
517
|
*/
|
495
518
|
static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
|
496
519
|
re2_matchdata *m;
|
@@ -510,10 +533,10 @@ static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
|
|
510
533
|
/*
|
511
534
|
* Returns the {RE2::Regexp} used in the match.
|
512
535
|
*
|
513
|
-
* @return [RE2::Regexp] the
|
536
|
+
* @return [RE2::Regexp] the regular expression used in the match
|
514
537
|
* @example
|
515
538
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
516
|
-
* m.regexp
|
539
|
+
* m.regexp #=> #<RE2::Regexp /(\d+)/>
|
517
540
|
*/
|
518
541
|
static VALUE re2_matchdata_regexp(const VALUE self) {
|
519
542
|
re2_matchdata *m;
|
@@ -523,12 +546,12 @@ static VALUE re2_matchdata_regexp(const VALUE self) {
|
|
523
546
|
}
|
524
547
|
|
525
548
|
/*
|
526
|
-
* Returns the {RE2::Regexp} used in the
|
549
|
+
* Returns the {RE2::Regexp} used in the {RE2::Scanner}.
|
527
550
|
*
|
528
|
-
* @return [RE2::Regexp] the
|
551
|
+
* @return [RE2::Regexp] the regular expression used in the {RE2::Scanner}
|
529
552
|
* @example
|
530
553
|
* c = RE2::Regexp.new('(\d+)').scan("bob 123")
|
531
|
-
* c.regexp
|
554
|
+
* c.regexp #=> #<RE2::Regexp /(\d+)/>
|
532
555
|
*/
|
533
556
|
static VALUE re2_scanner_regexp(const VALUE self) {
|
534
557
|
re2_scanner *c;
|
@@ -544,16 +567,17 @@ static VALUE re2_regexp_allocate(VALUE klass) {
|
|
544
567
|
}
|
545
568
|
|
546
569
|
/*
|
547
|
-
* Returns the array of matches
|
570
|
+
* Returns the array of matches including the overall match, submatches and any
|
571
|
+
* `nil`s.
|
548
572
|
*
|
549
573
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
550
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
551
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
574
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
575
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
552
576
|
*
|
553
577
|
* @return [Array<String, nil>] the array of matches
|
554
578
|
* @example
|
555
579
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
556
|
-
* m.to_a
|
580
|
+
* m.to_a #=> ["123", "123"]
|
557
581
|
*/
|
558
582
|
static VALUE re2_matchdata_to_a(const VALUE self) {
|
559
583
|
re2_matchdata *m;
|
@@ -598,7 +622,7 @@ static VALUE re2_matchdata_nth_match(int nth, const VALUE self) {
|
|
598
622
|
}
|
599
623
|
}
|
600
624
|
|
601
|
-
static VALUE re2_matchdata_named_match(const
|
625
|
+
static VALUE re2_matchdata_named_match(const std::string &name, const VALUE self) {
|
602
626
|
re2_matchdata *m;
|
603
627
|
re2_pattern *p;
|
604
628
|
|
@@ -619,19 +643,17 @@ static VALUE re2_matchdata_named_match(const char* name, const VALUE self) {
|
|
619
643
|
* Retrieve zero, one or more matches by index or name.
|
620
644
|
*
|
621
645
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
622
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
623
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
624
|
-
*
|
625
|
-
* @return [Array<String, nil>, String, Boolean]
|
646
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
647
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
626
648
|
*
|
627
649
|
* @overload [](index)
|
628
650
|
* Access a particular match by index.
|
629
651
|
*
|
630
652
|
* @param [Integer] index the index of the match to fetch
|
631
|
-
* @return [String, nil] the specified match
|
653
|
+
* @return [String, nil] the specified match or `nil` if it isn't present
|
632
654
|
* @example
|
633
655
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
634
|
-
* m[0]
|
656
|
+
* m[0] #=> "123"
|
635
657
|
*
|
636
658
|
* @overload [](start, length)
|
637
659
|
* Access a range of matches by starting index and length.
|
@@ -641,7 +663,7 @@ static VALUE re2_matchdata_named_match(const char* name, const VALUE self) {
|
|
641
663
|
* @return [Array<String, nil>] the specified matches
|
642
664
|
* @example
|
643
665
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
644
|
-
* m[0, 1]
|
666
|
+
* m[0, 1] #=> ["123"]
|
645
667
|
*
|
646
668
|
* @overload [](range)
|
647
669
|
* Access a range of matches by index.
|
@@ -650,13 +672,13 @@ static VALUE re2_matchdata_named_match(const char* name, const VALUE self) {
|
|
650
672
|
* @return [Array<String, nil>] the specified matches
|
651
673
|
* @example
|
652
674
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
653
|
-
* m[0..1]
|
675
|
+
* m[0..1] #=> ["123", "123"]
|
654
676
|
*
|
655
677
|
* @overload [](name)
|
656
678
|
* Access a particular match by name.
|
657
679
|
*
|
658
680
|
* @param [String, Symbol] name the name of the match to fetch
|
659
|
-
* @return [String, nil] the specific match
|
681
|
+
* @return [String, nil] the specific match or `nil` if it isn't present
|
660
682
|
* @example
|
661
683
|
* m = RE2::Regexp.new('(?P<number>\d+)').match("bob 123")
|
662
684
|
* m["number"] #=> "123"
|
@@ -667,7 +689,8 @@ static VALUE re2_matchdata_aref(int argc, VALUE *argv, const VALUE self) {
|
|
667
689
|
rb_scan_args(argc, argv, "11", &idx, &rest);
|
668
690
|
|
669
691
|
if (TYPE(idx) == T_STRING) {
|
670
|
-
return re2_matchdata_named_match(
|
692
|
+
return re2_matchdata_named_match(
|
693
|
+
std::string(RSTRING_PTR(idx), RSTRING_LEN(idx)), self);
|
671
694
|
} else if (SYMBOL_P(idx)) {
|
672
695
|
return re2_matchdata_named_match(rb_id2name(SYM2ID(idx)), self);
|
673
696
|
} else if (!NIL_P(rest) || !FIXNUM_P(idx) || FIX2INT(idx) < 0) {
|
@@ -690,13 +713,13 @@ static VALUE re2_matchdata_to_s(const VALUE self) {
|
|
690
713
|
* Returns a printable version of the match.
|
691
714
|
*
|
692
715
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
693
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
694
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
716
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
717
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
695
718
|
*
|
696
719
|
* @return [String] a printable version of the match
|
697
720
|
* @example
|
698
721
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
699
|
-
* m.inspect
|
722
|
+
* m.inspect #=> "#<RE2::MatchData \"123\" 1:\"123\">"
|
700
723
|
*/
|
701
724
|
static VALUE re2_matchdata_inspect(const VALUE self) {
|
702
725
|
re2_matchdata *m;
|
@@ -720,7 +743,9 @@ static VALUE re2_matchdata_inspect(const VALUE self) {
|
|
720
743
|
if (match == Qnil) {
|
721
744
|
output << "nil";
|
722
745
|
} else {
|
723
|
-
output << "\""
|
746
|
+
output << "\"";
|
747
|
+
output.write(RSTRING_PTR(match), RSTRING_LEN(match));
|
748
|
+
output << "\"";
|
724
749
|
}
|
725
750
|
}
|
726
751
|
|
@@ -734,13 +759,14 @@ static VALUE re2_matchdata_inspect(const VALUE self) {
|
|
734
759
|
* Returns the array of submatches for pattern matching.
|
735
760
|
*
|
736
761
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
737
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
738
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is
|
762
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
763
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is
|
764
|
+
* undefined).
|
739
765
|
*
|
740
766
|
* @return [Array<String, nil>] the array of submatches
|
741
767
|
* @example
|
742
768
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
743
|
-
* m.deconstruct
|
769
|
+
* m.deconstruct #=> ["123"]
|
744
770
|
*
|
745
771
|
* @example pattern matching
|
746
772
|
* case RE2::Regexp.new('(\d+) (\d+)').match("bob 123 456")
|
@@ -780,17 +806,18 @@ static VALUE re2_matchdata_deconstruct(const VALUE self) {
|
|
780
806
|
* order but an invalid name will cause the hash to be immediately returned.
|
781
807
|
*
|
782
808
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
783
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
784
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
809
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
810
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
785
811
|
*
|
786
812
|
* @return [Hash] a hash of capturing group names to submatches
|
787
|
-
* @param [Array<Symbol>, nil] keys an array of Symbol capturing group names
|
813
|
+
* @param [Array<Symbol>, nil] keys an array of `Symbol` capturing group names
|
814
|
+
* or `nil` to return all names
|
788
815
|
* @example
|
789
816
|
* m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
790
|
-
* m.deconstruct_keys(nil)
|
791
|
-
* m.deconstruct_keys([:numbers])
|
792
|
-
* m.deconstruct_keys([:fruit])
|
793
|
-
* m.deconstruct_keys([:letters, :fruit])
|
817
|
+
* m.deconstruct_keys(nil) #=> {numbers: "123", letters: "abc"}
|
818
|
+
* m.deconstruct_keys([:numbers]) #=> {numbers: "123"}
|
819
|
+
* m.deconstruct_keys([:fruit]) #=> {}
|
820
|
+
* m.deconstruct_keys([:letters, :fruit]) #=> {letters: "abc"}
|
794
821
|
*
|
795
822
|
* @example pattern matching
|
796
823
|
* case RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
@@ -839,11 +866,9 @@ static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys)
|
|
839
866
|
}
|
840
867
|
|
841
868
|
/*
|
842
|
-
*
|
843
|
-
* +pattern+ stored inside. Equivalent to +RE2::Regexp.new+.
|
869
|
+
* Shorthand to compile a new {RE2::Regexp}.
|
844
870
|
*
|
845
871
|
* @see RE2::Regexp#initialize
|
846
|
-
*
|
847
872
|
*/
|
848
873
|
static VALUE re2_re2(int argc, VALUE *argv, VALUE) {
|
849
874
|
return rb_class_new_instance(argc, argv, re2_cRegexp);
|
@@ -851,22 +876,21 @@ static VALUE re2_re2(int argc, VALUE *argv, VALUE) {
|
|
851
876
|
|
852
877
|
/*
|
853
878
|
* Returns a new {RE2::Regexp} object with a compiled version of
|
854
|
-
*
|
855
|
-
*
|
856
|
-
* @return [RE2::Regexp]
|
879
|
+
* `pattern` stored inside.
|
857
880
|
*
|
858
881
|
* @overload initialize(pattern)
|
859
882
|
* Returns a new {RE2::Regexp} object with a compiled version of
|
860
|
-
*
|
883
|
+
* `pattern` stored inside with the default options.
|
861
884
|
*
|
862
885
|
* @param [String] pattern the pattern to compile
|
863
|
-
* @return [RE2::Regexp]
|
886
|
+
* @return [RE2::Regexp] a {RE2::Regexp} with the specified pattern
|
887
|
+
* @raise [TypeError] if the given pattern can't be coerced to a `String`
|
864
888
|
* @raise [NoMemoryError] if memory could not be allocated for the compiled
|
865
|
-
*
|
889
|
+
* pattern
|
866
890
|
*
|
867
891
|
* @overload initialize(pattern, options)
|
868
892
|
* Returns a new {RE2::Regexp} object with a compiled version of
|
869
|
-
*
|
893
|
+
* `pattern` stored inside with the specified options.
|
870
894
|
*
|
871
895
|
* @param [String] pattern the pattern to compile
|
872
896
|
* @param [Hash] options the options with which to compile the pattern
|
@@ -876,12 +900,13 @@ static VALUE re2_re2(int argc, VALUE *argv, VALUE) {
|
|
876
900
|
* @option options [Boolean] :log_errors (true) log syntax and execution errors to ERROR
|
877
901
|
* @option options [Integer] :max_mem approx. max memory footprint of RE2
|
878
902
|
* @option options [Boolean] :literal (false) interpret string as literal, not regexp
|
879
|
-
* @option options [Boolean] :never_nl (false) never match
|
880
|
-
* @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with (?i) unless in posix_syntax mode)
|
881
|
-
* @option options [Boolean] :perl_classes (false) allow Perl's
|
882
|
-
* @option options [Boolean] :word_boundary (false) allow
|
883
|
-
* @option options [Boolean] :one_line (false)
|
884
|
-
* @return [RE2::Regexp]
|
903
|
+
* @option options [Boolean] :never_nl (false) never match `\n`, even if it is in regexp
|
904
|
+
* @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with `(?i)` unless in `posix_syntax` mode)
|
905
|
+
* @option options [Boolean] :perl_classes (false) allow Perl's `\d` `\s` `\w` `\D` `\S` `\W` when in `posix_syntax` mode
|
906
|
+
* @option options [Boolean] :word_boundary (false) allow `\b` `\B` (word boundary and not) when in `posix_syntax` mode
|
907
|
+
* @option options [Boolean] :one_line (false) `^` and `$` only match beginning and end of text when in `posix_syntax` mode
|
908
|
+
* @return [RE2::Regexp] a {RE2::Regexp} with the specified pattern and options
|
909
|
+
* @raise [TypeError] if the given pattern can't be coerced to a `String`
|
885
910
|
* @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
|
886
911
|
*/
|
887
912
|
static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
@@ -899,9 +924,11 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
|
899
924
|
RE2::Options re2_options;
|
900
925
|
parse_re2_options(&re2_options, options);
|
901
926
|
|
902
|
-
p->pattern = new(std::nothrow) RE2(
|
927
|
+
p->pattern = new(std::nothrow) RE2(
|
928
|
+
re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)), re2_options);
|
903
929
|
} else {
|
904
|
-
p->pattern = new(std::nothrow) RE2(
|
930
|
+
p->pattern = new(std::nothrow) RE2(
|
931
|
+
re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)));
|
905
932
|
}
|
906
933
|
|
907
934
|
if (p->pattern == 0) {
|
@@ -912,16 +939,17 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
|
912
939
|
}
|
913
940
|
|
914
941
|
/*
|
915
|
-
* Returns a printable version of the regular expression
|
942
|
+
* Returns a printable version of the regular expression.
|
916
943
|
*
|
917
944
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
918
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
919
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is
|
945
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
946
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is
|
947
|
+
* undefined).
|
920
948
|
*
|
921
949
|
* @return [String] a printable version of the regular expression
|
922
950
|
* @example
|
923
951
|
* re2 = RE2::Regexp.new("woo?")
|
924
|
-
* re2.inspect
|
952
|
+
* re2.inspect #=> "#<RE2::Regexp /woo?/>"
|
925
953
|
*/
|
926
954
|
static VALUE re2_regexp_inspect(const VALUE self) {
|
927
955
|
re2_pattern *p;
|
@@ -937,16 +965,16 @@ static VALUE re2_regexp_inspect(const VALUE self) {
|
|
937
965
|
}
|
938
966
|
|
939
967
|
/*
|
940
|
-
* Returns a string version of the regular expression
|
968
|
+
* Returns a string version of the regular expression.
|
941
969
|
*
|
942
970
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
943
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
944
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
971
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
972
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
945
973
|
*
|
946
974
|
* @return [String] a string version of the regular expression
|
947
975
|
* @example
|
948
976
|
* re2 = RE2::Regexp.new("woo?")
|
949
|
-
* re2.to_s
|
977
|
+
* re2.to_s #=> "woo?"
|
950
978
|
*/
|
951
979
|
static VALUE re2_regexp_to_s(const VALUE self) {
|
952
980
|
re2_pattern *p;
|
@@ -958,13 +986,12 @@ static VALUE re2_regexp_to_s(const VALUE self) {
|
|
958
986
|
}
|
959
987
|
|
960
988
|
/*
|
961
|
-
* Returns whether or not the regular expression
|
962
|
-
* was compiled successfully or not.
|
989
|
+
* Returns whether or not the regular expression was compiled successfully.
|
963
990
|
*
|
964
991
|
* @return [Boolean] whether or not compilation was successful
|
965
992
|
* @example
|
966
993
|
* re2 = RE2::Regexp.new("woo?")
|
967
|
-
* re2.ok?
|
994
|
+
* re2.ok? #=> true
|
968
995
|
*/
|
969
996
|
static VALUE re2_regexp_ok(const VALUE self) {
|
970
997
|
re2_pattern *p;
|
@@ -974,13 +1001,13 @@ static VALUE re2_regexp_ok(const VALUE self) {
|
|
974
1001
|
}
|
975
1002
|
|
976
1003
|
/*
|
977
|
-
* Returns whether or not the regular expression
|
978
|
-
*
|
1004
|
+
* Returns whether or not the regular expression was compiled with the `utf8`
|
1005
|
+
* option set to `true`.
|
979
1006
|
*
|
980
|
-
* @return [Boolean] the utf8 option
|
1007
|
+
* @return [Boolean] the `utf8` option
|
981
1008
|
* @example
|
982
|
-
* re2 = RE2::Regexp.new("woo?", :
|
983
|
-
* re2.utf8?
|
1009
|
+
* re2 = RE2::Regexp.new("woo?", utf8: true)
|
1010
|
+
* re2.utf8? #=> true
|
984
1011
|
*/
|
985
1012
|
static VALUE re2_regexp_utf8(const VALUE self) {
|
986
1013
|
re2_pattern *p;
|
@@ -990,13 +1017,13 @@ static VALUE re2_regexp_utf8(const VALUE self) {
|
|
990
1017
|
}
|
991
1018
|
|
992
1019
|
/*
|
993
|
-
* Returns whether or not the regular expression
|
994
|
-
*
|
1020
|
+
* Returns whether or not the regular expression was compiled with the
|
1021
|
+
* `posix_syntax` option set to `true`.
|
995
1022
|
*
|
996
|
-
* @return [Boolean] the posix_syntax option
|
1023
|
+
* @return [Boolean] the `posix_syntax` option
|
997
1024
|
* @example
|
998
|
-
* re2 = RE2::Regexp.new("woo?", :
|
999
|
-
* re2.posix_syntax?
|
1025
|
+
* re2 = RE2::Regexp.new("woo?", posix_syntax: true)
|
1026
|
+
* re2.posix_syntax? #=> true
|
1000
1027
|
*/
|
1001
1028
|
static VALUE re2_regexp_posix_syntax(const VALUE self) {
|
1002
1029
|
re2_pattern *p;
|
@@ -1006,13 +1033,13 @@ static VALUE re2_regexp_posix_syntax(const VALUE self) {
|
|
1006
1033
|
}
|
1007
1034
|
|
1008
1035
|
/*
|
1009
|
-
* Returns whether or not the regular expression
|
1010
|
-
*
|
1036
|
+
* Returns whether or not the regular expression was compiled with the
|
1037
|
+
* `longest_match` option set to `true`.
|
1011
1038
|
*
|
1012
|
-
* @return [Boolean] the longest_match option
|
1039
|
+
* @return [Boolean] the `longest_match` option
|
1013
1040
|
* @example
|
1014
|
-
* re2 = RE2::Regexp.new("woo?", :
|
1015
|
-
* re2.longest_match?
|
1041
|
+
* re2 = RE2::Regexp.new("woo?", longest_match: true)
|
1042
|
+
* re2.longest_match? #=> true
|
1016
1043
|
*/
|
1017
1044
|
static VALUE re2_regexp_longest_match(const VALUE self) {
|
1018
1045
|
re2_pattern *p;
|
@@ -1022,13 +1049,13 @@ static VALUE re2_regexp_longest_match(const VALUE self) {
|
|
1022
1049
|
}
|
1023
1050
|
|
1024
1051
|
/*
|
1025
|
-
* Returns whether or not the regular expression
|
1026
|
-
*
|
1052
|
+
* Returns whether or not the regular expression was compiled with the
|
1053
|
+
* `log_errors` option set to `true`.
|
1027
1054
|
*
|
1028
|
-
* @return [Boolean] the log_errors option
|
1055
|
+
* @return [Boolean] the `log_errors` option
|
1029
1056
|
* @example
|
1030
|
-
* re2 = RE2::Regexp.new("woo?", :
|
1031
|
-
* re2.log_errors?
|
1057
|
+
* re2 = RE2::Regexp.new("woo?", log_errors: true)
|
1058
|
+
* re2.log_errors? #=> true
|
1032
1059
|
*/
|
1033
1060
|
static VALUE re2_regexp_log_errors(const VALUE self) {
|
1034
1061
|
re2_pattern *p;
|
@@ -1038,13 +1065,12 @@ static VALUE re2_regexp_log_errors(const VALUE self) {
|
|
1038
1065
|
}
|
1039
1066
|
|
1040
1067
|
/*
|
1041
|
-
* Returns the max_mem setting for the regular expression
|
1042
|
-
* +re2+.
|
1068
|
+
* Returns the `max_mem` setting for the regular expression.
|
1043
1069
|
*
|
1044
|
-
* @return [Integer] the max_mem option
|
1070
|
+
* @return [Integer] the `max_mem` option
|
1045
1071
|
* @example
|
1046
|
-
* re2 = RE2::Regexp.new("woo?", :
|
1047
|
-
* re2.max_mem
|
1072
|
+
* re2 = RE2::Regexp.new("woo?", max_mem: 1024)
|
1073
|
+
* re2.max_mem #=> 1024
|
1048
1074
|
*/
|
1049
1075
|
static VALUE re2_regexp_max_mem(const VALUE self) {
|
1050
1076
|
re2_pattern *p;
|
@@ -1054,13 +1080,13 @@ static VALUE re2_regexp_max_mem(const VALUE self) {
|
|
1054
1080
|
}
|
1055
1081
|
|
1056
1082
|
/*
|
1057
|
-
* Returns whether or not the regular expression
|
1058
|
-
*
|
1083
|
+
* Returns whether or not the regular expression was compiled with the
|
1084
|
+
* `literal` option set to `true`.
|
1059
1085
|
*
|
1060
|
-
* @return [Boolean] the literal option
|
1086
|
+
* @return [Boolean] the `literal` option
|
1061
1087
|
* @example
|
1062
|
-
* re2 = RE2::Regexp.new("woo?", :
|
1063
|
-
* re2.literal?
|
1088
|
+
* re2 = RE2::Regexp.new("woo?", literal: true)
|
1089
|
+
* re2.literal? #=> true
|
1064
1090
|
*/
|
1065
1091
|
static VALUE re2_regexp_literal(const VALUE self) {
|
1066
1092
|
re2_pattern *p;
|
@@ -1070,13 +1096,13 @@ static VALUE re2_regexp_literal(const VALUE self) {
|
|
1070
1096
|
}
|
1071
1097
|
|
1072
1098
|
/*
|
1073
|
-
* Returns whether or not the regular expression
|
1074
|
-
*
|
1099
|
+
* Returns whether or not the regular expression was compiled with the
|
1100
|
+
* `never_nl` option set to `true`.
|
1075
1101
|
*
|
1076
|
-
* @return [Boolean] the never_nl option
|
1102
|
+
* @return [Boolean] the `never_nl` option
|
1077
1103
|
* @example
|
1078
|
-
* re2 = RE2::Regexp.new("woo?", :
|
1079
|
-
* re2.never_nl?
|
1104
|
+
* re2 = RE2::Regexp.new("woo?", never_nl: true)
|
1105
|
+
* re2.never_nl? #=> true
|
1080
1106
|
*/
|
1081
1107
|
static VALUE re2_regexp_never_nl(const VALUE self) {
|
1082
1108
|
re2_pattern *p;
|
@@ -1086,13 +1112,13 @@ static VALUE re2_regexp_never_nl(const VALUE self) {
|
|
1086
1112
|
}
|
1087
1113
|
|
1088
1114
|
/*
|
1089
|
-
* Returns whether or not the regular expression
|
1090
|
-
*
|
1115
|
+
* Returns whether or not the regular expression was compiled with the
|
1116
|
+
* `case_sensitive` option set to `true`.
|
1091
1117
|
*
|
1092
|
-
* @return [Boolean] the case_sensitive option
|
1118
|
+
* @return [Boolean] the `case_sensitive` option
|
1093
1119
|
* @example
|
1094
|
-
* re2 = RE2::Regexp.new("woo?", :
|
1095
|
-
* re2.case_sensitive?
|
1120
|
+
* re2 = RE2::Regexp.new("woo?", case_sensitive: true)
|
1121
|
+
* re2.case_sensitive? #=> true
|
1096
1122
|
*/
|
1097
1123
|
static VALUE re2_regexp_case_sensitive(const VALUE self) {
|
1098
1124
|
re2_pattern *p;
|
@@ -1102,27 +1128,27 @@ static VALUE re2_regexp_case_sensitive(const VALUE self) {
|
|
1102
1128
|
}
|
1103
1129
|
|
1104
1130
|
/*
|
1105
|
-
* Returns whether or not the regular expression
|
1106
|
-
*
|
1131
|
+
* Returns whether or not the regular expression was compiled with the
|
1132
|
+
* `case_sensitive` option set to `false`.
|
1107
1133
|
*
|
1108
|
-
* @return [Boolean] the inverse of the case_sensitive option
|
1134
|
+
* @return [Boolean] the inverse of the `case_sensitive` option
|
1109
1135
|
* @example
|
1110
|
-
* re2 = RE2::Regexp.new("woo?", :
|
1111
|
-
* re2.case_insensitive?
|
1112
|
-
* re2.casefold?
|
1136
|
+
* re2 = RE2::Regexp.new("woo?", case_sensitive: true)
|
1137
|
+
* re2.case_insensitive? #=> false
|
1138
|
+
* re2.casefold? #=> false
|
1113
1139
|
*/
|
1114
1140
|
static VALUE re2_regexp_case_insensitive(const VALUE self) {
|
1115
1141
|
return BOOL2RUBY(re2_regexp_case_sensitive(self) != Qtrue);
|
1116
1142
|
}
|
1117
1143
|
|
1118
1144
|
/*
|
1119
|
-
* Returns whether or not the regular expression
|
1120
|
-
*
|
1145
|
+
* Returns whether or not the regular expression was compiled with the
|
1146
|
+
* `perl_classes` option set to `true`.
|
1121
1147
|
*
|
1122
|
-
* @return [Boolean] the perl_classes option
|
1148
|
+
* @return [Boolean] the `perl_classes` option
|
1123
1149
|
* @example
|
1124
|
-
* re2 = RE2::Regexp.new("woo?", :
|
1125
|
-
* re2.perl_classes?
|
1150
|
+
* re2 = RE2::Regexp.new("woo?", perl_classes: true)
|
1151
|
+
* re2.perl_classes? #=> true
|
1126
1152
|
*/
|
1127
1153
|
static VALUE re2_regexp_perl_classes(const VALUE self) {
|
1128
1154
|
re2_pattern *p;
|
@@ -1132,13 +1158,13 @@ static VALUE re2_regexp_perl_classes(const VALUE self) {
|
|
1132
1158
|
}
|
1133
1159
|
|
1134
1160
|
/*
|
1135
|
-
* Returns whether or not the regular expression
|
1136
|
-
*
|
1161
|
+
* Returns whether or not the regular expression was compiled with the
|
1162
|
+
* `word_boundary` option set to `true`.
|
1137
1163
|
*
|
1138
|
-
* @return [Boolean] the word_boundary option
|
1164
|
+
* @return [Boolean] the `word_boundary` option
|
1139
1165
|
* @example
|
1140
|
-
* re2 = RE2::Regexp.new("woo?", :
|
1141
|
-
* re2.word_boundary?
|
1166
|
+
* re2 = RE2::Regexp.new("woo?", word_boundary: true)
|
1167
|
+
* re2.word_boundary? #=> true
|
1142
1168
|
*/
|
1143
1169
|
static VALUE re2_regexp_word_boundary(const VALUE self) {
|
1144
1170
|
re2_pattern *p;
|
@@ -1148,13 +1174,13 @@ static VALUE re2_regexp_word_boundary(const VALUE self) {
|
|
1148
1174
|
}
|
1149
1175
|
|
1150
1176
|
/*
|
1151
|
-
* Returns whether or not the regular expression
|
1152
|
-
*
|
1177
|
+
* Returns whether or not the regular expression was compiled with the
|
1178
|
+
* `one_line` option set to `true`.
|
1153
1179
|
*
|
1154
|
-
* @return [Boolean] the one_line option
|
1180
|
+
* @return [Boolean] the `one_line` option
|
1155
1181
|
* @example
|
1156
|
-
* re2 = RE2::Regexp.new("woo?", :
|
1157
|
-
* re2.one_line?
|
1182
|
+
* re2 = RE2::Regexp.new("woo?", one_line: true)
|
1183
|
+
* re2.one_line? #=> true
|
1158
1184
|
*/
|
1159
1185
|
static VALUE re2_regexp_one_line(const VALUE self) {
|
1160
1186
|
re2_pattern *p;
|
@@ -1164,10 +1190,10 @@ static VALUE re2_regexp_one_line(const VALUE self) {
|
|
1164
1190
|
}
|
1165
1191
|
|
1166
1192
|
/*
|
1167
|
-
* If the RE2 could not be created properly, returns an
|
1168
|
-
*
|
1193
|
+
* If the {RE2::Regexp} could not be created properly, returns an error string
|
1194
|
+
* otherwise returns `nil`.
|
1169
1195
|
*
|
1170
|
-
* @return [String, nil] the error string or nil
|
1196
|
+
* @return [String, nil] the error string or `nil`
|
1171
1197
|
*/
|
1172
1198
|
static VALUE re2_regexp_error(const VALUE self) {
|
1173
1199
|
re2_pattern *p;
|
@@ -1181,14 +1207,14 @@ static VALUE re2_regexp_error(const VALUE self) {
|
|
1181
1207
|
}
|
1182
1208
|
|
1183
1209
|
/*
|
1184
|
-
* If the RE2 could not be created properly, returns
|
1185
|
-
* the offending portion of the regexp otherwise returns nil
|
1210
|
+
* If the {RE2::Regexp} could not be created properly, returns
|
1211
|
+
* the offending portion of the regexp otherwise returns `nil`.
|
1186
1212
|
*
|
1187
1213
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
1188
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
1189
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
1214
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
1215
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
1190
1216
|
*
|
1191
|
-
* @return [String, nil] the offending portion of the regexp or nil
|
1217
|
+
* @return [String, nil] the offending portion of the regexp or `nil`
|
1192
1218
|
*/
|
1193
1219
|
static VALUE re2_regexp_error_arg(const VALUE self) {
|
1194
1220
|
re2_pattern *p;
|
@@ -1218,8 +1244,7 @@ static VALUE re2_regexp_program_size(const VALUE self) {
|
|
1218
1244
|
}
|
1219
1245
|
|
1220
1246
|
/*
|
1221
|
-
* Returns a hash of the options currently set for
|
1222
|
-
* +re2+.
|
1247
|
+
* Returns a hash of the options currently set for the {RE2::Regexp}.
|
1223
1248
|
*
|
1224
1249
|
* @return [Hash] the options
|
1225
1250
|
*/
|
@@ -1270,8 +1295,8 @@ static VALUE re2_regexp_options(const VALUE self) {
|
|
1270
1295
|
|
1271
1296
|
/*
|
1272
1297
|
* Returns the number of capturing subpatterns, or -1 if the regexp
|
1273
|
-
* wasn't valid on construction. The overall match (
|
1274
|
-
* count: if the regexp is "(a)(b)"
|
1298
|
+
* wasn't valid on construction. The overall match (`$0`) does not
|
1299
|
+
* count: if the regexp is `"(a)(b)"`, returns 2.
|
1275
1300
|
*
|
1276
1301
|
* @return [Integer] the number of capturing subpatterns
|
1277
1302
|
*/
|
@@ -1286,8 +1311,8 @@ static VALUE re2_regexp_number_of_capturing_groups(const VALUE self) {
|
|
1286
1311
|
* Returns a hash of names to capturing indices of groups.
|
1287
1312
|
*
|
1288
1313
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
1289
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
1290
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
1314
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
1315
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
1291
1316
|
*
|
1292
1317
|
* @return [Hash] a hash of names to capturing indices
|
1293
1318
|
*/
|
@@ -1309,63 +1334,93 @@ static VALUE re2_regexp_named_capturing_groups(const VALUE self) {
|
|
1309
1334
|
}
|
1310
1335
|
|
1311
1336
|
/*
|
1312
|
-
*
|
1313
|
-
*
|
1314
|
-
* instance
|
1337
|
+
* General matching: match the pattern against the given `text` using
|
1338
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L562-L588
|
1339
|
+
* `Match`} and return a {RE2::MatchData} instance with the specified number of
|
1340
|
+
* submatches (defaults to the total number of capturing groups) or a boolean
|
1341
|
+
* (if no submatches are required).
|
1315
1342
|
*
|
1316
|
-
*
|
1343
|
+
* The number of submatches has a significant impact on performance: requesting
|
1344
|
+
* one submatch is much faster than requesting more than one and requesting
|
1345
|
+
* zero submatches is faster still.
|
1317
1346
|
*
|
1318
1347
|
* @overload match(text)
|
1319
|
-
* Returns
|
1320
|
-
*
|
1348
|
+
* Returns a {RE2::MatchData} containing the matching pattern and all
|
1349
|
+
* submatches resulting from looking for the regexp in `text` if the pattern
|
1321
1350
|
* contains capturing groups.
|
1322
1351
|
*
|
1323
|
-
* Returns either true or false indicating whether a successful match was
|
1352
|
+
* Returns either `true` or `false` indicating whether a successful match was
|
1324
1353
|
* made if the pattern contains no capturing groups.
|
1325
1354
|
*
|
1326
1355
|
* @param [String] text the text to search
|
1327
|
-
* @return [RE2::MatchData] if the pattern contains capturing groups
|
1356
|
+
* @return [RE2::MatchData, nil] if the pattern contains capturing groups
|
1328
1357
|
* @return [Boolean] if the pattern does not contain capturing groups
|
1329
|
-
* @raise [NoMemoryError] if there was not enough memory to allocate the
|
1358
|
+
* @raise [NoMemoryError] if there was not enough memory to allocate the submatches
|
1359
|
+
* @raise [TypeError] if given text that cannot be coerced to a `String`
|
1330
1360
|
* @example Matching with capturing groups
|
1331
1361
|
* r = RE2::Regexp.new('w(o)(o)')
|
1332
|
-
* r.match('woo')
|
1362
|
+
* r.match('woo') #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
|
1333
1363
|
* @example Matching without capturing groups
|
1334
1364
|
* r = RE2::Regexp.new('woo')
|
1335
|
-
* r.match('woo')
|
1365
|
+
* r.match('woo') #=> true
|
1336
1366
|
*
|
1337
|
-
* @overload match(text,
|
1338
|
-
*
|
1339
|
-
*
|
1367
|
+
* @overload match(text, options)
|
1368
|
+
* See `match(text)` but with customisable offsets for starting and ending
|
1369
|
+
* matches, optional anchoring to the start or both ends of the text and a
|
1370
|
+
* specific number of submatches to extract (padded with `nil`s if
|
1371
|
+
* necessary).
|
1340
1372
|
*
|
1341
1373
|
* @param [String] text the text to search
|
1342
|
-
* @
|
1374
|
+
* @param [Hash] options the options with which to perform the match
|
1375
|
+
* @option options [Integer] :startpos (0) offset at which to start matching
|
1376
|
+
* @option options [Integer] :endpos offset at which to stop matching, defaults to the text length
|
1377
|
+
* @option options [Symbol] :anchor (:unanchored) one of :unanchored, :anchor_start, :anchor_both to anchor the match
|
1378
|
+
* @option options [Integer] :submatches how many submatches to extract (0 is
|
1379
|
+
* fastest), defaults to the number of capturing groups
|
1380
|
+
* @return [RE2::MatchData, nil] if extracting any submatches
|
1381
|
+
* @return [Boolean] if not extracting any submatches
|
1382
|
+
* @raise [ArgumentError] if given a negative number of submatches, invalid
|
1383
|
+
* anchor or invalid startpos, endpos pair
|
1343
1384
|
* @raise [NoMemoryError] if there was not enough memory to allocate the matches
|
1344
|
-
* @
|
1385
|
+
* @raise [TypeError] if given non-String text, non-numeric number of
|
1386
|
+
* submatches, non-symbol anchor or non-hash options
|
1387
|
+
* @raise [RE2::Regexp::UnsupportedError] if given an endpos argument on a
|
1388
|
+
* version of RE2 that does not support it
|
1389
|
+
* @example Matching with capturing groups
|
1345
1390
|
* r = RE2::Regexp.new('w(o)(o)')
|
1346
|
-
* r.match('woo',
|
1347
|
-
* r.match('
|
1391
|
+
* r.match('woo', submatches: 1) #=> #<RE2::MatchData "woo" 1:"o">
|
1392
|
+
* r.match('woo', submatches: 3) #=> #<RE2::MatchData "woo" 1:"o" 2:"o" 3:nil>
|
1393
|
+
* r.match('woot', anchor: :anchor_both, submatches: 0)
|
1394
|
+
* #=> false
|
1395
|
+
* r.match('woot', anchor: :anchor_start, submatches: 0)
|
1396
|
+
* #=> true
|
1397
|
+
* @example Matching without capturing groups
|
1398
|
+
* r = RE2::Regexp.new('wo+')
|
1399
|
+
* r.match('woot', anchor: :anchor_both) #=> false
|
1400
|
+
* r.match('woot', anchor: :anchor_start) #=> true
|
1348
1401
|
*
|
1349
|
-
* @overload match(text,
|
1350
|
-
*
|
1351
|
-
*
|
1402
|
+
* @overload match(text, submatches)
|
1403
|
+
* @deprecated Legacy syntax for matching against `text` with a specific
|
1404
|
+
* number of submatches to extract. Use `match(text, submatches: n)` instead.
|
1352
1405
|
*
|
1353
1406
|
* @param [String] text the text to search
|
1354
|
-
* @param [Integer]
|
1355
|
-
* @return [RE2::MatchData]
|
1356
|
-
* @
|
1357
|
-
* @raise [NoMemoryError] if there was not enough memory to allocate the
|
1407
|
+
* @param [Integer] submatches the number of submatches to extract
|
1408
|
+
* @return [RE2::MatchData, nil] if extracting any submatches
|
1409
|
+
* @return [Boolean] if not extracting any submatches
|
1410
|
+
* @raise [NoMemoryError] if there was not enough memory to allocate the submatches
|
1411
|
+
* @raise [TypeError] if given non-numeric number of submatches
|
1358
1412
|
* @example
|
1359
1413
|
* r = RE2::Regexp.new('w(o)(o)')
|
1414
|
+
* r.match('woo', 0) #=> true
|
1360
1415
|
* r.match('woo', 1) #=> #<RE2::MatchData "woo" 1:"o">
|
1361
|
-
* r.match('woo',
|
1416
|
+
* r.match('woo', 2) #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
|
1362
1417
|
*/
|
1363
1418
|
static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
1364
1419
|
re2_pattern *p;
|
1365
1420
|
re2_matchdata *m;
|
1366
|
-
VALUE text,
|
1421
|
+
VALUE text, options;
|
1367
1422
|
|
1368
|
-
rb_scan_args(argc, argv, "11", &text, &
|
1423
|
+
rb_scan_args(argc, argv, "11", &text, &options);
|
1369
1424
|
|
1370
1425
|
/* Ensure text is a string. */
|
1371
1426
|
StringValue(text);
|
@@ -1373,12 +1428,80 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
1373
1428
|
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
1374
1429
|
|
1375
1430
|
int n;
|
1431
|
+
int startpos = 0;
|
1432
|
+
int endpos = RSTRING_LEN(text);
|
1433
|
+
RE2::Anchor anchor = RE2::UNANCHORED;
|
1434
|
+
|
1435
|
+
if (RTEST(options)) {
|
1436
|
+
if (FIXNUM_P(options)) {
|
1437
|
+
n = NUM2INT(options);
|
1438
|
+
|
1439
|
+
if (n < 0) {
|
1440
|
+
rb_raise(rb_eArgError, "number of matches should be >= 0");
|
1441
|
+
}
|
1442
|
+
} else {
|
1443
|
+
if (TYPE(options) != T_HASH) {
|
1444
|
+
options = rb_Hash(options);
|
1445
|
+
}
|
1446
|
+
|
1447
|
+
VALUE endpos_option = rb_hash_aref(options, ID2SYM(id_endpos));
|
1448
|
+
if (!NIL_P(endpos_option)) {
|
1449
|
+
#ifdef HAVE_ENDPOS_ARGUMENT
|
1450
|
+
Check_Type(endpos_option, T_FIXNUM);
|
1451
|
+
|
1452
|
+
endpos = NUM2INT(endpos_option);
|
1453
|
+
|
1454
|
+
if (endpos < 0) {
|
1455
|
+
rb_raise(rb_eArgError, "endpos should be >= 0");
|
1456
|
+
}
|
1457
|
+
#else
|
1458
|
+
rb_raise(re2_eRegexpUnsupportedError, "current version of RE2::Match() does not support endpos argument");
|
1459
|
+
#endif
|
1460
|
+
}
|
1461
|
+
|
1462
|
+
VALUE anchor_option = rb_hash_aref(options, ID2SYM(id_anchor));
|
1463
|
+
if (!NIL_P(anchor_option)) {
|
1464
|
+
Check_Type(anchor_option, T_SYMBOL);
|
1465
|
+
|
1466
|
+
ID id_anchor_option = SYM2ID(anchor_option);
|
1467
|
+
if (id_anchor_option == id_unanchored) {
|
1468
|
+
anchor = RE2::UNANCHORED;
|
1469
|
+
} else if (id_anchor_option == id_anchor_start) {
|
1470
|
+
anchor = RE2::ANCHOR_START;
|
1471
|
+
} else if (id_anchor_option == id_anchor_both) {
|
1472
|
+
anchor = RE2::ANCHOR_BOTH;
|
1473
|
+
} else {
|
1474
|
+
rb_raise(rb_eArgError, "anchor should be one of: :unanchored, :anchor_start, :anchor_both");
|
1475
|
+
}
|
1476
|
+
}
|
1477
|
+
|
1478
|
+
VALUE submatches_option = rb_hash_aref(options, ID2SYM(id_submatches));
|
1479
|
+
if (!NIL_P(submatches_option)) {
|
1480
|
+
Check_Type(submatches_option, T_FIXNUM);
|
1481
|
+
|
1482
|
+
n = NUM2INT(submatches_option);
|
1376
1483
|
|
1377
|
-
|
1378
|
-
|
1484
|
+
if (n < 0) {
|
1485
|
+
rb_raise(rb_eArgError, "number of matches should be >= 0");
|
1486
|
+
}
|
1487
|
+
} else {
|
1488
|
+
if (!p->pattern->ok()) {
|
1489
|
+
return Qnil;
|
1490
|
+
}
|
1491
|
+
|
1492
|
+
n = p->pattern->NumberOfCapturingGroups();
|
1493
|
+
}
|
1494
|
+
|
1495
|
+
VALUE startpos_option = rb_hash_aref(options, ID2SYM(id_startpos));
|
1496
|
+
if (!NIL_P(startpos_option)) {
|
1497
|
+
Check_Type(startpos_option, T_FIXNUM);
|
1379
1498
|
|
1380
|
-
|
1381
|
-
|
1499
|
+
startpos = NUM2INT(startpos_option);
|
1500
|
+
|
1501
|
+
if (startpos < 0) {
|
1502
|
+
rb_raise(rb_eArgError, "startpos should be >= 0");
|
1503
|
+
}
|
1504
|
+
}
|
1382
1505
|
}
|
1383
1506
|
} else {
|
1384
1507
|
if (!p->pattern->ok()) {
|
@@ -1388,13 +1511,19 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
1388
1511
|
n = p->pattern->NumberOfCapturingGroups();
|
1389
1512
|
}
|
1390
1513
|
|
1514
|
+
if (startpos > endpos) {
|
1515
|
+
rb_raise(rb_eArgError, "startpos should be <= endpos");
|
1516
|
+
}
|
1517
|
+
|
1391
1518
|
if (n == 0) {
|
1392
1519
|
#ifdef HAVE_ENDPOS_ARGUMENT
|
1393
|
-
bool matched = p->pattern->Match(
|
1394
|
-
|
1520
|
+
bool matched = p->pattern->Match(
|
1521
|
+
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
|
1522
|
+
startpos, endpos, anchor, 0, 0);
|
1395
1523
|
#else
|
1396
|
-
bool matched = p->pattern->Match(
|
1397
|
-
|
1524
|
+
bool matched = p->pattern->Match(
|
1525
|
+
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)),
|
1526
|
+
startpos, anchor, 0, 0);
|
1398
1527
|
#endif
|
1399
1528
|
return BOOL2RUBY(matched);
|
1400
1529
|
} else {
|
@@ -1418,11 +1547,13 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
1418
1547
|
m->number_of_matches = n;
|
1419
1548
|
|
1420
1549
|
#ifdef HAVE_ENDPOS_ARGUMENT
|
1421
|
-
bool matched = p->pattern->Match(
|
1422
|
-
|
1550
|
+
bool matched = p->pattern->Match(
|
1551
|
+
re2::StringPiece(RSTRING_PTR(m->text), RSTRING_LEN(m->text)),
|
1552
|
+
startpos, endpos, anchor, m->matches, n);
|
1423
1553
|
#else
|
1424
|
-
bool matched = p->pattern->Match(
|
1425
|
-
|
1554
|
+
bool matched = p->pattern->Match(
|
1555
|
+
re2::StringPiece(RSTRING_PTR(m->text), RSTRING_LEN(m->text)),
|
1556
|
+
startpos, anchor, m->matches, n);
|
1426
1557
|
#endif
|
1427
1558
|
if (matched) {
|
1428
1559
|
return matchdata;
|
@@ -1433,22 +1564,56 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
1433
1564
|
}
|
1434
1565
|
|
1435
1566
|
/*
|
1436
|
-
* Returns true
|
1437
|
-
*
|
1567
|
+
* Returns true if the pattern matches any substring of the given text using
|
1568
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L413-L427
|
1569
|
+
* `PartialMatch`}.
|
1438
1570
|
*
|
1439
1571
|
* @return [Boolean] whether the match was successful
|
1572
|
+
* @raise [TypeError] if text cannot be coerced to a `String`
|
1440
1573
|
*/
|
1441
1574
|
static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
|
1442
|
-
|
1575
|
+
re2_pattern *p;
|
1576
|
+
|
1577
|
+
/* Ensure text is a string. */
|
1578
|
+
StringValue(text);
|
1579
|
+
|
1580
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
1581
|
+
|
1582
|
+
return BOOL2RUBY(RE2::PartialMatch(
|
1583
|
+
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)), *p->pattern));
|
1584
|
+
}
|
1585
|
+
|
1586
|
+
/*
|
1587
|
+
* Returns true if the pattern matches the given text using
|
1588
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L376-L411
|
1589
|
+
* `FullMatch`}.
|
1590
|
+
*
|
1591
|
+
* @return [Boolean] whether the match was successful
|
1592
|
+
* @raise [TypeError] if text cannot be coerced to a `String`
|
1593
|
+
*/
|
1594
|
+
static VALUE re2_regexp_full_match_p(const VALUE self, VALUE text) {
|
1595
|
+
re2_pattern *p;
|
1596
|
+
|
1597
|
+
/* Ensure text is a string. */
|
1598
|
+
StringValue(text);
|
1443
1599
|
|
1444
|
-
|
1600
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
1601
|
+
|
1602
|
+
return BOOL2RUBY(RE2::FullMatch(
|
1603
|
+
re2::StringPiece(RSTRING_PTR(text), RSTRING_LEN(text)), *p->pattern));
|
1445
1604
|
}
|
1446
1605
|
|
1447
1606
|
/*
|
1448
|
-
* Returns a {RE2::Scanner} for scanning the given text incrementally
|
1607
|
+
* Returns a {RE2::Scanner} for scanning the given text incrementally with
|
1608
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L447-L463
|
1609
|
+
* `FindAndConsume`}.
|
1449
1610
|
*
|
1611
|
+
* @param [String] text the text to scan incrementally
|
1612
|
+
* @return [RE2::Scanner] an `Enumerable` {RE2::Scanner} object
|
1613
|
+
* @raise [TypeError] if `text` cannot be coerced to a `String`
|
1450
1614
|
* @example
|
1451
1615
|
* c = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
|
1616
|
+
* #=> #<RE2::Scanner:0x0000000000000001>
|
1452
1617
|
*/
|
1453
1618
|
static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
|
1454
1619
|
/* Ensure text is a string. */
|
@@ -1461,7 +1626,8 @@ static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
|
|
1461
1626
|
VALUE scanner = rb_class_new_instance(0, 0, re2_cScanner);
|
1462
1627
|
TypedData_Get_Struct(scanner, re2_scanner, &re2_scanner_data_type, c);
|
1463
1628
|
|
1464
|
-
c->input = new(std::nothrow) re2::StringPiece(
|
1629
|
+
c->input = new(std::nothrow) re2::StringPiece(
|
1630
|
+
RSTRING_PTR(text), RSTRING_LEN(text));
|
1465
1631
|
RB_OBJ_WRITE(scanner, &c->regexp, self);
|
1466
1632
|
RB_OBJ_WRITE(scanner, &c->text, text);
|
1467
1633
|
|
@@ -1477,17 +1643,40 @@ static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
|
|
1477
1643
|
}
|
1478
1644
|
|
1479
1645
|
/*
|
1480
|
-
* Returns
|
1481
|
-
*
|
1646
|
+
* Returns whether the underlying RE2 version supports passing an `endpos`
|
1647
|
+
* argument to
|
1648
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L562-L588
|
1649
|
+
* Match}. If not, {RE2::Regexp#match} will raise an error if attempting to
|
1650
|
+
* pass an `endpos`.
|
1651
|
+
*
|
1652
|
+
* @return [Boolean] whether the underlying
|
1653
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L562-L588
|
1654
|
+
* Match} has an endpos argument
|
1655
|
+
*/
|
1656
|
+
static VALUE re2_regexp_match_has_endpos_argument_p(VALUE) {
|
1657
|
+
#ifdef HAVE_ENDPOS_ARGUMENT
|
1658
|
+
return Qtrue;
|
1659
|
+
#else
|
1660
|
+
return Qfalse;
|
1661
|
+
#endif
|
1662
|
+
}
|
1663
|
+
|
1664
|
+
/*
|
1665
|
+
* Returns a copy of `str` with the first occurrence of `pattern` replaced with
|
1666
|
+
* `rewrite` using
|
1667
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L465-L480
|
1668
|
+
* `Replace`}.
|
1482
1669
|
*
|
1483
1670
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
1484
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
1485
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
1671
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
1672
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
1486
1673
|
*
|
1487
1674
|
* @param [String] str the string to modify
|
1488
1675
|
* @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
|
1489
1676
|
* @param [String] rewrite the string to replace with
|
1490
1677
|
* @return [String] the resulting string
|
1678
|
+
* @raise [TypeError] if the given rewrite or pattern (if not provided as a
|
1679
|
+
* {RE2::Regexp}) cannot be coerced to `String`s
|
1491
1680
|
* @example
|
1492
1681
|
* RE2.Replace("hello there", "hello", "howdy") #=> "howdy there"
|
1493
1682
|
* re2 = RE2::Regexp.new("hel+o")
|
@@ -1503,12 +1692,14 @@ static VALUE re2_Replace(VALUE, VALUE str, VALUE pattern,
|
|
1503
1692
|
/* Take a copy of str so it can be modified in-place by
|
1504
1693
|
* RE2::Replace.
|
1505
1694
|
*/
|
1506
|
-
|
1695
|
+
StringValue(str);
|
1696
|
+
std::string str_as_string(RSTRING_PTR(str), RSTRING_LEN(str));
|
1507
1697
|
|
1508
1698
|
/* Do the replacement. */
|
1509
1699
|
if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
|
1510
1700
|
TypedData_Get_Struct(pattern, re2_pattern, &re2_regexp_data_type, p);
|
1511
|
-
RE2::Replace(&str_as_string, *p->pattern,
|
1701
|
+
RE2::Replace(&str_as_string, *p->pattern,
|
1702
|
+
re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
|
1512
1703
|
|
1513
1704
|
return encoded_str_new(str_as_string.data(), str_as_string.size(),
|
1514
1705
|
p->pattern->options().encoding());
|
@@ -1516,27 +1707,33 @@ static VALUE re2_Replace(VALUE, VALUE str, VALUE pattern,
|
|
1516
1707
|
/* Ensure pattern is a string. */
|
1517
1708
|
StringValue(pattern);
|
1518
1709
|
|
1519
|
-
RE2::Replace(&str_as_string,
|
1710
|
+
RE2::Replace(&str_as_string,
|
1711
|
+
re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)),
|
1712
|
+
re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
|
1520
1713
|
|
1521
1714
|
return encoded_str_new(str_as_string.data(), str_as_string.size(), RE2::Options::EncodingUTF8);
|
1522
1715
|
}
|
1523
1716
|
}
|
1524
1717
|
|
1525
1718
|
/*
|
1526
|
-
* Return a copy of
|
1719
|
+
* Return a copy of `str` with `pattern` replaced by `rewrite` using
|
1720
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L482-L497
|
1721
|
+
* `GlobalReplace`}.
|
1527
1722
|
*
|
1528
1723
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
1529
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
1530
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
1724
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
1725
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
1531
1726
|
*
|
1532
1727
|
* @param [String] str the string to modify
|
1533
1728
|
* @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
|
1534
1729
|
* @param [String] rewrite the string to replace with
|
1730
|
+
* @raise [TypeError] if the given rewrite or pattern (if not provided as a
|
1731
|
+
* {RE2::Regexp}) cannot be coerced to `String`s
|
1535
1732
|
* @return [String] the resulting string
|
1536
1733
|
* @example
|
1537
1734
|
* re2 = RE2::Regexp.new("oo?")
|
1538
|
-
* RE2.GlobalReplace("whoops-doops", re2, "e")
|
1539
|
-
* RE2.GlobalReplace("hello there", "e", "i")
|
1735
|
+
* RE2.GlobalReplace("whoops-doops", re2, "e") #=> "wheps-deps"
|
1736
|
+
* RE2.GlobalReplace("hello there", "e", "i") #=> "hillo thiri"
|
1540
1737
|
*/
|
1541
1738
|
static VALUE re2_GlobalReplace(VALUE, VALUE str, VALUE pattern,
|
1542
1739
|
VALUE rewrite) {
|
@@ -1547,12 +1744,14 @@ static VALUE re2_GlobalReplace(VALUE, VALUE str, VALUE pattern,
|
|
1547
1744
|
* RE2::GlobalReplace.
|
1548
1745
|
*/
|
1549
1746
|
re2_pattern *p;
|
1550
|
-
|
1747
|
+
StringValue(str);
|
1748
|
+
std::string str_as_string(RSTRING_PTR(str), RSTRING_LEN(str));
|
1551
1749
|
|
1552
1750
|
/* Do the replacement. */
|
1553
1751
|
if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
|
1554
1752
|
TypedData_Get_Struct(pattern, re2_pattern, &re2_regexp_data_type, p);
|
1555
|
-
RE2::GlobalReplace(&str_as_string, *p->pattern,
|
1753
|
+
RE2::GlobalReplace(&str_as_string, *p->pattern,
|
1754
|
+
re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
|
1556
1755
|
|
1557
1756
|
return encoded_str_new(str_as_string.data(), str_as_string.size(),
|
1558
1757
|
p->pattern->options().encoding());
|
@@ -1560,27 +1759,32 @@ static VALUE re2_GlobalReplace(VALUE, VALUE str, VALUE pattern,
|
|
1560
1759
|
/* Ensure pattern is a string. */
|
1561
1760
|
StringValue(pattern);
|
1562
1761
|
|
1563
|
-
RE2::GlobalReplace(&str_as_string,
|
1564
|
-
RSTRING_PTR(
|
1762
|
+
RE2::GlobalReplace(&str_as_string,
|
1763
|
+
re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)),
|
1764
|
+
re2::StringPiece(RSTRING_PTR(rewrite), RSTRING_LEN(rewrite)));
|
1565
1765
|
|
1566
1766
|
return encoded_str_new(str_as_string.data(), str_as_string.size(), RE2::Options::EncodingUTF8);
|
1567
1767
|
}
|
1568
1768
|
}
|
1569
1769
|
|
1570
1770
|
/*
|
1571
|
-
* Returns a version of str with all potentially meaningful regexp
|
1572
|
-
*
|
1573
|
-
*
|
1771
|
+
* Returns a version of `str` with all potentially meaningful regexp characters
|
1772
|
+
* escaped using
|
1773
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L512-L518
|
1774
|
+
* `QuoteMeta`}. The returned string, used as a regular expression, will
|
1775
|
+
* exactly match the original string.
|
1574
1776
|
*
|
1575
1777
|
* @param [String] unquoted the unquoted string
|
1778
|
+
* @raise [TypeError] if the given unquoted string cannot be coerced to a `String`
|
1576
1779
|
* @return [String] the escaped string
|
1577
1780
|
* @example
|
1578
|
-
* RE2::Regexp.escape("1.5-2.0?")
|
1781
|
+
* RE2::Regexp.escape("1.5-2.0?") #=> "1\.5\-2\.0\?"
|
1579
1782
|
*/
|
1580
1783
|
static VALUE re2_QuoteMeta(VALUE, VALUE unquoted) {
|
1581
1784
|
StringValue(unquoted);
|
1582
1785
|
|
1583
|
-
std::string quoted_string = RE2::QuoteMeta(
|
1786
|
+
std::string quoted_string = RE2::QuoteMeta(
|
1787
|
+
re2::StringPiece(RSTRING_PTR(unquoted), RSTRING_LEN(unquoted)));
|
1584
1788
|
|
1585
1789
|
return rb_str_new(quoted_string.data(), quoted_string.size());
|
1586
1790
|
}
|
@@ -1641,14 +1845,14 @@ static VALUE re2_set_allocate(VALUE klass) {
|
|
1641
1845
|
* Returns a new {RE2::Set} object for the specified anchor with the default
|
1642
1846
|
* options.
|
1643
1847
|
*
|
1644
|
-
* @param [Symbol] anchor
|
1645
|
-
* @raise [ArgumentError] if anchor is not
|
1848
|
+
* @param [Symbol] anchor one of `:unanchored`, `:anchor_start`, `:anchor_both`
|
1849
|
+
* @raise [ArgumentError] if anchor is not `:unanchored`, `:anchor_start` or `:anchor_both`
|
1646
1850
|
* @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
|
1647
1851
|
*
|
1648
1852
|
* @overload initialize(anchor, options)
|
1649
1853
|
* Returns a new {RE2::Set} object with the specified options.
|
1650
1854
|
*
|
1651
|
-
* @param [Symbol] anchor
|
1855
|
+
* @param [Symbol] anchor one of `:unanchored`, `:anchor_start`, `:anchor_both`
|
1652
1856
|
* @param [Hash] options the options with which to compile the pattern
|
1653
1857
|
* @option options [Boolean] :utf8 (true) text and pattern are UTF-8; otherwise Latin-1
|
1654
1858
|
* @option options [Boolean] :posix_syntax (false) restrict regexps to POSIX egrep syntax
|
@@ -1656,13 +1860,13 @@ static VALUE re2_set_allocate(VALUE klass) {
|
|
1656
1860
|
* @option options [Boolean] :log_errors (true) log syntax and execution errors to ERROR
|
1657
1861
|
* @option options [Integer] :max_mem approx. max memory footprint of RE2
|
1658
1862
|
* @option options [Boolean] :literal (false) interpret string as literal, not regexp
|
1659
|
-
* @option options [Boolean] :never_nl (false) never match
|
1660
|
-
* @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with (?i) unless in posix_syntax mode)
|
1661
|
-
* @option options [Boolean] :perl_classes (false) allow Perl's
|
1662
|
-
* @option options [Boolean] :word_boundary (false) allow
|
1663
|
-
* @option options [Boolean] :one_line (false)
|
1664
|
-
* @return [RE2::Set]
|
1665
|
-
* @raise [ArgumentError] if anchor is not one of the accepted choices
|
1863
|
+
* @option options [Boolean] :never_nl (false) never match `\n`, even if it is in regexp
|
1864
|
+
* @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with `(?i)` unless in `posix_syntax` mode)
|
1865
|
+
* @option options [Boolean] :perl_classes (false) allow Perl's `\d` `\s` `\w` `\D` `\S` `\W` when in `posix_syntax` mode
|
1866
|
+
* @option options [Boolean] :word_boundary (false) allow `\b` `\B` (word boundary and not) when in `posix_syntax` mode
|
1867
|
+
* @option options [Boolean] :one_line (false) `^` and `$` only match beginning and end of text when in `posix_syntax` mode
|
1868
|
+
* @return [RE2::Set] a {RE2::Set} with the specified anchor and options
|
1869
|
+
* @raise [ArgumentError] if `anchor` is not one of the accepted choices
|
1666
1870
|
* @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
|
1667
1871
|
*/
|
1668
1872
|
static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
|
@@ -1676,12 +1880,12 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
|
|
1676
1880
|
|
1677
1881
|
if (!NIL_P(anchor)) {
|
1678
1882
|
Check_Type(anchor, T_SYMBOL);
|
1679
|
-
ID
|
1680
|
-
if (
|
1883
|
+
ID id_anchor_arg = SYM2ID(anchor);
|
1884
|
+
if (id_anchor_arg == id_unanchored) {
|
1681
1885
|
re2_anchor = RE2::UNANCHORED;
|
1682
|
-
} else if (
|
1886
|
+
} else if (id_anchor_arg == id_anchor_start) {
|
1683
1887
|
re2_anchor = RE2::ANCHOR_START;
|
1684
|
-
} else if (
|
1888
|
+
} else if (id_anchor_arg == id_anchor_both) {
|
1685
1889
|
re2_anchor = RE2::ANCHOR_BOTH;
|
1686
1890
|
} else {
|
1687
1891
|
rb_raise(rb_eArgError, "anchor should be one of: :unanchored, :anchor_start, :anchor_both");
|
@@ -1704,15 +1908,16 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
|
|
1704
1908
|
|
1705
1909
|
/*
|
1706
1910
|
* Adds a pattern to the set. Returns the index that will identify the pattern
|
1707
|
-
* in the output of #match. Cannot be called after #compile
|
1911
|
+
* in the output of {RE2::Set#match}. Cannot be called after {RE2::Set#compile}
|
1912
|
+
* has been called.
|
1708
1913
|
*
|
1709
1914
|
* @param [String] pattern the regex pattern
|
1710
1915
|
* @return [Integer] the index of the pattern in the set
|
1711
1916
|
* @raise [ArgumentError] if called after compile or the pattern is rejected
|
1712
1917
|
* @example
|
1713
1918
|
* set = RE2::Set.new
|
1714
|
-
* set.add("abc")
|
1715
|
-
* set.add("def")
|
1919
|
+
* set.add("abc") #=> 0
|
1920
|
+
* set.add("def") #=> 1
|
1716
1921
|
*/
|
1717
1922
|
static VALUE re2_set_add(VALUE self, VALUE pattern) {
|
1718
1923
|
StringValue(pattern);
|
@@ -1728,7 +1933,8 @@ static VALUE re2_set_add(VALUE self, VALUE pattern) {
|
|
1728
1933
|
|
1729
1934
|
{
|
1730
1935
|
std::string err;
|
1731
|
-
index = s->set->Add(
|
1936
|
+
index = s->set->Add(
|
1937
|
+
re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)), &err);
|
1732
1938
|
strlcpy(msg, err.c_str(), sizeof(msg));
|
1733
1939
|
}
|
1734
1940
|
|
@@ -1740,14 +1946,14 @@ static VALUE re2_set_add(VALUE self, VALUE pattern) {
|
|
1740
1946
|
}
|
1741
1947
|
|
1742
1948
|
/*
|
1743
|
-
* Compiles a Set so it can be used to match against. Must be called
|
1744
|
-
* and before #match.
|
1949
|
+
* Compiles a {RE2::Set} so it can be used to match against. Must be called
|
1950
|
+
* after {RE2::Set#add} and before {RE2::Set#match}.
|
1745
1951
|
*
|
1746
|
-
* @return [
|
1952
|
+
* @return [Boolean] whether compilation was a success
|
1747
1953
|
* @example
|
1748
1954
|
* set = RE2::Set.new
|
1749
1955
|
* set.add("abc")
|
1750
|
-
* set.compile
|
1956
|
+
* set.compile #=> true
|
1751
1957
|
*/
|
1752
1958
|
static VALUE re2_set_compile(VALUE self) {
|
1753
1959
|
re2_set *s;
|
@@ -1757,11 +1963,12 @@ static VALUE re2_set_compile(VALUE self) {
|
|
1757
1963
|
}
|
1758
1964
|
|
1759
1965
|
/*
|
1760
|
-
* Returns whether the underlying
|
1761
|
-
*
|
1762
|
-
*
|
1966
|
+
* Returns whether the underlying RE2 version outputs error information from
|
1967
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/set.h#L62-L65
|
1968
|
+
* `RE2::Set::Match`}. If not, {RE2::Set#match} will raise an error if attempting to set
|
1969
|
+
* its `:exception` option to `true`.
|
1763
1970
|
*
|
1764
|
-
* @return [
|
1971
|
+
* @return [Boolean] whether the underlying RE2 outputs error information from {RE2::Set} matches
|
1765
1972
|
*/
|
1766
1973
|
static VALUE re2_set_match_raises_errors_p(VALUE) {
|
1767
1974
|
#ifdef HAVE_ERROR_INFO_ARGUMENT
|
@@ -1785,31 +1992,31 @@ static VALUE re2_set_match_raises_errors_p(VALUE) {
|
|
1785
1992
|
* @param [String] str the text to match against
|
1786
1993
|
* @return [Array<Integer>] the indices of matching regexps
|
1787
1994
|
* @raise [MatchError] if an error occurs while matching
|
1788
|
-
* @raise [UnsupportedError] if the underlying version of
|
1995
|
+
* @raise [UnsupportedError] if the underlying version of RE2 does not output error information
|
1789
1996
|
* @example
|
1790
1997
|
* set = RE2::Set.new
|
1791
1998
|
* set.add("abc")
|
1792
1999
|
* set.add("def")
|
1793
2000
|
* set.compile
|
1794
|
-
* set.match("abcdef")
|
2001
|
+
* set.match("abcdef") #=> [0, 1]
|
1795
2002
|
*
|
1796
2003
|
* @overload match(str, options)
|
1797
2004
|
* Returns an array of integer indices of patterns matching the given string
|
1798
2005
|
* (if any). Raises exceptions if there are any errors while matching and the
|
1799
|
-
*
|
2006
|
+
* `:exception` option is set to true.
|
1800
2007
|
*
|
1801
2008
|
* @param [String] str the text to match against
|
1802
2009
|
* @param [Hash] options the options with which to match
|
1803
|
-
* @option options [Boolean] :exception (true) whether to raise exceptions with
|
2010
|
+
* @option options [Boolean] :exception (true) whether to raise exceptions with RE2's error information (not supported on ABI version 0 of RE2)
|
1804
2011
|
* @return [Array<Integer>] the indices of matching regexps
|
1805
2012
|
* @raise [MatchError] if an error occurs while matching
|
1806
|
-
* @raise [UnsupportedError] if the underlying version of
|
2013
|
+
* @raise [UnsupportedError] if the underlying version of RE2 does not output error information
|
1807
2014
|
* @example
|
1808
2015
|
* set = RE2::Set.new
|
1809
2016
|
* set.add("abc")
|
1810
2017
|
* set.add("def")
|
1811
2018
|
* set.compile
|
1812
|
-
* set.match("abcdef", :
|
2019
|
+
* set.match("abcdef", exception: true) #=> [0, 1]
|
1813
2020
|
*/
|
1814
2021
|
static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
|
1815
2022
|
VALUE str, options;
|
@@ -1834,7 +2041,8 @@ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
|
|
1834
2041
|
if (raise_exception) {
|
1835
2042
|
#ifdef HAVE_ERROR_INFO_ARGUMENT
|
1836
2043
|
RE2::Set::ErrorInfo e;
|
1837
|
-
bool match_failed = !s->set->Match(
|
2044
|
+
bool match_failed = !s->set->Match(
|
2045
|
+
re2::StringPiece(RSTRING_PTR(str), RSTRING_LEN(str)), &v, &e);
|
1838
2046
|
VALUE result = rb_ary_new2(v.size());
|
1839
2047
|
|
1840
2048
|
if (match_failed) {
|
@@ -1861,7 +2069,8 @@ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
|
|
1861
2069
|
rb_raise(re2_eSetUnsupportedError, "current version of RE2::Set::Match() does not output error information, :exception option can only be set to false");
|
1862
2070
|
#endif
|
1863
2071
|
} else {
|
1864
|
-
bool matched = s->set->Match(
|
2072
|
+
bool matched = s->set->Match(
|
2073
|
+
re2::StringPiece(RSTRING_PTR(str), RSTRING_LEN(str)), &v);
|
1865
2074
|
VALUE result = rb_ary_new2(v.size());
|
1866
2075
|
|
1867
2076
|
if (matched) {
|
@@ -1877,6 +2086,8 @@ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
|
|
1877
2086
|
extern "C" void Init_re2(void) {
|
1878
2087
|
re2_mRE2 = rb_define_module("RE2");
|
1879
2088
|
re2_cRegexp = rb_define_class_under(re2_mRE2, "Regexp", rb_cObject);
|
2089
|
+
re2_eRegexpUnsupportedError = rb_define_class_under(re2_cRegexp,
|
2090
|
+
"UnsupportedError", rb_const_get(rb_cObject, rb_intern("StandardError")));
|
1880
2091
|
re2_cMatchData = rb_define_class_under(re2_mRE2, "MatchData", rb_cObject);
|
1881
2092
|
re2_cScanner = rb_define_class_under(re2_mRE2, "Scanner", rb_cObject);
|
1882
2093
|
re2_cSet = rb_define_class_under(re2_mRE2, "Set", rb_cObject);
|
@@ -1930,6 +2141,8 @@ extern "C" void Init_re2(void) {
|
|
1930
2141
|
rb_define_method(re2_cScanner, "rewind",
|
1931
2142
|
RUBY_METHOD_FUNC(re2_scanner_rewind), 0);
|
1932
2143
|
|
2144
|
+
rb_define_singleton_method(re2_cRegexp, "match_has_endpos_argument?",
|
2145
|
+
RUBY_METHOD_FUNC(re2_regexp_match_has_endpos_argument_p), 0);
|
1933
2146
|
rb_define_method(re2_cRegexp, "initialize",
|
1934
2147
|
RUBY_METHOD_FUNC(re2_regexp_initialize), -1);
|
1935
2148
|
rb_define_method(re2_cRegexp, "ok?", RUBY_METHOD_FUNC(re2_regexp_ok), 0);
|
@@ -1947,12 +2160,14 @@ extern "C" void Init_re2(void) {
|
|
1947
2160
|
RUBY_METHOD_FUNC(re2_regexp_named_capturing_groups), 0);
|
1948
2161
|
rb_define_method(re2_cRegexp, "match", RUBY_METHOD_FUNC(re2_regexp_match),
|
1949
2162
|
-1);
|
1950
|
-
rb_define_method(re2_cRegexp, "match?",
|
1951
|
-
|
1952
|
-
rb_define_method(re2_cRegexp, "
|
1953
|
-
RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
|
1954
|
-
rb_define_method(re2_cRegexp, "===",
|
2163
|
+
rb_define_method(re2_cRegexp, "match?", RUBY_METHOD_FUNC(re2_regexp_match_p),
|
2164
|
+
1);
|
2165
|
+
rb_define_method(re2_cRegexp, "partial_match?",
|
1955
2166
|
RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
|
2167
|
+
rb_define_method(re2_cRegexp, "=~", RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
|
2168
|
+
rb_define_method(re2_cRegexp, "===", RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
|
2169
|
+
rb_define_method(re2_cRegexp, "full_match?",
|
2170
|
+
RUBY_METHOD_FUNC(re2_regexp_full_match_p), 1);
|
1956
2171
|
rb_define_method(re2_cRegexp, "scan",
|
1957
2172
|
RUBY_METHOD_FUNC(re2_regexp_scan), 1);
|
1958
2173
|
rb_define_method(re2_cRegexp, "to_s", RUBY_METHOD_FUNC(re2_regexp_to_s), 0);
|
@@ -2009,6 +2224,8 @@ extern "C" void Init_re2(void) {
|
|
2009
2224
|
RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
|
2010
2225
|
rb_define_singleton_method(re2_cRegexp, "quote",
|
2011
2226
|
RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
|
2227
|
+
|
2228
|
+
// (see RE2::Regexp#initialize)
|
2012
2229
|
rb_define_singleton_method(re2_cRegexp, "compile",
|
2013
2230
|
RUBY_METHOD_FUNC(rb_class_new_instance), -1);
|
2014
2231
|
|
@@ -2027,7 +2244,11 @@ extern "C" void Init_re2(void) {
|
|
2027
2244
|
id_word_boundary = rb_intern("word_boundary");
|
2028
2245
|
id_one_line = rb_intern("one_line");
|
2029
2246
|
id_unanchored = rb_intern("unanchored");
|
2247
|
+
id_anchor = rb_intern("anchor");
|
2030
2248
|
id_anchor_start = rb_intern("anchor_start");
|
2031
2249
|
id_anchor_both = rb_intern("anchor_both");
|
2032
2250
|
id_exception = rb_intern("exception");
|
2251
|
+
id_submatches = rb_intern("submatches");
|
2252
|
+
id_startpos = rb_intern("startpos");
|
2253
|
+
id_endpos = rb_intern("endpos");
|
2033
2254
|
}
|