re2 2.4.3-aarch64-linux → 2.5.0-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +236 -192
- data/ext/re2/extconf.rb +6 -70
- data/ext/re2/re2.cc +450 -263
- data/ext/re2/recipes.rb +8 -0
- data/lib/2.6/re2.so +0 -0
- data/lib/2.7/re2.so +0 -0
- data/lib/3.0/re2.so +0 -0
- data/lib/3.1/re2.so +0 -0
- data/lib/3.2/re2.so +0 -0
- data/lib/re2/regexp.rb +69 -0
- data/lib/re2/scanner.rb +8 -0
- data/lib/re2/string.rb +9 -59
- data/lib/re2/version.rb +9 -1
- data/lib/re2.rb +7 -3
- data/re2.gemspec +1 -0
- data/spec/kernel_spec.rb +2 -2
- data/spec/re2/match_data_spec.rb +64 -25
- data/spec/re2/regexp_spec.rb +492 -113
- data/spec/re2/scanner_spec.rb +3 -8
- data/spec/re2/set_spec.rb +18 -18
- data/spec/re2_spec.rb +4 -4
- metadata +3 -2
data/ext/re2/re2.cc
CHANGED
@@ -1,8 +1,10 @@
|
|
1
1
|
/*
|
2
|
-
* re2 (
|
3
|
-
* Ruby bindings to
|
2
|
+
* re2 (https://github.com/mudge/re2)
|
3
|
+
* Ruby bindings to RE2, a "fast, safe, thread-friendly alternative to
|
4
|
+
* backtracking regular expression engines like those used in PCRE, Perl, and
|
5
|
+
* Python".
|
4
6
|
*
|
5
|
-
* Copyright (c) 2010
|
7
|
+
* Copyright (c) 2010, Paul Mucur (https://mudge.name)
|
6
8
|
* Released under the BSD Licence, please see LICENSE.txt
|
7
9
|
*/
|
8
10
|
|
@@ -42,13 +44,14 @@ typedef struct {
|
|
42
44
|
} re2_set;
|
43
45
|
|
44
46
|
VALUE re2_mRE2, re2_cRegexp, re2_cMatchData, re2_cScanner, re2_cSet,
|
45
|
-
re2_eSetMatchError, re2_eSetUnsupportedError;
|
47
|
+
re2_eSetMatchError, re2_eSetUnsupportedError, re2_eRegexpUnsupportedError;
|
46
48
|
|
47
49
|
/* Symbols used in RE2 options. */
|
48
50
|
static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors,
|
49
51
|
id_max_mem, id_literal, id_never_nl, id_case_sensitive,
|
50
|
-
id_perl_classes, id_word_boundary, id_one_line,
|
51
|
-
|
52
|
+
id_perl_classes, id_word_boundary, id_one_line, id_unanchored,
|
53
|
+
id_anchor, id_anchor_start, id_anchor_both, id_exception,
|
54
|
+
id_submatches, id_startpos, id_endpos;
|
52
55
|
|
53
56
|
inline VALUE encoded_str_new(const char *str, long length, RE2::Options::Encoding encoding) {
|
54
57
|
if (encoding == RE2::Options::EncodingUTF8) {
|
@@ -122,7 +125,7 @@ static void parse_re2_options(RE2::Options* re2_options, const VALUE options) {
|
|
122
125
|
}
|
123
126
|
}
|
124
127
|
|
125
|
-
/* For compatibility with
|
128
|
+
/* For compatibility with Ruby < 2.7 */
|
126
129
|
#ifdef HAVE_RB_GC_MARK_MOVABLE
|
127
130
|
#define re2_compact_callback(x) (x),
|
128
131
|
#else
|
@@ -270,12 +273,14 @@ static VALUE re2_scanner_allocate(VALUE klass) {
|
|
270
273
|
}
|
271
274
|
|
272
275
|
/*
|
273
|
-
* Returns a frozen copy of the
|
276
|
+
* Returns a frozen copy of the text supplied when matching.
|
274
277
|
*
|
275
|
-
*
|
278
|
+
* If the text was already a frozen string, returns the original.
|
279
|
+
*
|
280
|
+
* @return [String] a frozen string with the text supplied when matching
|
276
281
|
* @example
|
277
|
-
* m = RE2::Regexp.new('(\d+)').
|
278
|
-
* m.string
|
282
|
+
* m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
|
283
|
+
* m.string #=> "bob 123"
|
279
284
|
*/
|
280
285
|
static VALUE re2_matchdata_string(const VALUE self) {
|
281
286
|
re2_matchdata *m;
|
@@ -285,9 +290,10 @@ static VALUE re2_matchdata_string(const VALUE self) {
|
|
285
290
|
}
|
286
291
|
|
287
292
|
/*
|
288
|
-
* Returns the
|
293
|
+
* Returns the text supplied when incrementally matching with
|
294
|
+
* {RE2::Regexp#scan}.
|
289
295
|
*
|
290
|
-
* @return [String] the original string
|
296
|
+
* @return [String] the original string passed to {RE2::Regexp#scan}
|
291
297
|
* @example
|
292
298
|
* c = RE2::Regexp.new('(\d+)').scan("foo")
|
293
299
|
* c.string #=> "foo"
|
@@ -300,9 +306,9 @@ static VALUE re2_scanner_string(const VALUE self) {
|
|
300
306
|
}
|
301
307
|
|
302
308
|
/*
|
303
|
-
* Returns whether the
|
309
|
+
* Returns whether the {RE2::Scanner} has consumed all input or not.
|
304
310
|
*
|
305
|
-
* @return [Boolean] whether the
|
311
|
+
* @return [Boolean] whether the {RE2::Scanner} has consumed all input or not
|
306
312
|
* @example
|
307
313
|
* c = RE2::Regexp.new('(\d+)').scan("foo")
|
308
314
|
* c.eof? #=> true
|
@@ -315,7 +321,7 @@ static VALUE re2_scanner_eof(const VALUE self) {
|
|
315
321
|
}
|
316
322
|
|
317
323
|
/*
|
318
|
-
* Rewind the
|
324
|
+
* Rewind the {RE2::Scanner} to the start of the string.
|
319
325
|
*
|
320
326
|
* @example
|
321
327
|
* s = RE2::Regexp.new('(\d+)').scan("1 2 3")
|
@@ -337,14 +343,19 @@ static VALUE re2_scanner_rewind(VALUE self) {
|
|
337
343
|
}
|
338
344
|
|
339
345
|
/*
|
340
|
-
* Scan the given text incrementally for matches
|
341
|
-
*
|
346
|
+
* Scan the given text incrementally for matches using
|
347
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L447-L463
|
348
|
+
* `FindAndConsume`}, returning an array of submatches on each subsequent
|
349
|
+
* call. Returns `nil` if no matches are found or an empty array for every
|
350
|
+
* match if the pattern has no capturing groups.
|
342
351
|
*
|
343
352
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
344
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
345
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
353
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
354
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
346
355
|
*
|
347
|
-
* @return [Array<String>] the
|
356
|
+
* @return [Array<String>] if the pattern has capturing groups
|
357
|
+
* @return [[]] if the pattern does not have capturing groups
|
358
|
+
* @return [nil] if no matches are found
|
348
359
|
* @example
|
349
360
|
* s = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
|
350
361
|
* s.scan #=> ["Foo"]
|
@@ -359,7 +370,7 @@ static VALUE re2_scanner_scan(VALUE self) {
|
|
359
370
|
|
360
371
|
std::vector<RE2::Arg> argv(c->number_of_capturing_groups);
|
361
372
|
std::vector<RE2::Arg*> args(c->number_of_capturing_groups);
|
362
|
-
std::vector<
|
373
|
+
std::vector<re2::StringPiece> matches(c->number_of_capturing_groups);
|
363
374
|
|
364
375
|
if (c->eof) {
|
365
376
|
return Qnil;
|
@@ -403,9 +414,6 @@ static VALUE re2_scanner_scan(VALUE self) {
|
|
403
414
|
}
|
404
415
|
}
|
405
416
|
|
406
|
-
/*
|
407
|
-
* Retrieve a matchdata by index or name.
|
408
|
-
*/
|
409
417
|
static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
|
410
418
|
re2_matchdata *m;
|
411
419
|
re2_pattern *p;
|
@@ -441,13 +449,14 @@ static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
|
|
441
449
|
}
|
442
450
|
|
443
451
|
/*
|
444
|
-
* Returns the number of elements in the
|
452
|
+
* Returns the number of elements in the {RE2::MatchData} (including the
|
453
|
+
* overall match, submatches and any `nil`s).
|
445
454
|
*
|
446
455
|
* @return [Integer] the number of elements
|
447
456
|
* @example
|
448
457
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
449
|
-
* m.size
|
450
|
-
* m.length
|
458
|
+
* m.size #=> 2
|
459
|
+
* m.length #=> 2
|
451
460
|
*/
|
452
461
|
static VALUE re2_matchdata_size(const VALUE self) {
|
453
462
|
re2_matchdata *m;
|
@@ -458,14 +467,15 @@ static VALUE re2_matchdata_size(const VALUE self) {
|
|
458
467
|
}
|
459
468
|
|
460
469
|
/*
|
461
|
-
* Returns the offset of the start of the nth element of the
|
470
|
+
* Returns the offset of the start of the nth element of the {RE2::MatchData}.
|
462
471
|
*
|
463
|
-
* @param [Integer, String, Symbol] n the name or number of the
|
464
|
-
* @return [Integer] the offset of the start of the match
|
472
|
+
* @param [Integer, String, Symbol] n the name or number of the submatch
|
473
|
+
* @return [Integer, nil] the offset of the start of the match or `nil` if
|
474
|
+
* there is no such submatch
|
465
475
|
* @example
|
466
476
|
* m = RE2::Regexp.new('ob (\d+)').match("bob 123")
|
467
|
-
* m.begin(0)
|
468
|
-
* m.begin(1)
|
477
|
+
* m.begin(0) #=> 1
|
478
|
+
* m.begin(1) #=> 4
|
469
479
|
*/
|
470
480
|
static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
|
471
481
|
re2_matchdata *m;
|
@@ -483,14 +493,16 @@ static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
|
|
483
493
|
}
|
484
494
|
|
485
495
|
/*
|
486
|
-
* Returns the offset of the character following the end of the nth element of
|
496
|
+
* Returns the offset of the character following the end of the nth element of
|
497
|
+
* the {RE2::MatchData}.
|
487
498
|
*
|
488
499
|
* @param [Integer, String, Symbol] n the name or number of the match
|
489
|
-
* @return [Integer] the offset of the character following the end of the
|
500
|
+
* @return [Integer, nil] the offset of the character following the end of the
|
501
|
+
* match or `nil` if there is no such match
|
490
502
|
* @example
|
491
503
|
* m = RE2::Regexp.new('ob (\d+) b').match("bob 123 bob")
|
492
|
-
* m.end(0)
|
493
|
-
* m.end(1)
|
504
|
+
* m.end(0) #=> 9
|
505
|
+
* m.end(1) #=> 7
|
494
506
|
*/
|
495
507
|
static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
|
496
508
|
re2_matchdata *m;
|
@@ -510,10 +522,10 @@ static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
|
|
510
522
|
/*
|
511
523
|
* Returns the {RE2::Regexp} used in the match.
|
512
524
|
*
|
513
|
-
* @return [RE2::Regexp] the
|
525
|
+
* @return [RE2::Regexp] the regular expression used in the match
|
514
526
|
* @example
|
515
527
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
516
|
-
* m.regexp
|
528
|
+
* m.regexp #=> #<RE2::Regexp /(\d+)/>
|
517
529
|
*/
|
518
530
|
static VALUE re2_matchdata_regexp(const VALUE self) {
|
519
531
|
re2_matchdata *m;
|
@@ -523,12 +535,12 @@ static VALUE re2_matchdata_regexp(const VALUE self) {
|
|
523
535
|
}
|
524
536
|
|
525
537
|
/*
|
526
|
-
* Returns the {RE2::Regexp} used in the
|
538
|
+
* Returns the {RE2::Regexp} used in the {RE2::Scanner}.
|
527
539
|
*
|
528
|
-
* @return [RE2::Regexp] the
|
540
|
+
* @return [RE2::Regexp] the regular expression used in the {RE2::Scanner}
|
529
541
|
* @example
|
530
542
|
* c = RE2::Regexp.new('(\d+)').scan("bob 123")
|
531
|
-
* c.regexp
|
543
|
+
* c.regexp #=> #<RE2::Regexp /(\d+)/>
|
532
544
|
*/
|
533
545
|
static VALUE re2_scanner_regexp(const VALUE self) {
|
534
546
|
re2_scanner *c;
|
@@ -544,16 +556,17 @@ static VALUE re2_regexp_allocate(VALUE klass) {
|
|
544
556
|
}
|
545
557
|
|
546
558
|
/*
|
547
|
-
* Returns the array of matches
|
559
|
+
* Returns the array of matches including the overall match, submatches and any
|
560
|
+
* `nil`s.
|
548
561
|
*
|
549
562
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
550
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
551
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
563
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
564
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
552
565
|
*
|
553
566
|
* @return [Array<String, nil>] the array of matches
|
554
567
|
* @example
|
555
568
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
556
|
-
* m.to_a
|
569
|
+
* m.to_a #=> ["123", "123"]
|
557
570
|
*/
|
558
571
|
static VALUE re2_matchdata_to_a(const VALUE self) {
|
559
572
|
re2_matchdata *m;
|
@@ -619,19 +632,17 @@ static VALUE re2_matchdata_named_match(const char* name, const VALUE self) {
|
|
619
632
|
* Retrieve zero, one or more matches by index or name.
|
620
633
|
*
|
621
634
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
622
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
623
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
624
|
-
*
|
625
|
-
* @return [Array<String, nil>, String, Boolean]
|
635
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
636
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
626
637
|
*
|
627
638
|
* @overload [](index)
|
628
639
|
* Access a particular match by index.
|
629
640
|
*
|
630
641
|
* @param [Integer] index the index of the match to fetch
|
631
|
-
* @return [String, nil] the specified match
|
642
|
+
* @return [String, nil] the specified match or `nil` if it isn't present
|
632
643
|
* @example
|
633
644
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
634
|
-
* m[0]
|
645
|
+
* m[0] #=> "123"
|
635
646
|
*
|
636
647
|
* @overload [](start, length)
|
637
648
|
* Access a range of matches by starting index and length.
|
@@ -641,7 +652,7 @@ static VALUE re2_matchdata_named_match(const char* name, const VALUE self) {
|
|
641
652
|
* @return [Array<String, nil>] the specified matches
|
642
653
|
* @example
|
643
654
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
644
|
-
* m[0, 1]
|
655
|
+
* m[0, 1] #=> ["123"]
|
645
656
|
*
|
646
657
|
* @overload [](range)
|
647
658
|
* Access a range of matches by index.
|
@@ -650,13 +661,13 @@ static VALUE re2_matchdata_named_match(const char* name, const VALUE self) {
|
|
650
661
|
* @return [Array<String, nil>] the specified matches
|
651
662
|
* @example
|
652
663
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
653
|
-
* m[0..1]
|
664
|
+
* m[0..1] #=> ["123", "123"]
|
654
665
|
*
|
655
666
|
* @overload [](name)
|
656
667
|
* Access a particular match by name.
|
657
668
|
*
|
658
669
|
* @param [String, Symbol] name the name of the match to fetch
|
659
|
-
* @return [String, nil] the specific match
|
670
|
+
* @return [String, nil] the specific match or `nil` if it isn't present
|
660
671
|
* @example
|
661
672
|
* m = RE2::Regexp.new('(?P<number>\d+)').match("bob 123")
|
662
673
|
* m["number"] #=> "123"
|
@@ -690,13 +701,13 @@ static VALUE re2_matchdata_to_s(const VALUE self) {
|
|
690
701
|
* Returns a printable version of the match.
|
691
702
|
*
|
692
703
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
693
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
694
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
704
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
705
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
695
706
|
*
|
696
707
|
* @return [String] a printable version of the match
|
697
708
|
* @example
|
698
709
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
699
|
-
* m.inspect
|
710
|
+
* m.inspect #=> "#<RE2::MatchData \"123\" 1:\"123\">"
|
700
711
|
*/
|
701
712
|
static VALUE re2_matchdata_inspect(const VALUE self) {
|
702
713
|
re2_matchdata *m;
|
@@ -734,13 +745,14 @@ static VALUE re2_matchdata_inspect(const VALUE self) {
|
|
734
745
|
* Returns the array of submatches for pattern matching.
|
735
746
|
*
|
736
747
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
737
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
738
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is
|
748
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
749
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is
|
750
|
+
* undefined).
|
739
751
|
*
|
740
752
|
* @return [Array<String, nil>] the array of submatches
|
741
753
|
* @example
|
742
754
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
743
|
-
* m.deconstruct
|
755
|
+
* m.deconstruct #=> ["123"]
|
744
756
|
*
|
745
757
|
* @example pattern matching
|
746
758
|
* case RE2::Regexp.new('(\d+) (\d+)').match("bob 123 456")
|
@@ -780,17 +792,18 @@ static VALUE re2_matchdata_deconstruct(const VALUE self) {
|
|
780
792
|
* order but an invalid name will cause the hash to be immediately returned.
|
781
793
|
*
|
782
794
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
783
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
784
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
795
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
796
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
785
797
|
*
|
786
798
|
* @return [Hash] a hash of capturing group names to submatches
|
787
|
-
* @param [Array<Symbol>, nil] keys an array of Symbol capturing group names
|
799
|
+
* @param [Array<Symbol>, nil] keys an array of `Symbol` capturing group names
|
800
|
+
* or `nil` to return all names
|
788
801
|
* @example
|
789
802
|
* m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
790
|
-
* m.deconstruct_keys(nil)
|
791
|
-
* m.deconstruct_keys([:numbers])
|
792
|
-
* m.deconstruct_keys([:fruit])
|
793
|
-
* m.deconstruct_keys([:letters, :fruit])
|
803
|
+
* m.deconstruct_keys(nil) #=> {numbers: "123", letters: "abc"}
|
804
|
+
* m.deconstruct_keys([:numbers]) #=> {numbers: "123"}
|
805
|
+
* m.deconstruct_keys([:fruit]) #=> {}
|
806
|
+
* m.deconstruct_keys([:letters, :fruit]) #=> {letters: "abc"}
|
794
807
|
*
|
795
808
|
* @example pattern matching
|
796
809
|
* case RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
@@ -839,11 +852,9 @@ static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys)
|
|
839
852
|
}
|
840
853
|
|
841
854
|
/*
|
842
|
-
*
|
843
|
-
* +pattern+ stored inside. Equivalent to +RE2::Regexp.new+.
|
855
|
+
* Shorthand to compile a new {RE2::Regexp}.
|
844
856
|
*
|
845
857
|
* @see RE2::Regexp#initialize
|
846
|
-
*
|
847
858
|
*/
|
848
859
|
static VALUE re2_re2(int argc, VALUE *argv, VALUE) {
|
849
860
|
return rb_class_new_instance(argc, argv, re2_cRegexp);
|
@@ -851,22 +862,21 @@ static VALUE re2_re2(int argc, VALUE *argv, VALUE) {
|
|
851
862
|
|
852
863
|
/*
|
853
864
|
* Returns a new {RE2::Regexp} object with a compiled version of
|
854
|
-
*
|
855
|
-
*
|
856
|
-
* @return [RE2::Regexp]
|
865
|
+
* `pattern` stored inside.
|
857
866
|
*
|
858
867
|
* @overload initialize(pattern)
|
859
868
|
* Returns a new {RE2::Regexp} object with a compiled version of
|
860
|
-
*
|
869
|
+
* `pattern` stored inside with the default options.
|
861
870
|
*
|
862
871
|
* @param [String] pattern the pattern to compile
|
863
|
-
* @return [RE2::Regexp]
|
872
|
+
* @return [RE2::Regexp] a {RE2::Regexp} with the specified pattern
|
873
|
+
* @raise [TypeError] if the given pattern can't be coerced to a `String`
|
864
874
|
* @raise [NoMemoryError] if memory could not be allocated for the compiled
|
865
|
-
*
|
875
|
+
* pattern
|
866
876
|
*
|
867
877
|
* @overload initialize(pattern, options)
|
868
878
|
* Returns a new {RE2::Regexp} object with a compiled version of
|
869
|
-
*
|
879
|
+
* `pattern` stored inside with the specified options.
|
870
880
|
*
|
871
881
|
* @param [String] pattern the pattern to compile
|
872
882
|
* @param [Hash] options the options with which to compile the pattern
|
@@ -876,12 +886,13 @@ static VALUE re2_re2(int argc, VALUE *argv, VALUE) {
|
|
876
886
|
* @option options [Boolean] :log_errors (true) log syntax and execution errors to ERROR
|
877
887
|
* @option options [Integer] :max_mem approx. max memory footprint of RE2
|
878
888
|
* @option options [Boolean] :literal (false) interpret string as literal, not regexp
|
879
|
-
* @option options [Boolean] :never_nl (false) never match
|
880
|
-
* @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with (?i) unless in posix_syntax mode)
|
881
|
-
* @option options [Boolean] :perl_classes (false) allow Perl's
|
882
|
-
* @option options [Boolean] :word_boundary (false) allow
|
883
|
-
* @option options [Boolean] :one_line (false)
|
884
|
-
* @return [RE2::Regexp]
|
889
|
+
* @option options [Boolean] :never_nl (false) never match `\n`, even if it is in regexp
|
890
|
+
* @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with `(?i)` unless in `posix_syntax` mode)
|
891
|
+
* @option options [Boolean] :perl_classes (false) allow Perl's `\d` `\s` `\w` `\D` `\S` `\W` when in `posix_syntax` mode
|
892
|
+
* @option options [Boolean] :word_boundary (false) allow `\b` `\B` (word boundary and not) when in `posix_syntax` mode
|
893
|
+
* @option options [Boolean] :one_line (false) `^` and `$` only match beginning and end of text when in `posix_syntax` mode
|
894
|
+
* @return [RE2::Regexp] a {RE2::Regexp} with the specified pattern and options
|
895
|
+
* @raise [TypeError] if the given pattern can't be coerced to a `String`
|
885
896
|
* @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
|
886
897
|
*/
|
887
898
|
static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
@@ -912,16 +923,17 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
|
912
923
|
}
|
913
924
|
|
914
925
|
/*
|
915
|
-
* Returns a printable version of the regular expression
|
926
|
+
* Returns a printable version of the regular expression.
|
916
927
|
*
|
917
928
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
918
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
919
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is
|
929
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
930
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is
|
931
|
+
* undefined).
|
920
932
|
*
|
921
933
|
* @return [String] a printable version of the regular expression
|
922
934
|
* @example
|
923
935
|
* re2 = RE2::Regexp.new("woo?")
|
924
|
-
* re2.inspect
|
936
|
+
* re2.inspect #=> "#<RE2::Regexp /woo?/>"
|
925
937
|
*/
|
926
938
|
static VALUE re2_regexp_inspect(const VALUE self) {
|
927
939
|
re2_pattern *p;
|
@@ -937,16 +949,16 @@ static VALUE re2_regexp_inspect(const VALUE self) {
|
|
937
949
|
}
|
938
950
|
|
939
951
|
/*
|
940
|
-
* Returns a string version of the regular expression
|
952
|
+
* Returns a string version of the regular expression.
|
941
953
|
*
|
942
954
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
943
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
944
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
955
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
956
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
945
957
|
*
|
946
958
|
* @return [String] a string version of the regular expression
|
947
959
|
* @example
|
948
960
|
* re2 = RE2::Regexp.new("woo?")
|
949
|
-
* re2.to_s
|
961
|
+
* re2.to_s #=> "woo?"
|
950
962
|
*/
|
951
963
|
static VALUE re2_regexp_to_s(const VALUE self) {
|
952
964
|
re2_pattern *p;
|
@@ -958,13 +970,12 @@ static VALUE re2_regexp_to_s(const VALUE self) {
|
|
958
970
|
}
|
959
971
|
|
960
972
|
/*
|
961
|
-
* Returns whether or not the regular expression
|
962
|
-
* was compiled successfully or not.
|
973
|
+
* Returns whether or not the regular expression was compiled successfully.
|
963
974
|
*
|
964
975
|
* @return [Boolean] whether or not compilation was successful
|
965
976
|
* @example
|
966
977
|
* re2 = RE2::Regexp.new("woo?")
|
967
|
-
* re2.ok?
|
978
|
+
* re2.ok? #=> true
|
968
979
|
*/
|
969
980
|
static VALUE re2_regexp_ok(const VALUE self) {
|
970
981
|
re2_pattern *p;
|
@@ -974,13 +985,13 @@ static VALUE re2_regexp_ok(const VALUE self) {
|
|
974
985
|
}
|
975
986
|
|
976
987
|
/*
|
977
|
-
* Returns whether or not the regular expression
|
978
|
-
*
|
988
|
+
* Returns whether or not the regular expression was compiled with the `utf8`
|
989
|
+
* option set to `true`.
|
979
990
|
*
|
980
|
-
* @return [Boolean] the utf8 option
|
991
|
+
* @return [Boolean] the `utf8` option
|
981
992
|
* @example
|
982
|
-
* re2 = RE2::Regexp.new("woo?", :
|
983
|
-
* re2.utf8?
|
993
|
+
* re2 = RE2::Regexp.new("woo?", utf8: true)
|
994
|
+
* re2.utf8? #=> true
|
984
995
|
*/
|
985
996
|
static VALUE re2_regexp_utf8(const VALUE self) {
|
986
997
|
re2_pattern *p;
|
@@ -990,13 +1001,13 @@ static VALUE re2_regexp_utf8(const VALUE self) {
|
|
990
1001
|
}
|
991
1002
|
|
992
1003
|
/*
|
993
|
-
* Returns whether or not the regular expression
|
994
|
-
*
|
1004
|
+
* Returns whether or not the regular expression was compiled with the
|
1005
|
+
* `posix_syntax` option set to `true`.
|
995
1006
|
*
|
996
|
-
* @return [Boolean] the posix_syntax option
|
1007
|
+
* @return [Boolean] the `posix_syntax` option
|
997
1008
|
* @example
|
998
|
-
* re2 = RE2::Regexp.new("woo?", :
|
999
|
-
* re2.posix_syntax?
|
1009
|
+
* re2 = RE2::Regexp.new("woo?", posix_syntax: true)
|
1010
|
+
* re2.posix_syntax? #=> true
|
1000
1011
|
*/
|
1001
1012
|
static VALUE re2_regexp_posix_syntax(const VALUE self) {
|
1002
1013
|
re2_pattern *p;
|
@@ -1006,13 +1017,13 @@ static VALUE re2_regexp_posix_syntax(const VALUE self) {
|
|
1006
1017
|
}
|
1007
1018
|
|
1008
1019
|
/*
|
1009
|
-
* Returns whether or not the regular expression
|
1010
|
-
*
|
1020
|
+
* Returns whether or not the regular expression was compiled with the
|
1021
|
+
* `longest_match` option set to `true`.
|
1011
1022
|
*
|
1012
|
-
* @return [Boolean] the longest_match option
|
1023
|
+
* @return [Boolean] the `longest_match` option
|
1013
1024
|
* @example
|
1014
|
-
* re2 = RE2::Regexp.new("woo?", :
|
1015
|
-
* re2.longest_match?
|
1025
|
+
* re2 = RE2::Regexp.new("woo?", longest_match: true)
|
1026
|
+
* re2.longest_match? #=> true
|
1016
1027
|
*/
|
1017
1028
|
static VALUE re2_regexp_longest_match(const VALUE self) {
|
1018
1029
|
re2_pattern *p;
|
@@ -1022,13 +1033,13 @@ static VALUE re2_regexp_longest_match(const VALUE self) {
|
|
1022
1033
|
}
|
1023
1034
|
|
1024
1035
|
/*
|
1025
|
-
* Returns whether or not the regular expression
|
1026
|
-
*
|
1036
|
+
* Returns whether or not the regular expression was compiled with the
|
1037
|
+
* `log_errors` option set to `true`.
|
1027
1038
|
*
|
1028
|
-
* @return [Boolean] the log_errors option
|
1039
|
+
* @return [Boolean] the `log_errors` option
|
1029
1040
|
* @example
|
1030
|
-
* re2 = RE2::Regexp.new("woo?", :
|
1031
|
-
* re2.log_errors?
|
1041
|
+
* re2 = RE2::Regexp.new("woo?", log_errors: true)
|
1042
|
+
* re2.log_errors? #=> true
|
1032
1043
|
*/
|
1033
1044
|
static VALUE re2_regexp_log_errors(const VALUE self) {
|
1034
1045
|
re2_pattern *p;
|
@@ -1038,13 +1049,12 @@ static VALUE re2_regexp_log_errors(const VALUE self) {
|
|
1038
1049
|
}
|
1039
1050
|
|
1040
1051
|
/*
|
1041
|
-
* Returns the max_mem setting for the regular expression
|
1042
|
-
* +re2+.
|
1052
|
+
* Returns the `max_mem` setting for the regular expression.
|
1043
1053
|
*
|
1044
|
-
* @return [Integer] the max_mem option
|
1054
|
+
* @return [Integer] the `max_mem` option
|
1045
1055
|
* @example
|
1046
|
-
* re2 = RE2::Regexp.new("woo?", :
|
1047
|
-
* re2.max_mem
|
1056
|
+
* re2 = RE2::Regexp.new("woo?", max_mem: 1024)
|
1057
|
+
* re2.max_mem #=> 1024
|
1048
1058
|
*/
|
1049
1059
|
static VALUE re2_regexp_max_mem(const VALUE self) {
|
1050
1060
|
re2_pattern *p;
|
@@ -1054,13 +1064,13 @@ static VALUE re2_regexp_max_mem(const VALUE self) {
|
|
1054
1064
|
}
|
1055
1065
|
|
1056
1066
|
/*
|
1057
|
-
* Returns whether or not the regular expression
|
1058
|
-
*
|
1067
|
+
* Returns whether or not the regular expression was compiled with the
|
1068
|
+
* `literal` option set to `true`.
|
1059
1069
|
*
|
1060
|
-
* @return [Boolean] the literal option
|
1070
|
+
* @return [Boolean] the `literal` option
|
1061
1071
|
* @example
|
1062
|
-
* re2 = RE2::Regexp.new("woo?", :
|
1063
|
-
* re2.literal?
|
1072
|
+
* re2 = RE2::Regexp.new("woo?", literal: true)
|
1073
|
+
* re2.literal? #=> true
|
1064
1074
|
*/
|
1065
1075
|
static VALUE re2_regexp_literal(const VALUE self) {
|
1066
1076
|
re2_pattern *p;
|
@@ -1070,13 +1080,13 @@ static VALUE re2_regexp_literal(const VALUE self) {
|
|
1070
1080
|
}
|
1071
1081
|
|
1072
1082
|
/*
|
1073
|
-
* Returns whether or not the regular expression
|
1074
|
-
*
|
1083
|
+
* Returns whether or not the regular expression was compiled with the
|
1084
|
+
* `never_nl` option set to `true`.
|
1075
1085
|
*
|
1076
|
-
* @return [Boolean] the never_nl option
|
1086
|
+
* @return [Boolean] the `never_nl` option
|
1077
1087
|
* @example
|
1078
|
-
* re2 = RE2::Regexp.new("woo?", :
|
1079
|
-
* re2.never_nl?
|
1088
|
+
* re2 = RE2::Regexp.new("woo?", never_nl: true)
|
1089
|
+
* re2.never_nl? #=> true
|
1080
1090
|
*/
|
1081
1091
|
static VALUE re2_regexp_never_nl(const VALUE self) {
|
1082
1092
|
re2_pattern *p;
|
@@ -1086,13 +1096,13 @@ static VALUE re2_regexp_never_nl(const VALUE self) {
|
|
1086
1096
|
}
|
1087
1097
|
|
1088
1098
|
/*
|
1089
|
-
* Returns whether or not the regular expression
|
1090
|
-
*
|
1099
|
+
* Returns whether or not the regular expression was compiled with the
|
1100
|
+
* `case_sensitive` option set to `true`.
|
1091
1101
|
*
|
1092
|
-
* @return [Boolean] the case_sensitive option
|
1102
|
+
* @return [Boolean] the `case_sensitive` option
|
1093
1103
|
* @example
|
1094
|
-
* re2 = RE2::Regexp.new("woo?", :
|
1095
|
-
* re2.case_sensitive?
|
1104
|
+
* re2 = RE2::Regexp.new("woo?", case_sensitive: true)
|
1105
|
+
* re2.case_sensitive? #=> true
|
1096
1106
|
*/
|
1097
1107
|
static VALUE re2_regexp_case_sensitive(const VALUE self) {
|
1098
1108
|
re2_pattern *p;
|
@@ -1102,27 +1112,27 @@ static VALUE re2_regexp_case_sensitive(const VALUE self) {
|
|
1102
1112
|
}
|
1103
1113
|
|
1104
1114
|
/*
|
1105
|
-
* Returns whether or not the regular expression
|
1106
|
-
*
|
1115
|
+
* Returns whether or not the regular expression was compiled with the
|
1116
|
+
* `case_sensitive` option set to `false`.
|
1107
1117
|
*
|
1108
|
-
* @return [Boolean] the inverse of the case_sensitive option
|
1118
|
+
* @return [Boolean] the inverse of the `case_sensitive` option
|
1109
1119
|
* @example
|
1110
|
-
* re2 = RE2::Regexp.new("woo?", :
|
1111
|
-
* re2.case_insensitive?
|
1112
|
-
* re2.casefold?
|
1120
|
+
* re2 = RE2::Regexp.new("woo?", case_sensitive: true)
|
1121
|
+
* re2.case_insensitive? #=> false
|
1122
|
+
* re2.casefold? #=> false
|
1113
1123
|
*/
|
1114
1124
|
static VALUE re2_regexp_case_insensitive(const VALUE self) {
|
1115
1125
|
return BOOL2RUBY(re2_regexp_case_sensitive(self) != Qtrue);
|
1116
1126
|
}
|
1117
1127
|
|
1118
1128
|
/*
|
1119
|
-
* Returns whether or not the regular expression
|
1120
|
-
*
|
1129
|
+
* Returns whether or not the regular expression was compiled with the
|
1130
|
+
* `perl_classes` option set to `true`.
|
1121
1131
|
*
|
1122
|
-
* @return [Boolean] the perl_classes option
|
1132
|
+
* @return [Boolean] the `perl_classes` option
|
1123
1133
|
* @example
|
1124
|
-
* re2 = RE2::Regexp.new("woo?", :
|
1125
|
-
* re2.perl_classes?
|
1134
|
+
* re2 = RE2::Regexp.new("woo?", perl_classes: true)
|
1135
|
+
* re2.perl_classes? #=> true
|
1126
1136
|
*/
|
1127
1137
|
static VALUE re2_regexp_perl_classes(const VALUE self) {
|
1128
1138
|
re2_pattern *p;
|
@@ -1132,13 +1142,13 @@ static VALUE re2_regexp_perl_classes(const VALUE self) {
|
|
1132
1142
|
}
|
1133
1143
|
|
1134
1144
|
/*
|
1135
|
-
* Returns whether or not the regular expression
|
1136
|
-
*
|
1145
|
+
* Returns whether or not the regular expression was compiled with the
|
1146
|
+
* `word_boundary` option set to `true`.
|
1137
1147
|
*
|
1138
|
-
* @return [Boolean] the word_boundary option
|
1148
|
+
* @return [Boolean] the `word_boundary` option
|
1139
1149
|
* @example
|
1140
|
-
* re2 = RE2::Regexp.new("woo?", :
|
1141
|
-
* re2.word_boundary?
|
1150
|
+
* re2 = RE2::Regexp.new("woo?", word_boundary: true)
|
1151
|
+
* re2.word_boundary? #=> true
|
1142
1152
|
*/
|
1143
1153
|
static VALUE re2_regexp_word_boundary(const VALUE self) {
|
1144
1154
|
re2_pattern *p;
|
@@ -1148,13 +1158,13 @@ static VALUE re2_regexp_word_boundary(const VALUE self) {
|
|
1148
1158
|
}
|
1149
1159
|
|
1150
1160
|
/*
|
1151
|
-
* Returns whether or not the regular expression
|
1152
|
-
*
|
1161
|
+
* Returns whether or not the regular expression was compiled with the
|
1162
|
+
* `one_line` option set to `true`.
|
1153
1163
|
*
|
1154
|
-
* @return [Boolean] the one_line option
|
1164
|
+
* @return [Boolean] the `one_line` option
|
1155
1165
|
* @example
|
1156
|
-
* re2 = RE2::Regexp.new("woo?", :
|
1157
|
-
* re2.one_line?
|
1166
|
+
* re2 = RE2::Regexp.new("woo?", one_line: true)
|
1167
|
+
* re2.one_line? #=> true
|
1158
1168
|
*/
|
1159
1169
|
static VALUE re2_regexp_one_line(const VALUE self) {
|
1160
1170
|
re2_pattern *p;
|
@@ -1164,10 +1174,10 @@ static VALUE re2_regexp_one_line(const VALUE self) {
|
|
1164
1174
|
}
|
1165
1175
|
|
1166
1176
|
/*
|
1167
|
-
* If the RE2 could not be created properly, returns an
|
1168
|
-
*
|
1177
|
+
* If the {RE2::Regexp} could not be created properly, returns an error string
|
1178
|
+
* otherwise returns `nil`.
|
1169
1179
|
*
|
1170
|
-
* @return [String, nil] the error string or nil
|
1180
|
+
* @return [String, nil] the error string or `nil`
|
1171
1181
|
*/
|
1172
1182
|
static VALUE re2_regexp_error(const VALUE self) {
|
1173
1183
|
re2_pattern *p;
|
@@ -1181,14 +1191,14 @@ static VALUE re2_regexp_error(const VALUE self) {
|
|
1181
1191
|
}
|
1182
1192
|
|
1183
1193
|
/*
|
1184
|
-
* If the RE2 could not be created properly, returns
|
1185
|
-
* the offending portion of the regexp otherwise returns nil
|
1194
|
+
* If the {RE2::Regexp} could not be created properly, returns
|
1195
|
+
* the offending portion of the regexp otherwise returns `nil`.
|
1186
1196
|
*
|
1187
1197
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
1188
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
1189
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
1198
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
1199
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
1190
1200
|
*
|
1191
|
-
* @return [String, nil] the offending portion of the regexp or nil
|
1201
|
+
* @return [String, nil] the offending portion of the regexp or `nil`
|
1192
1202
|
*/
|
1193
1203
|
static VALUE re2_regexp_error_arg(const VALUE self) {
|
1194
1204
|
re2_pattern *p;
|
@@ -1218,8 +1228,7 @@ static VALUE re2_regexp_program_size(const VALUE self) {
|
|
1218
1228
|
}
|
1219
1229
|
|
1220
1230
|
/*
|
1221
|
-
* Returns a hash of the options currently set for
|
1222
|
-
* +re2+.
|
1231
|
+
* Returns a hash of the options currently set for the {RE2::Regexp}.
|
1223
1232
|
*
|
1224
1233
|
* @return [Hash] the options
|
1225
1234
|
*/
|
@@ -1270,8 +1279,8 @@ static VALUE re2_regexp_options(const VALUE self) {
|
|
1270
1279
|
|
1271
1280
|
/*
|
1272
1281
|
* Returns the number of capturing subpatterns, or -1 if the regexp
|
1273
|
-
* wasn't valid on construction. The overall match (
|
1274
|
-
* count: if the regexp is "(a)(b)"
|
1282
|
+
* wasn't valid on construction. The overall match (`$0`) does not
|
1283
|
+
* count: if the regexp is `"(a)(b)"`, returns 2.
|
1275
1284
|
*
|
1276
1285
|
* @return [Integer] the number of capturing subpatterns
|
1277
1286
|
*/
|
@@ -1286,8 +1295,8 @@ static VALUE re2_regexp_number_of_capturing_groups(const VALUE self) {
|
|
1286
1295
|
* Returns a hash of names to capturing indices of groups.
|
1287
1296
|
*
|
1288
1297
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
1289
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
1290
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
1298
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
1299
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
1291
1300
|
*
|
1292
1301
|
* @return [Hash] a hash of names to capturing indices
|
1293
1302
|
*/
|
@@ -1309,63 +1318,93 @@ static VALUE re2_regexp_named_capturing_groups(const VALUE self) {
|
|
1309
1318
|
}
|
1310
1319
|
|
1311
1320
|
/*
|
1312
|
-
*
|
1313
|
-
*
|
1314
|
-
* instance
|
1321
|
+
* General matching: match the pattern against the given `text` using
|
1322
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L562-L588
|
1323
|
+
* `Match`} and return a {RE2::MatchData} instance with the specified number of
|
1324
|
+
* submatches (defaults to the total number of capturing groups) or a boolean
|
1325
|
+
* (if no submatches are required).
|
1315
1326
|
*
|
1316
|
-
*
|
1327
|
+
* The number of submatches has a significant impact on performance: requesting
|
1328
|
+
* one submatch is much faster than requesting more than one and requesting
|
1329
|
+
* zero submatches is faster still.
|
1317
1330
|
*
|
1318
1331
|
* @overload match(text)
|
1319
|
-
* Returns
|
1320
|
-
*
|
1332
|
+
* Returns a {RE2::MatchData} containing the matching pattern and all
|
1333
|
+
* submatches resulting from looking for the regexp in `text` if the pattern
|
1321
1334
|
* contains capturing groups.
|
1322
1335
|
*
|
1323
|
-
* Returns either true or false indicating whether a successful match was
|
1336
|
+
* Returns either `true` or `false` indicating whether a successful match was
|
1324
1337
|
* made if the pattern contains no capturing groups.
|
1325
1338
|
*
|
1326
1339
|
* @param [String] text the text to search
|
1327
|
-
* @return [RE2::MatchData] if the pattern contains capturing groups
|
1340
|
+
* @return [RE2::MatchData, nil] if the pattern contains capturing groups
|
1328
1341
|
* @return [Boolean] if the pattern does not contain capturing groups
|
1329
|
-
* @raise [NoMemoryError] if there was not enough memory to allocate the
|
1342
|
+
* @raise [NoMemoryError] if there was not enough memory to allocate the submatches
|
1343
|
+
* @raise [TypeError] if given text that cannot be coerced to a `String`
|
1330
1344
|
* @example Matching with capturing groups
|
1331
1345
|
* r = RE2::Regexp.new('w(o)(o)')
|
1332
|
-
* r.match('woo')
|
1346
|
+
* r.match('woo') #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
|
1333
1347
|
* @example Matching without capturing groups
|
1334
1348
|
* r = RE2::Regexp.new('woo')
|
1335
|
-
* r.match('woo')
|
1349
|
+
* r.match('woo') #=> true
|
1336
1350
|
*
|
1337
|
-
* @overload match(text,
|
1338
|
-
*
|
1339
|
-
*
|
1351
|
+
* @overload match(text, options)
|
1352
|
+
* See `match(text)` but with customisable offsets for starting and ending
|
1353
|
+
* matches, optional anchoring to the start or both ends of the text and a
|
1354
|
+
* specific number of submatches to extract (padded with `nil`s if
|
1355
|
+
* necessary).
|
1340
1356
|
*
|
1341
1357
|
* @param [String] text the text to search
|
1342
|
-
* @
|
1358
|
+
* @param [Hash] options the options with which to perform the match
|
1359
|
+
* @option options [Integer] :startpos (0) offset at which to start matching
|
1360
|
+
* @option options [Integer] :endpos offset at which to stop matching, defaults to the text length
|
1361
|
+
* @option options [Symbol] :anchor (:unanchored) one of :unanchored, :anchor_start, :anchor_both to anchor the match
|
1362
|
+
* @option options [Integer] :submatches how many submatches to extract (0 is
|
1363
|
+
* fastest), defaults to the number of capturing groups
|
1364
|
+
* @return [RE2::MatchData, nil] if extracting any submatches
|
1365
|
+
* @return [Boolean] if not extracting any submatches
|
1366
|
+
* @raise [ArgumentError] if given a negative number of submatches, invalid
|
1367
|
+
* anchor or invalid startpos, endpos pair
|
1343
1368
|
* @raise [NoMemoryError] if there was not enough memory to allocate the matches
|
1344
|
-
* @
|
1369
|
+
* @raise [TypeError] if given non-String text, non-numeric number of
|
1370
|
+
* submatches, non-symbol anchor or non-hash options
|
1371
|
+
* @raise [RE2::Regexp::UnsupportedError] if given an endpos argument on a
|
1372
|
+
* version of RE2 that does not support it
|
1373
|
+
* @example Matching with capturing groups
|
1345
1374
|
* r = RE2::Regexp.new('w(o)(o)')
|
1346
|
-
* r.match('woo',
|
1347
|
-
* r.match('
|
1375
|
+
* r.match('woo', submatches: 1) #=> #<RE2::MatchData "woo" 1:"o">
|
1376
|
+
* r.match('woo', submatches: 3) #=> #<RE2::MatchData "woo" 1:"o" 2:"o" 3:nil>
|
1377
|
+
* r.match('woot', anchor: :anchor_both, submatches: 0)
|
1378
|
+
* #=> false
|
1379
|
+
* r.match('woot', anchor: :anchor_start, submatches: 0)
|
1380
|
+
* #=> true
|
1381
|
+
* @example Matching without capturing groups
|
1382
|
+
* r = RE2::Regexp.new('wo+')
|
1383
|
+
* r.match('woot', anchor: :anchor_both) #=> false
|
1384
|
+
* r.match('woot', anchor: :anchor_start) #=> true
|
1348
1385
|
*
|
1349
|
-
* @overload match(text,
|
1350
|
-
*
|
1351
|
-
*
|
1386
|
+
* @overload match(text, submatches)
|
1387
|
+
* @deprecated Legacy syntax for matching against `text` with a specific
|
1388
|
+
* number of submatches to extract. Use `match(text, submatches: n)` instead.
|
1352
1389
|
*
|
1353
1390
|
* @param [String] text the text to search
|
1354
|
-
* @param [Integer]
|
1355
|
-
* @return [RE2::MatchData]
|
1356
|
-
* @
|
1357
|
-
* @raise [NoMemoryError] if there was not enough memory to allocate the
|
1391
|
+
* @param [Integer] submatches the number of submatches to extract
|
1392
|
+
* @return [RE2::MatchData, nil] if extracting any submatches
|
1393
|
+
* @return [Boolean] if not extracting any submatches
|
1394
|
+
* @raise [NoMemoryError] if there was not enough memory to allocate the submatches
|
1395
|
+
* @raise [TypeError] if given non-numeric number of submatches
|
1358
1396
|
* @example
|
1359
1397
|
* r = RE2::Regexp.new('w(o)(o)')
|
1398
|
+
* r.match('woo', 0) #=> true
|
1360
1399
|
* r.match('woo', 1) #=> #<RE2::MatchData "woo" 1:"o">
|
1361
|
-
* r.match('woo',
|
1400
|
+
* r.match('woo', 2) #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
|
1362
1401
|
*/
|
1363
1402
|
static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
1364
1403
|
re2_pattern *p;
|
1365
1404
|
re2_matchdata *m;
|
1366
|
-
VALUE text,
|
1405
|
+
VALUE text, options;
|
1367
1406
|
|
1368
|
-
rb_scan_args(argc, argv, "11", &text, &
|
1407
|
+
rb_scan_args(argc, argv, "11", &text, &options);
|
1369
1408
|
|
1370
1409
|
/* Ensure text is a string. */
|
1371
1410
|
StringValue(text);
|
@@ -1373,12 +1412,80 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
1373
1412
|
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
1374
1413
|
|
1375
1414
|
int n;
|
1415
|
+
int startpos = 0;
|
1416
|
+
int endpos = RSTRING_LEN(text);
|
1417
|
+
RE2::Anchor anchor = RE2::UNANCHORED;
|
1376
1418
|
|
1377
|
-
if (RTEST(
|
1378
|
-
|
1419
|
+
if (RTEST(options)) {
|
1420
|
+
if (FIXNUM_P(options)) {
|
1421
|
+
n = NUM2INT(options);
|
1422
|
+
|
1423
|
+
if (n < 0) {
|
1424
|
+
rb_raise(rb_eArgError, "number of matches should be >= 0");
|
1425
|
+
}
|
1426
|
+
} else {
|
1427
|
+
if (TYPE(options) != T_HASH) {
|
1428
|
+
options = rb_Hash(options);
|
1429
|
+
}
|
1379
1430
|
|
1380
|
-
|
1381
|
-
|
1431
|
+
VALUE endpos_option = rb_hash_aref(options, ID2SYM(id_endpos));
|
1432
|
+
if (!NIL_P(endpos_option)) {
|
1433
|
+
#ifdef HAVE_ENDPOS_ARGUMENT
|
1434
|
+
Check_Type(endpos_option, T_FIXNUM);
|
1435
|
+
|
1436
|
+
endpos = NUM2INT(endpos_option);
|
1437
|
+
|
1438
|
+
if (endpos < 0) {
|
1439
|
+
rb_raise(rb_eArgError, "endpos should be >= 0");
|
1440
|
+
}
|
1441
|
+
#else
|
1442
|
+
rb_raise(re2_eRegexpUnsupportedError, "current version of RE2::Match() does not support endpos argument");
|
1443
|
+
#endif
|
1444
|
+
}
|
1445
|
+
|
1446
|
+
VALUE anchor_option = rb_hash_aref(options, ID2SYM(id_anchor));
|
1447
|
+
if (!NIL_P(anchor_option)) {
|
1448
|
+
Check_Type(anchor_option, T_SYMBOL);
|
1449
|
+
|
1450
|
+
ID id_anchor_option = SYM2ID(anchor_option);
|
1451
|
+
if (id_anchor_option == id_unanchored) {
|
1452
|
+
anchor = RE2::UNANCHORED;
|
1453
|
+
} else if (id_anchor_option == id_anchor_start) {
|
1454
|
+
anchor = RE2::ANCHOR_START;
|
1455
|
+
} else if (id_anchor_option == id_anchor_both) {
|
1456
|
+
anchor = RE2::ANCHOR_BOTH;
|
1457
|
+
} else {
|
1458
|
+
rb_raise(rb_eArgError, "anchor should be one of: :unanchored, :anchor_start, :anchor_both");
|
1459
|
+
}
|
1460
|
+
}
|
1461
|
+
|
1462
|
+
VALUE submatches_option = rb_hash_aref(options, ID2SYM(id_submatches));
|
1463
|
+
if (!NIL_P(submatches_option)) {
|
1464
|
+
Check_Type(submatches_option, T_FIXNUM);
|
1465
|
+
|
1466
|
+
n = NUM2INT(submatches_option);
|
1467
|
+
|
1468
|
+
if (n < 0) {
|
1469
|
+
rb_raise(rb_eArgError, "number of matches should be >= 0");
|
1470
|
+
}
|
1471
|
+
} else {
|
1472
|
+
if (!p->pattern->ok()) {
|
1473
|
+
return Qnil;
|
1474
|
+
}
|
1475
|
+
|
1476
|
+
n = p->pattern->NumberOfCapturingGroups();
|
1477
|
+
}
|
1478
|
+
|
1479
|
+
VALUE startpos_option = rb_hash_aref(options, ID2SYM(id_startpos));
|
1480
|
+
if (!NIL_P(startpos_option)) {
|
1481
|
+
Check_Type(startpos_option, T_FIXNUM);
|
1482
|
+
|
1483
|
+
startpos = NUM2INT(startpos_option);
|
1484
|
+
|
1485
|
+
if (startpos < 0) {
|
1486
|
+
rb_raise(rb_eArgError, "startpos should be >= 0");
|
1487
|
+
}
|
1488
|
+
}
|
1382
1489
|
}
|
1383
1490
|
} else {
|
1384
1491
|
if (!p->pattern->ok()) {
|
@@ -1388,12 +1495,16 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
1388
1495
|
n = p->pattern->NumberOfCapturingGroups();
|
1389
1496
|
}
|
1390
1497
|
|
1498
|
+
if (startpos > endpos) {
|
1499
|
+
rb_raise(rb_eArgError, "startpos should be <= endpos");
|
1500
|
+
}
|
1501
|
+
|
1391
1502
|
if (n == 0) {
|
1392
1503
|
#ifdef HAVE_ENDPOS_ARGUMENT
|
1393
|
-
bool matched = p->pattern->Match(RSTRING_PTR(text),
|
1394
|
-
|
1504
|
+
bool matched = p->pattern->Match(RSTRING_PTR(text), startpos,
|
1505
|
+
endpos, anchor, 0, 0);
|
1395
1506
|
#else
|
1396
|
-
bool matched = p->pattern->Match(RSTRING_PTR(text),
|
1507
|
+
bool matched = p->pattern->Match(RSTRING_PTR(text), startpos, anchor,
|
1397
1508
|
0, 0);
|
1398
1509
|
#endif
|
1399
1510
|
return BOOL2RUBY(matched);
|
@@ -1418,11 +1529,11 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
1418
1529
|
m->number_of_matches = n;
|
1419
1530
|
|
1420
1531
|
#ifdef HAVE_ENDPOS_ARGUMENT
|
1421
|
-
bool matched = p->pattern->Match(RSTRING_PTR(m->text),
|
1422
|
-
|
1532
|
+
bool matched = p->pattern->Match(RSTRING_PTR(m->text), startpos,
|
1533
|
+
endpos, anchor, m->matches, n);
|
1423
1534
|
#else
|
1424
|
-
bool matched = p->pattern->Match(RSTRING_PTR(m->text),
|
1425
|
-
|
1535
|
+
bool matched = p->pattern->Match(RSTRING_PTR(m->text), startpos,
|
1536
|
+
anchor, m->matches, n);
|
1426
1537
|
#endif
|
1427
1538
|
if (matched) {
|
1428
1539
|
return matchdata;
|
@@ -1433,22 +1544,54 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
1433
1544
|
}
|
1434
1545
|
|
1435
1546
|
/*
|
1436
|
-
* Returns true
|
1437
|
-
*
|
1547
|
+
* Returns true if the pattern matches any substring of the given text using
|
1548
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L413-L427
|
1549
|
+
* `PartialMatch`}.
|
1438
1550
|
*
|
1439
1551
|
* @return [Boolean] whether the match was successful
|
1552
|
+
* @raise [TypeError] if text cannot be coerced to a `String`
|
1440
1553
|
*/
|
1441
1554
|
static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
|
1442
|
-
|
1555
|
+
re2_pattern *p;
|
1556
|
+
|
1557
|
+
/* Ensure text is a string. */
|
1558
|
+
StringValue(text);
|
1559
|
+
|
1560
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
1561
|
+
|
1562
|
+
return BOOL2RUBY(RE2::PartialMatch(RSTRING_PTR(text), *p->pattern));
|
1563
|
+
}
|
1564
|
+
|
1565
|
+
/*
|
1566
|
+
* Returns true if the pattern matches the given text using
|
1567
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L376-L411
|
1568
|
+
* `FullMatch`}.
|
1569
|
+
*
|
1570
|
+
* @return [Boolean] whether the match was successful
|
1571
|
+
* @raise [TypeError] if text cannot be coerced to a `String`
|
1572
|
+
*/
|
1573
|
+
static VALUE re2_regexp_full_match_p(const VALUE self, VALUE text) {
|
1574
|
+
re2_pattern *p;
|
1575
|
+
|
1576
|
+
/* Ensure text is a string. */
|
1577
|
+
StringValue(text);
|
1443
1578
|
|
1444
|
-
|
1579
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
1580
|
+
|
1581
|
+
return BOOL2RUBY(RE2::FullMatch(RSTRING_PTR(text), *p->pattern));
|
1445
1582
|
}
|
1446
1583
|
|
1447
1584
|
/*
|
1448
|
-
* Returns a {RE2::Scanner} for scanning the given text incrementally
|
1585
|
+
* Returns a {RE2::Scanner} for scanning the given text incrementally with
|
1586
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L447-L463
|
1587
|
+
* `FindAndConsume`}.
|
1449
1588
|
*
|
1589
|
+
* @param [String] text the text to scan incrementally
|
1590
|
+
* @return [RE2::Scanner] an `Enumerable` {RE2::Scanner} object
|
1591
|
+
* @raise [TypeError] if `text` cannot be coerced to a `String`
|
1450
1592
|
* @example
|
1451
1593
|
* c = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
|
1594
|
+
* #=> #<RE2::Scanner:0x0000000000000001>
|
1452
1595
|
*/
|
1453
1596
|
static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
|
1454
1597
|
/* Ensure text is a string. */
|
@@ -1477,17 +1620,40 @@ static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
|
|
1477
1620
|
}
|
1478
1621
|
|
1479
1622
|
/*
|
1480
|
-
* Returns
|
1481
|
-
*
|
1623
|
+
* Returns whether the underlying RE2 version supports passing an `endpos`
|
1624
|
+
* argument to
|
1625
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L562-L588
|
1626
|
+
* Match}. If not, {RE2::Regexp#match} will raise an error if attempting to
|
1627
|
+
* pass an `endpos`.
|
1628
|
+
*
|
1629
|
+
* @return [Boolean] whether the underlying
|
1630
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L562-L588
|
1631
|
+
* Match} has an endpos argument
|
1632
|
+
*/
|
1633
|
+
static VALUE re2_regexp_match_has_endpos_argument_p(VALUE) {
|
1634
|
+
#ifdef HAVE_ENDPOS_ARGUMENT
|
1635
|
+
return Qtrue;
|
1636
|
+
#else
|
1637
|
+
return Qfalse;
|
1638
|
+
#endif
|
1639
|
+
}
|
1640
|
+
|
1641
|
+
/*
|
1642
|
+
* Returns a copy of `str` with the first occurrence of `pattern` replaced with
|
1643
|
+
* `rewrite` using
|
1644
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L465-L480
|
1645
|
+
* `Replace`}.
|
1482
1646
|
*
|
1483
1647
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
1484
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
1485
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
1648
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
1649
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
1486
1650
|
*
|
1487
1651
|
* @param [String] str the string to modify
|
1488
1652
|
* @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
|
1489
1653
|
* @param [String] rewrite the string to replace with
|
1490
1654
|
* @return [String] the resulting string
|
1655
|
+
* @raise [TypeError] if the given rewrite or pattern (if not provided as a
|
1656
|
+
* {RE2::Regexp}) cannot be coerced to `String`s
|
1491
1657
|
* @example
|
1492
1658
|
* RE2.Replace("hello there", "hello", "howdy") #=> "howdy there"
|
1493
1659
|
* re2 = RE2::Regexp.new("hel+o")
|
@@ -1523,20 +1689,24 @@ static VALUE re2_Replace(VALUE, VALUE str, VALUE pattern,
|
|
1523
1689
|
}
|
1524
1690
|
|
1525
1691
|
/*
|
1526
|
-
* Return a copy of
|
1692
|
+
* Return a copy of `str` with `pattern` replaced by `rewrite` using
|
1693
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L482-L497
|
1694
|
+
* `GlobalReplace`}.
|
1527
1695
|
*
|
1528
1696
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
1529
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
1530
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
1697
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
1698
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
1531
1699
|
*
|
1532
1700
|
* @param [String] str the string to modify
|
1533
1701
|
* @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
|
1534
1702
|
* @param [String] rewrite the string to replace with
|
1703
|
+
* @raise [TypeError] if the given rewrite or pattern (if not provided as a
|
1704
|
+
* {RE2::Regexp}) cannot be coerced to `String`s
|
1535
1705
|
* @return [String] the resulting string
|
1536
1706
|
* @example
|
1537
1707
|
* re2 = RE2::Regexp.new("oo?")
|
1538
|
-
* RE2.GlobalReplace("whoops-doops", re2, "e")
|
1539
|
-
* RE2.GlobalReplace("hello there", "e", "i")
|
1708
|
+
* RE2.GlobalReplace("whoops-doops", re2, "e") #=> "wheps-deps"
|
1709
|
+
* RE2.GlobalReplace("hello there", "e", "i") #=> "hillo thiri"
|
1540
1710
|
*/
|
1541
1711
|
static VALUE re2_GlobalReplace(VALUE, VALUE str, VALUE pattern,
|
1542
1712
|
VALUE rewrite) {
|
@@ -1568,14 +1738,17 @@ static VALUE re2_GlobalReplace(VALUE, VALUE str, VALUE pattern,
|
|
1568
1738
|
}
|
1569
1739
|
|
1570
1740
|
/*
|
1571
|
-
* Returns a version of str with all potentially meaningful regexp
|
1572
|
-
*
|
1573
|
-
*
|
1741
|
+
* Returns a version of `str` with all potentially meaningful regexp characters
|
1742
|
+
* escaped using
|
1743
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L512-L518
|
1744
|
+
* `QuoteMeta`}. The returned string, used as a regular expression, will
|
1745
|
+
* exactly match the original string.
|
1574
1746
|
*
|
1575
1747
|
* @param [String] unquoted the unquoted string
|
1748
|
+
* @raise [TypeError] if the given unquoted string cannot be coerced to a `String`
|
1576
1749
|
* @return [String] the escaped string
|
1577
1750
|
* @example
|
1578
|
-
* RE2::Regexp.escape("1.5-2.0?")
|
1751
|
+
* RE2::Regexp.escape("1.5-2.0?") #=> "1\.5\-2\.0\?"
|
1579
1752
|
*/
|
1580
1753
|
static VALUE re2_QuoteMeta(VALUE, VALUE unquoted) {
|
1581
1754
|
StringValue(unquoted);
|
@@ -1641,14 +1814,14 @@ static VALUE re2_set_allocate(VALUE klass) {
|
|
1641
1814
|
* Returns a new {RE2::Set} object for the specified anchor with the default
|
1642
1815
|
* options.
|
1643
1816
|
*
|
1644
|
-
* @param [Symbol] anchor
|
1645
|
-
* @raise [ArgumentError] if anchor is not
|
1817
|
+
* @param [Symbol] anchor one of `:unanchored`, `:anchor_start`, `:anchor_both`
|
1818
|
+
* @raise [ArgumentError] if anchor is not `:unanchored`, `:anchor_start` or `:anchor_both`
|
1646
1819
|
* @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
|
1647
1820
|
*
|
1648
1821
|
* @overload initialize(anchor, options)
|
1649
1822
|
* Returns a new {RE2::Set} object with the specified options.
|
1650
1823
|
*
|
1651
|
-
* @param [Symbol] anchor
|
1824
|
+
* @param [Symbol] anchor one of `:unanchored`, `:anchor_start`, `:anchor_both`
|
1652
1825
|
* @param [Hash] options the options with which to compile the pattern
|
1653
1826
|
* @option options [Boolean] :utf8 (true) text and pattern are UTF-8; otherwise Latin-1
|
1654
1827
|
* @option options [Boolean] :posix_syntax (false) restrict regexps to POSIX egrep syntax
|
@@ -1656,13 +1829,13 @@ static VALUE re2_set_allocate(VALUE klass) {
|
|
1656
1829
|
* @option options [Boolean] :log_errors (true) log syntax and execution errors to ERROR
|
1657
1830
|
* @option options [Integer] :max_mem approx. max memory footprint of RE2
|
1658
1831
|
* @option options [Boolean] :literal (false) interpret string as literal, not regexp
|
1659
|
-
* @option options [Boolean] :never_nl (false) never match
|
1660
|
-
* @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with (?i) unless in posix_syntax mode)
|
1661
|
-
* @option options [Boolean] :perl_classes (false) allow Perl's
|
1662
|
-
* @option options [Boolean] :word_boundary (false) allow
|
1663
|
-
* @option options [Boolean] :one_line (false)
|
1664
|
-
* @return [RE2::Set]
|
1665
|
-
* @raise [ArgumentError] if anchor is not one of the accepted choices
|
1832
|
+
* @option options [Boolean] :never_nl (false) never match `\n`, even if it is in regexp
|
1833
|
+
* @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with `(?i)` unless in `posix_syntax` mode)
|
1834
|
+
* @option options [Boolean] :perl_classes (false) allow Perl's `\d` `\s` `\w` `\D` `\S` `\W` when in `posix_syntax` mode
|
1835
|
+
* @option options [Boolean] :word_boundary (false) allow `\b` `\B` (word boundary and not) when in `posix_syntax` mode
|
1836
|
+
* @option options [Boolean] :one_line (false) `^` and `$` only match beginning and end of text when in `posix_syntax` mode
|
1837
|
+
* @return [RE2::Set] a {RE2::Set} with the specified anchor and options
|
1838
|
+
* @raise [ArgumentError] if `anchor` is not one of the accepted choices
|
1666
1839
|
* @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
|
1667
1840
|
*/
|
1668
1841
|
static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
|
@@ -1676,12 +1849,12 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
|
|
1676
1849
|
|
1677
1850
|
if (!NIL_P(anchor)) {
|
1678
1851
|
Check_Type(anchor, T_SYMBOL);
|
1679
|
-
ID
|
1680
|
-
if (
|
1852
|
+
ID id_anchor_arg = SYM2ID(anchor);
|
1853
|
+
if (id_anchor_arg == id_unanchored) {
|
1681
1854
|
re2_anchor = RE2::UNANCHORED;
|
1682
|
-
} else if (
|
1855
|
+
} else if (id_anchor_arg == id_anchor_start) {
|
1683
1856
|
re2_anchor = RE2::ANCHOR_START;
|
1684
|
-
} else if (
|
1857
|
+
} else if (id_anchor_arg == id_anchor_both) {
|
1685
1858
|
re2_anchor = RE2::ANCHOR_BOTH;
|
1686
1859
|
} else {
|
1687
1860
|
rb_raise(rb_eArgError, "anchor should be one of: :unanchored, :anchor_start, :anchor_both");
|
@@ -1704,15 +1877,16 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
|
|
1704
1877
|
|
1705
1878
|
/*
|
1706
1879
|
* Adds a pattern to the set. Returns the index that will identify the pattern
|
1707
|
-
* in the output of #match. Cannot be called after #compile
|
1880
|
+
* in the output of {RE2::Set#match}. Cannot be called after {RE2::Set#compile}
|
1881
|
+
* has been called.
|
1708
1882
|
*
|
1709
1883
|
* @param [String] pattern the regex pattern
|
1710
1884
|
* @return [Integer] the index of the pattern in the set
|
1711
1885
|
* @raise [ArgumentError] if called after compile or the pattern is rejected
|
1712
1886
|
* @example
|
1713
1887
|
* set = RE2::Set.new
|
1714
|
-
* set.add("abc")
|
1715
|
-
* set.add("def")
|
1888
|
+
* set.add("abc") #=> 0
|
1889
|
+
* set.add("def") #=> 1
|
1716
1890
|
*/
|
1717
1891
|
static VALUE re2_set_add(VALUE self, VALUE pattern) {
|
1718
1892
|
StringValue(pattern);
|
@@ -1740,14 +1914,14 @@ static VALUE re2_set_add(VALUE self, VALUE pattern) {
|
|
1740
1914
|
}
|
1741
1915
|
|
1742
1916
|
/*
|
1743
|
-
* Compiles a Set so it can be used to match against. Must be called
|
1744
|
-
* and before #match.
|
1917
|
+
* Compiles a {RE2::Set} so it can be used to match against. Must be called
|
1918
|
+
* after {RE2::Set#add} and before {RE2::Set#match}.
|
1745
1919
|
*
|
1746
|
-
* @return [
|
1920
|
+
* @return [Boolean] whether compilation was a success
|
1747
1921
|
* @example
|
1748
1922
|
* set = RE2::Set.new
|
1749
1923
|
* set.add("abc")
|
1750
|
-
* set.compile
|
1924
|
+
* set.compile #=> true
|
1751
1925
|
*/
|
1752
1926
|
static VALUE re2_set_compile(VALUE self) {
|
1753
1927
|
re2_set *s;
|
@@ -1757,11 +1931,12 @@ static VALUE re2_set_compile(VALUE self) {
|
|
1757
1931
|
}
|
1758
1932
|
|
1759
1933
|
/*
|
1760
|
-
* Returns whether the underlying
|
1761
|
-
*
|
1762
|
-
*
|
1934
|
+
* Returns whether the underlying RE2 version outputs error information from
|
1935
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/set.h#L62-L65
|
1936
|
+
* `RE2::Set::Match`}. If not, {RE2::Set#match} will raise an error if attempting to set
|
1937
|
+
* its `:exception` option to `true`.
|
1763
1938
|
*
|
1764
|
-
* @return [
|
1939
|
+
* @return [Boolean] whether the underlying RE2 outputs error information from {RE2::Set} matches
|
1765
1940
|
*/
|
1766
1941
|
static VALUE re2_set_match_raises_errors_p(VALUE) {
|
1767
1942
|
#ifdef HAVE_ERROR_INFO_ARGUMENT
|
@@ -1785,31 +1960,31 @@ static VALUE re2_set_match_raises_errors_p(VALUE) {
|
|
1785
1960
|
* @param [String] str the text to match against
|
1786
1961
|
* @return [Array<Integer>] the indices of matching regexps
|
1787
1962
|
* @raise [MatchError] if an error occurs while matching
|
1788
|
-
* @raise [UnsupportedError] if the underlying version of
|
1963
|
+
* @raise [UnsupportedError] if the underlying version of RE2 does not output error information
|
1789
1964
|
* @example
|
1790
1965
|
* set = RE2::Set.new
|
1791
1966
|
* set.add("abc")
|
1792
1967
|
* set.add("def")
|
1793
1968
|
* set.compile
|
1794
|
-
* set.match("abcdef")
|
1969
|
+
* set.match("abcdef") #=> [0, 1]
|
1795
1970
|
*
|
1796
1971
|
* @overload match(str, options)
|
1797
1972
|
* Returns an array of integer indices of patterns matching the given string
|
1798
1973
|
* (if any). Raises exceptions if there are any errors while matching and the
|
1799
|
-
*
|
1974
|
+
* `:exception` option is set to true.
|
1800
1975
|
*
|
1801
1976
|
* @param [String] str the text to match against
|
1802
1977
|
* @param [Hash] options the options with which to match
|
1803
|
-
* @option options [Boolean] :exception (true) whether to raise exceptions with
|
1978
|
+
* @option options [Boolean] :exception (true) whether to raise exceptions with RE2's error information (not supported on ABI version 0 of RE2)
|
1804
1979
|
* @return [Array<Integer>] the indices of matching regexps
|
1805
1980
|
* @raise [MatchError] if an error occurs while matching
|
1806
|
-
* @raise [UnsupportedError] if the underlying version of
|
1981
|
+
* @raise [UnsupportedError] if the underlying version of RE2 does not output error information
|
1807
1982
|
* @example
|
1808
1983
|
* set = RE2::Set.new
|
1809
1984
|
* set.add("abc")
|
1810
1985
|
* set.add("def")
|
1811
1986
|
* set.compile
|
1812
|
-
* set.match("abcdef", :
|
1987
|
+
* set.match("abcdef", exception: true) #=> [0, 1]
|
1813
1988
|
*/
|
1814
1989
|
static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
|
1815
1990
|
VALUE str, options;
|
@@ -1877,6 +2052,8 @@ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
|
|
1877
2052
|
extern "C" void Init_re2(void) {
|
1878
2053
|
re2_mRE2 = rb_define_module("RE2");
|
1879
2054
|
re2_cRegexp = rb_define_class_under(re2_mRE2, "Regexp", rb_cObject);
|
2055
|
+
re2_eRegexpUnsupportedError = rb_define_class_under(re2_cRegexp,
|
2056
|
+
"UnsupportedError", rb_const_get(rb_cObject, rb_intern("StandardError")));
|
1880
2057
|
re2_cMatchData = rb_define_class_under(re2_mRE2, "MatchData", rb_cObject);
|
1881
2058
|
re2_cScanner = rb_define_class_under(re2_mRE2, "Scanner", rb_cObject);
|
1882
2059
|
re2_cSet = rb_define_class_under(re2_mRE2, "Set", rb_cObject);
|
@@ -1930,6 +2107,8 @@ extern "C" void Init_re2(void) {
|
|
1930
2107
|
rb_define_method(re2_cScanner, "rewind",
|
1931
2108
|
RUBY_METHOD_FUNC(re2_scanner_rewind), 0);
|
1932
2109
|
|
2110
|
+
rb_define_singleton_method(re2_cRegexp, "match_has_endpos_argument?",
|
2111
|
+
RUBY_METHOD_FUNC(re2_regexp_match_has_endpos_argument_p), 0);
|
1933
2112
|
rb_define_method(re2_cRegexp, "initialize",
|
1934
2113
|
RUBY_METHOD_FUNC(re2_regexp_initialize), -1);
|
1935
2114
|
rb_define_method(re2_cRegexp, "ok?", RUBY_METHOD_FUNC(re2_regexp_ok), 0);
|
@@ -1947,12 +2126,14 @@ extern "C" void Init_re2(void) {
|
|
1947
2126
|
RUBY_METHOD_FUNC(re2_regexp_named_capturing_groups), 0);
|
1948
2127
|
rb_define_method(re2_cRegexp, "match", RUBY_METHOD_FUNC(re2_regexp_match),
|
1949
2128
|
-1);
|
1950
|
-
rb_define_method(re2_cRegexp, "match?",
|
1951
|
-
|
1952
|
-
rb_define_method(re2_cRegexp, "
|
1953
|
-
RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
|
1954
|
-
rb_define_method(re2_cRegexp, "===",
|
2129
|
+
rb_define_method(re2_cRegexp, "match?", RUBY_METHOD_FUNC(re2_regexp_match_p),
|
2130
|
+
1);
|
2131
|
+
rb_define_method(re2_cRegexp, "partial_match?",
|
1955
2132
|
RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
|
2133
|
+
rb_define_method(re2_cRegexp, "=~", RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
|
2134
|
+
rb_define_method(re2_cRegexp, "===", RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
|
2135
|
+
rb_define_method(re2_cRegexp, "full_match?",
|
2136
|
+
RUBY_METHOD_FUNC(re2_regexp_full_match_p), 1);
|
1956
2137
|
rb_define_method(re2_cRegexp, "scan",
|
1957
2138
|
RUBY_METHOD_FUNC(re2_regexp_scan), 1);
|
1958
2139
|
rb_define_method(re2_cRegexp, "to_s", RUBY_METHOD_FUNC(re2_regexp_to_s), 0);
|
@@ -2009,6 +2190,8 @@ extern "C" void Init_re2(void) {
|
|
2009
2190
|
RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
|
2010
2191
|
rb_define_singleton_method(re2_cRegexp, "quote",
|
2011
2192
|
RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
|
2193
|
+
|
2194
|
+
// (see RE2::Regexp#initialize)
|
2012
2195
|
rb_define_singleton_method(re2_cRegexp, "compile",
|
2013
2196
|
RUBY_METHOD_FUNC(rb_class_new_instance), -1);
|
2014
2197
|
|
@@ -2027,7 +2210,11 @@ extern "C" void Init_re2(void) {
|
|
2027
2210
|
id_word_boundary = rb_intern("word_boundary");
|
2028
2211
|
id_one_line = rb_intern("one_line");
|
2029
2212
|
id_unanchored = rb_intern("unanchored");
|
2213
|
+
id_anchor = rb_intern("anchor");
|
2030
2214
|
id_anchor_start = rb_intern("anchor_start");
|
2031
2215
|
id_anchor_both = rb_intern("anchor_both");
|
2032
2216
|
id_exception = rb_intern("exception");
|
2217
|
+
id_submatches = rb_intern("submatches");
|
2218
|
+
id_startpos = rb_intern("startpos");
|
2219
|
+
id_endpos = rb_intern("endpos");
|
2033
2220
|
}
|